{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.218298848707588, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 63.09210744064006, "learning_rate": 2.702702702702703e-08, "loss": 2.0907, "step": 1 }, { "epoch": 0.0, "grad_norm": 41.448049465342834, "learning_rate": 5.405405405405406e-08, "loss": 2.0215, "step": 2 }, { "epoch": 0.0, "grad_norm": 47.026785887695155, "learning_rate": 8.108108108108109e-08, "loss": 2.0995, "step": 3 }, { "epoch": 0.0, "grad_norm": 52.83417573592187, "learning_rate": 1.0810810810810812e-07, "loss": 2.1134, "step": 4 }, { "epoch": 0.0, "grad_norm": 29.752645757077097, "learning_rate": 1.3513513513513515e-07, "loss": 2.1541, "step": 5 }, { "epoch": 0.0, "grad_norm": 41.74351662988793, "learning_rate": 1.6216216216216218e-07, "loss": 2.2341, "step": 6 }, { "epoch": 0.0, "grad_norm": 47.13605325623752, "learning_rate": 1.8918918918918921e-07, "loss": 2.3537, "step": 7 }, { "epoch": 0.0, "grad_norm": 40.05344148605918, "learning_rate": 2.1621621621621625e-07, "loss": 2.1526, "step": 8 }, { "epoch": 0.0, "grad_norm": 41.606123543309764, "learning_rate": 2.4324324324324326e-07, "loss": 2.2158, "step": 9 }, { "epoch": 0.0, "grad_norm": 47.3919369207522, "learning_rate": 2.702702702702703e-07, "loss": 2.2508, "step": 10 }, { "epoch": 0.0, "grad_norm": 44.71142856608848, "learning_rate": 2.972972972972973e-07, "loss": 2.1635, "step": 11 }, { "epoch": 0.0, "grad_norm": 42.27079352967162, "learning_rate": 3.2432432432432436e-07, "loss": 2.2709, "step": 12 }, { "epoch": 0.0, "grad_norm": 35.78531807299802, "learning_rate": 3.513513513513514e-07, "loss": 2.1149, "step": 13 }, { "epoch": 0.0, "grad_norm": 49.284697480404766, "learning_rate": 3.7837837837837843e-07, "loss": 2.2079, "step": 14 }, { "epoch": 0.0, "grad_norm": 40.71527633576261, "learning_rate": 4.0540540540540546e-07, "loss": 1.853, "step": 15 }, { "epoch": 0.0, "grad_norm": 46.29481642359868, "learning_rate": 4.324324324324325e-07, "loss": 1.9446, "step": 16 }, { "epoch": 0.0, "grad_norm": 38.30865061994851, "learning_rate": 4.5945945945945953e-07, "loss": 2.0898, "step": 17 }, { "epoch": 0.0, "grad_norm": 43.88696790640592, "learning_rate": 4.864864864864865e-07, "loss": 1.8325, "step": 18 }, { "epoch": 0.0, "grad_norm": 25.462836661475112, "learning_rate": 5.135135135135135e-07, "loss": 1.6462, "step": 19 }, { "epoch": 0.0, "grad_norm": 25.210403543119014, "learning_rate": 5.405405405405406e-07, "loss": 1.6375, "step": 20 }, { "epoch": 0.0, "grad_norm": 27.033049559916677, "learning_rate": 5.675675675675676e-07, "loss": 1.6669, "step": 21 }, { "epoch": 0.0, "grad_norm": 23.7804483699658, "learning_rate": 5.945945945945947e-07, "loss": 1.6073, "step": 22 }, { "epoch": 0.0, "grad_norm": 41.65583695037341, "learning_rate": 6.216216216216217e-07, "loss": 1.5485, "step": 23 }, { "epoch": 0.0, "grad_norm": 19.50002166627634, "learning_rate": 6.486486486486487e-07, "loss": 1.4954, "step": 24 }, { "epoch": 0.0, "grad_norm": 15.840159645176156, "learning_rate": 6.756756756756758e-07, "loss": 1.463, "step": 25 }, { "epoch": 0.0, "grad_norm": 14.39522714576107, "learning_rate": 7.027027027027028e-07, "loss": 1.1524, "step": 26 }, { "epoch": 0.0, "grad_norm": 10.525545979505534, "learning_rate": 7.297297297297298e-07, "loss": 1.2525, "step": 27 }, { "epoch": 0.0, "grad_norm": 21.623559694339768, "learning_rate": 7.567567567567569e-07, "loss": 1.2244, "step": 28 }, { "epoch": 0.0, "grad_norm": 12.438738315604772, "learning_rate": 7.837837837837839e-07, "loss": 1.242, "step": 29 }, { "epoch": 0.0, "grad_norm": 8.791264076001971, "learning_rate": 8.108108108108109e-07, "loss": 1.123, "step": 30 }, { "epoch": 0.0, "grad_norm": 8.444522217696221, "learning_rate": 8.37837837837838e-07, "loss": 1.128, "step": 31 }, { "epoch": 0.0, "grad_norm": 11.80937189736569, "learning_rate": 8.64864864864865e-07, "loss": 1.1132, "step": 32 }, { "epoch": 0.0, "grad_norm": 12.134902430214023, "learning_rate": 8.91891891891892e-07, "loss": 1.1739, "step": 33 }, { "epoch": 0.0, "grad_norm": 24.27912192427012, "learning_rate": 9.189189189189191e-07, "loss": 1.0642, "step": 34 }, { "epoch": 0.0, "grad_norm": 13.074055779236055, "learning_rate": 9.459459459459461e-07, "loss": 1.5063, "step": 35 }, { "epoch": 0.0, "grad_norm": 8.454507776358321, "learning_rate": 9.72972972972973e-07, "loss": 0.9876, "step": 36 }, { "epoch": 0.0, "grad_norm": 9.362154008331844, "learning_rate": 1.0000000000000002e-06, "loss": 1.3223, "step": 37 }, { "epoch": 0.0, "grad_norm": 11.847660888509978, "learning_rate": 1.027027027027027e-06, "loss": 1.0352, "step": 38 }, { "epoch": 0.0, "grad_norm": 6.8948029511869615, "learning_rate": 1.0540540540540542e-06, "loss": 1.115, "step": 39 }, { "epoch": 0.0, "grad_norm": 5.656382557200861, "learning_rate": 1.0810810810810812e-06, "loss": 1.3187, "step": 40 }, { "epoch": 0.0, "grad_norm": 6.842034747268024, "learning_rate": 1.1081081081081083e-06, "loss": 1.1373, "step": 41 }, { "epoch": 0.0, "grad_norm": 5.9197907315298846, "learning_rate": 1.1351351351351352e-06, "loss": 0.9229, "step": 42 }, { "epoch": 0.0, "grad_norm": 4.946532136719213, "learning_rate": 1.1621621621621624e-06, "loss": 1.2383, "step": 43 }, { "epoch": 0.0, "grad_norm": 5.933609927910655, "learning_rate": 1.1891891891891893e-06, "loss": 1.0346, "step": 44 }, { "epoch": 0.0, "grad_norm": 6.49703904887925, "learning_rate": 1.2162162162162164e-06, "loss": 1.0387, "step": 45 }, { "epoch": 0.0, "grad_norm": 4.762039043147149, "learning_rate": 1.2432432432432434e-06, "loss": 0.9998, "step": 46 }, { "epoch": 0.0, "grad_norm": 3.92883755344509, "learning_rate": 1.2702702702702705e-06, "loss": 1.0296, "step": 47 }, { "epoch": 0.0, "grad_norm": 7.005043590598583, "learning_rate": 1.2972972972972974e-06, "loss": 1.1131, "step": 48 }, { "epoch": 0.0, "grad_norm": 5.354632966884938, "learning_rate": 1.3243243243243246e-06, "loss": 1.0134, "step": 49 }, { "epoch": 0.0, "grad_norm": 4.556566511271708, "learning_rate": 1.3513513513513515e-06, "loss": 0.859, "step": 50 }, { "epoch": 0.0, "grad_norm": 5.632328269570113, "learning_rate": 1.3783783783783786e-06, "loss": 1.0472, "step": 51 }, { "epoch": 0.0, "grad_norm": 4.9486532482865435, "learning_rate": 1.4054054054054056e-06, "loss": 0.9433, "step": 52 }, { "epoch": 0.0, "grad_norm": 4.865192625631578, "learning_rate": 1.4324324324324327e-06, "loss": 1.1251, "step": 53 }, { "epoch": 0.0, "grad_norm": 4.357689894765142, "learning_rate": 1.4594594594594596e-06, "loss": 1.0328, "step": 54 }, { "epoch": 0.0, "grad_norm": 3.718722198461544, "learning_rate": 1.4864864864864868e-06, "loss": 1.0607, "step": 55 }, { "epoch": 0.0, "grad_norm": 5.324459427328635, "learning_rate": 1.5135135135135137e-06, "loss": 0.9641, "step": 56 }, { "epoch": 0.0, "grad_norm": 4.866079854989161, "learning_rate": 1.5405405405405409e-06, "loss": 1.1034, "step": 57 }, { "epoch": 0.0, "grad_norm": 4.98776615712582, "learning_rate": 1.5675675675675678e-06, "loss": 1.0333, "step": 58 }, { "epoch": 0.0, "grad_norm": 4.711843051671795, "learning_rate": 1.5945945945945947e-06, "loss": 1.1368, "step": 59 }, { "epoch": 0.0, "grad_norm": 3.96747806209191, "learning_rate": 1.6216216216216219e-06, "loss": 1.0649, "step": 60 }, { "epoch": 0.0, "grad_norm": 4.807553599493247, "learning_rate": 1.6486486486486488e-06, "loss": 0.8842, "step": 61 }, { "epoch": 0.01, "grad_norm": 3.2736315334006445, "learning_rate": 1.675675675675676e-06, "loss": 0.9168, "step": 62 }, { "epoch": 0.01, "grad_norm": 4.182243485448351, "learning_rate": 1.7027027027027028e-06, "loss": 0.9442, "step": 63 }, { "epoch": 0.01, "grad_norm": 4.4850171249159985, "learning_rate": 1.72972972972973e-06, "loss": 0.8695, "step": 64 }, { "epoch": 0.01, "grad_norm": 4.44246074518427, "learning_rate": 1.756756756756757e-06, "loss": 0.9205, "step": 65 }, { "epoch": 0.01, "grad_norm": 4.2871223675437555, "learning_rate": 1.783783783783784e-06, "loss": 0.9512, "step": 66 }, { "epoch": 0.01, "grad_norm": 20.313595128982193, "learning_rate": 1.810810810810811e-06, "loss": 0.9122, "step": 67 }, { "epoch": 0.01, "grad_norm": 3.4560526712826176, "learning_rate": 1.8378378378378381e-06, "loss": 0.915, "step": 68 }, { "epoch": 0.01, "grad_norm": 3.7990307229254463, "learning_rate": 1.864864864864865e-06, "loss": 0.9674, "step": 69 }, { "epoch": 0.01, "grad_norm": 3.920458916035328, "learning_rate": 1.8918918918918922e-06, "loss": 0.9847, "step": 70 }, { "epoch": 0.01, "grad_norm": 4.222083142609564, "learning_rate": 1.918918918918919e-06, "loss": 1.0758, "step": 71 }, { "epoch": 0.01, "grad_norm": 3.548309752947073, "learning_rate": 1.945945945945946e-06, "loss": 0.9791, "step": 72 }, { "epoch": 0.01, "grad_norm": 3.622733873280609, "learning_rate": 1.9729729729729734e-06, "loss": 0.9241, "step": 73 }, { "epoch": 0.01, "grad_norm": 3.173226704615171, "learning_rate": 2.0000000000000003e-06, "loss": 0.8568, "step": 74 }, { "epoch": 0.01, "grad_norm": 3.319861917917085, "learning_rate": 2.0270270270270273e-06, "loss": 0.93, "step": 75 }, { "epoch": 0.01, "grad_norm": 3.340663374122506, "learning_rate": 2.054054054054054e-06, "loss": 1.0522, "step": 76 }, { "epoch": 0.01, "grad_norm": 3.6689457635724394, "learning_rate": 2.0810810810810815e-06, "loss": 0.8734, "step": 77 }, { "epoch": 0.01, "grad_norm": 3.9546214054278646, "learning_rate": 2.1081081081081085e-06, "loss": 1.309, "step": 78 }, { "epoch": 0.01, "grad_norm": 4.1361774814504955, "learning_rate": 2.1351351351351354e-06, "loss": 1.0352, "step": 79 }, { "epoch": 0.01, "grad_norm": 3.933341932972953, "learning_rate": 2.1621621621621623e-06, "loss": 1.1228, "step": 80 }, { "epoch": 0.01, "grad_norm": 4.352479533301301, "learning_rate": 2.1891891891891897e-06, "loss": 0.9103, "step": 81 }, { "epoch": 0.01, "grad_norm": 4.620591320930049, "learning_rate": 2.2162162162162166e-06, "loss": 1.0265, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.8703432705528513, "learning_rate": 2.2432432432432435e-06, "loss": 0.9905, "step": 83 }, { "epoch": 0.01, "grad_norm": 4.393478414302074, "learning_rate": 2.2702702702702705e-06, "loss": 1.0622, "step": 84 }, { "epoch": 0.01, "grad_norm": 6.265928855922091, "learning_rate": 2.297297297297298e-06, "loss": 1.1032, "step": 85 }, { "epoch": 0.01, "grad_norm": 4.107673309909188, "learning_rate": 2.3243243243243247e-06, "loss": 0.8631, "step": 86 }, { "epoch": 0.01, "grad_norm": 3.835601844883204, "learning_rate": 2.3513513513513517e-06, "loss": 1.0654, "step": 87 }, { "epoch": 0.01, "grad_norm": 3.7545219775073124, "learning_rate": 2.3783783783783786e-06, "loss": 1.0408, "step": 88 }, { "epoch": 0.01, "grad_norm": 4.924451663306775, "learning_rate": 2.4054054054054055e-06, "loss": 0.9569, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.8906953580773687, "learning_rate": 2.432432432432433e-06, "loss": 0.9616, "step": 90 }, { "epoch": 0.01, "grad_norm": 3.575749328563311, "learning_rate": 2.45945945945946e-06, "loss": 1.0187, "step": 91 }, { "epoch": 0.01, "grad_norm": 8.469633477745303, "learning_rate": 2.4864864864864867e-06, "loss": 1.1732, "step": 92 }, { "epoch": 0.01, "grad_norm": 3.3897385105814117, "learning_rate": 2.5135135135135137e-06, "loss": 0.8862, "step": 93 }, { "epoch": 0.01, "grad_norm": 3.7948253205051694, "learning_rate": 2.540540540540541e-06, "loss": 1.0327, "step": 94 }, { "epoch": 0.01, "grad_norm": 5.371187255522291, "learning_rate": 2.5675675675675675e-06, "loss": 1.034, "step": 95 }, { "epoch": 0.01, "grad_norm": 4.560170089873212, "learning_rate": 2.594594594594595e-06, "loss": 0.8899, "step": 96 }, { "epoch": 0.01, "grad_norm": 4.671898125348859, "learning_rate": 2.621621621621622e-06, "loss": 0.791, "step": 97 }, { "epoch": 0.01, "grad_norm": 3.76292106021511, "learning_rate": 2.648648648648649e-06, "loss": 0.8861, "step": 98 }, { "epoch": 0.01, "grad_norm": 5.3526080641590505, "learning_rate": 2.6756756756756757e-06, "loss": 0.9757, "step": 99 }, { "epoch": 0.01, "grad_norm": 6.325620004738459, "learning_rate": 2.702702702702703e-06, "loss": 1.0721, "step": 100 }, { "epoch": 0.01, "grad_norm": 3.7766975500508364, "learning_rate": 2.72972972972973e-06, "loss": 0.9764, "step": 101 }, { "epoch": 0.01, "grad_norm": 3.833312727347566, "learning_rate": 2.7567567567567573e-06, "loss": 1.0064, "step": 102 }, { "epoch": 0.01, "grad_norm": 4.274198012440263, "learning_rate": 2.783783783783784e-06, "loss": 0.9874, "step": 103 }, { "epoch": 0.01, "grad_norm": 4.292666283682956, "learning_rate": 2.810810810810811e-06, "loss": 0.9572, "step": 104 }, { "epoch": 0.01, "grad_norm": 8.044630585777997, "learning_rate": 2.837837837837838e-06, "loss": 0.9615, "step": 105 }, { "epoch": 0.01, "grad_norm": 10.377028267012342, "learning_rate": 2.8648648648648654e-06, "loss": 0.7981, "step": 106 }, { "epoch": 0.01, "grad_norm": 6.230962434779337, "learning_rate": 2.891891891891892e-06, "loss": 0.9214, "step": 107 }, { "epoch": 0.01, "grad_norm": 5.078803174877479, "learning_rate": 2.9189189189189193e-06, "loss": 0.9328, "step": 108 }, { "epoch": 0.01, "grad_norm": 9.178026998368148, "learning_rate": 2.9459459459459462e-06, "loss": 0.8246, "step": 109 }, { "epoch": 0.01, "grad_norm": 3.632601979523958, "learning_rate": 2.9729729729729736e-06, "loss": 1.0224, "step": 110 }, { "epoch": 0.01, "grad_norm": 6.064716985739635, "learning_rate": 3e-06, "loss": 1.0143, "step": 111 }, { "epoch": 0.01, "grad_norm": 5.987949350631371, "learning_rate": 3.0270270270270274e-06, "loss": 0.9865, "step": 112 }, { "epoch": 0.01, "grad_norm": 4.197057468104055, "learning_rate": 3.0540540540540544e-06, "loss": 0.967, "step": 113 }, { "epoch": 0.01, "grad_norm": 5.023760757753928, "learning_rate": 3.0810810810810817e-06, "loss": 0.9915, "step": 114 }, { "epoch": 0.01, "grad_norm": 5.949813120525793, "learning_rate": 3.1081081081081082e-06, "loss": 0.9101, "step": 115 }, { "epoch": 0.01, "grad_norm": 4.733118568805419, "learning_rate": 3.1351351351351356e-06, "loss": 0.9872, "step": 116 }, { "epoch": 0.01, "grad_norm": 4.920966341519552, "learning_rate": 3.1621621621621625e-06, "loss": 0.8534, "step": 117 }, { "epoch": 0.01, "grad_norm": 5.071200176079295, "learning_rate": 3.1891891891891894e-06, "loss": 1.1142, "step": 118 }, { "epoch": 0.01, "grad_norm": 4.889251040672398, "learning_rate": 3.2162162162162164e-06, "loss": 0.9795, "step": 119 }, { "epoch": 0.01, "grad_norm": 5.9299095643721875, "learning_rate": 3.2432432432432437e-06, "loss": 0.8322, "step": 120 }, { "epoch": 0.01, "grad_norm": 10.5772155570809, "learning_rate": 3.2702702702702706e-06, "loss": 0.937, "step": 121 }, { "epoch": 0.01, "grad_norm": 5.747279093538681, "learning_rate": 3.2972972972972976e-06, "loss": 0.9593, "step": 122 }, { "epoch": 0.01, "grad_norm": 11.953456917374227, "learning_rate": 3.3243243243243245e-06, "loss": 0.9224, "step": 123 }, { "epoch": 0.01, "grad_norm": 4.896396900943141, "learning_rate": 3.351351351351352e-06, "loss": 0.8117, "step": 124 }, { "epoch": 0.01, "grad_norm": 4.8692754198154, "learning_rate": 3.3783783783783788e-06, "loss": 0.9104, "step": 125 }, { "epoch": 0.01, "grad_norm": 6.34690544735298, "learning_rate": 3.4054054054054057e-06, "loss": 1.0861, "step": 126 }, { "epoch": 0.01, "grad_norm": 5.362948551124936, "learning_rate": 3.4324324324324326e-06, "loss": 0.8018, "step": 127 }, { "epoch": 0.01, "grad_norm": 4.653801965753808, "learning_rate": 3.45945945945946e-06, "loss": 0.7235, "step": 128 }, { "epoch": 0.01, "grad_norm": 5.489050527532971, "learning_rate": 3.4864864864864865e-06, "loss": 0.8185, "step": 129 }, { "epoch": 0.01, "grad_norm": 5.124224436914261, "learning_rate": 3.513513513513514e-06, "loss": 0.7926, "step": 130 }, { "epoch": 0.01, "grad_norm": 10.999712639206107, "learning_rate": 3.5405405405405408e-06, "loss": 1.0187, "step": 131 }, { "epoch": 0.01, "grad_norm": 5.719248554836371, "learning_rate": 3.567567567567568e-06, "loss": 0.9715, "step": 132 }, { "epoch": 0.01, "grad_norm": 6.850255386958801, "learning_rate": 3.5945945945945946e-06, "loss": 0.9046, "step": 133 }, { "epoch": 0.01, "grad_norm": 5.275246259471631, "learning_rate": 3.621621621621622e-06, "loss": 1.0054, "step": 134 }, { "epoch": 0.01, "grad_norm": 9.780966148279665, "learning_rate": 3.648648648648649e-06, "loss": 0.9502, "step": 135 }, { "epoch": 0.01, "grad_norm": 5.857996828125333, "learning_rate": 3.6756756756756763e-06, "loss": 0.8745, "step": 136 }, { "epoch": 0.01, "grad_norm": 18.804929852954515, "learning_rate": 3.7027027027027028e-06, "loss": 0.8358, "step": 137 }, { "epoch": 0.01, "grad_norm": 6.666580613271323, "learning_rate": 3.72972972972973e-06, "loss": 0.9765, "step": 138 }, { "epoch": 0.01, "grad_norm": 16.59079768970637, "learning_rate": 3.756756756756757e-06, "loss": 0.9116, "step": 139 }, { "epoch": 0.01, "grad_norm": 10.078376920826488, "learning_rate": 3.7837837837837844e-06, "loss": 0.947, "step": 140 }, { "epoch": 0.01, "grad_norm": 5.230742828123224, "learning_rate": 3.810810810810811e-06, "loss": 0.7936, "step": 141 }, { "epoch": 0.01, "grad_norm": 6.251875425533412, "learning_rate": 3.837837837837838e-06, "loss": 0.7847, "step": 142 }, { "epoch": 0.01, "grad_norm": 21.089064452089342, "learning_rate": 3.864864864864865e-06, "loss": 0.805, "step": 143 }, { "epoch": 0.01, "grad_norm": 6.813479314855189, "learning_rate": 3.891891891891892e-06, "loss": 0.8976, "step": 144 }, { "epoch": 0.01, "grad_norm": 8.473649775180455, "learning_rate": 3.918918918918919e-06, "loss": 0.7821, "step": 145 }, { "epoch": 0.01, "grad_norm": 5.811233637559505, "learning_rate": 3.945945945945947e-06, "loss": 0.9501, "step": 146 }, { "epoch": 0.01, "grad_norm": 4.985364552629455, "learning_rate": 3.972972972972973e-06, "loss": 0.9514, "step": 147 }, { "epoch": 0.01, "grad_norm": 7.431266694054168, "learning_rate": 4.000000000000001e-06, "loss": 0.7551, "step": 148 }, { "epoch": 0.01, "grad_norm": 4.753559866561872, "learning_rate": 4.027027027027028e-06, "loss": 1.1062, "step": 149 }, { "epoch": 0.01, "grad_norm": 9.58835384775318, "learning_rate": 4.0540540540540545e-06, "loss": 0.7539, "step": 150 }, { "epoch": 0.01, "grad_norm": 7.457588006162735, "learning_rate": 4.0810810810810815e-06, "loss": 0.9726, "step": 151 }, { "epoch": 0.01, "grad_norm": 6.596394144876574, "learning_rate": 4.108108108108108e-06, "loss": 0.8226, "step": 152 }, { "epoch": 0.01, "grad_norm": 6.733853503611332, "learning_rate": 4.135135135135135e-06, "loss": 0.87, "step": 153 }, { "epoch": 0.01, "grad_norm": 5.71577603498809, "learning_rate": 4.162162162162163e-06, "loss": 0.7438, "step": 154 }, { "epoch": 0.01, "grad_norm": 5.658958532440653, "learning_rate": 4.189189189189189e-06, "loss": 0.7735, "step": 155 }, { "epoch": 0.01, "grad_norm": 8.494352208903642, "learning_rate": 4.216216216216217e-06, "loss": 0.8795, "step": 156 }, { "epoch": 0.01, "grad_norm": 7.513746715130291, "learning_rate": 4.243243243243244e-06, "loss": 0.8419, "step": 157 }, { "epoch": 0.01, "grad_norm": 14.129489357210856, "learning_rate": 4.270270270270271e-06, "loss": 0.8887, "step": 158 }, { "epoch": 0.01, "grad_norm": 6.485982506631931, "learning_rate": 4.297297297297298e-06, "loss": 0.7415, "step": 159 }, { "epoch": 0.01, "grad_norm": 7.495307495484502, "learning_rate": 4.324324324324325e-06, "loss": 0.8468, "step": 160 }, { "epoch": 0.01, "grad_norm": 11.894580487680233, "learning_rate": 4.351351351351352e-06, "loss": 0.8536, "step": 161 }, { "epoch": 0.01, "grad_norm": 28.270425570727863, "learning_rate": 4.378378378378379e-06, "loss": 0.8292, "step": 162 }, { "epoch": 0.01, "grad_norm": 17.981107840077165, "learning_rate": 4.4054054054054054e-06, "loss": 0.8553, "step": 163 }, { "epoch": 0.01, "grad_norm": 7.740632578049493, "learning_rate": 4.432432432432433e-06, "loss": 0.9331, "step": 164 }, { "epoch": 0.01, "grad_norm": 6.083395560536381, "learning_rate": 4.45945945945946e-06, "loss": 0.9074, "step": 165 }, { "epoch": 0.01, "grad_norm": 4.160025202563131, "learning_rate": 4.486486486486487e-06, "loss": 0.8046, "step": 166 }, { "epoch": 0.01, "grad_norm": 8.556373648188933, "learning_rate": 4.513513513513514e-06, "loss": 0.7178, "step": 167 }, { "epoch": 0.01, "grad_norm": 8.174998528877031, "learning_rate": 4.540540540540541e-06, "loss": 1.0875, "step": 168 }, { "epoch": 0.01, "grad_norm": 6.6416170532041425, "learning_rate": 4.567567567567568e-06, "loss": 0.8532, "step": 169 }, { "epoch": 0.01, "grad_norm": 7.585986727506687, "learning_rate": 4.594594594594596e-06, "loss": 0.8369, "step": 170 }, { "epoch": 0.01, "grad_norm": 6.119169963884442, "learning_rate": 4.621621621621622e-06, "loss": 0.7298, "step": 171 }, { "epoch": 0.01, "grad_norm": 18.974694130198262, "learning_rate": 4.6486486486486495e-06, "loss": 0.9494, "step": 172 }, { "epoch": 0.01, "grad_norm": 13.577484804958662, "learning_rate": 4.675675675675676e-06, "loss": 0.7836, "step": 173 }, { "epoch": 0.01, "grad_norm": 6.473307435218165, "learning_rate": 4.702702702702703e-06, "loss": 0.8785, "step": 174 }, { "epoch": 0.01, "grad_norm": 12.309467131845393, "learning_rate": 4.72972972972973e-06, "loss": 0.9952, "step": 175 }, { "epoch": 0.01, "grad_norm": 167.12808754379284, "learning_rate": 4.756756756756757e-06, "loss": 0.9495, "step": 176 }, { "epoch": 0.01, "grad_norm": 9.989622733894004, "learning_rate": 4.783783783783784e-06, "loss": 0.7514, "step": 177 }, { "epoch": 0.01, "grad_norm": 13.067919078628053, "learning_rate": 4.810810810810811e-06, "loss": 0.8249, "step": 178 }, { "epoch": 0.01, "grad_norm": 7.935641593095554, "learning_rate": 4.837837837837838e-06, "loss": 0.6688, "step": 179 }, { "epoch": 0.01, "grad_norm": 8.893171488568813, "learning_rate": 4.864864864864866e-06, "loss": 0.8727, "step": 180 }, { "epoch": 0.01, "grad_norm": 7.3781143637466995, "learning_rate": 4.891891891891893e-06, "loss": 0.7877, "step": 181 }, { "epoch": 0.01, "grad_norm": 4.358170942522939, "learning_rate": 4.91891891891892e-06, "loss": 0.8175, "step": 182 }, { "epoch": 0.01, "grad_norm": 13.741369902994137, "learning_rate": 4.9459459459459466e-06, "loss": 0.7942, "step": 183 }, { "epoch": 0.01, "grad_norm": 5.918663857808989, "learning_rate": 4.9729729729729735e-06, "loss": 0.9443, "step": 184 }, { "epoch": 0.02, "grad_norm": 8.353429675245378, "learning_rate": 5e-06, "loss": 0.7864, "step": 185 }, { "epoch": 0.02, "grad_norm": 6.7497784790141315, "learning_rate": 5.027027027027027e-06, "loss": 0.795, "step": 186 }, { "epoch": 0.02, "grad_norm": 15.351375545207794, "learning_rate": 5.054054054054054e-06, "loss": 0.8987, "step": 187 }, { "epoch": 0.02, "grad_norm": 14.258534725657086, "learning_rate": 5.081081081081082e-06, "loss": 0.9285, "step": 188 }, { "epoch": 0.02, "grad_norm": 6.817064617260092, "learning_rate": 5.108108108108108e-06, "loss": 0.973, "step": 189 }, { "epoch": 0.02, "grad_norm": 17.3062496560883, "learning_rate": 5.135135135135135e-06, "loss": 0.708, "step": 190 }, { "epoch": 0.02, "grad_norm": 7.203174443499325, "learning_rate": 5.162162162162162e-06, "loss": 0.9388, "step": 191 }, { "epoch": 0.02, "grad_norm": 40.67582300539913, "learning_rate": 5.18918918918919e-06, "loss": 1.0517, "step": 192 }, { "epoch": 0.02, "grad_norm": 6.624778758554718, "learning_rate": 5.216216216216217e-06, "loss": 1.0026, "step": 193 }, { "epoch": 0.02, "grad_norm": 14.287986400957072, "learning_rate": 5.243243243243244e-06, "loss": 0.906, "step": 194 }, { "epoch": 0.02, "grad_norm": 26.677020243165824, "learning_rate": 5.2702702702702705e-06, "loss": 0.77, "step": 195 }, { "epoch": 0.02, "grad_norm": 10.717079814807025, "learning_rate": 5.297297297297298e-06, "loss": 0.8379, "step": 196 }, { "epoch": 0.02, "grad_norm": 11.19930819211915, "learning_rate": 5.324324324324324e-06, "loss": 0.9831, "step": 197 }, { "epoch": 0.02, "grad_norm": 43.4313433637369, "learning_rate": 5.351351351351351e-06, "loss": 0.8114, "step": 198 }, { "epoch": 0.02, "grad_norm": 16.408372849316656, "learning_rate": 5.378378378378378e-06, "loss": 0.7613, "step": 199 }, { "epoch": 0.02, "grad_norm": 23.725845507583394, "learning_rate": 5.405405405405406e-06, "loss": 0.8821, "step": 200 }, { "epoch": 0.02, "grad_norm": 28.163039556065826, "learning_rate": 5.432432432432433e-06, "loss": 0.8628, "step": 201 }, { "epoch": 0.02, "grad_norm": 16.159000089346666, "learning_rate": 5.45945945945946e-06, "loss": 0.7834, "step": 202 }, { "epoch": 0.02, "grad_norm": 10.275740415764174, "learning_rate": 5.486486486486487e-06, "loss": 0.9121, "step": 203 }, { "epoch": 0.02, "grad_norm": 7.390622591115062, "learning_rate": 5.513513513513515e-06, "loss": 0.8141, "step": 204 }, { "epoch": 0.02, "grad_norm": 13.232428200498525, "learning_rate": 5.540540540540541e-06, "loss": 0.8234, "step": 205 }, { "epoch": 0.02, "grad_norm": 62.07516521308197, "learning_rate": 5.567567567567568e-06, "loss": 0.7933, "step": 206 }, { "epoch": 0.02, "grad_norm": 11.175694736971199, "learning_rate": 5.5945945945945945e-06, "loss": 0.8196, "step": 207 }, { "epoch": 0.02, "grad_norm": 6.0444088873022, "learning_rate": 5.621621621621622e-06, "loss": 0.7836, "step": 208 }, { "epoch": 0.02, "grad_norm": 39.72425191380748, "learning_rate": 5.648648648648649e-06, "loss": 0.7978, "step": 209 }, { "epoch": 0.02, "grad_norm": 18.83072139201907, "learning_rate": 5.675675675675676e-06, "loss": 0.7496, "step": 210 }, { "epoch": 0.02, "grad_norm": 19.39567354263128, "learning_rate": 5.702702702702702e-06, "loss": 0.8802, "step": 211 }, { "epoch": 0.02, "grad_norm": 11.085087085604622, "learning_rate": 5.729729729729731e-06, "loss": 1.029, "step": 212 }, { "epoch": 0.02, "grad_norm": 12.909762265401948, "learning_rate": 5.756756756756757e-06, "loss": 0.7876, "step": 213 }, { "epoch": 0.02, "grad_norm": 16.11046008150788, "learning_rate": 5.783783783783784e-06, "loss": 0.8135, "step": 214 }, { "epoch": 0.02, "grad_norm": 44.14717703238208, "learning_rate": 5.810810810810811e-06, "loss": 0.835, "step": 215 }, { "epoch": 0.02, "grad_norm": 18.177978861554003, "learning_rate": 5.837837837837839e-06, "loss": 0.8438, "step": 216 }, { "epoch": 0.02, "grad_norm": 66.71809569625383, "learning_rate": 5.8648648648648655e-06, "loss": 0.7977, "step": 217 }, { "epoch": 0.02, "grad_norm": 14.840364705847634, "learning_rate": 5.8918918918918924e-06, "loss": 0.8885, "step": 218 }, { "epoch": 0.02, "grad_norm": 9.681458196947158, "learning_rate": 5.9189189189189185e-06, "loss": 0.8049, "step": 219 }, { "epoch": 0.02, "grad_norm": 9.887191247030259, "learning_rate": 5.945945945945947e-06, "loss": 0.9249, "step": 220 }, { "epoch": 0.02, "grad_norm": 14.750787757895132, "learning_rate": 5.972972972972973e-06, "loss": 0.8357, "step": 221 }, { "epoch": 0.02, "grad_norm": 18.087394846992474, "learning_rate": 6e-06, "loss": 0.8042, "step": 222 }, { "epoch": 0.02, "grad_norm": 10.240412502278401, "learning_rate": 6.027027027027027e-06, "loss": 0.9406, "step": 223 }, { "epoch": 0.02, "grad_norm": 12.269878789685151, "learning_rate": 6.054054054054055e-06, "loss": 0.848, "step": 224 }, { "epoch": 0.02, "grad_norm": 21.600613059620084, "learning_rate": 6.081081081081082e-06, "loss": 0.7376, "step": 225 }, { "epoch": 0.02, "grad_norm": 12.963309761319014, "learning_rate": 6.108108108108109e-06, "loss": 0.8527, "step": 226 }, { "epoch": 0.02, "grad_norm": 10.669914035420659, "learning_rate": 6.135135135135135e-06, "loss": 0.7558, "step": 227 }, { "epoch": 0.02, "grad_norm": 53.12530809986306, "learning_rate": 6.162162162162163e-06, "loss": 0.8773, "step": 228 }, { "epoch": 0.02, "grad_norm": 36.05059540713405, "learning_rate": 6.1891891891891895e-06, "loss": 0.8124, "step": 229 }, { "epoch": 0.02, "grad_norm": 8.365441593192777, "learning_rate": 6.2162162162162164e-06, "loss": 0.9012, "step": 230 }, { "epoch": 0.02, "grad_norm": 21.84171449880579, "learning_rate": 6.243243243243243e-06, "loss": 1.0252, "step": 231 }, { "epoch": 0.02, "grad_norm": 11.051045322010468, "learning_rate": 6.270270270270271e-06, "loss": 0.8, "step": 232 }, { "epoch": 0.02, "grad_norm": 38.374965406477585, "learning_rate": 6.297297297297298e-06, "loss": 0.6923, "step": 233 }, { "epoch": 0.02, "grad_norm": 8.842298304296136, "learning_rate": 6.324324324324325e-06, "loss": 0.6765, "step": 234 }, { "epoch": 0.02, "grad_norm": 293.9493770531771, "learning_rate": 6.351351351351351e-06, "loss": 0.8428, "step": 235 }, { "epoch": 0.02, "grad_norm": 12.708204678434027, "learning_rate": 6.378378378378379e-06, "loss": 0.763, "step": 236 }, { "epoch": 0.02, "grad_norm": 11.827661198740815, "learning_rate": 6.405405405405406e-06, "loss": 0.9776, "step": 237 }, { "epoch": 0.02, "grad_norm": 34.965119172103456, "learning_rate": 6.432432432432433e-06, "loss": 1.1438, "step": 238 }, { "epoch": 0.02, "grad_norm": 12.538066150617878, "learning_rate": 6.45945945945946e-06, "loss": 0.771, "step": 239 }, { "epoch": 0.02, "grad_norm": 11.680474102043787, "learning_rate": 6.486486486486487e-06, "loss": 0.9046, "step": 240 }, { "epoch": 0.02, "grad_norm": 13.05422018950879, "learning_rate": 6.513513513513514e-06, "loss": 0.8716, "step": 241 }, { "epoch": 0.02, "grad_norm": 42.99120943286823, "learning_rate": 6.540540540540541e-06, "loss": 1.0477, "step": 242 }, { "epoch": 0.02, "grad_norm": 29.171658361373236, "learning_rate": 6.567567567567567e-06, "loss": 0.8403, "step": 243 }, { "epoch": 0.02, "grad_norm": 19.917751544861098, "learning_rate": 6.594594594594595e-06, "loss": 0.7563, "step": 244 }, { "epoch": 0.02, "grad_norm": 76.86140496135718, "learning_rate": 6.621621621621622e-06, "loss": 0.7932, "step": 245 }, { "epoch": 0.02, "grad_norm": 8.143892348910658, "learning_rate": 6.648648648648649e-06, "loss": 0.6029, "step": 246 }, { "epoch": 0.02, "grad_norm": 10.470600723252575, "learning_rate": 6.675675675675676e-06, "loss": 0.766, "step": 247 }, { "epoch": 0.02, "grad_norm": 7.93404184031104, "learning_rate": 6.702702702702704e-06, "loss": 0.8127, "step": 248 }, { "epoch": 0.02, "grad_norm": 9.90086838041159, "learning_rate": 6.729729729729731e-06, "loss": 0.8232, "step": 249 }, { "epoch": 0.02, "grad_norm": 19.44330816468144, "learning_rate": 6.7567567567567575e-06, "loss": 0.7112, "step": 250 }, { "epoch": 0.02, "grad_norm": 10.947870884598686, "learning_rate": 6.783783783783784e-06, "loss": 0.9063, "step": 251 }, { "epoch": 0.02, "grad_norm": 12.326745122540027, "learning_rate": 6.810810810810811e-06, "loss": 0.9685, "step": 252 }, { "epoch": 0.02, "grad_norm": 8.246205071186225, "learning_rate": 6.837837837837838e-06, "loss": 0.9441, "step": 253 }, { "epoch": 0.02, "grad_norm": 35.1358639145026, "learning_rate": 6.864864864864865e-06, "loss": 1.0168, "step": 254 }, { "epoch": 0.02, "grad_norm": 37.984856338464056, "learning_rate": 6.891891891891892e-06, "loss": 0.7486, "step": 255 }, { "epoch": 0.02, "grad_norm": 13.464001338586746, "learning_rate": 6.91891891891892e-06, "loss": 0.8285, "step": 256 }, { "epoch": 0.02, "grad_norm": 10.01327371335838, "learning_rate": 6.945945945945947e-06, "loss": 0.9119, "step": 257 }, { "epoch": 0.02, "grad_norm": 9.943870994340054, "learning_rate": 6.972972972972973e-06, "loss": 0.9181, "step": 258 }, { "epoch": 0.02, "grad_norm": 10.739836047039292, "learning_rate": 7e-06, "loss": 0.9713, "step": 259 }, { "epoch": 0.02, "grad_norm": 42.29798024529886, "learning_rate": 7.027027027027028e-06, "loss": 0.667, "step": 260 }, { "epoch": 0.02, "grad_norm": 22.42866752760827, "learning_rate": 7.054054054054055e-06, "loss": 0.7318, "step": 261 }, { "epoch": 0.02, "grad_norm": 13.08598647990953, "learning_rate": 7.0810810810810815e-06, "loss": 0.8075, "step": 262 }, { "epoch": 0.02, "grad_norm": 13.965215731114823, "learning_rate": 7.1081081081081085e-06, "loss": 1.0707, "step": 263 }, { "epoch": 0.02, "grad_norm": 9.03690171729719, "learning_rate": 7.135135135135136e-06, "loss": 0.8516, "step": 264 }, { "epoch": 0.02, "grad_norm": 13.126195171696416, "learning_rate": 7.162162162162163e-06, "loss": 0.8807, "step": 265 }, { "epoch": 0.02, "grad_norm": 48.83167530366758, "learning_rate": 7.189189189189189e-06, "loss": 0.7823, "step": 266 }, { "epoch": 0.02, "grad_norm": 11.281746359886013, "learning_rate": 7.216216216216216e-06, "loss": 0.8177, "step": 267 }, { "epoch": 0.02, "grad_norm": 18.678521837489985, "learning_rate": 7.243243243243244e-06, "loss": 0.8439, "step": 268 }, { "epoch": 0.02, "grad_norm": 48.95499846632492, "learning_rate": 7.270270270270271e-06, "loss": 0.8774, "step": 269 }, { "epoch": 0.02, "grad_norm": 21.23848843609404, "learning_rate": 7.297297297297298e-06, "loss": 0.7706, "step": 270 }, { "epoch": 0.02, "grad_norm": 7.103337137246995, "learning_rate": 7.324324324324325e-06, "loss": 0.7704, "step": 271 }, { "epoch": 0.02, "grad_norm": 13.435096376321923, "learning_rate": 7.3513513513513525e-06, "loss": 0.9393, "step": 272 }, { "epoch": 0.02, "grad_norm": 31.604525793900656, "learning_rate": 7.3783783783783794e-06, "loss": 0.7038, "step": 273 }, { "epoch": 0.02, "grad_norm": 11.1820896481639, "learning_rate": 7.4054054054054055e-06, "loss": 0.8782, "step": 274 }, { "epoch": 0.02, "grad_norm": 29.861537718787407, "learning_rate": 7.4324324324324324e-06, "loss": 0.9541, "step": 275 }, { "epoch": 0.02, "grad_norm": 9.413675337859308, "learning_rate": 7.45945945945946e-06, "loss": 0.8048, "step": 276 }, { "epoch": 0.02, "grad_norm": 14.658207912177437, "learning_rate": 7.486486486486487e-06, "loss": 0.8344, "step": 277 }, { "epoch": 0.02, "grad_norm": 24.12011249754865, "learning_rate": 7.513513513513514e-06, "loss": 0.8484, "step": 278 }, { "epoch": 0.02, "grad_norm": 13.683050242309397, "learning_rate": 7.540540540540541e-06, "loss": 0.8401, "step": 279 }, { "epoch": 0.02, "grad_norm": 30.951710006586342, "learning_rate": 7.567567567567569e-06, "loss": 0.8466, "step": 280 }, { "epoch": 0.02, "grad_norm": 11.206756155627268, "learning_rate": 7.594594594594596e-06, "loss": 0.8336, "step": 281 }, { "epoch": 0.02, "grad_norm": 11.33322173314846, "learning_rate": 7.621621621621622e-06, "loss": 0.9492, "step": 282 }, { "epoch": 0.02, "grad_norm": 17.580655502858523, "learning_rate": 7.648648648648649e-06, "loss": 0.7291, "step": 283 }, { "epoch": 0.02, "grad_norm": 54.61659065403924, "learning_rate": 7.675675675675676e-06, "loss": 0.7802, "step": 284 }, { "epoch": 0.02, "grad_norm": 9.870581737775675, "learning_rate": 7.702702702702704e-06, "loss": 0.908, "step": 285 }, { "epoch": 0.02, "grad_norm": 43.24613941366979, "learning_rate": 7.72972972972973e-06, "loss": 0.8057, "step": 286 }, { "epoch": 0.02, "grad_norm": 9.05910901731955, "learning_rate": 7.756756756756756e-06, "loss": 1.025, "step": 287 }, { "epoch": 0.02, "grad_norm": 8.752340650843097, "learning_rate": 7.783783783783784e-06, "loss": 0.7396, "step": 288 }, { "epoch": 0.02, "grad_norm": 9.68669964854762, "learning_rate": 7.810810810810812e-06, "loss": 0.7391, "step": 289 }, { "epoch": 0.02, "grad_norm": 12.654321468851618, "learning_rate": 7.837837837837838e-06, "loss": 1.0119, "step": 290 }, { "epoch": 0.02, "grad_norm": 47.915191525751524, "learning_rate": 7.864864864864866e-06, "loss": 0.9611, "step": 291 }, { "epoch": 0.02, "grad_norm": 18.333907389232806, "learning_rate": 7.891891891891894e-06, "loss": 0.9687, "step": 292 }, { "epoch": 0.02, "grad_norm": 8.1082761360877, "learning_rate": 7.91891891891892e-06, "loss": 0.7831, "step": 293 }, { "epoch": 0.02, "grad_norm": 23.48602561232429, "learning_rate": 7.945945945945946e-06, "loss": 0.8965, "step": 294 }, { "epoch": 0.02, "grad_norm": 6.460926653242866, "learning_rate": 7.972972972972974e-06, "loss": 0.9003, "step": 295 }, { "epoch": 0.02, "grad_norm": 11.915890253917796, "learning_rate": 8.000000000000001e-06, "loss": 0.8482, "step": 296 }, { "epoch": 0.02, "grad_norm": 21.819683719088506, "learning_rate": 8.027027027027027e-06, "loss": 0.7953, "step": 297 }, { "epoch": 0.02, "grad_norm": 19.7783148322034, "learning_rate": 8.054054054054055e-06, "loss": 0.8208, "step": 298 }, { "epoch": 0.02, "grad_norm": 13.65257668344217, "learning_rate": 8.081081081081081e-06, "loss": 0.7397, "step": 299 }, { "epoch": 0.02, "grad_norm": 5.508168222898289, "learning_rate": 8.108108108108109e-06, "loss": 0.9242, "step": 300 }, { "epoch": 0.02, "grad_norm": 14.12216937768534, "learning_rate": 8.135135135135137e-06, "loss": 0.7812, "step": 301 }, { "epoch": 0.02, "grad_norm": 58.2610147214343, "learning_rate": 8.162162162162163e-06, "loss": 0.8097, "step": 302 }, { "epoch": 0.02, "grad_norm": 67.13676650261509, "learning_rate": 8.189189189189189e-06, "loss": 0.7253, "step": 303 }, { "epoch": 0.02, "grad_norm": 11.723304970513432, "learning_rate": 8.216216216216217e-06, "loss": 0.9157, "step": 304 }, { "epoch": 0.02, "grad_norm": 15.014324067992717, "learning_rate": 8.243243243243245e-06, "loss": 0.9872, "step": 305 }, { "epoch": 0.02, "grad_norm": 15.576979719397047, "learning_rate": 8.27027027027027e-06, "loss": 0.812, "step": 306 }, { "epoch": 0.02, "grad_norm": 8.576466090425274, "learning_rate": 8.297297297297298e-06, "loss": 0.8314, "step": 307 }, { "epoch": 0.03, "grad_norm": 5.02320474319744, "learning_rate": 8.324324324324326e-06, "loss": 0.9842, "step": 308 }, { "epoch": 0.03, "grad_norm": 37.93003233763656, "learning_rate": 8.351351351351352e-06, "loss": 0.8876, "step": 309 }, { "epoch": 0.03, "grad_norm": 21.295280345764002, "learning_rate": 8.378378378378378e-06, "loss": 0.8483, "step": 310 }, { "epoch": 0.03, "grad_norm": 14.22497315408456, "learning_rate": 8.405405405405406e-06, "loss": 0.8202, "step": 311 }, { "epoch": 0.03, "grad_norm": 53.193557108033076, "learning_rate": 8.432432432432434e-06, "loss": 0.9068, "step": 312 }, { "epoch": 0.03, "grad_norm": 32.07621818371433, "learning_rate": 8.45945945945946e-06, "loss": 0.921, "step": 313 }, { "epoch": 0.03, "grad_norm": 10.069641389080939, "learning_rate": 8.486486486486488e-06, "loss": 0.8778, "step": 314 }, { "epoch": 0.03, "grad_norm": 18.053523835812715, "learning_rate": 8.513513513513514e-06, "loss": 0.9505, "step": 315 }, { "epoch": 0.03, "grad_norm": 30.015659405729796, "learning_rate": 8.540540540540542e-06, "loss": 0.8973, "step": 316 }, { "epoch": 0.03, "grad_norm": 7.319370360672046, "learning_rate": 8.567567567567568e-06, "loss": 0.9075, "step": 317 }, { "epoch": 0.03, "grad_norm": 32.16047427964124, "learning_rate": 8.594594594594595e-06, "loss": 0.8375, "step": 318 }, { "epoch": 0.03, "grad_norm": 15.609338453897303, "learning_rate": 8.621621621621622e-06, "loss": 0.8815, "step": 319 }, { "epoch": 0.03, "grad_norm": 5.704130449974582, "learning_rate": 8.64864864864865e-06, "loss": 0.9223, "step": 320 }, { "epoch": 0.03, "grad_norm": 23.296175132616224, "learning_rate": 8.675675675675677e-06, "loss": 0.8277, "step": 321 }, { "epoch": 0.03, "grad_norm": 7.552859261589776, "learning_rate": 8.702702702702703e-06, "loss": 0.9559, "step": 322 }, { "epoch": 0.03, "grad_norm": 15.665438869109835, "learning_rate": 8.72972972972973e-06, "loss": 0.9702, "step": 323 }, { "epoch": 0.03, "grad_norm": 10.197205514663084, "learning_rate": 8.756756756756759e-06, "loss": 0.9542, "step": 324 }, { "epoch": 0.03, "grad_norm": 9.659667664276105, "learning_rate": 8.783783783783785e-06, "loss": 1.0003, "step": 325 }, { "epoch": 0.03, "grad_norm": 10.982299903736404, "learning_rate": 8.810810810810811e-06, "loss": 0.8914, "step": 326 }, { "epoch": 0.03, "grad_norm": 11.340952539457385, "learning_rate": 8.837837837837839e-06, "loss": 0.882, "step": 327 }, { "epoch": 0.03, "grad_norm": 10.00825153269294, "learning_rate": 8.864864864864866e-06, "loss": 0.7987, "step": 328 }, { "epoch": 0.03, "grad_norm": 23.540477775816996, "learning_rate": 8.891891891891893e-06, "loss": 0.8074, "step": 329 }, { "epoch": 0.03, "grad_norm": 5.037275719179406, "learning_rate": 8.91891891891892e-06, "loss": 0.8387, "step": 330 }, { "epoch": 0.03, "grad_norm": 7.968204358297256, "learning_rate": 8.945945945945946e-06, "loss": 0.695, "step": 331 }, { "epoch": 0.03, "grad_norm": 7.318856178170313, "learning_rate": 8.972972972972974e-06, "loss": 0.9031, "step": 332 }, { "epoch": 0.03, "grad_norm": 10.935686502657077, "learning_rate": 9e-06, "loss": 0.9114, "step": 333 }, { "epoch": 0.03, "grad_norm": 12.193951959889082, "learning_rate": 9.027027027027028e-06, "loss": 0.9356, "step": 334 }, { "epoch": 0.03, "grad_norm": 9.002218206553692, "learning_rate": 9.054054054054054e-06, "loss": 0.7767, "step": 335 }, { "epoch": 0.03, "grad_norm": 13.997837968068213, "learning_rate": 9.081081081081082e-06, "loss": 0.8262, "step": 336 }, { "epoch": 0.03, "grad_norm": 9.435639627514435, "learning_rate": 9.10810810810811e-06, "loss": 0.7897, "step": 337 }, { "epoch": 0.03, "grad_norm": 7.673515641143361, "learning_rate": 9.135135135135136e-06, "loss": 0.8418, "step": 338 }, { "epoch": 0.03, "grad_norm": 24.025442039162453, "learning_rate": 9.162162162162162e-06, "loss": 0.8225, "step": 339 }, { "epoch": 0.03, "grad_norm": 5.376223169399579, "learning_rate": 9.189189189189191e-06, "loss": 0.8157, "step": 340 }, { "epoch": 0.03, "grad_norm": 7.43802341579848, "learning_rate": 9.216216216216217e-06, "loss": 0.9067, "step": 341 }, { "epoch": 0.03, "grad_norm": 31.498113437103736, "learning_rate": 9.243243243243243e-06, "loss": 0.8863, "step": 342 }, { "epoch": 0.03, "grad_norm": 4.208395070776476, "learning_rate": 9.270270270270271e-06, "loss": 0.7518, "step": 343 }, { "epoch": 0.03, "grad_norm": 11.349808483251481, "learning_rate": 9.297297297297299e-06, "loss": 0.7932, "step": 344 }, { "epoch": 0.03, "grad_norm": 10.33455696368958, "learning_rate": 9.324324324324325e-06, "loss": 0.8835, "step": 345 }, { "epoch": 0.03, "grad_norm": 10.756883745264732, "learning_rate": 9.351351351351353e-06, "loss": 0.9388, "step": 346 }, { "epoch": 0.03, "grad_norm": 7.126584689121563, "learning_rate": 9.378378378378379e-06, "loss": 0.9549, "step": 347 }, { "epoch": 0.03, "grad_norm": 7.771750612149306, "learning_rate": 9.405405405405407e-06, "loss": 0.8982, "step": 348 }, { "epoch": 0.03, "grad_norm": 16.16719526827546, "learning_rate": 9.432432432432433e-06, "loss": 0.7405, "step": 349 }, { "epoch": 0.03, "grad_norm": 12.004866216631472, "learning_rate": 9.45945945945946e-06, "loss": 0.8116, "step": 350 }, { "epoch": 0.03, "grad_norm": 22.29157327442273, "learning_rate": 9.486486486486487e-06, "loss": 0.8311, "step": 351 }, { "epoch": 0.03, "grad_norm": 7.442624737259189, "learning_rate": 9.513513513513514e-06, "loss": 0.8761, "step": 352 }, { "epoch": 0.03, "grad_norm": 6.386628643473507, "learning_rate": 9.540540540540542e-06, "loss": 0.9021, "step": 353 }, { "epoch": 0.03, "grad_norm": 19.339016076084857, "learning_rate": 9.567567567567568e-06, "loss": 0.9448, "step": 354 }, { "epoch": 0.03, "grad_norm": 15.423321124464712, "learning_rate": 9.594594594594594e-06, "loss": 0.8387, "step": 355 }, { "epoch": 0.03, "grad_norm": 44.03518591651131, "learning_rate": 9.621621621621622e-06, "loss": 0.9625, "step": 356 }, { "epoch": 0.03, "grad_norm": 10.677430139662313, "learning_rate": 9.64864864864865e-06, "loss": 0.7932, "step": 357 }, { "epoch": 0.03, "grad_norm": 41.77893396768942, "learning_rate": 9.675675675675676e-06, "loss": 0.9091, "step": 358 }, { "epoch": 0.03, "grad_norm": 14.501349416940233, "learning_rate": 9.702702702702704e-06, "loss": 0.8284, "step": 359 }, { "epoch": 0.03, "grad_norm": 9.592105906329472, "learning_rate": 9.729729729729732e-06, "loss": 0.7937, "step": 360 }, { "epoch": 0.03, "grad_norm": 12.98542648937007, "learning_rate": 9.756756756756758e-06, "loss": 1.0166, "step": 361 }, { "epoch": 0.03, "grad_norm": 18.600162256938955, "learning_rate": 9.783783783783785e-06, "loss": 0.8389, "step": 362 }, { "epoch": 0.03, "grad_norm": 11.395296324329149, "learning_rate": 9.810810810810811e-06, "loss": 0.8358, "step": 363 }, { "epoch": 0.03, "grad_norm": 20.771734506081142, "learning_rate": 9.83783783783784e-06, "loss": 0.8873, "step": 364 }, { "epoch": 0.03, "grad_norm": 15.65993859593466, "learning_rate": 9.864864864864865e-06, "loss": 0.9912, "step": 365 }, { "epoch": 0.03, "grad_norm": 11.50319106780396, "learning_rate": 9.891891891891893e-06, "loss": 0.8361, "step": 366 }, { "epoch": 0.03, "grad_norm": 32.95975633319058, "learning_rate": 9.91891891891892e-06, "loss": 0.8634, "step": 367 }, { "epoch": 0.03, "grad_norm": 13.692061678208075, "learning_rate": 9.945945945945947e-06, "loss": 0.8092, "step": 368 }, { "epoch": 0.03, "grad_norm": 12.716953515371314, "learning_rate": 9.972972972972975e-06, "loss": 0.942, "step": 369 }, { "epoch": 0.03, "grad_norm": 82.65871288690137, "learning_rate": 1e-05, "loss": 0.8142, "step": 370 }, { "epoch": 0.03, "grad_norm": 14.984308807124416, "learning_rate": 9.99999982698426e-06, "loss": 0.7474, "step": 371 }, { "epoch": 0.03, "grad_norm": 19.07609430392833, "learning_rate": 9.999999307937047e-06, "loss": 0.8128, "step": 372 }, { "epoch": 0.03, "grad_norm": 9.479775709543809, "learning_rate": 9.9999984428584e-06, "loss": 0.7832, "step": 373 }, { "epoch": 0.03, "grad_norm": 18.387923540354905, "learning_rate": 9.99999723174838e-06, "loss": 0.7065, "step": 374 }, { "epoch": 0.03, "grad_norm": 18.984544852756734, "learning_rate": 9.999995674607067e-06, "loss": 1.03, "step": 375 }, { "epoch": 0.03, "grad_norm": 9.703472141415524, "learning_rate": 9.99999377143457e-06, "loss": 0.9147, "step": 376 }, { "epoch": 0.03, "grad_norm": 21.100188675244212, "learning_rate": 9.999991522231024e-06, "loss": 0.8972, "step": 377 }, { "epoch": 0.03, "grad_norm": 21.197882737234895, "learning_rate": 9.99998892699658e-06, "loss": 0.8198, "step": 378 }, { "epoch": 0.03, "grad_norm": 47.95622701048999, "learning_rate": 9.999985985731423e-06, "loss": 0.7839, "step": 379 }, { "epoch": 0.03, "grad_norm": 11.682867285214934, "learning_rate": 9.999982698435748e-06, "loss": 0.8002, "step": 380 }, { "epoch": 0.03, "grad_norm": 16.06406263555143, "learning_rate": 9.999979065109791e-06, "loss": 0.8017, "step": 381 }, { "epoch": 0.03, "grad_norm": 21.273465938072796, "learning_rate": 9.999975085753801e-06, "loss": 0.9241, "step": 382 }, { "epoch": 0.03, "grad_norm": 59.854815074167185, "learning_rate": 9.99997076036805e-06, "loss": 0.7414, "step": 383 }, { "epoch": 0.03, "grad_norm": 15.653054979719489, "learning_rate": 9.999966088952842e-06, "loss": 0.7665, "step": 384 }, { "epoch": 0.03, "grad_norm": 23.33735783957125, "learning_rate": 9.999961071508497e-06, "loss": 0.813, "step": 385 }, { "epoch": 0.03, "grad_norm": 38.95163376222572, "learning_rate": 9.999955708035365e-06, "loss": 0.9086, "step": 386 }, { "epoch": 0.03, "grad_norm": 18.9349861876493, "learning_rate": 9.999949998533815e-06, "loss": 0.8665, "step": 387 }, { "epoch": 0.03, "grad_norm": 18.711072587977014, "learning_rate": 9.999943943004242e-06, "loss": 0.9797, "step": 388 }, { "epoch": 0.03, "grad_norm": 21.361431170756777, "learning_rate": 9.999937541447067e-06, "loss": 0.9983, "step": 389 }, { "epoch": 0.03, "grad_norm": 15.582938979737273, "learning_rate": 9.999930793862732e-06, "loss": 0.9204, "step": 390 }, { "epoch": 0.03, "grad_norm": 27.94977648249872, "learning_rate": 9.999923700251704e-06, "loss": 0.9338, "step": 391 }, { "epoch": 0.03, "grad_norm": 14.347217431070954, "learning_rate": 9.999916260614471e-06, "loss": 0.7673, "step": 392 }, { "epoch": 0.03, "grad_norm": 25.11071957357206, "learning_rate": 9.999908474951554e-06, "loss": 0.9327, "step": 393 }, { "epoch": 0.03, "grad_norm": 52.04473252984409, "learning_rate": 9.999900343263487e-06, "loss": 0.7869, "step": 394 }, { "epoch": 0.03, "grad_norm": 19.819982323886766, "learning_rate": 9.999891865550835e-06, "loss": 0.7551, "step": 395 }, { "epoch": 0.03, "grad_norm": 23.45317773095086, "learning_rate": 9.999883041814184e-06, "loss": 1.009, "step": 396 }, { "epoch": 0.03, "grad_norm": 9.49863677886503, "learning_rate": 9.999873872054145e-06, "loss": 0.9752, "step": 397 }, { "epoch": 0.03, "grad_norm": 14.319079242729478, "learning_rate": 9.99986435627135e-06, "loss": 0.9145, "step": 398 }, { "epoch": 0.03, "grad_norm": 34.90754528895307, "learning_rate": 9.99985449446646e-06, "loss": 0.8761, "step": 399 }, { "epoch": 0.03, "grad_norm": 26.24096701095773, "learning_rate": 9.99984428664016e-06, "loss": 0.8054, "step": 400 }, { "epoch": 0.03, "grad_norm": 14.045655627785289, "learning_rate": 9.999833732793154e-06, "loss": 0.6386, "step": 401 }, { "epoch": 0.03, "grad_norm": 30.77148153147053, "learning_rate": 9.99982283292617e-06, "loss": 0.8872, "step": 402 }, { "epoch": 0.03, "grad_norm": 99.80768094405511, "learning_rate": 9.999811587039964e-06, "loss": 0.8507, "step": 403 }, { "epoch": 0.03, "grad_norm": 50.15541055042202, "learning_rate": 9.999799995135316e-06, "loss": 0.882, "step": 404 }, { "epoch": 0.03, "grad_norm": 9.799442026624805, "learning_rate": 9.999788057213026e-06, "loss": 0.9453, "step": 405 }, { "epoch": 0.03, "grad_norm": 8.310578500393895, "learning_rate": 9.999775773273922e-06, "loss": 0.9552, "step": 406 }, { "epoch": 0.03, "grad_norm": 27.55216918044366, "learning_rate": 9.999763143318853e-06, "loss": 0.9458, "step": 407 }, { "epoch": 0.03, "grad_norm": 15.658885945918538, "learning_rate": 9.999750167348694e-06, "loss": 0.709, "step": 408 }, { "epoch": 0.03, "grad_norm": 21.74888396097429, "learning_rate": 9.999736845364342e-06, "loss": 0.9854, "step": 409 }, { "epoch": 0.03, "grad_norm": 21.960870027184015, "learning_rate": 9.999723177366719e-06, "loss": 0.8204, "step": 410 }, { "epoch": 0.03, "grad_norm": 14.416621672095202, "learning_rate": 9.999709163356772e-06, "loss": 0.8273, "step": 411 }, { "epoch": 0.03, "grad_norm": 9.039269424969744, "learning_rate": 9.999694803335468e-06, "loss": 1.0574, "step": 412 }, { "epoch": 0.03, "grad_norm": 11.429652592891859, "learning_rate": 9.999680097303805e-06, "loss": 0.8025, "step": 413 }, { "epoch": 0.03, "grad_norm": 20.34028441419009, "learning_rate": 9.999665045262799e-06, "loss": 0.8172, "step": 414 }, { "epoch": 0.03, "grad_norm": 54.22029061670841, "learning_rate": 9.999649647213491e-06, "loss": 0.8617, "step": 415 }, { "epoch": 0.03, "grad_norm": 14.818505791880616, "learning_rate": 9.999633903156947e-06, "loss": 0.7233, "step": 416 }, { "epoch": 0.03, "grad_norm": 16.380377983332654, "learning_rate": 9.999617813094256e-06, "loss": 0.7646, "step": 417 }, { "epoch": 0.03, "grad_norm": 67.89249470883054, "learning_rate": 9.999601377026533e-06, "loss": 0.8145, "step": 418 }, { "epoch": 0.03, "grad_norm": 33.99404947542081, "learning_rate": 9.999584594954913e-06, "loss": 1.0665, "step": 419 }, { "epoch": 0.03, "grad_norm": 11.974198270070763, "learning_rate": 9.99956746688056e-06, "loss": 0.9434, "step": 420 }, { "epoch": 0.03, "grad_norm": 21.085216202470985, "learning_rate": 9.99954999280466e-06, "loss": 0.8267, "step": 421 }, { "epoch": 0.03, "grad_norm": 8.519989197695768, "learning_rate": 9.99953217272842e-06, "loss": 0.694, "step": 422 }, { "epoch": 0.03, "grad_norm": 72.0450639432991, "learning_rate": 9.99951400665307e-06, "loss": 0.8817, "step": 423 }, { "epoch": 0.03, "grad_norm": 15.686591475421789, "learning_rate": 9.999495494579876e-06, "loss": 0.9289, "step": 424 }, { "epoch": 0.03, "grad_norm": 15.321139988054547, "learning_rate": 9.999476636510112e-06, "loss": 0.8548, "step": 425 }, { "epoch": 0.03, "grad_norm": 73.31438876403607, "learning_rate": 9.999457432445087e-06, "loss": 0.8143, "step": 426 }, { "epoch": 0.03, "grad_norm": 20.009931132840833, "learning_rate": 9.999437882386128e-06, "loss": 0.9922, "step": 427 }, { "epoch": 0.03, "grad_norm": 16.554001506590943, "learning_rate": 9.999417986334587e-06, "loss": 0.9504, "step": 428 }, { "epoch": 0.03, "grad_norm": 11.152764050819433, "learning_rate": 9.999397744291845e-06, "loss": 0.8524, "step": 429 }, { "epoch": 0.03, "grad_norm": 73.60849741763013, "learning_rate": 9.999377156259298e-06, "loss": 1.0088, "step": 430 }, { "epoch": 0.04, "grad_norm": 12.460274613470789, "learning_rate": 9.999356222238375e-06, "loss": 0.7579, "step": 431 }, { "epoch": 0.04, "grad_norm": 9.876954911654076, "learning_rate": 9.99933494223052e-06, "loss": 0.8904, "step": 432 }, { "epoch": 0.04, "grad_norm": 11.250303463849706, "learning_rate": 9.999313316237211e-06, "loss": 0.7619, "step": 433 }, { "epoch": 0.04, "grad_norm": 17.229309353493207, "learning_rate": 9.999291344259943e-06, "loss": 0.6296, "step": 434 }, { "epoch": 0.04, "grad_norm": 27.16986762458146, "learning_rate": 9.999269026300234e-06, "loss": 0.8607, "step": 435 }, { "epoch": 0.04, "grad_norm": 18.356589295904822, "learning_rate": 9.999246362359631e-06, "loss": 0.8597, "step": 436 }, { "epoch": 0.04, "grad_norm": 12.183166560627248, "learning_rate": 9.999223352439701e-06, "loss": 0.8629, "step": 437 }, { "epoch": 0.04, "grad_norm": 8.995941766241017, "learning_rate": 9.999199996542038e-06, "loss": 0.936, "step": 438 }, { "epoch": 0.04, "grad_norm": 70.1160321604293, "learning_rate": 9.999176294668258e-06, "loss": 0.8156, "step": 439 }, { "epoch": 0.04, "grad_norm": 8.437858254059023, "learning_rate": 9.999152246820001e-06, "loss": 0.8791, "step": 440 }, { "epoch": 0.04, "grad_norm": 13.629524415961784, "learning_rate": 9.999127852998932e-06, "loss": 0.9075, "step": 441 }, { "epoch": 0.04, "grad_norm": 11.658206569666648, "learning_rate": 9.999103113206736e-06, "loss": 0.9753, "step": 442 }, { "epoch": 0.04, "grad_norm": 6.233027530226944, "learning_rate": 9.99907802744513e-06, "loss": 0.8657, "step": 443 }, { "epoch": 0.04, "grad_norm": 12.489952303435613, "learning_rate": 9.999052595715845e-06, "loss": 0.8385, "step": 444 }, { "epoch": 0.04, "grad_norm": 8.85308572898137, "learning_rate": 9.999026818020647e-06, "loss": 0.7528, "step": 445 }, { "epoch": 0.04, "grad_norm": 14.363125086991815, "learning_rate": 9.999000694361315e-06, "loss": 0.8781, "step": 446 }, { "epoch": 0.04, "grad_norm": 7.8147922724491155, "learning_rate": 9.99897422473966e-06, "loss": 0.8988, "step": 447 }, { "epoch": 0.04, "grad_norm": 9.759788144245777, "learning_rate": 9.99894740915751e-06, "loss": 0.8792, "step": 448 }, { "epoch": 0.04, "grad_norm": 10.930411502289326, "learning_rate": 9.998920247616724e-06, "loss": 0.7594, "step": 449 }, { "epoch": 0.04, "grad_norm": 15.948737319156722, "learning_rate": 9.998892740119183e-06, "loss": 0.7225, "step": 450 }, { "epoch": 0.04, "grad_norm": 8.245018743731332, "learning_rate": 9.998864886666788e-06, "loss": 0.8376, "step": 451 }, { "epoch": 0.04, "grad_norm": 8.927031973432706, "learning_rate": 9.998836687261466e-06, "loss": 0.917, "step": 452 }, { "epoch": 0.04, "grad_norm": 68.44457516666587, "learning_rate": 9.998808141905171e-06, "loss": 0.6569, "step": 453 }, { "epoch": 0.04, "grad_norm": 11.778712971332425, "learning_rate": 9.998779250599877e-06, "loss": 0.6889, "step": 454 }, { "epoch": 0.04, "grad_norm": 10.145913860704326, "learning_rate": 9.998750013347584e-06, "loss": 0.8116, "step": 455 }, { "epoch": 0.04, "grad_norm": 14.055120216565927, "learning_rate": 9.998720430150316e-06, "loss": 0.9681, "step": 456 }, { "epoch": 0.04, "grad_norm": 12.036685065237625, "learning_rate": 9.99869050101012e-06, "loss": 0.7294, "step": 457 }, { "epoch": 0.04, "grad_norm": 8.070374183474446, "learning_rate": 9.998660225929066e-06, "loss": 0.7193, "step": 458 }, { "epoch": 0.04, "grad_norm": 15.193301442760763, "learning_rate": 9.99862960490925e-06, "loss": 0.8729, "step": 459 }, { "epoch": 0.04, "grad_norm": 24.32121791679853, "learning_rate": 9.998598637952792e-06, "loss": 0.8121, "step": 460 }, { "epoch": 0.04, "grad_norm": 13.715253203453173, "learning_rate": 9.998567325061834e-06, "loss": 0.8142, "step": 461 }, { "epoch": 0.04, "grad_norm": 10.391314046194852, "learning_rate": 9.998535666238545e-06, "loss": 0.847, "step": 462 }, { "epoch": 0.04, "grad_norm": 4.817008131711015, "learning_rate": 9.998503661485112e-06, "loss": 0.8997, "step": 463 }, { "epoch": 0.04, "grad_norm": 8.98268603874573, "learning_rate": 9.998471310803754e-06, "loss": 0.9557, "step": 464 }, { "epoch": 0.04, "grad_norm": 10.22966883613815, "learning_rate": 9.998438614196709e-06, "loss": 0.8092, "step": 465 }, { "epoch": 0.04, "grad_norm": 5.71949311627198, "learning_rate": 9.998405571666237e-06, "loss": 0.79, "step": 466 }, { "epoch": 0.04, "grad_norm": 8.299204246951271, "learning_rate": 9.998372183214628e-06, "loss": 0.9451, "step": 467 }, { "epoch": 0.04, "grad_norm": 6.352553299280233, "learning_rate": 9.998338448844193e-06, "loss": 0.8229, "step": 468 }, { "epoch": 0.04, "grad_norm": 12.701480493487278, "learning_rate": 9.998304368557264e-06, "loss": 0.8424, "step": 469 }, { "epoch": 0.04, "grad_norm": 29.9010218910551, "learning_rate": 9.9982699423562e-06, "loss": 0.6158, "step": 470 }, { "epoch": 0.04, "grad_norm": 18.898379582824525, "learning_rate": 9.998235170243384e-06, "loss": 0.8722, "step": 471 }, { "epoch": 0.04, "grad_norm": 7.534229213257622, "learning_rate": 9.998200052221225e-06, "loss": 0.6099, "step": 472 }, { "epoch": 0.04, "grad_norm": 5.617559645478937, "learning_rate": 9.99816458829215e-06, "loss": 0.8279, "step": 473 }, { "epoch": 0.04, "grad_norm": 11.528433696022832, "learning_rate": 9.998128778458613e-06, "loss": 0.7423, "step": 474 }, { "epoch": 0.04, "grad_norm": 19.452763832906626, "learning_rate": 9.998092622723095e-06, "loss": 0.8853, "step": 475 }, { "epoch": 0.04, "grad_norm": 11.607687984152511, "learning_rate": 9.998056121088098e-06, "loss": 0.894, "step": 476 }, { "epoch": 0.04, "grad_norm": 11.798486981877323, "learning_rate": 9.998019273556145e-06, "loss": 0.7839, "step": 477 }, { "epoch": 0.04, "grad_norm": 10.129697512795723, "learning_rate": 9.997982080129788e-06, "loss": 0.8359, "step": 478 }, { "epoch": 0.04, "grad_norm": 16.619690469837995, "learning_rate": 9.997944540811604e-06, "loss": 0.8599, "step": 479 }, { "epoch": 0.04, "grad_norm": 12.384986377355515, "learning_rate": 9.997906655604187e-06, "loss": 0.8412, "step": 480 }, { "epoch": 0.04, "grad_norm": 4.446378744474751, "learning_rate": 9.997868424510157e-06, "loss": 0.7223, "step": 481 }, { "epoch": 0.04, "grad_norm": 13.531849510333954, "learning_rate": 9.997829847532165e-06, "loss": 0.7298, "step": 482 }, { "epoch": 0.04, "grad_norm": 6.389313888303728, "learning_rate": 9.99779092467288e-06, "loss": 0.8489, "step": 483 }, { "epoch": 0.04, "grad_norm": 26.70632484403452, "learning_rate": 9.997751655934993e-06, "loss": 0.7851, "step": 484 }, { "epoch": 0.04, "grad_norm": 9.439671918202823, "learning_rate": 9.997712041321224e-06, "loss": 0.7441, "step": 485 }, { "epoch": 0.04, "grad_norm": 6.037847172015665, "learning_rate": 9.997672080834312e-06, "loss": 0.8028, "step": 486 }, { "epoch": 0.04, "grad_norm": 9.319705177930302, "learning_rate": 9.997631774477025e-06, "loss": 0.7406, "step": 487 }, { "epoch": 0.04, "grad_norm": 8.780172227792313, "learning_rate": 9.997591122252151e-06, "loss": 0.6629, "step": 488 }, { "epoch": 0.04, "grad_norm": 12.765849491442697, "learning_rate": 9.997550124162505e-06, "loss": 0.8551, "step": 489 }, { "epoch": 0.04, "grad_norm": 42.76686905336114, "learning_rate": 9.99750878021092e-06, "loss": 0.8103, "step": 490 }, { "epoch": 0.04, "grad_norm": 8.473647422158049, "learning_rate": 9.997467090400264e-06, "loss": 0.5554, "step": 491 }, { "epoch": 0.04, "grad_norm": 8.012366043371337, "learning_rate": 9.997425054733418e-06, "loss": 0.8308, "step": 492 }, { "epoch": 0.04, "grad_norm": 7.661421852178396, "learning_rate": 9.997382673213292e-06, "loss": 0.8219, "step": 493 }, { "epoch": 0.04, "grad_norm": 19.47089309477259, "learning_rate": 9.997339945842817e-06, "loss": 0.8561, "step": 494 }, { "epoch": 0.04, "grad_norm": 41.049332291403395, "learning_rate": 9.997296872624952e-06, "loss": 0.8416, "step": 495 }, { "epoch": 0.04, "grad_norm": 5.878836183985898, "learning_rate": 9.99725345356268e-06, "loss": 0.6601, "step": 496 }, { "epoch": 0.04, "grad_norm": 6.394440800021863, "learning_rate": 9.997209688659004e-06, "loss": 0.8918, "step": 497 }, { "epoch": 0.04, "grad_norm": 6.972600615103139, "learning_rate": 9.99716557791695e-06, "loss": 0.8948, "step": 498 }, { "epoch": 0.04, "grad_norm": 6.592321556402771, "learning_rate": 9.997121121339574e-06, "loss": 0.7166, "step": 499 }, { "epoch": 0.04, "grad_norm": 8.772451028326715, "learning_rate": 9.997076318929952e-06, "loss": 0.6199, "step": 500 }, { "epoch": 0.04, "grad_norm": 14.866626166936214, "learning_rate": 9.997031170691185e-06, "loss": 0.8597, "step": 501 }, { "epoch": 0.04, "grad_norm": 12.672064495051528, "learning_rate": 9.996985676626398e-06, "loss": 0.7912, "step": 502 }, { "epoch": 0.04, "grad_norm": 7.608075763625257, "learning_rate": 9.996939836738736e-06, "loss": 0.8034, "step": 503 }, { "epoch": 0.04, "grad_norm": 15.512777950005521, "learning_rate": 9.996893651031377e-06, "loss": 0.8629, "step": 504 }, { "epoch": 0.04, "grad_norm": 9.90129001736726, "learning_rate": 9.996847119507513e-06, "loss": 0.879, "step": 505 }, { "epoch": 0.04, "grad_norm": 12.11366108997228, "learning_rate": 9.996800242170366e-06, "loss": 0.8153, "step": 506 }, { "epoch": 0.04, "grad_norm": 13.447484190971544, "learning_rate": 9.996753019023178e-06, "loss": 0.8408, "step": 507 }, { "epoch": 0.04, "grad_norm": 7.109671040543963, "learning_rate": 9.996705450069219e-06, "loss": 1.0441, "step": 508 }, { "epoch": 0.04, "grad_norm": 10.140216221545709, "learning_rate": 9.996657535311783e-06, "loss": 0.6856, "step": 509 }, { "epoch": 0.04, "grad_norm": 10.469463780075216, "learning_rate": 9.996609274754183e-06, "loss": 0.7259, "step": 510 }, { "epoch": 0.04, "grad_norm": 7.96189644553197, "learning_rate": 9.99656066839976e-06, "loss": 0.9266, "step": 511 }, { "epoch": 0.04, "grad_norm": 13.949353734455318, "learning_rate": 9.996511716251878e-06, "loss": 0.8243, "step": 512 }, { "epoch": 0.04, "grad_norm": 7.637588078075644, "learning_rate": 9.996462418313925e-06, "loss": 0.9055, "step": 513 }, { "epoch": 0.04, "grad_norm": 17.414766143190207, "learning_rate": 9.996412774589312e-06, "loss": 0.8257, "step": 514 }, { "epoch": 0.04, "grad_norm": 105.3535998268008, "learning_rate": 9.996362785081475e-06, "loss": 0.8769, "step": 515 }, { "epoch": 0.04, "grad_norm": 7.399070067689621, "learning_rate": 9.996312449793872e-06, "loss": 0.7435, "step": 516 }, { "epoch": 0.04, "grad_norm": 10.605966365232831, "learning_rate": 9.99626176872999e-06, "loss": 0.9566, "step": 517 }, { "epoch": 0.04, "grad_norm": 11.739482851618803, "learning_rate": 9.996210741893334e-06, "loss": 0.8259, "step": 518 }, { "epoch": 0.04, "grad_norm": 26.58860849380813, "learning_rate": 9.996159369287436e-06, "loss": 0.8887, "step": 519 }, { "epoch": 0.04, "grad_norm": 28.865443231952558, "learning_rate": 9.996107650915851e-06, "loss": 0.8697, "step": 520 }, { "epoch": 0.04, "grad_norm": 13.273543874253734, "learning_rate": 9.996055586782158e-06, "loss": 0.8519, "step": 521 }, { "epoch": 0.04, "grad_norm": 11.655026322563018, "learning_rate": 9.996003176889962e-06, "loss": 0.7761, "step": 522 }, { "epoch": 0.04, "grad_norm": 12.34750613332825, "learning_rate": 9.995950421242887e-06, "loss": 0.8807, "step": 523 }, { "epoch": 0.04, "grad_norm": 6.820187379622272, "learning_rate": 9.995897319844588e-06, "loss": 0.7704, "step": 524 }, { "epoch": 0.04, "grad_norm": 9.56767507898871, "learning_rate": 9.995843872698734e-06, "loss": 0.6508, "step": 525 }, { "epoch": 0.04, "grad_norm": 13.342475881792621, "learning_rate": 9.995790079809031e-06, "loss": 0.621, "step": 526 }, { "epoch": 0.04, "grad_norm": 44.066648368484785, "learning_rate": 9.995735941179198e-06, "loss": 0.7811, "step": 527 }, { "epoch": 0.04, "grad_norm": 8.378457307459202, "learning_rate": 9.995681456812981e-06, "loss": 0.7337, "step": 528 }, { "epoch": 0.04, "grad_norm": 11.631531361554062, "learning_rate": 9.995626626714152e-06, "loss": 0.8762, "step": 529 }, { "epoch": 0.04, "grad_norm": 13.86011073227737, "learning_rate": 9.995571450886506e-06, "loss": 0.7705, "step": 530 }, { "epoch": 0.04, "grad_norm": 28.580487567109287, "learning_rate": 9.99551592933386e-06, "loss": 0.7632, "step": 531 }, { "epoch": 0.04, "grad_norm": 43.33520935228094, "learning_rate": 9.995460062060058e-06, "loss": 0.9407, "step": 532 }, { "epoch": 0.04, "grad_norm": 11.909652801913152, "learning_rate": 9.995403849068965e-06, "loss": 0.8536, "step": 533 }, { "epoch": 0.04, "grad_norm": 9.402057446232243, "learning_rate": 9.995347290364472e-06, "loss": 0.8637, "step": 534 }, { "epoch": 0.04, "grad_norm": 7.89031775768683, "learning_rate": 9.995290385950493e-06, "loss": 0.9203, "step": 535 }, { "epoch": 0.04, "grad_norm": 8.271432064165714, "learning_rate": 9.995233135830968e-06, "loss": 0.9156, "step": 536 }, { "epoch": 0.04, "grad_norm": 9.093368906953206, "learning_rate": 9.995175540009855e-06, "loss": 0.9131, "step": 537 }, { "epoch": 0.04, "grad_norm": 9.66103466481114, "learning_rate": 9.995117598491146e-06, "loss": 0.8857, "step": 538 }, { "epoch": 0.04, "grad_norm": 6.1237346833238115, "learning_rate": 9.995059311278845e-06, "loss": 0.826, "step": 539 }, { "epoch": 0.04, "grad_norm": 5.634030117867691, "learning_rate": 9.995000678376987e-06, "loss": 1.0652, "step": 540 }, { "epoch": 0.04, "grad_norm": 24.541992102496202, "learning_rate": 9.994941699789632e-06, "loss": 0.8759, "step": 541 }, { "epoch": 0.04, "grad_norm": 7.385627874316691, "learning_rate": 9.994882375520862e-06, "loss": 0.7202, "step": 542 }, { "epoch": 0.04, "grad_norm": 9.62743297829404, "learning_rate": 9.99482270557478e-06, "loss": 0.8987, "step": 543 }, { "epoch": 0.04, "grad_norm": 7.415142458764166, "learning_rate": 9.994762689955518e-06, "loss": 0.8352, "step": 544 }, { "epoch": 0.04, "grad_norm": 13.443510555984039, "learning_rate": 9.994702328667225e-06, "loss": 0.7744, "step": 545 }, { "epoch": 0.04, "grad_norm": 9.037861971705254, "learning_rate": 9.994641621714085e-06, "loss": 0.8173, "step": 546 }, { "epoch": 0.04, "grad_norm": 11.120780780804768, "learning_rate": 9.994580569100295e-06, "loss": 0.7303, "step": 547 }, { "epoch": 0.04, "grad_norm": 16.32836065555792, "learning_rate": 9.99451917083008e-06, "loss": 0.8896, "step": 548 }, { "epoch": 0.04, "grad_norm": 19.86637565436871, "learning_rate": 9.994457426907692e-06, "loss": 0.8949, "step": 549 }, { "epoch": 0.04, "grad_norm": 8.047045756352302, "learning_rate": 9.9943953373374e-06, "loss": 0.846, "step": 550 }, { "epoch": 0.04, "grad_norm": 8.179799801211328, "learning_rate": 9.994332902123505e-06, "loss": 0.5653, "step": 551 }, { "epoch": 0.04, "grad_norm": 10.708603057981128, "learning_rate": 9.994270121270327e-06, "loss": 0.8086, "step": 552 }, { "epoch": 0.04, "grad_norm": 8.102467932129697, "learning_rate": 9.994206994782207e-06, "loss": 0.8118, "step": 553 }, { "epoch": 0.04, "grad_norm": 38.096051297213926, "learning_rate": 9.994143522663519e-06, "loss": 0.8278, "step": 554 }, { "epoch": 0.05, "grad_norm": 21.453507615082344, "learning_rate": 9.994079704918654e-06, "loss": 0.821, "step": 555 }, { "epoch": 0.05, "grad_norm": 15.606877303262893, "learning_rate": 9.994015541552028e-06, "loss": 0.8356, "step": 556 }, { "epoch": 0.05, "grad_norm": 7.952697114146562, "learning_rate": 9.993951032568082e-06, "loss": 0.9863, "step": 557 }, { "epoch": 0.05, "grad_norm": 15.638502879234304, "learning_rate": 9.993886177971278e-06, "loss": 0.8728, "step": 558 }, { "epoch": 0.05, "grad_norm": 9.508113486056013, "learning_rate": 9.993820977766108e-06, "loss": 0.6744, "step": 559 }, { "epoch": 0.05, "grad_norm": 15.021278085558164, "learning_rate": 9.993755431957082e-06, "loss": 0.7812, "step": 560 }, { "epoch": 0.05, "grad_norm": 5.6549632416044, "learning_rate": 9.993689540548736e-06, "loss": 0.8249, "step": 561 }, { "epoch": 0.05, "grad_norm": 7.85201852254656, "learning_rate": 9.993623303545632e-06, "loss": 0.7181, "step": 562 }, { "epoch": 0.05, "grad_norm": 6.582155391119535, "learning_rate": 9.993556720952354e-06, "loss": 0.9711, "step": 563 }, { "epoch": 0.05, "grad_norm": 15.396503116993552, "learning_rate": 9.993489792773507e-06, "loss": 0.8035, "step": 564 }, { "epoch": 0.05, "grad_norm": 7.7702839069916205, "learning_rate": 9.993422519013726e-06, "loss": 0.7612, "step": 565 }, { "epoch": 0.05, "grad_norm": 18.015171235239254, "learning_rate": 9.993354899677665e-06, "loss": 0.8618, "step": 566 }, { "epoch": 0.05, "grad_norm": 18.01915151771511, "learning_rate": 9.993286934770004e-06, "loss": 0.8659, "step": 567 }, { "epoch": 0.05, "grad_norm": 7.346995314339446, "learning_rate": 9.993218624295446e-06, "loss": 0.8125, "step": 568 }, { "epoch": 0.05, "grad_norm": 5.706666932661295, "learning_rate": 9.99314996825872e-06, "loss": 0.8467, "step": 569 }, { "epoch": 0.05, "grad_norm": 9.44517150285393, "learning_rate": 9.993080966664579e-06, "loss": 0.8866, "step": 570 }, { "epoch": 0.05, "grad_norm": 9.70613542007537, "learning_rate": 9.993011619517793e-06, "loss": 0.9496, "step": 571 }, { "epoch": 0.05, "grad_norm": 8.317169078764937, "learning_rate": 9.992941926823166e-06, "loss": 0.7998, "step": 572 }, { "epoch": 0.05, "grad_norm": 9.107898931077647, "learning_rate": 9.992871888585518e-06, "loss": 0.7518, "step": 573 }, { "epoch": 0.05, "grad_norm": 52.801750389259034, "learning_rate": 9.992801504809698e-06, "loss": 0.8034, "step": 574 }, { "epoch": 0.05, "grad_norm": 46.3118984132503, "learning_rate": 9.992730775500578e-06, "loss": 0.8421, "step": 575 }, { "epoch": 0.05, "grad_norm": 8.819460983792244, "learning_rate": 9.99265970066305e-06, "loss": 0.7137, "step": 576 }, { "epoch": 0.05, "grad_norm": 8.52295056695198, "learning_rate": 9.992588280302034e-06, "loss": 0.9252, "step": 577 }, { "epoch": 0.05, "grad_norm": 9.29962370533259, "learning_rate": 9.992516514422474e-06, "loss": 0.7638, "step": 578 }, { "epoch": 0.05, "grad_norm": 9.114961360948504, "learning_rate": 9.992444403029335e-06, "loss": 0.7391, "step": 579 }, { "epoch": 0.05, "grad_norm": 18.57087743130155, "learning_rate": 9.99237194612761e-06, "loss": 0.7999, "step": 580 }, { "epoch": 0.05, "grad_norm": 15.113295270410948, "learning_rate": 9.99229914372231e-06, "loss": 0.7913, "step": 581 }, { "epoch": 0.05, "grad_norm": 8.48388464750635, "learning_rate": 9.992225995818476e-06, "loss": 0.7446, "step": 582 }, { "epoch": 0.05, "grad_norm": 9.584669962239762, "learning_rate": 9.99215250242117e-06, "loss": 0.7364, "step": 583 }, { "epoch": 0.05, "grad_norm": 5.79968652253179, "learning_rate": 9.992078663535475e-06, "loss": 0.7238, "step": 584 }, { "epoch": 0.05, "grad_norm": 7.113171104738922, "learning_rate": 9.992004479166507e-06, "loss": 0.7656, "step": 585 }, { "epoch": 0.05, "grad_norm": 7.847450193474083, "learning_rate": 9.991929949319397e-06, "loss": 0.8573, "step": 586 }, { "epoch": 0.05, "grad_norm": 8.359203786968319, "learning_rate": 9.991855073999299e-06, "loss": 0.8307, "step": 587 }, { "epoch": 0.05, "grad_norm": 5.441631950180476, "learning_rate": 9.991779853211401e-06, "loss": 0.8836, "step": 588 }, { "epoch": 0.05, "grad_norm": 23.24922130707377, "learning_rate": 9.991704286960906e-06, "loss": 0.8915, "step": 589 }, { "epoch": 0.05, "grad_norm": 8.206025850462575, "learning_rate": 9.991628375253044e-06, "loss": 0.9392, "step": 590 }, { "epoch": 0.05, "grad_norm": 8.145059409157405, "learning_rate": 9.991552118093069e-06, "loss": 0.6938, "step": 591 }, { "epoch": 0.05, "grad_norm": 10.315105346460362, "learning_rate": 9.991475515486258e-06, "loss": 0.7187, "step": 592 }, { "epoch": 0.05, "grad_norm": 5.962641471520498, "learning_rate": 9.99139856743791e-06, "loss": 0.7906, "step": 593 }, { "epoch": 0.05, "grad_norm": 7.458265783871584, "learning_rate": 9.991321273953357e-06, "loss": 0.9382, "step": 594 }, { "epoch": 0.05, "grad_norm": 7.945797071600797, "learning_rate": 9.991243635037942e-06, "loss": 0.7283, "step": 595 }, { "epoch": 0.05, "grad_norm": 6.866928978058393, "learning_rate": 9.991165650697039e-06, "loss": 0.9184, "step": 596 }, { "epoch": 0.05, "grad_norm": 6.511885796757297, "learning_rate": 9.991087320936046e-06, "loss": 0.8652, "step": 597 }, { "epoch": 0.05, "grad_norm": 9.231785413957425, "learning_rate": 9.991008645760385e-06, "loss": 0.9465, "step": 598 }, { "epoch": 0.05, "grad_norm": 9.372565656750247, "learning_rate": 9.990929625175498e-06, "loss": 0.7801, "step": 599 }, { "epoch": 0.05, "grad_norm": 8.094511258311677, "learning_rate": 9.990850259186857e-06, "loss": 0.6775, "step": 600 }, { "epoch": 0.05, "grad_norm": 10.555078383463702, "learning_rate": 9.990770547799953e-06, "loss": 0.592, "step": 601 }, { "epoch": 0.05, "grad_norm": 18.95371020735239, "learning_rate": 9.990690491020304e-06, "loss": 0.882, "step": 602 }, { "epoch": 0.05, "grad_norm": 58.73856983645397, "learning_rate": 9.990610088853446e-06, "loss": 0.898, "step": 603 }, { "epoch": 0.05, "grad_norm": 8.214048073862799, "learning_rate": 9.990529341304946e-06, "loss": 0.7934, "step": 604 }, { "epoch": 0.05, "grad_norm": 7.511290764183826, "learning_rate": 9.990448248380396e-06, "loss": 0.6018, "step": 605 }, { "epoch": 0.05, "grad_norm": 64.51133102591518, "learning_rate": 9.990366810085403e-06, "loss": 0.7759, "step": 606 }, { "epoch": 0.05, "grad_norm": 71.88669653549478, "learning_rate": 9.990285026425604e-06, "loss": 0.8268, "step": 607 }, { "epoch": 0.05, "grad_norm": 13.866712918824344, "learning_rate": 9.99020289740666e-06, "loss": 0.8188, "step": 608 }, { "epoch": 0.05, "grad_norm": 11.501494526769124, "learning_rate": 9.990120423034257e-06, "loss": 0.9362, "step": 609 }, { "epoch": 0.05, "grad_norm": 6.885597538297738, "learning_rate": 9.990037603314098e-06, "loss": 0.7901, "step": 610 }, { "epoch": 0.05, "grad_norm": 13.85336232473651, "learning_rate": 9.989954438251916e-06, "loss": 0.7303, "step": 611 }, { "epoch": 0.05, "grad_norm": 20.060591287148707, "learning_rate": 9.98987092785347e-06, "loss": 0.9798, "step": 612 }, { "epoch": 0.05, "grad_norm": 30.633730971156545, "learning_rate": 9.989787072124535e-06, "loss": 0.7626, "step": 613 }, { "epoch": 0.05, "grad_norm": 8.793244591626761, "learning_rate": 9.989702871070918e-06, "loss": 0.7987, "step": 614 }, { "epoch": 0.05, "grad_norm": 9.875060327355442, "learning_rate": 9.989618324698445e-06, "loss": 0.682, "step": 615 }, { "epoch": 0.05, "grad_norm": 72.97580919120949, "learning_rate": 9.989533433012965e-06, "loss": 0.9387, "step": 616 }, { "epoch": 0.05, "grad_norm": 14.408292072146214, "learning_rate": 9.989448196020355e-06, "loss": 0.9296, "step": 617 }, { "epoch": 0.05, "grad_norm": 7.130777903033677, "learning_rate": 9.989362613726515e-06, "loss": 0.6926, "step": 618 }, { "epoch": 0.05, "grad_norm": 15.925510702672547, "learning_rate": 9.989276686137364e-06, "loss": 0.7189, "step": 619 }, { "epoch": 0.05, "grad_norm": 11.511840518712551, "learning_rate": 9.989190413258854e-06, "loss": 0.8567, "step": 620 }, { "epoch": 0.05, "grad_norm": 9.17592349998759, "learning_rate": 9.98910379509695e-06, "loss": 0.8127, "step": 621 }, { "epoch": 0.05, "grad_norm": 5.84215044694441, "learning_rate": 9.989016831657652e-06, "loss": 0.8363, "step": 622 }, { "epoch": 0.05, "grad_norm": 5.944119202717555, "learning_rate": 9.988929522946976e-06, "loss": 0.7899, "step": 623 }, { "epoch": 0.05, "grad_norm": 19.26785296376063, "learning_rate": 9.988841868970962e-06, "loss": 0.8757, "step": 624 }, { "epoch": 0.05, "grad_norm": 10.636390260512607, "learning_rate": 9.98875386973568e-06, "loss": 0.7518, "step": 625 }, { "epoch": 0.05, "grad_norm": 21.91253000806537, "learning_rate": 9.988665525247217e-06, "loss": 0.7686, "step": 626 }, { "epoch": 0.05, "grad_norm": 7.139432620257901, "learning_rate": 9.988576835511687e-06, "loss": 0.7362, "step": 627 }, { "epoch": 0.05, "grad_norm": 9.27924269716798, "learning_rate": 9.988487800535233e-06, "loss": 0.8678, "step": 628 }, { "epoch": 0.05, "grad_norm": 9.24001209333957, "learning_rate": 9.98839842032401e-06, "loss": 0.9575, "step": 629 }, { "epoch": 0.05, "grad_norm": 7.188691399855119, "learning_rate": 9.98830869488421e-06, "loss": 0.6572, "step": 630 }, { "epoch": 0.05, "grad_norm": 11.12289938087904, "learning_rate": 9.988218624222036e-06, "loss": 0.7697, "step": 631 }, { "epoch": 0.05, "grad_norm": 9.330598690482764, "learning_rate": 9.988128208343727e-06, "loss": 0.8043, "step": 632 }, { "epoch": 0.05, "grad_norm": 45.96512746314069, "learning_rate": 9.988037447255537e-06, "loss": 0.7814, "step": 633 }, { "epoch": 0.05, "grad_norm": 22.111735762299233, "learning_rate": 9.987946340963749e-06, "loss": 0.7844, "step": 634 }, { "epoch": 0.05, "grad_norm": 10.93177817508431, "learning_rate": 9.987854889474667e-06, "loss": 0.8582, "step": 635 }, { "epoch": 0.05, "grad_norm": 11.006109742208354, "learning_rate": 9.987763092794621e-06, "loss": 0.7843, "step": 636 }, { "epoch": 0.05, "grad_norm": 12.534174103061337, "learning_rate": 9.987670950929963e-06, "loss": 0.8191, "step": 637 }, { "epoch": 0.05, "grad_norm": 29.15393915044455, "learning_rate": 9.98757846388707e-06, "loss": 0.7001, "step": 638 }, { "epoch": 0.05, "grad_norm": 15.198062054677523, "learning_rate": 9.987485631672345e-06, "loss": 0.8103, "step": 639 }, { "epoch": 0.05, "grad_norm": 23.488705504357615, "learning_rate": 9.987392454292208e-06, "loss": 0.7999, "step": 640 }, { "epoch": 0.05, "grad_norm": 10.33662065802245, "learning_rate": 9.987298931753111e-06, "loss": 0.778, "step": 641 }, { "epoch": 0.05, "grad_norm": 6.4768738924870535, "learning_rate": 9.987205064061526e-06, "loss": 1.093, "step": 642 }, { "epoch": 0.05, "grad_norm": 17.003363279686255, "learning_rate": 9.987110851223946e-06, "loss": 0.733, "step": 643 }, { "epoch": 0.05, "grad_norm": 11.123138576352284, "learning_rate": 9.987016293246896e-06, "loss": 0.9307, "step": 644 }, { "epoch": 0.05, "grad_norm": 5.779564996135507, "learning_rate": 9.986921390136916e-06, "loss": 0.9614, "step": 645 }, { "epoch": 0.05, "grad_norm": 6.224334648193334, "learning_rate": 9.986826141900577e-06, "loss": 0.8988, "step": 646 }, { "epoch": 0.05, "grad_norm": 24.475577569836908, "learning_rate": 9.986730548544468e-06, "loss": 0.724, "step": 647 }, { "epoch": 0.05, "grad_norm": 7.280460449672077, "learning_rate": 9.986634610075207e-06, "loss": 0.796, "step": 648 }, { "epoch": 0.05, "grad_norm": 7.542748537533427, "learning_rate": 9.986538326499433e-06, "loss": 0.7362, "step": 649 }, { "epoch": 0.05, "grad_norm": 11.064940928844694, "learning_rate": 9.986441697823808e-06, "loss": 0.9358, "step": 650 }, { "epoch": 0.05, "grad_norm": 116.55399093608999, "learning_rate": 9.986344724055022e-06, "loss": 0.7826, "step": 651 }, { "epoch": 0.05, "grad_norm": 6.78303370496228, "learning_rate": 9.986247405199782e-06, "loss": 0.6952, "step": 652 }, { "epoch": 0.05, "grad_norm": 17.642355639075667, "learning_rate": 9.986149741264827e-06, "loss": 0.7374, "step": 653 }, { "epoch": 0.05, "grad_norm": 10.017056289977392, "learning_rate": 9.986051732256913e-06, "loss": 0.9157, "step": 654 }, { "epoch": 0.05, "grad_norm": 10.064148719695446, "learning_rate": 9.985953378182827e-06, "loss": 0.8006, "step": 655 }, { "epoch": 0.05, "grad_norm": 5.47457956272967, "learning_rate": 9.985854679049371e-06, "loss": 0.7697, "step": 656 }, { "epoch": 0.05, "grad_norm": 21.651631018896396, "learning_rate": 9.985755634863378e-06, "loss": 0.8534, "step": 657 }, { "epoch": 0.05, "grad_norm": 6.719366299840288, "learning_rate": 9.985656245631702e-06, "loss": 0.7404, "step": 658 }, { "epoch": 0.05, "grad_norm": 7.698109554492035, "learning_rate": 9.985556511361221e-06, "loss": 0.8373, "step": 659 }, { "epoch": 0.05, "grad_norm": 5.361117337465909, "learning_rate": 9.985456432058839e-06, "loss": 0.9731, "step": 660 }, { "epoch": 0.05, "grad_norm": 6.763210772346195, "learning_rate": 9.985356007731482e-06, "loss": 0.8492, "step": 661 }, { "epoch": 0.05, "grad_norm": 30.9162879260446, "learning_rate": 9.985255238386097e-06, "loss": 0.8259, "step": 662 }, { "epoch": 0.05, "grad_norm": 7.5957623732265205, "learning_rate": 9.985154124029659e-06, "loss": 0.7056, "step": 663 }, { "epoch": 0.05, "grad_norm": 7.890736808979091, "learning_rate": 9.985052664669168e-06, "loss": 0.6557, "step": 664 }, { "epoch": 0.05, "grad_norm": 12.27263139518476, "learning_rate": 9.984950860311644e-06, "loss": 0.8214, "step": 665 }, { "epoch": 0.05, "grad_norm": 9.402801393874103, "learning_rate": 9.984848710964132e-06, "loss": 0.7401, "step": 666 }, { "epoch": 0.05, "grad_norm": 23.394724952251263, "learning_rate": 9.984746216633703e-06, "loss": 0.5883, "step": 667 }, { "epoch": 0.05, "grad_norm": 7.021658919636003, "learning_rate": 9.984643377327447e-06, "loss": 0.9109, "step": 668 }, { "epoch": 0.05, "grad_norm": 4.130236454929482, "learning_rate": 9.984540193052485e-06, "loss": 0.8465, "step": 669 }, { "epoch": 0.05, "grad_norm": 5.769902858828049, "learning_rate": 9.984436663815957e-06, "loss": 0.8428, "step": 670 }, { "epoch": 0.05, "grad_norm": 6.114982039936177, "learning_rate": 9.984332789625026e-06, "loss": 0.7195, "step": 671 }, { "epoch": 0.05, "grad_norm": 10.889498593203678, "learning_rate": 9.984228570486885e-06, "loss": 0.8967, "step": 672 }, { "epoch": 0.05, "grad_norm": 5.932489028692374, "learning_rate": 9.98412400640874e-06, "loss": 0.7486, "step": 673 }, { "epoch": 0.05, "grad_norm": 6.098005309857313, "learning_rate": 9.984019097397832e-06, "loss": 0.7823, "step": 674 }, { "epoch": 0.05, "grad_norm": 45.02380744832399, "learning_rate": 9.983913843461421e-06, "loss": 0.947, "step": 675 }, { "epoch": 0.05, "grad_norm": 6.546751936447832, "learning_rate": 9.98380824460679e-06, "loss": 0.8759, "step": 676 }, { "epoch": 0.05, "grad_norm": 7.2243920500059335, "learning_rate": 9.983702300841249e-06, "loss": 0.767, "step": 677 }, { "epoch": 0.06, "grad_norm": 4.046857789064689, "learning_rate": 9.983596012172127e-06, "loss": 0.7718, "step": 678 }, { "epoch": 0.06, "grad_norm": 5.504942305925027, "learning_rate": 9.983489378606785e-06, "loss": 0.7371, "step": 679 }, { "epoch": 0.06, "grad_norm": 6.879825384686828, "learning_rate": 9.983382400152597e-06, "loss": 0.6592, "step": 680 }, { "epoch": 0.06, "grad_norm": 10.16447589071718, "learning_rate": 9.983275076816969e-06, "loss": 0.8063, "step": 681 }, { "epoch": 0.06, "grad_norm": 6.238786324911869, "learning_rate": 9.983167408607328e-06, "loss": 0.7522, "step": 682 }, { "epoch": 0.06, "grad_norm": 10.215890960915836, "learning_rate": 9.983059395531126e-06, "loss": 0.9019, "step": 683 }, { "epoch": 0.06, "grad_norm": 6.033651462159386, "learning_rate": 9.982951037595839e-06, "loss": 0.7745, "step": 684 }, { "epoch": 0.06, "grad_norm": 30.753639761740306, "learning_rate": 9.982842334808965e-06, "loss": 0.6734, "step": 685 }, { "epoch": 0.06, "grad_norm": 7.483458909889676, "learning_rate": 9.982733287178024e-06, "loss": 0.7812, "step": 686 }, { "epoch": 0.06, "grad_norm": 6.177365800238773, "learning_rate": 9.982623894710568e-06, "loss": 0.5936, "step": 687 }, { "epoch": 0.06, "grad_norm": 15.630098730739194, "learning_rate": 9.982514157414165e-06, "loss": 0.6302, "step": 688 }, { "epoch": 0.06, "grad_norm": 4.984736565477625, "learning_rate": 9.98240407529641e-06, "loss": 0.7691, "step": 689 }, { "epoch": 0.06, "grad_norm": 6.432693743912565, "learning_rate": 9.98229364836492e-06, "loss": 0.6902, "step": 690 }, { "epoch": 0.06, "grad_norm": 6.165986107848322, "learning_rate": 9.98218287662734e-06, "loss": 0.7465, "step": 691 }, { "epoch": 0.06, "grad_norm": 10.432503462266839, "learning_rate": 9.982071760091334e-06, "loss": 0.7174, "step": 692 }, { "epoch": 0.06, "grad_norm": 4.484394514975136, "learning_rate": 9.981960298764591e-06, "loss": 0.8959, "step": 693 }, { "epoch": 0.06, "grad_norm": 75.90186566843316, "learning_rate": 9.98184849265483e-06, "loss": 0.8356, "step": 694 }, { "epoch": 0.06, "grad_norm": 7.2608229172175, "learning_rate": 9.981736341769781e-06, "loss": 0.8358, "step": 695 }, { "epoch": 0.06, "grad_norm": 4.995034634233328, "learning_rate": 9.98162384611721e-06, "loss": 0.8041, "step": 696 }, { "epoch": 0.06, "grad_norm": 6.720473969590034, "learning_rate": 9.981511005704905e-06, "loss": 0.8333, "step": 697 }, { "epoch": 0.06, "grad_norm": 9.76444910976019, "learning_rate": 9.98139782054067e-06, "loss": 0.7411, "step": 698 }, { "epoch": 0.06, "grad_norm": 17.596370679277303, "learning_rate": 9.98128429063234e-06, "loss": 0.8383, "step": 699 }, { "epoch": 0.06, "grad_norm": 4.711920513879975, "learning_rate": 9.981170415987774e-06, "loss": 0.7657, "step": 700 }, { "epoch": 0.06, "grad_norm": 8.744580599469144, "learning_rate": 9.98105619661485e-06, "loss": 0.8097, "step": 701 }, { "epoch": 0.06, "grad_norm": 7.07065657363302, "learning_rate": 9.980941632521472e-06, "loss": 0.783, "step": 702 }, { "epoch": 0.06, "grad_norm": 6.295848545653682, "learning_rate": 9.980826723715572e-06, "loss": 0.8645, "step": 703 }, { "epoch": 0.06, "grad_norm": 8.358935616768742, "learning_rate": 9.980711470205102e-06, "loss": 0.7963, "step": 704 }, { "epoch": 0.06, "grad_norm": 9.241298999677191, "learning_rate": 9.980595871998037e-06, "loss": 0.7444, "step": 705 }, { "epoch": 0.06, "grad_norm": 8.18634156502095, "learning_rate": 9.980479929102377e-06, "loss": 0.8947, "step": 706 }, { "epoch": 0.06, "grad_norm": 47.982377304650846, "learning_rate": 9.980363641526145e-06, "loss": 0.7308, "step": 707 }, { "epoch": 0.06, "grad_norm": 10.28182854752954, "learning_rate": 9.980247009277391e-06, "loss": 0.7749, "step": 708 }, { "epoch": 0.06, "grad_norm": 7.875265862503272, "learning_rate": 9.980130032364185e-06, "loss": 0.6975, "step": 709 }, { "epoch": 0.06, "grad_norm": 5.7454490609534625, "learning_rate": 9.980012710794624e-06, "loss": 0.8281, "step": 710 }, { "epoch": 0.06, "grad_norm": 4.854895941374371, "learning_rate": 9.979895044576829e-06, "loss": 0.8886, "step": 711 }, { "epoch": 0.06, "grad_norm": 4.812770872264873, "learning_rate": 9.979777033718938e-06, "loss": 0.7602, "step": 712 }, { "epoch": 0.06, "grad_norm": 13.114650721568749, "learning_rate": 9.97965867822912e-06, "loss": 0.8744, "step": 713 }, { "epoch": 0.06, "grad_norm": 6.11235671654343, "learning_rate": 9.979539978115568e-06, "loss": 0.6982, "step": 714 }, { "epoch": 0.06, "grad_norm": 7.261228859091739, "learning_rate": 9.979420933386497e-06, "loss": 0.8365, "step": 715 }, { "epoch": 0.06, "grad_norm": 10.343688127492557, "learning_rate": 9.979301544050143e-06, "loss": 0.7786, "step": 716 }, { "epoch": 0.06, "grad_norm": 9.053383160153807, "learning_rate": 9.979181810114771e-06, "loss": 0.6904, "step": 717 }, { "epoch": 0.06, "grad_norm": 6.804278320561714, "learning_rate": 9.979061731588666e-06, "loss": 0.8131, "step": 718 }, { "epoch": 0.06, "grad_norm": 24.677229568245547, "learning_rate": 9.978941308480137e-06, "loss": 0.9041, "step": 719 }, { "epoch": 0.06, "grad_norm": 5.969549266616758, "learning_rate": 9.978820540797521e-06, "loss": 0.8384, "step": 720 }, { "epoch": 0.06, "grad_norm": 22.663565775540853, "learning_rate": 9.978699428549175e-06, "loss": 1.0064, "step": 721 }, { "epoch": 0.06, "grad_norm": 4.362272357297812, "learning_rate": 9.978577971743477e-06, "loss": 0.8144, "step": 722 }, { "epoch": 0.06, "grad_norm": 9.040528256887317, "learning_rate": 9.978456170388838e-06, "loss": 0.7186, "step": 723 }, { "epoch": 0.06, "grad_norm": 11.804019951391107, "learning_rate": 9.978334024493686e-06, "loss": 0.7353, "step": 724 }, { "epoch": 0.06, "grad_norm": 8.812013060668532, "learning_rate": 9.978211534066471e-06, "loss": 0.7705, "step": 725 }, { "epoch": 0.06, "grad_norm": 6.740598429924063, "learning_rate": 9.978088699115673e-06, "loss": 0.835, "step": 726 }, { "epoch": 0.06, "grad_norm": 6.897707916902952, "learning_rate": 9.977965519649793e-06, "loss": 0.7805, "step": 727 }, { "epoch": 0.06, "grad_norm": 7.7187957644071155, "learning_rate": 9.977841995677355e-06, "loss": 0.6987, "step": 728 }, { "epoch": 0.06, "grad_norm": 31.282590316712046, "learning_rate": 9.977718127206909e-06, "loss": 0.7077, "step": 729 }, { "epoch": 0.06, "grad_norm": 9.352277370686064, "learning_rate": 9.977593914247024e-06, "loss": 0.6778, "step": 730 }, { "epoch": 0.06, "grad_norm": 48.19954764014883, "learning_rate": 9.977469356806299e-06, "loss": 0.8746, "step": 731 }, { "epoch": 0.06, "grad_norm": 18.704782543573447, "learning_rate": 9.977344454893354e-06, "loss": 0.844, "step": 732 }, { "epoch": 0.06, "grad_norm": 15.174353611194654, "learning_rate": 9.977219208516833e-06, "loss": 0.8113, "step": 733 }, { "epoch": 0.06, "grad_norm": 5.492339289354649, "learning_rate": 9.977093617685404e-06, "loss": 0.7634, "step": 734 }, { "epoch": 0.06, "grad_norm": 7.338302644714976, "learning_rate": 9.976967682407758e-06, "loss": 0.8516, "step": 735 }, { "epoch": 0.06, "grad_norm": 7.983333758170923, "learning_rate": 9.97684140269261e-06, "loss": 0.9512, "step": 736 }, { "epoch": 0.06, "grad_norm": 7.777921188900074, "learning_rate": 9.976714778548701e-06, "loss": 0.7497, "step": 737 }, { "epoch": 0.06, "grad_norm": 8.916373984163911, "learning_rate": 9.976587809984795e-06, "loss": 0.916, "step": 738 }, { "epoch": 0.06, "grad_norm": 6.787036602947435, "learning_rate": 9.976460497009674e-06, "loss": 0.9375, "step": 739 }, { "epoch": 0.06, "grad_norm": 5.364770410436953, "learning_rate": 9.976332839632155e-06, "loss": 0.6312, "step": 740 }, { "epoch": 0.06, "grad_norm": 4.322247544796824, "learning_rate": 9.976204837861068e-06, "loss": 0.8753, "step": 741 }, { "epoch": 0.06, "grad_norm": 5.443957648071532, "learning_rate": 9.976076491705276e-06, "loss": 0.7943, "step": 742 }, { "epoch": 0.06, "grad_norm": 12.419575551869404, "learning_rate": 9.975947801173656e-06, "loss": 0.8131, "step": 743 }, { "epoch": 0.06, "grad_norm": 5.777255877496506, "learning_rate": 9.975818766275118e-06, "loss": 0.9184, "step": 744 }, { "epoch": 0.06, "grad_norm": 11.940859893489135, "learning_rate": 9.975689387018591e-06, "loss": 0.6416, "step": 745 }, { "epoch": 0.06, "grad_norm": 13.008816765202388, "learning_rate": 9.975559663413029e-06, "loss": 0.7062, "step": 746 }, { "epoch": 0.06, "grad_norm": 7.167797892402563, "learning_rate": 9.97542959546741e-06, "loss": 0.5999, "step": 747 }, { "epoch": 0.06, "grad_norm": 4.353738233885998, "learning_rate": 9.975299183190734e-06, "loss": 0.7373, "step": 748 }, { "epoch": 0.06, "grad_norm": 5.559853128623595, "learning_rate": 9.975168426592028e-06, "loss": 0.7956, "step": 749 }, { "epoch": 0.06, "grad_norm": 22.636498283945155, "learning_rate": 9.975037325680341e-06, "loss": 0.8175, "step": 750 }, { "epoch": 0.06, "grad_norm": 9.22874708470722, "learning_rate": 9.974905880464745e-06, "loss": 0.7904, "step": 751 }, { "epoch": 0.06, "grad_norm": 8.822797181919888, "learning_rate": 9.974774090954339e-06, "loss": 0.9434, "step": 752 }, { "epoch": 0.06, "grad_norm": 7.860772888999804, "learning_rate": 9.974641957158242e-06, "loss": 0.852, "step": 753 }, { "epoch": 0.06, "grad_norm": 9.19956956070986, "learning_rate": 9.974509479085596e-06, "loss": 0.887, "step": 754 }, { "epoch": 0.06, "grad_norm": 8.69151889213691, "learning_rate": 9.974376656745574e-06, "loss": 0.8229, "step": 755 }, { "epoch": 0.06, "grad_norm": 9.46372560684122, "learning_rate": 9.974243490147366e-06, "loss": 0.801, "step": 756 }, { "epoch": 0.06, "grad_norm": 6.312659465417973, "learning_rate": 9.974109979300187e-06, "loss": 0.7765, "step": 757 }, { "epoch": 0.06, "grad_norm": 4.3638867032447415, "learning_rate": 9.973976124213278e-06, "loss": 0.8664, "step": 758 }, { "epoch": 0.06, "grad_norm": 13.756599665248617, "learning_rate": 9.973841924895904e-06, "loss": 0.8044, "step": 759 }, { "epoch": 0.06, "grad_norm": 9.308361457370733, "learning_rate": 9.97370738135735e-06, "loss": 0.8209, "step": 760 }, { "epoch": 0.06, "grad_norm": 13.670262940598521, "learning_rate": 9.973572493606928e-06, "loss": 0.9134, "step": 761 }, { "epoch": 0.06, "grad_norm": 18.096126530856427, "learning_rate": 9.973437261653973e-06, "loss": 0.538, "step": 762 }, { "epoch": 0.06, "grad_norm": 8.327060634411392, "learning_rate": 9.973301685507844e-06, "loss": 0.9501, "step": 763 }, { "epoch": 0.06, "grad_norm": 24.55584070533181, "learning_rate": 9.973165765177925e-06, "loss": 0.7773, "step": 764 }, { "epoch": 0.06, "grad_norm": 9.108620867789838, "learning_rate": 9.973029500673622e-06, "loss": 0.763, "step": 765 }, { "epoch": 0.06, "grad_norm": 7.978183697469351, "learning_rate": 9.972892892004363e-06, "loss": 0.6603, "step": 766 }, { "epoch": 0.06, "grad_norm": 8.088448138160917, "learning_rate": 9.972755939179604e-06, "loss": 0.7674, "step": 767 }, { "epoch": 0.06, "grad_norm": 8.872525630493133, "learning_rate": 9.972618642208823e-06, "loss": 0.7546, "step": 768 }, { "epoch": 0.06, "grad_norm": 10.049844894137848, "learning_rate": 9.972481001101523e-06, "loss": 0.7202, "step": 769 }, { "epoch": 0.06, "grad_norm": 8.019992840517824, "learning_rate": 9.972343015867228e-06, "loss": 0.7685, "step": 770 }, { "epoch": 0.06, "grad_norm": 13.369252931835081, "learning_rate": 9.972204686515486e-06, "loss": 0.7988, "step": 771 }, { "epoch": 0.06, "grad_norm": 17.282561723904053, "learning_rate": 9.972066013055874e-06, "loss": 0.8278, "step": 772 }, { "epoch": 0.06, "grad_norm": 10.26836660016062, "learning_rate": 9.971926995497987e-06, "loss": 0.7436, "step": 773 }, { "epoch": 0.06, "grad_norm": 12.25179735568449, "learning_rate": 9.971787633851447e-06, "loss": 0.8776, "step": 774 }, { "epoch": 0.06, "grad_norm": 24.541224534268473, "learning_rate": 9.971647928125894e-06, "loss": 0.7755, "step": 775 }, { "epoch": 0.06, "grad_norm": 4.905244768329663, "learning_rate": 9.971507878331005e-06, "loss": 0.7453, "step": 776 }, { "epoch": 0.06, "grad_norm": 7.05090508101291, "learning_rate": 9.971367484476465e-06, "loss": 0.856, "step": 777 }, { "epoch": 0.06, "grad_norm": 8.817778912742655, "learning_rate": 9.971226746571992e-06, "loss": 0.769, "step": 778 }, { "epoch": 0.06, "grad_norm": 5.3409807647908565, "learning_rate": 9.971085664627328e-06, "loss": 0.7754, "step": 779 }, { "epoch": 0.06, "grad_norm": 42.16575317865683, "learning_rate": 9.970944238652236e-06, "loss": 0.8689, "step": 780 }, { "epoch": 0.06, "grad_norm": 6.120024385474541, "learning_rate": 9.970802468656503e-06, "loss": 0.7669, "step": 781 }, { "epoch": 0.06, "grad_norm": 8.189114538713227, "learning_rate": 9.970660354649939e-06, "loss": 0.8429, "step": 782 }, { "epoch": 0.06, "grad_norm": 5.698893152267697, "learning_rate": 9.970517896642382e-06, "loss": 1.0017, "step": 783 }, { "epoch": 0.06, "grad_norm": 21.042069550275208, "learning_rate": 9.970375094643689e-06, "loss": 0.872, "step": 784 }, { "epoch": 0.06, "grad_norm": 13.878057295932173, "learning_rate": 9.970231948663743e-06, "loss": 0.8266, "step": 785 }, { "epoch": 0.06, "grad_norm": 6.336916372172675, "learning_rate": 9.970088458712451e-06, "loss": 0.8635, "step": 786 }, { "epoch": 0.06, "grad_norm": 7.0449806932194265, "learning_rate": 9.969944624799745e-06, "loss": 0.7286, "step": 787 }, { "epoch": 0.06, "grad_norm": 8.54847444967724, "learning_rate": 9.969800446935577e-06, "loss": 0.808, "step": 788 }, { "epoch": 0.06, "grad_norm": 4.276295039491074, "learning_rate": 9.969655925129924e-06, "loss": 0.8296, "step": 789 }, { "epoch": 0.06, "grad_norm": 5.854457082157973, "learning_rate": 9.96951105939279e-06, "loss": 0.6399, "step": 790 }, { "epoch": 0.06, "grad_norm": 17.09779184862654, "learning_rate": 9.9693658497342e-06, "loss": 0.7, "step": 791 }, { "epoch": 0.06, "grad_norm": 6.424050990095431, "learning_rate": 9.969220296164205e-06, "loss": 0.7608, "step": 792 }, { "epoch": 0.06, "grad_norm": 8.160276097768419, "learning_rate": 9.969074398692875e-06, "loss": 0.7454, "step": 793 }, { "epoch": 0.06, "grad_norm": 7.5014742118887705, "learning_rate": 9.96892815733031e-06, "loss": 0.7727, "step": 794 }, { "epoch": 0.06, "grad_norm": 8.284246371359577, "learning_rate": 9.968781572086628e-06, "loss": 0.8904, "step": 795 }, { "epoch": 0.06, "grad_norm": 5.881953539663061, "learning_rate": 9.968634642971978e-06, "loss": 0.7347, "step": 796 }, { "epoch": 0.06, "grad_norm": 7.722557182043191, "learning_rate": 9.968487369996523e-06, "loss": 0.6517, "step": 797 }, { "epoch": 0.06, "grad_norm": 5.794780158131567, "learning_rate": 9.968339753170459e-06, "loss": 0.7945, "step": 798 }, { "epoch": 0.06, "grad_norm": 4.871365261617554, "learning_rate": 9.968191792504001e-06, "loss": 0.6972, "step": 799 }, { "epoch": 0.06, "grad_norm": 6.930616893287979, "learning_rate": 9.968043488007386e-06, "loss": 0.8232, "step": 800 }, { "epoch": 0.07, "grad_norm": 4.28903403125063, "learning_rate": 9.967894839690884e-06, "loss": 0.9223, "step": 801 }, { "epoch": 0.07, "grad_norm": 5.325421378008341, "learning_rate": 9.967745847564776e-06, "loss": 0.8776, "step": 802 }, { "epoch": 0.07, "grad_norm": 7.932127702313135, "learning_rate": 9.967596511639378e-06, "loss": 0.909, "step": 803 }, { "epoch": 0.07, "grad_norm": 7.1841323377343524, "learning_rate": 9.96744683192502e-06, "loss": 0.7199, "step": 804 }, { "epoch": 0.07, "grad_norm": 6.642553610954152, "learning_rate": 9.967296808432066e-06, "loss": 0.7498, "step": 805 }, { "epoch": 0.07, "grad_norm": 5.2268716616633, "learning_rate": 9.967146441170896e-06, "loss": 0.6228, "step": 806 }, { "epoch": 0.07, "grad_norm": 4.121699384903832, "learning_rate": 9.966995730151915e-06, "loss": 0.8428, "step": 807 }, { "epoch": 0.07, "grad_norm": 15.56106843908596, "learning_rate": 9.966844675385555e-06, "loss": 0.7743, "step": 808 }, { "epoch": 0.07, "grad_norm": 5.948918031370312, "learning_rate": 9.966693276882272e-06, "loss": 0.8727, "step": 809 }, { "epoch": 0.07, "grad_norm": 4.664897988238388, "learning_rate": 9.966541534652538e-06, "loss": 0.6481, "step": 810 }, { "epoch": 0.07, "grad_norm": 6.177603280928483, "learning_rate": 9.966389448706859e-06, "loss": 0.78, "step": 811 }, { "epoch": 0.07, "grad_norm": 13.162756931173213, "learning_rate": 9.96623701905576e-06, "loss": 0.7799, "step": 812 }, { "epoch": 0.07, "grad_norm": 17.018743069941877, "learning_rate": 9.966084245709788e-06, "loss": 0.727, "step": 813 }, { "epoch": 0.07, "grad_norm": 7.666835410148128, "learning_rate": 9.96593112867952e-06, "loss": 0.7523, "step": 814 }, { "epoch": 0.07, "grad_norm": 5.540559464419787, "learning_rate": 9.965777667975546e-06, "loss": 0.8156, "step": 815 }, { "epoch": 0.07, "grad_norm": 4.951176468811312, "learning_rate": 9.965623863608494e-06, "loss": 0.7693, "step": 816 }, { "epoch": 0.07, "grad_norm": 3.7206632323204025, "learning_rate": 9.965469715589002e-06, "loss": 0.8613, "step": 817 }, { "epoch": 0.07, "grad_norm": 6.7556748849564325, "learning_rate": 9.96531522392774e-06, "loss": 0.9728, "step": 818 }, { "epoch": 0.07, "grad_norm": 5.8668072394888995, "learning_rate": 9.965160388635402e-06, "loss": 0.6932, "step": 819 }, { "epoch": 0.07, "grad_norm": 26.194320057358315, "learning_rate": 9.9650052097227e-06, "loss": 0.7615, "step": 820 }, { "epoch": 0.07, "grad_norm": 5.333708559215378, "learning_rate": 9.964849687200377e-06, "loss": 0.8371, "step": 821 }, { "epoch": 0.07, "grad_norm": 5.1594197263264965, "learning_rate": 9.964693821079194e-06, "loss": 0.7396, "step": 822 }, { "epoch": 0.07, "grad_norm": 5.1273121776194355, "learning_rate": 9.964537611369938e-06, "loss": 0.8317, "step": 823 }, { "epoch": 0.07, "grad_norm": 4.653307624111108, "learning_rate": 9.964381058083421e-06, "loss": 0.6836, "step": 824 }, { "epoch": 0.07, "grad_norm": 9.604982551891355, "learning_rate": 9.964224161230476e-06, "loss": 0.7292, "step": 825 }, { "epoch": 0.07, "grad_norm": 4.7403449517953735, "learning_rate": 9.96406692082196e-06, "loss": 0.7123, "step": 826 }, { "epoch": 0.07, "grad_norm": 8.227697079100423, "learning_rate": 9.963909336868758e-06, "loss": 0.8756, "step": 827 }, { "epoch": 0.07, "grad_norm": 6.115060769086787, "learning_rate": 9.963751409381774e-06, "loss": 0.7654, "step": 828 }, { "epoch": 0.07, "grad_norm": 16.90809934504262, "learning_rate": 9.963593138371939e-06, "loss": 0.779, "step": 829 }, { "epoch": 0.07, "grad_norm": 6.359338702571578, "learning_rate": 9.963434523850206e-06, "loss": 0.7796, "step": 830 }, { "epoch": 0.07, "grad_norm": 4.411438184565806, "learning_rate": 9.96327556582755e-06, "loss": 0.7237, "step": 831 }, { "epoch": 0.07, "grad_norm": 44.76047610019357, "learning_rate": 9.963116264314974e-06, "loss": 0.7859, "step": 832 }, { "epoch": 0.07, "grad_norm": 9.619867305900573, "learning_rate": 9.962956619323504e-06, "loss": 0.8518, "step": 833 }, { "epoch": 0.07, "grad_norm": 11.188991808541468, "learning_rate": 9.962796630864184e-06, "loss": 0.7654, "step": 834 }, { "epoch": 0.07, "grad_norm": 6.004332394358668, "learning_rate": 9.96263629894809e-06, "loss": 0.7891, "step": 835 }, { "epoch": 0.07, "grad_norm": 5.588585308557958, "learning_rate": 9.962475623586316e-06, "loss": 0.6648, "step": 836 }, { "epoch": 0.07, "grad_norm": 9.148501115648214, "learning_rate": 9.962314604789982e-06, "loss": 0.762, "step": 837 }, { "epoch": 0.07, "grad_norm": 14.722793520390981, "learning_rate": 9.962153242570233e-06, "loss": 0.7699, "step": 838 }, { "epoch": 0.07, "grad_norm": 9.859290348194369, "learning_rate": 9.961991536938237e-06, "loss": 0.6424, "step": 839 }, { "epoch": 0.07, "grad_norm": 3.8594444931415635, "learning_rate": 9.961829487905182e-06, "loss": 0.4996, "step": 840 }, { "epoch": 0.07, "grad_norm": 7.623088174752541, "learning_rate": 9.961667095482283e-06, "loss": 0.8039, "step": 841 }, { "epoch": 0.07, "grad_norm": 4.677367894065911, "learning_rate": 9.96150435968078e-06, "loss": 0.7172, "step": 842 }, { "epoch": 0.07, "grad_norm": 14.383615352481165, "learning_rate": 9.961341280511936e-06, "loss": 0.7313, "step": 843 }, { "epoch": 0.07, "grad_norm": 4.08138449110562, "learning_rate": 9.961177857987037e-06, "loss": 0.8424, "step": 844 }, { "epoch": 0.07, "grad_norm": 10.94874355130115, "learning_rate": 9.96101409211739e-06, "loss": 0.8608, "step": 845 }, { "epoch": 0.07, "grad_norm": 6.230957946137558, "learning_rate": 9.960849982914332e-06, "loss": 0.9696, "step": 846 }, { "epoch": 0.07, "grad_norm": 4.761848121321998, "learning_rate": 9.960685530389218e-06, "loss": 0.8284, "step": 847 }, { "epoch": 0.07, "grad_norm": 5.987747041413682, "learning_rate": 9.960520734553432e-06, "loss": 0.6766, "step": 848 }, { "epoch": 0.07, "grad_norm": 6.126573632799808, "learning_rate": 9.960355595418375e-06, "loss": 0.6997, "step": 849 }, { "epoch": 0.07, "grad_norm": 8.144664170303466, "learning_rate": 9.960190112995479e-06, "loss": 0.6508, "step": 850 }, { "epoch": 0.07, "grad_norm": 8.37940730156233, "learning_rate": 9.960024287296195e-06, "loss": 0.8425, "step": 851 }, { "epoch": 0.07, "grad_norm": 6.394458375642536, "learning_rate": 9.959858118332e-06, "loss": 0.7271, "step": 852 }, { "epoch": 0.07, "grad_norm": 9.177863306437578, "learning_rate": 9.959691606114393e-06, "loss": 0.7561, "step": 853 }, { "epoch": 0.07, "grad_norm": 8.302236980695053, "learning_rate": 9.959524750654898e-06, "loss": 0.8669, "step": 854 }, { "epoch": 0.07, "grad_norm": 10.544754941989344, "learning_rate": 9.959357551965063e-06, "loss": 0.7252, "step": 855 }, { "epoch": 0.07, "grad_norm": 4.99750362486209, "learning_rate": 9.959190010056458e-06, "loss": 0.7404, "step": 856 }, { "epoch": 0.07, "grad_norm": 36.414827290278545, "learning_rate": 9.959022124940678e-06, "loss": 0.7767, "step": 857 }, { "epoch": 0.07, "grad_norm": 14.503261023860437, "learning_rate": 9.958853896629344e-06, "loss": 0.9081, "step": 858 }, { "epoch": 0.07, "grad_norm": 4.67930378717921, "learning_rate": 9.958685325134097e-06, "loss": 0.7509, "step": 859 }, { "epoch": 0.07, "grad_norm": 4.865634714529107, "learning_rate": 9.958516410466601e-06, "loss": 0.8449, "step": 860 }, { "epoch": 0.07, "grad_norm": 7.771632641240565, "learning_rate": 9.95834715263855e-06, "loss": 0.7373, "step": 861 }, { "epoch": 0.07, "grad_norm": 6.04839485681136, "learning_rate": 9.958177551661655e-06, "loss": 0.8477, "step": 862 }, { "epoch": 0.07, "grad_norm": 19.242405087909226, "learning_rate": 9.958007607547652e-06, "loss": 0.8284, "step": 863 }, { "epoch": 0.07, "grad_norm": 5.213087523048071, "learning_rate": 9.957837320308309e-06, "loss": 0.7162, "step": 864 }, { "epoch": 0.07, "grad_norm": 5.418000678399474, "learning_rate": 9.957666689955403e-06, "loss": 0.7309, "step": 865 }, { "epoch": 0.07, "grad_norm": 7.648405072934473, "learning_rate": 9.957495716500747e-06, "loss": 0.9614, "step": 866 }, { "epoch": 0.07, "grad_norm": 3.8831454782837795, "learning_rate": 9.957324399956172e-06, "loss": 0.737, "step": 867 }, { "epoch": 0.07, "grad_norm": 8.28695972505524, "learning_rate": 9.957152740333534e-06, "loss": 0.8296, "step": 868 }, { "epoch": 0.07, "grad_norm": 5.43908349412972, "learning_rate": 9.956980737644715e-06, "loss": 0.9532, "step": 869 }, { "epoch": 0.07, "grad_norm": 6.23835385602191, "learning_rate": 9.956808391901615e-06, "loss": 0.7306, "step": 870 }, { "epoch": 0.07, "grad_norm": 4.725563833646563, "learning_rate": 9.956635703116166e-06, "loss": 0.696, "step": 871 }, { "epoch": 0.07, "grad_norm": 3.512834238982999, "learning_rate": 9.956462671300317e-06, "loss": 0.829, "step": 872 }, { "epoch": 0.07, "grad_norm": 3.0551970397151087, "learning_rate": 9.956289296466041e-06, "loss": 0.8191, "step": 873 }, { "epoch": 0.07, "grad_norm": 4.320361362167167, "learning_rate": 9.956115578625339e-06, "loss": 0.7991, "step": 874 }, { "epoch": 0.07, "grad_norm": 6.967663473238619, "learning_rate": 9.955941517790232e-06, "loss": 0.676, "step": 875 }, { "epoch": 0.07, "grad_norm": 5.394789910865541, "learning_rate": 9.955767113972767e-06, "loss": 0.761, "step": 876 }, { "epoch": 0.07, "grad_norm": 9.614141369274806, "learning_rate": 9.955592367185015e-06, "loss": 0.7956, "step": 877 }, { "epoch": 0.07, "grad_norm": 4.658583378466397, "learning_rate": 9.955417277439068e-06, "loss": 0.8907, "step": 878 }, { "epoch": 0.07, "grad_norm": 4.897367126235742, "learning_rate": 9.955241844747042e-06, "loss": 0.8763, "step": 879 }, { "epoch": 0.07, "grad_norm": 8.290562520715572, "learning_rate": 9.95506606912108e-06, "loss": 0.9094, "step": 880 }, { "epoch": 0.07, "grad_norm": 15.439530966584819, "learning_rate": 9.954889950573347e-06, "loss": 0.783, "step": 881 }, { "epoch": 0.07, "grad_norm": 37.29525276950261, "learning_rate": 9.95471348911603e-06, "loss": 0.9104, "step": 882 }, { "epoch": 0.07, "grad_norm": 5.187187775729593, "learning_rate": 9.954536684761343e-06, "loss": 0.9567, "step": 883 }, { "epoch": 0.07, "grad_norm": 6.512834614492173, "learning_rate": 9.95435953752152e-06, "loss": 0.8454, "step": 884 }, { "epoch": 0.07, "grad_norm": 5.405657187936197, "learning_rate": 9.954182047408823e-06, "loss": 0.7677, "step": 885 }, { "epoch": 0.07, "grad_norm": 6.9931936765485085, "learning_rate": 9.954004214435533e-06, "loss": 0.8084, "step": 886 }, { "epoch": 0.07, "grad_norm": 5.667047853835735, "learning_rate": 9.953826038613961e-06, "loss": 0.7438, "step": 887 }, { "epoch": 0.07, "grad_norm": 20.8710846896458, "learning_rate": 9.953647519956432e-06, "loss": 0.5489, "step": 888 }, { "epoch": 0.07, "grad_norm": 4.589623792937581, "learning_rate": 9.953468658475305e-06, "loss": 0.6361, "step": 889 }, { "epoch": 0.07, "grad_norm": 5.322500375505189, "learning_rate": 9.953289454182958e-06, "loss": 0.7671, "step": 890 }, { "epoch": 0.07, "grad_norm": 3.8203693202343194, "learning_rate": 9.953109907091792e-06, "loss": 0.7516, "step": 891 }, { "epoch": 0.07, "grad_norm": 6.990557318329104, "learning_rate": 9.952930017214233e-06, "loss": 0.7722, "step": 892 }, { "epoch": 0.07, "grad_norm": 6.7532650792567575, "learning_rate": 9.95274978456273e-06, "loss": 0.8809, "step": 893 }, { "epoch": 0.07, "grad_norm": 7.508917201796005, "learning_rate": 9.952569209149757e-06, "loss": 0.8374, "step": 894 }, { "epoch": 0.07, "grad_norm": 5.992657387810211, "learning_rate": 9.952388290987812e-06, "loss": 0.7263, "step": 895 }, { "epoch": 0.07, "grad_norm": 7.581471653942725, "learning_rate": 9.95220703008941e-06, "loss": 0.6452, "step": 896 }, { "epoch": 0.07, "grad_norm": 25.857956670490243, "learning_rate": 9.952025426467105e-06, "loss": 0.8136, "step": 897 }, { "epoch": 0.07, "grad_norm": 5.113196391952603, "learning_rate": 9.951843480133458e-06, "loss": 0.8126, "step": 898 }, { "epoch": 0.07, "grad_norm": 18.49369519994825, "learning_rate": 9.951661191101063e-06, "loss": 0.8243, "step": 899 }, { "epoch": 0.07, "grad_norm": 23.873132265642464, "learning_rate": 9.951478559382536e-06, "loss": 0.751, "step": 900 }, { "epoch": 0.07, "grad_norm": 4.6761732460968295, "learning_rate": 9.951295584990515e-06, "loss": 0.7602, "step": 901 }, { "epoch": 0.07, "grad_norm": 7.609129030298003, "learning_rate": 9.951112267937663e-06, "loss": 0.6253, "step": 902 }, { "epoch": 0.07, "grad_norm": 6.786478944356428, "learning_rate": 9.950928608236668e-06, "loss": 0.8975, "step": 903 }, { "epoch": 0.07, "grad_norm": 5.455662032494263, "learning_rate": 9.95074460590024e-06, "loss": 0.8192, "step": 904 }, { "epoch": 0.07, "grad_norm": 26.577339739078592, "learning_rate": 9.950560260941112e-06, "loss": 0.8423, "step": 905 }, { "epoch": 0.07, "grad_norm": 4.321835415456136, "learning_rate": 9.950375573372042e-06, "loss": 0.7966, "step": 906 }, { "epoch": 0.07, "grad_norm": 5.004862640712375, "learning_rate": 9.950190543205813e-06, "loss": 0.6711, "step": 907 }, { "epoch": 0.07, "grad_norm": 3.9501277130340715, "learning_rate": 9.95000517045523e-06, "loss": 0.7701, "step": 908 }, { "epoch": 0.07, "grad_norm": 13.828369329201337, "learning_rate": 9.949819455133121e-06, "loss": 0.7923, "step": 909 }, { "epoch": 0.07, "grad_norm": 4.881289854963828, "learning_rate": 9.949633397252339e-06, "loss": 0.7466, "step": 910 }, { "epoch": 0.07, "grad_norm": 4.0183721039686215, "learning_rate": 9.94944699682576e-06, "loss": 0.7513, "step": 911 }, { "epoch": 0.07, "grad_norm": 4.567117089736312, "learning_rate": 9.949260253866286e-06, "loss": 0.8003, "step": 912 }, { "epoch": 0.07, "grad_norm": 4.449917123996754, "learning_rate": 9.949073168386838e-06, "loss": 0.8539, "step": 913 }, { "epoch": 0.07, "grad_norm": 7.388867799156344, "learning_rate": 9.948885740400365e-06, "loss": 0.7033, "step": 914 }, { "epoch": 0.07, "grad_norm": 20.231811817769845, "learning_rate": 9.948697969919839e-06, "loss": 0.7475, "step": 915 }, { "epoch": 0.07, "grad_norm": 4.602413427599765, "learning_rate": 9.948509856958253e-06, "loss": 0.8719, "step": 916 }, { "epoch": 0.07, "grad_norm": 6.117071530642284, "learning_rate": 9.948321401528625e-06, "loss": 0.9034, "step": 917 }, { "epoch": 0.07, "grad_norm": 19.01094701332023, "learning_rate": 9.948132603644001e-06, "loss": 0.8231, "step": 918 }, { "epoch": 0.07, "grad_norm": 5.7879819054520105, "learning_rate": 9.947943463317445e-06, "loss": 0.6423, "step": 919 }, { "epoch": 0.07, "grad_norm": 5.155867668448361, "learning_rate": 9.947753980562045e-06, "loss": 0.7194, "step": 920 }, { "epoch": 0.07, "grad_norm": 23.22171030764841, "learning_rate": 9.947564155390916e-06, "loss": 0.7074, "step": 921 }, { "epoch": 0.07, "grad_norm": 6.393907034813833, "learning_rate": 9.947373987817194e-06, "loss": 0.7863, "step": 922 }, { "epoch": 0.07, "grad_norm": 5.475109094971649, "learning_rate": 9.947183477854042e-06, "loss": 0.9534, "step": 923 }, { "epoch": 0.08, "grad_norm": 4.53901485891668, "learning_rate": 9.946992625514646e-06, "loss": 0.7146, "step": 924 }, { "epoch": 0.08, "grad_norm": 13.622381875614638, "learning_rate": 9.946801430812208e-06, "loss": 0.8143, "step": 925 }, { "epoch": 0.08, "grad_norm": 9.549300713937553, "learning_rate": 9.946609893759966e-06, "loss": 0.749, "step": 926 }, { "epoch": 0.08, "grad_norm": 6.481220949010384, "learning_rate": 9.94641801437117e-06, "loss": 0.763, "step": 927 }, { "epoch": 0.08, "grad_norm": 5.56160845553395, "learning_rate": 9.946225792659104e-06, "loss": 0.7637, "step": 928 }, { "epoch": 0.08, "grad_norm": 6.161101929715245, "learning_rate": 9.946033228637069e-06, "loss": 0.9231, "step": 929 }, { "epoch": 0.08, "grad_norm": 5.096161531859888, "learning_rate": 9.945840322318391e-06, "loss": 0.7357, "step": 930 }, { "epoch": 0.08, "grad_norm": 6.544050522382474, "learning_rate": 9.945647073716422e-06, "loss": 0.7041, "step": 931 }, { "epoch": 0.08, "grad_norm": 7.850436824436384, "learning_rate": 9.945453482844535e-06, "loss": 0.8433, "step": 932 }, { "epoch": 0.08, "grad_norm": 6.440581462075318, "learning_rate": 9.945259549716127e-06, "loss": 0.6464, "step": 933 }, { "epoch": 0.08, "grad_norm": 5.340891125007818, "learning_rate": 9.94506527434462e-06, "loss": 0.7887, "step": 934 }, { "epoch": 0.08, "grad_norm": 7.592498978195759, "learning_rate": 9.944870656743462e-06, "loss": 0.8702, "step": 935 }, { "epoch": 0.08, "grad_norm": 9.84904867711743, "learning_rate": 9.944675696926117e-06, "loss": 0.7181, "step": 936 }, { "epoch": 0.08, "grad_norm": 21.662536540182213, "learning_rate": 9.944480394906079e-06, "loss": 0.7432, "step": 937 }, { "epoch": 0.08, "grad_norm": 7.27272270876898, "learning_rate": 9.944284750696865e-06, "loss": 0.7084, "step": 938 }, { "epoch": 0.08, "grad_norm": 6.765101245689383, "learning_rate": 9.944088764312014e-06, "loss": 0.6646, "step": 939 }, { "epoch": 0.08, "grad_norm": 5.518373504631924, "learning_rate": 9.943892435765093e-06, "loss": 0.8554, "step": 940 }, { "epoch": 0.08, "grad_norm": 3.940540018156056, "learning_rate": 9.943695765069683e-06, "loss": 0.8284, "step": 941 }, { "epoch": 0.08, "grad_norm": 5.63113714704677, "learning_rate": 9.943498752239398e-06, "loss": 0.7775, "step": 942 }, { "epoch": 0.08, "grad_norm": 4.503367299437282, "learning_rate": 9.943301397287874e-06, "loss": 0.7488, "step": 943 }, { "epoch": 0.08, "grad_norm": 6.246638434274801, "learning_rate": 9.943103700228768e-06, "loss": 0.7664, "step": 944 }, { "epoch": 0.08, "grad_norm": 11.15387398581592, "learning_rate": 9.942905661075759e-06, "loss": 0.6623, "step": 945 }, { "epoch": 0.08, "grad_norm": 3.8034695725627046, "learning_rate": 9.942707279842557e-06, "loss": 0.763, "step": 946 }, { "epoch": 0.08, "grad_norm": 5.608412165544444, "learning_rate": 9.94250855654289e-06, "loss": 0.7495, "step": 947 }, { "epoch": 0.08, "grad_norm": 4.523342986017144, "learning_rate": 9.942309491190509e-06, "loss": 0.8043, "step": 948 }, { "epoch": 0.08, "grad_norm": 6.493377748582832, "learning_rate": 9.942110083799192e-06, "loss": 0.9902, "step": 949 }, { "epoch": 0.08, "grad_norm": 10.937879370172757, "learning_rate": 9.94191033438274e-06, "loss": 0.7315, "step": 950 }, { "epoch": 0.08, "grad_norm": 5.216161601391438, "learning_rate": 9.941710242954976e-06, "loss": 0.5672, "step": 951 }, { "epoch": 0.08, "grad_norm": 4.703809997189642, "learning_rate": 9.941509809529746e-06, "loss": 0.9608, "step": 952 }, { "epoch": 0.08, "grad_norm": 5.620467549065916, "learning_rate": 9.941309034120925e-06, "loss": 0.7088, "step": 953 }, { "epoch": 0.08, "grad_norm": 4.461534523951197, "learning_rate": 9.941107916742405e-06, "loss": 0.8243, "step": 954 }, { "epoch": 0.08, "grad_norm": 22.560882535644645, "learning_rate": 9.940906457408103e-06, "loss": 0.7528, "step": 955 }, { "epoch": 0.08, "grad_norm": 4.27339083312216, "learning_rate": 9.940704656131967e-06, "loss": 0.7824, "step": 956 }, { "epoch": 0.08, "grad_norm": 6.427607352352942, "learning_rate": 9.940502512927958e-06, "loss": 0.8037, "step": 957 }, { "epoch": 0.08, "grad_norm": 21.486530270408817, "learning_rate": 9.940300027810067e-06, "loss": 0.6702, "step": 958 }, { "epoch": 0.08, "grad_norm": 3.6136062975255667, "learning_rate": 9.94009720079231e-06, "loss": 0.8245, "step": 959 }, { "epoch": 0.08, "grad_norm": 5.82086673808323, "learning_rate": 9.939894031888717e-06, "loss": 0.7954, "step": 960 }, { "epoch": 0.08, "grad_norm": 4.979728493186614, "learning_rate": 9.939690521113355e-06, "loss": 0.7076, "step": 961 }, { "epoch": 0.08, "grad_norm": 4.12188946976356, "learning_rate": 9.939486668480306e-06, "loss": 0.7562, "step": 962 }, { "epoch": 0.08, "grad_norm": 4.299141519077421, "learning_rate": 9.939282474003678e-06, "loss": 0.8121, "step": 963 }, { "epoch": 0.08, "grad_norm": 6.822823660416211, "learning_rate": 9.939077937697604e-06, "loss": 0.7428, "step": 964 }, { "epoch": 0.08, "grad_norm": 11.154851287993086, "learning_rate": 9.938873059576235e-06, "loss": 0.645, "step": 965 }, { "epoch": 0.08, "grad_norm": 4.516010444982705, "learning_rate": 9.938667839653752e-06, "loss": 0.7897, "step": 966 }, { "epoch": 0.08, "grad_norm": 4.313693456095772, "learning_rate": 9.93846227794436e-06, "loss": 0.7475, "step": 967 }, { "epoch": 0.08, "grad_norm": 7.846145724945153, "learning_rate": 9.938256374462286e-06, "loss": 0.716, "step": 968 }, { "epoch": 0.08, "grad_norm": 4.385874599495664, "learning_rate": 9.938050129221773e-06, "loss": 0.8602, "step": 969 }, { "epoch": 0.08, "grad_norm": 3.2789458776551155, "learning_rate": 9.937843542237099e-06, "loss": 0.602, "step": 970 }, { "epoch": 0.08, "grad_norm": 5.819009004471656, "learning_rate": 9.937636613522562e-06, "loss": 0.7621, "step": 971 }, { "epoch": 0.08, "grad_norm": 3.9582290627576424, "learning_rate": 9.93742934309248e-06, "loss": 0.837, "step": 972 }, { "epoch": 0.08, "grad_norm": 7.6323447591746705, "learning_rate": 9.9372217309612e-06, "loss": 0.7551, "step": 973 }, { "epoch": 0.08, "grad_norm": 3.631259920871801, "learning_rate": 9.937013777143087e-06, "loss": 0.6543, "step": 974 }, { "epoch": 0.08, "grad_norm": 6.509976918945766, "learning_rate": 9.936805481652536e-06, "loss": 0.8063, "step": 975 }, { "epoch": 0.08, "grad_norm": 8.516313771294374, "learning_rate": 9.936596844503962e-06, "loss": 0.8063, "step": 976 }, { "epoch": 0.08, "grad_norm": 11.591748132257733, "learning_rate": 9.936387865711802e-06, "loss": 0.8324, "step": 977 }, { "epoch": 0.08, "grad_norm": 4.603496334600352, "learning_rate": 9.936178545290519e-06, "loss": 0.938, "step": 978 }, { "epoch": 0.08, "grad_norm": 4.349682373081811, "learning_rate": 9.9359688832546e-06, "loss": 0.6962, "step": 979 }, { "epoch": 0.08, "grad_norm": 3.5776018820043127, "learning_rate": 9.935758879618556e-06, "loss": 0.7722, "step": 980 }, { "epoch": 0.08, "grad_norm": 4.072291376069065, "learning_rate": 9.93554853439692e-06, "loss": 0.6594, "step": 981 }, { "epoch": 0.08, "grad_norm": 4.592067075313992, "learning_rate": 9.935337847604246e-06, "loss": 0.6964, "step": 982 }, { "epoch": 0.08, "grad_norm": 2.9351920534778757, "learning_rate": 9.935126819255119e-06, "loss": 0.8057, "step": 983 }, { "epoch": 0.08, "grad_norm": 3.7762648145261517, "learning_rate": 9.934915449364141e-06, "loss": 0.7966, "step": 984 }, { "epoch": 0.08, "grad_norm": 6.6685467898552755, "learning_rate": 9.934703737945944e-06, "loss": 0.7689, "step": 985 }, { "epoch": 0.08, "grad_norm": 3.760959075165353, "learning_rate": 9.934491685015173e-06, "loss": 0.7678, "step": 986 }, { "epoch": 0.08, "grad_norm": 3.4864339606064476, "learning_rate": 9.934279290586511e-06, "loss": 0.618, "step": 987 }, { "epoch": 0.08, "grad_norm": 5.228770735384011, "learning_rate": 9.93406655467465e-06, "loss": 0.7949, "step": 988 }, { "epoch": 0.08, "grad_norm": 3.147651548750773, "learning_rate": 9.933853477294317e-06, "loss": 0.7729, "step": 989 }, { "epoch": 0.08, "grad_norm": 7.406575738382119, "learning_rate": 9.93364005846026e-06, "loss": 0.7157, "step": 990 }, { "epoch": 0.08, "grad_norm": 3.6224987419974766, "learning_rate": 9.933426298187243e-06, "loss": 0.7651, "step": 991 }, { "epoch": 0.08, "grad_norm": 4.119228522175504, "learning_rate": 9.933212196490063e-06, "loss": 0.5336, "step": 992 }, { "epoch": 0.08, "grad_norm": 8.300633494688958, "learning_rate": 9.932997753383538e-06, "loss": 0.8058, "step": 993 }, { "epoch": 0.08, "grad_norm": 4.380093926346218, "learning_rate": 9.932782968882506e-06, "loss": 0.7643, "step": 994 }, { "epoch": 0.08, "grad_norm": 5.201132628181408, "learning_rate": 9.932567843001835e-06, "loss": 0.926, "step": 995 }, { "epoch": 0.08, "grad_norm": 5.950779583930611, "learning_rate": 9.932352375756411e-06, "loss": 0.7841, "step": 996 }, { "epoch": 0.08, "grad_norm": 21.05161336474183, "learning_rate": 9.932136567161145e-06, "loss": 0.6382, "step": 997 }, { "epoch": 0.08, "grad_norm": 9.056181659713767, "learning_rate": 9.931920417230974e-06, "loss": 0.729, "step": 998 }, { "epoch": 0.08, "grad_norm": 23.988667050010946, "learning_rate": 9.931703925980856e-06, "loss": 0.8483, "step": 999 }, { "epoch": 0.08, "grad_norm": 9.214668855527453, "learning_rate": 9.931487093425775e-06, "loss": 0.6327, "step": 1000 }, { "epoch": 0.08, "grad_norm": 5.746109229520153, "learning_rate": 9.931269919580734e-06, "loss": 0.8844, "step": 1001 }, { "epoch": 0.08, "grad_norm": 5.299552832418721, "learning_rate": 9.931052404460766e-06, "loss": 0.7639, "step": 1002 }, { "epoch": 0.08, "grad_norm": 4.160625810606795, "learning_rate": 9.930834548080922e-06, "loss": 0.6231, "step": 1003 }, { "epoch": 0.08, "grad_norm": 3.5668914498558024, "learning_rate": 9.930616350456282e-06, "loss": 0.6595, "step": 1004 }, { "epoch": 0.08, "grad_norm": 4.63681778605169, "learning_rate": 9.930397811601943e-06, "loss": 0.801, "step": 1005 }, { "epoch": 0.08, "grad_norm": 5.496515292026558, "learning_rate": 9.930178931533032e-06, "loss": 0.6517, "step": 1006 }, { "epoch": 0.08, "grad_norm": 3.5020793942161, "learning_rate": 9.929959710264695e-06, "loss": 0.7167, "step": 1007 }, { "epoch": 0.08, "grad_norm": 17.1445253482021, "learning_rate": 9.929740147812106e-06, "loss": 0.816, "step": 1008 }, { "epoch": 0.08, "grad_norm": 11.27795117529141, "learning_rate": 9.929520244190458e-06, "loss": 0.7646, "step": 1009 }, { "epoch": 0.08, "grad_norm": 5.49549461415629, "learning_rate": 9.92929999941497e-06, "loss": 0.6922, "step": 1010 }, { "epoch": 0.08, "grad_norm": 4.993225096537147, "learning_rate": 9.929079413500884e-06, "loss": 0.9263, "step": 1011 }, { "epoch": 0.08, "grad_norm": 3.7330883391512684, "learning_rate": 9.928858486463467e-06, "loss": 0.8416, "step": 1012 }, { "epoch": 0.08, "grad_norm": 5.40128996639886, "learning_rate": 9.928637218318009e-06, "loss": 0.6479, "step": 1013 }, { "epoch": 0.08, "grad_norm": 4.0160429141944185, "learning_rate": 9.928415609079821e-06, "loss": 0.8747, "step": 1014 }, { "epoch": 0.08, "grad_norm": 3.945935170920747, "learning_rate": 9.92819365876424e-06, "loss": 0.8002, "step": 1015 }, { "epoch": 0.08, "grad_norm": 5.196704350368994, "learning_rate": 9.927971367386629e-06, "loss": 0.6667, "step": 1016 }, { "epoch": 0.08, "grad_norm": 7.304828327588545, "learning_rate": 9.92774873496237e-06, "loss": 0.6251, "step": 1017 }, { "epoch": 0.08, "grad_norm": 4.25473957858187, "learning_rate": 9.927525761506871e-06, "loss": 0.7353, "step": 1018 }, { "epoch": 0.08, "grad_norm": 4.640389992903682, "learning_rate": 9.927302447035563e-06, "loss": 0.833, "step": 1019 }, { "epoch": 0.08, "grad_norm": 5.760963916229781, "learning_rate": 9.9270787915639e-06, "loss": 0.7687, "step": 1020 }, { "epoch": 0.08, "grad_norm": 3.842918585443859, "learning_rate": 9.926854795107363e-06, "loss": 0.5339, "step": 1021 }, { "epoch": 0.08, "grad_norm": 10.666138604444795, "learning_rate": 9.92663045768145e-06, "loss": 0.756, "step": 1022 }, { "epoch": 0.08, "grad_norm": 3.906078679440604, "learning_rate": 9.926405779301691e-06, "loss": 0.6946, "step": 1023 }, { "epoch": 0.08, "grad_norm": 3.8490032747580383, "learning_rate": 9.92618075998363e-06, "loss": 0.8035, "step": 1024 }, { "epoch": 0.08, "grad_norm": 3.083633094743296, "learning_rate": 9.925955399742845e-06, "loss": 0.7367, "step": 1025 }, { "epoch": 0.08, "grad_norm": 3.593264625777261, "learning_rate": 9.925729698594931e-06, "loss": 0.7698, "step": 1026 }, { "epoch": 0.08, "grad_norm": 3.8398926563000746, "learning_rate": 9.925503656555503e-06, "loss": 0.6662, "step": 1027 }, { "epoch": 0.08, "grad_norm": 8.489739430039288, "learning_rate": 9.925277273640211e-06, "loss": 0.9429, "step": 1028 }, { "epoch": 0.08, "grad_norm": 5.856188681022169, "learning_rate": 9.925050549864718e-06, "loss": 0.6923, "step": 1029 }, { "epoch": 0.08, "grad_norm": 3.4962359611927623, "learning_rate": 9.92482348524472e-06, "loss": 0.8971, "step": 1030 }, { "epoch": 0.08, "grad_norm": 4.430716110068405, "learning_rate": 9.924596079795923e-06, "loss": 0.7463, "step": 1031 }, { "epoch": 0.08, "grad_norm": 3.1610562957822372, "learning_rate": 9.924368333534072e-06, "loss": 0.8483, "step": 1032 }, { "epoch": 0.08, "grad_norm": 3.8900016458608695, "learning_rate": 9.924140246474926e-06, "loss": 0.7393, "step": 1033 }, { "epoch": 0.08, "grad_norm": 3.671228713876813, "learning_rate": 9.923911818634269e-06, "loss": 0.9098, "step": 1034 }, { "epoch": 0.08, "grad_norm": 7.769093234314537, "learning_rate": 9.92368305002791e-06, "loss": 0.7433, "step": 1035 }, { "epoch": 0.08, "grad_norm": 5.43434876369951, "learning_rate": 9.923453940671683e-06, "loss": 0.664, "step": 1036 }, { "epoch": 0.08, "grad_norm": 5.72929780125456, "learning_rate": 9.923224490581443e-06, "loss": 0.5399, "step": 1037 }, { "epoch": 0.08, "grad_norm": 9.94036833098379, "learning_rate": 9.922994699773068e-06, "loss": 0.8391, "step": 1038 }, { "epoch": 0.08, "grad_norm": 5.117092099920273, "learning_rate": 9.922764568262464e-06, "loss": 0.8107, "step": 1039 }, { "epoch": 0.08, "grad_norm": 5.613431087025792, "learning_rate": 9.922534096065552e-06, "loss": 0.8647, "step": 1040 }, { "epoch": 0.08, "grad_norm": 4.64098920179695, "learning_rate": 9.92230328319829e-06, "loss": 0.7277, "step": 1041 }, { "epoch": 0.08, "grad_norm": 3.0310641486331624, "learning_rate": 9.922072129676644e-06, "loss": 0.6895, "step": 1042 }, { "epoch": 0.08, "grad_norm": 4.291735516284705, "learning_rate": 9.921840635516616e-06, "loss": 0.6512, "step": 1043 }, { "epoch": 0.08, "grad_norm": 4.495560680326641, "learning_rate": 9.921608800734227e-06, "loss": 0.7719, "step": 1044 }, { "epoch": 0.08, "grad_norm": 3.161303808316317, "learning_rate": 9.921376625345518e-06, "loss": 0.7885, "step": 1045 }, { "epoch": 0.08, "grad_norm": 10.851499882243445, "learning_rate": 9.921144109366559e-06, "loss": 0.9069, "step": 1046 }, { "epoch": 0.09, "grad_norm": 3.608379955270225, "learning_rate": 9.920911252813443e-06, "loss": 0.8589, "step": 1047 }, { "epoch": 0.09, "grad_norm": 14.927786675515918, "learning_rate": 9.920678055702282e-06, "loss": 0.6949, "step": 1048 }, { "epoch": 0.09, "grad_norm": 4.5890084066537655, "learning_rate": 9.920444518049218e-06, "loss": 0.6165, "step": 1049 }, { "epoch": 0.09, "grad_norm": 4.451038148414518, "learning_rate": 9.920210639870409e-06, "loss": 0.8226, "step": 1050 }, { "epoch": 0.09, "grad_norm": 2.937558557650849, "learning_rate": 9.919976421182047e-06, "loss": 0.7284, "step": 1051 }, { "epoch": 0.09, "grad_norm": 4.064327801853339, "learning_rate": 9.919741862000334e-06, "loss": 0.7984, "step": 1052 }, { "epoch": 0.09, "grad_norm": 8.973358208837215, "learning_rate": 9.91950696234151e-06, "loss": 0.6713, "step": 1053 }, { "epoch": 0.09, "grad_norm": 7.636967905058214, "learning_rate": 9.919271722221828e-06, "loss": 0.8619, "step": 1054 }, { "epoch": 0.09, "grad_norm": 5.402826456928093, "learning_rate": 9.919036141657568e-06, "loss": 0.829, "step": 1055 }, { "epoch": 0.09, "grad_norm": 6.993785916137985, "learning_rate": 9.918800220665035e-06, "loss": 0.7523, "step": 1056 }, { "epoch": 0.09, "grad_norm": 3.8553929408426555, "learning_rate": 9.918563959260555e-06, "loss": 0.7235, "step": 1057 }, { "epoch": 0.09, "grad_norm": 6.596063254727656, "learning_rate": 9.918327357460477e-06, "loss": 0.7393, "step": 1058 }, { "epoch": 0.09, "grad_norm": 4.259353763831047, "learning_rate": 9.91809041528118e-06, "loss": 0.7052, "step": 1059 }, { "epoch": 0.09, "grad_norm": 3.649189238817127, "learning_rate": 9.917853132739058e-06, "loss": 0.7828, "step": 1060 }, { "epoch": 0.09, "grad_norm": 4.093778551435277, "learning_rate": 9.917615509850536e-06, "loss": 0.6905, "step": 1061 }, { "epoch": 0.09, "grad_norm": 7.57228672830486, "learning_rate": 9.917377546632055e-06, "loss": 0.704, "step": 1062 }, { "epoch": 0.09, "grad_norm": 4.889695775191646, "learning_rate": 9.917139243100088e-06, "loss": 0.8628, "step": 1063 }, { "epoch": 0.09, "grad_norm": 3.3953735733294175, "learning_rate": 9.91690059927112e-06, "loss": 0.6576, "step": 1064 }, { "epoch": 0.09, "grad_norm": 8.329555407868353, "learning_rate": 9.916661615161674e-06, "loss": 0.8183, "step": 1065 }, { "epoch": 0.09, "grad_norm": 8.237534015676456, "learning_rate": 9.916422290788285e-06, "loss": 0.7328, "step": 1066 }, { "epoch": 0.09, "grad_norm": 5.745277757879374, "learning_rate": 9.916182626167518e-06, "loss": 0.7609, "step": 1067 }, { "epoch": 0.09, "grad_norm": 3.6367273981050947, "learning_rate": 9.915942621315959e-06, "loss": 0.5816, "step": 1068 }, { "epoch": 0.09, "grad_norm": 4.710848061986129, "learning_rate": 9.915702276250217e-06, "loss": 0.7976, "step": 1069 }, { "epoch": 0.09, "grad_norm": 3.9552797363841035, "learning_rate": 9.915461590986926e-06, "loss": 0.5878, "step": 1070 }, { "epoch": 0.09, "grad_norm": 3.90018679865148, "learning_rate": 9.915220565542743e-06, "loss": 0.7667, "step": 1071 }, { "epoch": 0.09, "grad_norm": 3.915986148623406, "learning_rate": 9.914979199934346e-06, "loss": 0.923, "step": 1072 }, { "epoch": 0.09, "grad_norm": 3.3974519555701472, "learning_rate": 9.914737494178442e-06, "loss": 0.5939, "step": 1073 }, { "epoch": 0.09, "grad_norm": 5.059956409317443, "learning_rate": 9.914495448291758e-06, "loss": 0.7642, "step": 1074 }, { "epoch": 0.09, "grad_norm": 6.082387550847085, "learning_rate": 9.914253062291044e-06, "loss": 0.7543, "step": 1075 }, { "epoch": 0.09, "grad_norm": 25.154978832787183, "learning_rate": 9.914010336193077e-06, "loss": 0.8236, "step": 1076 }, { "epoch": 0.09, "grad_norm": 3.9672640135861936, "learning_rate": 9.913767270014652e-06, "loss": 0.771, "step": 1077 }, { "epoch": 0.09, "grad_norm": 7.638904124693897, "learning_rate": 9.913523863772592e-06, "loss": 0.6481, "step": 1078 }, { "epoch": 0.09, "grad_norm": 4.028721901556489, "learning_rate": 9.913280117483745e-06, "loss": 0.6476, "step": 1079 }, { "epoch": 0.09, "grad_norm": 4.375989313113419, "learning_rate": 9.913036031164975e-06, "loss": 0.7894, "step": 1080 }, { "epoch": 0.09, "grad_norm": 4.220239002106115, "learning_rate": 9.912791604833178e-06, "loss": 0.8664, "step": 1081 }, { "epoch": 0.09, "grad_norm": 4.666463661160009, "learning_rate": 9.912546838505266e-06, "loss": 0.6611, "step": 1082 }, { "epoch": 0.09, "grad_norm": 6.9154326364342475, "learning_rate": 9.912301732198184e-06, "loss": 0.9785, "step": 1083 }, { "epoch": 0.09, "grad_norm": 4.277318960057017, "learning_rate": 9.912056285928891e-06, "loss": 0.8013, "step": 1084 }, { "epoch": 0.09, "grad_norm": 3.079100562094572, "learning_rate": 9.911810499714373e-06, "loss": 0.913, "step": 1085 }, { "epoch": 0.09, "grad_norm": 4.438400510465831, "learning_rate": 9.91156437357164e-06, "loss": 0.7312, "step": 1086 }, { "epoch": 0.09, "grad_norm": 4.14487571794865, "learning_rate": 9.91131790751773e-06, "loss": 0.9048, "step": 1087 }, { "epoch": 0.09, "grad_norm": 4.547231442082386, "learning_rate": 9.911071101569694e-06, "loss": 0.8275, "step": 1088 }, { "epoch": 0.09, "grad_norm": 5.7787973842987554, "learning_rate": 9.910823955744615e-06, "loss": 0.6561, "step": 1089 }, { "epoch": 0.09, "grad_norm": 3.4428992464361463, "learning_rate": 9.910576470059598e-06, "loss": 0.6894, "step": 1090 }, { "epoch": 0.09, "grad_norm": 6.076962340195229, "learning_rate": 9.91032864453177e-06, "loss": 0.808, "step": 1091 }, { "epoch": 0.09, "grad_norm": 5.985556415986422, "learning_rate": 9.910080479178282e-06, "loss": 0.7723, "step": 1092 }, { "epoch": 0.09, "grad_norm": 5.3835831629937285, "learning_rate": 9.90983197401631e-06, "loss": 0.9289, "step": 1093 }, { "epoch": 0.09, "grad_norm": 5.5393749699238315, "learning_rate": 9.909583129063046e-06, "loss": 0.9609, "step": 1094 }, { "epoch": 0.09, "grad_norm": 4.323564617243667, "learning_rate": 9.90933394433572e-06, "loss": 0.7551, "step": 1095 }, { "epoch": 0.09, "grad_norm": 4.5267962554846095, "learning_rate": 9.909084419851571e-06, "loss": 0.7603, "step": 1096 }, { "epoch": 0.09, "grad_norm": 3.9995133224500856, "learning_rate": 9.90883455562787e-06, "loss": 0.8373, "step": 1097 }, { "epoch": 0.09, "grad_norm": 9.165032274693864, "learning_rate": 9.908584351681911e-06, "loss": 0.8055, "step": 1098 }, { "epoch": 0.09, "grad_norm": 3.7727125927230594, "learning_rate": 9.908333808031007e-06, "loss": 0.703, "step": 1099 }, { "epoch": 0.09, "grad_norm": 5.820870400258755, "learning_rate": 9.908082924692499e-06, "loss": 0.7268, "step": 1100 }, { "epoch": 0.09, "grad_norm": 7.165965543667079, "learning_rate": 9.907831701683747e-06, "loss": 0.7373, "step": 1101 }, { "epoch": 0.09, "grad_norm": 7.652492053681831, "learning_rate": 9.907580139022139e-06, "loss": 0.8142, "step": 1102 }, { "epoch": 0.09, "grad_norm": 3.149190540920497, "learning_rate": 9.907328236725086e-06, "loss": 0.803, "step": 1103 }, { "epoch": 0.09, "grad_norm": 3.483704993006792, "learning_rate": 9.90707599481002e-06, "loss": 0.7613, "step": 1104 }, { "epoch": 0.09, "grad_norm": 5.172405368788908, "learning_rate": 9.906823413294398e-06, "loss": 0.8217, "step": 1105 }, { "epoch": 0.09, "grad_norm": 3.5411551906184147, "learning_rate": 9.906570492195698e-06, "loss": 0.9043, "step": 1106 }, { "epoch": 0.09, "grad_norm": 5.486225917593387, "learning_rate": 9.906317231531427e-06, "loss": 0.6923, "step": 1107 }, { "epoch": 0.09, "grad_norm": 15.853957582324012, "learning_rate": 9.906063631319111e-06, "loss": 0.7069, "step": 1108 }, { "epoch": 0.09, "grad_norm": 3.6877238672400647, "learning_rate": 9.9058096915763e-06, "loss": 0.8677, "step": 1109 }, { "epoch": 0.09, "grad_norm": 4.9036420214887295, "learning_rate": 9.905555412320569e-06, "loss": 0.6888, "step": 1110 }, { "epoch": 0.09, "grad_norm": 6.166212071831924, "learning_rate": 9.905300793569515e-06, "loss": 0.8216, "step": 1111 }, { "epoch": 0.09, "grad_norm": 4.8565221167143955, "learning_rate": 9.90504583534076e-06, "loss": 0.7228, "step": 1112 }, { "epoch": 0.09, "grad_norm": 5.336056210710972, "learning_rate": 9.904790537651949e-06, "loss": 0.612, "step": 1113 }, { "epoch": 0.09, "grad_norm": 2.871625813259238, "learning_rate": 9.904534900520748e-06, "loss": 0.7724, "step": 1114 }, { "epoch": 0.09, "grad_norm": 8.531092644906606, "learning_rate": 9.904278923964851e-06, "loss": 0.7963, "step": 1115 }, { "epoch": 0.09, "grad_norm": 3.737463118053625, "learning_rate": 9.904022608001975e-06, "loss": 0.8101, "step": 1116 }, { "epoch": 0.09, "grad_norm": 3.2928231451485708, "learning_rate": 9.903765952649854e-06, "loss": 0.6575, "step": 1117 }, { "epoch": 0.09, "grad_norm": 4.302129382605941, "learning_rate": 9.903508957926253e-06, "loss": 0.89, "step": 1118 }, { "epoch": 0.09, "grad_norm": 3.38020190482699, "learning_rate": 9.903251623848957e-06, "loss": 0.7938, "step": 1119 }, { "epoch": 0.09, "grad_norm": 4.7240983048709975, "learning_rate": 9.902993950435776e-06, "loss": 0.5785, "step": 1120 }, { "epoch": 0.09, "grad_norm": 3.4218352497309037, "learning_rate": 9.902735937704541e-06, "loss": 0.7044, "step": 1121 }, { "epoch": 0.09, "grad_norm": 4.475443915494931, "learning_rate": 9.902477585673109e-06, "loss": 0.9424, "step": 1122 }, { "epoch": 0.09, "grad_norm": 3.6710225942063683, "learning_rate": 9.902218894359359e-06, "loss": 0.7194, "step": 1123 }, { "epoch": 0.09, "grad_norm": 3.546319129901945, "learning_rate": 9.901959863781195e-06, "loss": 0.8076, "step": 1124 }, { "epoch": 0.09, "grad_norm": 3.3664768878646094, "learning_rate": 9.901700493956544e-06, "loss": 0.7384, "step": 1125 }, { "epoch": 0.09, "grad_norm": 2.806530483700508, "learning_rate": 9.901440784903354e-06, "loss": 0.6184, "step": 1126 }, { "epoch": 0.09, "grad_norm": 3.5366917484214175, "learning_rate": 9.9011807366396e-06, "loss": 0.8521, "step": 1127 }, { "epoch": 0.09, "grad_norm": 7.141202850953822, "learning_rate": 9.900920349183278e-06, "loss": 0.8012, "step": 1128 }, { "epoch": 0.09, "grad_norm": 3.265130767977999, "learning_rate": 9.90065962255241e-06, "loss": 0.8052, "step": 1129 }, { "epoch": 0.09, "grad_norm": 4.680723499730134, "learning_rate": 9.900398556765038e-06, "loss": 0.7557, "step": 1130 }, { "epoch": 0.09, "grad_norm": 11.458425188798179, "learning_rate": 9.900137151839233e-06, "loss": 0.59, "step": 1131 }, { "epoch": 0.09, "grad_norm": 6.148770459949628, "learning_rate": 9.89987540779308e-06, "loss": 0.8973, "step": 1132 }, { "epoch": 0.09, "grad_norm": 2.497490234921751, "learning_rate": 9.8996133246447e-06, "loss": 0.6229, "step": 1133 }, { "epoch": 0.09, "grad_norm": 3.7581806057091853, "learning_rate": 9.899350902412224e-06, "loss": 0.785, "step": 1134 }, { "epoch": 0.09, "grad_norm": 3.632708287602974, "learning_rate": 9.899088141113819e-06, "loss": 0.7011, "step": 1135 }, { "epoch": 0.09, "grad_norm": 2.703635349152616, "learning_rate": 9.898825040767666e-06, "loss": 0.7454, "step": 1136 }, { "epoch": 0.09, "grad_norm": 3.6772863618608467, "learning_rate": 9.898561601391977e-06, "loss": 0.859, "step": 1137 }, { "epoch": 0.09, "grad_norm": 16.065177253473, "learning_rate": 9.898297823004979e-06, "loss": 0.679, "step": 1138 }, { "epoch": 0.09, "grad_norm": 3.1070934550836493, "learning_rate": 9.898033705624928e-06, "loss": 0.7288, "step": 1139 }, { "epoch": 0.09, "grad_norm": 3.7684569781107493, "learning_rate": 9.897769249270106e-06, "loss": 0.7278, "step": 1140 }, { "epoch": 0.09, "grad_norm": 4.587958546895321, "learning_rate": 9.897504453958815e-06, "loss": 0.6837, "step": 1141 }, { "epoch": 0.09, "grad_norm": 3.0571552864798477, "learning_rate": 9.897239319709375e-06, "loss": 0.8673, "step": 1142 }, { "epoch": 0.09, "grad_norm": 3.403443207430614, "learning_rate": 9.896973846540142e-06, "loss": 0.6961, "step": 1143 }, { "epoch": 0.09, "grad_norm": 6.893497510576452, "learning_rate": 9.896708034469482e-06, "loss": 0.6644, "step": 1144 }, { "epoch": 0.09, "grad_norm": 4.622753770607722, "learning_rate": 9.896441883515794e-06, "loss": 0.6017, "step": 1145 }, { "epoch": 0.09, "grad_norm": 3.512599040118152, "learning_rate": 9.896175393697499e-06, "loss": 0.7315, "step": 1146 }, { "epoch": 0.09, "grad_norm": 8.982560874807408, "learning_rate": 9.895908565033036e-06, "loss": 0.7207, "step": 1147 }, { "epoch": 0.09, "grad_norm": 9.634916335681975, "learning_rate": 9.895641397540874e-06, "loss": 0.8745, "step": 1148 }, { "epoch": 0.09, "grad_norm": 3.113997486510584, "learning_rate": 9.895373891239502e-06, "loss": 0.8938, "step": 1149 }, { "epoch": 0.09, "grad_norm": 8.092991688453978, "learning_rate": 9.895106046147432e-06, "loss": 0.8294, "step": 1150 }, { "epoch": 0.09, "grad_norm": 5.173684016647621, "learning_rate": 9.894837862283201e-06, "loss": 0.9007, "step": 1151 }, { "epoch": 0.09, "grad_norm": 2.6763507861653935, "learning_rate": 9.894569339665372e-06, "loss": 0.785, "step": 1152 }, { "epoch": 0.09, "grad_norm": 3.6301244824476955, "learning_rate": 9.894300478312524e-06, "loss": 0.9116, "step": 1153 }, { "epoch": 0.09, "grad_norm": 4.32987022976241, "learning_rate": 9.894031278243266e-06, "loss": 0.7559, "step": 1154 }, { "epoch": 0.09, "grad_norm": 9.034278671980887, "learning_rate": 9.89376173947623e-06, "loss": 0.6825, "step": 1155 }, { "epoch": 0.09, "grad_norm": 5.249049503838335, "learning_rate": 9.893491862030065e-06, "loss": 0.671, "step": 1156 }, { "epoch": 0.09, "grad_norm": 4.120151287262343, "learning_rate": 9.893221645923452e-06, "loss": 0.5626, "step": 1157 }, { "epoch": 0.09, "grad_norm": 5.066088791253685, "learning_rate": 9.892951091175093e-06, "loss": 0.8943, "step": 1158 }, { "epoch": 0.09, "grad_norm": 8.783782213292914, "learning_rate": 9.892680197803707e-06, "loss": 0.8003, "step": 1159 }, { "epoch": 0.09, "grad_norm": 3.201175712678574, "learning_rate": 9.892408965828046e-06, "loss": 0.5925, "step": 1160 }, { "epoch": 0.09, "grad_norm": 4.661309716128301, "learning_rate": 9.89213739526688e-06, "loss": 0.7266, "step": 1161 }, { "epoch": 0.09, "grad_norm": 3.0405354335454304, "learning_rate": 9.891865486139002e-06, "loss": 0.704, "step": 1162 }, { "epoch": 0.09, "grad_norm": 4.838473687401868, "learning_rate": 9.89159323846323e-06, "loss": 0.7643, "step": 1163 }, { "epoch": 0.09, "grad_norm": 3.8642295887465643, "learning_rate": 9.891320652258406e-06, "loss": 0.8438, "step": 1164 }, { "epoch": 0.09, "grad_norm": 2.6984076540239452, "learning_rate": 9.891047727543398e-06, "loss": 0.7953, "step": 1165 }, { "epoch": 0.09, "grad_norm": 3.7914722923979927, "learning_rate": 9.890774464337086e-06, "loss": 0.6631, "step": 1166 }, { "epoch": 0.09, "grad_norm": 5.682970295545688, "learning_rate": 9.890500862658387e-06, "loss": 0.5272, "step": 1167 }, { "epoch": 0.09, "grad_norm": 4.661819920465713, "learning_rate": 9.890226922526238e-06, "loss": 0.7997, "step": 1168 }, { "epoch": 0.09, "grad_norm": 9.056706508755106, "learning_rate": 9.889952643959592e-06, "loss": 0.605, "step": 1169 }, { "epoch": 0.1, "grad_norm": 5.08371783545947, "learning_rate": 9.889678026977435e-06, "loss": 0.8474, "step": 1170 }, { "epoch": 0.1, "grad_norm": 2.88028529351079, "learning_rate": 9.889403071598769e-06, "loss": 0.7543, "step": 1171 }, { "epoch": 0.1, "grad_norm": 3.1051354067412933, "learning_rate": 9.889127777842624e-06, "loss": 0.7966, "step": 1172 }, { "epoch": 0.1, "grad_norm": 4.529648349875851, "learning_rate": 9.888852145728054e-06, "loss": 0.8149, "step": 1173 }, { "epoch": 0.1, "grad_norm": 5.820747980845195, "learning_rate": 9.888576175274132e-06, "loss": 0.7026, "step": 1174 }, { "epoch": 0.1, "grad_norm": 2.4140725505470724, "learning_rate": 9.888299866499957e-06, "loss": 0.7341, "step": 1175 }, { "epoch": 0.1, "grad_norm": 8.811083069309303, "learning_rate": 9.888023219424653e-06, "loss": 0.6041, "step": 1176 }, { "epoch": 0.1, "grad_norm": 3.4939161375008956, "learning_rate": 9.887746234067363e-06, "loss": 0.8145, "step": 1177 }, { "epoch": 0.1, "grad_norm": 3.645271050372457, "learning_rate": 9.88746891044726e-06, "loss": 0.7544, "step": 1178 }, { "epoch": 0.1, "grad_norm": 3.837622379800246, "learning_rate": 9.887191248583532e-06, "loss": 0.7722, "step": 1179 }, { "epoch": 0.1, "grad_norm": 3.500485781262857, "learning_rate": 9.8869132484954e-06, "loss": 0.7321, "step": 1180 }, { "epoch": 0.1, "grad_norm": 2.989167239484751, "learning_rate": 9.8866349102021e-06, "loss": 0.7465, "step": 1181 }, { "epoch": 0.1, "grad_norm": 3.2981052822821355, "learning_rate": 9.886356233722894e-06, "loss": 0.7189, "step": 1182 }, { "epoch": 0.1, "grad_norm": 3.1438841463435474, "learning_rate": 9.886077219077071e-06, "loss": 0.7959, "step": 1183 }, { "epoch": 0.1, "grad_norm": 23.913583349958333, "learning_rate": 9.885797866283937e-06, "loss": 0.7115, "step": 1184 }, { "epoch": 0.1, "grad_norm": 42.709794356624315, "learning_rate": 9.88551817536283e-06, "loss": 0.7295, "step": 1185 }, { "epoch": 0.1, "grad_norm": 5.256986148891334, "learning_rate": 9.8852381463331e-06, "loss": 0.702, "step": 1186 }, { "epoch": 0.1, "grad_norm": 3.946855501441445, "learning_rate": 9.884957779214133e-06, "loss": 0.8018, "step": 1187 }, { "epoch": 0.1, "grad_norm": 3.898297967568548, "learning_rate": 9.884677074025329e-06, "loss": 0.8004, "step": 1188 }, { "epoch": 0.1, "grad_norm": 5.380857753666877, "learning_rate": 9.884396030786116e-06, "loss": 0.6774, "step": 1189 }, { "epoch": 0.1, "grad_norm": 3.126318798216784, "learning_rate": 9.88411464951594e-06, "loss": 0.7404, "step": 1190 }, { "epoch": 0.1, "grad_norm": 4.154249770978452, "learning_rate": 9.88383293023428e-06, "loss": 0.8671, "step": 1191 }, { "epoch": 0.1, "grad_norm": 4.422385190539039, "learning_rate": 9.883550872960629e-06, "loss": 0.7488, "step": 1192 }, { "epoch": 0.1, "grad_norm": 3.677109055124081, "learning_rate": 9.883268477714508e-06, "loss": 0.7291, "step": 1193 }, { "epoch": 0.1, "grad_norm": 4.22588489269259, "learning_rate": 9.882985744515461e-06, "loss": 0.7681, "step": 1194 }, { "epoch": 0.1, "grad_norm": 11.721689097909122, "learning_rate": 9.882702673383056e-06, "loss": 0.8409, "step": 1195 }, { "epoch": 0.1, "grad_norm": 7.628247655467624, "learning_rate": 9.88241926433688e-06, "loss": 0.6905, "step": 1196 }, { "epoch": 0.1, "grad_norm": 3.489912960548631, "learning_rate": 9.88213551739655e-06, "loss": 0.7578, "step": 1197 }, { "epoch": 0.1, "grad_norm": 4.736191386382483, "learning_rate": 9.8818514325817e-06, "loss": 0.7734, "step": 1198 }, { "epoch": 0.1, "grad_norm": 3.3029953110072117, "learning_rate": 9.881567009911995e-06, "loss": 0.7993, "step": 1199 }, { "epoch": 0.1, "grad_norm": 3.37304964960025, "learning_rate": 9.881282249407114e-06, "loss": 0.7551, "step": 1200 }, { "epoch": 0.1, "grad_norm": 2.847014608114322, "learning_rate": 9.880997151086767e-06, "loss": 0.6851, "step": 1201 }, { "epoch": 0.1, "grad_norm": 7.602963107318313, "learning_rate": 9.880711714970682e-06, "loss": 0.8794, "step": 1202 }, { "epoch": 0.1, "grad_norm": 18.143295739591395, "learning_rate": 9.880425941078617e-06, "loss": 0.7016, "step": 1203 }, { "epoch": 0.1, "grad_norm": 2.9908623486806176, "learning_rate": 9.880139829430346e-06, "loss": 0.7635, "step": 1204 }, { "epoch": 0.1, "grad_norm": 3.7187187436499762, "learning_rate": 9.879853380045672e-06, "loss": 0.885, "step": 1205 }, { "epoch": 0.1, "grad_norm": 2.987013273660116, "learning_rate": 9.879566592944417e-06, "loss": 0.8207, "step": 1206 }, { "epoch": 0.1, "grad_norm": 3.034792493197225, "learning_rate": 9.87927946814643e-06, "loss": 0.7653, "step": 1207 }, { "epoch": 0.1, "grad_norm": 8.445968639494074, "learning_rate": 9.878992005671581e-06, "loss": 0.6941, "step": 1208 }, { "epoch": 0.1, "grad_norm": 2.8062090327018088, "learning_rate": 9.878704205539765e-06, "loss": 0.7038, "step": 1209 }, { "epoch": 0.1, "grad_norm": 10.61830280204193, "learning_rate": 9.878416067770898e-06, "loss": 0.7423, "step": 1210 }, { "epoch": 0.1, "grad_norm": 5.829245003002502, "learning_rate": 9.878127592384923e-06, "loss": 0.7727, "step": 1211 }, { "epoch": 0.1, "grad_norm": 4.393883352997549, "learning_rate": 9.877838779401803e-06, "loss": 0.6695, "step": 1212 }, { "epoch": 0.1, "grad_norm": 3.2174716737054845, "learning_rate": 9.877549628841528e-06, "loss": 0.712, "step": 1213 }, { "epoch": 0.1, "grad_norm": 4.90407546886971, "learning_rate": 9.877260140724104e-06, "loss": 0.6899, "step": 1214 }, { "epoch": 0.1, "grad_norm": 3.2478865697461625, "learning_rate": 9.87697031506957e-06, "loss": 0.7064, "step": 1215 }, { "epoch": 0.1, "grad_norm": 3.9199303918916093, "learning_rate": 9.876680151897981e-06, "loss": 0.8831, "step": 1216 }, { "epoch": 0.1, "grad_norm": 6.258564186550168, "learning_rate": 9.87638965122942e-06, "loss": 0.6719, "step": 1217 }, { "epoch": 0.1, "grad_norm": 3.7664582403507896, "learning_rate": 9.876098813083993e-06, "loss": 0.6672, "step": 1218 }, { "epoch": 0.1, "grad_norm": 3.6093551415355383, "learning_rate": 9.875807637481825e-06, "loss": 0.7742, "step": 1219 }, { "epoch": 0.1, "grad_norm": 7.480689390255201, "learning_rate": 9.875516124443064e-06, "loss": 0.7473, "step": 1220 }, { "epoch": 0.1, "grad_norm": 2.825691151118583, "learning_rate": 9.875224273987893e-06, "loss": 0.69, "step": 1221 }, { "epoch": 0.1, "grad_norm": 6.473230991173139, "learning_rate": 9.874932086136503e-06, "loss": 0.7992, "step": 1222 }, { "epoch": 0.1, "grad_norm": 11.567424305188858, "learning_rate": 9.874639560909118e-06, "loss": 0.6756, "step": 1223 }, { "epoch": 0.1, "grad_norm": 6.1197661778074774, "learning_rate": 9.874346698325983e-06, "loss": 0.9457, "step": 1224 }, { "epoch": 0.1, "grad_norm": 5.401258383044548, "learning_rate": 9.874053498407365e-06, "loss": 0.6093, "step": 1225 }, { "epoch": 0.1, "grad_norm": 3.068493519169048, "learning_rate": 9.873759961173554e-06, "loss": 0.7869, "step": 1226 }, { "epoch": 0.1, "grad_norm": 7.119172691670688, "learning_rate": 9.873466086644867e-06, "loss": 0.7752, "step": 1227 }, { "epoch": 0.1, "grad_norm": 6.845706370348848, "learning_rate": 9.87317187484164e-06, "loss": 0.6347, "step": 1228 }, { "epoch": 0.1, "grad_norm": 5.18166262147119, "learning_rate": 9.872877325784235e-06, "loss": 0.737, "step": 1229 }, { "epoch": 0.1, "grad_norm": 5.23514363104195, "learning_rate": 9.87258243949304e-06, "loss": 0.8877, "step": 1230 }, { "epoch": 0.1, "grad_norm": 5.930707484437629, "learning_rate": 9.872287215988456e-06, "loss": 0.5708, "step": 1231 }, { "epoch": 0.1, "grad_norm": 3.629782001284931, "learning_rate": 9.87199165529092e-06, "loss": 0.7978, "step": 1232 }, { "epoch": 0.1, "grad_norm": 6.204077121038583, "learning_rate": 9.871695757420885e-06, "loss": 0.7749, "step": 1233 }, { "epoch": 0.1, "grad_norm": 4.368812882976357, "learning_rate": 9.871399522398828e-06, "loss": 0.71, "step": 1234 }, { "epoch": 0.1, "grad_norm": 2.9552147719298585, "learning_rate": 9.87110295024525e-06, "loss": 0.6845, "step": 1235 }, { "epoch": 0.1, "grad_norm": 4.222280327890182, "learning_rate": 9.870806040980679e-06, "loss": 0.8664, "step": 1236 }, { "epoch": 0.1, "grad_norm": 3.921781730860754, "learning_rate": 9.870508794625662e-06, "loss": 0.7239, "step": 1237 }, { "epoch": 0.1, "grad_norm": 5.521359178989577, "learning_rate": 9.870211211200766e-06, "loss": 0.7154, "step": 1238 }, { "epoch": 0.1, "grad_norm": 5.086981897236873, "learning_rate": 9.86991329072659e-06, "loss": 0.6208, "step": 1239 }, { "epoch": 0.1, "grad_norm": 5.122179985145022, "learning_rate": 9.869615033223752e-06, "loss": 0.767, "step": 1240 }, { "epoch": 0.1, "grad_norm": 4.688880231626787, "learning_rate": 9.869316438712891e-06, "loss": 0.7438, "step": 1241 }, { "epoch": 0.1, "grad_norm": 4.281464908071756, "learning_rate": 9.869017507214672e-06, "loss": 0.9089, "step": 1242 }, { "epoch": 0.1, "grad_norm": 3.4748085789350203, "learning_rate": 9.868718238749785e-06, "loss": 0.6636, "step": 1243 }, { "epoch": 0.1, "grad_norm": 4.1330389738677065, "learning_rate": 9.868418633338938e-06, "loss": 0.6336, "step": 1244 }, { "epoch": 0.1, "grad_norm": 4.071640208100227, "learning_rate": 9.86811869100287e-06, "loss": 0.7533, "step": 1245 }, { "epoch": 0.1, "grad_norm": 4.431312299211099, "learning_rate": 9.867818411762336e-06, "loss": 0.6853, "step": 1246 }, { "epoch": 0.1, "grad_norm": 3.433496208969935, "learning_rate": 9.867517795638115e-06, "loss": 0.6625, "step": 1247 }, { "epoch": 0.1, "grad_norm": 6.380602274024577, "learning_rate": 9.867216842651017e-06, "loss": 0.784, "step": 1248 }, { "epoch": 0.1, "grad_norm": 16.182759959644432, "learning_rate": 9.866915552821865e-06, "loss": 0.6352, "step": 1249 }, { "epoch": 0.1, "grad_norm": 5.035815541348587, "learning_rate": 9.866613926171514e-06, "loss": 0.8515, "step": 1250 }, { "epoch": 0.1, "grad_norm": 5.090006178253494, "learning_rate": 9.866311962720835e-06, "loss": 0.9131, "step": 1251 }, { "epoch": 0.1, "grad_norm": 6.816810378484307, "learning_rate": 9.866009662490727e-06, "loss": 0.7506, "step": 1252 }, { "epoch": 0.1, "grad_norm": 3.743464506369369, "learning_rate": 9.865707025502112e-06, "loss": 0.6838, "step": 1253 }, { "epoch": 0.1, "grad_norm": 17.615484855741332, "learning_rate": 9.865404051775936e-06, "loss": 0.7172, "step": 1254 }, { "epoch": 0.1, "grad_norm": 4.504644141995805, "learning_rate": 9.86510074133316e-06, "loss": 0.6782, "step": 1255 }, { "epoch": 0.1, "grad_norm": 4.026086493993496, "learning_rate": 9.864797094194783e-06, "loss": 0.639, "step": 1256 }, { "epoch": 0.1, "grad_norm": 5.273730124505048, "learning_rate": 9.864493110381816e-06, "loss": 0.8838, "step": 1257 }, { "epoch": 0.1, "grad_norm": 3.9377573821834795, "learning_rate": 9.864188789915295e-06, "loss": 0.7895, "step": 1258 }, { "epoch": 0.1, "grad_norm": 4.5487719317790285, "learning_rate": 9.86388413281628e-06, "loss": 0.8105, "step": 1259 }, { "epoch": 0.1, "grad_norm": 3.1291290137230505, "learning_rate": 9.863579139105862e-06, "loss": 0.7212, "step": 1260 }, { "epoch": 0.1, "grad_norm": 4.790756925984459, "learning_rate": 9.863273808805141e-06, "loss": 0.6061, "step": 1261 }, { "epoch": 0.1, "grad_norm": 7.465308176691548, "learning_rate": 9.862968141935252e-06, "loss": 0.8746, "step": 1262 }, { "epoch": 0.1, "grad_norm": 3.6525539309667825, "learning_rate": 9.862662138517347e-06, "loss": 0.8401, "step": 1263 }, { "epoch": 0.1, "grad_norm": 4.739852255292235, "learning_rate": 9.862355798572604e-06, "loss": 0.8153, "step": 1264 }, { "epoch": 0.1, "grad_norm": 6.3575762048790105, "learning_rate": 9.862049122122226e-06, "loss": 0.8266, "step": 1265 }, { "epoch": 0.1, "grad_norm": 3.987603304843584, "learning_rate": 9.861742109187433e-06, "loss": 0.7441, "step": 1266 }, { "epoch": 0.1, "grad_norm": 12.003326557188275, "learning_rate": 9.861434759789475e-06, "loss": 0.7768, "step": 1267 }, { "epoch": 0.1, "grad_norm": 3.16378972002077, "learning_rate": 9.86112707394962e-06, "loss": 0.8174, "step": 1268 }, { "epoch": 0.1, "grad_norm": 4.749024088971492, "learning_rate": 9.860819051689163e-06, "loss": 0.6776, "step": 1269 }, { "epoch": 0.1, "grad_norm": 3.9307918192554983, "learning_rate": 9.860510693029424e-06, "loss": 0.6796, "step": 1270 }, { "epoch": 0.1, "grad_norm": 6.731818846528893, "learning_rate": 9.860201997991739e-06, "loss": 0.686, "step": 1271 }, { "epoch": 0.1, "grad_norm": 4.370986106627797, "learning_rate": 9.859892966597474e-06, "loss": 0.6092, "step": 1272 }, { "epoch": 0.1, "grad_norm": 4.647759419860096, "learning_rate": 9.859583598868013e-06, "loss": 0.7857, "step": 1273 }, { "epoch": 0.1, "grad_norm": 2.962857668988205, "learning_rate": 9.85927389482477e-06, "loss": 0.6617, "step": 1274 }, { "epoch": 0.1, "grad_norm": 6.423050561608751, "learning_rate": 9.858963854489179e-06, "loss": 0.6811, "step": 1275 }, { "epoch": 0.1, "grad_norm": 3.6952153575177755, "learning_rate": 9.858653477882691e-06, "loss": 0.6863, "step": 1276 }, { "epoch": 0.1, "grad_norm": 4.0237581720151425, "learning_rate": 9.858342765026793e-06, "loss": 0.6368, "step": 1277 }, { "epoch": 0.1, "grad_norm": 7.446626041979622, "learning_rate": 9.858031715942983e-06, "loss": 0.5983, "step": 1278 }, { "epoch": 0.1, "grad_norm": 4.324469182781904, "learning_rate": 9.857720330652791e-06, "loss": 0.7926, "step": 1279 }, { "epoch": 0.1, "grad_norm": 5.447489173862893, "learning_rate": 9.857408609177763e-06, "loss": 0.7575, "step": 1280 }, { "epoch": 0.1, "grad_norm": 4.780062264599041, "learning_rate": 9.857096551539476e-06, "loss": 0.6093, "step": 1281 }, { "epoch": 0.1, "grad_norm": 3.8045104779810845, "learning_rate": 9.856784157759525e-06, "loss": 0.7633, "step": 1282 }, { "epoch": 0.1, "grad_norm": 4.084174277425293, "learning_rate": 9.85647142785953e-06, "loss": 0.8884, "step": 1283 }, { "epoch": 0.1, "grad_norm": 17.856027150545184, "learning_rate": 9.856158361861132e-06, "loss": 0.6974, "step": 1284 }, { "epoch": 0.1, "grad_norm": 3.0808533240744116, "learning_rate": 9.855844959786e-06, "loss": 0.6732, "step": 1285 }, { "epoch": 0.1, "grad_norm": 3.130692814480986, "learning_rate": 9.85553122165582e-06, "loss": 0.7204, "step": 1286 }, { "epoch": 0.1, "grad_norm": 3.622360332815775, "learning_rate": 9.855217147492309e-06, "loss": 0.6865, "step": 1287 }, { "epoch": 0.1, "grad_norm": 4.327705891381432, "learning_rate": 9.854902737317198e-06, "loss": 0.7003, "step": 1288 }, { "epoch": 0.1, "grad_norm": 2.682869651435079, "learning_rate": 9.854587991152249e-06, "loss": 0.5716, "step": 1289 }, { "epoch": 0.1, "grad_norm": 3.423295045112559, "learning_rate": 9.854272909019245e-06, "loss": 0.9297, "step": 1290 }, { "epoch": 0.1, "grad_norm": 4.7457811764820175, "learning_rate": 9.85395749093999e-06, "loss": 0.8619, "step": 1291 }, { "epoch": 0.1, "grad_norm": 3.5570624935048167, "learning_rate": 9.853641736936315e-06, "loss": 0.8548, "step": 1292 }, { "epoch": 0.11, "grad_norm": 6.892499974946268, "learning_rate": 9.853325647030067e-06, "loss": 0.7379, "step": 1293 }, { "epoch": 0.11, "grad_norm": 3.3796158757922488, "learning_rate": 9.853009221243129e-06, "loss": 0.779, "step": 1294 }, { "epoch": 0.11, "grad_norm": 2.47836428277722, "learning_rate": 9.852692459597395e-06, "loss": 0.7652, "step": 1295 }, { "epoch": 0.11, "grad_norm": 2.893773105757726, "learning_rate": 9.852375362114787e-06, "loss": 0.808, "step": 1296 }, { "epoch": 0.11, "grad_norm": 4.927216898149595, "learning_rate": 9.852057928817252e-06, "loss": 0.8106, "step": 1297 }, { "epoch": 0.11, "grad_norm": 3.890251321374567, "learning_rate": 9.851740159726755e-06, "loss": 0.6629, "step": 1298 }, { "epoch": 0.11, "grad_norm": 5.677072589303417, "learning_rate": 9.851422054865292e-06, "loss": 0.8141, "step": 1299 }, { "epoch": 0.11, "grad_norm": 3.0892354294650444, "learning_rate": 9.851103614254874e-06, "loss": 0.6687, "step": 1300 }, { "epoch": 0.11, "grad_norm": 7.01215476816205, "learning_rate": 9.850784837917541e-06, "loss": 0.7888, "step": 1301 }, { "epoch": 0.11, "grad_norm": 3.10074205471101, "learning_rate": 9.850465725875356e-06, "loss": 0.7357, "step": 1302 }, { "epoch": 0.11, "grad_norm": 8.474427065852185, "learning_rate": 9.8501462781504e-06, "loss": 0.6398, "step": 1303 }, { "epoch": 0.11, "grad_norm": 3.8797306602969823, "learning_rate": 9.849826494764783e-06, "loss": 0.682, "step": 1304 }, { "epoch": 0.11, "grad_norm": 3.1228451636841297, "learning_rate": 9.849506375740637e-06, "loss": 0.6532, "step": 1305 }, { "epoch": 0.11, "grad_norm": 5.058043253431571, "learning_rate": 9.849185921100111e-06, "loss": 0.7395, "step": 1306 }, { "epoch": 0.11, "grad_norm": 3.884543817228299, "learning_rate": 9.84886513086539e-06, "loss": 0.7521, "step": 1307 }, { "epoch": 0.11, "grad_norm": 3.7404597676394116, "learning_rate": 9.848544005058668e-06, "loss": 0.6452, "step": 1308 }, { "epoch": 0.11, "grad_norm": 4.03529209310363, "learning_rate": 9.848222543702175e-06, "loss": 0.8362, "step": 1309 }, { "epoch": 0.11, "grad_norm": 6.319576305206275, "learning_rate": 9.847900746818153e-06, "loss": 0.8138, "step": 1310 }, { "epoch": 0.11, "grad_norm": 8.301016651406, "learning_rate": 9.847578614428874e-06, "loss": 0.7712, "step": 1311 }, { "epoch": 0.11, "grad_norm": 4.557192462840488, "learning_rate": 9.847256146556633e-06, "loss": 0.9223, "step": 1312 }, { "epoch": 0.11, "grad_norm": 3.7930994281395516, "learning_rate": 9.846933343223746e-06, "loss": 0.636, "step": 1313 }, { "epoch": 0.11, "grad_norm": 2.859412704473073, "learning_rate": 9.846610204452553e-06, "loss": 0.6062, "step": 1314 }, { "epoch": 0.11, "grad_norm": 5.643984249036258, "learning_rate": 9.846286730265418e-06, "loss": 0.8242, "step": 1315 }, { "epoch": 0.11, "grad_norm": 2.987864541937237, "learning_rate": 9.845962920684723e-06, "loss": 0.766, "step": 1316 }, { "epoch": 0.11, "grad_norm": 2.9479138581079813, "learning_rate": 9.845638775732883e-06, "loss": 0.9124, "step": 1317 }, { "epoch": 0.11, "grad_norm": 3.045048700796494, "learning_rate": 9.845314295432331e-06, "loss": 0.6485, "step": 1318 }, { "epoch": 0.11, "grad_norm": 5.320703422437716, "learning_rate": 9.844989479805521e-06, "loss": 0.7124, "step": 1319 }, { "epoch": 0.11, "grad_norm": 4.019287745659873, "learning_rate": 9.844664328874928e-06, "loss": 0.7201, "step": 1320 }, { "epoch": 0.11, "grad_norm": 3.225188193685589, "learning_rate": 9.844338842663064e-06, "loss": 0.6698, "step": 1321 }, { "epoch": 0.11, "grad_norm": 5.893974502105138, "learning_rate": 9.844013021192447e-06, "loss": 0.5956, "step": 1322 }, { "epoch": 0.11, "grad_norm": 3.0719570522862183, "learning_rate": 9.84368686448563e-06, "loss": 0.8989, "step": 1323 }, { "epoch": 0.11, "grad_norm": 2.9436413276156683, "learning_rate": 9.84336037256518e-06, "loss": 0.5827, "step": 1324 }, { "epoch": 0.11, "grad_norm": 4.851365162956393, "learning_rate": 9.8430335454537e-06, "loss": 0.7802, "step": 1325 }, { "epoch": 0.11, "grad_norm": 4.1196957284703695, "learning_rate": 9.842706383173803e-06, "loss": 0.9247, "step": 1326 }, { "epoch": 0.11, "grad_norm": 5.032915178173537, "learning_rate": 9.842378885748132e-06, "loss": 0.6944, "step": 1327 }, { "epoch": 0.11, "grad_norm": 3.393978549846444, "learning_rate": 9.842051053199352e-06, "loss": 0.6831, "step": 1328 }, { "epoch": 0.11, "grad_norm": 3.7570707697960333, "learning_rate": 9.84172288555015e-06, "loss": 0.7007, "step": 1329 }, { "epoch": 0.11, "grad_norm": 3.0620712391739846, "learning_rate": 9.84139438282324e-06, "loss": 0.7323, "step": 1330 }, { "epoch": 0.11, "grad_norm": 4.2511517165266115, "learning_rate": 9.841065545041353e-06, "loss": 0.6887, "step": 1331 }, { "epoch": 0.11, "grad_norm": 4.79888414711986, "learning_rate": 9.84073637222725e-06, "loss": 0.8734, "step": 1332 }, { "epoch": 0.11, "grad_norm": 4.1563644388943235, "learning_rate": 9.84040686440371e-06, "loss": 0.7673, "step": 1333 }, { "epoch": 0.11, "grad_norm": 4.282574467026865, "learning_rate": 9.840077021593538e-06, "loss": 0.7938, "step": 1334 }, { "epoch": 0.11, "grad_norm": 4.164066486128604, "learning_rate": 9.83974684381956e-06, "loss": 0.7682, "step": 1335 }, { "epoch": 0.11, "grad_norm": 3.592944111014293, "learning_rate": 9.839416331104625e-06, "loss": 0.7271, "step": 1336 }, { "epoch": 0.11, "grad_norm": 3.9796280426402593, "learning_rate": 9.83908548347161e-06, "loss": 0.5406, "step": 1337 }, { "epoch": 0.11, "grad_norm": 4.656920266592827, "learning_rate": 9.838754300943409e-06, "loss": 0.6151, "step": 1338 }, { "epoch": 0.11, "grad_norm": 4.074963637615123, "learning_rate": 9.838422783542945e-06, "loss": 0.8296, "step": 1339 }, { "epoch": 0.11, "grad_norm": 18.23943022448206, "learning_rate": 9.838090931293158e-06, "loss": 0.7592, "step": 1340 }, { "epoch": 0.11, "grad_norm": 3.2824548550695245, "learning_rate": 9.837758744217016e-06, "loss": 0.6474, "step": 1341 }, { "epoch": 0.11, "grad_norm": 3.0149532039872264, "learning_rate": 9.837426222337507e-06, "loss": 0.7854, "step": 1342 }, { "epoch": 0.11, "grad_norm": 5.185941002706444, "learning_rate": 9.837093365677644e-06, "loss": 0.7233, "step": 1343 }, { "epoch": 0.11, "grad_norm": 3.4361284649067456, "learning_rate": 9.836760174260465e-06, "loss": 0.4904, "step": 1344 }, { "epoch": 0.11, "grad_norm": 3.2142201547203877, "learning_rate": 9.836426648109025e-06, "loss": 0.8148, "step": 1345 }, { "epoch": 0.11, "grad_norm": 3.2303286304956362, "learning_rate": 9.83609278724641e-06, "loss": 0.6964, "step": 1346 }, { "epoch": 0.11, "grad_norm": 4.265667483490254, "learning_rate": 9.835758591695723e-06, "loss": 0.9437, "step": 1347 }, { "epoch": 0.11, "grad_norm": 2.802017275522471, "learning_rate": 9.835424061480094e-06, "loss": 0.9152, "step": 1348 }, { "epoch": 0.11, "grad_norm": 4.469253067501435, "learning_rate": 9.835089196622671e-06, "loss": 0.8302, "step": 1349 }, { "epoch": 0.11, "grad_norm": 2.3663676695982256, "learning_rate": 9.834753997146633e-06, "loss": 0.7885, "step": 1350 }, { "epoch": 0.11, "grad_norm": 4.681566637271748, "learning_rate": 9.834418463075177e-06, "loss": 0.7215, "step": 1351 }, { "epoch": 0.11, "grad_norm": 2.587681371757967, "learning_rate": 9.834082594431522e-06, "loss": 0.8074, "step": 1352 }, { "epoch": 0.11, "grad_norm": 4.609762766154074, "learning_rate": 9.833746391238916e-06, "loss": 0.7177, "step": 1353 }, { "epoch": 0.11, "grad_norm": 5.864333074469486, "learning_rate": 9.833409853520621e-06, "loss": 0.7291, "step": 1354 }, { "epoch": 0.11, "grad_norm": 4.222122466097027, "learning_rate": 9.833072981299932e-06, "loss": 0.6248, "step": 1355 }, { "epoch": 0.11, "grad_norm": 4.914777826483462, "learning_rate": 9.83273577460016e-06, "loss": 0.8035, "step": 1356 }, { "epoch": 0.11, "grad_norm": 4.21583162734347, "learning_rate": 9.832398233444644e-06, "loss": 0.6882, "step": 1357 }, { "epoch": 0.11, "grad_norm": 5.892542672832988, "learning_rate": 9.832060357856744e-06, "loss": 0.7175, "step": 1358 }, { "epoch": 0.11, "grad_norm": 6.650533048362267, "learning_rate": 9.83172214785984e-06, "loss": 0.7258, "step": 1359 }, { "epoch": 0.11, "grad_norm": 6.035461930987028, "learning_rate": 9.83138360347734e-06, "loss": 0.8521, "step": 1360 }, { "epoch": 0.11, "grad_norm": 3.8104720262286644, "learning_rate": 9.831044724732675e-06, "loss": 0.7937, "step": 1361 }, { "epoch": 0.11, "grad_norm": 3.9715382675032522, "learning_rate": 9.830705511649297e-06, "loss": 0.7865, "step": 1362 }, { "epoch": 0.11, "grad_norm": 12.693449097841741, "learning_rate": 9.83036596425068e-06, "loss": 0.91, "step": 1363 }, { "epoch": 0.11, "grad_norm": 10.523822309578007, "learning_rate": 9.830026082560324e-06, "loss": 0.774, "step": 1364 }, { "epoch": 0.11, "grad_norm": 5.1513051221823885, "learning_rate": 9.82968586660175e-06, "loss": 0.8242, "step": 1365 }, { "epoch": 0.11, "grad_norm": 5.508823435580966, "learning_rate": 9.829345316398504e-06, "loss": 0.7704, "step": 1366 }, { "epoch": 0.11, "grad_norm": 7.2015623358486245, "learning_rate": 9.829004431974155e-06, "loss": 0.8457, "step": 1367 }, { "epoch": 0.11, "grad_norm": 8.020546139595686, "learning_rate": 9.828663213352294e-06, "loss": 0.7546, "step": 1368 }, { "epoch": 0.11, "grad_norm": 3.912217681032362, "learning_rate": 9.828321660556533e-06, "loss": 0.8607, "step": 1369 }, { "epoch": 0.11, "grad_norm": 6.139422916838143, "learning_rate": 9.827979773610513e-06, "loss": 0.6751, "step": 1370 }, { "epoch": 0.11, "grad_norm": 3.9158330917625546, "learning_rate": 9.827637552537893e-06, "loss": 0.774, "step": 1371 }, { "epoch": 0.11, "grad_norm": 4.447669477789021, "learning_rate": 9.827294997362354e-06, "loss": 0.6145, "step": 1372 }, { "epoch": 0.11, "grad_norm": 3.6150675739777958, "learning_rate": 9.82695210810761e-06, "loss": 0.7509, "step": 1373 }, { "epoch": 0.11, "grad_norm": 2.9606610833748537, "learning_rate": 9.826608884797385e-06, "loss": 0.7393, "step": 1374 }, { "epoch": 0.11, "grad_norm": 3.1563997092642953, "learning_rate": 9.826265327455435e-06, "loss": 0.825, "step": 1375 }, { "epoch": 0.11, "grad_norm": 2.8612181489399884, "learning_rate": 9.825921436105534e-06, "loss": 0.6644, "step": 1376 }, { "epoch": 0.11, "grad_norm": 4.815457879604574, "learning_rate": 9.825577210771486e-06, "loss": 0.755, "step": 1377 }, { "epoch": 0.11, "grad_norm": 3.6836468187364555, "learning_rate": 9.825232651477109e-06, "loss": 0.7721, "step": 1378 }, { "epoch": 0.11, "grad_norm": 3.726206008686008, "learning_rate": 9.824887758246252e-06, "loss": 0.6431, "step": 1379 }, { "epoch": 0.11, "grad_norm": 4.978849678161101, "learning_rate": 9.824542531102779e-06, "loss": 0.7141, "step": 1380 }, { "epoch": 0.11, "grad_norm": 5.277986474909162, "learning_rate": 9.824196970070587e-06, "loss": 0.7952, "step": 1381 }, { "epoch": 0.11, "grad_norm": 4.211976656945047, "learning_rate": 9.82385107517359e-06, "loss": 0.7929, "step": 1382 }, { "epoch": 0.11, "grad_norm": 3.6968624569747814, "learning_rate": 9.823504846435722e-06, "loss": 0.7881, "step": 1383 }, { "epoch": 0.11, "grad_norm": 8.60074925097063, "learning_rate": 9.823158283880949e-06, "loss": 0.9359, "step": 1384 }, { "epoch": 0.11, "grad_norm": 3.504318647545033, "learning_rate": 9.822811387533256e-06, "loss": 0.882, "step": 1385 }, { "epoch": 0.11, "grad_norm": 2.756782688733998, "learning_rate": 9.822464157416644e-06, "loss": 0.5845, "step": 1386 }, { "epoch": 0.11, "grad_norm": 2.8280030843040076, "learning_rate": 9.82211659355515e-06, "loss": 0.6769, "step": 1387 }, { "epoch": 0.11, "grad_norm": 4.460742776352826, "learning_rate": 9.821768695972824e-06, "loss": 0.866, "step": 1388 }, { "epoch": 0.11, "grad_norm": 6.930601287950963, "learning_rate": 9.821420464693746e-06, "loss": 0.6968, "step": 1389 }, { "epoch": 0.11, "grad_norm": 4.359791930361724, "learning_rate": 9.821071899742012e-06, "loss": 0.7572, "step": 1390 }, { "epoch": 0.11, "grad_norm": 5.2867918000178085, "learning_rate": 9.820723001141746e-06, "loss": 0.7458, "step": 1391 }, { "epoch": 0.11, "grad_norm": 3.1276764961494656, "learning_rate": 9.820373768917095e-06, "loss": 0.8062, "step": 1392 }, { "epoch": 0.11, "grad_norm": 2.7633204537476272, "learning_rate": 9.820024203092229e-06, "loss": 0.6961, "step": 1393 }, { "epoch": 0.11, "grad_norm": 4.7491626303353955, "learning_rate": 9.819674303691338e-06, "loss": 0.9546, "step": 1394 }, { "epoch": 0.11, "grad_norm": 3.8720420922962315, "learning_rate": 9.819324070738637e-06, "loss": 0.6164, "step": 1395 }, { "epoch": 0.11, "grad_norm": 5.216852710886374, "learning_rate": 9.818973504258366e-06, "loss": 0.8797, "step": 1396 }, { "epoch": 0.11, "grad_norm": 3.3214315322637327, "learning_rate": 9.818622604274785e-06, "loss": 0.8203, "step": 1397 }, { "epoch": 0.11, "grad_norm": 4.427877516698093, "learning_rate": 9.81827137081218e-06, "loss": 0.7682, "step": 1398 }, { "epoch": 0.11, "grad_norm": 2.36370016904225, "learning_rate": 9.817919803894857e-06, "loss": 0.6713, "step": 1399 }, { "epoch": 0.11, "grad_norm": 4.600179590731129, "learning_rate": 9.81756790354715e-06, "loss": 0.7642, "step": 1400 }, { "epoch": 0.11, "grad_norm": 2.814208092833744, "learning_rate": 9.817215669793408e-06, "loss": 0.7536, "step": 1401 }, { "epoch": 0.11, "grad_norm": 6.274969998529941, "learning_rate": 9.81686310265801e-06, "loss": 0.5873, "step": 1402 }, { "epoch": 0.11, "grad_norm": 5.933085071967237, "learning_rate": 9.816510202165357e-06, "loss": 0.8715, "step": 1403 }, { "epoch": 0.11, "grad_norm": 2.9930877783172627, "learning_rate": 9.81615696833987e-06, "loss": 0.8511, "step": 1404 }, { "epoch": 0.11, "grad_norm": 10.18151334316766, "learning_rate": 9.815803401205995e-06, "loss": 0.7355, "step": 1405 }, { "epoch": 0.11, "grad_norm": 4.907339405641265, "learning_rate": 9.815449500788203e-06, "loss": 0.7398, "step": 1406 }, { "epoch": 0.11, "grad_norm": 2.2582056261583925, "learning_rate": 9.815095267110983e-06, "loss": 0.5298, "step": 1407 }, { "epoch": 0.11, "grad_norm": 2.873666475256101, "learning_rate": 9.814740700198855e-06, "loss": 0.7106, "step": 1408 }, { "epoch": 0.11, "grad_norm": 9.515688970151894, "learning_rate": 9.814385800076352e-06, "loss": 0.7679, "step": 1409 }, { "epoch": 0.11, "grad_norm": 2.645885120179381, "learning_rate": 9.814030566768041e-06, "loss": 0.6623, "step": 1410 }, { "epoch": 0.11, "grad_norm": 4.704194459795749, "learning_rate": 9.8136750002985e-06, "loss": 0.6011, "step": 1411 }, { "epoch": 0.11, "grad_norm": 3.2039376023315893, "learning_rate": 9.81331910069234e-06, "loss": 0.5915, "step": 1412 }, { "epoch": 0.11, "grad_norm": 6.431963012416284, "learning_rate": 9.812962867974192e-06, "loss": 0.6776, "step": 1413 }, { "epoch": 0.11, "grad_norm": 4.136702986877205, "learning_rate": 9.812606302168709e-06, "loss": 0.6939, "step": 1414 }, { "epoch": 0.11, "grad_norm": 4.9833394954813, "learning_rate": 9.812249403300565e-06, "loss": 0.8666, "step": 1415 }, { "epoch": 0.12, "grad_norm": 4.354514489605485, "learning_rate": 9.811892171394464e-06, "loss": 0.7489, "step": 1416 }, { "epoch": 0.12, "grad_norm": 2.8001839948301606, "learning_rate": 9.811534606475127e-06, "loss": 0.8282, "step": 1417 }, { "epoch": 0.12, "grad_norm": 7.215447239892779, "learning_rate": 9.811176708567295e-06, "loss": 0.6568, "step": 1418 }, { "epoch": 0.12, "grad_norm": 4.437516725787354, "learning_rate": 9.810818477695745e-06, "loss": 0.7617, "step": 1419 }, { "epoch": 0.12, "grad_norm": 3.1178580634854196, "learning_rate": 9.810459913885265e-06, "loss": 0.8128, "step": 1420 }, { "epoch": 0.12, "grad_norm": 2.398401994492875, "learning_rate": 9.81010101716067e-06, "loss": 0.6911, "step": 1421 }, { "epoch": 0.12, "grad_norm": 6.21656955629377, "learning_rate": 9.809741787546797e-06, "loss": 0.6232, "step": 1422 }, { "epoch": 0.12, "grad_norm": 3.3665971922059055, "learning_rate": 9.809382225068506e-06, "loss": 0.7555, "step": 1423 }, { "epoch": 0.12, "grad_norm": 4.180964991051591, "learning_rate": 9.809022329750684e-06, "loss": 0.7165, "step": 1424 }, { "epoch": 0.12, "grad_norm": 5.513670951307397, "learning_rate": 9.808662101618237e-06, "loss": 0.7134, "step": 1425 }, { "epoch": 0.12, "grad_norm": 5.059842872677863, "learning_rate": 9.808301540696094e-06, "loss": 0.7651, "step": 1426 }, { "epoch": 0.12, "grad_norm": 4.817648716935748, "learning_rate": 9.80794064700921e-06, "loss": 0.815, "step": 1427 }, { "epoch": 0.12, "grad_norm": 2.7239279947800816, "learning_rate": 9.807579420582558e-06, "loss": 0.7762, "step": 1428 }, { "epoch": 0.12, "grad_norm": 3.13670492941162, "learning_rate": 9.80721786144114e-06, "loss": 0.6804, "step": 1429 }, { "epoch": 0.12, "grad_norm": 7.517447696207459, "learning_rate": 9.806855969609978e-06, "loss": 0.8434, "step": 1430 }, { "epoch": 0.12, "grad_norm": 2.8457699893758757, "learning_rate": 9.806493745114117e-06, "loss": 0.7085, "step": 1431 }, { "epoch": 0.12, "grad_norm": 7.218995933348008, "learning_rate": 9.806131187978623e-06, "loss": 0.7737, "step": 1432 }, { "epoch": 0.12, "grad_norm": 4.243621549976317, "learning_rate": 9.805768298228589e-06, "loss": 0.6403, "step": 1433 }, { "epoch": 0.12, "grad_norm": 3.887382517738277, "learning_rate": 9.805405075889129e-06, "loss": 0.903, "step": 1434 }, { "epoch": 0.12, "grad_norm": 3.3395693112018314, "learning_rate": 9.805041520985382e-06, "loss": 0.7278, "step": 1435 }, { "epoch": 0.12, "grad_norm": 5.903451235150011, "learning_rate": 9.804677633542506e-06, "loss": 0.626, "step": 1436 }, { "epoch": 0.12, "grad_norm": 3.818325992960267, "learning_rate": 9.804313413585684e-06, "loss": 0.6567, "step": 1437 }, { "epoch": 0.12, "grad_norm": 4.724132711115311, "learning_rate": 9.803948861140124e-06, "loss": 0.6915, "step": 1438 }, { "epoch": 0.12, "grad_norm": 3.7630648843591423, "learning_rate": 9.803583976231054e-06, "loss": 0.7323, "step": 1439 }, { "epoch": 0.12, "grad_norm": 4.97228214912387, "learning_rate": 9.80321875888373e-06, "loss": 0.7567, "step": 1440 }, { "epoch": 0.12, "grad_norm": 3.9839792879030806, "learning_rate": 9.802853209123421e-06, "loss": 0.771, "step": 1441 }, { "epoch": 0.12, "grad_norm": 3.647333773792597, "learning_rate": 9.80248732697543e-06, "loss": 0.6099, "step": 1442 }, { "epoch": 0.12, "grad_norm": 4.183808026716674, "learning_rate": 9.802121112465075e-06, "loss": 0.6127, "step": 1443 }, { "epoch": 0.12, "grad_norm": 3.346608070926879, "learning_rate": 9.801754565617705e-06, "loss": 0.9237, "step": 1444 }, { "epoch": 0.12, "grad_norm": 3.2752442217734035, "learning_rate": 9.801387686458684e-06, "loss": 0.7318, "step": 1445 }, { "epoch": 0.12, "grad_norm": 3.7808130574377232, "learning_rate": 9.801020475013403e-06, "loss": 0.7514, "step": 1446 }, { "epoch": 0.12, "grad_norm": 4.443561104607242, "learning_rate": 9.800652931307275e-06, "loss": 0.7709, "step": 1447 }, { "epoch": 0.12, "grad_norm": 4.498737791381994, "learning_rate": 9.800285055365737e-06, "loss": 0.7431, "step": 1448 }, { "epoch": 0.12, "grad_norm": 5.20600439260958, "learning_rate": 9.799916847214247e-06, "loss": 0.7441, "step": 1449 }, { "epoch": 0.12, "grad_norm": 3.634669244965309, "learning_rate": 9.79954830687829e-06, "loss": 0.8426, "step": 1450 }, { "epoch": 0.12, "grad_norm": 5.8541970597774355, "learning_rate": 9.79917943438337e-06, "loss": 0.6051, "step": 1451 }, { "epoch": 0.12, "grad_norm": 5.608197206055959, "learning_rate": 9.798810229755013e-06, "loss": 0.6145, "step": 1452 }, { "epoch": 0.12, "grad_norm": 3.6834657894994485, "learning_rate": 9.798440693018773e-06, "loss": 0.6379, "step": 1453 }, { "epoch": 0.12, "grad_norm": 5.896879105166612, "learning_rate": 9.798070824200225e-06, "loss": 0.5685, "step": 1454 }, { "epoch": 0.12, "grad_norm": 4.336016558477347, "learning_rate": 9.797700623324964e-06, "loss": 0.9523, "step": 1455 }, { "epoch": 0.12, "grad_norm": 5.517274002876056, "learning_rate": 9.797330090418611e-06, "loss": 0.7818, "step": 1456 }, { "epoch": 0.12, "grad_norm": 4.823655865529944, "learning_rate": 9.796959225506809e-06, "loss": 0.8967, "step": 1457 }, { "epoch": 0.12, "grad_norm": 3.083259607958461, "learning_rate": 9.796588028615225e-06, "loss": 0.7802, "step": 1458 }, { "epoch": 0.12, "grad_norm": 4.985306766786993, "learning_rate": 9.796216499769546e-06, "loss": 0.8343, "step": 1459 }, { "epoch": 0.12, "grad_norm": 3.6340494882707577, "learning_rate": 9.795844638995488e-06, "loss": 0.8408, "step": 1460 }, { "epoch": 0.12, "grad_norm": 3.676162084189054, "learning_rate": 9.795472446318783e-06, "loss": 0.8181, "step": 1461 }, { "epoch": 0.12, "grad_norm": 3.039025258973637, "learning_rate": 9.79509992176519e-06, "loss": 0.6969, "step": 1462 }, { "epoch": 0.12, "grad_norm": 3.1368968951337015, "learning_rate": 9.79472706536049e-06, "loss": 0.709, "step": 1463 }, { "epoch": 0.12, "grad_norm": 5.107027935878554, "learning_rate": 9.794353877130486e-06, "loss": 0.8323, "step": 1464 }, { "epoch": 0.12, "grad_norm": 2.6985355905232855, "learning_rate": 9.793980357101007e-06, "loss": 0.6948, "step": 1465 }, { "epoch": 0.12, "grad_norm": 3.520592408347667, "learning_rate": 9.793606505297901e-06, "loss": 0.8019, "step": 1466 }, { "epoch": 0.12, "grad_norm": 6.025873076477271, "learning_rate": 9.793232321747041e-06, "loss": 0.7294, "step": 1467 }, { "epoch": 0.12, "grad_norm": 8.228704305364857, "learning_rate": 9.792857806474326e-06, "loss": 0.839, "step": 1468 }, { "epoch": 0.12, "grad_norm": 5.283289796361565, "learning_rate": 9.79248295950567e-06, "loss": 0.7583, "step": 1469 }, { "epoch": 0.12, "grad_norm": 3.2365232592925026, "learning_rate": 9.79210778086702e-06, "loss": 0.7232, "step": 1470 }, { "epoch": 0.12, "grad_norm": 3.5610705930577806, "learning_rate": 9.791732270584337e-06, "loss": 0.7624, "step": 1471 }, { "epoch": 0.12, "grad_norm": 71.87344968782362, "learning_rate": 9.791356428683609e-06, "loss": 0.6685, "step": 1472 }, { "epoch": 0.12, "grad_norm": 2.6023200462718195, "learning_rate": 9.790980255190848e-06, "loss": 0.6391, "step": 1473 }, { "epoch": 0.12, "grad_norm": 6.123095231800686, "learning_rate": 9.790603750132086e-06, "loss": 0.7494, "step": 1474 }, { "epoch": 0.12, "grad_norm": 2.7949851830378085, "learning_rate": 9.790226913533381e-06, "loss": 0.861, "step": 1475 }, { "epoch": 0.12, "grad_norm": 3.8635255046440697, "learning_rate": 9.789849745420811e-06, "loss": 0.7404, "step": 1476 }, { "epoch": 0.12, "grad_norm": 2.7249795024375283, "learning_rate": 9.78947224582048e-06, "loss": 0.7155, "step": 1477 }, { "epoch": 0.12, "grad_norm": 4.319728234195847, "learning_rate": 9.789094414758512e-06, "loss": 0.7326, "step": 1478 }, { "epoch": 0.12, "grad_norm": 3.9541784743255035, "learning_rate": 9.788716252261057e-06, "loss": 0.9948, "step": 1479 }, { "epoch": 0.12, "grad_norm": 3.6481944948746183, "learning_rate": 9.788337758354283e-06, "loss": 0.6686, "step": 1480 }, { "epoch": 0.12, "grad_norm": 4.33823988503373, "learning_rate": 9.787958933064388e-06, "loss": 0.7917, "step": 1481 }, { "epoch": 0.12, "grad_norm": 3.659800176796344, "learning_rate": 9.787579776417588e-06, "loss": 0.946, "step": 1482 }, { "epoch": 0.12, "grad_norm": 12.986213911061727, "learning_rate": 9.78720028844012e-06, "loss": 0.7908, "step": 1483 }, { "epoch": 0.12, "grad_norm": 6.70633643922088, "learning_rate": 9.786820469158252e-06, "loss": 0.7295, "step": 1484 }, { "epoch": 0.12, "grad_norm": 3.3918464495622973, "learning_rate": 9.786440318598264e-06, "loss": 0.8559, "step": 1485 }, { "epoch": 0.12, "grad_norm": 6.207525068754861, "learning_rate": 9.78605983678647e-06, "loss": 0.6354, "step": 1486 }, { "epoch": 0.12, "grad_norm": 3.550985950678446, "learning_rate": 9.7856790237492e-06, "loss": 0.7395, "step": 1487 }, { "epoch": 0.12, "grad_norm": 4.255351552300113, "learning_rate": 9.785297879512808e-06, "loss": 0.6068, "step": 1488 }, { "epoch": 0.12, "grad_norm": 3.828040458636102, "learning_rate": 9.784916404103673e-06, "loss": 0.7176, "step": 1489 }, { "epoch": 0.12, "grad_norm": 4.162791265980011, "learning_rate": 9.784534597548194e-06, "loss": 0.6831, "step": 1490 }, { "epoch": 0.12, "grad_norm": 3.5414114522811593, "learning_rate": 9.784152459872794e-06, "loss": 0.8602, "step": 1491 }, { "epoch": 0.12, "grad_norm": 3.6330627544278062, "learning_rate": 9.78376999110392e-06, "loss": 0.7169, "step": 1492 }, { "epoch": 0.12, "grad_norm": 3.4462182761143665, "learning_rate": 9.783387191268044e-06, "loss": 0.6611, "step": 1493 }, { "epoch": 0.12, "grad_norm": 4.12205422268827, "learning_rate": 9.783004060391652e-06, "loss": 0.8308, "step": 1494 }, { "epoch": 0.12, "grad_norm": 6.544013277773115, "learning_rate": 9.782620598501264e-06, "loss": 0.9487, "step": 1495 }, { "epoch": 0.12, "grad_norm": 5.2842059634444905, "learning_rate": 9.782236805623418e-06, "loss": 0.8053, "step": 1496 }, { "epoch": 0.12, "grad_norm": 3.012914193091309, "learning_rate": 9.781852681784674e-06, "loss": 0.7071, "step": 1497 }, { "epoch": 0.12, "grad_norm": 2.9169832886546394, "learning_rate": 9.781468227011615e-06, "loss": 0.8165, "step": 1498 }, { "epoch": 0.12, "grad_norm": 19.654348234412605, "learning_rate": 9.781083441330846e-06, "loss": 0.8418, "step": 1499 }, { "epoch": 0.12, "grad_norm": 3.0636544844751388, "learning_rate": 9.780698324769e-06, "loss": 0.7762, "step": 1500 }, { "epoch": 0.12, "grad_norm": 2.803171298191634, "learning_rate": 9.780312877352728e-06, "loss": 0.9643, "step": 1501 }, { "epoch": 0.12, "grad_norm": 4.879320141032356, "learning_rate": 9.779927099108708e-06, "loss": 0.6837, "step": 1502 }, { "epoch": 0.12, "grad_norm": 3.498759066490165, "learning_rate": 9.779540990063632e-06, "loss": 0.6386, "step": 1503 }, { "epoch": 0.12, "grad_norm": 3.484009818687112, "learning_rate": 9.779154550244228e-06, "loss": 0.8281, "step": 1504 }, { "epoch": 0.12, "grad_norm": 4.125103255513952, "learning_rate": 9.778767779677235e-06, "loss": 0.5755, "step": 1505 }, { "epoch": 0.12, "grad_norm": 13.7832085656859, "learning_rate": 9.778380678389422e-06, "loss": 0.7407, "step": 1506 }, { "epoch": 0.12, "grad_norm": 7.906356438572428, "learning_rate": 9.77799324640758e-06, "loss": 0.8034, "step": 1507 }, { "epoch": 0.12, "grad_norm": 4.80487227646703, "learning_rate": 9.77760548375852e-06, "loss": 0.7786, "step": 1508 }, { "epoch": 0.12, "grad_norm": 6.504099533589149, "learning_rate": 9.77721739046908e-06, "loss": 0.8263, "step": 1509 }, { "epoch": 0.12, "grad_norm": 7.899753141912911, "learning_rate": 9.776828966566114e-06, "loss": 0.6274, "step": 1510 }, { "epoch": 0.12, "grad_norm": 3.1207034813087717, "learning_rate": 9.776440212076507e-06, "loss": 0.7159, "step": 1511 }, { "epoch": 0.12, "grad_norm": 7.212287823348495, "learning_rate": 9.776051127027165e-06, "loss": 0.6522, "step": 1512 }, { "epoch": 0.12, "grad_norm": 3.7861539770247647, "learning_rate": 9.775661711445009e-06, "loss": 0.6826, "step": 1513 }, { "epoch": 0.12, "grad_norm": 3.5804504967248585, "learning_rate": 9.775271965356994e-06, "loss": 0.7533, "step": 1514 }, { "epoch": 0.12, "grad_norm": 3.7781668816987017, "learning_rate": 9.774881888790091e-06, "loss": 0.7047, "step": 1515 }, { "epoch": 0.12, "grad_norm": 3.255518383570825, "learning_rate": 9.774491481771296e-06, "loss": 0.7052, "step": 1516 }, { "epoch": 0.12, "grad_norm": 3.5353756843084536, "learning_rate": 9.774100744327628e-06, "loss": 0.8397, "step": 1517 }, { "epoch": 0.12, "grad_norm": 3.3009173636756506, "learning_rate": 9.77370967648613e-06, "loss": 0.7117, "step": 1518 }, { "epoch": 0.12, "grad_norm": 6.357232559699015, "learning_rate": 9.773318278273862e-06, "loss": 0.615, "step": 1519 }, { "epoch": 0.12, "grad_norm": 3.811067317360187, "learning_rate": 9.772926549717915e-06, "loss": 0.8086, "step": 1520 }, { "epoch": 0.12, "grad_norm": 2.9257194337598103, "learning_rate": 9.772534490845398e-06, "loss": 0.8217, "step": 1521 }, { "epoch": 0.12, "grad_norm": 4.239608551099964, "learning_rate": 9.772142101683443e-06, "loss": 0.6287, "step": 1522 }, { "epoch": 0.12, "grad_norm": 4.414276247472141, "learning_rate": 9.771749382259209e-06, "loss": 0.6379, "step": 1523 }, { "epoch": 0.12, "grad_norm": 4.410868529754491, "learning_rate": 9.771356332599868e-06, "loss": 0.8062, "step": 1524 }, { "epoch": 0.12, "grad_norm": 5.476402461146055, "learning_rate": 9.77096295273263e-06, "loss": 0.7956, "step": 1525 }, { "epoch": 0.12, "grad_norm": 3.1821749729809983, "learning_rate": 9.770569242684714e-06, "loss": 0.7438, "step": 1526 }, { "epoch": 0.12, "grad_norm": 2.626853742899924, "learning_rate": 9.770175202483367e-06, "loss": 0.6873, "step": 1527 }, { "epoch": 0.12, "grad_norm": 2.9209283050335193, "learning_rate": 9.769780832155862e-06, "loss": 0.6846, "step": 1528 }, { "epoch": 0.12, "grad_norm": 6.925387010633106, "learning_rate": 9.76938613172949e-06, "loss": 0.7209, "step": 1529 }, { "epoch": 0.12, "grad_norm": 4.953243123104708, "learning_rate": 9.768991101231567e-06, "loss": 0.7806, "step": 1530 }, { "epoch": 0.12, "grad_norm": 3.7479085895488358, "learning_rate": 9.768595740689432e-06, "loss": 0.7354, "step": 1531 }, { "epoch": 0.12, "grad_norm": 4.029064129851027, "learning_rate": 9.768200050130446e-06, "loss": 0.5923, "step": 1532 }, { "epoch": 0.12, "grad_norm": 3.345727472277948, "learning_rate": 9.767804029581993e-06, "loss": 0.6241, "step": 1533 }, { "epoch": 0.12, "grad_norm": 8.19472531105154, "learning_rate": 9.767407679071482e-06, "loss": 0.7253, "step": 1534 }, { "epoch": 0.12, "grad_norm": 3.4116277608646617, "learning_rate": 9.767010998626341e-06, "loss": 0.6667, "step": 1535 }, { "epoch": 0.12, "grad_norm": 6.4904954053061035, "learning_rate": 9.766613988274024e-06, "loss": 0.6583, "step": 1536 }, { "epoch": 0.12, "grad_norm": 3.301838046534502, "learning_rate": 9.766216648042004e-06, "loss": 0.7839, "step": 1537 }, { "epoch": 0.12, "grad_norm": 4.210564392011646, "learning_rate": 9.765818977957781e-06, "loss": 0.7943, "step": 1538 }, { "epoch": 0.12, "grad_norm": 3.478621495963454, "learning_rate": 9.765420978048879e-06, "loss": 0.9659, "step": 1539 }, { "epoch": 0.13, "grad_norm": 3.4065037649780527, "learning_rate": 9.765022648342839e-06, "loss": 0.7844, "step": 1540 }, { "epoch": 0.13, "grad_norm": 3.357295104102806, "learning_rate": 9.764623988867228e-06, "loss": 0.6693, "step": 1541 }, { "epoch": 0.13, "grad_norm": 3.6415067741208635, "learning_rate": 9.764224999649636e-06, "loss": 0.6968, "step": 1542 }, { "epoch": 0.13, "grad_norm": 4.769340923934249, "learning_rate": 9.763825680717679e-06, "loss": 0.7311, "step": 1543 }, { "epoch": 0.13, "grad_norm": 3.2399213836180696, "learning_rate": 9.763426032098986e-06, "loss": 0.8573, "step": 1544 }, { "epoch": 0.13, "grad_norm": 24.764452079320318, "learning_rate": 9.763026053821218e-06, "loss": 0.6742, "step": 1545 }, { "epoch": 0.13, "grad_norm": 3.5012147070447197, "learning_rate": 9.76262574591206e-06, "loss": 0.7019, "step": 1546 }, { "epoch": 0.13, "grad_norm": 7.7074823896995195, "learning_rate": 9.76222510839921e-06, "loss": 0.6455, "step": 1547 }, { "epoch": 0.13, "grad_norm": 3.794748635038636, "learning_rate": 9.761824141310397e-06, "loss": 0.767, "step": 1548 }, { "epoch": 0.13, "grad_norm": 5.048637103372197, "learning_rate": 9.761422844673372e-06, "loss": 0.6651, "step": 1549 }, { "epoch": 0.13, "grad_norm": 4.295827773749372, "learning_rate": 9.761021218515904e-06, "loss": 0.5697, "step": 1550 }, { "epoch": 0.13, "grad_norm": 4.503935681519453, "learning_rate": 9.760619262865792e-06, "loss": 0.7316, "step": 1551 }, { "epoch": 0.13, "grad_norm": 5.0531287369333855, "learning_rate": 9.76021697775085e-06, "loss": 0.7947, "step": 1552 }, { "epoch": 0.13, "grad_norm": 7.187633323599077, "learning_rate": 9.759814363198921e-06, "loss": 0.8353, "step": 1553 }, { "epoch": 0.13, "grad_norm": 3.089252124795491, "learning_rate": 9.759411419237868e-06, "loss": 0.7553, "step": 1554 }, { "epoch": 0.13, "grad_norm": 2.622250229174503, "learning_rate": 9.759008145895577e-06, "loss": 0.6976, "step": 1555 }, { "epoch": 0.13, "grad_norm": 2.3298340353415616, "learning_rate": 9.758604543199957e-06, "loss": 0.607, "step": 1556 }, { "epoch": 0.13, "grad_norm": 2.8074810879103573, "learning_rate": 9.758200611178938e-06, "loss": 0.8519, "step": 1557 }, { "epoch": 0.13, "grad_norm": 2.065575274393397, "learning_rate": 9.757796349860478e-06, "loss": 0.8482, "step": 1558 }, { "epoch": 0.13, "grad_norm": 4.928625489656893, "learning_rate": 9.757391759272554e-06, "loss": 0.7214, "step": 1559 }, { "epoch": 0.13, "grad_norm": 3.32932390498336, "learning_rate": 9.756986839443166e-06, "loss": 0.6417, "step": 1560 }, { "epoch": 0.13, "grad_norm": 4.878711826750918, "learning_rate": 9.756581590400333e-06, "loss": 0.748, "step": 1561 }, { "epoch": 0.13, "grad_norm": 3.4381105533696634, "learning_rate": 9.756176012172107e-06, "loss": 0.7327, "step": 1562 }, { "epoch": 0.13, "grad_norm": 3.260757765571373, "learning_rate": 9.755770104786553e-06, "loss": 0.6864, "step": 1563 }, { "epoch": 0.13, "grad_norm": 3.530872038852194, "learning_rate": 9.755363868271762e-06, "loss": 0.7138, "step": 1564 }, { "epoch": 0.13, "grad_norm": 3.6159314004526673, "learning_rate": 9.75495730265585e-06, "loss": 0.7214, "step": 1565 }, { "epoch": 0.13, "grad_norm": 3.248587499761892, "learning_rate": 9.754550407966952e-06, "loss": 0.721, "step": 1566 }, { "epoch": 0.13, "grad_norm": 4.698375023078237, "learning_rate": 9.754143184233228e-06, "loss": 0.8135, "step": 1567 }, { "epoch": 0.13, "grad_norm": 3.5734272848569355, "learning_rate": 9.753735631482864e-06, "loss": 0.6116, "step": 1568 }, { "epoch": 0.13, "grad_norm": 2.93473976656805, "learning_rate": 9.75332774974406e-06, "loss": 0.7677, "step": 1569 }, { "epoch": 0.13, "grad_norm": 3.681297471107408, "learning_rate": 9.752919539045045e-06, "loss": 0.7641, "step": 1570 }, { "epoch": 0.13, "grad_norm": 3.397010514729807, "learning_rate": 9.752510999414074e-06, "loss": 0.7181, "step": 1571 }, { "epoch": 0.13, "grad_norm": 4.271453857530892, "learning_rate": 9.752102130879416e-06, "loss": 0.694, "step": 1572 }, { "epoch": 0.13, "grad_norm": 6.312327619980903, "learning_rate": 9.75169293346937e-06, "loss": 0.7841, "step": 1573 }, { "epoch": 0.13, "grad_norm": 4.016671233218695, "learning_rate": 9.751283407212253e-06, "loss": 0.7293, "step": 1574 }, { "epoch": 0.13, "grad_norm": 3.6028262358301344, "learning_rate": 9.750873552136407e-06, "loss": 0.8454, "step": 1575 }, { "epoch": 0.13, "grad_norm": 5.9556768900797215, "learning_rate": 9.750463368270198e-06, "loss": 0.7693, "step": 1576 }, { "epoch": 0.13, "grad_norm": 4.865050985543065, "learning_rate": 9.750052855642013e-06, "loss": 0.8567, "step": 1577 }, { "epoch": 0.13, "grad_norm": 2.953356898139655, "learning_rate": 9.749642014280261e-06, "loss": 0.5878, "step": 1578 }, { "epoch": 0.13, "grad_norm": 3.267259520432038, "learning_rate": 9.749230844213375e-06, "loss": 0.7799, "step": 1579 }, { "epoch": 0.13, "grad_norm": 4.099288373331892, "learning_rate": 9.748819345469812e-06, "loss": 0.8095, "step": 1580 }, { "epoch": 0.13, "grad_norm": 5.361025667084002, "learning_rate": 9.748407518078048e-06, "loss": 0.791, "step": 1581 }, { "epoch": 0.13, "grad_norm": 4.136759686537642, "learning_rate": 9.747995362066587e-06, "loss": 0.7257, "step": 1582 }, { "epoch": 0.13, "grad_norm": 3.749714291511869, "learning_rate": 9.74758287746395e-06, "loss": 0.7365, "step": 1583 }, { "epoch": 0.13, "grad_norm": 4.1602821158679495, "learning_rate": 9.747170064298684e-06, "loss": 0.7499, "step": 1584 }, { "epoch": 0.13, "grad_norm": 4.312976124125651, "learning_rate": 9.74675692259936e-06, "loss": 0.8296, "step": 1585 }, { "epoch": 0.13, "grad_norm": 3.583681748239942, "learning_rate": 9.746343452394569e-06, "loss": 0.6602, "step": 1586 }, { "epoch": 0.13, "grad_norm": 4.890247791264872, "learning_rate": 9.745929653712924e-06, "loss": 0.8061, "step": 1587 }, { "epoch": 0.13, "grad_norm": 3.0714448128221137, "learning_rate": 9.745515526583066e-06, "loss": 0.7941, "step": 1588 }, { "epoch": 0.13, "grad_norm": 4.939323175831899, "learning_rate": 9.745101071033652e-06, "loss": 0.7877, "step": 1589 }, { "epoch": 0.13, "grad_norm": 3.3550098838732514, "learning_rate": 9.744686287093368e-06, "loss": 0.6597, "step": 1590 }, { "epoch": 0.13, "grad_norm": 2.472382571921731, "learning_rate": 9.744271174790915e-06, "loss": 0.8399, "step": 1591 }, { "epoch": 0.13, "grad_norm": 3.1502334028698082, "learning_rate": 9.743855734155028e-06, "loss": 0.6903, "step": 1592 }, { "epoch": 0.13, "grad_norm": 3.623558369747105, "learning_rate": 9.743439965214452e-06, "loss": 0.7642, "step": 1593 }, { "epoch": 0.13, "grad_norm": 5.995890866801721, "learning_rate": 9.743023867997964e-06, "loss": 0.7963, "step": 1594 }, { "epoch": 0.13, "grad_norm": 6.017176668193397, "learning_rate": 9.74260744253436e-06, "loss": 0.6918, "step": 1595 }, { "epoch": 0.13, "grad_norm": 3.0397103388014988, "learning_rate": 9.742190688852457e-06, "loss": 0.617, "step": 1596 }, { "epoch": 0.13, "grad_norm": 2.8398968635548134, "learning_rate": 9.741773606981101e-06, "loss": 0.6697, "step": 1597 }, { "epoch": 0.13, "grad_norm": 3.8420418288266456, "learning_rate": 9.741356196949154e-06, "loss": 0.6895, "step": 1598 }, { "epoch": 0.13, "grad_norm": 16.80330079068573, "learning_rate": 9.740938458785505e-06, "loss": 0.777, "step": 1599 }, { "epoch": 0.13, "grad_norm": 3.4688192895732644, "learning_rate": 9.740520392519063e-06, "loss": 0.8204, "step": 1600 }, { "epoch": 0.13, "grad_norm": 4.316045543354153, "learning_rate": 9.74010199817876e-06, "loss": 0.7809, "step": 1601 }, { "epoch": 0.13, "grad_norm": 4.262150862751194, "learning_rate": 9.739683275793554e-06, "loss": 0.7865, "step": 1602 }, { "epoch": 0.13, "grad_norm": 7.976718925923404, "learning_rate": 9.739264225392421e-06, "loss": 0.7256, "step": 1603 }, { "epoch": 0.13, "grad_norm": 2.6253850331117072, "learning_rate": 9.738844847004363e-06, "loss": 0.7406, "step": 1604 }, { "epoch": 0.13, "grad_norm": 4.931278221817675, "learning_rate": 9.738425140658403e-06, "loss": 0.8762, "step": 1605 }, { "epoch": 0.13, "grad_norm": 3.4771144454922815, "learning_rate": 9.738005106383588e-06, "loss": 0.7741, "step": 1606 }, { "epoch": 0.13, "grad_norm": 3.3991810278507995, "learning_rate": 9.737584744208986e-06, "loss": 0.7351, "step": 1607 }, { "epoch": 0.13, "grad_norm": 7.215732942096885, "learning_rate": 9.73716405416369e-06, "loss": 0.6021, "step": 1608 }, { "epoch": 0.13, "grad_norm": 6.971042511230237, "learning_rate": 9.736743036276814e-06, "loss": 0.7296, "step": 1609 }, { "epoch": 0.13, "grad_norm": 4.516028298366066, "learning_rate": 9.736321690577494e-06, "loss": 0.7452, "step": 1610 }, { "epoch": 0.13, "grad_norm": 3.4586796419005155, "learning_rate": 9.735900017094893e-06, "loss": 0.6768, "step": 1611 }, { "epoch": 0.13, "grad_norm": 6.562280809624755, "learning_rate": 9.735478015858188e-06, "loss": 0.6827, "step": 1612 }, { "epoch": 0.13, "grad_norm": 5.152785425889034, "learning_rate": 9.73505568689659e-06, "loss": 0.7027, "step": 1613 }, { "epoch": 0.13, "grad_norm": 3.7540001728442887, "learning_rate": 9.734633030239322e-06, "loss": 0.825, "step": 1614 }, { "epoch": 0.13, "grad_norm": 5.7794735133142785, "learning_rate": 9.734210045915638e-06, "loss": 0.7935, "step": 1615 }, { "epoch": 0.13, "grad_norm": 22.75812624211879, "learning_rate": 9.73378673395481e-06, "loss": 0.7182, "step": 1616 }, { "epoch": 0.13, "grad_norm": 3.81560985174846, "learning_rate": 9.733363094386133e-06, "loss": 0.783, "step": 1617 }, { "epoch": 0.13, "grad_norm": 4.258476658082455, "learning_rate": 9.732939127238926e-06, "loss": 0.6002, "step": 1618 }, { "epoch": 0.13, "grad_norm": 3.8999253106124563, "learning_rate": 9.73251483254253e-06, "loss": 0.6897, "step": 1619 }, { "epoch": 0.13, "grad_norm": 4.912416800591878, "learning_rate": 9.732090210326308e-06, "loss": 0.8058, "step": 1620 }, { "epoch": 0.13, "grad_norm": 8.827230766672944, "learning_rate": 9.731665260619649e-06, "loss": 0.6443, "step": 1621 }, { "epoch": 0.13, "grad_norm": 4.83679943374413, "learning_rate": 9.731239983451962e-06, "loss": 0.8323, "step": 1622 }, { "epoch": 0.13, "grad_norm": 3.473150037431046, "learning_rate": 9.730814378852677e-06, "loss": 0.5805, "step": 1623 }, { "epoch": 0.13, "grad_norm": 6.158690394884845, "learning_rate": 9.730388446851248e-06, "loss": 0.7607, "step": 1624 }, { "epoch": 0.13, "grad_norm": 7.0326071661230625, "learning_rate": 9.729962187477156e-06, "loss": 0.7769, "step": 1625 }, { "epoch": 0.13, "grad_norm": 3.5495280896586814, "learning_rate": 9.729535600759898e-06, "loss": 0.61, "step": 1626 }, { "epoch": 0.13, "grad_norm": 3.838332207588205, "learning_rate": 9.729108686728996e-06, "loss": 0.7304, "step": 1627 }, { "epoch": 0.13, "grad_norm": 4.160931677255557, "learning_rate": 9.728681445413995e-06, "loss": 0.635, "step": 1628 }, { "epoch": 0.13, "grad_norm": 6.7148478403544996, "learning_rate": 9.728253876844464e-06, "loss": 0.7475, "step": 1629 }, { "epoch": 0.13, "grad_norm": 2.9286470509589106, "learning_rate": 9.727825981049994e-06, "loss": 0.6701, "step": 1630 }, { "epoch": 0.13, "grad_norm": 3.476303799251314, "learning_rate": 9.727397758060198e-06, "loss": 0.6478, "step": 1631 }, { "epoch": 0.13, "grad_norm": 3.4558827574908944, "learning_rate": 9.72696920790471e-06, "loss": 0.6674, "step": 1632 }, { "epoch": 0.13, "grad_norm": 3.5000527582279424, "learning_rate": 9.72654033061319e-06, "loss": 0.8207, "step": 1633 }, { "epoch": 0.13, "grad_norm": 3.806506387468297, "learning_rate": 9.726111126215316e-06, "loss": 0.6835, "step": 1634 }, { "epoch": 0.13, "grad_norm": 4.105878418541739, "learning_rate": 9.725681594740796e-06, "loss": 0.8573, "step": 1635 }, { "epoch": 0.13, "grad_norm": 3.694683178377096, "learning_rate": 9.725251736219355e-06, "loss": 0.7239, "step": 1636 }, { "epoch": 0.13, "grad_norm": 3.509286681837293, "learning_rate": 9.72482155068074e-06, "loss": 0.8, "step": 1637 }, { "epoch": 0.13, "grad_norm": 5.755639336900618, "learning_rate": 9.724391038154723e-06, "loss": 0.8501, "step": 1638 }, { "epoch": 0.13, "grad_norm": 3.6837007945942197, "learning_rate": 9.723960198671101e-06, "loss": 0.7626, "step": 1639 }, { "epoch": 0.13, "grad_norm": 5.164929000928166, "learning_rate": 9.723529032259689e-06, "loss": 0.7144, "step": 1640 }, { "epoch": 0.13, "grad_norm": 3.3622909819007583, "learning_rate": 9.723097538950324e-06, "loss": 0.6487, "step": 1641 }, { "epoch": 0.13, "grad_norm": 3.775566972616635, "learning_rate": 9.72266571877287e-06, "loss": 0.7558, "step": 1642 }, { "epoch": 0.13, "grad_norm": 4.464648193463835, "learning_rate": 9.722233571757214e-06, "loss": 0.8902, "step": 1643 }, { "epoch": 0.13, "grad_norm": 4.249818869896293, "learning_rate": 9.72180109793326e-06, "loss": 0.7797, "step": 1644 }, { "epoch": 0.13, "grad_norm": 3.649693492349038, "learning_rate": 9.72136829733094e-06, "loss": 0.7602, "step": 1645 }, { "epoch": 0.13, "grad_norm": 3.128506318982704, "learning_rate": 9.720935169980205e-06, "loss": 0.6496, "step": 1646 }, { "epoch": 0.13, "grad_norm": 4.364456287479509, "learning_rate": 9.72050171591103e-06, "loss": 0.7596, "step": 1647 }, { "epoch": 0.13, "grad_norm": 4.707521581585037, "learning_rate": 9.720067935153415e-06, "loss": 0.7027, "step": 1648 }, { "epoch": 0.13, "grad_norm": 3.7451575476002277, "learning_rate": 9.719633827737379e-06, "loss": 0.6735, "step": 1649 }, { "epoch": 0.13, "grad_norm": 3.4153221993125813, "learning_rate": 9.719199393692963e-06, "loss": 0.8063, "step": 1650 }, { "epoch": 0.13, "grad_norm": 9.499821851026198, "learning_rate": 9.718764633050235e-06, "loss": 0.747, "step": 1651 }, { "epoch": 0.13, "grad_norm": 3.684862169262496, "learning_rate": 9.718329545839282e-06, "loss": 0.7243, "step": 1652 }, { "epoch": 0.13, "grad_norm": 3.9077420046052005, "learning_rate": 9.717894132090218e-06, "loss": 0.7649, "step": 1653 }, { "epoch": 0.13, "grad_norm": 4.438028296673117, "learning_rate": 9.71745839183317e-06, "loss": 0.8714, "step": 1654 }, { "epoch": 0.13, "grad_norm": 2.6841104629812276, "learning_rate": 9.717022325098301e-06, "loss": 0.6432, "step": 1655 }, { "epoch": 0.13, "grad_norm": 5.059769876649569, "learning_rate": 9.716585931915786e-06, "loss": 0.521, "step": 1656 }, { "epoch": 0.13, "grad_norm": 9.756685481067823, "learning_rate": 9.716149212315824e-06, "loss": 0.7772, "step": 1657 }, { "epoch": 0.13, "grad_norm": 3.1213390266795322, "learning_rate": 9.715712166328643e-06, "loss": 0.6246, "step": 1658 }, { "epoch": 0.13, "grad_norm": 3.1112938180535887, "learning_rate": 9.715274793984489e-06, "loss": 0.8226, "step": 1659 }, { "epoch": 0.13, "grad_norm": 3.9629629456608013, "learning_rate": 9.714837095313626e-06, "loss": 0.8292, "step": 1660 }, { "epoch": 0.13, "grad_norm": 5.082119336013578, "learning_rate": 9.71439907034635e-06, "loss": 0.651, "step": 1661 }, { "epoch": 0.13, "grad_norm": 3.559760814378013, "learning_rate": 9.713960719112976e-06, "loss": 0.8007, "step": 1662 }, { "epoch": 0.14, "grad_norm": 3.456020348047644, "learning_rate": 9.713522041643837e-06, "loss": 0.6624, "step": 1663 }, { "epoch": 0.14, "grad_norm": 6.340988530016686, "learning_rate": 9.713083037969292e-06, "loss": 0.7331, "step": 1664 }, { "epoch": 0.14, "grad_norm": 3.1034382008641166, "learning_rate": 9.712643708119729e-06, "loss": 0.7857, "step": 1665 }, { "epoch": 0.14, "grad_norm": 4.8534432361182, "learning_rate": 9.712204052125546e-06, "loss": 0.6796, "step": 1666 }, { "epoch": 0.14, "grad_norm": 4.362221883946943, "learning_rate": 9.711764070017172e-06, "loss": 0.7872, "step": 1667 }, { "epoch": 0.14, "grad_norm": 3.5273449809380115, "learning_rate": 9.711323761825057e-06, "loss": 0.7047, "step": 1668 }, { "epoch": 0.14, "grad_norm": 6.218440607655703, "learning_rate": 9.710883127579673e-06, "loss": 0.7077, "step": 1669 }, { "epoch": 0.14, "grad_norm": 3.809729051839772, "learning_rate": 9.710442167311514e-06, "loss": 0.8732, "step": 1670 }, { "epoch": 0.14, "grad_norm": 5.639587441069127, "learning_rate": 9.710000881051097e-06, "loss": 0.5721, "step": 1671 }, { "epoch": 0.14, "grad_norm": 4.916564432609439, "learning_rate": 9.709559268828963e-06, "loss": 0.5893, "step": 1672 }, { "epoch": 0.14, "grad_norm": 3.517988348365595, "learning_rate": 9.709117330675676e-06, "loss": 0.7944, "step": 1673 }, { "epoch": 0.14, "grad_norm": 4.081701792650104, "learning_rate": 9.708675066621814e-06, "loss": 0.9646, "step": 1674 }, { "epoch": 0.14, "grad_norm": 3.77230917704101, "learning_rate": 9.708232476697992e-06, "loss": 0.5697, "step": 1675 }, { "epoch": 0.14, "grad_norm": 3.1655052789769083, "learning_rate": 9.707789560934837e-06, "loss": 0.6759, "step": 1676 }, { "epoch": 0.14, "grad_norm": 3.469268732521162, "learning_rate": 9.707346319363002e-06, "loss": 0.775, "step": 1677 }, { "epoch": 0.14, "grad_norm": 3.3039809631673167, "learning_rate": 9.706902752013161e-06, "loss": 0.9036, "step": 1678 }, { "epoch": 0.14, "grad_norm": 4.301390790437207, "learning_rate": 9.706458858916013e-06, "loss": 0.7811, "step": 1679 }, { "epoch": 0.14, "grad_norm": 3.684122889489982, "learning_rate": 9.706014640102276e-06, "loss": 0.6429, "step": 1680 }, { "epoch": 0.14, "grad_norm": 4.579633319155013, "learning_rate": 9.705570095602696e-06, "loss": 0.6272, "step": 1681 }, { "epoch": 0.14, "grad_norm": 5.041456818256814, "learning_rate": 9.705125225448036e-06, "loss": 0.8406, "step": 1682 }, { "epoch": 0.14, "grad_norm": 5.3077083679831825, "learning_rate": 9.704680029669085e-06, "loss": 0.7385, "step": 1683 }, { "epoch": 0.14, "grad_norm": 6.416998630785171, "learning_rate": 9.704234508296653e-06, "loss": 0.593, "step": 1684 }, { "epoch": 0.14, "grad_norm": 3.884262700895454, "learning_rate": 9.703788661361573e-06, "loss": 0.7537, "step": 1685 }, { "epoch": 0.14, "grad_norm": 4.7039844730202836, "learning_rate": 9.703342488894699e-06, "loss": 0.7329, "step": 1686 }, { "epoch": 0.14, "grad_norm": 7.075261630793293, "learning_rate": 9.70289599092691e-06, "loss": 0.8081, "step": 1687 }, { "epoch": 0.14, "grad_norm": 3.236227572092828, "learning_rate": 9.702449167489108e-06, "loss": 0.653, "step": 1688 }, { "epoch": 0.14, "grad_norm": 5.77749139079335, "learning_rate": 9.702002018612212e-06, "loss": 0.7132, "step": 1689 }, { "epoch": 0.14, "grad_norm": 3.1191345565918405, "learning_rate": 9.701554544327171e-06, "loss": 0.7941, "step": 1690 }, { "epoch": 0.14, "grad_norm": 3.1856564607312383, "learning_rate": 9.701106744664954e-06, "loss": 0.7089, "step": 1691 }, { "epoch": 0.14, "grad_norm": 3.7536402174148438, "learning_rate": 9.70065861965655e-06, "loss": 0.6437, "step": 1692 }, { "epoch": 0.14, "grad_norm": 6.697915829945942, "learning_rate": 9.700210169332968e-06, "loss": 0.8034, "step": 1693 }, { "epoch": 0.14, "grad_norm": 3.897304716912581, "learning_rate": 9.69976139372525e-06, "loss": 0.8438, "step": 1694 }, { "epoch": 0.14, "grad_norm": 4.858907689189309, "learning_rate": 9.699312292864452e-06, "loss": 0.6158, "step": 1695 }, { "epoch": 0.14, "grad_norm": 3.7408560410532457, "learning_rate": 9.698862866781653e-06, "loss": 0.675, "step": 1696 }, { "epoch": 0.14, "grad_norm": 4.066613783840214, "learning_rate": 9.698413115507956e-06, "loss": 0.5913, "step": 1697 }, { "epoch": 0.14, "grad_norm": 2.943715922588739, "learning_rate": 9.69796303907449e-06, "loss": 0.7509, "step": 1698 }, { "epoch": 0.14, "grad_norm": 3.1752119270299954, "learning_rate": 9.697512637512398e-06, "loss": 0.897, "step": 1699 }, { "epoch": 0.14, "grad_norm": 5.332048346244037, "learning_rate": 9.697061910852857e-06, "loss": 0.7403, "step": 1700 }, { "epoch": 0.14, "grad_norm": 3.000810669673395, "learning_rate": 9.696610859127053e-06, "loss": 0.7969, "step": 1701 }, { "epoch": 0.14, "grad_norm": 4.5628617251272665, "learning_rate": 9.696159482366207e-06, "loss": 0.6227, "step": 1702 }, { "epoch": 0.14, "grad_norm": 5.810673287403003, "learning_rate": 9.695707780601556e-06, "loss": 0.7164, "step": 1703 }, { "epoch": 0.14, "grad_norm": 3.933997524287036, "learning_rate": 9.69525575386436e-06, "loss": 0.683, "step": 1704 }, { "epoch": 0.14, "grad_norm": 3.808249114411638, "learning_rate": 9.694803402185901e-06, "loss": 0.651, "step": 1705 }, { "epoch": 0.14, "grad_norm": 5.2463119126658, "learning_rate": 9.694350725597487e-06, "loss": 0.9314, "step": 1706 }, { "epoch": 0.14, "grad_norm": 3.3827877952299095, "learning_rate": 9.693897724130442e-06, "loss": 0.6986, "step": 1707 }, { "epoch": 0.14, "grad_norm": 5.257882224395762, "learning_rate": 9.693444397816123e-06, "loss": 0.6333, "step": 1708 }, { "epoch": 0.14, "grad_norm": 5.669050899034887, "learning_rate": 9.692990746685897e-06, "loss": 0.8155, "step": 1709 }, { "epoch": 0.14, "grad_norm": 8.978030928106222, "learning_rate": 9.692536770771162e-06, "loss": 0.9794, "step": 1710 }, { "epoch": 0.14, "grad_norm": 5.690780774152081, "learning_rate": 9.692082470103337e-06, "loss": 0.6688, "step": 1711 }, { "epoch": 0.14, "grad_norm": 8.713956379906906, "learning_rate": 9.69162784471386e-06, "loss": 0.6405, "step": 1712 }, { "epoch": 0.14, "grad_norm": 4.661951758377616, "learning_rate": 9.691172894634196e-06, "loss": 0.7027, "step": 1713 }, { "epoch": 0.14, "grad_norm": 5.149150609877356, "learning_rate": 9.690717619895828e-06, "loss": 0.7267, "step": 1714 }, { "epoch": 0.14, "grad_norm": 4.82640177701345, "learning_rate": 9.690262020530266e-06, "loss": 0.7559, "step": 1715 }, { "epoch": 0.14, "grad_norm": 4.24779206602431, "learning_rate": 9.689806096569042e-06, "loss": 0.8767, "step": 1716 }, { "epoch": 0.14, "grad_norm": 3.7460626768945606, "learning_rate": 9.689349848043704e-06, "loss": 0.7105, "step": 1717 }, { "epoch": 0.14, "grad_norm": 2.6161676148212667, "learning_rate": 9.688893274985832e-06, "loss": 0.7263, "step": 1718 }, { "epoch": 0.14, "grad_norm": 5.9366964310925425, "learning_rate": 9.68843637742702e-06, "loss": 0.6256, "step": 1719 }, { "epoch": 0.14, "grad_norm": 6.232294317812228, "learning_rate": 9.68797915539889e-06, "loss": 0.7877, "step": 1720 }, { "epoch": 0.14, "grad_norm": 4.269863659089573, "learning_rate": 9.687521608933086e-06, "loss": 0.7911, "step": 1721 }, { "epoch": 0.14, "grad_norm": 4.586358459197461, "learning_rate": 9.68706373806127e-06, "loss": 0.723, "step": 1722 }, { "epoch": 0.14, "grad_norm": 5.572545518875326, "learning_rate": 9.686605542815132e-06, "loss": 0.854, "step": 1723 }, { "epoch": 0.14, "grad_norm": 7.808445752083356, "learning_rate": 9.686147023226381e-06, "loss": 0.8211, "step": 1724 }, { "epoch": 0.14, "grad_norm": 4.572732887179025, "learning_rate": 9.68568817932675e-06, "loss": 0.6699, "step": 1725 }, { "epoch": 0.14, "grad_norm": 4.528070016874158, "learning_rate": 9.685229011147991e-06, "loss": 0.7123, "step": 1726 }, { "epoch": 0.14, "grad_norm": 4.733358412030933, "learning_rate": 9.684769518721887e-06, "loss": 0.7278, "step": 1727 }, { "epoch": 0.14, "grad_norm": 25.415134789600803, "learning_rate": 9.684309702080234e-06, "loss": 0.8275, "step": 1728 }, { "epoch": 0.14, "grad_norm": 5.826567339095941, "learning_rate": 9.683849561254854e-06, "loss": 0.7114, "step": 1729 }, { "epoch": 0.14, "grad_norm": 4.443699228473538, "learning_rate": 9.683389096277591e-06, "loss": 0.6388, "step": 1730 }, { "epoch": 0.14, "grad_norm": 4.174932894829508, "learning_rate": 9.682928307180317e-06, "loss": 0.6331, "step": 1731 }, { "epoch": 0.14, "grad_norm": 3.975173601550326, "learning_rate": 9.682467193994915e-06, "loss": 0.6824, "step": 1732 }, { "epoch": 0.14, "grad_norm": 3.051988374164017, "learning_rate": 9.682005756753301e-06, "loss": 0.6847, "step": 1733 }, { "epoch": 0.14, "grad_norm": 3.684899367072688, "learning_rate": 9.681543995487407e-06, "loss": 0.6022, "step": 1734 }, { "epoch": 0.14, "grad_norm": 3.868213850010096, "learning_rate": 9.681081910229194e-06, "loss": 0.7808, "step": 1735 }, { "epoch": 0.14, "grad_norm": 3.7400153113373045, "learning_rate": 9.680619501010636e-06, "loss": 0.726, "step": 1736 }, { "epoch": 0.14, "grad_norm": 2.7790315917630988, "learning_rate": 9.680156767863736e-06, "loss": 0.6927, "step": 1737 }, { "epoch": 0.14, "grad_norm": 4.170456886364669, "learning_rate": 9.679693710820521e-06, "loss": 0.8767, "step": 1738 }, { "epoch": 0.14, "grad_norm": 3.669372878457488, "learning_rate": 9.679230329913034e-06, "loss": 0.7986, "step": 1739 }, { "epoch": 0.14, "grad_norm": 4.311506097873196, "learning_rate": 9.678766625173348e-06, "loss": 0.7221, "step": 1740 }, { "epoch": 0.14, "grad_norm": 5.9754152293851135, "learning_rate": 9.678302596633549e-06, "loss": 0.8068, "step": 1741 }, { "epoch": 0.14, "grad_norm": 2.8927795881229543, "learning_rate": 9.677838244325754e-06, "loss": 0.6958, "step": 1742 }, { "epoch": 0.14, "grad_norm": 5.111544802536774, "learning_rate": 9.677373568282098e-06, "loss": 0.7129, "step": 1743 }, { "epoch": 0.14, "grad_norm": 4.69299567871582, "learning_rate": 9.676908568534739e-06, "loss": 0.6893, "step": 1744 }, { "epoch": 0.14, "grad_norm": 4.809374203726032, "learning_rate": 9.67644324511586e-06, "loss": 0.7542, "step": 1745 }, { "epoch": 0.14, "grad_norm": 3.7598658794809148, "learning_rate": 9.675977598057664e-06, "loss": 0.6899, "step": 1746 }, { "epoch": 0.14, "grad_norm": 3.179260241309964, "learning_rate": 9.675511627392375e-06, "loss": 0.6792, "step": 1747 }, { "epoch": 0.14, "grad_norm": 2.740633301983333, "learning_rate": 9.675045333152242e-06, "loss": 0.6809, "step": 1748 }, { "epoch": 0.14, "grad_norm": 3.582531376185183, "learning_rate": 9.674578715369536e-06, "loss": 0.694, "step": 1749 }, { "epoch": 0.14, "grad_norm": 3.599352365182395, "learning_rate": 9.674111774076549e-06, "loss": 0.6444, "step": 1750 }, { "epoch": 0.14, "grad_norm": 3.5431050033351172, "learning_rate": 9.673644509305596e-06, "loss": 0.7184, "step": 1751 }, { "epoch": 0.14, "grad_norm": 3.6986730858156793, "learning_rate": 9.673176921089016e-06, "loss": 0.7416, "step": 1752 }, { "epoch": 0.14, "grad_norm": 21.586212402551016, "learning_rate": 9.672709009459167e-06, "loss": 0.9409, "step": 1753 }, { "epoch": 0.14, "grad_norm": 4.365684397860148, "learning_rate": 9.672240774448434e-06, "loss": 0.8122, "step": 1754 }, { "epoch": 0.14, "grad_norm": 6.169262957732291, "learning_rate": 9.671772216089219e-06, "loss": 0.8823, "step": 1755 }, { "epoch": 0.14, "grad_norm": 4.264796285739442, "learning_rate": 9.671303334413952e-06, "loss": 0.6417, "step": 1756 }, { "epoch": 0.14, "grad_norm": 3.4620593306834664, "learning_rate": 9.670834129455083e-06, "loss": 0.6823, "step": 1757 }, { "epoch": 0.14, "grad_norm": 3.273104747839789, "learning_rate": 9.670364601245078e-06, "loss": 0.7805, "step": 1758 }, { "epoch": 0.14, "grad_norm": 3.3824265010799683, "learning_rate": 9.66989474981644e-06, "loss": 0.8015, "step": 1759 }, { "epoch": 0.14, "grad_norm": 2.929243097614128, "learning_rate": 9.669424575201679e-06, "loss": 0.6834, "step": 1760 }, { "epoch": 0.14, "grad_norm": 3.6156161624234584, "learning_rate": 9.668954077433336e-06, "loss": 0.7689, "step": 1761 }, { "epoch": 0.14, "grad_norm": 5.546147555348058, "learning_rate": 9.668483256543973e-06, "loss": 0.6405, "step": 1762 }, { "epoch": 0.14, "grad_norm": 3.411230876685932, "learning_rate": 9.668012112566175e-06, "loss": 0.7831, "step": 1763 }, { "epoch": 0.14, "grad_norm": 4.956644317993248, "learning_rate": 9.667540645532543e-06, "loss": 0.7594, "step": 1764 }, { "epoch": 0.14, "grad_norm": 8.876866042672079, "learning_rate": 9.667068855475713e-06, "loss": 0.8745, "step": 1765 }, { "epoch": 0.14, "grad_norm": 4.863997392704245, "learning_rate": 9.66659674242833e-06, "loss": 0.7023, "step": 1766 }, { "epoch": 0.14, "grad_norm": 2.640770332580646, "learning_rate": 9.666124306423069e-06, "loss": 0.8052, "step": 1767 }, { "epoch": 0.14, "grad_norm": 4.2726813397915056, "learning_rate": 9.665651547492624e-06, "loss": 0.765, "step": 1768 }, { "epoch": 0.14, "grad_norm": 4.267810710443771, "learning_rate": 9.665178465669717e-06, "loss": 0.759, "step": 1769 }, { "epoch": 0.14, "grad_norm": 2.7138884087296002, "learning_rate": 9.664705060987085e-06, "loss": 0.5243, "step": 1770 }, { "epoch": 0.14, "grad_norm": 5.452283881485013, "learning_rate": 9.664231333477493e-06, "loss": 0.7354, "step": 1771 }, { "epoch": 0.14, "grad_norm": 4.723021780442232, "learning_rate": 9.663757283173722e-06, "loss": 0.6879, "step": 1772 }, { "epoch": 0.14, "grad_norm": 3.456598824191064, "learning_rate": 9.663282910108582e-06, "loss": 0.919, "step": 1773 }, { "epoch": 0.14, "grad_norm": 3.7254655470463534, "learning_rate": 9.662808214314903e-06, "loss": 0.8854, "step": 1774 }, { "epoch": 0.14, "grad_norm": 6.629445656200479, "learning_rate": 9.662333195825534e-06, "loss": 0.8656, "step": 1775 }, { "epoch": 0.14, "grad_norm": 12.562796230998803, "learning_rate": 9.661857854673354e-06, "loss": 0.6733, "step": 1776 }, { "epoch": 0.14, "grad_norm": 11.915009040285069, "learning_rate": 9.661382190891256e-06, "loss": 0.8731, "step": 1777 }, { "epoch": 0.14, "grad_norm": 2.9094443813064395, "learning_rate": 9.66090620451216e-06, "loss": 0.7486, "step": 1778 }, { "epoch": 0.14, "grad_norm": 3.515241499222879, "learning_rate": 9.660429895569008e-06, "loss": 0.7267, "step": 1779 }, { "epoch": 0.14, "grad_norm": 3.858544465573257, "learning_rate": 9.659953264094762e-06, "loss": 0.8004, "step": 1780 }, { "epoch": 0.14, "grad_norm": 4.756422341648063, "learning_rate": 9.659476310122408e-06, "loss": 0.7356, "step": 1781 }, { "epoch": 0.14, "grad_norm": 3.7892863640391106, "learning_rate": 9.658999033684954e-06, "loss": 0.7049, "step": 1782 }, { "epoch": 0.14, "grad_norm": 3.104125614579526, "learning_rate": 9.658521434815434e-06, "loss": 0.8571, "step": 1783 }, { "epoch": 0.14, "grad_norm": 5.3104807382038, "learning_rate": 9.658043513546898e-06, "loss": 0.6898, "step": 1784 }, { "epoch": 0.14, "grad_norm": 2.912492451282596, "learning_rate": 9.657565269912419e-06, "loss": 0.7234, "step": 1785 }, { "epoch": 0.15, "grad_norm": 3.8341955253957387, "learning_rate": 9.657086703945097e-06, "loss": 0.7386, "step": 1786 }, { "epoch": 0.15, "grad_norm": 4.185507625532871, "learning_rate": 9.656607815678053e-06, "loss": 0.8705, "step": 1787 }, { "epoch": 0.15, "grad_norm": 3.89318246332328, "learning_rate": 9.656128605144428e-06, "loss": 0.6939, "step": 1788 }, { "epoch": 0.15, "grad_norm": 3.7465657167093154, "learning_rate": 9.655649072377387e-06, "loss": 0.7771, "step": 1789 }, { "epoch": 0.15, "grad_norm": 3.9439154072796496, "learning_rate": 9.655169217410114e-06, "loss": 0.7217, "step": 1790 }, { "epoch": 0.15, "grad_norm": 3.1702162148882467, "learning_rate": 9.65468904027582e-06, "loss": 0.7735, "step": 1791 }, { "epoch": 0.15, "grad_norm": 4.1311088520496, "learning_rate": 9.654208541007736e-06, "loss": 0.7909, "step": 1792 }, { "epoch": 0.15, "grad_norm": 5.868429103302607, "learning_rate": 9.653727719639117e-06, "loss": 0.6055, "step": 1793 }, { "epoch": 0.15, "grad_norm": 3.0001341093158387, "learning_rate": 9.653246576203236e-06, "loss": 0.7969, "step": 1794 }, { "epoch": 0.15, "grad_norm": 3.745962226406492, "learning_rate": 9.652765110733392e-06, "loss": 0.6836, "step": 1795 }, { "epoch": 0.15, "grad_norm": 4.870664572201797, "learning_rate": 9.652283323262907e-06, "loss": 0.8017, "step": 1796 }, { "epoch": 0.15, "grad_norm": 10.765902878112202, "learning_rate": 9.651801213825125e-06, "loss": 0.7454, "step": 1797 }, { "epoch": 0.15, "grad_norm": 5.38238967901382, "learning_rate": 9.651318782453407e-06, "loss": 0.6853, "step": 1798 }, { "epoch": 0.15, "grad_norm": 7.015738270677254, "learning_rate": 9.650836029181142e-06, "loss": 0.6958, "step": 1799 }, { "epoch": 0.15, "grad_norm": 4.6035277786668996, "learning_rate": 9.65035295404174e-06, "loss": 0.6783, "step": 1800 }, { "epoch": 0.15, "grad_norm": 6.62280486904139, "learning_rate": 9.649869557068632e-06, "loss": 0.7725, "step": 1801 }, { "epoch": 0.15, "grad_norm": 4.724076696970895, "learning_rate": 9.649385838295274e-06, "loss": 0.7587, "step": 1802 }, { "epoch": 0.15, "grad_norm": 34.01121497096036, "learning_rate": 9.64890179775514e-06, "loss": 0.7934, "step": 1803 }, { "epoch": 0.15, "grad_norm": 4.039469766190597, "learning_rate": 9.648417435481728e-06, "loss": 0.8182, "step": 1804 }, { "epoch": 0.15, "grad_norm": 16.172027999273965, "learning_rate": 9.647932751508561e-06, "loss": 0.5744, "step": 1805 }, { "epoch": 0.15, "grad_norm": 2.6978817473841286, "learning_rate": 9.647447745869185e-06, "loss": 0.6485, "step": 1806 }, { "epoch": 0.15, "grad_norm": 4.404682338788708, "learning_rate": 9.64696241859716e-06, "loss": 0.704, "step": 1807 }, { "epoch": 0.15, "grad_norm": 5.531883256365931, "learning_rate": 9.646476769726076e-06, "loss": 0.6829, "step": 1808 }, { "epoch": 0.15, "grad_norm": 3.0739698498616104, "learning_rate": 9.645990799289544e-06, "loss": 0.7043, "step": 1809 }, { "epoch": 0.15, "grad_norm": 10.458369186395796, "learning_rate": 9.645504507321192e-06, "loss": 0.6906, "step": 1810 }, { "epoch": 0.15, "grad_norm": 4.264114854895885, "learning_rate": 9.645017893854682e-06, "loss": 0.7697, "step": 1811 }, { "epoch": 0.15, "grad_norm": 4.64115015554052, "learning_rate": 9.644530958923683e-06, "loss": 0.7407, "step": 1812 }, { "epoch": 0.15, "grad_norm": 6.57708205817582, "learning_rate": 9.644043702561899e-06, "loss": 0.5949, "step": 1813 }, { "epoch": 0.15, "grad_norm": 2.974082388564952, "learning_rate": 9.643556124803049e-06, "loss": 0.8784, "step": 1814 }, { "epoch": 0.15, "grad_norm": 2.6420921537346542, "learning_rate": 9.643068225680877e-06, "loss": 0.8026, "step": 1815 }, { "epoch": 0.15, "grad_norm": 3.35434422701114, "learning_rate": 9.642580005229148e-06, "loss": 0.7062, "step": 1816 }, { "epoch": 0.15, "grad_norm": 10.526516538261736, "learning_rate": 9.64209146348165e-06, "loss": 0.8309, "step": 1817 }, { "epoch": 0.15, "grad_norm": 3.2630227711576154, "learning_rate": 9.641602600472195e-06, "loss": 0.7267, "step": 1818 }, { "epoch": 0.15, "grad_norm": 3.158520496304099, "learning_rate": 9.641113416234615e-06, "loss": 0.7555, "step": 1819 }, { "epoch": 0.15, "grad_norm": 6.2817029368610475, "learning_rate": 9.640623910802763e-06, "loss": 0.7808, "step": 1820 }, { "epoch": 0.15, "grad_norm": 5.778041260608554, "learning_rate": 9.640134084210515e-06, "loss": 0.8656, "step": 1821 }, { "epoch": 0.15, "grad_norm": 3.338031539263942, "learning_rate": 9.639643936491772e-06, "loss": 0.6372, "step": 1822 }, { "epoch": 0.15, "grad_norm": 3.0522752405586506, "learning_rate": 9.639153467680455e-06, "loss": 0.8437, "step": 1823 }, { "epoch": 0.15, "grad_norm": 3.6172029956296217, "learning_rate": 9.638662677810509e-06, "loss": 0.8244, "step": 1824 }, { "epoch": 0.15, "grad_norm": 3.234354990918008, "learning_rate": 9.638171566915897e-06, "loss": 0.7631, "step": 1825 }, { "epoch": 0.15, "grad_norm": 4.248374596154979, "learning_rate": 9.637680135030609e-06, "loss": 0.6777, "step": 1826 }, { "epoch": 0.15, "grad_norm": 3.889120425005195, "learning_rate": 9.637188382188654e-06, "loss": 0.7615, "step": 1827 }, { "epoch": 0.15, "grad_norm": 3.7514652989888835, "learning_rate": 9.636696308424066e-06, "loss": 0.7635, "step": 1828 }, { "epoch": 0.15, "grad_norm": 4.61654240479573, "learning_rate": 9.636203913770896e-06, "loss": 0.7345, "step": 1829 }, { "epoch": 0.15, "grad_norm": 18.458235311642586, "learning_rate": 9.635711198263225e-06, "loss": 0.7597, "step": 1830 }, { "epoch": 0.15, "grad_norm": 4.1459296580252545, "learning_rate": 9.63521816193515e-06, "loss": 0.8751, "step": 1831 }, { "epoch": 0.15, "grad_norm": 23.770557693442274, "learning_rate": 9.634724804820793e-06, "loss": 0.7278, "step": 1832 }, { "epoch": 0.15, "grad_norm": 3.1179063965618545, "learning_rate": 9.634231126954296e-06, "loss": 0.7737, "step": 1833 }, { "epoch": 0.15, "grad_norm": 2.80327418176487, "learning_rate": 9.633737128369824e-06, "loss": 0.5436, "step": 1834 }, { "epoch": 0.15, "grad_norm": 4.212624824775542, "learning_rate": 9.633242809101568e-06, "loss": 0.8598, "step": 1835 }, { "epoch": 0.15, "grad_norm": 17.212815482701565, "learning_rate": 9.632748169183737e-06, "loss": 0.7054, "step": 1836 }, { "epoch": 0.15, "grad_norm": 3.7657281770707995, "learning_rate": 9.632253208650562e-06, "loss": 0.8222, "step": 1837 }, { "epoch": 0.15, "grad_norm": 3.1747568899720426, "learning_rate": 9.631757927536297e-06, "loss": 0.7882, "step": 1838 }, { "epoch": 0.15, "grad_norm": 3.6537778128673626, "learning_rate": 9.63126232587522e-06, "loss": 0.84, "step": 1839 }, { "epoch": 0.15, "grad_norm": 3.8424096742359164, "learning_rate": 9.63076640370163e-06, "loss": 0.7681, "step": 1840 }, { "epoch": 0.15, "grad_norm": 3.5514062459548876, "learning_rate": 9.630270161049847e-06, "loss": 0.6818, "step": 1841 }, { "epoch": 0.15, "grad_norm": 2.9608597355195188, "learning_rate": 9.629773597954213e-06, "loss": 0.7155, "step": 1842 }, { "epoch": 0.15, "grad_norm": 9.645003243504389, "learning_rate": 9.629276714449095e-06, "loss": 0.6655, "step": 1843 }, { "epoch": 0.15, "grad_norm": 3.5498534043129286, "learning_rate": 9.62877951056888e-06, "loss": 0.7792, "step": 1844 }, { "epoch": 0.15, "grad_norm": 3.229474133748755, "learning_rate": 9.628281986347978e-06, "loss": 0.9283, "step": 1845 }, { "epoch": 0.15, "grad_norm": 4.264767916182582, "learning_rate": 9.62778414182082e-06, "loss": 0.687, "step": 1846 }, { "epoch": 0.15, "grad_norm": 2.8152615536340764, "learning_rate": 9.627285977021861e-06, "loss": 0.605, "step": 1847 }, { "epoch": 0.15, "grad_norm": 85.75608395244225, "learning_rate": 9.626787491985576e-06, "loss": 0.7308, "step": 1848 }, { "epoch": 0.15, "grad_norm": 4.733506267481033, "learning_rate": 9.626288686746465e-06, "loss": 0.8634, "step": 1849 }, { "epoch": 0.15, "grad_norm": 9.764897727206725, "learning_rate": 9.625789561339046e-06, "loss": 0.7068, "step": 1850 }, { "epoch": 0.15, "grad_norm": 3.8616128670841383, "learning_rate": 9.625290115797864e-06, "loss": 0.8541, "step": 1851 }, { "epoch": 0.15, "grad_norm": 2.961195298602995, "learning_rate": 9.624790350157482e-06, "loss": 0.6664, "step": 1852 }, { "epoch": 0.15, "grad_norm": 5.553594883581051, "learning_rate": 9.624290264452488e-06, "loss": 0.577, "step": 1853 }, { "epoch": 0.15, "grad_norm": 6.810953982965657, "learning_rate": 9.623789858717491e-06, "loss": 0.7871, "step": 1854 }, { "epoch": 0.15, "grad_norm": 3.525060023018861, "learning_rate": 9.623289132987122e-06, "loss": 0.6726, "step": 1855 }, { "epoch": 0.15, "grad_norm": 4.175663554760576, "learning_rate": 9.622788087296033e-06, "loss": 0.7703, "step": 1856 }, { "epoch": 0.15, "grad_norm": 3.9650322979692305, "learning_rate": 9.622286721678903e-06, "loss": 0.7014, "step": 1857 }, { "epoch": 0.15, "grad_norm": 5.314001414751438, "learning_rate": 9.621785036170425e-06, "loss": 0.7249, "step": 1858 }, { "epoch": 0.15, "grad_norm": 3.6232587886025094, "learning_rate": 9.621283030805324e-06, "loss": 0.8366, "step": 1859 }, { "epoch": 0.15, "grad_norm": 5.4504591504261155, "learning_rate": 9.620780705618338e-06, "loss": 0.6685, "step": 1860 }, { "epoch": 0.15, "grad_norm": 6.171991140368373, "learning_rate": 9.620278060644232e-06, "loss": 0.7909, "step": 1861 }, { "epoch": 0.15, "grad_norm": 4.3592939498546786, "learning_rate": 9.619775095917793e-06, "loss": 0.8664, "step": 1862 }, { "epoch": 0.15, "grad_norm": 3.38970622071405, "learning_rate": 9.61927181147383e-06, "loss": 0.6201, "step": 1863 }, { "epoch": 0.15, "grad_norm": 4.95411428406291, "learning_rate": 9.618768207347171e-06, "loss": 0.823, "step": 1864 }, { "epoch": 0.15, "grad_norm": 4.524357263767203, "learning_rate": 9.61826428357267e-06, "loss": 0.6175, "step": 1865 }, { "epoch": 0.15, "grad_norm": 5.098024400666344, "learning_rate": 9.617760040185202e-06, "loss": 0.6533, "step": 1866 }, { "epoch": 0.15, "grad_norm": 3.7909623854257997, "learning_rate": 9.617255477219662e-06, "loss": 0.7238, "step": 1867 }, { "epoch": 0.15, "grad_norm": 3.838778584529365, "learning_rate": 9.616750594710972e-06, "loss": 0.7322, "step": 1868 }, { "epoch": 0.15, "grad_norm": 3.784931088387499, "learning_rate": 9.61624539269407e-06, "loss": 0.7113, "step": 1869 }, { "epoch": 0.15, "grad_norm": 3.5834781467863133, "learning_rate": 9.615739871203922e-06, "loss": 0.7007, "step": 1870 }, { "epoch": 0.15, "grad_norm": 4.983176662639147, "learning_rate": 9.615234030275511e-06, "loss": 0.8822, "step": 1871 }, { "epoch": 0.15, "grad_norm": 4.8234030026158985, "learning_rate": 9.614727869943845e-06, "loss": 0.6518, "step": 1872 }, { "epoch": 0.15, "grad_norm": 4.107915054639396, "learning_rate": 9.614221390243955e-06, "loss": 0.7061, "step": 1873 }, { "epoch": 0.15, "grad_norm": 3.5722162038368026, "learning_rate": 9.61371459121089e-06, "loss": 0.8054, "step": 1874 }, { "epoch": 0.15, "grad_norm": 5.180211867570742, "learning_rate": 9.613207472879725e-06, "loss": 0.7597, "step": 1875 }, { "epoch": 0.15, "grad_norm": 4.895786217891239, "learning_rate": 9.612700035285557e-06, "loss": 0.7773, "step": 1876 }, { "epoch": 0.15, "grad_norm": 10.543365189270618, "learning_rate": 9.612192278463502e-06, "loss": 0.7339, "step": 1877 }, { "epoch": 0.15, "grad_norm": 2.9303148430854273, "learning_rate": 9.611684202448699e-06, "loss": 0.7264, "step": 1878 }, { "epoch": 0.15, "grad_norm": 3.2328542319356472, "learning_rate": 9.611175807276311e-06, "loss": 0.8334, "step": 1879 }, { "epoch": 0.15, "grad_norm": 3.350563349574019, "learning_rate": 9.610667092981526e-06, "loss": 0.6904, "step": 1880 }, { "epoch": 0.15, "grad_norm": 2.6365398544093166, "learning_rate": 9.610158059599546e-06, "loss": 0.5767, "step": 1881 }, { "epoch": 0.15, "grad_norm": 2.7046603502510562, "learning_rate": 9.6096487071656e-06, "loss": 0.7838, "step": 1882 }, { "epoch": 0.15, "grad_norm": 2.9165358776349994, "learning_rate": 9.609139035714938e-06, "loss": 0.812, "step": 1883 }, { "epoch": 0.15, "grad_norm": 3.4585004951500746, "learning_rate": 9.608629045282833e-06, "loss": 0.8616, "step": 1884 }, { "epoch": 0.15, "grad_norm": 3.1190589043565997, "learning_rate": 9.60811873590458e-06, "loss": 0.6763, "step": 1885 }, { "epoch": 0.15, "grad_norm": 3.7294620548658166, "learning_rate": 9.607608107615496e-06, "loss": 0.8731, "step": 1886 }, { "epoch": 0.15, "grad_norm": 4.328997582138383, "learning_rate": 9.60709716045092e-06, "loss": 0.7513, "step": 1887 }, { "epoch": 0.15, "grad_norm": 2.7809222946994168, "learning_rate": 9.60658589444621e-06, "loss": 0.8948, "step": 1888 }, { "epoch": 0.15, "grad_norm": 3.7609806153607837, "learning_rate": 9.606074309636751e-06, "loss": 0.7104, "step": 1889 }, { "epoch": 0.15, "grad_norm": 5.442058931950496, "learning_rate": 9.605562406057948e-06, "loss": 0.72, "step": 1890 }, { "epoch": 0.15, "grad_norm": 4.628884486758903, "learning_rate": 9.605050183745228e-06, "loss": 0.6918, "step": 1891 }, { "epoch": 0.15, "grad_norm": 15.338926483134891, "learning_rate": 9.604537642734039e-06, "loss": 0.685, "step": 1892 }, { "epoch": 0.15, "grad_norm": 4.331612460959655, "learning_rate": 9.604024783059851e-06, "loss": 0.8155, "step": 1893 }, { "epoch": 0.15, "grad_norm": 3.858972245844192, "learning_rate": 9.60351160475816e-06, "loss": 0.6348, "step": 1894 }, { "epoch": 0.15, "grad_norm": 4.1501786353724395, "learning_rate": 9.602998107864481e-06, "loss": 0.7534, "step": 1895 }, { "epoch": 0.15, "grad_norm": 6.364780975234508, "learning_rate": 9.602484292414348e-06, "loss": 0.8168, "step": 1896 }, { "epoch": 0.15, "grad_norm": 3.6894908704816025, "learning_rate": 9.601970158443324e-06, "loss": 0.7513, "step": 1897 }, { "epoch": 0.15, "grad_norm": 3.4982378535850573, "learning_rate": 9.601455705986989e-06, "loss": 0.5723, "step": 1898 }, { "epoch": 0.15, "grad_norm": 4.570514638212711, "learning_rate": 9.600940935080944e-06, "loss": 0.6902, "step": 1899 }, { "epoch": 0.15, "grad_norm": 2.828916395650367, "learning_rate": 9.600425845760816e-06, "loss": 0.7753, "step": 1900 }, { "epoch": 0.15, "grad_norm": 7.067852792139686, "learning_rate": 9.599910438062255e-06, "loss": 0.755, "step": 1901 }, { "epoch": 0.15, "grad_norm": 3.649984593118544, "learning_rate": 9.599394712020927e-06, "loss": 0.797, "step": 1902 }, { "epoch": 0.15, "grad_norm": 3.652218346686841, "learning_rate": 9.598878667672525e-06, "loss": 0.8341, "step": 1903 }, { "epoch": 0.15, "grad_norm": 3.169274276842476, "learning_rate": 9.598362305052764e-06, "loss": 0.7978, "step": 1904 }, { "epoch": 0.15, "grad_norm": 3.3453049081747848, "learning_rate": 9.597845624197376e-06, "loss": 0.8288, "step": 1905 }, { "epoch": 0.15, "grad_norm": 5.293662057668835, "learning_rate": 9.59732862514212e-06, "loss": 0.6776, "step": 1906 }, { "epoch": 0.15, "grad_norm": 3.7790879214603135, "learning_rate": 9.596811307922776e-06, "loss": 0.5941, "step": 1907 }, { "epoch": 0.15, "grad_norm": 3.983357639424124, "learning_rate": 9.596293672575147e-06, "loss": 0.8004, "step": 1908 }, { "epoch": 0.16, "grad_norm": 3.7850724701619787, "learning_rate": 9.595775719135054e-06, "loss": 0.6015, "step": 1909 }, { "epoch": 0.16, "grad_norm": 6.209393942261223, "learning_rate": 9.595257447638344e-06, "loss": 0.8922, "step": 1910 }, { "epoch": 0.16, "grad_norm": 4.840525113305607, "learning_rate": 9.594738858120885e-06, "loss": 0.6557, "step": 1911 }, { "epoch": 0.16, "grad_norm": 5.780534332511471, "learning_rate": 9.594219950618565e-06, "loss": 0.8158, "step": 1912 }, { "epoch": 0.16, "grad_norm": 3.564062676339331, "learning_rate": 9.593700725167298e-06, "loss": 0.7096, "step": 1913 }, { "epoch": 0.16, "grad_norm": 4.866051578170717, "learning_rate": 9.593181181803014e-06, "loss": 0.6888, "step": 1914 }, { "epoch": 0.16, "grad_norm": 2.6515255514994003, "learning_rate": 9.592661320561676e-06, "loss": 0.832, "step": 1915 }, { "epoch": 0.16, "grad_norm": 2.7834864921790334, "learning_rate": 9.592141141479254e-06, "loss": 0.7611, "step": 1916 }, { "epoch": 0.16, "grad_norm": 4.9199684368681105, "learning_rate": 9.59162064459175e-06, "loss": 0.7701, "step": 1917 }, { "epoch": 0.16, "grad_norm": 5.432288169454921, "learning_rate": 9.591099829935187e-06, "loss": 0.6937, "step": 1918 }, { "epoch": 0.16, "grad_norm": 3.6986372082979204, "learning_rate": 9.590578697545607e-06, "loss": 0.8122, "step": 1919 }, { "epoch": 0.16, "grad_norm": 4.075509683323473, "learning_rate": 9.590057247459077e-06, "loss": 0.7345, "step": 1920 }, { "epoch": 0.16, "grad_norm": 9.368961983218787, "learning_rate": 9.589535479711685e-06, "loss": 0.8236, "step": 1921 }, { "epoch": 0.16, "grad_norm": 4.275887276675151, "learning_rate": 9.589013394339537e-06, "loss": 0.6647, "step": 1922 }, { "epoch": 0.16, "grad_norm": 23.46387668240611, "learning_rate": 9.58849099137877e-06, "loss": 0.6335, "step": 1923 }, { "epoch": 0.16, "grad_norm": 11.820622139434557, "learning_rate": 9.587968270865534e-06, "loss": 0.5967, "step": 1924 }, { "epoch": 0.16, "grad_norm": 3.0046436101014744, "learning_rate": 9.587445232836005e-06, "loss": 0.731, "step": 1925 }, { "epoch": 0.16, "grad_norm": 7.857594921186901, "learning_rate": 9.586921877326381e-06, "loss": 0.7659, "step": 1926 }, { "epoch": 0.16, "grad_norm": 5.955937226745144, "learning_rate": 9.586398204372882e-06, "loss": 0.7501, "step": 1927 }, { "epoch": 0.16, "grad_norm": 3.871114498134791, "learning_rate": 9.585874214011749e-06, "loss": 0.7319, "step": 1928 }, { "epoch": 0.16, "grad_norm": 6.576319978287988, "learning_rate": 9.585349906279245e-06, "loss": 0.7733, "step": 1929 }, { "epoch": 0.16, "grad_norm": 5.532723547252205, "learning_rate": 9.584825281211656e-06, "loss": 0.8911, "step": 1930 }, { "epoch": 0.16, "grad_norm": 4.86223905744695, "learning_rate": 9.584300338845289e-06, "loss": 0.6837, "step": 1931 }, { "epoch": 0.16, "grad_norm": 4.322644156473204, "learning_rate": 9.583775079216472e-06, "loss": 0.9413, "step": 1932 }, { "epoch": 0.16, "grad_norm": 5.722805925680523, "learning_rate": 9.58324950236156e-06, "loss": 0.7049, "step": 1933 }, { "epoch": 0.16, "grad_norm": 4.879090590846604, "learning_rate": 9.582723608316921e-06, "loss": 0.7505, "step": 1934 }, { "epoch": 0.16, "grad_norm": 5.550854453836339, "learning_rate": 9.582197397118956e-06, "loss": 0.8024, "step": 1935 }, { "epoch": 0.16, "grad_norm": 7.736103598975232, "learning_rate": 9.581670868804079e-06, "loss": 0.7483, "step": 1936 }, { "epoch": 0.16, "grad_norm": 5.767619235708984, "learning_rate": 9.581144023408729e-06, "loss": 0.7122, "step": 1937 }, { "epoch": 0.16, "grad_norm": 3.1983253808885648, "learning_rate": 9.580616860969365e-06, "loss": 0.7379, "step": 1938 }, { "epoch": 0.16, "grad_norm": 23.541184725652943, "learning_rate": 9.580089381522476e-06, "loss": 0.6798, "step": 1939 }, { "epoch": 0.16, "grad_norm": 3.878537419811449, "learning_rate": 9.57956158510456e-06, "loss": 0.8329, "step": 1940 }, { "epoch": 0.16, "grad_norm": 6.6122434806498, "learning_rate": 9.579033471752148e-06, "loss": 0.7559, "step": 1941 }, { "epoch": 0.16, "grad_norm": 5.133554861086589, "learning_rate": 9.578505041501787e-06, "loss": 0.8041, "step": 1942 }, { "epoch": 0.16, "grad_norm": 12.392117446144335, "learning_rate": 9.57797629439005e-06, "loss": 0.7294, "step": 1943 }, { "epoch": 0.16, "grad_norm": 8.945281611934789, "learning_rate": 9.577447230453529e-06, "loss": 0.7469, "step": 1944 }, { "epoch": 0.16, "grad_norm": 4.1683942397239795, "learning_rate": 9.576917849728836e-06, "loss": 0.8529, "step": 1945 }, { "epoch": 0.16, "grad_norm": 6.520692594885672, "learning_rate": 9.57638815225261e-06, "loss": 0.7877, "step": 1946 }, { "epoch": 0.16, "grad_norm": 4.589191675738484, "learning_rate": 9.575858138061506e-06, "loss": 0.661, "step": 1947 }, { "epoch": 0.16, "grad_norm": 3.820931375483734, "learning_rate": 9.575327807192209e-06, "loss": 0.7366, "step": 1948 }, { "epoch": 0.16, "grad_norm": 3.4661618180292106, "learning_rate": 9.57479715968142e-06, "loss": 0.6171, "step": 1949 }, { "epoch": 0.16, "grad_norm": 3.9086771753899145, "learning_rate": 9.57426619556586e-06, "loss": 0.7694, "step": 1950 }, { "epoch": 0.16, "grad_norm": 6.650492966210364, "learning_rate": 9.57373491488228e-06, "loss": 0.861, "step": 1951 }, { "epoch": 0.16, "grad_norm": 6.2456134810704595, "learning_rate": 9.573203317667442e-06, "loss": 0.707, "step": 1952 }, { "epoch": 0.16, "grad_norm": 13.692815390595538, "learning_rate": 9.572671403958142e-06, "loss": 0.7654, "step": 1953 }, { "epoch": 0.16, "grad_norm": 5.992182603247182, "learning_rate": 9.572139173791185e-06, "loss": 0.8073, "step": 1954 }, { "epoch": 0.16, "grad_norm": 7.513363643468951, "learning_rate": 9.571606627203413e-06, "loss": 0.8222, "step": 1955 }, { "epoch": 0.16, "grad_norm": 4.741155559243881, "learning_rate": 9.571073764231675e-06, "loss": 0.8639, "step": 1956 }, { "epoch": 0.16, "grad_norm": 9.502065882743967, "learning_rate": 9.570540584912852e-06, "loss": 0.7418, "step": 1957 }, { "epoch": 0.16, "grad_norm": 5.462441810993187, "learning_rate": 9.570007089283841e-06, "loss": 0.7486, "step": 1958 }, { "epoch": 0.16, "grad_norm": 8.661879460531605, "learning_rate": 9.569473277381565e-06, "loss": 0.5803, "step": 1959 }, { "epoch": 0.16, "grad_norm": 5.580014944970623, "learning_rate": 9.568939149242966e-06, "loss": 0.7293, "step": 1960 }, { "epoch": 0.16, "grad_norm": 3.629131804674316, "learning_rate": 9.56840470490501e-06, "loss": 0.6889, "step": 1961 }, { "epoch": 0.16, "grad_norm": 5.291300986477962, "learning_rate": 9.567869944404682e-06, "loss": 0.8523, "step": 1962 }, { "epoch": 0.16, "grad_norm": 4.748288580761305, "learning_rate": 9.567334867778992e-06, "loss": 0.7286, "step": 1963 }, { "epoch": 0.16, "grad_norm": 4.837748808239457, "learning_rate": 9.566799475064973e-06, "loss": 0.7461, "step": 1964 }, { "epoch": 0.16, "grad_norm": 6.040507984847288, "learning_rate": 9.566263766299675e-06, "loss": 0.7607, "step": 1965 }, { "epoch": 0.16, "grad_norm": 2.69920525999466, "learning_rate": 9.56572774152017e-06, "loss": 0.6709, "step": 1966 }, { "epoch": 0.16, "grad_norm": 5.430829077976158, "learning_rate": 9.565191400763561e-06, "loss": 0.8636, "step": 1967 }, { "epoch": 0.16, "grad_norm": 2.666879830836213, "learning_rate": 9.564654744066959e-06, "loss": 0.7372, "step": 1968 }, { "epoch": 0.16, "grad_norm": 9.706743045295168, "learning_rate": 9.564117771467509e-06, "loss": 0.7752, "step": 1969 }, { "epoch": 0.16, "grad_norm": 4.226346527490758, "learning_rate": 9.56358048300237e-06, "loss": 0.8174, "step": 1970 }, { "epoch": 0.16, "grad_norm": 7.9886786268770065, "learning_rate": 9.563042878708728e-06, "loss": 0.5509, "step": 1971 }, { "epoch": 0.16, "grad_norm": 3.2923447172436435, "learning_rate": 9.562504958623788e-06, "loss": 0.7268, "step": 1972 }, { "epoch": 0.16, "grad_norm": 8.632310394426218, "learning_rate": 9.561966722784774e-06, "loss": 0.868, "step": 1973 }, { "epoch": 0.16, "grad_norm": 4.958841960391876, "learning_rate": 9.561428171228941e-06, "loss": 0.7845, "step": 1974 }, { "epoch": 0.16, "grad_norm": 4.729717093725785, "learning_rate": 9.560889303993557e-06, "loss": 0.7959, "step": 1975 }, { "epoch": 0.16, "grad_norm": 4.102901402558135, "learning_rate": 9.560350121115915e-06, "loss": 0.7684, "step": 1976 }, { "epoch": 0.16, "grad_norm": 4.075061004810444, "learning_rate": 9.559810622633332e-06, "loss": 0.732, "step": 1977 }, { "epoch": 0.16, "grad_norm": 4.90634097311846, "learning_rate": 9.559270808583142e-06, "loss": 0.5855, "step": 1978 }, { "epoch": 0.16, "grad_norm": 3.0505078913101116, "learning_rate": 9.558730679002703e-06, "loss": 0.7735, "step": 1979 }, { "epoch": 0.16, "grad_norm": 2.9272693231438844, "learning_rate": 9.558190233929396e-06, "loss": 0.6365, "step": 1980 }, { "epoch": 0.16, "grad_norm": 5.347141482559581, "learning_rate": 9.557649473400628e-06, "loss": 0.7674, "step": 1981 }, { "epoch": 0.16, "grad_norm": 3.310478891248533, "learning_rate": 9.557108397453816e-06, "loss": 0.6082, "step": 1982 }, { "epoch": 0.16, "grad_norm": 5.423444601805491, "learning_rate": 9.556567006126409e-06, "loss": 0.7377, "step": 1983 }, { "epoch": 0.16, "grad_norm": 4.746374117258417, "learning_rate": 9.556025299455876e-06, "loss": 0.6249, "step": 1984 }, { "epoch": 0.16, "grad_norm": 13.264254345867498, "learning_rate": 9.555483277479705e-06, "loss": 0.6606, "step": 1985 }, { "epoch": 0.16, "grad_norm": 3.7513680932794684, "learning_rate": 9.554940940235406e-06, "loss": 0.8212, "step": 1986 }, { "epoch": 0.16, "grad_norm": 3.044470117574727, "learning_rate": 9.554398287760515e-06, "loss": 0.8485, "step": 1987 }, { "epoch": 0.16, "grad_norm": 4.173017285118528, "learning_rate": 9.553855320092587e-06, "loss": 0.9029, "step": 1988 }, { "epoch": 0.16, "grad_norm": 6.720472294713218, "learning_rate": 9.553312037269196e-06, "loss": 0.7308, "step": 1989 }, { "epoch": 0.16, "grad_norm": 3.1250736699510395, "learning_rate": 9.552768439327941e-06, "loss": 0.7176, "step": 1990 }, { "epoch": 0.16, "grad_norm": 4.047436353972096, "learning_rate": 9.552224526306445e-06, "loss": 0.937, "step": 1991 }, { "epoch": 0.16, "grad_norm": 2.8621236169750195, "learning_rate": 9.551680298242348e-06, "loss": 0.7641, "step": 1992 }, { "epoch": 0.16, "grad_norm": 5.392121781727244, "learning_rate": 9.551135755173315e-06, "loss": 0.6102, "step": 1993 }, { "epoch": 0.16, "grad_norm": 4.014438324106472, "learning_rate": 9.55059089713703e-06, "loss": 0.7779, "step": 1994 }, { "epoch": 0.16, "grad_norm": 4.043296082866594, "learning_rate": 9.550045724171204e-06, "loss": 0.8215, "step": 1995 }, { "epoch": 0.16, "grad_norm": 4.648604972711596, "learning_rate": 9.549500236313562e-06, "loss": 0.5957, "step": 1996 }, { "epoch": 0.16, "grad_norm": 3.3469182083357625, "learning_rate": 9.54895443360186e-06, "loss": 0.8371, "step": 1997 }, { "epoch": 0.16, "grad_norm": 4.313193016165895, "learning_rate": 9.548408316073868e-06, "loss": 0.6786, "step": 1998 }, { "epoch": 0.16, "grad_norm": 3.0685193150962844, "learning_rate": 9.547861883767383e-06, "loss": 0.7183, "step": 1999 }, { "epoch": 0.16, "grad_norm": 4.551754914135308, "learning_rate": 9.547315136720217e-06, "loss": 0.606, "step": 2000 }, { "epoch": 0.16, "grad_norm": 4.388912798266088, "learning_rate": 9.546768074970213e-06, "loss": 0.7162, "step": 2001 }, { "epoch": 0.16, "grad_norm": 11.62340631243312, "learning_rate": 9.546220698555227e-06, "loss": 0.5885, "step": 2002 }, { "epoch": 0.16, "grad_norm": 7.283462508030961, "learning_rate": 9.545673007513145e-06, "loss": 0.545, "step": 2003 }, { "epoch": 0.16, "grad_norm": 4.312574679222201, "learning_rate": 9.54512500188187e-06, "loss": 0.5053, "step": 2004 }, { "epoch": 0.16, "grad_norm": 5.754950103029749, "learning_rate": 9.544576681699325e-06, "loss": 0.801, "step": 2005 }, { "epoch": 0.16, "grad_norm": 4.936679378804686, "learning_rate": 9.544028047003458e-06, "loss": 0.6033, "step": 2006 }, { "epoch": 0.16, "grad_norm": 7.202703373623657, "learning_rate": 9.54347909783224e-06, "loss": 0.7605, "step": 2007 }, { "epoch": 0.16, "grad_norm": 2.749700340286971, "learning_rate": 9.54292983422366e-06, "loss": 0.6187, "step": 2008 }, { "epoch": 0.16, "grad_norm": 3.6780006488104973, "learning_rate": 9.54238025621573e-06, "loss": 0.884, "step": 2009 }, { "epoch": 0.16, "grad_norm": 8.457104952900698, "learning_rate": 9.541830363846487e-06, "loss": 0.6231, "step": 2010 }, { "epoch": 0.16, "grad_norm": 5.015180909674552, "learning_rate": 9.541280157153983e-06, "loss": 0.7633, "step": 2011 }, { "epoch": 0.16, "grad_norm": 3.513798459864995, "learning_rate": 9.540729636176298e-06, "loss": 0.7383, "step": 2012 }, { "epoch": 0.16, "grad_norm": 4.955117948871653, "learning_rate": 9.540178800951533e-06, "loss": 0.7131, "step": 2013 }, { "epoch": 0.16, "grad_norm": 5.592603386141614, "learning_rate": 9.539627651517807e-06, "loss": 0.6427, "step": 2014 }, { "epoch": 0.16, "grad_norm": 4.088672247221793, "learning_rate": 9.539076187913262e-06, "loss": 1.0259, "step": 2015 }, { "epoch": 0.16, "grad_norm": 5.03446194313494, "learning_rate": 9.538524410176066e-06, "loss": 0.7748, "step": 2016 }, { "epoch": 0.16, "grad_norm": 4.069479300323137, "learning_rate": 9.537972318344403e-06, "loss": 0.8233, "step": 2017 }, { "epoch": 0.16, "grad_norm": 5.6678608365502505, "learning_rate": 9.537419912456484e-06, "loss": 0.8731, "step": 2018 }, { "epoch": 0.16, "grad_norm": 7.395120297258934, "learning_rate": 9.536867192550536e-06, "loss": 0.7527, "step": 2019 }, { "epoch": 0.16, "grad_norm": 3.3407798256646806, "learning_rate": 9.536314158664813e-06, "loss": 0.6686, "step": 2020 }, { "epoch": 0.16, "grad_norm": 5.156708901906526, "learning_rate": 9.535760810837584e-06, "loss": 0.8188, "step": 2021 }, { "epoch": 0.16, "grad_norm": 3.6540335836117044, "learning_rate": 9.53520714910715e-06, "loss": 0.6477, "step": 2022 }, { "epoch": 0.16, "grad_norm": 7.591064639731454, "learning_rate": 9.534653173511825e-06, "loss": 0.8695, "step": 2023 }, { "epoch": 0.16, "grad_norm": 3.983265518122732, "learning_rate": 9.534098884089948e-06, "loss": 0.8926, "step": 2024 }, { "epoch": 0.16, "grad_norm": 4.774210134866984, "learning_rate": 9.53354428087988e-06, "loss": 0.8349, "step": 2025 }, { "epoch": 0.16, "grad_norm": 5.1143741792919775, "learning_rate": 9.53298936392e-06, "loss": 0.7237, "step": 2026 }, { "epoch": 0.16, "grad_norm": 4.240390179722257, "learning_rate": 9.532434133248713e-06, "loss": 0.743, "step": 2027 }, { "epoch": 0.16, "grad_norm": 2.6729041427744438, "learning_rate": 9.531878588904448e-06, "loss": 0.7273, "step": 2028 }, { "epoch": 0.16, "grad_norm": 2.8738962510521997, "learning_rate": 9.531322730925648e-06, "loss": 0.7683, "step": 2029 }, { "epoch": 0.16, "grad_norm": 3.9668327773181953, "learning_rate": 9.530766559350784e-06, "loss": 0.7224, "step": 2030 }, { "epoch": 0.16, "grad_norm": 3.452322122144783, "learning_rate": 9.530210074218346e-06, "loss": 0.807, "step": 2031 }, { "epoch": 0.17, "grad_norm": 5.92006307280626, "learning_rate": 9.529653275566848e-06, "loss": 0.6063, "step": 2032 }, { "epoch": 0.17, "grad_norm": 2.662203907162793, "learning_rate": 9.529096163434822e-06, "loss": 0.8439, "step": 2033 }, { "epoch": 0.17, "grad_norm": 3.7089146074680674, "learning_rate": 9.528538737860822e-06, "loss": 0.7786, "step": 2034 }, { "epoch": 0.17, "grad_norm": 9.334824574146266, "learning_rate": 9.527980998883428e-06, "loss": 0.6236, "step": 2035 }, { "epoch": 0.17, "grad_norm": 3.4433397520672435, "learning_rate": 9.527422946541238e-06, "loss": 0.7464, "step": 2036 }, { "epoch": 0.17, "grad_norm": 3.7348087981084404, "learning_rate": 9.526864580872874e-06, "loss": 0.6583, "step": 2037 }, { "epoch": 0.17, "grad_norm": 4.156000313740864, "learning_rate": 9.526305901916977e-06, "loss": 0.759, "step": 2038 }, { "epoch": 0.17, "grad_norm": 4.847661734969043, "learning_rate": 9.525746909712211e-06, "loss": 0.769, "step": 2039 }, { "epoch": 0.17, "grad_norm": 4.725551787401016, "learning_rate": 9.525187604297263e-06, "loss": 0.6909, "step": 2040 }, { "epoch": 0.17, "grad_norm": 3.63523204318454, "learning_rate": 9.52462798571084e-06, "loss": 0.7335, "step": 2041 }, { "epoch": 0.17, "grad_norm": 4.570478317623154, "learning_rate": 9.52406805399167e-06, "loss": 0.7998, "step": 2042 }, { "epoch": 0.17, "grad_norm": 3.150966690651914, "learning_rate": 9.523507809178506e-06, "loss": 0.8675, "step": 2043 }, { "epoch": 0.17, "grad_norm": 3.106597272183591, "learning_rate": 9.52294725131012e-06, "loss": 0.747, "step": 2044 }, { "epoch": 0.17, "grad_norm": 4.94705803016184, "learning_rate": 9.522386380425304e-06, "loss": 0.8825, "step": 2045 }, { "epoch": 0.17, "grad_norm": 3.1018437421997573, "learning_rate": 9.521825196562875e-06, "loss": 0.7366, "step": 2046 }, { "epoch": 0.17, "grad_norm": 5.317760526975097, "learning_rate": 9.521263699761672e-06, "loss": 0.759, "step": 2047 }, { "epoch": 0.17, "grad_norm": 4.759913705581531, "learning_rate": 9.52070189006055e-06, "loss": 0.7566, "step": 2048 }, { "epoch": 0.17, "grad_norm": 4.868405365200601, "learning_rate": 9.520139767498396e-06, "loss": 0.6385, "step": 2049 }, { "epoch": 0.17, "grad_norm": 5.315602251041462, "learning_rate": 9.519577332114107e-06, "loss": 0.8282, "step": 2050 }, { "epoch": 0.17, "grad_norm": 5.184938433524998, "learning_rate": 9.51901458394661e-06, "loss": 0.5954, "step": 2051 }, { "epoch": 0.17, "grad_norm": 9.59952242453887, "learning_rate": 9.518451523034849e-06, "loss": 0.7167, "step": 2052 }, { "epoch": 0.17, "grad_norm": 4.544466954040468, "learning_rate": 9.51788814941779e-06, "loss": 0.6876, "step": 2053 }, { "epoch": 0.17, "grad_norm": 6.584001426821807, "learning_rate": 9.517324463134427e-06, "loss": 0.9588, "step": 2054 }, { "epoch": 0.17, "grad_norm": 7.014282958950346, "learning_rate": 9.516760464223768e-06, "loss": 0.7257, "step": 2055 }, { "epoch": 0.17, "grad_norm": 19.68556681860114, "learning_rate": 9.516196152724844e-06, "loss": 0.7014, "step": 2056 }, { "epoch": 0.17, "grad_norm": 3.639990438257149, "learning_rate": 9.515631528676709e-06, "loss": 0.6899, "step": 2057 }, { "epoch": 0.17, "grad_norm": 4.845043218929715, "learning_rate": 9.515066592118441e-06, "loss": 0.8476, "step": 2058 }, { "epoch": 0.17, "grad_norm": 5.235041824788746, "learning_rate": 9.514501343089135e-06, "loss": 0.6884, "step": 2059 }, { "epoch": 0.17, "grad_norm": 4.661354298247606, "learning_rate": 9.51393578162791e-06, "loss": 0.7492, "step": 2060 }, { "epoch": 0.17, "grad_norm": 11.40175527577864, "learning_rate": 9.513369907773907e-06, "loss": 0.8302, "step": 2061 }, { "epoch": 0.17, "grad_norm": 6.225136140618681, "learning_rate": 9.512803721566288e-06, "loss": 0.8657, "step": 2062 }, { "epoch": 0.17, "grad_norm": 3.438067838384648, "learning_rate": 9.512237223044236e-06, "loss": 0.6311, "step": 2063 }, { "epoch": 0.17, "grad_norm": 6.009569773614633, "learning_rate": 9.511670412246956e-06, "loss": 0.6985, "step": 2064 }, { "epoch": 0.17, "grad_norm": 7.165404543758577, "learning_rate": 9.511103289213678e-06, "loss": 0.6983, "step": 2065 }, { "epoch": 0.17, "grad_norm": 3.5925137624816283, "learning_rate": 9.510535853983646e-06, "loss": 0.7729, "step": 2066 }, { "epoch": 0.17, "grad_norm": 3.830215572789879, "learning_rate": 9.509968106596135e-06, "loss": 0.9328, "step": 2067 }, { "epoch": 0.17, "grad_norm": 3.7681736109660107, "learning_rate": 9.509400047090432e-06, "loss": 0.8825, "step": 2068 }, { "epoch": 0.17, "grad_norm": 4.029848217398536, "learning_rate": 9.508831675505852e-06, "loss": 0.6479, "step": 2069 }, { "epoch": 0.17, "grad_norm": 4.617958175930304, "learning_rate": 9.508262991881732e-06, "loss": 0.8034, "step": 2070 }, { "epoch": 0.17, "grad_norm": 28.21407865701392, "learning_rate": 9.507693996257423e-06, "loss": 0.5949, "step": 2071 }, { "epoch": 0.17, "grad_norm": 5.5739910955395375, "learning_rate": 9.50712468867231e-06, "loss": 0.701, "step": 2072 }, { "epoch": 0.17, "grad_norm": 4.565537743131155, "learning_rate": 9.506555069165788e-06, "loss": 0.6443, "step": 2073 }, { "epoch": 0.17, "grad_norm": 5.208104204780439, "learning_rate": 9.505985137777279e-06, "loss": 0.7324, "step": 2074 }, { "epoch": 0.17, "grad_norm": 4.2543928650833385, "learning_rate": 9.505414894546228e-06, "loss": 0.8729, "step": 2075 }, { "epoch": 0.17, "grad_norm": 12.149072373841449, "learning_rate": 9.504844339512096e-06, "loss": 0.5921, "step": 2076 }, { "epoch": 0.17, "grad_norm": 7.1978628038926145, "learning_rate": 9.50427347271437e-06, "loss": 0.7645, "step": 2077 }, { "epoch": 0.17, "grad_norm": 7.181758722730161, "learning_rate": 9.503702294192563e-06, "loss": 0.8186, "step": 2078 }, { "epoch": 0.17, "grad_norm": 2.8504907787196663, "learning_rate": 9.503130803986195e-06, "loss": 0.7339, "step": 2079 }, { "epoch": 0.17, "grad_norm": 5.938044664849103, "learning_rate": 9.502559002134825e-06, "loss": 0.7385, "step": 2080 }, { "epoch": 0.17, "grad_norm": 6.923340086153708, "learning_rate": 9.501986888678018e-06, "loss": 0.9369, "step": 2081 }, { "epoch": 0.17, "grad_norm": 3.0951155645712025, "learning_rate": 9.501414463655375e-06, "loss": 0.7347, "step": 2082 }, { "epoch": 0.17, "grad_norm": 6.190582623889132, "learning_rate": 9.500841727106505e-06, "loss": 0.7804, "step": 2083 }, { "epoch": 0.17, "grad_norm": 4.209644942656819, "learning_rate": 9.500268679071049e-06, "loss": 0.6868, "step": 2084 }, { "epoch": 0.17, "grad_norm": 5.367701846607473, "learning_rate": 9.499695319588665e-06, "loss": 0.8, "step": 2085 }, { "epoch": 0.17, "grad_norm": 3.0898186882174556, "learning_rate": 9.499121648699032e-06, "loss": 0.6248, "step": 2086 }, { "epoch": 0.17, "grad_norm": 4.262526272615166, "learning_rate": 9.498547666441851e-06, "loss": 0.7654, "step": 2087 }, { "epoch": 0.17, "grad_norm": 6.91330994481406, "learning_rate": 9.497973372856848e-06, "loss": 0.633, "step": 2088 }, { "epoch": 0.17, "grad_norm": 3.9779799090856396, "learning_rate": 9.497398767983765e-06, "loss": 0.8404, "step": 2089 }, { "epoch": 0.17, "grad_norm": 6.934955577626417, "learning_rate": 9.49682385186237e-06, "loss": 0.8079, "step": 2090 }, { "epoch": 0.17, "grad_norm": 10.4209913773888, "learning_rate": 9.49624862453245e-06, "loss": 0.7093, "step": 2091 }, { "epoch": 0.17, "grad_norm": 3.2256437758995236, "learning_rate": 9.495673086033813e-06, "loss": 0.6766, "step": 2092 }, { "epoch": 0.17, "grad_norm": 2.904518263986565, "learning_rate": 9.495097236406293e-06, "loss": 0.6487, "step": 2093 }, { "epoch": 0.17, "grad_norm": 3.9394011083893625, "learning_rate": 9.49452107568974e-06, "loss": 0.8508, "step": 2094 }, { "epoch": 0.17, "grad_norm": 20.7042875945951, "learning_rate": 9.493944603924028e-06, "loss": 0.7621, "step": 2095 }, { "epoch": 0.17, "grad_norm": 7.076726490097566, "learning_rate": 9.493367821149055e-06, "loss": 0.6712, "step": 2096 }, { "epoch": 0.17, "grad_norm": 4.889614597467827, "learning_rate": 9.492790727404735e-06, "loss": 0.8938, "step": 2097 }, { "epoch": 0.17, "grad_norm": 16.48751405685699, "learning_rate": 9.492213322731007e-06, "loss": 0.7715, "step": 2098 }, { "epoch": 0.17, "grad_norm": 4.029371087791076, "learning_rate": 9.491635607167833e-06, "loss": 0.8416, "step": 2099 }, { "epoch": 0.17, "grad_norm": 2.7653381349070747, "learning_rate": 9.491057580755195e-06, "loss": 0.68, "step": 2100 }, { "epoch": 0.17, "grad_norm": 3.722529293694443, "learning_rate": 9.490479243533091e-06, "loss": 0.6533, "step": 2101 }, { "epoch": 0.17, "grad_norm": 4.286052108979617, "learning_rate": 9.48990059554155e-06, "loss": 0.6244, "step": 2102 }, { "epoch": 0.17, "grad_norm": 9.147750980824322, "learning_rate": 9.489321636820618e-06, "loss": 0.7, "step": 2103 }, { "epoch": 0.17, "grad_norm": 4.37475200060871, "learning_rate": 9.48874236741036e-06, "loss": 0.6361, "step": 2104 }, { "epoch": 0.17, "grad_norm": 2.4498326683227556, "learning_rate": 9.488162787350868e-06, "loss": 0.7527, "step": 2105 }, { "epoch": 0.17, "grad_norm": 8.887814751834652, "learning_rate": 9.487582896682252e-06, "loss": 0.7385, "step": 2106 }, { "epoch": 0.17, "grad_norm": 4.504219828683532, "learning_rate": 9.487002695444642e-06, "loss": 0.7546, "step": 2107 }, { "epoch": 0.17, "grad_norm": 12.295969044189745, "learning_rate": 9.486422183678193e-06, "loss": 0.6661, "step": 2108 }, { "epoch": 0.17, "grad_norm": 6.7786179183041275, "learning_rate": 9.48584136142308e-06, "loss": 0.6965, "step": 2109 }, { "epoch": 0.17, "grad_norm": 5.284237269569144, "learning_rate": 9.485260228719502e-06, "loss": 0.5682, "step": 2110 }, { "epoch": 0.17, "grad_norm": 4.980833282126374, "learning_rate": 9.484678785607672e-06, "loss": 0.7451, "step": 2111 }, { "epoch": 0.17, "grad_norm": 5.92210072304862, "learning_rate": 9.484097032127832e-06, "loss": 0.6947, "step": 2112 }, { "epoch": 0.17, "grad_norm": 3.334226237755275, "learning_rate": 9.483514968320244e-06, "loss": 0.812, "step": 2113 }, { "epoch": 0.17, "grad_norm": 3.8236768840805073, "learning_rate": 9.482932594225191e-06, "loss": 0.8132, "step": 2114 }, { "epoch": 0.17, "grad_norm": 3.1970556987782914, "learning_rate": 9.482349909882973e-06, "loss": 0.7479, "step": 2115 }, { "epoch": 0.17, "grad_norm": 3.221272605889116, "learning_rate": 9.48176691533392e-06, "loss": 0.7033, "step": 2116 }, { "epoch": 0.17, "grad_norm": 3.603480952052131, "learning_rate": 9.481183610618376e-06, "loss": 0.6808, "step": 2117 }, { "epoch": 0.17, "grad_norm": 9.582406893560186, "learning_rate": 9.480599995776711e-06, "loss": 0.8008, "step": 2118 }, { "epoch": 0.17, "grad_norm": 3.2382574803857005, "learning_rate": 9.480016070849313e-06, "loss": 0.6857, "step": 2119 }, { "epoch": 0.17, "grad_norm": 5.564920211494573, "learning_rate": 9.479431835876596e-06, "loss": 0.6747, "step": 2120 }, { "epoch": 0.17, "grad_norm": 3.4924521573959746, "learning_rate": 9.47884729089899e-06, "loss": 0.7785, "step": 2121 }, { "epoch": 0.17, "grad_norm": 15.528146713415923, "learning_rate": 9.47826243595695e-06, "loss": 0.8467, "step": 2122 }, { "epoch": 0.17, "grad_norm": 5.41285363032105, "learning_rate": 9.477677271090953e-06, "loss": 0.5509, "step": 2123 }, { "epoch": 0.17, "grad_norm": 3.039445064345485, "learning_rate": 9.477091796341493e-06, "loss": 0.6809, "step": 2124 }, { "epoch": 0.17, "grad_norm": 4.1881250448313025, "learning_rate": 9.476506011749092e-06, "loss": 0.8855, "step": 2125 }, { "epoch": 0.17, "grad_norm": 3.2351841021100927, "learning_rate": 9.475919917354289e-06, "loss": 0.7689, "step": 2126 }, { "epoch": 0.17, "grad_norm": 5.544810584894087, "learning_rate": 9.475333513197645e-06, "loss": 0.9212, "step": 2127 }, { "epoch": 0.17, "grad_norm": 3.885776783984978, "learning_rate": 9.474746799319742e-06, "loss": 0.8256, "step": 2128 }, { "epoch": 0.17, "grad_norm": 6.88149219177898, "learning_rate": 9.474159775761187e-06, "loss": 0.5833, "step": 2129 }, { "epoch": 0.17, "grad_norm": 4.908739721336877, "learning_rate": 9.473572442562603e-06, "loss": 0.8242, "step": 2130 }, { "epoch": 0.17, "grad_norm": 3.413556592269906, "learning_rate": 9.472984799764636e-06, "loss": 0.8259, "step": 2131 }, { "epoch": 0.17, "grad_norm": 3.371195729587371, "learning_rate": 9.47239684740796e-06, "loss": 0.7847, "step": 2132 }, { "epoch": 0.17, "grad_norm": 7.3310849215939875, "learning_rate": 9.471808585533258e-06, "loss": 0.7993, "step": 2133 }, { "epoch": 0.17, "grad_norm": 6.936647099613488, "learning_rate": 9.471220014181247e-06, "loss": 0.7231, "step": 2134 }, { "epoch": 0.17, "grad_norm": 5.746035773959225, "learning_rate": 9.470631133392658e-06, "loss": 0.6573, "step": 2135 }, { "epoch": 0.17, "grad_norm": 4.2937572447606005, "learning_rate": 9.470041943208244e-06, "loss": 0.7179, "step": 2136 }, { "epoch": 0.17, "grad_norm": 5.616107754082006, "learning_rate": 9.469452443668783e-06, "loss": 0.7301, "step": 2137 }, { "epoch": 0.17, "grad_norm": 3.1741497862748744, "learning_rate": 9.468862634815071e-06, "loss": 0.7679, "step": 2138 }, { "epoch": 0.17, "grad_norm": 4.6015770803456935, "learning_rate": 9.468272516687927e-06, "loss": 0.9215, "step": 2139 }, { "epoch": 0.17, "grad_norm": 4.248050816852012, "learning_rate": 9.467682089328188e-06, "loss": 0.652, "step": 2140 }, { "epoch": 0.17, "grad_norm": 3.667877088209888, "learning_rate": 9.467091352776719e-06, "loss": 0.6745, "step": 2141 }, { "epoch": 0.17, "grad_norm": 5.7982642605322505, "learning_rate": 9.4665003070744e-06, "loss": 0.5877, "step": 2142 }, { "epoch": 0.17, "grad_norm": 6.232698648392658, "learning_rate": 9.465908952262138e-06, "loss": 0.6455, "step": 2143 }, { "epoch": 0.17, "grad_norm": 3.0300997784156403, "learning_rate": 9.465317288380856e-06, "loss": 0.664, "step": 2144 }, { "epoch": 0.17, "grad_norm": 4.661573369932758, "learning_rate": 9.464725315471503e-06, "loss": 0.7229, "step": 2145 }, { "epoch": 0.17, "grad_norm": 3.1932061298592256, "learning_rate": 9.464133033575044e-06, "loss": 0.7656, "step": 2146 }, { "epoch": 0.17, "grad_norm": 3.2102250362492124, "learning_rate": 9.463540442732471e-06, "loss": 0.8558, "step": 2147 }, { "epoch": 0.17, "grad_norm": 19.91230012279406, "learning_rate": 9.462947542984795e-06, "loss": 0.548, "step": 2148 }, { "epoch": 0.17, "grad_norm": 4.421373048694864, "learning_rate": 9.46235433437305e-06, "loss": 0.7712, "step": 2149 }, { "epoch": 0.17, "grad_norm": 22.704662749315894, "learning_rate": 9.461760816938284e-06, "loss": 0.7436, "step": 2150 }, { "epoch": 0.17, "grad_norm": 4.5205088420416795, "learning_rate": 9.461166990721577e-06, "loss": 0.7562, "step": 2151 }, { "epoch": 0.17, "grad_norm": 6.494619105681334, "learning_rate": 9.460572855764026e-06, "loss": 0.7307, "step": 2152 }, { "epoch": 0.17, "grad_norm": 7.0948556508911205, "learning_rate": 9.459978412106747e-06, "loss": 0.7919, "step": 2153 }, { "epoch": 0.17, "grad_norm": 3.971358603425492, "learning_rate": 9.459383659790878e-06, "loss": 0.9116, "step": 2154 }, { "epoch": 0.18, "grad_norm": 3.770241627934109, "learning_rate": 9.458788598857583e-06, "loss": 0.7459, "step": 2155 }, { "epoch": 0.18, "grad_norm": 3.9916453650688513, "learning_rate": 9.458193229348041e-06, "loss": 0.7347, "step": 2156 }, { "epoch": 0.18, "grad_norm": 4.685289420583578, "learning_rate": 9.457597551303456e-06, "loss": 0.7292, "step": 2157 }, { "epoch": 0.18, "grad_norm": 9.550061305922657, "learning_rate": 9.457001564765054e-06, "loss": 0.8577, "step": 2158 }, { "epoch": 0.18, "grad_norm": 4.658212701419767, "learning_rate": 9.45640526977408e-06, "loss": 0.6219, "step": 2159 }, { "epoch": 0.18, "grad_norm": 4.328712174049183, "learning_rate": 9.455808666371801e-06, "loss": 0.8334, "step": 2160 }, { "epoch": 0.18, "grad_norm": 31.283901558930665, "learning_rate": 9.455211754599507e-06, "loss": 0.8123, "step": 2161 }, { "epoch": 0.18, "grad_norm": 6.2149093110170845, "learning_rate": 9.454614534498506e-06, "loss": 0.6287, "step": 2162 }, { "epoch": 0.18, "grad_norm": 3.499307005845178, "learning_rate": 9.454017006110131e-06, "loss": 0.7581, "step": 2163 }, { "epoch": 0.18, "grad_norm": 4.901657518956858, "learning_rate": 9.453419169475735e-06, "loss": 0.7202, "step": 2164 }, { "epoch": 0.18, "grad_norm": 4.592944564405269, "learning_rate": 9.452821024636691e-06, "loss": 0.8367, "step": 2165 }, { "epoch": 0.18, "grad_norm": 5.226155793398849, "learning_rate": 9.452222571634395e-06, "loss": 0.6827, "step": 2166 }, { "epoch": 0.18, "grad_norm": 4.891219402143376, "learning_rate": 9.451623810510265e-06, "loss": 0.925, "step": 2167 }, { "epoch": 0.18, "grad_norm": 3.6963689247433322, "learning_rate": 9.451024741305735e-06, "loss": 0.741, "step": 2168 }, { "epoch": 0.18, "grad_norm": 3.2262127277331176, "learning_rate": 9.450425364062267e-06, "loss": 0.7828, "step": 2169 }, { "epoch": 0.18, "grad_norm": 4.250505801213511, "learning_rate": 9.449825678821342e-06, "loss": 0.7198, "step": 2170 }, { "epoch": 0.18, "grad_norm": 5.424240575005041, "learning_rate": 9.449225685624464e-06, "loss": 0.8776, "step": 2171 }, { "epoch": 0.18, "grad_norm": 3.2262148617883866, "learning_rate": 9.448625384513152e-06, "loss": 0.7382, "step": 2172 }, { "epoch": 0.18, "grad_norm": 3.277875025867993, "learning_rate": 9.448024775528952e-06, "loss": 0.6762, "step": 2173 }, { "epoch": 0.18, "grad_norm": 20.74502568538336, "learning_rate": 9.447423858713432e-06, "loss": 0.7334, "step": 2174 }, { "epoch": 0.18, "grad_norm": 29.118910404589663, "learning_rate": 9.446822634108176e-06, "loss": 0.7457, "step": 2175 }, { "epoch": 0.18, "grad_norm": 58.80773566882819, "learning_rate": 9.446221101754795e-06, "loss": 0.9126, "step": 2176 }, { "epoch": 0.18, "grad_norm": 19.832332368923616, "learning_rate": 9.445619261694919e-06, "loss": 0.7298, "step": 2177 }, { "epoch": 0.18, "grad_norm": 8.565317443396655, "learning_rate": 9.445017113970196e-06, "loss": 0.7495, "step": 2178 }, { "epoch": 0.18, "grad_norm": 7.2153398268895605, "learning_rate": 9.444414658622303e-06, "loss": 0.7608, "step": 2179 }, { "epoch": 0.18, "grad_norm": 5.836142163920102, "learning_rate": 9.44381189569293e-06, "loss": 0.7979, "step": 2180 }, { "epoch": 0.18, "grad_norm": 6.857945916804737, "learning_rate": 9.443208825223794e-06, "loss": 0.7325, "step": 2181 }, { "epoch": 0.18, "grad_norm": 3.7139954606241856, "learning_rate": 9.442605447256629e-06, "loss": 0.7275, "step": 2182 }, { "epoch": 0.18, "grad_norm": 3.2927347969230767, "learning_rate": 9.442001761833194e-06, "loss": 0.696, "step": 2183 }, { "epoch": 0.18, "grad_norm": 4.0152490524861895, "learning_rate": 9.441397768995269e-06, "loss": 0.8456, "step": 2184 }, { "epoch": 0.18, "grad_norm": 4.562548877766768, "learning_rate": 9.440793468784652e-06, "loss": 0.7004, "step": 2185 }, { "epoch": 0.18, "grad_norm": 3.2214597213621525, "learning_rate": 9.440188861243167e-06, "loss": 0.7021, "step": 2186 }, { "epoch": 0.18, "grad_norm": 4.992153901709443, "learning_rate": 9.439583946412655e-06, "loss": 0.6799, "step": 2187 }, { "epoch": 0.18, "grad_norm": 5.52581188192942, "learning_rate": 9.438978724334979e-06, "loss": 0.8105, "step": 2188 }, { "epoch": 0.18, "grad_norm": 6.3332410062828615, "learning_rate": 9.438373195052027e-06, "loss": 0.7983, "step": 2189 }, { "epoch": 0.18, "grad_norm": 4.543033824275824, "learning_rate": 9.4377673586057e-06, "loss": 0.785, "step": 2190 }, { "epoch": 0.18, "grad_norm": 3.4754594955103317, "learning_rate": 9.437161215037931e-06, "loss": 0.8684, "step": 2191 }, { "epoch": 0.18, "grad_norm": 2.581726752132544, "learning_rate": 9.436554764390668e-06, "loss": 0.6639, "step": 2192 }, { "epoch": 0.18, "grad_norm": 5.159742703622623, "learning_rate": 9.435948006705882e-06, "loss": 0.7564, "step": 2193 }, { "epoch": 0.18, "grad_norm": 4.920817169686874, "learning_rate": 9.43534094202556e-06, "loss": 0.7033, "step": 2194 }, { "epoch": 0.18, "grad_norm": 8.283248881171149, "learning_rate": 9.434733570391719e-06, "loss": 0.6106, "step": 2195 }, { "epoch": 0.18, "grad_norm": 3.7483595265567726, "learning_rate": 9.434125891846391e-06, "loss": 0.7352, "step": 2196 }, { "epoch": 0.18, "grad_norm": 8.247800003799725, "learning_rate": 9.433517906431631e-06, "loss": 0.6542, "step": 2197 }, { "epoch": 0.18, "grad_norm": 6.2287248294909014, "learning_rate": 9.432909614189518e-06, "loss": 0.8759, "step": 2198 }, { "epoch": 0.18, "grad_norm": 4.826646694421205, "learning_rate": 9.432301015162146e-06, "loss": 0.643, "step": 2199 }, { "epoch": 0.18, "grad_norm": 9.164982356453415, "learning_rate": 9.431692109391637e-06, "loss": 0.8407, "step": 2200 }, { "epoch": 0.18, "grad_norm": 4.94094157276911, "learning_rate": 9.43108289692013e-06, "loss": 0.9695, "step": 2201 }, { "epoch": 0.18, "grad_norm": 6.620832287217032, "learning_rate": 9.430473377789785e-06, "loss": 0.8258, "step": 2202 }, { "epoch": 0.18, "grad_norm": 4.160582982679015, "learning_rate": 9.429863552042786e-06, "loss": 0.8213, "step": 2203 }, { "epoch": 0.18, "grad_norm": 3.866757732410586, "learning_rate": 9.429253419721335e-06, "loss": 0.8619, "step": 2204 }, { "epoch": 0.18, "grad_norm": 3.6665511360027305, "learning_rate": 9.428642980867661e-06, "loss": 0.7055, "step": 2205 }, { "epoch": 0.18, "grad_norm": 25.09850099463008, "learning_rate": 9.428032235524007e-06, "loss": 0.9461, "step": 2206 }, { "epoch": 0.18, "grad_norm": 5.442019759611695, "learning_rate": 9.427421183732642e-06, "loss": 0.6679, "step": 2207 }, { "epoch": 0.18, "grad_norm": 8.97515284051912, "learning_rate": 9.426809825535851e-06, "loss": 0.6993, "step": 2208 }, { "epoch": 0.18, "grad_norm": 10.144782682181232, "learning_rate": 9.426198160975948e-06, "loss": 0.6951, "step": 2209 }, { "epoch": 0.18, "grad_norm": 4.949928000474687, "learning_rate": 9.425586190095263e-06, "loss": 0.8416, "step": 2210 }, { "epoch": 0.18, "grad_norm": 4.019983078496993, "learning_rate": 9.424973912936147e-06, "loss": 0.733, "step": 2211 }, { "epoch": 0.18, "grad_norm": 4.9531397698021244, "learning_rate": 9.424361329540976e-06, "loss": 0.7383, "step": 2212 }, { "epoch": 0.18, "grad_norm": 13.257236245716877, "learning_rate": 9.42374843995214e-06, "loss": 0.7672, "step": 2213 }, { "epoch": 0.18, "grad_norm": 9.82778335009299, "learning_rate": 9.42313524421206e-06, "loss": 0.5727, "step": 2214 }, { "epoch": 0.18, "grad_norm": 3.9354030172853096, "learning_rate": 9.42252174236317e-06, "loss": 0.7967, "step": 2215 }, { "epoch": 0.18, "grad_norm": 5.029573106170355, "learning_rate": 9.42190793444793e-06, "loss": 0.7446, "step": 2216 }, { "epoch": 0.18, "grad_norm": 3.6301152354638875, "learning_rate": 9.421293820508817e-06, "loss": 0.6335, "step": 2217 }, { "epoch": 0.18, "grad_norm": 11.412441280826037, "learning_rate": 9.420679400588334e-06, "loss": 0.8085, "step": 2218 }, { "epoch": 0.18, "grad_norm": 5.591515482115656, "learning_rate": 9.420064674729002e-06, "loss": 0.6424, "step": 2219 }, { "epoch": 0.18, "grad_norm": 4.97081935293266, "learning_rate": 9.419449642973361e-06, "loss": 0.8159, "step": 2220 }, { "epoch": 0.18, "grad_norm": 4.254327215298947, "learning_rate": 9.41883430536398e-06, "loss": 0.7236, "step": 2221 }, { "epoch": 0.18, "grad_norm": 4.3356343693922055, "learning_rate": 9.41821866194344e-06, "loss": 0.7456, "step": 2222 }, { "epoch": 0.18, "grad_norm": 5.555589072833969, "learning_rate": 9.41760271275435e-06, "loss": 0.6993, "step": 2223 }, { "epoch": 0.18, "grad_norm": 4.34791617128502, "learning_rate": 9.416986457839336e-06, "loss": 0.6812, "step": 2224 }, { "epoch": 0.18, "grad_norm": 5.164799230470531, "learning_rate": 9.41636989724105e-06, "loss": 0.6288, "step": 2225 }, { "epoch": 0.18, "grad_norm": 5.775550342921115, "learning_rate": 9.415753031002157e-06, "loss": 0.4857, "step": 2226 }, { "epoch": 0.18, "grad_norm": 5.5456612743322875, "learning_rate": 9.415135859165349e-06, "loss": 0.8171, "step": 2227 }, { "epoch": 0.18, "grad_norm": 3.8019097332372764, "learning_rate": 9.414518381773342e-06, "loss": 0.6382, "step": 2228 }, { "epoch": 0.18, "grad_norm": 8.500468757993067, "learning_rate": 9.413900598868867e-06, "loss": 0.9072, "step": 2229 }, { "epoch": 0.18, "grad_norm": 4.636342966302793, "learning_rate": 9.413282510494676e-06, "loss": 0.8352, "step": 2230 }, { "epoch": 0.18, "grad_norm": 3.7141200791250024, "learning_rate": 9.41266411669355e-06, "loss": 0.6337, "step": 2231 }, { "epoch": 0.18, "grad_norm": 4.316702394877184, "learning_rate": 9.412045417508281e-06, "loss": 0.8037, "step": 2232 }, { "epoch": 0.18, "grad_norm": 7.034867282733218, "learning_rate": 9.411426412981688e-06, "loss": 0.6919, "step": 2233 }, { "epoch": 0.18, "grad_norm": 3.4363398924341304, "learning_rate": 9.410807103156611e-06, "loss": 0.5706, "step": 2234 }, { "epoch": 0.18, "grad_norm": 3.989275170502083, "learning_rate": 9.410187488075912e-06, "loss": 0.5811, "step": 2235 }, { "epoch": 0.18, "grad_norm": 5.685331390481626, "learning_rate": 9.409567567782466e-06, "loss": 0.743, "step": 2236 }, { "epoch": 0.18, "grad_norm": 3.9990264377885496, "learning_rate": 9.408947342319183e-06, "loss": 0.7371, "step": 2237 }, { "epoch": 0.18, "grad_norm": 5.526359854449125, "learning_rate": 9.408326811728982e-06, "loss": 0.8689, "step": 2238 }, { "epoch": 0.18, "grad_norm": 2.993134258706843, "learning_rate": 9.407705976054808e-06, "loss": 0.6858, "step": 2239 }, { "epoch": 0.18, "grad_norm": 6.724796790304231, "learning_rate": 9.407084835339627e-06, "loss": 0.8478, "step": 2240 }, { "epoch": 0.18, "grad_norm": 4.4900506903132635, "learning_rate": 9.406463389626425e-06, "loss": 0.8297, "step": 2241 }, { "epoch": 0.18, "grad_norm": 3.280449139976349, "learning_rate": 9.405841638958212e-06, "loss": 0.8071, "step": 2242 }, { "epoch": 0.18, "grad_norm": 12.834778059369317, "learning_rate": 9.405219583378018e-06, "loss": 0.8655, "step": 2243 }, { "epoch": 0.18, "grad_norm": 4.373236267679056, "learning_rate": 9.40459722292889e-06, "loss": 0.6861, "step": 2244 }, { "epoch": 0.18, "grad_norm": 4.239570273055634, "learning_rate": 9.4039745576539e-06, "loss": 0.5999, "step": 2245 }, { "epoch": 0.18, "grad_norm": 8.546177899064674, "learning_rate": 9.40335158759614e-06, "loss": 0.7862, "step": 2246 }, { "epoch": 0.18, "grad_norm": 2.779135081755068, "learning_rate": 9.402728312798726e-06, "loss": 0.8104, "step": 2247 }, { "epoch": 0.18, "grad_norm": 4.803584755754255, "learning_rate": 9.402104733304792e-06, "loss": 0.6223, "step": 2248 }, { "epoch": 0.18, "grad_norm": 4.413725450716245, "learning_rate": 9.401480849157489e-06, "loss": 0.7495, "step": 2249 }, { "epoch": 0.18, "grad_norm": 5.3182307049865765, "learning_rate": 9.4008566604e-06, "loss": 0.5752, "step": 2250 }, { "epoch": 0.18, "grad_norm": 10.231812398070327, "learning_rate": 9.400232167075519e-06, "loss": 0.7582, "step": 2251 }, { "epoch": 0.18, "grad_norm": 16.15212034972755, "learning_rate": 9.399607369227265e-06, "loss": 0.9528, "step": 2252 }, { "epoch": 0.18, "grad_norm": 3.5790077520491015, "learning_rate": 9.398982266898481e-06, "loss": 0.7943, "step": 2253 }, { "epoch": 0.18, "grad_norm": 3.9149732075112587, "learning_rate": 9.398356860132425e-06, "loss": 0.7267, "step": 2254 }, { "epoch": 0.18, "grad_norm": 4.076918350740101, "learning_rate": 9.39773114897238e-06, "loss": 0.753, "step": 2255 }, { "epoch": 0.18, "grad_norm": 4.7194783380100125, "learning_rate": 9.397105133461647e-06, "loss": 0.5195, "step": 2256 }, { "epoch": 0.18, "grad_norm": 13.315535602071407, "learning_rate": 9.396478813643554e-06, "loss": 0.9935, "step": 2257 }, { "epoch": 0.18, "grad_norm": 10.264093427957008, "learning_rate": 9.395852189561445e-06, "loss": 0.7321, "step": 2258 }, { "epoch": 0.18, "grad_norm": 4.966416020563409, "learning_rate": 9.395225261258686e-06, "loss": 0.7473, "step": 2259 }, { "epoch": 0.18, "grad_norm": 7.9751275833382085, "learning_rate": 9.394598028778664e-06, "loss": 0.7364, "step": 2260 }, { "epoch": 0.18, "grad_norm": 6.8490424967201005, "learning_rate": 9.393970492164787e-06, "loss": 0.8506, "step": 2261 }, { "epoch": 0.18, "grad_norm": 7.222531318099842, "learning_rate": 9.393342651460487e-06, "loss": 0.5739, "step": 2262 }, { "epoch": 0.18, "grad_norm": 5.5657210446969625, "learning_rate": 9.392714506709211e-06, "loss": 0.7827, "step": 2263 }, { "epoch": 0.18, "grad_norm": 3.836570039104997, "learning_rate": 9.392086057954432e-06, "loss": 0.7109, "step": 2264 }, { "epoch": 0.18, "grad_norm": 5.6814510069416, "learning_rate": 9.391457305239644e-06, "loss": 0.8507, "step": 2265 }, { "epoch": 0.18, "grad_norm": 8.854628491932893, "learning_rate": 9.39082824860836e-06, "loss": 0.6068, "step": 2266 }, { "epoch": 0.18, "grad_norm": 10.040644252419083, "learning_rate": 9.390198888104113e-06, "loss": 0.8004, "step": 2267 }, { "epoch": 0.18, "grad_norm": 27.865214705251393, "learning_rate": 9.389569223770461e-06, "loss": 0.8255, "step": 2268 }, { "epoch": 0.18, "grad_norm": 3.579127353507633, "learning_rate": 9.388939255650978e-06, "loss": 0.8223, "step": 2269 }, { "epoch": 0.18, "grad_norm": 3.6277253920021946, "learning_rate": 9.388308983789264e-06, "loss": 0.7211, "step": 2270 }, { "epoch": 0.18, "grad_norm": 6.205500060959045, "learning_rate": 9.38767840822894e-06, "loss": 0.6732, "step": 2271 }, { "epoch": 0.18, "grad_norm": 7.000987755892481, "learning_rate": 9.38704752901364e-06, "loss": 0.6675, "step": 2272 }, { "epoch": 0.18, "grad_norm": 4.250483458646069, "learning_rate": 9.38641634618703e-06, "loss": 0.7621, "step": 2273 }, { "epoch": 0.18, "grad_norm": 6.938613553879345, "learning_rate": 9.385784859792787e-06, "loss": 0.931, "step": 2274 }, { "epoch": 0.18, "grad_norm": 18.515364405854417, "learning_rate": 9.38515306987462e-06, "loss": 0.4976, "step": 2275 }, { "epoch": 0.18, "grad_norm": 4.723782511167325, "learning_rate": 9.384520976476246e-06, "loss": 0.7536, "step": 2276 }, { "epoch": 0.18, "grad_norm": 4.953722635888628, "learning_rate": 9.383888579641414e-06, "loss": 0.7423, "step": 2277 }, { "epoch": 0.19, "grad_norm": 4.231940019795666, "learning_rate": 9.383255879413891e-06, "loss": 0.7268, "step": 2278 }, { "epoch": 0.19, "grad_norm": 4.276197776391912, "learning_rate": 9.382622875837459e-06, "loss": 1.0088, "step": 2279 }, { "epoch": 0.19, "grad_norm": 6.063738292759658, "learning_rate": 9.381989568955931e-06, "loss": 0.8127, "step": 2280 }, { "epoch": 0.19, "grad_norm": 6.145934266294392, "learning_rate": 9.381355958813132e-06, "loss": 0.6489, "step": 2281 }, { "epoch": 0.19, "grad_norm": 4.153770353330247, "learning_rate": 9.380722045452915e-06, "loss": 0.6746, "step": 2282 }, { "epoch": 0.19, "grad_norm": 6.36240727890106, "learning_rate": 9.380087828919149e-06, "loss": 0.7459, "step": 2283 }, { "epoch": 0.19, "grad_norm": 6.791107575161874, "learning_rate": 9.379453309255726e-06, "loss": 0.7729, "step": 2284 }, { "epoch": 0.19, "grad_norm": 2.8117804704605573, "learning_rate": 9.378818486506556e-06, "loss": 0.7784, "step": 2285 }, { "epoch": 0.19, "grad_norm": 8.461903675312902, "learning_rate": 9.378183360715579e-06, "loss": 0.7211, "step": 2286 }, { "epoch": 0.19, "grad_norm": 4.238414116723706, "learning_rate": 9.377547931926743e-06, "loss": 0.677, "step": 2287 }, { "epoch": 0.19, "grad_norm": 8.326222112098273, "learning_rate": 9.376912200184029e-06, "loss": 0.7605, "step": 2288 }, { "epoch": 0.19, "grad_norm": 4.178287786114565, "learning_rate": 9.37627616553143e-06, "loss": 0.6586, "step": 2289 }, { "epoch": 0.19, "grad_norm": 5.932124692152528, "learning_rate": 9.375639828012965e-06, "loss": 0.693, "step": 2290 }, { "epoch": 0.19, "grad_norm": 3.622012361601318, "learning_rate": 9.375003187672674e-06, "loss": 0.6803, "step": 2291 }, { "epoch": 0.19, "grad_norm": 6.322313501361187, "learning_rate": 9.374366244554614e-06, "loss": 0.8546, "step": 2292 }, { "epoch": 0.19, "grad_norm": 3.822640361895152, "learning_rate": 9.373728998702868e-06, "loss": 0.9114, "step": 2293 }, { "epoch": 0.19, "grad_norm": 4.264606666000039, "learning_rate": 9.373091450161534e-06, "loss": 0.7643, "step": 2294 }, { "epoch": 0.19, "grad_norm": 7.147994751195317, "learning_rate": 9.372453598974738e-06, "loss": 0.7313, "step": 2295 }, { "epoch": 0.19, "grad_norm": 12.536771734088504, "learning_rate": 9.371815445186622e-06, "loss": 0.8124, "step": 2296 }, { "epoch": 0.19, "grad_norm": 3.852887065233964, "learning_rate": 9.371176988841349e-06, "loss": 0.8395, "step": 2297 }, { "epoch": 0.19, "grad_norm": 4.606432324759747, "learning_rate": 9.370538229983105e-06, "loss": 0.6717, "step": 2298 }, { "epoch": 0.19, "grad_norm": 7.120679921792463, "learning_rate": 9.369899168656095e-06, "loss": 0.708, "step": 2299 }, { "epoch": 0.19, "grad_norm": 7.993921079656909, "learning_rate": 9.36925980490455e-06, "loss": 0.6993, "step": 2300 }, { "epoch": 0.19, "grad_norm": 4.196791159078374, "learning_rate": 9.368620138772715e-06, "loss": 0.668, "step": 2301 }, { "epoch": 0.19, "grad_norm": 3.929597684313037, "learning_rate": 9.367980170304857e-06, "loss": 0.7922, "step": 2302 }, { "epoch": 0.19, "grad_norm": 3.4611831599650142, "learning_rate": 9.36733989954527e-06, "loss": 0.6946, "step": 2303 }, { "epoch": 0.19, "grad_norm": 3.5608415267233555, "learning_rate": 9.366699326538264e-06, "loss": 0.6488, "step": 2304 }, { "epoch": 0.19, "grad_norm": 5.109551002011297, "learning_rate": 9.366058451328169e-06, "loss": 0.7842, "step": 2305 }, { "epoch": 0.19, "grad_norm": 5.191770420081686, "learning_rate": 9.365417273959336e-06, "loss": 0.5759, "step": 2306 }, { "epoch": 0.19, "grad_norm": 5.441496337417363, "learning_rate": 9.364775794476142e-06, "loss": 0.7642, "step": 2307 }, { "epoch": 0.19, "grad_norm": 4.844667617725312, "learning_rate": 9.36413401292298e-06, "loss": 0.8297, "step": 2308 }, { "epoch": 0.19, "grad_norm": 21.017412086286843, "learning_rate": 9.363491929344266e-06, "loss": 0.6978, "step": 2309 }, { "epoch": 0.19, "grad_norm": 4.383074441942804, "learning_rate": 9.362849543784436e-06, "loss": 0.6984, "step": 2310 }, { "epoch": 0.19, "grad_norm": 3.8425171936933276, "learning_rate": 9.362206856287946e-06, "loss": 0.9348, "step": 2311 }, { "epoch": 0.19, "grad_norm": 4.422808583565183, "learning_rate": 9.361563866899274e-06, "loss": 0.6869, "step": 2312 }, { "epoch": 0.19, "grad_norm": 3.6981474985411316, "learning_rate": 9.360920575662922e-06, "loss": 0.6265, "step": 2313 }, { "epoch": 0.19, "grad_norm": 3.7457272927885703, "learning_rate": 9.360276982623405e-06, "loss": 0.5414, "step": 2314 }, { "epoch": 0.19, "grad_norm": 12.327323483355492, "learning_rate": 9.359633087825268e-06, "loss": 0.903, "step": 2315 }, { "epoch": 0.19, "grad_norm": 3.6750288103496485, "learning_rate": 9.35898889131307e-06, "loss": 0.7297, "step": 2316 }, { "epoch": 0.19, "grad_norm": 6.510314404097722, "learning_rate": 9.358344393131395e-06, "loss": 0.7476, "step": 2317 }, { "epoch": 0.19, "grad_norm": 14.031610718159982, "learning_rate": 9.357699593324846e-06, "loss": 0.748, "step": 2318 }, { "epoch": 0.19, "grad_norm": 5.193450483632661, "learning_rate": 9.357054491938045e-06, "loss": 0.6919, "step": 2319 }, { "epoch": 0.19, "grad_norm": 4.972271873679375, "learning_rate": 9.35640908901564e-06, "loss": 0.7184, "step": 2320 }, { "epoch": 0.19, "grad_norm": 4.126551476664808, "learning_rate": 9.355763384602294e-06, "loss": 0.7888, "step": 2321 }, { "epoch": 0.19, "grad_norm": 3.2265443374899627, "learning_rate": 9.355117378742698e-06, "loss": 1.0356, "step": 2322 }, { "epoch": 0.19, "grad_norm": 3.8368630705620177, "learning_rate": 9.354471071481557e-06, "loss": 0.8026, "step": 2323 }, { "epoch": 0.19, "grad_norm": 5.01786405246856, "learning_rate": 9.3538244628636e-06, "loss": 0.7618, "step": 2324 }, { "epoch": 0.19, "grad_norm": 14.700103199170401, "learning_rate": 9.353177552933575e-06, "loss": 0.7757, "step": 2325 }, { "epoch": 0.19, "grad_norm": 4.044818974028292, "learning_rate": 9.352530341736255e-06, "loss": 0.6542, "step": 2326 }, { "epoch": 0.19, "grad_norm": 3.9466598141175013, "learning_rate": 9.351882829316428e-06, "loss": 0.7744, "step": 2327 }, { "epoch": 0.19, "grad_norm": 3.7063760438442594, "learning_rate": 9.351235015718907e-06, "loss": 0.6686, "step": 2328 }, { "epoch": 0.19, "grad_norm": 11.267700730580987, "learning_rate": 9.350586900988527e-06, "loss": 0.6818, "step": 2329 }, { "epoch": 0.19, "grad_norm": 4.402227070589877, "learning_rate": 9.349938485170139e-06, "loss": 0.7174, "step": 2330 }, { "epoch": 0.19, "grad_norm": 6.198450140006214, "learning_rate": 9.34928976830862e-06, "loss": 0.6164, "step": 2331 }, { "epoch": 0.19, "grad_norm": 4.527063590552783, "learning_rate": 9.34864075044886e-06, "loss": 0.8365, "step": 2332 }, { "epoch": 0.19, "grad_norm": 17.43452740570329, "learning_rate": 9.347991431635782e-06, "loss": 0.6985, "step": 2333 }, { "epoch": 0.19, "grad_norm": 3.3467749681425665, "learning_rate": 9.347341811914319e-06, "loss": 0.7493, "step": 2334 }, { "epoch": 0.19, "grad_norm": 7.50519663713635, "learning_rate": 9.34669189132943e-06, "loss": 0.7604, "step": 2335 }, { "epoch": 0.19, "grad_norm": 6.398172472173961, "learning_rate": 9.346041669926092e-06, "loss": 0.6765, "step": 2336 }, { "epoch": 0.19, "grad_norm": 3.2056553025713703, "learning_rate": 9.345391147749305e-06, "loss": 0.8591, "step": 2337 }, { "epoch": 0.19, "grad_norm": 2.9162958907759684, "learning_rate": 9.344740324844091e-06, "loss": 0.7318, "step": 2338 }, { "epoch": 0.19, "grad_norm": 4.29811290595597, "learning_rate": 9.344089201255488e-06, "loss": 0.7979, "step": 2339 }, { "epoch": 0.19, "grad_norm": 3.833766431633332, "learning_rate": 9.343437777028561e-06, "loss": 0.7323, "step": 2340 }, { "epoch": 0.19, "grad_norm": 3.213013395758147, "learning_rate": 9.342786052208392e-06, "loss": 0.7295, "step": 2341 }, { "epoch": 0.19, "grad_norm": 4.853398291706577, "learning_rate": 9.342134026840083e-06, "loss": 0.6861, "step": 2342 }, { "epoch": 0.19, "grad_norm": 6.7240291158695555, "learning_rate": 9.34148170096876e-06, "loss": 0.6154, "step": 2343 }, { "epoch": 0.19, "grad_norm": 2.9431677635216937, "learning_rate": 9.340829074639566e-06, "loss": 0.7909, "step": 2344 }, { "epoch": 0.19, "grad_norm": 3.3251200775984358, "learning_rate": 9.340176147897669e-06, "loss": 0.9101, "step": 2345 }, { "epoch": 0.19, "grad_norm": 5.8973364914007345, "learning_rate": 9.339522920788252e-06, "loss": 0.8606, "step": 2346 }, { "epoch": 0.19, "grad_norm": 3.3615007147016294, "learning_rate": 9.338869393356527e-06, "loss": 0.727, "step": 2347 }, { "epoch": 0.19, "grad_norm": 3.058557056916983, "learning_rate": 9.338215565647719e-06, "loss": 0.8776, "step": 2348 }, { "epoch": 0.19, "grad_norm": 3.5062387986331482, "learning_rate": 9.33756143770708e-06, "loss": 0.6682, "step": 2349 }, { "epoch": 0.19, "grad_norm": 6.045118009924259, "learning_rate": 9.336907009579876e-06, "loss": 0.7358, "step": 2350 }, { "epoch": 0.19, "grad_norm": 3.6700071214039913, "learning_rate": 9.336252281311401e-06, "loss": 0.7535, "step": 2351 }, { "epoch": 0.19, "grad_norm": 4.6999920801688235, "learning_rate": 9.335597252946965e-06, "loss": 0.7672, "step": 2352 }, { "epoch": 0.19, "grad_norm": 3.64335633573572, "learning_rate": 9.334941924531898e-06, "loss": 0.8333, "step": 2353 }, { "epoch": 0.19, "grad_norm": 3.573641049751428, "learning_rate": 9.334286296111556e-06, "loss": 0.6451, "step": 2354 }, { "epoch": 0.19, "grad_norm": 4.409540914342568, "learning_rate": 9.333630367731311e-06, "loss": 0.7662, "step": 2355 }, { "epoch": 0.19, "grad_norm": 4.967924836661743, "learning_rate": 9.332974139436559e-06, "loss": 0.8419, "step": 2356 }, { "epoch": 0.19, "grad_norm": 5.096478222514124, "learning_rate": 9.332317611272712e-06, "loss": 0.8238, "step": 2357 }, { "epoch": 0.19, "grad_norm": 5.857698443164866, "learning_rate": 9.331660783285208e-06, "loss": 0.8513, "step": 2358 }, { "epoch": 0.19, "grad_norm": 3.2551301611971284, "learning_rate": 9.331003655519507e-06, "loss": 0.7557, "step": 2359 }, { "epoch": 0.19, "grad_norm": 3.66896932854, "learning_rate": 9.330346228021078e-06, "loss": 0.6739, "step": 2360 }, { "epoch": 0.19, "grad_norm": 3.690054102407496, "learning_rate": 9.329688500835425e-06, "loss": 0.6769, "step": 2361 }, { "epoch": 0.19, "grad_norm": 3.6143976288606896, "learning_rate": 9.329030474008067e-06, "loss": 0.5682, "step": 2362 }, { "epoch": 0.19, "grad_norm": 5.11143936678274, "learning_rate": 9.328372147584543e-06, "loss": 0.8142, "step": 2363 }, { "epoch": 0.19, "grad_norm": 4.61915082587634, "learning_rate": 9.327713521610412e-06, "loss": 0.8337, "step": 2364 }, { "epoch": 0.19, "grad_norm": 5.804813193443495, "learning_rate": 9.327054596131255e-06, "loss": 0.8011, "step": 2365 }, { "epoch": 0.19, "grad_norm": 5.970154371428778, "learning_rate": 9.326395371192674e-06, "loss": 0.7136, "step": 2366 }, { "epoch": 0.19, "grad_norm": 3.50649400309234, "learning_rate": 9.325735846840293e-06, "loss": 0.6342, "step": 2367 }, { "epoch": 0.19, "grad_norm": 3.6864505087668307, "learning_rate": 9.325076023119755e-06, "loss": 0.811, "step": 2368 }, { "epoch": 0.19, "grad_norm": 4.848541929943327, "learning_rate": 9.324415900076723e-06, "loss": 0.6728, "step": 2369 }, { "epoch": 0.19, "grad_norm": 4.955746611523773, "learning_rate": 9.323755477756881e-06, "loss": 0.8638, "step": 2370 }, { "epoch": 0.19, "grad_norm": 3.525791337165016, "learning_rate": 9.323094756205937e-06, "loss": 0.8577, "step": 2371 }, { "epoch": 0.19, "grad_norm": 4.419750128267288, "learning_rate": 9.322433735469614e-06, "loss": 0.7429, "step": 2372 }, { "epoch": 0.19, "grad_norm": 4.943105026606118, "learning_rate": 9.32177241559366e-06, "loss": 0.7331, "step": 2373 }, { "epoch": 0.19, "grad_norm": 5.743421660517981, "learning_rate": 9.321110796623845e-06, "loss": 0.7454, "step": 2374 }, { "epoch": 0.19, "grad_norm": 2.887159635521565, "learning_rate": 9.320448878605952e-06, "loss": 0.7013, "step": 2375 }, { "epoch": 0.19, "grad_norm": 4.87707740256212, "learning_rate": 9.319786661585795e-06, "loss": 0.8355, "step": 2376 }, { "epoch": 0.19, "grad_norm": 26.319113658153096, "learning_rate": 9.3191241456092e-06, "loss": 0.7375, "step": 2377 }, { "epoch": 0.19, "grad_norm": 6.250961548257255, "learning_rate": 9.318461330722018e-06, "loss": 0.8943, "step": 2378 }, { "epoch": 0.19, "grad_norm": 4.328251845432506, "learning_rate": 9.317798216970122e-06, "loss": 0.6079, "step": 2379 }, { "epoch": 0.19, "grad_norm": 11.980780426581918, "learning_rate": 9.317134804399401e-06, "loss": 0.7604, "step": 2380 }, { "epoch": 0.19, "grad_norm": 4.0360965946343725, "learning_rate": 9.31647109305577e-06, "loss": 0.7548, "step": 2381 }, { "epoch": 0.19, "grad_norm": 7.416253015904599, "learning_rate": 9.31580708298516e-06, "loss": 0.6372, "step": 2382 }, { "epoch": 0.19, "grad_norm": 5.934474168732798, "learning_rate": 9.315142774233526e-06, "loss": 0.5775, "step": 2383 }, { "epoch": 0.19, "grad_norm": 9.900341195987588, "learning_rate": 9.31447816684684e-06, "loss": 0.7505, "step": 2384 }, { "epoch": 0.19, "grad_norm": 5.888032546218741, "learning_rate": 9.3138132608711e-06, "loss": 0.8276, "step": 2385 }, { "epoch": 0.19, "grad_norm": 4.728322042089633, "learning_rate": 9.313148056352321e-06, "loss": 0.606, "step": 2386 }, { "epoch": 0.19, "grad_norm": 7.680449818304649, "learning_rate": 9.312482553336538e-06, "loss": 0.6513, "step": 2387 }, { "epoch": 0.19, "grad_norm": 8.3042396282667, "learning_rate": 9.311816751869809e-06, "loss": 0.7596, "step": 2388 }, { "epoch": 0.19, "grad_norm": 11.012809700729955, "learning_rate": 9.31115065199821e-06, "loss": 0.6874, "step": 2389 }, { "epoch": 0.19, "grad_norm": 3.82375398087193, "learning_rate": 9.310484253767842e-06, "loss": 0.843, "step": 2390 }, { "epoch": 0.19, "grad_norm": 10.035565717110465, "learning_rate": 9.309817557224822e-06, "loss": 0.7894, "step": 2391 }, { "epoch": 0.19, "grad_norm": 2.1176819332320185, "learning_rate": 9.30915056241529e-06, "loss": 0.7811, "step": 2392 }, { "epoch": 0.19, "grad_norm": 6.8857717448143765, "learning_rate": 9.308483269385406e-06, "loss": 0.6032, "step": 2393 }, { "epoch": 0.19, "grad_norm": 2.874008031592198, "learning_rate": 9.307815678181353e-06, "loss": 0.6924, "step": 2394 }, { "epoch": 0.19, "grad_norm": 10.463015575706057, "learning_rate": 9.307147788849329e-06, "loss": 0.5474, "step": 2395 }, { "epoch": 0.19, "grad_norm": 4.687493708477168, "learning_rate": 9.306479601435559e-06, "loss": 0.6662, "step": 2396 }, { "epoch": 0.19, "grad_norm": 4.51276910928311, "learning_rate": 9.305811115986285e-06, "loss": 0.6058, "step": 2397 }, { "epoch": 0.19, "grad_norm": 5.418141843764589, "learning_rate": 9.30514233254777e-06, "loss": 0.8429, "step": 2398 }, { "epoch": 0.19, "grad_norm": 8.089715281313294, "learning_rate": 9.304473251166297e-06, "loss": 0.713, "step": 2399 }, { "epoch": 0.19, "grad_norm": 9.17760105151901, "learning_rate": 9.303803871888172e-06, "loss": 0.7263, "step": 2400 }, { "epoch": 0.2, "grad_norm": 3.069840671597759, "learning_rate": 9.303134194759723e-06, "loss": 0.6533, "step": 2401 }, { "epoch": 0.2, "grad_norm": 14.548532164484056, "learning_rate": 9.302464219827289e-06, "loss": 0.7322, "step": 2402 }, { "epoch": 0.2, "grad_norm": 4.66309000674165, "learning_rate": 9.301793947137241e-06, "loss": 0.7569, "step": 2403 }, { "epoch": 0.2, "grad_norm": 9.068422879052775, "learning_rate": 9.301123376735968e-06, "loss": 0.7603, "step": 2404 }, { "epoch": 0.2, "grad_norm": 5.316122393629453, "learning_rate": 9.300452508669872e-06, "loss": 0.5841, "step": 2405 }, { "epoch": 0.2, "grad_norm": 5.608212652754311, "learning_rate": 9.299781342985387e-06, "loss": 0.6299, "step": 2406 }, { "epoch": 0.2, "grad_norm": 12.592680835420376, "learning_rate": 9.29910987972896e-06, "loss": 0.7323, "step": 2407 }, { "epoch": 0.2, "grad_norm": 8.70180704022712, "learning_rate": 9.298438118947058e-06, "loss": 0.8467, "step": 2408 }, { "epoch": 0.2, "grad_norm": 4.271549885425811, "learning_rate": 9.297766060686173e-06, "loss": 0.6919, "step": 2409 }, { "epoch": 0.2, "grad_norm": 3.394635097810448, "learning_rate": 9.297093704992817e-06, "loss": 0.6244, "step": 2410 }, { "epoch": 0.2, "grad_norm": 8.205242624225207, "learning_rate": 9.296421051913518e-06, "loss": 0.7436, "step": 2411 }, { "epoch": 0.2, "grad_norm": 11.830884593444653, "learning_rate": 9.295748101494831e-06, "loss": 0.6126, "step": 2412 }, { "epoch": 0.2, "grad_norm": 4.678889404361456, "learning_rate": 9.295074853783328e-06, "loss": 0.7838, "step": 2413 }, { "epoch": 0.2, "grad_norm": 5.215608165467666, "learning_rate": 9.2944013088256e-06, "loss": 0.7382, "step": 2414 }, { "epoch": 0.2, "grad_norm": 22.05641581643632, "learning_rate": 9.293727466668262e-06, "loss": 0.7399, "step": 2415 }, { "epoch": 0.2, "grad_norm": 3.2455972046859527, "learning_rate": 9.293053327357947e-06, "loss": 0.6739, "step": 2416 }, { "epoch": 0.2, "grad_norm": 3.482274017575485, "learning_rate": 9.29237889094131e-06, "loss": 0.7395, "step": 2417 }, { "epoch": 0.2, "grad_norm": 6.62447564070319, "learning_rate": 9.291704157465026e-06, "loss": 0.6913, "step": 2418 }, { "epoch": 0.2, "grad_norm": 5.754532711577562, "learning_rate": 9.291029126975794e-06, "loss": 0.8103, "step": 2419 }, { "epoch": 0.2, "grad_norm": 3.6977649972780298, "learning_rate": 9.290353799520328e-06, "loss": 0.7778, "step": 2420 }, { "epoch": 0.2, "grad_norm": 5.445637231729394, "learning_rate": 9.289678175145363e-06, "loss": 0.6751, "step": 2421 }, { "epoch": 0.2, "grad_norm": 11.750413892119365, "learning_rate": 9.28900225389766e-06, "loss": 0.7406, "step": 2422 }, { "epoch": 0.2, "grad_norm": 4.44653728905703, "learning_rate": 9.288326035823993e-06, "loss": 0.7168, "step": 2423 }, { "epoch": 0.2, "grad_norm": 4.215463719017191, "learning_rate": 9.287649520971165e-06, "loss": 0.7707, "step": 2424 }, { "epoch": 0.2, "grad_norm": 5.330572591632841, "learning_rate": 9.286972709385991e-06, "loss": 0.6941, "step": 2425 }, { "epoch": 0.2, "grad_norm": 8.29775919968905, "learning_rate": 9.286295601115314e-06, "loss": 0.6932, "step": 2426 }, { "epoch": 0.2, "grad_norm": 4.046667143945288, "learning_rate": 9.285618196205993e-06, "loss": 0.9281, "step": 2427 }, { "epoch": 0.2, "grad_norm": 4.663080138856968, "learning_rate": 9.284940494704906e-06, "loss": 0.6811, "step": 2428 }, { "epoch": 0.2, "grad_norm": 5.721644979362868, "learning_rate": 9.284262496658957e-06, "loss": 0.8363, "step": 2429 }, { "epoch": 0.2, "grad_norm": 2.9731674669583628, "learning_rate": 9.283584202115068e-06, "loss": 0.649, "step": 2430 }, { "epoch": 0.2, "grad_norm": 4.899695102469641, "learning_rate": 9.282905611120181e-06, "loss": 0.7552, "step": 2431 }, { "epoch": 0.2, "grad_norm": 4.593115129967294, "learning_rate": 9.282226723721259e-06, "loss": 0.7794, "step": 2432 }, { "epoch": 0.2, "grad_norm": 3.3414789061934025, "learning_rate": 9.281547539965284e-06, "loss": 0.5234, "step": 2433 }, { "epoch": 0.2, "grad_norm": 6.518797846484355, "learning_rate": 9.28086805989926e-06, "loss": 0.7435, "step": 2434 }, { "epoch": 0.2, "grad_norm": 5.703012747917296, "learning_rate": 9.28018828357021e-06, "loss": 0.6855, "step": 2435 }, { "epoch": 0.2, "grad_norm": 14.777940053968829, "learning_rate": 9.279508211025182e-06, "loss": 0.6449, "step": 2436 }, { "epoch": 0.2, "grad_norm": 3.3756434709678023, "learning_rate": 9.27882784231124e-06, "loss": 0.776, "step": 2437 }, { "epoch": 0.2, "grad_norm": 3.145525975191203, "learning_rate": 9.27814717747547e-06, "loss": 0.7516, "step": 2438 }, { "epoch": 0.2, "grad_norm": 2.8201143451161568, "learning_rate": 9.277466216564977e-06, "loss": 0.7306, "step": 2439 }, { "epoch": 0.2, "grad_norm": 4.63624619681657, "learning_rate": 9.276784959626889e-06, "loss": 0.7089, "step": 2440 }, { "epoch": 0.2, "grad_norm": 5.969777072079392, "learning_rate": 9.276103406708354e-06, "loss": 0.804, "step": 2441 }, { "epoch": 0.2, "grad_norm": 5.0156245023661805, "learning_rate": 9.275421557856536e-06, "loss": 0.7404, "step": 2442 }, { "epoch": 0.2, "grad_norm": 3.9233748296140334, "learning_rate": 9.274739413118629e-06, "loss": 0.7455, "step": 2443 }, { "epoch": 0.2, "grad_norm": 6.286216557644056, "learning_rate": 9.274056972541837e-06, "loss": 0.6616, "step": 2444 }, { "epoch": 0.2, "grad_norm": 5.201183671289719, "learning_rate": 9.273374236173391e-06, "loss": 0.9272, "step": 2445 }, { "epoch": 0.2, "grad_norm": 10.094561464137378, "learning_rate": 9.27269120406054e-06, "loss": 0.6327, "step": 2446 }, { "epoch": 0.2, "grad_norm": 3.5616417022652325, "learning_rate": 9.272007876250555e-06, "loss": 0.7214, "step": 2447 }, { "epoch": 0.2, "grad_norm": 5.097648215835642, "learning_rate": 9.271324252790725e-06, "loss": 0.6461, "step": 2448 }, { "epoch": 0.2, "grad_norm": 3.754695935863431, "learning_rate": 9.270640333728364e-06, "loss": 0.7626, "step": 2449 }, { "epoch": 0.2, "grad_norm": 7.320374059189283, "learning_rate": 9.269956119110802e-06, "loss": 0.6931, "step": 2450 }, { "epoch": 0.2, "grad_norm": 2.7245871458377335, "learning_rate": 9.269271608985391e-06, "loss": 0.7529, "step": 2451 }, { "epoch": 0.2, "grad_norm": 7.652491304119991, "learning_rate": 9.268586803399502e-06, "loss": 0.835, "step": 2452 }, { "epoch": 0.2, "grad_norm": 3.5167850286565376, "learning_rate": 9.267901702400527e-06, "loss": 0.5729, "step": 2453 }, { "epoch": 0.2, "grad_norm": 4.81871743740869, "learning_rate": 9.267216306035884e-06, "loss": 0.6267, "step": 2454 }, { "epoch": 0.2, "grad_norm": 25.41575022264047, "learning_rate": 9.266530614353004e-06, "loss": 0.6244, "step": 2455 }, { "epoch": 0.2, "grad_norm": 5.882065999052178, "learning_rate": 9.26584462739934e-06, "loss": 0.7517, "step": 2456 }, { "epoch": 0.2, "grad_norm": 2.7537123263085883, "learning_rate": 9.265158345222368e-06, "loss": 0.677, "step": 2457 }, { "epoch": 0.2, "grad_norm": 3.704220380670085, "learning_rate": 9.264471767869583e-06, "loss": 0.7541, "step": 2458 }, { "epoch": 0.2, "grad_norm": 4.882102013109601, "learning_rate": 9.263784895388502e-06, "loss": 0.6696, "step": 2459 }, { "epoch": 0.2, "grad_norm": 6.053350155401451, "learning_rate": 9.263097727826656e-06, "loss": 0.7681, "step": 2460 }, { "epoch": 0.2, "grad_norm": 4.158498158700847, "learning_rate": 9.262410265231607e-06, "loss": 0.8663, "step": 2461 }, { "epoch": 0.2, "grad_norm": 6.428038778547417, "learning_rate": 9.261722507650928e-06, "loss": 0.7338, "step": 2462 }, { "epoch": 0.2, "grad_norm": 6.203353385480414, "learning_rate": 9.261034455132217e-06, "loss": 0.6559, "step": 2463 }, { "epoch": 0.2, "grad_norm": 5.0245193673378665, "learning_rate": 9.260346107723093e-06, "loss": 0.9303, "step": 2464 }, { "epoch": 0.2, "grad_norm": 7.517011180786985, "learning_rate": 9.259657465471194e-06, "loss": 0.8187, "step": 2465 }, { "epoch": 0.2, "grad_norm": 4.8162816710073395, "learning_rate": 9.258968528424175e-06, "loss": 0.6355, "step": 2466 }, { "epoch": 0.2, "grad_norm": 4.848319058960849, "learning_rate": 9.25827929662972e-06, "loss": 0.8751, "step": 2467 }, { "epoch": 0.2, "grad_norm": 5.925785039864059, "learning_rate": 9.257589770135523e-06, "loss": 0.5809, "step": 2468 }, { "epoch": 0.2, "grad_norm": 5.167450948106605, "learning_rate": 9.256899948989307e-06, "loss": 0.7539, "step": 2469 }, { "epoch": 0.2, "grad_norm": 10.592110715898121, "learning_rate": 9.25620983323881e-06, "loss": 0.6947, "step": 2470 }, { "epoch": 0.2, "grad_norm": 8.771202071923351, "learning_rate": 9.255519422931794e-06, "loss": 0.7642, "step": 2471 }, { "epoch": 0.2, "grad_norm": 4.579273850723875, "learning_rate": 9.254828718116039e-06, "loss": 0.8416, "step": 2472 }, { "epoch": 0.2, "grad_norm": 7.206878900209931, "learning_rate": 9.254137718839345e-06, "loss": 0.6051, "step": 2473 }, { "epoch": 0.2, "grad_norm": 8.490069815113932, "learning_rate": 9.253446425149536e-06, "loss": 0.7622, "step": 2474 }, { "epoch": 0.2, "grad_norm": 4.593174587959045, "learning_rate": 9.252754837094452e-06, "loss": 0.7506, "step": 2475 }, { "epoch": 0.2, "grad_norm": 3.6670161066786338, "learning_rate": 9.252062954721955e-06, "loss": 0.6759, "step": 2476 }, { "epoch": 0.2, "grad_norm": 5.109094261245794, "learning_rate": 9.251370778079929e-06, "loss": 0.647, "step": 2477 }, { "epoch": 0.2, "grad_norm": 5.667414142519284, "learning_rate": 9.250678307216276e-06, "loss": 0.7136, "step": 2478 }, { "epoch": 0.2, "grad_norm": 8.468549003494472, "learning_rate": 9.24998554217892e-06, "loss": 0.7176, "step": 2479 }, { "epoch": 0.2, "grad_norm": 6.332944214897854, "learning_rate": 9.249292483015804e-06, "loss": 0.8866, "step": 2480 }, { "epoch": 0.2, "grad_norm": 5.194460579570262, "learning_rate": 9.248599129774894e-06, "loss": 0.6979, "step": 2481 }, { "epoch": 0.2, "grad_norm": 3.5378491140522508, "learning_rate": 9.247905482504172e-06, "loss": 0.7276, "step": 2482 }, { "epoch": 0.2, "grad_norm": 4.953606223020982, "learning_rate": 9.247211541251641e-06, "loss": 0.6265, "step": 2483 }, { "epoch": 0.2, "grad_norm": 4.752546980929678, "learning_rate": 9.246517306065332e-06, "loss": 0.7611, "step": 2484 }, { "epoch": 0.2, "grad_norm": 3.5243821889165905, "learning_rate": 9.245822776993286e-06, "loss": 0.6616, "step": 2485 }, { "epoch": 0.2, "grad_norm": 5.381215631396902, "learning_rate": 9.245127954083571e-06, "loss": 0.648, "step": 2486 }, { "epoch": 0.2, "grad_norm": 13.793619643426357, "learning_rate": 9.24443283738427e-06, "loss": 0.714, "step": 2487 }, { "epoch": 0.2, "grad_norm": 7.876424720391902, "learning_rate": 9.243737426943492e-06, "loss": 0.7141, "step": 2488 }, { "epoch": 0.2, "grad_norm": 3.5833129321416726, "learning_rate": 9.243041722809363e-06, "loss": 0.677, "step": 2489 }, { "epoch": 0.2, "grad_norm": 3.7272067742654147, "learning_rate": 9.242345725030033e-06, "loss": 0.7157, "step": 2490 }, { "epoch": 0.2, "grad_norm": 4.028858578064078, "learning_rate": 9.241649433653663e-06, "loss": 0.6102, "step": 2491 }, { "epoch": 0.2, "grad_norm": 4.166557613872918, "learning_rate": 9.240952848728447e-06, "loss": 0.6071, "step": 2492 }, { "epoch": 0.2, "grad_norm": 4.578049889394283, "learning_rate": 9.24025597030259e-06, "loss": 0.9905, "step": 2493 }, { "epoch": 0.2, "grad_norm": 5.4750457881306716, "learning_rate": 9.239558798424322e-06, "loss": 0.5054, "step": 2494 }, { "epoch": 0.2, "grad_norm": 8.503202954229025, "learning_rate": 9.238861333141889e-06, "loss": 0.6401, "step": 2495 }, { "epoch": 0.2, "grad_norm": 4.0566150011204725, "learning_rate": 9.238163574503562e-06, "loss": 0.7722, "step": 2496 }, { "epoch": 0.2, "grad_norm": 3.94304659018854, "learning_rate": 9.23746552255763e-06, "loss": 0.8436, "step": 2497 }, { "epoch": 0.2, "grad_norm": 3.22190650411511, "learning_rate": 9.236767177352403e-06, "loss": 0.7536, "step": 2498 }, { "epoch": 0.2, "grad_norm": 3.560225109749607, "learning_rate": 9.23606853893621e-06, "loss": 0.8405, "step": 2499 }, { "epoch": 0.2, "grad_norm": 3.6273205491560927, "learning_rate": 9.235369607357402e-06, "loss": 0.7262, "step": 2500 }, { "epoch": 0.2, "grad_norm": 3.7894401395286414, "learning_rate": 9.23467038266435e-06, "loss": 0.6985, "step": 2501 }, { "epoch": 0.2, "grad_norm": 4.018240683004062, "learning_rate": 9.233970864905444e-06, "loss": 0.6976, "step": 2502 }, { "epoch": 0.2, "grad_norm": 6.070353867016748, "learning_rate": 9.233271054129092e-06, "loss": 0.7613, "step": 2503 }, { "epoch": 0.2, "grad_norm": 4.267060739887431, "learning_rate": 9.23257095038373e-06, "loss": 0.6894, "step": 2504 }, { "epoch": 0.2, "grad_norm": 5.434883757957719, "learning_rate": 9.231870553717808e-06, "loss": 0.904, "step": 2505 }, { "epoch": 0.2, "grad_norm": 2.941550034736005, "learning_rate": 9.231169864179797e-06, "loss": 0.6429, "step": 2506 }, { "epoch": 0.2, "grad_norm": 18.317090624838876, "learning_rate": 9.230468881818192e-06, "loss": 0.6335, "step": 2507 }, { "epoch": 0.2, "grad_norm": 14.634152352091174, "learning_rate": 9.2297676066815e-06, "loss": 0.7564, "step": 2508 }, { "epoch": 0.2, "grad_norm": 4.262918747511421, "learning_rate": 9.229066038818258e-06, "loss": 0.7803, "step": 2509 }, { "epoch": 0.2, "grad_norm": 3.511425031951057, "learning_rate": 9.228364178277018e-06, "loss": 0.6934, "step": 2510 }, { "epoch": 0.2, "grad_norm": 4.287734182747469, "learning_rate": 9.227662025106352e-06, "loss": 0.7392, "step": 2511 }, { "epoch": 0.2, "grad_norm": 7.901303849645927, "learning_rate": 9.226959579354855e-06, "loss": 0.9464, "step": 2512 }, { "epoch": 0.2, "grad_norm": 11.554448610093775, "learning_rate": 9.22625684107114e-06, "loss": 0.7764, "step": 2513 }, { "epoch": 0.2, "grad_norm": 3.4052494621608873, "learning_rate": 9.22555381030384e-06, "loss": 0.6834, "step": 2514 }, { "epoch": 0.2, "grad_norm": 8.933844726491499, "learning_rate": 9.224850487101611e-06, "loss": 0.6246, "step": 2515 }, { "epoch": 0.2, "grad_norm": 4.514531331201028, "learning_rate": 9.224146871513127e-06, "loss": 0.7275, "step": 2516 }, { "epoch": 0.2, "grad_norm": 2.7734713986543102, "learning_rate": 9.223442963587082e-06, "loss": 0.6056, "step": 2517 }, { "epoch": 0.2, "grad_norm": 10.545427676606385, "learning_rate": 9.222738763372189e-06, "loss": 0.6675, "step": 2518 }, { "epoch": 0.2, "grad_norm": 19.33672342398612, "learning_rate": 9.222034270917187e-06, "loss": 0.8545, "step": 2519 }, { "epoch": 0.2, "grad_norm": 3.936441722365738, "learning_rate": 9.221329486270827e-06, "loss": 0.7063, "step": 2520 }, { "epoch": 0.2, "grad_norm": 5.152952337943601, "learning_rate": 9.220624409481888e-06, "loss": 0.7511, "step": 2521 }, { "epoch": 0.2, "grad_norm": 4.109248980137321, "learning_rate": 9.219919040599165e-06, "loss": 0.5497, "step": 2522 }, { "epoch": 0.2, "grad_norm": 9.062705042077418, "learning_rate": 9.219213379671474e-06, "loss": 0.7234, "step": 2523 }, { "epoch": 0.2, "grad_norm": 4.331730186367032, "learning_rate": 9.218507426747651e-06, "loss": 0.7561, "step": 2524 }, { "epoch": 0.21, "grad_norm": 3.19338022044433, "learning_rate": 9.21780118187655e-06, "loss": 0.7714, "step": 2525 }, { "epoch": 0.21, "grad_norm": 5.816456915568769, "learning_rate": 9.217094645107052e-06, "loss": 0.6624, "step": 2526 }, { "epoch": 0.21, "grad_norm": 3.1932817427796785, "learning_rate": 9.216387816488051e-06, "loss": 0.7515, "step": 2527 }, { "epoch": 0.21, "grad_norm": 5.767854446240626, "learning_rate": 9.215680696068465e-06, "loss": 0.7591, "step": 2528 }, { "epoch": 0.21, "grad_norm": 4.268644369034617, "learning_rate": 9.214973283897231e-06, "loss": 0.7581, "step": 2529 }, { "epoch": 0.21, "grad_norm": 4.1786040800356545, "learning_rate": 9.214265580023305e-06, "loss": 0.8489, "step": 2530 }, { "epoch": 0.21, "grad_norm": 3.0289662252248117, "learning_rate": 9.213557584495665e-06, "loss": 0.9291, "step": 2531 }, { "epoch": 0.21, "grad_norm": 3.24024098194718, "learning_rate": 9.212849297363312e-06, "loss": 0.7309, "step": 2532 }, { "epoch": 0.21, "grad_norm": 2.7551136024973046, "learning_rate": 9.212140718675257e-06, "loss": 0.5896, "step": 2533 }, { "epoch": 0.21, "grad_norm": 9.465515236290557, "learning_rate": 9.211431848480545e-06, "loss": 0.5831, "step": 2534 }, { "epoch": 0.21, "grad_norm": 3.1350217664061297, "learning_rate": 9.210722686828232e-06, "loss": 0.8176, "step": 2535 }, { "epoch": 0.21, "grad_norm": 3.060282934708333, "learning_rate": 9.210013233767396e-06, "loss": 0.6891, "step": 2536 }, { "epoch": 0.21, "grad_norm": 3.190790993536294, "learning_rate": 9.209303489347136e-06, "loss": 0.9744, "step": 2537 }, { "epoch": 0.21, "grad_norm": 3.9908004651289977, "learning_rate": 9.20859345361657e-06, "loss": 0.8672, "step": 2538 }, { "epoch": 0.21, "grad_norm": 4.145993801746851, "learning_rate": 9.207883126624838e-06, "loss": 0.7271, "step": 2539 }, { "epoch": 0.21, "grad_norm": 3.8188967255938335, "learning_rate": 9.207172508421099e-06, "loss": 0.6967, "step": 2540 }, { "epoch": 0.21, "grad_norm": 5.295485117994193, "learning_rate": 9.20646159905453e-06, "loss": 0.7497, "step": 2541 }, { "epoch": 0.21, "grad_norm": 8.12223447423027, "learning_rate": 9.205750398574334e-06, "loss": 0.8167, "step": 2542 }, { "epoch": 0.21, "grad_norm": 4.441117416734898, "learning_rate": 9.205038907029729e-06, "loss": 0.7028, "step": 2543 }, { "epoch": 0.21, "grad_norm": 43.745123805205154, "learning_rate": 9.204327124469953e-06, "loss": 0.7531, "step": 2544 }, { "epoch": 0.21, "grad_norm": 3.6208033610516672, "learning_rate": 9.203615050944269e-06, "loss": 0.8874, "step": 2545 }, { "epoch": 0.21, "grad_norm": 3.352843162960466, "learning_rate": 9.202902686501954e-06, "loss": 0.7101, "step": 2546 }, { "epoch": 0.21, "grad_norm": 5.048818767546314, "learning_rate": 9.20219003119231e-06, "loss": 0.7227, "step": 2547 }, { "epoch": 0.21, "grad_norm": 3.5273439437975993, "learning_rate": 9.201477085064656e-06, "loss": 0.4769, "step": 2548 }, { "epoch": 0.21, "grad_norm": 4.2639328301832915, "learning_rate": 9.200763848168334e-06, "loss": 0.7139, "step": 2549 }, { "epoch": 0.21, "grad_norm": 3.479648858302999, "learning_rate": 9.200050320552702e-06, "loss": 0.7772, "step": 2550 }, { "epoch": 0.21, "grad_norm": 4.898347724267929, "learning_rate": 9.199336502267145e-06, "loss": 0.8678, "step": 2551 }, { "epoch": 0.21, "grad_norm": 9.41603516254404, "learning_rate": 9.19862239336106e-06, "loss": 0.745, "step": 2552 }, { "epoch": 0.21, "grad_norm": 3.545931265898865, "learning_rate": 9.197907993883865e-06, "loss": 0.8986, "step": 2553 }, { "epoch": 0.21, "grad_norm": 3.301868952189717, "learning_rate": 9.197193303885008e-06, "loss": 0.64, "step": 2554 }, { "epoch": 0.21, "grad_norm": 4.345528438571801, "learning_rate": 9.196478323413946e-06, "loss": 0.8305, "step": 2555 }, { "epoch": 0.21, "grad_norm": 8.106496876694042, "learning_rate": 9.19576305252016e-06, "loss": 0.8963, "step": 2556 }, { "epoch": 0.21, "grad_norm": 3.361375352777354, "learning_rate": 9.195047491253154e-06, "loss": 0.823, "step": 2557 }, { "epoch": 0.21, "grad_norm": 4.923056400330417, "learning_rate": 9.194331639662445e-06, "loss": 0.6374, "step": 2558 }, { "epoch": 0.21, "grad_norm": 4.321522171284863, "learning_rate": 9.193615497797579e-06, "loss": 0.7804, "step": 2559 }, { "epoch": 0.21, "grad_norm": 9.091655304786851, "learning_rate": 9.192899065708115e-06, "loss": 0.6688, "step": 2560 }, { "epoch": 0.21, "grad_norm": 3.3904821534983802, "learning_rate": 9.192182343443634e-06, "loss": 0.7634, "step": 2561 }, { "epoch": 0.21, "grad_norm": 5.887533608463917, "learning_rate": 9.19146533105374e-06, "loss": 0.7445, "step": 2562 }, { "epoch": 0.21, "grad_norm": 4.683333555125534, "learning_rate": 9.190748028588053e-06, "loss": 0.7642, "step": 2563 }, { "epoch": 0.21, "grad_norm": 3.6142932153919536, "learning_rate": 9.190030436096213e-06, "loss": 0.7218, "step": 2564 }, { "epoch": 0.21, "grad_norm": 6.490733019316102, "learning_rate": 9.189312553627886e-06, "loss": 0.7971, "step": 2565 }, { "epoch": 0.21, "grad_norm": 5.115652874886379, "learning_rate": 9.188594381232754e-06, "loss": 0.8786, "step": 2566 }, { "epoch": 0.21, "grad_norm": 3.1046024833077643, "learning_rate": 9.187875918960516e-06, "loss": 0.662, "step": 2567 }, { "epoch": 0.21, "grad_norm": 4.108103830208113, "learning_rate": 9.187157166860894e-06, "loss": 0.626, "step": 2568 }, { "epoch": 0.21, "grad_norm": 3.327610341189339, "learning_rate": 9.186438124983633e-06, "loss": 0.7444, "step": 2569 }, { "epoch": 0.21, "grad_norm": 2.714346878290693, "learning_rate": 9.185718793378492e-06, "loss": 0.614, "step": 2570 }, { "epoch": 0.21, "grad_norm": 7.829820932368684, "learning_rate": 9.184999172095257e-06, "loss": 0.6303, "step": 2571 }, { "epoch": 0.21, "grad_norm": 7.117774846367013, "learning_rate": 9.184279261183728e-06, "loss": 0.6896, "step": 2572 }, { "epoch": 0.21, "grad_norm": 4.381983065807821, "learning_rate": 9.183559060693728e-06, "loss": 0.7862, "step": 2573 }, { "epoch": 0.21, "grad_norm": 12.820989595388447, "learning_rate": 9.182838570675097e-06, "loss": 0.7306, "step": 2574 }, { "epoch": 0.21, "grad_norm": 4.43502747538333, "learning_rate": 9.182117791177702e-06, "loss": 0.7599, "step": 2575 }, { "epoch": 0.21, "grad_norm": 3.281867666805685, "learning_rate": 9.181396722251422e-06, "loss": 0.738, "step": 2576 }, { "epoch": 0.21, "grad_norm": 5.779989177399466, "learning_rate": 9.18067536394616e-06, "loss": 0.6623, "step": 2577 }, { "epoch": 0.21, "grad_norm": 28.064252006983025, "learning_rate": 9.17995371631184e-06, "loss": 0.6323, "step": 2578 }, { "epoch": 0.21, "grad_norm": 3.758641172429831, "learning_rate": 9.179231779398403e-06, "loss": 0.7176, "step": 2579 }, { "epoch": 0.21, "grad_norm": 2.8678666455948183, "learning_rate": 9.178509553255812e-06, "loss": 0.8448, "step": 2580 }, { "epoch": 0.21, "grad_norm": 3.3331168817316525, "learning_rate": 9.177787037934052e-06, "loss": 0.6274, "step": 2581 }, { "epoch": 0.21, "grad_norm": 2.3517500160629163, "learning_rate": 9.177064233483121e-06, "loss": 0.6993, "step": 2582 }, { "epoch": 0.21, "grad_norm": 5.668439438071556, "learning_rate": 9.176341139953046e-06, "loss": 0.7189, "step": 2583 }, { "epoch": 0.21, "grad_norm": 8.236309432328966, "learning_rate": 9.175617757393867e-06, "loss": 0.5444, "step": 2584 }, { "epoch": 0.21, "grad_norm": 5.0573208614919185, "learning_rate": 9.174894085855645e-06, "loss": 0.7263, "step": 2585 }, { "epoch": 0.21, "grad_norm": 3.651801211304331, "learning_rate": 9.174170125388468e-06, "loss": 0.715, "step": 2586 }, { "epoch": 0.21, "grad_norm": 3.9889525021493264, "learning_rate": 9.173445876042436e-06, "loss": 0.9097, "step": 2587 }, { "epoch": 0.21, "grad_norm": 4.780204533400854, "learning_rate": 9.17272133786767e-06, "loss": 0.7696, "step": 2588 }, { "epoch": 0.21, "grad_norm": 31.55899433611728, "learning_rate": 9.171996510914311e-06, "loss": 0.7628, "step": 2589 }, { "epoch": 0.21, "grad_norm": 4.108568143082509, "learning_rate": 9.171271395232528e-06, "loss": 0.6953, "step": 2590 }, { "epoch": 0.21, "grad_norm": 4.051114625637082, "learning_rate": 9.170545990872499e-06, "loss": 0.8119, "step": 2591 }, { "epoch": 0.21, "grad_norm": 13.255343175481997, "learning_rate": 9.169820297884428e-06, "loss": 0.9631, "step": 2592 }, { "epoch": 0.21, "grad_norm": 4.1977369801329925, "learning_rate": 9.169094316318537e-06, "loss": 0.7845, "step": 2593 }, { "epoch": 0.21, "grad_norm": 4.909765594058128, "learning_rate": 9.168368046225067e-06, "loss": 0.7788, "step": 2594 }, { "epoch": 0.21, "grad_norm": 4.08491745696979, "learning_rate": 9.167641487654283e-06, "loss": 0.7316, "step": 2595 }, { "epoch": 0.21, "grad_norm": 5.228780810271977, "learning_rate": 9.166914640656467e-06, "loss": 0.5747, "step": 2596 }, { "epoch": 0.21, "grad_norm": 3.3880724253382017, "learning_rate": 9.166187505281919e-06, "loss": 0.7476, "step": 2597 }, { "epoch": 0.21, "grad_norm": 5.592187134524757, "learning_rate": 9.165460081580965e-06, "loss": 0.7696, "step": 2598 }, { "epoch": 0.21, "grad_norm": 3.866632645471413, "learning_rate": 9.164732369603944e-06, "loss": 0.6975, "step": 2599 }, { "epoch": 0.21, "grad_norm": 2.9567097617618865, "learning_rate": 9.16400436940122e-06, "loss": 0.8094, "step": 2600 }, { "epoch": 0.21, "grad_norm": 4.440064003884874, "learning_rate": 9.163276081023177e-06, "loss": 0.8229, "step": 2601 }, { "epoch": 0.21, "grad_norm": 3.576412861252549, "learning_rate": 9.162547504520214e-06, "loss": 0.6277, "step": 2602 }, { "epoch": 0.21, "grad_norm": 11.04488692956889, "learning_rate": 9.161818639942752e-06, "loss": 0.6133, "step": 2603 }, { "epoch": 0.21, "grad_norm": 3.102596020115686, "learning_rate": 9.161089487341237e-06, "loss": 0.7928, "step": 2604 }, { "epoch": 0.21, "grad_norm": 8.996732415763038, "learning_rate": 9.160360046766129e-06, "loss": 0.6554, "step": 2605 }, { "epoch": 0.21, "grad_norm": 3.3563536131865703, "learning_rate": 9.159630318267908e-06, "loss": 0.7639, "step": 2606 }, { "epoch": 0.21, "grad_norm": 10.741720068471897, "learning_rate": 9.15890030189708e-06, "loss": 0.7563, "step": 2607 }, { "epoch": 0.21, "grad_norm": 10.079084306533423, "learning_rate": 9.158169997704166e-06, "loss": 0.5617, "step": 2608 }, { "epoch": 0.21, "grad_norm": 5.189167989992016, "learning_rate": 9.157439405739703e-06, "loss": 0.8172, "step": 2609 }, { "epoch": 0.21, "grad_norm": 4.775254099694386, "learning_rate": 9.156708526054257e-06, "loss": 0.6895, "step": 2610 }, { "epoch": 0.21, "grad_norm": 3.0002308214770297, "learning_rate": 9.15597735869841e-06, "loss": 0.7644, "step": 2611 }, { "epoch": 0.21, "grad_norm": 4.004904774854671, "learning_rate": 9.155245903722758e-06, "loss": 0.7785, "step": 2612 }, { "epoch": 0.21, "grad_norm": 6.354417392143378, "learning_rate": 9.154514161177927e-06, "loss": 0.7169, "step": 2613 }, { "epoch": 0.21, "grad_norm": 3.4366143474901256, "learning_rate": 9.153782131114559e-06, "loss": 0.6287, "step": 2614 }, { "epoch": 0.21, "grad_norm": 7.383108649041177, "learning_rate": 9.15304981358331e-06, "loss": 0.7268, "step": 2615 }, { "epoch": 0.21, "grad_norm": 19.76298133232945, "learning_rate": 9.152317208634866e-06, "loss": 0.7053, "step": 2616 }, { "epoch": 0.21, "grad_norm": 3.1786785748148367, "learning_rate": 9.151584316319928e-06, "loss": 0.6673, "step": 2617 }, { "epoch": 0.21, "grad_norm": 2.968299364251915, "learning_rate": 9.150851136689212e-06, "loss": 0.7316, "step": 2618 }, { "epoch": 0.21, "grad_norm": 15.79613254809934, "learning_rate": 9.150117669793462e-06, "loss": 0.7724, "step": 2619 }, { "epoch": 0.21, "grad_norm": 3.0563021303323317, "learning_rate": 9.149383915683439e-06, "loss": 0.6686, "step": 2620 }, { "epoch": 0.21, "grad_norm": 4.841994801750214, "learning_rate": 9.148649874409921e-06, "loss": 0.7466, "step": 2621 }, { "epoch": 0.21, "grad_norm": 4.5679022173226445, "learning_rate": 9.14791554602371e-06, "loss": 0.6087, "step": 2622 }, { "epoch": 0.21, "grad_norm": 30.473889457845605, "learning_rate": 9.147180930575625e-06, "loss": 0.7049, "step": 2623 }, { "epoch": 0.21, "grad_norm": 3.5776867590333645, "learning_rate": 9.146446028116508e-06, "loss": 0.717, "step": 2624 }, { "epoch": 0.21, "grad_norm": 4.546865600071528, "learning_rate": 9.145710838697217e-06, "loss": 0.5362, "step": 2625 }, { "epoch": 0.21, "grad_norm": 4.420614653982784, "learning_rate": 9.144975362368633e-06, "loss": 0.8378, "step": 2626 }, { "epoch": 0.21, "grad_norm": 3.481214764733894, "learning_rate": 9.144239599181655e-06, "loss": 0.7211, "step": 2627 }, { "epoch": 0.21, "grad_norm": 4.690752450673927, "learning_rate": 9.143503549187203e-06, "loss": 0.8571, "step": 2628 }, { "epoch": 0.21, "grad_norm": 4.8232974925696555, "learning_rate": 9.142767212436214e-06, "loss": 0.8012, "step": 2629 }, { "epoch": 0.21, "grad_norm": 5.715245998342851, "learning_rate": 9.142030588979649e-06, "loss": 0.7481, "step": 2630 }, { "epoch": 0.21, "grad_norm": 3.3430597167109304, "learning_rate": 9.141293678868488e-06, "loss": 0.6595, "step": 2631 }, { "epoch": 0.21, "grad_norm": 4.667305769535707, "learning_rate": 9.140556482153729e-06, "loss": 0.7439, "step": 2632 }, { "epoch": 0.21, "grad_norm": 5.896872062295749, "learning_rate": 9.13981899888639e-06, "loss": 0.7425, "step": 2633 }, { "epoch": 0.21, "grad_norm": 5.652694425829886, "learning_rate": 9.139081229117508e-06, "loss": 0.7239, "step": 2634 }, { "epoch": 0.21, "grad_norm": 3.6717784615264386, "learning_rate": 9.138343172898145e-06, "loss": 0.6198, "step": 2635 }, { "epoch": 0.21, "grad_norm": 4.635901425925286, "learning_rate": 9.137604830279377e-06, "loss": 0.7164, "step": 2636 }, { "epoch": 0.21, "grad_norm": 4.420739061260594, "learning_rate": 9.136866201312302e-06, "loss": 0.7577, "step": 2637 }, { "epoch": 0.21, "grad_norm": 3.238242204210583, "learning_rate": 9.136127286048038e-06, "loss": 0.7427, "step": 2638 }, { "epoch": 0.21, "grad_norm": 3.1140559155848666, "learning_rate": 9.135388084537725e-06, "loss": 0.7473, "step": 2639 }, { "epoch": 0.21, "grad_norm": 4.579194673033722, "learning_rate": 9.134648596832513e-06, "loss": 0.8568, "step": 2640 }, { "epoch": 0.21, "grad_norm": 4.394945091205819, "learning_rate": 9.133908822983589e-06, "loss": 0.9303, "step": 2641 }, { "epoch": 0.21, "grad_norm": 7.516818800844026, "learning_rate": 9.133168763042141e-06, "loss": 0.6363, "step": 2642 }, { "epoch": 0.21, "grad_norm": 3.563621335239375, "learning_rate": 9.132428417059393e-06, "loss": 0.934, "step": 2643 }, { "epoch": 0.21, "grad_norm": 5.610273032555965, "learning_rate": 9.131687785086579e-06, "loss": 0.7349, "step": 2644 }, { "epoch": 0.21, "grad_norm": 6.595061349046355, "learning_rate": 9.130946867174952e-06, "loss": 0.7058, "step": 2645 }, { "epoch": 0.21, "grad_norm": 4.9931245800779855, "learning_rate": 9.130205663375792e-06, "loss": 0.7296, "step": 2646 }, { "epoch": 0.21, "grad_norm": 9.04287059619439, "learning_rate": 9.129464173740397e-06, "loss": 0.7229, "step": 2647 }, { "epoch": 0.22, "grad_norm": 10.186035226012091, "learning_rate": 9.128722398320077e-06, "loss": 0.7902, "step": 2648 }, { "epoch": 0.22, "grad_norm": 14.042674649080956, "learning_rate": 9.127980337166172e-06, "loss": 0.829, "step": 2649 }, { "epoch": 0.22, "grad_norm": 4.567737943991186, "learning_rate": 9.127237990330035e-06, "loss": 0.7444, "step": 2650 }, { "epoch": 0.22, "grad_norm": 8.205808130214953, "learning_rate": 9.126495357863042e-06, "loss": 0.6834, "step": 2651 }, { "epoch": 0.22, "grad_norm": 4.325680194904642, "learning_rate": 9.125752439816588e-06, "loss": 0.7545, "step": 2652 }, { "epoch": 0.22, "grad_norm": 4.853116556720304, "learning_rate": 9.125009236242088e-06, "loss": 0.6094, "step": 2653 }, { "epoch": 0.22, "grad_norm": 5.731713979445744, "learning_rate": 9.124265747190974e-06, "loss": 0.7126, "step": 2654 }, { "epoch": 0.22, "grad_norm": 7.764051409387372, "learning_rate": 9.123521972714702e-06, "loss": 0.7216, "step": 2655 }, { "epoch": 0.22, "grad_norm": 5.647308351505891, "learning_rate": 9.122777912864747e-06, "loss": 0.6114, "step": 2656 }, { "epoch": 0.22, "grad_norm": 7.104482992303934, "learning_rate": 9.122033567692601e-06, "loss": 0.7737, "step": 2657 }, { "epoch": 0.22, "grad_norm": 6.480850805948082, "learning_rate": 9.121288937249777e-06, "loss": 0.6796, "step": 2658 }, { "epoch": 0.22, "grad_norm": 4.427958378154737, "learning_rate": 9.120544021587807e-06, "loss": 0.7595, "step": 2659 }, { "epoch": 0.22, "grad_norm": 5.712765690668253, "learning_rate": 9.11979882075825e-06, "loss": 0.8159, "step": 2660 }, { "epoch": 0.22, "grad_norm": 29.89773928911269, "learning_rate": 9.119053334812671e-06, "loss": 0.7007, "step": 2661 }, { "epoch": 0.22, "grad_norm": 4.550242400499846, "learning_rate": 9.118307563802665e-06, "loss": 0.6697, "step": 2662 }, { "epoch": 0.22, "grad_norm": 6.08703493114258, "learning_rate": 9.117561507779847e-06, "loss": 0.7202, "step": 2663 }, { "epoch": 0.22, "grad_norm": 7.546835579384152, "learning_rate": 9.116815166795844e-06, "loss": 0.652, "step": 2664 }, { "epoch": 0.22, "grad_norm": 4.295192199334068, "learning_rate": 9.116068540902313e-06, "loss": 0.7643, "step": 2665 }, { "epoch": 0.22, "grad_norm": 2.7338442793599462, "learning_rate": 9.115321630150918e-06, "loss": 0.5044, "step": 2666 }, { "epoch": 0.22, "grad_norm": 5.547642337921033, "learning_rate": 9.114574434593357e-06, "loss": 0.776, "step": 2667 }, { "epoch": 0.22, "grad_norm": 7.284702566417422, "learning_rate": 9.113826954281335e-06, "loss": 0.7563, "step": 2668 }, { "epoch": 0.22, "grad_norm": 5.76573369240654, "learning_rate": 9.113079189266587e-06, "loss": 0.8402, "step": 2669 }, { "epoch": 0.22, "grad_norm": 5.230268456930929, "learning_rate": 9.112331139600861e-06, "loss": 0.7483, "step": 2670 }, { "epoch": 0.22, "grad_norm": 5.5528895761977894, "learning_rate": 9.111582805335926e-06, "loss": 0.7327, "step": 2671 }, { "epoch": 0.22, "grad_norm": 3.8461377163237023, "learning_rate": 9.110834186523572e-06, "loss": 0.6512, "step": 2672 }, { "epoch": 0.22, "grad_norm": 5.1447165679573885, "learning_rate": 9.11008528321561e-06, "loss": 0.6981, "step": 2673 }, { "epoch": 0.22, "grad_norm": 4.557117022653451, "learning_rate": 9.109336095463865e-06, "loss": 0.939, "step": 2674 }, { "epoch": 0.22, "grad_norm": 13.751336320407118, "learning_rate": 9.10858662332019e-06, "loss": 0.7143, "step": 2675 }, { "epoch": 0.22, "grad_norm": 4.680425328380966, "learning_rate": 9.107836866836448e-06, "loss": 0.8348, "step": 2676 }, { "epoch": 0.22, "grad_norm": 9.953345091881308, "learning_rate": 9.107086826064533e-06, "loss": 0.7134, "step": 2677 }, { "epoch": 0.22, "grad_norm": 14.32229750054991, "learning_rate": 9.106336501056348e-06, "loss": 0.7114, "step": 2678 }, { "epoch": 0.22, "grad_norm": 8.102795209142728, "learning_rate": 9.10558589186382e-06, "loss": 0.9128, "step": 2679 }, { "epoch": 0.22, "grad_norm": 13.912212284352316, "learning_rate": 9.104834998538899e-06, "loss": 0.6523, "step": 2680 }, { "epoch": 0.22, "grad_norm": 16.282086926318353, "learning_rate": 9.10408382113355e-06, "loss": 0.802, "step": 2681 }, { "epoch": 0.22, "grad_norm": 39.375009345723356, "learning_rate": 9.103332359699757e-06, "loss": 0.6199, "step": 2682 }, { "epoch": 0.22, "grad_norm": 8.912235763225437, "learning_rate": 9.102580614289532e-06, "loss": 0.6851, "step": 2683 }, { "epoch": 0.22, "grad_norm": 50.155658157814074, "learning_rate": 9.101828584954893e-06, "loss": 0.8759, "step": 2684 }, { "epoch": 0.22, "grad_norm": 9.107254244590859, "learning_rate": 9.101076271747888e-06, "loss": 0.929, "step": 2685 }, { "epoch": 0.22, "grad_norm": 5.209268275606788, "learning_rate": 9.100323674720585e-06, "loss": 0.7997, "step": 2686 }, { "epoch": 0.22, "grad_norm": 5.78347972967204, "learning_rate": 9.099570793925065e-06, "loss": 0.6723, "step": 2687 }, { "epoch": 0.22, "grad_norm": 3.273140075954397, "learning_rate": 9.098817629413434e-06, "loss": 0.6191, "step": 2688 }, { "epoch": 0.22, "grad_norm": 5.036411978263137, "learning_rate": 9.098064181237814e-06, "loss": 0.6556, "step": 2689 }, { "epoch": 0.22, "grad_norm": 8.382277251866585, "learning_rate": 9.097310449450348e-06, "loss": 0.7097, "step": 2690 }, { "epoch": 0.22, "grad_norm": 5.318812685537838, "learning_rate": 9.096556434103201e-06, "loss": 0.8173, "step": 2691 }, { "epoch": 0.22, "grad_norm": 7.913724205519675, "learning_rate": 9.095802135248557e-06, "loss": 0.7485, "step": 2692 }, { "epoch": 0.22, "grad_norm": 6.204978940727581, "learning_rate": 9.095047552938612e-06, "loss": 0.7458, "step": 2693 }, { "epoch": 0.22, "grad_norm": 5.438141731507851, "learning_rate": 9.094292687225594e-06, "loss": 0.763, "step": 2694 }, { "epoch": 0.22, "grad_norm": 3.9326766404857745, "learning_rate": 9.093537538161742e-06, "loss": 0.7283, "step": 2695 }, { "epoch": 0.22, "grad_norm": 4.767800338634533, "learning_rate": 9.092782105799317e-06, "loss": 0.8797, "step": 2696 }, { "epoch": 0.22, "grad_norm": 6.570497039625524, "learning_rate": 9.0920263901906e-06, "loss": 0.7888, "step": 2697 }, { "epoch": 0.22, "grad_norm": 6.932513129150175, "learning_rate": 9.091270391387892e-06, "loss": 0.7179, "step": 2698 }, { "epoch": 0.22, "grad_norm": 9.875900860904347, "learning_rate": 9.090514109443511e-06, "loss": 0.6616, "step": 2699 }, { "epoch": 0.22, "grad_norm": 5.317775666968505, "learning_rate": 9.089757544409798e-06, "loss": 0.7709, "step": 2700 }, { "epoch": 0.22, "grad_norm": 6.961696962761908, "learning_rate": 9.089000696339112e-06, "loss": 0.5837, "step": 2701 }, { "epoch": 0.22, "grad_norm": 2.97624468295653, "learning_rate": 9.088243565283832e-06, "loss": 0.7805, "step": 2702 }, { "epoch": 0.22, "grad_norm": 7.419510373643693, "learning_rate": 9.087486151296355e-06, "loss": 0.7519, "step": 2703 }, { "epoch": 0.22, "grad_norm": 4.581844965387088, "learning_rate": 9.086728454429099e-06, "loss": 0.7128, "step": 2704 }, { "epoch": 0.22, "grad_norm": 5.83024760691626, "learning_rate": 9.085970474734501e-06, "loss": 0.771, "step": 2705 }, { "epoch": 0.22, "grad_norm": 6.522818646515112, "learning_rate": 9.08521221226502e-06, "loss": 0.8641, "step": 2706 }, { "epoch": 0.22, "grad_norm": 6.734050701435857, "learning_rate": 9.084453667073131e-06, "loss": 0.8186, "step": 2707 }, { "epoch": 0.22, "grad_norm": 4.004804390681544, "learning_rate": 9.08369483921133e-06, "loss": 0.7255, "step": 2708 }, { "epoch": 0.22, "grad_norm": 4.437988292791883, "learning_rate": 9.082935728732135e-06, "loss": 0.7883, "step": 2709 }, { "epoch": 0.22, "grad_norm": 23.604595825957652, "learning_rate": 9.082176335688076e-06, "loss": 0.792, "step": 2710 }, { "epoch": 0.22, "grad_norm": 2.4938608384537417, "learning_rate": 9.081416660131713e-06, "loss": 0.5597, "step": 2711 }, { "epoch": 0.22, "grad_norm": 3.9557799062829435, "learning_rate": 9.080656702115619e-06, "loss": 0.752, "step": 2712 }, { "epoch": 0.22, "grad_norm": 10.548714184582717, "learning_rate": 9.079896461692386e-06, "loss": 0.7945, "step": 2713 }, { "epoch": 0.22, "grad_norm": 6.892085288408094, "learning_rate": 9.07913593891463e-06, "loss": 0.5684, "step": 2714 }, { "epoch": 0.22, "grad_norm": 5.879424259898536, "learning_rate": 9.078375133834981e-06, "loss": 0.6846, "step": 2715 }, { "epoch": 0.22, "grad_norm": 5.303886354130676, "learning_rate": 9.077614046506094e-06, "loss": 0.689, "step": 2716 }, { "epoch": 0.22, "grad_norm": 2.891782486024536, "learning_rate": 9.07685267698064e-06, "loss": 0.7651, "step": 2717 }, { "epoch": 0.22, "grad_norm": 3.1691665416430355, "learning_rate": 9.076091025311311e-06, "loss": 0.6953, "step": 2718 }, { "epoch": 0.22, "grad_norm": 5.866002378618986, "learning_rate": 9.075329091550818e-06, "loss": 0.8198, "step": 2719 }, { "epoch": 0.22, "grad_norm": 9.05782311958584, "learning_rate": 9.07456687575189e-06, "loss": 0.7667, "step": 2720 }, { "epoch": 0.22, "grad_norm": 4.138826918516352, "learning_rate": 9.07380437796728e-06, "loss": 0.8756, "step": 2721 }, { "epoch": 0.22, "grad_norm": 3.633036725881948, "learning_rate": 9.073041598249757e-06, "loss": 0.7408, "step": 2722 }, { "epoch": 0.22, "grad_norm": 9.987181530244671, "learning_rate": 9.072278536652107e-06, "loss": 0.7306, "step": 2723 }, { "epoch": 0.22, "grad_norm": 3.8200064178265314, "learning_rate": 9.071515193227145e-06, "loss": 0.6324, "step": 2724 }, { "epoch": 0.22, "grad_norm": 4.895201551282844, "learning_rate": 9.070751568027691e-06, "loss": 0.7071, "step": 2725 }, { "epoch": 0.22, "grad_norm": 15.408219559885941, "learning_rate": 9.0699876611066e-06, "loss": 0.7833, "step": 2726 }, { "epoch": 0.22, "grad_norm": 8.003758363197623, "learning_rate": 9.069223472516736e-06, "loss": 0.5875, "step": 2727 }, { "epoch": 0.22, "grad_norm": 3.7300051283717215, "learning_rate": 9.068459002310983e-06, "loss": 0.6757, "step": 2728 }, { "epoch": 0.22, "grad_norm": 4.99193212662554, "learning_rate": 9.067694250542252e-06, "loss": 0.6082, "step": 2729 }, { "epoch": 0.22, "grad_norm": 11.404216329577935, "learning_rate": 9.066929217263465e-06, "loss": 0.8323, "step": 2730 }, { "epoch": 0.22, "grad_norm": 5.948284416130249, "learning_rate": 9.066163902527571e-06, "loss": 0.815, "step": 2731 }, { "epoch": 0.22, "grad_norm": 5.550798934221118, "learning_rate": 9.065398306387532e-06, "loss": 0.7345, "step": 2732 }, { "epoch": 0.22, "grad_norm": 5.943286957990646, "learning_rate": 9.064632428896331e-06, "loss": 0.757, "step": 2733 }, { "epoch": 0.22, "grad_norm": 9.29412293612629, "learning_rate": 9.063866270106972e-06, "loss": 0.8429, "step": 2734 }, { "epoch": 0.22, "grad_norm": 4.3469635273621146, "learning_rate": 9.063099830072482e-06, "loss": 0.6731, "step": 2735 }, { "epoch": 0.22, "grad_norm": 12.67535621903259, "learning_rate": 9.062333108845897e-06, "loss": 0.8227, "step": 2736 }, { "epoch": 0.22, "grad_norm": 4.946492570529726, "learning_rate": 9.061566106480283e-06, "loss": 0.8305, "step": 2737 }, { "epoch": 0.22, "grad_norm": 7.3608034825731705, "learning_rate": 9.060798823028722e-06, "loss": 0.7179, "step": 2738 }, { "epoch": 0.22, "grad_norm": 3.8846830053711408, "learning_rate": 9.060031258544313e-06, "loss": 0.6044, "step": 2739 }, { "epoch": 0.22, "grad_norm": 3.090659456074845, "learning_rate": 9.059263413080178e-06, "loss": 0.7603, "step": 2740 }, { "epoch": 0.22, "grad_norm": 11.908399498640355, "learning_rate": 9.058495286689454e-06, "loss": 0.7644, "step": 2741 }, { "epoch": 0.22, "grad_norm": 15.029772778090752, "learning_rate": 9.057726879425302e-06, "loss": 0.758, "step": 2742 }, { "epoch": 0.22, "grad_norm": 4.891524469156882, "learning_rate": 9.0569581913409e-06, "loss": 0.8149, "step": 2743 }, { "epoch": 0.22, "grad_norm": 12.510455052677278, "learning_rate": 9.056189222489448e-06, "loss": 0.8281, "step": 2744 }, { "epoch": 0.22, "grad_norm": 2.848452257463674, "learning_rate": 9.055419972924161e-06, "loss": 0.7077, "step": 2745 }, { "epoch": 0.22, "grad_norm": 7.5548896297754355, "learning_rate": 9.054650442698276e-06, "loss": 0.5451, "step": 2746 }, { "epoch": 0.22, "grad_norm": 4.354326541771681, "learning_rate": 9.05388063186505e-06, "loss": 0.7335, "step": 2747 }, { "epoch": 0.22, "grad_norm": 4.195021040256123, "learning_rate": 9.053110540477762e-06, "loss": 0.7195, "step": 2748 }, { "epoch": 0.22, "grad_norm": 3.6155431524507655, "learning_rate": 9.052340168589702e-06, "loss": 0.8022, "step": 2749 }, { "epoch": 0.22, "grad_norm": 7.4616120963422, "learning_rate": 9.051569516254186e-06, "loss": 0.7934, "step": 2750 }, { "epoch": 0.22, "grad_norm": 3.589843109233797, "learning_rate": 9.050798583524549e-06, "loss": 0.7515, "step": 2751 }, { "epoch": 0.22, "grad_norm": 3.6711177946861135, "learning_rate": 9.050027370454146e-06, "loss": 0.7092, "step": 2752 }, { "epoch": 0.22, "grad_norm": 5.392293003814468, "learning_rate": 9.049255877096346e-06, "loss": 0.7708, "step": 2753 }, { "epoch": 0.22, "grad_norm": 2.708608722137364, "learning_rate": 9.048484103504542e-06, "loss": 0.7861, "step": 2754 }, { "epoch": 0.22, "grad_norm": 3.083993388669979, "learning_rate": 9.04771204973215e-06, "loss": 0.6698, "step": 2755 }, { "epoch": 0.22, "grad_norm": 3.326937002207549, "learning_rate": 9.046939715832595e-06, "loss": 0.7664, "step": 2756 }, { "epoch": 0.22, "grad_norm": 3.3260271832751815, "learning_rate": 9.046167101859332e-06, "loss": 0.6076, "step": 2757 }, { "epoch": 0.22, "grad_norm": 7.1350910382500174, "learning_rate": 9.045394207865826e-06, "loss": 0.5636, "step": 2758 }, { "epoch": 0.22, "grad_norm": 7.346422187879488, "learning_rate": 9.04462103390557e-06, "loss": 0.7002, "step": 2759 }, { "epoch": 0.22, "grad_norm": 6.21289492987418, "learning_rate": 9.043847580032072e-06, "loss": 0.7792, "step": 2760 }, { "epoch": 0.22, "grad_norm": 4.108704503870191, "learning_rate": 9.04307384629886e-06, "loss": 0.6693, "step": 2761 }, { "epoch": 0.22, "grad_norm": 4.064972310908145, "learning_rate": 9.04229983275948e-06, "loss": 0.8571, "step": 2762 }, { "epoch": 0.22, "grad_norm": 3.3714328539508003, "learning_rate": 9.041525539467498e-06, "loss": 0.6904, "step": 2763 }, { "epoch": 0.22, "grad_norm": 3.7886605911104545, "learning_rate": 9.040750966476502e-06, "loss": 0.8888, "step": 2764 }, { "epoch": 0.22, "grad_norm": 4.970085572119584, "learning_rate": 9.039976113840097e-06, "loss": 0.8186, "step": 2765 }, { "epoch": 0.22, "grad_norm": 5.123548129494378, "learning_rate": 9.039200981611907e-06, "loss": 0.8157, "step": 2766 }, { "epoch": 0.22, "grad_norm": 5.515641156549808, "learning_rate": 9.038425569845574e-06, "loss": 0.8627, "step": 2767 }, { "epoch": 0.22, "grad_norm": 3.951729168884959, "learning_rate": 9.037649878594766e-06, "loss": 0.7646, "step": 2768 }, { "epoch": 0.22, "grad_norm": 5.459338968085195, "learning_rate": 9.036873907913163e-06, "loss": 0.5484, "step": 2769 }, { "epoch": 0.22, "grad_norm": 6.414953471945675, "learning_rate": 9.036097657854467e-06, "loss": 0.7407, "step": 2770 }, { "epoch": 0.23, "grad_norm": 3.3980258179149323, "learning_rate": 9.035321128472398e-06, "loss": 0.674, "step": 2771 }, { "epoch": 0.23, "grad_norm": 5.128790761681696, "learning_rate": 9.034544319820701e-06, "loss": 0.7561, "step": 2772 }, { "epoch": 0.23, "grad_norm": 3.0122225874327375, "learning_rate": 9.033767231953131e-06, "loss": 0.6936, "step": 2773 }, { "epoch": 0.23, "grad_norm": 7.521065240423371, "learning_rate": 9.032989864923474e-06, "loss": 0.6843, "step": 2774 }, { "epoch": 0.23, "grad_norm": 5.552363230323675, "learning_rate": 9.032212218785521e-06, "loss": 0.7114, "step": 2775 }, { "epoch": 0.23, "grad_norm": 5.524191250555285, "learning_rate": 9.031434293593094e-06, "loss": 0.8634, "step": 2776 }, { "epoch": 0.23, "grad_norm": 25.952765763226378, "learning_rate": 9.03065608940003e-06, "loss": 0.7736, "step": 2777 }, { "epoch": 0.23, "grad_norm": 8.601359586721653, "learning_rate": 9.029877606260187e-06, "loss": 0.6508, "step": 2778 }, { "epoch": 0.23, "grad_norm": 4.631633213230639, "learning_rate": 9.029098844227438e-06, "loss": 0.6534, "step": 2779 }, { "epoch": 0.23, "grad_norm": 4.900497168771613, "learning_rate": 9.02831980335568e-06, "loss": 0.7626, "step": 2780 }, { "epoch": 0.23, "grad_norm": 3.542578512079863, "learning_rate": 9.027540483698828e-06, "loss": 0.8199, "step": 2781 }, { "epoch": 0.23, "grad_norm": 3.636061515855482, "learning_rate": 9.026760885310812e-06, "loss": 0.7583, "step": 2782 }, { "epoch": 0.23, "grad_norm": 6.484983563447593, "learning_rate": 9.02598100824559e-06, "loss": 0.6814, "step": 2783 }, { "epoch": 0.23, "grad_norm": 5.0532201747978505, "learning_rate": 9.025200852557135e-06, "loss": 0.9847, "step": 2784 }, { "epoch": 0.23, "grad_norm": 5.90432258345458, "learning_rate": 9.024420418299433e-06, "loss": 0.7339, "step": 2785 }, { "epoch": 0.23, "grad_norm": 3.20909135354148, "learning_rate": 9.0236397055265e-06, "loss": 0.9107, "step": 2786 }, { "epoch": 0.23, "grad_norm": 5.245169752474064, "learning_rate": 9.022858714292362e-06, "loss": 0.855, "step": 2787 }, { "epoch": 0.23, "grad_norm": 7.195303770794165, "learning_rate": 9.022077444651074e-06, "loss": 0.703, "step": 2788 }, { "epoch": 0.23, "grad_norm": 6.208064501030853, "learning_rate": 9.0212958966567e-06, "loss": 0.5855, "step": 2789 }, { "epoch": 0.23, "grad_norm": 6.6778823552188165, "learning_rate": 9.020514070363331e-06, "loss": 0.9221, "step": 2790 }, { "epoch": 0.23, "grad_norm": 3.917314724362079, "learning_rate": 9.019731965825072e-06, "loss": 0.6728, "step": 2791 }, { "epoch": 0.23, "grad_norm": 7.046068880236291, "learning_rate": 9.018949583096051e-06, "loss": 0.7425, "step": 2792 }, { "epoch": 0.23, "grad_norm": 17.78106361519418, "learning_rate": 9.018166922230413e-06, "loss": 0.6993, "step": 2793 }, { "epoch": 0.23, "grad_norm": 5.411868695428226, "learning_rate": 9.017383983282325e-06, "loss": 0.8871, "step": 2794 }, { "epoch": 0.23, "grad_norm": 5.482486583475973, "learning_rate": 9.016600766305967e-06, "loss": 0.6458, "step": 2795 }, { "epoch": 0.23, "grad_norm": 6.48750953029335, "learning_rate": 9.015817271355549e-06, "loss": 0.776, "step": 2796 }, { "epoch": 0.23, "grad_norm": 6.9090477126764, "learning_rate": 9.015033498485287e-06, "loss": 0.6723, "step": 2797 }, { "epoch": 0.23, "grad_norm": 10.674092993281306, "learning_rate": 9.014249447749429e-06, "loss": 0.8224, "step": 2798 }, { "epoch": 0.23, "grad_norm": 11.969592780783005, "learning_rate": 9.01346511920223e-06, "loss": 0.7704, "step": 2799 }, { "epoch": 0.23, "grad_norm": 10.157812541375764, "learning_rate": 9.012680512897975e-06, "loss": 0.686, "step": 2800 }, { "epoch": 0.23, "grad_norm": 6.468509377620594, "learning_rate": 9.011895628890964e-06, "loss": 0.7035, "step": 2801 }, { "epoch": 0.23, "grad_norm": 7.7116699413186405, "learning_rate": 9.011110467235515e-06, "loss": 0.679, "step": 2802 }, { "epoch": 0.23, "grad_norm": 8.19715850556492, "learning_rate": 9.010325027985964e-06, "loss": 0.5679, "step": 2803 }, { "epoch": 0.23, "grad_norm": 159.1343689802648, "learning_rate": 9.00953931119667e-06, "loss": 0.6765, "step": 2804 }, { "epoch": 0.23, "grad_norm": 4.8607421455164115, "learning_rate": 9.00875331692201e-06, "loss": 0.8127, "step": 2805 }, { "epoch": 0.23, "grad_norm": 6.117314737617798, "learning_rate": 9.00796704521638e-06, "loss": 0.7689, "step": 2806 }, { "epoch": 0.23, "grad_norm": 6.54933432155176, "learning_rate": 9.007180496134193e-06, "loss": 0.8096, "step": 2807 }, { "epoch": 0.23, "grad_norm": 2.870586844463507, "learning_rate": 9.006393669729885e-06, "loss": 0.7061, "step": 2808 }, { "epoch": 0.23, "grad_norm": 9.807046624109839, "learning_rate": 9.005606566057908e-06, "loss": 0.8141, "step": 2809 }, { "epoch": 0.23, "grad_norm": 3.5340386115084863, "learning_rate": 9.004819185172735e-06, "loss": 0.8112, "step": 2810 }, { "epoch": 0.23, "grad_norm": 4.063250975591392, "learning_rate": 9.00403152712886e-06, "loss": 0.83, "step": 2811 }, { "epoch": 0.23, "grad_norm": 5.7301654516006355, "learning_rate": 9.003243591980791e-06, "loss": 0.6636, "step": 2812 }, { "epoch": 0.23, "grad_norm": 10.976593614344091, "learning_rate": 9.002455379783057e-06, "loss": 0.782, "step": 2813 }, { "epoch": 0.23, "grad_norm": 4.576247375781676, "learning_rate": 9.00166689059021e-06, "loss": 0.8233, "step": 2814 }, { "epoch": 0.23, "grad_norm": 3.031119383054468, "learning_rate": 9.00087812445682e-06, "loss": 0.677, "step": 2815 }, { "epoch": 0.23, "grad_norm": 5.283066300402596, "learning_rate": 9.00008908143747e-06, "loss": 0.6836, "step": 2816 }, { "epoch": 0.23, "grad_norm": 5.876115062606732, "learning_rate": 8.999299761586768e-06, "loss": 0.744, "step": 2817 }, { "epoch": 0.23, "grad_norm": 4.627007787847743, "learning_rate": 8.998510164959344e-06, "loss": 0.6885, "step": 2818 }, { "epoch": 0.23, "grad_norm": 6.103590004582193, "learning_rate": 8.997720291609837e-06, "loss": 0.7772, "step": 2819 }, { "epoch": 0.23, "grad_norm": 7.5357072138337475, "learning_rate": 8.996930141592915e-06, "loss": 0.6992, "step": 2820 }, { "epoch": 0.23, "grad_norm": 6.1759140276645, "learning_rate": 8.996139714963262e-06, "loss": 0.682, "step": 2821 }, { "epoch": 0.23, "grad_norm": 4.082556187949097, "learning_rate": 8.995349011775577e-06, "loss": 0.6829, "step": 2822 }, { "epoch": 0.23, "grad_norm": 5.076183239269856, "learning_rate": 8.994558032084583e-06, "loss": 0.705, "step": 2823 }, { "epoch": 0.23, "grad_norm": 7.688959602021805, "learning_rate": 8.993766775945023e-06, "loss": 0.6444, "step": 2824 }, { "epoch": 0.23, "grad_norm": 4.582962583972632, "learning_rate": 8.992975243411655e-06, "loss": 0.8809, "step": 2825 }, { "epoch": 0.23, "grad_norm": 5.716913247496087, "learning_rate": 8.992183434539257e-06, "loss": 0.7502, "step": 2826 }, { "epoch": 0.23, "grad_norm": 9.799116045158838, "learning_rate": 8.99139134938263e-06, "loss": 0.81, "step": 2827 }, { "epoch": 0.23, "grad_norm": 5.775335489654277, "learning_rate": 8.99059898799659e-06, "loss": 0.8494, "step": 2828 }, { "epoch": 0.23, "grad_norm": 4.664086288782057, "learning_rate": 8.989806350435972e-06, "loss": 0.6815, "step": 2829 }, { "epoch": 0.23, "grad_norm": 7.566274278442036, "learning_rate": 8.989013436755633e-06, "loss": 0.6916, "step": 2830 }, { "epoch": 0.23, "grad_norm": 8.41029249038631, "learning_rate": 8.988220247010448e-06, "loss": 0.7804, "step": 2831 }, { "epoch": 0.23, "grad_norm": 13.63422625877445, "learning_rate": 8.987426781255309e-06, "loss": 0.7555, "step": 2832 }, { "epoch": 0.23, "grad_norm": 3.968314691032537, "learning_rate": 8.98663303954513e-06, "loss": 0.6407, "step": 2833 }, { "epoch": 0.23, "grad_norm": 4.576776974928141, "learning_rate": 8.985839021934843e-06, "loss": 0.5905, "step": 2834 }, { "epoch": 0.23, "grad_norm": 10.79745550370275, "learning_rate": 8.9850447284794e-06, "loss": 0.7459, "step": 2835 }, { "epoch": 0.23, "grad_norm": 4.140438408403593, "learning_rate": 8.984250159233767e-06, "loss": 0.7428, "step": 2836 }, { "epoch": 0.23, "grad_norm": 10.106899792737101, "learning_rate": 8.983455314252938e-06, "loss": 0.685, "step": 2837 }, { "epoch": 0.23, "grad_norm": 3.2763532351510856, "learning_rate": 8.98266019359192e-06, "loss": 0.7423, "step": 2838 }, { "epoch": 0.23, "grad_norm": 3.7270712753211215, "learning_rate": 8.981864797305738e-06, "loss": 0.8173, "step": 2839 }, { "epoch": 0.23, "grad_norm": 5.818057949924994, "learning_rate": 8.981069125449442e-06, "loss": 0.6716, "step": 2840 }, { "epoch": 0.23, "grad_norm": 10.871715122164675, "learning_rate": 8.980273178078093e-06, "loss": 0.6722, "step": 2841 }, { "epoch": 0.23, "grad_norm": 4.17038667688079, "learning_rate": 8.97947695524678e-06, "loss": 0.8945, "step": 2842 }, { "epoch": 0.23, "grad_norm": 3.0193063113500447, "learning_rate": 8.978680457010604e-06, "loss": 0.6195, "step": 2843 }, { "epoch": 0.23, "grad_norm": 5.12050083063855, "learning_rate": 8.977883683424689e-06, "loss": 0.7538, "step": 2844 }, { "epoch": 0.23, "grad_norm": 10.17477056381511, "learning_rate": 8.977086634544176e-06, "loss": 0.607, "step": 2845 }, { "epoch": 0.23, "grad_norm": 4.778498388808307, "learning_rate": 8.976289310424227e-06, "loss": 0.8404, "step": 2846 }, { "epoch": 0.23, "grad_norm": 10.725442519310553, "learning_rate": 8.97549171112002e-06, "loss": 0.6572, "step": 2847 }, { "epoch": 0.23, "grad_norm": 5.798472030578463, "learning_rate": 8.974693836686755e-06, "loss": 0.7007, "step": 2848 }, { "epoch": 0.23, "grad_norm": 3.3678798962129974, "learning_rate": 8.97389568717965e-06, "loss": 0.6143, "step": 2849 }, { "epoch": 0.23, "grad_norm": 5.005002754914163, "learning_rate": 8.973097262653942e-06, "loss": 0.7606, "step": 2850 }, { "epoch": 0.23, "grad_norm": 6.6915528397723545, "learning_rate": 8.972298563164886e-06, "loss": 0.7101, "step": 2851 }, { "epoch": 0.23, "grad_norm": 2.7458693390509934, "learning_rate": 8.971499588767758e-06, "loss": 0.7373, "step": 2852 }, { "epoch": 0.23, "grad_norm": 3.2718898789834117, "learning_rate": 8.970700339517853e-06, "loss": 0.7791, "step": 2853 }, { "epoch": 0.23, "grad_norm": 8.39732457615963, "learning_rate": 8.96990081547048e-06, "loss": 0.7041, "step": 2854 }, { "epoch": 0.23, "grad_norm": 3.1767190404039245, "learning_rate": 8.969101016680977e-06, "loss": 0.6039, "step": 2855 }, { "epoch": 0.23, "grad_norm": 3.7462376424204877, "learning_rate": 8.96830094320469e-06, "loss": 0.8686, "step": 2856 }, { "epoch": 0.23, "grad_norm": 5.759937543816016, "learning_rate": 8.967500595096994e-06, "loss": 0.8381, "step": 2857 }, { "epoch": 0.23, "grad_norm": 4.950413113827954, "learning_rate": 8.966699972413274e-06, "loss": 0.6799, "step": 2858 }, { "epoch": 0.23, "grad_norm": 4.7334578789563, "learning_rate": 8.965899075208939e-06, "loss": 0.6635, "step": 2859 }, { "epoch": 0.23, "grad_norm": 7.772266338772254, "learning_rate": 8.965097903539416e-06, "loss": 0.6693, "step": 2860 }, { "epoch": 0.23, "grad_norm": 3.256297534265736, "learning_rate": 8.964296457460152e-06, "loss": 0.8322, "step": 2861 }, { "epoch": 0.23, "grad_norm": 3.5487616498954755, "learning_rate": 8.963494737026612e-06, "loss": 0.7519, "step": 2862 }, { "epoch": 0.23, "grad_norm": 8.344681829474355, "learning_rate": 8.96269274229428e-06, "loss": 0.7004, "step": 2863 }, { "epoch": 0.23, "grad_norm": 8.468238635543933, "learning_rate": 8.96189047331866e-06, "loss": 0.8048, "step": 2864 }, { "epoch": 0.23, "grad_norm": 4.781805468608929, "learning_rate": 8.961087930155273e-06, "loss": 0.8814, "step": 2865 }, { "epoch": 0.23, "grad_norm": 7.215505993559402, "learning_rate": 8.96028511285966e-06, "loss": 0.7634, "step": 2866 }, { "epoch": 0.23, "grad_norm": 4.2756190959970235, "learning_rate": 8.95948202148738e-06, "loss": 0.7938, "step": 2867 }, { "epoch": 0.23, "grad_norm": 4.094251268260469, "learning_rate": 8.958678656094016e-06, "loss": 0.8088, "step": 2868 }, { "epoch": 0.23, "grad_norm": 6.077152939904661, "learning_rate": 8.95787501673516e-06, "loss": 0.7624, "step": 2869 }, { "epoch": 0.23, "grad_norm": 3.7708214250193173, "learning_rate": 8.957071103466433e-06, "loss": 0.898, "step": 2870 }, { "epoch": 0.23, "grad_norm": 5.349001255416674, "learning_rate": 8.95626691634347e-06, "loss": 0.6955, "step": 2871 }, { "epoch": 0.23, "grad_norm": 12.180066712533977, "learning_rate": 8.955462455421927e-06, "loss": 0.815, "step": 2872 }, { "epoch": 0.23, "grad_norm": 4.115698496278884, "learning_rate": 8.954657720757474e-06, "loss": 0.7697, "step": 2873 }, { "epoch": 0.23, "grad_norm": 2.9431212575479817, "learning_rate": 8.953852712405808e-06, "loss": 0.6371, "step": 2874 }, { "epoch": 0.23, "grad_norm": 41.665402554757456, "learning_rate": 8.953047430422637e-06, "loss": 0.6509, "step": 2875 }, { "epoch": 0.23, "grad_norm": 7.26980610478533, "learning_rate": 8.952241874863695e-06, "loss": 0.7843, "step": 2876 }, { "epoch": 0.23, "grad_norm": 11.2514832443559, "learning_rate": 8.95143604578473e-06, "loss": 0.6886, "step": 2877 }, { "epoch": 0.23, "grad_norm": 5.650493882294282, "learning_rate": 8.950629943241509e-06, "loss": 0.7846, "step": 2878 }, { "epoch": 0.23, "grad_norm": 3.2060699417501985, "learning_rate": 8.94982356728982e-06, "loss": 0.6502, "step": 2879 }, { "epoch": 0.23, "grad_norm": 7.744698136827544, "learning_rate": 8.94901691798547e-06, "loss": 0.6769, "step": 2880 }, { "epoch": 0.23, "grad_norm": 3.3738188460740184, "learning_rate": 8.948209995384288e-06, "loss": 0.743, "step": 2881 }, { "epoch": 0.23, "grad_norm": 3.208656472987335, "learning_rate": 8.947402799542111e-06, "loss": 0.71, "step": 2882 }, { "epoch": 0.23, "grad_norm": 4.9103193498062545, "learning_rate": 8.946595330514807e-06, "loss": 0.7137, "step": 2883 }, { "epoch": 0.23, "grad_norm": 2.8310233675017726, "learning_rate": 8.945787588358255e-06, "loss": 0.6973, "step": 2884 }, { "epoch": 0.23, "grad_norm": 4.795397985986916, "learning_rate": 8.944979573128358e-06, "loss": 0.5901, "step": 2885 }, { "epoch": 0.23, "grad_norm": 3.787550220829312, "learning_rate": 8.944171284881035e-06, "loss": 0.8325, "step": 2886 }, { "epoch": 0.23, "grad_norm": 2.8658509970749644, "learning_rate": 8.943362723672225e-06, "loss": 0.7268, "step": 2887 }, { "epoch": 0.23, "grad_norm": 4.1679513736456935, "learning_rate": 8.942553889557883e-06, "loss": 0.7348, "step": 2888 }, { "epoch": 0.23, "grad_norm": 4.023436653729692, "learning_rate": 8.941744782593989e-06, "loss": 0.6086, "step": 2889 }, { "epoch": 0.23, "grad_norm": 3.1176196767647446, "learning_rate": 8.940935402836535e-06, "loss": 0.7208, "step": 2890 }, { "epoch": 0.23, "grad_norm": 3.877078572220708, "learning_rate": 8.940125750341539e-06, "loss": 0.6787, "step": 2891 }, { "epoch": 0.23, "grad_norm": 4.580160887932237, "learning_rate": 8.939315825165032e-06, "loss": 0.746, "step": 2892 }, { "epoch": 0.23, "grad_norm": 3.9396391316635353, "learning_rate": 8.938505627363065e-06, "loss": 0.6851, "step": 2893 }, { "epoch": 0.24, "grad_norm": 3.2973651654376344, "learning_rate": 8.937695156991711e-06, "loss": 0.7109, "step": 2894 }, { "epoch": 0.24, "grad_norm": 4.751102448978402, "learning_rate": 8.936884414107056e-06, "loss": 0.7315, "step": 2895 }, { "epoch": 0.24, "grad_norm": 3.631403945039312, "learning_rate": 8.936073398765212e-06, "loss": 0.6349, "step": 2896 }, { "epoch": 0.24, "grad_norm": 4.536280341361928, "learning_rate": 8.935262111022306e-06, "loss": 0.8574, "step": 2897 }, { "epoch": 0.24, "grad_norm": 3.6673633731076545, "learning_rate": 8.934450550934483e-06, "loss": 0.6901, "step": 2898 }, { "epoch": 0.24, "grad_norm": 3.7937151047372013, "learning_rate": 8.933638718557908e-06, "loss": 0.6662, "step": 2899 }, { "epoch": 0.24, "grad_norm": 3.102159245358778, "learning_rate": 8.932826613948767e-06, "loss": 0.6885, "step": 2900 }, { "epoch": 0.24, "grad_norm": 3.9685981120214713, "learning_rate": 8.932014237163259e-06, "loss": 0.7894, "step": 2901 }, { "epoch": 0.24, "grad_norm": 3.3082226075129015, "learning_rate": 8.931201588257609e-06, "loss": 0.9473, "step": 2902 }, { "epoch": 0.24, "grad_norm": 3.7680416084117185, "learning_rate": 8.930388667288055e-06, "loss": 0.5511, "step": 2903 }, { "epoch": 0.24, "grad_norm": 3.0563907102177894, "learning_rate": 8.92957547431086e-06, "loss": 0.7402, "step": 2904 }, { "epoch": 0.24, "grad_norm": 4.761138966287189, "learning_rate": 8.928762009382297e-06, "loss": 0.5399, "step": 2905 }, { "epoch": 0.24, "grad_norm": 5.285451266079011, "learning_rate": 8.927948272558666e-06, "loss": 0.7228, "step": 2906 }, { "epoch": 0.24, "grad_norm": 4.714409275034327, "learning_rate": 8.927134263896284e-06, "loss": 0.7647, "step": 2907 }, { "epoch": 0.24, "grad_norm": 4.049724201601192, "learning_rate": 8.926319983451481e-06, "loss": 0.8375, "step": 2908 }, { "epoch": 0.24, "grad_norm": 4.3866914038557345, "learning_rate": 8.925505431280615e-06, "loss": 0.8092, "step": 2909 }, { "epoch": 0.24, "grad_norm": 3.2641281404544316, "learning_rate": 8.924690607440055e-06, "loss": 0.6129, "step": 2910 }, { "epoch": 0.24, "grad_norm": 4.199554759709983, "learning_rate": 8.923875511986193e-06, "loss": 0.6647, "step": 2911 }, { "epoch": 0.24, "grad_norm": 5.216177254280003, "learning_rate": 8.92306014497544e-06, "loss": 0.6786, "step": 2912 }, { "epoch": 0.24, "grad_norm": 4.739850240580817, "learning_rate": 8.92224450646422e-06, "loss": 0.7789, "step": 2913 }, { "epoch": 0.24, "grad_norm": 3.751931414325418, "learning_rate": 8.92142859650899e-06, "loss": 0.7805, "step": 2914 }, { "epoch": 0.24, "grad_norm": 3.418777866813581, "learning_rate": 8.920612415166206e-06, "loss": 0.7217, "step": 2915 }, { "epoch": 0.24, "grad_norm": 4.049103202621135, "learning_rate": 8.919795962492354e-06, "loss": 0.7773, "step": 2916 }, { "epoch": 0.24, "grad_norm": 25.230412440481512, "learning_rate": 8.918979238543944e-06, "loss": 0.6934, "step": 2917 }, { "epoch": 0.24, "grad_norm": 4.160417945895964, "learning_rate": 8.918162243377494e-06, "loss": 0.8345, "step": 2918 }, { "epoch": 0.24, "grad_norm": 4.818048401033922, "learning_rate": 8.917344977049546e-06, "loss": 0.8726, "step": 2919 }, { "epoch": 0.24, "grad_norm": 8.433136039723975, "learning_rate": 8.91652743961666e-06, "loss": 0.5962, "step": 2920 }, { "epoch": 0.24, "grad_norm": 2.9957261984543777, "learning_rate": 8.915709631135414e-06, "loss": 0.8819, "step": 2921 }, { "epoch": 0.24, "grad_norm": 5.653362698214063, "learning_rate": 8.914891551662406e-06, "loss": 0.695, "step": 2922 }, { "epoch": 0.24, "grad_norm": 8.198581345468392, "learning_rate": 8.914073201254253e-06, "loss": 0.8013, "step": 2923 }, { "epoch": 0.24, "grad_norm": 7.2519737607953605, "learning_rate": 8.91325457996759e-06, "loss": 0.8766, "step": 2924 }, { "epoch": 0.24, "grad_norm": 4.284678448884324, "learning_rate": 8.912435687859068e-06, "loss": 0.8102, "step": 2925 }, { "epoch": 0.24, "grad_norm": 4.972797496955421, "learning_rate": 8.911616524985364e-06, "loss": 0.7795, "step": 2926 }, { "epoch": 0.24, "grad_norm": 4.320373873162612, "learning_rate": 8.910797091403166e-06, "loss": 0.6979, "step": 2927 }, { "epoch": 0.24, "grad_norm": 3.4464603496627686, "learning_rate": 8.909977387169185e-06, "loss": 0.6842, "step": 2928 }, { "epoch": 0.24, "grad_norm": 6.110748688570022, "learning_rate": 8.90915741234015e-06, "loss": 0.6581, "step": 2929 }, { "epoch": 0.24, "grad_norm": 8.03880441712925, "learning_rate": 8.908337166972807e-06, "loss": 0.7596, "step": 2930 }, { "epoch": 0.24, "grad_norm": 3.2612986682443843, "learning_rate": 8.907516651123925e-06, "loss": 0.7736, "step": 2931 }, { "epoch": 0.24, "grad_norm": 3.9220094671982237, "learning_rate": 8.906695864850284e-06, "loss": 0.8371, "step": 2932 }, { "epoch": 0.24, "grad_norm": 8.216047649079009, "learning_rate": 8.905874808208692e-06, "loss": 0.6946, "step": 2933 }, { "epoch": 0.24, "grad_norm": 4.477608413845603, "learning_rate": 8.90505348125597e-06, "loss": 0.6315, "step": 2934 }, { "epoch": 0.24, "grad_norm": 3.1300250509081033, "learning_rate": 8.90423188404896e-06, "loss": 0.7536, "step": 2935 }, { "epoch": 0.24, "grad_norm": 2.7445509532532855, "learning_rate": 8.903410016644518e-06, "loss": 0.7202, "step": 2936 }, { "epoch": 0.24, "grad_norm": 6.176002590916825, "learning_rate": 8.902587879099527e-06, "loss": 0.8109, "step": 2937 }, { "epoch": 0.24, "grad_norm": 2.7247398817132495, "learning_rate": 8.901765471470882e-06, "loss": 0.4841, "step": 2938 }, { "epoch": 0.24, "grad_norm": 4.09224987020506, "learning_rate": 8.900942793815498e-06, "loss": 0.7496, "step": 2939 }, { "epoch": 0.24, "grad_norm": 4.7155817858854885, "learning_rate": 8.90011984619031e-06, "loss": 0.7336, "step": 2940 }, { "epoch": 0.24, "grad_norm": 2.7569497434953756, "learning_rate": 8.899296628652272e-06, "loss": 0.5279, "step": 2941 }, { "epoch": 0.24, "grad_norm": 14.99718403013933, "learning_rate": 8.898473141258356e-06, "loss": 0.7909, "step": 2942 }, { "epoch": 0.24, "grad_norm": 7.3153697290077995, "learning_rate": 8.897649384065552e-06, "loss": 0.713, "step": 2943 }, { "epoch": 0.24, "grad_norm": 4.577070504939822, "learning_rate": 8.896825357130867e-06, "loss": 0.4662, "step": 2944 }, { "epoch": 0.24, "grad_norm": 6.32892176040599, "learning_rate": 8.896001060511333e-06, "loss": 0.8103, "step": 2945 }, { "epoch": 0.24, "grad_norm": 6.723451276861912, "learning_rate": 8.895176494263993e-06, "loss": 0.745, "step": 2946 }, { "epoch": 0.24, "grad_norm": 4.524435486440204, "learning_rate": 8.894351658445913e-06, "loss": 0.8632, "step": 2947 }, { "epoch": 0.24, "grad_norm": 5.012111949529782, "learning_rate": 8.893526553114178e-06, "loss": 0.7762, "step": 2948 }, { "epoch": 0.24, "grad_norm": 5.983253921214527, "learning_rate": 8.89270117832589e-06, "loss": 0.8027, "step": 2949 }, { "epoch": 0.24, "grad_norm": 3.6157778015799646, "learning_rate": 8.89187553413817e-06, "loss": 0.7658, "step": 2950 }, { "epoch": 0.24, "grad_norm": 7.014843217231772, "learning_rate": 8.891049620608158e-06, "loss": 0.7264, "step": 2951 }, { "epoch": 0.24, "grad_norm": 3.916985475158266, "learning_rate": 8.890223437793012e-06, "loss": 0.7381, "step": 2952 }, { "epoch": 0.24, "grad_norm": 5.101418337055632, "learning_rate": 8.889396985749909e-06, "loss": 0.8047, "step": 2953 }, { "epoch": 0.24, "grad_norm": 30.985538830399083, "learning_rate": 8.888570264536046e-06, "loss": 0.7548, "step": 2954 }, { "epoch": 0.24, "grad_norm": 4.99969946714301, "learning_rate": 8.887743274208635e-06, "loss": 0.7697, "step": 2955 }, { "epoch": 0.24, "grad_norm": 3.6278033608690445, "learning_rate": 8.886916014824911e-06, "loss": 0.7013, "step": 2956 }, { "epoch": 0.24, "grad_norm": 4.221829947321943, "learning_rate": 8.886088486442124e-06, "loss": 0.8106, "step": 2957 }, { "epoch": 0.24, "grad_norm": 5.855650165755511, "learning_rate": 8.885260689117546e-06, "loss": 0.8342, "step": 2958 }, { "epoch": 0.24, "grad_norm": 4.8751913851337845, "learning_rate": 8.884432622908463e-06, "loss": 0.6958, "step": 2959 }, { "epoch": 0.24, "grad_norm": 26.064569563907508, "learning_rate": 8.883604287872186e-06, "loss": 0.6542, "step": 2960 }, { "epoch": 0.24, "grad_norm": 4.164135301125337, "learning_rate": 8.882775684066037e-06, "loss": 0.6284, "step": 2961 }, { "epoch": 0.24, "grad_norm": 4.322623650020969, "learning_rate": 8.881946811547364e-06, "loss": 0.742, "step": 2962 }, { "epoch": 0.24, "grad_norm": 4.151084763430534, "learning_rate": 8.881117670373528e-06, "loss": 0.8275, "step": 2963 }, { "epoch": 0.24, "grad_norm": 12.26965996564063, "learning_rate": 8.880288260601913e-06, "loss": 0.6818, "step": 2964 }, { "epoch": 0.24, "grad_norm": 3.5077286971251516, "learning_rate": 8.879458582289917e-06, "loss": 0.6596, "step": 2965 }, { "epoch": 0.24, "grad_norm": 4.730617930444589, "learning_rate": 8.878628635494961e-06, "loss": 0.8271, "step": 2966 }, { "epoch": 0.24, "grad_norm": 5.679268827529147, "learning_rate": 8.87779842027448e-06, "loss": 0.7606, "step": 2967 }, { "epoch": 0.24, "grad_norm": 3.663158013441438, "learning_rate": 8.876967936685933e-06, "loss": 0.7634, "step": 2968 }, { "epoch": 0.24, "grad_norm": 4.017154311013711, "learning_rate": 8.876137184786793e-06, "loss": 0.6235, "step": 2969 }, { "epoch": 0.24, "grad_norm": 6.613746315417937, "learning_rate": 8.875306164634554e-06, "loss": 0.8739, "step": 2970 }, { "epoch": 0.24, "grad_norm": 12.061884813730193, "learning_rate": 8.874474876286728e-06, "loss": 0.7142, "step": 2971 }, { "epoch": 0.24, "grad_norm": 3.1380666794402345, "learning_rate": 8.873643319800842e-06, "loss": 0.667, "step": 2972 }, { "epoch": 0.24, "grad_norm": 4.061328281557785, "learning_rate": 8.872811495234451e-06, "loss": 0.6998, "step": 2973 }, { "epoch": 0.24, "grad_norm": 3.7872838144424077, "learning_rate": 8.871979402645116e-06, "loss": 0.7295, "step": 2974 }, { "epoch": 0.24, "grad_norm": 5.767980903179702, "learning_rate": 8.871147042090428e-06, "loss": 0.7298, "step": 2975 }, { "epoch": 0.24, "grad_norm": 3.471594681483766, "learning_rate": 8.870314413627991e-06, "loss": 0.5385, "step": 2976 }, { "epoch": 0.24, "grad_norm": 4.0531376937887655, "learning_rate": 8.869481517315427e-06, "loss": 0.6623, "step": 2977 }, { "epoch": 0.24, "grad_norm": 19.57774768012508, "learning_rate": 8.868648353210377e-06, "loss": 0.7447, "step": 2978 }, { "epoch": 0.24, "grad_norm": 2.809809088960798, "learning_rate": 8.867814921370502e-06, "loss": 0.783, "step": 2979 }, { "epoch": 0.24, "grad_norm": 6.083747142893606, "learning_rate": 8.866981221853482e-06, "loss": 0.7387, "step": 2980 }, { "epoch": 0.24, "grad_norm": 4.296696723523334, "learning_rate": 8.86614725471701e-06, "loss": 0.8645, "step": 2981 }, { "epoch": 0.24, "grad_norm": 6.718845781139819, "learning_rate": 8.865313020018806e-06, "loss": 0.7093, "step": 2982 }, { "epoch": 0.24, "grad_norm": 3.94522223345727, "learning_rate": 8.864478517816604e-06, "loss": 0.7054, "step": 2983 }, { "epoch": 0.24, "grad_norm": 3.743995145549366, "learning_rate": 8.863643748168156e-06, "loss": 0.6699, "step": 2984 }, { "epoch": 0.24, "grad_norm": 4.678538963630396, "learning_rate": 8.862808711131232e-06, "loss": 0.7078, "step": 2985 }, { "epoch": 0.24, "grad_norm": 4.512166415645682, "learning_rate": 8.861973406763623e-06, "loss": 0.7259, "step": 2986 }, { "epoch": 0.24, "grad_norm": 3.308639076101463, "learning_rate": 8.861137835123137e-06, "loss": 0.8633, "step": 2987 }, { "epoch": 0.24, "grad_norm": 3.1698731790083743, "learning_rate": 8.860301996267601e-06, "loss": 0.7326, "step": 2988 }, { "epoch": 0.24, "grad_norm": 5.7846675006469885, "learning_rate": 8.859465890254861e-06, "loss": 0.8694, "step": 2989 }, { "epoch": 0.24, "grad_norm": 3.878481918281038, "learning_rate": 8.85862951714278e-06, "loss": 0.7486, "step": 2990 }, { "epoch": 0.24, "grad_norm": 6.233066220053629, "learning_rate": 8.857792876989241e-06, "loss": 0.9758, "step": 2991 }, { "epoch": 0.24, "grad_norm": 8.267107087713777, "learning_rate": 8.856955969852144e-06, "loss": 0.6349, "step": 2992 }, { "epoch": 0.24, "grad_norm": 4.16452610056301, "learning_rate": 8.856118795789408e-06, "loss": 0.6864, "step": 2993 }, { "epoch": 0.24, "grad_norm": 4.079853891757035, "learning_rate": 8.85528135485897e-06, "loss": 0.8241, "step": 2994 }, { "epoch": 0.24, "grad_norm": 2.452062790799971, "learning_rate": 8.85444364711879e-06, "loss": 0.648, "step": 2995 }, { "epoch": 0.24, "grad_norm": 5.035225754085019, "learning_rate": 8.853605672626839e-06, "loss": 0.6989, "step": 2996 }, { "epoch": 0.24, "grad_norm": 2.9659680279723117, "learning_rate": 8.852767431441111e-06, "loss": 0.6898, "step": 2997 }, { "epoch": 0.24, "grad_norm": 3.1360219037988175, "learning_rate": 8.851928923619617e-06, "loss": 0.801, "step": 2998 }, { "epoch": 0.24, "grad_norm": 6.939176580779341, "learning_rate": 8.85109014922039e-06, "loss": 0.7802, "step": 2999 }, { "epoch": 0.24, "grad_norm": 8.694564045091749, "learning_rate": 8.850251108301473e-06, "loss": 0.6329, "step": 3000 }, { "epoch": 0.24, "grad_norm": 6.227734515933598, "learning_rate": 8.849411800920938e-06, "loss": 0.5915, "step": 3001 }, { "epoch": 0.24, "grad_norm": 3.841397543367415, "learning_rate": 8.848572227136869e-06, "loss": 0.4682, "step": 3002 }, { "epoch": 0.24, "grad_norm": 2.902602930789638, "learning_rate": 8.847732387007369e-06, "loss": 0.6879, "step": 3003 }, { "epoch": 0.24, "grad_norm": 7.734585232478385, "learning_rate": 8.84689228059056e-06, "loss": 0.8703, "step": 3004 }, { "epoch": 0.24, "grad_norm": 3.2350269726057577, "learning_rate": 8.846051907944582e-06, "loss": 0.7505, "step": 3005 }, { "epoch": 0.24, "grad_norm": 3.9041982976320346, "learning_rate": 8.845211269127597e-06, "loss": 0.7281, "step": 3006 }, { "epoch": 0.24, "grad_norm": 7.260643312028547, "learning_rate": 8.844370364197781e-06, "loss": 0.7002, "step": 3007 }, { "epoch": 0.24, "grad_norm": 5.850802894543463, "learning_rate": 8.843529193213327e-06, "loss": 0.7696, "step": 3008 }, { "epoch": 0.24, "grad_norm": 3.5835452071859226, "learning_rate": 8.842687756232454e-06, "loss": 0.8905, "step": 3009 }, { "epoch": 0.24, "grad_norm": 5.366428063749266, "learning_rate": 8.841846053313392e-06, "loss": 0.846, "step": 3010 }, { "epoch": 0.24, "grad_norm": 4.557842788932391, "learning_rate": 8.841004084514394e-06, "loss": 0.6322, "step": 3011 }, { "epoch": 0.24, "grad_norm": 3.611465150609721, "learning_rate": 8.840161849893729e-06, "loss": 0.8319, "step": 3012 }, { "epoch": 0.24, "grad_norm": 3.948823286750815, "learning_rate": 8.839319349509683e-06, "loss": 0.6801, "step": 3013 }, { "epoch": 0.24, "grad_norm": 3.4502409186427605, "learning_rate": 8.838476583420562e-06, "loss": 0.68, "step": 3014 }, { "epoch": 0.24, "grad_norm": 13.732684267483572, "learning_rate": 8.837633551684695e-06, "loss": 0.8244, "step": 3015 }, { "epoch": 0.24, "grad_norm": 4.5115760396705475, "learning_rate": 8.83679025436042e-06, "loss": 0.762, "step": 3016 }, { "epoch": 0.25, "grad_norm": 4.407557344691762, "learning_rate": 8.835946691506103e-06, "loss": 0.805, "step": 3017 }, { "epoch": 0.25, "grad_norm": 6.606794455494425, "learning_rate": 8.835102863180123e-06, "loss": 0.7429, "step": 3018 }, { "epoch": 0.25, "grad_norm": 5.271796741961873, "learning_rate": 8.834258769440875e-06, "loss": 0.5733, "step": 3019 }, { "epoch": 0.25, "grad_norm": 5.009153662451588, "learning_rate": 8.833414410346777e-06, "loss": 0.7196, "step": 3020 }, { "epoch": 0.25, "grad_norm": 3.5986463336224723, "learning_rate": 8.832569785956267e-06, "loss": 0.7625, "step": 3021 }, { "epoch": 0.25, "grad_norm": 5.645621611539132, "learning_rate": 8.831724896327794e-06, "loss": 0.7989, "step": 3022 }, { "epoch": 0.25, "grad_norm": 7.344755117388516, "learning_rate": 8.830879741519831e-06, "loss": 0.7602, "step": 3023 }, { "epoch": 0.25, "grad_norm": 3.009304169840582, "learning_rate": 8.830034321590871e-06, "loss": 0.7909, "step": 3024 }, { "epoch": 0.25, "grad_norm": 3.839214190877482, "learning_rate": 8.82918863659942e-06, "loss": 0.7278, "step": 3025 }, { "epoch": 0.25, "grad_norm": 4.697837699981018, "learning_rate": 8.828342686604004e-06, "loss": 0.7607, "step": 3026 }, { "epoch": 0.25, "grad_norm": 18.52590714108115, "learning_rate": 8.827496471663169e-06, "loss": 0.799, "step": 3027 }, { "epoch": 0.25, "grad_norm": 3.5046475953453884, "learning_rate": 8.826649991835476e-06, "loss": 0.8405, "step": 3028 }, { "epoch": 0.25, "grad_norm": 3.3402376933912796, "learning_rate": 8.825803247179512e-06, "loss": 0.6462, "step": 3029 }, { "epoch": 0.25, "grad_norm": 3.794294837183767, "learning_rate": 8.824956237753872e-06, "loss": 0.7486, "step": 3030 }, { "epoch": 0.25, "grad_norm": 4.738082352840565, "learning_rate": 8.824108963617177e-06, "loss": 0.5948, "step": 3031 }, { "epoch": 0.25, "grad_norm": 9.682229478473296, "learning_rate": 8.823261424828064e-06, "loss": 0.7001, "step": 3032 }, { "epoch": 0.25, "grad_norm": 2.9608653386945707, "learning_rate": 8.822413621445188e-06, "loss": 0.8597, "step": 3033 }, { "epoch": 0.25, "grad_norm": 5.439436375748, "learning_rate": 8.821565553527218e-06, "loss": 0.7269, "step": 3034 }, { "epoch": 0.25, "grad_norm": 5.9023024152372505, "learning_rate": 8.820717221132854e-06, "loss": 0.8707, "step": 3035 }, { "epoch": 0.25, "grad_norm": 10.05615509544385, "learning_rate": 8.819868624320797e-06, "loss": 0.7602, "step": 3036 }, { "epoch": 0.25, "grad_norm": 2.785187791692384, "learning_rate": 8.81901976314978e-06, "loss": 0.745, "step": 3037 }, { "epoch": 0.25, "grad_norm": 2.870028343890995, "learning_rate": 8.818170637678549e-06, "loss": 0.6221, "step": 3038 }, { "epoch": 0.25, "grad_norm": 3.532454302973348, "learning_rate": 8.817321247965872e-06, "loss": 0.6828, "step": 3039 }, { "epoch": 0.25, "grad_norm": 6.307726204717456, "learning_rate": 8.816471594070523e-06, "loss": 0.6429, "step": 3040 }, { "epoch": 0.25, "grad_norm": 3.1197549955973014, "learning_rate": 8.815621676051313e-06, "loss": 0.6803, "step": 3041 }, { "epoch": 0.25, "grad_norm": 5.556611672109788, "learning_rate": 8.814771493967058e-06, "loss": 0.8512, "step": 3042 }, { "epoch": 0.25, "grad_norm": 3.583272860994243, "learning_rate": 8.813921047876595e-06, "loss": 0.5224, "step": 3043 }, { "epoch": 0.25, "grad_norm": 3.086527891586726, "learning_rate": 8.813070337838781e-06, "loss": 0.6818, "step": 3044 }, { "epoch": 0.25, "grad_norm": 2.92353885695445, "learning_rate": 8.81221936391249e-06, "loss": 0.7864, "step": 3045 }, { "epoch": 0.25, "grad_norm": 2.9551810477045874, "learning_rate": 8.811368126156615e-06, "loss": 0.8103, "step": 3046 }, { "epoch": 0.25, "grad_norm": 3.9291030248161487, "learning_rate": 8.81051662463007e-06, "loss": 0.6173, "step": 3047 }, { "epoch": 0.25, "grad_norm": 5.970094486217373, "learning_rate": 8.809664859391778e-06, "loss": 0.6372, "step": 3048 }, { "epoch": 0.25, "grad_norm": 6.7523921063696655, "learning_rate": 8.808812830500693e-06, "loss": 0.6764, "step": 3049 }, { "epoch": 0.25, "grad_norm": 3.2139730372308084, "learning_rate": 8.807960538015777e-06, "loss": 0.856, "step": 3050 }, { "epoch": 0.25, "grad_norm": 11.041542864660634, "learning_rate": 8.807107981996014e-06, "loss": 0.6652, "step": 3051 }, { "epoch": 0.25, "grad_norm": 3.1807474015485133, "learning_rate": 8.806255162500407e-06, "loss": 0.748, "step": 3052 }, { "epoch": 0.25, "grad_norm": 5.8150175097872685, "learning_rate": 8.805402079587977e-06, "loss": 0.6305, "step": 3053 }, { "epoch": 0.25, "grad_norm": 3.6815465550837905, "learning_rate": 8.804548733317764e-06, "loss": 0.7747, "step": 3054 }, { "epoch": 0.25, "grad_norm": 4.547709117122394, "learning_rate": 8.803695123748821e-06, "loss": 0.8176, "step": 3055 }, { "epoch": 0.25, "grad_norm": 2.3666443745613903, "learning_rate": 8.802841250940226e-06, "loss": 0.7812, "step": 3056 }, { "epoch": 0.25, "grad_norm": 3.114873178015105, "learning_rate": 8.80198711495107e-06, "loss": 0.6454, "step": 3057 }, { "epoch": 0.25, "grad_norm": 3.7267090030878736, "learning_rate": 8.80113271584047e-06, "loss": 0.7653, "step": 3058 }, { "epoch": 0.25, "grad_norm": 5.7336090204671235, "learning_rate": 8.800278053667551e-06, "loss": 0.8391, "step": 3059 }, { "epoch": 0.25, "grad_norm": 4.146656996565322, "learning_rate": 8.799423128491463e-06, "loss": 0.7512, "step": 3060 }, { "epoch": 0.25, "grad_norm": 8.978857674046594, "learning_rate": 8.798567940371367e-06, "loss": 0.7533, "step": 3061 }, { "epoch": 0.25, "grad_norm": 8.32912900018707, "learning_rate": 8.797712489366456e-06, "loss": 0.6606, "step": 3062 }, { "epoch": 0.25, "grad_norm": 5.424232967561503, "learning_rate": 8.796856775535926e-06, "loss": 0.7162, "step": 3063 }, { "epoch": 0.25, "grad_norm": 2.8718379914775785, "learning_rate": 8.796000798939001e-06, "loss": 0.6549, "step": 3064 }, { "epoch": 0.25, "grad_norm": 4.387055998905367, "learning_rate": 8.795144559634921e-06, "loss": 0.7572, "step": 3065 }, { "epoch": 0.25, "grad_norm": 16.522730471824318, "learning_rate": 8.794288057682939e-06, "loss": 0.6019, "step": 3066 }, { "epoch": 0.25, "grad_norm": 2.968471855541699, "learning_rate": 8.793431293142334e-06, "loss": 0.6142, "step": 3067 }, { "epoch": 0.25, "grad_norm": 8.859673006858515, "learning_rate": 8.792574266072397e-06, "loss": 0.8155, "step": 3068 }, { "epoch": 0.25, "grad_norm": 11.576330758995297, "learning_rate": 8.791716976532441e-06, "loss": 0.5978, "step": 3069 }, { "epoch": 0.25, "grad_norm": 4.4253756691029, "learning_rate": 8.790859424581796e-06, "loss": 0.6893, "step": 3070 }, { "epoch": 0.25, "grad_norm": 4.015026491366955, "learning_rate": 8.79000161027981e-06, "loss": 0.789, "step": 3071 }, { "epoch": 0.25, "grad_norm": 3.137490299438896, "learning_rate": 8.789143533685847e-06, "loss": 0.6068, "step": 3072 }, { "epoch": 0.25, "grad_norm": 10.06986050159233, "learning_rate": 8.788285194859293e-06, "loss": 0.8089, "step": 3073 }, { "epoch": 0.25, "grad_norm": 5.8039239794254565, "learning_rate": 8.787426593859552e-06, "loss": 0.7599, "step": 3074 }, { "epoch": 0.25, "grad_norm": 3.812170928524484, "learning_rate": 8.786567730746043e-06, "loss": 0.6906, "step": 3075 }, { "epoch": 0.25, "grad_norm": 4.334727792341875, "learning_rate": 8.785708605578204e-06, "loss": 0.6469, "step": 3076 }, { "epoch": 0.25, "grad_norm": 4.100441618871479, "learning_rate": 8.784849218415494e-06, "loss": 0.7728, "step": 3077 }, { "epoch": 0.25, "grad_norm": 10.776677988372978, "learning_rate": 8.783989569317386e-06, "loss": 0.665, "step": 3078 }, { "epoch": 0.25, "grad_norm": 8.065157601787455, "learning_rate": 8.783129658343375e-06, "loss": 0.7224, "step": 3079 }, { "epoch": 0.25, "grad_norm": 9.754760023461866, "learning_rate": 8.78226948555297e-06, "loss": 0.6977, "step": 3080 }, { "epoch": 0.25, "grad_norm": 2.91142717873937, "learning_rate": 8.7814090510057e-06, "loss": 0.7452, "step": 3081 }, { "epoch": 0.25, "grad_norm": 5.358599442144094, "learning_rate": 8.780548354761117e-06, "loss": 0.7386, "step": 3082 }, { "epoch": 0.25, "grad_norm": 6.1770719932293385, "learning_rate": 8.77968739687878e-06, "loss": 0.6611, "step": 3083 }, { "epoch": 0.25, "grad_norm": 5.094304578414262, "learning_rate": 8.778826177418279e-06, "loss": 0.6689, "step": 3084 }, { "epoch": 0.25, "grad_norm": 4.0009570657251805, "learning_rate": 8.777964696439211e-06, "loss": 0.8095, "step": 3085 }, { "epoch": 0.25, "grad_norm": 3.538530084619795, "learning_rate": 8.777102954001199e-06, "loss": 0.7265, "step": 3086 }, { "epoch": 0.25, "grad_norm": 3.935273935837516, "learning_rate": 8.776240950163881e-06, "loss": 0.7395, "step": 3087 }, { "epoch": 0.25, "grad_norm": 4.920882725364082, "learning_rate": 8.77537868498691e-06, "loss": 0.73, "step": 3088 }, { "epoch": 0.25, "grad_norm": 4.464398557139395, "learning_rate": 8.774516158529964e-06, "loss": 0.7222, "step": 3089 }, { "epoch": 0.25, "grad_norm": 3.519622405821405, "learning_rate": 8.773653370852732e-06, "loss": 0.6436, "step": 3090 }, { "epoch": 0.25, "grad_norm": 2.6481939261108702, "learning_rate": 8.772790322014928e-06, "loss": 0.6496, "step": 3091 }, { "epoch": 0.25, "grad_norm": 6.903381244721313, "learning_rate": 8.771927012076276e-06, "loss": 0.7779, "step": 3092 }, { "epoch": 0.25, "grad_norm": 6.087918109238199, "learning_rate": 8.771063441096527e-06, "loss": 0.6783, "step": 3093 }, { "epoch": 0.25, "grad_norm": 2.957922211199016, "learning_rate": 8.770199609135441e-06, "loss": 0.6523, "step": 3094 }, { "epoch": 0.25, "grad_norm": 4.841301250019361, "learning_rate": 8.769335516252803e-06, "loss": 0.5435, "step": 3095 }, { "epoch": 0.25, "grad_norm": 21.38121476483069, "learning_rate": 8.768471162508416e-06, "loss": 0.6877, "step": 3096 }, { "epoch": 0.25, "grad_norm": 2.888489436387813, "learning_rate": 8.767606547962095e-06, "loss": 0.6186, "step": 3097 }, { "epoch": 0.25, "grad_norm": 3.271647791303926, "learning_rate": 8.766741672673677e-06, "loss": 0.8017, "step": 3098 }, { "epoch": 0.25, "grad_norm": 3.585655721670914, "learning_rate": 8.76587653670302e-06, "loss": 0.7705, "step": 3099 }, { "epoch": 0.25, "grad_norm": 11.770367549933098, "learning_rate": 8.765011140109993e-06, "loss": 0.8647, "step": 3100 }, { "epoch": 0.25, "grad_norm": 4.691469114463657, "learning_rate": 8.76414548295449e-06, "loss": 0.7185, "step": 3101 }, { "epoch": 0.25, "grad_norm": 3.425361225456822, "learning_rate": 8.763279565296417e-06, "loss": 0.6477, "step": 3102 }, { "epoch": 0.25, "grad_norm": 3.0483154802755688, "learning_rate": 8.762413387195702e-06, "loss": 0.6128, "step": 3103 }, { "epoch": 0.25, "grad_norm": 14.842809738947048, "learning_rate": 8.761546948712293e-06, "loss": 0.7448, "step": 3104 }, { "epoch": 0.25, "grad_norm": 6.164015345671067, "learning_rate": 8.760680249906149e-06, "loss": 0.7513, "step": 3105 }, { "epoch": 0.25, "grad_norm": 4.358286757291583, "learning_rate": 8.759813290837254e-06, "loss": 0.8066, "step": 3106 }, { "epoch": 0.25, "grad_norm": 4.630388700537639, "learning_rate": 8.758946071565605e-06, "loss": 0.8617, "step": 3107 }, { "epoch": 0.25, "grad_norm": 5.078490895055653, "learning_rate": 8.758078592151218e-06, "loss": 0.8909, "step": 3108 }, { "epoch": 0.25, "grad_norm": 2.9463029330233406, "learning_rate": 8.75721085265413e-06, "loss": 0.7876, "step": 3109 }, { "epoch": 0.25, "grad_norm": 6.116187620171812, "learning_rate": 8.756342853134394e-06, "loss": 0.7866, "step": 3110 }, { "epoch": 0.25, "grad_norm": 7.0949835345468335, "learning_rate": 8.75547459365208e-06, "loss": 0.6554, "step": 3111 }, { "epoch": 0.25, "grad_norm": 7.547951850207582, "learning_rate": 8.75460607426728e-06, "loss": 0.6369, "step": 3112 }, { "epoch": 0.25, "grad_norm": 10.377164443560627, "learning_rate": 8.753737295040097e-06, "loss": 0.7811, "step": 3113 }, { "epoch": 0.25, "grad_norm": 13.518420405245568, "learning_rate": 8.752868256030658e-06, "loss": 0.7348, "step": 3114 }, { "epoch": 0.25, "grad_norm": 4.069145433334808, "learning_rate": 8.751998957299105e-06, "loss": 0.745, "step": 3115 }, { "epoch": 0.25, "grad_norm": 3.7879224799977327, "learning_rate": 8.7511293989056e-06, "loss": 0.7569, "step": 3116 }, { "epoch": 0.25, "grad_norm": 3.6759781094322803, "learning_rate": 8.750259580910323e-06, "loss": 0.7986, "step": 3117 }, { "epoch": 0.25, "grad_norm": 6.698303676964798, "learning_rate": 8.749389503373467e-06, "loss": 0.6889, "step": 3118 }, { "epoch": 0.25, "grad_norm": 3.906326166244853, "learning_rate": 8.748519166355251e-06, "loss": 0.6908, "step": 3119 }, { "epoch": 0.25, "grad_norm": 18.650861876775295, "learning_rate": 8.747648569915905e-06, "loss": 0.5615, "step": 3120 }, { "epoch": 0.25, "grad_norm": 3.635344764721767, "learning_rate": 8.746777714115681e-06, "loss": 0.7414, "step": 3121 }, { "epoch": 0.25, "grad_norm": 8.318153874007834, "learning_rate": 8.745906599014848e-06, "loss": 0.7507, "step": 3122 }, { "epoch": 0.25, "grad_norm": 4.020214871131042, "learning_rate": 8.745035224673693e-06, "loss": 0.7481, "step": 3123 }, { "epoch": 0.25, "grad_norm": 3.574527386136311, "learning_rate": 8.744163591152517e-06, "loss": 0.5815, "step": 3124 }, { "epoch": 0.25, "grad_norm": 3.5863367982424403, "learning_rate": 8.743291698511646e-06, "loss": 0.7305, "step": 3125 }, { "epoch": 0.25, "grad_norm": 5.451184676294389, "learning_rate": 8.742419546811423e-06, "loss": 0.7432, "step": 3126 }, { "epoch": 0.25, "grad_norm": 3.062352225952689, "learning_rate": 8.7415471361122e-06, "loss": 0.8169, "step": 3127 }, { "epoch": 0.25, "grad_norm": 3.5007176115921355, "learning_rate": 8.740674466474357e-06, "loss": 0.6944, "step": 3128 }, { "epoch": 0.25, "grad_norm": 7.740229179001324, "learning_rate": 8.739801537958289e-06, "loss": 0.6355, "step": 3129 }, { "epoch": 0.25, "grad_norm": 4.760860305494583, "learning_rate": 8.738928350624405e-06, "loss": 0.8089, "step": 3130 }, { "epoch": 0.25, "grad_norm": 8.48155766612004, "learning_rate": 8.738054904533138e-06, "loss": 0.794, "step": 3131 }, { "epoch": 0.25, "grad_norm": 4.647497005446932, "learning_rate": 8.737181199744936e-06, "loss": 0.7155, "step": 3132 }, { "epoch": 0.25, "grad_norm": 4.099857403437535, "learning_rate": 8.73630723632026e-06, "loss": 0.6637, "step": 3133 }, { "epoch": 0.25, "grad_norm": 3.6104204533974054, "learning_rate": 8.735433014319602e-06, "loss": 0.8782, "step": 3134 }, { "epoch": 0.25, "grad_norm": 7.023665806477569, "learning_rate": 8.734558533803456e-06, "loss": 0.7411, "step": 3135 }, { "epoch": 0.25, "grad_norm": 3.5091305472528966, "learning_rate": 8.733683794832346e-06, "loss": 0.8685, "step": 3136 }, { "epoch": 0.25, "grad_norm": 2.7535990976611684, "learning_rate": 8.732808797466808e-06, "loss": 0.7291, "step": 3137 }, { "epoch": 0.25, "grad_norm": 4.460848618672478, "learning_rate": 8.731933541767396e-06, "loss": 0.8162, "step": 3138 }, { "epoch": 0.25, "grad_norm": 3.627034661632686, "learning_rate": 8.731058027794688e-06, "loss": 0.6, "step": 3139 }, { "epoch": 0.26, "grad_norm": 3.5932830331587238, "learning_rate": 8.73018225560927e-06, "loss": 0.6503, "step": 3140 }, { "epoch": 0.26, "grad_norm": 4.660642123583916, "learning_rate": 8.729306225271752e-06, "loss": 0.6394, "step": 3141 }, { "epoch": 0.26, "grad_norm": 5.6774384893824585, "learning_rate": 8.728429936842762e-06, "loss": 0.8571, "step": 3142 }, { "epoch": 0.26, "grad_norm": 8.630971072221547, "learning_rate": 8.727553390382946e-06, "loss": 0.6036, "step": 3143 }, { "epoch": 0.26, "grad_norm": 3.8080765109471972, "learning_rate": 8.726676585952963e-06, "loss": 0.7048, "step": 3144 }, { "epoch": 0.26, "grad_norm": 3.054423786006399, "learning_rate": 8.725799523613494e-06, "loss": 0.7577, "step": 3145 }, { "epoch": 0.26, "grad_norm": 2.9213456980334045, "learning_rate": 8.72492220342524e-06, "loss": 0.9067, "step": 3146 }, { "epoch": 0.26, "grad_norm": 2.4419951437245992, "learning_rate": 8.724044625448915e-06, "loss": 0.6094, "step": 3147 }, { "epoch": 0.26, "grad_norm": 4.994992269704754, "learning_rate": 8.723166789745255e-06, "loss": 0.749, "step": 3148 }, { "epoch": 0.26, "grad_norm": 5.866976280194647, "learning_rate": 8.722288696375009e-06, "loss": 0.7469, "step": 3149 }, { "epoch": 0.26, "grad_norm": 2.8070344942227035, "learning_rate": 8.721410345398946e-06, "loss": 0.8725, "step": 3150 }, { "epoch": 0.26, "grad_norm": 4.4476406179513885, "learning_rate": 8.720531736877858e-06, "loss": 0.7973, "step": 3151 }, { "epoch": 0.26, "grad_norm": 5.03799945204716, "learning_rate": 8.719652870872546e-06, "loss": 0.805, "step": 3152 }, { "epoch": 0.26, "grad_norm": 5.37050191752738, "learning_rate": 8.718773747443834e-06, "loss": 0.6877, "step": 3153 }, { "epoch": 0.26, "grad_norm": 3.5957418475430787, "learning_rate": 8.717894366652564e-06, "loss": 0.6798, "step": 3154 }, { "epoch": 0.26, "grad_norm": 10.372394165580374, "learning_rate": 8.717014728559594e-06, "loss": 0.7821, "step": 3155 }, { "epoch": 0.26, "grad_norm": 7.857694882228299, "learning_rate": 8.716134833225803e-06, "loss": 0.6747, "step": 3156 }, { "epoch": 0.26, "grad_norm": 3.722154342366707, "learning_rate": 8.715254680712079e-06, "loss": 0.6145, "step": 3157 }, { "epoch": 0.26, "grad_norm": 3.011208930046009, "learning_rate": 8.714374271079339e-06, "loss": 0.7584, "step": 3158 }, { "epoch": 0.26, "grad_norm": 3.139584702264579, "learning_rate": 8.713493604388513e-06, "loss": 0.7627, "step": 3159 }, { "epoch": 0.26, "grad_norm": 4.426293700278985, "learning_rate": 8.712612680700545e-06, "loss": 0.6592, "step": 3160 }, { "epoch": 0.26, "grad_norm": 3.6095904479113576, "learning_rate": 8.711731500076405e-06, "loss": 0.8101, "step": 3161 }, { "epoch": 0.26, "grad_norm": 4.897542453631338, "learning_rate": 8.710850062577074e-06, "loss": 0.6313, "step": 3162 }, { "epoch": 0.26, "grad_norm": 3.380202800138096, "learning_rate": 8.709968368263553e-06, "loss": 0.6786, "step": 3163 }, { "epoch": 0.26, "grad_norm": 3.9420736333133997, "learning_rate": 8.709086417196862e-06, "loss": 0.6966, "step": 3164 }, { "epoch": 0.26, "grad_norm": 3.01766478559132, "learning_rate": 8.708204209438034e-06, "loss": 0.706, "step": 3165 }, { "epoch": 0.26, "grad_norm": 11.739275142211664, "learning_rate": 8.707321745048127e-06, "loss": 0.7194, "step": 3166 }, { "epoch": 0.26, "grad_norm": 3.6215568506631444, "learning_rate": 8.706439024088213e-06, "loss": 0.7671, "step": 3167 }, { "epoch": 0.26, "grad_norm": 3.100253708566771, "learning_rate": 8.705556046619382e-06, "loss": 0.7356, "step": 3168 }, { "epoch": 0.26, "grad_norm": 7.239927471366727, "learning_rate": 8.704672812702737e-06, "loss": 0.7065, "step": 3169 }, { "epoch": 0.26, "grad_norm": 6.508042993680408, "learning_rate": 8.70378932239941e-06, "loss": 0.6799, "step": 3170 }, { "epoch": 0.26, "grad_norm": 3.3221637334496372, "learning_rate": 8.702905575770539e-06, "loss": 0.8029, "step": 3171 }, { "epoch": 0.26, "grad_norm": 3.236179506992233, "learning_rate": 8.702021572877288e-06, "loss": 0.6837, "step": 3172 }, { "epoch": 0.26, "grad_norm": 4.430688642614841, "learning_rate": 8.701137313780833e-06, "loss": 0.7404, "step": 3173 }, { "epoch": 0.26, "grad_norm": 7.043340689823841, "learning_rate": 8.700252798542372e-06, "loss": 0.8444, "step": 3174 }, { "epoch": 0.26, "grad_norm": 3.4045707643206984, "learning_rate": 8.699368027223118e-06, "loss": 0.7279, "step": 3175 }, { "epoch": 0.26, "grad_norm": 3.139954314971914, "learning_rate": 8.698482999884304e-06, "loss": 0.7152, "step": 3176 }, { "epoch": 0.26, "grad_norm": 3.7429082791425596, "learning_rate": 8.697597716587181e-06, "loss": 0.5052, "step": 3177 }, { "epoch": 0.26, "grad_norm": 10.686945224955448, "learning_rate": 8.696712177393011e-06, "loss": 0.7174, "step": 3178 }, { "epoch": 0.26, "grad_norm": 8.892058849149779, "learning_rate": 8.695826382363083e-06, "loss": 0.7848, "step": 3179 }, { "epoch": 0.26, "grad_norm": 4.269392998250927, "learning_rate": 8.694940331558699e-06, "loss": 0.7712, "step": 3180 }, { "epoch": 0.26, "grad_norm": 3.9938333471267473, "learning_rate": 8.694054025041178e-06, "loss": 0.7543, "step": 3181 }, { "epoch": 0.26, "grad_norm": 5.5194613206959655, "learning_rate": 8.693167462871859e-06, "loss": 0.5992, "step": 3182 }, { "epoch": 0.26, "grad_norm": 3.0265730767118795, "learning_rate": 8.692280645112097e-06, "loss": 0.7448, "step": 3183 }, { "epoch": 0.26, "grad_norm": 5.145453021093892, "learning_rate": 8.691393571823266e-06, "loss": 0.7072, "step": 3184 }, { "epoch": 0.26, "grad_norm": 5.465871706903786, "learning_rate": 8.690506243066757e-06, "loss": 0.6629, "step": 3185 }, { "epoch": 0.26, "grad_norm": 4.006342358675749, "learning_rate": 8.68961865890398e-06, "loss": 0.7542, "step": 3186 }, { "epoch": 0.26, "grad_norm": 3.134299434254786, "learning_rate": 8.688730819396358e-06, "loss": 0.6575, "step": 3187 }, { "epoch": 0.26, "grad_norm": 5.150631234628473, "learning_rate": 8.687842724605338e-06, "loss": 0.7069, "step": 3188 }, { "epoch": 0.26, "grad_norm": 3.55472783469278, "learning_rate": 8.686954374592382e-06, "loss": 0.7212, "step": 3189 }, { "epoch": 0.26, "grad_norm": 3.252418671539824, "learning_rate": 8.686065769418967e-06, "loss": 0.8669, "step": 3190 }, { "epoch": 0.26, "grad_norm": 5.523631755961889, "learning_rate": 8.68517690914659e-06, "loss": 0.6601, "step": 3191 }, { "epoch": 0.26, "grad_norm": 4.459577519303184, "learning_rate": 8.68428779383677e-06, "loss": 0.8299, "step": 3192 }, { "epoch": 0.26, "grad_norm": 3.8596646274970308, "learning_rate": 8.683398423551034e-06, "loss": 0.7054, "step": 3193 }, { "epoch": 0.26, "grad_norm": 3.336044153338888, "learning_rate": 8.682508798350937e-06, "loss": 0.7074, "step": 3194 }, { "epoch": 0.26, "grad_norm": 13.8381498669211, "learning_rate": 8.681618918298043e-06, "loss": 0.6351, "step": 3195 }, { "epoch": 0.26, "grad_norm": 7.032259708332252, "learning_rate": 8.680728783453937e-06, "loss": 0.5975, "step": 3196 }, { "epoch": 0.26, "grad_norm": 18.368391685083438, "learning_rate": 8.679838393880224e-06, "loss": 0.5734, "step": 3197 }, { "epoch": 0.26, "grad_norm": 3.98708049777821, "learning_rate": 8.678947749638525e-06, "loss": 0.7007, "step": 3198 }, { "epoch": 0.26, "grad_norm": 3.3257990633485464, "learning_rate": 8.678056850790477e-06, "loss": 0.7348, "step": 3199 }, { "epoch": 0.26, "grad_norm": 6.545915739914991, "learning_rate": 8.677165697397736e-06, "loss": 0.7186, "step": 3200 }, { "epoch": 0.26, "grad_norm": 3.2962723461974264, "learning_rate": 8.676274289521976e-06, "loss": 0.8171, "step": 3201 }, { "epoch": 0.26, "grad_norm": 4.266180606392446, "learning_rate": 8.675382627224886e-06, "loss": 0.6979, "step": 3202 }, { "epoch": 0.26, "grad_norm": 5.99352455597282, "learning_rate": 8.674490710568176e-06, "loss": 0.8087, "step": 3203 }, { "epoch": 0.26, "grad_norm": 3.612149124374396, "learning_rate": 8.673598539613573e-06, "loss": 0.7438, "step": 3204 }, { "epoch": 0.26, "grad_norm": 3.2998574631719113, "learning_rate": 8.67270611442282e-06, "loss": 0.6706, "step": 3205 }, { "epoch": 0.26, "grad_norm": 4.300327282534021, "learning_rate": 8.671813435057678e-06, "loss": 0.8463, "step": 3206 }, { "epoch": 0.26, "grad_norm": 9.340763817220994, "learning_rate": 8.670920501579928e-06, "loss": 0.765, "step": 3207 }, { "epoch": 0.26, "grad_norm": 3.205538070884684, "learning_rate": 8.670027314051364e-06, "loss": 0.8013, "step": 3208 }, { "epoch": 0.26, "grad_norm": 3.5057415428389707, "learning_rate": 8.669133872533804e-06, "loss": 0.6927, "step": 3209 }, { "epoch": 0.26, "grad_norm": 3.2638647950868855, "learning_rate": 8.668240177089074e-06, "loss": 0.7248, "step": 3210 }, { "epoch": 0.26, "grad_norm": 2.866964393651608, "learning_rate": 8.667346227779028e-06, "loss": 0.7641, "step": 3211 }, { "epoch": 0.26, "grad_norm": 20.584110147417896, "learning_rate": 8.666452024665533e-06, "loss": 0.7217, "step": 3212 }, { "epoch": 0.26, "grad_norm": 2.6132459682668814, "learning_rate": 8.66555756781047e-06, "loss": 0.7023, "step": 3213 }, { "epoch": 0.26, "grad_norm": 2.519297971317584, "learning_rate": 8.664662857275744e-06, "loss": 0.6993, "step": 3214 }, { "epoch": 0.26, "grad_norm": 3.4265703667495155, "learning_rate": 8.663767893123272e-06, "loss": 0.6637, "step": 3215 }, { "epoch": 0.26, "grad_norm": 8.205509648102641, "learning_rate": 8.662872675414993e-06, "loss": 0.6888, "step": 3216 }, { "epoch": 0.26, "grad_norm": 3.419030151415074, "learning_rate": 8.661977204212864e-06, "loss": 0.665, "step": 3217 }, { "epoch": 0.26, "grad_norm": 17.903286004744963, "learning_rate": 8.661081479578852e-06, "loss": 0.7801, "step": 3218 }, { "epoch": 0.26, "grad_norm": 3.3684227355501983, "learning_rate": 8.660185501574952e-06, "loss": 0.7133, "step": 3219 }, { "epoch": 0.26, "grad_norm": 3.6874838253854554, "learning_rate": 8.659289270263167e-06, "loss": 0.7478, "step": 3220 }, { "epoch": 0.26, "grad_norm": 3.185170736521441, "learning_rate": 8.658392785705525e-06, "loss": 0.8235, "step": 3221 }, { "epoch": 0.26, "grad_norm": 3.7015605614848512, "learning_rate": 8.657496047964066e-06, "loss": 0.8226, "step": 3222 }, { "epoch": 0.26, "grad_norm": 3.6710987725206263, "learning_rate": 8.656599057100853e-06, "loss": 0.4764, "step": 3223 }, { "epoch": 0.26, "grad_norm": 4.79206371788448, "learning_rate": 8.655701813177959e-06, "loss": 0.8447, "step": 3224 }, { "epoch": 0.26, "grad_norm": 3.5200200680803624, "learning_rate": 8.65480431625748e-06, "loss": 0.8119, "step": 3225 }, { "epoch": 0.26, "grad_norm": 3.0744180472335843, "learning_rate": 8.653906566401533e-06, "loss": 0.7374, "step": 3226 }, { "epoch": 0.26, "grad_norm": 2.6018496170031113, "learning_rate": 8.653008563672242e-06, "loss": 0.7083, "step": 3227 }, { "epoch": 0.26, "grad_norm": 2.698042521378998, "learning_rate": 8.65211030813176e-06, "loss": 0.7159, "step": 3228 }, { "epoch": 0.26, "grad_norm": 3.7187653352667662, "learning_rate": 8.651211799842248e-06, "loss": 0.8173, "step": 3229 }, { "epoch": 0.26, "grad_norm": 3.73243687165133, "learning_rate": 8.65031303886589e-06, "loss": 0.7924, "step": 3230 }, { "epoch": 0.26, "grad_norm": 2.9375138583642664, "learning_rate": 8.649414025264884e-06, "loss": 0.6647, "step": 3231 }, { "epoch": 0.26, "grad_norm": 6.0223365449654445, "learning_rate": 8.64851475910145e-06, "loss": 0.6637, "step": 3232 }, { "epoch": 0.26, "grad_norm": 16.04673929851229, "learning_rate": 8.647615240437821e-06, "loss": 0.7696, "step": 3233 }, { "epoch": 0.26, "grad_norm": 3.6864006100333637, "learning_rate": 8.64671546933625e-06, "loss": 0.6658, "step": 3234 }, { "epoch": 0.26, "grad_norm": 4.075556435914477, "learning_rate": 8.645815445859008e-06, "loss": 0.8458, "step": 3235 }, { "epoch": 0.26, "grad_norm": 11.6625120687269, "learning_rate": 8.644915170068382e-06, "loss": 0.5764, "step": 3236 }, { "epoch": 0.26, "grad_norm": 5.421487139734052, "learning_rate": 8.644014642026673e-06, "loss": 0.9108, "step": 3237 }, { "epoch": 0.26, "grad_norm": 3.328575562706458, "learning_rate": 8.643113861796209e-06, "loss": 0.5935, "step": 3238 }, { "epoch": 0.26, "grad_norm": 4.387767583405524, "learning_rate": 8.642212829439325e-06, "loss": 0.8214, "step": 3239 }, { "epoch": 0.26, "grad_norm": 3.680973041559468, "learning_rate": 8.64131154501838e-06, "loss": 0.7718, "step": 3240 }, { "epoch": 0.26, "grad_norm": 6.230458925054092, "learning_rate": 8.640410008595748e-06, "loss": 0.6446, "step": 3241 }, { "epoch": 0.26, "grad_norm": 4.240807683835683, "learning_rate": 8.639508220233822e-06, "loss": 0.8377, "step": 3242 }, { "epoch": 0.26, "grad_norm": 3.451694084035272, "learning_rate": 8.638606179995013e-06, "loss": 0.588, "step": 3243 }, { "epoch": 0.26, "grad_norm": 2.7660389044695433, "learning_rate": 8.637703887941744e-06, "loss": 0.9464, "step": 3244 }, { "epoch": 0.26, "grad_norm": 3.47333453510192, "learning_rate": 8.63680134413646e-06, "loss": 0.7578, "step": 3245 }, { "epoch": 0.26, "grad_norm": 2.5437573262789854, "learning_rate": 8.635898548641627e-06, "loss": 0.8159, "step": 3246 }, { "epoch": 0.26, "grad_norm": 11.893660589589684, "learning_rate": 8.634995501519718e-06, "loss": 0.6712, "step": 3247 }, { "epoch": 0.26, "grad_norm": 4.116797672031491, "learning_rate": 8.634092202833233e-06, "loss": 0.741, "step": 3248 }, { "epoch": 0.26, "grad_norm": 6.167257437660994, "learning_rate": 8.633188652644686e-06, "loss": 0.8481, "step": 3249 }, { "epoch": 0.26, "grad_norm": 19.25051829808035, "learning_rate": 8.632284851016607e-06, "loss": 0.625, "step": 3250 }, { "epoch": 0.26, "grad_norm": 3.1216677192328364, "learning_rate": 8.631380798011546e-06, "loss": 0.7214, "step": 3251 }, { "epoch": 0.26, "grad_norm": 4.089922296354801, "learning_rate": 8.63047649369207e-06, "loss": 0.6974, "step": 3252 }, { "epoch": 0.26, "grad_norm": 6.084856075812232, "learning_rate": 8.62957193812076e-06, "loss": 0.8369, "step": 3253 }, { "epoch": 0.26, "grad_norm": 3.159501872481622, "learning_rate": 8.628667131360218e-06, "loss": 0.7078, "step": 3254 }, { "epoch": 0.26, "grad_norm": 3.514417320153134, "learning_rate": 8.627762073473063e-06, "loss": 0.6147, "step": 3255 }, { "epoch": 0.26, "grad_norm": 2.8403537692686958, "learning_rate": 8.62685676452193e-06, "loss": 0.7494, "step": 3256 }, { "epoch": 0.26, "grad_norm": 4.57325391759759, "learning_rate": 8.625951204569473e-06, "loss": 0.7323, "step": 3257 }, { "epoch": 0.26, "grad_norm": 7.413588873273317, "learning_rate": 8.62504539367836e-06, "loss": 0.8129, "step": 3258 }, { "epoch": 0.26, "grad_norm": 11.650052778296885, "learning_rate": 8.624139331911283e-06, "loss": 0.8058, "step": 3259 }, { "epoch": 0.26, "grad_norm": 3.934127231152171, "learning_rate": 8.623233019330943e-06, "loss": 0.6543, "step": 3260 }, { "epoch": 0.26, "grad_norm": 2.632913542481407, "learning_rate": 8.622326456000065e-06, "loss": 0.6536, "step": 3261 }, { "epoch": 0.26, "grad_norm": 3.5199339880452825, "learning_rate": 8.621419641981387e-06, "loss": 0.7564, "step": 3262 }, { "epoch": 0.27, "grad_norm": 2.6632097472282963, "learning_rate": 8.620512577337668e-06, "loss": 0.7519, "step": 3263 }, { "epoch": 0.27, "grad_norm": 3.4141906149158503, "learning_rate": 8.619605262131683e-06, "loss": 0.8445, "step": 3264 }, { "epoch": 0.27, "grad_norm": 3.711403876788153, "learning_rate": 8.618697696426223e-06, "loss": 0.8001, "step": 3265 }, { "epoch": 0.27, "grad_norm": 2.6086964718147536, "learning_rate": 8.617789880284097e-06, "loss": 0.7267, "step": 3266 }, { "epoch": 0.27, "grad_norm": 3.2318309937701994, "learning_rate": 8.61688181376813e-06, "loss": 0.7547, "step": 3267 }, { "epoch": 0.27, "grad_norm": 9.90480681067458, "learning_rate": 8.61597349694117e-06, "loss": 0.86, "step": 3268 }, { "epoch": 0.27, "grad_norm": 3.73241757591505, "learning_rate": 8.615064929866074e-06, "loss": 0.7248, "step": 3269 }, { "epoch": 0.27, "grad_norm": 4.972615979290784, "learning_rate": 8.614156112605725e-06, "loss": 0.6817, "step": 3270 }, { "epoch": 0.27, "grad_norm": 3.398181059119163, "learning_rate": 8.613247045223014e-06, "loss": 0.7055, "step": 3271 }, { "epoch": 0.27, "grad_norm": 2.7709866941183368, "learning_rate": 8.61233772778086e-06, "loss": 0.6862, "step": 3272 }, { "epoch": 0.27, "grad_norm": 3.5354902840870133, "learning_rate": 8.611428160342185e-06, "loss": 0.8771, "step": 3273 }, { "epoch": 0.27, "grad_norm": 2.4969323475028062, "learning_rate": 8.610518342969947e-06, "loss": 0.7203, "step": 3274 }, { "epoch": 0.27, "grad_norm": 3.6404422775039387, "learning_rate": 8.609608275727102e-06, "loss": 0.81, "step": 3275 }, { "epoch": 0.27, "grad_norm": 3.7932823084202294, "learning_rate": 8.608697958676638e-06, "loss": 0.5665, "step": 3276 }, { "epoch": 0.27, "grad_norm": 3.253721430568261, "learning_rate": 8.607787391881552e-06, "loss": 0.7267, "step": 3277 }, { "epoch": 0.27, "grad_norm": 5.208576658088399, "learning_rate": 8.606876575404863e-06, "loss": 0.6558, "step": 3278 }, { "epoch": 0.27, "grad_norm": 3.663975777075048, "learning_rate": 8.605965509309605e-06, "loss": 0.6548, "step": 3279 }, { "epoch": 0.27, "grad_norm": 3.6859607417409617, "learning_rate": 8.605054193658827e-06, "loss": 0.7854, "step": 3280 }, { "epoch": 0.27, "grad_norm": 3.011007018139226, "learning_rate": 8.604142628515602e-06, "loss": 0.7417, "step": 3281 }, { "epoch": 0.27, "grad_norm": 3.1255284155438243, "learning_rate": 8.60323081394301e-06, "loss": 0.6975, "step": 3282 }, { "epoch": 0.27, "grad_norm": 4.265618960931716, "learning_rate": 8.60231875000416e-06, "loss": 0.6337, "step": 3283 }, { "epoch": 0.27, "grad_norm": 7.015580863351832, "learning_rate": 8.60140643676217e-06, "loss": 0.8029, "step": 3284 }, { "epoch": 0.27, "grad_norm": 4.312401811027869, "learning_rate": 8.600493874280179e-06, "loss": 0.7472, "step": 3285 }, { "epoch": 0.27, "grad_norm": 3.1523141103985455, "learning_rate": 8.59958106262134e-06, "loss": 0.6893, "step": 3286 }, { "epoch": 0.27, "grad_norm": 3.0021671163905306, "learning_rate": 8.598668001848828e-06, "loss": 0.8166, "step": 3287 }, { "epoch": 0.27, "grad_norm": 3.889461048343854, "learning_rate": 8.59775469202583e-06, "loss": 0.7855, "step": 3288 }, { "epoch": 0.27, "grad_norm": 11.14539313668885, "learning_rate": 8.596841133215554e-06, "loss": 0.8144, "step": 3289 }, { "epoch": 0.27, "grad_norm": 2.951924747837581, "learning_rate": 8.595927325481227e-06, "loss": 0.7788, "step": 3290 }, { "epoch": 0.27, "grad_norm": 2.5379457137799064, "learning_rate": 8.595013268886083e-06, "loss": 0.8488, "step": 3291 }, { "epoch": 0.27, "grad_norm": 3.0527164699105014, "learning_rate": 8.594098963493387e-06, "loss": 0.7177, "step": 3292 }, { "epoch": 0.27, "grad_norm": 2.9382365798445327, "learning_rate": 8.593184409366411e-06, "loss": 0.8075, "step": 3293 }, { "epoch": 0.27, "grad_norm": 6.308891412264307, "learning_rate": 8.592269606568451e-06, "loss": 0.6582, "step": 3294 }, { "epoch": 0.27, "grad_norm": 3.114670097246518, "learning_rate": 8.591354555162813e-06, "loss": 0.8553, "step": 3295 }, { "epoch": 0.27, "grad_norm": 3.5803268420882426, "learning_rate": 8.59043925521283e-06, "loss": 0.7822, "step": 3296 }, { "epoch": 0.27, "grad_norm": 2.594740443503019, "learning_rate": 8.589523706781841e-06, "loss": 0.7845, "step": 3297 }, { "epoch": 0.27, "grad_norm": 3.107354089805622, "learning_rate": 8.588607909933211e-06, "loss": 0.8989, "step": 3298 }, { "epoch": 0.27, "grad_norm": 2.7220507819778, "learning_rate": 8.587691864730316e-06, "loss": 0.7899, "step": 3299 }, { "epoch": 0.27, "grad_norm": 4.731172012254202, "learning_rate": 8.586775571236557e-06, "loss": 0.7715, "step": 3300 }, { "epoch": 0.27, "grad_norm": 3.859806549335766, "learning_rate": 8.585859029515342e-06, "loss": 0.8083, "step": 3301 }, { "epoch": 0.27, "grad_norm": 3.2749258568492525, "learning_rate": 8.584942239630105e-06, "loss": 0.687, "step": 3302 }, { "epoch": 0.27, "grad_norm": 3.598193068305734, "learning_rate": 8.584025201644292e-06, "loss": 0.6268, "step": 3303 }, { "epoch": 0.27, "grad_norm": 2.813056064809422, "learning_rate": 8.583107915621367e-06, "loss": 0.7308, "step": 3304 }, { "epoch": 0.27, "grad_norm": 4.024180443138766, "learning_rate": 8.582190381624814e-06, "loss": 0.7338, "step": 3305 }, { "epoch": 0.27, "grad_norm": 4.212881623334999, "learning_rate": 8.581272599718131e-06, "loss": 0.7103, "step": 3306 }, { "epoch": 0.27, "grad_norm": 2.4047430453994494, "learning_rate": 8.580354569964836e-06, "loss": 0.7758, "step": 3307 }, { "epoch": 0.27, "grad_norm": 3.826312790016627, "learning_rate": 8.579436292428458e-06, "loss": 0.6325, "step": 3308 }, { "epoch": 0.27, "grad_norm": 2.992864527424246, "learning_rate": 8.578517767172554e-06, "loss": 0.7728, "step": 3309 }, { "epoch": 0.27, "grad_norm": 6.3477405169102195, "learning_rate": 8.577598994260687e-06, "loss": 0.6637, "step": 3310 }, { "epoch": 0.27, "grad_norm": 3.418703277272325, "learning_rate": 8.576679973756443e-06, "loss": 0.7187, "step": 3311 }, { "epoch": 0.27, "grad_norm": 4.519412720410982, "learning_rate": 8.575760705723424e-06, "loss": 0.6031, "step": 3312 }, { "epoch": 0.27, "grad_norm": 2.922918879405777, "learning_rate": 8.57484119022525e-06, "loss": 0.6717, "step": 3313 }, { "epoch": 0.27, "grad_norm": 4.042210254722186, "learning_rate": 8.573921427325556e-06, "loss": 0.754, "step": 3314 }, { "epoch": 0.27, "grad_norm": 3.4261580989070533, "learning_rate": 8.573001417087997e-06, "loss": 0.6211, "step": 3315 }, { "epoch": 0.27, "grad_norm": 4.258452715009701, "learning_rate": 8.57208115957624e-06, "loss": 0.6796, "step": 3316 }, { "epoch": 0.27, "grad_norm": 9.093959121669448, "learning_rate": 8.571160654853976e-06, "loss": 0.603, "step": 3317 }, { "epoch": 0.27, "grad_norm": 4.982302494181531, "learning_rate": 8.57023990298491e-06, "loss": 0.6429, "step": 3318 }, { "epoch": 0.27, "grad_norm": 2.3518782927192032, "learning_rate": 8.569318904032763e-06, "loss": 0.7076, "step": 3319 }, { "epoch": 0.27, "grad_norm": 2.5223315000276467, "learning_rate": 8.56839765806127e-06, "loss": 0.8356, "step": 3320 }, { "epoch": 0.27, "grad_norm": 4.02554128565961, "learning_rate": 8.567476165134192e-06, "loss": 0.7827, "step": 3321 }, { "epoch": 0.27, "grad_norm": 4.877455648809495, "learning_rate": 8.566554425315303e-06, "loss": 0.7862, "step": 3322 }, { "epoch": 0.27, "grad_norm": 4.977412807946987, "learning_rate": 8.56563243866839e-06, "loss": 0.7247, "step": 3323 }, { "epoch": 0.27, "grad_norm": 4.287138727702408, "learning_rate": 8.56471020525726e-06, "loss": 0.8824, "step": 3324 }, { "epoch": 0.27, "grad_norm": 4.209857331932112, "learning_rate": 8.56378772514574e-06, "loss": 0.7889, "step": 3325 }, { "epoch": 0.27, "grad_norm": 2.9840601705402134, "learning_rate": 8.56286499839767e-06, "loss": 0.7115, "step": 3326 }, { "epoch": 0.27, "grad_norm": 5.061276330275833, "learning_rate": 8.561942025076907e-06, "loss": 0.7493, "step": 3327 }, { "epoch": 0.27, "grad_norm": 4.184272445539894, "learning_rate": 8.561018805247329e-06, "loss": 0.8091, "step": 3328 }, { "epoch": 0.27, "grad_norm": 3.308403445544687, "learning_rate": 8.560095338972827e-06, "loss": 0.5852, "step": 3329 }, { "epoch": 0.27, "grad_norm": 3.525710230827271, "learning_rate": 8.559171626317312e-06, "loss": 0.8984, "step": 3330 }, { "epoch": 0.27, "grad_norm": 3.355085812506672, "learning_rate": 8.55824766734471e-06, "loss": 0.7264, "step": 3331 }, { "epoch": 0.27, "grad_norm": 3.2051788796381735, "learning_rate": 8.557323462118963e-06, "loss": 0.6795, "step": 3332 }, { "epoch": 0.27, "grad_norm": 3.9361538292386156, "learning_rate": 8.556399010704036e-06, "loss": 0.7104, "step": 3333 }, { "epoch": 0.27, "grad_norm": 4.341182964460685, "learning_rate": 8.555474313163903e-06, "loss": 0.6994, "step": 3334 }, { "epoch": 0.27, "grad_norm": 3.6435448222869016, "learning_rate": 8.554549369562562e-06, "loss": 0.7705, "step": 3335 }, { "epoch": 0.27, "grad_norm": 3.749605049380575, "learning_rate": 8.553624179964023e-06, "loss": 0.8921, "step": 3336 }, { "epoch": 0.27, "grad_norm": 2.496170511113874, "learning_rate": 8.552698744432315e-06, "loss": 0.7504, "step": 3337 }, { "epoch": 0.27, "grad_norm": 3.879469439273665, "learning_rate": 8.551773063031484e-06, "loss": 0.7558, "step": 3338 }, { "epoch": 0.27, "grad_norm": 3.40773341954371, "learning_rate": 8.550847135825594e-06, "loss": 0.7489, "step": 3339 }, { "epoch": 0.27, "grad_norm": 4.816316620409646, "learning_rate": 8.549920962878724e-06, "loss": 0.6747, "step": 3340 }, { "epoch": 0.27, "grad_norm": 3.6856419753685907, "learning_rate": 8.54899454425497e-06, "loss": 0.7621, "step": 3341 }, { "epoch": 0.27, "grad_norm": 4.328313549215571, "learning_rate": 8.548067880018447e-06, "loss": 0.6951, "step": 3342 }, { "epoch": 0.27, "grad_norm": 4.955548046693349, "learning_rate": 8.547140970233287e-06, "loss": 0.598, "step": 3343 }, { "epoch": 0.27, "grad_norm": 2.824717911688605, "learning_rate": 8.546213814963638e-06, "loss": 0.7172, "step": 3344 }, { "epoch": 0.27, "grad_norm": 3.263676419431877, "learning_rate": 8.545286414273663e-06, "loss": 0.5756, "step": 3345 }, { "epoch": 0.27, "grad_norm": 6.478938172033021, "learning_rate": 8.544358768227545e-06, "loss": 0.5948, "step": 3346 }, { "epoch": 0.27, "grad_norm": 19.71863583589898, "learning_rate": 8.543430876889485e-06, "loss": 0.6282, "step": 3347 }, { "epoch": 0.27, "grad_norm": 4.425442234625287, "learning_rate": 8.542502740323695e-06, "loss": 0.7568, "step": 3348 }, { "epoch": 0.27, "grad_norm": 4.1858823762381085, "learning_rate": 8.54157435859441e-06, "loss": 0.6382, "step": 3349 }, { "epoch": 0.27, "grad_norm": 3.811568952025271, "learning_rate": 8.540645731765882e-06, "loss": 0.6938, "step": 3350 }, { "epoch": 0.27, "grad_norm": 6.115209242386223, "learning_rate": 8.539716859902374e-06, "loss": 0.7653, "step": 3351 }, { "epoch": 0.27, "grad_norm": 3.0582887733013737, "learning_rate": 8.538787743068172e-06, "loss": 0.7282, "step": 3352 }, { "epoch": 0.27, "grad_norm": 5.862014359957153, "learning_rate": 8.537858381327575e-06, "loss": 0.6535, "step": 3353 }, { "epoch": 0.27, "grad_norm": 2.4952699251979085, "learning_rate": 8.536928774744904e-06, "loss": 0.6181, "step": 3354 }, { "epoch": 0.27, "grad_norm": 2.9748004901951672, "learning_rate": 8.535998923384489e-06, "loss": 0.6724, "step": 3355 }, { "epoch": 0.27, "grad_norm": 6.311947842609172, "learning_rate": 8.535068827310684e-06, "loss": 0.5583, "step": 3356 }, { "epoch": 0.27, "grad_norm": 3.721584659685497, "learning_rate": 8.534138486587859e-06, "loss": 0.6294, "step": 3357 }, { "epoch": 0.27, "grad_norm": 2.874083674460974, "learning_rate": 8.533207901280399e-06, "loss": 0.6627, "step": 3358 }, { "epoch": 0.27, "grad_norm": 5.468262701122169, "learning_rate": 8.532277071452704e-06, "loss": 0.7833, "step": 3359 }, { "epoch": 0.27, "grad_norm": 3.118763990779353, "learning_rate": 8.531345997169194e-06, "loss": 0.8438, "step": 3360 }, { "epoch": 0.27, "grad_norm": 3.6512936062832635, "learning_rate": 8.530414678494306e-06, "loss": 0.6003, "step": 3361 }, { "epoch": 0.27, "grad_norm": 4.632181001735019, "learning_rate": 8.529483115492492e-06, "loss": 0.7535, "step": 3362 }, { "epoch": 0.27, "grad_norm": 6.205890458070212, "learning_rate": 8.528551308228224e-06, "loss": 0.7304, "step": 3363 }, { "epoch": 0.27, "grad_norm": 2.875337219739922, "learning_rate": 8.52761925676599e-06, "loss": 0.82, "step": 3364 }, { "epoch": 0.27, "grad_norm": 8.374558804639715, "learning_rate": 8.526686961170289e-06, "loss": 0.6903, "step": 3365 }, { "epoch": 0.27, "grad_norm": 3.852214292850994, "learning_rate": 8.525754421505646e-06, "loss": 0.7556, "step": 3366 }, { "epoch": 0.27, "grad_norm": 2.327822204715053, "learning_rate": 8.524821637836595e-06, "loss": 0.8042, "step": 3367 }, { "epoch": 0.27, "grad_norm": 3.011465567353725, "learning_rate": 8.523888610227692e-06, "loss": 0.7225, "step": 3368 }, { "epoch": 0.27, "grad_norm": 6.976772859441323, "learning_rate": 8.522955338743512e-06, "loss": 0.7556, "step": 3369 }, { "epoch": 0.27, "grad_norm": 2.760826114518901, "learning_rate": 8.522021823448638e-06, "loss": 0.6433, "step": 3370 }, { "epoch": 0.27, "grad_norm": 3.124483369687337, "learning_rate": 8.521088064407678e-06, "loss": 0.5861, "step": 3371 }, { "epoch": 0.27, "grad_norm": 2.736065248571767, "learning_rate": 8.520154061685255e-06, "loss": 0.7044, "step": 3372 }, { "epoch": 0.27, "grad_norm": 3.8630743693346283, "learning_rate": 8.519219815346004e-06, "loss": 0.7131, "step": 3373 }, { "epoch": 0.27, "grad_norm": 2.593686948829996, "learning_rate": 8.518285325454583e-06, "loss": 0.7322, "step": 3374 }, { "epoch": 0.27, "grad_norm": 2.8602596559928783, "learning_rate": 8.517350592075667e-06, "loss": 0.597, "step": 3375 }, { "epoch": 0.27, "grad_norm": 3.407124875356108, "learning_rate": 8.51641561527394e-06, "loss": 0.7004, "step": 3376 }, { "epoch": 0.27, "grad_norm": 4.664443118333286, "learning_rate": 8.515480395114112e-06, "loss": 0.6819, "step": 3377 }, { "epoch": 0.27, "grad_norm": 3.7238867556436, "learning_rate": 8.514544931660907e-06, "loss": 0.7568, "step": 3378 }, { "epoch": 0.27, "grad_norm": 3.1714579659740965, "learning_rate": 8.513609224979061e-06, "loss": 0.6853, "step": 3379 }, { "epoch": 0.27, "grad_norm": 2.5147249991604075, "learning_rate": 8.512673275133334e-06, "loss": 0.7837, "step": 3380 }, { "epoch": 0.27, "grad_norm": 3.270292987243555, "learning_rate": 8.5117370821885e-06, "loss": 0.8481, "step": 3381 }, { "epoch": 0.27, "grad_norm": 4.27839286481896, "learning_rate": 8.510800646209347e-06, "loss": 0.8577, "step": 3382 }, { "epoch": 0.27, "grad_norm": 3.2198892692705776, "learning_rate": 8.509863967260684e-06, "loss": 0.7464, "step": 3383 }, { "epoch": 0.27, "grad_norm": 3.236189360973214, "learning_rate": 8.508927045407334e-06, "loss": 0.8242, "step": 3384 }, { "epoch": 0.27, "grad_norm": 2.9783758277604737, "learning_rate": 8.507989880714139e-06, "loss": 0.5955, "step": 3385 }, { "epoch": 0.28, "grad_norm": 2.4696213306028127, "learning_rate": 8.507052473245953e-06, "loss": 0.6692, "step": 3386 }, { "epoch": 0.28, "grad_norm": 6.409553334139583, "learning_rate": 8.506114823067657e-06, "loss": 0.861, "step": 3387 }, { "epoch": 0.28, "grad_norm": 3.5029173349099416, "learning_rate": 8.50517693024414e-06, "loss": 0.6611, "step": 3388 }, { "epoch": 0.28, "grad_norm": 2.6795803552507045, "learning_rate": 8.504238794840305e-06, "loss": 0.658, "step": 3389 }, { "epoch": 0.28, "grad_norm": 3.200816002397676, "learning_rate": 8.503300416921082e-06, "loss": 0.6274, "step": 3390 }, { "epoch": 0.28, "grad_norm": 4.037378088364288, "learning_rate": 8.502361796551415e-06, "loss": 0.746, "step": 3391 }, { "epoch": 0.28, "grad_norm": 3.5002154410766737, "learning_rate": 8.501422933796256e-06, "loss": 0.7615, "step": 3392 }, { "epoch": 0.28, "grad_norm": 3.558197577812753, "learning_rate": 8.500483828720582e-06, "loss": 0.6948, "step": 3393 }, { "epoch": 0.28, "grad_norm": 2.5048463446024143, "learning_rate": 8.49954448138939e-06, "loss": 0.6263, "step": 3394 }, { "epoch": 0.28, "grad_norm": 4.408952276463324, "learning_rate": 8.498604891867683e-06, "loss": 0.6482, "step": 3395 }, { "epoch": 0.28, "grad_norm": 4.226213450289688, "learning_rate": 8.497665060220488e-06, "loss": 0.6842, "step": 3396 }, { "epoch": 0.28, "grad_norm": 3.7265993812223486, "learning_rate": 8.496724986512848e-06, "loss": 0.6746, "step": 3397 }, { "epoch": 0.28, "grad_norm": 2.9026796021396524, "learning_rate": 8.495784670809822e-06, "loss": 0.7646, "step": 3398 }, { "epoch": 0.28, "grad_norm": 5.26894273274618, "learning_rate": 8.494844113176486e-06, "loss": 0.6401, "step": 3399 }, { "epoch": 0.28, "grad_norm": 3.443409431752811, "learning_rate": 8.49390331367793e-06, "loss": 0.7328, "step": 3400 }, { "epoch": 0.28, "grad_norm": 3.135560789299575, "learning_rate": 8.492962272379268e-06, "loss": 0.8426, "step": 3401 }, { "epoch": 0.28, "grad_norm": 12.24784666684616, "learning_rate": 8.492020989345622e-06, "loss": 0.8092, "step": 3402 }, { "epoch": 0.28, "grad_norm": 2.245527474555863, "learning_rate": 8.491079464642134e-06, "loss": 0.7113, "step": 3403 }, { "epoch": 0.28, "grad_norm": 3.6502809284815516, "learning_rate": 8.490137698333969e-06, "loss": 0.6906, "step": 3404 }, { "epoch": 0.28, "grad_norm": 6.74085520338024, "learning_rate": 8.489195690486296e-06, "loss": 0.7697, "step": 3405 }, { "epoch": 0.28, "grad_norm": 2.92429672890809, "learning_rate": 8.488253441164313e-06, "loss": 0.7274, "step": 3406 }, { "epoch": 0.28, "grad_norm": 2.779385686596072, "learning_rate": 8.48731095043323e-06, "loss": 0.6477, "step": 3407 }, { "epoch": 0.28, "grad_norm": 2.5557130204025778, "learning_rate": 8.486368218358268e-06, "loss": 0.7512, "step": 3408 }, { "epoch": 0.28, "grad_norm": 3.716731033773124, "learning_rate": 8.485425245004675e-06, "loss": 0.7646, "step": 3409 }, { "epoch": 0.28, "grad_norm": 4.414773653984684, "learning_rate": 8.484482030437708e-06, "loss": 0.7015, "step": 3410 }, { "epoch": 0.28, "grad_norm": 5.932430800896966, "learning_rate": 8.483538574722648e-06, "loss": 0.6358, "step": 3411 }, { "epoch": 0.28, "grad_norm": 3.6415645027039543, "learning_rate": 8.482594877924779e-06, "loss": 0.7446, "step": 3412 }, { "epoch": 0.28, "grad_norm": 18.0612837191058, "learning_rate": 8.481650940109419e-06, "loss": 0.7081, "step": 3413 }, { "epoch": 0.28, "grad_norm": 3.162714756439013, "learning_rate": 8.480706761341893e-06, "loss": 0.839, "step": 3414 }, { "epoch": 0.28, "grad_norm": 3.2378503097262574, "learning_rate": 8.47976234168754e-06, "loss": 0.8578, "step": 3415 }, { "epoch": 0.28, "grad_norm": 4.702868047060205, "learning_rate": 8.478817681211724e-06, "loss": 0.6566, "step": 3416 }, { "epoch": 0.28, "grad_norm": 2.928765837017126, "learning_rate": 8.47787277997982e-06, "loss": 0.69, "step": 3417 }, { "epoch": 0.28, "grad_norm": 4.086795076167226, "learning_rate": 8.476927638057221e-06, "loss": 0.7978, "step": 3418 }, { "epoch": 0.28, "grad_norm": 6.090646742371641, "learning_rate": 8.475982255509336e-06, "loss": 0.8555, "step": 3419 }, { "epoch": 0.28, "grad_norm": 8.08169650251828, "learning_rate": 8.475036632401594e-06, "loss": 0.6639, "step": 3420 }, { "epoch": 0.28, "grad_norm": 2.7400709623157193, "learning_rate": 8.474090768799436e-06, "loss": 0.6374, "step": 3421 }, { "epoch": 0.28, "grad_norm": 5.882548233665905, "learning_rate": 8.473144664768322e-06, "loss": 0.7037, "step": 3422 }, { "epoch": 0.28, "grad_norm": 4.758768983190327, "learning_rate": 8.472198320373729e-06, "loss": 0.7754, "step": 3423 }, { "epoch": 0.28, "grad_norm": 3.5030693413732834, "learning_rate": 8.471251735681148e-06, "loss": 0.5913, "step": 3424 }, { "epoch": 0.28, "grad_norm": 4.429324157522576, "learning_rate": 8.47030491075609e-06, "loss": 0.7187, "step": 3425 }, { "epoch": 0.28, "grad_norm": 3.6729377630220994, "learning_rate": 8.46935784566408e-06, "loss": 0.7205, "step": 3426 }, { "epoch": 0.28, "grad_norm": 3.2308402649290424, "learning_rate": 8.468410540470666e-06, "loss": 0.7664, "step": 3427 }, { "epoch": 0.28, "grad_norm": 2.888006085360701, "learning_rate": 8.467462995241403e-06, "loss": 0.7553, "step": 3428 }, { "epoch": 0.28, "grad_norm": 3.0257686387451046, "learning_rate": 8.466515210041866e-06, "loss": 0.633, "step": 3429 }, { "epoch": 0.28, "grad_norm": 3.124613920405623, "learning_rate": 8.46556718493765e-06, "loss": 0.7035, "step": 3430 }, { "epoch": 0.28, "grad_norm": 2.644456571765262, "learning_rate": 8.464618919994364e-06, "loss": 0.7086, "step": 3431 }, { "epoch": 0.28, "grad_norm": 4.331826202589411, "learning_rate": 8.463670415277634e-06, "loss": 0.5793, "step": 3432 }, { "epoch": 0.28, "grad_norm": 4.691061721311305, "learning_rate": 8.462721670853101e-06, "loss": 0.8795, "step": 3433 }, { "epoch": 0.28, "grad_norm": 2.348335174885549, "learning_rate": 8.461772686786427e-06, "loss": 0.5998, "step": 3434 }, { "epoch": 0.28, "grad_norm": 3.1957922047266103, "learning_rate": 8.460823463143284e-06, "loss": 0.7169, "step": 3435 }, { "epoch": 0.28, "grad_norm": 4.56126454463297, "learning_rate": 8.459873999989367e-06, "loss": 0.71, "step": 3436 }, { "epoch": 0.28, "grad_norm": 13.590360129304072, "learning_rate": 8.458924297390385e-06, "loss": 0.7248, "step": 3437 }, { "epoch": 0.28, "grad_norm": 2.7250754830610573, "learning_rate": 8.457974355412062e-06, "loss": 0.7403, "step": 3438 }, { "epoch": 0.28, "grad_norm": 9.239943051830677, "learning_rate": 8.457024174120141e-06, "loss": 0.5732, "step": 3439 }, { "epoch": 0.28, "grad_norm": 3.4328699534374105, "learning_rate": 8.456073753580378e-06, "loss": 0.6238, "step": 3440 }, { "epoch": 0.28, "grad_norm": 4.0457530104295545, "learning_rate": 8.455123093858551e-06, "loss": 0.9201, "step": 3441 }, { "epoch": 0.28, "grad_norm": 3.8638679370017464, "learning_rate": 8.454172195020452e-06, "loss": 0.7626, "step": 3442 }, { "epoch": 0.28, "grad_norm": 3.08677814213671, "learning_rate": 8.453221057131886e-06, "loss": 0.805, "step": 3443 }, { "epoch": 0.28, "grad_norm": 5.525897117337661, "learning_rate": 8.45226968025868e-06, "loss": 0.5757, "step": 3444 }, { "epoch": 0.28, "grad_norm": 2.99680735890996, "learning_rate": 8.451318064466676e-06, "loss": 0.7734, "step": 3445 }, { "epoch": 0.28, "grad_norm": 3.7724471786923566, "learning_rate": 8.450366209821728e-06, "loss": 0.8221, "step": 3446 }, { "epoch": 0.28, "grad_norm": 2.9787516855280822, "learning_rate": 8.449414116389716e-06, "loss": 0.6826, "step": 3447 }, { "epoch": 0.28, "grad_norm": 3.0121815343253235, "learning_rate": 8.448461784236525e-06, "loss": 0.7126, "step": 3448 }, { "epoch": 0.28, "grad_norm": 3.405700738764285, "learning_rate": 8.447509213428067e-06, "loss": 0.7307, "step": 3449 }, { "epoch": 0.28, "grad_norm": 3.0016034731368797, "learning_rate": 8.446556404030263e-06, "loss": 0.7837, "step": 3450 }, { "epoch": 0.28, "grad_norm": 3.4388507483947346, "learning_rate": 8.445603356109057e-06, "loss": 0.569, "step": 3451 }, { "epoch": 0.28, "grad_norm": 4.1548362998446215, "learning_rate": 8.4446500697304e-06, "loss": 0.5445, "step": 3452 }, { "epoch": 0.28, "grad_norm": 4.007684452189185, "learning_rate": 8.443696544960272e-06, "loss": 0.6466, "step": 3453 }, { "epoch": 0.28, "grad_norm": 2.3653943716866133, "learning_rate": 8.44274278186466e-06, "loss": 0.6692, "step": 3454 }, { "epoch": 0.28, "grad_norm": 2.3619559048975645, "learning_rate": 8.441788780509568e-06, "loss": 0.7444, "step": 3455 }, { "epoch": 0.28, "grad_norm": 2.3586144759068945, "learning_rate": 8.44083454096102e-06, "loss": 0.6642, "step": 3456 }, { "epoch": 0.28, "grad_norm": 3.526775691433164, "learning_rate": 8.43988006328506e-06, "loss": 0.6953, "step": 3457 }, { "epoch": 0.28, "grad_norm": 3.9035646119017198, "learning_rate": 8.438925347547737e-06, "loss": 0.68, "step": 3458 }, { "epoch": 0.28, "grad_norm": 2.2702725961322923, "learning_rate": 8.437970393815129e-06, "loss": 0.7081, "step": 3459 }, { "epoch": 0.28, "grad_norm": 3.208156894459663, "learning_rate": 8.437015202153322e-06, "loss": 0.7157, "step": 3460 }, { "epoch": 0.28, "grad_norm": 3.6148937122543683, "learning_rate": 8.436059772628421e-06, "loss": 0.6189, "step": 3461 }, { "epoch": 0.28, "grad_norm": 3.774059221482722, "learning_rate": 8.435104105306549e-06, "loss": 0.8104, "step": 3462 }, { "epoch": 0.28, "grad_norm": 3.346137098837187, "learning_rate": 8.434148200253843e-06, "loss": 0.8152, "step": 3463 }, { "epoch": 0.28, "grad_norm": 2.764372417391963, "learning_rate": 8.433192057536458e-06, "loss": 0.7842, "step": 3464 }, { "epoch": 0.28, "grad_norm": 3.165528144141994, "learning_rate": 8.432235677220567e-06, "loss": 0.7955, "step": 3465 }, { "epoch": 0.28, "grad_norm": 2.7907531631475035, "learning_rate": 8.431279059372357e-06, "loss": 0.801, "step": 3466 }, { "epoch": 0.28, "grad_norm": 2.512382878932326, "learning_rate": 8.43032220405803e-06, "loss": 0.6152, "step": 3467 }, { "epoch": 0.28, "grad_norm": 2.9922035044268234, "learning_rate": 8.429365111343806e-06, "loss": 0.608, "step": 3468 }, { "epoch": 0.28, "grad_norm": 2.35153913387305, "learning_rate": 8.428407781295924e-06, "loss": 0.7296, "step": 3469 }, { "epoch": 0.28, "grad_norm": 14.417730754359736, "learning_rate": 8.427450213980636e-06, "loss": 0.6743, "step": 3470 }, { "epoch": 0.28, "grad_norm": 3.2153960200237335, "learning_rate": 8.426492409464213e-06, "loss": 0.6143, "step": 3471 }, { "epoch": 0.28, "grad_norm": 3.6248165627709525, "learning_rate": 8.42553436781294e-06, "loss": 0.5606, "step": 3472 }, { "epoch": 0.28, "grad_norm": 2.9303729349030707, "learning_rate": 8.42457608909312e-06, "loss": 0.6752, "step": 3473 }, { "epoch": 0.28, "grad_norm": 4.333787668761645, "learning_rate": 8.423617573371073e-06, "loss": 0.7768, "step": 3474 }, { "epoch": 0.28, "grad_norm": 4.843328457272623, "learning_rate": 8.422658820713131e-06, "loss": 0.7368, "step": 3475 }, { "epoch": 0.28, "grad_norm": 3.1918146761477892, "learning_rate": 8.421699831185649e-06, "loss": 0.5924, "step": 3476 }, { "epoch": 0.28, "grad_norm": 2.9849385817252903, "learning_rate": 8.420740604854993e-06, "loss": 0.6306, "step": 3477 }, { "epoch": 0.28, "grad_norm": 2.6153714177783614, "learning_rate": 8.419781141787549e-06, "loss": 0.6301, "step": 3478 }, { "epoch": 0.28, "grad_norm": 2.854108276694084, "learning_rate": 8.418821442049716e-06, "loss": 0.6411, "step": 3479 }, { "epoch": 0.28, "grad_norm": 2.5389709265408014, "learning_rate": 8.417861505707914e-06, "loss": 0.6805, "step": 3480 }, { "epoch": 0.28, "grad_norm": 3.151605011069831, "learning_rate": 8.416901332828574e-06, "loss": 0.6677, "step": 3481 }, { "epoch": 0.28, "grad_norm": 2.6457456494684215, "learning_rate": 8.415940923478148e-06, "loss": 0.792, "step": 3482 }, { "epoch": 0.28, "grad_norm": 2.3914292361238343, "learning_rate": 8.414980277723101e-06, "loss": 0.7138, "step": 3483 }, { "epoch": 0.28, "grad_norm": 2.4391007777872424, "learning_rate": 8.414019395629918e-06, "loss": 0.8203, "step": 3484 }, { "epoch": 0.28, "grad_norm": 4.590218169483447, "learning_rate": 8.413058277265094e-06, "loss": 0.6714, "step": 3485 }, { "epoch": 0.28, "grad_norm": 5.813957562117255, "learning_rate": 8.412096922695147e-06, "loss": 0.8835, "step": 3486 }, { "epoch": 0.28, "grad_norm": 3.8069302518506674, "learning_rate": 8.41113533198661e-06, "loss": 0.6778, "step": 3487 }, { "epoch": 0.28, "grad_norm": 3.380066090694027, "learning_rate": 8.41017350520603e-06, "loss": 0.7246, "step": 3488 }, { "epoch": 0.28, "grad_norm": 3.4084201369289695, "learning_rate": 8.40921144241997e-06, "loss": 0.8061, "step": 3489 }, { "epoch": 0.28, "grad_norm": 2.4492832590578675, "learning_rate": 8.408249143695014e-06, "loss": 0.5812, "step": 3490 }, { "epoch": 0.28, "grad_norm": 2.1911238479461477, "learning_rate": 8.407286609097754e-06, "loss": 0.768, "step": 3491 }, { "epoch": 0.28, "grad_norm": 3.8501856132564254, "learning_rate": 8.406323838694808e-06, "loss": 0.6398, "step": 3492 }, { "epoch": 0.28, "grad_norm": 2.42463582869534, "learning_rate": 8.405360832552805e-06, "loss": 0.6164, "step": 3493 }, { "epoch": 0.28, "grad_norm": 3.0978672820008626, "learning_rate": 8.40439759073839e-06, "loss": 0.7561, "step": 3494 }, { "epoch": 0.28, "grad_norm": 2.990085745586637, "learning_rate": 8.403434113318225e-06, "loss": 0.5866, "step": 3495 }, { "epoch": 0.28, "grad_norm": 3.7734092720972114, "learning_rate": 8.40247040035899e-06, "loss": 0.7322, "step": 3496 }, { "epoch": 0.28, "grad_norm": 3.327528314580933, "learning_rate": 8.401506451927382e-06, "loss": 0.6608, "step": 3497 }, { "epoch": 0.28, "grad_norm": 4.689073563397524, "learning_rate": 8.400542268090106e-06, "loss": 0.7661, "step": 3498 }, { "epoch": 0.28, "grad_norm": 2.7070712671273958, "learning_rate": 8.399577848913896e-06, "loss": 0.659, "step": 3499 }, { "epoch": 0.28, "grad_norm": 6.99560555668214, "learning_rate": 8.398613194465492e-06, "loss": 0.6466, "step": 3500 }, { "epoch": 0.28, "grad_norm": 4.226474068478074, "learning_rate": 8.397648304811657e-06, "loss": 0.7224, "step": 3501 }, { "epoch": 0.28, "grad_norm": 6.306901945693885, "learning_rate": 8.396683180019166e-06, "loss": 0.8435, "step": 3502 }, { "epoch": 0.28, "grad_norm": 5.61944085955927, "learning_rate": 8.39571782015481e-06, "loss": 0.7978, "step": 3503 }, { "epoch": 0.28, "grad_norm": 3.453655124080758, "learning_rate": 8.3947522252854e-06, "loss": 0.721, "step": 3504 }, { "epoch": 0.28, "grad_norm": 8.422867185274093, "learning_rate": 8.393786395477761e-06, "loss": 0.7179, "step": 3505 }, { "epoch": 0.28, "grad_norm": 5.295405306358807, "learning_rate": 8.392820330798734e-06, "loss": 0.6321, "step": 3506 }, { "epoch": 0.28, "grad_norm": 3.612987800486962, "learning_rate": 8.391854031315178e-06, "loss": 0.7428, "step": 3507 }, { "epoch": 0.28, "grad_norm": 10.884359010582811, "learning_rate": 8.390887497093968e-06, "loss": 0.8065, "step": 3508 }, { "epoch": 0.29, "grad_norm": 5.069873249800019, "learning_rate": 8.38992072820199e-06, "loss": 0.7499, "step": 3509 }, { "epoch": 0.29, "grad_norm": 6.917081130488773, "learning_rate": 8.388953724706152e-06, "loss": 0.6734, "step": 3510 }, { "epoch": 0.29, "grad_norm": 4.403011702490444, "learning_rate": 8.387986486673381e-06, "loss": 0.7111, "step": 3511 }, { "epoch": 0.29, "grad_norm": 3.6740319296504635, "learning_rate": 8.38701901417061e-06, "loss": 0.4359, "step": 3512 }, { "epoch": 0.29, "grad_norm": 7.450811376950235, "learning_rate": 8.386051307264798e-06, "loss": 0.7917, "step": 3513 }, { "epoch": 0.29, "grad_norm": 3.6074589865262108, "learning_rate": 8.385083366022914e-06, "loss": 0.7174, "step": 3514 }, { "epoch": 0.29, "grad_norm": 3.1477663539696805, "learning_rate": 8.384115190511948e-06, "loss": 0.8458, "step": 3515 }, { "epoch": 0.29, "grad_norm": 4.563630410783101, "learning_rate": 8.383146780798901e-06, "loss": 0.8753, "step": 3516 }, { "epoch": 0.29, "grad_norm": 5.697895793470481, "learning_rate": 8.382178136950796e-06, "loss": 0.7696, "step": 3517 }, { "epoch": 0.29, "grad_norm": 3.869601461331248, "learning_rate": 8.381209259034668e-06, "loss": 0.7951, "step": 3518 }, { "epoch": 0.29, "grad_norm": 3.296027703116834, "learning_rate": 8.380240147117569e-06, "loss": 0.5266, "step": 3519 }, { "epoch": 0.29, "grad_norm": 2.679296448401381, "learning_rate": 8.379270801266569e-06, "loss": 0.6947, "step": 3520 }, { "epoch": 0.29, "grad_norm": 12.013200752045412, "learning_rate": 8.37830122154875e-06, "loss": 0.5685, "step": 3521 }, { "epoch": 0.29, "grad_norm": 4.724271009762378, "learning_rate": 8.377331408031216e-06, "loss": 0.6585, "step": 3522 }, { "epoch": 0.29, "grad_norm": 4.009187488614058, "learning_rate": 8.376361360781083e-06, "loss": 0.8104, "step": 3523 }, { "epoch": 0.29, "grad_norm": 4.097410112926117, "learning_rate": 8.375391079865485e-06, "loss": 0.6784, "step": 3524 }, { "epoch": 0.29, "grad_norm": 2.324946916563888, "learning_rate": 8.37442056535157e-06, "loss": 0.7041, "step": 3525 }, { "epoch": 0.29, "grad_norm": 3.9475960587596277, "learning_rate": 8.373449817306505e-06, "loss": 0.7196, "step": 3526 }, { "epoch": 0.29, "grad_norm": 3.95878259928452, "learning_rate": 8.372478835797473e-06, "loss": 0.874, "step": 3527 }, { "epoch": 0.29, "grad_norm": 2.643227518871769, "learning_rate": 8.37150762089167e-06, "loss": 0.7626, "step": 3528 }, { "epoch": 0.29, "grad_norm": 3.6363233498330807, "learning_rate": 8.37053617265631e-06, "loss": 0.8392, "step": 3529 }, { "epoch": 0.29, "grad_norm": 4.074460069516078, "learning_rate": 8.369564491158626e-06, "loss": 0.5997, "step": 3530 }, { "epoch": 0.29, "grad_norm": 3.1682130922620364, "learning_rate": 8.368592576465861e-06, "loss": 0.7175, "step": 3531 }, { "epoch": 0.29, "grad_norm": 5.657803736000153, "learning_rate": 8.367620428645281e-06, "loss": 0.8291, "step": 3532 }, { "epoch": 0.29, "grad_norm": 3.940670943290156, "learning_rate": 8.366648047764161e-06, "loss": 0.7834, "step": 3533 }, { "epoch": 0.29, "grad_norm": 3.3124287401586177, "learning_rate": 8.3656754338898e-06, "loss": 0.8664, "step": 3534 }, { "epoch": 0.29, "grad_norm": 2.353958015493078, "learning_rate": 8.364702587089503e-06, "loss": 0.6858, "step": 3535 }, { "epoch": 0.29, "grad_norm": 3.6547224889261156, "learning_rate": 8.363729507430605e-06, "loss": 0.6812, "step": 3536 }, { "epoch": 0.29, "grad_norm": 3.634120295835721, "learning_rate": 8.362756194980444e-06, "loss": 0.7321, "step": 3537 }, { "epoch": 0.29, "grad_norm": 2.686769946884574, "learning_rate": 8.36178264980638e-06, "loss": 0.6026, "step": 3538 }, { "epoch": 0.29, "grad_norm": 4.572798911309554, "learning_rate": 8.36080887197579e-06, "loss": 0.7351, "step": 3539 }, { "epoch": 0.29, "grad_norm": 4.386834462797566, "learning_rate": 8.359834861556066e-06, "loss": 0.8774, "step": 3540 }, { "epoch": 0.29, "grad_norm": 4.698203621612668, "learning_rate": 8.358860618614612e-06, "loss": 0.7425, "step": 3541 }, { "epoch": 0.29, "grad_norm": 5.993951310822244, "learning_rate": 8.357886143218855e-06, "loss": 0.7102, "step": 3542 }, { "epoch": 0.29, "grad_norm": 2.3723043035676974, "learning_rate": 8.356911435436234e-06, "loss": 0.8388, "step": 3543 }, { "epoch": 0.29, "grad_norm": 7.866736365019808, "learning_rate": 8.355936495334204e-06, "loss": 0.7495, "step": 3544 }, { "epoch": 0.29, "grad_norm": 4.190351196990587, "learning_rate": 8.35496132298024e-06, "loss": 0.781, "step": 3545 }, { "epoch": 0.29, "grad_norm": 2.7827325543577834, "learning_rate": 8.353985918441825e-06, "loss": 0.9104, "step": 3546 }, { "epoch": 0.29, "grad_norm": 2.9659921774841074, "learning_rate": 8.353010281786467e-06, "loss": 0.6534, "step": 3547 }, { "epoch": 0.29, "grad_norm": 3.4660431004813876, "learning_rate": 8.352034413081687e-06, "loss": 0.7023, "step": 3548 }, { "epoch": 0.29, "grad_norm": 4.3956004608456976, "learning_rate": 8.351058312395018e-06, "loss": 0.745, "step": 3549 }, { "epoch": 0.29, "grad_norm": 3.2721546593834083, "learning_rate": 8.350081979794013e-06, "loss": 0.8692, "step": 3550 }, { "epoch": 0.29, "grad_norm": 2.6106291170866154, "learning_rate": 8.349105415346241e-06, "loss": 0.8022, "step": 3551 }, { "epoch": 0.29, "grad_norm": 3.754286301608582, "learning_rate": 8.348128619119287e-06, "loss": 0.6679, "step": 3552 }, { "epoch": 0.29, "grad_norm": 2.832642617623086, "learning_rate": 8.347151591180753e-06, "loss": 0.5043, "step": 3553 }, { "epoch": 0.29, "grad_norm": 5.473816912274864, "learning_rate": 8.346174331598251e-06, "loss": 0.7565, "step": 3554 }, { "epoch": 0.29, "grad_norm": 6.696370863529595, "learning_rate": 8.345196840439418e-06, "loss": 0.9184, "step": 3555 }, { "epoch": 0.29, "grad_norm": 5.4155927023771335, "learning_rate": 8.344219117771899e-06, "loss": 0.9407, "step": 3556 }, { "epoch": 0.29, "grad_norm": 3.9019251848703753, "learning_rate": 8.343241163663361e-06, "loss": 0.6713, "step": 3557 }, { "epoch": 0.29, "grad_norm": 3.685186102235513, "learning_rate": 8.342262978181482e-06, "loss": 0.6528, "step": 3558 }, { "epoch": 0.29, "grad_norm": 3.9003490148262694, "learning_rate": 8.341284561393961e-06, "loss": 0.8707, "step": 3559 }, { "epoch": 0.29, "grad_norm": 6.101881252429892, "learning_rate": 8.340305913368511e-06, "loss": 0.7126, "step": 3560 }, { "epoch": 0.29, "grad_norm": 3.0557781741379557, "learning_rate": 8.339327034172859e-06, "loss": 0.6309, "step": 3561 }, { "epoch": 0.29, "grad_norm": 2.7543765839910184, "learning_rate": 8.33834792387475e-06, "loss": 0.6994, "step": 3562 }, { "epoch": 0.29, "grad_norm": 4.681327631691478, "learning_rate": 8.337368582541944e-06, "loss": 0.6937, "step": 3563 }, { "epoch": 0.29, "grad_norm": 4.2397481421777945, "learning_rate": 8.33638901024222e-06, "loss": 0.8142, "step": 3564 }, { "epoch": 0.29, "grad_norm": 4.177156794099068, "learning_rate": 8.335409207043366e-06, "loss": 0.7458, "step": 3565 }, { "epoch": 0.29, "grad_norm": 4.515968050680153, "learning_rate": 8.334429173013197e-06, "loss": 0.7827, "step": 3566 }, { "epoch": 0.29, "grad_norm": 3.3611763736210274, "learning_rate": 8.333448908219531e-06, "loss": 0.5784, "step": 3567 }, { "epoch": 0.29, "grad_norm": 2.627453403537669, "learning_rate": 8.332468412730213e-06, "loss": 0.797, "step": 3568 }, { "epoch": 0.29, "grad_norm": 4.92414908826998, "learning_rate": 8.331487686613097e-06, "loss": 0.6804, "step": 3569 }, { "epoch": 0.29, "grad_norm": 2.487233066374833, "learning_rate": 8.330506729936057e-06, "loss": 0.6234, "step": 3570 }, { "epoch": 0.29, "grad_norm": 3.354856210777375, "learning_rate": 8.32952554276698e-06, "loss": 0.646, "step": 3571 }, { "epoch": 0.29, "grad_norm": 2.809882085875991, "learning_rate": 8.328544125173772e-06, "loss": 0.8571, "step": 3572 }, { "epoch": 0.29, "grad_norm": 5.669261037920729, "learning_rate": 8.327562477224352e-06, "loss": 0.6522, "step": 3573 }, { "epoch": 0.29, "grad_norm": 7.422766432934572, "learning_rate": 8.326580598986656e-06, "loss": 0.7032, "step": 3574 }, { "epoch": 0.29, "grad_norm": 9.040526061350263, "learning_rate": 8.325598490528636e-06, "loss": 0.6551, "step": 3575 }, { "epoch": 0.29, "grad_norm": 8.502540473610582, "learning_rate": 8.324616151918263e-06, "loss": 0.822, "step": 3576 }, { "epoch": 0.29, "grad_norm": 4.9695111393128055, "learning_rate": 8.323633583223516e-06, "loss": 0.6965, "step": 3577 }, { "epoch": 0.29, "grad_norm": 3.7476995893291107, "learning_rate": 8.3226507845124e-06, "loss": 0.9427, "step": 3578 }, { "epoch": 0.29, "grad_norm": 6.3133010568439065, "learning_rate": 8.321667755852927e-06, "loss": 0.7375, "step": 3579 }, { "epoch": 0.29, "grad_norm": 14.596130966979175, "learning_rate": 8.320684497313131e-06, "loss": 0.8515, "step": 3580 }, { "epoch": 0.29, "grad_norm": 4.48868441617736, "learning_rate": 8.319701008961058e-06, "loss": 0.7092, "step": 3581 }, { "epoch": 0.29, "grad_norm": 3.261924824907615, "learning_rate": 8.318717290864775e-06, "loss": 0.7307, "step": 3582 }, { "epoch": 0.29, "grad_norm": 5.002244261221541, "learning_rate": 8.317733343092357e-06, "loss": 0.6581, "step": 3583 }, { "epoch": 0.29, "grad_norm": 3.1779613490746215, "learning_rate": 8.316749165711903e-06, "loss": 0.749, "step": 3584 }, { "epoch": 0.29, "grad_norm": 4.273774017189862, "learning_rate": 8.315764758791522e-06, "loss": 0.6508, "step": 3585 }, { "epoch": 0.29, "grad_norm": 4.596467257361083, "learning_rate": 8.314780122399341e-06, "loss": 0.7284, "step": 3586 }, { "epoch": 0.29, "grad_norm": 4.8650311340865615, "learning_rate": 8.313795256603505e-06, "loss": 0.6657, "step": 3587 }, { "epoch": 0.29, "grad_norm": 58.183853366780966, "learning_rate": 8.312810161472173e-06, "loss": 0.6059, "step": 3588 }, { "epoch": 0.29, "grad_norm": 4.998058633242659, "learning_rate": 8.311824837073517e-06, "loss": 0.597, "step": 3589 }, { "epoch": 0.29, "grad_norm": 4.9324808993077855, "learning_rate": 8.31083928347573e-06, "loss": 0.778, "step": 3590 }, { "epoch": 0.29, "grad_norm": 8.116975026108058, "learning_rate": 8.309853500747016e-06, "loss": 0.6889, "step": 3591 }, { "epoch": 0.29, "grad_norm": 3.6743642447769718, "learning_rate": 8.308867488955602e-06, "loss": 0.7925, "step": 3592 }, { "epoch": 0.29, "grad_norm": 3.1179402791073247, "learning_rate": 8.307881248169722e-06, "loss": 0.6022, "step": 3593 }, { "epoch": 0.29, "grad_norm": 9.107621761143436, "learning_rate": 8.306894778457631e-06, "loss": 0.789, "step": 3594 }, { "epoch": 0.29, "grad_norm": 6.1421653345043365, "learning_rate": 8.3059080798876e-06, "loss": 0.8314, "step": 3595 }, { "epoch": 0.29, "grad_norm": 5.756707212595984, "learning_rate": 8.304921152527915e-06, "loss": 0.7591, "step": 3596 }, { "epoch": 0.29, "grad_norm": 4.267457112052182, "learning_rate": 8.303933996446876e-06, "loss": 0.7158, "step": 3597 }, { "epoch": 0.29, "grad_norm": 3.8107606900174447, "learning_rate": 8.3029466117128e-06, "loss": 0.668, "step": 3598 }, { "epoch": 0.29, "grad_norm": 4.7362959449697355, "learning_rate": 8.301958998394021e-06, "loss": 0.589, "step": 3599 }, { "epoch": 0.29, "grad_norm": 3.2649192657070283, "learning_rate": 8.300971156558892e-06, "loss": 0.8364, "step": 3600 }, { "epoch": 0.29, "grad_norm": 4.720488068861202, "learning_rate": 8.299983086275773e-06, "loss": 0.8166, "step": 3601 }, { "epoch": 0.29, "grad_norm": 16.892930927665617, "learning_rate": 8.298994787613044e-06, "loss": 0.6964, "step": 3602 }, { "epoch": 0.29, "grad_norm": 4.861898063963509, "learning_rate": 8.298006260639106e-06, "loss": 0.7707, "step": 3603 }, { "epoch": 0.29, "grad_norm": 5.178656987316199, "learning_rate": 8.297017505422366e-06, "loss": 0.7489, "step": 3604 }, { "epoch": 0.29, "grad_norm": 4.4784671865672765, "learning_rate": 8.296028522031257e-06, "loss": 0.7794, "step": 3605 }, { "epoch": 0.29, "grad_norm": 9.668476136119686, "learning_rate": 8.295039310534221e-06, "loss": 0.7147, "step": 3606 }, { "epoch": 0.29, "grad_norm": 5.4932237648156725, "learning_rate": 8.294049870999717e-06, "loss": 0.7207, "step": 3607 }, { "epoch": 0.29, "grad_norm": 4.737710658658471, "learning_rate": 8.293060203496219e-06, "loss": 0.6734, "step": 3608 }, { "epoch": 0.29, "grad_norm": 5.90600443219457, "learning_rate": 8.292070308092223e-06, "loss": 0.7794, "step": 3609 }, { "epoch": 0.29, "grad_norm": 10.638777054584876, "learning_rate": 8.291080184856231e-06, "loss": 0.6812, "step": 3610 }, { "epoch": 0.29, "grad_norm": 14.256564123422592, "learning_rate": 8.290089833856769e-06, "loss": 0.834, "step": 3611 }, { "epoch": 0.29, "grad_norm": 11.990025629425269, "learning_rate": 8.289099255162374e-06, "loss": 0.7073, "step": 3612 }, { "epoch": 0.29, "grad_norm": 3.5422846125454366, "learning_rate": 8.288108448841601e-06, "loss": 0.678, "step": 3613 }, { "epoch": 0.29, "grad_norm": 4.150900741587477, "learning_rate": 8.287117414963019e-06, "loss": 0.7254, "step": 3614 }, { "epoch": 0.29, "grad_norm": 7.258771600795925, "learning_rate": 8.286126153595213e-06, "loss": 0.6764, "step": 3615 }, { "epoch": 0.29, "grad_norm": 3.22900983739793, "learning_rate": 8.285134664806788e-06, "loss": 0.7984, "step": 3616 }, { "epoch": 0.29, "grad_norm": 13.989358835704389, "learning_rate": 8.284142948666361e-06, "loss": 0.5343, "step": 3617 }, { "epoch": 0.29, "grad_norm": 2.885262362128362, "learning_rate": 8.28315100524256e-06, "loss": 0.7728, "step": 3618 }, { "epoch": 0.29, "grad_norm": 15.38925584885041, "learning_rate": 8.28215883460404e-06, "loss": 0.737, "step": 3619 }, { "epoch": 0.29, "grad_norm": 8.323833041225676, "learning_rate": 8.281166436819458e-06, "loss": 0.6107, "step": 3620 }, { "epoch": 0.29, "grad_norm": 3.342949124390851, "learning_rate": 8.280173811957503e-06, "loss": 0.7527, "step": 3621 }, { "epoch": 0.29, "grad_norm": 16.728989660947818, "learning_rate": 8.279180960086866e-06, "loss": 0.7969, "step": 3622 }, { "epoch": 0.29, "grad_norm": 13.372498276211944, "learning_rate": 8.278187881276257e-06, "loss": 0.9697, "step": 3623 }, { "epoch": 0.29, "grad_norm": 3.4951571916636768, "learning_rate": 8.277194575594407e-06, "loss": 0.7804, "step": 3624 }, { "epoch": 0.29, "grad_norm": 4.407764803027612, "learning_rate": 8.276201043110057e-06, "loss": 0.6956, "step": 3625 }, { "epoch": 0.29, "grad_norm": 3.1558749371758874, "learning_rate": 8.275207283891967e-06, "loss": 0.7098, "step": 3626 }, { "epoch": 0.29, "grad_norm": 4.1912222347567685, "learning_rate": 8.274213298008908e-06, "loss": 0.803, "step": 3627 }, { "epoch": 0.29, "grad_norm": 2.861561582272143, "learning_rate": 8.273219085529676e-06, "loss": 0.7111, "step": 3628 }, { "epoch": 0.29, "grad_norm": 5.90546243405222, "learning_rate": 8.272224646523072e-06, "loss": 0.7486, "step": 3629 }, { "epoch": 0.29, "grad_norm": 28.710477893550134, "learning_rate": 8.271229981057917e-06, "loss": 0.7903, "step": 3630 }, { "epoch": 0.29, "grad_norm": 9.61738656263696, "learning_rate": 8.270235089203052e-06, "loss": 0.7065, "step": 3631 }, { "epoch": 0.29, "grad_norm": 4.330971256643809, "learning_rate": 8.269239971027328e-06, "loss": 0.7236, "step": 3632 }, { "epoch": 0.3, "grad_norm": 10.476720229592933, "learning_rate": 8.268244626599613e-06, "loss": 0.6467, "step": 3633 }, { "epoch": 0.3, "grad_norm": 7.134023231459432, "learning_rate": 8.267249055988788e-06, "loss": 0.6713, "step": 3634 }, { "epoch": 0.3, "grad_norm": 4.755786462906381, "learning_rate": 8.266253259263758e-06, "loss": 0.6968, "step": 3635 }, { "epoch": 0.3, "grad_norm": 4.110370260727674, "learning_rate": 8.26525723649344e-06, "loss": 0.6632, "step": 3636 }, { "epoch": 0.3, "grad_norm": 6.127947200354974, "learning_rate": 8.264260987746757e-06, "loss": 0.9702, "step": 3637 }, { "epoch": 0.3, "grad_norm": 3.457642874317659, "learning_rate": 8.263264513092662e-06, "loss": 0.5843, "step": 3638 }, { "epoch": 0.3, "grad_norm": 3.0145808515627945, "learning_rate": 8.262267812600116e-06, "loss": 0.7334, "step": 3639 }, { "epoch": 0.3, "grad_norm": 4.637743016152468, "learning_rate": 8.261270886338095e-06, "loss": 0.7659, "step": 3640 }, { "epoch": 0.3, "grad_norm": 4.389122136583996, "learning_rate": 8.260273734375594e-06, "loss": 0.7615, "step": 3641 }, { "epoch": 0.3, "grad_norm": 4.0054671638627735, "learning_rate": 8.259276356781624e-06, "loss": 0.6937, "step": 3642 }, { "epoch": 0.3, "grad_norm": 3.85937232364193, "learning_rate": 8.258278753625207e-06, "loss": 0.6817, "step": 3643 }, { "epoch": 0.3, "grad_norm": 3.6184458301138047, "learning_rate": 8.257280924975384e-06, "loss": 0.8854, "step": 3644 }, { "epoch": 0.3, "grad_norm": 2.040096233880686, "learning_rate": 8.25628287090121e-06, "loss": 0.5259, "step": 3645 }, { "epoch": 0.3, "grad_norm": 6.013349852507329, "learning_rate": 8.255284591471762e-06, "loss": 0.5649, "step": 3646 }, { "epoch": 0.3, "grad_norm": 5.70417966570395, "learning_rate": 8.25428608675612e-06, "loss": 0.7666, "step": 3647 }, { "epoch": 0.3, "grad_norm": 2.70292318410213, "learning_rate": 8.253287356823392e-06, "loss": 0.6846, "step": 3648 }, { "epoch": 0.3, "grad_norm": 6.823091090790905, "learning_rate": 8.252288401742695e-06, "loss": 0.6188, "step": 3649 }, { "epoch": 0.3, "grad_norm": 3.330574465843821, "learning_rate": 8.25128922158316e-06, "loss": 0.6335, "step": 3650 }, { "epoch": 0.3, "grad_norm": 4.092917909725572, "learning_rate": 8.25028981641394e-06, "loss": 0.6873, "step": 3651 }, { "epoch": 0.3, "grad_norm": 3.9567687453204337, "learning_rate": 8.249290186304199e-06, "loss": 0.6845, "step": 3652 }, { "epoch": 0.3, "grad_norm": 5.652588686788329, "learning_rate": 8.24829033132312e-06, "loss": 0.7102, "step": 3653 }, { "epoch": 0.3, "grad_norm": 16.934139524506165, "learning_rate": 8.247290251539894e-06, "loss": 0.843, "step": 3654 }, { "epoch": 0.3, "grad_norm": 4.629057489008848, "learning_rate": 8.246289947023737e-06, "loss": 0.7287, "step": 3655 }, { "epoch": 0.3, "grad_norm": 8.04421697646196, "learning_rate": 8.245289417843877e-06, "loss": 0.7892, "step": 3656 }, { "epoch": 0.3, "grad_norm": 11.157584894799221, "learning_rate": 8.244288664069555e-06, "loss": 0.6364, "step": 3657 }, { "epoch": 0.3, "grad_norm": 3.220427030017744, "learning_rate": 8.243287685770028e-06, "loss": 0.7461, "step": 3658 }, { "epoch": 0.3, "grad_norm": 5.090587417819171, "learning_rate": 8.242286483014572e-06, "loss": 0.6714, "step": 3659 }, { "epoch": 0.3, "grad_norm": 6.824641620491466, "learning_rate": 8.241285055872478e-06, "loss": 0.6592, "step": 3660 }, { "epoch": 0.3, "grad_norm": 3.0371439600628167, "learning_rate": 8.240283404413048e-06, "loss": 0.5896, "step": 3661 }, { "epoch": 0.3, "grad_norm": 7.581295379012853, "learning_rate": 8.239281528705605e-06, "loss": 0.7222, "step": 3662 }, { "epoch": 0.3, "grad_norm": 4.113283987241927, "learning_rate": 8.238279428819482e-06, "loss": 0.8067, "step": 3663 }, { "epoch": 0.3, "grad_norm": 4.028242828895518, "learning_rate": 8.237277104824032e-06, "loss": 0.7602, "step": 3664 }, { "epoch": 0.3, "grad_norm": 2.8527376290444737, "learning_rate": 8.236274556788626e-06, "loss": 0.7552, "step": 3665 }, { "epoch": 0.3, "grad_norm": 2.398252215178266, "learning_rate": 8.235271784782642e-06, "loss": 0.5816, "step": 3666 }, { "epoch": 0.3, "grad_norm": 13.798165023567215, "learning_rate": 8.23426878887548e-06, "loss": 0.7432, "step": 3667 }, { "epoch": 0.3, "grad_norm": 5.764702713114874, "learning_rate": 8.233265569136552e-06, "loss": 0.6469, "step": 3668 }, { "epoch": 0.3, "grad_norm": 3.2083962163269004, "learning_rate": 8.232262125635288e-06, "loss": 0.7711, "step": 3669 }, { "epoch": 0.3, "grad_norm": 13.405610019275006, "learning_rate": 8.231258458441135e-06, "loss": 0.8616, "step": 3670 }, { "epoch": 0.3, "grad_norm": 4.003424065008685, "learning_rate": 8.230254567623548e-06, "loss": 0.6992, "step": 3671 }, { "epoch": 0.3, "grad_norm": 6.808840152647502, "learning_rate": 8.229250453252008e-06, "loss": 0.7909, "step": 3672 }, { "epoch": 0.3, "grad_norm": 27.671924702286308, "learning_rate": 8.228246115396004e-06, "loss": 0.5618, "step": 3673 }, { "epoch": 0.3, "grad_norm": 7.469459501924244, "learning_rate": 8.227241554125041e-06, "loss": 0.7337, "step": 3674 }, { "epoch": 0.3, "grad_norm": 3.8140699490300976, "learning_rate": 8.22623676950864e-06, "loss": 0.8273, "step": 3675 }, { "epoch": 0.3, "grad_norm": 3.747528686031325, "learning_rate": 8.225231761616344e-06, "loss": 0.7381, "step": 3676 }, { "epoch": 0.3, "grad_norm": 22.825994657233448, "learning_rate": 8.2242265305177e-06, "loss": 0.8461, "step": 3677 }, { "epoch": 0.3, "grad_norm": 5.217365869318151, "learning_rate": 8.22322107628228e-06, "loss": 0.7227, "step": 3678 }, { "epoch": 0.3, "grad_norm": 10.506315964182377, "learning_rate": 8.222215398979667e-06, "loss": 0.6625, "step": 3679 }, { "epoch": 0.3, "grad_norm": 5.351009570919189, "learning_rate": 8.221209498679458e-06, "loss": 0.5191, "step": 3680 }, { "epoch": 0.3, "grad_norm": 3.5972851340230574, "learning_rate": 8.22020337545127e-06, "loss": 0.7848, "step": 3681 }, { "epoch": 0.3, "grad_norm": 3.43347290715262, "learning_rate": 8.219197029364733e-06, "loss": 0.7332, "step": 3682 }, { "epoch": 0.3, "grad_norm": 6.618761929906742, "learning_rate": 8.21819046048949e-06, "loss": 0.8361, "step": 3683 }, { "epoch": 0.3, "grad_norm": 3.787197114602886, "learning_rate": 8.217183668895205e-06, "loss": 0.64, "step": 3684 }, { "epoch": 0.3, "grad_norm": 6.301737415393621, "learning_rate": 8.216176654651553e-06, "loss": 0.7579, "step": 3685 }, { "epoch": 0.3, "grad_norm": 4.193477279991484, "learning_rate": 8.215169417828226e-06, "loss": 0.722, "step": 3686 }, { "epoch": 0.3, "grad_norm": 3.155925487295511, "learning_rate": 8.214161958494931e-06, "loss": 0.7473, "step": 3687 }, { "epoch": 0.3, "grad_norm": 8.361390389888049, "learning_rate": 8.213154276721388e-06, "loss": 0.7481, "step": 3688 }, { "epoch": 0.3, "grad_norm": 4.862336838419223, "learning_rate": 8.212146372577342e-06, "loss": 0.7305, "step": 3689 }, { "epoch": 0.3, "grad_norm": 3.4286693281464604, "learning_rate": 8.211138246132537e-06, "loss": 0.8738, "step": 3690 }, { "epoch": 0.3, "grad_norm": 3.6446888336077645, "learning_rate": 8.21012989745675e-06, "loss": 0.8209, "step": 3691 }, { "epoch": 0.3, "grad_norm": 4.652062931231507, "learning_rate": 8.20912132661976e-06, "loss": 0.8877, "step": 3692 }, { "epoch": 0.3, "grad_norm": 4.715650488218229, "learning_rate": 8.208112533691367e-06, "loss": 0.7064, "step": 3693 }, { "epoch": 0.3, "grad_norm": 4.308412838971919, "learning_rate": 8.207103518741388e-06, "loss": 0.801, "step": 3694 }, { "epoch": 0.3, "grad_norm": 9.56051350880958, "learning_rate": 8.20609428183965e-06, "loss": 0.7208, "step": 3695 }, { "epoch": 0.3, "grad_norm": 33.76159425314962, "learning_rate": 8.205084823056003e-06, "loss": 0.8033, "step": 3696 }, { "epoch": 0.3, "grad_norm": 3.2218662127313467, "learning_rate": 8.204075142460305e-06, "loss": 0.8255, "step": 3697 }, { "epoch": 0.3, "grad_norm": 3.06119888507931, "learning_rate": 8.20306524012243e-06, "loss": 0.6171, "step": 3698 }, { "epoch": 0.3, "grad_norm": 3.121401973897496, "learning_rate": 8.202055116112275e-06, "loss": 0.7353, "step": 3699 }, { "epoch": 0.3, "grad_norm": 4.16319907242963, "learning_rate": 8.201044770499743e-06, "loss": 0.6991, "step": 3700 }, { "epoch": 0.3, "grad_norm": 3.269329925980588, "learning_rate": 8.200034203354758e-06, "loss": 0.771, "step": 3701 }, { "epoch": 0.3, "grad_norm": 4.2595357695144465, "learning_rate": 8.199023414747257e-06, "loss": 0.6551, "step": 3702 }, { "epoch": 0.3, "grad_norm": 16.02140458308134, "learning_rate": 8.198012404747192e-06, "loss": 0.5345, "step": 3703 }, { "epoch": 0.3, "grad_norm": 3.228032581669579, "learning_rate": 8.197001173424533e-06, "loss": 0.6517, "step": 3704 }, { "epoch": 0.3, "grad_norm": 3.949558875655524, "learning_rate": 8.195989720849262e-06, "loss": 0.8581, "step": 3705 }, { "epoch": 0.3, "grad_norm": 5.104777399217436, "learning_rate": 8.19497804709138e-06, "loss": 0.6803, "step": 3706 }, { "epoch": 0.3, "grad_norm": 11.0962878218219, "learning_rate": 8.1939661522209e-06, "loss": 0.6335, "step": 3707 }, { "epoch": 0.3, "grad_norm": 3.8814262311878434, "learning_rate": 8.192954036307849e-06, "loss": 0.6256, "step": 3708 }, { "epoch": 0.3, "grad_norm": 3.650576332196466, "learning_rate": 8.191941699422276e-06, "loss": 0.5718, "step": 3709 }, { "epoch": 0.3, "grad_norm": 9.402264979420416, "learning_rate": 8.19092914163424e-06, "loss": 0.6868, "step": 3710 }, { "epoch": 0.3, "grad_norm": 4.149492229222343, "learning_rate": 8.189916363013815e-06, "loss": 0.7222, "step": 3711 }, { "epoch": 0.3, "grad_norm": 2.884462249735279, "learning_rate": 8.188903363631092e-06, "loss": 0.6119, "step": 3712 }, { "epoch": 0.3, "grad_norm": 9.582224929265774, "learning_rate": 8.187890143556178e-06, "loss": 0.8185, "step": 3713 }, { "epoch": 0.3, "grad_norm": 3.835798100412109, "learning_rate": 8.186876702859192e-06, "loss": 0.6862, "step": 3714 }, { "epoch": 0.3, "grad_norm": 2.9582423619020948, "learning_rate": 8.185863041610273e-06, "loss": 0.8561, "step": 3715 }, { "epoch": 0.3, "grad_norm": 4.441781979271604, "learning_rate": 8.18484915987957e-06, "loss": 0.7729, "step": 3716 }, { "epoch": 0.3, "grad_norm": 4.968029214728309, "learning_rate": 8.183835057737256e-06, "loss": 0.6525, "step": 3717 }, { "epoch": 0.3, "grad_norm": 2.5392518946270815, "learning_rate": 8.182820735253504e-06, "loss": 0.6627, "step": 3718 }, { "epoch": 0.3, "grad_norm": 4.5045341141325865, "learning_rate": 8.181806192498518e-06, "loss": 0.8008, "step": 3719 }, { "epoch": 0.3, "grad_norm": 3.522173499385073, "learning_rate": 8.18079142954251e-06, "loss": 0.7786, "step": 3720 }, { "epoch": 0.3, "grad_norm": 4.654976094078377, "learning_rate": 8.179776446455707e-06, "loss": 0.658, "step": 3721 }, { "epoch": 0.3, "grad_norm": 4.5458818280687945, "learning_rate": 8.178761243308353e-06, "loss": 0.7548, "step": 3722 }, { "epoch": 0.3, "grad_norm": 3.226859149071264, "learning_rate": 8.177745820170705e-06, "loss": 0.6415, "step": 3723 }, { "epoch": 0.3, "grad_norm": 4.809055215778432, "learning_rate": 8.176730177113037e-06, "loss": 0.6932, "step": 3724 }, { "epoch": 0.3, "grad_norm": 4.3931875738987625, "learning_rate": 8.175714314205639e-06, "loss": 0.5867, "step": 3725 }, { "epoch": 0.3, "grad_norm": 10.929005429797298, "learning_rate": 8.174698231518813e-06, "loss": 0.6594, "step": 3726 }, { "epoch": 0.3, "grad_norm": 3.6819152021342068, "learning_rate": 8.173681929122883e-06, "loss": 0.8414, "step": 3727 }, { "epoch": 0.3, "grad_norm": 5.620990990818821, "learning_rate": 8.172665407088178e-06, "loss": 0.7122, "step": 3728 }, { "epoch": 0.3, "grad_norm": 11.723919229690786, "learning_rate": 8.17164866548505e-06, "loss": 0.6452, "step": 3729 }, { "epoch": 0.3, "grad_norm": 3.666451843600515, "learning_rate": 8.170631704383865e-06, "loss": 0.6022, "step": 3730 }, { "epoch": 0.3, "grad_norm": 4.915128103429705, "learning_rate": 8.169614523855001e-06, "loss": 0.7327, "step": 3731 }, { "epoch": 0.3, "grad_norm": 4.190277250855935, "learning_rate": 8.168597123968857e-06, "loss": 0.6404, "step": 3732 }, { "epoch": 0.3, "grad_norm": 3.551149035740589, "learning_rate": 8.167579504795838e-06, "loss": 0.6042, "step": 3733 }, { "epoch": 0.3, "grad_norm": 3.7588664695071636, "learning_rate": 8.166561666406374e-06, "loss": 0.6908, "step": 3734 }, { "epoch": 0.3, "grad_norm": 4.690428244975542, "learning_rate": 8.165543608870906e-06, "loss": 0.5072, "step": 3735 }, { "epoch": 0.3, "grad_norm": 5.334374688163294, "learning_rate": 8.164525332259884e-06, "loss": 0.6226, "step": 3736 }, { "epoch": 0.3, "grad_norm": 3.626404541421322, "learning_rate": 8.163506836643787e-06, "loss": 0.8602, "step": 3737 }, { "epoch": 0.3, "grad_norm": 2.8301599530963757, "learning_rate": 8.162488122093095e-06, "loss": 0.6091, "step": 3738 }, { "epoch": 0.3, "grad_norm": 4.416725221469185, "learning_rate": 8.161469188678315e-06, "loss": 0.5649, "step": 3739 }, { "epoch": 0.3, "grad_norm": 2.920413777212486, "learning_rate": 8.16045003646996e-06, "loss": 0.6782, "step": 3740 }, { "epoch": 0.3, "grad_norm": 14.40707580450295, "learning_rate": 8.159430665538561e-06, "loss": 0.9344, "step": 3741 }, { "epoch": 0.3, "grad_norm": 4.634730500073284, "learning_rate": 8.158411075954669e-06, "loss": 0.7096, "step": 3742 }, { "epoch": 0.3, "grad_norm": 5.23791572111884, "learning_rate": 8.157391267788842e-06, "loss": 0.7501, "step": 3743 }, { "epoch": 0.3, "grad_norm": 3.9980562219990357, "learning_rate": 8.15637124111166e-06, "loss": 0.6696, "step": 3744 }, { "epoch": 0.3, "grad_norm": 3.1546970890797126, "learning_rate": 8.155350995993713e-06, "loss": 0.6542, "step": 3745 }, { "epoch": 0.3, "grad_norm": 4.397072615644264, "learning_rate": 8.15433053250561e-06, "loss": 0.6655, "step": 3746 }, { "epoch": 0.3, "grad_norm": 3.309917802773555, "learning_rate": 8.153309850717973e-06, "loss": 0.848, "step": 3747 }, { "epoch": 0.3, "grad_norm": 4.106108013638399, "learning_rate": 8.152288950701437e-06, "loss": 0.7339, "step": 3748 }, { "epoch": 0.3, "grad_norm": 2.5119122903194873, "learning_rate": 8.151267832526658e-06, "loss": 0.6879, "step": 3749 }, { "epoch": 0.3, "grad_norm": 19.681356443712104, "learning_rate": 8.150246496264304e-06, "loss": 0.7779, "step": 3750 }, { "epoch": 0.3, "grad_norm": 3.1975643451556115, "learning_rate": 8.149224941985058e-06, "loss": 0.675, "step": 3751 }, { "epoch": 0.3, "grad_norm": 3.8651322132765658, "learning_rate": 8.148203169759617e-06, "loss": 0.8486, "step": 3752 }, { "epoch": 0.3, "grad_norm": 4.388062201438417, "learning_rate": 8.14718117965869e-06, "loss": 0.7546, "step": 3753 }, { "epoch": 0.3, "grad_norm": 5.081302613724464, "learning_rate": 8.146158971753013e-06, "loss": 0.8501, "step": 3754 }, { "epoch": 0.3, "grad_norm": 2.3703340241905386, "learning_rate": 8.145136546113323e-06, "loss": 0.5522, "step": 3755 }, { "epoch": 0.31, "grad_norm": 2.837232088800711, "learning_rate": 8.144113902810383e-06, "loss": 0.73, "step": 3756 }, { "epoch": 0.31, "grad_norm": 3.948321468695182, "learning_rate": 8.143091041914962e-06, "loss": 0.6876, "step": 3757 }, { "epoch": 0.31, "grad_norm": 13.642195500446972, "learning_rate": 8.14206796349785e-06, "loss": 0.6109, "step": 3758 }, { "epoch": 0.31, "grad_norm": 3.3087815498278923, "learning_rate": 8.141044667629852e-06, "loss": 0.5886, "step": 3759 }, { "epoch": 0.31, "grad_norm": 3.3378795789401368, "learning_rate": 8.140021154381786e-06, "loss": 0.7511, "step": 3760 }, { "epoch": 0.31, "grad_norm": 2.353272407476943, "learning_rate": 8.138997423824483e-06, "loss": 0.721, "step": 3761 }, { "epoch": 0.31, "grad_norm": 2.7848418121481506, "learning_rate": 8.137973476028795e-06, "loss": 0.8508, "step": 3762 }, { "epoch": 0.31, "grad_norm": 9.008549712860148, "learning_rate": 8.136949311065583e-06, "loss": 0.5955, "step": 3763 }, { "epoch": 0.31, "grad_norm": 7.3202003930463135, "learning_rate": 8.135924929005728e-06, "loss": 0.6649, "step": 3764 }, { "epoch": 0.31, "grad_norm": 2.779952441739666, "learning_rate": 8.134900329920121e-06, "loss": 0.6654, "step": 3765 }, { "epoch": 0.31, "grad_norm": 7.615992101317586, "learning_rate": 8.133875513879675e-06, "loss": 0.7246, "step": 3766 }, { "epoch": 0.31, "grad_norm": 3.1962301378940468, "learning_rate": 8.132850480955307e-06, "loss": 0.5659, "step": 3767 }, { "epoch": 0.31, "grad_norm": 5.0673075297467, "learning_rate": 8.131825231217962e-06, "loss": 0.692, "step": 3768 }, { "epoch": 0.31, "grad_norm": 5.093738633189581, "learning_rate": 8.130799764738591e-06, "loss": 0.6394, "step": 3769 }, { "epoch": 0.31, "grad_norm": 3.546881923059605, "learning_rate": 8.129774081588164e-06, "loss": 0.8117, "step": 3770 }, { "epoch": 0.31, "grad_norm": 4.468497928599216, "learning_rate": 8.128748181837662e-06, "loss": 0.7291, "step": 3771 }, { "epoch": 0.31, "grad_norm": 4.426907273653148, "learning_rate": 8.127722065558087e-06, "loss": 0.6537, "step": 3772 }, { "epoch": 0.31, "grad_norm": 5.037250436580407, "learning_rate": 8.12669573282045e-06, "loss": 0.7802, "step": 3773 }, { "epoch": 0.31, "grad_norm": 3.3550861426249994, "learning_rate": 8.125669183695784e-06, "loss": 0.7178, "step": 3774 }, { "epoch": 0.31, "grad_norm": 3.9552158515403444, "learning_rate": 8.124642418255127e-06, "loss": 0.7624, "step": 3775 }, { "epoch": 0.31, "grad_norm": 4.404467342782481, "learning_rate": 8.12361543656954e-06, "loss": 0.7331, "step": 3776 }, { "epoch": 0.31, "grad_norm": 3.4965353440458755, "learning_rate": 8.122588238710098e-06, "loss": 0.6302, "step": 3777 }, { "epoch": 0.31, "grad_norm": 3.879138548035437, "learning_rate": 8.121560824747889e-06, "loss": 0.6388, "step": 3778 }, { "epoch": 0.31, "grad_norm": 27.99905130815318, "learning_rate": 8.120533194754015e-06, "loss": 0.6645, "step": 3779 }, { "epoch": 0.31, "grad_norm": 3.4101101232374416, "learning_rate": 8.119505348799595e-06, "loss": 0.8705, "step": 3780 }, { "epoch": 0.31, "grad_norm": 3.2128273612029, "learning_rate": 8.118477286955764e-06, "loss": 0.754, "step": 3781 }, { "epoch": 0.31, "grad_norm": 3.376333074694725, "learning_rate": 8.117449009293668e-06, "loss": 0.6783, "step": 3782 }, { "epoch": 0.31, "grad_norm": 3.829956856229782, "learning_rate": 8.116420515884473e-06, "loss": 0.7888, "step": 3783 }, { "epoch": 0.31, "grad_norm": 2.4042293291090195, "learning_rate": 8.115391806799354e-06, "loss": 0.6911, "step": 3784 }, { "epoch": 0.31, "grad_norm": 2.9572527336676777, "learning_rate": 8.114362882109507e-06, "loss": 0.6015, "step": 3785 }, { "epoch": 0.31, "grad_norm": 2.740495907573269, "learning_rate": 8.113333741886137e-06, "loss": 0.8441, "step": 3786 }, { "epoch": 0.31, "grad_norm": 3.537452524642415, "learning_rate": 8.11230438620047e-06, "loss": 0.6159, "step": 3787 }, { "epoch": 0.31, "grad_norm": 6.220159024018744, "learning_rate": 8.111274815123746e-06, "loss": 0.701, "step": 3788 }, { "epoch": 0.31, "grad_norm": 4.220892546241309, "learning_rate": 8.110245028727211e-06, "loss": 0.7133, "step": 3789 }, { "epoch": 0.31, "grad_norm": 6.438574183988354, "learning_rate": 8.109215027082137e-06, "loss": 0.8488, "step": 3790 }, { "epoch": 0.31, "grad_norm": 4.320626595467487, "learning_rate": 8.108184810259806e-06, "loss": 0.6818, "step": 3791 }, { "epoch": 0.31, "grad_norm": 21.409502765716667, "learning_rate": 8.107154378331515e-06, "loss": 0.745, "step": 3792 }, { "epoch": 0.31, "grad_norm": 2.8557435853692428, "learning_rate": 8.106123731368579e-06, "loss": 0.8006, "step": 3793 }, { "epoch": 0.31, "grad_norm": 3.9262834880074604, "learning_rate": 8.10509286944232e-06, "loss": 0.9271, "step": 3794 }, { "epoch": 0.31, "grad_norm": 21.228192529370176, "learning_rate": 8.104061792624085e-06, "loss": 0.7205, "step": 3795 }, { "epoch": 0.31, "grad_norm": 4.580604934591343, "learning_rate": 8.103030500985227e-06, "loss": 0.783, "step": 3796 }, { "epoch": 0.31, "grad_norm": 3.712073932733611, "learning_rate": 8.101998994597123e-06, "loss": 0.6982, "step": 3797 }, { "epoch": 0.31, "grad_norm": 4.201304396840697, "learning_rate": 8.100967273531154e-06, "loss": 0.75, "step": 3798 }, { "epoch": 0.31, "grad_norm": 3.37374017557371, "learning_rate": 8.099935337858726e-06, "loss": 0.7596, "step": 3799 }, { "epoch": 0.31, "grad_norm": 4.209357533596984, "learning_rate": 8.098903187651252e-06, "loss": 0.6863, "step": 3800 }, { "epoch": 0.31, "grad_norm": 4.480290511002427, "learning_rate": 8.097870822980166e-06, "loss": 0.7374, "step": 3801 }, { "epoch": 0.31, "grad_norm": 3.549953778624068, "learning_rate": 8.096838243916916e-06, "loss": 0.6068, "step": 3802 }, { "epoch": 0.31, "grad_norm": 4.8415110662599625, "learning_rate": 8.095805450532957e-06, "loss": 0.6972, "step": 3803 }, { "epoch": 0.31, "grad_norm": 4.551316528601511, "learning_rate": 8.09477244289977e-06, "loss": 0.6196, "step": 3804 }, { "epoch": 0.31, "grad_norm": 7.3475027891572475, "learning_rate": 8.093739221088842e-06, "loss": 0.6489, "step": 3805 }, { "epoch": 0.31, "grad_norm": 2.7883605417197868, "learning_rate": 8.09270578517168e-06, "loss": 0.8722, "step": 3806 }, { "epoch": 0.31, "grad_norm": 2.822608041368121, "learning_rate": 8.091672135219805e-06, "loss": 0.7387, "step": 3807 }, { "epoch": 0.31, "grad_norm": 3.925695166219304, "learning_rate": 8.090638271304754e-06, "loss": 0.6987, "step": 3808 }, { "epoch": 0.31, "grad_norm": 3.0358414295226566, "learning_rate": 8.08960419349807e-06, "loss": 0.6139, "step": 3809 }, { "epoch": 0.31, "grad_norm": 14.760050401619537, "learning_rate": 8.088569901871325e-06, "loss": 0.7213, "step": 3810 }, { "epoch": 0.31, "grad_norm": 3.351132418502165, "learning_rate": 8.087535396496093e-06, "loss": 0.7162, "step": 3811 }, { "epoch": 0.31, "grad_norm": 2.9944465077998217, "learning_rate": 8.086500677443974e-06, "loss": 0.6915, "step": 3812 }, { "epoch": 0.31, "grad_norm": 3.17940947655128, "learning_rate": 8.085465744786572e-06, "loss": 0.7636, "step": 3813 }, { "epoch": 0.31, "grad_norm": 3.0347479813539513, "learning_rate": 8.084430598595514e-06, "loss": 0.65, "step": 3814 }, { "epoch": 0.31, "grad_norm": 8.387621966752047, "learning_rate": 8.083395238942437e-06, "loss": 0.6454, "step": 3815 }, { "epoch": 0.31, "grad_norm": 2.5776737410641046, "learning_rate": 8.082359665898994e-06, "loss": 0.5806, "step": 3816 }, { "epoch": 0.31, "grad_norm": 3.4223821327636186, "learning_rate": 8.081323879536854e-06, "loss": 0.7425, "step": 3817 }, { "epoch": 0.31, "grad_norm": 3.7529775593871144, "learning_rate": 8.0802878799277e-06, "loss": 0.6963, "step": 3818 }, { "epoch": 0.31, "grad_norm": 2.6405069508913654, "learning_rate": 8.079251667143229e-06, "loss": 0.7639, "step": 3819 }, { "epoch": 0.31, "grad_norm": 3.637474970576809, "learning_rate": 8.078215241255156e-06, "loss": 0.8213, "step": 3820 }, { "epoch": 0.31, "grad_norm": 2.4993380341655866, "learning_rate": 8.077178602335204e-06, "loss": 0.7573, "step": 3821 }, { "epoch": 0.31, "grad_norm": 2.90511779672793, "learning_rate": 8.076141750455119e-06, "loss": 0.6998, "step": 3822 }, { "epoch": 0.31, "grad_norm": 2.4960415586852878, "learning_rate": 8.075104685686655e-06, "loss": 0.6006, "step": 3823 }, { "epoch": 0.31, "grad_norm": 4.898140379041115, "learning_rate": 8.074067408101585e-06, "loss": 0.7718, "step": 3824 }, { "epoch": 0.31, "grad_norm": 4.661629721662624, "learning_rate": 8.073029917771692e-06, "loss": 0.7093, "step": 3825 }, { "epoch": 0.31, "grad_norm": 2.8367074784109696, "learning_rate": 8.071992214768783e-06, "loss": 0.8326, "step": 3826 }, { "epoch": 0.31, "grad_norm": 2.9893009979048797, "learning_rate": 8.070954299164668e-06, "loss": 0.7114, "step": 3827 }, { "epoch": 0.31, "grad_norm": 2.7184241696259988, "learning_rate": 8.069916171031181e-06, "loss": 0.7541, "step": 3828 }, { "epoch": 0.31, "grad_norm": 4.608302419770265, "learning_rate": 8.068877830440162e-06, "loss": 0.7252, "step": 3829 }, { "epoch": 0.31, "grad_norm": 4.772772539385872, "learning_rate": 8.067839277463475e-06, "loss": 0.7514, "step": 3830 }, { "epoch": 0.31, "grad_norm": 7.42560705038805, "learning_rate": 8.066800512172994e-06, "loss": 0.9019, "step": 3831 }, { "epoch": 0.31, "grad_norm": 3.5132295504116358, "learning_rate": 8.065761534640606e-06, "loss": 0.7021, "step": 3832 }, { "epoch": 0.31, "grad_norm": 7.792408664433977, "learning_rate": 8.064722344938218e-06, "loss": 0.5339, "step": 3833 }, { "epoch": 0.31, "grad_norm": 3.7994299304054078, "learning_rate": 8.063682943137745e-06, "loss": 0.717, "step": 3834 }, { "epoch": 0.31, "grad_norm": 3.4985436004169403, "learning_rate": 8.062643329311123e-06, "loss": 0.6081, "step": 3835 }, { "epoch": 0.31, "grad_norm": 3.661006060019155, "learning_rate": 8.061603503530298e-06, "loss": 0.629, "step": 3836 }, { "epoch": 0.31, "grad_norm": 4.401692786571959, "learning_rate": 8.060563465867232e-06, "loss": 0.6103, "step": 3837 }, { "epoch": 0.31, "grad_norm": 3.190241383371681, "learning_rate": 8.059523216393907e-06, "loss": 0.5868, "step": 3838 }, { "epoch": 0.31, "grad_norm": 7.806495671361692, "learning_rate": 8.058482755182309e-06, "loss": 0.6442, "step": 3839 }, { "epoch": 0.31, "grad_norm": 4.627446392232139, "learning_rate": 8.057442082304445e-06, "loss": 0.6792, "step": 3840 }, { "epoch": 0.31, "grad_norm": 3.407444953752837, "learning_rate": 8.05640119783234e-06, "loss": 0.7449, "step": 3841 }, { "epoch": 0.31, "grad_norm": 2.867512216974582, "learning_rate": 8.055360101838026e-06, "loss": 0.657, "step": 3842 }, { "epoch": 0.31, "grad_norm": 3.631023611547521, "learning_rate": 8.054318794393554e-06, "loss": 0.7608, "step": 3843 }, { "epoch": 0.31, "grad_norm": 4.3522360411418815, "learning_rate": 8.05327727557099e-06, "loss": 0.6165, "step": 3844 }, { "epoch": 0.31, "grad_norm": 6.8773670097717385, "learning_rate": 8.052235545442416e-06, "loss": 0.8298, "step": 3845 }, { "epoch": 0.31, "grad_norm": 2.7943439718396124, "learning_rate": 8.051193604079921e-06, "loss": 0.6853, "step": 3846 }, { "epoch": 0.31, "grad_norm": 3.2710406852298743, "learning_rate": 8.05015145155562e-06, "loss": 0.6216, "step": 3847 }, { "epoch": 0.31, "grad_norm": 3.422232158520331, "learning_rate": 8.04910908794163e-06, "loss": 0.5977, "step": 3848 }, { "epoch": 0.31, "grad_norm": 2.9666381291830604, "learning_rate": 8.048066513310093e-06, "loss": 0.6585, "step": 3849 }, { "epoch": 0.31, "grad_norm": 2.5115687230386263, "learning_rate": 8.047023727733162e-06, "loss": 0.7279, "step": 3850 }, { "epoch": 0.31, "grad_norm": 3.7143353322138837, "learning_rate": 8.045980731283002e-06, "loss": 0.7238, "step": 3851 }, { "epoch": 0.31, "grad_norm": 3.2578923529505577, "learning_rate": 8.044937524031798e-06, "loss": 0.652, "step": 3852 }, { "epoch": 0.31, "grad_norm": 5.020446780126338, "learning_rate": 8.043894106051743e-06, "loss": 0.7128, "step": 3853 }, { "epoch": 0.31, "grad_norm": 4.1552285674975105, "learning_rate": 8.042850477415052e-06, "loss": 0.7723, "step": 3854 }, { "epoch": 0.31, "grad_norm": 3.8896819488437973, "learning_rate": 8.041806638193948e-06, "loss": 0.8213, "step": 3855 }, { "epoch": 0.31, "grad_norm": 2.5192806884695997, "learning_rate": 8.04076258846067e-06, "loss": 0.7643, "step": 3856 }, { "epoch": 0.31, "grad_norm": 4.8767008503737435, "learning_rate": 8.039718328287478e-06, "loss": 0.6367, "step": 3857 }, { "epoch": 0.31, "grad_norm": 3.7011601281168387, "learning_rate": 8.038673857746636e-06, "loss": 0.7832, "step": 3858 }, { "epoch": 0.31, "grad_norm": 4.006813002241106, "learning_rate": 8.03762917691043e-06, "loss": 0.7053, "step": 3859 }, { "epoch": 0.31, "grad_norm": 2.8536675590287, "learning_rate": 8.03658428585116e-06, "loss": 0.7103, "step": 3860 }, { "epoch": 0.31, "grad_norm": 2.96592025655951, "learning_rate": 8.035539184641134e-06, "loss": 0.5904, "step": 3861 }, { "epoch": 0.31, "grad_norm": 3.9042987888483203, "learning_rate": 8.034493873352685e-06, "loss": 0.6634, "step": 3862 }, { "epoch": 0.31, "grad_norm": 2.6581721103457925, "learning_rate": 8.033448352058155e-06, "loss": 0.6537, "step": 3863 }, { "epoch": 0.31, "grad_norm": 5.482733499186662, "learning_rate": 8.032402620829895e-06, "loss": 0.6193, "step": 3864 }, { "epoch": 0.31, "grad_norm": 3.686880810322882, "learning_rate": 8.031356679740283e-06, "loss": 0.7373, "step": 3865 }, { "epoch": 0.31, "grad_norm": 2.72885750004841, "learning_rate": 8.030310528861703e-06, "loss": 0.9408, "step": 3866 }, { "epoch": 0.31, "grad_norm": 4.228982453368367, "learning_rate": 8.02926416826655e-06, "loss": 0.7633, "step": 3867 }, { "epoch": 0.31, "grad_norm": 3.642871831297605, "learning_rate": 8.028217598027247e-06, "loss": 0.8684, "step": 3868 }, { "epoch": 0.31, "grad_norm": 2.8614605115837417, "learning_rate": 8.027170818216215e-06, "loss": 0.6858, "step": 3869 }, { "epoch": 0.31, "grad_norm": 2.4204190947417823, "learning_rate": 8.026123828905902e-06, "loss": 0.7924, "step": 3870 }, { "epoch": 0.31, "grad_norm": 6.850563297247912, "learning_rate": 8.025076630168769e-06, "loss": 0.7901, "step": 3871 }, { "epoch": 0.31, "grad_norm": 4.068546960885706, "learning_rate": 8.024029222077286e-06, "loss": 0.5381, "step": 3872 }, { "epoch": 0.31, "grad_norm": 3.6982159833354946, "learning_rate": 8.022981604703937e-06, "loss": 0.7101, "step": 3873 }, { "epoch": 0.31, "grad_norm": 4.9032663721247065, "learning_rate": 8.021933778121227e-06, "loss": 0.6717, "step": 3874 }, { "epoch": 0.31, "grad_norm": 2.1326191524631852, "learning_rate": 8.020885742401675e-06, "loss": 0.7521, "step": 3875 }, { "epoch": 0.31, "grad_norm": 2.922491285521148, "learning_rate": 8.019837497617804e-06, "loss": 0.7085, "step": 3876 }, { "epoch": 0.31, "grad_norm": 4.339102430661286, "learning_rate": 8.018789043842166e-06, "loss": 0.6647, "step": 3877 }, { "epoch": 0.31, "grad_norm": 3.7043738162032946, "learning_rate": 8.017740381147319e-06, "loss": 0.7106, "step": 3878 }, { "epoch": 0.32, "grad_norm": 3.600741001391441, "learning_rate": 8.016691509605836e-06, "loss": 0.5931, "step": 3879 }, { "epoch": 0.32, "grad_norm": 2.500302960993017, "learning_rate": 8.015642429290304e-06, "loss": 0.7905, "step": 3880 }, { "epoch": 0.32, "grad_norm": 6.646574277323032, "learning_rate": 8.01459314027333e-06, "loss": 0.7212, "step": 3881 }, { "epoch": 0.32, "grad_norm": 2.9361183762790195, "learning_rate": 8.013543642627529e-06, "loss": 0.7837, "step": 3882 }, { "epoch": 0.32, "grad_norm": 2.5386458953787736, "learning_rate": 8.012493936425532e-06, "loss": 0.7738, "step": 3883 }, { "epoch": 0.32, "grad_norm": 4.561581248764552, "learning_rate": 8.011444021739986e-06, "loss": 0.6709, "step": 3884 }, { "epoch": 0.32, "grad_norm": 3.427316464110523, "learning_rate": 8.010393898643555e-06, "loss": 0.6204, "step": 3885 }, { "epoch": 0.32, "grad_norm": 3.082382707597153, "learning_rate": 8.009343567208909e-06, "loss": 0.6868, "step": 3886 }, { "epoch": 0.32, "grad_norm": 3.271984413705368, "learning_rate": 8.00829302750874e-06, "loss": 0.7346, "step": 3887 }, { "epoch": 0.32, "grad_norm": 3.9674644683223046, "learning_rate": 8.007242279615752e-06, "loss": 0.7115, "step": 3888 }, { "epoch": 0.32, "grad_norm": 8.355456734207701, "learning_rate": 8.006191323602663e-06, "loss": 0.7421, "step": 3889 }, { "epoch": 0.32, "grad_norm": 5.644927970513903, "learning_rate": 8.005140159542206e-06, "loss": 0.6537, "step": 3890 }, { "epoch": 0.32, "grad_norm": 2.740662912736704, "learning_rate": 8.004088787507128e-06, "loss": 0.7205, "step": 3891 }, { "epoch": 0.32, "grad_norm": 3.245086876737402, "learning_rate": 8.00303720757019e-06, "loss": 0.7096, "step": 3892 }, { "epoch": 0.32, "grad_norm": 2.6166627426331432, "learning_rate": 8.00198541980417e-06, "loss": 0.8382, "step": 3893 }, { "epoch": 0.32, "grad_norm": 4.279142322343546, "learning_rate": 8.000933424281856e-06, "loss": 0.8966, "step": 3894 }, { "epoch": 0.32, "grad_norm": 3.046516627699684, "learning_rate": 7.999881221076054e-06, "loss": 0.7695, "step": 3895 }, { "epoch": 0.32, "grad_norm": 5.149416183217677, "learning_rate": 7.998828810259581e-06, "loss": 0.7099, "step": 3896 }, { "epoch": 0.32, "grad_norm": 3.4371758680033433, "learning_rate": 7.997776191905273e-06, "loss": 0.6788, "step": 3897 }, { "epoch": 0.32, "grad_norm": 3.112988680951314, "learning_rate": 7.996723366085978e-06, "loss": 0.7384, "step": 3898 }, { "epoch": 0.32, "grad_norm": 2.9164236741212255, "learning_rate": 7.995670332874556e-06, "loss": 0.7185, "step": 3899 }, { "epoch": 0.32, "grad_norm": 3.37605795294584, "learning_rate": 7.994617092343885e-06, "loss": 0.7222, "step": 3900 }, { "epoch": 0.32, "grad_norm": 3.322943911841214, "learning_rate": 7.993563644566856e-06, "loss": 0.6943, "step": 3901 }, { "epoch": 0.32, "grad_norm": 3.1120697485803515, "learning_rate": 7.992509989616373e-06, "loss": 0.6316, "step": 3902 }, { "epoch": 0.32, "grad_norm": 3.4396877999425595, "learning_rate": 7.991456127565357e-06, "loss": 0.6001, "step": 3903 }, { "epoch": 0.32, "grad_norm": 2.622676151672132, "learning_rate": 7.990402058486742e-06, "loss": 0.7524, "step": 3904 }, { "epoch": 0.32, "grad_norm": 7.481288424645567, "learning_rate": 7.989347782453473e-06, "loss": 0.7567, "step": 3905 }, { "epoch": 0.32, "grad_norm": 3.1560061965951647, "learning_rate": 7.988293299538516e-06, "loss": 0.6474, "step": 3906 }, { "epoch": 0.32, "grad_norm": 3.4366162184737896, "learning_rate": 7.987238609814848e-06, "loss": 0.6249, "step": 3907 }, { "epoch": 0.32, "grad_norm": 2.453829820837504, "learning_rate": 7.986183713355458e-06, "loss": 0.8548, "step": 3908 }, { "epoch": 0.32, "grad_norm": 3.5821712677389512, "learning_rate": 7.985128610233353e-06, "loss": 0.7608, "step": 3909 }, { "epoch": 0.32, "grad_norm": 2.454011600103875, "learning_rate": 7.984073300521552e-06, "loss": 0.7452, "step": 3910 }, { "epoch": 0.32, "grad_norm": 3.600422892100495, "learning_rate": 7.983017784293088e-06, "loss": 0.7923, "step": 3911 }, { "epoch": 0.32, "grad_norm": 2.6557279994683998, "learning_rate": 7.981962061621012e-06, "loss": 0.6576, "step": 3912 }, { "epoch": 0.32, "grad_norm": 3.3142445035826054, "learning_rate": 7.980906132578386e-06, "loss": 0.6326, "step": 3913 }, { "epoch": 0.32, "grad_norm": 5.058565457460089, "learning_rate": 7.979849997238284e-06, "loss": 0.6233, "step": 3914 }, { "epoch": 0.32, "grad_norm": 3.1961231368546623, "learning_rate": 7.978793655673803e-06, "loss": 0.6064, "step": 3915 }, { "epoch": 0.32, "grad_norm": 2.8958684933834777, "learning_rate": 7.977737107958042e-06, "loss": 0.67, "step": 3916 }, { "epoch": 0.32, "grad_norm": 3.1444688522029964, "learning_rate": 7.976680354164124e-06, "loss": 0.6008, "step": 3917 }, { "epoch": 0.32, "grad_norm": 2.6625101019817103, "learning_rate": 7.975623394365184e-06, "loss": 0.6148, "step": 3918 }, { "epoch": 0.32, "grad_norm": 6.888914690748621, "learning_rate": 7.974566228634369e-06, "loss": 0.7404, "step": 3919 }, { "epoch": 0.32, "grad_norm": 4.028543995166718, "learning_rate": 7.97350885704484e-06, "loss": 0.6674, "step": 3920 }, { "epoch": 0.32, "grad_norm": 2.088499732839049, "learning_rate": 7.972451279669777e-06, "loss": 0.7586, "step": 3921 }, { "epoch": 0.32, "grad_norm": 3.1876230287709304, "learning_rate": 7.97139349658237e-06, "loss": 0.6663, "step": 3922 }, { "epoch": 0.32, "grad_norm": 5.966207897723611, "learning_rate": 7.970335507855822e-06, "loss": 0.7048, "step": 3923 }, { "epoch": 0.32, "grad_norm": 3.1849210585315615, "learning_rate": 7.969277313563354e-06, "loss": 0.7949, "step": 3924 }, { "epoch": 0.32, "grad_norm": 3.8554392915448648, "learning_rate": 7.9682189137782e-06, "loss": 0.7147, "step": 3925 }, { "epoch": 0.32, "grad_norm": 4.814488122979218, "learning_rate": 7.967160308573607e-06, "loss": 0.7245, "step": 3926 }, { "epoch": 0.32, "grad_norm": 3.2795228392151903, "learning_rate": 7.96610149802284e-06, "loss": 0.6609, "step": 3927 }, { "epoch": 0.32, "grad_norm": 4.08718181667886, "learning_rate": 7.965042482199173e-06, "loss": 0.9151, "step": 3928 }, { "epoch": 0.32, "grad_norm": 5.183521625048285, "learning_rate": 7.963983261175894e-06, "loss": 0.641, "step": 3929 }, { "epoch": 0.32, "grad_norm": 3.4963650414362473, "learning_rate": 7.962923835026312e-06, "loss": 0.7218, "step": 3930 }, { "epoch": 0.32, "grad_norm": 61.29066160922814, "learning_rate": 7.961864203823746e-06, "loss": 0.7766, "step": 3931 }, { "epoch": 0.32, "grad_norm": 3.000856294908018, "learning_rate": 7.960804367641526e-06, "loss": 0.7444, "step": 3932 }, { "epoch": 0.32, "grad_norm": 5.2796317153813295, "learning_rate": 7.959744326553002e-06, "loss": 0.7061, "step": 3933 }, { "epoch": 0.32, "grad_norm": 2.472945957129339, "learning_rate": 7.958684080631533e-06, "loss": 0.6291, "step": 3934 }, { "epoch": 0.32, "grad_norm": 3.3855962610444417, "learning_rate": 7.957623629950498e-06, "loss": 0.6866, "step": 3935 }, { "epoch": 0.32, "grad_norm": 18.989269384398778, "learning_rate": 7.956562974583284e-06, "loss": 0.7107, "step": 3936 }, { "epoch": 0.32, "grad_norm": 3.000022537766427, "learning_rate": 7.955502114603296e-06, "loss": 0.7728, "step": 3937 }, { "epoch": 0.32, "grad_norm": 27.389408777657138, "learning_rate": 7.954441050083954e-06, "loss": 0.7416, "step": 3938 }, { "epoch": 0.32, "grad_norm": 4.145808818591805, "learning_rate": 7.953379781098686e-06, "loss": 0.6525, "step": 3939 }, { "epoch": 0.32, "grad_norm": 3.5067754599912964, "learning_rate": 7.952318307720943e-06, "loss": 0.6996, "step": 3940 }, { "epoch": 0.32, "grad_norm": 3.0612911379596013, "learning_rate": 7.951256630024184e-06, "loss": 0.5761, "step": 3941 }, { "epoch": 0.32, "grad_norm": 4.248779484097216, "learning_rate": 7.950194748081882e-06, "loss": 0.8271, "step": 3942 }, { "epoch": 0.32, "grad_norm": 3.7779819140901467, "learning_rate": 7.94913266196753e-06, "loss": 0.8272, "step": 3943 }, { "epoch": 0.32, "grad_norm": 7.0151021506094455, "learning_rate": 7.948070371754626e-06, "loss": 0.7065, "step": 3944 }, { "epoch": 0.32, "grad_norm": 4.2031065845617865, "learning_rate": 7.94700787751669e-06, "loss": 0.6629, "step": 3945 }, { "epoch": 0.32, "grad_norm": 4.450483734856019, "learning_rate": 7.945945179327252e-06, "loss": 0.9009, "step": 3946 }, { "epoch": 0.32, "grad_norm": 6.079892970279501, "learning_rate": 7.94488227725986e-06, "loss": 0.639, "step": 3947 }, { "epoch": 0.32, "grad_norm": 3.911611484615307, "learning_rate": 7.943819171388073e-06, "loss": 0.7575, "step": 3948 }, { "epoch": 0.32, "grad_norm": 3.0008004807524165, "learning_rate": 7.942755861785462e-06, "loss": 0.7012, "step": 3949 }, { "epoch": 0.32, "grad_norm": 9.559793246786608, "learning_rate": 7.941692348525616e-06, "loss": 0.6029, "step": 3950 }, { "epoch": 0.32, "grad_norm": 2.9265953303421948, "learning_rate": 7.940628631682139e-06, "loss": 0.8689, "step": 3951 }, { "epoch": 0.32, "grad_norm": 2.77688089050199, "learning_rate": 7.939564711328643e-06, "loss": 0.6822, "step": 3952 }, { "epoch": 0.32, "grad_norm": 2.6517052552594227, "learning_rate": 7.93850058753876e-06, "loss": 0.7966, "step": 3953 }, { "epoch": 0.32, "grad_norm": 10.706768629888632, "learning_rate": 7.937436260386134e-06, "loss": 0.7603, "step": 3954 }, { "epoch": 0.32, "grad_norm": 2.8395172328084257, "learning_rate": 7.936371729944423e-06, "loss": 0.7506, "step": 3955 }, { "epoch": 0.32, "grad_norm": 4.296087403075084, "learning_rate": 7.935306996287301e-06, "loss": 0.6108, "step": 3956 }, { "epoch": 0.32, "grad_norm": 3.2670653461580605, "learning_rate": 7.934242059488453e-06, "loss": 0.7584, "step": 3957 }, { "epoch": 0.32, "grad_norm": 2.7338787844543275, "learning_rate": 7.933176919621577e-06, "loss": 0.6956, "step": 3958 }, { "epoch": 0.32, "grad_norm": 3.6406179770695606, "learning_rate": 7.932111576760389e-06, "loss": 0.7553, "step": 3959 }, { "epoch": 0.32, "grad_norm": 5.201492720465625, "learning_rate": 7.931046030978619e-06, "loss": 0.8215, "step": 3960 }, { "epoch": 0.32, "grad_norm": 4.192323551263408, "learning_rate": 7.929980282350009e-06, "loss": 0.6738, "step": 3961 }, { "epoch": 0.32, "grad_norm": 2.99813192274881, "learning_rate": 7.928914330948312e-06, "loss": 0.7431, "step": 3962 }, { "epoch": 0.32, "grad_norm": 4.979824801122564, "learning_rate": 7.927848176847303e-06, "loss": 0.8108, "step": 3963 }, { "epoch": 0.32, "grad_norm": 5.1769395705307355, "learning_rate": 7.926781820120765e-06, "loss": 0.6437, "step": 3964 }, { "epoch": 0.32, "grad_norm": 4.9488841733506925, "learning_rate": 7.925715260842497e-06, "loss": 0.8524, "step": 3965 }, { "epoch": 0.32, "grad_norm": 4.815261070731859, "learning_rate": 7.92464849908631e-06, "loss": 0.894, "step": 3966 }, { "epoch": 0.32, "grad_norm": 6.18374869735505, "learning_rate": 7.923581534926034e-06, "loss": 0.7378, "step": 3967 }, { "epoch": 0.32, "grad_norm": 3.4300636590844404, "learning_rate": 7.922514368435506e-06, "loss": 0.7354, "step": 3968 }, { "epoch": 0.32, "grad_norm": 2.3166955433573033, "learning_rate": 7.92144699968858e-06, "loss": 0.6804, "step": 3969 }, { "epoch": 0.32, "grad_norm": 3.551603313376008, "learning_rate": 7.920379428759129e-06, "loss": 0.8918, "step": 3970 }, { "epoch": 0.32, "grad_norm": 3.6522260731083804, "learning_rate": 7.919311655721034e-06, "loss": 0.7785, "step": 3971 }, { "epoch": 0.32, "grad_norm": 22.84440412445875, "learning_rate": 7.91824368064819e-06, "loss": 0.7526, "step": 3972 }, { "epoch": 0.32, "grad_norm": 2.7481284744959, "learning_rate": 7.917175503614507e-06, "loss": 0.7263, "step": 3973 }, { "epoch": 0.32, "grad_norm": 2.7468459620503265, "learning_rate": 7.916107124693912e-06, "loss": 0.8064, "step": 3974 }, { "epoch": 0.32, "grad_norm": 3.029343681462667, "learning_rate": 7.915038543960342e-06, "loss": 0.6701, "step": 3975 }, { "epoch": 0.32, "grad_norm": 5.271869274714605, "learning_rate": 7.913969761487752e-06, "loss": 0.583, "step": 3976 }, { "epoch": 0.32, "grad_norm": 4.280948884365676, "learning_rate": 7.912900777350106e-06, "loss": 0.7116, "step": 3977 }, { "epoch": 0.32, "grad_norm": 2.831829235665687, "learning_rate": 7.911831591621384e-06, "loss": 0.6972, "step": 3978 }, { "epoch": 0.32, "grad_norm": 24.045967917161263, "learning_rate": 7.910762204375584e-06, "loss": 0.886, "step": 3979 }, { "epoch": 0.32, "grad_norm": 4.149930835612155, "learning_rate": 7.909692615686709e-06, "loss": 0.6526, "step": 3980 }, { "epoch": 0.32, "grad_norm": 5.169162787830478, "learning_rate": 7.908622825628787e-06, "loss": 0.6912, "step": 3981 }, { "epoch": 0.32, "grad_norm": 3.3947539979383414, "learning_rate": 7.907552834275847e-06, "loss": 0.7366, "step": 3982 }, { "epoch": 0.32, "grad_norm": 4.168128472472476, "learning_rate": 7.906482641701948e-06, "loss": 0.7986, "step": 3983 }, { "epoch": 0.32, "grad_norm": 2.951075973434164, "learning_rate": 7.905412247981145e-06, "loss": 0.8068, "step": 3984 }, { "epoch": 0.32, "grad_norm": 2.1030447991337367, "learning_rate": 7.904341653187525e-06, "loss": 0.6505, "step": 3985 }, { "epoch": 0.32, "grad_norm": 7.600191197001962, "learning_rate": 7.903270857395171e-06, "loss": 0.7605, "step": 3986 }, { "epoch": 0.32, "grad_norm": 7.881893183982748, "learning_rate": 7.902199860678197e-06, "loss": 0.5648, "step": 3987 }, { "epoch": 0.32, "grad_norm": 2.992401383156658, "learning_rate": 7.901128663110716e-06, "loss": 0.7371, "step": 3988 }, { "epoch": 0.32, "grad_norm": 4.032999457858187, "learning_rate": 7.900057264766865e-06, "loss": 0.7199, "step": 3989 }, { "epoch": 0.32, "grad_norm": 2.431758377946861, "learning_rate": 7.898985665720792e-06, "loss": 0.6945, "step": 3990 }, { "epoch": 0.32, "grad_norm": 3.965754674928643, "learning_rate": 7.897913866046658e-06, "loss": 0.7991, "step": 3991 }, { "epoch": 0.32, "grad_norm": 7.070921047310893, "learning_rate": 7.896841865818636e-06, "loss": 0.7076, "step": 3992 }, { "epoch": 0.32, "grad_norm": 4.710028351588609, "learning_rate": 7.895769665110918e-06, "loss": 0.7267, "step": 3993 }, { "epoch": 0.32, "grad_norm": 3.141045921812545, "learning_rate": 7.894697263997706e-06, "loss": 0.8131, "step": 3994 }, { "epoch": 0.32, "grad_norm": 4.2493039545761295, "learning_rate": 7.893624662553216e-06, "loss": 0.5688, "step": 3995 }, { "epoch": 0.32, "grad_norm": 3.6611038009331507, "learning_rate": 7.892551860851679e-06, "loss": 0.7407, "step": 3996 }, { "epoch": 0.32, "grad_norm": 11.319736058964265, "learning_rate": 7.891478858967342e-06, "loss": 0.7448, "step": 3997 }, { "epoch": 0.32, "grad_norm": 3.095309253359473, "learning_rate": 7.89040565697446e-06, "loss": 0.6961, "step": 3998 }, { "epoch": 0.32, "grad_norm": 3.7157411447831166, "learning_rate": 7.889332254947308e-06, "loss": 0.8233, "step": 3999 }, { "epoch": 0.32, "grad_norm": 2.737224342747105, "learning_rate": 7.888258652960171e-06, "loss": 0.6638, "step": 4000 }, { "epoch": 0.32, "grad_norm": 3.0678091740404394, "learning_rate": 7.88718485108735e-06, "loss": 0.578, "step": 4001 }, { "epoch": 0.33, "grad_norm": 2.924838946815285, "learning_rate": 7.886110849403157e-06, "loss": 0.7711, "step": 4002 }, { "epoch": 0.33, "grad_norm": 5.937967326413743, "learning_rate": 7.88503664798192e-06, "loss": 0.7255, "step": 4003 }, { "epoch": 0.33, "grad_norm": 10.954740968906862, "learning_rate": 7.883962246897982e-06, "loss": 0.6236, "step": 4004 }, { "epoch": 0.33, "grad_norm": 5.981709783040065, "learning_rate": 7.8828876462257e-06, "loss": 0.6681, "step": 4005 }, { "epoch": 0.33, "grad_norm": 5.029120277526408, "learning_rate": 7.881812846039438e-06, "loss": 0.707, "step": 4006 }, { "epoch": 0.33, "grad_norm": 10.757851442583057, "learning_rate": 7.880737846413582e-06, "loss": 0.6998, "step": 4007 }, { "epoch": 0.33, "grad_norm": 4.862184012969428, "learning_rate": 7.87966264742253e-06, "loss": 0.6842, "step": 4008 }, { "epoch": 0.33, "grad_norm": 5.892476043664106, "learning_rate": 7.878587249140688e-06, "loss": 0.8522, "step": 4009 }, { "epoch": 0.33, "grad_norm": 3.560054172882769, "learning_rate": 7.877511651642486e-06, "loss": 0.753, "step": 4010 }, { "epoch": 0.33, "grad_norm": 3.7985266866211225, "learning_rate": 7.876435855002357e-06, "loss": 0.6187, "step": 4011 }, { "epoch": 0.33, "grad_norm": 2.459276693031832, "learning_rate": 7.875359859294758e-06, "loss": 0.4691, "step": 4012 }, { "epoch": 0.33, "grad_norm": 2.982910160543017, "learning_rate": 7.87428366459415e-06, "loss": 0.7514, "step": 4013 }, { "epoch": 0.33, "grad_norm": 3.567711268346048, "learning_rate": 7.873207270975017e-06, "loss": 0.6869, "step": 4014 }, { "epoch": 0.33, "grad_norm": 3.6696161888412417, "learning_rate": 7.872130678511847e-06, "loss": 0.7617, "step": 4015 }, { "epoch": 0.33, "grad_norm": 3.1382651657499987, "learning_rate": 7.87105388727915e-06, "loss": 0.6328, "step": 4016 }, { "epoch": 0.33, "grad_norm": 4.731992711249098, "learning_rate": 7.869976897351446e-06, "loss": 0.8129, "step": 4017 }, { "epoch": 0.33, "grad_norm": 2.9595947287652535, "learning_rate": 7.86889970880327e-06, "loss": 0.6981, "step": 4018 }, { "epoch": 0.33, "grad_norm": 2.3505552225578508, "learning_rate": 7.867822321709171e-06, "loss": 0.7741, "step": 4019 }, { "epoch": 0.33, "grad_norm": 2.8971529726414476, "learning_rate": 7.86674473614371e-06, "loss": 0.7891, "step": 4020 }, { "epoch": 0.33, "grad_norm": 4.169734958740916, "learning_rate": 7.865666952181463e-06, "loss": 0.757, "step": 4021 }, { "epoch": 0.33, "grad_norm": 3.773414590397785, "learning_rate": 7.864588969897017e-06, "loss": 0.7726, "step": 4022 }, { "epoch": 0.33, "grad_norm": 5.436593425986144, "learning_rate": 7.863510789364978e-06, "loss": 0.6835, "step": 4023 }, { "epoch": 0.33, "grad_norm": 4.243976058788896, "learning_rate": 7.862432410659964e-06, "loss": 0.8677, "step": 4024 }, { "epoch": 0.33, "grad_norm": 5.771829521723743, "learning_rate": 7.861353833856605e-06, "loss": 0.6527, "step": 4025 }, { "epoch": 0.33, "grad_norm": 2.5059523116447466, "learning_rate": 7.860275059029541e-06, "loss": 0.6992, "step": 4026 }, { "epoch": 0.33, "grad_norm": 3.568862388621924, "learning_rate": 7.859196086253434e-06, "loss": 0.6531, "step": 4027 }, { "epoch": 0.33, "grad_norm": 3.6640053235759846, "learning_rate": 7.858116915602955e-06, "loss": 0.7008, "step": 4028 }, { "epoch": 0.33, "grad_norm": 3.8830768438375487, "learning_rate": 7.85703754715279e-06, "loss": 0.7535, "step": 4029 }, { "epoch": 0.33, "grad_norm": 3.7048391993201566, "learning_rate": 7.855957980977636e-06, "loss": 0.9496, "step": 4030 }, { "epoch": 0.33, "grad_norm": 2.3405273078166084, "learning_rate": 7.854878217152208e-06, "loss": 0.626, "step": 4031 }, { "epoch": 0.33, "grad_norm": 4.220458615251663, "learning_rate": 7.853798255751231e-06, "loss": 0.6996, "step": 4032 }, { "epoch": 0.33, "grad_norm": 4.243201430839897, "learning_rate": 7.852718096849445e-06, "loss": 0.7394, "step": 4033 }, { "epoch": 0.33, "grad_norm": 3.5176451314335, "learning_rate": 7.851637740521608e-06, "loss": 0.7925, "step": 4034 }, { "epoch": 0.33, "grad_norm": 3.7871254192207204, "learning_rate": 7.85055718684248e-06, "loss": 0.6538, "step": 4035 }, { "epoch": 0.33, "grad_norm": 2.8257614422145587, "learning_rate": 7.849476435886847e-06, "loss": 0.6096, "step": 4036 }, { "epoch": 0.33, "grad_norm": 3.308609454555126, "learning_rate": 7.848395487729505e-06, "loss": 0.7691, "step": 4037 }, { "epoch": 0.33, "grad_norm": 2.6383729710203117, "learning_rate": 7.847314342445258e-06, "loss": 0.7863, "step": 4038 }, { "epoch": 0.33, "grad_norm": 4.635028452379696, "learning_rate": 7.84623300010893e-06, "loss": 0.8036, "step": 4039 }, { "epoch": 0.33, "grad_norm": 5.427719952737282, "learning_rate": 7.84515146079536e-06, "loss": 0.7029, "step": 4040 }, { "epoch": 0.33, "grad_norm": 4.2372017528608135, "learning_rate": 7.844069724579392e-06, "loss": 0.8418, "step": 4041 }, { "epoch": 0.33, "grad_norm": 2.6153259822179673, "learning_rate": 7.842987791535891e-06, "loss": 0.6058, "step": 4042 }, { "epoch": 0.33, "grad_norm": 3.785325191874001, "learning_rate": 7.841905661739735e-06, "loss": 0.698, "step": 4043 }, { "epoch": 0.33, "grad_norm": 3.33553283571518, "learning_rate": 7.840823335265813e-06, "loss": 0.6399, "step": 4044 }, { "epoch": 0.33, "grad_norm": 9.76389378766501, "learning_rate": 7.839740812189027e-06, "loss": 0.7678, "step": 4045 }, { "epoch": 0.33, "grad_norm": 3.427439623166322, "learning_rate": 7.8386580925843e-06, "loss": 0.4043, "step": 4046 }, { "epoch": 0.33, "grad_norm": 3.8407561538457835, "learning_rate": 7.837575176526556e-06, "loss": 0.6855, "step": 4047 }, { "epoch": 0.33, "grad_norm": 3.1358971796546555, "learning_rate": 7.836492064090745e-06, "loss": 0.6993, "step": 4048 }, { "epoch": 0.33, "grad_norm": 6.570332117553186, "learning_rate": 7.83540875535182e-06, "loss": 0.7468, "step": 4049 }, { "epoch": 0.33, "grad_norm": 3.948865745022593, "learning_rate": 7.83432525038476e-06, "loss": 0.6822, "step": 4050 }, { "epoch": 0.33, "grad_norm": 4.7488918413680965, "learning_rate": 7.833241549264544e-06, "loss": 0.6391, "step": 4051 }, { "epoch": 0.33, "grad_norm": 3.0880446265928527, "learning_rate": 7.832157652066173e-06, "loss": 0.666, "step": 4052 }, { "epoch": 0.33, "grad_norm": 5.217687001816275, "learning_rate": 7.831073558864661e-06, "loss": 0.7284, "step": 4053 }, { "epoch": 0.33, "grad_norm": 3.049392989535443, "learning_rate": 7.829989269735033e-06, "loss": 0.6204, "step": 4054 }, { "epoch": 0.33, "grad_norm": 3.984918816353251, "learning_rate": 7.828904784752327e-06, "loss": 0.7827, "step": 4055 }, { "epoch": 0.33, "grad_norm": 3.728661484017217, "learning_rate": 7.8278201039916e-06, "loss": 0.8908, "step": 4056 }, { "epoch": 0.33, "grad_norm": 3.0424028266399774, "learning_rate": 7.826735227527913e-06, "loss": 0.7756, "step": 4057 }, { "epoch": 0.33, "grad_norm": 4.028174685880376, "learning_rate": 7.825650155436352e-06, "loss": 0.7433, "step": 4058 }, { "epoch": 0.33, "grad_norm": 10.278119416074434, "learning_rate": 7.824564887792008e-06, "loss": 0.6336, "step": 4059 }, { "epoch": 0.33, "grad_norm": 2.739215044632915, "learning_rate": 7.823479424669988e-06, "loss": 0.6444, "step": 4060 }, { "epoch": 0.33, "grad_norm": 5.526006374892855, "learning_rate": 7.822393766145415e-06, "loss": 0.6217, "step": 4061 }, { "epoch": 0.33, "grad_norm": 2.8194295174276993, "learning_rate": 7.82130791229342e-06, "loss": 0.7388, "step": 4062 }, { "epoch": 0.33, "grad_norm": 3.0434223990426084, "learning_rate": 7.820221863189156e-06, "loss": 0.6793, "step": 4063 }, { "epoch": 0.33, "grad_norm": 3.268143701971518, "learning_rate": 7.819135618907781e-06, "loss": 0.6439, "step": 4064 }, { "epoch": 0.33, "grad_norm": 3.3650955995875766, "learning_rate": 7.81804917952447e-06, "loss": 0.7982, "step": 4065 }, { "epoch": 0.33, "grad_norm": 3.46927797233056, "learning_rate": 7.81696254511441e-06, "loss": 0.5934, "step": 4066 }, { "epoch": 0.33, "grad_norm": 4.054674725980978, "learning_rate": 7.815875715752806e-06, "loss": 0.7939, "step": 4067 }, { "epoch": 0.33, "grad_norm": 3.4090407989165774, "learning_rate": 7.814788691514871e-06, "loss": 0.6676, "step": 4068 }, { "epoch": 0.33, "grad_norm": 3.6681233465909524, "learning_rate": 7.813701472475839e-06, "loss": 0.7219, "step": 4069 }, { "epoch": 0.33, "grad_norm": 3.730348746381778, "learning_rate": 7.812614058710946e-06, "loss": 0.5985, "step": 4070 }, { "epoch": 0.33, "grad_norm": 2.5878949648442857, "learning_rate": 7.81152645029545e-06, "loss": 0.7509, "step": 4071 }, { "epoch": 0.33, "grad_norm": 2.492655346569026, "learning_rate": 7.810438647304621e-06, "loss": 0.7055, "step": 4072 }, { "epoch": 0.33, "grad_norm": 3.061167024935287, "learning_rate": 7.809350649813743e-06, "loss": 0.6532, "step": 4073 }, { "epoch": 0.33, "grad_norm": 16.333585594803058, "learning_rate": 7.80826245789811e-06, "loss": 0.5317, "step": 4074 }, { "epoch": 0.33, "grad_norm": 3.9319868898793273, "learning_rate": 7.807174071633032e-06, "loss": 0.6462, "step": 4075 }, { "epoch": 0.33, "grad_norm": 10.158383994755884, "learning_rate": 7.806085491093833e-06, "loss": 0.4214, "step": 4076 }, { "epoch": 0.33, "grad_norm": 2.5492804290846367, "learning_rate": 7.80499671635585e-06, "loss": 0.6198, "step": 4077 }, { "epoch": 0.33, "grad_norm": 3.8651720526761166, "learning_rate": 7.803907747494432e-06, "loss": 0.7509, "step": 4078 }, { "epoch": 0.33, "grad_norm": 8.304522655130349, "learning_rate": 7.802818584584944e-06, "loss": 0.6101, "step": 4079 }, { "epoch": 0.33, "grad_norm": 3.800144844913076, "learning_rate": 7.80172922770276e-06, "loss": 0.7286, "step": 4080 }, { "epoch": 0.33, "grad_norm": 4.12455386553149, "learning_rate": 7.800639676923276e-06, "loss": 0.8352, "step": 4081 }, { "epoch": 0.33, "grad_norm": 8.769564802279781, "learning_rate": 7.799549932321889e-06, "loss": 0.6337, "step": 4082 }, { "epoch": 0.33, "grad_norm": 4.000470968758646, "learning_rate": 7.798459993974022e-06, "loss": 0.6705, "step": 4083 }, { "epoch": 0.33, "grad_norm": 4.773622868797054, "learning_rate": 7.797369861955099e-06, "loss": 0.7885, "step": 4084 }, { "epoch": 0.33, "grad_norm": 3.278459471282244, "learning_rate": 7.79627953634057e-06, "loss": 0.7291, "step": 4085 }, { "epoch": 0.33, "grad_norm": 4.1034281831182655, "learning_rate": 7.795189017205888e-06, "loss": 0.6794, "step": 4086 }, { "epoch": 0.33, "grad_norm": 3.666641948357952, "learning_rate": 7.79409830462653e-06, "loss": 0.5839, "step": 4087 }, { "epoch": 0.33, "grad_norm": 3.614734455374862, "learning_rate": 7.793007398677973e-06, "loss": 0.7052, "step": 4088 }, { "epoch": 0.33, "grad_norm": 5.507318164068913, "learning_rate": 7.79191629943572e-06, "loss": 0.6686, "step": 4089 }, { "epoch": 0.33, "grad_norm": 10.676401270174203, "learning_rate": 7.790825006975279e-06, "loss": 0.6491, "step": 4090 }, { "epoch": 0.33, "grad_norm": 3.6209708565111534, "learning_rate": 7.789733521372174e-06, "loss": 0.6564, "step": 4091 }, { "epoch": 0.33, "grad_norm": 20.198825515507583, "learning_rate": 7.788641842701945e-06, "loss": 0.6134, "step": 4092 }, { "epoch": 0.33, "grad_norm": 4.872787851031557, "learning_rate": 7.78754997104014e-06, "loss": 0.6679, "step": 4093 }, { "epoch": 0.33, "grad_norm": 2.879068451747883, "learning_rate": 7.786457906462329e-06, "loss": 0.7314, "step": 4094 }, { "epoch": 0.33, "grad_norm": 12.101396645687867, "learning_rate": 7.78536564904408e-06, "loss": 0.756, "step": 4095 }, { "epoch": 0.33, "grad_norm": 4.115924686329829, "learning_rate": 7.784273198860995e-06, "loss": 0.768, "step": 4096 }, { "epoch": 0.33, "grad_norm": 3.0318172566732815, "learning_rate": 7.783180555988671e-06, "loss": 0.5867, "step": 4097 }, { "epoch": 0.33, "grad_norm": 5.88818577725966, "learning_rate": 7.78208772050273e-06, "loss": 0.6385, "step": 4098 }, { "epoch": 0.33, "grad_norm": 9.447203908840548, "learning_rate": 7.780994692478798e-06, "loss": 0.743, "step": 4099 }, { "epoch": 0.33, "grad_norm": 30.369144915399453, "learning_rate": 7.779901471992526e-06, "loss": 0.7243, "step": 4100 }, { "epoch": 0.33, "grad_norm": 7.2012427548716555, "learning_rate": 7.778808059119567e-06, "loss": 0.6446, "step": 4101 }, { "epoch": 0.33, "grad_norm": 7.236087009464436, "learning_rate": 7.777714453935594e-06, "loss": 0.7151, "step": 4102 }, { "epoch": 0.33, "grad_norm": 8.360358110778868, "learning_rate": 7.77662065651629e-06, "loss": 0.7456, "step": 4103 }, { "epoch": 0.33, "grad_norm": 5.157020313618741, "learning_rate": 7.775526666937354e-06, "loss": 0.6859, "step": 4104 }, { "epoch": 0.33, "grad_norm": 3.9790936876645144, "learning_rate": 7.774432485274497e-06, "loss": 0.6719, "step": 4105 }, { "epoch": 0.33, "grad_norm": 4.29641542968105, "learning_rate": 7.773338111603441e-06, "loss": 0.7753, "step": 4106 }, { "epoch": 0.33, "grad_norm": 3.109518616171552, "learning_rate": 7.772243545999927e-06, "loss": 0.6328, "step": 4107 }, { "epoch": 0.33, "grad_norm": 3.905990823969171, "learning_rate": 7.771148788539704e-06, "loss": 0.8857, "step": 4108 }, { "epoch": 0.33, "grad_norm": 3.1677308978861753, "learning_rate": 7.770053839298535e-06, "loss": 0.711, "step": 4109 }, { "epoch": 0.33, "grad_norm": 3.1901076270752906, "learning_rate": 7.7689586983522e-06, "loss": 0.7131, "step": 4110 }, { "epoch": 0.33, "grad_norm": 4.786363269034304, "learning_rate": 7.767863365776488e-06, "loss": 0.8328, "step": 4111 }, { "epoch": 0.33, "grad_norm": 4.623157867135536, "learning_rate": 7.766767841647203e-06, "loss": 0.6248, "step": 4112 }, { "epoch": 0.33, "grad_norm": 2.405042997179284, "learning_rate": 7.765672126040162e-06, "loss": 0.7472, "step": 4113 }, { "epoch": 0.33, "grad_norm": 4.47305225801398, "learning_rate": 7.764576219031197e-06, "loss": 0.9045, "step": 4114 }, { "epoch": 0.33, "grad_norm": 4.587956657843888, "learning_rate": 7.763480120696149e-06, "loss": 0.7675, "step": 4115 }, { "epoch": 0.33, "grad_norm": 4.896169536520506, "learning_rate": 7.762383831110878e-06, "loss": 0.7246, "step": 4116 }, { "epoch": 0.33, "grad_norm": 4.8970607335428245, "learning_rate": 7.761287350351249e-06, "loss": 0.6263, "step": 4117 }, { "epoch": 0.33, "grad_norm": 3.2483340860096805, "learning_rate": 7.760190678493152e-06, "loss": 0.6393, "step": 4118 }, { "epoch": 0.33, "grad_norm": 7.6971954311891055, "learning_rate": 7.75909381561248e-06, "loss": 0.6648, "step": 4119 }, { "epoch": 0.33, "grad_norm": 24.353094749108823, "learning_rate": 7.757996761785142e-06, "loss": 0.7571, "step": 4120 }, { "epoch": 0.33, "grad_norm": 3.9187088505766723, "learning_rate": 7.756899517087064e-06, "loss": 0.7357, "step": 4121 }, { "epoch": 0.33, "grad_norm": 6.939205931395191, "learning_rate": 7.755802081594179e-06, "loss": 0.7559, "step": 4122 }, { "epoch": 0.33, "grad_norm": 10.891093033491927, "learning_rate": 7.75470445538244e-06, "loss": 0.8352, "step": 4123 }, { "epoch": 0.33, "grad_norm": 5.5681219779568485, "learning_rate": 7.753606638527806e-06, "loss": 0.6222, "step": 4124 }, { "epoch": 0.34, "grad_norm": 2.9505204365213897, "learning_rate": 7.752508631106254e-06, "loss": 0.6965, "step": 4125 }, { "epoch": 0.34, "grad_norm": 9.466021376256897, "learning_rate": 7.751410433193775e-06, "loss": 0.7038, "step": 4126 }, { "epoch": 0.34, "grad_norm": 3.508643205227156, "learning_rate": 7.75031204486637e-06, "loss": 0.6965, "step": 4127 }, { "epoch": 0.34, "grad_norm": 3.761493636141535, "learning_rate": 7.749213466200052e-06, "loss": 0.768, "step": 4128 }, { "epoch": 0.34, "grad_norm": 9.026908527490088, "learning_rate": 7.748114697270854e-06, "loss": 0.7602, "step": 4129 }, { "epoch": 0.34, "grad_norm": 4.2627735058668605, "learning_rate": 7.747015738154814e-06, "loss": 0.6751, "step": 4130 }, { "epoch": 0.34, "grad_norm": 4.439934069674513, "learning_rate": 7.745916588927988e-06, "loss": 0.6475, "step": 4131 }, { "epoch": 0.34, "grad_norm": 5.2383227787604705, "learning_rate": 7.744817249666445e-06, "loss": 0.7058, "step": 4132 }, { "epoch": 0.34, "grad_norm": 3.4372320405668715, "learning_rate": 7.743717720446265e-06, "loss": 0.7557, "step": 4133 }, { "epoch": 0.34, "grad_norm": 4.438490245659513, "learning_rate": 7.742618001343544e-06, "loss": 0.8321, "step": 4134 }, { "epoch": 0.34, "grad_norm": 3.546815682773286, "learning_rate": 7.741518092434388e-06, "loss": 0.8111, "step": 4135 }, { "epoch": 0.34, "grad_norm": 5.684351469153283, "learning_rate": 7.740417993794918e-06, "loss": 0.8111, "step": 4136 }, { "epoch": 0.34, "grad_norm": 6.074752771301794, "learning_rate": 7.739317705501266e-06, "loss": 0.6977, "step": 4137 }, { "epoch": 0.34, "grad_norm": 3.0155319121674027, "learning_rate": 7.738217227629582e-06, "loss": 0.5559, "step": 4138 }, { "epoch": 0.34, "grad_norm": 3.6280985815363076, "learning_rate": 7.737116560256024e-06, "loss": 0.7512, "step": 4139 }, { "epoch": 0.34, "grad_norm": 2.998859134416602, "learning_rate": 7.736015703456768e-06, "loss": 0.6122, "step": 4140 }, { "epoch": 0.34, "grad_norm": 16.37306700851657, "learning_rate": 7.734914657307995e-06, "loss": 0.7383, "step": 4141 }, { "epoch": 0.34, "grad_norm": 3.605277379139862, "learning_rate": 7.733813421885907e-06, "loss": 0.737, "step": 4142 }, { "epoch": 0.34, "grad_norm": 18.75982192873511, "learning_rate": 7.73271199726672e-06, "loss": 0.6649, "step": 4143 }, { "epoch": 0.34, "grad_norm": 6.8783168396698775, "learning_rate": 7.731610383526654e-06, "loss": 0.765, "step": 4144 }, { "epoch": 0.34, "grad_norm": 3.5431948171922296, "learning_rate": 7.73050858074195e-06, "loss": 0.7947, "step": 4145 }, { "epoch": 0.34, "grad_norm": 6.167641000453161, "learning_rate": 7.72940658898886e-06, "loss": 0.7782, "step": 4146 }, { "epoch": 0.34, "grad_norm": 3.9945438146533467, "learning_rate": 7.728304408343648e-06, "loss": 0.7962, "step": 4147 }, { "epoch": 0.34, "grad_norm": 3.0812113275096564, "learning_rate": 7.72720203888259e-06, "loss": 0.7787, "step": 4148 }, { "epoch": 0.34, "grad_norm": 3.4178309141023178, "learning_rate": 7.726099480681983e-06, "loss": 0.7006, "step": 4149 }, { "epoch": 0.34, "grad_norm": 3.6869408073749588, "learning_rate": 7.724996733818124e-06, "loss": 0.6133, "step": 4150 }, { "epoch": 0.34, "grad_norm": 6.019079725212533, "learning_rate": 7.723893798367335e-06, "loss": 0.8643, "step": 4151 }, { "epoch": 0.34, "grad_norm": 3.2551044974971997, "learning_rate": 7.722790674405943e-06, "loss": 0.6639, "step": 4152 }, { "epoch": 0.34, "grad_norm": 2.903062872053778, "learning_rate": 7.721687362010293e-06, "loss": 0.8125, "step": 4153 }, { "epoch": 0.34, "grad_norm": 3.8159382740878987, "learning_rate": 7.72058386125674e-06, "loss": 0.8421, "step": 4154 }, { "epoch": 0.34, "grad_norm": 3.5294059463180854, "learning_rate": 7.719480172221652e-06, "loss": 0.7814, "step": 4155 }, { "epoch": 0.34, "grad_norm": 2.3711255996802287, "learning_rate": 7.718376294981416e-06, "loss": 0.7501, "step": 4156 }, { "epoch": 0.34, "grad_norm": 3.4514582764212527, "learning_rate": 7.71727222961242e-06, "loss": 0.7437, "step": 4157 }, { "epoch": 0.34, "grad_norm": 3.7004749294885055, "learning_rate": 7.71616797619108e-06, "loss": 0.8478, "step": 4158 }, { "epoch": 0.34, "grad_norm": 8.468487658189542, "learning_rate": 7.715063534793811e-06, "loss": 0.6774, "step": 4159 }, { "epoch": 0.34, "grad_norm": 2.9881669309742995, "learning_rate": 7.713958905497051e-06, "loss": 0.7948, "step": 4160 }, { "epoch": 0.34, "grad_norm": 20.85695866115867, "learning_rate": 7.712854088377247e-06, "loss": 0.7139, "step": 4161 }, { "epoch": 0.34, "grad_norm": 5.480801745893337, "learning_rate": 7.711749083510859e-06, "loss": 0.8241, "step": 4162 }, { "epoch": 0.34, "grad_norm": 47.52951888594341, "learning_rate": 7.710643890974358e-06, "loss": 0.8022, "step": 4163 }, { "epoch": 0.34, "grad_norm": 5.4239647853745145, "learning_rate": 7.709538510844234e-06, "loss": 0.8134, "step": 4164 }, { "epoch": 0.34, "grad_norm": 2.811308789895674, "learning_rate": 7.708432943196982e-06, "loss": 0.5743, "step": 4165 }, { "epoch": 0.34, "grad_norm": 7.624334506596432, "learning_rate": 7.70732718810912e-06, "loss": 0.6332, "step": 4166 }, { "epoch": 0.34, "grad_norm": 3.442416656138392, "learning_rate": 7.706221245657168e-06, "loss": 0.7508, "step": 4167 }, { "epoch": 0.34, "grad_norm": 2.927889032363458, "learning_rate": 7.705115115917665e-06, "loss": 0.7404, "step": 4168 }, { "epoch": 0.34, "grad_norm": 3.3005994166846095, "learning_rate": 7.704008798967164e-06, "loss": 0.6508, "step": 4169 }, { "epoch": 0.34, "grad_norm": 5.356235752596568, "learning_rate": 7.70290229488223e-06, "loss": 0.6125, "step": 4170 }, { "epoch": 0.34, "grad_norm": 3.5490115380220204, "learning_rate": 7.701795603739434e-06, "loss": 0.5048, "step": 4171 }, { "epoch": 0.34, "grad_norm": 3.4100993042505685, "learning_rate": 7.700688725615373e-06, "loss": 0.7198, "step": 4172 }, { "epoch": 0.34, "grad_norm": 2.8120108044876706, "learning_rate": 7.699581660586648e-06, "loss": 0.8058, "step": 4173 }, { "epoch": 0.34, "grad_norm": 4.399572028246543, "learning_rate": 7.698474408729872e-06, "loss": 0.6973, "step": 4174 }, { "epoch": 0.34, "grad_norm": 3.8053077621999054, "learning_rate": 7.697366970121678e-06, "loss": 0.8226, "step": 4175 }, { "epoch": 0.34, "grad_norm": 3.1218565339521036, "learning_rate": 7.696259344838706e-06, "loss": 0.7116, "step": 4176 }, { "epoch": 0.34, "grad_norm": 5.631258285929297, "learning_rate": 7.695151532957608e-06, "loss": 0.6867, "step": 4177 }, { "epoch": 0.34, "grad_norm": 3.8185477601773297, "learning_rate": 7.694043534555055e-06, "loss": 0.6987, "step": 4178 }, { "epoch": 0.34, "grad_norm": 3.9237029025735075, "learning_rate": 7.692935349707726e-06, "loss": 0.7236, "step": 4179 }, { "epoch": 0.34, "grad_norm": 15.740143678524328, "learning_rate": 7.691826978492316e-06, "loss": 0.7921, "step": 4180 }, { "epoch": 0.34, "grad_norm": 3.9487319245565122, "learning_rate": 7.690718420985527e-06, "loss": 0.5639, "step": 4181 }, { "epoch": 0.34, "grad_norm": 3.1572443227536846, "learning_rate": 7.689609677264083e-06, "loss": 0.7445, "step": 4182 }, { "epoch": 0.34, "grad_norm": 6.693988002017067, "learning_rate": 7.688500747404716e-06, "loss": 0.7799, "step": 4183 }, { "epoch": 0.34, "grad_norm": 3.5869468706418477, "learning_rate": 7.687391631484168e-06, "loss": 0.5931, "step": 4184 }, { "epoch": 0.34, "grad_norm": 10.782830887104295, "learning_rate": 7.686282329579195e-06, "loss": 0.7683, "step": 4185 }, { "epoch": 0.34, "grad_norm": 3.6198637291941487, "learning_rate": 7.685172841766573e-06, "loss": 0.7242, "step": 4186 }, { "epoch": 0.34, "grad_norm": 3.909840826051098, "learning_rate": 7.684063168123082e-06, "loss": 0.7236, "step": 4187 }, { "epoch": 0.34, "grad_norm": 4.5002556387158865, "learning_rate": 7.682953308725522e-06, "loss": 0.6931, "step": 4188 }, { "epoch": 0.34, "grad_norm": 33.390889858115614, "learning_rate": 7.681843263650698e-06, "loss": 0.7819, "step": 4189 }, { "epoch": 0.34, "grad_norm": 4.236231648796214, "learning_rate": 7.680733032975434e-06, "loss": 0.749, "step": 4190 }, { "epoch": 0.34, "grad_norm": 3.560344258759262, "learning_rate": 7.679622616776565e-06, "loss": 0.5957, "step": 4191 }, { "epoch": 0.34, "grad_norm": 10.36076355842283, "learning_rate": 7.678512015130936e-06, "loss": 0.7471, "step": 4192 }, { "epoch": 0.34, "grad_norm": 2.5496913962085896, "learning_rate": 7.677401228115414e-06, "loss": 0.6378, "step": 4193 }, { "epoch": 0.34, "grad_norm": 8.25757401515829, "learning_rate": 7.676290255806866e-06, "loss": 0.7459, "step": 4194 }, { "epoch": 0.34, "grad_norm": 6.002619111786537, "learning_rate": 7.675179098282183e-06, "loss": 0.5771, "step": 4195 }, { "epoch": 0.34, "grad_norm": 3.701241230377751, "learning_rate": 7.674067755618261e-06, "loss": 0.671, "step": 4196 }, { "epoch": 0.34, "grad_norm": 3.8880285706180135, "learning_rate": 7.672956227892014e-06, "loss": 0.7014, "step": 4197 }, { "epoch": 0.34, "grad_norm": 2.8466834906214045, "learning_rate": 7.671844515180365e-06, "loss": 0.7784, "step": 4198 }, { "epoch": 0.34, "grad_norm": 2.775989698557638, "learning_rate": 7.670732617560253e-06, "loss": 0.6754, "step": 4199 }, { "epoch": 0.34, "grad_norm": 3.5978981404266084, "learning_rate": 7.669620535108626e-06, "loss": 0.6528, "step": 4200 }, { "epoch": 0.34, "grad_norm": 4.220452673801354, "learning_rate": 7.66850826790245e-06, "loss": 0.7001, "step": 4201 }, { "epoch": 0.34, "grad_norm": 3.7171663050214656, "learning_rate": 7.667395816018699e-06, "loss": 0.7974, "step": 4202 }, { "epoch": 0.34, "grad_norm": 2.7313885002651697, "learning_rate": 7.666283179534362e-06, "loss": 0.7008, "step": 4203 }, { "epoch": 0.34, "grad_norm": 3.9428986527451193, "learning_rate": 7.665170358526441e-06, "loss": 0.7038, "step": 4204 }, { "epoch": 0.34, "grad_norm": 4.778854928045878, "learning_rate": 7.66405735307195e-06, "loss": 0.674, "step": 4205 }, { "epoch": 0.34, "grad_norm": 9.0063608463653, "learning_rate": 7.662944163247916e-06, "loss": 0.5842, "step": 4206 }, { "epoch": 0.34, "grad_norm": 2.785810860625398, "learning_rate": 7.661830789131378e-06, "loss": 0.7495, "step": 4207 }, { "epoch": 0.34, "grad_norm": 3.170122198926483, "learning_rate": 7.66071723079939e-06, "loss": 0.7608, "step": 4208 }, { "epoch": 0.34, "grad_norm": 4.6494954041382694, "learning_rate": 7.659603488329014e-06, "loss": 0.6163, "step": 4209 }, { "epoch": 0.34, "grad_norm": 2.499744152038324, "learning_rate": 7.658489561797333e-06, "loss": 0.7113, "step": 4210 }, { "epoch": 0.34, "grad_norm": 13.818948853593524, "learning_rate": 7.657375451281435e-06, "loss": 0.8048, "step": 4211 }, { "epoch": 0.34, "grad_norm": 3.379269193921864, "learning_rate": 7.656261156858423e-06, "loss": 0.6937, "step": 4212 }, { "epoch": 0.34, "grad_norm": 3.2596806987744276, "learning_rate": 7.655146678605414e-06, "loss": 0.7981, "step": 4213 }, { "epoch": 0.34, "grad_norm": 7.327873204303678, "learning_rate": 7.654032016599536e-06, "loss": 0.5877, "step": 4214 }, { "epoch": 0.34, "grad_norm": 3.329455137290018, "learning_rate": 7.65291717091793e-06, "loss": 0.6145, "step": 4215 }, { "epoch": 0.34, "grad_norm": 2.9881259905539186, "learning_rate": 7.651802141637753e-06, "loss": 0.7879, "step": 4216 }, { "epoch": 0.34, "grad_norm": 5.4802503217711775, "learning_rate": 7.650686928836172e-06, "loss": 0.8479, "step": 4217 }, { "epoch": 0.34, "grad_norm": 3.1318347680815566, "learning_rate": 7.649571532590363e-06, "loss": 0.6803, "step": 4218 }, { "epoch": 0.34, "grad_norm": 3.30604254142086, "learning_rate": 7.648455952977523e-06, "loss": 0.6684, "step": 4219 }, { "epoch": 0.34, "grad_norm": 4.201200828071186, "learning_rate": 7.647340190074854e-06, "loss": 0.6677, "step": 4220 }, { "epoch": 0.34, "grad_norm": 5.093367256882833, "learning_rate": 7.646224243959575e-06, "loss": 0.7021, "step": 4221 }, { "epoch": 0.34, "grad_norm": 3.1090242822156138, "learning_rate": 7.645108114708916e-06, "loss": 0.6763, "step": 4222 }, { "epoch": 0.34, "grad_norm": 3.000165483696345, "learning_rate": 7.643991802400122e-06, "loss": 0.7582, "step": 4223 }, { "epoch": 0.34, "grad_norm": 3.375623946799999, "learning_rate": 7.642875307110444e-06, "loss": 0.8375, "step": 4224 }, { "epoch": 0.34, "grad_norm": 4.387542910288342, "learning_rate": 7.641758628917156e-06, "loss": 0.8571, "step": 4225 }, { "epoch": 0.34, "grad_norm": 2.470912717516068, "learning_rate": 7.640641767897537e-06, "loss": 0.8537, "step": 4226 }, { "epoch": 0.34, "grad_norm": 14.196094651474384, "learning_rate": 7.639524724128881e-06, "loss": 0.8582, "step": 4227 }, { "epoch": 0.34, "grad_norm": 2.3513481041016395, "learning_rate": 7.638407497688493e-06, "loss": 0.8401, "step": 4228 }, { "epoch": 0.34, "grad_norm": 4.389900618980124, "learning_rate": 7.637290088653695e-06, "loss": 0.741, "step": 4229 }, { "epoch": 0.34, "grad_norm": 3.0452021008936785, "learning_rate": 7.636172497101817e-06, "loss": 0.6562, "step": 4230 }, { "epoch": 0.34, "grad_norm": 3.2185592330810473, "learning_rate": 7.635054723110203e-06, "loss": 0.7614, "step": 4231 }, { "epoch": 0.34, "grad_norm": 2.7764435827683256, "learning_rate": 7.633936766756211e-06, "loss": 0.7416, "step": 4232 }, { "epoch": 0.34, "grad_norm": 2.7500128874845915, "learning_rate": 7.63281862811721e-06, "loss": 0.6041, "step": 4233 }, { "epoch": 0.34, "grad_norm": 3.7893848037547686, "learning_rate": 7.63170030727058e-06, "loss": 0.7515, "step": 4234 }, { "epoch": 0.34, "grad_norm": 2.6436139683335282, "learning_rate": 7.63058180429372e-06, "loss": 0.7151, "step": 4235 }, { "epoch": 0.34, "grad_norm": 65.65021008458264, "learning_rate": 7.629463119264036e-06, "loss": 0.6541, "step": 4236 }, { "epoch": 0.34, "grad_norm": 35.95581769413149, "learning_rate": 7.628344252258948e-06, "loss": 0.7633, "step": 4237 }, { "epoch": 0.34, "grad_norm": 2.76599168914248, "learning_rate": 7.627225203355887e-06, "loss": 0.6128, "step": 4238 }, { "epoch": 0.34, "grad_norm": 6.0670053389041705, "learning_rate": 7.6261059726323006e-06, "loss": 0.7771, "step": 4239 }, { "epoch": 0.34, "grad_norm": 3.085052386009528, "learning_rate": 7.6249865601656434e-06, "loss": 0.7562, "step": 4240 }, { "epoch": 0.34, "grad_norm": 7.866399397270718, "learning_rate": 7.623866966033391e-06, "loss": 0.6186, "step": 4241 }, { "epoch": 0.34, "grad_norm": 5.457272015709935, "learning_rate": 7.622747190313022e-06, "loss": 0.7136, "step": 4242 }, { "epoch": 0.34, "grad_norm": 9.095566379321125, "learning_rate": 7.621627233082033e-06, "loss": 0.7175, "step": 4243 }, { "epoch": 0.34, "grad_norm": 5.462027611056057, "learning_rate": 7.620507094417933e-06, "loss": 0.7203, "step": 4244 }, { "epoch": 0.34, "grad_norm": 3.5910973649497864, "learning_rate": 7.619386774398241e-06, "loss": 0.7107, "step": 4245 }, { "epoch": 0.34, "grad_norm": 4.0588277797716446, "learning_rate": 7.618266273100492e-06, "loss": 0.7534, "step": 4246 }, { "epoch": 0.34, "grad_norm": 3.3122494566692384, "learning_rate": 7.617145590602231e-06, "loss": 0.6957, "step": 4247 }, { "epoch": 0.35, "grad_norm": 2.848716313031181, "learning_rate": 7.616024726981015e-06, "loss": 0.7398, "step": 4248 }, { "epoch": 0.35, "grad_norm": 2.3764844662969375, "learning_rate": 7.614903682314419e-06, "loss": 0.7286, "step": 4249 }, { "epoch": 0.35, "grad_norm": 11.688971661091193, "learning_rate": 7.613782456680019e-06, "loss": 0.6604, "step": 4250 }, { "epoch": 0.35, "grad_norm": 2.913327946682503, "learning_rate": 7.612661050155418e-06, "loss": 0.6498, "step": 4251 }, { "epoch": 0.35, "grad_norm": 4.1708442864891895, "learning_rate": 7.611539462818221e-06, "loss": 0.7868, "step": 4252 }, { "epoch": 0.35, "grad_norm": 2.7376442369297425, "learning_rate": 7.6104176947460506e-06, "loss": 0.7743, "step": 4253 }, { "epoch": 0.35, "grad_norm": 6.627369714398502, "learning_rate": 7.609295746016538e-06, "loss": 0.8531, "step": 4254 }, { "epoch": 0.35, "grad_norm": 6.999147695090658, "learning_rate": 7.60817361670733e-06, "loss": 0.7408, "step": 4255 }, { "epoch": 0.35, "grad_norm": 3.2934167685567934, "learning_rate": 7.607051306896087e-06, "loss": 0.7353, "step": 4256 }, { "epoch": 0.35, "grad_norm": 4.962497122851434, "learning_rate": 7.605928816660477e-06, "loss": 0.5706, "step": 4257 }, { "epoch": 0.35, "grad_norm": 2.357074047694368, "learning_rate": 7.604806146078185e-06, "loss": 0.7376, "step": 4258 }, { "epoch": 0.35, "grad_norm": 5.343497725228845, "learning_rate": 7.603683295226907e-06, "loss": 0.6709, "step": 4259 }, { "epoch": 0.35, "grad_norm": 45.94924082581608, "learning_rate": 7.602560264184349e-06, "loss": 0.6962, "step": 4260 }, { "epoch": 0.35, "grad_norm": 3.99734239842821, "learning_rate": 7.601437053028235e-06, "loss": 0.762, "step": 4261 }, { "epoch": 0.35, "grad_norm": 14.08562740089319, "learning_rate": 7.600313661836298e-06, "loss": 0.8911, "step": 4262 }, { "epoch": 0.35, "grad_norm": 8.032445371482412, "learning_rate": 7.59919009068628e-06, "loss": 0.6637, "step": 4263 }, { "epoch": 0.35, "grad_norm": 2.3748303119157153, "learning_rate": 7.598066339655943e-06, "loss": 0.7459, "step": 4264 }, { "epoch": 0.35, "grad_norm": 2.881278380295066, "learning_rate": 7.596942408823057e-06, "loss": 0.5924, "step": 4265 }, { "epoch": 0.35, "grad_norm": 2.7172493662580743, "learning_rate": 7.595818298265405e-06, "loss": 0.7629, "step": 4266 }, { "epoch": 0.35, "grad_norm": 4.474899227032276, "learning_rate": 7.594694008060781e-06, "loss": 0.6736, "step": 4267 }, { "epoch": 0.35, "grad_norm": 3.011121489499902, "learning_rate": 7.593569538286996e-06, "loss": 0.6869, "step": 4268 }, { "epoch": 0.35, "grad_norm": 7.966491105107606, "learning_rate": 7.592444889021866e-06, "loss": 0.6638, "step": 4269 }, { "epoch": 0.35, "grad_norm": 3.680136116118259, "learning_rate": 7.591320060343228e-06, "loss": 0.878, "step": 4270 }, { "epoch": 0.35, "grad_norm": 3.472905503656415, "learning_rate": 7.590195052328923e-06, "loss": 0.601, "step": 4271 }, { "epoch": 0.35, "grad_norm": 3.3626481887466526, "learning_rate": 7.589069865056815e-06, "loss": 0.664, "step": 4272 }, { "epoch": 0.35, "grad_norm": 5.648473049318399, "learning_rate": 7.587944498604767e-06, "loss": 0.5532, "step": 4273 }, { "epoch": 0.35, "grad_norm": 9.179329166723242, "learning_rate": 7.586818953050666e-06, "loss": 0.8864, "step": 4274 }, { "epoch": 0.35, "grad_norm": 3.592445722859328, "learning_rate": 7.585693228472405e-06, "loss": 0.7922, "step": 4275 }, { "epoch": 0.35, "grad_norm": 3.6183777042074166, "learning_rate": 7.584567324947893e-06, "loss": 0.5551, "step": 4276 }, { "epoch": 0.35, "grad_norm": 8.46317710728317, "learning_rate": 7.5834412425550476e-06, "loss": 0.7138, "step": 4277 }, { "epoch": 0.35, "grad_norm": 2.229481922945613, "learning_rate": 7.582314981371801e-06, "loss": 0.6913, "step": 4278 }, { "epoch": 0.35, "grad_norm": 4.920419097660212, "learning_rate": 7.581188541476099e-06, "loss": 0.8236, "step": 4279 }, { "epoch": 0.35, "grad_norm": 4.971846136754737, "learning_rate": 7.580061922945896e-06, "loss": 0.7004, "step": 4280 }, { "epoch": 0.35, "grad_norm": 3.790890194239284, "learning_rate": 7.578935125859164e-06, "loss": 0.6001, "step": 4281 }, { "epoch": 0.35, "grad_norm": 3.739403303657867, "learning_rate": 7.577808150293883e-06, "loss": 0.6626, "step": 4282 }, { "epoch": 0.35, "grad_norm": 11.138993134364888, "learning_rate": 7.576680996328046e-06, "loss": 0.6695, "step": 4283 }, { "epoch": 0.35, "grad_norm": 3.561010131528322, "learning_rate": 7.5755536640396585e-06, "loss": 0.8184, "step": 4284 }, { "epoch": 0.35, "grad_norm": 2.472230734999347, "learning_rate": 7.5744261535067436e-06, "loss": 0.6314, "step": 4285 }, { "epoch": 0.35, "grad_norm": 3.003977553554704, "learning_rate": 7.573298464807329e-06, "loss": 0.5863, "step": 4286 }, { "epoch": 0.35, "grad_norm": 3.0686257045391883, "learning_rate": 7.572170598019455e-06, "loss": 0.6578, "step": 4287 }, { "epoch": 0.35, "grad_norm": 2.544806123624027, "learning_rate": 7.5710425532211795e-06, "loss": 0.6203, "step": 4288 }, { "epoch": 0.35, "grad_norm": 4.576071171501176, "learning_rate": 7.569914330490573e-06, "loss": 0.641, "step": 4289 }, { "epoch": 0.35, "grad_norm": 4.3167412223340405, "learning_rate": 7.568785929905713e-06, "loss": 0.7007, "step": 4290 }, { "epoch": 0.35, "grad_norm": 3.309508843412498, "learning_rate": 7.567657351544691e-06, "loss": 0.7809, "step": 4291 }, { "epoch": 0.35, "grad_norm": 5.412586443488965, "learning_rate": 7.566528595485614e-06, "loss": 0.6616, "step": 4292 }, { "epoch": 0.35, "grad_norm": 4.025616402450841, "learning_rate": 7.565399661806598e-06, "loss": 0.6127, "step": 4293 }, { "epoch": 0.35, "grad_norm": 3.1648631058745846, "learning_rate": 7.564270550585773e-06, "loss": 0.8687, "step": 4294 }, { "epoch": 0.35, "grad_norm": 4.0924745924238835, "learning_rate": 7.563141261901279e-06, "loss": 0.6236, "step": 4295 }, { "epoch": 0.35, "grad_norm": 3.284148698220159, "learning_rate": 7.56201179583127e-06, "loss": 0.6316, "step": 4296 }, { "epoch": 0.35, "grad_norm": 4.769678694625544, "learning_rate": 7.560882152453914e-06, "loss": 0.7607, "step": 4297 }, { "epoch": 0.35, "grad_norm": 5.829625574435862, "learning_rate": 7.559752331847388e-06, "loss": 0.7013, "step": 4298 }, { "epoch": 0.35, "grad_norm": 2.2528921926368746, "learning_rate": 7.558622334089884e-06, "loss": 0.5965, "step": 4299 }, { "epoch": 0.35, "grad_norm": 3.152801605318248, "learning_rate": 7.557492159259603e-06, "loss": 0.6844, "step": 4300 }, { "epoch": 0.35, "grad_norm": 4.327779305291376, "learning_rate": 7.556361807434762e-06, "loss": 0.7638, "step": 4301 }, { "epoch": 0.35, "grad_norm": 2.841464875596218, "learning_rate": 7.5552312786935864e-06, "loss": 0.7442, "step": 4302 }, { "epoch": 0.35, "grad_norm": 3.9567418070259452, "learning_rate": 7.554100573114318e-06, "loss": 0.8092, "step": 4303 }, { "epoch": 0.35, "grad_norm": 2.314113493638594, "learning_rate": 7.552969690775209e-06, "loss": 0.7011, "step": 4304 }, { "epoch": 0.35, "grad_norm": 8.735612181970499, "learning_rate": 7.551838631754522e-06, "loss": 0.8591, "step": 4305 }, { "epoch": 0.35, "grad_norm": 5.238950558333852, "learning_rate": 7.550707396130533e-06, "loss": 0.7353, "step": 4306 }, { "epoch": 0.35, "grad_norm": 8.205141867471987, "learning_rate": 7.549575983981532e-06, "loss": 0.7643, "step": 4307 }, { "epoch": 0.35, "grad_norm": 4.035112391994702, "learning_rate": 7.548444395385819e-06, "loss": 0.8964, "step": 4308 }, { "epoch": 0.35, "grad_norm": 3.299592018189047, "learning_rate": 7.547312630421711e-06, "loss": 0.7828, "step": 4309 }, { "epoch": 0.35, "grad_norm": 4.163112869232628, "learning_rate": 7.546180689167526e-06, "loss": 0.7216, "step": 4310 }, { "epoch": 0.35, "grad_norm": 2.9262470424793032, "learning_rate": 7.545048571701606e-06, "loss": 0.6496, "step": 4311 }, { "epoch": 0.35, "grad_norm": 4.011712214957674, "learning_rate": 7.543916278102301e-06, "loss": 0.6719, "step": 4312 }, { "epoch": 0.35, "grad_norm": 5.111229060072679, "learning_rate": 7.542783808447971e-06, "loss": 0.5831, "step": 4313 }, { "epoch": 0.35, "grad_norm": 3.8276495122470675, "learning_rate": 7.541651162816989e-06, "loss": 0.6679, "step": 4314 }, { "epoch": 0.35, "grad_norm": 4.515969422204437, "learning_rate": 7.540518341287746e-06, "loss": 0.7352, "step": 4315 }, { "epoch": 0.35, "grad_norm": 3.7718834626352558, "learning_rate": 7.539385343938635e-06, "loss": 0.6302, "step": 4316 }, { "epoch": 0.35, "grad_norm": 2.910487524741352, "learning_rate": 7.538252170848071e-06, "loss": 0.8576, "step": 4317 }, { "epoch": 0.35, "grad_norm": 3.3754792627870502, "learning_rate": 7.537118822094474e-06, "loss": 0.7643, "step": 4318 }, { "epoch": 0.35, "grad_norm": 5.375285341172751, "learning_rate": 7.535985297756278e-06, "loss": 0.7353, "step": 4319 }, { "epoch": 0.35, "grad_norm": 3.450964116605378, "learning_rate": 7.534851597911933e-06, "loss": 0.678, "step": 4320 }, { "epoch": 0.35, "grad_norm": 6.802113764760465, "learning_rate": 7.533717722639896e-06, "loss": 0.681, "step": 4321 }, { "epoch": 0.35, "grad_norm": 4.723671888537213, "learning_rate": 7.5325836720186395e-06, "loss": 0.7163, "step": 4322 }, { "epoch": 0.35, "grad_norm": 2.844832202845109, "learning_rate": 7.531449446126646e-06, "loss": 0.8422, "step": 4323 }, { "epoch": 0.35, "grad_norm": 5.405549775057817, "learning_rate": 7.530315045042411e-06, "loss": 0.6053, "step": 4324 }, { "epoch": 0.35, "grad_norm": 5.794883352150976, "learning_rate": 7.529180468844443e-06, "loss": 0.8149, "step": 4325 }, { "epoch": 0.35, "grad_norm": 2.763794887740198, "learning_rate": 7.528045717611263e-06, "loss": 0.5991, "step": 4326 }, { "epoch": 0.35, "grad_norm": 4.21226558936452, "learning_rate": 7.5269107914214e-06, "loss": 0.7543, "step": 4327 }, { "epoch": 0.35, "grad_norm": 3.2514841798366945, "learning_rate": 7.5257756903534005e-06, "loss": 0.5784, "step": 4328 }, { "epoch": 0.35, "grad_norm": 4.109552823380717, "learning_rate": 7.52464041448582e-06, "loss": 0.8158, "step": 4329 }, { "epoch": 0.35, "grad_norm": 17.481983166844294, "learning_rate": 7.523504963897223e-06, "loss": 0.7928, "step": 4330 }, { "epoch": 0.35, "grad_norm": 77.51934481382357, "learning_rate": 7.522369338666195e-06, "loss": 0.626, "step": 4331 }, { "epoch": 0.35, "grad_norm": 3.096121078174357, "learning_rate": 7.521233538871329e-06, "loss": 0.675, "step": 4332 }, { "epoch": 0.35, "grad_norm": 3.0225664396930765, "learning_rate": 7.520097564591224e-06, "loss": 0.722, "step": 4333 }, { "epoch": 0.35, "grad_norm": 3.841735200545695, "learning_rate": 7.518961415904502e-06, "loss": 0.8584, "step": 4334 }, { "epoch": 0.35, "grad_norm": 3.9403101054438143, "learning_rate": 7.517825092889789e-06, "loss": 0.7618, "step": 4335 }, { "epoch": 0.35, "grad_norm": 8.637741653292197, "learning_rate": 7.516688595625725e-06, "loss": 0.9029, "step": 4336 }, { "epoch": 0.35, "grad_norm": 8.571593678613747, "learning_rate": 7.515551924190964e-06, "loss": 0.7626, "step": 4337 }, { "epoch": 0.35, "grad_norm": 3.684436126968125, "learning_rate": 7.5144150786641715e-06, "loss": 0.7271, "step": 4338 }, { "epoch": 0.35, "grad_norm": 2.754834524792647, "learning_rate": 7.5132780591240216e-06, "loss": 0.7724, "step": 4339 }, { "epoch": 0.35, "grad_norm": 2.7306855035362596, "learning_rate": 7.512140865649207e-06, "loss": 0.7638, "step": 4340 }, { "epoch": 0.35, "grad_norm": 2.848712832399641, "learning_rate": 7.5110034983184255e-06, "loss": 0.7882, "step": 4341 }, { "epoch": 0.35, "grad_norm": 4.882641771002021, "learning_rate": 7.509865957210393e-06, "loss": 0.795, "step": 4342 }, { "epoch": 0.35, "grad_norm": 3.2604957956172775, "learning_rate": 7.508728242403831e-06, "loss": 0.6628, "step": 4343 }, { "epoch": 0.35, "grad_norm": 3.7648995043631066, "learning_rate": 7.5075903539774785e-06, "loss": 0.7274, "step": 4344 }, { "epoch": 0.35, "grad_norm": 11.340782949888176, "learning_rate": 7.506452292010085e-06, "loss": 0.8604, "step": 4345 }, { "epoch": 0.35, "grad_norm": 6.424270119065036, "learning_rate": 7.505314056580411e-06, "loss": 0.7382, "step": 4346 }, { "epoch": 0.35, "grad_norm": 4.694682979492532, "learning_rate": 7.504175647767229e-06, "loss": 0.6887, "step": 4347 }, { "epoch": 0.35, "grad_norm": 3.2455752662804667, "learning_rate": 7.503037065649325e-06, "loss": 0.8675, "step": 4348 }, { "epoch": 0.35, "grad_norm": 4.0485837198328785, "learning_rate": 7.501898310305495e-06, "loss": 0.8403, "step": 4349 }, { "epoch": 0.35, "grad_norm": 4.232352064815178, "learning_rate": 7.500759381814551e-06, "loss": 0.6316, "step": 4350 }, { "epoch": 0.35, "grad_norm": 2.536019956477901, "learning_rate": 7.4996202802553085e-06, "loss": 0.739, "step": 4351 }, { "epoch": 0.35, "grad_norm": 2.4280947287275367, "learning_rate": 7.498481005706606e-06, "loss": 0.7394, "step": 4352 }, { "epoch": 0.35, "grad_norm": 5.383288921593991, "learning_rate": 7.497341558247285e-06, "loss": 0.7144, "step": 4353 }, { "epoch": 0.35, "grad_norm": 10.719075400449896, "learning_rate": 7.496201937956204e-06, "loss": 0.6862, "step": 4354 }, { "epoch": 0.35, "grad_norm": 3.1058513793628295, "learning_rate": 7.495062144912232e-06, "loss": 0.7191, "step": 4355 }, { "epoch": 0.35, "grad_norm": 5.403956744813072, "learning_rate": 7.493922179194249e-06, "loss": 0.6442, "step": 4356 }, { "epoch": 0.35, "grad_norm": 5.762749520896775, "learning_rate": 7.492782040881148e-06, "loss": 0.8512, "step": 4357 }, { "epoch": 0.35, "grad_norm": 2.5824931142326437, "learning_rate": 7.491641730051833e-06, "loss": 0.5986, "step": 4358 }, { "epoch": 0.35, "grad_norm": 2.3777913169090628, "learning_rate": 7.4905012467852234e-06, "loss": 0.7455, "step": 4359 }, { "epoch": 0.35, "grad_norm": 4.807002957594186, "learning_rate": 7.489360591160245e-06, "loss": 0.7312, "step": 4360 }, { "epoch": 0.35, "grad_norm": 4.170384616213076, "learning_rate": 7.48821976325584e-06, "loss": 0.7623, "step": 4361 }, { "epoch": 0.35, "grad_norm": 3.30622894237531, "learning_rate": 7.487078763150959e-06, "loss": 0.7976, "step": 4362 }, { "epoch": 0.35, "grad_norm": 4.699365350019269, "learning_rate": 7.485937590924568e-06, "loss": 0.7909, "step": 4363 }, { "epoch": 0.35, "grad_norm": 4.082362812667312, "learning_rate": 7.484796246655643e-06, "loss": 0.7012, "step": 4364 }, { "epoch": 0.35, "grad_norm": 2.6580126614536552, "learning_rate": 7.483654730423173e-06, "loss": 0.7096, "step": 4365 }, { "epoch": 0.35, "grad_norm": 4.074900852916815, "learning_rate": 7.482513042306158e-06, "loss": 0.6994, "step": 4366 }, { "epoch": 0.35, "grad_norm": 3.020106639083808, "learning_rate": 7.481371182383608e-06, "loss": 0.6411, "step": 4367 }, { "epoch": 0.35, "grad_norm": 6.391860893050497, "learning_rate": 7.480229150734548e-06, "loss": 0.7552, "step": 4368 }, { "epoch": 0.35, "grad_norm": 3.1866776090479867, "learning_rate": 7.479086947438015e-06, "loss": 0.7383, "step": 4369 }, { "epoch": 0.35, "grad_norm": 7.938616134659222, "learning_rate": 7.477944572573054e-06, "loss": 0.8675, "step": 4370 }, { "epoch": 0.36, "grad_norm": 3.729162888124308, "learning_rate": 7.476802026218726e-06, "loss": 0.8473, "step": 4371 }, { "epoch": 0.36, "grad_norm": 2.648190168076544, "learning_rate": 7.475659308454104e-06, "loss": 0.6545, "step": 4372 }, { "epoch": 0.36, "grad_norm": 2.5492860425201385, "learning_rate": 7.474516419358268e-06, "loss": 0.6718, "step": 4373 }, { "epoch": 0.36, "grad_norm": 2.962336085043656, "learning_rate": 7.4733733590103185e-06, "loss": 0.6607, "step": 4374 }, { "epoch": 0.36, "grad_norm": 4.939593749610692, "learning_rate": 7.472230127489357e-06, "loss": 0.6948, "step": 4375 }, { "epoch": 0.36, "grad_norm": 4.5483901471045325, "learning_rate": 7.471086724874503e-06, "loss": 0.7022, "step": 4376 }, { "epoch": 0.36, "grad_norm": 23.344568080240478, "learning_rate": 7.46994315124489e-06, "loss": 0.6708, "step": 4377 }, { "epoch": 0.36, "grad_norm": 3.8962352363212074, "learning_rate": 7.4687994066796585e-06, "loss": 0.9484, "step": 4378 }, { "epoch": 0.36, "grad_norm": 3.0325900516467854, "learning_rate": 7.467655491257962e-06, "loss": 0.5932, "step": 4379 }, { "epoch": 0.36, "grad_norm": 7.373328253132646, "learning_rate": 7.466511405058969e-06, "loss": 0.7201, "step": 4380 }, { "epoch": 0.36, "grad_norm": 3.2906479470233125, "learning_rate": 7.4653671481618565e-06, "loss": 0.6844, "step": 4381 }, { "epoch": 0.36, "grad_norm": 5.354385641816617, "learning_rate": 7.4642227206458125e-06, "loss": 0.8214, "step": 4382 }, { "epoch": 0.36, "grad_norm": 3.7625310585625833, "learning_rate": 7.463078122590043e-06, "loss": 0.7634, "step": 4383 }, { "epoch": 0.36, "grad_norm": 3.168165182004448, "learning_rate": 7.4619333540737556e-06, "loss": 0.6484, "step": 4384 }, { "epoch": 0.36, "grad_norm": 3.6448506509708443, "learning_rate": 7.460788415176181e-06, "loss": 0.6737, "step": 4385 }, { "epoch": 0.36, "grad_norm": 2.8296230470965487, "learning_rate": 7.459643305976552e-06, "loss": 0.6259, "step": 4386 }, { "epoch": 0.36, "grad_norm": 3.893643745956693, "learning_rate": 7.45849802655412e-06, "loss": 0.8519, "step": 4387 }, { "epoch": 0.36, "grad_norm": 2.5568576546958846, "learning_rate": 7.457352576988144e-06, "loss": 0.7352, "step": 4388 }, { "epoch": 0.36, "grad_norm": 2.9646037351542973, "learning_rate": 7.456206957357896e-06, "loss": 0.7524, "step": 4389 }, { "epoch": 0.36, "grad_norm": 5.742027316486682, "learning_rate": 7.4550611677426635e-06, "loss": 0.5943, "step": 4390 }, { "epoch": 0.36, "grad_norm": 2.473308982446472, "learning_rate": 7.453915208221739e-06, "loss": 0.7509, "step": 4391 }, { "epoch": 0.36, "grad_norm": 3.265700408273659, "learning_rate": 7.45276907887443e-06, "loss": 0.752, "step": 4392 }, { "epoch": 0.36, "grad_norm": 6.999515805677346, "learning_rate": 7.451622779780057e-06, "loss": 0.5414, "step": 4393 }, { "epoch": 0.36, "grad_norm": 2.311703141414544, "learning_rate": 7.450476311017951e-06, "loss": 0.6898, "step": 4394 }, { "epoch": 0.36, "grad_norm": 2.8718384830276156, "learning_rate": 7.449329672667456e-06, "loss": 0.7158, "step": 4395 }, { "epoch": 0.36, "grad_norm": 3.7790002479755347, "learning_rate": 7.4481828648079235e-06, "loss": 0.6822, "step": 4396 }, { "epoch": 0.36, "grad_norm": 3.5467110462830638, "learning_rate": 7.447035887518722e-06, "loss": 0.8671, "step": 4397 }, { "epoch": 0.36, "grad_norm": 3.36794392555424, "learning_rate": 7.44588874087923e-06, "loss": 0.9396, "step": 4398 }, { "epoch": 0.36, "grad_norm": 3.5598389632750376, "learning_rate": 7.4447414249688375e-06, "loss": 0.7146, "step": 4399 }, { "epoch": 0.36, "grad_norm": 3.5350313382908096, "learning_rate": 7.443593939866944e-06, "loss": 0.6315, "step": 4400 }, { "epoch": 0.36, "grad_norm": 3.5845399562367386, "learning_rate": 7.442446285652964e-06, "loss": 0.6204, "step": 4401 }, { "epoch": 0.36, "grad_norm": 21.358207546900445, "learning_rate": 7.441298462406321e-06, "loss": 0.6278, "step": 4402 }, { "epoch": 0.36, "grad_norm": 2.7893521881140986, "learning_rate": 7.440150470206453e-06, "loss": 0.7836, "step": 4403 }, { "epoch": 0.36, "grad_norm": 4.3679027234008, "learning_rate": 7.439002309132808e-06, "loss": 0.8058, "step": 4404 }, { "epoch": 0.36, "grad_norm": 2.462728113742895, "learning_rate": 7.437853979264847e-06, "loss": 0.735, "step": 4405 }, { "epoch": 0.36, "grad_norm": 6.404155790869616, "learning_rate": 7.43670548068204e-06, "loss": 0.8153, "step": 4406 }, { "epoch": 0.36, "grad_norm": 5.479978434150942, "learning_rate": 7.435556813463871e-06, "loss": 0.6841, "step": 4407 }, { "epoch": 0.36, "grad_norm": 5.6454771356866, "learning_rate": 7.434407977689837e-06, "loss": 0.6981, "step": 4408 }, { "epoch": 0.36, "grad_norm": 3.8221363121651963, "learning_rate": 7.43325897343944e-06, "loss": 0.8039, "step": 4409 }, { "epoch": 0.36, "grad_norm": 2.5910480444473136, "learning_rate": 7.432109800792201e-06, "loss": 0.5592, "step": 4410 }, { "epoch": 0.36, "grad_norm": 3.564302382351731, "learning_rate": 7.430960459827652e-06, "loss": 0.6607, "step": 4411 }, { "epoch": 0.36, "grad_norm": 6.327072800179629, "learning_rate": 7.42981095062533e-06, "loss": 0.7135, "step": 4412 }, { "epoch": 0.36, "grad_norm": 2.5503268761382865, "learning_rate": 7.428661273264792e-06, "loss": 0.7202, "step": 4413 }, { "epoch": 0.36, "grad_norm": 2.962175249739075, "learning_rate": 7.427511427825602e-06, "loss": 0.6867, "step": 4414 }, { "epoch": 0.36, "grad_norm": 5.797923456038306, "learning_rate": 7.426361414387338e-06, "loss": 0.6126, "step": 4415 }, { "epoch": 0.36, "grad_norm": 2.9750245664600543, "learning_rate": 7.4252112330295835e-06, "loss": 0.7435, "step": 4416 }, { "epoch": 0.36, "grad_norm": 4.357031784225165, "learning_rate": 7.424060883831942e-06, "loss": 0.6562, "step": 4417 }, { "epoch": 0.36, "grad_norm": 4.689394544762236, "learning_rate": 7.422910366874026e-06, "loss": 0.6558, "step": 4418 }, { "epoch": 0.36, "grad_norm": 3.362997759954026, "learning_rate": 7.421759682235454e-06, "loss": 0.8782, "step": 4419 }, { "epoch": 0.36, "grad_norm": 4.076213161988718, "learning_rate": 7.4206088299958646e-06, "loss": 0.5963, "step": 4420 }, { "epoch": 0.36, "grad_norm": 2.4998675953692286, "learning_rate": 7.4194578102349025e-06, "loss": 0.7361, "step": 4421 }, { "epoch": 0.36, "grad_norm": 10.68025149381913, "learning_rate": 7.418306623032227e-06, "loss": 0.8335, "step": 4422 }, { "epoch": 0.36, "grad_norm": 3.915984488004341, "learning_rate": 7.417155268467505e-06, "loss": 0.694, "step": 4423 }, { "epoch": 0.36, "grad_norm": 4.323354267768315, "learning_rate": 7.416003746620419e-06, "loss": 0.8888, "step": 4424 }, { "epoch": 0.36, "grad_norm": 3.0366748416549365, "learning_rate": 7.414852057570661e-06, "loss": 0.7066, "step": 4425 }, { "epoch": 0.36, "grad_norm": 3.80221444821345, "learning_rate": 7.413700201397936e-06, "loss": 0.8362, "step": 4426 }, { "epoch": 0.36, "grad_norm": 4.179382507836741, "learning_rate": 7.4125481781819594e-06, "loss": 0.7123, "step": 4427 }, { "epoch": 0.36, "grad_norm": 5.264683396950939, "learning_rate": 7.411395988002457e-06, "loss": 0.6809, "step": 4428 }, { "epoch": 0.36, "grad_norm": 2.5328985399594695, "learning_rate": 7.41024363093917e-06, "loss": 0.7276, "step": 4429 }, { "epoch": 0.36, "grad_norm": 5.269893304673372, "learning_rate": 7.409091107071849e-06, "loss": 0.6323, "step": 4430 }, { "epoch": 0.36, "grad_norm": 4.533576009059273, "learning_rate": 7.407938416480253e-06, "loss": 0.6308, "step": 4431 }, { "epoch": 0.36, "grad_norm": 2.879600882191293, "learning_rate": 7.406785559244156e-06, "loss": 0.6572, "step": 4432 }, { "epoch": 0.36, "grad_norm": 7.286176994035368, "learning_rate": 7.4056325354433445e-06, "loss": 0.7032, "step": 4433 }, { "epoch": 0.36, "grad_norm": 2.4675438183094593, "learning_rate": 7.404479345157613e-06, "loss": 0.717, "step": 4434 }, { "epoch": 0.36, "grad_norm": 3.781173474743352, "learning_rate": 7.403325988466774e-06, "loss": 0.5571, "step": 4435 }, { "epoch": 0.36, "grad_norm": 2.485568872963055, "learning_rate": 7.402172465450642e-06, "loss": 0.5699, "step": 4436 }, { "epoch": 0.36, "grad_norm": 3.906426371288215, "learning_rate": 7.4010187761890504e-06, "loss": 0.5097, "step": 4437 }, { "epoch": 0.36, "grad_norm": 3.3024995323868644, "learning_rate": 7.3998649207618425e-06, "loss": 0.7408, "step": 4438 }, { "epoch": 0.36, "grad_norm": 3.6042702688457045, "learning_rate": 7.398710899248871e-06, "loss": 0.7343, "step": 4439 }, { "epoch": 0.36, "grad_norm": 4.120637871880075, "learning_rate": 7.39755671173e-06, "loss": 0.8833, "step": 4440 }, { "epoch": 0.36, "grad_norm": 4.802276718670347, "learning_rate": 7.396402358285111e-06, "loss": 0.6656, "step": 4441 }, { "epoch": 0.36, "grad_norm": 5.523283725885031, "learning_rate": 7.395247838994087e-06, "loss": 0.8113, "step": 4442 }, { "epoch": 0.36, "grad_norm": 2.6428317951929197, "learning_rate": 7.394093153936832e-06, "loss": 0.7314, "step": 4443 }, { "epoch": 0.36, "grad_norm": 2.8348351784781376, "learning_rate": 7.392938303193257e-06, "loss": 0.6182, "step": 4444 }, { "epoch": 0.36, "grad_norm": 3.7823311607569168, "learning_rate": 7.391783286843283e-06, "loss": 0.867, "step": 4445 }, { "epoch": 0.36, "grad_norm": 2.675564225908151, "learning_rate": 7.390628104966846e-06, "loss": 0.7526, "step": 4446 }, { "epoch": 0.36, "grad_norm": 3.0716978667734183, "learning_rate": 7.389472757643892e-06, "loss": 0.7543, "step": 4447 }, { "epoch": 0.36, "grad_norm": 3.5339393854202745, "learning_rate": 7.388317244954379e-06, "loss": 0.792, "step": 4448 }, { "epoch": 0.36, "grad_norm": 2.4377307616407755, "learning_rate": 7.387161566978271e-06, "loss": 0.7818, "step": 4449 }, { "epoch": 0.36, "grad_norm": 3.4030299838671603, "learning_rate": 7.386005723795554e-06, "loss": 0.7784, "step": 4450 }, { "epoch": 0.36, "grad_norm": 5.238520446201125, "learning_rate": 7.384849715486217e-06, "loss": 0.6194, "step": 4451 }, { "epoch": 0.36, "grad_norm": 3.6739702339207656, "learning_rate": 7.383693542130265e-06, "loss": 0.7833, "step": 4452 }, { "epoch": 0.36, "grad_norm": 2.699240871470253, "learning_rate": 7.382537203807709e-06, "loss": 0.6864, "step": 4453 }, { "epoch": 0.36, "grad_norm": 2.815667069581723, "learning_rate": 7.381380700598577e-06, "loss": 0.8795, "step": 4454 }, { "epoch": 0.36, "grad_norm": 4.0636217260525305, "learning_rate": 7.380224032582908e-06, "loss": 0.6884, "step": 4455 }, { "epoch": 0.36, "grad_norm": 3.6425945293438087, "learning_rate": 7.379067199840746e-06, "loss": 0.7579, "step": 4456 }, { "epoch": 0.36, "grad_norm": 3.060351253226855, "learning_rate": 7.377910202452155e-06, "loss": 0.6402, "step": 4457 }, { "epoch": 0.36, "grad_norm": 3.842007156371223, "learning_rate": 7.376753040497207e-06, "loss": 0.7784, "step": 4458 }, { "epoch": 0.36, "grad_norm": 2.914196955308941, "learning_rate": 7.375595714055981e-06, "loss": 0.8269, "step": 4459 }, { "epoch": 0.36, "grad_norm": 3.6018975751318885, "learning_rate": 7.374438223208575e-06, "loss": 0.6826, "step": 4460 }, { "epoch": 0.36, "grad_norm": 3.588982859842889, "learning_rate": 7.373280568035093e-06, "loss": 0.7012, "step": 4461 }, { "epoch": 0.36, "grad_norm": 3.0819515408638516, "learning_rate": 7.372122748615651e-06, "loss": 0.7294, "step": 4462 }, { "epoch": 0.36, "grad_norm": 4.4930432066471555, "learning_rate": 7.370964765030381e-06, "loss": 0.6681, "step": 4463 }, { "epoch": 0.36, "grad_norm": 2.5649525708235696, "learning_rate": 7.36980661735942e-06, "loss": 0.71, "step": 4464 }, { "epoch": 0.36, "grad_norm": 4.523007518653461, "learning_rate": 7.368648305682917e-06, "loss": 0.6903, "step": 4465 }, { "epoch": 0.36, "grad_norm": 3.420544116856962, "learning_rate": 7.367489830081039e-06, "loss": 0.6694, "step": 4466 }, { "epoch": 0.36, "grad_norm": 4.097159963606105, "learning_rate": 7.3663311906339575e-06, "loss": 0.6577, "step": 4467 }, { "epoch": 0.36, "grad_norm": 3.9323260735012817, "learning_rate": 7.365172387421858e-06, "loss": 0.6595, "step": 4468 }, { "epoch": 0.36, "grad_norm": 3.2839904184587567, "learning_rate": 7.364013420524937e-06, "loss": 0.8204, "step": 4469 }, { "epoch": 0.36, "grad_norm": 3.077568178413793, "learning_rate": 7.362854290023402e-06, "loss": 0.803, "step": 4470 }, { "epoch": 0.36, "grad_norm": 2.9499308219118725, "learning_rate": 7.361694995997473e-06, "loss": 0.6457, "step": 4471 }, { "epoch": 0.36, "grad_norm": 6.008733877548259, "learning_rate": 7.3605355385273805e-06, "loss": 0.5798, "step": 4472 }, { "epoch": 0.36, "grad_norm": 2.1644681750580914, "learning_rate": 7.359375917693363e-06, "loss": 0.5895, "step": 4473 }, { "epoch": 0.36, "grad_norm": 5.284804039572399, "learning_rate": 7.358216133575678e-06, "loss": 0.8053, "step": 4474 }, { "epoch": 0.36, "grad_norm": 2.9141999765481916, "learning_rate": 7.357056186254587e-06, "loss": 0.7025, "step": 4475 }, { "epoch": 0.36, "grad_norm": 2.718037820371632, "learning_rate": 7.355896075810368e-06, "loss": 0.7647, "step": 4476 }, { "epoch": 0.36, "grad_norm": 4.8492144590168, "learning_rate": 7.354735802323305e-06, "loss": 0.7476, "step": 4477 }, { "epoch": 0.36, "grad_norm": 3.4937446532726746, "learning_rate": 7.3535753658737e-06, "loss": 0.7112, "step": 4478 }, { "epoch": 0.36, "grad_norm": 2.3530635055445606, "learning_rate": 7.3524147665418585e-06, "loss": 0.6617, "step": 4479 }, { "epoch": 0.36, "grad_norm": 3.825658092249463, "learning_rate": 7.351254004408104e-06, "loss": 0.7787, "step": 4480 }, { "epoch": 0.36, "grad_norm": 3.4362059331620687, "learning_rate": 7.350093079552768e-06, "loss": 0.6551, "step": 4481 }, { "epoch": 0.36, "grad_norm": 2.349597271509936, "learning_rate": 7.348931992056192e-06, "loss": 0.6308, "step": 4482 }, { "epoch": 0.36, "grad_norm": 6.397850722351214, "learning_rate": 7.347770741998733e-06, "loss": 0.7859, "step": 4483 }, { "epoch": 0.36, "grad_norm": 2.7586586677911478, "learning_rate": 7.346609329460757e-06, "loss": 0.7025, "step": 4484 }, { "epoch": 0.36, "grad_norm": 4.2912941470279655, "learning_rate": 7.345447754522637e-06, "loss": 0.5807, "step": 4485 }, { "epoch": 0.36, "grad_norm": 5.2182919104816285, "learning_rate": 7.344286017264765e-06, "loss": 0.6608, "step": 4486 }, { "epoch": 0.36, "grad_norm": 9.616429337382309, "learning_rate": 7.343124117767542e-06, "loss": 0.7459, "step": 4487 }, { "epoch": 0.36, "grad_norm": 3.975173007872712, "learning_rate": 7.341962056111376e-06, "loss": 0.6199, "step": 4488 }, { "epoch": 0.36, "grad_norm": 4.344522161427955, "learning_rate": 7.340799832376689e-06, "loss": 0.7651, "step": 4489 }, { "epoch": 0.36, "grad_norm": 3.9539388157709983, "learning_rate": 7.339637446643913e-06, "loss": 0.7351, "step": 4490 }, { "epoch": 0.36, "grad_norm": 5.500780946926331, "learning_rate": 7.338474898993496e-06, "loss": 0.625, "step": 4491 }, { "epoch": 0.36, "grad_norm": 2.695192884904258, "learning_rate": 7.337312189505892e-06, "loss": 0.8551, "step": 4492 }, { "epoch": 0.36, "grad_norm": 3.6660337234277947, "learning_rate": 7.336149318261565e-06, "loss": 0.6786, "step": 4493 }, { "epoch": 0.37, "grad_norm": 3.4142356380326087, "learning_rate": 7.3349862853409996e-06, "loss": 0.6263, "step": 4494 }, { "epoch": 0.37, "grad_norm": 7.343329823678661, "learning_rate": 7.333823090824679e-06, "loss": 0.6181, "step": 4495 }, { "epoch": 0.37, "grad_norm": 4.352349273214737, "learning_rate": 7.332659734793104e-06, "loss": 0.7285, "step": 4496 }, { "epoch": 0.37, "grad_norm": 3.755468306016667, "learning_rate": 7.331496217326789e-06, "loss": 0.6578, "step": 4497 }, { "epoch": 0.37, "grad_norm": 6.070947189547664, "learning_rate": 7.3303325385062555e-06, "loss": 0.5937, "step": 4498 }, { "epoch": 0.37, "grad_norm": 4.786729601064355, "learning_rate": 7.329168698412037e-06, "loss": 0.9753, "step": 4499 }, { "epoch": 0.37, "grad_norm": 17.53945792672117, "learning_rate": 7.3280046971246786e-06, "loss": 0.6946, "step": 4500 }, { "epoch": 0.37, "grad_norm": 3.1868990876330567, "learning_rate": 7.326840534724738e-06, "loss": 0.7419, "step": 4501 }, { "epoch": 0.37, "grad_norm": 4.77679280063808, "learning_rate": 7.3256762112927805e-06, "loss": 0.6692, "step": 4502 }, { "epoch": 0.37, "grad_norm": 5.282872279203256, "learning_rate": 7.324511726909387e-06, "loss": 0.6341, "step": 4503 }, { "epoch": 0.37, "grad_norm": 3.056725116253631, "learning_rate": 7.323347081655146e-06, "loss": 0.7403, "step": 4504 }, { "epoch": 0.37, "grad_norm": 2.7936693920790505, "learning_rate": 7.322182275610655e-06, "loss": 0.8717, "step": 4505 }, { "epoch": 0.37, "grad_norm": 3.1625636100261616, "learning_rate": 7.3210173088565294e-06, "loss": 0.5849, "step": 4506 }, { "epoch": 0.37, "grad_norm": 5.693060722705188, "learning_rate": 7.319852181473393e-06, "loss": 0.7963, "step": 4507 }, { "epoch": 0.37, "grad_norm": 3.299505131122427, "learning_rate": 7.318686893541879e-06, "loss": 0.8001, "step": 4508 }, { "epoch": 0.37, "grad_norm": 4.223171113197734, "learning_rate": 7.317521445142631e-06, "loss": 0.7491, "step": 4509 }, { "epoch": 0.37, "grad_norm": 3.90549369845727, "learning_rate": 7.3163558363563055e-06, "loss": 0.657, "step": 4510 }, { "epoch": 0.37, "grad_norm": 3.6712786695001736, "learning_rate": 7.315190067263574e-06, "loss": 0.6473, "step": 4511 }, { "epoch": 0.37, "grad_norm": 3.0617241975372522, "learning_rate": 7.314024137945113e-06, "loss": 0.7854, "step": 4512 }, { "epoch": 0.37, "grad_norm": 4.16181435349445, "learning_rate": 7.312858048481608e-06, "loss": 0.6128, "step": 4513 }, { "epoch": 0.37, "grad_norm": 4.581605283772196, "learning_rate": 7.311691798953765e-06, "loss": 0.6351, "step": 4514 }, { "epoch": 0.37, "grad_norm": 3.18255791428962, "learning_rate": 7.310525389442294e-06, "loss": 0.6911, "step": 4515 }, { "epoch": 0.37, "grad_norm": 5.513449865630468, "learning_rate": 7.3093588200279165e-06, "loss": 0.601, "step": 4516 }, { "epoch": 0.37, "grad_norm": 3.5127199862506826, "learning_rate": 7.308192090791368e-06, "loss": 0.5516, "step": 4517 }, { "epoch": 0.37, "grad_norm": 3.2659025156335186, "learning_rate": 7.307025201813394e-06, "loss": 0.6579, "step": 4518 }, { "epoch": 0.37, "grad_norm": 3.8030309495349726, "learning_rate": 7.30585815317475e-06, "loss": 0.6771, "step": 4519 }, { "epoch": 0.37, "grad_norm": 2.7472380661656786, "learning_rate": 7.304690944956202e-06, "loss": 0.767, "step": 4520 }, { "epoch": 0.37, "grad_norm": 2.323666872316359, "learning_rate": 7.3035235772385295e-06, "loss": 0.8885, "step": 4521 }, { "epoch": 0.37, "grad_norm": 4.359434683976116, "learning_rate": 7.302356050102522e-06, "loss": 0.6723, "step": 4522 }, { "epoch": 0.37, "grad_norm": 3.9416315942471467, "learning_rate": 7.301188363628977e-06, "loss": 0.6317, "step": 4523 }, { "epoch": 0.37, "grad_norm": 3.5357673660822733, "learning_rate": 7.30002051789871e-06, "loss": 0.647, "step": 4524 }, { "epoch": 0.37, "grad_norm": 3.3937182078312556, "learning_rate": 7.298852512992539e-06, "loss": 0.7492, "step": 4525 }, { "epoch": 0.37, "grad_norm": 5.540364366001674, "learning_rate": 7.2976843489913004e-06, "loss": 0.6415, "step": 4526 }, { "epoch": 0.37, "grad_norm": 5.3543556793176865, "learning_rate": 7.296516025975837e-06, "loss": 0.6691, "step": 4527 }, { "epoch": 0.37, "grad_norm": 8.503150703278033, "learning_rate": 7.295347544027006e-06, "loss": 0.7484, "step": 4528 }, { "epoch": 0.37, "grad_norm": 3.1146291847492726, "learning_rate": 7.2941789032256705e-06, "loss": 0.7278, "step": 4529 }, { "epoch": 0.37, "grad_norm": 3.0630323459162354, "learning_rate": 7.29301010365271e-06, "loss": 0.8748, "step": 4530 }, { "epoch": 0.37, "grad_norm": 3.5926084791304436, "learning_rate": 7.291841145389013e-06, "loss": 0.7058, "step": 4531 }, { "epoch": 0.37, "grad_norm": 3.6182340081078275, "learning_rate": 7.290672028515477e-06, "loss": 0.5328, "step": 4532 }, { "epoch": 0.37, "grad_norm": 2.7299228586058613, "learning_rate": 7.289502753113015e-06, "loss": 0.7408, "step": 4533 }, { "epoch": 0.37, "grad_norm": 3.8451954582573413, "learning_rate": 7.288333319262546e-06, "loss": 0.6903, "step": 4534 }, { "epoch": 0.37, "grad_norm": 2.53024285428926, "learning_rate": 7.287163727045002e-06, "loss": 0.6186, "step": 4535 }, { "epoch": 0.37, "grad_norm": 3.024554096339014, "learning_rate": 7.285993976541328e-06, "loss": 0.7634, "step": 4536 }, { "epoch": 0.37, "grad_norm": 2.8918017196047874, "learning_rate": 7.284824067832477e-06, "loss": 0.5123, "step": 4537 }, { "epoch": 0.37, "grad_norm": 3.6861996334807534, "learning_rate": 7.283654000999413e-06, "loss": 0.7377, "step": 4538 }, { "epoch": 0.37, "grad_norm": 3.511724204450621, "learning_rate": 7.282483776123113e-06, "loss": 0.6785, "step": 4539 }, { "epoch": 0.37, "grad_norm": 6.604730870714155, "learning_rate": 7.281313393284564e-06, "loss": 0.645, "step": 4540 }, { "epoch": 0.37, "grad_norm": 3.713225005858742, "learning_rate": 7.280142852564764e-06, "loss": 0.7845, "step": 4541 }, { "epoch": 0.37, "grad_norm": 3.9671421588655744, "learning_rate": 7.278972154044722e-06, "loss": 0.6083, "step": 4542 }, { "epoch": 0.37, "grad_norm": 2.798603295901293, "learning_rate": 7.277801297805458e-06, "loss": 0.5438, "step": 4543 }, { "epoch": 0.37, "grad_norm": 2.334182366897565, "learning_rate": 7.276630283928002e-06, "loss": 0.7704, "step": 4544 }, { "epoch": 0.37, "grad_norm": 6.733843349007048, "learning_rate": 7.275459112493395e-06, "loss": 0.8557, "step": 4545 }, { "epoch": 0.37, "grad_norm": 4.158914812034645, "learning_rate": 7.274287783582689e-06, "loss": 0.8004, "step": 4546 }, { "epoch": 0.37, "grad_norm": 4.695978509578617, "learning_rate": 7.2731162972769484e-06, "loss": 0.7563, "step": 4547 }, { "epoch": 0.37, "grad_norm": 2.331290508565121, "learning_rate": 7.271944653657248e-06, "loss": 0.7446, "step": 4548 }, { "epoch": 0.37, "grad_norm": 3.4394030356131027, "learning_rate": 7.270772852804672e-06, "loss": 0.6757, "step": 4549 }, { "epoch": 0.37, "grad_norm": 3.0920548798272485, "learning_rate": 7.2696008948003164e-06, "loss": 0.6967, "step": 4550 }, { "epoch": 0.37, "grad_norm": 3.540676870570052, "learning_rate": 7.26842877972529e-06, "loss": 0.608, "step": 4551 }, { "epoch": 0.37, "grad_norm": 2.557950061716488, "learning_rate": 7.2672565076607075e-06, "loss": 0.5225, "step": 4552 }, { "epoch": 0.37, "grad_norm": 3.7142264642875458, "learning_rate": 7.266084078687698e-06, "loss": 0.7657, "step": 4553 }, { "epoch": 0.37, "grad_norm": 11.09317960733513, "learning_rate": 7.264911492887403e-06, "loss": 0.7854, "step": 4554 }, { "epoch": 0.37, "grad_norm": 2.677222738106719, "learning_rate": 7.26373875034097e-06, "loss": 0.7034, "step": 4555 }, { "epoch": 0.37, "grad_norm": 3.038416509291698, "learning_rate": 7.2625658511295635e-06, "loss": 0.6344, "step": 4556 }, { "epoch": 0.37, "grad_norm": 3.526997371392452, "learning_rate": 7.261392795334354e-06, "loss": 0.6855, "step": 4557 }, { "epoch": 0.37, "grad_norm": 4.839224849112458, "learning_rate": 7.260219583036523e-06, "loss": 0.7645, "step": 4558 }, { "epoch": 0.37, "grad_norm": 6.486281317039733, "learning_rate": 7.259046214317266e-06, "loss": 0.7564, "step": 4559 }, { "epoch": 0.37, "grad_norm": 2.6792517499256263, "learning_rate": 7.257872689257787e-06, "loss": 0.6917, "step": 4560 }, { "epoch": 0.37, "grad_norm": 3.771886467674248, "learning_rate": 7.256699007939301e-06, "loss": 0.6858, "step": 4561 }, { "epoch": 0.37, "grad_norm": 3.344732150887603, "learning_rate": 7.255525170443034e-06, "loss": 0.5819, "step": 4562 }, { "epoch": 0.37, "grad_norm": 4.363691755572554, "learning_rate": 7.254351176850223e-06, "loss": 0.66, "step": 4563 }, { "epoch": 0.37, "grad_norm": 2.615891923513796, "learning_rate": 7.253177027242117e-06, "loss": 0.8462, "step": 4564 }, { "epoch": 0.37, "grad_norm": 3.1168405735950806, "learning_rate": 7.252002721699972e-06, "loss": 0.6546, "step": 4565 }, { "epoch": 0.37, "grad_norm": 3.6931431043567744, "learning_rate": 7.2508282603050595e-06, "loss": 0.6695, "step": 4566 }, { "epoch": 0.37, "grad_norm": 9.327837871670415, "learning_rate": 7.24965364313866e-06, "loss": 0.9064, "step": 4567 }, { "epoch": 0.37, "grad_norm": 2.481337631194375, "learning_rate": 7.248478870282063e-06, "loss": 0.7351, "step": 4568 }, { "epoch": 0.37, "grad_norm": 5.62741655654551, "learning_rate": 7.24730394181657e-06, "loss": 0.6023, "step": 4569 }, { "epoch": 0.37, "grad_norm": 3.3939429939186585, "learning_rate": 7.2461288578234955e-06, "loss": 0.679, "step": 4570 }, { "epoch": 0.37, "grad_norm": 3.573568861569186, "learning_rate": 7.2449536183841584e-06, "loss": 0.7867, "step": 4571 }, { "epoch": 0.37, "grad_norm": 2.609345271924847, "learning_rate": 7.2437782235798985e-06, "loss": 0.6368, "step": 4572 }, { "epoch": 0.37, "grad_norm": 2.6043982681065376, "learning_rate": 7.242602673492054e-06, "loss": 0.7712, "step": 4573 }, { "epoch": 0.37, "grad_norm": 6.21425089175264, "learning_rate": 7.241426968201988e-06, "loss": 0.6049, "step": 4574 }, { "epoch": 0.37, "grad_norm": 2.595482758036474, "learning_rate": 7.24025110779106e-06, "loss": 0.7128, "step": 4575 }, { "epoch": 0.37, "grad_norm": 3.9185572592968274, "learning_rate": 7.239075092340651e-06, "loss": 0.8079, "step": 4576 }, { "epoch": 0.37, "grad_norm": 4.906126656128381, "learning_rate": 7.2378989219321475e-06, "loss": 0.6028, "step": 4577 }, { "epoch": 0.37, "grad_norm": 8.80429910323126, "learning_rate": 7.236722596646946e-06, "loss": 0.7737, "step": 4578 }, { "epoch": 0.37, "grad_norm": 3.6438321064462946, "learning_rate": 7.235546116566456e-06, "loss": 0.6816, "step": 4579 }, { "epoch": 0.37, "grad_norm": 3.07118777038309, "learning_rate": 7.234369481772101e-06, "loss": 0.7659, "step": 4580 }, { "epoch": 0.37, "grad_norm": 2.7111779933442888, "learning_rate": 7.233192692345309e-06, "loss": 0.651, "step": 4581 }, { "epoch": 0.37, "grad_norm": 4.234014950863667, "learning_rate": 7.23201574836752e-06, "loss": 0.6503, "step": 4582 }, { "epoch": 0.37, "grad_norm": 4.817036907452633, "learning_rate": 7.230838649920189e-06, "loss": 0.8369, "step": 4583 }, { "epoch": 0.37, "grad_norm": 2.5171222675429257, "learning_rate": 7.229661397084775e-06, "loss": 0.6974, "step": 4584 }, { "epoch": 0.37, "grad_norm": 4.442788209780339, "learning_rate": 7.228483989942756e-06, "loss": 0.5769, "step": 4585 }, { "epoch": 0.37, "grad_norm": 6.026905561747521, "learning_rate": 7.227306428575611e-06, "loss": 0.8128, "step": 4586 }, { "epoch": 0.37, "grad_norm": 3.134403083387003, "learning_rate": 7.2261287130648374e-06, "loss": 0.8053, "step": 4587 }, { "epoch": 0.37, "grad_norm": 11.268197332653804, "learning_rate": 7.224950843491941e-06, "loss": 0.7556, "step": 4588 }, { "epoch": 0.37, "grad_norm": 4.587164494762261, "learning_rate": 7.223772819938434e-06, "loss": 0.7144, "step": 4589 }, { "epoch": 0.37, "grad_norm": 8.765766967551963, "learning_rate": 7.222594642485849e-06, "loss": 0.6391, "step": 4590 }, { "epoch": 0.37, "grad_norm": 3.6599792090456127, "learning_rate": 7.221416311215718e-06, "loss": 0.6075, "step": 4591 }, { "epoch": 0.37, "grad_norm": 2.978936563983368, "learning_rate": 7.220237826209592e-06, "loss": 0.6746, "step": 4592 }, { "epoch": 0.37, "grad_norm": 4.096708790035792, "learning_rate": 7.219059187549028e-06, "loss": 0.6987, "step": 4593 }, { "epoch": 0.37, "grad_norm": 4.214828583214547, "learning_rate": 7.217880395315596e-06, "loss": 0.6459, "step": 4594 }, { "epoch": 0.37, "grad_norm": 3.529298377320403, "learning_rate": 7.216701449590876e-06, "loss": 0.7842, "step": 4595 }, { "epoch": 0.37, "grad_norm": 8.43057087299158, "learning_rate": 7.215522350456457e-06, "loss": 0.7392, "step": 4596 }, { "epoch": 0.37, "grad_norm": 6.083927468428668, "learning_rate": 7.214343097993944e-06, "loss": 0.8105, "step": 4597 }, { "epoch": 0.37, "grad_norm": 2.9975262418947, "learning_rate": 7.213163692284943e-06, "loss": 0.7509, "step": 4598 }, { "epoch": 0.37, "grad_norm": 8.21083632358296, "learning_rate": 7.211984133411081e-06, "loss": 0.5472, "step": 4599 }, { "epoch": 0.37, "grad_norm": 5.114782825420015, "learning_rate": 7.21080442145399e-06, "loss": 0.7532, "step": 4600 }, { "epoch": 0.37, "grad_norm": 4.942817601309485, "learning_rate": 7.209624556495312e-06, "loss": 0.599, "step": 4601 }, { "epoch": 0.37, "grad_norm": 3.3857090339517315, "learning_rate": 7.2084445386167e-06, "loss": 0.7096, "step": 4602 }, { "epoch": 0.37, "grad_norm": 8.477506880956728, "learning_rate": 7.207264367899822e-06, "loss": 0.8509, "step": 4603 }, { "epoch": 0.37, "grad_norm": 3.5150950399943772, "learning_rate": 7.206084044426351e-06, "loss": 0.9706, "step": 4604 }, { "epoch": 0.37, "grad_norm": 3.378883747827981, "learning_rate": 7.204903568277975e-06, "loss": 0.6458, "step": 4605 }, { "epoch": 0.37, "grad_norm": 2.825083210936648, "learning_rate": 7.203722939536386e-06, "loss": 0.7206, "step": 4606 }, { "epoch": 0.37, "grad_norm": 3.029207355109933, "learning_rate": 7.202542158283297e-06, "loss": 0.8459, "step": 4607 }, { "epoch": 0.37, "grad_norm": 4.0785525092058394, "learning_rate": 7.20136122460042e-06, "loss": 0.6555, "step": 4608 }, { "epoch": 0.37, "grad_norm": 3.5358981019320828, "learning_rate": 7.2001801385694855e-06, "loss": 0.7707, "step": 4609 }, { "epoch": 0.37, "grad_norm": 2.8737566113518436, "learning_rate": 7.198998900272234e-06, "loss": 0.7383, "step": 4610 }, { "epoch": 0.37, "grad_norm": 4.506566893656891, "learning_rate": 7.19781750979041e-06, "loss": 0.7377, "step": 4611 }, { "epoch": 0.37, "grad_norm": 3.2251487017150313, "learning_rate": 7.196635967205776e-06, "loss": 0.8004, "step": 4612 }, { "epoch": 0.37, "grad_norm": 2.3122448031558185, "learning_rate": 7.195454272600104e-06, "loss": 0.5337, "step": 4613 }, { "epoch": 0.37, "grad_norm": 3.430526040477239, "learning_rate": 7.194272426055171e-06, "loss": 0.8458, "step": 4614 }, { "epoch": 0.37, "grad_norm": 3.1968625969383404, "learning_rate": 7.193090427652769e-06, "loss": 0.821, "step": 4615 }, { "epoch": 0.37, "grad_norm": 3.208762288933861, "learning_rate": 7.191908277474703e-06, "loss": 0.5317, "step": 4616 }, { "epoch": 0.37, "grad_norm": 9.598094212414262, "learning_rate": 7.190725975602781e-06, "loss": 0.6212, "step": 4617 }, { "epoch": 0.38, "grad_norm": 4.823046359083563, "learning_rate": 7.189543522118828e-06, "loss": 0.5381, "step": 4618 }, { "epoch": 0.38, "grad_norm": 4.981238625431476, "learning_rate": 7.188360917104676e-06, "loss": 0.6638, "step": 4619 }, { "epoch": 0.38, "grad_norm": 2.786435191993864, "learning_rate": 7.187178160642172e-06, "loss": 0.6756, "step": 4620 }, { "epoch": 0.38, "grad_norm": 4.503388179848868, "learning_rate": 7.185995252813165e-06, "loss": 0.7356, "step": 4621 }, { "epoch": 0.38, "grad_norm": 3.497944670610621, "learning_rate": 7.184812193699523e-06, "loss": 0.696, "step": 4622 }, { "epoch": 0.38, "grad_norm": 5.2878611448171675, "learning_rate": 7.183628983383122e-06, "loss": 0.7148, "step": 4623 }, { "epoch": 0.38, "grad_norm": 3.4466132957046325, "learning_rate": 7.182445621945844e-06, "loss": 0.7535, "step": 4624 }, { "epoch": 0.38, "grad_norm": 3.8373019503464616, "learning_rate": 7.181262109469588e-06, "loss": 0.8449, "step": 4625 }, { "epoch": 0.38, "grad_norm": 3.3730537725653087, "learning_rate": 7.180078446036259e-06, "loss": 0.675, "step": 4626 }, { "epoch": 0.38, "grad_norm": 3.4536043344573524, "learning_rate": 7.178894631727776e-06, "loss": 0.5769, "step": 4627 }, { "epoch": 0.38, "grad_norm": 3.4422784999166303, "learning_rate": 7.177710666626064e-06, "loss": 0.5517, "step": 4628 }, { "epoch": 0.38, "grad_norm": 4.342433814512476, "learning_rate": 7.1765265508130625e-06, "loss": 0.6372, "step": 4629 }, { "epoch": 0.38, "grad_norm": 2.813416200574221, "learning_rate": 7.175342284370719e-06, "loss": 0.6313, "step": 4630 }, { "epoch": 0.38, "grad_norm": 2.614986656758312, "learning_rate": 7.174157867380992e-06, "loss": 0.7583, "step": 4631 }, { "epoch": 0.38, "grad_norm": 3.536362081041492, "learning_rate": 7.1729732999258515e-06, "loss": 0.6173, "step": 4632 }, { "epoch": 0.38, "grad_norm": 2.481696936079603, "learning_rate": 7.1717885820872766e-06, "loss": 0.6383, "step": 4633 }, { "epoch": 0.38, "grad_norm": 2.7623252405885337, "learning_rate": 7.170603713947256e-06, "loss": 0.6752, "step": 4634 }, { "epoch": 0.38, "grad_norm": 6.635907439727863, "learning_rate": 7.169418695587791e-06, "loss": 0.662, "step": 4635 }, { "epoch": 0.38, "grad_norm": 2.826614426957756, "learning_rate": 7.168233527090893e-06, "loss": 0.5858, "step": 4636 }, { "epoch": 0.38, "grad_norm": 4.093919599114829, "learning_rate": 7.167048208538584e-06, "loss": 0.7368, "step": 4637 }, { "epoch": 0.38, "grad_norm": 3.2721269813731015, "learning_rate": 7.165862740012892e-06, "loss": 0.6526, "step": 4638 }, { "epoch": 0.38, "grad_norm": 2.841651248068161, "learning_rate": 7.164677121595862e-06, "loss": 0.6743, "step": 4639 }, { "epoch": 0.38, "grad_norm": 3.9093931891702773, "learning_rate": 7.163491353369545e-06, "loss": 0.7495, "step": 4640 }, { "epoch": 0.38, "grad_norm": 2.9809105998952536, "learning_rate": 7.1623054354160045e-06, "loss": 0.7539, "step": 4641 }, { "epoch": 0.38, "grad_norm": 5.272578972408287, "learning_rate": 7.161119367817313e-06, "loss": 0.6042, "step": 4642 }, { "epoch": 0.38, "grad_norm": 5.430665378882515, "learning_rate": 7.1599331506555535e-06, "loss": 0.5704, "step": 4643 }, { "epoch": 0.38, "grad_norm": 3.738495536196233, "learning_rate": 7.158746784012819e-06, "loss": 0.6285, "step": 4644 }, { "epoch": 0.38, "grad_norm": 7.62894779917687, "learning_rate": 7.157560267971214e-06, "loss": 0.7959, "step": 4645 }, { "epoch": 0.38, "grad_norm": 4.675281474246245, "learning_rate": 7.156373602612854e-06, "loss": 0.9053, "step": 4646 }, { "epoch": 0.38, "grad_norm": 4.775749864698209, "learning_rate": 7.155186788019864e-06, "loss": 0.4985, "step": 4647 }, { "epoch": 0.38, "grad_norm": 11.954451505133795, "learning_rate": 7.153999824274377e-06, "loss": 0.5708, "step": 4648 }, { "epoch": 0.38, "grad_norm": 4.29264108181392, "learning_rate": 7.152812711458541e-06, "loss": 0.5455, "step": 4649 }, { "epoch": 0.38, "grad_norm": 3.4668578855760632, "learning_rate": 7.151625449654509e-06, "loss": 0.6797, "step": 4650 }, { "epoch": 0.38, "grad_norm": 2.268808338807677, "learning_rate": 7.150438038944448e-06, "loss": 0.6572, "step": 4651 }, { "epoch": 0.38, "grad_norm": 3.7225077219565583, "learning_rate": 7.149250479410535e-06, "loss": 0.6027, "step": 4652 }, { "epoch": 0.38, "grad_norm": 3.0397708159013193, "learning_rate": 7.148062771134956e-06, "loss": 0.6133, "step": 4653 }, { "epoch": 0.38, "grad_norm": 7.205837471556408, "learning_rate": 7.146874914199906e-06, "loss": 0.7338, "step": 4654 }, { "epoch": 0.38, "grad_norm": 7.386212757823452, "learning_rate": 7.1456869086875955e-06, "loss": 0.6737, "step": 4655 }, { "epoch": 0.38, "grad_norm": 8.441754556305513, "learning_rate": 7.1444987546802415e-06, "loss": 0.5709, "step": 4656 }, { "epoch": 0.38, "grad_norm": 2.8555547065947873, "learning_rate": 7.1433104522600705e-06, "loss": 0.7045, "step": 4657 }, { "epoch": 0.38, "grad_norm": 4.873391244255208, "learning_rate": 7.1421220015093195e-06, "loss": 0.598, "step": 4658 }, { "epoch": 0.38, "grad_norm": 3.1599114860935478, "learning_rate": 7.1409334025102395e-06, "loss": 0.6517, "step": 4659 }, { "epoch": 0.38, "grad_norm": 2.9712183550675904, "learning_rate": 7.139744655345087e-06, "loss": 0.583, "step": 4660 }, { "epoch": 0.38, "grad_norm": 3.2435078418245027, "learning_rate": 7.138555760096131e-06, "loss": 0.7729, "step": 4661 }, { "epoch": 0.38, "grad_norm": 2.904077071328026, "learning_rate": 7.137366716845651e-06, "loss": 0.661, "step": 4662 }, { "epoch": 0.38, "grad_norm": 12.39071428593291, "learning_rate": 7.136177525675937e-06, "loss": 0.7207, "step": 4663 }, { "epoch": 0.38, "grad_norm": 8.321056109513979, "learning_rate": 7.134988186669287e-06, "loss": 0.7683, "step": 4664 }, { "epoch": 0.38, "grad_norm": 9.271932411096461, "learning_rate": 7.133798699908012e-06, "loss": 0.5346, "step": 4665 }, { "epoch": 0.38, "grad_norm": 3.7271538233796817, "learning_rate": 7.132609065474432e-06, "loss": 0.7361, "step": 4666 }, { "epoch": 0.38, "grad_norm": 5.8818397221116845, "learning_rate": 7.131419283450875e-06, "loss": 0.6833, "step": 4667 }, { "epoch": 0.38, "grad_norm": 3.122850247102864, "learning_rate": 7.130229353919685e-06, "loss": 0.8497, "step": 4668 }, { "epoch": 0.38, "grad_norm": 5.098573850938509, "learning_rate": 7.129039276963209e-06, "loss": 0.5876, "step": 4669 }, { "epoch": 0.38, "grad_norm": 5.450692122221304, "learning_rate": 7.12784905266381e-06, "loss": 0.7207, "step": 4670 }, { "epoch": 0.38, "grad_norm": 2.678522930421112, "learning_rate": 7.126658681103858e-06, "loss": 0.7842, "step": 4671 }, { "epoch": 0.38, "grad_norm": 4.278532112478232, "learning_rate": 7.125468162365736e-06, "loss": 0.6208, "step": 4672 }, { "epoch": 0.38, "grad_norm": 11.368575576046588, "learning_rate": 7.124277496531834e-06, "loss": 0.6432, "step": 4673 }, { "epoch": 0.38, "grad_norm": 5.359932771442346, "learning_rate": 7.123086683684554e-06, "loss": 0.5558, "step": 4674 }, { "epoch": 0.38, "grad_norm": 2.898097619127136, "learning_rate": 7.121895723906306e-06, "loss": 0.8041, "step": 4675 }, { "epoch": 0.38, "grad_norm": 3.47237108497763, "learning_rate": 7.1207046172795145e-06, "loss": 0.6061, "step": 4676 }, { "epoch": 0.38, "grad_norm": 4.320518907948175, "learning_rate": 7.1195133638866085e-06, "loss": 0.7361, "step": 4677 }, { "epoch": 0.38, "grad_norm": 3.670823585036834, "learning_rate": 7.118321963810033e-06, "loss": 0.7705, "step": 4678 }, { "epoch": 0.38, "grad_norm": 2.6007285772395137, "learning_rate": 7.117130417132241e-06, "loss": 0.6766, "step": 4679 }, { "epoch": 0.38, "grad_norm": 2.640208804132585, "learning_rate": 7.115938723935693e-06, "loss": 0.7198, "step": 4680 }, { "epoch": 0.38, "grad_norm": 2.7550458332469625, "learning_rate": 7.114746884302862e-06, "loss": 0.6745, "step": 4681 }, { "epoch": 0.38, "grad_norm": 2.8387949986935954, "learning_rate": 7.113554898316231e-06, "loss": 0.6555, "step": 4682 }, { "epoch": 0.38, "grad_norm": 3.6471663756970596, "learning_rate": 7.1123627660582925e-06, "loss": 0.654, "step": 4683 }, { "epoch": 0.38, "grad_norm": 2.5069485078757863, "learning_rate": 7.111170487611551e-06, "loss": 0.7319, "step": 4684 }, { "epoch": 0.38, "grad_norm": 3.01650507537378, "learning_rate": 7.109978063058518e-06, "loss": 0.7052, "step": 4685 }, { "epoch": 0.38, "grad_norm": 11.417044643219272, "learning_rate": 7.108785492481718e-06, "loss": 0.5815, "step": 4686 }, { "epoch": 0.38, "grad_norm": 6.12751161220626, "learning_rate": 7.107592775963683e-06, "loss": 0.5818, "step": 4687 }, { "epoch": 0.38, "grad_norm": 3.906952307866992, "learning_rate": 7.106399913586958e-06, "loss": 0.7939, "step": 4688 }, { "epoch": 0.38, "grad_norm": 4.634279269787878, "learning_rate": 7.105206905434097e-06, "loss": 0.8369, "step": 4689 }, { "epoch": 0.38, "grad_norm": 2.456524174934351, "learning_rate": 7.104013751587662e-06, "loss": 0.7736, "step": 4690 }, { "epoch": 0.38, "grad_norm": 3.2869926364334243, "learning_rate": 7.1028204521302255e-06, "loss": 0.6946, "step": 4691 }, { "epoch": 0.38, "grad_norm": 2.8141452804517364, "learning_rate": 7.101627007144375e-06, "loss": 0.7007, "step": 4692 }, { "epoch": 0.38, "grad_norm": 3.672130284730923, "learning_rate": 7.100433416712703e-06, "loss": 0.6419, "step": 4693 }, { "epoch": 0.38, "grad_norm": 25.416486132647815, "learning_rate": 7.099239680917813e-06, "loss": 0.7434, "step": 4694 }, { "epoch": 0.38, "grad_norm": 5.574362086676644, "learning_rate": 7.098045799842318e-06, "loss": 0.7044, "step": 4695 }, { "epoch": 0.38, "grad_norm": 4.736531071837449, "learning_rate": 7.0968517735688445e-06, "loss": 0.6905, "step": 4696 }, { "epoch": 0.38, "grad_norm": 3.4878564618133954, "learning_rate": 7.095657602180025e-06, "loss": 0.7594, "step": 4697 }, { "epoch": 0.38, "grad_norm": 5.171514770025322, "learning_rate": 7.094463285758505e-06, "loss": 0.6537, "step": 4698 }, { "epoch": 0.38, "grad_norm": 3.317578098449159, "learning_rate": 7.093268824386936e-06, "loss": 0.7155, "step": 4699 }, { "epoch": 0.38, "grad_norm": 2.4636745818904853, "learning_rate": 7.0920742181479865e-06, "loss": 0.7538, "step": 4700 }, { "epoch": 0.38, "grad_norm": 3.547806076694837, "learning_rate": 7.090879467124325e-06, "loss": 0.5509, "step": 4701 }, { "epoch": 0.38, "grad_norm": 3.2563259121455865, "learning_rate": 7.089684571398641e-06, "loss": 0.5816, "step": 4702 }, { "epoch": 0.38, "grad_norm": 3.2255120247281672, "learning_rate": 7.0884895310536276e-06, "loss": 0.783, "step": 4703 }, { "epoch": 0.38, "grad_norm": 5.776721035622656, "learning_rate": 7.087294346171987e-06, "loss": 0.6842, "step": 4704 }, { "epoch": 0.38, "grad_norm": 5.048516985531098, "learning_rate": 7.086099016836436e-06, "loss": 0.6774, "step": 4705 }, { "epoch": 0.38, "grad_norm": 4.063302664025569, "learning_rate": 7.084903543129699e-06, "loss": 0.7017, "step": 4706 }, { "epoch": 0.38, "grad_norm": 6.383645684848338, "learning_rate": 7.083707925134507e-06, "loss": 0.8186, "step": 4707 }, { "epoch": 0.38, "grad_norm": 4.125286107161097, "learning_rate": 7.082512162933606e-06, "loss": 0.6044, "step": 4708 }, { "epoch": 0.38, "grad_norm": 3.78441046497153, "learning_rate": 7.081316256609752e-06, "loss": 0.7631, "step": 4709 }, { "epoch": 0.38, "grad_norm": 3.5065912131038317, "learning_rate": 7.080120206245709e-06, "loss": 0.7189, "step": 4710 }, { "epoch": 0.38, "grad_norm": 2.701618202613849, "learning_rate": 7.078924011924248e-06, "loss": 0.6737, "step": 4711 }, { "epoch": 0.38, "grad_norm": 2.292711295292416, "learning_rate": 7.077727673728156e-06, "loss": 0.6707, "step": 4712 }, { "epoch": 0.38, "grad_norm": 3.534182666841607, "learning_rate": 7.076531191740228e-06, "loss": 0.7193, "step": 4713 }, { "epoch": 0.38, "grad_norm": 3.1046399084071057, "learning_rate": 7.075334566043266e-06, "loss": 0.7456, "step": 4714 }, { "epoch": 0.38, "grad_norm": 7.771713752398221, "learning_rate": 7.074137796720083e-06, "loss": 0.6358, "step": 4715 }, { "epoch": 0.38, "grad_norm": 3.222820471856443, "learning_rate": 7.0729408838535075e-06, "loss": 0.705, "step": 4716 }, { "epoch": 0.38, "grad_norm": 4.336394645030191, "learning_rate": 7.071743827526367e-06, "loss": 0.8549, "step": 4717 }, { "epoch": 0.38, "grad_norm": 3.627849959579447, "learning_rate": 7.07054662782151e-06, "loss": 0.8724, "step": 4718 }, { "epoch": 0.38, "grad_norm": 3.8270295955216893, "learning_rate": 7.06934928482179e-06, "loss": 0.8338, "step": 4719 }, { "epoch": 0.38, "grad_norm": 3.1567612307157593, "learning_rate": 7.06815179861007e-06, "loss": 0.7705, "step": 4720 }, { "epoch": 0.38, "grad_norm": 3.6584244116093805, "learning_rate": 7.066954169269225e-06, "loss": 0.6964, "step": 4721 }, { "epoch": 0.38, "grad_norm": 3.6982942016067466, "learning_rate": 7.065756396882134e-06, "loss": 0.7037, "step": 4722 }, { "epoch": 0.38, "grad_norm": 2.664422466104552, "learning_rate": 7.064558481531695e-06, "loss": 0.5922, "step": 4723 }, { "epoch": 0.38, "grad_norm": 2.735010415736837, "learning_rate": 7.063360423300808e-06, "loss": 0.669, "step": 4724 }, { "epoch": 0.38, "grad_norm": 8.837962127996956, "learning_rate": 7.0621622222723875e-06, "loss": 0.7479, "step": 4725 }, { "epoch": 0.38, "grad_norm": 3.8384119996737094, "learning_rate": 7.060963878529359e-06, "loss": 0.6741, "step": 4726 }, { "epoch": 0.38, "grad_norm": 3.3298489327790572, "learning_rate": 7.059765392154651e-06, "loss": 0.6785, "step": 4727 }, { "epoch": 0.38, "grad_norm": 7.207616723323181, "learning_rate": 7.058566763231209e-06, "loss": 0.7337, "step": 4728 }, { "epoch": 0.38, "grad_norm": 3.003171316423661, "learning_rate": 7.0573679918419855e-06, "loss": 0.7798, "step": 4729 }, { "epoch": 0.38, "grad_norm": 8.500366819448388, "learning_rate": 7.056169078069943e-06, "loss": 0.6808, "step": 4730 }, { "epoch": 0.38, "grad_norm": 4.359801025012354, "learning_rate": 7.054970021998054e-06, "loss": 0.563, "step": 4731 }, { "epoch": 0.38, "grad_norm": 3.830337464691882, "learning_rate": 7.0537708237092985e-06, "loss": 0.7713, "step": 4732 }, { "epoch": 0.38, "grad_norm": 12.434751353758072, "learning_rate": 7.052571483286672e-06, "loss": 0.8095, "step": 4733 }, { "epoch": 0.38, "grad_norm": 4.176176076309689, "learning_rate": 7.0513720008131745e-06, "loss": 0.6965, "step": 4734 }, { "epoch": 0.38, "grad_norm": 3.1911186745988207, "learning_rate": 7.050172376371817e-06, "loss": 0.957, "step": 4735 }, { "epoch": 0.38, "grad_norm": 11.419599361262877, "learning_rate": 7.048972610045624e-06, "loss": 0.879, "step": 4736 }, { "epoch": 0.38, "grad_norm": 5.785404441556161, "learning_rate": 7.0477727019176235e-06, "loss": 0.8006, "step": 4737 }, { "epoch": 0.38, "grad_norm": 16.700054897768478, "learning_rate": 7.04657265207086e-06, "loss": 0.7379, "step": 4738 }, { "epoch": 0.38, "grad_norm": 2.31001164582755, "learning_rate": 7.045372460588381e-06, "loss": 0.7344, "step": 4739 }, { "epoch": 0.38, "grad_norm": 3.090967187510388, "learning_rate": 7.044172127553249e-06, "loss": 0.6778, "step": 4740 }, { "epoch": 0.39, "grad_norm": 3.796858230944286, "learning_rate": 7.042971653048535e-06, "loss": 0.7779, "step": 4741 }, { "epoch": 0.39, "grad_norm": 3.0251323675608686, "learning_rate": 7.0417710371573185e-06, "loss": 0.783, "step": 4742 }, { "epoch": 0.39, "grad_norm": 3.139367508370477, "learning_rate": 7.0405702799626905e-06, "loss": 0.5638, "step": 4743 }, { "epoch": 0.39, "grad_norm": 4.2397039366983975, "learning_rate": 7.0393693815477505e-06, "loss": 0.5706, "step": 4744 }, { "epoch": 0.39, "grad_norm": 3.123173958836047, "learning_rate": 7.038168341995609e-06, "loss": 0.9192, "step": 4745 }, { "epoch": 0.39, "grad_norm": 20.89906946577019, "learning_rate": 7.036967161389386e-06, "loss": 0.6656, "step": 4746 }, { "epoch": 0.39, "grad_norm": 2.729162646857455, "learning_rate": 7.035765839812208e-06, "loss": 0.6235, "step": 4747 }, { "epoch": 0.39, "grad_norm": 3.722917781428734, "learning_rate": 7.034564377347215e-06, "loss": 0.6638, "step": 4748 }, { "epoch": 0.39, "grad_norm": 3.6165913437908417, "learning_rate": 7.033362774077557e-06, "loss": 0.658, "step": 4749 }, { "epoch": 0.39, "grad_norm": 2.97570762293132, "learning_rate": 7.032161030086392e-06, "loss": 0.8415, "step": 4750 }, { "epoch": 0.39, "grad_norm": 2.3525135934384527, "learning_rate": 7.030959145456888e-06, "loss": 0.7325, "step": 4751 }, { "epoch": 0.39, "grad_norm": 6.5214968914635625, "learning_rate": 7.029757120272222e-06, "loss": 0.6022, "step": 4752 }, { "epoch": 0.39, "grad_norm": 3.081810895384827, "learning_rate": 7.028554954615585e-06, "loss": 0.6031, "step": 4753 }, { "epoch": 0.39, "grad_norm": 4.37171912183395, "learning_rate": 7.027352648570173e-06, "loss": 0.7175, "step": 4754 }, { "epoch": 0.39, "grad_norm": 2.7692945793723376, "learning_rate": 7.026150202219191e-06, "loss": 0.6536, "step": 4755 }, { "epoch": 0.39, "grad_norm": 2.962069871900144, "learning_rate": 7.0249476156458574e-06, "loss": 0.7136, "step": 4756 }, { "epoch": 0.39, "grad_norm": 3.533535732389279, "learning_rate": 7.0237448889333985e-06, "loss": 0.623, "step": 4757 }, { "epoch": 0.39, "grad_norm": 2.733575254379372, "learning_rate": 7.022542022165051e-06, "loss": 0.5744, "step": 4758 }, { "epoch": 0.39, "grad_norm": 3.443727789975465, "learning_rate": 7.02133901542406e-06, "loss": 0.7227, "step": 4759 }, { "epoch": 0.39, "grad_norm": 2.8484237733229407, "learning_rate": 7.020135868793683e-06, "loss": 0.7205, "step": 4760 }, { "epoch": 0.39, "grad_norm": 8.262231973112538, "learning_rate": 7.018932582357182e-06, "loss": 0.7336, "step": 4761 }, { "epoch": 0.39, "grad_norm": 2.5305398197549156, "learning_rate": 7.017729156197836e-06, "loss": 0.7715, "step": 4762 }, { "epoch": 0.39, "grad_norm": 4.528294715825457, "learning_rate": 7.0165255903989275e-06, "loss": 0.6227, "step": 4763 }, { "epoch": 0.39, "grad_norm": 3.962359345790988, "learning_rate": 7.01532188504375e-06, "loss": 0.6327, "step": 4764 }, { "epoch": 0.39, "grad_norm": 2.9968972709091957, "learning_rate": 7.0141180402156085e-06, "loss": 0.5927, "step": 4765 }, { "epoch": 0.39, "grad_norm": 3.039593348032703, "learning_rate": 7.0129140559978184e-06, "loss": 0.7199, "step": 4766 }, { "epoch": 0.39, "grad_norm": 15.449739397911483, "learning_rate": 7.011709932473699e-06, "loss": 0.5359, "step": 4767 }, { "epoch": 0.39, "grad_norm": 6.9289384120405915, "learning_rate": 7.010505669726586e-06, "loss": 0.6328, "step": 4768 }, { "epoch": 0.39, "grad_norm": 4.679805263166353, "learning_rate": 7.0093012678398234e-06, "loss": 0.757, "step": 4769 }, { "epoch": 0.39, "grad_norm": 3.590905785872564, "learning_rate": 7.008096726896761e-06, "loss": 0.7391, "step": 4770 }, { "epoch": 0.39, "grad_norm": 3.5995779970118895, "learning_rate": 7.00689204698076e-06, "loss": 0.9249, "step": 4771 }, { "epoch": 0.39, "grad_norm": 2.5076534012899625, "learning_rate": 7.005687228175192e-06, "loss": 0.7813, "step": 4772 }, { "epoch": 0.39, "grad_norm": 6.610103732360768, "learning_rate": 7.004482270563441e-06, "loss": 0.6416, "step": 4773 }, { "epoch": 0.39, "grad_norm": 3.5101774440748144, "learning_rate": 7.0032771742288945e-06, "loss": 0.7388, "step": 4774 }, { "epoch": 0.39, "grad_norm": 3.8045407712194037, "learning_rate": 7.002071939254953e-06, "loss": 0.644, "step": 4775 }, { "epoch": 0.39, "grad_norm": 4.611748653957224, "learning_rate": 7.00086656572503e-06, "loss": 0.705, "step": 4776 }, { "epoch": 0.39, "grad_norm": 3.292408249368622, "learning_rate": 6.99966105372254e-06, "loss": 0.8843, "step": 4777 }, { "epoch": 0.39, "grad_norm": 6.196076740072117, "learning_rate": 6.998455403330915e-06, "loss": 0.7964, "step": 4778 }, { "epoch": 0.39, "grad_norm": 3.854180307343359, "learning_rate": 6.997249614633592e-06, "loss": 0.7312, "step": 4779 }, { "epoch": 0.39, "grad_norm": 2.7662169052403867, "learning_rate": 6.99604368771402e-06, "loss": 0.758, "step": 4780 }, { "epoch": 0.39, "grad_norm": 12.82854700120382, "learning_rate": 6.994837622655657e-06, "loss": 0.8026, "step": 4781 }, { "epoch": 0.39, "grad_norm": 3.9924703274932507, "learning_rate": 6.993631419541971e-06, "loss": 0.7989, "step": 4782 }, { "epoch": 0.39, "grad_norm": 3.394679096204586, "learning_rate": 6.992425078456436e-06, "loss": 0.576, "step": 4783 }, { "epoch": 0.39, "grad_norm": 3.2052315446281074, "learning_rate": 6.991218599482541e-06, "loss": 0.7248, "step": 4784 }, { "epoch": 0.39, "grad_norm": 5.829783402074966, "learning_rate": 6.9900119827037815e-06, "loss": 0.7393, "step": 4785 }, { "epoch": 0.39, "grad_norm": 4.505100124682155, "learning_rate": 6.988805228203662e-06, "loss": 0.6921, "step": 4786 }, { "epoch": 0.39, "grad_norm": 5.99520124462559, "learning_rate": 6.9875983360657e-06, "loss": 0.7645, "step": 4787 }, { "epoch": 0.39, "grad_norm": 3.128365653463775, "learning_rate": 6.9863913063734155e-06, "loss": 0.6814, "step": 4788 }, { "epoch": 0.39, "grad_norm": 4.219836684072162, "learning_rate": 6.985184139210347e-06, "loss": 0.6192, "step": 4789 }, { "epoch": 0.39, "grad_norm": 5.060870354550515, "learning_rate": 6.983976834660036e-06, "loss": 0.8057, "step": 4790 }, { "epoch": 0.39, "grad_norm": 4.853054135697003, "learning_rate": 6.982769392806035e-06, "loss": 0.6226, "step": 4791 }, { "epoch": 0.39, "grad_norm": 5.590901122362912, "learning_rate": 6.981561813731909e-06, "loss": 0.7665, "step": 4792 }, { "epoch": 0.39, "grad_norm": 3.872390955556565, "learning_rate": 6.980354097521227e-06, "loss": 0.6474, "step": 4793 }, { "epoch": 0.39, "grad_norm": 5.05894404233163, "learning_rate": 6.979146244257573e-06, "loss": 0.8038, "step": 4794 }, { "epoch": 0.39, "grad_norm": 3.2985392079290476, "learning_rate": 6.977938254024537e-06, "loss": 0.6575, "step": 4795 }, { "epoch": 0.39, "grad_norm": 3.200526478924465, "learning_rate": 6.9767301269057195e-06, "loss": 0.64, "step": 4796 }, { "epoch": 0.39, "grad_norm": 3.034106899897965, "learning_rate": 6.975521862984731e-06, "loss": 0.7676, "step": 4797 }, { "epoch": 0.39, "grad_norm": 7.9877283184173535, "learning_rate": 6.97431346234519e-06, "loss": 0.7624, "step": 4798 }, { "epoch": 0.39, "grad_norm": 3.4660120769015514, "learning_rate": 6.9731049250707274e-06, "loss": 0.7593, "step": 4799 }, { "epoch": 0.39, "grad_norm": 3.344396575082435, "learning_rate": 6.971896251244978e-06, "loss": 0.7253, "step": 4800 }, { "epoch": 0.39, "grad_norm": 4.0084637416976685, "learning_rate": 6.9706874409515934e-06, "loss": 0.7032, "step": 4801 }, { "epoch": 0.39, "grad_norm": 3.772271827388688, "learning_rate": 6.969478494274231e-06, "loss": 0.7033, "step": 4802 }, { "epoch": 0.39, "grad_norm": 3.2229279627551426, "learning_rate": 6.968269411296555e-06, "loss": 0.7639, "step": 4803 }, { "epoch": 0.39, "grad_norm": 3.0697043020637014, "learning_rate": 6.9670601921022405e-06, "loss": 0.9466, "step": 4804 }, { "epoch": 0.39, "grad_norm": 4.711419929329179, "learning_rate": 6.965850836774976e-06, "loss": 0.6672, "step": 4805 }, { "epoch": 0.39, "grad_norm": 3.2544511999017, "learning_rate": 6.9646413453984576e-06, "loss": 0.7028, "step": 4806 }, { "epoch": 0.39, "grad_norm": 5.231057528667463, "learning_rate": 6.963431718056386e-06, "loss": 0.6186, "step": 4807 }, { "epoch": 0.39, "grad_norm": 5.072030013314798, "learning_rate": 6.962221954832476e-06, "loss": 0.7459, "step": 4808 }, { "epoch": 0.39, "grad_norm": 3.6083940864712267, "learning_rate": 6.961012055810452e-06, "loss": 0.7894, "step": 4809 }, { "epoch": 0.39, "grad_norm": 3.043149196602401, "learning_rate": 6.959802021074048e-06, "loss": 0.6362, "step": 4810 }, { "epoch": 0.39, "grad_norm": 3.5806014620304136, "learning_rate": 6.958591850707003e-06, "loss": 0.8644, "step": 4811 }, { "epoch": 0.39, "grad_norm": 4.401141092956759, "learning_rate": 6.957381544793069e-06, "loss": 0.7423, "step": 4812 }, { "epoch": 0.39, "grad_norm": 5.044522120092507, "learning_rate": 6.956171103416007e-06, "loss": 0.759, "step": 4813 }, { "epoch": 0.39, "grad_norm": 3.8813838781887844, "learning_rate": 6.9549605266595884e-06, "loss": 0.8383, "step": 4814 }, { "epoch": 0.39, "grad_norm": 3.193952462769136, "learning_rate": 6.9537498146075925e-06, "loss": 0.6518, "step": 4815 }, { "epoch": 0.39, "grad_norm": 6.295054639016323, "learning_rate": 6.952538967343807e-06, "loss": 0.5823, "step": 4816 }, { "epoch": 0.39, "grad_norm": 3.353986345212676, "learning_rate": 6.95132798495203e-06, "loss": 0.563, "step": 4817 }, { "epoch": 0.39, "grad_norm": 3.2383150637038103, "learning_rate": 6.950116867516071e-06, "loss": 0.5848, "step": 4818 }, { "epoch": 0.39, "grad_norm": 3.529747284134479, "learning_rate": 6.948905615119746e-06, "loss": 0.872, "step": 4819 }, { "epoch": 0.39, "grad_norm": 2.8181419144918824, "learning_rate": 6.94769422784688e-06, "loss": 0.7193, "step": 4820 }, { "epoch": 0.39, "grad_norm": 2.8819947125477032, "learning_rate": 6.94648270578131e-06, "loss": 0.706, "step": 4821 }, { "epoch": 0.39, "grad_norm": 3.3857924594068196, "learning_rate": 6.945271049006882e-06, "loss": 0.5259, "step": 4822 }, { "epoch": 0.39, "grad_norm": 3.290557550668647, "learning_rate": 6.944059257607447e-06, "loss": 0.697, "step": 4823 }, { "epoch": 0.39, "grad_norm": 3.18650239441529, "learning_rate": 6.942847331666872e-06, "loss": 0.6802, "step": 4824 }, { "epoch": 0.39, "grad_norm": 4.452219946444507, "learning_rate": 6.941635271269027e-06, "loss": 0.6743, "step": 4825 }, { "epoch": 0.39, "grad_norm": 8.170256908873805, "learning_rate": 6.940423076497798e-06, "loss": 0.543, "step": 4826 }, { "epoch": 0.39, "grad_norm": 4.094811431356428, "learning_rate": 6.939210747437073e-06, "loss": 0.8904, "step": 4827 }, { "epoch": 0.39, "grad_norm": 3.4304699025698406, "learning_rate": 6.937998284170754e-06, "loss": 0.8, "step": 4828 }, { "epoch": 0.39, "grad_norm": 2.9774677272601506, "learning_rate": 6.936785686782751e-06, "loss": 0.6621, "step": 4829 }, { "epoch": 0.39, "grad_norm": 2.488762887188442, "learning_rate": 6.9355729553569824e-06, "loss": 0.7363, "step": 4830 }, { "epoch": 0.39, "grad_norm": 4.054060702281797, "learning_rate": 6.934360089977379e-06, "loss": 0.7053, "step": 4831 }, { "epoch": 0.39, "grad_norm": 5.08085247803542, "learning_rate": 6.933147090727878e-06, "loss": 0.6227, "step": 4832 }, { "epoch": 0.39, "grad_norm": 2.164629488068441, "learning_rate": 6.931933957692425e-06, "loss": 0.6465, "step": 4833 }, { "epoch": 0.39, "grad_norm": 3.7527869687773174, "learning_rate": 6.9307206909549795e-06, "loss": 0.7853, "step": 4834 }, { "epoch": 0.39, "grad_norm": 2.7310070914460285, "learning_rate": 6.929507290599506e-06, "loss": 0.6748, "step": 4835 }, { "epoch": 0.39, "grad_norm": 3.0460914094795593, "learning_rate": 6.928293756709976e-06, "loss": 0.5652, "step": 4836 }, { "epoch": 0.39, "grad_norm": 3.277227833770824, "learning_rate": 6.927080089370377e-06, "loss": 0.6367, "step": 4837 }, { "epoch": 0.39, "grad_norm": 6.312090911288772, "learning_rate": 6.925866288664702e-06, "loss": 0.8895, "step": 4838 }, { "epoch": 0.39, "grad_norm": 2.762364319267452, "learning_rate": 6.924652354676955e-06, "loss": 0.6174, "step": 4839 }, { "epoch": 0.39, "grad_norm": 4.901084220276386, "learning_rate": 6.923438287491145e-06, "loss": 0.7807, "step": 4840 }, { "epoch": 0.39, "grad_norm": 7.349605173306933, "learning_rate": 6.922224087191295e-06, "loss": 0.7794, "step": 4841 }, { "epoch": 0.39, "grad_norm": 4.504229995389656, "learning_rate": 6.9210097538614355e-06, "loss": 0.6636, "step": 4842 }, { "epoch": 0.39, "grad_norm": 3.1233364879584284, "learning_rate": 6.9197952875856044e-06, "loss": 0.7023, "step": 4843 }, { "epoch": 0.39, "grad_norm": 2.234534529419142, "learning_rate": 6.918580688447851e-06, "loss": 0.7493, "step": 4844 }, { "epoch": 0.39, "grad_norm": 2.9700592965797354, "learning_rate": 6.917365956532236e-06, "loss": 0.8958, "step": 4845 }, { "epoch": 0.39, "grad_norm": 15.646954668236777, "learning_rate": 6.916151091922822e-06, "loss": 0.7039, "step": 4846 }, { "epoch": 0.39, "grad_norm": 6.009356960421061, "learning_rate": 6.914936094703687e-06, "loss": 0.6982, "step": 4847 }, { "epoch": 0.39, "grad_norm": 2.504380044199826, "learning_rate": 6.9137209649589165e-06, "loss": 0.6325, "step": 4848 }, { "epoch": 0.39, "grad_norm": 3.3492555628831826, "learning_rate": 6.912505702772608e-06, "loss": 0.7983, "step": 4849 }, { "epoch": 0.39, "grad_norm": 1.8157188367439279, "learning_rate": 6.911290308228861e-06, "loss": 0.6933, "step": 4850 }, { "epoch": 0.39, "grad_norm": 3.7603590075297753, "learning_rate": 6.910074781411791e-06, "loss": 0.7869, "step": 4851 }, { "epoch": 0.39, "grad_norm": 3.2566879865106375, "learning_rate": 6.908859122405519e-06, "loss": 0.6108, "step": 4852 }, { "epoch": 0.39, "grad_norm": 2.290384485690406, "learning_rate": 6.907643331294176e-06, "loss": 0.6921, "step": 4853 }, { "epoch": 0.39, "grad_norm": 4.5076507899564415, "learning_rate": 6.906427408161902e-06, "loss": 0.6926, "step": 4854 }, { "epoch": 0.39, "grad_norm": 2.8981679304789374, "learning_rate": 6.90521135309285e-06, "loss": 0.6142, "step": 4855 }, { "epoch": 0.39, "grad_norm": 3.0077441297538567, "learning_rate": 6.903995166171174e-06, "loss": 0.5751, "step": 4856 }, { "epoch": 0.39, "grad_norm": 2.7602107885057667, "learning_rate": 6.9027788474810455e-06, "loss": 0.8843, "step": 4857 }, { "epoch": 0.39, "grad_norm": 4.440473376558985, "learning_rate": 6.901562397106639e-06, "loss": 0.8154, "step": 4858 }, { "epoch": 0.39, "grad_norm": 2.3164539492566516, "learning_rate": 6.900345815132142e-06, "loss": 0.8152, "step": 4859 }, { "epoch": 0.39, "grad_norm": 5.9698913678772145, "learning_rate": 6.899129101641749e-06, "loss": 0.6033, "step": 4860 }, { "epoch": 0.39, "grad_norm": 3.677718448085302, "learning_rate": 6.897912256719663e-06, "loss": 0.6525, "step": 4861 }, { "epoch": 0.39, "grad_norm": 3.1018884482136397, "learning_rate": 6.896695280450101e-06, "loss": 0.6381, "step": 4862 }, { "epoch": 0.39, "grad_norm": 6.04664164346696, "learning_rate": 6.89547817291728e-06, "loss": 0.7792, "step": 4863 }, { "epoch": 0.4, "grad_norm": 2.9141833975241522, "learning_rate": 6.894260934205437e-06, "loss": 0.6559, "step": 4864 }, { "epoch": 0.4, "grad_norm": 3.6985594169482856, "learning_rate": 6.893043564398809e-06, "loss": 0.7285, "step": 4865 }, { "epoch": 0.4, "grad_norm": 3.1611910933365333, "learning_rate": 6.891826063581646e-06, "loss": 0.7749, "step": 4866 }, { "epoch": 0.4, "grad_norm": 2.6069696519997665, "learning_rate": 6.89060843183821e-06, "loss": 0.6807, "step": 4867 }, { "epoch": 0.4, "grad_norm": 4.580422093056034, "learning_rate": 6.8893906692527635e-06, "loss": 0.6287, "step": 4868 }, { "epoch": 0.4, "grad_norm": 2.444635152017811, "learning_rate": 6.888172775909588e-06, "loss": 0.8139, "step": 4869 }, { "epoch": 0.4, "grad_norm": 4.904345140899487, "learning_rate": 6.886954751892966e-06, "loss": 0.5901, "step": 4870 }, { "epoch": 0.4, "grad_norm": 2.6470422407509746, "learning_rate": 6.885736597287195e-06, "loss": 0.7187, "step": 4871 }, { "epoch": 0.4, "grad_norm": 8.215865622617345, "learning_rate": 6.884518312176578e-06, "loss": 0.7838, "step": 4872 }, { "epoch": 0.4, "grad_norm": 4.751349945627463, "learning_rate": 6.883299896645427e-06, "loss": 0.5323, "step": 4873 }, { "epoch": 0.4, "grad_norm": 2.423893416061932, "learning_rate": 6.882081350778065e-06, "loss": 0.8255, "step": 4874 }, { "epoch": 0.4, "grad_norm": 2.967999148784259, "learning_rate": 6.8808626746588235e-06, "loss": 0.7699, "step": 4875 }, { "epoch": 0.4, "grad_norm": 10.662438312947389, "learning_rate": 6.879643868372043e-06, "loss": 0.631, "step": 4876 }, { "epoch": 0.4, "grad_norm": 3.2539442085637185, "learning_rate": 6.878424932002069e-06, "loss": 0.7352, "step": 4877 }, { "epoch": 0.4, "grad_norm": 5.32244299076864, "learning_rate": 6.8772058656332626e-06, "loss": 0.6774, "step": 4878 }, { "epoch": 0.4, "grad_norm": 7.162138210066419, "learning_rate": 6.875986669349993e-06, "loss": 0.6791, "step": 4879 }, { "epoch": 0.4, "grad_norm": 3.128353701835335, "learning_rate": 6.874767343236631e-06, "loss": 0.7136, "step": 4880 }, { "epoch": 0.4, "grad_norm": 2.819837127300953, "learning_rate": 6.873547887377565e-06, "loss": 0.7726, "step": 4881 }, { "epoch": 0.4, "grad_norm": 2.287134181595837, "learning_rate": 6.872328301857189e-06, "loss": 0.7061, "step": 4882 }, { "epoch": 0.4, "grad_norm": 3.019489563935498, "learning_rate": 6.871108586759907e-06, "loss": 0.6867, "step": 4883 }, { "epoch": 0.4, "grad_norm": 4.243860201693428, "learning_rate": 6.869888742170127e-06, "loss": 0.8063, "step": 4884 }, { "epoch": 0.4, "grad_norm": 2.6384940511022377, "learning_rate": 6.868668768172273e-06, "loss": 0.7353, "step": 4885 }, { "epoch": 0.4, "grad_norm": 2.631504097013453, "learning_rate": 6.8674486648507735e-06, "loss": 0.6798, "step": 4886 }, { "epoch": 0.4, "grad_norm": 4.973520759093243, "learning_rate": 6.8662284322900675e-06, "loss": 0.6342, "step": 4887 }, { "epoch": 0.4, "grad_norm": 5.262189673632032, "learning_rate": 6.865008070574604e-06, "loss": 0.6115, "step": 4888 }, { "epoch": 0.4, "grad_norm": 2.486058711455715, "learning_rate": 6.8637875797888394e-06, "loss": 0.6982, "step": 4889 }, { "epoch": 0.4, "grad_norm": 5.976942703364385, "learning_rate": 6.8625669600172386e-06, "loss": 0.5798, "step": 4890 }, { "epoch": 0.4, "grad_norm": 4.181174871753097, "learning_rate": 6.861346211344277e-06, "loss": 0.6583, "step": 4891 }, { "epoch": 0.4, "grad_norm": 2.751577049115162, "learning_rate": 6.860125333854437e-06, "loss": 0.6289, "step": 4892 }, { "epoch": 0.4, "grad_norm": 3.4840019897365724, "learning_rate": 6.858904327632212e-06, "loss": 0.7523, "step": 4893 }, { "epoch": 0.4, "grad_norm": 8.756750573155582, "learning_rate": 6.857683192762101e-06, "loss": 0.6771, "step": 4894 }, { "epoch": 0.4, "grad_norm": 6.7511098527241735, "learning_rate": 6.85646192932862e-06, "loss": 0.6944, "step": 4895 }, { "epoch": 0.4, "grad_norm": 5.708761425865642, "learning_rate": 6.85524053741628e-06, "loss": 0.7667, "step": 4896 }, { "epoch": 0.4, "grad_norm": 3.4578905548610033, "learning_rate": 6.854019017109614e-06, "loss": 0.7, "step": 4897 }, { "epoch": 0.4, "grad_norm": 2.768490692522605, "learning_rate": 6.85279736849316e-06, "loss": 0.6715, "step": 4898 }, { "epoch": 0.4, "grad_norm": 3.8707193731066476, "learning_rate": 6.851575591651461e-06, "loss": 0.6208, "step": 4899 }, { "epoch": 0.4, "grad_norm": 2.814606485889003, "learning_rate": 6.8503536866690735e-06, "loss": 0.856, "step": 4900 }, { "epoch": 0.4, "grad_norm": 3.861416581199034, "learning_rate": 6.849131653630558e-06, "loss": 0.7036, "step": 4901 }, { "epoch": 0.4, "grad_norm": 2.6794671355215796, "learning_rate": 6.8479094926204925e-06, "loss": 0.7893, "step": 4902 }, { "epoch": 0.4, "grad_norm": 3.46235404964707, "learning_rate": 6.846687203723452e-06, "loss": 0.587, "step": 4903 }, { "epoch": 0.4, "grad_norm": 2.8876925087219494, "learning_rate": 6.845464787024029e-06, "loss": 0.6824, "step": 4904 }, { "epoch": 0.4, "grad_norm": 2.9597244953680635, "learning_rate": 6.844242242606825e-06, "loss": 0.7209, "step": 4905 }, { "epoch": 0.4, "grad_norm": 15.791574232592932, "learning_rate": 6.843019570556443e-06, "loss": 0.5625, "step": 4906 }, { "epoch": 0.4, "grad_norm": 2.7578395422668005, "learning_rate": 6.841796770957503e-06, "loss": 0.6725, "step": 4907 }, { "epoch": 0.4, "grad_norm": 3.0826355219913117, "learning_rate": 6.840573843894631e-06, "loss": 0.7371, "step": 4908 }, { "epoch": 0.4, "grad_norm": 3.7095000877485846, "learning_rate": 6.839350789452458e-06, "loss": 0.7468, "step": 4909 }, { "epoch": 0.4, "grad_norm": 4.628751378647502, "learning_rate": 6.838127607715629e-06, "loss": 0.7616, "step": 4910 }, { "epoch": 0.4, "grad_norm": 3.851553840753211, "learning_rate": 6.836904298768795e-06, "loss": 0.7338, "step": 4911 }, { "epoch": 0.4, "grad_norm": 2.271296816808612, "learning_rate": 6.835680862696618e-06, "loss": 0.7156, "step": 4912 }, { "epoch": 0.4, "grad_norm": 2.55003452073316, "learning_rate": 6.834457299583768e-06, "loss": 0.8402, "step": 4913 }, { "epoch": 0.4, "grad_norm": 3.846070474038525, "learning_rate": 6.833233609514921e-06, "loss": 0.8685, "step": 4914 }, { "epoch": 0.4, "grad_norm": 3.6300982880081123, "learning_rate": 6.832009792574766e-06, "loss": 0.6318, "step": 4915 }, { "epoch": 0.4, "grad_norm": 4.251487750984797, "learning_rate": 6.830785848848e-06, "loss": 0.7366, "step": 4916 }, { "epoch": 0.4, "grad_norm": 3.3090286237907924, "learning_rate": 6.829561778419323e-06, "loss": 0.6907, "step": 4917 }, { "epoch": 0.4, "grad_norm": 3.5608003879066765, "learning_rate": 6.828337581373452e-06, "loss": 0.8053, "step": 4918 }, { "epoch": 0.4, "grad_norm": 2.869821317041007, "learning_rate": 6.827113257795107e-06, "loss": 0.7041, "step": 4919 }, { "epoch": 0.4, "grad_norm": 5.1506786167368155, "learning_rate": 6.82588880776902e-06, "loss": 0.7103, "step": 4920 }, { "epoch": 0.4, "grad_norm": 2.903738089308982, "learning_rate": 6.824664231379932e-06, "loss": 0.6428, "step": 4921 }, { "epoch": 0.4, "grad_norm": 5.180286174370614, "learning_rate": 6.82343952871259e-06, "loss": 0.5192, "step": 4922 }, { "epoch": 0.4, "grad_norm": 3.3407512211338766, "learning_rate": 6.8222146998517515e-06, "loss": 0.6543, "step": 4923 }, { "epoch": 0.4, "grad_norm": 3.016329848326716, "learning_rate": 6.820989744882182e-06, "loss": 0.6059, "step": 4924 }, { "epoch": 0.4, "grad_norm": 3.7798140504016247, "learning_rate": 6.819764663888656e-06, "loss": 0.7088, "step": 4925 }, { "epoch": 0.4, "grad_norm": 9.32782874952827, "learning_rate": 6.818539456955957e-06, "loss": 0.6638, "step": 4926 }, { "epoch": 0.4, "grad_norm": 3.2294859608543174, "learning_rate": 6.817314124168877e-06, "loss": 0.6311, "step": 4927 }, { "epoch": 0.4, "grad_norm": 2.2677417453980624, "learning_rate": 6.816088665612217e-06, "loss": 0.7605, "step": 4928 }, { "epoch": 0.4, "grad_norm": 4.007936509204852, "learning_rate": 6.814863081370786e-06, "loss": 0.6108, "step": 4929 }, { "epoch": 0.4, "grad_norm": 2.5660843710383747, "learning_rate": 6.813637371529403e-06, "loss": 0.866, "step": 4930 }, { "epoch": 0.4, "grad_norm": 2.9705517049068058, "learning_rate": 6.8124115361728935e-06, "loss": 0.6793, "step": 4931 }, { "epoch": 0.4, "grad_norm": 2.523319423250326, "learning_rate": 6.811185575386095e-06, "loss": 0.6886, "step": 4932 }, { "epoch": 0.4, "grad_norm": 25.918255448354714, "learning_rate": 6.80995948925385e-06, "loss": 0.6762, "step": 4933 }, { "epoch": 0.4, "grad_norm": 3.278457881643568, "learning_rate": 6.8087332778610116e-06, "loss": 0.5528, "step": 4934 }, { "epoch": 0.4, "grad_norm": 3.672443977365079, "learning_rate": 6.8075069412924425e-06, "loss": 0.8241, "step": 4935 }, { "epoch": 0.4, "grad_norm": 2.6843366699481845, "learning_rate": 6.806280479633011e-06, "loss": 0.8567, "step": 4936 }, { "epoch": 0.4, "grad_norm": 4.213207165335354, "learning_rate": 6.8050538929675965e-06, "loss": 0.7736, "step": 4937 }, { "epoch": 0.4, "grad_norm": 3.7486391167375754, "learning_rate": 6.803827181381089e-06, "loss": 0.752, "step": 4938 }, { "epoch": 0.4, "grad_norm": 3.1190565653439846, "learning_rate": 6.802600344958381e-06, "loss": 0.7614, "step": 4939 }, { "epoch": 0.4, "grad_norm": 3.793317648003399, "learning_rate": 6.80137338378438e-06, "loss": 0.6383, "step": 4940 }, { "epoch": 0.4, "grad_norm": 6.246085295528323, "learning_rate": 6.800146297943998e-06, "loss": 0.778, "step": 4941 }, { "epoch": 0.4, "grad_norm": 4.299199697024928, "learning_rate": 6.798919087522157e-06, "loss": 0.703, "step": 4942 }, { "epoch": 0.4, "grad_norm": 2.628244345392673, "learning_rate": 6.79769175260379e-06, "loss": 0.7291, "step": 4943 }, { "epoch": 0.4, "grad_norm": 5.244779606912698, "learning_rate": 6.796464293273832e-06, "loss": 0.6751, "step": 4944 }, { "epoch": 0.4, "grad_norm": 2.647494937081442, "learning_rate": 6.795236709617237e-06, "loss": 0.7623, "step": 4945 }, { "epoch": 0.4, "grad_norm": 3.0507548862403695, "learning_rate": 6.794009001718954e-06, "loss": 0.6035, "step": 4946 }, { "epoch": 0.4, "grad_norm": 4.29394347585778, "learning_rate": 6.7927811696639554e-06, "loss": 0.7374, "step": 4947 }, { "epoch": 0.4, "grad_norm": 3.7936480222799127, "learning_rate": 6.791553213537209e-06, "loss": 0.5189, "step": 4948 }, { "epoch": 0.4, "grad_norm": 3.324451737302168, "learning_rate": 6.790325133423701e-06, "loss": 0.6558, "step": 4949 }, { "epoch": 0.4, "grad_norm": 3.3448679908728822, "learning_rate": 6.789096929408421e-06, "loss": 0.6626, "step": 4950 }, { "epoch": 0.4, "grad_norm": 3.7624592233009424, "learning_rate": 6.787868601576368e-06, "loss": 0.8336, "step": 4951 }, { "epoch": 0.4, "grad_norm": 5.0030750967991295, "learning_rate": 6.78664015001255e-06, "loss": 0.6029, "step": 4952 }, { "epoch": 0.4, "grad_norm": 3.5888509335441756, "learning_rate": 6.7854115748019845e-06, "loss": 0.6706, "step": 4953 }, { "epoch": 0.4, "grad_norm": 3.063577444692808, "learning_rate": 6.784182876029696e-06, "loss": 0.6527, "step": 4954 }, { "epoch": 0.4, "grad_norm": 2.724760082821225, "learning_rate": 6.782954053780719e-06, "loss": 0.7408, "step": 4955 }, { "epoch": 0.4, "grad_norm": 2.8398349947036854, "learning_rate": 6.781725108140095e-06, "loss": 0.6336, "step": 4956 }, { "epoch": 0.4, "grad_norm": 3.492050262409166, "learning_rate": 6.780496039192874e-06, "loss": 0.7221, "step": 4957 }, { "epoch": 0.4, "grad_norm": 2.479745341654804, "learning_rate": 6.779266847024118e-06, "loss": 0.5949, "step": 4958 }, { "epoch": 0.4, "grad_norm": 2.175749955463759, "learning_rate": 6.7780375317188904e-06, "loss": 0.7195, "step": 4959 }, { "epoch": 0.4, "grad_norm": 4.097047557585542, "learning_rate": 6.776808093362271e-06, "loss": 0.7895, "step": 4960 }, { "epoch": 0.4, "grad_norm": 2.7493222483737947, "learning_rate": 6.775578532039344e-06, "loss": 0.7537, "step": 4961 }, { "epoch": 0.4, "grad_norm": 2.451279738986141, "learning_rate": 6.774348847835203e-06, "loss": 0.6038, "step": 4962 }, { "epoch": 0.4, "grad_norm": 2.528594258655667, "learning_rate": 6.7731190408349475e-06, "loss": 0.7321, "step": 4963 }, { "epoch": 0.4, "grad_norm": 3.7806794514672712, "learning_rate": 6.7718891111236925e-06, "loss": 0.7319, "step": 4964 }, { "epoch": 0.4, "grad_norm": 5.734898757581246, "learning_rate": 6.770659058786555e-06, "loss": 0.6775, "step": 4965 }, { "epoch": 0.4, "grad_norm": 5.6660040045273545, "learning_rate": 6.7694288839086595e-06, "loss": 0.675, "step": 4966 }, { "epoch": 0.4, "grad_norm": 3.170865202091634, "learning_rate": 6.7681985865751434e-06, "loss": 0.7601, "step": 4967 }, { "epoch": 0.4, "grad_norm": 3.0833020650219836, "learning_rate": 6.766968166871154e-06, "loss": 0.5309, "step": 4968 }, { "epoch": 0.4, "grad_norm": 4.8071087652262054, "learning_rate": 6.76573762488184e-06, "loss": 0.7415, "step": 4969 }, { "epoch": 0.4, "grad_norm": 3.2614415620616857, "learning_rate": 6.764506960692364e-06, "loss": 0.8299, "step": 4970 }, { "epoch": 0.4, "grad_norm": 4.6331570792770504, "learning_rate": 6.763276174387898e-06, "loss": 0.8818, "step": 4971 }, { "epoch": 0.4, "grad_norm": 6.796748452132098, "learning_rate": 6.7620452660536175e-06, "loss": 0.8108, "step": 4972 }, { "epoch": 0.4, "grad_norm": 4.539266007033854, "learning_rate": 6.760814235774709e-06, "loss": 0.6718, "step": 4973 }, { "epoch": 0.4, "grad_norm": 3.116393385357577, "learning_rate": 6.7595830836363684e-06, "loss": 0.6739, "step": 4974 }, { "epoch": 0.4, "grad_norm": 4.152408908299569, "learning_rate": 6.7583518097238e-06, "loss": 0.8215, "step": 4975 }, { "epoch": 0.4, "grad_norm": 5.143771337933333, "learning_rate": 6.757120414122214e-06, "loss": 0.6795, "step": 4976 }, { "epoch": 0.4, "grad_norm": 4.058050852254165, "learning_rate": 6.755888896916831e-06, "loss": 0.685, "step": 4977 }, { "epoch": 0.4, "grad_norm": 25.687008959770978, "learning_rate": 6.754657258192883e-06, "loss": 0.618, "step": 4978 }, { "epoch": 0.4, "grad_norm": 3.4679077080681178, "learning_rate": 6.753425498035602e-06, "loss": 0.6927, "step": 4979 }, { "epoch": 0.4, "grad_norm": 5.717393355193208, "learning_rate": 6.7521936165302384e-06, "loss": 0.7709, "step": 4980 }, { "epoch": 0.4, "grad_norm": 3.52521391370229, "learning_rate": 6.750961613762042e-06, "loss": 0.7718, "step": 4981 }, { "epoch": 0.4, "grad_norm": 4.152022307203397, "learning_rate": 6.749729489816277e-06, "loss": 0.7635, "step": 4982 }, { "epoch": 0.4, "grad_norm": 2.4207049119488993, "learning_rate": 6.748497244778214e-06, "loss": 0.7649, "step": 4983 }, { "epoch": 0.4, "grad_norm": 3.0774242493309747, "learning_rate": 6.747264878733133e-06, "loss": 0.7109, "step": 4984 }, { "epoch": 0.4, "grad_norm": 5.701531419707866, "learning_rate": 6.746032391766321e-06, "loss": 0.7855, "step": 4985 }, { "epoch": 0.4, "grad_norm": 22.67299778658607, "learning_rate": 6.744799783963072e-06, "loss": 0.745, "step": 4986 }, { "epoch": 0.41, "grad_norm": 2.4966346755969138, "learning_rate": 6.743567055408693e-06, "loss": 0.5558, "step": 4987 }, { "epoch": 0.41, "grad_norm": 2.971100668042735, "learning_rate": 6.742334206188494e-06, "loss": 0.7532, "step": 4988 }, { "epoch": 0.41, "grad_norm": 3.1520189641407472, "learning_rate": 6.741101236387799e-06, "loss": 0.7895, "step": 4989 }, { "epoch": 0.41, "grad_norm": 2.844819841318991, "learning_rate": 6.739868146091934e-06, "loss": 0.6002, "step": 4990 }, { "epoch": 0.41, "grad_norm": 2.8579954476862306, "learning_rate": 6.7386349353862415e-06, "loss": 0.7722, "step": 4991 }, { "epoch": 0.41, "grad_norm": 5.254711892310361, "learning_rate": 6.73740160435606e-06, "loss": 0.6614, "step": 4992 }, { "epoch": 0.41, "grad_norm": 2.4600862141161786, "learning_rate": 6.73616815308675e-06, "loss": 0.7076, "step": 4993 }, { "epoch": 0.41, "grad_norm": 3.1514416380860055, "learning_rate": 6.73493458166367e-06, "loss": 0.7211, "step": 4994 }, { "epoch": 0.41, "grad_norm": 4.271489919509307, "learning_rate": 6.733700890172196e-06, "loss": 0.8261, "step": 4995 }, { "epoch": 0.41, "grad_norm": 2.7358240516543146, "learning_rate": 6.732467078697703e-06, "loss": 0.6951, "step": 4996 }, { "epoch": 0.41, "grad_norm": 3.25147475326891, "learning_rate": 6.731233147325578e-06, "loss": 0.722, "step": 4997 }, { "epoch": 0.41, "grad_norm": 5.7606544742576995, "learning_rate": 6.729999096141221e-06, "loss": 0.724, "step": 4998 }, { "epoch": 0.41, "grad_norm": 9.355223991585957, "learning_rate": 6.728764925230032e-06, "loss": 0.6977, "step": 4999 }, { "epoch": 0.41, "grad_norm": 5.5165558059484345, "learning_rate": 6.727530634677425e-06, "loss": 0.8671, "step": 5000 }, { "epoch": 0.41, "grad_norm": 3.8632299351827935, "learning_rate": 6.726296224568821e-06, "loss": 0.7115, "step": 5001 }, { "epoch": 0.41, "grad_norm": 2.4995308036675477, "learning_rate": 6.725061694989647e-06, "loss": 0.7201, "step": 5002 }, { "epoch": 0.41, "grad_norm": 2.4757929721952308, "learning_rate": 6.723827046025344e-06, "loss": 0.745, "step": 5003 }, { "epoch": 0.41, "grad_norm": 4.214930358802065, "learning_rate": 6.722592277761355e-06, "loss": 0.827, "step": 5004 }, { "epoch": 0.41, "grad_norm": 3.4380746462034697, "learning_rate": 6.721357390283134e-06, "loss": 0.5538, "step": 5005 }, { "epoch": 0.41, "grad_norm": 3.674369744138658, "learning_rate": 6.720122383676142e-06, "loss": 0.6961, "step": 5006 }, { "epoch": 0.41, "grad_norm": 2.928556239665184, "learning_rate": 6.718887258025851e-06, "loss": 0.5996, "step": 5007 }, { "epoch": 0.41, "grad_norm": 3.3021005883001693, "learning_rate": 6.717652013417739e-06, "loss": 0.6816, "step": 5008 }, { "epoch": 0.41, "grad_norm": 2.961289019740647, "learning_rate": 6.716416649937291e-06, "loss": 0.6832, "step": 5009 }, { "epoch": 0.41, "grad_norm": 4.155525662245276, "learning_rate": 6.715181167670005e-06, "loss": 0.6027, "step": 5010 }, { "epoch": 0.41, "grad_norm": 2.7306408775871365, "learning_rate": 6.713945566701383e-06, "loss": 0.6836, "step": 5011 }, { "epoch": 0.41, "grad_norm": 3.0934729932999487, "learning_rate": 6.712709847116934e-06, "loss": 0.8548, "step": 5012 }, { "epoch": 0.41, "grad_norm": 4.2552055161435485, "learning_rate": 6.711474009002181e-06, "loss": 0.5983, "step": 5013 }, { "epoch": 0.41, "grad_norm": 3.4059636103358737, "learning_rate": 6.71023805244265e-06, "loss": 0.78, "step": 5014 }, { "epoch": 0.41, "grad_norm": 2.791794911046696, "learning_rate": 6.709001977523877e-06, "loss": 0.6411, "step": 5015 }, { "epoch": 0.41, "grad_norm": 3.1380299358727246, "learning_rate": 6.707765784331406e-06, "loss": 0.7839, "step": 5016 }, { "epoch": 0.41, "grad_norm": 3.8383987597544214, "learning_rate": 6.706529472950789e-06, "loss": 0.7235, "step": 5017 }, { "epoch": 0.41, "grad_norm": 2.8690584080396015, "learning_rate": 6.705293043467589e-06, "loss": 0.8103, "step": 5018 }, { "epoch": 0.41, "grad_norm": 2.7013125646738065, "learning_rate": 6.704056495967372e-06, "loss": 0.7716, "step": 5019 }, { "epoch": 0.41, "grad_norm": 2.8572493551274656, "learning_rate": 6.702819830535716e-06, "loss": 0.7041, "step": 5020 }, { "epoch": 0.41, "grad_norm": 2.468425537393209, "learning_rate": 6.7015830472582065e-06, "loss": 0.6019, "step": 5021 }, { "epoch": 0.41, "grad_norm": 2.81278489809031, "learning_rate": 6.700346146220436e-06, "loss": 0.7471, "step": 5022 }, { "epoch": 0.41, "grad_norm": 2.328934847965021, "learning_rate": 6.699109127508004e-06, "loss": 0.7705, "step": 5023 }, { "epoch": 0.41, "grad_norm": 6.970253484989624, "learning_rate": 6.697871991206524e-06, "loss": 0.6292, "step": 5024 }, { "epoch": 0.41, "grad_norm": 3.2510358557789996, "learning_rate": 6.69663473740161e-06, "loss": 0.8023, "step": 5025 }, { "epoch": 0.41, "grad_norm": 2.735028893226962, "learning_rate": 6.695397366178891e-06, "loss": 0.7816, "step": 5026 }, { "epoch": 0.41, "grad_norm": 2.712153740279217, "learning_rate": 6.694159877623998e-06, "loss": 0.7923, "step": 5027 }, { "epoch": 0.41, "grad_norm": 4.280357329229467, "learning_rate": 6.692922271822575e-06, "loss": 0.6613, "step": 5028 }, { "epoch": 0.41, "grad_norm": 22.32109284885269, "learning_rate": 6.691684548860271e-06, "loss": 0.6882, "step": 5029 }, { "epoch": 0.41, "grad_norm": 2.6198163844461506, "learning_rate": 6.690446708822744e-06, "loss": 0.7059, "step": 5030 }, { "epoch": 0.41, "grad_norm": 2.9798684719887887, "learning_rate": 6.689208751795662e-06, "loss": 0.6601, "step": 5031 }, { "epoch": 0.41, "grad_norm": 2.6350320286969144, "learning_rate": 6.687970677864696e-06, "loss": 0.8318, "step": 5032 }, { "epoch": 0.41, "grad_norm": 3.3680880399874855, "learning_rate": 6.6867324871155316e-06, "loss": 0.5916, "step": 5033 }, { "epoch": 0.41, "grad_norm": 2.7772717113195995, "learning_rate": 6.68549417963386e-06, "loss": 0.7119, "step": 5034 }, { "epoch": 0.41, "grad_norm": 2.1239408176620738, "learning_rate": 6.6842557555053765e-06, "loss": 0.6337, "step": 5035 }, { "epoch": 0.41, "grad_norm": 2.9474245923833977, "learning_rate": 6.683017214815791e-06, "loss": 0.5968, "step": 5036 }, { "epoch": 0.41, "grad_norm": 3.6923366673392612, "learning_rate": 6.681778557650816e-06, "loss": 0.7317, "step": 5037 }, { "epoch": 0.41, "grad_norm": 2.8559656397007744, "learning_rate": 6.680539784096177e-06, "loss": 0.7574, "step": 5038 }, { "epoch": 0.41, "grad_norm": 2.3631775929096053, "learning_rate": 6.679300894237603e-06, "loss": 0.6943, "step": 5039 }, { "epoch": 0.41, "grad_norm": 7.389040743639903, "learning_rate": 6.6780618881608315e-06, "loss": 0.7443, "step": 5040 }, { "epoch": 0.41, "grad_norm": 4.293024439847495, "learning_rate": 6.676822765951614e-06, "loss": 0.7487, "step": 5041 }, { "epoch": 0.41, "grad_norm": 2.609097741737723, "learning_rate": 6.675583527695701e-06, "loss": 0.7211, "step": 5042 }, { "epoch": 0.41, "grad_norm": 3.041360089405942, "learning_rate": 6.674344173478858e-06, "loss": 0.7158, "step": 5043 }, { "epoch": 0.41, "grad_norm": 2.6003188369592616, "learning_rate": 6.673104703386856e-06, "loss": 0.5661, "step": 5044 }, { "epoch": 0.41, "grad_norm": 3.468428455022618, "learning_rate": 6.671865117505476e-06, "loss": 0.8295, "step": 5045 }, { "epoch": 0.41, "grad_norm": 3.129459100316946, "learning_rate": 6.6706254159205e-06, "loss": 0.6633, "step": 5046 }, { "epoch": 0.41, "grad_norm": 3.960369845937572, "learning_rate": 6.6693855987177254e-06, "loss": 0.6505, "step": 5047 }, { "epoch": 0.41, "grad_norm": 3.2408355837193, "learning_rate": 6.668145665982959e-06, "loss": 0.6992, "step": 5048 }, { "epoch": 0.41, "grad_norm": 4.284071896381552, "learning_rate": 6.666905617802006e-06, "loss": 0.7053, "step": 5049 }, { "epoch": 0.41, "grad_norm": 3.8541736350340186, "learning_rate": 6.66566545426069e-06, "loss": 0.8008, "step": 5050 }, { "epoch": 0.41, "grad_norm": 2.9589248725208694, "learning_rate": 6.664425175444838e-06, "loss": 0.8337, "step": 5051 }, { "epoch": 0.41, "grad_norm": 2.6886965292385363, "learning_rate": 6.6631847814402815e-06, "loss": 0.6631, "step": 5052 }, { "epoch": 0.41, "grad_norm": 4.007024126168313, "learning_rate": 6.661944272332867e-06, "loss": 0.6692, "step": 5053 }, { "epoch": 0.41, "grad_norm": 8.308365232233202, "learning_rate": 6.660703648208446e-06, "loss": 0.8157, "step": 5054 }, { "epoch": 0.41, "grad_norm": 2.976952899370484, "learning_rate": 6.659462909152873e-06, "loss": 0.6068, "step": 5055 }, { "epoch": 0.41, "grad_norm": 5.1632577261639225, "learning_rate": 6.658222055252019e-06, "loss": 0.6583, "step": 5056 }, { "epoch": 0.41, "grad_norm": 2.650919258539903, "learning_rate": 6.656981086591756e-06, "loss": 0.5791, "step": 5057 }, { "epoch": 0.41, "grad_norm": 2.924828151933549, "learning_rate": 6.655740003257971e-06, "loss": 0.8503, "step": 5058 }, { "epoch": 0.41, "grad_norm": 3.2753795009843825, "learning_rate": 6.654498805336551e-06, "loss": 0.778, "step": 5059 }, { "epoch": 0.41, "grad_norm": 3.255770231726, "learning_rate": 6.653257492913398e-06, "loss": 0.7918, "step": 5060 }, { "epoch": 0.41, "grad_norm": 2.349428819895754, "learning_rate": 6.652016066074416e-06, "loss": 0.6037, "step": 5061 }, { "epoch": 0.41, "grad_norm": 3.1597893282554437, "learning_rate": 6.650774524905519e-06, "loss": 0.8108, "step": 5062 }, { "epoch": 0.41, "grad_norm": 2.714615314035783, "learning_rate": 6.649532869492631e-06, "loss": 0.7253, "step": 5063 }, { "epoch": 0.41, "grad_norm": 4.89023893976048, "learning_rate": 6.648291099921683e-06, "loss": 0.6877, "step": 5064 }, { "epoch": 0.41, "grad_norm": 3.9672246276933087, "learning_rate": 6.647049216278612e-06, "loss": 0.6675, "step": 5065 }, { "epoch": 0.41, "grad_norm": 5.519301514153897, "learning_rate": 6.645807218649364e-06, "loss": 0.7745, "step": 5066 }, { "epoch": 0.41, "grad_norm": 5.394529094650727, "learning_rate": 6.644565107119895e-06, "loss": 0.7197, "step": 5067 }, { "epoch": 0.41, "grad_norm": 2.599852605743063, "learning_rate": 6.643322881776164e-06, "loss": 0.7601, "step": 5068 }, { "epoch": 0.41, "grad_norm": 7.959115742962483, "learning_rate": 6.642080542704144e-06, "loss": 0.6973, "step": 5069 }, { "epoch": 0.41, "grad_norm": 2.555274778284788, "learning_rate": 6.640838089989809e-06, "loss": 0.8815, "step": 5070 }, { "epoch": 0.41, "grad_norm": 2.9974597963405656, "learning_rate": 6.639595523719148e-06, "loss": 0.7689, "step": 5071 }, { "epoch": 0.41, "grad_norm": 3.9229524075739044, "learning_rate": 6.638352843978153e-06, "loss": 0.7671, "step": 5072 }, { "epoch": 0.41, "grad_norm": 3.21939783823383, "learning_rate": 6.637110050852824e-06, "loss": 0.7184, "step": 5073 }, { "epoch": 0.41, "grad_norm": 4.700668108280518, "learning_rate": 6.6358671444291735e-06, "loss": 0.7317, "step": 5074 }, { "epoch": 0.41, "grad_norm": 4.378392081738789, "learning_rate": 6.634624124793214e-06, "loss": 0.6345, "step": 5075 }, { "epoch": 0.41, "grad_norm": 3.0609003754767388, "learning_rate": 6.633380992030973e-06, "loss": 0.7689, "step": 5076 }, { "epoch": 0.41, "grad_norm": 4.1340436387013435, "learning_rate": 6.6321377462284845e-06, "loss": 0.793, "step": 5077 }, { "epoch": 0.41, "grad_norm": 3.506604164541014, "learning_rate": 6.630894387471787e-06, "loss": 0.755, "step": 5078 }, { "epoch": 0.41, "grad_norm": 3.4382437530620154, "learning_rate": 6.629650915846928e-06, "loss": 0.586, "step": 5079 }, { "epoch": 0.41, "grad_norm": 4.202886214794389, "learning_rate": 6.628407331439964e-06, "loss": 0.5438, "step": 5080 }, { "epoch": 0.41, "grad_norm": 3.3132512225434994, "learning_rate": 6.6271636343369606e-06, "loss": 0.5141, "step": 5081 }, { "epoch": 0.41, "grad_norm": 3.857983815209744, "learning_rate": 6.6259198246239874e-06, "loss": 0.6856, "step": 5082 }, { "epoch": 0.41, "grad_norm": 5.2046949297431935, "learning_rate": 6.624675902387124e-06, "loss": 0.7413, "step": 5083 }, { "epoch": 0.41, "grad_norm": 6.361141741674706, "learning_rate": 6.62343186771246e-06, "loss": 0.4927, "step": 5084 }, { "epoch": 0.41, "grad_norm": 3.703819365479077, "learning_rate": 6.6221877206860885e-06, "loss": 0.7266, "step": 5085 }, { "epoch": 0.41, "grad_norm": 4.058652973006847, "learning_rate": 6.620943461394111e-06, "loss": 0.844, "step": 5086 }, { "epoch": 0.41, "grad_norm": 3.086634882234965, "learning_rate": 6.619699089922642e-06, "loss": 0.5647, "step": 5087 }, { "epoch": 0.41, "grad_norm": 2.942574635794407, "learning_rate": 6.618454606357796e-06, "loss": 0.5957, "step": 5088 }, { "epoch": 0.41, "grad_norm": 2.7301558501515597, "learning_rate": 6.617210010785701e-06, "loss": 0.6801, "step": 5089 }, { "epoch": 0.41, "grad_norm": 2.7397019860003073, "learning_rate": 6.61596530329249e-06, "loss": 0.6839, "step": 5090 }, { "epoch": 0.41, "grad_norm": 2.5547147480633634, "learning_rate": 6.614720483964305e-06, "loss": 0.7955, "step": 5091 }, { "epoch": 0.41, "grad_norm": 3.507368736191685, "learning_rate": 6.613475552887296e-06, "loss": 0.5266, "step": 5092 }, { "epoch": 0.41, "grad_norm": 3.131103502388818, "learning_rate": 6.61223051014762e-06, "loss": 0.7304, "step": 5093 }, { "epoch": 0.41, "grad_norm": 2.0540027299447865, "learning_rate": 6.610985355831441e-06, "loss": 0.6823, "step": 5094 }, { "epoch": 0.41, "grad_norm": 2.6262186858862817, "learning_rate": 6.609740090024931e-06, "loss": 0.7449, "step": 5095 }, { "epoch": 0.41, "grad_norm": 3.2951921262156696, "learning_rate": 6.60849471281427e-06, "loss": 0.8184, "step": 5096 }, { "epoch": 0.41, "grad_norm": 2.5005182984391636, "learning_rate": 6.60724922428565e-06, "loss": 0.6602, "step": 5097 }, { "epoch": 0.41, "grad_norm": 2.442955440812688, "learning_rate": 6.606003624525262e-06, "loss": 0.6637, "step": 5098 }, { "epoch": 0.41, "grad_norm": 3.109233404706816, "learning_rate": 6.60475791361931e-06, "loss": 0.712, "step": 5099 }, { "epoch": 0.41, "grad_norm": 2.391663100230515, "learning_rate": 6.603512091654007e-06, "loss": 0.7831, "step": 5100 }, { "epoch": 0.41, "grad_norm": 4.449960591145155, "learning_rate": 6.60226615871557e-06, "loss": 0.6644, "step": 5101 }, { "epoch": 0.41, "grad_norm": 6.776939128538275, "learning_rate": 6.601020114890227e-06, "loss": 0.6789, "step": 5102 }, { "epoch": 0.41, "grad_norm": 2.9631092761589493, "learning_rate": 6.599773960264211e-06, "loss": 0.7905, "step": 5103 }, { "epoch": 0.41, "grad_norm": 3.3920002121434236, "learning_rate": 6.598527694923764e-06, "loss": 0.5453, "step": 5104 }, { "epoch": 0.41, "grad_norm": 3.400846260479609, "learning_rate": 6.597281318955134e-06, "loss": 0.6576, "step": 5105 }, { "epoch": 0.41, "grad_norm": 2.511903747847077, "learning_rate": 6.596034832444581e-06, "loss": 0.8276, "step": 5106 }, { "epoch": 0.41, "grad_norm": 3.8890400429165775, "learning_rate": 6.594788235478368e-06, "loss": 0.5459, "step": 5107 }, { "epoch": 0.41, "grad_norm": 2.761529334528283, "learning_rate": 6.593541528142766e-06, "loss": 0.5908, "step": 5108 }, { "epoch": 0.41, "grad_norm": 2.7295796381618436, "learning_rate": 6.5922947105240585e-06, "loss": 0.7668, "step": 5109 }, { "epoch": 0.42, "grad_norm": 11.355418728983004, "learning_rate": 6.59104778270853e-06, "loss": 0.7054, "step": 5110 }, { "epoch": 0.42, "grad_norm": 2.467454895569117, "learning_rate": 6.589800744782478e-06, "loss": 0.6456, "step": 5111 }, { "epoch": 0.42, "grad_norm": 3.530480677113284, "learning_rate": 6.588553596832204e-06, "loss": 0.9019, "step": 5112 }, { "epoch": 0.42, "grad_norm": 3.4487594311354233, "learning_rate": 6.587306338944017e-06, "loss": 0.6821, "step": 5113 }, { "epoch": 0.42, "grad_norm": 2.484148554071114, "learning_rate": 6.586058971204239e-06, "loss": 0.813, "step": 5114 }, { "epoch": 0.42, "grad_norm": 3.1744859807399264, "learning_rate": 6.584811493699191e-06, "loss": 0.7771, "step": 5115 }, { "epoch": 0.42, "grad_norm": 2.520752983370148, "learning_rate": 6.5835639065152104e-06, "loss": 0.7636, "step": 5116 }, { "epoch": 0.42, "grad_norm": 3.1938231693178265, "learning_rate": 6.582316209738638e-06, "loss": 0.6011, "step": 5117 }, { "epoch": 0.42, "grad_norm": 5.3442702512233575, "learning_rate": 6.581068403455819e-06, "loss": 0.6621, "step": 5118 }, { "epoch": 0.42, "grad_norm": 3.429171022776963, "learning_rate": 6.57982048775311e-06, "loss": 0.6215, "step": 5119 }, { "epoch": 0.42, "grad_norm": 3.3808351967077597, "learning_rate": 6.578572462716879e-06, "loss": 0.5334, "step": 5120 }, { "epoch": 0.42, "grad_norm": 3.619364772181749, "learning_rate": 6.577324328433492e-06, "loss": 0.6761, "step": 5121 }, { "epoch": 0.42, "grad_norm": 2.3036710815834702, "learning_rate": 6.576076084989329e-06, "loss": 0.8721, "step": 5122 }, { "epoch": 0.42, "grad_norm": 5.510460953040238, "learning_rate": 6.574827732470779e-06, "loss": 0.7249, "step": 5123 }, { "epoch": 0.42, "grad_norm": 2.4336381851440287, "learning_rate": 6.573579270964233e-06, "loss": 0.6295, "step": 5124 }, { "epoch": 0.42, "grad_norm": 3.0377773121945295, "learning_rate": 6.5723307005560955e-06, "loss": 0.6313, "step": 5125 }, { "epoch": 0.42, "grad_norm": 7.118248019100789, "learning_rate": 6.571082021332771e-06, "loss": 0.6986, "step": 5126 }, { "epoch": 0.42, "grad_norm": 3.5451460556434857, "learning_rate": 6.569833233380679e-06, "loss": 0.7131, "step": 5127 }, { "epoch": 0.42, "grad_norm": 2.564663367975666, "learning_rate": 6.568584336786242e-06, "loss": 0.637, "step": 5128 }, { "epoch": 0.42, "grad_norm": 2.961397101498486, "learning_rate": 6.567335331635892e-06, "loss": 0.6969, "step": 5129 }, { "epoch": 0.42, "grad_norm": 2.4849552668604438, "learning_rate": 6.56608621801607e-06, "loss": 0.6407, "step": 5130 }, { "epoch": 0.42, "grad_norm": 3.674512913794182, "learning_rate": 6.56483699601322e-06, "loss": 0.6845, "step": 5131 }, { "epoch": 0.42, "grad_norm": 4.3391434463276415, "learning_rate": 6.563587665713796e-06, "loss": 0.7919, "step": 5132 }, { "epoch": 0.42, "grad_norm": 12.216543918921209, "learning_rate": 6.5623382272042625e-06, "loss": 0.6832, "step": 5133 }, { "epoch": 0.42, "grad_norm": 2.4319586239511515, "learning_rate": 6.561088680571085e-06, "loss": 0.8278, "step": 5134 }, { "epoch": 0.42, "grad_norm": 2.3387045496885652, "learning_rate": 6.5598390259007415e-06, "loss": 0.6763, "step": 5135 }, { "epoch": 0.42, "grad_norm": 2.934483979902261, "learning_rate": 6.558589263279716e-06, "loss": 0.7004, "step": 5136 }, { "epoch": 0.42, "grad_norm": 2.095079294763798, "learning_rate": 6.5573393927945e-06, "loss": 0.6582, "step": 5137 }, { "epoch": 0.42, "grad_norm": 4.087866607874321, "learning_rate": 6.55608941453159e-06, "loss": 0.6146, "step": 5138 }, { "epoch": 0.42, "grad_norm": 3.6322183237178773, "learning_rate": 6.554839328577497e-06, "loss": 0.5761, "step": 5139 }, { "epoch": 0.42, "grad_norm": 3.766186539120334, "learning_rate": 6.553589135018732e-06, "loss": 0.6836, "step": 5140 }, { "epoch": 0.42, "grad_norm": 6.9979409521366005, "learning_rate": 6.552338833941816e-06, "loss": 0.6977, "step": 5141 }, { "epoch": 0.42, "grad_norm": 3.965183701058465, "learning_rate": 6.55108842543328e-06, "loss": 0.6959, "step": 5142 }, { "epoch": 0.42, "grad_norm": 31.73922927426521, "learning_rate": 6.549837909579656e-06, "loss": 0.7231, "step": 5143 }, { "epoch": 0.42, "grad_norm": 9.150695788568113, "learning_rate": 6.548587286467491e-06, "loss": 0.5688, "step": 5144 }, { "epoch": 0.42, "grad_norm": 2.2439802547757273, "learning_rate": 6.547336556183336e-06, "loss": 0.7135, "step": 5145 }, { "epoch": 0.42, "grad_norm": 2.1758341284687845, "learning_rate": 6.546085718813747e-06, "loss": 0.6759, "step": 5146 }, { "epoch": 0.42, "grad_norm": 4.600316076343942, "learning_rate": 6.544834774445293e-06, "loss": 0.6583, "step": 5147 }, { "epoch": 0.42, "grad_norm": 3.9783717881697234, "learning_rate": 6.543583723164544e-06, "loss": 0.59, "step": 5148 }, { "epoch": 0.42, "grad_norm": 7.690640250820197, "learning_rate": 6.542332565058084e-06, "loss": 0.719, "step": 5149 }, { "epoch": 0.42, "grad_norm": 4.644284559525008, "learning_rate": 6.541081300212499e-06, "loss": 0.7001, "step": 5150 }, { "epoch": 0.42, "grad_norm": 5.36075757365983, "learning_rate": 6.539829928714383e-06, "loss": 0.6966, "step": 5151 }, { "epoch": 0.42, "grad_norm": 4.556602367139547, "learning_rate": 6.53857845065034e-06, "loss": 0.7444, "step": 5152 }, { "epoch": 0.42, "grad_norm": 5.552920175440035, "learning_rate": 6.537326866106981e-06, "loss": 0.767, "step": 5153 }, { "epoch": 0.42, "grad_norm": 4.280691675546673, "learning_rate": 6.536075175170924e-06, "loss": 0.6648, "step": 5154 }, { "epoch": 0.42, "grad_norm": 5.713586960112847, "learning_rate": 6.534823377928792e-06, "loss": 0.6264, "step": 5155 }, { "epoch": 0.42, "grad_norm": 10.2953113987641, "learning_rate": 6.533571474467218e-06, "loss": 0.631, "step": 5156 }, { "epoch": 0.42, "grad_norm": 12.942227843345615, "learning_rate": 6.532319464872844e-06, "loss": 0.6948, "step": 5157 }, { "epoch": 0.42, "grad_norm": 9.429229635928682, "learning_rate": 6.531067349232314e-06, "loss": 0.6742, "step": 5158 }, { "epoch": 0.42, "grad_norm": 4.796309895459906, "learning_rate": 6.529815127632282e-06, "loss": 0.688, "step": 5159 }, { "epoch": 0.42, "grad_norm": 10.193115991265875, "learning_rate": 6.52856280015941e-06, "loss": 0.7021, "step": 5160 }, { "epoch": 0.42, "grad_norm": 11.770248955558008, "learning_rate": 6.527310366900369e-06, "loss": 0.7637, "step": 5161 }, { "epoch": 0.42, "grad_norm": 8.431529611564065, "learning_rate": 6.5260578279418325e-06, "loss": 0.7375, "step": 5162 }, { "epoch": 0.42, "grad_norm": 16.99620538103692, "learning_rate": 6.524805183370486e-06, "loss": 0.7121, "step": 5163 }, { "epoch": 0.42, "grad_norm": 5.196343131842406, "learning_rate": 6.523552433273022e-06, "loss": 0.7002, "step": 5164 }, { "epoch": 0.42, "grad_norm": 12.332410083977896, "learning_rate": 6.522299577736133e-06, "loss": 0.7808, "step": 5165 }, { "epoch": 0.42, "grad_norm": 8.264414180080209, "learning_rate": 6.52104661684653e-06, "loss": 0.7886, "step": 5166 }, { "epoch": 0.42, "grad_norm": 4.892588304177848, "learning_rate": 6.519793550690925e-06, "loss": 0.7552, "step": 5167 }, { "epoch": 0.42, "grad_norm": 31.26440008643601, "learning_rate": 6.5185403793560355e-06, "loss": 0.8189, "step": 5168 }, { "epoch": 0.42, "grad_norm": 4.685607991350681, "learning_rate": 6.517287102928589e-06, "loss": 0.5534, "step": 5169 }, { "epoch": 0.42, "grad_norm": 2.966266684294976, "learning_rate": 6.516033721495323e-06, "loss": 0.7885, "step": 5170 }, { "epoch": 0.42, "grad_norm": 5.783215349797061, "learning_rate": 6.514780235142977e-06, "loss": 0.632, "step": 5171 }, { "epoch": 0.42, "grad_norm": 4.247895608543652, "learning_rate": 6.5135266439583015e-06, "loss": 0.6885, "step": 5172 }, { "epoch": 0.42, "grad_norm": 3.3025766230107756, "learning_rate": 6.512272948028051e-06, "loss": 0.724, "step": 5173 }, { "epoch": 0.42, "grad_norm": 5.457575891686157, "learning_rate": 6.511019147438993e-06, "loss": 0.847, "step": 5174 }, { "epoch": 0.42, "grad_norm": 13.301671293282698, "learning_rate": 6.5097652422778935e-06, "loss": 0.7902, "step": 5175 }, { "epoch": 0.42, "grad_norm": 2.2266357158870314, "learning_rate": 6.508511232631534e-06, "loss": 0.6921, "step": 5176 }, { "epoch": 0.42, "grad_norm": 2.5796610179572355, "learning_rate": 6.507257118586698e-06, "loss": 0.658, "step": 5177 }, { "epoch": 0.42, "grad_norm": 4.9065930683545425, "learning_rate": 6.5060029002301795e-06, "loss": 0.7826, "step": 5178 }, { "epoch": 0.42, "grad_norm": 5.428587699557702, "learning_rate": 6.504748577648777e-06, "loss": 0.5166, "step": 5179 }, { "epoch": 0.42, "grad_norm": 18.02019485302552, "learning_rate": 6.503494150929299e-06, "loss": 0.7552, "step": 5180 }, { "epoch": 0.42, "grad_norm": 5.6144885296774545, "learning_rate": 6.502239620158559e-06, "loss": 0.7687, "step": 5181 }, { "epoch": 0.42, "grad_norm": 3.703952575926243, "learning_rate": 6.5009849854233786e-06, "loss": 0.7183, "step": 5182 }, { "epoch": 0.42, "grad_norm": 6.348378046959388, "learning_rate": 6.499730246810587e-06, "loss": 0.6924, "step": 5183 }, { "epoch": 0.42, "grad_norm": 7.5343143950488765, "learning_rate": 6.498475404407018e-06, "loss": 0.656, "step": 5184 }, { "epoch": 0.42, "grad_norm": 4.563908370107235, "learning_rate": 6.497220458299515e-06, "loss": 0.7761, "step": 5185 }, { "epoch": 0.42, "grad_norm": 5.343834255337502, "learning_rate": 6.495965408574929e-06, "loss": 0.7318, "step": 5186 }, { "epoch": 0.42, "grad_norm": 3.269661660866481, "learning_rate": 6.4947102553201195e-06, "loss": 0.5819, "step": 5187 }, { "epoch": 0.42, "grad_norm": 5.315334843453977, "learning_rate": 6.493454998621946e-06, "loss": 0.617, "step": 5188 }, { "epoch": 0.42, "grad_norm": 3.2394630519777396, "learning_rate": 6.492199638567285e-06, "loss": 0.8927, "step": 5189 }, { "epoch": 0.42, "grad_norm": 2.8214706011424995, "learning_rate": 6.490944175243014e-06, "loss": 0.676, "step": 5190 }, { "epoch": 0.42, "grad_norm": 4.496179827915101, "learning_rate": 6.4896886087360175e-06, "loss": 0.5796, "step": 5191 }, { "epoch": 0.42, "grad_norm": 4.434267996887535, "learning_rate": 6.488432939133189e-06, "loss": 0.6357, "step": 5192 }, { "epoch": 0.42, "grad_norm": 8.005459111908596, "learning_rate": 6.48717716652143e-06, "loss": 0.7053, "step": 5193 }, { "epoch": 0.42, "grad_norm": 2.6813766175192026, "learning_rate": 6.485921290987647e-06, "loss": 0.7689, "step": 5194 }, { "epoch": 0.42, "grad_norm": 2.872702294269331, "learning_rate": 6.484665312618753e-06, "loss": 0.7115, "step": 5195 }, { "epoch": 0.42, "grad_norm": 4.036473892756181, "learning_rate": 6.483409231501672e-06, "loss": 0.7021, "step": 5196 }, { "epoch": 0.42, "grad_norm": 6.135692667271775, "learning_rate": 6.482153047723332e-06, "loss": 0.7552, "step": 5197 }, { "epoch": 0.42, "grad_norm": 3.6355384421325057, "learning_rate": 6.48089676137067e-06, "loss": 0.7156, "step": 5198 }, { "epoch": 0.42, "grad_norm": 3.471728382828722, "learning_rate": 6.479640372530626e-06, "loss": 0.5728, "step": 5199 }, { "epoch": 0.42, "grad_norm": 7.029022578711728, "learning_rate": 6.478383881290152e-06, "loss": 0.835, "step": 5200 }, { "epoch": 0.42, "grad_norm": 3.337360464801638, "learning_rate": 6.477127287736204e-06, "loss": 0.7169, "step": 5201 }, { "epoch": 0.42, "grad_norm": 2.9142618980402086, "learning_rate": 6.475870591955748e-06, "loss": 0.692, "step": 5202 }, { "epoch": 0.42, "grad_norm": 15.778039891864742, "learning_rate": 6.474613794035754e-06, "loss": 0.8646, "step": 5203 }, { "epoch": 0.42, "grad_norm": 4.8798800276251555, "learning_rate": 6.4733568940632e-06, "loss": 0.6414, "step": 5204 }, { "epoch": 0.42, "grad_norm": 2.855605537100388, "learning_rate": 6.472099892125072e-06, "loss": 0.5577, "step": 5205 }, { "epoch": 0.42, "grad_norm": 6.356064153681271, "learning_rate": 6.470842788308362e-06, "loss": 0.7305, "step": 5206 }, { "epoch": 0.42, "grad_norm": 3.1160279666025703, "learning_rate": 6.469585582700072e-06, "loss": 0.6576, "step": 5207 }, { "epoch": 0.42, "grad_norm": 4.385586245173973, "learning_rate": 6.468328275387205e-06, "loss": 0.8128, "step": 5208 }, { "epoch": 0.42, "grad_norm": 27.59206274518409, "learning_rate": 6.467070866456775e-06, "loss": 0.6198, "step": 5209 }, { "epoch": 0.42, "grad_norm": 3.0298702840887834, "learning_rate": 6.465813355995804e-06, "loss": 0.7304, "step": 5210 }, { "epoch": 0.42, "grad_norm": 4.155438365195864, "learning_rate": 6.46455574409132e-06, "loss": 0.7968, "step": 5211 }, { "epoch": 0.42, "grad_norm": 3.5751198034119693, "learning_rate": 6.463298030830356e-06, "loss": 0.7073, "step": 5212 }, { "epoch": 0.42, "grad_norm": 7.9524352485608825, "learning_rate": 6.462040216299956e-06, "loss": 0.5154, "step": 5213 }, { "epoch": 0.42, "grad_norm": 8.711240861725129, "learning_rate": 6.460782300587166e-06, "loss": 0.7545, "step": 5214 }, { "epoch": 0.42, "grad_norm": 4.065254070984416, "learning_rate": 6.459524283779044e-06, "loss": 0.5958, "step": 5215 }, { "epoch": 0.42, "grad_norm": 5.101286995693898, "learning_rate": 6.45826616596265e-06, "loss": 0.8396, "step": 5216 }, { "epoch": 0.42, "grad_norm": 3.725259205413698, "learning_rate": 6.457007947225058e-06, "loss": 0.7448, "step": 5217 }, { "epoch": 0.42, "grad_norm": 3.4903826060355505, "learning_rate": 6.455749627653339e-06, "loss": 0.7276, "step": 5218 }, { "epoch": 0.42, "grad_norm": 2.350746765882947, "learning_rate": 6.454491207334581e-06, "loss": 0.5739, "step": 5219 }, { "epoch": 0.42, "grad_norm": 3.9980741016218415, "learning_rate": 6.453232686355874e-06, "loss": 0.8459, "step": 5220 }, { "epoch": 0.42, "grad_norm": 3.0919523750894706, "learning_rate": 6.451974064804313e-06, "loss": 0.7157, "step": 5221 }, { "epoch": 0.42, "grad_norm": 3.2924188436001987, "learning_rate": 6.450715342767005e-06, "loss": 0.5553, "step": 5222 }, { "epoch": 0.42, "grad_norm": 4.3733312320345465, "learning_rate": 6.449456520331063e-06, "loss": 0.7715, "step": 5223 }, { "epoch": 0.42, "grad_norm": 2.172647147742984, "learning_rate": 6.448197597583601e-06, "loss": 0.5613, "step": 5224 }, { "epoch": 0.42, "grad_norm": 3.073730504806312, "learning_rate": 6.446938574611746e-06, "loss": 0.664, "step": 5225 }, { "epoch": 0.42, "grad_norm": 3.1584711224678976, "learning_rate": 6.445679451502634e-06, "loss": 0.6146, "step": 5226 }, { "epoch": 0.42, "grad_norm": 2.2321161089320563, "learning_rate": 6.444420228343398e-06, "loss": 0.7656, "step": 5227 }, { "epoch": 0.42, "grad_norm": 6.053805218679485, "learning_rate": 6.443160905221188e-06, "loss": 0.6996, "step": 5228 }, { "epoch": 0.42, "grad_norm": 2.4876072940662923, "learning_rate": 6.441901482223156e-06, "loss": 0.7718, "step": 5229 }, { "epoch": 0.42, "grad_norm": 3.568482438754372, "learning_rate": 6.440641959436464e-06, "loss": 0.8145, "step": 5230 }, { "epoch": 0.42, "grad_norm": 3.0355656464641467, "learning_rate": 6.439382336948278e-06, "loss": 0.7285, "step": 5231 }, { "epoch": 0.42, "grad_norm": 10.794388631020505, "learning_rate": 6.438122614845769e-06, "loss": 0.7317, "step": 5232 }, { "epoch": 0.43, "grad_norm": 3.0983815463994655, "learning_rate": 6.436862793216121e-06, "loss": 0.6455, "step": 5233 }, { "epoch": 0.43, "grad_norm": 2.7557869137663573, "learning_rate": 6.43560287214652e-06, "loss": 0.5485, "step": 5234 }, { "epoch": 0.43, "grad_norm": 2.3923688040249127, "learning_rate": 6.4343428517241616e-06, "loss": 0.7034, "step": 5235 }, { "epoch": 0.43, "grad_norm": 4.558950485112192, "learning_rate": 6.433082732036246e-06, "loss": 0.717, "step": 5236 }, { "epoch": 0.43, "grad_norm": 3.3868451320382196, "learning_rate": 6.431822513169983e-06, "loss": 0.6352, "step": 5237 }, { "epoch": 0.43, "grad_norm": 2.963829420735218, "learning_rate": 6.430562195212586e-06, "loss": 0.731, "step": 5238 }, { "epoch": 0.43, "grad_norm": 3.7732361484101915, "learning_rate": 6.4293017782512764e-06, "loss": 0.7049, "step": 5239 }, { "epoch": 0.43, "grad_norm": 3.0114183784804824, "learning_rate": 6.428041262373286e-06, "loss": 0.7233, "step": 5240 }, { "epoch": 0.43, "grad_norm": 5.50488873967489, "learning_rate": 6.4267806476658465e-06, "loss": 0.5451, "step": 5241 }, { "epoch": 0.43, "grad_norm": 5.862583260380868, "learning_rate": 6.425519934216204e-06, "loss": 0.7373, "step": 5242 }, { "epoch": 0.43, "grad_norm": 3.403880594270728, "learning_rate": 6.424259122111606e-06, "loss": 0.6205, "step": 5243 }, { "epoch": 0.43, "grad_norm": 3.34436582589992, "learning_rate": 6.422998211439307e-06, "loss": 0.8311, "step": 5244 }, { "epoch": 0.43, "grad_norm": 3.5309837569161067, "learning_rate": 6.421737202286573e-06, "loss": 0.8683, "step": 5245 }, { "epoch": 0.43, "grad_norm": 4.555725876837524, "learning_rate": 6.420476094740674e-06, "loss": 0.7112, "step": 5246 }, { "epoch": 0.43, "grad_norm": 5.919165322381834, "learning_rate": 6.419214888888885e-06, "loss": 0.786, "step": 5247 }, { "epoch": 0.43, "grad_norm": 3.445026652851622, "learning_rate": 6.417953584818488e-06, "loss": 0.6499, "step": 5248 }, { "epoch": 0.43, "grad_norm": 2.880883584163261, "learning_rate": 6.416692182616775e-06, "loss": 0.6731, "step": 5249 }, { "epoch": 0.43, "grad_norm": 2.5967675235794005, "learning_rate": 6.415430682371044e-06, "loss": 0.7002, "step": 5250 }, { "epoch": 0.43, "grad_norm": 2.4823750866949648, "learning_rate": 6.414169084168596e-06, "loss": 0.7132, "step": 5251 }, { "epoch": 0.43, "grad_norm": 3.8252562789986375, "learning_rate": 6.412907388096743e-06, "loss": 0.8286, "step": 5252 }, { "epoch": 0.43, "grad_norm": 2.8408714821656793, "learning_rate": 6.411645594242804e-06, "loss": 0.7406, "step": 5253 }, { "epoch": 0.43, "grad_norm": 2.690635521919526, "learning_rate": 6.4103837026941e-06, "loss": 0.722, "step": 5254 }, { "epoch": 0.43, "grad_norm": 2.71976700201395, "learning_rate": 6.409121713537965e-06, "loss": 0.7916, "step": 5255 }, { "epoch": 0.43, "grad_norm": 6.066180290378848, "learning_rate": 6.407859626861734e-06, "loss": 0.5905, "step": 5256 }, { "epoch": 0.43, "grad_norm": 2.977591690609914, "learning_rate": 6.406597442752751e-06, "loss": 0.7574, "step": 5257 }, { "epoch": 0.43, "grad_norm": 2.7267330074179377, "learning_rate": 6.405335161298369e-06, "loss": 0.6836, "step": 5258 }, { "epoch": 0.43, "grad_norm": 4.220799983394707, "learning_rate": 6.404072782585945e-06, "loss": 0.6847, "step": 5259 }, { "epoch": 0.43, "grad_norm": 2.2952019183772787, "learning_rate": 6.402810306702845e-06, "loss": 0.8459, "step": 5260 }, { "epoch": 0.43, "grad_norm": 5.624079882266811, "learning_rate": 6.401547733736437e-06, "loss": 0.6673, "step": 5261 }, { "epoch": 0.43, "grad_norm": 4.25995636461149, "learning_rate": 6.400285063774102e-06, "loss": 0.8297, "step": 5262 }, { "epoch": 0.43, "grad_norm": 2.681753142751082, "learning_rate": 6.399022296903225e-06, "loss": 0.8774, "step": 5263 }, { "epoch": 0.43, "grad_norm": 2.9717594191112267, "learning_rate": 6.397759433211194e-06, "loss": 0.6591, "step": 5264 }, { "epoch": 0.43, "grad_norm": 7.1341135688177495, "learning_rate": 6.396496472785409e-06, "loss": 0.8238, "step": 5265 }, { "epoch": 0.43, "grad_norm": 3.8350327759210017, "learning_rate": 6.395233415713277e-06, "loss": 0.5831, "step": 5266 }, { "epoch": 0.43, "grad_norm": 8.359989898228111, "learning_rate": 6.393970262082205e-06, "loss": 0.7875, "step": 5267 }, { "epoch": 0.43, "grad_norm": 2.466873874646155, "learning_rate": 6.3927070119796156e-06, "loss": 0.6581, "step": 5268 }, { "epoch": 0.43, "grad_norm": 9.004229595642427, "learning_rate": 6.39144366549293e-06, "loss": 0.7972, "step": 5269 }, { "epoch": 0.43, "grad_norm": 2.487934171620757, "learning_rate": 6.390180222709583e-06, "loss": 0.6188, "step": 5270 }, { "epoch": 0.43, "grad_norm": 2.9649533628787617, "learning_rate": 6.388916683717011e-06, "loss": 0.8735, "step": 5271 }, { "epoch": 0.43, "grad_norm": 3.1661176758467966, "learning_rate": 6.38765304860266e-06, "loss": 0.6761, "step": 5272 }, { "epoch": 0.43, "grad_norm": 2.802007322700124, "learning_rate": 6.3863893174539805e-06, "loss": 0.6599, "step": 5273 }, { "epoch": 0.43, "grad_norm": 4.334763694730785, "learning_rate": 6.38512549035843e-06, "loss": 0.6104, "step": 5274 }, { "epoch": 0.43, "grad_norm": 2.661898851882122, "learning_rate": 6.383861567403473e-06, "loss": 0.675, "step": 5275 }, { "epoch": 0.43, "grad_norm": 2.66579857373232, "learning_rate": 6.382597548676583e-06, "loss": 0.661, "step": 5276 }, { "epoch": 0.43, "grad_norm": 2.8762056254224917, "learning_rate": 6.3813334342652375e-06, "loss": 0.698, "step": 5277 }, { "epoch": 0.43, "grad_norm": 6.9352400707030215, "learning_rate": 6.38006922425692e-06, "loss": 0.7164, "step": 5278 }, { "epoch": 0.43, "grad_norm": 13.967592407698485, "learning_rate": 6.3788049187391236e-06, "loss": 0.8905, "step": 5279 }, { "epoch": 0.43, "grad_norm": 2.71130406637019, "learning_rate": 6.377540517799346e-06, "loss": 0.7606, "step": 5280 }, { "epoch": 0.43, "grad_norm": 2.511387701274658, "learning_rate": 6.376276021525087e-06, "loss": 0.6524, "step": 5281 }, { "epoch": 0.43, "grad_norm": 2.608216246226552, "learning_rate": 6.375011430003864e-06, "loss": 0.7238, "step": 5282 }, { "epoch": 0.43, "grad_norm": 2.3530022828999266, "learning_rate": 6.373746743323193e-06, "loss": 0.6202, "step": 5283 }, { "epoch": 0.43, "grad_norm": 3.4515140554871566, "learning_rate": 6.372481961570597e-06, "loss": 0.7624, "step": 5284 }, { "epoch": 0.43, "grad_norm": 2.544913506823473, "learning_rate": 6.3712170848336064e-06, "loss": 0.5893, "step": 5285 }, { "epoch": 0.43, "grad_norm": 7.532904580049164, "learning_rate": 6.369952113199761e-06, "loss": 0.7289, "step": 5286 }, { "epoch": 0.43, "grad_norm": 3.2607505012216573, "learning_rate": 6.368687046756604e-06, "loss": 0.6654, "step": 5287 }, { "epoch": 0.43, "grad_norm": 2.5881251907207505, "learning_rate": 6.367421885591684e-06, "loss": 0.7131, "step": 5288 }, { "epoch": 0.43, "grad_norm": 2.713919739499223, "learning_rate": 6.3661566297925605e-06, "loss": 0.6192, "step": 5289 }, { "epoch": 0.43, "grad_norm": 3.71142486007151, "learning_rate": 6.364891279446795e-06, "loss": 0.761, "step": 5290 }, { "epoch": 0.43, "grad_norm": 2.8764213482393646, "learning_rate": 6.3636258346419585e-06, "loss": 0.633, "step": 5291 }, { "epoch": 0.43, "grad_norm": 2.8152901341654193, "learning_rate": 6.362360295465628e-06, "loss": 0.8202, "step": 5292 }, { "epoch": 0.43, "grad_norm": 2.8443223461121607, "learning_rate": 6.361094662005389e-06, "loss": 0.4909, "step": 5293 }, { "epoch": 0.43, "grad_norm": 3.807027557832339, "learning_rate": 6.359828934348828e-06, "loss": 0.6599, "step": 5294 }, { "epoch": 0.43, "grad_norm": 2.4240101495975956, "learning_rate": 6.3585631125835435e-06, "loss": 0.7188, "step": 5295 }, { "epoch": 0.43, "grad_norm": 2.6441013498808745, "learning_rate": 6.3572971967971364e-06, "loss": 0.707, "step": 5296 }, { "epoch": 0.43, "grad_norm": 2.7029358130174805, "learning_rate": 6.356031187077218e-06, "loss": 0.687, "step": 5297 }, { "epoch": 0.43, "grad_norm": 3.0102734735259773, "learning_rate": 6.3547650835114014e-06, "loss": 0.6227, "step": 5298 }, { "epoch": 0.43, "grad_norm": 3.4421901041565186, "learning_rate": 6.353498886187313e-06, "loss": 0.7078, "step": 5299 }, { "epoch": 0.43, "grad_norm": 3.457539779227769, "learning_rate": 6.352232595192577e-06, "loss": 0.7323, "step": 5300 }, { "epoch": 0.43, "grad_norm": 4.4629904295009935, "learning_rate": 6.3509662106148314e-06, "loss": 0.7436, "step": 5301 }, { "epoch": 0.43, "grad_norm": 2.5090402257104083, "learning_rate": 6.349699732541719e-06, "loss": 0.6919, "step": 5302 }, { "epoch": 0.43, "grad_norm": 2.224146709755235, "learning_rate": 6.348433161060886e-06, "loss": 0.8017, "step": 5303 }, { "epoch": 0.43, "grad_norm": 4.690659661724078, "learning_rate": 6.347166496259989e-06, "loss": 0.7003, "step": 5304 }, { "epoch": 0.43, "grad_norm": 2.9010303005852855, "learning_rate": 6.3458997382266865e-06, "loss": 0.6783, "step": 5305 }, { "epoch": 0.43, "grad_norm": 2.328712623138541, "learning_rate": 6.344632887048647e-06, "loss": 0.6279, "step": 5306 }, { "epoch": 0.43, "grad_norm": 2.38164698694708, "learning_rate": 6.343365942813546e-06, "loss": 0.6396, "step": 5307 }, { "epoch": 0.43, "grad_norm": 3.10426693429425, "learning_rate": 6.3420989056090645e-06, "loss": 0.6911, "step": 5308 }, { "epoch": 0.43, "grad_norm": 2.870607952941557, "learning_rate": 6.340831775522886e-06, "loss": 0.5323, "step": 5309 }, { "epoch": 0.43, "grad_norm": 3.0501657123256707, "learning_rate": 6.339564552642708e-06, "loss": 0.8065, "step": 5310 }, { "epoch": 0.43, "grad_norm": 3.154110893134373, "learning_rate": 6.338297237056228e-06, "loss": 0.6719, "step": 5311 }, { "epoch": 0.43, "grad_norm": 4.097632059306806, "learning_rate": 6.337029828851151e-06, "loss": 0.6379, "step": 5312 }, { "epoch": 0.43, "grad_norm": 5.838025767126768, "learning_rate": 6.335762328115194e-06, "loss": 0.5671, "step": 5313 }, { "epoch": 0.43, "grad_norm": 2.691955714219819, "learning_rate": 6.334494734936071e-06, "loss": 0.8376, "step": 5314 }, { "epoch": 0.43, "grad_norm": 3.142764302020504, "learning_rate": 6.333227049401509e-06, "loss": 0.8203, "step": 5315 }, { "epoch": 0.43, "grad_norm": 4.456752591272409, "learning_rate": 6.331959271599243e-06, "loss": 0.6692, "step": 5316 }, { "epoch": 0.43, "grad_norm": 2.9933192619704756, "learning_rate": 6.330691401617007e-06, "loss": 0.8276, "step": 5317 }, { "epoch": 0.43, "grad_norm": 4.115823291476445, "learning_rate": 6.3294234395425465e-06, "loss": 0.7502, "step": 5318 }, { "epoch": 0.43, "grad_norm": 3.288428928249791, "learning_rate": 6.328155385463616e-06, "loss": 0.8581, "step": 5319 }, { "epoch": 0.43, "grad_norm": 8.514299731306384, "learning_rate": 6.326887239467969e-06, "loss": 0.779, "step": 5320 }, { "epoch": 0.43, "grad_norm": 2.7487833267156625, "learning_rate": 6.32561900164337e-06, "loss": 0.6337, "step": 5321 }, { "epoch": 0.43, "grad_norm": 2.263419642375841, "learning_rate": 6.324350672077588e-06, "loss": 0.5782, "step": 5322 }, { "epoch": 0.43, "grad_norm": 3.9891408672023774, "learning_rate": 6.323082250858402e-06, "loss": 0.8111, "step": 5323 }, { "epoch": 0.43, "grad_norm": 3.5187577545160393, "learning_rate": 6.3218137380735934e-06, "loss": 0.6087, "step": 5324 }, { "epoch": 0.43, "grad_norm": 3.1313320933112423, "learning_rate": 6.32054513381095e-06, "loss": 0.6584, "step": 5325 }, { "epoch": 0.43, "grad_norm": 2.5967540577214225, "learning_rate": 6.319276438158271e-06, "loss": 0.7009, "step": 5326 }, { "epoch": 0.43, "grad_norm": 5.846245139453428, "learning_rate": 6.3180076512033525e-06, "loss": 0.5999, "step": 5327 }, { "epoch": 0.43, "grad_norm": 2.53987616270917, "learning_rate": 6.316738773034009e-06, "loss": 0.8392, "step": 5328 }, { "epoch": 0.43, "grad_norm": 3.1076312980597627, "learning_rate": 6.31546980373805e-06, "loss": 0.7322, "step": 5329 }, { "epoch": 0.43, "grad_norm": 3.33283973916105, "learning_rate": 6.314200743403297e-06, "loss": 0.639, "step": 5330 }, { "epoch": 0.43, "grad_norm": 2.90753301224908, "learning_rate": 6.312931592117578e-06, "loss": 0.6892, "step": 5331 }, { "epoch": 0.43, "grad_norm": 3.103386418908662, "learning_rate": 6.311662349968726e-06, "loss": 0.7516, "step": 5332 }, { "epoch": 0.43, "grad_norm": 2.9494648974373683, "learning_rate": 6.310393017044581e-06, "loss": 0.6699, "step": 5333 }, { "epoch": 0.43, "grad_norm": 3.3061706824576467, "learning_rate": 6.309123593432988e-06, "loss": 0.5765, "step": 5334 }, { "epoch": 0.43, "grad_norm": 9.268461813364416, "learning_rate": 6.3078540792218e-06, "loss": 0.7125, "step": 5335 }, { "epoch": 0.43, "grad_norm": 3.518779427584672, "learning_rate": 6.3065844744988746e-06, "loss": 0.5462, "step": 5336 }, { "epoch": 0.43, "grad_norm": 3.1758946125652905, "learning_rate": 6.305314779352076e-06, "loss": 0.6834, "step": 5337 }, { "epoch": 0.43, "grad_norm": 2.924179564159826, "learning_rate": 6.304044993869276e-06, "loss": 0.8562, "step": 5338 }, { "epoch": 0.43, "grad_norm": 3.722573843571273, "learning_rate": 6.302775118138352e-06, "loss": 0.7935, "step": 5339 }, { "epoch": 0.43, "grad_norm": 9.530991276176536, "learning_rate": 6.301505152247185e-06, "loss": 0.7517, "step": 5340 }, { "epoch": 0.43, "grad_norm": 3.5409695148358957, "learning_rate": 6.300235096283668e-06, "loss": 0.7535, "step": 5341 }, { "epoch": 0.43, "grad_norm": 3.2486045610096963, "learning_rate": 6.2989649503356955e-06, "loss": 0.8066, "step": 5342 }, { "epoch": 0.43, "grad_norm": 5.186509901986895, "learning_rate": 6.297694714491169e-06, "loss": 0.6076, "step": 5343 }, { "epoch": 0.43, "grad_norm": 2.4404382926400716, "learning_rate": 6.296424388837998e-06, "loss": 0.6935, "step": 5344 }, { "epoch": 0.43, "grad_norm": 3.790387822917204, "learning_rate": 6.295153973464095e-06, "loss": 0.7227, "step": 5345 }, { "epoch": 0.43, "grad_norm": 14.48362766681455, "learning_rate": 6.293883468457383e-06, "loss": 0.7985, "step": 5346 }, { "epoch": 0.43, "grad_norm": 4.66139114475922, "learning_rate": 6.2926128739057875e-06, "loss": 0.8366, "step": 5347 }, { "epoch": 0.43, "grad_norm": 5.048546098288123, "learning_rate": 6.291342189897242e-06, "loss": 0.7165, "step": 5348 }, { "epoch": 0.43, "grad_norm": 4.7119719851595185, "learning_rate": 6.2900714165196875e-06, "loss": 0.7916, "step": 5349 }, { "epoch": 0.43, "grad_norm": 2.4183444680567217, "learning_rate": 6.288800553861068e-06, "loss": 0.6321, "step": 5350 }, { "epoch": 0.43, "grad_norm": 6.195559737049939, "learning_rate": 6.287529602009334e-06, "loss": 0.7216, "step": 5351 }, { "epoch": 0.43, "grad_norm": 4.2480494812431235, "learning_rate": 6.286258561052444e-06, "loss": 0.8423, "step": 5352 }, { "epoch": 0.43, "grad_norm": 6.3369016060079915, "learning_rate": 6.284987431078364e-06, "loss": 0.7436, "step": 5353 }, { "epoch": 0.43, "grad_norm": 4.3225344423690135, "learning_rate": 6.283716212175062e-06, "loss": 0.6411, "step": 5354 }, { "epoch": 0.43, "grad_norm": 6.563367259530566, "learning_rate": 6.282444904430516e-06, "loss": 0.6836, "step": 5355 }, { "epoch": 0.44, "grad_norm": 5.865987301531571, "learning_rate": 6.281173507932708e-06, "loss": 0.6643, "step": 5356 }, { "epoch": 0.44, "grad_norm": 3.55239417917751, "learning_rate": 6.279902022769624e-06, "loss": 0.6865, "step": 5357 }, { "epoch": 0.44, "grad_norm": 3.1220185173679353, "learning_rate": 6.278630449029263e-06, "loss": 0.7681, "step": 5358 }, { "epoch": 0.44, "grad_norm": 4.186195006945495, "learning_rate": 6.277358786799623e-06, "loss": 0.8277, "step": 5359 }, { "epoch": 0.44, "grad_norm": 5.284091969487949, "learning_rate": 6.2760870361687145e-06, "loss": 0.6701, "step": 5360 }, { "epoch": 0.44, "grad_norm": 7.3253871428292685, "learning_rate": 6.2748151972245455e-06, "loss": 0.719, "step": 5361 }, { "epoch": 0.44, "grad_norm": 7.724022488010174, "learning_rate": 6.273543270055139e-06, "loss": 0.6994, "step": 5362 }, { "epoch": 0.44, "grad_norm": 3.6935322576458733, "learning_rate": 6.272271254748519e-06, "loss": 0.7592, "step": 5363 }, { "epoch": 0.44, "grad_norm": 3.97121808828623, "learning_rate": 6.2709991513927156e-06, "loss": 0.7802, "step": 5364 }, { "epoch": 0.44, "grad_norm": 21.21890987257359, "learning_rate": 6.26972696007577e-06, "loss": 0.7675, "step": 5365 }, { "epoch": 0.44, "grad_norm": 3.6353345420097516, "learning_rate": 6.268454680885725e-06, "loss": 0.8211, "step": 5366 }, { "epoch": 0.44, "grad_norm": 4.523650589718551, "learning_rate": 6.267182313910627e-06, "loss": 0.6816, "step": 5367 }, { "epoch": 0.44, "grad_norm": 3.021840541347513, "learning_rate": 6.265909859238536e-06, "loss": 0.7134, "step": 5368 }, { "epoch": 0.44, "grad_norm": 7.5793452218181985, "learning_rate": 6.264637316957512e-06, "loss": 0.7602, "step": 5369 }, { "epoch": 0.44, "grad_norm": 5.817414349145018, "learning_rate": 6.263364687155621e-06, "loss": 0.8693, "step": 5370 }, { "epoch": 0.44, "grad_norm": 2.935361977472497, "learning_rate": 6.262091969920938e-06, "loss": 0.7554, "step": 5371 }, { "epoch": 0.44, "grad_norm": 8.880339798788528, "learning_rate": 6.260819165341548e-06, "loss": 0.6667, "step": 5372 }, { "epoch": 0.44, "grad_norm": 2.4813681257006333, "learning_rate": 6.259546273505529e-06, "loss": 0.7014, "step": 5373 }, { "epoch": 0.44, "grad_norm": 3.783352149431896, "learning_rate": 6.258273294500978e-06, "loss": 0.6466, "step": 5374 }, { "epoch": 0.44, "grad_norm": 3.954436769701012, "learning_rate": 6.257000228415994e-06, "loss": 0.8158, "step": 5375 }, { "epoch": 0.44, "grad_norm": 2.574706782929329, "learning_rate": 6.255727075338678e-06, "loss": 0.7405, "step": 5376 }, { "epoch": 0.44, "grad_norm": 7.593442259735793, "learning_rate": 6.254453835357142e-06, "loss": 0.497, "step": 5377 }, { "epoch": 0.44, "grad_norm": 3.099777659845841, "learning_rate": 6.253180508559501e-06, "loss": 0.6707, "step": 5378 }, { "epoch": 0.44, "grad_norm": 2.829228847341015, "learning_rate": 6.25190709503388e-06, "loss": 0.6808, "step": 5379 }, { "epoch": 0.44, "grad_norm": 5.699881551059783, "learning_rate": 6.250633594868404e-06, "loss": 0.7624, "step": 5380 }, { "epoch": 0.44, "grad_norm": 6.473006615324479, "learning_rate": 6.2493600081512085e-06, "loss": 0.9757, "step": 5381 }, { "epoch": 0.44, "grad_norm": 5.066672490197237, "learning_rate": 6.248086334970435e-06, "loss": 0.8142, "step": 5382 }, { "epoch": 0.44, "grad_norm": 9.465462596898993, "learning_rate": 6.2468125754142275e-06, "loss": 0.6955, "step": 5383 }, { "epoch": 0.44, "grad_norm": 5.254627552383158, "learning_rate": 6.24553872957074e-06, "loss": 0.6811, "step": 5384 }, { "epoch": 0.44, "grad_norm": 3.201992377576008, "learning_rate": 6.244264797528129e-06, "loss": 0.7173, "step": 5385 }, { "epoch": 0.44, "grad_norm": 13.459291553091939, "learning_rate": 6.24299077937456e-06, "loss": 0.7101, "step": 5386 }, { "epoch": 0.44, "grad_norm": 3.4576304079499156, "learning_rate": 6.241716675198202e-06, "loss": 0.7596, "step": 5387 }, { "epoch": 0.44, "grad_norm": 3.350855293179054, "learning_rate": 6.240442485087231e-06, "loss": 0.7432, "step": 5388 }, { "epoch": 0.44, "grad_norm": 3.50583677390844, "learning_rate": 6.239168209129832e-06, "loss": 0.7151, "step": 5389 }, { "epoch": 0.44, "grad_norm": 2.352355412406717, "learning_rate": 6.237893847414188e-06, "loss": 0.6377, "step": 5390 }, { "epoch": 0.44, "grad_norm": 5.8153858448204785, "learning_rate": 6.2366194000284965e-06, "loss": 0.7262, "step": 5391 }, { "epoch": 0.44, "grad_norm": 3.3662347283669414, "learning_rate": 6.235344867060956e-06, "loss": 0.6757, "step": 5392 }, { "epoch": 0.44, "grad_norm": 3.6684975439693503, "learning_rate": 6.234070248599774e-06, "loss": 0.6573, "step": 5393 }, { "epoch": 0.44, "grad_norm": 4.02243878934282, "learning_rate": 6.232795544733158e-06, "loss": 0.6166, "step": 5394 }, { "epoch": 0.44, "grad_norm": 6.544225511912298, "learning_rate": 6.231520755549329e-06, "loss": 0.6406, "step": 5395 }, { "epoch": 0.44, "grad_norm": 2.9271099030260572, "learning_rate": 6.230245881136509e-06, "loss": 0.7559, "step": 5396 }, { "epoch": 0.44, "grad_norm": 3.6019111498678913, "learning_rate": 6.228970921582927e-06, "loss": 0.7281, "step": 5397 }, { "epoch": 0.44, "grad_norm": 2.7380268650313973, "learning_rate": 6.22769587697682e-06, "loss": 0.7908, "step": 5398 }, { "epoch": 0.44, "grad_norm": 3.2135957247105775, "learning_rate": 6.226420747406429e-06, "loss": 0.7685, "step": 5399 }, { "epoch": 0.44, "grad_norm": 2.5905397053270227, "learning_rate": 6.2251455329599995e-06, "loss": 0.7252, "step": 5400 }, { "epoch": 0.44, "grad_norm": 6.756989970341897, "learning_rate": 6.223870233725784e-06, "loss": 0.6458, "step": 5401 }, { "epoch": 0.44, "grad_norm": 3.56228681885131, "learning_rate": 6.222594849792043e-06, "loss": 0.7646, "step": 5402 }, { "epoch": 0.44, "grad_norm": 3.0864387286228014, "learning_rate": 6.22131938124704e-06, "loss": 0.5313, "step": 5403 }, { "epoch": 0.44, "grad_norm": 3.9830568539294364, "learning_rate": 6.220043828179046e-06, "loss": 0.7072, "step": 5404 }, { "epoch": 0.44, "grad_norm": 4.583329975242541, "learning_rate": 6.218768190676336e-06, "loss": 0.6741, "step": 5405 }, { "epoch": 0.44, "grad_norm": 3.48718690636573, "learning_rate": 6.217492468827194e-06, "loss": 0.6883, "step": 5406 }, { "epoch": 0.44, "grad_norm": 5.319676448035411, "learning_rate": 6.216216662719907e-06, "loss": 0.7973, "step": 5407 }, { "epoch": 0.44, "grad_norm": 2.941174486928553, "learning_rate": 6.21494077244277e-06, "loss": 0.6404, "step": 5408 }, { "epoch": 0.44, "grad_norm": 2.5846925557150016, "learning_rate": 6.2136647980840815e-06, "loss": 0.6866, "step": 5409 }, { "epoch": 0.44, "grad_norm": 2.7264782286603424, "learning_rate": 6.2123887397321456e-06, "loss": 0.8159, "step": 5410 }, { "epoch": 0.44, "grad_norm": 2.5762708589251018, "learning_rate": 6.2111125974752765e-06, "loss": 0.5873, "step": 5411 }, { "epoch": 0.44, "grad_norm": 3.252771569694799, "learning_rate": 6.209836371401789e-06, "loss": 0.7169, "step": 5412 }, { "epoch": 0.44, "grad_norm": 4.950665161342302, "learning_rate": 6.208560061600008e-06, "loss": 0.64, "step": 5413 }, { "epoch": 0.44, "grad_norm": 3.0404644272425223, "learning_rate": 6.207283668158259e-06, "loss": 0.5511, "step": 5414 }, { "epoch": 0.44, "grad_norm": 6.127639734923818, "learning_rate": 6.20600719116488e-06, "loss": 0.6127, "step": 5415 }, { "epoch": 0.44, "grad_norm": 3.843356708190703, "learning_rate": 6.204730630708209e-06, "loss": 0.5182, "step": 5416 }, { "epoch": 0.44, "grad_norm": 2.6569152052562646, "learning_rate": 6.203453986876594e-06, "loss": 0.6976, "step": 5417 }, { "epoch": 0.44, "grad_norm": 3.533354670299025, "learning_rate": 6.202177259758384e-06, "loss": 0.5959, "step": 5418 }, { "epoch": 0.44, "grad_norm": 5.209694022605238, "learning_rate": 6.20090044944194e-06, "loss": 0.8021, "step": 5419 }, { "epoch": 0.44, "grad_norm": 2.7257542660698593, "learning_rate": 6.199623556015621e-06, "loss": 0.7803, "step": 5420 }, { "epoch": 0.44, "grad_norm": 2.367116253574616, "learning_rate": 6.1983465795678e-06, "loss": 0.6215, "step": 5421 }, { "epoch": 0.44, "grad_norm": 3.1830152480743785, "learning_rate": 6.19706952018685e-06, "loss": 0.7885, "step": 5422 }, { "epoch": 0.44, "grad_norm": 4.721921655102054, "learning_rate": 6.195792377961152e-06, "loss": 0.7182, "step": 5423 }, { "epoch": 0.44, "grad_norm": 2.8564867760835946, "learning_rate": 6.194515152979093e-06, "loss": 0.6179, "step": 5424 }, { "epoch": 0.44, "grad_norm": 3.16518348319037, "learning_rate": 6.193237845329063e-06, "loss": 0.576, "step": 5425 }, { "epoch": 0.44, "grad_norm": 3.5048940898998686, "learning_rate": 6.191960455099461e-06, "loss": 0.7919, "step": 5426 }, { "epoch": 0.44, "grad_norm": 2.7602764225850676, "learning_rate": 6.19068298237869e-06, "loss": 0.7971, "step": 5427 }, { "epoch": 0.44, "grad_norm": 4.49977092855266, "learning_rate": 6.189405427255158e-06, "loss": 0.7714, "step": 5428 }, { "epoch": 0.44, "grad_norm": 2.465674191101762, "learning_rate": 6.188127789817284e-06, "loss": 0.8631, "step": 5429 }, { "epoch": 0.44, "grad_norm": 3.534092416065252, "learning_rate": 6.186850070153484e-06, "loss": 0.7703, "step": 5430 }, { "epoch": 0.44, "grad_norm": 5.954974833418722, "learning_rate": 6.1855722683521865e-06, "loss": 0.7837, "step": 5431 }, { "epoch": 0.44, "grad_norm": 3.186373510846532, "learning_rate": 6.184294384501824e-06, "loss": 0.671, "step": 5432 }, { "epoch": 0.44, "grad_norm": 2.3291820264808507, "learning_rate": 6.183016418690833e-06, "loss": 0.6637, "step": 5433 }, { "epoch": 0.44, "grad_norm": 4.491703984527265, "learning_rate": 6.181738371007657e-06, "loss": 0.6005, "step": 5434 }, { "epoch": 0.44, "grad_norm": 3.4114646909893525, "learning_rate": 6.180460241540745e-06, "loss": 0.6914, "step": 5435 }, { "epoch": 0.44, "grad_norm": 8.965727036413394, "learning_rate": 6.1791820303785495e-06, "loss": 0.7147, "step": 5436 }, { "epoch": 0.44, "grad_norm": 13.2255432902925, "learning_rate": 6.177903737609535e-06, "loss": 0.6077, "step": 5437 }, { "epoch": 0.44, "grad_norm": 2.1424648876354775, "learning_rate": 6.176625363322164e-06, "loss": 0.6357, "step": 5438 }, { "epoch": 0.44, "grad_norm": 3.233952699653693, "learning_rate": 6.17534690760491e-06, "loss": 0.8118, "step": 5439 }, { "epoch": 0.44, "grad_norm": 11.860065414271707, "learning_rate": 6.17406837054625e-06, "loss": 0.7259, "step": 5440 }, { "epoch": 0.44, "grad_norm": 3.353191894431032, "learning_rate": 6.172789752234665e-06, "loss": 0.672, "step": 5441 }, { "epoch": 0.44, "grad_norm": 2.5365073095911748, "learning_rate": 6.171511052758645e-06, "loss": 0.7353, "step": 5442 }, { "epoch": 0.44, "grad_norm": 1.9867565063265187, "learning_rate": 6.170232272206683e-06, "loss": 0.6947, "step": 5443 }, { "epoch": 0.44, "grad_norm": 7.802102200103752, "learning_rate": 6.16895341066728e-06, "loss": 0.7656, "step": 5444 }, { "epoch": 0.44, "grad_norm": 2.4924829618151336, "learning_rate": 6.1676744682289415e-06, "loss": 0.7499, "step": 5445 }, { "epoch": 0.44, "grad_norm": 2.3468149800417977, "learning_rate": 6.1663954449801755e-06, "loss": 0.663, "step": 5446 }, { "epoch": 0.44, "grad_norm": 3.0585915895021416, "learning_rate": 6.165116341009501e-06, "loss": 0.7797, "step": 5447 }, { "epoch": 0.44, "grad_norm": 4.057064431661013, "learning_rate": 6.1638371564054415e-06, "loss": 0.8421, "step": 5448 }, { "epoch": 0.44, "grad_norm": 3.9499814769573316, "learning_rate": 6.162557891256521e-06, "loss": 0.5301, "step": 5449 }, { "epoch": 0.44, "grad_norm": 5.417813111914994, "learning_rate": 6.1612785456512745e-06, "loss": 0.8148, "step": 5450 }, { "epoch": 0.44, "grad_norm": 2.541330293301017, "learning_rate": 6.159999119678241e-06, "loss": 0.7168, "step": 5451 }, { "epoch": 0.44, "grad_norm": 6.457983100996122, "learning_rate": 6.158719613425964e-06, "loss": 0.7683, "step": 5452 }, { "epoch": 0.44, "grad_norm": 3.0991812769342886, "learning_rate": 6.1574400269829934e-06, "loss": 0.6132, "step": 5453 }, { "epoch": 0.44, "grad_norm": 3.3400809026059073, "learning_rate": 6.156160360437885e-06, "loss": 0.6904, "step": 5454 }, { "epoch": 0.44, "grad_norm": 2.3273916895260562, "learning_rate": 6.154880613879202e-06, "loss": 0.8502, "step": 5455 }, { "epoch": 0.44, "grad_norm": 2.507437687895256, "learning_rate": 6.153600787395506e-06, "loss": 0.5769, "step": 5456 }, { "epoch": 0.44, "grad_norm": 2.080519541895471, "learning_rate": 6.152320881075374e-06, "loss": 0.7646, "step": 5457 }, { "epoch": 0.44, "grad_norm": 2.3478652515986926, "learning_rate": 6.151040895007382e-06, "loss": 0.7436, "step": 5458 }, { "epoch": 0.44, "grad_norm": 2.4164658315290923, "learning_rate": 6.1497608292801105e-06, "loss": 0.6299, "step": 5459 }, { "epoch": 0.44, "grad_norm": 5.725614000961182, "learning_rate": 6.14848068398215e-06, "loss": 0.7445, "step": 5460 }, { "epoch": 0.44, "grad_norm": 2.1824596819168454, "learning_rate": 6.147200459202095e-06, "loss": 0.707, "step": 5461 }, { "epoch": 0.44, "grad_norm": 5.207859413440403, "learning_rate": 6.145920155028546e-06, "loss": 0.762, "step": 5462 }, { "epoch": 0.44, "grad_norm": 3.7187974262118453, "learning_rate": 6.144639771550106e-06, "loss": 0.7088, "step": 5463 }, { "epoch": 0.44, "grad_norm": 3.1082905453030985, "learning_rate": 6.143359308855388e-06, "loss": 0.7101, "step": 5464 }, { "epoch": 0.44, "grad_norm": 3.871269266122093, "learning_rate": 6.142078767033006e-06, "loss": 0.5926, "step": 5465 }, { "epoch": 0.44, "grad_norm": 2.6660709616399156, "learning_rate": 6.140798146171581e-06, "loss": 0.6339, "step": 5466 }, { "epoch": 0.44, "grad_norm": 3.0958431901142163, "learning_rate": 6.139517446359742e-06, "loss": 0.6837, "step": 5467 }, { "epoch": 0.44, "grad_norm": 2.9837431784955566, "learning_rate": 6.138236667686121e-06, "loss": 0.7579, "step": 5468 }, { "epoch": 0.44, "grad_norm": 3.2499933360219453, "learning_rate": 6.136955810239356e-06, "loss": 0.6534, "step": 5469 }, { "epoch": 0.44, "grad_norm": 2.8064188398389214, "learning_rate": 6.135674874108089e-06, "loss": 0.7902, "step": 5470 }, { "epoch": 0.44, "grad_norm": 3.2751762399093107, "learning_rate": 6.134393859380969e-06, "loss": 0.6461, "step": 5471 }, { "epoch": 0.44, "grad_norm": 3.5868773927172124, "learning_rate": 6.1331127661466525e-06, "loss": 0.8174, "step": 5472 }, { "epoch": 0.44, "grad_norm": 3.8423729790280716, "learning_rate": 6.1318315944937985e-06, "loss": 0.7063, "step": 5473 }, { "epoch": 0.44, "grad_norm": 3.513132486641011, "learning_rate": 6.130550344511071e-06, "loss": 0.7053, "step": 5474 }, { "epoch": 0.44, "grad_norm": 4.097479482107203, "learning_rate": 6.129269016287142e-06, "loss": 0.6955, "step": 5475 }, { "epoch": 0.44, "grad_norm": 4.1922935058248285, "learning_rate": 6.127987609910685e-06, "loss": 0.6594, "step": 5476 }, { "epoch": 0.44, "grad_norm": 2.8918171031402364, "learning_rate": 6.126706125470383e-06, "loss": 0.7509, "step": 5477 }, { "epoch": 0.44, "grad_norm": 3.1915852462643763, "learning_rate": 6.125424563054925e-06, "loss": 0.6067, "step": 5478 }, { "epoch": 0.45, "grad_norm": 3.696561461594429, "learning_rate": 6.124142922752998e-06, "loss": 0.8133, "step": 5479 }, { "epoch": 0.45, "grad_norm": 13.331292131497543, "learning_rate": 6.122861204653304e-06, "loss": 0.7111, "step": 5480 }, { "epoch": 0.45, "grad_norm": 2.9090074060075053, "learning_rate": 6.121579408844546e-06, "loss": 0.6356, "step": 5481 }, { "epoch": 0.45, "grad_norm": 3.5576086257561386, "learning_rate": 6.1202975354154296e-06, "loss": 0.5959, "step": 5482 }, { "epoch": 0.45, "grad_norm": 2.524695286913268, "learning_rate": 6.1190155844546695e-06, "loss": 0.6309, "step": 5483 }, { "epoch": 0.45, "grad_norm": 3.5432535038546273, "learning_rate": 6.117733556050985e-06, "loss": 0.8023, "step": 5484 }, { "epoch": 0.45, "grad_norm": 3.185092093595493, "learning_rate": 6.1164514502931e-06, "loss": 0.7325, "step": 5485 }, { "epoch": 0.45, "grad_norm": 5.288818852742843, "learning_rate": 6.115169267269746e-06, "loss": 0.7447, "step": 5486 }, { "epoch": 0.45, "grad_norm": 3.504440840954236, "learning_rate": 6.113887007069657e-06, "loss": 0.6257, "step": 5487 }, { "epoch": 0.45, "grad_norm": 2.7656473141883997, "learning_rate": 6.112604669781572e-06, "loss": 0.6709, "step": 5488 }, { "epoch": 0.45, "grad_norm": 3.028114962404329, "learning_rate": 6.1113222554942405e-06, "loss": 0.6002, "step": 5489 }, { "epoch": 0.45, "grad_norm": 9.162837392962425, "learning_rate": 6.1100397642964105e-06, "loss": 0.6283, "step": 5490 }, { "epoch": 0.45, "grad_norm": 3.468672856032037, "learning_rate": 6.108757196276839e-06, "loss": 0.6276, "step": 5491 }, { "epoch": 0.45, "grad_norm": 2.943584562463514, "learning_rate": 6.107474551524288e-06, "loss": 0.6981, "step": 5492 }, { "epoch": 0.45, "grad_norm": 2.423080842383334, "learning_rate": 6.106191830127526e-06, "loss": 0.5114, "step": 5493 }, { "epoch": 0.45, "grad_norm": 11.155287187583347, "learning_rate": 6.104909032175323e-06, "loss": 0.7022, "step": 5494 }, { "epoch": 0.45, "grad_norm": 3.8154891910531292, "learning_rate": 6.103626157756459e-06, "loss": 0.7656, "step": 5495 }, { "epoch": 0.45, "grad_norm": 8.136020163417781, "learning_rate": 6.102343206959714e-06, "loss": 0.8002, "step": 5496 }, { "epoch": 0.45, "grad_norm": 2.3983422652249082, "learning_rate": 6.101060179873881e-06, "loss": 0.6919, "step": 5497 }, { "epoch": 0.45, "grad_norm": 2.9597901697306592, "learning_rate": 6.099777076587749e-06, "loss": 0.7012, "step": 5498 }, { "epoch": 0.45, "grad_norm": 3.6252602399527274, "learning_rate": 6.098493897190119e-06, "loss": 0.7057, "step": 5499 }, { "epoch": 0.45, "grad_norm": 3.3362153013550264, "learning_rate": 6.097210641769794e-06, "loss": 0.8149, "step": 5500 }, { "epoch": 0.45, "grad_norm": 2.541577381763081, "learning_rate": 6.095927310415584e-06, "loss": 0.6942, "step": 5501 }, { "epoch": 0.45, "grad_norm": 5.712320679657128, "learning_rate": 6.094643903216304e-06, "loss": 0.531, "step": 5502 }, { "epoch": 0.45, "grad_norm": 5.377621185877719, "learning_rate": 6.0933604202607735e-06, "loss": 0.685, "step": 5503 }, { "epoch": 0.45, "grad_norm": 2.7563797054336563, "learning_rate": 6.092076861637817e-06, "loss": 0.6915, "step": 5504 }, { "epoch": 0.45, "grad_norm": 3.2962370508527163, "learning_rate": 6.0907932274362655e-06, "loss": 0.6769, "step": 5505 }, { "epoch": 0.45, "grad_norm": 3.053444836208826, "learning_rate": 6.089509517744956e-06, "loss": 0.6877, "step": 5506 }, { "epoch": 0.45, "grad_norm": 3.944120604345821, "learning_rate": 6.088225732652726e-06, "loss": 0.7667, "step": 5507 }, { "epoch": 0.45, "grad_norm": 2.98845035372142, "learning_rate": 6.086941872248424e-06, "loss": 0.6201, "step": 5508 }, { "epoch": 0.45, "grad_norm": 2.58968636755926, "learning_rate": 6.0856579366209005e-06, "loss": 0.57, "step": 5509 }, { "epoch": 0.45, "grad_norm": 3.5525262240433158, "learning_rate": 6.084373925859011e-06, "loss": 0.5896, "step": 5510 }, { "epoch": 0.45, "grad_norm": 3.5940031361575833, "learning_rate": 6.083089840051619e-06, "loss": 0.7888, "step": 5511 }, { "epoch": 0.45, "grad_norm": 6.000042053568261, "learning_rate": 6.0818056792875905e-06, "loss": 0.6076, "step": 5512 }, { "epoch": 0.45, "grad_norm": 3.4356303213025208, "learning_rate": 6.080521443655797e-06, "loss": 0.7205, "step": 5513 }, { "epoch": 0.45, "grad_norm": 3.0369844295951616, "learning_rate": 6.079237133245115e-06, "loss": 0.5872, "step": 5514 }, { "epoch": 0.45, "grad_norm": 3.5597237644164257, "learning_rate": 6.07795274814443e-06, "loss": 0.5408, "step": 5515 }, { "epoch": 0.45, "grad_norm": 2.114816642177686, "learning_rate": 6.076668288442626e-06, "loss": 0.7376, "step": 5516 }, { "epoch": 0.45, "grad_norm": 3.4330311341119404, "learning_rate": 6.075383754228598e-06, "loss": 0.6608, "step": 5517 }, { "epoch": 0.45, "grad_norm": 10.80120312623891, "learning_rate": 6.074099145591242e-06, "loss": 0.5894, "step": 5518 }, { "epoch": 0.45, "grad_norm": 2.7215246870568555, "learning_rate": 6.072814462619463e-06, "loss": 0.7244, "step": 5519 }, { "epoch": 0.45, "grad_norm": 8.32048826020818, "learning_rate": 6.071529705402167e-06, "loss": 0.7393, "step": 5520 }, { "epoch": 0.45, "grad_norm": 3.649122727356196, "learning_rate": 6.0702448740282704e-06, "loss": 0.7106, "step": 5521 }, { "epoch": 0.45, "grad_norm": 2.9896427477115948, "learning_rate": 6.068959968586689e-06, "loss": 0.641, "step": 5522 }, { "epoch": 0.45, "grad_norm": 3.9209350591946897, "learning_rate": 6.0676749891663464e-06, "loss": 0.4698, "step": 5523 }, { "epoch": 0.45, "grad_norm": 2.379236859570022, "learning_rate": 6.066389935856172e-06, "loss": 0.8414, "step": 5524 }, { "epoch": 0.45, "grad_norm": 3.642893200893073, "learning_rate": 6.0651048087451e-06, "loss": 0.7144, "step": 5525 }, { "epoch": 0.45, "grad_norm": 3.1959707519466494, "learning_rate": 6.063819607922068e-06, "loss": 0.6937, "step": 5526 }, { "epoch": 0.45, "grad_norm": 2.9945364211937493, "learning_rate": 6.062534333476021e-06, "loss": 0.6656, "step": 5527 }, { "epoch": 0.45, "grad_norm": 4.856604907936098, "learning_rate": 6.061248985495909e-06, "loss": 0.7815, "step": 5528 }, { "epoch": 0.45, "grad_norm": 3.6345119796498477, "learning_rate": 6.059963564070683e-06, "loss": 0.5658, "step": 5529 }, { "epoch": 0.45, "grad_norm": 3.1557347854144875, "learning_rate": 6.058678069289307e-06, "loss": 0.7401, "step": 5530 }, { "epoch": 0.45, "grad_norm": 3.634940497522204, "learning_rate": 6.057392501240741e-06, "loss": 0.7574, "step": 5531 }, { "epoch": 0.45, "grad_norm": 5.064597336396302, "learning_rate": 6.056106860013956e-06, "loss": 0.721, "step": 5532 }, { "epoch": 0.45, "grad_norm": 2.918225148022936, "learning_rate": 6.0548211456979255e-06, "loss": 0.7276, "step": 5533 }, { "epoch": 0.45, "grad_norm": 2.4741388861959557, "learning_rate": 6.053535358381632e-06, "loss": 0.5945, "step": 5534 }, { "epoch": 0.45, "grad_norm": 3.04529080845913, "learning_rate": 6.052249498154057e-06, "loss": 0.6167, "step": 5535 }, { "epoch": 0.45, "grad_norm": 4.12801735906496, "learning_rate": 6.050963565104191e-06, "loss": 0.6862, "step": 5536 }, { "epoch": 0.45, "grad_norm": 4.327844939699068, "learning_rate": 6.04967755932103e-06, "loss": 0.5271, "step": 5537 }, { "epoch": 0.45, "grad_norm": 4.316307729140436, "learning_rate": 6.0483914808935715e-06, "loss": 0.8816, "step": 5538 }, { "epoch": 0.45, "grad_norm": 5.170019008761969, "learning_rate": 6.0471053299108216e-06, "loss": 0.6825, "step": 5539 }, { "epoch": 0.45, "grad_norm": 3.087785488458495, "learning_rate": 6.04581910646179e-06, "loss": 0.7986, "step": 5540 }, { "epoch": 0.45, "grad_norm": 3.2270329131187134, "learning_rate": 6.04453281063549e-06, "loss": 0.7098, "step": 5541 }, { "epoch": 0.45, "grad_norm": 3.092376325749032, "learning_rate": 6.0432464425209445e-06, "loss": 0.6118, "step": 5542 }, { "epoch": 0.45, "grad_norm": 7.453302381062141, "learning_rate": 6.041960002207174e-06, "loss": 0.7232, "step": 5543 }, { "epoch": 0.45, "grad_norm": 6.668788138829259, "learning_rate": 6.040673489783212e-06, "loss": 0.7153, "step": 5544 }, { "epoch": 0.45, "grad_norm": 4.745626269578607, "learning_rate": 6.039386905338093e-06, "loss": 0.7795, "step": 5545 }, { "epoch": 0.45, "grad_norm": 2.282937914918666, "learning_rate": 6.0381002489608554e-06, "loss": 0.744, "step": 5546 }, { "epoch": 0.45, "grad_norm": 2.1049495726609586, "learning_rate": 6.036813520740543e-06, "loss": 0.646, "step": 5547 }, { "epoch": 0.45, "grad_norm": 3.684562699653572, "learning_rate": 6.035526720766207e-06, "loss": 0.8077, "step": 5548 }, { "epoch": 0.45, "grad_norm": 5.170062740000753, "learning_rate": 6.034239849126901e-06, "loss": 0.7445, "step": 5549 }, { "epoch": 0.45, "grad_norm": 2.3936539023480927, "learning_rate": 6.032952905911686e-06, "loss": 0.6343, "step": 5550 }, { "epoch": 0.45, "grad_norm": 4.0941263605514795, "learning_rate": 6.031665891209627e-06, "loss": 0.6527, "step": 5551 }, { "epoch": 0.45, "grad_norm": 3.250914370966659, "learning_rate": 6.030378805109791e-06, "loss": 0.5753, "step": 5552 }, { "epoch": 0.45, "grad_norm": 12.738931809841883, "learning_rate": 6.029091647701254e-06, "loss": 0.6138, "step": 5553 }, { "epoch": 0.45, "grad_norm": 9.28200601067911, "learning_rate": 6.027804419073096e-06, "loss": 0.5888, "step": 5554 }, { "epoch": 0.45, "grad_norm": 6.855940234919852, "learning_rate": 6.0265171193144e-06, "loss": 0.7445, "step": 5555 }, { "epoch": 0.45, "grad_norm": 2.5646589553201324, "learning_rate": 6.025229748514256e-06, "loss": 0.6026, "step": 5556 }, { "epoch": 0.45, "grad_norm": 3.050897821941831, "learning_rate": 6.023942306761758e-06, "loss": 0.7622, "step": 5557 }, { "epoch": 0.45, "grad_norm": 2.5363723287320847, "learning_rate": 6.022654794146006e-06, "loss": 0.5502, "step": 5558 }, { "epoch": 0.45, "grad_norm": 3.4094850908888037, "learning_rate": 6.0213672107561005e-06, "loss": 0.7411, "step": 5559 }, { "epoch": 0.45, "grad_norm": 4.909537023299501, "learning_rate": 6.020079556681154e-06, "loss": 0.891, "step": 5560 }, { "epoch": 0.45, "grad_norm": 3.709870374550227, "learning_rate": 6.018791832010281e-06, "loss": 0.8141, "step": 5561 }, { "epoch": 0.45, "grad_norm": 5.678466723562747, "learning_rate": 6.017504036832598e-06, "loss": 0.7289, "step": 5562 }, { "epoch": 0.45, "grad_norm": 2.3966343157395165, "learning_rate": 6.016216171237228e-06, "loss": 0.8388, "step": 5563 }, { "epoch": 0.45, "grad_norm": 7.334902193859894, "learning_rate": 6.014928235313301e-06, "loss": 0.7347, "step": 5564 }, { "epoch": 0.45, "grad_norm": 3.076488541849444, "learning_rate": 6.013640229149948e-06, "loss": 0.7614, "step": 5565 }, { "epoch": 0.45, "grad_norm": 4.385063339589167, "learning_rate": 6.012352152836309e-06, "loss": 0.7464, "step": 5566 }, { "epoch": 0.45, "grad_norm": 2.7211592676806737, "learning_rate": 6.011064006461528e-06, "loss": 0.6434, "step": 5567 }, { "epoch": 0.45, "grad_norm": 7.345989756607234, "learning_rate": 6.009775790114751e-06, "loss": 0.5279, "step": 5568 }, { "epoch": 0.45, "grad_norm": 3.6401390064802857, "learning_rate": 6.008487503885132e-06, "loss": 0.6762, "step": 5569 }, { "epoch": 0.45, "grad_norm": 7.0361743741790095, "learning_rate": 6.0071991478618275e-06, "loss": 0.6972, "step": 5570 }, { "epoch": 0.45, "grad_norm": 3.7869377196617875, "learning_rate": 6.005910722134001e-06, "loss": 0.6637, "step": 5571 }, { "epoch": 0.45, "grad_norm": 3.3824748805857054, "learning_rate": 6.004622226790816e-06, "loss": 0.7765, "step": 5572 }, { "epoch": 0.45, "grad_norm": 3.9419536508176183, "learning_rate": 6.003333661921449e-06, "loss": 0.8397, "step": 5573 }, { "epoch": 0.45, "grad_norm": 4.581503634063919, "learning_rate": 6.002045027615076e-06, "loss": 0.6127, "step": 5574 }, { "epoch": 0.45, "grad_norm": 5.025940011311868, "learning_rate": 6.000756323960875e-06, "loss": 0.6275, "step": 5575 }, { "epoch": 0.45, "grad_norm": 5.212500305550624, "learning_rate": 5.999467551048037e-06, "loss": 0.8595, "step": 5576 }, { "epoch": 0.45, "grad_norm": 4.6083405386571, "learning_rate": 5.998178708965752e-06, "loss": 0.7639, "step": 5577 }, { "epoch": 0.45, "grad_norm": 2.523716581009918, "learning_rate": 5.996889797803214e-06, "loss": 0.6307, "step": 5578 }, { "epoch": 0.45, "grad_norm": 2.480568202181107, "learning_rate": 5.995600817649625e-06, "loss": 0.706, "step": 5579 }, { "epoch": 0.45, "grad_norm": 3.9920817575102427, "learning_rate": 5.994311768594191e-06, "loss": 0.7819, "step": 5580 }, { "epoch": 0.45, "grad_norm": 10.18474341357066, "learning_rate": 5.993022650726122e-06, "loss": 0.5788, "step": 5581 }, { "epoch": 0.45, "grad_norm": 3.670246154477628, "learning_rate": 5.9917334641346325e-06, "loss": 0.7016, "step": 5582 }, { "epoch": 0.45, "grad_norm": 3.058775787977504, "learning_rate": 5.990444208908942e-06, "loss": 0.6849, "step": 5583 }, { "epoch": 0.45, "grad_norm": 2.5166152428968305, "learning_rate": 5.989154885138279e-06, "loss": 0.6315, "step": 5584 }, { "epoch": 0.45, "grad_norm": 3.0160515905740626, "learning_rate": 5.987865492911866e-06, "loss": 0.5419, "step": 5585 }, { "epoch": 0.45, "grad_norm": 2.4451833308000075, "learning_rate": 5.986576032318943e-06, "loss": 0.7434, "step": 5586 }, { "epoch": 0.45, "grad_norm": 2.867969828637662, "learning_rate": 5.985286503448746e-06, "loss": 0.6824, "step": 5587 }, { "epoch": 0.45, "grad_norm": 7.427523669043565, "learning_rate": 5.9839969063905205e-06, "loss": 0.7957, "step": 5588 }, { "epoch": 0.45, "grad_norm": 3.163381115665876, "learning_rate": 5.982707241233511e-06, "loss": 0.6994, "step": 5589 }, { "epoch": 0.45, "grad_norm": 4.6747053778104615, "learning_rate": 5.981417508066974e-06, "loss": 0.6323, "step": 5590 }, { "epoch": 0.45, "grad_norm": 3.6449175123487665, "learning_rate": 5.980127706980165e-06, "loss": 0.6168, "step": 5591 }, { "epoch": 0.45, "grad_norm": 7.772738291633539, "learning_rate": 5.978837838062348e-06, "loss": 0.7204, "step": 5592 }, { "epoch": 0.45, "grad_norm": 4.140518459178593, "learning_rate": 5.9775479014027895e-06, "loss": 0.6567, "step": 5593 }, { "epoch": 0.45, "grad_norm": 4.528767214994769, "learning_rate": 5.976257897090761e-06, "loss": 0.7794, "step": 5594 }, { "epoch": 0.45, "grad_norm": 3.451332048831778, "learning_rate": 5.9749678252155394e-06, "loss": 0.724, "step": 5595 }, { "epoch": 0.45, "grad_norm": 4.9906710790072655, "learning_rate": 5.973677685866405e-06, "loss": 0.5742, "step": 5596 }, { "epoch": 0.45, "grad_norm": 4.202263350691895, "learning_rate": 5.9723874791326434e-06, "loss": 0.6339, "step": 5597 }, { "epoch": 0.45, "grad_norm": 2.8724974670014594, "learning_rate": 5.971097205103547e-06, "loss": 0.7919, "step": 5598 }, { "epoch": 0.45, "grad_norm": 2.5909744333726885, "learning_rate": 5.969806863868407e-06, "loss": 0.6262, "step": 5599 }, { "epoch": 0.45, "grad_norm": 5.699930880597009, "learning_rate": 5.968516455516526e-06, "loss": 0.5873, "step": 5600 }, { "epoch": 0.45, "grad_norm": 5.235105565494222, "learning_rate": 5.967225980137211e-06, "loss": 0.8451, "step": 5601 }, { "epoch": 0.45, "grad_norm": 4.22796480939009, "learning_rate": 5.9659354378197666e-06, "loss": 0.6706, "step": 5602 }, { "epoch": 0.46, "grad_norm": 2.930849867227877, "learning_rate": 5.964644828653506e-06, "loss": 0.721, "step": 5603 }, { "epoch": 0.46, "grad_norm": 3.2167317217587734, "learning_rate": 5.963354152727751e-06, "loss": 0.7929, "step": 5604 }, { "epoch": 0.46, "grad_norm": 5.41177027895188, "learning_rate": 5.962063410131823e-06, "loss": 0.7472, "step": 5605 }, { "epoch": 0.46, "grad_norm": 2.691295926289071, "learning_rate": 5.9607726009550494e-06, "loss": 0.8851, "step": 5606 }, { "epoch": 0.46, "grad_norm": 3.6409175034478127, "learning_rate": 5.959481725286761e-06, "loss": 0.8012, "step": 5607 }, { "epoch": 0.46, "grad_norm": 3.441324772598239, "learning_rate": 5.958190783216297e-06, "loss": 0.7659, "step": 5608 }, { "epoch": 0.46, "grad_norm": 3.0572127707864913, "learning_rate": 5.956899774832997e-06, "loss": 0.7427, "step": 5609 }, { "epoch": 0.46, "grad_norm": 3.9778288710614893, "learning_rate": 5.955608700226208e-06, "loss": 0.6225, "step": 5610 }, { "epoch": 0.46, "grad_norm": 3.2509851252178663, "learning_rate": 5.95431755948528e-06, "loss": 0.745, "step": 5611 }, { "epoch": 0.46, "grad_norm": 5.69934957624073, "learning_rate": 5.9530263526995665e-06, "loss": 0.7848, "step": 5612 }, { "epoch": 0.46, "grad_norm": 3.1729169445809746, "learning_rate": 5.9517350799584305e-06, "loss": 0.5803, "step": 5613 }, { "epoch": 0.46, "grad_norm": 3.543003657252106, "learning_rate": 5.950443741351234e-06, "loss": 0.585, "step": 5614 }, { "epoch": 0.46, "grad_norm": 8.161416037217528, "learning_rate": 5.949152336967345e-06, "loss": 0.8223, "step": 5615 }, { "epoch": 0.46, "grad_norm": 3.394931978827472, "learning_rate": 5.9478608668961375e-06, "loss": 0.7023, "step": 5616 }, { "epoch": 0.46, "grad_norm": 4.416962152982418, "learning_rate": 5.946569331226992e-06, "loss": 0.7282, "step": 5617 }, { "epoch": 0.46, "grad_norm": 4.0993338204237135, "learning_rate": 5.945277730049287e-06, "loss": 0.6273, "step": 5618 }, { "epoch": 0.46, "grad_norm": 5.011530479873847, "learning_rate": 5.943986063452412e-06, "loss": 0.8692, "step": 5619 }, { "epoch": 0.46, "grad_norm": 3.9213469938751015, "learning_rate": 5.942694331525758e-06, "loss": 0.6645, "step": 5620 }, { "epoch": 0.46, "grad_norm": 3.7476993046354106, "learning_rate": 5.94140253435872e-06, "loss": 0.751, "step": 5621 }, { "epoch": 0.46, "grad_norm": 4.7572062985866905, "learning_rate": 5.940110672040699e-06, "loss": 0.6993, "step": 5622 }, { "epoch": 0.46, "grad_norm": 5.4628389642146, "learning_rate": 5.938818744661099e-06, "loss": 0.6514, "step": 5623 }, { "epoch": 0.46, "grad_norm": 2.9679994983833153, "learning_rate": 5.937526752309331e-06, "loss": 0.7478, "step": 5624 }, { "epoch": 0.46, "grad_norm": 2.447392515908094, "learning_rate": 5.936234695074809e-06, "loss": 0.7964, "step": 5625 }, { "epoch": 0.46, "grad_norm": 5.188289071372854, "learning_rate": 5.934942573046953e-06, "loss": 0.7344, "step": 5626 }, { "epoch": 0.46, "grad_norm": 3.7085565278313073, "learning_rate": 5.9336503863151825e-06, "loss": 0.8268, "step": 5627 }, { "epoch": 0.46, "grad_norm": 2.173604035973248, "learning_rate": 5.932358134968925e-06, "loss": 0.5884, "step": 5628 }, { "epoch": 0.46, "grad_norm": 2.924074296830354, "learning_rate": 5.931065819097616e-06, "loss": 0.674, "step": 5629 }, { "epoch": 0.46, "grad_norm": 4.281831028199484, "learning_rate": 5.929773438790688e-06, "loss": 0.6652, "step": 5630 }, { "epoch": 0.46, "grad_norm": 3.9546505967765455, "learning_rate": 5.928480994137586e-06, "loss": 0.5957, "step": 5631 }, { "epoch": 0.46, "grad_norm": 2.8272042440677914, "learning_rate": 5.9271884852277505e-06, "loss": 0.7226, "step": 5632 }, { "epoch": 0.46, "grad_norm": 4.435080111584851, "learning_rate": 5.9258959121506345e-06, "loss": 0.6117, "step": 5633 }, { "epoch": 0.46, "grad_norm": 2.5976271870831904, "learning_rate": 5.924603274995693e-06, "loss": 0.5711, "step": 5634 }, { "epoch": 0.46, "grad_norm": 4.039696681909373, "learning_rate": 5.9233105738523835e-06, "loss": 0.8134, "step": 5635 }, { "epoch": 0.46, "grad_norm": 5.2321136845021, "learning_rate": 5.9220178088101654e-06, "loss": 0.7622, "step": 5636 }, { "epoch": 0.46, "grad_norm": 3.3330470101560548, "learning_rate": 5.920724979958512e-06, "loss": 0.6186, "step": 5637 }, { "epoch": 0.46, "grad_norm": 2.817319861523605, "learning_rate": 5.919432087386891e-06, "loss": 0.6629, "step": 5638 }, { "epoch": 0.46, "grad_norm": 2.315873781066584, "learning_rate": 5.918139131184781e-06, "loss": 0.6226, "step": 5639 }, { "epoch": 0.46, "grad_norm": 3.11057872721177, "learning_rate": 5.916846111441663e-06, "loss": 0.7952, "step": 5640 }, { "epoch": 0.46, "grad_norm": 3.121019219880057, "learning_rate": 5.915553028247021e-06, "loss": 0.5495, "step": 5641 }, { "epoch": 0.46, "grad_norm": 2.8758429756939488, "learning_rate": 5.914259881690343e-06, "loss": 0.6698, "step": 5642 }, { "epoch": 0.46, "grad_norm": 26.86074133130603, "learning_rate": 5.912966671861127e-06, "loss": 0.622, "step": 5643 }, { "epoch": 0.46, "grad_norm": 3.052048162915032, "learning_rate": 5.9116733988488676e-06, "loss": 0.7541, "step": 5644 }, { "epoch": 0.46, "grad_norm": 6.074595451940016, "learning_rate": 5.910380062743067e-06, "loss": 0.587, "step": 5645 }, { "epoch": 0.46, "grad_norm": 2.9505790955574724, "learning_rate": 5.909086663633235e-06, "loss": 0.6598, "step": 5646 }, { "epoch": 0.46, "grad_norm": 4.1289877899278355, "learning_rate": 5.9077932016088835e-06, "loss": 0.6435, "step": 5647 }, { "epoch": 0.46, "grad_norm": 3.8219784678777016, "learning_rate": 5.906499676759524e-06, "loss": 0.8832, "step": 5648 }, { "epoch": 0.46, "grad_norm": 2.6816686369529656, "learning_rate": 5.9052060891746796e-06, "loss": 0.5819, "step": 5649 }, { "epoch": 0.46, "grad_norm": 5.585264074473614, "learning_rate": 5.903912438943875e-06, "loss": 0.6244, "step": 5650 }, { "epoch": 0.46, "grad_norm": 2.2870317291391986, "learning_rate": 5.902618726156639e-06, "loss": 0.7642, "step": 5651 }, { "epoch": 0.46, "grad_norm": 2.7361087029961513, "learning_rate": 5.9013249509025016e-06, "loss": 0.6995, "step": 5652 }, { "epoch": 0.46, "grad_norm": 3.0358767993543, "learning_rate": 5.900031113271003e-06, "loss": 0.6731, "step": 5653 }, { "epoch": 0.46, "grad_norm": 6.020234028267428, "learning_rate": 5.8987372133516865e-06, "loss": 0.7503, "step": 5654 }, { "epoch": 0.46, "grad_norm": 4.05824232923475, "learning_rate": 5.897443251234093e-06, "loss": 0.7473, "step": 5655 }, { "epoch": 0.46, "grad_norm": 3.3459317319357025, "learning_rate": 5.896149227007776e-06, "loss": 0.7704, "step": 5656 }, { "epoch": 0.46, "grad_norm": 6.00936886339648, "learning_rate": 5.894855140762292e-06, "loss": 0.7694, "step": 5657 }, { "epoch": 0.46, "grad_norm": 3.557118944823089, "learning_rate": 5.893560992587196e-06, "loss": 0.56, "step": 5658 }, { "epoch": 0.46, "grad_norm": 5.122025383605861, "learning_rate": 5.892266782572053e-06, "loss": 0.7186, "step": 5659 }, { "epoch": 0.46, "grad_norm": 3.0711173904120903, "learning_rate": 5.890972510806431e-06, "loss": 0.8126, "step": 5660 }, { "epoch": 0.46, "grad_norm": 3.656009449391839, "learning_rate": 5.8896781773799015e-06, "loss": 0.7556, "step": 5661 }, { "epoch": 0.46, "grad_norm": 2.8440546554835766, "learning_rate": 5.88838378238204e-06, "loss": 0.8314, "step": 5662 }, { "epoch": 0.46, "grad_norm": 3.496472087959339, "learning_rate": 5.8870893259024264e-06, "loss": 0.5987, "step": 5663 }, { "epoch": 0.46, "grad_norm": 3.2173919049233346, "learning_rate": 5.885794808030647e-06, "loss": 0.6539, "step": 5664 }, { "epoch": 0.46, "grad_norm": 4.140234613234118, "learning_rate": 5.884500228856289e-06, "loss": 0.6819, "step": 5665 }, { "epoch": 0.46, "grad_norm": 3.3337652081867493, "learning_rate": 5.8832055884689465e-06, "loss": 0.6254, "step": 5666 }, { "epoch": 0.46, "grad_norm": 3.3628557674858146, "learning_rate": 5.881910886958214e-06, "loss": 0.4852, "step": 5667 }, { "epoch": 0.46, "grad_norm": 8.230373401705116, "learning_rate": 5.880616124413698e-06, "loss": 0.7346, "step": 5668 }, { "epoch": 0.46, "grad_norm": 3.9566658954950733, "learning_rate": 5.879321300924999e-06, "loss": 0.7342, "step": 5669 }, { "epoch": 0.46, "grad_norm": 3.6476031315904187, "learning_rate": 5.87802641658173e-06, "loss": 0.5692, "step": 5670 }, { "epoch": 0.46, "grad_norm": 5.064370883783466, "learning_rate": 5.876731471473506e-06, "loss": 0.7567, "step": 5671 }, { "epoch": 0.46, "grad_norm": 4.04424153330966, "learning_rate": 5.875436465689942e-06, "loss": 0.7693, "step": 5672 }, { "epoch": 0.46, "grad_norm": 6.36466460642214, "learning_rate": 5.874141399320662e-06, "loss": 0.6407, "step": 5673 }, { "epoch": 0.46, "grad_norm": 6.002740697211344, "learning_rate": 5.872846272455295e-06, "loss": 0.7776, "step": 5674 }, { "epoch": 0.46, "grad_norm": 3.4118763626272006, "learning_rate": 5.87155108518347e-06, "loss": 0.6891, "step": 5675 }, { "epoch": 0.46, "grad_norm": 3.728885451793463, "learning_rate": 5.8702558375948206e-06, "loss": 0.6166, "step": 5676 }, { "epoch": 0.46, "grad_norm": 2.1139439994434004, "learning_rate": 5.868960529778989e-06, "loss": 0.6329, "step": 5677 }, { "epoch": 0.46, "grad_norm": 3.940787110196132, "learning_rate": 5.8676651618256165e-06, "loss": 0.6884, "step": 5678 }, { "epoch": 0.46, "grad_norm": 2.9407849870387257, "learning_rate": 5.866369733824351e-06, "loss": 0.6168, "step": 5679 }, { "epoch": 0.46, "grad_norm": 3.5798745387504787, "learning_rate": 5.865074245864846e-06, "loss": 0.7867, "step": 5680 }, { "epoch": 0.46, "grad_norm": 11.785772420822497, "learning_rate": 5.863778698036755e-06, "loss": 0.6997, "step": 5681 }, { "epoch": 0.46, "grad_norm": 3.574881027180635, "learning_rate": 5.862483090429739e-06, "loss": 0.613, "step": 5682 }, { "epoch": 0.46, "grad_norm": 4.12846554216105, "learning_rate": 5.861187423133464e-06, "loss": 0.6573, "step": 5683 }, { "epoch": 0.46, "grad_norm": 4.213663798503768, "learning_rate": 5.859891696237597e-06, "loss": 0.739, "step": 5684 }, { "epoch": 0.46, "grad_norm": 3.7719208030161155, "learning_rate": 5.8585959098318105e-06, "loss": 0.6568, "step": 5685 }, { "epoch": 0.46, "grad_norm": 2.794899269878294, "learning_rate": 5.8573000640057785e-06, "loss": 0.5064, "step": 5686 }, { "epoch": 0.46, "grad_norm": 3.7338054153210294, "learning_rate": 5.8560041588491865e-06, "loss": 0.7223, "step": 5687 }, { "epoch": 0.46, "grad_norm": 3.698582594782864, "learning_rate": 5.854708194451716e-06, "loss": 0.5468, "step": 5688 }, { "epoch": 0.46, "grad_norm": 3.3607318881064305, "learning_rate": 5.853412170903055e-06, "loss": 0.6869, "step": 5689 }, { "epoch": 0.46, "grad_norm": 3.8200897134649816, "learning_rate": 5.852116088292901e-06, "loss": 0.8712, "step": 5690 }, { "epoch": 0.46, "grad_norm": 5.952389878530989, "learning_rate": 5.850819946710949e-06, "loss": 0.6798, "step": 5691 }, { "epoch": 0.46, "grad_norm": 3.712492403919604, "learning_rate": 5.8495237462468966e-06, "loss": 0.6828, "step": 5692 }, { "epoch": 0.46, "grad_norm": 4.202882236676595, "learning_rate": 5.848227486990452e-06, "loss": 0.5487, "step": 5693 }, { "epoch": 0.46, "grad_norm": 3.8337395248735526, "learning_rate": 5.846931169031327e-06, "loss": 0.5792, "step": 5694 }, { "epoch": 0.46, "grad_norm": 10.655087579167843, "learning_rate": 5.8456347924592295e-06, "loss": 0.7377, "step": 5695 }, { "epoch": 0.46, "grad_norm": 3.8491552200290187, "learning_rate": 5.844338357363881e-06, "loss": 0.7035, "step": 5696 }, { "epoch": 0.46, "grad_norm": 2.586370026034477, "learning_rate": 5.843041863835003e-06, "loss": 0.6127, "step": 5697 }, { "epoch": 0.46, "grad_norm": 5.799013215808532, "learning_rate": 5.8417453119623176e-06, "loss": 0.7516, "step": 5698 }, { "epoch": 0.46, "grad_norm": 2.1161423026699886, "learning_rate": 5.840448701835559e-06, "loss": 0.5723, "step": 5699 }, { "epoch": 0.46, "grad_norm": 3.5375681381626314, "learning_rate": 5.839152033544457e-06, "loss": 0.7619, "step": 5700 }, { "epoch": 0.46, "grad_norm": 3.86569600862823, "learning_rate": 5.8378553071787504e-06, "loss": 0.6268, "step": 5701 }, { "epoch": 0.46, "grad_norm": 3.522907617552013, "learning_rate": 5.836558522828181e-06, "loss": 0.7016, "step": 5702 }, { "epoch": 0.46, "grad_norm": 3.16770843781035, "learning_rate": 5.835261680582493e-06, "loss": 0.7634, "step": 5703 }, { "epoch": 0.46, "grad_norm": 39.16838223939189, "learning_rate": 5.8339647805314404e-06, "loss": 0.5516, "step": 5704 }, { "epoch": 0.46, "grad_norm": 5.386697719590322, "learning_rate": 5.832667822764771e-06, "loss": 0.6797, "step": 5705 }, { "epoch": 0.46, "grad_norm": 5.822638438817317, "learning_rate": 5.8313708073722475e-06, "loss": 0.6236, "step": 5706 }, { "epoch": 0.46, "grad_norm": 3.4141997894462914, "learning_rate": 5.8300737344436285e-06, "loss": 0.739, "step": 5707 }, { "epoch": 0.46, "grad_norm": 6.134334423886971, "learning_rate": 5.828776604068682e-06, "loss": 0.8001, "step": 5708 }, { "epoch": 0.46, "grad_norm": 2.88565880958368, "learning_rate": 5.827479416337174e-06, "loss": 0.5722, "step": 5709 }, { "epoch": 0.46, "grad_norm": 3.3064773259282565, "learning_rate": 5.826182171338882e-06, "loss": 0.6725, "step": 5710 }, { "epoch": 0.46, "grad_norm": 2.962434387159482, "learning_rate": 5.824884869163581e-06, "loss": 0.7108, "step": 5711 }, { "epoch": 0.46, "grad_norm": 2.504568175047801, "learning_rate": 5.8235875099010516e-06, "loss": 0.6987, "step": 5712 }, { "epoch": 0.46, "grad_norm": 8.532626738523177, "learning_rate": 5.822290093641081e-06, "loss": 0.6987, "step": 5713 }, { "epoch": 0.46, "grad_norm": 5.563573249454162, "learning_rate": 5.82099262047346e-06, "loss": 0.7139, "step": 5714 }, { "epoch": 0.46, "grad_norm": 3.2633249747597834, "learning_rate": 5.81969509048798e-06, "loss": 0.7311, "step": 5715 }, { "epoch": 0.46, "grad_norm": 3.1130594721384233, "learning_rate": 5.818397503774438e-06, "loss": 0.5648, "step": 5716 }, { "epoch": 0.46, "grad_norm": 5.200537756627442, "learning_rate": 5.817099860422637e-06, "loss": 0.6039, "step": 5717 }, { "epoch": 0.46, "grad_norm": 3.575690563457655, "learning_rate": 5.815802160522379e-06, "loss": 0.6895, "step": 5718 }, { "epoch": 0.46, "grad_norm": 5.191206926447397, "learning_rate": 5.814504404163474e-06, "loss": 0.7822, "step": 5719 }, { "epoch": 0.46, "grad_norm": 3.843853172438234, "learning_rate": 5.813206591435739e-06, "loss": 0.65, "step": 5720 }, { "epoch": 0.46, "grad_norm": 3.7326265425687346, "learning_rate": 5.8119087224289835e-06, "loss": 0.5419, "step": 5721 }, { "epoch": 0.46, "grad_norm": 4.738835725464466, "learning_rate": 5.810610797233034e-06, "loss": 0.6809, "step": 5722 }, { "epoch": 0.46, "grad_norm": 3.6040389488624194, "learning_rate": 5.809312815937715e-06, "loss": 0.6499, "step": 5723 }, { "epoch": 0.46, "grad_norm": 2.5317128008661065, "learning_rate": 5.808014778632852e-06, "loss": 0.7133, "step": 5724 }, { "epoch": 0.46, "grad_norm": 2.715819480135261, "learning_rate": 5.806716685408278e-06, "loss": 0.7213, "step": 5725 }, { "epoch": 0.47, "grad_norm": 5.089468218101545, "learning_rate": 5.805418536353829e-06, "loss": 0.7569, "step": 5726 }, { "epoch": 0.47, "grad_norm": 3.2017915414262146, "learning_rate": 5.804120331559349e-06, "loss": 0.7109, "step": 5727 }, { "epoch": 0.47, "grad_norm": 5.749780916241432, "learning_rate": 5.802822071114676e-06, "loss": 0.5977, "step": 5728 }, { "epoch": 0.47, "grad_norm": 3.0991520659980245, "learning_rate": 5.801523755109661e-06, "loss": 0.762, "step": 5729 }, { "epoch": 0.47, "grad_norm": 2.886788888167605, "learning_rate": 5.8002253836341586e-06, "loss": 0.5904, "step": 5730 }, { "epoch": 0.47, "grad_norm": 5.08278486768797, "learning_rate": 5.798926956778017e-06, "loss": 0.6509, "step": 5731 }, { "epoch": 0.47, "grad_norm": 2.667785174468762, "learning_rate": 5.797628474631102e-06, "loss": 0.622, "step": 5732 }, { "epoch": 0.47, "grad_norm": 2.427148721908196, "learning_rate": 5.796329937283274e-06, "loss": 0.6385, "step": 5733 }, { "epoch": 0.47, "grad_norm": 4.44670655917923, "learning_rate": 5.795031344824399e-06, "loss": 0.7065, "step": 5734 }, { "epoch": 0.47, "grad_norm": 3.044007728134551, "learning_rate": 5.79373269734435e-06, "loss": 0.6678, "step": 5735 }, { "epoch": 0.47, "grad_norm": 4.636121910766767, "learning_rate": 5.792433994932999e-06, "loss": 0.7862, "step": 5736 }, { "epoch": 0.47, "grad_norm": 3.541543341081417, "learning_rate": 5.791135237680228e-06, "loss": 0.6495, "step": 5737 }, { "epoch": 0.47, "grad_norm": 4.0246541274018, "learning_rate": 5.7898364256759165e-06, "loss": 0.6219, "step": 5738 }, { "epoch": 0.47, "grad_norm": 3.1289732186735466, "learning_rate": 5.788537559009951e-06, "loss": 0.7909, "step": 5739 }, { "epoch": 0.47, "grad_norm": 5.813955663416041, "learning_rate": 5.787238637772223e-06, "loss": 0.6082, "step": 5740 }, { "epoch": 0.47, "grad_norm": 3.2116854166710462, "learning_rate": 5.785939662052622e-06, "loss": 0.7723, "step": 5741 }, { "epoch": 0.47, "grad_norm": 2.6618735405259573, "learning_rate": 5.784640631941048e-06, "loss": 0.7024, "step": 5742 }, { "epoch": 0.47, "grad_norm": 4.083561659330691, "learning_rate": 5.783341547527403e-06, "loss": 0.6005, "step": 5743 }, { "epoch": 0.47, "grad_norm": 2.421357905123556, "learning_rate": 5.782042408901589e-06, "loss": 0.7978, "step": 5744 }, { "epoch": 0.47, "grad_norm": 2.8161846841238853, "learning_rate": 5.780743216153516e-06, "loss": 0.6775, "step": 5745 }, { "epoch": 0.47, "grad_norm": 3.904692854314484, "learning_rate": 5.7794439693730975e-06, "loss": 0.7986, "step": 5746 }, { "epoch": 0.47, "grad_norm": 3.5547694305525224, "learning_rate": 5.778144668650248e-06, "loss": 0.5782, "step": 5747 }, { "epoch": 0.47, "grad_norm": 3.9329544545123323, "learning_rate": 5.776845314074889e-06, "loss": 0.6605, "step": 5748 }, { "epoch": 0.47, "grad_norm": 3.216768053365182, "learning_rate": 5.775545905736942e-06, "loss": 0.8415, "step": 5749 }, { "epoch": 0.47, "grad_norm": 8.504597650338404, "learning_rate": 5.774246443726336e-06, "loss": 0.5837, "step": 5750 }, { "epoch": 0.47, "grad_norm": 3.5881969988720246, "learning_rate": 5.772946928133e-06, "loss": 0.7973, "step": 5751 }, { "epoch": 0.47, "grad_norm": 3.6856821575615926, "learning_rate": 5.771647359046869e-06, "loss": 0.5745, "step": 5752 }, { "epoch": 0.47, "grad_norm": 9.574906428763217, "learning_rate": 5.770347736557884e-06, "loss": 0.4776, "step": 5753 }, { "epoch": 0.47, "grad_norm": 3.7102892250448543, "learning_rate": 5.769048060755984e-06, "loss": 0.6283, "step": 5754 }, { "epoch": 0.47, "grad_norm": 3.7308436260304365, "learning_rate": 5.7677483317311164e-06, "loss": 0.6557, "step": 5755 }, { "epoch": 0.47, "grad_norm": 6.574500988473744, "learning_rate": 5.766448549573229e-06, "loss": 0.699, "step": 5756 }, { "epoch": 0.47, "grad_norm": 5.118415093138679, "learning_rate": 5.765148714372277e-06, "loss": 0.6519, "step": 5757 }, { "epoch": 0.47, "grad_norm": 2.2409897826042755, "learning_rate": 5.7638488262182165e-06, "loss": 0.5972, "step": 5758 }, { "epoch": 0.47, "grad_norm": 3.6884911976600425, "learning_rate": 5.762548885201007e-06, "loss": 0.6959, "step": 5759 }, { "epoch": 0.47, "grad_norm": 5.139632734738237, "learning_rate": 5.761248891410613e-06, "loss": 0.8107, "step": 5760 }, { "epoch": 0.47, "grad_norm": 3.025660807293486, "learning_rate": 5.7599488449370025e-06, "loss": 0.5237, "step": 5761 }, { "epoch": 0.47, "grad_norm": 3.093683102846561, "learning_rate": 5.758648745870147e-06, "loss": 0.5644, "step": 5762 }, { "epoch": 0.47, "grad_norm": 6.919776825753386, "learning_rate": 5.757348594300021e-06, "loss": 0.6525, "step": 5763 }, { "epoch": 0.47, "grad_norm": 3.777505815189163, "learning_rate": 5.7560483903166065e-06, "loss": 0.66, "step": 5764 }, { "epoch": 0.47, "grad_norm": 2.3929340656365445, "learning_rate": 5.75474813400988e-06, "loss": 0.7158, "step": 5765 }, { "epoch": 0.47, "grad_norm": 4.137709474719427, "learning_rate": 5.75344782546983e-06, "loss": 0.7297, "step": 5766 }, { "epoch": 0.47, "grad_norm": 3.3866960714223273, "learning_rate": 5.752147464786449e-06, "loss": 0.579, "step": 5767 }, { "epoch": 0.47, "grad_norm": 3.131827846944271, "learning_rate": 5.750847052049725e-06, "loss": 0.5822, "step": 5768 }, { "epoch": 0.47, "grad_norm": 2.9317651863324223, "learning_rate": 5.749546587349657e-06, "loss": 0.7502, "step": 5769 }, { "epoch": 0.47, "grad_norm": 3.092554287942367, "learning_rate": 5.748246070776248e-06, "loss": 0.7726, "step": 5770 }, { "epoch": 0.47, "grad_norm": 2.758893373931066, "learning_rate": 5.746945502419497e-06, "loss": 0.7071, "step": 5771 }, { "epoch": 0.47, "grad_norm": 3.0261085238691052, "learning_rate": 5.745644882369417e-06, "loss": 0.8018, "step": 5772 }, { "epoch": 0.47, "grad_norm": 4.276685473006278, "learning_rate": 5.744344210716015e-06, "loss": 0.8957, "step": 5773 }, { "epoch": 0.47, "grad_norm": 5.507044419375576, "learning_rate": 5.743043487549306e-06, "loss": 0.7051, "step": 5774 }, { "epoch": 0.47, "grad_norm": 3.9443991174452244, "learning_rate": 5.741742712959308e-06, "loss": 0.7576, "step": 5775 }, { "epoch": 0.47, "grad_norm": 2.8144654746747593, "learning_rate": 5.740441887036046e-06, "loss": 0.7318, "step": 5776 }, { "epoch": 0.47, "grad_norm": 4.22064613606551, "learning_rate": 5.7391410098695435e-06, "loss": 0.7537, "step": 5777 }, { "epoch": 0.47, "grad_norm": 4.186332948345009, "learning_rate": 5.737840081549827e-06, "loss": 0.7192, "step": 5778 }, { "epoch": 0.47, "grad_norm": 5.5055057345079526, "learning_rate": 5.736539102166934e-06, "loss": 0.6411, "step": 5779 }, { "epoch": 0.47, "grad_norm": 3.101918992158709, "learning_rate": 5.7352380718108954e-06, "loss": 0.7521, "step": 5780 }, { "epoch": 0.47, "grad_norm": 4.845211240583912, "learning_rate": 5.733936990571752e-06, "loss": 0.57, "step": 5781 }, { "epoch": 0.47, "grad_norm": 2.202408629073388, "learning_rate": 5.732635858539549e-06, "loss": 0.5978, "step": 5782 }, { "epoch": 0.47, "grad_norm": 3.586983445724865, "learning_rate": 5.731334675804332e-06, "loss": 0.7098, "step": 5783 }, { "epoch": 0.47, "grad_norm": 3.6435676348008093, "learning_rate": 5.730033442456149e-06, "loss": 0.6209, "step": 5784 }, { "epoch": 0.47, "grad_norm": 52.095506540077075, "learning_rate": 5.728732158585056e-06, "loss": 0.5997, "step": 5785 }, { "epoch": 0.47, "grad_norm": 2.6864175949424713, "learning_rate": 5.7274308242811095e-06, "loss": 0.8797, "step": 5786 }, { "epoch": 0.47, "grad_norm": 5.523806327719223, "learning_rate": 5.726129439634369e-06, "loss": 0.7574, "step": 5787 }, { "epoch": 0.47, "grad_norm": 2.3292710743903715, "learning_rate": 5.7248280047348995e-06, "loss": 0.7031, "step": 5788 }, { "epoch": 0.47, "grad_norm": 3.7397484215912056, "learning_rate": 5.7235265196727674e-06, "loss": 0.7593, "step": 5789 }, { "epoch": 0.47, "grad_norm": 3.1115980186534045, "learning_rate": 5.722224984538046e-06, "loss": 0.7853, "step": 5790 }, { "epoch": 0.47, "grad_norm": 12.085753514511365, "learning_rate": 5.720923399420807e-06, "loss": 0.6605, "step": 5791 }, { "epoch": 0.47, "grad_norm": 2.4791833487839816, "learning_rate": 5.7196217644111295e-06, "loss": 0.8459, "step": 5792 }, { "epoch": 0.47, "grad_norm": 4.757496764992668, "learning_rate": 5.718320079599096e-06, "loss": 0.8126, "step": 5793 }, { "epoch": 0.47, "grad_norm": 2.635265762213373, "learning_rate": 5.717018345074788e-06, "loss": 0.6242, "step": 5794 }, { "epoch": 0.47, "grad_norm": 2.981756047084814, "learning_rate": 5.715716560928297e-06, "loss": 0.6362, "step": 5795 }, { "epoch": 0.47, "grad_norm": 12.628437255206284, "learning_rate": 5.714414727249714e-06, "loss": 0.6901, "step": 5796 }, { "epoch": 0.47, "grad_norm": 2.713401817505127, "learning_rate": 5.713112844129133e-06, "loss": 0.6986, "step": 5797 }, { "epoch": 0.47, "grad_norm": 2.9097784367553396, "learning_rate": 5.7118109116566525e-06, "loss": 0.7297, "step": 5798 }, { "epoch": 0.47, "grad_norm": 2.506125793971184, "learning_rate": 5.710508929922376e-06, "loss": 0.7954, "step": 5799 }, { "epoch": 0.47, "grad_norm": 3.3120021207626227, "learning_rate": 5.709206899016407e-06, "loss": 0.7786, "step": 5800 }, { "epoch": 0.47, "grad_norm": 3.39427128004559, "learning_rate": 5.707904819028856e-06, "loss": 0.5329, "step": 5801 }, { "epoch": 0.47, "grad_norm": 9.124215767789128, "learning_rate": 5.706602690049832e-06, "loss": 0.8379, "step": 5802 }, { "epoch": 0.47, "grad_norm": 2.767818652394017, "learning_rate": 5.705300512169455e-06, "loss": 0.6424, "step": 5803 }, { "epoch": 0.47, "grad_norm": 7.092040605087711, "learning_rate": 5.703998285477842e-06, "loss": 0.6699, "step": 5804 }, { "epoch": 0.47, "grad_norm": 5.641383501817107, "learning_rate": 5.702696010065113e-06, "loss": 0.7998, "step": 5805 }, { "epoch": 0.47, "grad_norm": 3.9241281435525592, "learning_rate": 5.701393686021397e-06, "loss": 0.8518, "step": 5806 }, { "epoch": 0.47, "grad_norm": 5.942381314047857, "learning_rate": 5.70009131343682e-06, "loss": 0.6687, "step": 5807 }, { "epoch": 0.47, "grad_norm": 5.775864712214542, "learning_rate": 5.698788892401517e-06, "loss": 0.5846, "step": 5808 }, { "epoch": 0.47, "grad_norm": 2.3743417727785117, "learning_rate": 5.697486423005621e-06, "loss": 0.8248, "step": 5809 }, { "epoch": 0.47, "grad_norm": 6.809716475709886, "learning_rate": 5.696183905339277e-06, "loss": 0.6891, "step": 5810 }, { "epoch": 0.47, "grad_norm": 4.489926325140084, "learning_rate": 5.69488133949262e-06, "loss": 0.6153, "step": 5811 }, { "epoch": 0.47, "grad_norm": 3.7573023779335295, "learning_rate": 5.693578725555799e-06, "loss": 0.6711, "step": 5812 }, { "epoch": 0.47, "grad_norm": 2.2195998452821923, "learning_rate": 5.692276063618964e-06, "loss": 0.8454, "step": 5813 }, { "epoch": 0.47, "grad_norm": 2.342198622077043, "learning_rate": 5.690973353772267e-06, "loss": 0.7084, "step": 5814 }, { "epoch": 0.47, "grad_norm": 4.033431825319062, "learning_rate": 5.689670596105861e-06, "loss": 0.6825, "step": 5815 }, { "epoch": 0.47, "grad_norm": 3.8298968725001346, "learning_rate": 5.688367790709909e-06, "loss": 0.7143, "step": 5816 }, { "epoch": 0.47, "grad_norm": 3.1328692285414887, "learning_rate": 5.6870649376745714e-06, "loss": 0.7916, "step": 5817 }, { "epoch": 0.47, "grad_norm": 5.267488521284832, "learning_rate": 5.685762037090013e-06, "loss": 0.8928, "step": 5818 }, { "epoch": 0.47, "grad_norm": 2.775404435961653, "learning_rate": 5.6844590890464035e-06, "loss": 0.838, "step": 5819 }, { "epoch": 0.47, "grad_norm": 2.7889921218877536, "learning_rate": 5.683156093633917e-06, "loss": 0.5996, "step": 5820 }, { "epoch": 0.47, "grad_norm": 2.6694801130527464, "learning_rate": 5.681853050942727e-06, "loss": 0.6881, "step": 5821 }, { "epoch": 0.47, "grad_norm": 4.764167522320337, "learning_rate": 5.680549961063011e-06, "loss": 0.717, "step": 5822 }, { "epoch": 0.47, "grad_norm": 4.175036039467229, "learning_rate": 5.679246824084955e-06, "loss": 0.8186, "step": 5823 }, { "epoch": 0.47, "grad_norm": 3.8674061887972915, "learning_rate": 5.67794364009874e-06, "loss": 0.6814, "step": 5824 }, { "epoch": 0.47, "grad_norm": 2.7367872694303803, "learning_rate": 5.676640409194556e-06, "loss": 0.7518, "step": 5825 }, { "epoch": 0.47, "grad_norm": 6.3879362644659246, "learning_rate": 5.6753371314625975e-06, "loss": 0.8068, "step": 5826 }, { "epoch": 0.47, "grad_norm": 7.60863794393657, "learning_rate": 5.674033806993056e-06, "loss": 0.7635, "step": 5827 }, { "epoch": 0.47, "grad_norm": 3.0033696247012314, "learning_rate": 5.6727304358761305e-06, "loss": 0.8091, "step": 5828 }, { "epoch": 0.47, "grad_norm": 2.844480632353817, "learning_rate": 5.671427018202023e-06, "loss": 0.6503, "step": 5829 }, { "epoch": 0.47, "grad_norm": 5.593753465684358, "learning_rate": 5.6701235540609405e-06, "loss": 0.6583, "step": 5830 }, { "epoch": 0.47, "grad_norm": 2.714439462757601, "learning_rate": 5.668820043543085e-06, "loss": 0.508, "step": 5831 }, { "epoch": 0.47, "grad_norm": 5.760072604711219, "learning_rate": 5.667516486738672e-06, "loss": 0.7247, "step": 5832 }, { "epoch": 0.47, "grad_norm": 3.1566847002301386, "learning_rate": 5.666212883737917e-06, "loss": 0.6605, "step": 5833 }, { "epoch": 0.47, "grad_norm": 3.836457457320009, "learning_rate": 5.6649092346310345e-06, "loss": 0.5013, "step": 5834 }, { "epoch": 0.47, "grad_norm": 3.756194917486008, "learning_rate": 5.663605539508245e-06, "loss": 0.6346, "step": 5835 }, { "epoch": 0.47, "grad_norm": 3.902661297739872, "learning_rate": 5.662301798459777e-06, "loss": 0.5987, "step": 5836 }, { "epoch": 0.47, "grad_norm": 3.0792162104007725, "learning_rate": 5.660998011575853e-06, "loss": 0.7415, "step": 5837 }, { "epoch": 0.47, "grad_norm": 11.264493018311287, "learning_rate": 5.659694178946704e-06, "loss": 0.6967, "step": 5838 }, { "epoch": 0.47, "grad_norm": 3.1837744508695485, "learning_rate": 5.658390300662565e-06, "loss": 0.6992, "step": 5839 }, { "epoch": 0.47, "grad_norm": 1.9449491940769168, "learning_rate": 5.657086376813671e-06, "loss": 0.5513, "step": 5840 }, { "epoch": 0.47, "grad_norm": 2.6543461166200886, "learning_rate": 5.655782407490261e-06, "loss": 0.6874, "step": 5841 }, { "epoch": 0.47, "grad_norm": 6.906352367049715, "learning_rate": 5.65447839278258e-06, "loss": 0.7457, "step": 5842 }, { "epoch": 0.47, "grad_norm": 2.289287569957791, "learning_rate": 5.653174332780874e-06, "loss": 0.6938, "step": 5843 }, { "epoch": 0.47, "grad_norm": 3.354614906967554, "learning_rate": 5.651870227575391e-06, "loss": 0.6776, "step": 5844 }, { "epoch": 0.47, "grad_norm": 4.200319846334159, "learning_rate": 5.650566077256385e-06, "loss": 0.6575, "step": 5845 }, { "epoch": 0.47, "grad_norm": 2.2160457331030736, "learning_rate": 5.64926188191411e-06, "loss": 0.6131, "step": 5846 }, { "epoch": 0.47, "grad_norm": 5.722072076925793, "learning_rate": 5.647957641638823e-06, "loss": 0.7048, "step": 5847 }, { "epoch": 0.47, "grad_norm": 2.3342859862134913, "learning_rate": 5.646653356520788e-06, "loss": 0.6848, "step": 5848 }, { "epoch": 0.48, "grad_norm": 5.186189717786647, "learning_rate": 5.6453490266502695e-06, "loss": 0.7614, "step": 5849 }, { "epoch": 0.48, "grad_norm": 11.985845275655757, "learning_rate": 5.644044652117534e-06, "loss": 0.7533, "step": 5850 }, { "epoch": 0.48, "grad_norm": 3.603170116008523, "learning_rate": 5.642740233012854e-06, "loss": 0.5888, "step": 5851 }, { "epoch": 0.48, "grad_norm": 3.3150955015618258, "learning_rate": 5.6414357694265035e-06, "loss": 0.6464, "step": 5852 }, { "epoch": 0.48, "grad_norm": 20.751550720352387, "learning_rate": 5.640131261448758e-06, "loss": 0.8277, "step": 5853 }, { "epoch": 0.48, "grad_norm": 2.7856775008548214, "learning_rate": 5.638826709169899e-06, "loss": 0.7196, "step": 5854 }, { "epoch": 0.48, "grad_norm": 7.893554997978029, "learning_rate": 5.6375221126802085e-06, "loss": 0.8021, "step": 5855 }, { "epoch": 0.48, "grad_norm": 2.36359730894712, "learning_rate": 5.6362174720699744e-06, "loss": 0.6393, "step": 5856 }, { "epoch": 0.48, "grad_norm": 3.2477486501315624, "learning_rate": 5.6349127874294855e-06, "loss": 0.6184, "step": 5857 }, { "epoch": 0.48, "grad_norm": 4.95608148530001, "learning_rate": 5.633608058849033e-06, "loss": 0.6515, "step": 5858 }, { "epoch": 0.48, "grad_norm": 3.517205513420182, "learning_rate": 5.632303286418914e-06, "loss": 0.6956, "step": 5859 }, { "epoch": 0.48, "grad_norm": 3.1204648364844285, "learning_rate": 5.630998470229426e-06, "loss": 0.7374, "step": 5860 }, { "epoch": 0.48, "grad_norm": 2.444126220884048, "learning_rate": 5.6296936103708725e-06, "loss": 0.8583, "step": 5861 }, { "epoch": 0.48, "grad_norm": 5.799268369836265, "learning_rate": 5.6283887069335545e-06, "loss": 0.607, "step": 5862 }, { "epoch": 0.48, "grad_norm": 5.686337377886992, "learning_rate": 5.627083760007781e-06, "loss": 0.6211, "step": 5863 }, { "epoch": 0.48, "grad_norm": 9.458711487683296, "learning_rate": 5.625778769683863e-06, "loss": 0.7429, "step": 5864 }, { "epoch": 0.48, "grad_norm": 2.4203032363584414, "learning_rate": 5.624473736052114e-06, "loss": 0.6259, "step": 5865 }, { "epoch": 0.48, "grad_norm": 3.235019674443303, "learning_rate": 5.623168659202851e-06, "loss": 0.6063, "step": 5866 }, { "epoch": 0.48, "grad_norm": 2.8412022685056186, "learning_rate": 5.621863539226394e-06, "loss": 0.7087, "step": 5867 }, { "epoch": 0.48, "grad_norm": 3.7249508712676227, "learning_rate": 5.620558376213063e-06, "loss": 0.6629, "step": 5868 }, { "epoch": 0.48, "grad_norm": 4.280655766093252, "learning_rate": 5.619253170253185e-06, "loss": 0.7991, "step": 5869 }, { "epoch": 0.48, "grad_norm": 4.977627007259665, "learning_rate": 5.617947921437089e-06, "loss": 0.7147, "step": 5870 }, { "epoch": 0.48, "grad_norm": 2.7581014240796304, "learning_rate": 5.616642629855106e-06, "loss": 0.7042, "step": 5871 }, { "epoch": 0.48, "grad_norm": 4.559611928582002, "learning_rate": 5.61533729559757e-06, "loss": 0.7254, "step": 5872 }, { "epoch": 0.48, "grad_norm": 5.805096582345258, "learning_rate": 5.614031918754819e-06, "loss": 0.709, "step": 5873 }, { "epoch": 0.48, "grad_norm": 2.57651207241554, "learning_rate": 5.612726499417192e-06, "loss": 0.7506, "step": 5874 }, { "epoch": 0.48, "grad_norm": 6.316357147709202, "learning_rate": 5.611421037675034e-06, "loss": 0.7437, "step": 5875 }, { "epoch": 0.48, "grad_norm": 4.295406839311556, "learning_rate": 5.61011553361869e-06, "loss": 0.6881, "step": 5876 }, { "epoch": 0.48, "grad_norm": 4.50491882343317, "learning_rate": 5.60880998733851e-06, "loss": 0.7941, "step": 5877 }, { "epoch": 0.48, "grad_norm": 2.5192592881599274, "learning_rate": 5.607504398924845e-06, "loss": 0.6002, "step": 5878 }, { "epoch": 0.48, "grad_norm": 4.2060583507968285, "learning_rate": 5.6061987684680505e-06, "loss": 0.8541, "step": 5879 }, { "epoch": 0.48, "grad_norm": 8.094412711160727, "learning_rate": 5.604893096058485e-06, "loss": 0.6542, "step": 5880 }, { "epoch": 0.48, "grad_norm": 3.082942997686426, "learning_rate": 5.603587381786506e-06, "loss": 0.7981, "step": 5881 }, { "epoch": 0.48, "grad_norm": 2.9078782011293978, "learning_rate": 5.602281625742481e-06, "loss": 0.7507, "step": 5882 }, { "epoch": 0.48, "grad_norm": 3.0674044553806272, "learning_rate": 5.6009758280167766e-06, "loss": 0.6959, "step": 5883 }, { "epoch": 0.48, "grad_norm": 3.0767626224295093, "learning_rate": 5.599669988699761e-06, "loss": 0.8676, "step": 5884 }, { "epoch": 0.48, "grad_norm": 4.979890605664801, "learning_rate": 5.598364107881805e-06, "loss": 0.5457, "step": 5885 }, { "epoch": 0.48, "grad_norm": 3.0687384117851146, "learning_rate": 5.5970581856532864e-06, "loss": 0.6336, "step": 5886 }, { "epoch": 0.48, "grad_norm": 4.0456114298189805, "learning_rate": 5.59575222210458e-06, "loss": 0.6126, "step": 5887 }, { "epoch": 0.48, "grad_norm": 2.947721791720395, "learning_rate": 5.594446217326069e-06, "loss": 0.658, "step": 5888 }, { "epoch": 0.48, "grad_norm": 5.111469998302074, "learning_rate": 5.5931401714081394e-06, "loss": 0.7067, "step": 5889 }, { "epoch": 0.48, "grad_norm": 3.6139775674907115, "learning_rate": 5.591834084441172e-06, "loss": 0.8245, "step": 5890 }, { "epoch": 0.48, "grad_norm": 2.258327322262662, "learning_rate": 5.590527956515561e-06, "loss": 0.586, "step": 5891 }, { "epoch": 0.48, "grad_norm": 2.6216332873241366, "learning_rate": 5.589221787721697e-06, "loss": 0.5769, "step": 5892 }, { "epoch": 0.48, "grad_norm": 2.4184769875681, "learning_rate": 5.587915578149976e-06, "loss": 0.688, "step": 5893 }, { "epoch": 0.48, "grad_norm": 4.481081724640225, "learning_rate": 5.586609327890794e-06, "loss": 0.7078, "step": 5894 }, { "epoch": 0.48, "grad_norm": 2.7904220652557927, "learning_rate": 5.585303037034553e-06, "loss": 0.6956, "step": 5895 }, { "epoch": 0.48, "grad_norm": 10.232501237702206, "learning_rate": 5.583996705671657e-06, "loss": 0.6296, "step": 5896 }, { "epoch": 0.48, "grad_norm": 5.4213543800496495, "learning_rate": 5.582690333892512e-06, "loss": 0.6377, "step": 5897 }, { "epoch": 0.48, "grad_norm": 4.945174500045023, "learning_rate": 5.5813839217875256e-06, "loss": 0.8252, "step": 5898 }, { "epoch": 0.48, "grad_norm": 3.236768986051341, "learning_rate": 5.580077469447113e-06, "loss": 0.7032, "step": 5899 }, { "epoch": 0.48, "grad_norm": 4.890413970724716, "learning_rate": 5.578770976961685e-06, "loss": 0.6829, "step": 5900 }, { "epoch": 0.48, "grad_norm": 2.839054836239624, "learning_rate": 5.577464444421663e-06, "loss": 0.6844, "step": 5901 }, { "epoch": 0.48, "grad_norm": 13.799819444372186, "learning_rate": 5.576157871917466e-06, "loss": 0.7103, "step": 5902 }, { "epoch": 0.48, "grad_norm": 3.2395469457710413, "learning_rate": 5.574851259539514e-06, "loss": 0.6671, "step": 5903 }, { "epoch": 0.48, "grad_norm": 5.183754193338998, "learning_rate": 5.5735446073782364e-06, "loss": 0.633, "step": 5904 }, { "epoch": 0.48, "grad_norm": 2.2693595688826447, "learning_rate": 5.57223791552406e-06, "loss": 0.7479, "step": 5905 }, { "epoch": 0.48, "grad_norm": 3.809400747400824, "learning_rate": 5.570931184067419e-06, "loss": 0.6191, "step": 5906 }, { "epoch": 0.48, "grad_norm": 8.718811649677951, "learning_rate": 5.569624413098742e-06, "loss": 0.5709, "step": 5907 }, { "epoch": 0.48, "grad_norm": 2.823261171360906, "learning_rate": 5.568317602708471e-06, "loss": 0.6983, "step": 5908 }, { "epoch": 0.48, "grad_norm": 3.0810277534129784, "learning_rate": 5.5670107529870435e-06, "loss": 0.6364, "step": 5909 }, { "epoch": 0.48, "grad_norm": 3.784356676946425, "learning_rate": 5.5657038640249015e-06, "loss": 0.8816, "step": 5910 }, { "epoch": 0.48, "grad_norm": 2.9357366168242316, "learning_rate": 5.564396935912489e-06, "loss": 0.6416, "step": 5911 }, { "epoch": 0.48, "grad_norm": 5.59437008935768, "learning_rate": 5.563089968740257e-06, "loss": 0.6477, "step": 5912 }, { "epoch": 0.48, "grad_norm": 4.05265046492426, "learning_rate": 5.561782962598652e-06, "loss": 0.751, "step": 5913 }, { "epoch": 0.48, "grad_norm": 3.7789169084784286, "learning_rate": 5.560475917578129e-06, "loss": 0.8523, "step": 5914 }, { "epoch": 0.48, "grad_norm": 3.472931041079459, "learning_rate": 5.5591688337691415e-06, "loss": 0.7577, "step": 5915 }, { "epoch": 0.48, "grad_norm": 2.86623853096685, "learning_rate": 5.557861711262154e-06, "loss": 0.7382, "step": 5916 }, { "epoch": 0.48, "grad_norm": 2.3072738925850746, "learning_rate": 5.556554550147622e-06, "loss": 0.6447, "step": 5917 }, { "epoch": 0.48, "grad_norm": 3.990543035444797, "learning_rate": 5.555247350516009e-06, "loss": 0.6523, "step": 5918 }, { "epoch": 0.48, "grad_norm": 3.154047938465092, "learning_rate": 5.553940112457785e-06, "loss": 0.7879, "step": 5919 }, { "epoch": 0.48, "grad_norm": 4.704000288403946, "learning_rate": 5.552632836063417e-06, "loss": 0.6051, "step": 5920 }, { "epoch": 0.48, "grad_norm": 4.618849387675572, "learning_rate": 5.551325521423375e-06, "loss": 0.5893, "step": 5921 }, { "epoch": 0.48, "grad_norm": 3.3307892077234333, "learning_rate": 5.5500181686281385e-06, "loss": 0.5814, "step": 5922 }, { "epoch": 0.48, "grad_norm": 6.580077393821343, "learning_rate": 5.54871077776818e-06, "loss": 0.8306, "step": 5923 }, { "epoch": 0.48, "grad_norm": 2.5577958689738374, "learning_rate": 5.54740334893398e-06, "loss": 0.7353, "step": 5924 }, { "epoch": 0.48, "grad_norm": 2.470728867004697, "learning_rate": 5.546095882216024e-06, "loss": 0.6548, "step": 5925 }, { "epoch": 0.48, "grad_norm": 3.221893565064767, "learning_rate": 5.544788377704793e-06, "loss": 0.7324, "step": 5926 }, { "epoch": 0.48, "grad_norm": 3.061110244897309, "learning_rate": 5.5434808354907755e-06, "loss": 0.7466, "step": 5927 }, { "epoch": 0.48, "grad_norm": 2.306336398779051, "learning_rate": 5.542173255664463e-06, "loss": 0.6855, "step": 5928 }, { "epoch": 0.48, "grad_norm": 4.335874514294128, "learning_rate": 5.540865638316346e-06, "loss": 0.6961, "step": 5929 }, { "epoch": 0.48, "grad_norm": 3.728366268817357, "learning_rate": 5.539557983536923e-06, "loss": 0.7839, "step": 5930 }, { "epoch": 0.48, "grad_norm": 13.726773639026964, "learning_rate": 5.538250291416688e-06, "loss": 0.5286, "step": 5931 }, { "epoch": 0.48, "grad_norm": 2.3226998014425617, "learning_rate": 5.536942562046146e-06, "loss": 0.7185, "step": 5932 }, { "epoch": 0.48, "grad_norm": 3.8459872276585205, "learning_rate": 5.5356347955157974e-06, "loss": 0.7207, "step": 5933 }, { "epoch": 0.48, "grad_norm": 3.6144426264945895, "learning_rate": 5.534326991916148e-06, "loss": 0.7287, "step": 5934 }, { "epoch": 0.48, "grad_norm": 4.3330663098256945, "learning_rate": 5.533019151337706e-06, "loss": 0.6706, "step": 5935 }, { "epoch": 0.48, "grad_norm": 2.6126678893466706, "learning_rate": 5.531711273870983e-06, "loss": 0.5147, "step": 5936 }, { "epoch": 0.48, "grad_norm": 3.2163950361360265, "learning_rate": 5.530403359606492e-06, "loss": 0.6117, "step": 5937 }, { "epoch": 0.48, "grad_norm": 3.5989766690324925, "learning_rate": 5.529095408634748e-06, "loss": 0.6645, "step": 5938 }, { "epoch": 0.48, "grad_norm": 13.307671522400634, "learning_rate": 5.5277874210462715e-06, "loss": 0.7469, "step": 5939 }, { "epoch": 0.48, "grad_norm": 2.1912407384381645, "learning_rate": 5.526479396931581e-06, "loss": 0.6796, "step": 5940 }, { "epoch": 0.48, "grad_norm": 2.6386397382828455, "learning_rate": 5.525171336381202e-06, "loss": 0.6136, "step": 5941 }, { "epoch": 0.48, "grad_norm": 5.618606315080148, "learning_rate": 5.523863239485661e-06, "loss": 0.6268, "step": 5942 }, { "epoch": 0.48, "grad_norm": 4.615189357811246, "learning_rate": 5.522555106335483e-06, "loss": 0.7704, "step": 5943 }, { "epoch": 0.48, "grad_norm": 4.308420725968467, "learning_rate": 5.521246937021202e-06, "loss": 0.6504, "step": 5944 }, { "epoch": 0.48, "grad_norm": 3.340424981487809, "learning_rate": 5.5199387316333505e-06, "loss": 0.6256, "step": 5945 }, { "epoch": 0.48, "grad_norm": 2.6633058103731604, "learning_rate": 5.518630490262467e-06, "loss": 0.6516, "step": 5946 }, { "epoch": 0.48, "grad_norm": 3.923711951305693, "learning_rate": 5.517322212999086e-06, "loss": 0.6793, "step": 5947 }, { "epoch": 0.48, "grad_norm": 13.994057347754657, "learning_rate": 5.516013899933751e-06, "loss": 0.6655, "step": 5948 }, { "epoch": 0.48, "grad_norm": 2.2254891895525177, "learning_rate": 5.514705551157005e-06, "loss": 0.7737, "step": 5949 }, { "epoch": 0.48, "grad_norm": 2.6160184770571124, "learning_rate": 5.513397166759395e-06, "loss": 0.6703, "step": 5950 }, { "epoch": 0.48, "grad_norm": 6.331575171926444, "learning_rate": 5.512088746831468e-06, "loss": 0.6071, "step": 5951 }, { "epoch": 0.48, "grad_norm": 2.893743025128155, "learning_rate": 5.5107802914637755e-06, "loss": 0.7219, "step": 5952 }, { "epoch": 0.48, "grad_norm": 4.323022199214157, "learning_rate": 5.509471800746869e-06, "loss": 0.7423, "step": 5953 }, { "epoch": 0.48, "grad_norm": 4.0118236304013095, "learning_rate": 5.508163274771308e-06, "loss": 0.7294, "step": 5954 }, { "epoch": 0.48, "grad_norm": 2.7014311816328047, "learning_rate": 5.506854713627647e-06, "loss": 0.5668, "step": 5955 }, { "epoch": 0.48, "grad_norm": 2.759251065518076, "learning_rate": 5.505546117406449e-06, "loss": 0.5532, "step": 5956 }, { "epoch": 0.48, "grad_norm": 2.928258761434138, "learning_rate": 5.504237486198277e-06, "loss": 0.6747, "step": 5957 }, { "epoch": 0.48, "grad_norm": 3.4900605864103698, "learning_rate": 5.502928820093696e-06, "loss": 0.5736, "step": 5958 }, { "epoch": 0.48, "grad_norm": 3.178825155160367, "learning_rate": 5.501620119183275e-06, "loss": 0.6252, "step": 5959 }, { "epoch": 0.48, "grad_norm": 3.8810609414550976, "learning_rate": 5.5003113835575814e-06, "loss": 0.6751, "step": 5960 }, { "epoch": 0.48, "grad_norm": 2.8704395058853334, "learning_rate": 5.49900261330719e-06, "loss": 0.7266, "step": 5961 }, { "epoch": 0.48, "grad_norm": 2.6572731339543605, "learning_rate": 5.497693808522677e-06, "loss": 0.7251, "step": 5962 }, { "epoch": 0.48, "grad_norm": 3.5707394175249516, "learning_rate": 5.496384969294617e-06, "loss": 0.6439, "step": 5963 }, { "epoch": 0.48, "grad_norm": 3.5323863202965082, "learning_rate": 5.4950760957135926e-06, "loss": 0.6789, "step": 5964 }, { "epoch": 0.48, "grad_norm": 2.718152958044197, "learning_rate": 5.493767187870186e-06, "loss": 0.8317, "step": 5965 }, { "epoch": 0.48, "grad_norm": 3.3431843202261002, "learning_rate": 5.49245824585498e-06, "loss": 0.7107, "step": 5966 }, { "epoch": 0.48, "grad_norm": 3.4348020570989415, "learning_rate": 5.4911492697585635e-06, "loss": 0.702, "step": 5967 }, { "epoch": 0.48, "grad_norm": 2.5964222985249323, "learning_rate": 5.489840259671523e-06, "loss": 0.8075, "step": 5968 }, { "epoch": 0.48, "grad_norm": 2.5796613553352405, "learning_rate": 5.488531215684454e-06, "loss": 0.5849, "step": 5969 }, { "epoch": 0.48, "grad_norm": 2.300747308193322, "learning_rate": 5.487222137887949e-06, "loss": 0.6931, "step": 5970 }, { "epoch": 0.48, "grad_norm": 2.2129022847530857, "learning_rate": 5.485913026372602e-06, "loss": 0.505, "step": 5971 }, { "epoch": 0.49, "grad_norm": 3.4725853577981907, "learning_rate": 5.484603881229017e-06, "loss": 0.759, "step": 5972 }, { "epoch": 0.49, "grad_norm": 5.162933715838699, "learning_rate": 5.48329470254779e-06, "loss": 0.7611, "step": 5973 }, { "epoch": 0.49, "grad_norm": 3.120069743409115, "learning_rate": 5.481985490419528e-06, "loss": 0.6843, "step": 5974 }, { "epoch": 0.49, "grad_norm": 1.9875008178158569, "learning_rate": 5.480676244934835e-06, "loss": 0.6296, "step": 5975 }, { "epoch": 0.49, "grad_norm": 3.1877610294607224, "learning_rate": 5.479366966184317e-06, "loss": 0.555, "step": 5976 }, { "epoch": 0.49, "grad_norm": 2.5338274543860186, "learning_rate": 5.478057654258588e-06, "loss": 0.7902, "step": 5977 }, { "epoch": 0.49, "grad_norm": 3.265280260835974, "learning_rate": 5.47674830924826e-06, "loss": 0.6582, "step": 5978 }, { "epoch": 0.49, "grad_norm": 3.2184041109968136, "learning_rate": 5.475438931243947e-06, "loss": 0.8355, "step": 5979 }, { "epoch": 0.49, "grad_norm": 3.611096417517397, "learning_rate": 5.4741295203362655e-06, "loss": 0.6901, "step": 5980 }, { "epoch": 0.49, "grad_norm": 2.924738465572044, "learning_rate": 5.472820076615837e-06, "loss": 0.6976, "step": 5981 }, { "epoch": 0.49, "grad_norm": 4.518763424817466, "learning_rate": 5.471510600173281e-06, "loss": 0.6703, "step": 5982 }, { "epoch": 0.49, "grad_norm": 3.0696352063465917, "learning_rate": 5.4702010910992235e-06, "loss": 0.6906, "step": 5983 }, { "epoch": 0.49, "grad_norm": 3.2553276939125397, "learning_rate": 5.4688915494842886e-06, "loss": 0.6385, "step": 5984 }, { "epoch": 0.49, "grad_norm": 12.13250430222909, "learning_rate": 5.467581975419108e-06, "loss": 0.6911, "step": 5985 }, { "epoch": 0.49, "grad_norm": 2.2459569298495037, "learning_rate": 5.4662723689943085e-06, "loss": 0.6131, "step": 5986 }, { "epoch": 0.49, "grad_norm": 3.0271084237156884, "learning_rate": 5.464962730300526e-06, "loss": 0.8608, "step": 5987 }, { "epoch": 0.49, "grad_norm": 2.264105090503508, "learning_rate": 5.4636530594283945e-06, "loss": 0.6521, "step": 5988 }, { "epoch": 0.49, "grad_norm": 4.837588964886058, "learning_rate": 5.4623433564685536e-06, "loss": 0.718, "step": 5989 }, { "epoch": 0.49, "grad_norm": 3.823635715143104, "learning_rate": 5.46103362151164e-06, "loss": 0.679, "step": 5990 }, { "epoch": 0.49, "grad_norm": 2.227895039886624, "learning_rate": 5.459723854648297e-06, "loss": 0.8, "step": 5991 }, { "epoch": 0.49, "grad_norm": 2.5697961233021323, "learning_rate": 5.458414055969169e-06, "loss": 0.6244, "step": 5992 }, { "epoch": 0.49, "grad_norm": 4.910819553907711, "learning_rate": 5.457104225564901e-06, "loss": 0.6829, "step": 5993 }, { "epoch": 0.49, "grad_norm": 6.7440155071133105, "learning_rate": 5.4557943635261425e-06, "loss": 0.6464, "step": 5994 }, { "epoch": 0.49, "grad_norm": 3.6052731745588744, "learning_rate": 5.454484469943545e-06, "loss": 0.7488, "step": 5995 }, { "epoch": 0.49, "grad_norm": 2.6889017665761332, "learning_rate": 5.45317454490776e-06, "loss": 0.6819, "step": 5996 }, { "epoch": 0.49, "grad_norm": 4.928846973097363, "learning_rate": 5.451864588509442e-06, "loss": 0.6475, "step": 5997 }, { "epoch": 0.49, "grad_norm": 4.646626204029376, "learning_rate": 5.450554600839251e-06, "loss": 0.7024, "step": 5998 }, { "epoch": 0.49, "grad_norm": 2.6337274247636655, "learning_rate": 5.449244581987845e-06, "loss": 0.6801, "step": 5999 }, { "epoch": 0.49, "grad_norm": 4.703817431112147, "learning_rate": 5.447934532045884e-06, "loss": 0.8008, "step": 6000 }, { "epoch": 0.49, "grad_norm": 2.7220439421192824, "learning_rate": 5.446624451104032e-06, "loss": 0.6387, "step": 6001 }, { "epoch": 0.49, "grad_norm": 2.3843281273430446, "learning_rate": 5.4453143392529586e-06, "loss": 0.5188, "step": 6002 }, { "epoch": 0.49, "grad_norm": 4.206465912155756, "learning_rate": 5.4440041965833265e-06, "loss": 0.7254, "step": 6003 }, { "epoch": 0.49, "grad_norm": 3.7868698676639676, "learning_rate": 5.44269402318581e-06, "loss": 0.6986, "step": 6004 }, { "epoch": 0.49, "grad_norm": 9.48619468044147, "learning_rate": 5.4413838191510785e-06, "loss": 0.621, "step": 6005 }, { "epoch": 0.49, "grad_norm": 3.525805508684711, "learning_rate": 5.44007358456981e-06, "loss": 0.6929, "step": 6006 }, { "epoch": 0.49, "grad_norm": 2.5526136284113705, "learning_rate": 5.438763319532675e-06, "loss": 0.6104, "step": 6007 }, { "epoch": 0.49, "grad_norm": 2.9528098370265226, "learning_rate": 5.437453024130358e-06, "loss": 0.6373, "step": 6008 }, { "epoch": 0.49, "grad_norm": 3.975851887896986, "learning_rate": 5.436142698453536e-06, "loss": 0.7316, "step": 6009 }, { "epoch": 0.49, "grad_norm": 3.5086165613228366, "learning_rate": 5.434832342592893e-06, "loss": 0.7098, "step": 6010 }, { "epoch": 0.49, "grad_norm": 2.0735745979950178, "learning_rate": 5.433521956639114e-06, "loss": 0.7015, "step": 6011 }, { "epoch": 0.49, "grad_norm": 2.563154278863986, "learning_rate": 5.432211540682887e-06, "loss": 0.5834, "step": 6012 }, { "epoch": 0.49, "grad_norm": 3.2000523631273117, "learning_rate": 5.430901094814899e-06, "loss": 0.5947, "step": 6013 }, { "epoch": 0.49, "grad_norm": 2.87559365693038, "learning_rate": 5.429590619125843e-06, "loss": 0.6668, "step": 6014 }, { "epoch": 0.49, "grad_norm": 2.536256148563878, "learning_rate": 5.4282801137064114e-06, "loss": 0.6846, "step": 6015 }, { "epoch": 0.49, "grad_norm": 2.3317000437048585, "learning_rate": 5.426969578647298e-06, "loss": 0.8176, "step": 6016 }, { "epoch": 0.49, "grad_norm": 22.65889067478474, "learning_rate": 5.425659014039201e-06, "loss": 0.5692, "step": 6017 }, { "epoch": 0.49, "grad_norm": 3.9558838204967373, "learning_rate": 5.424348419972821e-06, "loss": 0.6334, "step": 6018 }, { "epoch": 0.49, "grad_norm": 2.6008636739348385, "learning_rate": 5.423037796538858e-06, "loss": 0.6015, "step": 6019 }, { "epoch": 0.49, "grad_norm": 2.8409810226481844, "learning_rate": 5.421727143828016e-06, "loss": 0.6852, "step": 6020 }, { "epoch": 0.49, "grad_norm": 5.264403950751759, "learning_rate": 5.4204164619309994e-06, "loss": 0.7101, "step": 6021 }, { "epoch": 0.49, "grad_norm": 2.8080331071570526, "learning_rate": 5.419105750938518e-06, "loss": 0.7143, "step": 6022 }, { "epoch": 0.49, "grad_norm": 3.2637713424003247, "learning_rate": 5.41779501094128e-06, "loss": 0.6998, "step": 6023 }, { "epoch": 0.49, "grad_norm": 6.337070143951558, "learning_rate": 5.416484242029996e-06, "loss": 0.824, "step": 6024 }, { "epoch": 0.49, "grad_norm": 3.144785735172008, "learning_rate": 5.41517344429538e-06, "loss": 0.8502, "step": 6025 }, { "epoch": 0.49, "grad_norm": 6.962155103824911, "learning_rate": 5.413862617828147e-06, "loss": 0.6204, "step": 6026 }, { "epoch": 0.49, "grad_norm": 3.144120176320849, "learning_rate": 5.412551762719015e-06, "loss": 0.7989, "step": 6027 }, { "epoch": 0.49, "grad_norm": 3.5454392967187625, "learning_rate": 5.411240879058703e-06, "loss": 0.5724, "step": 6028 }, { "epoch": 0.49, "grad_norm": 2.9328952707320566, "learning_rate": 5.409929966937933e-06, "loss": 0.7518, "step": 6029 }, { "epoch": 0.49, "grad_norm": 2.871331155297309, "learning_rate": 5.40861902644743e-06, "loss": 0.5722, "step": 6030 }, { "epoch": 0.49, "grad_norm": 3.8319953500940356, "learning_rate": 5.407308057677916e-06, "loss": 0.7841, "step": 6031 }, { "epoch": 0.49, "grad_norm": 2.4459071296170993, "learning_rate": 5.40599706072012e-06, "loss": 0.7552, "step": 6032 }, { "epoch": 0.49, "grad_norm": 3.6037284275219945, "learning_rate": 5.4046860356647705e-06, "loss": 0.6545, "step": 6033 }, { "epoch": 0.49, "grad_norm": 14.266364416344473, "learning_rate": 5.4033749826025995e-06, "loss": 0.8036, "step": 6034 }, { "epoch": 0.49, "grad_norm": 4.881891601696901, "learning_rate": 5.40206390162434e-06, "loss": 0.624, "step": 6035 }, { "epoch": 0.49, "grad_norm": 3.8355586314363546, "learning_rate": 5.400752792820726e-06, "loss": 0.5328, "step": 6036 }, { "epoch": 0.49, "grad_norm": 3.8136205611839853, "learning_rate": 5.3994416562824955e-06, "loss": 0.6334, "step": 6037 }, { "epoch": 0.49, "grad_norm": 6.074063491063646, "learning_rate": 5.39813049210039e-06, "loss": 0.7521, "step": 6038 }, { "epoch": 0.49, "grad_norm": 2.788429614863073, "learning_rate": 5.396819300365146e-06, "loss": 0.6613, "step": 6039 }, { "epoch": 0.49, "grad_norm": 3.661858296815369, "learning_rate": 5.395508081167506e-06, "loss": 0.6944, "step": 6040 }, { "epoch": 0.49, "grad_norm": 3.1935204482796284, "learning_rate": 5.394196834598218e-06, "loss": 0.767, "step": 6041 }, { "epoch": 0.49, "grad_norm": 4.398406727062364, "learning_rate": 5.392885560748028e-06, "loss": 0.7185, "step": 6042 }, { "epoch": 0.49, "grad_norm": 2.635709776353121, "learning_rate": 5.391574259707682e-06, "loss": 0.7065, "step": 6043 }, { "epoch": 0.49, "grad_norm": 2.1179927457085235, "learning_rate": 5.3902629315679315e-06, "loss": 0.7464, "step": 6044 }, { "epoch": 0.49, "grad_norm": 3.685804367672943, "learning_rate": 5.38895157641953e-06, "loss": 0.7652, "step": 6045 }, { "epoch": 0.49, "grad_norm": 5.057539174718798, "learning_rate": 5.387640194353229e-06, "loss": 0.5839, "step": 6046 }, { "epoch": 0.49, "grad_norm": 2.4604760894511792, "learning_rate": 5.3863287854597865e-06, "loss": 0.6744, "step": 6047 }, { "epoch": 0.49, "grad_norm": 4.114976780670543, "learning_rate": 5.38501734982996e-06, "loss": 0.5769, "step": 6048 }, { "epoch": 0.49, "grad_norm": 3.6382902197853197, "learning_rate": 5.383705887554508e-06, "loss": 0.7757, "step": 6049 }, { "epoch": 0.49, "grad_norm": 4.869439157032522, "learning_rate": 5.3823943987241926e-06, "loss": 0.7051, "step": 6050 }, { "epoch": 0.49, "grad_norm": 3.3118802147779762, "learning_rate": 5.381082883429776e-06, "loss": 0.6706, "step": 6051 }, { "epoch": 0.49, "grad_norm": 2.860528365453887, "learning_rate": 5.379771341762025e-06, "loss": 0.7592, "step": 6052 }, { "epoch": 0.49, "grad_norm": 3.321617056150225, "learning_rate": 5.378459773811707e-06, "loss": 0.6632, "step": 6053 }, { "epoch": 0.49, "grad_norm": 4.594482168342254, "learning_rate": 5.37714817966959e-06, "loss": 0.6308, "step": 6054 }, { "epoch": 0.49, "grad_norm": 3.796655428904238, "learning_rate": 5.375836559426444e-06, "loss": 0.6478, "step": 6055 }, { "epoch": 0.49, "grad_norm": 2.3086641598124755, "learning_rate": 5.37452491317304e-06, "loss": 0.7806, "step": 6056 }, { "epoch": 0.49, "grad_norm": 3.520788507408676, "learning_rate": 5.373213241000155e-06, "loss": 0.676, "step": 6057 }, { "epoch": 0.49, "grad_norm": 3.1922833532984236, "learning_rate": 5.371901542998563e-06, "loss": 0.816, "step": 6058 }, { "epoch": 0.49, "grad_norm": 3.7542616334082104, "learning_rate": 5.370589819259043e-06, "loss": 0.7208, "step": 6059 }, { "epoch": 0.49, "grad_norm": 2.62387100063729, "learning_rate": 5.369278069872373e-06, "loss": 0.7552, "step": 6060 }, { "epoch": 0.49, "grad_norm": 4.805590783378133, "learning_rate": 5.367966294929337e-06, "loss": 0.7716, "step": 6061 }, { "epoch": 0.49, "grad_norm": 2.542312555581617, "learning_rate": 5.366654494520717e-06, "loss": 0.8278, "step": 6062 }, { "epoch": 0.49, "grad_norm": 15.746894458291544, "learning_rate": 5.365342668737297e-06, "loss": 0.617, "step": 6063 }, { "epoch": 0.49, "grad_norm": 3.1226948088760307, "learning_rate": 5.364030817669862e-06, "loss": 0.7273, "step": 6064 }, { "epoch": 0.49, "grad_norm": 2.5871510186995117, "learning_rate": 5.362718941409204e-06, "loss": 0.6226, "step": 6065 }, { "epoch": 0.49, "grad_norm": 2.928282070853197, "learning_rate": 5.36140704004611e-06, "loss": 0.6519, "step": 6066 }, { "epoch": 0.49, "grad_norm": 4.614472864629575, "learning_rate": 5.3600951136713745e-06, "loss": 0.6232, "step": 6067 }, { "epoch": 0.49, "grad_norm": 2.9302165064439287, "learning_rate": 5.35878316237579e-06, "loss": 0.6108, "step": 6068 }, { "epoch": 0.49, "grad_norm": 2.9879702004908815, "learning_rate": 5.35747118625015e-06, "loss": 0.7517, "step": 6069 }, { "epoch": 0.49, "grad_norm": 4.947324066479569, "learning_rate": 5.356159185385255e-06, "loss": 0.8119, "step": 6070 }, { "epoch": 0.49, "grad_norm": 2.780934345440035, "learning_rate": 5.354847159871901e-06, "loss": 0.6418, "step": 6071 }, { "epoch": 0.49, "grad_norm": 2.8381875802735226, "learning_rate": 5.353535109800891e-06, "loss": 0.6467, "step": 6072 }, { "epoch": 0.49, "grad_norm": 2.471893538112849, "learning_rate": 5.352223035263022e-06, "loss": 0.6602, "step": 6073 }, { "epoch": 0.49, "grad_norm": 2.821842297233693, "learning_rate": 5.350910936349102e-06, "loss": 0.6887, "step": 6074 }, { "epoch": 0.49, "grad_norm": 2.6668666850577933, "learning_rate": 5.349598813149937e-06, "loss": 0.6304, "step": 6075 }, { "epoch": 0.49, "grad_norm": 3.0567978903041957, "learning_rate": 5.348286665756331e-06, "loss": 0.8358, "step": 6076 }, { "epoch": 0.49, "grad_norm": 2.7027583097486145, "learning_rate": 5.346974494259096e-06, "loss": 0.9387, "step": 6077 }, { "epoch": 0.49, "grad_norm": 3.6057449268108863, "learning_rate": 5.345662298749043e-06, "loss": 0.8957, "step": 6078 }, { "epoch": 0.49, "grad_norm": 3.457156058660888, "learning_rate": 5.344350079316981e-06, "loss": 0.6204, "step": 6079 }, { "epoch": 0.49, "grad_norm": 3.648010568877727, "learning_rate": 5.343037836053724e-06, "loss": 0.7787, "step": 6080 }, { "epoch": 0.49, "grad_norm": 4.070781676232831, "learning_rate": 5.341725569050091e-06, "loss": 0.6028, "step": 6081 }, { "epoch": 0.49, "grad_norm": 4.454688606329808, "learning_rate": 5.340413278396896e-06, "loss": 0.7242, "step": 6082 }, { "epoch": 0.49, "grad_norm": 3.8815227919655015, "learning_rate": 5.339100964184956e-06, "loss": 0.6233, "step": 6083 }, { "epoch": 0.49, "grad_norm": 3.1081970130406793, "learning_rate": 5.337788626505097e-06, "loss": 0.6219, "step": 6084 }, { "epoch": 0.49, "grad_norm": 4.005613631025357, "learning_rate": 5.336476265448138e-06, "loss": 0.7065, "step": 6085 }, { "epoch": 0.49, "grad_norm": 2.3755491919319134, "learning_rate": 5.335163881104902e-06, "loss": 0.6003, "step": 6086 }, { "epoch": 0.49, "grad_norm": 4.264052114217631, "learning_rate": 5.333851473566217e-06, "loss": 0.7421, "step": 6087 }, { "epoch": 0.49, "grad_norm": 3.6623169988903657, "learning_rate": 5.332539042922908e-06, "loss": 0.5314, "step": 6088 }, { "epoch": 0.49, "grad_norm": 2.1096826667555733, "learning_rate": 5.331226589265801e-06, "loss": 0.5706, "step": 6089 }, { "epoch": 0.49, "grad_norm": 4.642840657456279, "learning_rate": 5.329914112685729e-06, "loss": 0.8183, "step": 6090 }, { "epoch": 0.49, "grad_norm": 4.761583386758941, "learning_rate": 5.328601613273524e-06, "loss": 0.6561, "step": 6091 }, { "epoch": 0.49, "grad_norm": 3.1561771580049856, "learning_rate": 5.327289091120017e-06, "loss": 0.6014, "step": 6092 }, { "epoch": 0.49, "grad_norm": 3.618550220016676, "learning_rate": 5.325976546316044e-06, "loss": 0.7986, "step": 6093 }, { "epoch": 0.49, "grad_norm": 2.663761754693607, "learning_rate": 5.324663978952443e-06, "loss": 0.6761, "step": 6094 }, { "epoch": 0.5, "grad_norm": 4.755446047808612, "learning_rate": 5.32335138912005e-06, "loss": 0.6614, "step": 6095 }, { "epoch": 0.5, "grad_norm": 2.719045998729653, "learning_rate": 5.322038776909705e-06, "loss": 0.6595, "step": 6096 }, { "epoch": 0.5, "grad_norm": 2.7713302586424162, "learning_rate": 5.320726142412248e-06, "loss": 0.5959, "step": 6097 }, { "epoch": 0.5, "grad_norm": 2.4001853770394503, "learning_rate": 5.3194134857185244e-06, "loss": 0.7854, "step": 6098 }, { "epoch": 0.5, "grad_norm": 9.02748191740966, "learning_rate": 5.318100806919374e-06, "loss": 0.7502, "step": 6099 }, { "epoch": 0.5, "grad_norm": 2.3909465751838286, "learning_rate": 5.316788106105646e-06, "loss": 0.5897, "step": 6100 }, { "epoch": 0.5, "grad_norm": 4.617371720817901, "learning_rate": 5.315475383368186e-06, "loss": 0.9065, "step": 6101 }, { "epoch": 0.5, "grad_norm": 2.944454175874567, "learning_rate": 5.314162638797844e-06, "loss": 0.706, "step": 6102 }, { "epoch": 0.5, "grad_norm": 6.77489885209827, "learning_rate": 5.312849872485468e-06, "loss": 0.6734, "step": 6103 }, { "epoch": 0.5, "grad_norm": 3.9625328761098815, "learning_rate": 5.311537084521911e-06, "loss": 0.71, "step": 6104 }, { "epoch": 0.5, "grad_norm": 3.572020428951802, "learning_rate": 5.310224274998028e-06, "loss": 0.5039, "step": 6105 }, { "epoch": 0.5, "grad_norm": 2.8593272583980807, "learning_rate": 5.308911444004671e-06, "loss": 0.6846, "step": 6106 }, { "epoch": 0.5, "grad_norm": 4.84595196509567, "learning_rate": 5.307598591632696e-06, "loss": 0.7806, "step": 6107 }, { "epoch": 0.5, "grad_norm": 15.453522486993867, "learning_rate": 5.306285717972962e-06, "loss": 0.7916, "step": 6108 }, { "epoch": 0.5, "grad_norm": 2.7391950587999503, "learning_rate": 5.3049728231163275e-06, "loss": 0.6137, "step": 6109 }, { "epoch": 0.5, "grad_norm": 2.789118969319593, "learning_rate": 5.303659907153654e-06, "loss": 0.5938, "step": 6110 }, { "epoch": 0.5, "grad_norm": 3.460812216976524, "learning_rate": 5.302346970175803e-06, "loss": 0.6847, "step": 6111 }, { "epoch": 0.5, "grad_norm": 3.1467880016543357, "learning_rate": 5.301034012273638e-06, "loss": 0.6453, "step": 6112 }, { "epoch": 0.5, "grad_norm": 2.586338746246817, "learning_rate": 5.299721033538023e-06, "loss": 0.7839, "step": 6113 }, { "epoch": 0.5, "grad_norm": 5.161668078077261, "learning_rate": 5.298408034059827e-06, "loss": 0.6512, "step": 6114 }, { "epoch": 0.5, "grad_norm": 4.253839899934579, "learning_rate": 5.297095013929915e-06, "loss": 0.8, "step": 6115 }, { "epoch": 0.5, "grad_norm": 3.584780138917904, "learning_rate": 5.295781973239157e-06, "loss": 0.5697, "step": 6116 }, { "epoch": 0.5, "grad_norm": 1.6547496515507776, "learning_rate": 5.294468912078424e-06, "loss": 0.6216, "step": 6117 }, { "epoch": 0.5, "grad_norm": 2.6087721109250057, "learning_rate": 5.293155830538589e-06, "loss": 0.692, "step": 6118 }, { "epoch": 0.5, "grad_norm": 3.4252715425847606, "learning_rate": 5.291842728710524e-06, "loss": 0.74, "step": 6119 }, { "epoch": 0.5, "grad_norm": 5.279843724034523, "learning_rate": 5.290529606685105e-06, "loss": 0.8002, "step": 6120 }, { "epoch": 0.5, "grad_norm": 2.2085829277476794, "learning_rate": 5.289216464553209e-06, "loss": 0.5669, "step": 6121 }, { "epoch": 0.5, "grad_norm": 2.2200869478730385, "learning_rate": 5.28790330240571e-06, "loss": 0.6571, "step": 6122 }, { "epoch": 0.5, "grad_norm": 3.287486802808259, "learning_rate": 5.286590120333491e-06, "loss": 0.6184, "step": 6123 }, { "epoch": 0.5, "grad_norm": 4.613905549504539, "learning_rate": 5.285276918427432e-06, "loss": 0.67, "step": 6124 }, { "epoch": 0.5, "grad_norm": 3.8654836942423247, "learning_rate": 5.2839636967784124e-06, "loss": 0.6213, "step": 6125 }, { "epoch": 0.5, "grad_norm": 2.6161080479369296, "learning_rate": 5.282650455477317e-06, "loss": 0.7134, "step": 6126 }, { "epoch": 0.5, "grad_norm": 2.103492777867684, "learning_rate": 5.281337194615033e-06, "loss": 0.643, "step": 6127 }, { "epoch": 0.5, "grad_norm": 2.823750175339366, "learning_rate": 5.280023914282442e-06, "loss": 0.7403, "step": 6128 }, { "epoch": 0.5, "grad_norm": 8.304131259461812, "learning_rate": 5.278710614570432e-06, "loss": 0.664, "step": 6129 }, { "epoch": 0.5, "grad_norm": 8.819791891227474, "learning_rate": 5.277397295569893e-06, "loss": 0.7486, "step": 6130 }, { "epoch": 0.5, "grad_norm": 3.7960270038142743, "learning_rate": 5.276083957371716e-06, "loss": 0.658, "step": 6131 }, { "epoch": 0.5, "grad_norm": 2.822821918691646, "learning_rate": 5.2747706000667885e-06, "loss": 0.7288, "step": 6132 }, { "epoch": 0.5, "grad_norm": 2.3757101706604633, "learning_rate": 5.2734572237460056e-06, "loss": 0.667, "step": 6133 }, { "epoch": 0.5, "grad_norm": 5.101905178367649, "learning_rate": 5.272143828500264e-06, "loss": 0.676, "step": 6134 }, { "epoch": 0.5, "grad_norm": 29.613867040447612, "learning_rate": 5.270830414420453e-06, "loss": 0.5123, "step": 6135 }, { "epoch": 0.5, "grad_norm": 11.265496912775681, "learning_rate": 5.269516981597473e-06, "loss": 0.7884, "step": 6136 }, { "epoch": 0.5, "grad_norm": 3.9268025810993037, "learning_rate": 5.26820353012222e-06, "loss": 0.675, "step": 6137 }, { "epoch": 0.5, "grad_norm": 3.550837957235736, "learning_rate": 5.2668900600855955e-06, "loss": 0.7178, "step": 6138 }, { "epoch": 0.5, "grad_norm": 2.6233075632644702, "learning_rate": 5.265576571578497e-06, "loss": 0.5249, "step": 6139 }, { "epoch": 0.5, "grad_norm": 3.6264656835277442, "learning_rate": 5.264263064691828e-06, "loss": 0.6668, "step": 6140 }, { "epoch": 0.5, "grad_norm": 4.241402491934814, "learning_rate": 5.2629495395164905e-06, "loss": 0.7393, "step": 6141 }, { "epoch": 0.5, "grad_norm": 4.6265904205712385, "learning_rate": 5.26163599614339e-06, "loss": 0.5065, "step": 6142 }, { "epoch": 0.5, "grad_norm": 3.6122218155642787, "learning_rate": 5.260322434663432e-06, "loss": 0.4714, "step": 6143 }, { "epoch": 0.5, "grad_norm": 2.7974894337533405, "learning_rate": 5.2590088551675215e-06, "loss": 0.6652, "step": 6144 }, { "epoch": 0.5, "grad_norm": 4.173934484939835, "learning_rate": 5.257695257746567e-06, "loss": 0.6247, "step": 6145 }, { "epoch": 0.5, "grad_norm": 3.3365707684175345, "learning_rate": 5.256381642491477e-06, "loss": 0.5308, "step": 6146 }, { "epoch": 0.5, "grad_norm": 10.385033548065424, "learning_rate": 5.255068009493165e-06, "loss": 0.7774, "step": 6147 }, { "epoch": 0.5, "grad_norm": 3.4081386918040897, "learning_rate": 5.25375435884254e-06, "loss": 0.4786, "step": 6148 }, { "epoch": 0.5, "grad_norm": 3.2695691346921247, "learning_rate": 5.252440690630515e-06, "loss": 0.7612, "step": 6149 }, { "epoch": 0.5, "grad_norm": 2.671254105419217, "learning_rate": 5.251127004948005e-06, "loss": 0.7359, "step": 6150 }, { "epoch": 0.5, "grad_norm": 3.441089603246151, "learning_rate": 5.249813301885926e-06, "loss": 0.574, "step": 6151 }, { "epoch": 0.5, "grad_norm": 4.13877042750484, "learning_rate": 5.248499581535193e-06, "loss": 0.6604, "step": 6152 }, { "epoch": 0.5, "grad_norm": 3.7172967133166375, "learning_rate": 5.247185843986724e-06, "loss": 0.672, "step": 6153 }, { "epoch": 0.5, "grad_norm": 3.5687916575527074, "learning_rate": 5.24587208933144e-06, "loss": 0.6674, "step": 6154 }, { "epoch": 0.5, "grad_norm": 3.7696331425469545, "learning_rate": 5.244558317660256e-06, "loss": 0.7127, "step": 6155 }, { "epoch": 0.5, "grad_norm": 2.7881411562827823, "learning_rate": 5.243244529064098e-06, "loss": 0.7139, "step": 6156 }, { "epoch": 0.5, "grad_norm": 3.7779642121175616, "learning_rate": 5.241930723633887e-06, "loss": 0.6709, "step": 6157 }, { "epoch": 0.5, "grad_norm": 2.8811436976738904, "learning_rate": 5.240616901460547e-06, "loss": 0.7537, "step": 6158 }, { "epoch": 0.5, "grad_norm": 2.5501142622631576, "learning_rate": 5.239303062635001e-06, "loss": 0.7724, "step": 6159 }, { "epoch": 0.5, "grad_norm": 9.292940823716274, "learning_rate": 5.237989207248179e-06, "loss": 0.6356, "step": 6160 }, { "epoch": 0.5, "grad_norm": 3.4123595092618726, "learning_rate": 5.236675335391004e-06, "loss": 0.5866, "step": 6161 }, { "epoch": 0.5, "grad_norm": 3.353554811197336, "learning_rate": 5.235361447154406e-06, "loss": 0.8898, "step": 6162 }, { "epoch": 0.5, "grad_norm": 5.942815827239726, "learning_rate": 5.2340475426293125e-06, "loss": 0.6854, "step": 6163 }, { "epoch": 0.5, "grad_norm": 2.4576067856427763, "learning_rate": 5.232733621906656e-06, "loss": 0.5921, "step": 6164 }, { "epoch": 0.5, "grad_norm": 3.2210207223955387, "learning_rate": 5.231419685077367e-06, "loss": 0.7975, "step": 6165 }, { "epoch": 0.5, "grad_norm": 9.181922515927047, "learning_rate": 5.2301057322323786e-06, "loss": 0.6976, "step": 6166 }, { "epoch": 0.5, "grad_norm": 2.5367914598114485, "learning_rate": 5.228791763462626e-06, "loss": 0.449, "step": 6167 }, { "epoch": 0.5, "grad_norm": 2.9360657863154795, "learning_rate": 5.227477778859044e-06, "loss": 0.8882, "step": 6168 }, { "epoch": 0.5, "grad_norm": 3.2988732664047276, "learning_rate": 5.226163778512564e-06, "loss": 0.6486, "step": 6169 }, { "epoch": 0.5, "grad_norm": 5.325424064551389, "learning_rate": 5.224849762514127e-06, "loss": 0.7733, "step": 6170 }, { "epoch": 0.5, "grad_norm": 3.2659001085866772, "learning_rate": 5.223535730954673e-06, "loss": 0.4959, "step": 6171 }, { "epoch": 0.5, "grad_norm": 3.130300513542728, "learning_rate": 5.222221683925138e-06, "loss": 0.7962, "step": 6172 }, { "epoch": 0.5, "grad_norm": 4.1538063729001005, "learning_rate": 5.220907621516461e-06, "loss": 0.7712, "step": 6173 }, { "epoch": 0.5, "grad_norm": 5.5564267521277735, "learning_rate": 5.219593543819587e-06, "loss": 0.7416, "step": 6174 }, { "epoch": 0.5, "grad_norm": 3.6650094529909194, "learning_rate": 5.218279450925458e-06, "loss": 0.7192, "step": 6175 }, { "epoch": 0.5, "grad_norm": 3.8478887337676944, "learning_rate": 5.216965342925017e-06, "loss": 0.6952, "step": 6176 }, { "epoch": 0.5, "grad_norm": 2.5226291278007675, "learning_rate": 5.215651219909208e-06, "loss": 0.7435, "step": 6177 }, { "epoch": 0.5, "grad_norm": 2.413143222383419, "learning_rate": 5.2143370819689756e-06, "loss": 0.5118, "step": 6178 }, { "epoch": 0.5, "grad_norm": 2.8962255050817145, "learning_rate": 5.213022929195267e-06, "loss": 0.7005, "step": 6179 }, { "epoch": 0.5, "grad_norm": 3.71708012665215, "learning_rate": 5.211708761679031e-06, "loss": 0.673, "step": 6180 }, { "epoch": 0.5, "grad_norm": 3.713636318009029, "learning_rate": 5.210394579511217e-06, "loss": 0.7289, "step": 6181 }, { "epoch": 0.5, "grad_norm": 3.283584018638852, "learning_rate": 5.209080382782772e-06, "loss": 0.7526, "step": 6182 }, { "epoch": 0.5, "grad_norm": 6.014019005529718, "learning_rate": 5.207766171584648e-06, "loss": 0.5562, "step": 6183 }, { "epoch": 0.5, "grad_norm": 4.993489054799933, "learning_rate": 5.206451946007797e-06, "loss": 0.7209, "step": 6184 }, { "epoch": 0.5, "grad_norm": 5.179498948287335, "learning_rate": 5.205137706143172e-06, "loss": 0.6716, "step": 6185 }, { "epoch": 0.5, "grad_norm": 3.628828681521246, "learning_rate": 5.203823452081725e-06, "loss": 0.6323, "step": 6186 }, { "epoch": 0.5, "grad_norm": 2.814430006658283, "learning_rate": 5.2025091839144124e-06, "loss": 0.6728, "step": 6187 }, { "epoch": 0.5, "grad_norm": 3.6623186216338, "learning_rate": 5.201194901732189e-06, "loss": 0.835, "step": 6188 }, { "epoch": 0.5, "grad_norm": 4.135063668002361, "learning_rate": 5.1998806056260105e-06, "loss": 0.6375, "step": 6189 }, { "epoch": 0.5, "grad_norm": 2.6770162327181617, "learning_rate": 5.198566295686837e-06, "loss": 0.5926, "step": 6190 }, { "epoch": 0.5, "grad_norm": 3.9819103933181212, "learning_rate": 5.197251972005626e-06, "loss": 0.8081, "step": 6191 }, { "epoch": 0.5, "grad_norm": 2.6998838438191624, "learning_rate": 5.195937634673336e-06, "loss": 0.7073, "step": 6192 }, { "epoch": 0.5, "grad_norm": 3.370591963206708, "learning_rate": 5.194623283780927e-06, "loss": 0.7051, "step": 6193 }, { "epoch": 0.5, "grad_norm": 4.410232433475372, "learning_rate": 5.193308919419363e-06, "loss": 0.6879, "step": 6194 }, { "epoch": 0.5, "grad_norm": 4.494584808257606, "learning_rate": 5.191994541679603e-06, "loss": 0.5908, "step": 6195 }, { "epoch": 0.5, "grad_norm": 8.333774246184413, "learning_rate": 5.190680150652613e-06, "loss": 0.7648, "step": 6196 }, { "epoch": 0.5, "grad_norm": 9.617380507683823, "learning_rate": 5.189365746429356e-06, "loss": 0.6442, "step": 6197 }, { "epoch": 0.5, "grad_norm": 3.477191914952608, "learning_rate": 5.188051329100795e-06, "loss": 0.824, "step": 6198 }, { "epoch": 0.5, "grad_norm": 4.631588516045174, "learning_rate": 5.186736898757899e-06, "loss": 0.8145, "step": 6199 }, { "epoch": 0.5, "grad_norm": 4.130013253083948, "learning_rate": 5.185422455491636e-06, "loss": 0.5547, "step": 6200 }, { "epoch": 0.5, "grad_norm": 1.9784063822333422, "learning_rate": 5.18410799939297e-06, "loss": 0.6744, "step": 6201 }, { "epoch": 0.5, "grad_norm": 4.736033281213874, "learning_rate": 5.18279353055287e-06, "loss": 0.6468, "step": 6202 }, { "epoch": 0.5, "grad_norm": 4.6730873520773875, "learning_rate": 5.181479049062307e-06, "loss": 0.6111, "step": 6203 }, { "epoch": 0.5, "grad_norm": 5.533278576854528, "learning_rate": 5.180164555012253e-06, "loss": 0.5452, "step": 6204 }, { "epoch": 0.5, "grad_norm": 3.073174700815744, "learning_rate": 5.178850048493675e-06, "loss": 0.6584, "step": 6205 }, { "epoch": 0.5, "grad_norm": 4.240172084915734, "learning_rate": 5.177535529597548e-06, "loss": 0.8213, "step": 6206 }, { "epoch": 0.5, "grad_norm": 5.34805098711028, "learning_rate": 5.176220998414846e-06, "loss": 0.7171, "step": 6207 }, { "epoch": 0.5, "grad_norm": 3.1702814071440684, "learning_rate": 5.1749064550365414e-06, "loss": 0.7693, "step": 6208 }, { "epoch": 0.5, "grad_norm": 6.146566509538084, "learning_rate": 5.1735918995536074e-06, "loss": 0.6876, "step": 6209 }, { "epoch": 0.5, "grad_norm": 7.171354286996852, "learning_rate": 5.1722773320570205e-06, "loss": 0.6772, "step": 6210 }, { "epoch": 0.5, "grad_norm": 2.8338016028828097, "learning_rate": 5.1709627526377604e-06, "loss": 0.682, "step": 6211 }, { "epoch": 0.5, "grad_norm": 3.72196710298027, "learning_rate": 5.1696481613867986e-06, "loss": 0.5212, "step": 6212 }, { "epoch": 0.5, "grad_norm": 5.309635970913145, "learning_rate": 5.1683335583951156e-06, "loss": 0.6851, "step": 6213 }, { "epoch": 0.5, "grad_norm": 4.246942302599598, "learning_rate": 5.167018943753692e-06, "loss": 0.6922, "step": 6214 }, { "epoch": 0.5, "grad_norm": 4.3817839419623485, "learning_rate": 5.1657043175535045e-06, "loss": 0.6164, "step": 6215 }, { "epoch": 0.5, "grad_norm": 7.2854991840436645, "learning_rate": 5.164389679885538e-06, "loss": 0.6956, "step": 6216 }, { "epoch": 0.5, "grad_norm": 5.958323988491736, "learning_rate": 5.1630750308407675e-06, "loss": 0.9094, "step": 6217 }, { "epoch": 0.51, "grad_norm": 5.865238076798589, "learning_rate": 5.161760370510178e-06, "loss": 0.7996, "step": 6218 }, { "epoch": 0.51, "grad_norm": 4.393186785392793, "learning_rate": 5.160445698984753e-06, "loss": 0.7125, "step": 6219 }, { "epoch": 0.51, "grad_norm": 7.117320990383577, "learning_rate": 5.159131016355475e-06, "loss": 0.6213, "step": 6220 }, { "epoch": 0.51, "grad_norm": 2.828080090939579, "learning_rate": 5.15781632271333e-06, "loss": 0.591, "step": 6221 }, { "epoch": 0.51, "grad_norm": 4.423128903837038, "learning_rate": 5.156501618149301e-06, "loss": 0.5315, "step": 6222 }, { "epoch": 0.51, "grad_norm": 26.064224173411056, "learning_rate": 5.155186902754375e-06, "loss": 0.7142, "step": 6223 }, { "epoch": 0.51, "grad_norm": 3.795288853269341, "learning_rate": 5.1538721766195375e-06, "loss": 0.7093, "step": 6224 }, { "epoch": 0.51, "grad_norm": 21.411370440548747, "learning_rate": 5.152557439835777e-06, "loss": 0.7201, "step": 6225 }, { "epoch": 0.51, "grad_norm": 3.17625991121986, "learning_rate": 5.1512426924940804e-06, "loss": 0.7568, "step": 6226 }, { "epoch": 0.51, "grad_norm": 3.78978191556089, "learning_rate": 5.149927934685438e-06, "loss": 0.6711, "step": 6227 }, { "epoch": 0.51, "grad_norm": 4.005385430229076, "learning_rate": 5.1486131665008386e-06, "loss": 0.5685, "step": 6228 }, { "epoch": 0.51, "grad_norm": 4.104950089907731, "learning_rate": 5.147298388031271e-06, "loss": 0.5924, "step": 6229 }, { "epoch": 0.51, "grad_norm": 4.13134986606643, "learning_rate": 5.145983599367729e-06, "loss": 0.6834, "step": 6230 }, { "epoch": 0.51, "grad_norm": 3.4496652597245774, "learning_rate": 5.1446688006012015e-06, "loss": 0.598, "step": 6231 }, { "epoch": 0.51, "grad_norm": 11.41152676370373, "learning_rate": 5.1433539918226835e-06, "loss": 0.6215, "step": 6232 }, { "epoch": 0.51, "grad_norm": 4.499029716593304, "learning_rate": 5.142039173123166e-06, "loss": 0.7062, "step": 6233 }, { "epoch": 0.51, "grad_norm": 3.2227682760664496, "learning_rate": 5.140724344593643e-06, "loss": 0.7048, "step": 6234 }, { "epoch": 0.51, "grad_norm": 4.435639172580153, "learning_rate": 5.139409506325109e-06, "loss": 0.641, "step": 6235 }, { "epoch": 0.51, "grad_norm": 2.9178084630426033, "learning_rate": 5.13809465840856e-06, "loss": 0.5248, "step": 6236 }, { "epoch": 0.51, "grad_norm": 3.3631548729024683, "learning_rate": 5.1367798009349915e-06, "loss": 0.8047, "step": 6237 }, { "epoch": 0.51, "grad_norm": 5.136661630483342, "learning_rate": 5.135464933995399e-06, "loss": 0.6818, "step": 6238 }, { "epoch": 0.51, "grad_norm": 8.300619799221906, "learning_rate": 5.134150057680779e-06, "loss": 0.6906, "step": 6239 }, { "epoch": 0.51, "grad_norm": 5.771213019958129, "learning_rate": 5.132835172082132e-06, "loss": 0.6435, "step": 6240 }, { "epoch": 0.51, "grad_norm": 3.7455522290771115, "learning_rate": 5.131520277290455e-06, "loss": 0.6725, "step": 6241 }, { "epoch": 0.51, "grad_norm": 3.4084851599136683, "learning_rate": 5.130205373396745e-06, "loss": 0.6496, "step": 6242 }, { "epoch": 0.51, "grad_norm": 3.257814757400069, "learning_rate": 5.128890460492004e-06, "loss": 0.5872, "step": 6243 }, { "epoch": 0.51, "grad_norm": 3.773971269476287, "learning_rate": 5.127575538667232e-06, "loss": 0.7871, "step": 6244 }, { "epoch": 0.51, "grad_norm": 4.540015770986173, "learning_rate": 5.1262606080134295e-06, "loss": 0.671, "step": 6245 }, { "epoch": 0.51, "grad_norm": 4.2589624754919875, "learning_rate": 5.124945668621597e-06, "loss": 0.7469, "step": 6246 }, { "epoch": 0.51, "grad_norm": 9.719587366494281, "learning_rate": 5.123630720582738e-06, "loss": 0.6163, "step": 6247 }, { "epoch": 0.51, "grad_norm": 5.757925118842129, "learning_rate": 5.122315763987855e-06, "loss": 0.5416, "step": 6248 }, { "epoch": 0.51, "grad_norm": 5.921969573258437, "learning_rate": 5.121000798927951e-06, "loss": 0.794, "step": 6249 }, { "epoch": 0.51, "grad_norm": 48.65652883348092, "learning_rate": 5.11968582549403e-06, "loss": 0.6388, "step": 6250 }, { "epoch": 0.51, "grad_norm": 4.377508895660131, "learning_rate": 5.118370843777095e-06, "loss": 0.747, "step": 6251 }, { "epoch": 0.51, "grad_norm": 4.102649546117135, "learning_rate": 5.117055853868153e-06, "loss": 0.6836, "step": 6252 }, { "epoch": 0.51, "grad_norm": 2.67265722083857, "learning_rate": 5.115740855858209e-06, "loss": 0.78, "step": 6253 }, { "epoch": 0.51, "grad_norm": 3.5039241539403094, "learning_rate": 5.114425849838269e-06, "loss": 0.7522, "step": 6254 }, { "epoch": 0.51, "grad_norm": 9.795063742297692, "learning_rate": 5.11311083589934e-06, "loss": 0.7253, "step": 6255 }, { "epoch": 0.51, "grad_norm": 6.266589275738627, "learning_rate": 5.111795814132429e-06, "loss": 0.6696, "step": 6256 }, { "epoch": 0.51, "grad_norm": 3.599079156741777, "learning_rate": 5.110480784628544e-06, "loss": 0.6816, "step": 6257 }, { "epoch": 0.51, "grad_norm": 6.743036513672567, "learning_rate": 5.109165747478693e-06, "loss": 0.8133, "step": 6258 }, { "epoch": 0.51, "grad_norm": 3.0426494728493623, "learning_rate": 5.107850702773883e-06, "loss": 0.6776, "step": 6259 }, { "epoch": 0.51, "grad_norm": 4.655702174685603, "learning_rate": 5.106535650605128e-06, "loss": 0.7664, "step": 6260 }, { "epoch": 0.51, "grad_norm": 3.589273000531835, "learning_rate": 5.105220591063432e-06, "loss": 0.6402, "step": 6261 }, { "epoch": 0.51, "grad_norm": 3.2786743284274418, "learning_rate": 5.103905524239811e-06, "loss": 0.5992, "step": 6262 }, { "epoch": 0.51, "grad_norm": 3.0283003087999196, "learning_rate": 5.102590450225272e-06, "loss": 0.6872, "step": 6263 }, { "epoch": 0.51, "grad_norm": 4.256850269747036, "learning_rate": 5.10127536911083e-06, "loss": 0.6451, "step": 6264 }, { "epoch": 0.51, "grad_norm": 4.061332231273521, "learning_rate": 5.099960280987494e-06, "loss": 0.5206, "step": 6265 }, { "epoch": 0.51, "grad_norm": 2.4912958002412426, "learning_rate": 5.098645185946276e-06, "loss": 0.7174, "step": 6266 }, { "epoch": 0.51, "grad_norm": 3.1842977055236035, "learning_rate": 5.097330084078191e-06, "loss": 0.7688, "step": 6267 }, { "epoch": 0.51, "grad_norm": 3.891044302903524, "learning_rate": 5.09601497547425e-06, "loss": 0.6511, "step": 6268 }, { "epoch": 0.51, "grad_norm": 3.7727471299723163, "learning_rate": 5.09469986022547e-06, "loss": 0.6068, "step": 6269 }, { "epoch": 0.51, "grad_norm": 5.821934123560957, "learning_rate": 5.093384738422863e-06, "loss": 0.7294, "step": 6270 }, { "epoch": 0.51, "grad_norm": 4.127401262379407, "learning_rate": 5.092069610157443e-06, "loss": 0.6514, "step": 6271 }, { "epoch": 0.51, "grad_norm": 2.8825340599312703, "learning_rate": 5.090754475520226e-06, "loss": 0.7012, "step": 6272 }, { "epoch": 0.51, "grad_norm": 4.643043727319916, "learning_rate": 5.08943933460223e-06, "loss": 0.6178, "step": 6273 }, { "epoch": 0.51, "grad_norm": 3.564448118598244, "learning_rate": 5.088124187494468e-06, "loss": 0.6575, "step": 6274 }, { "epoch": 0.51, "grad_norm": 2.9400452820553657, "learning_rate": 5.086809034287957e-06, "loss": 0.648, "step": 6275 }, { "epoch": 0.51, "grad_norm": 22.39053453718653, "learning_rate": 5.085493875073714e-06, "loss": 0.8311, "step": 6276 }, { "epoch": 0.51, "grad_norm": 4.249848436272152, "learning_rate": 5.084178709942757e-06, "loss": 0.757, "step": 6277 }, { "epoch": 0.51, "grad_norm": 2.535573262284296, "learning_rate": 5.082863538986103e-06, "loss": 0.4822, "step": 6278 }, { "epoch": 0.51, "grad_norm": 11.30432405839932, "learning_rate": 5.0815483622947694e-06, "loss": 0.5438, "step": 6279 }, { "epoch": 0.51, "grad_norm": 3.2550272267903564, "learning_rate": 5.080233179959777e-06, "loss": 0.5133, "step": 6280 }, { "epoch": 0.51, "grad_norm": 9.743716529476114, "learning_rate": 5.078917992072144e-06, "loss": 0.7112, "step": 6281 }, { "epoch": 0.51, "grad_norm": 3.87975482317902, "learning_rate": 5.077602798722888e-06, "loss": 0.6653, "step": 6282 }, { "epoch": 0.51, "grad_norm": 6.35546215040325, "learning_rate": 5.076287600003029e-06, "loss": 0.6723, "step": 6283 }, { "epoch": 0.51, "grad_norm": 5.292099959035826, "learning_rate": 5.074972396003589e-06, "loss": 0.5966, "step": 6284 }, { "epoch": 0.51, "grad_norm": 3.1902300120041627, "learning_rate": 5.073657186815586e-06, "loss": 0.7872, "step": 6285 }, { "epoch": 0.51, "grad_norm": 3.730703839663729, "learning_rate": 5.072341972530043e-06, "loss": 0.6412, "step": 6286 }, { "epoch": 0.51, "grad_norm": 24.789754365733003, "learning_rate": 5.07102675323798e-06, "loss": 0.6573, "step": 6287 }, { "epoch": 0.51, "grad_norm": 5.497567759866287, "learning_rate": 5.069711529030417e-06, "loss": 0.7641, "step": 6288 }, { "epoch": 0.51, "grad_norm": 4.758441457288548, "learning_rate": 5.068396299998379e-06, "loss": 0.5057, "step": 6289 }, { "epoch": 0.51, "grad_norm": 4.9053592970554485, "learning_rate": 5.0670810662328865e-06, "loss": 0.6168, "step": 6290 }, { "epoch": 0.51, "grad_norm": 3.2072430731205124, "learning_rate": 5.06576582782496e-06, "loss": 0.5961, "step": 6291 }, { "epoch": 0.51, "grad_norm": 3.507140223952992, "learning_rate": 5.064450584865624e-06, "loss": 0.6724, "step": 6292 }, { "epoch": 0.51, "grad_norm": 4.161283778576761, "learning_rate": 5.063135337445903e-06, "loss": 0.77, "step": 6293 }, { "epoch": 0.51, "grad_norm": 4.679684785151928, "learning_rate": 5.06182008565682e-06, "loss": 0.5581, "step": 6294 }, { "epoch": 0.51, "grad_norm": 5.334033826764989, "learning_rate": 5.060504829589396e-06, "loss": 0.5484, "step": 6295 }, { "epoch": 0.51, "grad_norm": 3.447596083382297, "learning_rate": 5.059189569334658e-06, "loss": 0.7659, "step": 6296 }, { "epoch": 0.51, "grad_norm": 3.3766614364355494, "learning_rate": 5.0578743049836274e-06, "loss": 0.6621, "step": 6297 }, { "epoch": 0.51, "grad_norm": 3.9154768274359246, "learning_rate": 5.056559036627333e-06, "loss": 0.6543, "step": 6298 }, { "epoch": 0.51, "grad_norm": 4.55916540809735, "learning_rate": 5.055243764356795e-06, "loss": 0.7174, "step": 6299 }, { "epoch": 0.51, "grad_norm": 4.932127337658018, "learning_rate": 5.053928488263043e-06, "loss": 0.7061, "step": 6300 }, { "epoch": 0.51, "grad_norm": 3.8712637985271288, "learning_rate": 5.052613208437098e-06, "loss": 0.5938, "step": 6301 }, { "epoch": 0.51, "grad_norm": 4.222986215479166, "learning_rate": 5.051297924969988e-06, "loss": 0.5944, "step": 6302 }, { "epoch": 0.51, "grad_norm": 8.873162875087925, "learning_rate": 5.04998263795274e-06, "loss": 0.8256, "step": 6303 }, { "epoch": 0.51, "grad_norm": 3.3367976348226374, "learning_rate": 5.048667347476376e-06, "loss": 0.6353, "step": 6304 }, { "epoch": 0.51, "grad_norm": 3.509565432265062, "learning_rate": 5.047352053631928e-06, "loss": 0.756, "step": 6305 }, { "epoch": 0.51, "grad_norm": 8.346724018565215, "learning_rate": 5.046036756510417e-06, "loss": 0.6907, "step": 6306 }, { "epoch": 0.51, "grad_norm": 12.57514821162449, "learning_rate": 5.0447214562028755e-06, "loss": 0.5992, "step": 6307 }, { "epoch": 0.51, "grad_norm": 3.4033571896682737, "learning_rate": 5.043406152800325e-06, "loss": 0.6546, "step": 6308 }, { "epoch": 0.51, "grad_norm": 2.7816531642011593, "learning_rate": 5.042090846393797e-06, "loss": 0.6608, "step": 6309 }, { "epoch": 0.51, "grad_norm": 2.6417985658465364, "learning_rate": 5.040775537074318e-06, "loss": 0.772, "step": 6310 }, { "epoch": 0.51, "grad_norm": 3.0714143437152384, "learning_rate": 5.039460224932913e-06, "loss": 0.7309, "step": 6311 }, { "epoch": 0.51, "grad_norm": 2.467102201591127, "learning_rate": 5.0381449100606126e-06, "loss": 0.7047, "step": 6312 }, { "epoch": 0.51, "grad_norm": 2.558396459846335, "learning_rate": 5.036829592548446e-06, "loss": 0.709, "step": 6313 }, { "epoch": 0.51, "grad_norm": 2.9211847468439798, "learning_rate": 5.035514272487438e-06, "loss": 0.5507, "step": 6314 }, { "epoch": 0.51, "grad_norm": 5.2102977468633735, "learning_rate": 5.034198949968618e-06, "loss": 0.6036, "step": 6315 }, { "epoch": 0.51, "grad_norm": 9.834354421799684, "learning_rate": 5.032883625083017e-06, "loss": 0.7272, "step": 6316 }, { "epoch": 0.51, "grad_norm": 2.110622541964088, "learning_rate": 5.0315682979216615e-06, "loss": 0.7107, "step": 6317 }, { "epoch": 0.51, "grad_norm": 3.112445919661464, "learning_rate": 5.0302529685755805e-06, "loss": 0.89, "step": 6318 }, { "epoch": 0.51, "grad_norm": 7.241950077241141, "learning_rate": 5.028937637135804e-06, "loss": 0.7031, "step": 6319 }, { "epoch": 0.51, "grad_norm": 3.3302127460637414, "learning_rate": 5.027622303693363e-06, "loss": 0.6483, "step": 6320 }, { "epoch": 0.51, "grad_norm": 5.674644143159602, "learning_rate": 5.026306968339282e-06, "loss": 0.7435, "step": 6321 }, { "epoch": 0.51, "grad_norm": 5.2714012824227146, "learning_rate": 5.024991631164593e-06, "loss": 0.5843, "step": 6322 }, { "epoch": 0.51, "grad_norm": 3.0180074359376716, "learning_rate": 5.023676292260328e-06, "loss": 0.6509, "step": 6323 }, { "epoch": 0.51, "grad_norm": 5.987819418125114, "learning_rate": 5.022360951717512e-06, "loss": 0.6771, "step": 6324 }, { "epoch": 0.51, "grad_norm": 3.321548488585978, "learning_rate": 5.0210456096271775e-06, "loss": 0.707, "step": 6325 }, { "epoch": 0.51, "grad_norm": 4.886664745546878, "learning_rate": 5.0197302660803545e-06, "loss": 0.7559, "step": 6326 }, { "epoch": 0.51, "grad_norm": 6.368649976490519, "learning_rate": 5.018414921168075e-06, "loss": 0.7171, "step": 6327 }, { "epoch": 0.51, "grad_norm": 4.213867885350846, "learning_rate": 5.017099574981366e-06, "loss": 0.7112, "step": 6328 }, { "epoch": 0.51, "grad_norm": 4.647568689024224, "learning_rate": 5.015784227611258e-06, "loss": 0.6629, "step": 6329 }, { "epoch": 0.51, "grad_norm": 3.5510137769903243, "learning_rate": 5.0144688791487825e-06, "loss": 0.7343, "step": 6330 }, { "epoch": 0.51, "grad_norm": 3.5266911442493036, "learning_rate": 5.0131535296849684e-06, "loss": 0.5639, "step": 6331 }, { "epoch": 0.51, "grad_norm": 2.8045393149122053, "learning_rate": 5.011838179310848e-06, "loss": 0.697, "step": 6332 }, { "epoch": 0.51, "grad_norm": 3.139989534629165, "learning_rate": 5.010522828117452e-06, "loss": 0.6541, "step": 6333 }, { "epoch": 0.51, "grad_norm": 3.7592281556841236, "learning_rate": 5.0092074761958085e-06, "loss": 0.6587, "step": 6334 }, { "epoch": 0.51, "grad_norm": 12.511469348877558, "learning_rate": 5.00789212363695e-06, "loss": 0.785, "step": 6335 }, { "epoch": 0.51, "grad_norm": 2.352169308874297, "learning_rate": 5.006576770531907e-06, "loss": 0.4462, "step": 6336 }, { "epoch": 0.51, "grad_norm": 2.3429816339815646, "learning_rate": 5.00526141697171e-06, "loss": 0.5871, "step": 6337 }, { "epoch": 0.51, "grad_norm": 3.7657627080780443, "learning_rate": 5.003946063047393e-06, "loss": 0.76, "step": 6338 }, { "epoch": 0.51, "grad_norm": 4.668506303265283, "learning_rate": 5.002630708849979e-06, "loss": 0.6603, "step": 6339 }, { "epoch": 0.51, "grad_norm": 3.4664322265740766, "learning_rate": 5.001315354470506e-06, "loss": 0.7713, "step": 6340 }, { "epoch": 0.52, "grad_norm": 3.116547238234516, "learning_rate": 5e-06, "loss": 0.6457, "step": 6341 }, { "epoch": 0.52, "grad_norm": 4.46212173527785, "learning_rate": 4.998684645529496e-06, "loss": 0.6369, "step": 6342 }, { "epoch": 0.52, "grad_norm": 4.526732114218656, "learning_rate": 4.997369291150021e-06, "loss": 0.6381, "step": 6343 }, { "epoch": 0.52, "grad_norm": 4.4148994157716395, "learning_rate": 4.99605393695261e-06, "loss": 0.7904, "step": 6344 }, { "epoch": 0.52, "grad_norm": 4.0051790405532515, "learning_rate": 4.994738583028291e-06, "loss": 0.6149, "step": 6345 }, { "epoch": 0.52, "grad_norm": 6.050139412075392, "learning_rate": 4.993423229468094e-06, "loss": 0.7763, "step": 6346 }, { "epoch": 0.52, "grad_norm": 3.0181206903472972, "learning_rate": 4.992107876363051e-06, "loss": 0.5243, "step": 6347 }, { "epoch": 0.52, "grad_norm": 3.0108521215138953, "learning_rate": 4.990792523804192e-06, "loss": 0.7942, "step": 6348 }, { "epoch": 0.52, "grad_norm": 3.103117966247781, "learning_rate": 4.989477171882549e-06, "loss": 0.5996, "step": 6349 }, { "epoch": 0.52, "grad_norm": 3.270549363845947, "learning_rate": 4.988161820689152e-06, "loss": 0.6478, "step": 6350 }, { "epoch": 0.52, "grad_norm": 3.6141796552617613, "learning_rate": 4.986846470315033e-06, "loss": 0.7283, "step": 6351 }, { "epoch": 0.52, "grad_norm": 3.796486621535746, "learning_rate": 4.98553112085122e-06, "loss": 0.7291, "step": 6352 }, { "epoch": 0.52, "grad_norm": 10.503942766991463, "learning_rate": 4.984215772388744e-06, "loss": 0.6979, "step": 6353 }, { "epoch": 0.52, "grad_norm": 4.266974698845338, "learning_rate": 4.982900425018637e-06, "loss": 0.8384, "step": 6354 }, { "epoch": 0.52, "grad_norm": 2.280078937194067, "learning_rate": 4.981585078831926e-06, "loss": 0.6545, "step": 6355 }, { "epoch": 0.52, "grad_norm": 2.9209362279952815, "learning_rate": 4.980269733919645e-06, "loss": 0.666, "step": 6356 }, { "epoch": 0.52, "grad_norm": 5.390739875520528, "learning_rate": 4.9789543903728224e-06, "loss": 0.6941, "step": 6357 }, { "epoch": 0.52, "grad_norm": 3.251344286999029, "learning_rate": 4.97763904828249e-06, "loss": 0.6321, "step": 6358 }, { "epoch": 0.52, "grad_norm": 3.964415277973606, "learning_rate": 4.976323707739675e-06, "loss": 0.8115, "step": 6359 }, { "epoch": 0.52, "grad_norm": 2.8433745535799333, "learning_rate": 4.975008368835408e-06, "loss": 0.6591, "step": 6360 }, { "epoch": 0.52, "grad_norm": 7.689166018373095, "learning_rate": 4.973693031660719e-06, "loss": 0.8147, "step": 6361 }, { "epoch": 0.52, "grad_norm": 10.078192333247861, "learning_rate": 4.972377696306639e-06, "loss": 0.6828, "step": 6362 }, { "epoch": 0.52, "grad_norm": 15.572383369901102, "learning_rate": 4.971062362864196e-06, "loss": 0.6735, "step": 6363 }, { "epoch": 0.52, "grad_norm": 2.8575537611868427, "learning_rate": 4.969747031424419e-06, "loss": 0.6287, "step": 6364 }, { "epoch": 0.52, "grad_norm": 2.806013827226555, "learning_rate": 4.968431702078341e-06, "loss": 0.7296, "step": 6365 }, { "epoch": 0.52, "grad_norm": 3.6298434244722624, "learning_rate": 4.967116374916985e-06, "loss": 0.7312, "step": 6366 }, { "epoch": 0.52, "grad_norm": 8.387068702084253, "learning_rate": 4.965801050031383e-06, "loss": 0.79, "step": 6367 }, { "epoch": 0.52, "grad_norm": 3.6170534497596574, "learning_rate": 4.9644857275125634e-06, "loss": 0.655, "step": 6368 }, { "epoch": 0.52, "grad_norm": 6.969601269120004, "learning_rate": 4.963170407451556e-06, "loss": 0.5889, "step": 6369 }, { "epoch": 0.52, "grad_norm": 3.3148919557115883, "learning_rate": 4.961855089939388e-06, "loss": 0.7272, "step": 6370 }, { "epoch": 0.52, "grad_norm": 3.69393805622033, "learning_rate": 4.960539775067089e-06, "loss": 0.6436, "step": 6371 }, { "epoch": 0.52, "grad_norm": 3.4170437513463603, "learning_rate": 4.959224462925685e-06, "loss": 0.6985, "step": 6372 }, { "epoch": 0.52, "grad_norm": 2.8674547831501527, "learning_rate": 4.9579091536062054e-06, "loss": 0.6502, "step": 6373 }, { "epoch": 0.52, "grad_norm": 4.931877187777162, "learning_rate": 4.956593847199676e-06, "loss": 0.6063, "step": 6374 }, { "epoch": 0.52, "grad_norm": 3.9731306731202833, "learning_rate": 4.955278543797126e-06, "loss": 0.6997, "step": 6375 }, { "epoch": 0.52, "grad_norm": 4.783354846569904, "learning_rate": 4.953963243489583e-06, "loss": 0.7188, "step": 6376 }, { "epoch": 0.52, "grad_norm": 16.599637167867275, "learning_rate": 4.952647946368074e-06, "loss": 0.7386, "step": 6377 }, { "epoch": 0.52, "grad_norm": 4.032903713261169, "learning_rate": 4.951332652523625e-06, "loss": 0.7051, "step": 6378 }, { "epoch": 0.52, "grad_norm": 3.791574009322771, "learning_rate": 4.950017362047264e-06, "loss": 0.6941, "step": 6379 }, { "epoch": 0.52, "grad_norm": 5.919900037831575, "learning_rate": 4.948702075030014e-06, "loss": 0.7167, "step": 6380 }, { "epoch": 0.52, "grad_norm": 3.1701979735786656, "learning_rate": 4.947386791562904e-06, "loss": 0.6915, "step": 6381 }, { "epoch": 0.52, "grad_norm": 3.864771205304822, "learning_rate": 4.946071511736959e-06, "loss": 0.6785, "step": 6382 }, { "epoch": 0.52, "grad_norm": 5.768537577843789, "learning_rate": 4.944756235643205e-06, "loss": 0.6965, "step": 6383 }, { "epoch": 0.52, "grad_norm": 4.594881279617763, "learning_rate": 4.943440963372668e-06, "loss": 0.6001, "step": 6384 }, { "epoch": 0.52, "grad_norm": 4.7638653217958105, "learning_rate": 4.942125695016373e-06, "loss": 0.6453, "step": 6385 }, { "epoch": 0.52, "grad_norm": 3.828066138044884, "learning_rate": 4.940810430665344e-06, "loss": 0.5966, "step": 6386 }, { "epoch": 0.52, "grad_norm": 5.59429964228511, "learning_rate": 4.939495170410606e-06, "loss": 0.7866, "step": 6387 }, { "epoch": 0.52, "grad_norm": 2.3543983207238046, "learning_rate": 4.9381799143431815e-06, "loss": 0.8467, "step": 6388 }, { "epoch": 0.52, "grad_norm": 5.125080758660041, "learning_rate": 4.936864662554098e-06, "loss": 0.6249, "step": 6389 }, { "epoch": 0.52, "grad_norm": 3.6568281998277676, "learning_rate": 4.935549415134376e-06, "loss": 0.8075, "step": 6390 }, { "epoch": 0.52, "grad_norm": 12.579054924178276, "learning_rate": 4.934234172175043e-06, "loss": 0.6585, "step": 6391 }, { "epoch": 0.52, "grad_norm": 2.3209165248906003, "learning_rate": 4.932918933767116e-06, "loss": 0.6215, "step": 6392 }, { "epoch": 0.52, "grad_norm": 3.676653675140861, "learning_rate": 4.931603700001623e-06, "loss": 0.6438, "step": 6393 }, { "epoch": 0.52, "grad_norm": 2.73884537323697, "learning_rate": 4.930288470969584e-06, "loss": 0.7231, "step": 6394 }, { "epoch": 0.52, "grad_norm": 3.9639053366386086, "learning_rate": 4.928973246762022e-06, "loss": 0.6294, "step": 6395 }, { "epoch": 0.52, "grad_norm": 21.35458625668591, "learning_rate": 4.927658027469958e-06, "loss": 0.5958, "step": 6396 }, { "epoch": 0.52, "grad_norm": 2.725073808178824, "learning_rate": 4.926342813184413e-06, "loss": 0.6966, "step": 6397 }, { "epoch": 0.52, "grad_norm": 4.532701722618901, "learning_rate": 4.925027603996414e-06, "loss": 0.7644, "step": 6398 }, { "epoch": 0.52, "grad_norm": 3.827771147301602, "learning_rate": 4.923712399996972e-06, "loss": 0.6024, "step": 6399 }, { "epoch": 0.52, "grad_norm": 2.9825806708957887, "learning_rate": 4.922397201277114e-06, "loss": 0.5417, "step": 6400 }, { "epoch": 0.52, "grad_norm": 2.67740885081383, "learning_rate": 4.921082007927857e-06, "loss": 0.7419, "step": 6401 }, { "epoch": 0.52, "grad_norm": 3.4917724523278055, "learning_rate": 4.919766820040224e-06, "loss": 0.642, "step": 6402 }, { "epoch": 0.52, "grad_norm": 2.5674161970507985, "learning_rate": 4.9184516377052305e-06, "loss": 0.6026, "step": 6403 }, { "epoch": 0.52, "grad_norm": 4.725729591305165, "learning_rate": 4.9171364610139e-06, "loss": 0.7535, "step": 6404 }, { "epoch": 0.52, "grad_norm": 2.948880072751482, "learning_rate": 4.915821290057245e-06, "loss": 0.7515, "step": 6405 }, { "epoch": 0.52, "grad_norm": 6.568096667210566, "learning_rate": 4.914506124926288e-06, "loss": 0.6559, "step": 6406 }, { "epoch": 0.52, "grad_norm": 2.6501606402728517, "learning_rate": 4.913190965712045e-06, "loss": 0.6338, "step": 6407 }, { "epoch": 0.52, "grad_norm": 6.846070694646515, "learning_rate": 4.911875812505533e-06, "loss": 0.668, "step": 6408 }, { "epoch": 0.52, "grad_norm": 9.501851312125194, "learning_rate": 4.910560665397772e-06, "loss": 0.7569, "step": 6409 }, { "epoch": 0.52, "grad_norm": 3.5569555940736572, "learning_rate": 4.909245524479774e-06, "loss": 0.7844, "step": 6410 }, { "epoch": 0.52, "grad_norm": 3.2054358423086287, "learning_rate": 4.907930389842558e-06, "loss": 0.6693, "step": 6411 }, { "epoch": 0.52, "grad_norm": 8.330526161666796, "learning_rate": 4.906615261577139e-06, "loss": 0.6321, "step": 6412 }, { "epoch": 0.52, "grad_norm": 2.2449889419161257, "learning_rate": 4.905300139774532e-06, "loss": 0.7846, "step": 6413 }, { "epoch": 0.52, "grad_norm": 3.0590300277771174, "learning_rate": 4.903985024525751e-06, "loss": 0.6838, "step": 6414 }, { "epoch": 0.52, "grad_norm": 2.9617294160582412, "learning_rate": 4.90266991592181e-06, "loss": 0.7916, "step": 6415 }, { "epoch": 0.52, "grad_norm": 4.442971007173976, "learning_rate": 4.901354814053724e-06, "loss": 0.7019, "step": 6416 }, { "epoch": 0.52, "grad_norm": 4.203682970791329, "learning_rate": 4.9000397190125076e-06, "loss": 0.6891, "step": 6417 }, { "epoch": 0.52, "grad_norm": 3.302478574245904, "learning_rate": 4.898724630889172e-06, "loss": 0.5293, "step": 6418 }, { "epoch": 0.52, "grad_norm": 2.7789041411074393, "learning_rate": 4.897409549774729e-06, "loss": 0.6709, "step": 6419 }, { "epoch": 0.52, "grad_norm": 3.645925886226135, "learning_rate": 4.896094475760191e-06, "loss": 0.7505, "step": 6420 }, { "epoch": 0.52, "grad_norm": 3.1945467846235687, "learning_rate": 4.8947794089365685e-06, "loss": 0.7228, "step": 6421 }, { "epoch": 0.52, "grad_norm": 4.271069117326643, "learning_rate": 4.893464349394874e-06, "loss": 0.7548, "step": 6422 }, { "epoch": 0.52, "grad_norm": 4.694655446506869, "learning_rate": 4.892149297226118e-06, "loss": 0.5366, "step": 6423 }, { "epoch": 0.52, "grad_norm": 11.364478738264422, "learning_rate": 4.890834252521311e-06, "loss": 0.7314, "step": 6424 }, { "epoch": 0.52, "grad_norm": 6.510783391621584, "learning_rate": 4.889519215371458e-06, "loss": 0.638, "step": 6425 }, { "epoch": 0.52, "grad_norm": 3.2258567334754615, "learning_rate": 4.888204185867572e-06, "loss": 0.5635, "step": 6426 }, { "epoch": 0.52, "grad_norm": 3.224529721452054, "learning_rate": 4.886889164100661e-06, "loss": 0.6794, "step": 6427 }, { "epoch": 0.52, "grad_norm": 3.362427928638376, "learning_rate": 4.885574150161732e-06, "loss": 0.724, "step": 6428 }, { "epoch": 0.52, "grad_norm": 4.7204437395199355, "learning_rate": 4.884259144141792e-06, "loss": 0.6561, "step": 6429 }, { "epoch": 0.52, "grad_norm": 2.5394195204765944, "learning_rate": 4.882944146131848e-06, "loss": 0.7797, "step": 6430 }, { "epoch": 0.52, "grad_norm": 6.980790558032294, "learning_rate": 4.881629156222907e-06, "loss": 0.556, "step": 6431 }, { "epoch": 0.52, "grad_norm": 2.99893173919464, "learning_rate": 4.880314174505972e-06, "loss": 0.6949, "step": 6432 }, { "epoch": 0.52, "grad_norm": 2.9951376458742023, "learning_rate": 4.8789992010720505e-06, "loss": 0.766, "step": 6433 }, { "epoch": 0.52, "grad_norm": 2.1760280912025802, "learning_rate": 4.877684236012147e-06, "loss": 0.6768, "step": 6434 }, { "epoch": 0.52, "grad_norm": 2.4190528921361367, "learning_rate": 4.876369279417263e-06, "loss": 0.6981, "step": 6435 }, { "epoch": 0.52, "grad_norm": 4.195995133865271, "learning_rate": 4.875054331378404e-06, "loss": 0.7759, "step": 6436 }, { "epoch": 0.52, "grad_norm": 4.798954939981184, "learning_rate": 4.873739391986571e-06, "loss": 0.6341, "step": 6437 }, { "epoch": 0.52, "grad_norm": 2.822775238507823, "learning_rate": 4.87242446133277e-06, "loss": 0.6786, "step": 6438 }, { "epoch": 0.52, "grad_norm": 3.200575302817755, "learning_rate": 4.871109539507998e-06, "loss": 0.6178, "step": 6439 }, { "epoch": 0.52, "grad_norm": 3.0001511396021154, "learning_rate": 4.869794626603256e-06, "loss": 0.7569, "step": 6440 }, { "epoch": 0.52, "grad_norm": 3.8045909348439313, "learning_rate": 4.868479722709547e-06, "loss": 0.6831, "step": 6441 }, { "epoch": 0.52, "grad_norm": 2.5401181823660854, "learning_rate": 4.86716482791787e-06, "loss": 0.596, "step": 6442 }, { "epoch": 0.52, "grad_norm": 6.692407526298747, "learning_rate": 4.8658499423192215e-06, "loss": 0.676, "step": 6443 }, { "epoch": 0.52, "grad_norm": 8.034122487007481, "learning_rate": 4.864535066004604e-06, "loss": 0.723, "step": 6444 }, { "epoch": 0.52, "grad_norm": 2.8709890589797684, "learning_rate": 4.863220199065011e-06, "loss": 0.8244, "step": 6445 }, { "epoch": 0.52, "grad_norm": 3.1541197174179763, "learning_rate": 4.861905341591442e-06, "loss": 0.6012, "step": 6446 }, { "epoch": 0.52, "grad_norm": 2.541868841247612, "learning_rate": 4.860590493674892e-06, "loss": 0.7638, "step": 6447 }, { "epoch": 0.52, "grad_norm": 2.2875331255066396, "learning_rate": 4.859275655406358e-06, "loss": 0.7176, "step": 6448 }, { "epoch": 0.52, "grad_norm": 3.389296275858009, "learning_rate": 4.857960826876835e-06, "loss": 0.7971, "step": 6449 }, { "epoch": 0.52, "grad_norm": 7.608012479350219, "learning_rate": 4.856646008177318e-06, "loss": 0.6686, "step": 6450 }, { "epoch": 0.52, "grad_norm": 7.845934367671523, "learning_rate": 4.855331199398799e-06, "loss": 0.6883, "step": 6451 }, { "epoch": 0.52, "grad_norm": 4.324055416673929, "learning_rate": 4.8540164006322735e-06, "loss": 0.6225, "step": 6452 }, { "epoch": 0.52, "grad_norm": 4.999141946174273, "learning_rate": 4.8527016119687306e-06, "loss": 0.76, "step": 6453 }, { "epoch": 0.52, "grad_norm": 3.2953791511615615, "learning_rate": 4.851386833499163e-06, "loss": 0.6914, "step": 6454 }, { "epoch": 0.52, "grad_norm": 5.370327795119699, "learning_rate": 4.850072065314563e-06, "loss": 0.8209, "step": 6455 }, { "epoch": 0.52, "grad_norm": 2.9202756624324695, "learning_rate": 4.8487573075059195e-06, "loss": 0.8148, "step": 6456 }, { "epoch": 0.52, "grad_norm": 3.019099815294026, "learning_rate": 4.847442560164226e-06, "loss": 0.6801, "step": 6457 }, { "epoch": 0.52, "grad_norm": 3.139574476738395, "learning_rate": 4.846127823380464e-06, "loss": 0.6225, "step": 6458 }, { "epoch": 0.52, "grad_norm": 7.569503691887198, "learning_rate": 4.844813097245628e-06, "loss": 0.6775, "step": 6459 }, { "epoch": 0.52, "grad_norm": 4.819630963707262, "learning_rate": 4.843498381850701e-06, "loss": 0.7349, "step": 6460 }, { "epoch": 0.52, "grad_norm": 4.241141261829069, "learning_rate": 4.842183677286671e-06, "loss": 0.7548, "step": 6461 }, { "epoch": 0.52, "grad_norm": 2.456007749685355, "learning_rate": 4.840868983644525e-06, "loss": 0.6758, "step": 6462 }, { "epoch": 0.52, "grad_norm": 4.538681786602927, "learning_rate": 4.839554301015247e-06, "loss": 0.6927, "step": 6463 }, { "epoch": 0.53, "grad_norm": 3.4558697123770146, "learning_rate": 4.838239629489824e-06, "loss": 0.6596, "step": 6464 }, { "epoch": 0.53, "grad_norm": 25.283376049394857, "learning_rate": 4.836924969159234e-06, "loss": 0.5828, "step": 6465 }, { "epoch": 0.53, "grad_norm": 5.535437586532406, "learning_rate": 4.835610320114465e-06, "loss": 0.9513, "step": 6466 }, { "epoch": 0.53, "grad_norm": 3.4836105477808608, "learning_rate": 4.834295682446496e-06, "loss": 0.7487, "step": 6467 }, { "epoch": 0.53, "grad_norm": 14.654930251043227, "learning_rate": 4.83298105624631e-06, "loss": 0.5394, "step": 6468 }, { "epoch": 0.53, "grad_norm": 2.822968386689163, "learning_rate": 4.831666441604884e-06, "loss": 0.7181, "step": 6469 }, { "epoch": 0.53, "grad_norm": 2.9269271026148456, "learning_rate": 4.830351838613202e-06, "loss": 0.7548, "step": 6470 }, { "epoch": 0.53, "grad_norm": 4.765919535555958, "learning_rate": 4.829037247362243e-06, "loss": 0.6214, "step": 6471 }, { "epoch": 0.53, "grad_norm": 3.09388891730253, "learning_rate": 4.82772266794298e-06, "loss": 0.6693, "step": 6472 }, { "epoch": 0.53, "grad_norm": 3.44704271006444, "learning_rate": 4.826408100446393e-06, "loss": 0.7739, "step": 6473 }, { "epoch": 0.53, "grad_norm": 21.55790530469095, "learning_rate": 4.82509354496346e-06, "loss": 0.7431, "step": 6474 }, { "epoch": 0.53, "grad_norm": 2.912877650204532, "learning_rate": 4.823779001585155e-06, "loss": 0.7069, "step": 6475 }, { "epoch": 0.53, "grad_norm": 2.766901036719601, "learning_rate": 4.822464470402452e-06, "loss": 0.5794, "step": 6476 }, { "epoch": 0.53, "grad_norm": 3.1699762502755635, "learning_rate": 4.821149951506327e-06, "loss": 0.7737, "step": 6477 }, { "epoch": 0.53, "grad_norm": 2.714422906616694, "learning_rate": 4.81983544498775e-06, "loss": 0.7907, "step": 6478 }, { "epoch": 0.53, "grad_norm": 3.2432875009933215, "learning_rate": 4.818520950937694e-06, "loss": 0.7728, "step": 6479 }, { "epoch": 0.53, "grad_norm": 6.556282142958906, "learning_rate": 4.817206469447132e-06, "loss": 0.5339, "step": 6480 }, { "epoch": 0.53, "grad_norm": 2.2193288163407465, "learning_rate": 4.815892000607032e-06, "loss": 0.7085, "step": 6481 }, { "epoch": 0.53, "grad_norm": 4.666085027293924, "learning_rate": 4.814577544508367e-06, "loss": 0.7162, "step": 6482 }, { "epoch": 0.53, "grad_norm": 8.412608825562815, "learning_rate": 4.813263101242101e-06, "loss": 0.6844, "step": 6483 }, { "epoch": 0.53, "grad_norm": 3.160818637727029, "learning_rate": 4.811948670899207e-06, "loss": 0.6893, "step": 6484 }, { "epoch": 0.53, "grad_norm": 2.5908341108149537, "learning_rate": 4.810634253570647e-06, "loss": 0.6023, "step": 6485 }, { "epoch": 0.53, "grad_norm": 5.646059836988076, "learning_rate": 4.8093198493473896e-06, "loss": 0.8402, "step": 6486 }, { "epoch": 0.53, "grad_norm": 3.01627079388889, "learning_rate": 4.8080054583203975e-06, "loss": 0.6908, "step": 6487 }, { "epoch": 0.53, "grad_norm": 2.444250384858454, "learning_rate": 4.8066910805806384e-06, "loss": 0.6558, "step": 6488 }, { "epoch": 0.53, "grad_norm": 3.264325368048423, "learning_rate": 4.805376716219073e-06, "loss": 0.6705, "step": 6489 }, { "epoch": 0.53, "grad_norm": 8.61116645762462, "learning_rate": 4.804062365326665e-06, "loss": 0.6051, "step": 6490 }, { "epoch": 0.53, "grad_norm": 3.7697763690659403, "learning_rate": 4.802748027994376e-06, "loss": 0.6951, "step": 6491 }, { "epoch": 0.53, "grad_norm": 4.9782880694223595, "learning_rate": 4.801433704313164e-06, "loss": 0.6406, "step": 6492 }, { "epoch": 0.53, "grad_norm": 2.9918777949675697, "learning_rate": 4.80011939437399e-06, "loss": 0.7993, "step": 6493 }, { "epoch": 0.53, "grad_norm": 2.1607958375086382, "learning_rate": 4.7988050982678125e-06, "loss": 0.7583, "step": 6494 }, { "epoch": 0.53, "grad_norm": 2.9358014731111868, "learning_rate": 4.797490816085588e-06, "loss": 0.5487, "step": 6495 }, { "epoch": 0.53, "grad_norm": 2.6443722327049284, "learning_rate": 4.796176547918276e-06, "loss": 0.6268, "step": 6496 }, { "epoch": 0.53, "grad_norm": 3.5942155918658782, "learning_rate": 4.7948622938568305e-06, "loss": 0.6101, "step": 6497 }, { "epoch": 0.53, "grad_norm": 7.243264155984004, "learning_rate": 4.793548053992205e-06, "loss": 0.5971, "step": 6498 }, { "epoch": 0.53, "grad_norm": 3.686805890154354, "learning_rate": 4.792233828415353e-06, "loss": 0.6365, "step": 6499 }, { "epoch": 0.53, "grad_norm": 3.018733051877643, "learning_rate": 4.79091961721723e-06, "loss": 0.7762, "step": 6500 }, { "epoch": 0.53, "grad_norm": 3.05475750340497, "learning_rate": 4.789605420488785e-06, "loss": 0.7222, "step": 6501 }, { "epoch": 0.53, "grad_norm": 2.725972636727803, "learning_rate": 4.78829123832097e-06, "loss": 0.6777, "step": 6502 }, { "epoch": 0.53, "grad_norm": 4.0479363527494066, "learning_rate": 4.786977070804733e-06, "loss": 0.7731, "step": 6503 }, { "epoch": 0.53, "grad_norm": 4.378851954138839, "learning_rate": 4.785662918031027e-06, "loss": 0.6143, "step": 6504 }, { "epoch": 0.53, "grad_norm": 5.478974780435422, "learning_rate": 4.784348780090795e-06, "loss": 0.6099, "step": 6505 }, { "epoch": 0.53, "grad_norm": 2.836956082129708, "learning_rate": 4.783034657074985e-06, "loss": 0.6748, "step": 6506 }, { "epoch": 0.53, "grad_norm": 2.1393955414564694, "learning_rate": 4.781720549074543e-06, "loss": 0.6672, "step": 6507 }, { "epoch": 0.53, "grad_norm": 6.353242854951079, "learning_rate": 4.7804064561804135e-06, "loss": 0.6131, "step": 6508 }, { "epoch": 0.53, "grad_norm": 4.506960355460957, "learning_rate": 4.779092378483539e-06, "loss": 0.8057, "step": 6509 }, { "epoch": 0.53, "grad_norm": 2.542123021068329, "learning_rate": 4.777778316074866e-06, "loss": 0.5761, "step": 6510 }, { "epoch": 0.53, "grad_norm": 2.7918381080145176, "learning_rate": 4.77646426904533e-06, "loss": 0.7661, "step": 6511 }, { "epoch": 0.53, "grad_norm": 3.7901224160605795, "learning_rate": 4.775150237485874e-06, "loss": 0.6958, "step": 6512 }, { "epoch": 0.53, "grad_norm": 5.636691397391061, "learning_rate": 4.773836221487437e-06, "loss": 0.7174, "step": 6513 }, { "epoch": 0.53, "grad_norm": 3.9913918445078567, "learning_rate": 4.772522221140959e-06, "loss": 0.6308, "step": 6514 }, { "epoch": 0.53, "grad_norm": 2.3364584673615125, "learning_rate": 4.7712082365373755e-06, "loss": 0.6052, "step": 6515 }, { "epoch": 0.53, "grad_norm": 4.149080661712527, "learning_rate": 4.769894267767621e-06, "loss": 0.6166, "step": 6516 }, { "epoch": 0.53, "grad_norm": 5.085614011368114, "learning_rate": 4.768580314922635e-06, "loss": 0.7956, "step": 6517 }, { "epoch": 0.53, "grad_norm": 3.05738405009252, "learning_rate": 4.767266378093346e-06, "loss": 0.7635, "step": 6518 }, { "epoch": 0.53, "grad_norm": 3.487911861058963, "learning_rate": 4.765952457370689e-06, "loss": 0.6206, "step": 6519 }, { "epoch": 0.53, "grad_norm": 2.6211609943248395, "learning_rate": 4.7646385528455966e-06, "loss": 0.6759, "step": 6520 }, { "epoch": 0.53, "grad_norm": 2.5451707265201944, "learning_rate": 4.763324664608997e-06, "loss": 0.8192, "step": 6521 }, { "epoch": 0.53, "grad_norm": 3.4016162812318678, "learning_rate": 4.762010792751823e-06, "loss": 0.6082, "step": 6522 }, { "epoch": 0.53, "grad_norm": 2.947345024466726, "learning_rate": 4.760696937364999e-06, "loss": 0.6572, "step": 6523 }, { "epoch": 0.53, "grad_norm": 4.52877042625775, "learning_rate": 4.759383098539454e-06, "loss": 0.6485, "step": 6524 }, { "epoch": 0.53, "grad_norm": 8.272560283063008, "learning_rate": 4.758069276366115e-06, "loss": 0.5812, "step": 6525 }, { "epoch": 0.53, "grad_norm": 3.8984186352634573, "learning_rate": 4.756755470935903e-06, "loss": 0.6979, "step": 6526 }, { "epoch": 0.53, "grad_norm": 8.797019820520363, "learning_rate": 4.755441682339745e-06, "loss": 0.6657, "step": 6527 }, { "epoch": 0.53, "grad_norm": 3.242159009500934, "learning_rate": 4.754127910668562e-06, "loss": 0.8229, "step": 6528 }, { "epoch": 0.53, "grad_norm": 2.198035289935515, "learning_rate": 4.752814156013276e-06, "loss": 0.6008, "step": 6529 }, { "epoch": 0.53, "grad_norm": 4.970999595704256, "learning_rate": 4.751500418464809e-06, "loss": 0.9307, "step": 6530 }, { "epoch": 0.53, "grad_norm": 7.027451664656361, "learning_rate": 4.7501866981140755e-06, "loss": 0.7864, "step": 6531 }, { "epoch": 0.53, "grad_norm": 4.766871976303786, "learning_rate": 4.748872995051996e-06, "loss": 0.5974, "step": 6532 }, { "epoch": 0.53, "grad_norm": 5.036349311241271, "learning_rate": 4.747559309369486e-06, "loss": 0.6499, "step": 6533 }, { "epoch": 0.53, "grad_norm": 3.0628455602518243, "learning_rate": 4.746245641157461e-06, "loss": 0.7212, "step": 6534 }, { "epoch": 0.53, "grad_norm": 3.6850060006118017, "learning_rate": 4.744931990506836e-06, "loss": 0.7501, "step": 6535 }, { "epoch": 0.53, "grad_norm": 2.348277999571458, "learning_rate": 4.743618357508522e-06, "loss": 0.6835, "step": 6536 }, { "epoch": 0.53, "grad_norm": 3.5383264410923703, "learning_rate": 4.742304742253436e-06, "loss": 0.7248, "step": 6537 }, { "epoch": 0.53, "grad_norm": 5.0274065182545815, "learning_rate": 4.740991144832481e-06, "loss": 0.7053, "step": 6538 }, { "epoch": 0.53, "grad_norm": 6.733311242022406, "learning_rate": 4.73967756533657e-06, "loss": 0.7506, "step": 6539 }, { "epoch": 0.53, "grad_norm": 2.4614657840720757, "learning_rate": 4.738364003856611e-06, "loss": 0.7246, "step": 6540 }, { "epoch": 0.53, "grad_norm": 3.984477315347523, "learning_rate": 4.73705046048351e-06, "loss": 0.7069, "step": 6541 }, { "epoch": 0.53, "grad_norm": 9.685004703668936, "learning_rate": 4.735736935308173e-06, "loss": 0.6303, "step": 6542 }, { "epoch": 0.53, "grad_norm": 4.139695765932749, "learning_rate": 4.734423428421504e-06, "loss": 0.7564, "step": 6543 }, { "epoch": 0.53, "grad_norm": 3.3915270732442733, "learning_rate": 4.733109939914407e-06, "loss": 0.6931, "step": 6544 }, { "epoch": 0.53, "grad_norm": 2.7367909329885274, "learning_rate": 4.731796469877781e-06, "loss": 0.5984, "step": 6545 }, { "epoch": 0.53, "grad_norm": 2.4191448143982903, "learning_rate": 4.7304830184025286e-06, "loss": 0.7552, "step": 6546 }, { "epoch": 0.53, "grad_norm": 3.2170464735005817, "learning_rate": 4.729169585579549e-06, "loss": 0.6015, "step": 6547 }, { "epoch": 0.53, "grad_norm": 6.304314599433871, "learning_rate": 4.727856171499738e-06, "loss": 0.5193, "step": 6548 }, { "epoch": 0.53, "grad_norm": 3.4858790673430864, "learning_rate": 4.7265427762539936e-06, "loss": 0.7098, "step": 6549 }, { "epoch": 0.53, "grad_norm": 2.7153585677812964, "learning_rate": 4.725229399933214e-06, "loss": 0.7443, "step": 6550 }, { "epoch": 0.53, "grad_norm": 2.8377986557990593, "learning_rate": 4.723916042628287e-06, "loss": 0.7858, "step": 6551 }, { "epoch": 0.53, "grad_norm": 5.472524227480369, "learning_rate": 4.722602704430108e-06, "loss": 0.5207, "step": 6552 }, { "epoch": 0.53, "grad_norm": 3.2512519256935635, "learning_rate": 4.721289385429569e-06, "loss": 0.8032, "step": 6553 }, { "epoch": 0.53, "grad_norm": 3.151133617184783, "learning_rate": 4.71997608571756e-06, "loss": 0.731, "step": 6554 }, { "epoch": 0.53, "grad_norm": 3.2898644324268327, "learning_rate": 4.71866280538497e-06, "loss": 0.7069, "step": 6555 }, { "epoch": 0.53, "grad_norm": 3.6139589900965827, "learning_rate": 4.717349544522683e-06, "loss": 0.6137, "step": 6556 }, { "epoch": 0.53, "grad_norm": 3.302476407478498, "learning_rate": 4.71603630322159e-06, "loss": 0.8684, "step": 6557 }, { "epoch": 0.53, "grad_norm": 8.656895751033002, "learning_rate": 4.714723081572571e-06, "loss": 0.5347, "step": 6558 }, { "epoch": 0.53, "grad_norm": 3.565183857421903, "learning_rate": 4.71340987966651e-06, "loss": 0.6073, "step": 6559 }, { "epoch": 0.53, "grad_norm": 2.5623534762364346, "learning_rate": 4.7120966975942905e-06, "loss": 0.6609, "step": 6560 }, { "epoch": 0.53, "grad_norm": 6.301720780624858, "learning_rate": 4.710783535446793e-06, "loss": 0.7892, "step": 6561 }, { "epoch": 0.53, "grad_norm": 4.438664267025778, "learning_rate": 4.709470393314896e-06, "loss": 0.6616, "step": 6562 }, { "epoch": 0.53, "grad_norm": 3.927502508577626, "learning_rate": 4.708157271289477e-06, "loss": 0.6009, "step": 6563 }, { "epoch": 0.53, "grad_norm": 3.1289394755063817, "learning_rate": 4.706844169461413e-06, "loss": 0.6883, "step": 6564 }, { "epoch": 0.53, "grad_norm": 7.028052298139913, "learning_rate": 4.705531087921578e-06, "loss": 0.6324, "step": 6565 }, { "epoch": 0.53, "grad_norm": 3.745045886393011, "learning_rate": 4.7042180267608445e-06, "loss": 0.8369, "step": 6566 }, { "epoch": 0.53, "grad_norm": 8.08031999769109, "learning_rate": 4.7029049860700865e-06, "loss": 0.6891, "step": 6567 }, { "epoch": 0.53, "grad_norm": 13.56575456147451, "learning_rate": 4.701591965940174e-06, "loss": 0.6927, "step": 6568 }, { "epoch": 0.53, "grad_norm": 2.9558069060613477, "learning_rate": 4.700278966461977e-06, "loss": 0.7404, "step": 6569 }, { "epoch": 0.53, "grad_norm": 6.039738264820022, "learning_rate": 4.6989659877263636e-06, "loss": 0.6192, "step": 6570 }, { "epoch": 0.53, "grad_norm": 4.63960107421065, "learning_rate": 4.697653029824198e-06, "loss": 0.819, "step": 6571 }, { "epoch": 0.53, "grad_norm": 2.2811115729175806, "learning_rate": 4.696340092846347e-06, "loss": 0.6627, "step": 6572 }, { "epoch": 0.53, "grad_norm": 2.3234269918797867, "learning_rate": 4.695027176883673e-06, "loss": 0.5731, "step": 6573 }, { "epoch": 0.53, "grad_norm": 4.5493983203735295, "learning_rate": 4.693714282027039e-06, "loss": 0.7915, "step": 6574 }, { "epoch": 0.53, "grad_norm": 3.8312371708493145, "learning_rate": 4.692401408367305e-06, "loss": 0.7356, "step": 6575 }, { "epoch": 0.53, "grad_norm": 2.4848385300081026, "learning_rate": 4.69108855599533e-06, "loss": 0.7224, "step": 6576 }, { "epoch": 0.53, "grad_norm": 2.7225411823837553, "learning_rate": 4.689775725001974e-06, "loss": 0.6242, "step": 6577 }, { "epoch": 0.53, "grad_norm": 4.53432021661694, "learning_rate": 4.6884629154780895e-06, "loss": 0.7509, "step": 6578 }, { "epoch": 0.53, "grad_norm": 3.9830787397835494, "learning_rate": 4.6871501275145325e-06, "loss": 0.7708, "step": 6579 }, { "epoch": 0.53, "grad_norm": 3.4815850177678187, "learning_rate": 4.6858373612021575e-06, "loss": 0.6652, "step": 6580 }, { "epoch": 0.53, "grad_norm": 4.464786657250361, "learning_rate": 4.684524616631815e-06, "loss": 0.7656, "step": 6581 }, { "epoch": 0.53, "grad_norm": 4.32132457756276, "learning_rate": 4.683211893894355e-06, "loss": 0.7037, "step": 6582 }, { "epoch": 0.53, "grad_norm": 3.6552625024597822, "learning_rate": 4.681899193080628e-06, "loss": 0.6821, "step": 6583 }, { "epoch": 0.53, "grad_norm": 3.1795217780737413, "learning_rate": 4.680586514281479e-06, "loss": 0.7767, "step": 6584 }, { "epoch": 0.53, "grad_norm": 2.6141349354942442, "learning_rate": 4.679273857587753e-06, "loss": 0.6681, "step": 6585 }, { "epoch": 0.53, "grad_norm": 2.750261797300128, "learning_rate": 4.677961223090297e-06, "loss": 0.7594, "step": 6586 }, { "epoch": 0.53, "grad_norm": 2.5236443826553225, "learning_rate": 4.6766486108799505e-06, "loss": 0.5148, "step": 6587 }, { "epoch": 0.54, "grad_norm": 2.4834648525229737, "learning_rate": 4.6753360210475576e-06, "loss": 0.7451, "step": 6588 }, { "epoch": 0.54, "grad_norm": 4.8551143754188475, "learning_rate": 4.674023453683956e-06, "loss": 0.6633, "step": 6589 }, { "epoch": 0.54, "grad_norm": 2.9600284132004546, "learning_rate": 4.672710908879985e-06, "loss": 0.7555, "step": 6590 }, { "epoch": 0.54, "grad_norm": 3.061178459461667, "learning_rate": 4.671398386726479e-06, "loss": 0.5683, "step": 6591 }, { "epoch": 0.54, "grad_norm": 2.6049533329992016, "learning_rate": 4.670085887314273e-06, "loss": 0.656, "step": 6592 }, { "epoch": 0.54, "grad_norm": 4.965331113963211, "learning_rate": 4.6687734107342005e-06, "loss": 0.7669, "step": 6593 }, { "epoch": 0.54, "grad_norm": 3.8621287924335426, "learning_rate": 4.667460957077094e-06, "loss": 0.6622, "step": 6594 }, { "epoch": 0.54, "grad_norm": 27.5842062742614, "learning_rate": 4.666148526433784e-06, "loss": 0.663, "step": 6595 }, { "epoch": 0.54, "grad_norm": 3.199525539100665, "learning_rate": 4.6648361188950976e-06, "loss": 0.5194, "step": 6596 }, { "epoch": 0.54, "grad_norm": 4.148987522027259, "learning_rate": 4.663523734551863e-06, "loss": 0.7305, "step": 6597 }, { "epoch": 0.54, "grad_norm": 3.052734178415257, "learning_rate": 4.662211373494904e-06, "loss": 0.7156, "step": 6598 }, { "epoch": 0.54, "grad_norm": 4.4085581095274105, "learning_rate": 4.6608990358150444e-06, "loss": 0.6515, "step": 6599 }, { "epoch": 0.54, "grad_norm": 20.84002359512866, "learning_rate": 4.659586721603107e-06, "loss": 0.5971, "step": 6600 }, { "epoch": 0.54, "grad_norm": 8.352703701213258, "learning_rate": 4.658274430949911e-06, "loss": 0.6749, "step": 6601 }, { "epoch": 0.54, "grad_norm": 2.1533922893254513, "learning_rate": 4.656962163946276e-06, "loss": 0.6209, "step": 6602 }, { "epoch": 0.54, "grad_norm": 3.661626394672807, "learning_rate": 4.655649920683022e-06, "loss": 0.8312, "step": 6603 }, { "epoch": 0.54, "grad_norm": 2.788055480383149, "learning_rate": 4.654337701250959e-06, "loss": 0.656, "step": 6604 }, { "epoch": 0.54, "grad_norm": 3.1361905896761866, "learning_rate": 4.6530255057409055e-06, "loss": 0.5552, "step": 6605 }, { "epoch": 0.54, "grad_norm": 2.6181048594116105, "learning_rate": 4.6517133342436695e-06, "loss": 0.7074, "step": 6606 }, { "epoch": 0.54, "grad_norm": 3.2612469084888334, "learning_rate": 4.650401186850064e-06, "loss": 0.7117, "step": 6607 }, { "epoch": 0.54, "grad_norm": 3.015253742433555, "learning_rate": 4.649089063650898e-06, "loss": 0.7986, "step": 6608 }, { "epoch": 0.54, "grad_norm": 4.439618212059267, "learning_rate": 4.6477769647369785e-06, "loss": 0.8132, "step": 6609 }, { "epoch": 0.54, "grad_norm": 4.351726988856648, "learning_rate": 4.646464890199113e-06, "loss": 0.5885, "step": 6610 }, { "epoch": 0.54, "grad_norm": 4.49084677788596, "learning_rate": 4.6451528401281e-06, "loss": 0.7586, "step": 6611 }, { "epoch": 0.54, "grad_norm": 3.1524710157006317, "learning_rate": 4.6438408146147455e-06, "loss": 0.6821, "step": 6612 }, { "epoch": 0.54, "grad_norm": 2.4091197530398873, "learning_rate": 4.6425288137498506e-06, "loss": 0.7515, "step": 6613 }, { "epoch": 0.54, "grad_norm": 4.529629396258881, "learning_rate": 4.641216837624211e-06, "loss": 0.7693, "step": 6614 }, { "epoch": 0.54, "grad_norm": 3.833613464529931, "learning_rate": 4.6399048863286255e-06, "loss": 0.7955, "step": 6615 }, { "epoch": 0.54, "grad_norm": 9.494314889185498, "learning_rate": 4.638592959953889e-06, "loss": 0.6749, "step": 6616 }, { "epoch": 0.54, "grad_norm": 3.864285215898108, "learning_rate": 4.637281058590798e-06, "loss": 0.6933, "step": 6617 }, { "epoch": 0.54, "grad_norm": 2.2967734422358217, "learning_rate": 4.635969182330139e-06, "loss": 0.7839, "step": 6618 }, { "epoch": 0.54, "grad_norm": 2.497374157001241, "learning_rate": 4.634657331262705e-06, "loss": 0.7592, "step": 6619 }, { "epoch": 0.54, "grad_norm": 2.8675188174744712, "learning_rate": 4.633345505479285e-06, "loss": 0.7856, "step": 6620 }, { "epoch": 0.54, "grad_norm": 3.3661663242949436, "learning_rate": 4.632033705070663e-06, "loss": 0.703, "step": 6621 }, { "epoch": 0.54, "grad_norm": 2.90247757707111, "learning_rate": 4.630721930127626e-06, "loss": 0.6747, "step": 6622 }, { "epoch": 0.54, "grad_norm": 6.258881222264688, "learning_rate": 4.62941018074096e-06, "loss": 0.7375, "step": 6623 }, { "epoch": 0.54, "grad_norm": 4.161113073789652, "learning_rate": 4.6280984570014395e-06, "loss": 0.6651, "step": 6624 }, { "epoch": 0.54, "grad_norm": 5.107610590258278, "learning_rate": 4.626786758999847e-06, "loss": 0.6868, "step": 6625 }, { "epoch": 0.54, "grad_norm": 2.704019732371965, "learning_rate": 4.625475086826961e-06, "loss": 0.6422, "step": 6626 }, { "epoch": 0.54, "grad_norm": 6.295053210236223, "learning_rate": 4.624163440573558e-06, "loss": 0.7998, "step": 6627 }, { "epoch": 0.54, "grad_norm": 3.035357126194412, "learning_rate": 4.622851820330412e-06, "loss": 0.784, "step": 6628 }, { "epoch": 0.54, "grad_norm": 4.039459680924603, "learning_rate": 4.6215402261882935e-06, "loss": 0.9122, "step": 6629 }, { "epoch": 0.54, "grad_norm": 3.879646133764714, "learning_rate": 4.620228658237976e-06, "loss": 0.6336, "step": 6630 }, { "epoch": 0.54, "grad_norm": 6.725956182067229, "learning_rate": 4.618917116570225e-06, "loss": 0.5537, "step": 6631 }, { "epoch": 0.54, "grad_norm": 2.7807220355259297, "learning_rate": 4.61760560127581e-06, "loss": 0.7119, "step": 6632 }, { "epoch": 0.54, "grad_norm": 2.599743473528298, "learning_rate": 4.616294112445494e-06, "loss": 0.6429, "step": 6633 }, { "epoch": 0.54, "grad_norm": 2.582296741904413, "learning_rate": 4.614982650170041e-06, "loss": 0.4829, "step": 6634 }, { "epoch": 0.54, "grad_norm": 2.9936761225936768, "learning_rate": 4.613671214540214e-06, "loss": 0.6411, "step": 6635 }, { "epoch": 0.54, "grad_norm": 4.506044775218329, "learning_rate": 4.612359805646773e-06, "loss": 0.7319, "step": 6636 }, { "epoch": 0.54, "grad_norm": 10.230227916452899, "learning_rate": 4.611048423580472e-06, "loss": 0.7302, "step": 6637 }, { "epoch": 0.54, "grad_norm": 2.085715594391407, "learning_rate": 4.609737068432071e-06, "loss": 0.7145, "step": 6638 }, { "epoch": 0.54, "grad_norm": 3.491468739494525, "learning_rate": 4.60842574029232e-06, "loss": 0.6628, "step": 6639 }, { "epoch": 0.54, "grad_norm": 3.0069275413778223, "learning_rate": 4.607114439251974e-06, "loss": 0.5617, "step": 6640 }, { "epoch": 0.54, "grad_norm": 6.664064628507378, "learning_rate": 4.605803165401782e-06, "loss": 0.6376, "step": 6641 }, { "epoch": 0.54, "grad_norm": 7.163062003940158, "learning_rate": 4.604491918832494e-06, "loss": 0.6292, "step": 6642 }, { "epoch": 0.54, "grad_norm": 3.4439607803063033, "learning_rate": 4.603180699634857e-06, "loss": 0.6623, "step": 6643 }, { "epoch": 0.54, "grad_norm": 9.281783096459746, "learning_rate": 4.601869507899612e-06, "loss": 0.7172, "step": 6644 }, { "epoch": 0.54, "grad_norm": 3.379480756170299, "learning_rate": 4.600558343717505e-06, "loss": 0.5549, "step": 6645 }, { "epoch": 0.54, "grad_norm": 3.8004595518943844, "learning_rate": 4.599247207179275e-06, "loss": 0.7758, "step": 6646 }, { "epoch": 0.54, "grad_norm": 3.436069888074509, "learning_rate": 4.597936098375662e-06, "loss": 0.7092, "step": 6647 }, { "epoch": 0.54, "grad_norm": 33.82166900941241, "learning_rate": 4.596625017397401e-06, "loss": 0.689, "step": 6648 }, { "epoch": 0.54, "grad_norm": 4.6902323951782146, "learning_rate": 4.59531396433523e-06, "loss": 0.6742, "step": 6649 }, { "epoch": 0.54, "grad_norm": 3.726946138169601, "learning_rate": 4.594002939279883e-06, "loss": 0.5965, "step": 6650 }, { "epoch": 0.54, "grad_norm": 3.3912624940115204, "learning_rate": 4.592691942322086e-06, "loss": 0.6788, "step": 6651 }, { "epoch": 0.54, "grad_norm": 4.054687979593414, "learning_rate": 4.591380973552571e-06, "loss": 0.6331, "step": 6652 }, { "epoch": 0.54, "grad_norm": 2.7210662681001323, "learning_rate": 4.5900700330620675e-06, "loss": 0.7772, "step": 6653 }, { "epoch": 0.54, "grad_norm": 7.478096001945345, "learning_rate": 4.5887591209412975e-06, "loss": 0.6631, "step": 6654 }, { "epoch": 0.54, "grad_norm": 3.619262110704854, "learning_rate": 4.587448237280986e-06, "loss": 0.5888, "step": 6655 }, { "epoch": 0.54, "grad_norm": 2.805589150847091, "learning_rate": 4.586137382171856e-06, "loss": 0.8029, "step": 6656 }, { "epoch": 0.54, "grad_norm": 5.924584755934715, "learning_rate": 4.5848265557046226e-06, "loss": 0.682, "step": 6657 }, { "epoch": 0.54, "grad_norm": 4.150679422148071, "learning_rate": 4.583515757970007e-06, "loss": 0.769, "step": 6658 }, { "epoch": 0.54, "grad_norm": 5.530903077880697, "learning_rate": 4.5822049890587215e-06, "loss": 0.6987, "step": 6659 }, { "epoch": 0.54, "grad_norm": 7.810166958065769, "learning_rate": 4.580894249061483e-06, "loss": 0.6734, "step": 6660 }, { "epoch": 0.54, "grad_norm": 3.5024212942073794, "learning_rate": 4.5795835380690005e-06, "loss": 0.6626, "step": 6661 }, { "epoch": 0.54, "grad_norm": 28.989693084695062, "learning_rate": 4.578272856171985e-06, "loss": 0.7165, "step": 6662 }, { "epoch": 0.54, "grad_norm": 3.5035931014937227, "learning_rate": 4.576962203461144e-06, "loss": 0.6568, "step": 6663 }, { "epoch": 0.54, "grad_norm": 2.448710282148407, "learning_rate": 4.5756515800271815e-06, "loss": 0.5197, "step": 6664 }, { "epoch": 0.54, "grad_norm": 3.5224921420705817, "learning_rate": 4.574340985960801e-06, "loss": 0.7752, "step": 6665 }, { "epoch": 0.54, "grad_norm": 3.737454047681081, "learning_rate": 4.573030421352704e-06, "loss": 0.5826, "step": 6666 }, { "epoch": 0.54, "grad_norm": 4.463797251287656, "learning_rate": 4.571719886293591e-06, "loss": 0.7684, "step": 6667 }, { "epoch": 0.54, "grad_norm": 3.2941111898698856, "learning_rate": 4.570409380874159e-06, "loss": 0.6043, "step": 6668 }, { "epoch": 0.54, "grad_norm": 3.631297192076074, "learning_rate": 4.569098905185102e-06, "loss": 0.789, "step": 6669 }, { "epoch": 0.54, "grad_norm": 4.1851295769599695, "learning_rate": 4.567788459317116e-06, "loss": 0.5236, "step": 6670 }, { "epoch": 0.54, "grad_norm": 3.503351610199908, "learning_rate": 4.566478043360888e-06, "loss": 0.6525, "step": 6671 }, { "epoch": 0.54, "grad_norm": 5.5959023616599, "learning_rate": 4.565167657407109e-06, "loss": 0.6061, "step": 6672 }, { "epoch": 0.54, "grad_norm": 3.2135405578783964, "learning_rate": 4.563857301546466e-06, "loss": 0.7545, "step": 6673 }, { "epoch": 0.54, "grad_norm": 6.503361611501933, "learning_rate": 4.562546975869644e-06, "loss": 0.7783, "step": 6674 }, { "epoch": 0.54, "grad_norm": 14.932049108871887, "learning_rate": 4.561236680467326e-06, "loss": 0.5992, "step": 6675 }, { "epoch": 0.54, "grad_norm": 5.90712438256307, "learning_rate": 4.559926415430194e-06, "loss": 0.7046, "step": 6676 }, { "epoch": 0.54, "grad_norm": 2.9571032593291657, "learning_rate": 4.558616180848922e-06, "loss": 0.6089, "step": 6677 }, { "epoch": 0.54, "grad_norm": 2.597780598804849, "learning_rate": 4.557305976814193e-06, "loss": 0.6403, "step": 6678 }, { "epoch": 0.54, "grad_norm": 5.566021419323083, "learning_rate": 4.555995803416674e-06, "loss": 0.7609, "step": 6679 }, { "epoch": 0.54, "grad_norm": 3.900452754930337, "learning_rate": 4.554685660747043e-06, "loss": 0.7704, "step": 6680 }, { "epoch": 0.54, "grad_norm": 14.694609341600737, "learning_rate": 4.553375548895968e-06, "loss": 0.7091, "step": 6681 }, { "epoch": 0.54, "grad_norm": 4.514175327950168, "learning_rate": 4.552065467954117e-06, "loss": 0.6286, "step": 6682 }, { "epoch": 0.54, "grad_norm": 6.7117302134141426, "learning_rate": 4.550755418012158e-06, "loss": 0.7501, "step": 6683 }, { "epoch": 0.54, "grad_norm": 2.762116017493449, "learning_rate": 4.54944539916075e-06, "loss": 0.7016, "step": 6684 }, { "epoch": 0.54, "grad_norm": 6.605277350544211, "learning_rate": 4.5481354114905595e-06, "loss": 0.7463, "step": 6685 }, { "epoch": 0.54, "grad_norm": 3.3179601187548253, "learning_rate": 4.546825455092242e-06, "loss": 0.8276, "step": 6686 }, { "epoch": 0.54, "grad_norm": 5.7777731531464225, "learning_rate": 4.545515530056457e-06, "loss": 0.8988, "step": 6687 }, { "epoch": 0.54, "grad_norm": 3.219604761494268, "learning_rate": 4.544205636473858e-06, "loss": 0.6158, "step": 6688 }, { "epoch": 0.54, "grad_norm": 4.997110199112129, "learning_rate": 4.542895774435102e-06, "loss": 0.6994, "step": 6689 }, { "epoch": 0.54, "grad_norm": 2.5798419577911353, "learning_rate": 4.541585944030833e-06, "loss": 0.7564, "step": 6690 }, { "epoch": 0.54, "grad_norm": 3.4405341450590785, "learning_rate": 4.540276145351705e-06, "loss": 0.7078, "step": 6691 }, { "epoch": 0.54, "grad_norm": 2.3449428148115534, "learning_rate": 4.538966378488362e-06, "loss": 0.6487, "step": 6692 }, { "epoch": 0.54, "grad_norm": 5.941951161476081, "learning_rate": 4.537656643531448e-06, "loss": 0.6811, "step": 6693 }, { "epoch": 0.54, "grad_norm": 11.89940356260767, "learning_rate": 4.536346940571606e-06, "loss": 0.7666, "step": 6694 }, { "epoch": 0.54, "grad_norm": 6.595110348685419, "learning_rate": 4.535037269699474e-06, "loss": 0.8103, "step": 6695 }, { "epoch": 0.54, "grad_norm": 4.1909835862626545, "learning_rate": 4.533727631005694e-06, "loss": 0.5907, "step": 6696 }, { "epoch": 0.54, "grad_norm": 3.218433680474106, "learning_rate": 4.5324180245808945e-06, "loss": 0.7066, "step": 6697 }, { "epoch": 0.54, "grad_norm": 3.8656068642155383, "learning_rate": 4.531108450515712e-06, "loss": 0.8201, "step": 6698 }, { "epoch": 0.54, "grad_norm": 5.630828204516917, "learning_rate": 4.529798908900777e-06, "loss": 0.6089, "step": 6699 }, { "epoch": 0.54, "grad_norm": 3.505191271248207, "learning_rate": 4.52848939982672e-06, "loss": 0.6896, "step": 6700 }, { "epoch": 0.54, "grad_norm": 2.6856599770051135, "learning_rate": 4.527179923384165e-06, "loss": 0.5842, "step": 6701 }, { "epoch": 0.54, "grad_norm": 2.3588634222132203, "learning_rate": 4.5258704796637345e-06, "loss": 0.8579, "step": 6702 }, { "epoch": 0.54, "grad_norm": 3.4218484615413804, "learning_rate": 4.524561068756055e-06, "loss": 0.7978, "step": 6703 }, { "epoch": 0.54, "grad_norm": 2.809657816411793, "learning_rate": 4.523251690751741e-06, "loss": 0.6258, "step": 6704 }, { "epoch": 0.54, "grad_norm": 4.261272381694099, "learning_rate": 4.521942345741413e-06, "loss": 0.7515, "step": 6705 }, { "epoch": 0.54, "grad_norm": 30.014843001509995, "learning_rate": 4.520633033815684e-06, "loss": 0.7123, "step": 6706 }, { "epoch": 0.54, "grad_norm": 6.790969504459516, "learning_rate": 4.519323755065167e-06, "loss": 0.6856, "step": 6707 }, { "epoch": 0.54, "grad_norm": 2.878378842195477, "learning_rate": 4.518014509580474e-06, "loss": 0.6668, "step": 6708 }, { "epoch": 0.54, "grad_norm": 3.010808332578446, "learning_rate": 4.516705297452212e-06, "loss": 0.792, "step": 6709 }, { "epoch": 0.54, "grad_norm": 2.477459799882348, "learning_rate": 4.515396118770986e-06, "loss": 0.6284, "step": 6710 }, { "epoch": 0.55, "grad_norm": 2.198079295130724, "learning_rate": 4.514086973627399e-06, "loss": 0.6708, "step": 6711 }, { "epoch": 0.55, "grad_norm": 2.3893130794546984, "learning_rate": 4.512777862112053e-06, "loss": 0.7647, "step": 6712 }, { "epoch": 0.55, "grad_norm": 3.1444475335326607, "learning_rate": 4.511468784315547e-06, "loss": 0.6161, "step": 6713 }, { "epoch": 0.55, "grad_norm": 3.0208766390944493, "learning_rate": 4.5101597403284765e-06, "loss": 0.5301, "step": 6714 }, { "epoch": 0.55, "grad_norm": 3.918797861826325, "learning_rate": 4.508850730241437e-06, "loss": 0.4313, "step": 6715 }, { "epoch": 0.55, "grad_norm": 2.3654515573406623, "learning_rate": 4.5075417541450215e-06, "loss": 0.7215, "step": 6716 }, { "epoch": 0.55, "grad_norm": 3.2801522030676176, "learning_rate": 4.506232812129816e-06, "loss": 0.7729, "step": 6717 }, { "epoch": 0.55, "grad_norm": 3.12112248829566, "learning_rate": 4.504923904286409e-06, "loss": 0.7279, "step": 6718 }, { "epoch": 0.55, "grad_norm": 7.8624182972335905, "learning_rate": 4.503615030705384e-06, "loss": 0.6375, "step": 6719 }, { "epoch": 0.55, "grad_norm": 3.996929954014196, "learning_rate": 4.5023061914773244e-06, "loss": 0.6773, "step": 6720 }, { "epoch": 0.55, "grad_norm": 2.376166508516253, "learning_rate": 4.5009973866928105e-06, "loss": 0.6622, "step": 6721 }, { "epoch": 0.55, "grad_norm": 3.371857953089268, "learning_rate": 4.499688616442419e-06, "loss": 0.7857, "step": 6722 }, { "epoch": 0.55, "grad_norm": 3.721452199819446, "learning_rate": 4.498379880816728e-06, "loss": 0.677, "step": 6723 }, { "epoch": 0.55, "grad_norm": 3.669239784434637, "learning_rate": 4.497071179906305e-06, "loss": 0.7255, "step": 6724 }, { "epoch": 0.55, "grad_norm": 8.878511392781272, "learning_rate": 4.495762513801724e-06, "loss": 0.8223, "step": 6725 }, { "epoch": 0.55, "grad_norm": 2.7643459251663702, "learning_rate": 4.494453882593552e-06, "loss": 0.762, "step": 6726 }, { "epoch": 0.55, "grad_norm": 3.546761834411261, "learning_rate": 4.4931452863723535e-06, "loss": 0.6948, "step": 6727 }, { "epoch": 0.55, "grad_norm": 2.219742152088609, "learning_rate": 4.491836725228693e-06, "loss": 0.6289, "step": 6728 }, { "epoch": 0.55, "grad_norm": 3.222239452993486, "learning_rate": 4.490528199253133e-06, "loss": 0.678, "step": 6729 }, { "epoch": 0.55, "grad_norm": 4.039863666240687, "learning_rate": 4.489219708536228e-06, "loss": 0.7015, "step": 6730 }, { "epoch": 0.55, "grad_norm": 4.002033534843258, "learning_rate": 4.487911253168534e-06, "loss": 0.7997, "step": 6731 }, { "epoch": 0.55, "grad_norm": 2.7610655263986428, "learning_rate": 4.4866028332406064e-06, "loss": 0.7937, "step": 6732 }, { "epoch": 0.55, "grad_norm": 3.9935545920730995, "learning_rate": 4.485294448842996e-06, "loss": 0.6704, "step": 6733 }, { "epoch": 0.55, "grad_norm": 8.24533419406517, "learning_rate": 4.4839861000662496e-06, "loss": 0.8214, "step": 6734 }, { "epoch": 0.55, "grad_norm": 4.667802527243373, "learning_rate": 4.482677787000915e-06, "loss": 0.6558, "step": 6735 }, { "epoch": 0.55, "grad_norm": 2.916224639300997, "learning_rate": 4.4813695097375355e-06, "loss": 0.6094, "step": 6736 }, { "epoch": 0.55, "grad_norm": 4.215978092389523, "learning_rate": 4.48006126836665e-06, "loss": 0.7036, "step": 6737 }, { "epoch": 0.55, "grad_norm": 2.726765288253642, "learning_rate": 4.4787530629787995e-06, "loss": 0.6991, "step": 6738 }, { "epoch": 0.55, "grad_norm": 4.465314361569634, "learning_rate": 4.477444893664518e-06, "loss": 0.6463, "step": 6739 }, { "epoch": 0.55, "grad_norm": 5.090999309979331, "learning_rate": 4.476136760514341e-06, "loss": 0.7844, "step": 6740 }, { "epoch": 0.55, "grad_norm": 4.318269304136731, "learning_rate": 4.4748286636187985e-06, "loss": 0.6391, "step": 6741 }, { "epoch": 0.55, "grad_norm": 2.818514401302119, "learning_rate": 4.473520603068421e-06, "loss": 0.6322, "step": 6742 }, { "epoch": 0.55, "grad_norm": 3.4071907620949085, "learning_rate": 4.472212578953731e-06, "loss": 0.6967, "step": 6743 }, { "epoch": 0.55, "grad_norm": 2.654691208151812, "learning_rate": 4.470904591365253e-06, "loss": 0.6374, "step": 6744 }, { "epoch": 0.55, "grad_norm": 2.218558949338727, "learning_rate": 4.4695966403935095e-06, "loss": 0.6717, "step": 6745 }, { "epoch": 0.55, "grad_norm": 2.8815332717207465, "learning_rate": 4.468288726129018e-06, "loss": 0.7074, "step": 6746 }, { "epoch": 0.55, "grad_norm": 2.6668805621423624, "learning_rate": 4.466980848662295e-06, "loss": 0.8332, "step": 6747 }, { "epoch": 0.55, "grad_norm": 2.63204004296017, "learning_rate": 4.4656730080838535e-06, "loss": 0.6099, "step": 6748 }, { "epoch": 0.55, "grad_norm": 2.4748550745061726, "learning_rate": 4.464365204484204e-06, "loss": 0.6179, "step": 6749 }, { "epoch": 0.55, "grad_norm": 4.236652586850476, "learning_rate": 4.463057437953855e-06, "loss": 0.7659, "step": 6750 }, { "epoch": 0.55, "grad_norm": 5.246496087227289, "learning_rate": 4.461749708583313e-06, "loss": 0.597, "step": 6751 }, { "epoch": 0.55, "grad_norm": 3.1869054571016733, "learning_rate": 4.460442016463079e-06, "loss": 0.8225, "step": 6752 }, { "epoch": 0.55, "grad_norm": 2.359610854596599, "learning_rate": 4.4591343616836545e-06, "loss": 0.4747, "step": 6753 }, { "epoch": 0.55, "grad_norm": 3.2641550269475337, "learning_rate": 4.457826744335538e-06, "loss": 0.5478, "step": 6754 }, { "epoch": 0.55, "grad_norm": 13.751901471810733, "learning_rate": 4.4565191645092244e-06, "loss": 0.7826, "step": 6755 }, { "epoch": 0.55, "grad_norm": 4.139938756693409, "learning_rate": 4.45521162229521e-06, "loss": 0.5977, "step": 6756 }, { "epoch": 0.55, "grad_norm": 4.2495862922110375, "learning_rate": 4.453904117783978e-06, "loss": 0.6785, "step": 6757 }, { "epoch": 0.55, "grad_norm": 3.0688870582805134, "learning_rate": 4.452596651066021e-06, "loss": 0.6836, "step": 6758 }, { "epoch": 0.55, "grad_norm": 3.222344557145634, "learning_rate": 4.451289222231821e-06, "loss": 0.7173, "step": 6759 }, { "epoch": 0.55, "grad_norm": 2.933298701496621, "learning_rate": 4.449981831371863e-06, "loss": 0.5088, "step": 6760 }, { "epoch": 0.55, "grad_norm": 3.6852505474558965, "learning_rate": 4.448674478576625e-06, "loss": 0.9275, "step": 6761 }, { "epoch": 0.55, "grad_norm": 2.6298660844132016, "learning_rate": 4.447367163936586e-06, "loss": 0.5508, "step": 6762 }, { "epoch": 0.55, "grad_norm": 5.471763681682436, "learning_rate": 4.4460598875422175e-06, "loss": 0.5929, "step": 6763 }, { "epoch": 0.55, "grad_norm": 4.119746550532442, "learning_rate": 4.444752649483993e-06, "loss": 0.6637, "step": 6764 }, { "epoch": 0.55, "grad_norm": 6.554778771125893, "learning_rate": 4.44344544985238e-06, "loss": 0.7469, "step": 6765 }, { "epoch": 0.55, "grad_norm": 3.5784563875203768, "learning_rate": 4.442138288737848e-06, "loss": 0.7068, "step": 6766 }, { "epoch": 0.55, "grad_norm": 4.446059255533405, "learning_rate": 4.440831166230858e-06, "loss": 0.7519, "step": 6767 }, { "epoch": 0.55, "grad_norm": 2.3413452690024545, "learning_rate": 4.439524082421872e-06, "loss": 0.5098, "step": 6768 }, { "epoch": 0.55, "grad_norm": 4.268941714671915, "learning_rate": 4.438217037401351e-06, "loss": 0.6509, "step": 6769 }, { "epoch": 0.55, "grad_norm": 3.2972197728439703, "learning_rate": 4.4369100312597455e-06, "loss": 0.6241, "step": 6770 }, { "epoch": 0.55, "grad_norm": 2.823880734997757, "learning_rate": 4.435603064087512e-06, "loss": 0.6973, "step": 6771 }, { "epoch": 0.55, "grad_norm": 2.4285030669083536, "learning_rate": 4.434296135975099e-06, "loss": 0.6431, "step": 6772 }, { "epoch": 0.55, "grad_norm": 2.344138302798512, "learning_rate": 4.432989247012958e-06, "loss": 0.7165, "step": 6773 }, { "epoch": 0.55, "grad_norm": 3.318739936395028, "learning_rate": 4.43168239729153e-06, "loss": 0.72, "step": 6774 }, { "epoch": 0.55, "grad_norm": 14.229288111268472, "learning_rate": 4.430375586901258e-06, "loss": 0.6236, "step": 6775 }, { "epoch": 0.55, "grad_norm": 8.376423903309352, "learning_rate": 4.429068815932585e-06, "loss": 0.8278, "step": 6776 }, { "epoch": 0.55, "grad_norm": 3.231230230888898, "learning_rate": 4.427762084475941e-06, "loss": 0.7404, "step": 6777 }, { "epoch": 0.55, "grad_norm": 6.200312764738706, "learning_rate": 4.426455392621765e-06, "loss": 0.7774, "step": 6778 }, { "epoch": 0.55, "grad_norm": 3.287926989827072, "learning_rate": 4.425148740460487e-06, "loss": 0.5863, "step": 6779 }, { "epoch": 0.55, "grad_norm": 2.486685751121668, "learning_rate": 4.423842128082535e-06, "loss": 0.6078, "step": 6780 }, { "epoch": 0.55, "grad_norm": 2.6591907274918536, "learning_rate": 4.422535555578338e-06, "loss": 0.7295, "step": 6781 }, { "epoch": 0.55, "grad_norm": 4.429070825054204, "learning_rate": 4.421229023038316e-06, "loss": 0.8475, "step": 6782 }, { "epoch": 0.55, "grad_norm": 4.807842088398467, "learning_rate": 4.41992253055289e-06, "loss": 0.6807, "step": 6783 }, { "epoch": 0.55, "grad_norm": 4.305960130883989, "learning_rate": 4.418616078212475e-06, "loss": 0.6459, "step": 6784 }, { "epoch": 0.55, "grad_norm": 2.8671408574431805, "learning_rate": 4.4173096661074895e-06, "loss": 0.5426, "step": 6785 }, { "epoch": 0.55, "grad_norm": 4.655487629784463, "learning_rate": 4.416003294328344e-06, "loss": 0.6308, "step": 6786 }, { "epoch": 0.55, "grad_norm": 3.1802288012490054, "learning_rate": 4.414696962965447e-06, "loss": 0.6717, "step": 6787 }, { "epoch": 0.55, "grad_norm": 3.856534003553808, "learning_rate": 4.413390672109207e-06, "loss": 0.7129, "step": 6788 }, { "epoch": 0.55, "grad_norm": 3.996665243099306, "learning_rate": 4.412084421850026e-06, "loss": 0.6742, "step": 6789 }, { "epoch": 0.55, "grad_norm": 3.2734912986533846, "learning_rate": 4.410778212278304e-06, "loss": 0.7696, "step": 6790 }, { "epoch": 0.55, "grad_norm": 5.430574509110885, "learning_rate": 4.40947204348444e-06, "loss": 0.7004, "step": 6791 }, { "epoch": 0.55, "grad_norm": 4.944299302840354, "learning_rate": 4.408165915558829e-06, "loss": 0.565, "step": 6792 }, { "epoch": 0.55, "grad_norm": 3.962879396044792, "learning_rate": 4.406859828591862e-06, "loss": 0.5595, "step": 6793 }, { "epoch": 0.55, "grad_norm": 2.3547902145815116, "learning_rate": 4.40555378267393e-06, "loss": 0.7508, "step": 6794 }, { "epoch": 0.55, "grad_norm": 2.284092742876538, "learning_rate": 4.4042477778954215e-06, "loss": 0.6907, "step": 6795 }, { "epoch": 0.55, "grad_norm": 3.0642794767442205, "learning_rate": 4.402941814346716e-06, "loss": 0.6299, "step": 6796 }, { "epoch": 0.55, "grad_norm": 3.6977575658930975, "learning_rate": 4.401635892118196e-06, "loss": 0.6803, "step": 6797 }, { "epoch": 0.55, "grad_norm": 3.6853232169300756, "learning_rate": 4.400330011300242e-06, "loss": 0.623, "step": 6798 }, { "epoch": 0.55, "grad_norm": 3.3064321738398985, "learning_rate": 4.399024171983224e-06, "loss": 0.7509, "step": 6799 }, { "epoch": 0.55, "grad_norm": 2.5652442919321037, "learning_rate": 4.3977183742575186e-06, "loss": 0.7283, "step": 6800 }, { "epoch": 0.55, "grad_norm": 2.903623141070364, "learning_rate": 4.396412618213494e-06, "loss": 0.6517, "step": 6801 }, { "epoch": 0.55, "grad_norm": 4.2175841326536965, "learning_rate": 4.3951069039415184e-06, "loss": 0.7427, "step": 6802 }, { "epoch": 0.55, "grad_norm": 3.9506541951063623, "learning_rate": 4.393801231531952e-06, "loss": 0.6943, "step": 6803 }, { "epoch": 0.55, "grad_norm": 3.842806540883433, "learning_rate": 4.392495601075157e-06, "loss": 0.6348, "step": 6804 }, { "epoch": 0.55, "grad_norm": 3.7986950893324125, "learning_rate": 4.391190012661491e-06, "loss": 0.6203, "step": 6805 }, { "epoch": 0.55, "grad_norm": 4.168668997823342, "learning_rate": 4.389884466381312e-06, "loss": 0.7809, "step": 6806 }, { "epoch": 0.55, "grad_norm": 3.4433229113020487, "learning_rate": 4.388578962324967e-06, "loss": 0.7327, "step": 6807 }, { "epoch": 0.55, "grad_norm": 2.7667754655885135, "learning_rate": 4.387273500582809e-06, "loss": 0.5899, "step": 6808 }, { "epoch": 0.55, "grad_norm": 3.7644107720700184, "learning_rate": 4.3859680812451844e-06, "loss": 0.6426, "step": 6809 }, { "epoch": 0.55, "grad_norm": 2.9805139009576105, "learning_rate": 4.384662704402433e-06, "loss": 0.7077, "step": 6810 }, { "epoch": 0.55, "grad_norm": 3.5754318770227105, "learning_rate": 4.383357370144896e-06, "loss": 0.5922, "step": 6811 }, { "epoch": 0.55, "grad_norm": 3.963388652937568, "learning_rate": 4.382052078562913e-06, "loss": 0.6327, "step": 6812 }, { "epoch": 0.55, "grad_norm": 2.3978615046933807, "learning_rate": 4.380746829746817e-06, "loss": 0.6471, "step": 6813 }, { "epoch": 0.55, "grad_norm": 3.1407812042839693, "learning_rate": 4.379441623786938e-06, "loss": 0.8238, "step": 6814 }, { "epoch": 0.55, "grad_norm": 5.0606686031518935, "learning_rate": 4.378136460773609e-06, "loss": 0.6591, "step": 6815 }, { "epoch": 0.55, "grad_norm": 2.8574753075181225, "learning_rate": 4.376831340797151e-06, "loss": 0.8108, "step": 6816 }, { "epoch": 0.55, "grad_norm": 3.342133698190901, "learning_rate": 4.375526263947887e-06, "loss": 0.5185, "step": 6817 }, { "epoch": 0.55, "grad_norm": 24.21977990190991, "learning_rate": 4.374221230316138e-06, "loss": 0.5867, "step": 6818 }, { "epoch": 0.55, "grad_norm": 3.097243863937538, "learning_rate": 4.37291623999222e-06, "loss": 0.7158, "step": 6819 }, { "epoch": 0.55, "grad_norm": 2.3121114690182742, "learning_rate": 4.371611293066446e-06, "loss": 0.6579, "step": 6820 }, { "epoch": 0.55, "grad_norm": 3.283416195224011, "learning_rate": 4.37030638962913e-06, "loss": 0.6807, "step": 6821 }, { "epoch": 0.55, "grad_norm": 2.874680888419324, "learning_rate": 4.3690015297705755e-06, "loss": 0.7401, "step": 6822 }, { "epoch": 0.55, "grad_norm": 3.4228578878989118, "learning_rate": 4.367696713581088e-06, "loss": 0.5715, "step": 6823 }, { "epoch": 0.55, "grad_norm": 9.094807985216361, "learning_rate": 4.366391941150969e-06, "loss": 0.8104, "step": 6824 }, { "epoch": 0.55, "grad_norm": 4.815336355126061, "learning_rate": 4.365087212570516e-06, "loss": 0.7584, "step": 6825 }, { "epoch": 0.55, "grad_norm": 4.097249894248789, "learning_rate": 4.363782527930026e-06, "loss": 0.6317, "step": 6826 }, { "epoch": 0.55, "grad_norm": 3.2542578602686554, "learning_rate": 4.362477887319792e-06, "loss": 0.7114, "step": 6827 }, { "epoch": 0.55, "grad_norm": 4.576346748074083, "learning_rate": 4.361173290830102e-06, "loss": 0.7068, "step": 6828 }, { "epoch": 0.55, "grad_norm": 4.4001467366703455, "learning_rate": 4.359868738551244e-06, "loss": 0.5701, "step": 6829 }, { "epoch": 0.55, "grad_norm": 2.888778309484578, "learning_rate": 4.358564230573498e-06, "loss": 0.6928, "step": 6830 }, { "epoch": 0.55, "grad_norm": 3.3897288448091265, "learning_rate": 4.357259766987147e-06, "loss": 0.612, "step": 6831 }, { "epoch": 0.55, "grad_norm": 5.3837512393981095, "learning_rate": 4.355955347882467e-06, "loss": 0.7801, "step": 6832 }, { "epoch": 0.55, "grad_norm": 3.660006228650256, "learning_rate": 4.354650973349732e-06, "loss": 0.7005, "step": 6833 }, { "epoch": 0.56, "grad_norm": 4.5326247096127705, "learning_rate": 4.3533466434792125e-06, "loss": 0.7045, "step": 6834 }, { "epoch": 0.56, "grad_norm": 2.504208698339557, "learning_rate": 4.35204235836118e-06, "loss": 0.6674, "step": 6835 }, { "epoch": 0.56, "grad_norm": 3.2000150725821825, "learning_rate": 4.350738118085893e-06, "loss": 0.575, "step": 6836 }, { "epoch": 0.56, "grad_norm": 7.019690815326524, "learning_rate": 4.349433922743616e-06, "loss": 0.603, "step": 6837 }, { "epoch": 0.56, "grad_norm": 2.855861328453311, "learning_rate": 4.34812977242461e-06, "loss": 0.6177, "step": 6838 }, { "epoch": 0.56, "grad_norm": 4.683856007469121, "learning_rate": 4.346825667219127e-06, "loss": 0.6589, "step": 6839 }, { "epoch": 0.56, "grad_norm": 3.129192450229475, "learning_rate": 4.34552160721742e-06, "loss": 0.7202, "step": 6840 }, { "epoch": 0.56, "grad_norm": 5.474050679960641, "learning_rate": 4.3442175925097395e-06, "loss": 0.7405, "step": 6841 }, { "epoch": 0.56, "grad_norm": 3.331377930224471, "learning_rate": 4.342913623186332e-06, "loss": 0.7208, "step": 6842 }, { "epoch": 0.56, "grad_norm": 21.168127492269672, "learning_rate": 4.341609699337438e-06, "loss": 0.7653, "step": 6843 }, { "epoch": 0.56, "grad_norm": 2.9519802143215133, "learning_rate": 4.3403058210532975e-06, "loss": 0.6766, "step": 6844 }, { "epoch": 0.56, "grad_norm": 4.283554167974886, "learning_rate": 4.339001988424148e-06, "loss": 0.5934, "step": 6845 }, { "epoch": 0.56, "grad_norm": 2.6798154042577123, "learning_rate": 4.337698201540225e-06, "loss": 0.6729, "step": 6846 }, { "epoch": 0.56, "grad_norm": 7.507049437169468, "learning_rate": 4.336394460491754e-06, "loss": 0.7491, "step": 6847 }, { "epoch": 0.56, "grad_norm": 2.7205816971242416, "learning_rate": 4.335090765368968e-06, "loss": 0.7147, "step": 6848 }, { "epoch": 0.56, "grad_norm": 5.276156317210714, "learning_rate": 4.333787116262085e-06, "loss": 0.822, "step": 6849 }, { "epoch": 0.56, "grad_norm": 3.792361014771035, "learning_rate": 4.3324835132613285e-06, "loss": 0.7172, "step": 6850 }, { "epoch": 0.56, "grad_norm": 3.058327451345355, "learning_rate": 4.3311799564569165e-06, "loss": 0.7133, "step": 6851 }, { "epoch": 0.56, "grad_norm": 4.642867198276733, "learning_rate": 4.329876445939062e-06, "loss": 0.5627, "step": 6852 }, { "epoch": 0.56, "grad_norm": 3.383166451271083, "learning_rate": 4.3285729817979775e-06, "loss": 0.7741, "step": 6853 }, { "epoch": 0.56, "grad_norm": 2.7400101143718167, "learning_rate": 4.32726956412387e-06, "loss": 0.7178, "step": 6854 }, { "epoch": 0.56, "grad_norm": 2.4756911441636227, "learning_rate": 4.325966193006946e-06, "loss": 0.7413, "step": 6855 }, { "epoch": 0.56, "grad_norm": 3.194191717608314, "learning_rate": 4.324662868537405e-06, "loss": 0.6077, "step": 6856 }, { "epoch": 0.56, "grad_norm": 3.6015080786733877, "learning_rate": 4.323359590805445e-06, "loss": 0.6798, "step": 6857 }, { "epoch": 0.56, "grad_norm": 2.439863206470225, "learning_rate": 4.322056359901262e-06, "loss": 0.7068, "step": 6858 }, { "epoch": 0.56, "grad_norm": 2.5042078080037626, "learning_rate": 4.320753175915047e-06, "loss": 0.6189, "step": 6859 }, { "epoch": 0.56, "grad_norm": 2.9228169993683997, "learning_rate": 4.319450038936989e-06, "loss": 0.6765, "step": 6860 }, { "epoch": 0.56, "grad_norm": 2.810683684170626, "learning_rate": 4.318146949057275e-06, "loss": 0.7373, "step": 6861 }, { "epoch": 0.56, "grad_norm": 3.5029878062490876, "learning_rate": 4.316843906366085e-06, "loss": 0.678, "step": 6862 }, { "epoch": 0.56, "grad_norm": 3.2255358236980713, "learning_rate": 4.315540910953598e-06, "loss": 0.747, "step": 6863 }, { "epoch": 0.56, "grad_norm": 4.5528421390757785, "learning_rate": 4.314237962909989e-06, "loss": 0.6096, "step": 6864 }, { "epoch": 0.56, "grad_norm": 3.8601774619259297, "learning_rate": 4.312935062325431e-06, "loss": 0.7473, "step": 6865 }, { "epoch": 0.56, "grad_norm": 3.4966415893751863, "learning_rate": 4.3116322092900925e-06, "loss": 0.6065, "step": 6866 }, { "epoch": 0.56, "grad_norm": 11.24879479757515, "learning_rate": 4.31032940389414e-06, "loss": 0.5538, "step": 6867 }, { "epoch": 0.56, "grad_norm": 3.658742767608833, "learning_rate": 4.309026646227737e-06, "loss": 0.5204, "step": 6868 }, { "epoch": 0.56, "grad_norm": 2.692263620159278, "learning_rate": 4.307723936381038e-06, "loss": 0.6503, "step": 6869 }, { "epoch": 0.56, "grad_norm": 3.706723566596683, "learning_rate": 4.3064212744442026e-06, "loss": 0.5831, "step": 6870 }, { "epoch": 0.56, "grad_norm": 2.402681804292963, "learning_rate": 4.305118660507382e-06, "loss": 0.5789, "step": 6871 }, { "epoch": 0.56, "grad_norm": 3.2699878469512558, "learning_rate": 4.303816094660726e-06, "loss": 0.731, "step": 6872 }, { "epoch": 0.56, "grad_norm": 3.6528611211965876, "learning_rate": 4.3025135769943786e-06, "loss": 0.6031, "step": 6873 }, { "epoch": 0.56, "grad_norm": 2.295379776572049, "learning_rate": 4.301211107598484e-06, "loss": 0.6954, "step": 6874 }, { "epoch": 0.56, "grad_norm": 3.2718876613953887, "learning_rate": 4.2999086865631825e-06, "loss": 0.6587, "step": 6875 }, { "epoch": 0.56, "grad_norm": 2.477917529947379, "learning_rate": 4.298606313978605e-06, "loss": 0.7516, "step": 6876 }, { "epoch": 0.56, "grad_norm": 2.9652452416912474, "learning_rate": 4.297303989934888e-06, "loss": 0.6927, "step": 6877 }, { "epoch": 0.56, "grad_norm": 3.5725438439547745, "learning_rate": 4.29600171452216e-06, "loss": 0.6893, "step": 6878 }, { "epoch": 0.56, "grad_norm": 3.2835239613039704, "learning_rate": 4.294699487830546e-06, "loss": 0.7377, "step": 6879 }, { "epoch": 0.56, "grad_norm": 6.893217186711873, "learning_rate": 4.293397309950168e-06, "loss": 0.6117, "step": 6880 }, { "epoch": 0.56, "grad_norm": 2.1266753939261904, "learning_rate": 4.292095180971145e-06, "loss": 0.6442, "step": 6881 }, { "epoch": 0.56, "grad_norm": 4.169125447443993, "learning_rate": 4.2907931009835954e-06, "loss": 0.8305, "step": 6882 }, { "epoch": 0.56, "grad_norm": 6.135002503289643, "learning_rate": 4.289491070077626e-06, "loss": 0.4442, "step": 6883 }, { "epoch": 0.56, "grad_norm": 6.757603041277577, "learning_rate": 4.288189088343348e-06, "loss": 0.7315, "step": 6884 }, { "epoch": 0.56, "grad_norm": 4.797176624238879, "learning_rate": 4.286887155870868e-06, "loss": 0.8499, "step": 6885 }, { "epoch": 0.56, "grad_norm": 2.8593098324895605, "learning_rate": 4.285585272750287e-06, "loss": 0.6901, "step": 6886 }, { "epoch": 0.56, "grad_norm": 3.869462001341872, "learning_rate": 4.284283439071703e-06, "loss": 0.6972, "step": 6887 }, { "epoch": 0.56, "grad_norm": 3.6847339731251108, "learning_rate": 4.282981654925214e-06, "loss": 0.7295, "step": 6888 }, { "epoch": 0.56, "grad_norm": 4.949437261496342, "learning_rate": 4.281679920400907e-06, "loss": 0.5941, "step": 6889 }, { "epoch": 0.56, "grad_norm": 4.978802555034849, "learning_rate": 4.280378235588872e-06, "loss": 0.7034, "step": 6890 }, { "epoch": 0.56, "grad_norm": 4.376380418818413, "learning_rate": 4.279076600579194e-06, "loss": 0.6988, "step": 6891 }, { "epoch": 0.56, "grad_norm": 3.4259645241469916, "learning_rate": 4.277775015461955e-06, "loss": 0.8049, "step": 6892 }, { "epoch": 0.56, "grad_norm": 3.2911235879344076, "learning_rate": 4.2764734803272325e-06, "loss": 0.6656, "step": 6893 }, { "epoch": 0.56, "grad_norm": 3.127810935896433, "learning_rate": 4.275171995265101e-06, "loss": 0.7018, "step": 6894 }, { "epoch": 0.56, "grad_norm": 2.831031382255254, "learning_rate": 4.2738705603656326e-06, "loss": 0.7153, "step": 6895 }, { "epoch": 0.56, "grad_norm": 3.0253779529259353, "learning_rate": 4.272569175718893e-06, "loss": 0.731, "step": 6896 }, { "epoch": 0.56, "grad_norm": 3.7216029049773622, "learning_rate": 4.271267841414945e-06, "loss": 0.558, "step": 6897 }, { "epoch": 0.56, "grad_norm": 2.5302661351268227, "learning_rate": 4.269966557543852e-06, "loss": 0.7019, "step": 6898 }, { "epoch": 0.56, "grad_norm": 3.536498346111739, "learning_rate": 4.26866532419567e-06, "loss": 0.5796, "step": 6899 }, { "epoch": 0.56, "grad_norm": 3.457232323957409, "learning_rate": 4.267364141460452e-06, "loss": 0.6769, "step": 6900 }, { "epoch": 0.56, "grad_norm": 2.54255783361201, "learning_rate": 4.266063009428249e-06, "loss": 0.693, "step": 6901 }, { "epoch": 0.56, "grad_norm": 6.85275688174486, "learning_rate": 4.264761928189107e-06, "loss": 0.6411, "step": 6902 }, { "epoch": 0.56, "grad_norm": 3.467817956697217, "learning_rate": 4.263460897833069e-06, "loss": 0.6038, "step": 6903 }, { "epoch": 0.56, "grad_norm": 2.624433923144948, "learning_rate": 4.2621599184501736e-06, "loss": 0.6174, "step": 6904 }, { "epoch": 0.56, "grad_norm": 3.585874732383417, "learning_rate": 4.260858990130459e-06, "loss": 0.785, "step": 6905 }, { "epoch": 0.56, "grad_norm": 3.608098982953486, "learning_rate": 4.259558112963954e-06, "loss": 0.755, "step": 6906 }, { "epoch": 0.56, "grad_norm": 5.541333386927576, "learning_rate": 4.258257287040692e-06, "loss": 0.7412, "step": 6907 }, { "epoch": 0.56, "grad_norm": 2.5274300499259263, "learning_rate": 4.256956512450697e-06, "loss": 0.5357, "step": 6908 }, { "epoch": 0.56, "grad_norm": 3.6208632605691893, "learning_rate": 4.2556557892839875e-06, "loss": 0.6301, "step": 6909 }, { "epoch": 0.56, "grad_norm": 3.9467515991788997, "learning_rate": 4.254355117630585e-06, "loss": 0.6172, "step": 6910 }, { "epoch": 0.56, "grad_norm": 3.0969169612864027, "learning_rate": 4.2530544975805034e-06, "loss": 0.6708, "step": 6911 }, { "epoch": 0.56, "grad_norm": 5.043956915185662, "learning_rate": 4.251753929223754e-06, "loss": 0.7537, "step": 6912 }, { "epoch": 0.56, "grad_norm": 3.5173455429655096, "learning_rate": 4.250453412650343e-06, "loss": 0.7341, "step": 6913 }, { "epoch": 0.56, "grad_norm": 3.085061272051881, "learning_rate": 4.249152947950276e-06, "loss": 0.6552, "step": 6914 }, { "epoch": 0.56, "grad_norm": 2.6985279728188476, "learning_rate": 4.247852535213554e-06, "loss": 0.5843, "step": 6915 }, { "epoch": 0.56, "grad_norm": 3.3724583306023295, "learning_rate": 4.246552174530171e-06, "loss": 0.6629, "step": 6916 }, { "epoch": 0.56, "grad_norm": 4.98527057985444, "learning_rate": 4.245251865990122e-06, "loss": 0.6779, "step": 6917 }, { "epoch": 0.56, "grad_norm": 2.7148417949240664, "learning_rate": 4.243951609683395e-06, "loss": 0.7512, "step": 6918 }, { "epoch": 0.56, "grad_norm": 2.749127735519029, "learning_rate": 4.242651405699979e-06, "loss": 0.6893, "step": 6919 }, { "epoch": 0.56, "grad_norm": 3.2642926709931808, "learning_rate": 4.241351254129854e-06, "loss": 0.6173, "step": 6920 }, { "epoch": 0.56, "grad_norm": 5.090029937141236, "learning_rate": 4.240051155063e-06, "loss": 0.7109, "step": 6921 }, { "epoch": 0.56, "grad_norm": 2.768285255760678, "learning_rate": 4.238751108589389e-06, "loss": 0.6719, "step": 6922 }, { "epoch": 0.56, "grad_norm": 2.8344256712184275, "learning_rate": 4.237451114798995e-06, "loss": 0.7396, "step": 6923 }, { "epoch": 0.56, "grad_norm": 10.72485084269237, "learning_rate": 4.236151173781785e-06, "loss": 0.6967, "step": 6924 }, { "epoch": 0.56, "grad_norm": 2.5303867448420116, "learning_rate": 4.2348512856277235e-06, "loss": 0.6694, "step": 6925 }, { "epoch": 0.56, "grad_norm": 3.6706371518829792, "learning_rate": 4.233551450426772e-06, "loss": 0.8611, "step": 6926 }, { "epoch": 0.56, "grad_norm": 2.9105442807704764, "learning_rate": 4.232251668268884e-06, "loss": 0.6477, "step": 6927 }, { "epoch": 0.56, "grad_norm": 2.947705037421373, "learning_rate": 4.2309519392440175e-06, "loss": 0.6659, "step": 6928 }, { "epoch": 0.56, "grad_norm": 6.402133450303937, "learning_rate": 4.229652263442119e-06, "loss": 0.5482, "step": 6929 }, { "epoch": 0.56, "grad_norm": 5.661679875807692, "learning_rate": 4.228352640953132e-06, "loss": 0.6949, "step": 6930 }, { "epoch": 0.56, "grad_norm": 3.6436215324016223, "learning_rate": 4.227053071867001e-06, "loss": 0.5583, "step": 6931 }, { "epoch": 0.56, "grad_norm": 3.044194250327129, "learning_rate": 4.225753556273665e-06, "loss": 0.6226, "step": 6932 }, { "epoch": 0.56, "grad_norm": 3.5078450917745547, "learning_rate": 4.224454094263058e-06, "loss": 0.7228, "step": 6933 }, { "epoch": 0.56, "grad_norm": 3.2739072696751292, "learning_rate": 4.223154685925112e-06, "loss": 0.5581, "step": 6934 }, { "epoch": 0.56, "grad_norm": 3.990188184137978, "learning_rate": 4.221855331349753e-06, "loss": 0.6212, "step": 6935 }, { "epoch": 0.56, "grad_norm": 2.1403026590456626, "learning_rate": 4.220556030626904e-06, "loss": 0.6222, "step": 6936 }, { "epoch": 0.56, "grad_norm": 2.412876906849242, "learning_rate": 4.219256783846486e-06, "loss": 0.8232, "step": 6937 }, { "epoch": 0.56, "grad_norm": 3.6908419423726913, "learning_rate": 4.217957591098413e-06, "loss": 0.6956, "step": 6938 }, { "epoch": 0.56, "grad_norm": 2.6590647870066806, "learning_rate": 4.216658452472599e-06, "loss": 0.7058, "step": 6939 }, { "epoch": 0.56, "grad_norm": 3.647769449392533, "learning_rate": 4.215359368058953e-06, "loss": 0.6135, "step": 6940 }, { "epoch": 0.56, "grad_norm": 2.9181381986913277, "learning_rate": 4.214060337947381e-06, "loss": 0.7652, "step": 6941 }, { "epoch": 0.56, "grad_norm": 2.8977150255200117, "learning_rate": 4.21276136222778e-06, "loss": 0.7265, "step": 6942 }, { "epoch": 0.56, "grad_norm": 2.7067296603603976, "learning_rate": 4.21146244099005e-06, "loss": 0.7343, "step": 6943 }, { "epoch": 0.56, "grad_norm": 4.068896090182501, "learning_rate": 4.210163574324085e-06, "loss": 0.7433, "step": 6944 }, { "epoch": 0.56, "grad_norm": 4.445737546903657, "learning_rate": 4.208864762319773e-06, "loss": 0.6623, "step": 6945 }, { "epoch": 0.56, "grad_norm": 2.1674324217444743, "learning_rate": 4.207566005067001e-06, "loss": 0.6069, "step": 6946 }, { "epoch": 0.56, "grad_norm": 3.066015263098254, "learning_rate": 4.206267302655651e-06, "loss": 0.6724, "step": 6947 }, { "epoch": 0.56, "grad_norm": 4.278582462049776, "learning_rate": 4.204968655175603e-06, "loss": 0.6913, "step": 6948 }, { "epoch": 0.56, "grad_norm": 2.616174743636218, "learning_rate": 4.203670062716728e-06, "loss": 0.7945, "step": 6949 }, { "epoch": 0.56, "grad_norm": 4.157050326997998, "learning_rate": 4.202371525368899e-06, "loss": 0.7758, "step": 6950 }, { "epoch": 0.56, "grad_norm": 3.396124135036793, "learning_rate": 4.2010730432219845e-06, "loss": 0.7564, "step": 6951 }, { "epoch": 0.56, "grad_norm": 6.230260829983935, "learning_rate": 4.199774616365844e-06, "loss": 0.6655, "step": 6952 }, { "epoch": 0.56, "grad_norm": 3.3868972231220447, "learning_rate": 4.198476244890338e-06, "loss": 0.6684, "step": 6953 }, { "epoch": 0.56, "grad_norm": 2.602355793928937, "learning_rate": 4.197177928885324e-06, "loss": 0.8673, "step": 6954 }, { "epoch": 0.56, "grad_norm": 6.951792856718155, "learning_rate": 4.195879668440654e-06, "loss": 0.6314, "step": 6955 }, { "epoch": 0.56, "grad_norm": 4.2578614488574695, "learning_rate": 4.194581463646172e-06, "loss": 0.6605, "step": 6956 }, { "epoch": 0.57, "grad_norm": 5.698667075326142, "learning_rate": 4.193283314591723e-06, "loss": 0.7334, "step": 6957 }, { "epoch": 0.57, "grad_norm": 4.793750457034329, "learning_rate": 4.191985221367149e-06, "loss": 0.6277, "step": 6958 }, { "epoch": 0.57, "grad_norm": 2.998544672944845, "learning_rate": 4.190687184062286e-06, "loss": 0.6871, "step": 6959 }, { "epoch": 0.57, "grad_norm": 2.9266727867606193, "learning_rate": 4.189389202766966e-06, "loss": 0.489, "step": 6960 }, { "epoch": 0.57, "grad_norm": 2.426347982121342, "learning_rate": 4.188091277571018e-06, "loss": 0.9593, "step": 6961 }, { "epoch": 0.57, "grad_norm": 2.4510324698773287, "learning_rate": 4.186793408564264e-06, "loss": 0.6993, "step": 6962 }, { "epoch": 0.57, "grad_norm": 2.7807890692245825, "learning_rate": 4.1854955958365266e-06, "loss": 0.6088, "step": 6963 }, { "epoch": 0.57, "grad_norm": 3.750802336792746, "learning_rate": 4.184197839477622e-06, "loss": 0.6171, "step": 6964 }, { "epoch": 0.57, "grad_norm": 7.762623844184858, "learning_rate": 4.182900139577365e-06, "loss": 0.718, "step": 6965 }, { "epoch": 0.57, "grad_norm": 8.229150675378827, "learning_rate": 4.181602496225562e-06, "loss": 0.6466, "step": 6966 }, { "epoch": 0.57, "grad_norm": 3.2659685179011646, "learning_rate": 4.180304909512021e-06, "loss": 0.5392, "step": 6967 }, { "epoch": 0.57, "grad_norm": 2.74212297377412, "learning_rate": 4.179007379526541e-06, "loss": 0.6128, "step": 6968 }, { "epoch": 0.57, "grad_norm": 3.6857015054727786, "learning_rate": 4.17770990635892e-06, "loss": 0.7609, "step": 6969 }, { "epoch": 0.57, "grad_norm": 2.4533151660888293, "learning_rate": 4.17641249009895e-06, "loss": 0.6076, "step": 6970 }, { "epoch": 0.57, "grad_norm": 8.432819590213036, "learning_rate": 4.175115130836421e-06, "loss": 0.6141, "step": 6971 }, { "epoch": 0.57, "grad_norm": 3.5354045453391945, "learning_rate": 4.17381782866112e-06, "loss": 0.627, "step": 6972 }, { "epoch": 0.57, "grad_norm": 4.104127409640797, "learning_rate": 4.172520583662825e-06, "loss": 0.691, "step": 6973 }, { "epoch": 0.57, "grad_norm": 6.614995015345534, "learning_rate": 4.171223395931321e-06, "loss": 0.6441, "step": 6974 }, { "epoch": 0.57, "grad_norm": 4.283319028455133, "learning_rate": 4.169926265556372e-06, "loss": 0.7528, "step": 6975 }, { "epoch": 0.57, "grad_norm": 3.0450442093745655, "learning_rate": 4.168629192627754e-06, "loss": 0.8984, "step": 6976 }, { "epoch": 0.57, "grad_norm": 2.4805258942389554, "learning_rate": 4.1673321772352296e-06, "loss": 0.7947, "step": 6977 }, { "epoch": 0.57, "grad_norm": 2.8356137716526844, "learning_rate": 4.166035219468561e-06, "loss": 0.7014, "step": 6978 }, { "epoch": 0.57, "grad_norm": 13.372403322537808, "learning_rate": 4.164738319417507e-06, "loss": 0.6765, "step": 6979 }, { "epoch": 0.57, "grad_norm": 2.7362077028438483, "learning_rate": 4.16344147717182e-06, "loss": 0.5522, "step": 6980 }, { "epoch": 0.57, "grad_norm": 2.880412823004085, "learning_rate": 4.162144692821252e-06, "loss": 0.5945, "step": 6981 }, { "epoch": 0.57, "grad_norm": 3.455361614624516, "learning_rate": 4.160847966455546e-06, "loss": 0.7302, "step": 6982 }, { "epoch": 0.57, "grad_norm": 8.763511073609845, "learning_rate": 4.159551298164442e-06, "loss": 0.6853, "step": 6983 }, { "epoch": 0.57, "grad_norm": 3.86274625066185, "learning_rate": 4.158254688037683e-06, "loss": 0.5585, "step": 6984 }, { "epoch": 0.57, "grad_norm": 5.572053454080541, "learning_rate": 4.156958136164999e-06, "loss": 0.7547, "step": 6985 }, { "epoch": 0.57, "grad_norm": 3.7325630350931895, "learning_rate": 4.1556616426361195e-06, "loss": 0.8862, "step": 6986 }, { "epoch": 0.57, "grad_norm": 4.010836243194204, "learning_rate": 4.1543652075407705e-06, "loss": 0.6712, "step": 6987 }, { "epoch": 0.57, "grad_norm": 2.8972568444904194, "learning_rate": 4.153068830968676e-06, "loss": 0.6456, "step": 6988 }, { "epoch": 0.57, "grad_norm": 4.105580395751182, "learning_rate": 4.151772513009549e-06, "loss": 0.6258, "step": 6989 }, { "epoch": 0.57, "grad_norm": 3.5872361108436555, "learning_rate": 4.150476253753105e-06, "loss": 0.7067, "step": 6990 }, { "epoch": 0.57, "grad_norm": 4.346122099792341, "learning_rate": 4.149180053289054e-06, "loss": 0.6976, "step": 6991 }, { "epoch": 0.57, "grad_norm": 2.8041949768638776, "learning_rate": 4.1478839117071e-06, "loss": 0.6819, "step": 6992 }, { "epoch": 0.57, "grad_norm": 4.803441080414738, "learning_rate": 4.146587829096945e-06, "loss": 0.8158, "step": 6993 }, { "epoch": 0.57, "grad_norm": 3.8389702818027747, "learning_rate": 4.1452918055482876e-06, "loss": 0.633, "step": 6994 }, { "epoch": 0.57, "grad_norm": 3.139920121476924, "learning_rate": 4.143995841150816e-06, "loss": 0.6422, "step": 6995 }, { "epoch": 0.57, "grad_norm": 3.6123468082567993, "learning_rate": 4.142699935994222e-06, "loss": 0.7024, "step": 6996 }, { "epoch": 0.57, "grad_norm": 10.518531232008542, "learning_rate": 4.141404090168192e-06, "loss": 0.6559, "step": 6997 }, { "epoch": 0.57, "grad_norm": 4.4666480257785395, "learning_rate": 4.140108303762404e-06, "loss": 0.6895, "step": 6998 }, { "epoch": 0.57, "grad_norm": 4.12669646816345, "learning_rate": 4.138812576866537e-06, "loss": 0.737, "step": 6999 }, { "epoch": 0.57, "grad_norm": 2.5261102473110837, "learning_rate": 4.137516909570261e-06, "loss": 0.5375, "step": 7000 }, { "epoch": 0.57, "grad_norm": 3.567310106819449, "learning_rate": 4.136221301963247e-06, "loss": 0.6619, "step": 7001 }, { "epoch": 0.57, "grad_norm": 2.461835452672331, "learning_rate": 4.134925754135157e-06, "loss": 0.6671, "step": 7002 }, { "epoch": 0.57, "grad_norm": 3.47197424273473, "learning_rate": 4.133630266175651e-06, "loss": 0.6363, "step": 7003 }, { "epoch": 0.57, "grad_norm": 4.665543907810832, "learning_rate": 4.132334838174385e-06, "loss": 0.6492, "step": 7004 }, { "epoch": 0.57, "grad_norm": 3.311331134782175, "learning_rate": 4.131039470221013e-06, "loss": 0.5325, "step": 7005 }, { "epoch": 0.57, "grad_norm": 10.11120752270408, "learning_rate": 4.12974416240518e-06, "loss": 0.8934, "step": 7006 }, { "epoch": 0.57, "grad_norm": 5.131770279643075, "learning_rate": 4.128448914816532e-06, "loss": 0.4708, "step": 7007 }, { "epoch": 0.57, "grad_norm": 3.4207295136638884, "learning_rate": 4.127153727544706e-06, "loss": 0.6712, "step": 7008 }, { "epoch": 0.57, "grad_norm": 2.820561755816374, "learning_rate": 4.125858600679339e-06, "loss": 0.6856, "step": 7009 }, { "epoch": 0.57, "grad_norm": 7.089689352256433, "learning_rate": 4.12456353431006e-06, "loss": 0.6537, "step": 7010 }, { "epoch": 0.57, "grad_norm": 2.955098347963419, "learning_rate": 4.1232685285264955e-06, "loss": 0.5982, "step": 7011 }, { "epoch": 0.57, "grad_norm": 3.013909341519685, "learning_rate": 4.12197358341827e-06, "loss": 0.6128, "step": 7012 }, { "epoch": 0.57, "grad_norm": 3.554990686335348, "learning_rate": 4.120678699075001e-06, "loss": 0.686, "step": 7013 }, { "epoch": 0.57, "grad_norm": 4.071837424331226, "learning_rate": 4.119383875586304e-06, "loss": 0.6816, "step": 7014 }, { "epoch": 0.57, "grad_norm": 4.047015330144255, "learning_rate": 4.118089113041787e-06, "loss": 0.7738, "step": 7015 }, { "epoch": 0.57, "grad_norm": 2.7011206439987507, "learning_rate": 4.116794411531055e-06, "loss": 0.7857, "step": 7016 }, { "epoch": 0.57, "grad_norm": 2.443015591627053, "learning_rate": 4.115499771143713e-06, "loss": 0.7378, "step": 7017 }, { "epoch": 0.57, "grad_norm": 2.630151132061548, "learning_rate": 4.114205191969354e-06, "loss": 0.7302, "step": 7018 }, { "epoch": 0.57, "grad_norm": 3.559285243907365, "learning_rate": 4.1129106740975735e-06, "loss": 0.6594, "step": 7019 }, { "epoch": 0.57, "grad_norm": 2.20982742807074, "learning_rate": 4.11161621761796e-06, "loss": 0.5712, "step": 7020 }, { "epoch": 0.57, "grad_norm": 2.650858541869961, "learning_rate": 4.1103218226201e-06, "loss": 0.683, "step": 7021 }, { "epoch": 0.57, "grad_norm": 3.3895925274012786, "learning_rate": 4.10902748919357e-06, "loss": 0.7497, "step": 7022 }, { "epoch": 0.57, "grad_norm": 3.1332893213838604, "learning_rate": 4.1077332174279475e-06, "loss": 0.6315, "step": 7023 }, { "epoch": 0.57, "grad_norm": 4.906590289956032, "learning_rate": 4.106439007412806e-06, "loss": 0.7657, "step": 7024 }, { "epoch": 0.57, "grad_norm": 3.697615257312409, "learning_rate": 4.10514485923771e-06, "loss": 0.6215, "step": 7025 }, { "epoch": 0.57, "grad_norm": 3.5427551387827645, "learning_rate": 4.103850772992224e-06, "loss": 0.57, "step": 7026 }, { "epoch": 0.57, "grad_norm": 2.965693629689701, "learning_rate": 4.10255674876591e-06, "loss": 0.6873, "step": 7027 }, { "epoch": 0.57, "grad_norm": 4.218038610573853, "learning_rate": 4.101262786648317e-06, "loss": 0.69, "step": 7028 }, { "epoch": 0.57, "grad_norm": 4.794665487858357, "learning_rate": 4.099968886728998e-06, "loss": 0.6925, "step": 7029 }, { "epoch": 0.57, "grad_norm": 4.011174925049835, "learning_rate": 4.098675049097499e-06, "loss": 0.7004, "step": 7030 }, { "epoch": 0.57, "grad_norm": 7.801034212070132, "learning_rate": 4.097381273843363e-06, "loss": 0.7107, "step": 7031 }, { "epoch": 0.57, "grad_norm": 5.283237052604058, "learning_rate": 4.096087561056126e-06, "loss": 0.7031, "step": 7032 }, { "epoch": 0.57, "grad_norm": 3.044230294571237, "learning_rate": 4.09479391082532e-06, "loss": 0.7869, "step": 7033 }, { "epoch": 0.57, "grad_norm": 6.176605078991393, "learning_rate": 4.093500323240479e-06, "loss": 0.6318, "step": 7034 }, { "epoch": 0.57, "grad_norm": 2.7351750786253635, "learning_rate": 4.09220679839112e-06, "loss": 0.6997, "step": 7035 }, { "epoch": 0.57, "grad_norm": 2.885638212857847, "learning_rate": 4.0909133363667654e-06, "loss": 0.784, "step": 7036 }, { "epoch": 0.57, "grad_norm": 2.5717807975805242, "learning_rate": 4.089619937256934e-06, "loss": 0.655, "step": 7037 }, { "epoch": 0.57, "grad_norm": 2.431623769657669, "learning_rate": 4.088326601151134e-06, "loss": 0.6268, "step": 7038 }, { "epoch": 0.57, "grad_norm": 3.0015131907465715, "learning_rate": 4.087033328138875e-06, "loss": 0.5873, "step": 7039 }, { "epoch": 0.57, "grad_norm": 3.2371315591211256, "learning_rate": 4.085740118309657e-06, "loss": 0.7532, "step": 7040 }, { "epoch": 0.57, "grad_norm": 4.075216512844301, "learning_rate": 4.084446971752981e-06, "loss": 0.8184, "step": 7041 }, { "epoch": 0.57, "grad_norm": 3.3846155243129536, "learning_rate": 4.0831538885583384e-06, "loss": 0.5013, "step": 7042 }, { "epoch": 0.57, "grad_norm": 2.444416255808669, "learning_rate": 4.08186086881522e-06, "loss": 0.6642, "step": 7043 }, { "epoch": 0.57, "grad_norm": 2.85389886969537, "learning_rate": 4.0805679126131096e-06, "loss": 0.6088, "step": 7044 }, { "epoch": 0.57, "grad_norm": 5.217992271804968, "learning_rate": 4.079275020041489e-06, "loss": 0.6256, "step": 7045 }, { "epoch": 0.57, "grad_norm": 3.740372737200289, "learning_rate": 4.0779821911898345e-06, "loss": 0.6677, "step": 7046 }, { "epoch": 0.57, "grad_norm": 2.658326584943097, "learning_rate": 4.07668942614762e-06, "loss": 0.6169, "step": 7047 }, { "epoch": 0.57, "grad_norm": 2.500953519065944, "learning_rate": 4.075396725004308e-06, "loss": 0.7561, "step": 7048 }, { "epoch": 0.57, "grad_norm": 4.033602099855156, "learning_rate": 4.074104087849366e-06, "loss": 0.7109, "step": 7049 }, { "epoch": 0.57, "grad_norm": 4.018652416607557, "learning_rate": 4.072811514772251e-06, "loss": 0.6418, "step": 7050 }, { "epoch": 0.57, "grad_norm": 3.475612196135787, "learning_rate": 4.071519005862416e-06, "loss": 0.6051, "step": 7051 }, { "epoch": 0.57, "grad_norm": 2.571037654524126, "learning_rate": 4.0702265612093125e-06, "loss": 0.5917, "step": 7052 }, { "epoch": 0.57, "grad_norm": 2.672389462818635, "learning_rate": 4.068934180902385e-06, "loss": 0.7578, "step": 7053 }, { "epoch": 0.57, "grad_norm": 10.033104494828656, "learning_rate": 4.067641865031076e-06, "loss": 0.6651, "step": 7054 }, { "epoch": 0.57, "grad_norm": 4.801455431537581, "learning_rate": 4.06634961368482e-06, "loss": 0.5716, "step": 7055 }, { "epoch": 0.57, "grad_norm": 3.051278724372702, "learning_rate": 4.065057426953049e-06, "loss": 0.5318, "step": 7056 }, { "epoch": 0.57, "grad_norm": 2.4216953608249656, "learning_rate": 4.0637653049251915e-06, "loss": 0.5733, "step": 7057 }, { "epoch": 0.57, "grad_norm": 2.981763234104512, "learning_rate": 4.0624732476906695e-06, "loss": 0.5114, "step": 7058 }, { "epoch": 0.57, "grad_norm": 3.3952158015716645, "learning_rate": 4.061181255338902e-06, "loss": 0.7248, "step": 7059 }, { "epoch": 0.57, "grad_norm": 4.457571544170344, "learning_rate": 4.059889327959302e-06, "loss": 0.6903, "step": 7060 }, { "epoch": 0.57, "grad_norm": 3.772096922595958, "learning_rate": 4.058597465641283e-06, "loss": 0.7111, "step": 7061 }, { "epoch": 0.57, "grad_norm": 3.151545206750877, "learning_rate": 4.057305668474244e-06, "loss": 0.735, "step": 7062 }, { "epoch": 0.57, "grad_norm": 4.73946926290578, "learning_rate": 4.0560139365475885e-06, "loss": 0.526, "step": 7063 }, { "epoch": 0.57, "grad_norm": 2.40225347193254, "learning_rate": 4.054722269950714e-06, "loss": 0.6833, "step": 7064 }, { "epoch": 0.57, "grad_norm": 2.8861880836575597, "learning_rate": 4.053430668773009e-06, "loss": 0.5289, "step": 7065 }, { "epoch": 0.57, "grad_norm": 3.4858856994498515, "learning_rate": 4.0521391331038624e-06, "loss": 0.6765, "step": 7066 }, { "epoch": 0.57, "grad_norm": 5.4327513934305145, "learning_rate": 4.050847663032657e-06, "loss": 0.5509, "step": 7067 }, { "epoch": 0.57, "grad_norm": 4.679572625314522, "learning_rate": 4.0495562586487685e-06, "loss": 0.6792, "step": 7068 }, { "epoch": 0.57, "grad_norm": 3.189841680618907, "learning_rate": 4.048264920041571e-06, "loss": 0.6398, "step": 7069 }, { "epoch": 0.57, "grad_norm": 3.120392246091671, "learning_rate": 4.046973647300434e-06, "loss": 0.7439, "step": 7070 }, { "epoch": 0.57, "grad_norm": 5.020875457136881, "learning_rate": 4.045682440514721e-06, "loss": 0.7007, "step": 7071 }, { "epoch": 0.57, "grad_norm": 2.7445980263530205, "learning_rate": 4.044391299773793e-06, "loss": 0.663, "step": 7072 }, { "epoch": 0.57, "grad_norm": 4.252267745389334, "learning_rate": 4.043100225167004e-06, "loss": 0.7099, "step": 7073 }, { "epoch": 0.57, "grad_norm": 3.3732567284536623, "learning_rate": 4.041809216783705e-06, "loss": 0.843, "step": 7074 }, { "epoch": 0.57, "grad_norm": 3.54904892470229, "learning_rate": 4.04051827471324e-06, "loss": 0.6988, "step": 7075 }, { "epoch": 0.57, "grad_norm": 4.4150302327769575, "learning_rate": 4.039227399044952e-06, "loss": 0.6566, "step": 7076 }, { "epoch": 0.57, "grad_norm": 2.196855632678299, "learning_rate": 4.037936589868179e-06, "loss": 0.6491, "step": 7077 }, { "epoch": 0.57, "grad_norm": 2.1056305609931387, "learning_rate": 4.0366458472722495e-06, "loss": 0.7972, "step": 7078 }, { "epoch": 0.57, "grad_norm": 2.579276927801973, "learning_rate": 4.035355171346494e-06, "loss": 0.7998, "step": 7079 }, { "epoch": 0.58, "grad_norm": 6.366461185850729, "learning_rate": 4.034064562180236e-06, "loss": 0.7302, "step": 7080 }, { "epoch": 0.58, "grad_norm": 2.0880156000446632, "learning_rate": 4.032774019862791e-06, "loss": 0.6702, "step": 7081 }, { "epoch": 0.58, "grad_norm": 135.53772051368554, "learning_rate": 4.0314835444834744e-06, "loss": 0.5617, "step": 7082 }, { "epoch": 0.58, "grad_norm": 3.383949655414227, "learning_rate": 4.030193136131594e-06, "loss": 0.7293, "step": 7083 }, { "epoch": 0.58, "grad_norm": 2.4175060882339605, "learning_rate": 4.028902794896455e-06, "loss": 0.7137, "step": 7084 }, { "epoch": 0.58, "grad_norm": 3.714319371619351, "learning_rate": 4.027612520867357e-06, "loss": 0.6962, "step": 7085 }, { "epoch": 0.58, "grad_norm": 3.7014320202532955, "learning_rate": 4.026322314133596e-06, "loss": 0.7067, "step": 7086 }, { "epoch": 0.58, "grad_norm": 2.360488574682084, "learning_rate": 4.025032174784463e-06, "loss": 0.7352, "step": 7087 }, { "epoch": 0.58, "grad_norm": 10.120989490702291, "learning_rate": 4.0237421029092405e-06, "loss": 0.6739, "step": 7088 }, { "epoch": 0.58, "grad_norm": 4.394345164597692, "learning_rate": 4.022452098597212e-06, "loss": 0.7594, "step": 7089 }, { "epoch": 0.58, "grad_norm": 3.308053383223621, "learning_rate": 4.021162161937653e-06, "loss": 0.6454, "step": 7090 }, { "epoch": 0.58, "grad_norm": 3.9880934345528547, "learning_rate": 4.019872293019835e-06, "loss": 0.5711, "step": 7091 }, { "epoch": 0.58, "grad_norm": 2.6387960463333426, "learning_rate": 4.018582491933027e-06, "loss": 0.6446, "step": 7092 }, { "epoch": 0.58, "grad_norm": 4.837870306150221, "learning_rate": 4.017292758766489e-06, "loss": 0.7222, "step": 7093 }, { "epoch": 0.58, "grad_norm": 4.02634876717946, "learning_rate": 4.016003093609482e-06, "loss": 0.6411, "step": 7094 }, { "epoch": 0.58, "grad_norm": 2.0614844495383546, "learning_rate": 4.0147134965512555e-06, "loss": 0.5738, "step": 7095 }, { "epoch": 0.58, "grad_norm": 4.779380707527782, "learning_rate": 4.0134239676810575e-06, "loss": 0.7193, "step": 7096 }, { "epoch": 0.58, "grad_norm": 4.471159342181864, "learning_rate": 4.012134507088135e-06, "loss": 0.6231, "step": 7097 }, { "epoch": 0.58, "grad_norm": 2.7267299535950005, "learning_rate": 4.010845114861724e-06, "loss": 0.7063, "step": 7098 }, { "epoch": 0.58, "grad_norm": 2.048546602296547, "learning_rate": 4.009555791091058e-06, "loss": 0.5358, "step": 7099 }, { "epoch": 0.58, "grad_norm": 3.334308585353318, "learning_rate": 4.00826653586537e-06, "loss": 0.5832, "step": 7100 }, { "epoch": 0.58, "grad_norm": 2.8520865479760427, "learning_rate": 4.00697734927388e-06, "loss": 0.7655, "step": 7101 }, { "epoch": 0.58, "grad_norm": 2.81653992797605, "learning_rate": 4.005688231405811e-06, "loss": 0.714, "step": 7102 }, { "epoch": 0.58, "grad_norm": 4.571812163577576, "learning_rate": 4.004399182350377e-06, "loss": 0.5714, "step": 7103 }, { "epoch": 0.58, "grad_norm": 2.31253948320934, "learning_rate": 4.003110202196787e-06, "loss": 0.6416, "step": 7104 }, { "epoch": 0.58, "grad_norm": 5.421226694768261, "learning_rate": 4.00182129103425e-06, "loss": 0.7174, "step": 7105 }, { "epoch": 0.58, "grad_norm": 3.477881026038711, "learning_rate": 4.0005324489519634e-06, "loss": 0.7019, "step": 7106 }, { "epoch": 0.58, "grad_norm": 2.6006236833725995, "learning_rate": 3.999243676039127e-06, "loss": 0.8622, "step": 7107 }, { "epoch": 0.58, "grad_norm": 2.9735132406338485, "learning_rate": 3.997954972384928e-06, "loss": 0.7592, "step": 7108 }, { "epoch": 0.58, "grad_norm": 4.028429629547646, "learning_rate": 3.996666338078553e-06, "loss": 0.7227, "step": 7109 }, { "epoch": 0.58, "grad_norm": 7.430154104602893, "learning_rate": 3.9953777732091854e-06, "loss": 0.7075, "step": 7110 }, { "epoch": 0.58, "grad_norm": 4.983411353107799, "learning_rate": 3.994089277866001e-06, "loss": 0.5159, "step": 7111 }, { "epoch": 0.58, "grad_norm": 3.1490092625068558, "learning_rate": 3.992800852138174e-06, "loss": 0.6903, "step": 7112 }, { "epoch": 0.58, "grad_norm": 2.4223680871025013, "learning_rate": 3.991512496114869e-06, "loss": 0.6953, "step": 7113 }, { "epoch": 0.58, "grad_norm": 8.073684133253089, "learning_rate": 3.990224209885251e-06, "loss": 0.7106, "step": 7114 }, { "epoch": 0.58, "grad_norm": 2.840984998535892, "learning_rate": 3.988935993538474e-06, "loss": 0.8413, "step": 7115 }, { "epoch": 0.58, "grad_norm": 3.017826991115728, "learning_rate": 3.987647847163692e-06, "loss": 0.706, "step": 7116 }, { "epoch": 0.58, "grad_norm": 3.94102754559899, "learning_rate": 3.986359770850053e-06, "loss": 0.6627, "step": 7117 }, { "epoch": 0.58, "grad_norm": 5.0866405948519455, "learning_rate": 3.985071764686701e-06, "loss": 0.652, "step": 7118 }, { "epoch": 0.58, "grad_norm": 3.6973490664048487, "learning_rate": 3.983783828762773e-06, "loss": 0.7126, "step": 7119 }, { "epoch": 0.58, "grad_norm": 4.608764955189467, "learning_rate": 3.9824959631674045e-06, "loss": 0.5954, "step": 7120 }, { "epoch": 0.58, "grad_norm": 3.9712986111403015, "learning_rate": 3.9812081679897205e-06, "loss": 0.7672, "step": 7121 }, { "epoch": 0.58, "grad_norm": 3.274576886750923, "learning_rate": 3.979920443318847e-06, "loss": 0.6183, "step": 7122 }, { "epoch": 0.58, "grad_norm": 8.080289535774286, "learning_rate": 3.9786327892439e-06, "loss": 0.7738, "step": 7123 }, { "epoch": 0.58, "grad_norm": 3.6606731635016145, "learning_rate": 3.977345205853996e-06, "loss": 0.6448, "step": 7124 }, { "epoch": 0.58, "grad_norm": 30.708902604430268, "learning_rate": 3.976057693238243e-06, "loss": 0.6561, "step": 7125 }, { "epoch": 0.58, "grad_norm": 8.9008171536227, "learning_rate": 3.974770251485745e-06, "loss": 0.579, "step": 7126 }, { "epoch": 0.58, "grad_norm": 3.2694346401965295, "learning_rate": 3.973482880685603e-06, "loss": 0.5499, "step": 7127 }, { "epoch": 0.58, "grad_norm": 2.319533322066518, "learning_rate": 3.972195580926906e-06, "loss": 0.6618, "step": 7128 }, { "epoch": 0.58, "grad_norm": 3.7157301424392157, "learning_rate": 3.970908352298747e-06, "loss": 0.6377, "step": 7129 }, { "epoch": 0.58, "grad_norm": 4.11385347410864, "learning_rate": 3.969621194890211e-06, "loss": 0.8272, "step": 7130 }, { "epoch": 0.58, "grad_norm": 3.5221889443121577, "learning_rate": 3.968334108790375e-06, "loss": 0.7417, "step": 7131 }, { "epoch": 0.58, "grad_norm": 2.4143592836641257, "learning_rate": 3.9670470940883144e-06, "loss": 0.6909, "step": 7132 }, { "epoch": 0.58, "grad_norm": 2.735378463235592, "learning_rate": 3.965760150873101e-06, "loss": 0.6633, "step": 7133 }, { "epoch": 0.58, "grad_norm": 3.1453425545495413, "learning_rate": 3.9644732792337956e-06, "loss": 0.8668, "step": 7134 }, { "epoch": 0.58, "grad_norm": 3.224006594471027, "learning_rate": 3.963186479259459e-06, "loss": 0.6877, "step": 7135 }, { "epoch": 0.58, "grad_norm": 3.2155981706789656, "learning_rate": 3.961899751039146e-06, "loss": 0.655, "step": 7136 }, { "epoch": 0.58, "grad_norm": 2.752151974816608, "learning_rate": 3.960613094661908e-06, "loss": 0.8427, "step": 7137 }, { "epoch": 0.58, "grad_norm": 2.4229192556137007, "learning_rate": 3.959326510216788e-06, "loss": 0.811, "step": 7138 }, { "epoch": 0.58, "grad_norm": 2.6910878124223054, "learning_rate": 3.9580399977928256e-06, "loss": 0.7086, "step": 7139 }, { "epoch": 0.58, "grad_norm": 3.395493643124682, "learning_rate": 3.956753557479058e-06, "loss": 0.5931, "step": 7140 }, { "epoch": 0.58, "grad_norm": 4.458235396836014, "learning_rate": 3.955467189364511e-06, "loss": 0.7405, "step": 7141 }, { "epoch": 0.58, "grad_norm": 2.6195110177642635, "learning_rate": 3.954180893538212e-06, "loss": 0.6578, "step": 7142 }, { "epoch": 0.58, "grad_norm": 2.6294253289058123, "learning_rate": 3.952894670089179e-06, "loss": 0.6806, "step": 7143 }, { "epoch": 0.58, "grad_norm": 2.3949243170800902, "learning_rate": 3.951608519106429e-06, "loss": 0.6714, "step": 7144 }, { "epoch": 0.58, "grad_norm": 4.743017411551807, "learning_rate": 3.950322440678972e-06, "loss": 0.5838, "step": 7145 }, { "epoch": 0.58, "grad_norm": 6.840928874491977, "learning_rate": 3.94903643489581e-06, "loss": 0.8645, "step": 7146 }, { "epoch": 0.58, "grad_norm": 5.160408115665429, "learning_rate": 3.947750501845946e-06, "loss": 0.6656, "step": 7147 }, { "epoch": 0.58, "grad_norm": 4.537840446327892, "learning_rate": 3.946464641618371e-06, "loss": 0.6054, "step": 7148 }, { "epoch": 0.58, "grad_norm": 4.450911099054057, "learning_rate": 3.945178854302075e-06, "loss": 0.6101, "step": 7149 }, { "epoch": 0.58, "grad_norm": 2.85610640593839, "learning_rate": 3.943893139986046e-06, "loss": 0.5647, "step": 7150 }, { "epoch": 0.58, "grad_norm": 5.6872553457541155, "learning_rate": 3.942607498759261e-06, "loss": 0.6266, "step": 7151 }, { "epoch": 0.58, "grad_norm": 2.467327264867698, "learning_rate": 3.941321930710695e-06, "loss": 0.6275, "step": 7152 }, { "epoch": 0.58, "grad_norm": 2.4375852895068206, "learning_rate": 3.940036435929318e-06, "loss": 0.7381, "step": 7153 }, { "epoch": 0.58, "grad_norm": 3.0608470066806936, "learning_rate": 3.938751014504093e-06, "loss": 0.7376, "step": 7154 }, { "epoch": 0.58, "grad_norm": 29.294594195116662, "learning_rate": 3.937465666523981e-06, "loss": 0.6674, "step": 7155 }, { "epoch": 0.58, "grad_norm": 2.39115818843071, "learning_rate": 3.9361803920779335e-06, "loss": 0.6115, "step": 7156 }, { "epoch": 0.58, "grad_norm": 2.419905863667368, "learning_rate": 3.934895191254901e-06, "loss": 0.7588, "step": 7157 }, { "epoch": 0.58, "grad_norm": 3.1178064997011568, "learning_rate": 3.933610064143829e-06, "loss": 0.5792, "step": 7158 }, { "epoch": 0.58, "grad_norm": 3.866681648895621, "learning_rate": 3.932325010833654e-06, "loss": 0.7154, "step": 7159 }, { "epoch": 0.58, "grad_norm": 2.99812259081433, "learning_rate": 3.931040031413313e-06, "loss": 0.7449, "step": 7160 }, { "epoch": 0.58, "grad_norm": 2.168260594487398, "learning_rate": 3.929755125971731e-06, "loss": 0.4579, "step": 7161 }, { "epoch": 0.58, "grad_norm": 3.051557709198906, "learning_rate": 3.928470294597834e-06, "loss": 0.6434, "step": 7162 }, { "epoch": 0.58, "grad_norm": 2.244744314834905, "learning_rate": 3.927185537380539e-06, "loss": 0.717, "step": 7163 }, { "epoch": 0.58, "grad_norm": 4.6015150906868225, "learning_rate": 3.925900854408759e-06, "loss": 0.7066, "step": 7164 }, { "epoch": 0.58, "grad_norm": 3.627472554385189, "learning_rate": 3.924616245771403e-06, "loss": 0.5849, "step": 7165 }, { "epoch": 0.58, "grad_norm": 3.0429335049343633, "learning_rate": 3.9233317115573745e-06, "loss": 0.7272, "step": 7166 }, { "epoch": 0.58, "grad_norm": 2.8403723486645127, "learning_rate": 3.922047251855572e-06, "loss": 0.6936, "step": 7167 }, { "epoch": 0.58, "grad_norm": 2.428126481669758, "learning_rate": 3.9207628667548855e-06, "loss": 0.5414, "step": 7168 }, { "epoch": 0.58, "grad_norm": 3.835722169142076, "learning_rate": 3.919478556344205e-06, "loss": 0.6228, "step": 7169 }, { "epoch": 0.58, "grad_norm": 3.324640521430359, "learning_rate": 3.918194320712412e-06, "loss": 0.6293, "step": 7170 }, { "epoch": 0.58, "grad_norm": 2.4802307783981914, "learning_rate": 3.916910159948382e-06, "loss": 0.6964, "step": 7171 }, { "epoch": 0.58, "grad_norm": 3.153639548342947, "learning_rate": 3.915626074140989e-06, "loss": 0.8017, "step": 7172 }, { "epoch": 0.58, "grad_norm": 2.272287398469343, "learning_rate": 3.914342063379102e-06, "loss": 0.7842, "step": 7173 }, { "epoch": 0.58, "grad_norm": 2.8165599860236257, "learning_rate": 3.913058127751578e-06, "loss": 0.7548, "step": 7174 }, { "epoch": 0.58, "grad_norm": 2.2558954578668042, "learning_rate": 3.911774267347276e-06, "loss": 0.7581, "step": 7175 }, { "epoch": 0.58, "grad_norm": 2.6829229553980136, "learning_rate": 3.910490482255046e-06, "loss": 0.6439, "step": 7176 }, { "epoch": 0.58, "grad_norm": 2.2670960439596812, "learning_rate": 3.909206772563735e-06, "loss": 0.6253, "step": 7177 }, { "epoch": 0.58, "grad_norm": 3.0871668272852024, "learning_rate": 3.907923138362184e-06, "loss": 0.6802, "step": 7178 }, { "epoch": 0.58, "grad_norm": 6.588851475813517, "learning_rate": 3.906639579739227e-06, "loss": 0.7277, "step": 7179 }, { "epoch": 0.58, "grad_norm": 3.802346326024433, "learning_rate": 3.9053560967836985e-06, "loss": 0.608, "step": 7180 }, { "epoch": 0.58, "grad_norm": 3.062849233313521, "learning_rate": 3.904072689584418e-06, "loss": 0.6987, "step": 7181 }, { "epoch": 0.58, "grad_norm": 3.101469149764168, "learning_rate": 3.902789358230208e-06, "loss": 0.7503, "step": 7182 }, { "epoch": 0.58, "grad_norm": 2.9694095345214118, "learning_rate": 3.901506102809882e-06, "loss": 0.7927, "step": 7183 }, { "epoch": 0.58, "grad_norm": 2.9872987693511015, "learning_rate": 3.9002229234122516e-06, "loss": 0.7916, "step": 7184 }, { "epoch": 0.58, "grad_norm": 3.519532135469556, "learning_rate": 3.898939820126121e-06, "loss": 0.7042, "step": 7185 }, { "epoch": 0.58, "grad_norm": 3.067276273068609, "learning_rate": 3.897656793040287e-06, "loss": 0.7501, "step": 7186 }, { "epoch": 0.58, "grad_norm": 5.803766889974801, "learning_rate": 3.896373842243543e-06, "loss": 0.6409, "step": 7187 }, { "epoch": 0.58, "grad_norm": 2.2318748387184173, "learning_rate": 3.895090967824678e-06, "loss": 0.6115, "step": 7188 }, { "epoch": 0.58, "grad_norm": 2.408297709139427, "learning_rate": 3.8938081698724755e-06, "loss": 0.6143, "step": 7189 }, { "epoch": 0.58, "grad_norm": 2.2393862555000665, "learning_rate": 3.892525448475713e-06, "loss": 0.6762, "step": 7190 }, { "epoch": 0.58, "grad_norm": 4.184636086856159, "learning_rate": 3.891242803723162e-06, "loss": 0.7141, "step": 7191 }, { "epoch": 0.58, "grad_norm": 2.6274662966595788, "learning_rate": 3.889960235703591e-06, "loss": 0.7047, "step": 7192 }, { "epoch": 0.58, "grad_norm": 2.2508948579955983, "learning_rate": 3.888677744505762e-06, "loss": 0.8252, "step": 7193 }, { "epoch": 0.58, "grad_norm": 3.1153022288612937, "learning_rate": 3.887395330218429e-06, "loss": 0.6737, "step": 7194 }, { "epoch": 0.58, "grad_norm": 2.9151043502806306, "learning_rate": 3.886112992930345e-06, "loss": 0.6627, "step": 7195 }, { "epoch": 0.58, "grad_norm": 2.9304253212175793, "learning_rate": 3.884830732730256e-06, "loss": 0.6311, "step": 7196 }, { "epoch": 0.58, "grad_norm": 2.5563519750572383, "learning_rate": 3.883548549706901e-06, "loss": 0.571, "step": 7197 }, { "epoch": 0.58, "grad_norm": 2.621668624211604, "learning_rate": 3.882266443949016e-06, "loss": 0.7884, "step": 7198 }, { "epoch": 0.58, "grad_norm": 2.8090990735473156, "learning_rate": 3.880984415545331e-06, "loss": 0.8881, "step": 7199 }, { "epoch": 0.58, "grad_norm": 3.236210003920545, "learning_rate": 3.879702464584573e-06, "loss": 0.6562, "step": 7200 }, { "epoch": 0.58, "grad_norm": 2.6916479094271137, "learning_rate": 3.878420591155456e-06, "loss": 0.6965, "step": 7201 }, { "epoch": 0.58, "grad_norm": 4.721446012476177, "learning_rate": 3.877138795346697e-06, "loss": 0.721, "step": 7202 }, { "epoch": 0.59, "grad_norm": 6.681146630832065, "learning_rate": 3.875857077247003e-06, "loss": 0.5663, "step": 7203 }, { "epoch": 0.59, "grad_norm": 4.379412145480314, "learning_rate": 3.8745754369450766e-06, "loss": 0.785, "step": 7204 }, { "epoch": 0.59, "grad_norm": 12.961918217050401, "learning_rate": 3.873293874529617e-06, "loss": 0.7024, "step": 7205 }, { "epoch": 0.59, "grad_norm": 4.164589381791025, "learning_rate": 3.872012390089318e-06, "loss": 0.6477, "step": 7206 }, { "epoch": 0.59, "grad_norm": 4.18180053343718, "learning_rate": 3.870730983712861e-06, "loss": 0.6177, "step": 7207 }, { "epoch": 0.59, "grad_norm": 2.517025835710788, "learning_rate": 3.86944965548893e-06, "loss": 0.7079, "step": 7208 }, { "epoch": 0.59, "grad_norm": 2.4843008449983923, "learning_rate": 3.868168405506202e-06, "loss": 0.6075, "step": 7209 }, { "epoch": 0.59, "grad_norm": 2.494483836483101, "learning_rate": 3.866887233853348e-06, "loss": 0.477, "step": 7210 }, { "epoch": 0.59, "grad_norm": 2.7910383364045024, "learning_rate": 3.865606140619032e-06, "loss": 0.6363, "step": 7211 }, { "epoch": 0.59, "grad_norm": 3.3259199026576036, "learning_rate": 3.864325125891912e-06, "loss": 0.7215, "step": 7212 }, { "epoch": 0.59, "grad_norm": 3.0871505619556356, "learning_rate": 3.863044189760648e-06, "loss": 0.6827, "step": 7213 }, { "epoch": 0.59, "grad_norm": 3.9231385581149323, "learning_rate": 3.861763332313881e-06, "loss": 0.6288, "step": 7214 }, { "epoch": 0.59, "grad_norm": 2.060535966473672, "learning_rate": 3.86048255364026e-06, "loss": 0.5824, "step": 7215 }, { "epoch": 0.59, "grad_norm": 9.742416264242609, "learning_rate": 3.85920185382842e-06, "loss": 0.5813, "step": 7216 }, { "epoch": 0.59, "grad_norm": 4.59445604628631, "learning_rate": 3.8579212329669956e-06, "loss": 0.8651, "step": 7217 }, { "epoch": 0.59, "grad_norm": 2.9663063073640026, "learning_rate": 3.856640691144614e-06, "loss": 0.6132, "step": 7218 }, { "epoch": 0.59, "grad_norm": 2.574833658107921, "learning_rate": 3.8553602284498945e-06, "loss": 0.8064, "step": 7219 }, { "epoch": 0.59, "grad_norm": 3.1517735153701105, "learning_rate": 3.854079844971456e-06, "loss": 0.6178, "step": 7220 }, { "epoch": 0.59, "grad_norm": 3.911390324305378, "learning_rate": 3.852799540797906e-06, "loss": 0.7517, "step": 7221 }, { "epoch": 0.59, "grad_norm": 3.4241318381324013, "learning_rate": 3.851519316017851e-06, "loss": 0.6175, "step": 7222 }, { "epoch": 0.59, "grad_norm": 3.105643683430784, "learning_rate": 3.850239170719891e-06, "loss": 0.6606, "step": 7223 }, { "epoch": 0.59, "grad_norm": 2.644108443360923, "learning_rate": 3.848959104992619e-06, "loss": 0.6611, "step": 7224 }, { "epoch": 0.59, "grad_norm": 6.780798108946891, "learning_rate": 3.847679118924627e-06, "loss": 0.7851, "step": 7225 }, { "epoch": 0.59, "grad_norm": 4.11567693269245, "learning_rate": 3.846399212604495e-06, "loss": 0.6335, "step": 7226 }, { "epoch": 0.59, "grad_norm": 2.954699984067186, "learning_rate": 3.845119386120801e-06, "loss": 0.5931, "step": 7227 }, { "epoch": 0.59, "grad_norm": 3.084396318031787, "learning_rate": 3.8438396395621155e-06, "loss": 0.7061, "step": 7228 }, { "epoch": 0.59, "grad_norm": 3.505458754041068, "learning_rate": 3.842559973017007e-06, "loss": 0.6277, "step": 7229 }, { "epoch": 0.59, "grad_norm": 2.6835424893423943, "learning_rate": 3.841280386574037e-06, "loss": 0.5841, "step": 7230 }, { "epoch": 0.59, "grad_norm": 7.0675334694293435, "learning_rate": 3.84000088032176e-06, "loss": 0.6593, "step": 7231 }, { "epoch": 0.59, "grad_norm": 2.32267646422886, "learning_rate": 3.838721454348726e-06, "loss": 0.6421, "step": 7232 }, { "epoch": 0.59, "grad_norm": 4.559178333668757, "learning_rate": 3.837442108743481e-06, "loss": 0.6663, "step": 7233 }, { "epoch": 0.59, "grad_norm": 3.6079087860769774, "learning_rate": 3.836162843594561e-06, "loss": 0.4741, "step": 7234 }, { "epoch": 0.59, "grad_norm": 2.9282600916634998, "learning_rate": 3.8348836589905e-06, "loss": 0.6938, "step": 7235 }, { "epoch": 0.59, "grad_norm": 4.919553372839366, "learning_rate": 3.833604555019826e-06, "loss": 0.7656, "step": 7236 }, { "epoch": 0.59, "grad_norm": 2.7995397734226843, "learning_rate": 3.832325531771061e-06, "loss": 0.703, "step": 7237 }, { "epoch": 0.59, "grad_norm": 5.496908886308545, "learning_rate": 3.831046589332721e-06, "loss": 0.6902, "step": 7238 }, { "epoch": 0.59, "grad_norm": 7.39964249065611, "learning_rate": 3.82976772779332e-06, "loss": 0.6733, "step": 7239 }, { "epoch": 0.59, "grad_norm": 2.1525700114017003, "learning_rate": 3.8284889472413575e-06, "loss": 0.6014, "step": 7240 }, { "epoch": 0.59, "grad_norm": 4.537658975154655, "learning_rate": 3.8272102477653374e-06, "loss": 0.6228, "step": 7241 }, { "epoch": 0.59, "grad_norm": 3.8141789154662664, "learning_rate": 3.825931629453752e-06, "loss": 0.699, "step": 7242 }, { "epoch": 0.59, "grad_norm": 3.5797493848278577, "learning_rate": 3.824653092395091e-06, "loss": 0.7611, "step": 7243 }, { "epoch": 0.59, "grad_norm": 30.21051244384909, "learning_rate": 3.823374636677837e-06, "loss": 0.8074, "step": 7244 }, { "epoch": 0.59, "grad_norm": 5.938529816369747, "learning_rate": 3.822096262390466e-06, "loss": 0.7849, "step": 7245 }, { "epoch": 0.59, "grad_norm": 12.104901020428983, "learning_rate": 3.820817969621452e-06, "loss": 0.642, "step": 7246 }, { "epoch": 0.59, "grad_norm": 6.208213911645407, "learning_rate": 3.819539758459258e-06, "loss": 0.7094, "step": 7247 }, { "epoch": 0.59, "grad_norm": 2.9462731893274317, "learning_rate": 3.8182616289923445e-06, "loss": 0.7118, "step": 7248 }, { "epoch": 0.59, "grad_norm": 5.769709331321691, "learning_rate": 3.8169835813091675e-06, "loss": 0.6705, "step": 7249 }, { "epoch": 0.59, "grad_norm": 5.392203288692865, "learning_rate": 3.815705615498177e-06, "loss": 0.6834, "step": 7250 }, { "epoch": 0.59, "grad_norm": 3.0020658023445153, "learning_rate": 3.8144277316478135e-06, "loss": 0.6077, "step": 7251 }, { "epoch": 0.59, "grad_norm": 10.042213970577976, "learning_rate": 3.813149929846516e-06, "loss": 0.6952, "step": 7252 }, { "epoch": 0.59, "grad_norm": 10.455287717037274, "learning_rate": 3.8118722101827186e-06, "loss": 0.6124, "step": 7253 }, { "epoch": 0.59, "grad_norm": 2.5436967829007293, "learning_rate": 3.810594572744843e-06, "loss": 0.7633, "step": 7254 }, { "epoch": 0.59, "grad_norm": 6.695256271579665, "learning_rate": 3.8093170176213125e-06, "loss": 0.5679, "step": 7255 }, { "epoch": 0.59, "grad_norm": 2.675130500984532, "learning_rate": 3.808039544900541e-06, "loss": 0.6674, "step": 7256 }, { "epoch": 0.59, "grad_norm": 3.2151662665051806, "learning_rate": 3.806762154670938e-06, "loss": 0.5473, "step": 7257 }, { "epoch": 0.59, "grad_norm": 1.9915570694439937, "learning_rate": 3.8054848470209094e-06, "loss": 0.6101, "step": 7258 }, { "epoch": 0.59, "grad_norm": 2.8705539405942555, "learning_rate": 3.8042076220388494e-06, "loss": 0.7492, "step": 7259 }, { "epoch": 0.59, "grad_norm": 3.380885745975379, "learning_rate": 3.8029304798131522e-06, "loss": 0.5438, "step": 7260 }, { "epoch": 0.59, "grad_norm": 3.148770564674383, "learning_rate": 3.8016534204322015e-06, "loss": 0.6225, "step": 7261 }, { "epoch": 0.59, "grad_norm": 17.67362039582867, "learning_rate": 3.80037644398438e-06, "loss": 0.7057, "step": 7262 }, { "epoch": 0.59, "grad_norm": 2.199823629081299, "learning_rate": 3.7990995505580613e-06, "loss": 0.7123, "step": 7263 }, { "epoch": 0.59, "grad_norm": 2.6642799848007668, "learning_rate": 3.7978227402416155e-06, "loss": 0.405, "step": 7264 }, { "epoch": 0.59, "grad_norm": 2.552145961684152, "learning_rate": 3.796546013123407e-06, "loss": 0.5678, "step": 7265 }, { "epoch": 0.59, "grad_norm": 4.454298144343011, "learning_rate": 3.795269369291792e-06, "loss": 0.54, "step": 7266 }, { "epoch": 0.59, "grad_norm": 3.455051219378804, "learning_rate": 3.793992808835121e-06, "loss": 0.7836, "step": 7267 }, { "epoch": 0.59, "grad_norm": 3.000137984062915, "learning_rate": 3.7927163318417426e-06, "loss": 0.6297, "step": 7268 }, { "epoch": 0.59, "grad_norm": 9.05652693384451, "learning_rate": 3.791439938399994e-06, "loss": 0.8266, "step": 7269 }, { "epoch": 0.59, "grad_norm": 21.664017230343216, "learning_rate": 3.790163628598212e-06, "loss": 0.6272, "step": 7270 }, { "epoch": 0.59, "grad_norm": 5.460426999278598, "learning_rate": 3.7888874025247243e-06, "loss": 0.5154, "step": 7271 }, { "epoch": 0.59, "grad_norm": 6.362063070585342, "learning_rate": 3.7876112602678544e-06, "loss": 0.5735, "step": 7272 }, { "epoch": 0.59, "grad_norm": 2.395111705112645, "learning_rate": 3.786335201915921e-06, "loss": 0.7249, "step": 7273 }, { "epoch": 0.59, "grad_norm": 2.3274706668652643, "learning_rate": 3.7850592275572316e-06, "loss": 0.5885, "step": 7274 }, { "epoch": 0.59, "grad_norm": 3.158507784404962, "learning_rate": 3.783783337280094e-06, "loss": 0.8045, "step": 7275 }, { "epoch": 0.59, "grad_norm": 4.020353364995064, "learning_rate": 3.782507531172807e-06, "loss": 0.5935, "step": 7276 }, { "epoch": 0.59, "grad_norm": 4.5596513661815194, "learning_rate": 3.781231809323665e-06, "loss": 0.7314, "step": 7277 }, { "epoch": 0.59, "grad_norm": 4.102031344115807, "learning_rate": 3.7799561718209555e-06, "loss": 0.7554, "step": 7278 }, { "epoch": 0.59, "grad_norm": 2.319310150103513, "learning_rate": 3.778680618752963e-06, "loss": 0.6662, "step": 7279 }, { "epoch": 0.59, "grad_norm": 3.969363160057141, "learning_rate": 3.7774051502079596e-06, "loss": 0.5514, "step": 7280 }, { "epoch": 0.59, "grad_norm": 2.8240704270365495, "learning_rate": 3.776129766274218e-06, "loss": 0.7703, "step": 7281 }, { "epoch": 0.59, "grad_norm": 2.9126984781048404, "learning_rate": 3.774854467040002e-06, "loss": 0.6712, "step": 7282 }, { "epoch": 0.59, "grad_norm": 4.7635942971269305, "learning_rate": 3.7735792525935735e-06, "loss": 0.6436, "step": 7283 }, { "epoch": 0.59, "grad_norm": 5.7172518320405015, "learning_rate": 3.7723041230231804e-06, "loss": 0.5938, "step": 7284 }, { "epoch": 0.59, "grad_norm": 5.188486848638251, "learning_rate": 3.7710290784170733e-06, "loss": 0.6635, "step": 7285 }, { "epoch": 0.59, "grad_norm": 3.1391049735641894, "learning_rate": 3.7697541188634934e-06, "loss": 0.5841, "step": 7286 }, { "epoch": 0.59, "grad_norm": 3.442731107041563, "learning_rate": 3.7684792444506733e-06, "loss": 0.778, "step": 7287 }, { "epoch": 0.59, "grad_norm": 3.1025109357717615, "learning_rate": 3.7672044552668436e-06, "loss": 0.784, "step": 7288 }, { "epoch": 0.59, "grad_norm": 2.4284228131984222, "learning_rate": 3.765929751400228e-06, "loss": 0.6904, "step": 7289 }, { "epoch": 0.59, "grad_norm": 3.1258211256239545, "learning_rate": 3.7646551329390445e-06, "loss": 0.8809, "step": 7290 }, { "epoch": 0.59, "grad_norm": 3.3436508090189148, "learning_rate": 3.763380599971504e-06, "loss": 0.6396, "step": 7291 }, { "epoch": 0.59, "grad_norm": 2.8392529089421448, "learning_rate": 3.762106152585813e-06, "loss": 0.5705, "step": 7292 }, { "epoch": 0.59, "grad_norm": 7.916368871920782, "learning_rate": 3.760831790870171e-06, "loss": 0.736, "step": 7293 }, { "epoch": 0.59, "grad_norm": 2.7444956103580873, "learning_rate": 3.7595575149127693e-06, "loss": 0.6116, "step": 7294 }, { "epoch": 0.59, "grad_norm": 3.056678484798001, "learning_rate": 3.758283324801799e-06, "loss": 0.7477, "step": 7295 }, { "epoch": 0.59, "grad_norm": 5.079672154811444, "learning_rate": 3.757009220625441e-06, "loss": 0.7192, "step": 7296 }, { "epoch": 0.59, "grad_norm": 4.476492311547803, "learning_rate": 3.7557352024718718e-06, "loss": 0.7163, "step": 7297 }, { "epoch": 0.59, "grad_norm": 3.0904931844461085, "learning_rate": 3.7544612704292616e-06, "loss": 0.7768, "step": 7298 }, { "epoch": 0.59, "grad_norm": 5.4284757805782755, "learning_rate": 3.753187424585774e-06, "loss": 0.6644, "step": 7299 }, { "epoch": 0.59, "grad_norm": 3.9688884010972734, "learning_rate": 3.7519136650295673e-06, "loss": 0.7391, "step": 7300 }, { "epoch": 0.59, "grad_norm": 4.02451487833557, "learning_rate": 3.7506399918487927e-06, "loss": 0.6577, "step": 7301 }, { "epoch": 0.59, "grad_norm": 4.743075690605672, "learning_rate": 3.7493664051315976e-06, "loss": 0.6018, "step": 7302 }, { "epoch": 0.59, "grad_norm": 4.538296913182985, "learning_rate": 3.748092904966122e-06, "loss": 0.7299, "step": 7303 }, { "epoch": 0.59, "grad_norm": 6.940023903922378, "learning_rate": 3.7468194914404986e-06, "loss": 0.5886, "step": 7304 }, { "epoch": 0.59, "grad_norm": 2.418059115404444, "learning_rate": 3.745546164642859e-06, "loss": 0.6898, "step": 7305 }, { "epoch": 0.59, "grad_norm": 7.042844801926572, "learning_rate": 3.7442729246613243e-06, "loss": 0.7128, "step": 7306 }, { "epoch": 0.59, "grad_norm": 2.6568260975861397, "learning_rate": 3.742999771584008e-06, "loss": 0.5964, "step": 7307 }, { "epoch": 0.59, "grad_norm": 4.604082433565104, "learning_rate": 3.7417267054990234e-06, "loss": 0.7636, "step": 7308 }, { "epoch": 0.59, "grad_norm": 2.720544242532811, "learning_rate": 3.740453726494473e-06, "loss": 0.6294, "step": 7309 }, { "epoch": 0.59, "grad_norm": 4.077363071731181, "learning_rate": 3.7391808346584545e-06, "loss": 0.5218, "step": 7310 }, { "epoch": 0.59, "grad_norm": 2.2079242017602843, "learning_rate": 3.7379080300790616e-06, "loss": 0.8184, "step": 7311 }, { "epoch": 0.59, "grad_norm": 3.851829489873319, "learning_rate": 3.7366353128443823e-06, "loss": 0.6807, "step": 7312 }, { "epoch": 0.59, "grad_norm": 1.8707867851801352, "learning_rate": 3.7353626830424915e-06, "loss": 0.7754, "step": 7313 }, { "epoch": 0.59, "grad_norm": 6.262686183065627, "learning_rate": 3.734090140761466e-06, "loss": 0.7534, "step": 7314 }, { "epoch": 0.59, "grad_norm": 3.222533421776613, "learning_rate": 3.7328176860893743e-06, "loss": 0.835, "step": 7315 }, { "epoch": 0.59, "grad_norm": 3.349002082563296, "learning_rate": 3.731545319114277e-06, "loss": 0.5618, "step": 7316 }, { "epoch": 0.59, "grad_norm": 3.23384930348398, "learning_rate": 3.7302730399242305e-06, "loss": 0.719, "step": 7317 }, { "epoch": 0.59, "grad_norm": 2.740611501071095, "learning_rate": 3.7290008486072836e-06, "loss": 0.7509, "step": 7318 }, { "epoch": 0.59, "grad_norm": 6.938051999744724, "learning_rate": 3.7277287452514844e-06, "loss": 0.7121, "step": 7319 }, { "epoch": 0.59, "grad_norm": 1.9062652497507258, "learning_rate": 3.726456729944864e-06, "loss": 0.4997, "step": 7320 }, { "epoch": 0.59, "grad_norm": 4.797029651708164, "learning_rate": 3.7251848027754566e-06, "loss": 0.7725, "step": 7321 }, { "epoch": 0.59, "grad_norm": 4.308471130314857, "learning_rate": 3.7239129638312876e-06, "loss": 0.5171, "step": 7322 }, { "epoch": 0.59, "grad_norm": 2.8394518292624578, "learning_rate": 3.7226412132003775e-06, "loss": 0.6554, "step": 7323 }, { "epoch": 0.59, "grad_norm": 3.915612819876549, "learning_rate": 3.7213695509707382e-06, "loss": 0.6871, "step": 7324 }, { "epoch": 0.59, "grad_norm": 2.8477706457359475, "learning_rate": 3.720097977230376e-06, "loss": 0.6479, "step": 7325 }, { "epoch": 0.6, "grad_norm": 15.094926483075213, "learning_rate": 3.7188264920672958e-06, "loss": 0.6751, "step": 7326 }, { "epoch": 0.6, "grad_norm": 3.40302766700636, "learning_rate": 3.717555095569486e-06, "loss": 0.5662, "step": 7327 }, { "epoch": 0.6, "grad_norm": 3.482315615312778, "learning_rate": 3.716283787824939e-06, "loss": 0.6228, "step": 7328 }, { "epoch": 0.6, "grad_norm": 3.0254614307954033, "learning_rate": 3.7150125689216365e-06, "loss": 0.7599, "step": 7329 }, { "epoch": 0.6, "grad_norm": 3.715720435265195, "learning_rate": 3.7137414389475566e-06, "loss": 0.7637, "step": 7330 }, { "epoch": 0.6, "grad_norm": 3.9743786472464704, "learning_rate": 3.7124703979906674e-06, "loss": 0.8342, "step": 7331 }, { "epoch": 0.6, "grad_norm": 2.7100283616072436, "learning_rate": 3.7111994461389346e-06, "loss": 0.7956, "step": 7332 }, { "epoch": 0.6, "grad_norm": 3.784783770059734, "learning_rate": 3.7099285834803146e-06, "loss": 0.6263, "step": 7333 }, { "epoch": 0.6, "grad_norm": 2.7325968038777404, "learning_rate": 3.708657810102759e-06, "loss": 0.7586, "step": 7334 }, { "epoch": 0.6, "grad_norm": 2.8368295303232087, "learning_rate": 3.707387126094213e-06, "loss": 0.7055, "step": 7335 }, { "epoch": 0.6, "grad_norm": 3.77220291521277, "learning_rate": 3.7061165315426173e-06, "loss": 0.6782, "step": 7336 }, { "epoch": 0.6, "grad_norm": 11.999899633740377, "learning_rate": 3.7048460265359054e-06, "loss": 0.7718, "step": 7337 }, { "epoch": 0.6, "grad_norm": 2.8780581353734926, "learning_rate": 3.7035756111620037e-06, "loss": 0.5941, "step": 7338 }, { "epoch": 0.6, "grad_norm": 3.501336089963944, "learning_rate": 3.7023052855088327e-06, "loss": 0.7938, "step": 7339 }, { "epoch": 0.6, "grad_norm": 21.701738445421, "learning_rate": 3.7010350496643065e-06, "loss": 0.7309, "step": 7340 }, { "epoch": 0.6, "grad_norm": 3.4981375924591074, "learning_rate": 3.6997649037163336e-06, "loss": 0.6602, "step": 7341 }, { "epoch": 0.6, "grad_norm": 2.8239392248433193, "learning_rate": 3.698494847752816e-06, "loss": 0.6981, "step": 7342 }, { "epoch": 0.6, "grad_norm": 5.686085407662347, "learning_rate": 3.6972248818616497e-06, "loss": 0.6483, "step": 7343 }, { "epoch": 0.6, "grad_norm": 2.2970129393030274, "learning_rate": 3.6959550061307246e-06, "loss": 0.6969, "step": 7344 }, { "epoch": 0.6, "grad_norm": 2.8577774368735596, "learning_rate": 3.6946852206479244e-06, "loss": 0.8203, "step": 7345 }, { "epoch": 0.6, "grad_norm": 2.998849811732684, "learning_rate": 3.693415525501128e-06, "loss": 0.776, "step": 7346 }, { "epoch": 0.6, "grad_norm": 2.9829314542238907, "learning_rate": 3.6921459207782017e-06, "loss": 0.5407, "step": 7347 }, { "epoch": 0.6, "grad_norm": 3.2892755123158492, "learning_rate": 3.6908764065670134e-06, "loss": 0.768, "step": 7348 }, { "epoch": 0.6, "grad_norm": 7.563693044579954, "learning_rate": 3.6896069829554205e-06, "loss": 0.6022, "step": 7349 }, { "epoch": 0.6, "grad_norm": 4.066374372784264, "learning_rate": 3.688337650031274e-06, "loss": 0.6213, "step": 7350 }, { "epoch": 0.6, "grad_norm": 7.35627071548663, "learning_rate": 3.687068407882422e-06, "loss": 0.636, "step": 7351 }, { "epoch": 0.6, "grad_norm": 3.3694629288355444, "learning_rate": 3.685799256596705e-06, "loss": 0.7368, "step": 7352 }, { "epoch": 0.6, "grad_norm": 2.81326833222612, "learning_rate": 3.6845301962619525e-06, "loss": 0.7889, "step": 7353 }, { "epoch": 0.6, "grad_norm": 3.068086801096515, "learning_rate": 3.683261226965993e-06, "loss": 0.6114, "step": 7354 }, { "epoch": 0.6, "grad_norm": 3.050866778184678, "learning_rate": 3.681992348796648e-06, "loss": 0.6424, "step": 7355 }, { "epoch": 0.6, "grad_norm": 5.353583618659161, "learning_rate": 3.6807235618417314e-06, "loss": 0.7037, "step": 7356 }, { "epoch": 0.6, "grad_norm": 3.51547153777052, "learning_rate": 3.6794548661890506e-06, "loss": 0.6838, "step": 7357 }, { "epoch": 0.6, "grad_norm": 5.218758206161168, "learning_rate": 3.6781862619264074e-06, "loss": 0.6982, "step": 7358 }, { "epoch": 0.6, "grad_norm": 2.810365870865711, "learning_rate": 3.6769177491416004e-06, "loss": 0.8776, "step": 7359 }, { "epoch": 0.6, "grad_norm": 2.73536081318481, "learning_rate": 3.6756493279224137e-06, "loss": 0.5942, "step": 7360 }, { "epoch": 0.6, "grad_norm": 2.312571897085781, "learning_rate": 3.6743809983566324e-06, "loss": 0.6155, "step": 7361 }, { "epoch": 0.6, "grad_norm": 1.7280209460157567, "learning_rate": 3.6731127605320326e-06, "loss": 0.5422, "step": 7362 }, { "epoch": 0.6, "grad_norm": 2.2483955327525234, "learning_rate": 3.6718446145363857e-06, "loss": 0.7942, "step": 7363 }, { "epoch": 0.6, "grad_norm": 4.87319719793674, "learning_rate": 3.6705765604574534e-06, "loss": 0.698, "step": 7364 }, { "epoch": 0.6, "grad_norm": 2.6660059292934863, "learning_rate": 3.6693085983829955e-06, "loss": 0.5085, "step": 7365 }, { "epoch": 0.6, "grad_norm": 5.5764041209249555, "learning_rate": 3.6680407284007595e-06, "loss": 0.6, "step": 7366 }, { "epoch": 0.6, "grad_norm": 2.819184106941003, "learning_rate": 3.6667729505984916e-06, "loss": 0.6404, "step": 7367 }, { "epoch": 0.6, "grad_norm": 2.7016607748541444, "learning_rate": 3.6655052650639313e-06, "loss": 0.6461, "step": 7368 }, { "epoch": 0.6, "grad_norm": 4.7830564835462495, "learning_rate": 3.6642376718848076e-06, "loss": 0.6721, "step": 7369 }, { "epoch": 0.6, "grad_norm": 3.4867206424649875, "learning_rate": 3.6629701711488485e-06, "loss": 0.6357, "step": 7370 }, { "epoch": 0.6, "grad_norm": 3.853191352058882, "learning_rate": 3.6617027629437735e-06, "loss": 0.7528, "step": 7371 }, { "epoch": 0.6, "grad_norm": 2.624544850960897, "learning_rate": 3.6604354473572934e-06, "loss": 0.6265, "step": 7372 }, { "epoch": 0.6, "grad_norm": 3.707890966403876, "learning_rate": 3.6591682244771154e-06, "loss": 0.6072, "step": 7373 }, { "epoch": 0.6, "grad_norm": 2.316784138481489, "learning_rate": 3.6579010943909376e-06, "loss": 0.5535, "step": 7374 }, { "epoch": 0.6, "grad_norm": 5.2933425411726684, "learning_rate": 3.6566340571864544e-06, "loss": 0.711, "step": 7375 }, { "epoch": 0.6, "grad_norm": 12.070246655012982, "learning_rate": 3.6553671129513534e-06, "loss": 0.6061, "step": 7376 }, { "epoch": 0.6, "grad_norm": 5.951990262528006, "learning_rate": 3.6541002617733147e-06, "loss": 0.5611, "step": 7377 }, { "epoch": 0.6, "grad_norm": 4.698397698313504, "learning_rate": 3.652833503740013e-06, "loss": 0.5494, "step": 7378 }, { "epoch": 0.6, "grad_norm": 3.036075843767054, "learning_rate": 3.6515668389391157e-06, "loss": 0.5877, "step": 7379 }, { "epoch": 0.6, "grad_norm": 2.922370702053709, "learning_rate": 3.6503002674582823e-06, "loss": 0.587, "step": 7380 }, { "epoch": 0.6, "grad_norm": 2.3480338085188914, "learning_rate": 3.64903378938517e-06, "loss": 0.6602, "step": 7381 }, { "epoch": 0.6, "grad_norm": 5.253862737099655, "learning_rate": 3.647767404807424e-06, "loss": 0.627, "step": 7382 }, { "epoch": 0.6, "grad_norm": 2.8909625112504473, "learning_rate": 3.6465011138126894e-06, "loss": 0.7586, "step": 7383 }, { "epoch": 0.6, "grad_norm": 5.367757252075153, "learning_rate": 3.645234916488599e-06, "loss": 0.7268, "step": 7384 }, { "epoch": 0.6, "grad_norm": 115.07498013614116, "learning_rate": 3.6439688129227853e-06, "loss": 0.7857, "step": 7385 }, { "epoch": 0.6, "grad_norm": 3.0428660295695837, "learning_rate": 3.6427028032028656e-06, "loss": 0.6272, "step": 7386 }, { "epoch": 0.6, "grad_norm": 2.891589588081227, "learning_rate": 3.6414368874164586e-06, "loss": 0.6837, "step": 7387 }, { "epoch": 0.6, "grad_norm": 2.979096354638379, "learning_rate": 3.6401710656511734e-06, "loss": 0.5668, "step": 7388 }, { "epoch": 0.6, "grad_norm": 3.71001924930594, "learning_rate": 3.638905337994612e-06, "loss": 0.6534, "step": 7389 }, { "epoch": 0.6, "grad_norm": 3.7682537974195434, "learning_rate": 3.6376397045343716e-06, "loss": 0.6284, "step": 7390 }, { "epoch": 0.6, "grad_norm": 3.923526506784474, "learning_rate": 3.636374165358042e-06, "loss": 0.7152, "step": 7391 }, { "epoch": 0.6, "grad_norm": 4.332754426108979, "learning_rate": 3.635108720553208e-06, "loss": 0.6212, "step": 7392 }, { "epoch": 0.6, "grad_norm": 2.9713923597123992, "learning_rate": 3.633843370207443e-06, "loss": 0.8295, "step": 7393 }, { "epoch": 0.6, "grad_norm": 17.769906774633828, "learning_rate": 3.632578114408318e-06, "loss": 0.6423, "step": 7394 }, { "epoch": 0.6, "grad_norm": 5.563480011177433, "learning_rate": 3.6313129532433976e-06, "loss": 0.6142, "step": 7395 }, { "epoch": 0.6, "grad_norm": 2.7471018418412414, "learning_rate": 3.6300478868002397e-06, "loss": 0.7332, "step": 7396 }, { "epoch": 0.6, "grad_norm": 2.960938746782647, "learning_rate": 3.6287829151663935e-06, "loss": 0.6241, "step": 7397 }, { "epoch": 0.6, "grad_norm": 3.3244601287712663, "learning_rate": 3.6275180384294033e-06, "loss": 0.6261, "step": 7398 }, { "epoch": 0.6, "grad_norm": 3.04758375350109, "learning_rate": 3.6262532566768087e-06, "loss": 0.6003, "step": 7399 }, { "epoch": 0.6, "grad_norm": 4.148754541828166, "learning_rate": 3.624988569996137e-06, "loss": 0.6409, "step": 7400 }, { "epoch": 0.6, "grad_norm": 5.220521118319865, "learning_rate": 3.6237239784749132e-06, "loss": 0.6581, "step": 7401 }, { "epoch": 0.6, "grad_norm": 5.161334001062796, "learning_rate": 3.6224594822006564e-06, "loss": 0.7151, "step": 7402 }, { "epoch": 0.6, "grad_norm": 14.677947169225197, "learning_rate": 3.6211950812608777e-06, "loss": 0.8255, "step": 7403 }, { "epoch": 0.6, "grad_norm": 3.1644686056753066, "learning_rate": 3.6199307757430806e-06, "loss": 0.4962, "step": 7404 }, { "epoch": 0.6, "grad_norm": 3.5720963507070422, "learning_rate": 3.618666565734764e-06, "loss": 0.7252, "step": 7405 }, { "epoch": 0.6, "grad_norm": 12.785658881099442, "learning_rate": 3.617402451323419e-06, "loss": 0.6313, "step": 7406 }, { "epoch": 0.6, "grad_norm": 3.636281569236962, "learning_rate": 3.616138432596529e-06, "loss": 0.7256, "step": 7407 }, { "epoch": 0.6, "grad_norm": 3.482448966358987, "learning_rate": 3.614874509641573e-06, "loss": 0.7139, "step": 7408 }, { "epoch": 0.6, "grad_norm": 2.913919190770498, "learning_rate": 3.6136106825460216e-06, "loss": 0.6947, "step": 7409 }, { "epoch": 0.6, "grad_norm": 3.9653355441426075, "learning_rate": 3.612346951397341e-06, "loss": 0.6649, "step": 7410 }, { "epoch": 0.6, "grad_norm": 3.5684972375213584, "learning_rate": 3.6110833162829896e-06, "loss": 0.8236, "step": 7411 }, { "epoch": 0.6, "grad_norm": 13.866876925362012, "learning_rate": 3.609819777290418e-06, "loss": 0.6926, "step": 7412 }, { "epoch": 0.6, "grad_norm": 2.9908740190233245, "learning_rate": 3.608556334507072e-06, "loss": 0.7981, "step": 7413 }, { "epoch": 0.6, "grad_norm": 3.4929840280381725, "learning_rate": 3.6072929880203865e-06, "loss": 0.7569, "step": 7414 }, { "epoch": 0.6, "grad_norm": 3.7318662072481046, "learning_rate": 3.6060297379177963e-06, "loss": 0.7681, "step": 7415 }, { "epoch": 0.6, "grad_norm": 3.663281620344524, "learning_rate": 3.6047665842867254e-06, "loss": 0.7876, "step": 7416 }, { "epoch": 0.6, "grad_norm": 2.844513174451792, "learning_rate": 3.6035035272145912e-06, "loss": 0.5963, "step": 7417 }, { "epoch": 0.6, "grad_norm": 3.099551154949786, "learning_rate": 3.6022405667888087e-06, "loss": 0.6513, "step": 7418 }, { "epoch": 0.6, "grad_norm": 3.5039842140014597, "learning_rate": 3.6009777030967778e-06, "loss": 0.7656, "step": 7419 }, { "epoch": 0.6, "grad_norm": 2.796447368831364, "learning_rate": 3.5997149362258986e-06, "loss": 0.7451, "step": 7420 }, { "epoch": 0.6, "grad_norm": 2.9008332895667834, "learning_rate": 3.5984522662635647e-06, "loss": 0.6005, "step": 7421 }, { "epoch": 0.6, "grad_norm": 4.292150120438743, "learning_rate": 3.597189693297157e-06, "loss": 0.8122, "step": 7422 }, { "epoch": 0.6, "grad_norm": 5.4528149633907175, "learning_rate": 3.5959272174140556e-06, "loss": 0.7191, "step": 7423 }, { "epoch": 0.6, "grad_norm": 3.507289227802753, "learning_rate": 3.5946648387016315e-06, "loss": 0.7637, "step": 7424 }, { "epoch": 0.6, "grad_norm": 9.9852168813264, "learning_rate": 3.5934025572472507e-06, "loss": 0.6046, "step": 7425 }, { "epoch": 0.6, "grad_norm": 3.4736471397379516, "learning_rate": 3.5921403731382685e-06, "loss": 0.5477, "step": 7426 }, { "epoch": 0.6, "grad_norm": 3.2894486857299805, "learning_rate": 3.5908782864620366e-06, "loss": 0.5862, "step": 7427 }, { "epoch": 0.6, "grad_norm": 8.339181316810304, "learning_rate": 3.5896162973059013e-06, "loss": 0.6038, "step": 7428 }, { "epoch": 0.6, "grad_norm": 3.4399392977424483, "learning_rate": 3.5883544057571974e-06, "loss": 0.7104, "step": 7429 }, { "epoch": 0.6, "grad_norm": 2.799165767067909, "learning_rate": 3.5870926119032568e-06, "loss": 0.6365, "step": 7430 }, { "epoch": 0.6, "grad_norm": 4.617079430169025, "learning_rate": 3.5858309158314044e-06, "loss": 0.7092, "step": 7431 }, { "epoch": 0.6, "grad_norm": 4.5219995765391365, "learning_rate": 3.5845693176289587e-06, "loss": 0.6852, "step": 7432 }, { "epoch": 0.6, "grad_norm": 4.817399633909096, "learning_rate": 3.583307817383226e-06, "loss": 0.7077, "step": 7433 }, { "epoch": 0.6, "grad_norm": 4.76126893841402, "learning_rate": 3.5820464151815133e-06, "loss": 0.6744, "step": 7434 }, { "epoch": 0.6, "grad_norm": 8.940478020531653, "learning_rate": 3.5807851111111167e-06, "loss": 0.6672, "step": 7435 }, { "epoch": 0.6, "grad_norm": 3.1123666422739733, "learning_rate": 3.579523905259327e-06, "loss": 0.5721, "step": 7436 }, { "epoch": 0.6, "grad_norm": 9.242911222196803, "learning_rate": 3.5782627977134264e-06, "loss": 0.5619, "step": 7437 }, { "epoch": 0.6, "grad_norm": 2.633197309355743, "learning_rate": 3.577001788560695e-06, "loss": 0.6304, "step": 7438 }, { "epoch": 0.6, "grad_norm": 7.032586923230544, "learning_rate": 3.5757408778883972e-06, "loss": 0.7834, "step": 7439 }, { "epoch": 0.6, "grad_norm": 2.6920840802343577, "learning_rate": 3.5744800657837984e-06, "loss": 0.6377, "step": 7440 }, { "epoch": 0.6, "grad_norm": 3.2239254934461985, "learning_rate": 3.573219352334155e-06, "loss": 0.6446, "step": 7441 }, { "epoch": 0.6, "grad_norm": 2.4639003976234797, "learning_rate": 3.5719587376267163e-06, "loss": 0.7605, "step": 7442 }, { "epoch": 0.6, "grad_norm": 3.125943728789647, "learning_rate": 3.5706982217487252e-06, "loss": 0.5768, "step": 7443 }, { "epoch": 0.6, "grad_norm": 5.6578371646774634, "learning_rate": 3.569437804787416e-06, "loss": 0.5837, "step": 7444 }, { "epoch": 0.6, "grad_norm": 4.491356875708425, "learning_rate": 3.568177486830019e-06, "loss": 0.7558, "step": 7445 }, { "epoch": 0.6, "grad_norm": 3.9196997398654516, "learning_rate": 3.566917267963756e-06, "loss": 0.5761, "step": 7446 }, { "epoch": 0.6, "grad_norm": 9.848948453498652, "learning_rate": 3.56565714827584e-06, "loss": 0.8136, "step": 7447 }, { "epoch": 0.6, "grad_norm": 7.730786089156826, "learning_rate": 3.5643971278534805e-06, "loss": 0.8032, "step": 7448 }, { "epoch": 0.61, "grad_norm": 6.674699654272515, "learning_rate": 3.5631372067838798e-06, "loss": 0.5267, "step": 7449 }, { "epoch": 0.61, "grad_norm": 2.603941762933034, "learning_rate": 3.561877385154231e-06, "loss": 0.5457, "step": 7450 }, { "epoch": 0.61, "grad_norm": 4.946074547451311, "learning_rate": 3.560617663051724e-06, "loss": 0.7727, "step": 7451 }, { "epoch": 0.61, "grad_norm": 8.14094111561901, "learning_rate": 3.5593580405635374e-06, "loss": 0.5433, "step": 7452 }, { "epoch": 0.61, "grad_norm": 3.033982308557601, "learning_rate": 3.5580985177768456e-06, "loss": 0.7063, "step": 7453 }, { "epoch": 0.61, "grad_norm": 3.551145050639668, "learning_rate": 3.556839094778814e-06, "loss": 0.8079, "step": 7454 }, { "epoch": 0.61, "grad_norm": 4.326093621977823, "learning_rate": 3.555579771656604e-06, "loss": 0.7129, "step": 7455 }, { "epoch": 0.61, "grad_norm": 37.418582917242695, "learning_rate": 3.5543205484973684e-06, "loss": 0.6575, "step": 7456 }, { "epoch": 0.61, "grad_norm": 5.316704214860367, "learning_rate": 3.5530614253882546e-06, "loss": 0.7761, "step": 7457 }, { "epoch": 0.61, "grad_norm": 5.492745698722403, "learning_rate": 3.5518024024164023e-06, "loss": 0.7963, "step": 7458 }, { "epoch": 0.61, "grad_norm": 3.4366181366781383, "learning_rate": 3.5505434796689396e-06, "loss": 0.7946, "step": 7459 }, { "epoch": 0.61, "grad_norm": 5.025074032335014, "learning_rate": 3.5492846572329952e-06, "loss": 0.8172, "step": 7460 }, { "epoch": 0.61, "grad_norm": 3.399991602686766, "learning_rate": 3.5480259351956882e-06, "loss": 0.6188, "step": 7461 }, { "epoch": 0.61, "grad_norm": 2.556868153626207, "learning_rate": 3.546767313644128e-06, "loss": 0.6612, "step": 7462 }, { "epoch": 0.61, "grad_norm": 4.451405150011575, "learning_rate": 3.5455087926654197e-06, "loss": 0.6265, "step": 7463 }, { "epoch": 0.61, "grad_norm": 2.796846650522022, "learning_rate": 3.544250372346661e-06, "loss": 0.5943, "step": 7464 }, { "epoch": 0.61, "grad_norm": 2.146322225123683, "learning_rate": 3.542992052774945e-06, "loss": 0.5916, "step": 7465 }, { "epoch": 0.61, "grad_norm": 3.158542674518788, "learning_rate": 3.541733834037351e-06, "loss": 0.6333, "step": 7466 }, { "epoch": 0.61, "grad_norm": 8.634972935042876, "learning_rate": 3.5404757162209573e-06, "loss": 0.7511, "step": 7467 }, { "epoch": 0.61, "grad_norm": 3.095751474180428, "learning_rate": 3.5392176994128357e-06, "loss": 0.5816, "step": 7468 }, { "epoch": 0.61, "grad_norm": 3.979623297655365, "learning_rate": 3.537959783700046e-06, "loss": 0.7147, "step": 7469 }, { "epoch": 0.61, "grad_norm": 39.91399625056651, "learning_rate": 3.536701969169644e-06, "loss": 0.6689, "step": 7470 }, { "epoch": 0.61, "grad_norm": 4.982232803984003, "learning_rate": 3.5354442559086823e-06, "loss": 0.6588, "step": 7471 }, { "epoch": 0.61, "grad_norm": 10.61320063679203, "learning_rate": 3.5341866440041977e-06, "loss": 0.6637, "step": 7472 }, { "epoch": 0.61, "grad_norm": 3.574015307026985, "learning_rate": 3.532929133543227e-06, "loss": 0.5225, "step": 7473 }, { "epoch": 0.61, "grad_norm": 3.4782369508446074, "learning_rate": 3.5316717246127973e-06, "loss": 0.7809, "step": 7474 }, { "epoch": 0.61, "grad_norm": 4.081763733078685, "learning_rate": 3.5304144172999295e-06, "loss": 0.7006, "step": 7475 }, { "epoch": 0.61, "grad_norm": 4.707854843255855, "learning_rate": 3.5291572116916383e-06, "loss": 0.6645, "step": 7476 }, { "epoch": 0.61, "grad_norm": 3.1294700332511667, "learning_rate": 3.5279001078749285e-06, "loss": 0.576, "step": 7477 }, { "epoch": 0.61, "grad_norm": 10.574617868204239, "learning_rate": 3.526643105936802e-06, "loss": 0.6541, "step": 7478 }, { "epoch": 0.61, "grad_norm": 3.8608913724545166, "learning_rate": 3.5253862059642483e-06, "loss": 0.6665, "step": 7479 }, { "epoch": 0.61, "grad_norm": 3.104653721147114, "learning_rate": 3.524129408044254e-06, "loss": 0.7161, "step": 7480 }, { "epoch": 0.61, "grad_norm": 3.649684018076883, "learning_rate": 3.5228727122637973e-06, "loss": 0.673, "step": 7481 }, { "epoch": 0.61, "grad_norm": 3.247443326591562, "learning_rate": 3.5216161187098497e-06, "loss": 0.552, "step": 7482 }, { "epoch": 0.61, "grad_norm": 2.8906115930719953, "learning_rate": 3.5203596274693752e-06, "loss": 0.5292, "step": 7483 }, { "epoch": 0.61, "grad_norm": 4.250808900652668, "learning_rate": 3.5191032386293315e-06, "loss": 0.6648, "step": 7484 }, { "epoch": 0.61, "grad_norm": 4.037753999067969, "learning_rate": 3.517846952276669e-06, "loss": 0.7541, "step": 7485 }, { "epoch": 0.61, "grad_norm": 4.747744603187847, "learning_rate": 3.5165907684983297e-06, "loss": 0.6382, "step": 7486 }, { "epoch": 0.61, "grad_norm": 5.429821659735972, "learning_rate": 3.5153346873812484e-06, "loss": 0.6565, "step": 7487 }, { "epoch": 0.61, "grad_norm": 4.098600727721019, "learning_rate": 3.5140787090123554e-06, "loss": 0.6331, "step": 7488 }, { "epoch": 0.61, "grad_norm": 3.2772646206463643, "learning_rate": 3.512822833478571e-06, "loss": 0.786, "step": 7489 }, { "epoch": 0.61, "grad_norm": 3.1690652723192576, "learning_rate": 3.5115670608668107e-06, "loss": 0.6329, "step": 7490 }, { "epoch": 0.61, "grad_norm": 4.901228272483044, "learning_rate": 3.510311391263984e-06, "loss": 0.8053, "step": 7491 }, { "epoch": 0.61, "grad_norm": 4.521589422396018, "learning_rate": 3.5090558247569873e-06, "loss": 0.733, "step": 7492 }, { "epoch": 0.61, "grad_norm": 5.383405775573046, "learning_rate": 3.507800361432716e-06, "loss": 0.7596, "step": 7493 }, { "epoch": 0.61, "grad_norm": 3.510076665071607, "learning_rate": 3.5065450013780544e-06, "loss": 0.6236, "step": 7494 }, { "epoch": 0.61, "grad_norm": 207.6708967768317, "learning_rate": 3.5052897446798818e-06, "loss": 0.5501, "step": 7495 }, { "epoch": 0.61, "grad_norm": 5.411337803501745, "learning_rate": 3.504034591425071e-06, "loss": 0.6683, "step": 7496 }, { "epoch": 0.61, "grad_norm": 2.6205665342670184, "learning_rate": 3.502779541700485e-06, "loss": 0.6318, "step": 7497 }, { "epoch": 0.61, "grad_norm": 13.063560236619885, "learning_rate": 3.501524595592985e-06, "loss": 0.7156, "step": 7498 }, { "epoch": 0.61, "grad_norm": 3.662077405662867, "learning_rate": 3.5002697531894157e-06, "loss": 0.7592, "step": 7499 }, { "epoch": 0.61, "grad_norm": 6.1797708523514645, "learning_rate": 3.4990150145766227e-06, "loss": 0.6042, "step": 7500 }, { "epoch": 0.61, "grad_norm": 5.094967996698112, "learning_rate": 3.4977603798414427e-06, "loss": 0.5516, "step": 7501 }, { "epoch": 0.61, "grad_norm": 2.696605898506245, "learning_rate": 3.4965058490707017e-06, "loss": 0.601, "step": 7502 }, { "epoch": 0.61, "grad_norm": 5.60726691514901, "learning_rate": 3.4952514223512235e-06, "loss": 0.6846, "step": 7503 }, { "epoch": 0.61, "grad_norm": 3.582569383750347, "learning_rate": 3.4939970997698213e-06, "loss": 0.831, "step": 7504 }, { "epoch": 0.61, "grad_norm": 8.124051279721181, "learning_rate": 3.4927428814133043e-06, "loss": 0.5354, "step": 7505 }, { "epoch": 0.61, "grad_norm": 4.04058518958966, "learning_rate": 3.491488767368468e-06, "loss": 0.6631, "step": 7506 }, { "epoch": 0.61, "grad_norm": 3.2407010948334074, "learning_rate": 3.490234757722108e-06, "loss": 0.8165, "step": 7507 }, { "epoch": 0.61, "grad_norm": 4.299299475979328, "learning_rate": 3.4889808525610085e-06, "loss": 0.5579, "step": 7508 }, { "epoch": 0.61, "grad_norm": 4.152458281296697, "learning_rate": 3.4877270519719496e-06, "loss": 0.6582, "step": 7509 }, { "epoch": 0.61, "grad_norm": 3.2017835268883856, "learning_rate": 3.4864733560416998e-06, "loss": 0.7237, "step": 7510 }, { "epoch": 0.61, "grad_norm": 2.7718636953479243, "learning_rate": 3.485219764857025e-06, "loss": 0.7915, "step": 7511 }, { "epoch": 0.61, "grad_norm": 4.16678902980323, "learning_rate": 3.483966278504679e-06, "loss": 0.7403, "step": 7512 }, { "epoch": 0.61, "grad_norm": 2.6574637399353565, "learning_rate": 3.4827128970714123e-06, "loss": 0.6491, "step": 7513 }, { "epoch": 0.61, "grad_norm": 5.765782272221393, "learning_rate": 3.4814596206439666e-06, "loss": 0.6318, "step": 7514 }, { "epoch": 0.61, "grad_norm": 3.618754662511141, "learning_rate": 3.4802064493090765e-06, "loss": 0.5972, "step": 7515 }, { "epoch": 0.61, "grad_norm": 3.1053712083909537, "learning_rate": 3.4789533831534706e-06, "loss": 0.687, "step": 7516 }, { "epoch": 0.61, "grad_norm": 3.593768579302839, "learning_rate": 3.477700422263867e-06, "loss": 0.759, "step": 7517 }, { "epoch": 0.61, "grad_norm": 3.5757928555745937, "learning_rate": 3.4764475667269815e-06, "loss": 0.7198, "step": 7518 }, { "epoch": 0.61, "grad_norm": 14.456081601410123, "learning_rate": 3.4751948166295153e-06, "loss": 0.7842, "step": 7519 }, { "epoch": 0.61, "grad_norm": 3.296511440773971, "learning_rate": 3.473942172058169e-06, "loss": 0.671, "step": 7520 }, { "epoch": 0.61, "grad_norm": 2.7681650615448232, "learning_rate": 3.472689633099633e-06, "loss": 0.7124, "step": 7521 }, { "epoch": 0.61, "grad_norm": 3.619507762883987, "learning_rate": 3.4714371998405903e-06, "loss": 0.6797, "step": 7522 }, { "epoch": 0.61, "grad_norm": 4.7647575371595785, "learning_rate": 3.470184872367719e-06, "loss": 0.6645, "step": 7523 }, { "epoch": 0.61, "grad_norm": 3.465184326297442, "learning_rate": 3.468932650767689e-06, "loss": 0.5799, "step": 7524 }, { "epoch": 0.61, "grad_norm": 4.5381760373826, "learning_rate": 3.467680535127158e-06, "loss": 0.7122, "step": 7525 }, { "epoch": 0.61, "grad_norm": 10.240758344469585, "learning_rate": 3.466428525532783e-06, "loss": 0.673, "step": 7526 }, { "epoch": 0.61, "grad_norm": 2.915393453011111, "learning_rate": 3.465176622071209e-06, "loss": 0.6828, "step": 7527 }, { "epoch": 0.61, "grad_norm": 5.745266727619725, "learning_rate": 3.463924824829077e-06, "loss": 0.8178, "step": 7528 }, { "epoch": 0.61, "grad_norm": 6.8201581530158775, "learning_rate": 3.4626731338930194e-06, "loss": 0.611, "step": 7529 }, { "epoch": 0.61, "grad_norm": 3.457473827712386, "learning_rate": 3.4614215493496604e-06, "loss": 0.7095, "step": 7530 }, { "epoch": 0.61, "grad_norm": 2.9497103153545785, "learning_rate": 3.4601700712856202e-06, "loss": 0.8942, "step": 7531 }, { "epoch": 0.61, "grad_norm": 2.925772376733001, "learning_rate": 3.458918699787504e-06, "loss": 0.688, "step": 7532 }, { "epoch": 0.61, "grad_norm": 5.941175101434325, "learning_rate": 3.4576674349419178e-06, "loss": 0.6646, "step": 7533 }, { "epoch": 0.61, "grad_norm": 2.2634904600427452, "learning_rate": 3.456416276835457e-06, "loss": 0.6057, "step": 7534 }, { "epoch": 0.61, "grad_norm": 3.2104076943050965, "learning_rate": 3.4551652255547087e-06, "loss": 0.4869, "step": 7535 }, { "epoch": 0.61, "grad_norm": 3.965658145215351, "learning_rate": 3.453914281186253e-06, "loss": 0.7993, "step": 7536 }, { "epoch": 0.61, "grad_norm": 3.063300630377541, "learning_rate": 3.4526634438166643e-06, "loss": 0.6329, "step": 7537 }, { "epoch": 0.61, "grad_norm": 4.113619594841262, "learning_rate": 3.4514127135325105e-06, "loss": 0.7657, "step": 7538 }, { "epoch": 0.61, "grad_norm": 3.0728880864938515, "learning_rate": 3.4501620904203455e-06, "loss": 0.6631, "step": 7539 }, { "epoch": 0.61, "grad_norm": 3.6177796145425902, "learning_rate": 3.448911574566722e-06, "loss": 0.7692, "step": 7540 }, { "epoch": 0.61, "grad_norm": 4.3696261315460045, "learning_rate": 3.4476611660581856e-06, "loss": 0.601, "step": 7541 }, { "epoch": 0.61, "grad_norm": 3.469751703993662, "learning_rate": 3.4464108649812692e-06, "loss": 0.7128, "step": 7542 }, { "epoch": 0.61, "grad_norm": 2.7064940915106606, "learning_rate": 3.445160671422504e-06, "loss": 0.8469, "step": 7543 }, { "epoch": 0.61, "grad_norm": 3.3719725341938265, "learning_rate": 3.4439105854684117e-06, "loss": 0.7976, "step": 7544 }, { "epoch": 0.61, "grad_norm": 3.050294741985281, "learning_rate": 3.4426606072055033e-06, "loss": 0.7369, "step": 7545 }, { "epoch": 0.61, "grad_norm": 4.359685554400141, "learning_rate": 3.4414107367202865e-06, "loss": 0.5492, "step": 7546 }, { "epoch": 0.61, "grad_norm": 2.518188602099489, "learning_rate": 3.44016097409926e-06, "loss": 0.657, "step": 7547 }, { "epoch": 0.61, "grad_norm": 3.7170306957583934, "learning_rate": 3.4389113194289158e-06, "loss": 0.77, "step": 7548 }, { "epoch": 0.61, "grad_norm": 3.875129275150784, "learning_rate": 3.4376617727957396e-06, "loss": 0.6587, "step": 7549 }, { "epoch": 0.61, "grad_norm": 2.8938395998889006, "learning_rate": 3.4364123342862043e-06, "loss": 0.7543, "step": 7550 }, { "epoch": 0.61, "grad_norm": 7.438521489977645, "learning_rate": 3.4351630039867823e-06, "loss": 0.7345, "step": 7551 }, { "epoch": 0.61, "grad_norm": 4.142714207615951, "learning_rate": 3.433913781983932e-06, "loss": 0.6809, "step": 7552 }, { "epoch": 0.61, "grad_norm": 4.399413891029143, "learning_rate": 3.4326646683641085e-06, "loss": 0.6667, "step": 7553 }, { "epoch": 0.61, "grad_norm": 3.718180137185137, "learning_rate": 3.43141566321376e-06, "loss": 0.6406, "step": 7554 }, { "epoch": 0.61, "grad_norm": 3.865513430650758, "learning_rate": 3.4301667666193227e-06, "loss": 0.7799, "step": 7555 }, { "epoch": 0.61, "grad_norm": 155.36602555041543, "learning_rate": 3.4289179786672313e-06, "loss": 0.5487, "step": 7556 }, { "epoch": 0.61, "grad_norm": 2.583920680705088, "learning_rate": 3.4276692994439066e-06, "loss": 0.6163, "step": 7557 }, { "epoch": 0.61, "grad_norm": 2.8379677151565246, "learning_rate": 3.4264207290357677e-06, "loss": 0.6186, "step": 7558 }, { "epoch": 0.61, "grad_norm": 2.8614862845848745, "learning_rate": 3.4251722675292234e-06, "loss": 0.645, "step": 7559 }, { "epoch": 0.61, "grad_norm": 3.816174879108252, "learning_rate": 3.4239239150106718e-06, "loss": 0.6699, "step": 7560 }, { "epoch": 0.61, "grad_norm": 5.100903637001031, "learning_rate": 3.42267567156651e-06, "loss": 0.7712, "step": 7561 }, { "epoch": 0.61, "grad_norm": 5.698770048507221, "learning_rate": 3.421427537283123e-06, "loss": 0.6137, "step": 7562 }, { "epoch": 0.61, "grad_norm": 3.784394996793609, "learning_rate": 3.4201795122468895e-06, "loss": 0.6888, "step": 7563 }, { "epoch": 0.61, "grad_norm": 6.378711673819811, "learning_rate": 3.4189315965441838e-06, "loss": 0.6655, "step": 7564 }, { "epoch": 0.61, "grad_norm": 4.907416852753616, "learning_rate": 3.4176837902613645e-06, "loss": 0.7325, "step": 7565 }, { "epoch": 0.61, "grad_norm": 8.755515461970084, "learning_rate": 3.4164360934847912e-06, "loss": 0.6645, "step": 7566 }, { "epoch": 0.61, "grad_norm": 5.059930427335747, "learning_rate": 3.41518850630081e-06, "loss": 0.6388, "step": 7567 }, { "epoch": 0.61, "grad_norm": 4.848736507482465, "learning_rate": 3.413941028795763e-06, "loss": 0.6486, "step": 7568 }, { "epoch": 0.61, "grad_norm": 5.602527706998475, "learning_rate": 3.4126936610559835e-06, "loss": 0.5881, "step": 7569 }, { "epoch": 0.61, "grad_norm": 2.9329454364209995, "learning_rate": 3.4114464031677976e-06, "loss": 0.6854, "step": 7570 }, { "epoch": 0.61, "grad_norm": 3.2587049665308667, "learning_rate": 3.4101992552175243e-06, "loss": 0.7212, "step": 7571 }, { "epoch": 0.61, "grad_norm": 3.7887060670001023, "learning_rate": 3.4089522172914713e-06, "loss": 0.7532, "step": 7572 }, { "epoch": 0.62, "grad_norm": 7.1201085347086535, "learning_rate": 3.4077052894759423e-06, "loss": 0.7754, "step": 7573 }, { "epoch": 0.62, "grad_norm": 5.29265399362968, "learning_rate": 3.4064584718572348e-06, "loss": 0.7639, "step": 7574 }, { "epoch": 0.62, "grad_norm": 2.7258394766481953, "learning_rate": 3.4052117645216333e-06, "loss": 0.7618, "step": 7575 }, { "epoch": 0.62, "grad_norm": 6.378065528881045, "learning_rate": 3.4039651675554197e-06, "loss": 0.7478, "step": 7576 }, { "epoch": 0.62, "grad_norm": 3.6832148871869688, "learning_rate": 3.4027186810448677e-06, "loss": 0.7685, "step": 7577 }, { "epoch": 0.62, "grad_norm": 8.764362612449606, "learning_rate": 3.4014723050762382e-06, "loss": 0.6476, "step": 7578 }, { "epoch": 0.62, "grad_norm": 2.6143854157830595, "learning_rate": 3.4002260397357906e-06, "loss": 0.7827, "step": 7579 }, { "epoch": 0.62, "grad_norm": 4.134247018665249, "learning_rate": 3.3989798851097744e-06, "loss": 0.7283, "step": 7580 }, { "epoch": 0.62, "grad_norm": 3.6227313138825625, "learning_rate": 3.3977338412844315e-06, "loss": 0.7077, "step": 7581 }, { "epoch": 0.62, "grad_norm": 4.554280826344177, "learning_rate": 3.3964879083459945e-06, "loss": 0.6923, "step": 7582 }, { "epoch": 0.62, "grad_norm": 4.295105284035616, "learning_rate": 3.395242086380691e-06, "loss": 0.6117, "step": 7583 }, { "epoch": 0.62, "grad_norm": 4.647545962328437, "learning_rate": 3.3939963754747413e-06, "loss": 0.7276, "step": 7584 }, { "epoch": 0.62, "grad_norm": 3.4353216906245416, "learning_rate": 3.392750775714353e-06, "loss": 0.772, "step": 7585 }, { "epoch": 0.62, "grad_norm": 7.972910457391782, "learning_rate": 3.391505287185731e-06, "loss": 0.8188, "step": 7586 }, { "epoch": 0.62, "grad_norm": 3.741876121869406, "learning_rate": 3.3902599099750706e-06, "loss": 0.5565, "step": 7587 }, { "epoch": 0.62, "grad_norm": 3.4172216638434523, "learning_rate": 3.3890146441685602e-06, "loss": 0.6391, "step": 7588 }, { "epoch": 0.62, "grad_norm": 2.661050743556769, "learning_rate": 3.3877694898523817e-06, "loss": 0.5918, "step": 7589 }, { "epoch": 0.62, "grad_norm": 3.263090230973825, "learning_rate": 3.3865244471127045e-06, "loss": 0.7147, "step": 7590 }, { "epoch": 0.62, "grad_norm": 8.350130700238457, "learning_rate": 3.3852795160356968e-06, "loss": 0.6576, "step": 7591 }, { "epoch": 0.62, "grad_norm": 5.319606378751055, "learning_rate": 3.384034696707512e-06, "loss": 0.699, "step": 7592 }, { "epoch": 0.62, "grad_norm": 2.6821540304070117, "learning_rate": 3.3827899892143006e-06, "loss": 0.8146, "step": 7593 }, { "epoch": 0.62, "grad_norm": 2.9240450709303722, "learning_rate": 3.381545393642205e-06, "loss": 0.8117, "step": 7594 }, { "epoch": 0.62, "grad_norm": 2.176858948563231, "learning_rate": 3.380300910077359e-06, "loss": 0.6749, "step": 7595 }, { "epoch": 0.62, "grad_norm": 3.0392370141715475, "learning_rate": 3.3790565386058882e-06, "loss": 0.6283, "step": 7596 }, { "epoch": 0.62, "grad_norm": 4.212753144989034, "learning_rate": 3.3778122793139132e-06, "loss": 0.7344, "step": 7597 }, { "epoch": 0.62, "grad_norm": 4.2679195342232354, "learning_rate": 3.376568132287541e-06, "loss": 0.7003, "step": 7598 }, { "epoch": 0.62, "grad_norm": 4.687791044418684, "learning_rate": 3.3753240976128776e-06, "loss": 0.5509, "step": 7599 }, { "epoch": 0.62, "grad_norm": 3.1721425555808103, "learning_rate": 3.3740801753760142e-06, "loss": 0.7878, "step": 7600 }, { "epoch": 0.62, "grad_norm": 4.6877598116318575, "learning_rate": 3.3728363656630407e-06, "loss": 0.7774, "step": 7601 }, { "epoch": 0.62, "grad_norm": 6.146183988433133, "learning_rate": 3.3715926685600363e-06, "loss": 0.5087, "step": 7602 }, { "epoch": 0.62, "grad_norm": 3.642230564569195, "learning_rate": 3.3703490841530727e-06, "loss": 0.6192, "step": 7603 }, { "epoch": 0.62, "grad_norm": 4.47494069608227, "learning_rate": 3.369105612528215e-06, "loss": 0.6403, "step": 7604 }, { "epoch": 0.62, "grad_norm": 20.733977456026008, "learning_rate": 3.3678622537715167e-06, "loss": 0.7043, "step": 7605 }, { "epoch": 0.62, "grad_norm": 4.299512026883813, "learning_rate": 3.3666190079690274e-06, "loss": 0.7505, "step": 7606 }, { "epoch": 0.62, "grad_norm": 3.055570205287564, "learning_rate": 3.3653758752067873e-06, "loss": 0.7283, "step": 7607 }, { "epoch": 0.62, "grad_norm": 3.9612086080329707, "learning_rate": 3.3641328555708286e-06, "loss": 0.5763, "step": 7608 }, { "epoch": 0.62, "grad_norm": 3.670936416006846, "learning_rate": 3.3628899491471765e-06, "loss": 0.6743, "step": 7609 }, { "epoch": 0.62, "grad_norm": 3.8075709020030133, "learning_rate": 3.3616471560218476e-06, "loss": 0.6467, "step": 7610 }, { "epoch": 0.62, "grad_norm": 4.274976174388547, "learning_rate": 3.3604044762808543e-06, "loss": 0.7192, "step": 7611 }, { "epoch": 0.62, "grad_norm": 12.127380549450214, "learning_rate": 3.3591619100101924e-06, "loss": 0.6765, "step": 7612 }, { "epoch": 0.62, "grad_norm": 3.619476348721796, "learning_rate": 3.3579194572958583e-06, "loss": 0.6522, "step": 7613 }, { "epoch": 0.62, "grad_norm": 6.706480111979132, "learning_rate": 3.356677118223838e-06, "loss": 0.6877, "step": 7614 }, { "epoch": 0.62, "grad_norm": 3.971162282550115, "learning_rate": 3.355434892880107e-06, "loss": 0.6166, "step": 7615 }, { "epoch": 0.62, "grad_norm": 30.761581180809856, "learning_rate": 3.354192781350637e-06, "loss": 0.5656, "step": 7616 }, { "epoch": 0.62, "grad_norm": 4.618268303835102, "learning_rate": 3.3529507837213902e-06, "loss": 0.5783, "step": 7617 }, { "epoch": 0.62, "grad_norm": 3.542632453439617, "learning_rate": 3.3517089000783193e-06, "loss": 0.7226, "step": 7618 }, { "epoch": 0.62, "grad_norm": 8.787151627397668, "learning_rate": 3.35046713050737e-06, "loss": 0.6324, "step": 7619 }, { "epoch": 0.62, "grad_norm": 4.502601317352308, "learning_rate": 3.349225475094482e-06, "loss": 0.6605, "step": 7620 }, { "epoch": 0.62, "grad_norm": 9.129781494319017, "learning_rate": 3.347983933925586e-06, "loss": 0.6254, "step": 7621 }, { "epoch": 0.62, "grad_norm": 4.3374159657149445, "learning_rate": 3.3467425070866034e-06, "loss": 0.765, "step": 7622 }, { "epoch": 0.62, "grad_norm": 2.563864121273048, "learning_rate": 3.3455011946634486e-06, "loss": 0.7385, "step": 7623 }, { "epoch": 0.62, "grad_norm": 6.516016162321504, "learning_rate": 3.344259996742031e-06, "loss": 0.8031, "step": 7624 }, { "epoch": 0.62, "grad_norm": 4.513862114676875, "learning_rate": 3.343018913408245e-06, "loss": 0.6626, "step": 7625 }, { "epoch": 0.62, "grad_norm": 3.5021701188097376, "learning_rate": 3.341777944747983e-06, "loss": 0.6789, "step": 7626 }, { "epoch": 0.62, "grad_norm": 3.4814712205112768, "learning_rate": 3.3405370908471284e-06, "loss": 0.6634, "step": 7627 }, { "epoch": 0.62, "grad_norm": 5.173562625914548, "learning_rate": 3.339296351791556e-06, "loss": 0.8589, "step": 7628 }, { "epoch": 0.62, "grad_norm": 4.210867153656778, "learning_rate": 3.3380557276671345e-06, "loss": 0.6119, "step": 7629 }, { "epoch": 0.62, "grad_norm": 3.1572098433620903, "learning_rate": 3.33681521855972e-06, "loss": 0.679, "step": 7630 }, { "epoch": 0.62, "grad_norm": 4.504057341518131, "learning_rate": 3.335574824555165e-06, "loss": 0.7075, "step": 7631 }, { "epoch": 0.62, "grad_norm": 4.214673052098549, "learning_rate": 3.334334545739311e-06, "loss": 0.6395, "step": 7632 }, { "epoch": 0.62, "grad_norm": 2.9608187310383824, "learning_rate": 3.3330943821979944e-06, "loss": 0.8362, "step": 7633 }, { "epoch": 0.62, "grad_norm": 9.761837016919914, "learning_rate": 3.3318543340170427e-06, "loss": 0.8531, "step": 7634 }, { "epoch": 0.62, "grad_norm": 3.1988797996930134, "learning_rate": 3.3306144012822745e-06, "loss": 0.7701, "step": 7635 }, { "epoch": 0.62, "grad_norm": 3.961756173334681, "learning_rate": 3.3293745840795004e-06, "loss": 0.7678, "step": 7636 }, { "epoch": 0.62, "grad_norm": 2.6678785403794336, "learning_rate": 3.328134882494527e-06, "loss": 0.6052, "step": 7637 }, { "epoch": 0.62, "grad_norm": 9.698446651831313, "learning_rate": 3.326895296613144e-06, "loss": 0.5608, "step": 7638 }, { "epoch": 0.62, "grad_norm": 2.7950826898696635, "learning_rate": 3.325655826521143e-06, "loss": 0.6427, "step": 7639 }, { "epoch": 0.62, "grad_norm": 2.4300939959299215, "learning_rate": 3.3244164723043e-06, "loss": 0.6423, "step": 7640 }, { "epoch": 0.62, "grad_norm": 3.174686478526541, "learning_rate": 3.323177234048387e-06, "loss": 0.7855, "step": 7641 }, { "epoch": 0.62, "grad_norm": 4.072481012022744, "learning_rate": 3.321938111839168e-06, "loss": 0.597, "step": 7642 }, { "epoch": 0.62, "grad_norm": 4.2582583679564525, "learning_rate": 3.3206991057623977e-06, "loss": 0.7655, "step": 7643 }, { "epoch": 0.62, "grad_norm": 6.610611876438256, "learning_rate": 3.3194602159038247e-06, "loss": 0.7194, "step": 7644 }, { "epoch": 0.62, "grad_norm": 7.095847825292501, "learning_rate": 3.318221442349184e-06, "loss": 0.6448, "step": 7645 }, { "epoch": 0.62, "grad_norm": 2.939983950333868, "learning_rate": 3.3169827851842096e-06, "loss": 0.6499, "step": 7646 }, { "epoch": 0.62, "grad_norm": 4.900593757030222, "learning_rate": 3.3157442444946247e-06, "loss": 0.6828, "step": 7647 }, { "epoch": 0.62, "grad_norm": 4.473196969809137, "learning_rate": 3.3145058203661416e-06, "loss": 0.7372, "step": 7648 }, { "epoch": 0.62, "grad_norm": 3.340425883696199, "learning_rate": 3.3132675128844684e-06, "loss": 0.7354, "step": 7649 }, { "epoch": 0.62, "grad_norm": 4.797626039720172, "learning_rate": 3.312029322135306e-06, "loss": 0.6353, "step": 7650 }, { "epoch": 0.62, "grad_norm": 5.327347955083741, "learning_rate": 3.3107912482043413e-06, "loss": 0.6843, "step": 7651 }, { "epoch": 0.62, "grad_norm": 10.098680333792164, "learning_rate": 3.309553291177258e-06, "loss": 0.73, "step": 7652 }, { "epoch": 0.62, "grad_norm": 5.232056353290789, "learning_rate": 3.3083154511397308e-06, "loss": 0.7105, "step": 7653 }, { "epoch": 0.62, "grad_norm": 3.3190201068345804, "learning_rate": 3.307077728177427e-06, "loss": 0.739, "step": 7654 }, { "epoch": 0.62, "grad_norm": 3.783498222703066, "learning_rate": 3.305840122376003e-06, "loss": 0.815, "step": 7655 }, { "epoch": 0.62, "grad_norm": 2.583289707211872, "learning_rate": 3.30460263382111e-06, "loss": 0.5703, "step": 7656 }, { "epoch": 0.62, "grad_norm": 4.035889472612898, "learning_rate": 3.3033652625983915e-06, "loss": 0.7322, "step": 7657 }, { "epoch": 0.62, "grad_norm": 6.109958115776633, "learning_rate": 3.302128008793478e-06, "loss": 0.6066, "step": 7658 }, { "epoch": 0.62, "grad_norm": 14.75017196502749, "learning_rate": 3.300890872491997e-06, "loss": 0.6381, "step": 7659 }, { "epoch": 0.62, "grad_norm": 2.735863448747024, "learning_rate": 3.2996538537795656e-06, "loss": 0.6694, "step": 7660 }, { "epoch": 0.62, "grad_norm": 5.437792596895274, "learning_rate": 3.2984169527417943e-06, "loss": 0.7091, "step": 7661 }, { "epoch": 0.62, "grad_norm": 23.877700755266194, "learning_rate": 3.2971801694642845e-06, "loss": 0.5451, "step": 7662 }, { "epoch": 0.62, "grad_norm": 3.8946372978336408, "learning_rate": 3.295943504032629e-06, "loss": 0.7329, "step": 7663 }, { "epoch": 0.62, "grad_norm": 3.181871180584756, "learning_rate": 3.2947069565324134e-06, "loss": 0.5762, "step": 7664 }, { "epoch": 0.62, "grad_norm": 3.8124439541833364, "learning_rate": 3.2934705270492124e-06, "loss": 0.5801, "step": 7665 }, { "epoch": 0.62, "grad_norm": 3.829757867100352, "learning_rate": 3.292234215668596e-06, "loss": 0.5428, "step": 7666 }, { "epoch": 0.62, "grad_norm": 4.637744724675553, "learning_rate": 3.2909980224761246e-06, "loss": 0.7825, "step": 7667 }, { "epoch": 0.62, "grad_norm": 3.013602207219761, "learning_rate": 3.289761947557351e-06, "loss": 0.7835, "step": 7668 }, { "epoch": 0.62, "grad_norm": 3.4680334890081848, "learning_rate": 3.2885259909978205e-06, "loss": 0.7759, "step": 7669 }, { "epoch": 0.62, "grad_norm": 2.439206637462609, "learning_rate": 3.287290152883067e-06, "loss": 0.6723, "step": 7670 }, { "epoch": 0.62, "grad_norm": 4.179106225527792, "learning_rate": 3.286054433298619e-06, "loss": 0.7072, "step": 7671 }, { "epoch": 0.62, "grad_norm": 4.8041732960693775, "learning_rate": 3.2848188323299964e-06, "loss": 0.5256, "step": 7672 }, { "epoch": 0.62, "grad_norm": 2.60557524086, "learning_rate": 3.283583350062709e-06, "loss": 0.5816, "step": 7673 }, { "epoch": 0.62, "grad_norm": 4.300835489776446, "learning_rate": 3.2823479865822616e-06, "loss": 0.7765, "step": 7674 }, { "epoch": 0.62, "grad_norm": 10.040977092105795, "learning_rate": 3.2811127419741495e-06, "loss": 0.688, "step": 7675 }, { "epoch": 0.62, "grad_norm": 7.412265533566398, "learning_rate": 3.279877616323858e-06, "loss": 0.6562, "step": 7676 }, { "epoch": 0.62, "grad_norm": 4.422554359228267, "learning_rate": 3.278642609716868e-06, "loss": 0.5768, "step": 7677 }, { "epoch": 0.62, "grad_norm": 4.690113171316932, "learning_rate": 3.2774077222386465e-06, "loss": 0.6951, "step": 7678 }, { "epoch": 0.62, "grad_norm": 3.7419522628757904, "learning_rate": 3.276172953974658e-06, "loss": 0.7497, "step": 7679 }, { "epoch": 0.62, "grad_norm": 4.393845110521941, "learning_rate": 3.2749383050103534e-06, "loss": 0.7425, "step": 7680 }, { "epoch": 0.62, "grad_norm": 3.6937774292244923, "learning_rate": 3.2737037754311808e-06, "loss": 0.6713, "step": 7681 }, { "epoch": 0.62, "grad_norm": 4.410884187724273, "learning_rate": 3.2724693653225757e-06, "loss": 0.7277, "step": 7682 }, { "epoch": 0.62, "grad_norm": 3.9393292391271264, "learning_rate": 3.2712350747699704e-06, "loss": 0.7861, "step": 7683 }, { "epoch": 0.62, "grad_norm": 5.231265555332869, "learning_rate": 3.2700009038587817e-06, "loss": 0.5494, "step": 7684 }, { "epoch": 0.62, "grad_norm": 3.348974837981795, "learning_rate": 3.2687668526744224e-06, "loss": 0.7663, "step": 7685 }, { "epoch": 0.62, "grad_norm": 5.369366626832881, "learning_rate": 3.267532921302299e-06, "loss": 0.5978, "step": 7686 }, { "epoch": 0.62, "grad_norm": 7.706487494107511, "learning_rate": 3.2662991098278057e-06, "loss": 0.5342, "step": 7687 }, { "epoch": 0.62, "grad_norm": 3.277494553101847, "learning_rate": 3.2650654183363297e-06, "loss": 0.7206, "step": 7688 }, { "epoch": 0.62, "grad_norm": 2.9952783807855683, "learning_rate": 3.2638318469132507e-06, "loss": 0.6935, "step": 7689 }, { "epoch": 0.62, "grad_norm": 8.033823874131544, "learning_rate": 3.262598395643942e-06, "loss": 0.5771, "step": 7690 }, { "epoch": 0.62, "grad_norm": 11.449662697936539, "learning_rate": 3.261365064613762e-06, "loss": 0.6382, "step": 7691 }, { "epoch": 0.62, "grad_norm": 2.726104823017054, "learning_rate": 3.260131853908066e-06, "loss": 0.6494, "step": 7692 }, { "epoch": 0.62, "grad_norm": 3.1446021922028695, "learning_rate": 3.2588987636122016e-06, "loss": 0.6588, "step": 7693 }, { "epoch": 0.62, "grad_norm": 2.874213712478007, "learning_rate": 3.2576657938115068e-06, "loss": 0.5727, "step": 7694 }, { "epoch": 0.62, "grad_norm": 3.49788809870037, "learning_rate": 3.2564329445913085e-06, "loss": 0.6762, "step": 7695 }, { "epoch": 0.63, "grad_norm": 4.047238046135469, "learning_rate": 3.255200216036929e-06, "loss": 0.7261, "step": 7696 }, { "epoch": 0.63, "grad_norm": 3.226500976776351, "learning_rate": 3.2539676082336823e-06, "loss": 0.7115, "step": 7697 }, { "epoch": 0.63, "grad_norm": 4.613355220739448, "learning_rate": 3.2527351212668688e-06, "loss": 0.6949, "step": 7698 }, { "epoch": 0.63, "grad_norm": 2.868387409718942, "learning_rate": 3.251502755221787e-06, "loss": 0.6188, "step": 7699 }, { "epoch": 0.63, "grad_norm": 5.177311637015253, "learning_rate": 3.250270510183724e-06, "loss": 0.8344, "step": 7700 }, { "epoch": 0.63, "grad_norm": 6.184161304533539, "learning_rate": 3.2490383862379594e-06, "loss": 0.7366, "step": 7701 }, { "epoch": 0.63, "grad_norm": 2.757913672258659, "learning_rate": 3.2478063834697637e-06, "loss": 0.5776, "step": 7702 }, { "epoch": 0.63, "grad_norm": 3.603749390775768, "learning_rate": 3.2465745019643992e-06, "loss": 0.8333, "step": 7703 }, { "epoch": 0.63, "grad_norm": 6.971405006653064, "learning_rate": 3.24534274180712e-06, "loss": 0.7142, "step": 7704 }, { "epoch": 0.63, "grad_norm": 2.5076719510943923, "learning_rate": 3.2441111030831695e-06, "loss": 0.6597, "step": 7705 }, { "epoch": 0.63, "grad_norm": 4.19856775593818, "learning_rate": 3.2428795858777873e-06, "loss": 0.6919, "step": 7706 }, { "epoch": 0.63, "grad_norm": 3.0672345442128863, "learning_rate": 3.2416481902762015e-06, "loss": 0.7056, "step": 7707 }, { "epoch": 0.63, "grad_norm": 6.554291488559537, "learning_rate": 3.2404169163636324e-06, "loss": 0.5825, "step": 7708 }, { "epoch": 0.63, "grad_norm": 4.094106997714975, "learning_rate": 3.239185764225291e-06, "loss": 0.6599, "step": 7709 }, { "epoch": 0.63, "grad_norm": 3.8593722196957, "learning_rate": 3.237954733946385e-06, "loss": 0.6976, "step": 7710 }, { "epoch": 0.63, "grad_norm": 5.0051146894564935, "learning_rate": 3.2367238256121035e-06, "loss": 0.6504, "step": 7711 }, { "epoch": 0.63, "grad_norm": 7.05607896581478, "learning_rate": 3.2354930393076373e-06, "loss": 0.6607, "step": 7712 }, { "epoch": 0.63, "grad_norm": 3.798002792397022, "learning_rate": 3.234262375118161e-06, "loss": 0.5919, "step": 7713 }, { "epoch": 0.63, "grad_norm": 4.028723396190554, "learning_rate": 3.233031833128848e-06, "loss": 0.7423, "step": 7714 }, { "epoch": 0.63, "grad_norm": 5.786384515398065, "learning_rate": 3.2318014134248565e-06, "loss": 0.8331, "step": 7715 }, { "epoch": 0.63, "grad_norm": 2.6654172140159296, "learning_rate": 3.230571116091341e-06, "loss": 0.7741, "step": 7716 }, { "epoch": 0.63, "grad_norm": 3.967654398945249, "learning_rate": 3.229340941213448e-06, "loss": 0.6815, "step": 7717 }, { "epoch": 0.63, "grad_norm": 5.447210932324046, "learning_rate": 3.228110888876308e-06, "loss": 0.6838, "step": 7718 }, { "epoch": 0.63, "grad_norm": 4.349344794487387, "learning_rate": 3.226880959165053e-06, "loss": 0.6382, "step": 7719 }, { "epoch": 0.63, "grad_norm": 3.8974510665640474, "learning_rate": 3.225651152164799e-06, "loss": 0.5971, "step": 7720 }, { "epoch": 0.63, "grad_norm": 18.835150563214153, "learning_rate": 3.2244214679606574e-06, "loss": 0.6938, "step": 7721 }, { "epoch": 0.63, "grad_norm": 7.590028560985334, "learning_rate": 3.22319190663773e-06, "loss": 0.7676, "step": 7722 }, { "epoch": 0.63, "grad_norm": 40.435510990333455, "learning_rate": 3.2219624682811125e-06, "loss": 0.7199, "step": 7723 }, { "epoch": 0.63, "grad_norm": 4.087583585415945, "learning_rate": 3.2207331529758856e-06, "loss": 0.6098, "step": 7724 }, { "epoch": 0.63, "grad_norm": 5.628691103812373, "learning_rate": 3.2195039608071278e-06, "loss": 0.6713, "step": 7725 }, { "epoch": 0.63, "grad_norm": 2.912436390534725, "learning_rate": 3.2182748918599064e-06, "loss": 0.6006, "step": 7726 }, { "epoch": 0.63, "grad_norm": 3.0509772199619407, "learning_rate": 3.2170459462192827e-06, "loss": 0.683, "step": 7727 }, { "epoch": 0.63, "grad_norm": 3.0222839387223805, "learning_rate": 3.215817123970305e-06, "loss": 0.6247, "step": 7728 }, { "epoch": 0.63, "grad_norm": 4.677206881731413, "learning_rate": 3.214588425198016e-06, "loss": 0.6999, "step": 7729 }, { "epoch": 0.63, "grad_norm": 5.514324746090309, "learning_rate": 3.213359849987452e-06, "loss": 0.6766, "step": 7730 }, { "epoch": 0.63, "grad_norm": 3.223193905586136, "learning_rate": 3.212131398423634e-06, "loss": 0.8193, "step": 7731 }, { "epoch": 0.63, "grad_norm": 4.287674588437586, "learning_rate": 3.2109030705915805e-06, "loss": 0.9024, "step": 7732 }, { "epoch": 0.63, "grad_norm": 2.8462741984231394, "learning_rate": 3.2096748665763e-06, "loss": 0.6102, "step": 7733 }, { "epoch": 0.63, "grad_norm": 6.23799182649523, "learning_rate": 3.208446786462791e-06, "loss": 0.6404, "step": 7734 }, { "epoch": 0.63, "grad_norm": 2.5296982293489747, "learning_rate": 3.2072188303360462e-06, "loss": 0.6327, "step": 7735 }, { "epoch": 0.63, "grad_norm": 4.5288065371547255, "learning_rate": 3.2059909982810456e-06, "loss": 0.7014, "step": 7736 }, { "epoch": 0.63, "grad_norm": 5.417449009010182, "learning_rate": 3.2047632903827664e-06, "loss": 0.6281, "step": 7737 }, { "epoch": 0.63, "grad_norm": 4.350418374016844, "learning_rate": 3.2035357067261686e-06, "loss": 0.5926, "step": 7738 }, { "epoch": 0.63, "grad_norm": 3.0377568791259586, "learning_rate": 3.202308247396212e-06, "loss": 0.6447, "step": 7739 }, { "epoch": 0.63, "grad_norm": 4.496318808813016, "learning_rate": 3.201080912477843e-06, "loss": 0.6377, "step": 7740 }, { "epoch": 0.63, "grad_norm": 7.598672818303001, "learning_rate": 3.199853702056003e-06, "loss": 0.863, "step": 7741 }, { "epoch": 0.63, "grad_norm": 13.039716597322176, "learning_rate": 3.198626616215621e-06, "loss": 0.662, "step": 7742 }, { "epoch": 0.63, "grad_norm": 3.408917508565585, "learning_rate": 3.197399655041621e-06, "loss": 0.693, "step": 7743 }, { "epoch": 0.63, "grad_norm": 7.22095126804445, "learning_rate": 3.196172818618914e-06, "loss": 0.6231, "step": 7744 }, { "epoch": 0.63, "grad_norm": 3.656461033961272, "learning_rate": 3.194946107032405e-06, "loss": 0.7085, "step": 7745 }, { "epoch": 0.63, "grad_norm": 4.92919446388533, "learning_rate": 3.1937195203669907e-06, "loss": 0.7219, "step": 7746 }, { "epoch": 0.63, "grad_norm": 8.851805241243225, "learning_rate": 3.192493058707559e-06, "loss": 0.7888, "step": 7747 }, { "epoch": 0.63, "grad_norm": 3.499339926878046, "learning_rate": 3.1912667221389892e-06, "loss": 0.6251, "step": 7748 }, { "epoch": 0.63, "grad_norm": 6.799931601253928, "learning_rate": 3.1900405107461506e-06, "loss": 0.7246, "step": 7749 }, { "epoch": 0.63, "grad_norm": 2.6846434007196076, "learning_rate": 3.1888144246139067e-06, "loss": 0.7842, "step": 7750 }, { "epoch": 0.63, "grad_norm": 4.585604911486977, "learning_rate": 3.187588463827107e-06, "loss": 0.6561, "step": 7751 }, { "epoch": 0.63, "grad_norm": 3.718118408356869, "learning_rate": 3.1863626284705997e-06, "loss": 0.6814, "step": 7752 }, { "epoch": 0.63, "grad_norm": 5.190609558842971, "learning_rate": 3.185136918629216e-06, "loss": 0.6963, "step": 7753 }, { "epoch": 0.63, "grad_norm": 3.239222862174707, "learning_rate": 3.1839113343877848e-06, "loss": 0.5497, "step": 7754 }, { "epoch": 0.63, "grad_norm": 9.88778798528499, "learning_rate": 3.182685875831124e-06, "loss": 0.7924, "step": 7755 }, { "epoch": 0.63, "grad_norm": 3.1218864768920374, "learning_rate": 3.1814605430440458e-06, "loss": 0.5899, "step": 7756 }, { "epoch": 0.63, "grad_norm": 3.15791075684033, "learning_rate": 3.180235336111346e-06, "loss": 0.7058, "step": 7757 }, { "epoch": 0.63, "grad_norm": 4.377376120293765, "learning_rate": 3.17901025511782e-06, "loss": 0.6661, "step": 7758 }, { "epoch": 0.63, "grad_norm": 3.409969053505127, "learning_rate": 3.1777853001482493e-06, "loss": 0.5112, "step": 7759 }, { "epoch": 0.63, "grad_norm": 3.980900907828548, "learning_rate": 3.1765604712874115e-06, "loss": 0.7136, "step": 7760 }, { "epoch": 0.63, "grad_norm": 5.151238656542228, "learning_rate": 3.1753357686200693e-06, "loss": 0.5439, "step": 7761 }, { "epoch": 0.63, "grad_norm": 22.615372455978253, "learning_rate": 3.1741111922309797e-06, "loss": 0.6819, "step": 7762 }, { "epoch": 0.63, "grad_norm": 7.980533539373199, "learning_rate": 3.1728867422048957e-06, "loss": 0.6054, "step": 7763 }, { "epoch": 0.63, "grad_norm": 4.7561622208903795, "learning_rate": 3.171662418626551e-06, "loss": 0.7378, "step": 7764 }, { "epoch": 0.63, "grad_norm": 6.126044575079399, "learning_rate": 3.1704382215806794e-06, "loss": 0.6452, "step": 7765 }, { "epoch": 0.63, "grad_norm": 3.5784985141591794, "learning_rate": 3.1692141511520025e-06, "loss": 0.7608, "step": 7766 }, { "epoch": 0.63, "grad_norm": 4.4288329191768545, "learning_rate": 3.1679902074252344e-06, "loss": 0.7558, "step": 7767 }, { "epoch": 0.63, "grad_norm": 3.4271202605406024, "learning_rate": 3.1667663904850786e-06, "loss": 0.8049, "step": 7768 }, { "epoch": 0.63, "grad_norm": 4.220449535006005, "learning_rate": 3.165542700416232e-06, "loss": 0.4805, "step": 7769 }, { "epoch": 0.63, "grad_norm": 3.3989277540852494, "learning_rate": 3.1643191373033833e-06, "loss": 0.5789, "step": 7770 }, { "epoch": 0.63, "grad_norm": 3.022969568252157, "learning_rate": 3.1630957012312063e-06, "loss": 0.847, "step": 7771 }, { "epoch": 0.63, "grad_norm": 3.7117206380202394, "learning_rate": 3.161872392284373e-06, "loss": 0.6714, "step": 7772 }, { "epoch": 0.63, "grad_norm": 6.240200871913442, "learning_rate": 3.160649210547544e-06, "loss": 0.6754, "step": 7773 }, { "epoch": 0.63, "grad_norm": 2.527652181232202, "learning_rate": 3.1594261561053707e-06, "loss": 0.6578, "step": 7774 }, { "epoch": 0.63, "grad_norm": 2.890328872037365, "learning_rate": 3.158203229042498e-06, "loss": 0.7807, "step": 7775 }, { "epoch": 0.63, "grad_norm": 4.279387722613669, "learning_rate": 3.156980429443559e-06, "loss": 0.675, "step": 7776 }, { "epoch": 0.63, "grad_norm": 4.3813053920552, "learning_rate": 3.1557577573931786e-06, "loss": 0.6648, "step": 7777 }, { "epoch": 0.63, "grad_norm": 7.8366371850988665, "learning_rate": 3.154535212975973e-06, "loss": 0.6509, "step": 7778 }, { "epoch": 0.63, "grad_norm": 6.510129993214789, "learning_rate": 3.1533127962765497e-06, "loss": 0.6698, "step": 7779 }, { "epoch": 0.63, "grad_norm": 3.256387733347844, "learning_rate": 3.1520905073795096e-06, "loss": 0.7841, "step": 7780 }, { "epoch": 0.63, "grad_norm": 5.144551442583872, "learning_rate": 3.150868346369441e-06, "loss": 0.7633, "step": 7781 }, { "epoch": 0.63, "grad_norm": 12.38142037042164, "learning_rate": 3.1496463133309274e-06, "loss": 0.7002, "step": 7782 }, { "epoch": 0.63, "grad_norm": 3.018917575865357, "learning_rate": 3.14842440834854e-06, "loss": 0.6307, "step": 7783 }, { "epoch": 0.63, "grad_norm": 21.90359593025437, "learning_rate": 3.1472026315068404e-06, "loss": 0.5818, "step": 7784 }, { "epoch": 0.63, "grad_norm": 5.974350587656121, "learning_rate": 3.1459809828903865e-06, "loss": 0.6482, "step": 7785 }, { "epoch": 0.63, "grad_norm": 13.927370647612653, "learning_rate": 3.144759462583721e-06, "loss": 0.7035, "step": 7786 }, { "epoch": 0.63, "grad_norm": 31.941265019583575, "learning_rate": 3.1435380706713823e-06, "loss": 0.7151, "step": 7787 }, { "epoch": 0.63, "grad_norm": 8.55445053760908, "learning_rate": 3.1423168072378986e-06, "loss": 0.5533, "step": 7788 }, { "epoch": 0.63, "grad_norm": 6.0138693672080334, "learning_rate": 3.1410956723677888e-06, "loss": 0.6116, "step": 7789 }, { "epoch": 0.63, "grad_norm": 5.008743160972371, "learning_rate": 3.1398746661455647e-06, "loss": 0.7882, "step": 7790 }, { "epoch": 0.63, "grad_norm": 4.551174500873657, "learning_rate": 3.1386537886557244e-06, "loss": 0.6925, "step": 7791 }, { "epoch": 0.63, "grad_norm": 3.5890832512231343, "learning_rate": 3.137433039982763e-06, "loss": 0.6555, "step": 7792 }, { "epoch": 0.63, "grad_norm": 9.65990171509788, "learning_rate": 3.1362124202111614e-06, "loss": 0.6497, "step": 7793 }, { "epoch": 0.63, "grad_norm": 7.137935208217813, "learning_rate": 3.134991929425396e-06, "loss": 0.7044, "step": 7794 }, { "epoch": 0.63, "grad_norm": 4.887859221024265, "learning_rate": 3.1337715677099325e-06, "loss": 0.6397, "step": 7795 }, { "epoch": 0.63, "grad_norm": 4.722730914851677, "learning_rate": 3.1325513351492286e-06, "loss": 0.6903, "step": 7796 }, { "epoch": 0.63, "grad_norm": 11.819912797652126, "learning_rate": 3.131331231827729e-06, "loss": 0.7927, "step": 7797 }, { "epoch": 0.63, "grad_norm": 3.897105251050104, "learning_rate": 3.130111257829874e-06, "loss": 0.5588, "step": 7798 }, { "epoch": 0.63, "grad_norm": 6.406932038833458, "learning_rate": 3.1288914132400948e-06, "loss": 0.8049, "step": 7799 }, { "epoch": 0.63, "grad_norm": 24.688988420277415, "learning_rate": 3.127671698142811e-06, "loss": 0.5677, "step": 7800 }, { "epoch": 0.63, "grad_norm": 3.878597056739613, "learning_rate": 3.1264521126224345e-06, "loss": 0.6835, "step": 7801 }, { "epoch": 0.63, "grad_norm": 2.6840381167648113, "learning_rate": 3.1252326567633686e-06, "loss": 0.5967, "step": 7802 }, { "epoch": 0.63, "grad_norm": 4.468656781635662, "learning_rate": 3.1240133306500096e-06, "loss": 0.7435, "step": 7803 }, { "epoch": 0.63, "grad_norm": 3.006163746955273, "learning_rate": 3.122794134366738e-06, "loss": 0.6444, "step": 7804 }, { "epoch": 0.63, "grad_norm": 11.017568369656365, "learning_rate": 3.1215750679979316e-06, "loss": 0.7563, "step": 7805 }, { "epoch": 0.63, "grad_norm": 4.02654974514555, "learning_rate": 3.120356131627959e-06, "loss": 0.9429, "step": 7806 }, { "epoch": 0.63, "grad_norm": 7.012079005629595, "learning_rate": 3.119137325341178e-06, "loss": 0.5927, "step": 7807 }, { "epoch": 0.63, "grad_norm": 4.21845084670605, "learning_rate": 3.117918649221936e-06, "loss": 0.5881, "step": 7808 }, { "epoch": 0.63, "grad_norm": 4.665467673248985, "learning_rate": 3.116700103354575e-06, "loss": 0.7387, "step": 7809 }, { "epoch": 0.63, "grad_norm": 3.387636063968424, "learning_rate": 3.115481687823425e-06, "loss": 0.5892, "step": 7810 }, { "epoch": 0.63, "grad_norm": 3.1986666881053, "learning_rate": 3.114263402712807e-06, "loss": 0.5072, "step": 7811 }, { "epoch": 0.63, "grad_norm": 3.6582221794064966, "learning_rate": 3.113045248107035e-06, "loss": 0.6263, "step": 7812 }, { "epoch": 0.63, "grad_norm": 3.202176878332542, "learning_rate": 3.1118272240904136e-06, "loss": 0.6164, "step": 7813 }, { "epoch": 0.63, "grad_norm": 5.006164122249263, "learning_rate": 3.110609330747237e-06, "loss": 0.6311, "step": 7814 }, { "epoch": 0.63, "grad_norm": 5.559994708593936, "learning_rate": 3.109391568161792e-06, "loss": 0.7401, "step": 7815 }, { "epoch": 0.63, "grad_norm": 2.9547972782051644, "learning_rate": 3.108173936418355e-06, "loss": 0.5967, "step": 7816 }, { "epoch": 0.63, "grad_norm": 3.0577294516536573, "learning_rate": 3.106956435601194e-06, "loss": 0.6676, "step": 7817 }, { "epoch": 0.63, "grad_norm": 2.7416254123662127, "learning_rate": 3.105739065794565e-06, "loss": 0.7813, "step": 7818 }, { "epoch": 0.64, "grad_norm": 3.750782139307154, "learning_rate": 3.104521827082721e-06, "loss": 0.6834, "step": 7819 }, { "epoch": 0.64, "grad_norm": 3.1726802635563955, "learning_rate": 3.1033047195499013e-06, "loss": 0.5241, "step": 7820 }, { "epoch": 0.64, "grad_norm": 6.520254021356529, "learning_rate": 3.102087743280337e-06, "loss": 0.6318, "step": 7821 }, { "epoch": 0.64, "grad_norm": 4.515664299037804, "learning_rate": 3.1008708983582525e-06, "loss": 0.5974, "step": 7822 }, { "epoch": 0.64, "grad_norm": 4.205600514002894, "learning_rate": 3.0996541848678598e-06, "loss": 0.6768, "step": 7823 }, { "epoch": 0.64, "grad_norm": 5.388180188522335, "learning_rate": 3.0984376028933623e-06, "loss": 0.7288, "step": 7824 }, { "epoch": 0.64, "grad_norm": 43.8329588145411, "learning_rate": 3.0972211525189566e-06, "loss": 0.6351, "step": 7825 }, { "epoch": 0.64, "grad_norm": 3.6485111273942277, "learning_rate": 3.096004833828827e-06, "loss": 0.6738, "step": 7826 }, { "epoch": 0.64, "grad_norm": 3.427899421891699, "learning_rate": 3.0947886469071512e-06, "loss": 0.83, "step": 7827 }, { "epoch": 0.64, "grad_norm": 3.2538144675701868, "learning_rate": 3.0935725918380977e-06, "loss": 0.6581, "step": 7828 }, { "epoch": 0.64, "grad_norm": 5.297058177906108, "learning_rate": 3.0923566687058264e-06, "loss": 0.6391, "step": 7829 }, { "epoch": 0.64, "grad_norm": 3.36028536088647, "learning_rate": 3.0911408775944836e-06, "loss": 0.6735, "step": 7830 }, { "epoch": 0.64, "grad_norm": 27.407577340052498, "learning_rate": 3.0899252185882106e-06, "loss": 0.6654, "step": 7831 }, { "epoch": 0.64, "grad_norm": 3.9111025150663843, "learning_rate": 3.0887096917711408e-06, "loss": 0.6961, "step": 7832 }, { "epoch": 0.64, "grad_norm": 4.422645980999589, "learning_rate": 3.0874942972273937e-06, "loss": 0.5759, "step": 7833 }, { "epoch": 0.64, "grad_norm": 3.5753318802998315, "learning_rate": 3.086279035041083e-06, "loss": 0.7835, "step": 7834 }, { "epoch": 0.64, "grad_norm": 2.615130735915641, "learning_rate": 3.0850639052963135e-06, "loss": 0.7615, "step": 7835 }, { "epoch": 0.64, "grad_norm": 5.192258680627292, "learning_rate": 3.0838489080771804e-06, "loss": 0.5789, "step": 7836 }, { "epoch": 0.64, "grad_norm": 5.838917166902251, "learning_rate": 3.082634043467767e-06, "loss": 0.6546, "step": 7837 }, { "epoch": 0.64, "grad_norm": 2.9469886056947705, "learning_rate": 3.0814193115521496e-06, "loss": 0.715, "step": 7838 }, { "epoch": 0.64, "grad_norm": 3.817014681098799, "learning_rate": 3.0802047124143964e-06, "loss": 0.7515, "step": 7839 }, { "epoch": 0.64, "grad_norm": 3.3198898935645875, "learning_rate": 3.078990246138566e-06, "loss": 0.5867, "step": 7840 }, { "epoch": 0.64, "grad_norm": 7.8286379030413045, "learning_rate": 3.077775912808706e-06, "loss": 0.5958, "step": 7841 }, { "epoch": 0.64, "grad_norm": 4.295519938350132, "learning_rate": 3.0765617125088554e-06, "loss": 0.7171, "step": 7842 }, { "epoch": 0.64, "grad_norm": 3.969462832878753, "learning_rate": 3.075347645323048e-06, "loss": 0.6072, "step": 7843 }, { "epoch": 0.64, "grad_norm": 5.640517531966812, "learning_rate": 3.074133711335299e-06, "loss": 0.6917, "step": 7844 }, { "epoch": 0.64, "grad_norm": 3.9185191450811088, "learning_rate": 3.072919910629625e-06, "loss": 0.7194, "step": 7845 }, { "epoch": 0.64, "grad_norm": 6.523923374258665, "learning_rate": 3.071706243290026e-06, "loss": 0.7285, "step": 7846 }, { "epoch": 0.64, "grad_norm": 9.72633181915045, "learning_rate": 3.0704927094004964e-06, "loss": 0.6619, "step": 7847 }, { "epoch": 0.64, "grad_norm": 2.573777202958262, "learning_rate": 3.0692793090450217e-06, "loss": 0.5786, "step": 7848 }, { "epoch": 0.64, "grad_norm": 4.061796164522386, "learning_rate": 3.068066042307576e-06, "loss": 0.7024, "step": 7849 }, { "epoch": 0.64, "grad_norm": 16.607891963948443, "learning_rate": 3.0668529092721246e-06, "loss": 0.6591, "step": 7850 }, { "epoch": 0.64, "grad_norm": 9.213118749684922, "learning_rate": 3.0656399100226218e-06, "loss": 0.593, "step": 7851 }, { "epoch": 0.64, "grad_norm": 3.569839625820368, "learning_rate": 3.0644270446430184e-06, "loss": 0.6297, "step": 7852 }, { "epoch": 0.64, "grad_norm": 2.8726355481413335, "learning_rate": 3.0632143132172503e-06, "loss": 0.6432, "step": 7853 }, { "epoch": 0.64, "grad_norm": 4.682413917202822, "learning_rate": 3.062001715829247e-06, "loss": 0.6283, "step": 7854 }, { "epoch": 0.64, "grad_norm": 15.480529211341592, "learning_rate": 3.0607892525629283e-06, "loss": 0.7385, "step": 7855 }, { "epoch": 0.64, "grad_norm": 5.937287464227921, "learning_rate": 3.059576923502204e-06, "loss": 0.6936, "step": 7856 }, { "epoch": 0.64, "grad_norm": 3.0541846827672634, "learning_rate": 3.0583647287309744e-06, "loss": 0.6334, "step": 7857 }, { "epoch": 0.64, "grad_norm": 7.769415266227085, "learning_rate": 3.05715266833313e-06, "loss": 0.8101, "step": 7858 }, { "epoch": 0.64, "grad_norm": 3.357925122052827, "learning_rate": 3.0559407423925536e-06, "loss": 0.6588, "step": 7859 }, { "epoch": 0.64, "grad_norm": 6.2840431057745345, "learning_rate": 3.0547289509931194e-06, "loss": 0.6509, "step": 7860 }, { "epoch": 0.64, "grad_norm": 32.238299815732894, "learning_rate": 3.05351729421869e-06, "loss": 0.7241, "step": 7861 }, { "epoch": 0.64, "grad_norm": 12.773983193619882, "learning_rate": 3.0523057721531217e-06, "loss": 0.6083, "step": 7862 }, { "epoch": 0.64, "grad_norm": 9.512312813758063, "learning_rate": 3.051094384880256e-06, "loss": 0.4938, "step": 7863 }, { "epoch": 0.64, "grad_norm": 4.207905589942727, "learning_rate": 3.0498831324839294e-06, "loss": 0.628, "step": 7864 }, { "epoch": 0.64, "grad_norm": 4.042296915855348, "learning_rate": 3.048672015047971e-06, "loss": 0.7337, "step": 7865 }, { "epoch": 0.64, "grad_norm": 5.391390544497846, "learning_rate": 3.047461032656195e-06, "loss": 0.8529, "step": 7866 }, { "epoch": 0.64, "grad_norm": 4.532974773520633, "learning_rate": 3.0462501853924088e-06, "loss": 0.6345, "step": 7867 }, { "epoch": 0.64, "grad_norm": 4.799622434355538, "learning_rate": 3.0450394733404115e-06, "loss": 0.7364, "step": 7868 }, { "epoch": 0.64, "grad_norm": 13.465004451670874, "learning_rate": 3.0438288965839947e-06, "loss": 0.5904, "step": 7869 }, { "epoch": 0.64, "grad_norm": 6.242620737775251, "learning_rate": 3.0426184552069327e-06, "loss": 0.6309, "step": 7870 }, { "epoch": 0.64, "grad_norm": 6.9349626310096495, "learning_rate": 3.0414081492929993e-06, "loss": 0.4354, "step": 7871 }, { "epoch": 0.64, "grad_norm": 7.43634561969817, "learning_rate": 3.0401979789259533e-06, "loss": 0.6585, "step": 7872 }, { "epoch": 0.64, "grad_norm": 4.335818042484232, "learning_rate": 3.0389879441895485e-06, "loss": 0.6891, "step": 7873 }, { "epoch": 0.64, "grad_norm": 5.041383983211005, "learning_rate": 3.0377780451675243e-06, "loss": 0.7062, "step": 7874 }, { "epoch": 0.64, "grad_norm": 8.098438813868885, "learning_rate": 3.036568281943615e-06, "loss": 0.6207, "step": 7875 }, { "epoch": 0.64, "grad_norm": 3.331323271312619, "learning_rate": 3.035358654601545e-06, "loss": 0.6325, "step": 7876 }, { "epoch": 0.64, "grad_norm": 8.172012236661782, "learning_rate": 3.034149163225025e-06, "loss": 0.6375, "step": 7877 }, { "epoch": 0.64, "grad_norm": 4.956578636567212, "learning_rate": 3.03293980789776e-06, "loss": 0.6135, "step": 7878 }, { "epoch": 0.64, "grad_norm": 19.415122842998876, "learning_rate": 3.0317305887034466e-06, "loss": 0.7273, "step": 7879 }, { "epoch": 0.64, "grad_norm": 8.885271264185215, "learning_rate": 3.030521505725771e-06, "loss": 0.6618, "step": 7880 }, { "epoch": 0.64, "grad_norm": 11.457323142400096, "learning_rate": 3.029312559048406e-06, "loss": 0.5922, "step": 7881 }, { "epoch": 0.64, "grad_norm": 5.7852882438453666, "learning_rate": 3.0281037487550235e-06, "loss": 0.5909, "step": 7882 }, { "epoch": 0.64, "grad_norm": 4.8593047139245575, "learning_rate": 3.0268950749292747e-06, "loss": 0.8284, "step": 7883 }, { "epoch": 0.64, "grad_norm": 3.070505913406939, "learning_rate": 3.025686537654812e-06, "loss": 0.7446, "step": 7884 }, { "epoch": 0.64, "grad_norm": 5.877066261886097, "learning_rate": 3.0244781370152705e-06, "loss": 0.7533, "step": 7885 }, { "epoch": 0.64, "grad_norm": 4.491271405104326, "learning_rate": 3.023269873094281e-06, "loss": 0.5516, "step": 7886 }, { "epoch": 0.64, "grad_norm": 5.075265876201668, "learning_rate": 3.0220617459754638e-06, "loss": 0.6125, "step": 7887 }, { "epoch": 0.64, "grad_norm": 2.8901075478741336, "learning_rate": 3.020853755742428e-06, "loss": 0.6526, "step": 7888 }, { "epoch": 0.64, "grad_norm": 22.458422196900415, "learning_rate": 3.0196459024787745e-06, "loss": 0.5759, "step": 7889 }, { "epoch": 0.64, "grad_norm": 3.9186420432648004, "learning_rate": 3.018438186268094e-06, "loss": 0.6842, "step": 7890 }, { "epoch": 0.64, "grad_norm": 6.9748673377967805, "learning_rate": 3.0172306071939666e-06, "loss": 0.5512, "step": 7891 }, { "epoch": 0.64, "grad_norm": 6.937072774044195, "learning_rate": 3.0160231653399656e-06, "loss": 0.7846, "step": 7892 }, { "epoch": 0.64, "grad_norm": 5.135000466827242, "learning_rate": 3.014815860789654e-06, "loss": 0.6697, "step": 7893 }, { "epoch": 0.64, "grad_norm": 4.299658009216886, "learning_rate": 3.0136086936265853e-06, "loss": 0.7657, "step": 7894 }, { "epoch": 0.64, "grad_norm": 28.91912794836449, "learning_rate": 3.0124016639343023e-06, "loss": 0.6206, "step": 7895 }, { "epoch": 0.64, "grad_norm": 4.823366619539434, "learning_rate": 3.011194771796339e-06, "loss": 0.6931, "step": 7896 }, { "epoch": 0.64, "grad_norm": 4.475263454895488, "learning_rate": 3.0099880172962197e-06, "loss": 0.8125, "step": 7897 }, { "epoch": 0.64, "grad_norm": 5.79657639753959, "learning_rate": 3.008781400517461e-06, "loss": 0.5828, "step": 7898 }, { "epoch": 0.64, "grad_norm": 14.183035035030214, "learning_rate": 3.007574921543565e-06, "loss": 0.84, "step": 7899 }, { "epoch": 0.64, "grad_norm": 9.636072225940968, "learning_rate": 3.0063685804580306e-06, "loss": 0.5457, "step": 7900 }, { "epoch": 0.64, "grad_norm": 3.869545403430963, "learning_rate": 3.005162377344343e-06, "loss": 0.6299, "step": 7901 }, { "epoch": 0.64, "grad_norm": 17.072487959173916, "learning_rate": 3.0039563122859815e-06, "loss": 0.6958, "step": 7902 }, { "epoch": 0.64, "grad_norm": 4.569244186342764, "learning_rate": 3.0027503853664097e-06, "loss": 0.6301, "step": 7903 }, { "epoch": 0.64, "grad_norm": 13.062297963859766, "learning_rate": 3.001544596669087e-06, "loss": 0.7271, "step": 7904 }, { "epoch": 0.64, "grad_norm": 9.463149920474777, "learning_rate": 3.0003389462774625e-06, "loss": 0.6611, "step": 7905 }, { "epoch": 0.64, "grad_norm": 4.240716493379181, "learning_rate": 2.9991334342749725e-06, "loss": 0.7052, "step": 7906 }, { "epoch": 0.64, "grad_norm": 5.76850876516233, "learning_rate": 2.9979280607450466e-06, "loss": 0.6932, "step": 7907 }, { "epoch": 0.64, "grad_norm": 5.742749648571481, "learning_rate": 2.9967228257711063e-06, "loss": 0.7938, "step": 7908 }, { "epoch": 0.64, "grad_norm": 4.145215614532756, "learning_rate": 2.995517729436561e-06, "loss": 0.5623, "step": 7909 }, { "epoch": 0.64, "grad_norm": 4.886022292183818, "learning_rate": 2.994312771824809e-06, "loss": 0.5979, "step": 7910 }, { "epoch": 0.64, "grad_norm": 10.086279461239739, "learning_rate": 2.9931079530192418e-06, "loss": 0.6673, "step": 7911 }, { "epoch": 0.64, "grad_norm": 5.825988826120475, "learning_rate": 2.9919032731032406e-06, "loss": 0.7695, "step": 7912 }, { "epoch": 0.64, "grad_norm": 5.584574557801636, "learning_rate": 2.990698732160178e-06, "loss": 0.6012, "step": 7913 }, { "epoch": 0.64, "grad_norm": 3.2434054284591785, "learning_rate": 2.9894943302734137e-06, "loss": 0.6601, "step": 7914 }, { "epoch": 0.64, "grad_norm": 4.341394232799937, "learning_rate": 2.9882900675263026e-06, "loss": 0.62, "step": 7915 }, { "epoch": 0.64, "grad_norm": 6.141136561869252, "learning_rate": 2.9870859440021845e-06, "loss": 0.6394, "step": 7916 }, { "epoch": 0.64, "grad_norm": 9.981751957236176, "learning_rate": 2.9858819597843923e-06, "loss": 0.6822, "step": 7917 }, { "epoch": 0.64, "grad_norm": 7.416965121903551, "learning_rate": 2.9846781149562515e-06, "loss": 0.7304, "step": 7918 }, { "epoch": 0.64, "grad_norm": 4.274431051083681, "learning_rate": 2.9834744096010738e-06, "loss": 0.5364, "step": 7919 }, { "epoch": 0.64, "grad_norm": 4.969932756795815, "learning_rate": 2.982270843802165e-06, "loss": 0.4988, "step": 7920 }, { "epoch": 0.64, "grad_norm": 9.357437356718279, "learning_rate": 2.9810674176428184e-06, "loss": 0.7122, "step": 7921 }, { "epoch": 0.64, "grad_norm": 3.574239072455284, "learning_rate": 2.979864131206319e-06, "loss": 0.6666, "step": 7922 }, { "epoch": 0.64, "grad_norm": 3.6839782779232797, "learning_rate": 2.9786609845759416e-06, "loss": 0.5263, "step": 7923 }, { "epoch": 0.64, "grad_norm": 3.3508871678142755, "learning_rate": 2.977457977834951e-06, "loss": 0.6691, "step": 7924 }, { "epoch": 0.64, "grad_norm": 4.185103107716808, "learning_rate": 2.9762551110666027e-06, "loss": 0.7456, "step": 7925 }, { "epoch": 0.64, "grad_norm": 9.41904428498282, "learning_rate": 2.975052384354144e-06, "loss": 0.6896, "step": 7926 }, { "epoch": 0.64, "grad_norm": 3.3044620151225677, "learning_rate": 2.97384979778081e-06, "loss": 0.6696, "step": 7927 }, { "epoch": 0.64, "grad_norm": 3.231869928374368, "learning_rate": 2.972647351429828e-06, "loss": 0.654, "step": 7928 }, { "epoch": 0.64, "grad_norm": 3.2922325244580324, "learning_rate": 2.9714450453844156e-06, "loss": 0.6341, "step": 7929 }, { "epoch": 0.64, "grad_norm": 9.100957700942446, "learning_rate": 2.970242879727778e-06, "loss": 0.6161, "step": 7930 }, { "epoch": 0.64, "grad_norm": 3.983404084257971, "learning_rate": 2.9690408545431138e-06, "loss": 0.6426, "step": 7931 }, { "epoch": 0.64, "grad_norm": 9.35888262770671, "learning_rate": 2.967838969913609e-06, "loss": 0.6427, "step": 7932 }, { "epoch": 0.64, "grad_norm": 3.8283775129531388, "learning_rate": 2.9666372259224442e-06, "loss": 0.5771, "step": 7933 }, { "epoch": 0.64, "grad_norm": 4.353232357024093, "learning_rate": 2.9654356226527857e-06, "loss": 0.7037, "step": 7934 }, { "epoch": 0.64, "grad_norm": 3.1626257251002183, "learning_rate": 2.9642341601877954e-06, "loss": 0.7246, "step": 7935 }, { "epoch": 0.64, "grad_norm": 4.394324187380833, "learning_rate": 2.9630328386106165e-06, "loss": 0.6876, "step": 7936 }, { "epoch": 0.64, "grad_norm": 3.649140026101249, "learning_rate": 2.9618316580043915e-06, "loss": 0.554, "step": 7937 }, { "epoch": 0.64, "grad_norm": 71.34016249697791, "learning_rate": 2.9606306184522503e-06, "loss": 0.5776, "step": 7938 }, { "epoch": 0.64, "grad_norm": 3.720611466257484, "learning_rate": 2.95942972003731e-06, "loss": 0.693, "step": 7939 }, { "epoch": 0.64, "grad_norm": 3.8802664737751256, "learning_rate": 2.958228962842682e-06, "loss": 0.7109, "step": 7940 }, { "epoch": 0.64, "grad_norm": 7.826390114148268, "learning_rate": 2.957028346951466e-06, "loss": 0.6693, "step": 7941 }, { "epoch": 0.65, "grad_norm": 3.4790995891865046, "learning_rate": 2.955827872446753e-06, "loss": 0.5502, "step": 7942 }, { "epoch": 0.65, "grad_norm": 13.062916621124748, "learning_rate": 2.954627539411621e-06, "loss": 0.7426, "step": 7943 }, { "epoch": 0.65, "grad_norm": 4.8941872484225915, "learning_rate": 2.953427347929142e-06, "loss": 0.7372, "step": 7944 }, { "epoch": 0.65, "grad_norm": 3.2018462171097166, "learning_rate": 2.9522272980823773e-06, "loss": 0.8031, "step": 7945 }, { "epoch": 0.65, "grad_norm": 4.230947170917448, "learning_rate": 2.9510273899543774e-06, "loss": 0.5772, "step": 7946 }, { "epoch": 0.65, "grad_norm": 4.036943717057517, "learning_rate": 2.949827623628183e-06, "loss": 0.6124, "step": 7947 }, { "epoch": 0.65, "grad_norm": 2.3796032971634973, "learning_rate": 2.948627999186826e-06, "loss": 0.6637, "step": 7948 }, { "epoch": 0.65, "grad_norm": 2.765529867414433, "learning_rate": 2.9474285167133297e-06, "loss": 0.5128, "step": 7949 }, { "epoch": 0.65, "grad_norm": 10.674421582559516, "learning_rate": 2.9462291762907024e-06, "loss": 0.7475, "step": 7950 }, { "epoch": 0.65, "grad_norm": 4.064782182364504, "learning_rate": 2.9450299780019476e-06, "loss": 0.6172, "step": 7951 }, { "epoch": 0.65, "grad_norm": 3.703773336780566, "learning_rate": 2.9438309219300578e-06, "loss": 0.7579, "step": 7952 }, { "epoch": 0.65, "grad_norm": 5.634757393313631, "learning_rate": 2.942632008158015e-06, "loss": 0.5659, "step": 7953 }, { "epoch": 0.65, "grad_norm": 6.474832530656683, "learning_rate": 2.9414332367687914e-06, "loss": 0.8499, "step": 7954 }, { "epoch": 0.65, "grad_norm": 5.428702717606891, "learning_rate": 2.9402346078453513e-06, "loss": 0.64, "step": 7955 }, { "epoch": 0.65, "grad_norm": 7.94889601818546, "learning_rate": 2.9390361214706443e-06, "loss": 0.7082, "step": 7956 }, { "epoch": 0.65, "grad_norm": 3.145697355851298, "learning_rate": 2.9378377777276134e-06, "loss": 0.6718, "step": 7957 }, { "epoch": 0.65, "grad_norm": 3.0426558195478544, "learning_rate": 2.936639576699194e-06, "loss": 0.6516, "step": 7958 }, { "epoch": 0.65, "grad_norm": 4.912128450013469, "learning_rate": 2.935441518468307e-06, "loss": 0.6268, "step": 7959 }, { "epoch": 0.65, "grad_norm": 7.756751233757057, "learning_rate": 2.9342436031178677e-06, "loss": 0.7175, "step": 7960 }, { "epoch": 0.65, "grad_norm": 2.774060271416822, "learning_rate": 2.9330458307307774e-06, "loss": 0.596, "step": 7961 }, { "epoch": 0.65, "grad_norm": 6.143130770325557, "learning_rate": 2.9318482013899306e-06, "loss": 0.6759, "step": 7962 }, { "epoch": 0.65, "grad_norm": 3.0279437038225416, "learning_rate": 2.930650715178211e-06, "loss": 0.7146, "step": 7963 }, { "epoch": 0.65, "grad_norm": 3.6286172762672035, "learning_rate": 2.92945337217849e-06, "loss": 0.6897, "step": 7964 }, { "epoch": 0.65, "grad_norm": 3.3096147681560106, "learning_rate": 2.9282561724736335e-06, "loss": 0.7997, "step": 7965 }, { "epoch": 0.65, "grad_norm": 7.5184808043191635, "learning_rate": 2.9270591161464946e-06, "loss": 0.5893, "step": 7966 }, { "epoch": 0.65, "grad_norm": 6.4450835896070044, "learning_rate": 2.9258622032799165e-06, "loss": 0.7474, "step": 7967 }, { "epoch": 0.65, "grad_norm": 4.141192623134453, "learning_rate": 2.9246654339567373e-06, "loss": 0.7651, "step": 7968 }, { "epoch": 0.65, "grad_norm": 8.341503198541163, "learning_rate": 2.923468808259774e-06, "loss": 0.5452, "step": 7969 }, { "epoch": 0.65, "grad_norm": 4.912407446912591, "learning_rate": 2.9222723262718456e-06, "loss": 0.6942, "step": 7970 }, { "epoch": 0.65, "grad_norm": 4.805692582141735, "learning_rate": 2.921075988075753e-06, "loss": 0.6534, "step": 7971 }, { "epoch": 0.65, "grad_norm": 4.009331023721363, "learning_rate": 2.9198797937542935e-06, "loss": 0.68, "step": 7972 }, { "epoch": 0.65, "grad_norm": 4.273268378117469, "learning_rate": 2.918683743390248e-06, "loss": 0.688, "step": 7973 }, { "epoch": 0.65, "grad_norm": 6.026614079388815, "learning_rate": 2.917487837066395e-06, "loss": 0.7275, "step": 7974 }, { "epoch": 0.65, "grad_norm": 5.313160018907861, "learning_rate": 2.9162920748654955e-06, "loss": 0.6443, "step": 7975 }, { "epoch": 0.65, "grad_norm": 3.607403742104908, "learning_rate": 2.915096456870305e-06, "loss": 0.6542, "step": 7976 }, { "epoch": 0.65, "grad_norm": 2.862426215231995, "learning_rate": 2.913900983163565e-06, "loss": 0.5966, "step": 7977 }, { "epoch": 0.65, "grad_norm": 2.941519325575196, "learning_rate": 2.9127056538280142e-06, "loss": 0.7731, "step": 7978 }, { "epoch": 0.65, "grad_norm": 3.8829764860047624, "learning_rate": 2.9115104689463724e-06, "loss": 0.7495, "step": 7979 }, { "epoch": 0.65, "grad_norm": 4.846146106556295, "learning_rate": 2.910315428601359e-06, "loss": 0.6769, "step": 7980 }, { "epoch": 0.65, "grad_norm": 5.43896740777116, "learning_rate": 2.9091205328756755e-06, "loss": 0.7169, "step": 7981 }, { "epoch": 0.65, "grad_norm": 6.055833871896981, "learning_rate": 2.907925781852017e-06, "loss": 0.6893, "step": 7982 }, { "epoch": 0.65, "grad_norm": 2.3383098832766898, "learning_rate": 2.906731175613066e-06, "loss": 0.4464, "step": 7983 }, { "epoch": 0.65, "grad_norm": 4.4110259476385485, "learning_rate": 2.905536714241497e-06, "loss": 0.6174, "step": 7984 }, { "epoch": 0.65, "grad_norm": 3.9106396925019045, "learning_rate": 2.9043423978199764e-06, "loss": 0.7601, "step": 7985 }, { "epoch": 0.65, "grad_norm": 4.928270083696889, "learning_rate": 2.903148226431155e-06, "loss": 0.5821, "step": 7986 }, { "epoch": 0.65, "grad_norm": 3.418163235308268, "learning_rate": 2.901954200157682e-06, "loss": 0.7019, "step": 7987 }, { "epoch": 0.65, "grad_norm": 2.5586526169629757, "learning_rate": 2.900760319082189e-06, "loss": 0.5839, "step": 7988 }, { "epoch": 0.65, "grad_norm": 8.021811364147736, "learning_rate": 2.899566583287299e-06, "loss": 0.7865, "step": 7989 }, { "epoch": 0.65, "grad_norm": 2.5165847932544025, "learning_rate": 2.898372992855627e-06, "loss": 0.5454, "step": 7990 }, { "epoch": 0.65, "grad_norm": 3.2709619933447973, "learning_rate": 2.897179547869775e-06, "loss": 0.6818, "step": 7991 }, { "epoch": 0.65, "grad_norm": 3.5076440096854045, "learning_rate": 2.8959862484123407e-06, "loss": 0.8244, "step": 7992 }, { "epoch": 0.65, "grad_norm": 9.566804698825475, "learning_rate": 2.8947930945659043e-06, "loss": 0.6592, "step": 7993 }, { "epoch": 0.65, "grad_norm": 3.746943799016381, "learning_rate": 2.8936000864130427e-06, "loss": 0.6335, "step": 7994 }, { "epoch": 0.65, "grad_norm": 4.284303628858223, "learning_rate": 2.8924072240363182e-06, "loss": 0.7517, "step": 7995 }, { "epoch": 0.65, "grad_norm": 2.719195740432593, "learning_rate": 2.8912145075182844e-06, "loss": 0.5689, "step": 7996 }, { "epoch": 0.65, "grad_norm": 7.210201003929519, "learning_rate": 2.890021936941483e-06, "loss": 0.816, "step": 7997 }, { "epoch": 0.65, "grad_norm": 4.9637647127954665, "learning_rate": 2.8888295123884507e-06, "loss": 0.5769, "step": 7998 }, { "epoch": 0.65, "grad_norm": 3.344290299609074, "learning_rate": 2.887637233941709e-06, "loss": 0.7094, "step": 7999 }, { "epoch": 0.65, "grad_norm": 3.8171927691933503, "learning_rate": 2.8864451016837703e-06, "loss": 0.6812, "step": 8000 }, { "epoch": 0.65, "grad_norm": 7.207018494530118, "learning_rate": 2.88525311569714e-06, "loss": 0.6815, "step": 8001 }, { "epoch": 0.65, "grad_norm": 3.379976828641701, "learning_rate": 2.884061276064309e-06, "loss": 0.6165, "step": 8002 }, { "epoch": 0.65, "grad_norm": 3.018867886467191, "learning_rate": 2.882869582867761e-06, "loss": 0.752, "step": 8003 }, { "epoch": 0.65, "grad_norm": 3.4021895370054533, "learning_rate": 2.8816780361899664e-06, "loss": 0.6062, "step": 8004 }, { "epoch": 0.65, "grad_norm": 4.569776692017455, "learning_rate": 2.880486636113392e-06, "loss": 0.6702, "step": 8005 }, { "epoch": 0.65, "grad_norm": 5.325271848292634, "learning_rate": 2.8792953827204884e-06, "loss": 0.7343, "step": 8006 }, { "epoch": 0.65, "grad_norm": 3.2729458613252556, "learning_rate": 2.878104276093695e-06, "loss": 0.6263, "step": 8007 }, { "epoch": 0.65, "grad_norm": 6.023866598421868, "learning_rate": 2.87691331631545e-06, "loss": 0.6179, "step": 8008 }, { "epoch": 0.65, "grad_norm": 3.4525442210827513, "learning_rate": 2.875722503468168e-06, "loss": 0.7279, "step": 8009 }, { "epoch": 0.65, "grad_norm": 6.522536383425385, "learning_rate": 2.874531837634266e-06, "loss": 0.6595, "step": 8010 }, { "epoch": 0.65, "grad_norm": 2.6069834590391516, "learning_rate": 2.8733413188961416e-06, "loss": 0.6464, "step": 8011 }, { "epoch": 0.65, "grad_norm": 3.833461329120667, "learning_rate": 2.872150947336191e-06, "loss": 0.5774, "step": 8012 }, { "epoch": 0.65, "grad_norm": 14.295096291704, "learning_rate": 2.870960723036793e-06, "loss": 0.7079, "step": 8013 }, { "epoch": 0.65, "grad_norm": 3.4686198610194823, "learning_rate": 2.869770646080316e-06, "loss": 0.7404, "step": 8014 }, { "epoch": 0.65, "grad_norm": 3.694525856744964, "learning_rate": 2.8685807165491275e-06, "loss": 0.7049, "step": 8015 }, { "epoch": 0.65, "grad_norm": 3.5126109050783083, "learning_rate": 2.86739093452557e-06, "loss": 0.6991, "step": 8016 }, { "epoch": 0.65, "grad_norm": 5.27960053021177, "learning_rate": 2.8662013000919897e-06, "loss": 0.7257, "step": 8017 }, { "epoch": 0.65, "grad_norm": 2.915218762908385, "learning_rate": 2.865011813330713e-06, "loss": 0.6121, "step": 8018 }, { "epoch": 0.65, "grad_norm": 2.9744899449321274, "learning_rate": 2.863822474324064e-06, "loss": 0.7253, "step": 8019 }, { "epoch": 0.65, "grad_norm": 3.063975824891128, "learning_rate": 2.862633283154348e-06, "loss": 0.6413, "step": 8020 }, { "epoch": 0.65, "grad_norm": 3.476180188181614, "learning_rate": 2.8614442399038713e-06, "loss": 0.5547, "step": 8021 }, { "epoch": 0.65, "grad_norm": 3.5944207966133868, "learning_rate": 2.860255344654914e-06, "loss": 0.61, "step": 8022 }, { "epoch": 0.65, "grad_norm": 4.9939928969105285, "learning_rate": 2.8590665974897626e-06, "loss": 0.5697, "step": 8023 }, { "epoch": 0.65, "grad_norm": 2.609342772961525, "learning_rate": 2.857877998490682e-06, "loss": 0.6324, "step": 8024 }, { "epoch": 0.65, "grad_norm": 3.4910581802080674, "learning_rate": 2.8566895477399303e-06, "loss": 0.758, "step": 8025 }, { "epoch": 0.65, "grad_norm": 22.555353816985132, "learning_rate": 2.8555012453197594e-06, "loss": 0.7427, "step": 8026 }, { "epoch": 0.65, "grad_norm": 3.3924341478690874, "learning_rate": 2.8543130913124036e-06, "loss": 0.6537, "step": 8027 }, { "epoch": 0.65, "grad_norm": 6.540463838343723, "learning_rate": 2.853125085800096e-06, "loss": 0.7307, "step": 8028 }, { "epoch": 0.65, "grad_norm": 4.608088004437503, "learning_rate": 2.851937228865046e-06, "loss": 0.7204, "step": 8029 }, { "epoch": 0.65, "grad_norm": 4.750358123066622, "learning_rate": 2.850749520589467e-06, "loss": 0.7658, "step": 8030 }, { "epoch": 0.65, "grad_norm": 5.290827648545436, "learning_rate": 2.849561961055554e-06, "loss": 0.758, "step": 8031 }, { "epoch": 0.65, "grad_norm": 3.0124113658921785, "learning_rate": 2.848374550345492e-06, "loss": 0.6691, "step": 8032 }, { "epoch": 0.65, "grad_norm": 3.548019509451185, "learning_rate": 2.847187288541461e-06, "loss": 0.7344, "step": 8033 }, { "epoch": 0.65, "grad_norm": 3.2523457725426064, "learning_rate": 2.8460001757256225e-06, "loss": 0.6396, "step": 8034 }, { "epoch": 0.65, "grad_norm": 3.04257344946543, "learning_rate": 2.8448132119801387e-06, "loss": 0.7396, "step": 8035 }, { "epoch": 0.65, "grad_norm": 8.140818571161578, "learning_rate": 2.843626397387146e-06, "loss": 0.6545, "step": 8036 }, { "epoch": 0.65, "grad_norm": 2.7542264824030367, "learning_rate": 2.842439732028787e-06, "loss": 0.7558, "step": 8037 }, { "epoch": 0.65, "grad_norm": 6.4777330081682685, "learning_rate": 2.8412532159871835e-06, "loss": 0.7565, "step": 8038 }, { "epoch": 0.65, "grad_norm": 2.9347423493914984, "learning_rate": 2.840066849344448e-06, "loss": 0.6668, "step": 8039 }, { "epoch": 0.65, "grad_norm": 14.075107441588635, "learning_rate": 2.838880632182689e-06, "loss": 0.6859, "step": 8040 }, { "epoch": 0.65, "grad_norm": 2.8346265119003657, "learning_rate": 2.837694564583997e-06, "loss": 0.709, "step": 8041 }, { "epoch": 0.65, "grad_norm": 8.108622104523713, "learning_rate": 2.836508646630457e-06, "loss": 0.6318, "step": 8042 }, { "epoch": 0.65, "grad_norm": 2.446374545868996, "learning_rate": 2.835322878404139e-06, "loss": 0.5959, "step": 8043 }, { "epoch": 0.65, "grad_norm": 2.6309871248743137, "learning_rate": 2.834137259987109e-06, "loss": 0.8056, "step": 8044 }, { "epoch": 0.65, "grad_norm": 4.897188935304473, "learning_rate": 2.832951791461417e-06, "loss": 0.7282, "step": 8045 }, { "epoch": 0.65, "grad_norm": 3.230830528350498, "learning_rate": 2.831766472909107e-06, "loss": 0.7623, "step": 8046 }, { "epoch": 0.65, "grad_norm": 5.104243282469706, "learning_rate": 2.83058130441221e-06, "loss": 0.6459, "step": 8047 }, { "epoch": 0.65, "grad_norm": 3.895120005138428, "learning_rate": 2.8293962860527463e-06, "loss": 0.6943, "step": 8048 }, { "epoch": 0.65, "grad_norm": 2.669712617957591, "learning_rate": 2.828211417912727e-06, "loss": 0.6413, "step": 8049 }, { "epoch": 0.65, "grad_norm": 7.4947910697641635, "learning_rate": 2.82702670007415e-06, "loss": 0.6932, "step": 8050 }, { "epoch": 0.65, "grad_norm": 3.3743044314570665, "learning_rate": 2.82584213261901e-06, "loss": 0.6865, "step": 8051 }, { "epoch": 0.65, "grad_norm": 6.645393736516758, "learning_rate": 2.8246577156292814e-06, "loss": 0.7015, "step": 8052 }, { "epoch": 0.65, "grad_norm": 4.256401060582648, "learning_rate": 2.8234734491869388e-06, "loss": 0.6964, "step": 8053 }, { "epoch": 0.65, "grad_norm": 3.5846555797881288, "learning_rate": 2.822289333373937e-06, "loss": 0.6724, "step": 8054 }, { "epoch": 0.65, "grad_norm": 17.647839782732696, "learning_rate": 2.821105368272226e-06, "loss": 0.5622, "step": 8055 }, { "epoch": 0.65, "grad_norm": 2.9535726185612163, "learning_rate": 2.8199215539637427e-06, "loss": 0.7646, "step": 8056 }, { "epoch": 0.65, "grad_norm": 6.010230176300674, "learning_rate": 2.818737890530413e-06, "loss": 0.6683, "step": 8057 }, { "epoch": 0.65, "grad_norm": 4.239535044533543, "learning_rate": 2.8175543780541583e-06, "loss": 0.8028, "step": 8058 }, { "epoch": 0.65, "grad_norm": 3.2369724267153903, "learning_rate": 2.816371016616879e-06, "loss": 0.7309, "step": 8059 }, { "epoch": 0.65, "grad_norm": 5.9465045609551614, "learning_rate": 2.815187806300478e-06, "loss": 0.5982, "step": 8060 }, { "epoch": 0.65, "grad_norm": 8.21157044415239, "learning_rate": 2.8140047471868364e-06, "loss": 0.7354, "step": 8061 }, { "epoch": 0.65, "grad_norm": 10.13291753451248, "learning_rate": 2.812821839357831e-06, "loss": 0.7539, "step": 8062 }, { "epoch": 0.65, "grad_norm": 5.9758329503293695, "learning_rate": 2.8116390828953257e-06, "loss": 0.691, "step": 8063 }, { "epoch": 0.65, "grad_norm": 4.822796671840841, "learning_rate": 2.8104564778811735e-06, "loss": 0.6881, "step": 8064 }, { "epoch": 0.66, "grad_norm": 5.331684154175175, "learning_rate": 2.8092740243972205e-06, "loss": 0.6795, "step": 8065 }, { "epoch": 0.66, "grad_norm": 3.737394938090677, "learning_rate": 2.8080917225252977e-06, "loss": 0.6896, "step": 8066 }, { "epoch": 0.66, "grad_norm": 2.5397599396491572, "learning_rate": 2.806909572347231e-06, "loss": 0.653, "step": 8067 }, { "epoch": 0.66, "grad_norm": 5.553295341303807, "learning_rate": 2.805727573944831e-06, "loss": 0.6577, "step": 8068 }, { "epoch": 0.66, "grad_norm": 2.7603569793241465, "learning_rate": 2.804545727399899e-06, "loss": 0.5979, "step": 8069 }, { "epoch": 0.66, "grad_norm": 4.30557996129583, "learning_rate": 2.8033640327942235e-06, "loss": 0.7343, "step": 8070 }, { "epoch": 0.66, "grad_norm": 3.7599864026680043, "learning_rate": 2.8021824902095914e-06, "loss": 0.7741, "step": 8071 }, { "epoch": 0.66, "grad_norm": 4.425154338026033, "learning_rate": 2.8010010997277692e-06, "loss": 0.6454, "step": 8072 }, { "epoch": 0.66, "grad_norm": 9.292637563655099, "learning_rate": 2.7998198614305145e-06, "loss": 0.6662, "step": 8073 }, { "epoch": 0.66, "grad_norm": 3.0078072550963215, "learning_rate": 2.798638775399583e-06, "loss": 0.6474, "step": 8074 }, { "epoch": 0.66, "grad_norm": 2.4950065604598426, "learning_rate": 2.7974578417167052e-06, "loss": 0.6051, "step": 8075 }, { "epoch": 0.66, "grad_norm": 2.77412315501453, "learning_rate": 2.796277060463616e-06, "loss": 0.6506, "step": 8076 }, { "epoch": 0.66, "grad_norm": 89.2254811421721, "learning_rate": 2.7950964317220266e-06, "loss": 0.638, "step": 8077 }, { "epoch": 0.66, "grad_norm": 9.063342351241381, "learning_rate": 2.79391595557365e-06, "loss": 0.6091, "step": 8078 }, { "epoch": 0.66, "grad_norm": 7.1340359037385115, "learning_rate": 2.79273563210018e-06, "loss": 0.5987, "step": 8079 }, { "epoch": 0.66, "grad_norm": 3.740413829044198, "learning_rate": 2.7915554613833e-06, "loss": 0.6965, "step": 8080 }, { "epoch": 0.66, "grad_norm": 3.089046791368479, "learning_rate": 2.7903754435046914e-06, "loss": 0.6006, "step": 8081 }, { "epoch": 0.66, "grad_norm": 12.496913360956905, "learning_rate": 2.7891955785460124e-06, "loss": 0.7316, "step": 8082 }, { "epoch": 0.66, "grad_norm": 2.263579399050377, "learning_rate": 2.78801586658892e-06, "loss": 0.7201, "step": 8083 }, { "epoch": 0.66, "grad_norm": 3.128008181608036, "learning_rate": 2.786836307715056e-06, "loss": 0.671, "step": 8084 }, { "epoch": 0.66, "grad_norm": 5.005441463228108, "learning_rate": 2.7856569020060576e-06, "loss": 0.6981, "step": 8085 }, { "epoch": 0.66, "grad_norm": 3.5765022728610334, "learning_rate": 2.7844776495435435e-06, "loss": 0.7256, "step": 8086 }, { "epoch": 0.66, "grad_norm": 2.916427159046024, "learning_rate": 2.7832985504091242e-06, "loss": 0.6826, "step": 8087 }, { "epoch": 0.66, "grad_norm": 2.259328775828344, "learning_rate": 2.782119604684407e-06, "loss": 0.784, "step": 8088 }, { "epoch": 0.66, "grad_norm": 3.011025343382135, "learning_rate": 2.780940812450974e-06, "loss": 0.6897, "step": 8089 }, { "epoch": 0.66, "grad_norm": 9.132058530184704, "learning_rate": 2.779762173790411e-06, "loss": 0.7162, "step": 8090 }, { "epoch": 0.66, "grad_norm": 3.8341124318769713, "learning_rate": 2.778583688784283e-06, "loss": 0.6693, "step": 8091 }, { "epoch": 0.66, "grad_norm": 5.064764614926233, "learning_rate": 2.7774053575141534e-06, "loss": 0.675, "step": 8092 }, { "epoch": 0.66, "grad_norm": 4.023152511188378, "learning_rate": 2.7762271800615654e-06, "loss": 0.4862, "step": 8093 }, { "epoch": 0.66, "grad_norm": 4.7788938895923625, "learning_rate": 2.7750491565080628e-06, "loss": 0.5944, "step": 8094 }, { "epoch": 0.66, "grad_norm": 4.192379958973519, "learning_rate": 2.773871286935164e-06, "loss": 0.5599, "step": 8095 }, { "epoch": 0.66, "grad_norm": 3.3720122497226215, "learning_rate": 2.772693571424391e-06, "loss": 0.5955, "step": 8096 }, { "epoch": 0.66, "grad_norm": 2.5577324112883493, "learning_rate": 2.771516010057247e-06, "loss": 0.6597, "step": 8097 }, { "epoch": 0.66, "grad_norm": 3.6533423604480904, "learning_rate": 2.7703386029152246e-06, "loss": 0.6618, "step": 8098 }, { "epoch": 0.66, "grad_norm": 2.9683340010962227, "learning_rate": 2.769161350079812e-06, "loss": 0.5322, "step": 8099 }, { "epoch": 0.66, "grad_norm": 3.708425913454942, "learning_rate": 2.767984251632479e-06, "loss": 0.7231, "step": 8100 }, { "epoch": 0.66, "grad_norm": 2.3598118702840942, "learning_rate": 2.7668073076546936e-06, "loss": 0.7126, "step": 8101 }, { "epoch": 0.66, "grad_norm": 2.4044414523470983, "learning_rate": 2.7656305182279e-06, "loss": 0.7585, "step": 8102 }, { "epoch": 0.66, "grad_norm": 2.879798893839328, "learning_rate": 2.7644538834335446e-06, "loss": 0.6973, "step": 8103 }, { "epoch": 0.66, "grad_norm": 3.752774397731517, "learning_rate": 2.7632774033530575e-06, "loss": 0.6072, "step": 8104 }, { "epoch": 0.66, "grad_norm": 5.784226603803175, "learning_rate": 2.7621010780678546e-06, "loss": 0.572, "step": 8105 }, { "epoch": 0.66, "grad_norm": 5.218206124068918, "learning_rate": 2.7609249076593507e-06, "loss": 0.7062, "step": 8106 }, { "epoch": 0.66, "grad_norm": 3.942241814504858, "learning_rate": 2.75974889220894e-06, "loss": 0.6435, "step": 8107 }, { "epoch": 0.66, "grad_norm": 3.6680174676185695, "learning_rate": 2.7585730317980154e-06, "loss": 0.6204, "step": 8108 }, { "epoch": 0.66, "grad_norm": 6.0356555201357365, "learning_rate": 2.7573973265079456e-06, "loss": 0.7886, "step": 8109 }, { "epoch": 0.66, "grad_norm": 4.179336771332034, "learning_rate": 2.756221776420104e-06, "loss": 0.4285, "step": 8110 }, { "epoch": 0.66, "grad_norm": 2.586153514957241, "learning_rate": 2.7550463816158437e-06, "loss": 0.9171, "step": 8111 }, { "epoch": 0.66, "grad_norm": 2.574352646633711, "learning_rate": 2.753871142176506e-06, "loss": 0.6009, "step": 8112 }, { "epoch": 0.66, "grad_norm": 5.190698838544873, "learning_rate": 2.7526960581834316e-06, "loss": 0.7553, "step": 8113 }, { "epoch": 0.66, "grad_norm": 3.3618261508897382, "learning_rate": 2.751521129717939e-06, "loss": 0.6209, "step": 8114 }, { "epoch": 0.66, "grad_norm": 3.2833187956151426, "learning_rate": 2.7503463568613425e-06, "loss": 0.6505, "step": 8115 }, { "epoch": 0.66, "grad_norm": 3.4587743625127083, "learning_rate": 2.749171739694941e-06, "loss": 0.6915, "step": 8116 }, { "epoch": 0.66, "grad_norm": 3.5522995137920397, "learning_rate": 2.747997278300029e-06, "loss": 0.6012, "step": 8117 }, { "epoch": 0.66, "grad_norm": 8.236393816434713, "learning_rate": 2.7468229727578836e-06, "loss": 0.6107, "step": 8118 }, { "epoch": 0.66, "grad_norm": 4.267780455576585, "learning_rate": 2.745648823149778e-06, "loss": 1.0247, "step": 8119 }, { "epoch": 0.66, "grad_norm": 3.6651676247798903, "learning_rate": 2.744474829556968e-06, "loss": 0.814, "step": 8120 }, { "epoch": 0.66, "grad_norm": 3.862417019236534, "learning_rate": 2.743300992060701e-06, "loss": 0.7157, "step": 8121 }, { "epoch": 0.66, "grad_norm": 4.809328871151626, "learning_rate": 2.7421273107422157e-06, "loss": 0.698, "step": 8122 }, { "epoch": 0.66, "grad_norm": 5.306268007817394, "learning_rate": 2.740953785682735e-06, "loss": 0.6299, "step": 8123 }, { "epoch": 0.66, "grad_norm": 3.2365114206748578, "learning_rate": 2.7397804169634785e-06, "loss": 0.6519, "step": 8124 }, { "epoch": 0.66, "grad_norm": 3.9779067661232146, "learning_rate": 2.7386072046656466e-06, "loss": 0.6022, "step": 8125 }, { "epoch": 0.66, "grad_norm": 2.8569241628554756, "learning_rate": 2.737434148870437e-06, "loss": 0.6969, "step": 8126 }, { "epoch": 0.66, "grad_norm": 8.249849309220107, "learning_rate": 2.73626124965903e-06, "loss": 0.6716, "step": 8127 }, { "epoch": 0.66, "grad_norm": 5.473925444958698, "learning_rate": 2.7350885071125993e-06, "loss": 0.5792, "step": 8128 }, { "epoch": 0.66, "grad_norm": 14.151669351812043, "learning_rate": 2.7339159213123047e-06, "loss": 0.7819, "step": 8129 }, { "epoch": 0.66, "grad_norm": 3.271079193189441, "learning_rate": 2.732743492339294e-06, "loss": 0.7488, "step": 8130 }, { "epoch": 0.66, "grad_norm": 2.8123534476903784, "learning_rate": 2.7315712202747123e-06, "loss": 0.5749, "step": 8131 }, { "epoch": 0.66, "grad_norm": 3.080516209679372, "learning_rate": 2.730399105199683e-06, "loss": 0.7644, "step": 8132 }, { "epoch": 0.66, "grad_norm": 9.666565114778587, "learning_rate": 2.7292271471953287e-06, "loss": 0.6489, "step": 8133 }, { "epoch": 0.66, "grad_norm": 4.768538734082018, "learning_rate": 2.728055346342753e-06, "loss": 0.7818, "step": 8134 }, { "epoch": 0.66, "grad_norm": 2.897447991631518, "learning_rate": 2.7268837027230532e-06, "loss": 0.8416, "step": 8135 }, { "epoch": 0.66, "grad_norm": 2.2968322726515984, "learning_rate": 2.725712216417314e-06, "loss": 0.5824, "step": 8136 }, { "epoch": 0.66, "grad_norm": 6.151412765665947, "learning_rate": 2.724540887506607e-06, "loss": 0.6942, "step": 8137 }, { "epoch": 0.66, "grad_norm": 4.983144827871983, "learning_rate": 2.7233697160720006e-06, "loss": 0.6166, "step": 8138 }, { "epoch": 0.66, "grad_norm": 10.496092195886966, "learning_rate": 2.7221987021945424e-06, "loss": 0.6779, "step": 8139 }, { "epoch": 0.66, "grad_norm": 5.399288633401184, "learning_rate": 2.7210278459552786e-06, "loss": 0.6359, "step": 8140 }, { "epoch": 0.66, "grad_norm": 40.49926859289402, "learning_rate": 2.7198571474352365e-06, "loss": 0.6678, "step": 8141 }, { "epoch": 0.66, "grad_norm": 4.068194332199744, "learning_rate": 2.7186866067154377e-06, "loss": 0.6917, "step": 8142 }, { "epoch": 0.66, "grad_norm": 4.214361757073315, "learning_rate": 2.717516223876888e-06, "loss": 0.6056, "step": 8143 }, { "epoch": 0.66, "grad_norm": 2.366611601620602, "learning_rate": 2.7163459990005885e-06, "loss": 0.5489, "step": 8144 }, { "epoch": 0.66, "grad_norm": 3.433824867741869, "learning_rate": 2.715175932167525e-06, "loss": 0.6497, "step": 8145 }, { "epoch": 0.66, "grad_norm": 8.416421347979638, "learning_rate": 2.714006023458673e-06, "loss": 0.6585, "step": 8146 }, { "epoch": 0.66, "grad_norm": 4.370174083445899, "learning_rate": 2.712836272955001e-06, "loss": 0.5897, "step": 8147 }, { "epoch": 0.66, "grad_norm": 7.079149995938893, "learning_rate": 2.7116666807374557e-06, "loss": 0.6861, "step": 8148 }, { "epoch": 0.66, "grad_norm": 4.123743866742354, "learning_rate": 2.7104972468869867e-06, "loss": 0.669, "step": 8149 }, { "epoch": 0.66, "grad_norm": 3.310417937479774, "learning_rate": 2.7093279714845223e-06, "loss": 0.6096, "step": 8150 }, { "epoch": 0.66, "grad_norm": 7.5286076165290625, "learning_rate": 2.7081588546109875e-06, "loss": 0.7812, "step": 8151 }, { "epoch": 0.66, "grad_norm": 16.493016333175053, "learning_rate": 2.7069898963472906e-06, "loss": 0.7276, "step": 8152 }, { "epoch": 0.66, "grad_norm": 4.300271825650717, "learning_rate": 2.7058210967743294e-06, "loss": 0.6913, "step": 8153 }, { "epoch": 0.66, "grad_norm": 4.57384285911414, "learning_rate": 2.704652455972997e-06, "loss": 0.7158, "step": 8154 }, { "epoch": 0.66, "grad_norm": 4.351206435000293, "learning_rate": 2.7034839740241634e-06, "loss": 0.743, "step": 8155 }, { "epoch": 0.66, "grad_norm": 3.0215783665567226, "learning_rate": 2.7023156510087012e-06, "loss": 0.5758, "step": 8156 }, { "epoch": 0.66, "grad_norm": 3.7332458177399337, "learning_rate": 2.701147487007461e-06, "loss": 0.7216, "step": 8157 }, { "epoch": 0.66, "grad_norm": 29.436091927951935, "learning_rate": 2.6999794821012915e-06, "loss": 0.7029, "step": 8158 }, { "epoch": 0.66, "grad_norm": 28.091473821487718, "learning_rate": 2.6988116363710243e-06, "loss": 0.7103, "step": 8159 }, { "epoch": 0.66, "grad_norm": 2.6741269095688835, "learning_rate": 2.697643949897479e-06, "loss": 0.5242, "step": 8160 }, { "epoch": 0.66, "grad_norm": 4.066508999322499, "learning_rate": 2.696476422761474e-06, "loss": 0.8965, "step": 8161 }, { "epoch": 0.66, "grad_norm": 2.766408325277755, "learning_rate": 2.6953090550437994e-06, "loss": 0.5919, "step": 8162 }, { "epoch": 0.66, "grad_norm": 4.319680001264087, "learning_rate": 2.694141846825252e-06, "loss": 0.6082, "step": 8163 }, { "epoch": 0.66, "grad_norm": 2.7161134675993197, "learning_rate": 2.6929747981866066e-06, "loss": 0.6127, "step": 8164 }, { "epoch": 0.66, "grad_norm": 3.748930160644413, "learning_rate": 2.6918079092086323e-06, "loss": 0.5933, "step": 8165 }, { "epoch": 0.66, "grad_norm": 4.402770707724097, "learning_rate": 2.6906411799720856e-06, "loss": 0.5895, "step": 8166 }, { "epoch": 0.66, "grad_norm": 4.766736266458826, "learning_rate": 2.689474610557709e-06, "loss": 0.7376, "step": 8167 }, { "epoch": 0.66, "grad_norm": 5.26013466914022, "learning_rate": 2.688308201046236e-06, "loss": 0.6647, "step": 8168 }, { "epoch": 0.66, "grad_norm": 4.934365711994622, "learning_rate": 2.6871419515183934e-06, "loss": 0.7761, "step": 8169 }, { "epoch": 0.66, "grad_norm": 3.1207334549338785, "learning_rate": 2.6859758620548904e-06, "loss": 0.6577, "step": 8170 }, { "epoch": 0.66, "grad_norm": 4.540244856917245, "learning_rate": 2.6848099327364263e-06, "loss": 0.5619, "step": 8171 }, { "epoch": 0.66, "grad_norm": 7.544296323887684, "learning_rate": 2.683644163643694e-06, "loss": 0.5972, "step": 8172 }, { "epoch": 0.66, "grad_norm": 5.913912830134446, "learning_rate": 2.6824785548573685e-06, "loss": 0.6488, "step": 8173 }, { "epoch": 0.66, "grad_norm": 9.218652830581636, "learning_rate": 2.6813131064581237e-06, "loss": 0.7573, "step": 8174 }, { "epoch": 0.66, "grad_norm": 4.940829344477792, "learning_rate": 2.6801478185266076e-06, "loss": 0.6524, "step": 8175 }, { "epoch": 0.66, "grad_norm": 4.802348762504626, "learning_rate": 2.6789826911434714e-06, "loss": 0.6368, "step": 8176 }, { "epoch": 0.66, "grad_norm": 6.0293096270980895, "learning_rate": 2.6778177243893475e-06, "loss": 0.8511, "step": 8177 }, { "epoch": 0.66, "grad_norm": 3.2633117713110504, "learning_rate": 2.6766529183448566e-06, "loss": 0.802, "step": 8178 }, { "epoch": 0.66, "grad_norm": 6.686260005792027, "learning_rate": 2.6754882730906145e-06, "loss": 0.5111, "step": 8179 }, { "epoch": 0.66, "grad_norm": 6.396596808027793, "learning_rate": 2.674323788707218e-06, "loss": 0.833, "step": 8180 }, { "epoch": 0.66, "grad_norm": 13.006997384344544, "learning_rate": 2.673159465275264e-06, "loss": 0.558, "step": 8181 }, { "epoch": 0.66, "grad_norm": 13.289912997776192, "learning_rate": 2.6719953028753214e-06, "loss": 0.6079, "step": 8182 }, { "epoch": 0.66, "grad_norm": 6.083173780646696, "learning_rate": 2.670831301587964e-06, "loss": 0.8162, "step": 8183 }, { "epoch": 0.66, "grad_norm": 9.201957025999572, "learning_rate": 2.6696674614937466e-06, "loss": 0.7288, "step": 8184 }, { "epoch": 0.66, "grad_norm": 17.598100752302532, "learning_rate": 2.668503782673212e-06, "loss": 0.7004, "step": 8185 }, { "epoch": 0.66, "grad_norm": 6.483641831618135, "learning_rate": 2.667340265206897e-06, "loss": 0.8014, "step": 8186 }, { "epoch": 0.66, "grad_norm": 3.5587194007847933, "learning_rate": 2.6661769091753244e-06, "loss": 0.7161, "step": 8187 }, { "epoch": 0.67, "grad_norm": 8.318247796483886, "learning_rate": 2.665013714659004e-06, "loss": 0.6558, "step": 8188 }, { "epoch": 0.67, "grad_norm": 6.389200576753488, "learning_rate": 2.6638506817384346e-06, "loss": 0.6297, "step": 8189 }, { "epoch": 0.67, "grad_norm": 7.707717064787286, "learning_rate": 2.66268781049411e-06, "loss": 0.9265, "step": 8190 }, { "epoch": 0.67, "grad_norm": 7.042260655258461, "learning_rate": 2.661525101006506e-06, "loss": 0.6435, "step": 8191 }, { "epoch": 0.67, "grad_norm": 4.1712809946919265, "learning_rate": 2.660362553356087e-06, "loss": 0.6226, "step": 8192 }, { "epoch": 0.67, "grad_norm": 6.965248040716757, "learning_rate": 2.659200167623313e-06, "loss": 0.5982, "step": 8193 }, { "epoch": 0.67, "grad_norm": 7.610740591713689, "learning_rate": 2.658037943888626e-06, "loss": 0.505, "step": 8194 }, { "epoch": 0.67, "grad_norm": 3.5886055930808562, "learning_rate": 2.6568758822324605e-06, "loss": 0.7438, "step": 8195 }, { "epoch": 0.67, "grad_norm": 3.931046275279916, "learning_rate": 2.655713982735234e-06, "loss": 0.676, "step": 8196 }, { "epoch": 0.67, "grad_norm": 4.715776715617235, "learning_rate": 2.6545522454773643e-06, "loss": 0.6227, "step": 8197 }, { "epoch": 0.67, "grad_norm": 3.4117764528628163, "learning_rate": 2.653390670539244e-06, "loss": 0.6489, "step": 8198 }, { "epoch": 0.67, "grad_norm": 3.1189164504067204, "learning_rate": 2.652229258001268e-06, "loss": 0.7726, "step": 8199 }, { "epoch": 0.67, "grad_norm": 6.742417384081913, "learning_rate": 2.651068007943809e-06, "loss": 0.7546, "step": 8200 }, { "epoch": 0.67, "grad_norm": 7.067713286725016, "learning_rate": 2.6499069204472346e-06, "loss": 0.5828, "step": 8201 }, { "epoch": 0.67, "grad_norm": 5.2475697301286965, "learning_rate": 2.648745995591898e-06, "loss": 0.8124, "step": 8202 }, { "epoch": 0.67, "grad_norm": 9.905347852964248, "learning_rate": 2.647585233458142e-06, "loss": 0.7315, "step": 8203 }, { "epoch": 0.67, "grad_norm": 28.31208804022421, "learning_rate": 2.6464246341263023e-06, "loss": 0.6172, "step": 8204 }, { "epoch": 0.67, "grad_norm": 12.742042122893908, "learning_rate": 2.645264197676694e-06, "loss": 0.7066, "step": 8205 }, { "epoch": 0.67, "grad_norm": 4.906364811344351, "learning_rate": 2.6441039241896325e-06, "loss": 0.6999, "step": 8206 }, { "epoch": 0.67, "grad_norm": 6.262589629087726, "learning_rate": 2.6429438137454133e-06, "loss": 0.88, "step": 8207 }, { "epoch": 0.67, "grad_norm": 4.546216797871973, "learning_rate": 2.6417838664243232e-06, "loss": 0.7516, "step": 8208 }, { "epoch": 0.67, "grad_norm": 10.1806082319197, "learning_rate": 2.6406240823066387e-06, "loss": 0.5961, "step": 8209 }, { "epoch": 0.67, "grad_norm": 18.23638104296995, "learning_rate": 2.6394644614726215e-06, "loss": 0.8248, "step": 8210 }, { "epoch": 0.67, "grad_norm": 7.429546987646909, "learning_rate": 2.638305004002528e-06, "loss": 0.9113, "step": 8211 }, { "epoch": 0.67, "grad_norm": 8.732817295019055, "learning_rate": 2.6371457099765975e-06, "loss": 0.6727, "step": 8212 }, { "epoch": 0.67, "grad_norm": 9.124099909141329, "learning_rate": 2.6359865794750635e-06, "loss": 0.807, "step": 8213 }, { "epoch": 0.67, "grad_norm": 6.402880912308503, "learning_rate": 2.6348276125781423e-06, "loss": 0.5557, "step": 8214 }, { "epoch": 0.67, "grad_norm": 12.446808329390967, "learning_rate": 2.633668809366044e-06, "loss": 0.6702, "step": 8215 }, { "epoch": 0.67, "grad_norm": 9.981870623522754, "learning_rate": 2.632510169918963e-06, "loss": 0.5764, "step": 8216 }, { "epoch": 0.67, "grad_norm": 13.76705176843207, "learning_rate": 2.6313516943170836e-06, "loss": 0.6461, "step": 8217 }, { "epoch": 0.67, "grad_norm": 6.052215955873879, "learning_rate": 2.630193382640583e-06, "loss": 0.6595, "step": 8218 }, { "epoch": 0.67, "grad_norm": 15.38530910237124, "learning_rate": 2.6290352349696196e-06, "loss": 0.7473, "step": 8219 }, { "epoch": 0.67, "grad_norm": 10.268505480326876, "learning_rate": 2.627877251384351e-06, "loss": 0.6669, "step": 8220 }, { "epoch": 0.67, "grad_norm": 12.654271352594717, "learning_rate": 2.6267194319649087e-06, "loss": 0.5112, "step": 8221 }, { "epoch": 0.67, "grad_norm": 18.583044626326892, "learning_rate": 2.625561776791427e-06, "loss": 0.675, "step": 8222 }, { "epoch": 0.67, "grad_norm": 22.33390887622557, "learning_rate": 2.6244042859440195e-06, "loss": 0.6994, "step": 8223 }, { "epoch": 0.67, "grad_norm": 16.730588975323435, "learning_rate": 2.623246959502795e-06, "loss": 0.4905, "step": 8224 }, { "epoch": 0.67, "grad_norm": 14.676263907689403, "learning_rate": 2.622089797547846e-06, "loss": 0.6832, "step": 8225 }, { "epoch": 0.67, "grad_norm": 14.253781704791074, "learning_rate": 2.6209328001592538e-06, "loss": 0.7666, "step": 8226 }, { "epoch": 0.67, "grad_norm": 33.8195551811475, "learning_rate": 2.619775967417096e-06, "loss": 0.7291, "step": 8227 }, { "epoch": 0.67, "grad_norm": 24.18578915126938, "learning_rate": 2.6186192994014238e-06, "loss": 0.722, "step": 8228 }, { "epoch": 0.67, "grad_norm": 14.377973655685794, "learning_rate": 2.6174627961922926e-06, "loss": 0.6936, "step": 8229 }, { "epoch": 0.67, "grad_norm": 32.98636212297213, "learning_rate": 2.6163064578697363e-06, "loss": 0.6515, "step": 8230 }, { "epoch": 0.67, "grad_norm": 9.386963435014797, "learning_rate": 2.615150284513783e-06, "loss": 0.7523, "step": 8231 }, { "epoch": 0.67, "grad_norm": 15.205331967586337, "learning_rate": 2.613994276204447e-06, "loss": 0.5983, "step": 8232 }, { "epoch": 0.67, "grad_norm": 28.6514188069815, "learning_rate": 2.6128384330217283e-06, "loss": 0.7293, "step": 8233 }, { "epoch": 0.67, "grad_norm": 17.831687296780995, "learning_rate": 2.6116827550456247e-06, "loss": 0.6784, "step": 8234 }, { "epoch": 0.67, "grad_norm": 19.516646057738622, "learning_rate": 2.610527242356109e-06, "loss": 0.779, "step": 8235 }, { "epoch": 0.67, "grad_norm": 33.49779404976335, "learning_rate": 2.609371895033156e-06, "loss": 0.8037, "step": 8236 }, { "epoch": 0.67, "grad_norm": 50.56590111142393, "learning_rate": 2.608216713156717e-06, "loss": 0.6721, "step": 8237 }, { "epoch": 0.67, "grad_norm": 49.68248270482413, "learning_rate": 2.6070616968067446e-06, "loss": 0.7132, "step": 8238 }, { "epoch": 0.67, "grad_norm": 19.86493983513108, "learning_rate": 2.60590684606317e-06, "loss": 0.6762, "step": 8239 }, { "epoch": 0.67, "grad_norm": 12.342508565066408, "learning_rate": 2.6047521610059153e-06, "loss": 0.7483, "step": 8240 }, { "epoch": 0.67, "grad_norm": 11.843376396440245, "learning_rate": 2.603597641714893e-06, "loss": 0.765, "step": 8241 }, { "epoch": 0.67, "grad_norm": 17.959175600565697, "learning_rate": 2.6024432882700012e-06, "loss": 0.7787, "step": 8242 }, { "epoch": 0.67, "grad_norm": 28.307255426551528, "learning_rate": 2.601289100751132e-06, "loss": 0.714, "step": 8243 }, { "epoch": 0.67, "grad_norm": 26.479581534129256, "learning_rate": 2.6001350792381587e-06, "loss": 0.8364, "step": 8244 }, { "epoch": 0.67, "grad_norm": 14.725010026610999, "learning_rate": 2.5989812238109504e-06, "loss": 0.6629, "step": 8245 }, { "epoch": 0.67, "grad_norm": 16.52815762956759, "learning_rate": 2.5978275345493577e-06, "loss": 0.7947, "step": 8246 }, { "epoch": 0.67, "grad_norm": 48.565615276067895, "learning_rate": 2.5966740115332283e-06, "loss": 0.6574, "step": 8247 }, { "epoch": 0.67, "grad_norm": 33.74968617743713, "learning_rate": 2.5955206548423867e-06, "loss": 0.6726, "step": 8248 }, { "epoch": 0.67, "grad_norm": 10.138348242999951, "learning_rate": 2.5943674645566576e-06, "loss": 0.8163, "step": 8249 }, { "epoch": 0.67, "grad_norm": 23.37531335896041, "learning_rate": 2.5932144407558468e-06, "loss": 0.8429, "step": 8250 }, { "epoch": 0.67, "grad_norm": 26.3693816235651, "learning_rate": 2.592061583519749e-06, "loss": 0.8271, "step": 8251 }, { "epoch": 0.67, "grad_norm": 17.781584712441497, "learning_rate": 2.5909088929281534e-06, "loss": 0.739, "step": 8252 }, { "epoch": 0.67, "grad_norm": 21.240776158318038, "learning_rate": 2.5897563690608307e-06, "loss": 0.8345, "step": 8253 }, { "epoch": 0.67, "grad_norm": 20.702491218654632, "learning_rate": 2.5886040119975443e-06, "loss": 0.7993, "step": 8254 }, { "epoch": 0.67, "grad_norm": 12.629814151892004, "learning_rate": 2.587451821818041e-06, "loss": 0.7768, "step": 8255 }, { "epoch": 0.67, "grad_norm": 40.903013276169, "learning_rate": 2.586299798602065e-06, "loss": 0.9042, "step": 8256 }, { "epoch": 0.67, "grad_norm": 10.475404344049656, "learning_rate": 2.5851479424293403e-06, "loss": 0.7024, "step": 8257 }, { "epoch": 0.67, "grad_norm": 13.456025969614958, "learning_rate": 2.5839962533795813e-06, "loss": 0.786, "step": 8258 }, { "epoch": 0.67, "grad_norm": 24.09518736140198, "learning_rate": 2.582844731532496e-06, "loss": 0.7525, "step": 8259 }, { "epoch": 0.67, "grad_norm": 16.993367992918316, "learning_rate": 2.5816933769677753e-06, "loss": 0.7025, "step": 8260 }, { "epoch": 0.67, "grad_norm": 9.65078052737918, "learning_rate": 2.5805421897650996e-06, "loss": 0.7227, "step": 8261 }, { "epoch": 0.67, "grad_norm": 33.898179329554765, "learning_rate": 2.5793911700041362e-06, "loss": 0.8241, "step": 8262 }, { "epoch": 0.67, "grad_norm": 12.844796282842156, "learning_rate": 2.578240317764548e-06, "loss": 0.8568, "step": 8263 }, { "epoch": 0.67, "grad_norm": 7.8673172758208185, "learning_rate": 2.5770896331259778e-06, "loss": 0.7153, "step": 8264 }, { "epoch": 0.67, "grad_norm": 14.084314347936505, "learning_rate": 2.5759391161680587e-06, "loss": 0.732, "step": 8265 }, { "epoch": 0.67, "grad_norm": 60.96104111663253, "learning_rate": 2.574788766970418e-06, "loss": 0.8572, "step": 8266 }, { "epoch": 0.67, "grad_norm": 7.003459601832733, "learning_rate": 2.5736385856126656e-06, "loss": 0.691, "step": 8267 }, { "epoch": 0.67, "grad_norm": 30.690852447772407, "learning_rate": 2.5724885721744e-06, "loss": 0.8399, "step": 8268 }, { "epoch": 0.67, "grad_norm": 6.949819403904213, "learning_rate": 2.5713387267352084e-06, "loss": 0.7524, "step": 8269 }, { "epoch": 0.67, "grad_norm": 8.421339360589064, "learning_rate": 2.570189049374671e-06, "loss": 0.6622, "step": 8270 }, { "epoch": 0.67, "grad_norm": 12.09286622812239, "learning_rate": 2.569039540172349e-06, "loss": 0.6285, "step": 8271 }, { "epoch": 0.67, "grad_norm": 21.914259386453125, "learning_rate": 2.5678901992077993e-06, "loss": 0.6467, "step": 8272 }, { "epoch": 0.67, "grad_norm": 17.869546336735226, "learning_rate": 2.566741026560562e-06, "loss": 0.6533, "step": 8273 }, { "epoch": 0.67, "grad_norm": 13.474181994400592, "learning_rate": 2.5655920223101662e-06, "loss": 0.7067, "step": 8274 }, { "epoch": 0.67, "grad_norm": 13.422399016321672, "learning_rate": 2.564443186536131e-06, "loss": 0.6449, "step": 8275 }, { "epoch": 0.67, "grad_norm": 10.905226227658213, "learning_rate": 2.5632945193179603e-06, "loss": 0.5227, "step": 8276 }, { "epoch": 0.67, "grad_norm": 7.384792416364311, "learning_rate": 2.562146020735154e-06, "loss": 0.8417, "step": 8277 }, { "epoch": 0.67, "grad_norm": 7.488766354034559, "learning_rate": 2.5609976908671906e-06, "loss": 0.7317, "step": 8278 }, { "epoch": 0.67, "grad_norm": 46.21264585044283, "learning_rate": 2.559849529793547e-06, "loss": 0.5848, "step": 8279 }, { "epoch": 0.67, "grad_norm": 14.41707088896261, "learning_rate": 2.55870153759368e-06, "loss": 0.7727, "step": 8280 }, { "epoch": 0.67, "grad_norm": 6.240576917652196, "learning_rate": 2.5575537143470386e-06, "loss": 0.6971, "step": 8281 }, { "epoch": 0.67, "grad_norm": 11.603423489036432, "learning_rate": 2.556406060133059e-06, "loss": 0.5985, "step": 8282 }, { "epoch": 0.67, "grad_norm": 7.499365229119412, "learning_rate": 2.555258575031164e-06, "loss": 0.6869, "step": 8283 }, { "epoch": 0.67, "grad_norm": 16.838022291498124, "learning_rate": 2.5541112591207705e-06, "loss": 0.6011, "step": 8284 }, { "epoch": 0.67, "grad_norm": 18.39698225368846, "learning_rate": 2.5529641124812776e-06, "loss": 0.7313, "step": 8285 }, { "epoch": 0.67, "grad_norm": 20.58441821433014, "learning_rate": 2.5518171351920773e-06, "loss": 0.7285, "step": 8286 }, { "epoch": 0.67, "grad_norm": 13.179078535201766, "learning_rate": 2.550670327332546e-06, "loss": 0.819, "step": 8287 }, { "epoch": 0.67, "grad_norm": 10.770960324872755, "learning_rate": 2.5495236889820507e-06, "loss": 0.6965, "step": 8288 }, { "epoch": 0.67, "grad_norm": 9.767263230814526, "learning_rate": 2.5483772202199452e-06, "loss": 0.7579, "step": 8289 }, { "epoch": 0.67, "grad_norm": 6.965736691152021, "learning_rate": 2.5472309211255707e-06, "loss": 0.7983, "step": 8290 }, { "epoch": 0.67, "grad_norm": 24.97808997137343, "learning_rate": 2.546084791778263e-06, "loss": 0.7247, "step": 8291 }, { "epoch": 0.67, "grad_norm": 12.992255972259166, "learning_rate": 2.5449388322573365e-06, "loss": 0.7102, "step": 8292 }, { "epoch": 0.67, "grad_norm": 12.308641024431868, "learning_rate": 2.5437930426421053e-06, "loss": 0.4962, "step": 8293 }, { "epoch": 0.67, "grad_norm": 9.471606044559692, "learning_rate": 2.542647423011857e-06, "loss": 0.7973, "step": 8294 }, { "epoch": 0.67, "grad_norm": 25.998663466745278, "learning_rate": 2.541501973445882e-06, "loss": 0.6297, "step": 8295 }, { "epoch": 0.67, "grad_norm": 9.288472817795983, "learning_rate": 2.540356694023448e-06, "loss": 0.6668, "step": 8296 }, { "epoch": 0.67, "grad_norm": 11.995650766777597, "learning_rate": 2.5392115848238203e-06, "loss": 0.673, "step": 8297 }, { "epoch": 0.67, "grad_norm": 7.736268342679664, "learning_rate": 2.538066645926245e-06, "loss": 0.7518, "step": 8298 }, { "epoch": 0.67, "grad_norm": 5.223080692719563, "learning_rate": 2.536921877409958e-06, "loss": 0.7897, "step": 8299 }, { "epoch": 0.67, "grad_norm": 8.73998769571859, "learning_rate": 2.535777279354189e-06, "loss": 0.7061, "step": 8300 }, { "epoch": 0.67, "grad_norm": 7.554764900826824, "learning_rate": 2.5346328518381447e-06, "loss": 0.5404, "step": 8301 }, { "epoch": 0.67, "grad_norm": 23.340008431974912, "learning_rate": 2.5334885949410327e-06, "loss": 0.7239, "step": 8302 }, { "epoch": 0.67, "grad_norm": 34.47352832458383, "learning_rate": 2.5323445087420385e-06, "loss": 0.5376, "step": 8303 }, { "epoch": 0.67, "grad_norm": 18.71218663547627, "learning_rate": 2.531200593320343e-06, "loss": 0.735, "step": 8304 }, { "epoch": 0.67, "grad_norm": 7.445078966527546, "learning_rate": 2.530056848755112e-06, "loss": 0.7634, "step": 8305 }, { "epoch": 0.67, "grad_norm": 14.618680343058326, "learning_rate": 2.5289132751254985e-06, "loss": 0.7824, "step": 8306 }, { "epoch": 0.67, "grad_norm": 7.306603566080601, "learning_rate": 2.5277698725106462e-06, "loss": 0.7184, "step": 8307 }, { "epoch": 0.67, "grad_norm": 14.592755503060456, "learning_rate": 2.526626640989683e-06, "loss": 0.5805, "step": 8308 }, { "epoch": 0.67, "grad_norm": 4.836907742020782, "learning_rate": 2.525483580641732e-06, "loss": 0.7413, "step": 8309 }, { "epoch": 0.67, "grad_norm": 10.70722702735294, "learning_rate": 2.524340691545896e-06, "loss": 0.7518, "step": 8310 }, { "epoch": 0.68, "grad_norm": 9.253993510084053, "learning_rate": 2.523197973781274e-06, "loss": 0.6853, "step": 8311 }, { "epoch": 0.68, "grad_norm": 7.9131644903090494, "learning_rate": 2.5220554274269475e-06, "loss": 0.6818, "step": 8312 }, { "epoch": 0.68, "grad_norm": 10.27759790776848, "learning_rate": 2.5209130525619884e-06, "loss": 0.5856, "step": 8313 }, { "epoch": 0.68, "grad_norm": 9.309877892345757, "learning_rate": 2.519770849265455e-06, "loss": 0.7758, "step": 8314 }, { "epoch": 0.68, "grad_norm": 14.283158577072696, "learning_rate": 2.518628817616394e-06, "loss": 0.7568, "step": 8315 }, { "epoch": 0.68, "grad_norm": 9.672996938056864, "learning_rate": 2.517486957693844e-06, "loss": 0.7764, "step": 8316 }, { "epoch": 0.68, "grad_norm": 31.567805309846364, "learning_rate": 2.516345269576827e-06, "loss": 0.7585, "step": 8317 }, { "epoch": 0.68, "grad_norm": 10.860002033694853, "learning_rate": 2.5152037533443575e-06, "loss": 0.5649, "step": 8318 }, { "epoch": 0.68, "grad_norm": 8.556735012495583, "learning_rate": 2.514062409075433e-06, "loss": 0.7482, "step": 8319 }, { "epoch": 0.68, "grad_norm": 12.074569219762061, "learning_rate": 2.512921236849043e-06, "loss": 0.6966, "step": 8320 }, { "epoch": 0.68, "grad_norm": 10.889438682207485, "learning_rate": 2.5117802367441613e-06, "loss": 0.5698, "step": 8321 }, { "epoch": 0.68, "grad_norm": 6.115744169201199, "learning_rate": 2.510639408839757e-06, "loss": 0.7425, "step": 8322 }, { "epoch": 0.68, "grad_norm": 6.761739507948528, "learning_rate": 2.5094987532147786e-06, "loss": 0.688, "step": 8323 }, { "epoch": 0.68, "grad_norm": 17.73763991038439, "learning_rate": 2.5083582699481667e-06, "loss": 0.6337, "step": 8324 }, { "epoch": 0.68, "grad_norm": 5.663564222221587, "learning_rate": 2.507217959118854e-06, "loss": 0.6352, "step": 8325 }, { "epoch": 0.68, "grad_norm": 332.42161985430135, "learning_rate": 2.5060778208057533e-06, "loss": 0.6817, "step": 8326 }, { "epoch": 0.68, "grad_norm": 8.030513051004808, "learning_rate": 2.50493785508777e-06, "loss": 0.6418, "step": 8327 }, { "epoch": 0.68, "grad_norm": 17.22949239095917, "learning_rate": 2.5037980620437963e-06, "loss": 0.4806, "step": 8328 }, { "epoch": 0.68, "grad_norm": 4.038479820147205, "learning_rate": 2.502658441752716e-06, "loss": 0.7588, "step": 8329 }, { "epoch": 0.68, "grad_norm": 11.106475270562205, "learning_rate": 2.501518994293396e-06, "loss": 0.5461, "step": 8330 }, { "epoch": 0.68, "grad_norm": 18.50909960720777, "learning_rate": 2.500379719744691e-06, "loss": 0.7187, "step": 8331 }, { "epoch": 0.68, "grad_norm": 13.828695655621319, "learning_rate": 2.499240618185451e-06, "loss": 0.7435, "step": 8332 }, { "epoch": 0.68, "grad_norm": 7.669078711104005, "learning_rate": 2.498101689694506e-06, "loss": 0.782, "step": 8333 }, { "epoch": 0.68, "grad_norm": 5.84590706742733, "learning_rate": 2.4969629343506767e-06, "loss": 0.8187, "step": 8334 }, { "epoch": 0.68, "grad_norm": 33.6666776788197, "learning_rate": 2.495824352232771e-06, "loss": 0.5788, "step": 8335 }, { "epoch": 0.68, "grad_norm": 3.593049303548369, "learning_rate": 2.4946859434195904e-06, "loss": 0.6803, "step": 8336 }, { "epoch": 0.68, "grad_norm": 8.795124251341287, "learning_rate": 2.4935477079899167e-06, "loss": 0.6579, "step": 8337 }, { "epoch": 0.68, "grad_norm": 25.447033559238918, "learning_rate": 2.4924096460225223e-06, "loss": 0.7124, "step": 8338 }, { "epoch": 0.68, "grad_norm": 7.060984452592301, "learning_rate": 2.4912717575961703e-06, "loss": 0.7204, "step": 8339 }, { "epoch": 0.68, "grad_norm": 13.048779600965632, "learning_rate": 2.4901340427896097e-06, "loss": 0.5793, "step": 8340 }, { "epoch": 0.68, "grad_norm": 6.823225016563455, "learning_rate": 2.4889965016815766e-06, "loss": 0.6722, "step": 8341 }, { "epoch": 0.68, "grad_norm": 25.295642173460735, "learning_rate": 2.487859134350794e-06, "loss": 0.6823, "step": 8342 }, { "epoch": 0.68, "grad_norm": 7.6884366581580315, "learning_rate": 2.4867219408759797e-06, "loss": 0.7397, "step": 8343 }, { "epoch": 0.68, "grad_norm": 3.6688460649343075, "learning_rate": 2.4855849213358314e-06, "loss": 0.6021, "step": 8344 }, { "epoch": 0.68, "grad_norm": 5.027713290032436, "learning_rate": 2.4844480758090366e-06, "loss": 0.6631, "step": 8345 }, { "epoch": 0.68, "grad_norm": 6.887070393685942, "learning_rate": 2.483311404374276e-06, "loss": 0.6126, "step": 8346 }, { "epoch": 0.68, "grad_norm": 50.70892855282612, "learning_rate": 2.4821749071102132e-06, "loss": 0.672, "step": 8347 }, { "epoch": 0.68, "grad_norm": 3.9507228051166736, "learning_rate": 2.4810385840955e-06, "loss": 0.6836, "step": 8348 }, { "epoch": 0.68, "grad_norm": 6.45342169485433, "learning_rate": 2.4799024354087758e-06, "loss": 0.7652, "step": 8349 }, { "epoch": 0.68, "grad_norm": 5.344406007275595, "learning_rate": 2.478766461128672e-06, "loss": 0.5764, "step": 8350 }, { "epoch": 0.68, "grad_norm": 6.187591748854834, "learning_rate": 2.477630661333803e-06, "loss": 0.6414, "step": 8351 }, { "epoch": 0.68, "grad_norm": 23.064222526382782, "learning_rate": 2.476495036102776e-06, "loss": 0.5848, "step": 8352 }, { "epoch": 0.68, "grad_norm": 8.4341490723611, "learning_rate": 2.475359585514182e-06, "loss": 0.803, "step": 8353 }, { "epoch": 0.68, "grad_norm": 18.23331533632398, "learning_rate": 2.474224309646601e-06, "loss": 0.6365, "step": 8354 }, { "epoch": 0.68, "grad_norm": 10.63785711862461, "learning_rate": 2.4730892085786018e-06, "loss": 0.607, "step": 8355 }, { "epoch": 0.68, "grad_norm": 4.905295188032726, "learning_rate": 2.4719542823887375e-06, "loss": 0.6336, "step": 8356 }, { "epoch": 0.68, "grad_norm": 4.571715588261021, "learning_rate": 2.470819531155557e-06, "loss": 0.633, "step": 8357 }, { "epoch": 0.68, "grad_norm": 3.080861161179966, "learning_rate": 2.4696849549575878e-06, "loss": 0.5545, "step": 8358 }, { "epoch": 0.68, "grad_norm": 5.551542271881336, "learning_rate": 2.4685505538733562e-06, "loss": 0.7473, "step": 8359 }, { "epoch": 0.68, "grad_norm": 8.773371456743488, "learning_rate": 2.4674163279813617e-06, "loss": 0.7983, "step": 8360 }, { "epoch": 0.68, "grad_norm": 12.303284594775095, "learning_rate": 2.4662822773601055e-06, "loss": 0.822, "step": 8361 }, { "epoch": 0.68, "grad_norm": 4.432959065770363, "learning_rate": 2.465148402088069e-06, "loss": 0.5756, "step": 8362 }, { "epoch": 0.68, "grad_norm": 9.635145768457829, "learning_rate": 2.464014702243722e-06, "loss": 0.5492, "step": 8363 }, { "epoch": 0.68, "grad_norm": 4.632664306787909, "learning_rate": 2.4628811779055277e-06, "loss": 0.7039, "step": 8364 }, { "epoch": 0.68, "grad_norm": 9.380741959344734, "learning_rate": 2.461747829151929e-06, "loss": 0.6809, "step": 8365 }, { "epoch": 0.68, "grad_norm": 4.6158743438646175, "learning_rate": 2.4606146560613663e-06, "loss": 0.6149, "step": 8366 }, { "epoch": 0.68, "grad_norm": 4.733452003609559, "learning_rate": 2.4594816587122557e-06, "loss": 0.5411, "step": 8367 }, { "epoch": 0.68, "grad_norm": 4.018816770527788, "learning_rate": 2.4583488371830115e-06, "loss": 0.6974, "step": 8368 }, { "epoch": 0.68, "grad_norm": 4.632716556586067, "learning_rate": 2.457216191552032e-06, "loss": 0.5839, "step": 8369 }, { "epoch": 0.68, "grad_norm": 11.8896188836641, "learning_rate": 2.4560837218977006e-06, "loss": 0.6084, "step": 8370 }, { "epoch": 0.68, "grad_norm": 2.945213617712697, "learning_rate": 2.454951428298395e-06, "loss": 0.7008, "step": 8371 }, { "epoch": 0.68, "grad_norm": 7.114222287552107, "learning_rate": 2.4538193108324742e-06, "loss": 0.5078, "step": 8372 }, { "epoch": 0.68, "grad_norm": 5.577578606011007, "learning_rate": 2.4526873695782928e-06, "loss": 0.6545, "step": 8373 }, { "epoch": 0.68, "grad_norm": 6.516088559992444, "learning_rate": 2.451555604614181e-06, "loss": 0.6411, "step": 8374 }, { "epoch": 0.68, "grad_norm": 4.301408951108873, "learning_rate": 2.450424016018469e-06, "loss": 0.6349, "step": 8375 }, { "epoch": 0.68, "grad_norm": 10.600475921349892, "learning_rate": 2.449292603869467e-06, "loss": 0.6219, "step": 8376 }, { "epoch": 0.68, "grad_norm": 5.393286904023358, "learning_rate": 2.4481613682454796e-06, "loss": 0.8101, "step": 8377 }, { "epoch": 0.68, "grad_norm": 4.77895276643497, "learning_rate": 2.4470303092247926e-06, "loss": 0.6838, "step": 8378 }, { "epoch": 0.68, "grad_norm": 13.797426485172773, "learning_rate": 2.4458994268856835e-06, "loss": 0.6073, "step": 8379 }, { "epoch": 0.68, "grad_norm": 10.2079912010546, "learning_rate": 2.4447687213064157e-06, "loss": 0.6675, "step": 8380 }, { "epoch": 0.68, "grad_norm": 6.90362430005197, "learning_rate": 2.4436381925652397e-06, "loss": 0.6694, "step": 8381 }, { "epoch": 0.68, "grad_norm": 10.103958531934417, "learning_rate": 2.442507840740399e-06, "loss": 0.6136, "step": 8382 }, { "epoch": 0.68, "grad_norm": 9.164638555755664, "learning_rate": 2.4413776659101172e-06, "loss": 0.5395, "step": 8383 }, { "epoch": 0.68, "grad_norm": 21.06306834013, "learning_rate": 2.4402476681526125e-06, "loss": 0.8171, "step": 8384 }, { "epoch": 0.68, "grad_norm": 9.470979333296741, "learning_rate": 2.4391178475460873e-06, "loss": 0.635, "step": 8385 }, { "epoch": 0.68, "grad_norm": 5.208569615915241, "learning_rate": 2.437988204168732e-06, "loss": 0.7552, "step": 8386 }, { "epoch": 0.68, "grad_norm": 4.93668510208142, "learning_rate": 2.4368587380987246e-06, "loss": 0.6462, "step": 8387 }, { "epoch": 0.68, "grad_norm": 5.213279772036837, "learning_rate": 2.435729449414229e-06, "loss": 0.6508, "step": 8388 }, { "epoch": 0.68, "grad_norm": 4.520355371713811, "learning_rate": 2.4346003381934036e-06, "loss": 0.5953, "step": 8389 }, { "epoch": 0.68, "grad_norm": 8.457956602423597, "learning_rate": 2.433471404514386e-06, "loss": 0.5243, "step": 8390 }, { "epoch": 0.68, "grad_norm": 4.307006121715574, "learning_rate": 2.432342648455309e-06, "loss": 0.5886, "step": 8391 }, { "epoch": 0.68, "grad_norm": 5.659180606121157, "learning_rate": 2.431214070094289e-06, "loss": 0.6104, "step": 8392 }, { "epoch": 0.68, "grad_norm": 7.20484361301813, "learning_rate": 2.4300856695094287e-06, "loss": 0.7232, "step": 8393 }, { "epoch": 0.68, "grad_norm": 5.387265914547474, "learning_rate": 2.42895744677882e-06, "loss": 0.6597, "step": 8394 }, { "epoch": 0.68, "grad_norm": 9.139932113045544, "learning_rate": 2.427829401980547e-06, "loss": 0.8518, "step": 8395 }, { "epoch": 0.68, "grad_norm": 6.4238743651347585, "learning_rate": 2.4267015351926747e-06, "loss": 0.7006, "step": 8396 }, { "epoch": 0.68, "grad_norm": 12.991008989031487, "learning_rate": 2.4255738464932573e-06, "loss": 0.7193, "step": 8397 }, { "epoch": 0.68, "grad_norm": 5.529679732086161, "learning_rate": 2.4244463359603415e-06, "loss": 0.5918, "step": 8398 }, { "epoch": 0.68, "grad_norm": 4.677988042702766, "learning_rate": 2.423319003671956e-06, "loss": 0.5842, "step": 8399 }, { "epoch": 0.68, "grad_norm": 3.5476273423304305, "learning_rate": 2.42219184970612e-06, "loss": 0.6553, "step": 8400 }, { "epoch": 0.68, "grad_norm": 6.158805973732745, "learning_rate": 2.4210648741408364e-06, "loss": 0.6194, "step": 8401 }, { "epoch": 0.68, "grad_norm": 5.538379827666495, "learning_rate": 2.419938077054105e-06, "loss": 0.7514, "step": 8402 }, { "epoch": 0.68, "grad_norm": 4.258544415595995, "learning_rate": 2.418811458523903e-06, "loss": 0.6597, "step": 8403 }, { "epoch": 0.68, "grad_norm": 9.909075842941341, "learning_rate": 2.4176850186281993e-06, "loss": 0.7525, "step": 8404 }, { "epoch": 0.68, "grad_norm": 9.061979521252233, "learning_rate": 2.4165587574449533e-06, "loss": 0.6153, "step": 8405 }, { "epoch": 0.68, "grad_norm": 5.8261232271363, "learning_rate": 2.4154326750521084e-06, "loss": 0.7194, "step": 8406 }, { "epoch": 0.68, "grad_norm": 4.063607746638771, "learning_rate": 2.4143067715275965e-06, "loss": 0.6049, "step": 8407 }, { "epoch": 0.68, "grad_norm": 8.94359803917466, "learning_rate": 2.4131810469493343e-06, "loss": 0.6781, "step": 8408 }, { "epoch": 0.68, "grad_norm": 3.6059435897762584, "learning_rate": 2.412055501395234e-06, "loss": 0.7044, "step": 8409 }, { "epoch": 0.68, "grad_norm": 8.089435314273251, "learning_rate": 2.410930134943187e-06, "loss": 0.6873, "step": 8410 }, { "epoch": 0.68, "grad_norm": 4.202930531359204, "learning_rate": 2.4098049476710767e-06, "loss": 0.6739, "step": 8411 }, { "epoch": 0.68, "grad_norm": 9.240341042382903, "learning_rate": 2.4086799396567755e-06, "loss": 0.6435, "step": 8412 }, { "epoch": 0.68, "grad_norm": 29.518656371527673, "learning_rate": 2.407555110978136e-06, "loss": 0.7528, "step": 8413 }, { "epoch": 0.68, "grad_norm": 6.564930486049343, "learning_rate": 2.4064304617130076e-06, "loss": 0.5093, "step": 8414 }, { "epoch": 0.68, "grad_norm": 5.92270354347785, "learning_rate": 2.4053059919392197e-06, "loss": 0.6441, "step": 8415 }, { "epoch": 0.68, "grad_norm": 4.682227112888556, "learning_rate": 2.4041817017345963e-06, "loss": 0.6611, "step": 8416 }, { "epoch": 0.68, "grad_norm": 7.132099312397348, "learning_rate": 2.4030575911769443e-06, "loss": 0.6452, "step": 8417 }, { "epoch": 0.68, "grad_norm": 5.271616264140666, "learning_rate": 2.4019336603440567e-06, "loss": 0.6882, "step": 8418 }, { "epoch": 0.68, "grad_norm": 3.866446913623662, "learning_rate": 2.400809909313721e-06, "loss": 0.608, "step": 8419 }, { "epoch": 0.68, "grad_norm": 4.430563855304474, "learning_rate": 2.3996863381637046e-06, "loss": 0.5782, "step": 8420 }, { "epoch": 0.68, "grad_norm": 5.0549862715664595, "learning_rate": 2.398562946971767e-06, "loss": 0.7621, "step": 8421 }, { "epoch": 0.68, "grad_norm": 6.318287326484502, "learning_rate": 2.3974397358156516e-06, "loss": 0.763, "step": 8422 }, { "epoch": 0.68, "grad_norm": 8.306378489138103, "learning_rate": 2.396316704773095e-06, "loss": 0.4981, "step": 8423 }, { "epoch": 0.68, "grad_norm": 8.65721648549689, "learning_rate": 2.395193853921815e-06, "loss": 0.6421, "step": 8424 }, { "epoch": 0.68, "grad_norm": 5.554766796455805, "learning_rate": 2.394071183339523e-06, "loss": 0.6486, "step": 8425 }, { "epoch": 0.68, "grad_norm": 5.340749926459876, "learning_rate": 2.3929486931039143e-06, "loss": 0.5543, "step": 8426 }, { "epoch": 0.68, "grad_norm": 9.19554677699654, "learning_rate": 2.391826383292671e-06, "loss": 0.8047, "step": 8427 }, { "epoch": 0.68, "grad_norm": 6.122861600085072, "learning_rate": 2.390704253983464e-06, "loss": 0.6904, "step": 8428 }, { "epoch": 0.68, "grad_norm": 7.113127938262337, "learning_rate": 2.3895823052539503e-06, "loss": 0.658, "step": 8429 }, { "epoch": 0.68, "grad_norm": 10.604292520170947, "learning_rate": 2.38846053718178e-06, "loss": 0.4966, "step": 8430 }, { "epoch": 0.68, "grad_norm": 4.188822387185473, "learning_rate": 2.3873389498445814e-06, "loss": 0.5807, "step": 8431 }, { "epoch": 0.68, "grad_norm": 5.201203351394706, "learning_rate": 2.3862175433199823e-06, "loss": 0.6183, "step": 8432 }, { "epoch": 0.68, "grad_norm": 4.955220540322817, "learning_rate": 2.3850963176855833e-06, "loss": 0.6121, "step": 8433 }, { "epoch": 0.69, "grad_norm": 45.505539801826345, "learning_rate": 2.383975273018986e-06, "loss": 0.6639, "step": 8434 }, { "epoch": 0.69, "grad_norm": 3.9689423829973576, "learning_rate": 2.382854409397772e-06, "loss": 0.7081, "step": 8435 }, { "epoch": 0.69, "grad_norm": 6.7115536062502095, "learning_rate": 2.381733726899509e-06, "loss": 0.6311, "step": 8436 }, { "epoch": 0.69, "grad_norm": 8.730370587025364, "learning_rate": 2.3806132256017607e-06, "loss": 0.5415, "step": 8437 }, { "epoch": 0.69, "grad_norm": 14.413680394853325, "learning_rate": 2.3794929055820677e-06, "loss": 0.5518, "step": 8438 }, { "epoch": 0.69, "grad_norm": 4.719599601542386, "learning_rate": 2.3783727669179695e-06, "loss": 0.6229, "step": 8439 }, { "epoch": 0.69, "grad_norm": 10.074699043363571, "learning_rate": 2.3772528096869796e-06, "loss": 0.6762, "step": 8440 }, { "epoch": 0.69, "grad_norm": 11.543848847790601, "learning_rate": 2.376133033966611e-06, "loss": 0.7442, "step": 8441 }, { "epoch": 0.69, "grad_norm": 6.107740359912297, "learning_rate": 2.375013439834358e-06, "loss": 0.7017, "step": 8442 }, { "epoch": 0.69, "grad_norm": 29.291324378564525, "learning_rate": 2.3738940273677007e-06, "loss": 0.7695, "step": 8443 }, { "epoch": 0.69, "grad_norm": 5.929525708405646, "learning_rate": 2.3727747966441144e-06, "loss": 0.5682, "step": 8444 }, { "epoch": 0.69, "grad_norm": 6.302905681186479, "learning_rate": 2.371655747741053e-06, "loss": 0.7086, "step": 8445 }, { "epoch": 0.69, "grad_norm": 5.272121119177552, "learning_rate": 2.370536880735967e-06, "loss": 0.5548, "step": 8446 }, { "epoch": 0.69, "grad_norm": 7.558809159287319, "learning_rate": 2.3694181957062812e-06, "loss": 0.7837, "step": 8447 }, { "epoch": 0.69, "grad_norm": 5.780219238769598, "learning_rate": 2.3682996927294216e-06, "loss": 0.6779, "step": 8448 }, { "epoch": 0.69, "grad_norm": 3.5837353857638967, "learning_rate": 2.367181371882792e-06, "loss": 0.5634, "step": 8449 }, { "epoch": 0.69, "grad_norm": 4.898956436274996, "learning_rate": 2.366063233243791e-06, "loss": 0.7431, "step": 8450 }, { "epoch": 0.69, "grad_norm": 6.73302771299475, "learning_rate": 2.364945276889799e-06, "loss": 0.6452, "step": 8451 }, { "epoch": 0.69, "grad_norm": 8.296626556600119, "learning_rate": 2.3638275028981854e-06, "loss": 0.7699, "step": 8452 }, { "epoch": 0.69, "grad_norm": 5.636523699326703, "learning_rate": 2.362709911346307e-06, "loss": 0.6516, "step": 8453 }, { "epoch": 0.69, "grad_norm": 5.316881936498307, "learning_rate": 2.361592502311507e-06, "loss": 0.8503, "step": 8454 }, { "epoch": 0.69, "grad_norm": 12.2817977397594, "learning_rate": 2.3604752758711207e-06, "loss": 0.8209, "step": 8455 }, { "epoch": 0.69, "grad_norm": 2.9092249999626363, "learning_rate": 2.3593582321024625e-06, "loss": 0.6413, "step": 8456 }, { "epoch": 0.69, "grad_norm": 7.328579796523897, "learning_rate": 2.3582413710828445e-06, "loss": 0.6957, "step": 8457 }, { "epoch": 0.69, "grad_norm": 20.932373350132284, "learning_rate": 2.357124692889556e-06, "loss": 0.5434, "step": 8458 }, { "epoch": 0.69, "grad_norm": 3.293253765874854, "learning_rate": 2.356008197599881e-06, "loss": 0.6483, "step": 8459 }, { "epoch": 0.69, "grad_norm": 5.9979749443588055, "learning_rate": 2.354891885291086e-06, "loss": 0.7811, "step": 8460 }, { "epoch": 0.69, "grad_norm": 8.280738057088614, "learning_rate": 2.3537757560404263e-06, "loss": 0.6826, "step": 8461 }, { "epoch": 0.69, "grad_norm": 5.9218038486902955, "learning_rate": 2.3526598099251473e-06, "loss": 0.555, "step": 8462 }, { "epoch": 0.69, "grad_norm": 5.827334751349018, "learning_rate": 2.3515440470224778e-06, "loss": 0.596, "step": 8463 }, { "epoch": 0.69, "grad_norm": 29.19993497652659, "learning_rate": 2.3504284674096366e-06, "loss": 0.6562, "step": 8464 }, { "epoch": 0.69, "grad_norm": 9.29031311426116, "learning_rate": 2.3493130711638295e-06, "loss": 0.6663, "step": 8465 }, { "epoch": 0.69, "grad_norm": 9.578376961952015, "learning_rate": 2.348197858362248e-06, "loss": 0.7251, "step": 8466 }, { "epoch": 0.69, "grad_norm": 3.4244944882291874, "learning_rate": 2.347082829082072e-06, "loss": 0.6724, "step": 8467 }, { "epoch": 0.69, "grad_norm": 83.68547392394662, "learning_rate": 2.345967983400466e-06, "loss": 0.5521, "step": 8468 }, { "epoch": 0.69, "grad_norm": 3.8595389978510886, "learning_rate": 2.3448533213945884e-06, "loss": 0.5407, "step": 8469 }, { "epoch": 0.69, "grad_norm": 6.834497654367603, "learning_rate": 2.3437388431415774e-06, "loss": 0.7813, "step": 8470 }, { "epoch": 0.69, "grad_norm": 3.861775158801616, "learning_rate": 2.3426245487185663e-06, "loss": 0.5242, "step": 8471 }, { "epoch": 0.69, "grad_norm": 9.60543108929251, "learning_rate": 2.3415104382026678e-06, "loss": 0.6846, "step": 8472 }, { "epoch": 0.69, "grad_norm": 21.80984637096357, "learning_rate": 2.3403965116709863e-06, "loss": 0.66, "step": 8473 }, { "epoch": 0.69, "grad_norm": 3.143262241399748, "learning_rate": 2.339282769200611e-06, "loss": 0.8833, "step": 8474 }, { "epoch": 0.69, "grad_norm": 9.656634210889026, "learning_rate": 2.338169210868623e-06, "loss": 0.6943, "step": 8475 }, { "epoch": 0.69, "grad_norm": 4.391053079549587, "learning_rate": 2.3370558367520856e-06, "loss": 0.716, "step": 8476 }, { "epoch": 0.69, "grad_norm": 4.3907963189668795, "learning_rate": 2.3359426469280507e-06, "loss": 0.5086, "step": 8477 }, { "epoch": 0.69, "grad_norm": 2.6999854148092344, "learning_rate": 2.3348296414735595e-06, "loss": 0.6079, "step": 8478 }, { "epoch": 0.69, "grad_norm": 6.533069419882555, "learning_rate": 2.3337168204656392e-06, "loss": 0.7044, "step": 8479 }, { "epoch": 0.69, "grad_norm": 2.978556990570742, "learning_rate": 2.332604183981303e-06, "loss": 0.5809, "step": 8480 }, { "epoch": 0.69, "grad_norm": 6.490537664901921, "learning_rate": 2.3314917320975504e-06, "loss": 0.6561, "step": 8481 }, { "epoch": 0.69, "grad_norm": 7.402779595205896, "learning_rate": 2.3303794648913745e-06, "loss": 0.6203, "step": 8482 }, { "epoch": 0.69, "grad_norm": 2.7463131735289066, "learning_rate": 2.329267382439749e-06, "loss": 0.6419, "step": 8483 }, { "epoch": 0.69, "grad_norm": 8.098558513650591, "learning_rate": 2.3281554848196347e-06, "loss": 0.6464, "step": 8484 }, { "epoch": 0.69, "grad_norm": 7.228655708504801, "learning_rate": 2.3270437721079885e-06, "loss": 0.6681, "step": 8485 }, { "epoch": 0.69, "grad_norm": 10.982143389579852, "learning_rate": 2.3259322443817397e-06, "loss": 0.7015, "step": 8486 }, { "epoch": 0.69, "grad_norm": 14.438136988785494, "learning_rate": 2.3248209017178186e-06, "loss": 0.722, "step": 8487 }, { "epoch": 0.69, "grad_norm": 3.668595189596921, "learning_rate": 2.3237097441931333e-06, "loss": 0.8091, "step": 8488 }, { "epoch": 0.69, "grad_norm": 10.528631488265987, "learning_rate": 2.3225987718845873e-06, "loss": 0.7232, "step": 8489 }, { "epoch": 0.69, "grad_norm": 16.643916404353256, "learning_rate": 2.321487984869064e-06, "loss": 0.5179, "step": 8490 }, { "epoch": 0.69, "grad_norm": 3.201848526256978, "learning_rate": 2.3203773832234368e-06, "loss": 0.7617, "step": 8491 }, { "epoch": 0.69, "grad_norm": 8.002247498385998, "learning_rate": 2.319266967024569e-06, "loss": 0.6016, "step": 8492 }, { "epoch": 0.69, "grad_norm": 5.659220661693019, "learning_rate": 2.318156736349304e-06, "loss": 0.733, "step": 8493 }, { "epoch": 0.69, "grad_norm": 3.534320849692937, "learning_rate": 2.317046691274481e-06, "loss": 0.745, "step": 8494 }, { "epoch": 0.69, "grad_norm": 3.2198993212320612, "learning_rate": 2.3159368318769176e-06, "loss": 0.5961, "step": 8495 }, { "epoch": 0.69, "grad_norm": 3.789454027844675, "learning_rate": 2.314827158233428e-06, "loss": 0.8784, "step": 8496 }, { "epoch": 0.69, "grad_norm": 10.409392099085872, "learning_rate": 2.313717670420804e-06, "loss": 0.5898, "step": 8497 }, { "epoch": 0.69, "grad_norm": 4.23445199585428, "learning_rate": 2.312608368515834e-06, "loss": 0.7891, "step": 8498 }, { "epoch": 0.69, "grad_norm": 4.530778679043598, "learning_rate": 2.3114992525952855e-06, "loss": 0.5663, "step": 8499 }, { "epoch": 0.69, "grad_norm": 6.263107656925747, "learning_rate": 2.3103903227359177e-06, "loss": 0.772, "step": 8500 }, { "epoch": 0.69, "grad_norm": 3.9271039211966627, "learning_rate": 2.309281579014474e-06, "loss": 0.7443, "step": 8501 }, { "epoch": 0.69, "grad_norm": 7.8495665083021064, "learning_rate": 2.3081730215076853e-06, "loss": 0.6638, "step": 8502 }, { "epoch": 0.69, "grad_norm": 10.832249253696789, "learning_rate": 2.307064650292275e-06, "loss": 0.6766, "step": 8503 }, { "epoch": 0.69, "grad_norm": 2.9341253765305284, "learning_rate": 2.305956465444945e-06, "loss": 0.632, "step": 8504 }, { "epoch": 0.69, "grad_norm": 5.825577665956119, "learning_rate": 2.304848467042394e-06, "loss": 0.6561, "step": 8505 }, { "epoch": 0.69, "grad_norm": 6.264117929053284, "learning_rate": 2.303740655161296e-06, "loss": 0.7126, "step": 8506 }, { "epoch": 0.69, "grad_norm": 3.0985761816714943, "learning_rate": 2.3026330298783232e-06, "loss": 0.6879, "step": 8507 }, { "epoch": 0.69, "grad_norm": 10.111526910074229, "learning_rate": 2.301525591270129e-06, "loss": 0.6291, "step": 8508 }, { "epoch": 0.69, "grad_norm": 6.436197761418853, "learning_rate": 2.3004183394133535e-06, "loss": 0.775, "step": 8509 }, { "epoch": 0.69, "grad_norm": 6.226197413334194, "learning_rate": 2.299311274384628e-06, "loss": 0.8087, "step": 8510 }, { "epoch": 0.69, "grad_norm": 8.275333382510041, "learning_rate": 2.2982043962605653e-06, "loss": 0.6988, "step": 8511 }, { "epoch": 0.69, "grad_norm": 3.4940877325906765, "learning_rate": 2.2970977051177745e-06, "loss": 0.7818, "step": 8512 }, { "epoch": 0.69, "grad_norm": 7.997752836258018, "learning_rate": 2.2959912010328372e-06, "loss": 0.5971, "step": 8513 }, { "epoch": 0.69, "grad_norm": 14.66546642836317, "learning_rate": 2.2948848840823367e-06, "loss": 0.6045, "step": 8514 }, { "epoch": 0.69, "grad_norm": 11.35736695368174, "learning_rate": 2.293778754342835e-06, "loss": 0.4988, "step": 8515 }, { "epoch": 0.69, "grad_norm": 4.349383743325977, "learning_rate": 2.292672811890882e-06, "loss": 0.5822, "step": 8516 }, { "epoch": 0.69, "grad_norm": 12.173691667256804, "learning_rate": 2.2915670568030183e-06, "loss": 0.5628, "step": 8517 }, { "epoch": 0.69, "grad_norm": 2.5985034630801134, "learning_rate": 2.290461489155768e-06, "loss": 0.668, "step": 8518 }, { "epoch": 0.69, "grad_norm": 12.878706968707768, "learning_rate": 2.289356109025644e-06, "loss": 0.5932, "step": 8519 }, { "epoch": 0.69, "grad_norm": 16.124017807522456, "learning_rate": 2.288250916489142e-06, "loss": 0.8018, "step": 8520 }, { "epoch": 0.69, "grad_norm": 8.702467351221129, "learning_rate": 2.287145911622754e-06, "loss": 0.7048, "step": 8521 }, { "epoch": 0.69, "grad_norm": 3.43799033040984, "learning_rate": 2.2860410945029483e-06, "loss": 0.4545, "step": 8522 }, { "epoch": 0.69, "grad_norm": 10.593057633909131, "learning_rate": 2.284936465206189e-06, "loss": 0.6866, "step": 8523 }, { "epoch": 0.69, "grad_norm": 7.94601214930685, "learning_rate": 2.283832023808922e-06, "loss": 0.6446, "step": 8524 }, { "epoch": 0.69, "grad_norm": 6.228455379869982, "learning_rate": 2.2827277703875806e-06, "loss": 0.6375, "step": 8525 }, { "epoch": 0.69, "grad_norm": 9.453672403257391, "learning_rate": 2.2816237050185875e-06, "loss": 0.625, "step": 8526 }, { "epoch": 0.69, "grad_norm": 3.593239479790527, "learning_rate": 2.2805198277783484e-06, "loss": 0.6203, "step": 8527 }, { "epoch": 0.69, "grad_norm": 4.639895124065902, "learning_rate": 2.279416138743262e-06, "loss": 0.6008, "step": 8528 }, { "epoch": 0.69, "grad_norm": 8.612577966601357, "learning_rate": 2.278312637989708e-06, "loss": 0.6895, "step": 8529 }, { "epoch": 0.69, "grad_norm": 6.034300664018397, "learning_rate": 2.277209325594058e-06, "loss": 0.6863, "step": 8530 }, { "epoch": 0.69, "grad_norm": 3.1526826458246653, "learning_rate": 2.2761062016326667e-06, "loss": 0.7628, "step": 8531 }, { "epoch": 0.69, "grad_norm": 5.060198575844125, "learning_rate": 2.275003266181877e-06, "loss": 0.6354, "step": 8532 }, { "epoch": 0.69, "grad_norm": 4.317361763016478, "learning_rate": 2.2739005193180196e-06, "loss": 0.7614, "step": 8533 }, { "epoch": 0.69, "grad_norm": 9.294788949003523, "learning_rate": 2.2727979611174096e-06, "loss": 0.7102, "step": 8534 }, { "epoch": 0.69, "grad_norm": 3.093610412782277, "learning_rate": 2.2716955916563544e-06, "loss": 0.6581, "step": 8535 }, { "epoch": 0.69, "grad_norm": 4.795755252504728, "learning_rate": 2.270593411011141e-06, "loss": 0.7487, "step": 8536 }, { "epoch": 0.69, "grad_norm": 4.1270339416754664, "learning_rate": 2.2694914192580506e-06, "loss": 0.5363, "step": 8537 }, { "epoch": 0.69, "grad_norm": 2.330987429467569, "learning_rate": 2.2683896164733476e-06, "loss": 0.649, "step": 8538 }, { "epoch": 0.69, "grad_norm": 22.83871459355381, "learning_rate": 2.267288002733283e-06, "loss": 0.7177, "step": 8539 }, { "epoch": 0.69, "grad_norm": 3.4255811193506145, "learning_rate": 2.266186578114094e-06, "loss": 0.6266, "step": 8540 }, { "epoch": 0.69, "grad_norm": 6.116036650768977, "learning_rate": 2.2650853426920065e-06, "loss": 0.6037, "step": 8541 }, { "epoch": 0.69, "grad_norm": 5.610822245688341, "learning_rate": 2.2639842965432353e-06, "loss": 0.5242, "step": 8542 }, { "epoch": 0.69, "grad_norm": 5.34312530611171, "learning_rate": 2.262883439743976e-06, "loss": 0.6515, "step": 8543 }, { "epoch": 0.69, "grad_norm": 4.056419835352032, "learning_rate": 2.261782772370419e-06, "loss": 0.5427, "step": 8544 }, { "epoch": 0.69, "grad_norm": 5.037809901717693, "learning_rate": 2.2606822944987357e-06, "loss": 0.7306, "step": 8545 }, { "epoch": 0.69, "grad_norm": 57.07562588312198, "learning_rate": 2.2595820062050854e-06, "loss": 0.6369, "step": 8546 }, { "epoch": 0.69, "grad_norm": 3.970436896810602, "learning_rate": 2.258481907565613e-06, "loss": 0.6802, "step": 8547 }, { "epoch": 0.69, "grad_norm": 4.020054873050856, "learning_rate": 2.2573819986564576e-06, "loss": 0.638, "step": 8548 }, { "epoch": 0.69, "grad_norm": 6.9172050748144684, "learning_rate": 2.2562822795537364e-06, "loss": 0.5828, "step": 8549 }, { "epoch": 0.69, "grad_norm": 4.212179949032205, "learning_rate": 2.2551827503335556e-06, "loss": 0.7656, "step": 8550 }, { "epoch": 0.69, "grad_norm": 4.981269966080089, "learning_rate": 2.254083411072013e-06, "loss": 0.7586, "step": 8551 }, { "epoch": 0.69, "grad_norm": 4.107953207312299, "learning_rate": 2.252984261845188e-06, "loss": 0.6383, "step": 8552 }, { "epoch": 0.69, "grad_norm": 4.2747194570202565, "learning_rate": 2.2518853027291487e-06, "loss": 0.7714, "step": 8553 }, { "epoch": 0.69, "grad_norm": 3.5880206085975415, "learning_rate": 2.250786533799948e-06, "loss": 0.7179, "step": 8554 }, { "epoch": 0.69, "grad_norm": 3.37566109180717, "learning_rate": 2.249687955133632e-06, "loss": 0.6726, "step": 8555 }, { "epoch": 0.69, "grad_norm": 4.778730200727405, "learning_rate": 2.2485895668062263e-06, "loss": 0.5623, "step": 8556 }, { "epoch": 0.69, "grad_norm": 3.3290876360003567, "learning_rate": 2.2474913688937457e-06, "loss": 0.505, "step": 8557 }, { "epoch": 0.7, "grad_norm": 7.21595884769128, "learning_rate": 2.2463933614721965e-06, "loss": 0.7445, "step": 8558 }, { "epoch": 0.7, "grad_norm": 6.541253184190403, "learning_rate": 2.245295544617562e-06, "loss": 0.6479, "step": 8559 }, { "epoch": 0.7, "grad_norm": 5.15514891370725, "learning_rate": 2.2441979184058223e-06, "loss": 0.4837, "step": 8560 }, { "epoch": 0.7, "grad_norm": 4.535841138822577, "learning_rate": 2.2431004829129368e-06, "loss": 0.6225, "step": 8561 }, { "epoch": 0.7, "grad_norm": 8.495294118664097, "learning_rate": 2.2420032382148584e-06, "loss": 0.6908, "step": 8562 }, { "epoch": 0.7, "grad_norm": 6.302123796596489, "learning_rate": 2.240906184387522e-06, "loss": 0.8441, "step": 8563 }, { "epoch": 0.7, "grad_norm": 4.272287677467053, "learning_rate": 2.239809321506848e-06, "loss": 0.6411, "step": 8564 }, { "epoch": 0.7, "grad_norm": 4.84266452244706, "learning_rate": 2.2387126496487526e-06, "loss": 0.679, "step": 8565 }, { "epoch": 0.7, "grad_norm": 3.412564497109921, "learning_rate": 2.2376161688891247e-06, "loss": 0.5688, "step": 8566 }, { "epoch": 0.7, "grad_norm": 5.565136612770939, "learning_rate": 2.2365198793038526e-06, "loss": 0.532, "step": 8567 }, { "epoch": 0.7, "grad_norm": 5.134678779141866, "learning_rate": 2.2354237809688038e-06, "loss": 0.609, "step": 8568 }, { "epoch": 0.7, "grad_norm": 3.903154857176654, "learning_rate": 2.234327873959839e-06, "loss": 0.7834, "step": 8569 }, { "epoch": 0.7, "grad_norm": 26.88418753654049, "learning_rate": 2.233232158352799e-06, "loss": 0.5264, "step": 8570 }, { "epoch": 0.7, "grad_norm": 6.640729250089596, "learning_rate": 2.2321366342235124e-06, "loss": 0.793, "step": 8571 }, { "epoch": 0.7, "grad_norm": 5.552826160781963, "learning_rate": 2.2310413016478003e-06, "loss": 0.6787, "step": 8572 }, { "epoch": 0.7, "grad_norm": 3.1976662784308076, "learning_rate": 2.2299461607014654e-06, "loss": 0.6362, "step": 8573 }, { "epoch": 0.7, "grad_norm": 3.83499189961259, "learning_rate": 2.2288512114602986e-06, "loss": 0.658, "step": 8574 }, { "epoch": 0.7, "grad_norm": 3.1671346641349203, "learning_rate": 2.2277564540000736e-06, "loss": 0.6246, "step": 8575 }, { "epoch": 0.7, "grad_norm": 21.794237250179386, "learning_rate": 2.2266618883965597e-06, "loss": 0.6937, "step": 8576 }, { "epoch": 0.7, "grad_norm": 5.497314374601564, "learning_rate": 2.2255675147255036e-06, "loss": 0.727, "step": 8577 }, { "epoch": 0.7, "grad_norm": 6.967444537740059, "learning_rate": 2.2244733330626484e-06, "loss": 0.876, "step": 8578 }, { "epoch": 0.7, "grad_norm": 2.9057236235301986, "learning_rate": 2.2233793434837108e-06, "loss": 0.7735, "step": 8579 }, { "epoch": 0.7, "grad_norm": 3.607714048234826, "learning_rate": 2.222285546064408e-06, "loss": 0.6462, "step": 8580 }, { "epoch": 0.7, "grad_norm": 4.309307427657575, "learning_rate": 2.2211919408804357e-06, "loss": 0.7832, "step": 8581 }, { "epoch": 0.7, "grad_norm": 4.5708567457274505, "learning_rate": 2.220098528007475e-06, "loss": 0.6513, "step": 8582 }, { "epoch": 0.7, "grad_norm": 8.52777755423946, "learning_rate": 2.2190053075212024e-06, "loss": 0.6828, "step": 8583 }, { "epoch": 0.7, "grad_norm": 6.924271821075603, "learning_rate": 2.217912279497271e-06, "loss": 0.6942, "step": 8584 }, { "epoch": 0.7, "grad_norm": 4.572383361116938, "learning_rate": 2.216819444011331e-06, "loss": 0.496, "step": 8585 }, { "epoch": 0.7, "grad_norm": 4.022850263665472, "learning_rate": 2.2157268011390065e-06, "loss": 0.6326, "step": 8586 }, { "epoch": 0.7, "grad_norm": 11.49366626481796, "learning_rate": 2.2146343509559205e-06, "loss": 0.6046, "step": 8587 }, { "epoch": 0.7, "grad_norm": 5.265303261808111, "learning_rate": 2.213542093537675e-06, "loss": 0.6467, "step": 8588 }, { "epoch": 0.7, "grad_norm": 3.6295894233424613, "learning_rate": 2.21245002895986e-06, "loss": 0.5624, "step": 8589 }, { "epoch": 0.7, "grad_norm": 8.489376458113789, "learning_rate": 2.2113581572980568e-06, "loss": 0.6682, "step": 8590 }, { "epoch": 0.7, "grad_norm": 3.3694218358218158, "learning_rate": 2.2102664786278276e-06, "loss": 0.6715, "step": 8591 }, { "epoch": 0.7, "grad_norm": 7.747761745052959, "learning_rate": 2.2091749930247242e-06, "loss": 0.6099, "step": 8592 }, { "epoch": 0.7, "grad_norm": 5.337270094939879, "learning_rate": 2.2080837005642813e-06, "loss": 0.5992, "step": 8593 }, { "epoch": 0.7, "grad_norm": 4.063349748652745, "learning_rate": 2.206992601322028e-06, "loss": 0.676, "step": 8594 }, { "epoch": 0.7, "grad_norm": 4.038287272502322, "learning_rate": 2.2059016953734723e-06, "loss": 0.7289, "step": 8595 }, { "epoch": 0.7, "grad_norm": 8.565532030679988, "learning_rate": 2.204810982794111e-06, "loss": 0.6195, "step": 8596 }, { "epoch": 0.7, "grad_norm": 3.5109210707237546, "learning_rate": 2.2037204636594316e-06, "loss": 0.7609, "step": 8597 }, { "epoch": 0.7, "grad_norm": 5.167555257794369, "learning_rate": 2.2026301380449026e-06, "loss": 0.6475, "step": 8598 }, { "epoch": 0.7, "grad_norm": 4.215673434487035, "learning_rate": 2.2015400060259824e-06, "loss": 0.5562, "step": 8599 }, { "epoch": 0.7, "grad_norm": 3.492322207632711, "learning_rate": 2.200450067678112e-06, "loss": 0.6747, "step": 8600 }, { "epoch": 0.7, "grad_norm": 7.538421366908801, "learning_rate": 2.199360323076726e-06, "loss": 0.5867, "step": 8601 }, { "epoch": 0.7, "grad_norm": 3.9624585278136246, "learning_rate": 2.1982707722972383e-06, "loss": 0.5787, "step": 8602 }, { "epoch": 0.7, "grad_norm": 3.8517341600905386, "learning_rate": 2.1971814154150562e-06, "loss": 0.6847, "step": 8603 }, { "epoch": 0.7, "grad_norm": 12.074191000544618, "learning_rate": 2.1960922525055684e-06, "loss": 0.7161, "step": 8604 }, { "epoch": 0.7, "grad_norm": 3.4149049782222063, "learning_rate": 2.195003283644151e-06, "loss": 0.632, "step": 8605 }, { "epoch": 0.7, "grad_norm": 3.9831308149650426, "learning_rate": 2.1939145089061685e-06, "loss": 0.6732, "step": 8606 }, { "epoch": 0.7, "grad_norm": 4.9795316517203645, "learning_rate": 2.1928259283669686e-06, "loss": 0.6318, "step": 8607 }, { "epoch": 0.7, "grad_norm": 4.914866971486496, "learning_rate": 2.1917375421018914e-06, "loss": 0.5883, "step": 8608 }, { "epoch": 0.7, "grad_norm": 4.219046929349652, "learning_rate": 2.1906493501862574e-06, "loss": 0.7631, "step": 8609 }, { "epoch": 0.7, "grad_norm": 3.589514452798457, "learning_rate": 2.189561352695379e-06, "loss": 0.6832, "step": 8610 }, { "epoch": 0.7, "grad_norm": 3.6479538532774494, "learning_rate": 2.188473549704551e-06, "loss": 0.5522, "step": 8611 }, { "epoch": 0.7, "grad_norm": 2.9681243550939893, "learning_rate": 2.1873859412890565e-06, "loss": 0.5881, "step": 8612 }, { "epoch": 0.7, "grad_norm": 6.140650664220086, "learning_rate": 2.186298527524164e-06, "loss": 0.695, "step": 8613 }, { "epoch": 0.7, "grad_norm": 3.973758915821884, "learning_rate": 2.1852113084851286e-06, "loss": 0.721, "step": 8614 }, { "epoch": 0.7, "grad_norm": 3.94096062650955, "learning_rate": 2.1841242842471955e-06, "loss": 0.7336, "step": 8615 }, { "epoch": 0.7, "grad_norm": 7.352036398785971, "learning_rate": 2.1830374548855905e-06, "loss": 0.719, "step": 8616 }, { "epoch": 0.7, "grad_norm": 3.5623424192841093, "learning_rate": 2.181950820475532e-06, "loss": 0.5465, "step": 8617 }, { "epoch": 0.7, "grad_norm": 2.663431350659676, "learning_rate": 2.1808643810922207e-06, "loss": 0.8053, "step": 8618 }, { "epoch": 0.7, "grad_norm": 3.5390657890432964, "learning_rate": 2.1797781368108458e-06, "loss": 0.7685, "step": 8619 }, { "epoch": 0.7, "grad_norm": 3.2513384100568024, "learning_rate": 2.178692087706581e-06, "loss": 0.8052, "step": 8620 }, { "epoch": 0.7, "grad_norm": 5.376188954480933, "learning_rate": 2.177606233854586e-06, "loss": 0.783, "step": 8621 }, { "epoch": 0.7, "grad_norm": 3.9474292900630097, "learning_rate": 2.176520575330013e-06, "loss": 0.5182, "step": 8622 }, { "epoch": 0.7, "grad_norm": 4.129261860413936, "learning_rate": 2.1754351122079926e-06, "loss": 0.6717, "step": 8623 }, { "epoch": 0.7, "grad_norm": 2.6080343585247787, "learning_rate": 2.1743498445636492e-06, "loss": 0.6319, "step": 8624 }, { "epoch": 0.7, "grad_norm": 4.1331609567671945, "learning_rate": 2.173264772472088e-06, "loss": 0.6355, "step": 8625 }, { "epoch": 0.7, "grad_norm": 7.064389889346811, "learning_rate": 2.172179896008403e-06, "loss": 0.6934, "step": 8626 }, { "epoch": 0.7, "grad_norm": 9.705151724425937, "learning_rate": 2.1710952152476732e-06, "loss": 0.5991, "step": 8627 }, { "epoch": 0.7, "grad_norm": 2.6314630520282916, "learning_rate": 2.1700107302649686e-06, "loss": 0.5289, "step": 8628 }, { "epoch": 0.7, "grad_norm": 5.677628131094843, "learning_rate": 2.16892644113534e-06, "loss": 0.4892, "step": 8629 }, { "epoch": 0.7, "grad_norm": 5.918374935852546, "learning_rate": 2.167842347933826e-06, "loss": 0.6989, "step": 8630 }, { "epoch": 0.7, "grad_norm": 3.8988428176289256, "learning_rate": 2.1667584507354584e-06, "loss": 0.5952, "step": 8631 }, { "epoch": 0.7, "grad_norm": 5.740871241603859, "learning_rate": 2.165674749615242e-06, "loss": 0.6833, "step": 8632 }, { "epoch": 0.7, "grad_norm": 4.14773882994138, "learning_rate": 2.1645912446481805e-06, "loss": 0.7358, "step": 8633 }, { "epoch": 0.7, "grad_norm": 4.255777688225042, "learning_rate": 2.1635079359092566e-06, "loss": 0.8461, "step": 8634 }, { "epoch": 0.7, "grad_norm": 6.3807009075069825, "learning_rate": 2.162424823473445e-06, "loss": 0.813, "step": 8635 }, { "epoch": 0.7, "grad_norm": 6.941771014334811, "learning_rate": 2.1613419074157026e-06, "loss": 0.7084, "step": 8636 }, { "epoch": 0.7, "grad_norm": 16.63107834524546, "learning_rate": 2.1602591878109724e-06, "loss": 0.6655, "step": 8637 }, { "epoch": 0.7, "grad_norm": 5.501016707335661, "learning_rate": 2.1591766647341904e-06, "loss": 0.5609, "step": 8638 }, { "epoch": 0.7, "grad_norm": 4.267035702882348, "learning_rate": 2.158094338260267e-06, "loss": 0.6609, "step": 8639 }, { "epoch": 0.7, "grad_norm": 3.5787856039940538, "learning_rate": 2.157012208464111e-06, "loss": 0.8027, "step": 8640 }, { "epoch": 0.7, "grad_norm": 4.975055960926863, "learning_rate": 2.1559302754206092e-06, "loss": 0.7628, "step": 8641 }, { "epoch": 0.7, "grad_norm": 5.782341608527495, "learning_rate": 2.154848539204642e-06, "loss": 0.7448, "step": 8642 }, { "epoch": 0.7, "grad_norm": 4.432465579265484, "learning_rate": 2.153766999891071e-06, "loss": 0.8149, "step": 8643 }, { "epoch": 0.7, "grad_norm": 4.611631649127151, "learning_rate": 2.1526856575547444e-06, "loss": 0.7374, "step": 8644 }, { "epoch": 0.7, "grad_norm": 4.688766355622498, "learning_rate": 2.151604512270499e-06, "loss": 0.7187, "step": 8645 }, { "epoch": 0.7, "grad_norm": 7.678656616077013, "learning_rate": 2.1505235641131538e-06, "loss": 0.5581, "step": 8646 }, { "epoch": 0.7, "grad_norm": 3.782042452398749, "learning_rate": 2.1494428131575218e-06, "loss": 0.717, "step": 8647 }, { "epoch": 0.7, "grad_norm": 5.055834975843569, "learning_rate": 2.1483622594783937e-06, "loss": 0.6277, "step": 8648 }, { "epoch": 0.7, "grad_norm": 3.8526455857836583, "learning_rate": 2.147281903150555e-06, "loss": 0.7904, "step": 8649 }, { "epoch": 0.7, "grad_norm": 12.190045974194316, "learning_rate": 2.1462017442487688e-06, "loss": 0.7114, "step": 8650 }, { "epoch": 0.7, "grad_norm": 5.79155826084793, "learning_rate": 2.1451217828477945e-06, "loss": 0.6396, "step": 8651 }, { "epoch": 0.7, "grad_norm": 3.6803239673866, "learning_rate": 2.144042019022365e-06, "loss": 0.7278, "step": 8652 }, { "epoch": 0.7, "grad_norm": 2.700124218420424, "learning_rate": 2.142962452847212e-06, "loss": 0.6705, "step": 8653 }, { "epoch": 0.7, "grad_norm": 3.4313381194175867, "learning_rate": 2.141883084397047e-06, "loss": 0.5537, "step": 8654 }, { "epoch": 0.7, "grad_norm": 3.7096628004087755, "learning_rate": 2.1408039137465664e-06, "loss": 0.6182, "step": 8655 }, { "epoch": 0.7, "grad_norm": 5.338593365876373, "learning_rate": 2.1397249409704603e-06, "loss": 0.7251, "step": 8656 }, { "epoch": 0.7, "grad_norm": 2.836407930869923, "learning_rate": 2.138646166143396e-06, "loss": 0.6073, "step": 8657 }, { "epoch": 0.7, "grad_norm": 3.5080304047599493, "learning_rate": 2.1375675893400373e-06, "loss": 0.5967, "step": 8658 }, { "epoch": 0.7, "grad_norm": 3.020425501738531, "learning_rate": 2.136489210635021e-06, "loss": 0.6938, "step": 8659 }, { "epoch": 0.7, "grad_norm": 2.991417668725965, "learning_rate": 2.1354110301029834e-06, "loss": 0.6449, "step": 8660 }, { "epoch": 0.7, "grad_norm": 5.169612186110909, "learning_rate": 2.1343330478185398e-06, "loss": 0.508, "step": 8661 }, { "epoch": 0.7, "grad_norm": 5.940615779555634, "learning_rate": 2.13325526385629e-06, "loss": 0.7991, "step": 8662 }, { "epoch": 0.7, "grad_norm": 6.65234367608044, "learning_rate": 2.13217767829083e-06, "loss": 0.5911, "step": 8663 }, { "epoch": 0.7, "grad_norm": 3.212048018037774, "learning_rate": 2.131100291196731e-06, "loss": 0.6948, "step": 8664 }, { "epoch": 0.7, "grad_norm": 4.820565321202793, "learning_rate": 2.130023102648556e-06, "loss": 0.7052, "step": 8665 }, { "epoch": 0.7, "grad_norm": 2.357035210674939, "learning_rate": 2.128946112720851e-06, "loss": 0.6269, "step": 8666 }, { "epoch": 0.7, "grad_norm": 3.2865651071642765, "learning_rate": 2.1278693214881552e-06, "loss": 0.7986, "step": 8667 }, { "epoch": 0.7, "grad_norm": 10.197509087785358, "learning_rate": 2.126792729024986e-06, "loss": 0.6764, "step": 8668 }, { "epoch": 0.7, "grad_norm": 3.2599414332477554, "learning_rate": 2.1257163354058502e-06, "loss": 0.7318, "step": 8669 }, { "epoch": 0.7, "grad_norm": 2.7568378724860976, "learning_rate": 2.1246401407052437e-06, "loss": 0.5436, "step": 8670 }, { "epoch": 0.7, "grad_norm": 6.1365012097545755, "learning_rate": 2.1235641449976437e-06, "loss": 0.6607, "step": 8671 }, { "epoch": 0.7, "grad_norm": 6.382133686428175, "learning_rate": 2.1224883483575166e-06, "loss": 0.8555, "step": 8672 }, { "epoch": 0.7, "grad_norm": 3.198341717226936, "learning_rate": 2.1214127508593124e-06, "loss": 0.5251, "step": 8673 }, { "epoch": 0.7, "grad_norm": 2.9528514812501467, "learning_rate": 2.120337352577472e-06, "loss": 0.6841, "step": 8674 }, { "epoch": 0.7, "grad_norm": 3.811048466123399, "learning_rate": 2.119262153586418e-06, "loss": 0.7342, "step": 8675 }, { "epoch": 0.7, "grad_norm": 4.047916400272804, "learning_rate": 2.118187153960563e-06, "loss": 0.6661, "step": 8676 }, { "epoch": 0.7, "grad_norm": 5.788036242962474, "learning_rate": 2.1171123537743023e-06, "loss": 0.5965, "step": 8677 }, { "epoch": 0.7, "grad_norm": 2.8984934838449035, "learning_rate": 2.1160377531020185e-06, "loss": 0.6307, "step": 8678 }, { "epoch": 0.7, "grad_norm": 3.2751604517817663, "learning_rate": 2.1149633520180813e-06, "loss": 0.5921, "step": 8679 }, { "epoch": 0.7, "grad_norm": 5.734705131610686, "learning_rate": 2.1138891505968444e-06, "loss": 0.6421, "step": 8680 }, { "epoch": 0.71, "grad_norm": 3.523416707367139, "learning_rate": 2.112815148912652e-06, "loss": 0.6321, "step": 8681 }, { "epoch": 0.71, "grad_norm": 9.018799935431145, "learning_rate": 2.111741347039829e-06, "loss": 0.6609, "step": 8682 }, { "epoch": 0.71, "grad_norm": 3.0199819550339067, "learning_rate": 2.110667745052693e-06, "loss": 0.5086, "step": 8683 }, { "epoch": 0.71, "grad_norm": 6.709487236395212, "learning_rate": 2.109594343025541e-06, "loss": 0.591, "step": 8684 }, { "epoch": 0.71, "grad_norm": 3.3334942662345957, "learning_rate": 2.1085211410326605e-06, "loss": 0.7547, "step": 8685 }, { "epoch": 0.71, "grad_norm": 2.9600302950983965, "learning_rate": 2.1074481391483233e-06, "loss": 0.6807, "step": 8686 }, { "epoch": 0.71, "grad_norm": 4.127885370736339, "learning_rate": 2.1063753374467854e-06, "loss": 0.6041, "step": 8687 }, { "epoch": 0.71, "grad_norm": 3.4425164921147737, "learning_rate": 2.1053027360022965e-06, "loss": 0.8104, "step": 8688 }, { "epoch": 0.71, "grad_norm": 3.9323000269657804, "learning_rate": 2.1042303348890825e-06, "loss": 0.653, "step": 8689 }, { "epoch": 0.71, "grad_norm": 2.569468756731211, "learning_rate": 2.1031581341813646e-06, "loss": 0.6515, "step": 8690 }, { "epoch": 0.71, "grad_norm": 9.363600749813932, "learning_rate": 2.1020861339533438e-06, "loss": 0.6311, "step": 8691 }, { "epoch": 0.71, "grad_norm": 4.137352027724873, "learning_rate": 2.1010143342792096e-06, "loss": 0.6009, "step": 8692 }, { "epoch": 0.71, "grad_norm": 7.151476899836574, "learning_rate": 2.099942735233136e-06, "loss": 0.7406, "step": 8693 }, { "epoch": 0.71, "grad_norm": 5.151903497882219, "learning_rate": 2.0988713368892848e-06, "loss": 0.7441, "step": 8694 }, { "epoch": 0.71, "grad_norm": 8.598946775632346, "learning_rate": 2.0978001393218054e-06, "loss": 0.54, "step": 8695 }, { "epoch": 0.71, "grad_norm": 4.578597796144323, "learning_rate": 2.0967291426048288e-06, "loss": 0.6434, "step": 8696 }, { "epoch": 0.71, "grad_norm": 8.592868960907758, "learning_rate": 2.0956583468124787e-06, "loss": 0.6214, "step": 8697 }, { "epoch": 0.71, "grad_norm": 14.500333153251693, "learning_rate": 2.0945877520188552e-06, "loss": 0.6546, "step": 8698 }, { "epoch": 0.71, "grad_norm": 6.461235125731297, "learning_rate": 2.093517358298055e-06, "loss": 0.6259, "step": 8699 }, { "epoch": 0.71, "grad_norm": 3.2599787728836094, "learning_rate": 2.0924471657241526e-06, "loss": 0.6931, "step": 8700 }, { "epoch": 0.71, "grad_norm": 4.762429091968062, "learning_rate": 2.091377174371215e-06, "loss": 0.5189, "step": 8701 }, { "epoch": 0.71, "grad_norm": 6.4486011061203925, "learning_rate": 2.090307384313292e-06, "loss": 0.7062, "step": 8702 }, { "epoch": 0.71, "grad_norm": 6.487457445290819, "learning_rate": 2.089237795624417e-06, "loss": 0.5634, "step": 8703 }, { "epoch": 0.71, "grad_norm": 2.5894098966713157, "learning_rate": 2.0881684083786173e-06, "loss": 0.5507, "step": 8704 }, { "epoch": 0.71, "grad_norm": 7.5365149028032885, "learning_rate": 2.0870992226498947e-06, "loss": 0.747, "step": 8705 }, { "epoch": 0.71, "grad_norm": 2.9580458546394914, "learning_rate": 2.0860302385122493e-06, "loss": 0.7489, "step": 8706 }, { "epoch": 0.71, "grad_norm": 2.929235240916999, "learning_rate": 2.084961456039657e-06, "loss": 0.5252, "step": 8707 }, { "epoch": 0.71, "grad_norm": 4.994150618570016, "learning_rate": 2.0838928753060887e-06, "loss": 0.5555, "step": 8708 }, { "epoch": 0.71, "grad_norm": 28.759874795858693, "learning_rate": 2.082824496385494e-06, "loss": 0.5985, "step": 8709 }, { "epoch": 0.71, "grad_norm": 4.379750912784285, "learning_rate": 2.0817563193518115e-06, "loss": 0.7479, "step": 8710 }, { "epoch": 0.71, "grad_norm": 15.810220790301898, "learning_rate": 2.0806883442789694e-06, "loss": 0.695, "step": 8711 }, { "epoch": 0.71, "grad_norm": 3.2393711778677816, "learning_rate": 2.0796205712408718e-06, "loss": 0.6269, "step": 8712 }, { "epoch": 0.71, "grad_norm": 2.8221422208054125, "learning_rate": 2.0785530003114206e-06, "loss": 0.7105, "step": 8713 }, { "epoch": 0.71, "grad_norm": 4.147768406746869, "learning_rate": 2.0774856315644955e-06, "loss": 0.7037, "step": 8714 }, { "epoch": 0.71, "grad_norm": 4.075343137593721, "learning_rate": 2.0764184650739677e-06, "loss": 0.6125, "step": 8715 }, { "epoch": 0.71, "grad_norm": 3.809925517865381, "learning_rate": 2.0753515009136905e-06, "loss": 0.7524, "step": 8716 }, { "epoch": 0.71, "grad_norm": 2.5202275142128707, "learning_rate": 2.074284739157505e-06, "loss": 0.7352, "step": 8717 }, { "epoch": 0.71, "grad_norm": 4.356608614327902, "learning_rate": 2.0732181798792366e-06, "loss": 0.603, "step": 8718 }, { "epoch": 0.71, "grad_norm": 3.1177854093008612, "learning_rate": 2.0721518231526977e-06, "loss": 0.7067, "step": 8719 }, { "epoch": 0.71, "grad_norm": 4.264112895053691, "learning_rate": 2.0710856690516893e-06, "loss": 0.5412, "step": 8720 }, { "epoch": 0.71, "grad_norm": 4.52024655669802, "learning_rate": 2.0700197176499927e-06, "loss": 0.6411, "step": 8721 }, { "epoch": 0.71, "grad_norm": 4.6499806411241495, "learning_rate": 2.0689539690213823e-06, "loss": 0.7169, "step": 8722 }, { "epoch": 0.71, "grad_norm": 5.413234635787713, "learning_rate": 2.0678884232396106e-06, "loss": 0.707, "step": 8723 }, { "epoch": 0.71, "grad_norm": 3.5517990822081735, "learning_rate": 2.066823080378426e-06, "loss": 0.7047, "step": 8724 }, { "epoch": 0.71, "grad_norm": 4.956214019018222, "learning_rate": 2.065757940511549e-06, "loss": 0.5625, "step": 8725 }, { "epoch": 0.71, "grad_norm": 4.4319365568827545, "learning_rate": 2.0646930037127003e-06, "loss": 0.7116, "step": 8726 }, { "epoch": 0.71, "grad_norm": 3.333851219894353, "learning_rate": 2.0636282700555775e-06, "loss": 0.6362, "step": 8727 }, { "epoch": 0.71, "grad_norm": 5.577300691087735, "learning_rate": 2.0625637396138666e-06, "loss": 0.6129, "step": 8728 }, { "epoch": 0.71, "grad_norm": 13.152794198042924, "learning_rate": 2.0614994124612413e-06, "loss": 0.6509, "step": 8729 }, { "epoch": 0.71, "grad_norm": 5.161030402290788, "learning_rate": 2.0604352886713574e-06, "loss": 0.7231, "step": 8730 }, { "epoch": 0.71, "grad_norm": 3.1113872836751413, "learning_rate": 2.059371368317864e-06, "loss": 0.6442, "step": 8731 }, { "epoch": 0.71, "grad_norm": 3.6569494167890872, "learning_rate": 2.0583076514743844e-06, "loss": 0.7688, "step": 8732 }, { "epoch": 0.71, "grad_norm": 2.7580933591632664, "learning_rate": 2.0572441382145397e-06, "loss": 0.6358, "step": 8733 }, { "epoch": 0.71, "grad_norm": 2.8104458085992507, "learning_rate": 2.0561808286119294e-06, "loss": 0.6583, "step": 8734 }, { "epoch": 0.71, "grad_norm": 4.366274906959839, "learning_rate": 2.0551177227401397e-06, "loss": 0.6322, "step": 8735 }, { "epoch": 0.71, "grad_norm": 4.083506917762391, "learning_rate": 2.054054820672748e-06, "loss": 0.6904, "step": 8736 }, { "epoch": 0.71, "grad_norm": 7.218458812277885, "learning_rate": 2.052992122483312e-06, "loss": 0.5426, "step": 8737 }, { "epoch": 0.71, "grad_norm": 2.8177185258560553, "learning_rate": 2.051929628245377e-06, "loss": 0.6697, "step": 8738 }, { "epoch": 0.71, "grad_norm": 3.3944690494797713, "learning_rate": 2.0508673380324723e-06, "loss": 0.5708, "step": 8739 }, { "epoch": 0.71, "grad_norm": 3.0659302369352384, "learning_rate": 2.0498052519181193e-06, "loss": 0.7051, "step": 8740 }, { "epoch": 0.71, "grad_norm": 3.649979616514557, "learning_rate": 2.0487433699758184e-06, "loss": 0.7239, "step": 8741 }, { "epoch": 0.71, "grad_norm": 4.11572919664713, "learning_rate": 2.0476816922790575e-06, "loss": 0.6405, "step": 8742 }, { "epoch": 0.71, "grad_norm": 3.415681130598828, "learning_rate": 2.0466202189013145e-06, "loss": 0.6504, "step": 8743 }, { "epoch": 0.71, "grad_norm": 7.959075286556643, "learning_rate": 2.0455589499160484e-06, "loss": 0.6164, "step": 8744 }, { "epoch": 0.71, "grad_norm": 2.614939214660841, "learning_rate": 2.0444978853967057e-06, "loss": 0.6671, "step": 8745 }, { "epoch": 0.71, "grad_norm": 3.519863362194282, "learning_rate": 2.0434370254167166e-06, "loss": 0.6475, "step": 8746 }, { "epoch": 0.71, "grad_norm": 2.4098669032329227, "learning_rate": 2.0423763700495037e-06, "loss": 0.4122, "step": 8747 }, { "epoch": 0.71, "grad_norm": 3.360617735215812, "learning_rate": 2.041315919368466e-06, "loss": 0.796, "step": 8748 }, { "epoch": 0.71, "grad_norm": 3.135359601540009, "learning_rate": 2.040255673446999e-06, "loss": 0.621, "step": 8749 }, { "epoch": 0.71, "grad_norm": 2.9458632091083037, "learning_rate": 2.039195632358475e-06, "loss": 0.7715, "step": 8750 }, { "epoch": 0.71, "grad_norm": 3.31503705409151, "learning_rate": 2.038135796176256e-06, "loss": 0.7427, "step": 8751 }, { "epoch": 0.71, "grad_norm": 5.172875999063851, "learning_rate": 2.0370761649736892e-06, "loss": 0.6782, "step": 8752 }, { "epoch": 0.71, "grad_norm": 3.0013392021312173, "learning_rate": 2.0360167388241063e-06, "loss": 0.7386, "step": 8753 }, { "epoch": 0.71, "grad_norm": 2.9064158610937847, "learning_rate": 2.0349575178008298e-06, "loss": 0.8823, "step": 8754 }, { "epoch": 0.71, "grad_norm": 5.832432484605943, "learning_rate": 2.0338985019771606e-06, "loss": 0.6298, "step": 8755 }, { "epoch": 0.71, "grad_norm": 9.547488560837657, "learning_rate": 2.0328396914263925e-06, "loss": 0.7966, "step": 8756 }, { "epoch": 0.71, "grad_norm": 7.685151162193827, "learning_rate": 2.031781086221801e-06, "loss": 0.6159, "step": 8757 }, { "epoch": 0.71, "grad_norm": 3.9834943806801983, "learning_rate": 2.0307226864366483e-06, "loss": 0.6155, "step": 8758 }, { "epoch": 0.71, "grad_norm": 2.958339369303112, "learning_rate": 2.029664492144181e-06, "loss": 0.5695, "step": 8759 }, { "epoch": 0.71, "grad_norm": 2.355163702464426, "learning_rate": 2.028606503417632e-06, "loss": 0.5086, "step": 8760 }, { "epoch": 0.71, "grad_norm": 11.997588756694606, "learning_rate": 2.027548720330224e-06, "loss": 0.6777, "step": 8761 }, { "epoch": 0.71, "grad_norm": 2.966795201439225, "learning_rate": 2.026491142955159e-06, "loss": 0.5589, "step": 8762 }, { "epoch": 0.71, "grad_norm": 2.351763077065818, "learning_rate": 2.025433771365632e-06, "loss": 0.5699, "step": 8763 }, { "epoch": 0.71, "grad_norm": 4.301127495955502, "learning_rate": 2.0243766056348167e-06, "loss": 0.7164, "step": 8764 }, { "epoch": 0.71, "grad_norm": 2.775612509667861, "learning_rate": 2.0233196458358773e-06, "loss": 0.7505, "step": 8765 }, { "epoch": 0.71, "grad_norm": 2.902540412913684, "learning_rate": 2.02226289204196e-06, "loss": 0.5775, "step": 8766 }, { "epoch": 0.71, "grad_norm": 4.592198878891836, "learning_rate": 2.021206344326199e-06, "loss": 0.5671, "step": 8767 }, { "epoch": 0.71, "grad_norm": 2.9187244078518146, "learning_rate": 2.0201500027617167e-06, "loss": 0.7872, "step": 8768 }, { "epoch": 0.71, "grad_norm": 3.130563729588463, "learning_rate": 2.0190938674216146e-06, "loss": 0.5342, "step": 8769 }, { "epoch": 0.71, "grad_norm": 3.482246278832916, "learning_rate": 2.0180379383789907e-06, "loss": 0.5205, "step": 8770 }, { "epoch": 0.71, "grad_norm": 4.352517286409447, "learning_rate": 2.016982215706913e-06, "loss": 0.7255, "step": 8771 }, { "epoch": 0.71, "grad_norm": 3.6667276761978616, "learning_rate": 2.0159266994784504e-06, "loss": 0.6406, "step": 8772 }, { "epoch": 0.71, "grad_norm": 8.038854512780896, "learning_rate": 2.0148713897666485e-06, "loss": 0.5379, "step": 8773 }, { "epoch": 0.71, "grad_norm": 2.8582175226159774, "learning_rate": 2.013816286644543e-06, "loss": 0.7832, "step": 8774 }, { "epoch": 0.71, "grad_norm": 3.1584807172865745, "learning_rate": 2.0127613901851537e-06, "loss": 0.8039, "step": 8775 }, { "epoch": 0.71, "grad_norm": 4.71440524692816, "learning_rate": 2.0117067004614838e-06, "loss": 0.6369, "step": 8776 }, { "epoch": 0.71, "grad_norm": 3.382746967430863, "learning_rate": 2.0106522175465292e-06, "loss": 0.671, "step": 8777 }, { "epoch": 0.71, "grad_norm": 3.1467605862658012, "learning_rate": 2.0095979415132603e-06, "loss": 0.6567, "step": 8778 }, { "epoch": 0.71, "grad_norm": 3.505394019386553, "learning_rate": 2.0085438724346446e-06, "loss": 0.7686, "step": 8779 }, { "epoch": 0.71, "grad_norm": 3.236953113885481, "learning_rate": 2.007490010383627e-06, "loss": 0.5824, "step": 8780 }, { "epoch": 0.71, "grad_norm": 5.154553815704456, "learning_rate": 2.006436355433145e-06, "loss": 0.6867, "step": 8781 }, { "epoch": 0.71, "grad_norm": 5.033077935833127, "learning_rate": 2.0053829076561158e-06, "loss": 0.7493, "step": 8782 }, { "epoch": 0.71, "grad_norm": 13.297693329221463, "learning_rate": 2.004329667125444e-06, "loss": 0.6757, "step": 8783 }, { "epoch": 0.71, "grad_norm": 9.07637789094105, "learning_rate": 2.0032766339140246e-06, "loss": 0.5927, "step": 8784 }, { "epoch": 0.71, "grad_norm": 3.227209380649744, "learning_rate": 2.0022238080947275e-06, "loss": 0.6673, "step": 8785 }, { "epoch": 0.71, "grad_norm": 3.832185161291698, "learning_rate": 2.0011711897404207e-06, "loss": 0.6972, "step": 8786 }, { "epoch": 0.71, "grad_norm": 2.8703507096471452, "learning_rate": 2.000118778923947e-06, "loss": 0.5868, "step": 8787 }, { "epoch": 0.71, "grad_norm": 7.193109091535448, "learning_rate": 1.9990665757181455e-06, "loss": 0.6396, "step": 8788 }, { "epoch": 0.71, "grad_norm": 2.756965544552875, "learning_rate": 1.9980145801958316e-06, "loss": 0.7477, "step": 8789 }, { "epoch": 0.71, "grad_norm": 4.567258164627998, "learning_rate": 1.9969627924298114e-06, "loss": 0.7473, "step": 8790 }, { "epoch": 0.71, "grad_norm": 16.96634245215981, "learning_rate": 1.9959112124928743e-06, "loss": 0.8128, "step": 8791 }, { "epoch": 0.71, "grad_norm": 4.326706004376088, "learning_rate": 1.9948598404577944e-06, "loss": 0.6632, "step": 8792 }, { "epoch": 0.71, "grad_norm": 2.531978564691185, "learning_rate": 1.993808676397338e-06, "loss": 0.7591, "step": 8793 }, { "epoch": 0.71, "grad_norm": 7.527630960837817, "learning_rate": 1.992757720384248e-06, "loss": 0.6943, "step": 8794 }, { "epoch": 0.71, "grad_norm": 6.343451868325262, "learning_rate": 1.9917069724912603e-06, "loss": 0.6974, "step": 8795 }, { "epoch": 0.71, "grad_norm": 2.471764167291829, "learning_rate": 1.990656432791092e-06, "loss": 0.6072, "step": 8796 }, { "epoch": 0.71, "grad_norm": 8.582828636829632, "learning_rate": 1.9896061013564467e-06, "loss": 0.5871, "step": 8797 }, { "epoch": 0.71, "grad_norm": 2.4587343788493907, "learning_rate": 1.988555978260013e-06, "loss": 0.5145, "step": 8798 }, { "epoch": 0.71, "grad_norm": 2.9815703130217637, "learning_rate": 1.987506063574468e-06, "loss": 0.6576, "step": 8799 }, { "epoch": 0.71, "grad_norm": 3.2030165514381377, "learning_rate": 1.9864563573724725e-06, "loss": 0.6835, "step": 8800 }, { "epoch": 0.71, "grad_norm": 7.861961822558024, "learning_rate": 1.98540685972667e-06, "loss": 0.6036, "step": 8801 }, { "epoch": 0.71, "grad_norm": 3.20920395854436, "learning_rate": 1.9843575707096955e-06, "loss": 0.7583, "step": 8802 }, { "epoch": 0.71, "grad_norm": 11.64728827343324, "learning_rate": 1.9833084903941657e-06, "loss": 0.624, "step": 8803 }, { "epoch": 0.72, "grad_norm": 2.9001418924202986, "learning_rate": 1.9822596188526834e-06, "loss": 0.6414, "step": 8804 }, { "epoch": 0.72, "grad_norm": 12.843775200413022, "learning_rate": 1.981210956157834e-06, "loss": 0.7241, "step": 8805 }, { "epoch": 0.72, "grad_norm": 4.864499375595119, "learning_rate": 1.9801625023821968e-06, "loss": 0.6794, "step": 8806 }, { "epoch": 0.72, "grad_norm": 3.8144753717202486, "learning_rate": 1.9791142575983286e-06, "loss": 0.6899, "step": 8807 }, { "epoch": 0.72, "grad_norm": 2.98050843228975, "learning_rate": 1.9780662218787733e-06, "loss": 0.6075, "step": 8808 }, { "epoch": 0.72, "grad_norm": 5.571411515945465, "learning_rate": 1.977018395296064e-06, "loss": 0.5819, "step": 8809 }, { "epoch": 0.72, "grad_norm": 6.47989052207137, "learning_rate": 1.975970777922717e-06, "loss": 0.6682, "step": 8810 }, { "epoch": 0.72, "grad_norm": 2.6616961134977277, "learning_rate": 1.9749233698312327e-06, "loss": 0.6291, "step": 8811 }, { "epoch": 0.72, "grad_norm": 8.259310514297201, "learning_rate": 1.973876171094097e-06, "loss": 0.7177, "step": 8812 }, { "epoch": 0.72, "grad_norm": 7.500422836903766, "learning_rate": 1.9728291817837857e-06, "loss": 0.6698, "step": 8813 }, { "epoch": 0.72, "grad_norm": 5.085636074316168, "learning_rate": 1.9717824019727567e-06, "loss": 0.6926, "step": 8814 }, { "epoch": 0.72, "grad_norm": 3.6010372384968923, "learning_rate": 1.9707358317334497e-06, "loss": 0.7064, "step": 8815 }, { "epoch": 0.72, "grad_norm": 6.570934873563309, "learning_rate": 1.9696894711382997e-06, "loss": 0.621, "step": 8816 }, { "epoch": 0.72, "grad_norm": 6.828100193374712, "learning_rate": 1.9686433202597178e-06, "loss": 0.6842, "step": 8817 }, { "epoch": 0.72, "grad_norm": 4.159167754686179, "learning_rate": 1.9675973791701057e-06, "loss": 0.7297, "step": 8818 }, { "epoch": 0.72, "grad_norm": 4.39609840413412, "learning_rate": 1.966551647941847e-06, "loss": 0.7244, "step": 8819 }, { "epoch": 0.72, "grad_norm": 2.187336405449309, "learning_rate": 1.9655061266473158e-06, "loss": 0.6542, "step": 8820 }, { "epoch": 0.72, "grad_norm": 4.371349603679339, "learning_rate": 1.9644608153588674e-06, "loss": 0.7111, "step": 8821 }, { "epoch": 0.72, "grad_norm": 3.23061652058898, "learning_rate": 1.963415714148842e-06, "loss": 0.7292, "step": 8822 }, { "epoch": 0.72, "grad_norm": 8.340844833874831, "learning_rate": 1.962370823089571e-06, "loss": 0.6715, "step": 8823 }, { "epoch": 0.72, "grad_norm": 11.777039717234098, "learning_rate": 1.9613261422533657e-06, "loss": 0.6475, "step": 8824 }, { "epoch": 0.72, "grad_norm": 6.288982565294828, "learning_rate": 1.9602816717125243e-06, "loss": 0.619, "step": 8825 }, { "epoch": 0.72, "grad_norm": 3.5372321756169103, "learning_rate": 1.9592374115393293e-06, "loss": 0.6137, "step": 8826 }, { "epoch": 0.72, "grad_norm": 2.401546727963931, "learning_rate": 1.958193361806053e-06, "loss": 0.6387, "step": 8827 }, { "epoch": 0.72, "grad_norm": 4.502049295304319, "learning_rate": 1.9571495225849475e-06, "loss": 0.5887, "step": 8828 }, { "epoch": 0.72, "grad_norm": 3.0368141758375082, "learning_rate": 1.9561058939482562e-06, "loss": 0.6378, "step": 8829 }, { "epoch": 0.72, "grad_norm": 5.279880187545452, "learning_rate": 1.9550624759682028e-06, "loss": 0.6048, "step": 8830 }, { "epoch": 0.72, "grad_norm": 3.459920744456565, "learning_rate": 1.9540192687169984e-06, "loss": 0.6234, "step": 8831 }, { "epoch": 0.72, "grad_norm": 2.6907751830788924, "learning_rate": 1.95297627226684e-06, "loss": 0.7046, "step": 8832 }, { "epoch": 0.72, "grad_norm": 3.8818150432192287, "learning_rate": 1.951933486689907e-06, "loss": 0.5922, "step": 8833 }, { "epoch": 0.72, "grad_norm": 3.2705707612512147, "learning_rate": 1.9508909120583715e-06, "loss": 0.5511, "step": 8834 }, { "epoch": 0.72, "grad_norm": 4.139808327956517, "learning_rate": 1.9498485484443813e-06, "loss": 0.7152, "step": 8835 }, { "epoch": 0.72, "grad_norm": 6.001292193704591, "learning_rate": 1.948806395920079e-06, "loss": 0.6956, "step": 8836 }, { "epoch": 0.72, "grad_norm": 4.080190400459213, "learning_rate": 1.947764454557585e-06, "loss": 0.7407, "step": 8837 }, { "epoch": 0.72, "grad_norm": 3.615498639777865, "learning_rate": 1.9467227244290105e-06, "loss": 0.7361, "step": 8838 }, { "epoch": 0.72, "grad_norm": 5.0511357492068525, "learning_rate": 1.945681205606448e-06, "loss": 0.6729, "step": 8839 }, { "epoch": 0.72, "grad_norm": 3.533593971268565, "learning_rate": 1.9446398981619757e-06, "loss": 0.4941, "step": 8840 }, { "epoch": 0.72, "grad_norm": 4.733292801297826, "learning_rate": 1.9435988021676626e-06, "loss": 0.6545, "step": 8841 }, { "epoch": 0.72, "grad_norm": 9.594415058582966, "learning_rate": 1.942557917695555e-06, "loss": 0.5628, "step": 8842 }, { "epoch": 0.72, "grad_norm": 3.1979531907412144, "learning_rate": 1.9415172448176945e-06, "loss": 0.7919, "step": 8843 }, { "epoch": 0.72, "grad_norm": 2.98315139661177, "learning_rate": 1.940476783606095e-06, "loss": 0.6954, "step": 8844 }, { "epoch": 0.72, "grad_norm": 4.848243160187772, "learning_rate": 1.939436534132768e-06, "loss": 0.7195, "step": 8845 }, { "epoch": 0.72, "grad_norm": 4.1605302466262675, "learning_rate": 1.938396496469704e-06, "loss": 0.6755, "step": 8846 }, { "epoch": 0.72, "grad_norm": 14.056649281012927, "learning_rate": 1.937356670688878e-06, "loss": 0.6979, "step": 8847 }, { "epoch": 0.72, "grad_norm": 8.780443747167748, "learning_rate": 1.936317056862256e-06, "loss": 0.6964, "step": 8848 }, { "epoch": 0.72, "grad_norm": 3.3643636349117223, "learning_rate": 1.9352776550617824e-06, "loss": 0.6031, "step": 8849 }, { "epoch": 0.72, "grad_norm": 3.3227013309111584, "learning_rate": 1.934238465359396e-06, "loss": 0.5447, "step": 8850 }, { "epoch": 0.72, "grad_norm": 3.135112387352472, "learning_rate": 1.9331994878270077e-06, "loss": 0.6779, "step": 8851 }, { "epoch": 0.72, "grad_norm": 9.366334045097481, "learning_rate": 1.9321607225365267e-06, "loss": 0.7106, "step": 8852 }, { "epoch": 0.72, "grad_norm": 3.517658218963076, "learning_rate": 1.931122169559839e-06, "loss": 0.557, "step": 8853 }, { "epoch": 0.72, "grad_norm": 3.5169309975828154, "learning_rate": 1.9300838289688216e-06, "loss": 0.5963, "step": 8854 }, { "epoch": 0.72, "grad_norm": 6.773285088108598, "learning_rate": 1.9290457008353336e-06, "loss": 0.6356, "step": 8855 }, { "epoch": 0.72, "grad_norm": 7.5664047014484, "learning_rate": 1.9280077852312194e-06, "loss": 0.7356, "step": 8856 }, { "epoch": 0.72, "grad_norm": 3.908910187668835, "learning_rate": 1.926970082228309e-06, "loss": 0.5346, "step": 8857 }, { "epoch": 0.72, "grad_norm": 3.970747605647887, "learning_rate": 1.9259325918984167e-06, "loss": 0.6697, "step": 8858 }, { "epoch": 0.72, "grad_norm": 8.903257348224061, "learning_rate": 1.924895314313347e-06, "loss": 0.659, "step": 8859 }, { "epoch": 0.72, "grad_norm": 2.5131874768384823, "learning_rate": 1.9238582495448814e-06, "loss": 0.783, "step": 8860 }, { "epoch": 0.72, "grad_norm": 3.2968781216152383, "learning_rate": 1.9228213976647964e-06, "loss": 0.5727, "step": 8861 }, { "epoch": 0.72, "grad_norm": 4.186467857850592, "learning_rate": 1.9217847587448464e-06, "loss": 0.7955, "step": 8862 }, { "epoch": 0.72, "grad_norm": 4.175171325936079, "learning_rate": 1.9207483328567726e-06, "loss": 0.732, "step": 8863 }, { "epoch": 0.72, "grad_norm": 3.1490724095877782, "learning_rate": 1.919712120072303e-06, "loss": 0.6484, "step": 8864 }, { "epoch": 0.72, "grad_norm": 2.9071704239129392, "learning_rate": 1.9186761204631476e-06, "loss": 0.4801, "step": 8865 }, { "epoch": 0.72, "grad_norm": 2.3813989347091855, "learning_rate": 1.9176403341010087e-06, "loss": 0.6109, "step": 8866 }, { "epoch": 0.72, "grad_norm": 2.8342140976000767, "learning_rate": 1.9166047610575646e-06, "loss": 0.6841, "step": 8867 }, { "epoch": 0.72, "grad_norm": 1.918152166754916, "learning_rate": 1.915569401404488e-06, "loss": 0.77, "step": 8868 }, { "epoch": 0.72, "grad_norm": 3.768150286762269, "learning_rate": 1.9145342552134293e-06, "loss": 0.6882, "step": 8869 }, { "epoch": 0.72, "grad_norm": 3.649873713760616, "learning_rate": 1.9134993225560283e-06, "loss": 0.6548, "step": 8870 }, { "epoch": 0.72, "grad_norm": 4.863850684322349, "learning_rate": 1.912464603503908e-06, "loss": 0.7566, "step": 8871 }, { "epoch": 0.72, "grad_norm": 5.851115547488806, "learning_rate": 1.9114300981286763e-06, "loss": 0.5994, "step": 8872 }, { "epoch": 0.72, "grad_norm": 3.5132057714789506, "learning_rate": 1.9103958065019307e-06, "loss": 0.7338, "step": 8873 }, { "epoch": 0.72, "grad_norm": 4.751211097863998, "learning_rate": 1.9093617286952476e-06, "loss": 0.7096, "step": 8874 }, { "epoch": 0.72, "grad_norm": 4.485628580346519, "learning_rate": 1.908327864780195e-06, "loss": 0.4865, "step": 8875 }, { "epoch": 0.72, "grad_norm": 4.414869053437688, "learning_rate": 1.9072942148283202e-06, "loss": 0.5486, "step": 8876 }, { "epoch": 0.72, "grad_norm": 4.237645749272, "learning_rate": 1.9062607789111598e-06, "loss": 0.7081, "step": 8877 }, { "epoch": 0.72, "grad_norm": 5.988538903555468, "learning_rate": 1.905227557100231e-06, "loss": 0.6915, "step": 8878 }, { "epoch": 0.72, "grad_norm": 6.0746211516613435, "learning_rate": 1.904194549467044e-06, "loss": 0.805, "step": 8879 }, { "epoch": 0.72, "grad_norm": 4.6827267273987605, "learning_rate": 1.9031617560830861e-06, "loss": 0.7081, "step": 8880 }, { "epoch": 0.72, "grad_norm": 4.590668992869816, "learning_rate": 1.902129177019833e-06, "loss": 0.5751, "step": 8881 }, { "epoch": 0.72, "grad_norm": 3.1395077087252523, "learning_rate": 1.9010968123487478e-06, "loss": 0.7919, "step": 8882 }, { "epoch": 0.72, "grad_norm": 5.511577507063931, "learning_rate": 1.9000646621412762e-06, "loss": 0.5962, "step": 8883 }, { "epoch": 0.72, "grad_norm": 20.164675316732144, "learning_rate": 1.899032726468848e-06, "loss": 0.6879, "step": 8884 }, { "epoch": 0.72, "grad_norm": 4.46412725949072, "learning_rate": 1.8980010054028792e-06, "loss": 0.5249, "step": 8885 }, { "epoch": 0.72, "grad_norm": 7.207493164539404, "learning_rate": 1.8969694990147742e-06, "loss": 0.7571, "step": 8886 }, { "epoch": 0.72, "grad_norm": 4.2069825022497795, "learning_rate": 1.895938207375918e-06, "loss": 0.5319, "step": 8887 }, { "epoch": 0.72, "grad_norm": 3.8124152684909336, "learning_rate": 1.894907130557681e-06, "loss": 0.5642, "step": 8888 }, { "epoch": 0.72, "grad_norm": 2.685926505522822, "learning_rate": 1.8938762686314238e-06, "loss": 0.6805, "step": 8889 }, { "epoch": 0.72, "grad_norm": 3.8138392264987586, "learning_rate": 1.892845621668486e-06, "loss": 0.6468, "step": 8890 }, { "epoch": 0.72, "grad_norm": 3.931445920141013, "learning_rate": 1.891815189740196e-06, "loss": 0.5371, "step": 8891 }, { "epoch": 0.72, "grad_norm": 7.345806439582732, "learning_rate": 1.890784972917864e-06, "loss": 0.6336, "step": 8892 }, { "epoch": 0.72, "grad_norm": 3.428362852982109, "learning_rate": 1.8897549712727903e-06, "loss": 0.7927, "step": 8893 }, { "epoch": 0.72, "grad_norm": 3.3390805881812033, "learning_rate": 1.8887251848762567e-06, "loss": 0.5877, "step": 8894 }, { "epoch": 0.72, "grad_norm": 7.351594676809807, "learning_rate": 1.8876956137995284e-06, "loss": 0.5394, "step": 8895 }, { "epoch": 0.72, "grad_norm": 2.2130512581854758, "learning_rate": 1.8866662581138646e-06, "loss": 0.6815, "step": 8896 }, { "epoch": 0.72, "grad_norm": 5.258494473815784, "learning_rate": 1.8856371178904947e-06, "loss": 0.7325, "step": 8897 }, { "epoch": 0.72, "grad_norm": 3.9004540796423877, "learning_rate": 1.8846081932006476e-06, "loss": 0.6311, "step": 8898 }, { "epoch": 0.72, "grad_norm": 2.430892785125821, "learning_rate": 1.883579484115528e-06, "loss": 0.7303, "step": 8899 }, { "epoch": 0.72, "grad_norm": 11.19710787708259, "learning_rate": 1.8825509907063328e-06, "loss": 0.6601, "step": 8900 }, { "epoch": 0.72, "grad_norm": 5.002406894291113, "learning_rate": 1.881522713044236e-06, "loss": 0.614, "step": 8901 }, { "epoch": 0.72, "grad_norm": 3.863070105141923, "learning_rate": 1.8804946512004053e-06, "loss": 0.8421, "step": 8902 }, { "epoch": 0.72, "grad_norm": 8.00026687953639, "learning_rate": 1.8794668052459863e-06, "loss": 0.5922, "step": 8903 }, { "epoch": 0.72, "grad_norm": 6.733748844287857, "learning_rate": 1.878439175252113e-06, "loss": 0.6112, "step": 8904 }, { "epoch": 0.72, "grad_norm": 3.737647186540963, "learning_rate": 1.8774117612899034e-06, "loss": 0.6153, "step": 8905 }, { "epoch": 0.72, "grad_norm": 4.700916158126489, "learning_rate": 1.87638456343046e-06, "loss": 0.5457, "step": 8906 }, { "epoch": 0.72, "grad_norm": 3.2856501841996058, "learning_rate": 1.8753575817448745e-06, "loss": 0.6974, "step": 8907 }, { "epoch": 0.72, "grad_norm": 3.2838574532711218, "learning_rate": 1.8743308163042167e-06, "loss": 0.5804, "step": 8908 }, { "epoch": 0.72, "grad_norm": 5.105916117254069, "learning_rate": 1.873304267179551e-06, "loss": 0.6725, "step": 8909 }, { "epoch": 0.72, "grad_norm": 5.153860723511032, "learning_rate": 1.8722779344419139e-06, "loss": 0.7657, "step": 8910 }, { "epoch": 0.72, "grad_norm": 13.466183605469553, "learning_rate": 1.871251818162339e-06, "loss": 0.8554, "step": 8911 }, { "epoch": 0.72, "grad_norm": 4.629660635048524, "learning_rate": 1.8702259184118387e-06, "loss": 0.6891, "step": 8912 }, { "epoch": 0.72, "grad_norm": 2.746441486492676, "learning_rate": 1.8692002352614098e-06, "loss": 0.6385, "step": 8913 }, { "epoch": 0.72, "grad_norm": 4.018041203452597, "learning_rate": 1.868174768782039e-06, "loss": 0.6318, "step": 8914 }, { "epoch": 0.72, "grad_norm": 3.560944326930053, "learning_rate": 1.8671495190446925e-06, "loss": 0.6133, "step": 8915 }, { "epoch": 0.72, "grad_norm": 32.677818019842654, "learning_rate": 1.8661244861203288e-06, "loss": 0.7663, "step": 8916 }, { "epoch": 0.72, "grad_norm": 3.887371866724973, "learning_rate": 1.8650996700798797e-06, "loss": 0.5237, "step": 8917 }, { "epoch": 0.72, "grad_norm": 3.9806102594400885, "learning_rate": 1.864075070994274e-06, "loss": 0.6477, "step": 8918 }, { "epoch": 0.72, "grad_norm": 2.6242920161706467, "learning_rate": 1.863050688934419e-06, "loss": 0.6185, "step": 8919 }, { "epoch": 0.72, "grad_norm": 2.778203570254656, "learning_rate": 1.8620265239712066e-06, "loss": 0.7441, "step": 8920 }, { "epoch": 0.72, "grad_norm": 3.806447036424115, "learning_rate": 1.8610025761755184e-06, "loss": 0.6068, "step": 8921 }, { "epoch": 0.72, "grad_norm": 3.6615885983520875, "learning_rate": 1.859978845618215e-06, "loss": 0.5936, "step": 8922 }, { "epoch": 0.72, "grad_norm": 3.118603952204705, "learning_rate": 1.8589553323701503e-06, "loss": 0.6658, "step": 8923 }, { "epoch": 0.72, "grad_norm": 5.223911553332602, "learning_rate": 1.8579320365021508e-06, "loss": 0.5449, "step": 8924 }, { "epoch": 0.72, "grad_norm": 6.435947445253762, "learning_rate": 1.8569089580850403e-06, "loss": 0.6364, "step": 8925 }, { "epoch": 0.72, "grad_norm": 4.963754850974254, "learning_rate": 1.855886097189618e-06, "loss": 0.667, "step": 8926 }, { "epoch": 0.73, "grad_norm": 2.8400800060155227, "learning_rate": 1.8548634538866772e-06, "loss": 0.766, "step": 8927 }, { "epoch": 0.73, "grad_norm": 4.115290829574984, "learning_rate": 1.8538410282469888e-06, "loss": 0.5938, "step": 8928 }, { "epoch": 0.73, "grad_norm": 3.811712575342287, "learning_rate": 1.852818820341311e-06, "loss": 0.6294, "step": 8929 }, { "epoch": 0.73, "grad_norm": 2.2137187414278205, "learning_rate": 1.8517968302403872e-06, "loss": 0.5604, "step": 8930 }, { "epoch": 0.73, "grad_norm": 5.263109231578008, "learning_rate": 1.8507750580149436e-06, "loss": 0.6258, "step": 8931 }, { "epoch": 0.73, "grad_norm": 4.121221600036374, "learning_rate": 1.8497535037356967e-06, "loss": 0.7666, "step": 8932 }, { "epoch": 0.73, "grad_norm": 3.77503342284328, "learning_rate": 1.8487321674733412e-06, "loss": 0.6392, "step": 8933 }, { "epoch": 0.73, "grad_norm": 9.33547330763729, "learning_rate": 1.847711049298564e-06, "loss": 0.8678, "step": 8934 }, { "epoch": 0.73, "grad_norm": 4.334042439527358, "learning_rate": 1.84669014928203e-06, "loss": 0.6495, "step": 8935 }, { "epoch": 0.73, "grad_norm": 4.017867873332193, "learning_rate": 1.845669467494393e-06, "loss": 0.5016, "step": 8936 }, { "epoch": 0.73, "grad_norm": 5.247128543463194, "learning_rate": 1.8446490040062898e-06, "loss": 0.5412, "step": 8937 }, { "epoch": 0.73, "grad_norm": 3.496608306137211, "learning_rate": 1.8436287588883416e-06, "loss": 0.6213, "step": 8938 }, { "epoch": 0.73, "grad_norm": 3.3600632311866896, "learning_rate": 1.8426087322111597e-06, "loss": 0.6747, "step": 8939 }, { "epoch": 0.73, "grad_norm": 4.525386929664864, "learning_rate": 1.8415889240453316e-06, "loss": 0.6864, "step": 8940 }, { "epoch": 0.73, "grad_norm": 13.08742272246945, "learning_rate": 1.840569334461439e-06, "loss": 0.7082, "step": 8941 }, { "epoch": 0.73, "grad_norm": 4.54220366001043, "learning_rate": 1.8395499635300423e-06, "loss": 0.7895, "step": 8942 }, { "epoch": 0.73, "grad_norm": 3.130245164444048, "learning_rate": 1.8385308113216876e-06, "loss": 0.6243, "step": 8943 }, { "epoch": 0.73, "grad_norm": 2.8823914782576554, "learning_rate": 1.8375118779069067e-06, "loss": 0.7373, "step": 8944 }, { "epoch": 0.73, "grad_norm": 4.111831772144409, "learning_rate": 1.836493163356215e-06, "loss": 0.795, "step": 8945 }, { "epoch": 0.73, "grad_norm": 7.67936925723455, "learning_rate": 1.8354746677401174e-06, "loss": 0.5647, "step": 8946 }, { "epoch": 0.73, "grad_norm": 7.997706317521311, "learning_rate": 1.8344563911290964e-06, "loss": 0.7289, "step": 8947 }, { "epoch": 0.73, "grad_norm": 6.268636044870036, "learning_rate": 1.8334383335936269e-06, "loss": 0.7539, "step": 8948 }, { "epoch": 0.73, "grad_norm": 8.24434019604976, "learning_rate": 1.832420495204163e-06, "loss": 0.6067, "step": 8949 }, { "epoch": 0.73, "grad_norm": 3.6026947510567098, "learning_rate": 1.8314028760311458e-06, "loss": 0.7028, "step": 8950 }, { "epoch": 0.73, "grad_norm": 3.9820277836451767, "learning_rate": 1.8303854761449984e-06, "loss": 0.5997, "step": 8951 }, { "epoch": 0.73, "grad_norm": 3.3255929843193117, "learning_rate": 1.8293682956161357e-06, "loss": 0.6649, "step": 8952 }, { "epoch": 0.73, "grad_norm": 3.8811180403990075, "learning_rate": 1.8283513345149507e-06, "loss": 0.6645, "step": 8953 }, { "epoch": 0.73, "grad_norm": 4.259735814374588, "learning_rate": 1.8273345929118225e-06, "loss": 0.8205, "step": 8954 }, { "epoch": 0.73, "grad_norm": 3.8203005597936253, "learning_rate": 1.8263180708771184e-06, "loss": 0.6583, "step": 8955 }, { "epoch": 0.73, "grad_norm": 2.6201288642527136, "learning_rate": 1.825301768481187e-06, "loss": 0.7451, "step": 8956 }, { "epoch": 0.73, "grad_norm": 3.4385288483430356, "learning_rate": 1.824285685794363e-06, "loss": 0.8086, "step": 8957 }, { "epoch": 0.73, "grad_norm": 4.486546745669859, "learning_rate": 1.8232698228869633e-06, "loss": 0.6086, "step": 8958 }, { "epoch": 0.73, "grad_norm": 11.169820215423293, "learning_rate": 1.8222541798292965e-06, "loss": 0.7319, "step": 8959 }, { "epoch": 0.73, "grad_norm": 5.678950261741719, "learning_rate": 1.821238756691649e-06, "loss": 0.7987, "step": 8960 }, { "epoch": 0.73, "grad_norm": 4.783690915444024, "learning_rate": 1.820223553544293e-06, "loss": 0.6954, "step": 8961 }, { "epoch": 0.73, "grad_norm": 3.3347573949184564, "learning_rate": 1.8192085704574902e-06, "loss": 0.5746, "step": 8962 }, { "epoch": 0.73, "grad_norm": 4.555647604637633, "learning_rate": 1.8181938075014821e-06, "loss": 0.8035, "step": 8963 }, { "epoch": 0.73, "grad_norm": 4.528980627802501, "learning_rate": 1.817179264746497e-06, "loss": 0.8058, "step": 8964 }, { "epoch": 0.73, "grad_norm": 5.776142902647297, "learning_rate": 1.8161649422627458e-06, "loss": 0.6615, "step": 8965 }, { "epoch": 0.73, "grad_norm": 3.279929496658013, "learning_rate": 1.8151508401204298e-06, "loss": 0.6595, "step": 8966 }, { "epoch": 0.73, "grad_norm": 4.359658071853933, "learning_rate": 1.8141369583897283e-06, "loss": 0.4988, "step": 8967 }, { "epoch": 0.73, "grad_norm": 2.88296926229359, "learning_rate": 1.813123297140808e-06, "loss": 0.7011, "step": 8968 }, { "epoch": 0.73, "grad_norm": 5.47099188690207, "learning_rate": 1.8121098564438249e-06, "loss": 0.4852, "step": 8969 }, { "epoch": 0.73, "grad_norm": 7.7618395462869, "learning_rate": 1.8110966363689093e-06, "loss": 0.6238, "step": 8970 }, { "epoch": 0.73, "grad_norm": 7.841487017123696, "learning_rate": 1.8100836369861869e-06, "loss": 0.6894, "step": 8971 }, { "epoch": 0.73, "grad_norm": 9.846068274846333, "learning_rate": 1.8090708583657606e-06, "loss": 0.7645, "step": 8972 }, { "epoch": 0.73, "grad_norm": 3.4077122700992106, "learning_rate": 1.8080583005777241e-06, "loss": 0.6646, "step": 8973 }, { "epoch": 0.73, "grad_norm": 2.363809129038152, "learning_rate": 1.8070459636921517e-06, "loss": 0.6734, "step": 8974 }, { "epoch": 0.73, "grad_norm": 7.689613686336973, "learning_rate": 1.8060338477791011e-06, "loss": 0.6706, "step": 8975 }, { "epoch": 0.73, "grad_norm": 5.503918679902205, "learning_rate": 1.805021952908621e-06, "loss": 0.5106, "step": 8976 }, { "epoch": 0.73, "grad_norm": 6.228365668188462, "learning_rate": 1.8040102791507385e-06, "loss": 0.7783, "step": 8977 }, { "epoch": 0.73, "grad_norm": 3.740134013002249, "learning_rate": 1.8029988265754688e-06, "loss": 0.6774, "step": 8978 }, { "epoch": 0.73, "grad_norm": 2.972612345685512, "learning_rate": 1.8019875952528087e-06, "loss": 0.7327, "step": 8979 }, { "epoch": 0.73, "grad_norm": 4.830794121883064, "learning_rate": 1.800976585252745e-06, "loss": 0.4537, "step": 8980 }, { "epoch": 0.73, "grad_norm": 16.78888868962632, "learning_rate": 1.799965796645242e-06, "loss": 0.6034, "step": 8981 }, { "epoch": 0.73, "grad_norm": 3.6369935798976267, "learning_rate": 1.7989552295002593e-06, "loss": 0.6731, "step": 8982 }, { "epoch": 0.73, "grad_norm": 3.644768700621319, "learning_rate": 1.7979448838877262e-06, "loss": 0.5378, "step": 8983 }, { "epoch": 0.73, "grad_norm": 2.6747102535784393, "learning_rate": 1.7969347598775705e-06, "loss": 0.6811, "step": 8984 }, { "epoch": 0.73, "grad_norm": 8.431429027706097, "learning_rate": 1.7959248575396982e-06, "loss": 0.6829, "step": 8985 }, { "epoch": 0.73, "grad_norm": 4.431836853961968, "learning_rate": 1.7949151769439983e-06, "loss": 0.6992, "step": 8986 }, { "epoch": 0.73, "grad_norm": 10.761560258546373, "learning_rate": 1.7939057181603504e-06, "loss": 0.7632, "step": 8987 }, { "epoch": 0.73, "grad_norm": 4.628241603904614, "learning_rate": 1.7928964812586126e-06, "loss": 0.8677, "step": 8988 }, { "epoch": 0.73, "grad_norm": 2.8317954292852643, "learning_rate": 1.7918874663086355e-06, "loss": 0.6575, "step": 8989 }, { "epoch": 0.73, "grad_norm": 66.91201835222382, "learning_rate": 1.7908786733802419e-06, "loss": 0.8439, "step": 8990 }, { "epoch": 0.73, "grad_norm": 5.504491479135456, "learning_rate": 1.789870102543252e-06, "loss": 0.6043, "step": 8991 }, { "epoch": 0.73, "grad_norm": 10.810557621017534, "learning_rate": 1.788861753867464e-06, "loss": 0.5702, "step": 8992 }, { "epoch": 0.73, "grad_norm": 3.0093973864062384, "learning_rate": 1.7878536274226598e-06, "loss": 0.5665, "step": 8993 }, { "epoch": 0.73, "grad_norm": 5.417772893934148, "learning_rate": 1.7868457232786117e-06, "loss": 0.6604, "step": 8994 }, { "epoch": 0.73, "grad_norm": 5.059833991515718, "learning_rate": 1.7858380415050696e-06, "loss": 0.7275, "step": 8995 }, { "epoch": 0.73, "grad_norm": 4.971516046770104, "learning_rate": 1.7848305821717766e-06, "loss": 0.7033, "step": 8996 }, { "epoch": 0.73, "grad_norm": 7.384158316000213, "learning_rate": 1.7838233453484476e-06, "loss": 0.6184, "step": 8997 }, { "epoch": 0.73, "grad_norm": 23.44364335545117, "learning_rate": 1.7828163311047963e-06, "loss": 0.7064, "step": 8998 }, { "epoch": 0.73, "grad_norm": 10.883352435369922, "learning_rate": 1.7818095395105116e-06, "loss": 0.5967, "step": 8999 }, { "epoch": 0.73, "grad_norm": 4.29345362283638, "learning_rate": 1.780802970635268e-06, "loss": 0.7215, "step": 9000 }, { "epoch": 0.73, "grad_norm": 6.696126834789653, "learning_rate": 1.7797966245487314e-06, "loss": 0.6664, "step": 9001 }, { "epoch": 0.73, "grad_norm": 3.6294596186497943, "learning_rate": 1.7787905013205437e-06, "loss": 0.6883, "step": 9002 }, { "epoch": 0.73, "grad_norm": 3.5979563728844113, "learning_rate": 1.7777846010203359e-06, "loss": 0.6018, "step": 9003 }, { "epoch": 0.73, "grad_norm": 3.459464342918987, "learning_rate": 1.7767789237177208e-06, "loss": 0.5564, "step": 9004 }, { "epoch": 0.73, "grad_norm": 4.719924776994022, "learning_rate": 1.7757734694823004e-06, "loss": 0.735, "step": 9005 }, { "epoch": 0.73, "grad_norm": 4.720136840744433, "learning_rate": 1.7747682383836563e-06, "loss": 0.7211, "step": 9006 }, { "epoch": 0.73, "grad_norm": 7.264085624524268, "learning_rate": 1.7737632304913592e-06, "loss": 0.7236, "step": 9007 }, { "epoch": 0.73, "grad_norm": 3.4478113986501246, "learning_rate": 1.7727584458749608e-06, "loss": 0.6115, "step": 9008 }, { "epoch": 0.73, "grad_norm": 9.773460893580113, "learning_rate": 1.7717538846039984e-06, "loss": 0.6398, "step": 9009 }, { "epoch": 0.73, "grad_norm": 3.1112526191049374, "learning_rate": 1.7707495467479934e-06, "loss": 0.7122, "step": 9010 }, { "epoch": 0.73, "grad_norm": 6.582481412180845, "learning_rate": 1.7697454323764518e-06, "loss": 0.6642, "step": 9011 }, { "epoch": 0.73, "grad_norm": 4.412118142441393, "learning_rate": 1.7687415415588672e-06, "loss": 0.6743, "step": 9012 }, { "epoch": 0.73, "grad_norm": 12.78464136476746, "learning_rate": 1.7677378743647116e-06, "loss": 0.6451, "step": 9013 }, { "epoch": 0.73, "grad_norm": 7.1328121900297266, "learning_rate": 1.7667344308634488e-06, "loss": 0.8853, "step": 9014 }, { "epoch": 0.73, "grad_norm": 4.349985568047294, "learning_rate": 1.7657312111245218e-06, "loss": 0.6934, "step": 9015 }, { "epoch": 0.73, "grad_norm": 4.221360289547216, "learning_rate": 1.7647282152173594e-06, "loss": 0.6002, "step": 9016 }, { "epoch": 0.73, "grad_norm": 3.604380148727912, "learning_rate": 1.763725443211376e-06, "loss": 0.4612, "step": 9017 }, { "epoch": 0.73, "grad_norm": 4.58516107331622, "learning_rate": 1.7627228951759673e-06, "loss": 0.6356, "step": 9018 }, { "epoch": 0.73, "grad_norm": 4.9419297235067114, "learning_rate": 1.7617205711805196e-06, "loss": 0.7621, "step": 9019 }, { "epoch": 0.73, "grad_norm": 4.854320444043853, "learning_rate": 1.7607184712943964e-06, "loss": 0.6562, "step": 9020 }, { "epoch": 0.73, "grad_norm": 4.222895018022861, "learning_rate": 1.7597165955869528e-06, "loss": 0.7388, "step": 9021 }, { "epoch": 0.73, "grad_norm": 5.515256703478686, "learning_rate": 1.7587149441275236e-06, "loss": 0.683, "step": 9022 }, { "epoch": 0.73, "grad_norm": 2.9721383949852735, "learning_rate": 1.7577135169854286e-06, "loss": 0.6173, "step": 9023 }, { "epoch": 0.73, "grad_norm": 4.287168177606552, "learning_rate": 1.7567123142299718e-06, "loss": 0.7624, "step": 9024 }, { "epoch": 0.73, "grad_norm": 4.179905343394945, "learning_rate": 1.7557113359304461e-06, "loss": 0.6658, "step": 9025 }, { "epoch": 0.73, "grad_norm": 3.5768020742337057, "learning_rate": 1.7547105821561238e-06, "loss": 0.7563, "step": 9026 }, { "epoch": 0.73, "grad_norm": 2.8418063680582013, "learning_rate": 1.7537100529762619e-06, "loss": 0.6087, "step": 9027 }, { "epoch": 0.73, "grad_norm": 4.63907027952988, "learning_rate": 1.7527097484601057e-06, "loss": 0.7878, "step": 9028 }, { "epoch": 0.73, "grad_norm": 3.773055042408477, "learning_rate": 1.751709668676882e-06, "loss": 0.6239, "step": 9029 }, { "epoch": 0.73, "grad_norm": 4.300900392702149, "learning_rate": 1.7507098136958017e-06, "loss": 0.7563, "step": 9030 }, { "epoch": 0.73, "grad_norm": 5.995543001320013, "learning_rate": 1.7497101835860603e-06, "loss": 0.6281, "step": 9031 }, { "epoch": 0.73, "grad_norm": 7.037518494021243, "learning_rate": 1.748710778416841e-06, "loss": 0.7558, "step": 9032 }, { "epoch": 0.73, "grad_norm": 6.1753280524523095, "learning_rate": 1.7477115982573078e-06, "loss": 0.7854, "step": 9033 }, { "epoch": 0.73, "grad_norm": 2.7143625032684087, "learning_rate": 1.7467126431766084e-06, "loss": 0.7144, "step": 9034 }, { "epoch": 0.73, "grad_norm": 5.201435086480116, "learning_rate": 1.7457139132438816e-06, "loss": 0.7627, "step": 9035 }, { "epoch": 0.73, "grad_norm": 4.2234905298335335, "learning_rate": 1.7447154085282398e-06, "loss": 0.5716, "step": 9036 }, { "epoch": 0.73, "grad_norm": 4.411111418167788, "learning_rate": 1.7437171290987898e-06, "loss": 0.8133, "step": 9037 }, { "epoch": 0.73, "grad_norm": 2.8236004070386422, "learning_rate": 1.7427190750246164e-06, "loss": 0.684, "step": 9038 }, { "epoch": 0.73, "grad_norm": 7.281846612935211, "learning_rate": 1.7417212463747945e-06, "loss": 0.5949, "step": 9039 }, { "epoch": 0.73, "grad_norm": 2.95905044721102, "learning_rate": 1.7407236432183778e-06, "loss": 0.7019, "step": 9040 }, { "epoch": 0.73, "grad_norm": 3.6531974406127885, "learning_rate": 1.7397262656244057e-06, "loss": 0.698, "step": 9041 }, { "epoch": 0.73, "grad_norm": 3.2760151276700524, "learning_rate": 1.7387291136619071e-06, "loss": 0.7477, "step": 9042 }, { "epoch": 0.73, "grad_norm": 11.437757978207243, "learning_rate": 1.7377321873998858e-06, "loss": 0.7306, "step": 9043 }, { "epoch": 0.73, "grad_norm": 4.47655015391583, "learning_rate": 1.7367354869073394e-06, "loss": 0.7008, "step": 9044 }, { "epoch": 0.73, "grad_norm": 2.8784992560830913, "learning_rate": 1.735739012253243e-06, "loss": 0.5629, "step": 9045 }, { "epoch": 0.73, "grad_norm": 6.088462584267213, "learning_rate": 1.7347427635065622e-06, "loss": 0.6368, "step": 9046 }, { "epoch": 0.73, "grad_norm": 4.498229947807873, "learning_rate": 1.7337467407362418e-06, "loss": 0.6819, "step": 9047 }, { "epoch": 0.73, "grad_norm": 5.098128921328518, "learning_rate": 1.7327509440112112e-06, "loss": 0.6594, "step": 9048 }, { "epoch": 0.73, "grad_norm": 3.126851388231115, "learning_rate": 1.7317553734003894e-06, "loss": 0.6863, "step": 9049 }, { "epoch": 0.74, "grad_norm": 5.052787635697249, "learning_rate": 1.7307600289726745e-06, "loss": 0.5893, "step": 9050 }, { "epoch": 0.74, "grad_norm": 4.777614082166385, "learning_rate": 1.72976491079695e-06, "loss": 0.7043, "step": 9051 }, { "epoch": 0.74, "grad_norm": 4.139907213065849, "learning_rate": 1.7287700189420831e-06, "loss": 0.6776, "step": 9052 }, { "epoch": 0.74, "grad_norm": 4.512297868201302, "learning_rate": 1.7277753534769304e-06, "loss": 0.7316, "step": 9053 }, { "epoch": 0.74, "grad_norm": 6.001483969036824, "learning_rate": 1.7267809144703251e-06, "loss": 0.6071, "step": 9054 }, { "epoch": 0.74, "grad_norm": 6.349328884197159, "learning_rate": 1.7257867019910933e-06, "loss": 0.6583, "step": 9055 }, { "epoch": 0.74, "grad_norm": 3.5299168070237603, "learning_rate": 1.7247927161080346e-06, "loss": 0.5047, "step": 9056 }, { "epoch": 0.74, "grad_norm": 4.227965674527148, "learning_rate": 1.7237989568899444e-06, "loss": 0.641, "step": 9057 }, { "epoch": 0.74, "grad_norm": 3.8061821226187074, "learning_rate": 1.7228054244055952e-06, "loss": 0.583, "step": 9058 }, { "epoch": 0.74, "grad_norm": 3.3558384297001114, "learning_rate": 1.7218121187237436e-06, "loss": 0.6177, "step": 9059 }, { "epoch": 0.74, "grad_norm": 8.038592903515427, "learning_rate": 1.7208190399131359e-06, "loss": 0.5733, "step": 9060 }, { "epoch": 0.74, "grad_norm": 2.7442964032355848, "learning_rate": 1.7198261880424967e-06, "loss": 0.629, "step": 9061 }, { "epoch": 0.74, "grad_norm": 2.854857229577612, "learning_rate": 1.7188335631805426e-06, "loss": 0.5712, "step": 9062 }, { "epoch": 0.74, "grad_norm": 4.02428763612648, "learning_rate": 1.717841165395962e-06, "loss": 0.7264, "step": 9063 }, { "epoch": 0.74, "grad_norm": 6.718385428002132, "learning_rate": 1.7168489947574407e-06, "loss": 0.7513, "step": 9064 }, { "epoch": 0.74, "grad_norm": 4.440736186316236, "learning_rate": 1.715857051333642e-06, "loss": 0.7002, "step": 9065 }, { "epoch": 0.74, "grad_norm": 3.6065433015276014, "learning_rate": 1.7148653351932116e-06, "loss": 0.7542, "step": 9066 }, { "epoch": 0.74, "grad_norm": 3.3725003325413216, "learning_rate": 1.713873846404787e-06, "loss": 0.6819, "step": 9067 }, { "epoch": 0.74, "grad_norm": 12.63661843072848, "learning_rate": 1.7128825850369819e-06, "loss": 0.5876, "step": 9068 }, { "epoch": 0.74, "grad_norm": 5.193358105257273, "learning_rate": 1.7118915511584022e-06, "loss": 0.4654, "step": 9069 }, { "epoch": 0.74, "grad_norm": 20.656710694762054, "learning_rate": 1.7109007448376274e-06, "loss": 0.6683, "step": 9070 }, { "epoch": 0.74, "grad_norm": 3.709800434236687, "learning_rate": 1.7099101661432326e-06, "loss": 0.5542, "step": 9071 }, { "epoch": 0.74, "grad_norm": 11.913372999313664, "learning_rate": 1.7089198151437708e-06, "loss": 0.608, "step": 9072 }, { "epoch": 0.74, "grad_norm": 4.870034372793257, "learning_rate": 1.7079296919077781e-06, "loss": 0.5864, "step": 9073 }, { "epoch": 0.74, "grad_norm": 4.264145769317549, "learning_rate": 1.7069397965037816e-06, "loss": 0.6341, "step": 9074 }, { "epoch": 0.74, "grad_norm": 4.389151236961843, "learning_rate": 1.7059501290002855e-06, "loss": 0.7465, "step": 9075 }, { "epoch": 0.74, "grad_norm": 4.998991433883439, "learning_rate": 1.7049606894657817e-06, "loss": 0.6225, "step": 9076 }, { "epoch": 0.74, "grad_norm": 3.268987331458217, "learning_rate": 1.7039714779687438e-06, "loss": 0.6913, "step": 9077 }, { "epoch": 0.74, "grad_norm": 7.400244461200818, "learning_rate": 1.7029824945776346e-06, "loss": 0.6046, "step": 9078 }, { "epoch": 0.74, "grad_norm": 11.676297328591891, "learning_rate": 1.701993739360895e-06, "loss": 0.7142, "step": 9079 }, { "epoch": 0.74, "grad_norm": 4.156450850923716, "learning_rate": 1.7010052123869564e-06, "loss": 0.6187, "step": 9080 }, { "epoch": 0.74, "grad_norm": 3.9160448808124215, "learning_rate": 1.700016913724229e-06, "loss": 0.8204, "step": 9081 }, { "epoch": 0.74, "grad_norm": 4.4581595964277945, "learning_rate": 1.6990288434411094e-06, "loss": 0.8151, "step": 9082 }, { "epoch": 0.74, "grad_norm": 6.108067224138785, "learning_rate": 1.6980410016059789e-06, "loss": 0.7362, "step": 9083 }, { "epoch": 0.74, "grad_norm": 7.249607278894565, "learning_rate": 1.6970533882872004e-06, "loss": 0.6182, "step": 9084 }, { "epoch": 0.74, "grad_norm": 4.903282138217733, "learning_rate": 1.6960660035531256e-06, "loss": 0.6303, "step": 9085 }, { "epoch": 0.74, "grad_norm": 3.7111491747140537, "learning_rate": 1.6950788474720852e-06, "loss": 0.698, "step": 9086 }, { "epoch": 0.74, "grad_norm": 5.685673776479529, "learning_rate": 1.6940919201124001e-06, "loss": 0.5428, "step": 9087 }, { "epoch": 0.74, "grad_norm": 5.537884616404571, "learning_rate": 1.6931052215423693e-06, "loss": 0.6795, "step": 9088 }, { "epoch": 0.74, "grad_norm": 4.4066587046520835, "learning_rate": 1.6921187518302795e-06, "loss": 0.6103, "step": 9089 }, { "epoch": 0.74, "grad_norm": 5.522610604200022, "learning_rate": 1.6911325110444005e-06, "loss": 0.6918, "step": 9090 }, { "epoch": 0.74, "grad_norm": 4.694734407310773, "learning_rate": 1.6901464992529837e-06, "loss": 0.5909, "step": 9091 }, { "epoch": 0.74, "grad_norm": 3.302509582619649, "learning_rate": 1.6891607165242718e-06, "loss": 0.7304, "step": 9092 }, { "epoch": 0.74, "grad_norm": 5.768673489039729, "learning_rate": 1.688175162926483e-06, "loss": 0.7259, "step": 9093 }, { "epoch": 0.74, "grad_norm": 2.8808571421889364, "learning_rate": 1.6871898385278278e-06, "loss": 0.6389, "step": 9094 }, { "epoch": 0.74, "grad_norm": 12.20263962194019, "learning_rate": 1.686204743396495e-06, "loss": 0.5056, "step": 9095 }, { "epoch": 0.74, "grad_norm": 4.170177527202357, "learning_rate": 1.6852198776006596e-06, "loss": 0.6973, "step": 9096 }, { "epoch": 0.74, "grad_norm": 4.790392057339692, "learning_rate": 1.68423524120848e-06, "loss": 0.6513, "step": 9097 }, { "epoch": 0.74, "grad_norm": 8.725997075970518, "learning_rate": 1.6832508342880981e-06, "loss": 0.6928, "step": 9098 }, { "epoch": 0.74, "grad_norm": 4.153388006904061, "learning_rate": 1.6822666569076434e-06, "loss": 0.6925, "step": 9099 }, { "epoch": 0.74, "grad_norm": 3.978065682747705, "learning_rate": 1.6812827091352252e-06, "loss": 0.6699, "step": 9100 }, { "epoch": 0.74, "grad_norm": 5.3207723377084, "learning_rate": 1.6802989910389416e-06, "loss": 0.6841, "step": 9101 }, { "epoch": 0.74, "grad_norm": 3.4069671825968824, "learning_rate": 1.67931550268687e-06, "loss": 0.6806, "step": 9102 }, { "epoch": 0.74, "grad_norm": 3.015712646777389, "learning_rate": 1.6783322441470745e-06, "loss": 0.672, "step": 9103 }, { "epoch": 0.74, "grad_norm": 7.9885619403915475, "learning_rate": 1.6773492154876008e-06, "loss": 0.67, "step": 9104 }, { "epoch": 0.74, "grad_norm": 14.551906968357082, "learning_rate": 1.6763664167764847e-06, "loss": 0.7617, "step": 9105 }, { "epoch": 0.74, "grad_norm": 6.667806995767433, "learning_rate": 1.6753838480817397e-06, "loss": 0.7167, "step": 9106 }, { "epoch": 0.74, "grad_norm": 5.008091582743248, "learning_rate": 1.674401509471364e-06, "loss": 0.6347, "step": 9107 }, { "epoch": 0.74, "grad_norm": 3.547632990834146, "learning_rate": 1.673419401013347e-06, "loss": 0.5644, "step": 9108 }, { "epoch": 0.74, "grad_norm": 4.339606931924342, "learning_rate": 1.6724375227756501e-06, "loss": 0.5805, "step": 9109 }, { "epoch": 0.74, "grad_norm": 4.131366870584318, "learning_rate": 1.6714558748262298e-06, "loss": 0.6864, "step": 9110 }, { "epoch": 0.74, "grad_norm": 3.986157075215698, "learning_rate": 1.6704744572330206e-06, "loss": 0.6885, "step": 9111 }, { "epoch": 0.74, "grad_norm": 6.749418945188202, "learning_rate": 1.6694932700639444e-06, "loss": 0.6904, "step": 9112 }, { "epoch": 0.74, "grad_norm": 7.0648936777508355, "learning_rate": 1.6685123133869046e-06, "loss": 0.7836, "step": 9113 }, { "epoch": 0.74, "grad_norm": 3.515946587550001, "learning_rate": 1.6675315872697879e-06, "loss": 0.6884, "step": 9114 }, { "epoch": 0.74, "grad_norm": 10.604344868341466, "learning_rate": 1.6665510917804712e-06, "loss": 0.6752, "step": 9115 }, { "epoch": 0.74, "grad_norm": 3.615890680680915, "learning_rate": 1.6655708269868055e-06, "loss": 0.7058, "step": 9116 }, { "epoch": 0.74, "grad_norm": 5.14001618855776, "learning_rate": 1.6645907929566345e-06, "loss": 0.729, "step": 9117 }, { "epoch": 0.74, "grad_norm": 10.46455098034888, "learning_rate": 1.6636109897577813e-06, "loss": 0.6776, "step": 9118 }, { "epoch": 0.74, "grad_norm": 4.569020223502847, "learning_rate": 1.6626314174580565e-06, "loss": 0.5976, "step": 9119 }, { "epoch": 0.74, "grad_norm": 5.676520332948421, "learning_rate": 1.661652076125252e-06, "loss": 0.7312, "step": 9120 }, { "epoch": 0.74, "grad_norm": 5.755037101965318, "learning_rate": 1.6606729658271413e-06, "loss": 0.7252, "step": 9121 }, { "epoch": 0.74, "grad_norm": 3.4341892797644964, "learning_rate": 1.6596940866314915e-06, "loss": 0.6965, "step": 9122 }, { "epoch": 0.74, "grad_norm": 5.475785196018253, "learning_rate": 1.65871543860604e-06, "loss": 0.6194, "step": 9123 }, { "epoch": 0.74, "grad_norm": 4.912743834214711, "learning_rate": 1.6577370218185197e-06, "loss": 0.6062, "step": 9124 }, { "epoch": 0.74, "grad_norm": 7.577586886506765, "learning_rate": 1.656758836336641e-06, "loss": 0.7639, "step": 9125 }, { "epoch": 0.74, "grad_norm": 2.883934903324397, "learning_rate": 1.655780882228103e-06, "loss": 0.5834, "step": 9126 }, { "epoch": 0.74, "grad_norm": 9.15424994558803, "learning_rate": 1.6548031595605829e-06, "loss": 0.7172, "step": 9127 }, { "epoch": 0.74, "grad_norm": 3.256171763045889, "learning_rate": 1.6538256684017512e-06, "loss": 0.6774, "step": 9128 }, { "epoch": 0.74, "grad_norm": 2.9494340733529523, "learning_rate": 1.6528484088192487e-06, "loss": 0.5602, "step": 9129 }, { "epoch": 0.74, "grad_norm": 3.810263468724041, "learning_rate": 1.6518713808807135e-06, "loss": 0.7268, "step": 9130 }, { "epoch": 0.74, "grad_norm": 3.3056735247106093, "learning_rate": 1.6508945846537606e-06, "loss": 0.9031, "step": 9131 }, { "epoch": 0.74, "grad_norm": 3.635794463167354, "learning_rate": 1.6499180202059883e-06, "loss": 0.6024, "step": 9132 }, { "epoch": 0.74, "grad_norm": 5.051135977375669, "learning_rate": 1.648941687604984e-06, "loss": 0.7451, "step": 9133 }, { "epoch": 0.74, "grad_norm": 4.32487712961461, "learning_rate": 1.6479655869183142e-06, "loss": 0.8083, "step": 9134 }, { "epoch": 0.74, "grad_norm": 4.509850515877558, "learning_rate": 1.6469897182135347e-06, "loss": 0.5401, "step": 9135 }, { "epoch": 0.74, "grad_norm": 3.0609954209440535, "learning_rate": 1.6460140815581754e-06, "loss": 0.7148, "step": 9136 }, { "epoch": 0.74, "grad_norm": 2.890736995360674, "learning_rate": 1.6450386770197625e-06, "loss": 0.6671, "step": 9137 }, { "epoch": 0.74, "grad_norm": 4.40560969211236, "learning_rate": 1.6440635046657971e-06, "loss": 0.6227, "step": 9138 }, { "epoch": 0.74, "grad_norm": 6.215995185891323, "learning_rate": 1.6430885645637667e-06, "loss": 0.748, "step": 9139 }, { "epoch": 0.74, "grad_norm": 3.339770772935581, "learning_rate": 1.6421138567811456e-06, "loss": 0.6585, "step": 9140 }, { "epoch": 0.74, "grad_norm": 5.417085367573536, "learning_rate": 1.6411393813853893e-06, "loss": 0.6055, "step": 9141 }, { "epoch": 0.74, "grad_norm": 4.202176153914713, "learning_rate": 1.6401651384439365e-06, "loss": 0.6097, "step": 9142 }, { "epoch": 0.74, "grad_norm": 4.367131862255048, "learning_rate": 1.63919112802421e-06, "loss": 0.7113, "step": 9143 }, { "epoch": 0.74, "grad_norm": 11.779382813060646, "learning_rate": 1.6382173501936206e-06, "loss": 0.724, "step": 9144 }, { "epoch": 0.74, "grad_norm": 2.6634392830037816, "learning_rate": 1.6372438050195577e-06, "loss": 0.6127, "step": 9145 }, { "epoch": 0.74, "grad_norm": 3.907296089291729, "learning_rate": 1.6362704925693957e-06, "loss": 0.703, "step": 9146 }, { "epoch": 0.74, "grad_norm": 6.189456493705393, "learning_rate": 1.6352974129104964e-06, "loss": 0.5996, "step": 9147 }, { "epoch": 0.74, "grad_norm": 4.498296102419997, "learning_rate": 1.6343245661102031e-06, "loss": 0.6815, "step": 9148 }, { "epoch": 0.74, "grad_norm": 5.196170975003428, "learning_rate": 1.6333519522358416e-06, "loss": 0.701, "step": 9149 }, { "epoch": 0.74, "grad_norm": 3.334316168853089, "learning_rate": 1.6323795713547208e-06, "loss": 0.7045, "step": 9150 }, { "epoch": 0.74, "grad_norm": 22.819685436074494, "learning_rate": 1.6314074235341403e-06, "loss": 0.659, "step": 9151 }, { "epoch": 0.74, "grad_norm": 5.540659253063088, "learning_rate": 1.6304355088413747e-06, "loss": 0.7287, "step": 9152 }, { "epoch": 0.74, "grad_norm": 4.687392666501798, "learning_rate": 1.6294638273436902e-06, "loss": 0.6199, "step": 9153 }, { "epoch": 0.74, "grad_norm": 7.664401345340307, "learning_rate": 1.6284923791083312e-06, "loss": 0.7385, "step": 9154 }, { "epoch": 0.74, "grad_norm": 12.026549945268178, "learning_rate": 1.6275211642025285e-06, "loss": 0.6975, "step": 9155 }, { "epoch": 0.74, "grad_norm": 5.861625300862647, "learning_rate": 1.6265501826934959e-06, "loss": 0.7269, "step": 9156 }, { "epoch": 0.74, "grad_norm": 4.152682572236951, "learning_rate": 1.6255794346484305e-06, "loss": 0.621, "step": 9157 }, { "epoch": 0.74, "grad_norm": 9.234341237890744, "learning_rate": 1.6246089201345167e-06, "loss": 0.7241, "step": 9158 }, { "epoch": 0.74, "grad_norm": 8.769690963293272, "learning_rate": 1.6236386392189175e-06, "loss": 0.7735, "step": 9159 }, { "epoch": 0.74, "grad_norm": 3.861439507922802, "learning_rate": 1.622668591968785e-06, "loss": 0.6127, "step": 9160 }, { "epoch": 0.74, "grad_norm": 4.405117975854151, "learning_rate": 1.6216987784512512e-06, "loss": 0.7164, "step": 9161 }, { "epoch": 0.74, "grad_norm": 6.999821958661862, "learning_rate": 1.620729198733434e-06, "loss": 0.7372, "step": 9162 }, { "epoch": 0.74, "grad_norm": 4.234094529083211, "learning_rate": 1.6197598528824338e-06, "loss": 0.5409, "step": 9163 }, { "epoch": 0.74, "grad_norm": 3.923997652612684, "learning_rate": 1.6187907409653335e-06, "loss": 0.6248, "step": 9164 }, { "epoch": 0.74, "grad_norm": 4.964865302354988, "learning_rate": 1.617821863049206e-06, "loss": 0.6974, "step": 9165 }, { "epoch": 0.74, "grad_norm": 5.090789297409314, "learning_rate": 1.6168532192010993e-06, "loss": 0.7865, "step": 9166 }, { "epoch": 0.74, "grad_norm": 7.018303851147221, "learning_rate": 1.6158848094880535e-06, "loss": 0.5518, "step": 9167 }, { "epoch": 0.74, "grad_norm": 3.8169427169205816, "learning_rate": 1.6149166339770877e-06, "loss": 0.6302, "step": 9168 }, { "epoch": 0.74, "grad_norm": 7.209963653850187, "learning_rate": 1.6139486927352048e-06, "loss": 0.6847, "step": 9169 }, { "epoch": 0.74, "grad_norm": 7.226084564230448, "learning_rate": 1.6129809858293926e-06, "loss": 0.6822, "step": 9170 }, { "epoch": 0.74, "grad_norm": 3.655494651526364, "learning_rate": 1.6120135133266208e-06, "loss": 0.6129, "step": 9171 }, { "epoch": 0.74, "grad_norm": 3.5918371148134525, "learning_rate": 1.6110462752938482e-06, "loss": 0.6865, "step": 9172 }, { "epoch": 0.75, "grad_norm": 5.490431937751609, "learning_rate": 1.6100792717980106e-06, "loss": 0.7239, "step": 9173 }, { "epoch": 0.75, "grad_norm": 3.684896562475687, "learning_rate": 1.6091125029060335e-06, "loss": 0.5958, "step": 9174 }, { "epoch": 0.75, "grad_norm": 13.42468390490575, "learning_rate": 1.6081459686848217e-06, "loss": 0.7426, "step": 9175 }, { "epoch": 0.75, "grad_norm": 5.73150237237512, "learning_rate": 1.6071796692012663e-06, "loss": 0.844, "step": 9176 }, { "epoch": 0.75, "grad_norm": 5.643259136436941, "learning_rate": 1.6062136045222388e-06, "loss": 0.665, "step": 9177 }, { "epoch": 0.75, "grad_norm": 3.333573845412961, "learning_rate": 1.6052477747146006e-06, "loss": 0.637, "step": 9178 }, { "epoch": 0.75, "grad_norm": 5.2401039201135875, "learning_rate": 1.6042821798451914e-06, "loss": 0.6692, "step": 9179 }, { "epoch": 0.75, "grad_norm": 7.129691568387476, "learning_rate": 1.6033168199808352e-06, "loss": 0.7452, "step": 9180 }, { "epoch": 0.75, "grad_norm": 4.120948592788868, "learning_rate": 1.6023516951883455e-06, "loss": 0.7169, "step": 9181 }, { "epoch": 0.75, "grad_norm": 7.693447296054683, "learning_rate": 1.6013868055345084e-06, "loss": 0.5477, "step": 9182 }, { "epoch": 0.75, "grad_norm": 4.259877268147181, "learning_rate": 1.6004221510861057e-06, "loss": 0.5187, "step": 9183 }, { "epoch": 0.75, "grad_norm": 7.118176312147558, "learning_rate": 1.5994577319098936e-06, "loss": 0.7644, "step": 9184 }, { "epoch": 0.75, "grad_norm": 6.93585007449809, "learning_rate": 1.5984935480726199e-06, "loss": 0.729, "step": 9185 }, { "epoch": 0.75, "grad_norm": 4.977732233558026, "learning_rate": 1.5975295996410107e-06, "loss": 0.6384, "step": 9186 }, { "epoch": 0.75, "grad_norm": 4.80102309224025, "learning_rate": 1.5965658866817751e-06, "loss": 0.6703, "step": 9187 }, { "epoch": 0.75, "grad_norm": 3.945065652565553, "learning_rate": 1.5956024092616129e-06, "loss": 0.6165, "step": 9188 }, { "epoch": 0.75, "grad_norm": 3.046224525980792, "learning_rate": 1.5946391674471968e-06, "loss": 0.4656, "step": 9189 }, { "epoch": 0.75, "grad_norm": 3.97849703743684, "learning_rate": 1.5936761613051937e-06, "loss": 0.6367, "step": 9190 }, { "epoch": 0.75, "grad_norm": 7.265006225011878, "learning_rate": 1.5927133909022469e-06, "loss": 0.6929, "step": 9191 }, { "epoch": 0.75, "grad_norm": 6.517950305423383, "learning_rate": 1.5917508563049888e-06, "loss": 0.7377, "step": 9192 }, { "epoch": 0.75, "grad_norm": 2.6027746513511745, "learning_rate": 1.5907885575800318e-06, "loss": 0.6322, "step": 9193 }, { "epoch": 0.75, "grad_norm": 3.5160709055603894, "learning_rate": 1.5898264947939729e-06, "loss": 0.7594, "step": 9194 }, { "epoch": 0.75, "grad_norm": 6.451420791104515, "learning_rate": 1.5888646680133923e-06, "loss": 0.4824, "step": 9195 }, { "epoch": 0.75, "grad_norm": 4.678427133251209, "learning_rate": 1.5879030773048536e-06, "loss": 0.61, "step": 9196 }, { "epoch": 0.75, "grad_norm": 5.212637963787614, "learning_rate": 1.5869417227349077e-06, "loss": 0.8648, "step": 9197 }, { "epoch": 0.75, "grad_norm": 2.9362544649209155, "learning_rate": 1.5859806043700838e-06, "loss": 0.5822, "step": 9198 }, { "epoch": 0.75, "grad_norm": 4.05145130873793, "learning_rate": 1.5850197222768998e-06, "loss": 0.6556, "step": 9199 }, { "epoch": 0.75, "grad_norm": 18.3590440407811, "learning_rate": 1.5840590765218538e-06, "loss": 0.5171, "step": 9200 }, { "epoch": 0.75, "grad_norm": 2.595370668199203, "learning_rate": 1.5830986671714283e-06, "loss": 0.6932, "step": 9201 }, { "epoch": 0.75, "grad_norm": 3.7085766653583896, "learning_rate": 1.5821384942920876e-06, "loss": 0.7209, "step": 9202 }, { "epoch": 0.75, "grad_norm": 4.257791650298892, "learning_rate": 1.5811785579502852e-06, "loss": 0.5832, "step": 9203 }, { "epoch": 0.75, "grad_norm": 4.921515809682467, "learning_rate": 1.580218858212454e-06, "loss": 0.6732, "step": 9204 }, { "epoch": 0.75, "grad_norm": 4.387574225923554, "learning_rate": 1.5792593951450085e-06, "loss": 0.6015, "step": 9205 }, { "epoch": 0.75, "grad_norm": 3.988231355404521, "learning_rate": 1.578300168814353e-06, "loss": 0.8296, "step": 9206 }, { "epoch": 0.75, "grad_norm": 4.9590633483187805, "learning_rate": 1.5773411792868692e-06, "loss": 0.7607, "step": 9207 }, { "epoch": 0.75, "grad_norm": 3.2155625784212076, "learning_rate": 1.57638242662893e-06, "loss": 0.8566, "step": 9208 }, { "epoch": 0.75, "grad_norm": 3.875293183795946, "learning_rate": 1.5754239109068804e-06, "loss": 0.6667, "step": 9209 }, { "epoch": 0.75, "grad_norm": 4.518434355778084, "learning_rate": 1.574465632187061e-06, "loss": 0.7105, "step": 9210 }, { "epoch": 0.75, "grad_norm": 4.092580564897229, "learning_rate": 1.5735075905357882e-06, "loss": 0.5526, "step": 9211 }, { "epoch": 0.75, "grad_norm": 4.659085709354241, "learning_rate": 1.572549786019364e-06, "loss": 0.5401, "step": 9212 }, { "epoch": 0.75, "grad_norm": 4.908088813060428, "learning_rate": 1.5715922187040771e-06, "loss": 0.5642, "step": 9213 }, { "epoch": 0.75, "grad_norm": 7.16254994324948, "learning_rate": 1.5706348886561955e-06, "loss": 0.7392, "step": 9214 }, { "epoch": 0.75, "grad_norm": 17.269429750689987, "learning_rate": 1.5696777959419729e-06, "loss": 0.6543, "step": 9215 }, { "epoch": 0.75, "grad_norm": 3.385640826487743, "learning_rate": 1.5687209406276443e-06, "loss": 0.6619, "step": 9216 }, { "epoch": 0.75, "grad_norm": 4.856734977688676, "learning_rate": 1.5677643227794332e-06, "loss": 0.6164, "step": 9217 }, { "epoch": 0.75, "grad_norm": 9.699678129090218, "learning_rate": 1.5668079424635424e-06, "loss": 0.5915, "step": 9218 }, { "epoch": 0.75, "grad_norm": 6.769990278525421, "learning_rate": 1.565851799746157e-06, "loss": 0.6369, "step": 9219 }, { "epoch": 0.75, "grad_norm": 5.307646279136852, "learning_rate": 1.5648958946934523e-06, "loss": 0.6575, "step": 9220 }, { "epoch": 0.75, "grad_norm": 3.976543767937419, "learning_rate": 1.563940227371581e-06, "loss": 0.6661, "step": 9221 }, { "epoch": 0.75, "grad_norm": 5.2934098774266785, "learning_rate": 1.5629847978466805e-06, "loss": 0.5588, "step": 9222 }, { "epoch": 0.75, "grad_norm": 3.8612276933948975, "learning_rate": 1.5620296061848722e-06, "loss": 0.5009, "step": 9223 }, { "epoch": 0.75, "grad_norm": 3.927145591459146, "learning_rate": 1.561074652452264e-06, "loss": 0.6675, "step": 9224 }, { "epoch": 0.75, "grad_norm": 3.3608913380276064, "learning_rate": 1.5601199367149432e-06, "loss": 0.6538, "step": 9225 }, { "epoch": 0.75, "grad_norm": 3.59653933875556, "learning_rate": 1.5591654590389798e-06, "loss": 0.6122, "step": 9226 }, { "epoch": 0.75, "grad_norm": 4.250110997902879, "learning_rate": 1.558211219490434e-06, "loss": 0.7701, "step": 9227 }, { "epoch": 0.75, "grad_norm": 8.47222757288281, "learning_rate": 1.5572572181353435e-06, "loss": 0.7227, "step": 9228 }, { "epoch": 0.75, "grad_norm": 4.29823036502934, "learning_rate": 1.5563034550397305e-06, "loss": 0.6022, "step": 9229 }, { "epoch": 0.75, "grad_norm": 8.753653718420093, "learning_rate": 1.5553499302695996e-06, "loss": 0.5702, "step": 9230 }, { "epoch": 0.75, "grad_norm": 6.82887321416608, "learning_rate": 1.5543966438909451e-06, "loss": 0.6388, "step": 9231 }, { "epoch": 0.75, "grad_norm": 14.36021441245323, "learning_rate": 1.5534435959697363e-06, "loss": 0.5621, "step": 9232 }, { "epoch": 0.75, "grad_norm": 5.02160843440359, "learning_rate": 1.5524907865719336e-06, "loss": 0.6572, "step": 9233 }, { "epoch": 0.75, "grad_norm": 9.689530491544076, "learning_rate": 1.5515382157634756e-06, "loss": 0.6774, "step": 9234 }, { "epoch": 0.75, "grad_norm": 2.745878161202803, "learning_rate": 1.5505858836102866e-06, "loss": 0.653, "step": 9235 }, { "epoch": 0.75, "grad_norm": 10.68215910521105, "learning_rate": 1.5496337901782737e-06, "loss": 0.7023, "step": 9236 }, { "epoch": 0.75, "grad_norm": 10.267370816841499, "learning_rate": 1.548681935533326e-06, "loss": 0.5201, "step": 9237 }, { "epoch": 0.75, "grad_norm": 5.03302229731131, "learning_rate": 1.5477303197413213e-06, "loss": 0.748, "step": 9238 }, { "epoch": 0.75, "grad_norm": 3.0474537566442126, "learning_rate": 1.5467789428681145e-06, "loss": 0.7339, "step": 9239 }, { "epoch": 0.75, "grad_norm": 4.320054887296264, "learning_rate": 1.5458278049795495e-06, "loss": 0.7397, "step": 9240 }, { "epoch": 0.75, "grad_norm": 7.302954571483545, "learning_rate": 1.5448769061414497e-06, "loss": 0.8012, "step": 9241 }, { "epoch": 0.75, "grad_norm": 3.0207065053807693, "learning_rate": 1.5439262464196236e-06, "loss": 0.5929, "step": 9242 }, { "epoch": 0.75, "grad_norm": 13.230253778180412, "learning_rate": 1.5429758258798622e-06, "loss": 0.5731, "step": 9243 }, { "epoch": 0.75, "grad_norm": 6.959799588574407, "learning_rate": 1.542025644587939e-06, "loss": 0.5715, "step": 9244 }, { "epoch": 0.75, "grad_norm": 3.257006355714011, "learning_rate": 1.5410757026096163e-06, "loss": 0.6638, "step": 9245 }, { "epoch": 0.75, "grad_norm": 8.227118253275556, "learning_rate": 1.5401260000106321e-06, "loss": 0.6396, "step": 9246 }, { "epoch": 0.75, "grad_norm": 5.811584241571735, "learning_rate": 1.5391765368567173e-06, "loss": 0.7566, "step": 9247 }, { "epoch": 0.75, "grad_norm": 5.569791300293353, "learning_rate": 1.5382273132135745e-06, "loss": 0.7143, "step": 9248 }, { "epoch": 0.75, "grad_norm": 10.15907716062236, "learning_rate": 1.5372783291469002e-06, "loss": 0.7944, "step": 9249 }, { "epoch": 0.75, "grad_norm": 4.704889215605705, "learning_rate": 1.5363295847223685e-06, "loss": 0.6312, "step": 9250 }, { "epoch": 0.75, "grad_norm": 6.157170630343912, "learning_rate": 1.5353810800056367e-06, "loss": 0.7463, "step": 9251 }, { "epoch": 0.75, "grad_norm": 5.111424047629009, "learning_rate": 1.5344328150623516e-06, "loss": 0.8069, "step": 9252 }, { "epoch": 0.75, "grad_norm": 3.4527324622912072, "learning_rate": 1.5334847899581344e-06, "loss": 0.6963, "step": 9253 }, { "epoch": 0.75, "grad_norm": 4.066958643787716, "learning_rate": 1.5325370047586003e-06, "loss": 0.6095, "step": 9254 }, { "epoch": 0.75, "grad_norm": 7.533251528204885, "learning_rate": 1.531589459529335e-06, "loss": 0.5975, "step": 9255 }, { "epoch": 0.75, "grad_norm": 4.161754736588987, "learning_rate": 1.5306421543359195e-06, "loss": 0.62, "step": 9256 }, { "epoch": 0.75, "grad_norm": 3.302096731690723, "learning_rate": 1.5296950892439106e-06, "loss": 0.7082, "step": 9257 }, { "epoch": 0.75, "grad_norm": 6.289009720198772, "learning_rate": 1.528748264318854e-06, "loss": 0.6463, "step": 9258 }, { "epoch": 0.75, "grad_norm": 27.717254730586927, "learning_rate": 1.527801679626274e-06, "loss": 0.6503, "step": 9259 }, { "epoch": 0.75, "grad_norm": 3.824479183718752, "learning_rate": 1.526855335231679e-06, "loss": 0.7478, "step": 9260 }, { "epoch": 0.75, "grad_norm": 3.5740542654717222, "learning_rate": 1.5259092312005668e-06, "loss": 0.8412, "step": 9261 }, { "epoch": 0.75, "grad_norm": 4.253259984536387, "learning_rate": 1.5249633675984072e-06, "loss": 0.5094, "step": 9262 }, { "epoch": 0.75, "grad_norm": 129.67896132521813, "learning_rate": 1.5240177444906651e-06, "loss": 0.6777, "step": 9263 }, { "epoch": 0.75, "grad_norm": 7.472854077658867, "learning_rate": 1.5230723619427795e-06, "loss": 0.7814, "step": 9264 }, { "epoch": 0.75, "grad_norm": 3.1834672142842084, "learning_rate": 1.5221272200201808e-06, "loss": 0.7614, "step": 9265 }, { "epoch": 0.75, "grad_norm": 3.8019906331171027, "learning_rate": 1.5211823187882774e-06, "loss": 0.7666, "step": 9266 }, { "epoch": 0.75, "grad_norm": 4.466781684155773, "learning_rate": 1.5202376583124617e-06, "loss": 0.7644, "step": 9267 }, { "epoch": 0.75, "grad_norm": 5.509186117026691, "learning_rate": 1.5192932386581105e-06, "loss": 0.7377, "step": 9268 }, { "epoch": 0.75, "grad_norm": 5.506211265222116, "learning_rate": 1.5183490598905814e-06, "loss": 0.615, "step": 9269 }, { "epoch": 0.75, "grad_norm": 3.649314717925277, "learning_rate": 1.5174051220752216e-06, "loss": 0.6686, "step": 9270 }, { "epoch": 0.75, "grad_norm": 4.648718334978951, "learning_rate": 1.5164614252773545e-06, "loss": 0.7374, "step": 9271 }, { "epoch": 0.75, "grad_norm": 6.535012781862225, "learning_rate": 1.5155179695622918e-06, "loss": 0.6309, "step": 9272 }, { "epoch": 0.75, "grad_norm": 3.9433851342779307, "learning_rate": 1.514574754995326e-06, "loss": 0.4907, "step": 9273 }, { "epoch": 0.75, "grad_norm": 6.350863966303147, "learning_rate": 1.5136317816417333e-06, "loss": 0.7643, "step": 9274 }, { "epoch": 0.75, "grad_norm": 4.894091796859776, "learning_rate": 1.5126890495667734e-06, "loss": 0.6533, "step": 9275 }, { "epoch": 0.75, "grad_norm": 7.639719974319914, "learning_rate": 1.5117465588356871e-06, "loss": 0.8022, "step": 9276 }, { "epoch": 0.75, "grad_norm": 4.347682022479323, "learning_rate": 1.5108043095137048e-06, "loss": 0.6198, "step": 9277 }, { "epoch": 0.75, "grad_norm": 3.191939686103781, "learning_rate": 1.5098623016660325e-06, "loss": 0.5919, "step": 9278 }, { "epoch": 0.75, "grad_norm": 5.20371465162756, "learning_rate": 1.5089205353578663e-06, "loss": 0.5907, "step": 9279 }, { "epoch": 0.75, "grad_norm": 2.855046168317262, "learning_rate": 1.507979010654379e-06, "loss": 0.6376, "step": 9280 }, { "epoch": 0.75, "grad_norm": 5.142402084139453, "learning_rate": 1.5070377276207348e-06, "loss": 0.6523, "step": 9281 }, { "epoch": 0.75, "grad_norm": 4.838668671451622, "learning_rate": 1.50609668632207e-06, "loss": 0.5614, "step": 9282 }, { "epoch": 0.75, "grad_norm": 3.2125028433098923, "learning_rate": 1.505155886823516e-06, "loss": 0.7854, "step": 9283 }, { "epoch": 0.75, "grad_norm": 3.4174997687799715, "learning_rate": 1.5042153291901796e-06, "loss": 0.609, "step": 9284 }, { "epoch": 0.75, "grad_norm": 3.845669736688184, "learning_rate": 1.5032750134871527e-06, "loss": 0.7142, "step": 9285 }, { "epoch": 0.75, "grad_norm": 3.7994916286810474, "learning_rate": 1.5023349397795128e-06, "loss": 0.7153, "step": 9286 }, { "epoch": 0.75, "grad_norm": 3.235914159338443, "learning_rate": 1.5013951081323186e-06, "loss": 0.6226, "step": 9287 }, { "epoch": 0.75, "grad_norm": 4.123545209455686, "learning_rate": 1.5004555186106124e-06, "loss": 0.692, "step": 9288 }, { "epoch": 0.75, "grad_norm": 3.2544455607248937, "learning_rate": 1.499516171279417e-06, "loss": 0.6108, "step": 9289 }, { "epoch": 0.75, "grad_norm": 4.435548968747164, "learning_rate": 1.4985770662037453e-06, "loss": 0.5999, "step": 9290 }, { "epoch": 0.75, "grad_norm": 3.816375521089074, "learning_rate": 1.4976382034485876e-06, "loss": 0.6891, "step": 9291 }, { "epoch": 0.75, "grad_norm": 3.9966555691748056, "learning_rate": 1.4966995830789167e-06, "loss": 0.8782, "step": 9292 }, { "epoch": 0.75, "grad_norm": 7.4538069667697195, "learning_rate": 1.4957612051596953e-06, "loss": 0.6575, "step": 9293 }, { "epoch": 0.75, "grad_norm": 4.935559227840784, "learning_rate": 1.494823069755863e-06, "loss": 0.7139, "step": 9294 }, { "epoch": 0.75, "grad_norm": 3.0903064557130886, "learning_rate": 1.4938851769323449e-06, "loss": 0.6205, "step": 9295 }, { "epoch": 0.76, "grad_norm": 5.808422615849854, "learning_rate": 1.4929475267540467e-06, "loss": 0.8061, "step": 9296 }, { "epoch": 0.76, "grad_norm": 6.791235467919618, "learning_rate": 1.4920101192858637e-06, "loss": 0.7637, "step": 9297 }, { "epoch": 0.76, "grad_norm": 3.683634453110716, "learning_rate": 1.4910729545926689e-06, "loss": 0.7237, "step": 9298 }, { "epoch": 0.76, "grad_norm": 3.622448925006723, "learning_rate": 1.4901360327393177e-06, "loss": 0.5661, "step": 9299 }, { "epoch": 0.76, "grad_norm": 4.695114106657661, "learning_rate": 1.4891993537906563e-06, "loss": 0.6312, "step": 9300 }, { "epoch": 0.76, "grad_norm": 3.812612347237318, "learning_rate": 1.488262917811502e-06, "loss": 0.6054, "step": 9301 }, { "epoch": 0.76, "grad_norm": 7.009663055766406, "learning_rate": 1.487326724866668e-06, "loss": 0.6748, "step": 9302 }, { "epoch": 0.76, "grad_norm": 6.083384332947613, "learning_rate": 1.4863907750209399e-06, "loss": 0.6129, "step": 9303 }, { "epoch": 0.76, "grad_norm": 8.3833174880032, "learning_rate": 1.485455068339095e-06, "loss": 0.7168, "step": 9304 }, { "epoch": 0.76, "grad_norm": 2.913862213041282, "learning_rate": 1.484519604885888e-06, "loss": 0.7563, "step": 9305 }, { "epoch": 0.76, "grad_norm": 13.314362639088515, "learning_rate": 1.4835843847260605e-06, "loss": 0.552, "step": 9306 }, { "epoch": 0.76, "grad_norm": 3.4298889728525794, "learning_rate": 1.4826494079243353e-06, "loss": 0.7031, "step": 9307 }, { "epoch": 0.76, "grad_norm": 4.877332375157771, "learning_rate": 1.4817146745454174e-06, "loss": 0.7611, "step": 9308 }, { "epoch": 0.76, "grad_norm": 3.6126484827195955, "learning_rate": 1.4807801846539977e-06, "loss": 0.7667, "step": 9309 }, { "epoch": 0.76, "grad_norm": 6.170541528714656, "learning_rate": 1.4798459383147462e-06, "loss": 0.7008, "step": 9310 }, { "epoch": 0.76, "grad_norm": 3.407821872231295, "learning_rate": 1.4789119355923227e-06, "loss": 0.759, "step": 9311 }, { "epoch": 0.76, "grad_norm": 7.012052171750457, "learning_rate": 1.4779781765513612e-06, "loss": 0.6111, "step": 9312 }, { "epoch": 0.76, "grad_norm": 18.56320732367359, "learning_rate": 1.4770446612564887e-06, "loss": 0.7102, "step": 9313 }, { "epoch": 0.76, "grad_norm": 4.955881004437763, "learning_rate": 1.4761113897723078e-06, "loss": 0.5928, "step": 9314 }, { "epoch": 0.76, "grad_norm": 4.730250532559149, "learning_rate": 1.475178362163407e-06, "loss": 0.6538, "step": 9315 }, { "epoch": 0.76, "grad_norm": 4.7496379987671595, "learning_rate": 1.4742455784943576e-06, "loss": 0.7231, "step": 9316 }, { "epoch": 0.76, "grad_norm": 11.364472325159019, "learning_rate": 1.4733130388297124e-06, "loss": 0.5832, "step": 9317 }, { "epoch": 0.76, "grad_norm": 4.457145344889489, "learning_rate": 1.4723807432340125e-06, "loss": 0.7458, "step": 9318 }, { "epoch": 0.76, "grad_norm": 6.800535402176218, "learning_rate": 1.4714486917717753e-06, "loss": 0.7445, "step": 9319 }, { "epoch": 0.76, "grad_norm": 3.714306216479424, "learning_rate": 1.4705168845075095e-06, "loss": 0.6098, "step": 9320 }, { "epoch": 0.76, "grad_norm": 5.306299729887863, "learning_rate": 1.4695853215056955e-06, "loss": 0.6536, "step": 9321 }, { "epoch": 0.76, "grad_norm": 4.20643848952529, "learning_rate": 1.4686540028308083e-06, "loss": 0.7495, "step": 9322 }, { "epoch": 0.76, "grad_norm": 3.6561084069340892, "learning_rate": 1.4677229285472988e-06, "loss": 0.6338, "step": 9323 }, { "epoch": 0.76, "grad_norm": 4.265606389719064, "learning_rate": 1.4667920987196028e-06, "loss": 0.6615, "step": 9324 }, { "epoch": 0.76, "grad_norm": 3.028822361190528, "learning_rate": 1.4658615134121417e-06, "loss": 0.7723, "step": 9325 }, { "epoch": 0.76, "grad_norm": 5.743626541381983, "learning_rate": 1.4649311726893151e-06, "loss": 0.7238, "step": 9326 }, { "epoch": 0.76, "grad_norm": 4.676897935008805, "learning_rate": 1.4640010766155128e-06, "loss": 0.6896, "step": 9327 }, { "epoch": 0.76, "grad_norm": 15.156779956523764, "learning_rate": 1.4630712252550977e-06, "loss": 0.6965, "step": 9328 }, { "epoch": 0.76, "grad_norm": 4.01535544773294, "learning_rate": 1.4621416186724257e-06, "loss": 0.604, "step": 9329 }, { "epoch": 0.76, "grad_norm": 5.867567225253043, "learning_rate": 1.4612122569318282e-06, "loss": 0.5211, "step": 9330 }, { "epoch": 0.76, "grad_norm": 3.968299325985637, "learning_rate": 1.4602831400976263e-06, "loss": 0.6325, "step": 9331 }, { "epoch": 0.76, "grad_norm": 5.332360261243705, "learning_rate": 1.4593542682341193e-06, "loss": 0.6537, "step": 9332 }, { "epoch": 0.76, "grad_norm": 5.237917683230502, "learning_rate": 1.4584256414055886e-06, "loss": 0.5422, "step": 9333 }, { "epoch": 0.76, "grad_norm": 3.7038639417554093, "learning_rate": 1.4574972596763066e-06, "loss": 0.7572, "step": 9334 }, { "epoch": 0.76, "grad_norm": 4.366763674415903, "learning_rate": 1.456569123110516e-06, "loss": 0.5537, "step": 9335 }, { "epoch": 0.76, "grad_norm": 4.048691976731345, "learning_rate": 1.4556412317724556e-06, "loss": 0.6057, "step": 9336 }, { "epoch": 0.76, "grad_norm": 3.1770592548547096, "learning_rate": 1.4547135857263372e-06, "loss": 0.8133, "step": 9337 }, { "epoch": 0.76, "grad_norm": 4.273052178917577, "learning_rate": 1.4537861850363633e-06, "loss": 0.6866, "step": 9338 }, { "epoch": 0.76, "grad_norm": 7.993579487150804, "learning_rate": 1.452859029766714e-06, "loss": 0.5804, "step": 9339 }, { "epoch": 0.76, "grad_norm": 3.5101684811171205, "learning_rate": 1.4519321199815544e-06, "loss": 0.6341, "step": 9340 }, { "epoch": 0.76, "grad_norm": 3.7801582414463124, "learning_rate": 1.4510054557450332e-06, "loss": 0.6836, "step": 9341 }, { "epoch": 0.76, "grad_norm": 4.788131555611861, "learning_rate": 1.4500790371212786e-06, "loss": 0.5263, "step": 9342 }, { "epoch": 0.76, "grad_norm": 4.148113292645213, "learning_rate": 1.4491528641744085e-06, "loss": 0.6957, "step": 9343 }, { "epoch": 0.76, "grad_norm": 5.82267111825369, "learning_rate": 1.448226936968517e-06, "loss": 0.5848, "step": 9344 }, { "epoch": 0.76, "grad_norm": 3.306365422177082, "learning_rate": 1.4473012555676862e-06, "loss": 0.698, "step": 9345 }, { "epoch": 0.76, "grad_norm": 3.9184169230978316, "learning_rate": 1.4463758200359783e-06, "loss": 0.7738, "step": 9346 }, { "epoch": 0.76, "grad_norm": 3.9343299528086617, "learning_rate": 1.4454506304374394e-06, "loss": 0.6179, "step": 9347 }, { "epoch": 0.76, "grad_norm": 3.8637123397143474, "learning_rate": 1.4445256868360979e-06, "loss": 0.5942, "step": 9348 }, { "epoch": 0.76, "grad_norm": 6.378754228020261, "learning_rate": 1.4436009892959647e-06, "loss": 0.5277, "step": 9349 }, { "epoch": 0.76, "grad_norm": 5.988587805054817, "learning_rate": 1.4426765378810376e-06, "loss": 0.6473, "step": 9350 }, { "epoch": 0.76, "grad_norm": 7.990524893043351, "learning_rate": 1.4417523326552911e-06, "loss": 0.7076, "step": 9351 }, { "epoch": 0.76, "grad_norm": 3.8324272212929333, "learning_rate": 1.4408283736826894e-06, "loss": 0.7753, "step": 9352 }, { "epoch": 0.76, "grad_norm": 3.045654808839438, "learning_rate": 1.4399046610271726e-06, "loss": 0.6739, "step": 9353 }, { "epoch": 0.76, "grad_norm": 5.6403461302166225, "learning_rate": 1.4389811947526733e-06, "loss": 0.5497, "step": 9354 }, { "epoch": 0.76, "grad_norm": 6.415072690864279, "learning_rate": 1.4380579749230938e-06, "loss": 0.689, "step": 9355 }, { "epoch": 0.76, "grad_norm": 7.0265031471379595, "learning_rate": 1.4371350016023323e-06, "loss": 0.6661, "step": 9356 }, { "epoch": 0.76, "grad_norm": 4.259754088083147, "learning_rate": 1.4362122748542617e-06, "loss": 0.7027, "step": 9357 }, { "epoch": 0.76, "grad_norm": 5.793872018476198, "learning_rate": 1.4352897947427396e-06, "loss": 0.7106, "step": 9358 }, { "epoch": 0.76, "grad_norm": 3.24953483362668, "learning_rate": 1.434367561331611e-06, "loss": 0.49, "step": 9359 }, { "epoch": 0.76, "grad_norm": 5.098133111989722, "learning_rate": 1.433445574684698e-06, "loss": 0.6826, "step": 9360 }, { "epoch": 0.76, "grad_norm": 3.3460340922132232, "learning_rate": 1.4325238348658082e-06, "loss": 0.6714, "step": 9361 }, { "epoch": 0.76, "grad_norm": 3.890265257009263, "learning_rate": 1.4316023419387303e-06, "loss": 0.6585, "step": 9362 }, { "epoch": 0.76, "grad_norm": 5.057762003928424, "learning_rate": 1.43068109596724e-06, "loss": 0.7342, "step": 9363 }, { "epoch": 0.76, "grad_norm": 3.427564669345006, "learning_rate": 1.4297600970150927e-06, "loss": 0.6393, "step": 9364 }, { "epoch": 0.76, "grad_norm": 2.6128378002813206, "learning_rate": 1.4288393451460248e-06, "loss": 0.6295, "step": 9365 }, { "epoch": 0.76, "grad_norm": 7.0161774381701125, "learning_rate": 1.4279188404237615e-06, "loss": 0.6829, "step": 9366 }, { "epoch": 0.76, "grad_norm": 3.1804544126040577, "learning_rate": 1.4269985829120065e-06, "loss": 0.6144, "step": 9367 }, { "epoch": 0.76, "grad_norm": 11.214863897223797, "learning_rate": 1.426078572674447e-06, "loss": 0.5799, "step": 9368 }, { "epoch": 0.76, "grad_norm": 25.30369612817867, "learning_rate": 1.4251588097747515e-06, "loss": 0.6735, "step": 9369 }, { "epoch": 0.76, "grad_norm": 3.630466790980653, "learning_rate": 1.4242392942765775e-06, "loss": 0.675, "step": 9370 }, { "epoch": 0.76, "grad_norm": 6.3586736554188095, "learning_rate": 1.4233200262435592e-06, "loss": 0.712, "step": 9371 }, { "epoch": 0.76, "grad_norm": 5.179197510191876, "learning_rate": 1.422401005739314e-06, "loss": 0.5815, "step": 9372 }, { "epoch": 0.76, "grad_norm": 5.113403459690238, "learning_rate": 1.4214822328274485e-06, "loss": 0.6101, "step": 9373 }, { "epoch": 0.76, "grad_norm": 2.9333260586841323, "learning_rate": 1.4205637075715418e-06, "loss": 0.7115, "step": 9374 }, { "epoch": 0.76, "grad_norm": 6.9652998235247345, "learning_rate": 1.4196454300351665e-06, "loss": 0.7521, "step": 9375 }, { "epoch": 0.76, "grad_norm": 3.619509654471619, "learning_rate": 1.418727400281869e-06, "loss": 0.5868, "step": 9376 }, { "epoch": 0.76, "grad_norm": 5.222293923131044, "learning_rate": 1.4178096183751866e-06, "loss": 0.6987, "step": 9377 }, { "epoch": 0.76, "grad_norm": 6.382648874908834, "learning_rate": 1.4168920843786326e-06, "loss": 0.7036, "step": 9378 }, { "epoch": 0.76, "grad_norm": 2.6352075156029686, "learning_rate": 1.4159747983557093e-06, "loss": 0.5236, "step": 9379 }, { "epoch": 0.76, "grad_norm": 2.066683403265199, "learning_rate": 1.4150577603698962e-06, "loss": 0.5709, "step": 9380 }, { "epoch": 0.76, "grad_norm": 6.5213741077241805, "learning_rate": 1.4141409704846592e-06, "loss": 0.5405, "step": 9381 }, { "epoch": 0.76, "grad_norm": 3.637692591390936, "learning_rate": 1.4132244287634456e-06, "loss": 0.6722, "step": 9382 }, { "epoch": 0.76, "grad_norm": 3.6366763484426174, "learning_rate": 1.4123081352696838e-06, "loss": 0.586, "step": 9383 }, { "epoch": 0.76, "grad_norm": 2.773732006360121, "learning_rate": 1.4113920900667905e-06, "loss": 0.5606, "step": 9384 }, { "epoch": 0.76, "grad_norm": 3.187365333462406, "learning_rate": 1.4104762932181592e-06, "loss": 0.6227, "step": 9385 }, { "epoch": 0.76, "grad_norm": 2.1990589750044864, "learning_rate": 1.4095607447871711e-06, "loss": 0.7466, "step": 9386 }, { "epoch": 0.76, "grad_norm": 3.849252959376761, "learning_rate": 1.4086454448371873e-06, "loss": 0.6404, "step": 9387 }, { "epoch": 0.76, "grad_norm": 4.294010139524278, "learning_rate": 1.4077303934315511e-06, "loss": 0.6122, "step": 9388 }, { "epoch": 0.76, "grad_norm": 4.0210574665377194, "learning_rate": 1.4068155906335906e-06, "loss": 0.6887, "step": 9389 }, { "epoch": 0.76, "grad_norm": 5.575880392902406, "learning_rate": 1.4059010365066145e-06, "loss": 0.6074, "step": 9390 }, { "epoch": 0.76, "grad_norm": 3.9841927375665755, "learning_rate": 1.4049867311139182e-06, "loss": 0.7425, "step": 9391 }, { "epoch": 0.76, "grad_norm": 7.987163491207309, "learning_rate": 1.4040726745187749e-06, "loss": 0.7611, "step": 9392 }, { "epoch": 0.76, "grad_norm": 3.913856026919917, "learning_rate": 1.4031588667844476e-06, "loss": 0.6164, "step": 9393 }, { "epoch": 0.76, "grad_norm": 7.953483679783007, "learning_rate": 1.402245307974171e-06, "loss": 0.6473, "step": 9394 }, { "epoch": 0.76, "grad_norm": 7.532476624848864, "learning_rate": 1.4013319981511736e-06, "loss": 0.7298, "step": 9395 }, { "epoch": 0.76, "grad_norm": 4.20155485111007, "learning_rate": 1.4004189373786614e-06, "loss": 0.7231, "step": 9396 }, { "epoch": 0.76, "grad_norm": 5.394815073032657, "learning_rate": 1.3995061257198224e-06, "loss": 0.5554, "step": 9397 }, { "epoch": 0.76, "grad_norm": 3.2714839651335272, "learning_rate": 1.398593563237831e-06, "loss": 0.7769, "step": 9398 }, { "epoch": 0.76, "grad_norm": 4.4275263483779606, "learning_rate": 1.3976812499958397e-06, "loss": 0.5575, "step": 9399 }, { "epoch": 0.76, "grad_norm": 3.5053858514271337, "learning_rate": 1.3967691860569915e-06, "loss": 0.6995, "step": 9400 }, { "epoch": 0.76, "grad_norm": 4.505526921097421, "learning_rate": 1.3958573714844005e-06, "loss": 0.6892, "step": 9401 }, { "epoch": 0.76, "grad_norm": 4.092998180178473, "learning_rate": 1.3949458063411742e-06, "loss": 0.7358, "step": 9402 }, { "epoch": 0.76, "grad_norm": 3.0186226234192866, "learning_rate": 1.3940344906903957e-06, "loss": 0.4459, "step": 9403 }, { "epoch": 0.76, "grad_norm": 3.514920750122406, "learning_rate": 1.3931234245951375e-06, "loss": 0.5682, "step": 9404 }, { "epoch": 0.76, "grad_norm": 4.459417237539226, "learning_rate": 1.3922126081184484e-06, "loss": 0.6886, "step": 9405 }, { "epoch": 0.76, "grad_norm": 2.7926230018193845, "learning_rate": 1.3913020413233625e-06, "loss": 0.796, "step": 9406 }, { "epoch": 0.76, "grad_norm": 3.058280350991169, "learning_rate": 1.3903917242729004e-06, "loss": 0.5742, "step": 9407 }, { "epoch": 0.76, "grad_norm": 42.39492807120868, "learning_rate": 1.3894816570300557e-06, "loss": 0.7018, "step": 9408 }, { "epoch": 0.76, "grad_norm": 8.678357776462851, "learning_rate": 1.3885718396578157e-06, "loss": 0.7281, "step": 9409 }, { "epoch": 0.76, "grad_norm": 4.681286840037638, "learning_rate": 1.3876622722191425e-06, "loss": 0.5698, "step": 9410 }, { "epoch": 0.76, "grad_norm": 5.840682750296039, "learning_rate": 1.3867529547769865e-06, "loss": 0.5836, "step": 9411 }, { "epoch": 0.76, "grad_norm": 3.466426624789153, "learning_rate": 1.3858438873942765e-06, "loss": 0.7828, "step": 9412 }, { "epoch": 0.76, "grad_norm": 4.662761279982415, "learning_rate": 1.3849350701339265e-06, "loss": 0.6137, "step": 9413 }, { "epoch": 0.76, "grad_norm": 3.026888218262693, "learning_rate": 1.3840265030588323e-06, "loss": 0.4448, "step": 9414 }, { "epoch": 0.76, "grad_norm": 4.827438691696988, "learning_rate": 1.3831181862318704e-06, "loss": 0.6629, "step": 9415 }, { "epoch": 0.76, "grad_norm": 3.8725643768166362, "learning_rate": 1.3822101197159049e-06, "loss": 0.6567, "step": 9416 }, { "epoch": 0.76, "grad_norm": 4.657553514633317, "learning_rate": 1.3813023035737778e-06, "loss": 0.4273, "step": 9417 }, { "epoch": 0.76, "grad_norm": 19.357686246995534, "learning_rate": 1.3803947378683174e-06, "loss": 0.7513, "step": 9418 }, { "epoch": 0.77, "grad_norm": 5.610892118403507, "learning_rate": 1.3794874226623323e-06, "loss": 0.7867, "step": 9419 }, { "epoch": 0.77, "grad_norm": 3.8942101571835606, "learning_rate": 1.3785803580186141e-06, "loss": 0.6229, "step": 9420 }, { "epoch": 0.77, "grad_norm": 9.136690708914886, "learning_rate": 1.3776735439999379e-06, "loss": 0.5678, "step": 9421 }, { "epoch": 0.77, "grad_norm": 5.614065362734215, "learning_rate": 1.3767669806690586e-06, "loss": 0.7685, "step": 9422 }, { "epoch": 0.77, "grad_norm": 3.0858022072414943, "learning_rate": 1.3758606680887194e-06, "loss": 0.6561, "step": 9423 }, { "epoch": 0.77, "grad_norm": 3.767367994011455, "learning_rate": 1.37495460632164e-06, "loss": 0.5858, "step": 9424 }, { "epoch": 0.77, "grad_norm": 2.657141547611293, "learning_rate": 1.3740487954305288e-06, "loss": 0.718, "step": 9425 }, { "epoch": 0.77, "grad_norm": 4.026550591224162, "learning_rate": 1.3731432354780716e-06, "loss": 0.6113, "step": 9426 }, { "epoch": 0.77, "grad_norm": 16.161578514508417, "learning_rate": 1.3722379265269393e-06, "loss": 0.6813, "step": 9427 }, { "epoch": 0.77, "grad_norm": 8.296344156904274, "learning_rate": 1.3713328686397832e-06, "loss": 0.802, "step": 9428 }, { "epoch": 0.77, "grad_norm": 3.0271976617204652, "learning_rate": 1.3704280618792415e-06, "loss": 0.717, "step": 9429 }, { "epoch": 0.77, "grad_norm": 4.739558449333144, "learning_rate": 1.3695235063079322e-06, "loss": 0.6639, "step": 9430 }, { "epoch": 0.77, "grad_norm": 15.09090945733377, "learning_rate": 1.3686192019884542e-06, "loss": 0.6142, "step": 9431 }, { "epoch": 0.77, "grad_norm": 3.5850617805810323, "learning_rate": 1.3677151489833933e-06, "loss": 0.7216, "step": 9432 }, { "epoch": 0.77, "grad_norm": 2.5573239524107496, "learning_rate": 1.3668113473553157e-06, "loss": 0.6464, "step": 9433 }, { "epoch": 0.77, "grad_norm": 5.633427485422954, "learning_rate": 1.3659077971667689e-06, "loss": 0.7828, "step": 9434 }, { "epoch": 0.77, "grad_norm": 3.880682936681103, "learning_rate": 1.365004498480283e-06, "loss": 0.6984, "step": 9435 }, { "epoch": 0.77, "grad_norm": 16.854026774746146, "learning_rate": 1.3641014513583755e-06, "loss": 0.6061, "step": 9436 }, { "epoch": 0.77, "grad_norm": 2.2413485418308796, "learning_rate": 1.3631986558635408e-06, "loss": 0.5517, "step": 9437 }, { "epoch": 0.77, "grad_norm": 5.871668771035668, "learning_rate": 1.3622961120582567e-06, "loss": 0.6006, "step": 9438 }, { "epoch": 0.77, "grad_norm": 14.038943024850285, "learning_rate": 1.3613938200049886e-06, "loss": 0.5614, "step": 9439 }, { "epoch": 0.77, "grad_norm": 3.546456078083907, "learning_rate": 1.3604917797661782e-06, "loss": 0.703, "step": 9440 }, { "epoch": 0.77, "grad_norm": 3.2314983478417942, "learning_rate": 1.3595899914042531e-06, "loss": 0.5713, "step": 9441 }, { "epoch": 0.77, "grad_norm": 3.752462864977622, "learning_rate": 1.358688454981621e-06, "loss": 0.7105, "step": 9442 }, { "epoch": 0.77, "grad_norm": 13.505514208892068, "learning_rate": 1.3577871705606765e-06, "loss": 0.5521, "step": 9443 }, { "epoch": 0.77, "grad_norm": 4.972439527237001, "learning_rate": 1.3568861382037934e-06, "loss": 0.6371, "step": 9444 }, { "epoch": 0.77, "grad_norm": 4.84470654981171, "learning_rate": 1.3559853579733274e-06, "loss": 0.8575, "step": 9445 }, { "epoch": 0.77, "grad_norm": 3.37562565935764, "learning_rate": 1.3550848299316216e-06, "loss": 0.5746, "step": 9446 }, { "epoch": 0.77, "grad_norm": 3.2630794649236337, "learning_rate": 1.354184554140993e-06, "loss": 0.7595, "step": 9447 }, { "epoch": 0.77, "grad_norm": 4.797906327124326, "learning_rate": 1.353284530663751e-06, "loss": 0.6748, "step": 9448 }, { "epoch": 0.77, "grad_norm": 4.498425560107589, "learning_rate": 1.3523847595621792e-06, "loss": 0.7346, "step": 9449 }, { "epoch": 0.77, "grad_norm": 3.5797633960400828, "learning_rate": 1.3514852408985513e-06, "loss": 0.5507, "step": 9450 }, { "epoch": 0.77, "grad_norm": 3.5582291379528, "learning_rate": 1.3505859747351174e-06, "loss": 0.6038, "step": 9451 }, { "epoch": 0.77, "grad_norm": 4.807824396820554, "learning_rate": 1.3496869611341107e-06, "loss": 0.6591, "step": 9452 }, { "epoch": 0.77, "grad_norm": 2.814359334846196, "learning_rate": 1.348788200157753e-06, "loss": 0.717, "step": 9453 }, { "epoch": 0.77, "grad_norm": 13.986395786837159, "learning_rate": 1.347889691868241e-06, "loss": 0.6358, "step": 9454 }, { "epoch": 0.77, "grad_norm": 3.336861632328459, "learning_rate": 1.3469914363277582e-06, "loss": 0.6312, "step": 9455 }, { "epoch": 0.77, "grad_norm": 2.9235675377273656, "learning_rate": 1.3460934335984677e-06, "loss": 0.4495, "step": 9456 }, { "epoch": 0.77, "grad_norm": 3.077665290491094, "learning_rate": 1.34519568374252e-06, "loss": 0.6599, "step": 9457 }, { "epoch": 0.77, "grad_norm": 5.249627745329606, "learning_rate": 1.3442981868220423e-06, "loss": 0.5464, "step": 9458 }, { "epoch": 0.77, "grad_norm": 2.7728833694350725, "learning_rate": 1.343400942899149e-06, "loss": 0.6116, "step": 9459 }, { "epoch": 0.77, "grad_norm": 3.7730671031244585, "learning_rate": 1.3425039520359352e-06, "loss": 0.6769, "step": 9460 }, { "epoch": 0.77, "grad_norm": 4.421558918751307, "learning_rate": 1.3416072142944768e-06, "loss": 0.53, "step": 9461 }, { "epoch": 0.77, "grad_norm": 2.869963322675455, "learning_rate": 1.340710729736835e-06, "loss": 0.643, "step": 9462 }, { "epoch": 0.77, "grad_norm": 3.2423807961942686, "learning_rate": 1.3398144984250493e-06, "loss": 0.776, "step": 9463 }, { "epoch": 0.77, "grad_norm": 9.9924529501169, "learning_rate": 1.3389185204211487e-06, "loss": 0.6591, "step": 9464 }, { "epoch": 0.77, "grad_norm": 7.117505675698146, "learning_rate": 1.3380227957871366e-06, "loss": 0.5951, "step": 9465 }, { "epoch": 0.77, "grad_norm": 3.275376050156914, "learning_rate": 1.337127324585008e-06, "loss": 0.616, "step": 9466 }, { "epoch": 0.77, "grad_norm": 3.4346046303001825, "learning_rate": 1.3362321068767293e-06, "loss": 0.592, "step": 9467 }, { "epoch": 0.77, "grad_norm": 3.4120153758497995, "learning_rate": 1.3353371427242585e-06, "loss": 0.7462, "step": 9468 }, { "epoch": 0.77, "grad_norm": 3.664979299883388, "learning_rate": 1.3344424321895328e-06, "loss": 0.7389, "step": 9469 }, { "epoch": 0.77, "grad_norm": 2.2984499813772734, "learning_rate": 1.3335479753344688e-06, "loss": 0.7003, "step": 9470 }, { "epoch": 0.77, "grad_norm": 4.168994906173609, "learning_rate": 1.3326537722209727e-06, "loss": 0.6077, "step": 9471 }, { "epoch": 0.77, "grad_norm": 4.20905239145414, "learning_rate": 1.3317598229109258e-06, "loss": 0.5668, "step": 9472 }, { "epoch": 0.77, "grad_norm": 6.972859705232672, "learning_rate": 1.3308661274661988e-06, "loss": 0.6262, "step": 9473 }, { "epoch": 0.77, "grad_norm": 3.2884191701654233, "learning_rate": 1.3299726859486361e-06, "loss": 0.669, "step": 9474 }, { "epoch": 0.77, "grad_norm": 4.061886599429941, "learning_rate": 1.3290794984200734e-06, "loss": 0.6047, "step": 9475 }, { "epoch": 0.77, "grad_norm": 3.4633455103451714, "learning_rate": 1.3281865649423231e-06, "loss": 0.6355, "step": 9476 }, { "epoch": 0.77, "grad_norm": 3.279719834942962, "learning_rate": 1.3272938855771805e-06, "loss": 0.7319, "step": 9477 }, { "epoch": 0.77, "grad_norm": 12.929307332529223, "learning_rate": 1.3264014603864278e-06, "loss": 0.7332, "step": 9478 }, { "epoch": 0.77, "grad_norm": 4.894570486158061, "learning_rate": 1.3255092894318256e-06, "loss": 0.6998, "step": 9479 }, { "epoch": 0.77, "grad_norm": 8.48825942957005, "learning_rate": 1.3246173727751166e-06, "loss": 0.5335, "step": 9480 }, { "epoch": 0.77, "grad_norm": 2.6854420005954913, "learning_rate": 1.323725710478026e-06, "loss": 0.6232, "step": 9481 }, { "epoch": 0.77, "grad_norm": 4.189099177111844, "learning_rate": 1.3228343026022656e-06, "loss": 0.6521, "step": 9482 }, { "epoch": 0.77, "grad_norm": 4.843425941346352, "learning_rate": 1.321943149209523e-06, "loss": 0.7574, "step": 9483 }, { "epoch": 0.77, "grad_norm": 3.7605790907966896, "learning_rate": 1.3210522503614753e-06, "loss": 0.6304, "step": 9484 }, { "epoch": 0.77, "grad_norm": 2.656388881656097, "learning_rate": 1.3201616061197763e-06, "loss": 0.5772, "step": 9485 }, { "epoch": 0.77, "grad_norm": 4.242821918812973, "learning_rate": 1.3192712165460648e-06, "loss": 0.6511, "step": 9486 }, { "epoch": 0.77, "grad_norm": 3.945219436104987, "learning_rate": 1.31838108170196e-06, "loss": 0.7387, "step": 9487 }, { "epoch": 0.77, "grad_norm": 3.516458437000437, "learning_rate": 1.3174912016490649e-06, "loss": 0.7046, "step": 9488 }, { "epoch": 0.77, "grad_norm": 3.5464214990016734, "learning_rate": 1.316601576448967e-06, "loss": 0.5098, "step": 9489 }, { "epoch": 0.77, "grad_norm": 3.660385989974557, "learning_rate": 1.315712206163231e-06, "loss": 0.5736, "step": 9490 }, { "epoch": 0.77, "grad_norm": 7.127447316552168, "learning_rate": 1.3148230908534098e-06, "loss": 0.7929, "step": 9491 }, { "epoch": 0.77, "grad_norm": 26.23366192024085, "learning_rate": 1.3139342305810349e-06, "loss": 0.6779, "step": 9492 }, { "epoch": 0.77, "grad_norm": 3.1936162666647743, "learning_rate": 1.3130456254076206e-06, "loss": 0.7702, "step": 9493 }, { "epoch": 0.77, "grad_norm": 6.514636194832057, "learning_rate": 1.3121572753946638e-06, "loss": 0.727, "step": 9494 }, { "epoch": 0.77, "grad_norm": 3.267814276339569, "learning_rate": 1.3112691806036425e-06, "loss": 0.5855, "step": 9495 }, { "epoch": 0.77, "grad_norm": 5.683781091152204, "learning_rate": 1.310381341096022e-06, "loss": 0.6916, "step": 9496 }, { "epoch": 0.77, "grad_norm": 2.9322214508221713, "learning_rate": 1.3094937569332428e-06, "loss": 0.6743, "step": 9497 }, { "epoch": 0.77, "grad_norm": 3.373301687035292, "learning_rate": 1.3086064281767346e-06, "loss": 0.5708, "step": 9498 }, { "epoch": 0.77, "grad_norm": 3.037177140777006, "learning_rate": 1.307719354887904e-06, "loss": 0.8457, "step": 9499 }, { "epoch": 0.77, "grad_norm": 4.500426649586497, "learning_rate": 1.3068325371281433e-06, "loss": 0.6916, "step": 9500 }, { "epoch": 0.77, "grad_norm": 3.734558616073429, "learning_rate": 1.3059459749588243e-06, "loss": 0.6509, "step": 9501 }, { "epoch": 0.77, "grad_norm": 4.6652459661091745, "learning_rate": 1.3050596684413025e-06, "loss": 0.608, "step": 9502 }, { "epoch": 0.77, "grad_norm": 3.682720377105289, "learning_rate": 1.3041736176369184e-06, "loss": 0.6993, "step": 9503 }, { "epoch": 0.77, "grad_norm": 3.2513865670415867, "learning_rate": 1.3032878226069895e-06, "loss": 0.6689, "step": 9504 }, { "epoch": 0.77, "grad_norm": 3.036004580320126, "learning_rate": 1.302402283412821e-06, "loss": 0.6038, "step": 9505 }, { "epoch": 0.77, "grad_norm": 12.426136977120445, "learning_rate": 1.3015170001156962e-06, "loss": 0.652, "step": 9506 }, { "epoch": 0.77, "grad_norm": 7.952317677931165, "learning_rate": 1.300631972776883e-06, "loss": 0.7531, "step": 9507 }, { "epoch": 0.77, "grad_norm": 6.42523116334164, "learning_rate": 1.299747201457629e-06, "loss": 0.8523, "step": 9508 }, { "epoch": 0.77, "grad_norm": 6.272477864862201, "learning_rate": 1.2988626862191684e-06, "loss": 0.6346, "step": 9509 }, { "epoch": 0.77, "grad_norm": 4.79702736986493, "learning_rate": 1.2979784271227146e-06, "loss": 0.6283, "step": 9510 }, { "epoch": 0.77, "grad_norm": 2.979345806540136, "learning_rate": 1.2970944242294614e-06, "loss": 0.482, "step": 9511 }, { "epoch": 0.77, "grad_norm": 5.288386849289338, "learning_rate": 1.2962106776005917e-06, "loss": 0.55, "step": 9512 }, { "epoch": 0.77, "grad_norm": 3.199472690350407, "learning_rate": 1.2953271872972638e-06, "loss": 0.6315, "step": 9513 }, { "epoch": 0.77, "grad_norm": 2.9096214029363225, "learning_rate": 1.2944439533806207e-06, "loss": 0.774, "step": 9514 }, { "epoch": 0.77, "grad_norm": 3.008805231043488, "learning_rate": 1.2935609759117873e-06, "loss": 0.7406, "step": 9515 }, { "epoch": 0.77, "grad_norm": 4.150798833911846, "learning_rate": 1.2926782549518734e-06, "loss": 0.6784, "step": 9516 }, { "epoch": 0.77, "grad_norm": 9.215084121911525, "learning_rate": 1.2917957905619672e-06, "loss": 0.6568, "step": 9517 }, { "epoch": 0.77, "grad_norm": 5.649516436408424, "learning_rate": 1.2909135828031398e-06, "loss": 0.7832, "step": 9518 }, { "epoch": 0.77, "grad_norm": 3.491442266883123, "learning_rate": 1.2900316317364498e-06, "loss": 0.7138, "step": 9519 }, { "epoch": 0.77, "grad_norm": 10.552972195007301, "learning_rate": 1.2891499374229276e-06, "loss": 0.4464, "step": 9520 }, { "epoch": 0.77, "grad_norm": 6.478727050024747, "learning_rate": 1.2882684999235967e-06, "loss": 0.7565, "step": 9521 }, { "epoch": 0.77, "grad_norm": 4.02253963072119, "learning_rate": 1.2873873192994552e-06, "loss": 0.5654, "step": 9522 }, { "epoch": 0.77, "grad_norm": 7.426251196401269, "learning_rate": 1.2865063956114893e-06, "loss": 0.5125, "step": 9523 }, { "epoch": 0.77, "grad_norm": 3.106882263675633, "learning_rate": 1.2856257289206625e-06, "loss": 0.5261, "step": 9524 }, { "epoch": 0.77, "grad_norm": 2.5022550530711145, "learning_rate": 1.2847453192879217e-06, "loss": 0.8325, "step": 9525 }, { "epoch": 0.77, "grad_norm": 3.9709878161306817, "learning_rate": 1.2838651667742014e-06, "loss": 0.6347, "step": 9526 }, { "epoch": 0.77, "grad_norm": 3.1397708119233965, "learning_rate": 1.2829852714404068e-06, "loss": 0.6313, "step": 9527 }, { "epoch": 0.77, "grad_norm": 4.45783865869915, "learning_rate": 1.2821056333474368e-06, "loss": 0.6782, "step": 9528 }, { "epoch": 0.77, "grad_norm": 2.2183880303718646, "learning_rate": 1.281226252556166e-06, "loss": 0.7428, "step": 9529 }, { "epoch": 0.77, "grad_norm": 4.042599479424637, "learning_rate": 1.280347129127455e-06, "loss": 0.6635, "step": 9530 }, { "epoch": 0.77, "grad_norm": 2.5031459900373916, "learning_rate": 1.2794682631221423e-06, "loss": 0.4689, "step": 9531 }, { "epoch": 0.77, "grad_norm": 16.64032822868164, "learning_rate": 1.278589654601055e-06, "loss": 0.6319, "step": 9532 }, { "epoch": 0.77, "grad_norm": 5.056688915720502, "learning_rate": 1.2777113036249927e-06, "loss": 0.7046, "step": 9533 }, { "epoch": 0.77, "grad_norm": 6.390324709845071, "learning_rate": 1.2768332102547464e-06, "loss": 0.628, "step": 9534 }, { "epoch": 0.77, "grad_norm": 3.1254744860918717, "learning_rate": 1.275955374551086e-06, "loss": 0.5482, "step": 9535 }, { "epoch": 0.77, "grad_norm": 4.781001368342637, "learning_rate": 1.2750777965747601e-06, "loss": 0.7425, "step": 9536 }, { "epoch": 0.77, "grad_norm": 2.931844168629806, "learning_rate": 1.2742004763865063e-06, "loss": 0.6571, "step": 9537 }, { "epoch": 0.77, "grad_norm": 2.38800903016534, "learning_rate": 1.273323414047038e-06, "loss": 0.6273, "step": 9538 }, { "epoch": 0.77, "grad_norm": 6.726295790302656, "learning_rate": 1.2724466096170568e-06, "loss": 0.8395, "step": 9539 }, { "epoch": 0.77, "grad_norm": 3.666764789221523, "learning_rate": 1.2715700631572387e-06, "loss": 0.6851, "step": 9540 }, { "epoch": 0.77, "grad_norm": 3.693267124076448, "learning_rate": 1.2706937747282493e-06, "loss": 0.7828, "step": 9541 }, { "epoch": 0.78, "grad_norm": 5.632303995361531, "learning_rate": 1.2698177443907322e-06, "loss": 0.7265, "step": 9542 }, { "epoch": 0.78, "grad_norm": 3.0788167771280786, "learning_rate": 1.2689419722053132e-06, "loss": 0.5517, "step": 9543 }, { "epoch": 0.78, "grad_norm": 3.3799337836105963, "learning_rate": 1.2680664582326042e-06, "loss": 0.4752, "step": 9544 }, { "epoch": 0.78, "grad_norm": 5.66525494421117, "learning_rate": 1.2671912025331922e-06, "loss": 0.7874, "step": 9545 }, { "epoch": 0.78, "grad_norm": 5.992751150613818, "learning_rate": 1.2663162051676565e-06, "loss": 0.5712, "step": 9546 }, { "epoch": 0.78, "grad_norm": 3.7405047888835554, "learning_rate": 1.2654414661965447e-06, "loss": 0.6324, "step": 9547 }, { "epoch": 0.78, "grad_norm": 14.240767975016839, "learning_rate": 1.2645669856804005e-06, "loss": 0.5388, "step": 9548 }, { "epoch": 0.78, "grad_norm": 2.709086698385279, "learning_rate": 1.2636927636797407e-06, "loss": 0.6597, "step": 9549 }, { "epoch": 0.78, "grad_norm": 4.011325047864485, "learning_rate": 1.2628188002550662e-06, "loss": 0.6425, "step": 9550 }, { "epoch": 0.78, "grad_norm": 3.486298032839116, "learning_rate": 1.2619450954668633e-06, "loss": 0.7064, "step": 9551 }, { "epoch": 0.78, "grad_norm": 2.82453304462159, "learning_rate": 1.2610716493755965e-06, "loss": 0.6387, "step": 9552 }, { "epoch": 0.78, "grad_norm": 4.287445732403542, "learning_rate": 1.2601984620417136e-06, "loss": 0.7065, "step": 9553 }, { "epoch": 0.78, "grad_norm": 3.039181482293806, "learning_rate": 1.2593255335256438e-06, "loss": 0.6161, "step": 9554 }, { "epoch": 0.78, "grad_norm": 2.887711658599226, "learning_rate": 1.2584528638878014e-06, "loss": 0.7509, "step": 9555 }, { "epoch": 0.78, "grad_norm": 7.124386944298144, "learning_rate": 1.2575804531885783e-06, "loss": 0.6278, "step": 9556 }, { "epoch": 0.78, "grad_norm": 4.966553770182493, "learning_rate": 1.2567083014883536e-06, "loss": 0.8015, "step": 9557 }, { "epoch": 0.78, "grad_norm": 3.359991493600607, "learning_rate": 1.2558364088474838e-06, "loss": 0.5198, "step": 9558 }, { "epoch": 0.78, "grad_norm": 2.9359154034670802, "learning_rate": 1.25496477532631e-06, "loss": 0.7131, "step": 9559 }, { "epoch": 0.78, "grad_norm": 2.340351344368538, "learning_rate": 1.2540934009851541e-06, "loss": 0.5602, "step": 9560 }, { "epoch": 0.78, "grad_norm": 3.0011070473618746, "learning_rate": 1.2532222858843202e-06, "loss": 0.6853, "step": 9561 }, { "epoch": 0.78, "grad_norm": 3.9353496464230457, "learning_rate": 1.2523514300840967e-06, "loss": 0.617, "step": 9562 }, { "epoch": 0.78, "grad_norm": 4.3507879199612045, "learning_rate": 1.2514808336447499e-06, "loss": 0.7897, "step": 9563 }, { "epoch": 0.78, "grad_norm": 2.664658563841125, "learning_rate": 1.2506104966265336e-06, "loss": 0.6507, "step": 9564 }, { "epoch": 0.78, "grad_norm": 3.610129299176213, "learning_rate": 1.2497404190896795e-06, "loss": 0.8481, "step": 9565 }, { "epoch": 0.78, "grad_norm": 3.231669621724826, "learning_rate": 1.2488706010944012e-06, "loss": 0.7117, "step": 9566 }, { "epoch": 0.78, "grad_norm": 5.410990380846471, "learning_rate": 1.248001042700897e-06, "loss": 0.7147, "step": 9567 }, { "epoch": 0.78, "grad_norm": 3.329839871992406, "learning_rate": 1.2471317439693436e-06, "loss": 0.5731, "step": 9568 }, { "epoch": 0.78, "grad_norm": 2.8676361042348844, "learning_rate": 1.2462627049599052e-06, "loss": 0.6977, "step": 9569 }, { "epoch": 0.78, "grad_norm": 4.043671699356178, "learning_rate": 1.2453939257327213e-06, "loss": 0.4871, "step": 9570 }, { "epoch": 0.78, "grad_norm": 5.322868473881842, "learning_rate": 1.24452540634792e-06, "loss": 0.6787, "step": 9571 }, { "epoch": 0.78, "grad_norm": 3.570523032201416, "learning_rate": 1.2436571468656071e-06, "loss": 0.7608, "step": 9572 }, { "epoch": 0.78, "grad_norm": 3.039044964921788, "learning_rate": 1.242789147345872e-06, "loss": 0.619, "step": 9573 }, { "epoch": 0.78, "grad_norm": 4.224375005791324, "learning_rate": 1.2419214078487846e-06, "loss": 0.7531, "step": 9574 }, { "epoch": 0.78, "grad_norm": 9.119077338920082, "learning_rate": 1.2410539284343975e-06, "loss": 0.6441, "step": 9575 }, { "epoch": 0.78, "grad_norm": 4.910610252005452, "learning_rate": 1.2401867091627485e-06, "loss": 0.6464, "step": 9576 }, { "epoch": 0.78, "grad_norm": 7.088794565466001, "learning_rate": 1.2393197500938508e-06, "loss": 0.6115, "step": 9577 }, { "epoch": 0.78, "grad_norm": 4.022615217698594, "learning_rate": 1.2384530512877074e-06, "loss": 0.6389, "step": 9578 }, { "epoch": 0.78, "grad_norm": 4.1652060062719904, "learning_rate": 1.237586612804298e-06, "loss": 0.6482, "step": 9579 }, { "epoch": 0.78, "grad_norm": 3.3097162079473255, "learning_rate": 1.2367204347035845e-06, "loss": 0.862, "step": 9580 }, { "epoch": 0.78, "grad_norm": 23.44444919315233, "learning_rate": 1.235854517045511e-06, "loss": 0.6533, "step": 9581 }, { "epoch": 0.78, "grad_norm": 4.56494358636794, "learning_rate": 1.2349888598900078e-06, "loss": 0.7958, "step": 9582 }, { "epoch": 0.78, "grad_norm": 3.467798125316924, "learning_rate": 1.2341234632969817e-06, "loss": 0.6072, "step": 9583 }, { "epoch": 0.78, "grad_norm": 2.6837300904263093, "learning_rate": 1.2332583273263227e-06, "loss": 0.6819, "step": 9584 }, { "epoch": 0.78, "grad_norm": 6.28537589498293, "learning_rate": 1.232393452037907e-06, "loss": 0.5772, "step": 9585 }, { "epoch": 0.78, "grad_norm": 3.833562915235655, "learning_rate": 1.2315288374915852e-06, "loss": 0.713, "step": 9586 }, { "epoch": 0.78, "grad_norm": 3.6677176934363485, "learning_rate": 1.2306644837471971e-06, "loss": 0.6154, "step": 9587 }, { "epoch": 0.78, "grad_norm": 3.758829091242493, "learning_rate": 1.229800390864559e-06, "loss": 0.6044, "step": 9588 }, { "epoch": 0.78, "grad_norm": 3.186526174599644, "learning_rate": 1.2289365589034746e-06, "loss": 0.7094, "step": 9589 }, { "epoch": 0.78, "grad_norm": 8.028294912543311, "learning_rate": 1.2280729879237247e-06, "loss": 0.6784, "step": 9590 }, { "epoch": 0.78, "grad_norm": 8.164653773605256, "learning_rate": 1.2272096779850728e-06, "loss": 0.7694, "step": 9591 }, { "epoch": 0.78, "grad_norm": 5.063424299452289, "learning_rate": 1.2263466291472692e-06, "loss": 0.784, "step": 9592 }, { "epoch": 0.78, "grad_norm": 5.924236981931554, "learning_rate": 1.2254838414700371e-06, "loss": 0.7228, "step": 9593 }, { "epoch": 0.78, "grad_norm": 2.894697716079355, "learning_rate": 1.224621315013091e-06, "loss": 0.6904, "step": 9594 }, { "epoch": 0.78, "grad_norm": 3.173362067409352, "learning_rate": 1.2237590498361202e-06, "loss": 0.6775, "step": 9595 }, { "epoch": 0.78, "grad_norm": 2.6684281719755236, "learning_rate": 1.2228970459988015e-06, "loss": 0.5385, "step": 9596 }, { "epoch": 0.78, "grad_norm": 3.3744139799994706, "learning_rate": 1.2220353035607902e-06, "loss": 0.6454, "step": 9597 }, { "epoch": 0.78, "grad_norm": 2.805794200279848, "learning_rate": 1.221173822581722e-06, "loss": 0.7744, "step": 9598 }, { "epoch": 0.78, "grad_norm": 6.720023113604179, "learning_rate": 1.220312603121222e-06, "loss": 0.5788, "step": 9599 }, { "epoch": 0.78, "grad_norm": 5.026312604866843, "learning_rate": 1.2194516452388861e-06, "loss": 0.4923, "step": 9600 }, { "epoch": 0.78, "grad_norm": 2.659868431870634, "learning_rate": 1.2185909489943015e-06, "loss": 0.6683, "step": 9601 }, { "epoch": 0.78, "grad_norm": 7.019878786826271, "learning_rate": 1.217730514447032e-06, "loss": 0.7073, "step": 9602 }, { "epoch": 0.78, "grad_norm": 3.6557179406078246, "learning_rate": 1.2168703416566274e-06, "loss": 0.7031, "step": 9603 }, { "epoch": 0.78, "grad_norm": 4.547170879504895, "learning_rate": 1.2160104306826154e-06, "loss": 0.7275, "step": 9604 }, { "epoch": 0.78, "grad_norm": 5.177141151994431, "learning_rate": 1.2151507815845077e-06, "loss": 0.6608, "step": 9605 }, { "epoch": 0.78, "grad_norm": 3.9473281936320475, "learning_rate": 1.214291394421796e-06, "loss": 0.6326, "step": 9606 }, { "epoch": 0.78, "grad_norm": 3.7195551900189874, "learning_rate": 1.213432269253958e-06, "loss": 0.685, "step": 9607 }, { "epoch": 0.78, "grad_norm": 2.399625797093667, "learning_rate": 1.2125734061404488e-06, "loss": 0.6829, "step": 9608 }, { "epoch": 0.78, "grad_norm": 5.269585512658102, "learning_rate": 1.2117148051407064e-06, "loss": 0.5668, "step": 9609 }, { "epoch": 0.78, "grad_norm": 6.455420343327664, "learning_rate": 1.2108564663141541e-06, "loss": 0.6362, "step": 9610 }, { "epoch": 0.78, "grad_norm": 3.0188580984211977, "learning_rate": 1.209998389720191e-06, "loss": 0.7286, "step": 9611 }, { "epoch": 0.78, "grad_norm": 4.727330074761765, "learning_rate": 1.2091405754182061e-06, "loss": 0.7701, "step": 9612 }, { "epoch": 0.78, "grad_norm": 3.9516304969028067, "learning_rate": 1.2082830234675597e-06, "loss": 0.6331, "step": 9613 }, { "epoch": 0.78, "grad_norm": 4.726769937936566, "learning_rate": 1.2074257339276041e-06, "loss": 0.6636, "step": 9614 }, { "epoch": 0.78, "grad_norm": 5.2240850101689835, "learning_rate": 1.206568706857668e-06, "loss": 0.5978, "step": 9615 }, { "epoch": 0.78, "grad_norm": 4.068590841150558, "learning_rate": 1.205711942317061e-06, "loss": 0.742, "step": 9616 }, { "epoch": 0.78, "grad_norm": 4.86774571973402, "learning_rate": 1.2048554403650803e-06, "loss": 0.7562, "step": 9617 }, { "epoch": 0.78, "grad_norm": 4.2669033346874805, "learning_rate": 1.2039992010609974e-06, "loss": 0.7791, "step": 9618 }, { "epoch": 0.78, "grad_norm": 2.9145475573449207, "learning_rate": 1.203143224464075e-06, "loss": 0.6959, "step": 9619 }, { "epoch": 0.78, "grad_norm": 3.497617366248506, "learning_rate": 1.2022875106335446e-06, "loss": 0.5946, "step": 9620 }, { "epoch": 0.78, "grad_norm": 4.079856301069102, "learning_rate": 1.2014320596286327e-06, "loss": 0.6903, "step": 9621 }, { "epoch": 0.78, "grad_norm": 4.574458224097774, "learning_rate": 1.2005768715085402e-06, "loss": 0.6076, "step": 9622 }, { "epoch": 0.78, "grad_norm": 5.618204258576541, "learning_rate": 1.19972194633245e-06, "loss": 0.6766, "step": 9623 }, { "epoch": 0.78, "grad_norm": 5.069222147867285, "learning_rate": 1.1988672841595312e-06, "loss": 0.7764, "step": 9624 }, { "epoch": 0.78, "grad_norm": 3.0841799032921338, "learning_rate": 1.1980128850489298e-06, "loss": 0.637, "step": 9625 }, { "epoch": 0.78, "grad_norm": 3.227425537046121, "learning_rate": 1.1971587490597759e-06, "loss": 0.556, "step": 9626 }, { "epoch": 0.78, "grad_norm": 4.298069711101177, "learning_rate": 1.1963048762511802e-06, "loss": 0.5983, "step": 9627 }, { "epoch": 0.78, "grad_norm": 4.219900479613822, "learning_rate": 1.1954512666822383e-06, "loss": 0.8055, "step": 9628 }, { "epoch": 0.78, "grad_norm": 5.152423102015065, "learning_rate": 1.1945979204120244e-06, "loss": 0.6486, "step": 9629 }, { "epoch": 0.78, "grad_norm": 4.187802495101171, "learning_rate": 1.1937448374995936e-06, "loss": 0.5941, "step": 9630 }, { "epoch": 0.78, "grad_norm": 4.566822192955822, "learning_rate": 1.1928920180039877e-06, "loss": 0.687, "step": 9631 }, { "epoch": 0.78, "grad_norm": 2.980511266830398, "learning_rate": 1.1920394619842257e-06, "loss": 0.7644, "step": 9632 }, { "epoch": 0.78, "grad_norm": 7.3526787683167365, "learning_rate": 1.1911871694993093e-06, "loss": 0.6765, "step": 9633 }, { "epoch": 0.78, "grad_norm": 3.203667054744695, "learning_rate": 1.1903351406082224e-06, "loss": 0.6395, "step": 9634 }, { "epoch": 0.78, "grad_norm": 4.0058431664561605, "learning_rate": 1.1894833753699325e-06, "loss": 0.682, "step": 9635 }, { "epoch": 0.78, "grad_norm": 5.207957447236932, "learning_rate": 1.1886318738433844e-06, "loss": 0.679, "step": 9636 }, { "epoch": 0.78, "grad_norm": 6.874841198116699, "learning_rate": 1.1877806360875111e-06, "loss": 0.6493, "step": 9637 }, { "epoch": 0.78, "grad_norm": 5.071974534828488, "learning_rate": 1.186929662161221e-06, "loss": 0.4878, "step": 9638 }, { "epoch": 0.78, "grad_norm": 7.971429247966249, "learning_rate": 1.1860789521234072e-06, "loss": 0.7024, "step": 9639 }, { "epoch": 0.78, "grad_norm": 3.6947777562316024, "learning_rate": 1.1852285060329445e-06, "loss": 0.8092, "step": 9640 }, { "epoch": 0.78, "grad_norm": 3.8582083359088646, "learning_rate": 1.1843783239486878e-06, "loss": 0.6184, "step": 9641 }, { "epoch": 0.78, "grad_norm": 4.293907610858088, "learning_rate": 1.1835284059294772e-06, "loss": 0.6328, "step": 9642 }, { "epoch": 0.78, "grad_norm": 3.1754457334937585, "learning_rate": 1.1826787520341305e-06, "loss": 0.6122, "step": 9643 }, { "epoch": 0.78, "grad_norm": 12.246519715294442, "learning_rate": 1.181829362321451e-06, "loss": 0.7338, "step": 9644 }, { "epoch": 0.78, "grad_norm": 2.499651219999791, "learning_rate": 1.180980236850221e-06, "loss": 0.6928, "step": 9645 }, { "epoch": 0.78, "grad_norm": 3.594425801623606, "learning_rate": 1.180131375679205e-06, "loss": 0.6358, "step": 9646 }, { "epoch": 0.78, "grad_norm": 4.392444984731222, "learning_rate": 1.1792827788671496e-06, "loss": 0.6205, "step": 9647 }, { "epoch": 0.78, "grad_norm": 13.604272230403495, "learning_rate": 1.178434446472782e-06, "loss": 0.8644, "step": 9648 }, { "epoch": 0.78, "grad_norm": 2.6024776033915433, "learning_rate": 1.1775863785548147e-06, "loss": 0.588, "step": 9649 }, { "epoch": 0.78, "grad_norm": 14.537394509933167, "learning_rate": 1.1767385751719362e-06, "loss": 0.6458, "step": 9650 }, { "epoch": 0.78, "grad_norm": 2.5381805411280784, "learning_rate": 1.175891036382823e-06, "loss": 0.6513, "step": 9651 }, { "epoch": 0.78, "grad_norm": 3.0303118121987103, "learning_rate": 1.1750437622461293e-06, "loss": 0.5681, "step": 9652 }, { "epoch": 0.78, "grad_norm": 3.9068296982847395, "learning_rate": 1.17419675282049e-06, "loss": 0.6265, "step": 9653 }, { "epoch": 0.78, "grad_norm": 5.75146600736371, "learning_rate": 1.1733500081645243e-06, "loss": 0.7751, "step": 9654 }, { "epoch": 0.78, "grad_norm": 4.398045793440526, "learning_rate": 1.1725035283368335e-06, "loss": 0.7736, "step": 9655 }, { "epoch": 0.78, "grad_norm": 5.303020747673285, "learning_rate": 1.1716573133959985e-06, "loss": 0.6145, "step": 9656 }, { "epoch": 0.78, "grad_norm": 9.648148665950508, "learning_rate": 1.1708113634005813e-06, "loss": 0.7501, "step": 9657 }, { "epoch": 0.78, "grad_norm": 2.9303155619223737, "learning_rate": 1.1699656784091311e-06, "loss": 0.8575, "step": 9658 }, { "epoch": 0.78, "grad_norm": 3.832602747573734, "learning_rate": 1.1691202584801692e-06, "loss": 0.7392, "step": 9659 }, { "epoch": 0.78, "grad_norm": 2.804885902566445, "learning_rate": 1.1682751036722078e-06, "loss": 0.7396, "step": 9660 }, { "epoch": 0.78, "grad_norm": 3.1948167754758066, "learning_rate": 1.1674302140437344e-06, "loss": 0.7027, "step": 9661 }, { "epoch": 0.78, "grad_norm": 4.90538211396826, "learning_rate": 1.1665855896532235e-06, "loss": 0.4959, "step": 9662 }, { "epoch": 0.78, "grad_norm": 4.5585920990480835, "learning_rate": 1.165741230559127e-06, "loss": 0.739, "step": 9663 }, { "epoch": 0.78, "grad_norm": 5.568827427781763, "learning_rate": 1.1648971368198786e-06, "loss": 0.7025, "step": 9664 }, { "epoch": 0.78, "grad_norm": 2.5972258888476776, "learning_rate": 1.1640533084938988e-06, "loss": 0.6907, "step": 9665 }, { "epoch": 0.79, "grad_norm": 10.820020156140478, "learning_rate": 1.1632097456395802e-06, "loss": 0.6587, "step": 9666 }, { "epoch": 0.79, "grad_norm": 5.653303323184138, "learning_rate": 1.1623664483153069e-06, "loss": 0.6687, "step": 9667 }, { "epoch": 0.79, "grad_norm": 6.160000970875404, "learning_rate": 1.1615234165794381e-06, "loss": 0.6767, "step": 9668 }, { "epoch": 0.79, "grad_norm": 2.892261587940089, "learning_rate": 1.160680650490319e-06, "loss": 0.6827, "step": 9669 }, { "epoch": 0.79, "grad_norm": 4.898715714603023, "learning_rate": 1.1598381501062738e-06, "loss": 0.6711, "step": 9670 }, { "epoch": 0.79, "grad_norm": 3.0244597515304785, "learning_rate": 1.1589959154856063e-06, "loss": 0.5147, "step": 9671 }, { "epoch": 0.79, "grad_norm": 5.1039331120685265, "learning_rate": 1.1581539466866094e-06, "loss": 0.6583, "step": 9672 }, { "epoch": 0.79, "grad_norm": 4.556082072404434, "learning_rate": 1.1573122437675465e-06, "loss": 0.5773, "step": 9673 }, { "epoch": 0.79, "grad_norm": 8.277379927436868, "learning_rate": 1.1564708067866743e-06, "loss": 0.7724, "step": 9674 }, { "epoch": 0.79, "grad_norm": 4.304968043971542, "learning_rate": 1.1556296358022207e-06, "loss": 0.5938, "step": 9675 }, { "epoch": 0.79, "grad_norm": 4.159142398812623, "learning_rate": 1.1547887308724043e-06, "loss": 0.6771, "step": 9676 }, { "epoch": 0.79, "grad_norm": 4.22566451472834, "learning_rate": 1.153948092055419e-06, "loss": 0.5606, "step": 9677 }, { "epoch": 0.79, "grad_norm": 3.170054018350635, "learning_rate": 1.1531077194094426e-06, "loss": 0.5805, "step": 9678 }, { "epoch": 0.79, "grad_norm": 5.05262277175399, "learning_rate": 1.1522676129926324e-06, "loss": 0.6084, "step": 9679 }, { "epoch": 0.79, "grad_norm": 2.8360146728026927, "learning_rate": 1.1514277728631323e-06, "loss": 0.7273, "step": 9680 }, { "epoch": 0.79, "grad_norm": 3.4645596947399273, "learning_rate": 1.1505881990790634e-06, "loss": 0.6756, "step": 9681 }, { "epoch": 0.79, "grad_norm": 2.6532793512664568, "learning_rate": 1.1497488916985273e-06, "loss": 0.6044, "step": 9682 }, { "epoch": 0.79, "grad_norm": 4.609439698468369, "learning_rate": 1.148909850779612e-06, "loss": 0.6893, "step": 9683 }, { "epoch": 0.79, "grad_norm": 2.945774241740003, "learning_rate": 1.1480710763803826e-06, "loss": 0.5901, "step": 9684 }, { "epoch": 0.79, "grad_norm": 3.644527362727375, "learning_rate": 1.147232568558891e-06, "loss": 0.5732, "step": 9685 }, { "epoch": 0.79, "grad_norm": 4.192537875676673, "learning_rate": 1.146394327373162e-06, "loss": 0.8358, "step": 9686 }, { "epoch": 0.79, "grad_norm": 7.062437611859238, "learning_rate": 1.1455563528812113e-06, "loss": 0.8054, "step": 9687 }, { "epoch": 0.79, "grad_norm": 3.5419914276762747, "learning_rate": 1.1447186451410308e-06, "loss": 0.693, "step": 9688 }, { "epoch": 0.79, "grad_norm": 6.129239809713161, "learning_rate": 1.143881204210593e-06, "loss": 0.7837, "step": 9689 }, { "epoch": 0.79, "grad_norm": 3.7934839233525963, "learning_rate": 1.143044030147858e-06, "loss": 0.7311, "step": 9690 }, { "epoch": 0.79, "grad_norm": 3.943349551292075, "learning_rate": 1.1422071230107607e-06, "loss": 0.6903, "step": 9691 }, { "epoch": 0.79, "grad_norm": 5.535187730837632, "learning_rate": 1.141370482857222e-06, "loss": 0.6173, "step": 9692 }, { "epoch": 0.79, "grad_norm": 5.133055386900323, "learning_rate": 1.14053410974514e-06, "loss": 0.6514, "step": 9693 }, { "epoch": 0.79, "grad_norm": 5.152983562475722, "learning_rate": 1.1396980037324e-06, "loss": 0.6253, "step": 9694 }, { "epoch": 0.79, "grad_norm": 3.943519991692357, "learning_rate": 1.138862164876865e-06, "loss": 0.5579, "step": 9695 }, { "epoch": 0.79, "grad_norm": 4.488075935419111, "learning_rate": 1.1380265932363783e-06, "loss": 0.7353, "step": 9696 }, { "epoch": 0.79, "grad_norm": 6.378047870688172, "learning_rate": 1.1371912888687698e-06, "loss": 0.8865, "step": 9697 }, { "epoch": 0.79, "grad_norm": 3.633542562262393, "learning_rate": 1.1363562518318465e-06, "loss": 0.5558, "step": 9698 }, { "epoch": 0.79, "grad_norm": 5.09808064828012, "learning_rate": 1.1355214821833983e-06, "loss": 0.627, "step": 9699 }, { "epoch": 0.79, "grad_norm": 11.095714047175939, "learning_rate": 1.1346869799811943e-06, "loss": 0.6412, "step": 9700 }, { "epoch": 0.79, "grad_norm": 3.4342924423023096, "learning_rate": 1.1338527452829912e-06, "loss": 0.6934, "step": 9701 }, { "epoch": 0.79, "grad_norm": 3.9846550784591903, "learning_rate": 1.1330187781465207e-06, "loss": 0.5665, "step": 9702 }, { "epoch": 0.79, "grad_norm": 3.0256006505485447, "learning_rate": 1.1321850786294986e-06, "loss": 0.6522, "step": 9703 }, { "epoch": 0.79, "grad_norm": 2.7547328963372366, "learning_rate": 1.131351646789624e-06, "loss": 0.6399, "step": 9704 }, { "epoch": 0.79, "grad_norm": 2.3947916901946, "learning_rate": 1.1305184826845745e-06, "loss": 0.7279, "step": 9705 }, { "epoch": 0.79, "grad_norm": 4.678641501940377, "learning_rate": 1.1296855863720103e-06, "loss": 0.6577, "step": 9706 }, { "epoch": 0.79, "grad_norm": 3.401625757577636, "learning_rate": 1.1288529579095713e-06, "loss": 0.6255, "step": 9707 }, { "epoch": 0.79, "grad_norm": 2.786864622235492, "learning_rate": 1.128020597354884e-06, "loss": 0.5278, "step": 9708 }, { "epoch": 0.79, "grad_norm": 4.276418898701669, "learning_rate": 1.12718850476555e-06, "loss": 0.5811, "step": 9709 }, { "epoch": 0.79, "grad_norm": 4.642329986414064, "learning_rate": 1.1263566801991583e-06, "loss": 0.7763, "step": 9710 }, { "epoch": 0.79, "grad_norm": 2.762623222175062, "learning_rate": 1.1255251237132746e-06, "loss": 0.5785, "step": 9711 }, { "epoch": 0.79, "grad_norm": 7.231099144134598, "learning_rate": 1.124693835365448e-06, "loss": 0.7966, "step": 9712 }, { "epoch": 0.79, "grad_norm": 2.8210258430420136, "learning_rate": 1.1238628152132093e-06, "loss": 0.6635, "step": 9713 }, { "epoch": 0.79, "grad_norm": 4.7451956962387865, "learning_rate": 1.1230320633140678e-06, "loss": 0.6041, "step": 9714 }, { "epoch": 0.79, "grad_norm": 2.6179270218608206, "learning_rate": 1.122201579725521e-06, "loss": 0.6041, "step": 9715 }, { "epoch": 0.79, "grad_norm": 3.9323621128508375, "learning_rate": 1.12137136450504e-06, "loss": 0.5517, "step": 9716 }, { "epoch": 0.79, "grad_norm": 5.000328275867567, "learning_rate": 1.1205414177100837e-06, "loss": 0.5663, "step": 9717 }, { "epoch": 0.79, "grad_norm": 3.0234454540137583, "learning_rate": 1.1197117393980883e-06, "loss": 0.6707, "step": 9718 }, { "epoch": 0.79, "grad_norm": 10.592957426203395, "learning_rate": 1.1188823296264734e-06, "loss": 0.6716, "step": 9719 }, { "epoch": 0.79, "grad_norm": 15.539374023295242, "learning_rate": 1.118053188452638e-06, "loss": 0.5131, "step": 9720 }, { "epoch": 0.79, "grad_norm": 3.34681886379964, "learning_rate": 1.117224315933964e-06, "loss": 0.6771, "step": 9721 }, { "epoch": 0.79, "grad_norm": 5.259963234921147, "learning_rate": 1.1163957121278163e-06, "loss": 0.7317, "step": 9722 }, { "epoch": 0.79, "grad_norm": 6.394483156480688, "learning_rate": 1.1155673770915377e-06, "loss": 0.6714, "step": 9723 }, { "epoch": 0.79, "grad_norm": 3.06166633541685, "learning_rate": 1.1147393108824556e-06, "loss": 0.6505, "step": 9724 }, { "epoch": 0.79, "grad_norm": 6.620930835666329, "learning_rate": 1.113911513557877e-06, "loss": 0.5993, "step": 9725 }, { "epoch": 0.79, "grad_norm": 4.356851727848269, "learning_rate": 1.1130839851750908e-06, "loss": 0.7491, "step": 9726 }, { "epoch": 0.79, "grad_norm": 5.197260372620817, "learning_rate": 1.112256725791367e-06, "loss": 0.5711, "step": 9727 }, { "epoch": 0.79, "grad_norm": 5.916259531545235, "learning_rate": 1.1114297354639553e-06, "loss": 0.7968, "step": 9728 }, { "epoch": 0.79, "grad_norm": 4.837624168103878, "learning_rate": 1.1106030142500917e-06, "loss": 0.7132, "step": 9729 }, { "epoch": 0.79, "grad_norm": 2.6842887453832907, "learning_rate": 1.1097765622069878e-06, "loss": 0.6206, "step": 9730 }, { "epoch": 0.79, "grad_norm": 5.805029141042495, "learning_rate": 1.1089503793918438e-06, "loss": 0.5622, "step": 9731 }, { "epoch": 0.79, "grad_norm": 4.198211582905947, "learning_rate": 1.1081244658618306e-06, "loss": 0.8159, "step": 9732 }, { "epoch": 0.79, "grad_norm": 5.898727013812226, "learning_rate": 1.107298821674111e-06, "loss": 0.6261, "step": 9733 }, { "epoch": 0.79, "grad_norm": 7.105839467981459, "learning_rate": 1.1064734468858223e-06, "loss": 0.5587, "step": 9734 }, { "epoch": 0.79, "grad_norm": 4.5615799781334845, "learning_rate": 1.1056483415540874e-06, "loss": 0.6985, "step": 9735 }, { "epoch": 0.79, "grad_norm": 3.892344535612827, "learning_rate": 1.104823505736009e-06, "loss": 0.6041, "step": 9736 }, { "epoch": 0.79, "grad_norm": 5.642601068388329, "learning_rate": 1.1039989394886686e-06, "loss": 0.6085, "step": 9737 }, { "epoch": 0.79, "grad_norm": 4.855958276444511, "learning_rate": 1.1031746428691354e-06, "loss": 0.7245, "step": 9738 }, { "epoch": 0.79, "grad_norm": 4.539357229552136, "learning_rate": 1.1023506159344498e-06, "loss": 0.7087, "step": 9739 }, { "epoch": 0.79, "grad_norm": 6.767155304205483, "learning_rate": 1.1015268587416455e-06, "loss": 0.741, "step": 9740 }, { "epoch": 0.79, "grad_norm": 3.522013878855432, "learning_rate": 1.1007033713477277e-06, "loss": 0.7851, "step": 9741 }, { "epoch": 0.79, "grad_norm": 2.4456256187860443, "learning_rate": 1.0998801538096904e-06, "loss": 0.6785, "step": 9742 }, { "epoch": 0.79, "grad_norm": 4.624646460868328, "learning_rate": 1.0990572061845034e-06, "loss": 0.745, "step": 9743 }, { "epoch": 0.79, "grad_norm": 4.0752240110712785, "learning_rate": 1.0982345285291184e-06, "loss": 0.7212, "step": 9744 }, { "epoch": 0.79, "grad_norm": 3.3468582299379555, "learning_rate": 1.0974121209004746e-06, "loss": 0.5652, "step": 9745 }, { "epoch": 0.79, "grad_norm": 3.2845251821572865, "learning_rate": 1.0965899833554821e-06, "loss": 0.6849, "step": 9746 }, { "epoch": 0.79, "grad_norm": 2.361084013711998, "learning_rate": 1.0957681159510418e-06, "loss": 0.8287, "step": 9747 }, { "epoch": 0.79, "grad_norm": 3.2001332072406288, "learning_rate": 1.09494651874403e-06, "loss": 0.6247, "step": 9748 }, { "epoch": 0.79, "grad_norm": 3.978647846563255, "learning_rate": 1.0941251917913082e-06, "loss": 0.7044, "step": 9749 }, { "epoch": 0.79, "grad_norm": 3.061369104160993, "learning_rate": 1.093304135149717e-06, "loss": 0.683, "step": 9750 }, { "epoch": 0.79, "grad_norm": 6.820948028367795, "learning_rate": 1.0924833488760778e-06, "loss": 0.7039, "step": 9751 }, { "epoch": 0.79, "grad_norm": 9.612384506598756, "learning_rate": 1.091662833027195e-06, "loss": 0.7224, "step": 9752 }, { "epoch": 0.79, "grad_norm": 2.22698314263358, "learning_rate": 1.0908425876598512e-06, "loss": 0.6723, "step": 9753 }, { "epoch": 0.79, "grad_norm": 3.6679925662629467, "learning_rate": 1.090022612830816e-06, "loss": 0.6476, "step": 9754 }, { "epoch": 0.79, "grad_norm": 5.046718509361913, "learning_rate": 1.0892029085968343e-06, "loss": 0.8094, "step": 9755 }, { "epoch": 0.79, "grad_norm": 3.1467281063049084, "learning_rate": 1.0883834750146366e-06, "loss": 0.7074, "step": 9756 }, { "epoch": 0.79, "grad_norm": 5.749920143474759, "learning_rate": 1.0875643121409307e-06, "loss": 0.6944, "step": 9757 }, { "epoch": 0.79, "grad_norm": 2.931383088425106, "learning_rate": 1.0867454200324123e-06, "loss": 0.6525, "step": 9758 }, { "epoch": 0.79, "grad_norm": 3.3960350593838005, "learning_rate": 1.0859267987457478e-06, "loss": 0.6391, "step": 9759 }, { "epoch": 0.79, "grad_norm": 2.839040912644251, "learning_rate": 1.085108448337595e-06, "loss": 0.7594, "step": 9760 }, { "epoch": 0.79, "grad_norm": 3.465451141606628, "learning_rate": 1.0842903688645879e-06, "loss": 0.6289, "step": 9761 }, { "epoch": 0.79, "grad_norm": 3.8371713358598587, "learning_rate": 1.0834725603833414e-06, "loss": 0.6642, "step": 9762 }, { "epoch": 0.79, "grad_norm": 4.125293622236857, "learning_rate": 1.0826550229504552e-06, "loss": 0.6999, "step": 9763 }, { "epoch": 0.79, "grad_norm": 2.765093682787993, "learning_rate": 1.0818377566225075e-06, "loss": 0.7862, "step": 9764 }, { "epoch": 0.79, "grad_norm": 3.327115086233531, "learning_rate": 1.0810207614560575e-06, "loss": 0.6173, "step": 9765 }, { "epoch": 0.79, "grad_norm": 7.117884598842083, "learning_rate": 1.0802040375076457e-06, "loss": 0.7942, "step": 9766 }, { "epoch": 0.79, "grad_norm": 3.944794301192135, "learning_rate": 1.0793875848337964e-06, "loss": 0.7212, "step": 9767 }, { "epoch": 0.79, "grad_norm": 3.9042654880637957, "learning_rate": 1.0785714034910128e-06, "loss": 0.6901, "step": 9768 }, { "epoch": 0.79, "grad_norm": 4.4270346318516, "learning_rate": 1.077755493535778e-06, "loss": 0.737, "step": 9769 }, { "epoch": 0.79, "grad_norm": 5.013169963814797, "learning_rate": 1.0769398550245613e-06, "loss": 0.7072, "step": 9770 }, { "epoch": 0.79, "grad_norm": 4.949637448756055, "learning_rate": 1.0761244880138078e-06, "loss": 0.6744, "step": 9771 }, { "epoch": 0.79, "grad_norm": 3.2484788298471954, "learning_rate": 1.0753093925599467e-06, "loss": 0.712, "step": 9772 }, { "epoch": 0.79, "grad_norm": 8.510977166981402, "learning_rate": 1.0744945687193858e-06, "loss": 0.766, "step": 9773 }, { "epoch": 0.79, "grad_norm": 3.3349905675965297, "learning_rate": 1.0736800165485194e-06, "loss": 0.667, "step": 9774 }, { "epoch": 0.79, "grad_norm": 2.3178571140266717, "learning_rate": 1.072865736103718e-06, "loss": 0.7812, "step": 9775 }, { "epoch": 0.79, "grad_norm": 4.8316602969272235, "learning_rate": 1.0720517274413338e-06, "loss": 0.6317, "step": 9776 }, { "epoch": 0.79, "grad_norm": 3.1927301207945282, "learning_rate": 1.0712379906177034e-06, "loss": 0.7594, "step": 9777 }, { "epoch": 0.79, "grad_norm": 2.894156867274025, "learning_rate": 1.070424525689142e-06, "loss": 0.4974, "step": 9778 }, { "epoch": 0.79, "grad_norm": 6.65724442669717, "learning_rate": 1.0696113327119461e-06, "loss": 0.7553, "step": 9779 }, { "epoch": 0.79, "grad_norm": 8.213544208588305, "learning_rate": 1.068798411742392e-06, "loss": 0.5794, "step": 9780 }, { "epoch": 0.79, "grad_norm": 4.029607735526111, "learning_rate": 1.0679857628367423e-06, "loss": 0.5589, "step": 9781 }, { "epoch": 0.79, "grad_norm": 3.035865218068489, "learning_rate": 1.0671733860512346e-06, "loss": 0.7249, "step": 9782 }, { "epoch": 0.79, "grad_norm": 3.2117165727754124, "learning_rate": 1.0663612814420927e-06, "loss": 0.5294, "step": 9783 }, { "epoch": 0.79, "grad_norm": 2.9296379865594915, "learning_rate": 1.0655494490655183e-06, "loss": 0.6008, "step": 9784 }, { "epoch": 0.79, "grad_norm": 6.690473689729569, "learning_rate": 1.0647378889776956e-06, "loss": 0.6787, "step": 9785 }, { "epoch": 0.79, "grad_norm": 3.2495891997445536, "learning_rate": 1.0639266012347892e-06, "loss": 0.571, "step": 9786 }, { "epoch": 0.79, "grad_norm": 4.588433892039435, "learning_rate": 1.0631155858929448e-06, "loss": 0.574, "step": 9787 }, { "epoch": 0.79, "grad_norm": 12.087009859965255, "learning_rate": 1.0623048430082917e-06, "loss": 0.6206, "step": 9788 }, { "epoch": 0.8, "grad_norm": 2.534138459952571, "learning_rate": 1.0614943726369354e-06, "loss": 0.6891, "step": 9789 }, { "epoch": 0.8, "grad_norm": 3.282467796066206, "learning_rate": 1.060684174834969e-06, "loss": 0.483, "step": 9790 }, { "epoch": 0.8, "grad_norm": 7.760725227511191, "learning_rate": 1.059874249658462e-06, "loss": 0.662, "step": 9791 }, { "epoch": 0.8, "grad_norm": 4.110396428224749, "learning_rate": 1.0590645971634655e-06, "loss": 0.5828, "step": 9792 }, { "epoch": 0.8, "grad_norm": 3.1462311175106166, "learning_rate": 1.0582552174060133e-06, "loss": 0.6508, "step": 9793 }, { "epoch": 0.8, "grad_norm": 2.8866488415353535, "learning_rate": 1.057446110442118e-06, "loss": 0.7141, "step": 9794 }, { "epoch": 0.8, "grad_norm": 2.6226958772210525, "learning_rate": 1.0566372763277777e-06, "loss": 0.6764, "step": 9795 }, { "epoch": 0.8, "grad_norm": 3.2512020029050253, "learning_rate": 1.0558287151189656e-06, "loss": 0.6086, "step": 9796 }, { "epoch": 0.8, "grad_norm": 6.3873777482631215, "learning_rate": 1.055020426871643e-06, "loss": 0.7259, "step": 9797 }, { "epoch": 0.8, "grad_norm": 3.5571791715349765, "learning_rate": 1.0542124116417456e-06, "loss": 0.621, "step": 9798 }, { "epoch": 0.8, "grad_norm": 2.7555725063588077, "learning_rate": 1.0534046694851945e-06, "loss": 0.5893, "step": 9799 }, { "epoch": 0.8, "grad_norm": 6.802592347371291, "learning_rate": 1.0525972004578904e-06, "loss": 0.5954, "step": 9800 }, { "epoch": 0.8, "grad_norm": 4.179481322058555, "learning_rate": 1.051790004615713e-06, "loss": 0.5572, "step": 9801 }, { "epoch": 0.8, "grad_norm": 2.6510495370101723, "learning_rate": 1.0509830820145294e-06, "loss": 0.7066, "step": 9802 }, { "epoch": 0.8, "grad_norm": 4.549669132313497, "learning_rate": 1.0501764327101793e-06, "loss": 0.6037, "step": 9803 }, { "epoch": 0.8, "grad_norm": 3.3420122030448396, "learning_rate": 1.0493700567584935e-06, "loss": 0.6085, "step": 9804 }, { "epoch": 0.8, "grad_norm": 4.143601182458291, "learning_rate": 1.048563954215272e-06, "loss": 0.8227, "step": 9805 }, { "epoch": 0.8, "grad_norm": 3.8398553918063425, "learning_rate": 1.0477581251363066e-06, "loss": 0.6747, "step": 9806 }, { "epoch": 0.8, "grad_norm": 2.8450218432357515, "learning_rate": 1.0469525695773636e-06, "loss": 0.7192, "step": 9807 }, { "epoch": 0.8, "grad_norm": 4.071998571654696, "learning_rate": 1.0461472875941935e-06, "loss": 0.6987, "step": 9808 }, { "epoch": 0.8, "grad_norm": 6.200122686823859, "learning_rate": 1.0453422792425273e-06, "loss": 0.7143, "step": 9809 }, { "epoch": 0.8, "grad_norm": 3.1071614119043374, "learning_rate": 1.0445375445780747e-06, "loss": 0.5628, "step": 9810 }, { "epoch": 0.8, "grad_norm": 3.5983636434502366, "learning_rate": 1.0437330836565317e-06, "loss": 0.5938, "step": 9811 }, { "epoch": 0.8, "grad_norm": 2.546479104936366, "learning_rate": 1.0429288965335683e-06, "loss": 0.5832, "step": 9812 }, { "epoch": 0.8, "grad_norm": 3.4932302962130124, "learning_rate": 1.0421249832648416e-06, "loss": 0.7725, "step": 9813 }, { "epoch": 0.8, "grad_norm": 2.7445871570196374, "learning_rate": 1.0413213439059855e-06, "loss": 0.5961, "step": 9814 }, { "epoch": 0.8, "grad_norm": 3.4847538198474868, "learning_rate": 1.0405179785126201e-06, "loss": 0.6386, "step": 9815 }, { "epoch": 0.8, "grad_norm": 4.790984485566235, "learning_rate": 1.0397148871403412e-06, "loss": 0.5645, "step": 9816 }, { "epoch": 0.8, "grad_norm": 3.0407499743680524, "learning_rate": 1.0389120698447286e-06, "loss": 0.5538, "step": 9817 }, { "epoch": 0.8, "grad_norm": 3.2291153915641915, "learning_rate": 1.0381095266813413e-06, "loss": 0.6546, "step": 9818 }, { "epoch": 0.8, "grad_norm": 3.414551047727841, "learning_rate": 1.0373072577057197e-06, "loss": 0.6793, "step": 9819 }, { "epoch": 0.8, "grad_norm": 3.8562338931200952, "learning_rate": 1.0365052629733884e-06, "loss": 0.6466, "step": 9820 }, { "epoch": 0.8, "grad_norm": 2.8985592894916477, "learning_rate": 1.0357035425398482e-06, "loss": 0.5847, "step": 9821 }, { "epoch": 0.8, "grad_norm": 2.9384587727277904, "learning_rate": 1.034902096460585e-06, "loss": 0.6297, "step": 9822 }, { "epoch": 0.8, "grad_norm": 2.917657446062314, "learning_rate": 1.0341009247910626e-06, "loss": 0.7864, "step": 9823 }, { "epoch": 0.8, "grad_norm": 3.729609375101682, "learning_rate": 1.0333000275867284e-06, "loss": 0.6788, "step": 9824 }, { "epoch": 0.8, "grad_norm": 3.6943581375243597, "learning_rate": 1.0324994049030085e-06, "loss": 0.5271, "step": 9825 }, { "epoch": 0.8, "grad_norm": 3.9860330169121614, "learning_rate": 1.0316990567953101e-06, "loss": 0.6025, "step": 9826 }, { "epoch": 0.8, "grad_norm": 2.636193133743554, "learning_rate": 1.0308989833190241e-06, "loss": 0.7409, "step": 9827 }, { "epoch": 0.8, "grad_norm": 2.4835857648800244, "learning_rate": 1.030099184529519e-06, "loss": 0.6271, "step": 9828 }, { "epoch": 0.8, "grad_norm": 3.1346390387416663, "learning_rate": 1.0292996604821482e-06, "loss": 0.7568, "step": 9829 }, { "epoch": 0.8, "grad_norm": 3.7144664418359605, "learning_rate": 1.0285004112322428e-06, "loss": 0.6225, "step": 9830 }, { "epoch": 0.8, "grad_norm": 2.4355144159678597, "learning_rate": 1.0277014368351152e-06, "loss": 0.5937, "step": 9831 }, { "epoch": 0.8, "grad_norm": 4.515312259428321, "learning_rate": 1.0269027373460589e-06, "loss": 0.7651, "step": 9832 }, { "epoch": 0.8, "grad_norm": 3.0296099442093793, "learning_rate": 1.0261043128203508e-06, "loss": 0.5965, "step": 9833 }, { "epoch": 0.8, "grad_norm": 8.30637490607456, "learning_rate": 1.025306163313246e-06, "loss": 0.6246, "step": 9834 }, { "epoch": 0.8, "grad_norm": 3.453953429327857, "learning_rate": 1.02450828887998e-06, "loss": 0.7047, "step": 9835 }, { "epoch": 0.8, "grad_norm": 3.2883844110558766, "learning_rate": 1.0237106895757738e-06, "loss": 0.5855, "step": 9836 }, { "epoch": 0.8, "grad_norm": 4.291481196393002, "learning_rate": 1.022913365455825e-06, "loss": 0.6593, "step": 9837 }, { "epoch": 0.8, "grad_norm": 2.462518202903803, "learning_rate": 1.0221163165753122e-06, "loss": 0.695, "step": 9838 }, { "epoch": 0.8, "grad_norm": 4.755127102563516, "learning_rate": 1.0213195429893963e-06, "loss": 0.6545, "step": 9839 }, { "epoch": 0.8, "grad_norm": 3.2668505756551944, "learning_rate": 1.0205230447532217e-06, "loss": 0.6016, "step": 9840 }, { "epoch": 0.8, "grad_norm": 8.225575090310823, "learning_rate": 1.0197268219219087e-06, "loss": 0.66, "step": 9841 }, { "epoch": 0.8, "grad_norm": 3.2115669509100533, "learning_rate": 1.0189308745505598e-06, "loss": 0.7311, "step": 9842 }, { "epoch": 0.8, "grad_norm": 2.703656660165645, "learning_rate": 1.0181352026942632e-06, "loss": 0.5958, "step": 9843 }, { "epoch": 0.8, "grad_norm": 6.347740546803845, "learning_rate": 1.017339806408082e-06, "loss": 0.5058, "step": 9844 }, { "epoch": 0.8, "grad_norm": 3.7415659875750897, "learning_rate": 1.0165446857470635e-06, "loss": 0.5069, "step": 9845 }, { "epoch": 0.8, "grad_norm": 9.315507809385082, "learning_rate": 1.015749840766233e-06, "loss": 0.6386, "step": 9846 }, { "epoch": 0.8, "grad_norm": 3.994838929632411, "learning_rate": 1.0149552715206024e-06, "loss": 0.6315, "step": 9847 }, { "epoch": 0.8, "grad_norm": 2.343483464463572, "learning_rate": 1.0141609780651585e-06, "loss": 0.6322, "step": 9848 }, { "epoch": 0.8, "grad_norm": 2.8765354442463704, "learning_rate": 1.0133669604548702e-06, "loss": 0.7813, "step": 9849 }, { "epoch": 0.8, "grad_norm": 3.017737827426694, "learning_rate": 1.0125732187446918e-06, "loss": 0.7039, "step": 9850 }, { "epoch": 0.8, "grad_norm": 2.6575724236641967, "learning_rate": 1.0117797529895535e-06, "loss": 0.6462, "step": 9851 }, { "epoch": 0.8, "grad_norm": 4.011062257505342, "learning_rate": 1.0109865632443684e-06, "loss": 0.7091, "step": 9852 }, { "epoch": 0.8, "grad_norm": 2.842731910061206, "learning_rate": 1.0101936495640285e-06, "loss": 0.6484, "step": 9853 }, { "epoch": 0.8, "grad_norm": 2.9779938108617507, "learning_rate": 1.0094010120034115e-06, "loss": 0.7331, "step": 9854 }, { "epoch": 0.8, "grad_norm": 2.797181126354803, "learning_rate": 1.008608650617371e-06, "loss": 0.6564, "step": 9855 }, { "epoch": 0.8, "grad_norm": 2.7150580818574706, "learning_rate": 1.0078165654607425e-06, "loss": 0.6411, "step": 9856 }, { "epoch": 0.8, "grad_norm": 5.477065946580599, "learning_rate": 1.0070247565883462e-06, "loss": 0.6293, "step": 9857 }, { "epoch": 0.8, "grad_norm": 2.996772066276645, "learning_rate": 1.0062332240549782e-06, "loss": 0.6806, "step": 9858 }, { "epoch": 0.8, "grad_norm": 6.118761970564444, "learning_rate": 1.0054419679154182e-06, "loss": 0.7158, "step": 9859 }, { "epoch": 0.8, "grad_norm": 4.5072894417883305, "learning_rate": 1.0046509882244243e-06, "loss": 0.5304, "step": 9860 }, { "epoch": 0.8, "grad_norm": 8.209889708484853, "learning_rate": 1.0038602850367401e-06, "loss": 0.7886, "step": 9861 }, { "epoch": 0.8, "grad_norm": 4.827637373309874, "learning_rate": 1.0030698584070848e-06, "loss": 0.6521, "step": 9862 }, { "epoch": 0.8, "grad_norm": 2.60922441210416, "learning_rate": 1.002279708390163e-06, "loss": 0.6337, "step": 9863 }, { "epoch": 0.8, "grad_norm": 5.138348283360533, "learning_rate": 1.0014898350406577e-06, "loss": 0.5664, "step": 9864 }, { "epoch": 0.8, "grad_norm": 3.9101353720032335, "learning_rate": 1.0007002384132325e-06, "loss": 0.5833, "step": 9865 }, { "epoch": 0.8, "grad_norm": 2.7448521102382304, "learning_rate": 9.999109185625321e-07, "loss": 0.6864, "step": 9866 }, { "epoch": 0.8, "grad_norm": 2.2845213466571432, "learning_rate": 9.991218755431814e-07, "loss": 0.6316, "step": 9867 }, { "epoch": 0.8, "grad_norm": 8.489723838723888, "learning_rate": 9.983331094097903e-07, "loss": 0.6779, "step": 9868 }, { "epoch": 0.8, "grad_norm": 5.302306223808616, "learning_rate": 9.975446202169432e-07, "loss": 0.5683, "step": 9869 }, { "epoch": 0.8, "grad_norm": 4.744571955249638, "learning_rate": 9.967564080192122e-07, "loss": 0.7098, "step": 9870 }, { "epoch": 0.8, "grad_norm": 3.8680602372490682, "learning_rate": 9.959684728711417e-07, "loss": 0.8085, "step": 9871 }, { "epoch": 0.8, "grad_norm": 3.3022222488447164, "learning_rate": 9.951808148272656e-07, "loss": 0.7149, "step": 9872 }, { "epoch": 0.8, "grad_norm": 3.1886163022263077, "learning_rate": 9.943934339420941e-07, "loss": 0.7712, "step": 9873 }, { "epoch": 0.8, "grad_norm": 4.330511260748767, "learning_rate": 9.936063302701165e-07, "loss": 0.8742, "step": 9874 }, { "epoch": 0.8, "grad_norm": 3.2640347591500034, "learning_rate": 9.928195038658085e-07, "loss": 0.5693, "step": 9875 }, { "epoch": 0.8, "grad_norm": 12.38615577161832, "learning_rate": 9.92032954783621e-07, "loss": 0.682, "step": 9876 }, { "epoch": 0.8, "grad_norm": 3.1347015589706433, "learning_rate": 9.91246683077992e-07, "loss": 0.7185, "step": 9877 }, { "epoch": 0.8, "grad_norm": 3.5947056110095885, "learning_rate": 9.904606888033307e-07, "loss": 0.7086, "step": 9878 }, { "epoch": 0.8, "grad_norm": 2.7228555737199014, "learning_rate": 9.896749720140375e-07, "loss": 0.7253, "step": 9879 }, { "epoch": 0.8, "grad_norm": 7.816999087187546, "learning_rate": 9.888895327644876e-07, "loss": 0.6724, "step": 9880 }, { "epoch": 0.8, "grad_norm": 3.669850136064356, "learning_rate": 9.881043711090366e-07, "loss": 0.6906, "step": 9881 }, { "epoch": 0.8, "grad_norm": 2.9955352572892413, "learning_rate": 9.873194871020252e-07, "loss": 0.6552, "step": 9882 }, { "epoch": 0.8, "grad_norm": 4.418101307687318, "learning_rate": 9.865348807977698e-07, "loss": 0.7678, "step": 9883 }, { "epoch": 0.8, "grad_norm": 3.9007923991874645, "learning_rate": 9.857505522505745e-07, "loss": 0.7791, "step": 9884 }, { "epoch": 0.8, "grad_norm": 2.843120049423034, "learning_rate": 9.849665015147136e-07, "loss": 0.5385, "step": 9885 }, { "epoch": 0.8, "grad_norm": 3.623885339692565, "learning_rate": 9.841827286444532e-07, "loss": 0.5117, "step": 9886 }, { "epoch": 0.8, "grad_norm": 7.32091976815324, "learning_rate": 9.833992336940328e-07, "loss": 0.7094, "step": 9887 }, { "epoch": 0.8, "grad_norm": 5.4631310249121805, "learning_rate": 9.826160167176768e-07, "loss": 0.6313, "step": 9888 }, { "epoch": 0.8, "grad_norm": 2.5846001876292672, "learning_rate": 9.818330777695878e-07, "loss": 0.6994, "step": 9889 }, { "epoch": 0.8, "grad_norm": 5.0506085763568676, "learning_rate": 9.81050416903951e-07, "loss": 0.5898, "step": 9890 }, { "epoch": 0.8, "grad_norm": 2.5686981938386353, "learning_rate": 9.802680341749303e-07, "loss": 0.6972, "step": 9891 }, { "epoch": 0.8, "grad_norm": 2.3391065713945363, "learning_rate": 9.794859296366704e-07, "loss": 0.5776, "step": 9892 }, { "epoch": 0.8, "grad_norm": 11.017948277806132, "learning_rate": 9.787041033433014e-07, "loss": 0.429, "step": 9893 }, { "epoch": 0.8, "grad_norm": 5.6097893167574115, "learning_rate": 9.77922555348927e-07, "loss": 0.625, "step": 9894 }, { "epoch": 0.8, "grad_norm": 3.553094928138148, "learning_rate": 9.771412857076379e-07, "loss": 0.5638, "step": 9895 }, { "epoch": 0.8, "grad_norm": 3.2993411124900534, "learning_rate": 9.763602944735018e-07, "loss": 0.7334, "step": 9896 }, { "epoch": 0.8, "grad_norm": 3.20098221947586, "learning_rate": 9.755795817005686e-07, "loss": 0.5926, "step": 9897 }, { "epoch": 0.8, "grad_norm": 8.868622972391147, "learning_rate": 9.747991474428682e-07, "loss": 0.6959, "step": 9898 }, { "epoch": 0.8, "grad_norm": 4.930077251644765, "learning_rate": 9.740189917544102e-07, "loss": 0.5402, "step": 9899 }, { "epoch": 0.8, "grad_norm": 5.6389110324195295, "learning_rate": 9.73239114689189e-07, "loss": 0.7216, "step": 9900 }, { "epoch": 0.8, "grad_norm": 5.8879918247849545, "learning_rate": 9.724595163011741e-07, "loss": 0.5999, "step": 9901 }, { "epoch": 0.8, "grad_norm": 2.9601317312061295, "learning_rate": 9.716801966443211e-07, "loss": 0.5953, "step": 9902 }, { "epoch": 0.8, "grad_norm": 12.242294430189657, "learning_rate": 9.709011557725639e-07, "loss": 0.5665, "step": 9903 }, { "epoch": 0.8, "grad_norm": 2.900616768327703, "learning_rate": 9.701223937398152e-07, "loss": 0.7114, "step": 9904 }, { "epoch": 0.8, "grad_norm": 4.764342312992366, "learning_rate": 9.693439105999715e-07, "loss": 0.8228, "step": 9905 }, { "epoch": 0.8, "grad_norm": 3.0850936729241676, "learning_rate": 9.68565706406907e-07, "loss": 0.6997, "step": 9906 }, { "epoch": 0.8, "grad_norm": 3.245720389276964, "learning_rate": 9.677877812144803e-07, "loss": 0.4705, "step": 9907 }, { "epoch": 0.8, "grad_norm": 6.250918850932546, "learning_rate": 9.670101350765276e-07, "loss": 0.5754, "step": 9908 }, { "epoch": 0.8, "grad_norm": 3.1019898222030506, "learning_rate": 9.66232768046868e-07, "loss": 0.8221, "step": 9909 }, { "epoch": 0.8, "grad_norm": 3.9075477351777055, "learning_rate": 9.654556801793002e-07, "loss": 0.6233, "step": 9910 }, { "epoch": 0.8, "grad_norm": 2.95268295266713, "learning_rate": 9.646788715276024e-07, "loss": 0.7627, "step": 9911 }, { "epoch": 0.81, "grad_norm": 3.9429748279105863, "learning_rate": 9.63902342145534e-07, "loss": 0.5924, "step": 9912 }, { "epoch": 0.81, "grad_norm": 6.556515030329859, "learning_rate": 9.631260920868386e-07, "loss": 0.7955, "step": 9913 }, { "epoch": 0.81, "grad_norm": 7.014493961271654, "learning_rate": 9.62350121405235e-07, "loss": 0.7527, "step": 9914 }, { "epoch": 0.81, "grad_norm": 5.044064711572825, "learning_rate": 9.615744301544256e-07, "loss": 0.6153, "step": 9915 }, { "epoch": 0.81, "grad_norm": 3.5663222594727806, "learning_rate": 9.607990183880944e-07, "loss": 0.6786, "step": 9916 }, { "epoch": 0.81, "grad_norm": 6.631948114406123, "learning_rate": 9.600238861599047e-07, "loss": 0.6252, "step": 9917 }, { "epoch": 0.81, "grad_norm": 4.244549301947422, "learning_rate": 9.592490335234993e-07, "loss": 0.5268, "step": 9918 }, { "epoch": 0.81, "grad_norm": 17.741883362213013, "learning_rate": 9.584744605325024e-07, "loss": 0.7255, "step": 9919 }, { "epoch": 0.81, "grad_norm": 3.908729417462394, "learning_rate": 9.577001672405218e-07, "loss": 0.88, "step": 9920 }, { "epoch": 0.81, "grad_norm": 2.6733531510879756, "learning_rate": 9.569261537011421e-07, "loss": 0.4844, "step": 9921 }, { "epoch": 0.81, "grad_norm": 4.3824202752278305, "learning_rate": 9.561524199679284e-07, "loss": 0.786, "step": 9922 }, { "epoch": 0.81, "grad_norm": 3.3814556966659737, "learning_rate": 9.553789660944318e-07, "loss": 0.715, "step": 9923 }, { "epoch": 0.81, "grad_norm": 7.21167255596437, "learning_rate": 9.54605792134175e-07, "loss": 0.6129, "step": 9924 }, { "epoch": 0.81, "grad_norm": 4.770964950828427, "learning_rate": 9.538328981406714e-07, "loss": 0.6037, "step": 9925 }, { "epoch": 0.81, "grad_norm": 6.126381299311236, "learning_rate": 9.530602841674064e-07, "loss": 0.5184, "step": 9926 }, { "epoch": 0.81, "grad_norm": 2.923778646833415, "learning_rate": 9.522879502678522e-07, "loss": 0.76, "step": 9927 }, { "epoch": 0.81, "grad_norm": 3.2053625728389443, "learning_rate": 9.515158964954585e-07, "loss": 0.7445, "step": 9928 }, { "epoch": 0.81, "grad_norm": 3.621953503704296, "learning_rate": 9.507441229036551e-07, "loss": 0.6254, "step": 9929 }, { "epoch": 0.81, "grad_norm": 10.117085393736515, "learning_rate": 9.499726295458572e-07, "loss": 0.7325, "step": 9930 }, { "epoch": 0.81, "grad_norm": 3.2295081819081135, "learning_rate": 9.492014164754521e-07, "loss": 0.5779, "step": 9931 }, { "epoch": 0.81, "grad_norm": 2.606483573577365, "learning_rate": 9.484304837458158e-07, "loss": 0.6866, "step": 9932 }, { "epoch": 0.81, "grad_norm": 2.76858179425622, "learning_rate": 9.476598314102992e-07, "loss": 0.6533, "step": 9933 }, { "epoch": 0.81, "grad_norm": 5.365714699325056, "learning_rate": 9.468894595222399e-07, "loss": 0.6083, "step": 9934 }, { "epoch": 0.81, "grad_norm": 2.537466135233147, "learning_rate": 9.46119368134949e-07, "loss": 0.6682, "step": 9935 }, { "epoch": 0.81, "grad_norm": 2.672354938808407, "learning_rate": 9.453495573017241e-07, "loss": 0.6396, "step": 9936 }, { "epoch": 0.81, "grad_norm": 3.1029966936616122, "learning_rate": 9.445800270758404e-07, "loss": 0.5554, "step": 9937 }, { "epoch": 0.81, "grad_norm": 3.667919478908177, "learning_rate": 9.438107775105538e-07, "loss": 0.6622, "step": 9938 }, { "epoch": 0.81, "grad_norm": 4.481492053441964, "learning_rate": 9.430418086591008e-07, "loss": 0.6594, "step": 9939 }, { "epoch": 0.81, "grad_norm": 3.732630352314224, "learning_rate": 9.422731205746988e-07, "loss": 0.6862, "step": 9940 }, { "epoch": 0.81, "grad_norm": 3.9530019307486466, "learning_rate": 9.41504713310547e-07, "loss": 0.7385, "step": 9941 }, { "epoch": 0.81, "grad_norm": 9.053886732090607, "learning_rate": 9.407365869198226e-07, "loss": 0.6474, "step": 9942 }, { "epoch": 0.81, "grad_norm": 3.6758936164347076, "learning_rate": 9.399687414556885e-07, "loss": 0.6395, "step": 9943 }, { "epoch": 0.81, "grad_norm": 3.8841177307556918, "learning_rate": 9.392011769712784e-07, "loss": 0.7285, "step": 9944 }, { "epoch": 0.81, "grad_norm": 2.562305562817162, "learning_rate": 9.384338935197174e-07, "loss": 0.6913, "step": 9945 }, { "epoch": 0.81, "grad_norm": 8.272170620929478, "learning_rate": 9.376668911541042e-07, "loss": 0.8153, "step": 9946 }, { "epoch": 0.81, "grad_norm": 2.599613293050827, "learning_rate": 9.369001699275199e-07, "loss": 0.7047, "step": 9947 }, { "epoch": 0.81, "grad_norm": 5.567350074408844, "learning_rate": 9.361337298930284e-07, "loss": 0.5762, "step": 9948 }, { "epoch": 0.81, "grad_norm": 4.325903761824358, "learning_rate": 9.353675711036697e-07, "loss": 0.6129, "step": 9949 }, { "epoch": 0.81, "grad_norm": 3.660770102574959, "learning_rate": 9.346016936124708e-07, "loss": 0.6449, "step": 9950 }, { "epoch": 0.81, "grad_norm": 5.817692395970429, "learning_rate": 9.338360974724298e-07, "loss": 0.6095, "step": 9951 }, { "epoch": 0.81, "grad_norm": 8.418374291451418, "learning_rate": 9.330707827365354e-07, "loss": 0.715, "step": 9952 }, { "epoch": 0.81, "grad_norm": 6.890711367873007, "learning_rate": 9.323057494577498e-07, "loss": 0.6922, "step": 9953 }, { "epoch": 0.81, "grad_norm": 2.0488552272629583, "learning_rate": 9.315409976890172e-07, "loss": 0.6094, "step": 9954 }, { "epoch": 0.81, "grad_norm": 5.277842085185813, "learning_rate": 9.307765274832664e-07, "loss": 0.6416, "step": 9955 }, { "epoch": 0.81, "grad_norm": 9.701786330251842, "learning_rate": 9.300123388934001e-07, "loss": 0.7129, "step": 9956 }, { "epoch": 0.81, "grad_norm": 2.685363438627029, "learning_rate": 9.292484319723094e-07, "loss": 0.6335, "step": 9957 }, { "epoch": 0.81, "grad_norm": 5.091373219660217, "learning_rate": 9.284848067728569e-07, "loss": 0.6499, "step": 9958 }, { "epoch": 0.81, "grad_norm": 5.263243363282859, "learning_rate": 9.277214633478926e-07, "loss": 0.7102, "step": 9959 }, { "epoch": 0.81, "grad_norm": 9.469385285515743, "learning_rate": 9.269584017502431e-07, "loss": 0.8157, "step": 9960 }, { "epoch": 0.81, "grad_norm": 4.874019528623201, "learning_rate": 9.261956220327195e-07, "loss": 0.6714, "step": 9961 }, { "epoch": 0.81, "grad_norm": 10.350105670674443, "learning_rate": 9.254331242481102e-07, "loss": 0.7947, "step": 9962 }, { "epoch": 0.81, "grad_norm": 3.386553640009264, "learning_rate": 9.246709084491839e-07, "loss": 0.6018, "step": 9963 }, { "epoch": 0.81, "grad_norm": 2.548136398112541, "learning_rate": 9.239089746886909e-07, "loss": 0.6337, "step": 9964 }, { "epoch": 0.81, "grad_norm": 3.886245952536533, "learning_rate": 9.231473230193611e-07, "loss": 0.594, "step": 9965 }, { "epoch": 0.81, "grad_norm": 4.813430292578901, "learning_rate": 9.223859534939073e-07, "loss": 0.7768, "step": 9966 }, { "epoch": 0.81, "grad_norm": 5.206574699364554, "learning_rate": 9.216248661650196e-07, "loss": 0.5511, "step": 9967 }, { "epoch": 0.81, "grad_norm": 3.1499657962144294, "learning_rate": 9.208640610853719e-07, "loss": 0.632, "step": 9968 }, { "epoch": 0.81, "grad_norm": 4.785945467020094, "learning_rate": 9.201035383076152e-07, "loss": 0.7104, "step": 9969 }, { "epoch": 0.81, "grad_norm": 2.6062730959721105, "learning_rate": 9.19343297884383e-07, "loss": 0.546, "step": 9970 }, { "epoch": 0.81, "grad_norm": 4.312447485612665, "learning_rate": 9.185833398682886e-07, "loss": 0.71, "step": 9971 }, { "epoch": 0.81, "grad_norm": 3.0433521237370815, "learning_rate": 9.178236643119242e-07, "loss": 0.7095, "step": 9972 }, { "epoch": 0.81, "grad_norm": 5.3710641076397065, "learning_rate": 9.170642712678674e-07, "loss": 0.7192, "step": 9973 }, { "epoch": 0.81, "grad_norm": 4.744952186413227, "learning_rate": 9.163051607886703e-07, "loss": 0.715, "step": 9974 }, { "epoch": 0.81, "grad_norm": 4.208452409065429, "learning_rate": 9.155463329268699e-07, "loss": 0.6187, "step": 9975 }, { "epoch": 0.81, "grad_norm": 3.5017488865379813, "learning_rate": 9.147877877349815e-07, "loss": 0.751, "step": 9976 }, { "epoch": 0.81, "grad_norm": 12.000348819317006, "learning_rate": 9.140295252655002e-07, "loss": 0.616, "step": 9977 }, { "epoch": 0.81, "grad_norm": 3.1433084888266336, "learning_rate": 9.132715455709035e-07, "loss": 0.6289, "step": 9978 }, { "epoch": 0.81, "grad_norm": 35.7302882179897, "learning_rate": 9.125138487036467e-07, "loss": 0.7065, "step": 9979 }, { "epoch": 0.81, "grad_norm": 4.902207323945893, "learning_rate": 9.1175643471617e-07, "loss": 0.6192, "step": 9980 }, { "epoch": 0.81, "grad_norm": 3.2746288505432606, "learning_rate": 9.109993036608883e-07, "loss": 0.6142, "step": 9981 }, { "epoch": 0.81, "grad_norm": 4.800546252378688, "learning_rate": 9.102424555902023e-07, "loss": 0.7213, "step": 9982 }, { "epoch": 0.81, "grad_norm": 20.975126133655735, "learning_rate": 9.094858905564902e-07, "loss": 0.669, "step": 9983 }, { "epoch": 0.81, "grad_norm": 2.8746858668462645, "learning_rate": 9.0872960861211e-07, "loss": 0.7381, "step": 9984 }, { "epoch": 0.81, "grad_norm": 5.067209310758347, "learning_rate": 9.079736098094006e-07, "loss": 0.5981, "step": 9985 }, { "epoch": 0.81, "grad_norm": 2.50363557741976, "learning_rate": 9.072178942006838e-07, "loss": 0.5365, "step": 9986 }, { "epoch": 0.81, "grad_norm": 3.4652050748271845, "learning_rate": 9.064624618382595e-07, "loss": 0.548, "step": 9987 }, { "epoch": 0.81, "grad_norm": 10.690335971055255, "learning_rate": 9.057073127744065e-07, "loss": 0.6651, "step": 9988 }, { "epoch": 0.81, "grad_norm": 6.79897315345429, "learning_rate": 9.049524470613885e-07, "loss": 0.6373, "step": 9989 }, { "epoch": 0.81, "grad_norm": 5.229264430109422, "learning_rate": 9.041978647514454e-07, "loss": 0.5997, "step": 9990 }, { "epoch": 0.81, "grad_norm": 2.4722038457767863, "learning_rate": 9.034435658967999e-07, "loss": 0.5981, "step": 9991 }, { "epoch": 0.81, "grad_norm": 2.8959921227574075, "learning_rate": 9.026895505496519e-07, "loss": 0.5882, "step": 9992 }, { "epoch": 0.81, "grad_norm": 3.8511478104590635, "learning_rate": 9.019358187621874e-07, "loss": 0.6288, "step": 9993 }, { "epoch": 0.81, "grad_norm": 4.146488149715795, "learning_rate": 9.011823705865674e-07, "loss": 0.704, "step": 9994 }, { "epoch": 0.81, "grad_norm": 2.903316464439226, "learning_rate": 9.004292060749347e-07, "loss": 0.7238, "step": 9995 }, { "epoch": 0.81, "grad_norm": 3.230917624133374, "learning_rate": 8.996763252794166e-07, "loss": 0.7223, "step": 9996 }, { "epoch": 0.81, "grad_norm": 4.521846836940077, "learning_rate": 8.989237282521118e-07, "loss": 0.8728, "step": 9997 }, { "epoch": 0.81, "grad_norm": 3.7138821547713445, "learning_rate": 8.981714150451093e-07, "loss": 0.7436, "step": 9998 }, { "epoch": 0.81, "grad_norm": 3.194124361155184, "learning_rate": 8.974193857104702e-07, "loss": 0.5838, "step": 9999 }, { "epoch": 0.81, "grad_norm": 4.403892501063843, "learning_rate": 8.966676403002434e-07, "loss": 0.5369, "step": 10000 }, { "epoch": 0.81, "grad_norm": 4.375022464560412, "learning_rate": 8.959161788664522e-07, "loss": 0.5794, "step": 10001 }, { "epoch": 0.81, "grad_norm": 3.1482155647840733, "learning_rate": 8.951650014611019e-07, "loss": 0.7108, "step": 10002 }, { "epoch": 0.81, "grad_norm": 2.821499133219986, "learning_rate": 8.944141081361818e-07, "loss": 0.6749, "step": 10003 }, { "epoch": 0.81, "grad_norm": 5.211414831541902, "learning_rate": 8.936634989436537e-07, "loss": 0.5111, "step": 10004 }, { "epoch": 0.81, "grad_norm": 3.3177974720829506, "learning_rate": 8.929131739354691e-07, "loss": 0.531, "step": 10005 }, { "epoch": 0.81, "grad_norm": 3.9594489519800056, "learning_rate": 8.921631331635516e-07, "loss": 0.7919, "step": 10006 }, { "epoch": 0.81, "grad_norm": 9.729668434642653, "learning_rate": 8.914133766798117e-07, "loss": 0.7486, "step": 10007 }, { "epoch": 0.81, "grad_norm": 13.118682432926416, "learning_rate": 8.906639045361343e-07, "loss": 0.6797, "step": 10008 }, { "epoch": 0.81, "grad_norm": 5.434523461612034, "learning_rate": 8.899147167843908e-07, "loss": 0.6095, "step": 10009 }, { "epoch": 0.81, "grad_norm": 8.96529726489475, "learning_rate": 8.89165813476428e-07, "loss": 0.7011, "step": 10010 }, { "epoch": 0.81, "grad_norm": 4.2660321046956975, "learning_rate": 8.884171946640746e-07, "loss": 0.6122, "step": 10011 }, { "epoch": 0.81, "grad_norm": 5.399336702770959, "learning_rate": 8.876688603991407e-07, "loss": 0.7188, "step": 10012 }, { "epoch": 0.81, "grad_norm": 5.137013802557782, "learning_rate": 8.869208107334131e-07, "loss": 0.6184, "step": 10013 }, { "epoch": 0.81, "grad_norm": 3.5346420944263865, "learning_rate": 8.861730457186651e-07, "loss": 0.7026, "step": 10014 }, { "epoch": 0.81, "grad_norm": 13.443712801252248, "learning_rate": 8.85425565406644e-07, "loss": 0.6192, "step": 10015 }, { "epoch": 0.81, "grad_norm": 3.8982771683310378, "learning_rate": 8.846783698490835e-07, "loss": 0.6048, "step": 10016 }, { "epoch": 0.81, "grad_norm": 5.969153616681193, "learning_rate": 8.839314590976894e-07, "loss": 0.6681, "step": 10017 }, { "epoch": 0.81, "grad_norm": 5.834030993747555, "learning_rate": 8.831848332041571e-07, "loss": 0.5791, "step": 10018 }, { "epoch": 0.81, "grad_norm": 4.010729864195363, "learning_rate": 8.824384922201556e-07, "loss": 0.7278, "step": 10019 }, { "epoch": 0.81, "grad_norm": 3.2691051437356857, "learning_rate": 8.81692436197335e-07, "loss": 0.6184, "step": 10020 }, { "epoch": 0.81, "grad_norm": 5.275393904335757, "learning_rate": 8.809466651873305e-07, "loss": 0.6494, "step": 10021 }, { "epoch": 0.81, "grad_norm": 2.6415277801299886, "learning_rate": 8.802011792417515e-07, "loss": 0.7521, "step": 10022 }, { "epoch": 0.81, "grad_norm": 4.43963955885498, "learning_rate": 8.794559784121936e-07, "loss": 0.5756, "step": 10023 }, { "epoch": 0.81, "grad_norm": 3.113050618765771, "learning_rate": 8.787110627502243e-07, "loss": 0.479, "step": 10024 }, { "epoch": 0.81, "grad_norm": 3.11635687300156, "learning_rate": 8.779664323074011e-07, "loss": 0.732, "step": 10025 }, { "epoch": 0.81, "grad_norm": 3.2315641957310066, "learning_rate": 8.772220871352549e-07, "loss": 0.6677, "step": 10026 }, { "epoch": 0.81, "grad_norm": 15.812044950390879, "learning_rate": 8.76478027285298e-07, "loss": 0.6643, "step": 10027 }, { "epoch": 0.81, "grad_norm": 4.934738090176063, "learning_rate": 8.757342528090268e-07, "loss": 0.633, "step": 10028 }, { "epoch": 0.81, "grad_norm": 2.373447810651177, "learning_rate": 8.749907637579136e-07, "loss": 0.6666, "step": 10029 }, { "epoch": 0.81, "grad_norm": 3.0448811834514706, "learning_rate": 8.742475601834133e-07, "loss": 0.681, "step": 10030 }, { "epoch": 0.81, "grad_norm": 3.7632109114332377, "learning_rate": 8.735046421369581e-07, "loss": 0.6288, "step": 10031 }, { "epoch": 0.81, "grad_norm": 2.970402716900649, "learning_rate": 8.727620096699658e-07, "loss": 0.513, "step": 10032 }, { "epoch": 0.81, "grad_norm": 2.7748752909336254, "learning_rate": 8.720196628338278e-07, "loss": 0.4794, "step": 10033 }, { "epoch": 0.81, "grad_norm": 3.5168284070517495, "learning_rate": 8.71277601679923e-07, "loss": 0.7166, "step": 10034 }, { "epoch": 0.82, "grad_norm": 3.6616367936068, "learning_rate": 8.705358262596042e-07, "loss": 0.7462, "step": 10035 }, { "epoch": 0.82, "grad_norm": 5.091964927511263, "learning_rate": 8.697943366242079e-07, "loss": 0.6282, "step": 10036 }, { "epoch": 0.82, "grad_norm": 3.034898650078384, "learning_rate": 8.690531328250489e-07, "loss": 0.5632, "step": 10037 }, { "epoch": 0.82, "grad_norm": 3.3716848570428213, "learning_rate": 8.683122149134232e-07, "loss": 0.8308, "step": 10038 }, { "epoch": 0.82, "grad_norm": 9.818542424694925, "learning_rate": 8.675715829406084e-07, "loss": 0.6914, "step": 10039 }, { "epoch": 0.82, "grad_norm": 8.703049490539447, "learning_rate": 8.668312369578586e-07, "loss": 0.6717, "step": 10040 }, { "epoch": 0.82, "grad_norm": 17.006251696228503, "learning_rate": 8.660911770164132e-07, "loss": 0.6394, "step": 10041 }, { "epoch": 0.82, "grad_norm": 3.9283875912049804, "learning_rate": 8.65351403167487e-07, "loss": 0.5303, "step": 10042 }, { "epoch": 0.82, "grad_norm": 3.956962778763141, "learning_rate": 8.646119154622784e-07, "loss": 0.6878, "step": 10043 }, { "epoch": 0.82, "grad_norm": 4.723838136464106, "learning_rate": 8.638727139519637e-07, "loss": 0.6048, "step": 10044 }, { "epoch": 0.82, "grad_norm": 4.642161893501136, "learning_rate": 8.631337986876987e-07, "loss": 0.6733, "step": 10045 }, { "epoch": 0.82, "grad_norm": 3.3225533693272133, "learning_rate": 8.62395169720624e-07, "loss": 0.7483, "step": 10046 }, { "epoch": 0.82, "grad_norm": 3.5344546736925704, "learning_rate": 8.616568271018549e-07, "loss": 0.7195, "step": 10047 }, { "epoch": 0.82, "grad_norm": 2.757047265554324, "learning_rate": 8.609187708824923e-07, "loss": 0.7156, "step": 10048 }, { "epoch": 0.82, "grad_norm": 2.873518495746369, "learning_rate": 8.601810011136119e-07, "loss": 0.6754, "step": 10049 }, { "epoch": 0.82, "grad_norm": 6.134221879628959, "learning_rate": 8.594435178462729e-07, "loss": 0.595, "step": 10050 }, { "epoch": 0.82, "grad_norm": 3.296454296851781, "learning_rate": 8.587063211315138e-07, "loss": 0.748, "step": 10051 }, { "epoch": 0.82, "grad_norm": 2.9996258929563226, "learning_rate": 8.579694110203512e-07, "loss": 0.6052, "step": 10052 }, { "epoch": 0.82, "grad_norm": 4.164977674490247, "learning_rate": 8.572327875637876e-07, "loss": 0.6497, "step": 10053 }, { "epoch": 0.82, "grad_norm": 2.6727321287613868, "learning_rate": 8.564964508127987e-07, "loss": 0.6241, "step": 10054 }, { "epoch": 0.82, "grad_norm": 6.630090955781284, "learning_rate": 8.557604008183462e-07, "loss": 0.6606, "step": 10055 }, { "epoch": 0.82, "grad_norm": 5.62966583843766, "learning_rate": 8.550246376313681e-07, "loss": 0.6881, "step": 10056 }, { "epoch": 0.82, "grad_norm": 4.890753279891143, "learning_rate": 8.542891613027843e-07, "loss": 0.6692, "step": 10057 }, { "epoch": 0.82, "grad_norm": 4.633094972123343, "learning_rate": 8.535539718834929e-07, "loss": 0.7126, "step": 10058 }, { "epoch": 0.82, "grad_norm": 3.066437100145936, "learning_rate": 8.528190694243759e-07, "loss": 0.7466, "step": 10059 }, { "epoch": 0.82, "grad_norm": 3.3593599244267396, "learning_rate": 8.520844539762918e-07, "loss": 0.6855, "step": 10060 }, { "epoch": 0.82, "grad_norm": 18.54404783434007, "learning_rate": 8.513501255900802e-07, "loss": 0.6152, "step": 10061 }, { "epoch": 0.82, "grad_norm": 3.779066328568525, "learning_rate": 8.506160843165629e-07, "loss": 0.8059, "step": 10062 }, { "epoch": 0.82, "grad_norm": 3.1639471885558628, "learning_rate": 8.498823302065395e-07, "loss": 0.5708, "step": 10063 }, { "epoch": 0.82, "grad_norm": 17.83718328452912, "learning_rate": 8.491488633107897e-07, "loss": 0.795, "step": 10064 }, { "epoch": 0.82, "grad_norm": 6.003342601866175, "learning_rate": 8.484156836800739e-07, "loss": 0.5638, "step": 10065 }, { "epoch": 0.82, "grad_norm": 6.333901284514175, "learning_rate": 8.476827913651337e-07, "loss": 0.8183, "step": 10066 }, { "epoch": 0.82, "grad_norm": 2.6577947985809796, "learning_rate": 8.469501864166902e-07, "loss": 0.7356, "step": 10067 }, { "epoch": 0.82, "grad_norm": 4.542498708977644, "learning_rate": 8.462178688854423e-07, "loss": 0.6353, "step": 10068 }, { "epoch": 0.82, "grad_norm": 10.434213955943274, "learning_rate": 8.454858388220744e-07, "loss": 0.6221, "step": 10069 }, { "epoch": 0.82, "grad_norm": 3.8546958778127074, "learning_rate": 8.447540962772426e-07, "loss": 0.6192, "step": 10070 }, { "epoch": 0.82, "grad_norm": 2.2740977978185803, "learning_rate": 8.440226413015928e-07, "loss": 0.7045, "step": 10071 }, { "epoch": 0.82, "grad_norm": 6.322790581030244, "learning_rate": 8.432914739457432e-07, "loss": 0.7421, "step": 10072 }, { "epoch": 0.82, "grad_norm": 2.878544547668449, "learning_rate": 8.425605942602977e-07, "loss": 0.6284, "step": 10073 }, { "epoch": 0.82, "grad_norm": 3.324119782403429, "learning_rate": 8.418300022958359e-07, "loss": 0.5786, "step": 10074 }, { "epoch": 0.82, "grad_norm": 4.791672976030121, "learning_rate": 8.41099698102919e-07, "loss": 0.5112, "step": 10075 }, { "epoch": 0.82, "grad_norm": 3.136160297104687, "learning_rate": 8.403696817320922e-07, "loss": 0.7406, "step": 10076 }, { "epoch": 0.82, "grad_norm": 3.066838639519394, "learning_rate": 8.396399532338722e-07, "loss": 0.6602, "step": 10077 }, { "epoch": 0.82, "grad_norm": 4.688393503306328, "learning_rate": 8.389105126587644e-07, "loss": 0.5227, "step": 10078 }, { "epoch": 0.82, "grad_norm": 2.9545023242377226, "learning_rate": 8.38181360057248e-07, "loss": 0.5662, "step": 10079 }, { "epoch": 0.82, "grad_norm": 6.427798172336533, "learning_rate": 8.37452495479788e-07, "loss": 0.7295, "step": 10080 }, { "epoch": 0.82, "grad_norm": 4.184494303271157, "learning_rate": 8.36723918976825e-07, "loss": 0.7596, "step": 10081 }, { "epoch": 0.82, "grad_norm": 3.931254765805409, "learning_rate": 8.359956305987805e-07, "loss": 0.6669, "step": 10082 }, { "epoch": 0.82, "grad_norm": 6.752607265065419, "learning_rate": 8.352676303960561e-07, "loss": 0.7041, "step": 10083 }, { "epoch": 0.82, "grad_norm": 2.3524220642091382, "learning_rate": 8.345399184190362e-07, "loss": 0.6889, "step": 10084 }, { "epoch": 0.82, "grad_norm": 3.5619192014658085, "learning_rate": 8.33812494718082e-07, "loss": 0.6599, "step": 10085 }, { "epoch": 0.82, "grad_norm": 4.870899849411198, "learning_rate": 8.330853593435345e-07, "loss": 0.7056, "step": 10086 }, { "epoch": 0.82, "grad_norm": 2.398365406564716, "learning_rate": 8.323585123457179e-07, "loss": 0.5399, "step": 10087 }, { "epoch": 0.82, "grad_norm": 3.100343712333666, "learning_rate": 8.316319537749328e-07, "loss": 0.5487, "step": 10088 }, { "epoch": 0.82, "grad_norm": 4.064413469988187, "learning_rate": 8.309056836814656e-07, "loss": 0.7184, "step": 10089 }, { "epoch": 0.82, "grad_norm": 3.494413252339731, "learning_rate": 8.301797021155733e-07, "loss": 0.5436, "step": 10090 }, { "epoch": 0.82, "grad_norm": 3.1773139635434604, "learning_rate": 8.294540091275022e-07, "loss": 0.8159, "step": 10091 }, { "epoch": 0.82, "grad_norm": 14.091969817918459, "learning_rate": 8.28728604767473e-07, "loss": 0.6656, "step": 10092 }, { "epoch": 0.82, "grad_norm": 10.58721334926195, "learning_rate": 8.280034890856886e-07, "loss": 0.6846, "step": 10093 }, { "epoch": 0.82, "grad_norm": 3.557409951412148, "learning_rate": 8.272786621323326e-07, "loss": 0.6401, "step": 10094 }, { "epoch": 0.82, "grad_norm": 2.69140007905225, "learning_rate": 8.265541239575653e-07, "loss": 0.5285, "step": 10095 }, { "epoch": 0.82, "grad_norm": 5.637270821416528, "learning_rate": 8.258298746115334e-07, "loss": 0.6103, "step": 10096 }, { "epoch": 0.82, "grad_norm": 2.8348275711569757, "learning_rate": 8.251059141443545e-07, "loss": 0.6203, "step": 10097 }, { "epoch": 0.82, "grad_norm": 3.3500300781312875, "learning_rate": 8.243822426061348e-07, "loss": 0.6617, "step": 10098 }, { "epoch": 0.82, "grad_norm": 5.414726643368543, "learning_rate": 8.236588600469558e-07, "loss": 0.6935, "step": 10099 }, { "epoch": 0.82, "grad_norm": 4.03935550027246, "learning_rate": 8.229357665168791e-07, "loss": 0.6556, "step": 10100 }, { "epoch": 0.82, "grad_norm": 5.480901250831511, "learning_rate": 8.222129620659497e-07, "loss": 0.5506, "step": 10101 }, { "epoch": 0.82, "grad_norm": 3.177275605231011, "learning_rate": 8.214904467441887e-07, "loss": 0.5894, "step": 10102 }, { "epoch": 0.82, "grad_norm": 22.891657069117663, "learning_rate": 8.207682206015988e-07, "loss": 0.6457, "step": 10103 }, { "epoch": 0.82, "grad_norm": 2.850670177976615, "learning_rate": 8.200462836881612e-07, "loss": 0.6119, "step": 10104 }, { "epoch": 0.82, "grad_norm": 9.147155185340978, "learning_rate": 8.19324636053841e-07, "loss": 0.6607, "step": 10105 }, { "epoch": 0.82, "grad_norm": 6.187151596075321, "learning_rate": 8.186032777485803e-07, "loss": 0.6391, "step": 10106 }, { "epoch": 0.82, "grad_norm": 5.270641072708848, "learning_rate": 8.178822088222992e-07, "loss": 0.6316, "step": 10107 }, { "epoch": 0.82, "grad_norm": 6.023323030820504, "learning_rate": 8.171614293249036e-07, "loss": 0.7677, "step": 10108 }, { "epoch": 0.82, "grad_norm": 8.19160943650215, "learning_rate": 8.164409393062744e-07, "loss": 0.6565, "step": 10109 }, { "epoch": 0.82, "grad_norm": 2.9053125784745752, "learning_rate": 8.157207388162741e-07, "loss": 0.6957, "step": 10110 }, { "epoch": 0.82, "grad_norm": 10.482786565300222, "learning_rate": 8.150008279047439e-07, "loss": 0.6524, "step": 10111 }, { "epoch": 0.82, "grad_norm": 6.324983642143745, "learning_rate": 8.142812066215083e-07, "loss": 0.6409, "step": 10112 }, { "epoch": 0.82, "grad_norm": 6.970866170137658, "learning_rate": 8.135618750163677e-07, "loss": 0.694, "step": 10113 }, { "epoch": 0.82, "grad_norm": 4.172965195778272, "learning_rate": 8.12842833139107e-07, "loss": 0.6435, "step": 10114 }, { "epoch": 0.82, "grad_norm": 3.2351037799756264, "learning_rate": 8.12124081039486e-07, "loss": 0.6842, "step": 10115 }, { "epoch": 0.82, "grad_norm": 6.935783492440107, "learning_rate": 8.114056187672481e-07, "loss": 0.6755, "step": 10116 }, { "epoch": 0.82, "grad_norm": 4.096022214321478, "learning_rate": 8.106874463721143e-07, "loss": 0.633, "step": 10117 }, { "epoch": 0.82, "grad_norm": 5.154422332599943, "learning_rate": 8.099695639037869e-07, "loss": 0.7158, "step": 10118 }, { "epoch": 0.82, "grad_norm": 2.6058249391847434, "learning_rate": 8.09251971411949e-07, "loss": 0.5274, "step": 10119 }, { "epoch": 0.82, "grad_norm": 2.815111526971434, "learning_rate": 8.085346689462609e-07, "loss": 0.6454, "step": 10120 }, { "epoch": 0.82, "grad_norm": 3.7593263902721223, "learning_rate": 8.078176565563661e-07, "loss": 0.6661, "step": 10121 }, { "epoch": 0.82, "grad_norm": 4.994278030161245, "learning_rate": 8.071009342918861e-07, "loss": 0.6275, "step": 10122 }, { "epoch": 0.82, "grad_norm": 4.208489094655455, "learning_rate": 8.063845022024219e-07, "loss": 0.6778, "step": 10123 }, { "epoch": 0.82, "grad_norm": 4.296257192510657, "learning_rate": 8.056683603375553e-07, "loss": 0.6065, "step": 10124 }, { "epoch": 0.82, "grad_norm": 6.979817946677779, "learning_rate": 8.049525087468469e-07, "loss": 0.6094, "step": 10125 }, { "epoch": 0.82, "grad_norm": 2.9296188092921005, "learning_rate": 8.042369474798401e-07, "loss": 0.6018, "step": 10126 }, { "epoch": 0.82, "grad_norm": 3.3887398248505303, "learning_rate": 8.035216765860537e-07, "loss": 0.5247, "step": 10127 }, { "epoch": 0.82, "grad_norm": 4.436193239621094, "learning_rate": 8.028066961149921e-07, "loss": 0.6429, "step": 10128 }, { "epoch": 0.82, "grad_norm": 7.551590110345325, "learning_rate": 8.020920061161352e-07, "loss": 0.6206, "step": 10129 }, { "epoch": 0.82, "grad_norm": 2.496423160290504, "learning_rate": 8.013776066389434e-07, "loss": 0.558, "step": 10130 }, { "epoch": 0.82, "grad_norm": 3.670744597907161, "learning_rate": 8.006634977328575e-07, "loss": 0.706, "step": 10131 }, { "epoch": 0.82, "grad_norm": 5.848619146668664, "learning_rate": 7.999496794472977e-07, "loss": 0.7227, "step": 10132 }, { "epoch": 0.82, "grad_norm": 9.869707989799299, "learning_rate": 7.992361518316677e-07, "loss": 0.7191, "step": 10133 }, { "epoch": 0.82, "grad_norm": 8.498997857008085, "learning_rate": 7.98522914935344e-07, "loss": 0.6804, "step": 10134 }, { "epoch": 0.82, "grad_norm": 6.126768132874517, "learning_rate": 7.978099688076912e-07, "loss": 0.5459, "step": 10135 }, { "epoch": 0.82, "grad_norm": 13.289793451007261, "learning_rate": 7.970973134980475e-07, "loss": 0.6822, "step": 10136 }, { "epoch": 0.82, "grad_norm": 3.864888186080762, "learning_rate": 7.963849490557335e-07, "loss": 0.4448, "step": 10137 }, { "epoch": 0.82, "grad_norm": 1.9225289849221263, "learning_rate": 7.956728755300474e-07, "loss": 0.5732, "step": 10138 }, { "epoch": 0.82, "grad_norm": 5.519012125235013, "learning_rate": 7.949610929702728e-07, "loss": 0.6365, "step": 10139 }, { "epoch": 0.82, "grad_norm": 4.75023876574547, "learning_rate": 7.942496014256673e-07, "loss": 0.7651, "step": 10140 }, { "epoch": 0.82, "grad_norm": 4.231250425666945, "learning_rate": 7.9353840094547e-07, "loss": 0.7076, "step": 10141 }, { "epoch": 0.82, "grad_norm": 7.2890702116742485, "learning_rate": 7.928274915789035e-07, "loss": 0.6996, "step": 10142 }, { "epoch": 0.82, "grad_norm": 6.20125422483117, "learning_rate": 7.921168733751633e-07, "loss": 0.6531, "step": 10143 }, { "epoch": 0.82, "grad_norm": 2.581696168456722, "learning_rate": 7.914065463834314e-07, "loss": 0.6917, "step": 10144 }, { "epoch": 0.82, "grad_norm": 5.303097198431156, "learning_rate": 7.906965106528647e-07, "loss": 0.6645, "step": 10145 }, { "epoch": 0.82, "grad_norm": 3.6990325959823056, "learning_rate": 7.899867662326049e-07, "loss": 0.7903, "step": 10146 }, { "epoch": 0.82, "grad_norm": 7.150008280987295, "learning_rate": 7.89277313171769e-07, "loss": 0.6497, "step": 10147 }, { "epoch": 0.82, "grad_norm": 4.100901674782403, "learning_rate": 7.885681515194549e-07, "loss": 0.5572, "step": 10148 }, { "epoch": 0.82, "grad_norm": 3.1770106590830047, "learning_rate": 7.878592813247443e-07, "loss": 0.6542, "step": 10149 }, { "epoch": 0.82, "grad_norm": 3.5956830684505516, "learning_rate": 7.871507026366909e-07, "loss": 0.8315, "step": 10150 }, { "epoch": 0.82, "grad_norm": 4.22113903684988, "learning_rate": 7.864424155043366e-07, "loss": 0.5046, "step": 10151 }, { "epoch": 0.82, "grad_norm": 2.3724472663445773, "learning_rate": 7.857344199766964e-07, "loss": 0.6814, "step": 10152 }, { "epoch": 0.82, "grad_norm": 4.874729327220722, "learning_rate": 7.850267161027709e-07, "loss": 0.8528, "step": 10153 }, { "epoch": 0.82, "grad_norm": 5.4719900299038, "learning_rate": 7.843193039315361e-07, "loss": 0.6331, "step": 10154 }, { "epoch": 0.82, "grad_norm": 4.86477705255406, "learning_rate": 7.836121835119498e-07, "loss": 0.6353, "step": 10155 }, { "epoch": 0.82, "grad_norm": 4.007241314171843, "learning_rate": 7.829053548929488e-07, "loss": 0.6617, "step": 10156 }, { "epoch": 0.82, "grad_norm": 3.843163189941457, "learning_rate": 7.821988181234497e-07, "loss": 0.6907, "step": 10157 }, { "epoch": 0.83, "grad_norm": 2.824561973661893, "learning_rate": 7.814925732523504e-07, "loss": 0.6195, "step": 10158 }, { "epoch": 0.83, "grad_norm": 3.3628067254773377, "learning_rate": 7.807866203285258e-07, "loss": 0.6719, "step": 10159 }, { "epoch": 0.83, "grad_norm": 2.7102201205108396, "learning_rate": 7.800809594008346e-07, "loss": 0.6202, "step": 10160 }, { "epoch": 0.83, "grad_norm": 8.493171717989103, "learning_rate": 7.793755905181111e-07, "loss": 0.6924, "step": 10161 }, { "epoch": 0.83, "grad_norm": 8.186740370391155, "learning_rate": 7.78670513729174e-07, "loss": 0.7066, "step": 10162 }, { "epoch": 0.83, "grad_norm": 3.876260709066584, "learning_rate": 7.779657290828146e-07, "loss": 0.6074, "step": 10163 }, { "epoch": 0.83, "grad_norm": 3.619848332205266, "learning_rate": 7.772612366278121e-07, "loss": 0.7117, "step": 10164 }, { "epoch": 0.83, "grad_norm": 3.9413923770978916, "learning_rate": 7.76557036412921e-07, "loss": 0.6303, "step": 10165 }, { "epoch": 0.83, "grad_norm": 3.4733039403608457, "learning_rate": 7.758531284868742e-07, "loss": 0.5753, "step": 10166 }, { "epoch": 0.83, "grad_norm": 13.141796085533507, "learning_rate": 7.7514951289839e-07, "loss": 0.8038, "step": 10167 }, { "epoch": 0.83, "grad_norm": 6.029904041482119, "learning_rate": 7.744461896961598e-07, "loss": 0.5812, "step": 10168 }, { "epoch": 0.83, "grad_norm": 3.519758699994979, "learning_rate": 7.737431589288619e-07, "loss": 0.7023, "step": 10169 }, { "epoch": 0.83, "grad_norm": 3.3300568765930016, "learning_rate": 7.730404206451459e-07, "loss": 0.6397, "step": 10170 }, { "epoch": 0.83, "grad_norm": 5.680880422768918, "learning_rate": 7.723379748936494e-07, "loss": 0.5487, "step": 10171 }, { "epoch": 0.83, "grad_norm": 4.822683081387333, "learning_rate": 7.716358217229841e-07, "loss": 0.6671, "step": 10172 }, { "epoch": 0.83, "grad_norm": 3.7748097314750066, "learning_rate": 7.709339611817429e-07, "loss": 0.7738, "step": 10173 }, { "epoch": 0.83, "grad_norm": 3.4303245913987572, "learning_rate": 7.702323933185013e-07, "loss": 0.7037, "step": 10174 }, { "epoch": 0.83, "grad_norm": 2.8167239006212763, "learning_rate": 7.695311181818111e-07, "loss": 0.7308, "step": 10175 }, { "epoch": 0.83, "grad_norm": 3.0217795272544095, "learning_rate": 7.688301358202043e-07, "loss": 0.7025, "step": 10176 }, { "epoch": 0.83, "grad_norm": 9.061168689610298, "learning_rate": 7.681294462821925e-07, "loss": 0.5613, "step": 10177 }, { "epoch": 0.83, "grad_norm": 5.125386807553893, "learning_rate": 7.674290496162707e-07, "loss": 0.5784, "step": 10178 }, { "epoch": 0.83, "grad_norm": 3.073319161000866, "learning_rate": 7.667289458709088e-07, "loss": 0.6692, "step": 10179 }, { "epoch": 0.83, "grad_norm": 23.21586193756055, "learning_rate": 7.660291350945581e-07, "loss": 0.6006, "step": 10180 }, { "epoch": 0.83, "grad_norm": 4.410568116311697, "learning_rate": 7.653296173356512e-07, "loss": 0.5988, "step": 10181 }, { "epoch": 0.83, "grad_norm": 3.4146482517699717, "learning_rate": 7.646303926425986e-07, "loss": 0.6404, "step": 10182 }, { "epoch": 0.83, "grad_norm": 4.819260532191193, "learning_rate": 7.639314610637905e-07, "loss": 0.7422, "step": 10183 }, { "epoch": 0.83, "grad_norm": 4.126538264960124, "learning_rate": 7.632328226475971e-07, "loss": 0.7315, "step": 10184 }, { "epoch": 0.83, "grad_norm": 3.0229372395694156, "learning_rate": 7.625344774423704e-07, "loss": 0.4861, "step": 10185 }, { "epoch": 0.83, "grad_norm": 5.454984074248593, "learning_rate": 7.618364254964378e-07, "loss": 0.535, "step": 10186 }, { "epoch": 0.83, "grad_norm": 4.5718627092307225, "learning_rate": 7.611386668581117e-07, "loss": 0.6587, "step": 10187 }, { "epoch": 0.83, "grad_norm": 3.075648257710709, "learning_rate": 7.604412015756796e-07, "loss": 0.7122, "step": 10188 }, { "epoch": 0.83, "grad_norm": 3.203526440911372, "learning_rate": 7.597440296974112e-07, "loss": 0.5986, "step": 10189 }, { "epoch": 0.83, "grad_norm": 6.188141744831746, "learning_rate": 7.590471512715547e-07, "loss": 0.4143, "step": 10190 }, { "epoch": 0.83, "grad_norm": 3.781234348822351, "learning_rate": 7.58350566346337e-07, "loss": 0.5905, "step": 10191 }, { "epoch": 0.83, "grad_norm": 13.772789176008088, "learning_rate": 7.576542749699695e-07, "loss": 0.5904, "step": 10192 }, { "epoch": 0.83, "grad_norm": 2.873932427968937, "learning_rate": 7.569582771906364e-07, "loss": 0.6637, "step": 10193 }, { "epoch": 0.83, "grad_norm": 11.71370104878234, "learning_rate": 7.562625730565088e-07, "loss": 0.753, "step": 10194 }, { "epoch": 0.83, "grad_norm": 5.5976354786786295, "learning_rate": 7.555671626157312e-07, "loss": 0.6404, "step": 10195 }, { "epoch": 0.83, "grad_norm": 3.3844331630609803, "learning_rate": 7.548720459164316e-07, "loss": 0.7377, "step": 10196 }, { "epoch": 0.83, "grad_norm": 5.307981339011157, "learning_rate": 7.541772230067157e-07, "loss": 0.6314, "step": 10197 }, { "epoch": 0.83, "grad_norm": 5.061631733208505, "learning_rate": 7.53482693934669e-07, "loss": 0.7617, "step": 10198 }, { "epoch": 0.83, "grad_norm": 4.451421598327279, "learning_rate": 7.527884587483592e-07, "loss": 0.668, "step": 10199 }, { "epoch": 0.83, "grad_norm": 4.771655132737514, "learning_rate": 7.520945174958294e-07, "loss": 0.6895, "step": 10200 }, { "epoch": 0.83, "grad_norm": 22.414786560626762, "learning_rate": 7.514008702251068e-07, "loss": 0.5476, "step": 10201 }, { "epoch": 0.83, "grad_norm": 4.674627558912777, "learning_rate": 7.50707516984196e-07, "loss": 0.5944, "step": 10202 }, { "epoch": 0.83, "grad_norm": 3.854452834521621, "learning_rate": 7.500144578210805e-07, "loss": 0.6728, "step": 10203 }, { "epoch": 0.83, "grad_norm": 4.17955301544156, "learning_rate": 7.49321692783725e-07, "loss": 0.7063, "step": 10204 }, { "epoch": 0.83, "grad_norm": 7.769539727100411, "learning_rate": 7.486292219200714e-07, "loss": 0.7398, "step": 10205 }, { "epoch": 0.83, "grad_norm": 3.1111028518844046, "learning_rate": 7.47937045278046e-07, "loss": 0.7386, "step": 10206 }, { "epoch": 0.83, "grad_norm": 3.904299973843954, "learning_rate": 7.472451629055483e-07, "loss": 0.6721, "step": 10207 }, { "epoch": 0.83, "grad_norm": 4.211919171774436, "learning_rate": 7.46553574850466e-07, "loss": 0.8329, "step": 10208 }, { "epoch": 0.83, "grad_norm": 6.216305997681818, "learning_rate": 7.458622811606553e-07, "loss": 0.565, "step": 10209 }, { "epoch": 0.83, "grad_norm": 2.968840945883631, "learning_rate": 7.451712818839629e-07, "loss": 0.5918, "step": 10210 }, { "epoch": 0.83, "grad_norm": 4.570607731068357, "learning_rate": 7.444805770682068e-07, "loss": 0.7387, "step": 10211 }, { "epoch": 0.83, "grad_norm": 2.710315573917204, "learning_rate": 7.437901667611908e-07, "loss": 0.7255, "step": 10212 }, { "epoch": 0.83, "grad_norm": 2.489205059000977, "learning_rate": 7.431000510106945e-07, "loss": 0.5279, "step": 10213 }, { "epoch": 0.83, "grad_norm": 4.545759072508146, "learning_rate": 7.424102298644775e-07, "loss": 0.75, "step": 10214 }, { "epoch": 0.83, "grad_norm": 3.5425442188725946, "learning_rate": 7.417207033702827e-07, "loss": 0.4542, "step": 10215 }, { "epoch": 0.83, "grad_norm": 6.963534589952645, "learning_rate": 7.410314715758255e-07, "loss": 0.64, "step": 10216 }, { "epoch": 0.83, "grad_norm": 4.365877125503975, "learning_rate": 7.403425345288079e-07, "loss": 0.6509, "step": 10217 }, { "epoch": 0.83, "grad_norm": 3.354505495325425, "learning_rate": 7.39653892276907e-07, "loss": 0.5866, "step": 10218 }, { "epoch": 0.83, "grad_norm": 2.6790065935553384, "learning_rate": 7.389655448677834e-07, "loss": 0.5339, "step": 10219 }, { "epoch": 0.83, "grad_norm": 3.2477589082173246, "learning_rate": 7.382774923490738e-07, "loss": 0.6434, "step": 10220 }, { "epoch": 0.83, "grad_norm": 5.067106079956164, "learning_rate": 7.375897347683942e-07, "loss": 0.532, "step": 10221 }, { "epoch": 0.83, "grad_norm": 3.705076837922779, "learning_rate": 7.36902272173346e-07, "loss": 0.6304, "step": 10222 }, { "epoch": 0.83, "grad_norm": 5.287578836010013, "learning_rate": 7.362151046115007e-07, "loss": 0.5771, "step": 10223 }, { "epoch": 0.83, "grad_norm": 3.2333213765429933, "learning_rate": 7.355282321304185e-07, "loss": 0.7157, "step": 10224 }, { "epoch": 0.83, "grad_norm": 5.287089846042632, "learning_rate": 7.348416547776327e-07, "loss": 0.6911, "step": 10225 }, { "epoch": 0.83, "grad_norm": 8.34536524856903, "learning_rate": 7.341553726006611e-07, "loss": 0.5208, "step": 10226 }, { "epoch": 0.83, "grad_norm": 4.067430452718088, "learning_rate": 7.334693856469982e-07, "loss": 0.6789, "step": 10227 }, { "epoch": 0.83, "grad_norm": 8.551517404564226, "learning_rate": 7.327836939641175e-07, "loss": 0.5792, "step": 10228 }, { "epoch": 0.83, "grad_norm": 5.678720595104767, "learning_rate": 7.320982975994739e-07, "loss": 0.6594, "step": 10229 }, { "epoch": 0.83, "grad_norm": 5.080679204551104, "learning_rate": 7.314131966005e-07, "loss": 0.7425, "step": 10230 }, { "epoch": 0.83, "grad_norm": 6.4504270401508155, "learning_rate": 7.307283910146118e-07, "loss": 0.5143, "step": 10231 }, { "epoch": 0.83, "grad_norm": 5.955054458292446, "learning_rate": 7.300438808891985e-07, "loss": 0.5932, "step": 10232 }, { "epoch": 0.83, "grad_norm": 5.943403200607857, "learning_rate": 7.293596662716362e-07, "loss": 0.6345, "step": 10233 }, { "epoch": 0.83, "grad_norm": 3.6936250023573356, "learning_rate": 7.286757472092749e-07, "loss": 0.6373, "step": 10234 }, { "epoch": 0.83, "grad_norm": 2.87598512920276, "learning_rate": 7.279921237494464e-07, "loss": 0.7345, "step": 10235 }, { "epoch": 0.83, "grad_norm": 4.4833261912587865, "learning_rate": 7.273087959394609e-07, "loss": 0.6137, "step": 10236 }, { "epoch": 0.83, "grad_norm": 2.991248061720026, "learning_rate": 7.266257638266106e-07, "loss": 0.6686, "step": 10237 }, { "epoch": 0.83, "grad_norm": 4.524742297759488, "learning_rate": 7.259430274581647e-07, "loss": 0.7008, "step": 10238 }, { "epoch": 0.83, "grad_norm": 7.39141564428959, "learning_rate": 7.252605868813722e-07, "loss": 0.6848, "step": 10239 }, { "epoch": 0.83, "grad_norm": 5.836627278870458, "learning_rate": 7.245784421434643e-07, "loss": 0.6215, "step": 10240 }, { "epoch": 0.83, "grad_norm": 3.1396570859644592, "learning_rate": 7.23896593291647e-07, "loss": 0.5527, "step": 10241 }, { "epoch": 0.83, "grad_norm": 4.097785791460001, "learning_rate": 7.232150403731126e-07, "loss": 0.7235, "step": 10242 }, { "epoch": 0.83, "grad_norm": 3.688406583616061, "learning_rate": 7.225337834350237e-07, "loss": 0.6325, "step": 10243 }, { "epoch": 0.83, "grad_norm": 3.648541327707882, "learning_rate": 7.218528225245314e-07, "loss": 0.6635, "step": 10244 }, { "epoch": 0.83, "grad_norm": 26.55656057431152, "learning_rate": 7.211721576887609e-07, "loss": 0.72, "step": 10245 }, { "epoch": 0.83, "grad_norm": 5.880070357657142, "learning_rate": 7.204917889748181e-07, "loss": 0.7435, "step": 10246 }, { "epoch": 0.83, "grad_norm": 6.262622038231014, "learning_rate": 7.198117164297908e-07, "loss": 0.6219, "step": 10247 }, { "epoch": 0.83, "grad_norm": 4.607431292423183, "learning_rate": 7.191319401007423e-07, "loss": 0.7331, "step": 10248 }, { "epoch": 0.83, "grad_norm": 3.435736569694444, "learning_rate": 7.184524600347187e-07, "loss": 0.7149, "step": 10249 }, { "epoch": 0.83, "grad_norm": 4.653171952035075, "learning_rate": 7.177732762787426e-07, "loss": 0.72, "step": 10250 }, { "epoch": 0.83, "grad_norm": 4.977890959625817, "learning_rate": 7.170943888798199e-07, "loss": 0.8009, "step": 10251 }, { "epoch": 0.83, "grad_norm": 2.959268097093111, "learning_rate": 7.164157978849329e-07, "loss": 0.6028, "step": 10252 }, { "epoch": 0.83, "grad_norm": 3.018126538434741, "learning_rate": 7.15737503341043e-07, "loss": 0.669, "step": 10253 }, { "epoch": 0.83, "grad_norm": 4.264711722504301, "learning_rate": 7.150595052950954e-07, "loss": 0.6819, "step": 10254 }, { "epoch": 0.83, "grad_norm": 3.7745904981180027, "learning_rate": 7.143818037940098e-07, "loss": 0.5662, "step": 10255 }, { "epoch": 0.83, "grad_norm": 4.463814512383072, "learning_rate": 7.137043988846881e-07, "loss": 0.6456, "step": 10256 }, { "epoch": 0.83, "grad_norm": 3.023813075919316, "learning_rate": 7.130272906140095e-07, "loss": 0.6044, "step": 10257 }, { "epoch": 0.83, "grad_norm": 7.706398060493753, "learning_rate": 7.123504790288371e-07, "loss": 0.7286, "step": 10258 }, { "epoch": 0.83, "grad_norm": 4.733282957754952, "learning_rate": 7.116739641760085e-07, "loss": 0.6908, "step": 10259 }, { "epoch": 0.83, "grad_norm": 4.712365755064234, "learning_rate": 7.109977461023415e-07, "loss": 0.556, "step": 10260 }, { "epoch": 0.83, "grad_norm": 2.5456775869275616, "learning_rate": 7.103218248546379e-07, "loss": 0.5377, "step": 10261 }, { "epoch": 0.83, "grad_norm": 4.281573303018533, "learning_rate": 7.09646200479674e-07, "loss": 0.6947, "step": 10262 }, { "epoch": 0.83, "grad_norm": 8.865958573238945, "learning_rate": 7.089708730242067e-07, "loss": 0.7508, "step": 10263 }, { "epoch": 0.83, "grad_norm": 4.342278155483451, "learning_rate": 7.082958425349734e-07, "loss": 0.567, "step": 10264 }, { "epoch": 0.83, "grad_norm": 5.786765807652377, "learning_rate": 7.076211090586909e-07, "loss": 0.6085, "step": 10265 }, { "epoch": 0.83, "grad_norm": 5.77668001919161, "learning_rate": 7.069466726420543e-07, "loss": 0.6629, "step": 10266 }, { "epoch": 0.83, "grad_norm": 2.784683119806058, "learning_rate": 7.062725333317399e-07, "loss": 0.6982, "step": 10267 }, { "epoch": 0.83, "grad_norm": 4.342476773267422, "learning_rate": 7.055986911744017e-07, "loss": 0.5913, "step": 10268 }, { "epoch": 0.83, "grad_norm": 6.940907527501119, "learning_rate": 7.04925146216674e-07, "loss": 0.8289, "step": 10269 }, { "epoch": 0.83, "grad_norm": 3.2146721293793554, "learning_rate": 7.042518985051705e-07, "loss": 0.5996, "step": 10270 }, { "epoch": 0.83, "grad_norm": 3.93633227172791, "learning_rate": 7.035789480864824e-07, "loss": 0.564, "step": 10271 }, { "epoch": 0.83, "grad_norm": 2.8101480225352993, "learning_rate": 7.029062950071847e-07, "loss": 0.5076, "step": 10272 }, { "epoch": 0.83, "grad_norm": 5.08787573823197, "learning_rate": 7.022339393138272e-07, "loss": 0.6075, "step": 10273 }, { "epoch": 0.83, "grad_norm": 3.2671836232172513, "learning_rate": 7.015618810529428e-07, "loss": 0.7458, "step": 10274 }, { "epoch": 0.83, "grad_norm": 3.9281403047501344, "learning_rate": 7.008901202710416e-07, "loss": 0.8771, "step": 10275 }, { "epoch": 0.83, "grad_norm": 3.1279623744196385, "learning_rate": 7.002186570146141e-07, "loss": 0.5684, "step": 10276 }, { "epoch": 0.83, "grad_norm": 4.0917047361638375, "learning_rate": 6.995474913301287e-07, "loss": 0.7017, "step": 10277 }, { "epoch": 0.83, "grad_norm": 4.481461830892188, "learning_rate": 6.988766232640337e-07, "loss": 0.6429, "step": 10278 }, { "epoch": 0.83, "grad_norm": 6.993127680316164, "learning_rate": 6.982060528627594e-07, "loss": 0.8608, "step": 10279 }, { "epoch": 0.83, "grad_norm": 6.8092471373883745, "learning_rate": 6.975357801727117e-07, "loss": 0.6497, "step": 10280 }, { "epoch": 0.84, "grad_norm": 2.678803408196946, "learning_rate": 6.968658052402805e-07, "loss": 0.5866, "step": 10281 }, { "epoch": 0.84, "grad_norm": 9.344964932791905, "learning_rate": 6.961961281118285e-07, "loss": 0.7114, "step": 10282 }, { "epoch": 0.84, "grad_norm": 4.401805319053445, "learning_rate": 6.955267488337048e-07, "loss": 0.6884, "step": 10283 }, { "epoch": 0.84, "grad_norm": 5.788684589390815, "learning_rate": 6.948576674522317e-07, "loss": 0.5688, "step": 10284 }, { "epoch": 0.84, "grad_norm": 5.665107987038337, "learning_rate": 6.941888840137162e-07, "loss": 0.5753, "step": 10285 }, { "epoch": 0.84, "grad_norm": 5.291629653143022, "learning_rate": 6.935203985644423e-07, "loss": 0.7368, "step": 10286 }, { "epoch": 0.84, "grad_norm": 5.4084795505447065, "learning_rate": 6.928522111506713e-07, "loss": 0.6546, "step": 10287 }, { "epoch": 0.84, "grad_norm": 9.345589560433593, "learning_rate": 6.921843218186492e-07, "loss": 0.7892, "step": 10288 }, { "epoch": 0.84, "grad_norm": 3.6173610479553737, "learning_rate": 6.915167306145943e-07, "loss": 0.6149, "step": 10289 }, { "epoch": 0.84, "grad_norm": 3.2295157064303006, "learning_rate": 6.908494375847114e-07, "loss": 0.7342, "step": 10290 }, { "epoch": 0.84, "grad_norm": 3.854541269762568, "learning_rate": 6.901824427751785e-07, "loss": 0.6512, "step": 10291 }, { "epoch": 0.84, "grad_norm": 5.720851724348029, "learning_rate": 6.895157462321589e-07, "loss": 0.6955, "step": 10292 }, { "epoch": 0.84, "grad_norm": 15.804938239872524, "learning_rate": 6.88849348001791e-07, "loss": 0.6454, "step": 10293 }, { "epoch": 0.84, "grad_norm": 4.651269146874924, "learning_rate": 6.88183248130192e-07, "loss": 0.7272, "step": 10294 }, { "epoch": 0.84, "grad_norm": 3.8057621411456464, "learning_rate": 6.875174466634638e-07, "loss": 0.6901, "step": 10295 }, { "epoch": 0.84, "grad_norm": 2.978275668062095, "learning_rate": 6.868519436476795e-07, "loss": 0.7389, "step": 10296 }, { "epoch": 0.84, "grad_norm": 2.9691941368260433, "learning_rate": 6.861867391289e-07, "loss": 0.6854, "step": 10297 }, { "epoch": 0.84, "grad_norm": 6.43910238964205, "learning_rate": 6.855218331531594e-07, "loss": 0.8386, "step": 10298 }, { "epoch": 0.84, "grad_norm": 3.2261013951058333, "learning_rate": 6.848572257664749e-07, "loss": 0.7331, "step": 10299 }, { "epoch": 0.84, "grad_norm": 5.3338752568327, "learning_rate": 6.841929170148403e-07, "loss": 0.6453, "step": 10300 }, { "epoch": 0.84, "grad_norm": 8.512909804575376, "learning_rate": 6.835289069442308e-07, "loss": 0.5218, "step": 10301 }, { "epoch": 0.84, "grad_norm": 2.874504027138462, "learning_rate": 6.828651956006e-07, "loss": 0.7418, "step": 10302 }, { "epoch": 0.84, "grad_norm": 3.8434801054176924, "learning_rate": 6.822017830298788e-07, "loss": 0.5339, "step": 10303 }, { "epoch": 0.84, "grad_norm": 14.45940776824948, "learning_rate": 6.815386692779829e-07, "loss": 0.6591, "step": 10304 }, { "epoch": 0.84, "grad_norm": 8.933732830244342, "learning_rate": 6.808758543908012e-07, "loss": 0.6542, "step": 10305 }, { "epoch": 0.84, "grad_norm": 3.334997529908435, "learning_rate": 6.802133384142068e-07, "loss": 0.6655, "step": 10306 }, { "epoch": 0.84, "grad_norm": 8.224923364123644, "learning_rate": 6.795511213940492e-07, "loss": 0.6344, "step": 10307 }, { "epoch": 0.84, "grad_norm": 5.425023392372662, "learning_rate": 6.788892033761579e-07, "loss": 0.8, "step": 10308 }, { "epoch": 0.84, "grad_norm": 4.057378608699342, "learning_rate": 6.782275844063402e-07, "loss": 0.6748, "step": 10309 }, { "epoch": 0.84, "grad_norm": 3.530998033490358, "learning_rate": 6.775662645303871e-07, "loss": 0.5981, "step": 10310 }, { "epoch": 0.84, "grad_norm": 3.0777349118971693, "learning_rate": 6.769052437940649e-07, "loss": 0.7284, "step": 10311 }, { "epoch": 0.84, "grad_norm": 3.317813205319253, "learning_rate": 6.762445222431191e-07, "loss": 0.6273, "step": 10312 }, { "epoch": 0.84, "grad_norm": 4.7540914251439546, "learning_rate": 6.755840999232776e-07, "loss": 0.7435, "step": 10313 }, { "epoch": 0.84, "grad_norm": 3.950362125580203, "learning_rate": 6.749239768802457e-07, "loss": 0.6815, "step": 10314 }, { "epoch": 0.84, "grad_norm": 3.394164293821075, "learning_rate": 6.742641531597077e-07, "loss": 0.6544, "step": 10315 }, { "epoch": 0.84, "grad_norm": 2.9423269808752077, "learning_rate": 6.736046288073261e-07, "loss": 0.6683, "step": 10316 }, { "epoch": 0.84, "grad_norm": 2.926102349419464, "learning_rate": 6.729454038687461e-07, "loss": 0.6786, "step": 10317 }, { "epoch": 0.84, "grad_norm": 6.1844165819940145, "learning_rate": 6.722864783895899e-07, "loss": 0.6564, "step": 10318 }, { "epoch": 0.84, "grad_norm": 7.913077866984318, "learning_rate": 6.716278524154579e-07, "loss": 0.6802, "step": 10319 }, { "epoch": 0.84, "grad_norm": 5.347027814148411, "learning_rate": 6.70969525991933e-07, "loss": 0.627, "step": 10320 }, { "epoch": 0.84, "grad_norm": 3.8244963417087936, "learning_rate": 6.703114991645754e-07, "loss": 0.6633, "step": 10321 }, { "epoch": 0.84, "grad_norm": 6.193735581475296, "learning_rate": 6.696537719789231e-07, "loss": 0.7494, "step": 10322 }, { "epoch": 0.84, "grad_norm": 3.1135273151725276, "learning_rate": 6.689963444804954e-07, "loss": 0.6579, "step": 10323 }, { "epoch": 0.84, "grad_norm": 2.866006617273913, "learning_rate": 6.683392167147917e-07, "loss": 0.6197, "step": 10324 }, { "epoch": 0.84, "grad_norm": 4.108516330537227, "learning_rate": 6.676823887272888e-07, "loss": 0.6185, "step": 10325 }, { "epoch": 0.84, "grad_norm": 6.062323830574421, "learning_rate": 6.670258605634422e-07, "loss": 0.7935, "step": 10326 }, { "epoch": 0.84, "grad_norm": 3.1354579115801857, "learning_rate": 6.663696322686897e-07, "loss": 0.6264, "step": 10327 }, { "epoch": 0.84, "grad_norm": 13.144135383962132, "learning_rate": 6.657137038884453e-07, "loss": 0.8409, "step": 10328 }, { "epoch": 0.84, "grad_norm": 10.44201426781892, "learning_rate": 6.650580754681035e-07, "loss": 0.7576, "step": 10329 }, { "epoch": 0.84, "grad_norm": 3.450999863278629, "learning_rate": 6.644027470530367e-07, "loss": 0.6023, "step": 10330 }, { "epoch": 0.84, "grad_norm": 2.605468927205407, "learning_rate": 6.637477186886004e-07, "loss": 0.7032, "step": 10331 }, { "epoch": 0.84, "grad_norm": 9.356896866243238, "learning_rate": 6.63092990420125e-07, "loss": 0.5973, "step": 10332 }, { "epoch": 0.84, "grad_norm": 3.021726192174664, "learning_rate": 6.624385622929214e-07, "loss": 0.6924, "step": 10333 }, { "epoch": 0.84, "grad_norm": 4.823269583980918, "learning_rate": 6.617844343522817e-07, "loss": 0.706, "step": 10334 }, { "epoch": 0.84, "grad_norm": 2.3900572299403176, "learning_rate": 6.611306066434747e-07, "loss": 0.6583, "step": 10335 }, { "epoch": 0.84, "grad_norm": 5.711679502227634, "learning_rate": 6.604770792117493e-07, "loss": 0.6994, "step": 10336 }, { "epoch": 0.84, "grad_norm": 16.79376202724141, "learning_rate": 6.598238521023332e-07, "loss": 0.6974, "step": 10337 }, { "epoch": 0.84, "grad_norm": 2.9757197337152905, "learning_rate": 6.591709253604356e-07, "loss": 0.6157, "step": 10338 }, { "epoch": 0.84, "grad_norm": 3.215857467464239, "learning_rate": 6.585182990312405e-07, "loss": 0.7551, "step": 10339 }, { "epoch": 0.84, "grad_norm": 7.121030621894883, "learning_rate": 6.578659731599169e-07, "loss": 0.5982, "step": 10340 }, { "epoch": 0.84, "grad_norm": 6.626078812745635, "learning_rate": 6.572139477916084e-07, "loss": 0.591, "step": 10341 }, { "epoch": 0.84, "grad_norm": 4.037539856414299, "learning_rate": 6.565622229714392e-07, "loss": 0.5354, "step": 10342 }, { "epoch": 0.84, "grad_norm": 2.968027033660957, "learning_rate": 6.559107987445124e-07, "loss": 0.8826, "step": 10343 }, { "epoch": 0.84, "grad_norm": 4.53264512312851, "learning_rate": 6.552596751559098e-07, "loss": 0.742, "step": 10344 }, { "epoch": 0.84, "grad_norm": 4.204865704765471, "learning_rate": 6.546088522506955e-07, "loss": 0.7021, "step": 10345 }, { "epoch": 0.84, "grad_norm": 5.867827255980681, "learning_rate": 6.539583300739089e-07, "loss": 0.6873, "step": 10346 }, { "epoch": 0.84, "grad_norm": 7.307166362198696, "learning_rate": 6.533081086705711e-07, "loss": 0.6685, "step": 10347 }, { "epoch": 0.84, "grad_norm": 7.40612694342808, "learning_rate": 6.526581880856819e-07, "loss": 0.5511, "step": 10348 }, { "epoch": 0.84, "grad_norm": 5.20080167432009, "learning_rate": 6.520085683642191e-07, "loss": 0.6783, "step": 10349 }, { "epoch": 0.84, "grad_norm": 2.5813895676085608, "learning_rate": 6.513592495511406e-07, "loss": 0.6618, "step": 10350 }, { "epoch": 0.84, "grad_norm": 2.6116065994577693, "learning_rate": 6.507102316913816e-07, "loss": 0.5529, "step": 10351 }, { "epoch": 0.84, "grad_norm": 4.80458611424017, "learning_rate": 6.500615148298617e-07, "loss": 0.6887, "step": 10352 }, { "epoch": 0.84, "grad_norm": 21.334037622840906, "learning_rate": 6.494130990114733e-07, "loss": 0.7477, "step": 10353 }, { "epoch": 0.84, "grad_norm": 3.6788465851705574, "learning_rate": 6.487649842810939e-07, "loss": 0.7162, "step": 10354 }, { "epoch": 0.84, "grad_norm": 5.3090341604953775, "learning_rate": 6.481171706835737e-07, "loss": 0.5675, "step": 10355 }, { "epoch": 0.84, "grad_norm": 4.0817200217799305, "learning_rate": 6.474696582637474e-07, "loss": 0.6988, "step": 10356 }, { "epoch": 0.84, "grad_norm": 87.68690045900183, "learning_rate": 6.46822447066427e-07, "loss": 0.7554, "step": 10357 }, { "epoch": 0.84, "grad_norm": 4.502143765734597, "learning_rate": 6.461755371364015e-07, "loss": 0.6506, "step": 10358 }, { "epoch": 0.84, "grad_norm": 3.1037843511659853, "learning_rate": 6.455289285184446e-07, "loss": 0.6503, "step": 10359 }, { "epoch": 0.84, "grad_norm": 2.484675077768453, "learning_rate": 6.448826212573023e-07, "loss": 0.7002, "step": 10360 }, { "epoch": 0.84, "grad_norm": 5.5047779931021115, "learning_rate": 6.44236615397707e-07, "loss": 0.537, "step": 10361 }, { "epoch": 0.84, "grad_norm": 5.808955311385058, "learning_rate": 6.435909109843619e-07, "loss": 0.6739, "step": 10362 }, { "epoch": 0.84, "grad_norm": 2.8508680085392153, "learning_rate": 6.429455080619568e-07, "loss": 0.6523, "step": 10363 }, { "epoch": 0.84, "grad_norm": 4.329015422887391, "learning_rate": 6.42300406675156e-07, "loss": 0.6689, "step": 10364 }, { "epoch": 0.84, "grad_norm": 5.613391125960158, "learning_rate": 6.416556068686064e-07, "loss": 0.7513, "step": 10365 }, { "epoch": 0.84, "grad_norm": 2.739849790229807, "learning_rate": 6.410111086869314e-07, "loss": 0.4758, "step": 10366 }, { "epoch": 0.84, "grad_norm": 6.131785198292752, "learning_rate": 6.403669121747336e-07, "loss": 0.5984, "step": 10367 }, { "epoch": 0.84, "grad_norm": 2.3467058360087165, "learning_rate": 6.397230173765967e-07, "loss": 0.6914, "step": 10368 }, { "epoch": 0.84, "grad_norm": 4.338622187972352, "learning_rate": 6.390794243370801e-07, "loss": 0.6395, "step": 10369 }, { "epoch": 0.84, "grad_norm": 20.877496552494325, "learning_rate": 6.384361331007271e-07, "loss": 0.7061, "step": 10370 }, { "epoch": 0.84, "grad_norm": 3.3398472569820887, "learning_rate": 6.377931437120555e-07, "loss": 0.6416, "step": 10371 }, { "epoch": 0.84, "grad_norm": 2.734674335682519, "learning_rate": 6.371504562155656e-07, "loss": 0.6, "step": 10372 }, { "epoch": 0.84, "grad_norm": 2.9118978312716317, "learning_rate": 6.365080706557352e-07, "loss": 0.6582, "step": 10373 }, { "epoch": 0.84, "grad_norm": 3.5333695170524897, "learning_rate": 6.358659870770212e-07, "loss": 0.6903, "step": 10374 }, { "epoch": 0.84, "grad_norm": 3.592788041985399, "learning_rate": 6.3522420552386e-07, "loss": 0.8557, "step": 10375 }, { "epoch": 0.84, "grad_norm": 5.290180713457035, "learning_rate": 6.34582726040665e-07, "loss": 0.7188, "step": 10376 }, { "epoch": 0.84, "grad_norm": 3.2193692414814046, "learning_rate": 6.339415486718336e-07, "loss": 0.8291, "step": 10377 }, { "epoch": 0.84, "grad_norm": 3.1535420078102843, "learning_rate": 6.333006734617375e-07, "loss": 0.6187, "step": 10378 }, { "epoch": 0.84, "grad_norm": 7.260409625376434, "learning_rate": 6.326601004547301e-07, "loss": 0.4959, "step": 10379 }, { "epoch": 0.84, "grad_norm": 10.432916900277515, "learning_rate": 6.320198296951435e-07, "loss": 0.6387, "step": 10380 }, { "epoch": 0.84, "grad_norm": 3.3128386379719243, "learning_rate": 6.31379861227287e-07, "loss": 0.6815, "step": 10381 }, { "epoch": 0.84, "grad_norm": 4.046337035488539, "learning_rate": 6.307401950954517e-07, "loss": 0.7009, "step": 10382 }, { "epoch": 0.84, "grad_norm": 5.815238562421174, "learning_rate": 6.30100831343905e-07, "loss": 0.4688, "step": 10383 }, { "epoch": 0.84, "grad_norm": 53.539435612051825, "learning_rate": 6.29461770016897e-07, "loss": 0.7678, "step": 10384 }, { "epoch": 0.84, "grad_norm": 19.587524240326776, "learning_rate": 6.288230111586524e-07, "loss": 0.5885, "step": 10385 }, { "epoch": 0.84, "grad_norm": 10.117443535484943, "learning_rate": 6.281845548133796e-07, "loss": 0.6438, "step": 10386 }, { "epoch": 0.84, "grad_norm": 3.044261323403354, "learning_rate": 6.27546401025263e-07, "loss": 0.5458, "step": 10387 }, { "epoch": 0.84, "grad_norm": 3.65084815691572, "learning_rate": 6.26908549838467e-07, "loss": 0.6376, "step": 10388 }, { "epoch": 0.84, "grad_norm": 21.497912503281153, "learning_rate": 6.262710012971329e-07, "loss": 0.6523, "step": 10389 }, { "epoch": 0.84, "grad_norm": 4.506621217933862, "learning_rate": 6.256337554453862e-07, "loss": 0.7805, "step": 10390 }, { "epoch": 0.84, "grad_norm": 6.337912195930157, "learning_rate": 6.24996812327327e-07, "loss": 0.6193, "step": 10391 }, { "epoch": 0.84, "grad_norm": 3.6775326311364274, "learning_rate": 6.243601719870346e-07, "loss": 0.5932, "step": 10392 }, { "epoch": 0.84, "grad_norm": 3.9853114690390354, "learning_rate": 6.237238344685703e-07, "loss": 0.7941, "step": 10393 }, { "epoch": 0.84, "grad_norm": 5.7886714788766644, "learning_rate": 6.230877998159724e-07, "loss": 0.6257, "step": 10394 }, { "epoch": 0.84, "grad_norm": 6.260932374432806, "learning_rate": 6.224520680732582e-07, "loss": 0.6847, "step": 10395 }, { "epoch": 0.84, "grad_norm": 4.299173570488173, "learning_rate": 6.218166392844227e-07, "loss": 0.6663, "step": 10396 }, { "epoch": 0.84, "grad_norm": 4.375333355386297, "learning_rate": 6.211815134934446e-07, "loss": 0.6135, "step": 10397 }, { "epoch": 0.84, "grad_norm": 2.5452650103996453, "learning_rate": 6.205466907442764e-07, "loss": 0.6365, "step": 10398 }, { "epoch": 0.84, "grad_norm": 3.9901833225220193, "learning_rate": 6.19912171080852e-07, "loss": 0.6014, "step": 10399 }, { "epoch": 0.84, "grad_norm": 3.261833758382411, "learning_rate": 6.192779545470856e-07, "loss": 0.8548, "step": 10400 }, { "epoch": 0.84, "grad_norm": 3.5044936715148247, "learning_rate": 6.186440411868683e-07, "loss": 0.6833, "step": 10401 }, { "epoch": 0.84, "grad_norm": 3.498299056057299, "learning_rate": 6.180104310440705e-07, "loss": 0.6055, "step": 10402 }, { "epoch": 0.84, "grad_norm": 4.807397300682668, "learning_rate": 6.173771241625409e-07, "loss": 0.6662, "step": 10403 }, { "epoch": 0.85, "grad_norm": 4.991125347854919, "learning_rate": 6.167441205861108e-07, "loss": 0.6206, "step": 10404 }, { "epoch": 0.85, "grad_norm": 8.090919405503609, "learning_rate": 6.161114203585866e-07, "loss": 0.649, "step": 10405 }, { "epoch": 0.85, "grad_norm": 8.471272683824242, "learning_rate": 6.154790235237546e-07, "loss": 0.6031, "step": 10406 }, { "epoch": 0.85, "grad_norm": 3.8302448620665666, "learning_rate": 6.148469301253834e-07, "loss": 0.603, "step": 10407 }, { "epoch": 0.85, "grad_norm": 5.12344653475466, "learning_rate": 6.142151402072133e-07, "loss": 0.585, "step": 10408 }, { "epoch": 0.85, "grad_norm": 4.341409406176881, "learning_rate": 6.135836538129725e-07, "loss": 0.6119, "step": 10409 }, { "epoch": 0.85, "grad_norm": 11.994580200705164, "learning_rate": 6.129524709863605e-07, "loss": 0.7164, "step": 10410 }, { "epoch": 0.85, "grad_norm": 4.771347418688263, "learning_rate": 6.123215917710617e-07, "loss": 0.6189, "step": 10411 }, { "epoch": 0.85, "grad_norm": 4.46114124270828, "learning_rate": 6.116910162107348e-07, "loss": 0.6445, "step": 10412 }, { "epoch": 0.85, "grad_norm": 3.2124967338696395, "learning_rate": 6.110607443490218e-07, "loss": 0.6684, "step": 10413 }, { "epoch": 0.85, "grad_norm": 2.4512186008441392, "learning_rate": 6.104307762295403e-07, "loss": 0.6613, "step": 10414 }, { "epoch": 0.85, "grad_norm": 7.234811722280214, "learning_rate": 6.098011118958885e-07, "loss": 0.6208, "step": 10415 }, { "epoch": 0.85, "grad_norm": 3.7458987287811345, "learning_rate": 6.091717513916424e-07, "loss": 0.6757, "step": 10416 }, { "epoch": 0.85, "grad_norm": 3.1862721466114, "learning_rate": 6.085426947603568e-07, "loss": 0.6142, "step": 10417 }, { "epoch": 0.85, "grad_norm": 3.5773256137198226, "learning_rate": 6.079139420455688e-07, "loss": 0.7204, "step": 10418 }, { "epoch": 0.85, "grad_norm": 2.9527655987048163, "learning_rate": 6.072854932907901e-07, "loss": 0.4914, "step": 10419 }, { "epoch": 0.85, "grad_norm": 2.713211585221999, "learning_rate": 6.066573485395155e-07, "loss": 0.6181, "step": 10420 }, { "epoch": 0.85, "grad_norm": 7.123617165101472, "learning_rate": 6.060295078352135e-07, "loss": 0.7754, "step": 10421 }, { "epoch": 0.85, "grad_norm": 3.1498774063466732, "learning_rate": 6.054019712213377e-07, "loss": 0.7949, "step": 10422 }, { "epoch": 0.85, "grad_norm": 4.337723456755252, "learning_rate": 6.047747387413156e-07, "loss": 0.7654, "step": 10423 }, { "epoch": 0.85, "grad_norm": 4.663180619022543, "learning_rate": 6.041478104385556e-07, "loss": 0.7326, "step": 10424 }, { "epoch": 0.85, "grad_norm": 4.92796186203844, "learning_rate": 6.035211863564461e-07, "loss": 0.5615, "step": 10425 }, { "epoch": 0.85, "grad_norm": 7.54693451654569, "learning_rate": 6.028948665383527e-07, "loss": 0.643, "step": 10426 }, { "epoch": 0.85, "grad_norm": 4.756255508057335, "learning_rate": 6.022688510276226e-07, "loss": 0.7324, "step": 10427 }, { "epoch": 0.85, "grad_norm": 3.799498802717264, "learning_rate": 6.016431398675764e-07, "loss": 0.625, "step": 10428 }, { "epoch": 0.85, "grad_norm": 19.40892480915589, "learning_rate": 6.010177331015205e-07, "loss": 0.613, "step": 10429 }, { "epoch": 0.85, "grad_norm": 4.081445016357576, "learning_rate": 6.003926307727359e-07, "loss": 0.5737, "step": 10430 }, { "epoch": 0.85, "grad_norm": 5.422477275692693, "learning_rate": 5.997678329244822e-07, "loss": 0.6934, "step": 10431 }, { "epoch": 0.85, "grad_norm": 2.8998179351185183, "learning_rate": 5.991433396000013e-07, "loss": 0.5573, "step": 10432 }, { "epoch": 0.85, "grad_norm": 3.075276476336215, "learning_rate": 5.985191508425109e-07, "loss": 0.541, "step": 10433 }, { "epoch": 0.85, "grad_norm": 3.1398148373933443, "learning_rate": 5.978952666952109e-07, "loss": 0.5644, "step": 10434 }, { "epoch": 0.85, "grad_norm": 5.354432132988761, "learning_rate": 5.972716872012746e-07, "loss": 0.7347, "step": 10435 }, { "epoch": 0.85, "grad_norm": 6.0921637779136235, "learning_rate": 5.966484124038602e-07, "loss": 0.604, "step": 10436 }, { "epoch": 0.85, "grad_norm": 3.775606925354813, "learning_rate": 5.960254423461009e-07, "loss": 0.5325, "step": 10437 }, { "epoch": 0.85, "grad_norm": 2.762244894743621, "learning_rate": 5.954027770711112e-07, "loss": 0.6813, "step": 10438 }, { "epoch": 0.85, "grad_norm": 3.5560300413945294, "learning_rate": 5.947804166219834e-07, "loss": 0.768, "step": 10439 }, { "epoch": 0.85, "grad_norm": 4.215138672799785, "learning_rate": 5.941583610417878e-07, "loss": 0.74, "step": 10440 }, { "epoch": 0.85, "grad_norm": 2.770448243603704, "learning_rate": 5.935366103735757e-07, "loss": 0.6183, "step": 10441 }, { "epoch": 0.85, "grad_norm": 6.107574466474058, "learning_rate": 5.929151646603742e-07, "loss": 0.6654, "step": 10442 }, { "epoch": 0.85, "grad_norm": 3.8323745782748917, "learning_rate": 5.922940239451935e-07, "loss": 0.6157, "step": 10443 }, { "epoch": 0.85, "grad_norm": 5.290518424973234, "learning_rate": 5.916731882710186e-07, "loss": 0.6526, "step": 10444 }, { "epoch": 0.85, "grad_norm": 5.360297804639516, "learning_rate": 5.910526576808173e-07, "loss": 0.5134, "step": 10445 }, { "epoch": 0.85, "grad_norm": 2.822586854552277, "learning_rate": 5.904324322175331e-07, "loss": 0.5253, "step": 10446 }, { "epoch": 0.85, "grad_norm": 3.831853653025244, "learning_rate": 5.8981251192409e-07, "loss": 0.6633, "step": 10447 }, { "epoch": 0.85, "grad_norm": 5.747059534916783, "learning_rate": 5.891928968433891e-07, "loss": 0.5285, "step": 10448 }, { "epoch": 0.85, "grad_norm": 4.4877683130208945, "learning_rate": 5.885735870183118e-07, "loss": 0.5811, "step": 10449 }, { "epoch": 0.85, "grad_norm": 3.775932976594435, "learning_rate": 5.879545824917199e-07, "loss": 0.5389, "step": 10450 }, { "epoch": 0.85, "grad_norm": 3.750757655951693, "learning_rate": 5.873358833064507e-07, "loss": 0.5711, "step": 10451 }, { "epoch": 0.85, "grad_norm": 4.417306371635409, "learning_rate": 5.867174895053235e-07, "loss": 0.6208, "step": 10452 }, { "epoch": 0.85, "grad_norm": 4.104949924456998, "learning_rate": 5.860994011311344e-07, "loss": 0.6585, "step": 10453 }, { "epoch": 0.85, "grad_norm": 5.501678328833904, "learning_rate": 5.854816182266593e-07, "loss": 0.5378, "step": 10454 }, { "epoch": 0.85, "grad_norm": 3.420261198676261, "learning_rate": 5.848641408346517e-07, "loss": 0.6376, "step": 10455 }, { "epoch": 0.85, "grad_norm": 5.197292639161484, "learning_rate": 5.842469689978447e-07, "loss": 0.7506, "step": 10456 }, { "epoch": 0.85, "grad_norm": 2.8908635631430477, "learning_rate": 5.836301027589525e-07, "loss": 0.7235, "step": 10457 }, { "epoch": 0.85, "grad_norm": 4.861022318169697, "learning_rate": 5.830135421606642e-07, "loss": 0.6076, "step": 10458 }, { "epoch": 0.85, "grad_norm": 7.489755608726509, "learning_rate": 5.823972872456512e-07, "loss": 0.7943, "step": 10459 }, { "epoch": 0.85, "grad_norm": 10.59410304310968, "learning_rate": 5.817813380565612e-07, "loss": 0.6886, "step": 10460 }, { "epoch": 0.85, "grad_norm": 6.03736415315865, "learning_rate": 5.811656946360222e-07, "loss": 0.7753, "step": 10461 }, { "epoch": 0.85, "grad_norm": 4.336831404605428, "learning_rate": 5.805503570266396e-07, "loss": 0.6668, "step": 10462 }, { "epoch": 0.85, "grad_norm": 2.5533491592726203, "learning_rate": 5.799353252710005e-07, "loss": 0.6407, "step": 10463 }, { "epoch": 0.85, "grad_norm": 3.6278356642623897, "learning_rate": 5.793205994116674e-07, "loss": 0.8978, "step": 10464 }, { "epoch": 0.85, "grad_norm": 5.5589684501172165, "learning_rate": 5.78706179491183e-07, "loss": 0.723, "step": 10465 }, { "epoch": 0.85, "grad_norm": 3.3143701313246003, "learning_rate": 5.780920655520711e-07, "loss": 0.7077, "step": 10466 }, { "epoch": 0.85, "grad_norm": 2.57361119760105, "learning_rate": 5.774782576368304e-07, "loss": 0.7415, "step": 10467 }, { "epoch": 0.85, "grad_norm": 2.947981599599951, "learning_rate": 5.768647557879408e-07, "loss": 0.635, "step": 10468 }, { "epoch": 0.85, "grad_norm": 3.227808313926324, "learning_rate": 5.762515600478596e-07, "loss": 0.7014, "step": 10469 }, { "epoch": 0.85, "grad_norm": 12.114263232259527, "learning_rate": 5.756386704590255e-07, "loss": 0.8008, "step": 10470 }, { "epoch": 0.85, "grad_norm": 3.464787519314774, "learning_rate": 5.750260870638541e-07, "loss": 0.7135, "step": 10471 }, { "epoch": 0.85, "grad_norm": 5.601030370219566, "learning_rate": 5.744138099047375e-07, "loss": 0.7965, "step": 10472 }, { "epoch": 0.85, "grad_norm": 41.707977187014066, "learning_rate": 5.738018390240535e-07, "loss": 0.6188, "step": 10473 }, { "epoch": 0.85, "grad_norm": 8.908503250200823, "learning_rate": 5.731901744641499e-07, "loss": 0.7987, "step": 10474 }, { "epoch": 0.85, "grad_norm": 3.6928638454145704, "learning_rate": 5.725788162673612e-07, "loss": 0.5576, "step": 10475 }, { "epoch": 0.85, "grad_norm": 3.903003052043101, "learning_rate": 5.719677644759941e-07, "loss": 0.6336, "step": 10476 }, { "epoch": 0.85, "grad_norm": 3.1266846127333547, "learning_rate": 5.713570191323398e-07, "loss": 0.6908, "step": 10477 }, { "epoch": 0.85, "grad_norm": 3.4547086037485526, "learning_rate": 5.707465802786655e-07, "loss": 0.5827, "step": 10478 }, { "epoch": 0.85, "grad_norm": 3.1352188120714954, "learning_rate": 5.701364479572152e-07, "loss": 0.6586, "step": 10479 }, { "epoch": 0.85, "grad_norm": 3.9516907517193705, "learning_rate": 5.695266222102175e-07, "loss": 0.6951, "step": 10480 }, { "epoch": 0.85, "grad_norm": 4.272577748854382, "learning_rate": 5.689171030798723e-07, "loss": 0.5986, "step": 10481 }, { "epoch": 0.85, "grad_norm": 4.2666762537922205, "learning_rate": 5.683078906083644e-07, "loss": 0.4899, "step": 10482 }, { "epoch": 0.85, "grad_norm": 3.4245318131258835, "learning_rate": 5.676989848378545e-07, "loss": 0.6647, "step": 10483 }, { "epoch": 0.85, "grad_norm": 2.605718714393821, "learning_rate": 5.670903858104837e-07, "loss": 0.5798, "step": 10484 }, { "epoch": 0.85, "grad_norm": 3.508762339610457, "learning_rate": 5.664820935683695e-07, "loss": 0.5445, "step": 10485 }, { "epoch": 0.85, "grad_norm": 7.027852891063986, "learning_rate": 5.658741081536101e-07, "loss": 0.6171, "step": 10486 }, { "epoch": 0.85, "grad_norm": 2.6425636235422147, "learning_rate": 5.652664296082822e-07, "loss": 0.6663, "step": 10487 }, { "epoch": 0.85, "grad_norm": 7.187846163584768, "learning_rate": 5.64659057974441e-07, "loss": 0.6666, "step": 10488 }, { "epoch": 0.85, "grad_norm": 5.777206419105017, "learning_rate": 5.640519932941202e-07, "loss": 0.6063, "step": 10489 }, { "epoch": 0.85, "grad_norm": 12.686746745228717, "learning_rate": 5.634452356093317e-07, "loss": 0.5177, "step": 10490 }, { "epoch": 0.85, "grad_norm": 3.6741115956239074, "learning_rate": 5.628387849620687e-07, "loss": 0.5503, "step": 10491 }, { "epoch": 0.85, "grad_norm": 4.163605372311336, "learning_rate": 5.622326413942997e-07, "loss": 0.7316, "step": 10492 }, { "epoch": 0.85, "grad_norm": 3.3348619975581553, "learning_rate": 5.616268049479756e-07, "loss": 0.6219, "step": 10493 }, { "epoch": 0.85, "grad_norm": 6.475155001732228, "learning_rate": 5.610212756650219e-07, "loss": 0.576, "step": 10494 }, { "epoch": 0.85, "grad_norm": 4.21222858749342, "learning_rate": 5.604160535873465e-07, "loss": 0.6862, "step": 10495 }, { "epoch": 0.85, "grad_norm": 3.10594804741805, "learning_rate": 5.598111387568339e-07, "loss": 0.59, "step": 10496 }, { "epoch": 0.85, "grad_norm": 2.987327285622205, "learning_rate": 5.592065312153477e-07, "loss": 0.6251, "step": 10497 }, { "epoch": 0.85, "grad_norm": 4.37267722475284, "learning_rate": 5.586022310047317e-07, "loss": 0.6098, "step": 10498 }, { "epoch": 0.85, "grad_norm": 4.414321777178975, "learning_rate": 5.579982381668058e-07, "loss": 0.7805, "step": 10499 }, { "epoch": 0.85, "grad_norm": 8.555182853057014, "learning_rate": 5.573945527433733e-07, "loss": 0.6006, "step": 10500 }, { "epoch": 0.85, "grad_norm": 6.523839739145099, "learning_rate": 5.567911747762084e-07, "loss": 0.7815, "step": 10501 }, { "epoch": 0.85, "grad_norm": 4.3531984322880275, "learning_rate": 5.561881043070721e-07, "loss": 0.8642, "step": 10502 }, { "epoch": 0.85, "grad_norm": 2.9368808756037335, "learning_rate": 5.555853413776991e-07, "loss": 0.584, "step": 10503 }, { "epoch": 0.85, "grad_norm": 3.257545599693184, "learning_rate": 5.549828860298046e-07, "loss": 0.649, "step": 10504 }, { "epoch": 0.85, "grad_norm": 3.2443289063388794, "learning_rate": 5.543807383050826e-07, "loss": 0.7033, "step": 10505 }, { "epoch": 0.85, "grad_norm": 14.636871459198412, "learning_rate": 5.537788982452052e-07, "loss": 0.6668, "step": 10506 }, { "epoch": 0.85, "grad_norm": 3.420298604385048, "learning_rate": 5.531773658918254e-07, "loss": 0.6947, "step": 10507 }, { "epoch": 0.85, "grad_norm": 4.9134211305509865, "learning_rate": 5.525761412865693e-07, "loss": 0.639, "step": 10508 }, { "epoch": 0.85, "grad_norm": 4.562912416516313, "learning_rate": 5.519752244710491e-07, "loss": 0.681, "step": 10509 }, { "epoch": 0.85, "grad_norm": 3.918343456024627, "learning_rate": 5.513746154868499e-07, "loss": 0.7008, "step": 10510 }, { "epoch": 0.85, "grad_norm": 14.64249338792327, "learning_rate": 5.507743143755373e-07, "loss": 0.6881, "step": 10511 }, { "epoch": 0.85, "grad_norm": 8.6273248730356, "learning_rate": 5.501743211786575e-07, "loss": 0.7638, "step": 10512 }, { "epoch": 0.85, "grad_norm": 4.652435617616853, "learning_rate": 5.495746359377335e-07, "loss": 0.6598, "step": 10513 }, { "epoch": 0.85, "grad_norm": 5.190453697518684, "learning_rate": 5.48975258694267e-07, "loss": 0.5521, "step": 10514 }, { "epoch": 0.85, "grad_norm": 3.3803604285741278, "learning_rate": 5.483761894897371e-07, "loss": 0.6289, "step": 10515 }, { "epoch": 0.85, "grad_norm": 6.560965467319808, "learning_rate": 5.477774283656062e-07, "loss": 0.6772, "step": 10516 }, { "epoch": 0.85, "grad_norm": 2.6701711485625834, "learning_rate": 5.471789753633095e-07, "loss": 0.6921, "step": 10517 }, { "epoch": 0.85, "grad_norm": 3.038743598112128, "learning_rate": 5.465808305242659e-07, "loss": 0.6807, "step": 10518 }, { "epoch": 0.85, "grad_norm": 2.749553157839282, "learning_rate": 5.459829938898697e-07, "loss": 0.5905, "step": 10519 }, { "epoch": 0.85, "grad_norm": 10.854706603114828, "learning_rate": 5.453854655014956e-07, "loss": 0.6775, "step": 10520 }, { "epoch": 0.85, "grad_norm": 4.269071265175622, "learning_rate": 5.447882454004955e-07, "loss": 0.686, "step": 10521 }, { "epoch": 0.85, "grad_norm": 2.853907333078846, "learning_rate": 5.441913336282001e-07, "loss": 0.6965, "step": 10522 }, { "epoch": 0.85, "grad_norm": 5.114523722564825, "learning_rate": 5.435947302259215e-07, "loss": 0.5129, "step": 10523 }, { "epoch": 0.85, "grad_norm": 6.197091259142938, "learning_rate": 5.429984352349466e-07, "loss": 0.6902, "step": 10524 }, { "epoch": 0.85, "grad_norm": 4.676453452667284, "learning_rate": 5.424024486965446e-07, "loss": 0.6663, "step": 10525 }, { "epoch": 0.85, "grad_norm": 3.366709298636746, "learning_rate": 5.418067706519603e-07, "loss": 0.5944, "step": 10526 }, { "epoch": 0.86, "grad_norm": 18.273426121956494, "learning_rate": 5.412114011424191e-07, "loss": 0.7503, "step": 10527 }, { "epoch": 0.86, "grad_norm": 3.7596731263709464, "learning_rate": 5.406163402091236e-07, "loss": 0.694, "step": 10528 }, { "epoch": 0.86, "grad_norm": 4.37285851142754, "learning_rate": 5.400215878932547e-07, "loss": 0.5341, "step": 10529 }, { "epoch": 0.86, "grad_norm": 5.272860907097611, "learning_rate": 5.39427144235975e-07, "loss": 0.5827, "step": 10530 }, { "epoch": 0.86, "grad_norm": 6.676266335920648, "learning_rate": 5.388330092784222e-07, "loss": 0.6228, "step": 10531 }, { "epoch": 0.86, "grad_norm": 7.362440420769961, "learning_rate": 5.382391830617162e-07, "loss": 0.5789, "step": 10532 }, { "epoch": 0.86, "grad_norm": 2.8814051450583795, "learning_rate": 5.376456656269524e-07, "loss": 0.7283, "step": 10533 }, { "epoch": 0.86, "grad_norm": 3.4488748727174645, "learning_rate": 5.370524570152059e-07, "loss": 0.6834, "step": 10534 }, { "epoch": 0.86, "grad_norm": 3.19758406223143, "learning_rate": 5.364595572675302e-07, "loss": 0.7506, "step": 10535 }, { "epoch": 0.86, "grad_norm": 4.629791989870899, "learning_rate": 5.358669664249566e-07, "loss": 0.6763, "step": 10536 }, { "epoch": 0.86, "grad_norm": 4.324939374829556, "learning_rate": 5.35274684528499e-07, "loss": 0.7324, "step": 10537 }, { "epoch": 0.86, "grad_norm": 3.887977618355025, "learning_rate": 5.346827116191438e-07, "loss": 0.6359, "step": 10538 }, { "epoch": 0.86, "grad_norm": 4.2207115213857, "learning_rate": 5.340910477378625e-07, "loss": 0.5644, "step": 10539 }, { "epoch": 0.86, "grad_norm": 4.739609769218641, "learning_rate": 5.334996929256003e-07, "loss": 0.7145, "step": 10540 }, { "epoch": 0.86, "grad_norm": 3.570825006490705, "learning_rate": 5.329086472232825e-07, "loss": 0.6594, "step": 10541 }, { "epoch": 0.86, "grad_norm": 5.051548996510039, "learning_rate": 5.323179106718129e-07, "loss": 0.5454, "step": 10542 }, { "epoch": 0.86, "grad_norm": 4.418263224865038, "learning_rate": 5.31727483312075e-07, "loss": 0.6165, "step": 10543 }, { "epoch": 0.86, "grad_norm": 4.83260981620051, "learning_rate": 5.311373651849305e-07, "loss": 0.7012, "step": 10544 }, { "epoch": 0.86, "grad_norm": 4.277805244576161, "learning_rate": 5.305475563312174e-07, "loss": 0.666, "step": 10545 }, { "epoch": 0.86, "grad_norm": 4.975542131461341, "learning_rate": 5.299580567917573e-07, "loss": 0.6073, "step": 10546 }, { "epoch": 0.86, "grad_norm": 11.605812194946333, "learning_rate": 5.293688666073438e-07, "loss": 0.6732, "step": 10547 }, { "epoch": 0.86, "grad_norm": 4.2409241908450275, "learning_rate": 5.287799858187548e-07, "loss": 0.6426, "step": 10548 }, { "epoch": 0.86, "grad_norm": 14.305969226693401, "learning_rate": 5.281914144667427e-07, "loss": 0.76, "step": 10549 }, { "epoch": 0.86, "grad_norm": 4.733803971508274, "learning_rate": 5.276031525920427e-07, "loss": 0.6391, "step": 10550 }, { "epoch": 0.86, "grad_norm": 3.2113097195839706, "learning_rate": 5.270152002353651e-07, "loss": 0.5095, "step": 10551 }, { "epoch": 0.86, "grad_norm": 5.85936567432933, "learning_rate": 5.264275574373989e-07, "loss": 0.6679, "step": 10552 }, { "epoch": 0.86, "grad_norm": 4.652227304738535, "learning_rate": 5.258402242388156e-07, "loss": 0.6637, "step": 10553 }, { "epoch": 0.86, "grad_norm": 7.369486736826186, "learning_rate": 5.252532006802585e-07, "loss": 0.6037, "step": 10554 }, { "epoch": 0.86, "grad_norm": 2.8911085096934186, "learning_rate": 5.246664868023565e-07, "loss": 0.766, "step": 10555 }, { "epoch": 0.86, "grad_norm": 3.8186751391199176, "learning_rate": 5.240800826457115e-07, "loss": 0.605, "step": 10556 }, { "epoch": 0.86, "grad_norm": 4.803947863403988, "learning_rate": 5.234939882509083e-07, "loss": 0.6399, "step": 10557 }, { "epoch": 0.86, "grad_norm": 3.965588522005686, "learning_rate": 5.229082036585076e-07, "loss": 0.738, "step": 10558 }, { "epoch": 0.86, "grad_norm": 3.48272340087464, "learning_rate": 5.223227289090482e-07, "loss": 0.685, "step": 10559 }, { "epoch": 0.86, "grad_norm": 4.23140765331175, "learning_rate": 5.217375640430522e-07, "loss": 0.6292, "step": 10560 }, { "epoch": 0.86, "grad_norm": 5.897979897944252, "learning_rate": 5.211527091010116e-07, "loss": 0.7619, "step": 10561 }, { "epoch": 0.86, "grad_norm": 3.112186185375023, "learning_rate": 5.205681641234062e-07, "loss": 0.619, "step": 10562 }, { "epoch": 0.86, "grad_norm": 18.232828557853026, "learning_rate": 5.199839291506875e-07, "loss": 0.7551, "step": 10563 }, { "epoch": 0.86, "grad_norm": 4.3861718664890725, "learning_rate": 5.194000042232906e-07, "loss": 0.5812, "step": 10564 }, { "epoch": 0.86, "grad_norm": 4.735029090022982, "learning_rate": 5.188163893816239e-07, "loss": 0.577, "step": 10565 }, { "epoch": 0.86, "grad_norm": 3.658741322798877, "learning_rate": 5.182330846660815e-07, "loss": 0.5866, "step": 10566 }, { "epoch": 0.86, "grad_norm": 5.522653060913649, "learning_rate": 5.176500901170273e-07, "loss": 0.606, "step": 10567 }, { "epoch": 0.86, "grad_norm": 4.395711799783536, "learning_rate": 5.170674057748109e-07, "loss": 0.5854, "step": 10568 }, { "epoch": 0.86, "grad_norm": 18.455156662017206, "learning_rate": 5.16485031679757e-07, "loss": 0.6455, "step": 10569 }, { "epoch": 0.86, "grad_norm": 3.470361169422147, "learning_rate": 5.159029678721683e-07, "loss": 0.5632, "step": 10570 }, { "epoch": 0.86, "grad_norm": 5.320427189128552, "learning_rate": 5.153212143923292e-07, "loss": 0.7743, "step": 10571 }, { "epoch": 0.86, "grad_norm": 3.5693918549923382, "learning_rate": 5.147397712804992e-07, "loss": 0.6212, "step": 10572 }, { "epoch": 0.86, "grad_norm": 3.7455786315052007, "learning_rate": 5.141586385769204e-07, "loss": 0.6655, "step": 10573 }, { "epoch": 0.86, "grad_norm": 2.94125506023899, "learning_rate": 5.135778163218074e-07, "loss": 0.6415, "step": 10574 }, { "epoch": 0.86, "grad_norm": 8.278718512856946, "learning_rate": 5.129973045553593e-07, "loss": 0.6008, "step": 10575 }, { "epoch": 0.86, "grad_norm": 3.6833335543368757, "learning_rate": 5.1241710331775e-07, "loss": 0.7002, "step": 10576 }, { "epoch": 0.86, "grad_norm": 6.460548296974436, "learning_rate": 5.118372126491322e-07, "loss": 0.5877, "step": 10577 }, { "epoch": 0.86, "grad_norm": 5.054272156481077, "learning_rate": 5.112576325896401e-07, "loss": 0.7049, "step": 10578 }, { "epoch": 0.86, "grad_norm": 3.291200695720283, "learning_rate": 5.106783631793826e-07, "loss": 0.6278, "step": 10579 }, { "epoch": 0.86, "grad_norm": 4.540267510675974, "learning_rate": 5.100994044584511e-07, "loss": 0.7307, "step": 10580 }, { "epoch": 0.86, "grad_norm": 2.882817940978211, "learning_rate": 5.095207564669097e-07, "loss": 0.5502, "step": 10581 }, { "epoch": 0.86, "grad_norm": 3.7507092049889557, "learning_rate": 5.089424192448078e-07, "loss": 0.6053, "step": 10582 }, { "epoch": 0.86, "grad_norm": 3.1805385700120308, "learning_rate": 5.08364392832168e-07, "loss": 0.6144, "step": 10583 }, { "epoch": 0.86, "grad_norm": 8.522537145484804, "learning_rate": 5.077866772689932e-07, "loss": 0.7216, "step": 10584 }, { "epoch": 0.86, "grad_norm": 8.361047956031662, "learning_rate": 5.07209272595266e-07, "loss": 0.4926, "step": 10585 }, { "epoch": 0.86, "grad_norm": 8.20122524918878, "learning_rate": 5.066321788509465e-07, "loss": 0.7026, "step": 10586 }, { "epoch": 0.86, "grad_norm": 4.283186653538668, "learning_rate": 5.060553960759729e-07, "loss": 0.7298, "step": 10587 }, { "epoch": 0.86, "grad_norm": 2.9345050601400904, "learning_rate": 5.054789243102615e-07, "loss": 0.6809, "step": 10588 }, { "epoch": 0.86, "grad_norm": 5.887505501756078, "learning_rate": 5.049027635937087e-07, "loss": 0.5909, "step": 10589 }, { "epoch": 0.86, "grad_norm": 3.9275803618180674, "learning_rate": 5.043269139661872e-07, "loss": 0.6625, "step": 10590 }, { "epoch": 0.86, "grad_norm": 309.3909502206394, "learning_rate": 5.037513754675516e-07, "loss": 0.6389, "step": 10591 }, { "epoch": 0.86, "grad_norm": 4.056319338162214, "learning_rate": 5.031761481376318e-07, "loss": 0.637, "step": 10592 }, { "epoch": 0.86, "grad_norm": 4.397426432059715, "learning_rate": 5.026012320162365e-07, "loss": 0.5116, "step": 10593 }, { "epoch": 0.86, "grad_norm": 3.6227971417607354, "learning_rate": 5.02026627143154e-07, "loss": 0.7308, "step": 10594 }, { "epoch": 0.86, "grad_norm": 4.343959560266296, "learning_rate": 5.014523335581495e-07, "loss": 0.7387, "step": 10595 }, { "epoch": 0.86, "grad_norm": 3.2530948217614117, "learning_rate": 5.008783513009696e-07, "loss": 0.5354, "step": 10596 }, { "epoch": 0.86, "grad_norm": 3.5275053557443607, "learning_rate": 5.003046804113354e-07, "loss": 0.7055, "step": 10597 }, { "epoch": 0.86, "grad_norm": 4.820721482380367, "learning_rate": 4.997313209289512e-07, "loss": 0.6841, "step": 10598 }, { "epoch": 0.86, "grad_norm": 4.576153449877427, "learning_rate": 4.991582728934952e-07, "loss": 0.6943, "step": 10599 }, { "epoch": 0.86, "grad_norm": 3.8452697545324006, "learning_rate": 4.985855363446268e-07, "loss": 0.6361, "step": 10600 }, { "epoch": 0.86, "grad_norm": 6.88192464799193, "learning_rate": 4.980131113219822e-07, "loss": 0.6391, "step": 10601 }, { "epoch": 0.86, "grad_norm": 3.9129811219456907, "learning_rate": 4.974409978651762e-07, "loss": 0.6869, "step": 10602 }, { "epoch": 0.86, "grad_norm": 5.449472163573471, "learning_rate": 4.96869196013805e-07, "loss": 0.6459, "step": 10603 }, { "epoch": 0.86, "grad_norm": 4.172261244328076, "learning_rate": 4.962977058074381e-07, "loss": 0.576, "step": 10604 }, { "epoch": 0.86, "grad_norm": 11.059593329921459, "learning_rate": 4.957265272856288e-07, "loss": 0.715, "step": 10605 }, { "epoch": 0.86, "grad_norm": 2.7194163077427613, "learning_rate": 4.951556604879049e-07, "loss": 0.6763, "step": 10606 }, { "epoch": 0.86, "grad_norm": 6.198100077429061, "learning_rate": 4.945851054537737e-07, "loss": 0.5365, "step": 10607 }, { "epoch": 0.86, "grad_norm": 4.563358032232945, "learning_rate": 4.940148622227225e-07, "loss": 0.6101, "step": 10608 }, { "epoch": 0.86, "grad_norm": 7.297522348946438, "learning_rate": 4.934449308342131e-07, "loss": 0.5722, "step": 10609 }, { "epoch": 0.86, "grad_norm": 4.147626008345462, "learning_rate": 4.928753113276918e-07, "loss": 0.5664, "step": 10610 }, { "epoch": 0.86, "grad_norm": 4.3861938013427295, "learning_rate": 4.92306003742577e-07, "loss": 0.4749, "step": 10611 }, { "epoch": 0.86, "grad_norm": 4.681590036620992, "learning_rate": 4.917370081182698e-07, "loss": 0.5637, "step": 10612 }, { "epoch": 0.86, "grad_norm": 3.7230256106035844, "learning_rate": 4.91168324494149e-07, "loss": 0.6898, "step": 10613 }, { "epoch": 0.86, "grad_norm": 3.3049238813477837, "learning_rate": 4.905999529095695e-07, "loss": 0.6471, "step": 10614 }, { "epoch": 0.86, "grad_norm": 3.94147644657435, "learning_rate": 4.900318934038662e-07, "loss": 0.5835, "step": 10615 }, { "epoch": 0.86, "grad_norm": 4.337598647202033, "learning_rate": 4.894641460163536e-07, "loss": 0.6927, "step": 10616 }, { "epoch": 0.86, "grad_norm": 2.895531550852072, "learning_rate": 4.888967107863229e-07, "loss": 0.5616, "step": 10617 }, { "epoch": 0.86, "grad_norm": 2.216273778060793, "learning_rate": 4.883295877530431e-07, "loss": 0.4925, "step": 10618 }, { "epoch": 0.86, "grad_norm": 3.545956964017675, "learning_rate": 4.877627769557658e-07, "loss": 0.8262, "step": 10619 }, { "epoch": 0.86, "grad_norm": 3.5256952436828572, "learning_rate": 4.871962784337131e-07, "loss": 0.6437, "step": 10620 }, { "epoch": 0.86, "grad_norm": 3.9170066278168165, "learning_rate": 4.866300922260947e-07, "loss": 0.5866, "step": 10621 }, { "epoch": 0.86, "grad_norm": 6.8906195712446605, "learning_rate": 4.86064218372091e-07, "loss": 0.73, "step": 10622 }, { "epoch": 0.86, "grad_norm": 7.924580649900985, "learning_rate": 4.854986569108667e-07, "loss": 0.5662, "step": 10623 }, { "epoch": 0.86, "grad_norm": 4.631069010927226, "learning_rate": 4.849334078815609e-07, "loss": 0.708, "step": 10624 }, { "epoch": 0.86, "grad_norm": 3.8392169604301745, "learning_rate": 4.843684713232916e-07, "loss": 0.7795, "step": 10625 }, { "epoch": 0.86, "grad_norm": 4.601164038347287, "learning_rate": 4.838038472751582e-07, "loss": 0.6441, "step": 10626 }, { "epoch": 0.86, "grad_norm": 4.701846148230725, "learning_rate": 4.832395357762337e-07, "loss": 0.695, "step": 10627 }, { "epoch": 0.86, "grad_norm": 28.658827084440855, "learning_rate": 4.826755368655739e-07, "loss": 0.5909, "step": 10628 }, { "epoch": 0.86, "grad_norm": 61.79259142274256, "learning_rate": 4.821118505822093e-07, "loss": 0.8125, "step": 10629 }, { "epoch": 0.86, "grad_norm": 11.260471148525255, "learning_rate": 4.815484769651529e-07, "loss": 0.5481, "step": 10630 }, { "epoch": 0.86, "grad_norm": 4.294809915851474, "learning_rate": 4.809854160533923e-07, "loss": 0.5414, "step": 10631 }, { "epoch": 0.86, "grad_norm": 3.882161200259773, "learning_rate": 4.804226678858936e-07, "loss": 0.6763, "step": 10632 }, { "epoch": 0.86, "grad_norm": 4.290195397872298, "learning_rate": 4.79860232501606e-07, "loss": 0.5978, "step": 10633 }, { "epoch": 0.86, "grad_norm": 4.267078407063617, "learning_rate": 4.7929810993945e-07, "loss": 0.68, "step": 10634 }, { "epoch": 0.86, "grad_norm": 2.6229768447322295, "learning_rate": 4.787363002383299e-07, "loss": 0.5809, "step": 10635 }, { "epoch": 0.86, "grad_norm": 4.704167181546853, "learning_rate": 4.781748034371253e-07, "loss": 0.7956, "step": 10636 }, { "epoch": 0.86, "grad_norm": 3.598967932543625, "learning_rate": 4.776136195746972e-07, "loss": 0.8454, "step": 10637 }, { "epoch": 0.86, "grad_norm": 5.57438540593825, "learning_rate": 4.770527486898808e-07, "loss": 0.6648, "step": 10638 }, { "epoch": 0.86, "grad_norm": 49.875161375206574, "learning_rate": 4.764921908214948e-07, "loss": 0.6805, "step": 10639 }, { "epoch": 0.86, "grad_norm": 9.200926855274849, "learning_rate": 4.759319460083295e-07, "loss": 0.6228, "step": 10640 }, { "epoch": 0.86, "grad_norm": 5.884702202467515, "learning_rate": 4.75372014289161e-07, "loss": 0.564, "step": 10641 }, { "epoch": 0.86, "grad_norm": 3.5923936922622794, "learning_rate": 4.748123957027379e-07, "loss": 0.4832, "step": 10642 }, { "epoch": 0.86, "grad_norm": 4.114500342579102, "learning_rate": 4.7425309028778954e-07, "loss": 0.6898, "step": 10643 }, { "epoch": 0.86, "grad_norm": 11.659995047574768, "learning_rate": 4.7369409808302457e-07, "loss": 0.7404, "step": 10644 }, { "epoch": 0.86, "grad_norm": 5.294514529587528, "learning_rate": 4.731354191271265e-07, "loss": 0.8083, "step": 10645 }, { "epoch": 0.86, "grad_norm": 4.482671858791675, "learning_rate": 4.725770534587637e-07, "loss": 0.5486, "step": 10646 }, { "epoch": 0.86, "grad_norm": 4.311126718705619, "learning_rate": 4.7201900111657316e-07, "loss": 0.6297, "step": 10647 }, { "epoch": 0.86, "grad_norm": 3.29032404457384, "learning_rate": 4.714612621391795e-07, "loss": 0.4806, "step": 10648 }, { "epoch": 0.86, "grad_norm": 3.843451611443095, "learning_rate": 4.709038365651808e-07, "loss": 0.5216, "step": 10649 }, { "epoch": 0.86, "grad_norm": 4.776116006946723, "learning_rate": 4.7034672443315274e-07, "loss": 0.7396, "step": 10650 }, { "epoch": 0.87, "grad_norm": 3.165981709076825, "learning_rate": 4.697899257816535e-07, "loss": 0.6285, "step": 10651 }, { "epoch": 0.87, "grad_norm": 5.324935612151433, "learning_rate": 4.6923344064921604e-07, "loss": 0.5604, "step": 10652 }, { "epoch": 0.87, "grad_norm": 3.5979812727132616, "learning_rate": 4.6867726907435295e-07, "loss": 0.733, "step": 10653 }, { "epoch": 0.87, "grad_norm": 11.359870204681577, "learning_rate": 4.6812141109555286e-07, "loss": 0.6604, "step": 10654 }, { "epoch": 0.87, "grad_norm": 2.708531677156082, "learning_rate": 4.6756586675128724e-07, "loss": 0.5802, "step": 10655 }, { "epoch": 0.87, "grad_norm": 8.61350395342908, "learning_rate": 4.670106360800025e-07, "loss": 0.5981, "step": 10656 }, { "epoch": 0.87, "grad_norm": 2.7432617918576176, "learning_rate": 4.6645571912012245e-07, "loss": 0.6139, "step": 10657 }, { "epoch": 0.87, "grad_norm": 3.194952948779785, "learning_rate": 4.659011159100535e-07, "loss": 0.6095, "step": 10658 }, { "epoch": 0.87, "grad_norm": 5.0003352575296525, "learning_rate": 4.653468264881761e-07, "loss": 0.705, "step": 10659 }, { "epoch": 0.87, "grad_norm": 9.921719853622454, "learning_rate": 4.647928508928512e-07, "loss": 0.8292, "step": 10660 }, { "epoch": 0.87, "grad_norm": 3.9676314298486037, "learning_rate": 4.642391891624159e-07, "loss": 0.714, "step": 10661 }, { "epoch": 0.87, "grad_norm": 4.002695521266337, "learning_rate": 4.6368584133518914e-07, "loss": 0.6978, "step": 10662 }, { "epoch": 0.87, "grad_norm": 4.146433874369412, "learning_rate": 4.6313280744946396e-07, "loss": 0.9193, "step": 10663 }, { "epoch": 0.87, "grad_norm": 4.409137241818238, "learning_rate": 4.625800875435166e-07, "loss": 0.6236, "step": 10664 }, { "epoch": 0.87, "grad_norm": 7.746631306750918, "learning_rate": 4.620276816555963e-07, "loss": 0.7241, "step": 10665 }, { "epoch": 0.87, "grad_norm": 6.417075639192836, "learning_rate": 4.6147558982393427e-07, "loss": 0.8172, "step": 10666 }, { "epoch": 0.87, "grad_norm": 3.4633927289202093, "learning_rate": 4.6092381208673875e-07, "loss": 0.5396, "step": 10667 }, { "epoch": 0.87, "grad_norm": 3.424281430579176, "learning_rate": 4.6037234848219424e-07, "loss": 0.5794, "step": 10668 }, { "epoch": 0.87, "grad_norm": 2.3934324478734585, "learning_rate": 4.59821199048468e-07, "loss": 0.6771, "step": 10669 }, { "epoch": 0.87, "grad_norm": 2.4545270456651376, "learning_rate": 4.592703638237017e-07, "loss": 0.6353, "step": 10670 }, { "epoch": 0.87, "grad_norm": 4.619502455479935, "learning_rate": 4.5871984284601765e-07, "loss": 0.6405, "step": 10671 }, { "epoch": 0.87, "grad_norm": 7.174178438437912, "learning_rate": 4.5816963615351486e-07, "loss": 0.5364, "step": 10672 }, { "epoch": 0.87, "grad_norm": 3.3650846113824997, "learning_rate": 4.576197437842705e-07, "loss": 0.5696, "step": 10673 }, { "epoch": 0.87, "grad_norm": 3.24642689059261, "learning_rate": 4.5707016577634156e-07, "loss": 0.7475, "step": 10674 }, { "epoch": 0.87, "grad_norm": 4.658879225526947, "learning_rate": 4.565209021677608e-07, "loss": 0.7568, "step": 10675 }, { "epoch": 0.87, "grad_norm": 3.4726092868542944, "learning_rate": 4.5597195299654285e-07, "loss": 0.6556, "step": 10676 }, { "epoch": 0.87, "grad_norm": 4.829099294606015, "learning_rate": 4.554233183006762e-07, "loss": 0.8412, "step": 10677 }, { "epoch": 0.87, "grad_norm": 3.477059135767142, "learning_rate": 4.5487499811813163e-07, "loss": 0.6292, "step": 10678 }, { "epoch": 0.87, "grad_norm": 4.443836704857228, "learning_rate": 4.5432699248685597e-07, "loss": 0.7187, "step": 10679 }, { "epoch": 0.87, "grad_norm": 3.017378970938267, "learning_rate": 4.537793014447739e-07, "loss": 0.6979, "step": 10680 }, { "epoch": 0.87, "grad_norm": 2.817235505882643, "learning_rate": 4.532319250297901e-07, "loss": 0.6393, "step": 10681 }, { "epoch": 0.87, "grad_norm": 6.188456478269907, "learning_rate": 4.526848632797848e-07, "loss": 0.5979, "step": 10682 }, { "epoch": 0.87, "grad_norm": 3.600301302996873, "learning_rate": 4.5213811623261994e-07, "loss": 0.6585, "step": 10683 }, { "epoch": 0.87, "grad_norm": 4.438135214543242, "learning_rate": 4.515916839261325e-07, "loss": 0.7192, "step": 10684 }, { "epoch": 0.87, "grad_norm": 29.624191162479622, "learning_rate": 4.5104556639814055e-07, "loss": 0.6984, "step": 10685 }, { "epoch": 0.87, "grad_norm": 4.114865999884099, "learning_rate": 4.504997636864378e-07, "loss": 0.6885, "step": 10686 }, { "epoch": 0.87, "grad_norm": 4.816215245279651, "learning_rate": 4.4995427582879725e-07, "loss": 0.722, "step": 10687 }, { "epoch": 0.87, "grad_norm": 4.518576055772596, "learning_rate": 4.494091028629699e-07, "loss": 0.6212, "step": 10688 }, { "epoch": 0.87, "grad_norm": 9.414125447537662, "learning_rate": 4.488642448266861e-07, "loss": 0.6031, "step": 10689 }, { "epoch": 0.87, "grad_norm": 4.013569962235598, "learning_rate": 4.4831970175765293e-07, "loss": 0.6197, "step": 10690 }, { "epoch": 0.87, "grad_norm": 3.477407045598223, "learning_rate": 4.4777547369355523e-07, "loss": 0.7972, "step": 10691 }, { "epoch": 0.87, "grad_norm": 4.830427915339826, "learning_rate": 4.472315606720601e-07, "loss": 0.7668, "step": 10692 }, { "epoch": 0.87, "grad_norm": 3.4004253754601206, "learning_rate": 4.4668796273080515e-07, "loss": 0.6022, "step": 10693 }, { "epoch": 0.87, "grad_norm": 10.372112958292616, "learning_rate": 4.461446799074143e-07, "loss": 0.4801, "step": 10694 }, { "epoch": 0.87, "grad_norm": 5.1516987134369066, "learning_rate": 4.4560171223948457e-07, "loss": 0.6883, "step": 10695 }, { "epoch": 0.87, "grad_norm": 3.6732652956552703, "learning_rate": 4.4505905976459374e-07, "loss": 0.5188, "step": 10696 }, { "epoch": 0.87, "grad_norm": 3.583479564699838, "learning_rate": 4.445167225202962e-07, "loss": 0.5876, "step": 10697 }, { "epoch": 0.87, "grad_norm": 4.366386547835916, "learning_rate": 4.4397470054412415e-07, "loss": 0.6699, "step": 10698 }, { "epoch": 0.87, "grad_norm": 3.5871270381617655, "learning_rate": 4.434329938735921e-07, "loss": 0.5064, "step": 10699 }, { "epoch": 0.87, "grad_norm": 3.2086242418846607, "learning_rate": 4.428916025461855e-07, "loss": 0.6888, "step": 10700 }, { "epoch": 0.87, "grad_norm": 4.997365684494536, "learning_rate": 4.4235052659937437e-07, "loss": 0.5655, "step": 10701 }, { "epoch": 0.87, "grad_norm": 2.7224291586339917, "learning_rate": 4.418097660706039e-07, "loss": 0.5888, "step": 10702 }, { "epoch": 0.87, "grad_norm": 5.109252829432643, "learning_rate": 4.4126932099729903e-07, "loss": 0.608, "step": 10703 }, { "epoch": 0.87, "grad_norm": 11.04049901556695, "learning_rate": 4.40729191416861e-07, "loss": 0.7794, "step": 10704 }, { "epoch": 0.87, "grad_norm": 4.793674277503852, "learning_rate": 4.40189377366671e-07, "loss": 0.633, "step": 10705 }, { "epoch": 0.87, "grad_norm": 2.897300522636713, "learning_rate": 4.396498788840864e-07, "loss": 0.6524, "step": 10706 }, { "epoch": 0.87, "grad_norm": 2.73339379744802, "learning_rate": 4.3911069600644396e-07, "loss": 0.6855, "step": 10707 }, { "epoch": 0.87, "grad_norm": 4.155855647493374, "learning_rate": 4.3857182877105997e-07, "loss": 0.7516, "step": 10708 }, { "epoch": 0.87, "grad_norm": 3.8774944705788434, "learning_rate": 4.380332772152257e-07, "loss": 0.689, "step": 10709 }, { "epoch": 0.87, "grad_norm": 4.063910709280885, "learning_rate": 4.3749504137621413e-07, "loss": 0.5842, "step": 10710 }, { "epoch": 0.87, "grad_norm": 9.913068545118325, "learning_rate": 4.369571212912732e-07, "loss": 0.7191, "step": 10711 }, { "epoch": 0.87, "grad_norm": 3.579721761886697, "learning_rate": 4.36419516997631e-07, "loss": 0.6416, "step": 10712 }, { "epoch": 0.87, "grad_norm": 3.4509672210560884, "learning_rate": 4.3588222853249207e-07, "loss": 0.8461, "step": 10713 }, { "epoch": 0.87, "grad_norm": 3.241141784706939, "learning_rate": 4.3534525593304177e-07, "loss": 0.6639, "step": 10714 }, { "epoch": 0.87, "grad_norm": 8.656809468557332, "learning_rate": 4.348085992364415e-07, "loss": 0.6811, "step": 10715 }, { "epoch": 0.87, "grad_norm": 6.0464387930865895, "learning_rate": 4.342722584798298e-07, "loss": 0.6983, "step": 10716 }, { "epoch": 0.87, "grad_norm": 4.568471405690998, "learning_rate": 4.33736233700327e-07, "loss": 0.5458, "step": 10717 }, { "epoch": 0.87, "grad_norm": 7.755403977461108, "learning_rate": 4.332005249350274e-07, "loss": 0.6554, "step": 10718 }, { "epoch": 0.87, "grad_norm": 2.469411592698162, "learning_rate": 4.3266513222100846e-07, "loss": 0.646, "step": 10719 }, { "epoch": 0.87, "grad_norm": 4.1011309925795425, "learning_rate": 4.3213005559531893e-07, "loss": 0.6755, "step": 10720 }, { "epoch": 0.87, "grad_norm": 2.8232968513780894, "learning_rate": 4.31595295094992e-07, "loss": 0.7011, "step": 10721 }, { "epoch": 0.87, "grad_norm": 7.189138763845999, "learning_rate": 4.3106085075703576e-07, "loss": 0.5735, "step": 10722 }, { "epoch": 0.87, "grad_norm": 4.9764717974193395, "learning_rate": 4.3052672261843564e-07, "loss": 0.7338, "step": 10723 }, { "epoch": 0.87, "grad_norm": 3.965778039093081, "learning_rate": 4.2999291071615934e-07, "loss": 0.6783, "step": 10724 }, { "epoch": 0.87, "grad_norm": 7.1017850062923245, "learning_rate": 4.294594150871489e-07, "loss": 0.7119, "step": 10725 }, { "epoch": 0.87, "grad_norm": 4.09854637897419, "learning_rate": 4.289262357683255e-07, "loss": 0.579, "step": 10726 }, { "epoch": 0.87, "grad_norm": 3.395027399396893, "learning_rate": 4.283933727965872e-07, "loss": 0.6879, "step": 10727 }, { "epoch": 0.87, "grad_norm": 2.561964856597612, "learning_rate": 4.278608262088141e-07, "loss": 0.5705, "step": 10728 }, { "epoch": 0.87, "grad_norm": 9.143239212745891, "learning_rate": 4.2732859604185994e-07, "loss": 0.8148, "step": 10729 }, { "epoch": 0.87, "grad_norm": 3.236095773149485, "learning_rate": 4.267966823325581e-07, "loss": 0.8488, "step": 10730 }, { "epoch": 0.87, "grad_norm": 3.1534033487066147, "learning_rate": 4.2626508511772247e-07, "loss": 0.5129, "step": 10731 }, { "epoch": 0.87, "grad_norm": 5.01009947183095, "learning_rate": 4.2573380443414083e-07, "loss": 0.7155, "step": 10732 }, { "epoch": 0.87, "grad_norm": 5.560043806292731, "learning_rate": 4.2520284031858206e-07, "loss": 0.7596, "step": 10733 }, { "epoch": 0.87, "grad_norm": 3.6784894874431444, "learning_rate": 4.2467219280779183e-07, "loss": 0.6808, "step": 10734 }, { "epoch": 0.87, "grad_norm": 5.4886375162283, "learning_rate": 4.241418619384946e-07, "loss": 0.6529, "step": 10735 }, { "epoch": 0.87, "grad_norm": 2.836470937591066, "learning_rate": 4.236118477473927e-07, "loss": 0.6393, "step": 10736 }, { "epoch": 0.87, "grad_norm": 2.80145808918984, "learning_rate": 4.230821502711657e-07, "loss": 0.7841, "step": 10737 }, { "epoch": 0.87, "grad_norm": 4.098709739120388, "learning_rate": 4.225527695464732e-07, "loss": 0.5967, "step": 10738 }, { "epoch": 0.87, "grad_norm": 7.4095888310814475, "learning_rate": 4.2202370560995076e-07, "loss": 0.5959, "step": 10739 }, { "epoch": 0.87, "grad_norm": 3.9753327539729195, "learning_rate": 4.2149495849821365e-07, "loss": 0.679, "step": 10740 }, { "epoch": 0.87, "grad_norm": 3.8556580480137637, "learning_rate": 4.209665282478531e-07, "loss": 0.7672, "step": 10741 }, { "epoch": 0.87, "grad_norm": 4.723370368903523, "learning_rate": 4.2043841489544156e-07, "loss": 0.6165, "step": 10742 }, { "epoch": 0.87, "grad_norm": 27.050325456903867, "learning_rate": 4.199106184775259e-07, "loss": 0.551, "step": 10743 }, { "epoch": 0.87, "grad_norm": 5.467307375636786, "learning_rate": 4.193831390306352e-07, "loss": 0.7173, "step": 10744 }, { "epoch": 0.87, "grad_norm": 3.0401008008734647, "learning_rate": 4.188559765912731e-07, "loss": 0.5714, "step": 10745 }, { "epoch": 0.87, "grad_norm": 4.128529810729945, "learning_rate": 4.183291311959231e-07, "loss": 0.641, "step": 10746 }, { "epoch": 0.87, "grad_norm": 2.485994496870722, "learning_rate": 4.1780260288104504e-07, "loss": 0.4747, "step": 10747 }, { "epoch": 0.87, "grad_norm": 4.043381728166808, "learning_rate": 4.172763916830785e-07, "loss": 0.7358, "step": 10748 }, { "epoch": 0.87, "grad_norm": 4.481993697998047, "learning_rate": 4.167504976384418e-07, "loss": 0.6272, "step": 10749 }, { "epoch": 0.87, "grad_norm": 3.5276661692307334, "learning_rate": 4.1622492078352783e-07, "loss": 0.6207, "step": 10750 }, { "epoch": 0.87, "grad_norm": 2.339290533645544, "learning_rate": 4.156996611547126e-07, "loss": 0.5842, "step": 10751 }, { "epoch": 0.87, "grad_norm": 5.441490813999628, "learning_rate": 4.1517471878834536e-07, "loss": 0.5888, "step": 10752 }, { "epoch": 0.87, "grad_norm": 8.177837937407425, "learning_rate": 4.1465009372075647e-07, "loss": 0.5467, "step": 10753 }, { "epoch": 0.87, "grad_norm": 3.867151964631239, "learning_rate": 4.141257859882525e-07, "loss": 0.6577, "step": 10754 }, { "epoch": 0.87, "grad_norm": 3.6094521000745527, "learning_rate": 4.136017956271188e-07, "loss": 0.6963, "step": 10755 }, { "epoch": 0.87, "grad_norm": 5.441529914069627, "learning_rate": 4.130781226736197e-07, "loss": 0.6772, "step": 10756 }, { "epoch": 0.87, "grad_norm": 3.9750203704806393, "learning_rate": 4.125547671639957e-07, "loss": 0.7016, "step": 10757 }, { "epoch": 0.87, "grad_norm": 2.9720566962814456, "learning_rate": 4.1203172913446774e-07, "loss": 0.7542, "step": 10758 }, { "epoch": 0.87, "grad_norm": 2.375481272321517, "learning_rate": 4.1150900862123145e-07, "loss": 0.5636, "step": 10759 }, { "epoch": 0.87, "grad_norm": 2.9125915907391353, "learning_rate": 4.109866056604633e-07, "loss": 0.5838, "step": 10760 }, { "epoch": 0.87, "grad_norm": 7.754166412312943, "learning_rate": 4.1046452028831786e-07, "loss": 0.7411, "step": 10761 }, { "epoch": 0.87, "grad_norm": 3.2216364944218623, "learning_rate": 4.099427525409239e-07, "loss": 0.8449, "step": 10762 }, { "epoch": 0.87, "grad_norm": 8.691515045128018, "learning_rate": 4.0942130245439414e-07, "loss": 0.8273, "step": 10763 }, { "epoch": 0.87, "grad_norm": 3.1807086757421694, "learning_rate": 4.089001700648143e-07, "loss": 0.6819, "step": 10764 }, { "epoch": 0.87, "grad_norm": 3.0881183837050785, "learning_rate": 4.0837935540825214e-07, "loss": 0.729, "step": 10765 }, { "epoch": 0.87, "grad_norm": 4.812208167391301, "learning_rate": 4.078588585207477e-07, "loss": 0.7293, "step": 10766 }, { "epoch": 0.87, "grad_norm": 3.316448712526414, "learning_rate": 4.0733867943832607e-07, "loss": 0.5389, "step": 10767 }, { "epoch": 0.87, "grad_norm": 7.863321853301671, "learning_rate": 4.068188181969851e-07, "loss": 0.5217, "step": 10768 }, { "epoch": 0.87, "grad_norm": 3.706238670079514, "learning_rate": 4.0629927483270326e-07, "loss": 0.7675, "step": 10769 }, { "epoch": 0.87, "grad_norm": 5.291048939953106, "learning_rate": 4.0578004938143624e-07, "loss": 0.5705, "step": 10770 }, { "epoch": 0.87, "grad_norm": 22.34300728010316, "learning_rate": 4.0526114187911636e-07, "loss": 0.5999, "step": 10771 }, { "epoch": 0.87, "grad_norm": 2.3222506841879937, "learning_rate": 4.047425523616577e-07, "loss": 0.6545, "step": 10772 }, { "epoch": 0.87, "grad_norm": 3.886792034387991, "learning_rate": 4.0422428086494713e-07, "loss": 0.5307, "step": 10773 }, { "epoch": 0.88, "grad_norm": 7.531583274532244, "learning_rate": 4.037063274248548e-07, "loss": 0.5338, "step": 10774 }, { "epoch": 0.88, "grad_norm": 3.635812634928802, "learning_rate": 4.0318869207722433e-07, "loss": 0.6593, "step": 10775 }, { "epoch": 0.88, "grad_norm": 5.122985585651787, "learning_rate": 4.026713748578809e-07, "loss": 0.7173, "step": 10776 }, { "epoch": 0.88, "grad_norm": 6.834422601280923, "learning_rate": 4.0215437580262584e-07, "loss": 0.6338, "step": 10777 }, { "epoch": 0.88, "grad_norm": 3.237068795115182, "learning_rate": 4.0163769494723836e-07, "loss": 0.8367, "step": 10778 }, { "epoch": 0.88, "grad_norm": 6.514229708552531, "learning_rate": 4.0112133232747596e-07, "loss": 0.6697, "step": 10779 }, { "epoch": 0.88, "grad_norm": 5.851794017685984, "learning_rate": 4.006052879790734e-07, "loss": 0.6796, "step": 10780 }, { "epoch": 0.88, "grad_norm": 8.39969653473002, "learning_rate": 4.0008956193774597e-07, "loss": 0.5769, "step": 10781 }, { "epoch": 0.88, "grad_norm": 3.807743145700366, "learning_rate": 3.995741542391834e-07, "loss": 0.8037, "step": 10782 }, { "epoch": 0.88, "grad_norm": 4.525909173501873, "learning_rate": 3.9905906491905676e-07, "loss": 0.8172, "step": 10783 }, { "epoch": 0.88, "grad_norm": 4.234772635311939, "learning_rate": 3.98544294013013e-07, "loss": 0.7275, "step": 10784 }, { "epoch": 0.88, "grad_norm": 3.9127935712544764, "learning_rate": 3.9802984155667744e-07, "loss": 0.5657, "step": 10785 }, { "epoch": 0.88, "grad_norm": 24.338474949032975, "learning_rate": 3.9751570758565284e-07, "loss": 0.6679, "step": 10786 }, { "epoch": 0.88, "grad_norm": 3.4639828238236094, "learning_rate": 3.970018921355201e-07, "loss": 0.7563, "step": 10787 }, { "epoch": 0.88, "grad_norm": 3.3006041412268003, "learning_rate": 3.964883952418402e-07, "loss": 0.7276, "step": 10788 }, { "epoch": 0.88, "grad_norm": 5.138867709159208, "learning_rate": 3.9597521694014875e-07, "loss": 0.6436, "step": 10789 }, { "epoch": 0.88, "grad_norm": 3.484028622598305, "learning_rate": 3.9546235726596273e-07, "loss": 0.7474, "step": 10790 }, { "epoch": 0.88, "grad_norm": 3.6119127333558207, "learning_rate": 3.949498162547727e-07, "loss": 0.6258, "step": 10791 }, { "epoch": 0.88, "grad_norm": 4.158881464932098, "learning_rate": 3.9443759394205303e-07, "loss": 0.8304, "step": 10792 }, { "epoch": 0.88, "grad_norm": 3.4376506485900324, "learning_rate": 3.9392569036324936e-07, "loss": 0.6527, "step": 10793 }, { "epoch": 0.88, "grad_norm": 4.1278923531363025, "learning_rate": 3.9341410555379103e-07, "loss": 0.6089, "step": 10794 }, { "epoch": 0.88, "grad_norm": 16.98344433356969, "learning_rate": 3.929028395490819e-07, "loss": 0.6806, "step": 10795 }, { "epoch": 0.88, "grad_norm": 4.610832433200752, "learning_rate": 3.923918923845038e-07, "loss": 0.7049, "step": 10796 }, { "epoch": 0.88, "grad_norm": 12.824302302694603, "learning_rate": 3.9188126409542003e-07, "loss": 0.7757, "step": 10797 }, { "epoch": 0.88, "grad_norm": 3.2547259255027003, "learning_rate": 3.9137095471716793e-07, "loss": 0.6869, "step": 10798 }, { "epoch": 0.88, "grad_norm": 6.449743102549887, "learning_rate": 3.908609642850636e-07, "loss": 0.6903, "step": 10799 }, { "epoch": 0.88, "grad_norm": 8.712775558014576, "learning_rate": 3.9035129283440165e-07, "loss": 0.6427, "step": 10800 }, { "epoch": 0.88, "grad_norm": 5.89446468517924, "learning_rate": 3.898419404004555e-07, "loss": 0.6401, "step": 10801 }, { "epoch": 0.88, "grad_norm": 3.6309736738123393, "learning_rate": 3.893329070184754e-07, "loss": 0.7009, "step": 10802 }, { "epoch": 0.88, "grad_norm": 3.052039845785766, "learning_rate": 3.88824192723688e-07, "loss": 0.7364, "step": 10803 }, { "epoch": 0.88, "grad_norm": 3.6550574277065744, "learning_rate": 3.8831579755130243e-07, "loss": 0.7283, "step": 10804 }, { "epoch": 0.88, "grad_norm": 4.480008765502872, "learning_rate": 3.878077215365006e-07, "loss": 0.6016, "step": 10805 }, { "epoch": 0.88, "grad_norm": 3.24592424617487, "learning_rate": 3.872999647144454e-07, "loss": 0.5713, "step": 10806 }, { "epoch": 0.88, "grad_norm": 3.3614592661674436, "learning_rate": 3.867925271202755e-07, "loss": 0.745, "step": 10807 }, { "epoch": 0.88, "grad_norm": 3.61736760124939, "learning_rate": 3.8628540878911105e-07, "loss": 0.7137, "step": 10808 }, { "epoch": 0.88, "grad_norm": 4.091836165510375, "learning_rate": 3.857786097560462e-07, "loss": 0.6086, "step": 10809 }, { "epoch": 0.88, "grad_norm": 4.389002292409124, "learning_rate": 3.852721300561546e-07, "loss": 0.6205, "step": 10810 }, { "epoch": 0.88, "grad_norm": 3.8487328516598094, "learning_rate": 3.8476596972449043e-07, "loss": 0.6733, "step": 10811 }, { "epoch": 0.88, "grad_norm": 22.488528845979367, "learning_rate": 3.84260128796079e-07, "loss": 0.6, "step": 10812 }, { "epoch": 0.88, "grad_norm": 4.12196490116398, "learning_rate": 3.8375460730593005e-07, "loss": 0.6711, "step": 10813 }, { "epoch": 0.88, "grad_norm": 4.105090609792938, "learning_rate": 3.8324940528902845e-07, "loss": 0.6909, "step": 10814 }, { "epoch": 0.88, "grad_norm": 3.1301202383461035, "learning_rate": 3.8274452278033836e-07, "loss": 0.6067, "step": 10815 }, { "epoch": 0.88, "grad_norm": 6.690917712219051, "learning_rate": 3.8223995981479855e-07, "loss": 0.7009, "step": 10816 }, { "epoch": 0.88, "grad_norm": 4.088085968418896, "learning_rate": 3.8173571642733056e-07, "loss": 0.4663, "step": 10817 }, { "epoch": 0.88, "grad_norm": 2.864656508894658, "learning_rate": 3.812317926528297e-07, "loss": 0.57, "step": 10818 }, { "epoch": 0.88, "grad_norm": 4.0731893650097035, "learning_rate": 3.80728188526171e-07, "loss": 0.8613, "step": 10819 }, { "epoch": 0.88, "grad_norm": 15.872279201065492, "learning_rate": 3.8022490408220757e-07, "loss": 0.6205, "step": 10820 }, { "epoch": 0.88, "grad_norm": 6.715321180448168, "learning_rate": 3.797219393557677e-07, "loss": 0.6215, "step": 10821 }, { "epoch": 0.88, "grad_norm": 5.611492008451022, "learning_rate": 3.792192943816625e-07, "loss": 0.8028, "step": 10822 }, { "epoch": 0.88, "grad_norm": 3.672703192369442, "learning_rate": 3.787169691946763e-07, "loss": 0.5803, "step": 10823 }, { "epoch": 0.88, "grad_norm": 4.222360364051522, "learning_rate": 3.78214963829574e-07, "loss": 0.5888, "step": 10824 }, { "epoch": 0.88, "grad_norm": 6.768556937185829, "learning_rate": 3.7771327832109795e-07, "loss": 0.7202, "step": 10825 }, { "epoch": 0.88, "grad_norm": 3.3218425955721544, "learning_rate": 3.772119127039675e-07, "loss": 0.6861, "step": 10826 }, { "epoch": 0.88, "grad_norm": 13.579685371603079, "learning_rate": 3.7671086701287994e-07, "loss": 0.6153, "step": 10827 }, { "epoch": 0.88, "grad_norm": 8.805727296263525, "learning_rate": 3.762101412825098e-07, "loss": 0.4538, "step": 10828 }, { "epoch": 0.88, "grad_norm": 3.4703984014085156, "learning_rate": 3.757097355475131e-07, "loss": 0.6611, "step": 10829 }, { "epoch": 0.88, "grad_norm": 4.523117551334587, "learning_rate": 3.752096498425184e-07, "loss": 0.6932, "step": 10830 }, { "epoch": 0.88, "grad_norm": 6.265916493785175, "learning_rate": 3.7470988420213796e-07, "loss": 0.691, "step": 10831 }, { "epoch": 0.88, "grad_norm": 5.668490577994119, "learning_rate": 3.7421043866095465e-07, "loss": 0.737, "step": 10832 }, { "epoch": 0.88, "grad_norm": 3.284505113694467, "learning_rate": 3.7371131325353695e-07, "loss": 0.7774, "step": 10833 }, { "epoch": 0.88, "grad_norm": 2.9792978064078075, "learning_rate": 3.73212508014425e-07, "loss": 0.6887, "step": 10834 }, { "epoch": 0.88, "grad_norm": 8.134236049816344, "learning_rate": 3.727140229781401e-07, "loss": 0.6874, "step": 10835 }, { "epoch": 0.88, "grad_norm": 7.138657160872964, "learning_rate": 3.722158581791813e-07, "loss": 0.6499, "step": 10836 }, { "epoch": 0.88, "grad_norm": 3.377567140291227, "learning_rate": 3.7171801365202266e-07, "loss": 0.6613, "step": 10837 }, { "epoch": 0.88, "grad_norm": 4.651548329583782, "learning_rate": 3.7122048943112165e-07, "loss": 0.7572, "step": 10838 }, { "epoch": 0.88, "grad_norm": 3.429868901871164, "learning_rate": 3.707232855509063e-07, "loss": 0.7628, "step": 10839 }, { "epoch": 0.88, "grad_norm": 3.03028237683892, "learning_rate": 3.702264020457885e-07, "loss": 0.6038, "step": 10840 }, { "epoch": 0.88, "grad_norm": 4.351056190404571, "learning_rate": 3.6972983895015467e-07, "loss": 0.6581, "step": 10841 }, { "epoch": 0.88, "grad_norm": 18.841013864930726, "learning_rate": 3.6923359629837117e-07, "loss": 0.7694, "step": 10842 }, { "epoch": 0.88, "grad_norm": 5.015133105399184, "learning_rate": 3.687376741247811e-07, "loss": 0.7624, "step": 10843 }, { "epoch": 0.88, "grad_norm": 4.408370547016043, "learning_rate": 3.682420724637031e-07, "loss": 0.6106, "step": 10844 }, { "epoch": 0.88, "grad_norm": 2.5022558791540153, "learning_rate": 3.677467913494398e-07, "loss": 0.6293, "step": 10845 }, { "epoch": 0.88, "grad_norm": 5.776660231366383, "learning_rate": 3.6725183081626424e-07, "loss": 0.7405, "step": 10846 }, { "epoch": 0.88, "grad_norm": 4.746430416783898, "learning_rate": 3.6675719089843245e-07, "loss": 0.6463, "step": 10847 }, { "epoch": 0.88, "grad_norm": 6.316204321422619, "learning_rate": 3.662628716301758e-07, "loss": 0.5675, "step": 10848 }, { "epoch": 0.88, "grad_norm": 4.400179913488069, "learning_rate": 3.657688730457054e-07, "loss": 0.6398, "step": 10849 }, { "epoch": 0.88, "grad_norm": 3.286898495998081, "learning_rate": 3.6527519517920886e-07, "loss": 0.752, "step": 10850 }, { "epoch": 0.88, "grad_norm": 3.0158511600315054, "learning_rate": 3.64781838064851e-07, "loss": 0.4571, "step": 10851 }, { "epoch": 0.88, "grad_norm": 3.6313709695818357, "learning_rate": 3.642888017367763e-07, "loss": 0.64, "step": 10852 }, { "epoch": 0.88, "grad_norm": 4.846280850082306, "learning_rate": 3.63796086229104e-07, "loss": 0.7249, "step": 10853 }, { "epoch": 0.88, "grad_norm": 4.08817591106842, "learning_rate": 3.633036915759358e-07, "loss": 0.6381, "step": 10854 }, { "epoch": 0.88, "grad_norm": 7.201152463679357, "learning_rate": 3.628116178113461e-07, "loss": 0.8171, "step": 10855 }, { "epoch": 0.88, "grad_norm": 4.606024947970406, "learning_rate": 3.6231986496939153e-07, "loss": 0.5831, "step": 10856 }, { "epoch": 0.88, "grad_norm": 7.281297639751838, "learning_rate": 3.618284330841032e-07, "loss": 0.7388, "step": 10857 }, { "epoch": 0.88, "grad_norm": 3.6732143611130845, "learning_rate": 3.6133732218949223e-07, "loss": 0.6119, "step": 10858 }, { "epoch": 0.88, "grad_norm": 4.408555356895679, "learning_rate": 3.608465323195454e-07, "loss": 0.6128, "step": 10859 }, { "epoch": 0.88, "grad_norm": 9.460299777518019, "learning_rate": 3.603560635082287e-07, "loss": 0.6298, "step": 10860 }, { "epoch": 0.88, "grad_norm": 5.5701445925225785, "learning_rate": 3.598659157894868e-07, "loss": 0.7678, "step": 10861 }, { "epoch": 0.88, "grad_norm": 5.58053137794694, "learning_rate": 3.593760891972392e-07, "loss": 0.6718, "step": 10862 }, { "epoch": 0.88, "grad_norm": 6.558371047788067, "learning_rate": 3.5888658376538654e-07, "loss": 0.58, "step": 10863 }, { "epoch": 0.88, "grad_norm": 3.494515364394613, "learning_rate": 3.583973995278056e-07, "loss": 0.7092, "step": 10864 }, { "epoch": 0.88, "grad_norm": 2.7476416955410006, "learning_rate": 3.5790853651835043e-07, "loss": 0.574, "step": 10865 }, { "epoch": 0.88, "grad_norm": 2.8351782462483706, "learning_rate": 3.574199947708529e-07, "loss": 0.6011, "step": 10866 }, { "epoch": 0.88, "grad_norm": 5.00961626519902, "learning_rate": 3.5693177431912473e-07, "loss": 0.781, "step": 10867 }, { "epoch": 0.88, "grad_norm": 3.9902535354987, "learning_rate": 3.564438751969523e-07, "loss": 0.5352, "step": 10868 }, { "epoch": 0.88, "grad_norm": 4.832401005192465, "learning_rate": 3.55956297438102e-07, "loss": 0.6396, "step": 10869 }, { "epoch": 0.88, "grad_norm": 6.607730565230629, "learning_rate": 3.554690410763173e-07, "loss": 0.6863, "step": 10870 }, { "epoch": 0.88, "grad_norm": 6.44160700052036, "learning_rate": 3.5498210614532013e-07, "loss": 0.7407, "step": 10871 }, { "epoch": 0.88, "grad_norm": 3.3963547794851108, "learning_rate": 3.5449549267880803e-07, "loss": 0.6021, "step": 10872 }, { "epoch": 0.88, "grad_norm": 3.433347639161826, "learning_rate": 3.5400920071045787e-07, "loss": 0.721, "step": 10873 }, { "epoch": 0.88, "grad_norm": 6.9244875742490155, "learning_rate": 3.5352323027392497e-07, "loss": 0.7314, "step": 10874 }, { "epoch": 0.88, "grad_norm": 5.165494561102616, "learning_rate": 3.530375814028414e-07, "loss": 0.7358, "step": 10875 }, { "epoch": 0.88, "grad_norm": 14.7942220002371, "learning_rate": 3.525522541308163e-07, "loss": 0.6501, "step": 10876 }, { "epoch": 0.88, "grad_norm": 4.20282943510205, "learning_rate": 3.520672484914384e-07, "loss": 0.7039, "step": 10877 }, { "epoch": 0.88, "grad_norm": 3.123767783444509, "learning_rate": 3.51582564518273e-07, "loss": 0.6741, "step": 10878 }, { "epoch": 0.88, "grad_norm": 4.173298116133693, "learning_rate": 3.510982022448628e-07, "loss": 0.7465, "step": 10879 }, { "epoch": 0.88, "grad_norm": 4.388081029425778, "learning_rate": 3.506141617047282e-07, "loss": 0.8184, "step": 10880 }, { "epoch": 0.88, "grad_norm": 3.9527444792979405, "learning_rate": 3.5013044293136957e-07, "loss": 0.6049, "step": 10881 }, { "epoch": 0.88, "grad_norm": 8.214400547552188, "learning_rate": 3.496470459582624e-07, "loss": 0.721, "step": 10882 }, { "epoch": 0.88, "grad_norm": 3.019469287232392, "learning_rate": 3.4916397081885935e-07, "loss": 0.6151, "step": 10883 }, { "epoch": 0.88, "grad_norm": 2.7579013561345302, "learning_rate": 3.4868121754659533e-07, "loss": 0.6887, "step": 10884 }, { "epoch": 0.88, "grad_norm": 5.3501040291615505, "learning_rate": 3.48198786174877e-07, "loss": 0.7722, "step": 10885 }, { "epoch": 0.88, "grad_norm": 3.1480402326081856, "learning_rate": 3.477166767370932e-07, "loss": 0.6364, "step": 10886 }, { "epoch": 0.88, "grad_norm": 6.992128079977042, "learning_rate": 3.4723488926660777e-07, "loss": 0.5978, "step": 10887 }, { "epoch": 0.88, "grad_norm": 3.1966450633847643, "learning_rate": 3.467534237967651e-07, "loss": 0.6207, "step": 10888 }, { "epoch": 0.88, "grad_norm": 4.275108895684727, "learning_rate": 3.462722803608848e-07, "loss": 0.7046, "step": 10889 }, { "epoch": 0.88, "grad_norm": 2.5054385421132968, "learning_rate": 3.457914589922645e-07, "loss": 0.641, "step": 10890 }, { "epoch": 0.88, "grad_norm": 4.428693232769049, "learning_rate": 3.4531095972418103e-07, "loss": 0.6462, "step": 10891 }, { "epoch": 0.88, "grad_norm": 2.5645204253873395, "learning_rate": 3.448307825898872e-07, "loss": 0.6611, "step": 10892 }, { "epoch": 0.88, "grad_norm": 3.477850476916759, "learning_rate": 3.443509276226148e-07, "loss": 0.7433, "step": 10893 }, { "epoch": 0.88, "grad_norm": 9.552941877146717, "learning_rate": 3.438713948555722e-07, "loss": 0.6224, "step": 10894 }, { "epoch": 0.88, "grad_norm": 4.62616435209956, "learning_rate": 3.433921843219468e-07, "loss": 0.6496, "step": 10895 }, { "epoch": 0.88, "grad_norm": 3.6714003301140625, "learning_rate": 3.4291329605490196e-07, "loss": 0.723, "step": 10896 }, { "epoch": 0.89, "grad_norm": 5.767331545991184, "learning_rate": 3.4243473008758134e-07, "loss": 0.7232, "step": 10897 }, { "epoch": 0.89, "grad_norm": 3.304808188817758, "learning_rate": 3.4195648645310443e-07, "loss": 0.7345, "step": 10898 }, { "epoch": 0.89, "grad_norm": 5.1712891953343645, "learning_rate": 3.4147856518456757e-07, "loss": 0.7991, "step": 10899 }, { "epoch": 0.89, "grad_norm": 3.4617159556750137, "learning_rate": 3.4100096631504597e-07, "loss": 0.6601, "step": 10900 }, { "epoch": 0.89, "grad_norm": 4.555856076024046, "learning_rate": 3.4052368987759323e-07, "loss": 0.7221, "step": 10901 }, { "epoch": 0.89, "grad_norm": 14.385081937100026, "learning_rate": 3.400467359052395e-07, "loss": 0.5517, "step": 10902 }, { "epoch": 0.89, "grad_norm": 21.449272974765854, "learning_rate": 3.3957010443099294e-07, "loss": 0.6947, "step": 10903 }, { "epoch": 0.89, "grad_norm": 3.2195911324029507, "learning_rate": 3.3909379548784095e-07, "loss": 0.5112, "step": 10904 }, { "epoch": 0.89, "grad_norm": 5.057611814677612, "learning_rate": 3.386178091087444e-07, "loss": 0.6487, "step": 10905 }, { "epoch": 0.89, "grad_norm": 3.0765013536103134, "learning_rate": 3.381421453266465e-07, "loss": 0.57, "step": 10906 }, { "epoch": 0.89, "grad_norm": 5.559972952182055, "learning_rate": 3.3766680417446574e-07, "loss": 0.7629, "step": 10907 }, { "epoch": 0.89, "grad_norm": 4.8210301378962, "learning_rate": 3.371917856850981e-07, "loss": 0.6382, "step": 10908 }, { "epoch": 0.89, "grad_norm": 5.398364042425473, "learning_rate": 3.3671708989141905e-07, "loss": 0.648, "step": 10909 }, { "epoch": 0.89, "grad_norm": 3.32764146430838, "learning_rate": 3.3624271682627884e-07, "loss": 0.6049, "step": 10910 }, { "epoch": 0.89, "grad_norm": 8.365182820873128, "learning_rate": 3.357686665225096e-07, "loss": 0.576, "step": 10911 }, { "epoch": 0.89, "grad_norm": 4.28327169175371, "learning_rate": 3.3529493901291567e-07, "loss": 0.6841, "step": 10912 }, { "epoch": 0.89, "grad_norm": 6.183675003210733, "learning_rate": 3.3482153433028407e-07, "loss": 0.7028, "step": 10913 }, { "epoch": 0.89, "grad_norm": 9.483625523776135, "learning_rate": 3.3434845250737593e-07, "loss": 0.6664, "step": 10914 }, { "epoch": 0.89, "grad_norm": 29.371961055661107, "learning_rate": 3.3387569357693274e-07, "loss": 0.6688, "step": 10915 }, { "epoch": 0.89, "grad_norm": 3.50031587473056, "learning_rate": 3.3340325757167224e-07, "loss": 0.5737, "step": 10916 }, { "epoch": 0.89, "grad_norm": 4.071057120881112, "learning_rate": 3.3293114452428944e-07, "loss": 0.4962, "step": 10917 }, { "epoch": 0.89, "grad_norm": 3.14887072082782, "learning_rate": 3.3245935446745815e-07, "loss": 0.5862, "step": 10918 }, { "epoch": 0.89, "grad_norm": 6.829552335957282, "learning_rate": 3.3198788743382784e-07, "loss": 0.7145, "step": 10919 }, { "epoch": 0.89, "grad_norm": 5.971437757259194, "learning_rate": 3.3151674345602844e-07, "loss": 0.643, "step": 10920 }, { "epoch": 0.89, "grad_norm": 9.7659533856254, "learning_rate": 3.310459225666651e-07, "loss": 0.7354, "step": 10921 }, { "epoch": 0.89, "grad_norm": 4.039331298549966, "learning_rate": 3.3057542479832285e-07, "loss": 0.6404, "step": 10922 }, { "epoch": 0.89, "grad_norm": 2.524898602175444, "learning_rate": 3.301052501835622e-07, "loss": 0.6528, "step": 10923 }, { "epoch": 0.89, "grad_norm": 3.2135190610748077, "learning_rate": 3.296353987549222e-07, "loss": 0.5357, "step": 10924 }, { "epoch": 0.89, "grad_norm": 4.241760176126978, "learning_rate": 3.2916587054491967e-07, "loss": 0.6785, "step": 10925 }, { "epoch": 0.89, "grad_norm": 7.035880113520567, "learning_rate": 3.286966655860485e-07, "loss": 0.7147, "step": 10926 }, { "epoch": 0.89, "grad_norm": 3.4895870297386273, "learning_rate": 3.282277839107817e-07, "loss": 0.6999, "step": 10927 }, { "epoch": 0.89, "grad_norm": 9.452803755456355, "learning_rate": 3.277592255515671e-07, "loss": 0.5655, "step": 10928 }, { "epoch": 0.89, "grad_norm": 3.7550536108174133, "learning_rate": 3.2729099054083393e-07, "loss": 0.6218, "step": 10929 }, { "epoch": 0.89, "grad_norm": 3.409548020345975, "learning_rate": 3.2682307891098606e-07, "loss": 0.6936, "step": 10930 }, { "epoch": 0.89, "grad_norm": 3.845889888304671, "learning_rate": 3.263554906944055e-07, "loss": 0.7564, "step": 10931 }, { "epoch": 0.89, "grad_norm": 3.650318890663278, "learning_rate": 3.2588822592345304e-07, "loss": 0.7519, "step": 10932 }, { "epoch": 0.89, "grad_norm": 4.163174538561021, "learning_rate": 3.2542128463046495e-07, "loss": 0.4984, "step": 10933 }, { "epoch": 0.89, "grad_norm": 6.3085841970843815, "learning_rate": 3.249546668477588e-07, "loss": 0.629, "step": 10934 }, { "epoch": 0.89, "grad_norm": 3.9483163598080626, "learning_rate": 3.244883726076253e-07, "loss": 0.6657, "step": 10935 }, { "epoch": 0.89, "grad_norm": 4.006543373567025, "learning_rate": 3.240224019423366e-07, "loss": 0.7121, "step": 10936 }, { "epoch": 0.89, "grad_norm": 3.4621533129599507, "learning_rate": 3.235567548841401e-07, "loss": 0.7952, "step": 10937 }, { "epoch": 0.89, "grad_norm": 3.1730685169617985, "learning_rate": 3.2309143146526114e-07, "loss": 0.726, "step": 10938 }, { "epoch": 0.89, "grad_norm": 2.912219007765076, "learning_rate": 3.226264317179029e-07, "loss": 0.7009, "step": 10939 }, { "epoch": 0.89, "grad_norm": 3.8435243721097962, "learning_rate": 3.2216175567424737e-07, "loss": 0.5869, "step": 10940 }, { "epoch": 0.89, "grad_norm": 7.084705475421821, "learning_rate": 3.2169740336645274e-07, "loss": 0.6604, "step": 10941 }, { "epoch": 0.89, "grad_norm": 4.607484392596619, "learning_rate": 3.2123337482665385e-07, "loss": 0.5276, "step": 10942 }, { "epoch": 0.89, "grad_norm": 6.1201509016279205, "learning_rate": 3.2076967008696614e-07, "loss": 0.7097, "step": 10943 }, { "epoch": 0.89, "grad_norm": 4.117041536382245, "learning_rate": 3.2030628917948006e-07, "loss": 0.6372, "step": 10944 }, { "epoch": 0.89, "grad_norm": 3.364394496305914, "learning_rate": 3.198432321362643e-07, "loss": 0.5792, "step": 10945 }, { "epoch": 0.89, "grad_norm": 13.844525175317687, "learning_rate": 3.193804989893656e-07, "loss": 0.6414, "step": 10946 }, { "epoch": 0.89, "grad_norm": 5.01081783945343, "learning_rate": 3.189180897708083e-07, "loss": 0.5943, "step": 10947 }, { "epoch": 0.89, "grad_norm": 5.371180046602053, "learning_rate": 3.184560045125934e-07, "loss": 0.6251, "step": 10948 }, { "epoch": 0.89, "grad_norm": 9.389195349538713, "learning_rate": 3.1799424324670035e-07, "loss": 0.6732, "step": 10949 }, { "epoch": 0.89, "grad_norm": 5.819654660523757, "learning_rate": 3.175328060050864e-07, "loss": 0.5288, "step": 10950 }, { "epoch": 0.89, "grad_norm": 3.8829556961393905, "learning_rate": 3.170716928196854e-07, "loss": 0.7548, "step": 10951 }, { "epoch": 0.89, "grad_norm": 4.684812362635046, "learning_rate": 3.1661090372240965e-07, "loss": 0.6741, "step": 10952 }, { "epoch": 0.89, "grad_norm": 3.7338990775230925, "learning_rate": 3.161504387451475e-07, "loss": 0.6004, "step": 10953 }, { "epoch": 0.89, "grad_norm": 6.849774123859998, "learning_rate": 3.156902979197679e-07, "loss": 0.6495, "step": 10954 }, { "epoch": 0.89, "grad_norm": 9.581000665819154, "learning_rate": 3.1523048127811426e-07, "loss": 0.5655, "step": 10955 }, { "epoch": 0.89, "grad_norm": 6.887475936059228, "learning_rate": 3.147709888520084e-07, "loss": 0.6411, "step": 10956 }, { "epoch": 0.89, "grad_norm": 10.829624710704543, "learning_rate": 3.1431182067325207e-07, "loss": 0.6369, "step": 10957 }, { "epoch": 0.89, "grad_norm": 3.878753810976778, "learning_rate": 3.1385297677362035e-07, "loss": 0.7126, "step": 10958 }, { "epoch": 0.89, "grad_norm": 2.780466212127189, "learning_rate": 3.133944571848696e-07, "loss": 0.6138, "step": 10959 }, { "epoch": 0.89, "grad_norm": 2.3058794313638065, "learning_rate": 3.129362619387305e-07, "loss": 0.7082, "step": 10960 }, { "epoch": 0.89, "grad_norm": 5.446517143292522, "learning_rate": 3.124783910669155e-07, "loss": 0.7126, "step": 10961 }, { "epoch": 0.89, "grad_norm": 3.3075086431795166, "learning_rate": 3.120208446011108e-07, "loss": 0.8188, "step": 10962 }, { "epoch": 0.89, "grad_norm": 5.549107875505797, "learning_rate": 3.1156362257298065e-07, "loss": 0.6251, "step": 10963 }, { "epoch": 0.89, "grad_norm": 4.620396362887429, "learning_rate": 3.111067250141697e-07, "loss": 0.5695, "step": 10964 }, { "epoch": 0.89, "grad_norm": 2.9912729095760766, "learning_rate": 3.106501519562971e-07, "loss": 0.6634, "step": 10965 }, { "epoch": 0.89, "grad_norm": 25.75748257553001, "learning_rate": 3.1019390343096033e-07, "loss": 0.7508, "step": 10966 }, { "epoch": 0.89, "grad_norm": 16.94716932791019, "learning_rate": 3.097379794697342e-07, "loss": 0.5963, "step": 10967 }, { "epoch": 0.89, "grad_norm": 2.7729427253941332, "learning_rate": 3.0928238010417275e-07, "loss": 0.6435, "step": 10968 }, { "epoch": 0.89, "grad_norm": 7.919912434676973, "learning_rate": 3.088271053658054e-07, "loss": 0.6582, "step": 10969 }, { "epoch": 0.89, "grad_norm": 5.811574226645277, "learning_rate": 3.0837215528614127e-07, "loss": 0.6864, "step": 10970 }, { "epoch": 0.89, "grad_norm": 21.720939131055648, "learning_rate": 3.079175298966647e-07, "loss": 0.6347, "step": 10971 }, { "epoch": 0.89, "grad_norm": 6.784288646832833, "learning_rate": 3.0746322922883933e-07, "loss": 0.665, "step": 10972 }, { "epoch": 0.89, "grad_norm": 3.764678459861052, "learning_rate": 3.0700925331410447e-07, "loss": 0.6391, "step": 10973 }, { "epoch": 0.89, "grad_norm": 3.5031464046138905, "learning_rate": 3.0655560218387835e-07, "loss": 0.6944, "step": 10974 }, { "epoch": 0.89, "grad_norm": 3.535400395103272, "learning_rate": 3.0610227586955753e-07, "loss": 0.7876, "step": 10975 }, { "epoch": 0.89, "grad_norm": 3.633990322605817, "learning_rate": 3.0564927440251355e-07, "loss": 0.7006, "step": 10976 }, { "epoch": 0.89, "grad_norm": 4.360386217156877, "learning_rate": 3.051965978140997e-07, "loss": 0.5755, "step": 10977 }, { "epoch": 0.89, "grad_norm": 2.3788548101949814, "learning_rate": 3.047442461356409e-07, "loss": 0.7298, "step": 10978 }, { "epoch": 0.89, "grad_norm": 7.423088251507555, "learning_rate": 3.0429221939844433e-07, "loss": 0.7105, "step": 10979 }, { "epoch": 0.89, "grad_norm": 3.1681020190668243, "learning_rate": 3.0384051763379327e-07, "loss": 0.8693, "step": 10980 }, { "epoch": 0.89, "grad_norm": 3.1250691843570118, "learning_rate": 3.0338914087294667e-07, "loss": 0.5456, "step": 10981 }, { "epoch": 0.89, "grad_norm": 2.2767691600742492, "learning_rate": 3.029380891471445e-07, "loss": 0.6396, "step": 10982 }, { "epoch": 0.89, "grad_norm": 3.2631299435276335, "learning_rate": 3.0248736248760126e-07, "loss": 0.7114, "step": 10983 }, { "epoch": 0.89, "grad_norm": 5.383009363404805, "learning_rate": 3.0203696092551193e-07, "loss": 0.6221, "step": 10984 }, { "epoch": 0.89, "grad_norm": 4.66888702240897, "learning_rate": 3.015868844920444e-07, "loss": 0.6413, "step": 10985 }, { "epoch": 0.89, "grad_norm": 3.327183694571496, "learning_rate": 3.011371332183488e-07, "loss": 0.6769, "step": 10986 }, { "epoch": 0.89, "grad_norm": 7.734963464205543, "learning_rate": 3.0068770713554965e-07, "loss": 0.7668, "step": 10987 }, { "epoch": 0.89, "grad_norm": 11.050728398062168, "learning_rate": 3.002386062747503e-07, "loss": 0.6344, "step": 10988 }, { "epoch": 0.89, "grad_norm": 4.536299399405929, "learning_rate": 2.997898306670322e-07, "loss": 0.5377, "step": 10989 }, { "epoch": 0.89, "grad_norm": 4.067352075678169, "learning_rate": 2.993413803434525e-07, "loss": 0.6885, "step": 10990 }, { "epoch": 0.89, "grad_norm": 7.296190236092171, "learning_rate": 2.988932553350471e-07, "loss": 0.646, "step": 10991 }, { "epoch": 0.89, "grad_norm": 3.126252517031877, "learning_rate": 2.9844545567282835e-07, "loss": 0.7359, "step": 10992 }, { "epoch": 0.89, "grad_norm": 3.647144943073153, "learning_rate": 2.979979813877881e-07, "loss": 0.7087, "step": 10993 }, { "epoch": 0.89, "grad_norm": 7.786361848617197, "learning_rate": 2.9755083251089334e-07, "loss": 0.6814, "step": 10994 }, { "epoch": 0.89, "grad_norm": 5.947825742287276, "learning_rate": 2.971040090730909e-07, "loss": 0.6607, "step": 10995 }, { "epoch": 0.89, "grad_norm": 2.6824023817478513, "learning_rate": 2.966575111053027e-07, "loss": 0.6799, "step": 10996 }, { "epoch": 0.89, "grad_norm": 7.290364883255014, "learning_rate": 2.9621133863842913e-07, "loss": 0.7455, "step": 10997 }, { "epoch": 0.89, "grad_norm": 4.814323049416905, "learning_rate": 2.957654917033487e-07, "loss": 0.5305, "step": 10998 }, { "epoch": 0.89, "grad_norm": 11.724736310381989, "learning_rate": 2.953199703309162e-07, "loss": 0.6842, "step": 10999 }, { "epoch": 0.89, "grad_norm": 5.85119626303855, "learning_rate": 2.948747745519648e-07, "loss": 0.5625, "step": 11000 }, { "epoch": 0.89, "grad_norm": 4.289126343156414, "learning_rate": 2.9442990439730477e-07, "loss": 0.7346, "step": 11001 }, { "epoch": 0.89, "grad_norm": 6.147511325838741, "learning_rate": 2.939853598977249e-07, "loss": 0.8817, "step": 11002 }, { "epoch": 0.89, "grad_norm": 4.074792092739971, "learning_rate": 2.935411410839889e-07, "loss": 0.5382, "step": 11003 }, { "epoch": 0.89, "grad_norm": 5.62727757776066, "learning_rate": 2.9309724798684105e-07, "loss": 0.7896, "step": 11004 }, { "epoch": 0.89, "grad_norm": 3.6789632017200478, "learning_rate": 2.926536806370006e-07, "loss": 0.5883, "step": 11005 }, { "epoch": 0.89, "grad_norm": 2.8308157892618664, "learning_rate": 2.922104390651642e-07, "loss": 0.7303, "step": 11006 }, { "epoch": 0.89, "grad_norm": 3.1674429898774004, "learning_rate": 2.9176752330200895e-07, "loss": 0.6527, "step": 11007 }, { "epoch": 0.89, "grad_norm": 3.2703844883961812, "learning_rate": 2.9132493337818644e-07, "loss": 0.7117, "step": 11008 }, { "epoch": 0.89, "grad_norm": 5.449218084393396, "learning_rate": 2.908826693243266e-07, "loss": 0.6392, "step": 11009 }, { "epoch": 0.89, "grad_norm": 10.042307722425726, "learning_rate": 2.9044073117103777e-07, "loss": 0.5633, "step": 11010 }, { "epoch": 0.89, "grad_norm": 3.7124749921247826, "learning_rate": 2.8999911894890434e-07, "loss": 0.6788, "step": 11011 }, { "epoch": 0.89, "grad_norm": 3.6686671637432933, "learning_rate": 2.895578326884879e-07, "loss": 0.7725, "step": 11012 }, { "epoch": 0.89, "grad_norm": 12.499661304534454, "learning_rate": 2.891168724203286e-07, "loss": 0.5301, "step": 11013 }, { "epoch": 0.89, "grad_norm": 4.923511311841258, "learning_rate": 2.8867623817494415e-07, "loss": 0.7701, "step": 11014 }, { "epoch": 0.89, "grad_norm": 4.282808010497953, "learning_rate": 2.882359299828286e-07, "loss": 0.4871, "step": 11015 }, { "epoch": 0.89, "grad_norm": 5.279265734248698, "learning_rate": 2.877959478744546e-07, "loss": 0.6954, "step": 11016 }, { "epoch": 0.89, "grad_norm": 3.992050200322682, "learning_rate": 2.8735629188027247e-07, "loss": 0.745, "step": 11017 }, { "epoch": 0.89, "grad_norm": 10.735040539927928, "learning_rate": 2.869169620307072e-07, "loss": 0.6784, "step": 11018 }, { "epoch": 0.89, "grad_norm": 2.8885986017060663, "learning_rate": 2.8647795835616387e-07, "loss": 0.7013, "step": 11019 }, { "epoch": 0.9, "grad_norm": 4.931027669111495, "learning_rate": 2.8603928088702547e-07, "loss": 0.5912, "step": 11020 }, { "epoch": 0.9, "grad_norm": 4.738511406984342, "learning_rate": 2.856009296536505e-07, "loss": 0.5725, "step": 11021 }, { "epoch": 0.9, "grad_norm": 2.9900060693603896, "learning_rate": 2.8516290468637467e-07, "loss": 0.6641, "step": 11022 }, { "epoch": 0.9, "grad_norm": 3.3331611004904107, "learning_rate": 2.847252060155131e-07, "loss": 0.5742, "step": 11023 }, { "epoch": 0.9, "grad_norm": 3.753065822721993, "learning_rate": 2.842878336713578e-07, "loss": 0.8139, "step": 11024 }, { "epoch": 0.9, "grad_norm": 3.0790590520907166, "learning_rate": 2.838507876841767e-07, "loss": 0.658, "step": 11025 }, { "epoch": 0.9, "grad_norm": 4.094361228382077, "learning_rate": 2.834140680842157e-07, "loss": 0.6788, "step": 11026 }, { "epoch": 0.9, "grad_norm": 6.1670024060023465, "learning_rate": 2.829776749016999e-07, "loss": 0.7216, "step": 11027 }, { "epoch": 0.9, "grad_norm": 3.8104025176980767, "learning_rate": 2.8254160816682975e-07, "loss": 0.7401, "step": 11028 }, { "epoch": 0.9, "grad_norm": 4.505179201853745, "learning_rate": 2.8210586790978323e-07, "loss": 0.6331, "step": 11029 }, { "epoch": 0.9, "grad_norm": 5.759404068733508, "learning_rate": 2.81670454160719e-07, "loss": 0.7486, "step": 11030 }, { "epoch": 0.9, "grad_norm": 5.196715188777045, "learning_rate": 2.8123536694976636e-07, "loss": 0.5044, "step": 11031 }, { "epoch": 0.9, "grad_norm": 4.06660649156897, "learning_rate": 2.8080060630703896e-07, "loss": 0.6347, "step": 11032 }, { "epoch": 0.9, "grad_norm": 7.477711581152729, "learning_rate": 2.803661722626233e-07, "loss": 0.6174, "step": 11033 }, { "epoch": 0.9, "grad_norm": 4.101523609885264, "learning_rate": 2.799320648465864e-07, "loss": 0.8228, "step": 11034 }, { "epoch": 0.9, "grad_norm": 3.625755814722213, "learning_rate": 2.7949828408897097e-07, "loss": 0.7009, "step": 11035 }, { "epoch": 0.9, "grad_norm": 4.843786626435055, "learning_rate": 2.7906483001979623e-07, "loss": 0.7467, "step": 11036 }, { "epoch": 0.9, "grad_norm": 4.05239168898531, "learning_rate": 2.7863170266906215e-07, "loss": 0.6315, "step": 11037 }, { "epoch": 0.9, "grad_norm": 2.2900184133245327, "learning_rate": 2.7819890206674083e-07, "loss": 0.511, "step": 11038 }, { "epoch": 0.9, "grad_norm": 2.3478426629191254, "learning_rate": 2.777664282427872e-07, "loss": 0.6157, "step": 11039 }, { "epoch": 0.9, "grad_norm": 4.732189669581965, "learning_rate": 2.773342812271301e-07, "loss": 0.7367, "step": 11040 }, { "epoch": 0.9, "grad_norm": 6.484904005981082, "learning_rate": 2.7690246104967735e-07, "loss": 0.6805, "step": 11041 }, { "epoch": 0.9, "grad_norm": 3.179213619844034, "learning_rate": 2.7647096774031267e-07, "loss": 0.6705, "step": 11042 }, { "epoch": 0.9, "grad_norm": 5.33654241393421, "learning_rate": 2.760398013289001e-07, "loss": 0.6427, "step": 11043 }, { "epoch": 0.9, "grad_norm": 5.840324108276913, "learning_rate": 2.7560896184527674e-07, "loss": 0.6388, "step": 11044 }, { "epoch": 0.9, "grad_norm": 3.9816053213673377, "learning_rate": 2.7517844931926106e-07, "loss": 0.8055, "step": 11045 }, { "epoch": 0.9, "grad_norm": 4.262387984297056, "learning_rate": 2.7474826378064647e-07, "loss": 0.7576, "step": 11046 }, { "epoch": 0.9, "grad_norm": 9.057910352882542, "learning_rate": 2.7431840525920407e-07, "loss": 0.7114, "step": 11047 }, { "epoch": 0.9, "grad_norm": 5.172748901330774, "learning_rate": 2.73888873784684e-07, "loss": 0.778, "step": 11048 }, { "epoch": 0.9, "grad_norm": 4.8044785611634655, "learning_rate": 2.7345966938681134e-07, "loss": 0.5451, "step": 11049 }, { "epoch": 0.9, "grad_norm": 2.6873342534314486, "learning_rate": 2.730307920952913e-07, "loss": 0.6718, "step": 11050 }, { "epoch": 0.9, "grad_norm": 4.4728732922107275, "learning_rate": 2.7260224193980335e-07, "loss": 0.6887, "step": 11051 }, { "epoch": 0.9, "grad_norm": 2.5400663468894873, "learning_rate": 2.7217401895000664e-07, "loss": 0.7207, "step": 11052 }, { "epoch": 0.9, "grad_norm": 30.80619272167525, "learning_rate": 2.7174612315553627e-07, "loss": 0.6757, "step": 11053 }, { "epoch": 0.9, "grad_norm": 4.078369844990921, "learning_rate": 2.713185545860053e-07, "loss": 0.7554, "step": 11054 }, { "epoch": 0.9, "grad_norm": 7.3351131093809085, "learning_rate": 2.708913132710056e-07, "loss": 0.5357, "step": 11055 }, { "epoch": 0.9, "grad_norm": 4.937600040195671, "learning_rate": 2.7046439924010295e-07, "loss": 0.5559, "step": 11056 }, { "epoch": 0.9, "grad_norm": 3.947098860009083, "learning_rate": 2.7003781252284533e-07, "loss": 0.6379, "step": 11057 }, { "epoch": 0.9, "grad_norm": 8.937500308756222, "learning_rate": 2.6961155314875144e-07, "loss": 0.5549, "step": 11058 }, { "epoch": 0.9, "grad_norm": 4.740888046462372, "learning_rate": 2.6918562114732374e-07, "loss": 0.7494, "step": 11059 }, { "epoch": 0.9, "grad_norm": 2.7691718979419084, "learning_rate": 2.687600165480392e-07, "loss": 0.6387, "step": 11060 }, { "epoch": 0.9, "grad_norm": 4.193479970646864, "learning_rate": 2.6833473938035094e-07, "loss": 0.6808, "step": 11061 }, { "epoch": 0.9, "grad_norm": 5.989482028595132, "learning_rate": 2.679097896736921e-07, "loss": 0.5675, "step": 11062 }, { "epoch": 0.9, "grad_norm": 3.038569809133305, "learning_rate": 2.6748516745747187e-07, "loss": 0.5682, "step": 11063 }, { "epoch": 0.9, "grad_norm": 4.210461827183721, "learning_rate": 2.670608727610763e-07, "loss": 0.6391, "step": 11064 }, { "epoch": 0.9, "grad_norm": 3.858660906488997, "learning_rate": 2.6663690561386903e-07, "loss": 0.7706, "step": 11065 }, { "epoch": 0.9, "grad_norm": 3.18010352232131, "learning_rate": 2.6621326604519216e-07, "loss": 0.6139, "step": 11066 }, { "epoch": 0.9, "grad_norm": 4.873795037276898, "learning_rate": 2.6578995408436283e-07, "loss": 0.6138, "step": 11067 }, { "epoch": 0.9, "grad_norm": 2.7048552037389864, "learning_rate": 2.653669697606781e-07, "loss": 0.7192, "step": 11068 }, { "epoch": 0.9, "grad_norm": 5.944068909352404, "learning_rate": 2.649443131034113e-07, "loss": 0.5785, "step": 11069 }, { "epoch": 0.9, "grad_norm": 5.275556438146503, "learning_rate": 2.645219841418123e-07, "loss": 0.5607, "step": 11070 }, { "epoch": 0.9, "grad_norm": 2.8747941865006155, "learning_rate": 2.6409998290510884e-07, "loss": 0.7647, "step": 11071 }, { "epoch": 0.9, "grad_norm": 4.900992328853086, "learning_rate": 2.6367830942250596e-07, "loss": 0.5738, "step": 11072 }, { "epoch": 0.9, "grad_norm": 6.8776815581718935, "learning_rate": 2.6325696372318687e-07, "loss": 0.7264, "step": 11073 }, { "epoch": 0.9, "grad_norm": 6.526611136912102, "learning_rate": 2.6283594583631e-07, "loss": 0.7287, "step": 11074 }, { "epoch": 0.9, "grad_norm": 3.772144936698093, "learning_rate": 2.6241525579101425e-07, "loss": 0.6975, "step": 11075 }, { "epoch": 0.9, "grad_norm": 3.9206089330944818, "learning_rate": 2.61994893616413e-07, "loss": 0.5787, "step": 11076 }, { "epoch": 0.9, "grad_norm": 3.7637230687007723, "learning_rate": 2.615748593415979e-07, "loss": 0.6346, "step": 11077 }, { "epoch": 0.9, "grad_norm": 4.426360126121607, "learning_rate": 2.6115515299563856e-07, "loss": 0.4801, "step": 11078 }, { "epoch": 0.9, "grad_norm": 5.7396289326706365, "learning_rate": 2.6073577460758003e-07, "loss": 0.6079, "step": 11079 }, { "epoch": 0.9, "grad_norm": 3.394920379151465, "learning_rate": 2.6031672420644694e-07, "loss": 0.6492, "step": 11080 }, { "epoch": 0.9, "grad_norm": 3.1043298234267467, "learning_rate": 2.5989800182123994e-07, "loss": 0.711, "step": 11081 }, { "epoch": 0.9, "grad_norm": 7.656738845633479, "learning_rate": 2.5947960748093805e-07, "loss": 0.5637, "step": 11082 }, { "epoch": 0.9, "grad_norm": 14.313786296976895, "learning_rate": 2.5906154121449587e-07, "loss": 0.7494, "step": 11083 }, { "epoch": 0.9, "grad_norm": 4.095595547293801, "learning_rate": 2.5864380305084646e-07, "loss": 0.6043, "step": 11084 }, { "epoch": 0.9, "grad_norm": 3.2503757638554864, "learning_rate": 2.5822639301889995e-07, "loss": 0.5951, "step": 11085 }, { "epoch": 0.9, "grad_norm": 4.421316136387105, "learning_rate": 2.578093111475433e-07, "loss": 0.731, "step": 11086 }, { "epoch": 0.9, "grad_norm": 2.4469570776847727, "learning_rate": 2.573925574656422e-07, "loss": 0.6792, "step": 11087 }, { "epoch": 0.9, "grad_norm": 4.180240511963956, "learning_rate": 2.5697613200203697e-07, "loss": 0.7852, "step": 11088 }, { "epoch": 0.9, "grad_norm": 3.560192570109727, "learning_rate": 2.5656003478554903e-07, "loss": 0.6535, "step": 11089 }, { "epoch": 0.9, "grad_norm": 9.993804265099204, "learning_rate": 2.5614426584497363e-07, "loss": 0.6224, "step": 11090 }, { "epoch": 0.9, "grad_norm": 3.685717836360914, "learning_rate": 2.5572882520908505e-07, "loss": 0.7154, "step": 11091 }, { "epoch": 0.9, "grad_norm": 5.076263093940613, "learning_rate": 2.553137129066335e-07, "loss": 0.5295, "step": 11092 }, { "epoch": 0.9, "grad_norm": 5.590691796037563, "learning_rate": 2.548989289663484e-07, "loss": 0.5224, "step": 11093 }, { "epoch": 0.9, "grad_norm": 4.0380278217603385, "learning_rate": 2.5448447341693493e-07, "loss": 0.8946, "step": 11094 }, { "epoch": 0.9, "grad_norm": 4.33410672132875, "learning_rate": 2.540703462870758e-07, "loss": 0.5321, "step": 11095 }, { "epoch": 0.9, "grad_norm": 2.8223743977094635, "learning_rate": 2.5365654760543313e-07, "loss": 0.6875, "step": 11096 }, { "epoch": 0.9, "grad_norm": 10.155282123370107, "learning_rate": 2.5324307740064113e-07, "loss": 0.7869, "step": 11097 }, { "epoch": 0.9, "grad_norm": 3.105071004926456, "learning_rate": 2.5282993570131697e-07, "loss": 0.7059, "step": 11098 }, { "epoch": 0.9, "grad_norm": 4.759174445219804, "learning_rate": 2.524171225360511e-07, "loss": 0.7714, "step": 11099 }, { "epoch": 0.9, "grad_norm": 3.305611317761764, "learning_rate": 2.5200463793341455e-07, "loss": 0.6946, "step": 11100 }, { "epoch": 0.9, "grad_norm": 3.5013903699175417, "learning_rate": 2.5159248192195284e-07, "loss": 0.6914, "step": 11101 }, { "epoch": 0.9, "grad_norm": 4.004572463957777, "learning_rate": 2.5118065453018867e-07, "loss": 0.6863, "step": 11102 }, { "epoch": 0.9, "grad_norm": 2.8414661158451766, "learning_rate": 2.5076915578662597e-07, "loss": 0.4927, "step": 11103 }, { "epoch": 0.9, "grad_norm": 7.158135883358956, "learning_rate": 2.503579857197402e-07, "loss": 0.8022, "step": 11104 }, { "epoch": 0.9, "grad_norm": 6.303931678287403, "learning_rate": 2.4994714435798815e-07, "loss": 0.4349, "step": 11105 }, { "epoch": 0.9, "grad_norm": 9.152109380009543, "learning_rate": 2.495366317298026e-07, "loss": 0.647, "step": 11106 }, { "epoch": 0.9, "grad_norm": 3.6233721171321203, "learning_rate": 2.4912644786359354e-07, "loss": 0.5429, "step": 11107 }, { "epoch": 0.9, "grad_norm": 2.936108732280984, "learning_rate": 2.4871659278774884e-07, "loss": 0.6106, "step": 11108 }, { "epoch": 0.9, "grad_norm": 76.5995454463314, "learning_rate": 2.483070665306314e-07, "loss": 0.831, "step": 11109 }, { "epoch": 0.9, "grad_norm": 9.110038819834891, "learning_rate": 2.4789786912058524e-07, "loss": 0.8331, "step": 11110 }, { "epoch": 0.9, "grad_norm": 6.213034492420114, "learning_rate": 2.474890005859271e-07, "loss": 0.5754, "step": 11111 }, { "epoch": 0.9, "grad_norm": 3.1989941600754412, "learning_rate": 2.470804609549554e-07, "loss": 0.4403, "step": 11112 }, { "epoch": 0.9, "grad_norm": 6.387931422354827, "learning_rate": 2.466722502559416e-07, "loss": 0.5919, "step": 11113 }, { "epoch": 0.9, "grad_norm": 2.7793936208278027, "learning_rate": 2.4626436851713844e-07, "loss": 0.7198, "step": 11114 }, { "epoch": 0.9, "grad_norm": 24.675438852653613, "learning_rate": 2.458568157667729e-07, "loss": 0.4868, "step": 11115 }, { "epoch": 0.9, "grad_norm": 2.968899497997956, "learning_rate": 2.454495920330502e-07, "loss": 0.549, "step": 11116 }, { "epoch": 0.9, "grad_norm": 2.9724963433551848, "learning_rate": 2.450426973441516e-07, "loss": 0.4692, "step": 11117 }, { "epoch": 0.9, "grad_norm": 6.113019026443085, "learning_rate": 2.4463613172823975e-07, "loss": 0.6257, "step": 11118 }, { "epoch": 0.9, "grad_norm": 4.8362695841092, "learning_rate": 2.442298952134492e-07, "loss": 0.5985, "step": 11119 }, { "epoch": 0.9, "grad_norm": 2.992228939025299, "learning_rate": 2.4382398782789416e-07, "loss": 0.7287, "step": 11120 }, { "epoch": 0.9, "grad_norm": 4.313877136232641, "learning_rate": 2.4341840959966724e-07, "loss": 0.7235, "step": 11121 }, { "epoch": 0.9, "grad_norm": 8.472511654987843, "learning_rate": 2.430131605568353e-07, "loss": 0.8167, "step": 11122 }, { "epoch": 0.9, "grad_norm": 4.92453178971149, "learning_rate": 2.4260824072744714e-07, "loss": 0.7042, "step": 11123 }, { "epoch": 0.9, "grad_norm": 2.7753498312351743, "learning_rate": 2.42203650139522e-07, "loss": 0.5991, "step": 11124 }, { "epoch": 0.9, "grad_norm": 2.59708602228783, "learning_rate": 2.4179938882106235e-07, "loss": 0.6161, "step": 11125 }, { "epoch": 0.9, "grad_norm": 3.9195639880987545, "learning_rate": 2.413954568000454e-07, "loss": 0.7473, "step": 11126 }, { "epoch": 0.9, "grad_norm": 3.3863210309823724, "learning_rate": 2.409918541044248e-07, "loss": 0.6121, "step": 11127 }, { "epoch": 0.9, "grad_norm": 13.656219673341297, "learning_rate": 2.405885807621333e-07, "loss": 0.6579, "step": 11128 }, { "epoch": 0.9, "grad_norm": 2.7701955902738185, "learning_rate": 2.4018563680107964e-07, "loss": 0.5286, "step": 11129 }, { "epoch": 0.9, "grad_norm": 5.1333374481865555, "learning_rate": 2.397830222491515e-07, "loss": 0.591, "step": 11130 }, { "epoch": 0.9, "grad_norm": 4.906035418301558, "learning_rate": 2.393807371342094e-07, "loss": 0.6907, "step": 11131 }, { "epoch": 0.9, "grad_norm": 3.38353178579262, "learning_rate": 2.38978781484096e-07, "loss": 0.6791, "step": 11132 }, { "epoch": 0.9, "grad_norm": 3.645503942555014, "learning_rate": 2.3857715532662915e-07, "loss": 0.6961, "step": 11133 }, { "epoch": 0.9, "grad_norm": 6.296236525567647, "learning_rate": 2.3817585868960323e-07, "loss": 0.7069, "step": 11134 }, { "epoch": 0.9, "grad_norm": 2.8484121432260605, "learning_rate": 2.3777489160079104e-07, "loss": 0.8108, "step": 11135 }, { "epoch": 0.9, "grad_norm": 6.71723743756462, "learning_rate": 2.3737425408794202e-07, "loss": 0.6915, "step": 11136 }, { "epoch": 0.9, "grad_norm": 4.11595224928552, "learning_rate": 2.3697394617878232e-07, "loss": 0.854, "step": 11137 }, { "epoch": 0.9, "grad_norm": 5.462369659612821, "learning_rate": 2.3657396790101539e-07, "loss": 0.7799, "step": 11138 }, { "epoch": 0.9, "grad_norm": 4.1844888416580766, "learning_rate": 2.3617431928232405e-07, "loss": 0.6329, "step": 11139 }, { "epoch": 0.9, "grad_norm": 7.2286873241493526, "learning_rate": 2.3577500035036505e-07, "loss": 0.5984, "step": 11140 }, { "epoch": 0.9, "grad_norm": 5.965449536007669, "learning_rate": 2.3537601113277299e-07, "loss": 0.7, "step": 11141 }, { "epoch": 0.9, "grad_norm": 3.8296834453207613, "learning_rate": 2.349773516571624e-07, "loss": 0.5032, "step": 11142 }, { "epoch": 0.91, "grad_norm": 4.66201295833367, "learning_rate": 2.3457902195112236e-07, "loss": 0.6273, "step": 11143 }, { "epoch": 0.91, "grad_norm": 6.59917799988265, "learning_rate": 2.3418102204221972e-07, "loss": 0.6182, "step": 11144 }, { "epoch": 0.91, "grad_norm": 3.269615318244128, "learning_rate": 2.3378335195799739e-07, "loss": 0.7314, "step": 11145 }, { "epoch": 0.91, "grad_norm": 4.881587472595604, "learning_rate": 2.3338601172597842e-07, "loss": 0.6883, "step": 11146 }, { "epoch": 0.91, "grad_norm": 29.10442500021408, "learning_rate": 2.3298900137365966e-07, "loss": 0.6143, "step": 11147 }, { "epoch": 0.91, "grad_norm": 2.758332840153624, "learning_rate": 2.3259232092851857e-07, "loss": 0.6094, "step": 11148 }, { "epoch": 0.91, "grad_norm": 3.3708754290049634, "learning_rate": 2.3219597041800713e-07, "loss": 0.6694, "step": 11149 }, { "epoch": 0.91, "grad_norm": 3.5667350172006196, "learning_rate": 2.31799949869555e-07, "loss": 0.6215, "step": 11150 }, { "epoch": 0.91, "grad_norm": 4.60462786089303, "learning_rate": 2.314042593105692e-07, "loss": 0.66, "step": 11151 }, { "epoch": 0.91, "grad_norm": 10.131368632742777, "learning_rate": 2.3100889876843335e-07, "loss": 0.6525, "step": 11152 }, { "epoch": 0.91, "grad_norm": 3.883522401708305, "learning_rate": 2.3061386827051114e-07, "loss": 0.6799, "step": 11153 }, { "epoch": 0.91, "grad_norm": 3.755193991968222, "learning_rate": 2.3021916784413845e-07, "loss": 0.6643, "step": 11154 }, { "epoch": 0.91, "grad_norm": 6.489369986521138, "learning_rate": 2.2982479751663344e-07, "loss": 0.6152, "step": 11155 }, { "epoch": 0.91, "grad_norm": 3.542210881578721, "learning_rate": 2.2943075731528764e-07, "loss": 0.6141, "step": 11156 }, { "epoch": 0.91, "grad_norm": 2.4501734368800485, "learning_rate": 2.290370472673714e-07, "loss": 0.6118, "step": 11157 }, { "epoch": 0.91, "grad_norm": 2.8648890370561024, "learning_rate": 2.2864366740013188e-07, "loss": 0.6098, "step": 11158 }, { "epoch": 0.91, "grad_norm": 3.331476229469243, "learning_rate": 2.2825061774079337e-07, "loss": 0.637, "step": 11159 }, { "epoch": 0.91, "grad_norm": 11.827249495520467, "learning_rate": 2.2785789831655803e-07, "loss": 0.6456, "step": 11160 }, { "epoch": 0.91, "grad_norm": 4.784397517162743, "learning_rate": 2.2746550915460297e-07, "loss": 0.5943, "step": 11161 }, { "epoch": 0.91, "grad_norm": 4.35396000818709, "learning_rate": 2.2707345028208593e-07, "loss": 0.6139, "step": 11162 }, { "epoch": 0.91, "grad_norm": 5.719855164084684, "learning_rate": 2.2668172172613912e-07, "loss": 0.7485, "step": 11163 }, { "epoch": 0.91, "grad_norm": 3.88158982756313, "learning_rate": 2.2629032351387247e-07, "loss": 0.7047, "step": 11164 }, { "epoch": 0.91, "grad_norm": 5.796267768771133, "learning_rate": 2.258992556723727e-07, "loss": 0.6729, "step": 11165 }, { "epoch": 0.91, "grad_norm": 3.066477015841981, "learning_rate": 2.2550851822870423e-07, "loss": 0.6729, "step": 11166 }, { "epoch": 0.91, "grad_norm": 10.031857418393574, "learning_rate": 2.251181112099099e-07, "loss": 0.5465, "step": 11167 }, { "epoch": 0.91, "grad_norm": 2.675819055436363, "learning_rate": 2.2472803464300697e-07, "loss": 0.7149, "step": 11168 }, { "epoch": 0.91, "grad_norm": 3.5101733525460443, "learning_rate": 2.2433828855499218e-07, "loss": 0.5845, "step": 11169 }, { "epoch": 0.91, "grad_norm": 3.599381142452844, "learning_rate": 2.239488729728373e-07, "loss": 0.7133, "step": 11170 }, { "epoch": 0.91, "grad_norm": 6.624830753928612, "learning_rate": 2.23559787923493e-07, "loss": 0.6637, "step": 11171 }, { "epoch": 0.91, "grad_norm": 3.3807407363981623, "learning_rate": 2.2317103343388603e-07, "loss": 0.937, "step": 11172 }, { "epoch": 0.91, "grad_norm": 4.136022318660975, "learning_rate": 2.2278260953092158e-07, "loss": 0.705, "step": 11173 }, { "epoch": 0.91, "grad_norm": 3.213316628655878, "learning_rate": 2.2239451624148035e-07, "loss": 0.5909, "step": 11174 }, { "epoch": 0.91, "grad_norm": 4.113919450818377, "learning_rate": 2.220067535924203e-07, "loss": 0.5676, "step": 11175 }, { "epoch": 0.91, "grad_norm": 3.689508213542701, "learning_rate": 2.2161932161057888e-07, "loss": 0.6524, "step": 11176 }, { "epoch": 0.91, "grad_norm": 3.690227101161742, "learning_rate": 2.2123222032276625e-07, "loss": 0.6971, "step": 11177 }, { "epoch": 0.91, "grad_norm": 4.31144987650552, "learning_rate": 2.2084544975577383e-07, "loss": 0.6283, "step": 11178 }, { "epoch": 0.91, "grad_norm": 4.627351558278359, "learning_rate": 2.2045900993636793e-07, "loss": 0.6298, "step": 11179 }, { "epoch": 0.91, "grad_norm": 7.181540816920443, "learning_rate": 2.2007290089129386e-07, "loss": 0.6105, "step": 11180 }, { "epoch": 0.91, "grad_norm": 2.4111880089800546, "learning_rate": 2.1968712264727187e-07, "loss": 0.6496, "step": 11181 }, { "epoch": 0.91, "grad_norm": 3.7301495699497518, "learning_rate": 2.193016752310001e-07, "loss": 0.7502, "step": 11182 }, { "epoch": 0.91, "grad_norm": 6.946330232235449, "learning_rate": 2.1891655866915496e-07, "loss": 0.6652, "step": 11183 }, { "epoch": 0.91, "grad_norm": 5.9617569552395135, "learning_rate": 2.185317729883868e-07, "loss": 0.5079, "step": 11184 }, { "epoch": 0.91, "grad_norm": 3.4539595084818053, "learning_rate": 2.1814731821532765e-07, "loss": 0.6411, "step": 11185 }, { "epoch": 0.91, "grad_norm": 3.7295998632425245, "learning_rate": 2.1776319437658233e-07, "loss": 0.6514, "step": 11186 }, { "epoch": 0.91, "grad_norm": 5.180756978128458, "learning_rate": 2.173794014987357e-07, "loss": 0.6396, "step": 11187 }, { "epoch": 0.91, "grad_norm": 2.3700950684986877, "learning_rate": 2.1699593960834876e-07, "loss": 0.6384, "step": 11188 }, { "epoch": 0.91, "grad_norm": 5.435322993458829, "learning_rate": 2.1661280873195855e-07, "loss": 0.8065, "step": 11189 }, { "epoch": 0.91, "grad_norm": 4.588651937813416, "learning_rate": 2.1623000889608113e-07, "loss": 0.6653, "step": 11190 }, { "epoch": 0.91, "grad_norm": 3.8417818868419378, "learning_rate": 2.1584754012720755e-07, "loss": 0.7299, "step": 11191 }, { "epoch": 0.91, "grad_norm": 3.321297919657786, "learning_rate": 2.1546540245180825e-07, "loss": 0.5849, "step": 11192 }, { "epoch": 0.91, "grad_norm": 4.812304121023848, "learning_rate": 2.150835958963282e-07, "loss": 0.5711, "step": 11193 }, { "epoch": 0.91, "grad_norm": 4.789822625841763, "learning_rate": 2.147021204871924e-07, "loss": 0.6448, "step": 11194 }, { "epoch": 0.91, "grad_norm": 4.482624387945147, "learning_rate": 2.1432097625080028e-07, "loss": 0.4734, "step": 11195 }, { "epoch": 0.91, "grad_norm": 4.15611770572156, "learning_rate": 2.1394016321353074e-07, "loss": 0.4833, "step": 11196 }, { "epoch": 0.91, "grad_norm": 5.533211608966728, "learning_rate": 2.13559681401736e-07, "loss": 0.6356, "step": 11197 }, { "epoch": 0.91, "grad_norm": 4.668946241240258, "learning_rate": 2.1317953084175003e-07, "loss": 0.6324, "step": 11198 }, { "epoch": 0.91, "grad_norm": 15.00257056308707, "learning_rate": 2.1279971155988066e-07, "loss": 0.5525, "step": 11199 }, { "epoch": 0.91, "grad_norm": 6.806304167910922, "learning_rate": 2.1242022358241354e-07, "loss": 0.6445, "step": 11200 }, { "epoch": 0.91, "grad_norm": 3.343260844327897, "learning_rate": 2.1204106693561265e-07, "loss": 0.6619, "step": 11201 }, { "epoch": 0.91, "grad_norm": 3.950683077485783, "learning_rate": 2.1166224164571757e-07, "loss": 0.4888, "step": 11202 }, { "epoch": 0.91, "grad_norm": 3.5932287198342583, "learning_rate": 2.1128374773894512e-07, "loss": 0.7357, "step": 11203 }, { "epoch": 0.91, "grad_norm": 4.2658072215347875, "learning_rate": 2.1090558524148875e-07, "loss": 0.718, "step": 11204 }, { "epoch": 0.91, "grad_norm": 3.3954486428975836, "learning_rate": 2.1052775417952088e-07, "loss": 0.66, "step": 11205 }, { "epoch": 0.91, "grad_norm": 7.184776914767399, "learning_rate": 2.1015025457919002e-07, "loss": 0.6501, "step": 11206 }, { "epoch": 0.91, "grad_norm": 6.020422422928121, "learning_rate": 2.0977308646662032e-07, "loss": 0.5798, "step": 11207 }, { "epoch": 0.91, "grad_norm": 5.383636768910911, "learning_rate": 2.0939624986791473e-07, "loss": 0.6269, "step": 11208 }, { "epoch": 0.91, "grad_norm": 7.392695274835778, "learning_rate": 2.0901974480915355e-07, "loss": 0.6152, "step": 11209 }, { "epoch": 0.91, "grad_norm": 12.184743807836144, "learning_rate": 2.08643571316392e-07, "loss": 0.5746, "step": 11210 }, { "epoch": 0.91, "grad_norm": 4.578078483932733, "learning_rate": 2.0826772941566376e-07, "loss": 0.637, "step": 11211 }, { "epoch": 0.91, "grad_norm": 4.292490272675671, "learning_rate": 2.0789221913298075e-07, "loss": 0.5561, "step": 11212 }, { "epoch": 0.91, "grad_norm": 3.900359661084928, "learning_rate": 2.075170404943294e-07, "loss": 0.6385, "step": 11213 }, { "epoch": 0.91, "grad_norm": 4.637713822962244, "learning_rate": 2.0714219352567455e-07, "loss": 0.6683, "step": 11214 }, { "epoch": 0.91, "grad_norm": 4.596075403319375, "learning_rate": 2.0676767825295873e-07, "loss": 0.7032, "step": 11215 }, { "epoch": 0.91, "grad_norm": 2.8509918175013684, "learning_rate": 2.0639349470210014e-07, "loss": 0.6394, "step": 11216 }, { "epoch": 0.91, "grad_norm": 3.820180651945835, "learning_rate": 2.0601964289899467e-07, "loss": 0.8167, "step": 11217 }, { "epoch": 0.91, "grad_norm": 15.241877536564646, "learning_rate": 2.05646122869515e-07, "loss": 0.6918, "step": 11218 }, { "epoch": 0.91, "grad_norm": 11.633888495411068, "learning_rate": 2.0527293463951158e-07, "loss": 0.6618, "step": 11219 }, { "epoch": 0.91, "grad_norm": 15.251399605112153, "learning_rate": 2.0490007823481096e-07, "loss": 0.6036, "step": 11220 }, { "epoch": 0.91, "grad_norm": 3.349610011431575, "learning_rate": 2.0452755368121803e-07, "loss": 0.6235, "step": 11221 }, { "epoch": 0.91, "grad_norm": 5.595683221298956, "learning_rate": 2.0415536100451273e-07, "loss": 0.4783, "step": 11222 }, { "epoch": 0.91, "grad_norm": 5.956363776287118, "learning_rate": 2.037835002304539e-07, "loss": 0.6666, "step": 11223 }, { "epoch": 0.91, "grad_norm": 12.635621690223486, "learning_rate": 2.0341197138477652e-07, "loss": 0.7744, "step": 11224 }, { "epoch": 0.91, "grad_norm": 6.839538448672353, "learning_rate": 2.030407744931917e-07, "loss": 0.6424, "step": 11225 }, { "epoch": 0.91, "grad_norm": 6.538209436943901, "learning_rate": 2.0266990958138998e-07, "loss": 0.7572, "step": 11226 }, { "epoch": 0.91, "grad_norm": 5.329314131426762, "learning_rate": 2.0229937667503641e-07, "loss": 0.5788, "step": 11227 }, { "epoch": 0.91, "grad_norm": 4.981178461071761, "learning_rate": 2.0192917579977545e-07, "loss": 0.6999, "step": 11228 }, { "epoch": 0.91, "grad_norm": 3.9967243742484726, "learning_rate": 2.0155930698122661e-07, "loss": 0.704, "step": 11229 }, { "epoch": 0.91, "grad_norm": 4.680105901722793, "learning_rate": 2.011897702449872e-07, "loss": 0.7099, "step": 11230 }, { "epoch": 0.91, "grad_norm": 6.067912557989318, "learning_rate": 2.008205656166312e-07, "loss": 0.6702, "step": 11231 }, { "epoch": 0.91, "grad_norm": 4.629518653451745, "learning_rate": 2.0045169312171043e-07, "loss": 0.4777, "step": 11232 }, { "epoch": 0.91, "grad_norm": 14.303071857238, "learning_rate": 2.0008315278575274e-07, "loss": 0.6023, "step": 11233 }, { "epoch": 0.91, "grad_norm": 2.9550083491589967, "learning_rate": 1.9971494463426332e-07, "loss": 0.5514, "step": 11234 }, { "epoch": 0.91, "grad_norm": 4.45169761435258, "learning_rate": 1.993470686927257e-07, "loss": 0.5483, "step": 11235 }, { "epoch": 0.91, "grad_norm": 14.136993320354271, "learning_rate": 1.989795249865978e-07, "loss": 0.6777, "step": 11236 }, { "epoch": 0.91, "grad_norm": 3.448756818442511, "learning_rate": 1.9861231354131705e-07, "loss": 0.7651, "step": 11237 }, { "epoch": 0.91, "grad_norm": 6.710480896720181, "learning_rate": 1.9824543438229593e-07, "loss": 0.5539, "step": 11238 }, { "epoch": 0.91, "grad_norm": 11.228298048873127, "learning_rate": 1.978788875349247e-07, "loss": 0.5702, "step": 11239 }, { "epoch": 0.91, "grad_norm": 4.245243401907516, "learning_rate": 1.9751267302457132e-07, "loss": 0.5553, "step": 11240 }, { "epoch": 0.91, "grad_norm": 3.8167119126457365, "learning_rate": 1.971467908765795e-07, "loss": 0.6471, "step": 11241 }, { "epoch": 0.91, "grad_norm": 3.2591391861311845, "learning_rate": 1.9678124111627229e-07, "loss": 0.61, "step": 11242 }, { "epoch": 0.91, "grad_norm": 3.27032283104346, "learning_rate": 1.9641602376894552e-07, "loss": 0.8078, "step": 11243 }, { "epoch": 0.91, "grad_norm": 4.396870639714731, "learning_rate": 1.960511388598768e-07, "loss": 0.5992, "step": 11244 }, { "epoch": 0.91, "grad_norm": 3.452741420176858, "learning_rate": 1.9568658641431648e-07, "loss": 0.7191, "step": 11245 }, { "epoch": 0.91, "grad_norm": 4.469746243397283, "learning_rate": 1.9532236645749492e-07, "loss": 0.6226, "step": 11246 }, { "epoch": 0.91, "grad_norm": 3.0071098089703447, "learning_rate": 1.9495847901461916e-07, "loss": 0.6423, "step": 11247 }, { "epoch": 0.91, "grad_norm": 2.927309478084663, "learning_rate": 1.9459492411087078e-07, "loss": 0.6625, "step": 11248 }, { "epoch": 0.91, "grad_norm": 3.8751220068643923, "learning_rate": 1.9423170177141182e-07, "loss": 0.6653, "step": 11249 }, { "epoch": 0.91, "grad_norm": 3.563704809780736, "learning_rate": 1.938688120213783e-07, "loss": 0.5827, "step": 11250 }, { "epoch": 0.91, "grad_norm": 3.358420934460345, "learning_rate": 1.9350625488588458e-07, "loss": 0.5478, "step": 11251 }, { "epoch": 0.91, "grad_norm": 5.2048093345710065, "learning_rate": 1.9314403039002228e-07, "loss": 0.6095, "step": 11252 }, { "epoch": 0.91, "grad_norm": 2.5368372327602633, "learning_rate": 1.927821385588602e-07, "loss": 0.6251, "step": 11253 }, { "epoch": 0.91, "grad_norm": 2.8808988217708147, "learning_rate": 1.924205794174422e-07, "loss": 0.6186, "step": 11254 }, { "epoch": 0.91, "grad_norm": 4.215083189640467, "learning_rate": 1.9205935299079158e-07, "loss": 0.5443, "step": 11255 }, { "epoch": 0.91, "grad_norm": 3.968943102852748, "learning_rate": 1.916984593039073e-07, "loss": 0.7671, "step": 11256 }, { "epoch": 0.91, "grad_norm": 2.658793552877997, "learning_rate": 1.913378983817643e-07, "loss": 0.6221, "step": 11257 }, { "epoch": 0.91, "grad_norm": 13.225970960842027, "learning_rate": 1.9097767024931713e-07, "loss": 0.8828, "step": 11258 }, { "epoch": 0.91, "grad_norm": 3.787876191138275, "learning_rate": 1.906177749314947e-07, "loss": 0.7026, "step": 11259 }, { "epoch": 0.91, "grad_norm": 3.5243636606237465, "learning_rate": 1.902582124532054e-07, "loss": 0.5846, "step": 11260 }, { "epoch": 0.91, "grad_norm": 3.940901314921539, "learning_rate": 1.8989898283933216e-07, "loss": 0.6955, "step": 11261 }, { "epoch": 0.91, "grad_norm": 3.4868704721265336, "learning_rate": 1.8954008611473618e-07, "loss": 0.5596, "step": 11262 }, { "epoch": 0.91, "grad_norm": 18.703640149227553, "learning_rate": 1.8918152230425534e-07, "loss": 0.4744, "step": 11263 }, { "epoch": 0.91, "grad_norm": 4.945525287111293, "learning_rate": 1.8882329143270429e-07, "loss": 0.6621, "step": 11264 }, { "epoch": 0.91, "grad_norm": 5.204010961577526, "learning_rate": 1.8846539352487591e-07, "loss": 0.4942, "step": 11265 }, { "epoch": 0.92, "grad_norm": 3.5048726028688013, "learning_rate": 1.8810782860553712e-07, "loss": 0.7458, "step": 11266 }, { "epoch": 0.92, "grad_norm": 2.659341861036358, "learning_rate": 1.8775059669943586e-07, "loss": 0.6877, "step": 11267 }, { "epoch": 0.92, "grad_norm": 3.3723421273103655, "learning_rate": 1.873936978312929e-07, "loss": 0.5985, "step": 11268 }, { "epoch": 0.92, "grad_norm": 3.0123279451882095, "learning_rate": 1.8703713202580963e-07, "loss": 0.7331, "step": 11269 }, { "epoch": 0.92, "grad_norm": 7.625981246359626, "learning_rate": 1.8668089930766077e-07, "loss": 0.6579, "step": 11270 }, { "epoch": 0.92, "grad_norm": 3.0689827099492053, "learning_rate": 1.8632499970150154e-07, "loss": 0.6426, "step": 11271 }, { "epoch": 0.92, "grad_norm": 21.540314721514395, "learning_rate": 1.859694332319617e-07, "loss": 0.6223, "step": 11272 }, { "epoch": 0.92, "grad_norm": 5.964900391084288, "learning_rate": 1.8561419992364826e-07, "loss": 0.6281, "step": 11273 }, { "epoch": 0.92, "grad_norm": 4.6903264018260105, "learning_rate": 1.8525929980114653e-07, "loss": 0.7361, "step": 11274 }, { "epoch": 0.92, "grad_norm": 6.964590575087678, "learning_rate": 1.8490473288901744e-07, "loss": 0.6393, "step": 11275 }, { "epoch": 0.92, "grad_norm": 4.547461324425597, "learning_rate": 1.8455049921179858e-07, "loss": 0.7326, "step": 11276 }, { "epoch": 0.92, "grad_norm": 48.91079470325377, "learning_rate": 1.8419659879400587e-07, "loss": 0.4434, "step": 11277 }, { "epoch": 0.92, "grad_norm": 7.599910658665928, "learning_rate": 1.8384303166013194e-07, "loss": 0.7121, "step": 11278 }, { "epoch": 0.92, "grad_norm": 5.094208246499218, "learning_rate": 1.8348979783464505e-07, "loss": 0.6975, "step": 11279 }, { "epoch": 0.92, "grad_norm": 7.1835376821648484, "learning_rate": 1.831368973419906e-07, "loss": 0.5416, "step": 11280 }, { "epoch": 0.92, "grad_norm": 4.078551495747529, "learning_rate": 1.827843302065929e-07, "loss": 0.4897, "step": 11281 }, { "epoch": 0.92, "grad_norm": 6.288439876156027, "learning_rate": 1.8243209645285143e-07, "loss": 0.6914, "step": 11282 }, { "epoch": 0.92, "grad_norm": 4.5239103333466275, "learning_rate": 1.8208019610514273e-07, "loss": 0.6296, "step": 11283 }, { "epoch": 0.92, "grad_norm": 2.647979380089214, "learning_rate": 1.8172862918782008e-07, "loss": 0.6212, "step": 11284 }, { "epoch": 0.92, "grad_norm": 2.42850606551405, "learning_rate": 1.8137739572521518e-07, "loss": 0.6969, "step": 11285 }, { "epoch": 0.92, "grad_norm": 2.8193799941343847, "learning_rate": 1.8102649574163523e-07, "loss": 0.6285, "step": 11286 }, { "epoch": 0.92, "grad_norm": 5.3924909696544265, "learning_rate": 1.8067592926136412e-07, "loss": 0.5888, "step": 11287 }, { "epoch": 0.92, "grad_norm": 3.7818778346585287, "learning_rate": 1.803256963086636e-07, "loss": 0.5445, "step": 11288 }, { "epoch": 0.92, "grad_norm": 3.255098907805088, "learning_rate": 1.7997579690777257e-07, "loss": 0.8356, "step": 11289 }, { "epoch": 0.92, "grad_norm": 2.7259441838723153, "learning_rate": 1.7962623108290556e-07, "loss": 0.4971, "step": 11290 }, { "epoch": 0.92, "grad_norm": 4.696404589431635, "learning_rate": 1.7927699885825488e-07, "loss": 0.5833, "step": 11291 }, { "epoch": 0.92, "grad_norm": 5.084098117617025, "learning_rate": 1.7892810025798958e-07, "loss": 0.8636, "step": 11292 }, { "epoch": 0.92, "grad_norm": 5.779511802895045, "learning_rate": 1.7857953530625528e-07, "loss": 0.6311, "step": 11293 }, { "epoch": 0.92, "grad_norm": 4.813675047655089, "learning_rate": 1.7823130402717604e-07, "loss": 0.8099, "step": 11294 }, { "epoch": 0.92, "grad_norm": 6.518444299799028, "learning_rate": 1.7788340644485093e-07, "loss": 0.6243, "step": 11295 }, { "epoch": 0.92, "grad_norm": 3.352692611521556, "learning_rate": 1.7753584258335677e-07, "loss": 0.6686, "step": 11296 }, { "epoch": 0.92, "grad_norm": 4.7854030750804855, "learning_rate": 1.7718861246674656e-07, "loss": 0.6203, "step": 11297 }, { "epoch": 0.92, "grad_norm": 3.0625260606515714, "learning_rate": 1.768417161190511e-07, "loss": 0.7503, "step": 11298 }, { "epoch": 0.92, "grad_norm": 8.733551191478266, "learning_rate": 1.7649515356427839e-07, "loss": 0.7507, "step": 11299 }, { "epoch": 0.92, "grad_norm": 3.9132102446045893, "learning_rate": 1.76148924826412e-07, "loss": 0.5885, "step": 11300 }, { "epoch": 0.92, "grad_norm": 5.394820205406352, "learning_rate": 1.758030299294139e-07, "loss": 0.7972, "step": 11301 }, { "epoch": 0.92, "grad_norm": 5.415622490564768, "learning_rate": 1.754574688972216e-07, "loss": 0.6228, "step": 11302 }, { "epoch": 0.92, "grad_norm": 3.6203674526266982, "learning_rate": 1.7511224175375097e-07, "loss": 0.6477, "step": 11303 }, { "epoch": 0.92, "grad_norm": 3.482400390919232, "learning_rate": 1.7476734852289235e-07, "loss": 0.6297, "step": 11304 }, { "epoch": 0.92, "grad_norm": 4.646189629126471, "learning_rate": 1.7442278922851551e-07, "loss": 0.6205, "step": 11305 }, { "epoch": 0.92, "grad_norm": 2.5846996323918274, "learning_rate": 1.7407856389446588e-07, "loss": 0.5584, "step": 11306 }, { "epoch": 0.92, "grad_norm": 4.119611207171882, "learning_rate": 1.73734672544566e-07, "loss": 0.7873, "step": 11307 }, { "epoch": 0.92, "grad_norm": 3.8005402386654237, "learning_rate": 1.7339111520261686e-07, "loss": 0.7073, "step": 11308 }, { "epoch": 0.92, "grad_norm": 4.046992935276586, "learning_rate": 1.7304789189239167e-07, "loss": 0.8008, "step": 11309 }, { "epoch": 0.92, "grad_norm": 5.040735739036495, "learning_rate": 1.7270500263764645e-07, "loss": 0.6108, "step": 11310 }, { "epoch": 0.92, "grad_norm": 8.466417338929515, "learning_rate": 1.7236244746210994e-07, "loss": 0.6214, "step": 11311 }, { "epoch": 0.92, "grad_norm": 4.110455408198369, "learning_rate": 1.7202022638948878e-07, "loss": 0.6717, "step": 11312 }, { "epoch": 0.92, "grad_norm": 4.671910819961705, "learning_rate": 1.7167833944346846e-07, "loss": 0.6942, "step": 11313 }, { "epoch": 0.92, "grad_norm": 20.625401341803297, "learning_rate": 1.7133678664770726e-07, "loss": 0.5982, "step": 11314 }, { "epoch": 0.92, "grad_norm": 3.667073387587954, "learning_rate": 1.7099556802584628e-07, "loss": 0.5206, "step": 11315 }, { "epoch": 0.92, "grad_norm": 3.804106872063484, "learning_rate": 1.7065468360149607e-07, "loss": 0.8144, "step": 11316 }, { "epoch": 0.92, "grad_norm": 3.945172852137348, "learning_rate": 1.7031413339825054e-07, "loss": 0.762, "step": 11317 }, { "epoch": 0.92, "grad_norm": 4.399638969248526, "learning_rate": 1.6997391743967696e-07, "loss": 0.7223, "step": 11318 }, { "epoch": 0.92, "grad_norm": 5.530700456349516, "learning_rate": 1.696340357493209e-07, "loss": 0.616, "step": 11319 }, { "epoch": 0.92, "grad_norm": 3.77537007080804, "learning_rate": 1.6929448835070418e-07, "loss": 0.7288, "step": 11320 }, { "epoch": 0.92, "grad_norm": 3.2395221834056325, "learning_rate": 1.689552752673246e-07, "loss": 0.6597, "step": 11321 }, { "epoch": 0.92, "grad_norm": 3.04079371987463, "learning_rate": 1.686163965226606e-07, "loss": 0.7368, "step": 11322 }, { "epoch": 0.92, "grad_norm": 3.843562519496911, "learning_rate": 1.6827785214016123e-07, "loss": 0.6692, "step": 11323 }, { "epoch": 0.92, "grad_norm": 4.8703317084375755, "learning_rate": 1.6793964214325776e-07, "loss": 0.5896, "step": 11324 }, { "epoch": 0.92, "grad_norm": 4.904465482176586, "learning_rate": 1.6760176655535643e-07, "loss": 0.5499, "step": 11325 }, { "epoch": 0.92, "grad_norm": 4.345299328999583, "learning_rate": 1.672642253998402e-07, "loss": 0.6296, "step": 11326 }, { "epoch": 0.92, "grad_norm": 3.0821982158403833, "learning_rate": 1.6692701870006933e-07, "loss": 0.706, "step": 11327 }, { "epoch": 0.92, "grad_norm": 2.8088281504890302, "learning_rate": 1.665901464793801e-07, "loss": 0.5967, "step": 11328 }, { "epoch": 0.92, "grad_norm": 5.5914595375943685, "learning_rate": 1.6625360876108608e-07, "loss": 0.6793, "step": 11329 }, { "epoch": 0.92, "grad_norm": 4.05874173295695, "learning_rate": 1.6591740556847812e-07, "loss": 0.6682, "step": 11330 }, { "epoch": 0.92, "grad_norm": 2.849166669824276, "learning_rate": 1.655815369248237e-07, "loss": 0.7779, "step": 11331 }, { "epoch": 0.92, "grad_norm": 2.9577242451888446, "learning_rate": 1.65246002853367e-07, "loss": 0.6344, "step": 11332 }, { "epoch": 0.92, "grad_norm": 3.8005021807318426, "learning_rate": 1.649108033773289e-07, "loss": 0.6756, "step": 11333 }, { "epoch": 0.92, "grad_norm": 5.5086865869507875, "learning_rate": 1.6457593851990805e-07, "loss": 0.5032, "step": 11334 }, { "epoch": 0.92, "grad_norm": 5.752048430276614, "learning_rate": 1.6424140830427816e-07, "loss": 0.6865, "step": 11335 }, { "epoch": 0.92, "grad_norm": 3.5670783083033104, "learning_rate": 1.6390721275359123e-07, "loss": 0.7315, "step": 11336 }, { "epoch": 0.92, "grad_norm": 3.9611069283397486, "learning_rate": 1.6357335189097546e-07, "loss": 0.6658, "step": 11337 }, { "epoch": 0.92, "grad_norm": 6.907016723052516, "learning_rate": 1.632398257395368e-07, "loss": 0.6841, "step": 11338 }, { "epoch": 0.92, "grad_norm": 13.774494562248503, "learning_rate": 1.6290663432235622e-07, "loss": 0.5817, "step": 11339 }, { "epoch": 0.92, "grad_norm": 2.1466833584054457, "learning_rate": 1.6257377766249416e-07, "loss": 0.6933, "step": 11340 }, { "epoch": 0.92, "grad_norm": 4.677183835696159, "learning_rate": 1.6224125578298611e-07, "loss": 0.7218, "step": 11341 }, { "epoch": 0.92, "grad_norm": 10.862130663735634, "learning_rate": 1.6190906870684365e-07, "loss": 0.7622, "step": 11342 }, { "epoch": 0.92, "grad_norm": 3.1152603199156434, "learning_rate": 1.6157721645705615e-07, "loss": 0.6921, "step": 11343 }, { "epoch": 0.92, "grad_norm": 3.982612311057777, "learning_rate": 1.6124569905659136e-07, "loss": 0.6354, "step": 11344 }, { "epoch": 0.92, "grad_norm": 3.3627137885472655, "learning_rate": 1.6091451652839151e-07, "loss": 0.6055, "step": 11345 }, { "epoch": 0.92, "grad_norm": 2.981164909693949, "learning_rate": 1.6058366889537546e-07, "loss": 0.561, "step": 11346 }, { "epoch": 0.92, "grad_norm": 3.7013246737156655, "learning_rate": 1.6025315618044211e-07, "loss": 0.6128, "step": 11347 }, { "epoch": 0.92, "grad_norm": 23.816482656442975, "learning_rate": 1.5992297840646376e-07, "loss": 0.6945, "step": 11348 }, { "epoch": 0.92, "grad_norm": 2.5154817244944305, "learning_rate": 1.5959313559629098e-07, "loss": 0.6949, "step": 11349 }, { "epoch": 0.92, "grad_norm": 4.028092005877914, "learning_rate": 1.5926362777274994e-07, "loss": 0.4796, "step": 11350 }, { "epoch": 0.92, "grad_norm": 3.3833095600247156, "learning_rate": 1.589344549586469e-07, "loss": 0.6234, "step": 11351 }, { "epoch": 0.92, "grad_norm": 5.042563100392468, "learning_rate": 1.5860561717676137e-07, "loss": 0.6392, "step": 11352 }, { "epoch": 0.92, "grad_norm": 3.464226622332731, "learning_rate": 1.5827711444985017e-07, "loss": 0.5578, "step": 11353 }, { "epoch": 0.92, "grad_norm": 5.4081553398162105, "learning_rate": 1.57948946800649e-07, "loss": 0.7801, "step": 11354 }, { "epoch": 0.92, "grad_norm": 8.559692279775795, "learning_rate": 1.576211142518691e-07, "loss": 0.6881, "step": 11355 }, { "epoch": 0.92, "grad_norm": 2.9308364806958203, "learning_rate": 1.572936168261985e-07, "loss": 0.7093, "step": 11356 }, { "epoch": 0.92, "grad_norm": 3.7556613182043166, "learning_rate": 1.5696645454630121e-07, "loss": 0.6598, "step": 11357 }, { "epoch": 0.92, "grad_norm": 3.8895442592773337, "learning_rate": 1.5663962743481976e-07, "loss": 0.611, "step": 11358 }, { "epoch": 0.92, "grad_norm": 5.245928206072252, "learning_rate": 1.5631313551437266e-07, "loss": 0.6779, "step": 11359 }, { "epoch": 0.92, "grad_norm": 2.8060822507163423, "learning_rate": 1.559869788075541e-07, "loss": 0.7006, "step": 11360 }, { "epoch": 0.92, "grad_norm": 4.553019662325742, "learning_rate": 1.5566115733693766e-07, "loss": 0.7098, "step": 11361 }, { "epoch": 0.92, "grad_norm": 12.987882498736571, "learning_rate": 1.5533567112507196e-07, "loss": 0.7036, "step": 11362 }, { "epoch": 0.92, "grad_norm": 5.506611439697978, "learning_rate": 1.5501052019448183e-07, "loss": 0.6388, "step": 11363 }, { "epoch": 0.92, "grad_norm": 3.54126202649975, "learning_rate": 1.5468570456766973e-07, "loss": 0.6579, "step": 11364 }, { "epoch": 0.92, "grad_norm": 4.111291338264455, "learning_rate": 1.5436122426711664e-07, "loss": 0.5988, "step": 11365 }, { "epoch": 0.92, "grad_norm": 5.477972852199743, "learning_rate": 1.5403707931527735e-07, "loss": 0.5365, "step": 11366 }, { "epoch": 0.92, "grad_norm": 2.9252105481445767, "learning_rate": 1.537132697345839e-07, "loss": 0.6941, "step": 11367 }, { "epoch": 0.92, "grad_norm": 5.370815188033459, "learning_rate": 1.5338979554744782e-07, "loss": 0.671, "step": 11368 }, { "epoch": 0.92, "grad_norm": 2.804232315146814, "learning_rate": 1.5306665677625453e-07, "loss": 0.6526, "step": 11369 }, { "epoch": 0.92, "grad_norm": 4.269627434998638, "learning_rate": 1.5274385344336728e-07, "loss": 0.5296, "step": 11370 }, { "epoch": 0.92, "grad_norm": 8.860232817952454, "learning_rate": 1.5242138557112595e-07, "loss": 0.6667, "step": 11371 }, { "epoch": 0.92, "grad_norm": 4.078921810451814, "learning_rate": 1.5209925318184827e-07, "loss": 0.683, "step": 11372 }, { "epoch": 0.92, "grad_norm": 3.8827854380777485, "learning_rate": 1.5177745629782638e-07, "loss": 0.655, "step": 11373 }, { "epoch": 0.92, "grad_norm": 3.627943026625959, "learning_rate": 1.514559949413319e-07, "loss": 0.7929, "step": 11374 }, { "epoch": 0.92, "grad_norm": 3.3382801012273338, "learning_rate": 1.5113486913461152e-07, "loss": 0.7024, "step": 11375 }, { "epoch": 0.92, "grad_norm": 10.312793837771833, "learning_rate": 1.5081407889988908e-07, "loss": 0.6685, "step": 11376 }, { "epoch": 0.92, "grad_norm": 4.985840613105751, "learning_rate": 1.5049362425936576e-07, "loss": 0.5905, "step": 11377 }, { "epoch": 0.92, "grad_norm": 4.668605820625122, "learning_rate": 1.5017350523521823e-07, "loss": 0.7027, "step": 11378 }, { "epoch": 0.92, "grad_norm": 4.23191987741091, "learning_rate": 1.49853721849601e-07, "loss": 0.5432, "step": 11379 }, { "epoch": 0.92, "grad_norm": 4.218237886594436, "learning_rate": 1.4953427412464527e-07, "loss": 0.6174, "step": 11380 }, { "epoch": 0.92, "grad_norm": 4.311840916897585, "learning_rate": 1.4921516208246002e-07, "loss": 0.6073, "step": 11381 }, { "epoch": 0.92, "grad_norm": 3.3755069862100195, "learning_rate": 1.48896385745127e-07, "loss": 0.8342, "step": 11382 }, { "epoch": 0.92, "grad_norm": 4.345442506283233, "learning_rate": 1.4857794513471025e-07, "loss": 0.6335, "step": 11383 }, { "epoch": 0.92, "grad_norm": 3.1581361945411155, "learning_rate": 1.482598402732466e-07, "loss": 0.6706, "step": 11384 }, { "epoch": 0.92, "grad_norm": 3.059322864683502, "learning_rate": 1.4794207118275007e-07, "loss": 0.5244, "step": 11385 }, { "epoch": 0.92, "grad_norm": 6.527982839451223, "learning_rate": 1.4762463788521474e-07, "loss": 0.7469, "step": 11386 }, { "epoch": 0.92, "grad_norm": 5.870894137829264, "learning_rate": 1.4730754040260642e-07, "loss": 0.7489, "step": 11387 }, { "epoch": 0.92, "grad_norm": 5.14039387263071, "learning_rate": 1.4699077875687252e-07, "loss": 0.5167, "step": 11388 }, { "epoch": 0.93, "grad_norm": 9.401638749942542, "learning_rate": 1.466743529699327e-07, "loss": 0.6808, "step": 11389 }, { "epoch": 0.93, "grad_norm": 2.6071930152666014, "learning_rate": 1.463582630636873e-07, "loss": 0.5333, "step": 11390 }, { "epoch": 0.93, "grad_norm": 2.6917431700755707, "learning_rate": 1.4604250906001093e-07, "loss": 0.5804, "step": 11391 }, { "epoch": 0.93, "grad_norm": 3.353136079190856, "learning_rate": 1.4572709098075565e-07, "loss": 0.7591, "step": 11392 }, { "epoch": 0.93, "grad_norm": 4.780160289960551, "learning_rate": 1.4541200884775119e-07, "loss": 0.6824, "step": 11393 }, { "epoch": 0.93, "grad_norm": 12.947381580933802, "learning_rate": 1.4509726268280233e-07, "loss": 0.6537, "step": 11394 }, { "epoch": 0.93, "grad_norm": 3.6487765281032, "learning_rate": 1.447828525076933e-07, "loss": 0.6034, "step": 11395 }, { "epoch": 0.93, "grad_norm": 2.7371953806911082, "learning_rate": 1.4446877834418004e-07, "loss": 0.721, "step": 11396 }, { "epoch": 0.93, "grad_norm": 3.272631827861146, "learning_rate": 1.4415504021400128e-07, "loss": 0.6393, "step": 11397 }, { "epoch": 0.93, "grad_norm": 3.897589784273589, "learning_rate": 1.43841638138868e-07, "loss": 0.6521, "step": 11398 }, { "epoch": 0.93, "grad_norm": 4.110382566654573, "learning_rate": 1.4352857214047056e-07, "loss": 0.5612, "step": 11399 }, { "epoch": 0.93, "grad_norm": 3.258100866122637, "learning_rate": 1.4321584224047502e-07, "loss": 0.6846, "step": 11400 }, { "epoch": 0.93, "grad_norm": 3.1329123979100104, "learning_rate": 1.4290344846052406e-07, "loss": 0.692, "step": 11401 }, { "epoch": 0.93, "grad_norm": 2.768243901488135, "learning_rate": 1.4259139082223761e-07, "loss": 0.6342, "step": 11402 }, { "epoch": 0.93, "grad_norm": 3.7383615746483367, "learning_rate": 1.422796693472106e-07, "loss": 0.6043, "step": 11403 }, { "epoch": 0.93, "grad_norm": 5.813280930536397, "learning_rate": 1.41968284057018e-07, "loss": 0.6294, "step": 11404 }, { "epoch": 0.93, "grad_norm": 3.510748005571704, "learning_rate": 1.4165723497320815e-07, "loss": 0.6244, "step": 11405 }, { "epoch": 0.93, "grad_norm": 49.73435916302651, "learning_rate": 1.413465221173088e-07, "loss": 0.6415, "step": 11406 }, { "epoch": 0.93, "grad_norm": 5.6395586627568965, "learning_rate": 1.410361455108228e-07, "loss": 0.8055, "step": 11407 }, { "epoch": 0.93, "grad_norm": 3.803798780051327, "learning_rate": 1.4072610517523068e-07, "loss": 0.6204, "step": 11408 }, { "epoch": 0.93, "grad_norm": 2.595097040542259, "learning_rate": 1.404164011319875e-07, "loss": 0.6475, "step": 11409 }, { "epoch": 0.93, "grad_norm": 2.9503674995130216, "learning_rate": 1.401070334025284e-07, "loss": 0.6113, "step": 11410 }, { "epoch": 0.93, "grad_norm": 6.8650050775455735, "learning_rate": 1.3979800200826289e-07, "loss": 0.5771, "step": 11411 }, { "epoch": 0.93, "grad_norm": 3.059204302336034, "learning_rate": 1.3948930697057772e-07, "loss": 0.6457, "step": 11412 }, { "epoch": 0.93, "grad_norm": 2.5066812251668362, "learning_rate": 1.3918094831083696e-07, "loss": 0.6207, "step": 11413 }, { "epoch": 0.93, "grad_norm": 3.4666209134087476, "learning_rate": 1.3887292605038128e-07, "loss": 0.7905, "step": 11414 }, { "epoch": 0.93, "grad_norm": 4.547030065393692, "learning_rate": 1.3856524021052696e-07, "loss": 0.6082, "step": 11415 }, { "epoch": 0.93, "grad_norm": 9.204413778944172, "learning_rate": 1.3825789081256812e-07, "loss": 0.7553, "step": 11416 }, { "epoch": 0.93, "grad_norm": 6.833168621264518, "learning_rate": 1.3795087787777494e-07, "loss": 0.5879, "step": 11417 }, { "epoch": 0.93, "grad_norm": 3.9049100321032477, "learning_rate": 1.3764420142739543e-07, "loss": 0.6388, "step": 11418 }, { "epoch": 0.93, "grad_norm": 3.9527516075623437, "learning_rate": 1.373378614826526e-07, "loss": 0.6209, "step": 11419 }, { "epoch": 0.93, "grad_norm": 6.063438106064102, "learning_rate": 1.3703185806474838e-07, "loss": 0.7724, "step": 11420 }, { "epoch": 0.93, "grad_norm": 4.026549451805104, "learning_rate": 1.367261911948592e-07, "loss": 0.7508, "step": 11421 }, { "epoch": 0.93, "grad_norm": 3.3084445404464207, "learning_rate": 1.364208608941392e-07, "loss": 0.5567, "step": 11422 }, { "epoch": 0.93, "grad_norm": 4.447217249115067, "learning_rate": 1.3611586718371871e-07, "loss": 0.589, "step": 11423 }, { "epoch": 0.93, "grad_norm": 2.7712875353268376, "learning_rate": 1.3581121008470644e-07, "loss": 0.5829, "step": 11424 }, { "epoch": 0.93, "grad_norm": 3.119636383680576, "learning_rate": 1.3550688961818602e-07, "loss": 0.7012, "step": 11425 }, { "epoch": 0.93, "grad_norm": 4.394405906267207, "learning_rate": 1.3520290580521734e-07, "loss": 0.6759, "step": 11426 }, { "epoch": 0.93, "grad_norm": 6.357454642491195, "learning_rate": 1.348992586668396e-07, "loss": 0.6017, "step": 11427 }, { "epoch": 0.93, "grad_norm": 4.5308063680424375, "learning_rate": 1.3459594822406607e-07, "loss": 0.6301, "step": 11428 }, { "epoch": 0.93, "grad_norm": 7.658950334816602, "learning_rate": 1.3429297449788825e-07, "loss": 0.9281, "step": 11429 }, { "epoch": 0.93, "grad_norm": 3.945622023335836, "learning_rate": 1.3399033750927327e-07, "loss": 0.7386, "step": 11430 }, { "epoch": 0.93, "grad_norm": 2.615094996485289, "learning_rate": 1.3368803727916658e-07, "loss": 0.5747, "step": 11431 }, { "epoch": 0.93, "grad_norm": 4.82000048914744, "learning_rate": 1.3338607382848811e-07, "loss": 0.6137, "step": 11432 }, { "epoch": 0.93, "grad_norm": 4.041368937015934, "learning_rate": 1.3308444717813562e-07, "loss": 0.604, "step": 11433 }, { "epoch": 0.93, "grad_norm": 8.452854769154436, "learning_rate": 1.3278315734898516e-07, "loss": 0.6382, "step": 11434 }, { "epoch": 0.93, "grad_norm": 3.735592822997537, "learning_rate": 1.3248220436188565e-07, "loss": 0.4741, "step": 11435 }, { "epoch": 0.93, "grad_norm": 3.7318679891647264, "learning_rate": 1.3218158823766646e-07, "loss": 0.7383, "step": 11436 }, { "epoch": 0.93, "grad_norm": 4.410538387383327, "learning_rate": 1.3188130899713102e-07, "loss": 0.7998, "step": 11437 }, { "epoch": 0.93, "grad_norm": 6.089742488283766, "learning_rate": 1.3158136666106215e-07, "loss": 0.6571, "step": 11438 }, { "epoch": 0.93, "grad_norm": 6.547591457278495, "learning_rate": 1.3128176125021653e-07, "loss": 0.6286, "step": 11439 }, { "epoch": 0.93, "grad_norm": 2.866402969474792, "learning_rate": 1.3098249278532814e-07, "loss": 0.6275, "step": 11440 }, { "epoch": 0.93, "grad_norm": 5.069474245946652, "learning_rate": 1.30683561287111e-07, "loss": 0.6068, "step": 11441 }, { "epoch": 0.93, "grad_norm": 3.3593093735361617, "learning_rate": 1.3038496677624968e-07, "loss": 0.7212, "step": 11442 }, { "epoch": 0.93, "grad_norm": 2.8443034469932384, "learning_rate": 1.3008670927341037e-07, "loss": 0.4995, "step": 11443 }, { "epoch": 0.93, "grad_norm": 2.7166993764204466, "learning_rate": 1.297887887992344e-07, "loss": 0.5849, "step": 11444 }, { "epoch": 0.93, "grad_norm": 2.750668719298837, "learning_rate": 1.2949120537434024e-07, "loss": 0.5086, "step": 11445 }, { "epoch": 0.93, "grad_norm": 10.18295930415933, "learning_rate": 1.2919395901932087e-07, "loss": 0.7181, "step": 11446 }, { "epoch": 0.93, "grad_norm": 4.335804195975361, "learning_rate": 1.288970497547498e-07, "loss": 0.6296, "step": 11447 }, { "epoch": 0.93, "grad_norm": 3.9923644499337567, "learning_rate": 1.2860047760117344e-07, "loss": 0.6693, "step": 11448 }, { "epoch": 0.93, "grad_norm": 3.829525422343924, "learning_rate": 1.28304242579117e-07, "loss": 0.7851, "step": 11449 }, { "epoch": 0.93, "grad_norm": 3.7603471583677086, "learning_rate": 1.280083447090813e-07, "loss": 0.6702, "step": 11450 }, { "epoch": 0.93, "grad_norm": 8.334774438674412, "learning_rate": 1.2771278401154496e-07, "loss": 0.6578, "step": 11451 }, { "epoch": 0.93, "grad_norm": 4.729800875038276, "learning_rate": 1.2741756050696275e-07, "loss": 0.7373, "step": 11452 }, { "epoch": 0.93, "grad_norm": 3.660710829268898, "learning_rate": 1.2712267421576497e-07, "loss": 0.682, "step": 11453 }, { "epoch": 0.93, "grad_norm": 3.1866141556836154, "learning_rate": 1.268281251583614e-07, "loss": 0.6424, "step": 11454 }, { "epoch": 0.93, "grad_norm": 3.880055463916356, "learning_rate": 1.265339133551341e-07, "loss": 0.6087, "step": 11455 }, { "epoch": 0.93, "grad_norm": 3.30578707194263, "learning_rate": 1.2624003882644674e-07, "loss": 0.7275, "step": 11456 }, { "epoch": 0.93, "grad_norm": 6.042591611668468, "learning_rate": 1.25946501592637e-07, "loss": 0.7509, "step": 11457 }, { "epoch": 0.93, "grad_norm": 2.663008684694424, "learning_rate": 1.2565330167401747e-07, "loss": 0.5814, "step": 11458 }, { "epoch": 0.93, "grad_norm": 18.815938197177395, "learning_rate": 1.253604390908819e-07, "loss": 0.7523, "step": 11459 }, { "epoch": 0.93, "grad_norm": 4.525150600376938, "learning_rate": 1.2506791386349693e-07, "loss": 0.7165, "step": 11460 }, { "epoch": 0.93, "grad_norm": 5.096269770592939, "learning_rate": 1.2477572601210796e-07, "loss": 0.7191, "step": 11461 }, { "epoch": 0.93, "grad_norm": 5.543790499615598, "learning_rate": 1.2448387555693498e-07, "loss": 0.6205, "step": 11462 }, { "epoch": 0.93, "grad_norm": 7.447283179539826, "learning_rate": 1.2419236251817735e-07, "loss": 0.7706, "step": 11463 }, { "epoch": 0.93, "grad_norm": 4.067276293011203, "learning_rate": 1.2390118691600838e-07, "loss": 0.7817, "step": 11464 }, { "epoch": 0.93, "grad_norm": 5.872606758268423, "learning_rate": 1.236103487705792e-07, "loss": 0.638, "step": 11465 }, { "epoch": 0.93, "grad_norm": 3.008389368255949, "learning_rate": 1.2331984810201869e-07, "loss": 0.6085, "step": 11466 }, { "epoch": 0.93, "grad_norm": 4.6678813332588405, "learning_rate": 1.2302968493043078e-07, "loss": 0.7046, "step": 11467 }, { "epoch": 0.93, "grad_norm": 3.354952063264658, "learning_rate": 1.2273985927589715e-07, "loss": 0.5203, "step": 11468 }, { "epoch": 0.93, "grad_norm": 4.388074469745559, "learning_rate": 1.2245037115847402e-07, "loss": 0.6352, "step": 11469 }, { "epoch": 0.93, "grad_norm": 4.363366641435926, "learning_rate": 1.2216122059819757e-07, "loss": 0.6699, "step": 11470 }, { "epoch": 0.93, "grad_norm": 4.5356698483628985, "learning_rate": 1.2187240761507736e-07, "loss": 0.9072, "step": 11471 }, { "epoch": 0.93, "grad_norm": 5.301440655624692, "learning_rate": 1.2158393222910235e-07, "loss": 0.5446, "step": 11472 }, { "epoch": 0.93, "grad_norm": 10.592560003178411, "learning_rate": 1.2129579446023665e-07, "loss": 0.7756, "step": 11473 }, { "epoch": 0.93, "grad_norm": 3.3125618821169147, "learning_rate": 1.2100799432842037e-07, "loss": 0.6231, "step": 11474 }, { "epoch": 0.93, "grad_norm": 3.066028756926879, "learning_rate": 1.2072053185357146e-07, "loss": 0.7225, "step": 11475 }, { "epoch": 0.93, "grad_norm": 3.222692303239116, "learning_rate": 1.2043340705558405e-07, "loss": 0.5683, "step": 11476 }, { "epoch": 0.93, "grad_norm": 7.062321074809714, "learning_rate": 1.201466199543294e-07, "loss": 0.6948, "step": 11477 }, { "epoch": 0.93, "grad_norm": 3.9573957233842276, "learning_rate": 1.1986017056965448e-07, "loss": 0.616, "step": 11478 }, { "epoch": 0.93, "grad_norm": 4.704614401418179, "learning_rate": 1.1957405892138397e-07, "loss": 0.68, "step": 11479 }, { "epoch": 0.93, "grad_norm": 4.5823885844589345, "learning_rate": 1.1928828502931867e-07, "loss": 0.609, "step": 11480 }, { "epoch": 0.93, "grad_norm": 6.304430283723194, "learning_rate": 1.1900284891323499e-07, "loss": 0.6984, "step": 11481 }, { "epoch": 0.93, "grad_norm": 3.4489755957564174, "learning_rate": 1.1871775059288771e-07, "loss": 0.6543, "step": 11482 }, { "epoch": 0.93, "grad_norm": 5.072475991937842, "learning_rate": 1.1843299008800712e-07, "loss": 0.6035, "step": 11483 }, { "epoch": 0.93, "grad_norm": 11.761782002413566, "learning_rate": 1.1814856741830027e-07, "loss": 0.6944, "step": 11484 }, { "epoch": 0.93, "grad_norm": 3.9315357284272667, "learning_rate": 1.1786448260345141e-07, "loss": 0.8571, "step": 11485 }, { "epoch": 0.93, "grad_norm": 7.267408502137549, "learning_rate": 1.175807356631209e-07, "loss": 0.7623, "step": 11486 }, { "epoch": 0.93, "grad_norm": 6.81775875186139, "learning_rate": 1.1729732661694582e-07, "loss": 0.6502, "step": 11487 }, { "epoch": 0.93, "grad_norm": 2.6358801138093613, "learning_rate": 1.1701425548453938e-07, "loss": 0.6235, "step": 11488 }, { "epoch": 0.93, "grad_norm": 12.343909592743612, "learning_rate": 1.1673152228549256e-07, "loss": 0.4986, "step": 11489 }, { "epoch": 0.93, "grad_norm": 6.2864122965684075, "learning_rate": 1.1644912703937194e-07, "loss": 0.573, "step": 11490 }, { "epoch": 0.93, "grad_norm": 5.355532116362043, "learning_rate": 1.1616706976572134e-07, "loss": 0.5227, "step": 11491 }, { "epoch": 0.93, "grad_norm": 3.3531169924161017, "learning_rate": 1.1588535048406013e-07, "loss": 0.6328, "step": 11492 }, { "epoch": 0.93, "grad_norm": 3.8756372832388304, "learning_rate": 1.1560396921388551e-07, "loss": 0.6779, "step": 11493 }, { "epoch": 0.93, "grad_norm": 9.408758765338888, "learning_rate": 1.1532292597467188e-07, "loss": 0.5711, "step": 11494 }, { "epoch": 0.93, "grad_norm": 3.4668620983892366, "learning_rate": 1.1504222078586757e-07, "loss": 0.5696, "step": 11495 }, { "epoch": 0.93, "grad_norm": 4.5817165494331435, "learning_rate": 1.1476185366689985e-07, "loss": 0.7049, "step": 11496 }, { "epoch": 0.93, "grad_norm": 4.154842225515676, "learning_rate": 1.1448182463717205e-07, "loss": 0.7533, "step": 11497 }, { "epoch": 0.93, "grad_norm": 3.9670265430670297, "learning_rate": 1.142021337160637e-07, "loss": 0.7129, "step": 11498 }, { "epoch": 0.93, "grad_norm": 3.421862648752209, "learning_rate": 1.1392278092293041e-07, "loss": 0.8248, "step": 11499 }, { "epoch": 0.93, "grad_norm": 4.1245467483504745, "learning_rate": 1.1364376627710727e-07, "loss": 0.5923, "step": 11500 }, { "epoch": 0.93, "grad_norm": 10.428484925579507, "learning_rate": 1.1336508979790217e-07, "loss": 0.7293, "step": 11501 }, { "epoch": 0.93, "grad_norm": 5.31349496955677, "learning_rate": 1.1308675150460136e-07, "loss": 0.6367, "step": 11502 }, { "epoch": 0.93, "grad_norm": 4.807113153984954, "learning_rate": 1.1280875141646774e-07, "loss": 0.565, "step": 11503 }, { "epoch": 0.93, "grad_norm": 4.779421150724952, "learning_rate": 1.1253108955274094e-07, "loss": 0.7455, "step": 11504 }, { "epoch": 0.93, "grad_norm": 8.916204004292753, "learning_rate": 1.1225376593263726e-07, "loss": 0.5611, "step": 11505 }, { "epoch": 0.93, "grad_norm": 3.1897827783342025, "learning_rate": 1.11976780575348e-07, "loss": 0.7628, "step": 11506 }, { "epoch": 0.93, "grad_norm": 3.125904759399018, "learning_rate": 1.1170013350004449e-07, "loss": 0.616, "step": 11507 }, { "epoch": 0.93, "grad_norm": 3.6804669900038514, "learning_rate": 1.1142382472586921e-07, "loss": 0.5992, "step": 11508 }, { "epoch": 0.93, "grad_norm": 3.479507763164218, "learning_rate": 1.111478542719474e-07, "loss": 0.6898, "step": 11509 }, { "epoch": 0.93, "grad_norm": 3.166576669367274, "learning_rate": 1.1087222215737603e-07, "loss": 0.6678, "step": 11510 }, { "epoch": 0.93, "grad_norm": 6.5907576553474, "learning_rate": 1.1059692840123204e-07, "loss": 0.6673, "step": 11511 }, { "epoch": 0.94, "grad_norm": 3.949853025962199, "learning_rate": 1.1032197302256686e-07, "loss": 0.6254, "step": 11512 }, { "epoch": 0.94, "grad_norm": 4.305377308077892, "learning_rate": 1.1004735604040862e-07, "loss": 0.6232, "step": 11513 }, { "epoch": 0.94, "grad_norm": 7.385771144314364, "learning_rate": 1.0977307747376431e-07, "loss": 0.737, "step": 11514 }, { "epoch": 0.94, "grad_norm": 3.9879489698615607, "learning_rate": 1.0949913734161266e-07, "loss": 0.5812, "step": 11515 }, { "epoch": 0.94, "grad_norm": 2.718159896092306, "learning_rate": 1.0922553566291516e-07, "loss": 0.6668, "step": 11516 }, { "epoch": 0.94, "grad_norm": 6.151474753800585, "learning_rate": 1.0895227245660444e-07, "loss": 0.7592, "step": 11517 }, { "epoch": 0.94, "grad_norm": 3.175089066326356, "learning_rate": 1.0867934774159372e-07, "loss": 0.8187, "step": 11518 }, { "epoch": 0.94, "grad_norm": 4.131506931443423, "learning_rate": 1.0840676153677066e-07, "loss": 0.5617, "step": 11519 }, { "epoch": 0.94, "grad_norm": 2.6311422849833903, "learning_rate": 1.0813451386099904e-07, "loss": 0.6008, "step": 11520 }, { "epoch": 0.94, "grad_norm": 5.0694915706349555, "learning_rate": 1.0786260473312104e-07, "loss": 0.6224, "step": 11521 }, { "epoch": 0.94, "grad_norm": 2.406061541795044, "learning_rate": 1.0759103417195438e-07, "loss": 0.6534, "step": 11522 }, { "epoch": 0.94, "grad_norm": 2.968554981058841, "learning_rate": 1.0731980219629346e-07, "loss": 0.6147, "step": 11523 }, { "epoch": 0.94, "grad_norm": 3.8488686832202417, "learning_rate": 1.0704890882490827e-07, "loss": 0.4803, "step": 11524 }, { "epoch": 0.94, "grad_norm": 5.342749147250126, "learning_rate": 1.0677835407654824e-07, "loss": 0.5724, "step": 11525 }, { "epoch": 0.94, "grad_norm": 38.42224631027316, "learning_rate": 1.0650813796993508e-07, "loss": 0.546, "step": 11526 }, { "epoch": 0.94, "grad_norm": 3.243083961626768, "learning_rate": 1.0623826052377217e-07, "loss": 0.5683, "step": 11527 }, { "epoch": 0.94, "grad_norm": 3.339100282442589, "learning_rate": 1.0596872175673456e-07, "loss": 0.5925, "step": 11528 }, { "epoch": 0.94, "grad_norm": 6.323886679719286, "learning_rate": 1.0569952168747677e-07, "loss": 0.7036, "step": 11529 }, { "epoch": 0.94, "grad_norm": 4.297906812413702, "learning_rate": 1.0543066033462946e-07, "loss": 0.7611, "step": 11530 }, { "epoch": 0.94, "grad_norm": 2.5633968139913996, "learning_rate": 1.0516213771679885e-07, "loss": 0.6412, "step": 11531 }, { "epoch": 0.94, "grad_norm": 5.471481590573864, "learning_rate": 1.0489395385256896e-07, "loss": 0.6177, "step": 11532 }, { "epoch": 0.94, "grad_norm": 3.1911007804717975, "learning_rate": 1.046261087604994e-07, "loss": 0.6781, "step": 11533 }, { "epoch": 0.94, "grad_norm": 3.5118392570540125, "learning_rate": 1.0435860245912754e-07, "loss": 0.7317, "step": 11534 }, { "epoch": 0.94, "grad_norm": 2.7864538147219746, "learning_rate": 1.0409143496696528e-07, "loss": 0.7429, "step": 11535 }, { "epoch": 0.94, "grad_norm": 3.3808046819415942, "learning_rate": 1.038246063025028e-07, "loss": 0.6297, "step": 11536 }, { "epoch": 0.94, "grad_norm": 9.732503340033963, "learning_rate": 1.03558116484207e-07, "loss": 0.6748, "step": 11537 }, { "epoch": 0.94, "grad_norm": 4.293665581081362, "learning_rate": 1.0329196553051924e-07, "loss": 0.5564, "step": 11538 }, { "epoch": 0.94, "grad_norm": 3.660290581264573, "learning_rate": 1.0302615345986034e-07, "loss": 0.6979, "step": 11539 }, { "epoch": 0.94, "grad_norm": 3.3771867679522973, "learning_rate": 1.0276068029062559e-07, "loss": 0.7345, "step": 11540 }, { "epoch": 0.94, "grad_norm": 4.516735338652766, "learning_rate": 1.024955460411875e-07, "loss": 0.6474, "step": 11541 }, { "epoch": 0.94, "grad_norm": 3.6534943145331114, "learning_rate": 1.0223075072989418e-07, "loss": 0.5188, "step": 11542 }, { "epoch": 0.94, "grad_norm": 5.811678301905325, "learning_rate": 1.019662943750721e-07, "loss": 0.7598, "step": 11543 }, { "epoch": 0.94, "grad_norm": 3.0269610934528606, "learning_rate": 1.0170217699502272e-07, "loss": 0.5633, "step": 11544 }, { "epoch": 0.94, "grad_norm": 6.031090401356234, "learning_rate": 1.0143839860802529e-07, "loss": 0.6686, "step": 11545 }, { "epoch": 0.94, "grad_norm": 3.9537633934158403, "learning_rate": 1.0117495923233467e-07, "loss": 0.78, "step": 11546 }, { "epoch": 0.94, "grad_norm": 2.7836766586563786, "learning_rate": 1.0091185888618238e-07, "loss": 0.6807, "step": 11547 }, { "epoch": 0.94, "grad_norm": 5.525225967591341, "learning_rate": 1.0064909758777719e-07, "loss": 0.5995, "step": 11548 }, { "epoch": 0.94, "grad_norm": 4.036200857961824, "learning_rate": 1.0038667535530233e-07, "loss": 0.6269, "step": 11549 }, { "epoch": 0.94, "grad_norm": 4.768220441354685, "learning_rate": 1.001245922069205e-07, "loss": 0.6867, "step": 11550 }, { "epoch": 0.94, "grad_norm": 3.631182825766013, "learning_rate": 9.98628481607683e-08, "loss": 0.6584, "step": 11551 }, { "epoch": 0.94, "grad_norm": 3.521279969963515, "learning_rate": 9.960144323496179e-08, "loss": 0.6244, "step": 11552 }, { "epoch": 0.94, "grad_norm": 8.285872302571256, "learning_rate": 9.934037744759096e-08, "loss": 0.6541, "step": 11553 }, { "epoch": 0.94, "grad_norm": 5.755804522446817, "learning_rate": 9.907965081672244e-08, "loss": 0.6331, "step": 11554 }, { "epoch": 0.94, "grad_norm": 3.5099633342407204, "learning_rate": 9.881926336040126e-08, "loss": 0.7255, "step": 11555 }, { "epoch": 0.94, "grad_norm": 5.651743321305104, "learning_rate": 9.855921509664745e-08, "loss": 0.6239, "step": 11556 }, { "epoch": 0.94, "grad_norm": 3.7542328891519845, "learning_rate": 9.829950604345772e-08, "loss": 0.6934, "step": 11557 }, { "epoch": 0.94, "grad_norm": 3.4735214247980846, "learning_rate": 9.804013621880548e-08, "loss": 0.6757, "step": 11558 }, { "epoch": 0.94, "grad_norm": 3.3913642322877506, "learning_rate": 9.778110564064191e-08, "loss": 0.6834, "step": 11559 }, { "epoch": 0.94, "grad_norm": 2.984380678780318, "learning_rate": 9.752241432689214e-08, "loss": 0.7108, "step": 11560 }, { "epoch": 0.94, "grad_norm": 3.1182360513861322, "learning_rate": 9.72640622954607e-08, "loss": 0.6659, "step": 11561 }, { "epoch": 0.94, "grad_norm": 4.439015078897651, "learning_rate": 9.700604956422554e-08, "loss": 0.5725, "step": 11562 }, { "epoch": 0.94, "grad_norm": 3.3307809303430456, "learning_rate": 9.674837615104349e-08, "loss": 0.7122, "step": 11563 }, { "epoch": 0.94, "grad_norm": 3.7068015658384317, "learning_rate": 9.649104207374749e-08, "loss": 0.6315, "step": 11564 }, { "epoch": 0.94, "grad_norm": 2.3523424097740366, "learning_rate": 9.623404735014608e-08, "loss": 0.6933, "step": 11565 }, { "epoch": 0.94, "grad_norm": 5.176665596445569, "learning_rate": 9.597739199802614e-08, "loss": 0.5512, "step": 11566 }, { "epoch": 0.94, "grad_norm": 3.475544379390946, "learning_rate": 9.572107603514846e-08, "loss": 0.5305, "step": 11567 }, { "epoch": 0.94, "grad_norm": 3.675809172104682, "learning_rate": 9.54650994792522e-08, "loss": 0.5767, "step": 11568 }, { "epoch": 0.94, "grad_norm": 5.737765280557999, "learning_rate": 9.520946234805206e-08, "loss": 0.5663, "step": 11569 }, { "epoch": 0.94, "grad_norm": 2.9182776775666692, "learning_rate": 9.495416465924113e-08, "loss": 0.5466, "step": 11570 }, { "epoch": 0.94, "grad_norm": 3.6868794542786842, "learning_rate": 9.469920643048636e-08, "loss": 0.6532, "step": 11571 }, { "epoch": 0.94, "grad_norm": 5.0995567761750795, "learning_rate": 9.444458767943254e-08, "loss": 0.7676, "step": 11572 }, { "epoch": 0.94, "grad_norm": 2.697674843721375, "learning_rate": 9.419030842370114e-08, "loss": 0.6472, "step": 11573 }, { "epoch": 0.94, "grad_norm": 5.4233984982670895, "learning_rate": 9.393636868089029e-08, "loss": 0.823, "step": 11574 }, { "epoch": 0.94, "grad_norm": 3.6890610300496998, "learning_rate": 9.368276846857427e-08, "loss": 0.633, "step": 11575 }, { "epoch": 0.94, "grad_norm": 10.782577575098676, "learning_rate": 9.342950780430238e-08, "loss": 0.6728, "step": 11576 }, { "epoch": 0.94, "grad_norm": 4.266639622208362, "learning_rate": 9.317658670560336e-08, "loss": 0.6915, "step": 11577 }, { "epoch": 0.94, "grad_norm": 2.9196072722320987, "learning_rate": 9.292400518998102e-08, "loss": 0.653, "step": 11578 }, { "epoch": 0.94, "grad_norm": 4.968598092054262, "learning_rate": 9.267176327491412e-08, "loss": 0.7488, "step": 11579 }, { "epoch": 0.94, "grad_norm": 3.858103519916151, "learning_rate": 9.241986097786093e-08, "loss": 0.6532, "step": 11580 }, { "epoch": 0.94, "grad_norm": 2.4807037938636998, "learning_rate": 9.216829831625363e-08, "loss": 0.6437, "step": 11581 }, { "epoch": 0.94, "grad_norm": 4.200296841777683, "learning_rate": 9.191707530750271e-08, "loss": 0.7319, "step": 11582 }, { "epoch": 0.94, "grad_norm": 4.18615010952583, "learning_rate": 9.166619196899318e-08, "loss": 0.6507, "step": 11583 }, { "epoch": 0.94, "grad_norm": 4.572003535381461, "learning_rate": 9.141564831808947e-08, "loss": 0.6512, "step": 11584 }, { "epoch": 0.94, "grad_norm": 2.910539420515676, "learning_rate": 9.116544437212993e-08, "loss": 0.7063, "step": 11585 }, { "epoch": 0.94, "grad_norm": 7.726111085651036, "learning_rate": 9.091558014842961e-08, "loss": 0.5578, "step": 11586 }, { "epoch": 0.94, "grad_norm": 6.525478925624177, "learning_rate": 9.066605566428188e-08, "loss": 0.6738, "step": 11587 }, { "epoch": 0.94, "grad_norm": 3.350555394766436, "learning_rate": 9.041687093695461e-08, "loss": 0.6915, "step": 11588 }, { "epoch": 0.94, "grad_norm": 2.838150938541724, "learning_rate": 9.01680259836929e-08, "loss": 0.6678, "step": 11589 }, { "epoch": 0.94, "grad_norm": 4.639352425093215, "learning_rate": 8.991952082171851e-08, "loss": 0.588, "step": 11590 }, { "epoch": 0.94, "grad_norm": 3.3533410319936827, "learning_rate": 8.967135546823047e-08, "loss": 0.5919, "step": 11591 }, { "epoch": 0.94, "grad_norm": 5.016141101764093, "learning_rate": 8.942352994040227e-08, "loss": 0.6153, "step": 11592 }, { "epoch": 0.94, "grad_norm": 15.523717066604496, "learning_rate": 8.917604425538518e-08, "loss": 0.6691, "step": 11593 }, { "epoch": 0.94, "grad_norm": 10.429911331746677, "learning_rate": 8.892889843030717e-08, "loss": 0.6266, "step": 11594 }, { "epoch": 0.94, "grad_norm": 2.931049488674767, "learning_rate": 8.868209248227178e-08, "loss": 0.7576, "step": 11595 }, { "epoch": 0.94, "grad_norm": 3.821839098993725, "learning_rate": 8.843562642835979e-08, "loss": 0.5988, "step": 11596 }, { "epoch": 0.94, "grad_norm": 5.566454634037884, "learning_rate": 8.818950028562811e-08, "loss": 0.7029, "step": 11597 }, { "epoch": 0.94, "grad_norm": 3.961223750544519, "learning_rate": 8.794371407111091e-08, "loss": 0.6638, "step": 11598 }, { "epoch": 0.94, "grad_norm": 4.36013230181537, "learning_rate": 8.769826780181678e-08, "loss": 0.5431, "step": 11599 }, { "epoch": 0.94, "grad_norm": 5.5354828177064785, "learning_rate": 8.745316149473382e-08, "loss": 0.6708, "step": 11600 }, { "epoch": 0.94, "grad_norm": 2.726120754900734, "learning_rate": 8.720839516682344e-08, "loss": 0.6106, "step": 11601 }, { "epoch": 0.94, "grad_norm": 3.3694560656560912, "learning_rate": 8.6963968835026e-08, "loss": 0.4937, "step": 11602 }, { "epoch": 0.94, "grad_norm": 4.784349168717699, "learning_rate": 8.671988251625685e-08, "loss": 0.6154, "step": 11603 }, { "epoch": 0.94, "grad_norm": 3.4841503991185947, "learning_rate": 8.647613622740746e-08, "loss": 0.6955, "step": 11604 }, { "epoch": 0.94, "grad_norm": 3.3273731289619217, "learning_rate": 8.623272998534882e-08, "loss": 0.6842, "step": 11605 }, { "epoch": 0.94, "grad_norm": 9.537243778071526, "learning_rate": 8.598966380692408e-08, "loss": 0.5794, "step": 11606 }, { "epoch": 0.94, "grad_norm": 2.9748760151315534, "learning_rate": 8.574693770895648e-08, "loss": 0.7394, "step": 11607 }, { "epoch": 0.94, "grad_norm": 4.7055747418133365, "learning_rate": 8.550455170824313e-08, "loss": 0.7158, "step": 11608 }, { "epoch": 0.94, "grad_norm": 3.8182130679469277, "learning_rate": 8.526250582155893e-08, "loss": 0.6823, "step": 11609 }, { "epoch": 0.94, "grad_norm": 3.163062967380128, "learning_rate": 8.502080006565495e-08, "loss": 0.7314, "step": 11610 }, { "epoch": 0.94, "grad_norm": 6.835815105432434, "learning_rate": 8.477943445725889e-08, "loss": 0.6437, "step": 11611 }, { "epoch": 0.94, "grad_norm": 5.743579343542501, "learning_rate": 8.45384090130752e-08, "loss": 0.5644, "step": 11612 }, { "epoch": 0.94, "grad_norm": 4.744159961895033, "learning_rate": 8.429772374978384e-08, "loss": 0.782, "step": 11613 }, { "epoch": 0.94, "grad_norm": 4.792524804149734, "learning_rate": 8.405737868404151e-08, "loss": 0.6003, "step": 11614 }, { "epoch": 0.94, "grad_norm": 4.3166922619617765, "learning_rate": 8.381737383248156e-08, "loss": 0.6129, "step": 11615 }, { "epoch": 0.94, "grad_norm": 6.499222075008327, "learning_rate": 8.357770921171516e-08, "loss": 0.574, "step": 11616 }, { "epoch": 0.94, "grad_norm": 6.521274010017859, "learning_rate": 8.333838483832679e-08, "loss": 0.5296, "step": 11617 }, { "epoch": 0.94, "grad_norm": 5.855771900680462, "learning_rate": 8.309940072888046e-08, "loss": 0.6511, "step": 11618 }, { "epoch": 0.94, "grad_norm": 4.772660268051909, "learning_rate": 8.286075689991457e-08, "loss": 0.7796, "step": 11619 }, { "epoch": 0.94, "grad_norm": 2.980918799755801, "learning_rate": 8.262245336794594e-08, "loss": 0.6253, "step": 11620 }, { "epoch": 0.94, "grad_norm": 5.179517333585579, "learning_rate": 8.238449014946526e-08, "loss": 0.6407, "step": 11621 }, { "epoch": 0.94, "grad_norm": 3.1512457581422826, "learning_rate": 8.214686726094157e-08, "loss": 0.6343, "step": 11622 }, { "epoch": 0.94, "grad_norm": 3.82226874289729, "learning_rate": 8.19095847188206e-08, "loss": 0.5487, "step": 11623 }, { "epoch": 0.94, "grad_norm": 7.873913725748984, "learning_rate": 8.167264253952256e-08, "loss": 0.8426, "step": 11624 }, { "epoch": 0.94, "grad_norm": 6.504261537819809, "learning_rate": 8.143604073944656e-08, "loss": 0.7083, "step": 11625 }, { "epoch": 0.94, "grad_norm": 9.504890110119282, "learning_rate": 8.11997793349667e-08, "loss": 0.615, "step": 11626 }, { "epoch": 0.94, "grad_norm": 25.717031275301753, "learning_rate": 8.096385834243325e-08, "loss": 0.4832, "step": 11627 }, { "epoch": 0.94, "grad_norm": 2.6888519422814947, "learning_rate": 8.072827777817316e-08, "loss": 0.6875, "step": 11628 }, { "epoch": 0.94, "grad_norm": 3.711849887015135, "learning_rate": 8.049303765849059e-08, "loss": 0.5983, "step": 11629 }, { "epoch": 0.94, "grad_norm": 4.15815723616945, "learning_rate": 8.025813799966586e-08, "loss": 0.7489, "step": 11630 }, { "epoch": 0.94, "grad_norm": 2.924321452685983, "learning_rate": 8.002357881795486e-08, "loss": 0.6797, "step": 11631 }, { "epoch": 0.94, "grad_norm": 2.9912399391153275, "learning_rate": 7.978936012959126e-08, "loss": 0.6462, "step": 11632 }, { "epoch": 0.94, "grad_norm": 2.1665609563727517, "learning_rate": 7.955548195078433e-08, "loss": 0.5375, "step": 11633 }, { "epoch": 0.94, "grad_norm": 6.145731813981746, "learning_rate": 7.932194429771945e-08, "loss": 0.6375, "step": 11634 }, { "epoch": 0.94, "grad_norm": 8.795368409432303, "learning_rate": 7.908874718655923e-08, "loss": 0.6494, "step": 11635 }, { "epoch": 0.95, "grad_norm": 3.2244219716125446, "learning_rate": 7.88558906334419e-08, "loss": 0.5431, "step": 11636 }, { "epoch": 0.95, "grad_norm": 2.9539547923962974, "learning_rate": 7.862337465448344e-08, "loss": 0.629, "step": 11637 }, { "epoch": 0.95, "grad_norm": 3.97953857435795, "learning_rate": 7.839119926577488e-08, "loss": 0.6806, "step": 11638 }, { "epoch": 0.95, "grad_norm": 5.762450643057052, "learning_rate": 7.815936448338446e-08, "loss": 0.6174, "step": 11639 }, { "epoch": 0.95, "grad_norm": 2.7535604536714646, "learning_rate": 7.792787032335657e-08, "loss": 0.4925, "step": 11640 }, { "epoch": 0.95, "grad_norm": 5.890220407167974, "learning_rate": 7.769671680171232e-08, "loss": 0.6196, "step": 11641 }, { "epoch": 0.95, "grad_norm": 4.809098251359767, "learning_rate": 7.74659039344483e-08, "loss": 0.6616, "step": 11642 }, { "epoch": 0.95, "grad_norm": 4.490832267627192, "learning_rate": 7.723543173753789e-08, "loss": 0.6091, "step": 11643 }, { "epoch": 0.95, "grad_norm": 9.444773869167516, "learning_rate": 7.700530022693275e-08, "loss": 0.68, "step": 11644 }, { "epoch": 0.95, "grad_norm": 2.343394088195895, "learning_rate": 7.677550941855793e-08, "loss": 0.6036, "step": 11645 }, { "epoch": 0.95, "grad_norm": 4.606753780027665, "learning_rate": 7.654605932831793e-08, "loss": 0.6125, "step": 11646 }, { "epoch": 0.95, "grad_norm": 6.98710454191637, "learning_rate": 7.631694997209061e-08, "loss": 0.6896, "step": 11647 }, { "epoch": 0.95, "grad_norm": 5.865091923210574, "learning_rate": 7.60881813657327e-08, "loss": 0.7415, "step": 11648 }, { "epoch": 0.95, "grad_norm": 6.0396325940630975, "learning_rate": 7.585975352507547e-08, "loss": 0.5673, "step": 11649 }, { "epoch": 0.95, "grad_norm": 3.24069278097471, "learning_rate": 7.5631666465929e-08, "loss": 0.746, "step": 11650 }, { "epoch": 0.95, "grad_norm": 6.487372615802543, "learning_rate": 7.540392020407739e-08, "loss": 0.7738, "step": 11651 }, { "epoch": 0.95, "grad_norm": 3.109664530159642, "learning_rate": 7.517651475528187e-08, "loss": 0.538, "step": 11652 }, { "epoch": 0.95, "grad_norm": 9.15964529060189, "learning_rate": 7.49494501352821e-08, "loss": 0.7096, "step": 11653 }, { "epoch": 0.95, "grad_norm": 4.857504601036801, "learning_rate": 7.472272635978995e-08, "loss": 0.6357, "step": 11654 }, { "epoch": 0.95, "grad_norm": 5.36471050006214, "learning_rate": 7.44963434444973e-08, "loss": 0.5389, "step": 11655 }, { "epoch": 0.95, "grad_norm": 3.33508908282511, "learning_rate": 7.427030140507108e-08, "loss": 0.6442, "step": 11656 }, { "epoch": 0.95, "grad_norm": 4.391788444758637, "learning_rate": 7.404460025715543e-08, "loss": 0.7423, "step": 11657 }, { "epoch": 0.95, "grad_norm": 6.528137374660082, "learning_rate": 7.381924001636953e-08, "loss": 0.7136, "step": 11658 }, { "epoch": 0.95, "grad_norm": 4.554457221550705, "learning_rate": 7.359422069831035e-08, "loss": 0.7168, "step": 11659 }, { "epoch": 0.95, "grad_norm": 2.4903430032411733, "learning_rate": 7.336954231855042e-08, "loss": 0.4979, "step": 11660 }, { "epoch": 0.95, "grad_norm": 4.360894000283392, "learning_rate": 7.314520489263787e-08, "loss": 0.6154, "step": 11661 }, { "epoch": 0.95, "grad_norm": 4.090449773781666, "learning_rate": 7.29212084361003e-08, "loss": 0.7313, "step": 11662 }, { "epoch": 0.95, "grad_norm": 2.935725075978512, "learning_rate": 7.269755296443748e-08, "loss": 0.6681, "step": 11663 }, { "epoch": 0.95, "grad_norm": 4.669406930782915, "learning_rate": 7.247423849312984e-08, "loss": 0.5562, "step": 11664 }, { "epoch": 0.95, "grad_norm": 3.336626173636811, "learning_rate": 7.225126503763057e-08, "loss": 0.6231, "step": 11665 }, { "epoch": 0.95, "grad_norm": 3.636211872373431, "learning_rate": 7.202863261337178e-08, "loss": 0.7553, "step": 11666 }, { "epoch": 0.95, "grad_norm": 3.150574842900758, "learning_rate": 7.180634123576058e-08, "loss": 0.647, "step": 11667 }, { "epoch": 0.95, "grad_norm": 3.8888324209728045, "learning_rate": 7.158439092018077e-08, "loss": 0.8087, "step": 11668 }, { "epoch": 0.95, "grad_norm": 8.136560649361392, "learning_rate": 7.13627816819934e-08, "loss": 0.72, "step": 11669 }, { "epoch": 0.95, "grad_norm": 2.9991879772076735, "learning_rate": 7.114151353653399e-08, "loss": 0.5567, "step": 11670 }, { "epoch": 0.95, "grad_norm": 2.7172871566518326, "learning_rate": 7.092058649911748e-08, "loss": 0.5748, "step": 11671 }, { "epoch": 0.95, "grad_norm": 2.9132600838390226, "learning_rate": 7.070000058503169e-08, "loss": 0.6371, "step": 11672 }, { "epoch": 0.95, "grad_norm": 5.962452321979046, "learning_rate": 7.047975580954436e-08, "loss": 0.6457, "step": 11673 }, { "epoch": 0.95, "grad_norm": 11.20241206756556, "learning_rate": 7.025985218789555e-08, "loss": 0.6162, "step": 11674 }, { "epoch": 0.95, "grad_norm": 3.346096299542828, "learning_rate": 7.004028973530586e-08, "loss": 0.8032, "step": 11675 }, { "epoch": 0.95, "grad_norm": 5.851467428148096, "learning_rate": 6.982106846696979e-08, "loss": 0.7072, "step": 11676 }, { "epoch": 0.95, "grad_norm": 11.020489786730803, "learning_rate": 6.9602188398058e-08, "loss": 0.5978, "step": 11677 }, { "epoch": 0.95, "grad_norm": 4.001187987405929, "learning_rate": 6.938364954372001e-08, "loss": 0.7337, "step": 11678 }, { "epoch": 0.95, "grad_norm": 7.460073231275004, "learning_rate": 6.91654519190782e-08, "loss": 0.6974, "step": 11679 }, { "epoch": 0.95, "grad_norm": 4.553490945055479, "learning_rate": 6.894759553923547e-08, "loss": 0.7167, "step": 11680 }, { "epoch": 0.95, "grad_norm": 2.8631416112897083, "learning_rate": 6.873008041926643e-08, "loss": 0.6052, "step": 11681 }, { "epoch": 0.95, "grad_norm": 10.48298385823776, "learning_rate": 6.851290657422627e-08, "loss": 0.6784, "step": 11682 }, { "epoch": 0.95, "grad_norm": 3.320154206976553, "learning_rate": 6.829607401914462e-08, "loss": 0.6991, "step": 11683 }, { "epoch": 0.95, "grad_norm": 5.932955269226586, "learning_rate": 6.807958276902615e-08, "loss": 0.6533, "step": 11684 }, { "epoch": 0.95, "grad_norm": 5.223752866619851, "learning_rate": 6.786343283885554e-08, "loss": 0.6863, "step": 11685 }, { "epoch": 0.95, "grad_norm": 6.974001810193784, "learning_rate": 6.764762424359029e-08, "loss": 0.691, "step": 11686 }, { "epoch": 0.95, "grad_norm": 4.3220393808556, "learning_rate": 6.743215699816564e-08, "loss": 0.8174, "step": 11687 }, { "epoch": 0.95, "grad_norm": 2.855817887731506, "learning_rate": 6.721703111749412e-08, "loss": 0.6676, "step": 11688 }, { "epoch": 0.95, "grad_norm": 2.7986218182754286, "learning_rate": 6.700224661646326e-08, "loss": 0.6008, "step": 11689 }, { "epoch": 0.95, "grad_norm": 4.217314689381992, "learning_rate": 6.678780350993786e-08, "loss": 0.7453, "step": 11690 }, { "epoch": 0.95, "grad_norm": 4.096674792101529, "learning_rate": 6.657370181275823e-08, "loss": 0.6831, "step": 11691 }, { "epoch": 0.95, "grad_norm": 3.3553204887401304, "learning_rate": 6.635994153974257e-08, "loss": 0.7659, "step": 11692 }, { "epoch": 0.95, "grad_norm": 3.758865732524541, "learning_rate": 6.61465227056829e-08, "loss": 0.5297, "step": 11693 }, { "epoch": 0.95, "grad_norm": 4.082034937843226, "learning_rate": 6.593344532535073e-08, "loss": 0.6807, "step": 11694 }, { "epoch": 0.95, "grad_norm": 3.0757607724979907, "learning_rate": 6.572070941349095e-08, "loss": 0.7898, "step": 11695 }, { "epoch": 0.95, "grad_norm": 2.7172509878037627, "learning_rate": 6.550831498482679e-08, "loss": 0.6053, "step": 11696 }, { "epoch": 0.95, "grad_norm": 8.806184060462922, "learning_rate": 6.529626205405759e-08, "loss": 0.7493, "step": 11697 }, { "epoch": 0.95, "grad_norm": 3.9409835634147736, "learning_rate": 6.508455063585883e-08, "loss": 0.725, "step": 11698 }, { "epoch": 0.95, "grad_norm": 2.081419349184579, "learning_rate": 6.487318074488159e-08, "loss": 0.6261, "step": 11699 }, { "epoch": 0.95, "grad_norm": 12.1516430982968, "learning_rate": 6.466215239575469e-08, "loss": 0.5925, "step": 11700 }, { "epoch": 0.95, "grad_norm": 5.661992939388067, "learning_rate": 6.445146560308202e-08, "loss": 0.5893, "step": 11701 }, { "epoch": 0.95, "grad_norm": 3.8211793726593872, "learning_rate": 6.42411203814447e-08, "loss": 0.5849, "step": 11702 }, { "epoch": 0.95, "grad_norm": 4.167181838430979, "learning_rate": 6.403111674539996e-08, "loss": 0.6441, "step": 11703 }, { "epoch": 0.95, "grad_norm": 5.5875787897976705, "learning_rate": 6.38214547094812e-08, "loss": 0.5131, "step": 11704 }, { "epoch": 0.95, "grad_norm": 2.673228563884166, "learning_rate": 6.361213428819901e-08, "loss": 0.6417, "step": 11705 }, { "epoch": 0.95, "grad_norm": 3.0925866395972355, "learning_rate": 6.340315549603903e-08, "loss": 0.7816, "step": 11706 }, { "epoch": 0.95, "grad_norm": 3.326546306893215, "learning_rate": 6.319451834746415e-08, "loss": 0.7483, "step": 11707 }, { "epoch": 0.95, "grad_norm": 5.7795352368544854, "learning_rate": 6.298622285691337e-08, "loss": 0.632, "step": 11708 }, { "epoch": 0.95, "grad_norm": 3.5534990874002412, "learning_rate": 6.277826903880125e-08, "loss": 0.6951, "step": 11709 }, { "epoch": 0.95, "grad_norm": 4.161709880855212, "learning_rate": 6.257065690752129e-08, "loss": 0.6208, "step": 11710 }, { "epoch": 0.95, "grad_norm": 8.90725377683745, "learning_rate": 6.236338647743922e-08, "loss": 0.6842, "step": 11711 }, { "epoch": 0.95, "grad_norm": 4.4160285104456465, "learning_rate": 6.215645776290191e-08, "loss": 0.678, "step": 11712 }, { "epoch": 0.95, "grad_norm": 3.311361410569505, "learning_rate": 6.194987077822845e-08, "loss": 0.6691, "step": 11713 }, { "epoch": 0.95, "grad_norm": 3.091133285935937, "learning_rate": 6.174362553771685e-08, "loss": 0.6524, "step": 11714 }, { "epoch": 0.95, "grad_norm": 3.6902023663328367, "learning_rate": 6.153772205563957e-08, "loss": 0.5426, "step": 11715 }, { "epoch": 0.95, "grad_norm": 2.8404791017445543, "learning_rate": 6.133216034624745e-08, "loss": 0.7112, "step": 11716 }, { "epoch": 0.95, "grad_norm": 5.033195540933517, "learning_rate": 6.112694042376632e-08, "loss": 0.5975, "step": 11717 }, { "epoch": 0.95, "grad_norm": 4.558822748664367, "learning_rate": 6.092206230239817e-08, "loss": 0.6435, "step": 11718 }, { "epoch": 0.95, "grad_norm": 3.6303853319449413, "learning_rate": 6.071752599632274e-08, "loss": 0.5443, "step": 11719 }, { "epoch": 0.95, "grad_norm": 6.148070415844036, "learning_rate": 6.051333151969484e-08, "loss": 0.6088, "step": 11720 }, { "epoch": 0.95, "grad_norm": 9.901224407908957, "learning_rate": 6.030947888664595e-08, "loss": 0.7554, "step": 11721 }, { "epoch": 0.95, "grad_norm": 8.911564597551182, "learning_rate": 6.010596811128366e-08, "loss": 0.6903, "step": 11722 }, { "epoch": 0.95, "grad_norm": 10.908731564824842, "learning_rate": 5.990279920769227e-08, "loss": 0.6494, "step": 11723 }, { "epoch": 0.95, "grad_norm": 3.30618045758593, "learning_rate": 5.969997218993328e-08, "loss": 0.878, "step": 11724 }, { "epoch": 0.95, "grad_norm": 3.0259451785408618, "learning_rate": 5.9497487072042726e-08, "loss": 0.6883, "step": 11725 }, { "epoch": 0.95, "grad_norm": 4.340156293807322, "learning_rate": 5.929534386803437e-08, "loss": 0.5766, "step": 11726 }, { "epoch": 0.95, "grad_norm": 3.5549552286110973, "learning_rate": 5.909354259189648e-08, "loss": 0.6149, "step": 11727 }, { "epoch": 0.95, "grad_norm": 3.9749930530491513, "learning_rate": 5.889208325759677e-08, "loss": 0.6626, "step": 11728 }, { "epoch": 0.95, "grad_norm": 7.390236351493411, "learning_rate": 5.86909658790763e-08, "loss": 0.5118, "step": 11729 }, { "epoch": 0.95, "grad_norm": 5.125566525101464, "learning_rate": 5.8490190470254505e-08, "loss": 0.6327, "step": 11730 }, { "epoch": 0.95, "grad_norm": 4.259811792140969, "learning_rate": 5.8289757045025816e-08, "loss": 0.7539, "step": 11731 }, { "epoch": 0.95, "grad_norm": 7.5175206300375805, "learning_rate": 5.8089665617260816e-08, "loss": 0.5818, "step": 11732 }, { "epoch": 0.95, "grad_norm": 3.7699829581998667, "learning_rate": 5.7889916200808414e-08, "loss": 0.6107, "step": 11733 }, { "epoch": 0.95, "grad_norm": 4.9176823580550355, "learning_rate": 5.769050880949201e-08, "loss": 0.6737, "step": 11734 }, { "epoch": 0.95, "grad_norm": 5.322091305529035, "learning_rate": 5.7491443457111105e-08, "loss": 0.8031, "step": 11735 }, { "epoch": 0.95, "grad_norm": 4.814411466178407, "learning_rate": 5.729272015744303e-08, "loss": 0.6445, "step": 11736 }, { "epoch": 0.95, "grad_norm": 5.632955372670994, "learning_rate": 5.709433892424121e-08, "loss": 0.5588, "step": 11737 }, { "epoch": 0.95, "grad_norm": 3.831056469865346, "learning_rate": 5.689629977123412e-08, "loss": 0.7361, "step": 11738 }, { "epoch": 0.95, "grad_norm": 3.52884708347657, "learning_rate": 5.6698602712126906e-08, "loss": 0.6379, "step": 11739 }, { "epoch": 0.95, "grad_norm": 2.8764701210074586, "learning_rate": 5.6501247760602506e-08, "loss": 0.7537, "step": 11740 }, { "epoch": 0.95, "grad_norm": 3.497824934773602, "learning_rate": 5.6304234930318336e-08, "loss": 0.6144, "step": 11741 }, { "epoch": 0.95, "grad_norm": 3.979743264884185, "learning_rate": 5.610756423490904e-08, "loss": 0.7633, "step": 11742 }, { "epoch": 0.95, "grad_norm": 2.7418776773373006, "learning_rate": 5.591123568798596e-08, "loss": 0.5232, "step": 11743 }, { "epoch": 0.95, "grad_norm": 7.234840138021113, "learning_rate": 5.571524930313543e-08, "loss": 0.6694, "step": 11744 }, { "epoch": 0.95, "grad_norm": 3.302785926799452, "learning_rate": 5.551960509392218e-08, "loss": 0.7003, "step": 11745 }, { "epoch": 0.95, "grad_norm": 3.8394740752944747, "learning_rate": 5.532430307388481e-08, "loss": 0.5596, "step": 11746 }, { "epoch": 0.95, "grad_norm": 3.87822818498878, "learning_rate": 5.5129343256539734e-08, "loss": 0.7971, "step": 11747 }, { "epoch": 0.95, "grad_norm": 2.2671029104677416, "learning_rate": 5.493472565538005e-08, "loss": 0.6643, "step": 11748 }, { "epoch": 0.95, "grad_norm": 3.981859394682588, "learning_rate": 5.474045028387387e-08, "loss": 0.6538, "step": 11749 }, { "epoch": 0.95, "grad_norm": 5.011321146903895, "learning_rate": 5.4546517155465996e-08, "loss": 0.5559, "step": 11750 }, { "epoch": 0.95, "grad_norm": 6.722255364012913, "learning_rate": 5.435292628357902e-08, "loss": 0.5028, "step": 11751 }, { "epoch": 0.95, "grad_norm": 3.9585529911247788, "learning_rate": 5.415967768160946e-08, "loss": 0.5237, "step": 11752 }, { "epoch": 0.95, "grad_norm": 4.3637407933531325, "learning_rate": 5.396677136293216e-08, "loss": 0.6942, "step": 11753 }, { "epoch": 0.95, "grad_norm": 10.38383821701632, "learning_rate": 5.377420734089644e-08, "loss": 0.6323, "step": 11754 }, { "epoch": 0.95, "grad_norm": 16.965801943513956, "learning_rate": 5.3581985628830545e-08, "loss": 0.5266, "step": 11755 }, { "epoch": 0.95, "grad_norm": 4.786846301516611, "learning_rate": 5.3390106240036046e-08, "loss": 0.6443, "step": 11756 }, { "epoch": 0.95, "grad_norm": 2.956081327722214, "learning_rate": 5.319856918779232e-08, "loss": 0.5561, "step": 11757 }, { "epoch": 0.95, "grad_norm": 4.499820576197688, "learning_rate": 5.3007374485355424e-08, "loss": 0.6006, "step": 11758 }, { "epoch": 0.96, "grad_norm": 5.162631407623525, "learning_rate": 5.281652214595701e-08, "loss": 0.5894, "step": 11759 }, { "epoch": 0.96, "grad_norm": 3.112365071169805, "learning_rate": 5.262601218280539e-08, "loss": 0.5843, "step": 11760 }, { "epoch": 0.96, "grad_norm": 2.8853421401107124, "learning_rate": 5.243584460908446e-08, "loss": 0.5181, "step": 11761 }, { "epoch": 0.96, "grad_norm": 4.11792934507338, "learning_rate": 5.2246019437956486e-08, "loss": 0.6572, "step": 11762 }, { "epoch": 0.96, "grad_norm": 5.92477116497485, "learning_rate": 5.2056536682557054e-08, "loss": 0.6777, "step": 11763 }, { "epoch": 0.96, "grad_norm": 3.9361211085662506, "learning_rate": 5.186739635600013e-08, "loss": 0.7874, "step": 11764 }, { "epoch": 0.96, "grad_norm": 2.9373416501277725, "learning_rate": 5.167859847137524e-08, "loss": 0.727, "step": 11765 }, { "epoch": 0.96, "grad_norm": 3.4091266528517106, "learning_rate": 5.149014304174915e-08, "loss": 0.7605, "step": 11766 }, { "epoch": 0.96, "grad_norm": 7.291767057113755, "learning_rate": 5.13020300801631e-08, "loss": 0.6447, "step": 11767 }, { "epoch": 0.96, "grad_norm": 5.736239395341825, "learning_rate": 5.111425959963612e-08, "loss": 0.6843, "step": 11768 }, { "epoch": 0.96, "grad_norm": 3.224492789528648, "learning_rate": 5.092683161316281e-08, "loss": 0.6628, "step": 11769 }, { "epoch": 0.96, "grad_norm": 4.292948962405509, "learning_rate": 5.0739746133715574e-08, "loss": 0.5603, "step": 11770 }, { "epoch": 0.96, "grad_norm": 2.9891738149979004, "learning_rate": 5.055300317424017e-08, "loss": 0.7368, "step": 11771 }, { "epoch": 0.96, "grad_norm": 18.401821849542376, "learning_rate": 5.036660274766181e-08, "loss": 0.594, "step": 11772 }, { "epoch": 0.96, "grad_norm": 5.535712182828714, "learning_rate": 5.018054486687962e-08, "loss": 0.6591, "step": 11773 }, { "epoch": 0.96, "grad_norm": 4.350381755532951, "learning_rate": 4.999482954477053e-08, "loss": 0.7204, "step": 11774 }, { "epoch": 0.96, "grad_norm": 3.3175349348044554, "learning_rate": 4.980945679418703e-08, "loss": 0.6631, "step": 11775 }, { "epoch": 0.96, "grad_norm": 8.368479457097477, "learning_rate": 4.96244266279583e-08, "loss": 0.5932, "step": 11776 }, { "epoch": 0.96, "grad_norm": 4.804503287296305, "learning_rate": 4.94397390588891e-08, "loss": 0.6023, "step": 11777 }, { "epoch": 0.96, "grad_norm": 6.8984197259125315, "learning_rate": 4.9255394099761436e-08, "loss": 0.6383, "step": 11778 }, { "epoch": 0.96, "grad_norm": 11.166330058521746, "learning_rate": 4.907139176333286e-08, "loss": 0.5621, "step": 11779 }, { "epoch": 0.96, "grad_norm": 3.817361965547026, "learning_rate": 4.8887732062337656e-08, "loss": 0.6938, "step": 11780 }, { "epoch": 0.96, "grad_norm": 2.3044762273354675, "learning_rate": 4.8704415009486194e-08, "loss": 0.6966, "step": 11781 }, { "epoch": 0.96, "grad_norm": 3.0063241892957127, "learning_rate": 4.8521440617465e-08, "loss": 0.6059, "step": 11782 }, { "epoch": 0.96, "grad_norm": 3.576583988628541, "learning_rate": 4.833880889893727e-08, "loss": 0.6574, "step": 11783 }, { "epoch": 0.96, "grad_norm": 4.647600597087502, "learning_rate": 4.815651986654235e-08, "loss": 0.629, "step": 11784 }, { "epoch": 0.96, "grad_norm": 4.880565343466998, "learning_rate": 4.7974573532895695e-08, "loss": 0.7421, "step": 11785 }, { "epoch": 0.96, "grad_norm": 5.369678854894292, "learning_rate": 4.77929699105889e-08, "loss": 0.7461, "step": 11786 }, { "epoch": 0.96, "grad_norm": 6.825459718938507, "learning_rate": 4.761170901219025e-08, "loss": 0.7085, "step": 11787 }, { "epoch": 0.96, "grad_norm": 4.817402987415686, "learning_rate": 4.743079085024416e-08, "loss": 0.6638, "step": 11788 }, { "epoch": 0.96, "grad_norm": 3.1856757977129075, "learning_rate": 4.725021543727115e-08, "loss": 0.7412, "step": 11789 }, { "epoch": 0.96, "grad_norm": 4.000049906005819, "learning_rate": 4.706998278576846e-08, "loss": 0.6355, "step": 11790 }, { "epoch": 0.96, "grad_norm": 5.619029392557585, "learning_rate": 4.68900929082089e-08, "loss": 0.725, "step": 11791 }, { "epoch": 0.96, "grad_norm": 7.161004327045112, "learning_rate": 4.671054581704304e-08, "loss": 0.6832, "step": 11792 }, { "epoch": 0.96, "grad_norm": 3.770603912796601, "learning_rate": 4.653134152469541e-08, "loss": 0.7151, "step": 11793 }, { "epoch": 0.96, "grad_norm": 3.1447001719861545, "learning_rate": 4.635248004356885e-08, "loss": 0.7283, "step": 11794 }, { "epoch": 0.96, "grad_norm": 3.6507668736272922, "learning_rate": 4.6173961386041246e-08, "loss": 0.6539, "step": 11795 }, { "epoch": 0.96, "grad_norm": 13.851385403214774, "learning_rate": 4.5995785564467155e-08, "loss": 0.6265, "step": 11796 }, { "epoch": 0.96, "grad_norm": 3.9219720999886225, "learning_rate": 4.581795259117783e-08, "loss": 0.5695, "step": 11797 }, { "epoch": 0.96, "grad_norm": 8.214854891611218, "learning_rate": 4.564046247848008e-08, "loss": 0.7844, "step": 11798 }, { "epoch": 0.96, "grad_norm": 4.687880779138726, "learning_rate": 4.546331523865799e-08, "loss": 0.7868, "step": 11799 }, { "epoch": 0.96, "grad_norm": 2.947492742888208, "learning_rate": 4.528651088397063e-08, "loss": 0.6984, "step": 11800 }, { "epoch": 0.96, "grad_norm": 4.064461748502639, "learning_rate": 4.5110049426653755e-08, "loss": 0.7241, "step": 11801 }, { "epoch": 0.96, "grad_norm": 3.5106466271044736, "learning_rate": 4.49339308789204e-08, "loss": 0.7736, "step": 11802 }, { "epoch": 0.96, "grad_norm": 11.366460903777558, "learning_rate": 4.475815525295857e-08, "loss": 0.7007, "step": 11803 }, { "epoch": 0.96, "grad_norm": 4.117332181173183, "learning_rate": 4.458272256093355e-08, "loss": 0.7438, "step": 11804 }, { "epoch": 0.96, "grad_norm": 41.72140384334036, "learning_rate": 4.440763281498561e-08, "loss": 0.5759, "step": 11805 }, { "epoch": 0.96, "grad_norm": 4.3315864226899, "learning_rate": 4.423288602723286e-08, "loss": 0.6719, "step": 11806 }, { "epoch": 0.96, "grad_norm": 2.9160590023268895, "learning_rate": 4.405848220976838e-08, "loss": 0.7821, "step": 11807 }, { "epoch": 0.96, "grad_norm": 4.547706385640694, "learning_rate": 4.388442137466198e-08, "loss": 0.7137, "step": 11808 }, { "epoch": 0.96, "grad_norm": 4.007870718501078, "learning_rate": 4.3710703533959566e-08, "loss": 0.7098, "step": 11809 }, { "epoch": 0.96, "grad_norm": 4.213252084335085, "learning_rate": 4.35373286996843e-08, "loss": 0.6617, "step": 11810 }, { "epoch": 0.96, "grad_norm": 3.3318056721847014, "learning_rate": 4.3364296883834364e-08, "loss": 0.823, "step": 11811 }, { "epoch": 0.96, "grad_norm": 3.2881173728429283, "learning_rate": 4.319160809838463e-08, "loss": 0.5753, "step": 11812 }, { "epoch": 0.96, "grad_norm": 3.0660704359374944, "learning_rate": 4.301926235528664e-08, "loss": 0.5376, "step": 11813 }, { "epoch": 0.96, "grad_norm": 4.68932158352393, "learning_rate": 4.2847259666466414e-08, "loss": 0.5945, "step": 11814 }, { "epoch": 0.96, "grad_norm": 2.0674276419708786, "learning_rate": 4.2675600043829425e-08, "loss": 0.6133, "step": 11815 }, { "epoch": 0.96, "grad_norm": 6.610089690095713, "learning_rate": 4.250428349925451e-08, "loss": 0.7862, "step": 11816 }, { "epoch": 0.96, "grad_norm": 3.7059969871160576, "learning_rate": 4.233331004459829e-08, "loss": 0.6245, "step": 11817 }, { "epoch": 0.96, "grad_norm": 3.323119209650115, "learning_rate": 4.2162679691692966e-08, "loss": 0.8316, "step": 11818 }, { "epoch": 0.96, "grad_norm": 2.92159898112092, "learning_rate": 4.199239245234743e-08, "loss": 0.791, "step": 11819 }, { "epoch": 0.96, "grad_norm": 2.5131701947476306, "learning_rate": 4.18224483383467e-08, "loss": 0.7938, "step": 11820 }, { "epoch": 0.96, "grad_norm": 4.166327737543794, "learning_rate": 4.165284736145136e-08, "loss": 0.7224, "step": 11821 }, { "epoch": 0.96, "grad_norm": 4.793683203033816, "learning_rate": 4.148358953339926e-08, "loss": 0.6067, "step": 11822 }, { "epoch": 0.96, "grad_norm": 2.700167702150854, "learning_rate": 4.131467486590435e-08, "loss": 0.5525, "step": 11823 }, { "epoch": 0.96, "grad_norm": 5.547321442230637, "learning_rate": 4.114610337065672e-08, "loss": 0.676, "step": 11824 }, { "epoch": 0.96, "grad_norm": 5.0759249232115, "learning_rate": 4.0977875059322046e-08, "loss": 0.7486, "step": 11825 }, { "epoch": 0.96, "grad_norm": 2.9483857624522316, "learning_rate": 4.080998994354324e-08, "loss": 0.6341, "step": 11826 }, { "epoch": 0.96, "grad_norm": 8.960801444921684, "learning_rate": 4.064244803493822e-08, "loss": 0.5047, "step": 11827 }, { "epoch": 0.96, "grad_norm": 4.512891620984063, "learning_rate": 4.0475249345102716e-08, "loss": 0.7384, "step": 11828 }, { "epoch": 0.96, "grad_norm": 3.607577900643964, "learning_rate": 4.0308393885608034e-08, "loss": 0.7112, "step": 11829 }, { "epoch": 0.96, "grad_norm": 3.409797416262915, "learning_rate": 4.0141881668000485e-08, "loss": 0.6734, "step": 11830 }, { "epoch": 0.96, "grad_norm": 4.978103726536734, "learning_rate": 3.997571270380529e-08, "loss": 0.7975, "step": 11831 }, { "epoch": 0.96, "grad_norm": 3.6102855850208857, "learning_rate": 3.98098870045216e-08, "loss": 0.7464, "step": 11832 }, { "epoch": 0.96, "grad_norm": 3.824873472225501, "learning_rate": 3.964440458162577e-08, "loss": 0.6344, "step": 11833 }, { "epoch": 0.96, "grad_norm": 4.98441774699736, "learning_rate": 3.947926544656977e-08, "loss": 0.7687, "step": 11834 }, { "epoch": 0.96, "grad_norm": 3.2292704784398385, "learning_rate": 3.931446961078278e-08, "loss": 0.5859, "step": 11835 }, { "epoch": 0.96, "grad_norm": 3.4870169994153906, "learning_rate": 3.9150017085669566e-08, "loss": 0.4805, "step": 11836 }, { "epoch": 0.96, "grad_norm": 5.062580293442506, "learning_rate": 3.898590788261103e-08, "loss": 0.7161, "step": 11837 }, { "epoch": 0.96, "grad_norm": 6.100431525644177, "learning_rate": 3.8822142012964747e-08, "loss": 0.7341, "step": 11838 }, { "epoch": 0.96, "grad_norm": 3.3314324069473447, "learning_rate": 3.8658719488064985e-08, "loss": 0.6781, "step": 11839 }, { "epoch": 0.96, "grad_norm": 2.450516359856029, "learning_rate": 3.8495640319221036e-08, "loss": 0.6023, "step": 11840 }, { "epoch": 0.96, "grad_norm": 2.976975946829453, "learning_rate": 3.8332904517718315e-08, "loss": 0.6164, "step": 11841 }, { "epoch": 0.96, "grad_norm": 8.095341290807635, "learning_rate": 3.817051209482003e-08, "loss": 0.71, "step": 11842 }, { "epoch": 0.96, "grad_norm": 9.611964994972334, "learning_rate": 3.800846306176498e-08, "loss": 0.7238, "step": 11843 }, { "epoch": 0.96, "grad_norm": 3.2114721599806795, "learning_rate": 3.7846757429766955e-08, "loss": 0.6205, "step": 11844 }, { "epoch": 0.96, "grad_norm": 4.361437741075271, "learning_rate": 3.7685395210018127e-08, "loss": 0.82, "step": 11845 }, { "epoch": 0.96, "grad_norm": 3.5935391999504485, "learning_rate": 3.7524376413685114e-08, "loss": 0.5529, "step": 11846 }, { "epoch": 0.96, "grad_norm": 10.300596126210026, "learning_rate": 3.736370105191178e-08, "loss": 0.6262, "step": 11847 }, { "epoch": 0.96, "grad_norm": 5.053305308054646, "learning_rate": 3.7203369135817016e-08, "loss": 0.7948, "step": 11848 }, { "epoch": 0.96, "grad_norm": 2.696827430525888, "learning_rate": 3.704338067649804e-08, "loss": 0.5176, "step": 11849 }, { "epoch": 0.96, "grad_norm": 5.351856593592406, "learning_rate": 3.688373568502601e-08, "loss": 0.7051, "step": 11850 }, { "epoch": 0.96, "grad_norm": 3.5107290070064825, "learning_rate": 3.672443417245042e-08, "loss": 0.6872, "step": 11851 }, { "epoch": 0.96, "grad_norm": 4.572847814659825, "learning_rate": 3.656547614979522e-08, "loss": 0.548, "step": 11852 }, { "epoch": 0.96, "grad_norm": 3.6960078321425676, "learning_rate": 3.640686162806106e-08, "loss": 0.644, "step": 11853 }, { "epoch": 0.96, "grad_norm": 4.94524423191858, "learning_rate": 3.6248590618225834e-08, "loss": 0.5384, "step": 11854 }, { "epoch": 0.96, "grad_norm": 5.15555236867171, "learning_rate": 3.609066313124243e-08, "loss": 0.5936, "step": 11855 }, { "epoch": 0.96, "grad_norm": 6.012299533848973, "learning_rate": 3.593307917804045e-08, "loss": 0.6281, "step": 11856 }, { "epoch": 0.96, "grad_norm": 4.093419828481794, "learning_rate": 3.577583876952562e-08, "loss": 0.6121, "step": 11857 }, { "epoch": 0.96, "grad_norm": 4.3229737969701425, "learning_rate": 3.561894191658033e-08, "loss": 0.6845, "step": 11858 }, { "epoch": 0.96, "grad_norm": 3.0526804458579693, "learning_rate": 3.546238863006202e-08, "loss": 0.5692, "step": 11859 }, { "epoch": 0.96, "grad_norm": 3.3962669548380418, "learning_rate": 3.5306178920806456e-08, "loss": 0.689, "step": 11860 }, { "epoch": 0.96, "grad_norm": 12.461258436497639, "learning_rate": 3.515031279962333e-08, "loss": 0.6997, "step": 11861 }, { "epoch": 0.96, "grad_norm": 3.528824179039459, "learning_rate": 3.499479027729957e-08, "loss": 0.6958, "step": 11862 }, { "epoch": 0.96, "grad_norm": 5.124628103678376, "learning_rate": 3.483961136459879e-08, "loss": 0.7675, "step": 11863 }, { "epoch": 0.96, "grad_norm": 8.159251235931286, "learning_rate": 3.468477607226017e-08, "loss": 0.6361, "step": 11864 }, { "epoch": 0.96, "grad_norm": 3.281044459149608, "learning_rate": 3.453028441099959e-08, "loss": 0.6438, "step": 11865 }, { "epoch": 0.96, "grad_norm": 3.220204242004862, "learning_rate": 3.437613639150794e-08, "loss": 0.7162, "step": 11866 }, { "epoch": 0.96, "grad_norm": 5.2679109447005725, "learning_rate": 3.422233202445391e-08, "loss": 0.7529, "step": 11867 }, { "epoch": 0.96, "grad_norm": 3.7577588171765166, "learning_rate": 3.406887132048176e-08, "loss": 0.6714, "step": 11868 }, { "epoch": 0.96, "grad_norm": 3.251742605212685, "learning_rate": 3.3915754290211876e-08, "loss": 0.4785, "step": 11869 }, { "epoch": 0.96, "grad_norm": 10.602164482178486, "learning_rate": 3.37629809442408e-08, "loss": 0.6755, "step": 11870 }, { "epoch": 0.96, "grad_norm": 4.620589946245881, "learning_rate": 3.361055129314117e-08, "loss": 0.5591, "step": 11871 }, { "epoch": 0.96, "grad_norm": 7.256418404793536, "learning_rate": 3.345846534746289e-08, "loss": 0.7291, "step": 11872 }, { "epoch": 0.96, "grad_norm": 4.373951585327049, "learning_rate": 3.330672311773031e-08, "loss": 0.5641, "step": 11873 }, { "epoch": 0.96, "grad_norm": 4.472418803154782, "learning_rate": 3.3155324614445593e-08, "loss": 0.7203, "step": 11874 }, { "epoch": 0.96, "grad_norm": 6.221621712633217, "learning_rate": 3.3004269848085914e-08, "loss": 0.6343, "step": 11875 }, { "epoch": 0.96, "grad_norm": 5.9469173023246045, "learning_rate": 3.285355882910568e-08, "loss": 0.6024, "step": 11876 }, { "epoch": 0.96, "grad_norm": 3.236680186386587, "learning_rate": 3.270319156793544e-08, "loss": 0.7358, "step": 11877 }, { "epoch": 0.96, "grad_norm": 3.2664858863859503, "learning_rate": 3.255316807498077e-08, "loss": 0.5884, "step": 11878 }, { "epoch": 0.96, "grad_norm": 3.7959539348916485, "learning_rate": 3.2403488360624455e-08, "loss": 0.5568, "step": 11879 }, { "epoch": 0.96, "grad_norm": 4.888618167181351, "learning_rate": 3.225415243522489e-08, "loss": 0.5406, "step": 11880 }, { "epoch": 0.96, "grad_norm": 5.133780143768868, "learning_rate": 3.21051603091177e-08, "loss": 0.6207, "step": 11881 }, { "epoch": 0.97, "grad_norm": 3.0867649333185887, "learning_rate": 3.195651199261407e-08, "loss": 0.6324, "step": 11882 }, { "epoch": 0.97, "grad_norm": 3.429140874548844, "learning_rate": 3.180820749600133e-08, "loss": 0.6677, "step": 11883 }, { "epoch": 0.97, "grad_norm": 2.4855478431729603, "learning_rate": 3.1660246829542385e-08, "loss": 0.8144, "step": 11884 }, { "epoch": 0.97, "grad_norm": 11.997469807122755, "learning_rate": 3.151263000347793e-08, "loss": 0.8046, "step": 11885 }, { "epoch": 0.97, "grad_norm": 2.9222589268612085, "learning_rate": 3.136535702802423e-08, "loss": 0.6575, "step": 11886 }, { "epoch": 0.97, "grad_norm": 4.27314531850364, "learning_rate": 3.121842791337204e-08, "loss": 0.6022, "step": 11887 }, { "epoch": 0.97, "grad_norm": 3.3107370203826387, "learning_rate": 3.107184266969099e-08, "loss": 0.6944, "step": 11888 }, { "epoch": 0.97, "grad_norm": 3.032218957667251, "learning_rate": 3.0925601307125184e-08, "loss": 0.7337, "step": 11889 }, { "epoch": 0.97, "grad_norm": 4.2891704378221736, "learning_rate": 3.077970383579598e-08, "loss": 0.7441, "step": 11890 }, { "epoch": 0.97, "grad_norm": 3.255225485438617, "learning_rate": 3.06341502658003e-08, "loss": 0.5414, "step": 11891 }, { "epoch": 0.97, "grad_norm": 4.557984584508957, "learning_rate": 3.048894060721064e-08, "loss": 0.6986, "step": 11892 }, { "epoch": 0.97, "grad_norm": 3.4357919453777073, "learning_rate": 3.0344074870077287e-08, "loss": 0.6044, "step": 11893 }, { "epoch": 0.97, "grad_norm": 5.182388493527507, "learning_rate": 3.0199553064425014e-08, "loss": 0.7402, "step": 11894 }, { "epoch": 0.97, "grad_norm": 5.7900889342726245, "learning_rate": 3.005537520025637e-08, "loss": 0.6623, "step": 11895 }, { "epoch": 0.97, "grad_norm": 4.7116717154188485, "learning_rate": 2.9911541287549474e-08, "loss": 0.6877, "step": 11896 }, { "epoch": 0.97, "grad_norm": 5.214135387615386, "learning_rate": 2.9768051336257487e-08, "loss": 0.5149, "step": 11897 }, { "epoch": 0.97, "grad_norm": 4.362429168296588, "learning_rate": 2.9624905356311905e-08, "loss": 0.5293, "step": 11898 }, { "epoch": 0.97, "grad_norm": 3.531257564276267, "learning_rate": 2.948210335761925e-08, "loss": 0.6374, "step": 11899 }, { "epoch": 0.97, "grad_norm": 5.732242561278232, "learning_rate": 2.9339645350061617e-08, "loss": 0.5524, "step": 11900 }, { "epoch": 0.97, "grad_norm": 6.7269994188701325, "learning_rate": 2.9197531343498344e-08, "loss": 0.7478, "step": 11901 }, { "epoch": 0.97, "grad_norm": 4.096987047475449, "learning_rate": 2.9055761347764887e-08, "loss": 0.6621, "step": 11902 }, { "epoch": 0.97, "grad_norm": 3.0051812682952237, "learning_rate": 2.8914335372672296e-08, "loss": 0.6073, "step": 11903 }, { "epoch": 0.97, "grad_norm": 4.961099537086095, "learning_rate": 2.8773253428008296e-08, "loss": 0.616, "step": 11904 }, { "epoch": 0.97, "grad_norm": 11.941392703574312, "learning_rate": 2.863251552353674e-08, "loss": 0.7127, "step": 11905 }, { "epoch": 0.97, "grad_norm": 5.152404616828218, "learning_rate": 2.8492121668997064e-08, "loss": 0.6741, "step": 11906 }, { "epoch": 0.97, "grad_norm": 3.6530869510621367, "learning_rate": 2.8352071874105934e-08, "loss": 0.845, "step": 11907 }, { "epoch": 0.97, "grad_norm": 3.3309374590549248, "learning_rate": 2.8212366148555602e-08, "loss": 0.6494, "step": 11908 }, { "epoch": 0.97, "grad_norm": 3.635360543471996, "learning_rate": 2.8073004502014445e-08, "loss": 0.5611, "step": 11909 }, { "epoch": 0.97, "grad_norm": 3.7786526075395983, "learning_rate": 2.7933986944126967e-08, "loss": 0.6531, "step": 11910 }, { "epoch": 0.97, "grad_norm": 3.669604467143136, "learning_rate": 2.7795313484514362e-08, "loss": 0.612, "step": 11911 }, { "epoch": 0.97, "grad_norm": 4.799484129297882, "learning_rate": 2.7656984132773955e-08, "loss": 0.5849, "step": 11912 }, { "epoch": 0.97, "grad_norm": 6.053339119941887, "learning_rate": 2.7518998898478644e-08, "loss": 0.5381, "step": 11913 }, { "epoch": 0.97, "grad_norm": 5.544861288384643, "learning_rate": 2.7381357791177454e-08, "loss": 0.9079, "step": 11914 }, { "epoch": 0.97, "grad_norm": 7.036877878927683, "learning_rate": 2.724406082039721e-08, "loss": 0.685, "step": 11915 }, { "epoch": 0.97, "grad_norm": 2.6061658072696514, "learning_rate": 2.7107107995638648e-08, "loss": 0.7237, "step": 11916 }, { "epoch": 0.97, "grad_norm": 4.913288140064642, "learning_rate": 2.697049932637974e-08, "loss": 0.7011, "step": 11917 }, { "epoch": 0.97, "grad_norm": 4.077282708178528, "learning_rate": 2.6834234822076255e-08, "loss": 0.6142, "step": 11918 }, { "epoch": 0.97, "grad_norm": 4.568058483596346, "learning_rate": 2.6698314492156208e-08, "loss": 0.6412, "step": 11919 }, { "epoch": 0.97, "grad_norm": 4.848466570194925, "learning_rate": 2.6562738346027627e-08, "loss": 0.6093, "step": 11920 }, { "epoch": 0.97, "grad_norm": 4.619506795616202, "learning_rate": 2.642750639307301e-08, "loss": 0.6488, "step": 11921 }, { "epoch": 0.97, "grad_norm": 2.810065753423881, "learning_rate": 2.629261864265098e-08, "loss": 0.6464, "step": 11922 }, { "epoch": 0.97, "grad_norm": 3.5801625892157, "learning_rate": 2.6158075104096848e-08, "loss": 0.6557, "step": 11923 }, { "epoch": 0.97, "grad_norm": 6.436228517053414, "learning_rate": 2.6023875786722053e-08, "loss": 0.548, "step": 11924 }, { "epoch": 0.97, "grad_norm": 3.8137573521015966, "learning_rate": 2.589002069981361e-08, "loss": 0.592, "step": 11925 }, { "epoch": 0.97, "grad_norm": 8.421799636832723, "learning_rate": 2.5756509852635226e-08, "loss": 0.674, "step": 11926 }, { "epoch": 0.97, "grad_norm": 2.637088447190258, "learning_rate": 2.562334325442728e-08, "loss": 0.598, "step": 11927 }, { "epoch": 0.97, "grad_norm": 3.044061400170564, "learning_rate": 2.5490520914404627e-08, "loss": 0.6312, "step": 11928 }, { "epoch": 0.97, "grad_norm": 4.072431683870927, "learning_rate": 2.5358042841760466e-08, "loss": 0.6144, "step": 11929 }, { "epoch": 0.97, "grad_norm": 3.844565037271914, "learning_rate": 2.5225909045661913e-08, "loss": 0.7093, "step": 11930 }, { "epoch": 0.97, "grad_norm": 6.413785624713688, "learning_rate": 2.509411953525498e-08, "loss": 0.6617, "step": 11931 }, { "epoch": 0.97, "grad_norm": 3.985518335231157, "learning_rate": 2.4962674319659595e-08, "loss": 0.6247, "step": 11932 }, { "epoch": 0.97, "grad_norm": 3.5328705697973914, "learning_rate": 2.4831573407972377e-08, "loss": 0.5599, "step": 11933 }, { "epoch": 0.97, "grad_norm": 4.375236493316782, "learning_rate": 2.4700816809266615e-08, "loss": 0.7279, "step": 11934 }, { "epoch": 0.97, "grad_norm": 11.30994058504707, "learning_rate": 2.4570404532591187e-08, "loss": 0.7648, "step": 11935 }, { "epoch": 0.97, "grad_norm": 5.655322030546553, "learning_rate": 2.4440336586971648e-08, "loss": 0.6675, "step": 11936 }, { "epoch": 0.97, "grad_norm": 3.2225337307210533, "learning_rate": 2.4310612981409686e-08, "loss": 0.7029, "step": 11937 }, { "epoch": 0.97, "grad_norm": 3.6279750790971654, "learning_rate": 2.418123372488257e-08, "loss": 0.6478, "step": 11938 }, { "epoch": 0.97, "grad_norm": 17.593306659512876, "learning_rate": 2.4052198826344796e-08, "loss": 0.6248, "step": 11939 }, { "epoch": 0.97, "grad_norm": 4.7657603621801305, "learning_rate": 2.3923508294725893e-08, "loss": 0.7203, "step": 11940 }, { "epoch": 0.97, "grad_norm": 5.025901495485195, "learning_rate": 2.3795162138932072e-08, "loss": 0.7016, "step": 11941 }, { "epoch": 0.97, "grad_norm": 5.236126639015923, "learning_rate": 2.3667160367845664e-08, "loss": 0.7081, "step": 11942 }, { "epoch": 0.97, "grad_norm": 5.515405300164409, "learning_rate": 2.35395029903257e-08, "loss": 0.6129, "step": 11943 }, { "epoch": 0.97, "grad_norm": 4.481965799805051, "learning_rate": 2.3412190015206226e-08, "loss": 0.6469, "step": 11944 }, { "epoch": 0.97, "grad_norm": 3.6278191276225726, "learning_rate": 2.328522145129908e-08, "loss": 0.5895, "step": 11945 }, { "epoch": 0.97, "grad_norm": 3.425379394785436, "learning_rate": 2.3158597307390007e-08, "loss": 0.5487, "step": 11946 }, { "epoch": 0.97, "grad_norm": 4.424205359844178, "learning_rate": 2.303231759224256e-08, "loss": 0.6771, "step": 11947 }, { "epoch": 0.97, "grad_norm": 3.3230942387174562, "learning_rate": 2.290638231459641e-08, "loss": 0.6647, "step": 11948 }, { "epoch": 0.97, "grad_norm": 4.059729867198876, "learning_rate": 2.2780791483167363e-08, "loss": 0.5848, "step": 11949 }, { "epoch": 0.97, "grad_norm": 3.984873707440627, "learning_rate": 2.2655545106646803e-08, "loss": 0.6941, "step": 11950 }, { "epoch": 0.97, "grad_norm": 4.103170728555762, "learning_rate": 2.253064319370224e-08, "loss": 0.7415, "step": 11951 }, { "epoch": 0.97, "grad_norm": 2.5447513306076495, "learning_rate": 2.240608575297787e-08, "loss": 0.546, "step": 11952 }, { "epoch": 0.97, "grad_norm": 9.241307061082546, "learning_rate": 2.2281872793093462e-08, "loss": 0.5532, "step": 11953 }, { "epoch": 0.97, "grad_norm": 4.150659586550228, "learning_rate": 2.2158004322646033e-08, "loss": 0.7274, "step": 11954 }, { "epoch": 0.97, "grad_norm": 2.8765592241886964, "learning_rate": 2.2034480350208166e-08, "loss": 0.6793, "step": 11955 }, { "epoch": 0.97, "grad_norm": 2.606224735162423, "learning_rate": 2.1911300884328023e-08, "loss": 0.6543, "step": 11956 }, { "epoch": 0.97, "grad_norm": 3.4238020604322577, "learning_rate": 2.17884659335299e-08, "loss": 0.7113, "step": 11957 }, { "epoch": 0.97, "grad_norm": 8.118626053682032, "learning_rate": 2.1665975506315885e-08, "loss": 0.6308, "step": 11958 }, { "epoch": 0.97, "grad_norm": 3.3156694969478715, "learning_rate": 2.1543829611162524e-08, "loss": 0.6807, "step": 11959 }, { "epoch": 0.97, "grad_norm": 3.0513316303596443, "learning_rate": 2.1422028256523065e-08, "loss": 0.6433, "step": 11960 }, { "epoch": 0.97, "grad_norm": 3.133452014898096, "learning_rate": 2.130057145082687e-08, "loss": 0.6414, "step": 11961 }, { "epoch": 0.97, "grad_norm": 6.032224333862911, "learning_rate": 2.1179459202479436e-08, "loss": 0.7187, "step": 11962 }, { "epoch": 0.97, "grad_norm": 4.218850570435848, "learning_rate": 2.1058691519862952e-08, "loss": 0.6403, "step": 11963 }, { "epoch": 0.97, "grad_norm": 3.0767606734852957, "learning_rate": 2.0938268411335172e-08, "loss": 0.7581, "step": 11964 }, { "epoch": 0.97, "grad_norm": 2.8919751913242755, "learning_rate": 2.081818988522999e-08, "loss": 0.6509, "step": 11965 }, { "epoch": 0.97, "grad_norm": 2.4201619947583435, "learning_rate": 2.069845594985742e-08, "loss": 0.5854, "step": 11966 }, { "epoch": 0.97, "grad_norm": 2.822856858224823, "learning_rate": 2.0579066613503618e-08, "loss": 0.6901, "step": 11967 }, { "epoch": 0.97, "grad_norm": 3.52859049395303, "learning_rate": 2.046002188443197e-08, "loss": 0.7091, "step": 11968 }, { "epoch": 0.97, "grad_norm": 4.884165994831136, "learning_rate": 2.0341321770880327e-08, "loss": 0.5925, "step": 11969 }, { "epoch": 0.97, "grad_norm": 5.834157460728985, "learning_rate": 2.0222966281063794e-08, "loss": 0.6902, "step": 11970 }, { "epoch": 0.97, "grad_norm": 5.618860980312664, "learning_rate": 2.0104955423173034e-08, "loss": 0.556, "step": 11971 }, { "epoch": 0.97, "grad_norm": 3.311309050986231, "learning_rate": 1.9987289205375958e-08, "loss": 0.6753, "step": 11972 }, { "epoch": 0.97, "grad_norm": 4.028842695582181, "learning_rate": 1.986996763581439e-08, "loss": 0.8232, "step": 11973 }, { "epoch": 0.97, "grad_norm": 3.3395948196662353, "learning_rate": 1.9752990722609057e-08, "loss": 0.7651, "step": 11974 }, { "epoch": 0.97, "grad_norm": 3.600131193015411, "learning_rate": 1.9636358473855145e-08, "loss": 0.584, "step": 11975 }, { "epoch": 0.97, "grad_norm": 4.408745712856693, "learning_rate": 1.9520070897623976e-08, "loss": 0.6481, "step": 11976 }, { "epoch": 0.97, "grad_norm": 2.3645400616337406, "learning_rate": 1.9404128001963562e-08, "loss": 0.7192, "step": 11977 }, { "epoch": 0.97, "grad_norm": 7.472001271313164, "learning_rate": 1.9288529794898037e-08, "loss": 0.6289, "step": 11978 }, { "epoch": 0.97, "grad_norm": 5.307054297205913, "learning_rate": 1.9173276284427666e-08, "loss": 0.6486, "step": 11979 }, { "epoch": 0.97, "grad_norm": 4.581064860353578, "learning_rate": 1.905836747852774e-08, "loss": 0.7518, "step": 11980 }, { "epoch": 0.97, "grad_norm": 3.0210183235128683, "learning_rate": 1.8943803385151894e-08, "loss": 0.6633, "step": 11981 }, { "epoch": 0.97, "grad_norm": 4.13423809176151, "learning_rate": 1.882958401222823e-08, "loss": 0.6693, "step": 11982 }, { "epoch": 0.97, "grad_norm": 6.383761537258242, "learning_rate": 1.8715709367660984e-08, "loss": 0.6425, "step": 11983 }, { "epoch": 0.97, "grad_norm": 4.300049821055086, "learning_rate": 1.8602179459331625e-08, "loss": 0.7342, "step": 11984 }, { "epoch": 0.97, "grad_norm": 3.9354804177743037, "learning_rate": 1.8488994295096653e-08, "loss": 0.6204, "step": 11985 }, { "epoch": 0.97, "grad_norm": 2.75486604852116, "learning_rate": 1.8376153882789792e-08, "loss": 0.6579, "step": 11986 }, { "epoch": 0.97, "grad_norm": 2.418300504387471, "learning_rate": 1.8263658230219804e-08, "loss": 0.8194, "step": 11987 }, { "epoch": 0.97, "grad_norm": 3.807686015398802, "learning_rate": 1.815150734517268e-08, "loss": 0.6236, "step": 11988 }, { "epoch": 0.97, "grad_norm": 4.369865360356615, "learning_rate": 1.8039701235409434e-08, "loss": 0.7407, "step": 11989 }, { "epoch": 0.97, "grad_norm": 3.032980589017032, "learning_rate": 1.792823990866721e-08, "loss": 0.7408, "step": 11990 }, { "epoch": 0.97, "grad_norm": 3.9728061904199237, "learning_rate": 1.7817123372661505e-08, "loss": 0.6209, "step": 11991 }, { "epoch": 0.97, "grad_norm": 4.307731537824287, "learning_rate": 1.770635163508061e-08, "loss": 0.6387, "step": 11992 }, { "epoch": 0.97, "grad_norm": 4.556848522893382, "learning_rate": 1.7595924703591726e-08, "loss": 0.7818, "step": 11993 }, { "epoch": 0.97, "grad_norm": 3.2671355048469275, "learning_rate": 1.7485842585835966e-08, "loss": 0.6736, "step": 11994 }, { "epoch": 0.97, "grad_norm": 6.461901095139359, "learning_rate": 1.7376105289432786e-08, "loss": 0.6878, "step": 11995 }, { "epoch": 0.97, "grad_norm": 15.840750276869194, "learning_rate": 1.7266712821976673e-08, "loss": 0.6524, "step": 11996 }, { "epoch": 0.97, "grad_norm": 3.386002456212125, "learning_rate": 1.715766519103712e-08, "loss": 0.6708, "step": 11997 }, { "epoch": 0.97, "grad_norm": 3.2631949771024416, "learning_rate": 1.704896240416254e-08, "loss": 0.5577, "step": 11998 }, { "epoch": 0.97, "grad_norm": 5.241975701142466, "learning_rate": 1.694060446887469e-08, "loss": 0.6202, "step": 11999 }, { "epoch": 0.97, "grad_norm": 12.330298388112183, "learning_rate": 1.6832591392673127e-08, "loss": 0.9061, "step": 12000 }, { "epoch": 0.97, "grad_norm": 4.024984042225761, "learning_rate": 7.887651627436933e-06, "loss": 0.6954, "step": 12001 }, { "epoch": 0.97, "grad_norm": 3.136642290189405, "learning_rate": 7.887293688015853e-06, "loss": 0.6799, "step": 12002 }, { "epoch": 0.97, "grad_norm": 2.826007311876671, "learning_rate": 7.886935726393908e-06, "loss": 0.5704, "step": 12003 }, { "epoch": 0.97, "grad_norm": 3.673132617932286, "learning_rate": 7.886577742573856e-06, "loss": 0.7033, "step": 12004 }, { "epoch": 0.98, "grad_norm": 3.180031638412777, "learning_rate": 7.886219736558448e-06, "loss": 0.5536, "step": 12005 }, { "epoch": 0.98, "grad_norm": 6.115056360310386, "learning_rate": 7.885861708350437e-06, "loss": 0.6069, "step": 12006 }, { "epoch": 0.98, "grad_norm": 5.516314784807653, "learning_rate": 7.885503657952575e-06, "loss": 0.6268, "step": 12007 }, { "epoch": 0.98, "grad_norm": 4.033164723146516, "learning_rate": 7.885145585367615e-06, "loss": 0.5566, "step": 12008 }, { "epoch": 0.98, "grad_norm": 3.721533168735317, "learning_rate": 7.884787490598312e-06, "loss": 0.6053, "step": 12009 }, { "epoch": 0.98, "grad_norm": 11.374917535692543, "learning_rate": 7.884429373647419e-06, "loss": 0.7036, "step": 12010 }, { "epoch": 0.98, "grad_norm": 6.440950636513003, "learning_rate": 7.884071234517687e-06, "loss": 0.6475, "step": 12011 }, { "epoch": 0.98, "grad_norm": 3.8169925255485677, "learning_rate": 7.883713073211874e-06, "loss": 0.603, "step": 12012 }, { "epoch": 0.98, "grad_norm": 3.195059509655681, "learning_rate": 7.883354889732731e-06, "loss": 0.608, "step": 12013 }, { "epoch": 0.98, "grad_norm": 4.504968783982464, "learning_rate": 7.882996684083013e-06, "loss": 0.7708, "step": 12014 }, { "epoch": 0.98, "grad_norm": 4.061421903967798, "learning_rate": 7.882638456265475e-06, "loss": 0.7702, "step": 12015 }, { "epoch": 0.98, "grad_norm": 2.703162273490401, "learning_rate": 7.882280206282871e-06, "loss": 0.5519, "step": 12016 }, { "epoch": 0.98, "grad_norm": 16.447392410081385, "learning_rate": 7.881921934137952e-06, "loss": 0.5959, "step": 12017 }, { "epoch": 0.98, "grad_norm": 2.8289420634636224, "learning_rate": 7.881563639833479e-06, "loss": 0.7197, "step": 12018 }, { "epoch": 0.98, "grad_norm": 4.244201348888967, "learning_rate": 7.881205323372206e-06, "loss": 0.7613, "step": 12019 }, { "epoch": 0.98, "grad_norm": 3.335400755659604, "learning_rate": 7.880846984756883e-06, "loss": 0.8804, "step": 12020 }, { "epoch": 0.98, "grad_norm": 3.1421324244639526, "learning_rate": 7.88048862399027e-06, "loss": 0.7064, "step": 12021 }, { "epoch": 0.98, "grad_norm": 4.022727506743707, "learning_rate": 7.880130241075121e-06, "loss": 0.7558, "step": 12022 }, { "epoch": 0.98, "grad_norm": 4.007836802587387, "learning_rate": 7.879771836014191e-06, "loss": 0.6004, "step": 12023 }, { "epoch": 0.98, "grad_norm": 3.863983054511877, "learning_rate": 7.879413408810239e-06, "loss": 0.5285, "step": 12024 }, { "epoch": 0.98, "grad_norm": 7.181891100001127, "learning_rate": 7.879054959466017e-06, "loss": 0.6728, "step": 12025 }, { "epoch": 0.98, "grad_norm": 6.439894165032388, "learning_rate": 7.878696487984282e-06, "loss": 0.7008, "step": 12026 }, { "epoch": 0.98, "grad_norm": 4.734452638445886, "learning_rate": 7.878337994367793e-06, "loss": 0.6403, "step": 12027 }, { "epoch": 0.98, "grad_norm": 10.700781186542399, "learning_rate": 7.877979478619303e-06, "loss": 0.7694, "step": 12028 }, { "epoch": 0.98, "grad_norm": 4.334030451751, "learning_rate": 7.877620940741571e-06, "loss": 0.7115, "step": 12029 }, { "epoch": 0.98, "grad_norm": 3.484183309697673, "learning_rate": 7.877262380737353e-06, "loss": 0.6892, "step": 12030 }, { "epoch": 0.98, "grad_norm": 4.830946426998408, "learning_rate": 7.876903798609408e-06, "loss": 0.705, "step": 12031 }, { "epoch": 0.98, "grad_norm": 4.191861307537942, "learning_rate": 7.87654519436049e-06, "loss": 0.781, "step": 12032 }, { "epoch": 0.98, "grad_norm": 3.2180020003697463, "learning_rate": 7.876186567993358e-06, "loss": 0.7363, "step": 12033 }, { "epoch": 0.98, "grad_norm": 3.6913941949566786, "learning_rate": 7.875827919510769e-06, "loss": 0.5523, "step": 12034 }, { "epoch": 0.98, "grad_norm": 4.233631716969953, "learning_rate": 7.875469248915481e-06, "loss": 0.6651, "step": 12035 }, { "epoch": 0.98, "grad_norm": 5.2013003086881175, "learning_rate": 7.875110556210252e-06, "loss": 0.7688, "step": 12036 }, { "epoch": 0.98, "grad_norm": 5.036950821191211, "learning_rate": 7.874751841397841e-06, "loss": 0.5058, "step": 12037 }, { "epoch": 0.98, "grad_norm": 20.539660031748948, "learning_rate": 7.874393104481004e-06, "loss": 0.6575, "step": 12038 }, { "epoch": 0.98, "grad_norm": 4.237282620069438, "learning_rate": 7.874034345462502e-06, "loss": 0.6526, "step": 12039 }, { "epoch": 0.98, "grad_norm": 5.976683686078058, "learning_rate": 7.87367556434509e-06, "loss": 0.8748, "step": 12040 }, { "epoch": 0.98, "grad_norm": 3.8723361335971447, "learning_rate": 7.873316761131531e-06, "loss": 0.7453, "step": 12041 }, { "epoch": 0.98, "grad_norm": 2.8145918262647327, "learning_rate": 7.87295793582458e-06, "loss": 0.5655, "step": 12042 }, { "epoch": 0.98, "grad_norm": 12.971621644696427, "learning_rate": 7.872599088427e-06, "loss": 0.7022, "step": 12043 }, { "epoch": 0.98, "grad_norm": 48.36840574529206, "learning_rate": 7.872240218941545e-06, "loss": 0.5662, "step": 12044 }, { "epoch": 0.98, "grad_norm": 5.128550993468869, "learning_rate": 7.87188132737098e-06, "loss": 0.7513, "step": 12045 }, { "epoch": 0.98, "grad_norm": 4.118101313610252, "learning_rate": 7.87152241371806e-06, "loss": 0.5864, "step": 12046 }, { "epoch": 0.98, "grad_norm": 4.079900967589845, "learning_rate": 7.871163477985548e-06, "loss": 0.6023, "step": 12047 }, { "epoch": 0.98, "grad_norm": 7.159664989094367, "learning_rate": 7.870804520176203e-06, "loss": 0.711, "step": 12048 }, { "epoch": 0.98, "grad_norm": 6.110818642805435, "learning_rate": 7.870445540292784e-06, "loss": 0.6605, "step": 12049 }, { "epoch": 0.98, "grad_norm": 2.243267073620121, "learning_rate": 7.870086538338054e-06, "loss": 0.5194, "step": 12050 }, { "epoch": 0.98, "grad_norm": 2.681849544295601, "learning_rate": 7.869727514314767e-06, "loss": 0.6116, "step": 12051 }, { "epoch": 0.98, "grad_norm": 3.8018800112959603, "learning_rate": 7.869368468225692e-06, "loss": 0.6738, "step": 12052 }, { "epoch": 0.98, "grad_norm": 59.764163924914094, "learning_rate": 7.869009400073583e-06, "loss": 0.6759, "step": 12053 }, { "epoch": 0.98, "grad_norm": 3.604168693360398, "learning_rate": 7.868650309861206e-06, "loss": 0.8029, "step": 12054 }, { "epoch": 0.98, "grad_norm": 3.73004260525144, "learning_rate": 7.86829119759132e-06, "loss": 0.702, "step": 12055 }, { "epoch": 0.98, "grad_norm": 2.85374198083674, "learning_rate": 7.867932063266685e-06, "loss": 0.729, "step": 12056 }, { "epoch": 0.98, "grad_norm": 2.7857704353130623, "learning_rate": 7.867572906890064e-06, "loss": 0.5993, "step": 12057 }, { "epoch": 0.98, "grad_norm": 3.052394242109492, "learning_rate": 7.867213728464219e-06, "loss": 0.6259, "step": 12058 }, { "epoch": 0.98, "grad_norm": 24.82425349092041, "learning_rate": 7.866854527991908e-06, "loss": 0.7488, "step": 12059 }, { "epoch": 0.98, "grad_norm": 4.809873699165875, "learning_rate": 7.866495305475898e-06, "loss": 0.6165, "step": 12060 }, { "epoch": 0.98, "grad_norm": 4.806074512909129, "learning_rate": 7.86613606091895e-06, "loss": 0.675, "step": 12061 }, { "epoch": 0.98, "grad_norm": 3.3655606508869096, "learning_rate": 7.865776794323823e-06, "loss": 0.6545, "step": 12062 }, { "epoch": 0.98, "grad_norm": 5.042723634600169, "learning_rate": 7.865417505693282e-06, "loss": 0.7312, "step": 12063 }, { "epoch": 0.98, "grad_norm": 4.463062454187894, "learning_rate": 7.86505819503009e-06, "loss": 0.6281, "step": 12064 }, { "epoch": 0.98, "grad_norm": 5.60971747600775, "learning_rate": 7.86469886233701e-06, "loss": 0.664, "step": 12065 }, { "epoch": 0.98, "grad_norm": 3.6456850600461785, "learning_rate": 7.864339507616803e-06, "loss": 0.6528, "step": 12066 }, { "epoch": 0.98, "grad_norm": 3.8403800076799977, "learning_rate": 7.863980130872235e-06, "loss": 0.6592, "step": 12067 }, { "epoch": 0.98, "grad_norm": 33.9275048898015, "learning_rate": 7.863620732106067e-06, "loss": 0.6925, "step": 12068 }, { "epoch": 0.98, "grad_norm": 3.7099841141422387, "learning_rate": 7.863261311321062e-06, "loss": 0.7796, "step": 12069 }, { "epoch": 0.98, "grad_norm": 3.7105492781550895, "learning_rate": 7.862901868519986e-06, "loss": 0.6531, "step": 12070 }, { "epoch": 0.98, "grad_norm": 7.727475759019637, "learning_rate": 7.862542403705599e-06, "loss": 0.6272, "step": 12071 }, { "epoch": 0.98, "grad_norm": 4.774757700743257, "learning_rate": 7.86218291688067e-06, "loss": 0.5291, "step": 12072 }, { "epoch": 0.98, "grad_norm": 5.217142021739866, "learning_rate": 7.861823408047959e-06, "loss": 0.8212, "step": 12073 }, { "epoch": 0.98, "grad_norm": 4.792473091687254, "learning_rate": 7.861463877210234e-06, "loss": 0.8523, "step": 12074 }, { "epoch": 0.98, "grad_norm": 6.217593114573845, "learning_rate": 7.861104324370255e-06, "loss": 0.6507, "step": 12075 }, { "epoch": 0.98, "grad_norm": 3.091993839071186, "learning_rate": 7.860744749530791e-06, "loss": 0.7532, "step": 12076 }, { "epoch": 0.98, "grad_norm": 4.016470200004109, "learning_rate": 7.860385152694603e-06, "loss": 0.5071, "step": 12077 }, { "epoch": 0.98, "grad_norm": 4.620649092064307, "learning_rate": 7.86002553386446e-06, "loss": 0.6416, "step": 12078 }, { "epoch": 0.98, "grad_norm": 3.554086480348715, "learning_rate": 7.859665893043124e-06, "loss": 0.6021, "step": 12079 }, { "epoch": 0.98, "grad_norm": 6.638508230438698, "learning_rate": 7.859306230233363e-06, "loss": 0.5603, "step": 12080 }, { "epoch": 0.98, "grad_norm": 7.202945507426494, "learning_rate": 7.858946545437938e-06, "loss": 0.7015, "step": 12081 }, { "epoch": 0.98, "grad_norm": 5.138652319020925, "learning_rate": 7.858586838659621e-06, "loss": 0.5554, "step": 12082 }, { "epoch": 0.98, "grad_norm": 3.8305891334146374, "learning_rate": 7.858227109901172e-06, "loss": 0.5977, "step": 12083 }, { "epoch": 0.98, "grad_norm": 4.003655404513807, "learning_rate": 7.85786735916536e-06, "loss": 0.7089, "step": 12084 }, { "epoch": 0.98, "grad_norm": 3.283468005369923, "learning_rate": 7.857507586454951e-06, "loss": 0.6506, "step": 12085 }, { "epoch": 0.98, "grad_norm": 5.957433420269763, "learning_rate": 7.85714779177271e-06, "loss": 0.7231, "step": 12086 }, { "epoch": 0.98, "grad_norm": 3.351845708326642, "learning_rate": 7.856787975121407e-06, "loss": 0.7154, "step": 12087 }, { "epoch": 0.98, "grad_norm": 6.995854611678724, "learning_rate": 7.856428136503804e-06, "loss": 0.6035, "step": 12088 }, { "epoch": 0.98, "grad_norm": 6.286548863116367, "learning_rate": 7.85606827592267e-06, "loss": 0.745, "step": 12089 }, { "epoch": 0.98, "grad_norm": 3.7995685039775933, "learning_rate": 7.855708393380775e-06, "loss": 0.6579, "step": 12090 }, { "epoch": 0.98, "grad_norm": 6.695566935500905, "learning_rate": 7.85534848888088e-06, "loss": 0.9223, "step": 12091 }, { "epoch": 0.98, "grad_norm": 5.747206759230759, "learning_rate": 7.854988562425758e-06, "loss": 0.7343, "step": 12092 }, { "epoch": 0.98, "grad_norm": 2.243430323428925, "learning_rate": 7.854628614018172e-06, "loss": 0.4484, "step": 12093 }, { "epoch": 0.98, "grad_norm": 3.1351353516418046, "learning_rate": 7.854268643660893e-06, "loss": 0.5022, "step": 12094 }, { "epoch": 0.98, "grad_norm": 3.596635838346699, "learning_rate": 7.853908651356688e-06, "loss": 0.6252, "step": 12095 }, { "epoch": 0.98, "grad_norm": 10.757898620532243, "learning_rate": 7.853548637108323e-06, "loss": 0.5191, "step": 12096 }, { "epoch": 0.98, "grad_norm": 5.8036339138670545, "learning_rate": 7.85318860091857e-06, "loss": 0.738, "step": 12097 }, { "epoch": 0.98, "grad_norm": 18.586551832690887, "learning_rate": 7.852828542790195e-06, "loss": 0.5293, "step": 12098 }, { "epoch": 0.98, "grad_norm": 5.1815913123352, "learning_rate": 7.852468462725966e-06, "loss": 0.5379, "step": 12099 }, { "epoch": 0.98, "grad_norm": 4.494366370070267, "learning_rate": 7.852108360728655e-06, "loss": 0.6193, "step": 12100 }, { "epoch": 0.98, "grad_norm": 5.130268160062507, "learning_rate": 7.851748236801026e-06, "loss": 0.5823, "step": 12101 }, { "epoch": 0.98, "grad_norm": 4.166718538744968, "learning_rate": 7.851388090945853e-06, "loss": 0.5748, "step": 12102 }, { "epoch": 0.98, "grad_norm": 3.818580947298578, "learning_rate": 7.851027923165899e-06, "loss": 0.7684, "step": 12103 }, { "epoch": 0.98, "grad_norm": 5.1699384974859885, "learning_rate": 7.850667733463941e-06, "loss": 0.5763, "step": 12104 }, { "epoch": 0.98, "grad_norm": 4.778738939474952, "learning_rate": 7.850307521842742e-06, "loss": 0.6607, "step": 12105 }, { "epoch": 0.98, "grad_norm": 3.675896105279259, "learning_rate": 7.849947288305075e-06, "loss": 0.6721, "step": 12106 }, { "epoch": 0.98, "grad_norm": 4.158405425690384, "learning_rate": 7.84958703285371e-06, "loss": 0.5552, "step": 12107 }, { "epoch": 0.98, "grad_norm": 3.049232752823057, "learning_rate": 7.849226755491417e-06, "loss": 0.6009, "step": 12108 }, { "epoch": 0.98, "grad_norm": 3.094498278259343, "learning_rate": 7.848866456220965e-06, "loss": 0.634, "step": 12109 }, { "epoch": 0.98, "grad_norm": 4.524825118523398, "learning_rate": 7.848506135045123e-06, "loss": 0.6453, "step": 12110 }, { "epoch": 0.98, "grad_norm": 3.9514437764359, "learning_rate": 7.848145791966668e-06, "loss": 0.6701, "step": 12111 }, { "epoch": 0.98, "grad_norm": 3.5648903724874574, "learning_rate": 7.847785426988364e-06, "loss": 0.4488, "step": 12112 }, { "epoch": 0.98, "grad_norm": 2.843288321049604, "learning_rate": 7.847425040112984e-06, "loss": 0.6156, "step": 12113 }, { "epoch": 0.98, "grad_norm": 4.4286040758995675, "learning_rate": 7.8470646313433e-06, "loss": 0.5989, "step": 12114 }, { "epoch": 0.98, "grad_norm": 3.9923110497977414, "learning_rate": 7.84670420068208e-06, "loss": 0.6909, "step": 12115 }, { "epoch": 0.98, "grad_norm": 3.10144542464045, "learning_rate": 7.846343748132102e-06, "loss": 0.7802, "step": 12116 }, { "epoch": 0.98, "grad_norm": 3.8305742267968874, "learning_rate": 7.845983273696131e-06, "loss": 0.7178, "step": 12117 }, { "epoch": 0.98, "grad_norm": 3.246697472986766, "learning_rate": 7.845622777376942e-06, "loss": 0.6418, "step": 12118 }, { "epoch": 0.98, "grad_norm": 2.9757394344174033, "learning_rate": 7.845262259177305e-06, "loss": 0.653, "step": 12119 }, { "epoch": 0.98, "grad_norm": 6.571458522104288, "learning_rate": 7.844901719099996e-06, "loss": 0.5585, "step": 12120 }, { "epoch": 0.98, "grad_norm": 3.233114897438143, "learning_rate": 7.844541157147781e-06, "loss": 0.64, "step": 12121 }, { "epoch": 0.98, "grad_norm": 3.3106215455909243, "learning_rate": 7.84418057332344e-06, "loss": 0.6281, "step": 12122 }, { "epoch": 0.98, "grad_norm": 4.061517380522789, "learning_rate": 7.843819967629737e-06, "loss": 0.7036, "step": 12123 }, { "epoch": 0.98, "grad_norm": 4.012100530517519, "learning_rate": 7.843459340069452e-06, "loss": 0.7446, "step": 12124 }, { "epoch": 0.98, "grad_norm": 7.17489777599761, "learning_rate": 7.843098690645355e-06, "loss": 0.5927, "step": 12125 }, { "epoch": 0.98, "grad_norm": 3.523158296552429, "learning_rate": 7.842738019360218e-06, "loss": 0.655, "step": 12126 }, { "epoch": 0.98, "grad_norm": 2.598426058363254, "learning_rate": 7.842377326216818e-06, "loss": 0.5008, "step": 12127 }, { "epoch": 0.99, "grad_norm": 4.198243621338775, "learning_rate": 7.842016611217924e-06, "loss": 0.5445, "step": 12128 }, { "epoch": 0.99, "grad_norm": 5.084293806153699, "learning_rate": 7.841655874366313e-06, "loss": 0.5282, "step": 12129 }, { "epoch": 0.99, "grad_norm": 3.7836362295918757, "learning_rate": 7.841295115664756e-06, "loss": 0.7669, "step": 12130 }, { "epoch": 0.99, "grad_norm": 3.6056221378375475, "learning_rate": 7.84093433511603e-06, "loss": 0.7224, "step": 12131 }, { "epoch": 0.99, "grad_norm": 5.598493650674104, "learning_rate": 7.840573532722905e-06, "loss": 0.6373, "step": 12132 }, { "epoch": 0.99, "grad_norm": 5.355764080741471, "learning_rate": 7.84021270848816e-06, "loss": 0.7114, "step": 12133 }, { "epoch": 0.99, "grad_norm": 6.220595285582811, "learning_rate": 7.839851862414566e-06, "loss": 0.6773, "step": 12134 }, { "epoch": 0.99, "grad_norm": 9.393262452020393, "learning_rate": 7.8394909945049e-06, "loss": 0.7257, "step": 12135 }, { "epoch": 0.99, "grad_norm": 3.282048592073847, "learning_rate": 7.839130104761932e-06, "loss": 0.5825, "step": 12136 }, { "epoch": 0.99, "grad_norm": 2.974934652597153, "learning_rate": 7.838769193188443e-06, "loss": 0.4903, "step": 12137 }, { "epoch": 0.99, "grad_norm": 4.491033237232289, "learning_rate": 7.838408259787205e-06, "loss": 0.7295, "step": 12138 }, { "epoch": 0.99, "grad_norm": 4.652569682851361, "learning_rate": 7.838047304560993e-06, "loss": 0.7767, "step": 12139 }, { "epoch": 0.99, "grad_norm": 5.575782123747067, "learning_rate": 7.837686327512585e-06, "loss": 0.7394, "step": 12140 }, { "epoch": 0.99, "grad_norm": 2.5846419254656623, "learning_rate": 7.837325328644754e-06, "loss": 0.538, "step": 12141 }, { "epoch": 0.99, "grad_norm": 3.9482960618292404, "learning_rate": 7.836964307960276e-06, "loss": 0.5625, "step": 12142 }, { "epoch": 0.99, "grad_norm": 5.316577658107811, "learning_rate": 7.836603265461929e-06, "loss": 0.6206, "step": 12143 }, { "epoch": 0.99, "grad_norm": 10.624688009719822, "learning_rate": 7.836242201152486e-06, "loss": 0.8618, "step": 12144 }, { "epoch": 0.99, "grad_norm": 4.931925501763064, "learning_rate": 7.835881115034725e-06, "loss": 0.5689, "step": 12145 }, { "epoch": 0.99, "grad_norm": 3.1920217374094872, "learning_rate": 7.835520007111424e-06, "loss": 0.5673, "step": 12146 }, { "epoch": 0.99, "grad_norm": 7.054961152554092, "learning_rate": 7.835158877385356e-06, "loss": 0.6172, "step": 12147 }, { "epoch": 0.99, "grad_norm": 2.824546434544536, "learning_rate": 7.8347977258593e-06, "loss": 0.6955, "step": 12148 }, { "epoch": 0.99, "grad_norm": 14.735700032706571, "learning_rate": 7.834436552536035e-06, "loss": 0.5175, "step": 12149 }, { "epoch": 0.99, "grad_norm": 8.832659947223265, "learning_rate": 7.834075357418334e-06, "loss": 0.5754, "step": 12150 }, { "epoch": 0.99, "grad_norm": 16.041721059089777, "learning_rate": 7.833714140508977e-06, "loss": 0.6403, "step": 12151 }, { "epoch": 0.99, "grad_norm": 4.787231194546511, "learning_rate": 7.83335290181074e-06, "loss": 0.6287, "step": 12152 }, { "epoch": 0.99, "grad_norm": 3.6284973769571454, "learning_rate": 7.832991641326401e-06, "loss": 0.6701, "step": 12153 }, { "epoch": 0.99, "grad_norm": 3.4158574058330275, "learning_rate": 7.832630359058739e-06, "loss": 0.7507, "step": 12154 }, { "epoch": 0.99, "grad_norm": 5.918548430325671, "learning_rate": 7.83226905501053e-06, "loss": 0.6509, "step": 12155 }, { "epoch": 0.99, "grad_norm": 4.353082124426365, "learning_rate": 7.831907729184553e-06, "loss": 0.7693, "step": 12156 }, { "epoch": 0.99, "grad_norm": 7.711173132583299, "learning_rate": 7.831546381583588e-06, "loss": 0.5678, "step": 12157 }, { "epoch": 0.99, "grad_norm": 3.136214583937381, "learning_rate": 7.83118501221041e-06, "loss": 0.6021, "step": 12158 }, { "epoch": 0.99, "grad_norm": 3.8386719091651615, "learning_rate": 7.8308236210678e-06, "loss": 0.6955, "step": 12159 }, { "epoch": 0.99, "grad_norm": 9.676051206522784, "learning_rate": 7.830462208158537e-06, "loss": 0.7687, "step": 12160 }, { "epoch": 0.99, "grad_norm": 3.3403014014492682, "learning_rate": 7.830100773485398e-06, "loss": 0.814, "step": 12161 }, { "epoch": 0.99, "grad_norm": 4.825282859915182, "learning_rate": 7.829739317051163e-06, "loss": 0.6187, "step": 12162 }, { "epoch": 0.99, "grad_norm": 3.6935016475696996, "learning_rate": 7.829377838858614e-06, "loss": 0.7142, "step": 12163 }, { "epoch": 0.99, "grad_norm": 44.18085326653864, "learning_rate": 7.829016338910526e-06, "loss": 0.6578, "step": 12164 }, { "epoch": 0.99, "grad_norm": 3.3725435330629177, "learning_rate": 7.828654817209682e-06, "loss": 0.649, "step": 12165 }, { "epoch": 0.99, "grad_norm": 5.371158673500937, "learning_rate": 7.82829327375886e-06, "loss": 0.6118, "step": 12166 }, { "epoch": 0.99, "grad_norm": 3.8597236504292964, "learning_rate": 7.827931708560841e-06, "loss": 0.7847, "step": 12167 }, { "epoch": 0.99, "grad_norm": 4.140979186240384, "learning_rate": 7.827570121618404e-06, "loss": 0.6216, "step": 12168 }, { "epoch": 0.99, "grad_norm": 5.741194175582843, "learning_rate": 7.82720851293433e-06, "loss": 0.5707, "step": 12169 }, { "epoch": 0.99, "grad_norm": 3.997725445356984, "learning_rate": 7.8268468825114e-06, "loss": 0.7038, "step": 12170 }, { "epoch": 0.99, "grad_norm": 4.2115428708702884, "learning_rate": 7.826485230352395e-06, "loss": 0.5636, "step": 12171 }, { "epoch": 0.99, "grad_norm": 7.211196934784885, "learning_rate": 7.826123556460093e-06, "loss": 0.5492, "step": 12172 }, { "epoch": 0.99, "grad_norm": 4.8228237443080815, "learning_rate": 7.825761860837276e-06, "loss": 0.7031, "step": 12173 }, { "epoch": 0.99, "grad_norm": 5.97291479259059, "learning_rate": 7.825400143486727e-06, "loss": 0.6916, "step": 12174 }, { "epoch": 0.99, "grad_norm": 7.07356665638762, "learning_rate": 7.825038404411226e-06, "loss": 0.6724, "step": 12175 }, { "epoch": 0.99, "grad_norm": 3.2675880360296268, "learning_rate": 7.824676643613556e-06, "loss": 0.6483, "step": 12176 }, { "epoch": 0.99, "grad_norm": 4.5353829941079935, "learning_rate": 7.824314861096495e-06, "loss": 0.4635, "step": 12177 }, { "epoch": 0.99, "grad_norm": 2.846249778732734, "learning_rate": 7.82395305686283e-06, "loss": 0.6092, "step": 12178 }, { "epoch": 0.99, "grad_norm": 3.5522368199597723, "learning_rate": 7.82359123091534e-06, "loss": 0.5557, "step": 12179 }, { "epoch": 0.99, "grad_norm": 10.855329604628054, "learning_rate": 7.823229383256805e-06, "loss": 0.7271, "step": 12180 }, { "epoch": 0.99, "grad_norm": 6.264178411998487, "learning_rate": 7.822867513890011e-06, "loss": 0.7451, "step": 12181 }, { "epoch": 0.99, "grad_norm": 4.078196816084956, "learning_rate": 7.82250562281774e-06, "loss": 0.5085, "step": 12182 }, { "epoch": 0.99, "grad_norm": 7.354194632814517, "learning_rate": 7.822143710042771e-06, "loss": 0.5789, "step": 12183 }, { "epoch": 0.99, "grad_norm": 8.609939176473407, "learning_rate": 7.821781775567891e-06, "loss": 0.7198, "step": 12184 }, { "epoch": 0.99, "grad_norm": 5.886965359507057, "learning_rate": 7.821419819395881e-06, "loss": 0.5406, "step": 12185 }, { "epoch": 0.99, "grad_norm": 4.631325517278753, "learning_rate": 7.821057841529525e-06, "loss": 0.7126, "step": 12186 }, { "epoch": 0.99, "grad_norm": 3.179115714905711, "learning_rate": 7.820695841971606e-06, "loss": 0.7723, "step": 12187 }, { "epoch": 0.99, "grad_norm": 3.0981813321885125, "learning_rate": 7.820333820724908e-06, "loss": 0.7101, "step": 12188 }, { "epoch": 0.99, "grad_norm": 7.9788668476993045, "learning_rate": 7.819971777792212e-06, "loss": 0.689, "step": 12189 }, { "epoch": 0.99, "grad_norm": 2.9278301025900983, "learning_rate": 7.819609713176305e-06, "loss": 0.6265, "step": 12190 }, { "epoch": 0.99, "grad_norm": 4.1349316035422525, "learning_rate": 7.819247626879972e-06, "loss": 0.6366, "step": 12191 }, { "epoch": 0.99, "grad_norm": 3.496437332582988, "learning_rate": 7.818885518905992e-06, "loss": 0.6069, "step": 12192 }, { "epoch": 0.99, "grad_norm": 3.9195632965385196, "learning_rate": 7.818523389257151e-06, "loss": 0.7834, "step": 12193 }, { "epoch": 0.99, "grad_norm": 37.19072104576985, "learning_rate": 7.818161237936238e-06, "loss": 0.7115, "step": 12194 }, { "epoch": 0.99, "grad_norm": 3.685572604659147, "learning_rate": 7.817799064946033e-06, "loss": 0.6284, "step": 12195 }, { "epoch": 0.99, "grad_norm": 4.119569996234796, "learning_rate": 7.817436870289324e-06, "loss": 0.6561, "step": 12196 }, { "epoch": 0.99, "grad_norm": 3.3272859247826427, "learning_rate": 7.817074653968891e-06, "loss": 0.6308, "step": 12197 }, { "epoch": 0.99, "grad_norm": 3.5846458993900034, "learning_rate": 7.816712415987523e-06, "loss": 0.6964, "step": 12198 }, { "epoch": 0.99, "grad_norm": 6.74395545842818, "learning_rate": 7.816350156348006e-06, "loss": 0.6677, "step": 12199 }, { "epoch": 0.99, "grad_norm": 3.3677456728084936, "learning_rate": 7.815987875053123e-06, "loss": 0.6797, "step": 12200 }, { "epoch": 0.99, "grad_norm": 3.2920651685559967, "learning_rate": 7.81562557210566e-06, "loss": 0.6911, "step": 12201 }, { "epoch": 0.99, "grad_norm": 3.0304128828059613, "learning_rate": 7.815263247508406e-06, "loss": 0.6402, "step": 12202 }, { "epoch": 0.99, "grad_norm": 5.460447163868425, "learning_rate": 7.814900901264142e-06, "loss": 0.7705, "step": 12203 }, { "epoch": 0.99, "grad_norm": 9.911453053210254, "learning_rate": 7.814538533375658e-06, "loss": 0.6264, "step": 12204 }, { "epoch": 0.99, "grad_norm": 3.5953934074809766, "learning_rate": 7.814176143845737e-06, "loss": 0.6217, "step": 12205 }, { "epoch": 0.99, "grad_norm": 3.3373822463462273, "learning_rate": 7.81381373267717e-06, "loss": 0.5611, "step": 12206 }, { "epoch": 0.99, "grad_norm": 31.8902307086028, "learning_rate": 7.81345129987274e-06, "loss": 0.6458, "step": 12207 }, { "epoch": 0.99, "grad_norm": 5.098533876643183, "learning_rate": 7.813088845435235e-06, "loss": 0.735, "step": 12208 }, { "epoch": 0.99, "grad_norm": 4.278928865920938, "learning_rate": 7.812726369367441e-06, "loss": 0.6014, "step": 12209 }, { "epoch": 0.99, "grad_norm": 3.4908477693681808, "learning_rate": 7.812363871672147e-06, "loss": 0.6013, "step": 12210 }, { "epoch": 0.99, "grad_norm": 4.549320650953422, "learning_rate": 7.812001352352138e-06, "loss": 0.7671, "step": 12211 }, { "epoch": 0.99, "grad_norm": 5.2217724415360385, "learning_rate": 7.811638811410203e-06, "loss": 0.6527, "step": 12212 }, { "epoch": 0.99, "grad_norm": 8.963712999094353, "learning_rate": 7.811276248849129e-06, "loss": 0.6332, "step": 12213 }, { "epoch": 0.99, "grad_norm": 5.886074458142891, "learning_rate": 7.810913664671706e-06, "loss": 0.665, "step": 12214 }, { "epoch": 0.99, "grad_norm": 6.471175164766711, "learning_rate": 7.810551058880718e-06, "loss": 0.7012, "step": 12215 }, { "epoch": 0.99, "grad_norm": 2.7793148174480793, "learning_rate": 7.810188431478955e-06, "loss": 0.6479, "step": 12216 }, { "epoch": 0.99, "grad_norm": 3.7220735311819544, "learning_rate": 7.809825782469207e-06, "loss": 0.7588, "step": 12217 }, { "epoch": 0.99, "grad_norm": 2.8507931515297433, "learning_rate": 7.80946311185426e-06, "loss": 0.4817, "step": 12218 }, { "epoch": 0.99, "grad_norm": 3.0043089701166386, "learning_rate": 7.809100419636906e-06, "loss": 0.5407, "step": 12219 }, { "epoch": 0.99, "grad_norm": 8.795581797951318, "learning_rate": 7.808737705819929e-06, "loss": 0.6904, "step": 12220 }, { "epoch": 0.99, "grad_norm": 3.492431613558116, "learning_rate": 7.80837497040612e-06, "loss": 0.6938, "step": 12221 }, { "epoch": 0.99, "grad_norm": 3.6165768628534427, "learning_rate": 7.80801221339827e-06, "loss": 0.744, "step": 12222 }, { "epoch": 0.99, "grad_norm": 3.7602955462066348, "learning_rate": 7.807649434799168e-06, "loss": 0.7081, "step": 12223 }, { "epoch": 0.99, "grad_norm": 2.5990608601790273, "learning_rate": 7.8072866346116e-06, "loss": 0.7011, "step": 12224 }, { "epoch": 0.99, "grad_norm": 3.017271375863101, "learning_rate": 7.806923812838357e-06, "loss": 0.6235, "step": 12225 }, { "epoch": 0.99, "grad_norm": 4.1901361269949, "learning_rate": 7.806560969482232e-06, "loss": 0.7299, "step": 12226 }, { "epoch": 0.99, "grad_norm": 3.566871661270214, "learning_rate": 7.806198104546012e-06, "loss": 0.7182, "step": 12227 }, { "epoch": 0.99, "grad_norm": 5.002970092188824, "learning_rate": 7.805835218032487e-06, "loss": 0.4558, "step": 12228 }, { "epoch": 0.99, "grad_norm": 3.9166014475720896, "learning_rate": 7.80547230994445e-06, "loss": 0.6167, "step": 12229 }, { "epoch": 0.99, "grad_norm": 3.264870537355649, "learning_rate": 7.805109380284688e-06, "loss": 0.5233, "step": 12230 }, { "epoch": 0.99, "grad_norm": 3.6615798534304567, "learning_rate": 7.804746429055994e-06, "loss": 0.5422, "step": 12231 }, { "epoch": 0.99, "grad_norm": 3.717846446727007, "learning_rate": 7.804383456261156e-06, "loss": 0.8171, "step": 12232 }, { "epoch": 0.99, "grad_norm": 3.5926100277554824, "learning_rate": 7.804020461902968e-06, "loss": 0.6136, "step": 12233 }, { "epoch": 0.99, "grad_norm": 3.9472504729452473, "learning_rate": 7.803657445984221e-06, "loss": 0.7147, "step": 12234 }, { "epoch": 0.99, "grad_norm": 4.703175604467212, "learning_rate": 7.803294408507704e-06, "loss": 0.7633, "step": 12235 }, { "epoch": 0.99, "grad_norm": 18.94742197454528, "learning_rate": 7.80293134947621e-06, "loss": 0.6135, "step": 12236 }, { "epoch": 0.99, "grad_norm": 10.956400725383462, "learning_rate": 7.802568268892531e-06, "loss": 0.6734, "step": 12237 }, { "epoch": 0.99, "grad_norm": 5.307158523155442, "learning_rate": 7.802205166759457e-06, "loss": 0.6009, "step": 12238 }, { "epoch": 0.99, "grad_norm": 2.2162234448504177, "learning_rate": 7.801842043079784e-06, "loss": 0.5927, "step": 12239 }, { "epoch": 0.99, "grad_norm": 7.164349261371752, "learning_rate": 7.801478897856298e-06, "loss": 0.6616, "step": 12240 }, { "epoch": 0.99, "grad_norm": 3.9214110772691244, "learning_rate": 7.801115731091797e-06, "loss": 0.6727, "step": 12241 }, { "epoch": 0.99, "grad_norm": 3.4363097490008188, "learning_rate": 7.80075254278907e-06, "loss": 0.6967, "step": 12242 }, { "epoch": 0.99, "grad_norm": 3.2572995279125725, "learning_rate": 7.80038933295091e-06, "loss": 0.7039, "step": 12243 }, { "epoch": 0.99, "grad_norm": 6.978419475256046, "learning_rate": 7.80002610158011e-06, "loss": 0.6751, "step": 12244 }, { "epoch": 0.99, "grad_norm": 4.165558393552936, "learning_rate": 7.799662848679464e-06, "loss": 0.7314, "step": 12245 }, { "epoch": 0.99, "grad_norm": 4.629007274193252, "learning_rate": 7.799299574251766e-06, "loss": 0.5777, "step": 12246 }, { "epoch": 0.99, "grad_norm": 5.105145079313559, "learning_rate": 7.798936278299804e-06, "loss": 0.617, "step": 12247 }, { "epoch": 0.99, "grad_norm": 7.9822088811219745, "learning_rate": 7.798572960826378e-06, "loss": 0.6453, "step": 12248 }, { "epoch": 0.99, "grad_norm": 6.709508600257386, "learning_rate": 7.798209621834279e-06, "loss": 0.6615, "step": 12249 }, { "epoch": 0.99, "grad_norm": 3.4662543990604107, "learning_rate": 7.7978462613263e-06, "loss": 0.695, "step": 12250 }, { "epoch": 1.0, "grad_norm": 3.778282186841342, "learning_rate": 7.797482879305233e-06, "loss": 0.6307, "step": 12251 }, { "epoch": 1.0, "grad_norm": 3.7972152053114674, "learning_rate": 7.797119475773877e-06, "loss": 0.8989, "step": 12252 }, { "epoch": 1.0, "grad_norm": 12.109735931431942, "learning_rate": 7.796756050735023e-06, "loss": 0.6249, "step": 12253 }, { "epoch": 1.0, "grad_norm": 4.064435333074841, "learning_rate": 7.796392604191468e-06, "loss": 0.8667, "step": 12254 }, { "epoch": 1.0, "grad_norm": 4.611726736971056, "learning_rate": 7.796029136146003e-06, "loss": 0.6571, "step": 12255 }, { "epoch": 1.0, "grad_norm": 14.420739561213098, "learning_rate": 7.795665646601425e-06, "loss": 0.5519, "step": 12256 }, { "epoch": 1.0, "grad_norm": 8.158273952733538, "learning_rate": 7.795302135560527e-06, "loss": 0.6887, "step": 12257 }, { "epoch": 1.0, "grad_norm": 3.271767687201891, "learning_rate": 7.794938603026107e-06, "loss": 0.6402, "step": 12258 }, { "epoch": 1.0, "grad_norm": 4.771347499546004, "learning_rate": 7.794575049000961e-06, "loss": 0.7685, "step": 12259 }, { "epoch": 1.0, "grad_norm": 11.082559439950336, "learning_rate": 7.79421147348788e-06, "loss": 0.7517, "step": 12260 }, { "epoch": 1.0, "grad_norm": 3.1195492010686197, "learning_rate": 7.793847876489662e-06, "loss": 0.6749, "step": 12261 }, { "epoch": 1.0, "grad_norm": 3.747118224137669, "learning_rate": 7.793484258009103e-06, "loss": 0.6963, "step": 12262 }, { "epoch": 1.0, "grad_norm": 4.260413779070824, "learning_rate": 7.793120618048997e-06, "loss": 0.7857, "step": 12263 }, { "epoch": 1.0, "grad_norm": 5.865520868853068, "learning_rate": 7.792756956612143e-06, "loss": 0.7947, "step": 12264 }, { "epoch": 1.0, "grad_norm": 4.8332355000111376, "learning_rate": 7.792393273701337e-06, "loss": 0.5821, "step": 12265 }, { "epoch": 1.0, "grad_norm": 25.033079898992558, "learning_rate": 7.792029569319374e-06, "loss": 0.7391, "step": 12266 }, { "epoch": 1.0, "grad_norm": 6.894461244006182, "learning_rate": 7.791665843469049e-06, "loss": 0.6408, "step": 12267 }, { "epoch": 1.0, "grad_norm": 5.434250883738288, "learning_rate": 7.791302096153162e-06, "loss": 0.8545, "step": 12268 }, { "epoch": 1.0, "grad_norm": 35.93544476333542, "learning_rate": 7.790938327374508e-06, "loss": 0.6269, "step": 12269 }, { "epoch": 1.0, "grad_norm": 13.647866478969739, "learning_rate": 7.790574537135886e-06, "loss": 0.721, "step": 12270 }, { "epoch": 1.0, "grad_norm": 3.4969679179110615, "learning_rate": 7.790210725440091e-06, "loss": 0.637, "step": 12271 }, { "epoch": 1.0, "grad_norm": 3.018765412240935, "learning_rate": 7.789846892289921e-06, "loss": 0.7249, "step": 12272 }, { "epoch": 1.0, "grad_norm": 3.9287572774896864, "learning_rate": 7.789483037688174e-06, "loss": 0.7201, "step": 12273 }, { "epoch": 1.0, "grad_norm": 2.475719742855069, "learning_rate": 7.789119161637649e-06, "loss": 0.6552, "step": 12274 }, { "epoch": 1.0, "grad_norm": 4.689685354833785, "learning_rate": 7.78875526414114e-06, "loss": 0.5868, "step": 12275 }, { "epoch": 1.0, "grad_norm": 3.9910993335853244, "learning_rate": 7.788391345201449e-06, "loss": 0.4659, "step": 12276 }, { "epoch": 1.0, "grad_norm": 4.0144784681909025, "learning_rate": 7.788027404821375e-06, "loss": 0.8007, "step": 12277 }, { "epoch": 1.0, "grad_norm": 2.538671321675504, "learning_rate": 7.78766344300371e-06, "loss": 0.4756, "step": 12278 }, { "epoch": 1.0, "grad_norm": 7.304400678411963, "learning_rate": 7.78729945975126e-06, "loss": 0.6857, "step": 12279 }, { "epoch": 1.0, "grad_norm": 3.226066453952374, "learning_rate": 7.786935455066817e-06, "loss": 0.5848, "step": 12280 }, { "epoch": 1.0, "grad_norm": 5.534743052487797, "learning_rate": 7.786571428953187e-06, "loss": 0.7197, "step": 12281 }, { "epoch": 1.0, "grad_norm": 3.8865146800842147, "learning_rate": 7.786207381413164e-06, "loss": 0.5744, "step": 12282 }, { "epoch": 1.0, "grad_norm": 5.95261662451427, "learning_rate": 7.785843312449548e-06, "loss": 0.6293, "step": 12283 }, { "epoch": 1.0, "grad_norm": 3.195883462505136, "learning_rate": 7.78547922206514e-06, "loss": 0.6475, "step": 12284 }, { "epoch": 1.0, "grad_norm": 3.75891372454614, "learning_rate": 7.785115110262738e-06, "loss": 0.7816, "step": 12285 }, { "epoch": 1.0, "grad_norm": 3.556380028332088, "learning_rate": 7.784750977045143e-06, "loss": 0.7241, "step": 12286 }, { "epoch": 1.0, "grad_norm": 3.8345500910780563, "learning_rate": 7.784386822415152e-06, "loss": 0.7368, "step": 12287 }, { "epoch": 1.0, "grad_norm": 4.313241620150329, "learning_rate": 7.784022646375569e-06, "loss": 0.5734, "step": 12288 }, { "epoch": 1.0, "grad_norm": 4.228884839750384, "learning_rate": 7.783658448929193e-06, "loss": 0.4883, "step": 12289 }, { "epoch": 1.0, "grad_norm": 4.661439332288415, "learning_rate": 7.783294230078823e-06, "loss": 0.6979, "step": 12290 }, { "epoch": 1.0, "grad_norm": 8.249220768611895, "learning_rate": 7.78292998982726e-06, "loss": 0.6955, "step": 12291 }, { "epoch": 1.0, "grad_norm": 3.1187762738556417, "learning_rate": 7.782565728177304e-06, "loss": 0.5674, "step": 12292 }, { "epoch": 1.0, "grad_norm": 8.556128914340617, "learning_rate": 7.782201445131761e-06, "loss": 0.677, "step": 12293 }, { "epoch": 1.0, "grad_norm": 30.013634363000687, "learning_rate": 7.781837140693425e-06, "loss": 0.6098, "step": 12294 }, { "epoch": 1.0, "grad_norm": 3.6626451992633204, "learning_rate": 7.781472814865099e-06, "loss": 0.7141, "step": 12295 }, { "epoch": 1.0, "grad_norm": 4.189813590714736, "learning_rate": 7.781108467649588e-06, "loss": 0.5932, "step": 12296 }, { "epoch": 1.0, "grad_norm": 7.4752300479225315, "learning_rate": 7.780744099049689e-06, "loss": 0.6724, "step": 12297 }, { "epoch": 1.0, "grad_norm": 3.3388877413636706, "learning_rate": 7.780379709068206e-06, "loss": 0.7507, "step": 12298 }, { "epoch": 1.0, "grad_norm": 2.6990917016073555, "learning_rate": 7.780015297707942e-06, "loss": 0.7497, "step": 12299 }, { "epoch": 1.0, "grad_norm": 2.8370745145690477, "learning_rate": 7.779650864971695e-06, "loss": 0.4812, "step": 12300 }, { "epoch": 1.0, "grad_norm": 4.1805838085100095, "learning_rate": 7.779286410862273e-06, "loss": 0.5362, "step": 12301 }, { "epoch": 1.0, "grad_norm": 3.269065170056007, "learning_rate": 7.778921935382473e-06, "loss": 0.7703, "step": 12302 }, { "epoch": 1.0, "grad_norm": 5.339250280794474, "learning_rate": 7.778557438535099e-06, "loss": 0.6341, "step": 12303 }, { "epoch": 1.0, "grad_norm": 2.9216049089902607, "learning_rate": 7.778192920322955e-06, "loss": 0.6253, "step": 12304 }, { "epoch": 1.0, "grad_norm": 20.6498819775808, "learning_rate": 7.777828380748844e-06, "loss": 0.7632, "step": 12305 }, { "epoch": 1.0, "grad_norm": 4.740886465965859, "learning_rate": 7.777463819815568e-06, "loss": 0.6718, "step": 12306 }, { "epoch": 1.0, "grad_norm": 5.251076687239021, "learning_rate": 7.777099237525929e-06, "loss": 0.5973, "step": 12307 }, { "epoch": 1.0, "grad_norm": 3.4352323146561954, "learning_rate": 7.776734633882731e-06, "loss": 0.6901, "step": 12308 }, { "epoch": 1.0, "grad_norm": 2.980752899021085, "learning_rate": 7.776370008888781e-06, "loss": 0.6319, "step": 12309 }, { "epoch": 1.0, "grad_norm": 3.401668790743825, "learning_rate": 7.77600536254688e-06, "loss": 0.606, "step": 12310 }, { "epoch": 1.0, "grad_norm": 2.7812082880721687, "learning_rate": 7.77564069485983e-06, "loss": 0.5914, "step": 12311 }, { "epoch": 1.0, "grad_norm": 8.209408090889218, "learning_rate": 7.775276005830434e-06, "loss": 0.8583, "step": 12312 }, { "epoch": 1.0, "grad_norm": 3.185361682188218, "learning_rate": 7.774911295461503e-06, "loss": 0.6666, "step": 12313 }, { "epoch": 1.0, "grad_norm": 3.498702307938696, "learning_rate": 7.774546563755833e-06, "loss": 0.484, "step": 12314 }, { "epoch": 1.0, "grad_norm": 5.472964213982246, "learning_rate": 7.774181810716236e-06, "loss": 0.5913, "step": 12315 }, { "epoch": 1.0, "grad_norm": 2.833738539254753, "learning_rate": 7.773817036345513e-06, "loss": 0.6254, "step": 12316 }, { "epoch": 1.0, "grad_norm": 7.236101394034412, "learning_rate": 7.773452240646466e-06, "loss": 0.6619, "step": 12317 }, { "epoch": 1.0, "grad_norm": 4.9409095194088675, "learning_rate": 7.773087423621905e-06, "loss": 0.6499, "step": 12318 }, { "epoch": 1.0, "grad_norm": 5.177145996419497, "learning_rate": 7.772722585274633e-06, "loss": 0.7187, "step": 12319 }, { "epoch": 1.0, "grad_norm": 5.385487705384896, "learning_rate": 7.772357725607455e-06, "loss": 0.5783, "step": 12320 }, { "epoch": 1.0, "grad_norm": 3.986181230282297, "learning_rate": 7.771992844623177e-06, "loss": 0.6453, "step": 12321 }, { "epoch": 1.0, "grad_norm": 4.369590576159136, "learning_rate": 7.771627942324605e-06, "loss": 0.6456, "step": 12322 }, { "epoch": 1.0, "grad_norm": 6.991346849888702, "learning_rate": 7.771263018714544e-06, "loss": 0.4831, "step": 12323 }, { "epoch": 1.0, "grad_norm": 33.44804825094083, "learning_rate": 7.7708980737958e-06, "loss": 0.6116, "step": 12324 }, { "epoch": 1.0, "grad_norm": 3.0716189454435665, "learning_rate": 7.77053310757118e-06, "loss": 0.5712, "step": 12325 }, { "epoch": 1.0, "grad_norm": 4.089502726707574, "learning_rate": 7.77016812004349e-06, "loss": 0.6704, "step": 12326 }, { "epoch": 1.0, "grad_norm": 4.657682099783619, "learning_rate": 7.769803111215534e-06, "loss": 0.4638, "step": 12327 }, { "epoch": 1.0, "grad_norm": 4.723065857048151, "learning_rate": 7.769438081090121e-06, "loss": 0.597, "step": 12328 }, { "epoch": 1.0, "grad_norm": 6.691568424305026, "learning_rate": 7.76907302967006e-06, "loss": 0.8181, "step": 12329 }, { "epoch": 1.0, "grad_norm": 3.0383961866918083, "learning_rate": 7.768707956958154e-06, "loss": 0.6252, "step": 12330 }, { "epoch": 1.0, "grad_norm": 3.191519093750927, "learning_rate": 7.76834286295721e-06, "loss": 0.7377, "step": 12331 }, { "epoch": 1.0, "grad_norm": 4.050261980539512, "learning_rate": 7.76797774767004e-06, "loss": 0.6642, "step": 12332 }, { "epoch": 1.0, "grad_norm": 16.666896987375384, "learning_rate": 7.767612611099444e-06, "loss": 0.7379, "step": 12333 }, { "epoch": 1.0, "grad_norm": 4.610156634550446, "learning_rate": 7.767247453248237e-06, "loss": 0.7204, "step": 12334 }, { "epoch": 1.0, "grad_norm": 3.0943573096149506, "learning_rate": 7.766882274119222e-06, "loss": 0.5846, "step": 12335 }, { "epoch": 1.0, "grad_norm": 3.548686559472081, "learning_rate": 7.766517073715208e-06, "loss": 0.7203, "step": 12336 }, { "epoch": 1.0, "grad_norm": 3.4692453031314496, "learning_rate": 7.766151852039006e-06, "loss": 0.5866, "step": 12337 }, { "epoch": 1.0, "grad_norm": 3.4946597367643375, "learning_rate": 7.76578660909342e-06, "loss": 0.5318, "step": 12338 }, { "epoch": 1.0, "grad_norm": 4.370031047749948, "learning_rate": 7.765421344881261e-06, "loss": 0.6678, "step": 12339 }, { "epoch": 1.0, "grad_norm": 3.71542157403533, "learning_rate": 7.765056059405335e-06, "loss": 0.6277, "step": 12340 }, { "epoch": 1.0, "grad_norm": 5.94370950967478, "learning_rate": 7.764690752668454e-06, "loss": 0.6381, "step": 12341 }, { "epoch": 1.0, "grad_norm": 4.911420117400132, "learning_rate": 7.764325424673425e-06, "loss": 0.4531, "step": 12342 }, { "epoch": 1.0, "grad_norm": 4.102001959781842, "learning_rate": 7.763960075423059e-06, "loss": 0.6942, "step": 12343 }, { "epoch": 1.0, "grad_norm": 2.5015750285894587, "learning_rate": 7.763594704920161e-06, "loss": 0.5658, "step": 12344 }, { "epoch": 1.0, "grad_norm": 2.953171671566058, "learning_rate": 7.763229313167547e-06, "loss": 0.5321, "step": 12345 }, { "epoch": 1.0, "grad_norm": 7.127827544512035, "learning_rate": 7.762863900168019e-06, "loss": 0.7065, "step": 12346 }, { "epoch": 1.0, "grad_norm": 3.3827313382861, "learning_rate": 7.762498465924391e-06, "loss": 0.7649, "step": 12347 }, { "epoch": 1.0, "grad_norm": 6.547389372714532, "learning_rate": 7.762133010439474e-06, "loss": 0.5579, "step": 12348 }, { "epoch": 1.0, "grad_norm": 3.98740693862126, "learning_rate": 7.761767533716076e-06, "loss": 0.6622, "step": 12349 }, { "epoch": 1.0, "grad_norm": 2.9972146281650947, "learning_rate": 7.761402035757007e-06, "loss": 0.7119, "step": 12350 }, { "epoch": 1.0, "grad_norm": 6.761449708977114, "learning_rate": 7.761036516565077e-06, "loss": 0.5546, "step": 12351 }, { "epoch": 1.0, "grad_norm": 5.014182086625072, "learning_rate": 7.760670976143098e-06, "loss": 0.7446, "step": 12352 }, { "epoch": 1.0, "grad_norm": 5.390113396572107, "learning_rate": 7.76030541449388e-06, "loss": 0.7511, "step": 12353 }, { "epoch": 1.0, "grad_norm": 4.489083824453948, "learning_rate": 7.759939831620234e-06, "loss": 0.5598, "step": 12354 }, { "epoch": 1.0, "grad_norm": 5.4785876621263325, "learning_rate": 7.75957422752497e-06, "loss": 0.764, "step": 12355 }, { "epoch": 1.0, "grad_norm": 3.301134677945862, "learning_rate": 7.759208602210903e-06, "loss": 0.621, "step": 12356 }, { "epoch": 1.0, "grad_norm": 5.137221195664433, "learning_rate": 7.758842955680841e-06, "loss": 0.6051, "step": 12357 }, { "epoch": 1.0, "grad_norm": 7.161915928381801, "learning_rate": 7.758477287937594e-06, "loss": 0.5628, "step": 12358 }, { "epoch": 1.0, "grad_norm": 14.221602585639925, "learning_rate": 7.758111598983978e-06, "loss": 0.5629, "step": 12359 }, { "epoch": 1.0, "grad_norm": 10.61359663720406, "learning_rate": 7.7577458888228e-06, "loss": 0.7102, "step": 12360 }, { "epoch": 1.0, "grad_norm": 19.547655154438917, "learning_rate": 7.757380157456876e-06, "loss": 0.607, "step": 12361 }, { "epoch": 1.0, "grad_norm": 6.079260282167976, "learning_rate": 7.757014404889017e-06, "loss": 0.5267, "step": 12362 }, { "epoch": 1.0, "grad_norm": 12.455611052289733, "learning_rate": 7.756648631122034e-06, "loss": 0.7443, "step": 12363 }, { "epoch": 1.0, "grad_norm": 2.995062351657669, "learning_rate": 7.756282836158743e-06, "loss": 0.5046, "step": 12364 }, { "epoch": 1.0, "grad_norm": 3.9298575264214537, "learning_rate": 7.755917020001952e-06, "loss": 0.6369, "step": 12365 }, { "epoch": 1.0, "grad_norm": 2.671893253903679, "learning_rate": 7.755551182654478e-06, "loss": 0.6545, "step": 12366 }, { "epoch": 1.0, "grad_norm": 2.8006294274097616, "learning_rate": 7.75518532411913e-06, "loss": 0.7098, "step": 12367 }, { "epoch": 1.0, "grad_norm": 9.810284457455396, "learning_rate": 7.754819444398725e-06, "loss": 0.6724, "step": 12368 }, { "epoch": 1.0, "grad_norm": 3.6523321395386037, "learning_rate": 7.754453543496071e-06, "loss": 0.64, "step": 12369 }, { "epoch": 1.0, "grad_norm": 2.3938093764643162, "learning_rate": 7.754087621413989e-06, "loss": 0.6504, "step": 12370 }, { "epoch": 1.0, "grad_norm": 4.151078216143737, "learning_rate": 7.753721678155287e-06, "loss": 0.8103, "step": 12371 }, { "epoch": 1.0, "grad_norm": 7.634980881799524, "learning_rate": 7.75335571372278e-06, "loss": 0.7608, "step": 12372 }, { "epoch": 1.0, "grad_norm": 17.874650859569957, "learning_rate": 7.752989728119283e-06, "loss": 0.6029, "step": 12373 }, { "epoch": 1.01, "grad_norm": 3.8044805748291934, "learning_rate": 7.752623721347609e-06, "loss": 0.5672, "step": 12374 }, { "epoch": 1.01, "grad_norm": 3.4340875179578347, "learning_rate": 7.752257693410574e-06, "loss": 0.544, "step": 12375 }, { "epoch": 1.01, "grad_norm": 4.24443514873277, "learning_rate": 7.75189164431099e-06, "loss": 0.5404, "step": 12376 }, { "epoch": 1.01, "grad_norm": 4.998389835300329, "learning_rate": 7.751525574051672e-06, "loss": 0.5918, "step": 12377 }, { "epoch": 1.01, "grad_norm": 4.390147408671915, "learning_rate": 7.751159482635437e-06, "loss": 0.5597, "step": 12378 }, { "epoch": 1.01, "grad_norm": 3.5628917431910634, "learning_rate": 7.750793370065098e-06, "loss": 0.5366, "step": 12379 }, { "epoch": 1.01, "grad_norm": 4.95645296554164, "learning_rate": 7.750427236343471e-06, "loss": 0.6, "step": 12380 }, { "epoch": 1.01, "grad_norm": 4.259826541221953, "learning_rate": 7.75006108147337e-06, "loss": 0.6176, "step": 12381 }, { "epoch": 1.01, "grad_norm": 8.25881958656223, "learning_rate": 7.749694905457612e-06, "loss": 0.6598, "step": 12382 }, { "epoch": 1.01, "grad_norm": 5.012678935908401, "learning_rate": 7.749328708299012e-06, "loss": 0.6759, "step": 12383 }, { "epoch": 1.01, "grad_norm": 2.9763067338894955, "learning_rate": 7.748962490000385e-06, "loss": 0.6029, "step": 12384 }, { "epoch": 1.01, "grad_norm": 3.302962317854548, "learning_rate": 7.748596250564548e-06, "loss": 0.611, "step": 12385 }, { "epoch": 1.01, "grad_norm": 4.275089073351172, "learning_rate": 7.748229989994317e-06, "loss": 0.5557, "step": 12386 }, { "epoch": 1.01, "grad_norm": 5.855164554664778, "learning_rate": 7.747863708292508e-06, "loss": 0.5917, "step": 12387 }, { "epoch": 1.01, "grad_norm": 4.005887088186894, "learning_rate": 7.747497405461936e-06, "loss": 0.7338, "step": 12388 }, { "epoch": 1.01, "grad_norm": 4.331118169381667, "learning_rate": 7.747131081505419e-06, "loss": 0.6196, "step": 12389 }, { "epoch": 1.01, "grad_norm": 7.306210302354887, "learning_rate": 7.746764736425774e-06, "loss": 0.7645, "step": 12390 }, { "epoch": 1.01, "grad_norm": 3.3845578069745885, "learning_rate": 7.746398370225818e-06, "loss": 0.8394, "step": 12391 }, { "epoch": 1.01, "grad_norm": 5.143218388978716, "learning_rate": 7.746031982908367e-06, "loss": 0.6478, "step": 12392 }, { "epoch": 1.01, "grad_norm": 4.235429469520762, "learning_rate": 7.74566557447624e-06, "loss": 0.6428, "step": 12393 }, { "epoch": 1.01, "grad_norm": 3.641673011416448, "learning_rate": 7.745299144932251e-06, "loss": 0.6982, "step": 12394 }, { "epoch": 1.01, "grad_norm": 3.656750697109626, "learning_rate": 7.744932694279219e-06, "loss": 0.6602, "step": 12395 }, { "epoch": 1.01, "grad_norm": 3.980877697518048, "learning_rate": 7.744566222519964e-06, "loss": 0.6246, "step": 12396 }, { "epoch": 1.01, "grad_norm": 5.759409884924394, "learning_rate": 7.744199729657303e-06, "loss": 0.7747, "step": 12397 }, { "epoch": 1.01, "grad_norm": 3.5644067378487727, "learning_rate": 7.74383321569405e-06, "loss": 0.5402, "step": 12398 }, { "epoch": 1.01, "grad_norm": 3.474110855441311, "learning_rate": 7.74346668063303e-06, "loss": 0.6459, "step": 12399 }, { "epoch": 1.01, "grad_norm": 8.8094401957294, "learning_rate": 7.743100124477054e-06, "loss": 0.6544, "step": 12400 }, { "epoch": 1.01, "grad_norm": 4.508837876645364, "learning_rate": 7.742733547228947e-06, "loss": 0.7154, "step": 12401 }, { "epoch": 1.01, "grad_norm": 8.482146241296723, "learning_rate": 7.742366948891523e-06, "loss": 0.4897, "step": 12402 }, { "epoch": 1.01, "grad_norm": 2.8012339544173175, "learning_rate": 7.742000329467605e-06, "loss": 0.7675, "step": 12403 }, { "epoch": 1.01, "grad_norm": 4.811890366880647, "learning_rate": 7.741633688960007e-06, "loss": 0.7713, "step": 12404 }, { "epoch": 1.01, "grad_norm": 3.718089944762814, "learning_rate": 7.741267027371553e-06, "loss": 0.6709, "step": 12405 }, { "epoch": 1.01, "grad_norm": 4.208618513478443, "learning_rate": 7.74090034470506e-06, "loss": 0.6052, "step": 12406 }, { "epoch": 1.01, "grad_norm": 3.5628861606361024, "learning_rate": 7.740533640963347e-06, "loss": 0.7031, "step": 12407 }, { "epoch": 1.01, "grad_norm": 4.951995912330768, "learning_rate": 7.740166916149234e-06, "loss": 0.6086, "step": 12408 }, { "epoch": 1.01, "grad_norm": 4.066257857836169, "learning_rate": 7.739800170265542e-06, "loss": 0.496, "step": 12409 }, { "epoch": 1.01, "grad_norm": 2.7838069601218964, "learning_rate": 7.739433403315088e-06, "loss": 0.5929, "step": 12410 }, { "epoch": 1.01, "grad_norm": 3.9309172196933706, "learning_rate": 7.739066615300697e-06, "loss": 0.6071, "step": 12411 }, { "epoch": 1.01, "grad_norm": 4.266282188232434, "learning_rate": 7.738699806225185e-06, "loss": 0.652, "step": 12412 }, { "epoch": 1.01, "grad_norm": 4.682750071130759, "learning_rate": 7.738332976091374e-06, "loss": 0.7113, "step": 12413 }, { "epoch": 1.01, "grad_norm": 4.234492710801553, "learning_rate": 7.737966124902086e-06, "loss": 0.7745, "step": 12414 }, { "epoch": 1.01, "grad_norm": 3.8162428891457822, "learning_rate": 7.737599252660139e-06, "loss": 0.6393, "step": 12415 }, { "epoch": 1.01, "grad_norm": 4.488736193502105, "learning_rate": 7.737232359368355e-06, "loss": 0.6527, "step": 12416 }, { "epoch": 1.01, "grad_norm": 2.6143265710864934, "learning_rate": 7.736865445029555e-06, "loss": 0.6947, "step": 12417 }, { "epoch": 1.01, "grad_norm": 3.4623748071856157, "learning_rate": 7.736498509646562e-06, "loss": 0.5658, "step": 12418 }, { "epoch": 1.01, "grad_norm": 3.5217046523318696, "learning_rate": 7.736131553222195e-06, "loss": 0.5268, "step": 12419 }, { "epoch": 1.01, "grad_norm": 3.374979470035191, "learning_rate": 7.735764575759278e-06, "loss": 0.6547, "step": 12420 }, { "epoch": 1.01, "grad_norm": 3.485433535474984, "learning_rate": 7.73539757726063e-06, "loss": 0.5143, "step": 12421 }, { "epoch": 1.01, "grad_norm": 7.004160809032969, "learning_rate": 7.735030557729075e-06, "loss": 0.7766, "step": 12422 }, { "epoch": 1.01, "grad_norm": 3.6036504120675192, "learning_rate": 7.734663517167436e-06, "loss": 0.7153, "step": 12423 }, { "epoch": 1.01, "grad_norm": 3.884095417512094, "learning_rate": 7.734296455578531e-06, "loss": 0.6505, "step": 12424 }, { "epoch": 1.01, "grad_norm": 4.573398702571422, "learning_rate": 7.733929372965185e-06, "loss": 0.7059, "step": 12425 }, { "epoch": 1.01, "grad_norm": 3.8139982635944367, "learning_rate": 7.733562269330222e-06, "loss": 0.6952, "step": 12426 }, { "epoch": 1.01, "grad_norm": 5.194551261366177, "learning_rate": 7.733195144676463e-06, "loss": 0.5159, "step": 12427 }, { "epoch": 1.01, "grad_norm": 4.860328106309098, "learning_rate": 7.732827999006732e-06, "loss": 0.7511, "step": 12428 }, { "epoch": 1.01, "grad_norm": 3.3449992575824705, "learning_rate": 7.732460832323849e-06, "loss": 0.6291, "step": 12429 }, { "epoch": 1.01, "grad_norm": 6.181628492683932, "learning_rate": 7.732093644630641e-06, "loss": 0.7886, "step": 12430 }, { "epoch": 1.01, "grad_norm": 5.136907784533668, "learning_rate": 7.73172643592993e-06, "loss": 0.6918, "step": 12431 }, { "epoch": 1.01, "grad_norm": 2.4852940212195125, "learning_rate": 7.73135920622454e-06, "loss": 0.6246, "step": 12432 }, { "epoch": 1.01, "grad_norm": 4.105300684373457, "learning_rate": 7.730991955517291e-06, "loss": 0.6206, "step": 12433 }, { "epoch": 1.01, "grad_norm": 6.681406266577509, "learning_rate": 7.730624683811012e-06, "loss": 0.6622, "step": 12434 }, { "epoch": 1.01, "grad_norm": 137.76017076187927, "learning_rate": 7.730257391108524e-06, "loss": 0.6682, "step": 12435 }, { "epoch": 1.01, "grad_norm": 2.5196184702541746, "learning_rate": 7.729890077412655e-06, "loss": 0.6022, "step": 12436 }, { "epoch": 1.01, "grad_norm": 3.7161633214146996, "learning_rate": 7.729522742726221e-06, "loss": 0.6222, "step": 12437 }, { "epoch": 1.01, "grad_norm": 4.63742921396767, "learning_rate": 7.729155387052057e-06, "loss": 0.766, "step": 12438 }, { "epoch": 1.01, "grad_norm": 3.6814001993959615, "learning_rate": 7.72878801039298e-06, "loss": 0.5864, "step": 12439 }, { "epoch": 1.01, "grad_norm": 4.246354017028032, "learning_rate": 7.728420612751816e-06, "loss": 0.5371, "step": 12440 }, { "epoch": 1.01, "grad_norm": 3.89690299307031, "learning_rate": 7.728053194131393e-06, "loss": 0.5069, "step": 12441 }, { "epoch": 1.01, "grad_norm": 2.512753592726871, "learning_rate": 7.727685754534535e-06, "loss": 0.5506, "step": 12442 }, { "epoch": 1.01, "grad_norm": 3.1359035638049595, "learning_rate": 7.727318293964066e-06, "loss": 0.703, "step": 12443 }, { "epoch": 1.01, "grad_norm": 3.4010716550034585, "learning_rate": 7.726950812422812e-06, "loss": 0.6649, "step": 12444 }, { "epoch": 1.01, "grad_norm": 3.4902385325799314, "learning_rate": 7.7265833099136e-06, "loss": 0.7175, "step": 12445 }, { "epoch": 1.01, "grad_norm": 8.341618495245067, "learning_rate": 7.726215786439253e-06, "loss": 0.6683, "step": 12446 }, { "epoch": 1.01, "grad_norm": 3.9105637151793555, "learning_rate": 7.7258482420026e-06, "loss": 0.8218, "step": 12447 }, { "epoch": 1.01, "grad_norm": 4.898789746602815, "learning_rate": 7.725480676606465e-06, "loss": 0.581, "step": 12448 }, { "epoch": 1.01, "grad_norm": 2.4825356702267585, "learning_rate": 7.725113090253673e-06, "loss": 0.5684, "step": 12449 }, { "epoch": 1.01, "grad_norm": 6.118383132410117, "learning_rate": 7.724745482947055e-06, "loss": 0.7814, "step": 12450 }, { "epoch": 1.01, "grad_norm": 12.024728912950485, "learning_rate": 7.724377854689436e-06, "loss": 0.6322, "step": 12451 }, { "epoch": 1.01, "grad_norm": 5.378213907319591, "learning_rate": 7.724010205483639e-06, "loss": 0.6503, "step": 12452 }, { "epoch": 1.01, "grad_norm": 2.908838228926188, "learning_rate": 7.723642535332493e-06, "loss": 0.5902, "step": 12453 }, { "epoch": 1.01, "grad_norm": 2.7925478097847245, "learning_rate": 7.72327484423883e-06, "loss": 0.564, "step": 12454 }, { "epoch": 1.01, "grad_norm": 3.219172884101676, "learning_rate": 7.72290713220547e-06, "loss": 0.5676, "step": 12455 }, { "epoch": 1.01, "grad_norm": 4.325319515868642, "learning_rate": 7.722539399235242e-06, "loss": 0.7188, "step": 12456 }, { "epoch": 1.01, "grad_norm": 3.1005123523180393, "learning_rate": 7.722171645330978e-06, "loss": 0.5075, "step": 12457 }, { "epoch": 1.01, "grad_norm": 51.844239775420874, "learning_rate": 7.721803870495502e-06, "loss": 0.667, "step": 12458 }, { "epoch": 1.01, "grad_norm": 3.614372489244219, "learning_rate": 7.72143607473164e-06, "loss": 0.7487, "step": 12459 }, { "epoch": 1.01, "grad_norm": 4.729110265982259, "learning_rate": 7.721068258042227e-06, "loss": 0.5708, "step": 12460 }, { "epoch": 1.01, "grad_norm": 9.235611099044382, "learning_rate": 7.720700420430083e-06, "loss": 0.7087, "step": 12461 }, { "epoch": 1.01, "grad_norm": 2.980037951333018, "learning_rate": 7.72033256189804e-06, "loss": 0.6202, "step": 12462 }, { "epoch": 1.01, "grad_norm": 4.610652505152559, "learning_rate": 7.719964682448927e-06, "loss": 0.6729, "step": 12463 }, { "epoch": 1.01, "grad_norm": 4.203383173467424, "learning_rate": 7.719596782085575e-06, "loss": 0.6963, "step": 12464 }, { "epoch": 1.01, "grad_norm": 5.938864009496398, "learning_rate": 7.719228860810806e-06, "loss": 0.6215, "step": 12465 }, { "epoch": 1.01, "grad_norm": 4.42999822004694, "learning_rate": 7.718860918627456e-06, "loss": 0.4941, "step": 12466 }, { "epoch": 1.01, "grad_norm": 3.790169057616943, "learning_rate": 7.718492955538351e-06, "loss": 0.6048, "step": 12467 }, { "epoch": 1.01, "grad_norm": 5.252885183751117, "learning_rate": 7.718124971546318e-06, "loss": 0.6138, "step": 12468 }, { "epoch": 1.01, "grad_norm": 3.0266282733554575, "learning_rate": 7.717756966654193e-06, "loss": 0.6817, "step": 12469 }, { "epoch": 1.01, "grad_norm": 2.8871540398253113, "learning_rate": 7.717388940864801e-06, "loss": 0.6417, "step": 12470 }, { "epoch": 1.01, "grad_norm": 7.537664204470688, "learning_rate": 7.717020894180972e-06, "loss": 0.5406, "step": 12471 }, { "epoch": 1.01, "grad_norm": 3.0147322826724676, "learning_rate": 7.716652826605535e-06, "loss": 0.6454, "step": 12472 }, { "epoch": 1.01, "grad_norm": 6.485109186116596, "learning_rate": 7.716284738141325e-06, "loss": 0.5993, "step": 12473 }, { "epoch": 1.01, "grad_norm": 4.197615314683759, "learning_rate": 7.715916628791165e-06, "loss": 0.5768, "step": 12474 }, { "epoch": 1.01, "grad_norm": 3.6062381656100704, "learning_rate": 7.715548498557893e-06, "loss": 0.6595, "step": 12475 }, { "epoch": 1.01, "grad_norm": 2.761853757796361, "learning_rate": 7.715180347444333e-06, "loss": 0.6605, "step": 12476 }, { "epoch": 1.01, "grad_norm": 3.711674679767731, "learning_rate": 7.714812175453321e-06, "loss": 0.7607, "step": 12477 }, { "epoch": 1.01, "grad_norm": 5.940281561281353, "learning_rate": 7.714443982587685e-06, "loss": 0.578, "step": 12478 }, { "epoch": 1.01, "grad_norm": 6.8326576933770875, "learning_rate": 7.714075768850257e-06, "loss": 0.5773, "step": 12479 }, { "epoch": 1.01, "grad_norm": 5.306155783080045, "learning_rate": 7.713707534243868e-06, "loss": 0.7211, "step": 12480 }, { "epoch": 1.01, "grad_norm": 4.158634480674293, "learning_rate": 7.71333927877135e-06, "loss": 0.6481, "step": 12481 }, { "epoch": 1.01, "grad_norm": 2.843609143817566, "learning_rate": 7.712971002435533e-06, "loss": 0.633, "step": 12482 }, { "epoch": 1.01, "grad_norm": 3.867345166845518, "learning_rate": 7.712602705239249e-06, "loss": 0.58, "step": 12483 }, { "epoch": 1.01, "grad_norm": 3.5395612668829197, "learning_rate": 7.712234387185333e-06, "loss": 0.6477, "step": 12484 }, { "epoch": 1.01, "grad_norm": 3.309771716147315, "learning_rate": 7.711866048276614e-06, "loss": 0.611, "step": 12485 }, { "epoch": 1.01, "grad_norm": 3.7774767869502615, "learning_rate": 7.711497688515926e-06, "loss": 0.6026, "step": 12486 }, { "epoch": 1.01, "grad_norm": 2.6896578225234253, "learning_rate": 7.711129307906098e-06, "loss": 0.8223, "step": 12487 }, { "epoch": 1.01, "grad_norm": 3.5091284485037595, "learning_rate": 7.710760906449967e-06, "loss": 0.6385, "step": 12488 }, { "epoch": 1.01, "grad_norm": 3.2257552192719334, "learning_rate": 7.710392484150361e-06, "loss": 0.667, "step": 12489 }, { "epoch": 1.01, "grad_norm": 3.2266761286620094, "learning_rate": 7.71002404101012e-06, "loss": 0.5546, "step": 12490 }, { "epoch": 1.01, "grad_norm": 2.960008109392364, "learning_rate": 7.70965557703207e-06, "loss": 0.4834, "step": 12491 }, { "epoch": 1.01, "grad_norm": 2.8925774836362432, "learning_rate": 7.709287092219045e-06, "loss": 0.5245, "step": 12492 }, { "epoch": 1.01, "grad_norm": 12.788409531014182, "learning_rate": 7.708918586573881e-06, "loss": 0.729, "step": 12493 }, { "epoch": 1.01, "grad_norm": 3.896006725309682, "learning_rate": 7.708550060099411e-06, "loss": 0.6387, "step": 12494 }, { "epoch": 1.01, "grad_norm": 11.052491606860372, "learning_rate": 7.708181512798467e-06, "loss": 0.5589, "step": 12495 }, { "epoch": 1.01, "grad_norm": 5.239690051420297, "learning_rate": 7.707812944673886e-06, "loss": 0.7396, "step": 12496 }, { "epoch": 1.02, "grad_norm": 3.4831620878914693, "learning_rate": 7.7074443557285e-06, "loss": 0.593, "step": 12497 }, { "epoch": 1.02, "grad_norm": 4.379393889441705, "learning_rate": 7.70707574596514e-06, "loss": 0.591, "step": 12498 }, { "epoch": 1.02, "grad_norm": 2.7208026474898066, "learning_rate": 7.706707115386648e-06, "loss": 0.6065, "step": 12499 }, { "epoch": 1.02, "grad_norm": 4.934889838350088, "learning_rate": 7.70633846399585e-06, "loss": 0.7773, "step": 12500 }, { "epoch": 1.02, "grad_norm": 2.955300990181328, "learning_rate": 7.705969791795585e-06, "loss": 0.6696, "step": 12501 }, { "epoch": 1.02, "grad_norm": 3.3891291853942334, "learning_rate": 7.70560109878869e-06, "loss": 0.6165, "step": 12502 }, { "epoch": 1.02, "grad_norm": 4.463789616446766, "learning_rate": 7.705232384977994e-06, "loss": 0.7136, "step": 12503 }, { "epoch": 1.02, "grad_norm": 4.404550775750048, "learning_rate": 7.704863650366337e-06, "loss": 0.7982, "step": 12504 }, { "epoch": 1.02, "grad_norm": 4.408852042468807, "learning_rate": 7.704494894956551e-06, "loss": 0.7204, "step": 12505 }, { "epoch": 1.02, "grad_norm": 3.2997349924763775, "learning_rate": 7.704126118751476e-06, "loss": 0.7004, "step": 12506 }, { "epoch": 1.02, "grad_norm": 5.109205518581624, "learning_rate": 7.703757321753942e-06, "loss": 0.5842, "step": 12507 }, { "epoch": 1.02, "grad_norm": 3.907151286069359, "learning_rate": 7.703388503966787e-06, "loss": 0.6245, "step": 12508 }, { "epoch": 1.02, "grad_norm": 3.270189421701857, "learning_rate": 7.703019665392848e-06, "loss": 0.7375, "step": 12509 }, { "epoch": 1.02, "grad_norm": 3.6054525953614935, "learning_rate": 7.702650806034962e-06, "loss": 0.7707, "step": 12510 }, { "epoch": 1.02, "grad_norm": 5.277238984730477, "learning_rate": 7.70228192589596e-06, "loss": 0.5775, "step": 12511 }, { "epoch": 1.02, "grad_norm": 2.861479295290755, "learning_rate": 7.701913024978684e-06, "loss": 0.64, "step": 12512 }, { "epoch": 1.02, "grad_norm": 4.683086539504576, "learning_rate": 7.701544103285967e-06, "loss": 0.6542, "step": 12513 }, { "epoch": 1.02, "grad_norm": 3.886111698955071, "learning_rate": 7.701175160820648e-06, "loss": 0.6161, "step": 12514 }, { "epoch": 1.02, "grad_norm": 4.335351822417811, "learning_rate": 7.700806197585564e-06, "loss": 0.6632, "step": 12515 }, { "epoch": 1.02, "grad_norm": 3.770021228495931, "learning_rate": 7.70043721358355e-06, "loss": 0.5825, "step": 12516 }, { "epoch": 1.02, "grad_norm": 4.109337837525772, "learning_rate": 7.700068208817444e-06, "loss": 0.6602, "step": 12517 }, { "epoch": 1.02, "grad_norm": 3.7824469160379546, "learning_rate": 7.699699183290084e-06, "loss": 0.5915, "step": 12518 }, { "epoch": 1.02, "grad_norm": 3.3741714656917883, "learning_rate": 7.699330137004306e-06, "loss": 0.6083, "step": 12519 }, { "epoch": 1.02, "grad_norm": 4.113577975332614, "learning_rate": 7.69896106996295e-06, "loss": 0.6592, "step": 12520 }, { "epoch": 1.02, "grad_norm": 3.284420853677757, "learning_rate": 7.698591982168851e-06, "loss": 0.6026, "step": 12521 }, { "epoch": 1.02, "grad_norm": 3.7657081335262608, "learning_rate": 7.698222873624847e-06, "loss": 0.6015, "step": 12522 }, { "epoch": 1.02, "grad_norm": 2.533041664474653, "learning_rate": 7.697853744333781e-06, "loss": 0.5321, "step": 12523 }, { "epoch": 1.02, "grad_norm": 5.874571038245419, "learning_rate": 7.697484594298485e-06, "loss": 0.734, "step": 12524 }, { "epoch": 1.02, "grad_norm": 3.0987508486379047, "learning_rate": 7.697115423521802e-06, "loss": 0.645, "step": 12525 }, { "epoch": 1.02, "grad_norm": 3.9874398749366544, "learning_rate": 7.696746232006569e-06, "loss": 0.6241, "step": 12526 }, { "epoch": 1.02, "grad_norm": 3.4565971877879744, "learning_rate": 7.696377019755624e-06, "loss": 0.6687, "step": 12527 }, { "epoch": 1.02, "grad_norm": 5.557081188894988, "learning_rate": 7.696007786771806e-06, "loss": 0.5783, "step": 12528 }, { "epoch": 1.02, "grad_norm": 2.8604984111035843, "learning_rate": 7.695638533057956e-06, "loss": 0.6064, "step": 12529 }, { "epoch": 1.02, "grad_norm": 3.1284752666066225, "learning_rate": 7.69526925861691e-06, "loss": 0.6775, "step": 12530 }, { "epoch": 1.02, "grad_norm": 3.1044202777037664, "learning_rate": 7.694899963451512e-06, "loss": 0.5536, "step": 12531 }, { "epoch": 1.02, "grad_norm": 3.393868298344729, "learning_rate": 7.694530647564597e-06, "loss": 0.7347, "step": 12532 }, { "epoch": 1.02, "grad_norm": 4.015780076086292, "learning_rate": 7.694161310959007e-06, "loss": 0.6135, "step": 12533 }, { "epoch": 1.02, "grad_norm": 4.761142339762222, "learning_rate": 7.693791953637584e-06, "loss": 0.5762, "step": 12534 }, { "epoch": 1.02, "grad_norm": 3.5468236845590657, "learning_rate": 7.693422575603162e-06, "loss": 0.7304, "step": 12535 }, { "epoch": 1.02, "grad_norm": 17.37870368059202, "learning_rate": 7.693053176858586e-06, "loss": 0.6916, "step": 12536 }, { "epoch": 1.02, "grad_norm": 4.964013871202618, "learning_rate": 7.692683757406696e-06, "loss": 0.5789, "step": 12537 }, { "epoch": 1.02, "grad_norm": 2.3521395362245916, "learning_rate": 7.692314317250331e-06, "loss": 0.6145, "step": 12538 }, { "epoch": 1.02, "grad_norm": 2.566645138834135, "learning_rate": 7.691944856392333e-06, "loss": 0.5797, "step": 12539 }, { "epoch": 1.02, "grad_norm": 2.6899042136466536, "learning_rate": 7.69157537483554e-06, "loss": 0.6343, "step": 12540 }, { "epoch": 1.02, "grad_norm": 4.6904359585547155, "learning_rate": 7.691205872582797e-06, "loss": 0.6079, "step": 12541 }, { "epoch": 1.02, "grad_norm": 3.19248809338908, "learning_rate": 7.690836349636945e-06, "loss": 0.5855, "step": 12542 }, { "epoch": 1.02, "grad_norm": 4.04093009566446, "learning_rate": 7.690466806000822e-06, "loss": 0.833, "step": 12543 }, { "epoch": 1.02, "grad_norm": 6.493340837376103, "learning_rate": 7.69009724167727e-06, "loss": 0.6643, "step": 12544 }, { "epoch": 1.02, "grad_norm": 11.094288996913862, "learning_rate": 7.689727656669132e-06, "loss": 0.5741, "step": 12545 }, { "epoch": 1.02, "grad_norm": 7.199239490640086, "learning_rate": 7.689358050979252e-06, "loss": 0.4498, "step": 12546 }, { "epoch": 1.02, "grad_norm": 9.338233991109362, "learning_rate": 7.688988424610468e-06, "loss": 0.597, "step": 12547 }, { "epoch": 1.02, "grad_norm": 3.031397304346165, "learning_rate": 7.688618777565623e-06, "loss": 0.5712, "step": 12548 }, { "epoch": 1.02, "grad_norm": 3.3270259212646858, "learning_rate": 7.68824910984756e-06, "loss": 0.7088, "step": 12549 }, { "epoch": 1.02, "grad_norm": 5.361687459090794, "learning_rate": 7.687879421459123e-06, "loss": 0.8544, "step": 12550 }, { "epoch": 1.02, "grad_norm": 2.5667657686767544, "learning_rate": 7.687509712403152e-06, "loss": 0.6679, "step": 12551 }, { "epoch": 1.02, "grad_norm": 55.11664932699268, "learning_rate": 7.68713998268249e-06, "loss": 0.6991, "step": 12552 }, { "epoch": 1.02, "grad_norm": 3.755015059995825, "learning_rate": 7.686770232299982e-06, "loss": 0.636, "step": 12553 }, { "epoch": 1.02, "grad_norm": 4.325059288928103, "learning_rate": 7.68640046125847e-06, "loss": 0.7616, "step": 12554 }, { "epoch": 1.02, "grad_norm": 4.356956566199944, "learning_rate": 7.686030669560796e-06, "loss": 0.5423, "step": 12555 }, { "epoch": 1.02, "grad_norm": 3.8784764522094157, "learning_rate": 7.685660857209805e-06, "loss": 0.6149, "step": 12556 }, { "epoch": 1.02, "grad_norm": 2.6067373594337373, "learning_rate": 7.685291024208338e-06, "loss": 0.5454, "step": 12557 }, { "epoch": 1.02, "grad_norm": 4.015451592080818, "learning_rate": 7.684921170559243e-06, "loss": 0.5329, "step": 12558 }, { "epoch": 1.02, "grad_norm": 5.460954957993479, "learning_rate": 7.68455129626536e-06, "loss": 0.5194, "step": 12559 }, { "epoch": 1.02, "grad_norm": 17.44754904089569, "learning_rate": 7.684181401329535e-06, "loss": 0.5676, "step": 12560 }, { "epoch": 1.02, "grad_norm": 4.959332331093466, "learning_rate": 7.68381148575461e-06, "loss": 0.6397, "step": 12561 }, { "epoch": 1.02, "grad_norm": 2.9243296428501235, "learning_rate": 7.683441549543435e-06, "loss": 0.5269, "step": 12562 }, { "epoch": 1.02, "grad_norm": 5.801959713766648, "learning_rate": 7.683071592698847e-06, "loss": 0.6858, "step": 12563 }, { "epoch": 1.02, "grad_norm": 45.56470308297, "learning_rate": 7.682701615223695e-06, "loss": 0.7867, "step": 12564 }, { "epoch": 1.02, "grad_norm": 3.7277751535654393, "learning_rate": 7.682331617120823e-06, "loss": 0.7609, "step": 12565 }, { "epoch": 1.02, "grad_norm": 7.314442079940626, "learning_rate": 7.681961598393077e-06, "loss": 0.7517, "step": 12566 }, { "epoch": 1.02, "grad_norm": 16.591712719960395, "learning_rate": 7.6815915590433e-06, "loss": 0.6199, "step": 12567 }, { "epoch": 1.02, "grad_norm": 4.288676630101633, "learning_rate": 7.681221499074338e-06, "loss": 0.5563, "step": 12568 }, { "epoch": 1.02, "grad_norm": 4.67781462152573, "learning_rate": 7.680851418489037e-06, "loss": 0.7763, "step": 12569 }, { "epoch": 1.02, "grad_norm": 3.2820391513501885, "learning_rate": 7.680481317290243e-06, "loss": 0.7015, "step": 12570 }, { "epoch": 1.02, "grad_norm": 4.037068985052755, "learning_rate": 7.680111195480801e-06, "loss": 0.7217, "step": 12571 }, { "epoch": 1.02, "grad_norm": 5.492997210151447, "learning_rate": 7.679741053063557e-06, "loss": 0.5099, "step": 12572 }, { "epoch": 1.02, "grad_norm": 2.7877565694175805, "learning_rate": 7.679370890041358e-06, "loss": 0.5245, "step": 12573 }, { "epoch": 1.02, "grad_norm": 5.502229127065097, "learning_rate": 7.679000706417049e-06, "loss": 0.5252, "step": 12574 }, { "epoch": 1.02, "grad_norm": 2.6868943961811547, "learning_rate": 7.678630502193476e-06, "loss": 0.8192, "step": 12575 }, { "epoch": 1.02, "grad_norm": 5.155918508588384, "learning_rate": 7.678260277373488e-06, "loss": 0.6738, "step": 12576 }, { "epoch": 1.02, "grad_norm": 8.159239974960599, "learning_rate": 7.677890031959928e-06, "loss": 0.6146, "step": 12577 }, { "epoch": 1.02, "grad_norm": 4.63249666100707, "learning_rate": 7.677519765955647e-06, "loss": 0.6395, "step": 12578 }, { "epoch": 1.02, "grad_norm": 4.086485087761686, "learning_rate": 7.677149479363487e-06, "loss": 0.5852, "step": 12579 }, { "epoch": 1.02, "grad_norm": 3.488293765484528, "learning_rate": 7.6767791721863e-06, "loss": 0.5882, "step": 12580 }, { "epoch": 1.02, "grad_norm": 6.466325233015778, "learning_rate": 7.676408844426934e-06, "loss": 0.6721, "step": 12581 }, { "epoch": 1.02, "grad_norm": 2.6952983509431783, "learning_rate": 7.676038496088232e-06, "loss": 0.4849, "step": 12582 }, { "epoch": 1.02, "grad_norm": 3.0064643513799, "learning_rate": 7.675668127173043e-06, "loss": 0.6906, "step": 12583 }, { "epoch": 1.02, "grad_norm": 3.7211621564519684, "learning_rate": 7.675297737684217e-06, "loss": 0.6305, "step": 12584 }, { "epoch": 1.02, "grad_norm": 7.0064852257260775, "learning_rate": 7.6749273276246e-06, "loss": 0.5429, "step": 12585 }, { "epoch": 1.02, "grad_norm": 3.0022255296393427, "learning_rate": 7.674556896997041e-06, "loss": 0.5432, "step": 12586 }, { "epoch": 1.02, "grad_norm": 4.644900194514009, "learning_rate": 7.674186445804387e-06, "loss": 0.763, "step": 12587 }, { "epoch": 1.02, "grad_norm": 4.122263336429523, "learning_rate": 7.673815974049489e-06, "loss": 0.6086, "step": 12588 }, { "epoch": 1.02, "grad_norm": 4.252663133854405, "learning_rate": 7.673445481735191e-06, "loss": 0.697, "step": 12589 }, { "epoch": 1.02, "grad_norm": 2.6400001535358597, "learning_rate": 7.673074968864347e-06, "loss": 0.5344, "step": 12590 }, { "epoch": 1.02, "grad_norm": 4.686475514425044, "learning_rate": 7.672704435439805e-06, "loss": 0.7779, "step": 12591 }, { "epoch": 1.02, "grad_norm": 3.097133767992966, "learning_rate": 7.672333881464411e-06, "loss": 0.5638, "step": 12592 }, { "epoch": 1.02, "grad_norm": 3.638546275863654, "learning_rate": 7.671963306941017e-06, "loss": 0.6188, "step": 12593 }, { "epoch": 1.02, "grad_norm": 4.509845926533034, "learning_rate": 7.67159271187247e-06, "loss": 0.7021, "step": 12594 }, { "epoch": 1.02, "grad_norm": 2.6588582068874804, "learning_rate": 7.671222096261624e-06, "loss": 0.612, "step": 12595 }, { "epoch": 1.02, "grad_norm": 4.517744275470992, "learning_rate": 7.670851460111323e-06, "loss": 0.6097, "step": 12596 }, { "epoch": 1.02, "grad_norm": 5.041977547756016, "learning_rate": 7.670480803424422e-06, "loss": 0.6301, "step": 12597 }, { "epoch": 1.02, "grad_norm": 3.2306451631836945, "learning_rate": 7.670110126203767e-06, "loss": 0.6299, "step": 12598 }, { "epoch": 1.02, "grad_norm": 5.986142420766472, "learning_rate": 7.669739428452211e-06, "loss": 0.6896, "step": 12599 }, { "epoch": 1.02, "grad_norm": 4.6298990483191425, "learning_rate": 7.669368710172603e-06, "loss": 0.6252, "step": 12600 }, { "epoch": 1.02, "grad_norm": 3.534689909909978, "learning_rate": 7.668997971367793e-06, "loss": 0.4609, "step": 12601 }, { "epoch": 1.02, "grad_norm": 2.71198053992389, "learning_rate": 7.668627212040633e-06, "loss": 0.7169, "step": 12602 }, { "epoch": 1.02, "grad_norm": 3.741814230254302, "learning_rate": 7.668256432193974e-06, "loss": 0.7502, "step": 12603 }, { "epoch": 1.02, "grad_norm": 6.4266329533502455, "learning_rate": 7.667885631830665e-06, "loss": 0.6781, "step": 12604 }, { "epoch": 1.02, "grad_norm": 3.070013793096365, "learning_rate": 7.66751481095356e-06, "loss": 0.6919, "step": 12605 }, { "epoch": 1.02, "grad_norm": 2.7622846635810046, "learning_rate": 7.667143969565507e-06, "loss": 0.642, "step": 12606 }, { "epoch": 1.02, "grad_norm": 4.194138834810723, "learning_rate": 7.66677310766936e-06, "loss": 0.6854, "step": 12607 }, { "epoch": 1.02, "grad_norm": 3.7363510699447517, "learning_rate": 7.66640222526797e-06, "loss": 0.6769, "step": 12608 }, { "epoch": 1.02, "grad_norm": 12.241112970295672, "learning_rate": 7.666031322364188e-06, "loss": 0.5354, "step": 12609 }, { "epoch": 1.02, "grad_norm": 3.454955727844363, "learning_rate": 7.665660398960867e-06, "loss": 0.6226, "step": 12610 }, { "epoch": 1.02, "grad_norm": 4.291899800526837, "learning_rate": 7.665289455060857e-06, "loss": 0.5728, "step": 12611 }, { "epoch": 1.02, "grad_norm": 2.4431201295869376, "learning_rate": 7.664918490667016e-06, "loss": 0.5855, "step": 12612 }, { "epoch": 1.02, "grad_norm": 2.85912174151412, "learning_rate": 7.664547505782187e-06, "loss": 0.6685, "step": 12613 }, { "epoch": 1.02, "grad_norm": 6.108523369578036, "learning_rate": 7.664176500409231e-06, "loss": 0.5269, "step": 12614 }, { "epoch": 1.02, "grad_norm": 3.8746210000527834, "learning_rate": 7.663805474550998e-06, "loss": 0.5418, "step": 12615 }, { "epoch": 1.02, "grad_norm": 3.2637285308097974, "learning_rate": 7.663434428210339e-06, "loss": 0.6343, "step": 12616 }, { "epoch": 1.02, "grad_norm": 3.7334445523450617, "learning_rate": 7.663063361390109e-06, "loss": 0.7649, "step": 12617 }, { "epoch": 1.02, "grad_norm": 3.1571098780109486, "learning_rate": 7.66269227409316e-06, "loss": 0.5962, "step": 12618 }, { "epoch": 1.02, "grad_norm": 2.7328510710828033, "learning_rate": 7.662321166322346e-06, "loss": 0.6699, "step": 12619 }, { "epoch": 1.02, "grad_norm": 4.258015310199422, "learning_rate": 7.661950038080521e-06, "loss": 0.6852, "step": 12620 }, { "epoch": 1.03, "grad_norm": 4.640945031137924, "learning_rate": 7.661578889370538e-06, "loss": 0.6567, "step": 12621 }, { "epoch": 1.03, "grad_norm": 7.609780829004056, "learning_rate": 7.66120772019525e-06, "loss": 0.6189, "step": 12622 }, { "epoch": 1.03, "grad_norm": 3.3958081065183845, "learning_rate": 7.660836530557514e-06, "loss": 0.5997, "step": 12623 }, { "epoch": 1.03, "grad_norm": 3.1015283651929795, "learning_rate": 7.66046532046018e-06, "loss": 0.55, "step": 12624 }, { "epoch": 1.03, "grad_norm": 3.316638205052729, "learning_rate": 7.660094089906105e-06, "loss": 0.719, "step": 12625 }, { "epoch": 1.03, "grad_norm": 2.9895636026662147, "learning_rate": 7.659722838898144e-06, "loss": 0.6164, "step": 12626 }, { "epoch": 1.03, "grad_norm": 4.738051178839015, "learning_rate": 7.65935156743915e-06, "loss": 0.6487, "step": 12627 }, { "epoch": 1.03, "grad_norm": 3.3178545810205744, "learning_rate": 7.658980275531977e-06, "loss": 0.5264, "step": 12628 }, { "epoch": 1.03, "grad_norm": 3.7736308503544413, "learning_rate": 7.65860896317948e-06, "loss": 0.7128, "step": 12629 }, { "epoch": 1.03, "grad_norm": 2.8919043028994262, "learning_rate": 7.658237630384518e-06, "loss": 0.5955, "step": 12630 }, { "epoch": 1.03, "grad_norm": 3.7289010251378465, "learning_rate": 7.657866277149943e-06, "loss": 0.5479, "step": 12631 }, { "epoch": 1.03, "grad_norm": 3.228984425810598, "learning_rate": 7.65749490347861e-06, "loss": 0.713, "step": 12632 }, { "epoch": 1.03, "grad_norm": 2.5258839598154768, "learning_rate": 7.657123509373376e-06, "loss": 0.6011, "step": 12633 }, { "epoch": 1.03, "grad_norm": 4.030415988029781, "learning_rate": 7.656752094837097e-06, "loss": 0.6751, "step": 12634 }, { "epoch": 1.03, "grad_norm": 3.7202505144906794, "learning_rate": 7.656380659872627e-06, "loss": 0.6943, "step": 12635 }, { "epoch": 1.03, "grad_norm": 4.641821520473564, "learning_rate": 7.656009204482822e-06, "loss": 0.7023, "step": 12636 }, { "epoch": 1.03, "grad_norm": 4.1640511210328155, "learning_rate": 7.65563772867054e-06, "loss": 0.6807, "step": 12637 }, { "epoch": 1.03, "grad_norm": 3.3880663182288764, "learning_rate": 7.655266232438636e-06, "loss": 0.652, "step": 12638 }, { "epoch": 1.03, "grad_norm": 7.361598382079345, "learning_rate": 7.654894715789968e-06, "loss": 0.6858, "step": 12639 }, { "epoch": 1.03, "grad_norm": 4.264258619611187, "learning_rate": 7.654523178727391e-06, "loss": 0.569, "step": 12640 }, { "epoch": 1.03, "grad_norm": 2.9905351205532913, "learning_rate": 7.654151621253762e-06, "loss": 0.6036, "step": 12641 }, { "epoch": 1.03, "grad_norm": 2.457708442124658, "learning_rate": 7.653780043371939e-06, "loss": 0.583, "step": 12642 }, { "epoch": 1.03, "grad_norm": 4.0053163690024505, "learning_rate": 7.653408445084779e-06, "loss": 0.447, "step": 12643 }, { "epoch": 1.03, "grad_norm": 5.426722192810419, "learning_rate": 7.653036826395138e-06, "loss": 0.5981, "step": 12644 }, { "epoch": 1.03, "grad_norm": 5.259307618019066, "learning_rate": 7.652665187305874e-06, "loss": 0.6231, "step": 12645 }, { "epoch": 1.03, "grad_norm": 2.849345243363587, "learning_rate": 7.652293527819845e-06, "loss": 0.6454, "step": 12646 }, { "epoch": 1.03, "grad_norm": 6.475845877783626, "learning_rate": 7.651921847939909e-06, "loss": 0.5483, "step": 12647 }, { "epoch": 1.03, "grad_norm": 4.911356482245304, "learning_rate": 7.651550147668925e-06, "loss": 0.599, "step": 12648 }, { "epoch": 1.03, "grad_norm": 3.822108633375212, "learning_rate": 7.651178427009746e-06, "loss": 0.4859, "step": 12649 }, { "epoch": 1.03, "grad_norm": 3.208152452620052, "learning_rate": 7.650806685965237e-06, "loss": 0.6283, "step": 12650 }, { "epoch": 1.03, "grad_norm": 5.013681535076231, "learning_rate": 7.650434924538253e-06, "loss": 0.5204, "step": 12651 }, { "epoch": 1.03, "grad_norm": 4.195904411173517, "learning_rate": 7.650063142731652e-06, "loss": 0.5806, "step": 12652 }, { "epoch": 1.03, "grad_norm": 3.505536869019851, "learning_rate": 7.649691340548291e-06, "loss": 0.5668, "step": 12653 }, { "epoch": 1.03, "grad_norm": 5.851559676617221, "learning_rate": 7.649319517991034e-06, "loss": 0.6395, "step": 12654 }, { "epoch": 1.03, "grad_norm": 2.2066569061475505, "learning_rate": 7.648947675062737e-06, "loss": 0.5282, "step": 12655 }, { "epoch": 1.03, "grad_norm": 2.554987259007454, "learning_rate": 7.64857581176626e-06, "loss": 0.4915, "step": 12656 }, { "epoch": 1.03, "grad_norm": 3.176595981309338, "learning_rate": 7.648203928104458e-06, "loss": 0.6313, "step": 12657 }, { "epoch": 1.03, "grad_norm": 4.662850864986201, "learning_rate": 7.647832024080197e-06, "loss": 0.6297, "step": 12658 }, { "epoch": 1.03, "grad_norm": 3.631005298730444, "learning_rate": 7.647460099696333e-06, "loss": 0.7317, "step": 12659 }, { "epoch": 1.03, "grad_norm": 3.540099823594697, "learning_rate": 7.647088154955728e-06, "loss": 0.6325, "step": 12660 }, { "epoch": 1.03, "grad_norm": 3.250794334498882, "learning_rate": 7.64671618986124e-06, "loss": 0.5916, "step": 12661 }, { "epoch": 1.03, "grad_norm": 4.434718213820831, "learning_rate": 7.646344204415729e-06, "loss": 0.5714, "step": 12662 }, { "epoch": 1.03, "grad_norm": 2.2102645742289724, "learning_rate": 7.645972198622056e-06, "loss": 0.5076, "step": 12663 }, { "epoch": 1.03, "grad_norm": 3.392975242736115, "learning_rate": 7.645600172483083e-06, "loss": 0.6118, "step": 12664 }, { "epoch": 1.03, "grad_norm": 3.918041500269964, "learning_rate": 7.645228126001668e-06, "loss": 0.617, "step": 12665 }, { "epoch": 1.03, "grad_norm": 6.485057942493537, "learning_rate": 7.644856059180669e-06, "loss": 0.7169, "step": 12666 }, { "epoch": 1.03, "grad_norm": 4.570162958975371, "learning_rate": 7.644483972022955e-06, "loss": 0.5793, "step": 12667 }, { "epoch": 1.03, "grad_norm": 3.5811980945416915, "learning_rate": 7.644111864531381e-06, "loss": 0.6468, "step": 12668 }, { "epoch": 1.03, "grad_norm": 4.537947090902917, "learning_rate": 7.643739736708811e-06, "loss": 0.621, "step": 12669 }, { "epoch": 1.03, "grad_norm": 8.401402696966562, "learning_rate": 7.6433675885581e-06, "loss": 0.651, "step": 12670 }, { "epoch": 1.03, "grad_norm": 3.166768705021124, "learning_rate": 7.64299542008212e-06, "loss": 0.6323, "step": 12671 }, { "epoch": 1.03, "grad_norm": 3.4287667384016784, "learning_rate": 7.642623231283725e-06, "loss": 0.4893, "step": 12672 }, { "epoch": 1.03, "grad_norm": 3.923935838757038, "learning_rate": 7.64225102216578e-06, "loss": 0.7357, "step": 12673 }, { "epoch": 1.03, "grad_norm": 3.263290077962503, "learning_rate": 7.641878792731146e-06, "loss": 0.6786, "step": 12674 }, { "epoch": 1.03, "grad_norm": 2.5469350279805347, "learning_rate": 7.641506542982686e-06, "loss": 0.6096, "step": 12675 }, { "epoch": 1.03, "grad_norm": 4.7633702929307615, "learning_rate": 7.641134272923259e-06, "loss": 0.5758, "step": 12676 }, { "epoch": 1.03, "grad_norm": 2.6467588717374784, "learning_rate": 7.640761982555732e-06, "loss": 0.6636, "step": 12677 }, { "epoch": 1.03, "grad_norm": 3.5489589829612123, "learning_rate": 7.640389671882963e-06, "loss": 0.6997, "step": 12678 }, { "epoch": 1.03, "grad_norm": 3.5096145876932643, "learning_rate": 7.64001734090782e-06, "loss": 0.6318, "step": 12679 }, { "epoch": 1.03, "grad_norm": 3.8685306095150707, "learning_rate": 7.63964498963316e-06, "loss": 0.5047, "step": 12680 }, { "epoch": 1.03, "grad_norm": 3.3489888692499936, "learning_rate": 7.639272618061852e-06, "loss": 0.7346, "step": 12681 }, { "epoch": 1.03, "grad_norm": 2.8764381734466036, "learning_rate": 7.638900226196756e-06, "loss": 0.6137, "step": 12682 }, { "epoch": 1.03, "grad_norm": 3.4309636151388476, "learning_rate": 7.638527814040735e-06, "loss": 0.5165, "step": 12683 }, { "epoch": 1.03, "grad_norm": 4.079100724581614, "learning_rate": 7.638155381596655e-06, "loss": 0.5057, "step": 12684 }, { "epoch": 1.03, "grad_norm": 3.8923304718753995, "learning_rate": 7.637782928867376e-06, "loss": 0.6078, "step": 12685 }, { "epoch": 1.03, "grad_norm": 5.994973961507608, "learning_rate": 7.637410455855764e-06, "loss": 0.5187, "step": 12686 }, { "epoch": 1.03, "grad_norm": 16.141474167517647, "learning_rate": 7.637037962564683e-06, "loss": 0.7251, "step": 12687 }, { "epoch": 1.03, "grad_norm": 4.1994825318012845, "learning_rate": 7.636665448996999e-06, "loss": 0.6299, "step": 12688 }, { "epoch": 1.03, "grad_norm": 5.567059959206812, "learning_rate": 7.636292915155574e-06, "loss": 0.6624, "step": 12689 }, { "epoch": 1.03, "grad_norm": 5.098566248758252, "learning_rate": 7.635920361043271e-06, "loss": 0.5266, "step": 12690 }, { "epoch": 1.03, "grad_norm": 4.666446824047482, "learning_rate": 7.635547786662958e-06, "loss": 0.6295, "step": 12691 }, { "epoch": 1.03, "grad_norm": 2.521160221289315, "learning_rate": 7.635175192017496e-06, "loss": 0.637, "step": 12692 }, { "epoch": 1.03, "grad_norm": 11.346480274163584, "learning_rate": 7.634802577109755e-06, "loss": 0.6331, "step": 12693 }, { "epoch": 1.03, "grad_norm": 2.8889141559473956, "learning_rate": 7.634429941942596e-06, "loss": 0.5706, "step": 12694 }, { "epoch": 1.03, "grad_norm": 15.643610117436824, "learning_rate": 7.634057286518885e-06, "loss": 0.5741, "step": 12695 }, { "epoch": 1.03, "grad_norm": 6.189224929460995, "learning_rate": 7.63368461084149e-06, "loss": 0.4159, "step": 12696 }, { "epoch": 1.03, "grad_norm": 3.606080223399179, "learning_rate": 7.633311914913274e-06, "loss": 0.6609, "step": 12697 }, { "epoch": 1.03, "grad_norm": 2.957572907874164, "learning_rate": 7.632939198737102e-06, "loss": 0.5185, "step": 12698 }, { "epoch": 1.03, "grad_norm": 13.065508946227473, "learning_rate": 7.63256646231584e-06, "loss": 0.65, "step": 12699 }, { "epoch": 1.03, "grad_norm": 4.549873516079415, "learning_rate": 7.632193705652358e-06, "loss": 0.6508, "step": 12700 }, { "epoch": 1.03, "grad_norm": 5.528590277276484, "learning_rate": 7.631820928749517e-06, "loss": 0.7976, "step": 12701 }, { "epoch": 1.03, "grad_norm": 3.376794564510867, "learning_rate": 7.631448131610188e-06, "loss": 0.7881, "step": 12702 }, { "epoch": 1.03, "grad_norm": 4.71868224284807, "learning_rate": 7.631075314237233e-06, "loss": 0.6213, "step": 12703 }, { "epoch": 1.03, "grad_norm": 8.95824015129035, "learning_rate": 7.630702476633522e-06, "loss": 0.5107, "step": 12704 }, { "epoch": 1.03, "grad_norm": 3.1156414482767962, "learning_rate": 7.63032961880192e-06, "loss": 0.6331, "step": 12705 }, { "epoch": 1.03, "grad_norm": 4.209895955685698, "learning_rate": 7.629956740745294e-06, "loss": 0.6437, "step": 12706 }, { "epoch": 1.03, "grad_norm": 8.740229868541386, "learning_rate": 7.629583842466512e-06, "loss": 0.5358, "step": 12707 }, { "epoch": 1.03, "grad_norm": 3.7898023146714688, "learning_rate": 7.629210923968443e-06, "loss": 0.7315, "step": 12708 }, { "epoch": 1.03, "grad_norm": 3.9090961923385388, "learning_rate": 7.628837985253952e-06, "loss": 0.7224, "step": 12709 }, { "epoch": 1.03, "grad_norm": 14.424070680951075, "learning_rate": 7.628465026325905e-06, "loss": 0.6609, "step": 12710 }, { "epoch": 1.03, "grad_norm": 3.978638724331431, "learning_rate": 7.628092047187173e-06, "loss": 0.5598, "step": 12711 }, { "epoch": 1.03, "grad_norm": 2.8596370126196216, "learning_rate": 7.627719047840622e-06, "loss": 0.7251, "step": 12712 }, { "epoch": 1.03, "grad_norm": 5.649291025963399, "learning_rate": 7.627346028289121e-06, "loss": 0.4565, "step": 12713 }, { "epoch": 1.03, "grad_norm": 7.2442414412241725, "learning_rate": 7.626972988535538e-06, "loss": 0.5738, "step": 12714 }, { "epoch": 1.03, "grad_norm": 5.078459145574682, "learning_rate": 7.626599928582741e-06, "loss": 0.5754, "step": 12715 }, { "epoch": 1.03, "grad_norm": 4.321479761150848, "learning_rate": 7.626226848433599e-06, "loss": 0.6033, "step": 12716 }, { "epoch": 1.03, "grad_norm": 6.6860391090558, "learning_rate": 7.625853748090981e-06, "loss": 0.7958, "step": 12717 }, { "epoch": 1.03, "grad_norm": 3.850835155819159, "learning_rate": 7.6254806275577545e-06, "loss": 0.6412, "step": 12718 }, { "epoch": 1.03, "grad_norm": 4.4608686724929845, "learning_rate": 7.625107486836789e-06, "loss": 0.7587, "step": 12719 }, { "epoch": 1.03, "grad_norm": 3.4215482857655237, "learning_rate": 7.6247343259309535e-06, "loss": 0.483, "step": 12720 }, { "epoch": 1.03, "grad_norm": 3.098025831168619, "learning_rate": 7.6243611448431195e-06, "loss": 0.7063, "step": 12721 }, { "epoch": 1.03, "grad_norm": 10.727304840829941, "learning_rate": 7.623987943576153e-06, "loss": 0.5673, "step": 12722 }, { "epoch": 1.03, "grad_norm": 2.773103092325417, "learning_rate": 7.623614722132926e-06, "loss": 0.69, "step": 12723 }, { "epoch": 1.03, "grad_norm": 2.611113493629122, "learning_rate": 7.623241480516307e-06, "loss": 0.7246, "step": 12724 }, { "epoch": 1.03, "grad_norm": 2.703047847146327, "learning_rate": 7.622868218729167e-06, "loss": 0.6533, "step": 12725 }, { "epoch": 1.03, "grad_norm": 4.8948907064671285, "learning_rate": 7.622494936774376e-06, "loss": 0.6401, "step": 12726 }, { "epoch": 1.03, "grad_norm": 3.010706603838789, "learning_rate": 7.622121634654802e-06, "loss": 0.575, "step": 12727 }, { "epoch": 1.03, "grad_norm": 14.463933934117026, "learning_rate": 7.621748312373318e-06, "loss": 0.4634, "step": 12728 }, { "epoch": 1.03, "grad_norm": 3.5977220452248284, "learning_rate": 7.621374969932793e-06, "loss": 0.5652, "step": 12729 }, { "epoch": 1.03, "grad_norm": 4.733752512886142, "learning_rate": 7.6210016073361e-06, "loss": 0.5665, "step": 12730 }, { "epoch": 1.03, "grad_norm": 6.214524943664311, "learning_rate": 7.620628224586106e-06, "loss": 0.656, "step": 12731 }, { "epoch": 1.03, "grad_norm": 7.204604365567833, "learning_rate": 7.620254821685687e-06, "loss": 0.6533, "step": 12732 }, { "epoch": 1.03, "grad_norm": 2.985986789259628, "learning_rate": 7.619881398637709e-06, "loss": 0.6811, "step": 12733 }, { "epoch": 1.03, "grad_norm": 4.627074874276101, "learning_rate": 7.619507955445047e-06, "loss": 0.4404, "step": 12734 }, { "epoch": 1.03, "grad_norm": 3.461723280278261, "learning_rate": 7.619134492110569e-06, "loss": 0.589, "step": 12735 }, { "epoch": 1.03, "grad_norm": 4.658443577476422, "learning_rate": 7.61876100863715e-06, "loss": 0.6971, "step": 12736 }, { "epoch": 1.03, "grad_norm": 3.628238462327136, "learning_rate": 7.61838750502766e-06, "loss": 0.6149, "step": 12737 }, { "epoch": 1.03, "grad_norm": 2.9273601008684715, "learning_rate": 7.618013981284973e-06, "loss": 0.5583, "step": 12738 }, { "epoch": 1.03, "grad_norm": 15.400414693616444, "learning_rate": 7.617640437411958e-06, "loss": 0.6308, "step": 12739 }, { "epoch": 1.03, "grad_norm": 3.6699417830876864, "learning_rate": 7.617266873411489e-06, "loss": 0.7399, "step": 12740 }, { "epoch": 1.03, "grad_norm": 4.996752595734896, "learning_rate": 7.616893289286438e-06, "loss": 0.7019, "step": 12741 }, { "epoch": 1.03, "grad_norm": 3.556986995240959, "learning_rate": 7.616519685039678e-06, "loss": 0.6845, "step": 12742 }, { "epoch": 1.03, "grad_norm": 3.44127620543055, "learning_rate": 7.616146060674081e-06, "loss": 0.6563, "step": 12743 }, { "epoch": 1.04, "grad_norm": 5.036999803877084, "learning_rate": 7.6157724161925195e-06, "loss": 0.6513, "step": 12744 }, { "epoch": 1.04, "grad_norm": 9.55447748952306, "learning_rate": 7.615398751597869e-06, "loss": 0.5298, "step": 12745 }, { "epoch": 1.04, "grad_norm": 4.325238728687551, "learning_rate": 7.615025066893001e-06, "loss": 0.4261, "step": 12746 }, { "epoch": 1.04, "grad_norm": 9.517232044134829, "learning_rate": 7.614651362080787e-06, "loss": 0.7219, "step": 12747 }, { "epoch": 1.04, "grad_norm": 3.1950782344470507, "learning_rate": 7.614277637164103e-06, "loss": 0.5758, "step": 12748 }, { "epoch": 1.04, "grad_norm": 3.257063695380154, "learning_rate": 7.613903892145822e-06, "loss": 0.6144, "step": 12749 }, { "epoch": 1.04, "grad_norm": 2.397110433945175, "learning_rate": 7.6135301270288175e-06, "loss": 0.7748, "step": 12750 }, { "epoch": 1.04, "grad_norm": 9.31565973881787, "learning_rate": 7.613156341815962e-06, "loss": 0.544, "step": 12751 }, { "epoch": 1.04, "grad_norm": 5.01815608637562, "learning_rate": 7.612782536510134e-06, "loss": 0.5427, "step": 12752 }, { "epoch": 1.04, "grad_norm": 7.558146419952685, "learning_rate": 7.612408711114203e-06, "loss": 0.7821, "step": 12753 }, { "epoch": 1.04, "grad_norm": 3.2531385248784357, "learning_rate": 7.612034865631046e-06, "loss": 0.7159, "step": 12754 }, { "epoch": 1.04, "grad_norm": 4.064756667661807, "learning_rate": 7.611661000063537e-06, "loss": 0.6756, "step": 12755 }, { "epoch": 1.04, "grad_norm": 5.457252790122411, "learning_rate": 7.61128711441455e-06, "loss": 0.6291, "step": 12756 }, { "epoch": 1.04, "grad_norm": 3.1934871898818322, "learning_rate": 7.6109132086869606e-06, "loss": 0.5896, "step": 12757 }, { "epoch": 1.04, "grad_norm": 2.9152751405486406, "learning_rate": 7.6105392828836445e-06, "loss": 0.609, "step": 12758 }, { "epoch": 1.04, "grad_norm": 2.5100458126505565, "learning_rate": 7.610165337007475e-06, "loss": 0.6877, "step": 12759 }, { "epoch": 1.04, "grad_norm": 6.166657944816198, "learning_rate": 7.609791371061328e-06, "loss": 0.6328, "step": 12760 }, { "epoch": 1.04, "grad_norm": 2.4854246456665208, "learning_rate": 7.609417385048081e-06, "loss": 0.5329, "step": 12761 }, { "epoch": 1.04, "grad_norm": 2.4282461551566312, "learning_rate": 7.609043378970607e-06, "loss": 0.5994, "step": 12762 }, { "epoch": 1.04, "grad_norm": 2.634963939819283, "learning_rate": 7.608669352831783e-06, "loss": 0.5741, "step": 12763 }, { "epoch": 1.04, "grad_norm": 8.22524159691337, "learning_rate": 7.6082953066344855e-06, "loss": 0.6962, "step": 12764 }, { "epoch": 1.04, "grad_norm": 5.089818500257091, "learning_rate": 7.60792124038159e-06, "loss": 0.5954, "step": 12765 }, { "epoch": 1.04, "grad_norm": 2.864265783619446, "learning_rate": 7.607547154075971e-06, "loss": 0.3764, "step": 12766 }, { "epoch": 1.04, "grad_norm": 2.986366872991414, "learning_rate": 7.607173047720507e-06, "loss": 0.5681, "step": 12767 }, { "epoch": 1.04, "grad_norm": 4.699899032336854, "learning_rate": 7.606798921318076e-06, "loss": 0.7856, "step": 12768 }, { "epoch": 1.04, "grad_norm": 5.651795465220876, "learning_rate": 7.606424774871553e-06, "loss": 0.504, "step": 12769 }, { "epoch": 1.04, "grad_norm": 7.733345413134611, "learning_rate": 7.606050608383813e-06, "loss": 0.54, "step": 12770 }, { "epoch": 1.04, "grad_norm": 2.6129903048693426, "learning_rate": 7.605676421857734e-06, "loss": 0.5981, "step": 12771 }, { "epoch": 1.04, "grad_norm": 4.026272806869781, "learning_rate": 7.6053022152961955e-06, "loss": 0.678, "step": 12772 }, { "epoch": 1.04, "grad_norm": 4.65748943283699, "learning_rate": 7.6049279887020735e-06, "loss": 0.5444, "step": 12773 }, { "epoch": 1.04, "grad_norm": 12.02601042081635, "learning_rate": 7.604553742078245e-06, "loss": 0.6976, "step": 12774 }, { "epoch": 1.04, "grad_norm": 13.724336846252793, "learning_rate": 7.604179475427587e-06, "loss": 0.666, "step": 12775 }, { "epoch": 1.04, "grad_norm": 2.9504382687062423, "learning_rate": 7.603805188752978e-06, "loss": 0.6547, "step": 12776 }, { "epoch": 1.04, "grad_norm": 17.464439827354802, "learning_rate": 7.6034308820572975e-06, "loss": 0.6583, "step": 12777 }, { "epoch": 1.04, "grad_norm": 4.550452991851988, "learning_rate": 7.603056555343422e-06, "loss": 0.5131, "step": 12778 }, { "epoch": 1.04, "grad_norm": 3.456177792292817, "learning_rate": 7.602682208614229e-06, "loss": 0.7705, "step": 12779 }, { "epoch": 1.04, "grad_norm": 7.334727550407669, "learning_rate": 7.602307841872599e-06, "loss": 0.6911, "step": 12780 }, { "epoch": 1.04, "grad_norm": 3.4165144011491817, "learning_rate": 7.601933455121409e-06, "loss": 0.5651, "step": 12781 }, { "epoch": 1.04, "grad_norm": 9.218716007509439, "learning_rate": 7.60155904836354e-06, "loss": 0.4555, "step": 12782 }, { "epoch": 1.04, "grad_norm": 4.223146443670349, "learning_rate": 7.601184621601867e-06, "loss": 0.6172, "step": 12783 }, { "epoch": 1.04, "grad_norm": 3.3109871212909776, "learning_rate": 7.600810174839271e-06, "loss": 0.5285, "step": 12784 }, { "epoch": 1.04, "grad_norm": 3.419517487598908, "learning_rate": 7.600435708078631e-06, "loss": 0.5786, "step": 12785 }, { "epoch": 1.04, "grad_norm": 4.815783386241288, "learning_rate": 7.600061221322829e-06, "loss": 0.6246, "step": 12786 }, { "epoch": 1.04, "grad_norm": 2.9430935855323996, "learning_rate": 7.599686714574741e-06, "loss": 0.5619, "step": 12787 }, { "epoch": 1.04, "grad_norm": 3.5133423983408116, "learning_rate": 7.599312187837247e-06, "loss": 0.724, "step": 12788 }, { "epoch": 1.04, "grad_norm": 4.962449945598559, "learning_rate": 7.598937641113226e-06, "loss": 0.6352, "step": 12789 }, { "epoch": 1.04, "grad_norm": 4.084565556809841, "learning_rate": 7.598563074405563e-06, "loss": 0.6259, "step": 12790 }, { "epoch": 1.04, "grad_norm": 3.5886149354437262, "learning_rate": 7.598188487717133e-06, "loss": 0.6265, "step": 12791 }, { "epoch": 1.04, "grad_norm": 6.560635682657924, "learning_rate": 7.597813881050817e-06, "loss": 0.5909, "step": 12792 }, { "epoch": 1.04, "grad_norm": 2.649151892526432, "learning_rate": 7.597439254409498e-06, "loss": 0.6349, "step": 12793 }, { "epoch": 1.04, "grad_norm": 3.1693242036844738, "learning_rate": 7.597064607796054e-06, "loss": 0.5471, "step": 12794 }, { "epoch": 1.04, "grad_norm": 2.6411156987136692, "learning_rate": 7.596689941213366e-06, "loss": 0.5808, "step": 12795 }, { "epoch": 1.04, "grad_norm": 3.2723051211658087, "learning_rate": 7.596315254664317e-06, "loss": 0.6621, "step": 12796 }, { "epoch": 1.04, "grad_norm": 3.4525280464515835, "learning_rate": 7.5959405481517855e-06, "loss": 0.4874, "step": 12797 }, { "epoch": 1.04, "grad_norm": 5.526233481973619, "learning_rate": 7.595565821678653e-06, "loss": 0.6286, "step": 12798 }, { "epoch": 1.04, "grad_norm": 2.872959149562405, "learning_rate": 7.595191075247803e-06, "loss": 0.615, "step": 12799 }, { "epoch": 1.04, "grad_norm": 3.698519515892671, "learning_rate": 7.594816308862114e-06, "loss": 0.4336, "step": 12800 }, { "epoch": 1.04, "grad_norm": 3.163842019446634, "learning_rate": 7.594441522524469e-06, "loss": 0.6808, "step": 12801 }, { "epoch": 1.04, "grad_norm": 5.5689324479126245, "learning_rate": 7.594066716237751e-06, "loss": 0.5975, "step": 12802 }, { "epoch": 1.04, "grad_norm": 3.235896546149228, "learning_rate": 7.593691890004841e-06, "loss": 0.4524, "step": 12803 }, { "epoch": 1.04, "grad_norm": 2.616976415961154, "learning_rate": 7.593317043828618e-06, "loss": 0.5607, "step": 12804 }, { "epoch": 1.04, "grad_norm": 2.933131474160084, "learning_rate": 7.592942177711971e-06, "loss": 0.59, "step": 12805 }, { "epoch": 1.04, "grad_norm": 4.270078724733852, "learning_rate": 7.592567291657778e-06, "loss": 0.6675, "step": 12806 }, { "epoch": 1.04, "grad_norm": 4.330723914274467, "learning_rate": 7.592192385668919e-06, "loss": 0.7049, "step": 12807 }, { "epoch": 1.04, "grad_norm": 2.9772652714102548, "learning_rate": 7.591817459748283e-06, "loss": 0.3996, "step": 12808 }, { "epoch": 1.04, "grad_norm": 3.6057363048709923, "learning_rate": 7.591442513898748e-06, "loss": 0.7368, "step": 12809 }, { "epoch": 1.04, "grad_norm": 3.260327628676037, "learning_rate": 7.5910675481232e-06, "loss": 0.6862, "step": 12810 }, { "epoch": 1.04, "grad_norm": 2.6641322549182025, "learning_rate": 7.59069256242452e-06, "loss": 0.5374, "step": 12811 }, { "epoch": 1.04, "grad_norm": 8.605123424224642, "learning_rate": 7.5903175568055924e-06, "loss": 0.5916, "step": 12812 }, { "epoch": 1.04, "grad_norm": 4.02195230421896, "learning_rate": 7.5899425312693e-06, "loss": 0.5072, "step": 12813 }, { "epoch": 1.04, "grad_norm": 7.945457106233641, "learning_rate": 7.589567485818528e-06, "loss": 0.6073, "step": 12814 }, { "epoch": 1.04, "grad_norm": 3.7491013436098926, "learning_rate": 7.589192420456159e-06, "loss": 0.6567, "step": 12815 }, { "epoch": 1.04, "grad_norm": 3.041146457022362, "learning_rate": 7.588817335185077e-06, "loss": 0.6743, "step": 12816 }, { "epoch": 1.04, "grad_norm": 5.3290351982731, "learning_rate": 7.588442230008164e-06, "loss": 0.5651, "step": 12817 }, { "epoch": 1.04, "grad_norm": 2.900515589125215, "learning_rate": 7.5880671049283095e-06, "loss": 0.7394, "step": 12818 }, { "epoch": 1.04, "grad_norm": 2.8867849310449536, "learning_rate": 7.5876919599483935e-06, "loss": 0.6317, "step": 12819 }, { "epoch": 1.04, "grad_norm": 3.0397792305371705, "learning_rate": 7.587316795071303e-06, "loss": 0.7619, "step": 12820 }, { "epoch": 1.04, "grad_norm": 5.8819838721710225, "learning_rate": 7.586941610299918e-06, "loss": 0.5767, "step": 12821 }, { "epoch": 1.04, "grad_norm": 3.0413282927489536, "learning_rate": 7.58656640563713e-06, "loss": 0.5173, "step": 12822 }, { "epoch": 1.04, "grad_norm": 3.2020903224621637, "learning_rate": 7.58619118108582e-06, "loss": 0.6467, "step": 12823 }, { "epoch": 1.04, "grad_norm": 5.0346044767259235, "learning_rate": 7.585815936648875e-06, "loss": 0.766, "step": 12824 }, { "epoch": 1.04, "grad_norm": 5.074782026541122, "learning_rate": 7.585440672329179e-06, "loss": 0.6455, "step": 12825 }, { "epoch": 1.04, "grad_norm": 2.9257779028272393, "learning_rate": 7.585065388129618e-06, "loss": 0.6246, "step": 12826 }, { "epoch": 1.04, "grad_norm": 3.3028295750054903, "learning_rate": 7.584690084053077e-06, "loss": 0.6052, "step": 12827 }, { "epoch": 1.04, "grad_norm": 4.223614400729803, "learning_rate": 7.584314760102442e-06, "loss": 0.6191, "step": 12828 }, { "epoch": 1.04, "grad_norm": 4.445941894319534, "learning_rate": 7.583939416280599e-06, "loss": 0.6843, "step": 12829 }, { "epoch": 1.04, "grad_norm": 3.057786288720754, "learning_rate": 7.5835640525904355e-06, "loss": 0.7344, "step": 12830 }, { "epoch": 1.04, "grad_norm": 4.952058114611515, "learning_rate": 7.583188669034836e-06, "loss": 0.621, "step": 12831 }, { "epoch": 1.04, "grad_norm": 6.136468826486243, "learning_rate": 7.582813265616686e-06, "loss": 0.7438, "step": 12832 }, { "epoch": 1.04, "grad_norm": 5.744980675253817, "learning_rate": 7.5824378423388745e-06, "loss": 0.6039, "step": 12833 }, { "epoch": 1.04, "grad_norm": 2.0920290602443576, "learning_rate": 7.582062399204286e-06, "loss": 0.6646, "step": 12834 }, { "epoch": 1.04, "grad_norm": 4.18517096491782, "learning_rate": 7.581686936215811e-06, "loss": 0.6616, "step": 12835 }, { "epoch": 1.04, "grad_norm": 3.124520912811146, "learning_rate": 7.581311453376332e-06, "loss": 0.6432, "step": 12836 }, { "epoch": 1.04, "grad_norm": 2.624355000407767, "learning_rate": 7.580935950688737e-06, "loss": 0.4822, "step": 12837 }, { "epoch": 1.04, "grad_norm": 3.3228417315567778, "learning_rate": 7.580560428155917e-06, "loss": 0.5396, "step": 12838 }, { "epoch": 1.04, "grad_norm": 2.111096381939276, "learning_rate": 7.580184885780755e-06, "loss": 0.5327, "step": 12839 }, { "epoch": 1.04, "grad_norm": 4.116813057621899, "learning_rate": 7.579809323566141e-06, "loss": 0.5965, "step": 12840 }, { "epoch": 1.04, "grad_norm": 2.678693845015011, "learning_rate": 7.579433741514962e-06, "loss": 0.6427, "step": 12841 }, { "epoch": 1.04, "grad_norm": 4.288166769486887, "learning_rate": 7.579058139630107e-06, "loss": 0.5799, "step": 12842 }, { "epoch": 1.04, "grad_norm": 5.145233701073364, "learning_rate": 7.578682517914462e-06, "loss": 0.5781, "step": 12843 }, { "epoch": 1.04, "grad_norm": 4.170856938477209, "learning_rate": 7.578306876370918e-06, "loss": 0.6935, "step": 12844 }, { "epoch": 1.04, "grad_norm": 3.204411420820357, "learning_rate": 7.577931215002359e-06, "loss": 0.721, "step": 12845 }, { "epoch": 1.04, "grad_norm": 3.1376649062294817, "learning_rate": 7.577555533811678e-06, "loss": 0.613, "step": 12846 }, { "epoch": 1.04, "grad_norm": 6.764195349377388, "learning_rate": 7.577179832801762e-06, "loss": 0.6847, "step": 12847 }, { "epoch": 1.04, "grad_norm": 5.208624196821827, "learning_rate": 7.5768041119755e-06, "loss": 0.7508, "step": 12848 }, { "epoch": 1.04, "grad_norm": 4.6310628681771036, "learning_rate": 7.57642837133578e-06, "loss": 0.6596, "step": 12849 }, { "epoch": 1.04, "grad_norm": 4.085638005543012, "learning_rate": 7.576052610885492e-06, "loss": 0.7066, "step": 12850 }, { "epoch": 1.04, "grad_norm": 5.2807557012953135, "learning_rate": 7.575676830627525e-06, "loss": 0.5935, "step": 12851 }, { "epoch": 1.04, "grad_norm": 3.9332013067601572, "learning_rate": 7.57530103056477e-06, "loss": 0.924, "step": 12852 }, { "epoch": 1.04, "grad_norm": 5.7821358569883845, "learning_rate": 7.574925210700112e-06, "loss": 0.6504, "step": 12853 }, { "epoch": 1.04, "grad_norm": 5.942539675122486, "learning_rate": 7.574549371036447e-06, "loss": 0.5952, "step": 12854 }, { "epoch": 1.04, "grad_norm": 2.832936696465376, "learning_rate": 7.574173511576661e-06, "loss": 0.6834, "step": 12855 }, { "epoch": 1.04, "grad_norm": 4.435250721417182, "learning_rate": 7.5737976323236455e-06, "loss": 0.5961, "step": 12856 }, { "epoch": 1.04, "grad_norm": 3.318390130152538, "learning_rate": 7.5734217332802884e-06, "loss": 0.5799, "step": 12857 }, { "epoch": 1.04, "grad_norm": 3.090246126662872, "learning_rate": 7.573045814449482e-06, "loss": 0.5969, "step": 12858 }, { "epoch": 1.04, "grad_norm": 3.3261611515366223, "learning_rate": 7.572669875834118e-06, "loss": 0.635, "step": 12859 }, { "epoch": 1.04, "grad_norm": 2.73147751416171, "learning_rate": 7.572293917437084e-06, "loss": 0.6056, "step": 12860 }, { "epoch": 1.04, "grad_norm": 3.229177332884211, "learning_rate": 7.571917939261272e-06, "loss": 0.6744, "step": 12861 }, { "epoch": 1.04, "grad_norm": 5.06987894923852, "learning_rate": 7.5715419413095734e-06, "loss": 0.6544, "step": 12862 }, { "epoch": 1.04, "grad_norm": 4.36480891428641, "learning_rate": 7.57116592358488e-06, "loss": 0.4624, "step": 12863 }, { "epoch": 1.04, "grad_norm": 3.010204142447662, "learning_rate": 7.570789886090083e-06, "loss": 0.526, "step": 12864 }, { "epoch": 1.04, "grad_norm": 6.566813077356942, "learning_rate": 7.5704138288280714e-06, "loss": 0.6147, "step": 12865 }, { "epoch": 1.04, "grad_norm": 3.7478303308783616, "learning_rate": 7.57003775180174e-06, "loss": 0.6144, "step": 12866 }, { "epoch": 1.05, "grad_norm": 2.9057936474336468, "learning_rate": 7.569661655013978e-06, "loss": 0.5585, "step": 12867 }, { "epoch": 1.05, "grad_norm": 3.3389219202585854, "learning_rate": 7.569285538467679e-06, "loss": 0.719, "step": 12868 }, { "epoch": 1.05, "grad_norm": 4.361067155938908, "learning_rate": 7.568909402165732e-06, "loss": 0.7157, "step": 12869 }, { "epoch": 1.05, "grad_norm": 2.926865809629079, "learning_rate": 7.568533246111034e-06, "loss": 0.6856, "step": 12870 }, { "epoch": 1.05, "grad_norm": 3.10965533698562, "learning_rate": 7.5681570703064745e-06, "loss": 0.6095, "step": 12871 }, { "epoch": 1.05, "grad_norm": 6.978224756012123, "learning_rate": 7.567780874754945e-06, "loss": 0.5196, "step": 12872 }, { "epoch": 1.05, "grad_norm": 2.697163511376358, "learning_rate": 7.567404659459341e-06, "loss": 0.6253, "step": 12873 }, { "epoch": 1.05, "grad_norm": 4.302982332376722, "learning_rate": 7.567028424422551e-06, "loss": 0.5521, "step": 12874 }, { "epoch": 1.05, "grad_norm": 7.0034166533003415, "learning_rate": 7.566652169647472e-06, "loss": 0.7202, "step": 12875 }, { "epoch": 1.05, "grad_norm": 5.4408456646420715, "learning_rate": 7.566275895136996e-06, "loss": 0.6429, "step": 12876 }, { "epoch": 1.05, "grad_norm": 2.705834714059999, "learning_rate": 7.565899600894015e-06, "loss": 0.498, "step": 12877 }, { "epoch": 1.05, "grad_norm": 3.2415108009721076, "learning_rate": 7.565523286921423e-06, "loss": 0.7025, "step": 12878 }, { "epoch": 1.05, "grad_norm": 4.616854295087875, "learning_rate": 7.565146953222116e-06, "loss": 0.7654, "step": 12879 }, { "epoch": 1.05, "grad_norm": 2.191030664565779, "learning_rate": 7.564770599798984e-06, "loss": 0.5182, "step": 12880 }, { "epoch": 1.05, "grad_norm": 5.587930036843815, "learning_rate": 7.564394226654923e-06, "loss": 0.6505, "step": 12881 }, { "epoch": 1.05, "grad_norm": 3.689544741376294, "learning_rate": 7.564017833792825e-06, "loss": 0.7206, "step": 12882 }, { "epoch": 1.05, "grad_norm": 3.7347983475460436, "learning_rate": 7.563641421215586e-06, "loss": 0.7166, "step": 12883 }, { "epoch": 1.05, "grad_norm": 2.1950899116882363, "learning_rate": 7.5632649889261e-06, "loss": 0.6308, "step": 12884 }, { "epoch": 1.05, "grad_norm": 2.6291732741040246, "learning_rate": 7.562888536927262e-06, "loss": 0.5754, "step": 12885 }, { "epoch": 1.05, "grad_norm": 5.134022269848872, "learning_rate": 7.562512065221964e-06, "loss": 0.5737, "step": 12886 }, { "epoch": 1.05, "grad_norm": 2.412242116634721, "learning_rate": 7.562135573813104e-06, "loss": 0.6684, "step": 12887 }, { "epoch": 1.05, "grad_norm": 3.063713919672888, "learning_rate": 7.561759062703575e-06, "loss": 0.5779, "step": 12888 }, { "epoch": 1.05, "grad_norm": 5.1595520351443005, "learning_rate": 7.561382531896273e-06, "loss": 0.6646, "step": 12889 }, { "epoch": 1.05, "grad_norm": 2.7146067153071907, "learning_rate": 7.561005981394092e-06, "loss": 0.545, "step": 12890 }, { "epoch": 1.05, "grad_norm": 2.4833305272159945, "learning_rate": 7.560629411199928e-06, "loss": 0.5678, "step": 12891 }, { "epoch": 1.05, "grad_norm": 5.084120901006132, "learning_rate": 7.560252821316677e-06, "loss": 0.6182, "step": 12892 }, { "epoch": 1.05, "grad_norm": 3.0139425149079426, "learning_rate": 7.559876211747234e-06, "loss": 0.6031, "step": 12893 }, { "epoch": 1.05, "grad_norm": 3.3030395034120947, "learning_rate": 7.559499582494495e-06, "loss": 0.5666, "step": 12894 }, { "epoch": 1.05, "grad_norm": 3.583735421756559, "learning_rate": 7.559122933561356e-06, "loss": 0.544, "step": 12895 }, { "epoch": 1.05, "grad_norm": 6.827200608305828, "learning_rate": 7.5587462649507134e-06, "loss": 0.5064, "step": 12896 }, { "epoch": 1.05, "grad_norm": 3.3939484459747904, "learning_rate": 7.558369576665464e-06, "loss": 0.5746, "step": 12897 }, { "epoch": 1.05, "grad_norm": 2.8013861541149865, "learning_rate": 7.557992868708501e-06, "loss": 0.628, "step": 12898 }, { "epoch": 1.05, "grad_norm": 4.005491801836468, "learning_rate": 7.557616141082727e-06, "loss": 0.6334, "step": 12899 }, { "epoch": 1.05, "grad_norm": 2.8944989689176843, "learning_rate": 7.5572393937910325e-06, "loss": 0.6981, "step": 12900 }, { "epoch": 1.05, "grad_norm": 31.879377491954656, "learning_rate": 7.556862626836317e-06, "loss": 0.6903, "step": 12901 }, { "epoch": 1.05, "grad_norm": 2.282639491219605, "learning_rate": 7.556485840221478e-06, "loss": 0.7524, "step": 12902 }, { "epoch": 1.05, "grad_norm": 5.453569689945408, "learning_rate": 7.5561090339494126e-06, "loss": 0.5688, "step": 12903 }, { "epoch": 1.05, "grad_norm": 4.297351724448505, "learning_rate": 7.555732208023017e-06, "loss": 0.5193, "step": 12904 }, { "epoch": 1.05, "grad_norm": 5.553114862082581, "learning_rate": 7.5553553624451905e-06, "loss": 0.6624, "step": 12905 }, { "epoch": 1.05, "grad_norm": 4.554025873564326, "learning_rate": 7.5549784972188275e-06, "loss": 0.664, "step": 12906 }, { "epoch": 1.05, "grad_norm": 2.7215684347286344, "learning_rate": 7.55460161234683e-06, "loss": 0.6802, "step": 12907 }, { "epoch": 1.05, "grad_norm": 2.529484433345862, "learning_rate": 7.5542247078320925e-06, "loss": 0.6583, "step": 12908 }, { "epoch": 1.05, "grad_norm": 3.4351283090074105, "learning_rate": 7.553847783677515e-06, "loss": 0.6228, "step": 12909 }, { "epoch": 1.05, "grad_norm": 2.8329958193872615, "learning_rate": 7.553470839885994e-06, "loss": 0.7529, "step": 12910 }, { "epoch": 1.05, "grad_norm": 4.146702165606948, "learning_rate": 7.553093876460431e-06, "loss": 0.6535, "step": 12911 }, { "epoch": 1.05, "grad_norm": 4.552828545682671, "learning_rate": 7.552716893403721e-06, "loss": 0.4728, "step": 12912 }, { "epoch": 1.05, "grad_norm": 3.0591043155616564, "learning_rate": 7.552339890718765e-06, "loss": 0.4705, "step": 12913 }, { "epoch": 1.05, "grad_norm": 18.64032005174934, "learning_rate": 7.55196286840846e-06, "loss": 0.5905, "step": 12914 }, { "epoch": 1.05, "grad_norm": 3.176329840718672, "learning_rate": 7.551585826475707e-06, "loss": 0.6987, "step": 12915 }, { "epoch": 1.05, "grad_norm": 7.183984656313434, "learning_rate": 7.551208764923403e-06, "loss": 0.6651, "step": 12916 }, { "epoch": 1.05, "grad_norm": 2.4136293255632717, "learning_rate": 7.550831683754449e-06, "loss": 0.4232, "step": 12917 }, { "epoch": 1.05, "grad_norm": 2.4527900065214037, "learning_rate": 7.550454582971745e-06, "loss": 0.5677, "step": 12918 }, { "epoch": 1.05, "grad_norm": 3.465747657421707, "learning_rate": 7.550077462578188e-06, "loss": 0.6474, "step": 12919 }, { "epoch": 1.05, "grad_norm": 2.891478918299922, "learning_rate": 7.5497003225766795e-06, "loss": 0.6477, "step": 12920 }, { "epoch": 1.05, "grad_norm": 4.44268419784709, "learning_rate": 7.549323162970119e-06, "loss": 0.5989, "step": 12921 }, { "epoch": 1.05, "grad_norm": 6.736434592398039, "learning_rate": 7.548945983761407e-06, "loss": 0.7118, "step": 12922 }, { "epoch": 1.05, "grad_norm": 21.55659486119966, "learning_rate": 7.548568784953443e-06, "loss": 0.5354, "step": 12923 }, { "epoch": 1.05, "grad_norm": 4.545342723425328, "learning_rate": 7.548191566549128e-06, "loss": 0.7615, "step": 12924 }, { "epoch": 1.05, "grad_norm": 4.597743276211596, "learning_rate": 7.547814328551363e-06, "loss": 0.5849, "step": 12925 }, { "epoch": 1.05, "grad_norm": 4.5639859704389485, "learning_rate": 7.547437070963046e-06, "loss": 0.6209, "step": 12926 }, { "epoch": 1.05, "grad_norm": 2.8344832636185395, "learning_rate": 7.547059793787082e-06, "loss": 0.545, "step": 12927 }, { "epoch": 1.05, "grad_norm": 2.6199364508971943, "learning_rate": 7.546682497026368e-06, "loss": 0.6768, "step": 12928 }, { "epoch": 1.05, "grad_norm": 22.088523405703608, "learning_rate": 7.546305180683806e-06, "loss": 0.7606, "step": 12929 }, { "epoch": 1.05, "grad_norm": 5.220863625599206, "learning_rate": 7.545927844762297e-06, "loss": 0.5198, "step": 12930 }, { "epoch": 1.05, "grad_norm": 6.245333746181903, "learning_rate": 7.545550489264746e-06, "loss": 0.6268, "step": 12931 }, { "epoch": 1.05, "grad_norm": 3.4138271374914577, "learning_rate": 7.545173114194051e-06, "loss": 0.5801, "step": 12932 }, { "epoch": 1.05, "grad_norm": 4.026446039957563, "learning_rate": 7.544795719553113e-06, "loss": 0.5299, "step": 12933 }, { "epoch": 1.05, "grad_norm": 7.555896978053495, "learning_rate": 7.544418305344836e-06, "loss": 0.7816, "step": 12934 }, { "epoch": 1.05, "grad_norm": 4.283123159716678, "learning_rate": 7.544040871572122e-06, "loss": 0.6527, "step": 12935 }, { "epoch": 1.05, "grad_norm": 3.6196072724572073, "learning_rate": 7.5436634182378735e-06, "loss": 0.6432, "step": 12936 }, { "epoch": 1.05, "grad_norm": 3.89965917088967, "learning_rate": 7.54328594534499e-06, "loss": 0.6019, "step": 12937 }, { "epoch": 1.05, "grad_norm": 2.822201854569433, "learning_rate": 7.542908452896376e-06, "loss": 0.6069, "step": 12938 }, { "epoch": 1.05, "grad_norm": 4.346270456196262, "learning_rate": 7.5425309408949346e-06, "loss": 0.5748, "step": 12939 }, { "epoch": 1.05, "grad_norm": 3.696427546959457, "learning_rate": 7.542153409343568e-06, "loss": 0.6025, "step": 12940 }, { "epoch": 1.05, "grad_norm": 6.426884662177392, "learning_rate": 7.541775858245179e-06, "loss": 0.7392, "step": 12941 }, { "epoch": 1.05, "grad_norm": 2.242249684746652, "learning_rate": 7.541398287602668e-06, "loss": 0.5272, "step": 12942 }, { "epoch": 1.05, "grad_norm": 4.611115678953091, "learning_rate": 7.541020697418944e-06, "loss": 0.5293, "step": 12943 }, { "epoch": 1.05, "grad_norm": 5.543807748018826, "learning_rate": 7.540643087696906e-06, "loss": 0.7165, "step": 12944 }, { "epoch": 1.05, "grad_norm": 2.394840366377686, "learning_rate": 7.540265458439457e-06, "loss": 0.561, "step": 12945 }, { "epoch": 1.05, "grad_norm": 3.7510754180689383, "learning_rate": 7.539887809649505e-06, "loss": 0.6649, "step": 12946 }, { "epoch": 1.05, "grad_norm": 3.271408650176617, "learning_rate": 7.539510141329949e-06, "loss": 0.5564, "step": 12947 }, { "epoch": 1.05, "grad_norm": 2.7833473760466307, "learning_rate": 7.539132453483696e-06, "loss": 0.6426, "step": 12948 }, { "epoch": 1.05, "grad_norm": 4.143611131463908, "learning_rate": 7.538754746113649e-06, "loss": 0.6389, "step": 12949 }, { "epoch": 1.05, "grad_norm": 12.08943732076139, "learning_rate": 7.5383770192227115e-06, "loss": 0.5431, "step": 12950 }, { "epoch": 1.05, "grad_norm": 4.768896154153527, "learning_rate": 7.53799927281379e-06, "loss": 0.6396, "step": 12951 }, { "epoch": 1.05, "grad_norm": 3.3455061762708485, "learning_rate": 7.537621506889787e-06, "loss": 0.6087, "step": 12952 }, { "epoch": 1.05, "grad_norm": 5.27275111067098, "learning_rate": 7.537243721453609e-06, "loss": 0.6011, "step": 12953 }, { "epoch": 1.05, "grad_norm": 3.3120248364910463, "learning_rate": 7.536865916508158e-06, "loss": 0.775, "step": 12954 }, { "epoch": 1.05, "grad_norm": 2.92865777603432, "learning_rate": 7.536488092056343e-06, "loss": 0.6719, "step": 12955 }, { "epoch": 1.05, "grad_norm": 21.51719720798822, "learning_rate": 7.536110248101066e-06, "loss": 0.6667, "step": 12956 }, { "epoch": 1.05, "grad_norm": 7.982117329036535, "learning_rate": 7.5357323846452336e-06, "loss": 0.8352, "step": 12957 }, { "epoch": 1.05, "grad_norm": 2.445754806062073, "learning_rate": 7.535354501691751e-06, "loss": 0.705, "step": 12958 }, { "epoch": 1.05, "grad_norm": 7.3088583547263175, "learning_rate": 7.534976599243524e-06, "loss": 0.5363, "step": 12959 }, { "epoch": 1.05, "grad_norm": 2.2036612728870257, "learning_rate": 7.534598677303457e-06, "loss": 0.6128, "step": 12960 }, { "epoch": 1.05, "grad_norm": 4.256759567307507, "learning_rate": 7.534220735874459e-06, "loss": 0.5546, "step": 12961 }, { "epoch": 1.05, "grad_norm": 3.209387645788221, "learning_rate": 7.533842774959433e-06, "loss": 0.7113, "step": 12962 }, { "epoch": 1.05, "grad_norm": 2.8565533203506557, "learning_rate": 7.533464794561285e-06, "loss": 0.5947, "step": 12963 }, { "epoch": 1.05, "grad_norm": 5.952308731330324, "learning_rate": 7.533086794682925e-06, "loss": 0.5474, "step": 12964 }, { "epoch": 1.05, "grad_norm": 3.3088632128223727, "learning_rate": 7.5327087753272555e-06, "loss": 0.5409, "step": 12965 }, { "epoch": 1.05, "grad_norm": 2.9167789673277866, "learning_rate": 7.532330736497187e-06, "loss": 0.6823, "step": 12966 }, { "epoch": 1.05, "grad_norm": 2.5216256001648847, "learning_rate": 7.531952678195621e-06, "loss": 0.6508, "step": 12967 }, { "epoch": 1.05, "grad_norm": 4.206423326585319, "learning_rate": 7.531574600425468e-06, "loss": 0.5374, "step": 12968 }, { "epoch": 1.05, "grad_norm": 3.205265185722258, "learning_rate": 7.531196503189637e-06, "loss": 0.6661, "step": 12969 }, { "epoch": 1.05, "grad_norm": 2.5274990819323646, "learning_rate": 7.530818386491032e-06, "loss": 0.5368, "step": 12970 }, { "epoch": 1.05, "grad_norm": 2.2976003245348555, "learning_rate": 7.53044025033256e-06, "loss": 0.6919, "step": 12971 }, { "epoch": 1.05, "grad_norm": 4.50511898097329, "learning_rate": 7.5300620947171295e-06, "loss": 0.7339, "step": 12972 }, { "epoch": 1.05, "grad_norm": 3.100494668097852, "learning_rate": 7.52968391964765e-06, "loss": 0.6969, "step": 12973 }, { "epoch": 1.05, "grad_norm": 5.392658362833077, "learning_rate": 7.529305725127028e-06, "loss": 0.6843, "step": 12974 }, { "epoch": 1.05, "grad_norm": 4.987288503908171, "learning_rate": 7.528927511158172e-06, "loss": 0.5408, "step": 12975 }, { "epoch": 1.05, "grad_norm": 2.626879004492817, "learning_rate": 7.528549277743989e-06, "loss": 0.4995, "step": 12976 }, { "epoch": 1.05, "grad_norm": 2.8047086178959493, "learning_rate": 7.5281710248873866e-06, "loss": 0.6311, "step": 12977 }, { "epoch": 1.05, "grad_norm": 2.47872598683856, "learning_rate": 7.527792752591276e-06, "loss": 0.6015, "step": 12978 }, { "epoch": 1.05, "grad_norm": 4.884974458147338, "learning_rate": 7.527414460858563e-06, "loss": 0.4657, "step": 12979 }, { "epoch": 1.05, "grad_norm": 6.553667367967418, "learning_rate": 7.527036149692157e-06, "loss": 0.6715, "step": 12980 }, { "epoch": 1.05, "grad_norm": 6.211611045747593, "learning_rate": 7.52665781909497e-06, "loss": 0.596, "step": 12981 }, { "epoch": 1.05, "grad_norm": 4.3696167344970185, "learning_rate": 7.526279469069908e-06, "loss": 0.7348, "step": 12982 }, { "epoch": 1.05, "grad_norm": 4.114145999740935, "learning_rate": 7.52590109961988e-06, "loss": 0.5073, "step": 12983 }, { "epoch": 1.05, "grad_norm": 27.3030639853401, "learning_rate": 7.525522710747794e-06, "loss": 0.7124, "step": 12984 }, { "epoch": 1.05, "grad_norm": 8.636906289517063, "learning_rate": 7.525144302456566e-06, "loss": 0.6407, "step": 12985 }, { "epoch": 1.05, "grad_norm": 2.3467047615989722, "learning_rate": 7.524765874749098e-06, "loss": 0.6442, "step": 12986 }, { "epoch": 1.05, "grad_norm": 2.9629395407962105, "learning_rate": 7.524387427628306e-06, "loss": 0.6355, "step": 12987 }, { "epoch": 1.05, "grad_norm": 6.632899706427862, "learning_rate": 7.524008961097094e-06, "loss": 0.7496, "step": 12988 }, { "epoch": 1.05, "grad_norm": 2.784499931967794, "learning_rate": 7.5236304751583765e-06, "loss": 0.6299, "step": 12989 }, { "epoch": 1.06, "grad_norm": 3.812928489468325, "learning_rate": 7.523251969815062e-06, "loss": 0.6023, "step": 12990 }, { "epoch": 1.06, "grad_norm": 3.227227109717069, "learning_rate": 7.52287344507006e-06, "loss": 0.5626, "step": 12991 }, { "epoch": 1.06, "grad_norm": 3.5066681185408695, "learning_rate": 7.522494900926284e-06, "loss": 0.5426, "step": 12992 }, { "epoch": 1.06, "grad_norm": 2.80311215468122, "learning_rate": 7.522116337386642e-06, "loss": 0.574, "step": 12993 }, { "epoch": 1.06, "grad_norm": 3.8688784179005697, "learning_rate": 7.521737754454046e-06, "loss": 0.5773, "step": 12994 }, { "epoch": 1.06, "grad_norm": 2.6756710898649727, "learning_rate": 7.521359152131407e-06, "loss": 0.662, "step": 12995 }, { "epoch": 1.06, "grad_norm": 4.109892498738257, "learning_rate": 7.520980530421635e-06, "loss": 0.6704, "step": 12996 }, { "epoch": 1.06, "grad_norm": 2.9935996837741086, "learning_rate": 7.520601889327643e-06, "loss": 0.5862, "step": 12997 }, { "epoch": 1.06, "grad_norm": 5.566543373292843, "learning_rate": 7.520223228852342e-06, "loss": 0.5596, "step": 12998 }, { "epoch": 1.06, "grad_norm": 6.662874817246582, "learning_rate": 7.519844548998642e-06, "loss": 0.4694, "step": 12999 }, { "epoch": 1.06, "grad_norm": 3.4734137977205592, "learning_rate": 7.5194658497694564e-06, "loss": 0.4194, "step": 13000 }, { "epoch": 1.06, "grad_norm": 3.0767028833173113, "learning_rate": 7.519087131167697e-06, "loss": 0.6477, "step": 13001 }, { "epoch": 1.06, "grad_norm": 3.1101376604430406, "learning_rate": 7.5187083931962744e-06, "loss": 0.5146, "step": 13002 }, { "epoch": 1.06, "grad_norm": 4.192752146596859, "learning_rate": 7.5183296358581025e-06, "loss": 0.5482, "step": 13003 }, { "epoch": 1.06, "grad_norm": 2.730969386956924, "learning_rate": 7.5179508591560925e-06, "loss": 0.5271, "step": 13004 }, { "epoch": 1.06, "grad_norm": 3.6263101866310254, "learning_rate": 7.517572063093157e-06, "loss": 0.7015, "step": 13005 }, { "epoch": 1.06, "grad_norm": 4.2242336701075756, "learning_rate": 7.51719324767221e-06, "loss": 0.618, "step": 13006 }, { "epoch": 1.06, "grad_norm": 3.19168906340524, "learning_rate": 7.5168144128961625e-06, "loss": 0.5821, "step": 13007 }, { "epoch": 1.06, "grad_norm": 3.2437606904974796, "learning_rate": 7.516435558767927e-06, "loss": 0.6609, "step": 13008 }, { "epoch": 1.06, "grad_norm": 9.121274810725644, "learning_rate": 7.516056685290421e-06, "loss": 0.6599, "step": 13009 }, { "epoch": 1.06, "grad_norm": 3.9445503014105667, "learning_rate": 7.5156777924665515e-06, "loss": 0.6084, "step": 13010 }, { "epoch": 1.06, "grad_norm": 6.5464683272119935, "learning_rate": 7.515298880299236e-06, "loss": 0.5868, "step": 13011 }, { "epoch": 1.06, "grad_norm": 2.3965096227047296, "learning_rate": 7.514919948791385e-06, "loss": 0.6254, "step": 13012 }, { "epoch": 1.06, "grad_norm": 36.13939920301798, "learning_rate": 7.514540997945915e-06, "loss": 0.6169, "step": 13013 }, { "epoch": 1.06, "grad_norm": 5.051357332271329, "learning_rate": 7.514162027765739e-06, "loss": 0.6199, "step": 13014 }, { "epoch": 1.06, "grad_norm": 3.649929083914388, "learning_rate": 7.51378303825377e-06, "loss": 0.6355, "step": 13015 }, { "epoch": 1.06, "grad_norm": 2.7801950489804104, "learning_rate": 7.513404029412923e-06, "loss": 0.6716, "step": 13016 }, { "epoch": 1.06, "grad_norm": 7.20404319693001, "learning_rate": 7.5130250012461125e-06, "loss": 0.6472, "step": 13017 }, { "epoch": 1.06, "grad_norm": 4.210849432276456, "learning_rate": 7.512645953756252e-06, "loss": 0.6148, "step": 13018 }, { "epoch": 1.06, "grad_norm": 3.6746032785353115, "learning_rate": 7.512266886946258e-06, "loss": 0.6568, "step": 13019 }, { "epoch": 1.06, "grad_norm": 2.810460607281335, "learning_rate": 7.511887800819042e-06, "loss": 0.5361, "step": 13020 }, { "epoch": 1.06, "grad_norm": 3.1242753321128203, "learning_rate": 7.511508695377522e-06, "loss": 0.5355, "step": 13021 }, { "epoch": 1.06, "grad_norm": 3.7511597008032385, "learning_rate": 7.511129570624611e-06, "loss": 0.6807, "step": 13022 }, { "epoch": 1.06, "grad_norm": 2.7224367570107697, "learning_rate": 7.510750426563225e-06, "loss": 0.652, "step": 13023 }, { "epoch": 1.06, "grad_norm": 3.737486058646932, "learning_rate": 7.510371263196277e-06, "loss": 0.6574, "step": 13024 }, { "epoch": 1.06, "grad_norm": 4.603673008665052, "learning_rate": 7.509992080526687e-06, "loss": 0.5906, "step": 13025 }, { "epoch": 1.06, "grad_norm": 4.016496267754109, "learning_rate": 7.5096128785573676e-06, "loss": 0.612, "step": 13026 }, { "epoch": 1.06, "grad_norm": 4.302334775672012, "learning_rate": 7.509233657291235e-06, "loss": 0.6664, "step": 13027 }, { "epoch": 1.06, "grad_norm": 2.9798563576339276, "learning_rate": 7.508854416731204e-06, "loss": 0.6026, "step": 13028 }, { "epoch": 1.06, "grad_norm": 3.2743207860352648, "learning_rate": 7.508475156880193e-06, "loss": 0.4926, "step": 13029 }, { "epoch": 1.06, "grad_norm": 3.295863186400481, "learning_rate": 7.508095877741116e-06, "loss": 0.5906, "step": 13030 }, { "epoch": 1.06, "grad_norm": 5.1871386537799955, "learning_rate": 7.5077165793168905e-06, "loss": 0.7731, "step": 13031 }, { "epoch": 1.06, "grad_norm": 3.692072194444417, "learning_rate": 7.5073372616104326e-06, "loss": 0.638, "step": 13032 }, { "epoch": 1.06, "grad_norm": 3.6704819569980986, "learning_rate": 7.50695792462466e-06, "loss": 0.8383, "step": 13033 }, { "epoch": 1.06, "grad_norm": 4.263780895661941, "learning_rate": 7.506578568362488e-06, "loss": 0.5926, "step": 13034 }, { "epoch": 1.06, "grad_norm": 2.7738588951559975, "learning_rate": 7.506199192826835e-06, "loss": 0.4862, "step": 13035 }, { "epoch": 1.06, "grad_norm": 4.4427784709790386, "learning_rate": 7.5058197980206145e-06, "loss": 0.544, "step": 13036 }, { "epoch": 1.06, "grad_norm": 3.7691587650125653, "learning_rate": 7.50544038394675e-06, "loss": 0.6153, "step": 13037 }, { "epoch": 1.06, "grad_norm": 3.35902403741966, "learning_rate": 7.505060950608154e-06, "loss": 0.7068, "step": 13038 }, { "epoch": 1.06, "grad_norm": 9.763970095984241, "learning_rate": 7.504681498007744e-06, "loss": 0.58, "step": 13039 }, { "epoch": 1.06, "grad_norm": 5.857111260063662, "learning_rate": 7.50430202614844e-06, "loss": 0.5357, "step": 13040 }, { "epoch": 1.06, "grad_norm": 5.716448218697416, "learning_rate": 7.503922535033159e-06, "loss": 0.5835, "step": 13041 }, { "epoch": 1.06, "grad_norm": 3.7624159394126333, "learning_rate": 7.503543024664819e-06, "loss": 0.5003, "step": 13042 }, { "epoch": 1.06, "grad_norm": 4.460675269904419, "learning_rate": 7.5031634950463385e-06, "loss": 0.6573, "step": 13043 }, { "epoch": 1.06, "grad_norm": 2.3468656457059875, "learning_rate": 7.502783946180634e-06, "loss": 0.6669, "step": 13044 }, { "epoch": 1.06, "grad_norm": 2.6538654253751712, "learning_rate": 7.502404378070625e-06, "loss": 0.6655, "step": 13045 }, { "epoch": 1.06, "grad_norm": 2.392353197394934, "learning_rate": 7.502024790719231e-06, "loss": 0.6067, "step": 13046 }, { "epoch": 1.06, "grad_norm": 12.680554096727802, "learning_rate": 7.501645184129369e-06, "loss": 0.7067, "step": 13047 }, { "epoch": 1.06, "grad_norm": 3.203523074504997, "learning_rate": 7.501265558303958e-06, "loss": 0.6952, "step": 13048 }, { "epoch": 1.06, "grad_norm": 5.835395677164653, "learning_rate": 7.500885913245919e-06, "loss": 0.6415, "step": 13049 }, { "epoch": 1.06, "grad_norm": 5.116114378839112, "learning_rate": 7.500506248958171e-06, "loss": 0.6454, "step": 13050 }, { "epoch": 1.06, "grad_norm": 6.753197175918499, "learning_rate": 7.50012656544363e-06, "loss": 0.7574, "step": 13051 }, { "epoch": 1.06, "grad_norm": 3.5937447995557683, "learning_rate": 7.499746862705218e-06, "loss": 0.549, "step": 13052 }, { "epoch": 1.06, "grad_norm": 2.0345778510253947, "learning_rate": 7.499367140745854e-06, "loss": 0.692, "step": 13053 }, { "epoch": 1.06, "grad_norm": 3.7789440099958203, "learning_rate": 7.498987399568459e-06, "loss": 0.6218, "step": 13054 }, { "epoch": 1.06, "grad_norm": 5.956870211003573, "learning_rate": 7.498607639175952e-06, "loss": 0.5634, "step": 13055 }, { "epoch": 1.06, "grad_norm": 4.942193661542927, "learning_rate": 7.498227859571252e-06, "loss": 0.7677, "step": 13056 }, { "epoch": 1.06, "grad_norm": 2.8438836614209873, "learning_rate": 7.49784806075728e-06, "loss": 0.4879, "step": 13057 }, { "epoch": 1.06, "grad_norm": 2.250099262413133, "learning_rate": 7.497468242736956e-06, "loss": 0.5448, "step": 13058 }, { "epoch": 1.06, "grad_norm": 7.759401909023542, "learning_rate": 7.497088405513202e-06, "loss": 0.4052, "step": 13059 }, { "epoch": 1.06, "grad_norm": 2.5429105166024333, "learning_rate": 7.496708549088938e-06, "loss": 0.5191, "step": 13060 }, { "epoch": 1.06, "grad_norm": 4.907347266909476, "learning_rate": 7.496328673467082e-06, "loss": 0.6831, "step": 13061 }, { "epoch": 1.06, "grad_norm": 3.9018292592070254, "learning_rate": 7.495948778650559e-06, "loss": 0.5689, "step": 13062 }, { "epoch": 1.06, "grad_norm": 2.9649616462911834, "learning_rate": 7.495568864642288e-06, "loss": 0.7447, "step": 13063 }, { "epoch": 1.06, "grad_norm": 3.5333521105053673, "learning_rate": 7.49518893144519e-06, "loss": 0.7198, "step": 13064 }, { "epoch": 1.06, "grad_norm": 2.9993076997153083, "learning_rate": 7.494808979062187e-06, "loss": 0.5752, "step": 13065 }, { "epoch": 1.06, "grad_norm": 3.685347400052185, "learning_rate": 7.4944290074962e-06, "loss": 0.7822, "step": 13066 }, { "epoch": 1.06, "grad_norm": 6.190997307115064, "learning_rate": 7.494049016750152e-06, "loss": 0.6422, "step": 13067 }, { "epoch": 1.06, "grad_norm": 3.586246774580454, "learning_rate": 7.493669006826964e-06, "loss": 0.6159, "step": 13068 }, { "epoch": 1.06, "grad_norm": 3.637529991540786, "learning_rate": 7.493288977729556e-06, "loss": 0.6231, "step": 13069 }, { "epoch": 1.06, "grad_norm": 2.975063591428215, "learning_rate": 7.492908929460854e-06, "loss": 0.6252, "step": 13070 }, { "epoch": 1.06, "grad_norm": 3.880210367107952, "learning_rate": 7.492528862023777e-06, "loss": 0.7183, "step": 13071 }, { "epoch": 1.06, "grad_norm": 3.8759185342633016, "learning_rate": 7.492148775421248e-06, "loss": 0.5719, "step": 13072 }, { "epoch": 1.06, "grad_norm": 2.6444228116965918, "learning_rate": 7.491768669656191e-06, "loss": 0.7625, "step": 13073 }, { "epoch": 1.06, "grad_norm": 35.218429409114826, "learning_rate": 7.491388544731528e-06, "loss": 0.4292, "step": 13074 }, { "epoch": 1.06, "grad_norm": 3.4159379577313604, "learning_rate": 7.4910084006501816e-06, "loss": 0.6335, "step": 13075 }, { "epoch": 1.06, "grad_norm": 3.039646119686925, "learning_rate": 7.490628237415074e-06, "loss": 0.6722, "step": 13076 }, { "epoch": 1.06, "grad_norm": 3.461297144351354, "learning_rate": 7.49024805502913e-06, "loss": 0.5702, "step": 13077 }, { "epoch": 1.06, "grad_norm": 3.5340864901680686, "learning_rate": 7.489867853495271e-06, "loss": 0.5333, "step": 13078 }, { "epoch": 1.06, "grad_norm": 2.1197618044316617, "learning_rate": 7.489487632816424e-06, "loss": 0.542, "step": 13079 }, { "epoch": 1.06, "grad_norm": 11.10742497632765, "learning_rate": 7.489107392995507e-06, "loss": 0.6233, "step": 13080 }, { "epoch": 1.06, "grad_norm": 4.090226583930624, "learning_rate": 7.488727134035449e-06, "loss": 0.5619, "step": 13081 }, { "epoch": 1.06, "grad_norm": 2.603861603129147, "learning_rate": 7.48834685593917e-06, "loss": 0.5104, "step": 13082 }, { "epoch": 1.06, "grad_norm": 4.14082112092457, "learning_rate": 7.487966558709596e-06, "loss": 0.6035, "step": 13083 }, { "epoch": 1.06, "grad_norm": 3.141612130828562, "learning_rate": 7.487586242349652e-06, "loss": 0.7136, "step": 13084 }, { "epoch": 1.06, "grad_norm": 7.1735149052796, "learning_rate": 7.487205906862259e-06, "loss": 0.5953, "step": 13085 }, { "epoch": 1.06, "grad_norm": 3.5794536967046753, "learning_rate": 7.486825552250345e-06, "loss": 0.6325, "step": 13086 }, { "epoch": 1.06, "grad_norm": 5.767455056747688, "learning_rate": 7.486445178516834e-06, "loss": 0.5969, "step": 13087 }, { "epoch": 1.06, "grad_norm": 3.1255294996060705, "learning_rate": 7.48606478566465e-06, "loss": 0.5852, "step": 13088 }, { "epoch": 1.06, "grad_norm": 3.8281320256781823, "learning_rate": 7.485684373696715e-06, "loss": 0.7062, "step": 13089 }, { "epoch": 1.06, "grad_norm": 7.260757385922951, "learning_rate": 7.48530394261596e-06, "loss": 0.4995, "step": 13090 }, { "epoch": 1.06, "grad_norm": 6.443329719967807, "learning_rate": 7.4849234924253065e-06, "loss": 0.6251, "step": 13091 }, { "epoch": 1.06, "grad_norm": 2.8739589027070602, "learning_rate": 7.484543023127679e-06, "loss": 0.7199, "step": 13092 }, { "epoch": 1.06, "grad_norm": 2.6622625021658095, "learning_rate": 7.484162534726005e-06, "loss": 0.6285, "step": 13093 }, { "epoch": 1.06, "grad_norm": 3.283872090441876, "learning_rate": 7.4837820272232105e-06, "loss": 0.549, "step": 13094 }, { "epoch": 1.06, "grad_norm": 2.961479817296063, "learning_rate": 7.48340150062222e-06, "loss": 0.8426, "step": 13095 }, { "epoch": 1.06, "grad_norm": 3.1667071341589974, "learning_rate": 7.48302095492596e-06, "loss": 0.673, "step": 13096 }, { "epoch": 1.06, "grad_norm": 4.976002653264443, "learning_rate": 7.482640390137356e-06, "loss": 0.7217, "step": 13097 }, { "epoch": 1.06, "grad_norm": 3.9857283799431436, "learning_rate": 7.482259806259334e-06, "loss": 0.6385, "step": 13098 }, { "epoch": 1.06, "grad_norm": 7.9234256162612615, "learning_rate": 7.481879203294822e-06, "loss": 0.5577, "step": 13099 }, { "epoch": 1.06, "grad_norm": 4.229645067691927, "learning_rate": 7.481498581246746e-06, "loss": 0.6828, "step": 13100 }, { "epoch": 1.06, "grad_norm": 5.737875261869866, "learning_rate": 7.48111794011803e-06, "loss": 0.6518, "step": 13101 }, { "epoch": 1.06, "grad_norm": 8.521616978414704, "learning_rate": 7.480737279911605e-06, "loss": 0.5183, "step": 13102 }, { "epoch": 1.06, "grad_norm": 3.2798927639312967, "learning_rate": 7.4803566006303955e-06, "loss": 0.4544, "step": 13103 }, { "epoch": 1.06, "grad_norm": 15.073779547788366, "learning_rate": 7.4799759022773275e-06, "loss": 0.6635, "step": 13104 }, { "epoch": 1.06, "grad_norm": 4.552393725827752, "learning_rate": 7.47959518485533e-06, "loss": 0.554, "step": 13105 }, { "epoch": 1.06, "grad_norm": 3.4897679900275986, "learning_rate": 7.479214448367332e-06, "loss": 0.543, "step": 13106 }, { "epoch": 1.06, "grad_norm": 6.7181230743250895, "learning_rate": 7.478833692816259e-06, "loss": 0.7602, "step": 13107 }, { "epoch": 1.06, "grad_norm": 3.2840881241311366, "learning_rate": 7.478452918205038e-06, "loss": 0.5727, "step": 13108 }, { "epoch": 1.06, "grad_norm": 2.1440826203088044, "learning_rate": 7.478072124536598e-06, "loss": 0.4487, "step": 13109 }, { "epoch": 1.06, "grad_norm": 3.2503063605270217, "learning_rate": 7.4776913118138664e-06, "loss": 0.6186, "step": 13110 }, { "epoch": 1.06, "grad_norm": 4.555547705490123, "learning_rate": 7.477310480039771e-06, "loss": 0.5847, "step": 13111 }, { "epoch": 1.06, "grad_norm": 11.169269764016597, "learning_rate": 7.476929629217242e-06, "loss": 0.5817, "step": 13112 }, { "epoch": 1.07, "grad_norm": 4.05308104316401, "learning_rate": 7.4765487593492044e-06, "loss": 0.8092, "step": 13113 }, { "epoch": 1.07, "grad_norm": 4.195311370397483, "learning_rate": 7.476167870438592e-06, "loss": 0.6955, "step": 13114 }, { "epoch": 1.07, "grad_norm": 5.420620062852729, "learning_rate": 7.475786962488329e-06, "loss": 0.6482, "step": 13115 }, { "epoch": 1.07, "grad_norm": 3.2088742909149732, "learning_rate": 7.475406035501346e-06, "loss": 0.5608, "step": 13116 }, { "epoch": 1.07, "grad_norm": 3.6922266571041056, "learning_rate": 7.475025089480571e-06, "loss": 0.7131, "step": 13117 }, { "epoch": 1.07, "grad_norm": 4.092314974821069, "learning_rate": 7.474644124428933e-06, "loss": 0.3942, "step": 13118 }, { "epoch": 1.07, "grad_norm": 2.655913910175776, "learning_rate": 7.474263140349365e-06, "loss": 0.6576, "step": 13119 }, { "epoch": 1.07, "grad_norm": 13.708168113365288, "learning_rate": 7.473882137244792e-06, "loss": 0.6796, "step": 13120 }, { "epoch": 1.07, "grad_norm": 3.2163496780001144, "learning_rate": 7.473501115118145e-06, "loss": 0.6863, "step": 13121 }, { "epoch": 1.07, "grad_norm": 3.6653105698485375, "learning_rate": 7.473120073972353e-06, "loss": 0.5001, "step": 13122 }, { "epoch": 1.07, "grad_norm": 3.174045713888869, "learning_rate": 7.472739013810348e-06, "loss": 0.6836, "step": 13123 }, { "epoch": 1.07, "grad_norm": 3.0753418214289, "learning_rate": 7.4723579346350595e-06, "loss": 0.6006, "step": 13124 }, { "epoch": 1.07, "grad_norm": 10.134506659967762, "learning_rate": 7.471976836449416e-06, "loss": 0.5719, "step": 13125 }, { "epoch": 1.07, "grad_norm": 8.52158483458711, "learning_rate": 7.4715957192563494e-06, "loss": 0.5088, "step": 13126 }, { "epoch": 1.07, "grad_norm": 3.2188856173128295, "learning_rate": 7.47121458305879e-06, "loss": 0.7113, "step": 13127 }, { "epoch": 1.07, "grad_norm": 2.943558597435543, "learning_rate": 7.470833427859667e-06, "loss": 0.5373, "step": 13128 }, { "epoch": 1.07, "grad_norm": 8.352764074871018, "learning_rate": 7.4704522536619116e-06, "loss": 0.6991, "step": 13129 }, { "epoch": 1.07, "grad_norm": 2.7641227938140593, "learning_rate": 7.470071060468457e-06, "loss": 0.7818, "step": 13130 }, { "epoch": 1.07, "grad_norm": 6.756026062200751, "learning_rate": 7.469689848282231e-06, "loss": 0.5774, "step": 13131 }, { "epoch": 1.07, "grad_norm": 4.407893082266839, "learning_rate": 7.469308617106168e-06, "loss": 0.5675, "step": 13132 }, { "epoch": 1.07, "grad_norm": 3.6523045812510775, "learning_rate": 7.468927366943198e-06, "loss": 0.5938, "step": 13133 }, { "epoch": 1.07, "grad_norm": 3.39645892320719, "learning_rate": 7.4685460977962495e-06, "loss": 0.6581, "step": 13134 }, { "epoch": 1.07, "grad_norm": 3.241529608313073, "learning_rate": 7.468164809668259e-06, "loss": 0.646, "step": 13135 }, { "epoch": 1.07, "grad_norm": 2.487257642585635, "learning_rate": 7.467783502562156e-06, "loss": 0.4596, "step": 13136 }, { "epoch": 1.07, "grad_norm": 2.9710902676024653, "learning_rate": 7.467402176480873e-06, "loss": 0.6213, "step": 13137 }, { "epoch": 1.07, "grad_norm": 4.7309006817942585, "learning_rate": 7.46702083142734e-06, "loss": 0.4781, "step": 13138 }, { "epoch": 1.07, "grad_norm": 144.67675691725674, "learning_rate": 7.466639467404492e-06, "loss": 0.7385, "step": 13139 }, { "epoch": 1.07, "grad_norm": 2.5156514317444847, "learning_rate": 7.4662580844152596e-06, "loss": 0.6209, "step": 13140 }, { "epoch": 1.07, "grad_norm": 8.305305820388769, "learning_rate": 7.465876682462576e-06, "loss": 0.5491, "step": 13141 }, { "epoch": 1.07, "grad_norm": 2.15168257955364, "learning_rate": 7.465495261549373e-06, "loss": 0.6536, "step": 13142 }, { "epoch": 1.07, "grad_norm": 3.736901266723794, "learning_rate": 7.465113821678587e-06, "loss": 0.5547, "step": 13143 }, { "epoch": 1.07, "grad_norm": 3.096421973622719, "learning_rate": 7.464732362853146e-06, "loss": 0.6518, "step": 13144 }, { "epoch": 1.07, "grad_norm": 7.109904852316536, "learning_rate": 7.464350885075986e-06, "loss": 0.672, "step": 13145 }, { "epoch": 1.07, "grad_norm": 3.5381466282286, "learning_rate": 7.4639693883500384e-06, "loss": 0.6154, "step": 13146 }, { "epoch": 1.07, "grad_norm": 5.438163040758922, "learning_rate": 7.46358787267824e-06, "loss": 0.5683, "step": 13147 }, { "epoch": 1.07, "grad_norm": 2.9702431266512956, "learning_rate": 7.46320633806352e-06, "loss": 0.5774, "step": 13148 }, { "epoch": 1.07, "grad_norm": 3.157234807952063, "learning_rate": 7.462824784508815e-06, "loss": 0.5533, "step": 13149 }, { "epoch": 1.07, "grad_norm": 2.9748905805705768, "learning_rate": 7.462443212017059e-06, "loss": 0.6478, "step": 13150 }, { "epoch": 1.07, "grad_norm": 4.461351213565799, "learning_rate": 7.462061620591183e-06, "loss": 0.4918, "step": 13151 }, { "epoch": 1.07, "grad_norm": 2.74558813142874, "learning_rate": 7.4616800102341235e-06, "loss": 0.4109, "step": 13152 }, { "epoch": 1.07, "grad_norm": 3.751227098727027, "learning_rate": 7.461298380948815e-06, "loss": 0.5711, "step": 13153 }, { "epoch": 1.07, "grad_norm": 7.318146318969377, "learning_rate": 7.46091673273819e-06, "loss": 0.5473, "step": 13154 }, { "epoch": 1.07, "grad_norm": 7.136078183314036, "learning_rate": 7.460535065605184e-06, "loss": 0.5208, "step": 13155 }, { "epoch": 1.07, "grad_norm": 3.882304640969959, "learning_rate": 7.460153379552734e-06, "loss": 0.7574, "step": 13156 }, { "epoch": 1.07, "grad_norm": 3.263320254215945, "learning_rate": 7.459771674583771e-06, "loss": 0.5641, "step": 13157 }, { "epoch": 1.07, "grad_norm": 3.9454302819181706, "learning_rate": 7.4593899507012334e-06, "loss": 0.8165, "step": 13158 }, { "epoch": 1.07, "grad_norm": 5.023684017649677, "learning_rate": 7.459008207908053e-06, "loss": 0.6519, "step": 13159 }, { "epoch": 1.07, "grad_norm": 2.946288482614118, "learning_rate": 7.458626446207168e-06, "loss": 0.5857, "step": 13160 }, { "epoch": 1.07, "grad_norm": 3.3237427377397304, "learning_rate": 7.4582446656015125e-06, "loss": 0.5407, "step": 13161 }, { "epoch": 1.07, "grad_norm": 2.4057548289416175, "learning_rate": 7.457862866094022e-06, "loss": 0.5911, "step": 13162 }, { "epoch": 1.07, "grad_norm": 5.7306448242100165, "learning_rate": 7.457481047687631e-06, "loss": 0.546, "step": 13163 }, { "epoch": 1.07, "grad_norm": 2.43236916047915, "learning_rate": 7.457099210385279e-06, "loss": 0.6269, "step": 13164 }, { "epoch": 1.07, "grad_norm": 12.018064351585961, "learning_rate": 7.456717354189898e-06, "loss": 0.5036, "step": 13165 }, { "epoch": 1.07, "grad_norm": 5.049084473895779, "learning_rate": 7.456335479104429e-06, "loss": 0.7227, "step": 13166 }, { "epoch": 1.07, "grad_norm": 3.62912891290483, "learning_rate": 7.455953585131801e-06, "loss": 0.5387, "step": 13167 }, { "epoch": 1.07, "grad_norm": 3.9047174348882008, "learning_rate": 7.455571672274957e-06, "loss": 0.5964, "step": 13168 }, { "epoch": 1.07, "grad_norm": 3.266857483593641, "learning_rate": 7.455189740536832e-06, "loss": 0.5561, "step": 13169 }, { "epoch": 1.07, "grad_norm": 4.161309609801212, "learning_rate": 7.454807789920361e-06, "loss": 0.683, "step": 13170 }, { "epoch": 1.07, "grad_norm": 9.909369323452003, "learning_rate": 7.454425820428481e-06, "loss": 0.6305, "step": 13171 }, { "epoch": 1.07, "grad_norm": 3.0868172990934695, "learning_rate": 7.4540438320641304e-06, "loss": 0.6774, "step": 13172 }, { "epoch": 1.07, "grad_norm": 2.3203630580355004, "learning_rate": 7.453661824830247e-06, "loss": 0.6174, "step": 13173 }, { "epoch": 1.07, "grad_norm": 6.682073889503434, "learning_rate": 7.453279798729766e-06, "loss": 0.6479, "step": 13174 }, { "epoch": 1.07, "grad_norm": 5.2843051067858875, "learning_rate": 7.452897753765626e-06, "loss": 0.6467, "step": 13175 }, { "epoch": 1.07, "grad_norm": 3.012566680341504, "learning_rate": 7.452515689940765e-06, "loss": 0.5223, "step": 13176 }, { "epoch": 1.07, "grad_norm": 3.4024672574784858, "learning_rate": 7.45213360725812e-06, "loss": 0.5819, "step": 13177 }, { "epoch": 1.07, "grad_norm": 2.8784912182560967, "learning_rate": 7.45175150572063e-06, "loss": 0.7457, "step": 13178 }, { "epoch": 1.07, "grad_norm": 5.583025305898903, "learning_rate": 7.451369385331229e-06, "loss": 0.6961, "step": 13179 }, { "epoch": 1.07, "grad_norm": 4.218089368153732, "learning_rate": 7.450987246092862e-06, "loss": 0.5456, "step": 13180 }, { "epoch": 1.07, "grad_norm": 3.4169816808152, "learning_rate": 7.450605088008462e-06, "loss": 0.7615, "step": 13181 }, { "epoch": 1.07, "grad_norm": 4.147815156131929, "learning_rate": 7.45022291108097e-06, "loss": 0.6199, "step": 13182 }, { "epoch": 1.07, "grad_norm": 4.814323223907262, "learning_rate": 7.4498407153133215e-06, "loss": 0.5564, "step": 13183 }, { "epoch": 1.07, "grad_norm": 3.227284197187449, "learning_rate": 7.4494585007084594e-06, "loss": 0.6277, "step": 13184 }, { "epoch": 1.07, "grad_norm": 3.722205002668175, "learning_rate": 7.449076267269321e-06, "loss": 0.653, "step": 13185 }, { "epoch": 1.07, "grad_norm": 2.8504798535520344, "learning_rate": 7.448694014998844e-06, "loss": 0.6865, "step": 13186 }, { "epoch": 1.07, "grad_norm": 3.588612035387599, "learning_rate": 7.4483117438999685e-06, "loss": 0.5696, "step": 13187 }, { "epoch": 1.07, "grad_norm": 3.206604201549923, "learning_rate": 7.447929453975635e-06, "loss": 0.5462, "step": 13188 }, { "epoch": 1.07, "grad_norm": 8.687200740721579, "learning_rate": 7.4475471452287816e-06, "loss": 0.6277, "step": 13189 }, { "epoch": 1.07, "grad_norm": 2.5196362813600133, "learning_rate": 7.447164817662349e-06, "loss": 0.6248, "step": 13190 }, { "epoch": 1.07, "grad_norm": 4.39504533981986, "learning_rate": 7.4467824712792744e-06, "loss": 0.842, "step": 13191 }, { "epoch": 1.07, "grad_norm": 7.392500980030107, "learning_rate": 7.446400106082501e-06, "loss": 0.6453, "step": 13192 }, { "epoch": 1.07, "grad_norm": 3.297465587889023, "learning_rate": 7.446017722074968e-06, "loss": 0.5973, "step": 13193 }, { "epoch": 1.07, "grad_norm": 6.351685119573751, "learning_rate": 7.445635319259615e-06, "loss": 0.755, "step": 13194 }, { "epoch": 1.07, "grad_norm": 2.7229964663372557, "learning_rate": 7.445252897639381e-06, "loss": 0.6806, "step": 13195 }, { "epoch": 1.07, "grad_norm": 5.003098543224561, "learning_rate": 7.444870457217209e-06, "loss": 0.7158, "step": 13196 }, { "epoch": 1.07, "grad_norm": 2.976671300658637, "learning_rate": 7.44448799799604e-06, "loss": 0.6609, "step": 13197 }, { "epoch": 1.07, "grad_norm": 3.3555567529529555, "learning_rate": 7.444105519978812e-06, "loss": 0.5668, "step": 13198 }, { "epoch": 1.07, "grad_norm": 3.588637739002608, "learning_rate": 7.443723023168466e-06, "loss": 0.6229, "step": 13199 }, { "epoch": 1.07, "grad_norm": 2.7747849810965084, "learning_rate": 7.443340507567947e-06, "loss": 0.5135, "step": 13200 }, { "epoch": 1.07, "grad_norm": 3.004624738836836, "learning_rate": 7.4429579731801915e-06, "loss": 0.451, "step": 13201 }, { "epoch": 1.07, "grad_norm": 3.88518218219603, "learning_rate": 7.442575420008145e-06, "loss": 0.5553, "step": 13202 }, { "epoch": 1.07, "grad_norm": 2.329906907265006, "learning_rate": 7.442192848054745e-06, "loss": 0.6215, "step": 13203 }, { "epoch": 1.07, "grad_norm": 2.4662115365504453, "learning_rate": 7.441810257322937e-06, "loss": 0.611, "step": 13204 }, { "epoch": 1.07, "grad_norm": 5.730812734292271, "learning_rate": 7.44142764781566e-06, "loss": 0.6095, "step": 13205 }, { "epoch": 1.07, "grad_norm": 3.865176831121156, "learning_rate": 7.441045019535857e-06, "loss": 0.6881, "step": 13206 }, { "epoch": 1.07, "grad_norm": 3.395602676329784, "learning_rate": 7.440662372486469e-06, "loss": 0.7076, "step": 13207 }, { "epoch": 1.07, "grad_norm": 2.2138721793505067, "learning_rate": 7.440279706670441e-06, "loss": 0.4204, "step": 13208 }, { "epoch": 1.07, "grad_norm": 4.597840672592666, "learning_rate": 7.439897022090713e-06, "loss": 0.6595, "step": 13209 }, { "epoch": 1.07, "grad_norm": 2.85241927117721, "learning_rate": 7.439514318750228e-06, "loss": 0.6681, "step": 13210 }, { "epoch": 1.07, "grad_norm": 3.082806122929316, "learning_rate": 7.439131596651929e-06, "loss": 0.6682, "step": 13211 }, { "epoch": 1.07, "grad_norm": 2.933447515645228, "learning_rate": 7.438748855798758e-06, "loss": 0.5394, "step": 13212 }, { "epoch": 1.07, "grad_norm": 2.334173410533451, "learning_rate": 7.43836609619366e-06, "loss": 0.5864, "step": 13213 }, { "epoch": 1.07, "grad_norm": 3.126398221300579, "learning_rate": 7.437983317839577e-06, "loss": 0.5398, "step": 13214 }, { "epoch": 1.07, "grad_norm": 8.091837987166922, "learning_rate": 7.4376005207394495e-06, "loss": 0.6768, "step": 13215 }, { "epoch": 1.07, "grad_norm": 4.526932198455699, "learning_rate": 7.437217704896225e-06, "loss": 0.5944, "step": 13216 }, { "epoch": 1.07, "grad_norm": 3.040644472058373, "learning_rate": 7.436834870312846e-06, "loss": 0.6438, "step": 13217 }, { "epoch": 1.07, "grad_norm": 3.2714842496701118, "learning_rate": 7.436452016992254e-06, "loss": 0.7201, "step": 13218 }, { "epoch": 1.07, "grad_norm": 11.422882965630448, "learning_rate": 7.436069144937394e-06, "loss": 0.4795, "step": 13219 }, { "epoch": 1.07, "grad_norm": 3.209457339080209, "learning_rate": 7.435686254151211e-06, "loss": 0.5999, "step": 13220 }, { "epoch": 1.07, "grad_norm": 4.694160598414946, "learning_rate": 7.4353033446366495e-06, "loss": 0.6036, "step": 13221 }, { "epoch": 1.07, "grad_norm": 2.730136495648104, "learning_rate": 7.434920416396651e-06, "loss": 0.5434, "step": 13222 }, { "epoch": 1.07, "grad_norm": 3.5184394502310377, "learning_rate": 7.434537469434162e-06, "loss": 0.6205, "step": 13223 }, { "epoch": 1.07, "grad_norm": 7.466522293819573, "learning_rate": 7.434154503752128e-06, "loss": 0.5954, "step": 13224 }, { "epoch": 1.07, "grad_norm": 3.1657789386471173, "learning_rate": 7.433771519353492e-06, "loss": 0.6656, "step": 13225 }, { "epoch": 1.07, "grad_norm": 17.029234997514077, "learning_rate": 7.433388516241198e-06, "loss": 0.561, "step": 13226 }, { "epoch": 1.07, "grad_norm": 4.005451057316696, "learning_rate": 7.433005494418192e-06, "loss": 0.5955, "step": 13227 }, { "epoch": 1.07, "grad_norm": 3.7188709869458316, "learning_rate": 7.432622453887419e-06, "loss": 0.6584, "step": 13228 }, { "epoch": 1.07, "grad_norm": 3.568972283936614, "learning_rate": 7.432239394651826e-06, "loss": 0.7238, "step": 13229 }, { "epoch": 1.07, "grad_norm": 4.012677303820611, "learning_rate": 7.4318563167143565e-06, "loss": 0.6036, "step": 13230 }, { "epoch": 1.07, "grad_norm": 6.133045280193642, "learning_rate": 7.431473220077955e-06, "loss": 0.5659, "step": 13231 }, { "epoch": 1.07, "grad_norm": 4.356657568114009, "learning_rate": 7.43109010474557e-06, "loss": 0.5803, "step": 13232 }, { "epoch": 1.07, "grad_norm": 3.2742651428548557, "learning_rate": 7.430706970720145e-06, "loss": 0.4892, "step": 13233 }, { "epoch": 1.07, "grad_norm": 2.757850994158556, "learning_rate": 7.430323818004629e-06, "loss": 0.5953, "step": 13234 }, { "epoch": 1.07, "grad_norm": 4.574939438902401, "learning_rate": 7.429940646601964e-06, "loss": 0.7631, "step": 13235 }, { "epoch": 1.08, "grad_norm": 2.914063817014665, "learning_rate": 7.429557456515098e-06, "loss": 0.5647, "step": 13236 }, { "epoch": 1.08, "grad_norm": 5.4051270973396255, "learning_rate": 7.42917424774698e-06, "loss": 0.6855, "step": 13237 }, { "epoch": 1.08, "grad_norm": 3.990199675551497, "learning_rate": 7.428791020300552e-06, "loss": 0.5289, "step": 13238 }, { "epoch": 1.08, "grad_norm": 2.8648522652203527, "learning_rate": 7.428407774178764e-06, "loss": 0.6821, "step": 13239 }, { "epoch": 1.08, "grad_norm": 2.9273038537099056, "learning_rate": 7.428024509384561e-06, "loss": 0.5467, "step": 13240 }, { "epoch": 1.08, "grad_norm": 6.027222672783492, "learning_rate": 7.427641225920892e-06, "loss": 0.6703, "step": 13241 }, { "epoch": 1.08, "grad_norm": 3.822647847278044, "learning_rate": 7.427257923790703e-06, "loss": 0.6269, "step": 13242 }, { "epoch": 1.08, "grad_norm": 3.4220047782158236, "learning_rate": 7.426874602996941e-06, "loss": 0.541, "step": 13243 }, { "epoch": 1.08, "grad_norm": 5.716366484184259, "learning_rate": 7.426491263542551e-06, "loss": 0.6579, "step": 13244 }, { "epoch": 1.08, "grad_norm": 4.398584835132485, "learning_rate": 7.426107905430486e-06, "loss": 0.5246, "step": 13245 }, { "epoch": 1.08, "grad_norm": 2.6068763792276077, "learning_rate": 7.42572452866369e-06, "loss": 0.5123, "step": 13246 }, { "epoch": 1.08, "grad_norm": 3.4856252089901023, "learning_rate": 7.425341133245112e-06, "loss": 0.7347, "step": 13247 }, { "epoch": 1.08, "grad_norm": 5.232532327047942, "learning_rate": 7.424957719177699e-06, "loss": 0.6251, "step": 13248 }, { "epoch": 1.08, "grad_norm": 4.564120716691231, "learning_rate": 7.424574286464401e-06, "loss": 0.5374, "step": 13249 }, { "epoch": 1.08, "grad_norm": 3.810498962768359, "learning_rate": 7.424190835108165e-06, "loss": 0.5396, "step": 13250 }, { "epoch": 1.08, "grad_norm": 4.2126697330723175, "learning_rate": 7.423807365111939e-06, "loss": 0.4812, "step": 13251 }, { "epoch": 1.08, "grad_norm": 3.9497893100034194, "learning_rate": 7.423423876478672e-06, "loss": 0.6903, "step": 13252 }, { "epoch": 1.08, "grad_norm": 23.72419201543532, "learning_rate": 7.423040369211313e-06, "loss": 0.5933, "step": 13253 }, { "epoch": 1.08, "grad_norm": 5.209033948351683, "learning_rate": 7.422656843312811e-06, "loss": 0.7385, "step": 13254 }, { "epoch": 1.08, "grad_norm": 3.1981060680149396, "learning_rate": 7.422273298786115e-06, "loss": 0.5485, "step": 13255 }, { "epoch": 1.08, "grad_norm": 6.478489272254062, "learning_rate": 7.421889735634172e-06, "loss": 0.5808, "step": 13256 }, { "epoch": 1.08, "grad_norm": 7.438475442305074, "learning_rate": 7.421506153859934e-06, "loss": 0.5563, "step": 13257 }, { "epoch": 1.08, "grad_norm": 4.084610723725969, "learning_rate": 7.42112255346635e-06, "loss": 0.5692, "step": 13258 }, { "epoch": 1.08, "grad_norm": 5.691684887734501, "learning_rate": 7.420738934456369e-06, "loss": 0.5496, "step": 13259 }, { "epoch": 1.08, "grad_norm": 6.447687030692418, "learning_rate": 7.42035529683294e-06, "loss": 0.6503, "step": 13260 }, { "epoch": 1.08, "grad_norm": 6.71435985491528, "learning_rate": 7.419971640599013e-06, "loss": 0.7384, "step": 13261 }, { "epoch": 1.08, "grad_norm": 2.696361319372669, "learning_rate": 7.41958796575754e-06, "loss": 0.6365, "step": 13262 }, { "epoch": 1.08, "grad_norm": 12.85285184184139, "learning_rate": 7.4192042723114696e-06, "loss": 0.4558, "step": 13263 }, { "epoch": 1.08, "grad_norm": 4.687942120444293, "learning_rate": 7.418820560263751e-06, "loss": 0.7266, "step": 13264 }, { "epoch": 1.08, "grad_norm": 5.82047509039928, "learning_rate": 7.418436829617337e-06, "loss": 0.5765, "step": 13265 }, { "epoch": 1.08, "grad_norm": 4.787652403067645, "learning_rate": 7.418053080375177e-06, "loss": 0.6128, "step": 13266 }, { "epoch": 1.08, "grad_norm": 4.967867724919784, "learning_rate": 7.417669312540221e-06, "loss": 0.5525, "step": 13267 }, { "epoch": 1.08, "grad_norm": 5.715021338770556, "learning_rate": 7.4172855261154204e-06, "loss": 0.657, "step": 13268 }, { "epoch": 1.08, "grad_norm": 7.1322204561142355, "learning_rate": 7.4169017211037275e-06, "loss": 0.6177, "step": 13269 }, { "epoch": 1.08, "grad_norm": 6.27198894969328, "learning_rate": 7.416517897508092e-06, "loss": 0.5589, "step": 13270 }, { "epoch": 1.08, "grad_norm": 3.6378189931235965, "learning_rate": 7.416134055331466e-06, "loss": 0.6485, "step": 13271 }, { "epoch": 1.08, "grad_norm": 3.5028604880007137, "learning_rate": 7.415750194576799e-06, "loss": 0.5357, "step": 13272 }, { "epoch": 1.08, "grad_norm": 5.5031192830231115, "learning_rate": 7.415366315247043e-06, "loss": 0.5948, "step": 13273 }, { "epoch": 1.08, "grad_norm": 4.192262296590541, "learning_rate": 7.4149824173451534e-06, "loss": 0.617, "step": 13274 }, { "epoch": 1.08, "grad_norm": 5.836848821924808, "learning_rate": 7.414598500874078e-06, "loss": 0.5832, "step": 13275 }, { "epoch": 1.08, "grad_norm": 4.005217070584467, "learning_rate": 7.414214565836771e-06, "loss": 0.6577, "step": 13276 }, { "epoch": 1.08, "grad_norm": 5.994712981097223, "learning_rate": 7.413830612236181e-06, "loss": 0.5213, "step": 13277 }, { "epoch": 1.08, "grad_norm": 2.7161736183945147, "learning_rate": 7.4134466400752655e-06, "loss": 0.5844, "step": 13278 }, { "epoch": 1.08, "grad_norm": 3.6209869495040685, "learning_rate": 7.413062649356975e-06, "loss": 0.6991, "step": 13279 }, { "epoch": 1.08, "grad_norm": 3.5316345985980577, "learning_rate": 7.412678640084258e-06, "loss": 0.5137, "step": 13280 }, { "epoch": 1.08, "grad_norm": 3.809810081804549, "learning_rate": 7.4122946122600735e-06, "loss": 0.6427, "step": 13281 }, { "epoch": 1.08, "grad_norm": 2.3050533970022165, "learning_rate": 7.4119105658873714e-06, "loss": 0.5149, "step": 13282 }, { "epoch": 1.08, "grad_norm": 6.3686477171519815, "learning_rate": 7.411526500969104e-06, "loss": 0.5543, "step": 13283 }, { "epoch": 1.08, "grad_norm": 3.2811517802933134, "learning_rate": 7.411142417508225e-06, "loss": 0.6836, "step": 13284 }, { "epoch": 1.08, "grad_norm": 2.384303154240263, "learning_rate": 7.410758315507688e-06, "loss": 0.563, "step": 13285 }, { "epoch": 1.08, "grad_norm": 3.7188045243265084, "learning_rate": 7.410374194970447e-06, "loss": 0.5262, "step": 13286 }, { "epoch": 1.08, "grad_norm": 4.986806125269831, "learning_rate": 7.409990055899454e-06, "loss": 0.6662, "step": 13287 }, { "epoch": 1.08, "grad_norm": 2.8175814740369836, "learning_rate": 7.409605898297664e-06, "loss": 0.5922, "step": 13288 }, { "epoch": 1.08, "grad_norm": 3.2760113226705196, "learning_rate": 7.409221722168029e-06, "loss": 0.7339, "step": 13289 }, { "epoch": 1.08, "grad_norm": 3.324158103436725, "learning_rate": 7.408837527513507e-06, "loss": 0.7418, "step": 13290 }, { "epoch": 1.08, "grad_norm": 3.948392042379822, "learning_rate": 7.408453314337047e-06, "loss": 0.5154, "step": 13291 }, { "epoch": 1.08, "grad_norm": 3.625910203446188, "learning_rate": 7.408069082641608e-06, "loss": 0.7366, "step": 13292 }, { "epoch": 1.08, "grad_norm": 1.7810246925783204, "learning_rate": 7.4076848324301406e-06, "loss": 0.4648, "step": 13293 }, { "epoch": 1.08, "grad_norm": 2.180323254466273, "learning_rate": 7.407300563705603e-06, "loss": 0.4685, "step": 13294 }, { "epoch": 1.08, "grad_norm": 4.207731895752863, "learning_rate": 7.4069162764709464e-06, "loss": 0.5888, "step": 13295 }, { "epoch": 1.08, "grad_norm": 2.9353263058128847, "learning_rate": 7.4065319707291275e-06, "loss": 0.7236, "step": 13296 }, { "epoch": 1.08, "grad_norm": 2.6740823768348636, "learning_rate": 7.4061476464831005e-06, "loss": 0.6026, "step": 13297 }, { "epoch": 1.08, "grad_norm": 6.4536367009218445, "learning_rate": 7.4057633037358225e-06, "loss": 0.5732, "step": 13298 }, { "epoch": 1.08, "grad_norm": 3.021613168144522, "learning_rate": 7.405378942490245e-06, "loss": 0.5626, "step": 13299 }, { "epoch": 1.08, "grad_norm": 3.884493995489164, "learning_rate": 7.404994562749328e-06, "loss": 0.5363, "step": 13300 }, { "epoch": 1.08, "grad_norm": 3.606482309089173, "learning_rate": 7.404610164516023e-06, "loss": 0.6295, "step": 13301 }, { "epoch": 1.08, "grad_norm": 3.92212260819278, "learning_rate": 7.4042257477932875e-06, "loss": 0.6155, "step": 13302 }, { "epoch": 1.08, "grad_norm": 4.436738081189434, "learning_rate": 7.403841312584079e-06, "loss": 0.5412, "step": 13303 }, { "epoch": 1.08, "grad_norm": 3.07810492648197, "learning_rate": 7.40345685889135e-06, "loss": 0.4993, "step": 13304 }, { "epoch": 1.08, "grad_norm": 4.703002431451, "learning_rate": 7.4030723867180585e-06, "loss": 0.5208, "step": 13305 }, { "epoch": 1.08, "grad_norm": 5.671345571351434, "learning_rate": 7.4026878960671625e-06, "loss": 0.5952, "step": 13306 }, { "epoch": 1.08, "grad_norm": 19.630485219315084, "learning_rate": 7.402303386941614e-06, "loss": 0.6579, "step": 13307 }, { "epoch": 1.08, "grad_norm": 3.324344719823664, "learning_rate": 7.401918859344373e-06, "loss": 0.625, "step": 13308 }, { "epoch": 1.08, "grad_norm": 4.585859163190357, "learning_rate": 7.401534313278396e-06, "loss": 0.6206, "step": 13309 }, { "epoch": 1.08, "grad_norm": 3.013115062020825, "learning_rate": 7.401149748746639e-06, "loss": 0.5492, "step": 13310 }, { "epoch": 1.08, "grad_norm": 3.045598600906526, "learning_rate": 7.400765165752059e-06, "loss": 0.5649, "step": 13311 }, { "epoch": 1.08, "grad_norm": 3.196164519692179, "learning_rate": 7.400380564297613e-06, "loss": 0.5525, "step": 13312 }, { "epoch": 1.08, "grad_norm": 3.7137650803776516, "learning_rate": 7.399995944386258e-06, "loss": 0.6448, "step": 13313 }, { "epoch": 1.08, "grad_norm": 3.4122336660597683, "learning_rate": 7.399611306020953e-06, "loss": 0.5411, "step": 13314 }, { "epoch": 1.08, "grad_norm": 4.986289778777504, "learning_rate": 7.399226649204654e-06, "loss": 0.5427, "step": 13315 }, { "epoch": 1.08, "grad_norm": 3.8400061286585783, "learning_rate": 7.398841973940318e-06, "loss": 0.495, "step": 13316 }, { "epoch": 1.08, "grad_norm": 2.838412044844545, "learning_rate": 7.398457280230905e-06, "loss": 0.6105, "step": 13317 }, { "epoch": 1.08, "grad_norm": 3.6803561987982767, "learning_rate": 7.398072568079372e-06, "loss": 0.5566, "step": 13318 }, { "epoch": 1.08, "grad_norm": 2.7778889598182857, "learning_rate": 7.397687837488677e-06, "loss": 0.6425, "step": 13319 }, { "epoch": 1.08, "grad_norm": 2.3961841412477765, "learning_rate": 7.397303088461779e-06, "loss": 0.5832, "step": 13320 }, { "epoch": 1.08, "grad_norm": 5.666398766276481, "learning_rate": 7.396918321001634e-06, "loss": 0.5932, "step": 13321 }, { "epoch": 1.08, "grad_norm": 2.3758657592424353, "learning_rate": 7.396533535111203e-06, "loss": 0.5025, "step": 13322 }, { "epoch": 1.08, "grad_norm": 3.5756966468747815, "learning_rate": 7.396148730793444e-06, "loss": 0.6582, "step": 13323 }, { "epoch": 1.08, "grad_norm": 5.5686691646075355, "learning_rate": 7.395763908051317e-06, "loss": 0.6816, "step": 13324 }, { "epoch": 1.08, "grad_norm": 3.5179475067981065, "learning_rate": 7.395379066887778e-06, "loss": 0.6145, "step": 13325 }, { "epoch": 1.08, "grad_norm": 2.415214604886496, "learning_rate": 7.3949942073057876e-06, "loss": 0.6418, "step": 13326 }, { "epoch": 1.08, "grad_norm": 2.1617353859133095, "learning_rate": 7.394609329308306e-06, "loss": 0.6327, "step": 13327 }, { "epoch": 1.08, "grad_norm": 4.791422361591413, "learning_rate": 7.394224432898293e-06, "loss": 0.5652, "step": 13328 }, { "epoch": 1.08, "grad_norm": 3.700889320458536, "learning_rate": 7.3938395180787044e-06, "loss": 0.4456, "step": 13329 }, { "epoch": 1.08, "grad_norm": 3.619616252579102, "learning_rate": 7.393454584852504e-06, "loss": 0.5571, "step": 13330 }, { "epoch": 1.08, "grad_norm": 3.6210679625726665, "learning_rate": 7.393069633222652e-06, "loss": 0.6491, "step": 13331 }, { "epoch": 1.08, "grad_norm": 3.2160924152875046, "learning_rate": 7.392684663192103e-06, "loss": 0.5991, "step": 13332 }, { "epoch": 1.08, "grad_norm": 3.141763455438355, "learning_rate": 7.392299674763823e-06, "loss": 0.4313, "step": 13333 }, { "epoch": 1.08, "grad_norm": 3.742697564526375, "learning_rate": 7.391914667940768e-06, "loss": 0.4995, "step": 13334 }, { "epoch": 1.08, "grad_norm": 3.926847512834141, "learning_rate": 7.3915296427259e-06, "loss": 0.5404, "step": 13335 }, { "epoch": 1.08, "grad_norm": 2.8536085142060097, "learning_rate": 7.391144599122181e-06, "loss": 0.642, "step": 13336 }, { "epoch": 1.08, "grad_norm": 2.6778900294622217, "learning_rate": 7.3907595371325705e-06, "loss": 0.6263, "step": 13337 }, { "epoch": 1.08, "grad_norm": 3.313658519856464, "learning_rate": 7.390374456760027e-06, "loss": 0.6721, "step": 13338 }, { "epoch": 1.08, "grad_norm": 4.847492133877503, "learning_rate": 7.389989358007514e-06, "loss": 0.533, "step": 13339 }, { "epoch": 1.08, "grad_norm": 4.9897180092268805, "learning_rate": 7.389604240877994e-06, "loss": 0.6389, "step": 13340 }, { "epoch": 1.08, "grad_norm": 2.941041372140006, "learning_rate": 7.3892191053744255e-06, "loss": 0.6186, "step": 13341 }, { "epoch": 1.08, "grad_norm": 3.655602471796342, "learning_rate": 7.38883395149977e-06, "loss": 0.6595, "step": 13342 }, { "epoch": 1.08, "grad_norm": 4.365545216103017, "learning_rate": 7.38844877925699e-06, "loss": 0.6479, "step": 13343 }, { "epoch": 1.08, "grad_norm": 6.118006719455853, "learning_rate": 7.388063588649047e-06, "loss": 0.6941, "step": 13344 }, { "epoch": 1.08, "grad_norm": 3.6435395296446598, "learning_rate": 7.387678379678903e-06, "loss": 0.5773, "step": 13345 }, { "epoch": 1.08, "grad_norm": 4.179386428825421, "learning_rate": 7.38729315234952e-06, "loss": 0.7434, "step": 13346 }, { "epoch": 1.08, "grad_norm": 4.311718568944002, "learning_rate": 7.386907906663858e-06, "loss": 0.6326, "step": 13347 }, { "epoch": 1.08, "grad_norm": 3.812304148030748, "learning_rate": 7.3865226426248826e-06, "loss": 0.4957, "step": 13348 }, { "epoch": 1.08, "grad_norm": 4.769599135106726, "learning_rate": 7.386137360235554e-06, "loss": 0.3403, "step": 13349 }, { "epoch": 1.08, "grad_norm": 5.987181216629483, "learning_rate": 7.385752059498834e-06, "loss": 0.6991, "step": 13350 }, { "epoch": 1.08, "grad_norm": 2.900970074701525, "learning_rate": 7.3853667404176886e-06, "loss": 0.5882, "step": 13351 }, { "epoch": 1.08, "grad_norm": 3.817701135238978, "learning_rate": 7.384981402995077e-06, "loss": 0.6783, "step": 13352 }, { "epoch": 1.08, "grad_norm": 8.626573916368784, "learning_rate": 7.384596047233964e-06, "loss": 0.6283, "step": 13353 }, { "epoch": 1.08, "grad_norm": 3.963501283380743, "learning_rate": 7.384210673137311e-06, "loss": 0.5692, "step": 13354 }, { "epoch": 1.08, "grad_norm": 3.4777590324014485, "learning_rate": 7.383825280708084e-06, "loss": 0.466, "step": 13355 }, { "epoch": 1.08, "grad_norm": 3.4197899990700336, "learning_rate": 7.3834398699492436e-06, "loss": 0.5533, "step": 13356 }, { "epoch": 1.08, "grad_norm": 2.7537233246253368, "learning_rate": 7.383054440863755e-06, "loss": 0.7064, "step": 13357 }, { "epoch": 1.08, "grad_norm": 3.677770330574415, "learning_rate": 7.382668993454581e-06, "loss": 0.696, "step": 13358 }, { "epoch": 1.09, "grad_norm": 4.807014526354837, "learning_rate": 7.3822835277246855e-06, "loss": 0.7088, "step": 13359 }, { "epoch": 1.09, "grad_norm": 2.43781972069784, "learning_rate": 7.381898043677033e-06, "loss": 0.6017, "step": 13360 }, { "epoch": 1.09, "grad_norm": 3.3695284052223653, "learning_rate": 7.381512541314586e-06, "loss": 0.4097, "step": 13361 }, { "epoch": 1.09, "grad_norm": 2.9678324283700683, "learning_rate": 7.381127020640311e-06, "loss": 0.6378, "step": 13362 }, { "epoch": 1.09, "grad_norm": 3.8003089829825143, "learning_rate": 7.38074148165717e-06, "loss": 0.5963, "step": 13363 }, { "epoch": 1.09, "grad_norm": 6.196766435437576, "learning_rate": 7.3803559243681284e-06, "loss": 0.673, "step": 13364 }, { "epoch": 1.09, "grad_norm": 2.294039519703108, "learning_rate": 7.379970348776152e-06, "loss": 0.553, "step": 13365 }, { "epoch": 1.09, "grad_norm": 5.899757171549006, "learning_rate": 7.379584754884203e-06, "loss": 0.7049, "step": 13366 }, { "epoch": 1.09, "grad_norm": 3.7059539796944874, "learning_rate": 7.379199142695249e-06, "loss": 0.6357, "step": 13367 }, { "epoch": 1.09, "grad_norm": 3.1902426560339863, "learning_rate": 7.378813512212254e-06, "loss": 0.5575, "step": 13368 }, { "epoch": 1.09, "grad_norm": 2.9200037299678754, "learning_rate": 7.378427863438183e-06, "loss": 0.5566, "step": 13369 }, { "epoch": 1.09, "grad_norm": 9.749428249369979, "learning_rate": 7.378042196376001e-06, "loss": 0.5314, "step": 13370 }, { "epoch": 1.09, "grad_norm": 6.991227830196902, "learning_rate": 7.377656511028672e-06, "loss": 0.6268, "step": 13371 }, { "epoch": 1.09, "grad_norm": 5.3220751691566, "learning_rate": 7.377270807399166e-06, "loss": 0.5925, "step": 13372 }, { "epoch": 1.09, "grad_norm": 5.954751373968552, "learning_rate": 7.376885085490446e-06, "loss": 0.6219, "step": 13373 }, { "epoch": 1.09, "grad_norm": 2.6724790075494553, "learning_rate": 7.376499345305476e-06, "loss": 0.4996, "step": 13374 }, { "epoch": 1.09, "grad_norm": 3.1289076724745697, "learning_rate": 7.376113586847226e-06, "loss": 0.6523, "step": 13375 }, { "epoch": 1.09, "grad_norm": 3.8733865557818734, "learning_rate": 7.375727810118658e-06, "loss": 0.5935, "step": 13376 }, { "epoch": 1.09, "grad_norm": 3.9038676807925157, "learning_rate": 7.375342015122743e-06, "loss": 0.5889, "step": 13377 }, { "epoch": 1.09, "grad_norm": 2.7039781874129614, "learning_rate": 7.374956201862442e-06, "loss": 0.6089, "step": 13378 }, { "epoch": 1.09, "grad_norm": 5.138483684534424, "learning_rate": 7.374570370340727e-06, "loss": 0.5297, "step": 13379 }, { "epoch": 1.09, "grad_norm": 3.206866366391148, "learning_rate": 7.374184520560561e-06, "loss": 0.524, "step": 13380 }, { "epoch": 1.09, "grad_norm": 3.655404803967594, "learning_rate": 7.3737986525249125e-06, "loss": 0.5493, "step": 13381 }, { "epoch": 1.09, "grad_norm": 2.4628375799818008, "learning_rate": 7.373412766236747e-06, "loss": 0.4964, "step": 13382 }, { "epoch": 1.09, "grad_norm": 3.2655762919473554, "learning_rate": 7.373026861699033e-06, "loss": 0.6149, "step": 13383 }, { "epoch": 1.09, "grad_norm": 3.4053705260825016, "learning_rate": 7.372640938914739e-06, "loss": 0.6629, "step": 13384 }, { "epoch": 1.09, "grad_norm": 8.471285142533315, "learning_rate": 7.37225499788683e-06, "loss": 0.5483, "step": 13385 }, { "epoch": 1.09, "grad_norm": 3.9783224020055434, "learning_rate": 7.371869038618273e-06, "loss": 0.5021, "step": 13386 }, { "epoch": 1.09, "grad_norm": 3.840001067143406, "learning_rate": 7.3714830611120395e-06, "loss": 0.5964, "step": 13387 }, { "epoch": 1.09, "grad_norm": 3.5814599815133668, "learning_rate": 7.371097065371093e-06, "loss": 0.6482, "step": 13388 }, { "epoch": 1.09, "grad_norm": 5.617559810603534, "learning_rate": 7.370711051398406e-06, "loss": 0.6026, "step": 13389 }, { "epoch": 1.09, "grad_norm": 3.3820942783628993, "learning_rate": 7.370325019196941e-06, "loss": 0.5133, "step": 13390 }, { "epoch": 1.09, "grad_norm": 4.054158108350162, "learning_rate": 7.369938968769672e-06, "loss": 0.4765, "step": 13391 }, { "epoch": 1.09, "grad_norm": 2.969236352258403, "learning_rate": 7.369552900119563e-06, "loss": 0.5939, "step": 13392 }, { "epoch": 1.09, "grad_norm": 2.3418001747287067, "learning_rate": 7.369166813249586e-06, "loss": 0.6913, "step": 13393 }, { "epoch": 1.09, "grad_norm": 2.852197552130543, "learning_rate": 7.368780708162706e-06, "loss": 0.5712, "step": 13394 }, { "epoch": 1.09, "grad_norm": 4.690951798802362, "learning_rate": 7.368394584861895e-06, "loss": 0.796, "step": 13395 }, { "epoch": 1.09, "grad_norm": 5.977634419900721, "learning_rate": 7.368008443350121e-06, "loss": 0.5366, "step": 13396 }, { "epoch": 1.09, "grad_norm": 2.6512689487073464, "learning_rate": 7.367622283630353e-06, "loss": 0.7687, "step": 13397 }, { "epoch": 1.09, "grad_norm": 4.154744275206109, "learning_rate": 7.3672361057055585e-06, "loss": 0.6488, "step": 13398 }, { "epoch": 1.09, "grad_norm": 3.55996297500704, "learning_rate": 7.366849909578711e-06, "loss": 0.5997, "step": 13399 }, { "epoch": 1.09, "grad_norm": 5.680832161136861, "learning_rate": 7.366463695252776e-06, "loss": 0.7422, "step": 13400 }, { "epoch": 1.09, "grad_norm": 2.4042707631818256, "learning_rate": 7.366077462730724e-06, "loss": 0.5856, "step": 13401 }, { "epoch": 1.09, "grad_norm": 3.123338294120542, "learning_rate": 7.3656912120155265e-06, "loss": 0.4939, "step": 13402 }, { "epoch": 1.09, "grad_norm": 3.651941762600341, "learning_rate": 7.365304943110152e-06, "loss": 0.6437, "step": 13403 }, { "epoch": 1.09, "grad_norm": 3.9283965068159477, "learning_rate": 7.364918656017572e-06, "loss": 0.5593, "step": 13404 }, { "epoch": 1.09, "grad_norm": 6.46375267756135, "learning_rate": 7.364532350740755e-06, "loss": 0.7014, "step": 13405 }, { "epoch": 1.09, "grad_norm": 5.996651642159815, "learning_rate": 7.3641460272826715e-06, "loss": 0.8186, "step": 13406 }, { "epoch": 1.09, "grad_norm": 2.8991978700595733, "learning_rate": 7.3637596856462945e-06, "loss": 0.6996, "step": 13407 }, { "epoch": 1.09, "grad_norm": 2.4830519091859036, "learning_rate": 7.363373325834591e-06, "loss": 0.487, "step": 13408 }, { "epoch": 1.09, "grad_norm": 3.868601773523832, "learning_rate": 7.362986947850534e-06, "loss": 0.6429, "step": 13409 }, { "epoch": 1.09, "grad_norm": 2.920457296137699, "learning_rate": 7.362600551697094e-06, "loss": 0.6708, "step": 13410 }, { "epoch": 1.09, "grad_norm": 2.342645145660642, "learning_rate": 7.3622141373772426e-06, "loss": 0.521, "step": 13411 }, { "epoch": 1.09, "grad_norm": 6.374356136300773, "learning_rate": 7.36182770489395e-06, "loss": 0.6696, "step": 13412 }, { "epoch": 1.09, "grad_norm": 3.047173642193051, "learning_rate": 7.3614412542501876e-06, "loss": 0.513, "step": 13413 }, { "epoch": 1.09, "grad_norm": 4.268807851668942, "learning_rate": 7.361054785448928e-06, "loss": 0.5674, "step": 13414 }, { "epoch": 1.09, "grad_norm": 3.365899486108061, "learning_rate": 7.360668298493142e-06, "loss": 0.6785, "step": 13415 }, { "epoch": 1.09, "grad_norm": 2.7832589555461102, "learning_rate": 7.3602817933858015e-06, "loss": 0.6633, "step": 13416 }, { "epoch": 1.09, "grad_norm": 4.115218613629016, "learning_rate": 7.359895270129878e-06, "loss": 0.7278, "step": 13417 }, { "epoch": 1.09, "grad_norm": 4.93241876152276, "learning_rate": 7.359508728728344e-06, "loss": 0.6568, "step": 13418 }, { "epoch": 1.09, "grad_norm": 3.244464674999036, "learning_rate": 7.359122169184171e-06, "loss": 0.5654, "step": 13419 }, { "epoch": 1.09, "grad_norm": 3.208430393175036, "learning_rate": 7.358735591500333e-06, "loss": 0.6408, "step": 13420 }, { "epoch": 1.09, "grad_norm": 2.9771764239776575, "learning_rate": 7.3583489956798e-06, "loss": 0.5922, "step": 13421 }, { "epoch": 1.09, "grad_norm": 2.6172242079302737, "learning_rate": 7.357962381725548e-06, "loss": 0.5347, "step": 13422 }, { "epoch": 1.09, "grad_norm": 3.0308398789347737, "learning_rate": 7.357575749640545e-06, "loss": 0.7112, "step": 13423 }, { "epoch": 1.09, "grad_norm": 4.253798493946265, "learning_rate": 7.357189099427767e-06, "loss": 0.5547, "step": 13424 }, { "epoch": 1.09, "grad_norm": 3.730119601981954, "learning_rate": 7.3568024310901875e-06, "loss": 0.4586, "step": 13425 }, { "epoch": 1.09, "grad_norm": 3.1542016367072003, "learning_rate": 7.356415744630779e-06, "loss": 0.6294, "step": 13426 }, { "epoch": 1.09, "grad_norm": 9.481997050228143, "learning_rate": 7.3560290400525125e-06, "loss": 0.5434, "step": 13427 }, { "epoch": 1.09, "grad_norm": 2.683830841207533, "learning_rate": 7.355642317358366e-06, "loss": 0.6899, "step": 13428 }, { "epoch": 1.09, "grad_norm": 2.1936319839719096, "learning_rate": 7.355255576551309e-06, "loss": 0.5321, "step": 13429 }, { "epoch": 1.09, "grad_norm": 3.9381660235072116, "learning_rate": 7.354868817634317e-06, "loss": 0.6586, "step": 13430 }, { "epoch": 1.09, "grad_norm": 7.079058053557462, "learning_rate": 7.354482040610363e-06, "loss": 0.7153, "step": 13431 }, { "epoch": 1.09, "grad_norm": 3.7320977034790244, "learning_rate": 7.354095245482423e-06, "loss": 0.441, "step": 13432 }, { "epoch": 1.09, "grad_norm": 2.9172066726457424, "learning_rate": 7.353708432253469e-06, "loss": 0.5116, "step": 13433 }, { "epoch": 1.09, "grad_norm": 3.329639278298864, "learning_rate": 7.353321600926476e-06, "loss": 0.6715, "step": 13434 }, { "epoch": 1.09, "grad_norm": 3.3439046228501197, "learning_rate": 7.352934751504418e-06, "loss": 0.5837, "step": 13435 }, { "epoch": 1.09, "grad_norm": 5.574842284300025, "learning_rate": 7.352547883990271e-06, "loss": 0.6844, "step": 13436 }, { "epoch": 1.09, "grad_norm": 4.360454148616133, "learning_rate": 7.352160998387007e-06, "loss": 0.6115, "step": 13437 }, { "epoch": 1.09, "grad_norm": 2.9082686960950763, "learning_rate": 7.3517740946976035e-06, "loss": 0.4728, "step": 13438 }, { "epoch": 1.09, "grad_norm": 6.181744259678195, "learning_rate": 7.351387172925033e-06, "loss": 0.5889, "step": 13439 }, { "epoch": 1.09, "grad_norm": 10.918153480782195, "learning_rate": 7.351000233072274e-06, "loss": 0.7201, "step": 13440 }, { "epoch": 1.09, "grad_norm": 10.20131753397193, "learning_rate": 7.3506132751422985e-06, "loss": 0.5955, "step": 13441 }, { "epoch": 1.09, "grad_norm": 2.357334017449769, "learning_rate": 7.3502262991380835e-06, "loss": 0.6144, "step": 13442 }, { "epoch": 1.09, "grad_norm": 2.931643129704439, "learning_rate": 7.3498393050626034e-06, "loss": 0.465, "step": 13443 }, { "epoch": 1.09, "grad_norm": 3.4207700503272296, "learning_rate": 7.349452292918835e-06, "loss": 0.6511, "step": 13444 }, { "epoch": 1.09, "grad_norm": 3.5144603866879165, "learning_rate": 7.349065262709754e-06, "loss": 0.5947, "step": 13445 }, { "epoch": 1.09, "grad_norm": 2.3408194512040215, "learning_rate": 7.348678214438337e-06, "loss": 0.5454, "step": 13446 }, { "epoch": 1.09, "grad_norm": 3.3527654093400265, "learning_rate": 7.348291148107557e-06, "loss": 0.5844, "step": 13447 }, { "epoch": 1.09, "grad_norm": 6.73129166832152, "learning_rate": 7.3479040637203935e-06, "loss": 0.5802, "step": 13448 }, { "epoch": 1.09, "grad_norm": 4.4470233452306385, "learning_rate": 7.347516961279821e-06, "loss": 0.6498, "step": 13449 }, { "epoch": 1.09, "grad_norm": 3.3274888497661697, "learning_rate": 7.3471298407888165e-06, "loss": 0.5489, "step": 13450 }, { "epoch": 1.09, "grad_norm": 5.73446100552803, "learning_rate": 7.346742702250358e-06, "loss": 0.642, "step": 13451 }, { "epoch": 1.09, "grad_norm": 2.4952394942598954, "learning_rate": 7.346355545667419e-06, "loss": 0.5061, "step": 13452 }, { "epoch": 1.09, "grad_norm": 18.803310916818997, "learning_rate": 7.345968371042981e-06, "loss": 0.5707, "step": 13453 }, { "epoch": 1.09, "grad_norm": 3.8711459653014866, "learning_rate": 7.345581178380018e-06, "loss": 0.5937, "step": 13454 }, { "epoch": 1.09, "grad_norm": 4.704245202427621, "learning_rate": 7.345193967681508e-06, "loss": 0.6444, "step": 13455 }, { "epoch": 1.09, "grad_norm": 3.6535478592179382, "learning_rate": 7.344806738950425e-06, "loss": 0.5379, "step": 13456 }, { "epoch": 1.09, "grad_norm": 5.5212331133772725, "learning_rate": 7.344419492189753e-06, "loss": 0.4924, "step": 13457 }, { "epoch": 1.09, "grad_norm": 3.5534392047519785, "learning_rate": 7.344032227402465e-06, "loss": 0.4625, "step": 13458 }, { "epoch": 1.09, "grad_norm": 3.7249069663835366, "learning_rate": 7.343644944591539e-06, "loss": 0.6002, "step": 13459 }, { "epoch": 1.09, "grad_norm": 5.9612294800676, "learning_rate": 7.343257643759953e-06, "loss": 0.599, "step": 13460 }, { "epoch": 1.09, "grad_norm": 3.6127049233918727, "learning_rate": 7.342870324910688e-06, "loss": 0.7612, "step": 13461 }, { "epoch": 1.09, "grad_norm": 4.464996920045892, "learning_rate": 7.34248298804672e-06, "loss": 0.6314, "step": 13462 }, { "epoch": 1.09, "grad_norm": 3.517255524805919, "learning_rate": 7.342095633171025e-06, "loss": 0.7793, "step": 13463 }, { "epoch": 1.09, "grad_norm": 2.7706442378490292, "learning_rate": 7.3417082602865845e-06, "loss": 0.6191, "step": 13464 }, { "epoch": 1.09, "grad_norm": 4.555545635871531, "learning_rate": 7.341320869396376e-06, "loss": 0.717, "step": 13465 }, { "epoch": 1.09, "grad_norm": 3.8157611043444413, "learning_rate": 7.34093346050338e-06, "loss": 0.638, "step": 13466 }, { "epoch": 1.09, "grad_norm": 2.9898488651488053, "learning_rate": 7.3405460336105726e-06, "loss": 0.5815, "step": 13467 }, { "epoch": 1.09, "grad_norm": 18.969251562968797, "learning_rate": 7.340158588720934e-06, "loss": 0.4965, "step": 13468 }, { "epoch": 1.09, "grad_norm": 2.292176529253343, "learning_rate": 7.339771125837443e-06, "loss": 0.4479, "step": 13469 }, { "epoch": 1.09, "grad_norm": 4.724094616916793, "learning_rate": 7.339383644963078e-06, "loss": 0.693, "step": 13470 }, { "epoch": 1.09, "grad_norm": 3.5563815092278364, "learning_rate": 7.338996146100822e-06, "loss": 0.606, "step": 13471 }, { "epoch": 1.09, "grad_norm": 3.5694023145069265, "learning_rate": 7.338608629253649e-06, "loss": 0.5273, "step": 13472 }, { "epoch": 1.09, "grad_norm": 3.814591843391919, "learning_rate": 7.338221094424545e-06, "loss": 0.4615, "step": 13473 }, { "epoch": 1.09, "grad_norm": 2.3815621487596803, "learning_rate": 7.337833541616486e-06, "loss": 0.5943, "step": 13474 }, { "epoch": 1.09, "grad_norm": 2.8345208171741776, "learning_rate": 7.337445970832451e-06, "loss": 0.5789, "step": 13475 }, { "epoch": 1.09, "grad_norm": 3.990302422910012, "learning_rate": 7.337058382075421e-06, "loss": 0.6557, "step": 13476 }, { "epoch": 1.09, "grad_norm": 4.377866920367567, "learning_rate": 7.336670775348379e-06, "loss": 0.6824, "step": 13477 }, { "epoch": 1.09, "grad_norm": 2.089018424450257, "learning_rate": 7.336283150654303e-06, "loss": 0.5621, "step": 13478 }, { "epoch": 1.09, "grad_norm": 2.5477827818324004, "learning_rate": 7.335895507996174e-06, "loss": 0.4, "step": 13479 }, { "epoch": 1.09, "grad_norm": 3.696650310536078, "learning_rate": 7.33550784737697e-06, "loss": 0.493, "step": 13480 }, { "epoch": 1.09, "grad_norm": 2.5574118612161696, "learning_rate": 7.335120168799675e-06, "loss": 0.5891, "step": 13481 }, { "epoch": 1.1, "grad_norm": 3.386627649159391, "learning_rate": 7.33473247226727e-06, "loss": 0.6264, "step": 13482 }, { "epoch": 1.1, "grad_norm": 18.408373989860383, "learning_rate": 7.334344757782735e-06, "loss": 0.618, "step": 13483 }, { "epoch": 1.1, "grad_norm": 2.9146463133579195, "learning_rate": 7.333957025349051e-06, "loss": 0.66, "step": 13484 }, { "epoch": 1.1, "grad_norm": 6.9267255808415005, "learning_rate": 7.3335692749692e-06, "loss": 0.7153, "step": 13485 }, { "epoch": 1.1, "grad_norm": 4.579137866249911, "learning_rate": 7.333181506646163e-06, "loss": 0.5246, "step": 13486 }, { "epoch": 1.1, "grad_norm": 3.3301909924003104, "learning_rate": 7.332793720382921e-06, "loss": 0.513, "step": 13487 }, { "epoch": 1.1, "grad_norm": 3.6475195832753027, "learning_rate": 7.332405916182457e-06, "loss": 0.4976, "step": 13488 }, { "epoch": 1.1, "grad_norm": 2.6061015914992938, "learning_rate": 7.332018094047752e-06, "loss": 0.5899, "step": 13489 }, { "epoch": 1.1, "grad_norm": 3.6413188892324535, "learning_rate": 7.33163025398179e-06, "loss": 0.6308, "step": 13490 }, { "epoch": 1.1, "grad_norm": 2.6946965463132937, "learning_rate": 7.3312423959875514e-06, "loss": 0.5647, "step": 13491 }, { "epoch": 1.1, "grad_norm": 5.141701003957877, "learning_rate": 7.330854520068017e-06, "loss": 0.5795, "step": 13492 }, { "epoch": 1.1, "grad_norm": 2.4440313294029488, "learning_rate": 7.3304666262261716e-06, "loss": 0.636, "step": 13493 }, { "epoch": 1.1, "grad_norm": 3.2288124472461246, "learning_rate": 7.330078714464997e-06, "loss": 0.6276, "step": 13494 }, { "epoch": 1.1, "grad_norm": 3.341430905712477, "learning_rate": 7.329690784787478e-06, "loss": 0.5773, "step": 13495 }, { "epoch": 1.1, "grad_norm": 2.2399777233803424, "learning_rate": 7.329302837196592e-06, "loss": 0.5462, "step": 13496 }, { "epoch": 1.1, "grad_norm": 5.605321198857471, "learning_rate": 7.328914871695327e-06, "loss": 0.6128, "step": 13497 }, { "epoch": 1.1, "grad_norm": 2.874917193864595, "learning_rate": 7.328526888286666e-06, "loss": 0.571, "step": 13498 }, { "epoch": 1.1, "grad_norm": 3.4971126242514456, "learning_rate": 7.328138886973589e-06, "loss": 0.6595, "step": 13499 }, { "epoch": 1.1, "grad_norm": 12.190328503403988, "learning_rate": 7.327750867759081e-06, "loss": 0.5138, "step": 13500 }, { "epoch": 1.1, "grad_norm": 3.466214100452097, "learning_rate": 7.327362830646127e-06, "loss": 0.6627, "step": 13501 }, { "epoch": 1.1, "grad_norm": 4.3354947054144075, "learning_rate": 7.32697477563771e-06, "loss": 0.5575, "step": 13502 }, { "epoch": 1.1, "grad_norm": 4.2027677586716345, "learning_rate": 7.326586702736813e-06, "loss": 0.7054, "step": 13503 }, { "epoch": 1.1, "grad_norm": 3.2703466827775682, "learning_rate": 7.326198611946419e-06, "loss": 0.5926, "step": 13504 }, { "epoch": 1.1, "grad_norm": 3.624998139152789, "learning_rate": 7.325810503269514e-06, "loss": 0.6104, "step": 13505 }, { "epoch": 1.1, "grad_norm": 3.0080727254089044, "learning_rate": 7.325422376709082e-06, "loss": 0.5625, "step": 13506 }, { "epoch": 1.1, "grad_norm": 3.954914749444578, "learning_rate": 7.325034232268107e-06, "loss": 0.6458, "step": 13507 }, { "epoch": 1.1, "grad_norm": 2.896013255977765, "learning_rate": 7.3246460699495725e-06, "loss": 0.5809, "step": 13508 }, { "epoch": 1.1, "grad_norm": 17.46524313233495, "learning_rate": 7.324257889756464e-06, "loss": 0.5315, "step": 13509 }, { "epoch": 1.1, "grad_norm": 3.577513130146795, "learning_rate": 7.323869691691767e-06, "loss": 0.6148, "step": 13510 }, { "epoch": 1.1, "grad_norm": 3.8764419185000416, "learning_rate": 7.323481475758467e-06, "loss": 0.6576, "step": 13511 }, { "epoch": 1.1, "grad_norm": 4.847178116767892, "learning_rate": 7.323093241959546e-06, "loss": 0.5483, "step": 13512 }, { "epoch": 1.1, "grad_norm": 2.712003006796985, "learning_rate": 7.322704990297992e-06, "loss": 0.5917, "step": 13513 }, { "epoch": 1.1, "grad_norm": 2.897607361700453, "learning_rate": 7.322316720776788e-06, "loss": 0.6499, "step": 13514 }, { "epoch": 1.1, "grad_norm": 4.558409061649964, "learning_rate": 7.321928433398922e-06, "loss": 0.5401, "step": 13515 }, { "epoch": 1.1, "grad_norm": 4.719251330486398, "learning_rate": 7.32154012816738e-06, "loss": 0.5595, "step": 13516 }, { "epoch": 1.1, "grad_norm": 3.238679698966101, "learning_rate": 7.321151805085143e-06, "loss": 0.8019, "step": 13517 }, { "epoch": 1.1, "grad_norm": 2.5570233681813996, "learning_rate": 7.320763464155202e-06, "loss": 0.6095, "step": 13518 }, { "epoch": 1.1, "grad_norm": 3.445873274398044, "learning_rate": 7.320375105380541e-06, "loss": 0.6063, "step": 13519 }, { "epoch": 1.1, "grad_norm": 4.606409674015923, "learning_rate": 7.319986728764146e-06, "loss": 0.5613, "step": 13520 }, { "epoch": 1.1, "grad_norm": 3.5550820380594765, "learning_rate": 7.319598334309001e-06, "loss": 0.506, "step": 13521 }, { "epoch": 1.1, "grad_norm": 2.7548082115758032, "learning_rate": 7.319209922018098e-06, "loss": 0.6644, "step": 13522 }, { "epoch": 1.1, "grad_norm": 3.4446181681762518, "learning_rate": 7.31882149189442e-06, "loss": 0.5624, "step": 13523 }, { "epoch": 1.1, "grad_norm": 7.762712303756813, "learning_rate": 7.318433043940954e-06, "loss": 0.5479, "step": 13524 }, { "epoch": 1.1, "grad_norm": 3.6672561799376724, "learning_rate": 7.318044578160685e-06, "loss": 0.5259, "step": 13525 }, { "epoch": 1.1, "grad_norm": 3.2079248353381953, "learning_rate": 7.317656094556605e-06, "loss": 0.5229, "step": 13526 }, { "epoch": 1.1, "grad_norm": 2.448107930050599, "learning_rate": 7.317267593131698e-06, "loss": 0.5732, "step": 13527 }, { "epoch": 1.1, "grad_norm": 3.48208376292226, "learning_rate": 7.316879073888951e-06, "loss": 0.6546, "step": 13528 }, { "epoch": 1.1, "grad_norm": 4.87011793167953, "learning_rate": 7.31649053683135e-06, "loss": 0.5394, "step": 13529 }, { "epoch": 1.1, "grad_norm": 2.4065631656474915, "learning_rate": 7.316101981961885e-06, "loss": 0.4963, "step": 13530 }, { "epoch": 1.1, "grad_norm": 3.4261978583146524, "learning_rate": 7.315713409283543e-06, "loss": 0.5597, "step": 13531 }, { "epoch": 1.1, "grad_norm": 3.0708375064384312, "learning_rate": 7.315324818799313e-06, "loss": 0.6599, "step": 13532 }, { "epoch": 1.1, "grad_norm": 3.119498607794073, "learning_rate": 7.31493621051218e-06, "loss": 0.5502, "step": 13533 }, { "epoch": 1.1, "grad_norm": 3.107578346702478, "learning_rate": 7.314547584425136e-06, "loss": 0.5194, "step": 13534 }, { "epoch": 1.1, "grad_norm": 3.458998292235858, "learning_rate": 7.314158940541165e-06, "loss": 0.5856, "step": 13535 }, { "epoch": 1.1, "grad_norm": 4.956017720784595, "learning_rate": 7.313770278863258e-06, "loss": 0.7769, "step": 13536 }, { "epoch": 1.1, "grad_norm": 3.159744534267069, "learning_rate": 7.313381599394401e-06, "loss": 0.5113, "step": 13537 }, { "epoch": 1.1, "grad_norm": 2.941019969044103, "learning_rate": 7.312992902137587e-06, "loss": 0.6011, "step": 13538 }, { "epoch": 1.1, "grad_norm": 4.015922627952936, "learning_rate": 7.312604187095801e-06, "loss": 0.5856, "step": 13539 }, { "epoch": 1.1, "grad_norm": 4.05314211414396, "learning_rate": 7.3122154542720335e-06, "loss": 0.496, "step": 13540 }, { "epoch": 1.1, "grad_norm": 3.4463788117050544, "learning_rate": 7.311826703669271e-06, "loss": 0.4943, "step": 13541 }, { "epoch": 1.1, "grad_norm": 7.18866687740904, "learning_rate": 7.311437935290508e-06, "loss": 0.6552, "step": 13542 }, { "epoch": 1.1, "grad_norm": 6.341320372758751, "learning_rate": 7.311049149138729e-06, "loss": 0.5455, "step": 13543 }, { "epoch": 1.1, "grad_norm": 4.408898606957012, "learning_rate": 7.310660345216924e-06, "loss": 0.6043, "step": 13544 }, { "epoch": 1.1, "grad_norm": 3.2899498532532228, "learning_rate": 7.310271523528084e-06, "loss": 0.5835, "step": 13545 }, { "epoch": 1.1, "grad_norm": 4.270912281069678, "learning_rate": 7.309882684075199e-06, "loss": 0.5058, "step": 13546 }, { "epoch": 1.1, "grad_norm": 3.1832429803423357, "learning_rate": 7.309493826861258e-06, "loss": 0.6059, "step": 13547 }, { "epoch": 1.1, "grad_norm": 3.467091211329391, "learning_rate": 7.309104951889252e-06, "loss": 0.649, "step": 13548 }, { "epoch": 1.1, "grad_norm": 4.0405913863912275, "learning_rate": 7.308716059162169e-06, "loss": 0.5789, "step": 13549 }, { "epoch": 1.1, "grad_norm": 4.178791172911453, "learning_rate": 7.308327148683e-06, "loss": 0.5797, "step": 13550 }, { "epoch": 1.1, "grad_norm": 2.3173689717226282, "learning_rate": 7.3079382204547365e-06, "loss": 0.4513, "step": 13551 }, { "epoch": 1.1, "grad_norm": 7.577770580343595, "learning_rate": 7.307549274480369e-06, "loss": 0.6018, "step": 13552 }, { "epoch": 1.1, "grad_norm": 9.44835882331754, "learning_rate": 7.3071603107628865e-06, "loss": 0.5657, "step": 13553 }, { "epoch": 1.1, "grad_norm": 4.47599878889487, "learning_rate": 7.306771329305281e-06, "loss": 0.6772, "step": 13554 }, { "epoch": 1.1, "grad_norm": 4.315885006547513, "learning_rate": 7.306382330110544e-06, "loss": 0.6037, "step": 13555 }, { "epoch": 1.1, "grad_norm": 4.069729257195434, "learning_rate": 7.305993313181666e-06, "loss": 0.4309, "step": 13556 }, { "epoch": 1.1, "grad_norm": 3.46152939428144, "learning_rate": 7.305604278521636e-06, "loss": 0.5809, "step": 13557 }, { "epoch": 1.1, "grad_norm": 2.721081011203209, "learning_rate": 7.305215226133451e-06, "loss": 0.4471, "step": 13558 }, { "epoch": 1.1, "grad_norm": 2.7144132328526247, "learning_rate": 7.304826156020096e-06, "loss": 0.5945, "step": 13559 }, { "epoch": 1.1, "grad_norm": 8.37350810475523, "learning_rate": 7.304437068184567e-06, "loss": 0.5545, "step": 13560 }, { "epoch": 1.1, "grad_norm": 3.4706792239582818, "learning_rate": 7.304047962629854e-06, "loss": 0.5397, "step": 13561 }, { "epoch": 1.1, "grad_norm": 5.737522773980567, "learning_rate": 7.303658839358949e-06, "loss": 0.6053, "step": 13562 }, { "epoch": 1.1, "grad_norm": 7.991863958193902, "learning_rate": 7.303269698374844e-06, "loss": 0.788, "step": 13563 }, { "epoch": 1.1, "grad_norm": 5.1569081698735895, "learning_rate": 7.302880539680532e-06, "loss": 0.587, "step": 13564 }, { "epoch": 1.1, "grad_norm": 3.669724445338854, "learning_rate": 7.302491363279004e-06, "loss": 0.5617, "step": 13565 }, { "epoch": 1.1, "grad_norm": 2.4799806545465546, "learning_rate": 7.302102169173254e-06, "loss": 0.485, "step": 13566 }, { "epoch": 1.1, "grad_norm": 4.329439633083559, "learning_rate": 7.301712957366273e-06, "loss": 0.5074, "step": 13567 }, { "epoch": 1.1, "grad_norm": 3.5407603098365454, "learning_rate": 7.301323727861056e-06, "loss": 0.4944, "step": 13568 }, { "epoch": 1.1, "grad_norm": 4.0054824256282195, "learning_rate": 7.300934480660593e-06, "loss": 0.7084, "step": 13569 }, { "epoch": 1.1, "grad_norm": 2.8037781900403838, "learning_rate": 7.300545215767878e-06, "loss": 0.6442, "step": 13570 }, { "epoch": 1.1, "grad_norm": 5.946360632985224, "learning_rate": 7.300155933185905e-06, "loss": 0.7092, "step": 13571 }, { "epoch": 1.1, "grad_norm": 4.141908972931778, "learning_rate": 7.299766632917666e-06, "loss": 0.529, "step": 13572 }, { "epoch": 1.1, "grad_norm": 3.8391000099197234, "learning_rate": 7.299377314966156e-06, "loss": 0.4806, "step": 13573 }, { "epoch": 1.1, "grad_norm": 5.4165428611223225, "learning_rate": 7.298987979334367e-06, "loss": 0.6158, "step": 13574 }, { "epoch": 1.1, "grad_norm": 4.054570256657723, "learning_rate": 7.298598626025293e-06, "loss": 0.7201, "step": 13575 }, { "epoch": 1.1, "grad_norm": 9.248602861189125, "learning_rate": 7.298209255041929e-06, "loss": 0.5464, "step": 13576 }, { "epoch": 1.1, "grad_norm": 10.873927472003286, "learning_rate": 7.2978198663872665e-06, "loss": 0.7012, "step": 13577 }, { "epoch": 1.1, "grad_norm": 3.666704866617444, "learning_rate": 7.297430460064302e-06, "loss": 0.5661, "step": 13578 }, { "epoch": 1.1, "grad_norm": 3.406852661198488, "learning_rate": 7.297041036076029e-06, "loss": 0.5875, "step": 13579 }, { "epoch": 1.1, "grad_norm": 3.8922825883595644, "learning_rate": 7.296651594425441e-06, "loss": 0.7036, "step": 13580 }, { "epoch": 1.1, "grad_norm": 2.939279568834087, "learning_rate": 7.296262135115533e-06, "loss": 0.489, "step": 13581 }, { "epoch": 1.1, "grad_norm": 3.9001271509756954, "learning_rate": 7.2958726581493e-06, "loss": 0.5783, "step": 13582 }, { "epoch": 1.1, "grad_norm": 5.090768593288156, "learning_rate": 7.295483163529736e-06, "loss": 0.5157, "step": 13583 }, { "epoch": 1.1, "grad_norm": 5.695163722737587, "learning_rate": 7.295093651259837e-06, "loss": 0.4091, "step": 13584 }, { "epoch": 1.1, "grad_norm": 6.1503587818411685, "learning_rate": 7.294704121342596e-06, "loss": 0.7059, "step": 13585 }, { "epoch": 1.1, "grad_norm": 4.595982494244561, "learning_rate": 7.294314573781012e-06, "loss": 0.5023, "step": 13586 }, { "epoch": 1.1, "grad_norm": 4.811992358783734, "learning_rate": 7.293925008578075e-06, "loss": 0.5458, "step": 13587 }, { "epoch": 1.1, "grad_norm": 2.539335337156614, "learning_rate": 7.2935354257367855e-06, "loss": 0.5069, "step": 13588 }, { "epoch": 1.1, "grad_norm": 2.9754689287191525, "learning_rate": 7.293145825260135e-06, "loss": 0.4656, "step": 13589 }, { "epoch": 1.1, "grad_norm": 4.374643705646175, "learning_rate": 7.292756207151122e-06, "loss": 0.5121, "step": 13590 }, { "epoch": 1.1, "grad_norm": 11.160481761635483, "learning_rate": 7.292366571412741e-06, "loss": 0.5037, "step": 13591 }, { "epoch": 1.1, "grad_norm": 5.483865060339781, "learning_rate": 7.29197691804799e-06, "loss": 0.684, "step": 13592 }, { "epoch": 1.1, "grad_norm": 3.31470691122908, "learning_rate": 7.2915872470598605e-06, "loss": 0.4371, "step": 13593 }, { "epoch": 1.1, "grad_norm": 2.72764792064609, "learning_rate": 7.291197558451353e-06, "loss": 0.6355, "step": 13594 }, { "epoch": 1.1, "grad_norm": 4.746384486518186, "learning_rate": 7.290807852225462e-06, "loss": 0.6652, "step": 13595 }, { "epoch": 1.1, "grad_norm": 4.1724240882029635, "learning_rate": 7.290418128385186e-06, "loss": 0.617, "step": 13596 }, { "epoch": 1.1, "grad_norm": 8.395128518659952, "learning_rate": 7.290028386933518e-06, "loss": 0.5053, "step": 13597 }, { "epoch": 1.1, "grad_norm": 2.537125099289778, "learning_rate": 7.289638627873459e-06, "loss": 0.4778, "step": 13598 }, { "epoch": 1.1, "grad_norm": 3.828844361020568, "learning_rate": 7.289248851208003e-06, "loss": 0.668, "step": 13599 }, { "epoch": 1.1, "grad_norm": 3.738144377230147, "learning_rate": 7.288859056940148e-06, "loss": 0.4496, "step": 13600 }, { "epoch": 1.1, "grad_norm": 2.5324930536936354, "learning_rate": 7.288469245072891e-06, "loss": 0.5376, "step": 13601 }, { "epoch": 1.1, "grad_norm": 4.017819418566862, "learning_rate": 7.288079415609229e-06, "loss": 0.6945, "step": 13602 }, { "epoch": 1.1, "grad_norm": 7.879159822813833, "learning_rate": 7.287689568552161e-06, "loss": 0.7104, "step": 13603 }, { "epoch": 1.1, "grad_norm": 2.844743820253322, "learning_rate": 7.287299703904682e-06, "loss": 0.6756, "step": 13604 }, { "epoch": 1.1, "grad_norm": 3.5662607641722843, "learning_rate": 7.2869098216697934e-06, "loss": 0.5007, "step": 13605 }, { "epoch": 1.11, "grad_norm": 2.5600655453042065, "learning_rate": 7.286519921850489e-06, "loss": 0.671, "step": 13606 }, { "epoch": 1.11, "grad_norm": 8.035498911679829, "learning_rate": 7.28613000444977e-06, "loss": 0.6832, "step": 13607 }, { "epoch": 1.11, "grad_norm": 4.526303496241881, "learning_rate": 7.285740069470633e-06, "loss": 0.6311, "step": 13608 }, { "epoch": 1.11, "grad_norm": 6.597877776277447, "learning_rate": 7.285350116916074e-06, "loss": 0.5635, "step": 13609 }, { "epoch": 1.11, "grad_norm": 3.5848214898594453, "learning_rate": 7.284960146789097e-06, "loss": 0.5096, "step": 13610 }, { "epoch": 1.11, "grad_norm": 2.4774790972932212, "learning_rate": 7.284570159092696e-06, "loss": 0.6451, "step": 13611 }, { "epoch": 1.11, "grad_norm": 4.099595520630844, "learning_rate": 7.284180153829872e-06, "loss": 0.5972, "step": 13612 }, { "epoch": 1.11, "grad_norm": 3.5914068408288857, "learning_rate": 7.283790131003623e-06, "loss": 0.5741, "step": 13613 }, { "epoch": 1.11, "grad_norm": 3.1171858573239404, "learning_rate": 7.283400090616948e-06, "loss": 0.5853, "step": 13614 }, { "epoch": 1.11, "grad_norm": 2.805075246820459, "learning_rate": 7.283010032672844e-06, "loss": 0.5167, "step": 13615 }, { "epoch": 1.11, "grad_norm": 3.331404559748941, "learning_rate": 7.282619957174315e-06, "loss": 0.5707, "step": 13616 }, { "epoch": 1.11, "grad_norm": 4.827123802040391, "learning_rate": 7.282229864124356e-06, "loss": 0.45, "step": 13617 }, { "epoch": 1.11, "grad_norm": 4.674823034587977, "learning_rate": 7.2818397535259685e-06, "loss": 0.6018, "step": 13618 }, { "epoch": 1.11, "grad_norm": 2.4115448723124886, "learning_rate": 7.281449625382151e-06, "loss": 0.5965, "step": 13619 }, { "epoch": 1.11, "grad_norm": 2.8496826932441754, "learning_rate": 7.281059479695906e-06, "loss": 0.4554, "step": 13620 }, { "epoch": 1.11, "grad_norm": 3.1166968602242227, "learning_rate": 7.280669316470229e-06, "loss": 0.6485, "step": 13621 }, { "epoch": 1.11, "grad_norm": 3.597477060902457, "learning_rate": 7.2802791357081236e-06, "loss": 0.616, "step": 13622 }, { "epoch": 1.11, "grad_norm": 2.54740171127477, "learning_rate": 7.279888937412587e-06, "loss": 0.6266, "step": 13623 }, { "epoch": 1.11, "grad_norm": 3.7886230570323183, "learning_rate": 7.279498721586623e-06, "loss": 0.7589, "step": 13624 }, { "epoch": 1.11, "grad_norm": 3.600326995782971, "learning_rate": 7.279108488233231e-06, "loss": 0.4986, "step": 13625 }, { "epoch": 1.11, "grad_norm": 2.365628714708935, "learning_rate": 7.2787182373554085e-06, "loss": 0.5026, "step": 13626 }, { "epoch": 1.11, "grad_norm": 3.8072110976070106, "learning_rate": 7.278327968956159e-06, "loss": 0.6004, "step": 13627 }, { "epoch": 1.11, "grad_norm": 3.4000086205518705, "learning_rate": 7.277937683038484e-06, "loss": 0.6412, "step": 13628 }, { "epoch": 1.11, "grad_norm": 12.052754719416749, "learning_rate": 7.277547379605383e-06, "loss": 0.7265, "step": 13629 }, { "epoch": 1.11, "grad_norm": 3.290521225808238, "learning_rate": 7.2771570586598576e-06, "loss": 0.5871, "step": 13630 }, { "epoch": 1.11, "grad_norm": 8.44642544046495, "learning_rate": 7.276766720204907e-06, "loss": 0.5083, "step": 13631 }, { "epoch": 1.11, "grad_norm": 3.618153842211457, "learning_rate": 7.276376364243536e-06, "loss": 0.5141, "step": 13632 }, { "epoch": 1.11, "grad_norm": 3.22437007278642, "learning_rate": 7.275985990778745e-06, "loss": 0.5498, "step": 13633 }, { "epoch": 1.11, "grad_norm": 3.1396820332048248, "learning_rate": 7.275595599813534e-06, "loss": 0.4359, "step": 13634 }, { "epoch": 1.11, "grad_norm": 2.2956769549208964, "learning_rate": 7.275205191350907e-06, "loss": 0.6314, "step": 13635 }, { "epoch": 1.11, "grad_norm": 5.373027564793716, "learning_rate": 7.274814765393864e-06, "loss": 0.5064, "step": 13636 }, { "epoch": 1.11, "grad_norm": 3.2087241182326713, "learning_rate": 7.274424321945408e-06, "loss": 0.6844, "step": 13637 }, { "epoch": 1.11, "grad_norm": 3.850744892233335, "learning_rate": 7.274033861008542e-06, "loss": 0.6981, "step": 13638 }, { "epoch": 1.11, "grad_norm": 2.8135283505772137, "learning_rate": 7.273643382586266e-06, "loss": 0.5551, "step": 13639 }, { "epoch": 1.11, "grad_norm": 4.823781858190363, "learning_rate": 7.273252886681585e-06, "loss": 0.5302, "step": 13640 }, { "epoch": 1.11, "grad_norm": 6.95097058641305, "learning_rate": 7.2728623732975e-06, "loss": 0.5494, "step": 13641 }, { "epoch": 1.11, "grad_norm": 3.938086509159436, "learning_rate": 7.272471842437015e-06, "loss": 0.6281, "step": 13642 }, { "epoch": 1.11, "grad_norm": 3.2491894769278615, "learning_rate": 7.272081294103131e-06, "loss": 0.5295, "step": 13643 }, { "epoch": 1.11, "grad_norm": 2.9046462949286327, "learning_rate": 7.271690728298852e-06, "loss": 0.5519, "step": 13644 }, { "epoch": 1.11, "grad_norm": 4.528543221855132, "learning_rate": 7.271300145027182e-06, "loss": 0.5703, "step": 13645 }, { "epoch": 1.11, "grad_norm": 3.560129751321755, "learning_rate": 7.2709095442911236e-06, "loss": 0.7193, "step": 13646 }, { "epoch": 1.11, "grad_norm": 2.8951147975485854, "learning_rate": 7.27051892609368e-06, "loss": 0.6322, "step": 13647 }, { "epoch": 1.11, "grad_norm": 3.8027551698005913, "learning_rate": 7.2701282904378525e-06, "loss": 0.53, "step": 13648 }, { "epoch": 1.11, "grad_norm": 3.009623067233355, "learning_rate": 7.269737637326649e-06, "loss": 0.5484, "step": 13649 }, { "epoch": 1.11, "grad_norm": 3.230212847971009, "learning_rate": 7.269346966763071e-06, "loss": 0.4083, "step": 13650 }, { "epoch": 1.11, "grad_norm": 4.214078454548609, "learning_rate": 7.268956278750122e-06, "loss": 0.5674, "step": 13651 }, { "epoch": 1.11, "grad_norm": 4.050525411313377, "learning_rate": 7.2685655732908064e-06, "loss": 0.6467, "step": 13652 }, { "epoch": 1.11, "grad_norm": 3.3000538062489673, "learning_rate": 7.268174850388131e-06, "loss": 0.5376, "step": 13653 }, { "epoch": 1.11, "grad_norm": 2.8406946846862366, "learning_rate": 7.267784110045096e-06, "loss": 0.557, "step": 13654 }, { "epoch": 1.11, "grad_norm": 8.22274558048246, "learning_rate": 7.267393352264708e-06, "loss": 0.5333, "step": 13655 }, { "epoch": 1.11, "grad_norm": 2.9546001891123845, "learning_rate": 7.267002577049972e-06, "loss": 0.4562, "step": 13656 }, { "epoch": 1.11, "grad_norm": 7.384747069188464, "learning_rate": 7.266611784403892e-06, "loss": 0.6083, "step": 13657 }, { "epoch": 1.11, "grad_norm": 4.960575889470557, "learning_rate": 7.266220974329472e-06, "loss": 0.4839, "step": 13658 }, { "epoch": 1.11, "grad_norm": 5.760522025014579, "learning_rate": 7.265830146829719e-06, "loss": 0.8203, "step": 13659 }, { "epoch": 1.11, "grad_norm": 2.720569229733579, "learning_rate": 7.2654393019076365e-06, "loss": 0.7054, "step": 13660 }, { "epoch": 1.11, "grad_norm": 5.322842485156056, "learning_rate": 7.265048439566231e-06, "loss": 0.6491, "step": 13661 }, { "epoch": 1.11, "grad_norm": 4.830703679417691, "learning_rate": 7.2646575598085065e-06, "loss": 0.6712, "step": 13662 }, { "epoch": 1.11, "grad_norm": 2.3070340659312154, "learning_rate": 7.264266662637469e-06, "loss": 0.6548, "step": 13663 }, { "epoch": 1.11, "grad_norm": 3.1204332912785344, "learning_rate": 7.263875748056125e-06, "loss": 0.6155, "step": 13664 }, { "epoch": 1.11, "grad_norm": 3.5955227083838075, "learning_rate": 7.2634848160674805e-06, "loss": 0.6416, "step": 13665 }, { "epoch": 1.11, "grad_norm": 2.7656666887570722, "learning_rate": 7.26309386667454e-06, "loss": 0.4581, "step": 13666 }, { "epoch": 1.11, "grad_norm": 3.2051705283746683, "learning_rate": 7.26270289988031e-06, "loss": 0.5317, "step": 13667 }, { "epoch": 1.11, "grad_norm": 3.60560063568297, "learning_rate": 7.2623119156877976e-06, "loss": 0.6239, "step": 13668 }, { "epoch": 1.11, "grad_norm": 6.668021961127223, "learning_rate": 7.261920914100008e-06, "loss": 0.5405, "step": 13669 }, { "epoch": 1.11, "grad_norm": 2.7457893845248758, "learning_rate": 7.261529895119949e-06, "loss": 0.6306, "step": 13670 }, { "epoch": 1.11, "grad_norm": 2.6840329869681816, "learning_rate": 7.2611388587506245e-06, "loss": 0.5027, "step": 13671 }, { "epoch": 1.11, "grad_norm": 4.168427182404945, "learning_rate": 7.260747804995045e-06, "loss": 0.5979, "step": 13672 }, { "epoch": 1.11, "grad_norm": 3.041583516612649, "learning_rate": 7.260356733856215e-06, "loss": 0.6731, "step": 13673 }, { "epoch": 1.11, "grad_norm": 4.9262524762455335, "learning_rate": 7.2599656453371426e-06, "loss": 0.6092, "step": 13674 }, { "epoch": 1.11, "grad_norm": 2.9129632448592915, "learning_rate": 7.259574539440833e-06, "loss": 0.7235, "step": 13675 }, { "epoch": 1.11, "grad_norm": 2.7629937667746383, "learning_rate": 7.259183416170296e-06, "loss": 0.6612, "step": 13676 }, { "epoch": 1.11, "grad_norm": 3.6304337433268703, "learning_rate": 7.2587922755285374e-06, "loss": 0.6052, "step": 13677 }, { "epoch": 1.11, "grad_norm": 3.4560103336122503, "learning_rate": 7.258401117518565e-06, "loss": 0.6594, "step": 13678 }, { "epoch": 1.11, "grad_norm": 4.938568660329166, "learning_rate": 7.258009942143387e-06, "loss": 0.6374, "step": 13679 }, { "epoch": 1.11, "grad_norm": 3.625959071665032, "learning_rate": 7.257618749406012e-06, "loss": 0.4654, "step": 13680 }, { "epoch": 1.11, "grad_norm": 3.342589056076939, "learning_rate": 7.257227539309445e-06, "loss": 0.7786, "step": 13681 }, { "epoch": 1.11, "grad_norm": 3.9275297612554554, "learning_rate": 7.256836311856697e-06, "loss": 0.5244, "step": 13682 }, { "epoch": 1.11, "grad_norm": 2.8750540339371806, "learning_rate": 7.256445067050774e-06, "loss": 0.5843, "step": 13683 }, { "epoch": 1.11, "grad_norm": 2.867133340954212, "learning_rate": 7.2560538048946874e-06, "loss": 0.4666, "step": 13684 }, { "epoch": 1.11, "grad_norm": 3.348985641589021, "learning_rate": 7.255662525391443e-06, "loss": 0.6404, "step": 13685 }, { "epoch": 1.11, "grad_norm": 3.7940246344832738, "learning_rate": 7.2552712285440485e-06, "loss": 0.6081, "step": 13686 }, { "epoch": 1.11, "grad_norm": 2.3820628047988133, "learning_rate": 7.2548799143555145e-06, "loss": 0.6042, "step": 13687 }, { "epoch": 1.11, "grad_norm": 5.82420927209422, "learning_rate": 7.2544885828288514e-06, "loss": 0.586, "step": 13688 }, { "epoch": 1.11, "grad_norm": 3.749865580424538, "learning_rate": 7.254097233967065e-06, "loss": 0.5887, "step": 13689 }, { "epoch": 1.11, "grad_norm": 4.573537936515338, "learning_rate": 7.253705867773167e-06, "loss": 0.621, "step": 13690 }, { "epoch": 1.11, "grad_norm": 2.883335237272948, "learning_rate": 7.253314484250165e-06, "loss": 0.4828, "step": 13691 }, { "epoch": 1.11, "grad_norm": 168.65855162998915, "learning_rate": 7.25292308340107e-06, "loss": 0.6059, "step": 13692 }, { "epoch": 1.11, "grad_norm": 2.0567768512908606, "learning_rate": 7.25253166522889e-06, "loss": 0.5182, "step": 13693 }, { "epoch": 1.11, "grad_norm": 2.55393114590187, "learning_rate": 7.252140229736635e-06, "loss": 0.6399, "step": 13694 }, { "epoch": 1.11, "grad_norm": 9.703428966641564, "learning_rate": 7.251748776927315e-06, "loss": 0.631, "step": 13695 }, { "epoch": 1.11, "grad_norm": 5.8407023873118025, "learning_rate": 7.25135730680394e-06, "loss": 0.7433, "step": 13696 }, { "epoch": 1.11, "grad_norm": 3.275765330255824, "learning_rate": 7.25096581936952e-06, "loss": 0.7617, "step": 13697 }, { "epoch": 1.11, "grad_norm": 2.076815594301001, "learning_rate": 7.2505743146270656e-06, "loss": 0.4659, "step": 13698 }, { "epoch": 1.11, "grad_norm": 2.3373062117208154, "learning_rate": 7.250182792579587e-06, "loss": 0.5542, "step": 13699 }, { "epoch": 1.11, "grad_norm": 6.326351678182756, "learning_rate": 7.249791253230094e-06, "loss": 0.6495, "step": 13700 }, { "epoch": 1.11, "grad_norm": 2.588511802418635, "learning_rate": 7.2493996965815976e-06, "loss": 0.5579, "step": 13701 }, { "epoch": 1.11, "grad_norm": 3.4345067324826126, "learning_rate": 7.249008122637109e-06, "loss": 0.508, "step": 13702 }, { "epoch": 1.11, "grad_norm": 3.455976206609785, "learning_rate": 7.248616531399639e-06, "loss": 0.6621, "step": 13703 }, { "epoch": 1.11, "grad_norm": 2.2468877595006487, "learning_rate": 7.2482249228721965e-06, "loss": 0.6294, "step": 13704 }, { "epoch": 1.11, "grad_norm": 3.336489416247208, "learning_rate": 7.247833297057796e-06, "loss": 0.5929, "step": 13705 }, { "epoch": 1.11, "grad_norm": 3.352030269271094, "learning_rate": 7.247441653959448e-06, "loss": 0.6797, "step": 13706 }, { "epoch": 1.11, "grad_norm": 6.455987224174443, "learning_rate": 7.247049993580162e-06, "loss": 0.5335, "step": 13707 }, { "epoch": 1.11, "grad_norm": 4.161599268951711, "learning_rate": 7.24665831592295e-06, "loss": 0.703, "step": 13708 }, { "epoch": 1.11, "grad_norm": 5.127961260542747, "learning_rate": 7.246266620990825e-06, "loss": 0.6588, "step": 13709 }, { "epoch": 1.11, "grad_norm": 11.152777537696295, "learning_rate": 7.245874908786798e-06, "loss": 0.6498, "step": 13710 }, { "epoch": 1.11, "grad_norm": 2.965389147515734, "learning_rate": 7.245483179313884e-06, "loss": 0.541, "step": 13711 }, { "epoch": 1.11, "grad_norm": 3.4586407513574806, "learning_rate": 7.245091432575088e-06, "loss": 0.576, "step": 13712 }, { "epoch": 1.11, "grad_norm": 3.1803393192682385, "learning_rate": 7.244699668573428e-06, "loss": 0.6211, "step": 13713 }, { "epoch": 1.11, "grad_norm": 3.205115546836763, "learning_rate": 7.2443078873119145e-06, "loss": 0.4287, "step": 13714 }, { "epoch": 1.11, "grad_norm": 11.735905986620926, "learning_rate": 7.243916088793561e-06, "loss": 0.769, "step": 13715 }, { "epoch": 1.11, "grad_norm": 8.72597881360186, "learning_rate": 7.243524273021379e-06, "loss": 0.6037, "step": 13716 }, { "epoch": 1.11, "grad_norm": 3.0367790302394773, "learning_rate": 7.2431324399983806e-06, "loss": 0.6474, "step": 13717 }, { "epoch": 1.11, "grad_norm": 3.4717628797993445, "learning_rate": 7.242740589727579e-06, "loss": 0.5807, "step": 13718 }, { "epoch": 1.11, "grad_norm": 8.153124968326608, "learning_rate": 7.242348722211991e-06, "loss": 0.4099, "step": 13719 }, { "epoch": 1.11, "grad_norm": 3.3190918125883537, "learning_rate": 7.241956837454622e-06, "loss": 0.5153, "step": 13720 }, { "epoch": 1.11, "grad_norm": 5.327688805317918, "learning_rate": 7.241564935458493e-06, "loss": 0.6314, "step": 13721 }, { "epoch": 1.11, "grad_norm": 2.288197494273073, "learning_rate": 7.241173016226613e-06, "loss": 0.5377, "step": 13722 }, { "epoch": 1.11, "grad_norm": 2.6292898211266262, "learning_rate": 7.240781079761998e-06, "loss": 0.5014, "step": 13723 }, { "epoch": 1.11, "grad_norm": 2.1768929274770397, "learning_rate": 7.240389126067658e-06, "loss": 0.4638, "step": 13724 }, { "epoch": 1.11, "grad_norm": 6.2558964187211314, "learning_rate": 7.2399971551466105e-06, "loss": 0.4922, "step": 13725 }, { "epoch": 1.11, "grad_norm": 5.031110740890489, "learning_rate": 7.2396051670018685e-06, "loss": 0.5348, "step": 13726 }, { "epoch": 1.11, "grad_norm": 5.314010823657378, "learning_rate": 7.239213161636446e-06, "loss": 0.6956, "step": 13727 }, { "epoch": 1.11, "grad_norm": 3.6657947949212337, "learning_rate": 7.238821139053354e-06, "loss": 0.63, "step": 13728 }, { "epoch": 1.12, "grad_norm": 3.0904740632647276, "learning_rate": 7.238429099255613e-06, "loss": 0.6213, "step": 13729 }, { "epoch": 1.12, "grad_norm": 2.7797692311792477, "learning_rate": 7.238037042246233e-06, "loss": 0.6163, "step": 13730 }, { "epoch": 1.12, "grad_norm": 3.2708110106374897, "learning_rate": 7.23764496802823e-06, "loss": 0.4747, "step": 13731 }, { "epoch": 1.12, "grad_norm": 8.342987777374985, "learning_rate": 7.237252876604617e-06, "loss": 0.6911, "step": 13732 }, { "epoch": 1.12, "grad_norm": 2.9431556945703083, "learning_rate": 7.236860767978411e-06, "loss": 0.5518, "step": 13733 }, { "epoch": 1.12, "grad_norm": 2.947640758211327, "learning_rate": 7.2364686421526265e-06, "loss": 0.5633, "step": 13734 }, { "epoch": 1.12, "grad_norm": 3.535012343741763, "learning_rate": 7.236076499130279e-06, "loss": 0.5511, "step": 13735 }, { "epoch": 1.12, "grad_norm": 4.528931065468862, "learning_rate": 7.235684338914382e-06, "loss": 0.5711, "step": 13736 }, { "epoch": 1.12, "grad_norm": 2.651878840428295, "learning_rate": 7.235292161507952e-06, "loss": 0.5151, "step": 13737 }, { "epoch": 1.12, "grad_norm": 3.734653233718342, "learning_rate": 7.234899966914005e-06, "loss": 0.6354, "step": 13738 }, { "epoch": 1.12, "grad_norm": 3.6016425562290704, "learning_rate": 7.234507755135557e-06, "loss": 0.6073, "step": 13739 }, { "epoch": 1.12, "grad_norm": 3.2749555987118457, "learning_rate": 7.234115526175621e-06, "loss": 0.665, "step": 13740 }, { "epoch": 1.12, "grad_norm": 2.3730387744696624, "learning_rate": 7.233723280037216e-06, "loss": 0.568, "step": 13741 }, { "epoch": 1.12, "grad_norm": 4.2483717443397335, "learning_rate": 7.233331016723357e-06, "loss": 0.67, "step": 13742 }, { "epoch": 1.12, "grad_norm": 3.264771619915305, "learning_rate": 7.2329387362370605e-06, "loss": 0.6045, "step": 13743 }, { "epoch": 1.12, "grad_norm": 3.8350954358834795, "learning_rate": 7.232546438581341e-06, "loss": 0.6234, "step": 13744 }, { "epoch": 1.12, "grad_norm": 2.9479147912347066, "learning_rate": 7.232154123759217e-06, "loss": 0.4711, "step": 13745 }, { "epoch": 1.12, "grad_norm": 2.911420313334315, "learning_rate": 7.231761791773705e-06, "loss": 0.6761, "step": 13746 }, { "epoch": 1.12, "grad_norm": 4.022530378352138, "learning_rate": 7.231369442627821e-06, "loss": 0.6366, "step": 13747 }, { "epoch": 1.12, "grad_norm": 2.4230112214579047, "learning_rate": 7.23097707632458e-06, "loss": 0.5104, "step": 13748 }, { "epoch": 1.12, "grad_norm": 4.310376389057019, "learning_rate": 7.230584692867003e-06, "loss": 0.4716, "step": 13749 }, { "epoch": 1.12, "grad_norm": 12.180468241073962, "learning_rate": 7.230192292258105e-06, "loss": 0.5348, "step": 13750 }, { "epoch": 1.12, "grad_norm": 2.845133121693099, "learning_rate": 7.229799874500902e-06, "loss": 0.6172, "step": 13751 }, { "epoch": 1.12, "grad_norm": 5.329772625426198, "learning_rate": 7.229407439598413e-06, "loss": 0.5634, "step": 13752 }, { "epoch": 1.12, "grad_norm": 9.164802219986807, "learning_rate": 7.2290149875536555e-06, "loss": 0.5821, "step": 13753 }, { "epoch": 1.12, "grad_norm": 2.9236811019280515, "learning_rate": 7.228622518369647e-06, "loss": 0.4933, "step": 13754 }, { "epoch": 1.12, "grad_norm": 3.255980576626028, "learning_rate": 7.228230032049405e-06, "loss": 0.5434, "step": 13755 }, { "epoch": 1.12, "grad_norm": 4.39678082320362, "learning_rate": 7.2278375285959455e-06, "loss": 0.6526, "step": 13756 }, { "epoch": 1.12, "grad_norm": 3.5103525411741052, "learning_rate": 7.227445008012291e-06, "loss": 0.6443, "step": 13757 }, { "epoch": 1.12, "grad_norm": 2.1891001436810886, "learning_rate": 7.227052470301454e-06, "loss": 0.6112, "step": 13758 }, { "epoch": 1.12, "grad_norm": 4.463762184761607, "learning_rate": 7.226659915466459e-06, "loss": 0.4771, "step": 13759 }, { "epoch": 1.12, "grad_norm": 3.5798292919014494, "learning_rate": 7.226267343510319e-06, "loss": 0.6131, "step": 13760 }, { "epoch": 1.12, "grad_norm": 2.895298596538284, "learning_rate": 7.225874754436055e-06, "loss": 0.7167, "step": 13761 }, { "epoch": 1.12, "grad_norm": 2.834722205240585, "learning_rate": 7.225482148246687e-06, "loss": 0.6605, "step": 13762 }, { "epoch": 1.12, "grad_norm": 2.3004681876366866, "learning_rate": 7.225089524945231e-06, "loss": 0.5126, "step": 13763 }, { "epoch": 1.12, "grad_norm": 2.1027116224513867, "learning_rate": 7.224696884534708e-06, "loss": 0.4398, "step": 13764 }, { "epoch": 1.12, "grad_norm": 4.6301745030118555, "learning_rate": 7.224304227018135e-06, "loss": 0.5517, "step": 13765 }, { "epoch": 1.12, "grad_norm": 3.5367749463926073, "learning_rate": 7.223911552398534e-06, "loss": 0.4263, "step": 13766 }, { "epoch": 1.12, "grad_norm": 19.024696152630558, "learning_rate": 7.223518860678922e-06, "loss": 0.6696, "step": 13767 }, { "epoch": 1.12, "grad_norm": 7.8105903389616405, "learning_rate": 7.2231261518623185e-06, "loss": 0.6686, "step": 13768 }, { "epoch": 1.12, "grad_norm": 8.583645713253404, "learning_rate": 7.222733425951745e-06, "loss": 0.6792, "step": 13769 }, { "epoch": 1.12, "grad_norm": 2.686878938979837, "learning_rate": 7.22234068295022e-06, "loss": 0.6836, "step": 13770 }, { "epoch": 1.12, "grad_norm": 4.175943040189135, "learning_rate": 7.221947922860764e-06, "loss": 0.559, "step": 13771 }, { "epoch": 1.12, "grad_norm": 3.842331323402045, "learning_rate": 7.221555145686396e-06, "loss": 0.7488, "step": 13772 }, { "epoch": 1.12, "grad_norm": 5.2834534469433665, "learning_rate": 7.221162351430135e-06, "loss": 0.5906, "step": 13773 }, { "epoch": 1.12, "grad_norm": 6.82704507702863, "learning_rate": 7.220769540095006e-06, "loss": 0.5866, "step": 13774 }, { "epoch": 1.12, "grad_norm": 3.591465783066619, "learning_rate": 7.220376711684025e-06, "loss": 0.5638, "step": 13775 }, { "epoch": 1.12, "grad_norm": 4.26573050633974, "learning_rate": 7.219983866200213e-06, "loss": 0.6138, "step": 13776 }, { "epoch": 1.12, "grad_norm": 2.729945733359918, "learning_rate": 7.219591003646592e-06, "loss": 0.5794, "step": 13777 }, { "epoch": 1.12, "grad_norm": 3.3486741755387546, "learning_rate": 7.2191981240261825e-06, "loss": 0.5606, "step": 13778 }, { "epoch": 1.12, "grad_norm": 3.5433319340297142, "learning_rate": 7.2188052273420055e-06, "loss": 0.6684, "step": 13779 }, { "epoch": 1.12, "grad_norm": 8.62487166182258, "learning_rate": 7.218412313597081e-06, "loss": 0.6376, "step": 13780 }, { "epoch": 1.12, "grad_norm": 8.412388793335955, "learning_rate": 7.21801938279443e-06, "loss": 0.5682, "step": 13781 }, { "epoch": 1.12, "grad_norm": 3.5715871970199546, "learning_rate": 7.217626434937076e-06, "loss": 0.5783, "step": 13782 }, { "epoch": 1.12, "grad_norm": 5.098582570731343, "learning_rate": 7.217233470028039e-06, "loss": 0.6361, "step": 13783 }, { "epoch": 1.12, "grad_norm": 4.559091280972446, "learning_rate": 7.216840488070341e-06, "loss": 0.5303, "step": 13784 }, { "epoch": 1.12, "grad_norm": 3.4899853791386235, "learning_rate": 7.216447489067002e-06, "loss": 0.5092, "step": 13785 }, { "epoch": 1.12, "grad_norm": 4.434691305521869, "learning_rate": 7.216054473021046e-06, "loss": 0.6606, "step": 13786 }, { "epoch": 1.12, "grad_norm": 2.5709695605313176, "learning_rate": 7.215661439935494e-06, "loss": 0.6045, "step": 13787 }, { "epoch": 1.12, "grad_norm": 6.072519950124297, "learning_rate": 7.215268389813369e-06, "loss": 0.7048, "step": 13788 }, { "epoch": 1.12, "grad_norm": 2.9344085640848854, "learning_rate": 7.214875322657691e-06, "loss": 0.4699, "step": 13789 }, { "epoch": 1.12, "grad_norm": 2.8935437444044805, "learning_rate": 7.214482238471485e-06, "loss": 0.5875, "step": 13790 }, { "epoch": 1.12, "grad_norm": 3.909764683105959, "learning_rate": 7.2140891372577724e-06, "loss": 0.7566, "step": 13791 }, { "epoch": 1.12, "grad_norm": 2.8950347683647606, "learning_rate": 7.213696019019576e-06, "loss": 0.6663, "step": 13792 }, { "epoch": 1.12, "grad_norm": 2.616807920059923, "learning_rate": 7.213302883759917e-06, "loss": 0.5711, "step": 13793 }, { "epoch": 1.12, "grad_norm": 6.539236654690772, "learning_rate": 7.21290973148182e-06, "loss": 0.6792, "step": 13794 }, { "epoch": 1.12, "grad_norm": 3.1885493683863806, "learning_rate": 7.212516562188309e-06, "loss": 0.6979, "step": 13795 }, { "epoch": 1.12, "grad_norm": 2.742976003919392, "learning_rate": 7.212123375882404e-06, "loss": 0.5894, "step": 13796 }, { "epoch": 1.12, "grad_norm": 2.5769838867925365, "learning_rate": 7.211730172567131e-06, "loss": 0.6136, "step": 13797 }, { "epoch": 1.12, "grad_norm": 2.7449666248796567, "learning_rate": 7.211336952245511e-06, "loss": 0.65, "step": 13798 }, { "epoch": 1.12, "grad_norm": 3.55244069683914, "learning_rate": 7.2109437149205705e-06, "loss": 0.6071, "step": 13799 }, { "epoch": 1.12, "grad_norm": 2.5151252493599605, "learning_rate": 7.2105504605953315e-06, "loss": 0.3771, "step": 13800 }, { "epoch": 1.12, "grad_norm": 3.663623033127427, "learning_rate": 7.210157189272817e-06, "loss": 0.6494, "step": 13801 }, { "epoch": 1.12, "grad_norm": 3.8786365793688997, "learning_rate": 7.209763900956053e-06, "loss": 0.5431, "step": 13802 }, { "epoch": 1.12, "grad_norm": 3.5515019897879285, "learning_rate": 7.209370595648061e-06, "loss": 0.671, "step": 13803 }, { "epoch": 1.12, "grad_norm": 2.8296975687995567, "learning_rate": 7.208977273351867e-06, "loss": 0.5313, "step": 13804 }, { "epoch": 1.12, "grad_norm": 2.7700278535294585, "learning_rate": 7.208583934070496e-06, "loss": 0.5599, "step": 13805 }, { "epoch": 1.12, "grad_norm": 6.317564579045138, "learning_rate": 7.208190577806969e-06, "loss": 0.6371, "step": 13806 }, { "epoch": 1.12, "grad_norm": 3.3483814303048294, "learning_rate": 7.207797204564315e-06, "loss": 0.7047, "step": 13807 }, { "epoch": 1.12, "grad_norm": 8.083423535140293, "learning_rate": 7.2074038143455576e-06, "loss": 0.6715, "step": 13808 }, { "epoch": 1.12, "grad_norm": 3.0789909503231887, "learning_rate": 7.207010407153719e-06, "loss": 0.568, "step": 13809 }, { "epoch": 1.12, "grad_norm": 4.6439865729004355, "learning_rate": 7.2066169829918245e-06, "loss": 0.6112, "step": 13810 }, { "epoch": 1.12, "grad_norm": 2.724868689608358, "learning_rate": 7.206223541862902e-06, "loss": 0.6705, "step": 13811 }, { "epoch": 1.12, "grad_norm": 7.727766980671883, "learning_rate": 7.2058300837699755e-06, "loss": 0.7059, "step": 13812 }, { "epoch": 1.12, "grad_norm": 2.7906760545338956, "learning_rate": 7.20543660871607e-06, "loss": 0.7561, "step": 13813 }, { "epoch": 1.12, "grad_norm": 2.631927545966649, "learning_rate": 7.205043116704211e-06, "loss": 0.5901, "step": 13814 }, { "epoch": 1.12, "grad_norm": 18.108004916024033, "learning_rate": 7.204649607737424e-06, "loss": 0.441, "step": 13815 }, { "epoch": 1.12, "grad_norm": 3.8382259607021068, "learning_rate": 7.204256081818735e-06, "loss": 0.69, "step": 13816 }, { "epoch": 1.12, "grad_norm": 4.791665815119219, "learning_rate": 7.203862538951171e-06, "loss": 0.4563, "step": 13817 }, { "epoch": 1.12, "grad_norm": 2.6611345991951083, "learning_rate": 7.2034689791377555e-06, "loss": 0.5902, "step": 13818 }, { "epoch": 1.12, "grad_norm": 4.15457437587834, "learning_rate": 7.203075402381516e-06, "loss": 0.5801, "step": 13819 }, { "epoch": 1.12, "grad_norm": 5.477199139999866, "learning_rate": 7.20268180868548e-06, "loss": 0.6452, "step": 13820 }, { "epoch": 1.12, "grad_norm": 4.904122212453779, "learning_rate": 7.202288198052673e-06, "loss": 0.5942, "step": 13821 }, { "epoch": 1.12, "grad_norm": 3.976966525544117, "learning_rate": 7.201894570486119e-06, "loss": 0.6664, "step": 13822 }, { "epoch": 1.12, "grad_norm": 3.427898919844347, "learning_rate": 7.201500925988848e-06, "loss": 0.4538, "step": 13823 }, { "epoch": 1.12, "grad_norm": 6.2578702341618, "learning_rate": 7.201107264563887e-06, "loss": 0.5162, "step": 13824 }, { "epoch": 1.12, "grad_norm": 4.05762462007046, "learning_rate": 7.200713586214261e-06, "loss": 0.5559, "step": 13825 }, { "epoch": 1.12, "grad_norm": 4.628852187013995, "learning_rate": 7.200319890942996e-06, "loss": 0.6448, "step": 13826 }, { "epoch": 1.12, "grad_norm": 3.9620794070063985, "learning_rate": 7.199926178753123e-06, "loss": 0.456, "step": 13827 }, { "epoch": 1.12, "grad_norm": 4.394319225017799, "learning_rate": 7.199532449647666e-06, "loss": 0.5275, "step": 13828 }, { "epoch": 1.12, "grad_norm": 8.62773341941716, "learning_rate": 7.199138703629654e-06, "loss": 0.681, "step": 13829 }, { "epoch": 1.12, "grad_norm": 3.617551161454771, "learning_rate": 7.198744940702113e-06, "loss": 0.6179, "step": 13830 }, { "epoch": 1.12, "grad_norm": 10.562824619541656, "learning_rate": 7.1983511608680735e-06, "loss": 0.4861, "step": 13831 }, { "epoch": 1.12, "grad_norm": 3.5139947185503213, "learning_rate": 7.197957364130562e-06, "loss": 0.6002, "step": 13832 }, { "epoch": 1.12, "grad_norm": 2.6797663263308773, "learning_rate": 7.197563550492605e-06, "loss": 0.6601, "step": 13833 }, { "epoch": 1.12, "grad_norm": 4.512459034070046, "learning_rate": 7.197169719957233e-06, "loss": 0.5329, "step": 13834 }, { "epoch": 1.12, "grad_norm": 2.904311365987366, "learning_rate": 7.196775872527473e-06, "loss": 0.4502, "step": 13835 }, { "epoch": 1.12, "grad_norm": 5.691522272756192, "learning_rate": 7.196382008206353e-06, "loss": 0.6618, "step": 13836 }, { "epoch": 1.12, "grad_norm": 3.4168930110864237, "learning_rate": 7.195988126996902e-06, "loss": 0.6327, "step": 13837 }, { "epoch": 1.12, "grad_norm": 3.4863749768076877, "learning_rate": 7.195594228902148e-06, "loss": 0.6133, "step": 13838 }, { "epoch": 1.12, "grad_norm": 3.5017008467170587, "learning_rate": 7.195200313925119e-06, "loss": 0.5927, "step": 13839 }, { "epoch": 1.12, "grad_norm": 3.874689883190253, "learning_rate": 7.1948063820688475e-06, "loss": 0.5838, "step": 13840 }, { "epoch": 1.12, "grad_norm": 5.238980568930057, "learning_rate": 7.19441243333636e-06, "loss": 0.5301, "step": 13841 }, { "epoch": 1.12, "grad_norm": 3.497707253411865, "learning_rate": 7.194018467730683e-06, "loss": 0.5963, "step": 13842 }, { "epoch": 1.12, "grad_norm": 2.7609624200498035, "learning_rate": 7.193624485254852e-06, "loss": 0.5497, "step": 13843 }, { "epoch": 1.12, "grad_norm": 2.6990515158370294, "learning_rate": 7.1932304859118915e-06, "loss": 0.4653, "step": 13844 }, { "epoch": 1.12, "grad_norm": 3.213511660305424, "learning_rate": 7.192836469704832e-06, "loss": 0.565, "step": 13845 }, { "epoch": 1.12, "grad_norm": 3.404358032919781, "learning_rate": 7.192442436636704e-06, "loss": 0.4971, "step": 13846 }, { "epoch": 1.12, "grad_norm": 4.059642568573351, "learning_rate": 7.192048386710537e-06, "loss": 0.5758, "step": 13847 }, { "epoch": 1.12, "grad_norm": 2.3726941122453185, "learning_rate": 7.191654319929361e-06, "loss": 0.5411, "step": 13848 }, { "epoch": 1.12, "grad_norm": 2.753535090242175, "learning_rate": 7.191260236296206e-06, "loss": 0.4848, "step": 13849 }, { "epoch": 1.12, "grad_norm": 2.877288808729877, "learning_rate": 7.190866135814101e-06, "loss": 0.6716, "step": 13850 }, { "epoch": 1.12, "grad_norm": 3.3204068582815216, "learning_rate": 7.1904720184860774e-06, "loss": 0.7113, "step": 13851 }, { "epoch": 1.13, "grad_norm": 4.277001926729191, "learning_rate": 7.190077884315166e-06, "loss": 0.6293, "step": 13852 }, { "epoch": 1.13, "grad_norm": 4.400370386253204, "learning_rate": 7.1896837333043975e-06, "loss": 0.5902, "step": 13853 }, { "epoch": 1.13, "grad_norm": 5.448542043581633, "learning_rate": 7.189289565456801e-06, "loss": 0.5405, "step": 13854 }, { "epoch": 1.13, "grad_norm": 3.824459056322676, "learning_rate": 7.188895380775409e-06, "loss": 0.5968, "step": 13855 }, { "epoch": 1.13, "grad_norm": 3.317817256082273, "learning_rate": 7.188501179263252e-06, "loss": 0.6357, "step": 13856 }, { "epoch": 1.13, "grad_norm": 2.6726714125510203, "learning_rate": 7.18810696092336e-06, "loss": 0.5401, "step": 13857 }, { "epoch": 1.13, "grad_norm": 3.7628811782106806, "learning_rate": 7.187712725758765e-06, "loss": 0.6106, "step": 13858 }, { "epoch": 1.13, "grad_norm": 2.8866577996147993, "learning_rate": 7.1873184737724985e-06, "loss": 0.5815, "step": 13859 }, { "epoch": 1.13, "grad_norm": 7.6066320686319635, "learning_rate": 7.186924204967593e-06, "loss": 0.548, "step": 13860 }, { "epoch": 1.13, "grad_norm": 4.269830986547436, "learning_rate": 7.186529919347077e-06, "loss": 0.573, "step": 13861 }, { "epoch": 1.13, "grad_norm": 3.174004846347893, "learning_rate": 7.186135616913985e-06, "loss": 0.4036, "step": 13862 }, { "epoch": 1.13, "grad_norm": 3.006786151509747, "learning_rate": 7.185741297671348e-06, "loss": 0.5308, "step": 13863 }, { "epoch": 1.13, "grad_norm": 2.995313483494356, "learning_rate": 7.185346961622199e-06, "loss": 0.5799, "step": 13864 }, { "epoch": 1.13, "grad_norm": 5.406691261706471, "learning_rate": 7.184952608769569e-06, "loss": 0.6245, "step": 13865 }, { "epoch": 1.13, "grad_norm": 2.830366075239703, "learning_rate": 7.184558239116488e-06, "loss": 0.638, "step": 13866 }, { "epoch": 1.13, "grad_norm": 2.3652318803646497, "learning_rate": 7.184163852665993e-06, "loss": 0.5574, "step": 13867 }, { "epoch": 1.13, "grad_norm": 2.561945064372659, "learning_rate": 7.1837694494211145e-06, "loss": 0.5379, "step": 13868 }, { "epoch": 1.13, "grad_norm": 2.6509450773172216, "learning_rate": 7.183375029384884e-06, "loss": 0.5769, "step": 13869 }, { "epoch": 1.13, "grad_norm": 1.9732951562469092, "learning_rate": 7.182980592560334e-06, "loss": 0.69, "step": 13870 }, { "epoch": 1.13, "grad_norm": 3.4653405882391244, "learning_rate": 7.1825861389505005e-06, "loss": 0.6843, "step": 13871 }, { "epoch": 1.13, "grad_norm": 4.255171132710585, "learning_rate": 7.1821916685584135e-06, "loss": 0.5056, "step": 13872 }, { "epoch": 1.13, "grad_norm": 3.912955800085182, "learning_rate": 7.181797181387107e-06, "loss": 0.6407, "step": 13873 }, { "epoch": 1.13, "grad_norm": 11.051891974625585, "learning_rate": 7.181402677439614e-06, "loss": 0.4559, "step": 13874 }, { "epoch": 1.13, "grad_norm": 2.6112562406306647, "learning_rate": 7.181008156718969e-06, "loss": 0.656, "step": 13875 }, { "epoch": 1.13, "grad_norm": 3.46346601593531, "learning_rate": 7.180613619228206e-06, "loss": 0.5725, "step": 13876 }, { "epoch": 1.13, "grad_norm": 3.162121858153935, "learning_rate": 7.180219064970356e-06, "loss": 0.425, "step": 13877 }, { "epoch": 1.13, "grad_norm": 3.3750302099340024, "learning_rate": 7.179824493948455e-06, "loss": 0.6943, "step": 13878 }, { "epoch": 1.13, "grad_norm": 2.421298215040587, "learning_rate": 7.179429906165536e-06, "loss": 0.5966, "step": 13879 }, { "epoch": 1.13, "grad_norm": 3.857748371382513, "learning_rate": 7.179035301624634e-06, "loss": 0.4828, "step": 13880 }, { "epoch": 1.13, "grad_norm": 2.882910367105991, "learning_rate": 7.178640680328782e-06, "loss": 0.6013, "step": 13881 }, { "epoch": 1.13, "grad_norm": 4.021187163042069, "learning_rate": 7.178246042281015e-06, "loss": 0.6163, "step": 13882 }, { "epoch": 1.13, "grad_norm": 3.740447707479144, "learning_rate": 7.177851387484366e-06, "loss": 0.5675, "step": 13883 }, { "epoch": 1.13, "grad_norm": 2.4682595381741272, "learning_rate": 7.177456715941872e-06, "loss": 0.5142, "step": 13884 }, { "epoch": 1.13, "grad_norm": 4.065577893387178, "learning_rate": 7.1770620276565664e-06, "loss": 0.6111, "step": 13885 }, { "epoch": 1.13, "grad_norm": 3.5442062194116515, "learning_rate": 7.176667322631484e-06, "loss": 0.6339, "step": 13886 }, { "epoch": 1.13, "grad_norm": 8.189626846772645, "learning_rate": 7.176272600869658e-06, "loss": 0.6138, "step": 13887 }, { "epoch": 1.13, "grad_norm": 4.035802494212654, "learning_rate": 7.175877862374127e-06, "loss": 0.6243, "step": 13888 }, { "epoch": 1.13, "grad_norm": 6.5514011498489175, "learning_rate": 7.175483107147926e-06, "loss": 0.6103, "step": 13889 }, { "epoch": 1.13, "grad_norm": 3.378438197949367, "learning_rate": 7.175088335194087e-06, "loss": 0.5298, "step": 13890 }, { "epoch": 1.13, "grad_norm": 2.7796497625254704, "learning_rate": 7.174693546515648e-06, "loss": 0.6225, "step": 13891 }, { "epoch": 1.13, "grad_norm": 9.984470715459475, "learning_rate": 7.174298741115644e-06, "loss": 0.6573, "step": 13892 }, { "epoch": 1.13, "grad_norm": 6.784402754254416, "learning_rate": 7.1739039189971095e-06, "loss": 0.5571, "step": 13893 }, { "epoch": 1.13, "grad_norm": 2.8388458596600556, "learning_rate": 7.173509080163083e-06, "loss": 0.5664, "step": 13894 }, { "epoch": 1.13, "grad_norm": 49.990620700172826, "learning_rate": 7.1731142246165975e-06, "loss": 0.6003, "step": 13895 }, { "epoch": 1.13, "grad_norm": 2.1482417742483153, "learning_rate": 7.172719352360692e-06, "loss": 0.5578, "step": 13896 }, { "epoch": 1.13, "grad_norm": 5.4210498332023045, "learning_rate": 7.1723244633984005e-06, "loss": 0.6748, "step": 13897 }, { "epoch": 1.13, "grad_norm": 5.506822762437197, "learning_rate": 7.171929557732761e-06, "loss": 0.5981, "step": 13898 }, { "epoch": 1.13, "grad_norm": 4.055471858355198, "learning_rate": 7.171534635366808e-06, "loss": 0.5702, "step": 13899 }, { "epoch": 1.13, "grad_norm": 2.9263229491751015, "learning_rate": 7.17113969630358e-06, "loss": 0.6373, "step": 13900 }, { "epoch": 1.13, "grad_norm": 3.3697766408580376, "learning_rate": 7.1707447405461125e-06, "loss": 0.6359, "step": 13901 }, { "epoch": 1.13, "grad_norm": 4.0312086507405605, "learning_rate": 7.170349768097443e-06, "loss": 0.5251, "step": 13902 }, { "epoch": 1.13, "grad_norm": 5.423719232380969, "learning_rate": 7.169954778960608e-06, "loss": 0.7085, "step": 13903 }, { "epoch": 1.13, "grad_norm": 4.074180620964624, "learning_rate": 7.169559773138647e-06, "loss": 0.5344, "step": 13904 }, { "epoch": 1.13, "grad_norm": 3.5229812874395168, "learning_rate": 7.169164750634594e-06, "loss": 0.5216, "step": 13905 }, { "epoch": 1.13, "grad_norm": 4.248614185204561, "learning_rate": 7.168769711451488e-06, "loss": 0.6562, "step": 13906 }, { "epoch": 1.13, "grad_norm": 2.9487944776994457, "learning_rate": 7.168374655592365e-06, "loss": 0.5842, "step": 13907 }, { "epoch": 1.13, "grad_norm": 1.8052205599697673, "learning_rate": 7.167979583060265e-06, "loss": 0.4995, "step": 13908 }, { "epoch": 1.13, "grad_norm": 2.9625159902876086, "learning_rate": 7.167584493858225e-06, "loss": 0.551, "step": 13909 }, { "epoch": 1.13, "grad_norm": 7.254494796730688, "learning_rate": 7.167189387989283e-06, "loss": 0.4748, "step": 13910 }, { "epoch": 1.13, "grad_norm": 3.1591906525983178, "learning_rate": 7.166794265456475e-06, "loss": 0.6117, "step": 13911 }, { "epoch": 1.13, "grad_norm": 2.1517150867522874, "learning_rate": 7.166399126262842e-06, "loss": 0.6234, "step": 13912 }, { "epoch": 1.13, "grad_norm": 2.7310674796524035, "learning_rate": 7.16600397041142e-06, "loss": 0.7281, "step": 13913 }, { "epoch": 1.13, "grad_norm": 3.480284811360218, "learning_rate": 7.165608797905249e-06, "loss": 0.6267, "step": 13914 }, { "epoch": 1.13, "grad_norm": 12.568051698152436, "learning_rate": 7.165213608747367e-06, "loss": 0.5812, "step": 13915 }, { "epoch": 1.13, "grad_norm": 3.436715474336368, "learning_rate": 7.164818402940813e-06, "loss": 0.5472, "step": 13916 }, { "epoch": 1.13, "grad_norm": 4.472822346034293, "learning_rate": 7.164423180488625e-06, "loss": 0.7649, "step": 13917 }, { "epoch": 1.13, "grad_norm": 4.587258830421729, "learning_rate": 7.164027941393843e-06, "loss": 0.5744, "step": 13918 }, { "epoch": 1.13, "grad_norm": 2.526690831142121, "learning_rate": 7.163632685659504e-06, "loss": 0.6233, "step": 13919 }, { "epoch": 1.13, "grad_norm": 2.4346480322733797, "learning_rate": 7.1632374132886506e-06, "loss": 0.4204, "step": 13920 }, { "epoch": 1.13, "grad_norm": 2.713576049329069, "learning_rate": 7.1628421242843195e-06, "loss": 0.641, "step": 13921 }, { "epoch": 1.13, "grad_norm": 2.6168031588221403, "learning_rate": 7.16244681864955e-06, "loss": 0.5223, "step": 13922 }, { "epoch": 1.13, "grad_norm": 2.502868536294418, "learning_rate": 7.162051496387382e-06, "loss": 0.5765, "step": 13923 }, { "epoch": 1.13, "grad_norm": 5.876081240191781, "learning_rate": 7.161656157500857e-06, "loss": 0.5308, "step": 13924 }, { "epoch": 1.13, "grad_norm": 2.414597893699812, "learning_rate": 7.161260801993013e-06, "loss": 0.5309, "step": 13925 }, { "epoch": 1.13, "grad_norm": 2.8507010897142995, "learning_rate": 7.160865429866891e-06, "loss": 0.6039, "step": 13926 }, { "epoch": 1.13, "grad_norm": 4.724602346032643, "learning_rate": 7.16047004112553e-06, "loss": 0.6457, "step": 13927 }, { "epoch": 1.13, "grad_norm": 4.895762245453891, "learning_rate": 7.16007463577197e-06, "loss": 0.5523, "step": 13928 }, { "epoch": 1.13, "grad_norm": 4.62593965272068, "learning_rate": 7.159679213809253e-06, "loss": 0.6391, "step": 13929 }, { "epoch": 1.13, "grad_norm": 3.8572038488701037, "learning_rate": 7.159283775240419e-06, "loss": 0.5466, "step": 13930 }, { "epoch": 1.13, "grad_norm": 1.7937155143161267, "learning_rate": 7.158888320068507e-06, "loss": 0.482, "step": 13931 }, { "epoch": 1.13, "grad_norm": 4.508318606547678, "learning_rate": 7.1584928482965586e-06, "loss": 0.6574, "step": 13932 }, { "epoch": 1.13, "grad_norm": 8.163163784060702, "learning_rate": 7.158097359927616e-06, "loss": 0.5256, "step": 13933 }, { "epoch": 1.13, "grad_norm": 4.774549549628053, "learning_rate": 7.157701854964719e-06, "loss": 0.615, "step": 13934 }, { "epoch": 1.13, "grad_norm": 6.143098145913764, "learning_rate": 7.1573063334109085e-06, "loss": 0.5513, "step": 13935 }, { "epoch": 1.13, "grad_norm": 3.786266960761607, "learning_rate": 7.1569107952692255e-06, "loss": 0.4934, "step": 13936 }, { "epoch": 1.13, "grad_norm": 4.202427215202946, "learning_rate": 7.156515240542712e-06, "loss": 0.6978, "step": 13937 }, { "epoch": 1.13, "grad_norm": 3.8030948185834896, "learning_rate": 7.15611966923441e-06, "loss": 0.4359, "step": 13938 }, { "epoch": 1.13, "grad_norm": 3.0354891578202623, "learning_rate": 7.15572408134736e-06, "loss": 0.6155, "step": 13939 }, { "epoch": 1.13, "grad_norm": 3.210985712073173, "learning_rate": 7.155328476884603e-06, "loss": 0.4971, "step": 13940 }, { "epoch": 1.13, "grad_norm": 3.3833210534093885, "learning_rate": 7.154932855849184e-06, "loss": 0.6064, "step": 13941 }, { "epoch": 1.13, "grad_norm": 11.078452034845732, "learning_rate": 7.154537218244142e-06, "loss": 0.4711, "step": 13942 }, { "epoch": 1.13, "grad_norm": 6.777079861804997, "learning_rate": 7.154141564072521e-06, "loss": 0.5551, "step": 13943 }, { "epoch": 1.13, "grad_norm": 6.1910307906881075, "learning_rate": 7.153745893337361e-06, "loss": 0.4641, "step": 13944 }, { "epoch": 1.13, "grad_norm": 5.171532798503356, "learning_rate": 7.153350206041706e-06, "loss": 0.6698, "step": 13945 }, { "epoch": 1.13, "grad_norm": 2.8881978638426244, "learning_rate": 7.152954502188599e-06, "loss": 0.5547, "step": 13946 }, { "epoch": 1.13, "grad_norm": 3.7962827328488213, "learning_rate": 7.152558781781082e-06, "loss": 0.5981, "step": 13947 }, { "epoch": 1.13, "grad_norm": 9.834960910606593, "learning_rate": 7.152163044822197e-06, "loss": 0.7151, "step": 13948 }, { "epoch": 1.13, "grad_norm": 2.454312955107933, "learning_rate": 7.151767291314989e-06, "loss": 0.5448, "step": 13949 }, { "epoch": 1.13, "grad_norm": 3.3624556147759557, "learning_rate": 7.151371521262498e-06, "loss": 0.6813, "step": 13950 }, { "epoch": 1.13, "grad_norm": 4.001379219637956, "learning_rate": 7.150975734667769e-06, "loss": 0.5665, "step": 13951 }, { "epoch": 1.13, "grad_norm": 3.5636964560510527, "learning_rate": 7.150579931533844e-06, "loss": 0.5313, "step": 13952 }, { "epoch": 1.13, "grad_norm": 4.780778811902173, "learning_rate": 7.150184111863768e-06, "loss": 0.5076, "step": 13953 }, { "epoch": 1.13, "grad_norm": 2.3583493059284857, "learning_rate": 7.149788275660585e-06, "loss": 0.6196, "step": 13954 }, { "epoch": 1.13, "grad_norm": 2.647393173341142, "learning_rate": 7.149392422927337e-06, "loss": 0.6494, "step": 13955 }, { "epoch": 1.13, "grad_norm": 3.842129682618392, "learning_rate": 7.1489965536670666e-06, "loss": 0.686, "step": 13956 }, { "epoch": 1.13, "grad_norm": 8.626116757099544, "learning_rate": 7.148600667882821e-06, "loss": 0.5927, "step": 13957 }, { "epoch": 1.13, "grad_norm": 2.422068799728741, "learning_rate": 7.148204765577643e-06, "loss": 0.552, "step": 13958 }, { "epoch": 1.13, "grad_norm": 2.9359730971372464, "learning_rate": 7.147808846754576e-06, "loss": 0.5935, "step": 13959 }, { "epoch": 1.13, "grad_norm": 6.70259163523777, "learning_rate": 7.147412911416664e-06, "loss": 0.514, "step": 13960 }, { "epoch": 1.13, "grad_norm": 4.186502996518931, "learning_rate": 7.147016959566953e-06, "loss": 0.6248, "step": 13961 }, { "epoch": 1.13, "grad_norm": 4.404774312829644, "learning_rate": 7.146620991208486e-06, "loss": 0.6203, "step": 13962 }, { "epoch": 1.13, "grad_norm": 2.700011087551851, "learning_rate": 7.146225006344309e-06, "loss": 0.6009, "step": 13963 }, { "epoch": 1.13, "grad_norm": 2.6353260233051246, "learning_rate": 7.145829004977465e-06, "loss": 0.5804, "step": 13964 }, { "epoch": 1.13, "grad_norm": 3.0009819240038045, "learning_rate": 7.145432987111001e-06, "loss": 0.5519, "step": 13965 }, { "epoch": 1.13, "grad_norm": 2.7416414520584564, "learning_rate": 7.14503695274796e-06, "loss": 0.6597, "step": 13966 }, { "epoch": 1.13, "grad_norm": 3.721123781980044, "learning_rate": 7.144640901891389e-06, "loss": 0.6586, "step": 13967 }, { "epoch": 1.13, "grad_norm": 3.5919646519659296, "learning_rate": 7.144244834544331e-06, "loss": 0.4226, "step": 13968 }, { "epoch": 1.13, "grad_norm": 3.6910003192928236, "learning_rate": 7.143848750709835e-06, "loss": 0.4933, "step": 13969 }, { "epoch": 1.13, "grad_norm": 5.127393070600145, "learning_rate": 7.143452650390944e-06, "loss": 0.614, "step": 13970 }, { "epoch": 1.13, "grad_norm": 3.688656507957647, "learning_rate": 7.143056533590704e-06, "loss": 0.5591, "step": 13971 }, { "epoch": 1.13, "grad_norm": 4.433085422689032, "learning_rate": 7.14266040031216e-06, "loss": 0.6977, "step": 13972 }, { "epoch": 1.13, "grad_norm": 3.3030169444795527, "learning_rate": 7.14226425055836e-06, "loss": 0.5886, "step": 13973 }, { "epoch": 1.13, "grad_norm": 6.788700585354247, "learning_rate": 7.141868084332349e-06, "loss": 0.6239, "step": 13974 }, { "epoch": 1.14, "grad_norm": 5.092288471339357, "learning_rate": 7.141471901637173e-06, "loss": 0.5107, "step": 13975 }, { "epoch": 1.14, "grad_norm": 3.0847450809819947, "learning_rate": 7.141075702475878e-06, "loss": 0.5263, "step": 13976 }, { "epoch": 1.14, "grad_norm": 3.2140987235900393, "learning_rate": 7.140679486851509e-06, "loss": 0.6384, "step": 13977 }, { "epoch": 1.14, "grad_norm": 3.9898147884053135, "learning_rate": 7.140283254767118e-06, "loss": 0.5706, "step": 13978 }, { "epoch": 1.14, "grad_norm": 2.9719987561485586, "learning_rate": 7.139887006225747e-06, "loss": 0.5458, "step": 13979 }, { "epoch": 1.14, "grad_norm": 5.363355221068068, "learning_rate": 7.139490741230444e-06, "loss": 0.6443, "step": 13980 }, { "epoch": 1.14, "grad_norm": 4.579838594789887, "learning_rate": 7.139094459784254e-06, "loss": 0.5244, "step": 13981 }, { "epoch": 1.14, "grad_norm": 3.6841996887661916, "learning_rate": 7.138698161890228e-06, "loss": 0.6745, "step": 13982 }, { "epoch": 1.14, "grad_norm": 6.788198125805596, "learning_rate": 7.138301847551411e-06, "loss": 0.5236, "step": 13983 }, { "epoch": 1.14, "grad_norm": 8.8394044651797, "learning_rate": 7.13790551677085e-06, "loss": 0.4088, "step": 13984 }, { "epoch": 1.14, "grad_norm": 3.8168957828482104, "learning_rate": 7.137509169551592e-06, "loss": 0.5996, "step": 13985 }, { "epoch": 1.14, "grad_norm": 2.718926860154682, "learning_rate": 7.1371128058966864e-06, "loss": 0.7695, "step": 13986 }, { "epoch": 1.14, "grad_norm": 3.6345128264086712, "learning_rate": 7.13671642580918e-06, "loss": 0.5572, "step": 13987 }, { "epoch": 1.14, "grad_norm": 1.6837277051765758, "learning_rate": 7.136320029292122e-06, "loss": 0.5062, "step": 13988 }, { "epoch": 1.14, "grad_norm": 3.408500915513878, "learning_rate": 7.1359236163485564e-06, "loss": 0.5344, "step": 13989 }, { "epoch": 1.14, "grad_norm": 6.367493853190697, "learning_rate": 7.1355271869815365e-06, "loss": 0.8157, "step": 13990 }, { "epoch": 1.14, "grad_norm": 5.379678545397891, "learning_rate": 7.135130741194107e-06, "loss": 0.6107, "step": 13991 }, { "epoch": 1.14, "grad_norm": 4.485392412505868, "learning_rate": 7.134734278989317e-06, "loss": 0.493, "step": 13992 }, { "epoch": 1.14, "grad_norm": 3.228221703028478, "learning_rate": 7.134337800370215e-06, "loss": 0.4726, "step": 13993 }, { "epoch": 1.14, "grad_norm": 3.5045283426093894, "learning_rate": 7.133941305339849e-06, "loss": 0.6206, "step": 13994 }, { "epoch": 1.14, "grad_norm": 3.6389969507063804, "learning_rate": 7.133544793901269e-06, "loss": 0.6038, "step": 13995 }, { "epoch": 1.14, "grad_norm": 3.3681824363722748, "learning_rate": 7.133148266057524e-06, "loss": 0.4501, "step": 13996 }, { "epoch": 1.14, "grad_norm": 3.467226798304162, "learning_rate": 7.13275172181166e-06, "loss": 0.5774, "step": 13997 }, { "epoch": 1.14, "grad_norm": 2.6047589715568207, "learning_rate": 7.132355161166731e-06, "loss": 0.566, "step": 13998 }, { "epoch": 1.14, "grad_norm": 12.318717076757398, "learning_rate": 7.131958584125782e-06, "loss": 0.6792, "step": 13999 }, { "epoch": 1.14, "grad_norm": 3.4507065763203415, "learning_rate": 7.131561990691864e-06, "loss": 0.4846, "step": 14000 }, { "epoch": 1.14, "grad_norm": 6.242212347105321, "learning_rate": 7.131165380868026e-06, "loss": 0.5442, "step": 14001 }, { "epoch": 1.14, "grad_norm": 6.930273107287269, "learning_rate": 7.130768754657319e-06, "loss": 0.6367, "step": 14002 }, { "epoch": 1.14, "grad_norm": 2.8929932563322205, "learning_rate": 7.130372112062791e-06, "loss": 0.5218, "step": 14003 }, { "epoch": 1.14, "grad_norm": 4.39733226831647, "learning_rate": 7.1299754530874936e-06, "loss": 0.5871, "step": 14004 }, { "epoch": 1.14, "grad_norm": 17.02333657347272, "learning_rate": 7.129578777734472e-06, "loss": 0.5311, "step": 14005 }, { "epoch": 1.14, "grad_norm": 3.039717864113113, "learning_rate": 7.129182086006784e-06, "loss": 0.7625, "step": 14006 }, { "epoch": 1.14, "grad_norm": 8.325642489843645, "learning_rate": 7.128785377907475e-06, "loss": 0.5331, "step": 14007 }, { "epoch": 1.14, "grad_norm": 3.6732080647469347, "learning_rate": 7.128388653439595e-06, "loss": 0.499, "step": 14008 }, { "epoch": 1.14, "grad_norm": 7.177687497926602, "learning_rate": 7.127991912606196e-06, "loss": 0.4268, "step": 14009 }, { "epoch": 1.14, "grad_norm": 2.7350986391699617, "learning_rate": 7.127595155410329e-06, "loss": 0.6696, "step": 14010 }, { "epoch": 1.14, "grad_norm": 11.704710100305793, "learning_rate": 7.1271983818550426e-06, "loss": 0.7394, "step": 14011 }, { "epoch": 1.14, "grad_norm": 6.599395200235075, "learning_rate": 7.126801591943389e-06, "loss": 0.4981, "step": 14012 }, { "epoch": 1.14, "grad_norm": 4.0670380243567905, "learning_rate": 7.12640478567842e-06, "loss": 0.6661, "step": 14013 }, { "epoch": 1.14, "grad_norm": 3.709272454058723, "learning_rate": 7.126007963063186e-06, "loss": 0.5569, "step": 14014 }, { "epoch": 1.14, "grad_norm": 3.4942600626040203, "learning_rate": 7.125611124100739e-06, "loss": 0.556, "step": 14015 }, { "epoch": 1.14, "grad_norm": 2.4631165118017972, "learning_rate": 7.125214268794129e-06, "loss": 0.5246, "step": 14016 }, { "epoch": 1.14, "grad_norm": 2.485823977905314, "learning_rate": 7.1248173971464065e-06, "loss": 0.5405, "step": 14017 }, { "epoch": 1.14, "grad_norm": 3.2236929857975953, "learning_rate": 7.124420509160626e-06, "loss": 0.681, "step": 14018 }, { "epoch": 1.14, "grad_norm": 8.685896867173078, "learning_rate": 7.124023604839836e-06, "loss": 0.5715, "step": 14019 }, { "epoch": 1.14, "grad_norm": 3.3273328093794254, "learning_rate": 7.123626684187092e-06, "loss": 0.5432, "step": 14020 }, { "epoch": 1.14, "grad_norm": 3.529042101830504, "learning_rate": 7.123229747205442e-06, "loss": 0.6158, "step": 14021 }, { "epoch": 1.14, "grad_norm": 3.776446771979209, "learning_rate": 7.1228327938979435e-06, "loss": 0.761, "step": 14022 }, { "epoch": 1.14, "grad_norm": 3.181729733610844, "learning_rate": 7.122435824267644e-06, "loss": 0.5828, "step": 14023 }, { "epoch": 1.14, "grad_norm": 7.813596500994247, "learning_rate": 7.122038838317598e-06, "loss": 0.4946, "step": 14024 }, { "epoch": 1.14, "grad_norm": 2.890600938310227, "learning_rate": 7.121641836050855e-06, "loss": 0.5451, "step": 14025 }, { "epoch": 1.14, "grad_norm": 4.225950405658553, "learning_rate": 7.121244817470472e-06, "loss": 0.5887, "step": 14026 }, { "epoch": 1.14, "grad_norm": 2.761608940991486, "learning_rate": 7.1208477825795e-06, "loss": 0.5644, "step": 14027 }, { "epoch": 1.14, "grad_norm": 2.5772166823562412, "learning_rate": 7.120450731380991e-06, "loss": 0.7592, "step": 14028 }, { "epoch": 1.14, "grad_norm": 2.742152906290321, "learning_rate": 7.120053663877997e-06, "loss": 0.5988, "step": 14029 }, { "epoch": 1.14, "grad_norm": 6.385341121748592, "learning_rate": 7.119656580073575e-06, "loss": 0.5873, "step": 14030 }, { "epoch": 1.14, "grad_norm": 2.3756375522211814, "learning_rate": 7.119259479970775e-06, "loss": 0.4631, "step": 14031 }, { "epoch": 1.14, "grad_norm": 2.6471526773981915, "learning_rate": 7.1188623635726515e-06, "loss": 0.5567, "step": 14032 }, { "epoch": 1.14, "grad_norm": 6.804030897702289, "learning_rate": 7.118465230882258e-06, "loss": 0.6458, "step": 14033 }, { "epoch": 1.14, "grad_norm": 6.329285581444827, "learning_rate": 7.118068081902647e-06, "loss": 0.6643, "step": 14034 }, { "epoch": 1.14, "grad_norm": 3.0937024644037896, "learning_rate": 7.117670916636874e-06, "loss": 0.5862, "step": 14035 }, { "epoch": 1.14, "grad_norm": 6.433043270063747, "learning_rate": 7.117273735087993e-06, "loss": 0.6547, "step": 14036 }, { "epoch": 1.14, "grad_norm": 5.843080992640058, "learning_rate": 7.116876537259054e-06, "loss": 0.5489, "step": 14037 }, { "epoch": 1.14, "grad_norm": 3.656064659562861, "learning_rate": 7.116479323153116e-06, "loss": 0.5703, "step": 14038 }, { "epoch": 1.14, "grad_norm": 5.3032634176140645, "learning_rate": 7.116082092773231e-06, "loss": 0.5514, "step": 14039 }, { "epoch": 1.14, "grad_norm": 2.7270044372363675, "learning_rate": 7.1156848461224545e-06, "loss": 0.6904, "step": 14040 }, { "epoch": 1.14, "grad_norm": 2.6540333651140378, "learning_rate": 7.115287583203839e-06, "loss": 0.6183, "step": 14041 }, { "epoch": 1.14, "grad_norm": 4.009342461023394, "learning_rate": 7.114890304020441e-06, "loss": 0.4273, "step": 14042 }, { "epoch": 1.14, "grad_norm": 2.681541107577118, "learning_rate": 7.114493008575315e-06, "loss": 0.5448, "step": 14043 }, { "epoch": 1.14, "grad_norm": 6.476858165003556, "learning_rate": 7.1140956968715154e-06, "loss": 0.4766, "step": 14044 }, { "epoch": 1.14, "grad_norm": 2.127736994985274, "learning_rate": 7.113698368912096e-06, "loss": 0.5371, "step": 14045 }, { "epoch": 1.14, "grad_norm": 6.492346649264869, "learning_rate": 7.113301024700115e-06, "loss": 0.5217, "step": 14046 }, { "epoch": 1.14, "grad_norm": 3.857623321669221, "learning_rate": 7.112903664238624e-06, "loss": 0.5586, "step": 14047 }, { "epoch": 1.14, "grad_norm": 4.231953093247358, "learning_rate": 7.112506287530682e-06, "loss": 0.5895, "step": 14048 }, { "epoch": 1.14, "grad_norm": 2.7119821708871474, "learning_rate": 7.11210889457934e-06, "loss": 0.596, "step": 14049 }, { "epoch": 1.14, "grad_norm": 3.2373965081330343, "learning_rate": 7.111711485387659e-06, "loss": 0.667, "step": 14050 }, { "epoch": 1.14, "grad_norm": 3.1633006482257935, "learning_rate": 7.111314059958692e-06, "loss": 0.6395, "step": 14051 }, { "epoch": 1.14, "grad_norm": 6.181949659053406, "learning_rate": 7.110916618295493e-06, "loss": 0.6003, "step": 14052 }, { "epoch": 1.14, "grad_norm": 3.4255721513063606, "learning_rate": 7.11051916040112e-06, "loss": 0.5798, "step": 14053 }, { "epoch": 1.14, "grad_norm": 3.6079143569189767, "learning_rate": 7.110121686278631e-06, "loss": 0.6137, "step": 14054 }, { "epoch": 1.14, "grad_norm": 5.87133046763248, "learning_rate": 7.109724195931078e-06, "loss": 0.525, "step": 14055 }, { "epoch": 1.14, "grad_norm": 3.293642929718121, "learning_rate": 7.109326689361521e-06, "loss": 0.5557, "step": 14056 }, { "epoch": 1.14, "grad_norm": 3.62988946486616, "learning_rate": 7.108929166573014e-06, "loss": 0.5175, "step": 14057 }, { "epoch": 1.14, "grad_norm": 4.763547408904908, "learning_rate": 7.108531627568615e-06, "loss": 0.5919, "step": 14058 }, { "epoch": 1.14, "grad_norm": 13.518437752041928, "learning_rate": 7.108134072351381e-06, "loss": 0.5254, "step": 14059 }, { "epoch": 1.14, "grad_norm": 2.002461124491446, "learning_rate": 7.107736500924369e-06, "loss": 0.5139, "step": 14060 }, { "epoch": 1.14, "grad_norm": 5.21698623549741, "learning_rate": 7.107338913290635e-06, "loss": 0.5652, "step": 14061 }, { "epoch": 1.14, "grad_norm": 4.624539691500908, "learning_rate": 7.106941309453235e-06, "loss": 0.5224, "step": 14062 }, { "epoch": 1.14, "grad_norm": 8.93781793302301, "learning_rate": 7.106543689415228e-06, "loss": 0.5506, "step": 14063 }, { "epoch": 1.14, "grad_norm": 2.600324887881603, "learning_rate": 7.106146053179672e-06, "loss": 0.5911, "step": 14064 }, { "epoch": 1.14, "grad_norm": 2.955799788303194, "learning_rate": 7.105748400749624e-06, "loss": 0.6704, "step": 14065 }, { "epoch": 1.14, "grad_norm": 4.081984541821989, "learning_rate": 7.10535073212814e-06, "loss": 0.6583, "step": 14066 }, { "epoch": 1.14, "grad_norm": 4.248804887688949, "learning_rate": 7.10495304731828e-06, "loss": 0.6981, "step": 14067 }, { "epoch": 1.14, "grad_norm": 6.92853198105978, "learning_rate": 7.104555346323098e-06, "loss": 0.5545, "step": 14068 }, { "epoch": 1.14, "grad_norm": 3.8659932647849367, "learning_rate": 7.104157629145658e-06, "loss": 0.618, "step": 14069 }, { "epoch": 1.14, "grad_norm": 3.1260199424631945, "learning_rate": 7.103759895789013e-06, "loss": 0.5967, "step": 14070 }, { "epoch": 1.14, "grad_norm": 3.1766996080036813, "learning_rate": 7.103362146256223e-06, "loss": 0.5136, "step": 14071 }, { "epoch": 1.14, "grad_norm": 3.8031647423475508, "learning_rate": 7.102964380550348e-06, "loss": 0.6074, "step": 14072 }, { "epoch": 1.14, "grad_norm": 9.07410922745925, "learning_rate": 7.102566598674443e-06, "loss": 0.6495, "step": 14073 }, { "epoch": 1.14, "grad_norm": 3.9226776713759994, "learning_rate": 7.102168800631569e-06, "loss": 0.5438, "step": 14074 }, { "epoch": 1.14, "grad_norm": 3.9220484951316323, "learning_rate": 7.101770986424785e-06, "loss": 0.577, "step": 14075 }, { "epoch": 1.14, "grad_norm": 3.970188428490416, "learning_rate": 7.101373156057148e-06, "loss": 0.5468, "step": 14076 }, { "epoch": 1.14, "grad_norm": 4.903892395965639, "learning_rate": 7.10097530953172e-06, "loss": 0.7002, "step": 14077 }, { "epoch": 1.14, "grad_norm": 4.093239525825034, "learning_rate": 7.100577446851555e-06, "loss": 0.5743, "step": 14078 }, { "epoch": 1.14, "grad_norm": 2.897996512013464, "learning_rate": 7.100179568019719e-06, "loss": 0.6799, "step": 14079 }, { "epoch": 1.14, "grad_norm": 3.735222923519625, "learning_rate": 7.099781673039265e-06, "loss": 0.5399, "step": 14080 }, { "epoch": 1.14, "grad_norm": 2.868410546521567, "learning_rate": 7.099383761913257e-06, "loss": 0.6331, "step": 14081 }, { "epoch": 1.14, "grad_norm": 5.462939334244097, "learning_rate": 7.0989858346447515e-06, "loss": 0.4825, "step": 14082 }, { "epoch": 1.14, "grad_norm": 7.11011658215929, "learning_rate": 7.098587891236811e-06, "loss": 0.5513, "step": 14083 }, { "epoch": 1.14, "grad_norm": 3.451756389908537, "learning_rate": 7.098189931692494e-06, "loss": 0.5442, "step": 14084 }, { "epoch": 1.14, "grad_norm": 4.842599459390585, "learning_rate": 7.097791956014859e-06, "loss": 0.733, "step": 14085 }, { "epoch": 1.14, "grad_norm": 3.112264745473636, "learning_rate": 7.097393964206968e-06, "loss": 0.6751, "step": 14086 }, { "epoch": 1.14, "grad_norm": 2.811174693668927, "learning_rate": 7.096995956271881e-06, "loss": 0.742, "step": 14087 }, { "epoch": 1.14, "grad_norm": 3.405638353854878, "learning_rate": 7.0965979322126574e-06, "loss": 0.5813, "step": 14088 }, { "epoch": 1.14, "grad_norm": 3.6003122621451245, "learning_rate": 7.096199892032359e-06, "loss": 0.6734, "step": 14089 }, { "epoch": 1.14, "grad_norm": 2.7739248661018934, "learning_rate": 7.095801835734046e-06, "loss": 0.5432, "step": 14090 }, { "epoch": 1.14, "grad_norm": 3.5569595655297386, "learning_rate": 7.095403763320777e-06, "loss": 0.6025, "step": 14091 }, { "epoch": 1.14, "grad_norm": 4.584418015281892, "learning_rate": 7.095005674795616e-06, "loss": 0.6391, "step": 14092 }, { "epoch": 1.14, "grad_norm": 3.6938939539169318, "learning_rate": 7.094607570161625e-06, "loss": 0.6481, "step": 14093 }, { "epoch": 1.14, "grad_norm": 5.1040573879123325, "learning_rate": 7.09420944942186e-06, "loss": 0.5178, "step": 14094 }, { "epoch": 1.14, "grad_norm": 3.2501333268378096, "learning_rate": 7.093811312579385e-06, "loss": 0.6699, "step": 14095 }, { "epoch": 1.14, "grad_norm": 2.726086705995903, "learning_rate": 7.0934131596372615e-06, "loss": 0.5765, "step": 14096 }, { "epoch": 1.14, "grad_norm": 4.575038192925408, "learning_rate": 7.0930149905985525e-06, "loss": 0.5689, "step": 14097 }, { "epoch": 1.15, "grad_norm": 12.649958184097981, "learning_rate": 7.092616805466316e-06, "loss": 0.6007, "step": 14098 }, { "epoch": 1.15, "grad_norm": 4.656171235696052, "learning_rate": 7.092218604243615e-06, "loss": 0.6753, "step": 14099 }, { "epoch": 1.15, "grad_norm": 2.2936373055637858, "learning_rate": 7.091820386933513e-06, "loss": 0.5615, "step": 14100 }, { "epoch": 1.15, "grad_norm": 3.3618159627256192, "learning_rate": 7.091422153539072e-06, "loss": 0.6564, "step": 14101 }, { "epoch": 1.15, "grad_norm": 3.208953388488813, "learning_rate": 7.091023904063352e-06, "loss": 0.5159, "step": 14102 }, { "epoch": 1.15, "grad_norm": 3.3376953747366103, "learning_rate": 7.0906256385094145e-06, "loss": 0.6552, "step": 14103 }, { "epoch": 1.15, "grad_norm": 4.059295072807514, "learning_rate": 7.090227356880325e-06, "loss": 0.6571, "step": 14104 }, { "epoch": 1.15, "grad_norm": 3.5955086851826072, "learning_rate": 7.089829059179145e-06, "loss": 0.4719, "step": 14105 }, { "epoch": 1.15, "grad_norm": 2.7183274192340905, "learning_rate": 7.089430745408936e-06, "loss": 0.6439, "step": 14106 }, { "epoch": 1.15, "grad_norm": 9.77472418476916, "learning_rate": 7.08903241557276e-06, "loss": 0.5568, "step": 14107 }, { "epoch": 1.15, "grad_norm": 3.247580974979504, "learning_rate": 7.088634069673683e-06, "loss": 0.5631, "step": 14108 }, { "epoch": 1.15, "grad_norm": 2.6667231495117663, "learning_rate": 7.088235707714763e-06, "loss": 0.5781, "step": 14109 }, { "epoch": 1.15, "grad_norm": 3.2584384235731254, "learning_rate": 7.0878373296990685e-06, "loss": 0.5411, "step": 14110 }, { "epoch": 1.15, "grad_norm": 11.403141698690456, "learning_rate": 7.087438935629659e-06, "loss": 0.5395, "step": 14111 }, { "epoch": 1.15, "grad_norm": 2.2351864915359125, "learning_rate": 7.0870405255096e-06, "loss": 0.5342, "step": 14112 }, { "epoch": 1.15, "grad_norm": 2.822381014634726, "learning_rate": 7.0866420993419535e-06, "loss": 0.5907, "step": 14113 }, { "epoch": 1.15, "grad_norm": 3.6866002641339866, "learning_rate": 7.086243657129784e-06, "loss": 0.5102, "step": 14114 }, { "epoch": 1.15, "grad_norm": 7.3050480577659815, "learning_rate": 7.085845198876154e-06, "loss": 0.6958, "step": 14115 }, { "epoch": 1.15, "grad_norm": 5.99677284558523, "learning_rate": 7.085446724584129e-06, "loss": 0.6622, "step": 14116 }, { "epoch": 1.15, "grad_norm": 2.922262982628735, "learning_rate": 7.085048234256771e-06, "loss": 0.5063, "step": 14117 }, { "epoch": 1.15, "grad_norm": 4.52880605265708, "learning_rate": 7.084649727897145e-06, "loss": 0.5515, "step": 14118 }, { "epoch": 1.15, "grad_norm": 3.243147010824444, "learning_rate": 7.084251205508315e-06, "loss": 0.5647, "step": 14119 }, { "epoch": 1.15, "grad_norm": 2.470191200775277, "learning_rate": 7.083852667093346e-06, "loss": 0.5193, "step": 14120 }, { "epoch": 1.15, "grad_norm": 2.5320749709149935, "learning_rate": 7.083454112655302e-06, "loss": 0.5184, "step": 14121 }, { "epoch": 1.15, "grad_norm": 4.390295479158579, "learning_rate": 7.083055542197248e-06, "loss": 0.5423, "step": 14122 }, { "epoch": 1.15, "grad_norm": 3.066019523434415, "learning_rate": 7.082656955722247e-06, "loss": 0.5862, "step": 14123 }, { "epoch": 1.15, "grad_norm": 18.12779303562252, "learning_rate": 7.082258353233365e-06, "loss": 0.5815, "step": 14124 }, { "epoch": 1.15, "grad_norm": 3.4149559418994193, "learning_rate": 7.081859734733667e-06, "loss": 0.4436, "step": 14125 }, { "epoch": 1.15, "grad_norm": 14.92517512936975, "learning_rate": 7.0814611002262194e-06, "loss": 0.6932, "step": 14126 }, { "epoch": 1.15, "grad_norm": 2.2794132881214884, "learning_rate": 7.081062449714084e-06, "loss": 0.6581, "step": 14127 }, { "epoch": 1.15, "grad_norm": 3.5659905356015447, "learning_rate": 7.080663783200328e-06, "loss": 0.5788, "step": 14128 }, { "epoch": 1.15, "grad_norm": 3.706520014161506, "learning_rate": 7.080265100688018e-06, "loss": 0.628, "step": 14129 }, { "epoch": 1.15, "grad_norm": 4.35230767820653, "learning_rate": 7.079866402180218e-06, "loss": 0.5671, "step": 14130 }, { "epoch": 1.15, "grad_norm": 2.5077247290695155, "learning_rate": 7.079467687679993e-06, "loss": 0.5825, "step": 14131 }, { "epoch": 1.15, "grad_norm": 3.1462337223422794, "learning_rate": 7.079068957190409e-06, "loss": 0.6244, "step": 14132 }, { "epoch": 1.15, "grad_norm": 7.107115742882602, "learning_rate": 7.078670210714536e-06, "loss": 0.6741, "step": 14133 }, { "epoch": 1.15, "grad_norm": 3.1053063413149697, "learning_rate": 7.078271448255434e-06, "loss": 0.5557, "step": 14134 }, { "epoch": 1.15, "grad_norm": 2.4951065382384936, "learning_rate": 7.077872669816172e-06, "loss": 0.5443, "step": 14135 }, { "epoch": 1.15, "grad_norm": 3.4341544438330183, "learning_rate": 7.077473875399816e-06, "loss": 0.7928, "step": 14136 }, { "epoch": 1.15, "grad_norm": 2.306575633269179, "learning_rate": 7.0770750650094335e-06, "loss": 0.6348, "step": 14137 }, { "epoch": 1.15, "grad_norm": 2.9829238856004805, "learning_rate": 7.076676238648089e-06, "loss": 0.5296, "step": 14138 }, { "epoch": 1.15, "grad_norm": 2.641276243233299, "learning_rate": 7.0762773963188495e-06, "loss": 0.6552, "step": 14139 }, { "epoch": 1.15, "grad_norm": 4.033204496628203, "learning_rate": 7.075878538024783e-06, "loss": 0.5109, "step": 14140 }, { "epoch": 1.15, "grad_norm": 3.1348349710619723, "learning_rate": 7.075479663768957e-06, "loss": 0.6851, "step": 14141 }, { "epoch": 1.15, "grad_norm": 2.8099052621020646, "learning_rate": 7.075080773554437e-06, "loss": 0.5616, "step": 14142 }, { "epoch": 1.15, "grad_norm": 3.8927122830835437, "learning_rate": 7.0746818673842884e-06, "loss": 0.5616, "step": 14143 }, { "epoch": 1.15, "grad_norm": 4.135964918894633, "learning_rate": 7.074282945261581e-06, "loss": 0.6813, "step": 14144 }, { "epoch": 1.15, "grad_norm": 2.8281807827288974, "learning_rate": 7.073884007189383e-06, "loss": 0.5875, "step": 14145 }, { "epoch": 1.15, "grad_norm": 2.5513601234085352, "learning_rate": 7.073485053170761e-06, "loss": 0.4731, "step": 14146 }, { "epoch": 1.15, "grad_norm": 4.4011919842333675, "learning_rate": 7.07308608320878e-06, "loss": 0.6637, "step": 14147 }, { "epoch": 1.15, "grad_norm": 3.7373945575770655, "learning_rate": 7.072687097306512e-06, "loss": 0.5743, "step": 14148 }, { "epoch": 1.15, "grad_norm": 2.475161611195808, "learning_rate": 7.0722880954670215e-06, "loss": 0.5747, "step": 14149 }, { "epoch": 1.15, "grad_norm": 2.486059182959368, "learning_rate": 7.071889077693378e-06, "loss": 0.6948, "step": 14150 }, { "epoch": 1.15, "grad_norm": 5.830111930499957, "learning_rate": 7.071490043988649e-06, "loss": 0.6594, "step": 14151 }, { "epoch": 1.15, "grad_norm": 3.7657416059330777, "learning_rate": 7.071090994355904e-06, "loss": 0.7021, "step": 14152 }, { "epoch": 1.15, "grad_norm": 8.728802268459566, "learning_rate": 7.07069192879821e-06, "loss": 0.5258, "step": 14153 }, { "epoch": 1.15, "grad_norm": 3.9561275292081812, "learning_rate": 7.070292847318636e-06, "loss": 0.5427, "step": 14154 }, { "epoch": 1.15, "grad_norm": 2.916833737248854, "learning_rate": 7.06989374992025e-06, "loss": 0.5892, "step": 14155 }, { "epoch": 1.15, "grad_norm": 5.290824947251644, "learning_rate": 7.069494636606121e-06, "loss": 0.5325, "step": 14156 }, { "epoch": 1.15, "grad_norm": 3.6321110330165034, "learning_rate": 7.069095507379319e-06, "loss": 0.7114, "step": 14157 }, { "epoch": 1.15, "grad_norm": 3.6899015568475297, "learning_rate": 7.068696362242912e-06, "loss": 0.5574, "step": 14158 }, { "epoch": 1.15, "grad_norm": 3.4814257110268603, "learning_rate": 7.068297201199969e-06, "loss": 0.5065, "step": 14159 }, { "epoch": 1.15, "grad_norm": 2.8971998233592093, "learning_rate": 7.067898024253559e-06, "loss": 0.5867, "step": 14160 }, { "epoch": 1.15, "grad_norm": 3.230370994885463, "learning_rate": 7.067498831406751e-06, "loss": 0.6415, "step": 14161 }, { "epoch": 1.15, "grad_norm": 4.054201325148742, "learning_rate": 7.067099622662618e-06, "loss": 0.6904, "step": 14162 }, { "epoch": 1.15, "grad_norm": 16.39179449732065, "learning_rate": 7.066700398024225e-06, "loss": 0.6, "step": 14163 }, { "epoch": 1.15, "grad_norm": 2.318860455813238, "learning_rate": 7.066301157494641e-06, "loss": 0.5265, "step": 14164 }, { "epoch": 1.15, "grad_norm": 3.896431255934566, "learning_rate": 7.0659019010769415e-06, "loss": 0.4681, "step": 14165 }, { "epoch": 1.15, "grad_norm": 3.134207206228953, "learning_rate": 7.065502628774193e-06, "loss": 0.5775, "step": 14166 }, { "epoch": 1.15, "grad_norm": 2.5381122116601778, "learning_rate": 7.065103340589466e-06, "loss": 0.5491, "step": 14167 }, { "epoch": 1.15, "grad_norm": 7.798928323928904, "learning_rate": 7.064704036525829e-06, "loss": 0.5972, "step": 14168 }, { "epoch": 1.15, "grad_norm": 3.566298411881335, "learning_rate": 7.064304716586354e-06, "loss": 0.6204, "step": 14169 }, { "epoch": 1.15, "grad_norm": 4.580900525922911, "learning_rate": 7.063905380774112e-06, "loss": 0.5264, "step": 14170 }, { "epoch": 1.15, "grad_norm": 5.044030661659421, "learning_rate": 7.063506029092173e-06, "loss": 0.6364, "step": 14171 }, { "epoch": 1.15, "grad_norm": 5.258222859950595, "learning_rate": 7.063106661543606e-06, "loss": 0.5465, "step": 14172 }, { "epoch": 1.15, "grad_norm": 5.07461770642589, "learning_rate": 7.062707278131485e-06, "loss": 0.6461, "step": 14173 }, { "epoch": 1.15, "grad_norm": 3.0162060080548314, "learning_rate": 7.062307878858877e-06, "loss": 0.709, "step": 14174 }, { "epoch": 1.15, "grad_norm": 2.7004139744413926, "learning_rate": 7.0619084637288574e-06, "loss": 0.5251, "step": 14175 }, { "epoch": 1.15, "grad_norm": 3.531640933137738, "learning_rate": 7.0615090327444935e-06, "loss": 0.5967, "step": 14176 }, { "epoch": 1.15, "grad_norm": 2.4479087366468577, "learning_rate": 7.061109585908858e-06, "loss": 0.6044, "step": 14177 }, { "epoch": 1.15, "grad_norm": 3.505476908621678, "learning_rate": 7.060710123225025e-06, "loss": 0.4353, "step": 14178 }, { "epoch": 1.15, "grad_norm": 3.565731847618166, "learning_rate": 7.060310644696062e-06, "loss": 0.5569, "step": 14179 }, { "epoch": 1.15, "grad_norm": 2.907161365099284, "learning_rate": 7.059911150325043e-06, "loss": 0.6003, "step": 14180 }, { "epoch": 1.15, "grad_norm": 3.7061213433323945, "learning_rate": 7.059511640115038e-06, "loss": 0.548, "step": 14181 }, { "epoch": 1.15, "grad_norm": 2.1972393226108906, "learning_rate": 7.059112114069121e-06, "loss": 0.5637, "step": 14182 }, { "epoch": 1.15, "grad_norm": 2.5087399276062468, "learning_rate": 7.058712572190362e-06, "loss": 0.6655, "step": 14183 }, { "epoch": 1.15, "grad_norm": 3.9807149605159298, "learning_rate": 7.0583130144818345e-06, "loss": 0.5794, "step": 14184 }, { "epoch": 1.15, "grad_norm": 2.779790463183691, "learning_rate": 7.057913440946611e-06, "loss": 0.5326, "step": 14185 }, { "epoch": 1.15, "grad_norm": 2.0242194908515527, "learning_rate": 7.057513851587763e-06, "loss": 0.6512, "step": 14186 }, { "epoch": 1.15, "grad_norm": 3.0648594562789664, "learning_rate": 7.057114246408363e-06, "loss": 0.5705, "step": 14187 }, { "epoch": 1.15, "grad_norm": 2.749251205679122, "learning_rate": 7.056714625411482e-06, "loss": 0.5764, "step": 14188 }, { "epoch": 1.15, "grad_norm": 3.6710502819411164, "learning_rate": 7.056314988600198e-06, "loss": 0.6023, "step": 14189 }, { "epoch": 1.15, "grad_norm": 2.841614284212581, "learning_rate": 7.055915335977579e-06, "loss": 0.5274, "step": 14190 }, { "epoch": 1.15, "grad_norm": 3.1849817079290332, "learning_rate": 7.0555156675466994e-06, "loss": 0.6975, "step": 14191 }, { "epoch": 1.15, "grad_norm": 6.19683003953635, "learning_rate": 7.055115983310632e-06, "loss": 0.5268, "step": 14192 }, { "epoch": 1.15, "grad_norm": 1.9388549574697502, "learning_rate": 7.054716283272451e-06, "loss": 0.4735, "step": 14193 }, { "epoch": 1.15, "grad_norm": 6.809605985542842, "learning_rate": 7.054316567435231e-06, "loss": 0.642, "step": 14194 }, { "epoch": 1.15, "grad_norm": 2.4303950371227496, "learning_rate": 7.053916835802042e-06, "loss": 0.6966, "step": 14195 }, { "epoch": 1.15, "grad_norm": 5.101884248414051, "learning_rate": 7.053517088375959e-06, "loss": 0.7299, "step": 14196 }, { "epoch": 1.15, "grad_norm": 4.98381854556617, "learning_rate": 7.053117325160055e-06, "loss": 0.5358, "step": 14197 }, { "epoch": 1.15, "grad_norm": 3.8788729306803917, "learning_rate": 7.052717546157407e-06, "loss": 0.6807, "step": 14198 }, { "epoch": 1.15, "grad_norm": 6.322838976019162, "learning_rate": 7.052317751371086e-06, "loss": 0.5928, "step": 14199 }, { "epoch": 1.15, "grad_norm": 2.537546204200766, "learning_rate": 7.051917940804166e-06, "loss": 0.6548, "step": 14200 }, { "epoch": 1.15, "grad_norm": 2.682024910459504, "learning_rate": 7.051518114459723e-06, "loss": 0.6621, "step": 14201 }, { "epoch": 1.15, "grad_norm": 4.696853423223693, "learning_rate": 7.051118272340831e-06, "loss": 0.5416, "step": 14202 }, { "epoch": 1.15, "grad_norm": 2.9290480436392157, "learning_rate": 7.050718414450563e-06, "loss": 0.5128, "step": 14203 }, { "epoch": 1.15, "grad_norm": 3.849270822665608, "learning_rate": 7.050318540791994e-06, "loss": 0.5966, "step": 14204 }, { "epoch": 1.15, "grad_norm": 5.1453802120454775, "learning_rate": 7.0499186513682e-06, "loss": 0.6246, "step": 14205 }, { "epoch": 1.15, "grad_norm": 4.787934275303903, "learning_rate": 7.049518746182255e-06, "loss": 0.4698, "step": 14206 }, { "epoch": 1.15, "grad_norm": 2.0871450044159077, "learning_rate": 7.0491188252372344e-06, "loss": 0.6549, "step": 14207 }, { "epoch": 1.15, "grad_norm": 3.4986509024792256, "learning_rate": 7.0487188885362115e-06, "loss": 0.6304, "step": 14208 }, { "epoch": 1.15, "grad_norm": 3.0676703886409675, "learning_rate": 7.048318936082264e-06, "loss": 0.6224, "step": 14209 }, { "epoch": 1.15, "grad_norm": 3.866784754285266, "learning_rate": 7.047918967878465e-06, "loss": 0.4615, "step": 14210 }, { "epoch": 1.15, "grad_norm": 2.6499533088665483, "learning_rate": 7.047518983927891e-06, "loss": 0.4718, "step": 14211 }, { "epoch": 1.15, "grad_norm": 3.668578097527645, "learning_rate": 7.047118984233618e-06, "loss": 0.6364, "step": 14212 }, { "epoch": 1.15, "grad_norm": 2.274191053878962, "learning_rate": 7.04671896879872e-06, "loss": 0.6038, "step": 14213 }, { "epoch": 1.15, "grad_norm": 3.227721908211826, "learning_rate": 7.046318937626275e-06, "loss": 0.6532, "step": 14214 }, { "epoch": 1.15, "grad_norm": 2.236891619411432, "learning_rate": 7.0459188907193566e-06, "loss": 0.6661, "step": 14215 }, { "epoch": 1.15, "grad_norm": 3.1897841702304173, "learning_rate": 7.045518828081041e-06, "loss": 0.5675, "step": 14216 }, { "epoch": 1.15, "grad_norm": 3.862769464211047, "learning_rate": 7.045118749714408e-06, "loss": 0.7569, "step": 14217 }, { "epoch": 1.15, "grad_norm": 2.7098936322591896, "learning_rate": 7.044718655622531e-06, "loss": 0.549, "step": 14218 }, { "epoch": 1.15, "grad_norm": 2.3318334835866166, "learning_rate": 7.044318545808485e-06, "loss": 0.385, "step": 14219 }, { "epoch": 1.15, "grad_norm": 18.438469572456377, "learning_rate": 7.043918420275348e-06, "loss": 0.5976, "step": 14220 }, { "epoch": 1.16, "grad_norm": 3.64866732986671, "learning_rate": 7.043518279026198e-06, "loss": 0.5169, "step": 14221 }, { "epoch": 1.16, "grad_norm": 2.8022253428234407, "learning_rate": 7.04311812206411e-06, "loss": 0.6423, "step": 14222 }, { "epoch": 1.16, "grad_norm": 3.2695242600483043, "learning_rate": 7.042717949392162e-06, "loss": 0.5445, "step": 14223 }, { "epoch": 1.16, "grad_norm": 2.373034291469507, "learning_rate": 7.042317761013428e-06, "loss": 0.6268, "step": 14224 }, { "epoch": 1.16, "grad_norm": 4.278726047941581, "learning_rate": 7.041917556930988e-06, "loss": 0.5275, "step": 14225 }, { "epoch": 1.16, "grad_norm": 7.575303241550505, "learning_rate": 7.041517337147921e-06, "loss": 0.5914, "step": 14226 }, { "epoch": 1.16, "grad_norm": 4.236060154047391, "learning_rate": 7.0411171016673005e-06, "loss": 0.5985, "step": 14227 }, { "epoch": 1.16, "grad_norm": 3.72609517028221, "learning_rate": 7.040716850492204e-06, "loss": 0.6925, "step": 14228 }, { "epoch": 1.16, "grad_norm": 3.562042962853014, "learning_rate": 7.040316583625712e-06, "loss": 0.6362, "step": 14229 }, { "epoch": 1.16, "grad_norm": 3.077031168319329, "learning_rate": 7.039916301070902e-06, "loss": 0.5332, "step": 14230 }, { "epoch": 1.16, "grad_norm": 3.2276407717421103, "learning_rate": 7.03951600283085e-06, "loss": 0.6154, "step": 14231 }, { "epoch": 1.16, "grad_norm": 2.4644680798648673, "learning_rate": 7.039115688908633e-06, "loss": 0.65, "step": 14232 }, { "epoch": 1.16, "grad_norm": 2.836473678887765, "learning_rate": 7.038715359307332e-06, "loss": 0.643, "step": 14233 }, { "epoch": 1.16, "grad_norm": 31.734069037054216, "learning_rate": 7.0383150140300236e-06, "loss": 0.5541, "step": 14234 }, { "epoch": 1.16, "grad_norm": 4.481432072482124, "learning_rate": 7.037914653079787e-06, "loss": 0.5053, "step": 14235 }, { "epoch": 1.16, "grad_norm": 3.481188855081529, "learning_rate": 7.037514276459698e-06, "loss": 0.4337, "step": 14236 }, { "epoch": 1.16, "grad_norm": 2.956238670935746, "learning_rate": 7.0371138841728395e-06, "loss": 0.5488, "step": 14237 }, { "epoch": 1.16, "grad_norm": 2.6783277180415337, "learning_rate": 7.036713476222288e-06, "loss": 0.6124, "step": 14238 }, { "epoch": 1.16, "grad_norm": 3.499927366795735, "learning_rate": 7.036313052611121e-06, "loss": 0.5339, "step": 14239 }, { "epoch": 1.16, "grad_norm": 4.1823288130429335, "learning_rate": 7.035912613342418e-06, "loss": 0.6013, "step": 14240 }, { "epoch": 1.16, "grad_norm": 3.787370209468694, "learning_rate": 7.03551215841926e-06, "loss": 0.5942, "step": 14241 }, { "epoch": 1.16, "grad_norm": 3.1707196488507363, "learning_rate": 7.0351116878447234e-06, "loss": 0.7343, "step": 14242 }, { "epoch": 1.16, "grad_norm": 2.620789120139993, "learning_rate": 7.03471120162189e-06, "loss": 0.5365, "step": 14243 }, { "epoch": 1.16, "grad_norm": 3.697701424448276, "learning_rate": 7.034310699753838e-06, "loss": 0.6508, "step": 14244 }, { "epoch": 1.16, "grad_norm": 3.205998466773204, "learning_rate": 7.033910182243646e-06, "loss": 0.683, "step": 14245 }, { "epoch": 1.16, "grad_norm": 2.2415047488304283, "learning_rate": 7.0335096490943944e-06, "loss": 0.4936, "step": 14246 }, { "epoch": 1.16, "grad_norm": 3.1970524813710424, "learning_rate": 7.0331091003091645e-06, "loss": 0.6232, "step": 14247 }, { "epoch": 1.16, "grad_norm": 3.275575383702841, "learning_rate": 7.032708535891035e-06, "loss": 0.6115, "step": 14248 }, { "epoch": 1.16, "grad_norm": 4.266359018917631, "learning_rate": 7.032307955843084e-06, "loss": 0.6165, "step": 14249 }, { "epoch": 1.16, "grad_norm": 3.1378515633452344, "learning_rate": 7.031907360168395e-06, "loss": 0.5524, "step": 14250 }, { "epoch": 1.16, "grad_norm": 7.249711138190236, "learning_rate": 7.031506748870046e-06, "loss": 0.5644, "step": 14251 }, { "epoch": 1.16, "grad_norm": 3.2162692082218745, "learning_rate": 7.031106121951119e-06, "loss": 0.632, "step": 14252 }, { "epoch": 1.16, "grad_norm": 3.5931438875311055, "learning_rate": 7.030705479414693e-06, "loss": 0.6657, "step": 14253 }, { "epoch": 1.16, "grad_norm": 7.824382926667766, "learning_rate": 7.030304821263848e-06, "loss": 0.6022, "step": 14254 }, { "epoch": 1.16, "grad_norm": 4.132514640687481, "learning_rate": 7.029904147501667e-06, "loss": 0.5773, "step": 14255 }, { "epoch": 1.16, "grad_norm": 4.724806935404955, "learning_rate": 7.029503458131231e-06, "loss": 0.5868, "step": 14256 }, { "epoch": 1.16, "grad_norm": 4.19500344275097, "learning_rate": 7.029102753155618e-06, "loss": 0.6365, "step": 14257 }, { "epoch": 1.16, "grad_norm": 4.598390870970992, "learning_rate": 7.02870203257791e-06, "loss": 0.5698, "step": 14258 }, { "epoch": 1.16, "grad_norm": 4.179967694382992, "learning_rate": 7.02830129640119e-06, "loss": 0.6406, "step": 14259 }, { "epoch": 1.16, "grad_norm": 3.5470600459981214, "learning_rate": 7.027900544628538e-06, "loss": 0.5384, "step": 14260 }, { "epoch": 1.16, "grad_norm": 1.944454053254147, "learning_rate": 7.027499777263036e-06, "loss": 0.5242, "step": 14261 }, { "epoch": 1.16, "grad_norm": 3.719403525036206, "learning_rate": 7.027098994307764e-06, "loss": 0.6097, "step": 14262 }, { "epoch": 1.16, "grad_norm": 6.2569974974738685, "learning_rate": 7.026698195765806e-06, "loss": 0.6217, "step": 14263 }, { "epoch": 1.16, "grad_norm": 3.5394625905364725, "learning_rate": 7.026297381640244e-06, "loss": 0.6382, "step": 14264 }, { "epoch": 1.16, "grad_norm": 4.98790303985018, "learning_rate": 7.025896551934157e-06, "loss": 0.5931, "step": 14265 }, { "epoch": 1.16, "grad_norm": 4.1845552567708575, "learning_rate": 7.025495706650628e-06, "loss": 0.6124, "step": 14266 }, { "epoch": 1.16, "grad_norm": 3.283451759215607, "learning_rate": 7.025094845792741e-06, "loss": 0.5638, "step": 14267 }, { "epoch": 1.16, "grad_norm": 5.135297357234463, "learning_rate": 7.024693969363577e-06, "loss": 0.7682, "step": 14268 }, { "epoch": 1.16, "grad_norm": 4.288630295475565, "learning_rate": 7.02429307736622e-06, "loss": 0.6353, "step": 14269 }, { "epoch": 1.16, "grad_norm": 8.26184866605229, "learning_rate": 7.023892169803748e-06, "loss": 0.5387, "step": 14270 }, { "epoch": 1.16, "grad_norm": 2.685698657168184, "learning_rate": 7.02349124667925e-06, "loss": 0.5178, "step": 14271 }, { "epoch": 1.16, "grad_norm": 4.350758759440808, "learning_rate": 7.0230903079958035e-06, "loss": 0.4892, "step": 14272 }, { "epoch": 1.16, "grad_norm": 2.058275065640589, "learning_rate": 7.022689353756493e-06, "loss": 0.5459, "step": 14273 }, { "epoch": 1.16, "grad_norm": 3.4042189692306204, "learning_rate": 7.022288383964403e-06, "loss": 0.6865, "step": 14274 }, { "epoch": 1.16, "grad_norm": 3.1462412949832093, "learning_rate": 7.021887398622616e-06, "loss": 0.6052, "step": 14275 }, { "epoch": 1.16, "grad_norm": 3.90072606020423, "learning_rate": 7.021486397734214e-06, "loss": 0.6066, "step": 14276 }, { "epoch": 1.16, "grad_norm": 3.5941592100779425, "learning_rate": 7.0210853813022804e-06, "loss": 0.5538, "step": 14277 }, { "epoch": 1.16, "grad_norm": 19.374855319086677, "learning_rate": 7.020684349329899e-06, "loss": 0.5373, "step": 14278 }, { "epoch": 1.16, "grad_norm": 7.898111934725016, "learning_rate": 7.0202833018201556e-06, "loss": 0.6998, "step": 14279 }, { "epoch": 1.16, "grad_norm": 3.444032974062626, "learning_rate": 7.0198822387761325e-06, "loss": 0.6366, "step": 14280 }, { "epoch": 1.16, "grad_norm": 3.1021112313858734, "learning_rate": 7.019481160200912e-06, "loss": 0.6034, "step": 14281 }, { "epoch": 1.16, "grad_norm": 4.188810116655731, "learning_rate": 7.019080066097578e-06, "loss": 0.5915, "step": 14282 }, { "epoch": 1.16, "grad_norm": 2.5392609469184104, "learning_rate": 7.018678956469217e-06, "loss": 0.4396, "step": 14283 }, { "epoch": 1.16, "grad_norm": 4.364169499956356, "learning_rate": 7.018277831318911e-06, "loss": 0.6798, "step": 14284 }, { "epoch": 1.16, "grad_norm": 3.058440700169329, "learning_rate": 7.017876690649747e-06, "loss": 0.6833, "step": 14285 }, { "epoch": 1.16, "grad_norm": 3.730260520261365, "learning_rate": 7.017475534464806e-06, "loss": 0.5656, "step": 14286 }, { "epoch": 1.16, "grad_norm": 2.4517320227522026, "learning_rate": 7.017074362767176e-06, "loss": 0.6382, "step": 14287 }, { "epoch": 1.16, "grad_norm": 3.243880959335342, "learning_rate": 7.016673175559939e-06, "loss": 0.6384, "step": 14288 }, { "epoch": 1.16, "grad_norm": 5.6257644524606265, "learning_rate": 7.0162719728461804e-06, "loss": 0.5352, "step": 14289 }, { "epoch": 1.16, "grad_norm": 4.600378816527835, "learning_rate": 7.015870754628985e-06, "loss": 0.4983, "step": 14290 }, { "epoch": 1.16, "grad_norm": 2.4691132195469168, "learning_rate": 7.01546952091144e-06, "loss": 0.4924, "step": 14291 }, { "epoch": 1.16, "grad_norm": 3.2473224802264307, "learning_rate": 7.015068271696628e-06, "loss": 0.5009, "step": 14292 }, { "epoch": 1.16, "grad_norm": 3.3686684626597145, "learning_rate": 7.014667006987634e-06, "loss": 0.7183, "step": 14293 }, { "epoch": 1.16, "grad_norm": 2.820540035102836, "learning_rate": 7.014265726787546e-06, "loss": 0.5231, "step": 14294 }, { "epoch": 1.16, "grad_norm": 3.789686339556025, "learning_rate": 7.013864431099446e-06, "loss": 0.5128, "step": 14295 }, { "epoch": 1.16, "grad_norm": 2.335494360825453, "learning_rate": 7.013463119926425e-06, "loss": 0.4913, "step": 14296 }, { "epoch": 1.16, "grad_norm": 3.534001407306064, "learning_rate": 7.013061793271563e-06, "loss": 0.5593, "step": 14297 }, { "epoch": 1.16, "grad_norm": 5.18746331545558, "learning_rate": 7.012660451137947e-06, "loss": 0.7104, "step": 14298 }, { "epoch": 1.16, "grad_norm": 5.6493774004037585, "learning_rate": 7.012259093528666e-06, "loss": 0.6258, "step": 14299 }, { "epoch": 1.16, "grad_norm": 2.9257381941606098, "learning_rate": 7.011857720446805e-06, "loss": 0.6526, "step": 14300 }, { "epoch": 1.16, "grad_norm": 2.633411593380638, "learning_rate": 7.011456331895449e-06, "loss": 0.6897, "step": 14301 }, { "epoch": 1.16, "grad_norm": 2.5222965533559267, "learning_rate": 7.011054927877683e-06, "loss": 0.5715, "step": 14302 }, { "epoch": 1.16, "grad_norm": 3.292623548327875, "learning_rate": 7.010653508396598e-06, "loss": 0.6149, "step": 14303 }, { "epoch": 1.16, "grad_norm": 2.597249919260393, "learning_rate": 7.010252073455277e-06, "loss": 0.5925, "step": 14304 }, { "epoch": 1.16, "grad_norm": 4.415128115344621, "learning_rate": 7.009850623056807e-06, "loss": 0.5944, "step": 14305 }, { "epoch": 1.16, "grad_norm": 2.4897428841915263, "learning_rate": 7.009449157204275e-06, "loss": 0.5302, "step": 14306 }, { "epoch": 1.16, "grad_norm": 4.657284668879697, "learning_rate": 7.00904767590077e-06, "loss": 0.6341, "step": 14307 }, { "epoch": 1.16, "grad_norm": 4.419504732160633, "learning_rate": 7.008646179149377e-06, "loss": 0.4999, "step": 14308 }, { "epoch": 1.16, "grad_norm": 10.280517335341065, "learning_rate": 7.008244666953182e-06, "loss": 0.6955, "step": 14309 }, { "epoch": 1.16, "grad_norm": 14.099013589556494, "learning_rate": 7.007843139315275e-06, "loss": 0.5176, "step": 14310 }, { "epoch": 1.16, "grad_norm": 3.458984400780439, "learning_rate": 7.007441596238742e-06, "loss": 0.51, "step": 14311 }, { "epoch": 1.16, "grad_norm": 5.383464098332261, "learning_rate": 7.0070400377266715e-06, "loss": 0.5011, "step": 14312 }, { "epoch": 1.16, "grad_norm": 3.239818942293765, "learning_rate": 7.00663846378215e-06, "loss": 0.6235, "step": 14313 }, { "epoch": 1.16, "grad_norm": 2.8546949092995773, "learning_rate": 7.006236874408265e-06, "loss": 0.4533, "step": 14314 }, { "epoch": 1.16, "grad_norm": 4.1804134820621055, "learning_rate": 7.005835269608106e-06, "loss": 0.4069, "step": 14315 }, { "epoch": 1.16, "grad_norm": 3.8695467842726194, "learning_rate": 7.005433649384761e-06, "loss": 0.3909, "step": 14316 }, { "epoch": 1.16, "grad_norm": 3.272513515418013, "learning_rate": 7.0050320137413154e-06, "loss": 0.5891, "step": 14317 }, { "epoch": 1.16, "grad_norm": 3.7416433539010336, "learning_rate": 7.00463036268086e-06, "loss": 0.5106, "step": 14318 }, { "epoch": 1.16, "grad_norm": 2.5495522678032243, "learning_rate": 7.004228696206482e-06, "loss": 0.5411, "step": 14319 }, { "epoch": 1.16, "grad_norm": 2.1458036016879634, "learning_rate": 7.003827014321272e-06, "loss": 0.472, "step": 14320 }, { "epoch": 1.16, "grad_norm": 3.338365066157051, "learning_rate": 7.0034253170283154e-06, "loss": 0.681, "step": 14321 }, { "epoch": 1.16, "grad_norm": 8.21066698040215, "learning_rate": 7.003023604330702e-06, "loss": 0.5258, "step": 14322 }, { "epoch": 1.16, "grad_norm": 7.263807912585048, "learning_rate": 7.002621876231521e-06, "loss": 0.5927, "step": 14323 }, { "epoch": 1.16, "grad_norm": 4.607377515690884, "learning_rate": 7.002220132733864e-06, "loss": 0.6158, "step": 14324 }, { "epoch": 1.16, "grad_norm": 3.7137709876487484, "learning_rate": 7.001818373840815e-06, "loss": 0.5318, "step": 14325 }, { "epoch": 1.16, "grad_norm": 5.081545836644973, "learning_rate": 7.001416599555466e-06, "loss": 0.5295, "step": 14326 }, { "epoch": 1.16, "grad_norm": 2.8397771366828812, "learning_rate": 7.001014809880906e-06, "loss": 0.6522, "step": 14327 }, { "epoch": 1.16, "grad_norm": 4.086702431562511, "learning_rate": 7.000613004820225e-06, "loss": 0.6997, "step": 14328 }, { "epoch": 1.16, "grad_norm": 2.930224267265713, "learning_rate": 7.000211184376512e-06, "loss": 0.6057, "step": 14329 }, { "epoch": 1.16, "grad_norm": 3.5875215759806083, "learning_rate": 6.999809348552855e-06, "loss": 0.7486, "step": 14330 }, { "epoch": 1.16, "grad_norm": 2.657890963189876, "learning_rate": 6.999407497352348e-06, "loss": 0.657, "step": 14331 }, { "epoch": 1.16, "grad_norm": 6.243013781429413, "learning_rate": 6.9990056307780765e-06, "loss": 0.5178, "step": 14332 }, { "epoch": 1.16, "grad_norm": 3.708661312104522, "learning_rate": 6.998603748833133e-06, "loss": 0.4944, "step": 14333 }, { "epoch": 1.16, "grad_norm": 3.1312206968052423, "learning_rate": 6.998201851520605e-06, "loss": 0.6599, "step": 14334 }, { "epoch": 1.16, "grad_norm": 3.6656408090384676, "learning_rate": 6.997799938843587e-06, "loss": 0.6819, "step": 14335 }, { "epoch": 1.16, "grad_norm": 3.2935957007769057, "learning_rate": 6.997398010805166e-06, "loss": 0.7279, "step": 14336 }, { "epoch": 1.16, "grad_norm": 5.888406169172882, "learning_rate": 6.9969960674084345e-06, "loss": 0.6371, "step": 14337 }, { "epoch": 1.16, "grad_norm": 3.774438784174132, "learning_rate": 6.996594108656482e-06, "loss": 0.5928, "step": 14338 }, { "epoch": 1.16, "grad_norm": 2.2188273061923267, "learning_rate": 6.996192134552397e-06, "loss": 0.6339, "step": 14339 }, { "epoch": 1.16, "grad_norm": 3.709104016510214, "learning_rate": 6.995790145099276e-06, "loss": 0.5738, "step": 14340 }, { "epoch": 1.16, "grad_norm": 4.842457330602665, "learning_rate": 6.995388140300205e-06, "loss": 0.5877, "step": 14341 }, { "epoch": 1.16, "grad_norm": 4.681528044720819, "learning_rate": 6.994986120158278e-06, "loss": 0.5867, "step": 14342 }, { "epoch": 1.16, "grad_norm": 9.32937426644189, "learning_rate": 6.994584084676583e-06, "loss": 0.5722, "step": 14343 }, { "epoch": 1.17, "grad_norm": 3.1848223850243587, "learning_rate": 6.994182033858215e-06, "loss": 0.5362, "step": 14344 }, { "epoch": 1.17, "grad_norm": 6.564815504002435, "learning_rate": 6.9937799677062626e-06, "loss": 0.6368, "step": 14345 }, { "epoch": 1.17, "grad_norm": 3.1987090748418106, "learning_rate": 6.9933778862238186e-06, "loss": 0.6182, "step": 14346 }, { "epoch": 1.17, "grad_norm": 3.0578874080368137, "learning_rate": 6.992975789413974e-06, "loss": 0.4869, "step": 14347 }, { "epoch": 1.17, "grad_norm": 25.621097330749468, "learning_rate": 6.992573677279822e-06, "loss": 0.5725, "step": 14348 }, { "epoch": 1.17, "grad_norm": 3.7363801280548934, "learning_rate": 6.992171549824453e-06, "loss": 0.4721, "step": 14349 }, { "epoch": 1.17, "grad_norm": 2.7744705247068238, "learning_rate": 6.991769407050961e-06, "loss": 0.7033, "step": 14350 }, { "epoch": 1.17, "grad_norm": 3.474478192986402, "learning_rate": 6.991367248962435e-06, "loss": 0.568, "step": 14351 }, { "epoch": 1.17, "grad_norm": 4.546957961790935, "learning_rate": 6.990965075561971e-06, "loss": 0.4784, "step": 14352 }, { "epoch": 1.17, "grad_norm": 6.11401562522574, "learning_rate": 6.990562886852658e-06, "loss": 0.6165, "step": 14353 }, { "epoch": 1.17, "grad_norm": 3.9047566310040525, "learning_rate": 6.99016068283759e-06, "loss": 0.5949, "step": 14354 }, { "epoch": 1.17, "grad_norm": 2.9266982396267665, "learning_rate": 6.989758463519859e-06, "loss": 0.6844, "step": 14355 }, { "epoch": 1.17, "grad_norm": 3.402016149153466, "learning_rate": 6.98935622890256e-06, "loss": 0.6214, "step": 14356 }, { "epoch": 1.17, "grad_norm": 2.982808532668181, "learning_rate": 6.988953978988781e-06, "loss": 0.6567, "step": 14357 }, { "epoch": 1.17, "grad_norm": 3.898175964082523, "learning_rate": 6.988551713781622e-06, "loss": 0.6108, "step": 14358 }, { "epoch": 1.17, "grad_norm": 4.662277020948855, "learning_rate": 6.988149433284168e-06, "loss": 0.6059, "step": 14359 }, { "epoch": 1.17, "grad_norm": 3.4111622186054538, "learning_rate": 6.98774713749952e-06, "loss": 0.614, "step": 14360 }, { "epoch": 1.17, "grad_norm": 2.7320782341051872, "learning_rate": 6.987344826430766e-06, "loss": 0.5392, "step": 14361 }, { "epoch": 1.17, "grad_norm": 3.4686411469204526, "learning_rate": 6.986942500081001e-06, "loss": 0.6384, "step": 14362 }, { "epoch": 1.17, "grad_norm": 3.173090377014203, "learning_rate": 6.986540158453319e-06, "loss": 0.4716, "step": 14363 }, { "epoch": 1.17, "grad_norm": 3.096791265309678, "learning_rate": 6.986137801550812e-06, "loss": 0.4752, "step": 14364 }, { "epoch": 1.17, "grad_norm": 4.8143501926506875, "learning_rate": 6.985735429376577e-06, "loss": 0.6, "step": 14365 }, { "epoch": 1.17, "grad_norm": 2.4462063820842443, "learning_rate": 6.985333041933705e-06, "loss": 0.6659, "step": 14366 }, { "epoch": 1.17, "grad_norm": 3.6728921012953752, "learning_rate": 6.984930639225291e-06, "loss": 0.6288, "step": 14367 }, { "epoch": 1.17, "grad_norm": 5.223560767515796, "learning_rate": 6.98452822125443e-06, "loss": 0.5641, "step": 14368 }, { "epoch": 1.17, "grad_norm": 3.0925376791391344, "learning_rate": 6.984125788024214e-06, "loss": 0.5045, "step": 14369 }, { "epoch": 1.17, "grad_norm": 3.480869296655566, "learning_rate": 6.98372333953774e-06, "loss": 0.5843, "step": 14370 }, { "epoch": 1.17, "grad_norm": 2.8390158419798173, "learning_rate": 6.9833208757981e-06, "loss": 0.6507, "step": 14371 }, { "epoch": 1.17, "grad_norm": 3.006454821342152, "learning_rate": 6.982918396808391e-06, "loss": 0.593, "step": 14372 }, { "epoch": 1.17, "grad_norm": 5.573638379559762, "learning_rate": 6.982515902571706e-06, "loss": 0.6283, "step": 14373 }, { "epoch": 1.17, "grad_norm": 4.566660085411355, "learning_rate": 6.982113393091141e-06, "loss": 0.7519, "step": 14374 }, { "epoch": 1.17, "grad_norm": 8.681217432274263, "learning_rate": 6.981710868369789e-06, "loss": 0.4871, "step": 14375 }, { "epoch": 1.17, "grad_norm": 2.8482653095466577, "learning_rate": 6.9813083284107476e-06, "loss": 0.5129, "step": 14376 }, { "epoch": 1.17, "grad_norm": 4.175341198785065, "learning_rate": 6.9809057732171115e-06, "loss": 0.6001, "step": 14377 }, { "epoch": 1.17, "grad_norm": 2.8747275560042667, "learning_rate": 6.980503202791975e-06, "loss": 0.6338, "step": 14378 }, { "epoch": 1.17, "grad_norm": 3.5022422261109445, "learning_rate": 6.980100617138433e-06, "loss": 0.6773, "step": 14379 }, { "epoch": 1.17, "grad_norm": 3.962739994178937, "learning_rate": 6.9796980162595816e-06, "loss": 0.6986, "step": 14380 }, { "epoch": 1.17, "grad_norm": 6.391472358760104, "learning_rate": 6.979295400158519e-06, "loss": 0.6048, "step": 14381 }, { "epoch": 1.17, "grad_norm": 2.6423739026527975, "learning_rate": 6.9788927688383375e-06, "loss": 0.5914, "step": 14382 }, { "epoch": 1.17, "grad_norm": 3.135679931640387, "learning_rate": 6.978490122302134e-06, "loss": 0.4888, "step": 14383 }, { "epoch": 1.17, "grad_norm": 2.239844735822562, "learning_rate": 6.978087460553005e-06, "loss": 0.529, "step": 14384 }, { "epoch": 1.17, "grad_norm": 4.794632692026889, "learning_rate": 6.977684783594047e-06, "loss": 0.5175, "step": 14385 }, { "epoch": 1.17, "grad_norm": 2.3501057830500023, "learning_rate": 6.977282091428354e-06, "loss": 0.5594, "step": 14386 }, { "epoch": 1.17, "grad_norm": 2.870632360650033, "learning_rate": 6.976879384059025e-06, "loss": 0.6589, "step": 14387 }, { "epoch": 1.17, "grad_norm": 2.8998745782701274, "learning_rate": 6.976476661489156e-06, "loss": 0.4675, "step": 14388 }, { "epoch": 1.17, "grad_norm": 37.81231139900525, "learning_rate": 6.976073923721844e-06, "loss": 0.5786, "step": 14389 }, { "epoch": 1.17, "grad_norm": 3.548745545461503, "learning_rate": 6.975671170760184e-06, "loss": 0.6791, "step": 14390 }, { "epoch": 1.17, "grad_norm": 3.7472913912717685, "learning_rate": 6.975268402607273e-06, "loss": 0.6195, "step": 14391 }, { "epoch": 1.17, "grad_norm": 4.039606035480137, "learning_rate": 6.974865619266209e-06, "loss": 0.5425, "step": 14392 }, { "epoch": 1.17, "grad_norm": 3.7674261132181988, "learning_rate": 6.974462820740089e-06, "loss": 0.7325, "step": 14393 }, { "epoch": 1.17, "grad_norm": 16.828672966850558, "learning_rate": 6.9740600070320095e-06, "loss": 0.5822, "step": 14394 }, { "epoch": 1.17, "grad_norm": 3.179237926362057, "learning_rate": 6.973657178145068e-06, "loss": 0.644, "step": 14395 }, { "epoch": 1.17, "grad_norm": 2.6762755940609977, "learning_rate": 6.9732543340823625e-06, "loss": 0.5259, "step": 14396 }, { "epoch": 1.17, "grad_norm": 10.92428157152755, "learning_rate": 6.97285147484699e-06, "loss": 0.62, "step": 14397 }, { "epoch": 1.17, "grad_norm": 4.45452400649346, "learning_rate": 6.972448600442049e-06, "loss": 0.5079, "step": 14398 }, { "epoch": 1.17, "grad_norm": 3.52073172281811, "learning_rate": 6.972045710870635e-06, "loss": 0.6075, "step": 14399 }, { "epoch": 1.17, "grad_norm": 3.7215429942568576, "learning_rate": 6.971642806135848e-06, "loss": 0.4868, "step": 14400 }, { "epoch": 1.17, "grad_norm": 3.3293480967158993, "learning_rate": 6.9712398862407855e-06, "loss": 0.6403, "step": 14401 }, { "epoch": 1.17, "grad_norm": 4.008981933670134, "learning_rate": 6.970836951188546e-06, "loss": 0.6132, "step": 14402 }, { "epoch": 1.17, "grad_norm": 2.6923034952557425, "learning_rate": 6.970434000982227e-06, "loss": 0.5692, "step": 14403 }, { "epoch": 1.17, "grad_norm": 2.6762156985686643, "learning_rate": 6.970031035624927e-06, "loss": 0.5048, "step": 14404 }, { "epoch": 1.17, "grad_norm": 2.267599241682459, "learning_rate": 6.969628055119743e-06, "loss": 0.5385, "step": 14405 }, { "epoch": 1.17, "grad_norm": 5.771215214722076, "learning_rate": 6.969225059469778e-06, "loss": 0.6546, "step": 14406 }, { "epoch": 1.17, "grad_norm": 2.9552246423485613, "learning_rate": 6.9688220486781266e-06, "loss": 0.6403, "step": 14407 }, { "epoch": 1.17, "grad_norm": 2.166165497279776, "learning_rate": 6.9684190227478876e-06, "loss": 0.5658, "step": 14408 }, { "epoch": 1.17, "grad_norm": 15.575607020082467, "learning_rate": 6.968015981682163e-06, "loss": 0.6496, "step": 14409 }, { "epoch": 1.17, "grad_norm": 2.6021809587434213, "learning_rate": 6.96761292548405e-06, "loss": 0.7036, "step": 14410 }, { "epoch": 1.17, "grad_norm": 2.546907069565274, "learning_rate": 6.967209854156647e-06, "loss": 0.6077, "step": 14411 }, { "epoch": 1.17, "grad_norm": 2.7488686561205156, "learning_rate": 6.966806767703054e-06, "loss": 0.6283, "step": 14412 }, { "epoch": 1.17, "grad_norm": 2.995010561152073, "learning_rate": 6.966403666126371e-06, "loss": 0.4886, "step": 14413 }, { "epoch": 1.17, "grad_norm": 3.810030776176775, "learning_rate": 6.9660005494296965e-06, "loss": 0.4791, "step": 14414 }, { "epoch": 1.17, "grad_norm": 3.42248382242463, "learning_rate": 6.965597417616131e-06, "loss": 0.5891, "step": 14415 }, { "epoch": 1.17, "grad_norm": 3.1886976142437597, "learning_rate": 6.965194270688773e-06, "loss": 0.4953, "step": 14416 }, { "epoch": 1.17, "grad_norm": 2.7148153760039633, "learning_rate": 6.964791108650725e-06, "loss": 0.568, "step": 14417 }, { "epoch": 1.17, "grad_norm": 18.725090757385285, "learning_rate": 6.964387931505084e-06, "loss": 0.601, "step": 14418 }, { "epoch": 1.17, "grad_norm": 5.304822505435947, "learning_rate": 6.963984739254952e-06, "loss": 0.6416, "step": 14419 }, { "epoch": 1.17, "grad_norm": 3.0611772645721254, "learning_rate": 6.963581531903427e-06, "loss": 0.4845, "step": 14420 }, { "epoch": 1.17, "grad_norm": 4.958420776103195, "learning_rate": 6.963178309453612e-06, "loss": 0.569, "step": 14421 }, { "epoch": 1.17, "grad_norm": 3.6402015672583374, "learning_rate": 6.9627750719086075e-06, "loss": 0.356, "step": 14422 }, { "epoch": 1.17, "grad_norm": 6.788615198498025, "learning_rate": 6.9623718192715105e-06, "loss": 0.5999, "step": 14423 }, { "epoch": 1.17, "grad_norm": 2.8605981501853446, "learning_rate": 6.961968551545425e-06, "loss": 0.521, "step": 14424 }, { "epoch": 1.17, "grad_norm": 3.436802712074126, "learning_rate": 6.96156526873345e-06, "loss": 0.6463, "step": 14425 }, { "epoch": 1.17, "grad_norm": 2.5790178172689577, "learning_rate": 6.961161970838689e-06, "loss": 0.631, "step": 14426 }, { "epoch": 1.17, "grad_norm": 4.628268415180049, "learning_rate": 6.96075865786424e-06, "loss": 0.5626, "step": 14427 }, { "epoch": 1.17, "grad_norm": 2.6567155756065124, "learning_rate": 6.960355329813205e-06, "loss": 0.6449, "step": 14428 }, { "epoch": 1.17, "grad_norm": 6.994920028866129, "learning_rate": 6.9599519866886865e-06, "loss": 0.5483, "step": 14429 }, { "epoch": 1.17, "grad_norm": 4.352044545876115, "learning_rate": 6.959548628493785e-06, "loss": 0.6923, "step": 14430 }, { "epoch": 1.17, "grad_norm": 3.6040236462593978, "learning_rate": 6.959145255231602e-06, "loss": 0.5904, "step": 14431 }, { "epoch": 1.17, "grad_norm": 8.142022398021625, "learning_rate": 6.958741866905238e-06, "loss": 0.3811, "step": 14432 }, { "epoch": 1.17, "grad_norm": 3.464647890254119, "learning_rate": 6.9583384635177966e-06, "loss": 0.7376, "step": 14433 }, { "epoch": 1.17, "grad_norm": 2.934550034235554, "learning_rate": 6.95793504507238e-06, "loss": 0.5579, "step": 14434 }, { "epoch": 1.17, "grad_norm": 5.489098908607065, "learning_rate": 6.957531611572087e-06, "loss": 0.5379, "step": 14435 }, { "epoch": 1.17, "grad_norm": 2.4403418770921634, "learning_rate": 6.957128163020022e-06, "loss": 0.5621, "step": 14436 }, { "epoch": 1.17, "grad_norm": 3.695129049865734, "learning_rate": 6.956724699419286e-06, "loss": 0.6381, "step": 14437 }, { "epoch": 1.17, "grad_norm": 3.6233689748503255, "learning_rate": 6.956321220772984e-06, "loss": 0.5965, "step": 14438 }, { "epoch": 1.17, "grad_norm": 3.453742364885673, "learning_rate": 6.955917727084216e-06, "loss": 0.7493, "step": 14439 }, { "epoch": 1.17, "grad_norm": 3.852200289282606, "learning_rate": 6.955514218356085e-06, "loss": 0.6375, "step": 14440 }, { "epoch": 1.17, "grad_norm": 8.907921466841959, "learning_rate": 6.955110694591692e-06, "loss": 0.5739, "step": 14441 }, { "epoch": 1.17, "grad_norm": 3.0053273914422793, "learning_rate": 6.954707155794144e-06, "loss": 0.5764, "step": 14442 }, { "epoch": 1.17, "grad_norm": 2.4444362137450395, "learning_rate": 6.95430360196654e-06, "loss": 0.6312, "step": 14443 }, { "epoch": 1.17, "grad_norm": 4.552504380206663, "learning_rate": 6.953900033111985e-06, "loss": 0.6743, "step": 14444 }, { "epoch": 1.17, "grad_norm": 10.594083810729677, "learning_rate": 6.95349644923358e-06, "loss": 0.6278, "step": 14445 }, { "epoch": 1.17, "grad_norm": 2.0910392908780358, "learning_rate": 6.953092850334431e-06, "loss": 0.5347, "step": 14446 }, { "epoch": 1.17, "grad_norm": 2.5170330814506703, "learning_rate": 6.9526892364176405e-06, "loss": 0.489, "step": 14447 }, { "epoch": 1.17, "grad_norm": 14.807003721571265, "learning_rate": 6.95228560748631e-06, "loss": 0.6311, "step": 14448 }, { "epoch": 1.17, "grad_norm": 2.69009518789666, "learning_rate": 6.951881963543544e-06, "loss": 0.5366, "step": 14449 }, { "epoch": 1.17, "grad_norm": 5.047129019330523, "learning_rate": 6.951478304592448e-06, "loss": 0.6048, "step": 14450 }, { "epoch": 1.17, "grad_norm": 7.696321536972059, "learning_rate": 6.951074630636124e-06, "loss": 0.6983, "step": 14451 }, { "epoch": 1.17, "grad_norm": 3.995056215344097, "learning_rate": 6.950670941677678e-06, "loss": 0.5687, "step": 14452 }, { "epoch": 1.17, "grad_norm": 7.888937232835702, "learning_rate": 6.95026723772021e-06, "loss": 0.5067, "step": 14453 }, { "epoch": 1.17, "grad_norm": 4.184677893703467, "learning_rate": 6.9498635187668276e-06, "loss": 0.502, "step": 14454 }, { "epoch": 1.17, "grad_norm": 2.700464604971667, "learning_rate": 6.949459784820633e-06, "loss": 0.4556, "step": 14455 }, { "epoch": 1.17, "grad_norm": 1.9415550473834058, "learning_rate": 6.9490560358847335e-06, "loss": 0.5243, "step": 14456 }, { "epoch": 1.17, "grad_norm": 6.112133684629271, "learning_rate": 6.9486522719622305e-06, "loss": 0.6074, "step": 14457 }, { "epoch": 1.17, "grad_norm": 2.558293499464618, "learning_rate": 6.94824849305623e-06, "loss": 0.5656, "step": 14458 }, { "epoch": 1.17, "grad_norm": 3.034697989777955, "learning_rate": 6.947844699169837e-06, "loss": 0.6923, "step": 14459 }, { "epoch": 1.17, "grad_norm": 2.986962921572295, "learning_rate": 6.9474408903061555e-06, "loss": 0.4884, "step": 14460 }, { "epoch": 1.17, "grad_norm": 3.493398004113482, "learning_rate": 6.94703706646829e-06, "loss": 0.5289, "step": 14461 }, { "epoch": 1.17, "grad_norm": 3.4731142285957204, "learning_rate": 6.9466332276593474e-06, "loss": 0.592, "step": 14462 }, { "epoch": 1.17, "grad_norm": 8.763589641777372, "learning_rate": 6.9462293738824315e-06, "loss": 0.6499, "step": 14463 }, { "epoch": 1.17, "grad_norm": 4.52475535840548, "learning_rate": 6.9458255051406474e-06, "loss": 0.592, "step": 14464 }, { "epoch": 1.17, "grad_norm": 4.695638273405512, "learning_rate": 6.9454216214371e-06, "loss": 0.619, "step": 14465 }, { "epoch": 1.17, "grad_norm": 6.5616661267180785, "learning_rate": 6.945017722774898e-06, "loss": 0.7059, "step": 14466 }, { "epoch": 1.18, "grad_norm": 3.266149976332877, "learning_rate": 6.944613809157146e-06, "loss": 0.6844, "step": 14467 }, { "epoch": 1.18, "grad_norm": 3.073413464543612, "learning_rate": 6.944209880586946e-06, "loss": 0.5085, "step": 14468 }, { "epoch": 1.18, "grad_norm": 5.373660738438155, "learning_rate": 6.943805937067407e-06, "loss": 0.6729, "step": 14469 }, { "epoch": 1.18, "grad_norm": 4.305212600092387, "learning_rate": 6.943401978601636e-06, "loss": 0.5937, "step": 14470 }, { "epoch": 1.18, "grad_norm": 2.9393802118129715, "learning_rate": 6.942998005192736e-06, "loss": 0.4203, "step": 14471 }, { "epoch": 1.18, "grad_norm": 4.104284032081064, "learning_rate": 6.9425940168438165e-06, "loss": 0.685, "step": 14472 }, { "epoch": 1.18, "grad_norm": 4.091450107283679, "learning_rate": 6.94219001355798e-06, "loss": 0.6502, "step": 14473 }, { "epoch": 1.18, "grad_norm": 6.388606407007351, "learning_rate": 6.9417859953383375e-06, "loss": 0.5572, "step": 14474 }, { "epoch": 1.18, "grad_norm": 2.8529118219582585, "learning_rate": 6.941381962187992e-06, "loss": 0.4887, "step": 14475 }, { "epoch": 1.18, "grad_norm": 2.4264530983589663, "learning_rate": 6.940977914110052e-06, "loss": 0.5473, "step": 14476 }, { "epoch": 1.18, "grad_norm": 4.095804444139031, "learning_rate": 6.940573851107622e-06, "loss": 0.5881, "step": 14477 }, { "epoch": 1.18, "grad_norm": 3.0032376852488394, "learning_rate": 6.940169773183812e-06, "loss": 0.6145, "step": 14478 }, { "epoch": 1.18, "grad_norm": 3.922028243634884, "learning_rate": 6.939765680341727e-06, "loss": 0.7281, "step": 14479 }, { "epoch": 1.18, "grad_norm": 3.6186835928882677, "learning_rate": 6.9393615725844755e-06, "loss": 0.6334, "step": 14480 }, { "epoch": 1.18, "grad_norm": 5.694292881787652, "learning_rate": 6.9389574499151624e-06, "loss": 0.6004, "step": 14481 }, { "epoch": 1.18, "grad_norm": 4.334504571831336, "learning_rate": 6.938553312336897e-06, "loss": 0.5724, "step": 14482 }, { "epoch": 1.18, "grad_norm": 4.942372914609437, "learning_rate": 6.9381491598527875e-06, "loss": 0.6537, "step": 14483 }, { "epoch": 1.18, "grad_norm": 2.880060952924213, "learning_rate": 6.93774499246594e-06, "loss": 0.6288, "step": 14484 }, { "epoch": 1.18, "grad_norm": 3.3184450531676535, "learning_rate": 6.937340810179462e-06, "loss": 0.5929, "step": 14485 }, { "epoch": 1.18, "grad_norm": 2.2420099026407723, "learning_rate": 6.936936612996462e-06, "loss": 0.4999, "step": 14486 }, { "epoch": 1.18, "grad_norm": 2.7566032603695034, "learning_rate": 6.936532400920048e-06, "loss": 0.5569, "step": 14487 }, { "epoch": 1.18, "grad_norm": 5.287939074943457, "learning_rate": 6.93612817395333e-06, "loss": 0.8029, "step": 14488 }, { "epoch": 1.18, "grad_norm": 2.5402167100741644, "learning_rate": 6.935723932099411e-06, "loss": 0.5409, "step": 14489 }, { "epoch": 1.18, "grad_norm": 2.634229023218897, "learning_rate": 6.935319675361404e-06, "loss": 0.5988, "step": 14490 }, { "epoch": 1.18, "grad_norm": 4.228036784112948, "learning_rate": 6.934915403742415e-06, "loss": 0.6141, "step": 14491 }, { "epoch": 1.18, "grad_norm": 3.263605324718991, "learning_rate": 6.934511117245554e-06, "loss": 0.6113, "step": 14492 }, { "epoch": 1.18, "grad_norm": 10.540184288257919, "learning_rate": 6.934106815873928e-06, "loss": 0.5666, "step": 14493 }, { "epoch": 1.18, "grad_norm": 2.4732038090945974, "learning_rate": 6.933702499630647e-06, "loss": 0.6033, "step": 14494 }, { "epoch": 1.18, "grad_norm": 2.9096236681682486, "learning_rate": 6.93329816851882e-06, "loss": 0.4452, "step": 14495 }, { "epoch": 1.18, "grad_norm": 2.803658197668262, "learning_rate": 6.9328938225415556e-06, "loss": 0.7844, "step": 14496 }, { "epoch": 1.18, "grad_norm": 2.501477241764622, "learning_rate": 6.9324894617019615e-06, "loss": 0.4965, "step": 14497 }, { "epoch": 1.18, "grad_norm": 2.573260797654089, "learning_rate": 6.932085086003149e-06, "loss": 0.6283, "step": 14498 }, { "epoch": 1.18, "grad_norm": 4.659134702879103, "learning_rate": 6.931680695448225e-06, "loss": 0.4654, "step": 14499 }, { "epoch": 1.18, "grad_norm": 4.382496520456605, "learning_rate": 6.931276290040302e-06, "loss": 0.6358, "step": 14500 }, { "epoch": 1.18, "grad_norm": 3.9526001229338603, "learning_rate": 6.930871869782488e-06, "loss": 0.5823, "step": 14501 }, { "epoch": 1.18, "grad_norm": 3.6188307498633017, "learning_rate": 6.9304674346778925e-06, "loss": 0.7168, "step": 14502 }, { "epoch": 1.18, "grad_norm": 4.370348800190304, "learning_rate": 6.930062984729624e-06, "loss": 0.6438, "step": 14503 }, { "epoch": 1.18, "grad_norm": 5.39690235171182, "learning_rate": 6.929658519940796e-06, "loss": 0.5861, "step": 14504 }, { "epoch": 1.18, "grad_norm": 3.8506907724649904, "learning_rate": 6.929254040314514e-06, "loss": 0.6067, "step": 14505 }, { "epoch": 1.18, "grad_norm": 3.143680453525122, "learning_rate": 6.9288495458538915e-06, "loss": 0.5675, "step": 14506 }, { "epoch": 1.18, "grad_norm": 2.2910626808847936, "learning_rate": 6.9284450365620385e-06, "loss": 0.4505, "step": 14507 }, { "epoch": 1.18, "grad_norm": 3.245542271536684, "learning_rate": 6.928040512442064e-06, "loss": 0.501, "step": 14508 }, { "epoch": 1.18, "grad_norm": 3.55701894105794, "learning_rate": 6.927635973497077e-06, "loss": 0.5896, "step": 14509 }, { "epoch": 1.18, "grad_norm": 4.0393767894626444, "learning_rate": 6.9272314197301925e-06, "loss": 0.5646, "step": 14510 }, { "epoch": 1.18, "grad_norm": 6.149176482069408, "learning_rate": 6.926826851144518e-06, "loss": 0.5843, "step": 14511 }, { "epoch": 1.18, "grad_norm": 4.152491306049876, "learning_rate": 6.9264222677431645e-06, "loss": 0.6703, "step": 14512 }, { "epoch": 1.18, "grad_norm": 3.747954856878307, "learning_rate": 6.926017669529242e-06, "loss": 0.6766, "step": 14513 }, { "epoch": 1.18, "grad_norm": 5.083977912322783, "learning_rate": 6.925613056505865e-06, "loss": 0.7093, "step": 14514 }, { "epoch": 1.18, "grad_norm": 2.7289031081883914, "learning_rate": 6.925208428676142e-06, "loss": 0.5994, "step": 14515 }, { "epoch": 1.18, "grad_norm": 4.157304065905774, "learning_rate": 6.924803786043185e-06, "loss": 0.692, "step": 14516 }, { "epoch": 1.18, "grad_norm": 3.2148154963327156, "learning_rate": 6.924399128610104e-06, "loss": 0.6058, "step": 14517 }, { "epoch": 1.18, "grad_norm": 4.183329641774825, "learning_rate": 6.923994456380012e-06, "loss": 0.7598, "step": 14518 }, { "epoch": 1.18, "grad_norm": 3.35829866966315, "learning_rate": 6.92358976935602e-06, "loss": 0.5264, "step": 14519 }, { "epoch": 1.18, "grad_norm": 6.219441357927212, "learning_rate": 6.923185067541241e-06, "loss": 0.5883, "step": 14520 }, { "epoch": 1.18, "grad_norm": 2.7878692265898373, "learning_rate": 6.9227803509387845e-06, "loss": 0.5065, "step": 14521 }, { "epoch": 1.18, "grad_norm": 4.378925828353985, "learning_rate": 6.922375619551763e-06, "loss": 0.6651, "step": 14522 }, { "epoch": 1.18, "grad_norm": 2.011147862395951, "learning_rate": 6.921970873383291e-06, "loss": 0.5467, "step": 14523 }, { "epoch": 1.18, "grad_norm": 5.886382233203103, "learning_rate": 6.921566112436478e-06, "loss": 0.6541, "step": 14524 }, { "epoch": 1.18, "grad_norm": 3.5457697956374825, "learning_rate": 6.921161336714437e-06, "loss": 0.5786, "step": 14525 }, { "epoch": 1.18, "grad_norm": 13.733004475248478, "learning_rate": 6.92075654622028e-06, "loss": 0.4584, "step": 14526 }, { "epoch": 1.18, "grad_norm": 2.859111966772742, "learning_rate": 6.920351740957121e-06, "loss": 0.5579, "step": 14527 }, { "epoch": 1.18, "grad_norm": 3.1564173528651986, "learning_rate": 6.9199469209280715e-06, "loss": 0.6417, "step": 14528 }, { "epoch": 1.18, "grad_norm": 3.740323572489624, "learning_rate": 6.9195420861362435e-06, "loss": 0.5106, "step": 14529 }, { "epoch": 1.18, "grad_norm": 4.080594189494386, "learning_rate": 6.9191372365847495e-06, "loss": 0.5823, "step": 14530 }, { "epoch": 1.18, "grad_norm": 2.3326303848203733, "learning_rate": 6.918732372276707e-06, "loss": 0.587, "step": 14531 }, { "epoch": 1.18, "grad_norm": 5.443489220170335, "learning_rate": 6.9183274932152234e-06, "loss": 0.6017, "step": 14532 }, { "epoch": 1.18, "grad_norm": 4.314880484895241, "learning_rate": 6.917922599403415e-06, "loss": 0.5064, "step": 14533 }, { "epoch": 1.18, "grad_norm": 4.749167624866858, "learning_rate": 6.917517690844392e-06, "loss": 0.5379, "step": 14534 }, { "epoch": 1.18, "grad_norm": 8.56354884698868, "learning_rate": 6.917112767541272e-06, "loss": 0.6246, "step": 14535 }, { "epoch": 1.18, "grad_norm": 3.8989006066748715, "learning_rate": 6.9167078294971665e-06, "loss": 0.561, "step": 14536 }, { "epoch": 1.18, "grad_norm": 3.654886438435428, "learning_rate": 6.91630287671519e-06, "loss": 0.4718, "step": 14537 }, { "epoch": 1.18, "grad_norm": 2.2925192699155, "learning_rate": 6.915897909198453e-06, "loss": 0.3467, "step": 14538 }, { "epoch": 1.18, "grad_norm": 3.671775140571182, "learning_rate": 6.915492926950074e-06, "loss": 0.5226, "step": 14539 }, { "epoch": 1.18, "grad_norm": 2.955106516372029, "learning_rate": 6.915087929973164e-06, "loss": 0.5438, "step": 14540 }, { "epoch": 1.18, "grad_norm": 5.910237713387555, "learning_rate": 6.914682918270839e-06, "loss": 0.6538, "step": 14541 }, { "epoch": 1.18, "grad_norm": 6.385512057467495, "learning_rate": 6.914277891846209e-06, "loss": 0.6817, "step": 14542 }, { "epoch": 1.18, "grad_norm": 3.7691200491610153, "learning_rate": 6.913872850702393e-06, "loss": 0.5884, "step": 14543 }, { "epoch": 1.18, "grad_norm": 7.262591535784197, "learning_rate": 6.913467794842505e-06, "loss": 0.6593, "step": 14544 }, { "epoch": 1.18, "grad_norm": 2.3960757507600294, "learning_rate": 6.913062724269658e-06, "loss": 0.4691, "step": 14545 }, { "epoch": 1.18, "grad_norm": 3.070385853483246, "learning_rate": 6.912657638986966e-06, "loss": 0.5396, "step": 14546 }, { "epoch": 1.18, "grad_norm": 3.035048924608097, "learning_rate": 6.912252538997545e-06, "loss": 0.4264, "step": 14547 }, { "epoch": 1.18, "grad_norm": 3.1272489686554916, "learning_rate": 6.91184742430451e-06, "loss": 0.4707, "step": 14548 }, { "epoch": 1.18, "grad_norm": 2.7703294734446966, "learning_rate": 6.911442294910975e-06, "loss": 0.6093, "step": 14549 }, { "epoch": 1.18, "grad_norm": 3.1661571059834515, "learning_rate": 6.911037150820056e-06, "loss": 0.7229, "step": 14550 }, { "epoch": 1.18, "grad_norm": 3.580717046428166, "learning_rate": 6.9106319920348685e-06, "loss": 0.5768, "step": 14551 }, { "epoch": 1.18, "grad_norm": 5.683861688888747, "learning_rate": 6.910226818558528e-06, "loss": 0.59, "step": 14552 }, { "epoch": 1.18, "grad_norm": 2.9539888658386264, "learning_rate": 6.909821630394147e-06, "loss": 0.6783, "step": 14553 }, { "epoch": 1.18, "grad_norm": 2.794105628728191, "learning_rate": 6.909416427544844e-06, "loss": 0.7218, "step": 14554 }, { "epoch": 1.18, "grad_norm": 4.237422219470898, "learning_rate": 6.909011210013734e-06, "loss": 0.6012, "step": 14555 }, { "epoch": 1.18, "grad_norm": 5.354421107418542, "learning_rate": 6.9086059778039336e-06, "loss": 0.5679, "step": 14556 }, { "epoch": 1.18, "grad_norm": 3.1692471391332324, "learning_rate": 6.908200730918557e-06, "loss": 0.5319, "step": 14557 }, { "epoch": 1.18, "grad_norm": 3.8358600933617786, "learning_rate": 6.9077954693607206e-06, "loss": 0.5093, "step": 14558 }, { "epoch": 1.18, "grad_norm": 2.6717782282767693, "learning_rate": 6.907390193133543e-06, "loss": 0.6958, "step": 14559 }, { "epoch": 1.18, "grad_norm": 3.275165219418645, "learning_rate": 6.906984902240137e-06, "loss": 0.4726, "step": 14560 }, { "epoch": 1.18, "grad_norm": 3.516761491251228, "learning_rate": 6.90657959668362e-06, "loss": 0.5884, "step": 14561 }, { "epoch": 1.18, "grad_norm": 1.8342664351162046, "learning_rate": 6.906174276467109e-06, "loss": 0.4467, "step": 14562 }, { "epoch": 1.18, "grad_norm": 2.109137528488764, "learning_rate": 6.905768941593721e-06, "loss": 0.5953, "step": 14563 }, { "epoch": 1.18, "grad_norm": 2.9290489374269986, "learning_rate": 6.905363592066572e-06, "loss": 0.7976, "step": 14564 }, { "epoch": 1.18, "grad_norm": 4.672266777362304, "learning_rate": 6.904958227888777e-06, "loss": 0.6124, "step": 14565 }, { "epoch": 1.18, "grad_norm": 2.8051911993989123, "learning_rate": 6.9045528490634575e-06, "loss": 0.5999, "step": 14566 }, { "epoch": 1.18, "grad_norm": 8.549356018637834, "learning_rate": 6.904147455593725e-06, "loss": 0.6209, "step": 14567 }, { "epoch": 1.18, "grad_norm": 2.2780007663246256, "learning_rate": 6.9037420474827014e-06, "loss": 0.4011, "step": 14568 }, { "epoch": 1.18, "grad_norm": 3.766290942331301, "learning_rate": 6.903336624733501e-06, "loss": 0.6025, "step": 14569 }, { "epoch": 1.18, "grad_norm": 8.538594306585793, "learning_rate": 6.902931187349243e-06, "loss": 0.6406, "step": 14570 }, { "epoch": 1.18, "grad_norm": 2.916951018598615, "learning_rate": 6.9025257353330435e-06, "loss": 0.6077, "step": 14571 }, { "epoch": 1.18, "grad_norm": 3.5753188419283712, "learning_rate": 6.902120268688021e-06, "loss": 0.6457, "step": 14572 }, { "epoch": 1.18, "grad_norm": 2.5482159041894694, "learning_rate": 6.9017147874172915e-06, "loss": 0.5663, "step": 14573 }, { "epoch": 1.18, "grad_norm": 6.1718157765408055, "learning_rate": 6.901309291523976e-06, "loss": 0.4969, "step": 14574 }, { "epoch": 1.18, "grad_norm": 4.241099917640582, "learning_rate": 6.900903781011188e-06, "loss": 0.6229, "step": 14575 }, { "epoch": 1.18, "grad_norm": 4.00185011230321, "learning_rate": 6.90049825588205e-06, "loss": 0.546, "step": 14576 }, { "epoch": 1.18, "grad_norm": 3.0743367067680234, "learning_rate": 6.900092716139678e-06, "loss": 0.6437, "step": 14577 }, { "epoch": 1.18, "grad_norm": 2.4224029343180944, "learning_rate": 6.899687161787191e-06, "loss": 0.504, "step": 14578 }, { "epoch": 1.18, "grad_norm": 4.43917239539689, "learning_rate": 6.899281592827705e-06, "loss": 0.6521, "step": 14579 }, { "epoch": 1.18, "grad_norm": 3.2729537294571003, "learning_rate": 6.898876009264341e-06, "loss": 0.6006, "step": 14580 }, { "epoch": 1.18, "grad_norm": 3.1362197756417944, "learning_rate": 6.898470411100218e-06, "loss": 0.6611, "step": 14581 }, { "epoch": 1.18, "grad_norm": 2.8966494068382556, "learning_rate": 6.898064798338453e-06, "loss": 0.5743, "step": 14582 }, { "epoch": 1.18, "grad_norm": 3.1644909838633324, "learning_rate": 6.8976591709821635e-06, "loss": 0.5351, "step": 14583 }, { "epoch": 1.18, "grad_norm": 5.052919754624432, "learning_rate": 6.897253529034474e-06, "loss": 0.5455, "step": 14584 }, { "epoch": 1.18, "grad_norm": 3.0956940313485455, "learning_rate": 6.896847872498498e-06, "loss": 0.5472, "step": 14585 }, { "epoch": 1.18, "grad_norm": 3.0198119014272535, "learning_rate": 6.8964422013773555e-06, "loss": 0.699, "step": 14586 }, { "epoch": 1.18, "grad_norm": 3.0018378847112825, "learning_rate": 6.896036515674168e-06, "loss": 0.546, "step": 14587 }, { "epoch": 1.18, "grad_norm": 2.200639339574012, "learning_rate": 6.895630815392054e-06, "loss": 0.5628, "step": 14588 }, { "epoch": 1.18, "grad_norm": 2.516244836007579, "learning_rate": 6.895225100534132e-06, "loss": 0.5125, "step": 14589 }, { "epoch": 1.18, "grad_norm": 5.53199277099304, "learning_rate": 6.894819371103522e-06, "loss": 0.64, "step": 14590 }, { "epoch": 1.19, "grad_norm": 4.096333944260052, "learning_rate": 6.894413627103345e-06, "loss": 0.717, "step": 14591 }, { "epoch": 1.19, "grad_norm": 2.530081223163244, "learning_rate": 6.8940078685367205e-06, "loss": 0.7187, "step": 14592 }, { "epoch": 1.19, "grad_norm": 6.313443132328232, "learning_rate": 6.8936020954067664e-06, "loss": 0.5712, "step": 14593 }, { "epoch": 1.19, "grad_norm": 3.1724676524861954, "learning_rate": 6.893196307716606e-06, "loss": 0.4531, "step": 14594 }, { "epoch": 1.19, "grad_norm": 3.806889966091346, "learning_rate": 6.8927905054693546e-06, "loss": 0.6022, "step": 14595 }, { "epoch": 1.19, "grad_norm": 43.44389870361191, "learning_rate": 6.892384688668138e-06, "loss": 0.5661, "step": 14596 }, { "epoch": 1.19, "grad_norm": 5.180615163495028, "learning_rate": 6.891978857316073e-06, "loss": 0.6604, "step": 14597 }, { "epoch": 1.19, "grad_norm": 2.8463665847896977, "learning_rate": 6.891573011416282e-06, "loss": 0.5829, "step": 14598 }, { "epoch": 1.19, "grad_norm": 3.1951499432145654, "learning_rate": 6.891167150971884e-06, "loss": 0.4976, "step": 14599 }, { "epoch": 1.19, "grad_norm": 3.799688565093332, "learning_rate": 6.890761275986e-06, "loss": 0.595, "step": 14600 }, { "epoch": 1.19, "grad_norm": 3.8140165146978275, "learning_rate": 6.890355386461753e-06, "loss": 0.4989, "step": 14601 }, { "epoch": 1.19, "grad_norm": 3.57989470396492, "learning_rate": 6.8899494824022615e-06, "loss": 0.5904, "step": 14602 }, { "epoch": 1.19, "grad_norm": 5.7862313252525155, "learning_rate": 6.8895435638106465e-06, "loss": 0.5105, "step": 14603 }, { "epoch": 1.19, "grad_norm": 3.2940086293065507, "learning_rate": 6.889137630690031e-06, "loss": 0.6355, "step": 14604 }, { "epoch": 1.19, "grad_norm": 3.275668657174012, "learning_rate": 6.8887316830435354e-06, "loss": 0.7609, "step": 14605 }, { "epoch": 1.19, "grad_norm": 3.4113767264285606, "learning_rate": 6.888325720874283e-06, "loss": 0.5245, "step": 14606 }, { "epoch": 1.19, "grad_norm": 2.395612270995112, "learning_rate": 6.8879197441853895e-06, "loss": 0.5912, "step": 14607 }, { "epoch": 1.19, "grad_norm": 21.61806765454407, "learning_rate": 6.887513752979983e-06, "loss": 0.6156, "step": 14608 }, { "epoch": 1.19, "grad_norm": 2.9313884028735337, "learning_rate": 6.887107747261182e-06, "loss": 0.6651, "step": 14609 }, { "epoch": 1.19, "grad_norm": 2.6147975922347384, "learning_rate": 6.886701727032108e-06, "loss": 0.544, "step": 14610 }, { "epoch": 1.19, "grad_norm": 15.394087523217047, "learning_rate": 6.886295692295884e-06, "loss": 0.5341, "step": 14611 }, { "epoch": 1.19, "grad_norm": 2.983613310106075, "learning_rate": 6.885889643055633e-06, "loss": 0.5799, "step": 14612 }, { "epoch": 1.19, "grad_norm": 3.631379638501287, "learning_rate": 6.885483579314476e-06, "loss": 0.5539, "step": 14613 }, { "epoch": 1.19, "grad_norm": 3.1474430734767767, "learning_rate": 6.885077501075536e-06, "loss": 0.654, "step": 14614 }, { "epoch": 1.19, "grad_norm": 2.9899482808207094, "learning_rate": 6.884671408341933e-06, "loss": 0.4174, "step": 14615 }, { "epoch": 1.19, "grad_norm": 3.4593852490438692, "learning_rate": 6.884265301116793e-06, "loss": 0.6522, "step": 14616 }, { "epoch": 1.19, "grad_norm": 2.6917839437555275, "learning_rate": 6.8838591794032365e-06, "loss": 0.5679, "step": 14617 }, { "epoch": 1.19, "grad_norm": 4.8672016369957225, "learning_rate": 6.883453043204387e-06, "loss": 0.4636, "step": 14618 }, { "epoch": 1.19, "grad_norm": 7.735843492066565, "learning_rate": 6.883046892523366e-06, "loss": 0.5884, "step": 14619 }, { "epoch": 1.19, "grad_norm": 3.2621953416171534, "learning_rate": 6.8826407273632975e-06, "loss": 0.5953, "step": 14620 }, { "epoch": 1.19, "grad_norm": 2.4575172355996604, "learning_rate": 6.882234547727306e-06, "loss": 0.6319, "step": 14621 }, { "epoch": 1.19, "grad_norm": 2.7269738607819725, "learning_rate": 6.881828353618512e-06, "loss": 0.5386, "step": 14622 }, { "epoch": 1.19, "grad_norm": 3.670029071801196, "learning_rate": 6.881422145040041e-06, "loss": 0.7119, "step": 14623 }, { "epoch": 1.19, "grad_norm": 1.9789427513807505, "learning_rate": 6.881015921995013e-06, "loss": 0.5229, "step": 14624 }, { "epoch": 1.19, "grad_norm": 4.8383702980540075, "learning_rate": 6.880609684486557e-06, "loss": 0.5584, "step": 14625 }, { "epoch": 1.19, "grad_norm": 3.171112849874239, "learning_rate": 6.8802034325177925e-06, "loss": 0.4344, "step": 14626 }, { "epoch": 1.19, "grad_norm": 2.7888561828287166, "learning_rate": 6.879797166091844e-06, "loss": 0.6604, "step": 14627 }, { "epoch": 1.19, "grad_norm": 2.3024484433218158, "learning_rate": 6.879390885211835e-06, "loss": 0.5356, "step": 14628 }, { "epoch": 1.19, "grad_norm": 3.6442738281565616, "learning_rate": 6.878984589880892e-06, "loss": 0.707, "step": 14629 }, { "epoch": 1.19, "grad_norm": 2.363310974206081, "learning_rate": 6.878578280102136e-06, "loss": 0.6052, "step": 14630 }, { "epoch": 1.19, "grad_norm": 5.932755721244748, "learning_rate": 6.878171955878693e-06, "loss": 0.5156, "step": 14631 }, { "epoch": 1.19, "grad_norm": 5.737313379889864, "learning_rate": 6.877765617213685e-06, "loss": 0.5764, "step": 14632 }, { "epoch": 1.19, "grad_norm": 3.077036645983875, "learning_rate": 6.8773592641102405e-06, "loss": 0.549, "step": 14633 }, { "epoch": 1.19, "grad_norm": 3.356979668110325, "learning_rate": 6.87695289657148e-06, "loss": 0.7877, "step": 14634 }, { "epoch": 1.19, "grad_norm": 3.6310622040370863, "learning_rate": 6.87654651460053e-06, "loss": 0.7438, "step": 14635 }, { "epoch": 1.19, "grad_norm": 10.905019053630436, "learning_rate": 6.876140118200515e-06, "loss": 0.6713, "step": 14636 }, { "epoch": 1.19, "grad_norm": 4.155163479973959, "learning_rate": 6.87573370737456e-06, "loss": 0.494, "step": 14637 }, { "epoch": 1.19, "grad_norm": 4.065749953678626, "learning_rate": 6.87532728212579e-06, "loss": 0.5899, "step": 14638 }, { "epoch": 1.19, "grad_norm": 3.2493713143651646, "learning_rate": 6.874920842457329e-06, "loss": 0.5877, "step": 14639 }, { "epoch": 1.19, "grad_norm": 12.211560184486986, "learning_rate": 6.874514388372303e-06, "loss": 0.4738, "step": 14640 }, { "epoch": 1.19, "grad_norm": 3.1468040066912026, "learning_rate": 6.874107919873838e-06, "loss": 0.5501, "step": 14641 }, { "epoch": 1.19, "grad_norm": 6.536938409475869, "learning_rate": 6.873701436965059e-06, "loss": 0.5867, "step": 14642 }, { "epoch": 1.19, "grad_norm": 5.031300298201698, "learning_rate": 6.87329493964909e-06, "loss": 0.4403, "step": 14643 }, { "epoch": 1.19, "grad_norm": 5.136219984350847, "learning_rate": 6.8728884279290574e-06, "loss": 0.6777, "step": 14644 }, { "epoch": 1.19, "grad_norm": 3.959432670409234, "learning_rate": 6.872481901808089e-06, "loss": 0.5103, "step": 14645 }, { "epoch": 1.19, "grad_norm": 3.316732970983276, "learning_rate": 6.872075361289309e-06, "loss": 0.6007, "step": 14646 }, { "epoch": 1.19, "grad_norm": 8.495909661677377, "learning_rate": 6.871668806375843e-06, "loss": 0.5693, "step": 14647 }, { "epoch": 1.19, "grad_norm": 2.9687044358207695, "learning_rate": 6.871262237070816e-06, "loss": 0.5396, "step": 14648 }, { "epoch": 1.19, "grad_norm": 3.119150259066366, "learning_rate": 6.870855653377357e-06, "loss": 0.7173, "step": 14649 }, { "epoch": 1.19, "grad_norm": 16.547911475538278, "learning_rate": 6.87044905529859e-06, "loss": 0.6296, "step": 14650 }, { "epoch": 1.19, "grad_norm": 2.6583243198080657, "learning_rate": 6.8700424428376435e-06, "loss": 0.5883, "step": 14651 }, { "epoch": 1.19, "grad_norm": 3.5187193300065505, "learning_rate": 6.869635815997642e-06, "loss": 0.6531, "step": 14652 }, { "epoch": 1.19, "grad_norm": 4.831538175510151, "learning_rate": 6.869229174781713e-06, "loss": 0.5324, "step": 14653 }, { "epoch": 1.19, "grad_norm": 7.289556028660734, "learning_rate": 6.868822519192984e-06, "loss": 0.5805, "step": 14654 }, { "epoch": 1.19, "grad_norm": 7.620531642927985, "learning_rate": 6.86841584923458e-06, "loss": 0.4925, "step": 14655 }, { "epoch": 1.19, "grad_norm": 4.198086297676359, "learning_rate": 6.868009164909628e-06, "loss": 0.6057, "step": 14656 }, { "epoch": 1.19, "grad_norm": 10.175524707631405, "learning_rate": 6.867602466221257e-06, "loss": 0.7283, "step": 14657 }, { "epoch": 1.19, "grad_norm": 3.14364453428349, "learning_rate": 6.867195753172594e-06, "loss": 0.7116, "step": 14658 }, { "epoch": 1.19, "grad_norm": 4.624989917239108, "learning_rate": 6.866789025766764e-06, "loss": 0.6377, "step": 14659 }, { "epoch": 1.19, "grad_norm": 3.7800738865376347, "learning_rate": 6.866382284006896e-06, "loss": 0.609, "step": 14660 }, { "epoch": 1.19, "grad_norm": 2.492699277675529, "learning_rate": 6.865975527896118e-06, "loss": 0.5471, "step": 14661 }, { "epoch": 1.19, "grad_norm": 3.4894303588259037, "learning_rate": 6.865568757437558e-06, "loss": 0.5765, "step": 14662 }, { "epoch": 1.19, "grad_norm": 2.9572408667518824, "learning_rate": 6.865161972634341e-06, "loss": 0.6899, "step": 14663 }, { "epoch": 1.19, "grad_norm": 8.698606911187769, "learning_rate": 6.864755173489597e-06, "loss": 0.5742, "step": 14664 }, { "epoch": 1.19, "grad_norm": 3.5517550221427756, "learning_rate": 6.864348360006453e-06, "loss": 0.6839, "step": 14665 }, { "epoch": 1.19, "grad_norm": 5.595848727162736, "learning_rate": 6.863941532188039e-06, "loss": 0.5442, "step": 14666 }, { "epoch": 1.19, "grad_norm": 6.23529839519157, "learning_rate": 6.86353469003748e-06, "loss": 0.5915, "step": 14667 }, { "epoch": 1.19, "grad_norm": 4.24340321170888, "learning_rate": 6.863127833557905e-06, "loss": 0.5916, "step": 14668 }, { "epoch": 1.19, "grad_norm": 9.81902068365687, "learning_rate": 6.862720962752445e-06, "loss": 0.6966, "step": 14669 }, { "epoch": 1.19, "grad_norm": 5.2536189729295355, "learning_rate": 6.862314077624227e-06, "loss": 0.6447, "step": 14670 }, { "epoch": 1.19, "grad_norm": 2.76684046409634, "learning_rate": 6.861907178176379e-06, "loss": 0.6612, "step": 14671 }, { "epoch": 1.19, "grad_norm": 3.1660628896964638, "learning_rate": 6.86150026441203e-06, "loss": 0.5876, "step": 14672 }, { "epoch": 1.19, "grad_norm": 3.886385734107923, "learning_rate": 6.861093336334309e-06, "loss": 0.5254, "step": 14673 }, { "epoch": 1.19, "grad_norm": 4.20366618065212, "learning_rate": 6.860686393946345e-06, "loss": 0.4607, "step": 14674 }, { "epoch": 1.19, "grad_norm": 3.552136677567691, "learning_rate": 6.860279437251267e-06, "loss": 0.6166, "step": 14675 }, { "epoch": 1.19, "grad_norm": 26.150441346228664, "learning_rate": 6.859872466252204e-06, "loss": 0.6809, "step": 14676 }, { "epoch": 1.19, "grad_norm": 3.392126872146257, "learning_rate": 6.8594654809522855e-06, "loss": 0.651, "step": 14677 }, { "epoch": 1.19, "grad_norm": 3.6500177388145976, "learning_rate": 6.8590584813546414e-06, "loss": 0.5847, "step": 14678 }, { "epoch": 1.19, "grad_norm": 2.7755441594658836, "learning_rate": 6.858651467462399e-06, "loss": 0.4924, "step": 14679 }, { "epoch": 1.19, "grad_norm": 4.6806015103090335, "learning_rate": 6.85824443927869e-06, "loss": 0.6493, "step": 14680 }, { "epoch": 1.19, "grad_norm": 2.8555758335664594, "learning_rate": 6.857837396806643e-06, "loss": 0.5572, "step": 14681 }, { "epoch": 1.19, "grad_norm": 2.8441032303112315, "learning_rate": 6.857430340049391e-06, "loss": 0.6873, "step": 14682 }, { "epoch": 1.19, "grad_norm": 5.360166573160086, "learning_rate": 6.857023269010058e-06, "loss": 0.6478, "step": 14683 }, { "epoch": 1.19, "grad_norm": 3.710068990816284, "learning_rate": 6.856616183691777e-06, "loss": 0.5423, "step": 14684 }, { "epoch": 1.19, "grad_norm": 10.778100744832885, "learning_rate": 6.8562090840976816e-06, "loss": 0.6969, "step": 14685 }, { "epoch": 1.19, "grad_norm": 3.8970660696268014, "learning_rate": 6.855801970230898e-06, "loss": 0.616, "step": 14686 }, { "epoch": 1.19, "grad_norm": 5.54902674493392, "learning_rate": 6.855394842094556e-06, "loss": 0.4647, "step": 14687 }, { "epoch": 1.19, "grad_norm": 3.963603016961619, "learning_rate": 6.854987699691788e-06, "loss": 0.8043, "step": 14688 }, { "epoch": 1.19, "grad_norm": 4.487067875305332, "learning_rate": 6.854580543025724e-06, "loss": 0.5265, "step": 14689 }, { "epoch": 1.19, "grad_norm": 4.999512694219298, "learning_rate": 6.854173372099495e-06, "loss": 0.7449, "step": 14690 }, { "epoch": 1.19, "grad_norm": 3.8435834618627327, "learning_rate": 6.853766186916232e-06, "loss": 0.479, "step": 14691 }, { "epoch": 1.19, "grad_norm": 2.8430511226124664, "learning_rate": 6.853358987479065e-06, "loss": 0.578, "step": 14692 }, { "epoch": 1.19, "grad_norm": 3.7541736731806092, "learning_rate": 6.852951773791125e-06, "loss": 0.5739, "step": 14693 }, { "epoch": 1.19, "grad_norm": 2.153530489223778, "learning_rate": 6.852544545855545e-06, "loss": 0.5666, "step": 14694 }, { "epoch": 1.19, "grad_norm": 2.5408896341506653, "learning_rate": 6.852137303675455e-06, "loss": 0.4928, "step": 14695 }, { "epoch": 1.19, "grad_norm": 2.8974024862694283, "learning_rate": 6.851730047253985e-06, "loss": 0.5409, "step": 14696 }, { "epoch": 1.19, "grad_norm": 2.966070322427666, "learning_rate": 6.851322776594268e-06, "loss": 0.6679, "step": 14697 }, { "epoch": 1.19, "grad_norm": 5.857088659538648, "learning_rate": 6.850915491699436e-06, "loss": 0.5394, "step": 14698 }, { "epoch": 1.19, "grad_norm": 3.763808319096023, "learning_rate": 6.8505081925726205e-06, "loss": 0.497, "step": 14699 }, { "epoch": 1.19, "grad_norm": 4.023071609799391, "learning_rate": 6.85010087921695e-06, "loss": 0.4907, "step": 14700 }, { "epoch": 1.19, "grad_norm": 4.973886727329223, "learning_rate": 6.849693551635561e-06, "loss": 0.6239, "step": 14701 }, { "epoch": 1.19, "grad_norm": 7.92839766650643, "learning_rate": 6.849286209831585e-06, "loss": 0.6243, "step": 14702 }, { "epoch": 1.19, "grad_norm": 2.4003191114555795, "learning_rate": 6.848878853808151e-06, "loss": 0.6531, "step": 14703 }, { "epoch": 1.19, "grad_norm": 2.257789344043934, "learning_rate": 6.848471483568393e-06, "loss": 0.6302, "step": 14704 }, { "epoch": 1.19, "grad_norm": 2.3079246801389384, "learning_rate": 6.848064099115444e-06, "loss": 0.5215, "step": 14705 }, { "epoch": 1.19, "grad_norm": 3.182218967420018, "learning_rate": 6.847656700452436e-06, "loss": 0.5505, "step": 14706 }, { "epoch": 1.19, "grad_norm": 4.305800809092512, "learning_rate": 6.8472492875825e-06, "loss": 0.4313, "step": 14707 }, { "epoch": 1.19, "grad_norm": 5.908891350014604, "learning_rate": 6.84684186050877e-06, "loss": 0.5486, "step": 14708 }, { "epoch": 1.19, "grad_norm": 4.760643988354593, "learning_rate": 6.84643441923438e-06, "loss": 0.4156, "step": 14709 }, { "epoch": 1.19, "grad_norm": 4.0614172785634945, "learning_rate": 6.846026963762461e-06, "loss": 0.5884, "step": 14710 }, { "epoch": 1.19, "grad_norm": 2.880233172958939, "learning_rate": 6.8456194940961475e-06, "loss": 0.6842, "step": 14711 }, { "epoch": 1.19, "grad_norm": 2.2193720119458082, "learning_rate": 6.845212010238571e-06, "loss": 0.5716, "step": 14712 }, { "epoch": 1.19, "grad_norm": 3.510072669291895, "learning_rate": 6.844804512192864e-06, "loss": 0.5126, "step": 14713 }, { "epoch": 1.2, "grad_norm": 4.689280612629077, "learning_rate": 6.844396999962164e-06, "loss": 0.5171, "step": 14714 }, { "epoch": 1.2, "grad_norm": 3.213174798211879, "learning_rate": 6.8439894735496e-06, "loss": 0.7135, "step": 14715 }, { "epoch": 1.2, "grad_norm": 3.2655173950171528, "learning_rate": 6.843581932958308e-06, "loss": 0.5901, "step": 14716 }, { "epoch": 1.2, "grad_norm": 2.4276917706838317, "learning_rate": 6.843174378191419e-06, "loss": 0.4364, "step": 14717 }, { "epoch": 1.2, "grad_norm": 3.6318836368014327, "learning_rate": 6.84276680925207e-06, "loss": 0.5311, "step": 14718 }, { "epoch": 1.2, "grad_norm": 5.136452195101276, "learning_rate": 6.842359226143394e-06, "loss": 0.499, "step": 14719 }, { "epoch": 1.2, "grad_norm": 3.583300628326307, "learning_rate": 6.841951628868525e-06, "loss": 0.6088, "step": 14720 }, { "epoch": 1.2, "grad_norm": 3.7510241148183234, "learning_rate": 6.841544017430595e-06, "loss": 0.6, "step": 14721 }, { "epoch": 1.2, "grad_norm": 2.197192189128557, "learning_rate": 6.84113639183274e-06, "loss": 0.4996, "step": 14722 }, { "epoch": 1.2, "grad_norm": 4.198562020468372, "learning_rate": 6.8407287520780944e-06, "loss": 0.6003, "step": 14723 }, { "epoch": 1.2, "grad_norm": 4.179014914255244, "learning_rate": 6.840321098169791e-06, "loss": 0.4895, "step": 14724 }, { "epoch": 1.2, "grad_norm": 3.1336196797618165, "learning_rate": 6.839913430110967e-06, "loss": 0.5997, "step": 14725 }, { "epoch": 1.2, "grad_norm": 2.8441183640818877, "learning_rate": 6.839505747904754e-06, "loss": 0.5903, "step": 14726 }, { "epoch": 1.2, "grad_norm": 4.415429763903203, "learning_rate": 6.83909805155429e-06, "loss": 0.5687, "step": 14727 }, { "epoch": 1.2, "grad_norm": 3.5673190102561665, "learning_rate": 6.838690341062708e-06, "loss": 0.5193, "step": 14728 }, { "epoch": 1.2, "grad_norm": 8.364522269054966, "learning_rate": 6.838282616433143e-06, "loss": 0.5711, "step": 14729 }, { "epoch": 1.2, "grad_norm": 4.931869090260097, "learning_rate": 6.8378748776687296e-06, "loss": 0.6281, "step": 14730 }, { "epoch": 1.2, "grad_norm": 4.377437607560901, "learning_rate": 6.837467124772604e-06, "loss": 0.4964, "step": 14731 }, { "epoch": 1.2, "grad_norm": 4.240034619014098, "learning_rate": 6.8370593577479004e-06, "loss": 0.7647, "step": 14732 }, { "epoch": 1.2, "grad_norm": 3.3790234451697008, "learning_rate": 6.836651576597756e-06, "loss": 0.5076, "step": 14733 }, { "epoch": 1.2, "grad_norm": 2.0258166149763386, "learning_rate": 6.836243781325303e-06, "loss": 0.5449, "step": 14734 }, { "epoch": 1.2, "grad_norm": 1.8903200895884404, "learning_rate": 6.835835971933681e-06, "loss": 0.5623, "step": 14735 }, { "epoch": 1.2, "grad_norm": 6.486703233589475, "learning_rate": 6.8354281484260235e-06, "loss": 0.669, "step": 14736 }, { "epoch": 1.2, "grad_norm": 7.273531651041171, "learning_rate": 6.835020310805467e-06, "loss": 0.5097, "step": 14737 }, { "epoch": 1.2, "grad_norm": 3.427515253578516, "learning_rate": 6.834612459075145e-06, "loss": 0.6121, "step": 14738 }, { "epoch": 1.2, "grad_norm": 3.091071309689994, "learning_rate": 6.8342045932381964e-06, "loss": 0.5982, "step": 14739 }, { "epoch": 1.2, "grad_norm": 3.2711963059862152, "learning_rate": 6.8337967132977574e-06, "loss": 0.5969, "step": 14740 }, { "epoch": 1.2, "grad_norm": 4.611297306190468, "learning_rate": 6.833388819256963e-06, "loss": 0.6958, "step": 14741 }, { "epoch": 1.2, "grad_norm": 4.225762848151504, "learning_rate": 6.832980911118949e-06, "loss": 0.4497, "step": 14742 }, { "epoch": 1.2, "grad_norm": 3.277378577434065, "learning_rate": 6.832572988886854e-06, "loss": 0.6368, "step": 14743 }, { "epoch": 1.2, "grad_norm": 3.594241511770091, "learning_rate": 6.832165052563814e-06, "loss": 0.6818, "step": 14744 }, { "epoch": 1.2, "grad_norm": 2.7157025944403164, "learning_rate": 6.831757102152964e-06, "loss": 0.4068, "step": 14745 }, { "epoch": 1.2, "grad_norm": 2.927508483529524, "learning_rate": 6.8313491376574415e-06, "loss": 0.5261, "step": 14746 }, { "epoch": 1.2, "grad_norm": 3.2553186828486167, "learning_rate": 6.830941159080384e-06, "loss": 0.6132, "step": 14747 }, { "epoch": 1.2, "grad_norm": 2.61550268781451, "learning_rate": 6.830533166424929e-06, "loss": 0.472, "step": 14748 }, { "epoch": 1.2, "grad_norm": 2.861419432081579, "learning_rate": 6.830125159694213e-06, "loss": 0.6692, "step": 14749 }, { "epoch": 1.2, "grad_norm": 3.5088264068590456, "learning_rate": 6.829717138891372e-06, "loss": 0.5583, "step": 14750 }, { "epoch": 1.2, "grad_norm": 2.398749094541213, "learning_rate": 6.829309104019544e-06, "loss": 0.5727, "step": 14751 }, { "epoch": 1.2, "grad_norm": 2.567437367786468, "learning_rate": 6.828901055081869e-06, "loss": 0.5891, "step": 14752 }, { "epoch": 1.2, "grad_norm": 2.6729759742067, "learning_rate": 6.828492992081481e-06, "loss": 0.6758, "step": 14753 }, { "epoch": 1.2, "grad_norm": 3.3962796244401567, "learning_rate": 6.82808491502152e-06, "loss": 0.5274, "step": 14754 }, { "epoch": 1.2, "grad_norm": 2.664077729817176, "learning_rate": 6.827676823905123e-06, "loss": 0.5926, "step": 14755 }, { "epoch": 1.2, "grad_norm": 2.759870349512828, "learning_rate": 6.827268718735427e-06, "loss": 0.5079, "step": 14756 }, { "epoch": 1.2, "grad_norm": 3.548071549483714, "learning_rate": 6.826860599515571e-06, "loss": 0.6991, "step": 14757 }, { "epoch": 1.2, "grad_norm": 3.0197632775721823, "learning_rate": 6.826452466248692e-06, "loss": 0.5358, "step": 14758 }, { "epoch": 1.2, "grad_norm": 5.540378939381762, "learning_rate": 6.82604431893793e-06, "loss": 0.5853, "step": 14759 }, { "epoch": 1.2, "grad_norm": 3.1705217638497794, "learning_rate": 6.825636157586423e-06, "loss": 0.5612, "step": 14760 }, { "epoch": 1.2, "grad_norm": 22.997665966305444, "learning_rate": 6.825227982197309e-06, "loss": 0.5227, "step": 14761 }, { "epoch": 1.2, "grad_norm": 3.4443091020898784, "learning_rate": 6.824819792773725e-06, "loss": 0.5699, "step": 14762 }, { "epoch": 1.2, "grad_norm": 4.253305728855996, "learning_rate": 6.824411589318811e-06, "loss": 0.5869, "step": 14763 }, { "epoch": 1.2, "grad_norm": 2.8501912031070233, "learning_rate": 6.8240033718357054e-06, "loss": 0.6259, "step": 14764 }, { "epoch": 1.2, "grad_norm": 6.046963578283579, "learning_rate": 6.823595140327549e-06, "loss": 0.5237, "step": 14765 }, { "epoch": 1.2, "grad_norm": 2.4391276206912518, "learning_rate": 6.8231868947974776e-06, "loss": 0.4432, "step": 14766 }, { "epoch": 1.2, "grad_norm": 3.3783655170824334, "learning_rate": 6.822778635248633e-06, "loss": 0.4853, "step": 14767 }, { "epoch": 1.2, "grad_norm": 2.4873335067755398, "learning_rate": 6.8223703616841515e-06, "loss": 0.5551, "step": 14768 }, { "epoch": 1.2, "grad_norm": 4.951816874594107, "learning_rate": 6.8219620741071754e-06, "loss": 0.6278, "step": 14769 }, { "epoch": 1.2, "grad_norm": 6.575744138016724, "learning_rate": 6.821553772520841e-06, "loss": 0.6479, "step": 14770 }, { "epoch": 1.2, "grad_norm": 3.884841464862106, "learning_rate": 6.821145456928291e-06, "loss": 0.4425, "step": 14771 }, { "epoch": 1.2, "grad_norm": 2.8873656084345036, "learning_rate": 6.820737127332664e-06, "loss": 0.6389, "step": 14772 }, { "epoch": 1.2, "grad_norm": 2.7144470153943776, "learning_rate": 6.820328783737098e-06, "loss": 0.6162, "step": 14773 }, { "epoch": 1.2, "grad_norm": 7.050838496908331, "learning_rate": 6.819920426144734e-06, "loss": 0.6888, "step": 14774 }, { "epoch": 1.2, "grad_norm": 2.7758330680229695, "learning_rate": 6.819512054558713e-06, "loss": 0.4272, "step": 14775 }, { "epoch": 1.2, "grad_norm": 3.7347789087962995, "learning_rate": 6.8191036689821735e-06, "loss": 0.8185, "step": 14776 }, { "epoch": 1.2, "grad_norm": 5.91985486004289, "learning_rate": 6.8186952694182565e-06, "loss": 0.5799, "step": 14777 }, { "epoch": 1.2, "grad_norm": 3.5697304929618867, "learning_rate": 6.8182868558701e-06, "loss": 0.5133, "step": 14778 }, { "epoch": 1.2, "grad_norm": 3.6320166054387535, "learning_rate": 6.817878428340847e-06, "loss": 0.7047, "step": 14779 }, { "epoch": 1.2, "grad_norm": 2.7153134173019517, "learning_rate": 6.817469986833639e-06, "loss": 0.5043, "step": 14780 }, { "epoch": 1.2, "grad_norm": 3.590402039574638, "learning_rate": 6.817061531351614e-06, "loss": 0.5539, "step": 14781 }, { "epoch": 1.2, "grad_norm": 2.8156278741555076, "learning_rate": 6.816653061897912e-06, "loss": 0.5066, "step": 14782 }, { "epoch": 1.2, "grad_norm": 2.905298217699814, "learning_rate": 6.816244578475677e-06, "loss": 0.6337, "step": 14783 }, { "epoch": 1.2, "grad_norm": 23.51843809477399, "learning_rate": 6.815836081088047e-06, "loss": 0.558, "step": 14784 }, { "epoch": 1.2, "grad_norm": 2.6620907079667697, "learning_rate": 6.815427569738164e-06, "loss": 0.5499, "step": 14785 }, { "epoch": 1.2, "grad_norm": 4.295796771178682, "learning_rate": 6.81501904442917e-06, "loss": 0.645, "step": 14786 }, { "epoch": 1.2, "grad_norm": 2.7262879754511222, "learning_rate": 6.814610505164205e-06, "loss": 0.5336, "step": 14787 }, { "epoch": 1.2, "grad_norm": 2.254840056023091, "learning_rate": 6.814201951946412e-06, "loss": 0.6156, "step": 14788 }, { "epoch": 1.2, "grad_norm": 2.5390871360390572, "learning_rate": 6.81379338477893e-06, "loss": 0.516, "step": 14789 }, { "epoch": 1.2, "grad_norm": 3.746597547876837, "learning_rate": 6.813384803664902e-06, "loss": 0.4872, "step": 14790 }, { "epoch": 1.2, "grad_norm": 6.5072497101828395, "learning_rate": 6.812976208607469e-06, "loss": 0.579, "step": 14791 }, { "epoch": 1.2, "grad_norm": 5.057975399305155, "learning_rate": 6.812567599609774e-06, "loss": 0.68, "step": 14792 }, { "epoch": 1.2, "grad_norm": 6.535378039003077, "learning_rate": 6.812158976674958e-06, "loss": 0.6115, "step": 14793 }, { "epoch": 1.2, "grad_norm": 3.2654791595422643, "learning_rate": 6.811750339806161e-06, "loss": 0.5274, "step": 14794 }, { "epoch": 1.2, "grad_norm": 2.1380247791910567, "learning_rate": 6.81134168900653e-06, "loss": 0.5939, "step": 14795 }, { "epoch": 1.2, "grad_norm": 2.944178700637409, "learning_rate": 6.810933024279203e-06, "loss": 0.5175, "step": 14796 }, { "epoch": 1.2, "grad_norm": 2.7438478566940256, "learning_rate": 6.810524345627323e-06, "loss": 0.5664, "step": 14797 }, { "epoch": 1.2, "grad_norm": 2.6719373051241497, "learning_rate": 6.810115653054033e-06, "loss": 0.48, "step": 14798 }, { "epoch": 1.2, "grad_norm": 3.3390754146364108, "learning_rate": 6.809706946562475e-06, "loss": 0.5867, "step": 14799 }, { "epoch": 1.2, "grad_norm": 2.6337600865271216, "learning_rate": 6.809298226155794e-06, "loss": 0.5322, "step": 14800 }, { "epoch": 1.2, "grad_norm": 2.182146828862285, "learning_rate": 6.80888949183713e-06, "loss": 0.5538, "step": 14801 }, { "epoch": 1.2, "grad_norm": 2.455566695395919, "learning_rate": 6.808480743609626e-06, "loss": 0.6108, "step": 14802 }, { "epoch": 1.2, "grad_norm": 5.798309466151922, "learning_rate": 6.8080719814764255e-06, "loss": 0.455, "step": 14803 }, { "epoch": 1.2, "grad_norm": 3.6462677354672675, "learning_rate": 6.807663205440671e-06, "loss": 0.522, "step": 14804 }, { "epoch": 1.2, "grad_norm": 5.235018800439813, "learning_rate": 6.807254415505506e-06, "loss": 0.6477, "step": 14805 }, { "epoch": 1.2, "grad_norm": 4.717549413126731, "learning_rate": 6.806845611674076e-06, "loss": 0.4998, "step": 14806 }, { "epoch": 1.2, "grad_norm": 3.260368951521907, "learning_rate": 6.80643679394952e-06, "loss": 0.5702, "step": 14807 }, { "epoch": 1.2, "grad_norm": 2.931305929030512, "learning_rate": 6.806027962334985e-06, "loss": 0.5549, "step": 14808 }, { "epoch": 1.2, "grad_norm": 4.372973972108359, "learning_rate": 6.8056191168336126e-06, "loss": 0.726, "step": 14809 }, { "epoch": 1.2, "grad_norm": 12.414451429256461, "learning_rate": 6.805210257448549e-06, "loss": 0.5219, "step": 14810 }, { "epoch": 1.2, "grad_norm": 3.0991981576251546, "learning_rate": 6.804801384182933e-06, "loss": 0.7568, "step": 14811 }, { "epoch": 1.2, "grad_norm": 3.6387093704475513, "learning_rate": 6.8043924970399145e-06, "loss": 0.6711, "step": 14812 }, { "epoch": 1.2, "grad_norm": 3.209127374784164, "learning_rate": 6.803983596022634e-06, "loss": 0.4246, "step": 14813 }, { "epoch": 1.2, "grad_norm": 5.262968533541719, "learning_rate": 6.8035746811342364e-06, "loss": 0.7182, "step": 14814 }, { "epoch": 1.2, "grad_norm": 6.2062956872002255, "learning_rate": 6.803165752377864e-06, "loss": 0.5052, "step": 14815 }, { "epoch": 1.2, "grad_norm": 3.779841709785704, "learning_rate": 6.8027568097566645e-06, "loss": 0.5955, "step": 14816 }, { "epoch": 1.2, "grad_norm": 5.717216278745841, "learning_rate": 6.8023478532737804e-06, "loss": 0.7065, "step": 14817 }, { "epoch": 1.2, "grad_norm": 4.427116417544992, "learning_rate": 6.801938882932357e-06, "loss": 0.5563, "step": 14818 }, { "epoch": 1.2, "grad_norm": 3.2308138390740395, "learning_rate": 6.801529898735537e-06, "loss": 0.5021, "step": 14819 }, { "epoch": 1.2, "grad_norm": 3.2607607253308295, "learning_rate": 6.8011209006864685e-06, "loss": 0.5585, "step": 14820 }, { "epoch": 1.2, "grad_norm": 2.7978925262568057, "learning_rate": 6.800711888788294e-06, "loss": 0.6009, "step": 14821 }, { "epoch": 1.2, "grad_norm": 3.55882890093947, "learning_rate": 6.800302863044159e-06, "loss": 0.6361, "step": 14822 }, { "epoch": 1.2, "grad_norm": 4.731035896279384, "learning_rate": 6.799893823457209e-06, "loss": 0.5911, "step": 14823 }, { "epoch": 1.2, "grad_norm": 2.336452905005183, "learning_rate": 6.7994847700305875e-06, "loss": 0.6552, "step": 14824 }, { "epoch": 1.2, "grad_norm": 4.460804962040697, "learning_rate": 6.7990757027674415e-06, "loss": 0.728, "step": 14825 }, { "epoch": 1.2, "grad_norm": 4.67237122776513, "learning_rate": 6.798666621670916e-06, "loss": 0.5155, "step": 14826 }, { "epoch": 1.2, "grad_norm": 3.1174122115694933, "learning_rate": 6.798257526744155e-06, "loss": 0.5583, "step": 14827 }, { "epoch": 1.2, "grad_norm": 2.7560681786351258, "learning_rate": 6.797848417990307e-06, "loss": 0.5209, "step": 14828 }, { "epoch": 1.2, "grad_norm": 2.3155483478046635, "learning_rate": 6.797439295412517e-06, "loss": 0.5019, "step": 14829 }, { "epoch": 1.2, "grad_norm": 2.4017957516904818, "learning_rate": 6.797030159013929e-06, "loss": 0.4986, "step": 14830 }, { "epoch": 1.2, "grad_norm": 5.407251176869368, "learning_rate": 6.7966210087976885e-06, "loss": 0.7315, "step": 14831 }, { "epoch": 1.2, "grad_norm": 4.1120032518994405, "learning_rate": 6.796211844766945e-06, "loss": 0.6064, "step": 14832 }, { "epoch": 1.2, "grad_norm": 3.6678911173976836, "learning_rate": 6.795802666924841e-06, "loss": 0.5705, "step": 14833 }, { "epoch": 1.2, "grad_norm": 3.0792773221101783, "learning_rate": 6.7953934752745246e-06, "loss": 0.4362, "step": 14834 }, { "epoch": 1.2, "grad_norm": 4.415328386226919, "learning_rate": 6.794984269819142e-06, "loss": 0.5291, "step": 14835 }, { "epoch": 1.2, "grad_norm": 5.219540193012217, "learning_rate": 6.794575050561839e-06, "loss": 0.5592, "step": 14836 }, { "epoch": 1.21, "grad_norm": 2.4877305456309386, "learning_rate": 6.7941658175057635e-06, "loss": 0.6168, "step": 14837 }, { "epoch": 1.21, "grad_norm": 15.844652333724921, "learning_rate": 6.793756570654061e-06, "loss": 0.5583, "step": 14838 }, { "epoch": 1.21, "grad_norm": 2.8439661517650183, "learning_rate": 6.793347310009877e-06, "loss": 0.5444, "step": 14839 }, { "epoch": 1.21, "grad_norm": 3.758723230397438, "learning_rate": 6.792938035576362e-06, "loss": 0.6927, "step": 14840 }, { "epoch": 1.21, "grad_norm": 6.106635580983163, "learning_rate": 6.792528747356659e-06, "loss": 0.6323, "step": 14841 }, { "epoch": 1.21, "grad_norm": 3.291241225163582, "learning_rate": 6.792119445353918e-06, "loss": 0.7022, "step": 14842 }, { "epoch": 1.21, "grad_norm": 3.3238712712201055, "learning_rate": 6.791710129571285e-06, "loss": 0.6856, "step": 14843 }, { "epoch": 1.21, "grad_norm": 2.6371711029896, "learning_rate": 6.791300800011908e-06, "loss": 0.617, "step": 14844 }, { "epoch": 1.21, "grad_norm": 3.1693597970697853, "learning_rate": 6.790891456678933e-06, "loss": 0.5028, "step": 14845 }, { "epoch": 1.21, "grad_norm": 2.546506255865266, "learning_rate": 6.790482099575508e-06, "loss": 0.3318, "step": 14846 }, { "epoch": 1.21, "grad_norm": 3.29771779093766, "learning_rate": 6.790072728704782e-06, "loss": 0.8016, "step": 14847 }, { "epoch": 1.21, "grad_norm": 2.643095871371575, "learning_rate": 6.789663344069901e-06, "loss": 0.4898, "step": 14848 }, { "epoch": 1.21, "grad_norm": 2.635116990967789, "learning_rate": 6.789253945674013e-06, "loss": 0.7046, "step": 14849 }, { "epoch": 1.21, "grad_norm": 5.908303624393926, "learning_rate": 6.788844533520268e-06, "loss": 0.7898, "step": 14850 }, { "epoch": 1.21, "grad_norm": 3.8810701683758517, "learning_rate": 6.788435107611811e-06, "loss": 0.6115, "step": 14851 }, { "epoch": 1.21, "grad_norm": 2.689473222381809, "learning_rate": 6.7880256679517915e-06, "loss": 0.5451, "step": 14852 }, { "epoch": 1.21, "grad_norm": 3.748763429887011, "learning_rate": 6.7876162145433595e-06, "loss": 0.6224, "step": 14853 }, { "epoch": 1.21, "grad_norm": 2.7967399999634255, "learning_rate": 6.787206747389661e-06, "loss": 0.6204, "step": 14854 }, { "epoch": 1.21, "grad_norm": 4.25404410975183, "learning_rate": 6.786797266493843e-06, "loss": 0.6173, "step": 14855 }, { "epoch": 1.21, "grad_norm": 7.17216365104514, "learning_rate": 6.786387771859059e-06, "loss": 0.5187, "step": 14856 }, { "epoch": 1.21, "grad_norm": 4.504697891761151, "learning_rate": 6.785978263488454e-06, "loss": 0.7197, "step": 14857 }, { "epoch": 1.21, "grad_norm": 3.0327988185167394, "learning_rate": 6.785568741385178e-06, "loss": 0.5814, "step": 14858 }, { "epoch": 1.21, "grad_norm": 4.798500100088936, "learning_rate": 6.785159205552378e-06, "loss": 0.6174, "step": 14859 }, { "epoch": 1.21, "grad_norm": 5.240325917693067, "learning_rate": 6.784749655993206e-06, "loss": 0.4095, "step": 14860 }, { "epoch": 1.21, "grad_norm": 23.259543597924814, "learning_rate": 6.7843400927108095e-06, "loss": 0.4682, "step": 14861 }, { "epoch": 1.21, "grad_norm": 4.059762094065877, "learning_rate": 6.783930515708337e-06, "loss": 0.5924, "step": 14862 }, { "epoch": 1.21, "grad_norm": 3.671885201574418, "learning_rate": 6.7835209249889385e-06, "loss": 0.6314, "step": 14863 }, { "epoch": 1.21, "grad_norm": 2.577228154260966, "learning_rate": 6.7831113205557645e-06, "loss": 0.6713, "step": 14864 }, { "epoch": 1.21, "grad_norm": 3.070797974513478, "learning_rate": 6.782701702411964e-06, "loss": 0.6325, "step": 14865 }, { "epoch": 1.21, "grad_norm": 2.77986767725274, "learning_rate": 6.7822920705606855e-06, "loss": 0.6196, "step": 14866 }, { "epoch": 1.21, "grad_norm": 4.290208973267117, "learning_rate": 6.7818824250050774e-06, "loss": 0.6099, "step": 14867 }, { "epoch": 1.21, "grad_norm": 4.475187602130366, "learning_rate": 6.781472765748294e-06, "loss": 0.6864, "step": 14868 }, { "epoch": 1.21, "grad_norm": 3.5224372833495226, "learning_rate": 6.7810630927934815e-06, "loss": 0.6975, "step": 14869 }, { "epoch": 1.21, "grad_norm": 2.7956385548333045, "learning_rate": 6.780653406143792e-06, "loss": 0.4659, "step": 14870 }, { "epoch": 1.21, "grad_norm": 4.070882171854959, "learning_rate": 6.780243705802374e-06, "loss": 0.5846, "step": 14871 }, { "epoch": 1.21, "grad_norm": 2.700188859238596, "learning_rate": 6.77983399177238e-06, "loss": 0.5747, "step": 14872 }, { "epoch": 1.21, "grad_norm": 5.198585542092083, "learning_rate": 6.779424264056958e-06, "loss": 0.6275, "step": 14873 }, { "epoch": 1.21, "grad_norm": 3.9692894750405667, "learning_rate": 6.77901452265926e-06, "loss": 0.5938, "step": 14874 }, { "epoch": 1.21, "grad_norm": 3.371526530400815, "learning_rate": 6.778604767582434e-06, "loss": 0.5454, "step": 14875 }, { "epoch": 1.21, "grad_norm": 7.633769723456087, "learning_rate": 6.7781949988296345e-06, "loss": 0.5634, "step": 14876 }, { "epoch": 1.21, "grad_norm": 4.3664743220412054, "learning_rate": 6.77778521640401e-06, "loss": 0.6131, "step": 14877 }, { "epoch": 1.21, "grad_norm": 3.8878483251973877, "learning_rate": 6.777375420308712e-06, "loss": 0.7577, "step": 14878 }, { "epoch": 1.21, "grad_norm": 2.757930371934674, "learning_rate": 6.77696561054689e-06, "loss": 0.5119, "step": 14879 }, { "epoch": 1.21, "grad_norm": 8.853750486591252, "learning_rate": 6.776555787121698e-06, "loss": 0.4906, "step": 14880 }, { "epoch": 1.21, "grad_norm": 2.381920428994496, "learning_rate": 6.776145950036285e-06, "loss": 0.6058, "step": 14881 }, { "epoch": 1.21, "grad_norm": 3.693999691498746, "learning_rate": 6.775736099293803e-06, "loss": 0.4644, "step": 14882 }, { "epoch": 1.21, "grad_norm": 3.1418668238017147, "learning_rate": 6.775326234897403e-06, "loss": 0.5338, "step": 14883 }, { "epoch": 1.21, "grad_norm": 4.084425543326978, "learning_rate": 6.774916356850235e-06, "loss": 0.5618, "step": 14884 }, { "epoch": 1.21, "grad_norm": 2.9138840544893423, "learning_rate": 6.774506465155455e-06, "loss": 0.5327, "step": 14885 }, { "epoch": 1.21, "grad_norm": 3.7928749916250415, "learning_rate": 6.774096559816212e-06, "loss": 0.6097, "step": 14886 }, { "epoch": 1.21, "grad_norm": 3.6219685529410848, "learning_rate": 6.773686640835657e-06, "loss": 0.6725, "step": 14887 }, { "epoch": 1.21, "grad_norm": 3.1858957132247077, "learning_rate": 6.773276708216943e-06, "loss": 0.487, "step": 14888 }, { "epoch": 1.21, "grad_norm": 3.5544365517229912, "learning_rate": 6.772866761963223e-06, "loss": 0.5625, "step": 14889 }, { "epoch": 1.21, "grad_norm": 2.9004141275982094, "learning_rate": 6.772456802077647e-06, "loss": 0.5485, "step": 14890 }, { "epoch": 1.21, "grad_norm": 7.218163923731186, "learning_rate": 6.772046828563369e-06, "loss": 0.5172, "step": 14891 }, { "epoch": 1.21, "grad_norm": 3.389057976496339, "learning_rate": 6.771636841423539e-06, "loss": 0.7012, "step": 14892 }, { "epoch": 1.21, "grad_norm": 2.9873556731001734, "learning_rate": 6.771226840661314e-06, "loss": 0.5542, "step": 14893 }, { "epoch": 1.21, "grad_norm": 5.268274305712097, "learning_rate": 6.770816826279841e-06, "loss": 0.4931, "step": 14894 }, { "epoch": 1.21, "grad_norm": 2.593137242544547, "learning_rate": 6.770406798282277e-06, "loss": 0.6675, "step": 14895 }, { "epoch": 1.21, "grad_norm": 2.8748900727611697, "learning_rate": 6.769996756671773e-06, "loss": 0.4301, "step": 14896 }, { "epoch": 1.21, "grad_norm": 5.810001596382339, "learning_rate": 6.769586701451481e-06, "loss": 0.5507, "step": 14897 }, { "epoch": 1.21, "grad_norm": 3.3869169879404857, "learning_rate": 6.769176632624556e-06, "loss": 0.6071, "step": 14898 }, { "epoch": 1.21, "grad_norm": 3.630628339340941, "learning_rate": 6.7687665501941504e-06, "loss": 0.7011, "step": 14899 }, { "epoch": 1.21, "grad_norm": 2.255777570605704, "learning_rate": 6.7683564541634165e-06, "loss": 0.6939, "step": 14900 }, { "epoch": 1.21, "grad_norm": 2.4447687968255245, "learning_rate": 6.7679463445355065e-06, "loss": 0.5363, "step": 14901 }, { "epoch": 1.21, "grad_norm": 3.014235548221006, "learning_rate": 6.7675362213135775e-06, "loss": 0.6091, "step": 14902 }, { "epoch": 1.21, "grad_norm": 2.878403473308778, "learning_rate": 6.7671260845007804e-06, "loss": 0.6734, "step": 14903 }, { "epoch": 1.21, "grad_norm": 3.066857329337155, "learning_rate": 6.76671593410027e-06, "loss": 0.757, "step": 14904 }, { "epoch": 1.21, "grad_norm": 5.008502984016628, "learning_rate": 6.766305770115198e-06, "loss": 0.6301, "step": 14905 }, { "epoch": 1.21, "grad_norm": 2.739842896526618, "learning_rate": 6.76589559254872e-06, "loss": 0.6467, "step": 14906 }, { "epoch": 1.21, "grad_norm": 2.645533380458798, "learning_rate": 6.76548540140399e-06, "loss": 0.5885, "step": 14907 }, { "epoch": 1.21, "grad_norm": 3.282026097214851, "learning_rate": 6.765075196684162e-06, "loss": 0.4495, "step": 14908 }, { "epoch": 1.21, "grad_norm": 3.1166605245527084, "learning_rate": 6.764664978392388e-06, "loss": 0.5759, "step": 14909 }, { "epoch": 1.21, "grad_norm": 4.45386723269136, "learning_rate": 6.7642547465318254e-06, "loss": 0.6144, "step": 14910 }, { "epoch": 1.21, "grad_norm": 3.6279771688108684, "learning_rate": 6.763844501105627e-06, "loss": 0.55, "step": 14911 }, { "epoch": 1.21, "grad_norm": 9.816154473336146, "learning_rate": 6.763434242116946e-06, "loss": 0.5878, "step": 14912 }, { "epoch": 1.21, "grad_norm": 4.347939065644158, "learning_rate": 6.76302396956894e-06, "loss": 0.6775, "step": 14913 }, { "epoch": 1.21, "grad_norm": 4.876321335187087, "learning_rate": 6.76261368346476e-06, "loss": 0.5713, "step": 14914 }, { "epoch": 1.21, "grad_norm": 6.194061843963838, "learning_rate": 6.762203383807564e-06, "loss": 0.5373, "step": 14915 }, { "epoch": 1.21, "grad_norm": 2.661532393474939, "learning_rate": 6.7617930706005055e-06, "loss": 0.5829, "step": 14916 }, { "epoch": 1.21, "grad_norm": 3.0528360491445796, "learning_rate": 6.761382743846738e-06, "loss": 0.4923, "step": 14917 }, { "epoch": 1.21, "grad_norm": 2.197285914592314, "learning_rate": 6.7609724035494195e-06, "loss": 0.6449, "step": 14918 }, { "epoch": 1.21, "grad_norm": 3.126276459284213, "learning_rate": 6.760562049711703e-06, "loss": 0.5503, "step": 14919 }, { "epoch": 1.21, "grad_norm": 9.65785130982446, "learning_rate": 6.7601516823367455e-06, "loss": 0.438, "step": 14920 }, { "epoch": 1.21, "grad_norm": 3.8038449686159295, "learning_rate": 6.759741301427699e-06, "loss": 0.5474, "step": 14921 }, { "epoch": 1.21, "grad_norm": 4.047525880087615, "learning_rate": 6.759330906987723e-06, "loss": 0.5408, "step": 14922 }, { "epoch": 1.21, "grad_norm": 2.7293789238708546, "learning_rate": 6.758920499019972e-06, "loss": 0.5825, "step": 14923 }, { "epoch": 1.21, "grad_norm": 2.9195180962420704, "learning_rate": 6.7585100775276005e-06, "loss": 0.6482, "step": 14924 }, { "epoch": 1.21, "grad_norm": 4.526446721901817, "learning_rate": 6.7580996425137635e-06, "loss": 0.6229, "step": 14925 }, { "epoch": 1.21, "grad_norm": 6.867093010154416, "learning_rate": 6.75768919398162e-06, "loss": 0.5328, "step": 14926 }, { "epoch": 1.21, "grad_norm": 2.3200207549029024, "learning_rate": 6.7572787319343245e-06, "loss": 0.5211, "step": 14927 }, { "epoch": 1.21, "grad_norm": 3.348954437484658, "learning_rate": 6.756868256375032e-06, "loss": 0.5466, "step": 14928 }, { "epoch": 1.21, "grad_norm": 13.573143074996123, "learning_rate": 6.7564577673069e-06, "loss": 0.5146, "step": 14929 }, { "epoch": 1.21, "grad_norm": 5.146589220741601, "learning_rate": 6.756047264733085e-06, "loss": 0.6238, "step": 14930 }, { "epoch": 1.21, "grad_norm": 4.0734943970533735, "learning_rate": 6.755636748656742e-06, "loss": 0.5783, "step": 14931 }, { "epoch": 1.21, "grad_norm": 2.8000534612454495, "learning_rate": 6.755226219081028e-06, "loss": 0.551, "step": 14932 }, { "epoch": 1.21, "grad_norm": 2.7268253984180957, "learning_rate": 6.754815676009101e-06, "loss": 0.575, "step": 14933 }, { "epoch": 1.21, "grad_norm": 4.320334683002618, "learning_rate": 6.754405119444116e-06, "loss": 0.5083, "step": 14934 }, { "epoch": 1.21, "grad_norm": 4.814918800508288, "learning_rate": 6.753994549389231e-06, "loss": 0.5302, "step": 14935 }, { "epoch": 1.21, "grad_norm": 3.5917751072140507, "learning_rate": 6.753583965847603e-06, "loss": 0.5347, "step": 14936 }, { "epoch": 1.21, "grad_norm": 3.3404881717304287, "learning_rate": 6.753173368822388e-06, "loss": 0.4817, "step": 14937 }, { "epoch": 1.21, "grad_norm": 4.714868534948615, "learning_rate": 6.752762758316744e-06, "loss": 0.6229, "step": 14938 }, { "epoch": 1.21, "grad_norm": 2.490795051857086, "learning_rate": 6.7523521343338285e-06, "loss": 0.6258, "step": 14939 }, { "epoch": 1.21, "grad_norm": 5.4682101979533595, "learning_rate": 6.751941496876797e-06, "loss": 0.6834, "step": 14940 }, { "epoch": 1.21, "grad_norm": 2.898522139890144, "learning_rate": 6.751530845948809e-06, "loss": 0.6246, "step": 14941 }, { "epoch": 1.21, "grad_norm": 2.110916247897185, "learning_rate": 6.75112018155302e-06, "loss": 0.5609, "step": 14942 }, { "epoch": 1.21, "grad_norm": 2.8411411144098375, "learning_rate": 6.750709503692592e-06, "loss": 0.5703, "step": 14943 }, { "epoch": 1.21, "grad_norm": 2.3170044236807468, "learning_rate": 6.750298812370677e-06, "loss": 0.6, "step": 14944 }, { "epoch": 1.21, "grad_norm": 2.3286927833151845, "learning_rate": 6.749888107590437e-06, "loss": 0.5649, "step": 14945 }, { "epoch": 1.21, "grad_norm": 3.5530501666236534, "learning_rate": 6.749477389355028e-06, "loss": 0.5812, "step": 14946 }, { "epoch": 1.21, "grad_norm": 6.776717908474441, "learning_rate": 6.749066657667609e-06, "loss": 0.4992, "step": 14947 }, { "epoch": 1.21, "grad_norm": 2.684583555062618, "learning_rate": 6.7486559125313374e-06, "loss": 0.5093, "step": 14948 }, { "epoch": 1.21, "grad_norm": 6.409612452124999, "learning_rate": 6.748245153949372e-06, "loss": 0.6102, "step": 14949 }, { "epoch": 1.21, "grad_norm": 3.8714639620374616, "learning_rate": 6.747834381924871e-06, "loss": 0.6109, "step": 14950 }, { "epoch": 1.21, "grad_norm": 2.774899167323002, "learning_rate": 6.747423596460995e-06, "loss": 0.6473, "step": 14951 }, { "epoch": 1.21, "grad_norm": 4.505917853933734, "learning_rate": 6.747012797560899e-06, "loss": 0.6114, "step": 14952 }, { "epoch": 1.21, "grad_norm": 3.393353915004305, "learning_rate": 6.746601985227742e-06, "loss": 0.729, "step": 14953 }, { "epoch": 1.21, "grad_norm": 2.947476546006258, "learning_rate": 6.746191159464685e-06, "loss": 0.5376, "step": 14954 }, { "epoch": 1.21, "grad_norm": 3.006791325250897, "learning_rate": 6.745780320274888e-06, "loss": 0.7159, "step": 14955 }, { "epoch": 1.21, "grad_norm": 7.165608647939638, "learning_rate": 6.745369467661507e-06, "loss": 0.7287, "step": 14956 }, { "epoch": 1.21, "grad_norm": 2.7184683162082095, "learning_rate": 6.744958601627701e-06, "loss": 0.5261, "step": 14957 }, { "epoch": 1.21, "grad_norm": 4.193171917950052, "learning_rate": 6.744547722176631e-06, "loss": 0.4563, "step": 14958 }, { "epoch": 1.21, "grad_norm": 3.061544060675656, "learning_rate": 6.744136829311457e-06, "loss": 0.6054, "step": 14959 }, { "epoch": 1.22, "grad_norm": 3.0696397711246304, "learning_rate": 6.743725923035336e-06, "loss": 0.604, "step": 14960 }, { "epoch": 1.22, "grad_norm": 3.2919222796861534, "learning_rate": 6.743315003351427e-06, "loss": 0.6945, "step": 14961 }, { "epoch": 1.22, "grad_norm": 2.1196984209047427, "learning_rate": 6.742904070262894e-06, "loss": 0.5894, "step": 14962 }, { "epoch": 1.22, "grad_norm": 3.3374645049815874, "learning_rate": 6.742493123772893e-06, "loss": 0.6631, "step": 14963 }, { "epoch": 1.22, "grad_norm": 4.222941325257737, "learning_rate": 6.7420821638845844e-06, "loss": 0.5689, "step": 14964 }, { "epoch": 1.22, "grad_norm": 5.739845325321258, "learning_rate": 6.7416711906011275e-06, "loss": 0.4766, "step": 14965 }, { "epoch": 1.22, "grad_norm": 5.154994471723901, "learning_rate": 6.741260203925686e-06, "loss": 0.5471, "step": 14966 }, { "epoch": 1.22, "grad_norm": 3.290280583049368, "learning_rate": 6.740849203861416e-06, "loss": 0.5837, "step": 14967 }, { "epoch": 1.22, "grad_norm": 2.969123611814239, "learning_rate": 6.740438190411479e-06, "loss": 0.4667, "step": 14968 }, { "epoch": 1.22, "grad_norm": 4.030944546187075, "learning_rate": 6.7400271635790345e-06, "loss": 0.6172, "step": 14969 }, { "epoch": 1.22, "grad_norm": 5.2029443831569, "learning_rate": 6.739616123367246e-06, "loss": 0.5081, "step": 14970 }, { "epoch": 1.22, "grad_norm": 3.327385103504345, "learning_rate": 6.739205069779272e-06, "loss": 0.5785, "step": 14971 }, { "epoch": 1.22, "grad_norm": 15.547299826960266, "learning_rate": 6.738794002818273e-06, "loss": 0.6687, "step": 14972 }, { "epoch": 1.22, "grad_norm": 3.332514931811837, "learning_rate": 6.738382922487408e-06, "loss": 0.6012, "step": 14973 }, { "epoch": 1.22, "grad_norm": 2.4642680250707407, "learning_rate": 6.7379718287898425e-06, "loss": 0.5094, "step": 14974 }, { "epoch": 1.22, "grad_norm": 2.9411803759772917, "learning_rate": 6.737560721728733e-06, "loss": 0.6168, "step": 14975 }, { "epoch": 1.22, "grad_norm": 3.580392598301216, "learning_rate": 6.7371496013072435e-06, "loss": 0.4821, "step": 14976 }, { "epoch": 1.22, "grad_norm": 2.5370892243056793, "learning_rate": 6.736738467528532e-06, "loss": 0.5574, "step": 14977 }, { "epoch": 1.22, "grad_norm": 8.459756082518046, "learning_rate": 6.736327320395764e-06, "loss": 0.4706, "step": 14978 }, { "epoch": 1.22, "grad_norm": 3.121416643139075, "learning_rate": 6.735916159912098e-06, "loss": 0.551, "step": 14979 }, { "epoch": 1.22, "grad_norm": 3.2487348434294834, "learning_rate": 6.735504986080696e-06, "loss": 0.6276, "step": 14980 }, { "epoch": 1.22, "grad_norm": 3.8353263301090266, "learning_rate": 6.735093798904721e-06, "loss": 0.6296, "step": 14981 }, { "epoch": 1.22, "grad_norm": 4.80225052641842, "learning_rate": 6.734682598387331e-06, "loss": 0.6556, "step": 14982 }, { "epoch": 1.22, "grad_norm": 13.759825068630365, "learning_rate": 6.734271384531691e-06, "loss": 0.4623, "step": 14983 }, { "epoch": 1.22, "grad_norm": 2.566141694530004, "learning_rate": 6.733860157340963e-06, "loss": 0.5698, "step": 14984 }, { "epoch": 1.22, "grad_norm": 3.3679456388370324, "learning_rate": 6.733448916818308e-06, "loss": 0.5367, "step": 14985 }, { "epoch": 1.22, "grad_norm": 3.4333308282531214, "learning_rate": 6.733037662966886e-06, "loss": 0.7183, "step": 14986 }, { "epoch": 1.22, "grad_norm": 5.990579847629308, "learning_rate": 6.732626395789863e-06, "loss": 0.5603, "step": 14987 }, { "epoch": 1.22, "grad_norm": 3.4034368872687937, "learning_rate": 6.7322151152904006e-06, "loss": 0.6942, "step": 14988 }, { "epoch": 1.22, "grad_norm": 2.611527490067071, "learning_rate": 6.73180382147166e-06, "loss": 0.5661, "step": 14989 }, { "epoch": 1.22, "grad_norm": 3.0432731680594554, "learning_rate": 6.731392514336802e-06, "loss": 0.513, "step": 14990 }, { "epoch": 1.22, "grad_norm": 4.513425710544245, "learning_rate": 6.730981193888993e-06, "loss": 0.6597, "step": 14991 }, { "epoch": 1.22, "grad_norm": 9.931240117034832, "learning_rate": 6.7305698601313925e-06, "loss": 0.5815, "step": 14992 }, { "epoch": 1.22, "grad_norm": 3.688825611488757, "learning_rate": 6.7301585130671665e-06, "loss": 0.6309, "step": 14993 }, { "epoch": 1.22, "grad_norm": 2.402023152973241, "learning_rate": 6.729747152699474e-06, "loss": 0.4699, "step": 14994 }, { "epoch": 1.22, "grad_norm": 5.095778514060208, "learning_rate": 6.729335779031482e-06, "loss": 0.5455, "step": 14995 }, { "epoch": 1.22, "grad_norm": 3.062877155310399, "learning_rate": 6.728924392066352e-06, "loss": 0.5898, "step": 14996 }, { "epoch": 1.22, "grad_norm": 5.524628374013388, "learning_rate": 6.7285129918072455e-06, "loss": 0.7479, "step": 14997 }, { "epoch": 1.22, "grad_norm": 3.702879940472238, "learning_rate": 6.7281015782573265e-06, "loss": 0.6725, "step": 14998 }, { "epoch": 1.22, "grad_norm": 2.812282553187698, "learning_rate": 6.727690151419761e-06, "loss": 0.6656, "step": 14999 }, { "epoch": 1.22, "grad_norm": 2.465124049605808, "learning_rate": 6.72727871129771e-06, "loss": 0.4037, "step": 15000 } ], "logging_steps": 1.0, "max_steps": 36936, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 1.2325614325636006e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }