{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999322171761675, "eval_steps": 500, "global_step": 7376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013556564766488172, "grad_norm": 31.358448898252252, "learning_rate": 9.009009009009009e-09, "loss": 2.3424, "step": 1 }, { "epoch": 0.00027113129532976344, "grad_norm": 25.775006525003928, "learning_rate": 1.8018018018018017e-08, "loss": 2.3467, "step": 2 }, { "epoch": 0.00040669694299464516, "grad_norm": 30.07326378556178, "learning_rate": 2.7027027027027028e-08, "loss": 2.3266, "step": 3 }, { "epoch": 0.0005422625906595269, "grad_norm": 37.27390831757232, "learning_rate": 3.6036036036036035e-08, "loss": 2.3276, "step": 4 }, { "epoch": 0.0006778282383244086, "grad_norm": 31.57991475426305, "learning_rate": 4.504504504504504e-08, "loss": 2.349, "step": 5 }, { "epoch": 0.0008133938859892903, "grad_norm": 31.689090757240738, "learning_rate": 5.4054054054054056e-08, "loss": 2.3798, "step": 6 }, { "epoch": 0.000948959533654172, "grad_norm": 26.35424937817498, "learning_rate": 6.306306306306305e-08, "loss": 2.3184, "step": 7 }, { "epoch": 0.0010845251813190538, "grad_norm": 37.2147460317946, "learning_rate": 7.207207207207207e-08, "loss": 2.331, "step": 8 }, { "epoch": 0.0012200908289839354, "grad_norm": 32.035119213596005, "learning_rate": 8.108108108108108e-08, "loss": 2.3527, "step": 9 }, { "epoch": 0.0013556564766488172, "grad_norm": 26.034279709482913, "learning_rate": 9.009009009009008e-08, "loss": 2.3257, "step": 10 }, { "epoch": 0.0014912221243136988, "grad_norm": 27.48123460639181, "learning_rate": 9.909909909909909e-08, "loss": 2.3145, "step": 11 }, { "epoch": 0.0016267877719785807, "grad_norm": 29.82960986253434, "learning_rate": 1.0810810810810811e-07, "loss": 2.3909, "step": 12 }, { "epoch": 0.0017623534196434623, "grad_norm": 24.517638496135522, "learning_rate": 1.171171171171171e-07, "loss": 2.3031, "step": 13 }, { "epoch": 0.001897919067308344, "grad_norm": 28.654472776828367, "learning_rate": 1.261261261261261e-07, "loss": 2.3397, "step": 14 }, { "epoch": 0.002033484714973226, "grad_norm": 36.503658082418745, "learning_rate": 1.3513513513513515e-07, "loss": 2.3815, "step": 15 }, { "epoch": 0.0021690503626381075, "grad_norm": 33.54224160514921, "learning_rate": 1.4414414414414414e-07, "loss": 2.3409, "step": 16 }, { "epoch": 0.002304616010302989, "grad_norm": 29.038467337622713, "learning_rate": 1.5315315315315313e-07, "loss": 2.3402, "step": 17 }, { "epoch": 0.0024401816579678708, "grad_norm": 35.06295523861638, "learning_rate": 1.6216216216216215e-07, "loss": 2.3434, "step": 18 }, { "epoch": 0.002575747305632753, "grad_norm": 26.619974976960805, "learning_rate": 1.7117117117117117e-07, "loss": 2.3323, "step": 19 }, { "epoch": 0.0027113129532976344, "grad_norm": 30.835139509103342, "learning_rate": 1.8018018018018017e-07, "loss": 2.3237, "step": 20 }, { "epoch": 0.002846878600962516, "grad_norm": 26.087486320225505, "learning_rate": 1.891891891891892e-07, "loss": 2.3056, "step": 21 }, { "epoch": 0.0029824442486273976, "grad_norm": 34.98074876879901, "learning_rate": 1.9819819819819818e-07, "loss": 2.2896, "step": 22 }, { "epoch": 0.0031180098962922797, "grad_norm": 26.271198028653888, "learning_rate": 2.072072072072072e-07, "loss": 2.3033, "step": 23 }, { "epoch": 0.0032535755439571613, "grad_norm": 26.45688027370123, "learning_rate": 2.1621621621621622e-07, "loss": 2.3033, "step": 24 }, { "epoch": 0.003389141191622043, "grad_norm": 26.572076855931503, "learning_rate": 2.2522522522522522e-07, "loss": 2.3254, "step": 25 }, { "epoch": 0.0035247068392869245, "grad_norm": 29.300410682505227, "learning_rate": 2.342342342342342e-07, "loss": 2.3478, "step": 26 }, { "epoch": 0.0036602724869518066, "grad_norm": 29.317067939627105, "learning_rate": 2.4324324324324326e-07, "loss": 2.3154, "step": 27 }, { "epoch": 0.003795838134616688, "grad_norm": 23.23553553851753, "learning_rate": 2.522522522522522e-07, "loss": 2.291, "step": 28 }, { "epoch": 0.00393140378228157, "grad_norm": 35.08260017693689, "learning_rate": 2.6126126126126124e-07, "loss": 2.3327, "step": 29 }, { "epoch": 0.004066969429946452, "grad_norm": 29.227020985991448, "learning_rate": 2.702702702702703e-07, "loss": 2.305, "step": 30 }, { "epoch": 0.0042025350776113335, "grad_norm": 25.292368008655497, "learning_rate": 2.7927927927927923e-07, "loss": 2.2306, "step": 31 }, { "epoch": 0.004338100725276215, "grad_norm": 26.758203248424646, "learning_rate": 2.882882882882883e-07, "loss": 2.2416, "step": 32 }, { "epoch": 0.004473666372941097, "grad_norm": 22.220460717665034, "learning_rate": 2.972972972972973e-07, "loss": 2.2007, "step": 33 }, { "epoch": 0.004609232020605978, "grad_norm": 23.865012067050447, "learning_rate": 3.0630630630630627e-07, "loss": 2.2194, "step": 34 }, { "epoch": 0.00474479766827086, "grad_norm": 23.469039120945865, "learning_rate": 3.153153153153153e-07, "loss": 2.1846, "step": 35 }, { "epoch": 0.0048803633159357415, "grad_norm": 21.75748883028805, "learning_rate": 3.243243243243243e-07, "loss": 2.2136, "step": 36 }, { "epoch": 0.005015928963600624, "grad_norm": 23.616094425775092, "learning_rate": 3.333333333333333e-07, "loss": 2.2182, "step": 37 }, { "epoch": 0.005151494611265506, "grad_norm": 22.60700328970862, "learning_rate": 3.4234234234234235e-07, "loss": 2.2159, "step": 38 }, { "epoch": 0.005287060258930387, "grad_norm": 22.97349293644944, "learning_rate": 3.5135135135135134e-07, "loss": 2.2447, "step": 39 }, { "epoch": 0.005422625906595269, "grad_norm": 23.231398059525194, "learning_rate": 3.6036036036036033e-07, "loss": 2.1748, "step": 40 }, { "epoch": 0.0055581915542601504, "grad_norm": 22.887826333812118, "learning_rate": 3.6936936936936933e-07, "loss": 2.2422, "step": 41 }, { "epoch": 0.005693757201925032, "grad_norm": 21.833607015768074, "learning_rate": 3.783783783783784e-07, "loss": 2.2142, "step": 42 }, { "epoch": 0.005829322849589914, "grad_norm": 22.715899837023056, "learning_rate": 3.8738738738738737e-07, "loss": 2.0821, "step": 43 }, { "epoch": 0.005964888497254795, "grad_norm": 15.851575851862286, "learning_rate": 3.9639639639639636e-07, "loss": 2.0068, "step": 44 }, { "epoch": 0.006100454144919678, "grad_norm": 15.179676731593098, "learning_rate": 4.054054054054054e-07, "loss": 2.0041, "step": 45 }, { "epoch": 0.006236019792584559, "grad_norm": 15.963612054275739, "learning_rate": 4.144144144144144e-07, "loss": 1.9291, "step": 46 }, { "epoch": 0.006371585440249441, "grad_norm": 15.908413840122002, "learning_rate": 4.234234234234234e-07, "loss": 1.9598, "step": 47 }, { "epoch": 0.006507151087914323, "grad_norm": 18.666900677279312, "learning_rate": 4.3243243243243244e-07, "loss": 1.967, "step": 48 }, { "epoch": 0.006642716735579204, "grad_norm": 16.17490189029915, "learning_rate": 4.414414414414414e-07, "loss": 1.9584, "step": 49 }, { "epoch": 0.006778282383244086, "grad_norm": 42.23957036520458, "learning_rate": 4.5045045045045043e-07, "loss": 1.9864, "step": 50 }, { "epoch": 0.0069138480309089674, "grad_norm": 14.341654043585185, "learning_rate": 4.594594594594595e-07, "loss": 1.9477, "step": 51 }, { "epoch": 0.007049413678573849, "grad_norm": 28.534641296668752, "learning_rate": 4.684684684684684e-07, "loss": 1.9526, "step": 52 }, { "epoch": 0.0071849793262387315, "grad_norm": 16.34018231338953, "learning_rate": 4.774774774774775e-07, "loss": 1.9416, "step": 53 }, { "epoch": 0.007320544973903613, "grad_norm": 18.49781101272322, "learning_rate": 4.864864864864865e-07, "loss": 1.9471, "step": 54 }, { "epoch": 0.007456110621568495, "grad_norm": 15.020996088134007, "learning_rate": 4.954954954954955e-07, "loss": 1.9274, "step": 55 }, { "epoch": 0.007591676269233376, "grad_norm": 17.288120347440433, "learning_rate": 5.045045045045044e-07, "loss": 1.9129, "step": 56 }, { "epoch": 0.007727241916898258, "grad_norm": 16.02025520711598, "learning_rate": 5.135135135135134e-07, "loss": 1.8766, "step": 57 }, { "epoch": 0.00786280756456314, "grad_norm": 15.441426537839197, "learning_rate": 5.225225225225225e-07, "loss": 1.8434, "step": 58 }, { "epoch": 0.007998373212228021, "grad_norm": 16.085337854762805, "learning_rate": 5.315315315315315e-07, "loss": 1.8249, "step": 59 }, { "epoch": 0.008133938859892904, "grad_norm": 14.283007325395044, "learning_rate": 5.405405405405406e-07, "loss": 1.7848, "step": 60 }, { "epoch": 0.008269504507557784, "grad_norm": 11.14383868155693, "learning_rate": 5.495495495495495e-07, "loss": 1.7474, "step": 61 }, { "epoch": 0.008405070155222667, "grad_norm": 10.35253730422045, "learning_rate": 5.585585585585585e-07, "loss": 1.7148, "step": 62 }, { "epoch": 0.008540635802887548, "grad_norm": 15.381787971723359, "learning_rate": 5.675675675675675e-07, "loss": 1.6885, "step": 63 }, { "epoch": 0.00867620145055243, "grad_norm": 8.711031517613073, "learning_rate": 5.765765765765766e-07, "loss": 1.6956, "step": 64 }, { "epoch": 0.008811767098217311, "grad_norm": 9.892536617710872, "learning_rate": 5.855855855855856e-07, "loss": 1.6849, "step": 65 }, { "epoch": 0.008947332745882193, "grad_norm": 14.46373746282737, "learning_rate": 5.945945945945947e-07, "loss": 1.6409, "step": 66 }, { "epoch": 0.009082898393547076, "grad_norm": 13.644799758720835, "learning_rate": 6.036036036036036e-07, "loss": 1.6519, "step": 67 }, { "epoch": 0.009218464041211957, "grad_norm": 8.574473557412077, "learning_rate": 6.126126126126125e-07, "loss": 1.6319, "step": 68 }, { "epoch": 0.009354029688876839, "grad_norm": 8.62068371212319, "learning_rate": 6.216216216216216e-07, "loss": 1.6572, "step": 69 }, { "epoch": 0.00948959533654172, "grad_norm": 9.249525051290387, "learning_rate": 6.306306306306306e-07, "loss": 1.5694, "step": 70 }, { "epoch": 0.009625160984206602, "grad_norm": 11.129340899643314, "learning_rate": 6.396396396396397e-07, "loss": 1.6606, "step": 71 }, { "epoch": 0.009760726631871483, "grad_norm": 8.239171033943611, "learning_rate": 6.486486486486486e-07, "loss": 1.6092, "step": 72 }, { "epoch": 0.009896292279536366, "grad_norm": 6.7936087333796245, "learning_rate": 6.576576576576577e-07, "loss": 1.6354, "step": 73 }, { "epoch": 0.010031857927201248, "grad_norm": 9.103888518285245, "learning_rate": 6.666666666666666e-07, "loss": 1.6054, "step": 74 }, { "epoch": 0.010167423574866129, "grad_norm": 7.84377617969639, "learning_rate": 6.756756756756756e-07, "loss": 1.5994, "step": 75 }, { "epoch": 0.010302989222531011, "grad_norm": 7.495397318442781, "learning_rate": 6.846846846846847e-07, "loss": 1.6029, "step": 76 }, { "epoch": 0.010438554870195892, "grad_norm": 14.952549309689383, "learning_rate": 6.936936936936936e-07, "loss": 1.5678, "step": 77 }, { "epoch": 0.010574120517860774, "grad_norm": 10.00435804058015, "learning_rate": 7.027027027027027e-07, "loss": 1.589, "step": 78 }, { "epoch": 0.010709686165525655, "grad_norm": 9.72167612060451, "learning_rate": 7.117117117117116e-07, "loss": 1.5921, "step": 79 }, { "epoch": 0.010845251813190538, "grad_norm": 8.845192410958315, "learning_rate": 7.207207207207207e-07, "loss": 1.5974, "step": 80 }, { "epoch": 0.010980817460855418, "grad_norm": 6.750793419744415, "learning_rate": 7.297297297297297e-07, "loss": 1.5912, "step": 81 }, { "epoch": 0.011116383108520301, "grad_norm": 8.7828205836003, "learning_rate": 7.387387387387387e-07, "loss": 1.5691, "step": 82 }, { "epoch": 0.011251948756185183, "grad_norm": 9.101698791378114, "learning_rate": 7.477477477477477e-07, "loss": 1.5724, "step": 83 }, { "epoch": 0.011387514403850064, "grad_norm": 6.715403533592601, "learning_rate": 7.567567567567568e-07, "loss": 1.5815, "step": 84 }, { "epoch": 0.011523080051514947, "grad_norm": 5.581981213701177, "learning_rate": 7.657657657657657e-07, "loss": 1.5682, "step": 85 }, { "epoch": 0.011658645699179827, "grad_norm": 7.138772717159439, "learning_rate": 7.747747747747747e-07, "loss": 1.5265, "step": 86 }, { "epoch": 0.01179421134684471, "grad_norm": 4.870305941173202, "learning_rate": 7.837837837837838e-07, "loss": 1.5419, "step": 87 }, { "epoch": 0.01192977699450959, "grad_norm": 6.521717423058534, "learning_rate": 7.927927927927927e-07, "loss": 1.5455, "step": 88 }, { "epoch": 0.012065342642174473, "grad_norm": 5.769843758005414, "learning_rate": 8.018018018018018e-07, "loss": 1.4849, "step": 89 }, { "epoch": 0.012200908289839356, "grad_norm": 7.178121071594865, "learning_rate": 8.108108108108108e-07, "loss": 1.5256, "step": 90 }, { "epoch": 0.012336473937504236, "grad_norm": 46.0309554596922, "learning_rate": 8.198198198198198e-07, "loss": 1.5773, "step": 91 }, { "epoch": 0.012472039585169119, "grad_norm": 6.9575695671884805, "learning_rate": 8.288288288288288e-07, "loss": 1.5231, "step": 92 }, { "epoch": 0.012607605232834, "grad_norm": 8.121817047269476, "learning_rate": 8.378378378378377e-07, "loss": 1.5372, "step": 93 }, { "epoch": 0.012743170880498882, "grad_norm": 5.34618841291262, "learning_rate": 8.468468468468468e-07, "loss": 1.5557, "step": 94 }, { "epoch": 0.012878736528163763, "grad_norm": 8.247242648543923, "learning_rate": 8.558558558558558e-07, "loss": 1.5228, "step": 95 }, { "epoch": 0.013014302175828645, "grad_norm": 6.686865835736031, "learning_rate": 8.648648648648649e-07, "loss": 1.5278, "step": 96 }, { "epoch": 0.013149867823493526, "grad_norm": 6.144676122801508, "learning_rate": 8.738738738738738e-07, "loss": 1.5329, "step": 97 }, { "epoch": 0.013285433471158408, "grad_norm": 4.850889659663519, "learning_rate": 8.828828828828828e-07, "loss": 1.497, "step": 98 }, { "epoch": 0.013420999118823291, "grad_norm": 9.777707206537467, "learning_rate": 8.918918918918918e-07, "loss": 1.4888, "step": 99 }, { "epoch": 0.013556564766488172, "grad_norm": 8.856528151046826, "learning_rate": 9.009009009009009e-07, "loss": 1.5183, "step": 100 }, { "epoch": 0.013692130414153054, "grad_norm": 6.375427945573184, "learning_rate": 9.099099099099099e-07, "loss": 1.5194, "step": 101 }, { "epoch": 0.013827696061817935, "grad_norm": 9.90023282731355, "learning_rate": 9.18918918918919e-07, "loss": 1.4986, "step": 102 }, { "epoch": 0.013963261709482817, "grad_norm": 9.385073643098055, "learning_rate": 9.279279279279278e-07, "loss": 1.5175, "step": 103 }, { "epoch": 0.014098827357147698, "grad_norm": 9.039037084053, "learning_rate": 9.369369369369368e-07, "loss": 1.5165, "step": 104 }, { "epoch": 0.01423439300481258, "grad_norm": 9.177366597039015, "learning_rate": 9.459459459459459e-07, "loss": 1.5245, "step": 105 }, { "epoch": 0.014369958652477463, "grad_norm": 7.69384003568614, "learning_rate": 9.54954954954955e-07, "loss": 1.4966, "step": 106 }, { "epoch": 0.014505524300142344, "grad_norm": 6.7167918884019056, "learning_rate": 9.63963963963964e-07, "loss": 1.456, "step": 107 }, { "epoch": 0.014641089947807226, "grad_norm": 9.15666483069009, "learning_rate": 9.72972972972973e-07, "loss": 1.5246, "step": 108 }, { "epoch": 0.014776655595472107, "grad_norm": 5.796715920958907, "learning_rate": 9.819819819819819e-07, "loss": 1.4733, "step": 109 }, { "epoch": 0.01491222124313699, "grad_norm": 9.073326918846115, "learning_rate": 9.90990990990991e-07, "loss": 1.4639, "step": 110 }, { "epoch": 0.01504778689080187, "grad_norm": 6.496935406804497, "learning_rate": 1e-06, "loss": 1.5298, "step": 111 }, { "epoch": 0.015183352538466753, "grad_norm": 13.739345899895717, "learning_rate": 1.0090090090090088e-06, "loss": 1.4757, "step": 112 }, { "epoch": 0.015318918186131633, "grad_norm": 5.293408505017478, "learning_rate": 1.018018018018018e-06, "loss": 1.5113, "step": 113 }, { "epoch": 0.015454483833796516, "grad_norm": 6.3967513328690515, "learning_rate": 1.0270270270270269e-06, "loss": 1.512, "step": 114 }, { "epoch": 0.015590049481461398, "grad_norm": 5.083105646846086, "learning_rate": 1.0360360360360361e-06, "loss": 1.5176, "step": 115 }, { "epoch": 0.01572561512912628, "grad_norm": 16.434112550762993, "learning_rate": 1.045045045045045e-06, "loss": 1.5081, "step": 116 }, { "epoch": 0.01586118077679116, "grad_norm": 5.375925845011797, "learning_rate": 1.0540540540540538e-06, "loss": 1.5325, "step": 117 }, { "epoch": 0.015996746424456042, "grad_norm": 7.085320737240935, "learning_rate": 1.063063063063063e-06, "loss": 1.4432, "step": 118 }, { "epoch": 0.016132312072120923, "grad_norm": 7.716010144701415, "learning_rate": 1.072072072072072e-06, "loss": 1.532, "step": 119 }, { "epoch": 0.016267877719785807, "grad_norm": 5.097401764988239, "learning_rate": 1.0810810810810812e-06, "loss": 1.4842, "step": 120 }, { "epoch": 0.016403443367450688, "grad_norm": 5.609195650374577, "learning_rate": 1.09009009009009e-06, "loss": 1.4775, "step": 121 }, { "epoch": 0.01653900901511557, "grad_norm": 8.409386846307449, "learning_rate": 1.099099099099099e-06, "loss": 1.4968, "step": 122 }, { "epoch": 0.016674574662780453, "grad_norm": 5.859812201180504, "learning_rate": 1.108108108108108e-06, "loss": 1.4723, "step": 123 }, { "epoch": 0.016810140310445334, "grad_norm": 6.483637255952086, "learning_rate": 1.117117117117117e-06, "loss": 1.4931, "step": 124 }, { "epoch": 0.016945705958110215, "grad_norm": 10.078184077791981, "learning_rate": 1.1261261261261262e-06, "loss": 1.4825, "step": 125 }, { "epoch": 0.017081271605775095, "grad_norm": 5.626040582137921, "learning_rate": 1.135135135135135e-06, "loss": 1.4785, "step": 126 }, { "epoch": 0.01721683725343998, "grad_norm": 6.630721403139526, "learning_rate": 1.1441441441441443e-06, "loss": 1.4877, "step": 127 }, { "epoch": 0.01735240290110486, "grad_norm": 6.527311714536328, "learning_rate": 1.1531531531531531e-06, "loss": 1.4651, "step": 128 }, { "epoch": 0.01748796854876974, "grad_norm": 11.885281538508448, "learning_rate": 1.162162162162162e-06, "loss": 1.4705, "step": 129 }, { "epoch": 0.017623534196434622, "grad_norm": 5.549073700399762, "learning_rate": 1.1711711711711712e-06, "loss": 1.4492, "step": 130 }, { "epoch": 0.017759099844099506, "grad_norm": 8.813578700458427, "learning_rate": 1.18018018018018e-06, "loss": 1.4654, "step": 131 }, { "epoch": 0.017894665491764387, "grad_norm": 5.2267262532613925, "learning_rate": 1.1891891891891893e-06, "loss": 1.5264, "step": 132 }, { "epoch": 0.018030231139429267, "grad_norm": 8.281098775403043, "learning_rate": 1.1981981981981981e-06, "loss": 1.4584, "step": 133 }, { "epoch": 0.01816579678709415, "grad_norm": 5.634834280170394, "learning_rate": 1.2072072072072072e-06, "loss": 1.4598, "step": 134 }, { "epoch": 0.018301362434759032, "grad_norm": 12.697852601679982, "learning_rate": 1.2162162162162162e-06, "loss": 1.4953, "step": 135 }, { "epoch": 0.018436928082423913, "grad_norm": 5.5642290794558456, "learning_rate": 1.225225225225225e-06, "loss": 1.4875, "step": 136 }, { "epoch": 0.018572493730088794, "grad_norm": 5.943347804220723, "learning_rate": 1.2342342342342343e-06, "loss": 1.4799, "step": 137 }, { "epoch": 0.018708059377753678, "grad_norm": 7.48171278290922, "learning_rate": 1.2432432432432432e-06, "loss": 1.4796, "step": 138 }, { "epoch": 0.01884362502541856, "grad_norm": 4.346055213760585, "learning_rate": 1.2522522522522522e-06, "loss": 1.4517, "step": 139 }, { "epoch": 0.01897919067308344, "grad_norm": 5.438503689485981, "learning_rate": 1.2612612612612613e-06, "loss": 1.4751, "step": 140 }, { "epoch": 0.019114756320748324, "grad_norm": 6.91960743498674, "learning_rate": 1.27027027027027e-06, "loss": 1.4449, "step": 141 }, { "epoch": 0.019250321968413205, "grad_norm": 5.1459315483702746, "learning_rate": 1.2792792792792793e-06, "loss": 1.4541, "step": 142 }, { "epoch": 0.019385887616078085, "grad_norm": 6.685422444369274, "learning_rate": 1.2882882882882882e-06, "loss": 1.4813, "step": 143 }, { "epoch": 0.019521453263742966, "grad_norm": 7.824568867218183, "learning_rate": 1.2972972972972972e-06, "loss": 1.4887, "step": 144 }, { "epoch": 0.01965701891140785, "grad_norm": 5.964151928137742, "learning_rate": 1.3063063063063063e-06, "loss": 1.4656, "step": 145 }, { "epoch": 0.01979258455907273, "grad_norm": 6.5647795739261285, "learning_rate": 1.3153153153153153e-06, "loss": 1.4598, "step": 146 }, { "epoch": 0.019928150206737612, "grad_norm": 22.169352087458673, "learning_rate": 1.3243243243243244e-06, "loss": 1.4795, "step": 147 }, { "epoch": 0.020063715854402496, "grad_norm": 6.572148913413069, "learning_rate": 1.3333333333333332e-06, "loss": 1.4448, "step": 148 }, { "epoch": 0.020199281502067377, "grad_norm": 8.145806526914168, "learning_rate": 1.3423423423423422e-06, "loss": 1.4676, "step": 149 }, { "epoch": 0.020334847149732257, "grad_norm": 8.143039273021522, "learning_rate": 1.3513513513513513e-06, "loss": 1.4508, "step": 150 }, { "epoch": 0.020470412797397138, "grad_norm": 7.338801967878059, "learning_rate": 1.3603603603603603e-06, "loss": 1.4852, "step": 151 }, { "epoch": 0.020605978445062022, "grad_norm": 5.322061551173442, "learning_rate": 1.3693693693693694e-06, "loss": 1.4485, "step": 152 }, { "epoch": 0.020741544092726903, "grad_norm": 6.246409377411929, "learning_rate": 1.3783783783783782e-06, "loss": 1.4263, "step": 153 }, { "epoch": 0.020877109740391784, "grad_norm": 7.595245418558238, "learning_rate": 1.3873873873873873e-06, "loss": 1.4329, "step": 154 }, { "epoch": 0.021012675388056668, "grad_norm": 3.7874290647627338, "learning_rate": 1.3963963963963963e-06, "loss": 1.4498, "step": 155 }, { "epoch": 0.02114824103572155, "grad_norm": 12.83085628941315, "learning_rate": 1.4054054054054054e-06, "loss": 1.4479, "step": 156 }, { "epoch": 0.02128380668338643, "grad_norm": 6.3375445241242545, "learning_rate": 1.4144144144144144e-06, "loss": 1.4295, "step": 157 }, { "epoch": 0.02141937233105131, "grad_norm": 9.631414236531649, "learning_rate": 1.4234234234234232e-06, "loss": 1.4577, "step": 158 }, { "epoch": 0.021554937978716195, "grad_norm": 5.928978091529006, "learning_rate": 1.4324324324324323e-06, "loss": 1.485, "step": 159 }, { "epoch": 0.021690503626381075, "grad_norm": 11.312392672971228, "learning_rate": 1.4414414414414413e-06, "loss": 1.4322, "step": 160 }, { "epoch": 0.021826069274045956, "grad_norm": 6.062307712143609, "learning_rate": 1.4504504504504504e-06, "loss": 1.4238, "step": 161 }, { "epoch": 0.021961634921710837, "grad_norm": 6.886703665802914, "learning_rate": 1.4594594594594594e-06, "loss": 1.4552, "step": 162 }, { "epoch": 0.02209720056937572, "grad_norm": 18.756921678215658, "learning_rate": 1.4684684684684685e-06, "loss": 1.4264, "step": 163 }, { "epoch": 0.022232766217040602, "grad_norm": 6.419853503215192, "learning_rate": 1.4774774774774773e-06, "loss": 1.4269, "step": 164 }, { "epoch": 0.022368331864705483, "grad_norm": 6.63003442370533, "learning_rate": 1.4864864864864864e-06, "loss": 1.4319, "step": 165 }, { "epoch": 0.022503897512370367, "grad_norm": 4.790633009608216, "learning_rate": 1.4954954954954954e-06, "loss": 1.4399, "step": 166 }, { "epoch": 0.022639463160035248, "grad_norm": 7.022411485444659, "learning_rate": 1.5045045045045045e-06, "loss": 1.4497, "step": 167 }, { "epoch": 0.022775028807700128, "grad_norm": 7.3799040903640325, "learning_rate": 1.5135135135135135e-06, "loss": 1.4705, "step": 168 }, { "epoch": 0.02291059445536501, "grad_norm": 8.389158463696175, "learning_rate": 1.5225225225225225e-06, "loss": 1.4381, "step": 169 }, { "epoch": 0.023046160103029893, "grad_norm": 7.599052864231493, "learning_rate": 1.5315315315315314e-06, "loss": 1.4643, "step": 170 }, { "epoch": 0.023181725750694774, "grad_norm": 4.036784129987057, "learning_rate": 1.5405405405405404e-06, "loss": 1.4281, "step": 171 }, { "epoch": 0.023317291398359655, "grad_norm": 6.033869229904262, "learning_rate": 1.5495495495495495e-06, "loss": 1.4552, "step": 172 }, { "epoch": 0.02345285704602454, "grad_norm": 12.034372346120493, "learning_rate": 1.5585585585585585e-06, "loss": 1.4106, "step": 173 }, { "epoch": 0.02358842269368942, "grad_norm": 4.853857193138236, "learning_rate": 1.5675675675675676e-06, "loss": 1.4167, "step": 174 }, { "epoch": 0.0237239883413543, "grad_norm": 24.39558704305203, "learning_rate": 1.5765765765765766e-06, "loss": 1.42, "step": 175 }, { "epoch": 0.02385955398901918, "grad_norm": 7.7669320710288865, "learning_rate": 1.5855855855855855e-06, "loss": 1.427, "step": 176 }, { "epoch": 0.023995119636684065, "grad_norm": 7.132125652303494, "learning_rate": 1.5945945945945945e-06, "loss": 1.4117, "step": 177 }, { "epoch": 0.024130685284348946, "grad_norm": 6.285410254791736, "learning_rate": 1.6036036036036035e-06, "loss": 1.4404, "step": 178 }, { "epoch": 0.024266250932013827, "grad_norm": 5.001219676903, "learning_rate": 1.6126126126126126e-06, "loss": 1.4229, "step": 179 }, { "epoch": 0.02440181657967871, "grad_norm": 9.853572273669368, "learning_rate": 1.6216216216216216e-06, "loss": 1.4192, "step": 180 }, { "epoch": 0.024537382227343592, "grad_norm": 4.9869076421306096, "learning_rate": 1.6306306306306305e-06, "loss": 1.4533, "step": 181 }, { "epoch": 0.024672947875008473, "grad_norm": 5.275160156236582, "learning_rate": 1.6396396396396395e-06, "loss": 1.4453, "step": 182 }, { "epoch": 0.024808513522673353, "grad_norm": 6.542097233101092, "learning_rate": 1.6486486486486486e-06, "loss": 1.4718, "step": 183 }, { "epoch": 0.024944079170338238, "grad_norm": 7.892057245328271, "learning_rate": 1.6576576576576576e-06, "loss": 1.4243, "step": 184 }, { "epoch": 0.025079644818003118, "grad_norm": 4.745495203866797, "learning_rate": 1.6666666666666667e-06, "loss": 1.4242, "step": 185 }, { "epoch": 0.025215210465668, "grad_norm": 6.230171760493501, "learning_rate": 1.6756756756756755e-06, "loss": 1.4036, "step": 186 }, { "epoch": 0.02535077611333288, "grad_norm": 6.540885291783418, "learning_rate": 1.6846846846846845e-06, "loss": 1.4271, "step": 187 }, { "epoch": 0.025486341760997764, "grad_norm": 9.047320335321341, "learning_rate": 1.6936936936936936e-06, "loss": 1.4086, "step": 188 }, { "epoch": 0.025621907408662645, "grad_norm": 4.030680073556825, "learning_rate": 1.7027027027027026e-06, "loss": 1.4443, "step": 189 }, { "epoch": 0.025757473056327525, "grad_norm": 4.461952627126357, "learning_rate": 1.7117117117117117e-06, "loss": 1.4018, "step": 190 }, { "epoch": 0.02589303870399241, "grad_norm": 5.226477519229153, "learning_rate": 1.7207207207207205e-06, "loss": 1.3899, "step": 191 }, { "epoch": 0.02602860435165729, "grad_norm": 6.062274117558592, "learning_rate": 1.7297297297297298e-06, "loss": 1.4635, "step": 192 }, { "epoch": 0.02616416999932217, "grad_norm": 6.351022014185166, "learning_rate": 1.7387387387387386e-06, "loss": 1.465, "step": 193 }, { "epoch": 0.026299735646987052, "grad_norm": 5.848254868638427, "learning_rate": 1.7477477477477477e-06, "loss": 1.4415, "step": 194 }, { "epoch": 0.026435301294651936, "grad_norm": 5.699616751636566, "learning_rate": 1.7567567567567567e-06, "loss": 1.4179, "step": 195 }, { "epoch": 0.026570866942316817, "grad_norm": 7.08925940925803, "learning_rate": 1.7657657657657655e-06, "loss": 1.3992, "step": 196 }, { "epoch": 0.026706432589981698, "grad_norm": 6.367476996128057, "learning_rate": 1.7747747747747748e-06, "loss": 1.3865, "step": 197 }, { "epoch": 0.026841998237646582, "grad_norm": 5.090174327271608, "learning_rate": 1.7837837837837836e-06, "loss": 1.3974, "step": 198 }, { "epoch": 0.026977563885311463, "grad_norm": 5.604637277329292, "learning_rate": 1.7927927927927927e-06, "loss": 1.3828, "step": 199 }, { "epoch": 0.027113129532976343, "grad_norm": 5.590273038383943, "learning_rate": 1.8018018018018017e-06, "loss": 1.3901, "step": 200 }, { "epoch": 0.027248695180641224, "grad_norm": 20.486806350431323, "learning_rate": 1.8108108108108106e-06, "loss": 1.4133, "step": 201 }, { "epoch": 0.02738426082830611, "grad_norm": 7.887162856768152, "learning_rate": 1.8198198198198198e-06, "loss": 1.4228, "step": 202 }, { "epoch": 0.02751982647597099, "grad_norm": 6.969432231534598, "learning_rate": 1.8288288288288287e-06, "loss": 1.4142, "step": 203 }, { "epoch": 0.02765539212363587, "grad_norm": 11.093202099441841, "learning_rate": 1.837837837837838e-06, "loss": 1.4532, "step": 204 }, { "epoch": 0.027790957771300754, "grad_norm": 6.049844779886499, "learning_rate": 1.8468468468468467e-06, "loss": 1.4023, "step": 205 }, { "epoch": 0.027926523418965635, "grad_norm": 33.423141143254455, "learning_rate": 1.8558558558558556e-06, "loss": 1.4036, "step": 206 }, { "epoch": 0.028062089066630515, "grad_norm": 9.03008982263621, "learning_rate": 1.8648648648648648e-06, "loss": 1.4074, "step": 207 }, { "epoch": 0.028197654714295396, "grad_norm": 4.505416162913146, "learning_rate": 1.8738738738738737e-06, "loss": 1.3883, "step": 208 }, { "epoch": 0.02833322036196028, "grad_norm": 7.8180078963774955, "learning_rate": 1.882882882882883e-06, "loss": 1.4177, "step": 209 }, { "epoch": 0.02846878600962516, "grad_norm": 14.99544444579815, "learning_rate": 1.8918918918918918e-06, "loss": 1.4267, "step": 210 }, { "epoch": 0.028604351657290042, "grad_norm": 6.201256272976691, "learning_rate": 1.9009009009009008e-06, "loss": 1.4104, "step": 211 }, { "epoch": 0.028739917304954926, "grad_norm": 7.333202439357275, "learning_rate": 1.90990990990991e-06, "loss": 1.4018, "step": 212 }, { "epoch": 0.028875482952619807, "grad_norm": 4.657106962824464, "learning_rate": 1.9189189189189187e-06, "loss": 1.3898, "step": 213 }, { "epoch": 0.029011048600284688, "grad_norm": 11.652945365141015, "learning_rate": 1.927927927927928e-06, "loss": 1.4304, "step": 214 }, { "epoch": 0.02914661424794957, "grad_norm": 5.989721939444323, "learning_rate": 1.936936936936937e-06, "loss": 1.4202, "step": 215 }, { "epoch": 0.029282179895614453, "grad_norm": 8.437827277108651, "learning_rate": 1.945945945945946e-06, "loss": 1.4222, "step": 216 }, { "epoch": 0.029417745543279333, "grad_norm": 8.61266370034404, "learning_rate": 1.954954954954955e-06, "loss": 1.4194, "step": 217 }, { "epoch": 0.029553311190944214, "grad_norm": 6.314721095001688, "learning_rate": 1.9639639639639637e-06, "loss": 1.437, "step": 218 }, { "epoch": 0.029688876838609095, "grad_norm": 4.09324736960784, "learning_rate": 1.972972972972973e-06, "loss": 1.3963, "step": 219 }, { "epoch": 0.02982444248627398, "grad_norm": 6.651959759404366, "learning_rate": 1.981981981981982e-06, "loss": 1.3893, "step": 220 }, { "epoch": 0.02996000813393886, "grad_norm": 24.732346905719567, "learning_rate": 1.990990990990991e-06, "loss": 1.3393, "step": 221 }, { "epoch": 0.03009557378160374, "grad_norm": 7.9476624485502745, "learning_rate": 2e-06, "loss": 1.397, "step": 222 }, { "epoch": 0.030231139429268625, "grad_norm": 10.830106483416774, "learning_rate": 1.9999999035789467e-06, "loss": 1.4066, "step": 223 }, { "epoch": 0.030366705076933505, "grad_norm": 4.148145401600818, "learning_rate": 1.9999996143158056e-06, "loss": 1.3703, "step": 224 }, { "epoch": 0.030502270724598386, "grad_norm": 4.805619627002686, "learning_rate": 1.9999991322106323e-06, "loss": 1.4136, "step": 225 }, { "epoch": 0.030637836372263267, "grad_norm": 4.965485386238507, "learning_rate": 1.99999845726352e-06, "loss": 1.3893, "step": 226 }, { "epoch": 0.03077340201992815, "grad_norm": 4.0252481928845985, "learning_rate": 1.9999975894745984e-06, "loss": 1.4228, "step": 227 }, { "epoch": 0.030908967667593032, "grad_norm": 4.376432610645285, "learning_rate": 1.9999965288440357e-06, "loss": 1.3934, "step": 228 }, { "epoch": 0.031044533315257913, "grad_norm": 11.658791269631394, "learning_rate": 1.9999952753720353e-06, "loss": 1.3692, "step": 229 }, { "epoch": 0.031180098962922797, "grad_norm": 3.857767420844823, "learning_rate": 1.99999382905884e-06, "loss": 1.4063, "step": 230 }, { "epoch": 0.03131566461058768, "grad_norm": 8.91951446506079, "learning_rate": 1.9999921899047284e-06, "loss": 1.3974, "step": 231 }, { "epoch": 0.03145123025825256, "grad_norm": 7.675180714228612, "learning_rate": 1.999990357910016e-06, "loss": 1.3465, "step": 232 }, { "epoch": 0.03158679590591744, "grad_norm": 4.973550490157865, "learning_rate": 1.9999883330750567e-06, "loss": 1.3729, "step": 233 }, { "epoch": 0.03172236155358232, "grad_norm": 8.351562955473106, "learning_rate": 1.9999861154002405e-06, "loss": 1.442, "step": 234 }, { "epoch": 0.0318579272012472, "grad_norm": 6.80250704635314, "learning_rate": 1.9999837048859957e-06, "loss": 1.3556, "step": 235 }, { "epoch": 0.031993492848912085, "grad_norm": 4.9067536550802, "learning_rate": 1.999981101532787e-06, "loss": 1.3824, "step": 236 }, { "epoch": 0.03212905849657697, "grad_norm": 6.196355244386288, "learning_rate": 1.9999783053411157e-06, "loss": 1.3753, "step": 237 }, { "epoch": 0.032264624144241846, "grad_norm": 9.439705908623946, "learning_rate": 1.999975316311522e-06, "loss": 1.3651, "step": 238 }, { "epoch": 0.03240018979190673, "grad_norm": 7.494355590244044, "learning_rate": 1.9999721344445816e-06, "loss": 1.4321, "step": 239 }, { "epoch": 0.032535755439571615, "grad_norm": 16.15463121500745, "learning_rate": 1.9999687597409084e-06, "loss": 1.4189, "step": 240 }, { "epoch": 0.03267132108723649, "grad_norm": 7.511353824748306, "learning_rate": 1.9999651922011532e-06, "loss": 1.4109, "step": 241 }, { "epoch": 0.032806886734901376, "grad_norm": 4.366657137949344, "learning_rate": 1.999961431826004e-06, "loss": 1.3782, "step": 242 }, { "epoch": 0.03294245238256626, "grad_norm": 5.906732990071201, "learning_rate": 1.999957478616186e-06, "loss": 1.3818, "step": 243 }, { "epoch": 0.03307801803023114, "grad_norm": 12.687886138057157, "learning_rate": 1.9999533325724613e-06, "loss": 1.4151, "step": 244 }, { "epoch": 0.03321358367789602, "grad_norm": 7.374725185350206, "learning_rate": 1.9999489936956295e-06, "loss": 1.3602, "step": 245 }, { "epoch": 0.033349149325560906, "grad_norm": 4.345442579839099, "learning_rate": 1.9999444619865273e-06, "loss": 1.3811, "step": 246 }, { "epoch": 0.03348471497322578, "grad_norm": 7.01943676757666, "learning_rate": 1.999939737446029e-06, "loss": 1.3643, "step": 247 }, { "epoch": 0.03362028062089067, "grad_norm": 5.914453332742853, "learning_rate": 1.999934820075045e-06, "loss": 1.3977, "step": 248 }, { "epoch": 0.033755846268555545, "grad_norm": 8.115462440970285, "learning_rate": 1.9999297098745245e-06, "loss": 1.3961, "step": 249 }, { "epoch": 0.03389141191622043, "grad_norm": 4.241045633626498, "learning_rate": 1.999924406845452e-06, "loss": 1.4044, "step": 250 }, { "epoch": 0.03402697756388531, "grad_norm": 5.4519381360373025, "learning_rate": 1.9999189109888503e-06, "loss": 1.3565, "step": 251 }, { "epoch": 0.03416254321155019, "grad_norm": 5.464638606125798, "learning_rate": 1.9999132223057797e-06, "loss": 1.4006, "step": 252 }, { "epoch": 0.034298108859215075, "grad_norm": 5.205277414201136, "learning_rate": 1.999907340797337e-06, "loss": 1.3777, "step": 253 }, { "epoch": 0.03443367450687996, "grad_norm": 5.138878356407394, "learning_rate": 1.9999012664646567e-06, "loss": 1.4022, "step": 254 }, { "epoch": 0.034569240154544836, "grad_norm": 31.105741652227955, "learning_rate": 1.99989499930891e-06, "loss": 1.3867, "step": 255 }, { "epoch": 0.03470480580220972, "grad_norm": 4.451596485087273, "learning_rate": 1.999888539331305e-06, "loss": 1.3399, "step": 256 }, { "epoch": 0.034840371449874605, "grad_norm": 5.376950574356271, "learning_rate": 1.999881886533088e-06, "loss": 1.4075, "step": 257 }, { "epoch": 0.03497593709753948, "grad_norm": 5.885261225403823, "learning_rate": 1.9998750409155416e-06, "loss": 1.3282, "step": 258 }, { "epoch": 0.035111502745204366, "grad_norm": 5.497009921277833, "learning_rate": 1.999868002479986e-06, "loss": 1.4097, "step": 259 }, { "epoch": 0.035247068392869244, "grad_norm": 8.025520000069164, "learning_rate": 1.9998607712277792e-06, "loss": 1.3896, "step": 260 }, { "epoch": 0.03538263404053413, "grad_norm": 21.861010683952163, "learning_rate": 1.9998533471603145e-06, "loss": 1.3831, "step": 261 }, { "epoch": 0.03551819968819901, "grad_norm": 12.963449600465703, "learning_rate": 1.9998457302790245e-06, "loss": 1.353, "step": 262 }, { "epoch": 0.03565376533586389, "grad_norm": 5.580645503705842, "learning_rate": 1.9998379205853775e-06, "loss": 1.365, "step": 263 }, { "epoch": 0.03578933098352877, "grad_norm": 3.922705356028408, "learning_rate": 1.9998299180808796e-06, "loss": 1.3792, "step": 264 }, { "epoch": 0.03592489663119366, "grad_norm": 4.039468952751028, "learning_rate": 1.999821722767075e-06, "loss": 1.3544, "step": 265 }, { "epoch": 0.036060462278858535, "grad_norm": 6.441343202888422, "learning_rate": 1.9998133346455422e-06, "loss": 1.3669, "step": 266 }, { "epoch": 0.03619602792652342, "grad_norm": 4.1573180097716245, "learning_rate": 1.9998047537179007e-06, "loss": 1.3583, "step": 267 }, { "epoch": 0.0363315935741883, "grad_norm": 6.65986126151815, "learning_rate": 1.999795979985804e-06, "loss": 1.363, "step": 268 }, { "epoch": 0.03646715922185318, "grad_norm": 10.683083924164087, "learning_rate": 1.9997870134509444e-06, "loss": 1.362, "step": 269 }, { "epoch": 0.036602724869518065, "grad_norm": 4.646108789065889, "learning_rate": 1.9997778541150515e-06, "loss": 1.3456, "step": 270 }, { "epoch": 0.03673829051718295, "grad_norm": 8.193605815652578, "learning_rate": 1.9997685019798908e-06, "loss": 1.3419, "step": 271 }, { "epoch": 0.036873856164847826, "grad_norm": 3.8011937100721496, "learning_rate": 1.999758957047266e-06, "loss": 1.3952, "step": 272 }, { "epoch": 0.03700942181251271, "grad_norm": 3.858701175457161, "learning_rate": 1.9997492193190185e-06, "loss": 1.372, "step": 273 }, { "epoch": 0.03714498746017759, "grad_norm": 5.368093450982288, "learning_rate": 1.9997392887970253e-06, "loss": 1.3555, "step": 274 }, { "epoch": 0.03728055310784247, "grad_norm": 4.288683478830354, "learning_rate": 1.999729165483202e-06, "loss": 1.359, "step": 275 }, { "epoch": 0.037416118755507356, "grad_norm": 6.921104841591178, "learning_rate": 1.9997188493795e-06, "loss": 1.3843, "step": 276 }, { "epoch": 0.037551684403172234, "grad_norm": 8.21322507704822, "learning_rate": 1.99970834048791e-06, "loss": 1.3732, "step": 277 }, { "epoch": 0.03768725005083712, "grad_norm": 5.677514213281259, "learning_rate": 1.999697638810457e-06, "loss": 1.3571, "step": 278 }, { "epoch": 0.037822815698502, "grad_norm": 7.373991288967245, "learning_rate": 1.9996867443492057e-06, "loss": 1.3936, "step": 279 }, { "epoch": 0.03795838134616688, "grad_norm": 12.381749262477573, "learning_rate": 1.999675657106257e-06, "loss": 1.3791, "step": 280 }, { "epoch": 0.038093946993831763, "grad_norm": 4.306428878490322, "learning_rate": 1.9996643770837486e-06, "loss": 1.3537, "step": 281 }, { "epoch": 0.03822951264149665, "grad_norm": 4.736435326126218, "learning_rate": 1.999652904283856e-06, "loss": 1.3838, "step": 282 }, { "epoch": 0.038365078289161525, "grad_norm": 15.609268611203728, "learning_rate": 1.9996412387087914e-06, "loss": 1.3669, "step": 283 }, { "epoch": 0.03850064393682641, "grad_norm": 4.505180561617885, "learning_rate": 1.9996293803608053e-06, "loss": 1.3133, "step": 284 }, { "epoch": 0.038636209584491286, "grad_norm": 3.60651900423864, "learning_rate": 1.9996173292421828e-06, "loss": 1.3663, "step": 285 }, { "epoch": 0.03877177523215617, "grad_norm": 7.193542769315229, "learning_rate": 1.9996050853552494e-06, "loss": 1.3985, "step": 286 }, { "epoch": 0.038907340879821055, "grad_norm": 4.476027934852836, "learning_rate": 1.999592648702366e-06, "loss": 1.3729, "step": 287 }, { "epoch": 0.03904290652748593, "grad_norm": 4.875321432858991, "learning_rate": 1.99958001928593e-06, "loss": 1.4072, "step": 288 }, { "epoch": 0.039178472175150816, "grad_norm": 6.561794729364456, "learning_rate": 1.9995671971083777e-06, "loss": 1.3572, "step": 289 }, { "epoch": 0.0393140378228157, "grad_norm": 4.327084888328601, "learning_rate": 1.9995541821721814e-06, "loss": 1.3559, "step": 290 }, { "epoch": 0.03944960347048058, "grad_norm": 3.707146729748083, "learning_rate": 1.9995409744798512e-06, "loss": 1.381, "step": 291 }, { "epoch": 0.03958516911814546, "grad_norm": 3.493738741844489, "learning_rate": 1.999527574033934e-06, "loss": 1.3154, "step": 292 }, { "epoch": 0.039720734765810346, "grad_norm": 5.103176018227813, "learning_rate": 1.9995139808370142e-06, "loss": 1.3532, "step": 293 }, { "epoch": 0.039856300413475224, "grad_norm": 17.75009291750115, "learning_rate": 1.9995001948917124e-06, "loss": 1.3363, "step": 294 }, { "epoch": 0.03999186606114011, "grad_norm": 3.9508998553779224, "learning_rate": 1.999486216200688e-06, "loss": 1.4003, "step": 295 }, { "epoch": 0.04012743170880499, "grad_norm": 3.5515250726078436, "learning_rate": 1.999472044766636e-06, "loss": 1.3831, "step": 296 }, { "epoch": 0.04026299735646987, "grad_norm": 18.85952214845598, "learning_rate": 1.9994576805922898e-06, "loss": 1.3541, "step": 297 }, { "epoch": 0.040398563004134753, "grad_norm": 4.624835117790798, "learning_rate": 1.9994431236804187e-06, "loss": 1.3512, "step": 298 }, { "epoch": 0.04053412865179963, "grad_norm": 4.75871047622305, "learning_rate": 1.9994283740338306e-06, "loss": 1.3801, "step": 299 }, { "epoch": 0.040669694299464515, "grad_norm": 4.43367239186552, "learning_rate": 1.9994134316553693e-06, "loss": 1.3811, "step": 300 }, { "epoch": 0.0408052599471294, "grad_norm": 7.813520321943819, "learning_rate": 1.999398296547917e-06, "loss": 1.3363, "step": 301 }, { "epoch": 0.040940825594794276, "grad_norm": 3.7777901193646466, "learning_rate": 1.9993829687143913e-06, "loss": 1.3678, "step": 302 }, { "epoch": 0.04107639124245916, "grad_norm": 5.055204539300215, "learning_rate": 1.9993674481577497e-06, "loss": 1.3622, "step": 303 }, { "epoch": 0.041211956890124045, "grad_norm": 5.622995450577575, "learning_rate": 1.9993517348809836e-06, "loss": 1.3447, "step": 304 }, { "epoch": 0.04134752253778892, "grad_norm": 4.256054048803271, "learning_rate": 1.999335828887124e-06, "loss": 1.3492, "step": 305 }, { "epoch": 0.041483088185453806, "grad_norm": 4.675115466771846, "learning_rate": 1.999319730179238e-06, "loss": 1.3772, "step": 306 }, { "epoch": 0.04161865383311869, "grad_norm": 4.005658015870402, "learning_rate": 1.9993034387604302e-06, "loss": 1.3505, "step": 307 }, { "epoch": 0.04175421948078357, "grad_norm": 4.002589049528694, "learning_rate": 1.9992869546338428e-06, "loss": 1.3556, "step": 308 }, { "epoch": 0.04188978512844845, "grad_norm": 7.089754809398343, "learning_rate": 1.9992702778026532e-06, "loss": 1.3302, "step": 309 }, { "epoch": 0.042025350776113336, "grad_norm": 4.39929067334065, "learning_rate": 1.999253408270079e-06, "loss": 1.3256, "step": 310 }, { "epoch": 0.042160916423778214, "grad_norm": 6.798124091830079, "learning_rate": 1.9992363460393724e-06, "loss": 1.337, "step": 311 }, { "epoch": 0.0422964820714431, "grad_norm": 3.875579754471458, "learning_rate": 1.9992190911138236e-06, "loss": 1.3364, "step": 312 }, { "epoch": 0.042432047719107975, "grad_norm": 6.085148086416932, "learning_rate": 1.999201643496761e-06, "loss": 1.3475, "step": 313 }, { "epoch": 0.04256761336677286, "grad_norm": 4.562716727285527, "learning_rate": 1.9991840031915484e-06, "loss": 1.342, "step": 314 }, { "epoch": 0.042703179014437743, "grad_norm": 3.8178478331375967, "learning_rate": 1.9991661702015877e-06, "loss": 1.3495, "step": 315 }, { "epoch": 0.04283874466210262, "grad_norm": 4.564506489204476, "learning_rate": 1.9991481445303182e-06, "loss": 1.3552, "step": 316 }, { "epoch": 0.042974310309767505, "grad_norm": 20.5015673315407, "learning_rate": 1.999129926181216e-06, "loss": 1.3288, "step": 317 }, { "epoch": 0.04310987595743239, "grad_norm": 6.110037986926639, "learning_rate": 1.9991115151577938e-06, "loss": 1.3413, "step": 318 }, { "epoch": 0.043245441605097266, "grad_norm": 5.310617996886684, "learning_rate": 1.999092911463603e-06, "loss": 1.3754, "step": 319 }, { "epoch": 0.04338100725276215, "grad_norm": 5.96592056177986, "learning_rate": 1.99907411510223e-06, "loss": 1.3751, "step": 320 }, { "epoch": 0.043516572900427035, "grad_norm": 4.407097572946085, "learning_rate": 1.9990551260773003e-06, "loss": 1.3291, "step": 321 }, { "epoch": 0.04365213854809191, "grad_norm": 3.979699022588242, "learning_rate": 1.9990359443924755e-06, "loss": 1.3655, "step": 322 }, { "epoch": 0.043787704195756796, "grad_norm": 4.328006464048339, "learning_rate": 1.999016570051455e-06, "loss": 1.3602, "step": 323 }, { "epoch": 0.043923269843421674, "grad_norm": 4.508898977551904, "learning_rate": 1.9989970030579744e-06, "loss": 1.3474, "step": 324 }, { "epoch": 0.04405883549108656, "grad_norm": 4.518870146928563, "learning_rate": 1.9989772434158076e-06, "loss": 1.3621, "step": 325 }, { "epoch": 0.04419440113875144, "grad_norm": 4.498852911545795, "learning_rate": 1.9989572911287647e-06, "loss": 1.3455, "step": 326 }, { "epoch": 0.04432996678641632, "grad_norm": 5.502014758574464, "learning_rate": 1.9989371462006938e-06, "loss": 1.3399, "step": 327 }, { "epoch": 0.044465532434081204, "grad_norm": 4.863998745621132, "learning_rate": 1.998916808635479e-06, "loss": 1.3621, "step": 328 }, { "epoch": 0.04460109808174609, "grad_norm": 5.182513643199714, "learning_rate": 1.998896278437043e-06, "loss": 1.3634, "step": 329 }, { "epoch": 0.044736663729410965, "grad_norm": 4.306794154167989, "learning_rate": 1.998875555609344e-06, "loss": 1.3333, "step": 330 }, { "epoch": 0.04487222937707585, "grad_norm": 3.6444208523580475, "learning_rate": 1.998854640156379e-06, "loss": 1.3443, "step": 331 }, { "epoch": 0.045007795024740734, "grad_norm": 62.09275837553969, "learning_rate": 1.998833532082181e-06, "loss": 1.2934, "step": 332 }, { "epoch": 0.04514336067240561, "grad_norm": 4.267525902393983, "learning_rate": 1.9988122313908212e-06, "loss": 1.3698, "step": 333 }, { "epoch": 0.045278926320070495, "grad_norm": 6.286050821885431, "learning_rate": 1.998790738086406e-06, "loss": 1.3847, "step": 334 }, { "epoch": 0.04541449196773538, "grad_norm": 3.5044370164191823, "learning_rate": 1.9987690521730817e-06, "loss": 1.3517, "step": 335 }, { "epoch": 0.045550057615400256, "grad_norm": 9.929191318809762, "learning_rate": 1.9987471736550287e-06, "loss": 1.3763, "step": 336 }, { "epoch": 0.04568562326306514, "grad_norm": 6.991677908417916, "learning_rate": 1.9987251025364677e-06, "loss": 1.3847, "step": 337 }, { "epoch": 0.04582118891073002, "grad_norm": 4.229952622953037, "learning_rate": 1.9987028388216532e-06, "loss": 1.3557, "step": 338 }, { "epoch": 0.0459567545583949, "grad_norm": 4.733953991658373, "learning_rate": 1.99868038251488e-06, "loss": 1.3436, "step": 339 }, { "epoch": 0.046092320206059786, "grad_norm": 7.074789895319798, "learning_rate": 1.9986577336204782e-06, "loss": 1.3257, "step": 340 }, { "epoch": 0.046227885853724664, "grad_norm": 6.936958923740686, "learning_rate": 1.9986348921428154e-06, "loss": 1.369, "step": 341 }, { "epoch": 0.04636345150138955, "grad_norm": 3.745762059361746, "learning_rate": 1.9986118580862964e-06, "loss": 1.4096, "step": 342 }, { "epoch": 0.04649901714905443, "grad_norm": 3.852254494045695, "learning_rate": 1.998588631455363e-06, "loss": 1.3338, "step": 343 }, { "epoch": 0.04663458279671931, "grad_norm": 5.254937250093801, "learning_rate": 1.9985652122544947e-06, "loss": 1.3047, "step": 344 }, { "epoch": 0.046770148444384194, "grad_norm": 3.8528097799560848, "learning_rate": 1.998541600488207e-06, "loss": 1.3169, "step": 345 }, { "epoch": 0.04690571409204908, "grad_norm": 6.3991042600286665, "learning_rate": 1.998517796161054e-06, "loss": 1.3733, "step": 346 }, { "epoch": 0.047041279739713955, "grad_norm": 4.457939092695606, "learning_rate": 1.9984937992776257e-06, "loss": 1.3679, "step": 347 }, { "epoch": 0.04717684538737884, "grad_norm": 4.6675070876436875, "learning_rate": 1.99846960984255e-06, "loss": 1.3811, "step": 348 }, { "epoch": 0.04731241103504372, "grad_norm": 3.91656230888219, "learning_rate": 1.9984452278604907e-06, "loss": 1.3724, "step": 349 }, { "epoch": 0.0474479766827086, "grad_norm": 3.6560815955346455, "learning_rate": 1.998420653336151e-06, "loss": 1.34, "step": 350 }, { "epoch": 0.047583542330373485, "grad_norm": 5.1857945099130704, "learning_rate": 1.99839588627427e-06, "loss": 1.3357, "step": 351 }, { "epoch": 0.04771910797803836, "grad_norm": 3.5489637265862446, "learning_rate": 1.9983709266796224e-06, "loss": 1.3094, "step": 352 }, { "epoch": 0.047854673625703247, "grad_norm": 4.008810598483268, "learning_rate": 1.9983457745570222e-06, "loss": 1.3453, "step": 353 }, { "epoch": 0.04799023927336813, "grad_norm": 4.8718031868849145, "learning_rate": 1.99832042991132e-06, "loss": 1.3418, "step": 354 }, { "epoch": 0.04812580492103301, "grad_norm": 4.335425795427466, "learning_rate": 1.9982948927474033e-06, "loss": 1.3709, "step": 355 }, { "epoch": 0.04826137056869789, "grad_norm": 3.9190697604163383, "learning_rate": 1.9982691630701966e-06, "loss": 1.3509, "step": 356 }, { "epoch": 0.048396936216362776, "grad_norm": 10.56037004608258, "learning_rate": 1.9982432408846615e-06, "loss": 1.3574, "step": 357 }, { "epoch": 0.048532501864027654, "grad_norm": 5.791960413724488, "learning_rate": 1.998217126195797e-06, "loss": 1.3469, "step": 358 }, { "epoch": 0.04866806751169254, "grad_norm": 4.9202841234562, "learning_rate": 1.9981908190086398e-06, "loss": 1.3505, "step": 359 }, { "epoch": 0.04880363315935742, "grad_norm": 3.614503654104304, "learning_rate": 1.9981643193282617e-06, "loss": 1.3238, "step": 360 }, { "epoch": 0.0489391988070223, "grad_norm": 4.075094816614907, "learning_rate": 1.9981376271597735e-06, "loss": 1.3784, "step": 361 }, { "epoch": 0.049074764454687184, "grad_norm": 5.620930473075717, "learning_rate": 1.9981107425083233e-06, "loss": 1.3313, "step": 362 }, { "epoch": 0.04921033010235206, "grad_norm": 4.494958800056155, "learning_rate": 1.9980836653790946e-06, "loss": 1.3558, "step": 363 }, { "epoch": 0.049345895750016945, "grad_norm": 7.244644997796911, "learning_rate": 1.9980563957773097e-06, "loss": 1.3211, "step": 364 }, { "epoch": 0.04948146139768183, "grad_norm": 4.921504174401483, "learning_rate": 1.998028933708227e-06, "loss": 1.3106, "step": 365 }, { "epoch": 0.04961702704534671, "grad_norm": 5.460914977965747, "learning_rate": 1.9980012791771424e-06, "loss": 1.3685, "step": 366 }, { "epoch": 0.04975259269301159, "grad_norm": 4.59681621066532, "learning_rate": 1.9979734321893885e-06, "loss": 1.3112, "step": 367 }, { "epoch": 0.049888158340676475, "grad_norm": 7.535145743250462, "learning_rate": 1.9979453927503364e-06, "loss": 1.3597, "step": 368 }, { "epoch": 0.05002372398834135, "grad_norm": 7.641898346010865, "learning_rate": 1.9979171608653923e-06, "loss": 1.3321, "step": 369 }, { "epoch": 0.050159289636006237, "grad_norm": 20.980562524383753, "learning_rate": 1.9978887365400006e-06, "loss": 1.3407, "step": 370 }, { "epoch": 0.05029485528367112, "grad_norm": 3.38919948713843, "learning_rate": 1.997860119779643e-06, "loss": 1.3316, "step": 371 }, { "epoch": 0.050430420931336, "grad_norm": 4.1508524616430655, "learning_rate": 1.9978313105898378e-06, "loss": 1.3377, "step": 372 }, { "epoch": 0.05056598657900088, "grad_norm": 5.802312240547198, "learning_rate": 1.997802308976141e-06, "loss": 1.364, "step": 373 }, { "epoch": 0.05070155222666576, "grad_norm": 5.767521338042631, "learning_rate": 1.997773114944145e-06, "loss": 1.3448, "step": 374 }, { "epoch": 0.050837117874330644, "grad_norm": 4.920654146186082, "learning_rate": 1.99774372849948e-06, "loss": 1.289, "step": 375 }, { "epoch": 0.05097268352199553, "grad_norm": 5.355250502028538, "learning_rate": 1.9977141496478124e-06, "loss": 1.3329, "step": 376 }, { "epoch": 0.051108249169660405, "grad_norm": 5.82247511688523, "learning_rate": 1.9976843783948463e-06, "loss": 1.3713, "step": 377 }, { "epoch": 0.05124381481732529, "grad_norm": 3.763335655586951, "learning_rate": 1.9976544147463237e-06, "loss": 1.3227, "step": 378 }, { "epoch": 0.051379380464990174, "grad_norm": 4.853182937991789, "learning_rate": 1.9976242587080216e-06, "loss": 1.3564, "step": 379 }, { "epoch": 0.05151494611265505, "grad_norm": 15.214407590088928, "learning_rate": 1.997593910285756e-06, "loss": 1.3196, "step": 380 }, { "epoch": 0.051650511760319935, "grad_norm": 4.29869374471758, "learning_rate": 1.9975633694853797e-06, "loss": 1.3513, "step": 381 }, { "epoch": 0.05178607740798482, "grad_norm": 5.540328258026923, "learning_rate": 1.9975326363127815e-06, "loss": 1.3349, "step": 382 }, { "epoch": 0.0519216430556497, "grad_norm": 5.022754504965433, "learning_rate": 1.9975017107738887e-06, "loss": 1.3377, "step": 383 }, { "epoch": 0.05205720870331458, "grad_norm": 3.607018246600445, "learning_rate": 1.997470592874665e-06, "loss": 1.3337, "step": 384 }, { "epoch": 0.052192774350979465, "grad_norm": 6.407181048490426, "learning_rate": 1.9974392826211107e-06, "loss": 1.3794, "step": 385 }, { "epoch": 0.05232833999864434, "grad_norm": 4.30073434788955, "learning_rate": 1.997407780019264e-06, "loss": 1.2971, "step": 386 }, { "epoch": 0.05246390564630923, "grad_norm": 8.282425901395088, "learning_rate": 1.9973760850752e-06, "loss": 1.304, "step": 387 }, { "epoch": 0.052599471293974104, "grad_norm": 4.372734273525402, "learning_rate": 1.997344197795031e-06, "loss": 1.3617, "step": 388 }, { "epoch": 0.05273503694163899, "grad_norm": 4.385479323690286, "learning_rate": 1.9973121181849056e-06, "loss": 1.3457, "step": 389 }, { "epoch": 0.05287060258930387, "grad_norm": 4.188126412494196, "learning_rate": 1.997279846251011e-06, "loss": 1.3318, "step": 390 }, { "epoch": 0.05300616823696875, "grad_norm": 4.627279480430818, "learning_rate": 1.99724738199957e-06, "loss": 1.3615, "step": 391 }, { "epoch": 0.053141733884633634, "grad_norm": 4.918277346842277, "learning_rate": 1.997214725436843e-06, "loss": 1.3563, "step": 392 }, { "epoch": 0.05327729953229852, "grad_norm": 3.317902082709688, "learning_rate": 1.997181876569128e-06, "loss": 1.3189, "step": 393 }, { "epoch": 0.053412865179963395, "grad_norm": 4.939309365977563, "learning_rate": 1.9971488354027592e-06, "loss": 1.3211, "step": 394 }, { "epoch": 0.05354843082762828, "grad_norm": 4.923421066802084, "learning_rate": 1.997115601944108e-06, "loss": 1.3048, "step": 395 }, { "epoch": 0.053683996475293164, "grad_norm": 3.872796621332436, "learning_rate": 1.9970821761995843e-06, "loss": 1.2969, "step": 396 }, { "epoch": 0.05381956212295804, "grad_norm": 3.7464906379621463, "learning_rate": 1.9970485581756334e-06, "loss": 1.3258, "step": 397 }, { "epoch": 0.053955127770622925, "grad_norm": 10.023076167333594, "learning_rate": 1.997014747878738e-06, "loss": 1.3102, "step": 398 }, { "epoch": 0.0540906934182878, "grad_norm": 4.416643733956577, "learning_rate": 1.996980745315419e-06, "loss": 1.3437, "step": 399 }, { "epoch": 0.05422625906595269, "grad_norm": 4.097138436336728, "learning_rate": 1.9969465504922324e-06, "loss": 1.3689, "step": 400 }, { "epoch": 0.05436182471361757, "grad_norm": 3.573418612366874, "learning_rate": 1.9969121634157734e-06, "loss": 1.3347, "step": 401 }, { "epoch": 0.05449739036128245, "grad_norm": 4.786024459542757, "learning_rate": 1.9968775840926725e-06, "loss": 1.3288, "step": 402 }, { "epoch": 0.05463295600894733, "grad_norm": 6.864013078336004, "learning_rate": 1.996842812529598e-06, "loss": 1.2885, "step": 403 }, { "epoch": 0.05476852165661222, "grad_norm": 4.728442475440963, "learning_rate": 1.9968078487332563e-06, "loss": 1.327, "step": 404 }, { "epoch": 0.054904087304277094, "grad_norm": 9.050034141519461, "learning_rate": 1.9967726927103893e-06, "loss": 1.3147, "step": 405 }, { "epoch": 0.05503965295194198, "grad_norm": 4.24128226928266, "learning_rate": 1.9967373444677763e-06, "loss": 1.2898, "step": 406 }, { "epoch": 0.05517521859960686, "grad_norm": 8.032415512016682, "learning_rate": 1.996701804012234e-06, "loss": 1.3213, "step": 407 }, { "epoch": 0.05531078424727174, "grad_norm": 6.787808239931473, "learning_rate": 1.9966660713506167e-06, "loss": 1.3584, "step": 408 }, { "epoch": 0.055446349894936624, "grad_norm": 5.110669220664282, "learning_rate": 1.996630146489815e-06, "loss": 1.3289, "step": 409 }, { "epoch": 0.05558191554260151, "grad_norm": 4.380172657409232, "learning_rate": 1.996594029436756e-06, "loss": 1.3323, "step": 410 }, { "epoch": 0.055717481190266385, "grad_norm": 3.9943924890537383, "learning_rate": 1.9965577201984048e-06, "loss": 1.2829, "step": 411 }, { "epoch": 0.05585304683793127, "grad_norm": 6.3092933943180824, "learning_rate": 1.9965212187817644e-06, "loss": 1.3124, "step": 412 }, { "epoch": 0.05598861248559615, "grad_norm": 4.848205681083869, "learning_rate": 1.9964845251938722e-06, "loss": 1.3381, "step": 413 }, { "epoch": 0.05612417813326103, "grad_norm": 4.304156244786856, "learning_rate": 1.9964476394418054e-06, "loss": 1.3103, "step": 414 }, { "epoch": 0.056259743780925915, "grad_norm": 4.02633224992112, "learning_rate": 1.996410561532677e-06, "loss": 1.3232, "step": 415 }, { "epoch": 0.05639530942859079, "grad_norm": 4.552120146822936, "learning_rate": 1.996373291473637e-06, "loss": 1.3151, "step": 416 }, { "epoch": 0.05653087507625568, "grad_norm": 4.281448382760846, "learning_rate": 1.9963358292718723e-06, "loss": 1.3674, "step": 417 }, { "epoch": 0.05666644072392056, "grad_norm": 4.139027532575556, "learning_rate": 1.996298174934608e-06, "loss": 1.3163, "step": 418 }, { "epoch": 0.05680200637158544, "grad_norm": 4.815400425631094, "learning_rate": 1.996260328469104e-06, "loss": 1.316, "step": 419 }, { "epoch": 0.05693757201925032, "grad_norm": 7.559015036533734, "learning_rate": 1.9962222898826608e-06, "loss": 1.3291, "step": 420 }, { "epoch": 0.05707313766691521, "grad_norm": 4.398224874097076, "learning_rate": 1.996184059182612e-06, "loss": 1.3128, "step": 421 }, { "epoch": 0.057208703314580084, "grad_norm": 4.15915253746429, "learning_rate": 1.996145636376331e-06, "loss": 1.3497, "step": 422 }, { "epoch": 0.05734426896224497, "grad_norm": 3.644838215923158, "learning_rate": 1.996107021471227e-06, "loss": 1.3468, "step": 423 }, { "epoch": 0.05747983460990985, "grad_norm": 7.179651410374043, "learning_rate": 1.996068214474747e-06, "loss": 1.3268, "step": 424 }, { "epoch": 0.05761540025757473, "grad_norm": 5.019460834820341, "learning_rate": 1.996029215394374e-06, "loss": 1.3174, "step": 425 }, { "epoch": 0.057750965905239614, "grad_norm": 3.845878998648737, "learning_rate": 1.9959900242376294e-06, "loss": 1.2969, "step": 426 }, { "epoch": 0.05788653155290449, "grad_norm": 3.944466265754802, "learning_rate": 1.9959506410120702e-06, "loss": 1.3089, "step": 427 }, { "epoch": 0.058022097200569375, "grad_norm": 4.708557756647917, "learning_rate": 1.9959110657252915e-06, "loss": 1.3256, "step": 428 }, { "epoch": 0.05815766284823426, "grad_norm": 3.671835460437269, "learning_rate": 1.995871298384925e-06, "loss": 1.3222, "step": 429 }, { "epoch": 0.05829322849589914, "grad_norm": 3.343930247238272, "learning_rate": 1.9958313389986395e-06, "loss": 1.3065, "step": 430 }, { "epoch": 0.05842879414356402, "grad_norm": 5.085447515298306, "learning_rate": 1.995791187574141e-06, "loss": 1.3123, "step": 431 }, { "epoch": 0.058564359791228905, "grad_norm": 5.074024587385257, "learning_rate": 1.995750844119172e-06, "loss": 1.3026, "step": 432 }, { "epoch": 0.05869992543889378, "grad_norm": 6.7432809344955285, "learning_rate": 1.995710308641513e-06, "loss": 1.3323, "step": 433 }, { "epoch": 0.05883549108655867, "grad_norm": 4.68942271729958, "learning_rate": 1.9956695811489803e-06, "loss": 1.3043, "step": 434 }, { "epoch": 0.05897105673422355, "grad_norm": 4.455095026567772, "learning_rate": 1.9956286616494287e-06, "loss": 1.2835, "step": 435 }, { "epoch": 0.05910662238188843, "grad_norm": 5.017003219365191, "learning_rate": 1.9955875501507485e-06, "loss": 1.3109, "step": 436 }, { "epoch": 0.05924218802955331, "grad_norm": 5.322314453186751, "learning_rate": 1.995546246660868e-06, "loss": 1.2864, "step": 437 }, { "epoch": 0.05937775367721819, "grad_norm": 4.076585419484469, "learning_rate": 1.995504751187752e-06, "loss": 1.3309, "step": 438 }, { "epoch": 0.059513319324883074, "grad_norm": 3.4309095161973335, "learning_rate": 1.9954630637394027e-06, "loss": 1.2829, "step": 439 }, { "epoch": 0.05964888497254796, "grad_norm": 3.8674875538717215, "learning_rate": 1.9954211843238594e-06, "loss": 1.3206, "step": 440 }, { "epoch": 0.059784450620212835, "grad_norm": 4.305726500697063, "learning_rate": 1.9953791129491983e-06, "loss": 1.3326, "step": 441 }, { "epoch": 0.05992001626787772, "grad_norm": 5.072225489909742, "learning_rate": 1.995336849623532e-06, "loss": 1.3453, "step": 442 }, { "epoch": 0.060055581915542604, "grad_norm": 8.20617712034658, "learning_rate": 1.995294394355011e-06, "loss": 1.2762, "step": 443 }, { "epoch": 0.06019114756320748, "grad_norm": 5.01832306698539, "learning_rate": 1.9952517471518228e-06, "loss": 1.3034, "step": 444 }, { "epoch": 0.060326713210872365, "grad_norm": 4.510217023727553, "learning_rate": 1.9952089080221907e-06, "loss": 1.2881, "step": 445 }, { "epoch": 0.06046227885853725, "grad_norm": 8.06209008254256, "learning_rate": 1.9951658769743766e-06, "loss": 1.3367, "step": 446 }, { "epoch": 0.06059784450620213, "grad_norm": 7.308699102303056, "learning_rate": 1.9951226540166785e-06, "loss": 1.293, "step": 447 }, { "epoch": 0.06073341015386701, "grad_norm": 10.854734701972713, "learning_rate": 1.9950792391574316e-06, "loss": 1.3256, "step": 448 }, { "epoch": 0.060868975801531895, "grad_norm": 5.067807442785799, "learning_rate": 1.995035632405008e-06, "loss": 1.337, "step": 449 }, { "epoch": 0.06100454144919677, "grad_norm": 16.74871172592918, "learning_rate": 1.994991833767817e-06, "loss": 1.2995, "step": 450 }, { "epoch": 0.06114010709686166, "grad_norm": 4.5422522160840275, "learning_rate": 1.994947843254305e-06, "loss": 1.299, "step": 451 }, { "epoch": 0.061275672744526534, "grad_norm": 3.720374491984649, "learning_rate": 1.994903660872955e-06, "loss": 1.315, "step": 452 }, { "epoch": 0.06141123839219142, "grad_norm": 4.232999056416431, "learning_rate": 1.9948592866322873e-06, "loss": 1.2711, "step": 453 }, { "epoch": 0.0615468040398563, "grad_norm": 4.300098723427015, "learning_rate": 1.9948147205408593e-06, "loss": 1.3386, "step": 454 }, { "epoch": 0.06168236968752118, "grad_norm": 4.0452696247922155, "learning_rate": 1.9947699626072646e-06, "loss": 1.3184, "step": 455 }, { "epoch": 0.061817935335186064, "grad_norm": 3.5344828661926533, "learning_rate": 1.9947250128401354e-06, "loss": 1.2895, "step": 456 }, { "epoch": 0.06195350098285095, "grad_norm": 3.8840599314512407, "learning_rate": 1.994679871248139e-06, "loss": 1.3159, "step": 457 }, { "epoch": 0.062089066630515825, "grad_norm": 4.884773593719557, "learning_rate": 1.9946345378399807e-06, "loss": 1.3476, "step": 458 }, { "epoch": 0.06222463227818071, "grad_norm": 3.096445239698731, "learning_rate": 1.9945890126244038e-06, "loss": 1.3572, "step": 459 }, { "epoch": 0.062360197925845594, "grad_norm": 4.8466939806119145, "learning_rate": 1.9945432956101858e-06, "loss": 1.3062, "step": 460 }, { "epoch": 0.06249576357351047, "grad_norm": 6.182290569264887, "learning_rate": 1.994497386806144e-06, "loss": 1.3047, "step": 461 }, { "epoch": 0.06263132922117536, "grad_norm": 5.533221049738012, "learning_rate": 1.9944512862211313e-06, "loss": 1.2581, "step": 462 }, { "epoch": 0.06276689486884024, "grad_norm": 4.444363780493525, "learning_rate": 1.9944049938640377e-06, "loss": 1.2771, "step": 463 }, { "epoch": 0.06290246051650512, "grad_norm": 3.491802255340631, "learning_rate": 1.9943585097437903e-06, "loss": 1.3203, "step": 464 }, { "epoch": 0.06303802616417, "grad_norm": 5.291818205531003, "learning_rate": 1.9943118338693533e-06, "loss": 1.2644, "step": 465 }, { "epoch": 0.06317359181183488, "grad_norm": 4.064702432178633, "learning_rate": 1.994264966249728e-06, "loss": 1.3177, "step": 466 }, { "epoch": 0.06330915745949976, "grad_norm": 4.7379769229402715, "learning_rate": 1.9942179068939516e-06, "loss": 1.297, "step": 467 }, { "epoch": 0.06344472310716465, "grad_norm": 8.364309577370909, "learning_rate": 1.9941706558111004e-06, "loss": 1.3147, "step": 468 }, { "epoch": 0.06358028875482953, "grad_norm": 4.18575697092828, "learning_rate": 1.9941232130102854e-06, "loss": 1.3033, "step": 469 }, { "epoch": 0.0637158544024944, "grad_norm": 3.173967869705563, "learning_rate": 1.9940755785006564e-06, "loss": 1.3058, "step": 470 }, { "epoch": 0.06385142005015929, "grad_norm": 5.632817866968019, "learning_rate": 1.994027752291398e-06, "loss": 1.2947, "step": 471 }, { "epoch": 0.06398698569782417, "grad_norm": 4.644643599207019, "learning_rate": 1.9939797343917344e-06, "loss": 1.2982, "step": 472 }, { "epoch": 0.06412255134548905, "grad_norm": 4.236624635564682, "learning_rate": 1.9939315248109253e-06, "loss": 1.3207, "step": 473 }, { "epoch": 0.06425811699315394, "grad_norm": 6.734300693490946, "learning_rate": 1.993883123558267e-06, "loss": 1.3073, "step": 474 }, { "epoch": 0.06439368264081882, "grad_norm": 4.5869186615267905, "learning_rate": 1.9938345306430936e-06, "loss": 1.2788, "step": 475 }, { "epoch": 0.06452924828848369, "grad_norm": 4.35072322231293, "learning_rate": 1.9937857460747757e-06, "loss": 1.319, "step": 476 }, { "epoch": 0.06466481393614858, "grad_norm": 3.5154788490503903, "learning_rate": 1.9937367698627208e-06, "loss": 1.319, "step": 477 }, { "epoch": 0.06480037958381346, "grad_norm": 10.024822154476203, "learning_rate": 1.9936876020163746e-06, "loss": 1.3059, "step": 478 }, { "epoch": 0.06493594523147835, "grad_norm": 4.010635402616946, "learning_rate": 1.9936382425452176e-06, "loss": 1.3468, "step": 479 }, { "epoch": 0.06507151087914323, "grad_norm": 7.52308885023496, "learning_rate": 1.993588691458769e-06, "loss": 1.3004, "step": 480 }, { "epoch": 0.06520707652680811, "grad_norm": 4.987657697620128, "learning_rate": 1.993538948766584e-06, "loss": 1.2999, "step": 481 }, { "epoch": 0.06534264217447298, "grad_norm": 4.278579550231999, "learning_rate": 1.9934890144782558e-06, "loss": 1.3017, "step": 482 }, { "epoch": 0.06547820782213787, "grad_norm": 14.675825854010924, "learning_rate": 1.9934388886034126e-06, "loss": 1.256, "step": 483 }, { "epoch": 0.06561377346980275, "grad_norm": 4.383253244040248, "learning_rate": 1.993388571151722e-06, "loss": 1.3146, "step": 484 }, { "epoch": 0.06574933911746764, "grad_norm": 5.75365036961094, "learning_rate": 1.993338062132886e-06, "loss": 1.3461, "step": 485 }, { "epoch": 0.06588490476513252, "grad_norm": 3.5848446561179186, "learning_rate": 1.993287361556646e-06, "loss": 1.3237, "step": 486 }, { "epoch": 0.06602047041279739, "grad_norm": 4.52861820223525, "learning_rate": 1.9932364694327795e-06, "loss": 1.2938, "step": 487 }, { "epoch": 0.06615603606046228, "grad_norm": 4.399073454943939, "learning_rate": 1.9931853857710995e-06, "loss": 1.3188, "step": 488 }, { "epoch": 0.06629160170812716, "grad_norm": 7.848825499553985, "learning_rate": 1.9931341105814575e-06, "loss": 1.3309, "step": 489 }, { "epoch": 0.06642716735579204, "grad_norm": 4.315557374029811, "learning_rate": 1.993082643873742e-06, "loss": 1.3407, "step": 490 }, { "epoch": 0.06656273300345693, "grad_norm": 3.81275560068807, "learning_rate": 1.9930309856578772e-06, "loss": 1.2959, "step": 491 }, { "epoch": 0.06669829865112181, "grad_norm": 5.139737055959169, "learning_rate": 1.992979135943825e-06, "loss": 1.2896, "step": 492 }, { "epoch": 0.06683386429878668, "grad_norm": 5.322679005270176, "learning_rate": 1.9929270947415852e-06, "loss": 1.297, "step": 493 }, { "epoch": 0.06696942994645157, "grad_norm": 4.4012247049612885, "learning_rate": 1.9928748620611927e-06, "loss": 1.323, "step": 494 }, { "epoch": 0.06710499559411645, "grad_norm": 7.198755653444711, "learning_rate": 1.99282243791272e-06, "loss": 1.3711, "step": 495 }, { "epoch": 0.06724056124178134, "grad_norm": 4.202624796605996, "learning_rate": 1.992769822306277e-06, "loss": 1.3316, "step": 496 }, { "epoch": 0.06737612688944622, "grad_norm": 4.792408160864771, "learning_rate": 1.992717015252011e-06, "loss": 1.3049, "step": 497 }, { "epoch": 0.06751169253711109, "grad_norm": 3.8875766216715624, "learning_rate": 1.992664016760104e-06, "loss": 1.3127, "step": 498 }, { "epoch": 0.06764725818477597, "grad_norm": 4.722185739470626, "learning_rate": 1.992610826840777e-06, "loss": 1.2867, "step": 499 }, { "epoch": 0.06778282383244086, "grad_norm": 3.8501500713705457, "learning_rate": 1.9925574455042873e-06, "loss": 1.3398, "step": 500 }, { "epoch": 0.06791838948010574, "grad_norm": 5.489739319142849, "learning_rate": 1.9925038727609287e-06, "loss": 1.2796, "step": 501 }, { "epoch": 0.06805395512777063, "grad_norm": 5.643899069283273, "learning_rate": 1.9924501086210334e-06, "loss": 1.3365, "step": 502 }, { "epoch": 0.06818952077543551, "grad_norm": 3.7608799401627566, "learning_rate": 1.9923961530949677e-06, "loss": 1.2921, "step": 503 }, { "epoch": 0.06832508642310038, "grad_norm": 4.066178055456332, "learning_rate": 1.9923420061931376e-06, "loss": 1.2806, "step": 504 }, { "epoch": 0.06846065207076527, "grad_norm": 3.1721503122969503, "learning_rate": 1.992287667925985e-06, "loss": 1.262, "step": 505 }, { "epoch": 0.06859621771843015, "grad_norm": 5.252059142335182, "learning_rate": 1.992233138303988e-06, "loss": 1.2744, "step": 506 }, { "epoch": 0.06873178336609503, "grad_norm": 3.1752092659159574, "learning_rate": 1.9921784173376626e-06, "loss": 1.2941, "step": 507 }, { "epoch": 0.06886734901375992, "grad_norm": 3.9580933107570533, "learning_rate": 1.9921235050375612e-06, "loss": 1.2875, "step": 508 }, { "epoch": 0.06900291466142479, "grad_norm": 9.562410444476622, "learning_rate": 1.9920684014142736e-06, "loss": 1.2802, "step": 509 }, { "epoch": 0.06913848030908967, "grad_norm": 4.214168733565793, "learning_rate": 1.992013106478425e-06, "loss": 1.3143, "step": 510 }, { "epoch": 0.06927404595675456, "grad_norm": 3.428710447154897, "learning_rate": 1.9919576202406795e-06, "loss": 1.2593, "step": 511 }, { "epoch": 0.06940961160441944, "grad_norm": 7.7740161194439805, "learning_rate": 1.9919019427117372e-06, "loss": 1.2933, "step": 512 }, { "epoch": 0.06954517725208433, "grad_norm": 4.910237661531752, "learning_rate": 1.9918460739023348e-06, "loss": 1.3298, "step": 513 }, { "epoch": 0.06968074289974921, "grad_norm": 4.285706458978206, "learning_rate": 1.991790013823246e-06, "loss": 1.2992, "step": 514 }, { "epoch": 0.06981630854741408, "grad_norm": 3.9538497790606404, "learning_rate": 1.991733762485282e-06, "loss": 1.289, "step": 515 }, { "epoch": 0.06995187419507896, "grad_norm": 7.016458067000537, "learning_rate": 1.9916773198992897e-06, "loss": 1.2965, "step": 516 }, { "epoch": 0.07008743984274385, "grad_norm": 4.922748424851831, "learning_rate": 1.9916206860761546e-06, "loss": 1.2787, "step": 517 }, { "epoch": 0.07022300549040873, "grad_norm": 9.184333022239917, "learning_rate": 1.9915638610267974e-06, "loss": 1.3214, "step": 518 }, { "epoch": 0.07035857113807362, "grad_norm": 5.6390831709518245, "learning_rate": 1.9915068447621765e-06, "loss": 1.3326, "step": 519 }, { "epoch": 0.07049413678573849, "grad_norm": 3.67657050742817, "learning_rate": 1.9914496372932873e-06, "loss": 1.294, "step": 520 }, { "epoch": 0.07062970243340337, "grad_norm": 3.8665832917004885, "learning_rate": 1.9913922386311612e-06, "loss": 1.3029, "step": 521 }, { "epoch": 0.07076526808106826, "grad_norm": 6.84113714138121, "learning_rate": 1.9913346487868676e-06, "loss": 1.3176, "step": 522 }, { "epoch": 0.07090083372873314, "grad_norm": 5.136680891126473, "learning_rate": 1.9912768677715123e-06, "loss": 1.3209, "step": 523 }, { "epoch": 0.07103639937639802, "grad_norm": 5.189634235728607, "learning_rate": 1.9912188955962376e-06, "loss": 1.3151, "step": 524 }, { "epoch": 0.07117196502406291, "grad_norm": 5.56900897916199, "learning_rate": 1.991160732272223e-06, "loss": 1.2983, "step": 525 }, { "epoch": 0.07130753067172778, "grad_norm": 4.273297242316038, "learning_rate": 1.9911023778106846e-06, "loss": 1.302, "step": 526 }, { "epoch": 0.07144309631939266, "grad_norm": 6.8274353468369, "learning_rate": 1.9910438322228762e-06, "loss": 1.3014, "step": 527 }, { "epoch": 0.07157866196705755, "grad_norm": 7.111623499782618, "learning_rate": 1.990985095520088e-06, "loss": 1.2884, "step": 528 }, { "epoch": 0.07171422761472243, "grad_norm": 6.295579549196944, "learning_rate": 1.990926167713646e-06, "loss": 1.2408, "step": 529 }, { "epoch": 0.07184979326238732, "grad_norm": 4.980411773862449, "learning_rate": 1.9908670488149145e-06, "loss": 1.289, "step": 530 }, { "epoch": 0.0719853589100522, "grad_norm": 5.51414521837999, "learning_rate": 1.9908077388352943e-06, "loss": 1.2673, "step": 531 }, { "epoch": 0.07212092455771707, "grad_norm": 3.4116504240187706, "learning_rate": 1.9907482377862226e-06, "loss": 1.2994, "step": 532 }, { "epoch": 0.07225649020538195, "grad_norm": 7.109910298316042, "learning_rate": 1.990688545679173e-06, "loss": 1.2889, "step": 533 }, { "epoch": 0.07239205585304684, "grad_norm": 5.397319430120683, "learning_rate": 1.990628662525658e-06, "loss": 1.2778, "step": 534 }, { "epoch": 0.07252762150071172, "grad_norm": 9.918447457480768, "learning_rate": 1.9905685883372254e-06, "loss": 1.2927, "step": 535 }, { "epoch": 0.0726631871483766, "grad_norm": 3.503446130106881, "learning_rate": 1.990508323125459e-06, "loss": 1.2795, "step": 536 }, { "epoch": 0.07279875279604148, "grad_norm": 4.779364429977744, "learning_rate": 1.9904478669019815e-06, "loss": 1.2339, "step": 537 }, { "epoch": 0.07293431844370636, "grad_norm": 4.6140053360419895, "learning_rate": 1.990387219678451e-06, "loss": 1.3041, "step": 538 }, { "epoch": 0.07306988409137125, "grad_norm": 6.865456513105855, "learning_rate": 1.9903263814665624e-06, "loss": 1.2667, "step": 539 }, { "epoch": 0.07320544973903613, "grad_norm": 4.335626029536592, "learning_rate": 1.9902653522780482e-06, "loss": 1.2798, "step": 540 }, { "epoch": 0.07334101538670101, "grad_norm": 5.431772190868453, "learning_rate": 1.990204132124678e-06, "loss": 1.2835, "step": 541 }, { "epoch": 0.0734765810343659, "grad_norm": 8.06507576633438, "learning_rate": 1.990142721018257e-06, "loss": 1.2615, "step": 542 }, { "epoch": 0.07361214668203077, "grad_norm": 4.362757814896038, "learning_rate": 1.990081118970628e-06, "loss": 1.257, "step": 543 }, { "epoch": 0.07374771232969565, "grad_norm": 4.862657984066072, "learning_rate": 1.99001932599367e-06, "loss": 1.2939, "step": 544 }, { "epoch": 0.07388327797736054, "grad_norm": 3.189252492011794, "learning_rate": 1.9899573420993003e-06, "loss": 1.3325, "step": 545 }, { "epoch": 0.07401884362502542, "grad_norm": 9.14241849990571, "learning_rate": 1.9898951672994708e-06, "loss": 1.3007, "step": 546 }, { "epoch": 0.0741544092726903, "grad_norm": 3.3639845760311933, "learning_rate": 1.9898328016061726e-06, "loss": 1.2866, "step": 547 }, { "epoch": 0.07428997492035518, "grad_norm": 5.181795018964276, "learning_rate": 1.9897702450314316e-06, "loss": 1.2823, "step": 548 }, { "epoch": 0.07442554056802006, "grad_norm": 10.992744317425117, "learning_rate": 1.9897074975873116e-06, "loss": 1.3053, "step": 549 }, { "epoch": 0.07456110621568494, "grad_norm": 7.411101566580022, "learning_rate": 1.9896445592859134e-06, "loss": 1.2806, "step": 550 }, { "epoch": 0.07469667186334983, "grad_norm": 6.444052211385014, "learning_rate": 1.989581430139373e-06, "loss": 1.295, "step": 551 }, { "epoch": 0.07483223751101471, "grad_norm": 4.401517087048684, "learning_rate": 1.9895181101598656e-06, "loss": 1.2843, "step": 552 }, { "epoch": 0.0749678031586796, "grad_norm": 4.1415234517763775, "learning_rate": 1.9894545993596014e-06, "loss": 1.2494, "step": 553 }, { "epoch": 0.07510336880634447, "grad_norm": 6.932524981914319, "learning_rate": 1.9893908977508277e-06, "loss": 1.2806, "step": 554 }, { "epoch": 0.07523893445400935, "grad_norm": 5.77342851591222, "learning_rate": 1.9893270053458293e-06, "loss": 1.3134, "step": 555 }, { "epoch": 0.07537450010167424, "grad_norm": 5.208174270523092, "learning_rate": 1.9892629221569274e-06, "loss": 1.3182, "step": 556 }, { "epoch": 0.07551006574933912, "grad_norm": 3.802324563917097, "learning_rate": 1.989198648196479e-06, "loss": 1.3108, "step": 557 }, { "epoch": 0.075645631397004, "grad_norm": 10.076690522424359, "learning_rate": 1.9891341834768806e-06, "loss": 1.3066, "step": 558 }, { "epoch": 0.07578119704466887, "grad_norm": 3.4479835492085074, "learning_rate": 1.9890695280105622e-06, "loss": 1.2853, "step": 559 }, { "epoch": 0.07591676269233376, "grad_norm": 5.65658648455465, "learning_rate": 1.9890046818099925e-06, "loss": 1.2754, "step": 560 }, { "epoch": 0.07605232833999864, "grad_norm": 28.911759706822483, "learning_rate": 1.9889396448876765e-06, "loss": 1.3196, "step": 561 }, { "epoch": 0.07618789398766353, "grad_norm": 35.940528165272305, "learning_rate": 1.9888744172561563e-06, "loss": 1.32, "step": 562 }, { "epoch": 0.07632345963532841, "grad_norm": 7.966268907418616, "learning_rate": 1.9888089989280107e-06, "loss": 1.2759, "step": 563 }, { "epoch": 0.0764590252829933, "grad_norm": 13.006596594936601, "learning_rate": 1.9887433899158547e-06, "loss": 1.3127, "step": 564 }, { "epoch": 0.07659459093065817, "grad_norm": 3.9642695786446454, "learning_rate": 1.9886775902323402e-06, "loss": 1.3195, "step": 565 }, { "epoch": 0.07673015657832305, "grad_norm": 6.333662831815957, "learning_rate": 1.9886115998901572e-06, "loss": 1.314, "step": 566 }, { "epoch": 0.07686572222598793, "grad_norm": 5.925836742238495, "learning_rate": 1.9885454189020303e-06, "loss": 1.3042, "step": 567 }, { "epoch": 0.07700128787365282, "grad_norm": 3.8011917308944008, "learning_rate": 1.988479047280723e-06, "loss": 1.3265, "step": 568 }, { "epoch": 0.0771368535213177, "grad_norm": 4.347574755035327, "learning_rate": 1.9884124850390336e-06, "loss": 1.2909, "step": 569 }, { "epoch": 0.07727241916898257, "grad_norm": 3.6621525917917848, "learning_rate": 1.9883457321897984e-06, "loss": 1.318, "step": 570 }, { "epoch": 0.07740798481664746, "grad_norm": 4.638275348535267, "learning_rate": 1.9882787887458907e-06, "loss": 1.2942, "step": 571 }, { "epoch": 0.07754355046431234, "grad_norm": 4.180393841820077, "learning_rate": 1.988211654720219e-06, "loss": 1.3399, "step": 572 }, { "epoch": 0.07767911611197723, "grad_norm": 4.42672278372727, "learning_rate": 1.9881443301257308e-06, "loss": 1.2702, "step": 573 }, { "epoch": 0.07781468175964211, "grad_norm": 5.1996732763671405, "learning_rate": 1.988076814975408e-06, "loss": 1.2914, "step": 574 }, { "epoch": 0.077950247407307, "grad_norm": 5.2619443644966255, "learning_rate": 1.988009109282271e-06, "loss": 1.3016, "step": 575 }, { "epoch": 0.07808581305497186, "grad_norm": 3.6026977446092903, "learning_rate": 1.9879412130593765e-06, "loss": 1.3244, "step": 576 }, { "epoch": 0.07822137870263675, "grad_norm": 3.3549855244120828, "learning_rate": 1.9878731263198165e-06, "loss": 1.276, "step": 577 }, { "epoch": 0.07835694435030163, "grad_norm": 4.645855742701119, "learning_rate": 1.987804849076723e-06, "loss": 1.2565, "step": 578 }, { "epoch": 0.07849250999796652, "grad_norm": 5.072550533554671, "learning_rate": 1.9877363813432607e-06, "loss": 1.3263, "step": 579 }, { "epoch": 0.0786280756456314, "grad_norm": 5.008983585651738, "learning_rate": 1.9876677231326347e-06, "loss": 1.2763, "step": 580 }, { "epoch": 0.07876364129329629, "grad_norm": 9.583531116069832, "learning_rate": 1.9875988744580837e-06, "loss": 1.2835, "step": 581 }, { "epoch": 0.07889920694096116, "grad_norm": 9.95276325977868, "learning_rate": 1.987529835332886e-06, "loss": 1.2641, "step": 582 }, { "epoch": 0.07903477258862604, "grad_norm": 3.9200240716522052, "learning_rate": 1.9874606057703546e-06, "loss": 1.2963, "step": 583 }, { "epoch": 0.07917033823629092, "grad_norm": 3.5107645902702735, "learning_rate": 1.9873911857838395e-06, "loss": 1.2789, "step": 584 }, { "epoch": 0.07930590388395581, "grad_norm": 19.072244560895346, "learning_rate": 1.9873215753867286e-06, "loss": 1.3015, "step": 585 }, { "epoch": 0.07944146953162069, "grad_norm": 6.436653843265902, "learning_rate": 1.987251774592445e-06, "loss": 1.3103, "step": 586 }, { "epoch": 0.07957703517928556, "grad_norm": 6.511893872382613, "learning_rate": 1.98718178341445e-06, "loss": 1.3015, "step": 587 }, { "epoch": 0.07971260082695045, "grad_norm": 4.399944001964293, "learning_rate": 1.9871116018662403e-06, "loss": 1.2939, "step": 588 }, { "epoch": 0.07984816647461533, "grad_norm": 15.436688087753497, "learning_rate": 1.98704122996135e-06, "loss": 1.287, "step": 589 }, { "epoch": 0.07998373212228022, "grad_norm": 3.7136115824659455, "learning_rate": 1.9869706677133493e-06, "loss": 1.2905, "step": 590 }, { "epoch": 0.0801192977699451, "grad_norm": 4.16855096294531, "learning_rate": 1.9868999151358465e-06, "loss": 1.3176, "step": 591 }, { "epoch": 0.08025486341760998, "grad_norm": 4.633592699717388, "learning_rate": 1.9868289722424846e-06, "loss": 1.2875, "step": 592 }, { "epoch": 0.08039042906527485, "grad_norm": 12.88904992895097, "learning_rate": 1.9867578390469454e-06, "loss": 1.3012, "step": 593 }, { "epoch": 0.08052599471293974, "grad_norm": 11.841351350801157, "learning_rate": 1.986686515562946e-06, "loss": 1.2745, "step": 594 }, { "epoch": 0.08066156036060462, "grad_norm": 4.7467140702127235, "learning_rate": 1.9866150018042403e-06, "loss": 1.3011, "step": 595 }, { "epoch": 0.08079712600826951, "grad_norm": 12.875316742069254, "learning_rate": 1.986543297784619e-06, "loss": 1.32, "step": 596 }, { "epoch": 0.08093269165593439, "grad_norm": 3.5507795727214497, "learning_rate": 1.9864714035179108e-06, "loss": 1.2768, "step": 597 }, { "epoch": 0.08106825730359926, "grad_norm": 5.186808192602036, "learning_rate": 1.986399319017979e-06, "loss": 1.261, "step": 598 }, { "epoch": 0.08120382295126415, "grad_norm": 5.951100396678611, "learning_rate": 1.986327044298724e-06, "loss": 1.302, "step": 599 }, { "epoch": 0.08133938859892903, "grad_norm": 22.9750689172275, "learning_rate": 1.986254579374085e-06, "loss": 1.2372, "step": 600 }, { "epoch": 0.08147495424659391, "grad_norm": 4.900819077430162, "learning_rate": 1.9861819242580353e-06, "loss": 1.2708, "step": 601 }, { "epoch": 0.0816105198942588, "grad_norm": 4.571125191316594, "learning_rate": 1.9861090789645855e-06, "loss": 1.2581, "step": 602 }, { "epoch": 0.08174608554192368, "grad_norm": 5.391955823629966, "learning_rate": 1.9860360435077837e-06, "loss": 1.2849, "step": 603 }, { "epoch": 0.08188165118958855, "grad_norm": 8.62376487509451, "learning_rate": 1.9859628179017142e-06, "loss": 1.281, "step": 604 }, { "epoch": 0.08201721683725344, "grad_norm": 3.4814712842545834, "learning_rate": 1.985889402160498e-06, "loss": 1.2864, "step": 605 }, { "epoch": 0.08215278248491832, "grad_norm": 4.059344119359007, "learning_rate": 1.985815796298293e-06, "loss": 1.2988, "step": 606 }, { "epoch": 0.0822883481325832, "grad_norm": 5.432585771760035, "learning_rate": 1.985742000329293e-06, "loss": 1.3017, "step": 607 }, { "epoch": 0.08242391378024809, "grad_norm": 5.036247150062654, "learning_rate": 1.9856680142677294e-06, "loss": 1.2581, "step": 608 }, { "epoch": 0.08255947942791296, "grad_norm": 6.062443570156134, "learning_rate": 1.9855938381278698e-06, "loss": 1.2777, "step": 609 }, { "epoch": 0.08269504507557784, "grad_norm": 4.415598316450686, "learning_rate": 1.985519471924018e-06, "loss": 1.2781, "step": 610 }, { "epoch": 0.08283061072324273, "grad_norm": 4.879573770102122, "learning_rate": 1.985444915670515e-06, "loss": 1.2827, "step": 611 }, { "epoch": 0.08296617637090761, "grad_norm": 7.714783486680817, "learning_rate": 1.9853701693817393e-06, "loss": 1.2461, "step": 612 }, { "epoch": 0.0831017420185725, "grad_norm": 10.197031723655956, "learning_rate": 1.985295233072104e-06, "loss": 1.2712, "step": 613 }, { "epoch": 0.08323730766623738, "grad_norm": 7.845701068447388, "learning_rate": 1.9852201067560607e-06, "loss": 1.3027, "step": 614 }, { "epoch": 0.08337287331390225, "grad_norm": 5.0936830947319445, "learning_rate": 1.9851447904480964e-06, "loss": 1.2813, "step": 615 }, { "epoch": 0.08350843896156714, "grad_norm": 9.432115362585781, "learning_rate": 1.9850692841627356e-06, "loss": 1.2982, "step": 616 }, { "epoch": 0.08364400460923202, "grad_norm": 4.985687642758532, "learning_rate": 1.984993587914539e-06, "loss": 1.247, "step": 617 }, { "epoch": 0.0837795702568969, "grad_norm": 4.372382233483309, "learning_rate": 1.9849177017181044e-06, "loss": 1.2581, "step": 618 }, { "epoch": 0.08391513590456179, "grad_norm": 7.197660631503032, "learning_rate": 1.984841625588065e-06, "loss": 1.2605, "step": 619 }, { "epoch": 0.08405070155222667, "grad_norm": 4.622725698673203, "learning_rate": 1.9847653595390923e-06, "loss": 1.2696, "step": 620 }, { "epoch": 0.08418626719989154, "grad_norm": 5.820677242383131, "learning_rate": 1.984688903585893e-06, "loss": 1.2778, "step": 621 }, { "epoch": 0.08432183284755643, "grad_norm": 3.7008119682830656, "learning_rate": 1.9846122577432116e-06, "loss": 1.2627, "step": 622 }, { "epoch": 0.08445739849522131, "grad_norm": 16.845004299564643, "learning_rate": 1.9845354220258283e-06, "loss": 1.2353, "step": 623 }, { "epoch": 0.0845929641428862, "grad_norm": 4.9818241263295056, "learning_rate": 1.9844583964485604e-06, "loss": 1.2681, "step": 624 }, { "epoch": 0.08472852979055108, "grad_norm": 16.373433443748826, "learning_rate": 1.9843811810262612e-06, "loss": 1.2839, "step": 625 }, { "epoch": 0.08486409543821595, "grad_norm": 8.010256650338077, "learning_rate": 1.984303775773822e-06, "loss": 1.2819, "step": 626 }, { "epoch": 0.08499966108588083, "grad_norm": 6.439701456217815, "learning_rate": 1.9842261807061685e-06, "loss": 1.2989, "step": 627 }, { "epoch": 0.08513522673354572, "grad_norm": 6.481987434842699, "learning_rate": 1.984148395838266e-06, "loss": 1.2715, "step": 628 }, { "epoch": 0.0852707923812106, "grad_norm": 5.139030792894487, "learning_rate": 1.984070421185113e-06, "loss": 1.2638, "step": 629 }, { "epoch": 0.08540635802887549, "grad_norm": 7.094427589482462, "learning_rate": 1.983992256761747e-06, "loss": 1.2676, "step": 630 }, { "epoch": 0.08554192367654037, "grad_norm": 3.803295302107756, "learning_rate": 1.983913902583242e-06, "loss": 1.2614, "step": 631 }, { "epoch": 0.08567748932420524, "grad_norm": 4.380318413411752, "learning_rate": 1.983835358664707e-06, "loss": 1.3041, "step": 632 }, { "epoch": 0.08581305497187013, "grad_norm": 5.947366371428865, "learning_rate": 1.9837566250212894e-06, "loss": 1.2798, "step": 633 }, { "epoch": 0.08594862061953501, "grad_norm": 6.488240573942914, "learning_rate": 1.9836777016681723e-06, "loss": 1.2273, "step": 634 }, { "epoch": 0.0860841862671999, "grad_norm": 4.897179514918908, "learning_rate": 1.9835985886205744e-06, "loss": 1.2721, "step": 635 }, { "epoch": 0.08621975191486478, "grad_norm": 6.2470750600587435, "learning_rate": 1.983519285893753e-06, "loss": 1.3093, "step": 636 }, { "epoch": 0.08635531756252965, "grad_norm": 7.019259534728544, "learning_rate": 1.983439793503e-06, "loss": 1.2656, "step": 637 }, { "epoch": 0.08649088321019453, "grad_norm": 4.297123002682255, "learning_rate": 1.9833601114636465e-06, "loss": 1.2452, "step": 638 }, { "epoch": 0.08662644885785942, "grad_norm": 4.936128034223658, "learning_rate": 1.9832802397910578e-06, "loss": 1.2742, "step": 639 }, { "epoch": 0.0867620145055243, "grad_norm": 7.353500261368715, "learning_rate": 1.983200178500636e-06, "loss": 1.2957, "step": 640 }, { "epoch": 0.08689758015318919, "grad_norm": 7.265143511215646, "learning_rate": 1.9831199276078208e-06, "loss": 1.2943, "step": 641 }, { "epoch": 0.08703314580085407, "grad_norm": 4.851750059245971, "learning_rate": 1.9830394871280876e-06, "loss": 1.2289, "step": 642 }, { "epoch": 0.08716871144851894, "grad_norm": 5.947919002853873, "learning_rate": 1.982958857076949e-06, "loss": 1.2787, "step": 643 }, { "epoch": 0.08730427709618382, "grad_norm": 19.281518387424438, "learning_rate": 1.982878037469954e-06, "loss": 1.2761, "step": 644 }, { "epoch": 0.08743984274384871, "grad_norm": 3.796905885005761, "learning_rate": 1.9827970283226883e-06, "loss": 1.3049, "step": 645 }, { "epoch": 0.08757540839151359, "grad_norm": 5.010416551584826, "learning_rate": 1.9827158296507727e-06, "loss": 1.2797, "step": 646 }, { "epoch": 0.08771097403917848, "grad_norm": 4.65689596111523, "learning_rate": 1.9826344414698667e-06, "loss": 1.2685, "step": 647 }, { "epoch": 0.08784653968684335, "grad_norm": 5.321615812105299, "learning_rate": 1.982552863795665e-06, "loss": 1.2684, "step": 648 }, { "epoch": 0.08798210533450823, "grad_norm": 5.155347032860903, "learning_rate": 1.9824710966438995e-06, "loss": 1.2944, "step": 649 }, { "epoch": 0.08811767098217312, "grad_norm": 42.3727872190792, "learning_rate": 1.982389140030338e-06, "loss": 1.2357, "step": 650 }, { "epoch": 0.088253236629838, "grad_norm": 5.057273764148637, "learning_rate": 1.9823069939707856e-06, "loss": 1.2737, "step": 651 }, { "epoch": 0.08838880227750288, "grad_norm": 5.169281852965491, "learning_rate": 1.982224658481083e-06, "loss": 1.2632, "step": 652 }, { "epoch": 0.08852436792516777, "grad_norm": 7.573722428237371, "learning_rate": 1.9821421335771084e-06, "loss": 1.2864, "step": 653 }, { "epoch": 0.08865993357283264, "grad_norm": 4.491906828367358, "learning_rate": 1.9820594192747757e-06, "loss": 1.298, "step": 654 }, { "epoch": 0.08879549922049752, "grad_norm": 9.869180962484537, "learning_rate": 1.981976515590036e-06, "loss": 1.2864, "step": 655 }, { "epoch": 0.08893106486816241, "grad_norm": 6.778204314171425, "learning_rate": 1.9818934225388765e-06, "loss": 1.3064, "step": 656 }, { "epoch": 0.08906663051582729, "grad_norm": 7.194681311955922, "learning_rate": 1.981810140137321e-06, "loss": 1.2701, "step": 657 }, { "epoch": 0.08920219616349218, "grad_norm": 5.209441299767114, "learning_rate": 1.9817266684014303e-06, "loss": 1.2493, "step": 658 }, { "epoch": 0.08933776181115705, "grad_norm": 5.457235867617876, "learning_rate": 1.9816430073473005e-06, "loss": 1.2748, "step": 659 }, { "epoch": 0.08947332745882193, "grad_norm": 4.828226850814362, "learning_rate": 1.9815591569910653e-06, "loss": 1.299, "step": 660 }, { "epoch": 0.08960889310648681, "grad_norm": 5.202889767293584, "learning_rate": 1.9814751173488944e-06, "loss": 1.2872, "step": 661 }, { "epoch": 0.0897444587541517, "grad_norm": 8.222399068051672, "learning_rate": 1.981390888436995e-06, "loss": 1.3194, "step": 662 }, { "epoch": 0.08988002440181658, "grad_norm": 4.592585794908774, "learning_rate": 1.981306470271609e-06, "loss": 1.2834, "step": 663 }, { "epoch": 0.09001559004948147, "grad_norm": 4.8533091688867955, "learning_rate": 1.9812218628690165e-06, "loss": 1.2632, "step": 664 }, { "epoch": 0.09015115569714634, "grad_norm": 12.141319871906608, "learning_rate": 1.981137066245533e-06, "loss": 1.2593, "step": 665 }, { "epoch": 0.09028672134481122, "grad_norm": 6.127327146741361, "learning_rate": 1.981052080417511e-06, "loss": 1.2764, "step": 666 }, { "epoch": 0.0904222869924761, "grad_norm": 3.8783097252533434, "learning_rate": 1.980966905401339e-06, "loss": 1.2428, "step": 667 }, { "epoch": 0.09055785264014099, "grad_norm": 20.385302246825347, "learning_rate": 1.9808815412134424e-06, "loss": 1.2719, "step": 668 }, { "epoch": 0.09069341828780587, "grad_norm": 3.9380984663723413, "learning_rate": 1.9807959878702833e-06, "loss": 1.2499, "step": 669 }, { "epoch": 0.09082898393547076, "grad_norm": 5.086972114099048, "learning_rate": 1.98071024538836e-06, "loss": 1.2936, "step": 670 }, { "epoch": 0.09096454958313563, "grad_norm": 5.929544434278532, "learning_rate": 1.980624313784207e-06, "loss": 1.2683, "step": 671 }, { "epoch": 0.09110011523080051, "grad_norm": 5.723263105820939, "learning_rate": 1.980538193074396e-06, "loss": 1.2541, "step": 672 }, { "epoch": 0.0912356808784654, "grad_norm": 15.099238262330982, "learning_rate": 1.980451883275534e-06, "loss": 1.2585, "step": 673 }, { "epoch": 0.09137124652613028, "grad_norm": 7.567209514001259, "learning_rate": 1.9803653844042655e-06, "loss": 1.2718, "step": 674 }, { "epoch": 0.09150681217379517, "grad_norm": 6.071736742902772, "learning_rate": 1.9802786964772714e-06, "loss": 1.2656, "step": 675 }, { "epoch": 0.09164237782146004, "grad_norm": 4.9579438055689815, "learning_rate": 1.9801918195112684e-06, "loss": 1.2742, "step": 676 }, { "epoch": 0.09177794346912492, "grad_norm": 5.376294049775214, "learning_rate": 1.9801047535230103e-06, "loss": 1.294, "step": 677 }, { "epoch": 0.0919135091167898, "grad_norm": 7.254988585635768, "learning_rate": 1.9800174985292866e-06, "loss": 1.2748, "step": 678 }, { "epoch": 0.09204907476445469, "grad_norm": 4.718510290232815, "learning_rate": 1.9799300545469248e-06, "loss": 1.2819, "step": 679 }, { "epoch": 0.09218464041211957, "grad_norm": 6.564805827796596, "learning_rate": 1.9798424215927864e-06, "loss": 1.2359, "step": 680 }, { "epoch": 0.09232020605978446, "grad_norm": 4.586676933004481, "learning_rate": 1.979754599683772e-06, "loss": 1.2795, "step": 681 }, { "epoch": 0.09245577170744933, "grad_norm": 7.498666742213608, "learning_rate": 1.979666588836816e-06, "loss": 1.2474, "step": 682 }, { "epoch": 0.09259133735511421, "grad_norm": 4.411506198512411, "learning_rate": 1.9795783890688917e-06, "loss": 1.2813, "step": 683 }, { "epoch": 0.0927269030027791, "grad_norm": 9.225471307164925, "learning_rate": 1.9794900003970073e-06, "loss": 1.2643, "step": 684 }, { "epoch": 0.09286246865044398, "grad_norm": 3.8974879390839674, "learning_rate": 1.9794014228382085e-06, "loss": 1.2664, "step": 685 }, { "epoch": 0.09299803429810886, "grad_norm": 8.580610009561477, "learning_rate": 1.9793126564095756e-06, "loss": 1.2834, "step": 686 }, { "epoch": 0.09313359994577373, "grad_norm": 16.779870638171758, "learning_rate": 1.979223701128227e-06, "loss": 1.3107, "step": 687 }, { "epoch": 0.09326916559343862, "grad_norm": 6.169703102608036, "learning_rate": 1.979134557011318e-06, "loss": 1.2586, "step": 688 }, { "epoch": 0.0934047312411035, "grad_norm": 8.714413608245653, "learning_rate": 1.979045224076038e-06, "loss": 1.2948, "step": 689 }, { "epoch": 0.09354029688876839, "grad_norm": 7.757422169622411, "learning_rate": 1.9789557023396145e-06, "loss": 1.2594, "step": 690 }, { "epoch": 0.09367586253643327, "grad_norm": 11.751496764279661, "learning_rate": 1.9788659918193115e-06, "loss": 1.2891, "step": 691 }, { "epoch": 0.09381142818409816, "grad_norm": 6.198861611867003, "learning_rate": 1.9787760925324285e-06, "loss": 1.2817, "step": 692 }, { "epoch": 0.09394699383176303, "grad_norm": 10.052258435460178, "learning_rate": 1.9786860044963023e-06, "loss": 1.2606, "step": 693 }, { "epoch": 0.09408255947942791, "grad_norm": 7.568182077208302, "learning_rate": 1.978595727728305e-06, "loss": 1.2801, "step": 694 }, { "epoch": 0.0942181251270928, "grad_norm": 4.200593075911887, "learning_rate": 1.9785052622458467e-06, "loss": 1.2701, "step": 695 }, { "epoch": 0.09435369077475768, "grad_norm": 9.163927559693754, "learning_rate": 1.978414608066372e-06, "loss": 1.2873, "step": 696 }, { "epoch": 0.09448925642242256, "grad_norm": 14.913825598280447, "learning_rate": 1.9783237652073633e-06, "loss": 1.2452, "step": 697 }, { "epoch": 0.09462482207008743, "grad_norm": 6.208301197633583, "learning_rate": 1.978232733686339e-06, "loss": 1.2382, "step": 698 }, { "epoch": 0.09476038771775232, "grad_norm": 4.528307558890476, "learning_rate": 1.9781415135208536e-06, "loss": 1.2692, "step": 699 }, { "epoch": 0.0948959533654172, "grad_norm": 11.619431697373908, "learning_rate": 1.9780501047284983e-06, "loss": 1.2566, "step": 700 }, { "epoch": 0.09503151901308209, "grad_norm": 5.798507125820654, "learning_rate": 1.977958507326901e-06, "loss": 1.2546, "step": 701 }, { "epoch": 0.09516708466074697, "grad_norm": 5.358082226117899, "learning_rate": 1.9778667213337242e-06, "loss": 1.2716, "step": 702 }, { "epoch": 0.09530265030841185, "grad_norm": 6.172291141763051, "learning_rate": 1.97777474676667e-06, "loss": 1.2578, "step": 703 }, { "epoch": 0.09543821595607672, "grad_norm": 5.313225494074544, "learning_rate": 1.9776825836434733e-06, "loss": 1.2608, "step": 704 }, { "epoch": 0.09557378160374161, "grad_norm": 6.3706068921106, "learning_rate": 1.977590231981908e-06, "loss": 1.2723, "step": 705 }, { "epoch": 0.09570934725140649, "grad_norm": 4.913394174270748, "learning_rate": 1.977497691799783e-06, "loss": 1.2486, "step": 706 }, { "epoch": 0.09584491289907138, "grad_norm": 9.695269331250323, "learning_rate": 1.9774049631149443e-06, "loss": 1.2695, "step": 707 }, { "epoch": 0.09598047854673626, "grad_norm": 16.542999604560837, "learning_rate": 1.977312045945273e-06, "loss": 1.2785, "step": 708 }, { "epoch": 0.09611604419440115, "grad_norm": 5.470521715949267, "learning_rate": 1.9772189403086884e-06, "loss": 1.2577, "step": 709 }, { "epoch": 0.09625160984206602, "grad_norm": 6.623968317380443, "learning_rate": 1.977125646223145e-06, "loss": 1.2571, "step": 710 }, { "epoch": 0.0963871754897309, "grad_norm": 5.2235105311631385, "learning_rate": 1.977032163706633e-06, "loss": 1.3315, "step": 711 }, { "epoch": 0.09652274113739578, "grad_norm": 11.904009135196272, "learning_rate": 1.976938492777182e-06, "loss": 1.2298, "step": 712 }, { "epoch": 0.09665830678506067, "grad_norm": 9.760127406315691, "learning_rate": 1.976844633452853e-06, "loss": 1.267, "step": 713 }, { "epoch": 0.09679387243272555, "grad_norm": 8.295420644697678, "learning_rate": 1.976750585751747e-06, "loss": 1.2313, "step": 714 }, { "epoch": 0.09692943808039042, "grad_norm": 9.380842074286623, "learning_rate": 1.9766563496920014e-06, "loss": 1.2832, "step": 715 }, { "epoch": 0.09706500372805531, "grad_norm": 5.964691135420266, "learning_rate": 1.9765619252917873e-06, "loss": 1.2725, "step": 716 }, { "epoch": 0.09720056937572019, "grad_norm": 5.818594346054154, "learning_rate": 1.9764673125693146e-06, "loss": 1.284, "step": 717 }, { "epoch": 0.09733613502338508, "grad_norm": 5.84909609356952, "learning_rate": 1.9763725115428284e-06, "loss": 1.259, "step": 718 }, { "epoch": 0.09747170067104996, "grad_norm": 8.389561079888836, "learning_rate": 1.9762775222306107e-06, "loss": 1.2679, "step": 719 }, { "epoch": 0.09760726631871484, "grad_norm": 5.442021990048278, "learning_rate": 1.976182344650979e-06, "loss": 1.2528, "step": 720 }, { "epoch": 0.09774283196637971, "grad_norm": 7.05335594348579, "learning_rate": 1.9760869788222873e-06, "loss": 1.2459, "step": 721 }, { "epoch": 0.0978783976140446, "grad_norm": 6.6512142131441445, "learning_rate": 1.9759914247629264e-06, "loss": 1.2828, "step": 722 }, { "epoch": 0.09801396326170948, "grad_norm": 8.89076051141924, "learning_rate": 1.975895682491324e-06, "loss": 1.2415, "step": 723 }, { "epoch": 0.09814952890937437, "grad_norm": 4.94336492055686, "learning_rate": 1.975799752025942e-06, "loss": 1.3006, "step": 724 }, { "epoch": 0.09828509455703925, "grad_norm": 10.35631564186455, "learning_rate": 1.97570363338528e-06, "loss": 1.2399, "step": 725 }, { "epoch": 0.09842066020470412, "grad_norm": 5.331219456479723, "learning_rate": 1.9756073265878746e-06, "loss": 1.2439, "step": 726 }, { "epoch": 0.098556225852369, "grad_norm": 6.2746163425791, "learning_rate": 1.9755108316522967e-06, "loss": 1.2767, "step": 727 }, { "epoch": 0.09869179150003389, "grad_norm": 4.701525271272733, "learning_rate": 1.9754141485971555e-06, "loss": 1.2635, "step": 728 }, { "epoch": 0.09882735714769877, "grad_norm": 6.438179339932713, "learning_rate": 1.9753172774410952e-06, "loss": 1.2717, "step": 729 }, { "epoch": 0.09896292279536366, "grad_norm": 4.030701106122799, "learning_rate": 1.9752202182027967e-06, "loss": 1.2649, "step": 730 }, { "epoch": 0.09909848844302854, "grad_norm": 11.697544701455923, "learning_rate": 1.9751229709009767e-06, "loss": 1.2483, "step": 731 }, { "epoch": 0.09923405409069341, "grad_norm": 40.24199627516775, "learning_rate": 1.975025535554389e-06, "loss": 1.304, "step": 732 }, { "epoch": 0.0993696197383583, "grad_norm": 6.08973916258979, "learning_rate": 1.9749279121818236e-06, "loss": 1.234, "step": 733 }, { "epoch": 0.09950518538602318, "grad_norm": 5.443114363435679, "learning_rate": 1.9748301008021055e-06, "loss": 1.2627, "step": 734 }, { "epoch": 0.09964075103368807, "grad_norm": 4.252069071887052, "learning_rate": 1.9747321014340974e-06, "loss": 1.2033, "step": 735 }, { "epoch": 0.09977631668135295, "grad_norm": 4.198265896502168, "learning_rate": 1.974633914096698e-06, "loss": 1.2433, "step": 736 }, { "epoch": 0.09991188232901782, "grad_norm": 4.61943861765985, "learning_rate": 1.974535538808841e-06, "loss": 1.2642, "step": 737 }, { "epoch": 0.1000474479766827, "grad_norm": 4.230069225918995, "learning_rate": 1.9744369755894977e-06, "loss": 1.3019, "step": 738 }, { "epoch": 0.10018301362434759, "grad_norm": 5.728565415781082, "learning_rate": 1.974338224457676e-06, "loss": 1.2644, "step": 739 }, { "epoch": 0.10031857927201247, "grad_norm": 4.188960655349667, "learning_rate": 1.9742392854324186e-06, "loss": 1.2946, "step": 740 }, { "epoch": 0.10045414491967736, "grad_norm": 5.946320796592437, "learning_rate": 1.974140158532805e-06, "loss": 1.2842, "step": 741 }, { "epoch": 0.10058971056734224, "grad_norm": 3.9408898607873093, "learning_rate": 1.974040843777951e-06, "loss": 1.2065, "step": 742 }, { "epoch": 0.10072527621500711, "grad_norm": 6.592669054228508, "learning_rate": 1.973941341187009e-06, "loss": 1.2845, "step": 743 }, { "epoch": 0.100860841862672, "grad_norm": 4.575472278407309, "learning_rate": 1.9738416507791676e-06, "loss": 1.2768, "step": 744 }, { "epoch": 0.10099640751033688, "grad_norm": 5.043818072843461, "learning_rate": 1.9737417725736507e-06, "loss": 1.2192, "step": 745 }, { "epoch": 0.10113197315800176, "grad_norm": 5.872056016747114, "learning_rate": 1.9736417065897187e-06, "loss": 1.305, "step": 746 }, { "epoch": 0.10126753880566665, "grad_norm": 7.323054408890468, "learning_rate": 1.9735414528466694e-06, "loss": 1.2934, "step": 747 }, { "epoch": 0.10140310445333152, "grad_norm": 4.14781337640987, "learning_rate": 1.9734410113638356e-06, "loss": 1.256, "step": 748 }, { "epoch": 0.1015386701009964, "grad_norm": 4.521688505217117, "learning_rate": 1.973340382160587e-06, "loss": 1.2562, "step": 749 }, { "epoch": 0.10167423574866129, "grad_norm": 4.354823929908704, "learning_rate": 1.973239565256328e-06, "loss": 1.3132, "step": 750 }, { "epoch": 0.10180980139632617, "grad_norm": 6.739684745490271, "learning_rate": 1.973138560670502e-06, "loss": 1.2887, "step": 751 }, { "epoch": 0.10194536704399106, "grad_norm": 14.445215645453398, "learning_rate": 1.973037368422585e-06, "loss": 1.2273, "step": 752 }, { "epoch": 0.10208093269165594, "grad_norm": 4.871058791074302, "learning_rate": 1.9729359885320933e-06, "loss": 1.2658, "step": 753 }, { "epoch": 0.10221649833932081, "grad_norm": 8.57615383754767, "learning_rate": 1.9728344210185757e-06, "loss": 1.2404, "step": 754 }, { "epoch": 0.1023520639869857, "grad_norm": 5.892138559518459, "learning_rate": 1.9727326659016187e-06, "loss": 1.2817, "step": 755 }, { "epoch": 0.10248762963465058, "grad_norm": 3.6213689796040325, "learning_rate": 1.972630723200846e-06, "loss": 1.2414, "step": 756 }, { "epoch": 0.10262319528231546, "grad_norm": 5.163967381126394, "learning_rate": 1.9725285929359156e-06, "loss": 1.2539, "step": 757 }, { "epoch": 0.10275876092998035, "grad_norm": 10.70383115764249, "learning_rate": 1.9724262751265222e-06, "loss": 1.2815, "step": 758 }, { "epoch": 0.10289432657764523, "grad_norm": 5.918109148070708, "learning_rate": 1.972323769792398e-06, "loss": 1.2904, "step": 759 }, { "epoch": 0.1030298922253101, "grad_norm": 4.1580125638059595, "learning_rate": 1.97222107695331e-06, "loss": 1.2888, "step": 760 }, { "epoch": 0.10316545787297499, "grad_norm": 7.109573386149248, "learning_rate": 1.9721181966290614e-06, "loss": 1.2383, "step": 761 }, { "epoch": 0.10330102352063987, "grad_norm": 4.344634495763852, "learning_rate": 1.9720151288394916e-06, "loss": 1.2678, "step": 762 }, { "epoch": 0.10343658916830475, "grad_norm": 14.77645621485242, "learning_rate": 1.9719118736044773e-06, "loss": 1.2441, "step": 763 }, { "epoch": 0.10357215481596964, "grad_norm": 4.1437499717585204, "learning_rate": 1.97180843094393e-06, "loss": 1.248, "step": 764 }, { "epoch": 0.10370772046363451, "grad_norm": 6.424680075226791, "learning_rate": 1.9717048008777978e-06, "loss": 1.2863, "step": 765 }, { "epoch": 0.1038432861112994, "grad_norm": 3.994092960845478, "learning_rate": 1.9716009834260645e-06, "loss": 1.2798, "step": 766 }, { "epoch": 0.10397885175896428, "grad_norm": 9.688916054151786, "learning_rate": 1.971496978608751e-06, "loss": 1.2617, "step": 767 }, { "epoch": 0.10411441740662916, "grad_norm": 6.209889231923605, "learning_rate": 1.971392786445914e-06, "loss": 1.2626, "step": 768 }, { "epoch": 0.10424998305429405, "grad_norm": 6.507038233795597, "learning_rate": 1.9712884069576455e-06, "loss": 1.2379, "step": 769 }, { "epoch": 0.10438554870195893, "grad_norm": 12.306299745685381, "learning_rate": 1.971183840164075e-06, "loss": 1.2178, "step": 770 }, { "epoch": 0.1045211143496238, "grad_norm": 5.871143987629765, "learning_rate": 1.9710790860853667e-06, "loss": 1.2427, "step": 771 }, { "epoch": 0.10465667999728868, "grad_norm": 4.518214187076625, "learning_rate": 1.9709741447417223e-06, "loss": 1.2452, "step": 772 }, { "epoch": 0.10479224564495357, "grad_norm": 4.884353255075787, "learning_rate": 1.970869016153378e-06, "loss": 1.2777, "step": 773 }, { "epoch": 0.10492781129261845, "grad_norm": 6.309932583465173, "learning_rate": 1.9707637003406075e-06, "loss": 1.2343, "step": 774 }, { "epoch": 0.10506337694028334, "grad_norm": 5.774190065306855, "learning_rate": 1.9706581973237202e-06, "loss": 1.2477, "step": 775 }, { "epoch": 0.10519894258794821, "grad_norm": 5.387076565517426, "learning_rate": 1.9705525071230616e-06, "loss": 1.2613, "step": 776 }, { "epoch": 0.10533450823561309, "grad_norm": 5.0168580258216116, "learning_rate": 1.9704466297590134e-06, "loss": 1.2735, "step": 777 }, { "epoch": 0.10547007388327798, "grad_norm": 5.031217062566614, "learning_rate": 1.9703405652519924e-06, "loss": 1.2958, "step": 778 }, { "epoch": 0.10560563953094286, "grad_norm": 3.1841451602669157, "learning_rate": 1.970234313622453e-06, "loss": 1.2761, "step": 779 }, { "epoch": 0.10574120517860774, "grad_norm": 4.967122113359004, "learning_rate": 1.9701278748908844e-06, "loss": 1.2705, "step": 780 }, { "epoch": 0.10587677082627263, "grad_norm": 3.3965237934419377, "learning_rate": 1.9700212490778136e-06, "loss": 1.2625, "step": 781 }, { "epoch": 0.1060123364739375, "grad_norm": 3.5983782845712566, "learning_rate": 1.969914436203801e-06, "loss": 1.2759, "step": 782 }, { "epoch": 0.10614790212160238, "grad_norm": 6.0504167170886145, "learning_rate": 1.9698074362894456e-06, "loss": 1.2892, "step": 783 }, { "epoch": 0.10628346776926727, "grad_norm": 7.277099614907581, "learning_rate": 1.9697002493553815e-06, "loss": 1.248, "step": 784 }, { "epoch": 0.10641903341693215, "grad_norm": 3.888296050036849, "learning_rate": 1.969592875422279e-06, "loss": 1.282, "step": 785 }, { "epoch": 0.10655459906459704, "grad_norm": 4.928363959519651, "learning_rate": 1.9694853145108433e-06, "loss": 1.2459, "step": 786 }, { "epoch": 0.1066901647122619, "grad_norm": 4.260605045174764, "learning_rate": 1.969377566641818e-06, "loss": 1.2694, "step": 787 }, { "epoch": 0.10682573035992679, "grad_norm": 12.855583444885662, "learning_rate": 1.96926963183598e-06, "loss": 1.2807, "step": 788 }, { "epoch": 0.10696129600759167, "grad_norm": 4.554832968395204, "learning_rate": 1.9691615101141454e-06, "loss": 1.2799, "step": 789 }, { "epoch": 0.10709686165525656, "grad_norm": 4.4914587419446095, "learning_rate": 1.969053201497163e-06, "loss": 1.2661, "step": 790 }, { "epoch": 0.10723242730292144, "grad_norm": 5.418242547771152, "learning_rate": 1.96894470600592e-06, "loss": 1.2724, "step": 791 }, { "epoch": 0.10736799295058633, "grad_norm": 4.997144741138823, "learning_rate": 1.9688360236613388e-06, "loss": 1.2275, "step": 792 }, { "epoch": 0.1075035585982512, "grad_norm": 4.281543146854012, "learning_rate": 1.968727154484378e-06, "loss": 1.3022, "step": 793 }, { "epoch": 0.10763912424591608, "grad_norm": 7.366287646614089, "learning_rate": 1.968618098496032e-06, "loss": 1.2601, "step": 794 }, { "epoch": 0.10777468989358097, "grad_norm": 6.121791420897725, "learning_rate": 1.9685088557173318e-06, "loss": 1.2647, "step": 795 }, { "epoch": 0.10791025554124585, "grad_norm": 5.403732703425754, "learning_rate": 1.968399426169344e-06, "loss": 1.2617, "step": 796 }, { "epoch": 0.10804582118891073, "grad_norm": 4.560435027331551, "learning_rate": 1.9682898098731707e-06, "loss": 1.2252, "step": 797 }, { "epoch": 0.1081813868365756, "grad_norm": 4.85874092410286, "learning_rate": 1.9681800068499507e-06, "loss": 1.2685, "step": 798 }, { "epoch": 0.10831695248424049, "grad_norm": 42.718039467117855, "learning_rate": 1.9680700171208583e-06, "loss": 1.2973, "step": 799 }, { "epoch": 0.10845251813190537, "grad_norm": 3.528348997542481, "learning_rate": 1.9679598407071053e-06, "loss": 1.2838, "step": 800 }, { "epoch": 0.10858808377957026, "grad_norm": 43.48624643127659, "learning_rate": 1.967849477629937e-06, "loss": 1.2624, "step": 801 }, { "epoch": 0.10872364942723514, "grad_norm": 7.621416030784088, "learning_rate": 1.9677389279106367e-06, "loss": 1.2508, "step": 802 }, { "epoch": 0.10885921507490003, "grad_norm": 30.30086256123812, "learning_rate": 1.9676281915705236e-06, "loss": 1.2391, "step": 803 }, { "epoch": 0.1089947807225649, "grad_norm": 3.549041767988706, "learning_rate": 1.9675172686309516e-06, "loss": 1.264, "step": 804 }, { "epoch": 0.10913034637022978, "grad_norm": 5.297623867200432, "learning_rate": 1.9674061591133114e-06, "loss": 1.26, "step": 805 }, { "epoch": 0.10926591201789466, "grad_norm": 93.05890271957647, "learning_rate": 1.9672948630390295e-06, "loss": 1.2282, "step": 806 }, { "epoch": 0.10940147766555955, "grad_norm": 5.282213953287624, "learning_rate": 1.9671833804295684e-06, "loss": 1.2819, "step": 807 }, { "epoch": 0.10953704331322443, "grad_norm": 13.979330793595205, "learning_rate": 1.967071711306427e-06, "loss": 1.2708, "step": 808 }, { "epoch": 0.10967260896088932, "grad_norm": 6.33879996995674, "learning_rate": 1.96695985569114e-06, "loss": 1.269, "step": 809 }, { "epoch": 0.10980817460855419, "grad_norm": 11.169941073274936, "learning_rate": 1.966847813605277e-06, "loss": 1.2536, "step": 810 }, { "epoch": 0.10994374025621907, "grad_norm": 4.796339876169145, "learning_rate": 1.9667355850704456e-06, "loss": 1.2897, "step": 811 }, { "epoch": 0.11007930590388396, "grad_norm": 4.9742584733621875, "learning_rate": 1.9666231701082876e-06, "loss": 1.2111, "step": 812 }, { "epoch": 0.11021487155154884, "grad_norm": 4.102313179211866, "learning_rate": 1.966510568740481e-06, "loss": 1.2475, "step": 813 }, { "epoch": 0.11035043719921372, "grad_norm": 6.053263405599289, "learning_rate": 1.9663977809887406e-06, "loss": 1.2424, "step": 814 }, { "epoch": 0.1104860028468786, "grad_norm": 5.86414238962591, "learning_rate": 1.966284806874816e-06, "loss": 1.2153, "step": 815 }, { "epoch": 0.11062156849454348, "grad_norm": 4.885135357356571, "learning_rate": 1.966171646420494e-06, "loss": 1.2543, "step": 816 }, { "epoch": 0.11075713414220836, "grad_norm": 5.284224855348902, "learning_rate": 1.9660582996475962e-06, "loss": 1.2797, "step": 817 }, { "epoch": 0.11089269978987325, "grad_norm": 6.323017854104721, "learning_rate": 1.9659447665779815e-06, "loss": 1.2175, "step": 818 }, { "epoch": 0.11102826543753813, "grad_norm": 5.042166337458198, "learning_rate": 1.965831047233543e-06, "loss": 1.2429, "step": 819 }, { "epoch": 0.11116383108520302, "grad_norm": 4.41947127308469, "learning_rate": 1.965717141636211e-06, "loss": 1.2579, "step": 820 }, { "epoch": 0.11129939673286789, "grad_norm": 6.880629787270314, "learning_rate": 1.9656030498079507e-06, "loss": 1.2669, "step": 821 }, { "epoch": 0.11143496238053277, "grad_norm": 5.3672232196622645, "learning_rate": 1.9654887717707645e-06, "loss": 1.2468, "step": 822 }, { "epoch": 0.11157052802819765, "grad_norm": 11.24535217808232, "learning_rate": 1.96537430754669e-06, "loss": 1.2919, "step": 823 }, { "epoch": 0.11170609367586254, "grad_norm": 5.410182568119717, "learning_rate": 1.9652596571578003e-06, "loss": 1.2879, "step": 824 }, { "epoch": 0.11184165932352742, "grad_norm": 6.293662098322281, "learning_rate": 1.9651448206262047e-06, "loss": 1.2519, "step": 825 }, { "epoch": 0.1119772249711923, "grad_norm": 6.040546136899566, "learning_rate": 1.965029797974049e-06, "loss": 1.2576, "step": 826 }, { "epoch": 0.11211279061885718, "grad_norm": 16.29542964092758, "learning_rate": 1.9649145892235145e-06, "loss": 1.2264, "step": 827 }, { "epoch": 0.11224835626652206, "grad_norm": 20.805126872018903, "learning_rate": 1.964799194396818e-06, "loss": 1.2466, "step": 828 }, { "epoch": 0.11238392191418695, "grad_norm": 12.433747481929636, "learning_rate": 1.9646836135162125e-06, "loss": 1.2563, "step": 829 }, { "epoch": 0.11251948756185183, "grad_norm": 5.981867311775587, "learning_rate": 1.9645678466039864e-06, "loss": 1.2275, "step": 830 }, { "epoch": 0.11265505320951671, "grad_norm": 8.66315180982918, "learning_rate": 1.9644518936824658e-06, "loss": 1.2691, "step": 831 }, { "epoch": 0.11279061885718158, "grad_norm": 12.92732849276207, "learning_rate": 1.9643357547740097e-06, "loss": 1.2375, "step": 832 }, { "epoch": 0.11292618450484647, "grad_norm": 8.083197668766635, "learning_rate": 1.9642194299010155e-06, "loss": 1.1863, "step": 833 }, { "epoch": 0.11306175015251135, "grad_norm": 4.030425777115433, "learning_rate": 1.9641029190859155e-06, "loss": 1.2401, "step": 834 }, { "epoch": 0.11319731580017624, "grad_norm": 4.837900750913607, "learning_rate": 1.9639862223511777e-06, "loss": 1.2325, "step": 835 }, { "epoch": 0.11333288144784112, "grad_norm": 3.63573700737395, "learning_rate": 1.9638693397193057e-06, "loss": 1.223, "step": 836 }, { "epoch": 0.11346844709550599, "grad_norm": 3.994583655383387, "learning_rate": 1.9637522712128407e-06, "loss": 1.237, "step": 837 }, { "epoch": 0.11360401274317088, "grad_norm": 5.681012764593559, "learning_rate": 1.963635016854357e-06, "loss": 1.2569, "step": 838 }, { "epoch": 0.11373957839083576, "grad_norm": 9.925044968129558, "learning_rate": 1.963517576666467e-06, "loss": 1.2547, "step": 839 }, { "epoch": 0.11387514403850064, "grad_norm": 7.890249596690955, "learning_rate": 1.9633999506718176e-06, "loss": 1.2526, "step": 840 }, { "epoch": 0.11401070968616553, "grad_norm": 6.207649775125442, "learning_rate": 1.9632821388930926e-06, "loss": 1.2255, "step": 841 }, { "epoch": 0.11414627533383041, "grad_norm": 4.442069674081206, "learning_rate": 1.9631641413530102e-06, "loss": 1.2241, "step": 842 }, { "epoch": 0.11428184098149528, "grad_norm": 7.473112084701818, "learning_rate": 1.9630459580743264e-06, "loss": 1.2734, "step": 843 }, { "epoch": 0.11441740662916017, "grad_norm": 4.20288905759056, "learning_rate": 1.9629275890798315e-06, "loss": 1.2546, "step": 844 }, { "epoch": 0.11455297227682505, "grad_norm": 11.808988923546032, "learning_rate": 1.962809034392352e-06, "loss": 1.2598, "step": 845 }, { "epoch": 0.11468853792448994, "grad_norm": 6.281949929396883, "learning_rate": 1.96269029403475e-06, "loss": 1.2742, "step": 846 }, { "epoch": 0.11482410357215482, "grad_norm": 4.770413268992024, "learning_rate": 1.962571368029924e-06, "loss": 1.2622, "step": 847 }, { "epoch": 0.1149596692198197, "grad_norm": 6.440886313080073, "learning_rate": 1.9624522564008074e-06, "loss": 1.2448, "step": 848 }, { "epoch": 0.11509523486748457, "grad_norm": 4.252670865458973, "learning_rate": 1.9623329591703706e-06, "loss": 1.2454, "step": 849 }, { "epoch": 0.11523080051514946, "grad_norm": 5.27784712033425, "learning_rate": 1.962213476361619e-06, "loss": 1.2773, "step": 850 }, { "epoch": 0.11536636616281434, "grad_norm": 3.8835948494870642, "learning_rate": 1.962093807997593e-06, "loss": 1.255, "step": 851 }, { "epoch": 0.11550193181047923, "grad_norm": 5.068259171362372, "learning_rate": 1.961973954101371e-06, "loss": 1.2742, "step": 852 }, { "epoch": 0.11563749745814411, "grad_norm": 8.71559543902427, "learning_rate": 1.961853914696065e-06, "loss": 1.2644, "step": 853 }, { "epoch": 0.11577306310580898, "grad_norm": 5.559568949704769, "learning_rate": 1.961733689804824e-06, "loss": 1.2243, "step": 854 }, { "epoch": 0.11590862875347387, "grad_norm": 5.526495079870968, "learning_rate": 1.961613279450833e-06, "loss": 1.2665, "step": 855 }, { "epoch": 0.11604419440113875, "grad_norm": 6.390901413468549, "learning_rate": 1.9614926836573107e-06, "loss": 1.2745, "step": 856 }, { "epoch": 0.11617976004880363, "grad_norm": 4.309480335024567, "learning_rate": 1.9613719024475145e-06, "loss": 1.2573, "step": 857 }, { "epoch": 0.11631532569646852, "grad_norm": 7.273780013732136, "learning_rate": 1.961250935844735e-06, "loss": 1.2795, "step": 858 }, { "epoch": 0.1164508913441334, "grad_norm": 6.400301530138345, "learning_rate": 1.9611297838723007e-06, "loss": 1.2503, "step": 859 }, { "epoch": 0.11658645699179827, "grad_norm": 6.6046030489150604, "learning_rate": 1.961008446553574e-06, "loss": 1.2775, "step": 860 }, { "epoch": 0.11672202263946316, "grad_norm": 16.847395978723043, "learning_rate": 1.9608869239119545e-06, "loss": 1.2782, "step": 861 }, { "epoch": 0.11685758828712804, "grad_norm": 7.1819276235494485, "learning_rate": 1.960765215970876e-06, "loss": 1.2306, "step": 862 }, { "epoch": 0.11699315393479293, "grad_norm": 7.957845717164709, "learning_rate": 1.9606433227538095e-06, "loss": 1.2791, "step": 863 }, { "epoch": 0.11712871958245781, "grad_norm": 5.709267497452056, "learning_rate": 1.960521244284261e-06, "loss": 1.2919, "step": 864 }, { "epoch": 0.11726428523012268, "grad_norm": 5.792564667200271, "learning_rate": 1.960398980585773e-06, "loss": 1.2412, "step": 865 }, { "epoch": 0.11739985087778756, "grad_norm": 4.919676325051405, "learning_rate": 1.960276531681922e-06, "loss": 1.2378, "step": 866 }, { "epoch": 0.11753541652545245, "grad_norm": 4.638248445856451, "learning_rate": 1.960153897596322e-06, "loss": 1.2429, "step": 867 }, { "epoch": 0.11767098217311733, "grad_norm": 7.620021744334929, "learning_rate": 1.960031078352622e-06, "loss": 1.247, "step": 868 }, { "epoch": 0.11780654782078222, "grad_norm": 5.852259970152706, "learning_rate": 1.9599080739745064e-06, "loss": 1.2396, "step": 869 }, { "epoch": 0.1179421134684471, "grad_norm": 6.59359159787049, "learning_rate": 1.9597848844856955e-06, "loss": 1.2658, "step": 870 }, { "epoch": 0.11807767911611197, "grad_norm": 7.776169626899859, "learning_rate": 1.959661509909946e-06, "loss": 1.2386, "step": 871 }, { "epoch": 0.11821324476377686, "grad_norm": 4.639231226712726, "learning_rate": 1.9595379502710495e-06, "loss": 1.2439, "step": 872 }, { "epoch": 0.11834881041144174, "grad_norm": 8.719085559565222, "learning_rate": 1.9594142055928333e-06, "loss": 1.2245, "step": 873 }, { "epoch": 0.11848437605910662, "grad_norm": 15.258982950126379, "learning_rate": 1.9592902758991606e-06, "loss": 1.2549, "step": 874 }, { "epoch": 0.11861994170677151, "grad_norm": 5.525767711830907, "learning_rate": 1.9591661612139306e-06, "loss": 1.2578, "step": 875 }, { "epoch": 0.11875550735443638, "grad_norm": 3.917821562322089, "learning_rate": 1.9590418615610775e-06, "loss": 1.268, "step": 876 }, { "epoch": 0.11889107300210126, "grad_norm": 9.66657983524901, "learning_rate": 1.9589173769645714e-06, "loss": 1.2448, "step": 877 }, { "epoch": 0.11902663864976615, "grad_norm": 5.56979459072797, "learning_rate": 1.958792707448419e-06, "loss": 1.2608, "step": 878 }, { "epoch": 0.11916220429743103, "grad_norm": 5.8832107693237, "learning_rate": 1.9586678530366606e-06, "loss": 1.243, "step": 879 }, { "epoch": 0.11929776994509592, "grad_norm": 4.569151046925509, "learning_rate": 1.958542813753374e-06, "loss": 1.2848, "step": 880 }, { "epoch": 0.1194333355927608, "grad_norm": 4.684545024987991, "learning_rate": 1.9584175896226725e-06, "loss": 1.2614, "step": 881 }, { "epoch": 0.11956890124042567, "grad_norm": 17.290137249785243, "learning_rate": 1.9582921806687037e-06, "loss": 1.2593, "step": 882 }, { "epoch": 0.11970446688809055, "grad_norm": 4.712713964323381, "learning_rate": 1.9581665869156526e-06, "loss": 1.2201, "step": 883 }, { "epoch": 0.11984003253575544, "grad_norm": 75.77731956866367, "learning_rate": 1.958040808387738e-06, "loss": 1.2294, "step": 884 }, { "epoch": 0.11997559818342032, "grad_norm": 4.08899598401549, "learning_rate": 1.9579148451092163e-06, "loss": 1.2357, "step": 885 }, { "epoch": 0.12011116383108521, "grad_norm": 6.5653905437498015, "learning_rate": 1.957788697104378e-06, "loss": 1.2894, "step": 886 }, { "epoch": 0.12024672947875008, "grad_norm": 5.5157070356990365, "learning_rate": 1.9576623643975496e-06, "loss": 1.2406, "step": 887 }, { "epoch": 0.12038229512641496, "grad_norm": 5.476294420738289, "learning_rate": 1.9575358470130934e-06, "loss": 1.2405, "step": 888 }, { "epoch": 0.12051786077407985, "grad_norm": 93.81552460767558, "learning_rate": 1.9574091449754074e-06, "loss": 1.2518, "step": 889 }, { "epoch": 0.12065342642174473, "grad_norm": 8.06651391065244, "learning_rate": 1.9572822583089253e-06, "loss": 1.273, "step": 890 }, { "epoch": 0.12078899206940961, "grad_norm": 5.655946375789121, "learning_rate": 1.9571551870381163e-06, "loss": 1.247, "step": 891 }, { "epoch": 0.1209245577170745, "grad_norm": 8.166936673897352, "learning_rate": 1.9570279311874842e-06, "loss": 1.2499, "step": 892 }, { "epoch": 0.12106012336473937, "grad_norm": 8.9955301953271, "learning_rate": 1.9569004907815706e-06, "loss": 1.2386, "step": 893 }, { "epoch": 0.12119568901240425, "grad_norm": 9.052259419779741, "learning_rate": 1.9567728658449503e-06, "loss": 1.2309, "step": 894 }, { "epoch": 0.12133125466006914, "grad_norm": 6.941720440591538, "learning_rate": 1.956645056402235e-06, "loss": 1.232, "step": 895 }, { "epoch": 0.12146682030773402, "grad_norm": 6.507519429184931, "learning_rate": 1.956517062478072e-06, "loss": 1.2431, "step": 896 }, { "epoch": 0.1216023859553989, "grad_norm": 5.545532417563132, "learning_rate": 1.956388884097144e-06, "loss": 1.2754, "step": 897 }, { "epoch": 0.12173795160306379, "grad_norm": 5.039901985709906, "learning_rate": 1.9562605212841686e-06, "loss": 1.2534, "step": 898 }, { "epoch": 0.12187351725072866, "grad_norm": 6.237096472297131, "learning_rate": 1.9561319740639e-06, "loss": 1.2578, "step": 899 }, { "epoch": 0.12200908289839354, "grad_norm": 12.002276209686048, "learning_rate": 1.9560032424611274e-06, "loss": 1.2425, "step": 900 }, { "epoch": 0.12214464854605843, "grad_norm": 4.8982436933410645, "learning_rate": 1.955874326500676e-06, "loss": 1.2671, "step": 901 }, { "epoch": 0.12228021419372331, "grad_norm": 8.68225597528566, "learning_rate": 1.955745226207406e-06, "loss": 1.222, "step": 902 }, { "epoch": 0.1224157798413882, "grad_norm": 7.398625800291473, "learning_rate": 1.9556159416062127e-06, "loss": 1.2138, "step": 903 }, { "epoch": 0.12255134548905307, "grad_norm": 5.934518755776278, "learning_rate": 1.955486472722029e-06, "loss": 1.2793, "step": 904 }, { "epoch": 0.12268691113671795, "grad_norm": 5.458525374825778, "learning_rate": 1.955356819579821e-06, "loss": 1.2225, "step": 905 }, { "epoch": 0.12282247678438284, "grad_norm": 5.898610019170786, "learning_rate": 1.955226982204591e-06, "loss": 1.2433, "step": 906 }, { "epoch": 0.12295804243204772, "grad_norm": 7.144919282899134, "learning_rate": 1.955096960621378e-06, "loss": 1.2559, "step": 907 }, { "epoch": 0.1230936080797126, "grad_norm": 3.983599256168923, "learning_rate": 1.9549667548552553e-06, "loss": 1.2346, "step": 908 }, { "epoch": 0.12322917372737749, "grad_norm": 22.67350666266334, "learning_rate": 1.9548363649313315e-06, "loss": 1.244, "step": 909 }, { "epoch": 0.12336473937504236, "grad_norm": 7.15769462254712, "learning_rate": 1.9547057908747522e-06, "loss": 1.2382, "step": 910 }, { "epoch": 0.12350030502270724, "grad_norm": 4.298376458713009, "learning_rate": 1.954575032710697e-06, "loss": 1.239, "step": 911 }, { "epoch": 0.12363587067037213, "grad_norm": 5.440683780385679, "learning_rate": 1.954444090464382e-06, "loss": 1.254, "step": 912 }, { "epoch": 0.12377143631803701, "grad_norm": 7.032223836440052, "learning_rate": 1.9543129641610575e-06, "loss": 1.2321, "step": 913 }, { "epoch": 0.1239070019657019, "grad_norm": 78.05426365468912, "learning_rate": 1.9541816538260105e-06, "loss": 1.2685, "step": 914 }, { "epoch": 0.12404256761336677, "grad_norm": 10.083241673344281, "learning_rate": 1.954050159484564e-06, "loss": 1.263, "step": 915 }, { "epoch": 0.12417813326103165, "grad_norm": 4.500530359254375, "learning_rate": 1.953918481162075e-06, "loss": 1.2317, "step": 916 }, { "epoch": 0.12431369890869653, "grad_norm": 9.497239911535377, "learning_rate": 1.953786618883937e-06, "loss": 1.2263, "step": 917 }, { "epoch": 0.12444926455636142, "grad_norm": 36.33969096974355, "learning_rate": 1.953654572675578e-06, "loss": 1.2948, "step": 918 }, { "epoch": 0.1245848302040263, "grad_norm": 6.145638435415509, "learning_rate": 1.953522342562462e-06, "loss": 1.222, "step": 919 }, { "epoch": 0.12472039585169119, "grad_norm": 9.57484169280885, "learning_rate": 1.9533899285700893e-06, "loss": 1.1991, "step": 920 }, { "epoch": 0.12485596149935606, "grad_norm": 9.474791819436554, "learning_rate": 1.9532573307239942e-06, "loss": 1.2005, "step": 921 }, { "epoch": 0.12499152714702094, "grad_norm": 10.378340761034387, "learning_rate": 1.9531245490497475e-06, "loss": 1.25, "step": 922 }, { "epoch": 0.12512709279468584, "grad_norm": 7.147646316780686, "learning_rate": 1.952991583572955e-06, "loss": 1.2491, "step": 923 }, { "epoch": 0.1252626584423507, "grad_norm": 6.936908646572636, "learning_rate": 1.9528584343192583e-06, "loss": 1.2373, "step": 924 }, { "epoch": 0.12539822409001558, "grad_norm": 4.449650834268529, "learning_rate": 1.9527251013143338e-06, "loss": 1.2333, "step": 925 }, { "epoch": 0.12553378973768048, "grad_norm": 5.947828614942066, "learning_rate": 1.9525915845838942e-06, "loss": 1.2421, "step": 926 }, { "epoch": 0.12566935538534535, "grad_norm": 7.879524397508392, "learning_rate": 1.952457884153686e-06, "loss": 1.2212, "step": 927 }, { "epoch": 0.12580492103301025, "grad_norm": 9.132943213996072, "learning_rate": 1.952324000049494e-06, "loss": 1.2705, "step": 928 }, { "epoch": 0.12594048668067512, "grad_norm": 8.028022443952434, "learning_rate": 1.952189932297135e-06, "loss": 1.2358, "step": 929 }, { "epoch": 0.12607605232834, "grad_norm": 6.106480450981467, "learning_rate": 1.9520556809224643e-06, "loss": 1.2571, "step": 930 }, { "epoch": 0.1262116179760049, "grad_norm": 16.666315656553373, "learning_rate": 1.9519212459513702e-06, "loss": 1.3023, "step": 931 }, { "epoch": 0.12634718362366976, "grad_norm": 7.720891854816692, "learning_rate": 1.951786627409778e-06, "loss": 1.2177, "step": 932 }, { "epoch": 0.12648274927133465, "grad_norm": 5.766872716135944, "learning_rate": 1.9516518253236474e-06, "loss": 1.2506, "step": 933 }, { "epoch": 0.12661831491899952, "grad_norm": 88.60705491101177, "learning_rate": 1.9515168397189743e-06, "loss": 1.207, "step": 934 }, { "epoch": 0.1267538805666644, "grad_norm": 5.761567678371702, "learning_rate": 1.95138167062179e-06, "loss": 1.2279, "step": 935 }, { "epoch": 0.1268894462143293, "grad_norm": 4.462843825530445, "learning_rate": 1.9512463180581595e-06, "loss": 1.2417, "step": 936 }, { "epoch": 0.12702501186199416, "grad_norm": 4.548082540787862, "learning_rate": 1.9511107820541857e-06, "loss": 1.2241, "step": 937 }, { "epoch": 0.12716057750965906, "grad_norm": 4.940308272435773, "learning_rate": 1.9509750626360053e-06, "loss": 1.2414, "step": 938 }, { "epoch": 0.12729614315732393, "grad_norm": 4.942846032977566, "learning_rate": 1.95083915982979e-06, "loss": 1.2309, "step": 939 }, { "epoch": 0.1274317088049888, "grad_norm": 12.897431241652002, "learning_rate": 1.950703073661749e-06, "loss": 1.2674, "step": 940 }, { "epoch": 0.1275672744526537, "grad_norm": 7.074236688006131, "learning_rate": 1.950566804158124e-06, "loss": 1.2364, "step": 941 }, { "epoch": 0.12770284010031857, "grad_norm": 6.084392335256531, "learning_rate": 1.9504303513451944e-06, "loss": 1.2161, "step": 942 }, { "epoch": 0.12783840574798347, "grad_norm": 10.52394625311439, "learning_rate": 1.9502937152492737e-06, "loss": 1.228, "step": 943 }, { "epoch": 0.12797397139564834, "grad_norm": 15.885898260340156, "learning_rate": 1.950156895896711e-06, "loss": 1.2445, "step": 944 }, { "epoch": 0.12810953704331324, "grad_norm": 13.701981531422826, "learning_rate": 1.9500198933138914e-06, "loss": 1.2314, "step": 945 }, { "epoch": 0.1282451026909781, "grad_norm": 5.273974755794971, "learning_rate": 1.949882707527234e-06, "loss": 1.2204, "step": 946 }, { "epoch": 0.12838066833864298, "grad_norm": 8.93314045591767, "learning_rate": 1.949745338563195e-06, "loss": 1.224, "step": 947 }, { "epoch": 0.12851623398630788, "grad_norm": 6.056287384410954, "learning_rate": 1.949607786448264e-06, "loss": 1.2281, "step": 948 }, { "epoch": 0.12865179963397275, "grad_norm": 7.438146868683628, "learning_rate": 1.9494700512089664e-06, "loss": 1.2759, "step": 949 }, { "epoch": 0.12878736528163764, "grad_norm": 9.650293845428301, "learning_rate": 1.949332132871865e-06, "loss": 1.2166, "step": 950 }, { "epoch": 0.12892293092930251, "grad_norm": 4.87962336199158, "learning_rate": 1.9491940314635553e-06, "loss": 1.219, "step": 951 }, { "epoch": 0.12905849657696739, "grad_norm": 14.031618772335566, "learning_rate": 1.9490557470106686e-06, "loss": 1.2697, "step": 952 }, { "epoch": 0.12919406222463228, "grad_norm": 4.771609246899278, "learning_rate": 1.9489172795398727e-06, "loss": 1.2435, "step": 953 }, { "epoch": 0.12932962787229715, "grad_norm": 5.9052356951730705, "learning_rate": 1.9487786290778696e-06, "loss": 1.2355, "step": 954 }, { "epoch": 0.12946519351996205, "grad_norm": 33.28581091486867, "learning_rate": 1.9486397956513975e-06, "loss": 1.2271, "step": 955 }, { "epoch": 0.12960075916762692, "grad_norm": 4.666076365924516, "learning_rate": 1.9485007792872285e-06, "loss": 1.2655, "step": 956 }, { "epoch": 0.1297363248152918, "grad_norm": 4.726340042979972, "learning_rate": 1.9483615800121713e-06, "loss": 1.2217, "step": 957 }, { "epoch": 0.1298718904629567, "grad_norm": 5.450419838780576, "learning_rate": 1.9482221978530695e-06, "loss": 1.2269, "step": 958 }, { "epoch": 0.13000745611062156, "grad_norm": 6.039501940301494, "learning_rate": 1.9480826328368018e-06, "loss": 1.2433, "step": 959 }, { "epoch": 0.13014302175828646, "grad_norm": 6.854682318748389, "learning_rate": 1.9479428849902816e-06, "loss": 1.2127, "step": 960 }, { "epoch": 0.13027858740595133, "grad_norm": 6.922804415959161, "learning_rate": 1.9478029543404587e-06, "loss": 1.2528, "step": 961 }, { "epoch": 0.13041415305361623, "grad_norm": 17.166576858067803, "learning_rate": 1.9476628409143177e-06, "loss": 1.2506, "step": 962 }, { "epoch": 0.1305497187012811, "grad_norm": 5.254974796925736, "learning_rate": 1.9475225447388787e-06, "loss": 1.2639, "step": 963 }, { "epoch": 0.13068528434894597, "grad_norm": 6.62015885786989, "learning_rate": 1.9473820658411954e-06, "loss": 1.2446, "step": 964 }, { "epoch": 0.13082084999661087, "grad_norm": 5.269209547916162, "learning_rate": 1.9472414042483594e-06, "loss": 1.2149, "step": 965 }, { "epoch": 0.13095641564427574, "grad_norm": 4.1509722552989095, "learning_rate": 1.9471005599874955e-06, "loss": 1.2448, "step": 966 }, { "epoch": 0.13109198129194063, "grad_norm": 4.165093510715309, "learning_rate": 1.9469595330857644e-06, "loss": 1.238, "step": 967 }, { "epoch": 0.1312275469396055, "grad_norm": 7.807837585755742, "learning_rate": 1.946818323570362e-06, "loss": 1.224, "step": 968 }, { "epoch": 0.13136311258727038, "grad_norm": 4.990705758197052, "learning_rate": 1.9466769314685204e-06, "loss": 1.2164, "step": 969 }, { "epoch": 0.13149867823493527, "grad_norm": 4.812094676033353, "learning_rate": 1.9465353568075047e-06, "loss": 1.2309, "step": 970 }, { "epoch": 0.13163424388260014, "grad_norm": 5.109448176994002, "learning_rate": 1.946393599614617e-06, "loss": 1.2311, "step": 971 }, { "epoch": 0.13176980953026504, "grad_norm": 4.5438380821966495, "learning_rate": 1.9462516599171944e-06, "loss": 1.2596, "step": 972 }, { "epoch": 0.1319053751779299, "grad_norm": 6.966894976545739, "learning_rate": 1.946109537742608e-06, "loss": 1.1916, "step": 973 }, { "epoch": 0.13204094082559478, "grad_norm": 6.2664924931400465, "learning_rate": 1.945967233118265e-06, "loss": 1.2216, "step": 974 }, { "epoch": 0.13217650647325968, "grad_norm": 9.442960075036895, "learning_rate": 1.945824746071609e-06, "loss": 1.2112, "step": 975 }, { "epoch": 0.13231207212092455, "grad_norm": 8.924941957088649, "learning_rate": 1.945682076630116e-06, "loss": 1.2314, "step": 976 }, { "epoch": 0.13244763776858945, "grad_norm": 6.089329525764326, "learning_rate": 1.9455392248212995e-06, "loss": 1.2123, "step": 977 }, { "epoch": 0.13258320341625432, "grad_norm": 6.648577195334611, "learning_rate": 1.945396190672707e-06, "loss": 1.22, "step": 978 }, { "epoch": 0.1327187690639192, "grad_norm": 5.262600226838777, "learning_rate": 1.9452529742119214e-06, "loss": 1.2443, "step": 979 }, { "epoch": 0.1328543347115841, "grad_norm": 6.583957359935894, "learning_rate": 1.9451095754665613e-06, "loss": 1.2215, "step": 980 }, { "epoch": 0.13298990035924896, "grad_norm": 4.727102111492363, "learning_rate": 1.94496599446428e-06, "loss": 1.1994, "step": 981 }, { "epoch": 0.13312546600691386, "grad_norm": 6.10512434911499, "learning_rate": 1.9448222312327654e-06, "loss": 1.2276, "step": 982 }, { "epoch": 0.13326103165457873, "grad_norm": 6.574031506550019, "learning_rate": 1.944678285799742e-06, "loss": 1.2287, "step": 983 }, { "epoch": 0.13339659730224362, "grad_norm": 6.5988378650311805, "learning_rate": 1.944534158192968e-06, "loss": 1.2063, "step": 984 }, { "epoch": 0.1335321629499085, "grad_norm": 3.976381535192446, "learning_rate": 1.944389848440237e-06, "loss": 1.2328, "step": 985 }, { "epoch": 0.13366772859757337, "grad_norm": 14.039964967995271, "learning_rate": 1.9442453565693782e-06, "loss": 1.2153, "step": 986 }, { "epoch": 0.13380329424523826, "grad_norm": 4.212285014595893, "learning_rate": 1.944100682608256e-06, "loss": 1.194, "step": 987 }, { "epoch": 0.13393885989290313, "grad_norm": 4.849921161789654, "learning_rate": 1.943955826584769e-06, "loss": 1.2061, "step": 988 }, { "epoch": 0.13407442554056803, "grad_norm": 6.453307055660163, "learning_rate": 1.9438107885268525e-06, "loss": 1.2318, "step": 989 }, { "epoch": 0.1342099911882329, "grad_norm": 6.607753784372047, "learning_rate": 1.9436655684624755e-06, "loss": 1.271, "step": 990 }, { "epoch": 0.13434555683589777, "grad_norm": 6.128488461199083, "learning_rate": 1.9435201664196424e-06, "loss": 1.211, "step": 991 }, { "epoch": 0.13448112248356267, "grad_norm": 4.665526671937368, "learning_rate": 1.9433745824263924e-06, "loss": 1.196, "step": 992 }, { "epoch": 0.13461668813122754, "grad_norm": 5.532539339804926, "learning_rate": 1.943228816510801e-06, "loss": 1.2561, "step": 993 }, { "epoch": 0.13475225377889244, "grad_norm": 4.065568317642403, "learning_rate": 1.943082868700978e-06, "loss": 1.2379, "step": 994 }, { "epoch": 0.1348878194265573, "grad_norm": 5.562814565004131, "learning_rate": 1.9429367390250676e-06, "loss": 1.2271, "step": 995 }, { "epoch": 0.13502338507422218, "grad_norm": 5.044087926492237, "learning_rate": 1.942790427511251e-06, "loss": 1.1966, "step": 996 }, { "epoch": 0.13515895072188708, "grad_norm": 8.453975898120566, "learning_rate": 1.9426439341877412e-06, "loss": 1.2395, "step": 997 }, { "epoch": 0.13529451636955195, "grad_norm": 6.267574426983016, "learning_rate": 1.94249725908279e-06, "loss": 1.2218, "step": 998 }, { "epoch": 0.13543008201721685, "grad_norm": 5.656065068700128, "learning_rate": 1.942350402224682e-06, "loss": 1.2665, "step": 999 }, { "epoch": 0.13556564766488172, "grad_norm": 5.696287338986107, "learning_rate": 1.942203363641738e-06, "loss": 1.2156, "step": 1000 }, { "epoch": 0.1357012133125466, "grad_norm": 6.909890971964008, "learning_rate": 1.942056143362312e-06, "loss": 1.2106, "step": 1001 }, { "epoch": 0.13583677896021149, "grad_norm": 9.086520841013677, "learning_rate": 1.941908741414795e-06, "loss": 1.2335, "step": 1002 }, { "epoch": 0.13597234460787636, "grad_norm": 5.125289515227048, "learning_rate": 1.941761157827612e-06, "loss": 1.2167, "step": 1003 }, { "epoch": 0.13610791025554125, "grad_norm": 7.0314212073072015, "learning_rate": 1.9416133926292236e-06, "loss": 1.2187, "step": 1004 }, { "epoch": 0.13624347590320612, "grad_norm": 4.811393712777178, "learning_rate": 1.941465445848125e-06, "loss": 1.2487, "step": 1005 }, { "epoch": 0.13637904155087102, "grad_norm": 4.697451952572935, "learning_rate": 1.941317317512847e-06, "loss": 1.1993, "step": 1006 }, { "epoch": 0.1365146071985359, "grad_norm": 5.241752141770997, "learning_rate": 1.9411690076519545e-06, "loss": 1.1877, "step": 1007 }, { "epoch": 0.13665017284620076, "grad_norm": 5.139544077758041, "learning_rate": 1.941020516294048e-06, "loss": 1.2457, "step": 1008 }, { "epoch": 0.13678573849386566, "grad_norm": 8.754237561649528, "learning_rate": 1.9408718434677625e-06, "loss": 1.2471, "step": 1009 }, { "epoch": 0.13692130414153053, "grad_norm": 10.36617309429563, "learning_rate": 1.9407229892017694e-06, "loss": 1.2488, "step": 1010 }, { "epoch": 0.13705686978919543, "grad_norm": 6.927070474613726, "learning_rate": 1.940573953524773e-06, "loss": 1.2333, "step": 1011 }, { "epoch": 0.1371924354368603, "grad_norm": 8.163524558704946, "learning_rate": 1.9404247364655145e-06, "loss": 1.2122, "step": 1012 }, { "epoch": 0.13732800108452517, "grad_norm": 11.13209818145657, "learning_rate": 1.9402753380527684e-06, "loss": 1.2199, "step": 1013 }, { "epoch": 0.13746356673219007, "grad_norm": 7.2762846331667115, "learning_rate": 1.9401257583153456e-06, "loss": 1.2025, "step": 1014 }, { "epoch": 0.13759913237985494, "grad_norm": 5.2992966787901326, "learning_rate": 1.9399759972820913e-06, "loss": 1.2412, "step": 1015 }, { "epoch": 0.13773469802751984, "grad_norm": 6.224855749274224, "learning_rate": 1.9398260549818856e-06, "loss": 1.2295, "step": 1016 }, { "epoch": 0.1378702636751847, "grad_norm": 9.507043960039011, "learning_rate": 1.9396759314436435e-06, "loss": 1.2149, "step": 1017 }, { "epoch": 0.13800582932284958, "grad_norm": 4.180443778325843, "learning_rate": 1.939525626696316e-06, "loss": 1.2351, "step": 1018 }, { "epoch": 0.13814139497051448, "grad_norm": 4.163047338803655, "learning_rate": 1.9393751407688866e-06, "loss": 1.2014, "step": 1019 }, { "epoch": 0.13827696061817935, "grad_norm": 5.3985417498992785, "learning_rate": 1.9392244736903773e-06, "loss": 1.2735, "step": 1020 }, { "epoch": 0.13841252626584424, "grad_norm": 7.877108659307856, "learning_rate": 1.9390736254898414e-06, "loss": 1.2364, "step": 1021 }, { "epoch": 0.1385480919135091, "grad_norm": 6.558529805177947, "learning_rate": 1.9389225961963698e-06, "loss": 1.2506, "step": 1022 }, { "epoch": 0.138683657561174, "grad_norm": 5.3684134713572105, "learning_rate": 1.9387713858390863e-06, "loss": 1.2478, "step": 1023 }, { "epoch": 0.13881922320883888, "grad_norm": 5.6100885557694635, "learning_rate": 1.938619994447152e-06, "loss": 1.2038, "step": 1024 }, { "epoch": 0.13895478885650375, "grad_norm": 5.096506010372401, "learning_rate": 1.9384684220497604e-06, "loss": 1.2365, "step": 1025 }, { "epoch": 0.13909035450416865, "grad_norm": 5.248441644111186, "learning_rate": 1.9383166686761416e-06, "loss": 1.2643, "step": 1026 }, { "epoch": 0.13922592015183352, "grad_norm": 7.907827271014586, "learning_rate": 1.9381647343555596e-06, "loss": 1.2415, "step": 1027 }, { "epoch": 0.13936148579949842, "grad_norm": 5.975695737451974, "learning_rate": 1.938012619117314e-06, "loss": 1.2266, "step": 1028 }, { "epoch": 0.1394970514471633, "grad_norm": 6.681632846870104, "learning_rate": 1.9378603229907393e-06, "loss": 1.2031, "step": 1029 }, { "epoch": 0.13963261709482816, "grad_norm": 5.464012870847367, "learning_rate": 1.937707846005204e-06, "loss": 1.2046, "step": 1030 }, { "epoch": 0.13976818274249306, "grad_norm": 6.651151891712118, "learning_rate": 1.9375551881901127e-06, "loss": 1.2151, "step": 1031 }, { "epoch": 0.13990374839015793, "grad_norm": 5.097162189917296, "learning_rate": 1.937402349574904e-06, "loss": 1.201, "step": 1032 }, { "epoch": 0.14003931403782283, "grad_norm": 9.902483806774288, "learning_rate": 1.9372493301890517e-06, "loss": 1.2109, "step": 1033 }, { "epoch": 0.1401748796854877, "grad_norm": 9.960564635581385, "learning_rate": 1.9370961300620636e-06, "loss": 1.2511, "step": 1034 }, { "epoch": 0.14031044533315257, "grad_norm": 7.38700100237584, "learning_rate": 1.9369427492234846e-06, "loss": 1.1858, "step": 1035 }, { "epoch": 0.14044601098081747, "grad_norm": 4.448176035013963, "learning_rate": 1.9367891877028917e-06, "loss": 1.2334, "step": 1036 }, { "epoch": 0.14058157662848234, "grad_norm": 34.315632592080576, "learning_rate": 1.9366354455298987e-06, "loss": 1.2514, "step": 1037 }, { "epoch": 0.14071714227614723, "grad_norm": 8.479194279938012, "learning_rate": 1.936481522734153e-06, "loss": 1.228, "step": 1038 }, { "epoch": 0.1408527079238121, "grad_norm": 7.549265689311511, "learning_rate": 1.9363274193453383e-06, "loss": 1.231, "step": 1039 }, { "epoch": 0.14098827357147697, "grad_norm": 8.167687000896326, "learning_rate": 1.9361731353931714e-06, "loss": 1.2198, "step": 1040 }, { "epoch": 0.14112383921914187, "grad_norm": 8.751521718576463, "learning_rate": 1.936018670907405e-06, "loss": 1.2321, "step": 1041 }, { "epoch": 0.14125940486680674, "grad_norm": 5.4520113582050245, "learning_rate": 1.935864025917827e-06, "loss": 1.2423, "step": 1042 }, { "epoch": 0.14139497051447164, "grad_norm": 6.869515339466587, "learning_rate": 1.935709200454258e-06, "loss": 1.2244, "step": 1043 }, { "epoch": 0.1415305361621365, "grad_norm": 4.778829908532258, "learning_rate": 1.9355541945465563e-06, "loss": 1.2784, "step": 1044 }, { "epoch": 0.1416661018098014, "grad_norm": 15.737718543987205, "learning_rate": 1.9353990082246127e-06, "loss": 1.2956, "step": 1045 }, { "epoch": 0.14180166745746628, "grad_norm": 12.07669747035019, "learning_rate": 1.935243641518354e-06, "loss": 1.2583, "step": 1046 }, { "epoch": 0.14193723310513115, "grad_norm": 8.213702246901896, "learning_rate": 1.935088094457742e-06, "loss": 1.238, "step": 1047 }, { "epoch": 0.14207279875279605, "grad_norm": 5.948433943238501, "learning_rate": 1.9349323670727717e-06, "loss": 1.1899, "step": 1048 }, { "epoch": 0.14220836440046092, "grad_norm": 5.070042038970494, "learning_rate": 1.9347764593934743e-06, "loss": 1.2666, "step": 1049 }, { "epoch": 0.14234393004812582, "grad_norm": 7.4008741735795756, "learning_rate": 1.934620371449915e-06, "loss": 1.2456, "step": 1050 }, { "epoch": 0.1424794956957907, "grad_norm": 5.652715484821274, "learning_rate": 1.934464103272195e-06, "loss": 1.2371, "step": 1051 }, { "epoch": 0.14261506134345556, "grad_norm": 6.550046470509362, "learning_rate": 1.9343076548904483e-06, "loss": 1.2334, "step": 1052 }, { "epoch": 0.14275062699112046, "grad_norm": 7.786067434830767, "learning_rate": 1.9341510263348457e-06, "loss": 1.2095, "step": 1053 }, { "epoch": 0.14288619263878533, "grad_norm": 5.188412450654316, "learning_rate": 1.9339942176355916e-06, "loss": 1.2581, "step": 1054 }, { "epoch": 0.14302175828645022, "grad_norm": 4.490222562026627, "learning_rate": 1.933837228822925e-06, "loss": 1.2469, "step": 1055 }, { "epoch": 0.1431573239341151, "grad_norm": 6.175267067992332, "learning_rate": 1.9336800599271203e-06, "loss": 1.2485, "step": 1056 }, { "epoch": 0.14329288958177996, "grad_norm": 5.751325924062163, "learning_rate": 1.933522710978486e-06, "loss": 1.2327, "step": 1057 }, { "epoch": 0.14342845522944486, "grad_norm": 8.381536135198854, "learning_rate": 1.9333651820073655e-06, "loss": 1.2401, "step": 1058 }, { "epoch": 0.14356402087710973, "grad_norm": 8.566714234641264, "learning_rate": 1.933207473044137e-06, "loss": 1.1918, "step": 1059 }, { "epoch": 0.14369958652477463, "grad_norm": 11.790896178089993, "learning_rate": 1.9330495841192138e-06, "loss": 1.2442, "step": 1060 }, { "epoch": 0.1438351521724395, "grad_norm": 4.954456879051294, "learning_rate": 1.9328915152630435e-06, "loss": 1.2232, "step": 1061 }, { "epoch": 0.1439707178201044, "grad_norm": 5.618708258785032, "learning_rate": 1.932733266506108e-06, "loss": 1.2037, "step": 1062 }, { "epoch": 0.14410628346776927, "grad_norm": 6.659621725766414, "learning_rate": 1.9325748378789246e-06, "loss": 1.233, "step": 1063 }, { "epoch": 0.14424184911543414, "grad_norm": 12.7016651385268, "learning_rate": 1.9324162294120453e-06, "loss": 1.2618, "step": 1064 }, { "epoch": 0.14437741476309904, "grad_norm": 10.244762386074516, "learning_rate": 1.9322574411360557e-06, "loss": 1.2405, "step": 1065 }, { "epoch": 0.1445129804107639, "grad_norm": 6.832229677833453, "learning_rate": 1.932098473081578e-06, "loss": 1.2335, "step": 1066 }, { "epoch": 0.1446485460584288, "grad_norm": 6.645904745333331, "learning_rate": 1.931939325279267e-06, "loss": 1.2372, "step": 1067 }, { "epoch": 0.14478411170609368, "grad_norm": 13.18331309309852, "learning_rate": 1.9317799977598136e-06, "loss": 1.2079, "step": 1068 }, { "epoch": 0.14491967735375855, "grad_norm": 7.3223883126702685, "learning_rate": 1.9316204905539425e-06, "loss": 1.2456, "step": 1069 }, { "epoch": 0.14505524300142345, "grad_norm": 7.453023407823035, "learning_rate": 1.9314608036924133e-06, "loss": 1.2537, "step": 1070 }, { "epoch": 0.14519080864908832, "grad_norm": 10.105930844723463, "learning_rate": 1.931300937206021e-06, "loss": 1.2566, "step": 1071 }, { "epoch": 0.1453263742967532, "grad_norm": 4.927101463675154, "learning_rate": 1.931140891125594e-06, "loss": 1.2598, "step": 1072 }, { "epoch": 0.14546193994441808, "grad_norm": 8.055757928038656, "learning_rate": 1.9309806654819963e-06, "loss": 1.2761, "step": 1073 }, { "epoch": 0.14559750559208295, "grad_norm": 6.8734533828756, "learning_rate": 1.9308202603061258e-06, "loss": 1.2211, "step": 1074 }, { "epoch": 0.14573307123974785, "grad_norm": 29.967102147958148, "learning_rate": 1.9306596756289155e-06, "loss": 1.2592, "step": 1075 }, { "epoch": 0.14586863688741272, "grad_norm": 5.7280744105451245, "learning_rate": 1.930498911481333e-06, "loss": 1.2102, "step": 1076 }, { "epoch": 0.14600420253507762, "grad_norm": 6.1581548329944615, "learning_rate": 1.9303379678943805e-06, "loss": 1.2399, "step": 1077 }, { "epoch": 0.1461397681827425, "grad_norm": 6.072452882826994, "learning_rate": 1.9301768448990946e-06, "loss": 1.2035, "step": 1078 }, { "epoch": 0.14627533383040736, "grad_norm": 9.11568390949763, "learning_rate": 1.930015542526546e-06, "loss": 1.2143, "step": 1079 }, { "epoch": 0.14641089947807226, "grad_norm": 8.112153815233434, "learning_rate": 1.9298540608078417e-06, "loss": 1.2223, "step": 1080 }, { "epoch": 0.14654646512573713, "grad_norm": 8.247487480693545, "learning_rate": 1.9296923997741216e-06, "loss": 1.2386, "step": 1081 }, { "epoch": 0.14668203077340203, "grad_norm": 4.942548685557905, "learning_rate": 1.9295305594565604e-06, "loss": 1.2306, "step": 1082 }, { "epoch": 0.1468175964210669, "grad_norm": 6.921951651163756, "learning_rate": 1.9293685398863683e-06, "loss": 1.2073, "step": 1083 }, { "epoch": 0.1469531620687318, "grad_norm": 21.30578214751518, "learning_rate": 1.929206341094789e-06, "loss": 1.2536, "step": 1084 }, { "epoch": 0.14708872771639667, "grad_norm": 9.022700483519355, "learning_rate": 1.9290439631131018e-06, "loss": 1.2536, "step": 1085 }, { "epoch": 0.14722429336406154, "grad_norm": 5.529507797678988, "learning_rate": 1.9288814059726196e-06, "loss": 1.2286, "step": 1086 }, { "epoch": 0.14735985901172644, "grad_norm": 5.5478841525903215, "learning_rate": 1.92871866970469e-06, "loss": 1.2005, "step": 1087 }, { "epoch": 0.1474954246593913, "grad_norm": 7.859656648310155, "learning_rate": 1.9285557543406964e-06, "loss": 1.2806, "step": 1088 }, { "epoch": 0.1476309903070562, "grad_norm": 4.681029247145602, "learning_rate": 1.928392659912055e-06, "loss": 1.2498, "step": 1089 }, { "epoch": 0.14776655595472107, "grad_norm": 5.795127187585173, "learning_rate": 1.9282293864502176e-06, "loss": 1.2175, "step": 1090 }, { "epoch": 0.14790212160238594, "grad_norm": 4.072649978571221, "learning_rate": 1.92806593398667e-06, "loss": 1.2037, "step": 1091 }, { "epoch": 0.14803768725005084, "grad_norm": 5.579577236589794, "learning_rate": 1.9279023025529324e-06, "loss": 1.2235, "step": 1092 }, { "epoch": 0.1481732528977157, "grad_norm": 5.503453263763152, "learning_rate": 1.9277384921805604e-06, "loss": 1.2407, "step": 1093 }, { "epoch": 0.1483088185453806, "grad_norm": 8.30273588213727, "learning_rate": 1.927574502901143e-06, "loss": 1.212, "step": 1094 }, { "epoch": 0.14844438419304548, "grad_norm": 6.745471826583052, "learning_rate": 1.927410334746305e-06, "loss": 1.2548, "step": 1095 }, { "epoch": 0.14857994984071035, "grad_norm": 5.491330221097082, "learning_rate": 1.927245987747704e-06, "loss": 1.2373, "step": 1096 }, { "epoch": 0.14871551548837525, "grad_norm": 6.378690282661109, "learning_rate": 1.9270814619370337e-06, "loss": 1.2462, "step": 1097 }, { "epoch": 0.14885108113604012, "grad_norm": 6.123785543400309, "learning_rate": 1.9269167573460217e-06, "loss": 1.255, "step": 1098 }, { "epoch": 0.14898664678370502, "grad_norm": 14.237632360985012, "learning_rate": 1.9267518740064294e-06, "loss": 1.2366, "step": 1099 }, { "epoch": 0.1491222124313699, "grad_norm": 5.083114525014874, "learning_rate": 1.9265868119500538e-06, "loss": 1.1923, "step": 1100 }, { "epoch": 0.1492577780790348, "grad_norm": 9.999086834649283, "learning_rate": 1.926421571208725e-06, "loss": 1.1985, "step": 1101 }, { "epoch": 0.14939334372669966, "grad_norm": 5.434967587377199, "learning_rate": 1.9262561518143095e-06, "loss": 1.2525, "step": 1102 }, { "epoch": 0.14952890937436453, "grad_norm": 4.050886579036711, "learning_rate": 1.9260905537987063e-06, "loss": 1.2437, "step": 1103 }, { "epoch": 0.14966447502202943, "grad_norm": 11.863250252126985, "learning_rate": 1.92592477719385e-06, "loss": 1.189, "step": 1104 }, { "epoch": 0.1498000406696943, "grad_norm": 7.933823104451924, "learning_rate": 1.925758822031709e-06, "loss": 1.2292, "step": 1105 }, { "epoch": 0.1499356063173592, "grad_norm": 4.755251623087836, "learning_rate": 1.9255926883442867e-06, "loss": 1.2516, "step": 1106 }, { "epoch": 0.15007117196502406, "grad_norm": 4.944791657973357, "learning_rate": 1.9254263761636207e-06, "loss": 1.22, "step": 1107 }, { "epoch": 0.15020673761268893, "grad_norm": 8.749662821217235, "learning_rate": 1.925259885521783e-06, "loss": 1.2146, "step": 1108 }, { "epoch": 0.15034230326035383, "grad_norm": 5.807647508921474, "learning_rate": 1.92509321645088e-06, "loss": 1.2498, "step": 1109 }, { "epoch": 0.1504778689080187, "grad_norm": 9.075210963245452, "learning_rate": 1.924926368983052e-06, "loss": 1.2149, "step": 1110 }, { "epoch": 0.1506134345556836, "grad_norm": 4.9319417971678305, "learning_rate": 1.9247593431504756e-06, "loss": 1.1993, "step": 1111 }, { "epoch": 0.15074900020334847, "grad_norm": 6.736015345216462, "learning_rate": 1.9245921389853588e-06, "loss": 1.2072, "step": 1112 }, { "epoch": 0.15088456585101334, "grad_norm": 9.517538538292698, "learning_rate": 1.9244247565199463e-06, "loss": 1.2084, "step": 1113 }, { "epoch": 0.15102013149867824, "grad_norm": 9.194345576950662, "learning_rate": 1.9242571957865165e-06, "loss": 1.275, "step": 1114 }, { "epoch": 0.1511556971463431, "grad_norm": 5.19770715296049, "learning_rate": 1.924089456817382e-06, "loss": 1.2045, "step": 1115 }, { "epoch": 0.151291262794008, "grad_norm": 5.2177676864983775, "learning_rate": 1.92392153964489e-06, "loss": 1.2277, "step": 1116 }, { "epoch": 0.15142682844167288, "grad_norm": 7.767327035697304, "learning_rate": 1.923753444301423e-06, "loss": 1.2262, "step": 1117 }, { "epoch": 0.15156239408933775, "grad_norm": 6.145493747927465, "learning_rate": 1.923585170819395e-06, "loss": 1.2351, "step": 1118 }, { "epoch": 0.15169795973700265, "grad_norm": 6.411504933846916, "learning_rate": 1.923416719231257e-06, "loss": 1.23, "step": 1119 }, { "epoch": 0.15183352538466752, "grad_norm": 5.925661831377389, "learning_rate": 1.9232480895694945e-06, "loss": 1.2074, "step": 1120 }, { "epoch": 0.15196909103233242, "grad_norm": 5.774780736452698, "learning_rate": 1.9230792818666252e-06, "loss": 1.2126, "step": 1121 }, { "epoch": 0.15210465667999729, "grad_norm": 10.550436405242362, "learning_rate": 1.9229102961552026e-06, "loss": 1.25, "step": 1122 }, { "epoch": 0.15224022232766218, "grad_norm": 5.04038390335601, "learning_rate": 1.9227411324678146e-06, "loss": 1.2147, "step": 1123 }, { "epoch": 0.15237578797532705, "grad_norm": 6.982427146352223, "learning_rate": 1.922571790837083e-06, "loss": 1.2435, "step": 1124 }, { "epoch": 0.15251135362299192, "grad_norm": 7.85428622866106, "learning_rate": 1.9224022712956635e-06, "loss": 1.2011, "step": 1125 }, { "epoch": 0.15264691927065682, "grad_norm": 5.857448446121751, "learning_rate": 1.922232573876247e-06, "loss": 1.212, "step": 1126 }, { "epoch": 0.1527824849183217, "grad_norm": 7.3172187139722205, "learning_rate": 1.922062698611559e-06, "loss": 1.228, "step": 1127 }, { "epoch": 0.1529180505659866, "grad_norm": 6.731732274636058, "learning_rate": 1.921892645534357e-06, "loss": 1.267, "step": 1128 }, { "epoch": 0.15305361621365146, "grad_norm": 8.43369145796822, "learning_rate": 1.9217224146774357e-06, "loss": 1.2229, "step": 1129 }, { "epoch": 0.15318918186131633, "grad_norm": 7.857941229871054, "learning_rate": 1.921552006073622e-06, "loss": 1.2186, "step": 1130 }, { "epoch": 0.15332474750898123, "grad_norm": 10.013427277153193, "learning_rate": 1.9213814197557787e-06, "loss": 1.2384, "step": 1131 }, { "epoch": 0.1534603131566461, "grad_norm": 7.732408664514436, "learning_rate": 1.9212106557568016e-06, "loss": 1.1921, "step": 1132 }, { "epoch": 0.153595878804311, "grad_norm": 7.776598016146517, "learning_rate": 1.9210397141096206e-06, "loss": 1.1992, "step": 1133 }, { "epoch": 0.15373144445197587, "grad_norm": 8.863324505896863, "learning_rate": 1.9208685948472014e-06, "loss": 1.2256, "step": 1134 }, { "epoch": 0.15386701009964074, "grad_norm": 6.934752168795941, "learning_rate": 1.9206972980025426e-06, "loss": 1.1823, "step": 1135 }, { "epoch": 0.15400257574730564, "grad_norm": 8.054570683968992, "learning_rate": 1.9205258236086773e-06, "loss": 1.2494, "step": 1136 }, { "epoch": 0.1541381413949705, "grad_norm": 6.539061845058247, "learning_rate": 1.920354171698673e-06, "loss": 1.2149, "step": 1137 }, { "epoch": 0.1542737070426354, "grad_norm": 12.282570859798845, "learning_rate": 1.9201823423056315e-06, "loss": 1.2272, "step": 1138 }, { "epoch": 0.15440927269030028, "grad_norm": 7.004370891260318, "learning_rate": 1.920010335462689e-06, "loss": 1.2316, "step": 1139 }, { "epoch": 0.15454483833796515, "grad_norm": 9.754025292697483, "learning_rate": 1.9198381512030154e-06, "loss": 1.2586, "step": 1140 }, { "epoch": 0.15468040398563004, "grad_norm": 14.859286887067553, "learning_rate": 1.919665789559815e-06, "loss": 1.1891, "step": 1141 }, { "epoch": 0.15481596963329491, "grad_norm": 6.944091025829019, "learning_rate": 1.9194932505663265e-06, "loss": 1.2066, "step": 1142 }, { "epoch": 0.1549515352809598, "grad_norm": 11.931134814351793, "learning_rate": 1.9193205342558227e-06, "loss": 1.2248, "step": 1143 }, { "epoch": 0.15508710092862468, "grad_norm": 6.443611432539763, "learning_rate": 1.9191476406616107e-06, "loss": 1.2152, "step": 1144 }, { "epoch": 0.15522266657628958, "grad_norm": 5.894122590041182, "learning_rate": 1.918974569817031e-06, "loss": 1.1936, "step": 1145 }, { "epoch": 0.15535823222395445, "grad_norm": 9.148261198660556, "learning_rate": 1.9188013217554596e-06, "loss": 1.2053, "step": 1146 }, { "epoch": 0.15549379787161932, "grad_norm": 7.766611191210653, "learning_rate": 1.918627896510306e-06, "loss": 1.2357, "step": 1147 }, { "epoch": 0.15562936351928422, "grad_norm": 6.981277779486316, "learning_rate": 1.9184542941150143e-06, "loss": 1.2354, "step": 1148 }, { "epoch": 0.1557649291669491, "grad_norm": 14.658097649009193, "learning_rate": 1.9182805146030614e-06, "loss": 1.2191, "step": 1149 }, { "epoch": 0.155900494814614, "grad_norm": 6.012389750369721, "learning_rate": 1.9181065580079593e-06, "loss": 1.2294, "step": 1150 }, { "epoch": 0.15603606046227886, "grad_norm": 7.654467101455142, "learning_rate": 1.917932424363255e-06, "loss": 1.2161, "step": 1151 }, { "epoch": 0.15617162610994373, "grad_norm": 10.461242464719323, "learning_rate": 1.9177581137025284e-06, "loss": 1.2263, "step": 1152 }, { "epoch": 0.15630719175760863, "grad_norm": 6.994466214492946, "learning_rate": 1.9175836260593937e-06, "loss": 1.1998, "step": 1153 }, { "epoch": 0.1564427574052735, "grad_norm": 9.954267335316096, "learning_rate": 1.9174089614674998e-06, "loss": 1.2222, "step": 1154 }, { "epoch": 0.1565783230529384, "grad_norm": 9.777617871352044, "learning_rate": 1.9172341199605293e-06, "loss": 1.2051, "step": 1155 }, { "epoch": 0.15671388870060327, "grad_norm": 4.724663623760174, "learning_rate": 1.9170591015721987e-06, "loss": 1.2054, "step": 1156 }, { "epoch": 0.15684945434826814, "grad_norm": 4.325364303125986, "learning_rate": 1.9168839063362595e-06, "loss": 1.213, "step": 1157 }, { "epoch": 0.15698501999593303, "grad_norm": 5.007475012740682, "learning_rate": 1.9167085342864962e-06, "loss": 1.1655, "step": 1158 }, { "epoch": 0.1571205856435979, "grad_norm": 7.34635232084413, "learning_rate": 1.9165329854567285e-06, "loss": 1.2065, "step": 1159 }, { "epoch": 0.1572561512912628, "grad_norm": 5.6673163099292045, "learning_rate": 1.916357259880809e-06, "loss": 1.2228, "step": 1160 }, { "epoch": 0.15739171693892767, "grad_norm": 8.209523463698897, "learning_rate": 1.916181357592625e-06, "loss": 1.2421, "step": 1161 }, { "epoch": 0.15752728258659257, "grad_norm": 8.407794506403604, "learning_rate": 1.916005278626098e-06, "loss": 1.241, "step": 1162 }, { "epoch": 0.15766284823425744, "grad_norm": 6.669630289770926, "learning_rate": 1.915829023015184e-06, "loss": 1.2466, "step": 1163 }, { "epoch": 0.1577984138819223, "grad_norm": 4.831954797467159, "learning_rate": 1.915652590793872e-06, "loss": 1.2267, "step": 1164 }, { "epoch": 0.1579339795295872, "grad_norm": 6.167773484959026, "learning_rate": 1.9154759819961854e-06, "loss": 1.2202, "step": 1165 }, { "epoch": 0.15806954517725208, "grad_norm": 7.61213243256573, "learning_rate": 1.915299196656182e-06, "loss": 1.2293, "step": 1166 }, { "epoch": 0.15820511082491698, "grad_norm": 7.053746384405824, "learning_rate": 1.9151222348079535e-06, "loss": 1.2363, "step": 1167 }, { "epoch": 0.15834067647258185, "grad_norm": 5.220612726026369, "learning_rate": 1.9149450964856254e-06, "loss": 1.2709, "step": 1168 }, { "epoch": 0.15847624212024672, "grad_norm": 5.941142829696954, "learning_rate": 1.914767781723358e-06, "loss": 1.2082, "step": 1169 }, { "epoch": 0.15861180776791162, "grad_norm": 7.870245686633313, "learning_rate": 1.914590290555344e-06, "loss": 1.2375, "step": 1170 }, { "epoch": 0.1587473734155765, "grad_norm": 8.4305922554565, "learning_rate": 1.9144126230158124e-06, "loss": 1.2271, "step": 1171 }, { "epoch": 0.15888293906324139, "grad_norm": 6.010230341568643, "learning_rate": 1.9142347791390242e-06, "loss": 1.2957, "step": 1172 }, { "epoch": 0.15901850471090626, "grad_norm": 6.459483430649322, "learning_rate": 1.9140567589592755e-06, "loss": 1.1567, "step": 1173 }, { "epoch": 0.15915407035857113, "grad_norm": 6.001960654450094, "learning_rate": 1.9138785625108955e-06, "loss": 1.2747, "step": 1174 }, { "epoch": 0.15928963600623602, "grad_norm": 11.079773958030824, "learning_rate": 1.9137001898282484e-06, "loss": 1.2056, "step": 1175 }, { "epoch": 0.1594252016539009, "grad_norm": 5.4618405280069044, "learning_rate": 1.9135216409457327e-06, "loss": 1.252, "step": 1176 }, { "epoch": 0.1595607673015658, "grad_norm": 12.216192086812105, "learning_rate": 1.913342915897779e-06, "loss": 1.2145, "step": 1177 }, { "epoch": 0.15969633294923066, "grad_norm": 4.772262386295963, "learning_rate": 1.9131640147188534e-06, "loss": 1.1926, "step": 1178 }, { "epoch": 0.15983189859689553, "grad_norm": 9.386482293981734, "learning_rate": 1.912984937443456e-06, "loss": 1.2305, "step": 1179 }, { "epoch": 0.15996746424456043, "grad_norm": 13.94530195863296, "learning_rate": 1.9128056841061197e-06, "loss": 1.2597, "step": 1180 }, { "epoch": 0.1601030298922253, "grad_norm": 4.76011893281087, "learning_rate": 1.912626254741413e-06, "loss": 1.223, "step": 1181 }, { "epoch": 0.1602385955398902, "grad_norm": 10.330243344725897, "learning_rate": 1.912446649383936e-06, "loss": 1.2207, "step": 1182 }, { "epoch": 0.16037416118755507, "grad_norm": 6.0744484142300115, "learning_rate": 1.9122668680683255e-06, "loss": 1.2276, "step": 1183 }, { "epoch": 0.16050972683521997, "grad_norm": 4.791398614164715, "learning_rate": 1.9120869108292504e-06, "loss": 1.1913, "step": 1184 }, { "epoch": 0.16064529248288484, "grad_norm": 4.5728686744766405, "learning_rate": 1.9119067777014146e-06, "loss": 1.2065, "step": 1185 }, { "epoch": 0.1607808581305497, "grad_norm": 6.578330077307236, "learning_rate": 1.9117264687195546e-06, "loss": 1.2554, "step": 1186 }, { "epoch": 0.1609164237782146, "grad_norm": 8.193347769492032, "learning_rate": 1.911545983918442e-06, "loss": 1.236, "step": 1187 }, { "epoch": 0.16105198942587948, "grad_norm": 8.183249117575004, "learning_rate": 1.911365323332881e-06, "loss": 1.2098, "step": 1188 }, { "epoch": 0.16118755507354438, "grad_norm": 6.979638640610322, "learning_rate": 1.9111844869977123e-06, "loss": 1.2777, "step": 1189 }, { "epoch": 0.16132312072120925, "grad_norm": 5.500482121795565, "learning_rate": 1.911003474947807e-06, "loss": 1.2131, "step": 1190 }, { "epoch": 0.16145868636887412, "grad_norm": 6.855230508654609, "learning_rate": 1.910822287218073e-06, "loss": 1.1815, "step": 1191 }, { "epoch": 0.16159425201653901, "grad_norm": 5.5760011413063255, "learning_rate": 1.9106409238434503e-06, "loss": 1.1923, "step": 1192 }, { "epoch": 0.16172981766420388, "grad_norm": 7.15982079400997, "learning_rate": 1.9104593848589137e-06, "loss": 1.211, "step": 1193 }, { "epoch": 0.16186538331186878, "grad_norm": 13.221876049462558, "learning_rate": 1.9102776702994713e-06, "loss": 1.2244, "step": 1194 }, { "epoch": 0.16200094895953365, "grad_norm": 5.854249868815069, "learning_rate": 1.9100957802001654e-06, "loss": 1.194, "step": 1195 }, { "epoch": 0.16213651460719852, "grad_norm": 7.451938479570799, "learning_rate": 1.9099137145960724e-06, "loss": 1.2291, "step": 1196 }, { "epoch": 0.16227208025486342, "grad_norm": 4.1410053020683915, "learning_rate": 1.909731473522302e-06, "loss": 1.2045, "step": 1197 }, { "epoch": 0.1624076459025283, "grad_norm": 5.968650754985262, "learning_rate": 1.9095490570139977e-06, "loss": 1.202, "step": 1198 }, { "epoch": 0.1625432115501932, "grad_norm": 6.991579595702941, "learning_rate": 1.9093664651063375e-06, "loss": 1.2266, "step": 1199 }, { "epoch": 0.16267877719785806, "grad_norm": 6.635161261773493, "learning_rate": 1.9091836978345323e-06, "loss": 1.1909, "step": 1200 }, { "epoch": 0.16281434284552296, "grad_norm": 11.36605352225371, "learning_rate": 1.909000755233828e-06, "loss": 1.2133, "step": 1201 }, { "epoch": 0.16294990849318783, "grad_norm": 6.0194986191592665, "learning_rate": 1.908817637339503e-06, "loss": 1.188, "step": 1202 }, { "epoch": 0.1630854741408527, "grad_norm": 11.30562457294852, "learning_rate": 1.9086343441868706e-06, "loss": 1.2182, "step": 1203 }, { "epoch": 0.1632210397885176, "grad_norm": 6.196096706658786, "learning_rate": 1.908450875811277e-06, "loss": 1.2576, "step": 1204 }, { "epoch": 0.16335660543618247, "grad_norm": 14.222668851264771, "learning_rate": 1.908267232248103e-06, "loss": 1.2401, "step": 1205 }, { "epoch": 0.16349217108384737, "grad_norm": 5.180376587300367, "learning_rate": 1.9080834135327624e-06, "loss": 1.2354, "step": 1206 }, { "epoch": 0.16362773673151224, "grad_norm": 5.543927460963443, "learning_rate": 1.907899419700704e-06, "loss": 1.2408, "step": 1207 }, { "epoch": 0.1637633023791771, "grad_norm": 5.807224867179025, "learning_rate": 1.9077152507874086e-06, "loss": 1.1795, "step": 1208 }, { "epoch": 0.163898868026842, "grad_norm": 18.547106276666355, "learning_rate": 1.9075309068283928e-06, "loss": 1.2071, "step": 1209 }, { "epoch": 0.16403443367450687, "grad_norm": 7.34865097337007, "learning_rate": 1.9073463878592046e-06, "loss": 1.191, "step": 1210 }, { "epoch": 0.16416999932217177, "grad_norm": 8.171312766225492, "learning_rate": 1.9071616939154279e-06, "loss": 1.238, "step": 1211 }, { "epoch": 0.16430556496983664, "grad_norm": 9.085050485728525, "learning_rate": 1.9069768250326792e-06, "loss": 1.2524, "step": 1212 }, { "epoch": 0.1644411306175015, "grad_norm": 8.144869830171647, "learning_rate": 1.9067917812466088e-06, "loss": 1.1962, "step": 1213 }, { "epoch": 0.1645766962651664, "grad_norm": 5.718199892374763, "learning_rate": 1.9066065625929014e-06, "loss": 1.2187, "step": 1214 }, { "epoch": 0.16471226191283128, "grad_norm": 4.855347591076163, "learning_rate": 1.9064211691072747e-06, "loss": 1.2115, "step": 1215 }, { "epoch": 0.16484782756049618, "grad_norm": 5.075690612213428, "learning_rate": 1.9062356008254804e-06, "loss": 1.2427, "step": 1216 }, { "epoch": 0.16498339320816105, "grad_norm": 5.375664217028067, "learning_rate": 1.906049857783304e-06, "loss": 1.1825, "step": 1217 }, { "epoch": 0.16511895885582592, "grad_norm": 6.347255727540407, "learning_rate": 1.905863940016564e-06, "loss": 1.2184, "step": 1218 }, { "epoch": 0.16525452450349082, "grad_norm": 6.251013771075703, "learning_rate": 1.9056778475611143e-06, "loss": 1.2045, "step": 1219 }, { "epoch": 0.1653900901511557, "grad_norm": 11.279774580019053, "learning_rate": 1.9054915804528403e-06, "loss": 1.2154, "step": 1220 }, { "epoch": 0.1655256557988206, "grad_norm": 6.412404549886819, "learning_rate": 1.9053051387276625e-06, "loss": 1.2497, "step": 1221 }, { "epoch": 0.16566122144648546, "grad_norm": 14.308805470356829, "learning_rate": 1.9051185224215347e-06, "loss": 1.1971, "step": 1222 }, { "epoch": 0.16579678709415036, "grad_norm": 5.267493791450055, "learning_rate": 1.9049317315704445e-06, "loss": 1.1984, "step": 1223 }, { "epoch": 0.16593235274181523, "grad_norm": 5.409961106907849, "learning_rate": 1.904744766210413e-06, "loss": 1.215, "step": 1224 }, { "epoch": 0.1660679183894801, "grad_norm": 6.108134118781691, "learning_rate": 1.904557626377495e-06, "loss": 1.2395, "step": 1225 }, { "epoch": 0.166203484037145, "grad_norm": 13.436983209145186, "learning_rate": 1.9043703121077788e-06, "loss": 1.2063, "step": 1226 }, { "epoch": 0.16633904968480986, "grad_norm": 5.739665599475749, "learning_rate": 1.9041828234373866e-06, "loss": 1.2486, "step": 1227 }, { "epoch": 0.16647461533247476, "grad_norm": 25.50293962020775, "learning_rate": 1.903995160402474e-06, "loss": 1.1859, "step": 1228 }, { "epoch": 0.16661018098013963, "grad_norm": 9.02233346608125, "learning_rate": 1.9038073230392306e-06, "loss": 1.2126, "step": 1229 }, { "epoch": 0.1667457466278045, "grad_norm": 3.732283653763142, "learning_rate": 1.903619311383879e-06, "loss": 1.2378, "step": 1230 }, { "epoch": 0.1668813122754694, "grad_norm": 14.145537075751495, "learning_rate": 1.903431125472676e-06, "loss": 1.2298, "step": 1231 }, { "epoch": 0.16701687792313427, "grad_norm": 7.0864723404528345, "learning_rate": 1.903242765341912e-06, "loss": 1.2435, "step": 1232 }, { "epoch": 0.16715244357079917, "grad_norm": 6.507630354921488, "learning_rate": 1.90305423102791e-06, "loss": 1.1939, "step": 1233 }, { "epoch": 0.16728800921846404, "grad_norm": 5.127868161338444, "learning_rate": 1.902865522567028e-06, "loss": 1.2125, "step": 1234 }, { "epoch": 0.1674235748661289, "grad_norm": 7.282652683838276, "learning_rate": 1.9026766399956568e-06, "loss": 1.2308, "step": 1235 }, { "epoch": 0.1675591405137938, "grad_norm": 6.5891023446486905, "learning_rate": 1.9024875833502208e-06, "loss": 1.1971, "step": 1236 }, { "epoch": 0.16769470616145868, "grad_norm": 7.208389844212893, "learning_rate": 1.9022983526671784e-06, "loss": 1.1446, "step": 1237 }, { "epoch": 0.16783027180912358, "grad_norm": 6.931765118104109, "learning_rate": 1.9021089479830206e-06, "loss": 1.2213, "step": 1238 }, { "epoch": 0.16796583745678845, "grad_norm": 7.062504908209749, "learning_rate": 1.9019193693342733e-06, "loss": 1.2092, "step": 1239 }, { "epoch": 0.16810140310445335, "grad_norm": 5.48409655648433, "learning_rate": 1.9017296167574948e-06, "loss": 1.1875, "step": 1240 }, { "epoch": 0.16823696875211822, "grad_norm": 5.406382853625194, "learning_rate": 1.9015396902892775e-06, "loss": 1.2202, "step": 1241 }, { "epoch": 0.16837253439978309, "grad_norm": 7.586227262643707, "learning_rate": 1.9013495899662474e-06, "loss": 1.264, "step": 1242 }, { "epoch": 0.16850810004744798, "grad_norm": 8.41766958070418, "learning_rate": 1.9011593158250637e-06, "loss": 1.2232, "step": 1243 }, { "epoch": 0.16864366569511285, "grad_norm": 4.841015848802813, "learning_rate": 1.9009688679024189e-06, "loss": 1.1872, "step": 1244 }, { "epoch": 0.16877923134277775, "grad_norm": 5.712641517583826, "learning_rate": 1.9007782462350401e-06, "loss": 1.2247, "step": 1245 }, { "epoch": 0.16891479699044262, "grad_norm": 6.075475345948291, "learning_rate": 1.9005874508596868e-06, "loss": 1.2144, "step": 1246 }, { "epoch": 0.1690503626381075, "grad_norm": 4.255330681985049, "learning_rate": 1.9003964818131524e-06, "loss": 1.2551, "step": 1247 }, { "epoch": 0.1691859282857724, "grad_norm": 4.5236167629227735, "learning_rate": 1.9002053391322636e-06, "loss": 1.1954, "step": 1248 }, { "epoch": 0.16932149393343726, "grad_norm": 6.476769538492088, "learning_rate": 1.900014022853881e-06, "loss": 1.2265, "step": 1249 }, { "epoch": 0.16945705958110216, "grad_norm": 4.69446853338395, "learning_rate": 1.8998225330148988e-06, "loss": 1.1963, "step": 1250 }, { "epoch": 0.16959262522876703, "grad_norm": 6.344020448190403, "learning_rate": 1.8996308696522432e-06, "loss": 1.2189, "step": 1251 }, { "epoch": 0.1697281908764319, "grad_norm": 6.698648323573968, "learning_rate": 1.899439032802876e-06, "loss": 1.2215, "step": 1252 }, { "epoch": 0.1698637565240968, "grad_norm": 6.4540541502553825, "learning_rate": 1.8992470225037911e-06, "loss": 1.2631, "step": 1253 }, { "epoch": 0.16999932217176167, "grad_norm": 5.610676101817481, "learning_rate": 1.899054838792016e-06, "loss": 1.2023, "step": 1254 }, { "epoch": 0.17013488781942657, "grad_norm": 11.268510344293434, "learning_rate": 1.8988624817046119e-06, "loss": 1.1845, "step": 1255 }, { "epoch": 0.17027045346709144, "grad_norm": 4.621829280760264, "learning_rate": 1.8986699512786735e-06, "loss": 1.2352, "step": 1256 }, { "epoch": 0.1704060191147563, "grad_norm": 8.943023091276832, "learning_rate": 1.898477247551329e-06, "loss": 1.1874, "step": 1257 }, { "epoch": 0.1705415847624212, "grad_norm": 5.312826064075479, "learning_rate": 1.8982843705597388e-06, "loss": 1.2179, "step": 1258 }, { "epoch": 0.17067715041008608, "grad_norm": 10.901346912689098, "learning_rate": 1.8980913203410988e-06, "loss": 1.2187, "step": 1259 }, { "epoch": 0.17081271605775097, "grad_norm": 6.470596128798893, "learning_rate": 1.8978980969326366e-06, "loss": 1.2286, "step": 1260 }, { "epoch": 0.17094828170541584, "grad_norm": 4.25079261826077, "learning_rate": 1.897704700371614e-06, "loss": 1.2131, "step": 1261 }, { "epoch": 0.17108384735308074, "grad_norm": 5.792165373781871, "learning_rate": 1.8975111306953261e-06, "loss": 1.2553, "step": 1262 }, { "epoch": 0.1712194130007456, "grad_norm": 6.534689233358019, "learning_rate": 1.8973173879411011e-06, "loss": 1.2452, "step": 1263 }, { "epoch": 0.17135497864841048, "grad_norm": 6.156334316688967, "learning_rate": 1.8971234721463008e-06, "loss": 1.2412, "step": 1264 }, { "epoch": 0.17149054429607538, "grad_norm": 4.766811136933077, "learning_rate": 1.8969293833483202e-06, "loss": 1.1862, "step": 1265 }, { "epoch": 0.17162610994374025, "grad_norm": 6.12952426561443, "learning_rate": 1.896735121584588e-06, "loss": 1.1631, "step": 1266 }, { "epoch": 0.17176167559140515, "grad_norm": 10.515650025460989, "learning_rate": 1.8965406868925664e-06, "loss": 1.1925, "step": 1267 }, { "epoch": 0.17189724123907002, "grad_norm": 4.682615749091892, "learning_rate": 1.89634607930975e-06, "loss": 1.2135, "step": 1268 }, { "epoch": 0.1720328068867349, "grad_norm": 7.2127544694023475, "learning_rate": 1.8961512988736671e-06, "loss": 1.2226, "step": 1269 }, { "epoch": 0.1721683725343998, "grad_norm": 6.711480839365289, "learning_rate": 1.8959563456218807e-06, "loss": 1.213, "step": 1270 }, { "epoch": 0.17230393818206466, "grad_norm": 10.240114354850403, "learning_rate": 1.8957612195919847e-06, "loss": 1.2179, "step": 1271 }, { "epoch": 0.17243950382972956, "grad_norm": 4.963880809411536, "learning_rate": 1.8955659208216086e-06, "loss": 1.2143, "step": 1272 }, { "epoch": 0.17257506947739443, "grad_norm": 5.219189122935364, "learning_rate": 1.8953704493484138e-06, "loss": 1.2006, "step": 1273 }, { "epoch": 0.1727106351250593, "grad_norm": 7.03343340407269, "learning_rate": 1.8951748052100954e-06, "loss": 1.2709, "step": 1274 }, { "epoch": 0.1728462007727242, "grad_norm": 5.5718778248942815, "learning_rate": 1.894978988444382e-06, "loss": 1.1671, "step": 1275 }, { "epoch": 0.17298176642038907, "grad_norm": 5.539398872011233, "learning_rate": 1.8947829990890347e-06, "loss": 1.253, "step": 1276 }, { "epoch": 0.17311733206805396, "grad_norm": 7.77152588697599, "learning_rate": 1.8945868371818493e-06, "loss": 1.2036, "step": 1277 }, { "epoch": 0.17325289771571883, "grad_norm": 6.410853059275999, "learning_rate": 1.8943905027606539e-06, "loss": 1.2089, "step": 1278 }, { "epoch": 0.17338846336338373, "grad_norm": 5.630051610768279, "learning_rate": 1.8941939958633099e-06, "loss": 1.2347, "step": 1279 }, { "epoch": 0.1735240290110486, "grad_norm": 5.128622427790508, "learning_rate": 1.8939973165277123e-06, "loss": 1.2254, "step": 1280 }, { "epoch": 0.17365959465871347, "grad_norm": 12.345289307755793, "learning_rate": 1.8938004647917886e-06, "loss": 1.195, "step": 1281 }, { "epoch": 0.17379516030637837, "grad_norm": 7.675663982115639, "learning_rate": 1.8936034406935008e-06, "loss": 1.1738, "step": 1282 }, { "epoch": 0.17393072595404324, "grad_norm": 8.07300573992471, "learning_rate": 1.8934062442708432e-06, "loss": 1.183, "step": 1283 }, { "epoch": 0.17406629160170814, "grad_norm": 7.577610904599473, "learning_rate": 1.8932088755618434e-06, "loss": 1.2297, "step": 1284 }, { "epoch": 0.174201857249373, "grad_norm": 5.476375576579437, "learning_rate": 1.8930113346045627e-06, "loss": 1.1918, "step": 1285 }, { "epoch": 0.17433742289703788, "grad_norm": 9.439775215226359, "learning_rate": 1.892813621437095e-06, "loss": 1.2084, "step": 1286 }, { "epoch": 0.17447298854470278, "grad_norm": 6.705365831011205, "learning_rate": 1.8926157360975674e-06, "loss": 1.2018, "step": 1287 }, { "epoch": 0.17460855419236765, "grad_norm": 5.726600953322701, "learning_rate": 1.8924176786241416e-06, "loss": 1.2033, "step": 1288 }, { "epoch": 0.17474411984003255, "grad_norm": 5.569331179579947, "learning_rate": 1.8922194490550103e-06, "loss": 1.2092, "step": 1289 }, { "epoch": 0.17487968548769742, "grad_norm": 5.932971487931769, "learning_rate": 1.8920210474284014e-06, "loss": 1.2443, "step": 1290 }, { "epoch": 0.1750152511353623, "grad_norm": 5.033423322743814, "learning_rate": 1.8918224737825743e-06, "loss": 1.1911, "step": 1291 }, { "epoch": 0.17515081678302719, "grad_norm": 7.540336061217861, "learning_rate": 1.891623728155823e-06, "loss": 1.2071, "step": 1292 }, { "epoch": 0.17528638243069206, "grad_norm": 4.4012080954132955, "learning_rate": 1.8914248105864738e-06, "loss": 1.1908, "step": 1293 }, { "epoch": 0.17542194807835695, "grad_norm": 4.549613690216136, "learning_rate": 1.8912257211128864e-06, "loss": 1.2262, "step": 1294 }, { "epoch": 0.17555751372602182, "grad_norm": 6.228139943985673, "learning_rate": 1.8910264597734535e-06, "loss": 1.1885, "step": 1295 }, { "epoch": 0.1756930793736867, "grad_norm": 32.70982423682672, "learning_rate": 1.8908270266066011e-06, "loss": 1.2173, "step": 1296 }, { "epoch": 0.1758286450213516, "grad_norm": 3.9734587386586018, "learning_rate": 1.8906274216507885e-06, "loss": 1.229, "step": 1297 }, { "epoch": 0.17596421066901646, "grad_norm": 5.944086172487185, "learning_rate": 1.8904276449445079e-06, "loss": 1.1896, "step": 1298 }, { "epoch": 0.17609977631668136, "grad_norm": 5.185226962143197, "learning_rate": 1.8902276965262845e-06, "loss": 1.179, "step": 1299 }, { "epoch": 0.17623534196434623, "grad_norm": 5.9239056607172405, "learning_rate": 1.8900275764346768e-06, "loss": 1.2089, "step": 1300 }, { "epoch": 0.17637090761201113, "grad_norm": 4.883955913845506, "learning_rate": 1.8898272847082764e-06, "loss": 1.2211, "step": 1301 }, { "epoch": 0.176506473259676, "grad_norm": 8.52944611279195, "learning_rate": 1.8896268213857078e-06, "loss": 1.2467, "step": 1302 }, { "epoch": 0.17664203890734087, "grad_norm": 5.818513267238603, "learning_rate": 1.8894261865056293e-06, "loss": 1.1987, "step": 1303 }, { "epoch": 0.17677760455500577, "grad_norm": 4.813358178821206, "learning_rate": 1.8892253801067315e-06, "loss": 1.2126, "step": 1304 }, { "epoch": 0.17691317020267064, "grad_norm": 7.0889676033165205, "learning_rate": 1.889024402227738e-06, "loss": 1.1905, "step": 1305 }, { "epoch": 0.17704873585033554, "grad_norm": 5.652697061447667, "learning_rate": 1.8888232529074062e-06, "loss": 1.1986, "step": 1306 }, { "epoch": 0.1771843014980004, "grad_norm": 7.052811157074975, "learning_rate": 1.888621932184526e-06, "loss": 1.2581, "step": 1307 }, { "epoch": 0.17731986714566528, "grad_norm": 6.728441045930733, "learning_rate": 1.8884204400979206e-06, "loss": 1.2176, "step": 1308 }, { "epoch": 0.17745543279333018, "grad_norm": 5.701004621175093, "learning_rate": 1.888218776686446e-06, "loss": 1.184, "step": 1309 }, { "epoch": 0.17759099844099505, "grad_norm": 8.00353866519538, "learning_rate": 1.8880169419889915e-06, "loss": 1.1926, "step": 1310 }, { "epoch": 0.17772656408865994, "grad_norm": 4.155073145599904, "learning_rate": 1.8878149360444793e-06, "loss": 1.1815, "step": 1311 }, { "epoch": 0.17786212973632481, "grad_norm": 12.464170042052173, "learning_rate": 1.8876127588918648e-06, "loss": 1.2102, "step": 1312 }, { "epoch": 0.17799769538398968, "grad_norm": 6.345587212660416, "learning_rate": 1.887410410570136e-06, "loss": 1.1978, "step": 1313 }, { "epoch": 0.17813326103165458, "grad_norm": 12.530337979331462, "learning_rate": 1.8872078911183145e-06, "loss": 1.2331, "step": 1314 }, { "epoch": 0.17826882667931945, "grad_norm": 7.155829720015195, "learning_rate": 1.8870052005754542e-06, "loss": 1.178, "step": 1315 }, { "epoch": 0.17840439232698435, "grad_norm": 6.733517438752517, "learning_rate": 1.8868023389806428e-06, "loss": 1.1933, "step": 1316 }, { "epoch": 0.17853995797464922, "grad_norm": 8.162612381633924, "learning_rate": 1.8865993063730002e-06, "loss": 1.2219, "step": 1317 }, { "epoch": 0.1786755236223141, "grad_norm": 6.049399934062728, "learning_rate": 1.8863961027916794e-06, "loss": 1.2369, "step": 1318 }, { "epoch": 0.178811089269979, "grad_norm": 7.680086254641557, "learning_rate": 1.8861927282758673e-06, "loss": 1.212, "step": 1319 }, { "epoch": 0.17894665491764386, "grad_norm": 4.670830406229335, "learning_rate": 1.8859891828647827e-06, "loss": 1.1871, "step": 1320 }, { "epoch": 0.17908222056530876, "grad_norm": 5.280434943959792, "learning_rate": 1.8857854665976777e-06, "loss": 1.2159, "step": 1321 }, { "epoch": 0.17921778621297363, "grad_norm": 4.790180339752977, "learning_rate": 1.8855815795138375e-06, "loss": 1.2315, "step": 1322 }, { "epoch": 0.17935335186063853, "grad_norm": 5.033161627232756, "learning_rate": 1.8853775216525803e-06, "loss": 1.2378, "step": 1323 }, { "epoch": 0.1794889175083034, "grad_norm": 6.283998472145694, "learning_rate": 1.8851732930532563e-06, "loss": 1.1904, "step": 1324 }, { "epoch": 0.17962448315596827, "grad_norm": 6.221124493401272, "learning_rate": 1.8849688937552502e-06, "loss": 1.2173, "step": 1325 }, { "epoch": 0.17976004880363317, "grad_norm": 7.340297810326605, "learning_rate": 1.8847643237979783e-06, "loss": 1.195, "step": 1326 }, { "epoch": 0.17989561445129804, "grad_norm": 7.769508364184116, "learning_rate": 1.8845595832208905e-06, "loss": 1.1765, "step": 1327 }, { "epoch": 0.18003118009896293, "grad_norm": 5.657154350897168, "learning_rate": 1.8843546720634693e-06, "loss": 1.2457, "step": 1328 }, { "epoch": 0.1801667457466278, "grad_norm": 10.740997953755375, "learning_rate": 1.8841495903652302e-06, "loss": 1.1888, "step": 1329 }, { "epoch": 0.18030231139429267, "grad_norm": 5.717069178439469, "learning_rate": 1.883944338165722e-06, "loss": 1.2275, "step": 1330 }, { "epoch": 0.18043787704195757, "grad_norm": 5.225990732123497, "learning_rate": 1.8837389155045253e-06, "loss": 1.189, "step": 1331 }, { "epoch": 0.18057344268962244, "grad_norm": 6.66636561659353, "learning_rate": 1.883533322421255e-06, "loss": 1.1991, "step": 1332 }, { "epoch": 0.18070900833728734, "grad_norm": 5.100621557533876, "learning_rate": 1.883327558955557e-06, "loss": 1.2285, "step": 1333 }, { "epoch": 0.1808445739849522, "grad_norm": 6.831213799465331, "learning_rate": 1.8831216251471123e-06, "loss": 1.2395, "step": 1334 }, { "epoch": 0.18098013963261708, "grad_norm": 12.758740423840429, "learning_rate": 1.8829155210356329e-06, "loss": 1.1733, "step": 1335 }, { "epoch": 0.18111570528028198, "grad_norm": 6.0804656700459345, "learning_rate": 1.8827092466608647e-06, "loss": 1.1804, "step": 1336 }, { "epoch": 0.18125127092794685, "grad_norm": 4.409656816148908, "learning_rate": 1.8825028020625858e-06, "loss": 1.1726, "step": 1337 }, { "epoch": 0.18138683657561175, "grad_norm": 5.6417988774536765, "learning_rate": 1.8822961872806076e-06, "loss": 1.2132, "step": 1338 }, { "epoch": 0.18152240222327662, "grad_norm": 6.072907770831562, "learning_rate": 1.8820894023547745e-06, "loss": 1.1893, "step": 1339 }, { "epoch": 0.18165796787094152, "grad_norm": 5.4318225127502195, "learning_rate": 1.8818824473249624e-06, "loss": 1.2019, "step": 1340 }, { "epoch": 0.1817935335186064, "grad_norm": 5.979391482263714, "learning_rate": 1.8816753222310818e-06, "loss": 1.2163, "step": 1341 }, { "epoch": 0.18192909916627126, "grad_norm": 6.3155728339231985, "learning_rate": 1.8814680271130747e-06, "loss": 1.1909, "step": 1342 }, { "epoch": 0.18206466481393616, "grad_norm": 4.93434678209705, "learning_rate": 1.8812605620109165e-06, "loss": 1.1848, "step": 1343 }, { "epoch": 0.18220023046160103, "grad_norm": 15.244799191202947, "learning_rate": 1.881052926964615e-06, "loss": 1.2246, "step": 1344 }, { "epoch": 0.18233579610926592, "grad_norm": 14.596020380249628, "learning_rate": 1.8808451220142114e-06, "loss": 1.2229, "step": 1345 }, { "epoch": 0.1824713617569308, "grad_norm": 8.117953848528288, "learning_rate": 1.880637147199779e-06, "loss": 1.2258, "step": 1346 }, { "epoch": 0.18260692740459566, "grad_norm": 11.94142207970674, "learning_rate": 1.8804290025614242e-06, "loss": 1.2481, "step": 1347 }, { "epoch": 0.18274249305226056, "grad_norm": 13.716019821788374, "learning_rate": 1.8802206881392858e-06, "loss": 1.2164, "step": 1348 }, { "epoch": 0.18287805869992543, "grad_norm": 5.768089027632059, "learning_rate": 1.8800122039735355e-06, "loss": 1.2343, "step": 1349 }, { "epoch": 0.18301362434759033, "grad_norm": 4.88425648215312, "learning_rate": 1.8798035501043783e-06, "loss": 1.1723, "step": 1350 }, { "epoch": 0.1831491899952552, "grad_norm": 4.662200796804328, "learning_rate": 1.879594726572051e-06, "loss": 1.1955, "step": 1351 }, { "epoch": 0.18328475564292007, "grad_norm": 4.5375735853780546, "learning_rate": 1.8793857334168243e-06, "loss": 1.2432, "step": 1352 }, { "epoch": 0.18342032129058497, "grad_norm": 4.100427121454619, "learning_rate": 1.8791765706789997e-06, "loss": 1.1826, "step": 1353 }, { "epoch": 0.18355588693824984, "grad_norm": 5.601571207347056, "learning_rate": 1.8789672383989134e-06, "loss": 1.1983, "step": 1354 }, { "epoch": 0.18369145258591474, "grad_norm": 7.741463675536851, "learning_rate": 1.8787577366169336e-06, "loss": 1.2287, "step": 1355 }, { "epoch": 0.1838270182335796, "grad_norm": 5.980039792077148, "learning_rate": 1.8785480653734607e-06, "loss": 1.217, "step": 1356 }, { "epoch": 0.18396258388124448, "grad_norm": 11.16081011744827, "learning_rate": 1.878338224708928e-06, "loss": 1.2184, "step": 1357 }, { "epoch": 0.18409814952890938, "grad_norm": 6.247153897375661, "learning_rate": 1.878128214663802e-06, "loss": 1.2, "step": 1358 }, { "epoch": 0.18423371517657425, "grad_norm": 4.813384833438804, "learning_rate": 1.8779180352785814e-06, "loss": 1.2097, "step": 1359 }, { "epoch": 0.18436928082423915, "grad_norm": 5.982238066181923, "learning_rate": 1.8777076865937976e-06, "loss": 1.2147, "step": 1360 }, { "epoch": 0.18450484647190402, "grad_norm": 5.448941212135867, "learning_rate": 1.8774971686500143e-06, "loss": 1.206, "step": 1361 }, { "epoch": 0.18464041211956891, "grad_norm": 3.8772742918846923, "learning_rate": 1.877286481487829e-06, "loss": 1.1724, "step": 1362 }, { "epoch": 0.18477597776723378, "grad_norm": 4.4887204412538715, "learning_rate": 1.8770756251478703e-06, "loss": 1.2367, "step": 1363 }, { "epoch": 0.18491154341489865, "grad_norm": 4.8387017848769895, "learning_rate": 1.8768645996708007e-06, "loss": 1.2031, "step": 1364 }, { "epoch": 0.18504710906256355, "grad_norm": 5.492100067560855, "learning_rate": 1.8766534050973144e-06, "loss": 1.1666, "step": 1365 }, { "epoch": 0.18518267471022842, "grad_norm": 5.636297397564015, "learning_rate": 1.876442041468139e-06, "loss": 1.189, "step": 1366 }, { "epoch": 0.18531824035789332, "grad_norm": 6.086530148242061, "learning_rate": 1.876230508824034e-06, "loss": 1.2066, "step": 1367 }, { "epoch": 0.1854538060055582, "grad_norm": 4.536627187476101, "learning_rate": 1.876018807205792e-06, "loss": 1.1833, "step": 1368 }, { "epoch": 0.18558937165322306, "grad_norm": 5.0882051585336, "learning_rate": 1.875806936654238e-06, "loss": 1.1951, "step": 1369 }, { "epoch": 0.18572493730088796, "grad_norm": 6.097211912070002, "learning_rate": 1.8755948972102292e-06, "loss": 1.2162, "step": 1370 }, { "epoch": 0.18586050294855283, "grad_norm": 5.31231488841598, "learning_rate": 1.8753826889146562e-06, "loss": 1.2191, "step": 1371 }, { "epoch": 0.18599606859621773, "grad_norm": 5.903724431376242, "learning_rate": 1.8751703118084413e-06, "loss": 1.1828, "step": 1372 }, { "epoch": 0.1861316342438826, "grad_norm": 8.334226117305509, "learning_rate": 1.8749577659325401e-06, "loss": 1.1786, "step": 1373 }, { "epoch": 0.18626719989154747, "grad_norm": 7.039976201656778, "learning_rate": 1.8747450513279403e-06, "loss": 1.1907, "step": 1374 }, { "epoch": 0.18640276553921237, "grad_norm": 6.564871702956314, "learning_rate": 1.874532168035662e-06, "loss": 1.2639, "step": 1375 }, { "epoch": 0.18653833118687724, "grad_norm": 7.125175804840867, "learning_rate": 1.8743191160967584e-06, "loss": 1.2344, "step": 1376 }, { "epoch": 0.18667389683454214, "grad_norm": 5.5074410875894095, "learning_rate": 1.8741058955523145e-06, "loss": 1.2365, "step": 1377 }, { "epoch": 0.186809462482207, "grad_norm": 4.6795554387098885, "learning_rate": 1.8738925064434485e-06, "loss": 1.2085, "step": 1378 }, { "epoch": 0.1869450281298719, "grad_norm": 7.1991588120845185, "learning_rate": 1.8736789488113108e-06, "loss": 1.2079, "step": 1379 }, { "epoch": 0.18708059377753677, "grad_norm": 7.024258058776983, "learning_rate": 1.8734652226970844e-06, "loss": 1.1894, "step": 1380 }, { "epoch": 0.18721615942520164, "grad_norm": 8.3742309847982, "learning_rate": 1.8732513281419843e-06, "loss": 1.1919, "step": 1381 }, { "epoch": 0.18735172507286654, "grad_norm": 10.046554457730483, "learning_rate": 1.8730372651872585e-06, "loss": 1.203, "step": 1382 }, { "epoch": 0.1874872907205314, "grad_norm": 6.182071327875957, "learning_rate": 1.8728230338741877e-06, "loss": 1.175, "step": 1383 }, { "epoch": 0.1876228563681963, "grad_norm": 5.776814155249896, "learning_rate": 1.8726086342440842e-06, "loss": 1.1838, "step": 1384 }, { "epoch": 0.18775842201586118, "grad_norm": 5.673491414909744, "learning_rate": 1.8723940663382939e-06, "loss": 1.1883, "step": 1385 }, { "epoch": 0.18789398766352605, "grad_norm": 6.003923502944447, "learning_rate": 1.8721793301981937e-06, "loss": 1.1906, "step": 1386 }, { "epoch": 0.18802955331119095, "grad_norm": 7.284134086732133, "learning_rate": 1.8719644258651942e-06, "loss": 1.2265, "step": 1387 }, { "epoch": 0.18816511895885582, "grad_norm": 8.783674945754584, "learning_rate": 1.8717493533807386e-06, "loss": 1.188, "step": 1388 }, { "epoch": 0.18830068460652072, "grad_norm": 4.920574422155272, "learning_rate": 1.871534112786301e-06, "loss": 1.1818, "step": 1389 }, { "epoch": 0.1884362502541856, "grad_norm": 8.193157965041571, "learning_rate": 1.8713187041233893e-06, "loss": 1.1876, "step": 1390 }, { "epoch": 0.18857181590185046, "grad_norm": 10.849953400060725, "learning_rate": 1.8711031274335434e-06, "loss": 1.1417, "step": 1391 }, { "epoch": 0.18870738154951536, "grad_norm": 7.362949061514129, "learning_rate": 1.8708873827583352e-06, "loss": 1.2081, "step": 1392 }, { "epoch": 0.18884294719718023, "grad_norm": 4.463142079596294, "learning_rate": 1.8706714701393697e-06, "loss": 1.2222, "step": 1393 }, { "epoch": 0.18897851284484513, "grad_norm": 5.195822719004175, "learning_rate": 1.8704553896182838e-06, "loss": 1.182, "step": 1394 }, { "epoch": 0.18911407849251, "grad_norm": 10.044886600920535, "learning_rate": 1.870239141236747e-06, "loss": 1.25, "step": 1395 }, { "epoch": 0.18924964414017487, "grad_norm": 7.818813904051608, "learning_rate": 1.870022725036461e-06, "loss": 1.1735, "step": 1396 }, { "epoch": 0.18938520978783976, "grad_norm": 7.8830762043479385, "learning_rate": 1.8698061410591604e-06, "loss": 1.1787, "step": 1397 }, { "epoch": 0.18952077543550463, "grad_norm": 6.934544956277122, "learning_rate": 1.8695893893466108e-06, "loss": 1.1816, "step": 1398 }, { "epoch": 0.18965634108316953, "grad_norm": 6.416622367478746, "learning_rate": 1.869372469940612e-06, "loss": 1.2126, "step": 1399 }, { "epoch": 0.1897919067308344, "grad_norm": 6.720597345873249, "learning_rate": 1.8691553828829948e-06, "loss": 1.2062, "step": 1400 }, { "epoch": 0.1899274723784993, "grad_norm": 5.136151188804196, "learning_rate": 1.8689381282156222e-06, "loss": 1.1725, "step": 1401 }, { "epoch": 0.19006303802616417, "grad_norm": 19.48053031104807, "learning_rate": 1.868720705980391e-06, "loss": 1.2157, "step": 1402 }, { "epoch": 0.19019860367382904, "grad_norm": 6.515926645564021, "learning_rate": 1.8685031162192287e-06, "loss": 1.2337, "step": 1403 }, { "epoch": 0.19033416932149394, "grad_norm": 6.543179338921863, "learning_rate": 1.8682853589740962e-06, "loss": 1.1932, "step": 1404 }, { "epoch": 0.1904697349691588, "grad_norm": 5.375494354284686, "learning_rate": 1.8680674342869858e-06, "loss": 1.1975, "step": 1405 }, { "epoch": 0.1906053006168237, "grad_norm": 8.228904416167232, "learning_rate": 1.867849342199923e-06, "loss": 1.1894, "step": 1406 }, { "epoch": 0.19074086626448858, "grad_norm": 8.977405893099464, "learning_rate": 1.867631082754965e-06, "loss": 1.2087, "step": 1407 }, { "epoch": 0.19087643191215345, "grad_norm": 6.37663451339782, "learning_rate": 1.8674126559942009e-06, "loss": 1.2012, "step": 1408 }, { "epoch": 0.19101199755981835, "grad_norm": 4.691053170973936, "learning_rate": 1.8671940619597532e-06, "loss": 1.1762, "step": 1409 }, { "epoch": 0.19114756320748322, "grad_norm": 7.561476028846201, "learning_rate": 1.8669753006937762e-06, "loss": 1.229, "step": 1410 }, { "epoch": 0.19128312885514812, "grad_norm": 6.852050437659478, "learning_rate": 1.8667563722384559e-06, "loss": 1.1688, "step": 1411 }, { "epoch": 0.19141869450281299, "grad_norm": 9.320293166871284, "learning_rate": 1.8665372766360107e-06, "loss": 1.226, "step": 1412 }, { "epoch": 0.19155426015047786, "grad_norm": 6.514497224509165, "learning_rate": 1.866318013928692e-06, "loss": 1.1614, "step": 1413 }, { "epoch": 0.19168982579814275, "grad_norm": 21.875152266890606, "learning_rate": 1.8660985841587824e-06, "loss": 1.145, "step": 1414 }, { "epoch": 0.19182539144580762, "grad_norm": 7.402272371358957, "learning_rate": 1.8658789873685973e-06, "loss": 1.2203, "step": 1415 }, { "epoch": 0.19196095709347252, "grad_norm": 7.196502314297269, "learning_rate": 1.8656592236004847e-06, "loss": 1.1899, "step": 1416 }, { "epoch": 0.1920965227411374, "grad_norm": 4.765340808951759, "learning_rate": 1.8654392928968239e-06, "loss": 1.1979, "step": 1417 }, { "epoch": 0.1922320883888023, "grad_norm": 7.603968348708788, "learning_rate": 1.8652191953000265e-06, "loss": 1.1767, "step": 1418 }, { "epoch": 0.19236765403646716, "grad_norm": 5.76207774925471, "learning_rate": 1.864998930852537e-06, "loss": 1.1907, "step": 1419 }, { "epoch": 0.19250321968413203, "grad_norm": 4.209395264793149, "learning_rate": 1.8647784995968317e-06, "loss": 1.1437, "step": 1420 }, { "epoch": 0.19263878533179693, "grad_norm": 7.1431491602211326, "learning_rate": 1.8645579015754189e-06, "loss": 1.1914, "step": 1421 }, { "epoch": 0.1927743509794618, "grad_norm": 5.823816696946987, "learning_rate": 1.8643371368308389e-06, "loss": 1.2343, "step": 1422 }, { "epoch": 0.1929099166271267, "grad_norm": 7.359734692471507, "learning_rate": 1.8641162054056651e-06, "loss": 1.2087, "step": 1423 }, { "epoch": 0.19304548227479157, "grad_norm": 5.083065569872794, "learning_rate": 1.8638951073425018e-06, "loss": 1.1932, "step": 1424 }, { "epoch": 0.19318104792245644, "grad_norm": 6.92026331031309, "learning_rate": 1.8636738426839863e-06, "loss": 1.187, "step": 1425 }, { "epoch": 0.19331661357012134, "grad_norm": 6.209182605546703, "learning_rate": 1.8634524114727878e-06, "loss": 1.1911, "step": 1426 }, { "epoch": 0.1934521792177862, "grad_norm": 8.738648806254234, "learning_rate": 1.8632308137516071e-06, "loss": 1.2064, "step": 1427 }, { "epoch": 0.1935877448654511, "grad_norm": 7.238088996483906, "learning_rate": 1.8630090495631783e-06, "loss": 1.2471, "step": 1428 }, { "epoch": 0.19372331051311598, "grad_norm": 8.865280530902853, "learning_rate": 1.8627871189502662e-06, "loss": 1.2028, "step": 1429 }, { "epoch": 0.19385887616078085, "grad_norm": 5.89448158412717, "learning_rate": 1.8625650219556688e-06, "loss": 1.1951, "step": 1430 }, { "epoch": 0.19399444180844574, "grad_norm": 7.307968805743874, "learning_rate": 1.8623427586222154e-06, "loss": 1.1922, "step": 1431 }, { "epoch": 0.19413000745611061, "grad_norm": 5.741995775836488, "learning_rate": 1.8621203289927681e-06, "loss": 1.2422, "step": 1432 }, { "epoch": 0.1942655731037755, "grad_norm": 5.3197833551669635, "learning_rate": 1.8618977331102204e-06, "loss": 1.2095, "step": 1433 }, { "epoch": 0.19440113875144038, "grad_norm": 6.494152007478482, "learning_rate": 1.861674971017498e-06, "loss": 1.2009, "step": 1434 }, { "epoch": 0.19453670439910525, "grad_norm": 7.491292933941971, "learning_rate": 1.8614520427575596e-06, "loss": 1.1795, "step": 1435 }, { "epoch": 0.19467227004677015, "grad_norm": 8.107848126224381, "learning_rate": 1.8612289483733942e-06, "loss": 1.2032, "step": 1436 }, { "epoch": 0.19480783569443502, "grad_norm": 5.442743882878252, "learning_rate": 1.8610056879080247e-06, "loss": 1.1551, "step": 1437 }, { "epoch": 0.19494340134209992, "grad_norm": 6.804153149771637, "learning_rate": 1.8607822614045041e-06, "loss": 1.234, "step": 1438 }, { "epoch": 0.1950789669897648, "grad_norm": 10.700287629769816, "learning_rate": 1.8605586689059195e-06, "loss": 1.1511, "step": 1439 }, { "epoch": 0.1952145326374297, "grad_norm": 14.504484670146454, "learning_rate": 1.8603349104553882e-06, "loss": 1.1531, "step": 1440 }, { "epoch": 0.19535009828509456, "grad_norm": 5.050390005917245, "learning_rate": 1.8601109860960603e-06, "loss": 1.149, "step": 1441 }, { "epoch": 0.19548566393275943, "grad_norm": 21.55762248572399, "learning_rate": 1.8598868958711185e-06, "loss": 1.185, "step": 1442 }, { "epoch": 0.19562122958042433, "grad_norm": 8.57152583550541, "learning_rate": 1.8596626398237762e-06, "loss": 1.1839, "step": 1443 }, { "epoch": 0.1957567952280892, "grad_norm": 7.444593086097855, "learning_rate": 1.8594382179972794e-06, "loss": 1.1865, "step": 1444 }, { "epoch": 0.1958923608757541, "grad_norm": 9.360597592040643, "learning_rate": 1.8592136304349063e-06, "loss": 1.233, "step": 1445 }, { "epoch": 0.19602792652341897, "grad_norm": 8.898958950804078, "learning_rate": 1.8589888771799669e-06, "loss": 1.1977, "step": 1446 }, { "epoch": 0.19616349217108384, "grad_norm": 4.850286072007758, "learning_rate": 1.858763958275803e-06, "loss": 1.1951, "step": 1447 }, { "epoch": 0.19629905781874873, "grad_norm": 6.502181888645367, "learning_rate": 1.8585388737657883e-06, "loss": 1.2445, "step": 1448 }, { "epoch": 0.1964346234664136, "grad_norm": 8.188676698783384, "learning_rate": 1.8583136236933287e-06, "loss": 1.2198, "step": 1449 }, { "epoch": 0.1965701891140785, "grad_norm": 7.834120818319786, "learning_rate": 1.858088208101862e-06, "loss": 1.1886, "step": 1450 }, { "epoch": 0.19670575476174337, "grad_norm": 6.713549916269707, "learning_rate": 1.8578626270348576e-06, "loss": 1.1479, "step": 1451 }, { "epoch": 0.19684132040940824, "grad_norm": 12.95585592915156, "learning_rate": 1.8576368805358171e-06, "loss": 1.2508, "step": 1452 }, { "epoch": 0.19697688605707314, "grad_norm": 6.3633270115942935, "learning_rate": 1.857410968648274e-06, "loss": 1.1917, "step": 1453 }, { "epoch": 0.197112451704738, "grad_norm": 7.320866092374608, "learning_rate": 1.8571848914157938e-06, "loss": 1.1931, "step": 1454 }, { "epoch": 0.1972480173524029, "grad_norm": 4.90749864282696, "learning_rate": 1.8569586488819732e-06, "loss": 1.2142, "step": 1455 }, { "epoch": 0.19738358300006778, "grad_norm": 9.699080125099306, "learning_rate": 1.8567322410904416e-06, "loss": 1.18, "step": 1456 }, { "epoch": 0.19751914864773265, "grad_norm": 5.434036285851235, "learning_rate": 1.8565056680848602e-06, "loss": 1.2215, "step": 1457 }, { "epoch": 0.19765471429539755, "grad_norm": 9.109958491214607, "learning_rate": 1.8562789299089212e-06, "loss": 1.1805, "step": 1458 }, { "epoch": 0.19779027994306242, "grad_norm": 7.478387094394844, "learning_rate": 1.8560520266063497e-06, "loss": 1.1827, "step": 1459 }, { "epoch": 0.19792584559072732, "grad_norm": 5.571222715393924, "learning_rate": 1.8558249582209022e-06, "loss": 1.1764, "step": 1460 }, { "epoch": 0.1980614112383922, "grad_norm": 5.49590946384339, "learning_rate": 1.8555977247963673e-06, "loss": 1.18, "step": 1461 }, { "epoch": 0.19819697688605709, "grad_norm": 6.625237837566789, "learning_rate": 1.8553703263765646e-06, "loss": 1.191, "step": 1462 }, { "epoch": 0.19833254253372196, "grad_norm": 6.606138352255716, "learning_rate": 1.8551427630053463e-06, "loss": 1.1643, "step": 1463 }, { "epoch": 0.19846810818138683, "grad_norm": 7.239761484549548, "learning_rate": 1.854915034726596e-06, "loss": 1.1682, "step": 1464 }, { "epoch": 0.19860367382905172, "grad_norm": 14.986937711010235, "learning_rate": 1.8546871415842298e-06, "loss": 1.2334, "step": 1465 }, { "epoch": 0.1987392394767166, "grad_norm": 4.80099869861652, "learning_rate": 1.8544590836221947e-06, "loss": 1.1445, "step": 1466 }, { "epoch": 0.1988748051243815, "grad_norm": 5.359261551965813, "learning_rate": 1.8542308608844704e-06, "loss": 1.1947, "step": 1467 }, { "epoch": 0.19901037077204636, "grad_norm": 7.708908410184139, "learning_rate": 1.854002473415067e-06, "loss": 1.2025, "step": 1468 }, { "epoch": 0.19914593641971123, "grad_norm": 8.959199964940794, "learning_rate": 1.853773921258028e-06, "loss": 1.2084, "step": 1469 }, { "epoch": 0.19928150206737613, "grad_norm": 9.265388184436837, "learning_rate": 1.8535452044574274e-06, "loss": 1.2147, "step": 1470 }, { "epoch": 0.199417067715041, "grad_norm": 4.299878977203211, "learning_rate": 1.8533163230573716e-06, "loss": 1.2232, "step": 1471 }, { "epoch": 0.1995526333627059, "grad_norm": 10.723038593741354, "learning_rate": 1.8530872771019984e-06, "loss": 1.1987, "step": 1472 }, { "epoch": 0.19968819901037077, "grad_norm": 33.65138184963421, "learning_rate": 1.8528580666354782e-06, "loss": 1.2043, "step": 1473 }, { "epoch": 0.19982376465803564, "grad_norm": 6.765461650324087, "learning_rate": 1.8526286917020114e-06, "loss": 1.2028, "step": 1474 }, { "epoch": 0.19995933030570054, "grad_norm": 13.72916665435505, "learning_rate": 1.852399152345832e-06, "loss": 1.1641, "step": 1475 }, { "epoch": 0.2000948959533654, "grad_norm": 7.013501821189006, "learning_rate": 1.8521694486112045e-06, "loss": 1.1924, "step": 1476 }, { "epoch": 0.2002304616010303, "grad_norm": 5.000844863904866, "learning_rate": 1.851939580542425e-06, "loss": 1.1871, "step": 1477 }, { "epoch": 0.20036602724869518, "grad_norm": 10.124379201774563, "learning_rate": 1.8517095481838228e-06, "loss": 1.1904, "step": 1478 }, { "epoch": 0.20050159289636008, "grad_norm": 6.121635608039962, "learning_rate": 1.8514793515797567e-06, "loss": 1.2225, "step": 1479 }, { "epoch": 0.20063715854402495, "grad_norm": 11.559607491309519, "learning_rate": 1.8512489907746193e-06, "loss": 1.2165, "step": 1480 }, { "epoch": 0.20077272419168982, "grad_norm": 7.579722469011129, "learning_rate": 1.851018465812833e-06, "loss": 1.1773, "step": 1481 }, { "epoch": 0.20090828983935471, "grad_norm": 6.114588046095102, "learning_rate": 1.8507877767388531e-06, "loss": 1.2093, "step": 1482 }, { "epoch": 0.20104385548701958, "grad_norm": 5.063973437840123, "learning_rate": 1.8505569235971663e-06, "loss": 1.2018, "step": 1483 }, { "epoch": 0.20117942113468448, "grad_norm": 10.649097915305372, "learning_rate": 1.8503259064322907e-06, "loss": 1.2318, "step": 1484 }, { "epoch": 0.20131498678234935, "grad_norm": 9.435882934791785, "learning_rate": 1.8500947252887759e-06, "loss": 1.207, "step": 1485 }, { "epoch": 0.20145055243001422, "grad_norm": 5.158738474547966, "learning_rate": 1.8498633802112039e-06, "loss": 1.2134, "step": 1486 }, { "epoch": 0.20158611807767912, "grad_norm": 4.905945387106227, "learning_rate": 1.849631871244187e-06, "loss": 1.2063, "step": 1487 }, { "epoch": 0.201721683725344, "grad_norm": 7.518247662876749, "learning_rate": 1.8494001984323706e-06, "loss": 1.1683, "step": 1488 }, { "epoch": 0.2018572493730089, "grad_norm": 7.237046825969355, "learning_rate": 1.8491683618204307e-06, "loss": 1.2097, "step": 1489 }, { "epoch": 0.20199281502067376, "grad_norm": 4.426657515407172, "learning_rate": 1.848936361453075e-06, "loss": 1.1877, "step": 1490 }, { "epoch": 0.20212838066833863, "grad_norm": 6.5722479678087815, "learning_rate": 1.8487041973750434e-06, "loss": 1.1996, "step": 1491 }, { "epoch": 0.20226394631600353, "grad_norm": 6.69235883591684, "learning_rate": 1.8484718696311063e-06, "loss": 1.2316, "step": 1492 }, { "epoch": 0.2023995119636684, "grad_norm": 7.3029650558968955, "learning_rate": 1.8482393782660669e-06, "loss": 1.1941, "step": 1493 }, { "epoch": 0.2025350776113333, "grad_norm": 5.791204656956538, "learning_rate": 1.8480067233247584e-06, "loss": 1.1567, "step": 1494 }, { "epoch": 0.20267064325899817, "grad_norm": 23.238423653475827, "learning_rate": 1.8477739048520475e-06, "loss": 1.1584, "step": 1495 }, { "epoch": 0.20280620890666304, "grad_norm": 22.29003398493994, "learning_rate": 1.847540922892831e-06, "loss": 1.2151, "step": 1496 }, { "epoch": 0.20294177455432794, "grad_norm": 4.340020747652146, "learning_rate": 1.8473077774920377e-06, "loss": 1.194, "step": 1497 }, { "epoch": 0.2030773402019928, "grad_norm": 7.490099338868254, "learning_rate": 1.8470744686946276e-06, "loss": 1.1833, "step": 1498 }, { "epoch": 0.2032129058496577, "grad_norm": 6.138784961041676, "learning_rate": 1.8468409965455924e-06, "loss": 1.1763, "step": 1499 }, { "epoch": 0.20334847149732257, "grad_norm": 8.606926708349398, "learning_rate": 1.8466073610899557e-06, "loss": 1.1804, "step": 1500 }, { "epoch": 0.20348403714498747, "grad_norm": 7.64224545502102, "learning_rate": 1.846373562372772e-06, "loss": 1.1722, "step": 1501 }, { "epoch": 0.20361960279265234, "grad_norm": 8.348320030015321, "learning_rate": 1.846139600439128e-06, "loss": 1.2249, "step": 1502 }, { "epoch": 0.2037551684403172, "grad_norm": 7.560337215195306, "learning_rate": 1.845905475334141e-06, "loss": 1.2007, "step": 1503 }, { "epoch": 0.2038907340879821, "grad_norm": 6.447966759765013, "learning_rate": 1.84567118710296e-06, "loss": 1.2045, "step": 1504 }, { "epoch": 0.20402629973564698, "grad_norm": 6.516018282900846, "learning_rate": 1.8454367357907663e-06, "loss": 1.2416, "step": 1505 }, { "epoch": 0.20416186538331188, "grad_norm": 5.587544195182825, "learning_rate": 1.8452021214427713e-06, "loss": 1.2082, "step": 1506 }, { "epoch": 0.20429743103097675, "grad_norm": 6.6432236599938905, "learning_rate": 1.8449673441042188e-06, "loss": 1.2102, "step": 1507 }, { "epoch": 0.20443299667864162, "grad_norm": 5.189479305969716, "learning_rate": 1.8447324038203838e-06, "loss": 1.2317, "step": 1508 }, { "epoch": 0.20456856232630652, "grad_norm": 14.16861843861227, "learning_rate": 1.8444973006365724e-06, "loss": 1.1761, "step": 1509 }, { "epoch": 0.2047041279739714, "grad_norm": 5.147256125024215, "learning_rate": 1.844262034598123e-06, "loss": 1.2025, "step": 1510 }, { "epoch": 0.2048396936216363, "grad_norm": 4.525281409063017, "learning_rate": 1.8440266057504044e-06, "loss": 1.1941, "step": 1511 }, { "epoch": 0.20497525926930116, "grad_norm": 4.623837545453614, "learning_rate": 1.843791014138817e-06, "loss": 1.1505, "step": 1512 }, { "epoch": 0.20511082491696603, "grad_norm": 7.2646457952120675, "learning_rate": 1.843555259808793e-06, "loss": 1.2274, "step": 1513 }, { "epoch": 0.20524639056463093, "grad_norm": 14.956179501127183, "learning_rate": 1.8433193428057958e-06, "loss": 1.2034, "step": 1514 }, { "epoch": 0.2053819562122958, "grad_norm": 7.985147302256646, "learning_rate": 1.84308326317532e-06, "loss": 1.2013, "step": 1515 }, { "epoch": 0.2055175218599607, "grad_norm": 4.260653015466098, "learning_rate": 1.842847020962892e-06, "loss": 1.1987, "step": 1516 }, { "epoch": 0.20565308750762556, "grad_norm": 5.675293539619846, "learning_rate": 1.842610616214069e-06, "loss": 1.2049, "step": 1517 }, { "epoch": 0.20578865315529046, "grad_norm": 7.368561199451082, "learning_rate": 1.8423740489744399e-06, "loss": 1.2076, "step": 1518 }, { "epoch": 0.20592421880295533, "grad_norm": 6.876288125076287, "learning_rate": 1.8421373192896248e-06, "loss": 1.2036, "step": 1519 }, { "epoch": 0.2060597844506202, "grad_norm": 6.269482244863307, "learning_rate": 1.841900427205275e-06, "loss": 1.2127, "step": 1520 }, { "epoch": 0.2061953500982851, "grad_norm": 9.12652958592398, "learning_rate": 1.8416633727670732e-06, "loss": 1.1849, "step": 1521 }, { "epoch": 0.20633091574594997, "grad_norm": 9.122438715074699, "learning_rate": 1.8414261560207337e-06, "loss": 1.1798, "step": 1522 }, { "epoch": 0.20646648139361487, "grad_norm": 11.808848685780434, "learning_rate": 1.8411887770120021e-06, "loss": 1.19, "step": 1523 }, { "epoch": 0.20660204704127974, "grad_norm": 8.402353513732963, "learning_rate": 1.8409512357866546e-06, "loss": 1.2014, "step": 1524 }, { "epoch": 0.2067376126889446, "grad_norm": 5.781195216191159, "learning_rate": 1.8407135323904995e-06, "loss": 1.1452, "step": 1525 }, { "epoch": 0.2068731783366095, "grad_norm": 9.920305043384824, "learning_rate": 1.8404756668693758e-06, "loss": 1.1553, "step": 1526 }, { "epoch": 0.20700874398427438, "grad_norm": 5.067080849011703, "learning_rate": 1.8402376392691539e-06, "loss": 1.1912, "step": 1527 }, { "epoch": 0.20714430963193928, "grad_norm": 5.753306672166743, "learning_rate": 1.8399994496357359e-06, "loss": 1.22, "step": 1528 }, { "epoch": 0.20727987527960415, "grad_norm": 9.319347854444123, "learning_rate": 1.8397610980150544e-06, "loss": 1.213, "step": 1529 }, { "epoch": 0.20741544092726902, "grad_norm": 8.500256391853696, "learning_rate": 1.8395225844530738e-06, "loss": 1.1805, "step": 1530 }, { "epoch": 0.20755100657493392, "grad_norm": 5.4208650951349915, "learning_rate": 1.8392839089957897e-06, "loss": 1.1997, "step": 1531 }, { "epoch": 0.2076865722225988, "grad_norm": 8.881794417603254, "learning_rate": 1.8390450716892288e-06, "loss": 1.19, "step": 1532 }, { "epoch": 0.20782213787026368, "grad_norm": 17.313942185226875, "learning_rate": 1.8388060725794485e-06, "loss": 1.236, "step": 1533 }, { "epoch": 0.20795770351792855, "grad_norm": 7.054909277004141, "learning_rate": 1.8385669117125385e-06, "loss": 1.1812, "step": 1534 }, { "epoch": 0.20809326916559343, "grad_norm": 6.021223369599779, "learning_rate": 1.8383275891346186e-06, "loss": 1.226, "step": 1535 }, { "epoch": 0.20822883481325832, "grad_norm": 9.13351734700697, "learning_rate": 1.8380881048918404e-06, "loss": 1.178, "step": 1536 }, { "epoch": 0.2083644004609232, "grad_norm": 5.527163299057135, "learning_rate": 1.837848459030387e-06, "loss": 1.1717, "step": 1537 }, { "epoch": 0.2084999661085881, "grad_norm": 6.906268470560432, "learning_rate": 1.8376086515964716e-06, "loss": 1.1992, "step": 1538 }, { "epoch": 0.20863553175625296, "grad_norm": 4.505161209639852, "learning_rate": 1.8373686826363397e-06, "loss": 1.195, "step": 1539 }, { "epoch": 0.20877109740391786, "grad_norm": 12.188425420163746, "learning_rate": 1.837128552196267e-06, "loss": 1.1484, "step": 1540 }, { "epoch": 0.20890666305158273, "grad_norm": 6.275948066558004, "learning_rate": 1.8368882603225609e-06, "loss": 1.2161, "step": 1541 }, { "epoch": 0.2090422286992476, "grad_norm": 4.163479243887042, "learning_rate": 1.8366478070615596e-06, "loss": 1.2083, "step": 1542 }, { "epoch": 0.2091777943469125, "grad_norm": 11.142107230128596, "learning_rate": 1.8364071924596328e-06, "loss": 1.2003, "step": 1543 }, { "epoch": 0.20931335999457737, "grad_norm": 5.940487658160735, "learning_rate": 1.8361664165631817e-06, "loss": 1.1756, "step": 1544 }, { "epoch": 0.20944892564224227, "grad_norm": 4.8061682574182, "learning_rate": 1.8359254794186368e-06, "loss": 1.195, "step": 1545 }, { "epoch": 0.20958449128990714, "grad_norm": 8.574814139758866, "learning_rate": 1.835684381072462e-06, "loss": 1.2057, "step": 1546 }, { "epoch": 0.209720056937572, "grad_norm": 4.126601219934179, "learning_rate": 1.8354431215711506e-06, "loss": 1.1507, "step": 1547 }, { "epoch": 0.2098556225852369, "grad_norm": 7.380029872204336, "learning_rate": 1.8352017009612276e-06, "loss": 1.1516, "step": 1548 }, { "epoch": 0.20999118823290178, "grad_norm": 4.309624361866445, "learning_rate": 1.8349601192892498e-06, "loss": 1.1745, "step": 1549 }, { "epoch": 0.21012675388056667, "grad_norm": 5.492244463400137, "learning_rate": 1.8347183766018033e-06, "loss": 1.196, "step": 1550 }, { "epoch": 0.21026231952823154, "grad_norm": 10.035349975198057, "learning_rate": 1.8344764729455066e-06, "loss": 1.2163, "step": 1551 }, { "epoch": 0.21039788517589642, "grad_norm": 7.5195000699604675, "learning_rate": 1.8342344083670097e-06, "loss": 1.1936, "step": 1552 }, { "epoch": 0.2105334508235613, "grad_norm": 6.671834017845505, "learning_rate": 1.8339921829129916e-06, "loss": 1.1712, "step": 1553 }, { "epoch": 0.21066901647122618, "grad_norm": 9.986677530429093, "learning_rate": 1.8337497966301645e-06, "loss": 1.2362, "step": 1554 }, { "epoch": 0.21080458211889108, "grad_norm": 5.711383582864952, "learning_rate": 1.8335072495652702e-06, "loss": 1.1232, "step": 1555 }, { "epoch": 0.21094014776655595, "grad_norm": 15.38202802485951, "learning_rate": 1.8332645417650822e-06, "loss": 1.2226, "step": 1556 }, { "epoch": 0.21107571341422085, "grad_norm": 3.7320819544554045, "learning_rate": 1.8330216732764049e-06, "loss": 1.1942, "step": 1557 }, { "epoch": 0.21121127906188572, "grad_norm": 15.071128995150232, "learning_rate": 1.832778644146073e-06, "loss": 1.1826, "step": 1558 }, { "epoch": 0.2113468447095506, "grad_norm": 7.834688351690169, "learning_rate": 1.8325354544209532e-06, "loss": 1.1397, "step": 1559 }, { "epoch": 0.2114824103572155, "grad_norm": 4.756424035604894, "learning_rate": 1.832292104147943e-06, "loss": 1.1671, "step": 1560 }, { "epoch": 0.21161797600488036, "grad_norm": 5.1464606460349716, "learning_rate": 1.8320485933739697e-06, "loss": 1.2142, "step": 1561 }, { "epoch": 0.21175354165254526, "grad_norm": 5.18043217710164, "learning_rate": 1.8318049221459932e-06, "loss": 1.1721, "step": 1562 }, { "epoch": 0.21188910730021013, "grad_norm": 5.2423770988773235, "learning_rate": 1.8315610905110032e-06, "loss": 1.194, "step": 1563 }, { "epoch": 0.212024672947875, "grad_norm": 6.1310586899530355, "learning_rate": 1.8313170985160213e-06, "loss": 1.183, "step": 1564 }, { "epoch": 0.2121602385955399, "grad_norm": 3.848954495878705, "learning_rate": 1.8310729462080987e-06, "loss": 1.2279, "step": 1565 }, { "epoch": 0.21229580424320477, "grad_norm": 5.646301768847017, "learning_rate": 1.8308286336343183e-06, "loss": 1.2006, "step": 1566 }, { "epoch": 0.21243136989086966, "grad_norm": 8.33197528851251, "learning_rate": 1.8305841608417945e-06, "loss": 1.1756, "step": 1567 }, { "epoch": 0.21256693553853453, "grad_norm": 8.223276222681076, "learning_rate": 1.8303395278776712e-06, "loss": 1.2115, "step": 1568 }, { "epoch": 0.2127025011861994, "grad_norm": 8.988842701731256, "learning_rate": 1.830094734789124e-06, "loss": 1.207, "step": 1569 }, { "epoch": 0.2128380668338643, "grad_norm": 5.424575259089086, "learning_rate": 1.82984978162336e-06, "loss": 1.2097, "step": 1570 }, { "epoch": 0.21297363248152917, "grad_norm": 10.145558328037778, "learning_rate": 1.8296046684276161e-06, "loss": 1.226, "step": 1571 }, { "epoch": 0.21310919812919407, "grad_norm": 4.910645595815021, "learning_rate": 1.8293593952491602e-06, "loss": 1.1877, "step": 1572 }, { "epoch": 0.21324476377685894, "grad_norm": 6.864107694513582, "learning_rate": 1.8291139621352913e-06, "loss": 1.1744, "step": 1573 }, { "epoch": 0.2133803294245238, "grad_norm": 4.708893363718397, "learning_rate": 1.8288683691333398e-06, "loss": 1.1906, "step": 1574 }, { "epoch": 0.2135158950721887, "grad_norm": 4.798091956340871, "learning_rate": 1.8286226162906657e-06, "loss": 1.1949, "step": 1575 }, { "epoch": 0.21365146071985358, "grad_norm": 6.262670222805324, "learning_rate": 1.8283767036546612e-06, "loss": 1.2398, "step": 1576 }, { "epoch": 0.21378702636751848, "grad_norm": 4.245463827034619, "learning_rate": 1.8281306312727477e-06, "loss": 1.2198, "step": 1577 }, { "epoch": 0.21392259201518335, "grad_norm": 5.0074820411508645, "learning_rate": 1.8278843991923791e-06, "loss": 1.2187, "step": 1578 }, { "epoch": 0.21405815766284825, "grad_norm": 5.0837006817301225, "learning_rate": 1.8276380074610392e-06, "loss": 1.1545, "step": 1579 }, { "epoch": 0.21419372331051312, "grad_norm": 4.6518814742589525, "learning_rate": 1.8273914561262422e-06, "loss": 1.2075, "step": 1580 }, { "epoch": 0.214329288958178, "grad_norm": 3.9310690748367567, "learning_rate": 1.8271447452355343e-06, "loss": 1.1913, "step": 1581 }, { "epoch": 0.2144648546058429, "grad_norm": 14.751354560228467, "learning_rate": 1.826897874836491e-06, "loss": 1.2157, "step": 1582 }, { "epoch": 0.21460042025350776, "grad_norm": 4.368201747370056, "learning_rate": 1.8266508449767196e-06, "loss": 1.1822, "step": 1583 }, { "epoch": 0.21473598590117265, "grad_norm": 4.542378904812186, "learning_rate": 1.8264036557038581e-06, "loss": 1.1898, "step": 1584 }, { "epoch": 0.21487155154883752, "grad_norm": 4.999037402323875, "learning_rate": 1.826156307065575e-06, "loss": 1.1805, "step": 1585 }, { "epoch": 0.2150071171965024, "grad_norm": 4.390175260738812, "learning_rate": 1.8259087991095692e-06, "loss": 1.1825, "step": 1586 }, { "epoch": 0.2151426828441673, "grad_norm": 4.806920806337751, "learning_rate": 1.8256611318835709e-06, "loss": 1.1671, "step": 1587 }, { "epoch": 0.21527824849183216, "grad_norm": 4.027740948385196, "learning_rate": 1.8254133054353406e-06, "loss": 1.1759, "step": 1588 }, { "epoch": 0.21541381413949706, "grad_norm": 5.898605752033014, "learning_rate": 1.8251653198126697e-06, "loss": 1.1757, "step": 1589 }, { "epoch": 0.21554937978716193, "grad_norm": 22.12039808950255, "learning_rate": 1.8249171750633808e-06, "loss": 1.1443, "step": 1590 }, { "epoch": 0.2156849454348268, "grad_norm": 8.21789671744646, "learning_rate": 1.8246688712353256e-06, "loss": 1.2186, "step": 1591 }, { "epoch": 0.2158205110824917, "grad_norm": 4.422269236539888, "learning_rate": 1.8244204083763886e-06, "loss": 1.2049, "step": 1592 }, { "epoch": 0.21595607673015657, "grad_norm": 5.510109959686233, "learning_rate": 1.824171786534483e-06, "loss": 1.1701, "step": 1593 }, { "epoch": 0.21609164237782147, "grad_norm": 4.392486595149967, "learning_rate": 1.823923005757554e-06, "loss": 1.1989, "step": 1594 }, { "epoch": 0.21622720802548634, "grad_norm": 16.651370746886204, "learning_rate": 1.8236740660935772e-06, "loss": 1.151, "step": 1595 }, { "epoch": 0.2163627736731512, "grad_norm": 7.1671734045054185, "learning_rate": 1.8234249675905584e-06, "loss": 1.2324, "step": 1596 }, { "epoch": 0.2164983393208161, "grad_norm": 4.360809657164942, "learning_rate": 1.8231757102965343e-06, "loss": 1.1726, "step": 1597 }, { "epoch": 0.21663390496848098, "grad_norm": 4.061801628006119, "learning_rate": 1.8229262942595724e-06, "loss": 1.1973, "step": 1598 }, { "epoch": 0.21676947061614588, "grad_norm": 4.893849284195079, "learning_rate": 1.8226767195277702e-06, "loss": 1.1794, "step": 1599 }, { "epoch": 0.21690503626381075, "grad_norm": 4.6991411533368055, "learning_rate": 1.8224269861492565e-06, "loss": 1.1875, "step": 1600 }, { "epoch": 0.21704060191147564, "grad_norm": 4.961539191027219, "learning_rate": 1.8221770941721904e-06, "loss": 1.1653, "step": 1601 }, { "epoch": 0.21717616755914051, "grad_norm": 4.403436902510595, "learning_rate": 1.8219270436447615e-06, "loss": 1.1908, "step": 1602 }, { "epoch": 0.21731173320680539, "grad_norm": 5.3039955483227645, "learning_rate": 1.8216768346151904e-06, "loss": 1.1978, "step": 1603 }, { "epoch": 0.21744729885447028, "grad_norm": 5.605208499402525, "learning_rate": 1.8214264671317272e-06, "loss": 1.1763, "step": 1604 }, { "epoch": 0.21758286450213515, "grad_norm": 6.543018549101455, "learning_rate": 1.821175941242654e-06, "loss": 1.1875, "step": 1605 }, { "epoch": 0.21771843014980005, "grad_norm": 4.624342861875911, "learning_rate": 1.8209252569962828e-06, "loss": 1.2179, "step": 1606 }, { "epoch": 0.21785399579746492, "grad_norm": 5.431526751763906, "learning_rate": 1.8206744144409553e-06, "loss": 1.1832, "step": 1607 }, { "epoch": 0.2179895614451298, "grad_norm": 13.798078808391887, "learning_rate": 1.8204234136250452e-06, "loss": 1.1935, "step": 1608 }, { "epoch": 0.2181251270927947, "grad_norm": 5.9395195503889004, "learning_rate": 1.8201722545969557e-06, "loss": 1.1772, "step": 1609 }, { "epoch": 0.21826069274045956, "grad_norm": 4.5399700853145015, "learning_rate": 1.8199209374051212e-06, "loss": 1.1785, "step": 1610 }, { "epoch": 0.21839625838812446, "grad_norm": 6.4894642046602025, "learning_rate": 1.8196694620980058e-06, "loss": 1.1888, "step": 1611 }, { "epoch": 0.21853182403578933, "grad_norm": 5.302478110453357, "learning_rate": 1.8194178287241047e-06, "loss": 1.2073, "step": 1612 }, { "epoch": 0.2186673896834542, "grad_norm": 7.373823445965018, "learning_rate": 1.8191660373319433e-06, "loss": 1.1852, "step": 1613 }, { "epoch": 0.2188029553311191, "grad_norm": 6.422942897669411, "learning_rate": 1.8189140879700779e-06, "loss": 1.16, "step": 1614 }, { "epoch": 0.21893852097878397, "grad_norm": 4.199755735344384, "learning_rate": 1.818661980687095e-06, "loss": 1.1899, "step": 1615 }, { "epoch": 0.21907408662644887, "grad_norm": 13.477354650529087, "learning_rate": 1.8184097155316108e-06, "loss": 1.218, "step": 1616 }, { "epoch": 0.21920965227411374, "grad_norm": 6.836138550002931, "learning_rate": 1.8181572925522732e-06, "loss": 1.1755, "step": 1617 }, { "epoch": 0.21934521792177863, "grad_norm": 9.37498671031086, "learning_rate": 1.81790471179776e-06, "loss": 1.1624, "step": 1618 }, { "epoch": 0.2194807835694435, "grad_norm": 5.686160370015602, "learning_rate": 1.8176519733167792e-06, "loss": 1.1641, "step": 1619 }, { "epoch": 0.21961634921710838, "grad_norm": 6.5598653178613935, "learning_rate": 1.8173990771580694e-06, "loss": 1.1859, "step": 1620 }, { "epoch": 0.21975191486477327, "grad_norm": 4.280385562843226, "learning_rate": 1.8171460233704e-06, "loss": 1.1922, "step": 1621 }, { "epoch": 0.21988748051243814, "grad_norm": 6.11359550122151, "learning_rate": 1.8168928120025698e-06, "loss": 1.1988, "step": 1622 }, { "epoch": 0.22002304616010304, "grad_norm": 4.867517431614315, "learning_rate": 1.816639443103409e-06, "loss": 1.2051, "step": 1623 }, { "epoch": 0.2201586118077679, "grad_norm": 6.494647737382101, "learning_rate": 1.8163859167217778e-06, "loss": 1.2078, "step": 1624 }, { "epoch": 0.22029417745543278, "grad_norm": 4.092708349735404, "learning_rate": 1.816132232906567e-06, "loss": 1.1829, "step": 1625 }, { "epoch": 0.22042974310309768, "grad_norm": 5.577206283089058, "learning_rate": 1.815878391706697e-06, "loss": 1.173, "step": 1626 }, { "epoch": 0.22056530875076255, "grad_norm": 4.3016235036960975, "learning_rate": 1.8156243931711194e-06, "loss": 1.1674, "step": 1627 }, { "epoch": 0.22070087439842745, "grad_norm": 4.771604415579226, "learning_rate": 1.8153702373488157e-06, "loss": 1.1657, "step": 1628 }, { "epoch": 0.22083644004609232, "grad_norm": 7.9750945526933315, "learning_rate": 1.815115924288798e-06, "loss": 1.2001, "step": 1629 }, { "epoch": 0.2209720056937572, "grad_norm": 9.459448503265762, "learning_rate": 1.8148614540401082e-06, "loss": 1.177, "step": 1630 }, { "epoch": 0.2211075713414221, "grad_norm": 6.279733540098949, "learning_rate": 1.8146068266518193e-06, "loss": 1.1696, "step": 1631 }, { "epoch": 0.22124313698908696, "grad_norm": 5.7539933495687965, "learning_rate": 1.8143520421730338e-06, "loss": 1.1861, "step": 1632 }, { "epoch": 0.22137870263675186, "grad_norm": 4.727213379562076, "learning_rate": 1.8140971006528854e-06, "loss": 1.1907, "step": 1633 }, { "epoch": 0.22151426828441673, "grad_norm": 4.537830255994021, "learning_rate": 1.8138420021405367e-06, "loss": 1.1577, "step": 1634 }, { "epoch": 0.2216498339320816, "grad_norm": 10.91042097601027, "learning_rate": 1.8135867466851824e-06, "loss": 1.2028, "step": 1635 }, { "epoch": 0.2217853995797465, "grad_norm": 6.18859225942091, "learning_rate": 1.813331334336046e-06, "loss": 1.1671, "step": 1636 }, { "epoch": 0.22192096522741137, "grad_norm": 4.245437134654883, "learning_rate": 1.8130757651423817e-06, "loss": 1.188, "step": 1637 }, { "epoch": 0.22205653087507626, "grad_norm": 6.7731059433678285, "learning_rate": 1.812820039153474e-06, "loss": 1.141, "step": 1638 }, { "epoch": 0.22219209652274113, "grad_norm": 7.491647820876794, "learning_rate": 1.812564156418638e-06, "loss": 1.1799, "step": 1639 }, { "epoch": 0.22232766217040603, "grad_norm": 5.623559742812027, "learning_rate": 1.8123081169872184e-06, "loss": 1.2208, "step": 1640 }, { "epoch": 0.2224632278180709, "grad_norm": 5.028184991147166, "learning_rate": 1.8120519209085905e-06, "loss": 1.1718, "step": 1641 }, { "epoch": 0.22259879346573577, "grad_norm": 4.700446631472624, "learning_rate": 1.8117955682321594e-06, "loss": 1.2082, "step": 1642 }, { "epoch": 0.22273435911340067, "grad_norm": 4.339427112414726, "learning_rate": 1.811539059007361e-06, "loss": 1.1886, "step": 1643 }, { "epoch": 0.22286992476106554, "grad_norm": 5.770252270157829, "learning_rate": 1.8112823932836609e-06, "loss": 1.1682, "step": 1644 }, { "epoch": 0.22300549040873044, "grad_norm": 5.3274533184076835, "learning_rate": 1.8110255711105552e-06, "loss": 1.1889, "step": 1645 }, { "epoch": 0.2231410560563953, "grad_norm": 4.484413626572553, "learning_rate": 1.81076859253757e-06, "loss": 1.1669, "step": 1646 }, { "epoch": 0.22327662170406018, "grad_norm": 10.5095086162285, "learning_rate": 1.8105114576142615e-06, "loss": 1.2155, "step": 1647 }, { "epoch": 0.22341218735172508, "grad_norm": 14.76653199644871, "learning_rate": 1.810254166390216e-06, "loss": 1.1996, "step": 1648 }, { "epoch": 0.22354775299938995, "grad_norm": 4.754571430650666, "learning_rate": 1.8099967189150505e-06, "loss": 1.1904, "step": 1649 }, { "epoch": 0.22368331864705485, "grad_norm": 5.6398579601880146, "learning_rate": 1.8097391152384113e-06, "loss": 1.1947, "step": 1650 }, { "epoch": 0.22381888429471972, "grad_norm": 4.843641248289472, "learning_rate": 1.8094813554099754e-06, "loss": 1.1782, "step": 1651 }, { "epoch": 0.2239544499423846, "grad_norm": 9.465928939466913, "learning_rate": 1.80922343947945e-06, "loss": 1.1677, "step": 1652 }, { "epoch": 0.22409001559004949, "grad_norm": 5.97710967127237, "learning_rate": 1.808965367496572e-06, "loss": 1.1816, "step": 1653 }, { "epoch": 0.22422558123771436, "grad_norm": 7.213423174702149, "learning_rate": 1.808707139511108e-06, "loss": 1.2123, "step": 1654 }, { "epoch": 0.22436114688537925, "grad_norm": 5.931587373487585, "learning_rate": 1.808448755572856e-06, "loss": 1.1922, "step": 1655 }, { "epoch": 0.22449671253304412, "grad_norm": 4.8410498281836585, "learning_rate": 1.808190215731643e-06, "loss": 1.195, "step": 1656 }, { "epoch": 0.22463227818070902, "grad_norm": 6.265950431342789, "learning_rate": 1.8079315200373265e-06, "loss": 1.2064, "step": 1657 }, { "epoch": 0.2247678438283739, "grad_norm": 4.5367796842676995, "learning_rate": 1.8076726685397934e-06, "loss": 1.2497, "step": 1658 }, { "epoch": 0.22490340947603876, "grad_norm": 5.00113574410056, "learning_rate": 1.8074136612889619e-06, "loss": 1.1886, "step": 1659 }, { "epoch": 0.22503897512370366, "grad_norm": 4.885290286154746, "learning_rate": 1.8071544983347791e-06, "loss": 1.2183, "step": 1660 }, { "epoch": 0.22517454077136853, "grad_norm": 5.624394352543804, "learning_rate": 1.8068951797272222e-06, "loss": 1.1842, "step": 1661 }, { "epoch": 0.22531010641903343, "grad_norm": 4.580101127557773, "learning_rate": 1.8066357055162994e-06, "loss": 1.1794, "step": 1662 }, { "epoch": 0.2254456720666983, "grad_norm": 4.882607185537793, "learning_rate": 1.8063760757520483e-06, "loss": 1.1738, "step": 1663 }, { "epoch": 0.22558123771436317, "grad_norm": 7.502053809091721, "learning_rate": 1.8061162904845356e-06, "loss": 1.1425, "step": 1664 }, { "epoch": 0.22571680336202807, "grad_norm": 7.892219699440379, "learning_rate": 1.80585634976386e-06, "loss": 1.1834, "step": 1665 }, { "epoch": 0.22585236900969294, "grad_norm": 4.858914925033696, "learning_rate": 1.8055962536401479e-06, "loss": 1.2198, "step": 1666 }, { "epoch": 0.22598793465735784, "grad_norm": 6.156055678385086, "learning_rate": 1.8053360021635572e-06, "loss": 1.2118, "step": 1667 }, { "epoch": 0.2261235003050227, "grad_norm": 4.865545709203594, "learning_rate": 1.8050755953842757e-06, "loss": 1.195, "step": 1668 }, { "epoch": 0.22625906595268758, "grad_norm": 4.420162383954534, "learning_rate": 1.8048150333525206e-06, "loss": 1.186, "step": 1669 }, { "epoch": 0.22639463160035248, "grad_norm": 4.211623409938759, "learning_rate": 1.8045543161185388e-06, "loss": 1.205, "step": 1670 }, { "epoch": 0.22653019724801735, "grad_norm": 5.974200400513421, "learning_rate": 1.8042934437326082e-06, "loss": 1.2039, "step": 1671 }, { "epoch": 0.22666576289568224, "grad_norm": 4.714769781143718, "learning_rate": 1.8040324162450355e-06, "loss": 1.1862, "step": 1672 }, { "epoch": 0.2268013285433471, "grad_norm": 17.24965573970959, "learning_rate": 1.8037712337061582e-06, "loss": 1.2238, "step": 1673 }, { "epoch": 0.22693689419101198, "grad_norm": 8.073528915283307, "learning_rate": 1.803509896166343e-06, "loss": 1.2106, "step": 1674 }, { "epoch": 0.22707245983867688, "grad_norm": 5.197609447957254, "learning_rate": 1.8032484036759866e-06, "loss": 1.1617, "step": 1675 }, { "epoch": 0.22720802548634175, "grad_norm": 4.568991492465655, "learning_rate": 1.8029867562855161e-06, "loss": 1.1635, "step": 1676 }, { "epoch": 0.22734359113400665, "grad_norm": 34.35548257055383, "learning_rate": 1.8027249540453878e-06, "loss": 1.185, "step": 1677 }, { "epoch": 0.22747915678167152, "grad_norm": 5.299251077725795, "learning_rate": 1.802462997006089e-06, "loss": 1.2275, "step": 1678 }, { "epoch": 0.22761472242933642, "grad_norm": 7.285876379210854, "learning_rate": 1.8022008852181351e-06, "loss": 1.2057, "step": 1679 }, { "epoch": 0.2277502880770013, "grad_norm": 5.169748859338357, "learning_rate": 1.801938618732073e-06, "loss": 1.163, "step": 1680 }, { "epoch": 0.22788585372466616, "grad_norm": 6.938577627388036, "learning_rate": 1.801676197598478e-06, "loss": 1.1899, "step": 1681 }, { "epoch": 0.22802141937233106, "grad_norm": 10.855123504501483, "learning_rate": 1.8014136218679566e-06, "loss": 1.2039, "step": 1682 }, { "epoch": 0.22815698501999593, "grad_norm": 5.5478602646523925, "learning_rate": 1.8011508915911441e-06, "loss": 1.1448, "step": 1683 }, { "epoch": 0.22829255066766083, "grad_norm": 4.179114535530078, "learning_rate": 1.800888006818706e-06, "loss": 1.1907, "step": 1684 }, { "epoch": 0.2284281163153257, "grad_norm": 13.603946701972417, "learning_rate": 1.8006249676013377e-06, "loss": 1.1936, "step": 1685 }, { "epoch": 0.22856368196299057, "grad_norm": 4.014573455807594, "learning_rate": 1.8003617739897642e-06, "loss": 1.1584, "step": 1686 }, { "epoch": 0.22869924761065547, "grad_norm": 4.398345503750402, "learning_rate": 1.8000984260347401e-06, "loss": 1.1762, "step": 1687 }, { "epoch": 0.22883481325832034, "grad_norm": 5.344704853726422, "learning_rate": 1.7998349237870506e-06, "loss": 1.1831, "step": 1688 }, { "epoch": 0.22897037890598523, "grad_norm": 11.717463754579159, "learning_rate": 1.7995712672975088e-06, "loss": 1.1844, "step": 1689 }, { "epoch": 0.2291059445536501, "grad_norm": 5.55809195272182, "learning_rate": 1.79930745661696e-06, "loss": 1.1924, "step": 1690 }, { "epoch": 0.22924151020131497, "grad_norm": 4.685946447469732, "learning_rate": 1.7990434917962776e-06, "loss": 1.1841, "step": 1691 }, { "epoch": 0.22937707584897987, "grad_norm": 5.550128305641103, "learning_rate": 1.7987793728863649e-06, "loss": 1.1514, "step": 1692 }, { "epoch": 0.22951264149664474, "grad_norm": 5.648277244451273, "learning_rate": 1.7985150999381553e-06, "loss": 1.1446, "step": 1693 }, { "epoch": 0.22964820714430964, "grad_norm": 4.12722498595527, "learning_rate": 1.798250673002612e-06, "loss": 1.1638, "step": 1694 }, { "epoch": 0.2297837727919745, "grad_norm": 5.035768808608428, "learning_rate": 1.797986092130727e-06, "loss": 1.1939, "step": 1695 }, { "epoch": 0.2299193384396394, "grad_norm": 4.123529278506786, "learning_rate": 1.7977213573735234e-06, "loss": 1.1414, "step": 1696 }, { "epoch": 0.23005490408730428, "grad_norm": 4.426238107995662, "learning_rate": 1.7974564687820526e-06, "loss": 1.2075, "step": 1697 }, { "epoch": 0.23019046973496915, "grad_norm": 17.83496552491643, "learning_rate": 1.7971914264073967e-06, "loss": 1.1818, "step": 1698 }, { "epoch": 0.23032603538263405, "grad_norm": 6.481430655322452, "learning_rate": 1.796926230300667e-06, "loss": 1.2014, "step": 1699 }, { "epoch": 0.23046160103029892, "grad_norm": 4.351151445624022, "learning_rate": 1.7966608805130043e-06, "loss": 1.1743, "step": 1700 }, { "epoch": 0.23059716667796382, "grad_norm": 4.578350108787919, "learning_rate": 1.7963953770955791e-06, "loss": 1.1542, "step": 1701 }, { "epoch": 0.2307327323256287, "grad_norm": 4.341231873326, "learning_rate": 1.7961297200995917e-06, "loss": 1.1628, "step": 1702 }, { "epoch": 0.23086829797329356, "grad_norm": 6.806505439760908, "learning_rate": 1.7958639095762722e-06, "loss": 1.1543, "step": 1703 }, { "epoch": 0.23100386362095846, "grad_norm": 5.811940564989657, "learning_rate": 1.79559794557688e-06, "loss": 1.1816, "step": 1704 }, { "epoch": 0.23113942926862333, "grad_norm": 10.073564365662135, "learning_rate": 1.795331828152704e-06, "loss": 1.1465, "step": 1705 }, { "epoch": 0.23127499491628822, "grad_norm": 4.568117084277955, "learning_rate": 1.7950655573550627e-06, "loss": 1.1909, "step": 1706 }, { "epoch": 0.2314105605639531, "grad_norm": 6.966464764354197, "learning_rate": 1.7947991332353048e-06, "loss": 1.2048, "step": 1707 }, { "epoch": 0.23154612621161796, "grad_norm": 4.5995088841794765, "learning_rate": 1.7945325558448078e-06, "loss": 1.1706, "step": 1708 }, { "epoch": 0.23168169185928286, "grad_norm": 4.464947398960954, "learning_rate": 1.7942658252349787e-06, "loss": 1.1798, "step": 1709 }, { "epoch": 0.23181725750694773, "grad_norm": 4.93815290701972, "learning_rate": 1.7939989414572552e-06, "loss": 1.1693, "step": 1710 }, { "epoch": 0.23195282315461263, "grad_norm": 5.18465658614975, "learning_rate": 1.7937319045631032e-06, "loss": 1.1635, "step": 1711 }, { "epoch": 0.2320883888022775, "grad_norm": 6.1903959086995854, "learning_rate": 1.7934647146040185e-06, "loss": 1.1726, "step": 1712 }, { "epoch": 0.23222395444994237, "grad_norm": 6.014621591614925, "learning_rate": 1.793197371631527e-06, "loss": 1.1582, "step": 1713 }, { "epoch": 0.23235952009760727, "grad_norm": 3.5891163995585997, "learning_rate": 1.7929298756971836e-06, "loss": 1.2044, "step": 1714 }, { "epoch": 0.23249508574527214, "grad_norm": 9.573567071852368, "learning_rate": 1.7926622268525725e-06, "loss": 1.1692, "step": 1715 }, { "epoch": 0.23263065139293704, "grad_norm": 6.426043238137347, "learning_rate": 1.792394425149308e-06, "loss": 1.1601, "step": 1716 }, { "epoch": 0.2327662170406019, "grad_norm": 4.294199851726696, "learning_rate": 1.792126470639033e-06, "loss": 1.159, "step": 1717 }, { "epoch": 0.2329017826882668, "grad_norm": 5.242946046760549, "learning_rate": 1.7918583633734212e-06, "loss": 1.1487, "step": 1718 }, { "epoch": 0.23303734833593168, "grad_norm": 4.0998576191422105, "learning_rate": 1.7915901034041744e-06, "loss": 1.1853, "step": 1719 }, { "epoch": 0.23317291398359655, "grad_norm": 4.949561716917422, "learning_rate": 1.7913216907830248e-06, "loss": 1.201, "step": 1720 }, { "epoch": 0.23330847963126145, "grad_norm": 14.696288781779511, "learning_rate": 1.7910531255617332e-06, "loss": 1.1695, "step": 1721 }, { "epoch": 0.23344404527892632, "grad_norm": 6.872059141397177, "learning_rate": 1.7907844077920905e-06, "loss": 1.2195, "step": 1722 }, { "epoch": 0.2335796109265912, "grad_norm": 5.126333256676335, "learning_rate": 1.790515537525917e-06, "loss": 1.1848, "step": 1723 }, { "epoch": 0.23371517657425608, "grad_norm": 5.060231667949659, "learning_rate": 1.7902465148150623e-06, "loss": 1.1549, "step": 1724 }, { "epoch": 0.23385074222192095, "grad_norm": 3.597669869393725, "learning_rate": 1.7899773397114046e-06, "loss": 1.1841, "step": 1725 }, { "epoch": 0.23398630786958585, "grad_norm": 7.603862808360256, "learning_rate": 1.789708012266853e-06, "loss": 1.2134, "step": 1726 }, { "epoch": 0.23412187351725072, "grad_norm": 3.9847589434945774, "learning_rate": 1.7894385325333444e-06, "loss": 1.1399, "step": 1727 }, { "epoch": 0.23425743916491562, "grad_norm": 8.317892957245556, "learning_rate": 1.7891689005628466e-06, "loss": 1.2227, "step": 1728 }, { "epoch": 0.2343930048125805, "grad_norm": 5.896939866930044, "learning_rate": 1.7888991164073554e-06, "loss": 1.2055, "step": 1729 }, { "epoch": 0.23452857046024536, "grad_norm": 5.458421432289499, "learning_rate": 1.7886291801188968e-06, "loss": 1.2091, "step": 1730 }, { "epoch": 0.23466413610791026, "grad_norm": 7.144837598607754, "learning_rate": 1.788359091749526e-06, "loss": 1.2453, "step": 1731 }, { "epoch": 0.23479970175557513, "grad_norm": 4.648256772920857, "learning_rate": 1.7880888513513272e-06, "loss": 1.1815, "step": 1732 }, { "epoch": 0.23493526740324003, "grad_norm": 6.954412877892294, "learning_rate": 1.7878184589764142e-06, "loss": 1.216, "step": 1733 }, { "epoch": 0.2350708330509049, "grad_norm": 5.2155171347744265, "learning_rate": 1.7875479146769303e-06, "loss": 1.1705, "step": 1734 }, { "epoch": 0.23520639869856977, "grad_norm": 6.763577826113547, "learning_rate": 1.7872772185050474e-06, "loss": 1.1836, "step": 1735 }, { "epoch": 0.23534196434623467, "grad_norm": 3.721759045998633, "learning_rate": 1.7870063705129672e-06, "loss": 1.183, "step": 1736 }, { "epoch": 0.23547752999389954, "grad_norm": 11.823721666837839, "learning_rate": 1.786735370752921e-06, "loss": 1.2071, "step": 1737 }, { "epoch": 0.23561309564156444, "grad_norm": 5.264638307921382, "learning_rate": 1.7864642192771683e-06, "loss": 1.1875, "step": 1738 }, { "epoch": 0.2357486612892293, "grad_norm": 4.376871836975706, "learning_rate": 1.786192916137999e-06, "loss": 1.1967, "step": 1739 }, { "epoch": 0.2358842269368942, "grad_norm": 5.5042696557573265, "learning_rate": 1.7859214613877316e-06, "loss": 1.1632, "step": 1740 }, { "epoch": 0.23601979258455907, "grad_norm": 13.629529493314926, "learning_rate": 1.7856498550787141e-06, "loss": 1.1767, "step": 1741 }, { "epoch": 0.23615535823222394, "grad_norm": 5.11670922494752, "learning_rate": 1.7853780972633239e-06, "loss": 1.1617, "step": 1742 }, { "epoch": 0.23629092387988884, "grad_norm": 4.367923748458888, "learning_rate": 1.7851061879939669e-06, "loss": 1.1927, "step": 1743 }, { "epoch": 0.2364264895275537, "grad_norm": 5.368505221210739, "learning_rate": 1.7848341273230786e-06, "loss": 1.2052, "step": 1744 }, { "epoch": 0.2365620551752186, "grad_norm": 5.371951547578429, "learning_rate": 1.784561915303124e-06, "loss": 1.169, "step": 1745 }, { "epoch": 0.23669762082288348, "grad_norm": 5.779855395385629, "learning_rate": 1.784289551986597e-06, "loss": 1.1899, "step": 1746 }, { "epoch": 0.23683318647054835, "grad_norm": 7.818561452996879, "learning_rate": 1.7840170374260206e-06, "loss": 1.1822, "step": 1747 }, { "epoch": 0.23696875211821325, "grad_norm": 6.679329191772715, "learning_rate": 1.7837443716739474e-06, "loss": 1.1566, "step": 1748 }, { "epoch": 0.23710431776587812, "grad_norm": 10.572245849195212, "learning_rate": 1.7834715547829584e-06, "loss": 1.1858, "step": 1749 }, { "epoch": 0.23723988341354302, "grad_norm": 4.238622395827147, "learning_rate": 1.7831985868056646e-06, "loss": 1.1483, "step": 1750 }, { "epoch": 0.2373754490612079, "grad_norm": 8.411858028330881, "learning_rate": 1.7829254677947054e-06, "loss": 1.1944, "step": 1751 }, { "epoch": 0.23751101470887276, "grad_norm": 5.208480489015468, "learning_rate": 1.7826521978027499e-06, "loss": 1.1557, "step": 1752 }, { "epoch": 0.23764658035653766, "grad_norm": 3.3451923501162293, "learning_rate": 1.7823787768824958e-06, "loss": 1.2059, "step": 1753 }, { "epoch": 0.23778214600420253, "grad_norm": 6.1735750675047365, "learning_rate": 1.7821052050866703e-06, "loss": 1.1968, "step": 1754 }, { "epoch": 0.23791771165186743, "grad_norm": 5.5426840437892295, "learning_rate": 1.7818314824680298e-06, "loss": 1.2144, "step": 1755 }, { "epoch": 0.2380532772995323, "grad_norm": 4.933143301608345, "learning_rate": 1.7815576090793592e-06, "loss": 1.1406, "step": 1756 }, { "epoch": 0.2381888429471972, "grad_norm": 4.939277925806625, "learning_rate": 1.781283584973473e-06, "loss": 1.2165, "step": 1757 }, { "epoch": 0.23832440859486206, "grad_norm": 4.900799356910527, "learning_rate": 1.781009410203214e-06, "loss": 1.1371, "step": 1758 }, { "epoch": 0.23845997424252693, "grad_norm": 7.138740382243508, "learning_rate": 1.7807350848214557e-06, "loss": 1.1904, "step": 1759 }, { "epoch": 0.23859553989019183, "grad_norm": 5.163286858620985, "learning_rate": 1.780460608881099e-06, "loss": 1.2138, "step": 1760 }, { "epoch": 0.2387311055378567, "grad_norm": 5.557497245064533, "learning_rate": 1.7801859824350743e-06, "loss": 1.1921, "step": 1761 }, { "epoch": 0.2388666711855216, "grad_norm": 4.806642875723844, "learning_rate": 1.7799112055363415e-06, "loss": 1.1368, "step": 1762 }, { "epoch": 0.23900223683318647, "grad_norm": 3.9845396867198897, "learning_rate": 1.7796362782378887e-06, "loss": 1.1684, "step": 1763 }, { "epoch": 0.23913780248085134, "grad_norm": 4.258544505060423, "learning_rate": 1.7793612005927337e-06, "loss": 1.1971, "step": 1764 }, { "epoch": 0.23927336812851624, "grad_norm": 3.552671088462182, "learning_rate": 1.7790859726539232e-06, "loss": 1.1488, "step": 1765 }, { "epoch": 0.2394089337761811, "grad_norm": 19.36551931052588, "learning_rate": 1.7788105944745325e-06, "loss": 1.1675, "step": 1766 }, { "epoch": 0.239544499423846, "grad_norm": 4.333477709947493, "learning_rate": 1.7785350661076663e-06, "loss": 1.1668, "step": 1767 }, { "epoch": 0.23968006507151088, "grad_norm": 4.317012160839868, "learning_rate": 1.778259387606458e-06, "loss": 1.1916, "step": 1768 }, { "epoch": 0.23981563071917575, "grad_norm": 13.102390586172437, "learning_rate": 1.7779835590240699e-06, "loss": 1.1541, "step": 1769 }, { "epoch": 0.23995119636684065, "grad_norm": 10.213020770815522, "learning_rate": 1.7777075804136938e-06, "loss": 1.1595, "step": 1770 }, { "epoch": 0.24008676201450552, "grad_norm": 8.194385182487377, "learning_rate": 1.7774314518285492e-06, "loss": 1.1814, "step": 1771 }, { "epoch": 0.24022232766217042, "grad_norm": 6.74194330957276, "learning_rate": 1.777155173321886e-06, "loss": 1.175, "step": 1772 }, { "epoch": 0.24035789330983529, "grad_norm": 5.985109594042932, "learning_rate": 1.7768787449469823e-06, "loss": 1.1455, "step": 1773 }, { "epoch": 0.24049345895750016, "grad_norm": 4.83093381922162, "learning_rate": 1.7766021667571448e-06, "loss": 1.1861, "step": 1774 }, { "epoch": 0.24062902460516505, "grad_norm": 7.001670694583787, "learning_rate": 1.7763254388057094e-06, "loss": 1.191, "step": 1775 }, { "epoch": 0.24076459025282992, "grad_norm": 5.961775352861965, "learning_rate": 1.7760485611460415e-06, "loss": 1.1668, "step": 1776 }, { "epoch": 0.24090015590049482, "grad_norm": 12.594200605836392, "learning_rate": 1.7757715338315337e-06, "loss": 1.2011, "step": 1777 }, { "epoch": 0.2410357215481597, "grad_norm": 4.325387417096421, "learning_rate": 1.7754943569156096e-06, "loss": 1.2031, "step": 1778 }, { "epoch": 0.2411712871958246, "grad_norm": 3.7490084118955282, "learning_rate": 1.7752170304517202e-06, "loss": 1.2249, "step": 1779 }, { "epoch": 0.24130685284348946, "grad_norm": 3.8342786059863245, "learning_rate": 1.7749395544933455e-06, "loss": 1.1824, "step": 1780 }, { "epoch": 0.24144241849115433, "grad_norm": 4.556608366046245, "learning_rate": 1.7746619290939946e-06, "loss": 1.1709, "step": 1781 }, { "epoch": 0.24157798413881923, "grad_norm": 4.862450679852271, "learning_rate": 1.7743841543072055e-06, "loss": 1.155, "step": 1782 }, { "epoch": 0.2417135497864841, "grad_norm": 5.3794614548740345, "learning_rate": 1.7741062301865453e-06, "loss": 1.1382, "step": 1783 }, { "epoch": 0.241849115434149, "grad_norm": 7.459037302959056, "learning_rate": 1.7738281567856088e-06, "loss": 1.1822, "step": 1784 }, { "epoch": 0.24198468108181387, "grad_norm": 4.845551850248177, "learning_rate": 1.7735499341580203e-06, "loss": 1.1882, "step": 1785 }, { "epoch": 0.24212024672947874, "grad_norm": 4.184375626744902, "learning_rate": 1.7732715623574333e-06, "loss": 1.1592, "step": 1786 }, { "epoch": 0.24225581237714364, "grad_norm": 6.0731757330453044, "learning_rate": 1.772993041437529e-06, "loss": 1.1522, "step": 1787 }, { "epoch": 0.2423913780248085, "grad_norm": 5.611492506838727, "learning_rate": 1.7727143714520184e-06, "loss": 1.1897, "step": 1788 }, { "epoch": 0.2425269436724734, "grad_norm": 5.050815172614509, "learning_rate": 1.7724355524546409e-06, "loss": 1.1576, "step": 1789 }, { "epoch": 0.24266250932013828, "grad_norm": 5.069206338022929, "learning_rate": 1.7721565844991641e-06, "loss": 1.1629, "step": 1790 }, { "epoch": 0.24279807496780315, "grad_norm": 4.99299858752955, "learning_rate": 1.7718774676393852e-06, "loss": 1.1767, "step": 1791 }, { "epoch": 0.24293364061546804, "grad_norm": 7.4806317434036504, "learning_rate": 1.7715982019291293e-06, "loss": 1.1743, "step": 1792 }, { "epoch": 0.24306920626313291, "grad_norm": 6.043368322949443, "learning_rate": 1.771318787422251e-06, "loss": 1.1922, "step": 1793 }, { "epoch": 0.2432047719107978, "grad_norm": 4.399029337211006, "learning_rate": 1.7710392241726328e-06, "loss": 1.1767, "step": 1794 }, { "epoch": 0.24334033755846268, "grad_norm": 192.78890651637866, "learning_rate": 1.7707595122341865e-06, "loss": 1.2054, "step": 1795 }, { "epoch": 0.24347590320612758, "grad_norm": 10.305728743544556, "learning_rate": 1.7704796516608524e-06, "loss": 1.1888, "step": 1796 }, { "epoch": 0.24361146885379245, "grad_norm": 5.729705392784877, "learning_rate": 1.7701996425065992e-06, "loss": 1.1627, "step": 1797 }, { "epoch": 0.24374703450145732, "grad_norm": 5.8008253312953855, "learning_rate": 1.7699194848254244e-06, "loss": 1.1708, "step": 1798 }, { "epoch": 0.24388260014912222, "grad_norm": 4.289529027077935, "learning_rate": 1.7696391786713545e-06, "loss": 1.1783, "step": 1799 }, { "epoch": 0.2440181657967871, "grad_norm": 5.615652277905333, "learning_rate": 1.769358724098444e-06, "loss": 1.1682, "step": 1800 }, { "epoch": 0.244153731444452, "grad_norm": 5.626121109790758, "learning_rate": 1.7690781211607767e-06, "loss": 1.1828, "step": 1801 }, { "epoch": 0.24428929709211686, "grad_norm": 54.733103399169615, "learning_rate": 1.7687973699124643e-06, "loss": 1.1548, "step": 1802 }, { "epoch": 0.24442486273978173, "grad_norm": 8.185071718070118, "learning_rate": 1.7685164704076476e-06, "loss": 1.1439, "step": 1803 }, { "epoch": 0.24456042838744663, "grad_norm": 6.290938923677848, "learning_rate": 1.768235422700496e-06, "loss": 1.2342, "step": 1804 }, { "epoch": 0.2446959940351115, "grad_norm": 5.602243216488234, "learning_rate": 1.767954226845207e-06, "loss": 1.1869, "step": 1805 }, { "epoch": 0.2448315596827764, "grad_norm": 5.327401469640176, "learning_rate": 1.7676728828960075e-06, "loss": 1.1487, "step": 1806 }, { "epoch": 0.24496712533044127, "grad_norm": 3.5584472106992617, "learning_rate": 1.7673913909071523e-06, "loss": 1.1923, "step": 1807 }, { "epoch": 0.24510269097810614, "grad_norm": 5.856865700931168, "learning_rate": 1.7671097509329242e-06, "loss": 1.1246, "step": 1808 }, { "epoch": 0.24523825662577103, "grad_norm": 5.37421844792861, "learning_rate": 1.7668279630276364e-06, "loss": 1.1807, "step": 1809 }, { "epoch": 0.2453738222734359, "grad_norm": 5.063390090296603, "learning_rate": 1.7665460272456287e-06, "loss": 1.1766, "step": 1810 }, { "epoch": 0.2455093879211008, "grad_norm": 9.996747671230656, "learning_rate": 1.7662639436412703e-06, "loss": 1.1684, "step": 1811 }, { "epoch": 0.24564495356876567, "grad_norm": 6.665350786908931, "learning_rate": 1.7659817122689589e-06, "loss": 1.1466, "step": 1812 }, { "epoch": 0.24578051921643054, "grad_norm": 6.860763260360227, "learning_rate": 1.7656993331831208e-06, "loss": 1.188, "step": 1813 }, { "epoch": 0.24591608486409544, "grad_norm": 8.561991917021814, "learning_rate": 1.76541680643821e-06, "loss": 1.2023, "step": 1814 }, { "epoch": 0.2460516505117603, "grad_norm": 10.540562853037327, "learning_rate": 1.7651341320887102e-06, "loss": 1.1522, "step": 1815 }, { "epoch": 0.2461872161594252, "grad_norm": 4.890152210819948, "learning_rate": 1.7648513101891325e-06, "loss": 1.1892, "step": 1816 }, { "epoch": 0.24632278180709008, "grad_norm": 9.528718903912658, "learning_rate": 1.764568340794017e-06, "loss": 1.2197, "step": 1817 }, { "epoch": 0.24645834745475498, "grad_norm": 6.31398865996059, "learning_rate": 1.7642852239579323e-06, "loss": 1.1738, "step": 1818 }, { "epoch": 0.24659391310241985, "grad_norm": 5.090201061080049, "learning_rate": 1.7640019597354747e-06, "loss": 1.1724, "step": 1819 }, { "epoch": 0.24672947875008472, "grad_norm": 9.630555036929744, "learning_rate": 1.76371854818127e-06, "loss": 1.1815, "step": 1820 }, { "epoch": 0.24686504439774962, "grad_norm": 4.299163190127628, "learning_rate": 1.7634349893499719e-06, "loss": 1.2101, "step": 1821 }, { "epoch": 0.2470006100454145, "grad_norm": 4.288715009349871, "learning_rate": 1.7631512832962622e-06, "loss": 1.1666, "step": 1822 }, { "epoch": 0.24713617569307939, "grad_norm": 4.963339807809318, "learning_rate": 1.7628674300748511e-06, "loss": 1.182, "step": 1823 }, { "epoch": 0.24727174134074426, "grad_norm": 5.934103187847507, "learning_rate": 1.7625834297404783e-06, "loss": 1.1711, "step": 1824 }, { "epoch": 0.24740730698840913, "grad_norm": 5.84466818872142, "learning_rate": 1.7622992823479103e-06, "loss": 1.1628, "step": 1825 }, { "epoch": 0.24754287263607402, "grad_norm": 5.131173860569545, "learning_rate": 1.7620149879519431e-06, "loss": 1.1363, "step": 1826 }, { "epoch": 0.2476784382837389, "grad_norm": 5.375136238692562, "learning_rate": 1.7617305466074002e-06, "loss": 1.155, "step": 1827 }, { "epoch": 0.2478140039314038, "grad_norm": 7.766743666914054, "learning_rate": 1.7614459583691342e-06, "loss": 1.2041, "step": 1828 }, { "epoch": 0.24794956957906866, "grad_norm": 7.022427741236789, "learning_rate": 1.7611612232920258e-06, "loss": 1.1804, "step": 1829 }, { "epoch": 0.24808513522673353, "grad_norm": 4.155038365313111, "learning_rate": 1.7608763414309835e-06, "loss": 1.1845, "step": 1830 }, { "epoch": 0.24822070087439843, "grad_norm": 5.304863502961441, "learning_rate": 1.7605913128409449e-06, "loss": 1.116, "step": 1831 }, { "epoch": 0.2483562665220633, "grad_norm": 9.112763794910554, "learning_rate": 1.7603061375768754e-06, "loss": 1.166, "step": 1832 }, { "epoch": 0.2484918321697282, "grad_norm": 7.174816282642524, "learning_rate": 1.7600208156937688e-06, "loss": 1.184, "step": 1833 }, { "epoch": 0.24862739781739307, "grad_norm": 4.962126628189748, "learning_rate": 1.759735347246647e-06, "loss": 1.143, "step": 1834 }, { "epoch": 0.24876296346505797, "grad_norm": 4.714530689698317, "learning_rate": 1.7594497322905603e-06, "loss": 1.1798, "step": 1835 }, { "epoch": 0.24889852911272284, "grad_norm": 4.351841379669884, "learning_rate": 1.759163970880588e-06, "loss": 1.1428, "step": 1836 }, { "epoch": 0.2490340947603877, "grad_norm": 5.053054234566059, "learning_rate": 1.7588780630718358e-06, "loss": 1.1868, "step": 1837 }, { "epoch": 0.2491696604080526, "grad_norm": 4.123643125416394, "learning_rate": 1.7585920089194394e-06, "loss": 1.1808, "step": 1838 }, { "epoch": 0.24930522605571748, "grad_norm": 5.636172067259354, "learning_rate": 1.7583058084785625e-06, "loss": 1.1927, "step": 1839 }, { "epoch": 0.24944079170338238, "grad_norm": 4.169514270351794, "learning_rate": 1.758019461804396e-06, "loss": 1.1502, "step": 1840 }, { "epoch": 0.24957635735104725, "grad_norm": 7.944699006605956, "learning_rate": 1.7577329689521596e-06, "loss": 1.1525, "step": 1841 }, { "epoch": 0.24971192299871212, "grad_norm": 4.839344275830447, "learning_rate": 1.7574463299771011e-06, "loss": 1.1496, "step": 1842 }, { "epoch": 0.24984748864637701, "grad_norm": 12.902951101104113, "learning_rate": 1.7571595449344972e-06, "loss": 1.1701, "step": 1843 }, { "epoch": 0.24998305429404188, "grad_norm": 4.623619757918428, "learning_rate": 1.7568726138796515e-06, "loss": 1.1957, "step": 1844 }, { "epoch": 0.25011861994170675, "grad_norm": 23.11752551532163, "learning_rate": 1.7565855368678965e-06, "loss": 1.1887, "step": 1845 }, { "epoch": 0.2502541855893717, "grad_norm": 5.631557149162929, "learning_rate": 1.756298313954593e-06, "loss": 1.1963, "step": 1846 }, { "epoch": 0.25038975123703655, "grad_norm": 3.9064294981670042, "learning_rate": 1.7560109451951295e-06, "loss": 1.1482, "step": 1847 }, { "epoch": 0.2505253168847014, "grad_norm": 4.102145649244177, "learning_rate": 1.7557234306449227e-06, "loss": 1.1679, "step": 1848 }, { "epoch": 0.2506608825323663, "grad_norm": 6.507791146656023, "learning_rate": 1.7554357703594178e-06, "loss": 1.2007, "step": 1849 }, { "epoch": 0.25079644818003116, "grad_norm": 5.589141731073473, "learning_rate": 1.7551479643940874e-06, "loss": 1.161, "step": 1850 }, { "epoch": 0.2509320138276961, "grad_norm": 3.8519546775118605, "learning_rate": 1.7548600128044328e-06, "loss": 1.1835, "step": 1851 }, { "epoch": 0.25106757947536096, "grad_norm": 5.313122955639268, "learning_rate": 1.7545719156459835e-06, "loss": 1.1751, "step": 1852 }, { "epoch": 0.25120314512302583, "grad_norm": 4.498891109224595, "learning_rate": 1.7542836729742964e-06, "loss": 1.1777, "step": 1853 }, { "epoch": 0.2513387107706907, "grad_norm": 8.13483757986233, "learning_rate": 1.753995284844957e-06, "loss": 1.1743, "step": 1854 }, { "epoch": 0.25147427641835557, "grad_norm": 4.615276067229578, "learning_rate": 1.7537067513135787e-06, "loss": 1.2183, "step": 1855 }, { "epoch": 0.2516098420660205, "grad_norm": 7.98754330468003, "learning_rate": 1.7534180724358026e-06, "loss": 1.1495, "step": 1856 }, { "epoch": 0.25174540771368537, "grad_norm": 6.620508070204147, "learning_rate": 1.7531292482672982e-06, "loss": 1.2187, "step": 1857 }, { "epoch": 0.25188097336135024, "grad_norm": 4.685538423867104, "learning_rate": 1.7528402788637633e-06, "loss": 1.1745, "step": 1858 }, { "epoch": 0.2520165390090151, "grad_norm": 6.687382425844402, "learning_rate": 1.7525511642809232e-06, "loss": 1.1591, "step": 1859 }, { "epoch": 0.25215210465668, "grad_norm": 4.618914421083232, "learning_rate": 1.7522619045745312e-06, "loss": 1.1661, "step": 1860 }, { "epoch": 0.2522876703043449, "grad_norm": 5.130363462352752, "learning_rate": 1.751972499800369e-06, "loss": 1.1405, "step": 1861 }, { "epoch": 0.2524232359520098, "grad_norm": 3.8590051817698057, "learning_rate": 1.7516829500142461e-06, "loss": 1.1518, "step": 1862 }, { "epoch": 0.25255880159967464, "grad_norm": 4.456334900124673, "learning_rate": 1.7513932552719995e-06, "loss": 1.1593, "step": 1863 }, { "epoch": 0.2526943672473395, "grad_norm": 4.084766410464668, "learning_rate": 1.7511034156294948e-06, "loss": 1.1752, "step": 1864 }, { "epoch": 0.2528299328950044, "grad_norm": 10.605205692230031, "learning_rate": 1.7508134311426253e-06, "loss": 1.1536, "step": 1865 }, { "epoch": 0.2529654985426693, "grad_norm": 6.096419241514889, "learning_rate": 1.750523301867312e-06, "loss": 1.1742, "step": 1866 }, { "epoch": 0.2531010641903342, "grad_norm": 5.2833534616461995, "learning_rate": 1.7502330278595043e-06, "loss": 1.2101, "step": 1867 }, { "epoch": 0.25323662983799905, "grad_norm": 4.344686462232004, "learning_rate": 1.7499426091751792e-06, "loss": 1.1661, "step": 1868 }, { "epoch": 0.2533721954856639, "grad_norm": 7.337140529319761, "learning_rate": 1.7496520458703416e-06, "loss": 1.2202, "step": 1869 }, { "epoch": 0.2535077611333288, "grad_norm": 4.394714142384648, "learning_rate": 1.7493613380010244e-06, "loss": 1.1582, "step": 1870 }, { "epoch": 0.2536433267809937, "grad_norm": 9.565230397184669, "learning_rate": 1.7490704856232882e-06, "loss": 1.18, "step": 1871 }, { "epoch": 0.2537788924286586, "grad_norm": 3.867933347572716, "learning_rate": 1.7487794887932216e-06, "loss": 1.1815, "step": 1872 }, { "epoch": 0.25391445807632346, "grad_norm": 5.5026710787809545, "learning_rate": 1.7484883475669412e-06, "loss": 1.2034, "step": 1873 }, { "epoch": 0.2540500237239883, "grad_norm": 4.184554664453502, "learning_rate": 1.748197062000591e-06, "loss": 1.1758, "step": 1874 }, { "epoch": 0.2541855893716532, "grad_norm": 6.269955462903129, "learning_rate": 1.7479056321503436e-06, "loss": 1.1299, "step": 1875 }, { "epoch": 0.2543211550193181, "grad_norm": 4.917008605959312, "learning_rate": 1.7476140580723984e-06, "loss": 1.1437, "step": 1876 }, { "epoch": 0.254456720666983, "grad_norm": 3.701982885800993, "learning_rate": 1.7473223398229836e-06, "loss": 1.1831, "step": 1877 }, { "epoch": 0.25459228631464786, "grad_norm": 4.999607186365109, "learning_rate": 1.7470304774583542e-06, "loss": 1.1811, "step": 1878 }, { "epoch": 0.25472785196231273, "grad_norm": 5.688444554866129, "learning_rate": 1.7467384710347943e-06, "loss": 1.1672, "step": 1879 }, { "epoch": 0.2548634176099776, "grad_norm": 5.239523699011631, "learning_rate": 1.7464463206086144e-06, "loss": 1.1843, "step": 1880 }, { "epoch": 0.25499898325764253, "grad_norm": 4.328134458939541, "learning_rate": 1.7461540262361538e-06, "loss": 1.1766, "step": 1881 }, { "epoch": 0.2551345489053074, "grad_norm": 6.195230942310561, "learning_rate": 1.7458615879737791e-06, "loss": 1.2248, "step": 1882 }, { "epoch": 0.25527011455297227, "grad_norm": 6.606406895008299, "learning_rate": 1.7455690058778844e-06, "loss": 1.1839, "step": 1883 }, { "epoch": 0.25540568020063714, "grad_norm": 5.828798100805759, "learning_rate": 1.7452762800048924e-06, "loss": 1.1621, "step": 1884 }, { "epoch": 0.25554124584830207, "grad_norm": 6.284144912889393, "learning_rate": 1.7449834104112525e-06, "loss": 1.1953, "step": 1885 }, { "epoch": 0.25567681149596694, "grad_norm": 3.8176840210640304, "learning_rate": 1.7446903971534423e-06, "loss": 1.1727, "step": 1886 }, { "epoch": 0.2558123771436318, "grad_norm": 8.590340538250032, "learning_rate": 1.7443972402879674e-06, "loss": 1.1706, "step": 1887 }, { "epoch": 0.2559479427912967, "grad_norm": 5.864701189420675, "learning_rate": 1.7441039398713605e-06, "loss": 1.1749, "step": 1888 }, { "epoch": 0.25608350843896155, "grad_norm": 5.7600538365673115, "learning_rate": 1.7438104959601826e-06, "loss": 1.1176, "step": 1889 }, { "epoch": 0.2562190740866265, "grad_norm": 9.950657170481849, "learning_rate": 1.7435169086110217e-06, "loss": 1.1618, "step": 1890 }, { "epoch": 0.25635463973429135, "grad_norm": 8.203683124582595, "learning_rate": 1.743223177880494e-06, "loss": 1.1785, "step": 1891 }, { "epoch": 0.2564902053819562, "grad_norm": 5.309960737108301, "learning_rate": 1.742929303825243e-06, "loss": 1.2115, "step": 1892 }, { "epoch": 0.2566257710296211, "grad_norm": 4.161494066572332, "learning_rate": 1.7426352865019402e-06, "loss": 1.1577, "step": 1893 }, { "epoch": 0.25676133667728596, "grad_norm": 7.58540219301748, "learning_rate": 1.7423411259672841e-06, "loss": 1.1649, "step": 1894 }, { "epoch": 0.2568969023249509, "grad_norm": 6.520600280800803, "learning_rate": 1.7420468222780017e-06, "loss": 1.1622, "step": 1895 }, { "epoch": 0.25703246797261575, "grad_norm": 4.6816117160259045, "learning_rate": 1.7417523754908473e-06, "loss": 1.2034, "step": 1896 }, { "epoch": 0.2571680336202806, "grad_norm": 4.7362013624272645, "learning_rate": 1.741457785662602e-06, "loss": 1.1796, "step": 1897 }, { "epoch": 0.2573035992679455, "grad_norm": 3.819019224125359, "learning_rate": 1.7411630528500757e-06, "loss": 1.1585, "step": 1898 }, { "epoch": 0.25743916491561036, "grad_norm": 4.238985015353189, "learning_rate": 1.7408681771101048e-06, "loss": 1.2175, "step": 1899 }, { "epoch": 0.2575747305632753, "grad_norm": 6.604351902812647, "learning_rate": 1.740573158499554e-06, "loss": 1.1924, "step": 1900 }, { "epoch": 0.25771029621094016, "grad_norm": 4.271625126194662, "learning_rate": 1.7402779970753154e-06, "loss": 1.1815, "step": 1901 }, { "epoch": 0.25784586185860503, "grad_norm": 4.001517599834117, "learning_rate": 1.7399826928943084e-06, "loss": 1.1788, "step": 1902 }, { "epoch": 0.2579814275062699, "grad_norm": 3.200735374078492, "learning_rate": 1.7396872460134805e-06, "loss": 1.2047, "step": 1903 }, { "epoch": 0.25811699315393477, "grad_norm": 6.609496938622446, "learning_rate": 1.7393916564898055e-06, "loss": 1.2007, "step": 1904 }, { "epoch": 0.2582525588015997, "grad_norm": 5.216236760636593, "learning_rate": 1.739095924380286e-06, "loss": 1.1564, "step": 1905 }, { "epoch": 0.25838812444926457, "grad_norm": 7.881549835574067, "learning_rate": 1.7388000497419518e-06, "loss": 1.2262, "step": 1906 }, { "epoch": 0.25852369009692944, "grad_norm": 5.196601998770602, "learning_rate": 1.7385040326318597e-06, "loss": 1.157, "step": 1907 }, { "epoch": 0.2586592557445943, "grad_norm": 12.420088948831587, "learning_rate": 1.738207873107094e-06, "loss": 1.1582, "step": 1908 }, { "epoch": 0.2587948213922592, "grad_norm": 3.9812289015272633, "learning_rate": 1.7379115712247675e-06, "loss": 1.1881, "step": 1909 }, { "epoch": 0.2589303870399241, "grad_norm": 4.154605192629034, "learning_rate": 1.7376151270420186e-06, "loss": 1.1573, "step": 1910 }, { "epoch": 0.259065952687589, "grad_norm": 5.130825457925709, "learning_rate": 1.737318540616015e-06, "loss": 1.1479, "step": 1911 }, { "epoch": 0.25920151833525384, "grad_norm": 4.6457466897766135, "learning_rate": 1.7370218120039512e-06, "loss": 1.1435, "step": 1912 }, { "epoch": 0.2593370839829187, "grad_norm": 8.521857614663846, "learning_rate": 1.7367249412630484e-06, "loss": 1.1666, "step": 1913 }, { "epoch": 0.2594726496305836, "grad_norm": 6.739606821664607, "learning_rate": 1.7364279284505564e-06, "loss": 1.1819, "step": 1914 }, { "epoch": 0.2596082152782485, "grad_norm": 6.109985428744761, "learning_rate": 1.736130773623751e-06, "loss": 1.1304, "step": 1915 }, { "epoch": 0.2597437809259134, "grad_norm": 6.901675589031934, "learning_rate": 1.7358334768399368e-06, "loss": 1.1954, "step": 1916 }, { "epoch": 0.25987934657357825, "grad_norm": 12.142758728515634, "learning_rate": 1.7355360381564449e-06, "loss": 1.173, "step": 1917 }, { "epoch": 0.2600149122212431, "grad_norm": 10.589804332469638, "learning_rate": 1.7352384576306336e-06, "loss": 1.1839, "step": 1918 }, { "epoch": 0.260150477868908, "grad_norm": 7.315370289000524, "learning_rate": 1.7349407353198898e-06, "loss": 1.1945, "step": 1919 }, { "epoch": 0.2602860435165729, "grad_norm": 4.285359422423111, "learning_rate": 1.7346428712816262e-06, "loss": 1.137, "step": 1920 }, { "epoch": 0.2604216091642378, "grad_norm": 3.825277692319182, "learning_rate": 1.734344865573284e-06, "loss": 1.1931, "step": 1921 }, { "epoch": 0.26055717481190266, "grad_norm": 6.249042916642298, "learning_rate": 1.734046718252331e-06, "loss": 1.1561, "step": 1922 }, { "epoch": 0.26069274045956753, "grad_norm": 5.9725325560856, "learning_rate": 1.7337484293762627e-06, "loss": 1.1545, "step": 1923 }, { "epoch": 0.26082830610723245, "grad_norm": 4.560453736309626, "learning_rate": 1.7334499990026014e-06, "loss": 1.1545, "step": 1924 }, { "epoch": 0.2609638717548973, "grad_norm": 5.824283990848273, "learning_rate": 1.7331514271888973e-06, "loss": 1.1576, "step": 1925 }, { "epoch": 0.2610994374025622, "grad_norm": 3.8810154159170085, "learning_rate": 1.7328527139927278e-06, "loss": 1.1311, "step": 1926 }, { "epoch": 0.26123500305022707, "grad_norm": 4.9837908929657075, "learning_rate": 1.7325538594716971e-06, "loss": 1.1809, "step": 1927 }, { "epoch": 0.26137056869789194, "grad_norm": 8.534753713214034, "learning_rate": 1.7322548636834372e-06, "loss": 1.2322, "step": 1928 }, { "epoch": 0.26150613434555686, "grad_norm": 7.584175714516811, "learning_rate": 1.7319557266856067e-06, "loss": 1.1573, "step": 1929 }, { "epoch": 0.26164169999322173, "grad_norm": 17.796748497018203, "learning_rate": 1.731656448535892e-06, "loss": 1.1617, "step": 1930 }, { "epoch": 0.2617772656408866, "grad_norm": 4.182403659374688, "learning_rate": 1.7313570292920065e-06, "loss": 1.1276, "step": 1931 }, { "epoch": 0.2619128312885515, "grad_norm": 5.116013853881824, "learning_rate": 1.731057469011691e-06, "loss": 1.1722, "step": 1932 }, { "epoch": 0.26204839693621634, "grad_norm": 4.914792425780745, "learning_rate": 1.7307577677527135e-06, "loss": 1.184, "step": 1933 }, { "epoch": 0.26218396258388127, "grad_norm": 4.132437985331935, "learning_rate": 1.7304579255728684e-06, "loss": 1.1778, "step": 1934 }, { "epoch": 0.26231952823154614, "grad_norm": 4.464766869234008, "learning_rate": 1.7301579425299782e-06, "loss": 1.1524, "step": 1935 }, { "epoch": 0.262455093879211, "grad_norm": 6.905709764752086, "learning_rate": 1.7298578186818925e-06, "loss": 1.1599, "step": 1936 }, { "epoch": 0.2625906595268759, "grad_norm": 4.3716967341704285, "learning_rate": 1.7295575540864875e-06, "loss": 1.1811, "step": 1937 }, { "epoch": 0.26272622517454075, "grad_norm": 5.049259215948536, "learning_rate": 1.729257148801667e-06, "loss": 1.1422, "step": 1938 }, { "epoch": 0.2628617908222057, "grad_norm": 9.259334206594854, "learning_rate": 1.7289566028853616e-06, "loss": 1.1442, "step": 1939 }, { "epoch": 0.26299735646987055, "grad_norm": 7.994177312968008, "learning_rate": 1.7286559163955297e-06, "loss": 1.2253, "step": 1940 }, { "epoch": 0.2631329221175354, "grad_norm": 3.85355324183205, "learning_rate": 1.7283550893901557e-06, "loss": 1.1723, "step": 1941 }, { "epoch": 0.2632684877652003, "grad_norm": 9.05986781071849, "learning_rate": 1.728054121927252e-06, "loss": 1.1274, "step": 1942 }, { "epoch": 0.26340405341286516, "grad_norm": 6.7194551307453745, "learning_rate": 1.727753014064858e-06, "loss": 1.1988, "step": 1943 }, { "epoch": 0.2635396190605301, "grad_norm": 12.372069550993846, "learning_rate": 1.7274517658610397e-06, "loss": 1.1503, "step": 1944 }, { "epoch": 0.26367518470819495, "grad_norm": 11.770129166668482, "learning_rate": 1.7271503773738906e-06, "loss": 1.1487, "step": 1945 }, { "epoch": 0.2638107503558598, "grad_norm": 11.864539567243922, "learning_rate": 1.7268488486615307e-06, "loss": 1.1494, "step": 1946 }, { "epoch": 0.2639463160035247, "grad_norm": 3.8909603863152435, "learning_rate": 1.726547179782108e-06, "loss": 1.167, "step": 1947 }, { "epoch": 0.26408188165118957, "grad_norm": 4.516064001538695, "learning_rate": 1.7262453707937964e-06, "loss": 1.1481, "step": 1948 }, { "epoch": 0.2642174472988545, "grad_norm": 7.846726887231285, "learning_rate": 1.725943421754798e-06, "loss": 1.189, "step": 1949 }, { "epoch": 0.26435301294651936, "grad_norm": 4.464984234497767, "learning_rate": 1.7256413327233408e-06, "loss": 1.1597, "step": 1950 }, { "epoch": 0.26448857859418423, "grad_norm": 3.95072651786872, "learning_rate": 1.7253391037576806e-06, "loss": 1.1526, "step": 1951 }, { "epoch": 0.2646241442418491, "grad_norm": 5.988738966335434, "learning_rate": 1.7250367349160994e-06, "loss": 1.1509, "step": 1952 }, { "epoch": 0.26475970988951397, "grad_norm": 6.361465025496389, "learning_rate": 1.724734226256907e-06, "loss": 1.197, "step": 1953 }, { "epoch": 0.2648952755371789, "grad_norm": 6.486349196118413, "learning_rate": 1.7244315778384403e-06, "loss": 1.1827, "step": 1954 }, { "epoch": 0.26503084118484377, "grad_norm": 6.837621619367821, "learning_rate": 1.7241287897190616e-06, "loss": 1.1564, "step": 1955 }, { "epoch": 0.26516640683250864, "grad_norm": 7.186848080613145, "learning_rate": 1.7238258619571616e-06, "loss": 1.1986, "step": 1956 }, { "epoch": 0.2653019724801735, "grad_norm": 3.7070494052721568, "learning_rate": 1.7235227946111582e-06, "loss": 1.1701, "step": 1957 }, { "epoch": 0.2654375381278384, "grad_norm": 4.572659142629713, "learning_rate": 1.7232195877394948e-06, "loss": 1.2054, "step": 1958 }, { "epoch": 0.2655731037755033, "grad_norm": 4.0541212036223815, "learning_rate": 1.7229162414006426e-06, "loss": 1.1808, "step": 1959 }, { "epoch": 0.2657086694231682, "grad_norm": 5.331945089133912, "learning_rate": 1.7226127556530997e-06, "loss": 1.1893, "step": 1960 }, { "epoch": 0.26584423507083305, "grad_norm": 5.6490422579031, "learning_rate": 1.7223091305553905e-06, "loss": 1.2087, "step": 1961 }, { "epoch": 0.2659798007184979, "grad_norm": 10.316332242684657, "learning_rate": 1.7220053661660673e-06, "loss": 1.1885, "step": 1962 }, { "epoch": 0.2661153663661628, "grad_norm": 8.643986306608147, "learning_rate": 1.7217014625437085e-06, "loss": 1.1583, "step": 1963 }, { "epoch": 0.2662509320138277, "grad_norm": 5.163242139811063, "learning_rate": 1.721397419746919e-06, "loss": 1.1843, "step": 1964 }, { "epoch": 0.2663864976614926, "grad_norm": 7.873185339735694, "learning_rate": 1.721093237834332e-06, "loss": 1.1593, "step": 1965 }, { "epoch": 0.26652206330915745, "grad_norm": 6.422149293906805, "learning_rate": 1.7207889168646056e-06, "loss": 1.1611, "step": 1966 }, { "epoch": 0.2666576289568223, "grad_norm": 5.638088318779739, "learning_rate": 1.7204844568964262e-06, "loss": 1.1574, "step": 1967 }, { "epoch": 0.26679319460448725, "grad_norm": 6.631764606420792, "learning_rate": 1.7201798579885067e-06, "loss": 1.1477, "step": 1968 }, { "epoch": 0.2669287602521521, "grad_norm": 7.578299253949581, "learning_rate": 1.7198751201995862e-06, "loss": 1.206, "step": 1969 }, { "epoch": 0.267064325899817, "grad_norm": 4.739496401300023, "learning_rate": 1.7195702435884312e-06, "loss": 1.1326, "step": 1970 }, { "epoch": 0.26719989154748186, "grad_norm": 83.82160496114565, "learning_rate": 1.7192652282138346e-06, "loss": 1.1647, "step": 1971 }, { "epoch": 0.26733545719514673, "grad_norm": 4.83201366914517, "learning_rate": 1.7189600741346164e-06, "loss": 1.1664, "step": 1972 }, { "epoch": 0.26747102284281166, "grad_norm": 8.04939266269509, "learning_rate": 1.7186547814096232e-06, "loss": 1.1612, "step": 1973 }, { "epoch": 0.2676065884904765, "grad_norm": 5.715801290125888, "learning_rate": 1.7183493500977275e-06, "loss": 1.2028, "step": 1974 }, { "epoch": 0.2677421541381414, "grad_norm": 6.503433945692051, "learning_rate": 1.7180437802578302e-06, "loss": 1.1961, "step": 1975 }, { "epoch": 0.26787771978580627, "grad_norm": 6.1372911154372005, "learning_rate": 1.717738071948858e-06, "loss": 1.1752, "step": 1976 }, { "epoch": 0.26801328543347114, "grad_norm": 6.403711791775905, "learning_rate": 1.7174322252297638e-06, "loss": 1.21, "step": 1977 }, { "epoch": 0.26814885108113606, "grad_norm": 5.333134870625287, "learning_rate": 1.7171262401595282e-06, "loss": 1.1194, "step": 1978 }, { "epoch": 0.26828441672880093, "grad_norm": 5.574415184758806, "learning_rate": 1.7168201167971579e-06, "loss": 1.1424, "step": 1979 }, { "epoch": 0.2684199823764658, "grad_norm": 4.537389240732732, "learning_rate": 1.7165138552016861e-06, "loss": 1.1721, "step": 1980 }, { "epoch": 0.2685555480241307, "grad_norm": 4.853721377571363, "learning_rate": 1.7162074554321736e-06, "loss": 1.1679, "step": 1981 }, { "epoch": 0.26869111367179555, "grad_norm": 3.930243670209693, "learning_rate": 1.7159009175477061e-06, "loss": 1.1741, "step": 1982 }, { "epoch": 0.26882667931946047, "grad_norm": 4.406294013201639, "learning_rate": 1.715594241607398e-06, "loss": 1.169, "step": 1983 }, { "epoch": 0.26896224496712534, "grad_norm": 7.786595751428932, "learning_rate": 1.7152874276703888e-06, "loss": 1.1932, "step": 1984 }, { "epoch": 0.2690978106147902, "grad_norm": 7.059203907846517, "learning_rate": 1.7149804757958456e-06, "loss": 1.1514, "step": 1985 }, { "epoch": 0.2692333762624551, "grad_norm": 4.752603117803003, "learning_rate": 1.714673386042961e-06, "loss": 1.1442, "step": 1986 }, { "epoch": 0.26936894191011995, "grad_norm": 4.767180107512681, "learning_rate": 1.7143661584709553e-06, "loss": 1.2004, "step": 1987 }, { "epoch": 0.2695045075577849, "grad_norm": 4.293428293722788, "learning_rate": 1.714058793139075e-06, "loss": 1.1576, "step": 1988 }, { "epoch": 0.26964007320544975, "grad_norm": 5.554105759154358, "learning_rate": 1.7137512901065924e-06, "loss": 1.1846, "step": 1989 }, { "epoch": 0.2697756388531146, "grad_norm": 4.939507930839392, "learning_rate": 1.713443649432808e-06, "loss": 1.1612, "step": 1990 }, { "epoch": 0.2699112045007795, "grad_norm": 6.1703248188985755, "learning_rate": 1.7131358711770472e-06, "loss": 1.1643, "step": 1991 }, { "epoch": 0.27004677014844436, "grad_norm": 5.6060643378268855, "learning_rate": 1.7128279553986626e-06, "loss": 1.2114, "step": 1992 }, { "epoch": 0.2701823357961093, "grad_norm": 4.1985434811402005, "learning_rate": 1.7125199021570339e-06, "loss": 1.1366, "step": 1993 }, { "epoch": 0.27031790144377416, "grad_norm": 14.089951340098876, "learning_rate": 1.712211711511566e-06, "loss": 1.1785, "step": 1994 }, { "epoch": 0.270453467091439, "grad_norm": 4.8117072455876455, "learning_rate": 1.7119033835216916e-06, "loss": 1.1546, "step": 1995 }, { "epoch": 0.2705890327391039, "grad_norm": 5.190472733873009, "learning_rate": 1.7115949182468693e-06, "loss": 1.1498, "step": 1996 }, { "epoch": 0.27072459838676877, "grad_norm": 4.349975839475061, "learning_rate": 1.7112863157465838e-06, "loss": 1.158, "step": 1997 }, { "epoch": 0.2708601640344337, "grad_norm": 6.795471754076471, "learning_rate": 1.7109775760803466e-06, "loss": 1.1654, "step": 1998 }, { "epoch": 0.27099572968209856, "grad_norm": 4.313648989569087, "learning_rate": 1.7106686993076962e-06, "loss": 1.1729, "step": 1999 }, { "epoch": 0.27113129532976343, "grad_norm": 14.921889747059014, "learning_rate": 1.710359685488197e-06, "loss": 1.1824, "step": 2000 }, { "epoch": 0.2712668609774283, "grad_norm": 5.440702673553783, "learning_rate": 1.7100505346814396e-06, "loss": 1.168, "step": 2001 }, { "epoch": 0.2714024266250932, "grad_norm": 6.6121895188758995, "learning_rate": 1.709741246947041e-06, "loss": 1.1351, "step": 2002 }, { "epoch": 0.2715379922727581, "grad_norm": 5.31546204746593, "learning_rate": 1.709431822344646e-06, "loss": 1.1363, "step": 2003 }, { "epoch": 0.27167355792042297, "grad_norm": 5.875190593257905, "learning_rate": 1.7091222609339234e-06, "loss": 1.1495, "step": 2004 }, { "epoch": 0.27180912356808784, "grad_norm": 5.684425349427874, "learning_rate": 1.7088125627745704e-06, "loss": 1.1379, "step": 2005 }, { "epoch": 0.2719446892157527, "grad_norm": 4.487693990023767, "learning_rate": 1.7085027279263098e-06, "loss": 1.1792, "step": 2006 }, { "epoch": 0.27208025486341764, "grad_norm": 5.947010628933066, "learning_rate": 1.7081927564488908e-06, "loss": 1.1828, "step": 2007 }, { "epoch": 0.2722158205110825, "grad_norm": 6.936426289943196, "learning_rate": 1.7078826484020886e-06, "loss": 1.2137, "step": 2008 }, { "epoch": 0.2723513861587474, "grad_norm": 6.032030656653422, "learning_rate": 1.7075724038457053e-06, "loss": 1.1075, "step": 2009 }, { "epoch": 0.27248695180641225, "grad_norm": 6.611932047242894, "learning_rate": 1.7072620228395693e-06, "loss": 1.157, "step": 2010 }, { "epoch": 0.2726225174540771, "grad_norm": 4.649479212072796, "learning_rate": 1.7069515054435351e-06, "loss": 1.1877, "step": 2011 }, { "epoch": 0.27275808310174204, "grad_norm": 7.162354319576925, "learning_rate": 1.7066408517174832e-06, "loss": 1.1951, "step": 2012 }, { "epoch": 0.2728936487494069, "grad_norm": 5.262399517693676, "learning_rate": 1.706330061721321e-06, "loss": 1.153, "step": 2013 }, { "epoch": 0.2730292143970718, "grad_norm": 3.8183011243259997, "learning_rate": 1.7060191355149817e-06, "loss": 1.1468, "step": 2014 }, { "epoch": 0.27316478004473665, "grad_norm": 5.143212331689829, "learning_rate": 1.7057080731584252e-06, "loss": 1.2144, "step": 2015 }, { "epoch": 0.2733003456924015, "grad_norm": 9.240111388786042, "learning_rate": 1.7053968747116374e-06, "loss": 1.1583, "step": 2016 }, { "epoch": 0.27343591134006645, "grad_norm": 5.431572620781217, "learning_rate": 1.7050855402346303e-06, "loss": 1.1471, "step": 2017 }, { "epoch": 0.2735714769877313, "grad_norm": 3.5426891784343306, "learning_rate": 1.7047740697874425e-06, "loss": 1.1621, "step": 2018 }, { "epoch": 0.2737070426353962, "grad_norm": 6.962310191276227, "learning_rate": 1.7044624634301382e-06, "loss": 1.1889, "step": 2019 }, { "epoch": 0.27384260828306106, "grad_norm": 7.642268073746927, "learning_rate": 1.7041507212228088e-06, "loss": 1.1611, "step": 2020 }, { "epoch": 0.27397817393072593, "grad_norm": 10.439308933791605, "learning_rate": 1.7038388432255709e-06, "loss": 1.1753, "step": 2021 }, { "epoch": 0.27411373957839086, "grad_norm": 4.088172871791615, "learning_rate": 1.7035268294985677e-06, "loss": 1.1925, "step": 2022 }, { "epoch": 0.27424930522605573, "grad_norm": 5.315921887858007, "learning_rate": 1.703214680101969e-06, "loss": 1.1655, "step": 2023 }, { "epoch": 0.2743848708737206, "grad_norm": 5.772142661104593, "learning_rate": 1.70290239509597e-06, "loss": 1.1789, "step": 2024 }, { "epoch": 0.27452043652138547, "grad_norm": 9.337569997958779, "learning_rate": 1.7025899745407925e-06, "loss": 1.1577, "step": 2025 }, { "epoch": 0.27465600216905034, "grad_norm": 4.944559738515427, "learning_rate": 1.7022774184966845e-06, "loss": 1.1628, "step": 2026 }, { "epoch": 0.27479156781671527, "grad_norm": 8.89671865028356, "learning_rate": 1.7019647270239194e-06, "loss": 1.1713, "step": 2027 }, { "epoch": 0.27492713346438014, "grad_norm": 6.652858627963108, "learning_rate": 1.7016519001827977e-06, "loss": 1.1776, "step": 2028 }, { "epoch": 0.275062699112045, "grad_norm": 4.223749277794888, "learning_rate": 1.7013389380336458e-06, "loss": 1.1765, "step": 2029 }, { "epoch": 0.2751982647597099, "grad_norm": 7.526560057967662, "learning_rate": 1.7010258406368157e-06, "loss": 1.1798, "step": 2030 }, { "epoch": 0.27533383040737475, "grad_norm": 4.363054028059613, "learning_rate": 1.7007126080526857e-06, "loss": 1.1412, "step": 2031 }, { "epoch": 0.2754693960550397, "grad_norm": 5.307426268827, "learning_rate": 1.7003992403416603e-06, "loss": 1.2015, "step": 2032 }, { "epoch": 0.27560496170270454, "grad_norm": 6.413582126291676, "learning_rate": 1.70008573756417e-06, "loss": 1.1528, "step": 2033 }, { "epoch": 0.2757405273503694, "grad_norm": 6.679545078526544, "learning_rate": 1.6997720997806714e-06, "loss": 1.2135, "step": 2034 }, { "epoch": 0.2758760929980343, "grad_norm": 5.809559115330159, "learning_rate": 1.699458327051647e-06, "loss": 1.2041, "step": 2035 }, { "epoch": 0.27601165864569915, "grad_norm": 6.213663649242289, "learning_rate": 1.6991444194376054e-06, "loss": 1.216, "step": 2036 }, { "epoch": 0.2761472242933641, "grad_norm": 5.5932483686699985, "learning_rate": 1.6988303769990813e-06, "loss": 1.1352, "step": 2037 }, { "epoch": 0.27628278994102895, "grad_norm": 10.132626993580136, "learning_rate": 1.6985161997966352e-06, "loss": 1.1678, "step": 2038 }, { "epoch": 0.2764183555886938, "grad_norm": 6.238906876123069, "learning_rate": 1.6982018878908536e-06, "loss": 1.1764, "step": 2039 }, { "epoch": 0.2765539212363587, "grad_norm": 13.72095131218385, "learning_rate": 1.6978874413423495e-06, "loss": 1.1592, "step": 2040 }, { "epoch": 0.27668948688402356, "grad_norm": 5.704889676662741, "learning_rate": 1.6975728602117609e-06, "loss": 1.1654, "step": 2041 }, { "epoch": 0.2768250525316885, "grad_norm": 5.4519838492583, "learning_rate": 1.6972581445597527e-06, "loss": 1.1584, "step": 2042 }, { "epoch": 0.27696061817935336, "grad_norm": 37.01801980378287, "learning_rate": 1.6969432944470148e-06, "loss": 1.188, "step": 2043 }, { "epoch": 0.2770961838270182, "grad_norm": 4.820241570867167, "learning_rate": 1.6966283099342643e-06, "loss": 1.1674, "step": 2044 }, { "epoch": 0.2772317494746831, "grad_norm": 5.803404826049562, "learning_rate": 1.6963131910822427e-06, "loss": 1.1355, "step": 2045 }, { "epoch": 0.277367315122348, "grad_norm": 5.413973619986068, "learning_rate": 1.6959979379517186e-06, "loss": 1.1386, "step": 2046 }, { "epoch": 0.2775028807700129, "grad_norm": 4.041033581856921, "learning_rate": 1.6956825506034863e-06, "loss": 1.1549, "step": 2047 }, { "epoch": 0.27763844641767776, "grad_norm": 4.458054459681945, "learning_rate": 1.6953670290983656e-06, "loss": 1.1376, "step": 2048 }, { "epoch": 0.27777401206534263, "grad_norm": 11.229952333223826, "learning_rate": 1.6950513734972018e-06, "loss": 1.1393, "step": 2049 }, { "epoch": 0.2779095777130075, "grad_norm": 17.75342023322161, "learning_rate": 1.6947355838608672e-06, "loss": 1.1341, "step": 2050 }, { "epoch": 0.27804514336067243, "grad_norm": 5.924990657469735, "learning_rate": 1.6944196602502593e-06, "loss": 1.1732, "step": 2051 }, { "epoch": 0.2781807090083373, "grad_norm": 5.930134071319509, "learning_rate": 1.694103602726301e-06, "loss": 1.1889, "step": 2052 }, { "epoch": 0.27831627465600217, "grad_norm": 15.875979349168404, "learning_rate": 1.6937874113499425e-06, "loss": 1.1415, "step": 2053 }, { "epoch": 0.27845184030366704, "grad_norm": 9.588236286228147, "learning_rate": 1.6934710861821575e-06, "loss": 1.197, "step": 2054 }, { "epoch": 0.2785874059513319, "grad_norm": 13.6297417880229, "learning_rate": 1.6931546272839477e-06, "loss": 1.1682, "step": 2055 }, { "epoch": 0.27872297159899684, "grad_norm": 6.798044642953906, "learning_rate": 1.6928380347163396e-06, "loss": 1.1794, "step": 2056 }, { "epoch": 0.2788585372466617, "grad_norm": 4.609761966460918, "learning_rate": 1.6925213085403849e-06, "loss": 1.1593, "step": 2057 }, { "epoch": 0.2789941028943266, "grad_norm": 5.550860892883201, "learning_rate": 1.6922044488171627e-06, "loss": 1.1682, "step": 2058 }, { "epoch": 0.27912966854199145, "grad_norm": 4.647155544764949, "learning_rate": 1.6918874556077764e-06, "loss": 1.1945, "step": 2059 }, { "epoch": 0.2792652341896563, "grad_norm": 3.8094099126788556, "learning_rate": 1.6915703289733558e-06, "loss": 1.1983, "step": 2060 }, { "epoch": 0.27940079983732125, "grad_norm": 7.6056341224741555, "learning_rate": 1.6912530689750559e-06, "loss": 1.1821, "step": 2061 }, { "epoch": 0.2795363654849861, "grad_norm": 7.602487152195605, "learning_rate": 1.6909356756740586e-06, "loss": 1.1037, "step": 2062 }, { "epoch": 0.279671931132651, "grad_norm": 9.460904092533024, "learning_rate": 1.6906181491315697e-06, "loss": 1.1583, "step": 2063 }, { "epoch": 0.27980749678031586, "grad_norm": 6.276883631888465, "learning_rate": 1.6903004894088223e-06, "loss": 1.1662, "step": 2064 }, { "epoch": 0.2799430624279807, "grad_norm": 7.698354137154887, "learning_rate": 1.6899826965670742e-06, "loss": 1.2077, "step": 2065 }, { "epoch": 0.28007862807564565, "grad_norm": 4.490303252926773, "learning_rate": 1.6896647706676098e-06, "loss": 1.1815, "step": 2066 }, { "epoch": 0.2802141937233105, "grad_norm": 6.017397380421924, "learning_rate": 1.6893467117717383e-06, "loss": 1.1568, "step": 2067 }, { "epoch": 0.2803497593709754, "grad_norm": 8.159875808762642, "learning_rate": 1.6890285199407945e-06, "loss": 1.1902, "step": 2068 }, { "epoch": 0.28048532501864026, "grad_norm": 8.50714530327681, "learning_rate": 1.6887101952361395e-06, "loss": 1.1925, "step": 2069 }, { "epoch": 0.28062089066630513, "grad_norm": 4.204432052847423, "learning_rate": 1.6883917377191602e-06, "loss": 1.1768, "step": 2070 }, { "epoch": 0.28075645631397006, "grad_norm": 3.884598781686882, "learning_rate": 1.6880731474512677e-06, "loss": 1.173, "step": 2071 }, { "epoch": 0.28089202196163493, "grad_norm": 6.58963651101161, "learning_rate": 1.6877544244938998e-06, "loss": 1.1668, "step": 2072 }, { "epoch": 0.2810275876092998, "grad_norm": 4.060562796283964, "learning_rate": 1.6874355689085205e-06, "loss": 1.1662, "step": 2073 }, { "epoch": 0.28116315325696467, "grad_norm": 14.085919156180326, "learning_rate": 1.6871165807566174e-06, "loss": 1.1389, "step": 2074 }, { "epoch": 0.28129871890462954, "grad_norm": 4.721597666511504, "learning_rate": 1.686797460099706e-06, "loss": 1.1339, "step": 2075 }, { "epoch": 0.28143428455229447, "grad_norm": 5.812408178080186, "learning_rate": 1.6864782069993252e-06, "loss": 1.1784, "step": 2076 }, { "epoch": 0.28156985019995934, "grad_norm": 4.267491562353416, "learning_rate": 1.6861588215170413e-06, "loss": 1.1687, "step": 2077 }, { "epoch": 0.2817054158476242, "grad_norm": 5.9142641064756925, "learning_rate": 1.6858393037144447e-06, "loss": 1.1276, "step": 2078 }, { "epoch": 0.2818409814952891, "grad_norm": 6.279567138073819, "learning_rate": 1.6855196536531522e-06, "loss": 1.1922, "step": 2079 }, { "epoch": 0.28197654714295395, "grad_norm": 11.9397339085026, "learning_rate": 1.6851998713948055e-06, "loss": 1.1567, "step": 2080 }, { "epoch": 0.2821121127906189, "grad_norm": 4.5765092190778445, "learning_rate": 1.6848799570010725e-06, "loss": 1.1773, "step": 2081 }, { "epoch": 0.28224767843828374, "grad_norm": 6.395068356365473, "learning_rate": 1.6845599105336456e-06, "loss": 1.1706, "step": 2082 }, { "epoch": 0.2823832440859486, "grad_norm": 4.993913422404261, "learning_rate": 1.6842397320542436e-06, "loss": 1.1563, "step": 2083 }, { "epoch": 0.2825188097336135, "grad_norm": 16.458231984784117, "learning_rate": 1.6839194216246107e-06, "loss": 1.1719, "step": 2084 }, { "epoch": 0.2826543753812784, "grad_norm": 6.28162855472153, "learning_rate": 1.6835989793065152e-06, "loss": 1.1536, "step": 2085 }, { "epoch": 0.2827899410289433, "grad_norm": 5.72503975796939, "learning_rate": 1.683278405161753e-06, "loss": 1.1575, "step": 2086 }, { "epoch": 0.28292550667660815, "grad_norm": 6.639916655120338, "learning_rate": 1.682957699252144e-06, "loss": 1.1251, "step": 2087 }, { "epoch": 0.283061072324273, "grad_norm": 12.412855067290863, "learning_rate": 1.6826368616395331e-06, "loss": 1.153, "step": 2088 }, { "epoch": 0.2831966379719379, "grad_norm": 7.522260123888008, "learning_rate": 1.6823158923857924e-06, "loss": 1.1102, "step": 2089 }, { "epoch": 0.2833322036196028, "grad_norm": 4.700019086219184, "learning_rate": 1.6819947915528173e-06, "loss": 1.1468, "step": 2090 }, { "epoch": 0.2834677692672677, "grad_norm": 4.308357945570108, "learning_rate": 1.6816735592025303e-06, "loss": 1.2169, "step": 2091 }, { "epoch": 0.28360333491493256, "grad_norm": 3.373173083246314, "learning_rate": 1.681352195396878e-06, "loss": 1.1606, "step": 2092 }, { "epoch": 0.28373890056259743, "grad_norm": 4.698644588034405, "learning_rate": 1.681030700197833e-06, "loss": 1.1623, "step": 2093 }, { "epoch": 0.2838744662102623, "grad_norm": 4.885710709065632, "learning_rate": 1.6807090736673932e-06, "loss": 1.2062, "step": 2094 }, { "epoch": 0.2840100318579272, "grad_norm": 4.493766211591332, "learning_rate": 1.6803873158675823e-06, "loss": 1.1312, "step": 2095 }, { "epoch": 0.2841455975055921, "grad_norm": 4.2138630778040005, "learning_rate": 1.6800654268604478e-06, "loss": 1.1311, "step": 2096 }, { "epoch": 0.28428116315325697, "grad_norm": 12.269170037946857, "learning_rate": 1.6797434067080635e-06, "loss": 1.1531, "step": 2097 }, { "epoch": 0.28441672880092184, "grad_norm": 5.3173426788493074, "learning_rate": 1.679421255472529e-06, "loss": 1.1743, "step": 2098 }, { "epoch": 0.2845522944485867, "grad_norm": 7.64291536452136, "learning_rate": 1.6790989732159685e-06, "loss": 1.1382, "step": 2099 }, { "epoch": 0.28468786009625163, "grad_norm": 19.172456416850448, "learning_rate": 1.6787765600005317e-06, "loss": 1.2008, "step": 2100 }, { "epoch": 0.2848234257439165, "grad_norm": 8.936613511796605, "learning_rate": 1.6784540158883928e-06, "loss": 1.1476, "step": 2101 }, { "epoch": 0.2849589913915814, "grad_norm": 14.783457552906064, "learning_rate": 1.6781313409417527e-06, "loss": 1.1518, "step": 2102 }, { "epoch": 0.28509455703924624, "grad_norm": 5.287529364246698, "learning_rate": 1.6778085352228362e-06, "loss": 1.178, "step": 2103 }, { "epoch": 0.2852301226869111, "grad_norm": 4.786635340196638, "learning_rate": 1.6774855987938938e-06, "loss": 1.1739, "step": 2104 }, { "epoch": 0.28536568833457604, "grad_norm": 6.14328805883015, "learning_rate": 1.6771625317172018e-06, "loss": 1.1426, "step": 2105 }, { "epoch": 0.2855012539822409, "grad_norm": 6.675218767457616, "learning_rate": 1.6768393340550607e-06, "loss": 1.1423, "step": 2106 }, { "epoch": 0.2856368196299058, "grad_norm": 4.876105793769702, "learning_rate": 1.6765160058697962e-06, "loss": 1.1643, "step": 2107 }, { "epoch": 0.28577238527757065, "grad_norm": 10.608986120774432, "learning_rate": 1.6761925472237604e-06, "loss": 1.1395, "step": 2108 }, { "epoch": 0.2859079509252355, "grad_norm": 5.19970169448486, "learning_rate": 1.6758689581793295e-06, "loss": 1.1234, "step": 2109 }, { "epoch": 0.28604351657290045, "grad_norm": 4.791292588982854, "learning_rate": 1.675545238798905e-06, "loss": 1.1568, "step": 2110 }, { "epoch": 0.2861790822205653, "grad_norm": 4.394772074083935, "learning_rate": 1.6752213891449134e-06, "loss": 1.1729, "step": 2111 }, { "epoch": 0.2863146478682302, "grad_norm": 6.178645117295335, "learning_rate": 1.674897409279807e-06, "loss": 1.1822, "step": 2112 }, { "epoch": 0.28645021351589506, "grad_norm": 14.191916303631405, "learning_rate": 1.6745732992660622e-06, "loss": 1.1667, "step": 2113 }, { "epoch": 0.28658577916355993, "grad_norm": 3.560556994778068, "learning_rate": 1.6742490591661817e-06, "loss": 1.1821, "step": 2114 }, { "epoch": 0.28672134481122485, "grad_norm": 4.064682573363457, "learning_rate": 1.6739246890426922e-06, "loss": 1.188, "step": 2115 }, { "epoch": 0.2868569104588897, "grad_norm": 16.077701688070764, "learning_rate": 1.673600188958146e-06, "loss": 1.1423, "step": 2116 }, { "epoch": 0.2869924761065546, "grad_norm": 7.83423657913268, "learning_rate": 1.6732755589751208e-06, "loss": 1.1893, "step": 2117 }, { "epoch": 0.28712804175421947, "grad_norm": 6.299553369128615, "learning_rate": 1.6729507991562181e-06, "loss": 1.1603, "step": 2118 }, { "epoch": 0.28726360740188434, "grad_norm": 5.939289352197253, "learning_rate": 1.6726259095640663e-06, "loss": 1.1296, "step": 2119 }, { "epoch": 0.28739917304954926, "grad_norm": 7.100696443759497, "learning_rate": 1.6723008902613168e-06, "loss": 1.156, "step": 2120 }, { "epoch": 0.28753473869721413, "grad_norm": 5.231552798061006, "learning_rate": 1.6719757413106475e-06, "loss": 1.1717, "step": 2121 }, { "epoch": 0.287670304344879, "grad_norm": 4.5585467556915935, "learning_rate": 1.6716504627747608e-06, "loss": 1.1716, "step": 2122 }, { "epoch": 0.2878058699925439, "grad_norm": 11.016685914496861, "learning_rate": 1.6713250547163839e-06, "loss": 1.1534, "step": 2123 }, { "epoch": 0.2879414356402088, "grad_norm": 5.940236756563234, "learning_rate": 1.6709995171982697e-06, "loss": 1.1514, "step": 2124 }, { "epoch": 0.28807700128787367, "grad_norm": 30.503969245671858, "learning_rate": 1.6706738502831948e-06, "loss": 1.1189, "step": 2125 }, { "epoch": 0.28821256693553854, "grad_norm": 5.071825471846679, "learning_rate": 1.6703480540339617e-06, "loss": 1.1513, "step": 2126 }, { "epoch": 0.2883481325832034, "grad_norm": 4.87244975899648, "learning_rate": 1.670022128513398e-06, "loss": 1.1738, "step": 2127 }, { "epoch": 0.2884836982308683, "grad_norm": 9.621863471978239, "learning_rate": 1.6696960737843556e-06, "loss": 1.1261, "step": 2128 }, { "epoch": 0.2886192638785332, "grad_norm": 6.136700366661, "learning_rate": 1.6693698899097117e-06, "loss": 1.1642, "step": 2129 }, { "epoch": 0.2887548295261981, "grad_norm": 4.189370897747243, "learning_rate": 1.6690435769523684e-06, "loss": 1.1729, "step": 2130 }, { "epoch": 0.28889039517386295, "grad_norm": 5.419074226510939, "learning_rate": 1.668717134975252e-06, "loss": 1.1888, "step": 2131 }, { "epoch": 0.2890259608215278, "grad_norm": 4.872036684054624, "learning_rate": 1.668390564041315e-06, "loss": 1.1836, "step": 2132 }, { "epoch": 0.2891615264691927, "grad_norm": 4.8156149331899165, "learning_rate": 1.6680638642135334e-06, "loss": 1.1626, "step": 2133 }, { "epoch": 0.2892970921168576, "grad_norm": 5.935927980952792, "learning_rate": 1.667737035554909e-06, "loss": 1.1789, "step": 2134 }, { "epoch": 0.2894326577645225, "grad_norm": 5.965540233722618, "learning_rate": 1.6674100781284683e-06, "loss": 1.2051, "step": 2135 }, { "epoch": 0.28956822341218735, "grad_norm": 5.586550325139463, "learning_rate": 1.6670829919972622e-06, "loss": 1.1637, "step": 2136 }, { "epoch": 0.2897037890598522, "grad_norm": 7.230886851579167, "learning_rate": 1.6667557772243668e-06, "loss": 1.1629, "step": 2137 }, { "epoch": 0.2898393547075171, "grad_norm": 4.50466907102742, "learning_rate": 1.6664284338728824e-06, "loss": 1.1601, "step": 2138 }, { "epoch": 0.289974920355182, "grad_norm": 7.678360209111348, "learning_rate": 1.6661009620059355e-06, "loss": 1.1358, "step": 2139 }, { "epoch": 0.2901104860028469, "grad_norm": 18.211054210397435, "learning_rate": 1.6657733616866755e-06, "loss": 1.1753, "step": 2140 }, { "epoch": 0.29024605165051176, "grad_norm": 6.22955047660166, "learning_rate": 1.6654456329782783e-06, "loss": 1.1348, "step": 2141 }, { "epoch": 0.29038161729817663, "grad_norm": 4.176498003798748, "learning_rate": 1.6651177759439432e-06, "loss": 1.1395, "step": 2142 }, { "epoch": 0.2905171829458415, "grad_norm": 4.3892896776508605, "learning_rate": 1.6647897906468953e-06, "loss": 1.1407, "step": 2143 }, { "epoch": 0.2906527485935064, "grad_norm": 4.535530549012143, "learning_rate": 1.6644616771503838e-06, "loss": 1.1504, "step": 2144 }, { "epoch": 0.2907883142411713, "grad_norm": 23.06948965353287, "learning_rate": 1.6641334355176827e-06, "loss": 1.1797, "step": 2145 }, { "epoch": 0.29092387988883617, "grad_norm": 4.476221242447249, "learning_rate": 1.6638050658120913e-06, "loss": 1.1976, "step": 2146 }, { "epoch": 0.29105944553650104, "grad_norm": 6.400564760460165, "learning_rate": 1.6634765680969323e-06, "loss": 1.1551, "step": 2147 }, { "epoch": 0.2911950111841659, "grad_norm": 4.971290281563488, "learning_rate": 1.6631479424355548e-06, "loss": 1.15, "step": 2148 }, { "epoch": 0.29133057683183083, "grad_norm": 4.307202258360756, "learning_rate": 1.6628191888913308e-06, "loss": 1.1761, "step": 2149 }, { "epoch": 0.2914661424794957, "grad_norm": 5.298569753704846, "learning_rate": 1.662490307527658e-06, "loss": 1.1645, "step": 2150 }, { "epoch": 0.2916017081271606, "grad_norm": 5.003131260813144, "learning_rate": 1.6621612984079592e-06, "loss": 1.1721, "step": 2151 }, { "epoch": 0.29173727377482545, "grad_norm": 5.678725104663333, "learning_rate": 1.6618321615956808e-06, "loss": 1.1482, "step": 2152 }, { "epoch": 0.2918728394224903, "grad_norm": 7.134654352389106, "learning_rate": 1.661502897154294e-06, "loss": 1.1608, "step": 2153 }, { "epoch": 0.29200840507015524, "grad_norm": 6.75399873386201, "learning_rate": 1.6611735051472948e-06, "loss": 1.1495, "step": 2154 }, { "epoch": 0.2921439707178201, "grad_norm": 6.055625063257547, "learning_rate": 1.6608439856382046e-06, "loss": 1.1325, "step": 2155 }, { "epoch": 0.292279536365485, "grad_norm": 5.268816285917984, "learning_rate": 1.660514338690568e-06, "loss": 1.1606, "step": 2156 }, { "epoch": 0.29241510201314985, "grad_norm": 8.601544124672872, "learning_rate": 1.6601845643679548e-06, "loss": 1.1336, "step": 2157 }, { "epoch": 0.2925506676608147, "grad_norm": 4.394120172663901, "learning_rate": 1.6598546627339598e-06, "loss": 1.1646, "step": 2158 }, { "epoch": 0.29268623330847965, "grad_norm": 5.242577268380131, "learning_rate": 1.6595246338522016e-06, "loss": 1.2007, "step": 2159 }, { "epoch": 0.2928217989561445, "grad_norm": 4.340330098435287, "learning_rate": 1.6591944777863237e-06, "loss": 1.1864, "step": 2160 }, { "epoch": 0.2929573646038094, "grad_norm": 6.851117437718364, "learning_rate": 1.6588641945999937e-06, "loss": 1.1633, "step": 2161 }, { "epoch": 0.29309293025147426, "grad_norm": 5.684803023262613, "learning_rate": 1.658533784356905e-06, "loss": 1.1808, "step": 2162 }, { "epoch": 0.2932284958991392, "grad_norm": 4.659731012404564, "learning_rate": 1.658203247120774e-06, "loss": 1.2378, "step": 2163 }, { "epoch": 0.29336406154680406, "grad_norm": 5.83421515531391, "learning_rate": 1.6578725829553425e-06, "loss": 1.1753, "step": 2164 }, { "epoch": 0.2934996271944689, "grad_norm": 7.38852495478132, "learning_rate": 1.6575417919243765e-06, "loss": 1.1362, "step": 2165 }, { "epoch": 0.2936351928421338, "grad_norm": 7.045849193544536, "learning_rate": 1.6572108740916657e-06, "loss": 1.1548, "step": 2166 }, { "epoch": 0.29377075848979867, "grad_norm": 4.820508470503079, "learning_rate": 1.656879829521026e-06, "loss": 1.1779, "step": 2167 }, { "epoch": 0.2939063241374636, "grad_norm": 5.00019910236338, "learning_rate": 1.656548658276296e-06, "loss": 1.1757, "step": 2168 }, { "epoch": 0.29404188978512846, "grad_norm": 13.610852527809667, "learning_rate": 1.6562173604213396e-06, "loss": 1.1592, "step": 2169 }, { "epoch": 0.29417745543279333, "grad_norm": 4.154346685214313, "learning_rate": 1.6558859360200454e-06, "loss": 1.1697, "step": 2170 }, { "epoch": 0.2943130210804582, "grad_norm": 7.986135433456213, "learning_rate": 1.6555543851363256e-06, "loss": 1.1472, "step": 2171 }, { "epoch": 0.2944485867281231, "grad_norm": 4.007098200913435, "learning_rate": 1.6552227078341171e-06, "loss": 1.1404, "step": 2172 }, { "epoch": 0.294584152375788, "grad_norm": 6.009785578046688, "learning_rate": 1.6548909041773817e-06, "loss": 1.1169, "step": 2173 }, { "epoch": 0.29471971802345287, "grad_norm": 6.086853085321143, "learning_rate": 1.6545589742301048e-06, "loss": 1.1498, "step": 2174 }, { "epoch": 0.29485528367111774, "grad_norm": 4.37809454481342, "learning_rate": 1.6542269180562961e-06, "loss": 1.1826, "step": 2175 }, { "epoch": 0.2949908493187826, "grad_norm": 4.629642088569175, "learning_rate": 1.6538947357199907e-06, "loss": 1.1286, "step": 2176 }, { "epoch": 0.2951264149664475, "grad_norm": 4.829893666162775, "learning_rate": 1.6535624272852471e-06, "loss": 1.1621, "step": 2177 }, { "epoch": 0.2952619806141124, "grad_norm": 4.01727604985848, "learning_rate": 1.653229992816148e-06, "loss": 1.1376, "step": 2178 }, { "epoch": 0.2953975462617773, "grad_norm": 6.541950359900163, "learning_rate": 1.6528974323768016e-06, "loss": 1.1253, "step": 2179 }, { "epoch": 0.29553311190944215, "grad_norm": 8.79069215416369, "learning_rate": 1.6525647460313388e-06, "loss": 1.1488, "step": 2180 }, { "epoch": 0.295668677557107, "grad_norm": 10.85668302927698, "learning_rate": 1.6522319338439156e-06, "loss": 1.1663, "step": 2181 }, { "epoch": 0.2958042432047719, "grad_norm": 13.97677531438202, "learning_rate": 1.6518989958787125e-06, "loss": 1.1516, "step": 2182 }, { "epoch": 0.2959398088524368, "grad_norm": 6.212415666973446, "learning_rate": 1.6515659321999337e-06, "loss": 1.1787, "step": 2183 }, { "epoch": 0.2960753745001017, "grad_norm": 3.834171217866773, "learning_rate": 1.6512327428718082e-06, "loss": 1.1394, "step": 2184 }, { "epoch": 0.29621094014776655, "grad_norm": 5.102971146601417, "learning_rate": 1.6508994279585885e-06, "loss": 1.1205, "step": 2185 }, { "epoch": 0.2963465057954314, "grad_norm": 6.624993220349104, "learning_rate": 1.6505659875245524e-06, "loss": 1.1653, "step": 2186 }, { "epoch": 0.2964820714430963, "grad_norm": 6.431315013241862, "learning_rate": 1.6502324216340004e-06, "loss": 1.1391, "step": 2187 }, { "epoch": 0.2966176370907612, "grad_norm": 10.973256912346288, "learning_rate": 1.6498987303512588e-06, "loss": 1.1547, "step": 2188 }, { "epoch": 0.2967532027384261, "grad_norm": 9.970061626597028, "learning_rate": 1.649564913740677e-06, "loss": 1.1466, "step": 2189 }, { "epoch": 0.29688876838609096, "grad_norm": 5.72562339777151, "learning_rate": 1.6492309718666289e-06, "loss": 1.1832, "step": 2190 }, { "epoch": 0.29702433403375583, "grad_norm": 6.578618500075818, "learning_rate": 1.6488969047935125e-06, "loss": 1.1296, "step": 2191 }, { "epoch": 0.2971598996814207, "grad_norm": 7.277680226591296, "learning_rate": 1.6485627125857504e-06, "loss": 1.1535, "step": 2192 }, { "epoch": 0.29729546532908563, "grad_norm": 6.553685468995894, "learning_rate": 1.6482283953077884e-06, "loss": 1.1414, "step": 2193 }, { "epoch": 0.2974310309767505, "grad_norm": 6.93899863759678, "learning_rate": 1.6478939530240971e-06, "loss": 1.1634, "step": 2194 }, { "epoch": 0.29756659662441537, "grad_norm": 5.895641676155323, "learning_rate": 1.6475593857991714e-06, "loss": 1.1691, "step": 2195 }, { "epoch": 0.29770216227208024, "grad_norm": 13.700950959284043, "learning_rate": 1.6472246936975293e-06, "loss": 1.16, "step": 2196 }, { "epoch": 0.2978377279197451, "grad_norm": 12.804517807978392, "learning_rate": 1.6468898767837142e-06, "loss": 1.1476, "step": 2197 }, { "epoch": 0.29797329356741004, "grad_norm": 7.424979812387173, "learning_rate": 1.6465549351222924e-06, "loss": 1.2267, "step": 2198 }, { "epoch": 0.2981088592150749, "grad_norm": 7.145761406338754, "learning_rate": 1.646219868777855e-06, "loss": 1.1277, "step": 2199 }, { "epoch": 0.2982444248627398, "grad_norm": 19.82113116817483, "learning_rate": 1.645884677815017e-06, "loss": 1.1371, "step": 2200 }, { "epoch": 0.29837999051040465, "grad_norm": 5.0730438284849635, "learning_rate": 1.645549362298417e-06, "loss": 1.1573, "step": 2201 }, { "epoch": 0.2985155561580696, "grad_norm": 7.218502551495568, "learning_rate": 1.6452139222927181e-06, "loss": 1.1823, "step": 2202 }, { "epoch": 0.29865112180573444, "grad_norm": 5.3807800183349075, "learning_rate": 1.6448783578626076e-06, "loss": 1.132, "step": 2203 }, { "epoch": 0.2987866874533993, "grad_norm": 24.9065590683296, "learning_rate": 1.6445426690727959e-06, "loss": 1.1061, "step": 2204 }, { "epoch": 0.2989222531010642, "grad_norm": 15.878574495634497, "learning_rate": 1.6442068559880182e-06, "loss": 1.1524, "step": 2205 }, { "epoch": 0.29905781874872905, "grad_norm": 4.877404841662213, "learning_rate": 1.6438709186730333e-06, "loss": 1.1036, "step": 2206 }, { "epoch": 0.299193384396394, "grad_norm": 5.144933237020157, "learning_rate": 1.6435348571926245e-06, "loss": 1.1446, "step": 2207 }, { "epoch": 0.29932895004405885, "grad_norm": 5.380488415841651, "learning_rate": 1.6431986716115982e-06, "loss": 1.1609, "step": 2208 }, { "epoch": 0.2994645156917237, "grad_norm": 6.4409884106167645, "learning_rate": 1.6428623619947848e-06, "loss": 1.2184, "step": 2209 }, { "epoch": 0.2996000813393886, "grad_norm": 16.100123236662085, "learning_rate": 1.6425259284070395e-06, "loss": 1.1711, "step": 2210 }, { "epoch": 0.29973564698705346, "grad_norm": 4.818307228901024, "learning_rate": 1.6421893709132405e-06, "loss": 1.1237, "step": 2211 }, { "epoch": 0.2998712126347184, "grad_norm": 6.928866170609891, "learning_rate": 1.641852689578291e-06, "loss": 1.1603, "step": 2212 }, { "epoch": 0.30000677828238326, "grad_norm": 9.802529099792816, "learning_rate": 1.6415158844671163e-06, "loss": 1.1784, "step": 2213 }, { "epoch": 0.3001423439300481, "grad_norm": 6.0335970132381425, "learning_rate": 1.6411789556446673e-06, "loss": 1.1339, "step": 2214 }, { "epoch": 0.300277909577713, "grad_norm": 7.751169480491906, "learning_rate": 1.640841903175918e-06, "loss": 1.1749, "step": 2215 }, { "epoch": 0.30041347522537787, "grad_norm": 7.422713356420624, "learning_rate": 1.640504727125866e-06, "loss": 1.1427, "step": 2216 }, { "epoch": 0.3005490408730428, "grad_norm": 4.8990753104290095, "learning_rate": 1.640167427559533e-06, "loss": 1.1994, "step": 2217 }, { "epoch": 0.30068460652070766, "grad_norm": 6.079052905330586, "learning_rate": 1.639830004541965e-06, "loss": 1.1343, "step": 2218 }, { "epoch": 0.30082017216837253, "grad_norm": 6.070420613189945, "learning_rate": 1.6394924581382312e-06, "loss": 1.174, "step": 2219 }, { "epoch": 0.3009557378160374, "grad_norm": 6.439154558049675, "learning_rate": 1.6391547884134247e-06, "loss": 1.1772, "step": 2220 }, { "epoch": 0.3010913034637023, "grad_norm": 21.95645908116363, "learning_rate": 1.6388169954326623e-06, "loss": 1.1487, "step": 2221 }, { "epoch": 0.3012268691113672, "grad_norm": 10.31301166414198, "learning_rate": 1.6384790792610849e-06, "loss": 1.1751, "step": 2222 }, { "epoch": 0.30136243475903207, "grad_norm": 5.0635819415037515, "learning_rate": 1.6381410399638571e-06, "loss": 1.1389, "step": 2223 }, { "epoch": 0.30149800040669694, "grad_norm": 10.116364087741553, "learning_rate": 1.6378028776061666e-06, "loss": 1.1912, "step": 2224 }, { "epoch": 0.3016335660543618, "grad_norm": 4.678327914972854, "learning_rate": 1.6374645922532257e-06, "loss": 1.1334, "step": 2225 }, { "epoch": 0.3017691317020267, "grad_norm": 7.331642171953128, "learning_rate": 1.63712618397027e-06, "loss": 1.1685, "step": 2226 }, { "epoch": 0.3019046973496916, "grad_norm": 11.203332912426289, "learning_rate": 1.636787652822559e-06, "loss": 1.1418, "step": 2227 }, { "epoch": 0.3020402629973565, "grad_norm": 6.211673234581092, "learning_rate": 1.6364489988753757e-06, "loss": 1.1437, "step": 2228 }, { "epoch": 0.30217582864502135, "grad_norm": 5.429440454322014, "learning_rate": 1.6361102221940268e-06, "loss": 1.1817, "step": 2229 }, { "epoch": 0.3023113942926862, "grad_norm": 6.174073517342776, "learning_rate": 1.6357713228438428e-06, "loss": 1.1056, "step": 2230 }, { "epoch": 0.3024469599403511, "grad_norm": 5.0157238480214215, "learning_rate": 1.6354323008901773e-06, "loss": 1.1331, "step": 2231 }, { "epoch": 0.302582525588016, "grad_norm": 19.72303629470108, "learning_rate": 1.6350931563984087e-06, "loss": 1.1523, "step": 2232 }, { "epoch": 0.3027180912356809, "grad_norm": 7.6432305385498625, "learning_rate": 1.6347538894339379e-06, "loss": 1.1444, "step": 2233 }, { "epoch": 0.30285365688334576, "grad_norm": 4.692848272685213, "learning_rate": 1.6344145000621898e-06, "loss": 1.1286, "step": 2234 }, { "epoch": 0.3029892225310106, "grad_norm": 9.066940583043701, "learning_rate": 1.6340749883486136e-06, "loss": 1.1916, "step": 2235 }, { "epoch": 0.3031247881786755, "grad_norm": 4.234465743138555, "learning_rate": 1.6337353543586808e-06, "loss": 1.1692, "step": 2236 }, { "epoch": 0.3032603538263404, "grad_norm": 5.754596630865374, "learning_rate": 1.6333955981578868e-06, "loss": 1.1761, "step": 2237 }, { "epoch": 0.3033959194740053, "grad_norm": 14.586714373778108, "learning_rate": 1.633055719811752e-06, "loss": 1.1678, "step": 2238 }, { "epoch": 0.30353148512167016, "grad_norm": 4.517011750002433, "learning_rate": 1.6327157193858182e-06, "loss": 1.2004, "step": 2239 }, { "epoch": 0.30366705076933503, "grad_norm": 5.167335238569452, "learning_rate": 1.6323755969456526e-06, "loss": 1.1481, "step": 2240 }, { "epoch": 0.30380261641699996, "grad_norm": 4.301304761294314, "learning_rate": 1.6320353525568447e-06, "loss": 1.1305, "step": 2241 }, { "epoch": 0.30393818206466483, "grad_norm": 5.355941295643989, "learning_rate": 1.6316949862850082e-06, "loss": 1.1395, "step": 2242 }, { "epoch": 0.3040737477123297, "grad_norm": 5.532165899148368, "learning_rate": 1.6313544981957797e-06, "loss": 1.1835, "step": 2243 }, { "epoch": 0.30420931335999457, "grad_norm": 7.875773792670472, "learning_rate": 1.6310138883548199e-06, "loss": 1.1408, "step": 2244 }, { "epoch": 0.30434487900765944, "grad_norm": 8.067182107914684, "learning_rate": 1.6306731568278126e-06, "loss": 1.1473, "step": 2245 }, { "epoch": 0.30448044465532437, "grad_norm": 5.460956627881876, "learning_rate": 1.6303323036804652e-06, "loss": 1.1988, "step": 2246 }, { "epoch": 0.30461601030298924, "grad_norm": 5.237018352884611, "learning_rate": 1.6299913289785087e-06, "loss": 1.1257, "step": 2247 }, { "epoch": 0.3047515759506541, "grad_norm": 5.863448789909375, "learning_rate": 1.6296502327876974e-06, "loss": 1.1745, "step": 2248 }, { "epoch": 0.304887141598319, "grad_norm": 6.541345959673799, "learning_rate": 1.6293090151738086e-06, "loss": 1.1446, "step": 2249 }, { "epoch": 0.30502270724598385, "grad_norm": 5.245263184981238, "learning_rate": 1.6289676762026438e-06, "loss": 1.1297, "step": 2250 }, { "epoch": 0.3051582728936488, "grad_norm": 17.436714773676908, "learning_rate": 1.6286262159400275e-06, "loss": 1.1557, "step": 2251 }, { "epoch": 0.30529383854131364, "grad_norm": 6.076669020067402, "learning_rate": 1.6282846344518073e-06, "loss": 1.173, "step": 2252 }, { "epoch": 0.3054294041889785, "grad_norm": 9.630013007708575, "learning_rate": 1.627942931803855e-06, "loss": 1.1554, "step": 2253 }, { "epoch": 0.3055649698366434, "grad_norm": 18.470354196688692, "learning_rate": 1.627601108062065e-06, "loss": 1.156, "step": 2254 }, { "epoch": 0.30570053548430826, "grad_norm": 5.253712176552862, "learning_rate": 1.6272591632923548e-06, "loss": 1.1625, "step": 2255 }, { "epoch": 0.3058361011319732, "grad_norm": 4.856943218719029, "learning_rate": 1.6269170975606665e-06, "loss": 1.1271, "step": 2256 }, { "epoch": 0.30597166677963805, "grad_norm": 8.237438726489666, "learning_rate": 1.6265749109329647e-06, "loss": 1.1722, "step": 2257 }, { "epoch": 0.3061072324273029, "grad_norm": 15.222925919942357, "learning_rate": 1.6262326034752371e-06, "loss": 1.1246, "step": 2258 }, { "epoch": 0.3062427980749678, "grad_norm": 3.9574376139410714, "learning_rate": 1.6258901752534947e-06, "loss": 1.1828, "step": 2259 }, { "epoch": 0.30637836372263266, "grad_norm": 7.975945975003511, "learning_rate": 1.625547626333773e-06, "loss": 1.175, "step": 2260 }, { "epoch": 0.3065139293702976, "grad_norm": 11.092833372906323, "learning_rate": 1.6252049567821294e-06, "loss": 1.1796, "step": 2261 }, { "epoch": 0.30664949501796246, "grad_norm": 7.158295994698185, "learning_rate": 1.6248621666646448e-06, "loss": 1.1553, "step": 2262 }, { "epoch": 0.30678506066562733, "grad_norm": 5.001283175272252, "learning_rate": 1.6245192560474237e-06, "loss": 1.1222, "step": 2263 }, { "epoch": 0.3069206263132922, "grad_norm": 11.348276851816653, "learning_rate": 1.6241762249965935e-06, "loss": 1.1481, "step": 2264 }, { "epoch": 0.30705619196095707, "grad_norm": 6.353186396382458, "learning_rate": 1.6238330735783054e-06, "loss": 1.1235, "step": 2265 }, { "epoch": 0.307191757608622, "grad_norm": 6.761875365640253, "learning_rate": 1.6234898018587336e-06, "loss": 1.1565, "step": 2266 }, { "epoch": 0.30732732325628687, "grad_norm": 5.1082695600624834, "learning_rate": 1.6231464099040748e-06, "loss": 1.1299, "step": 2267 }, { "epoch": 0.30746288890395174, "grad_norm": 5.459744541467046, "learning_rate": 1.6228028977805495e-06, "loss": 1.1608, "step": 2268 }, { "epoch": 0.3075984545516166, "grad_norm": 5.109255378579173, "learning_rate": 1.6224592655544016e-06, "loss": 1.1611, "step": 2269 }, { "epoch": 0.3077340201992815, "grad_norm": 5.075403067269792, "learning_rate": 1.6221155132918979e-06, "loss": 1.1865, "step": 2270 }, { "epoch": 0.3078695858469464, "grad_norm": 5.272360640956592, "learning_rate": 1.6217716410593281e-06, "loss": 1.1552, "step": 2271 }, { "epoch": 0.3080051514946113, "grad_norm": 6.427339996151642, "learning_rate": 1.621427648923005e-06, "loss": 1.1591, "step": 2272 }, { "epoch": 0.30814071714227614, "grad_norm": 5.157539139427162, "learning_rate": 1.6210835369492652e-06, "loss": 1.1568, "step": 2273 }, { "epoch": 0.308276282789941, "grad_norm": 5.370655012038446, "learning_rate": 1.6207393052044678e-06, "loss": 1.1638, "step": 2274 }, { "epoch": 0.3084118484376059, "grad_norm": 6.028687010788074, "learning_rate": 1.6203949537549954e-06, "loss": 1.1564, "step": 2275 }, { "epoch": 0.3085474140852708, "grad_norm": 6.113349688439236, "learning_rate": 1.6200504826672533e-06, "loss": 1.1548, "step": 2276 }, { "epoch": 0.3086829797329357, "grad_norm": 5.404182524638724, "learning_rate": 1.6197058920076696e-06, "loss": 1.171, "step": 2277 }, { "epoch": 0.30881854538060055, "grad_norm": 7.597969846857142, "learning_rate": 1.6193611818426968e-06, "loss": 1.1732, "step": 2278 }, { "epoch": 0.3089541110282654, "grad_norm": 14.22980529978129, "learning_rate": 1.6190163522388088e-06, "loss": 1.1499, "step": 2279 }, { "epoch": 0.3090896766759303, "grad_norm": 5.564736815678661, "learning_rate": 1.6186714032625033e-06, "loss": 1.1074, "step": 2280 }, { "epoch": 0.3092252423235952, "grad_norm": 6.548841385710263, "learning_rate": 1.6183263349803014e-06, "loss": 1.1829, "step": 2281 }, { "epoch": 0.3093608079712601, "grad_norm": 5.959091902144806, "learning_rate": 1.6179811474587464e-06, "loss": 1.1721, "step": 2282 }, { "epoch": 0.30949637361892496, "grad_norm": 6.316370456731391, "learning_rate": 1.6176358407644055e-06, "loss": 1.1615, "step": 2283 }, { "epoch": 0.30963193926658983, "grad_norm": 6.441495152152914, "learning_rate": 1.6172904149638677e-06, "loss": 1.1982, "step": 2284 }, { "epoch": 0.30976750491425475, "grad_norm": 8.284641579387008, "learning_rate": 1.616944870123746e-06, "loss": 1.167, "step": 2285 }, { "epoch": 0.3099030705619196, "grad_norm": 6.817487962063171, "learning_rate": 1.616599206310676e-06, "loss": 1.1587, "step": 2286 }, { "epoch": 0.3100386362095845, "grad_norm": 7.450403291079676, "learning_rate": 1.616253423591316e-06, "loss": 1.1629, "step": 2287 }, { "epoch": 0.31017420185724937, "grad_norm": 9.362692939664205, "learning_rate": 1.6159075220323482e-06, "loss": 1.1545, "step": 2288 }, { "epoch": 0.31030976750491424, "grad_norm": 4.6263014478481965, "learning_rate": 1.6155615017004762e-06, "loss": 1.1395, "step": 2289 }, { "epoch": 0.31044533315257916, "grad_norm": 6.324334173700592, "learning_rate": 1.6152153626624275e-06, "loss": 1.1669, "step": 2290 }, { "epoch": 0.31058089880024403, "grad_norm": 5.999027582518052, "learning_rate": 1.6148691049849523e-06, "loss": 1.1274, "step": 2291 }, { "epoch": 0.3107164644479089, "grad_norm": 10.375852604565956, "learning_rate": 1.6145227287348238e-06, "loss": 1.1773, "step": 2292 }, { "epoch": 0.3108520300955738, "grad_norm": 100.88335889274927, "learning_rate": 1.6141762339788376e-06, "loss": 1.1547, "step": 2293 }, { "epoch": 0.31098759574323864, "grad_norm": 8.886339671528946, "learning_rate": 1.6138296207838127e-06, "loss": 1.1814, "step": 2294 }, { "epoch": 0.31112316139090357, "grad_norm": 15.342083408516748, "learning_rate": 1.6134828892165907e-06, "loss": 1.1607, "step": 2295 }, { "epoch": 0.31125872703856844, "grad_norm": 10.12954496560072, "learning_rate": 1.6131360393440362e-06, "loss": 1.1212, "step": 2296 }, { "epoch": 0.3113942926862333, "grad_norm": 32.27591491034021, "learning_rate": 1.6127890712330364e-06, "loss": 1.1458, "step": 2297 }, { "epoch": 0.3115298583338982, "grad_norm": 5.285606705431216, "learning_rate": 1.6124419849505013e-06, "loss": 1.1289, "step": 2298 }, { "epoch": 0.31166542398156305, "grad_norm": 6.227691054552524, "learning_rate": 1.6120947805633636e-06, "loss": 1.1651, "step": 2299 }, { "epoch": 0.311800989629228, "grad_norm": 7.035881292026058, "learning_rate": 1.6117474581385788e-06, "loss": 1.1331, "step": 2300 }, { "epoch": 0.31193655527689285, "grad_norm": 5.164043098487097, "learning_rate": 1.611400017743126e-06, "loss": 1.1457, "step": 2301 }, { "epoch": 0.3120721209245577, "grad_norm": 9.559791783940355, "learning_rate": 1.6110524594440055e-06, "loss": 1.1245, "step": 2302 }, { "epoch": 0.3122076865722226, "grad_norm": 6.102078359685353, "learning_rate": 1.6107047833082418e-06, "loss": 1.1267, "step": 2303 }, { "epoch": 0.31234325221988746, "grad_norm": 9.999250709016211, "learning_rate": 1.6103569894028813e-06, "loss": 1.1673, "step": 2304 }, { "epoch": 0.3124788178675524, "grad_norm": 6.7794112425180355, "learning_rate": 1.6100090777949928e-06, "loss": 1.1212, "step": 2305 }, { "epoch": 0.31261438351521725, "grad_norm": 5.135465294039806, "learning_rate": 1.6096610485516693e-06, "loss": 1.1456, "step": 2306 }, { "epoch": 0.3127499491628821, "grad_norm": 8.289442102121333, "learning_rate": 1.6093129017400248e-06, "loss": 1.176, "step": 2307 }, { "epoch": 0.312885514810547, "grad_norm": 5.097945184580306, "learning_rate": 1.6089646374271965e-06, "loss": 1.175, "step": 2308 }, { "epoch": 0.31302108045821186, "grad_norm": 6.400285260965961, "learning_rate": 1.6086162556803453e-06, "loss": 1.1324, "step": 2309 }, { "epoch": 0.3131566461058768, "grad_norm": 7.487141595899564, "learning_rate": 1.608267756566653e-06, "loss": 1.1523, "step": 2310 }, { "epoch": 0.31329221175354166, "grad_norm": 7.9525856389891905, "learning_rate": 1.607919140153325e-06, "loss": 1.1487, "step": 2311 }, { "epoch": 0.31342777740120653, "grad_norm": 6.250866471072889, "learning_rate": 1.6075704065075897e-06, "loss": 1.1743, "step": 2312 }, { "epoch": 0.3135633430488714, "grad_norm": 7.433990534632403, "learning_rate": 1.6072215556966975e-06, "loss": 1.1844, "step": 2313 }, { "epoch": 0.31369890869653627, "grad_norm": 6.930758317203032, "learning_rate": 1.6068725877879213e-06, "loss": 1.1341, "step": 2314 }, { "epoch": 0.3138344743442012, "grad_norm": 5.441780727297386, "learning_rate": 1.6065235028485567e-06, "loss": 1.1278, "step": 2315 }, { "epoch": 0.31397003999186607, "grad_norm": 5.925019793230982, "learning_rate": 1.6061743009459225e-06, "loss": 1.1641, "step": 2316 }, { "epoch": 0.31410560563953094, "grad_norm": 7.240936370132488, "learning_rate": 1.605824982147359e-06, "loss": 1.151, "step": 2317 }, { "epoch": 0.3142411712871958, "grad_norm": 35.26453745394606, "learning_rate": 1.6054755465202296e-06, "loss": 1.1313, "step": 2318 }, { "epoch": 0.3143767369348607, "grad_norm": 9.051348937569697, "learning_rate": 1.6051259941319209e-06, "loss": 1.1449, "step": 2319 }, { "epoch": 0.3145123025825256, "grad_norm": 20.47765836204973, "learning_rate": 1.6047763250498405e-06, "loss": 1.1792, "step": 2320 }, { "epoch": 0.3146478682301905, "grad_norm": 19.786301495791236, "learning_rate": 1.6044265393414196e-06, "loss": 1.1982, "step": 2321 }, { "epoch": 0.31478343387785535, "grad_norm": 7.808578226199561, "learning_rate": 1.6040766370741117e-06, "loss": 1.1582, "step": 2322 }, { "epoch": 0.3149189995255202, "grad_norm": 6.024940949277665, "learning_rate": 1.6037266183153925e-06, "loss": 1.1748, "step": 2323 }, { "epoch": 0.31505456517318514, "grad_norm": 6.958840810293465, "learning_rate": 1.6033764831327607e-06, "loss": 1.1858, "step": 2324 }, { "epoch": 0.31519013082085, "grad_norm": 8.975884942673318, "learning_rate": 1.6030262315937368e-06, "loss": 1.2068, "step": 2325 }, { "epoch": 0.3153256964685149, "grad_norm": 5.467788674142456, "learning_rate": 1.6026758637658642e-06, "loss": 1.1723, "step": 2326 }, { "epoch": 0.31546126211617975, "grad_norm": 4.8431423729956755, "learning_rate": 1.6023253797167084e-06, "loss": 1.1634, "step": 2327 }, { "epoch": 0.3155968277638446, "grad_norm": 8.402301689602178, "learning_rate": 1.6019747795138576e-06, "loss": 1.147, "step": 2328 }, { "epoch": 0.31573239341150955, "grad_norm": 5.520517847480303, "learning_rate": 1.6016240632249222e-06, "loss": 1.1684, "step": 2329 }, { "epoch": 0.3158679590591744, "grad_norm": 9.483796296478346, "learning_rate": 1.6012732309175356e-06, "loss": 1.1644, "step": 2330 }, { "epoch": 0.3160035247068393, "grad_norm": 9.900108587346189, "learning_rate": 1.600922282659352e-06, "loss": 1.12, "step": 2331 }, { "epoch": 0.31613909035450416, "grad_norm": 10.084007104145757, "learning_rate": 1.60057121851805e-06, "loss": 1.127, "step": 2332 }, { "epoch": 0.31627465600216903, "grad_norm": 7.039415872256219, "learning_rate": 1.600220038561329e-06, "loss": 1.1661, "step": 2333 }, { "epoch": 0.31641022164983396, "grad_norm": 7.96072156389838, "learning_rate": 1.5998687428569113e-06, "loss": 1.1466, "step": 2334 }, { "epoch": 0.3165457872974988, "grad_norm": 8.58646392747167, "learning_rate": 1.5995173314725419e-06, "loss": 1.137, "step": 2335 }, { "epoch": 0.3166813529451637, "grad_norm": 16.47676048763891, "learning_rate": 1.5991658044759871e-06, "loss": 1.1483, "step": 2336 }, { "epoch": 0.31681691859282857, "grad_norm": 7.182369997760658, "learning_rate": 1.5988141619350363e-06, "loss": 1.1467, "step": 2337 }, { "epoch": 0.31695248424049344, "grad_norm": 11.108016683320145, "learning_rate": 1.5984624039175016e-06, "loss": 1.1995, "step": 2338 }, { "epoch": 0.31708804988815836, "grad_norm": 7.744033246968487, "learning_rate": 1.5981105304912159e-06, "loss": 1.1528, "step": 2339 }, { "epoch": 0.31722361553582323, "grad_norm": 11.271034896690738, "learning_rate": 1.5977585417240358e-06, "loss": 1.1231, "step": 2340 }, { "epoch": 0.3173591811834881, "grad_norm": 7.716382991650463, "learning_rate": 1.5974064376838392e-06, "loss": 1.1266, "step": 2341 }, { "epoch": 0.317494746831153, "grad_norm": 5.537328861745189, "learning_rate": 1.5970542184385268e-06, "loss": 1.1548, "step": 2342 }, { "epoch": 0.31763031247881784, "grad_norm": 9.068363004866185, "learning_rate": 1.5967018840560212e-06, "loss": 1.1444, "step": 2343 }, { "epoch": 0.31776587812648277, "grad_norm": 19.89143209450314, "learning_rate": 1.5963494346042674e-06, "loss": 1.1589, "step": 2344 }, { "epoch": 0.31790144377414764, "grad_norm": 6.795373204955472, "learning_rate": 1.5959968701512326e-06, "loss": 1.153, "step": 2345 }, { "epoch": 0.3180370094218125, "grad_norm": 8.949609009052748, "learning_rate": 1.5956441907649057e-06, "loss": 1.1398, "step": 2346 }, { "epoch": 0.3181725750694774, "grad_norm": 6.578972550730891, "learning_rate": 1.595291396513298e-06, "loss": 1.1793, "step": 2347 }, { "epoch": 0.31830814071714225, "grad_norm": 8.052925338240254, "learning_rate": 1.594938487464444e-06, "loss": 1.1564, "step": 2348 }, { "epoch": 0.3184437063648072, "grad_norm": 7.027270516518078, "learning_rate": 1.5945854636863987e-06, "loss": 1.1817, "step": 2349 }, { "epoch": 0.31857927201247205, "grad_norm": 5.287122275082638, "learning_rate": 1.59423232524724e-06, "loss": 1.1904, "step": 2350 }, { "epoch": 0.3187148376601369, "grad_norm": 5.839591146845545, "learning_rate": 1.593879072215068e-06, "loss": 1.1621, "step": 2351 }, { "epoch": 0.3188504033078018, "grad_norm": 6.595499709905762, "learning_rate": 1.5935257046580048e-06, "loss": 1.1492, "step": 2352 }, { "epoch": 0.31898596895546666, "grad_norm": 15.517623797580557, "learning_rate": 1.5931722226441945e-06, "loss": 1.1591, "step": 2353 }, { "epoch": 0.3191215346031316, "grad_norm": 7.5422497680003335, "learning_rate": 1.5928186262418032e-06, "loss": 1.158, "step": 2354 }, { "epoch": 0.31925710025079646, "grad_norm": 8.359305511840878, "learning_rate": 1.5924649155190191e-06, "loss": 1.1627, "step": 2355 }, { "epoch": 0.3193926658984613, "grad_norm": 7.904183647017699, "learning_rate": 1.5921110905440526e-06, "loss": 1.1815, "step": 2356 }, { "epoch": 0.3195282315461262, "grad_norm": 9.2743628056264, "learning_rate": 1.5917571513851364e-06, "loss": 1.1638, "step": 2357 }, { "epoch": 0.31966379719379107, "grad_norm": 7.4568313182619566, "learning_rate": 1.5914030981105246e-06, "loss": 1.143, "step": 2358 }, { "epoch": 0.319799362841456, "grad_norm": 10.647083822109366, "learning_rate": 1.5910489307884936e-06, "loss": 1.1209, "step": 2359 }, { "epoch": 0.31993492848912086, "grad_norm": 6.748065848626154, "learning_rate": 1.5906946494873415e-06, "loss": 1.1595, "step": 2360 }, { "epoch": 0.32007049413678573, "grad_norm": 7.662663499135054, "learning_rate": 1.590340254275389e-06, "loss": 1.1939, "step": 2361 }, { "epoch": 0.3202060597844506, "grad_norm": 10.125856313163657, "learning_rate": 1.5899857452209787e-06, "loss": 1.1368, "step": 2362 }, { "epoch": 0.32034162543211553, "grad_norm": 6.398094949548803, "learning_rate": 1.589631122392474e-06, "loss": 1.1531, "step": 2363 }, { "epoch": 0.3204771910797804, "grad_norm": 5.600570512869235, "learning_rate": 1.5892763858582618e-06, "loss": 1.1494, "step": 2364 }, { "epoch": 0.32061275672744527, "grad_norm": 8.979111443044216, "learning_rate": 1.58892153568675e-06, "loss": 1.1151, "step": 2365 }, { "epoch": 0.32074832237511014, "grad_norm": 5.883102392220999, "learning_rate": 1.588566571946369e-06, "loss": 1.1446, "step": 2366 }, { "epoch": 0.320883888022775, "grad_norm": 8.262628669234182, "learning_rate": 1.58821149470557e-06, "loss": 1.1388, "step": 2367 }, { "epoch": 0.32101945367043994, "grad_norm": 18.847118445008643, "learning_rate": 1.5878563040328276e-06, "loss": 1.1499, "step": 2368 }, { "epoch": 0.3211550193181048, "grad_norm": 11.0331633098778, "learning_rate": 1.5875009999966371e-06, "loss": 1.1597, "step": 2369 }, { "epoch": 0.3212905849657697, "grad_norm": 8.341071074304525, "learning_rate": 1.5871455826655163e-06, "loss": 1.1314, "step": 2370 }, { "epoch": 0.32142615061343455, "grad_norm": 5.353143639227528, "learning_rate": 1.5867900521080044e-06, "loss": 1.1354, "step": 2371 }, { "epoch": 0.3215617162610994, "grad_norm": 18.551850467615367, "learning_rate": 1.586434408392663e-06, "loss": 1.1646, "step": 2372 }, { "epoch": 0.32169728190876434, "grad_norm": 5.915244240181741, "learning_rate": 1.5860786515880745e-06, "loss": 1.1295, "step": 2373 }, { "epoch": 0.3218328475564292, "grad_norm": 6.687923542010782, "learning_rate": 1.5857227817628447e-06, "loss": 1.1691, "step": 2374 }, { "epoch": 0.3219684132040941, "grad_norm": 9.274674527107084, "learning_rate": 1.5853667989855999e-06, "loss": 1.125, "step": 2375 }, { "epoch": 0.32210397885175895, "grad_norm": 5.010155230029513, "learning_rate": 1.5850107033249884e-06, "loss": 1.1978, "step": 2376 }, { "epoch": 0.3222395444994238, "grad_norm": 4.888639732978304, "learning_rate": 1.5846544948496807e-06, "loss": 1.1169, "step": 2377 }, { "epoch": 0.32237511014708875, "grad_norm": 5.886201988039462, "learning_rate": 1.5842981736283685e-06, "loss": 1.1537, "step": 2378 }, { "epoch": 0.3225106757947536, "grad_norm": 7.150867737786985, "learning_rate": 1.5839417397297656e-06, "loss": 1.1424, "step": 2379 }, { "epoch": 0.3226462414424185, "grad_norm": 7.614120145745282, "learning_rate": 1.5835851932226074e-06, "loss": 1.2154, "step": 2380 }, { "epoch": 0.32278180709008336, "grad_norm": 5.651965479764882, "learning_rate": 1.5832285341756517e-06, "loss": 1.1418, "step": 2381 }, { "epoch": 0.32291737273774823, "grad_norm": 17.685353732015816, "learning_rate": 1.5828717626576766e-06, "loss": 1.1279, "step": 2382 }, { "epoch": 0.32305293838541316, "grad_norm": 5.689408497098716, "learning_rate": 1.582514878737483e-06, "loss": 1.1457, "step": 2383 }, { "epoch": 0.32318850403307803, "grad_norm": 8.221241674641938, "learning_rate": 1.5821578824838932e-06, "loss": 1.1362, "step": 2384 }, { "epoch": 0.3233240696807429, "grad_norm": 7.373322962459309, "learning_rate": 1.5818007739657512e-06, "loss": 1.1802, "step": 2385 }, { "epoch": 0.32345963532840777, "grad_norm": 5.028375059592231, "learning_rate": 1.5814435532519221e-06, "loss": 1.1445, "step": 2386 }, { "epoch": 0.32359520097607264, "grad_norm": 7.266194133662393, "learning_rate": 1.5810862204112933e-06, "loss": 1.1491, "step": 2387 }, { "epoch": 0.32373076662373756, "grad_norm": 17.09181006268, "learning_rate": 1.580728775512774e-06, "loss": 1.1417, "step": 2388 }, { "epoch": 0.32386633227140244, "grad_norm": 8.204988675754553, "learning_rate": 1.5803712186252943e-06, "loss": 1.1528, "step": 2389 }, { "epoch": 0.3240018979190673, "grad_norm": 5.295442884988452, "learning_rate": 1.5800135498178065e-06, "loss": 1.1729, "step": 2390 }, { "epoch": 0.3241374635667322, "grad_norm": 6.825970523605419, "learning_rate": 1.5796557691592835e-06, "loss": 1.1738, "step": 2391 }, { "epoch": 0.32427302921439705, "grad_norm": 8.393274918128142, "learning_rate": 1.579297876718721e-06, "loss": 1.1448, "step": 2392 }, { "epoch": 0.32440859486206197, "grad_norm": 6.45756526437617, "learning_rate": 1.5789398725651358e-06, "loss": 1.1282, "step": 2393 }, { "epoch": 0.32454416050972684, "grad_norm": 6.139868865579417, "learning_rate": 1.5785817567675661e-06, "loss": 1.1511, "step": 2394 }, { "epoch": 0.3246797261573917, "grad_norm": 6.627050328994415, "learning_rate": 1.5782235293950717e-06, "loss": 1.1355, "step": 2395 }, { "epoch": 0.3248152918050566, "grad_norm": 5.987854878560808, "learning_rate": 1.5778651905167334e-06, "loss": 1.1183, "step": 2396 }, { "epoch": 0.32495085745272145, "grad_norm": 13.418160196610913, "learning_rate": 1.577506740201655e-06, "loss": 1.1319, "step": 2397 }, { "epoch": 0.3250864231003864, "grad_norm": 4.746533772507595, "learning_rate": 1.5771481785189601e-06, "loss": 1.0848, "step": 2398 }, { "epoch": 0.32522198874805125, "grad_norm": 19.19545286667859, "learning_rate": 1.5767895055377948e-06, "loss": 1.1755, "step": 2399 }, { "epoch": 0.3253575543957161, "grad_norm": 7.44903651014597, "learning_rate": 1.5764307213273264e-06, "loss": 1.1687, "step": 2400 }, { "epoch": 0.325493120043381, "grad_norm": 5.68653486397219, "learning_rate": 1.5760718259567432e-06, "loss": 1.1764, "step": 2401 }, { "epoch": 0.3256286856910459, "grad_norm": 5.391766470291313, "learning_rate": 1.5757128194952557e-06, "loss": 1.1292, "step": 2402 }, { "epoch": 0.3257642513387108, "grad_norm": 15.8351549990908, "learning_rate": 1.5753537020120952e-06, "loss": 1.1735, "step": 2403 }, { "epoch": 0.32589981698637566, "grad_norm": 7.012508769792035, "learning_rate": 1.5749944735765153e-06, "loss": 1.1619, "step": 2404 }, { "epoch": 0.3260353826340405, "grad_norm": 8.345234109905523, "learning_rate": 1.5746351342577895e-06, "loss": 1.1659, "step": 2405 }, { "epoch": 0.3261709482817054, "grad_norm": 28.852449720905962, "learning_rate": 1.5742756841252143e-06, "loss": 1.1525, "step": 2406 }, { "epoch": 0.3263065139293703, "grad_norm": 5.81698283683956, "learning_rate": 1.573916123248106e-06, "loss": 1.1469, "step": 2407 }, { "epoch": 0.3264420795770352, "grad_norm": 11.893336686288622, "learning_rate": 1.5735564516958039e-06, "loss": 1.1542, "step": 2408 }, { "epoch": 0.32657764522470006, "grad_norm": 7.9219099398019175, "learning_rate": 1.5731966695376672e-06, "loss": 1.1439, "step": 2409 }, { "epoch": 0.32671321087236493, "grad_norm": 6.363202193813446, "learning_rate": 1.5728367768430775e-06, "loss": 1.1155, "step": 2410 }, { "epoch": 0.3268487765200298, "grad_norm": 5.552613355405117, "learning_rate": 1.572476773681437e-06, "loss": 1.1539, "step": 2411 }, { "epoch": 0.32698434216769473, "grad_norm": 9.897256806727018, "learning_rate": 1.5721166601221695e-06, "loss": 1.1602, "step": 2412 }, { "epoch": 0.3271199078153596, "grad_norm": 7.503770151473625, "learning_rate": 1.5717564362347203e-06, "loss": 1.1654, "step": 2413 }, { "epoch": 0.32725547346302447, "grad_norm": 8.417805511025401, "learning_rate": 1.5713961020885553e-06, "loss": 1.1465, "step": 2414 }, { "epoch": 0.32739103911068934, "grad_norm": 12.094511107072117, "learning_rate": 1.5710356577531628e-06, "loss": 1.1299, "step": 2415 }, { "epoch": 0.3275266047583542, "grad_norm": 6.991791236681501, "learning_rate": 1.5706751032980506e-06, "loss": 1.1666, "step": 2416 }, { "epoch": 0.32766217040601914, "grad_norm": 8.205665461636505, "learning_rate": 1.5703144387927499e-06, "loss": 1.1525, "step": 2417 }, { "epoch": 0.327797736053684, "grad_norm": 5.215533252423965, "learning_rate": 1.5699536643068113e-06, "loss": 1.1807, "step": 2418 }, { "epoch": 0.3279333017013489, "grad_norm": 5.2305667797330075, "learning_rate": 1.5695927799098071e-06, "loss": 1.1617, "step": 2419 }, { "epoch": 0.32806886734901375, "grad_norm": 9.059844387916911, "learning_rate": 1.5692317856713318e-06, "loss": 1.1659, "step": 2420 }, { "epoch": 0.3282044329966786, "grad_norm": 5.8389725709979965, "learning_rate": 1.5688706816609995e-06, "loss": 1.1648, "step": 2421 }, { "epoch": 0.32833999864434354, "grad_norm": 15.853891508663306, "learning_rate": 1.5685094679484472e-06, "loss": 1.1568, "step": 2422 }, { "epoch": 0.3284755642920084, "grad_norm": 5.411393016895916, "learning_rate": 1.5681481446033312e-06, "loss": 1.169, "step": 2423 }, { "epoch": 0.3286111299396733, "grad_norm": 14.01739477723575, "learning_rate": 1.56778671169533e-06, "loss": 1.2007, "step": 2424 }, { "epoch": 0.32874669558733816, "grad_norm": 6.836142093887308, "learning_rate": 1.5674251692941436e-06, "loss": 1.1629, "step": 2425 }, { "epoch": 0.328882261235003, "grad_norm": 7.857643787995369, "learning_rate": 1.5670635174694923e-06, "loss": 1.1741, "step": 2426 }, { "epoch": 0.32901782688266795, "grad_norm": 5.846061224763185, "learning_rate": 1.5667017562911176e-06, "loss": 1.1693, "step": 2427 }, { "epoch": 0.3291533925303328, "grad_norm": 8.27051262864684, "learning_rate": 1.5663398858287824e-06, "loss": 1.1705, "step": 2428 }, { "epoch": 0.3292889581779977, "grad_norm": 5.993360100418009, "learning_rate": 1.565977906152271e-06, "loss": 1.107, "step": 2429 }, { "epoch": 0.32942452382566256, "grad_norm": 7.854609163144327, "learning_rate": 1.5656158173313876e-06, "loss": 1.1339, "step": 2430 }, { "epoch": 0.32956008947332743, "grad_norm": 4.677962521960756, "learning_rate": 1.5652536194359586e-06, "loss": 1.161, "step": 2431 }, { "epoch": 0.32969565512099236, "grad_norm": 5.140560656490985, "learning_rate": 1.5648913125358312e-06, "loss": 1.1385, "step": 2432 }, { "epoch": 0.32983122076865723, "grad_norm": 8.62221266337209, "learning_rate": 1.564528896700873e-06, "loss": 1.1728, "step": 2433 }, { "epoch": 0.3299667864163221, "grad_norm": 6.123576009711185, "learning_rate": 1.5641663720009732e-06, "loss": 1.138, "step": 2434 }, { "epoch": 0.33010235206398697, "grad_norm": 7.5378164941927555, "learning_rate": 1.5638037385060416e-06, "loss": 1.1807, "step": 2435 }, { "epoch": 0.33023791771165184, "grad_norm": 8.673465186776886, "learning_rate": 1.5634409962860096e-06, "loss": 1.1727, "step": 2436 }, { "epoch": 0.33037348335931677, "grad_norm": 4.708197682874285, "learning_rate": 1.5630781454108291e-06, "loss": 1.1341, "step": 2437 }, { "epoch": 0.33050904900698164, "grad_norm": 4.983724588755842, "learning_rate": 1.5627151859504726e-06, "loss": 1.1693, "step": 2438 }, { "epoch": 0.3306446146546465, "grad_norm": 6.816317319153369, "learning_rate": 1.5623521179749346e-06, "loss": 1.1681, "step": 2439 }, { "epoch": 0.3307801803023114, "grad_norm": 4.4717104732327035, "learning_rate": 1.5619889415542296e-06, "loss": 1.1739, "step": 2440 }, { "epoch": 0.3309157459499763, "grad_norm": 8.552329397753564, "learning_rate": 1.5616256567583932e-06, "loss": 1.1791, "step": 2441 }, { "epoch": 0.3310513115976412, "grad_norm": 5.4985236113916, "learning_rate": 1.561262263657482e-06, "loss": 1.1808, "step": 2442 }, { "epoch": 0.33118687724530604, "grad_norm": 5.074466904142229, "learning_rate": 1.5608987623215736e-06, "loss": 1.1569, "step": 2443 }, { "epoch": 0.3313224428929709, "grad_norm": 5.177665348800453, "learning_rate": 1.5605351528207664e-06, "loss": 1.1671, "step": 2444 }, { "epoch": 0.3314580085406358, "grad_norm": 6.381414413707481, "learning_rate": 1.5601714352251798e-06, "loss": 1.1992, "step": 2445 }, { "epoch": 0.3315935741883007, "grad_norm": 8.94608046219737, "learning_rate": 1.5598076096049533e-06, "loss": 1.1543, "step": 2446 }, { "epoch": 0.3317291398359656, "grad_norm": 9.0501072081046, "learning_rate": 1.5594436760302483e-06, "loss": 1.148, "step": 2447 }, { "epoch": 0.33186470548363045, "grad_norm": 6.633521680169267, "learning_rate": 1.5590796345712465e-06, "loss": 1.1728, "step": 2448 }, { "epoch": 0.3320002711312953, "grad_norm": 6.914494410944193, "learning_rate": 1.55871548529815e-06, "loss": 1.1187, "step": 2449 }, { "epoch": 0.3321358367789602, "grad_norm": 7.602998669329585, "learning_rate": 1.5583512282811826e-06, "loss": 1.152, "step": 2450 }, { "epoch": 0.3322714024266251, "grad_norm": 10.941726890300558, "learning_rate": 1.557986863590588e-06, "loss": 1.1062, "step": 2451 }, { "epoch": 0.33240696807429, "grad_norm": 4.413296124394811, "learning_rate": 1.5576223912966313e-06, "loss": 1.1384, "step": 2452 }, { "epoch": 0.33254253372195486, "grad_norm": 10.352474201473793, "learning_rate": 1.557257811469598e-06, "loss": 1.1284, "step": 2453 }, { "epoch": 0.33267809936961973, "grad_norm": 6.003392360417369, "learning_rate": 1.5568931241797947e-06, "loss": 1.1718, "step": 2454 }, { "epoch": 0.3328136650172846, "grad_norm": 5.221963331759184, "learning_rate": 1.556528329497548e-06, "loss": 1.1373, "step": 2455 }, { "epoch": 0.3329492306649495, "grad_norm": 5.721974355424997, "learning_rate": 1.5561634274932061e-06, "loss": 1.1484, "step": 2456 }, { "epoch": 0.3330847963126144, "grad_norm": 4.989183746301683, "learning_rate": 1.555798418237137e-06, "loss": 1.1615, "step": 2457 }, { "epoch": 0.33322036196027927, "grad_norm": 8.95498953007373, "learning_rate": 1.5554333017997306e-06, "loss": 1.1569, "step": 2458 }, { "epoch": 0.33335592760794414, "grad_norm": 5.79716545887464, "learning_rate": 1.5550680782513962e-06, "loss": 1.1388, "step": 2459 }, { "epoch": 0.333491493255609, "grad_norm": 5.763734609247168, "learning_rate": 1.554702747662564e-06, "loss": 1.1629, "step": 2460 }, { "epoch": 0.33362705890327393, "grad_norm": 5.337667985693958, "learning_rate": 1.5543373101036856e-06, "loss": 1.1396, "step": 2461 }, { "epoch": 0.3337626245509388, "grad_norm": 5.5305119960347255, "learning_rate": 1.5539717656452327e-06, "loss": 1.1457, "step": 2462 }, { "epoch": 0.3338981901986037, "grad_norm": 5.936758865499087, "learning_rate": 1.5536061143576978e-06, "loss": 1.1529, "step": 2463 }, { "epoch": 0.33403375584626854, "grad_norm": 26.39497260355341, "learning_rate": 1.5532403563115932e-06, "loss": 1.1541, "step": 2464 }, { "epoch": 0.3341693214939334, "grad_norm": 7.468529850680234, "learning_rate": 1.5528744915774532e-06, "loss": 1.1471, "step": 2465 }, { "epoch": 0.33430488714159834, "grad_norm": 11.899285507126262, "learning_rate": 1.5525085202258316e-06, "loss": 1.1531, "step": 2466 }, { "epoch": 0.3344404527892632, "grad_norm": 6.225663715140454, "learning_rate": 1.552142442327303e-06, "loss": 1.1468, "step": 2467 }, { "epoch": 0.3345760184369281, "grad_norm": 5.20743162070876, "learning_rate": 1.5517762579524628e-06, "loss": 1.1147, "step": 2468 }, { "epoch": 0.33471158408459295, "grad_norm": 6.343564017006392, "learning_rate": 1.5514099671719267e-06, "loss": 1.1567, "step": 2469 }, { "epoch": 0.3348471497322578, "grad_norm": 6.068443416922055, "learning_rate": 1.551043570056331e-06, "loss": 1.1639, "step": 2470 }, { "epoch": 0.33498271537992275, "grad_norm": 5.385412344358991, "learning_rate": 1.5506770666763324e-06, "loss": 1.1355, "step": 2471 }, { "epoch": 0.3351182810275876, "grad_norm": 6.132177728960581, "learning_rate": 1.5503104571026084e-06, "loss": 1.1715, "step": 2472 }, { "epoch": 0.3352538466752525, "grad_norm": 7.969343863849034, "learning_rate": 1.5499437414058564e-06, "loss": 1.1612, "step": 2473 }, { "epoch": 0.33538941232291736, "grad_norm": 6.282235993466537, "learning_rate": 1.5495769196567955e-06, "loss": 1.1392, "step": 2474 }, { "epoch": 0.3355249779705822, "grad_norm": 7.874043619516374, "learning_rate": 1.5492099919261632e-06, "loss": 1.1873, "step": 2475 }, { "epoch": 0.33566054361824715, "grad_norm": 5.20056526925084, "learning_rate": 1.5488429582847192e-06, "loss": 1.1732, "step": 2476 }, { "epoch": 0.335796109265912, "grad_norm": 9.822631864284643, "learning_rate": 1.5484758188032433e-06, "loss": 1.1388, "step": 2477 }, { "epoch": 0.3359316749135769, "grad_norm": 5.982214617929224, "learning_rate": 1.5481085735525348e-06, "loss": 1.1726, "step": 2478 }, { "epoch": 0.33606724056124176, "grad_norm": 8.063611122123996, "learning_rate": 1.5477412226034145e-06, "loss": 1.1429, "step": 2479 }, { "epoch": 0.3362028062089067, "grad_norm": 7.964100444433511, "learning_rate": 1.547373766026723e-06, "loss": 1.1948, "step": 2480 }, { "epoch": 0.33633837185657156, "grad_norm": 6.336868311626661, "learning_rate": 1.5470062038933213e-06, "loss": 1.1465, "step": 2481 }, { "epoch": 0.33647393750423643, "grad_norm": 28.193150638007104, "learning_rate": 1.5466385362740911e-06, "loss": 1.1641, "step": 2482 }, { "epoch": 0.3366095031519013, "grad_norm": 6.546064246991433, "learning_rate": 1.5462707632399342e-06, "loss": 1.1138, "step": 2483 }, { "epoch": 0.33674506879956617, "grad_norm": 5.822209110716378, "learning_rate": 1.5459028848617726e-06, "loss": 1.1432, "step": 2484 }, { "epoch": 0.3368806344472311, "grad_norm": 8.320227189813652, "learning_rate": 1.5455349012105486e-06, "loss": 1.1831, "step": 2485 }, { "epoch": 0.33701620009489597, "grad_norm": 6.35406620349748, "learning_rate": 1.545166812357225e-06, "loss": 1.123, "step": 2486 }, { "epoch": 0.33715176574256084, "grad_norm": 19.478175979343536, "learning_rate": 1.5447986183727852e-06, "loss": 1.1533, "step": 2487 }, { "epoch": 0.3372873313902257, "grad_norm": 5.432474906978516, "learning_rate": 1.5444303193282324e-06, "loss": 1.1747, "step": 2488 }, { "epoch": 0.3374228970378906, "grad_norm": 5.416494923822422, "learning_rate": 1.5440619152945896e-06, "loss": 1.094, "step": 2489 }, { "epoch": 0.3375584626855555, "grad_norm": 8.264491710038335, "learning_rate": 1.5436934063429013e-06, "loss": 1.1861, "step": 2490 }, { "epoch": 0.3376940283332204, "grad_norm": 4.019478415249354, "learning_rate": 1.5433247925442308e-06, "loss": 1.1405, "step": 2491 }, { "epoch": 0.33782959398088525, "grad_norm": 6.884439679248026, "learning_rate": 1.542956073969663e-06, "loss": 1.1209, "step": 2492 }, { "epoch": 0.3379651596285501, "grad_norm": 9.342771269821675, "learning_rate": 1.5425872506903024e-06, "loss": 1.1149, "step": 2493 }, { "epoch": 0.338100725276215, "grad_norm": 6.997618663787411, "learning_rate": 1.542218322777273e-06, "loss": 1.146, "step": 2494 }, { "epoch": 0.3382362909238799, "grad_norm": 6.260017428145925, "learning_rate": 1.5418492903017204e-06, "loss": 1.1576, "step": 2495 }, { "epoch": 0.3383718565715448, "grad_norm": 7.441539221154666, "learning_rate": 1.5414801533348091e-06, "loss": 1.1819, "step": 2496 }, { "epoch": 0.33850742221920965, "grad_norm": 6.783202371785992, "learning_rate": 1.5411109119477247e-06, "loss": 1.1795, "step": 2497 }, { "epoch": 0.3386429878668745, "grad_norm": 7.024325112524923, "learning_rate": 1.5407415662116718e-06, "loss": 1.1928, "step": 2498 }, { "epoch": 0.3387785535145394, "grad_norm": 7.235294609413169, "learning_rate": 1.5403721161978764e-06, "loss": 1.1287, "step": 2499 }, { "epoch": 0.3389141191622043, "grad_norm": 6.1623426493827145, "learning_rate": 1.5400025619775838e-06, "loss": 1.1056, "step": 2500 }, { "epoch": 0.3390496848098692, "grad_norm": 5.7855563197582525, "learning_rate": 1.5396329036220598e-06, "loss": 1.157, "step": 2501 }, { "epoch": 0.33918525045753406, "grad_norm": 4.4905427004683665, "learning_rate": 1.5392631412025898e-06, "loss": 1.1588, "step": 2502 }, { "epoch": 0.33932081610519893, "grad_norm": 5.245920186441609, "learning_rate": 1.5388932747904797e-06, "loss": 1.148, "step": 2503 }, { "epoch": 0.3394563817528638, "grad_norm": 9.910442155505566, "learning_rate": 1.5385233044570554e-06, "loss": 1.1514, "step": 2504 }, { "epoch": 0.3395919474005287, "grad_norm": 8.345355641835422, "learning_rate": 1.5381532302736627e-06, "loss": 1.1667, "step": 2505 }, { "epoch": 0.3397275130481936, "grad_norm": 7.422851316843111, "learning_rate": 1.5377830523116675e-06, "loss": 1.1627, "step": 2506 }, { "epoch": 0.33986307869585847, "grad_norm": 7.8548948008449315, "learning_rate": 1.5374127706424553e-06, "loss": 1.1328, "step": 2507 }, { "epoch": 0.33999864434352334, "grad_norm": 5.7510142577958225, "learning_rate": 1.5370423853374325e-06, "loss": 1.1525, "step": 2508 }, { "epoch": 0.3401342099911882, "grad_norm": 3.9766176922219993, "learning_rate": 1.5366718964680253e-06, "loss": 1.1254, "step": 2509 }, { "epoch": 0.34026977563885313, "grad_norm": 7.384147987551099, "learning_rate": 1.5363013041056787e-06, "loss": 1.1714, "step": 2510 }, { "epoch": 0.340405341286518, "grad_norm": 6.807717678648995, "learning_rate": 1.5359306083218588e-06, "loss": 1.1329, "step": 2511 }, { "epoch": 0.3405409069341829, "grad_norm": 14.008057796991215, "learning_rate": 1.5355598091880517e-06, "loss": 1.1244, "step": 2512 }, { "epoch": 0.34067647258184774, "grad_norm": 16.8330583316958, "learning_rate": 1.5351889067757627e-06, "loss": 1.1817, "step": 2513 }, { "epoch": 0.3408120382295126, "grad_norm": 10.317065840421519, "learning_rate": 1.5348179011565176e-06, "loss": 1.1953, "step": 2514 }, { "epoch": 0.34094760387717754, "grad_norm": 6.494631013671713, "learning_rate": 1.5344467924018619e-06, "loss": 1.1039, "step": 2515 }, { "epoch": 0.3410831695248424, "grad_norm": 7.23737778612119, "learning_rate": 1.534075580583361e-06, "loss": 1.095, "step": 2516 }, { "epoch": 0.3412187351725073, "grad_norm": 44.98238578608417, "learning_rate": 1.5337042657726e-06, "loss": 1.1565, "step": 2517 }, { "epoch": 0.34135430082017215, "grad_norm": 7.57644237270896, "learning_rate": 1.5333328480411842e-06, "loss": 1.1207, "step": 2518 }, { "epoch": 0.3414898664678371, "grad_norm": 5.391276093260331, "learning_rate": 1.5329613274607387e-06, "loss": 1.1757, "step": 2519 }, { "epoch": 0.34162543211550195, "grad_norm": 6.11482209545247, "learning_rate": 1.5325897041029078e-06, "loss": 1.1869, "step": 2520 }, { "epoch": 0.3417609977631668, "grad_norm": 14.068797649372247, "learning_rate": 1.5322179780393567e-06, "loss": 1.1249, "step": 2521 }, { "epoch": 0.3418965634108317, "grad_norm": 5.644265968404223, "learning_rate": 1.5318461493417694e-06, "loss": 1.1636, "step": 2522 }, { "epoch": 0.34203212905849656, "grad_norm": 14.779688488760092, "learning_rate": 1.5314742180818504e-06, "loss": 1.1853, "step": 2523 }, { "epoch": 0.3421676947061615, "grad_norm": 5.7709576604315505, "learning_rate": 1.5311021843313238e-06, "loss": 1.1391, "step": 2524 }, { "epoch": 0.34230326035382636, "grad_norm": 6.554485514567659, "learning_rate": 1.5307300481619332e-06, "loss": 1.1973, "step": 2525 }, { "epoch": 0.3424388260014912, "grad_norm": 5.4095622933507155, "learning_rate": 1.5303578096454422e-06, "loss": 1.1308, "step": 2526 }, { "epoch": 0.3425743916491561, "grad_norm": 8.763712006522894, "learning_rate": 1.5299854688536339e-06, "loss": 1.1348, "step": 2527 }, { "epoch": 0.34270995729682097, "grad_norm": 4.572006204639988, "learning_rate": 1.5296130258583113e-06, "loss": 1.161, "step": 2528 }, { "epoch": 0.3428455229444859, "grad_norm": 16.798929196365787, "learning_rate": 1.5292404807312971e-06, "loss": 1.1375, "step": 2529 }, { "epoch": 0.34298108859215076, "grad_norm": 6.255212971920833, "learning_rate": 1.5288678335444342e-06, "loss": 1.1417, "step": 2530 }, { "epoch": 0.34311665423981563, "grad_norm": 11.762284073246384, "learning_rate": 1.5284950843695838e-06, "loss": 1.1057, "step": 2531 }, { "epoch": 0.3432522198874805, "grad_norm": 10.521476015833533, "learning_rate": 1.5281222332786282e-06, "loss": 1.1583, "step": 2532 }, { "epoch": 0.3433877855351454, "grad_norm": 4.8629756739787515, "learning_rate": 1.527749280343469e-06, "loss": 1.1425, "step": 2533 }, { "epoch": 0.3435233511828103, "grad_norm": 6.900219765987413, "learning_rate": 1.527376225636026e-06, "loss": 1.1587, "step": 2534 }, { "epoch": 0.34365891683047517, "grad_norm": 6.583950147919382, "learning_rate": 1.5270030692282415e-06, "loss": 1.1472, "step": 2535 }, { "epoch": 0.34379448247814004, "grad_norm": 7.718044499946557, "learning_rate": 1.526629811192075e-06, "loss": 1.115, "step": 2536 }, { "epoch": 0.3439300481258049, "grad_norm": 5.597241375293888, "learning_rate": 1.5262564515995062e-06, "loss": 1.1672, "step": 2537 }, { "epoch": 0.3440656137734698, "grad_norm": 6.615094692312669, "learning_rate": 1.5258829905225348e-06, "loss": 1.1265, "step": 2538 }, { "epoch": 0.3442011794211347, "grad_norm": 9.343546556308656, "learning_rate": 1.5255094280331795e-06, "loss": 1.1446, "step": 2539 }, { "epoch": 0.3443367450687996, "grad_norm": 6.7823686275816195, "learning_rate": 1.5251357642034793e-06, "loss": 1.1347, "step": 2540 }, { "epoch": 0.34447231071646445, "grad_norm": 5.6498237974715835, "learning_rate": 1.524761999105492e-06, "loss": 1.123, "step": 2541 }, { "epoch": 0.3446078763641293, "grad_norm": 9.455452178516603, "learning_rate": 1.5243881328112953e-06, "loss": 1.1554, "step": 2542 }, { "epoch": 0.3447434420117942, "grad_norm": 6.24232451351103, "learning_rate": 1.5240141653929868e-06, "loss": 1.1664, "step": 2543 }, { "epoch": 0.3448790076594591, "grad_norm": 8.204739509220285, "learning_rate": 1.5236400969226828e-06, "loss": 1.1426, "step": 2544 }, { "epoch": 0.345014573307124, "grad_norm": 6.429253648730253, "learning_rate": 1.5232659274725195e-06, "loss": 1.1397, "step": 2545 }, { "epoch": 0.34515013895478885, "grad_norm": 7.6712115755929995, "learning_rate": 1.5228916571146522e-06, "loss": 1.1453, "step": 2546 }, { "epoch": 0.3452857046024537, "grad_norm": 7.172807909782149, "learning_rate": 1.5225172859212565e-06, "loss": 1.1605, "step": 2547 }, { "epoch": 0.3454212702501186, "grad_norm": 9.183343023949796, "learning_rate": 1.5221428139645266e-06, "loss": 1.1133, "step": 2548 }, { "epoch": 0.3455568358977835, "grad_norm": 18.881723882663156, "learning_rate": 1.5217682413166767e-06, "loss": 1.1398, "step": 2549 }, { "epoch": 0.3456924015454484, "grad_norm": 10.883287667291828, "learning_rate": 1.5213935680499397e-06, "loss": 1.1143, "step": 2550 }, { "epoch": 0.34582796719311326, "grad_norm": 4.022251269219868, "learning_rate": 1.521018794236569e-06, "loss": 1.1327, "step": 2551 }, { "epoch": 0.34596353284077813, "grad_norm": 6.07654011675003, "learning_rate": 1.5206439199488366e-06, "loss": 1.1582, "step": 2552 }, { "epoch": 0.346099098488443, "grad_norm": 9.693126483896975, "learning_rate": 1.5202689452590339e-06, "loss": 1.1497, "step": 2553 }, { "epoch": 0.34623466413610793, "grad_norm": 6.315122301755214, "learning_rate": 1.5198938702394717e-06, "loss": 1.112, "step": 2554 }, { "epoch": 0.3463702297837728, "grad_norm": 6.803410144407285, "learning_rate": 1.5195186949624804e-06, "loss": 1.1298, "step": 2555 }, { "epoch": 0.34650579543143767, "grad_norm": 6.552568518506039, "learning_rate": 1.5191434195004098e-06, "loss": 1.1064, "step": 2556 }, { "epoch": 0.34664136107910254, "grad_norm": 11.428416904630254, "learning_rate": 1.5187680439256285e-06, "loss": 1.1178, "step": 2557 }, { "epoch": 0.34677692672676746, "grad_norm": 5.694495450034219, "learning_rate": 1.5183925683105251e-06, "loss": 1.1444, "step": 2558 }, { "epoch": 0.34691249237443234, "grad_norm": 6.439490068133435, "learning_rate": 1.5180169927275066e-06, "loss": 1.1304, "step": 2559 }, { "epoch": 0.3470480580220972, "grad_norm": 7.711850078060415, "learning_rate": 1.517641317249e-06, "loss": 1.1997, "step": 2560 }, { "epoch": 0.3471836236697621, "grad_norm": 8.322884109884196, "learning_rate": 1.5172655419474514e-06, "loss": 1.1559, "step": 2561 }, { "epoch": 0.34731918931742695, "grad_norm": 6.128705124874595, "learning_rate": 1.5168896668953261e-06, "loss": 1.1254, "step": 2562 }, { "epoch": 0.34745475496509187, "grad_norm": 8.109155346449981, "learning_rate": 1.5165136921651084e-06, "loss": 1.1194, "step": 2563 }, { "epoch": 0.34759032061275674, "grad_norm": 5.4243299828415505, "learning_rate": 1.5161376178293028e-06, "loss": 1.1493, "step": 2564 }, { "epoch": 0.3477258862604216, "grad_norm": 13.45867132170564, "learning_rate": 1.5157614439604313e-06, "loss": 1.1368, "step": 2565 }, { "epoch": 0.3478614519080865, "grad_norm": 15.454638436901087, "learning_rate": 1.5153851706310367e-06, "loss": 1.174, "step": 2566 }, { "epoch": 0.34799701755575135, "grad_norm": 10.339447903106391, "learning_rate": 1.51500879791368e-06, "loss": 1.1428, "step": 2567 }, { "epoch": 0.3481325832034163, "grad_norm": 6.584645352710464, "learning_rate": 1.5146323258809423e-06, "loss": 1.1436, "step": 2568 }, { "epoch": 0.34826814885108115, "grad_norm": 7.624270732057963, "learning_rate": 1.5142557546054224e-06, "loss": 1.1832, "step": 2569 }, { "epoch": 0.348403714498746, "grad_norm": 10.608661062511942, "learning_rate": 1.5138790841597398e-06, "loss": 1.1607, "step": 2570 }, { "epoch": 0.3485392801464109, "grad_norm": 11.449905154380078, "learning_rate": 1.5135023146165317e-06, "loss": 1.1934, "step": 2571 }, { "epoch": 0.34867484579407576, "grad_norm": 6.64391577690378, "learning_rate": 1.513125446048456e-06, "loss": 1.1829, "step": 2572 }, { "epoch": 0.3488104114417407, "grad_norm": 32.80475339174894, "learning_rate": 1.5127484785281884e-06, "loss": 1.1349, "step": 2573 }, { "epoch": 0.34894597708940556, "grad_norm": 7.461383373679582, "learning_rate": 1.5123714121284237e-06, "loss": 1.1497, "step": 2574 }, { "epoch": 0.3490815427370704, "grad_norm": 6.447865535442002, "learning_rate": 1.5119942469218768e-06, "loss": 1.1612, "step": 2575 }, { "epoch": 0.3492171083847353, "grad_norm": 16.29113283554209, "learning_rate": 1.5116169829812807e-06, "loss": 1.18, "step": 2576 }, { "epoch": 0.34935267403240017, "grad_norm": 4.5885711557663456, "learning_rate": 1.511239620379388e-06, "loss": 1.1593, "step": 2577 }, { "epoch": 0.3494882396800651, "grad_norm": 8.156932830250899, "learning_rate": 1.51086215918897e-06, "loss": 1.1668, "step": 2578 }, { "epoch": 0.34962380532772996, "grad_norm": 6.204719299282453, "learning_rate": 1.510484599482817e-06, "loss": 1.1163, "step": 2579 }, { "epoch": 0.34975937097539483, "grad_norm": 6.508305075932119, "learning_rate": 1.5101069413337386e-06, "loss": 1.1315, "step": 2580 }, { "epoch": 0.3498949366230597, "grad_norm": 15.769532384132825, "learning_rate": 1.5097291848145631e-06, "loss": 1.1425, "step": 2581 }, { "epoch": 0.3500305022707246, "grad_norm": 13.42589533229843, "learning_rate": 1.5093513299981378e-06, "loss": 1.1484, "step": 2582 }, { "epoch": 0.3501660679183895, "grad_norm": 6.309510741282941, "learning_rate": 1.5089733769573292e-06, "loss": 1.1746, "step": 2583 }, { "epoch": 0.35030163356605437, "grad_norm": 6.104140345344509, "learning_rate": 1.5085953257650223e-06, "loss": 1.1356, "step": 2584 }, { "epoch": 0.35043719921371924, "grad_norm": 4.504109812433356, "learning_rate": 1.5082171764941216e-06, "loss": 1.1653, "step": 2585 }, { "epoch": 0.3505727648613841, "grad_norm": 28.404630968609908, "learning_rate": 1.5078389292175499e-06, "loss": 1.1555, "step": 2586 }, { "epoch": 0.350708330509049, "grad_norm": 9.361465338880464, "learning_rate": 1.5074605840082494e-06, "loss": 1.1347, "step": 2587 }, { "epoch": 0.3508438961567139, "grad_norm": 9.0534159646238, "learning_rate": 1.5070821409391812e-06, "loss": 1.1762, "step": 2588 }, { "epoch": 0.3509794618043788, "grad_norm": 5.70173790091992, "learning_rate": 1.5067036000833242e-06, "loss": 1.1522, "step": 2589 }, { "epoch": 0.35111502745204365, "grad_norm": 14.350382175809605, "learning_rate": 1.5063249615136782e-06, "loss": 1.126, "step": 2590 }, { "epoch": 0.3512505930997085, "grad_norm": 23.321842887352037, "learning_rate": 1.5059462253032595e-06, "loss": 1.1714, "step": 2591 }, { "epoch": 0.3513861587473734, "grad_norm": 7.906256537619518, "learning_rate": 1.5055673915251052e-06, "loss": 1.1136, "step": 2592 }, { "epoch": 0.3515217243950383, "grad_norm": 5.168585928922041, "learning_rate": 1.5051884602522702e-06, "loss": 1.1131, "step": 2593 }, { "epoch": 0.3516572900427032, "grad_norm": 6.405371258379515, "learning_rate": 1.5048094315578284e-06, "loss": 1.1388, "step": 2594 }, { "epoch": 0.35179285569036806, "grad_norm": 4.897493088527305, "learning_rate": 1.5044303055148722e-06, "loss": 1.1212, "step": 2595 }, { "epoch": 0.3519284213380329, "grad_norm": 5.931059313296869, "learning_rate": 1.5040510821965135e-06, "loss": 1.1561, "step": 2596 }, { "epoch": 0.3520639869856978, "grad_norm": 6.240598130263159, "learning_rate": 1.5036717616758824e-06, "loss": 1.1161, "step": 2597 }, { "epoch": 0.3521995526333627, "grad_norm": 9.977747036496707, "learning_rate": 1.5032923440261276e-06, "loss": 1.1427, "step": 2598 }, { "epoch": 0.3523351182810276, "grad_norm": 6.380013737320545, "learning_rate": 1.5029128293204174e-06, "loss": 1.1608, "step": 2599 }, { "epoch": 0.35247068392869246, "grad_norm": 7.263625426293791, "learning_rate": 1.5025332176319373e-06, "loss": 1.1291, "step": 2600 }, { "epoch": 0.35260624957635733, "grad_norm": 9.068931137732172, "learning_rate": 1.5021535090338932e-06, "loss": 1.1167, "step": 2601 }, { "epoch": 0.35274181522402226, "grad_norm": 8.290762064103246, "learning_rate": 1.5017737035995087e-06, "loss": 1.1475, "step": 2602 }, { "epoch": 0.35287738087168713, "grad_norm": 5.038331335647966, "learning_rate": 1.5013938014020262e-06, "loss": 1.125, "step": 2603 }, { "epoch": 0.353012946519352, "grad_norm": 5.943573228258548, "learning_rate": 1.501013802514707e-06, "loss": 1.1332, "step": 2604 }, { "epoch": 0.35314851216701687, "grad_norm": 9.513068030072155, "learning_rate": 1.5006337070108304e-06, "loss": 1.2012, "step": 2605 }, { "epoch": 0.35328407781468174, "grad_norm": 6.517258261671622, "learning_rate": 1.5002535149636952e-06, "loss": 1.1356, "step": 2606 }, { "epoch": 0.35341964346234667, "grad_norm": 6.697193803822637, "learning_rate": 1.4998732264466186e-06, "loss": 1.0938, "step": 2607 }, { "epoch": 0.35355520911001154, "grad_norm": 4.836323875477952, "learning_rate": 1.499492841532936e-06, "loss": 1.1511, "step": 2608 }, { "epoch": 0.3536907747576764, "grad_norm": 6.104752835333138, "learning_rate": 1.4991123602960017e-06, "loss": 1.1626, "step": 2609 }, { "epoch": 0.3538263404053413, "grad_norm": 4.759350839368554, "learning_rate": 1.4987317828091882e-06, "loss": 1.1596, "step": 2610 }, { "epoch": 0.35396190605300615, "grad_norm": 6.517107962382451, "learning_rate": 1.4983511091458874e-06, "loss": 1.1461, "step": 2611 }, { "epoch": 0.3540974717006711, "grad_norm": 5.19383476718817, "learning_rate": 1.4979703393795086e-06, "loss": 1.1616, "step": 2612 }, { "epoch": 0.35423303734833594, "grad_norm": 9.00366385212223, "learning_rate": 1.4975894735834809e-06, "loss": 1.0715, "step": 2613 }, { "epoch": 0.3543686029960008, "grad_norm": 7.097519935924112, "learning_rate": 1.4972085118312511e-06, "loss": 1.1237, "step": 2614 }, { "epoch": 0.3545041686436657, "grad_norm": 6.148572286205537, "learning_rate": 1.4968274541962845e-06, "loss": 1.1475, "step": 2615 }, { "epoch": 0.35463973429133056, "grad_norm": 17.107740515134903, "learning_rate": 1.4964463007520647e-06, "loss": 1.1477, "step": 2616 }, { "epoch": 0.3547752999389955, "grad_norm": 6.896037171319214, "learning_rate": 1.4960650515720947e-06, "loss": 1.1276, "step": 2617 }, { "epoch": 0.35491086558666035, "grad_norm": 5.34178847527941, "learning_rate": 1.4956837067298954e-06, "loss": 1.1636, "step": 2618 }, { "epoch": 0.3550464312343252, "grad_norm": 5.1898262876167225, "learning_rate": 1.4953022662990057e-06, "loss": 1.1243, "step": 2619 }, { "epoch": 0.3551819968819901, "grad_norm": 8.115610823895064, "learning_rate": 1.4949207303529835e-06, "loss": 1.1538, "step": 2620 }, { "epoch": 0.35531756252965496, "grad_norm": 6.148268783841293, "learning_rate": 1.4945390989654054e-06, "loss": 1.1591, "step": 2621 }, { "epoch": 0.3554531281773199, "grad_norm": 5.893061369438809, "learning_rate": 1.4941573722098655e-06, "loss": 1.1608, "step": 2622 }, { "epoch": 0.35558869382498476, "grad_norm": 4.757003208434511, "learning_rate": 1.4937755501599772e-06, "loss": 1.1308, "step": 2623 }, { "epoch": 0.35572425947264963, "grad_norm": 7.587870525420808, "learning_rate": 1.4933936328893714e-06, "loss": 1.1342, "step": 2624 }, { "epoch": 0.3558598251203145, "grad_norm": 5.653230489050219, "learning_rate": 1.4930116204716984e-06, "loss": 1.1736, "step": 2625 }, { "epoch": 0.35599539076797937, "grad_norm": 5.151465661349339, "learning_rate": 1.492629512980626e-06, "loss": 1.1335, "step": 2626 }, { "epoch": 0.3561309564156443, "grad_norm": 5.896839996262761, "learning_rate": 1.4922473104898404e-06, "loss": 1.1469, "step": 2627 }, { "epoch": 0.35626652206330917, "grad_norm": 6.803432605861944, "learning_rate": 1.4918650130730467e-06, "loss": 1.1414, "step": 2628 }, { "epoch": 0.35640208771097404, "grad_norm": 7.6731529692066225, "learning_rate": 1.491482620803968e-06, "loss": 1.1457, "step": 2629 }, { "epoch": 0.3565376533586389, "grad_norm": 5.470518449329218, "learning_rate": 1.491100133756345e-06, "loss": 1.1442, "step": 2630 }, { "epoch": 0.3566732190063038, "grad_norm": 6.066717934203194, "learning_rate": 1.490717552003938e-06, "loss": 1.1636, "step": 2631 }, { "epoch": 0.3568087846539687, "grad_norm": 5.434755130127665, "learning_rate": 1.4903348756205242e-06, "loss": 1.1541, "step": 2632 }, { "epoch": 0.3569443503016336, "grad_norm": 4.6873102240282165, "learning_rate": 1.4899521046799005e-06, "loss": 1.1695, "step": 2633 }, { "epoch": 0.35707991594929844, "grad_norm": 4.830557251666402, "learning_rate": 1.4895692392558806e-06, "loss": 1.1494, "step": 2634 }, { "epoch": 0.3572154815969633, "grad_norm": 9.804538157859422, "learning_rate": 1.4891862794222976e-06, "loss": 1.142, "step": 2635 }, { "epoch": 0.3573510472446282, "grad_norm": 5.387438127265189, "learning_rate": 1.4888032252530017e-06, "loss": 1.1625, "step": 2636 }, { "epoch": 0.3574866128922931, "grad_norm": 7.631416386029343, "learning_rate": 1.4884200768218625e-06, "loss": 1.1711, "step": 2637 }, { "epoch": 0.357622178539958, "grad_norm": 6.766640796248589, "learning_rate": 1.4880368342027665e-06, "loss": 1.148, "step": 2638 }, { "epoch": 0.35775774418762285, "grad_norm": 5.188984832497427, "learning_rate": 1.4876534974696196e-06, "loss": 1.157, "step": 2639 }, { "epoch": 0.3578933098352877, "grad_norm": 5.686578559589804, "learning_rate": 1.487270066696345e-06, "loss": 1.1084, "step": 2640 }, { "epoch": 0.35802887548295265, "grad_norm": 5.978240405650865, "learning_rate": 1.4868865419568841e-06, "loss": 1.1044, "step": 2641 }, { "epoch": 0.3581644411306175, "grad_norm": 8.68807054642942, "learning_rate": 1.4865029233251971e-06, "loss": 1.1377, "step": 2642 }, { "epoch": 0.3583000067782824, "grad_norm": 4.348683253558772, "learning_rate": 1.4861192108752617e-06, "loss": 1.1711, "step": 2643 }, { "epoch": 0.35843557242594726, "grad_norm": 4.731809343556683, "learning_rate": 1.485735404681073e-06, "loss": 1.143, "step": 2644 }, { "epoch": 0.35857113807361213, "grad_norm": 7.00962507405611, "learning_rate": 1.4853515048166463e-06, "loss": 1.1766, "step": 2645 }, { "epoch": 0.35870670372127705, "grad_norm": 6.689904457679424, "learning_rate": 1.4849675113560128e-06, "loss": 1.1076, "step": 2646 }, { "epoch": 0.3588422693689419, "grad_norm": 5.555982717472699, "learning_rate": 1.4845834243732228e-06, "loss": 1.1444, "step": 2647 }, { "epoch": 0.3589778350166068, "grad_norm": 7.740766358525576, "learning_rate": 1.4841992439423445e-06, "loss": 1.1212, "step": 2648 }, { "epoch": 0.35911340066427166, "grad_norm": 6.412526004380024, "learning_rate": 1.483814970137464e-06, "loss": 1.1096, "step": 2649 }, { "epoch": 0.35924896631193654, "grad_norm": 5.213940236205315, "learning_rate": 1.4834306030326855e-06, "loss": 1.1309, "step": 2650 }, { "epoch": 0.35938453195960146, "grad_norm": 4.7989196906953335, "learning_rate": 1.4830461427021311e-06, "loss": 1.1676, "step": 2651 }, { "epoch": 0.35952009760726633, "grad_norm": 5.0543345561395965, "learning_rate": 1.4826615892199415e-06, "loss": 1.1468, "step": 2652 }, { "epoch": 0.3596556632549312, "grad_norm": 5.176238752510122, "learning_rate": 1.482276942660274e-06, "loss": 1.0986, "step": 2653 }, { "epoch": 0.35979122890259607, "grad_norm": 5.198715605560463, "learning_rate": 1.481892203097305e-06, "loss": 1.1259, "step": 2654 }, { "epoch": 0.35992679455026094, "grad_norm": 4.257057967498309, "learning_rate": 1.481507370605228e-06, "loss": 1.1525, "step": 2655 }, { "epoch": 0.36006236019792587, "grad_norm": 4.773329711289018, "learning_rate": 1.481122445258256e-06, "loss": 1.1602, "step": 2656 }, { "epoch": 0.36019792584559074, "grad_norm": 5.181194896290771, "learning_rate": 1.4807374271306182e-06, "loss": 1.1449, "step": 2657 }, { "epoch": 0.3603334914932556, "grad_norm": 6.0628464743017885, "learning_rate": 1.4803523162965618e-06, "loss": 1.1142, "step": 2658 }, { "epoch": 0.3604690571409205, "grad_norm": 11.831697301581029, "learning_rate": 1.4799671128303533e-06, "loss": 1.1229, "step": 2659 }, { "epoch": 0.36060462278858535, "grad_norm": 6.514458862372969, "learning_rate": 1.4795818168062755e-06, "loss": 1.1328, "step": 2660 }, { "epoch": 0.3607401884362503, "grad_norm": 7.535839183210265, "learning_rate": 1.47919642829863e-06, "loss": 1.1638, "step": 2661 }, { "epoch": 0.36087575408391515, "grad_norm": 5.37787508761504, "learning_rate": 1.4788109473817359e-06, "loss": 1.1594, "step": 2662 }, { "epoch": 0.36101131973158, "grad_norm": 5.755154833926278, "learning_rate": 1.4784253741299298e-06, "loss": 1.1515, "step": 2663 }, { "epoch": 0.3611468853792449, "grad_norm": 5.565453100995126, "learning_rate": 1.4780397086175672e-06, "loss": 1.1138, "step": 2664 }, { "epoch": 0.36128245102690976, "grad_norm": 6.353337593404619, "learning_rate": 1.4776539509190198e-06, "loss": 1.1485, "step": 2665 }, { "epoch": 0.3614180166745747, "grad_norm": 5.136324442477909, "learning_rate": 1.4772681011086788e-06, "loss": 1.1201, "step": 2666 }, { "epoch": 0.36155358232223955, "grad_norm": 5.971672476871734, "learning_rate": 1.4768821592609513e-06, "loss": 1.127, "step": 2667 }, { "epoch": 0.3616891479699044, "grad_norm": 5.713199135712239, "learning_rate": 1.4764961254502639e-06, "loss": 1.1785, "step": 2668 }, { "epoch": 0.3618247136175693, "grad_norm": 6.550602765233083, "learning_rate": 1.47610999975106e-06, "loss": 1.138, "step": 2669 }, { "epoch": 0.36196027926523416, "grad_norm": 8.027001622736915, "learning_rate": 1.4757237822378009e-06, "loss": 1.1335, "step": 2670 }, { "epoch": 0.3620958449128991, "grad_norm": 6.065908389640879, "learning_rate": 1.4753374729849656e-06, "loss": 1.1652, "step": 2671 }, { "epoch": 0.36223141056056396, "grad_norm": 5.999211740476885, "learning_rate": 1.4749510720670503e-06, "loss": 1.1079, "step": 2672 }, { "epoch": 0.36236697620822883, "grad_norm": 4.066934968620464, "learning_rate": 1.47456457955857e-06, "loss": 1.1215, "step": 2673 }, { "epoch": 0.3625025418558937, "grad_norm": 8.233673328538318, "learning_rate": 1.4741779955340565e-06, "loss": 1.1062, "step": 2674 }, { "epoch": 0.36263810750355857, "grad_norm": 14.558337086240593, "learning_rate": 1.4737913200680596e-06, "loss": 1.1213, "step": 2675 }, { "epoch": 0.3627736731512235, "grad_norm": 5.521476700642908, "learning_rate": 1.4734045532351463e-06, "loss": 1.1936, "step": 2676 }, { "epoch": 0.36290923879888837, "grad_norm": 5.698259179545498, "learning_rate": 1.473017695109902e-06, "loss": 1.0991, "step": 2677 }, { "epoch": 0.36304480444655324, "grad_norm": 4.962884720309228, "learning_rate": 1.472630745766929e-06, "loss": 1.1779, "step": 2678 }, { "epoch": 0.3631803700942181, "grad_norm": 5.2369736842773555, "learning_rate": 1.4722437052808472e-06, "loss": 1.1576, "step": 2679 }, { "epoch": 0.36331593574188303, "grad_norm": 11.902661182540019, "learning_rate": 1.4718565737262945e-06, "loss": 1.131, "step": 2680 }, { "epoch": 0.3634515013895479, "grad_norm": 11.227720571257967, "learning_rate": 1.4714693511779262e-06, "loss": 1.1333, "step": 2681 }, { "epoch": 0.3635870670372128, "grad_norm": 5.65112106587552, "learning_rate": 1.471082037710415e-06, "loss": 1.1557, "step": 2682 }, { "epoch": 0.36372263268487764, "grad_norm": 11.766633420629466, "learning_rate": 1.4706946333984514e-06, "loss": 1.1448, "step": 2683 }, { "epoch": 0.3638581983325425, "grad_norm": 6.8844166841664, "learning_rate": 1.4703071383167433e-06, "loss": 1.1481, "step": 2684 }, { "epoch": 0.36399376398020744, "grad_norm": 7.498244071411334, "learning_rate": 1.4699195525400158e-06, "loss": 1.1584, "step": 2685 }, { "epoch": 0.3641293296278723, "grad_norm": 5.8884863015922, "learning_rate": 1.469531876143012e-06, "loss": 1.1703, "step": 2686 }, { "epoch": 0.3642648952755372, "grad_norm": 6.644174675639749, "learning_rate": 1.4691441092004921e-06, "loss": 1.125, "step": 2687 }, { "epoch": 0.36440046092320205, "grad_norm": 5.836344884066499, "learning_rate": 1.4687562517872342e-06, "loss": 1.1509, "step": 2688 }, { "epoch": 0.3645360265708669, "grad_norm": 8.389996049604978, "learning_rate": 1.4683683039780328e-06, "loss": 1.1497, "step": 2689 }, { "epoch": 0.36467159221853185, "grad_norm": 15.401912343359228, "learning_rate": 1.4679802658477013e-06, "loss": 1.1518, "step": 2690 }, { "epoch": 0.3648071578661967, "grad_norm": 4.995414305623379, "learning_rate": 1.4675921374710696e-06, "loss": 1.1373, "step": 2691 }, { "epoch": 0.3649427235138616, "grad_norm": 6.126661150738293, "learning_rate": 1.467203918922985e-06, "loss": 1.1426, "step": 2692 }, { "epoch": 0.36507828916152646, "grad_norm": 6.8571431037990465, "learning_rate": 1.4668156102783125e-06, "loss": 1.1077, "step": 2693 }, { "epoch": 0.36521385480919133, "grad_norm": 10.971826325911819, "learning_rate": 1.4664272116119345e-06, "loss": 1.1265, "step": 2694 }, { "epoch": 0.36534942045685626, "grad_norm": 7.969081082020128, "learning_rate": 1.4660387229987504e-06, "loss": 1.1078, "step": 2695 }, { "epoch": 0.3654849861045211, "grad_norm": 7.443180031636914, "learning_rate": 1.4656501445136774e-06, "loss": 1.1664, "step": 2696 }, { "epoch": 0.365620551752186, "grad_norm": 6.438065296490702, "learning_rate": 1.4652614762316495e-06, "loss": 1.1019, "step": 2697 }, { "epoch": 0.36575611739985087, "grad_norm": 5.441503510480351, "learning_rate": 1.4648727182276186e-06, "loss": 1.1768, "step": 2698 }, { "epoch": 0.36589168304751574, "grad_norm": 9.650465769055486, "learning_rate": 1.4644838705765534e-06, "loss": 1.1518, "step": 2699 }, { "epoch": 0.36602724869518066, "grad_norm": 7.58622670310479, "learning_rate": 1.46409493335344e-06, "loss": 1.1236, "step": 2700 }, { "epoch": 0.36616281434284553, "grad_norm": 6.342945333553175, "learning_rate": 1.4637059066332824e-06, "loss": 1.144, "step": 2701 }, { "epoch": 0.3662983799905104, "grad_norm": 7.65307327542163, "learning_rate": 1.4633167904911008e-06, "loss": 1.1558, "step": 2702 }, { "epoch": 0.3664339456381753, "grad_norm": 5.598565966732144, "learning_rate": 1.4629275850019336e-06, "loss": 1.1422, "step": 2703 }, { "epoch": 0.36656951128584014, "grad_norm": 6.969308344918233, "learning_rate": 1.4625382902408354e-06, "loss": 1.1473, "step": 2704 }, { "epoch": 0.36670507693350507, "grad_norm": 4.329204022695242, "learning_rate": 1.4621489062828788e-06, "loss": 1.121, "step": 2705 }, { "epoch": 0.36684064258116994, "grad_norm": 11.429623450232658, "learning_rate": 1.461759433203154e-06, "loss": 1.1133, "step": 2706 }, { "epoch": 0.3669762082288348, "grad_norm": 6.413499881372456, "learning_rate": 1.4613698710767674e-06, "loss": 1.1402, "step": 2707 }, { "epoch": 0.3671117738764997, "grad_norm": 10.165271214832494, "learning_rate": 1.4609802199788427e-06, "loss": 1.1431, "step": 2708 }, { "epoch": 0.36724733952416455, "grad_norm": 8.774727068555855, "learning_rate": 1.4605904799845218e-06, "loss": 1.1295, "step": 2709 }, { "epoch": 0.3673829051718295, "grad_norm": 6.486883206207203, "learning_rate": 1.4602006511689623e-06, "loss": 1.1538, "step": 2710 }, { "epoch": 0.36751847081949435, "grad_norm": 14.177102725945398, "learning_rate": 1.4598107336073396e-06, "loss": 1.1164, "step": 2711 }, { "epoch": 0.3676540364671592, "grad_norm": 7.092528055728591, "learning_rate": 1.4594207273748467e-06, "loss": 1.1256, "step": 2712 }, { "epoch": 0.3677896021148241, "grad_norm": 34.08997376432222, "learning_rate": 1.459030632546693e-06, "loss": 1.1819, "step": 2713 }, { "epoch": 0.36792516776248896, "grad_norm": 8.810315502076673, "learning_rate": 1.458640449198105e-06, "loss": 1.1659, "step": 2714 }, { "epoch": 0.3680607334101539, "grad_norm": 4.918701019692645, "learning_rate": 1.4582501774043268e-06, "loss": 1.1915, "step": 2715 }, { "epoch": 0.36819629905781875, "grad_norm": 10.383598256305138, "learning_rate": 1.4578598172406189e-06, "loss": 1.1419, "step": 2716 }, { "epoch": 0.3683318647054836, "grad_norm": 9.624051530069156, "learning_rate": 1.4574693687822594e-06, "loss": 1.1444, "step": 2717 }, { "epoch": 0.3684674303531485, "grad_norm": 6.607882422200035, "learning_rate": 1.4570788321045432e-06, "loss": 1.1012, "step": 2718 }, { "epoch": 0.3686029960008134, "grad_norm": 4.892650154428013, "learning_rate": 1.4566882072827824e-06, "loss": 1.0953, "step": 2719 }, { "epoch": 0.3687385616484783, "grad_norm": 6.242689004531058, "learning_rate": 1.4562974943923054e-06, "loss": 1.1649, "step": 2720 }, { "epoch": 0.36887412729614316, "grad_norm": 4.537624871722699, "learning_rate": 1.4559066935084588e-06, "loss": 1.1411, "step": 2721 }, { "epoch": 0.36900969294380803, "grad_norm": 4.680800161099988, "learning_rate": 1.4555158047066047e-06, "loss": 1.148, "step": 2722 }, { "epoch": 0.3691452585914729, "grad_norm": 5.759643720438019, "learning_rate": 1.4551248280621234e-06, "loss": 1.122, "step": 2723 }, { "epoch": 0.36928082423913783, "grad_norm": 8.815603373616979, "learning_rate": 1.4547337636504116e-06, "loss": 1.1773, "step": 2724 }, { "epoch": 0.3694163898868027, "grad_norm": 5.495320029223958, "learning_rate": 1.4543426115468829e-06, "loss": 1.1298, "step": 2725 }, { "epoch": 0.36955195553446757, "grad_norm": 6.561943883961816, "learning_rate": 1.453951371826968e-06, "loss": 1.128, "step": 2726 }, { "epoch": 0.36968752118213244, "grad_norm": 5.22487117525729, "learning_rate": 1.4535600445661143e-06, "loss": 1.1861, "step": 2727 }, { "epoch": 0.3698230868297973, "grad_norm": 6.302031532949986, "learning_rate": 1.453168629839786e-06, "loss": 1.1659, "step": 2728 }, { "epoch": 0.36995865247746224, "grad_norm": 7.272428229817585, "learning_rate": 1.4527771277234648e-06, "loss": 1.1913, "step": 2729 }, { "epoch": 0.3700942181251271, "grad_norm": 7.438641321442418, "learning_rate": 1.4523855382926483e-06, "loss": 1.169, "step": 2730 }, { "epoch": 0.370229783772792, "grad_norm": 4.082976929920586, "learning_rate": 1.4519938616228518e-06, "loss": 1.1335, "step": 2731 }, { "epoch": 0.37036534942045685, "grad_norm": 4.349496458796506, "learning_rate": 1.4516020977896067e-06, "loss": 1.1403, "step": 2732 }, { "epoch": 0.3705009150681217, "grad_norm": 5.132894290933817, "learning_rate": 1.4512102468684621e-06, "loss": 1.1477, "step": 2733 }, { "epoch": 0.37063648071578664, "grad_norm": 5.022875741225986, "learning_rate": 1.4508183089349828e-06, "loss": 1.1285, "step": 2734 }, { "epoch": 0.3707720463634515, "grad_norm": 6.172571458941415, "learning_rate": 1.4504262840647512e-06, "loss": 1.1074, "step": 2735 }, { "epoch": 0.3709076120111164, "grad_norm": 6.367148628402239, "learning_rate": 1.4500341723333663e-06, "loss": 1.1478, "step": 2736 }, { "epoch": 0.37104317765878125, "grad_norm": 5.02151134419698, "learning_rate": 1.4496419738164434e-06, "loss": 1.1421, "step": 2737 }, { "epoch": 0.3711787433064461, "grad_norm": 6.646722547006824, "learning_rate": 1.449249688589615e-06, "loss": 1.1177, "step": 2738 }, { "epoch": 0.37131430895411105, "grad_norm": 7.46006160234919, "learning_rate": 1.4488573167285307e-06, "loss": 1.1355, "step": 2739 }, { "epoch": 0.3714498746017759, "grad_norm": 14.297225511699768, "learning_rate": 1.448464858308856e-06, "loss": 1.1411, "step": 2740 }, { "epoch": 0.3715854402494408, "grad_norm": 6.017377202459813, "learning_rate": 1.4480723134062732e-06, "loss": 1.1048, "step": 2741 }, { "epoch": 0.37172100589710566, "grad_norm": 6.580068934983791, "learning_rate": 1.4476796820964814e-06, "loss": 1.1287, "step": 2742 }, { "epoch": 0.37185657154477053, "grad_norm": 6.120347051046298, "learning_rate": 1.4472869644551966e-06, "loss": 1.1483, "step": 2743 }, { "epoch": 0.37199213719243546, "grad_norm": 8.670407741358218, "learning_rate": 1.4468941605581518e-06, "loss": 1.108, "step": 2744 }, { "epoch": 0.3721277028401003, "grad_norm": 4.465030756966347, "learning_rate": 1.4465012704810952e-06, "loss": 1.1622, "step": 2745 }, { "epoch": 0.3722632684877652, "grad_norm": 11.454752739129116, "learning_rate": 1.4461082942997936e-06, "loss": 1.147, "step": 2746 }, { "epoch": 0.37239883413543007, "grad_norm": 13.505337328386858, "learning_rate": 1.4457152320900283e-06, "loss": 1.1525, "step": 2747 }, { "epoch": 0.37253439978309494, "grad_norm": 5.402697257001313, "learning_rate": 1.445322083927599e-06, "loss": 1.1297, "step": 2748 }, { "epoch": 0.37266996543075986, "grad_norm": 5.380025153537538, "learning_rate": 1.444928849888321e-06, "loss": 1.1206, "step": 2749 }, { "epoch": 0.37280553107842473, "grad_norm": 5.639307017220597, "learning_rate": 1.4445355300480262e-06, "loss": 1.0893, "step": 2750 }, { "epoch": 0.3729410967260896, "grad_norm": 5.305222018953929, "learning_rate": 1.4441421244825636e-06, "loss": 1.1486, "step": 2751 }, { "epoch": 0.3730766623737545, "grad_norm": 9.201111803741401, "learning_rate": 1.443748633267798e-06, "loss": 1.1501, "step": 2752 }, { "epoch": 0.37321222802141935, "grad_norm": 7.420246548368787, "learning_rate": 1.443355056479611e-06, "loss": 1.1818, "step": 2753 }, { "epoch": 0.37334779366908427, "grad_norm": 7.36613239722165, "learning_rate": 1.4429613941939016e-06, "loss": 1.1366, "step": 2754 }, { "epoch": 0.37348335931674914, "grad_norm": 7.66830890740954, "learning_rate": 1.4425676464865835e-06, "loss": 1.1828, "step": 2755 }, { "epoch": 0.373618924964414, "grad_norm": 7.005051158755684, "learning_rate": 1.442173813433588e-06, "loss": 1.1407, "step": 2756 }, { "epoch": 0.3737544906120789, "grad_norm": 14.478498650278942, "learning_rate": 1.4417798951108632e-06, "loss": 1.1785, "step": 2757 }, { "epoch": 0.3738900562597438, "grad_norm": 7.863296407026765, "learning_rate": 1.4413858915943728e-06, "loss": 1.1512, "step": 2758 }, { "epoch": 0.3740256219074087, "grad_norm": 7.5251845290961406, "learning_rate": 1.4409918029600972e-06, "loss": 1.1729, "step": 2759 }, { "epoch": 0.37416118755507355, "grad_norm": 4.7197597635683515, "learning_rate": 1.4405976292840332e-06, "loss": 1.1484, "step": 2760 }, { "epoch": 0.3742967532027384, "grad_norm": 5.024600794946528, "learning_rate": 1.4402033706421945e-06, "loss": 1.0875, "step": 2761 }, { "epoch": 0.3744323188504033, "grad_norm": 5.437760839616343, "learning_rate": 1.4398090271106104e-06, "loss": 1.1458, "step": 2762 }, { "epoch": 0.3745678844980682, "grad_norm": 5.36322649016899, "learning_rate": 1.4394145987653272e-06, "loss": 1.1396, "step": 2763 }, { "epoch": 0.3747034501457331, "grad_norm": 7.509460887235231, "learning_rate": 1.4390200856824072e-06, "loss": 1.1262, "step": 2764 }, { "epoch": 0.37483901579339796, "grad_norm": 5.23253370354285, "learning_rate": 1.438625487937929e-06, "loss": 1.1867, "step": 2765 }, { "epoch": 0.3749745814410628, "grad_norm": 5.6983445057645, "learning_rate": 1.4382308056079876e-06, "loss": 1.1351, "step": 2766 }, { "epoch": 0.3751101470887277, "grad_norm": 15.346125959220824, "learning_rate": 1.4378360387686948e-06, "loss": 1.1657, "step": 2767 }, { "epoch": 0.3752457127363926, "grad_norm": 5.907325309770933, "learning_rate": 1.4374411874961777e-06, "loss": 1.1431, "step": 2768 }, { "epoch": 0.3753812783840575, "grad_norm": 16.657281846644125, "learning_rate": 1.437046251866581e-06, "loss": 1.1272, "step": 2769 }, { "epoch": 0.37551684403172236, "grad_norm": 4.237709689757863, "learning_rate": 1.436651231956064e-06, "loss": 1.1219, "step": 2770 }, { "epoch": 0.37565240967938723, "grad_norm": 7.956517859686152, "learning_rate": 1.4362561278408038e-06, "loss": 1.163, "step": 2771 }, { "epoch": 0.3757879753270521, "grad_norm": 5.731717085681114, "learning_rate": 1.435860939596993e-06, "loss": 1.1624, "step": 2772 }, { "epoch": 0.37592354097471703, "grad_norm": 5.125250732955354, "learning_rate": 1.43546566730084e-06, "loss": 1.0992, "step": 2773 }, { "epoch": 0.3760591066223819, "grad_norm": 6.273683126953882, "learning_rate": 1.4350703110285709e-06, "loss": 1.1548, "step": 2774 }, { "epoch": 0.37619467227004677, "grad_norm": 6.210981934501707, "learning_rate": 1.4346748708564264e-06, "loss": 1.1754, "step": 2775 }, { "epoch": 0.37633023791771164, "grad_norm": 6.544076981396289, "learning_rate": 1.4342793468606643e-06, "loss": 1.1385, "step": 2776 }, { "epoch": 0.3764658035653765, "grad_norm": 5.994737855700095, "learning_rate": 1.433883739117558e-06, "loss": 1.1074, "step": 2777 }, { "epoch": 0.37660136921304144, "grad_norm": 6.570625718658175, "learning_rate": 1.4334880477033976e-06, "loss": 1.1663, "step": 2778 }, { "epoch": 0.3767369348607063, "grad_norm": 6.913667877892116, "learning_rate": 1.4330922726944889e-06, "loss": 1.1027, "step": 2779 }, { "epoch": 0.3768725005083712, "grad_norm": 7.266351916149852, "learning_rate": 1.432696414167154e-06, "loss": 1.1868, "step": 2780 }, { "epoch": 0.37700806615603605, "grad_norm": 5.140046458102535, "learning_rate": 1.4323004721977312e-06, "loss": 1.1045, "step": 2781 }, { "epoch": 0.3771436318037009, "grad_norm": 5.8109459360130105, "learning_rate": 1.4319044468625748e-06, "loss": 1.0886, "step": 2782 }, { "epoch": 0.37727919745136584, "grad_norm": 6.999120836045832, "learning_rate": 1.4315083382380552e-06, "loss": 1.1292, "step": 2783 }, { "epoch": 0.3774147630990307, "grad_norm": 7.659709958933532, "learning_rate": 1.4311121464005582e-06, "loss": 1.1375, "step": 2784 }, { "epoch": 0.3775503287466956, "grad_norm": 4.346183435488324, "learning_rate": 1.430715871426487e-06, "loss": 1.1746, "step": 2785 }, { "epoch": 0.37768589439436046, "grad_norm": 5.292737785988608, "learning_rate": 1.43031951339226e-06, "loss": 1.1677, "step": 2786 }, { "epoch": 0.3778214600420253, "grad_norm": 6.197696282338144, "learning_rate": 1.4299230723743112e-06, "loss": 1.1624, "step": 2787 }, { "epoch": 0.37795702568969025, "grad_norm": 8.946982798441669, "learning_rate": 1.4295265484490918e-06, "loss": 1.13, "step": 2788 }, { "epoch": 0.3780925913373551, "grad_norm": 6.029818938696366, "learning_rate": 1.429129941693068e-06, "loss": 1.1578, "step": 2789 }, { "epoch": 0.37822815698502, "grad_norm": 5.255841290447208, "learning_rate": 1.428733252182722e-06, "loss": 1.101, "step": 2790 }, { "epoch": 0.37836372263268486, "grad_norm": 8.97446570721878, "learning_rate": 1.4283364799945527e-06, "loss": 1.1028, "step": 2791 }, { "epoch": 0.37849928828034973, "grad_norm": 6.304214469413675, "learning_rate": 1.4279396252050747e-06, "loss": 1.1391, "step": 2792 }, { "epoch": 0.37863485392801466, "grad_norm": 5.176195962467574, "learning_rate": 1.4275426878908174e-06, "loss": 1.132, "step": 2793 }, { "epoch": 0.37877041957567953, "grad_norm": 4.864164653509887, "learning_rate": 1.4271456681283275e-06, "loss": 1.1305, "step": 2794 }, { "epoch": 0.3789059852233444, "grad_norm": 5.772658675728952, "learning_rate": 1.4267485659941676e-06, "loss": 1.1266, "step": 2795 }, { "epoch": 0.37904155087100927, "grad_norm": 8.575997693138865, "learning_rate": 1.4263513815649152e-06, "loss": 1.1096, "step": 2796 }, { "epoch": 0.3791771165186742, "grad_norm": 6.183871527963882, "learning_rate": 1.4259541149171643e-06, "loss": 1.1388, "step": 2797 }, { "epoch": 0.37931268216633907, "grad_norm": 9.26277322478828, "learning_rate": 1.4255567661275247e-06, "loss": 1.1336, "step": 2798 }, { "epoch": 0.37944824781400394, "grad_norm": 14.143517975690946, "learning_rate": 1.4251593352726217e-06, "loss": 1.1467, "step": 2799 }, { "epoch": 0.3795838134616688, "grad_norm": 13.996453302277725, "learning_rate": 1.4247618224290968e-06, "loss": 1.1511, "step": 2800 }, { "epoch": 0.3797193791093337, "grad_norm": 4.956446833578325, "learning_rate": 1.4243642276736076e-06, "loss": 1.1383, "step": 2801 }, { "epoch": 0.3798549447569986, "grad_norm": 5.204996409621987, "learning_rate": 1.4239665510828266e-06, "loss": 1.1613, "step": 2802 }, { "epoch": 0.3799905104046635, "grad_norm": 5.596280578370868, "learning_rate": 1.423568792733443e-06, "loss": 1.1185, "step": 2803 }, { "epoch": 0.38012607605232834, "grad_norm": 4.506944763299867, "learning_rate": 1.423170952702161e-06, "loss": 1.1224, "step": 2804 }, { "epoch": 0.3802616416999932, "grad_norm": 5.602528946845516, "learning_rate": 1.422773031065701e-06, "loss": 1.1625, "step": 2805 }, { "epoch": 0.3803972073476581, "grad_norm": 7.744093877964791, "learning_rate": 1.4223750279007993e-06, "loss": 1.1396, "step": 2806 }, { "epoch": 0.380532772995323, "grad_norm": 3.9994398087663017, "learning_rate": 1.4219769432842075e-06, "loss": 1.1562, "step": 2807 }, { "epoch": 0.3806683386429879, "grad_norm": 8.210345660191205, "learning_rate": 1.4215787772926931e-06, "loss": 1.1347, "step": 2808 }, { "epoch": 0.38080390429065275, "grad_norm": 6.0568338151745715, "learning_rate": 1.4211805300030389e-06, "loss": 1.1152, "step": 2809 }, { "epoch": 0.3809394699383176, "grad_norm": 4.655176502650176, "learning_rate": 1.4207822014920443e-06, "loss": 1.1196, "step": 2810 }, { "epoch": 0.3810750355859825, "grad_norm": 10.248816531803454, "learning_rate": 1.420383791836524e-06, "loss": 1.1465, "step": 2811 }, { "epoch": 0.3812106012336474, "grad_norm": 6.298836467149822, "learning_rate": 1.419985301113307e-06, "loss": 1.143, "step": 2812 }, { "epoch": 0.3813461668813123, "grad_norm": 8.582022837216254, "learning_rate": 1.4195867293992405e-06, "loss": 1.1625, "step": 2813 }, { "epoch": 0.38148173252897716, "grad_norm": 5.599691226739036, "learning_rate": 1.419188076771185e-06, "loss": 1.1647, "step": 2814 }, { "epoch": 0.38161729817664203, "grad_norm": 7.48239008942855, "learning_rate": 1.4187893433060176e-06, "loss": 1.1227, "step": 2815 }, { "epoch": 0.3817528638243069, "grad_norm": 5.419942959478955, "learning_rate": 1.4183905290806313e-06, "loss": 1.1532, "step": 2816 }, { "epoch": 0.3818884294719718, "grad_norm": 4.917398481285522, "learning_rate": 1.4179916341719339e-06, "loss": 1.1246, "step": 2817 }, { "epoch": 0.3820239951196367, "grad_norm": 6.080134237622023, "learning_rate": 1.4175926586568493e-06, "loss": 1.1425, "step": 2818 }, { "epoch": 0.38215956076730156, "grad_norm": 5.39372375993424, "learning_rate": 1.4171936026123168e-06, "loss": 1.1442, "step": 2819 }, { "epoch": 0.38229512641496644, "grad_norm": 7.202073308930714, "learning_rate": 1.4167944661152911e-06, "loss": 1.151, "step": 2820 }, { "epoch": 0.3824306920626313, "grad_norm": 5.330359316546751, "learning_rate": 1.4163952492427424e-06, "loss": 1.1439, "step": 2821 }, { "epoch": 0.38256625771029623, "grad_norm": 8.33887758884075, "learning_rate": 1.415995952071657e-06, "loss": 1.1734, "step": 2822 }, { "epoch": 0.3827018233579611, "grad_norm": 7.822267435045512, "learning_rate": 1.415596574679036e-06, "loss": 1.1244, "step": 2823 }, { "epoch": 0.38283738900562597, "grad_norm": 5.126566328816404, "learning_rate": 1.4151971171418959e-06, "loss": 1.1322, "step": 2824 }, { "epoch": 0.38297295465329084, "grad_norm": 16.796411577615267, "learning_rate": 1.4147975795372694e-06, "loss": 1.1166, "step": 2825 }, { "epoch": 0.3831085203009557, "grad_norm": 4.6849981018153235, "learning_rate": 1.4143979619422035e-06, "loss": 1.1594, "step": 2826 }, { "epoch": 0.38324408594862064, "grad_norm": 5.316435299981712, "learning_rate": 1.4139982644337617e-06, "loss": 1.1356, "step": 2827 }, { "epoch": 0.3833796515962855, "grad_norm": 5.255078040190217, "learning_rate": 1.4135984870890228e-06, "loss": 1.0887, "step": 2828 }, { "epoch": 0.3835152172439504, "grad_norm": 14.23204960784395, "learning_rate": 1.4131986299850803e-06, "loss": 1.1683, "step": 2829 }, { "epoch": 0.38365078289161525, "grad_norm": 8.112105502461825, "learning_rate": 1.4127986931990437e-06, "loss": 1.0951, "step": 2830 }, { "epoch": 0.3837863485392801, "grad_norm": 5.69705770717015, "learning_rate": 1.4123986768080375e-06, "loss": 1.1292, "step": 2831 }, { "epoch": 0.38392191418694505, "grad_norm": 6.568811907030162, "learning_rate": 1.4119985808892016e-06, "loss": 1.1643, "step": 2832 }, { "epoch": 0.3840574798346099, "grad_norm": 6.2236274174852, "learning_rate": 1.4115984055196918e-06, "loss": 1.1181, "step": 2833 }, { "epoch": 0.3841930454822748, "grad_norm": 7.896439157329611, "learning_rate": 1.4111981507766782e-06, "loss": 1.1434, "step": 2834 }, { "epoch": 0.38432861112993966, "grad_norm": 6.968330530945772, "learning_rate": 1.4107978167373469e-06, "loss": 1.1685, "step": 2835 }, { "epoch": 0.3844641767776046, "grad_norm": 4.274816106854278, "learning_rate": 1.4103974034788994e-06, "loss": 1.1076, "step": 2836 }, { "epoch": 0.38459974242526945, "grad_norm": 5.2724781710020565, "learning_rate": 1.4099969110785521e-06, "loss": 1.146, "step": 2837 }, { "epoch": 0.3847353080729343, "grad_norm": 4.816984761291238, "learning_rate": 1.409596339613537e-06, "loss": 1.1393, "step": 2838 }, { "epoch": 0.3848708737205992, "grad_norm": 7.0147103940660775, "learning_rate": 1.409195689161101e-06, "loss": 1.1548, "step": 2839 }, { "epoch": 0.38500643936826406, "grad_norm": 5.392369139503987, "learning_rate": 1.4087949597985062e-06, "loss": 1.168, "step": 2840 }, { "epoch": 0.385142005015929, "grad_norm": 6.874152003455172, "learning_rate": 1.4083941516030303e-06, "loss": 1.1716, "step": 2841 }, { "epoch": 0.38527757066359386, "grad_norm": 21.756629208730423, "learning_rate": 1.407993264651966e-06, "loss": 1.1223, "step": 2842 }, { "epoch": 0.38541313631125873, "grad_norm": 7.149387842150839, "learning_rate": 1.4075922990226209e-06, "loss": 1.1385, "step": 2843 }, { "epoch": 0.3855487019589236, "grad_norm": 7.113610447816051, "learning_rate": 1.407191254792318e-06, "loss": 1.0954, "step": 2844 }, { "epoch": 0.38568426760658847, "grad_norm": 14.359337584477291, "learning_rate": 1.4067901320383962e-06, "loss": 1.1231, "step": 2845 }, { "epoch": 0.3858198332542534, "grad_norm": 12.957037564678407, "learning_rate": 1.4063889308382084e-06, "loss": 1.103, "step": 2846 }, { "epoch": 0.38595539890191827, "grad_norm": 9.533774570729891, "learning_rate": 1.405987651269123e-06, "loss": 1.157, "step": 2847 }, { "epoch": 0.38609096454958314, "grad_norm": 5.080895732379304, "learning_rate": 1.4055862934085239e-06, "loss": 1.173, "step": 2848 }, { "epoch": 0.386226530197248, "grad_norm": 4.478004123885882, "learning_rate": 1.4051848573338095e-06, "loss": 1.1302, "step": 2849 }, { "epoch": 0.3863620958449129, "grad_norm": 6.449159282281514, "learning_rate": 1.4047833431223936e-06, "loss": 1.1087, "step": 2850 }, { "epoch": 0.3864976614925778, "grad_norm": 7.065636864011673, "learning_rate": 1.4043817508517053e-06, "loss": 1.1836, "step": 2851 }, { "epoch": 0.3866332271402427, "grad_norm": 9.713392958220348, "learning_rate": 1.4039800805991883e-06, "loss": 1.1207, "step": 2852 }, { "epoch": 0.38676879278790754, "grad_norm": 7.847407453351209, "learning_rate": 1.403578332442302e-06, "loss": 1.1557, "step": 2853 }, { "epoch": 0.3869043584355724, "grad_norm": 5.973598486595706, "learning_rate": 1.4031765064585196e-06, "loss": 1.184, "step": 2854 }, { "epoch": 0.3870399240832373, "grad_norm": 5.234682398415145, "learning_rate": 1.4027746027253301e-06, "loss": 1.1236, "step": 2855 }, { "epoch": 0.3871754897309022, "grad_norm": 9.285944705857315, "learning_rate": 1.402372621320238e-06, "loss": 1.1893, "step": 2856 }, { "epoch": 0.3873110553785671, "grad_norm": 5.395331011782463, "learning_rate": 1.401970562320762e-06, "loss": 1.1419, "step": 2857 }, { "epoch": 0.38744662102623195, "grad_norm": 5.346041083147784, "learning_rate": 1.4015684258044363e-06, "loss": 1.1097, "step": 2858 }, { "epoch": 0.3875821866738968, "grad_norm": 6.302477117287476, "learning_rate": 1.401166211848809e-06, "loss": 1.0923, "step": 2859 }, { "epoch": 0.3877177523215617, "grad_norm": 4.583091738889103, "learning_rate": 1.4007639205314448e-06, "loss": 1.1229, "step": 2860 }, { "epoch": 0.3878533179692266, "grad_norm": 5.639007796511065, "learning_rate": 1.4003615519299216e-06, "loss": 1.121, "step": 2861 }, { "epoch": 0.3879888836168915, "grad_norm": 5.80689619614811, "learning_rate": 1.3999591061218334e-06, "loss": 1.1283, "step": 2862 }, { "epoch": 0.38812444926455636, "grad_norm": 7.464313454781392, "learning_rate": 1.399556583184789e-06, "loss": 1.1587, "step": 2863 }, { "epoch": 0.38826001491222123, "grad_norm": 7.228647977813854, "learning_rate": 1.3991539831964114e-06, "loss": 1.0952, "step": 2864 }, { "epoch": 0.3883955805598861, "grad_norm": 5.82723821461323, "learning_rate": 1.3987513062343385e-06, "loss": 1.1624, "step": 2865 }, { "epoch": 0.388531146207551, "grad_norm": 6.5353830687205665, "learning_rate": 1.3983485523762243e-06, "loss": 1.15, "step": 2866 }, { "epoch": 0.3886667118552159, "grad_norm": 5.998778993387091, "learning_rate": 1.3979457216997358e-06, "loss": 1.1403, "step": 2867 }, { "epoch": 0.38880227750288077, "grad_norm": 8.506133715399502, "learning_rate": 1.397542814282556e-06, "loss": 1.1231, "step": 2868 }, { "epoch": 0.38893784315054564, "grad_norm": 4.2368564336762145, "learning_rate": 1.3971398302023824e-06, "loss": 1.1249, "step": 2869 }, { "epoch": 0.3890734087982105, "grad_norm": 5.381760485318621, "learning_rate": 1.3967367695369276e-06, "loss": 1.1732, "step": 2870 }, { "epoch": 0.38920897444587543, "grad_norm": 7.1437567576765195, "learning_rate": 1.3963336323639183e-06, "loss": 1.1586, "step": 2871 }, { "epoch": 0.3893445400935403, "grad_norm": 4.997084359711632, "learning_rate": 1.3959304187610967e-06, "loss": 1.1009, "step": 2872 }, { "epoch": 0.3894801057412052, "grad_norm": 5.542281511205433, "learning_rate": 1.3955271288062188e-06, "loss": 1.1442, "step": 2873 }, { "epoch": 0.38961567138887004, "grad_norm": 4.986916471145126, "learning_rate": 1.3951237625770564e-06, "loss": 1.1209, "step": 2874 }, { "epoch": 0.3897512370365349, "grad_norm": 7.215885230068349, "learning_rate": 1.3947203201513953e-06, "loss": 1.1355, "step": 2875 }, { "epoch": 0.38988680268419984, "grad_norm": 5.953842106693422, "learning_rate": 1.3943168016070361e-06, "loss": 1.1425, "step": 2876 }, { "epoch": 0.3900223683318647, "grad_norm": 8.230154321722866, "learning_rate": 1.3939132070217942e-06, "loss": 1.1077, "step": 2877 }, { "epoch": 0.3901579339795296, "grad_norm": 4.2748795226687974, "learning_rate": 1.3935095364734998e-06, "loss": 1.1367, "step": 2878 }, { "epoch": 0.39029349962719445, "grad_norm": 6.1322043898382725, "learning_rate": 1.3931057900399976e-06, "loss": 1.1638, "step": 2879 }, { "epoch": 0.3904290652748594, "grad_norm": 7.572929053236426, "learning_rate": 1.3927019677991466e-06, "loss": 1.1586, "step": 2880 }, { "epoch": 0.39056463092252425, "grad_norm": 5.096360775653269, "learning_rate": 1.3922980698288212e-06, "loss": 1.1324, "step": 2881 }, { "epoch": 0.3907001965701891, "grad_norm": 6.444929162514169, "learning_rate": 1.3918940962069093e-06, "loss": 1.1855, "step": 2882 }, { "epoch": 0.390835762217854, "grad_norm": 9.565761607147165, "learning_rate": 1.3914900470113144e-06, "loss": 1.1397, "step": 2883 }, { "epoch": 0.39097132786551886, "grad_norm": 12.665586596516325, "learning_rate": 1.3910859223199545e-06, "loss": 1.166, "step": 2884 }, { "epoch": 0.3911068935131838, "grad_norm": 24.379143618406133, "learning_rate": 1.3906817222107611e-06, "loss": 1.1722, "step": 2885 }, { "epoch": 0.39124245916084865, "grad_norm": 5.405339508428262, "learning_rate": 1.3902774467616817e-06, "loss": 1.1329, "step": 2886 }, { "epoch": 0.3913780248085135, "grad_norm": 4.55399048438797, "learning_rate": 1.3898730960506772e-06, "loss": 1.1715, "step": 2887 }, { "epoch": 0.3915135904561784, "grad_norm": 8.068825264666934, "learning_rate": 1.3894686701557237e-06, "loss": 1.1301, "step": 2888 }, { "epoch": 0.39164915610384327, "grad_norm": 8.795203896960798, "learning_rate": 1.3890641691548113e-06, "loss": 1.1729, "step": 2889 }, { "epoch": 0.3917847217515082, "grad_norm": 7.600043330853067, "learning_rate": 1.3886595931259451e-06, "loss": 1.1411, "step": 2890 }, { "epoch": 0.39192028739917306, "grad_norm": 4.594244802619602, "learning_rate": 1.3882549421471442e-06, "loss": 1.1502, "step": 2891 }, { "epoch": 0.39205585304683793, "grad_norm": 5.868558746217, "learning_rate": 1.3878502162964422e-06, "loss": 1.1228, "step": 2892 }, { "epoch": 0.3921914186945028, "grad_norm": 6.6395468945714065, "learning_rate": 1.3874454156518877e-06, "loss": 1.1196, "step": 2893 }, { "epoch": 0.3923269843421677, "grad_norm": 6.049470358570959, "learning_rate": 1.3870405402915436e-06, "loss": 1.1641, "step": 2894 }, { "epoch": 0.3924625499898326, "grad_norm": 16.837744743689044, "learning_rate": 1.3866355902934856e-06, "loss": 1.1185, "step": 2895 }, { "epoch": 0.39259811563749747, "grad_norm": 9.758476715151529, "learning_rate": 1.3862305657358065e-06, "loss": 1.1289, "step": 2896 }, { "epoch": 0.39273368128516234, "grad_norm": 7.821980094084816, "learning_rate": 1.385825466696611e-06, "loss": 1.1184, "step": 2897 }, { "epoch": 0.3928692469328272, "grad_norm": 6.225524503506976, "learning_rate": 1.3854202932540202e-06, "loss": 1.1501, "step": 2898 }, { "epoch": 0.3930048125804921, "grad_norm": 5.491353980906748, "learning_rate": 1.3850150454861682e-06, "loss": 1.1356, "step": 2899 }, { "epoch": 0.393140378228157, "grad_norm": 12.410301327784994, "learning_rate": 1.3846097234712034e-06, "loss": 1.1072, "step": 2900 }, { "epoch": 0.3932759438758219, "grad_norm": 6.712256292650184, "learning_rate": 1.3842043272872896e-06, "loss": 1.142, "step": 2901 }, { "epoch": 0.39341150952348675, "grad_norm": 17.374590749793477, "learning_rate": 1.383798857012604e-06, "loss": 1.1694, "step": 2902 }, { "epoch": 0.3935470751711516, "grad_norm": 4.638314805368167, "learning_rate": 1.3833933127253383e-06, "loss": 1.1384, "step": 2903 }, { "epoch": 0.3936826408188165, "grad_norm": 4.855476933901539, "learning_rate": 1.3829876945036987e-06, "loss": 1.1281, "step": 2904 }, { "epoch": 0.3938182064664814, "grad_norm": 5.890197750982746, "learning_rate": 1.3825820024259052e-06, "loss": 1.1172, "step": 2905 }, { "epoch": 0.3939537721141463, "grad_norm": 5.162140483851674, "learning_rate": 1.3821762365701926e-06, "loss": 1.133, "step": 2906 }, { "epoch": 0.39408933776181115, "grad_norm": 6.251110591573857, "learning_rate": 1.3817703970148092e-06, "loss": 1.1507, "step": 2907 }, { "epoch": 0.394224903409476, "grad_norm": 6.680515113568253, "learning_rate": 1.3813644838380184e-06, "loss": 1.1497, "step": 2908 }, { "epoch": 0.3943604690571409, "grad_norm": 7.755275844979038, "learning_rate": 1.3809584971180975e-06, "loss": 1.1367, "step": 2909 }, { "epoch": 0.3944960347048058, "grad_norm": 6.663711150072788, "learning_rate": 1.3805524369333371e-06, "loss": 1.1505, "step": 2910 }, { "epoch": 0.3946316003524707, "grad_norm": 4.828139223928939, "learning_rate": 1.3801463033620433e-06, "loss": 1.1296, "step": 2911 }, { "epoch": 0.39476716600013556, "grad_norm": 9.599665459315748, "learning_rate": 1.3797400964825357e-06, "loss": 1.1375, "step": 2912 }, { "epoch": 0.39490273164780043, "grad_norm": 6.692187724252884, "learning_rate": 1.3793338163731476e-06, "loss": 1.1536, "step": 2913 }, { "epoch": 0.3950382972954653, "grad_norm": 4.756371836955459, "learning_rate": 1.3789274631122277e-06, "loss": 1.1209, "step": 2914 }, { "epoch": 0.3951738629431302, "grad_norm": 13.472158720967029, "learning_rate": 1.3785210367781375e-06, "loss": 1.14, "step": 2915 }, { "epoch": 0.3953094285907951, "grad_norm": 6.236049483800511, "learning_rate": 1.378114537449253e-06, "loss": 1.1316, "step": 2916 }, { "epoch": 0.39544499423845997, "grad_norm": 5.038473854585796, "learning_rate": 1.3777079652039646e-06, "loss": 1.1684, "step": 2917 }, { "epoch": 0.39558055988612484, "grad_norm": 31.321200978092573, "learning_rate": 1.3773013201206768e-06, "loss": 1.1801, "step": 2918 }, { "epoch": 0.39571612553378976, "grad_norm": 8.34042694122893, "learning_rate": 1.3768946022778075e-06, "loss": 1.1487, "step": 2919 }, { "epoch": 0.39585169118145463, "grad_norm": 7.474247643377325, "learning_rate": 1.3764878117537895e-06, "loss": 1.1223, "step": 2920 }, { "epoch": 0.3959872568291195, "grad_norm": 5.686805382285385, "learning_rate": 1.3760809486270684e-06, "loss": 1.1395, "step": 2921 }, { "epoch": 0.3961228224767844, "grad_norm": 4.804044625634983, "learning_rate": 1.3756740129761053e-06, "loss": 1.1992, "step": 2922 }, { "epoch": 0.39625838812444925, "grad_norm": 5.89332138855985, "learning_rate": 1.3752670048793743e-06, "loss": 1.185, "step": 2923 }, { "epoch": 0.39639395377211417, "grad_norm": 6.019116866397974, "learning_rate": 1.3748599244153632e-06, "loss": 1.1056, "step": 2924 }, { "epoch": 0.39652951941977904, "grad_norm": 9.61481134868819, "learning_rate": 1.3744527716625746e-06, "loss": 1.1274, "step": 2925 }, { "epoch": 0.3966650850674439, "grad_norm": 7.0703047888916215, "learning_rate": 1.3740455466995248e-06, "loss": 1.1113, "step": 2926 }, { "epoch": 0.3968006507151088, "grad_norm": 5.9290408451994985, "learning_rate": 1.373638249604744e-06, "loss": 1.1608, "step": 2927 }, { "epoch": 0.39693621636277365, "grad_norm": 4.668093514316299, "learning_rate": 1.3732308804567761e-06, "loss": 1.1325, "step": 2928 }, { "epoch": 0.3970717820104386, "grad_norm": 4.464155507541583, "learning_rate": 1.3728234393341789e-06, "loss": 1.0893, "step": 2929 }, { "epoch": 0.39720734765810345, "grad_norm": 10.843919894124896, "learning_rate": 1.3724159263155246e-06, "loss": 1.0916, "step": 2930 }, { "epoch": 0.3973429133057683, "grad_norm": 5.904915868708464, "learning_rate": 1.3720083414793984e-06, "loss": 1.1448, "step": 2931 }, { "epoch": 0.3974784789534332, "grad_norm": 7.738602714282102, "learning_rate": 1.3716006849043998e-06, "loss": 1.1574, "step": 2932 }, { "epoch": 0.39761404460109806, "grad_norm": 5.189909372540329, "learning_rate": 1.3711929566691424e-06, "loss": 1.1235, "step": 2933 }, { "epoch": 0.397749610248763, "grad_norm": 5.00845256110982, "learning_rate": 1.3707851568522534e-06, "loss": 1.1323, "step": 2934 }, { "epoch": 0.39788517589642786, "grad_norm": 7.225786992497464, "learning_rate": 1.3703772855323739e-06, "loss": 1.1, "step": 2935 }, { "epoch": 0.3980207415440927, "grad_norm": 11.90881467626941, "learning_rate": 1.3699693427881582e-06, "loss": 1.1155, "step": 2936 }, { "epoch": 0.3981563071917576, "grad_norm": 6.066780352030847, "learning_rate": 1.3695613286982754e-06, "loss": 1.1034, "step": 2937 }, { "epoch": 0.39829187283942247, "grad_norm": 4.93739248889655, "learning_rate": 1.3691532433414073e-06, "loss": 1.1508, "step": 2938 }, { "epoch": 0.3984274384870874, "grad_norm": 5.4842585382235916, "learning_rate": 1.36874508679625e-06, "loss": 1.0923, "step": 2939 }, { "epoch": 0.39856300413475226, "grad_norm": 4.910736475009506, "learning_rate": 1.3683368591415137e-06, "loss": 1.0915, "step": 2940 }, { "epoch": 0.39869856978241713, "grad_norm": 7.102357809243465, "learning_rate": 1.3679285604559211e-06, "loss": 1.1365, "step": 2941 }, { "epoch": 0.398834135430082, "grad_norm": 4.603528014288032, "learning_rate": 1.3675201908182103e-06, "loss": 1.1441, "step": 2942 }, { "epoch": 0.3989697010777469, "grad_norm": 7.153273530566805, "learning_rate": 1.3671117503071317e-06, "loss": 1.1414, "step": 2943 }, { "epoch": 0.3991052667254118, "grad_norm": 4.779677612360697, "learning_rate": 1.3667032390014497e-06, "loss": 1.1483, "step": 2944 }, { "epoch": 0.39924083237307667, "grad_norm": 5.081143968519306, "learning_rate": 1.3662946569799426e-06, "loss": 1.1502, "step": 2945 }, { "epoch": 0.39937639802074154, "grad_norm": 7.193890299685202, "learning_rate": 1.3658860043214024e-06, "loss": 1.177, "step": 2946 }, { "epoch": 0.3995119636684064, "grad_norm": 12.699422800649387, "learning_rate": 1.3654772811046344e-06, "loss": 1.1359, "step": 2947 }, { "epoch": 0.3996475293160713, "grad_norm": 14.772491157245652, "learning_rate": 1.3650684874084577e-06, "loss": 1.1454, "step": 2948 }, { "epoch": 0.3997830949637362, "grad_norm": 7.150594652971143, "learning_rate": 1.3646596233117047e-06, "loss": 1.0857, "step": 2949 }, { "epoch": 0.3999186606114011, "grad_norm": 6.950060456163959, "learning_rate": 1.364250688893222e-06, "loss": 1.1115, "step": 2950 }, { "epoch": 0.40005422625906595, "grad_norm": 6.330558381513969, "learning_rate": 1.3638416842318691e-06, "loss": 1.1185, "step": 2951 }, { "epoch": 0.4001897919067308, "grad_norm": 10.533997698902219, "learning_rate": 1.3634326094065194e-06, "loss": 1.1448, "step": 2952 }, { "epoch": 0.4003253575543957, "grad_norm": 5.79936913366662, "learning_rate": 1.3630234644960597e-06, "loss": 1.147, "step": 2953 }, { "epoch": 0.4004609232020606, "grad_norm": 4.519978710521539, "learning_rate": 1.3626142495793902e-06, "loss": 1.1438, "step": 2954 }, { "epoch": 0.4005964888497255, "grad_norm": 5.219215120695663, "learning_rate": 1.3622049647354252e-06, "loss": 1.1565, "step": 2955 }, { "epoch": 0.40073205449739036, "grad_norm": 11.480698529468496, "learning_rate": 1.361795610043092e-06, "loss": 1.124, "step": 2956 }, { "epoch": 0.4008676201450552, "grad_norm": 7.596116818010507, "learning_rate": 1.3613861855813308e-06, "loss": 1.1587, "step": 2957 }, { "epoch": 0.40100318579272015, "grad_norm": 6.325580157281871, "learning_rate": 1.3609766914290965e-06, "loss": 1.1337, "step": 2958 }, { "epoch": 0.401138751440385, "grad_norm": 6.061669037811298, "learning_rate": 1.3605671276653565e-06, "loss": 1.1421, "step": 2959 }, { "epoch": 0.4012743170880499, "grad_norm": 8.891237088109412, "learning_rate": 1.3601574943690924e-06, "loss": 1.1945, "step": 2960 }, { "epoch": 0.40140988273571476, "grad_norm": 6.448716585029775, "learning_rate": 1.3597477916192985e-06, "loss": 1.1598, "step": 2961 }, { "epoch": 0.40154544838337963, "grad_norm": 8.71832197051165, "learning_rate": 1.3593380194949823e-06, "loss": 1.1426, "step": 2962 }, { "epoch": 0.40168101403104456, "grad_norm": 5.09999577546547, "learning_rate": 1.3589281780751659e-06, "loss": 1.129, "step": 2963 }, { "epoch": 0.40181657967870943, "grad_norm": 6.387252541194524, "learning_rate": 1.358518267438883e-06, "loss": 1.1408, "step": 2964 }, { "epoch": 0.4019521453263743, "grad_norm": 5.79605379196519, "learning_rate": 1.3581082876651824e-06, "loss": 1.1822, "step": 2965 }, { "epoch": 0.40208771097403917, "grad_norm": 6.755668014461071, "learning_rate": 1.3576982388331258e-06, "loss": 1.1577, "step": 2966 }, { "epoch": 0.40222327662170404, "grad_norm": 6.775143050552369, "learning_rate": 1.3572881210217869e-06, "loss": 1.1168, "step": 2967 }, { "epoch": 0.40235884226936897, "grad_norm": 4.970711488350601, "learning_rate": 1.3568779343102539e-06, "loss": 1.1571, "step": 2968 }, { "epoch": 0.40249440791703384, "grad_norm": 5.138302114676299, "learning_rate": 1.3564676787776282e-06, "loss": 1.0785, "step": 2969 }, { "epoch": 0.4026299735646987, "grad_norm": 4.7156594448006075, "learning_rate": 1.356057354503025e-06, "loss": 1.1388, "step": 2970 }, { "epoch": 0.4027655392123636, "grad_norm": 7.796719785376424, "learning_rate": 1.3556469615655713e-06, "loss": 1.1867, "step": 2971 }, { "epoch": 0.40290110486002845, "grad_norm": 7.652439311800982, "learning_rate": 1.355236500044408e-06, "loss": 1.1357, "step": 2972 }, { "epoch": 0.4030366705076934, "grad_norm": 4.685750293966763, "learning_rate": 1.3548259700186901e-06, "loss": 1.1458, "step": 2973 }, { "epoch": 0.40317223615535824, "grad_norm": 7.711443864280499, "learning_rate": 1.3544153715675848e-06, "loss": 1.1825, "step": 2974 }, { "epoch": 0.4033078018030231, "grad_norm": 4.396062457841073, "learning_rate": 1.3540047047702725e-06, "loss": 1.1367, "step": 2975 }, { "epoch": 0.403443367450688, "grad_norm": 5.8928016505570815, "learning_rate": 1.353593969705947e-06, "loss": 1.1037, "step": 2976 }, { "epoch": 0.40357893309835285, "grad_norm": 4.864222874397865, "learning_rate": 1.353183166453816e-06, "loss": 1.144, "step": 2977 }, { "epoch": 0.4037144987460178, "grad_norm": 5.162966750283223, "learning_rate": 1.352772295093099e-06, "loss": 1.1389, "step": 2978 }, { "epoch": 0.40385006439368265, "grad_norm": 6.897397700782566, "learning_rate": 1.3523613557030298e-06, "loss": 1.1513, "step": 2979 }, { "epoch": 0.4039856300413475, "grad_norm": 5.85681152527414, "learning_rate": 1.3519503483628541e-06, "loss": 1.1323, "step": 2980 }, { "epoch": 0.4041211956890124, "grad_norm": 8.118493838861387, "learning_rate": 1.351539273151832e-06, "loss": 1.1123, "step": 2981 }, { "epoch": 0.40425676133667726, "grad_norm": 12.520584327309994, "learning_rate": 1.3511281301492358e-06, "loss": 1.1487, "step": 2982 }, { "epoch": 0.4043923269843422, "grad_norm": 4.897202153660223, "learning_rate": 1.3507169194343514e-06, "loss": 1.1355, "step": 2983 }, { "epoch": 0.40452789263200706, "grad_norm": 8.994309475847846, "learning_rate": 1.3503056410864777e-06, "loss": 1.1496, "step": 2984 }, { "epoch": 0.40466345827967193, "grad_norm": 6.136729209910847, "learning_rate": 1.349894295184926e-06, "loss": 1.1546, "step": 2985 }, { "epoch": 0.4047990239273368, "grad_norm": 4.73228081315715, "learning_rate": 1.3494828818090215e-06, "loss": 1.1332, "step": 2986 }, { "epoch": 0.40493458957500167, "grad_norm": 4.685713191772627, "learning_rate": 1.349071401038102e-06, "loss": 1.1539, "step": 2987 }, { "epoch": 0.4050701552226666, "grad_norm": 11.656085727466163, "learning_rate": 1.348659852951518e-06, "loss": 1.1079, "step": 2988 }, { "epoch": 0.40520572087033147, "grad_norm": 4.8745212924709875, "learning_rate": 1.3482482376286338e-06, "loss": 1.1154, "step": 2989 }, { "epoch": 0.40534128651799634, "grad_norm": 7.991064714422632, "learning_rate": 1.3478365551488256e-06, "loss": 1.1448, "step": 2990 }, { "epoch": 0.4054768521656612, "grad_norm": 6.129105874387823, "learning_rate": 1.3474248055914834e-06, "loss": 1.1247, "step": 2991 }, { "epoch": 0.4056124178133261, "grad_norm": 4.735853497849715, "learning_rate": 1.3470129890360103e-06, "loss": 1.1139, "step": 2992 }, { "epoch": 0.405747983460991, "grad_norm": 5.245625130112977, "learning_rate": 1.3466011055618207e-06, "loss": 1.1112, "step": 2993 }, { "epoch": 0.40588354910865587, "grad_norm": 12.64936991934092, "learning_rate": 1.3461891552483442e-06, "loss": 1.1293, "step": 2994 }, { "epoch": 0.40601911475632074, "grad_norm": 5.0712267036796925, "learning_rate": 1.3457771381750217e-06, "loss": 1.1255, "step": 2995 }, { "epoch": 0.4061546804039856, "grad_norm": 4.242412847822937, "learning_rate": 1.3453650544213076e-06, "loss": 1.1222, "step": 2996 }, { "epoch": 0.40629024605165054, "grad_norm": 5.140269057132415, "learning_rate": 1.344952904066669e-06, "loss": 1.1441, "step": 2997 }, { "epoch": 0.4064258116993154, "grad_norm": 6.339755302036813, "learning_rate": 1.3445406871905855e-06, "loss": 1.1255, "step": 2998 }, { "epoch": 0.4065613773469803, "grad_norm": 4.243670716188412, "learning_rate": 1.34412840387255e-06, "loss": 1.1286, "step": 2999 }, { "epoch": 0.40669694299464515, "grad_norm": 4.226924380922909, "learning_rate": 1.3437160541920685e-06, "loss": 1.1339, "step": 3000 }, { "epoch": 0.40683250864231, "grad_norm": 6.5186415928587085, "learning_rate": 1.3433036382286589e-06, "loss": 1.1454, "step": 3001 }, { "epoch": 0.40696807428997495, "grad_norm": 11.557450669027984, "learning_rate": 1.3428911560618525e-06, "loss": 1.1634, "step": 3002 }, { "epoch": 0.4071036399376398, "grad_norm": 5.045679390395705, "learning_rate": 1.3424786077711933e-06, "loss": 1.1391, "step": 3003 }, { "epoch": 0.4072392055853047, "grad_norm": 4.7644332939592156, "learning_rate": 1.342065993436238e-06, "loss": 1.0956, "step": 3004 }, { "epoch": 0.40737477123296956, "grad_norm": 6.430777058220889, "learning_rate": 1.3416533131365563e-06, "loss": 1.1174, "step": 3005 }, { "epoch": 0.4075103368806344, "grad_norm": 4.400363917416142, "learning_rate": 1.3412405669517296e-06, "loss": 1.1377, "step": 3006 }, { "epoch": 0.40764590252829935, "grad_norm": 5.981695602496739, "learning_rate": 1.3408277549613534e-06, "loss": 1.1357, "step": 3007 }, { "epoch": 0.4077814681759642, "grad_norm": 7.749031686433254, "learning_rate": 1.3404148772450348e-06, "loss": 1.151, "step": 3008 }, { "epoch": 0.4079170338236291, "grad_norm": 5.019088778135379, "learning_rate": 1.340001933882394e-06, "loss": 1.1045, "step": 3009 }, { "epoch": 0.40805259947129396, "grad_norm": 5.585428820887078, "learning_rate": 1.3395889249530642e-06, "loss": 1.1573, "step": 3010 }, { "epoch": 0.40818816511895883, "grad_norm": 13.549320732609893, "learning_rate": 1.339175850536691e-06, "loss": 1.135, "step": 3011 }, { "epoch": 0.40832373076662376, "grad_norm": 4.6691967908254846, "learning_rate": 1.338762710712932e-06, "loss": 1.1281, "step": 3012 }, { "epoch": 0.40845929641428863, "grad_norm": 5.646516931983621, "learning_rate": 1.3383495055614586e-06, "loss": 1.1331, "step": 3013 }, { "epoch": 0.4085948620619535, "grad_norm": 8.349859145069923, "learning_rate": 1.3379362351619537e-06, "loss": 1.1495, "step": 3014 }, { "epoch": 0.40873042770961837, "grad_norm": 5.756287587785424, "learning_rate": 1.3375228995941132e-06, "loss": 1.1678, "step": 3015 }, { "epoch": 0.40886599335728324, "grad_norm": 6.255193119745922, "learning_rate": 1.337109498937646e-06, "loss": 1.161, "step": 3016 }, { "epoch": 0.40900155900494817, "grad_norm": 3.538294061467963, "learning_rate": 1.3366960332722728e-06, "loss": 1.1043, "step": 3017 }, { "epoch": 0.40913712465261304, "grad_norm": 4.2355914824517065, "learning_rate": 1.3362825026777272e-06, "loss": 1.1615, "step": 3018 }, { "epoch": 0.4092726903002779, "grad_norm": 8.97631705370121, "learning_rate": 1.3358689072337554e-06, "loss": 1.1305, "step": 3019 }, { "epoch": 0.4094082559479428, "grad_norm": 7.027707262817496, "learning_rate": 1.3354552470201161e-06, "loss": 1.1006, "step": 3020 }, { "epoch": 0.40954382159560765, "grad_norm": 6.053695530016198, "learning_rate": 1.3350415221165805e-06, "loss": 1.1346, "step": 3021 }, { "epoch": 0.4096793872432726, "grad_norm": 13.35510922710804, "learning_rate": 1.3346277326029317e-06, "loss": 1.1328, "step": 3022 }, { "epoch": 0.40981495289093745, "grad_norm": 5.378381241706518, "learning_rate": 1.3342138785589666e-06, "loss": 1.1135, "step": 3023 }, { "epoch": 0.4099505185386023, "grad_norm": 4.4791169857605455, "learning_rate": 1.3337999600644928e-06, "loss": 1.1655, "step": 3024 }, { "epoch": 0.4100860841862672, "grad_norm": 8.864598953900398, "learning_rate": 1.3333859771993315e-06, "loss": 1.1266, "step": 3025 }, { "epoch": 0.41022164983393206, "grad_norm": 6.356355047628261, "learning_rate": 1.332971930043316e-06, "loss": 1.1065, "step": 3026 }, { "epoch": 0.410357215481597, "grad_norm": 11.116261784384237, "learning_rate": 1.3325578186762923e-06, "loss": 1.141, "step": 3027 }, { "epoch": 0.41049278112926185, "grad_norm": 12.185415107989627, "learning_rate": 1.3321436431781183e-06, "loss": 1.1373, "step": 3028 }, { "epoch": 0.4106283467769267, "grad_norm": 5.60447255498957, "learning_rate": 1.3317294036286644e-06, "loss": 1.0917, "step": 3029 }, { "epoch": 0.4107639124245916, "grad_norm": 7.133794061165845, "learning_rate": 1.3313151001078135e-06, "loss": 1.1029, "step": 3030 }, { "epoch": 0.41089947807225646, "grad_norm": 4.784774750405692, "learning_rate": 1.3309007326954608e-06, "loss": 1.1058, "step": 3031 }, { "epoch": 0.4110350437199214, "grad_norm": 4.903815431575502, "learning_rate": 1.330486301471514e-06, "loss": 1.1557, "step": 3032 }, { "epoch": 0.41117060936758626, "grad_norm": 6.115603387943093, "learning_rate": 1.3300718065158924e-06, "loss": 1.0965, "step": 3033 }, { "epoch": 0.41130617501525113, "grad_norm": 5.766116272757199, "learning_rate": 1.3296572479085284e-06, "loss": 1.1984, "step": 3034 }, { "epoch": 0.411441740662916, "grad_norm": 18.177205251068827, "learning_rate": 1.3292426257293668e-06, "loss": 1.1097, "step": 3035 }, { "epoch": 0.4115773063105809, "grad_norm": 4.606215016265659, "learning_rate": 1.3288279400583631e-06, "loss": 1.1562, "step": 3036 }, { "epoch": 0.4117128719582458, "grad_norm": 4.759772400452665, "learning_rate": 1.3284131909754868e-06, "loss": 1.1375, "step": 3037 }, { "epoch": 0.41184843760591067, "grad_norm": 6.335316702963394, "learning_rate": 1.3279983785607192e-06, "loss": 1.1283, "step": 3038 }, { "epoch": 0.41198400325357554, "grad_norm": 5.451403404310027, "learning_rate": 1.327583502894053e-06, "loss": 1.101, "step": 3039 }, { "epoch": 0.4121195689012404, "grad_norm": 5.188488358961294, "learning_rate": 1.3271685640554943e-06, "loss": 1.1042, "step": 3040 }, { "epoch": 0.41225513454890533, "grad_norm": 7.329062009631597, "learning_rate": 1.3267535621250604e-06, "loss": 1.1249, "step": 3041 }, { "epoch": 0.4123907001965702, "grad_norm": 5.150073224690307, "learning_rate": 1.3263384971827816e-06, "loss": 1.1384, "step": 3042 }, { "epoch": 0.4125262658442351, "grad_norm": 4.866688416773662, "learning_rate": 1.3259233693086993e-06, "loss": 1.1187, "step": 3043 }, { "epoch": 0.41266183149189994, "grad_norm": 7.592270983448029, "learning_rate": 1.3255081785828678e-06, "loss": 1.1401, "step": 3044 }, { "epoch": 0.4127973971395648, "grad_norm": 5.586002134368988, "learning_rate": 1.3250929250853537e-06, "loss": 1.1415, "step": 3045 }, { "epoch": 0.41293296278722974, "grad_norm": 6.215651787777566, "learning_rate": 1.324677608896235e-06, "loss": 1.1216, "step": 3046 }, { "epoch": 0.4130685284348946, "grad_norm": 4.388635727343822, "learning_rate": 1.3242622300956027e-06, "loss": 1.1112, "step": 3047 }, { "epoch": 0.4132040940825595, "grad_norm": 4.993013699827376, "learning_rate": 1.3238467887635583e-06, "loss": 1.1404, "step": 3048 }, { "epoch": 0.41333965973022435, "grad_norm": 9.586808904795902, "learning_rate": 1.3234312849802173e-06, "loss": 1.1369, "step": 3049 }, { "epoch": 0.4134752253778892, "grad_norm": 6.5891212535582016, "learning_rate": 1.323015718825706e-06, "loss": 1.1111, "step": 3050 }, { "epoch": 0.41361079102555415, "grad_norm": 4.643852598076405, "learning_rate": 1.3226000903801632e-06, "loss": 1.1118, "step": 3051 }, { "epoch": 0.413746356673219, "grad_norm": 6.92923125466372, "learning_rate": 1.322184399723739e-06, "loss": 1.1611, "step": 3052 }, { "epoch": 0.4138819223208839, "grad_norm": 5.3499085402025806, "learning_rate": 1.3217686469365967e-06, "loss": 1.1676, "step": 3053 }, { "epoch": 0.41401748796854876, "grad_norm": 5.570835758704831, "learning_rate": 1.3213528320989107e-06, "loss": 1.143, "step": 3054 }, { "epoch": 0.41415305361621363, "grad_norm": 5.541574868646467, "learning_rate": 1.3209369552908676e-06, "loss": 1.097, "step": 3055 }, { "epoch": 0.41428861926387855, "grad_norm": 10.081852122757118, "learning_rate": 1.320521016592666e-06, "loss": 1.1103, "step": 3056 }, { "epoch": 0.4144241849115434, "grad_norm": 6.713629273202307, "learning_rate": 1.3201050160845164e-06, "loss": 1.129, "step": 3057 }, { "epoch": 0.4145597505592083, "grad_norm": 10.255206112736152, "learning_rate": 1.3196889538466413e-06, "loss": 1.101, "step": 3058 }, { "epoch": 0.41469531620687317, "grad_norm": 6.224676351706396, "learning_rate": 1.319272829959275e-06, "loss": 1.1134, "step": 3059 }, { "epoch": 0.41483088185453804, "grad_norm": 6.578613530697989, "learning_rate": 1.3188566445026635e-06, "loss": 1.1267, "step": 3060 }, { "epoch": 0.41496644750220296, "grad_norm": 12.21987959726148, "learning_rate": 1.3184403975570648e-06, "loss": 1.1117, "step": 3061 }, { "epoch": 0.41510201314986783, "grad_norm": 7.735103606146069, "learning_rate": 1.3180240892027494e-06, "loss": 1.132, "step": 3062 }, { "epoch": 0.4152375787975327, "grad_norm": 7.990241570717287, "learning_rate": 1.3176077195199984e-06, "loss": 1.1131, "step": 3063 }, { "epoch": 0.4153731444451976, "grad_norm": 5.915991186947045, "learning_rate": 1.3171912885891061e-06, "loss": 1.1339, "step": 3064 }, { "epoch": 0.41550871009286244, "grad_norm": 5.844583036140049, "learning_rate": 1.3167747964903775e-06, "loss": 1.1545, "step": 3065 }, { "epoch": 0.41564427574052737, "grad_norm": 9.736428819842178, "learning_rate": 1.3163582433041296e-06, "loss": 1.1179, "step": 3066 }, { "epoch": 0.41577984138819224, "grad_norm": 5.943535255739216, "learning_rate": 1.3159416291106916e-06, "loss": 1.1671, "step": 3067 }, { "epoch": 0.4159154070358571, "grad_norm": 9.593507506707029, "learning_rate": 1.3155249539904049e-06, "loss": 1.1426, "step": 3068 }, { "epoch": 0.416050972683522, "grad_norm": 4.510651888609329, "learning_rate": 1.3151082180236209e-06, "loss": 1.1486, "step": 3069 }, { "epoch": 0.41618653833118685, "grad_norm": 4.711594134970664, "learning_rate": 1.3146914212907042e-06, "loss": 1.114, "step": 3070 }, { "epoch": 0.4163221039788518, "grad_norm": 10.410914067755316, "learning_rate": 1.3142745638720314e-06, "loss": 1.1276, "step": 3071 }, { "epoch": 0.41645766962651665, "grad_norm": 15.669453550051875, "learning_rate": 1.3138576458479893e-06, "loss": 1.1463, "step": 3072 }, { "epoch": 0.4165932352741815, "grad_norm": 6.775646630221745, "learning_rate": 1.3134406672989779e-06, "loss": 1.1067, "step": 3073 }, { "epoch": 0.4167288009218464, "grad_norm": 12.168419308605566, "learning_rate": 1.313023628305408e-06, "loss": 1.1498, "step": 3074 }, { "epoch": 0.4168643665695113, "grad_norm": 5.308573475492243, "learning_rate": 1.3126065289477019e-06, "loss": 1.1692, "step": 3075 }, { "epoch": 0.4169999322171762, "grad_norm": 4.678685210707991, "learning_rate": 1.3121893693062947e-06, "loss": 1.1393, "step": 3076 }, { "epoch": 0.41713549786484105, "grad_norm": 4.894430336861038, "learning_rate": 1.3117721494616319e-06, "loss": 1.1308, "step": 3077 }, { "epoch": 0.4172710635125059, "grad_norm": 4.5077770129041435, "learning_rate": 1.3113548694941708e-06, "loss": 1.0771, "step": 3078 }, { "epoch": 0.4174066291601708, "grad_norm": 6.226193375324401, "learning_rate": 1.3109375294843808e-06, "loss": 1.1085, "step": 3079 }, { "epoch": 0.4175421948078357, "grad_norm": 7.475231428298767, "learning_rate": 1.3105201295127426e-06, "loss": 1.1805, "step": 3080 }, { "epoch": 0.4176777604555006, "grad_norm": 5.90677514548964, "learning_rate": 1.3101026696597487e-06, "loss": 1.1479, "step": 3081 }, { "epoch": 0.41781332610316546, "grad_norm": 6.019534147874039, "learning_rate": 1.3096851500059028e-06, "loss": 1.1066, "step": 3082 }, { "epoch": 0.41794889175083033, "grad_norm": 7.64171382452236, "learning_rate": 1.3092675706317197e-06, "loss": 1.1574, "step": 3083 }, { "epoch": 0.4180844573984952, "grad_norm": 5.0950326874430925, "learning_rate": 1.3088499316177272e-06, "loss": 1.1764, "step": 3084 }, { "epoch": 0.4182200230461601, "grad_norm": 5.022785646839351, "learning_rate": 1.3084322330444635e-06, "loss": 1.098, "step": 3085 }, { "epoch": 0.418355588693825, "grad_norm": 6.364456557372791, "learning_rate": 1.3080144749924782e-06, "loss": 1.12, "step": 3086 }, { "epoch": 0.41849115434148987, "grad_norm": 4.863297332767282, "learning_rate": 1.3075966575423326e-06, "loss": 1.1775, "step": 3087 }, { "epoch": 0.41862671998915474, "grad_norm": 9.890100032589531, "learning_rate": 1.3071787807745996e-06, "loss": 1.1238, "step": 3088 }, { "epoch": 0.4187622856368196, "grad_norm": 6.184424156926125, "learning_rate": 1.3067608447698633e-06, "loss": 1.1069, "step": 3089 }, { "epoch": 0.41889785128448453, "grad_norm": 6.110673105530427, "learning_rate": 1.3063428496087196e-06, "loss": 1.1448, "step": 3090 }, { "epoch": 0.4190334169321494, "grad_norm": 8.720743144966933, "learning_rate": 1.3059247953717758e-06, "loss": 1.1583, "step": 3091 }, { "epoch": 0.4191689825798143, "grad_norm": 6.389523936238805, "learning_rate": 1.3055066821396498e-06, "loss": 1.1461, "step": 3092 }, { "epoch": 0.41930454822747915, "grad_norm": 4.702780885162384, "learning_rate": 1.3050885099929716e-06, "loss": 1.1066, "step": 3093 }, { "epoch": 0.419440113875144, "grad_norm": 5.80830214259063, "learning_rate": 1.3046702790123824e-06, "loss": 1.0909, "step": 3094 }, { "epoch": 0.41957567952280894, "grad_norm": 9.081665730128613, "learning_rate": 1.3042519892785353e-06, "loss": 1.1447, "step": 3095 }, { "epoch": 0.4197112451704738, "grad_norm": 5.225438101672256, "learning_rate": 1.3038336408720932e-06, "loss": 1.1231, "step": 3096 }, { "epoch": 0.4198468108181387, "grad_norm": 9.115456153111019, "learning_rate": 1.303415233873732e-06, "loss": 1.1596, "step": 3097 }, { "epoch": 0.41998237646580355, "grad_norm": 4.510903484637252, "learning_rate": 1.3029967683641378e-06, "loss": 1.1381, "step": 3098 }, { "epoch": 0.4201179421134684, "grad_norm": 6.653758453179639, "learning_rate": 1.3025782444240085e-06, "loss": 1.1349, "step": 3099 }, { "epoch": 0.42025350776113335, "grad_norm": 6.907170047641483, "learning_rate": 1.3021596621340533e-06, "loss": 1.1235, "step": 3100 }, { "epoch": 0.4203890734087982, "grad_norm": 4.5007333926292805, "learning_rate": 1.3017410215749924e-06, "loss": 1.1059, "step": 3101 }, { "epoch": 0.4205246390564631, "grad_norm": 5.964193098407778, "learning_rate": 1.3013223228275571e-06, "loss": 1.1865, "step": 3102 }, { "epoch": 0.42066020470412796, "grad_norm": 7.637604903714012, "learning_rate": 1.3009035659724904e-06, "loss": 1.1471, "step": 3103 }, { "epoch": 0.42079577035179283, "grad_norm": 7.271430001016825, "learning_rate": 1.3004847510905463e-06, "loss": 1.1413, "step": 3104 }, { "epoch": 0.42093133599945776, "grad_norm": 7.800895827848606, "learning_rate": 1.30006587826249e-06, "loss": 1.15, "step": 3105 }, { "epoch": 0.4210669016471226, "grad_norm": 7.1575990225173305, "learning_rate": 1.2996469475690975e-06, "loss": 1.0994, "step": 3106 }, { "epoch": 0.4212024672947875, "grad_norm": 4.598964159855805, "learning_rate": 1.2992279590911563e-06, "loss": 1.1563, "step": 3107 }, { "epoch": 0.42133803294245237, "grad_norm": 8.974943210928098, "learning_rate": 1.298808912909465e-06, "loss": 1.1153, "step": 3108 }, { "epoch": 0.42147359859011724, "grad_norm": 4.657313051507643, "learning_rate": 1.298389809104834e-06, "loss": 1.1554, "step": 3109 }, { "epoch": 0.42160916423778216, "grad_norm": 14.368127718176137, "learning_rate": 1.297970647758083e-06, "loss": 1.1406, "step": 3110 }, { "epoch": 0.42174472988544703, "grad_norm": 8.94322980123252, "learning_rate": 1.2975514289500451e-06, "loss": 1.1296, "step": 3111 }, { "epoch": 0.4218802955331119, "grad_norm": 5.7628188165293, "learning_rate": 1.2971321527615629e-06, "loss": 1.1726, "step": 3112 }, { "epoch": 0.4220158611807768, "grad_norm": 5.655495912880931, "learning_rate": 1.2967128192734902e-06, "loss": 1.1471, "step": 3113 }, { "epoch": 0.4221514268284417, "grad_norm": 5.978759855685248, "learning_rate": 1.2962934285666924e-06, "loss": 1.1466, "step": 3114 }, { "epoch": 0.42228699247610657, "grad_norm": 6.444514786549146, "learning_rate": 1.295873980722046e-06, "loss": 1.1636, "step": 3115 }, { "epoch": 0.42242255812377144, "grad_norm": 7.551020328349644, "learning_rate": 1.2954544758204374e-06, "loss": 1.1474, "step": 3116 }, { "epoch": 0.4225581237714363, "grad_norm": 6.518406742572277, "learning_rate": 1.2950349139427659e-06, "loss": 1.1158, "step": 3117 }, { "epoch": 0.4226936894191012, "grad_norm": 6.371852542885168, "learning_rate": 1.2946152951699398e-06, "loss": 1.1421, "step": 3118 }, { "epoch": 0.4228292550667661, "grad_norm": 7.9227204350979274, "learning_rate": 1.2941956195828797e-06, "loss": 1.1428, "step": 3119 }, { "epoch": 0.422964820714431, "grad_norm": 5.718042847032326, "learning_rate": 1.2937758872625166e-06, "loss": 1.1304, "step": 3120 }, { "epoch": 0.42310038636209585, "grad_norm": 6.132662152488239, "learning_rate": 1.2933560982897924e-06, "loss": 1.1015, "step": 3121 }, { "epoch": 0.4232359520097607, "grad_norm": 5.092840378719257, "learning_rate": 1.2929362527456604e-06, "loss": 1.1438, "step": 3122 }, { "epoch": 0.4233715176574256, "grad_norm": 7.959271587781029, "learning_rate": 1.2925163507110843e-06, "loss": 1.1387, "step": 3123 }, { "epoch": 0.4235070833050905, "grad_norm": 6.173025076921749, "learning_rate": 1.292096392267039e-06, "loss": 1.1261, "step": 3124 }, { "epoch": 0.4236426489527554, "grad_norm": 20.520207995757996, "learning_rate": 1.2916763774945101e-06, "loss": 1.1154, "step": 3125 }, { "epoch": 0.42377821460042026, "grad_norm": 5.04149698421718, "learning_rate": 1.2912563064744938e-06, "loss": 1.136, "step": 3126 }, { "epoch": 0.4239137802480851, "grad_norm": 10.0414123171041, "learning_rate": 1.2908361792879984e-06, "loss": 1.1398, "step": 3127 }, { "epoch": 0.42404934589575, "grad_norm": 7.9043982474436, "learning_rate": 1.2904159960160415e-06, "loss": 1.1201, "step": 3128 }, { "epoch": 0.4241849115434149, "grad_norm": 13.504337150412649, "learning_rate": 1.289995756739652e-06, "loss": 1.1414, "step": 3129 }, { "epoch": 0.4243204771910798, "grad_norm": 7.952905601079424, "learning_rate": 1.2895754615398697e-06, "loss": 1.0824, "step": 3130 }, { "epoch": 0.42445604283874466, "grad_norm": 7.31081446080399, "learning_rate": 1.2891551104977457e-06, "loss": 1.101, "step": 3131 }, { "epoch": 0.42459160848640953, "grad_norm": 6.013990705153432, "learning_rate": 1.2887347036943407e-06, "loss": 1.1324, "step": 3132 }, { "epoch": 0.4247271741340744, "grad_norm": 6.199497590794517, "learning_rate": 1.288314241210728e-06, "loss": 1.1566, "step": 3133 }, { "epoch": 0.42486273978173933, "grad_norm": 5.022915783787234, "learning_rate": 1.2878937231279892e-06, "loss": 1.1414, "step": 3134 }, { "epoch": 0.4249983054294042, "grad_norm": 4.8693692735466785, "learning_rate": 1.2874731495272181e-06, "loss": 1.0866, "step": 3135 }, { "epoch": 0.42513387107706907, "grad_norm": 5.269737656689055, "learning_rate": 1.2870525204895197e-06, "loss": 1.1716, "step": 3136 }, { "epoch": 0.42526943672473394, "grad_norm": 11.309873707296813, "learning_rate": 1.2866318360960084e-06, "loss": 1.117, "step": 3137 }, { "epoch": 0.4254050023723988, "grad_norm": 15.948932455515582, "learning_rate": 1.2862110964278102e-06, "loss": 1.1327, "step": 3138 }, { "epoch": 0.42554056802006374, "grad_norm": 4.8119242717512805, "learning_rate": 1.2857903015660612e-06, "loss": 1.1555, "step": 3139 }, { "epoch": 0.4256761336677286, "grad_norm": 6.241120084664898, "learning_rate": 1.2853694515919082e-06, "loss": 1.1353, "step": 3140 }, { "epoch": 0.4258116993153935, "grad_norm": 18.388954428707024, "learning_rate": 1.2849485465865092e-06, "loss": 1.1308, "step": 3141 }, { "epoch": 0.42594726496305835, "grad_norm": 9.75748388516646, "learning_rate": 1.2845275866310324e-06, "loss": 1.1371, "step": 3142 }, { "epoch": 0.4260828306107232, "grad_norm": 5.111542623053446, "learning_rate": 1.2841065718066563e-06, "loss": 1.114, "step": 3143 }, { "epoch": 0.42621839625838814, "grad_norm": 5.699819799293461, "learning_rate": 1.2836855021945705e-06, "loss": 1.141, "step": 3144 }, { "epoch": 0.426353961906053, "grad_norm": 5.1694237241898, "learning_rate": 1.283264377875975e-06, "loss": 1.1379, "step": 3145 }, { "epoch": 0.4264895275537179, "grad_norm": 9.227423562622723, "learning_rate": 1.2828431989320797e-06, "loss": 1.1039, "step": 3146 }, { "epoch": 0.42662509320138275, "grad_norm": 6.266542046280416, "learning_rate": 1.2824219654441067e-06, "loss": 1.1038, "step": 3147 }, { "epoch": 0.4267606588490476, "grad_norm": 5.622980164399701, "learning_rate": 1.2820006774932866e-06, "loss": 1.1469, "step": 3148 }, { "epoch": 0.42689622449671255, "grad_norm": 8.032453423404359, "learning_rate": 1.281579335160862e-06, "loss": 1.1181, "step": 3149 }, { "epoch": 0.4270317901443774, "grad_norm": 6.217862551978473, "learning_rate": 1.281157938528085e-06, "loss": 1.1088, "step": 3150 }, { "epoch": 0.4271673557920423, "grad_norm": 4.668555074258723, "learning_rate": 1.280736487676219e-06, "loss": 1.1313, "step": 3151 }, { "epoch": 0.42730292143970716, "grad_norm": 6.498603291732299, "learning_rate": 1.2803149826865375e-06, "loss": 1.1246, "step": 3152 }, { "epoch": 0.4274384870873721, "grad_norm": 9.331859604893125, "learning_rate": 1.279893423640324e-06, "loss": 1.1213, "step": 3153 }, { "epoch": 0.42757405273503696, "grad_norm": 6.161881029731658, "learning_rate": 1.2794718106188734e-06, "loss": 1.168, "step": 3154 }, { "epoch": 0.42770961838270183, "grad_norm": 5.299022615075627, "learning_rate": 1.27905014370349e-06, "loss": 1.1506, "step": 3155 }, { "epoch": 0.4278451840303667, "grad_norm": 6.1327062240836625, "learning_rate": 1.2786284229754892e-06, "loss": 1.1108, "step": 3156 }, { "epoch": 0.42798074967803157, "grad_norm": 4.9913351840726, "learning_rate": 1.2782066485161961e-06, "loss": 1.1239, "step": 3157 }, { "epoch": 0.4281163153256965, "grad_norm": 6.067344969672752, "learning_rate": 1.2777848204069473e-06, "loss": 1.1559, "step": 3158 }, { "epoch": 0.42825188097336137, "grad_norm": 6.5395651940616935, "learning_rate": 1.2773629387290883e-06, "loss": 1.117, "step": 3159 }, { "epoch": 0.42838744662102624, "grad_norm": 4.878004673419701, "learning_rate": 1.276941003563976e-06, "loss": 1.0904, "step": 3160 }, { "epoch": 0.4285230122686911, "grad_norm": 5.162098897015877, "learning_rate": 1.276519014992977e-06, "loss": 1.1513, "step": 3161 }, { "epoch": 0.428658577916356, "grad_norm": 5.480036995315611, "learning_rate": 1.276096973097469e-06, "loss": 1.0926, "step": 3162 }, { "epoch": 0.4287941435640209, "grad_norm": 4.7712619752202325, "learning_rate": 1.275674877958839e-06, "loss": 1.1302, "step": 3163 }, { "epoch": 0.4289297092116858, "grad_norm": 5.683498104952013, "learning_rate": 1.2752527296584847e-06, "loss": 1.1264, "step": 3164 }, { "epoch": 0.42906527485935064, "grad_norm": 7.130239704105728, "learning_rate": 1.2748305282778142e-06, "loss": 1.1085, "step": 3165 }, { "epoch": 0.4292008405070155, "grad_norm": 19.157853888435675, "learning_rate": 1.2744082738982457e-06, "loss": 1.1278, "step": 3166 }, { "epoch": 0.4293364061546804, "grad_norm": 7.742133090870079, "learning_rate": 1.2739859666012076e-06, "loss": 1.1305, "step": 3167 }, { "epoch": 0.4294719718023453, "grad_norm": 7.684638104042392, "learning_rate": 1.2735636064681387e-06, "loss": 1.1238, "step": 3168 }, { "epoch": 0.4296075374500102, "grad_norm": 10.403309657170885, "learning_rate": 1.2731411935804877e-06, "loss": 1.1182, "step": 3169 }, { "epoch": 0.42974310309767505, "grad_norm": 16.1129890004508, "learning_rate": 1.2727187280197133e-06, "loss": 1.0965, "step": 3170 }, { "epoch": 0.4298786687453399, "grad_norm": 7.895779155095832, "learning_rate": 1.272296209867285e-06, "loss": 1.1357, "step": 3171 }, { "epoch": 0.4300142343930048, "grad_norm": 9.124518063940787, "learning_rate": 1.2718736392046824e-06, "loss": 1.1176, "step": 3172 }, { "epoch": 0.4301498000406697, "grad_norm": 6.178349131283142, "learning_rate": 1.271451016113394e-06, "loss": 1.0983, "step": 3173 }, { "epoch": 0.4302853656883346, "grad_norm": 7.496975757744404, "learning_rate": 1.27102834067492e-06, "loss": 1.1557, "step": 3174 }, { "epoch": 0.43042093133599946, "grad_norm": 8.01744145083237, "learning_rate": 1.2706056129707703e-06, "loss": 1.1512, "step": 3175 }, { "epoch": 0.4305564969836643, "grad_norm": 4.648994736729065, "learning_rate": 1.2701828330824638e-06, "loss": 1.1373, "step": 3176 }, { "epoch": 0.4306920626313292, "grad_norm": 8.363801966856489, "learning_rate": 1.2697600010915306e-06, "loss": 1.1686, "step": 3177 }, { "epoch": 0.4308276282789941, "grad_norm": 6.348856488363784, "learning_rate": 1.2693371170795107e-06, "loss": 1.0929, "step": 3178 }, { "epoch": 0.430963193926659, "grad_norm": 8.652472701681909, "learning_rate": 1.2689141811279536e-06, "loss": 1.1304, "step": 3179 }, { "epoch": 0.43109875957432386, "grad_norm": 8.277948492475952, "learning_rate": 1.2684911933184193e-06, "loss": 1.1327, "step": 3180 }, { "epoch": 0.43123432522198873, "grad_norm": 6.233125124309289, "learning_rate": 1.2680681537324779e-06, "loss": 1.1428, "step": 3181 }, { "epoch": 0.4313698908696536, "grad_norm": 6.2448769429167905, "learning_rate": 1.267645062451709e-06, "loss": 1.1142, "step": 3182 }, { "epoch": 0.43150545651731853, "grad_norm": 6.72238865104783, "learning_rate": 1.2672219195577023e-06, "loss": 1.2013, "step": 3183 }, { "epoch": 0.4316410221649834, "grad_norm": 12.972502648437684, "learning_rate": 1.266798725132058e-06, "loss": 1.1396, "step": 3184 }, { "epoch": 0.43177658781264827, "grad_norm": 4.725590087042657, "learning_rate": 1.2663754792563852e-06, "loss": 1.1077, "step": 3185 }, { "epoch": 0.43191215346031314, "grad_norm": 8.480163196610928, "learning_rate": 1.2659521820123042e-06, "loss": 1.1534, "step": 3186 }, { "epoch": 0.432047719107978, "grad_norm": 7.386781500847957, "learning_rate": 1.265528833481444e-06, "loss": 1.137, "step": 3187 }, { "epoch": 0.43218328475564294, "grad_norm": 5.561283628757799, "learning_rate": 1.2651054337454443e-06, "loss": 1.1187, "step": 3188 }, { "epoch": 0.4323188504033078, "grad_norm": 14.071484874985314, "learning_rate": 1.2646819828859545e-06, "loss": 1.1354, "step": 3189 }, { "epoch": 0.4324544160509727, "grad_norm": 7.196716413224134, "learning_rate": 1.2642584809846333e-06, "loss": 1.1305, "step": 3190 }, { "epoch": 0.43258998169863755, "grad_norm": 7.471610163987864, "learning_rate": 1.2638349281231503e-06, "loss": 1.1539, "step": 3191 }, { "epoch": 0.4327255473463024, "grad_norm": 10.17773836404869, "learning_rate": 1.2634113243831836e-06, "loss": 1.1668, "step": 3192 }, { "epoch": 0.43286111299396735, "grad_norm": 6.086168230709986, "learning_rate": 1.2629876698464223e-06, "loss": 1.1046, "step": 3193 }, { "epoch": 0.4329966786416322, "grad_norm": 3.815759260101911, "learning_rate": 1.2625639645945652e-06, "loss": 1.1362, "step": 3194 }, { "epoch": 0.4331322442892971, "grad_norm": 5.439703003170956, "learning_rate": 1.2621402087093195e-06, "loss": 1.138, "step": 3195 }, { "epoch": 0.43326780993696196, "grad_norm": 11.500883320907297, "learning_rate": 1.261716402272404e-06, "loss": 1.1335, "step": 3196 }, { "epoch": 0.4334033755846269, "grad_norm": 9.678861344572807, "learning_rate": 1.2612925453655462e-06, "loss": 1.1198, "step": 3197 }, { "epoch": 0.43353894123229175, "grad_norm": 7.689928504466703, "learning_rate": 1.2608686380704838e-06, "loss": 1.0747, "step": 3198 }, { "epoch": 0.4336745068799566, "grad_norm": 6.25606529098209, "learning_rate": 1.2604446804689635e-06, "loss": 1.1265, "step": 3199 }, { "epoch": 0.4338100725276215, "grad_norm": 8.522244509674938, "learning_rate": 1.2600206726427422e-06, "loss": 1.1219, "step": 3200 }, { "epoch": 0.43394563817528636, "grad_norm": 5.993230843657443, "learning_rate": 1.2595966146735868e-06, "loss": 1.1468, "step": 3201 }, { "epoch": 0.4340812038229513, "grad_norm": 8.092675672754101, "learning_rate": 1.2591725066432734e-06, "loss": 1.1226, "step": 3202 }, { "epoch": 0.43421676947061616, "grad_norm": 6.3951822073696905, "learning_rate": 1.258748348633588e-06, "loss": 1.1067, "step": 3203 }, { "epoch": 0.43435233511828103, "grad_norm": 5.041908109065967, "learning_rate": 1.2583241407263259e-06, "loss": 1.134, "step": 3204 }, { "epoch": 0.4344879007659459, "grad_norm": 6.0976177726679515, "learning_rate": 1.2578998830032924e-06, "loss": 1.1461, "step": 3205 }, { "epoch": 0.43462346641361077, "grad_norm": 4.946867169136148, "learning_rate": 1.257475575546302e-06, "loss": 1.1196, "step": 3206 }, { "epoch": 0.4347590320612757, "grad_norm": 6.438976905376317, "learning_rate": 1.2570512184371796e-06, "loss": 1.1091, "step": 3207 }, { "epoch": 0.43489459770894057, "grad_norm": 4.77561802816255, "learning_rate": 1.2566268117577583e-06, "loss": 1.1374, "step": 3208 }, { "epoch": 0.43503016335660544, "grad_norm": 12.78667973790552, "learning_rate": 1.2562023555898823e-06, "loss": 1.1354, "step": 3209 }, { "epoch": 0.4351657290042703, "grad_norm": 6.9279741652694184, "learning_rate": 1.2557778500154044e-06, "loss": 1.118, "step": 3210 }, { "epoch": 0.4353012946519352, "grad_norm": 5.733064260606424, "learning_rate": 1.2553532951161868e-06, "loss": 1.1575, "step": 3211 }, { "epoch": 0.4354368602996001, "grad_norm": 5.127412200194971, "learning_rate": 1.2549286909741024e-06, "loss": 1.0733, "step": 3212 }, { "epoch": 0.435572425947265, "grad_norm": 6.0617212784940575, "learning_rate": 1.254504037671032e-06, "loss": 1.1577, "step": 3213 }, { "epoch": 0.43570799159492984, "grad_norm": 6.172022119838969, "learning_rate": 1.2540793352888667e-06, "loss": 1.1123, "step": 3214 }, { "epoch": 0.4358435572425947, "grad_norm": 6.120627071325605, "learning_rate": 1.2536545839095072e-06, "loss": 1.1582, "step": 3215 }, { "epoch": 0.4359791228902596, "grad_norm": 8.978821335923877, "learning_rate": 1.2532297836148636e-06, "loss": 1.0993, "step": 3216 }, { "epoch": 0.4361146885379245, "grad_norm": 8.737959360850306, "learning_rate": 1.2528049344868553e-06, "loss": 1.133, "step": 3217 }, { "epoch": 0.4362502541855894, "grad_norm": 5.340486654096328, "learning_rate": 1.2523800366074104e-06, "loss": 1.1398, "step": 3218 }, { "epoch": 0.43638581983325425, "grad_norm": 4.685819831763947, "learning_rate": 1.251955090058468e-06, "loss": 1.1467, "step": 3219 }, { "epoch": 0.4365213854809191, "grad_norm": 7.313679658748579, "learning_rate": 1.251530094921975e-06, "loss": 1.161, "step": 3220 }, { "epoch": 0.436656951128584, "grad_norm": 7.44799439282618, "learning_rate": 1.2511050512798889e-06, "loss": 1.0984, "step": 3221 }, { "epoch": 0.4367925167762489, "grad_norm": 6.372327991272596, "learning_rate": 1.2506799592141754e-06, "loss": 1.0995, "step": 3222 }, { "epoch": 0.4369280824239138, "grad_norm": 5.640991374062286, "learning_rate": 1.2502548188068109e-06, "loss": 1.141, "step": 3223 }, { "epoch": 0.43706364807157866, "grad_norm": 5.62490818706577, "learning_rate": 1.24982963013978e-06, "loss": 1.0998, "step": 3224 }, { "epoch": 0.43719921371924353, "grad_norm": 6.032000663601148, "learning_rate": 1.2494043932950768e-06, "loss": 1.1426, "step": 3225 }, { "epoch": 0.4373347793669084, "grad_norm": 6.0319156302567425, "learning_rate": 1.248979108354705e-06, "loss": 1.1257, "step": 3226 }, { "epoch": 0.4374703450145733, "grad_norm": 6.84949951683651, "learning_rate": 1.2485537754006776e-06, "loss": 1.0926, "step": 3227 }, { "epoch": 0.4376059106622382, "grad_norm": 6.85543492549992, "learning_rate": 1.2481283945150164e-06, "loss": 1.1464, "step": 3228 }, { "epoch": 0.43774147630990307, "grad_norm": 6.073089451930092, "learning_rate": 1.2477029657797531e-06, "loss": 1.1452, "step": 3229 }, { "epoch": 0.43787704195756794, "grad_norm": 6.17521440493632, "learning_rate": 1.247277489276928e-06, "loss": 1.16, "step": 3230 }, { "epoch": 0.4380126076052328, "grad_norm": 6.320225886999974, "learning_rate": 1.2468519650885912e-06, "loss": 1.1447, "step": 3231 }, { "epoch": 0.43814817325289773, "grad_norm": 5.384221445435883, "learning_rate": 1.2464263932968012e-06, "loss": 1.1186, "step": 3232 }, { "epoch": 0.4382837389005626, "grad_norm": 5.655373640422454, "learning_rate": 1.2460007739836265e-06, "loss": 1.1423, "step": 3233 }, { "epoch": 0.4384193045482275, "grad_norm": 9.845597117365624, "learning_rate": 1.2455751072311443e-06, "loss": 1.1117, "step": 3234 }, { "epoch": 0.43855487019589234, "grad_norm": 5.208376104496644, "learning_rate": 1.245149393121441e-06, "loss": 1.1536, "step": 3235 }, { "epoch": 0.43869043584355727, "grad_norm": 34.33150858502042, "learning_rate": 1.2447236317366124e-06, "loss": 1.1302, "step": 3236 }, { "epoch": 0.43882600149122214, "grad_norm": 6.240768654161426, "learning_rate": 1.2442978231587633e-06, "loss": 1.1112, "step": 3237 }, { "epoch": 0.438961567138887, "grad_norm": 5.781998966954475, "learning_rate": 1.2438719674700073e-06, "loss": 1.1325, "step": 3238 }, { "epoch": 0.4390971327865519, "grad_norm": 6.159751426893348, "learning_rate": 1.2434460647524675e-06, "loss": 1.0971, "step": 3239 }, { "epoch": 0.43923269843421675, "grad_norm": 5.420195217483647, "learning_rate": 1.2430201150882755e-06, "loss": 1.108, "step": 3240 }, { "epoch": 0.4393682640818817, "grad_norm": 5.691590585356758, "learning_rate": 1.2425941185595726e-06, "loss": 1.1671, "step": 3241 }, { "epoch": 0.43950382972954655, "grad_norm": 10.33512850288376, "learning_rate": 1.2421680752485092e-06, "loss": 1.1221, "step": 3242 }, { "epoch": 0.4396393953772114, "grad_norm": 5.64087009416511, "learning_rate": 1.241741985237244e-06, "loss": 1.0879, "step": 3243 }, { "epoch": 0.4397749610248763, "grad_norm": 6.206492651948602, "learning_rate": 1.241315848607945e-06, "loss": 1.151, "step": 3244 }, { "epoch": 0.43991052667254116, "grad_norm": 8.669137574285074, "learning_rate": 1.2408896654427894e-06, "loss": 1.1217, "step": 3245 }, { "epoch": 0.4400460923202061, "grad_norm": 7.266796262555917, "learning_rate": 1.2404634358239632e-06, "loss": 1.1737, "step": 3246 }, { "epoch": 0.44018165796787095, "grad_norm": 5.122464773355301, "learning_rate": 1.2400371598336617e-06, "loss": 1.1666, "step": 3247 }, { "epoch": 0.4403172236155358, "grad_norm": 7.254516382162455, "learning_rate": 1.2396108375540885e-06, "loss": 1.1382, "step": 3248 }, { "epoch": 0.4404527892632007, "grad_norm": 4.30320634944668, "learning_rate": 1.2391844690674567e-06, "loss": 1.1362, "step": 3249 }, { "epoch": 0.44058835491086557, "grad_norm": 5.528427986269313, "learning_rate": 1.2387580544559881e-06, "loss": 1.1205, "step": 3250 }, { "epoch": 0.4407239205585305, "grad_norm": 5.385686735676245, "learning_rate": 1.2383315938019132e-06, "loss": 1.1306, "step": 3251 }, { "epoch": 0.44085948620619536, "grad_norm": 4.746417449545714, "learning_rate": 1.2379050871874719e-06, "loss": 1.1534, "step": 3252 }, { "epoch": 0.44099505185386023, "grad_norm": 4.536400277565008, "learning_rate": 1.2374785346949125e-06, "loss": 1.0934, "step": 3253 }, { "epoch": 0.4411306175015251, "grad_norm": 4.253795311035701, "learning_rate": 1.2370519364064919e-06, "loss": 1.1222, "step": 3254 }, { "epoch": 0.44126618314918997, "grad_norm": 5.798752084789247, "learning_rate": 1.2366252924044767e-06, "loss": 1.1378, "step": 3255 }, { "epoch": 0.4414017487968549, "grad_norm": 13.88657362381247, "learning_rate": 1.236198602771142e-06, "loss": 1.1301, "step": 3256 }, { "epoch": 0.44153731444451977, "grad_norm": 9.626746360149959, "learning_rate": 1.2357718675887707e-06, "loss": 1.1271, "step": 3257 }, { "epoch": 0.44167288009218464, "grad_norm": 5.936500210528101, "learning_rate": 1.235345086939656e-06, "loss": 1.1057, "step": 3258 }, { "epoch": 0.4418084457398495, "grad_norm": 6.973242988432946, "learning_rate": 1.234918260906099e-06, "loss": 1.1293, "step": 3259 }, { "epoch": 0.4419440113875144, "grad_norm": 6.072995332970267, "learning_rate": 1.2344913895704096e-06, "loss": 1.1191, "step": 3260 }, { "epoch": 0.4420795770351793, "grad_norm": 11.11462013026865, "learning_rate": 1.234064473014907e-06, "loss": 1.1497, "step": 3261 }, { "epoch": 0.4422151426828442, "grad_norm": 10.124990168429477, "learning_rate": 1.2336375113219182e-06, "loss": 1.1435, "step": 3262 }, { "epoch": 0.44235070833050905, "grad_norm": 4.99859401449057, "learning_rate": 1.2332105045737796e-06, "loss": 1.123, "step": 3263 }, { "epoch": 0.4424862739781739, "grad_norm": 3.821071578298574, "learning_rate": 1.2327834528528357e-06, "loss": 1.1133, "step": 3264 }, { "epoch": 0.4426218396258388, "grad_norm": 8.796528451013508, "learning_rate": 1.2323563562414407e-06, "loss": 1.1335, "step": 3265 }, { "epoch": 0.4427574052735037, "grad_norm": 4.6006514614513385, "learning_rate": 1.2319292148219566e-06, "loss": 1.145, "step": 3266 }, { "epoch": 0.4428929709211686, "grad_norm": 5.02980295791182, "learning_rate": 1.2315020286767538e-06, "loss": 1.1367, "step": 3267 }, { "epoch": 0.44302853656883345, "grad_norm": 5.703471320851248, "learning_rate": 1.2310747978882126e-06, "loss": 1.1342, "step": 3268 }, { "epoch": 0.4431641022164983, "grad_norm": 7.810006519827976, "learning_rate": 1.2306475225387203e-06, "loss": 1.1526, "step": 3269 }, { "epoch": 0.4432996678641632, "grad_norm": 4.612968102692829, "learning_rate": 1.2302202027106739e-06, "loss": 1.1158, "step": 3270 }, { "epoch": 0.4434352335118281, "grad_norm": 5.789506383404235, "learning_rate": 1.2297928384864787e-06, "loss": 1.1563, "step": 3271 }, { "epoch": 0.443570799159493, "grad_norm": 4.733111233404886, "learning_rate": 1.2293654299485485e-06, "loss": 1.1183, "step": 3272 }, { "epoch": 0.44370636480715786, "grad_norm": 7.1284173781076765, "learning_rate": 1.2289379771793059e-06, "loss": 1.1314, "step": 3273 }, { "epoch": 0.44384193045482273, "grad_norm": 5.9705770510399665, "learning_rate": 1.2285104802611812e-06, "loss": 1.1377, "step": 3274 }, { "epoch": 0.44397749610248766, "grad_norm": 4.338138873818989, "learning_rate": 1.2280829392766143e-06, "loss": 1.1714, "step": 3275 }, { "epoch": 0.4441130617501525, "grad_norm": 4.9467292868653585, "learning_rate": 1.2276553543080527e-06, "loss": 1.1398, "step": 3276 }, { "epoch": 0.4442486273978174, "grad_norm": 4.1051153006012004, "learning_rate": 1.2272277254379533e-06, "loss": 1.1373, "step": 3277 }, { "epoch": 0.44438419304548227, "grad_norm": 6.035203157878023, "learning_rate": 1.2268000527487803e-06, "loss": 1.1259, "step": 3278 }, { "epoch": 0.44451975869314714, "grad_norm": 5.264137384090931, "learning_rate": 1.2263723363230076e-06, "loss": 1.1247, "step": 3279 }, { "epoch": 0.44465532434081206, "grad_norm": 5.651946264263782, "learning_rate": 1.2259445762431168e-06, "loss": 1.1062, "step": 3280 }, { "epoch": 0.44479088998847693, "grad_norm": 4.398315739230311, "learning_rate": 1.2255167725915981e-06, "loss": 1.1391, "step": 3281 }, { "epoch": 0.4449264556361418, "grad_norm": 5.473477640012857, "learning_rate": 1.2250889254509496e-06, "loss": 1.1375, "step": 3282 }, { "epoch": 0.4450620212838067, "grad_norm": 10.060438856830123, "learning_rate": 1.2246610349036785e-06, "loss": 1.1263, "step": 3283 }, { "epoch": 0.44519758693147155, "grad_norm": 4.556266448851805, "learning_rate": 1.2242331010323005e-06, "loss": 1.1257, "step": 3284 }, { "epoch": 0.44533315257913647, "grad_norm": 6.823524915513537, "learning_rate": 1.2238051239193387e-06, "loss": 1.129, "step": 3285 }, { "epoch": 0.44546871822680134, "grad_norm": 3.918659796609414, "learning_rate": 1.2233771036473255e-06, "loss": 1.1183, "step": 3286 }, { "epoch": 0.4456042838744662, "grad_norm": 4.406279805365834, "learning_rate": 1.2229490402988014e-06, "loss": 1.1292, "step": 3287 }, { "epoch": 0.4457398495221311, "grad_norm": 5.624255889272037, "learning_rate": 1.2225209339563143e-06, "loss": 1.124, "step": 3288 }, { "epoch": 0.44587541516979595, "grad_norm": 4.3498964550253705, "learning_rate": 1.2220927847024218e-06, "loss": 1.1544, "step": 3289 }, { "epoch": 0.4460109808174609, "grad_norm": 5.193554715470567, "learning_rate": 1.2216645926196886e-06, "loss": 1.1442, "step": 3290 }, { "epoch": 0.44614654646512575, "grad_norm": 4.890248723148548, "learning_rate": 1.2212363577906889e-06, "loss": 1.0905, "step": 3291 }, { "epoch": 0.4462821121127906, "grad_norm": 11.059395531943634, "learning_rate": 1.2208080802980037e-06, "loss": 1.0977, "step": 3292 }, { "epoch": 0.4464176777604555, "grad_norm": 5.565397501343814, "learning_rate": 1.220379760224223e-06, "loss": 1.142, "step": 3293 }, { "epoch": 0.44655324340812036, "grad_norm": 12.62862972416622, "learning_rate": 1.2199513976519451e-06, "loss": 1.1166, "step": 3294 }, { "epoch": 0.4466888090557853, "grad_norm": 5.897348266493272, "learning_rate": 1.2195229926637764e-06, "loss": 1.1466, "step": 3295 }, { "epoch": 0.44682437470345016, "grad_norm": 6.292786120294747, "learning_rate": 1.2190945453423315e-06, "loss": 1.0768, "step": 3296 }, { "epoch": 0.446959940351115, "grad_norm": 12.149507916886941, "learning_rate": 1.2186660557702328e-06, "loss": 1.1659, "step": 3297 }, { "epoch": 0.4470955059987799, "grad_norm": 6.853112249650174, "learning_rate": 1.2182375240301114e-06, "loss": 1.0979, "step": 3298 }, { "epoch": 0.44723107164644477, "grad_norm": 4.671891804327987, "learning_rate": 1.217808950204606e-06, "loss": 1.148, "step": 3299 }, { "epoch": 0.4473666372941097, "grad_norm": 7.488030818401849, "learning_rate": 1.217380334376364e-06, "loss": 1.1282, "step": 3300 }, { "epoch": 0.44750220294177456, "grad_norm": 7.320176481785205, "learning_rate": 1.2169516766280404e-06, "loss": 1.1632, "step": 3301 }, { "epoch": 0.44763776858943943, "grad_norm": 6.037729694324678, "learning_rate": 1.2165229770422986e-06, "loss": 1.1543, "step": 3302 }, { "epoch": 0.4477733342371043, "grad_norm": 5.785924204065225, "learning_rate": 1.2160942357018096e-06, "loss": 1.1275, "step": 3303 }, { "epoch": 0.4479088998847692, "grad_norm": 4.898788672381491, "learning_rate": 1.215665452689253e-06, "loss": 1.105, "step": 3304 }, { "epoch": 0.4480444655324341, "grad_norm": 5.1405038789634, "learning_rate": 1.2152366280873163e-06, "loss": 1.1492, "step": 3305 }, { "epoch": 0.44818003118009897, "grad_norm": 4.098220386248088, "learning_rate": 1.2148077619786948e-06, "loss": 1.1409, "step": 3306 }, { "epoch": 0.44831559682776384, "grad_norm": 12.060157400604941, "learning_rate": 1.214378854446092e-06, "loss": 1.1305, "step": 3307 }, { "epoch": 0.4484511624754287, "grad_norm": 7.794099566817304, "learning_rate": 1.2139499055722193e-06, "loss": 1.1525, "step": 3308 }, { "epoch": 0.4485867281230936, "grad_norm": 6.722902548715125, "learning_rate": 1.213520915439796e-06, "loss": 1.0591, "step": 3309 }, { "epoch": 0.4487222937707585, "grad_norm": 4.200690103136644, "learning_rate": 1.2130918841315496e-06, "loss": 1.1295, "step": 3310 }, { "epoch": 0.4488578594184234, "grad_norm": 4.886655312980908, "learning_rate": 1.2126628117302156e-06, "loss": 1.1156, "step": 3311 }, { "epoch": 0.44899342506608825, "grad_norm": 6.321849886970733, "learning_rate": 1.212233698318537e-06, "loss": 1.1446, "step": 3312 }, { "epoch": 0.4491289907137531, "grad_norm": 3.996725129728276, "learning_rate": 1.2118045439792648e-06, "loss": 1.1326, "step": 3313 }, { "epoch": 0.44926455636141804, "grad_norm": 4.210464157453001, "learning_rate": 1.2113753487951584e-06, "loss": 1.1123, "step": 3314 }, { "epoch": 0.4494001220090829, "grad_norm": 5.687203820527144, "learning_rate": 1.2109461128489842e-06, "loss": 1.1244, "step": 3315 }, { "epoch": 0.4495356876567478, "grad_norm": 4.392126193164462, "learning_rate": 1.2105168362235176e-06, "loss": 1.1222, "step": 3316 }, { "epoch": 0.44967125330441265, "grad_norm": 3.5527330242461264, "learning_rate": 1.2100875190015405e-06, "loss": 1.1079, "step": 3317 }, { "epoch": 0.4498068189520775, "grad_norm": 5.937411132524683, "learning_rate": 1.2096581612658438e-06, "loss": 1.1374, "step": 3318 }, { "epoch": 0.44994238459974245, "grad_norm": 4.1685157203972, "learning_rate": 1.2092287630992257e-06, "loss": 1.1558, "step": 3319 }, { "epoch": 0.4500779502474073, "grad_norm": 4.865499649832769, "learning_rate": 1.208799324584492e-06, "loss": 1.0969, "step": 3320 }, { "epoch": 0.4502135158950722, "grad_norm": 5.267176319036853, "learning_rate": 1.2083698458044572e-06, "loss": 1.1458, "step": 3321 }, { "epoch": 0.45034908154273706, "grad_norm": 12.774960220395744, "learning_rate": 1.207940326841942e-06, "loss": 1.1213, "step": 3322 }, { "epoch": 0.45048464719040193, "grad_norm": 4.771450770840541, "learning_rate": 1.2075107677797763e-06, "loss": 1.1064, "step": 3323 }, { "epoch": 0.45062021283806686, "grad_norm": 4.73374683650834, "learning_rate": 1.2070811687007969e-06, "loss": 1.123, "step": 3324 }, { "epoch": 0.45075577848573173, "grad_norm": 3.8805358302523216, "learning_rate": 1.2066515296878488e-06, "loss": 1.1482, "step": 3325 }, { "epoch": 0.4508913441333966, "grad_norm": 4.165995235494357, "learning_rate": 1.2062218508237845e-06, "loss": 1.1609, "step": 3326 }, { "epoch": 0.45102690978106147, "grad_norm": 3.960856789102782, "learning_rate": 1.2057921321914638e-06, "loss": 1.0789, "step": 3327 }, { "epoch": 0.45116247542872634, "grad_norm": 8.259379708477871, "learning_rate": 1.205362373873755e-06, "loss": 1.1086, "step": 3328 }, { "epoch": 0.45129804107639127, "grad_norm": 6.031472960854027, "learning_rate": 1.2049325759535334e-06, "loss": 1.1187, "step": 3329 }, { "epoch": 0.45143360672405614, "grad_norm": 5.19457964651156, "learning_rate": 1.2045027385136823e-06, "loss": 1.1107, "step": 3330 }, { "epoch": 0.451569172371721, "grad_norm": 5.8939360338464795, "learning_rate": 1.2040728616370924e-06, "loss": 1.1278, "step": 3331 }, { "epoch": 0.4517047380193859, "grad_norm": 6.428412462157201, "learning_rate": 1.2036429454066616e-06, "loss": 1.1311, "step": 3332 }, { "epoch": 0.45184030366705075, "grad_norm": 4.349352272782414, "learning_rate": 1.2032129899052965e-06, "loss": 1.1703, "step": 3333 }, { "epoch": 0.4519758693147157, "grad_norm": 4.166950862179209, "learning_rate": 1.2027829952159104e-06, "loss": 1.166, "step": 3334 }, { "epoch": 0.45211143496238054, "grad_norm": 4.519888715000602, "learning_rate": 1.2023529614214242e-06, "loss": 1.1019, "step": 3335 }, { "epoch": 0.4522470006100454, "grad_norm": 3.3607951201110713, "learning_rate": 1.2019228886047666e-06, "loss": 1.1365, "step": 3336 }, { "epoch": 0.4523825662577103, "grad_norm": 6.26092559938051, "learning_rate": 1.2014927768488739e-06, "loss": 1.0991, "step": 3337 }, { "epoch": 0.45251813190537515, "grad_norm": 4.044589759009956, "learning_rate": 1.2010626262366896e-06, "loss": 1.1079, "step": 3338 }, { "epoch": 0.4526536975530401, "grad_norm": 6.325208222081865, "learning_rate": 1.2006324368511651e-06, "loss": 1.1465, "step": 3339 }, { "epoch": 0.45278926320070495, "grad_norm": 4.55942751588761, "learning_rate": 1.200202208775259e-06, "loss": 1.1315, "step": 3340 }, { "epoch": 0.4529248288483698, "grad_norm": 5.707273736371441, "learning_rate": 1.1997719420919368e-06, "loss": 1.1267, "step": 3341 }, { "epoch": 0.4530603944960347, "grad_norm": 4.300041396976331, "learning_rate": 1.1993416368841727e-06, "loss": 1.1484, "step": 3342 }, { "epoch": 0.45319596014369956, "grad_norm": 3.828280047097263, "learning_rate": 1.1989112932349473e-06, "loss": 1.1052, "step": 3343 }, { "epoch": 0.4533315257913645, "grad_norm": 6.501566317441756, "learning_rate": 1.1984809112272493e-06, "loss": 1.1353, "step": 3344 }, { "epoch": 0.45346709143902936, "grad_norm": 7.392023632551206, "learning_rate": 1.1980504909440743e-06, "loss": 1.1236, "step": 3345 }, { "epoch": 0.4536026570866942, "grad_norm": 5.096260774361155, "learning_rate": 1.1976200324684253e-06, "loss": 1.1654, "step": 3346 }, { "epoch": 0.4537382227343591, "grad_norm": 6.3700028241874485, "learning_rate": 1.197189535883313e-06, "loss": 1.1345, "step": 3347 }, { "epoch": 0.45387378838202397, "grad_norm": 4.480048547871284, "learning_rate": 1.1967590012717552e-06, "loss": 1.0755, "step": 3348 }, { "epoch": 0.4540093540296889, "grad_norm": 4.365149400729586, "learning_rate": 1.1963284287167772e-06, "loss": 1.1573, "step": 3349 }, { "epoch": 0.45414491967735376, "grad_norm": 4.195619438639918, "learning_rate": 1.1958978183014111e-06, "loss": 1.1381, "step": 3350 }, { "epoch": 0.45428048532501863, "grad_norm": 5.026794073740857, "learning_rate": 1.1954671701086976e-06, "loss": 1.117, "step": 3351 }, { "epoch": 0.4544160509726835, "grad_norm": 4.563126604707908, "learning_rate": 1.195036484221683e-06, "loss": 1.156, "step": 3352 }, { "epoch": 0.45455161662034843, "grad_norm": 5.303378651765072, "learning_rate": 1.194605760723422e-06, "loss": 1.1275, "step": 3353 }, { "epoch": 0.4546871822680133, "grad_norm": 6.1178533584086745, "learning_rate": 1.1941749996969762e-06, "loss": 1.1489, "step": 3354 }, { "epoch": 0.45482274791567817, "grad_norm": 4.405934855645418, "learning_rate": 1.1937442012254144e-06, "loss": 1.1082, "step": 3355 }, { "epoch": 0.45495831356334304, "grad_norm": 5.096957129376892, "learning_rate": 1.1933133653918126e-06, "loss": 1.1366, "step": 3356 }, { "epoch": 0.4550938792110079, "grad_norm": 5.6926345182734615, "learning_rate": 1.1928824922792543e-06, "loss": 1.1393, "step": 3357 }, { "epoch": 0.45522944485867284, "grad_norm": 5.238061992506139, "learning_rate": 1.1924515819708298e-06, "loss": 1.1493, "step": 3358 }, { "epoch": 0.4553650105063377, "grad_norm": 4.905961720002976, "learning_rate": 1.1920206345496372e-06, "loss": 1.1459, "step": 3359 }, { "epoch": 0.4555005761540026, "grad_norm": 6.84925007164683, "learning_rate": 1.1915896500987809e-06, "loss": 1.0846, "step": 3360 }, { "epoch": 0.45563614180166745, "grad_norm": 3.7239991059312922, "learning_rate": 1.1911586287013725e-06, "loss": 1.1206, "step": 3361 }, { "epoch": 0.4557717074493323, "grad_norm": 7.3696030442684775, "learning_rate": 1.1907275704405316e-06, "loss": 1.152, "step": 3362 }, { "epoch": 0.45590727309699725, "grad_norm": 5.797603094075113, "learning_rate": 1.1902964753993842e-06, "loss": 1.1266, "step": 3363 }, { "epoch": 0.4560428387446621, "grad_norm": 4.676298391507037, "learning_rate": 1.1898653436610637e-06, "loss": 1.1184, "step": 3364 }, { "epoch": 0.456178404392327, "grad_norm": 4.59238261197654, "learning_rate": 1.1894341753087105e-06, "loss": 1.0943, "step": 3365 }, { "epoch": 0.45631397003999186, "grad_norm": 5.206432910309266, "learning_rate": 1.1890029704254716e-06, "loss": 1.1147, "step": 3366 }, { "epoch": 0.4564495356876567, "grad_norm": 5.135431987543262, "learning_rate": 1.188571729094502e-06, "loss": 1.0938, "step": 3367 }, { "epoch": 0.45658510133532165, "grad_norm": 6.947622203492716, "learning_rate": 1.1881404513989629e-06, "loss": 1.1205, "step": 3368 }, { "epoch": 0.4567206669829865, "grad_norm": 4.9271331237477485, "learning_rate": 1.1877091374220228e-06, "loss": 1.1543, "step": 3369 }, { "epoch": 0.4568562326306514, "grad_norm": 6.032468873800592, "learning_rate": 1.1872777872468572e-06, "loss": 1.1156, "step": 3370 }, { "epoch": 0.45699179827831626, "grad_norm": 8.3604922193919, "learning_rate": 1.1868464009566485e-06, "loss": 1.1312, "step": 3371 }, { "epoch": 0.45712736392598113, "grad_norm": 5.676549689612551, "learning_rate": 1.1864149786345868e-06, "loss": 1.1135, "step": 3372 }, { "epoch": 0.45726292957364606, "grad_norm": 5.137915516154086, "learning_rate": 1.1859835203638675e-06, "loss": 1.154, "step": 3373 }, { "epoch": 0.45739849522131093, "grad_norm": 4.996008340688538, "learning_rate": 1.1855520262276943e-06, "loss": 1.0734, "step": 3374 }, { "epoch": 0.4575340608689758, "grad_norm": 5.737317638536702, "learning_rate": 1.1851204963092775e-06, "loss": 1.1021, "step": 3375 }, { "epoch": 0.45766962651664067, "grad_norm": 4.388306496565139, "learning_rate": 1.1846889306918344e-06, "loss": 1.1431, "step": 3376 }, { "epoch": 0.45780519216430554, "grad_norm": 4.2164297657584635, "learning_rate": 1.1842573294585889e-06, "loss": 1.153, "step": 3377 }, { "epoch": 0.45794075781197047, "grad_norm": 3.6558080292342345, "learning_rate": 1.1838256926927718e-06, "loss": 1.0733, "step": 3378 }, { "epoch": 0.45807632345963534, "grad_norm": 4.744436982734307, "learning_rate": 1.1833940204776208e-06, "loss": 1.1549, "step": 3379 }, { "epoch": 0.4582118891073002, "grad_norm": 4.714658189880318, "learning_rate": 1.1829623128963807e-06, "loss": 1.1591, "step": 3380 }, { "epoch": 0.4583474547549651, "grad_norm": 3.88717774408736, "learning_rate": 1.1825305700323025e-06, "loss": 1.1269, "step": 3381 }, { "epoch": 0.45848302040262995, "grad_norm": 4.48316053901375, "learning_rate": 1.182098791968645e-06, "loss": 1.0976, "step": 3382 }, { "epoch": 0.4586185860502949, "grad_norm": 6.594272555048351, "learning_rate": 1.1816669787886727e-06, "loss": 1.0991, "step": 3383 }, { "epoch": 0.45875415169795974, "grad_norm": 4.575901389161572, "learning_rate": 1.1812351305756575e-06, "loss": 1.1434, "step": 3384 }, { "epoch": 0.4588897173456246, "grad_norm": 5.169112664997988, "learning_rate": 1.1808032474128782e-06, "loss": 1.1395, "step": 3385 }, { "epoch": 0.4590252829932895, "grad_norm": 9.174172256515783, "learning_rate": 1.1803713293836198e-06, "loss": 1.1156, "step": 3386 }, { "epoch": 0.45916084864095436, "grad_norm": 7.156230956701056, "learning_rate": 1.179939376571174e-06, "loss": 1.1473, "step": 3387 }, { "epoch": 0.4592964142886193, "grad_norm": 6.783579235247961, "learning_rate": 1.1795073890588401e-06, "loss": 1.1342, "step": 3388 }, { "epoch": 0.45943197993628415, "grad_norm": 6.095234559421291, "learning_rate": 1.179075366929923e-06, "loss": 1.1111, "step": 3389 }, { "epoch": 0.459567545583949, "grad_norm": 4.503094346990304, "learning_rate": 1.1786433102677348e-06, "loss": 1.1217, "step": 3390 }, { "epoch": 0.4597031112316139, "grad_norm": 6.40071900332651, "learning_rate": 1.1782112191555946e-06, "loss": 1.1335, "step": 3391 }, { "epoch": 0.4598386768792788, "grad_norm": 5.843122717580466, "learning_rate": 1.1777790936768272e-06, "loss": 1.1648, "step": 3392 }, { "epoch": 0.4599742425269437, "grad_norm": 5.087087288863347, "learning_rate": 1.1773469339147653e-06, "loss": 1.1066, "step": 3393 }, { "epoch": 0.46010980817460856, "grad_norm": 5.584055253791335, "learning_rate": 1.1769147399527466e-06, "loss": 1.1232, "step": 3394 }, { "epoch": 0.46024537382227343, "grad_norm": 4.676637818871636, "learning_rate": 1.176482511874117e-06, "loss": 1.1552, "step": 3395 }, { "epoch": 0.4603809394699383, "grad_norm": 3.8761929536324855, "learning_rate": 1.1760502497622281e-06, "loss": 1.1426, "step": 3396 }, { "epoch": 0.4605165051176032, "grad_norm": 5.22824850547533, "learning_rate": 1.1756179537004383e-06, "loss": 1.1191, "step": 3397 }, { "epoch": 0.4606520707652681, "grad_norm": 7.302466572828658, "learning_rate": 1.175185623772112e-06, "loss": 1.0978, "step": 3398 }, { "epoch": 0.46078763641293297, "grad_norm": 5.022133017128681, "learning_rate": 1.1747532600606213e-06, "loss": 1.1222, "step": 3399 }, { "epoch": 0.46092320206059784, "grad_norm": 6.5824916076281745, "learning_rate": 1.174320862649344e-06, "loss": 1.1281, "step": 3400 }, { "epoch": 0.4610587677082627, "grad_norm": 3.694010913242481, "learning_rate": 1.173888431621664e-06, "loss": 1.1222, "step": 3401 }, { "epoch": 0.46119433335592763, "grad_norm": 5.073996328877876, "learning_rate": 1.1734559670609727e-06, "loss": 1.0885, "step": 3402 }, { "epoch": 0.4613298990035925, "grad_norm": 4.731969160571848, "learning_rate": 1.1730234690506671e-06, "loss": 1.102, "step": 3403 }, { "epoch": 0.4614654646512574, "grad_norm": 3.9987104921351677, "learning_rate": 1.1725909376741515e-06, "loss": 1.1143, "step": 3404 }, { "epoch": 0.46160103029892224, "grad_norm": 4.688178712803482, "learning_rate": 1.1721583730148356e-06, "loss": 1.1127, "step": 3405 }, { "epoch": 0.4617365959465871, "grad_norm": 4.096566565358854, "learning_rate": 1.1717257751561367e-06, "loss": 1.1211, "step": 3406 }, { "epoch": 0.46187216159425204, "grad_norm": 5.018837767433783, "learning_rate": 1.1712931441814775e-06, "loss": 1.0873, "step": 3407 }, { "epoch": 0.4620077272419169, "grad_norm": 3.926800721992281, "learning_rate": 1.1708604801742877e-06, "loss": 1.1185, "step": 3408 }, { "epoch": 0.4621432928895818, "grad_norm": 5.1379230377780285, "learning_rate": 1.1704277832180027e-06, "loss": 1.1203, "step": 3409 }, { "epoch": 0.46227885853724665, "grad_norm": 5.266432899274433, "learning_rate": 1.1699950533960652e-06, "loss": 1.1206, "step": 3410 }, { "epoch": 0.4624144241849115, "grad_norm": 8.88476885990278, "learning_rate": 1.1695622907919233e-06, "loss": 1.1193, "step": 3411 }, { "epoch": 0.46254998983257645, "grad_norm": 4.574037043702375, "learning_rate": 1.1691294954890323e-06, "loss": 1.1291, "step": 3412 }, { "epoch": 0.4626855554802413, "grad_norm": 4.2739434825630385, "learning_rate": 1.168696667570853e-06, "loss": 1.1288, "step": 3413 }, { "epoch": 0.4628211211279062, "grad_norm": 11.603672419151767, "learning_rate": 1.1682638071208532e-06, "loss": 1.1029, "step": 3414 }, { "epoch": 0.46295668677557106, "grad_norm": 7.675689857295719, "learning_rate": 1.1678309142225062e-06, "loss": 1.1203, "step": 3415 }, { "epoch": 0.46309225242323593, "grad_norm": 7.278445763984161, "learning_rate": 1.1673979889592923e-06, "loss": 1.1259, "step": 3416 }, { "epoch": 0.46322781807090085, "grad_norm": 7.039061598559124, "learning_rate": 1.1669650314146973e-06, "loss": 1.1511, "step": 3417 }, { "epoch": 0.4633633837185657, "grad_norm": 6.076023207678846, "learning_rate": 1.166532041672214e-06, "loss": 1.1441, "step": 3418 }, { "epoch": 0.4634989493662306, "grad_norm": 16.755388624142917, "learning_rate": 1.166099019815341e-06, "loss": 1.0831, "step": 3419 }, { "epoch": 0.46363451501389547, "grad_norm": 5.99331286596274, "learning_rate": 1.1656659659275835e-06, "loss": 1.1294, "step": 3420 }, { "epoch": 0.46377008066156034, "grad_norm": 4.051583535304583, "learning_rate": 1.1652328800924517e-06, "loss": 1.1188, "step": 3421 }, { "epoch": 0.46390564630922526, "grad_norm": 4.70861102813402, "learning_rate": 1.1647997623934636e-06, "loss": 1.0977, "step": 3422 }, { "epoch": 0.46404121195689013, "grad_norm": 5.773107327439053, "learning_rate": 1.164366612914142e-06, "loss": 1.1171, "step": 3423 }, { "epoch": 0.464176777604555, "grad_norm": 7.119969593174533, "learning_rate": 1.1639334317380164e-06, "loss": 1.0919, "step": 3424 }, { "epoch": 0.4643123432522199, "grad_norm": 5.919838685265717, "learning_rate": 1.1635002189486228e-06, "loss": 1.1216, "step": 3425 }, { "epoch": 0.46444790889988474, "grad_norm": 4.724653218633891, "learning_rate": 1.1630669746295022e-06, "loss": 1.1452, "step": 3426 }, { "epoch": 0.46458347454754967, "grad_norm": 4.07367287157445, "learning_rate": 1.1626336988642029e-06, "loss": 1.1215, "step": 3427 }, { "epoch": 0.46471904019521454, "grad_norm": 3.6887817755477843, "learning_rate": 1.1622003917362788e-06, "loss": 1.1133, "step": 3428 }, { "epoch": 0.4648546058428794, "grad_norm": 7.00267581286451, "learning_rate": 1.1617670533292892e-06, "loss": 1.0999, "step": 3429 }, { "epoch": 0.4649901714905443, "grad_norm": 4.903215678604196, "learning_rate": 1.1613336837268001e-06, "loss": 1.1103, "step": 3430 }, { "epoch": 0.4651257371382092, "grad_norm": 5.338217440433534, "learning_rate": 1.1609002830123837e-06, "loss": 1.1418, "step": 3431 }, { "epoch": 0.4652613027858741, "grad_norm": 3.3866388126950824, "learning_rate": 1.1604668512696179e-06, "loss": 1.1592, "step": 3432 }, { "epoch": 0.46539686843353895, "grad_norm": 3.8605155591216853, "learning_rate": 1.1600333885820867e-06, "loss": 1.1104, "step": 3433 }, { "epoch": 0.4655324340812038, "grad_norm": 5.424823226797592, "learning_rate": 1.1595998950333793e-06, "loss": 1.129, "step": 3434 }, { "epoch": 0.4656679997288687, "grad_norm": 6.205650415648533, "learning_rate": 1.159166370707092e-06, "loss": 1.1343, "step": 3435 }, { "epoch": 0.4658035653765336, "grad_norm": 5.0979103536262445, "learning_rate": 1.1587328156868266e-06, "loss": 1.1795, "step": 3436 }, { "epoch": 0.4659391310241985, "grad_norm": 25.470118517186833, "learning_rate": 1.1582992300561906e-06, "loss": 1.1668, "step": 3437 }, { "epoch": 0.46607469667186335, "grad_norm": 4.034710646982364, "learning_rate": 1.157865613898798e-06, "loss": 1.1117, "step": 3438 }, { "epoch": 0.4662102623195282, "grad_norm": 4.480790682174862, "learning_rate": 1.1574319672982673e-06, "loss": 1.0965, "step": 3439 }, { "epoch": 0.4663458279671931, "grad_norm": 4.296250497335116, "learning_rate": 1.1569982903382247e-06, "loss": 1.118, "step": 3440 }, { "epoch": 0.466481393614858, "grad_norm": 4.0932780277240965, "learning_rate": 1.156564583102301e-06, "loss": 1.0845, "step": 3441 }, { "epoch": 0.4666169592625229, "grad_norm": 5.341322101561189, "learning_rate": 1.1561308456741336e-06, "loss": 1.1114, "step": 3442 }, { "epoch": 0.46675252491018776, "grad_norm": 5.077521989673349, "learning_rate": 1.1556970781373648e-06, "loss": 1.134, "step": 3443 }, { "epoch": 0.46688809055785263, "grad_norm": 6.862454325256562, "learning_rate": 1.1552632805756436e-06, "loss": 1.1477, "step": 3444 }, { "epoch": 0.4670236562055175, "grad_norm": 10.326016856604417, "learning_rate": 1.154829453072624e-06, "loss": 1.1469, "step": 3445 }, { "epoch": 0.4671592218531824, "grad_norm": 7.034273984036902, "learning_rate": 1.1543955957119667e-06, "loss": 1.1249, "step": 3446 }, { "epoch": 0.4672947875008473, "grad_norm": 4.744359948936734, "learning_rate": 1.1539617085773373e-06, "loss": 1.0935, "step": 3447 }, { "epoch": 0.46743035314851217, "grad_norm": 3.8369238272932495, "learning_rate": 1.1535277917524079e-06, "loss": 1.134, "step": 3448 }, { "epoch": 0.46756591879617704, "grad_norm": 3.4917292920218705, "learning_rate": 1.153093845320856e-06, "loss": 1.1382, "step": 3449 }, { "epoch": 0.4677014844438419, "grad_norm": 3.820303831919467, "learning_rate": 1.152659869366364e-06, "loss": 1.1139, "step": 3450 }, { "epoch": 0.46783705009150683, "grad_norm": 4.572630287040819, "learning_rate": 1.1522258639726215e-06, "loss": 1.1394, "step": 3451 }, { "epoch": 0.4679726157391717, "grad_norm": 4.339480468814823, "learning_rate": 1.1517918292233226e-06, "loss": 1.1232, "step": 3452 }, { "epoch": 0.4681081813868366, "grad_norm": 6.943818976044756, "learning_rate": 1.1513577652021678e-06, "loss": 1.1219, "step": 3453 }, { "epoch": 0.46824374703450145, "grad_norm": 7.434360557019198, "learning_rate": 1.1509236719928627e-06, "loss": 1.1557, "step": 3454 }, { "epoch": 0.4683793126821663, "grad_norm": 4.420780332154501, "learning_rate": 1.1504895496791185e-06, "loss": 1.1356, "step": 3455 }, { "epoch": 0.46851487832983124, "grad_norm": 4.899986167403883, "learning_rate": 1.1500553983446526e-06, "loss": 1.1504, "step": 3456 }, { "epoch": 0.4686504439774961, "grad_norm": 4.469283163538998, "learning_rate": 1.1496212180731877e-06, "loss": 1.1512, "step": 3457 }, { "epoch": 0.468786009625161, "grad_norm": 9.87582000744964, "learning_rate": 1.149187008948452e-06, "loss": 1.1766, "step": 3458 }, { "epoch": 0.46892157527282585, "grad_norm": 5.879355071687508, "learning_rate": 1.1487527710541794e-06, "loss": 1.1064, "step": 3459 }, { "epoch": 0.4690571409204907, "grad_norm": 4.608280970926042, "learning_rate": 1.1483185044741088e-06, "loss": 1.1066, "step": 3460 }, { "epoch": 0.46919270656815565, "grad_norm": 4.447159142249938, "learning_rate": 1.1478842092919854e-06, "loss": 1.1374, "step": 3461 }, { "epoch": 0.4693282722158205, "grad_norm": 4.77973914293931, "learning_rate": 1.1474498855915596e-06, "loss": 1.0978, "step": 3462 }, { "epoch": 0.4694638378634854, "grad_norm": 3.778433127350483, "learning_rate": 1.1470155334565869e-06, "loss": 1.1475, "step": 3463 }, { "epoch": 0.46959940351115026, "grad_norm": 5.149239322224076, "learning_rate": 1.1465811529708295e-06, "loss": 1.1247, "step": 3464 }, { "epoch": 0.46973496915881513, "grad_norm": 20.09955418650107, "learning_rate": 1.1461467442180537e-06, "loss": 1.1385, "step": 3465 }, { "epoch": 0.46987053480648006, "grad_norm": 5.4507898126237295, "learning_rate": 1.1457123072820319e-06, "loss": 1.1292, "step": 3466 }, { "epoch": 0.4700061004541449, "grad_norm": 5.869852671003192, "learning_rate": 1.1452778422465416e-06, "loss": 1.1249, "step": 3467 }, { "epoch": 0.4701416661018098, "grad_norm": 4.375165852130629, "learning_rate": 1.1448433491953665e-06, "loss": 1.1398, "step": 3468 }, { "epoch": 0.47027723174947467, "grad_norm": 7.080655608366544, "learning_rate": 1.1444088282122945e-06, "loss": 1.148, "step": 3469 }, { "epoch": 0.47041279739713954, "grad_norm": 5.299683163641077, "learning_rate": 1.1439742793811205e-06, "loss": 1.1357, "step": 3470 }, { "epoch": 0.47054836304480446, "grad_norm": 6.2120233081706955, "learning_rate": 1.1435397027856425e-06, "loss": 1.1146, "step": 3471 }, { "epoch": 0.47068392869246933, "grad_norm": 3.889399363616782, "learning_rate": 1.1431050985096663e-06, "loss": 1.1009, "step": 3472 }, { "epoch": 0.4708194943401342, "grad_norm": 8.241582782201505, "learning_rate": 1.142670466637001e-06, "loss": 1.1629, "step": 3473 }, { "epoch": 0.4709550599877991, "grad_norm": 4.225912523475701, "learning_rate": 1.142235807251463e-06, "loss": 1.1116, "step": 3474 }, { "epoch": 0.471090625635464, "grad_norm": 6.126010671613387, "learning_rate": 1.1418011204368717e-06, "loss": 1.1213, "step": 3475 }, { "epoch": 0.47122619128312887, "grad_norm": 6.625375062195554, "learning_rate": 1.1413664062770538e-06, "loss": 1.0949, "step": 3476 }, { "epoch": 0.47136175693079374, "grad_norm": 4.07270005314386, "learning_rate": 1.1409316648558404e-06, "loss": 1.1228, "step": 3477 }, { "epoch": 0.4714973225784586, "grad_norm": 3.3649860411727595, "learning_rate": 1.140496896257068e-06, "loss": 1.1514, "step": 3478 }, { "epoch": 0.4716328882261235, "grad_norm": 11.263646160222097, "learning_rate": 1.140062100564578e-06, "loss": 1.1063, "step": 3479 }, { "epoch": 0.4717684538737884, "grad_norm": 5.111194327149916, "learning_rate": 1.1396272778622175e-06, "loss": 1.1087, "step": 3480 }, { "epoch": 0.4719040195214533, "grad_norm": 5.527784501239378, "learning_rate": 1.1391924282338388e-06, "loss": 1.1174, "step": 3481 }, { "epoch": 0.47203958516911815, "grad_norm": 4.482215670626855, "learning_rate": 1.1387575517632987e-06, "loss": 1.0804, "step": 3482 }, { "epoch": 0.472175150816783, "grad_norm": 6.375293433614654, "learning_rate": 1.1383226485344604e-06, "loss": 1.0924, "step": 3483 }, { "epoch": 0.4723107164644479, "grad_norm": 4.037747940346856, "learning_rate": 1.137887718631191e-06, "loss": 1.1562, "step": 3484 }, { "epoch": 0.4724462821121128, "grad_norm": 4.965628022742793, "learning_rate": 1.1374527621373636e-06, "loss": 1.1251, "step": 3485 }, { "epoch": 0.4725818477597777, "grad_norm": 4.949403862949067, "learning_rate": 1.1370177791368558e-06, "loss": 1.1034, "step": 3486 }, { "epoch": 0.47271741340744255, "grad_norm": 4.694204959801355, "learning_rate": 1.136582769713551e-06, "loss": 1.1262, "step": 3487 }, { "epoch": 0.4728529790551074, "grad_norm": 4.6509581074443895, "learning_rate": 1.136147733951337e-06, "loss": 1.1214, "step": 3488 }, { "epoch": 0.4729885447027723, "grad_norm": 7.379827493448073, "learning_rate": 1.1357126719341076e-06, "loss": 1.1193, "step": 3489 }, { "epoch": 0.4731241103504372, "grad_norm": 7.586110656276199, "learning_rate": 1.1352775837457605e-06, "loss": 1.1019, "step": 3490 }, { "epoch": 0.4732596759981021, "grad_norm": 5.017053142156086, "learning_rate": 1.134842469470199e-06, "loss": 1.1416, "step": 3491 }, { "epoch": 0.47339524164576696, "grad_norm": 4.050571317209004, "learning_rate": 1.1344073291913317e-06, "loss": 1.1503, "step": 3492 }, { "epoch": 0.47353080729343183, "grad_norm": 3.8948266565555487, "learning_rate": 1.133972162993072e-06, "loss": 1.1187, "step": 3493 }, { "epoch": 0.4736663729410967, "grad_norm": 4.913816760425426, "learning_rate": 1.1335369709593382e-06, "loss": 1.0998, "step": 3494 }, { "epoch": 0.47380193858876163, "grad_norm": 17.987564396259693, "learning_rate": 1.1331017531740533e-06, "loss": 1.1078, "step": 3495 }, { "epoch": 0.4739375042364265, "grad_norm": 5.073091522521665, "learning_rate": 1.132666509721146e-06, "loss": 1.1368, "step": 3496 }, { "epoch": 0.47407306988409137, "grad_norm": 4.379460545969394, "learning_rate": 1.1322312406845498e-06, "loss": 1.1252, "step": 3497 }, { "epoch": 0.47420863553175624, "grad_norm": 11.848950253059462, "learning_rate": 1.1317959461482028e-06, "loss": 1.1289, "step": 3498 }, { "epoch": 0.4743442011794211, "grad_norm": 4.235156631191535, "learning_rate": 1.1313606261960475e-06, "loss": 1.1242, "step": 3499 }, { "epoch": 0.47447976682708604, "grad_norm": 5.5458274263282945, "learning_rate": 1.1309252809120324e-06, "loss": 1.1068, "step": 3500 }, { "epoch": 0.4746153324747509, "grad_norm": 5.833119264734957, "learning_rate": 1.1304899103801105e-06, "loss": 1.1442, "step": 3501 }, { "epoch": 0.4747508981224158, "grad_norm": 7.062479367460244, "learning_rate": 1.1300545146842393e-06, "loss": 1.1047, "step": 3502 }, { "epoch": 0.47488646377008065, "grad_norm": 4.733415474990543, "learning_rate": 1.1296190939083815e-06, "loss": 1.1001, "step": 3503 }, { "epoch": 0.4750220294177455, "grad_norm": 3.3614341173696465, "learning_rate": 1.1291836481365045e-06, "loss": 1.1274, "step": 3504 }, { "epoch": 0.47515759506541044, "grad_norm": 6.963486986089347, "learning_rate": 1.128748177452581e-06, "loss": 1.1005, "step": 3505 }, { "epoch": 0.4752931607130753, "grad_norm": 10.360823090343036, "learning_rate": 1.1283126819405873e-06, "loss": 1.1502, "step": 3506 }, { "epoch": 0.4754287263607402, "grad_norm": 4.746955395736472, "learning_rate": 1.127877161684506e-06, "loss": 1.1144, "step": 3507 }, { "epoch": 0.47556429200840505, "grad_norm": 5.179097110292373, "learning_rate": 1.1274416167683234e-06, "loss": 1.1271, "step": 3508 }, { "epoch": 0.4756998576560699, "grad_norm": 3.991667387526601, "learning_rate": 1.127006047276031e-06, "loss": 1.0923, "step": 3509 }, { "epoch": 0.47583542330373485, "grad_norm": 5.855275069116671, "learning_rate": 1.126570453291625e-06, "loss": 1.1205, "step": 3510 }, { "epoch": 0.4759709889513997, "grad_norm": 4.738775519740478, "learning_rate": 1.126134834899106e-06, "loss": 1.1405, "step": 3511 }, { "epoch": 0.4761065545990646, "grad_norm": 4.695768555851801, "learning_rate": 1.1256991921824798e-06, "loss": 1.1468, "step": 3512 }, { "epoch": 0.47624212024672946, "grad_norm": 5.626297095545447, "learning_rate": 1.1252635252257567e-06, "loss": 1.1017, "step": 3513 }, { "epoch": 0.4763776858943944, "grad_norm": 9.623087967370749, "learning_rate": 1.1248278341129516e-06, "loss": 1.0575, "step": 3514 }, { "epoch": 0.47651325154205926, "grad_norm": 3.8858082311045123, "learning_rate": 1.1243921189280838e-06, "loss": 1.1136, "step": 3515 }, { "epoch": 0.4766488171897241, "grad_norm": 4.42317280848375, "learning_rate": 1.1239563797551777e-06, "loss": 1.1274, "step": 3516 }, { "epoch": 0.476784382837389, "grad_norm": 5.008950377666138, "learning_rate": 1.1235206166782622e-06, "loss": 1.1262, "step": 3517 }, { "epoch": 0.47691994848505387, "grad_norm": 7.809692171807839, "learning_rate": 1.1230848297813712e-06, "loss": 1.1042, "step": 3518 }, { "epoch": 0.4770555141327188, "grad_norm": 4.57873264750795, "learning_rate": 1.122649019148542e-06, "loss": 1.1044, "step": 3519 }, { "epoch": 0.47719107978038366, "grad_norm": 4.697708964698666, "learning_rate": 1.122213184863818e-06, "loss": 1.142, "step": 3520 }, { "epoch": 0.47732664542804853, "grad_norm": 6.562620333211296, "learning_rate": 1.1217773270112454e-06, "loss": 1.1717, "step": 3521 }, { "epoch": 0.4774622110757134, "grad_norm": 5.428614005570119, "learning_rate": 1.121341445674877e-06, "loss": 1.0997, "step": 3522 }, { "epoch": 0.4775977767233783, "grad_norm": 4.475049304755516, "learning_rate": 1.1209055409387682e-06, "loss": 1.1069, "step": 3523 }, { "epoch": 0.4777333423710432, "grad_norm": 4.615317957563407, "learning_rate": 1.1204696128869803e-06, "loss": 1.0899, "step": 3524 }, { "epoch": 0.47786890801870807, "grad_norm": 5.731743749580526, "learning_rate": 1.1200336616035788e-06, "loss": 1.0987, "step": 3525 }, { "epoch": 0.47800447366637294, "grad_norm": 5.883050087872071, "learning_rate": 1.1195976871726332e-06, "loss": 1.1197, "step": 3526 }, { "epoch": 0.4781400393140378, "grad_norm": 4.135579110324949, "learning_rate": 1.1191616896782172e-06, "loss": 1.0879, "step": 3527 }, { "epoch": 0.4782756049617027, "grad_norm": 7.556788959106009, "learning_rate": 1.1187256692044103e-06, "loss": 1.1413, "step": 3528 }, { "epoch": 0.4784111706093676, "grad_norm": 4.19741265006602, "learning_rate": 1.1182896258352949e-06, "loss": 1.1214, "step": 3529 }, { "epoch": 0.4785467362570325, "grad_norm": 4.488439319678872, "learning_rate": 1.1178535596549592e-06, "loss": 1.0906, "step": 3530 }, { "epoch": 0.47868230190469735, "grad_norm": 4.466054012751972, "learning_rate": 1.1174174707474947e-06, "loss": 1.073, "step": 3531 }, { "epoch": 0.4788178675523622, "grad_norm": 5.795374698459985, "learning_rate": 1.116981359196998e-06, "loss": 1.1447, "step": 3532 }, { "epoch": 0.4789534332000271, "grad_norm": 7.803028748329756, "learning_rate": 1.116545225087569e-06, "loss": 1.1267, "step": 3533 }, { "epoch": 0.479088998847692, "grad_norm": 4.390217404211979, "learning_rate": 1.1161090685033138e-06, "loss": 1.1492, "step": 3534 }, { "epoch": 0.4792245644953569, "grad_norm": 4.421460337803361, "learning_rate": 1.1156728895283412e-06, "loss": 1.1294, "step": 3535 }, { "epoch": 0.47936013014302176, "grad_norm": 8.926263843993452, "learning_rate": 1.1152366882467647e-06, "loss": 1.1096, "step": 3536 }, { "epoch": 0.4794956957906866, "grad_norm": 7.088380081262841, "learning_rate": 1.1148004647427027e-06, "loss": 1.1196, "step": 3537 }, { "epoch": 0.4796312614383515, "grad_norm": 3.8337557783001137, "learning_rate": 1.114364219100277e-06, "loss": 1.1222, "step": 3538 }, { "epoch": 0.4797668270860164, "grad_norm": 7.556404948982881, "learning_rate": 1.1139279514036147e-06, "loss": 1.1082, "step": 3539 }, { "epoch": 0.4799023927336813, "grad_norm": 4.042054059877572, "learning_rate": 1.1134916617368464e-06, "loss": 1.1042, "step": 3540 }, { "epoch": 0.48003795838134616, "grad_norm": 25.835999043985407, "learning_rate": 1.1130553501841066e-06, "loss": 1.0989, "step": 3541 }, { "epoch": 0.48017352402901103, "grad_norm": 7.078044761773009, "learning_rate": 1.112619016829535e-06, "loss": 1.1473, "step": 3542 }, { "epoch": 0.4803090896766759, "grad_norm": 4.47292373792739, "learning_rate": 1.1121826617572752e-06, "loss": 1.1047, "step": 3543 }, { "epoch": 0.48044465532434083, "grad_norm": 4.606338877103414, "learning_rate": 1.1117462850514744e-06, "loss": 1.1436, "step": 3544 }, { "epoch": 0.4805802209720057, "grad_norm": 4.398490740966365, "learning_rate": 1.1113098867962844e-06, "loss": 1.164, "step": 3545 }, { "epoch": 0.48071578661967057, "grad_norm": 5.976984566734351, "learning_rate": 1.1108734670758616e-06, "loss": 1.0851, "step": 3546 }, { "epoch": 0.48085135226733544, "grad_norm": 5.744029250803551, "learning_rate": 1.1104370259743659e-06, "loss": 1.1637, "step": 3547 }, { "epoch": 0.4809869179150003, "grad_norm": 4.325418152243777, "learning_rate": 1.1100005635759612e-06, "loss": 1.0959, "step": 3548 }, { "epoch": 0.48112248356266524, "grad_norm": 3.8905084418756846, "learning_rate": 1.1095640799648162e-06, "loss": 1.1065, "step": 3549 }, { "epoch": 0.4812580492103301, "grad_norm": 4.97662691592034, "learning_rate": 1.1091275752251035e-06, "loss": 1.1063, "step": 3550 }, { "epoch": 0.481393614857995, "grad_norm": 5.2585672941044335, "learning_rate": 1.1086910494409993e-06, "loss": 1.0903, "step": 3551 }, { "epoch": 0.48152918050565985, "grad_norm": 16.63299495948859, "learning_rate": 1.1082545026966841e-06, "loss": 1.1005, "step": 3552 }, { "epoch": 0.4816647461533248, "grad_norm": 7.667542038476758, "learning_rate": 1.1078179350763424e-06, "loss": 1.1113, "step": 3553 }, { "epoch": 0.48180031180098964, "grad_norm": 9.73639190177335, "learning_rate": 1.107381346664163e-06, "loss": 1.1083, "step": 3554 }, { "epoch": 0.4819358774486545, "grad_norm": 7.443012740949242, "learning_rate": 1.1069447375443386e-06, "loss": 1.1338, "step": 3555 }, { "epoch": 0.4820714430963194, "grad_norm": 4.688778232147031, "learning_rate": 1.106508107801066e-06, "loss": 1.095, "step": 3556 }, { "epoch": 0.48220700874398426, "grad_norm": 4.419371135830476, "learning_rate": 1.1060714575185453e-06, "loss": 1.1213, "step": 3557 }, { "epoch": 0.4823425743916492, "grad_norm": 6.987244337110643, "learning_rate": 1.105634786780981e-06, "loss": 1.1155, "step": 3558 }, { "epoch": 0.48247814003931405, "grad_norm": 5.204345394114737, "learning_rate": 1.105198095672582e-06, "loss": 1.0921, "step": 3559 }, { "epoch": 0.4826137056869789, "grad_norm": 8.622336445943859, "learning_rate": 1.104761384277561e-06, "loss": 1.0773, "step": 3560 }, { "epoch": 0.4827492713346438, "grad_norm": 5.376251059013379, "learning_rate": 1.1043246526801338e-06, "loss": 1.1093, "step": 3561 }, { "epoch": 0.48288483698230866, "grad_norm": 4.187411366955389, "learning_rate": 1.1038879009645205e-06, "loss": 1.0913, "step": 3562 }, { "epoch": 0.4830204026299736, "grad_norm": 5.147188488116274, "learning_rate": 1.103451129214946e-06, "loss": 1.1425, "step": 3563 }, { "epoch": 0.48315596827763846, "grad_norm": 4.924692534888988, "learning_rate": 1.1030143375156375e-06, "loss": 1.1182, "step": 3564 }, { "epoch": 0.48329153392530333, "grad_norm": 5.475093240019371, "learning_rate": 1.1025775259508275e-06, "loss": 1.1469, "step": 3565 }, { "epoch": 0.4834270995729682, "grad_norm": 6.5715219023320754, "learning_rate": 1.1021406946047508e-06, "loss": 1.1225, "step": 3566 }, { "epoch": 0.48356266522063307, "grad_norm": 5.303701963041106, "learning_rate": 1.101703843561648e-06, "loss": 1.0803, "step": 3567 }, { "epoch": 0.483698230868298, "grad_norm": 4.148658062229901, "learning_rate": 1.1012669729057615e-06, "loss": 1.1657, "step": 3568 }, { "epoch": 0.48383379651596287, "grad_norm": 5.239145928339928, "learning_rate": 1.1008300827213385e-06, "loss": 1.1553, "step": 3569 }, { "epoch": 0.48396936216362774, "grad_norm": 5.359433090774206, "learning_rate": 1.10039317309263e-06, "loss": 1.1652, "step": 3570 }, { "epoch": 0.4841049278112926, "grad_norm": 5.938994085845094, "learning_rate": 1.0999562441038909e-06, "loss": 1.1101, "step": 3571 }, { "epoch": 0.4842404934589575, "grad_norm": 7.62809461504758, "learning_rate": 1.0995192958393785e-06, "loss": 1.1505, "step": 3572 }, { "epoch": 0.4843760591066224, "grad_norm": 4.793275682717471, "learning_rate": 1.099082328383356e-06, "loss": 1.1293, "step": 3573 }, { "epoch": 0.4845116247542873, "grad_norm": 7.555427142051634, "learning_rate": 1.098645341820088e-06, "loss": 1.1249, "step": 3574 }, { "epoch": 0.48464719040195214, "grad_norm": 5.381581849812383, "learning_rate": 1.098208336233845e-06, "loss": 1.1254, "step": 3575 }, { "epoch": 0.484782756049617, "grad_norm": 7.017749996463136, "learning_rate": 1.0977713117088994e-06, "loss": 1.1233, "step": 3576 }, { "epoch": 0.4849183216972819, "grad_norm": 4.5628845261670845, "learning_rate": 1.097334268329528e-06, "loss": 1.1789, "step": 3577 }, { "epoch": 0.4850538873449468, "grad_norm": 8.024210200845799, "learning_rate": 1.0968972061800115e-06, "loss": 1.1088, "step": 3578 }, { "epoch": 0.4851894529926117, "grad_norm": 3.936253613452419, "learning_rate": 1.0964601253446332e-06, "loss": 1.1026, "step": 3579 }, { "epoch": 0.48532501864027655, "grad_norm": 5.068257567313662, "learning_rate": 1.0960230259076817e-06, "loss": 1.1216, "step": 3580 }, { "epoch": 0.4854605842879414, "grad_norm": 5.2602052654939175, "learning_rate": 1.0955859079534473e-06, "loss": 1.1385, "step": 3581 }, { "epoch": 0.4855961499356063, "grad_norm": 3.4787447196978825, "learning_rate": 1.0951487715662253e-06, "loss": 1.0978, "step": 3582 }, { "epoch": 0.4857317155832712, "grad_norm": 3.5811578009346934, "learning_rate": 1.0947116168303137e-06, "loss": 1.1098, "step": 3583 }, { "epoch": 0.4858672812309361, "grad_norm": 4.674066944449036, "learning_rate": 1.0942744438300141e-06, "loss": 1.1096, "step": 3584 }, { "epoch": 0.48600284687860096, "grad_norm": 3.874285940767111, "learning_rate": 1.0938372526496324e-06, "loss": 1.1039, "step": 3585 }, { "epoch": 0.48613841252626583, "grad_norm": 3.699012216824638, "learning_rate": 1.0934000433734772e-06, "loss": 1.1111, "step": 3586 }, { "epoch": 0.4862739781739307, "grad_norm": 6.144760188600889, "learning_rate": 1.0929628160858611e-06, "loss": 1.1211, "step": 3587 }, { "epoch": 0.4864095438215956, "grad_norm": 9.032746625417735, "learning_rate": 1.0925255708710994e-06, "loss": 1.1077, "step": 3588 }, { "epoch": 0.4865451094692605, "grad_norm": 11.474718588360439, "learning_rate": 1.0920883078135118e-06, "loss": 1.1492, "step": 3589 }, { "epoch": 0.48668067511692537, "grad_norm": 6.521975055967229, "learning_rate": 1.0916510269974208e-06, "loss": 1.1475, "step": 3590 }, { "epoch": 0.48681624076459024, "grad_norm": 5.8191470904408025, "learning_rate": 1.091213728507153e-06, "loss": 1.0926, "step": 3591 }, { "epoch": 0.48695180641225516, "grad_norm": 4.886746236341577, "learning_rate": 1.0907764124270374e-06, "loss": 1.1172, "step": 3592 }, { "epoch": 0.48708737205992003, "grad_norm": 6.07927399525979, "learning_rate": 1.0903390788414072e-06, "loss": 1.1097, "step": 3593 }, { "epoch": 0.4872229377075849, "grad_norm": 4.541757216539432, "learning_rate": 1.089901727834599e-06, "loss": 1.0689, "step": 3594 }, { "epoch": 0.4873585033552498, "grad_norm": 4.922064044729523, "learning_rate": 1.0894643594909518e-06, "loss": 1.1179, "step": 3595 }, { "epoch": 0.48749406900291464, "grad_norm": 4.218323098712131, "learning_rate": 1.0890269738948096e-06, "loss": 1.1082, "step": 3596 }, { "epoch": 0.48762963465057957, "grad_norm": 4.5061942386272476, "learning_rate": 1.088589571130518e-06, "loss": 1.129, "step": 3597 }, { "epoch": 0.48776520029824444, "grad_norm": 4.4702266615821, "learning_rate": 1.0881521512824268e-06, "loss": 1.1141, "step": 3598 }, { "epoch": 0.4879007659459093, "grad_norm": 4.820600090536758, "learning_rate": 1.0877147144348892e-06, "loss": 1.1155, "step": 3599 }, { "epoch": 0.4880363315935742, "grad_norm": 28.193220889798955, "learning_rate": 1.087277260672261e-06, "loss": 1.1422, "step": 3600 }, { "epoch": 0.48817189724123905, "grad_norm": 7.086881976875367, "learning_rate": 1.0868397900789024e-06, "loss": 1.1348, "step": 3601 }, { "epoch": 0.488307462888904, "grad_norm": 4.940582398265469, "learning_rate": 1.0864023027391753e-06, "loss": 1.1299, "step": 3602 }, { "epoch": 0.48844302853656885, "grad_norm": 5.031596422629368, "learning_rate": 1.0859647987374464e-06, "loss": 1.107, "step": 3603 }, { "epoch": 0.4885785941842337, "grad_norm": 8.53813702555973, "learning_rate": 1.0855272781580846e-06, "loss": 1.1325, "step": 3604 }, { "epoch": 0.4887141598318986, "grad_norm": 5.26216111330814, "learning_rate": 1.0850897410854624e-06, "loss": 1.0726, "step": 3605 }, { "epoch": 0.48884972547956346, "grad_norm": 4.320793414688403, "learning_rate": 1.084652187603955e-06, "loss": 1.1397, "step": 3606 }, { "epoch": 0.4889852911272284, "grad_norm": 4.557365903960084, "learning_rate": 1.0842146177979418e-06, "loss": 1.1709, "step": 3607 }, { "epoch": 0.48912085677489325, "grad_norm": 8.55041472944637, "learning_rate": 1.0837770317518043e-06, "loss": 1.1453, "step": 3608 }, { "epoch": 0.4892564224225581, "grad_norm": 6.875938133534136, "learning_rate": 1.083339429549927e-06, "loss": 1.1193, "step": 3609 }, { "epoch": 0.489391988070223, "grad_norm": 4.688297916865262, "learning_rate": 1.0829018112766993e-06, "loss": 1.0954, "step": 3610 }, { "epoch": 0.48952755371788786, "grad_norm": 5.418607305494783, "learning_rate": 1.0824641770165112e-06, "loss": 1.1359, "step": 3611 }, { "epoch": 0.4896631193655528, "grad_norm": 9.220158105566997, "learning_rate": 1.0820265268537578e-06, "loss": 1.164, "step": 3612 }, { "epoch": 0.48979868501321766, "grad_norm": 4.271377597938468, "learning_rate": 1.0815888608728359e-06, "loss": 1.1505, "step": 3613 }, { "epoch": 0.48993425066088253, "grad_norm": 6.516601312768702, "learning_rate": 1.0811511791581463e-06, "loss": 1.104, "step": 3614 }, { "epoch": 0.4900698163085474, "grad_norm": 7.047577524508068, "learning_rate": 1.0807134817940923e-06, "loss": 1.1458, "step": 3615 }, { "epoch": 0.49020538195621227, "grad_norm": 6.120079717974936, "learning_rate": 1.0802757688650805e-06, "loss": 1.1545, "step": 3616 }, { "epoch": 0.4903409476038772, "grad_norm": 6.786258936174741, "learning_rate": 1.0798380404555203e-06, "loss": 1.1091, "step": 3617 }, { "epoch": 0.49047651325154207, "grad_norm": 6.278852825957973, "learning_rate": 1.0794002966498246e-06, "loss": 1.0908, "step": 3618 }, { "epoch": 0.49061207889920694, "grad_norm": 4.719305290061118, "learning_rate": 1.0789625375324078e-06, "loss": 1.079, "step": 3619 }, { "epoch": 0.4907476445468718, "grad_norm": 16.565234471028546, "learning_rate": 1.0785247631876892e-06, "loss": 1.1351, "step": 3620 }, { "epoch": 0.4908832101945367, "grad_norm": 4.870923280805193, "learning_rate": 1.0780869737000898e-06, "loss": 1.0969, "step": 3621 }, { "epoch": 0.4910187758422016, "grad_norm": 5.617549511578282, "learning_rate": 1.0776491691540342e-06, "loss": 1.1161, "step": 3622 }, { "epoch": 0.4911543414898665, "grad_norm": 5.363015086823901, "learning_rate": 1.077211349633949e-06, "loss": 1.1469, "step": 3623 }, { "epoch": 0.49128990713753135, "grad_norm": 6.522070676912514, "learning_rate": 1.0767735152242646e-06, "loss": 1.1312, "step": 3624 }, { "epoch": 0.4914254727851962, "grad_norm": 9.537919089582376, "learning_rate": 1.0763356660094139e-06, "loss": 1.1306, "step": 3625 }, { "epoch": 0.4915610384328611, "grad_norm": 8.244943153921769, "learning_rate": 1.0758978020738323e-06, "loss": 1.1197, "step": 3626 }, { "epoch": 0.491696604080526, "grad_norm": 4.542678246806904, "learning_rate": 1.0754599235019586e-06, "loss": 1.1529, "step": 3627 }, { "epoch": 0.4918321697281909, "grad_norm": 5.272963305614025, "learning_rate": 1.0750220303782345e-06, "loss": 1.146, "step": 3628 }, { "epoch": 0.49196773537585575, "grad_norm": 3.765403838478168, "learning_rate": 1.074584122787104e-06, "loss": 1.1702, "step": 3629 }, { "epoch": 0.4921033010235206, "grad_norm": 5.92920469959499, "learning_rate": 1.074146200813014e-06, "loss": 1.1311, "step": 3630 }, { "epoch": 0.49223886667118555, "grad_norm": 4.845634114237571, "learning_rate": 1.0737082645404147e-06, "loss": 1.0834, "step": 3631 }, { "epoch": 0.4923744323188504, "grad_norm": 5.0549523942380405, "learning_rate": 1.0732703140537583e-06, "loss": 1.1067, "step": 3632 }, { "epoch": 0.4925099979665153, "grad_norm": 4.696944949751006, "learning_rate": 1.0728323494375e-06, "loss": 1.1119, "step": 3633 }, { "epoch": 0.49264556361418016, "grad_norm": 6.4858347914845735, "learning_rate": 1.0723943707760984e-06, "loss": 1.1371, "step": 3634 }, { "epoch": 0.49278112926184503, "grad_norm": 5.390444184446231, "learning_rate": 1.0719563781540135e-06, "loss": 1.1156, "step": 3635 }, { "epoch": 0.49291669490950996, "grad_norm": 4.243617174722322, "learning_rate": 1.071518371655709e-06, "loss": 1.1397, "step": 3636 }, { "epoch": 0.4930522605571748, "grad_norm": 7.586835820846302, "learning_rate": 1.0710803513656514e-06, "loss": 1.1252, "step": 3637 }, { "epoch": 0.4931878262048397, "grad_norm": 4.725119168455558, "learning_rate": 1.0706423173683092e-06, "loss": 1.1171, "step": 3638 }, { "epoch": 0.49332339185250457, "grad_norm": 4.06216587070036, "learning_rate": 1.0702042697481536e-06, "loss": 1.1025, "step": 3639 }, { "epoch": 0.49345895750016944, "grad_norm": 9.453009521060814, "learning_rate": 1.0697662085896583e-06, "loss": 1.1002, "step": 3640 }, { "epoch": 0.49359452314783436, "grad_norm": 7.894362937823171, "learning_rate": 1.0693281339773009e-06, "loss": 1.1304, "step": 3641 }, { "epoch": 0.49373008879549923, "grad_norm": 4.938161296698363, "learning_rate": 1.0688900459955596e-06, "loss": 1.1199, "step": 3642 }, { "epoch": 0.4938656544431641, "grad_norm": 5.680889855913489, "learning_rate": 1.0684519447289171e-06, "loss": 1.0798, "step": 3643 }, { "epoch": 0.494001220090829, "grad_norm": 5.828174880538962, "learning_rate": 1.0680138302618572e-06, "loss": 1.1285, "step": 3644 }, { "epoch": 0.49413678573849384, "grad_norm": 5.1893390342108345, "learning_rate": 1.0675757026788672e-06, "loss": 1.1728, "step": 3645 }, { "epoch": 0.49427235138615877, "grad_norm": 4.836133514182705, "learning_rate": 1.0671375620644363e-06, "loss": 1.0979, "step": 3646 }, { "epoch": 0.49440791703382364, "grad_norm": 6.166122795713711, "learning_rate": 1.0666994085030563e-06, "loss": 1.0736, "step": 3647 }, { "epoch": 0.4945434826814885, "grad_norm": 5.265885224844543, "learning_rate": 1.066261242079222e-06, "loss": 1.0954, "step": 3648 }, { "epoch": 0.4946790483291534, "grad_norm": 6.216142899929929, "learning_rate": 1.0658230628774302e-06, "loss": 1.1725, "step": 3649 }, { "epoch": 0.49481461397681825, "grad_norm": 5.802151890309874, "learning_rate": 1.0653848709821806e-06, "loss": 1.1062, "step": 3650 }, { "epoch": 0.4949501796244832, "grad_norm": 4.676905098238515, "learning_rate": 1.0649466664779744e-06, "loss": 1.13, "step": 3651 }, { "epoch": 0.49508574527214805, "grad_norm": 6.949364590837406, "learning_rate": 1.0645084494493164e-06, "loss": 1.1285, "step": 3652 }, { "epoch": 0.4952213109198129, "grad_norm": 5.397666268484968, "learning_rate": 1.064070219980713e-06, "loss": 1.0781, "step": 3653 }, { "epoch": 0.4953568765674778, "grad_norm": 4.212474791405288, "learning_rate": 1.0636319781566736e-06, "loss": 1.1067, "step": 3654 }, { "epoch": 0.49549244221514266, "grad_norm": 11.400121453301695, "learning_rate": 1.0631937240617093e-06, "loss": 1.1057, "step": 3655 }, { "epoch": 0.4956280078628076, "grad_norm": 4.222321073823303, "learning_rate": 1.062755457780334e-06, "loss": 1.1244, "step": 3656 }, { "epoch": 0.49576357351047246, "grad_norm": 3.862756634188837, "learning_rate": 1.0623171793970642e-06, "loss": 1.0724, "step": 3657 }, { "epoch": 0.4958991391581373, "grad_norm": 7.409512508708356, "learning_rate": 1.0618788889964182e-06, "loss": 1.1446, "step": 3658 }, { "epoch": 0.4960347048058022, "grad_norm": 4.948213345182381, "learning_rate": 1.061440586662917e-06, "loss": 1.142, "step": 3659 }, { "epoch": 0.49617027045346707, "grad_norm": 4.607095614942139, "learning_rate": 1.0610022724810837e-06, "loss": 1.1672, "step": 3660 }, { "epoch": 0.496305836101132, "grad_norm": 6.260792504167756, "learning_rate": 1.0605639465354435e-06, "loss": 1.125, "step": 3661 }, { "epoch": 0.49644140174879686, "grad_norm": 4.541227745522847, "learning_rate": 1.0601256089105242e-06, "loss": 1.1447, "step": 3662 }, { "epoch": 0.49657696739646173, "grad_norm": 4.813102524353423, "learning_rate": 1.059687259690856e-06, "loss": 1.1302, "step": 3663 }, { "epoch": 0.4967125330441266, "grad_norm": 5.357704768555671, "learning_rate": 1.0592488989609708e-06, "loss": 1.1198, "step": 3664 }, { "epoch": 0.4968480986917915, "grad_norm": 5.024562767186866, "learning_rate": 1.0588105268054032e-06, "loss": 1.1236, "step": 3665 }, { "epoch": 0.4969836643394564, "grad_norm": 4.4616398377899325, "learning_rate": 1.0583721433086899e-06, "loss": 1.1375, "step": 3666 }, { "epoch": 0.49711922998712127, "grad_norm": 7.616980443900642, "learning_rate": 1.0579337485553695e-06, "loss": 1.1256, "step": 3667 }, { "epoch": 0.49725479563478614, "grad_norm": 5.550901563386953, "learning_rate": 1.0574953426299825e-06, "loss": 1.141, "step": 3668 }, { "epoch": 0.497390361282451, "grad_norm": 4.9562367789741275, "learning_rate": 1.057056925617073e-06, "loss": 1.1208, "step": 3669 }, { "epoch": 0.49752592693011594, "grad_norm": 4.382576681210224, "learning_rate": 1.0566184976011855e-06, "loss": 1.1097, "step": 3670 }, { "epoch": 0.4976614925777808, "grad_norm": 6.491951122632785, "learning_rate": 1.0561800586668678e-06, "loss": 1.1288, "step": 3671 }, { "epoch": 0.4977970582254457, "grad_norm": 5.212578488568786, "learning_rate": 1.0557416088986692e-06, "loss": 1.1019, "step": 3672 }, { "epoch": 0.49793262387311055, "grad_norm": 4.334252240403043, "learning_rate": 1.0553031483811414e-06, "loss": 1.1236, "step": 3673 }, { "epoch": 0.4980681895207754, "grad_norm": 7.1631901242138465, "learning_rate": 1.054864677198838e-06, "loss": 1.1147, "step": 3674 }, { "epoch": 0.49820375516844034, "grad_norm": 7.851634084801032, "learning_rate": 1.0544261954363146e-06, "loss": 1.1432, "step": 3675 }, { "epoch": 0.4983393208161052, "grad_norm": 4.295430663360338, "learning_rate": 1.0539877031781289e-06, "loss": 1.1246, "step": 3676 }, { "epoch": 0.4984748864637701, "grad_norm": 4.6956658228403825, "learning_rate": 1.053549200508841e-06, "loss": 1.0914, "step": 3677 }, { "epoch": 0.49861045211143495, "grad_norm": 5.858207141074376, "learning_rate": 1.0531106875130123e-06, "loss": 1.1096, "step": 3678 }, { "epoch": 0.4987460177590998, "grad_norm": 4.7049520286849535, "learning_rate": 1.0526721642752069e-06, "loss": 1.1138, "step": 3679 }, { "epoch": 0.49888158340676475, "grad_norm": 6.8915504697802925, "learning_rate": 1.0522336308799904e-06, "loss": 1.1057, "step": 3680 }, { "epoch": 0.4990171490544296, "grad_norm": 6.919494829942152, "learning_rate": 1.0517950874119304e-06, "loss": 1.1047, "step": 3681 }, { "epoch": 0.4991527147020945, "grad_norm": 5.701321301045695, "learning_rate": 1.0513565339555965e-06, "loss": 1.1516, "step": 3682 }, { "epoch": 0.49928828034975936, "grad_norm": 7.216187804544623, "learning_rate": 1.0509179705955607e-06, "loss": 1.143, "step": 3683 }, { "epoch": 0.49942384599742423, "grad_norm": 4.875390334585242, "learning_rate": 1.050479397416396e-06, "loss": 1.1319, "step": 3684 }, { "epoch": 0.49955941164508916, "grad_norm": 7.4722983228926045, "learning_rate": 1.050040814502678e-06, "loss": 1.121, "step": 3685 }, { "epoch": 0.49969497729275403, "grad_norm": 5.706586447864555, "learning_rate": 1.049602221938984e-06, "loss": 1.1118, "step": 3686 }, { "epoch": 0.4998305429404189, "grad_norm": 4.750575016461708, "learning_rate": 1.0491636198098932e-06, "loss": 1.1285, "step": 3687 }, { "epoch": 0.49996610858808377, "grad_norm": 6.223798683255216, "learning_rate": 1.048725008199986e-06, "loss": 1.1195, "step": 3688 }, { "epoch": 0.5001016742357487, "grad_norm": 6.683429760590256, "learning_rate": 1.0482863871938459e-06, "loss": 1.1233, "step": 3689 }, { "epoch": 0.5002372398834135, "grad_norm": 5.222733843717723, "learning_rate": 1.047847756876057e-06, "loss": 1.1419, "step": 3690 }, { "epoch": 0.5003728055310784, "grad_norm": 4.658582299146041, "learning_rate": 1.0474091173312058e-06, "loss": 1.1303, "step": 3691 }, { "epoch": 0.5005083711787434, "grad_norm": 4.862251779877016, "learning_rate": 1.0469704686438807e-06, "loss": 1.1179, "step": 3692 }, { "epoch": 0.5006439368264082, "grad_norm": 5.699057398942527, "learning_rate": 1.0465318108986713e-06, "loss": 1.1165, "step": 3693 }, { "epoch": 0.5007795024740731, "grad_norm": 4.192357669751763, "learning_rate": 1.04609314418017e-06, "loss": 1.1457, "step": 3694 }, { "epoch": 0.5009150681217379, "grad_norm": 6.527234881577316, "learning_rate": 1.045654468572969e-06, "loss": 1.1069, "step": 3695 }, { "epoch": 0.5010506337694028, "grad_norm": 8.393459361959248, "learning_rate": 1.0452157841616645e-06, "loss": 1.0809, "step": 3696 }, { "epoch": 0.5011861994170678, "grad_norm": 6.135820847339243, "learning_rate": 1.044777091030853e-06, "loss": 1.1078, "step": 3697 }, { "epoch": 0.5013217650647326, "grad_norm": 4.3065509935732855, "learning_rate": 1.0443383892651325e-06, "loss": 1.1062, "step": 3698 }, { "epoch": 0.5014573307123975, "grad_norm": 4.5758899483185385, "learning_rate": 1.043899678949104e-06, "loss": 1.1248, "step": 3699 }, { "epoch": 0.5015928963600623, "grad_norm": 25.511146421118692, "learning_rate": 1.0434609601673687e-06, "loss": 1.1092, "step": 3700 }, { "epoch": 0.5017284620077272, "grad_norm": 4.867738544530216, "learning_rate": 1.0430222330045304e-06, "loss": 1.112, "step": 3701 }, { "epoch": 0.5018640276553922, "grad_norm": 21.893504331638233, "learning_rate": 1.0425834975451942e-06, "loss": 1.1874, "step": 3702 }, { "epoch": 0.501999593303057, "grad_norm": 5.813914632636319, "learning_rate": 1.0421447538739664e-06, "loss": 1.1314, "step": 3703 }, { "epoch": 0.5021351589507219, "grad_norm": 5.818945516400186, "learning_rate": 1.0417060020754555e-06, "loss": 1.076, "step": 3704 }, { "epoch": 0.5022707245983867, "grad_norm": 6.166812438326592, "learning_rate": 1.0412672422342714e-06, "loss": 1.1243, "step": 3705 }, { "epoch": 0.5024062902460517, "grad_norm": 6.107316349242439, "learning_rate": 1.0408284744350255e-06, "loss": 1.0908, "step": 3706 }, { "epoch": 0.5025418558937166, "grad_norm": 5.5426296425781025, "learning_rate": 1.0403896987623304e-06, "loss": 1.1219, "step": 3707 }, { "epoch": 0.5026774215413814, "grad_norm": 5.587544429423662, "learning_rate": 1.039950915300801e-06, "loss": 1.13, "step": 3708 }, { "epoch": 0.5028129871890463, "grad_norm": 4.386068312636505, "learning_rate": 1.039512124135053e-06, "loss": 1.0831, "step": 3709 }, { "epoch": 0.5029485528367111, "grad_norm": 4.604582358129616, "learning_rate": 1.0390733253497033e-06, "loss": 1.1143, "step": 3710 }, { "epoch": 0.5030841184843761, "grad_norm": 5.416843501556298, "learning_rate": 1.0386345190293714e-06, "loss": 1.0884, "step": 3711 }, { "epoch": 0.503219684132041, "grad_norm": 6.229539013758396, "learning_rate": 1.0381957052586774e-06, "loss": 1.096, "step": 3712 }, { "epoch": 0.5033552497797058, "grad_norm": 5.588795732562195, "learning_rate": 1.037756884122243e-06, "loss": 1.1249, "step": 3713 }, { "epoch": 0.5034908154273707, "grad_norm": 5.259966631160287, "learning_rate": 1.037318055704692e-06, "loss": 1.1214, "step": 3714 }, { "epoch": 0.5036263810750355, "grad_norm": 3.929308900290215, "learning_rate": 1.0368792200906482e-06, "loss": 1.0963, "step": 3715 }, { "epoch": 0.5037619467227005, "grad_norm": 4.862314663777313, "learning_rate": 1.0364403773647379e-06, "loss": 1.0657, "step": 3716 }, { "epoch": 0.5038975123703654, "grad_norm": 6.130432960779448, "learning_rate": 1.0360015276115888e-06, "loss": 1.1277, "step": 3717 }, { "epoch": 0.5040330780180302, "grad_norm": 3.6441275019453303, "learning_rate": 1.035562670915829e-06, "loss": 1.1147, "step": 3718 }, { "epoch": 0.5041686436656951, "grad_norm": 6.117330848067444, "learning_rate": 1.0351238073620887e-06, "loss": 1.1395, "step": 3719 }, { "epoch": 0.50430420931336, "grad_norm": 5.343653293656788, "learning_rate": 1.0346849370349997e-06, "loss": 1.1026, "step": 3720 }, { "epoch": 0.5044397749610249, "grad_norm": 4.823034670426031, "learning_rate": 1.0342460600191942e-06, "loss": 1.1375, "step": 3721 }, { "epoch": 0.5045753406086898, "grad_norm": 4.332677549034026, "learning_rate": 1.0338071763993065e-06, "loss": 1.0927, "step": 3722 }, { "epoch": 0.5047109062563546, "grad_norm": 5.399964468513619, "learning_rate": 1.0333682862599714e-06, "loss": 1.1379, "step": 3723 }, { "epoch": 0.5048464719040195, "grad_norm": 5.219912813244006, "learning_rate": 1.032929389685826e-06, "loss": 1.0851, "step": 3724 }, { "epoch": 0.5049820375516844, "grad_norm": 4.783858568615478, "learning_rate": 1.0324904867615077e-06, "loss": 1.1534, "step": 3725 }, { "epoch": 0.5051176031993493, "grad_norm": 4.788284279388837, "learning_rate": 1.0320515775716554e-06, "loss": 1.1196, "step": 3726 }, { "epoch": 0.5052531688470142, "grad_norm": 5.2245049681882945, "learning_rate": 1.0316126622009092e-06, "loss": 1.107, "step": 3727 }, { "epoch": 0.505388734494679, "grad_norm": 5.333215989085679, "learning_rate": 1.0311737407339106e-06, "loss": 1.108, "step": 3728 }, { "epoch": 0.505524300142344, "grad_norm": 54.77157520933636, "learning_rate": 1.0307348132553024e-06, "loss": 1.0942, "step": 3729 }, { "epoch": 0.5056598657900088, "grad_norm": 4.370403103875089, "learning_rate": 1.030295879849728e-06, "loss": 1.1372, "step": 3730 }, { "epoch": 0.5057954314376737, "grad_norm": 8.373928743305639, "learning_rate": 1.0298569406018325e-06, "loss": 1.127, "step": 3731 }, { "epoch": 0.5059309970853386, "grad_norm": 5.746343666336101, "learning_rate": 1.0294179955962614e-06, "loss": 1.0995, "step": 3732 }, { "epoch": 0.5060665627330034, "grad_norm": 5.176261170864963, "learning_rate": 1.0289790449176622e-06, "loss": 1.1377, "step": 3733 }, { "epoch": 0.5062021283806684, "grad_norm": 7.612914762742412, "learning_rate": 1.0285400886506828e-06, "loss": 1.1105, "step": 3734 }, { "epoch": 0.5063376940283332, "grad_norm": 5.138221179965478, "learning_rate": 1.0281011268799726e-06, "loss": 1.089, "step": 3735 }, { "epoch": 0.5064732596759981, "grad_norm": 4.620829456608099, "learning_rate": 1.0276621596901821e-06, "loss": 1.1058, "step": 3736 }, { "epoch": 0.506608825323663, "grad_norm": 9.73274750539984, "learning_rate": 1.0272231871659624e-06, "loss": 1.0998, "step": 3737 }, { "epoch": 0.5067443909713278, "grad_norm": 10.510412966593428, "learning_rate": 1.026784209391966e-06, "loss": 1.1065, "step": 3738 }, { "epoch": 0.5068799566189928, "grad_norm": 5.393843573949861, "learning_rate": 1.026345226452846e-06, "loss": 1.1194, "step": 3739 }, { "epoch": 0.5070155222666576, "grad_norm": 12.255129528067673, "learning_rate": 1.0259062384332573e-06, "loss": 1.1025, "step": 3740 }, { "epoch": 0.5071510879143225, "grad_norm": 4.455448485716249, "learning_rate": 1.0254672454178547e-06, "loss": 1.1241, "step": 3741 }, { "epoch": 0.5072866535619874, "grad_norm": 4.544016342589621, "learning_rate": 1.0250282474912952e-06, "loss": 1.1323, "step": 3742 }, { "epoch": 0.5074222192096522, "grad_norm": 4.901765101816518, "learning_rate": 1.0245892447382354e-06, "loss": 1.1063, "step": 3743 }, { "epoch": 0.5075577848573172, "grad_norm": 4.106992027155848, "learning_rate": 1.0241502372433342e-06, "loss": 1.114, "step": 3744 }, { "epoch": 0.507693350504982, "grad_norm": 7.1761804755445135, "learning_rate": 1.02371122509125e-06, "loss": 1.0963, "step": 3745 }, { "epoch": 0.5078289161526469, "grad_norm": 5.5692614435196015, "learning_rate": 1.0232722083666435e-06, "loss": 1.111, "step": 3746 }, { "epoch": 0.5079644818003118, "grad_norm": 6.617718174105719, "learning_rate": 1.022833187154175e-06, "loss": 1.0923, "step": 3747 }, { "epoch": 0.5081000474479767, "grad_norm": 8.413960529457636, "learning_rate": 1.022394161538507e-06, "loss": 1.0982, "step": 3748 }, { "epoch": 0.5082356130956416, "grad_norm": 5.631845001574212, "learning_rate": 1.0219551316043016e-06, "loss": 1.122, "step": 3749 }, { "epoch": 0.5083711787433064, "grad_norm": 3.9553661239098172, "learning_rate": 1.0215160974362223e-06, "loss": 1.0768, "step": 3750 }, { "epoch": 0.5085067443909713, "grad_norm": 5.1476256387540325, "learning_rate": 1.0210770591189333e-06, "loss": 1.0833, "step": 3751 }, { "epoch": 0.5086423100386362, "grad_norm": 5.702242593811187, "learning_rate": 1.0206380167371e-06, "loss": 1.1387, "step": 3752 }, { "epoch": 0.5087778756863011, "grad_norm": 14.025287748060004, "learning_rate": 1.0201989703753881e-06, "loss": 1.1268, "step": 3753 }, { "epoch": 0.508913441333966, "grad_norm": 4.3191590996492835, "learning_rate": 1.0197599201184642e-06, "loss": 1.1359, "step": 3754 }, { "epoch": 0.5090490069816308, "grad_norm": 4.1066355348895796, "learning_rate": 1.0193208660509956e-06, "loss": 1.1224, "step": 3755 }, { "epoch": 0.5091845726292957, "grad_norm": 6.06083798676564, "learning_rate": 1.0188818082576505e-06, "loss": 1.1095, "step": 3756 }, { "epoch": 0.5093201382769607, "grad_norm": 4.213781347110719, "learning_rate": 1.0184427468230976e-06, "loss": 1.1287, "step": 3757 }, { "epoch": 0.5094557039246255, "grad_norm": 7.670949074305624, "learning_rate": 1.0180036818320067e-06, "loss": 1.1179, "step": 3758 }, { "epoch": 0.5095912695722904, "grad_norm": 6.812712749699945, "learning_rate": 1.0175646133690479e-06, "loss": 1.1272, "step": 3759 }, { "epoch": 0.5097268352199552, "grad_norm": 4.647785983634116, "learning_rate": 1.017125541518892e-06, "loss": 1.1192, "step": 3760 }, { "epoch": 0.5098624008676201, "grad_norm": 3.711026152298529, "learning_rate": 1.0166864663662104e-06, "loss": 1.1188, "step": 3761 }, { "epoch": 0.5099979665152851, "grad_norm": 5.069480166093077, "learning_rate": 1.016247387995676e-06, "loss": 1.1079, "step": 3762 }, { "epoch": 0.5101335321629499, "grad_norm": 6.795852308713597, "learning_rate": 1.0158083064919605e-06, "loss": 1.0784, "step": 3763 }, { "epoch": 0.5102690978106148, "grad_norm": 5.929422383542343, "learning_rate": 1.0153692219397385e-06, "loss": 1.1198, "step": 3764 }, { "epoch": 0.5104046634582796, "grad_norm": 5.3775968487824874, "learning_rate": 1.014930134423683e-06, "loss": 1.1159, "step": 3765 }, { "epoch": 0.5105402291059445, "grad_norm": 7.541923176031327, "learning_rate": 1.0144910440284689e-06, "loss": 1.1052, "step": 3766 }, { "epoch": 0.5106757947536095, "grad_norm": 4.6257046411311915, "learning_rate": 1.0140519508387713e-06, "loss": 1.1089, "step": 3767 }, { "epoch": 0.5108113604012743, "grad_norm": 5.537311615904259, "learning_rate": 1.013612854939266e-06, "loss": 1.1412, "step": 3768 }, { "epoch": 0.5109469260489392, "grad_norm": 7.33062687234888, "learning_rate": 1.013173756414629e-06, "loss": 1.1079, "step": 3769 }, { "epoch": 0.5110824916966041, "grad_norm": 8.40116293924405, "learning_rate": 1.0127346553495371e-06, "loss": 1.1422, "step": 3770 }, { "epoch": 0.511218057344269, "grad_norm": 3.9183328923973932, "learning_rate": 1.0122955518286672e-06, "loss": 1.0763, "step": 3771 }, { "epoch": 0.5113536229919339, "grad_norm": 5.512850650492395, "learning_rate": 1.0118564459366976e-06, "loss": 1.0898, "step": 3772 }, { "epoch": 0.5114891886395987, "grad_norm": 7.4492190849457325, "learning_rate": 1.0114173377583057e-06, "loss": 1.1058, "step": 3773 }, { "epoch": 0.5116247542872636, "grad_norm": 4.826093560838287, "learning_rate": 1.0109782273781706e-06, "loss": 1.0974, "step": 3774 }, { "epoch": 0.5117603199349285, "grad_norm": 4.551846151549389, "learning_rate": 1.0105391148809707e-06, "loss": 1.1387, "step": 3775 }, { "epoch": 0.5118958855825934, "grad_norm": 6.302742969378742, "learning_rate": 1.010100000351386e-06, "loss": 1.1128, "step": 3776 }, { "epoch": 0.5120314512302583, "grad_norm": 5.933765086147851, "learning_rate": 1.0096608838740956e-06, "loss": 1.0636, "step": 3777 }, { "epoch": 0.5121670168779231, "grad_norm": 4.359924595583916, "learning_rate": 1.0092217655337806e-06, "loss": 1.1618, "step": 3778 }, { "epoch": 0.512302582525588, "grad_norm": 5.727538734983148, "learning_rate": 1.0087826454151205e-06, "loss": 1.1614, "step": 3779 }, { "epoch": 0.512438148173253, "grad_norm": 5.463761851250308, "learning_rate": 1.0083435236027967e-06, "loss": 1.1209, "step": 3780 }, { "epoch": 0.5125737138209178, "grad_norm": 4.527044290580538, "learning_rate": 1.00790440018149e-06, "loss": 1.1057, "step": 3781 }, { "epoch": 0.5127092794685827, "grad_norm": 4.7184853204114665, "learning_rate": 1.0074652752358822e-06, "loss": 1.0806, "step": 3782 }, { "epoch": 0.5128448451162475, "grad_norm": 8.06156755943507, "learning_rate": 1.0070261488506551e-06, "loss": 1.0888, "step": 3783 }, { "epoch": 0.5129804107639124, "grad_norm": 6.9552614929447465, "learning_rate": 1.0065870211104906e-06, "loss": 1.1568, "step": 3784 }, { "epoch": 0.5131159764115774, "grad_norm": 4.120292603410249, "learning_rate": 1.006147892100071e-06, "loss": 1.1177, "step": 3785 }, { "epoch": 0.5132515420592422, "grad_norm": 4.443661132216519, "learning_rate": 1.0057087619040792e-06, "loss": 1.1062, "step": 3786 }, { "epoch": 0.5133871077069071, "grad_norm": 5.399332636057503, "learning_rate": 1.0052696306071974e-06, "loss": 1.1186, "step": 3787 }, { "epoch": 0.5135226733545719, "grad_norm": 5.9708907717226865, "learning_rate": 1.0048304982941089e-06, "loss": 1.1275, "step": 3788 }, { "epoch": 0.5136582390022368, "grad_norm": 4.3023796847886455, "learning_rate": 1.0043913650494972e-06, "loss": 1.1039, "step": 3789 }, { "epoch": 0.5137938046499018, "grad_norm": 8.924863573167142, "learning_rate": 1.0039522309580453e-06, "loss": 1.1082, "step": 3790 }, { "epoch": 0.5139293702975666, "grad_norm": 13.530611578701405, "learning_rate": 1.003513096104437e-06, "loss": 1.1069, "step": 3791 }, { "epoch": 0.5140649359452315, "grad_norm": 7.021541743986714, "learning_rate": 1.0030739605733557e-06, "loss": 1.1443, "step": 3792 }, { "epoch": 0.5142005015928963, "grad_norm": 6.898661719889462, "learning_rate": 1.0026348244494853e-06, "loss": 1.1162, "step": 3793 }, { "epoch": 0.5143360672405612, "grad_norm": 5.825715659399988, "learning_rate": 1.0021956878175099e-06, "loss": 1.1032, "step": 3794 }, { "epoch": 0.5144716328882262, "grad_norm": 6.631355940232556, "learning_rate": 1.0017565507621135e-06, "loss": 1.1058, "step": 3795 }, { "epoch": 0.514607198535891, "grad_norm": 8.507101248314479, "learning_rate": 1.0013174133679801e-06, "loss": 1.1464, "step": 3796 }, { "epoch": 0.5147427641835559, "grad_norm": 5.947149006905551, "learning_rate": 1.0008782757197939e-06, "loss": 1.1576, "step": 3797 }, { "epoch": 0.5148783298312207, "grad_norm": 6.424323493348252, "learning_rate": 1.000439137902239e-06, "loss": 1.131, "step": 3798 }, { "epoch": 0.5150138954788857, "grad_norm": 5.350606021657063, "learning_rate": 1e-06, "loss": 1.1071, "step": 3799 }, { "epoch": 0.5151494611265506, "grad_norm": 7.839109922421489, "learning_rate": 9.995608620977612e-07, "loss": 1.1111, "step": 3800 }, { "epoch": 0.5152850267742154, "grad_norm": 8.260855364320784, "learning_rate": 9.991217242802063e-07, "loss": 1.1355, "step": 3801 }, { "epoch": 0.5154205924218803, "grad_norm": 4.185599737231145, "learning_rate": 9.986825866320202e-07, "loss": 1.1348, "step": 3802 }, { "epoch": 0.5155561580695451, "grad_norm": 11.358608010497905, "learning_rate": 9.982434492378864e-07, "loss": 1.1555, "step": 3803 }, { "epoch": 0.5156917237172101, "grad_norm": 5.623020762160677, "learning_rate": 9.978043121824903e-07, "loss": 1.0942, "step": 3804 }, { "epoch": 0.515827289364875, "grad_norm": 4.9134392819981505, "learning_rate": 9.973651755505146e-07, "loss": 1.0765, "step": 3805 }, { "epoch": 0.5159628550125398, "grad_norm": 7.0005172609738, "learning_rate": 9.969260394266446e-07, "loss": 1.1462, "step": 3806 }, { "epoch": 0.5160984206602047, "grad_norm": 5.096259795205497, "learning_rate": 9.96486903895563e-07, "loss": 1.1325, "step": 3807 }, { "epoch": 0.5162339863078695, "grad_norm": 5.864632949984304, "learning_rate": 9.960477690419548e-07, "loss": 1.105, "step": 3808 }, { "epoch": 0.5163695519555345, "grad_norm": 4.092569929209837, "learning_rate": 9.956086349505027e-07, "loss": 1.1118, "step": 3809 }, { "epoch": 0.5165051176031994, "grad_norm": 6.235595277950917, "learning_rate": 9.95169501705891e-07, "loss": 1.0879, "step": 3810 }, { "epoch": 0.5166406832508642, "grad_norm": 6.568099497885109, "learning_rate": 9.947303693928026e-07, "loss": 1.1225, "step": 3811 }, { "epoch": 0.5167762488985291, "grad_norm": 6.866777647749745, "learning_rate": 9.94291238095921e-07, "loss": 1.1307, "step": 3812 }, { "epoch": 0.516911814546194, "grad_norm": 6.888211821201387, "learning_rate": 9.938521078999288e-07, "loss": 1.1091, "step": 3813 }, { "epoch": 0.5170473801938589, "grad_norm": 5.151107703136978, "learning_rate": 9.934129788895093e-07, "loss": 1.1042, "step": 3814 }, { "epoch": 0.5171829458415238, "grad_norm": 4.887432232597712, "learning_rate": 9.92973851149345e-07, "loss": 1.15, "step": 3815 }, { "epoch": 0.5173185114891886, "grad_norm": 16.336751543121146, "learning_rate": 9.92534724764118e-07, "loss": 1.1058, "step": 3816 }, { "epoch": 0.5174540771368535, "grad_norm": 4.685328463185273, "learning_rate": 9.920955998185102e-07, "loss": 1.0837, "step": 3817 }, { "epoch": 0.5175896427845184, "grad_norm": 9.386245305400834, "learning_rate": 9.916564763972035e-07, "loss": 1.1375, "step": 3818 }, { "epoch": 0.5177252084321833, "grad_norm": 43.581450265434505, "learning_rate": 9.912173545848796e-07, "loss": 1.0796, "step": 3819 }, { "epoch": 0.5178607740798482, "grad_norm": 7.380162890828864, "learning_rate": 9.907782344662194e-07, "loss": 1.1189, "step": 3820 }, { "epoch": 0.517996339727513, "grad_norm": 6.529401126008311, "learning_rate": 9.903391161259043e-07, "loss": 1.1222, "step": 3821 }, { "epoch": 0.518131905375178, "grad_norm": 3.8172178231261165, "learning_rate": 9.898999996486137e-07, "loss": 1.0936, "step": 3822 }, { "epoch": 0.5182674710228428, "grad_norm": 5.525356109835668, "learning_rate": 9.894608851190292e-07, "loss": 1.0827, "step": 3823 }, { "epoch": 0.5184030366705077, "grad_norm": 4.631611638831732, "learning_rate": 9.890217726218293e-07, "loss": 1.1136, "step": 3824 }, { "epoch": 0.5185386023181726, "grad_norm": 4.293238301472097, "learning_rate": 9.885826622416942e-07, "loss": 1.1432, "step": 3825 }, { "epoch": 0.5186741679658374, "grad_norm": 7.24006919591761, "learning_rate": 9.88143554063302e-07, "loss": 1.1333, "step": 3826 }, { "epoch": 0.5188097336135024, "grad_norm": 7.680655750316747, "learning_rate": 9.877044481713327e-07, "loss": 1.1143, "step": 3827 }, { "epoch": 0.5189452992611672, "grad_norm": 11.425971259820605, "learning_rate": 9.872653446504632e-07, "loss": 1.0757, "step": 3828 }, { "epoch": 0.5190808649088321, "grad_norm": 5.721549270110444, "learning_rate": 9.86826243585371e-07, "loss": 1.1425, "step": 3829 }, { "epoch": 0.519216430556497, "grad_norm": 5.693925646302662, "learning_rate": 9.863871450607342e-07, "loss": 1.1143, "step": 3830 }, { "epoch": 0.5193519962041618, "grad_norm": 4.417227842836063, "learning_rate": 9.859480491612288e-07, "loss": 1.1408, "step": 3831 }, { "epoch": 0.5194875618518268, "grad_norm": 5.547246843247154, "learning_rate": 9.855089559715314e-07, "loss": 1.1054, "step": 3832 }, { "epoch": 0.5196231274994916, "grad_norm": 4.943839041704478, "learning_rate": 9.850698655763171e-07, "loss": 1.1106, "step": 3833 }, { "epoch": 0.5197586931471565, "grad_norm": 6.5017513770335125, "learning_rate": 9.846307780602619e-07, "loss": 1.0929, "step": 3834 }, { "epoch": 0.5198942587948214, "grad_norm": 4.408611738209155, "learning_rate": 9.841916935080392e-07, "loss": 1.1173, "step": 3835 }, { "epoch": 0.5200298244424862, "grad_norm": 5.494098882817993, "learning_rate": 9.837526120043242e-07, "loss": 1.1067, "step": 3836 }, { "epoch": 0.5201653900901512, "grad_norm": 12.24166362557923, "learning_rate": 9.833135336337893e-07, "loss": 1.0904, "step": 3837 }, { "epoch": 0.520300955737816, "grad_norm": 6.314830772747964, "learning_rate": 9.82874458481108e-07, "loss": 1.1061, "step": 3838 }, { "epoch": 0.5204365213854809, "grad_norm": 12.802552059685942, "learning_rate": 9.82435386630952e-07, "loss": 1.1496, "step": 3839 }, { "epoch": 0.5205720870331458, "grad_norm": 8.761198395091442, "learning_rate": 9.819963181679934e-07, "loss": 1.0955, "step": 3840 }, { "epoch": 0.5207076526808107, "grad_norm": 4.283212387236299, "learning_rate": 9.81557253176902e-07, "loss": 1.0778, "step": 3841 }, { "epoch": 0.5208432183284756, "grad_norm": 5.129446086076453, "learning_rate": 9.811181917423495e-07, "loss": 1.1115, "step": 3842 }, { "epoch": 0.5209787839761404, "grad_norm": 4.962491406811547, "learning_rate": 9.806791339490047e-07, "loss": 1.079, "step": 3843 }, { "epoch": 0.5211143496238053, "grad_norm": 7.293024634665239, "learning_rate": 9.802400798815357e-07, "loss": 1.139, "step": 3844 }, { "epoch": 0.5212499152714702, "grad_norm": 4.959008325907351, "learning_rate": 9.79801029624612e-07, "loss": 1.0816, "step": 3845 }, { "epoch": 0.5213854809191351, "grad_norm": 18.28828646695921, "learning_rate": 9.793619832629001e-07, "loss": 1.1498, "step": 3846 }, { "epoch": 0.5215210465668, "grad_norm": 13.230533443538892, "learning_rate": 9.789229408810668e-07, "loss": 1.1036, "step": 3847 }, { "epoch": 0.5216566122144649, "grad_norm": 5.384213273810483, "learning_rate": 9.784839025637778e-07, "loss": 1.1184, "step": 3848 }, { "epoch": 0.5217921778621297, "grad_norm": 7.5699540156891905, "learning_rate": 9.780448683956983e-07, "loss": 1.1271, "step": 3849 }, { "epoch": 0.5219277435097947, "grad_norm": 5.57920415081436, "learning_rate": 9.77605838461493e-07, "loss": 1.1517, "step": 3850 }, { "epoch": 0.5220633091574595, "grad_norm": 5.611534676095274, "learning_rate": 9.771668128458251e-07, "loss": 1.0685, "step": 3851 }, { "epoch": 0.5221988748051244, "grad_norm": 5.284895024221364, "learning_rate": 9.767277916333564e-07, "loss": 1.0926, "step": 3852 }, { "epoch": 0.5223344404527893, "grad_norm": 5.867060705658198, "learning_rate": 9.762887749087501e-07, "loss": 1.1344, "step": 3853 }, { "epoch": 0.5224700061004541, "grad_norm": 4.769052774423156, "learning_rate": 9.758497627566657e-07, "loss": 1.132, "step": 3854 }, { "epoch": 0.5226055717481191, "grad_norm": 7.591868195270348, "learning_rate": 9.754107552617645e-07, "loss": 1.1514, "step": 3855 }, { "epoch": 0.5227411373957839, "grad_norm": 7.716933100904313, "learning_rate": 9.749717525087051e-07, "loss": 1.1209, "step": 3856 }, { "epoch": 0.5228767030434488, "grad_norm": 5.524117437493975, "learning_rate": 9.745327545821452e-07, "loss": 1.1212, "step": 3857 }, { "epoch": 0.5230122686911137, "grad_norm": 4.5377046295637244, "learning_rate": 9.74093761566743e-07, "loss": 1.0756, "step": 3858 }, { "epoch": 0.5231478343387785, "grad_norm": 4.1171263837411765, "learning_rate": 9.736547735471539e-07, "loss": 1.1101, "step": 3859 }, { "epoch": 0.5232833999864435, "grad_norm": 6.721930362062448, "learning_rate": 9.732157906080343e-07, "loss": 1.1612, "step": 3860 }, { "epoch": 0.5234189656341083, "grad_norm": 5.89813354975927, "learning_rate": 9.727768128340375e-07, "loss": 1.1144, "step": 3861 }, { "epoch": 0.5235545312817732, "grad_norm": 9.115174372331476, "learning_rate": 9.72337840309818e-07, "loss": 1.1333, "step": 3862 }, { "epoch": 0.5236900969294381, "grad_norm": 4.704676429577216, "learning_rate": 9.718988731200271e-07, "loss": 1.1311, "step": 3863 }, { "epoch": 0.523825662577103, "grad_norm": 9.452931596536038, "learning_rate": 9.714599113493171e-07, "loss": 1.1225, "step": 3864 }, { "epoch": 0.5239612282247679, "grad_norm": 6.333653204937084, "learning_rate": 9.710209550823375e-07, "loss": 1.1215, "step": 3865 }, { "epoch": 0.5240967938724327, "grad_norm": 12.452451797521357, "learning_rate": 9.705820044037387e-07, "loss": 1.1187, "step": 3866 }, { "epoch": 0.5242323595200976, "grad_norm": 6.692892581745013, "learning_rate": 9.701430593981674e-07, "loss": 1.1376, "step": 3867 }, { "epoch": 0.5243679251677625, "grad_norm": 6.910073705081862, "learning_rate": 9.697041201502718e-07, "loss": 1.189, "step": 3868 }, { "epoch": 0.5245034908154274, "grad_norm": 7.005750220607018, "learning_rate": 9.692651867446973e-07, "loss": 1.1401, "step": 3869 }, { "epoch": 0.5246390564630923, "grad_norm": 6.151045483596463, "learning_rate": 9.688262592660893e-07, "loss": 1.0931, "step": 3870 }, { "epoch": 0.5247746221107571, "grad_norm": 5.05790975429405, "learning_rate": 9.68387337799091e-07, "loss": 1.1119, "step": 3871 }, { "epoch": 0.524910187758422, "grad_norm": 10.387571595654252, "learning_rate": 9.679484224283447e-07, "loss": 1.1566, "step": 3872 }, { "epoch": 0.525045753406087, "grad_norm": 13.795944713762914, "learning_rate": 9.675095132384927e-07, "loss": 1.101, "step": 3873 }, { "epoch": 0.5251813190537518, "grad_norm": 5.9663920523337906, "learning_rate": 9.67070610314174e-07, "loss": 1.0937, "step": 3874 }, { "epoch": 0.5253168847014167, "grad_norm": 5.721373786650148, "learning_rate": 9.666317137400287e-07, "loss": 1.1097, "step": 3875 }, { "epoch": 0.5254524503490815, "grad_norm": 13.57530773895992, "learning_rate": 9.661928236006936e-07, "loss": 1.0559, "step": 3876 }, { "epoch": 0.5255880159967464, "grad_norm": 4.791792801949953, "learning_rate": 9.65753939980806e-07, "loss": 1.1752, "step": 3877 }, { "epoch": 0.5257235816444114, "grad_norm": 31.59582124325184, "learning_rate": 9.653150629650004e-07, "loss": 1.0909, "step": 3878 }, { "epoch": 0.5258591472920762, "grad_norm": 4.33609776432061, "learning_rate": 9.648761926379112e-07, "loss": 1.1281, "step": 3879 }, { "epoch": 0.5259947129397411, "grad_norm": 5.257371489433959, "learning_rate": 9.644373290841712e-07, "loss": 1.1206, "step": 3880 }, { "epoch": 0.5261302785874059, "grad_norm": 8.415202755759516, "learning_rate": 9.639984723884112e-07, "loss": 1.1133, "step": 3881 }, { "epoch": 0.5262658442350708, "grad_norm": 7.567253076514783, "learning_rate": 9.635596226352618e-07, "loss": 1.1387, "step": 3882 }, { "epoch": 0.5264014098827358, "grad_norm": 4.8177334964658955, "learning_rate": 9.63120779909352e-07, "loss": 1.1273, "step": 3883 }, { "epoch": 0.5265369755304006, "grad_norm": 3.884635553929665, "learning_rate": 9.626819442953081e-07, "loss": 1.1307, "step": 3884 }, { "epoch": 0.5266725411780655, "grad_norm": 4.556677999305315, "learning_rate": 9.622431158777568e-07, "loss": 1.1135, "step": 3885 }, { "epoch": 0.5268081068257303, "grad_norm": 4.890687886083745, "learning_rate": 9.618042947413228e-07, "loss": 1.1418, "step": 3886 }, { "epoch": 0.5269436724733952, "grad_norm": 5.217123588434144, "learning_rate": 9.613654809706288e-07, "loss": 1.1035, "step": 3887 }, { "epoch": 0.5270792381210602, "grad_norm": 5.270801018592838, "learning_rate": 9.60926674650297e-07, "loss": 1.1111, "step": 3888 }, { "epoch": 0.527214803768725, "grad_norm": 4.346435677362307, "learning_rate": 9.604878758649472e-07, "loss": 1.1104, "step": 3889 }, { "epoch": 0.5273503694163899, "grad_norm": 5.477614699353106, "learning_rate": 9.60049084699199e-07, "loss": 1.0708, "step": 3890 }, { "epoch": 0.5274859350640547, "grad_norm": 5.102300500736201, "learning_rate": 9.596103012376695e-07, "loss": 1.0721, "step": 3891 }, { "epoch": 0.5276215007117196, "grad_norm": 4.078683013758137, "learning_rate": 9.591715255649746e-07, "loss": 1.1149, "step": 3892 }, { "epoch": 0.5277570663593846, "grad_norm": 4.606538829733657, "learning_rate": 9.587327577657283e-07, "loss": 1.1267, "step": 3893 }, { "epoch": 0.5278926320070494, "grad_norm": 5.880330836515382, "learning_rate": 9.582939979245444e-07, "loss": 1.1377, "step": 3894 }, { "epoch": 0.5280281976547143, "grad_norm": 13.064680820677173, "learning_rate": 9.578552461260335e-07, "loss": 1.1124, "step": 3895 }, { "epoch": 0.5281637633023791, "grad_norm": 8.556622272254458, "learning_rate": 9.57416502454806e-07, "loss": 1.1196, "step": 3896 }, { "epoch": 0.5282993289500441, "grad_norm": 4.375733639437992, "learning_rate": 9.569777669954693e-07, "loss": 1.1376, "step": 3897 }, { "epoch": 0.528434894597709, "grad_norm": 19.13107319207703, "learning_rate": 9.565390398326312e-07, "loss": 1.1092, "step": 3898 }, { "epoch": 0.5285704602453738, "grad_norm": 5.744304707410615, "learning_rate": 9.561003210508963e-07, "loss": 1.1198, "step": 3899 }, { "epoch": 0.5287060258930387, "grad_norm": 3.9202397089930523, "learning_rate": 9.556616107348675e-07, "loss": 1.0961, "step": 3900 }, { "epoch": 0.5288415915407035, "grad_norm": 5.418991816120298, "learning_rate": 9.552229089691474e-07, "loss": 1.1011, "step": 3901 }, { "epoch": 0.5289771571883685, "grad_norm": 4.74499836501787, "learning_rate": 9.547842158383354e-07, "loss": 1.0987, "step": 3902 }, { "epoch": 0.5291127228360334, "grad_norm": 5.285412975249498, "learning_rate": 9.54345531427031e-07, "loss": 1.1093, "step": 3903 }, { "epoch": 0.5292482884836982, "grad_norm": 7.454780002365955, "learning_rate": 9.539068558198301e-07, "loss": 1.1504, "step": 3904 }, { "epoch": 0.5293838541313631, "grad_norm": 5.610207723613894, "learning_rate": 9.534681891013286e-07, "loss": 1.1267, "step": 3905 }, { "epoch": 0.5295194197790279, "grad_norm": 5.430940802341011, "learning_rate": 9.530295313561192e-07, "loss": 1.1205, "step": 3906 }, { "epoch": 0.5296549854266929, "grad_norm": 9.160503717314665, "learning_rate": 9.525908826687943e-07, "loss": 1.1513, "step": 3907 }, { "epoch": 0.5297905510743578, "grad_norm": 5.3601851770510445, "learning_rate": 9.521522431239429e-07, "loss": 1.1111, "step": 3908 }, { "epoch": 0.5299261167220226, "grad_norm": 5.54741508637081, "learning_rate": 9.517136128061543e-07, "loss": 1.0943, "step": 3909 }, { "epoch": 0.5300616823696875, "grad_norm": 5.644115967092427, "learning_rate": 9.51274991800014e-07, "loss": 1.0833, "step": 3910 }, { "epoch": 0.5301972480173524, "grad_norm": 7.250262558734776, "learning_rate": 9.508363801901069e-07, "loss": 1.1226, "step": 3911 }, { "epoch": 0.5303328136650173, "grad_norm": 6.10214441282272, "learning_rate": 9.50397778061016e-07, "loss": 1.1142, "step": 3912 }, { "epoch": 0.5304683793126822, "grad_norm": 7.5369276006866555, "learning_rate": 9.49959185497322e-07, "loss": 1.0556, "step": 3913 }, { "epoch": 0.530603944960347, "grad_norm": 8.59006923816616, "learning_rate": 9.49520602583604e-07, "loss": 1.1163, "step": 3914 }, { "epoch": 0.5307395106080119, "grad_norm": 6.175213615681629, "learning_rate": 9.490820294044394e-07, "loss": 1.1045, "step": 3915 }, { "epoch": 0.5308750762556768, "grad_norm": 5.579641015968148, "learning_rate": 9.486434660444034e-07, "loss": 1.1573, "step": 3916 }, { "epoch": 0.5310106419033417, "grad_norm": 4.848337411169277, "learning_rate": 9.482049125880697e-07, "loss": 1.1425, "step": 3917 }, { "epoch": 0.5311462075510066, "grad_norm": 4.373379836764076, "learning_rate": 9.477663691200099e-07, "loss": 1.103, "step": 3918 }, { "epoch": 0.5312817731986714, "grad_norm": 12.932002191337391, "learning_rate": 9.47327835724793e-07, "loss": 1.0846, "step": 3919 }, { "epoch": 0.5314173388463364, "grad_norm": 4.119788643969644, "learning_rate": 9.468893124869878e-07, "loss": 1.1226, "step": 3920 }, { "epoch": 0.5315529044940012, "grad_norm": 4.53301708305036, "learning_rate": 9.464507994911589e-07, "loss": 1.1089, "step": 3921 }, { "epoch": 0.5316884701416661, "grad_norm": 7.716025816365177, "learning_rate": 9.460122968218711e-07, "loss": 1.0991, "step": 3922 }, { "epoch": 0.531824035789331, "grad_norm": 3.7234213019794407, "learning_rate": 9.455738045636853e-07, "loss": 1.0881, "step": 3923 }, { "epoch": 0.5319596014369958, "grad_norm": 6.393232052663243, "learning_rate": 9.451353228011622e-07, "loss": 1.0975, "step": 3924 }, { "epoch": 0.5320951670846608, "grad_norm": 22.188801977321457, "learning_rate": 9.446968516188584e-07, "loss": 1.0908, "step": 3925 }, { "epoch": 0.5322307327323256, "grad_norm": 5.45495622352779, "learning_rate": 9.442583911013308e-07, "loss": 1.076, "step": 3926 }, { "epoch": 0.5323662983799905, "grad_norm": 15.372564668410954, "learning_rate": 9.438199413331323e-07, "loss": 1.1074, "step": 3927 }, { "epoch": 0.5325018640276554, "grad_norm": 4.111626344638642, "learning_rate": 9.433815023988144e-07, "loss": 1.1369, "step": 3928 }, { "epoch": 0.5326374296753202, "grad_norm": 4.696350184296592, "learning_rate": 9.429430743829272e-07, "loss": 1.0801, "step": 3929 }, { "epoch": 0.5327729953229852, "grad_norm": 5.959045454918979, "learning_rate": 9.425046573700174e-07, "loss": 1.0668, "step": 3930 }, { "epoch": 0.5329085609706501, "grad_norm": 4.9168895994727295, "learning_rate": 9.420662514446309e-07, "loss": 1.1098, "step": 3931 }, { "epoch": 0.5330441266183149, "grad_norm": 6.403005610628977, "learning_rate": 9.4162785669131e-07, "loss": 1.1005, "step": 3932 }, { "epoch": 0.5331796922659798, "grad_norm": 6.062179367868134, "learning_rate": 9.411894731945968e-07, "loss": 1.1556, "step": 3933 }, { "epoch": 0.5333152579136446, "grad_norm": 4.760542752438013, "learning_rate": 9.40751101039029e-07, "loss": 1.1282, "step": 3934 }, { "epoch": 0.5334508235613096, "grad_norm": 6.190219754499008, "learning_rate": 9.403127403091441e-07, "loss": 1.1158, "step": 3935 }, { "epoch": 0.5335863892089745, "grad_norm": 4.7325913178489705, "learning_rate": 9.398743910894755e-07, "loss": 1.1443, "step": 3936 }, { "epoch": 0.5337219548566393, "grad_norm": 4.967086382684331, "learning_rate": 9.394360534645566e-07, "loss": 1.0992, "step": 3937 }, { "epoch": 0.5338575205043042, "grad_norm": 5.48577583396416, "learning_rate": 9.389977275189163e-07, "loss": 1.1106, "step": 3938 }, { "epoch": 0.533993086151969, "grad_norm": 9.04170736567127, "learning_rate": 9.38559413337083e-07, "loss": 1.132, "step": 3939 }, { "epoch": 0.534128651799634, "grad_norm": 4.760624218160553, "learning_rate": 9.381211110035819e-07, "loss": 1.1347, "step": 3940 }, { "epoch": 0.5342642174472989, "grad_norm": 6.088435089582164, "learning_rate": 9.376828206029358e-07, "loss": 1.1012, "step": 3941 }, { "epoch": 0.5343997830949637, "grad_norm": 6.157536997480395, "learning_rate": 9.372445422196662e-07, "loss": 1.0959, "step": 3942 }, { "epoch": 0.5345353487426286, "grad_norm": 4.452336664139838, "learning_rate": 9.368062759382908e-07, "loss": 1.1162, "step": 3943 }, { "epoch": 0.5346709143902935, "grad_norm": 4.349188653434796, "learning_rate": 9.363680218433267e-07, "loss": 1.1449, "step": 3944 }, { "epoch": 0.5348064800379584, "grad_norm": 6.533786407106352, "learning_rate": 9.359297800192871e-07, "loss": 1.1271, "step": 3945 }, { "epoch": 0.5349420456856233, "grad_norm": 6.061972771893232, "learning_rate": 9.354915505506838e-07, "loss": 1.129, "step": 3946 }, { "epoch": 0.5350776113332881, "grad_norm": 5.583346809250113, "learning_rate": 9.350533335220256e-07, "loss": 1.1216, "step": 3947 }, { "epoch": 0.535213176980953, "grad_norm": 5.399867461807667, "learning_rate": 9.346151290178195e-07, "loss": 1.1081, "step": 3948 }, { "epoch": 0.5353487426286179, "grad_norm": 5.360616265894125, "learning_rate": 9.341769371225696e-07, "loss": 1.109, "step": 3949 }, { "epoch": 0.5354843082762828, "grad_norm": 4.659130366675941, "learning_rate": 9.337387579207779e-07, "loss": 1.091, "step": 3950 }, { "epoch": 0.5356198739239477, "grad_norm": 4.537709331187187, "learning_rate": 9.333005914969434e-07, "loss": 1.1165, "step": 3951 }, { "epoch": 0.5357554395716125, "grad_norm": 9.39444670191508, "learning_rate": 9.328624379355639e-07, "loss": 1.0967, "step": 3952 }, { "epoch": 0.5358910052192775, "grad_norm": 19.04065463937581, "learning_rate": 9.324242973211326e-07, "loss": 1.0651, "step": 3953 }, { "epoch": 0.5360265708669423, "grad_norm": 4.389604575682065, "learning_rate": 9.319861697381427e-07, "loss": 1.116, "step": 3954 }, { "epoch": 0.5361621365146072, "grad_norm": 7.358212553304114, "learning_rate": 9.315480552710832e-07, "loss": 1.0474, "step": 3955 }, { "epoch": 0.5362977021622721, "grad_norm": 9.243565285692934, "learning_rate": 9.311099540044402e-07, "loss": 1.1086, "step": 3956 }, { "epoch": 0.5364332678099369, "grad_norm": 7.445106227342863, "learning_rate": 9.306718660226996e-07, "loss": 1.1342, "step": 3957 }, { "epoch": 0.5365688334576019, "grad_norm": 8.950349302159067, "learning_rate": 9.302337914103416e-07, "loss": 1.1353, "step": 3958 }, { "epoch": 0.5367043991052667, "grad_norm": 4.706450960715733, "learning_rate": 9.297957302518469e-07, "loss": 1.1818, "step": 3959 }, { "epoch": 0.5368399647529316, "grad_norm": 4.463301884534137, "learning_rate": 9.293576826316909e-07, "loss": 1.1087, "step": 3960 }, { "epoch": 0.5369755304005965, "grad_norm": 7.6948706868681525, "learning_rate": 9.289196486343487e-07, "loss": 1.1063, "step": 3961 }, { "epoch": 0.5371110960482613, "grad_norm": 5.318406153646406, "learning_rate": 9.284816283442907e-07, "loss": 1.1217, "step": 3962 }, { "epoch": 0.5372466616959263, "grad_norm": 4.606101468599675, "learning_rate": 9.280436218459866e-07, "loss": 1.1088, "step": 3963 }, { "epoch": 0.5373822273435911, "grad_norm": 17.046254502597748, "learning_rate": 9.276056292239016e-07, "loss": 1.125, "step": 3964 }, { "epoch": 0.537517792991256, "grad_norm": 5.250653329639084, "learning_rate": 9.271676505625e-07, "loss": 1.0993, "step": 3965 }, { "epoch": 0.5376533586389209, "grad_norm": 5.364296622542373, "learning_rate": 9.267296859462416e-07, "loss": 1.1035, "step": 3966 }, { "epoch": 0.5377889242865858, "grad_norm": 5.597005860494038, "learning_rate": 9.262917354595854e-07, "loss": 1.1451, "step": 3967 }, { "epoch": 0.5379244899342507, "grad_norm": 6.088410753792883, "learning_rate": 9.258537991869861e-07, "loss": 1.0998, "step": 3968 }, { "epoch": 0.5380600555819155, "grad_norm": 5.4931132761594315, "learning_rate": 9.254158772128961e-07, "loss": 1.1059, "step": 3969 }, { "epoch": 0.5381956212295804, "grad_norm": 6.016285102377604, "learning_rate": 9.249779696217658e-07, "loss": 1.1456, "step": 3970 }, { "epoch": 0.5383311868772453, "grad_norm": 6.682058116742477, "learning_rate": 9.245400764980413e-07, "loss": 1.0729, "step": 3971 }, { "epoch": 0.5384667525249102, "grad_norm": 7.6189054075027105, "learning_rate": 9.241021979261681e-07, "loss": 1.129, "step": 3972 }, { "epoch": 0.5386023181725751, "grad_norm": 7.840210404594059, "learning_rate": 9.236643339905863e-07, "loss": 1.1041, "step": 3973 }, { "epoch": 0.5387378838202399, "grad_norm": 5.20102037895342, "learning_rate": 9.232264847757356e-07, "loss": 1.1389, "step": 3974 }, { "epoch": 0.5388734494679048, "grad_norm": 5.221348487376429, "learning_rate": 9.227886503660509e-07, "loss": 1.0823, "step": 3975 }, { "epoch": 0.5390090151155698, "grad_norm": 4.739409830098908, "learning_rate": 9.223508308459659e-07, "loss": 1.1152, "step": 3976 }, { "epoch": 0.5391445807632346, "grad_norm": 6.851630189774068, "learning_rate": 9.219130262999101e-07, "loss": 1.0999, "step": 3977 }, { "epoch": 0.5392801464108995, "grad_norm": 5.525096138831882, "learning_rate": 9.214752368123107e-07, "loss": 1.1367, "step": 3978 }, { "epoch": 0.5394157120585643, "grad_norm": 9.303736456137395, "learning_rate": 9.21037462467592e-07, "loss": 1.099, "step": 3979 }, { "epoch": 0.5395512777062292, "grad_norm": 5.604500437815634, "learning_rate": 9.205997033501756e-07, "loss": 1.093, "step": 3980 }, { "epoch": 0.5396868433538942, "grad_norm": 5.798970997885447, "learning_rate": 9.201619595444795e-07, "loss": 1.1465, "step": 3981 }, { "epoch": 0.539822409001559, "grad_norm": 5.113727690430266, "learning_rate": 9.197242311349195e-07, "loss": 1.1073, "step": 3982 }, { "epoch": 0.5399579746492239, "grad_norm": 3.791825691913971, "learning_rate": 9.192865182059077e-07, "loss": 1.1012, "step": 3983 }, { "epoch": 0.5400935402968887, "grad_norm": 5.71561009124482, "learning_rate": 9.188488208418538e-07, "loss": 1.0762, "step": 3984 }, { "epoch": 0.5402291059445536, "grad_norm": 4.491483828275089, "learning_rate": 9.184111391271642e-07, "loss": 1.0924, "step": 3985 }, { "epoch": 0.5403646715922186, "grad_norm": 4.927436381800349, "learning_rate": 9.179734731462423e-07, "loss": 1.1, "step": 3986 }, { "epoch": 0.5405002372398834, "grad_norm": 4.383656468525499, "learning_rate": 9.175358229834888e-07, "loss": 1.0645, "step": 3987 }, { "epoch": 0.5406358028875483, "grad_norm": 5.44631894368923, "learning_rate": 9.170981887233007e-07, "loss": 1.1294, "step": 3988 }, { "epoch": 0.5407713685352131, "grad_norm": 27.40640122160061, "learning_rate": 9.166605704500728e-07, "loss": 1.0828, "step": 3989 }, { "epoch": 0.540906934182878, "grad_norm": 4.720641628131505, "learning_rate": 9.162229682481957e-07, "loss": 1.1076, "step": 3990 }, { "epoch": 0.541042499830543, "grad_norm": 4.59828558212007, "learning_rate": 9.157853822020582e-07, "loss": 1.1279, "step": 3991 }, { "epoch": 0.5411780654782078, "grad_norm": 9.037391467491117, "learning_rate": 9.153478123960446e-07, "loss": 1.1032, "step": 3992 }, { "epoch": 0.5413136311258727, "grad_norm": 8.779450822398305, "learning_rate": 9.149102589145376e-07, "loss": 1.1349, "step": 3993 }, { "epoch": 0.5414491967735375, "grad_norm": 6.979286651546144, "learning_rate": 9.144727218419151e-07, "loss": 1.1368, "step": 3994 }, { "epoch": 0.5415847624212025, "grad_norm": 6.419909125692768, "learning_rate": 9.140352012625536e-07, "loss": 1.101, "step": 3995 }, { "epoch": 0.5417203280688674, "grad_norm": 6.482409466522455, "learning_rate": 9.135976972608248e-07, "loss": 1.0994, "step": 3996 }, { "epoch": 0.5418558937165322, "grad_norm": 8.855180735112882, "learning_rate": 9.131602099210978e-07, "loss": 1.1508, "step": 3997 }, { "epoch": 0.5419914593641971, "grad_norm": 5.931568402134541, "learning_rate": 9.127227393277391e-07, "loss": 1.1251, "step": 3998 }, { "epoch": 0.5421270250118619, "grad_norm": 5.843782371542218, "learning_rate": 9.12285285565111e-07, "loss": 1.1047, "step": 3999 }, { "epoch": 0.5422625906595269, "grad_norm": 4.514643688820371, "learning_rate": 9.118478487175735e-07, "loss": 1.1006, "step": 4000 }, { "epoch": 0.5423981563071918, "grad_norm": 4.91401383475125, "learning_rate": 9.114104288694821e-07, "loss": 1.1279, "step": 4001 }, { "epoch": 0.5425337219548566, "grad_norm": 8.747825493841034, "learning_rate": 9.109730261051905e-07, "loss": 1.1185, "step": 4002 }, { "epoch": 0.5426692876025215, "grad_norm": 5.74004567262838, "learning_rate": 9.105356405090479e-07, "loss": 1.1055, "step": 4003 }, { "epoch": 0.5428048532501863, "grad_norm": 10.05530618630074, "learning_rate": 9.100982721654011e-07, "loss": 1.1168, "step": 4004 }, { "epoch": 0.5429404188978513, "grad_norm": 13.130633698573654, "learning_rate": 9.096609211585926e-07, "loss": 1.1166, "step": 4005 }, { "epoch": 0.5430759845455162, "grad_norm": 5.525752144898968, "learning_rate": 9.092235875729627e-07, "loss": 1.1125, "step": 4006 }, { "epoch": 0.543211550193181, "grad_norm": 4.528809139706687, "learning_rate": 9.087862714928471e-07, "loss": 1.0803, "step": 4007 }, { "epoch": 0.5433471158408459, "grad_norm": 7.297817110485821, "learning_rate": 9.083489730025791e-07, "loss": 1.1151, "step": 4008 }, { "epoch": 0.5434826814885109, "grad_norm": 6.305797030436625, "learning_rate": 9.079116921864883e-07, "loss": 1.113, "step": 4009 }, { "epoch": 0.5436182471361757, "grad_norm": 6.343398640854999, "learning_rate": 9.074744291289007e-07, "loss": 1.0952, "step": 4010 }, { "epoch": 0.5437538127838406, "grad_norm": 7.61841635570382, "learning_rate": 9.070371839141393e-07, "loss": 1.0787, "step": 4011 }, { "epoch": 0.5438893784315054, "grad_norm": 6.008705030613505, "learning_rate": 9.065999566265229e-07, "loss": 1.1157, "step": 4012 }, { "epoch": 0.5440249440791703, "grad_norm": 7.811888528413564, "learning_rate": 9.061627473503677e-07, "loss": 1.1514, "step": 4013 }, { "epoch": 0.5441605097268353, "grad_norm": 6.042866722096638, "learning_rate": 9.057255561699859e-07, "loss": 1.1085, "step": 4014 }, { "epoch": 0.5442960753745001, "grad_norm": 11.51874255644579, "learning_rate": 9.052883831696865e-07, "loss": 1.0936, "step": 4015 }, { "epoch": 0.544431641022165, "grad_norm": 6.3598086486378875, "learning_rate": 9.048512284337747e-07, "loss": 1.1122, "step": 4016 }, { "epoch": 0.5445672066698298, "grad_norm": 12.646430796215274, "learning_rate": 9.044140920465529e-07, "loss": 1.1283, "step": 4017 }, { "epoch": 0.5447027723174948, "grad_norm": 10.874028658918581, "learning_rate": 9.039769740923182e-07, "loss": 1.0974, "step": 4018 }, { "epoch": 0.5448383379651597, "grad_norm": 6.058946438158476, "learning_rate": 9.035398746553667e-07, "loss": 1.1014, "step": 4019 }, { "epoch": 0.5449739036128245, "grad_norm": 8.452683923816824, "learning_rate": 9.031027938199884e-07, "loss": 1.1248, "step": 4020 }, { "epoch": 0.5451094692604894, "grad_norm": 4.3279994307364245, "learning_rate": 9.02665731670472e-07, "loss": 1.0975, "step": 4021 }, { "epoch": 0.5452450349081542, "grad_norm": 13.71988252628978, "learning_rate": 9.022286882911005e-07, "loss": 1.1196, "step": 4022 }, { "epoch": 0.5453806005558192, "grad_norm": 7.050354953173232, "learning_rate": 9.01791663766155e-07, "loss": 1.1053, "step": 4023 }, { "epoch": 0.5455161662034841, "grad_norm": 7.063368564259007, "learning_rate": 9.01354658179912e-07, "loss": 1.095, "step": 4024 }, { "epoch": 0.5456517318511489, "grad_norm": 3.8407860242013716, "learning_rate": 9.009176716166442e-07, "loss": 1.0829, "step": 4025 }, { "epoch": 0.5457872974988138, "grad_norm": 3.947190872776258, "learning_rate": 9.004807041606217e-07, "loss": 1.1126, "step": 4026 }, { "epoch": 0.5459228631464786, "grad_norm": 5.44369412382572, "learning_rate": 9.000437558961094e-07, "loss": 1.0666, "step": 4027 }, { "epoch": 0.5460584287941436, "grad_norm": 5.527116017313776, "learning_rate": 8.996068269073701e-07, "loss": 1.1126, "step": 4028 }, { "epoch": 0.5461939944418085, "grad_norm": 4.377407246545431, "learning_rate": 8.991699172786614e-07, "loss": 1.0886, "step": 4029 }, { "epoch": 0.5463295600894733, "grad_norm": 12.997595237687273, "learning_rate": 8.987330270942388e-07, "loss": 1.1566, "step": 4030 }, { "epoch": 0.5464651257371382, "grad_norm": 4.056358531171371, "learning_rate": 8.98296156438352e-07, "loss": 1.1295, "step": 4031 }, { "epoch": 0.546600691384803, "grad_norm": 5.565058071438536, "learning_rate": 8.978593053952492e-07, "loss": 1.1004, "step": 4032 }, { "epoch": 0.546736257032468, "grad_norm": 10.116508340421527, "learning_rate": 8.974224740491725e-07, "loss": 1.1205, "step": 4033 }, { "epoch": 0.5468718226801329, "grad_norm": 14.608477977707519, "learning_rate": 8.969856624843625e-07, "loss": 1.117, "step": 4034 }, { "epoch": 0.5470073883277977, "grad_norm": 14.947507698435224, "learning_rate": 8.965488707850539e-07, "loss": 1.1031, "step": 4035 }, { "epoch": 0.5471429539754626, "grad_norm": 5.733528634111426, "learning_rate": 8.961120990354794e-07, "loss": 1.1279, "step": 4036 }, { "epoch": 0.5472785196231275, "grad_norm": 5.289260115123045, "learning_rate": 8.956753473198662e-07, "loss": 1.1208, "step": 4037 }, { "epoch": 0.5474140852707924, "grad_norm": 4.809404986571882, "learning_rate": 8.952386157224391e-07, "loss": 1.1061, "step": 4038 }, { "epoch": 0.5475496509184573, "grad_norm": 3.9674048084261346, "learning_rate": 8.948019043274181e-07, "loss": 1.1137, "step": 4039 }, { "epoch": 0.5476852165661221, "grad_norm": 6.779039619359498, "learning_rate": 8.943652132190189e-07, "loss": 1.1126, "step": 4040 }, { "epoch": 0.547820782213787, "grad_norm": 4.3116092462233935, "learning_rate": 8.939285424814551e-07, "loss": 1.1057, "step": 4041 }, { "epoch": 0.5479563478614519, "grad_norm": 6.059821026417018, "learning_rate": 8.934918921989341e-07, "loss": 1.1035, "step": 4042 }, { "epoch": 0.5480919135091168, "grad_norm": 17.222134922133236, "learning_rate": 8.930552624556615e-07, "loss": 1.1106, "step": 4043 }, { "epoch": 0.5482274791567817, "grad_norm": 5.506961765582111, "learning_rate": 8.92618653335837e-07, "loss": 1.1132, "step": 4044 }, { "epoch": 0.5483630448044465, "grad_norm": 7.7675732059334806, "learning_rate": 8.921820649236576e-07, "loss": 1.0926, "step": 4045 }, { "epoch": 0.5484986104521115, "grad_norm": 7.943412463803645, "learning_rate": 8.917454973033161e-07, "loss": 1.0983, "step": 4046 }, { "epoch": 0.5486341760997763, "grad_norm": 8.045630827725065, "learning_rate": 8.913089505590007e-07, "loss": 1.12, "step": 4047 }, { "epoch": 0.5487697417474412, "grad_norm": 5.627081636442883, "learning_rate": 8.908724247748963e-07, "loss": 1.1141, "step": 4048 }, { "epoch": 0.5489053073951061, "grad_norm": 6.430107452029901, "learning_rate": 8.904359200351837e-07, "loss": 1.1498, "step": 4049 }, { "epoch": 0.5490408730427709, "grad_norm": 6.136519699138342, "learning_rate": 8.899994364240385e-07, "loss": 1.093, "step": 4050 }, { "epoch": 0.5491764386904359, "grad_norm": 4.894001403585023, "learning_rate": 8.895629740256343e-07, "loss": 1.0844, "step": 4051 }, { "epoch": 0.5493120043381007, "grad_norm": 11.641784301360106, "learning_rate": 8.891265329241387e-07, "loss": 1.1552, "step": 4052 }, { "epoch": 0.5494475699857656, "grad_norm": 7.5042968453059045, "learning_rate": 8.886901132037155e-07, "loss": 1.1258, "step": 4053 }, { "epoch": 0.5495831356334305, "grad_norm": 5.394188533361613, "learning_rate": 8.88253714948526e-07, "loss": 1.127, "step": 4054 }, { "epoch": 0.5497187012810953, "grad_norm": 4.412920780132943, "learning_rate": 8.87817338242725e-07, "loss": 1.1415, "step": 4055 }, { "epoch": 0.5498542669287603, "grad_norm": 10.883120083813564, "learning_rate": 8.873809831704652e-07, "loss": 1.1242, "step": 4056 }, { "epoch": 0.5499898325764251, "grad_norm": 8.41336264948205, "learning_rate": 8.869446498158935e-07, "loss": 1.1028, "step": 4057 }, { "epoch": 0.55012539822409, "grad_norm": 4.790022848984992, "learning_rate": 8.865083382631539e-07, "loss": 1.116, "step": 4058 }, { "epoch": 0.5502609638717549, "grad_norm": 4.5913152186305615, "learning_rate": 8.860720485963851e-07, "loss": 1.1246, "step": 4059 }, { "epoch": 0.5503965295194198, "grad_norm": 5.255261514273365, "learning_rate": 8.856357808997229e-07, "loss": 1.1312, "step": 4060 }, { "epoch": 0.5505320951670847, "grad_norm": 4.698896366384761, "learning_rate": 8.851995352572972e-07, "loss": 1.1174, "step": 4061 }, { "epoch": 0.5506676608147495, "grad_norm": 5.074620544070532, "learning_rate": 8.847633117532353e-07, "loss": 1.1054, "step": 4062 }, { "epoch": 0.5508032264624144, "grad_norm": 4.468037478545665, "learning_rate": 8.843271104716588e-07, "loss": 1.1568, "step": 4063 }, { "epoch": 0.5509387921100793, "grad_norm": 7.373369019169346, "learning_rate": 8.838909314966863e-07, "loss": 1.1284, "step": 4064 }, { "epoch": 0.5510743577577442, "grad_norm": 6.168255646874101, "learning_rate": 8.834547749124307e-07, "loss": 1.1209, "step": 4065 }, { "epoch": 0.5512099234054091, "grad_norm": 5.015125399748618, "learning_rate": 8.830186408030023e-07, "loss": 1.0815, "step": 4066 }, { "epoch": 0.5513454890530739, "grad_norm": 3.727389392451132, "learning_rate": 8.825825292525056e-07, "loss": 1.1502, "step": 4067 }, { "epoch": 0.5514810547007388, "grad_norm": 8.165740261170125, "learning_rate": 8.821464403450408e-07, "loss": 1.1285, "step": 4068 }, { "epoch": 0.5516166203484038, "grad_norm": 4.211869646030968, "learning_rate": 8.817103741647052e-07, "loss": 1.0859, "step": 4069 }, { "epoch": 0.5517521859960686, "grad_norm": 5.669006024620353, "learning_rate": 8.812743307955899e-07, "loss": 1.1269, "step": 4070 }, { "epoch": 0.5518877516437335, "grad_norm": 14.626498412382201, "learning_rate": 8.80838310321783e-07, "loss": 1.09, "step": 4071 }, { "epoch": 0.5520233172913983, "grad_norm": 4.8040836818962385, "learning_rate": 8.80402312827367e-07, "loss": 1.1116, "step": 4072 }, { "epoch": 0.5521588829390632, "grad_norm": 6.297346566634473, "learning_rate": 8.799663383964213e-07, "loss": 1.0713, "step": 4073 }, { "epoch": 0.5522944485867282, "grad_norm": 6.3771826053852685, "learning_rate": 8.795303871130196e-07, "loss": 1.1161, "step": 4074 }, { "epoch": 0.552430014234393, "grad_norm": 4.638449186666817, "learning_rate": 8.790944590612318e-07, "loss": 1.107, "step": 4075 }, { "epoch": 0.5525655798820579, "grad_norm": 5.13189797134375, "learning_rate": 8.786585543251232e-07, "loss": 1.1105, "step": 4076 }, { "epoch": 0.5527011455297227, "grad_norm": 6.188665891201945, "learning_rate": 8.782226729887546e-07, "loss": 1.1001, "step": 4077 }, { "epoch": 0.5528367111773876, "grad_norm": 6.89369844143633, "learning_rate": 8.777868151361823e-07, "loss": 1.1064, "step": 4078 }, { "epoch": 0.5529722768250526, "grad_norm": 3.992166155934123, "learning_rate": 8.773509808514581e-07, "loss": 1.0855, "step": 4079 }, { "epoch": 0.5531078424727174, "grad_norm": 16.36503154391374, "learning_rate": 8.769151702186289e-07, "loss": 1.1237, "step": 4080 }, { "epoch": 0.5532434081203823, "grad_norm": 5.403346442744131, "learning_rate": 8.764793833217377e-07, "loss": 1.1198, "step": 4081 }, { "epoch": 0.5533789737680471, "grad_norm": 5.619177631822348, "learning_rate": 8.760436202448223e-07, "loss": 1.0563, "step": 4082 }, { "epoch": 0.553514539415712, "grad_norm": 5.856914189774971, "learning_rate": 8.756078810719163e-07, "loss": 1.1169, "step": 4083 }, { "epoch": 0.553650105063377, "grad_norm": 5.886687952957864, "learning_rate": 8.751721658870488e-07, "loss": 1.1062, "step": 4084 }, { "epoch": 0.5537856707110418, "grad_norm": 5.089553331992478, "learning_rate": 8.747364747742433e-07, "loss": 1.0956, "step": 4085 }, { "epoch": 0.5539212363587067, "grad_norm": 4.86721599862606, "learning_rate": 8.743008078175202e-07, "loss": 1.1324, "step": 4086 }, { "epoch": 0.5540568020063716, "grad_norm": 7.226481068865632, "learning_rate": 8.73865165100894e-07, "loss": 1.1204, "step": 4087 }, { "epoch": 0.5541923676540365, "grad_norm": 5.146828295125884, "learning_rate": 8.734295467083752e-07, "loss": 1.1186, "step": 4088 }, { "epoch": 0.5543279333017014, "grad_norm": 5.4232456717873685, "learning_rate": 8.729939527239688e-07, "loss": 1.1048, "step": 4089 }, { "epoch": 0.5544634989493662, "grad_norm": 5.672383669310398, "learning_rate": 8.725583832316767e-07, "loss": 1.1306, "step": 4090 }, { "epoch": 0.5545990645970311, "grad_norm": 9.359893307206116, "learning_rate": 8.721228383154939e-07, "loss": 1.126, "step": 4091 }, { "epoch": 0.554734630244696, "grad_norm": 7.0827413383613615, "learning_rate": 8.716873180594128e-07, "loss": 1.0949, "step": 4092 }, { "epoch": 0.5548701958923609, "grad_norm": 3.545654987072784, "learning_rate": 8.71251822547419e-07, "loss": 1.1377, "step": 4093 }, { "epoch": 0.5550057615400258, "grad_norm": 15.440822659383796, "learning_rate": 8.708163518634956e-07, "loss": 1.1384, "step": 4094 }, { "epoch": 0.5551413271876906, "grad_norm": 4.223635570019245, "learning_rate": 8.703809060916188e-07, "loss": 1.1036, "step": 4095 }, { "epoch": 0.5552768928353555, "grad_norm": 5.549499974994683, "learning_rate": 8.699454853157608e-07, "loss": 1.107, "step": 4096 }, { "epoch": 0.5554124584830205, "grad_norm": 11.220592368249104, "learning_rate": 8.695100896198898e-07, "loss": 1.0919, "step": 4097 }, { "epoch": 0.5555480241306853, "grad_norm": 5.006999675474153, "learning_rate": 8.690747190879676e-07, "loss": 1.1153, "step": 4098 }, { "epoch": 0.5556835897783502, "grad_norm": 13.239207523990881, "learning_rate": 8.686393738039527e-07, "loss": 1.1008, "step": 4099 }, { "epoch": 0.555819155426015, "grad_norm": 6.271375351529569, "learning_rate": 8.682040538517973e-07, "loss": 1.1277, "step": 4100 }, { "epoch": 0.5559547210736799, "grad_norm": 4.573375933119899, "learning_rate": 8.677687593154503e-07, "loss": 1.0867, "step": 4101 }, { "epoch": 0.5560902867213449, "grad_norm": 7.497552282753712, "learning_rate": 8.673334902788536e-07, "loss": 1.09, "step": 4102 }, { "epoch": 0.5562258523690097, "grad_norm": 7.295647794985548, "learning_rate": 8.668982468259467e-07, "loss": 1.1732, "step": 4103 }, { "epoch": 0.5563614180166746, "grad_norm": 4.502493703498376, "learning_rate": 8.664630290406618e-07, "loss": 1.1297, "step": 4104 }, { "epoch": 0.5564969836643394, "grad_norm": 4.175436240737732, "learning_rate": 8.660278370069281e-07, "loss": 1.1321, "step": 4105 }, { "epoch": 0.5566325493120043, "grad_norm": 9.19343139132809, "learning_rate": 8.655926708086684e-07, "loss": 1.1078, "step": 4106 }, { "epoch": 0.5567681149596693, "grad_norm": 5.559489116055455, "learning_rate": 8.651575305298011e-07, "loss": 1.1035, "step": 4107 }, { "epoch": 0.5569036806073341, "grad_norm": 5.676993477697575, "learning_rate": 8.6472241625424e-07, "loss": 1.1384, "step": 4108 }, { "epoch": 0.557039246254999, "grad_norm": 6.09492680202188, "learning_rate": 8.642873280658924e-07, "loss": 1.1267, "step": 4109 }, { "epoch": 0.5571748119026638, "grad_norm": 6.36137184554895, "learning_rate": 8.63852266048663e-07, "loss": 1.0959, "step": 4110 }, { "epoch": 0.5573103775503288, "grad_norm": 5.055059378149775, "learning_rate": 8.634172302864491e-07, "loss": 1.087, "step": 4111 }, { "epoch": 0.5574459431979937, "grad_norm": 3.7561226595360493, "learning_rate": 8.629822208631442e-07, "loss": 1.1077, "step": 4112 }, { "epoch": 0.5575815088456585, "grad_norm": 78.35761573279453, "learning_rate": 8.625472378626365e-07, "loss": 1.0946, "step": 4113 }, { "epoch": 0.5577170744933234, "grad_norm": 3.96979722015886, "learning_rate": 8.62112281368809e-07, "loss": 1.0927, "step": 4114 }, { "epoch": 0.5578526401409882, "grad_norm": 4.407366067266106, "learning_rate": 8.616773514655395e-07, "loss": 1.0876, "step": 4115 }, { "epoch": 0.5579882057886532, "grad_norm": 14.235561501377697, "learning_rate": 8.612424482367014e-07, "loss": 1.1014, "step": 4116 }, { "epoch": 0.5581237714363181, "grad_norm": 6.959448567920494, "learning_rate": 8.608075717661611e-07, "loss": 1.0918, "step": 4117 }, { "epoch": 0.5582593370839829, "grad_norm": 6.157773917356829, "learning_rate": 8.603727221377826e-07, "loss": 1.1388, "step": 4118 }, { "epoch": 0.5583949027316478, "grad_norm": 4.672635427108998, "learning_rate": 8.599378994354218e-07, "loss": 1.1563, "step": 4119 }, { "epoch": 0.5585304683793126, "grad_norm": 5.219139861611144, "learning_rate": 8.595031037429321e-07, "loss": 1.1126, "step": 4120 }, { "epoch": 0.5586660340269776, "grad_norm": 4.902321479232233, "learning_rate": 8.590683351441594e-07, "loss": 1.1446, "step": 4121 }, { "epoch": 0.5588015996746425, "grad_norm": 6.226672403176164, "learning_rate": 8.586335937229462e-07, "loss": 1.1415, "step": 4122 }, { "epoch": 0.5589371653223073, "grad_norm": 5.3623705965352695, "learning_rate": 8.581988795631285e-07, "loss": 1.1295, "step": 4123 }, { "epoch": 0.5590727309699722, "grad_norm": 4.452958132497137, "learning_rate": 8.577641927485373e-07, "loss": 1.1014, "step": 4124 }, { "epoch": 0.559208296617637, "grad_norm": 3.4530984444646813, "learning_rate": 8.573295333629991e-07, "loss": 1.1376, "step": 4125 }, { "epoch": 0.559343862265302, "grad_norm": 9.184994599655976, "learning_rate": 8.568949014903339e-07, "loss": 1.1239, "step": 4126 }, { "epoch": 0.5594794279129669, "grad_norm": 6.154967039466638, "learning_rate": 8.564602972143576e-07, "loss": 1.1389, "step": 4127 }, { "epoch": 0.5596149935606317, "grad_norm": 6.405610072059719, "learning_rate": 8.560257206188797e-07, "loss": 1.0997, "step": 4128 }, { "epoch": 0.5597505592082966, "grad_norm": 4.768807577408647, "learning_rate": 8.555911717877053e-07, "loss": 1.0956, "step": 4129 }, { "epoch": 0.5598861248559615, "grad_norm": 3.4886165125681754, "learning_rate": 8.551566508046334e-07, "loss": 1.1069, "step": 4130 }, { "epoch": 0.5600216905036264, "grad_norm": 5.666713961570939, "learning_rate": 8.547221577534583e-07, "loss": 1.0962, "step": 4131 }, { "epoch": 0.5601572561512913, "grad_norm": 4.3524557779100865, "learning_rate": 8.542876927179679e-07, "loss": 1.1143, "step": 4132 }, { "epoch": 0.5602928217989561, "grad_norm": 5.655598658066337, "learning_rate": 8.538532557819463e-07, "loss": 1.0829, "step": 4133 }, { "epoch": 0.560428387446621, "grad_norm": 5.70855843986242, "learning_rate": 8.534188470291704e-07, "loss": 1.1106, "step": 4134 }, { "epoch": 0.5605639530942859, "grad_norm": 8.992084130632433, "learning_rate": 8.529844665434129e-07, "loss": 1.1218, "step": 4135 }, { "epoch": 0.5606995187419508, "grad_norm": 5.531055790327928, "learning_rate": 8.525501144084409e-07, "loss": 1.1015, "step": 4136 }, { "epoch": 0.5608350843896157, "grad_norm": 5.177696410753734, "learning_rate": 8.521157907080148e-07, "loss": 1.1039, "step": 4137 }, { "epoch": 0.5609706500372805, "grad_norm": 6.40338295348931, "learning_rate": 8.516814955258916e-07, "loss": 1.0919, "step": 4138 }, { "epoch": 0.5611062156849455, "grad_norm": 6.341355382567707, "learning_rate": 8.512472289458208e-07, "loss": 1.1461, "step": 4139 }, { "epoch": 0.5612417813326103, "grad_norm": 8.340472539166633, "learning_rate": 8.508129910515482e-07, "loss": 1.1116, "step": 4140 }, { "epoch": 0.5613773469802752, "grad_norm": 11.733030697519657, "learning_rate": 8.503787819268124e-07, "loss": 1.1297, "step": 4141 }, { "epoch": 0.5615129126279401, "grad_norm": 5.894111326233613, "learning_rate": 8.499446016553473e-07, "loss": 1.1183, "step": 4142 }, { "epoch": 0.5616484782756049, "grad_norm": 8.471526255317631, "learning_rate": 8.495104503208816e-07, "loss": 1.1567, "step": 4143 }, { "epoch": 0.5617840439232699, "grad_norm": 4.805304778290967, "learning_rate": 8.490763280071375e-07, "loss": 1.1212, "step": 4144 }, { "epoch": 0.5619196095709347, "grad_norm": 3.8389062500052127, "learning_rate": 8.486422347978323e-07, "loss": 1.0869, "step": 4145 }, { "epoch": 0.5620551752185996, "grad_norm": 3.8048455275874464, "learning_rate": 8.482081707766775e-07, "loss": 1.0743, "step": 4146 }, { "epoch": 0.5621907408662645, "grad_norm": 5.740957000397851, "learning_rate": 8.477741360273785e-07, "loss": 1.1318, "step": 4147 }, { "epoch": 0.5623263065139293, "grad_norm": 4.941492247882757, "learning_rate": 8.47340130633636e-07, "loss": 1.0834, "step": 4148 }, { "epoch": 0.5624618721615943, "grad_norm": 5.425895101757016, "learning_rate": 8.46906154679144e-07, "loss": 1.1342, "step": 4149 }, { "epoch": 0.5625974378092591, "grad_norm": 7.286275334057639, "learning_rate": 8.46472208247592e-07, "loss": 1.1068, "step": 4150 }, { "epoch": 0.562733003456924, "grad_norm": 6.153556359168374, "learning_rate": 8.460382914226628e-07, "loss": 1.0822, "step": 4151 }, { "epoch": 0.5628685691045889, "grad_norm": 5.558518412542682, "learning_rate": 8.456044042880333e-07, "loss": 1.1119, "step": 4152 }, { "epoch": 0.5630041347522537, "grad_norm": 4.539203053656194, "learning_rate": 8.451705469273763e-07, "loss": 1.123, "step": 4153 }, { "epoch": 0.5631397003999187, "grad_norm": 4.255534294251451, "learning_rate": 8.447367194243567e-07, "loss": 1.1299, "step": 4154 }, { "epoch": 0.5632752660475835, "grad_norm": 5.285092249469028, "learning_rate": 8.443029218626355e-07, "loss": 1.1146, "step": 4155 }, { "epoch": 0.5634108316952484, "grad_norm": 4.794604759442521, "learning_rate": 8.438691543258665e-07, "loss": 1.1177, "step": 4156 }, { "epoch": 0.5635463973429133, "grad_norm": 5.050662385331371, "learning_rate": 8.434354168976989e-07, "loss": 1.1251, "step": 4157 }, { "epoch": 0.5636819629905782, "grad_norm": 4.521332938181291, "learning_rate": 8.430017096617751e-07, "loss": 1.1858, "step": 4158 }, { "epoch": 0.5638175286382431, "grad_norm": 7.060961424257379, "learning_rate": 8.425680327017326e-07, "loss": 1.0542, "step": 4159 }, { "epoch": 0.5639530942859079, "grad_norm": 4.022165041511096, "learning_rate": 8.42134386101202e-07, "loss": 1.1042, "step": 4160 }, { "epoch": 0.5640886599335728, "grad_norm": 4.029564022342907, "learning_rate": 8.417007699438093e-07, "loss": 1.1157, "step": 4161 }, { "epoch": 0.5642242255812377, "grad_norm": 3.8535075497625813, "learning_rate": 8.412671843131731e-07, "loss": 1.087, "step": 4162 }, { "epoch": 0.5643597912289026, "grad_norm": 5.578392668674615, "learning_rate": 8.408336292929079e-07, "loss": 1.0718, "step": 4163 }, { "epoch": 0.5644953568765675, "grad_norm": 7.862431628698618, "learning_rate": 8.40400104966621e-07, "loss": 1.1231, "step": 4164 }, { "epoch": 0.5646309225242324, "grad_norm": 6.37783145232804, "learning_rate": 8.399666114179136e-07, "loss": 1.1314, "step": 4165 }, { "epoch": 0.5647664881718972, "grad_norm": 5.427248183277344, "learning_rate": 8.395331487303823e-07, "loss": 1.1088, "step": 4166 }, { "epoch": 0.5649020538195622, "grad_norm": 6.159216203960677, "learning_rate": 8.390997169876161e-07, "loss": 1.1635, "step": 4167 }, { "epoch": 0.565037619467227, "grad_norm": 32.74296531679099, "learning_rate": 8.386663162732001e-07, "loss": 1.1262, "step": 4168 }, { "epoch": 0.5651731851148919, "grad_norm": 8.250078386363906, "learning_rate": 8.38232946670711e-07, "loss": 1.0806, "step": 4169 }, { "epoch": 0.5653087507625568, "grad_norm": 8.627269337549807, "learning_rate": 8.377996082637215e-07, "loss": 1.0913, "step": 4170 }, { "epoch": 0.5654443164102216, "grad_norm": 6.0339067819957, "learning_rate": 8.37366301135797e-07, "loss": 1.1308, "step": 4171 }, { "epoch": 0.5655798820578866, "grad_norm": 13.686383364792208, "learning_rate": 8.369330253704979e-07, "loss": 1.1466, "step": 4172 }, { "epoch": 0.5657154477055514, "grad_norm": 12.920088137058004, "learning_rate": 8.364997810513774e-07, "loss": 1.0878, "step": 4173 }, { "epoch": 0.5658510133532163, "grad_norm": 7.54009617331255, "learning_rate": 8.360665682619837e-07, "loss": 1.087, "step": 4174 }, { "epoch": 0.5659865790008812, "grad_norm": 5.768376023378372, "learning_rate": 8.356333870858581e-07, "loss": 1.1475, "step": 4175 }, { "epoch": 0.566122144648546, "grad_norm": 5.056775433751034, "learning_rate": 8.352002376065364e-07, "loss": 1.1272, "step": 4176 }, { "epoch": 0.566257710296211, "grad_norm": 6.445004062231953, "learning_rate": 8.347671199075481e-07, "loss": 1.0715, "step": 4177 }, { "epoch": 0.5663932759438758, "grad_norm": 4.7464002695095715, "learning_rate": 8.343340340724168e-07, "loss": 1.0956, "step": 4178 }, { "epoch": 0.5665288415915407, "grad_norm": 4.153657183484218, "learning_rate": 8.339009801846589e-07, "loss": 1.0882, "step": 4179 }, { "epoch": 0.5666644072392056, "grad_norm": 5.323054487167493, "learning_rate": 8.334679583277859e-07, "loss": 1.0666, "step": 4180 }, { "epoch": 0.5667999728868705, "grad_norm": 5.723180664326325, "learning_rate": 8.330349685853027e-07, "loss": 1.1322, "step": 4181 }, { "epoch": 0.5669355385345354, "grad_norm": 4.38495324700614, "learning_rate": 8.326020110407079e-07, "loss": 1.094, "step": 4182 }, { "epoch": 0.5670711041822002, "grad_norm": 5.538961718801445, "learning_rate": 8.32169085777494e-07, "loss": 1.1139, "step": 4183 }, { "epoch": 0.5672066698298651, "grad_norm": 4.351544749491445, "learning_rate": 8.317361928791467e-07, "loss": 1.1436, "step": 4184 }, { "epoch": 0.56734223547753, "grad_norm": 7.49065108858561, "learning_rate": 8.313033324291469e-07, "loss": 1.1455, "step": 4185 }, { "epoch": 0.5674778011251949, "grad_norm": 10.483964029847678, "learning_rate": 8.308705045109675e-07, "loss": 1.1454, "step": 4186 }, { "epoch": 0.5676133667728598, "grad_norm": 10.608106030845681, "learning_rate": 8.304377092080766e-07, "loss": 1.1202, "step": 4187 }, { "epoch": 0.5677489324205246, "grad_norm": 6.3506841033189705, "learning_rate": 8.300049466039346e-07, "loss": 1.1389, "step": 4188 }, { "epoch": 0.5678844980681895, "grad_norm": 4.228113790850679, "learning_rate": 8.295722167819973e-07, "loss": 1.0725, "step": 4189 }, { "epoch": 0.5680200637158545, "grad_norm": 4.412022920929986, "learning_rate": 8.291395198257122e-07, "loss": 1.1308, "step": 4190 }, { "epoch": 0.5681556293635193, "grad_norm": 7.268685231996679, "learning_rate": 8.287068558185224e-07, "loss": 1.1147, "step": 4191 }, { "epoch": 0.5682911950111842, "grad_norm": 4.628682558682189, "learning_rate": 8.282742248438634e-07, "loss": 1.0802, "step": 4192 }, { "epoch": 0.568426760658849, "grad_norm": 3.6101619296486813, "learning_rate": 8.278416269851643e-07, "loss": 1.1142, "step": 4193 }, { "epoch": 0.5685623263065139, "grad_norm": 6.924633611544964, "learning_rate": 8.274090623258489e-07, "loss": 1.1202, "step": 4194 }, { "epoch": 0.5686978919541789, "grad_norm": 4.248984111845555, "learning_rate": 8.269765309493328e-07, "loss": 1.0793, "step": 4195 }, { "epoch": 0.5688334576018437, "grad_norm": 5.013839128974763, "learning_rate": 8.265440329390276e-07, "loss": 1.1171, "step": 4196 }, { "epoch": 0.5689690232495086, "grad_norm": 4.364966913545298, "learning_rate": 8.261115683783361e-07, "loss": 1.1093, "step": 4197 }, { "epoch": 0.5691045888971734, "grad_norm": 4.612603664672382, "learning_rate": 8.256791373506563e-07, "loss": 1.0645, "step": 4198 }, { "epoch": 0.5692401545448383, "grad_norm": 3.518643732976211, "learning_rate": 8.252467399393786e-07, "loss": 1.1058, "step": 4199 }, { "epoch": 0.5693757201925033, "grad_norm": 4.236907556273307, "learning_rate": 8.248143762278879e-07, "loss": 1.1101, "step": 4200 }, { "epoch": 0.5695112858401681, "grad_norm": 10.006424598716242, "learning_rate": 8.243820462995617e-07, "loss": 1.0665, "step": 4201 }, { "epoch": 0.569646851487833, "grad_norm": 5.857847471443422, "learning_rate": 8.239497502377719e-07, "loss": 1.1315, "step": 4202 }, { "epoch": 0.5697824171354978, "grad_norm": 5.102061476096233, "learning_rate": 8.235174881258827e-07, "loss": 1.1155, "step": 4203 }, { "epoch": 0.5699179827831627, "grad_norm": 5.296056158936005, "learning_rate": 8.230852600472533e-07, "loss": 1.119, "step": 4204 }, { "epoch": 0.5700535484308277, "grad_norm": 6.235026805921968, "learning_rate": 8.226530660852349e-07, "loss": 1.0846, "step": 4205 }, { "epoch": 0.5701891140784925, "grad_norm": 3.4859009264945144, "learning_rate": 8.222209063231727e-07, "loss": 1.0871, "step": 4206 }, { "epoch": 0.5703246797261574, "grad_norm": 4.242467893987442, "learning_rate": 8.217887808444056e-07, "loss": 1.1093, "step": 4207 }, { "epoch": 0.5704602453738222, "grad_norm": 5.318389681161493, "learning_rate": 8.213566897322651e-07, "loss": 1.102, "step": 4208 }, { "epoch": 0.5705958110214872, "grad_norm": 4.900987216486727, "learning_rate": 8.209246330700772e-07, "loss": 1.1022, "step": 4209 }, { "epoch": 0.5707313766691521, "grad_norm": 4.643765140813469, "learning_rate": 8.204926109411601e-07, "loss": 1.1001, "step": 4210 }, { "epoch": 0.5708669423168169, "grad_norm": 5.876113426401696, "learning_rate": 8.20060623428826e-07, "loss": 1.121, "step": 4211 }, { "epoch": 0.5710025079644818, "grad_norm": 5.307827752079064, "learning_rate": 8.196286706163804e-07, "loss": 1.0902, "step": 4212 }, { "epoch": 0.5711380736121466, "grad_norm": 8.240679150308466, "learning_rate": 8.191967525871219e-07, "loss": 1.081, "step": 4213 }, { "epoch": 0.5712736392598116, "grad_norm": 3.647940952147953, "learning_rate": 8.187648694243423e-07, "loss": 1.0826, "step": 4214 }, { "epoch": 0.5714092049074765, "grad_norm": 4.86784896767475, "learning_rate": 8.183330212113273e-07, "loss": 1.1216, "step": 4215 }, { "epoch": 0.5715447705551413, "grad_norm": 5.410688874005871, "learning_rate": 8.179012080313549e-07, "loss": 1.084, "step": 4216 }, { "epoch": 0.5716803362028062, "grad_norm": 9.463184723425947, "learning_rate": 8.174694299676974e-07, "loss": 1.1198, "step": 4217 }, { "epoch": 0.571815901850471, "grad_norm": 4.580389302663292, "learning_rate": 8.170376871036193e-07, "loss": 1.1294, "step": 4218 }, { "epoch": 0.571951467498136, "grad_norm": 4.857824728939963, "learning_rate": 8.166059795223793e-07, "loss": 1.1017, "step": 4219 }, { "epoch": 0.5720870331458009, "grad_norm": 17.199017980768385, "learning_rate": 8.161743073072286e-07, "loss": 1.1112, "step": 4220 }, { "epoch": 0.5722225987934657, "grad_norm": 4.978006074616705, "learning_rate": 8.157426705414113e-07, "loss": 1.1368, "step": 4221 }, { "epoch": 0.5723581644411306, "grad_norm": 4.683937423044682, "learning_rate": 8.153110693081657e-07, "loss": 1.1041, "step": 4222 }, { "epoch": 0.5724937300887954, "grad_norm": 7.976727662205152, "learning_rate": 8.148795036907224e-07, "loss": 1.1472, "step": 4223 }, { "epoch": 0.5726292957364604, "grad_norm": 5.314722780482313, "learning_rate": 8.144479737723058e-07, "loss": 1.0994, "step": 4224 }, { "epoch": 0.5727648613841253, "grad_norm": 3.732889740623138, "learning_rate": 8.140164796361327e-07, "loss": 1.1561, "step": 4225 }, { "epoch": 0.5729004270317901, "grad_norm": 6.9046191010084765, "learning_rate": 8.135850213654135e-07, "loss": 1.0883, "step": 4226 }, { "epoch": 0.573035992679455, "grad_norm": 3.817020676155975, "learning_rate": 8.131535990433513e-07, "loss": 1.0845, "step": 4227 }, { "epoch": 0.5731715583271199, "grad_norm": 3.783298011851651, "learning_rate": 8.127222127531429e-07, "loss": 1.1345, "step": 4228 }, { "epoch": 0.5733071239747848, "grad_norm": 7.1589874280543855, "learning_rate": 8.122908625779771e-07, "loss": 1.0908, "step": 4229 }, { "epoch": 0.5734426896224497, "grad_norm": 7.270351639028161, "learning_rate": 8.118595486010372e-07, "loss": 1.1193, "step": 4230 }, { "epoch": 0.5735782552701145, "grad_norm": 11.994363463336974, "learning_rate": 8.114282709054978e-07, "loss": 1.1128, "step": 4231 }, { "epoch": 0.5737138209177794, "grad_norm": 4.5656360086913965, "learning_rate": 8.109970295745284e-07, "loss": 1.1301, "step": 4232 }, { "epoch": 0.5738493865654443, "grad_norm": 8.90562945533847, "learning_rate": 8.105658246912895e-07, "loss": 1.079, "step": 4233 }, { "epoch": 0.5739849522131092, "grad_norm": 3.9932914373827813, "learning_rate": 8.101346563389363e-07, "loss": 1.0778, "step": 4234 }, { "epoch": 0.5741205178607741, "grad_norm": 7.552282220174871, "learning_rate": 8.097035246006161e-07, "loss": 1.1245, "step": 4235 }, { "epoch": 0.5742560835084389, "grad_norm": 4.458033044650567, "learning_rate": 8.092724295594685e-07, "loss": 1.1171, "step": 4236 }, { "epoch": 0.5743916491561039, "grad_norm": 3.827678914231326, "learning_rate": 8.088413712986279e-07, "loss": 1.1123, "step": 4237 }, { "epoch": 0.5745272148037687, "grad_norm": 6.575809116239911, "learning_rate": 8.084103499012194e-07, "loss": 1.1106, "step": 4238 }, { "epoch": 0.5746627804514336, "grad_norm": 7.3354001547441, "learning_rate": 8.07979365450363e-07, "loss": 1.1238, "step": 4239 }, { "epoch": 0.5747983460990985, "grad_norm": 4.710399902989448, "learning_rate": 8.075484180291701e-07, "loss": 1.0982, "step": 4240 }, { "epoch": 0.5749339117467633, "grad_norm": 4.570421961191262, "learning_rate": 8.071175077207457e-07, "loss": 1.1252, "step": 4241 }, { "epoch": 0.5750694773944283, "grad_norm": 5.516483707295258, "learning_rate": 8.066866346081873e-07, "loss": 1.0955, "step": 4242 }, { "epoch": 0.5752050430420931, "grad_norm": 4.191788775940615, "learning_rate": 8.062557987745856e-07, "loss": 1.1143, "step": 4243 }, { "epoch": 0.575340608689758, "grad_norm": 6.91230072993267, "learning_rate": 8.058250003030238e-07, "loss": 1.1543, "step": 4244 }, { "epoch": 0.5754761743374229, "grad_norm": 4.502706499713701, "learning_rate": 8.053942392765781e-07, "loss": 1.1191, "step": 4245 }, { "epoch": 0.5756117399850877, "grad_norm": 5.154631602656253, "learning_rate": 8.049635157783169e-07, "loss": 1.1092, "step": 4246 }, { "epoch": 0.5757473056327527, "grad_norm": 5.3556731928329855, "learning_rate": 8.045328298913024e-07, "loss": 1.1087, "step": 4247 }, { "epoch": 0.5758828712804176, "grad_norm": 5.278646409639891, "learning_rate": 8.041021816985887e-07, "loss": 1.0939, "step": 4248 }, { "epoch": 0.5760184369280824, "grad_norm": 5.737002138848636, "learning_rate": 8.03671571283223e-07, "loss": 1.1228, "step": 4249 }, { "epoch": 0.5761540025757473, "grad_norm": 12.515809110223218, "learning_rate": 8.03240998728245e-07, "loss": 1.1124, "step": 4250 }, { "epoch": 0.5762895682234122, "grad_norm": 6.769200699179952, "learning_rate": 8.028104641166871e-07, "loss": 1.0745, "step": 4251 }, { "epoch": 0.5764251338710771, "grad_norm": 5.081326753781401, "learning_rate": 8.02379967531575e-07, "loss": 1.1055, "step": 4252 }, { "epoch": 0.576560699518742, "grad_norm": 5.689500239711036, "learning_rate": 8.019495090559257e-07, "loss": 1.1029, "step": 4253 }, { "epoch": 0.5766962651664068, "grad_norm": 5.234171150745964, "learning_rate": 8.015190887727509e-07, "loss": 1.1206, "step": 4254 }, { "epoch": 0.5768318308140717, "grad_norm": 4.093134522548586, "learning_rate": 8.010887067650526e-07, "loss": 1.1042, "step": 4255 }, { "epoch": 0.5769673964617366, "grad_norm": 5.048623724148059, "learning_rate": 8.006583631158275e-07, "loss": 1.1053, "step": 4256 }, { "epoch": 0.5771029621094015, "grad_norm": 4.416967940901309, "learning_rate": 8.002280579080632e-07, "loss": 1.0965, "step": 4257 }, { "epoch": 0.5772385277570664, "grad_norm": 4.861604286023074, "learning_rate": 7.997977912247413e-07, "loss": 1.1236, "step": 4258 }, { "epoch": 0.5773740934047312, "grad_norm": 4.717925393936469, "learning_rate": 7.993675631488348e-07, "loss": 1.0956, "step": 4259 }, { "epoch": 0.5775096590523962, "grad_norm": 7.627779354114643, "learning_rate": 7.989373737633103e-07, "loss": 1.0929, "step": 4260 }, { "epoch": 0.577645224700061, "grad_norm": 4.68443536798926, "learning_rate": 7.985072231511259e-07, "loss": 1.0519, "step": 4261 }, { "epoch": 0.5777807903477259, "grad_norm": 5.45783452752589, "learning_rate": 7.980771113952335e-07, "loss": 1.0973, "step": 4262 }, { "epoch": 0.5779163559953908, "grad_norm": 4.036632606981577, "learning_rate": 7.976470385785762e-07, "loss": 1.0841, "step": 4263 }, { "epoch": 0.5780519216430556, "grad_norm": 4.525477954163249, "learning_rate": 7.972170047840898e-07, "loss": 1.1106, "step": 4264 }, { "epoch": 0.5781874872907206, "grad_norm": 4.295200139593326, "learning_rate": 7.967870100947038e-07, "loss": 1.1294, "step": 4265 }, { "epoch": 0.5783230529383854, "grad_norm": 5.924721554992169, "learning_rate": 7.963570545933384e-07, "loss": 1.0866, "step": 4266 }, { "epoch": 0.5784586185860503, "grad_norm": 6.439676073191933, "learning_rate": 7.95927138362908e-07, "loss": 1.1182, "step": 4267 }, { "epoch": 0.5785941842337152, "grad_norm": 3.6276848234728005, "learning_rate": 7.954972614863177e-07, "loss": 1.0964, "step": 4268 }, { "epoch": 0.57872974988138, "grad_norm": 7.741832494319983, "learning_rate": 7.950674240464667e-07, "loss": 1.1403, "step": 4269 }, { "epoch": 0.578865315529045, "grad_norm": 4.522316644489962, "learning_rate": 7.946376261262449e-07, "loss": 1.148, "step": 4270 }, { "epoch": 0.5790008811767098, "grad_norm": 5.762941479484341, "learning_rate": 7.942078678085363e-07, "loss": 1.1371, "step": 4271 }, { "epoch": 0.5791364468243747, "grad_norm": 4.694200741099864, "learning_rate": 7.937781491762156e-07, "loss": 1.1293, "step": 4272 }, { "epoch": 0.5792720124720396, "grad_norm": 6.390131739264352, "learning_rate": 7.933484703121513e-07, "loss": 1.0794, "step": 4273 }, { "epoch": 0.5794075781197044, "grad_norm": 7.346446755079661, "learning_rate": 7.929188312992031e-07, "loss": 1.1132, "step": 4274 }, { "epoch": 0.5795431437673694, "grad_norm": 5.092578618280839, "learning_rate": 7.924892322202236e-07, "loss": 1.114, "step": 4275 }, { "epoch": 0.5796787094150342, "grad_norm": 3.240382922125561, "learning_rate": 7.920596731580582e-07, "loss": 1.1105, "step": 4276 }, { "epoch": 0.5798142750626991, "grad_norm": 4.190744412313407, "learning_rate": 7.91630154195543e-07, "loss": 1.179, "step": 4277 }, { "epoch": 0.579949840710364, "grad_norm": 4.175276544897636, "learning_rate": 7.912006754155078e-07, "loss": 1.141, "step": 4278 }, { "epoch": 0.5800854063580289, "grad_norm": 4.512648462174678, "learning_rate": 7.907712369007743e-07, "loss": 1.1125, "step": 4279 }, { "epoch": 0.5802209720056938, "grad_norm": 16.4752952782245, "learning_rate": 7.903418387341564e-07, "loss": 1.1243, "step": 4280 }, { "epoch": 0.5803565376533586, "grad_norm": 4.041802922469331, "learning_rate": 7.899124809984595e-07, "loss": 1.1208, "step": 4281 }, { "epoch": 0.5804921033010235, "grad_norm": 5.080291940710795, "learning_rate": 7.894831637764828e-07, "loss": 1.1169, "step": 4282 }, { "epoch": 0.5806276689486884, "grad_norm": 4.369941183484925, "learning_rate": 7.890538871510156e-07, "loss": 1.1091, "step": 4283 }, { "epoch": 0.5807632345963533, "grad_norm": 5.870163232775835, "learning_rate": 7.886246512048418e-07, "loss": 1.0904, "step": 4284 }, { "epoch": 0.5808988002440182, "grad_norm": 4.743074570114998, "learning_rate": 7.88195456020735e-07, "loss": 1.0657, "step": 4285 }, { "epoch": 0.581034365891683, "grad_norm": 3.5901035788530584, "learning_rate": 7.87766301681463e-07, "loss": 1.1274, "step": 4286 }, { "epoch": 0.5811699315393479, "grad_norm": 4.788873355605764, "learning_rate": 7.873371882697841e-07, "loss": 1.1045, "step": 4287 }, { "epoch": 0.5813054971870129, "grad_norm": 6.990289615571949, "learning_rate": 7.869081158684503e-07, "loss": 1.0889, "step": 4288 }, { "epoch": 0.5814410628346777, "grad_norm": 13.556071006194347, "learning_rate": 7.864790845602038e-07, "loss": 1.0678, "step": 4289 }, { "epoch": 0.5815766284823426, "grad_norm": 4.321954238597349, "learning_rate": 7.860500944277809e-07, "loss": 1.1177, "step": 4290 }, { "epoch": 0.5817121941300074, "grad_norm": 5.368162640056084, "learning_rate": 7.856211455539084e-07, "loss": 1.1258, "step": 4291 }, { "epoch": 0.5818477597776723, "grad_norm": 22.83198313641713, "learning_rate": 7.851922380213053e-07, "loss": 1.1282, "step": 4292 }, { "epoch": 0.5819833254253373, "grad_norm": 4.1032756757725, "learning_rate": 7.847633719126839e-07, "loss": 1.0853, "step": 4293 }, { "epoch": 0.5821188910730021, "grad_norm": 4.202564060216676, "learning_rate": 7.84334547310747e-07, "loss": 1.1114, "step": 4294 }, { "epoch": 0.582254456720667, "grad_norm": 3.6019840178479, "learning_rate": 7.839057642981905e-07, "loss": 1.0953, "step": 4295 }, { "epoch": 0.5823900223683318, "grad_norm": 4.884916832719114, "learning_rate": 7.834770229577015e-07, "loss": 1.134, "step": 4296 }, { "epoch": 0.5825255880159967, "grad_norm": 3.8544780302635164, "learning_rate": 7.830483233719597e-07, "loss": 1.085, "step": 4297 }, { "epoch": 0.5826611536636617, "grad_norm": 3.6459804882859266, "learning_rate": 7.826196656236357e-07, "loss": 1.0927, "step": 4298 }, { "epoch": 0.5827967193113265, "grad_norm": 4.6233575371418745, "learning_rate": 7.821910497953939e-07, "loss": 1.0679, "step": 4299 }, { "epoch": 0.5829322849589914, "grad_norm": 5.634784070230837, "learning_rate": 7.817624759698884e-07, "loss": 1.0465, "step": 4300 }, { "epoch": 0.5830678506066562, "grad_norm": 4.1614047718261835, "learning_rate": 7.813339442297671e-07, "loss": 1.1126, "step": 4301 }, { "epoch": 0.5832034162543211, "grad_norm": 4.802711429625529, "learning_rate": 7.809054546576686e-07, "loss": 1.13, "step": 4302 }, { "epoch": 0.5833389819019861, "grad_norm": 5.4443362589017035, "learning_rate": 7.804770073362236e-07, "loss": 1.1067, "step": 4303 }, { "epoch": 0.5834745475496509, "grad_norm": 5.40113301739516, "learning_rate": 7.800486023480551e-07, "loss": 1.1186, "step": 4304 }, { "epoch": 0.5836101131973158, "grad_norm": 3.6894179406862873, "learning_rate": 7.796202397757771e-07, "loss": 1.114, "step": 4305 }, { "epoch": 0.5837456788449806, "grad_norm": 3.9718116562503485, "learning_rate": 7.791919197019967e-07, "loss": 1.1096, "step": 4306 }, { "epoch": 0.5838812444926456, "grad_norm": 4.809794219425605, "learning_rate": 7.787636422093114e-07, "loss": 1.1294, "step": 4307 }, { "epoch": 0.5840168101403105, "grad_norm": 3.9618158287311247, "learning_rate": 7.783354073803114e-07, "loss": 1.1432, "step": 4308 }, { "epoch": 0.5841523757879753, "grad_norm": 4.2390542149054165, "learning_rate": 7.779072152975783e-07, "loss": 1.108, "step": 4309 }, { "epoch": 0.5842879414356402, "grad_norm": 5.4562421602516125, "learning_rate": 7.774790660436857e-07, "loss": 1.0742, "step": 4310 }, { "epoch": 0.584423507083305, "grad_norm": 3.7919304612222793, "learning_rate": 7.770509597011986e-07, "loss": 1.0751, "step": 4311 }, { "epoch": 0.58455907273097, "grad_norm": 5.364784731031237, "learning_rate": 7.766228963526744e-07, "loss": 1.1156, "step": 4312 }, { "epoch": 0.5846946383786349, "grad_norm": 4.196899324938626, "learning_rate": 7.761948760806611e-07, "loss": 1.1282, "step": 4313 }, { "epoch": 0.5848302040262997, "grad_norm": 4.697134085179678, "learning_rate": 7.757668989676995e-07, "loss": 1.084, "step": 4314 }, { "epoch": 0.5849657696739646, "grad_norm": 15.3443562924597, "learning_rate": 7.753389650963212e-07, "loss": 1.0763, "step": 4315 }, { "epoch": 0.5851013353216294, "grad_norm": 4.345414004518237, "learning_rate": 7.749110745490505e-07, "loss": 1.0919, "step": 4316 }, { "epoch": 0.5852369009692944, "grad_norm": 5.193481721002572, "learning_rate": 7.744832274084019e-07, "loss": 1.1116, "step": 4317 }, { "epoch": 0.5853724666169593, "grad_norm": 3.9650967803697195, "learning_rate": 7.740554237568832e-07, "loss": 1.0924, "step": 4318 }, { "epoch": 0.5855080322646241, "grad_norm": 4.200873973778046, "learning_rate": 7.736276636769925e-07, "loss": 1.1436, "step": 4319 }, { "epoch": 0.585643597912289, "grad_norm": 4.8924690385921705, "learning_rate": 7.731999472512196e-07, "loss": 1.0813, "step": 4320 }, { "epoch": 0.5857791635599539, "grad_norm": 5.615432927262338, "learning_rate": 7.727722745620471e-07, "loss": 1.1235, "step": 4321 }, { "epoch": 0.5859147292076188, "grad_norm": 3.997633400670283, "learning_rate": 7.723446456919473e-07, "loss": 1.1248, "step": 4322 }, { "epoch": 0.5860502948552837, "grad_norm": 4.0438475235430955, "learning_rate": 7.719170607233861e-07, "loss": 1.0854, "step": 4323 }, { "epoch": 0.5861858605029485, "grad_norm": 6.060143151342041, "learning_rate": 7.714895197388188e-07, "loss": 1.1031, "step": 4324 }, { "epoch": 0.5863214261506134, "grad_norm": 3.962644170334309, "learning_rate": 7.710620228206944e-07, "loss": 1.1308, "step": 4325 }, { "epoch": 0.5864569917982784, "grad_norm": 5.537579939700694, "learning_rate": 7.706345700514512e-07, "loss": 1.1397, "step": 4326 }, { "epoch": 0.5865925574459432, "grad_norm": 5.201502200302864, "learning_rate": 7.702071615135212e-07, "loss": 1.0574, "step": 4327 }, { "epoch": 0.5867281230936081, "grad_norm": 5.455804955366375, "learning_rate": 7.697797972893258e-07, "loss": 1.0871, "step": 4328 }, { "epoch": 0.5868636887412729, "grad_norm": 6.454608615248833, "learning_rate": 7.693524774612797e-07, "loss": 1.1101, "step": 4329 }, { "epoch": 0.5869992543889379, "grad_norm": 4.966519370630809, "learning_rate": 7.689252021117874e-07, "loss": 1.1063, "step": 4330 }, { "epoch": 0.5871348200366028, "grad_norm": 5.543463889462911, "learning_rate": 7.684979713232461e-07, "loss": 1.0816, "step": 4331 }, { "epoch": 0.5872703856842676, "grad_norm": 4.87007181050284, "learning_rate": 7.680707851780433e-07, "loss": 1.1115, "step": 4332 }, { "epoch": 0.5874059513319325, "grad_norm": 4.986577660857964, "learning_rate": 7.676436437585593e-07, "loss": 1.1062, "step": 4333 }, { "epoch": 0.5875415169795973, "grad_norm": 4.168403364908123, "learning_rate": 7.672165471471643e-07, "loss": 1.1097, "step": 4334 }, { "epoch": 0.5876770826272623, "grad_norm": 5.389616801653561, "learning_rate": 7.667894954262205e-07, "loss": 1.1108, "step": 4335 }, { "epoch": 0.5878126482749272, "grad_norm": 5.686080673113264, "learning_rate": 7.66362488678082e-07, "loss": 1.1317, "step": 4336 }, { "epoch": 0.587948213922592, "grad_norm": 5.18619057875398, "learning_rate": 7.659355269850929e-07, "loss": 1.1189, "step": 4337 }, { "epoch": 0.5880837795702569, "grad_norm": 3.9842945784363257, "learning_rate": 7.655086104295904e-07, "loss": 1.1075, "step": 4338 }, { "epoch": 0.5882193452179217, "grad_norm": 11.612868863646426, "learning_rate": 7.65081739093901e-07, "loss": 1.1159, "step": 4339 }, { "epoch": 0.5883549108655867, "grad_norm": 4.688187974021669, "learning_rate": 7.646549130603439e-07, "loss": 1.1251, "step": 4340 }, { "epoch": 0.5884904765132516, "grad_norm": 4.1044061775627085, "learning_rate": 7.642281324112292e-07, "loss": 1.1138, "step": 4341 }, { "epoch": 0.5886260421609164, "grad_norm": 4.400051218714889, "learning_rate": 7.638013972288581e-07, "loss": 1.0859, "step": 4342 }, { "epoch": 0.5887616078085813, "grad_norm": 6.419616919926186, "learning_rate": 7.63374707595523e-07, "loss": 1.0933, "step": 4343 }, { "epoch": 0.5888971734562461, "grad_norm": 5.322897017945476, "learning_rate": 7.629480635935082e-07, "loss": 1.1104, "step": 4344 }, { "epoch": 0.5890327391039111, "grad_norm": 3.8128473036878816, "learning_rate": 7.625214653050874e-07, "loss": 1.0883, "step": 4345 }, { "epoch": 0.589168304751576, "grad_norm": 4.799745665465697, "learning_rate": 7.620949128125282e-07, "loss": 1.1032, "step": 4346 }, { "epoch": 0.5893038703992408, "grad_norm": 4.871199865958922, "learning_rate": 7.616684061980867e-07, "loss": 1.1215, "step": 4347 }, { "epoch": 0.5894394360469057, "grad_norm": 3.5879812825353117, "learning_rate": 7.612419455440119e-07, "loss": 1.1322, "step": 4348 }, { "epoch": 0.5895750016945706, "grad_norm": 3.955191780652181, "learning_rate": 7.608155309325435e-07, "loss": 1.1113, "step": 4349 }, { "epoch": 0.5897105673422355, "grad_norm": 4.71534449183855, "learning_rate": 7.603891624459114e-07, "loss": 1.0811, "step": 4350 }, { "epoch": 0.5898461329899004, "grad_norm": 3.8776397447372446, "learning_rate": 7.599628401663384e-07, "loss": 1.1561, "step": 4351 }, { "epoch": 0.5899816986375652, "grad_norm": 3.493204251741356, "learning_rate": 7.595365641760367e-07, "loss": 1.1202, "step": 4352 }, { "epoch": 0.5901172642852301, "grad_norm": 4.623952937109454, "learning_rate": 7.591103345572109e-07, "loss": 1.0817, "step": 4353 }, { "epoch": 0.590252829932895, "grad_norm": 5.448249652374638, "learning_rate": 7.58684151392055e-07, "loss": 1.1331, "step": 4354 }, { "epoch": 0.5903883955805599, "grad_norm": 4.96743193938768, "learning_rate": 7.582580147627562e-07, "loss": 1.124, "step": 4355 }, { "epoch": 0.5905239612282248, "grad_norm": 4.260622796799465, "learning_rate": 7.578319247514906e-07, "loss": 1.0669, "step": 4356 }, { "epoch": 0.5906595268758896, "grad_norm": 15.536649988366259, "learning_rate": 7.574058814404272e-07, "loss": 1.1078, "step": 4357 }, { "epoch": 0.5907950925235546, "grad_norm": 5.067455028918015, "learning_rate": 7.569798849117241e-07, "loss": 1.1223, "step": 4358 }, { "epoch": 0.5909306581712194, "grad_norm": 5.496265058508385, "learning_rate": 7.565539352475325e-07, "loss": 1.1127, "step": 4359 }, { "epoch": 0.5910662238188843, "grad_norm": 5.748587417739268, "learning_rate": 7.561280325299924e-07, "loss": 1.0873, "step": 4360 }, { "epoch": 0.5912017894665492, "grad_norm": 8.632136280318981, "learning_rate": 7.557021768412366e-07, "loss": 1.1162, "step": 4361 }, { "epoch": 0.591337355114214, "grad_norm": 5.383819776647693, "learning_rate": 7.552763682633877e-07, "loss": 1.0946, "step": 4362 }, { "epoch": 0.591472920761879, "grad_norm": 9.64357011417298, "learning_rate": 7.548506068785589e-07, "loss": 1.0548, "step": 4363 }, { "epoch": 0.5916084864095438, "grad_norm": 3.9070459955378665, "learning_rate": 7.544248927688561e-07, "loss": 1.124, "step": 4364 }, { "epoch": 0.5917440520572087, "grad_norm": 6.2236207068093075, "learning_rate": 7.539992260163735e-07, "loss": 1.0915, "step": 4365 }, { "epoch": 0.5918796177048736, "grad_norm": 4.527541818196497, "learning_rate": 7.535736067031991e-07, "loss": 1.1473, "step": 4366 }, { "epoch": 0.5920151833525384, "grad_norm": 7.784447707275542, "learning_rate": 7.531480349114088e-07, "loss": 1.0965, "step": 4367 }, { "epoch": 0.5921507490002034, "grad_norm": 7.4358630346742185, "learning_rate": 7.527225107230721e-07, "loss": 1.1102, "step": 4368 }, { "epoch": 0.5922863146478682, "grad_norm": 5.171637449218933, "learning_rate": 7.52297034220247e-07, "loss": 1.097, "step": 4369 }, { "epoch": 0.5924218802955331, "grad_norm": 5.952996588821918, "learning_rate": 7.518716054849836e-07, "loss": 1.1004, "step": 4370 }, { "epoch": 0.592557445943198, "grad_norm": 4.0802605666156975, "learning_rate": 7.514462245993225e-07, "loss": 1.1152, "step": 4371 }, { "epoch": 0.5926930115908629, "grad_norm": 5.241341156907886, "learning_rate": 7.51020891645295e-07, "loss": 1.094, "step": 4372 }, { "epoch": 0.5928285772385278, "grad_norm": 6.032063713118402, "learning_rate": 7.505956067049232e-07, "loss": 1.1347, "step": 4373 }, { "epoch": 0.5929641428861926, "grad_norm": 5.538663199945034, "learning_rate": 7.501703698602202e-07, "loss": 1.1239, "step": 4374 }, { "epoch": 0.5930997085338575, "grad_norm": 4.135607583081631, "learning_rate": 7.497451811931891e-07, "loss": 1.0939, "step": 4375 }, { "epoch": 0.5932352741815224, "grad_norm": 4.749187249602325, "learning_rate": 7.493200407858245e-07, "loss": 1.0938, "step": 4376 }, { "epoch": 0.5933708398291873, "grad_norm": 3.4358546311913476, "learning_rate": 7.488949487201112e-07, "loss": 1.1336, "step": 4377 }, { "epoch": 0.5935064054768522, "grad_norm": 4.425425563464222, "learning_rate": 7.48469905078025e-07, "loss": 1.1247, "step": 4378 }, { "epoch": 0.593641971124517, "grad_norm": 4.651964068074679, "learning_rate": 7.480449099415322e-07, "loss": 1.1262, "step": 4379 }, { "epoch": 0.5937775367721819, "grad_norm": 7.268858588302442, "learning_rate": 7.476199633925894e-07, "loss": 1.1232, "step": 4380 }, { "epoch": 0.5939131024198469, "grad_norm": 8.826617821519074, "learning_rate": 7.471950655131451e-07, "loss": 1.0805, "step": 4381 }, { "epoch": 0.5940486680675117, "grad_norm": 4.976616548441259, "learning_rate": 7.467702163851363e-07, "loss": 1.0886, "step": 4382 }, { "epoch": 0.5941842337151766, "grad_norm": 4.758096272078015, "learning_rate": 7.463454160904927e-07, "loss": 1.0942, "step": 4383 }, { "epoch": 0.5943197993628414, "grad_norm": 4.557822253369825, "learning_rate": 7.459206647111331e-07, "loss": 1.1293, "step": 4384 }, { "epoch": 0.5944553650105063, "grad_norm": 3.671549432252443, "learning_rate": 7.454959623289682e-07, "loss": 1.0845, "step": 4385 }, { "epoch": 0.5945909306581713, "grad_norm": 5.7180382900747215, "learning_rate": 7.450713090258976e-07, "loss": 1.0709, "step": 4386 }, { "epoch": 0.5947264963058361, "grad_norm": 4.8770370948301744, "learning_rate": 7.44646704883813e-07, "loss": 1.1151, "step": 4387 }, { "epoch": 0.594862061953501, "grad_norm": 3.873640009573609, "learning_rate": 7.442221499845955e-07, "loss": 1.0991, "step": 4388 }, { "epoch": 0.5949976276011658, "grad_norm": 6.217733230947731, "learning_rate": 7.437976444101177e-07, "loss": 1.105, "step": 4389 }, { "epoch": 0.5951331932488307, "grad_norm": 3.6294869173577897, "learning_rate": 7.433731882422418e-07, "loss": 1.0837, "step": 4390 }, { "epoch": 0.5952687588964957, "grad_norm": 4.681073801392388, "learning_rate": 7.429487815628206e-07, "loss": 1.0808, "step": 4391 }, { "epoch": 0.5954043245441605, "grad_norm": 4.827387751017235, "learning_rate": 7.425244244536981e-07, "loss": 1.1438, "step": 4392 }, { "epoch": 0.5955398901918254, "grad_norm": 14.42505418958452, "learning_rate": 7.421001169967076e-07, "loss": 1.136, "step": 4393 }, { "epoch": 0.5956754558394902, "grad_norm": 6.19692040110925, "learning_rate": 7.416758592736742e-07, "loss": 1.0904, "step": 4394 }, { "epoch": 0.5958110214871551, "grad_norm": 4.4784647237707285, "learning_rate": 7.41251651366412e-07, "loss": 1.1016, "step": 4395 }, { "epoch": 0.5959465871348201, "grad_norm": 12.89318434960375, "learning_rate": 7.408274933567267e-07, "loss": 1.0907, "step": 4396 }, { "epoch": 0.5960821527824849, "grad_norm": 4.256101332913263, "learning_rate": 7.404033853264131e-07, "loss": 1.1011, "step": 4397 }, { "epoch": 0.5962177184301498, "grad_norm": 15.05075068309934, "learning_rate": 7.399793273572578e-07, "loss": 1.1571, "step": 4398 }, { "epoch": 0.5963532840778146, "grad_norm": 4.691020967119557, "learning_rate": 7.395553195310364e-07, "loss": 1.0989, "step": 4399 }, { "epoch": 0.5964888497254796, "grad_norm": 4.6447866146797425, "learning_rate": 7.391313619295163e-07, "loss": 1.0905, "step": 4400 }, { "epoch": 0.5966244153731445, "grad_norm": 4.642734938711342, "learning_rate": 7.387074546344536e-07, "loss": 1.1043, "step": 4401 }, { "epoch": 0.5967599810208093, "grad_norm": 5.975275698828645, "learning_rate": 7.382835977275959e-07, "loss": 1.1274, "step": 4402 }, { "epoch": 0.5968955466684742, "grad_norm": 4.787408847308515, "learning_rate": 7.378597912906805e-07, "loss": 1.1037, "step": 4403 }, { "epoch": 0.5970311123161391, "grad_norm": 4.434917081387196, "learning_rate": 7.374360354054348e-07, "loss": 1.0761, "step": 4404 }, { "epoch": 0.597166677963804, "grad_norm": 6.2368133006338, "learning_rate": 7.370123301535777e-07, "loss": 1.1106, "step": 4405 }, { "epoch": 0.5973022436114689, "grad_norm": 4.9018052319133485, "learning_rate": 7.365886756168165e-07, "loss": 1.1508, "step": 4406 }, { "epoch": 0.5974378092591337, "grad_norm": 101.7986054098021, "learning_rate": 7.3616507187685e-07, "loss": 1.1218, "step": 4407 }, { "epoch": 0.5975733749067986, "grad_norm": 6.489343845635815, "learning_rate": 7.357415190153666e-07, "loss": 1.069, "step": 4408 }, { "epoch": 0.5977089405544636, "grad_norm": 6.7266765353370115, "learning_rate": 7.353180171140455e-07, "loss": 1.1237, "step": 4409 }, { "epoch": 0.5978445062021284, "grad_norm": 5.491794706954078, "learning_rate": 7.348945662545556e-07, "loss": 1.1008, "step": 4410 }, { "epoch": 0.5979800718497933, "grad_norm": 4.4500656524308395, "learning_rate": 7.34471166518556e-07, "loss": 1.1157, "step": 4411 }, { "epoch": 0.5981156374974581, "grad_norm": 4.7875644133237785, "learning_rate": 7.340478179876957e-07, "loss": 1.138, "step": 4412 }, { "epoch": 0.598251203145123, "grad_norm": 5.52105731506731, "learning_rate": 7.336245207436147e-07, "loss": 1.1169, "step": 4413 }, { "epoch": 0.598386768792788, "grad_norm": 4.360756460501011, "learning_rate": 7.332012748679419e-07, "loss": 1.0875, "step": 4414 }, { "epoch": 0.5985223344404528, "grad_norm": 4.870823870109173, "learning_rate": 7.327780804422977e-07, "loss": 1.1329, "step": 4415 }, { "epoch": 0.5986579000881177, "grad_norm": 6.543695510268159, "learning_rate": 7.32354937548291e-07, "loss": 1.1264, "step": 4416 }, { "epoch": 0.5987934657357825, "grad_norm": 4.532883313525607, "learning_rate": 7.319318462675223e-07, "loss": 1.0891, "step": 4417 }, { "epoch": 0.5989290313834474, "grad_norm": 4.090846414896058, "learning_rate": 7.315088066815809e-07, "loss": 1.0841, "step": 4418 }, { "epoch": 0.5990645970311124, "grad_norm": 5.352291781432344, "learning_rate": 7.310858188720466e-07, "loss": 1.0775, "step": 4419 }, { "epoch": 0.5992001626787772, "grad_norm": 14.44311507329849, "learning_rate": 7.306628829204897e-07, "loss": 1.0902, "step": 4420 }, { "epoch": 0.5993357283264421, "grad_norm": 6.676039598576404, "learning_rate": 7.302399989084695e-07, "loss": 1.1371, "step": 4421 }, { "epoch": 0.5994712939741069, "grad_norm": 4.477023706319101, "learning_rate": 7.298171669175365e-07, "loss": 1.1035, "step": 4422 }, { "epoch": 0.5996068596217718, "grad_norm": 4.531439474851852, "learning_rate": 7.293943870292299e-07, "loss": 1.1203, "step": 4423 }, { "epoch": 0.5997424252694368, "grad_norm": 4.958921619932146, "learning_rate": 7.289716593250798e-07, "loss": 1.1325, "step": 4424 }, { "epoch": 0.5998779909171016, "grad_norm": 5.950668578260652, "learning_rate": 7.285489838866057e-07, "loss": 1.1146, "step": 4425 }, { "epoch": 0.6000135565647665, "grad_norm": 4.5917180410996865, "learning_rate": 7.281263607953177e-07, "loss": 1.0799, "step": 4426 }, { "epoch": 0.6001491222124313, "grad_norm": 4.483949597159932, "learning_rate": 7.277037901327145e-07, "loss": 1.1003, "step": 4427 }, { "epoch": 0.6002846878600963, "grad_norm": 5.246015485186964, "learning_rate": 7.272812719802865e-07, "loss": 1.118, "step": 4428 }, { "epoch": 0.6004202535077612, "grad_norm": 4.931187039608459, "learning_rate": 7.268588064195122e-07, "loss": 1.1041, "step": 4429 }, { "epoch": 0.600555819155426, "grad_norm": 4.629696520162935, "learning_rate": 7.264363935318612e-07, "loss": 1.0992, "step": 4430 }, { "epoch": 0.6006913848030909, "grad_norm": 8.80725202068247, "learning_rate": 7.260140333987925e-07, "loss": 1.1224, "step": 4431 }, { "epoch": 0.6008269504507557, "grad_norm": 5.246354848975095, "learning_rate": 7.255917261017543e-07, "loss": 1.1001, "step": 4432 }, { "epoch": 0.6009625160984207, "grad_norm": 16.731623771458473, "learning_rate": 7.25169471722186e-07, "loss": 1.1055, "step": 4433 }, { "epoch": 0.6010980817460856, "grad_norm": 5.094208082259241, "learning_rate": 7.247472703415154e-07, "loss": 1.1301, "step": 4434 }, { "epoch": 0.6012336473937504, "grad_norm": 6.076367659887049, "learning_rate": 7.243251220411612e-07, "loss": 1.1659, "step": 4435 }, { "epoch": 0.6013692130414153, "grad_norm": 4.901884251471859, "learning_rate": 7.23903026902531e-07, "loss": 1.1168, "step": 4436 }, { "epoch": 0.6015047786890801, "grad_norm": 3.7403131849469755, "learning_rate": 7.234809850070231e-07, "loss": 1.0718, "step": 4437 }, { "epoch": 0.6016403443367451, "grad_norm": 5.2397883679022845, "learning_rate": 7.230589964360242e-07, "loss": 1.1004, "step": 4438 }, { "epoch": 0.60177590998441, "grad_norm": 3.9719041980173024, "learning_rate": 7.226370612709119e-07, "loss": 1.1029, "step": 4439 }, { "epoch": 0.6019114756320748, "grad_norm": 5.764049784713366, "learning_rate": 7.222151795930528e-07, "loss": 1.0896, "step": 4440 }, { "epoch": 0.6020470412797397, "grad_norm": 5.302605824650714, "learning_rate": 7.21793351483804e-07, "loss": 1.1335, "step": 4441 }, { "epoch": 0.6021826069274046, "grad_norm": 6.469622960393899, "learning_rate": 7.213715770245108e-07, "loss": 1.1296, "step": 4442 }, { "epoch": 0.6023181725750695, "grad_norm": 5.335276965069379, "learning_rate": 7.209498562965101e-07, "loss": 1.1262, "step": 4443 }, { "epoch": 0.6024537382227344, "grad_norm": 7.210886083097158, "learning_rate": 7.205281893811264e-07, "loss": 1.1023, "step": 4444 }, { "epoch": 0.6025893038703992, "grad_norm": 4.592025117080773, "learning_rate": 7.201065763596758e-07, "loss": 1.1115, "step": 4445 }, { "epoch": 0.6027248695180641, "grad_norm": 4.509500450061313, "learning_rate": 7.196850173134628e-07, "loss": 1.1016, "step": 4446 }, { "epoch": 0.602860435165729, "grad_norm": 6.386303706860891, "learning_rate": 7.192635123237809e-07, "loss": 1.0931, "step": 4447 }, { "epoch": 0.6029960008133939, "grad_norm": 4.865778083535216, "learning_rate": 7.188420614719152e-07, "loss": 1.1386, "step": 4448 }, { "epoch": 0.6031315664610588, "grad_norm": 5.006656294080967, "learning_rate": 7.184206648391381e-07, "loss": 1.1218, "step": 4449 }, { "epoch": 0.6032671321087236, "grad_norm": 4.748440385451038, "learning_rate": 7.179993225067136e-07, "loss": 1.1464, "step": 4450 }, { "epoch": 0.6034026977563886, "grad_norm": 3.8772505631763354, "learning_rate": 7.175780345558934e-07, "loss": 1.0937, "step": 4451 }, { "epoch": 0.6035382634040534, "grad_norm": 8.301445358440832, "learning_rate": 7.171568010679203e-07, "loss": 1.1121, "step": 4452 }, { "epoch": 0.6036738290517183, "grad_norm": 4.815550376315478, "learning_rate": 7.167356221240251e-07, "loss": 1.0888, "step": 4453 }, { "epoch": 0.6038093946993832, "grad_norm": 5.719947587828329, "learning_rate": 7.163144978054296e-07, "loss": 1.1075, "step": 4454 }, { "epoch": 0.603944960347048, "grad_norm": 8.689269421233213, "learning_rate": 7.158934281933435e-07, "loss": 1.0952, "step": 4455 }, { "epoch": 0.604080525994713, "grad_norm": 5.721659927514625, "learning_rate": 7.154724133689676e-07, "loss": 1.1234, "step": 4456 }, { "epoch": 0.6042160916423778, "grad_norm": 5.943186774597705, "learning_rate": 7.150514534134905e-07, "loss": 1.1387, "step": 4457 }, { "epoch": 0.6043516572900427, "grad_norm": 7.917144691851764, "learning_rate": 7.146305484080916e-07, "loss": 1.0794, "step": 4458 }, { "epoch": 0.6044872229377076, "grad_norm": 6.955491731874711, "learning_rate": 7.142096984339392e-07, "loss": 1.1227, "step": 4459 }, { "epoch": 0.6046227885853724, "grad_norm": 5.909027391345318, "learning_rate": 7.137889035721898e-07, "loss": 1.09, "step": 4460 }, { "epoch": 0.6047583542330374, "grad_norm": 3.628174984151734, "learning_rate": 7.133681639039917e-07, "loss": 1.1068, "step": 4461 }, { "epoch": 0.6048939198807022, "grad_norm": 6.512385939346876, "learning_rate": 7.129474795104802e-07, "loss": 1.1045, "step": 4462 }, { "epoch": 0.6050294855283671, "grad_norm": 13.003598529003972, "learning_rate": 7.12526850472782e-07, "loss": 1.1066, "step": 4463 }, { "epoch": 0.605165051176032, "grad_norm": 6.200994306273764, "learning_rate": 7.121062768720109e-07, "loss": 1.082, "step": 4464 }, { "epoch": 0.6053006168236968, "grad_norm": 4.862898493231653, "learning_rate": 7.116857587892724e-07, "loss": 1.0662, "step": 4465 }, { "epoch": 0.6054361824713618, "grad_norm": 5.381448483675793, "learning_rate": 7.112652963056589e-07, "loss": 1.065, "step": 4466 }, { "epoch": 0.6055717481190266, "grad_norm": 4.745635974203882, "learning_rate": 7.108448895022544e-07, "loss": 1.0764, "step": 4467 }, { "epoch": 0.6057073137666915, "grad_norm": 5.441773488383502, "learning_rate": 7.104245384601303e-07, "loss": 1.0883, "step": 4468 }, { "epoch": 0.6058428794143564, "grad_norm": 3.976340636778258, "learning_rate": 7.100042432603481e-07, "loss": 1.0906, "step": 4469 }, { "epoch": 0.6059784450620213, "grad_norm": 7.452454678781956, "learning_rate": 7.095840039839587e-07, "loss": 1.0947, "step": 4470 }, { "epoch": 0.6061140107096862, "grad_norm": 6.8209607698421335, "learning_rate": 7.091638207120015e-07, "loss": 1.1299, "step": 4471 }, { "epoch": 0.606249576357351, "grad_norm": 5.423416459669621, "learning_rate": 7.087436935255058e-07, "loss": 1.108, "step": 4472 }, { "epoch": 0.6063851420050159, "grad_norm": 8.242480576546344, "learning_rate": 7.083236225054901e-07, "loss": 1.1334, "step": 4473 }, { "epoch": 0.6065207076526808, "grad_norm": 4.584783554502356, "learning_rate": 7.079036077329612e-07, "loss": 1.1131, "step": 4474 }, { "epoch": 0.6066562733003457, "grad_norm": 4.3555336065830055, "learning_rate": 7.074836492889158e-07, "loss": 1.1331, "step": 4475 }, { "epoch": 0.6067918389480106, "grad_norm": 4.864303352068669, "learning_rate": 7.070637472543397e-07, "loss": 1.1151, "step": 4476 }, { "epoch": 0.6069274045956754, "grad_norm": 7.562086330477637, "learning_rate": 7.066439017102076e-07, "loss": 1.1237, "step": 4477 }, { "epoch": 0.6070629702433403, "grad_norm": 9.585853882282192, "learning_rate": 7.062241127374838e-07, "loss": 1.0506, "step": 4478 }, { "epoch": 0.6071985358910053, "grad_norm": 5.8632397474905185, "learning_rate": 7.058043804171203e-07, "loss": 1.1135, "step": 4479 }, { "epoch": 0.6073341015386701, "grad_norm": 4.674137679008877, "learning_rate": 7.053847048300603e-07, "loss": 1.0717, "step": 4480 }, { "epoch": 0.607469667186335, "grad_norm": 7.08135087724937, "learning_rate": 7.04965086057234e-07, "loss": 1.1202, "step": 4481 }, { "epoch": 0.6076052328339999, "grad_norm": 111.34640850127147, "learning_rate": 7.045455241795624e-07, "loss": 1.0929, "step": 4482 }, { "epoch": 0.6077407984816647, "grad_norm": 4.445500575024244, "learning_rate": 7.041260192779539e-07, "loss": 1.1223, "step": 4483 }, { "epoch": 0.6078763641293297, "grad_norm": 7.653000008220138, "learning_rate": 7.037065714333075e-07, "loss": 1.1164, "step": 4484 }, { "epoch": 0.6080119297769945, "grad_norm": 5.467298728456033, "learning_rate": 7.032871807265096e-07, "loss": 1.1181, "step": 4485 }, { "epoch": 0.6081474954246594, "grad_norm": 5.153498584087681, "learning_rate": 7.028678472384373e-07, "loss": 1.1022, "step": 4486 }, { "epoch": 0.6082830610723243, "grad_norm": 6.9886346189579625, "learning_rate": 7.02448571049955e-07, "loss": 1.1086, "step": 4487 }, { "epoch": 0.6084186267199891, "grad_norm": 3.7650557159982525, "learning_rate": 7.020293522419168e-07, "loss": 1.1235, "step": 4488 }, { "epoch": 0.6085541923676541, "grad_norm": 6.610089374919249, "learning_rate": 7.016101908951663e-07, "loss": 1.1226, "step": 4489 }, { "epoch": 0.6086897580153189, "grad_norm": 5.659179846129685, "learning_rate": 7.011910870905349e-07, "loss": 1.0397, "step": 4490 }, { "epoch": 0.6088253236629838, "grad_norm": 6.508528441953236, "learning_rate": 7.00772040908844e-07, "loss": 1.0835, "step": 4491 }, { "epoch": 0.6089608893106487, "grad_norm": 5.177278788102354, "learning_rate": 7.003530524309025e-07, "loss": 1.0673, "step": 4492 }, { "epoch": 0.6090964549583135, "grad_norm": 10.38367466806621, "learning_rate": 6.999341217375103e-07, "loss": 1.0935, "step": 4493 }, { "epoch": 0.6092320206059785, "grad_norm": 4.4252071750465625, "learning_rate": 6.995152489094535e-07, "loss": 1.1123, "step": 4494 }, { "epoch": 0.6093675862536433, "grad_norm": 8.309063681734726, "learning_rate": 6.990964340275095e-07, "loss": 1.1314, "step": 4495 }, { "epoch": 0.6095031519013082, "grad_norm": 5.187194250679384, "learning_rate": 6.986776771724427e-07, "loss": 1.151, "step": 4496 }, { "epoch": 0.6096387175489731, "grad_norm": 4.3201487115704476, "learning_rate": 6.982589784250077e-07, "loss": 1.1316, "step": 4497 }, { "epoch": 0.609774283196638, "grad_norm": 4.384140608881688, "learning_rate": 6.978403378659466e-07, "loss": 1.0927, "step": 4498 }, { "epoch": 0.6099098488443029, "grad_norm": 5.529011280818706, "learning_rate": 6.974217555759913e-07, "loss": 1.1032, "step": 4499 }, { "epoch": 0.6100454144919677, "grad_norm": 9.254205144045683, "learning_rate": 6.970032316358623e-07, "loss": 1.1275, "step": 4500 }, { "epoch": 0.6101809801396326, "grad_norm": 5.429422148254542, "learning_rate": 6.965847661262681e-07, "loss": 1.1288, "step": 4501 }, { "epoch": 0.6103165457872975, "grad_norm": 3.975989413849654, "learning_rate": 6.96166359127907e-07, "loss": 1.0786, "step": 4502 }, { "epoch": 0.6104521114349624, "grad_norm": 4.690728661945474, "learning_rate": 6.957480107214648e-07, "loss": 1.0915, "step": 4503 }, { "epoch": 0.6105876770826273, "grad_norm": 4.910065365660897, "learning_rate": 6.953297209876174e-07, "loss": 1.0824, "step": 4504 }, { "epoch": 0.6107232427302921, "grad_norm": 3.9705029940318917, "learning_rate": 6.949114900070284e-07, "loss": 1.061, "step": 4505 }, { "epoch": 0.610858808377957, "grad_norm": 5.731572572184886, "learning_rate": 6.944933178603503e-07, "loss": 1.1125, "step": 4506 }, { "epoch": 0.610994374025622, "grad_norm": 5.025387470228882, "learning_rate": 6.940752046282242e-07, "loss": 1.1033, "step": 4507 }, { "epoch": 0.6111299396732868, "grad_norm": 4.576978472005297, "learning_rate": 6.936571503912803e-07, "loss": 1.1218, "step": 4508 }, { "epoch": 0.6112655053209517, "grad_norm": 4.2796520059895, "learning_rate": 6.932391552301366e-07, "loss": 1.1356, "step": 4509 }, { "epoch": 0.6114010709686165, "grad_norm": 5.568754230141302, "learning_rate": 6.928212192254006e-07, "loss": 1.1153, "step": 4510 }, { "epoch": 0.6115366366162814, "grad_norm": 4.305279787012166, "learning_rate": 6.924033424576674e-07, "loss": 1.1473, "step": 4511 }, { "epoch": 0.6116722022639464, "grad_norm": 4.330110908144142, "learning_rate": 6.91985525007522e-07, "loss": 1.0732, "step": 4512 }, { "epoch": 0.6118077679116112, "grad_norm": 3.731526861652547, "learning_rate": 6.915677669555363e-07, "loss": 1.1339, "step": 4513 }, { "epoch": 0.6119433335592761, "grad_norm": 5.182403908027282, "learning_rate": 6.911500683822726e-07, "loss": 1.1331, "step": 4514 }, { "epoch": 0.6120788992069409, "grad_norm": 4.473051969016956, "learning_rate": 6.907324293682803e-07, "loss": 1.163, "step": 4515 }, { "epoch": 0.6122144648546058, "grad_norm": 8.734277540539058, "learning_rate": 6.903148499940974e-07, "loss": 1.0824, "step": 4516 }, { "epoch": 0.6123500305022708, "grad_norm": 6.663157574325358, "learning_rate": 6.898973303402516e-07, "loss": 1.0848, "step": 4517 }, { "epoch": 0.6124855961499356, "grad_norm": 5.14430057122811, "learning_rate": 6.894798704872574e-07, "loss": 1.1015, "step": 4518 }, { "epoch": 0.6126211617976005, "grad_norm": 3.870672250097099, "learning_rate": 6.890624705156194e-07, "loss": 1.1092, "step": 4519 }, { "epoch": 0.6127567274452653, "grad_norm": 4.987774048333947, "learning_rate": 6.886451305058293e-07, "loss": 1.0904, "step": 4520 }, { "epoch": 0.6128922930929303, "grad_norm": 5.991983916425166, "learning_rate": 6.882278505383685e-07, "loss": 1.1266, "step": 4521 }, { "epoch": 0.6130278587405952, "grad_norm": 5.35996545910925, "learning_rate": 6.878106306937053e-07, "loss": 1.0725, "step": 4522 }, { "epoch": 0.61316342438826, "grad_norm": 4.015814337189134, "learning_rate": 6.873934710522979e-07, "loss": 1.0844, "step": 4523 }, { "epoch": 0.6132989900359249, "grad_norm": 6.532444298601157, "learning_rate": 6.86976371694592e-07, "loss": 1.1332, "step": 4524 }, { "epoch": 0.6134345556835897, "grad_norm": 7.036418780431537, "learning_rate": 6.865593327010221e-07, "loss": 1.0556, "step": 4525 }, { "epoch": 0.6135701213312547, "grad_norm": 5.066777390417818, "learning_rate": 6.861423541520104e-07, "loss": 1.1525, "step": 4526 }, { "epoch": 0.6137056869789196, "grad_norm": 4.468372124105151, "learning_rate": 6.857254361279688e-07, "loss": 1.0858, "step": 4527 }, { "epoch": 0.6138412526265844, "grad_norm": 4.520280300746366, "learning_rate": 6.853085787092956e-07, "loss": 1.0931, "step": 4528 }, { "epoch": 0.6139768182742493, "grad_norm": 6.594033367138388, "learning_rate": 6.848917819763793e-07, "loss": 1.1369, "step": 4529 }, { "epoch": 0.6141123839219141, "grad_norm": 4.675007742566754, "learning_rate": 6.844750460095956e-07, "loss": 1.0859, "step": 4530 }, { "epoch": 0.6142479495695791, "grad_norm": 4.4576539064789795, "learning_rate": 6.840583708893083e-07, "loss": 1.1128, "step": 4531 }, { "epoch": 0.614383515217244, "grad_norm": 4.16957918815982, "learning_rate": 6.836417566958707e-07, "loss": 1.1685, "step": 4532 }, { "epoch": 0.6145190808649088, "grad_norm": 4.157805933981122, "learning_rate": 6.832252035096227e-07, "loss": 1.0991, "step": 4533 }, { "epoch": 0.6146546465125737, "grad_norm": 6.63162997499914, "learning_rate": 6.82808711410894e-07, "loss": 1.1676, "step": 4534 }, { "epoch": 0.6147902121602385, "grad_norm": 4.336904777091727, "learning_rate": 6.823922804800016e-07, "loss": 1.096, "step": 4535 }, { "epoch": 0.6149257778079035, "grad_norm": 4.057510429219721, "learning_rate": 6.819759107972507e-07, "loss": 1.1577, "step": 4536 }, { "epoch": 0.6150613434555684, "grad_norm": 6.216138114128963, "learning_rate": 6.815596024429351e-07, "loss": 1.0888, "step": 4537 }, { "epoch": 0.6151969091032332, "grad_norm": 6.614742818634075, "learning_rate": 6.811433554973366e-07, "loss": 1.1648, "step": 4538 }, { "epoch": 0.6153324747508981, "grad_norm": 4.513790764282729, "learning_rate": 6.807271700407251e-07, "loss": 1.093, "step": 4539 }, { "epoch": 0.615468040398563, "grad_norm": 6.263413674650968, "learning_rate": 6.803110461533587e-07, "loss": 1.0635, "step": 4540 }, { "epoch": 0.6156036060462279, "grad_norm": 6.813678109601586, "learning_rate": 6.798949839154834e-07, "loss": 1.1131, "step": 4541 }, { "epoch": 0.6157391716938928, "grad_norm": 4.460962070259312, "learning_rate": 6.79478983407334e-07, "loss": 1.0711, "step": 4542 }, { "epoch": 0.6158747373415576, "grad_norm": 4.767364002627803, "learning_rate": 6.790630447091325e-07, "loss": 1.0898, "step": 4543 }, { "epoch": 0.6160103029892225, "grad_norm": 5.156752604323467, "learning_rate": 6.786471679010895e-07, "loss": 1.0981, "step": 4544 }, { "epoch": 0.6161458686368874, "grad_norm": 4.90255027598807, "learning_rate": 6.782313530634036e-07, "loss": 1.1168, "step": 4545 }, { "epoch": 0.6162814342845523, "grad_norm": 5.016725124487034, "learning_rate": 6.77815600276261e-07, "loss": 1.1068, "step": 4546 }, { "epoch": 0.6164169999322172, "grad_norm": 4.309903800909969, "learning_rate": 6.773999096198373e-07, "loss": 1.097, "step": 4547 }, { "epoch": 0.616552565579882, "grad_norm": 5.326687179204575, "learning_rate": 6.769842811742941e-07, "loss": 1.0874, "step": 4548 }, { "epoch": 0.616688131227547, "grad_norm": 7.816162171813349, "learning_rate": 6.765687150197827e-07, "loss": 1.1207, "step": 4549 }, { "epoch": 0.6168236968752118, "grad_norm": 3.9460974954904566, "learning_rate": 6.761532112364414e-07, "loss": 1.0994, "step": 4550 }, { "epoch": 0.6169592625228767, "grad_norm": 5.235941758728398, "learning_rate": 6.757377699043976e-07, "loss": 1.1058, "step": 4551 }, { "epoch": 0.6170948281705416, "grad_norm": 3.9857883424411447, "learning_rate": 6.753223911037646e-07, "loss": 1.143, "step": 4552 }, { "epoch": 0.6172303938182064, "grad_norm": 5.34900064498815, "learning_rate": 6.749070749146461e-07, "loss": 1.1362, "step": 4553 }, { "epoch": 0.6173659594658714, "grad_norm": 5.398498003282135, "learning_rate": 6.744918214171318e-07, "loss": 1.1108, "step": 4554 }, { "epoch": 0.6175015251135362, "grad_norm": 8.631936332833524, "learning_rate": 6.740766306913007e-07, "loss": 1.0983, "step": 4555 }, { "epoch": 0.6176370907612011, "grad_norm": 5.861305557817497, "learning_rate": 6.736615028172183e-07, "loss": 1.158, "step": 4556 }, { "epoch": 0.617772656408866, "grad_norm": 3.6236013138055387, "learning_rate": 6.732464378749394e-07, "loss": 1.101, "step": 4557 }, { "epoch": 0.6179082220565308, "grad_norm": 4.4707619379558095, "learning_rate": 6.728314359445058e-07, "loss": 1.1766, "step": 4558 }, { "epoch": 0.6180437877041958, "grad_norm": 6.334680682125294, "learning_rate": 6.724164971059469e-07, "loss": 1.0935, "step": 4559 }, { "epoch": 0.6181793533518606, "grad_norm": 4.905068535120869, "learning_rate": 6.720016214392812e-07, "loss": 1.0963, "step": 4560 }, { "epoch": 0.6183149189995255, "grad_norm": 6.666547211104566, "learning_rate": 6.715868090245131e-07, "loss": 1.0984, "step": 4561 }, { "epoch": 0.6184504846471904, "grad_norm": 5.76611531188992, "learning_rate": 6.711720599416373e-07, "loss": 1.0565, "step": 4562 }, { "epoch": 0.6185860502948552, "grad_norm": 4.366808037539299, "learning_rate": 6.707573742706334e-07, "loss": 1.1147, "step": 4563 }, { "epoch": 0.6187216159425202, "grad_norm": 5.870488112460735, "learning_rate": 6.703427520914715e-07, "loss": 1.0753, "step": 4564 }, { "epoch": 0.6188571815901851, "grad_norm": 6.122313163120966, "learning_rate": 6.699281934841073e-07, "loss": 1.1271, "step": 4565 }, { "epoch": 0.6189927472378499, "grad_norm": 5.430633172267324, "learning_rate": 6.69513698528486e-07, "loss": 1.1133, "step": 4566 }, { "epoch": 0.6191283128855148, "grad_norm": 4.485445976342643, "learning_rate": 6.69099267304539e-07, "loss": 1.113, "step": 4567 }, { "epoch": 0.6192638785331797, "grad_norm": 4.96251796243927, "learning_rate": 6.686848998921864e-07, "loss": 1.0961, "step": 4568 }, { "epoch": 0.6193994441808446, "grad_norm": 4.952088167478997, "learning_rate": 6.682705963713355e-07, "loss": 1.1135, "step": 4569 }, { "epoch": 0.6195350098285095, "grad_norm": 4.3921768000860695, "learning_rate": 6.678563568218816e-07, "loss": 1.104, "step": 4570 }, { "epoch": 0.6196705754761743, "grad_norm": 4.210853018172589, "learning_rate": 6.674421813237079e-07, "loss": 1.147, "step": 4571 }, { "epoch": 0.6198061411238392, "grad_norm": 4.85382198173681, "learning_rate": 6.670280699566841e-07, "loss": 1.0997, "step": 4572 }, { "epoch": 0.6199417067715041, "grad_norm": 6.212617479991437, "learning_rate": 6.666140228006687e-07, "loss": 1.0764, "step": 4573 }, { "epoch": 0.620077272419169, "grad_norm": 5.074820563816849, "learning_rate": 6.662000399355075e-07, "loss": 1.138, "step": 4574 }, { "epoch": 0.6202128380668339, "grad_norm": 4.243064827054636, "learning_rate": 6.657861214410338e-07, "loss": 1.0852, "step": 4575 }, { "epoch": 0.6203484037144987, "grad_norm": 4.501452098400845, "learning_rate": 6.653722673970681e-07, "loss": 1.1216, "step": 4576 }, { "epoch": 0.6204839693621637, "grad_norm": 12.23824309968029, "learning_rate": 6.649584778834196e-07, "loss": 1.0852, "step": 4577 }, { "epoch": 0.6206195350098285, "grad_norm": 7.031786245851898, "learning_rate": 6.645447529798838e-07, "loss": 1.0952, "step": 4578 }, { "epoch": 0.6207551006574934, "grad_norm": 5.620507891835343, "learning_rate": 6.641310927662447e-07, "loss": 1.0953, "step": 4579 }, { "epoch": 0.6208906663051583, "grad_norm": 8.409902157864359, "learning_rate": 6.637174973222727e-07, "loss": 1.104, "step": 4580 }, { "epoch": 0.6210262319528231, "grad_norm": 4.781166932969708, "learning_rate": 6.633039667277274e-07, "loss": 1.1218, "step": 4581 }, { "epoch": 0.6211617976004881, "grad_norm": 3.9459918550066186, "learning_rate": 6.62890501062354e-07, "loss": 1.0966, "step": 4582 }, { "epoch": 0.6212973632481529, "grad_norm": 4.958730320192523, "learning_rate": 6.624771004058868e-07, "loss": 1.1313, "step": 4583 }, { "epoch": 0.6214329288958178, "grad_norm": 7.143303636689911, "learning_rate": 6.620637648380463e-07, "loss": 1.1146, "step": 4584 }, { "epoch": 0.6215684945434827, "grad_norm": 4.648010125199305, "learning_rate": 6.616504944385415e-07, "loss": 1.0826, "step": 4585 }, { "epoch": 0.6217040601911475, "grad_norm": 4.395006016950565, "learning_rate": 6.612372892870681e-07, "loss": 1.1136, "step": 4586 }, { "epoch": 0.6218396258388125, "grad_norm": 6.035242126994744, "learning_rate": 6.608241494633092e-07, "loss": 1.1016, "step": 4587 }, { "epoch": 0.6219751914864773, "grad_norm": 7.719269455444584, "learning_rate": 6.604110750469358e-07, "loss": 1.0979, "step": 4588 }, { "epoch": 0.6221107571341422, "grad_norm": 5.230854323834046, "learning_rate": 6.599980661176059e-07, "loss": 1.1113, "step": 4589 }, { "epoch": 0.6222463227818071, "grad_norm": 5.274700651970217, "learning_rate": 6.595851227549656e-07, "loss": 1.09, "step": 4590 }, { "epoch": 0.622381888429472, "grad_norm": 3.515394174766526, "learning_rate": 6.591722450386468e-07, "loss": 1.1042, "step": 4591 }, { "epoch": 0.6225174540771369, "grad_norm": 5.078844618653557, "learning_rate": 6.587594330482707e-07, "loss": 1.143, "step": 4592 }, { "epoch": 0.6226530197248017, "grad_norm": 5.281526381804316, "learning_rate": 6.583466868634437e-07, "loss": 1.1157, "step": 4593 }, { "epoch": 0.6227885853724666, "grad_norm": 5.176393099178137, "learning_rate": 6.579340065637619e-07, "loss": 1.0963, "step": 4594 }, { "epoch": 0.6229241510201315, "grad_norm": 7.452569320932888, "learning_rate": 6.575213922288064e-07, "loss": 1.0849, "step": 4595 }, { "epoch": 0.6230597166677964, "grad_norm": 5.236784060386885, "learning_rate": 6.571088439381475e-07, "loss": 1.1224, "step": 4596 }, { "epoch": 0.6231952823154613, "grad_norm": 13.943186310754676, "learning_rate": 6.566963617713412e-07, "loss": 1.1419, "step": 4597 }, { "epoch": 0.6233308479631261, "grad_norm": 8.741511404689474, "learning_rate": 6.562839458079315e-07, "loss": 1.1102, "step": 4598 }, { "epoch": 0.623466413610791, "grad_norm": 5.645438277021203, "learning_rate": 6.558715961274501e-07, "loss": 1.097, "step": 4599 }, { "epoch": 0.623601979258456, "grad_norm": 5.850698355213961, "learning_rate": 6.554593128094145e-07, "loss": 1.0981, "step": 4600 }, { "epoch": 0.6237375449061208, "grad_norm": 6.244894045550112, "learning_rate": 6.550470959333313e-07, "loss": 1.0913, "step": 4601 }, { "epoch": 0.6238731105537857, "grad_norm": 5.850652808992895, "learning_rate": 6.546349455786925e-07, "loss": 1.105, "step": 4602 }, { "epoch": 0.6240086762014505, "grad_norm": 3.4996606043293053, "learning_rate": 6.542228618249784e-07, "loss": 1.1404, "step": 4603 }, { "epoch": 0.6241442418491154, "grad_norm": 4.964488213980168, "learning_rate": 6.538108447516557e-07, "loss": 1.1131, "step": 4604 }, { "epoch": 0.6242798074967804, "grad_norm": 5.504996047397063, "learning_rate": 6.533988944381792e-07, "loss": 1.1297, "step": 4605 }, { "epoch": 0.6244153731444452, "grad_norm": 4.815618340492956, "learning_rate": 6.529870109639899e-07, "loss": 1.094, "step": 4606 }, { "epoch": 0.6245509387921101, "grad_norm": 4.922135862717194, "learning_rate": 6.525751944085166e-07, "loss": 1.1206, "step": 4607 }, { "epoch": 0.6246865044397749, "grad_norm": 11.497930069830153, "learning_rate": 6.521634448511743e-07, "loss": 1.1234, "step": 4608 }, { "epoch": 0.6248220700874398, "grad_norm": 7.407467931562672, "learning_rate": 6.517517623713664e-07, "loss": 1.1431, "step": 4609 }, { "epoch": 0.6249576357351048, "grad_norm": 4.628594865277681, "learning_rate": 6.513401470484817e-07, "loss": 1.0746, "step": 4610 }, { "epoch": 0.6250932013827696, "grad_norm": 10.203221537433596, "learning_rate": 6.50928598961898e-07, "loss": 1.1188, "step": 4611 }, { "epoch": 0.6252287670304345, "grad_norm": 5.026640929403705, "learning_rate": 6.505171181909782e-07, "loss": 1.1147, "step": 4612 }, { "epoch": 0.6253643326780993, "grad_norm": 6.371367174140004, "learning_rate": 6.501057048150738e-07, "loss": 1.1014, "step": 4613 }, { "epoch": 0.6254998983257642, "grad_norm": 5.230783520656706, "learning_rate": 6.496943589135225e-07, "loss": 1.0885, "step": 4614 }, { "epoch": 0.6256354639734292, "grad_norm": 5.444048060990577, "learning_rate": 6.492830805656484e-07, "loss": 1.14, "step": 4615 }, { "epoch": 0.625771029621094, "grad_norm": 5.392698819673076, "learning_rate": 6.488718698507643e-07, "loss": 1.088, "step": 4616 }, { "epoch": 0.6259065952687589, "grad_norm": 5.494930311535556, "learning_rate": 6.484607268481681e-07, "loss": 1.1372, "step": 4617 }, { "epoch": 0.6260421609164237, "grad_norm": 6.068690238314998, "learning_rate": 6.480496516371461e-07, "loss": 1.1311, "step": 4618 }, { "epoch": 0.6261777265640887, "grad_norm": 4.31458142324003, "learning_rate": 6.476386442969703e-07, "loss": 1.1131, "step": 4619 }, { "epoch": 0.6263132922117536, "grad_norm": 5.018000860111607, "learning_rate": 6.472277049069011e-07, "loss": 1.0827, "step": 4620 }, { "epoch": 0.6264488578594184, "grad_norm": 4.914680771762028, "learning_rate": 6.468168335461839e-07, "loss": 1.1083, "step": 4621 }, { "epoch": 0.6265844235070833, "grad_norm": 5.134066894808368, "learning_rate": 6.464060302940528e-07, "loss": 1.0701, "step": 4622 }, { "epoch": 0.6267199891547481, "grad_norm": 5.274478122125835, "learning_rate": 6.459952952297274e-07, "loss": 1.1065, "step": 4623 }, { "epoch": 0.6268555548024131, "grad_norm": 7.345300148857762, "learning_rate": 6.455846284324153e-07, "loss": 1.1271, "step": 4624 }, { "epoch": 0.626991120450078, "grad_norm": 5.896824456628931, "learning_rate": 6.451740299813097e-07, "loss": 1.1049, "step": 4625 }, { "epoch": 0.6271266860977428, "grad_norm": 3.991286143524285, "learning_rate": 6.447634999555919e-07, "loss": 1.1469, "step": 4626 }, { "epoch": 0.6272622517454077, "grad_norm": 5.498903427026845, "learning_rate": 6.443530384344291e-07, "loss": 1.145, "step": 4627 }, { "epoch": 0.6273978173930725, "grad_norm": 4.480645600876426, "learning_rate": 6.439426454969752e-07, "loss": 1.1075, "step": 4628 }, { "epoch": 0.6275333830407375, "grad_norm": 4.948716404381203, "learning_rate": 6.435323212223718e-07, "loss": 1.0897, "step": 4629 }, { "epoch": 0.6276689486884024, "grad_norm": 3.782617484028434, "learning_rate": 6.431220656897463e-07, "loss": 1.08, "step": 4630 }, { "epoch": 0.6278045143360672, "grad_norm": 4.677137093336177, "learning_rate": 6.427118789782136e-07, "loss": 1.1197, "step": 4631 }, { "epoch": 0.6279400799837321, "grad_norm": 4.509383611901748, "learning_rate": 6.423017611668744e-07, "loss": 1.1008, "step": 4632 }, { "epoch": 0.628075645631397, "grad_norm": 6.151691754869829, "learning_rate": 6.418917123348176e-07, "loss": 1.1416, "step": 4633 }, { "epoch": 0.6282112112790619, "grad_norm": 4.465908532904476, "learning_rate": 6.41481732561117e-07, "loss": 1.1054, "step": 4634 }, { "epoch": 0.6283467769267268, "grad_norm": 4.611679674644159, "learning_rate": 6.410718219248344e-07, "loss": 1.1188, "step": 4635 }, { "epoch": 0.6284823425743916, "grad_norm": 4.710397207786699, "learning_rate": 6.406619805050177e-07, "loss": 1.1037, "step": 4636 }, { "epoch": 0.6286179082220565, "grad_norm": 3.908142234013787, "learning_rate": 6.402522083807016e-07, "loss": 1.1401, "step": 4637 }, { "epoch": 0.6287534738697214, "grad_norm": 4.776850667800949, "learning_rate": 6.398425056309073e-07, "loss": 1.0966, "step": 4638 }, { "epoch": 0.6288890395173863, "grad_norm": 3.9266379125660062, "learning_rate": 6.394328723346433e-07, "loss": 1.1004, "step": 4639 }, { "epoch": 0.6290246051650512, "grad_norm": 5.420345861532475, "learning_rate": 6.390233085709034e-07, "loss": 1.1358, "step": 4640 }, { "epoch": 0.629160170812716, "grad_norm": 4.774516057694799, "learning_rate": 6.386138144186693e-07, "loss": 1.1094, "step": 4641 }, { "epoch": 0.629295736460381, "grad_norm": 7.287379618441988, "learning_rate": 6.382043899569083e-07, "loss": 1.0829, "step": 4642 }, { "epoch": 0.6294313021080459, "grad_norm": 9.749408819944113, "learning_rate": 6.377950352645748e-07, "loss": 1.0909, "step": 4643 }, { "epoch": 0.6295668677557107, "grad_norm": 5.466555243869416, "learning_rate": 6.373857504206099e-07, "loss": 1.1183, "step": 4644 }, { "epoch": 0.6297024334033756, "grad_norm": 3.750946713656373, "learning_rate": 6.369765355039405e-07, "loss": 1.1012, "step": 4645 }, { "epoch": 0.6298379990510404, "grad_norm": 4.181175982291623, "learning_rate": 6.365673905934809e-07, "loss": 1.0765, "step": 4646 }, { "epoch": 0.6299735646987054, "grad_norm": 4.365850662108153, "learning_rate": 6.361583157681309e-07, "loss": 1.0945, "step": 4647 }, { "epoch": 0.6301091303463703, "grad_norm": 4.771568134098933, "learning_rate": 6.357493111067781e-07, "loss": 1.1333, "step": 4648 }, { "epoch": 0.6302446959940351, "grad_norm": 9.650108150784584, "learning_rate": 6.353403766882951e-07, "loss": 1.1258, "step": 4649 }, { "epoch": 0.6303802616417, "grad_norm": 4.473112219088918, "learning_rate": 6.349315125915424e-07, "loss": 1.1466, "step": 4650 }, { "epoch": 0.6305158272893648, "grad_norm": 6.47505012042688, "learning_rate": 6.345227188953653e-07, "loss": 1.0837, "step": 4651 }, { "epoch": 0.6306513929370298, "grad_norm": 4.939135126786597, "learning_rate": 6.341139956785974e-07, "loss": 1.1089, "step": 4652 }, { "epoch": 0.6307869585846947, "grad_norm": 6.362168357079173, "learning_rate": 6.337053430200571e-07, "loss": 1.133, "step": 4653 }, { "epoch": 0.6309225242323595, "grad_norm": 4.2196604549774035, "learning_rate": 6.332967609985502e-07, "loss": 1.0964, "step": 4654 }, { "epoch": 0.6310580898800244, "grad_norm": 4.4660968403949965, "learning_rate": 6.328882496928685e-07, "loss": 1.0439, "step": 4655 }, { "epoch": 0.6311936555276892, "grad_norm": 6.250117141209095, "learning_rate": 6.324798091817897e-07, "loss": 1.1286, "step": 4656 }, { "epoch": 0.6313292211753542, "grad_norm": 4.43122871251451, "learning_rate": 6.320714395440789e-07, "loss": 1.0939, "step": 4657 }, { "epoch": 0.6314647868230191, "grad_norm": 10.588982024675706, "learning_rate": 6.316631408584865e-07, "loss": 1.0926, "step": 4658 }, { "epoch": 0.6316003524706839, "grad_norm": 3.993530349122458, "learning_rate": 6.312549132037501e-07, "loss": 1.0691, "step": 4659 }, { "epoch": 0.6317359181183488, "grad_norm": 7.939562433977387, "learning_rate": 6.308467566585927e-07, "loss": 1.0953, "step": 4660 }, { "epoch": 0.6318714837660137, "grad_norm": 8.015041547163031, "learning_rate": 6.304386713017249e-07, "loss": 1.1194, "step": 4661 }, { "epoch": 0.6320070494136786, "grad_norm": 5.939036089297716, "learning_rate": 6.300306572118417e-07, "loss": 1.1114, "step": 4662 }, { "epoch": 0.6321426150613435, "grad_norm": 4.956736723813681, "learning_rate": 6.296227144676262e-07, "loss": 1.1045, "step": 4663 }, { "epoch": 0.6322781807090083, "grad_norm": 4.398381242418184, "learning_rate": 6.292148431477465e-07, "loss": 1.0975, "step": 4664 }, { "epoch": 0.6324137463566732, "grad_norm": 4.889670441377316, "learning_rate": 6.288070433308575e-07, "loss": 1.117, "step": 4665 }, { "epoch": 0.6325493120043381, "grad_norm": 5.472717334938071, "learning_rate": 6.283993150956002e-07, "loss": 1.1143, "step": 4666 }, { "epoch": 0.632684877652003, "grad_norm": 3.684237223907224, "learning_rate": 6.279916585206018e-07, "loss": 1.0806, "step": 4667 }, { "epoch": 0.6328204432996679, "grad_norm": 4.510660211955158, "learning_rate": 6.275840736844754e-07, "loss": 1.0835, "step": 4668 }, { "epoch": 0.6329560089473327, "grad_norm": 9.163571888962743, "learning_rate": 6.27176560665821e-07, "loss": 1.0843, "step": 4669 }, { "epoch": 0.6330915745949977, "grad_norm": 4.412650283759319, "learning_rate": 6.267691195432239e-07, "loss": 1.11, "step": 4670 }, { "epoch": 0.6332271402426625, "grad_norm": 8.671691754765522, "learning_rate": 6.263617503952559e-07, "loss": 1.1121, "step": 4671 }, { "epoch": 0.6333627058903274, "grad_norm": 7.036329087015013, "learning_rate": 6.259544533004751e-07, "loss": 1.1533, "step": 4672 }, { "epoch": 0.6334982715379923, "grad_norm": 5.019119549044581, "learning_rate": 6.255472283374253e-07, "loss": 1.0814, "step": 4673 }, { "epoch": 0.6336338371856571, "grad_norm": 5.90701889885827, "learning_rate": 6.251400755846371e-07, "loss": 1.1257, "step": 4674 }, { "epoch": 0.6337694028333221, "grad_norm": 5.2282116409981345, "learning_rate": 6.247329951206259e-07, "loss": 1.1083, "step": 4675 }, { "epoch": 0.6339049684809869, "grad_norm": 5.7516056447085, "learning_rate": 6.243259870238948e-07, "loss": 1.0868, "step": 4676 }, { "epoch": 0.6340405341286518, "grad_norm": 6.6725691480720535, "learning_rate": 6.239190513729313e-07, "loss": 1.0857, "step": 4677 }, { "epoch": 0.6341760997763167, "grad_norm": 5.661772924965767, "learning_rate": 6.235121882462107e-07, "loss": 1.1169, "step": 4678 }, { "epoch": 0.6343116654239815, "grad_norm": 5.2709521286933745, "learning_rate": 6.23105397722192e-07, "loss": 1.1242, "step": 4679 }, { "epoch": 0.6344472310716465, "grad_norm": 7.195995375273147, "learning_rate": 6.226986798793231e-07, "loss": 1.0745, "step": 4680 }, { "epoch": 0.6345827967193113, "grad_norm": 6.652063586537122, "learning_rate": 6.22292034796035e-07, "loss": 1.1004, "step": 4681 }, { "epoch": 0.6347183623669762, "grad_norm": 4.65504411576402, "learning_rate": 6.21885462550747e-07, "loss": 1.1084, "step": 4682 }, { "epoch": 0.6348539280146411, "grad_norm": 6.059961155097925, "learning_rate": 6.214789632218628e-07, "loss": 1.1515, "step": 4683 }, { "epoch": 0.634989493662306, "grad_norm": 4.444601916596591, "learning_rate": 6.210725368877723e-07, "loss": 1.0412, "step": 4684 }, { "epoch": 0.6351250593099709, "grad_norm": 4.76311977938366, "learning_rate": 6.206661836268525e-07, "loss": 1.0918, "step": 4685 }, { "epoch": 0.6352606249576357, "grad_norm": 4.143763011783691, "learning_rate": 6.202599035174645e-07, "loss": 1.0771, "step": 4686 }, { "epoch": 0.6353961906053006, "grad_norm": 4.43144241959773, "learning_rate": 6.19853696637957e-07, "loss": 1.0665, "step": 4687 }, { "epoch": 0.6355317562529655, "grad_norm": 4.877898344295134, "learning_rate": 6.194475630666629e-07, "loss": 1.134, "step": 4688 }, { "epoch": 0.6356673219006304, "grad_norm": 5.202618875224446, "learning_rate": 6.190415028819029e-07, "loss": 1.06, "step": 4689 }, { "epoch": 0.6358028875482953, "grad_norm": 4.808504487777157, "learning_rate": 6.186355161619814e-07, "loss": 1.108, "step": 4690 }, { "epoch": 0.6359384531959601, "grad_norm": 4.646285854321152, "learning_rate": 6.182296029851908e-07, "loss": 1.1247, "step": 4691 }, { "epoch": 0.636074018843625, "grad_norm": 5.699805785119773, "learning_rate": 6.178237634298073e-07, "loss": 1.1283, "step": 4692 }, { "epoch": 0.63620958449129, "grad_norm": 4.646147133850953, "learning_rate": 6.174179975740949e-07, "loss": 1.1224, "step": 4693 }, { "epoch": 0.6363451501389548, "grad_norm": 14.079126085621741, "learning_rate": 6.170123054963012e-07, "loss": 1.0942, "step": 4694 }, { "epoch": 0.6364807157866197, "grad_norm": 5.81431110583729, "learning_rate": 6.166066872746616e-07, "loss": 1.139, "step": 4695 }, { "epoch": 0.6366162814342845, "grad_norm": 4.844364928273675, "learning_rate": 6.162011429873959e-07, "loss": 1.1165, "step": 4696 }, { "epoch": 0.6367518470819494, "grad_norm": 5.685837231812664, "learning_rate": 6.157956727127102e-07, "loss": 1.0789, "step": 4697 }, { "epoch": 0.6368874127296144, "grad_norm": 5.716748986920902, "learning_rate": 6.153902765287966e-07, "loss": 1.1023, "step": 4698 }, { "epoch": 0.6370229783772792, "grad_norm": 4.5203954543135545, "learning_rate": 6.149849545138319e-07, "loss": 1.0617, "step": 4699 }, { "epoch": 0.6371585440249441, "grad_norm": 6.583039489261601, "learning_rate": 6.145797067459799e-07, "loss": 1.1, "step": 4700 }, { "epoch": 0.6372941096726089, "grad_norm": 4.20586312587734, "learning_rate": 6.141745333033889e-07, "loss": 1.0879, "step": 4701 }, { "epoch": 0.6374296753202738, "grad_norm": 20.16672797279213, "learning_rate": 6.137694342641937e-07, "loss": 1.0924, "step": 4702 }, { "epoch": 0.6375652409679388, "grad_norm": 4.809661023672352, "learning_rate": 6.133644097065143e-07, "loss": 1.1137, "step": 4703 }, { "epoch": 0.6377008066156036, "grad_norm": 4.161483783499084, "learning_rate": 6.129594597084567e-07, "loss": 1.0893, "step": 4704 }, { "epoch": 0.6378363722632685, "grad_norm": 5.679234609476501, "learning_rate": 6.125545843481119e-07, "loss": 1.0504, "step": 4705 }, { "epoch": 0.6379719379109333, "grad_norm": 4.6642200252187465, "learning_rate": 6.121497837035576e-07, "loss": 1.0759, "step": 4706 }, { "epoch": 0.6381075035585982, "grad_norm": 4.73981313436327, "learning_rate": 6.117450578528556e-07, "loss": 1.0903, "step": 4707 }, { "epoch": 0.6382430692062632, "grad_norm": 4.753247310727436, "learning_rate": 6.11340406874055e-07, "loss": 1.1421, "step": 4708 }, { "epoch": 0.638378634853928, "grad_norm": 5.433454062143399, "learning_rate": 6.109358308451885e-07, "loss": 1.0873, "step": 4709 }, { "epoch": 0.6385142005015929, "grad_norm": 4.3539922961172275, "learning_rate": 6.105313298442764e-07, "loss": 1.1056, "step": 4710 }, { "epoch": 0.6386497661492577, "grad_norm": 4.954256191184552, "learning_rate": 6.10126903949323e-07, "loss": 1.1174, "step": 4711 }, { "epoch": 0.6387853317969227, "grad_norm": 5.9150372657245995, "learning_rate": 6.097225532383184e-07, "loss": 1.0885, "step": 4712 }, { "epoch": 0.6389208974445876, "grad_norm": 5.4887570787132685, "learning_rate": 6.093182777892392e-07, "loss": 1.1054, "step": 4713 }, { "epoch": 0.6390564630922524, "grad_norm": 5.093319872699161, "learning_rate": 6.089140776800456e-07, "loss": 1.1105, "step": 4714 }, { "epoch": 0.6391920287399173, "grad_norm": 6.16097972050896, "learning_rate": 6.085099529886857e-07, "loss": 1.0886, "step": 4715 }, { "epoch": 0.6393275943875821, "grad_norm": 5.430408918785719, "learning_rate": 6.081059037930907e-07, "loss": 1.1239, "step": 4716 }, { "epoch": 0.6394631600352471, "grad_norm": 4.57399364558949, "learning_rate": 6.07701930171179e-07, "loss": 1.0953, "step": 4717 }, { "epoch": 0.639598725682912, "grad_norm": 4.70757190161937, "learning_rate": 6.072980322008532e-07, "loss": 1.0905, "step": 4718 }, { "epoch": 0.6397342913305768, "grad_norm": 3.8315631659259797, "learning_rate": 6.068942099600025e-07, "loss": 1.0925, "step": 4719 }, { "epoch": 0.6398698569782417, "grad_norm": 3.9508766740639145, "learning_rate": 6.064904635264999e-07, "loss": 1.1196, "step": 4720 }, { "epoch": 0.6400054226259067, "grad_norm": 5.924328958042891, "learning_rate": 6.060867929782057e-07, "loss": 1.0823, "step": 4721 }, { "epoch": 0.6401409882735715, "grad_norm": 3.7763898863640586, "learning_rate": 6.056831983929638e-07, "loss": 1.0732, "step": 4722 }, { "epoch": 0.6402765539212364, "grad_norm": 4.969636119585994, "learning_rate": 6.052796798486049e-07, "loss": 1.1072, "step": 4723 }, { "epoch": 0.6404121195689012, "grad_norm": 7.421279580646238, "learning_rate": 6.048762374229435e-07, "loss": 1.1038, "step": 4724 }, { "epoch": 0.6405476852165661, "grad_norm": 5.099426270988953, "learning_rate": 6.044728711937812e-07, "loss": 1.1024, "step": 4725 }, { "epoch": 0.6406832508642311, "grad_norm": 6.800611011559941, "learning_rate": 6.040695812389036e-07, "loss": 1.1259, "step": 4726 }, { "epoch": 0.6408188165118959, "grad_norm": 3.938752240082333, "learning_rate": 6.036663676360816e-07, "loss": 1.098, "step": 4727 }, { "epoch": 0.6409543821595608, "grad_norm": 4.244041589503124, "learning_rate": 6.032632304630726e-07, "loss": 1.1165, "step": 4728 }, { "epoch": 0.6410899478072256, "grad_norm": 4.465515014054558, "learning_rate": 6.028601697976175e-07, "loss": 1.113, "step": 4729 }, { "epoch": 0.6412255134548905, "grad_norm": 4.400661846361778, "learning_rate": 6.024571857174442e-07, "loss": 1.1085, "step": 4730 }, { "epoch": 0.6413610791025555, "grad_norm": 8.838829929073478, "learning_rate": 6.020542783002643e-07, "loss": 1.145, "step": 4731 }, { "epoch": 0.6414966447502203, "grad_norm": 4.110949901231358, "learning_rate": 6.01651447623776e-07, "loss": 1.0709, "step": 4732 }, { "epoch": 0.6416322103978852, "grad_norm": 5.530073653616745, "learning_rate": 6.012486937656613e-07, "loss": 1.1114, "step": 4733 }, { "epoch": 0.64176777604555, "grad_norm": 6.503028788235418, "learning_rate": 6.008460168035887e-07, "loss": 1.1244, "step": 4734 }, { "epoch": 0.641903341693215, "grad_norm": 3.915006077865822, "learning_rate": 6.004434168152109e-07, "loss": 1.0893, "step": 4735 }, { "epoch": 0.6420389073408799, "grad_norm": 5.053960238362699, "learning_rate": 6.000408938781665e-07, "loss": 1.1227, "step": 4736 }, { "epoch": 0.6421744729885447, "grad_norm": 4.613472285350905, "learning_rate": 5.996384480700783e-07, "loss": 1.1105, "step": 4737 }, { "epoch": 0.6423100386362096, "grad_norm": 6.140437974336782, "learning_rate": 5.992360794685554e-07, "loss": 1.0898, "step": 4738 }, { "epoch": 0.6424456042838744, "grad_norm": 4.110805045836889, "learning_rate": 5.988337881511909e-07, "loss": 1.0936, "step": 4739 }, { "epoch": 0.6425811699315394, "grad_norm": 5.3996248598787915, "learning_rate": 5.984315741955639e-07, "loss": 1.1191, "step": 4740 }, { "epoch": 0.6427167355792043, "grad_norm": 4.9467650883238115, "learning_rate": 5.98029437679238e-07, "loss": 1.1656, "step": 4741 }, { "epoch": 0.6428523012268691, "grad_norm": 5.112807688172794, "learning_rate": 5.976273786797619e-07, "loss": 1.1373, "step": 4742 }, { "epoch": 0.642987866874534, "grad_norm": 3.7793190266067693, "learning_rate": 5.972253972746701e-07, "loss": 1.1043, "step": 4743 }, { "epoch": 0.6431234325221988, "grad_norm": 6.621524172518218, "learning_rate": 5.968234935414807e-07, "loss": 1.081, "step": 4744 }, { "epoch": 0.6432589981698638, "grad_norm": 5.668261261122718, "learning_rate": 5.964216675576983e-07, "loss": 1.0669, "step": 4745 }, { "epoch": 0.6433945638175287, "grad_norm": 7.449586831433814, "learning_rate": 5.960199194008115e-07, "loss": 1.1173, "step": 4746 }, { "epoch": 0.6435301294651935, "grad_norm": 12.405664720171876, "learning_rate": 5.956182491482946e-07, "loss": 1.0986, "step": 4747 }, { "epoch": 0.6436656951128584, "grad_norm": 6.703042346048183, "learning_rate": 5.952166568776062e-07, "loss": 1.0586, "step": 4748 }, { "epoch": 0.6438012607605232, "grad_norm": 4.530825356797216, "learning_rate": 5.948151426661904e-07, "loss": 1.1245, "step": 4749 }, { "epoch": 0.6439368264081882, "grad_norm": 4.211866917238862, "learning_rate": 5.944137065914759e-07, "loss": 1.0789, "step": 4750 }, { "epoch": 0.6440723920558531, "grad_norm": 4.4934286027839665, "learning_rate": 5.94012348730877e-07, "loss": 1.1191, "step": 4751 }, { "epoch": 0.6442079577035179, "grad_norm": 5.124217116881592, "learning_rate": 5.936110691617915e-07, "loss": 1.1039, "step": 4752 }, { "epoch": 0.6443435233511828, "grad_norm": 11.410636153610827, "learning_rate": 5.932098679616038e-07, "loss": 1.0974, "step": 4753 }, { "epoch": 0.6444790889988476, "grad_norm": 7.608873207859691, "learning_rate": 5.928087452076821e-07, "loss": 1.0796, "step": 4754 }, { "epoch": 0.6446146546465126, "grad_norm": 8.227830421565866, "learning_rate": 5.924077009773794e-07, "loss": 1.1154, "step": 4755 }, { "epoch": 0.6447502202941775, "grad_norm": 11.029152516507166, "learning_rate": 5.920067353480345e-07, "loss": 1.0879, "step": 4756 }, { "epoch": 0.6448857859418423, "grad_norm": 3.53704096153297, "learning_rate": 5.916058483969698e-07, "loss": 1.1296, "step": 4757 }, { "epoch": 0.6450213515895072, "grad_norm": 4.745089670514431, "learning_rate": 5.912050402014941e-07, "loss": 1.0847, "step": 4758 }, { "epoch": 0.6451569172371721, "grad_norm": 14.92256411152909, "learning_rate": 5.908043108388989e-07, "loss": 1.1015, "step": 4759 }, { "epoch": 0.645292482884837, "grad_norm": 4.726162947742513, "learning_rate": 5.90403660386463e-07, "loss": 1.0955, "step": 4760 }, { "epoch": 0.6454280485325019, "grad_norm": 4.6720756567051, "learning_rate": 5.900030889214476e-07, "loss": 1.1103, "step": 4761 }, { "epoch": 0.6455636141801667, "grad_norm": 4.029051355014514, "learning_rate": 5.896025965211005e-07, "loss": 1.0519, "step": 4762 }, { "epoch": 0.6456991798278316, "grad_norm": 8.986645632038144, "learning_rate": 5.89202183262653e-07, "loss": 1.0734, "step": 4763 }, { "epoch": 0.6458347454754965, "grad_norm": 4.685139341064115, "learning_rate": 5.888018492233219e-07, "loss": 1.0868, "step": 4764 }, { "epoch": 0.6459703111231614, "grad_norm": 4.329989112710489, "learning_rate": 5.884015944803084e-07, "loss": 1.0923, "step": 4765 }, { "epoch": 0.6461058767708263, "grad_norm": 3.768557365884211, "learning_rate": 5.880014191107982e-07, "loss": 1.0987, "step": 4766 }, { "epoch": 0.6462414424184911, "grad_norm": 5.160422485937282, "learning_rate": 5.876013231919628e-07, "loss": 1.0958, "step": 4767 }, { "epoch": 0.6463770080661561, "grad_norm": 6.5510941688272695, "learning_rate": 5.872013068009565e-07, "loss": 1.0924, "step": 4768 }, { "epoch": 0.6465125737138209, "grad_norm": 3.909880418102421, "learning_rate": 5.868013700149197e-07, "loss": 1.0726, "step": 4769 }, { "epoch": 0.6466481393614858, "grad_norm": 5.407491924721801, "learning_rate": 5.864015129109771e-07, "loss": 1.0964, "step": 4770 }, { "epoch": 0.6467837050091507, "grad_norm": 4.982118168727045, "learning_rate": 5.860017355662381e-07, "loss": 1.1416, "step": 4771 }, { "epoch": 0.6469192706568155, "grad_norm": 7.028742890344849, "learning_rate": 5.856020380577964e-07, "loss": 1.0835, "step": 4772 }, { "epoch": 0.6470548363044805, "grad_norm": 7.191482421054565, "learning_rate": 5.852024204627308e-07, "loss": 1.1006, "step": 4773 }, { "epoch": 0.6471904019521453, "grad_norm": 7.144381163809436, "learning_rate": 5.84802882858104e-07, "loss": 1.1192, "step": 4774 }, { "epoch": 0.6473259675998102, "grad_norm": 4.49170192039116, "learning_rate": 5.844034253209641e-07, "loss": 1.0817, "step": 4775 }, { "epoch": 0.6474615332474751, "grad_norm": 3.6727279991197563, "learning_rate": 5.840040479283428e-07, "loss": 1.1108, "step": 4776 }, { "epoch": 0.6475970988951399, "grad_norm": 5.957167997599238, "learning_rate": 5.836047507572575e-07, "loss": 1.1047, "step": 4777 }, { "epoch": 0.6477326645428049, "grad_norm": 3.6945071859127547, "learning_rate": 5.832055338847089e-07, "loss": 1.104, "step": 4778 }, { "epoch": 0.6478682301904697, "grad_norm": 9.597931709143236, "learning_rate": 5.828063973876833e-07, "loss": 1.1219, "step": 4779 }, { "epoch": 0.6480037958381346, "grad_norm": 5.654186198679223, "learning_rate": 5.824073413431507e-07, "loss": 1.0746, "step": 4780 }, { "epoch": 0.6481393614857995, "grad_norm": 4.501176584625381, "learning_rate": 5.820083658280661e-07, "loss": 1.0953, "step": 4781 }, { "epoch": 0.6482749271334644, "grad_norm": 4.038908739972048, "learning_rate": 5.816094709193688e-07, "loss": 1.1034, "step": 4782 }, { "epoch": 0.6484104927811293, "grad_norm": 5.925430515460981, "learning_rate": 5.812106566939824e-07, "loss": 1.1002, "step": 4783 }, { "epoch": 0.6485460584287941, "grad_norm": 4.952757701069828, "learning_rate": 5.808119232288151e-07, "loss": 1.096, "step": 4784 }, { "epoch": 0.648681624076459, "grad_norm": 4.286653821425312, "learning_rate": 5.804132706007597e-07, "loss": 1.1262, "step": 4785 }, { "epoch": 0.6488171897241239, "grad_norm": 4.454196876345052, "learning_rate": 5.800146988866927e-07, "loss": 1.106, "step": 4786 }, { "epoch": 0.6489527553717888, "grad_norm": 5.6128942860834705, "learning_rate": 5.796162081634761e-07, "loss": 1.0895, "step": 4787 }, { "epoch": 0.6490883210194537, "grad_norm": 5.3843426509997325, "learning_rate": 5.792177985079558e-07, "loss": 1.0644, "step": 4788 }, { "epoch": 0.6492238866671185, "grad_norm": 5.293205027891196, "learning_rate": 5.788194699969608e-07, "loss": 1.1122, "step": 4789 }, { "epoch": 0.6493594523147834, "grad_norm": 3.9853728018586656, "learning_rate": 5.784212227073073e-07, "loss": 1.1013, "step": 4790 }, { "epoch": 0.6494950179624484, "grad_norm": 4.2028311904204365, "learning_rate": 5.780230567157924e-07, "loss": 1.1526, "step": 4791 }, { "epoch": 0.6496305836101132, "grad_norm": 6.5090038776561325, "learning_rate": 5.776249720992009e-07, "loss": 1.0931, "step": 4792 }, { "epoch": 0.6497661492577781, "grad_norm": 14.633045134777692, "learning_rate": 5.772269689342988e-07, "loss": 1.0809, "step": 4793 }, { "epoch": 0.6499017149054429, "grad_norm": 4.273302198200328, "learning_rate": 5.768290472978392e-07, "loss": 1.0664, "step": 4794 }, { "epoch": 0.6500372805531078, "grad_norm": 5.549263477510379, "learning_rate": 5.764312072665574e-07, "loss": 1.1308, "step": 4795 }, { "epoch": 0.6501728462007728, "grad_norm": 5.704573976485087, "learning_rate": 5.760334489171735e-07, "loss": 1.1054, "step": 4796 }, { "epoch": 0.6503084118484376, "grad_norm": 3.823359979315301, "learning_rate": 5.756357723263926e-07, "loss": 1.1077, "step": 4797 }, { "epoch": 0.6504439774961025, "grad_norm": 4.469860056512672, "learning_rate": 5.752381775709032e-07, "loss": 1.1162, "step": 4798 }, { "epoch": 0.6505795431437674, "grad_norm": 5.715417524014736, "learning_rate": 5.748406647273784e-07, "loss": 1.1055, "step": 4799 }, { "epoch": 0.6507151087914322, "grad_norm": 4.78259431993053, "learning_rate": 5.744432338724754e-07, "loss": 1.1085, "step": 4800 }, { "epoch": 0.6508506744390972, "grad_norm": 4.247689395081615, "learning_rate": 5.740458850828356e-07, "loss": 1.0953, "step": 4801 }, { "epoch": 0.650986240086762, "grad_norm": 4.511887040504957, "learning_rate": 5.736486184350846e-07, "loss": 1.1463, "step": 4802 }, { "epoch": 0.6511218057344269, "grad_norm": 10.2666154488867, "learning_rate": 5.732514340058321e-07, "loss": 1.0668, "step": 4803 }, { "epoch": 0.6512573713820918, "grad_norm": 4.07184567826688, "learning_rate": 5.728543318716721e-07, "loss": 1.1566, "step": 4804 }, { "epoch": 0.6513929370297566, "grad_norm": 3.757688701650893, "learning_rate": 5.724573121091825e-07, "loss": 1.1542, "step": 4805 }, { "epoch": 0.6515285026774216, "grad_norm": 4.3301529264626595, "learning_rate": 5.720603747949253e-07, "loss": 1.1575, "step": 4806 }, { "epoch": 0.6516640683250864, "grad_norm": 5.786274579911413, "learning_rate": 5.716635200054469e-07, "loss": 1.1026, "step": 4807 }, { "epoch": 0.6517996339727513, "grad_norm": 3.9521113763647104, "learning_rate": 5.712667478172776e-07, "loss": 1.1456, "step": 4808 }, { "epoch": 0.6519351996204162, "grad_norm": 7.381184679605699, "learning_rate": 5.708700583069319e-07, "loss": 1.0642, "step": 4809 }, { "epoch": 0.652070765268081, "grad_norm": 4.135997904629369, "learning_rate": 5.704734515509085e-07, "loss": 1.1173, "step": 4810 }, { "epoch": 0.652206330915746, "grad_norm": 4.954075308022803, "learning_rate": 5.700769276256886e-07, "loss": 1.1151, "step": 4811 }, { "epoch": 0.6523418965634108, "grad_norm": 4.71051648602859, "learning_rate": 5.696804866077404e-07, "loss": 1.1161, "step": 4812 }, { "epoch": 0.6524774622110757, "grad_norm": 3.9711496068662884, "learning_rate": 5.692841285735128e-07, "loss": 1.1115, "step": 4813 }, { "epoch": 0.6526130278587406, "grad_norm": 8.283514623910637, "learning_rate": 5.68887853599442e-07, "loss": 1.1013, "step": 4814 }, { "epoch": 0.6527485935064055, "grad_norm": 4.491719226813221, "learning_rate": 5.684916617619453e-07, "loss": 1.1087, "step": 4815 }, { "epoch": 0.6528841591540704, "grad_norm": 29.31255486946691, "learning_rate": 5.680955531374255e-07, "loss": 1.1266, "step": 4816 }, { "epoch": 0.6530197248017352, "grad_norm": 6.100751024186153, "learning_rate": 5.676995278022688e-07, "loss": 1.1259, "step": 4817 }, { "epoch": 0.6531552904494001, "grad_norm": 5.886933175445168, "learning_rate": 5.67303585832846e-07, "loss": 1.1058, "step": 4818 }, { "epoch": 0.653290856097065, "grad_norm": 4.728888754439498, "learning_rate": 5.669077273055111e-07, "loss": 1.1124, "step": 4819 }, { "epoch": 0.6534264217447299, "grad_norm": 3.4978027242015384, "learning_rate": 5.665119522966024e-07, "loss": 1.0993, "step": 4820 }, { "epoch": 0.6535619873923948, "grad_norm": 6.1623623280413184, "learning_rate": 5.661162608824419e-07, "loss": 1.0811, "step": 4821 }, { "epoch": 0.6536975530400596, "grad_norm": 5.401427485772748, "learning_rate": 5.657206531393358e-07, "loss": 1.0935, "step": 4822 }, { "epoch": 0.6538331186877245, "grad_norm": 5.01872866135969, "learning_rate": 5.653251291435735e-07, "loss": 1.0774, "step": 4823 }, { "epoch": 0.6539686843353895, "grad_norm": 4.544690711171425, "learning_rate": 5.64929688971429e-07, "loss": 1.1277, "step": 4824 }, { "epoch": 0.6541042499830543, "grad_norm": 2.9808044918591032, "learning_rate": 5.645343326991602e-07, "loss": 1.0917, "step": 4825 }, { "epoch": 0.6542398156307192, "grad_norm": 7.794667687554724, "learning_rate": 5.641390604030072e-07, "loss": 1.0866, "step": 4826 }, { "epoch": 0.654375381278384, "grad_norm": 8.685359862847541, "learning_rate": 5.637438721591967e-07, "loss": 1.1165, "step": 4827 }, { "epoch": 0.6545109469260489, "grad_norm": 5.145609635692126, "learning_rate": 5.633487680439361e-07, "loss": 1.1216, "step": 4828 }, { "epoch": 0.6546465125737139, "grad_norm": 4.666572229484775, "learning_rate": 5.629537481334195e-07, "loss": 1.0964, "step": 4829 }, { "epoch": 0.6547820782213787, "grad_norm": 3.2510262914126398, "learning_rate": 5.625588125038221e-07, "loss": 1.1166, "step": 4830 }, { "epoch": 0.6549176438690436, "grad_norm": 6.394221874238571, "learning_rate": 5.621639612313056e-07, "loss": 1.1189, "step": 4831 }, { "epoch": 0.6550532095167084, "grad_norm": 5.61500572309333, "learning_rate": 5.617691943920122e-07, "loss": 1.0488, "step": 4832 }, { "epoch": 0.6551887751643733, "grad_norm": 5.173025526287063, "learning_rate": 5.613745120620712e-07, "loss": 1.1186, "step": 4833 }, { "epoch": 0.6553243408120383, "grad_norm": 5.274361259055433, "learning_rate": 5.609799143175927e-07, "loss": 1.0415, "step": 4834 }, { "epoch": 0.6554599064597031, "grad_norm": 5.431336838447903, "learning_rate": 5.605854012346729e-07, "loss": 1.0752, "step": 4835 }, { "epoch": 0.655595472107368, "grad_norm": 4.280932414572979, "learning_rate": 5.601909728893892e-07, "loss": 1.0656, "step": 4836 }, { "epoch": 0.6557310377550328, "grad_norm": 4.858817830727833, "learning_rate": 5.597966293578055e-07, "loss": 1.1218, "step": 4837 }, { "epoch": 0.6558666034026978, "grad_norm": 5.474536540406697, "learning_rate": 5.594023707159668e-07, "loss": 1.1161, "step": 4838 }, { "epoch": 0.6560021690503627, "grad_norm": 5.470118470759669, "learning_rate": 5.590081970399028e-07, "loss": 1.0938, "step": 4839 }, { "epoch": 0.6561377346980275, "grad_norm": 3.9819273078730855, "learning_rate": 5.586141084056273e-07, "loss": 1.1249, "step": 4840 }, { "epoch": 0.6562733003456924, "grad_norm": 4.300758839761677, "learning_rate": 5.582201048891367e-07, "loss": 1.0932, "step": 4841 }, { "epoch": 0.6564088659933572, "grad_norm": 4.83636622147553, "learning_rate": 5.578261865664118e-07, "loss": 1.0876, "step": 4842 }, { "epoch": 0.6565444316410222, "grad_norm": 3.444804819134027, "learning_rate": 5.574323535134164e-07, "loss": 1.1012, "step": 4843 }, { "epoch": 0.6566799972886871, "grad_norm": 5.0891118712589964, "learning_rate": 5.570386058060983e-07, "loss": 1.0691, "step": 4844 }, { "epoch": 0.6568155629363519, "grad_norm": 5.97123804191111, "learning_rate": 5.566449435203886e-07, "loss": 1.0782, "step": 4845 }, { "epoch": 0.6569511285840168, "grad_norm": 3.901464793154636, "learning_rate": 5.562513667322018e-07, "loss": 1.0855, "step": 4846 }, { "epoch": 0.6570866942316816, "grad_norm": 6.124973378843211, "learning_rate": 5.558578755174363e-07, "loss": 1.0975, "step": 4847 }, { "epoch": 0.6572222598793466, "grad_norm": 5.063437070311178, "learning_rate": 5.554644699519735e-07, "loss": 1.1106, "step": 4848 }, { "epoch": 0.6573578255270115, "grad_norm": 10.18143175143524, "learning_rate": 5.550711501116788e-07, "loss": 1.1605, "step": 4849 }, { "epoch": 0.6574933911746763, "grad_norm": 7.916101501727401, "learning_rate": 5.546779160724012e-07, "loss": 1.0711, "step": 4850 }, { "epoch": 0.6576289568223412, "grad_norm": 4.650549590078063, "learning_rate": 5.542847679099715e-07, "loss": 1.0755, "step": 4851 }, { "epoch": 0.657764522470006, "grad_norm": 4.34733622063258, "learning_rate": 5.538917057002069e-07, "loss": 1.0899, "step": 4852 }, { "epoch": 0.657900088117671, "grad_norm": 4.824503204793083, "learning_rate": 5.534987295189049e-07, "loss": 1.1132, "step": 4853 }, { "epoch": 0.6580356537653359, "grad_norm": 9.714800996605858, "learning_rate": 5.531058394418487e-07, "loss": 1.0872, "step": 4854 }, { "epoch": 0.6581712194130007, "grad_norm": 4.05724791913362, "learning_rate": 5.527130355448035e-07, "loss": 1.1217, "step": 4855 }, { "epoch": 0.6583067850606656, "grad_norm": 3.978189223724015, "learning_rate": 5.523203179035189e-07, "loss": 1.0533, "step": 4856 }, { "epoch": 0.6584423507083305, "grad_norm": 8.089041942827553, "learning_rate": 5.519276865937272e-07, "loss": 1.0799, "step": 4857 }, { "epoch": 0.6585779163559954, "grad_norm": 5.207536296208772, "learning_rate": 5.515351416911442e-07, "loss": 1.1064, "step": 4858 }, { "epoch": 0.6587134820036603, "grad_norm": 7.303748479103744, "learning_rate": 5.511426832714694e-07, "loss": 1.1412, "step": 4859 }, { "epoch": 0.6588490476513251, "grad_norm": 6.07437421880056, "learning_rate": 5.507503114103849e-07, "loss": 1.1825, "step": 4860 }, { "epoch": 0.65898461329899, "grad_norm": 7.140311620120097, "learning_rate": 5.503580261835566e-07, "loss": 1.0881, "step": 4861 }, { "epoch": 0.6591201789466549, "grad_norm": 5.59239022944935, "learning_rate": 5.499658276666338e-07, "loss": 1.0964, "step": 4862 }, { "epoch": 0.6592557445943198, "grad_norm": 4.0804940912625955, "learning_rate": 5.495737159352487e-07, "loss": 1.065, "step": 4863 }, { "epoch": 0.6593913102419847, "grad_norm": 3.2090403089167565, "learning_rate": 5.491816910650171e-07, "loss": 1.1042, "step": 4864 }, { "epoch": 0.6595268758896495, "grad_norm": 5.465644670223147, "learning_rate": 5.48789753131538e-07, "loss": 1.0926, "step": 4865 }, { "epoch": 0.6596624415373145, "grad_norm": 5.3932864387775306, "learning_rate": 5.483979022103935e-07, "loss": 1.0829, "step": 4866 }, { "epoch": 0.6597980071849793, "grad_norm": 5.061049944765077, "learning_rate": 5.480061383771481e-07, "loss": 1.1423, "step": 4867 }, { "epoch": 0.6599335728326442, "grad_norm": 6.691813428770923, "learning_rate": 5.476144617073519e-07, "loss": 1.1022, "step": 4868 }, { "epoch": 0.6600691384803091, "grad_norm": 4.228449305659288, "learning_rate": 5.472228722765351e-07, "loss": 1.149, "step": 4869 }, { "epoch": 0.6602047041279739, "grad_norm": 4.157197106621832, "learning_rate": 5.46831370160214e-07, "loss": 1.0884, "step": 4870 }, { "epoch": 0.6603402697756389, "grad_norm": 3.694506290889047, "learning_rate": 5.464399554338856e-07, "loss": 1.0789, "step": 4871 }, { "epoch": 0.6604758354233037, "grad_norm": 5.245090000536898, "learning_rate": 5.460486281730322e-07, "loss": 1.1177, "step": 4872 }, { "epoch": 0.6606114010709686, "grad_norm": 5.023831982223929, "learning_rate": 5.456573884531168e-07, "loss": 1.131, "step": 4873 }, { "epoch": 0.6607469667186335, "grad_norm": 5.97474477156046, "learning_rate": 5.452662363495884e-07, "loss": 1.1175, "step": 4874 }, { "epoch": 0.6608825323662983, "grad_norm": 4.099310794321905, "learning_rate": 5.448751719378762e-07, "loss": 1.0901, "step": 4875 }, { "epoch": 0.6610180980139633, "grad_norm": 5.700678860666737, "learning_rate": 5.444841952933953e-07, "loss": 1.1255, "step": 4876 }, { "epoch": 0.6611536636616281, "grad_norm": 4.589023641064307, "learning_rate": 5.440933064915413e-07, "loss": 1.0855, "step": 4877 }, { "epoch": 0.661289229309293, "grad_norm": 7.862268785313642, "learning_rate": 5.437025056076945e-07, "loss": 1.1345, "step": 4878 }, { "epoch": 0.6614247949569579, "grad_norm": 4.716264495986954, "learning_rate": 5.433117927172176e-07, "loss": 1.0858, "step": 4879 }, { "epoch": 0.6615603606046228, "grad_norm": 9.507949403278339, "learning_rate": 5.429211678954566e-07, "loss": 1.1086, "step": 4880 }, { "epoch": 0.6616959262522877, "grad_norm": 5.383169102445931, "learning_rate": 5.425306312177404e-07, "loss": 1.0964, "step": 4881 }, { "epoch": 0.6618314918999526, "grad_norm": 5.601145199411007, "learning_rate": 5.421401827593812e-07, "loss": 1.1309, "step": 4882 }, { "epoch": 0.6619670575476174, "grad_norm": 7.588089220750215, "learning_rate": 5.417498225956734e-07, "loss": 1.1278, "step": 4883 }, { "epoch": 0.6621026231952823, "grad_norm": 4.467499013107119, "learning_rate": 5.413595508018951e-07, "loss": 1.0983, "step": 4884 }, { "epoch": 0.6622381888429472, "grad_norm": 5.242991032160707, "learning_rate": 5.409693674533071e-07, "loss": 1.1286, "step": 4885 }, { "epoch": 0.6623737544906121, "grad_norm": 4.182617905269702, "learning_rate": 5.405792726251532e-07, "loss": 1.1004, "step": 4886 }, { "epoch": 0.662509320138277, "grad_norm": 4.522272422554477, "learning_rate": 5.401892663926606e-07, "loss": 1.0904, "step": 4887 }, { "epoch": 0.6626448857859418, "grad_norm": 3.7949398722539263, "learning_rate": 5.397993488310378e-07, "loss": 1.1295, "step": 4888 }, { "epoch": 0.6627804514336068, "grad_norm": 5.943120954950738, "learning_rate": 5.394095200154786e-07, "loss": 1.0511, "step": 4889 }, { "epoch": 0.6629160170812716, "grad_norm": 5.032453668356003, "learning_rate": 5.39019780021157e-07, "loss": 1.1342, "step": 4890 }, { "epoch": 0.6630515827289365, "grad_norm": 4.1119470356132375, "learning_rate": 5.386301289232329e-07, "loss": 1.087, "step": 4891 }, { "epoch": 0.6631871483766014, "grad_norm": 3.72923938089256, "learning_rate": 5.382405667968457e-07, "loss": 1.0624, "step": 4892 }, { "epoch": 0.6633227140242662, "grad_norm": 6.9545362552859125, "learning_rate": 5.378510937171212e-07, "loss": 1.1065, "step": 4893 }, { "epoch": 0.6634582796719312, "grad_norm": 3.332158932003748, "learning_rate": 5.37461709759165e-07, "loss": 1.1175, "step": 4894 }, { "epoch": 0.663593845319596, "grad_norm": 3.325423396886537, "learning_rate": 5.370724149980668e-07, "loss": 1.0816, "step": 4895 }, { "epoch": 0.6637294109672609, "grad_norm": 6.173004953273336, "learning_rate": 5.366832095088994e-07, "loss": 1.0826, "step": 4896 }, { "epoch": 0.6638649766149258, "grad_norm": 11.504723185089873, "learning_rate": 5.362940933667177e-07, "loss": 1.0785, "step": 4897 }, { "epoch": 0.6640005422625906, "grad_norm": 5.780027336580391, "learning_rate": 5.359050666465599e-07, "loss": 1.0897, "step": 4898 }, { "epoch": 0.6641361079102556, "grad_norm": 5.535723834727959, "learning_rate": 5.355161294234465e-07, "loss": 1.1027, "step": 4899 }, { "epoch": 0.6642716735579204, "grad_norm": 18.33047405489127, "learning_rate": 5.351272817723813e-07, "loss": 1.1192, "step": 4900 }, { "epoch": 0.6644072392055853, "grad_norm": 5.490996206151459, "learning_rate": 5.347385237683504e-07, "loss": 1.1123, "step": 4901 }, { "epoch": 0.6645428048532502, "grad_norm": 3.957094811544176, "learning_rate": 5.343498554863225e-07, "loss": 1.1224, "step": 4902 }, { "epoch": 0.664678370500915, "grad_norm": 4.361767880421103, "learning_rate": 5.339612770012494e-07, "loss": 1.1323, "step": 4903 }, { "epoch": 0.66481393614858, "grad_norm": 4.043638709025469, "learning_rate": 5.335727883880654e-07, "loss": 1.0936, "step": 4904 }, { "epoch": 0.6649495017962448, "grad_norm": 5.136184672678977, "learning_rate": 5.331843897216873e-07, "loss": 1.0973, "step": 4905 }, { "epoch": 0.6650850674439097, "grad_norm": 4.3470453891423, "learning_rate": 5.327960810770149e-07, "loss": 1.0615, "step": 4906 }, { "epoch": 0.6652206330915746, "grad_norm": 4.234416837792234, "learning_rate": 5.324078625289304e-07, "loss": 1.1244, "step": 4907 }, { "epoch": 0.6653561987392395, "grad_norm": 9.569004305748887, "learning_rate": 5.320197341522985e-07, "loss": 1.1427, "step": 4908 }, { "epoch": 0.6654917643869044, "grad_norm": 5.581389627018402, "learning_rate": 5.316316960219673e-07, "loss": 1.0836, "step": 4909 }, { "epoch": 0.6656273300345692, "grad_norm": 6.320485718213851, "learning_rate": 5.312437482127659e-07, "loss": 1.1356, "step": 4910 }, { "epoch": 0.6657628956822341, "grad_norm": 5.937831032423714, "learning_rate": 5.30855890799508e-07, "loss": 1.0628, "step": 4911 }, { "epoch": 0.665898461329899, "grad_norm": 7.6339724732529355, "learning_rate": 5.304681238569877e-07, "loss": 1.0958, "step": 4912 }, { "epoch": 0.6660340269775639, "grad_norm": 4.630115452633979, "learning_rate": 5.300804474599842e-07, "loss": 1.095, "step": 4913 }, { "epoch": 0.6661695926252288, "grad_norm": 5.67182113408817, "learning_rate": 5.296928616832568e-07, "loss": 1.091, "step": 4914 }, { "epoch": 0.6663051582728936, "grad_norm": 4.218200941877016, "learning_rate": 5.293053666015485e-07, "loss": 1.1226, "step": 4915 }, { "epoch": 0.6664407239205585, "grad_norm": 6.648520791599287, "learning_rate": 5.28917962289585e-07, "loss": 1.0739, "step": 4916 }, { "epoch": 0.6665762895682235, "grad_norm": 13.98839000283888, "learning_rate": 5.28530648822074e-07, "loss": 1.0457, "step": 4917 }, { "epoch": 0.6667118552158883, "grad_norm": 6.606193155364651, "learning_rate": 5.281434262737056e-07, "loss": 1.1352, "step": 4918 }, { "epoch": 0.6668474208635532, "grad_norm": 6.380217246892729, "learning_rate": 5.277562947191529e-07, "loss": 1.0933, "step": 4919 }, { "epoch": 0.666982986511218, "grad_norm": 6.037744153282227, "learning_rate": 5.273692542330713e-07, "loss": 1.1212, "step": 4920 }, { "epoch": 0.6671185521588829, "grad_norm": 3.9431388448483085, "learning_rate": 5.269823048900981e-07, "loss": 1.1338, "step": 4921 }, { "epoch": 0.6672541178065479, "grad_norm": 5.147436048065499, "learning_rate": 5.265954467648539e-07, "loss": 1.0783, "step": 4922 }, { "epoch": 0.6673896834542127, "grad_norm": 4.962157605661765, "learning_rate": 5.262086799319405e-07, "loss": 1.1027, "step": 4923 }, { "epoch": 0.6675252491018776, "grad_norm": 4.128485057079059, "learning_rate": 5.258220044659438e-07, "loss": 1.091, "step": 4924 }, { "epoch": 0.6676608147495424, "grad_norm": 5.8808318621944675, "learning_rate": 5.2543542044143e-07, "loss": 1.0836, "step": 4925 }, { "epoch": 0.6677963803972073, "grad_norm": 4.519324596334747, "learning_rate": 5.2504892793295e-07, "loss": 1.097, "step": 4926 }, { "epoch": 0.6679319460448723, "grad_norm": 5.631002395667181, "learning_rate": 5.246625270150346e-07, "loss": 1.0931, "step": 4927 }, { "epoch": 0.6680675116925371, "grad_norm": 4.645807499675642, "learning_rate": 5.242762177621994e-07, "loss": 1.157, "step": 4928 }, { "epoch": 0.668203077340202, "grad_norm": 3.5111377442969873, "learning_rate": 5.238900002489398e-07, "loss": 1.0839, "step": 4929 }, { "epoch": 0.6683386429878668, "grad_norm": 4.000418997410443, "learning_rate": 5.235038745497363e-07, "loss": 1.0768, "step": 4930 }, { "epoch": 0.6684742086355318, "grad_norm": 4.264123593418505, "learning_rate": 5.231178407390484e-07, "loss": 1.1114, "step": 4931 }, { "epoch": 0.6686097742831967, "grad_norm": 4.878312824064608, "learning_rate": 5.227318988913216e-07, "loss": 1.1099, "step": 4932 }, { "epoch": 0.6687453399308615, "grad_norm": 8.696735096972425, "learning_rate": 5.223460490809799e-07, "loss": 1.0827, "step": 4933 }, { "epoch": 0.6688809055785264, "grad_norm": 4.232458454820357, "learning_rate": 5.21960291382433e-07, "loss": 1.1585, "step": 4934 }, { "epoch": 0.6690164712261912, "grad_norm": 4.975519479196246, "learning_rate": 5.215746258700698e-07, "loss": 1.0788, "step": 4935 }, { "epoch": 0.6691520368738562, "grad_norm": 3.8519314198566983, "learning_rate": 5.211890526182642e-07, "loss": 1.0946, "step": 4936 }, { "epoch": 0.6692876025215211, "grad_norm": 5.6166631754837395, "learning_rate": 5.208035717013702e-07, "loss": 1.1201, "step": 4937 }, { "epoch": 0.6694231681691859, "grad_norm": 5.691079778203571, "learning_rate": 5.204181831937245e-07, "loss": 1.1215, "step": 4938 }, { "epoch": 0.6695587338168508, "grad_norm": 5.676463158699705, "learning_rate": 5.200328871696468e-07, "loss": 1.1474, "step": 4939 }, { "epoch": 0.6696942994645156, "grad_norm": 12.415643094461052, "learning_rate": 5.19647683703438e-07, "loss": 1.1163, "step": 4940 }, { "epoch": 0.6698298651121806, "grad_norm": 5.032590942848006, "learning_rate": 5.192625728693819e-07, "loss": 1.1284, "step": 4941 }, { "epoch": 0.6699654307598455, "grad_norm": 4.91081082131054, "learning_rate": 5.188775547417439e-07, "loss": 1.1085, "step": 4942 }, { "epoch": 0.6701009964075103, "grad_norm": 3.9759618483441916, "learning_rate": 5.184926293947716e-07, "loss": 1.1442, "step": 4943 }, { "epoch": 0.6702365620551752, "grad_norm": 4.756600766800148, "learning_rate": 5.181077969026951e-07, "loss": 1.0894, "step": 4944 }, { "epoch": 0.67037212770284, "grad_norm": 5.433061666973281, "learning_rate": 5.17723057339726e-07, "loss": 1.0925, "step": 4945 }, { "epoch": 0.670507693350505, "grad_norm": 4.341497531026634, "learning_rate": 5.173384107800585e-07, "loss": 1.0835, "step": 4946 }, { "epoch": 0.6706432589981699, "grad_norm": 6.340965844462329, "learning_rate": 5.169538572978684e-07, "loss": 1.0938, "step": 4947 }, { "epoch": 0.6707788246458347, "grad_norm": 4.667044496728543, "learning_rate": 5.165693969673142e-07, "loss": 1.105, "step": 4948 }, { "epoch": 0.6709143902934996, "grad_norm": 6.126241865248802, "learning_rate": 5.161850298625362e-07, "loss": 1.0666, "step": 4949 }, { "epoch": 0.6710499559411645, "grad_norm": 4.348101449157878, "learning_rate": 5.158007560576557e-07, "loss": 1.1036, "step": 4950 }, { "epoch": 0.6711855215888294, "grad_norm": 4.965007591286048, "learning_rate": 5.154165756267774e-07, "loss": 1.1372, "step": 4951 }, { "epoch": 0.6713210872364943, "grad_norm": 7.971807819416816, "learning_rate": 5.150324886439874e-07, "loss": 1.1094, "step": 4952 }, { "epoch": 0.6714566528841591, "grad_norm": 6.640889938444223, "learning_rate": 5.14648495183354e-07, "loss": 1.0997, "step": 4953 }, { "epoch": 0.671592218531824, "grad_norm": 5.379599992279529, "learning_rate": 5.142645953189271e-07, "loss": 1.0974, "step": 4954 }, { "epoch": 0.6717277841794889, "grad_norm": 5.801588875794947, "learning_rate": 5.138807891247388e-07, "loss": 1.1038, "step": 4955 }, { "epoch": 0.6718633498271538, "grad_norm": 4.805634820402132, "learning_rate": 5.13497076674803e-07, "loss": 1.1113, "step": 4956 }, { "epoch": 0.6719989154748187, "grad_norm": 6.525108687730102, "learning_rate": 5.13113458043116e-07, "loss": 1.1103, "step": 4957 }, { "epoch": 0.6721344811224835, "grad_norm": 8.919462528483537, "learning_rate": 5.127299333036552e-07, "loss": 1.0933, "step": 4958 }, { "epoch": 0.6722700467701485, "grad_norm": 3.975840360016407, "learning_rate": 5.123465025303804e-07, "loss": 1.1049, "step": 4959 }, { "epoch": 0.6724056124178134, "grad_norm": 6.198834125025858, "learning_rate": 5.119631657972334e-07, "loss": 1.0655, "step": 4960 }, { "epoch": 0.6725411780654782, "grad_norm": 5.146079501889239, "learning_rate": 5.115799231781377e-07, "loss": 1.0796, "step": 4961 }, { "epoch": 0.6726767437131431, "grad_norm": 47.67171017325057, "learning_rate": 5.111967747469983e-07, "loss": 1.1119, "step": 4962 }, { "epoch": 0.6728123093608079, "grad_norm": 4.863687689518871, "learning_rate": 5.108137205777026e-07, "loss": 1.1001, "step": 4963 }, { "epoch": 0.6729478750084729, "grad_norm": 5.37811812367513, "learning_rate": 5.104307607441193e-07, "loss": 1.0862, "step": 4964 }, { "epoch": 0.6730834406561378, "grad_norm": 4.639161984425289, "learning_rate": 5.100478953200999e-07, "loss": 1.0548, "step": 4965 }, { "epoch": 0.6732190063038026, "grad_norm": 4.221676860312746, "learning_rate": 5.096651243794756e-07, "loss": 1.0671, "step": 4966 }, { "epoch": 0.6733545719514675, "grad_norm": 4.93372595774896, "learning_rate": 5.092824479960625e-07, "loss": 1.062, "step": 4967 }, { "epoch": 0.6734901375991323, "grad_norm": 6.369414796262493, "learning_rate": 5.088998662436548e-07, "loss": 1.0703, "step": 4968 }, { "epoch": 0.6736257032467973, "grad_norm": 4.187860838363405, "learning_rate": 5.085173791960324e-07, "loss": 1.0394, "step": 4969 }, { "epoch": 0.6737612688944622, "grad_norm": 4.86309104052918, "learning_rate": 5.081349869269529e-07, "loss": 1.0895, "step": 4970 }, { "epoch": 0.673896834542127, "grad_norm": 4.989868095444324, "learning_rate": 5.077526895101596e-07, "loss": 1.0674, "step": 4971 }, { "epoch": 0.6740324001897919, "grad_norm": 5.246654967684473, "learning_rate": 5.073704870193736e-07, "loss": 1.0793, "step": 4972 }, { "epoch": 0.6741679658374568, "grad_norm": 7.689819895071812, "learning_rate": 5.069883795283015e-07, "loss": 1.0988, "step": 4973 }, { "epoch": 0.6743035314851217, "grad_norm": 5.515103582945424, "learning_rate": 5.066063671106281e-07, "loss": 1.1176, "step": 4974 }, { "epoch": 0.6744390971327866, "grad_norm": 3.937993879279727, "learning_rate": 5.062244498400228e-07, "loss": 1.0914, "step": 4975 }, { "epoch": 0.6745746627804514, "grad_norm": 6.720432287457476, "learning_rate": 5.058426277901344e-07, "loss": 1.1463, "step": 4976 }, { "epoch": 0.6747102284281163, "grad_norm": 5.303207637301954, "learning_rate": 5.054609010345947e-07, "loss": 1.098, "step": 4977 }, { "epoch": 0.6748457940757812, "grad_norm": 8.031450153609443, "learning_rate": 5.050792696470165e-07, "loss": 1.1443, "step": 4978 }, { "epoch": 0.6749813597234461, "grad_norm": 7.90903773509956, "learning_rate": 5.046977337009945e-07, "loss": 1.0611, "step": 4979 }, { "epoch": 0.675116925371111, "grad_norm": 6.7408144057828325, "learning_rate": 5.043162932701048e-07, "loss": 1.1089, "step": 4980 }, { "epoch": 0.6752524910187758, "grad_norm": 4.240034987928345, "learning_rate": 5.039349484279053e-07, "loss": 1.0929, "step": 4981 }, { "epoch": 0.6753880566664408, "grad_norm": 28.577831272586078, "learning_rate": 5.035536992479352e-07, "loss": 1.1003, "step": 4982 }, { "epoch": 0.6755236223141056, "grad_norm": 3.6638709375739076, "learning_rate": 5.031725458037157e-07, "loss": 1.0936, "step": 4983 }, { "epoch": 0.6756591879617705, "grad_norm": 4.472393858048233, "learning_rate": 5.027914881687489e-07, "loss": 1.0771, "step": 4984 }, { "epoch": 0.6757947536094354, "grad_norm": 4.493801358013563, "learning_rate": 5.024105264165188e-07, "loss": 1.1007, "step": 4985 }, { "epoch": 0.6759303192571002, "grad_norm": 5.582730963704908, "learning_rate": 5.020296606204915e-07, "loss": 1.1027, "step": 4986 }, { "epoch": 0.6760658849047652, "grad_norm": 11.004858255418867, "learning_rate": 5.016488908541125e-07, "loss": 1.0822, "step": 4987 }, { "epoch": 0.67620145055243, "grad_norm": 5.235467495351563, "learning_rate": 5.01268217190812e-07, "loss": 1.081, "step": 4988 }, { "epoch": 0.6763370162000949, "grad_norm": 5.254356761514523, "learning_rate": 5.008876397039983e-07, "loss": 1.1229, "step": 4989 }, { "epoch": 0.6764725818477598, "grad_norm": 6.452143058871681, "learning_rate": 5.005071584670644e-07, "loss": 1.1091, "step": 4990 }, { "epoch": 0.6766081474954246, "grad_norm": 8.88512890146683, "learning_rate": 5.001267735533811e-07, "loss": 1.0913, "step": 4991 }, { "epoch": 0.6767437131430896, "grad_norm": 4.6376445632348995, "learning_rate": 4.997464850363049e-07, "loss": 1.1176, "step": 4992 }, { "epoch": 0.6768792787907544, "grad_norm": 8.078637468073735, "learning_rate": 4.993662929891698e-07, "loss": 1.1437, "step": 4993 }, { "epoch": 0.6770148444384193, "grad_norm": 5.130987635288152, "learning_rate": 4.989861974852934e-07, "loss": 1.1204, "step": 4994 }, { "epoch": 0.6771504100860842, "grad_norm": 4.712258246849047, "learning_rate": 4.986061985979739e-07, "loss": 1.112, "step": 4995 }, { "epoch": 0.677285975733749, "grad_norm": 5.581082245196237, "learning_rate": 4.982262964004913e-07, "loss": 1.0866, "step": 4996 }, { "epoch": 0.677421541381414, "grad_norm": 3.5842782457252156, "learning_rate": 4.978464909661067e-07, "loss": 1.0924, "step": 4997 }, { "epoch": 0.6775571070290788, "grad_norm": 6.215270751829988, "learning_rate": 4.974667823680626e-07, "loss": 1.1291, "step": 4998 }, { "epoch": 0.6776926726767437, "grad_norm": 5.253039652444198, "learning_rate": 4.970871706795827e-07, "loss": 1.0886, "step": 4999 }, { "epoch": 0.6778282383244086, "grad_norm": 6.806186508369851, "learning_rate": 4.967076559738722e-07, "loss": 1.1248, "step": 5000 }, { "epoch": 0.6779638039720735, "grad_norm": 13.363902760874266, "learning_rate": 4.963282383241175e-07, "loss": 1.101, "step": 5001 }, { "epoch": 0.6780993696197384, "grad_norm": 6.278655610587076, "learning_rate": 4.959489178034863e-07, "loss": 1.1075, "step": 5002 }, { "epoch": 0.6782349352674032, "grad_norm": 5.905175948945087, "learning_rate": 4.955696944851276e-07, "loss": 1.0968, "step": 5003 }, { "epoch": 0.6783705009150681, "grad_norm": 5.643461825165371, "learning_rate": 4.951905684421716e-07, "loss": 1.1357, "step": 5004 }, { "epoch": 0.678506066562733, "grad_norm": 9.603840290763678, "learning_rate": 4.948115397477296e-07, "loss": 1.1055, "step": 5005 }, { "epoch": 0.6786416322103979, "grad_norm": 4.15480661080474, "learning_rate": 4.94432608474895e-07, "loss": 1.0841, "step": 5006 }, { "epoch": 0.6787771978580628, "grad_norm": 4.398637053613255, "learning_rate": 4.940537746967403e-07, "loss": 1.0857, "step": 5007 }, { "epoch": 0.6789127635057276, "grad_norm": 6.351348581682683, "learning_rate": 4.936750384863222e-07, "loss": 1.0688, "step": 5008 }, { "epoch": 0.6790483291533925, "grad_norm": 8.379362056821652, "learning_rate": 4.932963999166755e-07, "loss": 1.125, "step": 5009 }, { "epoch": 0.6791838948010575, "grad_norm": 4.330417235006841, "learning_rate": 4.929178590608191e-07, "loss": 1.1135, "step": 5010 }, { "epoch": 0.6793194604487223, "grad_norm": 4.375705611130267, "learning_rate": 4.925394159917506e-07, "loss": 1.0844, "step": 5011 }, { "epoch": 0.6794550260963872, "grad_norm": 6.345819336412282, "learning_rate": 4.921610707824501e-07, "loss": 1.1123, "step": 5012 }, { "epoch": 0.679590591744052, "grad_norm": 5.078919095672958, "learning_rate": 4.917828235058785e-07, "loss": 1.1377, "step": 5013 }, { "epoch": 0.6797261573917169, "grad_norm": 4.475169646563547, "learning_rate": 4.914046742349777e-07, "loss": 1.1012, "step": 5014 }, { "epoch": 0.6798617230393819, "grad_norm": 5.146705993607752, "learning_rate": 4.910266230426708e-07, "loss": 1.0961, "step": 5015 }, { "epoch": 0.6799972886870467, "grad_norm": 4.902150516099689, "learning_rate": 4.906486700018622e-07, "loss": 1.1044, "step": 5016 }, { "epoch": 0.6801328543347116, "grad_norm": 4.613654785049762, "learning_rate": 4.90270815185437e-07, "loss": 1.125, "step": 5017 }, { "epoch": 0.6802684199823764, "grad_norm": 5.3474565329969, "learning_rate": 4.898930586662614e-07, "loss": 1.1109, "step": 5018 }, { "epoch": 0.6804039856300413, "grad_norm": 22.0756408185679, "learning_rate": 4.89515400517183e-07, "loss": 1.1041, "step": 5019 }, { "epoch": 0.6805395512777063, "grad_norm": 5.496243064190722, "learning_rate": 4.891378408110301e-07, "loss": 1.0943, "step": 5020 }, { "epoch": 0.6806751169253711, "grad_norm": 5.358661805441072, "learning_rate": 4.887603796206124e-07, "loss": 1.1063, "step": 5021 }, { "epoch": 0.680810682573036, "grad_norm": 8.821296406887686, "learning_rate": 4.883830170187193e-07, "loss": 1.077, "step": 5022 }, { "epoch": 0.6809462482207008, "grad_norm": 7.804125283105952, "learning_rate": 4.880057530781237e-07, "loss": 1.1028, "step": 5023 }, { "epoch": 0.6810818138683657, "grad_norm": 4.0755158094475314, "learning_rate": 4.876285878715763e-07, "loss": 1.0816, "step": 5024 }, { "epoch": 0.6812173795160307, "grad_norm": 5.942268935102784, "learning_rate": 4.872515214718123e-07, "loss": 1.1422, "step": 5025 }, { "epoch": 0.6813529451636955, "grad_norm": 5.1822699024316785, "learning_rate": 4.86874553951544e-07, "loss": 1.0864, "step": 5026 }, { "epoch": 0.6814885108113604, "grad_norm": 5.705656573910613, "learning_rate": 4.864976853834684e-07, "loss": 1.0974, "step": 5027 }, { "epoch": 0.6816240764590252, "grad_norm": 5.578088175255963, "learning_rate": 4.861209158402601e-07, "loss": 1.0965, "step": 5028 }, { "epoch": 0.6817596421066902, "grad_norm": 5.550577764603405, "learning_rate": 4.857442453945779e-07, "loss": 1.0795, "step": 5029 }, { "epoch": 0.6818952077543551, "grad_norm": 6.248624322237601, "learning_rate": 4.853676741190576e-07, "loss": 1.0974, "step": 5030 }, { "epoch": 0.6820307734020199, "grad_norm": 5.464194800870805, "learning_rate": 4.849912020863198e-07, "loss": 1.1397, "step": 5031 }, { "epoch": 0.6821663390496848, "grad_norm": 5.5573906483582345, "learning_rate": 4.846148293689629e-07, "loss": 1.1124, "step": 5032 }, { "epoch": 0.6823019046973496, "grad_norm": 5.876123910508236, "learning_rate": 4.842385560395687e-07, "loss": 1.0951, "step": 5033 }, { "epoch": 0.6824374703450146, "grad_norm": 5.73745073392995, "learning_rate": 4.838623821706973e-07, "loss": 1.0853, "step": 5034 }, { "epoch": 0.6825730359926795, "grad_norm": 18.406390832888206, "learning_rate": 4.834863078348915e-07, "loss": 1.1429, "step": 5035 }, { "epoch": 0.6827086016403443, "grad_norm": 7.93453533988255, "learning_rate": 4.831103331046739e-07, "loss": 1.106, "step": 5036 }, { "epoch": 0.6828441672880092, "grad_norm": 5.954876461494089, "learning_rate": 4.827344580525487e-07, "loss": 1.0824, "step": 5037 }, { "epoch": 0.6829797329356742, "grad_norm": 5.122689832916915, "learning_rate": 4.82358682751e-07, "loss": 1.0758, "step": 5038 }, { "epoch": 0.683115298583339, "grad_norm": 31.67896421189642, "learning_rate": 4.819830072724934e-07, "loss": 1.119, "step": 5039 }, { "epoch": 0.6832508642310039, "grad_norm": 5.011356997115459, "learning_rate": 4.816074316894749e-07, "loss": 1.0836, "step": 5040 }, { "epoch": 0.6833864298786687, "grad_norm": 4.354333028374812, "learning_rate": 4.812319560743713e-07, "loss": 1.1326, "step": 5041 }, { "epoch": 0.6835219955263336, "grad_norm": 15.013623154766414, "learning_rate": 4.8085658049959e-07, "loss": 1.0659, "step": 5042 }, { "epoch": 0.6836575611739986, "grad_norm": 4.551217816400046, "learning_rate": 4.804813050375194e-07, "loss": 1.0812, "step": 5043 }, { "epoch": 0.6837931268216634, "grad_norm": 4.8291231284502505, "learning_rate": 4.801061297605282e-07, "loss": 1.0988, "step": 5044 }, { "epoch": 0.6839286924693283, "grad_norm": 4.199474889598643, "learning_rate": 4.797310547409661e-07, "loss": 1.0739, "step": 5045 }, { "epoch": 0.6840642581169931, "grad_norm": 8.229374909411485, "learning_rate": 4.793560800511634e-07, "loss": 1.1163, "step": 5046 }, { "epoch": 0.684199823764658, "grad_norm": 8.829662418173797, "learning_rate": 4.789812057634308e-07, "loss": 1.1286, "step": 5047 }, { "epoch": 0.684335389412323, "grad_norm": 4.671347195506153, "learning_rate": 4.786064319500604e-07, "loss": 1.0935, "step": 5048 }, { "epoch": 0.6844709550599878, "grad_norm": 4.170173536254509, "learning_rate": 4.782317586833236e-07, "loss": 1.0894, "step": 5049 }, { "epoch": 0.6846065207076527, "grad_norm": 3.8703258641370706, "learning_rate": 4.778571860354737e-07, "loss": 1.1005, "step": 5050 }, { "epoch": 0.6847420863553175, "grad_norm": 4.347984095034775, "learning_rate": 4.774827140787437e-07, "loss": 1.0947, "step": 5051 }, { "epoch": 0.6848776520029825, "grad_norm": 4.029090469598024, "learning_rate": 4.77108342885348e-07, "loss": 1.1074, "step": 5052 }, { "epoch": 0.6850132176506474, "grad_norm": 6.028281579937166, "learning_rate": 4.767340725274809e-07, "loss": 1.092, "step": 5053 }, { "epoch": 0.6851487832983122, "grad_norm": 5.079396697917426, "learning_rate": 4.763599030773173e-07, "loss": 1.1163, "step": 5054 }, { "epoch": 0.6852843489459771, "grad_norm": 5.824342498491296, "learning_rate": 4.7598583460701324e-07, "loss": 1.1073, "step": 5055 }, { "epoch": 0.6854199145936419, "grad_norm": 6.861033683387055, "learning_rate": 4.756118671887046e-07, "loss": 1.0921, "step": 5056 }, { "epoch": 0.6855554802413069, "grad_norm": 4.414869463024663, "learning_rate": 4.7523800089450804e-07, "loss": 1.1066, "step": 5057 }, { "epoch": 0.6856910458889718, "grad_norm": 13.908830769282774, "learning_rate": 4.748642357965208e-07, "loss": 1.1118, "step": 5058 }, { "epoch": 0.6858266115366366, "grad_norm": 7.010922984725927, "learning_rate": 4.7449057196682063e-07, "loss": 1.0815, "step": 5059 }, { "epoch": 0.6859621771843015, "grad_norm": 4.57101787345546, "learning_rate": 4.7411700947746534e-07, "loss": 1.1205, "step": 5060 }, { "epoch": 0.6860977428319663, "grad_norm": 3.5166527470624906, "learning_rate": 4.737435484004939e-07, "loss": 1.1271, "step": 5061 }, { "epoch": 0.6862333084796313, "grad_norm": 6.284382145573102, "learning_rate": 4.7337018880792544e-07, "loss": 1.1221, "step": 5062 }, { "epoch": 0.6863688741272962, "grad_norm": 5.729574772416838, "learning_rate": 4.729969307717583e-07, "loss": 1.0617, "step": 5063 }, { "epoch": 0.686504439774961, "grad_norm": 9.457441605240135, "learning_rate": 4.7262377436397396e-07, "loss": 1.1299, "step": 5064 }, { "epoch": 0.6866400054226259, "grad_norm": 8.4268615066424, "learning_rate": 4.722507196565311e-07, "loss": 1.1004, "step": 5065 }, { "epoch": 0.6867755710702907, "grad_norm": 4.83809758594155, "learning_rate": 4.718777667213719e-07, "loss": 1.126, "step": 5066 }, { "epoch": 0.6869111367179557, "grad_norm": 4.931109006547407, "learning_rate": 4.7150491563041597e-07, "loss": 1.1054, "step": 5067 }, { "epoch": 0.6870467023656206, "grad_norm": 4.106808518659171, "learning_rate": 4.7113216645556606e-07, "loss": 1.0711, "step": 5068 }, { "epoch": 0.6871822680132854, "grad_norm": 5.0172731994350075, "learning_rate": 4.707595192687025e-07, "loss": 1.0886, "step": 5069 }, { "epoch": 0.6873178336609503, "grad_norm": 4.25632185373917, "learning_rate": 4.703869741416888e-07, "loss": 1.1124, "step": 5070 }, { "epoch": 0.6874533993086152, "grad_norm": 3.865698439435548, "learning_rate": 4.700145311463659e-07, "loss": 1.049, "step": 5071 }, { "epoch": 0.6875889649562801, "grad_norm": 7.472476188535494, "learning_rate": 4.696421903545579e-07, "loss": 1.1076, "step": 5072 }, { "epoch": 0.687724530603945, "grad_norm": 5.529741131394615, "learning_rate": 4.692699518380664e-07, "loss": 1.0926, "step": 5073 }, { "epoch": 0.6878600962516098, "grad_norm": 4.981887666653228, "learning_rate": 4.6889781566867617e-07, "loss": 1.1172, "step": 5074 }, { "epoch": 0.6879956618992747, "grad_norm": 7.93038160700703, "learning_rate": 4.685257819181494e-07, "loss": 1.0908, "step": 5075 }, { "epoch": 0.6881312275469396, "grad_norm": 4.5401348496584655, "learning_rate": 4.6815385065823053e-07, "loss": 1.07, "step": 5076 }, { "epoch": 0.6882667931946045, "grad_norm": 8.063757934153983, "learning_rate": 4.677820219606433e-07, "loss": 1.0918, "step": 5077 }, { "epoch": 0.6884023588422694, "grad_norm": 6.040224498031688, "learning_rate": 4.6741029589709216e-07, "loss": 1.1116, "step": 5078 }, { "epoch": 0.6885379244899342, "grad_norm": 4.29448448304613, "learning_rate": 4.6703867253926144e-07, "loss": 1.0783, "step": 5079 }, { "epoch": 0.6886734901375992, "grad_norm": 3.981866151063021, "learning_rate": 4.666671519588158e-07, "loss": 1.0811, "step": 5080 }, { "epoch": 0.688809055785264, "grad_norm": 5.263978107722332, "learning_rate": 4.662957342274e-07, "loss": 1.1183, "step": 5081 }, { "epoch": 0.6889446214329289, "grad_norm": 4.327149750103607, "learning_rate": 4.6592441941663896e-07, "loss": 1.1249, "step": 5082 }, { "epoch": 0.6890801870805938, "grad_norm": 12.045474620188536, "learning_rate": 4.655532075981383e-07, "loss": 1.1096, "step": 5083 }, { "epoch": 0.6892157527282586, "grad_norm": 4.578570017942775, "learning_rate": 4.6518209884348227e-07, "loss": 1.1242, "step": 5084 }, { "epoch": 0.6893513183759236, "grad_norm": 4.058863110168844, "learning_rate": 4.648110932242375e-07, "loss": 1.1148, "step": 5085 }, { "epoch": 0.6894868840235884, "grad_norm": 4.110585358573926, "learning_rate": 4.644401908119482e-07, "loss": 1.1103, "step": 5086 }, { "epoch": 0.6896224496712533, "grad_norm": 5.7238967539337375, "learning_rate": 4.640693916781414e-07, "loss": 1.0929, "step": 5087 }, { "epoch": 0.6897580153189182, "grad_norm": 6.311492510371296, "learning_rate": 4.636986958943212e-07, "loss": 1.0717, "step": 5088 }, { "epoch": 0.689893580966583, "grad_norm": 7.036738452087431, "learning_rate": 4.6332810353197503e-07, "loss": 1.0958, "step": 5089 }, { "epoch": 0.690029146614248, "grad_norm": 40.781522875621526, "learning_rate": 4.629576146625674e-07, "loss": 1.1209, "step": 5090 }, { "epoch": 0.6901647122619128, "grad_norm": 4.448342354683408, "learning_rate": 4.625872293575448e-07, "loss": 1.0718, "step": 5091 }, { "epoch": 0.6903002779095777, "grad_norm": 5.414764702366447, "learning_rate": 4.6221694768833276e-07, "loss": 1.0957, "step": 5092 }, { "epoch": 0.6904358435572426, "grad_norm": 7.386554903049562, "learning_rate": 4.6184676972633753e-07, "loss": 1.0976, "step": 5093 }, { "epoch": 0.6905714092049074, "grad_norm": 5.41088190069672, "learning_rate": 4.614766955429447e-07, "loss": 1.0991, "step": 5094 }, { "epoch": 0.6907069748525724, "grad_norm": 5.764085425302833, "learning_rate": 4.6110672520952033e-07, "loss": 1.0962, "step": 5095 }, { "epoch": 0.6908425405002372, "grad_norm": 6.6959481253956605, "learning_rate": 4.607368587974102e-07, "loss": 1.1185, "step": 5096 }, { "epoch": 0.6909781061479021, "grad_norm": 8.362870173028975, "learning_rate": 4.6036709637794026e-07, "loss": 1.1091, "step": 5097 }, { "epoch": 0.691113671795567, "grad_norm": 6.063929754157841, "learning_rate": 4.599974380224161e-07, "loss": 1.1317, "step": 5098 }, { "epoch": 0.6912492374432319, "grad_norm": 4.676766934809188, "learning_rate": 4.5962788380212346e-07, "loss": 1.0985, "step": 5099 }, { "epoch": 0.6913848030908968, "grad_norm": 4.234528804397195, "learning_rate": 4.592584337883281e-07, "loss": 1.0868, "step": 5100 }, { "epoch": 0.6915203687385616, "grad_norm": 5.211039467450969, "learning_rate": 4.5888908805227536e-07, "loss": 1.0909, "step": 5101 }, { "epoch": 0.6916559343862265, "grad_norm": 8.105738688256073, "learning_rate": 4.585198466651907e-07, "loss": 1.1006, "step": 5102 }, { "epoch": 0.6917915000338914, "grad_norm": 5.3922966106864285, "learning_rate": 4.581507096982794e-07, "loss": 1.0826, "step": 5103 }, { "epoch": 0.6919270656815563, "grad_norm": 9.074166481561932, "learning_rate": 4.5778167722272674e-07, "loss": 1.0862, "step": 5104 }, { "epoch": 0.6920626313292212, "grad_norm": 5.937761314600592, "learning_rate": 4.57412749309698e-07, "loss": 1.0829, "step": 5105 }, { "epoch": 0.692198196976886, "grad_norm": 4.0531414016520895, "learning_rate": 4.570439260303368e-07, "loss": 1.0791, "step": 5106 }, { "epoch": 0.6923337626245509, "grad_norm": 5.9084983271945335, "learning_rate": 4.566752074557694e-07, "loss": 1.1243, "step": 5107 }, { "epoch": 0.6924693282722159, "grad_norm": 8.112088515300588, "learning_rate": 4.563065936570988e-07, "loss": 1.0964, "step": 5108 }, { "epoch": 0.6926048939198807, "grad_norm": 4.722984293899586, "learning_rate": 4.559380847054106e-07, "loss": 1.1102, "step": 5109 }, { "epoch": 0.6927404595675456, "grad_norm": 7.467582959277723, "learning_rate": 4.555696806717679e-07, "loss": 1.1247, "step": 5110 }, { "epoch": 0.6928760252152104, "grad_norm": 6.930134268052658, "learning_rate": 4.552013816272148e-07, "loss": 1.0902, "step": 5111 }, { "epoch": 0.6930115908628753, "grad_norm": 5.74403586918436, "learning_rate": 4.548331876427749e-07, "loss": 1.0779, "step": 5112 }, { "epoch": 0.6931471565105403, "grad_norm": 5.536381391812328, "learning_rate": 4.544650987894514e-07, "loss": 1.0842, "step": 5113 }, { "epoch": 0.6932827221582051, "grad_norm": 4.553633081050099, "learning_rate": 4.5409711513822745e-07, "loss": 1.1312, "step": 5114 }, { "epoch": 0.69341828780587, "grad_norm": 4.674026471556039, "learning_rate": 4.537292367600658e-07, "loss": 1.0856, "step": 5115 }, { "epoch": 0.6935538534535349, "grad_norm": 6.119839259185752, "learning_rate": 4.5336146372590876e-07, "loss": 1.0883, "step": 5116 }, { "epoch": 0.6936894191011997, "grad_norm": 3.5573996875493905, "learning_rate": 4.5299379610667865e-07, "loss": 1.1629, "step": 5117 }, { "epoch": 0.6938249847488647, "grad_norm": 5.3691464129212, "learning_rate": 4.5262623397327706e-07, "loss": 1.1053, "step": 5118 }, { "epoch": 0.6939605503965295, "grad_norm": 23.06068764911531, "learning_rate": 4.522587773965856e-07, "loss": 1.1452, "step": 5119 }, { "epoch": 0.6940961160441944, "grad_norm": 6.8040874809246725, "learning_rate": 4.518914264474657e-07, "loss": 1.1298, "step": 5120 }, { "epoch": 0.6942316816918593, "grad_norm": 7.238599269226879, "learning_rate": 4.5152418119675684e-07, "loss": 1.0896, "step": 5121 }, { "epoch": 0.6943672473395242, "grad_norm": 5.380235805657057, "learning_rate": 4.5115704171528103e-07, "loss": 1.0578, "step": 5122 }, { "epoch": 0.6945028129871891, "grad_norm": 3.9968272991677103, "learning_rate": 4.507900080738367e-07, "loss": 1.1272, "step": 5123 }, { "epoch": 0.6946383786348539, "grad_norm": 5.1275265593395565, "learning_rate": 4.5042308034320487e-07, "loss": 1.1298, "step": 5124 }, { "epoch": 0.6947739442825188, "grad_norm": 4.12723140438576, "learning_rate": 4.500562585941432e-07, "loss": 1.0656, "step": 5125 }, { "epoch": 0.6949095099301837, "grad_norm": 7.81389392391463, "learning_rate": 4.496895428973917e-07, "loss": 1.1212, "step": 5126 }, { "epoch": 0.6950450755778486, "grad_norm": 7.266994174863652, "learning_rate": 4.4932293332366733e-07, "loss": 1.1089, "step": 5127 }, { "epoch": 0.6951806412255135, "grad_norm": 4.727469933399051, "learning_rate": 4.489564299436691e-07, "loss": 1.0978, "step": 5128 }, { "epoch": 0.6953162068731783, "grad_norm": 5.639781410397791, "learning_rate": 4.4859003282807305e-07, "loss": 1.0644, "step": 5129 }, { "epoch": 0.6954517725208432, "grad_norm": 5.345092594204634, "learning_rate": 4.4822374204753734e-07, "loss": 1.1203, "step": 5130 }, { "epoch": 0.6955873381685082, "grad_norm": 5.2242911942924755, "learning_rate": 4.4785755767269675e-07, "loss": 1.1093, "step": 5131 }, { "epoch": 0.695722903816173, "grad_norm": 8.441011323377513, "learning_rate": 4.474914797741686e-07, "loss": 1.0899, "step": 5132 }, { "epoch": 0.6958584694638379, "grad_norm": 7.380617959848616, "learning_rate": 4.471255084225468e-07, "loss": 1.0952, "step": 5133 }, { "epoch": 0.6959940351115027, "grad_norm": 6.251833385368639, "learning_rate": 4.467596436884068e-07, "loss": 1.113, "step": 5134 }, { "epoch": 0.6961296007591676, "grad_norm": 4.587115717518893, "learning_rate": 4.463938856423023e-07, "loss": 1.1042, "step": 5135 }, { "epoch": 0.6962651664068326, "grad_norm": 5.094383144013761, "learning_rate": 4.4602823435476723e-07, "loss": 1.1123, "step": 5136 }, { "epoch": 0.6964007320544974, "grad_norm": 5.658384160695704, "learning_rate": 4.4566268989631427e-07, "loss": 1.1163, "step": 5137 }, { "epoch": 0.6965362977021623, "grad_norm": 10.878138095981074, "learning_rate": 4.452972523374359e-07, "loss": 1.1089, "step": 5138 }, { "epoch": 0.6966718633498271, "grad_norm": 4.541472182036323, "learning_rate": 4.4493192174860394e-07, "loss": 1.1252, "step": 5139 }, { "epoch": 0.696807428997492, "grad_norm": 4.170301995146688, "learning_rate": 4.4456669820026935e-07, "loss": 1.1102, "step": 5140 }, { "epoch": 0.696942994645157, "grad_norm": 4.463066937614539, "learning_rate": 4.442015817628627e-07, "loss": 1.1222, "step": 5141 }, { "epoch": 0.6970785602928218, "grad_norm": 5.591232764378964, "learning_rate": 4.438365725067937e-07, "loss": 1.1168, "step": 5142 }, { "epoch": 0.6972141259404867, "grad_norm": 4.603899509669217, "learning_rate": 4.434716705024518e-07, "loss": 1.1083, "step": 5143 }, { "epoch": 0.6973496915881515, "grad_norm": 10.792534452506604, "learning_rate": 4.4310687582020524e-07, "loss": 1.0723, "step": 5144 }, { "epoch": 0.6974852572358164, "grad_norm": 5.077465469958806, "learning_rate": 4.4274218853040213e-07, "loss": 1.0897, "step": 5145 }, { "epoch": 0.6976208228834814, "grad_norm": 5.66553890854231, "learning_rate": 4.4237760870336883e-07, "loss": 1.0995, "step": 5146 }, { "epoch": 0.6977563885311462, "grad_norm": 4.170681988429673, "learning_rate": 4.420131364094122e-07, "loss": 1.0738, "step": 5147 }, { "epoch": 0.6978919541788111, "grad_norm": 8.138577277532631, "learning_rate": 4.4164877171881765e-07, "loss": 1.1551, "step": 5148 }, { "epoch": 0.6980275198264759, "grad_norm": 5.1069618864849655, "learning_rate": 4.4128451470185013e-07, "loss": 1.1371, "step": 5149 }, { "epoch": 0.6981630854741409, "grad_norm": 3.3682329450585864, "learning_rate": 4.409203654287538e-07, "loss": 1.112, "step": 5150 }, { "epoch": 0.6982986511218058, "grad_norm": 3.967133507554598, "learning_rate": 4.4055632396975174e-07, "loss": 1.0853, "step": 5151 }, { "epoch": 0.6984342167694706, "grad_norm": 9.96757183359617, "learning_rate": 4.4019239039504676e-07, "loss": 1.0978, "step": 5152 }, { "epoch": 0.6985697824171355, "grad_norm": 5.484179925646424, "learning_rate": 4.3982856477482034e-07, "loss": 1.0874, "step": 5153 }, { "epoch": 0.6987053480648003, "grad_norm": 4.7017288229259915, "learning_rate": 4.394648471792335e-07, "loss": 1.1054, "step": 5154 }, { "epoch": 0.6988409137124653, "grad_norm": 9.191553843878497, "learning_rate": 4.391012376784263e-07, "loss": 1.0966, "step": 5155 }, { "epoch": 0.6989764793601302, "grad_norm": 5.084385142980498, "learning_rate": 4.3873773634251796e-07, "loss": 1.091, "step": 5156 }, { "epoch": 0.699112045007795, "grad_norm": 4.518939316479425, "learning_rate": 4.3837434324160684e-07, "loss": 1.1103, "step": 5157 }, { "epoch": 0.6992476106554599, "grad_norm": 4.087914367771396, "learning_rate": 4.380110584457705e-07, "loss": 1.082, "step": 5158 }, { "epoch": 0.6993831763031247, "grad_norm": 4.923355206276339, "learning_rate": 4.376478820250653e-07, "loss": 1.0733, "step": 5159 }, { "epoch": 0.6995187419507897, "grad_norm": 4.470504068124982, "learning_rate": 4.3728481404952724e-07, "loss": 1.1061, "step": 5160 }, { "epoch": 0.6996543075984546, "grad_norm": 8.907000885105257, "learning_rate": 4.369218545891713e-07, "loss": 1.1451, "step": 5161 }, { "epoch": 0.6997898732461194, "grad_norm": 7.411387268527418, "learning_rate": 4.3655900371399025e-07, "loss": 1.1147, "step": 5162 }, { "epoch": 0.6999254388937843, "grad_norm": 4.2131352968776135, "learning_rate": 4.361962614939586e-07, "loss": 1.1092, "step": 5163 }, { "epoch": 0.7000610045414492, "grad_norm": 8.96221513277501, "learning_rate": 4.358336279990268e-07, "loss": 1.0974, "step": 5164 }, { "epoch": 0.7001965701891141, "grad_norm": 4.611696419331558, "learning_rate": 4.354711032991273e-07, "loss": 1.0861, "step": 5165 }, { "epoch": 0.700332135836779, "grad_norm": 4.175368105142914, "learning_rate": 4.3510868746416875e-07, "loss": 1.1272, "step": 5166 }, { "epoch": 0.7004677014844438, "grad_norm": 5.5484898130909786, "learning_rate": 4.3474638056404146e-07, "loss": 1.0958, "step": 5167 }, { "epoch": 0.7006032671321087, "grad_norm": 8.284214394844808, "learning_rate": 4.343841826686121e-07, "loss": 1.1036, "step": 5168 }, { "epoch": 0.7007388327797736, "grad_norm": 5.9594906634770615, "learning_rate": 4.3402209384772925e-07, "loss": 1.1282, "step": 5169 }, { "epoch": 0.7008743984274385, "grad_norm": 4.828506287649016, "learning_rate": 4.336601141712172e-07, "loss": 1.06, "step": 5170 }, { "epoch": 0.7010099640751034, "grad_norm": 4.097850338058813, "learning_rate": 4.332982437088825e-07, "loss": 1.0561, "step": 5171 }, { "epoch": 0.7011455297227682, "grad_norm": 5.0324050771335225, "learning_rate": 4.3293648253050786e-07, "loss": 1.0721, "step": 5172 }, { "epoch": 0.7012810953704331, "grad_norm": 5.063788136333977, "learning_rate": 4.3257483070585644e-07, "loss": 1.1247, "step": 5173 }, { "epoch": 0.701416661018098, "grad_norm": 6.270541594564515, "learning_rate": 4.3221328830466996e-07, "loss": 1.0837, "step": 5174 }, { "epoch": 0.7015522266657629, "grad_norm": 6.0663056139231, "learning_rate": 4.318518553966689e-07, "loss": 1.1074, "step": 5175 }, { "epoch": 0.7016877923134278, "grad_norm": 6.09277830960914, "learning_rate": 4.3149053205155295e-07, "loss": 1.1305, "step": 5176 }, { "epoch": 0.7018233579610926, "grad_norm": 8.546299725164857, "learning_rate": 4.3112931833900036e-07, "loss": 1.0736, "step": 5177 }, { "epoch": 0.7019589236087576, "grad_norm": 4.971649229973655, "learning_rate": 4.307682143286683e-07, "loss": 1.1248, "step": 5178 }, { "epoch": 0.7020944892564224, "grad_norm": 5.558184584005358, "learning_rate": 4.3040722009019284e-07, "loss": 1.091, "step": 5179 }, { "epoch": 0.7022300549040873, "grad_norm": 4.10509433109832, "learning_rate": 4.300463356931888e-07, "loss": 1.095, "step": 5180 }, { "epoch": 0.7023656205517522, "grad_norm": 5.124522203046623, "learning_rate": 4.296855612072501e-07, "loss": 1.0612, "step": 5181 }, { "epoch": 0.702501186199417, "grad_norm": 9.103476284617694, "learning_rate": 4.293248967019495e-07, "loss": 1.1369, "step": 5182 }, { "epoch": 0.702636751847082, "grad_norm": 4.313137006639786, "learning_rate": 4.289643422468372e-07, "loss": 1.1031, "step": 5183 }, { "epoch": 0.7027723174947468, "grad_norm": 5.159498974526277, "learning_rate": 4.286038979114447e-07, "loss": 1.0676, "step": 5184 }, { "epoch": 0.7029078831424117, "grad_norm": 4.028360885999124, "learning_rate": 4.282435637652795e-07, "loss": 1.1055, "step": 5185 }, { "epoch": 0.7030434487900766, "grad_norm": 3.7063668259724984, "learning_rate": 4.278833398778305e-07, "loss": 1.1109, "step": 5186 }, { "epoch": 0.7031790144377414, "grad_norm": 5.809221639429438, "learning_rate": 4.2752322631856275e-07, "loss": 1.1123, "step": 5187 }, { "epoch": 0.7033145800854064, "grad_norm": 6.223894576901639, "learning_rate": 4.2716322315692266e-07, "loss": 1.0889, "step": 5188 }, { "epoch": 0.7034501457330712, "grad_norm": 3.7623087260316264, "learning_rate": 4.2680333046233286e-07, "loss": 1.0918, "step": 5189 }, { "epoch": 0.7035857113807361, "grad_norm": 5.398136928353964, "learning_rate": 4.2644354830419627e-07, "loss": 1.0904, "step": 5190 }, { "epoch": 0.703721277028401, "grad_norm": 4.869285015644092, "learning_rate": 4.2608387675189404e-07, "loss": 1.0846, "step": 5191 }, { "epoch": 0.7038568426760659, "grad_norm": 4.127619714484532, "learning_rate": 4.2572431587478594e-07, "loss": 1.09, "step": 5192 }, { "epoch": 0.7039924083237308, "grad_norm": 5.5845656416018485, "learning_rate": 4.253648657422105e-07, "loss": 1.1108, "step": 5193 }, { "epoch": 0.7041279739713956, "grad_norm": 7.284527384804471, "learning_rate": 4.2500552642348475e-07, "loss": 1.0928, "step": 5194 }, { "epoch": 0.7042635396190605, "grad_norm": 4.289239691652395, "learning_rate": 4.2464629798790453e-07, "loss": 1.0953, "step": 5195 }, { "epoch": 0.7043991052667254, "grad_norm": 4.354724827752507, "learning_rate": 4.242871805047442e-07, "loss": 1.1232, "step": 5196 }, { "epoch": 0.7045346709143903, "grad_norm": 4.017681726775972, "learning_rate": 4.2392817404325665e-07, "loss": 1.1073, "step": 5197 }, { "epoch": 0.7046702365620552, "grad_norm": 4.339322370681183, "learning_rate": 4.2356927867267355e-07, "loss": 1.0818, "step": 5198 }, { "epoch": 0.7048058022097201, "grad_norm": 6.73608919341067, "learning_rate": 4.23210494462205e-07, "loss": 1.1096, "step": 5199 }, { "epoch": 0.7049413678573849, "grad_norm": 4.192407711062292, "learning_rate": 4.228518214810396e-07, "loss": 1.1573, "step": 5200 }, { "epoch": 0.7050769335050499, "grad_norm": 5.375702534811593, "learning_rate": 4.2249325979834484e-07, "loss": 1.1385, "step": 5201 }, { "epoch": 0.7052124991527147, "grad_norm": 3.9609481407003986, "learning_rate": 4.221348094832666e-07, "loss": 1.1101, "step": 5202 }, { "epoch": 0.7053480648003796, "grad_norm": 4.255088653637946, "learning_rate": 4.217764706049283e-07, "loss": 1.1158, "step": 5203 }, { "epoch": 0.7054836304480445, "grad_norm": 3.9786215942341827, "learning_rate": 4.2141824323243416e-07, "loss": 1.1211, "step": 5204 }, { "epoch": 0.7056191960957093, "grad_norm": 4.915623942535581, "learning_rate": 4.21060127434864e-07, "loss": 1.0408, "step": 5205 }, { "epoch": 0.7057547617433743, "grad_norm": 3.6398655379175424, "learning_rate": 4.207021232812792e-07, "loss": 1.0785, "step": 5206 }, { "epoch": 0.7058903273910391, "grad_norm": 4.71461630887414, "learning_rate": 4.2034423084071637e-07, "loss": 1.0888, "step": 5207 }, { "epoch": 0.706025893038704, "grad_norm": 5.237732022644324, "learning_rate": 4.199864501821939e-07, "loss": 1.0835, "step": 5208 }, { "epoch": 0.7061614586863689, "grad_norm": 5.155319358325216, "learning_rate": 4.196287813747058e-07, "loss": 1.0741, "step": 5209 }, { "epoch": 0.7062970243340337, "grad_norm": 6.14570928697157, "learning_rate": 4.1927122448722597e-07, "loss": 1.0876, "step": 5210 }, { "epoch": 0.7064325899816987, "grad_norm": 5.14314899626422, "learning_rate": 4.1891377958870657e-07, "loss": 1.0867, "step": 5211 }, { "epoch": 0.7065681556293635, "grad_norm": 5.024708059457666, "learning_rate": 4.18556446748078e-07, "loss": 1.1376, "step": 5212 }, { "epoch": 0.7067037212770284, "grad_norm": 4.748967747208801, "learning_rate": 4.1819922603424895e-07, "loss": 1.0691, "step": 5213 }, { "epoch": 0.7068392869246933, "grad_norm": 7.799338204881223, "learning_rate": 4.1784211751610675e-07, "loss": 1.1256, "step": 5214 }, { "epoch": 0.7069748525723581, "grad_norm": 4.336015921336594, "learning_rate": 4.174851212625169e-07, "loss": 1.1021, "step": 5215 }, { "epoch": 0.7071104182200231, "grad_norm": 4.313706082997862, "learning_rate": 4.171282373423234e-07, "loss": 1.123, "step": 5216 }, { "epoch": 0.7072459838676879, "grad_norm": 4.0941689420850995, "learning_rate": 4.167714658243486e-07, "loss": 1.1225, "step": 5217 }, { "epoch": 0.7073815495153528, "grad_norm": 4.929613118571913, "learning_rate": 4.1641480677739236e-07, "loss": 1.1157, "step": 5218 }, { "epoch": 0.7075171151630177, "grad_norm": 4.525124353305751, "learning_rate": 4.160582602702347e-07, "loss": 1.0811, "step": 5219 }, { "epoch": 0.7076526808106826, "grad_norm": 4.278210357893363, "learning_rate": 4.1570182637163153e-07, "loss": 1.0878, "step": 5220 }, { "epoch": 0.7077882464583475, "grad_norm": 5.816897912586075, "learning_rate": 4.153455051503196e-07, "loss": 1.1107, "step": 5221 }, { "epoch": 0.7079238121060123, "grad_norm": 4.10807768497392, "learning_rate": 4.149892966750114e-07, "loss": 1.1255, "step": 5222 }, { "epoch": 0.7080593777536772, "grad_norm": 6.147269642699405, "learning_rate": 4.1463320101440027e-07, "loss": 1.1324, "step": 5223 }, { "epoch": 0.7081949434013421, "grad_norm": 8.894518418604024, "learning_rate": 4.1427721823715487e-07, "loss": 1.0968, "step": 5224 }, { "epoch": 0.708330509049007, "grad_norm": 4.148494993693528, "learning_rate": 4.1392134841192537e-07, "loss": 1.1298, "step": 5225 }, { "epoch": 0.7084660746966719, "grad_norm": 22.673554080198326, "learning_rate": 4.135655916073368e-07, "loss": 1.1257, "step": 5226 }, { "epoch": 0.7086016403443367, "grad_norm": 4.880710456377757, "learning_rate": 4.132099478919957e-07, "loss": 1.095, "step": 5227 }, { "epoch": 0.7087372059920016, "grad_norm": 5.522562736050872, "learning_rate": 4.1285441733448344e-07, "loss": 1.0778, "step": 5228 }, { "epoch": 0.7088727716396666, "grad_norm": 4.7996062509106485, "learning_rate": 4.124990000033629e-07, "loss": 1.0994, "step": 5229 }, { "epoch": 0.7090083372873314, "grad_norm": 5.543931486043969, "learning_rate": 4.1214369596717244e-07, "loss": 1.1212, "step": 5230 }, { "epoch": 0.7091439029349963, "grad_norm": 3.914669793702046, "learning_rate": 4.1178850529442996e-07, "loss": 1.1043, "step": 5231 }, { "epoch": 0.7092794685826611, "grad_norm": 4.7989180207820175, "learning_rate": 4.1143342805363123e-07, "loss": 1.0837, "step": 5232 }, { "epoch": 0.709415034230326, "grad_norm": 7.909671519539383, "learning_rate": 4.1107846431325e-07, "loss": 1.0918, "step": 5233 }, { "epoch": 0.709550599877991, "grad_norm": 4.357489161696387, "learning_rate": 4.1072361414173815e-07, "loss": 1.0993, "step": 5234 }, { "epoch": 0.7096861655256558, "grad_norm": 5.034303621081348, "learning_rate": 4.10368877607526e-07, "loss": 1.0943, "step": 5235 }, { "epoch": 0.7098217311733207, "grad_norm": 3.8521844664854563, "learning_rate": 4.100142547790214e-07, "loss": 1.0995, "step": 5236 }, { "epoch": 0.7099572968209855, "grad_norm": 7.11512556151196, "learning_rate": 4.096597457246108e-07, "loss": 1.0892, "step": 5237 }, { "epoch": 0.7100928624686504, "grad_norm": 4.610510581684342, "learning_rate": 4.0930535051265835e-07, "loss": 1.1069, "step": 5238 }, { "epoch": 0.7102284281163154, "grad_norm": 4.249499975232564, "learning_rate": 4.0895106921150644e-07, "loss": 1.1382, "step": 5239 }, { "epoch": 0.7103639937639802, "grad_norm": 4.567387262882669, "learning_rate": 4.0859690188947525e-07, "loss": 1.0751, "step": 5240 }, { "epoch": 0.7104995594116451, "grad_norm": 7.3487157296261385, "learning_rate": 4.0824284861486346e-07, "loss": 1.1248, "step": 5241 }, { "epoch": 0.7106351250593099, "grad_norm": 4.645583462590087, "learning_rate": 4.0788890945594714e-07, "loss": 1.141, "step": 5242 }, { "epoch": 0.7107706907069749, "grad_norm": 4.253666121422827, "learning_rate": 4.0753508448098085e-07, "loss": 1.0749, "step": 5243 }, { "epoch": 0.7109062563546398, "grad_norm": 9.82396646525487, "learning_rate": 4.0718137375819717e-07, "loss": 1.0882, "step": 5244 }, { "epoch": 0.7110418220023046, "grad_norm": 6.71759354416203, "learning_rate": 4.0682777735580586e-07, "loss": 1.1058, "step": 5245 }, { "epoch": 0.7111773876499695, "grad_norm": 3.962404390015454, "learning_rate": 4.064742953419954e-07, "loss": 1.1305, "step": 5246 }, { "epoch": 0.7113129532976343, "grad_norm": 6.119882254273073, "learning_rate": 4.061209277849321e-07, "loss": 1.0634, "step": 5247 }, { "epoch": 0.7114485189452993, "grad_norm": 4.026515812609603, "learning_rate": 4.057676747527601e-07, "loss": 1.1062, "step": 5248 }, { "epoch": 0.7115840845929642, "grad_norm": 4.650709474113323, "learning_rate": 4.054145363136013e-07, "loss": 1.0968, "step": 5249 }, { "epoch": 0.711719650240629, "grad_norm": 5.51601316577261, "learning_rate": 4.05061512535556e-07, "loss": 1.0817, "step": 5250 }, { "epoch": 0.7118552158882939, "grad_norm": 4.307313257860988, "learning_rate": 4.047086034867018e-07, "loss": 1.0793, "step": 5251 }, { "epoch": 0.7119907815359587, "grad_norm": 4.259322629061458, "learning_rate": 4.0435580923509436e-07, "loss": 1.1043, "step": 5252 }, { "epoch": 0.7121263471836237, "grad_norm": 3.665055029645585, "learning_rate": 4.040031298487675e-07, "loss": 1.1077, "step": 5253 }, { "epoch": 0.7122619128312886, "grad_norm": 23.736789818427756, "learning_rate": 4.036505653957325e-07, "loss": 1.1121, "step": 5254 }, { "epoch": 0.7123974784789534, "grad_norm": 4.202639246683864, "learning_rate": 4.032981159439787e-07, "loss": 1.1197, "step": 5255 }, { "epoch": 0.7125330441266183, "grad_norm": 39.975995458344954, "learning_rate": 4.029457815614731e-07, "loss": 1.1078, "step": 5256 }, { "epoch": 0.7126686097742831, "grad_norm": 7.172835731170123, "learning_rate": 4.025935623161607e-07, "loss": 1.1221, "step": 5257 }, { "epoch": 0.7128041754219481, "grad_norm": 9.22428445231751, "learning_rate": 4.022414582759646e-07, "loss": 1.1279, "step": 5258 }, { "epoch": 0.712939741069613, "grad_norm": 4.9595883603977855, "learning_rate": 4.01889469508784e-07, "loss": 1.1132, "step": 5259 }, { "epoch": 0.7130753067172778, "grad_norm": 9.854871954653927, "learning_rate": 4.0153759608249883e-07, "loss": 1.1074, "step": 5260 }, { "epoch": 0.7132108723649427, "grad_norm": 13.28013781261636, "learning_rate": 4.011858380649634e-07, "loss": 1.047, "step": 5261 }, { "epoch": 0.7133464380126076, "grad_norm": 4.8159915620908205, "learning_rate": 4.008341955240132e-07, "loss": 1.1135, "step": 5262 }, { "epoch": 0.7134820036602725, "grad_norm": 9.064120747715766, "learning_rate": 4.0048266852745815e-07, "loss": 1.0757, "step": 5263 }, { "epoch": 0.7136175693079374, "grad_norm": 5.9296215190331125, "learning_rate": 4.0013125714308883e-07, "loss": 1.1082, "step": 5264 }, { "epoch": 0.7137531349556022, "grad_norm": 5.527553764906572, "learning_rate": 3.9977996143867086e-07, "loss": 1.1229, "step": 5265 }, { "epoch": 0.7138887006032671, "grad_norm": 4.555618247320797, "learning_rate": 3.9942878148195015e-07, "loss": 1.0527, "step": 5266 }, { "epoch": 0.714024266250932, "grad_norm": 5.60420829817452, "learning_rate": 3.9907771734064756e-07, "loss": 1.1039, "step": 5267 }, { "epoch": 0.7141598318985969, "grad_norm": 15.83153323881992, "learning_rate": 3.987267690824646e-07, "loss": 1.1252, "step": 5268 }, { "epoch": 0.7142953975462618, "grad_norm": 4.656251512210638, "learning_rate": 3.983759367750772e-07, "loss": 1.0848, "step": 5269 }, { "epoch": 0.7144309631939266, "grad_norm": 7.391369095753569, "learning_rate": 3.980252204861423e-07, "loss": 1.1071, "step": 5270 }, { "epoch": 0.7145665288415916, "grad_norm": 10.714511206545366, "learning_rate": 3.9767462028329156e-07, "loss": 1.098, "step": 5271 }, { "epoch": 0.7147020944892564, "grad_norm": 4.907802953692124, "learning_rate": 3.973241362341357e-07, "loss": 1.0693, "step": 5272 }, { "epoch": 0.7148376601369213, "grad_norm": 3.7719179727529135, "learning_rate": 3.9697376840626304e-07, "loss": 1.1049, "step": 5273 }, { "epoch": 0.7149732257845862, "grad_norm": 4.968079617321578, "learning_rate": 3.9662351686723914e-07, "loss": 1.1206, "step": 5274 }, { "epoch": 0.715108791432251, "grad_norm": 4.270626460076365, "learning_rate": 3.962733816846073e-07, "loss": 1.078, "step": 5275 }, { "epoch": 0.715244357079916, "grad_norm": 4.970996803610327, "learning_rate": 3.9592336292588825e-07, "loss": 1.0675, "step": 5276 }, { "epoch": 0.7153799227275809, "grad_norm": 4.45283552138292, "learning_rate": 3.9557346065858034e-07, "loss": 1.1041, "step": 5277 }, { "epoch": 0.7155154883752457, "grad_norm": 5.8864763106386775, "learning_rate": 3.952236749501594e-07, "loss": 1.1173, "step": 5278 }, { "epoch": 0.7156510540229106, "grad_norm": 5.854337594858179, "learning_rate": 3.948740058680791e-07, "loss": 1.1212, "step": 5279 }, { "epoch": 0.7157866196705754, "grad_norm": 4.743566705067752, "learning_rate": 3.9452445347977e-07, "loss": 1.1227, "step": 5280 }, { "epoch": 0.7159221853182404, "grad_norm": 5.174162149778401, "learning_rate": 3.941750178526413e-07, "loss": 1.0978, "step": 5281 }, { "epoch": 0.7160577509659053, "grad_norm": 6.471998787781223, "learning_rate": 3.938256990540775e-07, "loss": 1.134, "step": 5282 }, { "epoch": 0.7161933166135701, "grad_norm": 4.431586674964088, "learning_rate": 3.934764971514434e-07, "loss": 1.079, "step": 5283 }, { "epoch": 0.716328882261235, "grad_norm": 6.032901070136952, "learning_rate": 3.931274122120786e-07, "loss": 1.1124, "step": 5284 }, { "epoch": 0.7164644479088998, "grad_norm": 7.081718538521717, "learning_rate": 3.9277844430330277e-07, "loss": 1.1322, "step": 5285 }, { "epoch": 0.7166000135565648, "grad_norm": 10.469922939778186, "learning_rate": 3.9242959349241036e-07, "loss": 1.1353, "step": 5286 }, { "epoch": 0.7167355792042297, "grad_norm": 4.144334898891006, "learning_rate": 3.9208085984667507e-07, "loss": 1.097, "step": 5287 }, { "epoch": 0.7168711448518945, "grad_norm": 6.041508123188762, "learning_rate": 3.917322434333472e-07, "loss": 1.1143, "step": 5288 }, { "epoch": 0.7170067104995594, "grad_norm": 4.0491760740887495, "learning_rate": 3.913837443196549e-07, "loss": 1.0898, "step": 5289 }, { "epoch": 0.7171422761472243, "grad_norm": 4.767875320918422, "learning_rate": 3.9103536257280343e-07, "loss": 1.112, "step": 5290 }, { "epoch": 0.7172778417948892, "grad_norm": 4.346371582886906, "learning_rate": 3.9068709825997534e-07, "loss": 1.1117, "step": 5291 }, { "epoch": 0.7174134074425541, "grad_norm": 5.25137868362922, "learning_rate": 3.903389514483308e-07, "loss": 1.0644, "step": 5292 }, { "epoch": 0.7175489730902189, "grad_norm": 3.9778112628816658, "learning_rate": 3.899909222050071e-07, "loss": 1.0657, "step": 5293 }, { "epoch": 0.7176845387378838, "grad_norm": 5.3738096807901705, "learning_rate": 3.896430105971188e-07, "loss": 1.0956, "step": 5294 }, { "epoch": 0.7178201043855487, "grad_norm": 6.316014797888191, "learning_rate": 3.8929521669175813e-07, "loss": 1.119, "step": 5295 }, { "epoch": 0.7179556700332136, "grad_norm": 4.522692598099753, "learning_rate": 3.889475405559943e-07, "loss": 1.1035, "step": 5296 }, { "epoch": 0.7180912356808785, "grad_norm": 5.137780842468076, "learning_rate": 3.88599982256874e-07, "loss": 1.1317, "step": 5297 }, { "epoch": 0.7182268013285433, "grad_norm": 5.797489627540523, "learning_rate": 3.8825254186142097e-07, "loss": 1.1227, "step": 5298 }, { "epoch": 0.7183623669762083, "grad_norm": 4.380168710395426, "learning_rate": 3.8790521943663633e-07, "loss": 1.0952, "step": 5299 }, { "epoch": 0.7184979326238731, "grad_norm": 4.638834875895617, "learning_rate": 3.875580150494986e-07, "loss": 1.0934, "step": 5300 }, { "epoch": 0.718633498271538, "grad_norm": 9.249311268512464, "learning_rate": 3.8721092876696373e-07, "loss": 1.1023, "step": 5301 }, { "epoch": 0.7187690639192029, "grad_norm": 4.687488743797402, "learning_rate": 3.868639606559635e-07, "loss": 1.0978, "step": 5302 }, { "epoch": 0.7189046295668677, "grad_norm": 5.586627777147738, "learning_rate": 3.8651711078340923e-07, "loss": 1.1006, "step": 5303 }, { "epoch": 0.7190401952145327, "grad_norm": 3.4890343138570863, "learning_rate": 3.86170379216187e-07, "loss": 1.0688, "step": 5304 }, { "epoch": 0.7191757608621975, "grad_norm": 8.368985526526258, "learning_rate": 3.8582376602116254e-07, "loss": 1.0988, "step": 5305 }, { "epoch": 0.7193113265098624, "grad_norm": 13.894134179078435, "learning_rate": 3.854772712651765e-07, "loss": 1.1204, "step": 5306 }, { "epoch": 0.7194468921575273, "grad_norm": 8.446856835204974, "learning_rate": 3.8513089501504783e-07, "loss": 1.0854, "step": 5307 }, { "epoch": 0.7195824578051921, "grad_norm": 4.191425993312543, "learning_rate": 3.847846373375726e-07, "loss": 1.0841, "step": 5308 }, { "epoch": 0.7197180234528571, "grad_norm": 5.684148282528054, "learning_rate": 3.844384982995239e-07, "loss": 1.1441, "step": 5309 }, { "epoch": 0.7198535891005219, "grad_norm": 6.619750530575187, "learning_rate": 3.8409247796765185e-07, "loss": 1.1241, "step": 5310 }, { "epoch": 0.7199891547481868, "grad_norm": 7.61431228563347, "learning_rate": 3.837465764086837e-07, "loss": 1.0621, "step": 5311 }, { "epoch": 0.7201247203958517, "grad_norm": 3.660866896239188, "learning_rate": 3.83400793689324e-07, "loss": 1.0658, "step": 5312 }, { "epoch": 0.7202602860435166, "grad_norm": 6.7822281767014125, "learning_rate": 3.83055129876254e-07, "loss": 1.1394, "step": 5313 }, { "epoch": 0.7203958516911815, "grad_norm": 6.264358165688883, "learning_rate": 3.8270958503613225e-07, "loss": 1.0895, "step": 5314 }, { "epoch": 0.7205314173388463, "grad_norm": 4.7625392722719315, "learning_rate": 3.8236415923559463e-07, "loss": 1.1087, "step": 5315 }, { "epoch": 0.7206669829865112, "grad_norm": 4.48972338117157, "learning_rate": 3.820188525412538e-07, "loss": 1.0952, "step": 5316 }, { "epoch": 0.7208025486341761, "grad_norm": 6.199853854040211, "learning_rate": 3.8167366501969855e-07, "loss": 1.0897, "step": 5317 }, { "epoch": 0.720938114281841, "grad_norm": 5.67351441568103, "learning_rate": 3.8132859673749685e-07, "loss": 1.0981, "step": 5318 }, { "epoch": 0.7210736799295059, "grad_norm": 6.264453437176862, "learning_rate": 3.809836477611912e-07, "loss": 1.0677, "step": 5319 }, { "epoch": 0.7212092455771707, "grad_norm": 4.810234019245673, "learning_rate": 3.806388181573035e-07, "loss": 1.0593, "step": 5320 }, { "epoch": 0.7213448112248356, "grad_norm": 6.651178489636552, "learning_rate": 3.8029410799233006e-07, "loss": 1.1214, "step": 5321 }, { "epoch": 0.7214803768725006, "grad_norm": 5.748916216384821, "learning_rate": 3.7994951733274695e-07, "loss": 1.1028, "step": 5322 }, { "epoch": 0.7216159425201654, "grad_norm": 5.104315834370597, "learning_rate": 3.7960504624500436e-07, "loss": 1.122, "step": 5323 }, { "epoch": 0.7217515081678303, "grad_norm": 9.86939366447058, "learning_rate": 3.792606947955321e-07, "loss": 1.1218, "step": 5324 }, { "epoch": 0.7218870738154951, "grad_norm": 6.2392636066722185, "learning_rate": 3.7891646305073456e-07, "loss": 1.1075, "step": 5325 }, { "epoch": 0.72202263946316, "grad_norm": 5.688370619211231, "learning_rate": 3.78572351076995e-07, "loss": 1.0713, "step": 5326 }, { "epoch": 0.722158205110825, "grad_norm": 4.514511345386917, "learning_rate": 3.7822835894067185e-07, "loss": 1.0774, "step": 5327 }, { "epoch": 0.7222937707584898, "grad_norm": 4.430327214943794, "learning_rate": 3.7788448670810225e-07, "loss": 1.1099, "step": 5328 }, { "epoch": 0.7224293364061547, "grad_norm": 3.7845034464406013, "learning_rate": 3.775407344455984e-07, "loss": 1.1225, "step": 5329 }, { "epoch": 0.7225649020538195, "grad_norm": 6.596334650381962, "learning_rate": 3.7719710221945055e-07, "loss": 1.1187, "step": 5330 }, { "epoch": 0.7227004677014844, "grad_norm": 12.791482884808062, "learning_rate": 3.768535900959253e-07, "loss": 1.0746, "step": 5331 }, { "epoch": 0.7228360333491494, "grad_norm": 10.97089253569415, "learning_rate": 3.765101981412665e-07, "loss": 1.1143, "step": 5332 }, { "epoch": 0.7229715989968142, "grad_norm": 7.2084670082196265, "learning_rate": 3.7616692642169443e-07, "loss": 1.0764, "step": 5333 }, { "epoch": 0.7231071646444791, "grad_norm": 6.906716451219071, "learning_rate": 3.7582377500340636e-07, "loss": 1.1179, "step": 5334 }, { "epoch": 0.7232427302921439, "grad_norm": 4.110346679645253, "learning_rate": 3.7548074395257634e-07, "loss": 1.0971, "step": 5335 }, { "epoch": 0.7233782959398088, "grad_norm": 4.35361972056506, "learning_rate": 3.751378333353552e-07, "loss": 1.0843, "step": 5336 }, { "epoch": 0.7235138615874738, "grad_norm": 5.021349525382846, "learning_rate": 3.747950432178706e-07, "loss": 1.1109, "step": 5337 }, { "epoch": 0.7236494272351386, "grad_norm": 3.8493858674963617, "learning_rate": 3.744523736662267e-07, "loss": 1.0997, "step": 5338 }, { "epoch": 0.7237849928828035, "grad_norm": 4.760276872293655, "learning_rate": 3.7410982474650486e-07, "loss": 1.1245, "step": 5339 }, { "epoch": 0.7239205585304683, "grad_norm": 4.191348060377504, "learning_rate": 3.7376739652476287e-07, "loss": 1.1197, "step": 5340 }, { "epoch": 0.7240561241781333, "grad_norm": 5.073314015028963, "learning_rate": 3.734250890670352e-07, "loss": 1.0995, "step": 5341 }, { "epoch": 0.7241916898257982, "grad_norm": 5.014263314699319, "learning_rate": 3.730829024393333e-07, "loss": 1.0951, "step": 5342 }, { "epoch": 0.724327255473463, "grad_norm": 6.0547984979770355, "learning_rate": 3.727408367076453e-07, "loss": 1.1088, "step": 5343 }, { "epoch": 0.7244628211211279, "grad_norm": 5.984356166296751, "learning_rate": 3.723988919379354e-07, "loss": 1.0823, "step": 5344 }, { "epoch": 0.7245983867687927, "grad_norm": 5.513321026609987, "learning_rate": 3.7205706819614527e-07, "loss": 1.088, "step": 5345 }, { "epoch": 0.7247339524164577, "grad_norm": 5.270918955378279, "learning_rate": 3.717153655481927e-07, "loss": 1.117, "step": 5346 }, { "epoch": 0.7248695180641226, "grad_norm": 6.499610403644463, "learning_rate": 3.7137378405997267e-07, "loss": 1.0911, "step": 5347 }, { "epoch": 0.7250050837117874, "grad_norm": 6.3006540164925955, "learning_rate": 3.710323237973563e-07, "loss": 1.09, "step": 5348 }, { "epoch": 0.7251406493594523, "grad_norm": 5.6772287545390805, "learning_rate": 3.7069098482619145e-07, "loss": 1.108, "step": 5349 }, { "epoch": 0.7252762150071171, "grad_norm": 4.136708119138534, "learning_rate": 3.703497672123026e-07, "loss": 1.1255, "step": 5350 }, { "epoch": 0.7254117806547821, "grad_norm": 3.7106648947450847, "learning_rate": 3.7000867102149114e-07, "loss": 1.0915, "step": 5351 }, { "epoch": 0.725547346302447, "grad_norm": 5.220098078801597, "learning_rate": 3.6966769631953466e-07, "loss": 1.0481, "step": 5352 }, { "epoch": 0.7256829119501118, "grad_norm": 4.094611914855287, "learning_rate": 3.693268431721873e-07, "loss": 1.1555, "step": 5353 }, { "epoch": 0.7258184775977767, "grad_norm": 4.392992063313893, "learning_rate": 3.6898611164518e-07, "loss": 1.144, "step": 5354 }, { "epoch": 0.7259540432454417, "grad_norm": 3.7972305045821773, "learning_rate": 3.6864550180422014e-07, "loss": 1.1078, "step": 5355 }, { "epoch": 0.7260896088931065, "grad_norm": 16.55426675335947, "learning_rate": 3.683050137149918e-07, "loss": 1.118, "step": 5356 }, { "epoch": 0.7262251745407714, "grad_norm": 4.1315664936604435, "learning_rate": 3.6796464744315545e-07, "loss": 1.0974, "step": 5357 }, { "epoch": 0.7263607401884362, "grad_norm": 6.137062671525643, "learning_rate": 3.6762440305434726e-07, "loss": 1.1279, "step": 5358 }, { "epoch": 0.7264963058361011, "grad_norm": 5.210577749109681, "learning_rate": 3.6728428061418195e-07, "loss": 1.0786, "step": 5359 }, { "epoch": 0.7266318714837661, "grad_norm": 6.299695622134705, "learning_rate": 3.66944280188248e-07, "loss": 1.1181, "step": 5360 }, { "epoch": 0.7267674371314309, "grad_norm": 4.753039502859897, "learning_rate": 3.6660440184211326e-07, "loss": 1.0901, "step": 5361 }, { "epoch": 0.7269030027790958, "grad_norm": 5.539823863477366, "learning_rate": 3.662646456413193e-07, "loss": 1.1345, "step": 5362 }, { "epoch": 0.7270385684267606, "grad_norm": 6.563245313452282, "learning_rate": 3.6592501165138666e-07, "loss": 1.1082, "step": 5363 }, { "epoch": 0.7271741340744255, "grad_norm": 4.096686389042165, "learning_rate": 3.6558549993780985e-07, "loss": 1.1177, "step": 5364 }, { "epoch": 0.7273096997220905, "grad_norm": 5.117120172649726, "learning_rate": 3.6524611056606226e-07, "loss": 1.108, "step": 5365 }, { "epoch": 0.7274452653697553, "grad_norm": 4.188703940410847, "learning_rate": 3.6490684360159106e-07, "loss": 1.0816, "step": 5366 }, { "epoch": 0.7275808310174202, "grad_norm": 5.717297250728447, "learning_rate": 3.6456769910982264e-07, "loss": 1.0877, "step": 5367 }, { "epoch": 0.727716396665085, "grad_norm": 11.267058368331279, "learning_rate": 3.6422867715615703e-07, "loss": 1.1343, "step": 5368 }, { "epoch": 0.72785196231275, "grad_norm": 5.128890375459652, "learning_rate": 3.638897778059732e-07, "loss": 1.1343, "step": 5369 }, { "epoch": 0.7279875279604149, "grad_norm": 3.9741786346751695, "learning_rate": 3.6355100112462425e-07, "loss": 1.1208, "step": 5370 }, { "epoch": 0.7281230936080797, "grad_norm": 5.1784528987866105, "learning_rate": 3.632123471774409e-07, "loss": 1.1137, "step": 5371 }, { "epoch": 0.7282586592557446, "grad_norm": 4.7901299331684015, "learning_rate": 3.628738160297299e-07, "loss": 1.0492, "step": 5372 }, { "epoch": 0.7283942249034094, "grad_norm": 3.8223188737596434, "learning_rate": 3.625354077467743e-07, "loss": 1.1165, "step": 5373 }, { "epoch": 0.7285297905510744, "grad_norm": 4.578286111248853, "learning_rate": 3.6219712239383336e-07, "loss": 1.0773, "step": 5374 }, { "epoch": 0.7286653561987393, "grad_norm": 4.689792571061697, "learning_rate": 3.6185896003614303e-07, "loss": 1.0996, "step": 5375 }, { "epoch": 0.7288009218464041, "grad_norm": 4.825855762968874, "learning_rate": 3.6152092073891504e-07, "loss": 1.0537, "step": 5376 }, { "epoch": 0.728936487494069, "grad_norm": 5.7319382878908325, "learning_rate": 3.6118300456733764e-07, "loss": 1.1541, "step": 5377 }, { "epoch": 0.7290720531417338, "grad_norm": 4.052827204770296, "learning_rate": 3.6084521158657555e-07, "loss": 1.075, "step": 5378 }, { "epoch": 0.7292076187893988, "grad_norm": 8.604054452782439, "learning_rate": 3.605075418617687e-07, "loss": 1.1159, "step": 5379 }, { "epoch": 0.7293431844370637, "grad_norm": 5.854284545531547, "learning_rate": 3.6016999545803504e-07, "loss": 1.1132, "step": 5380 }, { "epoch": 0.7294787500847285, "grad_norm": 8.124025325551854, "learning_rate": 3.5983257244046674e-07, "loss": 1.1011, "step": 5381 }, { "epoch": 0.7296143157323934, "grad_norm": 5.997972700673159, "learning_rate": 3.594952728741343e-07, "loss": 1.1169, "step": 5382 }, { "epoch": 0.7297498813800583, "grad_norm": 5.022019564888029, "learning_rate": 3.591580968240819e-07, "loss": 1.1305, "step": 5383 }, { "epoch": 0.7298854470277232, "grad_norm": 3.721925912421601, "learning_rate": 3.5882104435533276e-07, "loss": 1.1257, "step": 5384 }, { "epoch": 0.7300210126753881, "grad_norm": 4.468499082491654, "learning_rate": 3.584841155328837e-07, "loss": 1.0746, "step": 5385 }, { "epoch": 0.7301565783230529, "grad_norm": 4.980048002048528, "learning_rate": 3.581473104217092e-07, "loss": 1.0986, "step": 5386 }, { "epoch": 0.7302921439707178, "grad_norm": 6.218680582472385, "learning_rate": 3.578106290867593e-07, "loss": 1.0856, "step": 5387 }, { "epoch": 0.7304277096183827, "grad_norm": 3.81439945368283, "learning_rate": 3.5747407159296063e-07, "loss": 1.0862, "step": 5388 }, { "epoch": 0.7305632752660476, "grad_norm": 3.9786051697231697, "learning_rate": 3.571376380052152e-07, "loss": 1.0806, "step": 5389 }, { "epoch": 0.7306988409137125, "grad_norm": 4.133680289207226, "learning_rate": 3.5680132838840205e-07, "loss": 1.0708, "step": 5390 }, { "epoch": 0.7308344065613773, "grad_norm": 5.233899968796697, "learning_rate": 3.564651428073755e-07, "loss": 1.111, "step": 5391 }, { "epoch": 0.7309699722090423, "grad_norm": 12.109581089010186, "learning_rate": 3.561290813269665e-07, "loss": 1.0725, "step": 5392 }, { "epoch": 0.7311055378567071, "grad_norm": 5.215164218533097, "learning_rate": 3.5579314401198166e-07, "loss": 1.1377, "step": 5393 }, { "epoch": 0.731241103504372, "grad_norm": 3.868171981218734, "learning_rate": 3.5545733092720396e-07, "loss": 1.0939, "step": 5394 }, { "epoch": 0.7313766691520369, "grad_norm": 4.220158791398874, "learning_rate": 3.551216421373924e-07, "loss": 1.1034, "step": 5395 }, { "epoch": 0.7315122347997017, "grad_norm": 4.739976033510125, "learning_rate": 3.5478607770728164e-07, "loss": 1.1233, "step": 5396 }, { "epoch": 0.7316478004473667, "grad_norm": 8.091693010546274, "learning_rate": 3.544506377015829e-07, "loss": 1.1067, "step": 5397 }, { "epoch": 0.7317833660950315, "grad_norm": 4.422402257681696, "learning_rate": 3.5411532218498296e-07, "loss": 1.0484, "step": 5398 }, { "epoch": 0.7319189317426964, "grad_norm": 13.363563958665441, "learning_rate": 3.537801312221448e-07, "loss": 1.0804, "step": 5399 }, { "epoch": 0.7320544973903613, "grad_norm": 6.115143634013468, "learning_rate": 3.5344506487770774e-07, "loss": 1.0831, "step": 5400 }, { "epoch": 0.7321900630380261, "grad_norm": 4.312382249933636, "learning_rate": 3.5311012321628577e-07, "loss": 1.1171, "step": 5401 }, { "epoch": 0.7323256286856911, "grad_norm": 3.8265674149721125, "learning_rate": 3.527753063024708e-07, "loss": 1.0944, "step": 5402 }, { "epoch": 0.7324611943333559, "grad_norm": 4.410742655786234, "learning_rate": 3.524406142008285e-07, "loss": 1.0807, "step": 5403 }, { "epoch": 0.7325967599810208, "grad_norm": 3.5654161535602324, "learning_rate": 3.5210604697590297e-07, "loss": 1.0904, "step": 5404 }, { "epoch": 0.7327323256286857, "grad_norm": 6.3811554467931675, "learning_rate": 3.5177160469221176e-07, "loss": 1.1359, "step": 5405 }, { "epoch": 0.7328678912763505, "grad_norm": 4.9775729753388145, "learning_rate": 3.514372874142497e-07, "loss": 1.1529, "step": 5406 }, { "epoch": 0.7330034569240155, "grad_norm": 4.513304285280345, "learning_rate": 3.511030952064874e-07, "loss": 1.1241, "step": 5407 }, { "epoch": 0.7331390225716803, "grad_norm": 5.304189145899167, "learning_rate": 3.507690281333712e-07, "loss": 1.0974, "step": 5408 }, { "epoch": 0.7332745882193452, "grad_norm": 4.275059110168994, "learning_rate": 3.504350862593231e-07, "loss": 1.0934, "step": 5409 }, { "epoch": 0.7334101538670101, "grad_norm": 5.087130394173443, "learning_rate": 3.501012696487412e-07, "loss": 1.0879, "step": 5410 }, { "epoch": 0.733545719514675, "grad_norm": 5.09899250789194, "learning_rate": 3.497675783659995e-07, "loss": 1.0991, "step": 5411 }, { "epoch": 0.7336812851623399, "grad_norm": 4.171580501571318, "learning_rate": 3.4943401247544766e-07, "loss": 1.1008, "step": 5412 }, { "epoch": 0.7338168508100047, "grad_norm": 4.24723678147912, "learning_rate": 3.491005720414113e-07, "loss": 1.082, "step": 5413 }, { "epoch": 0.7339524164576696, "grad_norm": 6.077690148319606, "learning_rate": 3.487672571281918e-07, "loss": 1.0736, "step": 5414 }, { "epoch": 0.7340879821053345, "grad_norm": 4.505331007462943, "learning_rate": 3.4843406780006644e-07, "loss": 1.0911, "step": 5415 }, { "epoch": 0.7342235477529994, "grad_norm": 7.687067613787282, "learning_rate": 3.481010041212874e-07, "loss": 1.1026, "step": 5416 }, { "epoch": 0.7343591134006643, "grad_norm": 5.279119601835294, "learning_rate": 3.477680661560846e-07, "loss": 1.1116, "step": 5417 }, { "epoch": 0.7344946790483291, "grad_norm": 7.541421073515527, "learning_rate": 3.4743525396866114e-07, "loss": 1.1458, "step": 5418 }, { "epoch": 0.734630244695994, "grad_norm": 4.3466462505550405, "learning_rate": 3.471025676231986e-07, "loss": 1.1099, "step": 5419 }, { "epoch": 0.734765810343659, "grad_norm": 5.326584774438117, "learning_rate": 3.467700071838515e-07, "loss": 1.1555, "step": 5420 }, { "epoch": 0.7349013759913238, "grad_norm": 5.317012490474797, "learning_rate": 3.4643757271475293e-07, "loss": 1.0905, "step": 5421 }, { "epoch": 0.7350369416389887, "grad_norm": 4.3780302136869675, "learning_rate": 3.4610526428000897e-07, "loss": 1.1038, "step": 5422 }, { "epoch": 0.7351725072866535, "grad_norm": 8.105226102491947, "learning_rate": 3.457730819437038e-07, "loss": 1.1224, "step": 5423 }, { "epoch": 0.7353080729343184, "grad_norm": 5.178813189193366, "learning_rate": 3.454410257698951e-07, "loss": 1.0769, "step": 5424 }, { "epoch": 0.7354436385819834, "grad_norm": 5.450389913464041, "learning_rate": 3.451090958226184e-07, "loss": 1.1097, "step": 5425 }, { "epoch": 0.7355792042296482, "grad_norm": 4.921151971819644, "learning_rate": 3.447772921658825e-07, "loss": 1.0976, "step": 5426 }, { "epoch": 0.7357147698773131, "grad_norm": 4.933760834701751, "learning_rate": 3.444456148636744e-07, "loss": 1.1101, "step": 5427 }, { "epoch": 0.7358503355249779, "grad_norm": 9.254767312639387, "learning_rate": 3.441140639799546e-07, "loss": 1.0944, "step": 5428 }, { "epoch": 0.7359859011726428, "grad_norm": 11.84818420604152, "learning_rate": 3.4378263957866026e-07, "loss": 1.0842, "step": 5429 }, { "epoch": 0.7361214668203078, "grad_norm": 3.618047374929492, "learning_rate": 3.4345134172370407e-07, "loss": 1.0662, "step": 5430 }, { "epoch": 0.7362570324679726, "grad_norm": 3.5488229187163856, "learning_rate": 3.431201704789741e-07, "loss": 1.1262, "step": 5431 }, { "epoch": 0.7363925981156375, "grad_norm": 8.085457041905277, "learning_rate": 3.427891259083342e-07, "loss": 1.0767, "step": 5432 }, { "epoch": 0.7365281637633023, "grad_norm": 5.207308709300639, "learning_rate": 3.4245820807562365e-07, "loss": 1.0899, "step": 5433 }, { "epoch": 0.7366637294109672, "grad_norm": 5.59747554312352, "learning_rate": 3.4212741704465733e-07, "loss": 1.1096, "step": 5434 }, { "epoch": 0.7367992950586322, "grad_norm": 12.032930222731144, "learning_rate": 3.4179675287922573e-07, "loss": 1.0969, "step": 5435 }, { "epoch": 0.736934860706297, "grad_norm": 16.949517423223302, "learning_rate": 3.4146621564309476e-07, "loss": 1.0999, "step": 5436 }, { "epoch": 0.7370704263539619, "grad_norm": 13.163961427679732, "learning_rate": 3.41135805400006e-07, "loss": 1.1315, "step": 5437 }, { "epoch": 0.7372059920016268, "grad_norm": 4.66944791147922, "learning_rate": 3.408055222136763e-07, "loss": 1.099, "step": 5438 }, { "epoch": 0.7373415576492917, "grad_norm": 6.4533895548175995, "learning_rate": 3.4047536614779837e-07, "loss": 1.1245, "step": 5439 }, { "epoch": 0.7374771232969566, "grad_norm": 6.478322991156309, "learning_rate": 3.4014533726604046e-07, "loss": 1.1055, "step": 5440 }, { "epoch": 0.7376126889446214, "grad_norm": 10.664533382688036, "learning_rate": 3.398154356320454e-07, "loss": 1.0715, "step": 5441 }, { "epoch": 0.7377482545922863, "grad_norm": 7.364506910096827, "learning_rate": 3.394856613094322e-07, "loss": 1.0512, "step": 5442 }, { "epoch": 0.7378838202399512, "grad_norm": 6.0268945182989455, "learning_rate": 3.3915601436179564e-07, "loss": 1.0553, "step": 5443 }, { "epoch": 0.7380193858876161, "grad_norm": 5.179824443292803, "learning_rate": 3.388264948527052e-07, "loss": 1.099, "step": 5444 }, { "epoch": 0.738154951535281, "grad_norm": 5.299433578923633, "learning_rate": 3.384971028457063e-07, "loss": 1.1276, "step": 5445 }, { "epoch": 0.7382905171829458, "grad_norm": 3.84297622645784, "learning_rate": 3.381678384043195e-07, "loss": 1.0967, "step": 5446 }, { "epoch": 0.7384260828306107, "grad_norm": 5.141689799588693, "learning_rate": 3.378387015920409e-07, "loss": 1.0843, "step": 5447 }, { "epoch": 0.7385616484782757, "grad_norm": 4.683393571288546, "learning_rate": 3.3750969247234184e-07, "loss": 1.1304, "step": 5448 }, { "epoch": 0.7386972141259405, "grad_norm": 3.6159619005206656, "learning_rate": 3.371808111086694e-07, "loss": 1.0854, "step": 5449 }, { "epoch": 0.7388327797736054, "grad_norm": 3.547309807555913, "learning_rate": 3.3685205756444534e-07, "loss": 1.0637, "step": 5450 }, { "epoch": 0.7389683454212702, "grad_norm": 7.24677694912382, "learning_rate": 3.365234319030675e-07, "loss": 1.0892, "step": 5451 }, { "epoch": 0.7391039110689351, "grad_norm": 5.458342775372177, "learning_rate": 3.361949341879087e-07, "loss": 1.0939, "step": 5452 }, { "epoch": 0.7392394767166001, "grad_norm": 5.370221899868765, "learning_rate": 3.35866564482317e-07, "loss": 1.1077, "step": 5453 }, { "epoch": 0.7393750423642649, "grad_norm": 5.657508042915505, "learning_rate": 3.3553832284961603e-07, "loss": 1.0968, "step": 5454 }, { "epoch": 0.7395106080119298, "grad_norm": 5.040866320254737, "learning_rate": 3.352102093531045e-07, "loss": 1.0959, "step": 5455 }, { "epoch": 0.7396461736595946, "grad_norm": 6.3423391053012885, "learning_rate": 3.348822240560569e-07, "loss": 1.1178, "step": 5456 }, { "epoch": 0.7397817393072595, "grad_norm": 4.886629134265705, "learning_rate": 3.345543670217217e-07, "loss": 1.0904, "step": 5457 }, { "epoch": 0.7399173049549245, "grad_norm": 5.151183312584639, "learning_rate": 3.3422663831332477e-07, "loss": 1.0863, "step": 5458 }, { "epoch": 0.7400528706025893, "grad_norm": 14.727422471553579, "learning_rate": 3.338990379940646e-07, "loss": 1.1194, "step": 5459 }, { "epoch": 0.7401884362502542, "grad_norm": 8.401442937326836, "learning_rate": 3.335715661271178e-07, "loss": 1.0782, "step": 5460 }, { "epoch": 0.740324001897919, "grad_norm": 4.099139475501099, "learning_rate": 3.3324422277563326e-07, "loss": 1.1142, "step": 5461 }, { "epoch": 0.740459567545584, "grad_norm": 5.070304841600822, "learning_rate": 3.32917008002738e-07, "loss": 1.0873, "step": 5462 }, { "epoch": 0.7405951331932489, "grad_norm": 5.240993413320609, "learning_rate": 3.3258992187153144e-07, "loss": 1.1016, "step": 5463 }, { "epoch": 0.7407306988409137, "grad_norm": 9.023636724853413, "learning_rate": 3.322629644450909e-07, "loss": 1.1021, "step": 5464 }, { "epoch": 0.7408662644885786, "grad_norm": 4.676542442888465, "learning_rate": 3.319361357864663e-07, "loss": 1.097, "step": 5465 }, { "epoch": 0.7410018301362434, "grad_norm": 4.870527541840197, "learning_rate": 3.316094359586852e-07, "loss": 1.0776, "step": 5466 }, { "epoch": 0.7411373957839084, "grad_norm": 5.822598721579187, "learning_rate": 3.3128286502474803e-07, "loss": 1.1166, "step": 5467 }, { "epoch": 0.7412729614315733, "grad_norm": 5.039048670567861, "learning_rate": 3.3095642304763183e-07, "loss": 1.1119, "step": 5468 }, { "epoch": 0.7414085270792381, "grad_norm": 13.672999838361216, "learning_rate": 3.306301100902883e-07, "loss": 1.0352, "step": 5469 }, { "epoch": 0.741544092726903, "grad_norm": 4.377018641763959, "learning_rate": 3.303039262156443e-07, "loss": 1.0377, "step": 5470 }, { "epoch": 0.7416796583745678, "grad_norm": 5.903854345646597, "learning_rate": 3.2997787148660195e-07, "loss": 1.0755, "step": 5471 }, { "epoch": 0.7418152240222328, "grad_norm": 4.712973229407215, "learning_rate": 3.296519459660383e-07, "loss": 1.1591, "step": 5472 }, { "epoch": 0.7419507896698977, "grad_norm": 6.923638878482709, "learning_rate": 3.293261497168054e-07, "loss": 1.0977, "step": 5473 }, { "epoch": 0.7420863553175625, "grad_norm": 8.550545959431238, "learning_rate": 3.2900048280173055e-07, "loss": 1.0777, "step": 5474 }, { "epoch": 0.7422219209652274, "grad_norm": 5.607459207825113, "learning_rate": 3.2867494528361605e-07, "loss": 1.1071, "step": 5475 }, { "epoch": 0.7423574866128922, "grad_norm": 5.957565249805913, "learning_rate": 3.2834953722523915e-07, "loss": 1.0877, "step": 5476 }, { "epoch": 0.7424930522605572, "grad_norm": 4.792200955057543, "learning_rate": 3.2802425868935277e-07, "loss": 1.0799, "step": 5477 }, { "epoch": 0.7426286179082221, "grad_norm": 4.730710597143724, "learning_rate": 3.276991097386831e-07, "loss": 1.1117, "step": 5478 }, { "epoch": 0.7427641835558869, "grad_norm": 5.611814990702528, "learning_rate": 3.27374090435934e-07, "loss": 1.1261, "step": 5479 }, { "epoch": 0.7428997492035518, "grad_norm": 15.205880481624728, "learning_rate": 3.270492008437815e-07, "loss": 1.056, "step": 5480 }, { "epoch": 0.7430353148512167, "grad_norm": 5.807312218754133, "learning_rate": 3.267244410248794e-07, "loss": 1.0552, "step": 5481 }, { "epoch": 0.7431708804988816, "grad_norm": 3.7085531654592354, "learning_rate": 3.2639981104185355e-07, "loss": 1.0947, "step": 5482 }, { "epoch": 0.7433064461465465, "grad_norm": 6.267155149861075, "learning_rate": 3.260753109573078e-07, "loss": 1.1114, "step": 5483 }, { "epoch": 0.7434420117942113, "grad_norm": 15.550128864074377, "learning_rate": 3.2575094083381837e-07, "loss": 1.1294, "step": 5484 }, { "epoch": 0.7435775774418762, "grad_norm": 7.412054887646936, "learning_rate": 3.2542670073393776e-07, "loss": 1.069, "step": 5485 }, { "epoch": 0.7437131430895411, "grad_norm": 4.3394443238911835, "learning_rate": 3.251025907201932e-07, "loss": 1.0776, "step": 5486 }, { "epoch": 0.743848708737206, "grad_norm": 27.65059403818702, "learning_rate": 3.247786108550866e-07, "loss": 1.127, "step": 5487 }, { "epoch": 0.7439842743848709, "grad_norm": 4.376967819837711, "learning_rate": 3.244547612010952e-07, "loss": 1.113, "step": 5488 }, { "epoch": 0.7441198400325357, "grad_norm": 4.974266958371762, "learning_rate": 3.241310418206705e-07, "loss": 1.1166, "step": 5489 }, { "epoch": 0.7442554056802007, "grad_norm": 3.930747427294983, "learning_rate": 3.238074527762394e-07, "loss": 1.0846, "step": 5490 }, { "epoch": 0.7443909713278655, "grad_norm": 4.915101066555862, "learning_rate": 3.2348399413020365e-07, "loss": 1.0899, "step": 5491 }, { "epoch": 0.7445265369755304, "grad_norm": 3.4133218127909157, "learning_rate": 3.231606659449394e-07, "loss": 1.1301, "step": 5492 }, { "epoch": 0.7446621026231953, "grad_norm": 6.199223309948796, "learning_rate": 3.228374682827982e-07, "loss": 1.0921, "step": 5493 }, { "epoch": 0.7447976682708601, "grad_norm": 22.404317984944903, "learning_rate": 3.2251440120610596e-07, "loss": 1.1076, "step": 5494 }, { "epoch": 0.7449332339185251, "grad_norm": 5.526049278641512, "learning_rate": 3.2219146477716376e-07, "loss": 1.0997, "step": 5495 }, { "epoch": 0.7450687995661899, "grad_norm": 6.68787289537586, "learning_rate": 3.2186865905824724e-07, "loss": 1.0824, "step": 5496 }, { "epoch": 0.7452043652138548, "grad_norm": 6.104923270398986, "learning_rate": 3.215459841116073e-07, "loss": 1.0894, "step": 5497 }, { "epoch": 0.7453399308615197, "grad_norm": 4.4148550806967215, "learning_rate": 3.212234399994682e-07, "loss": 1.0883, "step": 5498 }, { "epoch": 0.7454754965091845, "grad_norm": 9.046819842421339, "learning_rate": 3.209010267840315e-07, "loss": 1.1402, "step": 5499 }, { "epoch": 0.7456110621568495, "grad_norm": 6.5627717330077475, "learning_rate": 3.205787445274707e-07, "loss": 1.1366, "step": 5500 }, { "epoch": 0.7457466278045143, "grad_norm": 5.18117925265423, "learning_rate": 3.2025659329193654e-07, "loss": 1.1195, "step": 5501 }, { "epoch": 0.7458821934521792, "grad_norm": 7.591916607057478, "learning_rate": 3.1993457313955217e-07, "loss": 1.1219, "step": 5502 }, { "epoch": 0.7460177590998441, "grad_norm": 5.006076429222606, "learning_rate": 3.19612684132418e-07, "loss": 1.0825, "step": 5503 }, { "epoch": 0.746153324747509, "grad_norm": 4.611180831770958, "learning_rate": 3.1929092633260667e-07, "loss": 1.0617, "step": 5504 }, { "epoch": 0.7462888903951739, "grad_norm": 6.595041688877827, "learning_rate": 3.1896929980216704e-07, "loss": 1.0866, "step": 5505 }, { "epoch": 0.7464244560428387, "grad_norm": 4.8806373373890155, "learning_rate": 3.186478046031221e-07, "loss": 1.1066, "step": 5506 }, { "epoch": 0.7465600216905036, "grad_norm": 4.840760870767363, "learning_rate": 3.1832644079746984e-07, "loss": 1.1349, "step": 5507 }, { "epoch": 0.7466955873381685, "grad_norm": 4.958631373449598, "learning_rate": 3.180052084471827e-07, "loss": 1.1085, "step": 5508 }, { "epoch": 0.7468311529858334, "grad_norm": 6.424524353750389, "learning_rate": 3.176841076142077e-07, "loss": 1.1304, "step": 5509 }, { "epoch": 0.7469667186334983, "grad_norm": 16.06690655833677, "learning_rate": 3.173631383604667e-07, "loss": 1.0939, "step": 5510 }, { "epoch": 0.7471022842811631, "grad_norm": 4.158815467308142, "learning_rate": 3.170423007478561e-07, "loss": 1.0623, "step": 5511 }, { "epoch": 0.747237849928828, "grad_norm": 10.170454877667575, "learning_rate": 3.167215948382471e-07, "loss": 1.0947, "step": 5512 }, { "epoch": 0.747373415576493, "grad_norm": 5.161078404319699, "learning_rate": 3.164010206934845e-07, "loss": 1.1002, "step": 5513 }, { "epoch": 0.7475089812241578, "grad_norm": 10.602114533653078, "learning_rate": 3.160805783753897e-07, "loss": 1.0949, "step": 5514 }, { "epoch": 0.7476445468718227, "grad_norm": 5.400823870422428, "learning_rate": 3.1576026794575615e-07, "loss": 1.1201, "step": 5515 }, { "epoch": 0.7477801125194876, "grad_norm": 5.758415079363835, "learning_rate": 3.154400894663546e-07, "loss": 1.1368, "step": 5516 }, { "epoch": 0.7479156781671524, "grad_norm": 4.476417582684305, "learning_rate": 3.1512004299892747e-07, "loss": 1.1107, "step": 5517 }, { "epoch": 0.7480512438148174, "grad_norm": 22.44512558919976, "learning_rate": 3.1480012860519453e-07, "loss": 1.1011, "step": 5518 }, { "epoch": 0.7481868094624822, "grad_norm": 4.395771333749116, "learning_rate": 3.1448034634684764e-07, "loss": 1.0978, "step": 5519 }, { "epoch": 0.7483223751101471, "grad_norm": 5.33300235321106, "learning_rate": 3.141606962855553e-07, "loss": 1.1125, "step": 5520 }, { "epoch": 0.748457940757812, "grad_norm": 4.719978793289064, "learning_rate": 3.1384117848295843e-07, "loss": 1.0847, "step": 5521 }, { "epoch": 0.7485935064054768, "grad_norm": 4.194168032658333, "learning_rate": 3.135217930006747e-07, "loss": 1.1263, "step": 5522 }, { "epoch": 0.7487290720531418, "grad_norm": 6.981050954609606, "learning_rate": 3.1320253990029387e-07, "loss": 1.1509, "step": 5523 }, { "epoch": 0.7488646377008066, "grad_norm": 4.481140097427943, "learning_rate": 3.128834192433826e-07, "loss": 1.0792, "step": 5524 }, { "epoch": 0.7490002033484715, "grad_norm": 5.953509271985122, "learning_rate": 3.125644310914798e-07, "loss": 1.0954, "step": 5525 }, { "epoch": 0.7491357689961364, "grad_norm": 7.7174942566080444, "learning_rate": 3.122455755061002e-07, "loss": 1.1036, "step": 5526 }, { "epoch": 0.7492713346438012, "grad_norm": 4.5861998359203575, "learning_rate": 3.1192685254873254e-07, "loss": 1.0823, "step": 5527 }, { "epoch": 0.7494069002914662, "grad_norm": 5.209360967271628, "learning_rate": 3.1160826228084004e-07, "loss": 1.0737, "step": 5528 }, { "epoch": 0.749542465939131, "grad_norm": 4.322934238627817, "learning_rate": 3.1128980476386035e-07, "loss": 1.1102, "step": 5529 }, { "epoch": 0.7496780315867959, "grad_norm": 7.040032862033497, "learning_rate": 3.109714800592055e-07, "loss": 1.0972, "step": 5530 }, { "epoch": 0.7498135972344608, "grad_norm": 23.17000037954723, "learning_rate": 3.106532882282618e-07, "loss": 1.0849, "step": 5531 }, { "epoch": 0.7499491628821257, "grad_norm": 6.685832941448193, "learning_rate": 3.103352293323901e-07, "loss": 1.076, "step": 5532 }, { "epoch": 0.7500847285297906, "grad_norm": 4.967762992140464, "learning_rate": 3.1001730343292556e-07, "loss": 1.0958, "step": 5533 }, { "epoch": 0.7502202941774554, "grad_norm": 7.013636185072051, "learning_rate": 3.096995105911776e-07, "loss": 1.1204, "step": 5534 }, { "epoch": 0.7503558598251203, "grad_norm": 5.66264017472676, "learning_rate": 3.093818508684302e-07, "loss": 1.1038, "step": 5535 }, { "epoch": 0.7504914254727852, "grad_norm": 15.191794040020984, "learning_rate": 3.090643243259414e-07, "loss": 1.1194, "step": 5536 }, { "epoch": 0.7506269911204501, "grad_norm": 4.692945910401092, "learning_rate": 3.0874693102494374e-07, "loss": 1.1184, "step": 5537 }, { "epoch": 0.750762556768115, "grad_norm": 4.096692014852002, "learning_rate": 3.084296710266441e-07, "loss": 1.0613, "step": 5538 }, { "epoch": 0.7508981224157798, "grad_norm": 4.832771509272056, "learning_rate": 3.081125443922237e-07, "loss": 1.1277, "step": 5539 }, { "epoch": 0.7510336880634447, "grad_norm": 4.392662210067362, "learning_rate": 3.077955511828374e-07, "loss": 1.1008, "step": 5540 }, { "epoch": 0.7511692537111097, "grad_norm": 5.40247972673518, "learning_rate": 3.074786914596151e-07, "loss": 1.1204, "step": 5541 }, { "epoch": 0.7513048193587745, "grad_norm": 6.4055135692132845, "learning_rate": 3.071619652836608e-07, "loss": 1.0937, "step": 5542 }, { "epoch": 0.7514403850064394, "grad_norm": 6.939851287934574, "learning_rate": 3.068453727160525e-07, "loss": 1.1047, "step": 5543 }, { "epoch": 0.7515759506541042, "grad_norm": 5.036033436564992, "learning_rate": 3.065289138178426e-07, "loss": 1.0714, "step": 5544 }, { "epoch": 0.7517115163017691, "grad_norm": 8.116343352929269, "learning_rate": 3.062125886500578e-07, "loss": 1.1, "step": 5545 }, { "epoch": 0.7518470819494341, "grad_norm": 5.983965433025501, "learning_rate": 3.0589639727369886e-07, "loss": 1.1031, "step": 5546 }, { "epoch": 0.7519826475970989, "grad_norm": 21.974220681761803, "learning_rate": 3.0558033974974076e-07, "loss": 1.113, "step": 5547 }, { "epoch": 0.7521182132447638, "grad_norm": 5.3792667370761915, "learning_rate": 3.052644161391328e-07, "loss": 1.0873, "step": 5548 }, { "epoch": 0.7522537788924286, "grad_norm": 6.028053964910032, "learning_rate": 3.0494862650279816e-07, "loss": 1.116, "step": 5549 }, { "epoch": 0.7523893445400935, "grad_norm": 7.80482778913003, "learning_rate": 3.046329709016345e-07, "loss": 1.0888, "step": 5550 }, { "epoch": 0.7525249101877585, "grad_norm": 4.814848099197018, "learning_rate": 3.043174493965136e-07, "loss": 1.128, "step": 5551 }, { "epoch": 0.7526604758354233, "grad_norm": 5.548526163819058, "learning_rate": 3.040020620482812e-07, "loss": 1.0763, "step": 5552 }, { "epoch": 0.7527960414830882, "grad_norm": 5.4751273360458015, "learning_rate": 3.0368680891775755e-07, "loss": 1.1036, "step": 5553 }, { "epoch": 0.752931607130753, "grad_norm": 4.703884254321456, "learning_rate": 3.033716900657357e-07, "loss": 1.1228, "step": 5554 }, { "epoch": 0.753067172778418, "grad_norm": 4.368785569001915, "learning_rate": 3.0305670555298533e-07, "loss": 1.1113, "step": 5555 }, { "epoch": 0.7532027384260829, "grad_norm": 5.614967649507069, "learning_rate": 3.027418554402473e-07, "loss": 1.0757, "step": 5556 }, { "epoch": 0.7533383040737477, "grad_norm": 5.765405370786625, "learning_rate": 3.024271397882393e-07, "loss": 1.091, "step": 5557 }, { "epoch": 0.7534738697214126, "grad_norm": 6.78769679854762, "learning_rate": 3.021125586576504e-07, "loss": 1.1369, "step": 5558 }, { "epoch": 0.7536094353690774, "grad_norm": 4.655633253933488, "learning_rate": 3.017981121091464e-07, "loss": 1.0953, "step": 5559 }, { "epoch": 0.7537450010167424, "grad_norm": 6.676185921768871, "learning_rate": 3.014838002033645e-07, "loss": 1.137, "step": 5560 }, { "epoch": 0.7538805666644073, "grad_norm": 4.802423080035256, "learning_rate": 3.0116962300091876e-07, "loss": 1.0406, "step": 5561 }, { "epoch": 0.7540161323120721, "grad_norm": 6.770304640739647, "learning_rate": 3.0085558056239426e-07, "loss": 1.0819, "step": 5562 }, { "epoch": 0.754151697959737, "grad_norm": 4.249712026520611, "learning_rate": 3.0054167294835306e-07, "loss": 1.0678, "step": 5563 }, { "epoch": 0.7542872636074018, "grad_norm": 4.084817858910036, "learning_rate": 3.002279002193283e-07, "loss": 1.0961, "step": 5564 }, { "epoch": 0.7544228292550668, "grad_norm": 5.516734859430889, "learning_rate": 2.9991426243583005e-07, "loss": 1.033, "step": 5565 }, { "epoch": 0.7545583949027317, "grad_norm": 6.283369736573651, "learning_rate": 2.9960075965833974e-07, "loss": 1.1245, "step": 5566 }, { "epoch": 0.7546939605503965, "grad_norm": 4.323196772706938, "learning_rate": 2.9928739194731444e-07, "loss": 1.0857, "step": 5567 }, { "epoch": 0.7548295261980614, "grad_norm": 9.790182447188867, "learning_rate": 2.9897415936318436e-07, "loss": 1.1095, "step": 5568 }, { "epoch": 0.7549650918457262, "grad_norm": 4.49609652644586, "learning_rate": 2.986610619663542e-07, "loss": 1.0539, "step": 5569 }, { "epoch": 0.7551006574933912, "grad_norm": 4.773611519310314, "learning_rate": 2.983480998172022e-07, "loss": 1.0999, "step": 5570 }, { "epoch": 0.7552362231410561, "grad_norm": 5.5303697341234175, "learning_rate": 2.980352729760807e-07, "loss": 1.0503, "step": 5571 }, { "epoch": 0.7553717887887209, "grad_norm": 12.46643043897839, "learning_rate": 2.9772258150331565e-07, "loss": 1.0938, "step": 5572 }, { "epoch": 0.7555073544363858, "grad_norm": 22.078141662552365, "learning_rate": 2.974100254592075e-07, "loss": 1.125, "step": 5573 }, { "epoch": 0.7556429200840507, "grad_norm": 5.440941323646842, "learning_rate": 2.970976049040299e-07, "loss": 1.0952, "step": 5574 }, { "epoch": 0.7557784857317156, "grad_norm": 5.6699124891344175, "learning_rate": 2.967853198980309e-07, "loss": 1.0934, "step": 5575 }, { "epoch": 0.7559140513793805, "grad_norm": 3.547026768612581, "learning_rate": 2.964731705014324e-07, "loss": 1.1304, "step": 5576 }, { "epoch": 0.7560496170270453, "grad_norm": 4.766807196239218, "learning_rate": 2.9616115677442897e-07, "loss": 1.116, "step": 5577 }, { "epoch": 0.7561851826747102, "grad_norm": 4.780499826910997, "learning_rate": 2.9584927877719145e-07, "loss": 1.0378, "step": 5578 }, { "epoch": 0.7563207483223751, "grad_norm": 8.096204053612352, "learning_rate": 2.9553753656986155e-07, "loss": 1.1125, "step": 5579 }, { "epoch": 0.75645631397004, "grad_norm": 14.824556400195119, "learning_rate": 2.952259302125578e-07, "loss": 1.1072, "step": 5580 }, { "epoch": 0.7565918796177049, "grad_norm": 5.5608931309364475, "learning_rate": 2.9491445976536977e-07, "loss": 1.0576, "step": 5581 }, { "epoch": 0.7567274452653697, "grad_norm": 3.6118610043901946, "learning_rate": 2.9460312528836274e-07, "loss": 1.0721, "step": 5582 }, { "epoch": 0.7568630109130347, "grad_norm": 4.118100667408348, "learning_rate": 2.942919268415748e-07, "loss": 1.0539, "step": 5583 }, { "epoch": 0.7569985765606995, "grad_norm": 8.414889132238427, "learning_rate": 2.9398086448501837e-07, "loss": 1.0788, "step": 5584 }, { "epoch": 0.7571341422083644, "grad_norm": 5.492176973347392, "learning_rate": 2.9366993827867913e-07, "loss": 1.1034, "step": 5585 }, { "epoch": 0.7572697078560293, "grad_norm": 5.276151066920456, "learning_rate": 2.9335914828251694e-07, "loss": 1.0922, "step": 5586 }, { "epoch": 0.7574052735036941, "grad_norm": 4.822176439374636, "learning_rate": 2.9304849455646505e-07, "loss": 1.1166, "step": 5587 }, { "epoch": 0.7575408391513591, "grad_norm": 5.74755385326746, "learning_rate": 2.9273797716043067e-07, "loss": 1.0866, "step": 5588 }, { "epoch": 0.7576764047990239, "grad_norm": 4.769146301746977, "learning_rate": 2.9242759615429467e-07, "loss": 1.0896, "step": 5589 }, { "epoch": 0.7578119704466888, "grad_norm": 4.053959072812565, "learning_rate": 2.9211735159791153e-07, "loss": 1.098, "step": 5590 }, { "epoch": 0.7579475360943537, "grad_norm": 35.302679264454646, "learning_rate": 2.918072435511093e-07, "loss": 1.1264, "step": 5591 }, { "epoch": 0.7580831017420185, "grad_norm": 9.103456453731352, "learning_rate": 2.914972720736901e-07, "loss": 1.1127, "step": 5592 }, { "epoch": 0.7582186673896835, "grad_norm": 6.725710200272151, "learning_rate": 2.9118743722542937e-07, "loss": 1.0997, "step": 5593 }, { "epoch": 0.7583542330373484, "grad_norm": 4.082190871854249, "learning_rate": 2.908777390660765e-07, "loss": 1.1322, "step": 5594 }, { "epoch": 0.7584897986850132, "grad_norm": 4.270551916090518, "learning_rate": 2.9056817765535404e-07, "loss": 1.0962, "step": 5595 }, { "epoch": 0.7586253643326781, "grad_norm": 4.679835076268693, "learning_rate": 2.9025875305295886e-07, "loss": 1.1575, "step": 5596 }, { "epoch": 0.758760929980343, "grad_norm": 4.225487926874066, "learning_rate": 2.8994946531856035e-07, "loss": 1.1161, "step": 5597 }, { "epoch": 0.7588964956280079, "grad_norm": 6.056373547631791, "learning_rate": 2.8964031451180316e-07, "loss": 1.1194, "step": 5598 }, { "epoch": 0.7590320612756728, "grad_norm": 8.915193351345643, "learning_rate": 2.893313006923035e-07, "loss": 1.083, "step": 5599 }, { "epoch": 0.7591676269233376, "grad_norm": 5.509800151689273, "learning_rate": 2.8902242391965335e-07, "loss": 1.1102, "step": 5600 }, { "epoch": 0.7593031925710025, "grad_norm": 7.8709400334862645, "learning_rate": 2.8871368425341634e-07, "loss": 1.1291, "step": 5601 }, { "epoch": 0.7594387582186674, "grad_norm": 5.562030181528754, "learning_rate": 2.8840508175313095e-07, "loss": 1.1336, "step": 5602 }, { "epoch": 0.7595743238663323, "grad_norm": 5.538441041705162, "learning_rate": 2.880966164783084e-07, "loss": 1.0867, "step": 5603 }, { "epoch": 0.7597098895139972, "grad_norm": 3.690517241749186, "learning_rate": 2.87788288488434e-07, "loss": 1.0898, "step": 5604 }, { "epoch": 0.759845455161662, "grad_norm": 6.790058204943525, "learning_rate": 2.8748009784296625e-07, "loss": 1.1, "step": 5605 }, { "epoch": 0.759981020809327, "grad_norm": 6.016526706694563, "learning_rate": 2.871720446013374e-07, "loss": 1.1269, "step": 5606 }, { "epoch": 0.7601165864569918, "grad_norm": 10.435407817496493, "learning_rate": 2.8686412882295287e-07, "loss": 1.0644, "step": 5607 }, { "epoch": 0.7602521521046567, "grad_norm": 11.766483254212828, "learning_rate": 2.865563505671921e-07, "loss": 1.1173, "step": 5608 }, { "epoch": 0.7603877177523216, "grad_norm": 4.875876878916049, "learning_rate": 2.8624870989340757e-07, "loss": 1.0844, "step": 5609 }, { "epoch": 0.7605232833999864, "grad_norm": 12.617514369369948, "learning_rate": 2.8594120686092515e-07, "loss": 1.1094, "step": 5610 }, { "epoch": 0.7606588490476514, "grad_norm": 6.390977684091161, "learning_rate": 2.8563384152904503e-07, "loss": 1.1081, "step": 5611 }, { "epoch": 0.7607944146953162, "grad_norm": 12.668254984681822, "learning_rate": 2.8532661395703905e-07, "loss": 1.0987, "step": 5612 }, { "epoch": 0.7609299803429811, "grad_norm": 6.610511869210655, "learning_rate": 2.8501952420415486e-07, "loss": 1.1401, "step": 5613 }, { "epoch": 0.761065545990646, "grad_norm": 4.316241173776723, "learning_rate": 2.847125723296111e-07, "loss": 1.074, "step": 5614 }, { "epoch": 0.7612011116383108, "grad_norm": 4.763317139154261, "learning_rate": 2.8440575839260227e-07, "loss": 1.08, "step": 5615 }, { "epoch": 0.7613366772859758, "grad_norm": 6.798629633703065, "learning_rate": 2.8409908245229374e-07, "loss": 1.0808, "step": 5616 }, { "epoch": 0.7614722429336406, "grad_norm": 5.939931048074757, "learning_rate": 2.8379254456782685e-07, "loss": 1.1099, "step": 5617 }, { "epoch": 0.7616078085813055, "grad_norm": 7.72658687671058, "learning_rate": 2.8348614479831367e-07, "loss": 1.065, "step": 5618 }, { "epoch": 0.7617433742289704, "grad_norm": 5.784449114967797, "learning_rate": 2.8317988320284223e-07, "loss": 1.0977, "step": 5619 }, { "epoch": 0.7618789398766352, "grad_norm": 5.809376438797757, "learning_rate": 2.828737598404716e-07, "loss": 1.0782, "step": 5620 }, { "epoch": 0.7620145055243002, "grad_norm": 5.2847834145154495, "learning_rate": 2.8256777477023617e-07, "loss": 1.0877, "step": 5621 }, { "epoch": 0.762150071171965, "grad_norm": 6.610926906169388, "learning_rate": 2.822619280511418e-07, "loss": 1.1212, "step": 5622 }, { "epoch": 0.7622856368196299, "grad_norm": 4.5230122387095175, "learning_rate": 2.8195621974216975e-07, "loss": 1.1094, "step": 5623 }, { "epoch": 0.7624212024672948, "grad_norm": 6.404208604563711, "learning_rate": 2.816506499022725e-07, "loss": 1.0974, "step": 5624 }, { "epoch": 0.7625567681149596, "grad_norm": 5.482668384810694, "learning_rate": 2.8134521859037707e-07, "loss": 1.1083, "step": 5625 }, { "epoch": 0.7626923337626246, "grad_norm": 8.7943748519419, "learning_rate": 2.810399258653836e-07, "loss": 1.1208, "step": 5626 }, { "epoch": 0.7628278994102894, "grad_norm": 6.581533838110858, "learning_rate": 2.807347717861653e-07, "loss": 1.0999, "step": 5627 }, { "epoch": 0.7629634650579543, "grad_norm": 5.380584315014858, "learning_rate": 2.8042975641156864e-07, "loss": 1.0978, "step": 5628 }, { "epoch": 0.7630990307056192, "grad_norm": 7.889023310706642, "learning_rate": 2.8012487980041354e-07, "loss": 1.118, "step": 5629 }, { "epoch": 0.7632345963532841, "grad_norm": 4.802996445042986, "learning_rate": 2.798201420114931e-07, "loss": 1.0988, "step": 5630 }, { "epoch": 0.763370162000949, "grad_norm": 5.29729707267734, "learning_rate": 2.795155431035735e-07, "loss": 1.0442, "step": 5631 }, { "epoch": 0.7635057276486138, "grad_norm": 4.6845480607372245, "learning_rate": 2.7921108313539423e-07, "loss": 1.1487, "step": 5632 }, { "epoch": 0.7636412932962787, "grad_norm": 5.276478251595138, "learning_rate": 2.78906762165668e-07, "loss": 1.1084, "step": 5633 }, { "epoch": 0.7637768589439436, "grad_norm": 4.796655562485571, "learning_rate": 2.786025802530807e-07, "loss": 1.1326, "step": 5634 }, { "epoch": 0.7639124245916085, "grad_norm": 4.302496399707866, "learning_rate": 2.782985374562915e-07, "loss": 1.0981, "step": 5635 }, { "epoch": 0.7640479902392734, "grad_norm": 5.865880126925022, "learning_rate": 2.779946338339325e-07, "loss": 1.0874, "step": 5636 }, { "epoch": 0.7641835558869382, "grad_norm": 5.312235180371148, "learning_rate": 2.776908694446095e-07, "loss": 1.0859, "step": 5637 }, { "epoch": 0.7643191215346031, "grad_norm": 4.588382379649128, "learning_rate": 2.773872443469005e-07, "loss": 1.0787, "step": 5638 }, { "epoch": 0.7644546871822681, "grad_norm": 3.8206722156208013, "learning_rate": 2.770837585993575e-07, "loss": 1.0631, "step": 5639 }, { "epoch": 0.7645902528299329, "grad_norm": 10.030308775904517, "learning_rate": 2.767804122605053e-07, "loss": 1.0818, "step": 5640 }, { "epoch": 0.7647258184775978, "grad_norm": 7.712272159930957, "learning_rate": 2.764772053888419e-07, "loss": 1.1067, "step": 5641 }, { "epoch": 0.7648613841252626, "grad_norm": 6.289456970850219, "learning_rate": 2.7617413804283815e-07, "loss": 1.1308, "step": 5642 }, { "epoch": 0.7649969497729275, "grad_norm": 5.806254001163002, "learning_rate": 2.7587121028093853e-07, "loss": 1.104, "step": 5643 }, { "epoch": 0.7651325154205925, "grad_norm": 8.509827838782773, "learning_rate": 2.7556842216155996e-07, "loss": 1.0944, "step": 5644 }, { "epoch": 0.7652680810682573, "grad_norm": 10.85412236070112, "learning_rate": 2.752657737430928e-07, "loss": 1.0794, "step": 5645 }, { "epoch": 0.7654036467159222, "grad_norm": 7.018598298748194, "learning_rate": 2.749632650839006e-07, "loss": 1.1215, "step": 5646 }, { "epoch": 0.765539212363587, "grad_norm": 6.253572691349422, "learning_rate": 2.746608962423196e-07, "loss": 1.1097, "step": 5647 }, { "epoch": 0.7656747780112519, "grad_norm": 5.084291431755176, "learning_rate": 2.7435866727665924e-07, "loss": 1.0718, "step": 5648 }, { "epoch": 0.7658103436589169, "grad_norm": 5.455406432425319, "learning_rate": 2.74056578245202e-07, "loss": 1.0676, "step": 5649 }, { "epoch": 0.7659459093065817, "grad_norm": 4.618471278754439, "learning_rate": 2.7375462920620354e-07, "loss": 1.1064, "step": 5650 }, { "epoch": 0.7660814749542466, "grad_norm": 5.902659713846883, "learning_rate": 2.7345282021789204e-07, "loss": 1.1205, "step": 5651 }, { "epoch": 0.7662170406019114, "grad_norm": 4.867022381147176, "learning_rate": 2.731511513384696e-07, "loss": 1.0692, "step": 5652 }, { "epoch": 0.7663526062495764, "grad_norm": 5.785847708040533, "learning_rate": 2.7284962262610946e-07, "loss": 1.114, "step": 5653 }, { "epoch": 0.7664881718972413, "grad_norm": 7.6391331806741425, "learning_rate": 2.7254823413896056e-07, "loss": 1.0998, "step": 5654 }, { "epoch": 0.7666237375449061, "grad_norm": 4.132220049494197, "learning_rate": 2.7224698593514183e-07, "loss": 1.1167, "step": 5655 }, { "epoch": 0.766759303192571, "grad_norm": 4.890377332754441, "learning_rate": 2.7194587807274803e-07, "loss": 1.0985, "step": 5656 }, { "epoch": 0.7668948688402358, "grad_norm": 6.182172811201991, "learning_rate": 2.7164491060984417e-07, "loss": 1.1071, "step": 5657 }, { "epoch": 0.7670304344879008, "grad_norm": 5.1538191088831455, "learning_rate": 2.713440836044705e-07, "loss": 1.1268, "step": 5658 }, { "epoch": 0.7671660001355657, "grad_norm": 6.21670128855155, "learning_rate": 2.710433971146381e-07, "loss": 1.0938, "step": 5659 }, { "epoch": 0.7673015657832305, "grad_norm": 5.471257716246492, "learning_rate": 2.7074285119833315e-07, "loss": 1.1268, "step": 5660 }, { "epoch": 0.7674371314308954, "grad_norm": 6.037173148496545, "learning_rate": 2.704424459135123e-07, "loss": 1.0696, "step": 5661 }, { "epoch": 0.7675726970785602, "grad_norm": 10.716804666135594, "learning_rate": 2.701421813181076e-07, "loss": 1.0609, "step": 5662 }, { "epoch": 0.7677082627262252, "grad_norm": 3.729079665397305, "learning_rate": 2.6984205747002153e-07, "loss": 1.0558, "step": 5663 }, { "epoch": 0.7678438283738901, "grad_norm": 4.026888533743067, "learning_rate": 2.6954207442713174e-07, "loss": 1.1007, "step": 5664 }, { "epoch": 0.7679793940215549, "grad_norm": 4.9712750631051685, "learning_rate": 2.692422322472866e-07, "loss": 1.13, "step": 5665 }, { "epoch": 0.7681149596692198, "grad_norm": 4.5967412017842015, "learning_rate": 2.689425309883089e-07, "loss": 1.0931, "step": 5666 }, { "epoch": 0.7682505253168846, "grad_norm": 5.620455334695531, "learning_rate": 2.6864297070799336e-07, "loss": 1.0851, "step": 5667 }, { "epoch": 0.7683860909645496, "grad_norm": 4.104116854080862, "learning_rate": 2.6834355146410793e-07, "loss": 1.1096, "step": 5668 }, { "epoch": 0.7685216566122145, "grad_norm": 5.976963048554384, "learning_rate": 2.6804427331439327e-07, "loss": 1.0879, "step": 5669 }, { "epoch": 0.7686572222598793, "grad_norm": 7.292426118739483, "learning_rate": 2.677451363165628e-07, "loss": 1.1039, "step": 5670 }, { "epoch": 0.7687927879075442, "grad_norm": 6.205421583593289, "learning_rate": 2.674461405283027e-07, "loss": 1.1502, "step": 5671 }, { "epoch": 0.7689283535552092, "grad_norm": 4.244591377855573, "learning_rate": 2.671472860072721e-07, "loss": 1.0896, "step": 5672 }, { "epoch": 0.769063919202874, "grad_norm": 4.9464876835164775, "learning_rate": 2.6684857281110286e-07, "loss": 1.1604, "step": 5673 }, { "epoch": 0.7691994848505389, "grad_norm": 3.721200308404409, "learning_rate": 2.6655000099739857e-07, "loss": 1.0716, "step": 5674 }, { "epoch": 0.7693350504982037, "grad_norm": 4.819875787507706, "learning_rate": 2.662515706237376e-07, "loss": 1.0549, "step": 5675 }, { "epoch": 0.7694706161458686, "grad_norm": 5.532317586049523, "learning_rate": 2.6595328174766885e-07, "loss": 1.0866, "step": 5676 }, { "epoch": 0.7696061817935336, "grad_norm": 4.223241197934126, "learning_rate": 2.656551344267162e-07, "loss": 1.1277, "step": 5677 }, { "epoch": 0.7697417474411984, "grad_norm": 5.269038079620037, "learning_rate": 2.6535712871837357e-07, "loss": 1.0862, "step": 5678 }, { "epoch": 0.7698773130888633, "grad_norm": 5.019402361480783, "learning_rate": 2.6505926468011044e-07, "loss": 1.1097, "step": 5679 }, { "epoch": 0.7700128787365281, "grad_norm": 6.83087619182886, "learning_rate": 2.6476154236936643e-07, "loss": 1.0895, "step": 5680 }, { "epoch": 0.770148444384193, "grad_norm": 4.206269936621865, "learning_rate": 2.6446396184355545e-07, "loss": 1.0747, "step": 5681 }, { "epoch": 0.770284010031858, "grad_norm": 6.99659502588428, "learning_rate": 2.641665231600634e-07, "loss": 1.1022, "step": 5682 }, { "epoch": 0.7704195756795228, "grad_norm": 6.534555523137139, "learning_rate": 2.6386922637624906e-07, "loss": 1.044, "step": 5683 }, { "epoch": 0.7705551413271877, "grad_norm": 3.365341764242001, "learning_rate": 2.635720715494438e-07, "loss": 1.0708, "step": 5684 }, { "epoch": 0.7706907069748525, "grad_norm": 13.797899653003505, "learning_rate": 2.6327505873695157e-07, "loss": 1.1488, "step": 5685 }, { "epoch": 0.7708262726225175, "grad_norm": 6.354092274170082, "learning_rate": 2.629781879960488e-07, "loss": 1.0897, "step": 5686 }, { "epoch": 0.7709618382701824, "grad_norm": 3.686484714035546, "learning_rate": 2.626814593839848e-07, "loss": 1.0987, "step": 5687 }, { "epoch": 0.7710974039178472, "grad_norm": 6.922958226352556, "learning_rate": 2.623848729579813e-07, "loss": 1.1052, "step": 5688 }, { "epoch": 0.7712329695655121, "grad_norm": 5.054335514820396, "learning_rate": 2.620884287752327e-07, "loss": 1.0995, "step": 5689 }, { "epoch": 0.7713685352131769, "grad_norm": 5.429408658764206, "learning_rate": 2.61792126892906e-07, "loss": 1.0924, "step": 5690 }, { "epoch": 0.7715041008608419, "grad_norm": 4.620077567934063, "learning_rate": 2.614959673681404e-07, "loss": 1.1032, "step": 5691 }, { "epoch": 0.7716396665085068, "grad_norm": 4.398816605553307, "learning_rate": 2.611999502580482e-07, "loss": 1.0973, "step": 5692 }, { "epoch": 0.7717752321561716, "grad_norm": 5.054757127904154, "learning_rate": 2.6090407561971405e-07, "loss": 1.0462, "step": 5693 }, { "epoch": 0.7719107978038365, "grad_norm": 5.412234873333998, "learning_rate": 2.6060834351019433e-07, "loss": 1.0993, "step": 5694 }, { "epoch": 0.7720463634515013, "grad_norm": 6.103214227421836, "learning_rate": 2.6031275398651986e-07, "loss": 1.1264, "step": 5695 }, { "epoch": 0.7721819290991663, "grad_norm": 4.268921727247467, "learning_rate": 2.6001730710569123e-07, "loss": 1.0757, "step": 5696 }, { "epoch": 0.7723174947468312, "grad_norm": 6.602057439472112, "learning_rate": 2.597220029246846e-07, "loss": 1.09, "step": 5697 }, { "epoch": 0.772453060394496, "grad_norm": 4.1983079639521526, "learning_rate": 2.594268415004457e-07, "loss": 1.1379, "step": 5698 }, { "epoch": 0.7725886260421609, "grad_norm": 5.930777412938728, "learning_rate": 2.591318228898953e-07, "loss": 1.171, "step": 5699 }, { "epoch": 0.7727241916898258, "grad_norm": 5.805529884833417, "learning_rate": 2.5883694714992446e-07, "loss": 1.0869, "step": 5700 }, { "epoch": 0.7728597573374907, "grad_norm": 4.281169878998615, "learning_rate": 2.5854221433739797e-07, "loss": 1.1122, "step": 5701 }, { "epoch": 0.7729953229851556, "grad_norm": 4.700615040640882, "learning_rate": 2.582476245091527e-07, "loss": 1.1065, "step": 5702 }, { "epoch": 0.7731308886328204, "grad_norm": 4.885858327219352, "learning_rate": 2.579531777219981e-07, "loss": 1.0973, "step": 5703 }, { "epoch": 0.7732664542804853, "grad_norm": 5.053082496256663, "learning_rate": 2.576588740327158e-07, "loss": 1.07, "step": 5704 }, { "epoch": 0.7734020199281502, "grad_norm": 6.033659289515315, "learning_rate": 2.573647134980599e-07, "loss": 1.097, "step": 5705 }, { "epoch": 0.7735375855758151, "grad_norm": 6.309695796401424, "learning_rate": 2.57070696174757e-07, "loss": 1.1017, "step": 5706 }, { "epoch": 0.77367315122348, "grad_norm": 5.579130060749867, "learning_rate": 2.5677682211950604e-07, "loss": 1.0864, "step": 5707 }, { "epoch": 0.7738087168711448, "grad_norm": 5.80559532865772, "learning_rate": 2.564830913889783e-07, "loss": 1.1066, "step": 5708 }, { "epoch": 0.7739442825188098, "grad_norm": 6.55369491272627, "learning_rate": 2.561895040398173e-07, "loss": 1.0942, "step": 5709 }, { "epoch": 0.7740798481664746, "grad_norm": 4.846032543993368, "learning_rate": 2.5589606012863964e-07, "loss": 1.098, "step": 5710 }, { "epoch": 0.7742154138141395, "grad_norm": 6.225054182629207, "learning_rate": 2.556027597120325e-07, "loss": 1.1104, "step": 5711 }, { "epoch": 0.7743509794618044, "grad_norm": 8.26779883534398, "learning_rate": 2.553096028465578e-07, "loss": 1.1038, "step": 5712 }, { "epoch": 0.7744865451094692, "grad_norm": 5.440892521536651, "learning_rate": 2.550165895887474e-07, "loss": 1.0957, "step": 5713 }, { "epoch": 0.7746221107571342, "grad_norm": 11.990723689779028, "learning_rate": 2.547237199951078e-07, "loss": 1.1159, "step": 5714 }, { "epoch": 0.774757676404799, "grad_norm": 6.060364919350008, "learning_rate": 2.5443099412211535e-07, "loss": 1.102, "step": 5715 }, { "epoch": 0.7748932420524639, "grad_norm": 4.794807114464031, "learning_rate": 2.54138412026221e-07, "loss": 1.1239, "step": 5716 }, { "epoch": 0.7750288077001288, "grad_norm": 10.470213208718786, "learning_rate": 2.5384597376384596e-07, "loss": 1.1138, "step": 5717 }, { "epoch": 0.7751643733477936, "grad_norm": 4.521342470745331, "learning_rate": 2.535536793913856e-07, "loss": 1.1051, "step": 5718 }, { "epoch": 0.7752999389954586, "grad_norm": 4.362711168777118, "learning_rate": 2.532615289652055e-07, "loss": 1.0743, "step": 5719 }, { "epoch": 0.7754355046431234, "grad_norm": 8.412317920930818, "learning_rate": 2.5296952254164573e-07, "loss": 1.0908, "step": 5720 }, { "epoch": 0.7755710702907883, "grad_norm": 4.5746729637665124, "learning_rate": 2.5267766017701664e-07, "loss": 1.0746, "step": 5721 }, { "epoch": 0.7757066359384532, "grad_norm": 6.806514869122844, "learning_rate": 2.5238594192760165e-07, "loss": 1.0919, "step": 5722 }, { "epoch": 0.775842201586118, "grad_norm": 5.126318098222726, "learning_rate": 2.5209436784965657e-07, "loss": 1.0881, "step": 5723 }, { "epoch": 0.775977767233783, "grad_norm": 3.722808333438008, "learning_rate": 2.5180293799940886e-07, "loss": 1.0857, "step": 5724 }, { "epoch": 0.7761133328814478, "grad_norm": 5.314089336385821, "learning_rate": 2.5151165243305885e-07, "loss": 1.0818, "step": 5725 }, { "epoch": 0.7762488985291127, "grad_norm": 4.188895594676938, "learning_rate": 2.512205112067783e-07, "loss": 1.0912, "step": 5726 }, { "epoch": 0.7763844641767776, "grad_norm": 3.7565554765148903, "learning_rate": 2.5092951437671184e-07, "loss": 1.1346, "step": 5727 }, { "epoch": 0.7765200298244425, "grad_norm": 4.623523161397321, "learning_rate": 2.5063866199897556e-07, "loss": 1.0977, "step": 5728 }, { "epoch": 0.7766555954721074, "grad_norm": 5.442192700820108, "learning_rate": 2.5034795412965825e-07, "loss": 1.0623, "step": 5729 }, { "epoch": 0.7767911611197722, "grad_norm": 4.265191577280355, "learning_rate": 2.500573908248207e-07, "loss": 1.1263, "step": 5730 }, { "epoch": 0.7769267267674371, "grad_norm": 4.116759091698832, "learning_rate": 2.497669721404956e-07, "loss": 1.0903, "step": 5731 }, { "epoch": 0.777062292415102, "grad_norm": 3.6233646532305794, "learning_rate": 2.494766981326878e-07, "loss": 1.092, "step": 5732 }, { "epoch": 0.7771978580627669, "grad_norm": 4.267281425193674, "learning_rate": 2.4918656885737465e-07, "loss": 1.0584, "step": 5733 }, { "epoch": 0.7773334237104318, "grad_norm": 4.253791973440954, "learning_rate": 2.488965843705051e-07, "loss": 1.1083, "step": 5734 }, { "epoch": 0.7774689893580966, "grad_norm": 7.5555703523803945, "learning_rate": 2.4860674472800036e-07, "loss": 1.1034, "step": 5735 }, { "epoch": 0.7776045550057615, "grad_norm": 9.476434559861818, "learning_rate": 2.483170499857541e-07, "loss": 1.119, "step": 5736 }, { "epoch": 0.7777401206534265, "grad_norm": 5.309137714043221, "learning_rate": 2.48027500199631e-07, "loss": 1.0986, "step": 5737 }, { "epoch": 0.7778756863010913, "grad_norm": 11.599277894849134, "learning_rate": 2.477380954254689e-07, "loss": 1.085, "step": 5738 }, { "epoch": 0.7780112519487562, "grad_norm": 6.1329124354236315, "learning_rate": 2.4744883571907694e-07, "loss": 1.0897, "step": 5739 }, { "epoch": 0.778146817596421, "grad_norm": 4.709525746488091, "learning_rate": 2.471597211362367e-07, "loss": 1.1234, "step": 5740 }, { "epoch": 0.7782823832440859, "grad_norm": 3.8948716085387325, "learning_rate": 2.468707517327019e-07, "loss": 1.128, "step": 5741 }, { "epoch": 0.7784179488917509, "grad_norm": 5.498150276573484, "learning_rate": 2.465819275641976e-07, "loss": 1.104, "step": 5742 }, { "epoch": 0.7785535145394157, "grad_norm": 5.829673010700652, "learning_rate": 2.462932486864215e-07, "loss": 1.1088, "step": 5743 }, { "epoch": 0.7786890801870806, "grad_norm": 5.800451885094177, "learning_rate": 2.4600471515504293e-07, "loss": 1.0722, "step": 5744 }, { "epoch": 0.7788246458347454, "grad_norm": 5.989844796315261, "learning_rate": 2.4571632702570356e-07, "loss": 1.0864, "step": 5745 }, { "epoch": 0.7789602114824103, "grad_norm": 4.26899003140795, "learning_rate": 2.454280843540164e-07, "loss": 1.102, "step": 5746 }, { "epoch": 0.7790957771300753, "grad_norm": 5.405285993678367, "learning_rate": 2.4513998719556693e-07, "loss": 1.0988, "step": 5747 }, { "epoch": 0.7792313427777401, "grad_norm": 5.931974206301837, "learning_rate": 2.448520356059125e-07, "loss": 1.0784, "step": 5748 }, { "epoch": 0.779366908425405, "grad_norm": 4.250450324831237, "learning_rate": 2.4456422964058254e-07, "loss": 1.1371, "step": 5749 }, { "epoch": 0.7795024740730698, "grad_norm": 5.406555501170031, "learning_rate": 2.442765693550772e-07, "loss": 1.1051, "step": 5750 }, { "epoch": 0.7796380397207348, "grad_norm": 7.072843261214542, "learning_rate": 2.4398905480487073e-07, "loss": 1.0325, "step": 5751 }, { "epoch": 0.7797736053683997, "grad_norm": 6.159959718908195, "learning_rate": 2.4370168604540697e-07, "loss": 1.0717, "step": 5752 }, { "epoch": 0.7799091710160645, "grad_norm": 5.111705937016586, "learning_rate": 2.4341446313210365e-07, "loss": 1.0854, "step": 5753 }, { "epoch": 0.7800447366637294, "grad_norm": 16.472563320380615, "learning_rate": 2.4312738612034843e-07, "loss": 1.0907, "step": 5754 }, { "epoch": 0.7801803023113943, "grad_norm": 4.038568279400761, "learning_rate": 2.428404550655031e-07, "loss": 1.0686, "step": 5755 }, { "epoch": 0.7803158679590592, "grad_norm": 9.412167810940947, "learning_rate": 2.425536700228986e-07, "loss": 1.0934, "step": 5756 }, { "epoch": 0.7804514336067241, "grad_norm": 3.987395240313684, "learning_rate": 2.422670310478406e-07, "loss": 1.1105, "step": 5757 }, { "epoch": 0.7805869992543889, "grad_norm": 10.618238478670342, "learning_rate": 2.4198053819560394e-07, "loss": 1.1009, "step": 5758 }, { "epoch": 0.7807225649020538, "grad_norm": 8.441149256420312, "learning_rate": 2.4169419152143766e-07, "loss": 1.12, "step": 5759 }, { "epoch": 0.7808581305497188, "grad_norm": 3.770958895760354, "learning_rate": 2.414079910805601e-07, "loss": 1.1032, "step": 5760 }, { "epoch": 0.7809936961973836, "grad_norm": 8.65043734909975, "learning_rate": 2.4112193692816416e-07, "loss": 1.1067, "step": 5761 }, { "epoch": 0.7811292618450485, "grad_norm": 8.924830875473235, "learning_rate": 2.4083602911941224e-07, "loss": 1.122, "step": 5762 }, { "epoch": 0.7812648274927133, "grad_norm": 4.264948468038443, "learning_rate": 2.405502677094395e-07, "loss": 1.072, "step": 5763 }, { "epoch": 0.7814003931403782, "grad_norm": 4.956828954645284, "learning_rate": 2.4026465275335306e-07, "loss": 1.1324, "step": 5764 }, { "epoch": 0.7815359587880432, "grad_norm": 4.506067839049622, "learning_rate": 2.399791843062312e-07, "loss": 1.0748, "step": 5765 }, { "epoch": 0.781671524435708, "grad_norm": 4.639781982269274, "learning_rate": 2.396938624231245e-07, "loss": 1.0719, "step": 5766 }, { "epoch": 0.7818070900833729, "grad_norm": 3.946477721563553, "learning_rate": 2.3940868715905495e-07, "loss": 1.0935, "step": 5767 }, { "epoch": 0.7819426557310377, "grad_norm": 4.961364813442389, "learning_rate": 2.3912365856901627e-07, "loss": 1.0895, "step": 5768 }, { "epoch": 0.7820782213787026, "grad_norm": 6.411872908967617, "learning_rate": 2.38838776707974e-07, "loss": 1.1623, "step": 5769 }, { "epoch": 0.7822137870263676, "grad_norm": 5.40288994793926, "learning_rate": 2.3855404163086556e-07, "loss": 1.0901, "step": 5770 }, { "epoch": 0.7823493526740324, "grad_norm": 4.9607650310598315, "learning_rate": 2.3826945339259964e-07, "loss": 1.0961, "step": 5771 }, { "epoch": 0.7824849183216973, "grad_norm": 5.8851111852606905, "learning_rate": 2.379850120480571e-07, "loss": 1.1432, "step": 5772 }, { "epoch": 0.7826204839693621, "grad_norm": 5.276885754635643, "learning_rate": 2.3770071765208956e-07, "loss": 1.126, "step": 5773 }, { "epoch": 0.782756049617027, "grad_norm": 6.7344173663516695, "learning_rate": 2.3741657025952188e-07, "loss": 1.1245, "step": 5774 }, { "epoch": 0.782891615264692, "grad_norm": 7.701551846994949, "learning_rate": 2.3713256992514853e-07, "loss": 1.1331, "step": 5775 }, { "epoch": 0.7830271809123568, "grad_norm": 13.957633311687417, "learning_rate": 2.3684871670373806e-07, "loss": 1.1373, "step": 5776 }, { "epoch": 0.7831627465600217, "grad_norm": 5.809217064645087, "learning_rate": 2.365650106500282e-07, "loss": 1.076, "step": 5777 }, { "epoch": 0.7832983122076865, "grad_norm": 6.459019756555373, "learning_rate": 2.3628145181872994e-07, "loss": 1.0959, "step": 5778 }, { "epoch": 0.7834338778553515, "grad_norm": 5.920405158655133, "learning_rate": 2.359980402645253e-07, "loss": 1.1471, "step": 5779 }, { "epoch": 0.7835694435030164, "grad_norm": 4.35859876711917, "learning_rate": 2.3571477604206792e-07, "loss": 1.1266, "step": 5780 }, { "epoch": 0.7837050091506812, "grad_norm": 4.962399471809961, "learning_rate": 2.3543165920598308e-07, "loss": 1.1161, "step": 5781 }, { "epoch": 0.7838405747983461, "grad_norm": 7.054707772234247, "learning_rate": 2.3514868981086755e-07, "loss": 1.0729, "step": 5782 }, { "epoch": 0.7839761404460109, "grad_norm": 4.767848774227263, "learning_rate": 2.3486586791128982e-07, "loss": 1.1588, "step": 5783 }, { "epoch": 0.7841117060936759, "grad_norm": 5.999396495860681, "learning_rate": 2.345831935617899e-07, "loss": 1.0818, "step": 5784 }, { "epoch": 0.7842472717413408, "grad_norm": 4.902332236327135, "learning_rate": 2.3430066681687932e-07, "loss": 1.0786, "step": 5785 }, { "epoch": 0.7843828373890056, "grad_norm": 4.915796487661633, "learning_rate": 2.3401828773104103e-07, "loss": 1.0679, "step": 5786 }, { "epoch": 0.7845184030366705, "grad_norm": 4.43046923930325, "learning_rate": 2.3373605635872972e-07, "loss": 1.0609, "step": 5787 }, { "epoch": 0.7846539686843353, "grad_norm": 6.075215205936903, "learning_rate": 2.334539727543713e-07, "loss": 1.1027, "step": 5788 }, { "epoch": 0.7847895343320003, "grad_norm": 5.37529645876503, "learning_rate": 2.3317203697236353e-07, "loss": 1.1527, "step": 5789 }, { "epoch": 0.7849250999796652, "grad_norm": 6.234698573559299, "learning_rate": 2.3289024906707555e-07, "loss": 1.1207, "step": 5790 }, { "epoch": 0.78506066562733, "grad_norm": 5.046749296384922, "learning_rate": 2.3260860909284773e-07, "loss": 1.104, "step": 5791 }, { "epoch": 0.7851962312749949, "grad_norm": 5.133370083920942, "learning_rate": 2.3232711710399255e-07, "loss": 1.0965, "step": 5792 }, { "epoch": 0.7853317969226598, "grad_norm": 7.8856369796801, "learning_rate": 2.3204577315479269e-07, "loss": 1.1086, "step": 5793 }, { "epoch": 0.7854673625703247, "grad_norm": 6.564632774597346, "learning_rate": 2.3176457729950417e-07, "loss": 1.0859, "step": 5794 }, { "epoch": 0.7856029282179896, "grad_norm": 4.645150915989363, "learning_rate": 2.3148352959235218e-07, "loss": 1.109, "step": 5795 }, { "epoch": 0.7857384938656544, "grad_norm": 6.033946986597122, "learning_rate": 2.3120263008753582e-07, "loss": 1.1279, "step": 5796 }, { "epoch": 0.7858740595133193, "grad_norm": 5.064710873422158, "learning_rate": 2.309218788392232e-07, "loss": 1.0933, "step": 5797 }, { "epoch": 0.7860096251609842, "grad_norm": 5.309865900608958, "learning_rate": 2.3064127590155603e-07, "loss": 1.0947, "step": 5798 }, { "epoch": 0.7861451908086491, "grad_norm": 5.4232063202389, "learning_rate": 2.3036082132864555e-07, "loss": 1.1075, "step": 5799 }, { "epoch": 0.786280756456314, "grad_norm": 5.414267100719424, "learning_rate": 2.300805151745756e-07, "loss": 1.1088, "step": 5800 }, { "epoch": 0.7864163221039788, "grad_norm": 5.18708239373338, "learning_rate": 2.2980035749340088e-07, "loss": 1.0627, "step": 5801 }, { "epoch": 0.7865518877516438, "grad_norm": 4.072582786202786, "learning_rate": 2.2952034833914757e-07, "loss": 1.0937, "step": 5802 }, { "epoch": 0.7866874533993086, "grad_norm": 5.062357166735173, "learning_rate": 2.292404877658134e-07, "loss": 1.0927, "step": 5803 }, { "epoch": 0.7868230190469735, "grad_norm": 5.3744694698262245, "learning_rate": 2.2896077582736705e-07, "loss": 1.0952, "step": 5804 }, { "epoch": 0.7869585846946384, "grad_norm": 10.106565232251885, "learning_rate": 2.2868121257774885e-07, "loss": 1.0859, "step": 5805 }, { "epoch": 0.7870941503423032, "grad_norm": 4.161019143767235, "learning_rate": 2.2840179807087044e-07, "loss": 1.1169, "step": 5806 }, { "epoch": 0.7872297159899682, "grad_norm": 7.006357383112986, "learning_rate": 2.2812253236061497e-07, "loss": 1.0724, "step": 5807 }, { "epoch": 0.787365281637633, "grad_norm": 4.929324910331659, "learning_rate": 2.2784341550083574e-07, "loss": 1.0773, "step": 5808 }, { "epoch": 0.7875008472852979, "grad_norm": 5.1933772930405215, "learning_rate": 2.275644475453593e-07, "loss": 1.0925, "step": 5809 }, { "epoch": 0.7876364129329628, "grad_norm": 11.930142853957221, "learning_rate": 2.272856285479814e-07, "loss": 1.1454, "step": 5810 }, { "epoch": 0.7877719785806276, "grad_norm": 4.3262388908402984, "learning_rate": 2.2700695856247122e-07, "loss": 1.0768, "step": 5811 }, { "epoch": 0.7879075442282926, "grad_norm": 5.511207919264985, "learning_rate": 2.2672843764256678e-07, "loss": 1.0915, "step": 5812 }, { "epoch": 0.7880431098759574, "grad_norm": 3.9033455559739227, "learning_rate": 2.264500658419799e-07, "loss": 1.0948, "step": 5813 }, { "epoch": 0.7881786755236223, "grad_norm": 4.445462513299778, "learning_rate": 2.261718432143912e-07, "loss": 1.0792, "step": 5814 }, { "epoch": 0.7883142411712872, "grad_norm": 4.102917643868781, "learning_rate": 2.2589376981345487e-07, "loss": 1.0606, "step": 5815 }, { "epoch": 0.788449806818952, "grad_norm": 4.375386684419576, "learning_rate": 2.25615845692794e-07, "loss": 1.11, "step": 5816 }, { "epoch": 0.788585372466617, "grad_norm": 7.893476243940553, "learning_rate": 2.253380709060053e-07, "loss": 1.0843, "step": 5817 }, { "epoch": 0.7887209381142818, "grad_norm": 5.663294023931302, "learning_rate": 2.2506044550665438e-07, "loss": 1.0878, "step": 5818 }, { "epoch": 0.7888565037619467, "grad_norm": 6.603652401957627, "learning_rate": 2.247829695482799e-07, "loss": 1.0723, "step": 5819 }, { "epoch": 0.7889920694096116, "grad_norm": 8.303573380747366, "learning_rate": 2.2450564308439036e-07, "loss": 1.131, "step": 5820 }, { "epoch": 0.7891276350572765, "grad_norm": 4.926302745339743, "learning_rate": 2.2422846616846613e-07, "loss": 1.0639, "step": 5821 }, { "epoch": 0.7892632007049414, "grad_norm": 7.5518771241003515, "learning_rate": 2.2395143885395873e-07, "loss": 1.1164, "step": 5822 }, { "epoch": 0.7893987663526062, "grad_norm": 9.413571474800364, "learning_rate": 2.236745611942905e-07, "loss": 1.1315, "step": 5823 }, { "epoch": 0.7895343320002711, "grad_norm": 4.476257207423292, "learning_rate": 2.2339783324285523e-07, "loss": 1.1117, "step": 5824 }, { "epoch": 0.789669897647936, "grad_norm": 4.707634351970914, "learning_rate": 2.231212550530177e-07, "loss": 1.1018, "step": 5825 }, { "epoch": 0.7898054632956009, "grad_norm": 6.381920633543459, "learning_rate": 2.2284482667811378e-07, "loss": 1.0961, "step": 5826 }, { "epoch": 0.7899410289432658, "grad_norm": 4.978499819325802, "learning_rate": 2.2256854817145065e-07, "loss": 1.0947, "step": 5827 }, { "epoch": 0.7900765945909306, "grad_norm": 4.7560368204603956, "learning_rate": 2.2229241958630617e-07, "loss": 1.1427, "step": 5828 }, { "epoch": 0.7902121602385955, "grad_norm": 3.9433387340589805, "learning_rate": 2.2201644097592987e-07, "loss": 1.0896, "step": 5829 }, { "epoch": 0.7903477258862605, "grad_norm": 8.102544119985447, "learning_rate": 2.217406123935418e-07, "loss": 1.132, "step": 5830 }, { "epoch": 0.7904832915339253, "grad_norm": 4.6436407645743305, "learning_rate": 2.2146493389233357e-07, "loss": 1.0617, "step": 5831 }, { "epoch": 0.7906188571815902, "grad_norm": 4.0041596484973825, "learning_rate": 2.211894055254673e-07, "loss": 1.0987, "step": 5832 }, { "epoch": 0.7907544228292551, "grad_norm": 5.791652368286594, "learning_rate": 2.20914027346077e-07, "loss": 1.1218, "step": 5833 }, { "epoch": 0.7908899884769199, "grad_norm": 4.89350167545723, "learning_rate": 2.206387994072665e-07, "loss": 1.0772, "step": 5834 }, { "epoch": 0.7910255541245849, "grad_norm": 5.583754505824889, "learning_rate": 2.2036372176211148e-07, "loss": 1.1116, "step": 5835 }, { "epoch": 0.7911611197722497, "grad_norm": 7.273270057833011, "learning_rate": 2.200887944636588e-07, "loss": 1.0812, "step": 5836 }, { "epoch": 0.7912966854199146, "grad_norm": 7.066180263697882, "learning_rate": 2.198140175649259e-07, "loss": 1.1234, "step": 5837 }, { "epoch": 0.7914322510675795, "grad_norm": 3.5727796858958065, "learning_rate": 2.195393911189012e-07, "loss": 1.1215, "step": 5838 }, { "epoch": 0.7915678167152443, "grad_norm": 8.043545127161313, "learning_rate": 2.192649151785444e-07, "loss": 1.1171, "step": 5839 }, { "epoch": 0.7917033823629093, "grad_norm": 6.268043465798527, "learning_rate": 2.1899058979678586e-07, "loss": 1.0939, "step": 5840 }, { "epoch": 0.7918389480105741, "grad_norm": 3.9819846996962536, "learning_rate": 2.1871641502652728e-07, "loss": 1.0894, "step": 5841 }, { "epoch": 0.791974513658239, "grad_norm": 6.138234375827679, "learning_rate": 2.1844239092064088e-07, "loss": 1.0976, "step": 5842 }, { "epoch": 0.7921100793059039, "grad_norm": 4.595832085819474, "learning_rate": 2.181685175319702e-07, "loss": 1.0785, "step": 5843 }, { "epoch": 0.7922456449535688, "grad_norm": 5.786230158822606, "learning_rate": 2.1789479491332953e-07, "loss": 1.1159, "step": 5844 }, { "epoch": 0.7923812106012337, "grad_norm": 4.883565229547819, "learning_rate": 2.176212231175041e-07, "loss": 1.1239, "step": 5845 }, { "epoch": 0.7925167762488985, "grad_norm": 4.285772841239101, "learning_rate": 2.1734780219725e-07, "loss": 1.0899, "step": 5846 }, { "epoch": 0.7926523418965634, "grad_norm": 4.692863709092313, "learning_rate": 2.1707453220529448e-07, "loss": 1.1258, "step": 5847 }, { "epoch": 0.7927879075442283, "grad_norm": 4.988764493210637, "learning_rate": 2.1680141319433564e-07, "loss": 1.0944, "step": 5848 }, { "epoch": 0.7929234731918932, "grad_norm": 4.476678387062757, "learning_rate": 2.165284452170415e-07, "loss": 1.0702, "step": 5849 }, { "epoch": 0.7930590388395581, "grad_norm": 4.913837628673331, "learning_rate": 2.1625562832605281e-07, "loss": 1.1125, "step": 5850 }, { "epoch": 0.7931946044872229, "grad_norm": 5.290020383304523, "learning_rate": 2.159829625739793e-07, "loss": 1.1172, "step": 5851 }, { "epoch": 0.7933301701348878, "grad_norm": 6.746714735115339, "learning_rate": 2.157104480134032e-07, "loss": 1.0682, "step": 5852 }, { "epoch": 0.7934657357825528, "grad_norm": 7.689258339220073, "learning_rate": 2.1543808469687596e-07, "loss": 1.06, "step": 5853 }, { "epoch": 0.7936013014302176, "grad_norm": 5.068651698035303, "learning_rate": 2.1516587267692165e-07, "loss": 1.0594, "step": 5854 }, { "epoch": 0.7937368670778825, "grad_norm": 5.943501396449875, "learning_rate": 2.1489381200603307e-07, "loss": 1.1519, "step": 5855 }, { "epoch": 0.7938724327255473, "grad_norm": 7.443330134627742, "learning_rate": 2.1462190273667624e-07, "loss": 1.0936, "step": 5856 }, { "epoch": 0.7940079983732122, "grad_norm": 5.305038652795953, "learning_rate": 2.1435014492128545e-07, "loss": 1.0415, "step": 5857 }, { "epoch": 0.7941435640208772, "grad_norm": 4.823824999235445, "learning_rate": 2.1407853861226833e-07, "loss": 1.0968, "step": 5858 }, { "epoch": 0.794279129668542, "grad_norm": 3.4524151202598934, "learning_rate": 2.1380708386200075e-07, "loss": 1.0812, "step": 5859 }, { "epoch": 0.7944146953162069, "grad_norm": 4.120077422915767, "learning_rate": 2.1353578072283175e-07, "loss": 1.1048, "step": 5860 }, { "epoch": 0.7945502609638717, "grad_norm": 3.846845733174311, "learning_rate": 2.1326462924707912e-07, "loss": 1.0951, "step": 5861 }, { "epoch": 0.7946858266115366, "grad_norm": 4.000818105662922, "learning_rate": 2.129936294870327e-07, "loss": 1.111, "step": 5862 }, { "epoch": 0.7948213922592016, "grad_norm": 4.697018513103365, "learning_rate": 2.127227814949526e-07, "loss": 1.0925, "step": 5863 }, { "epoch": 0.7949569579068664, "grad_norm": 4.206273822585842, "learning_rate": 2.124520853230697e-07, "loss": 1.1078, "step": 5864 }, { "epoch": 0.7950925235545313, "grad_norm": 4.569219839170414, "learning_rate": 2.1218154102358554e-07, "loss": 1.1166, "step": 5865 }, { "epoch": 0.7952280892021961, "grad_norm": 5.336539775841253, "learning_rate": 2.1191114864867255e-07, "loss": 1.0988, "step": 5866 }, { "epoch": 0.795363654849861, "grad_norm": 6.159178297132528, "learning_rate": 2.1164090825047388e-07, "loss": 1.0663, "step": 5867 }, { "epoch": 0.795499220497526, "grad_norm": 4.277455804946946, "learning_rate": 2.1137081988110294e-07, "loss": 1.1053, "step": 5868 }, { "epoch": 0.7956347861451908, "grad_norm": 10.658421775719649, "learning_rate": 2.1110088359264445e-07, "loss": 1.13, "step": 5869 }, { "epoch": 0.7957703517928557, "grad_norm": 5.150099284795021, "learning_rate": 2.108310994371534e-07, "loss": 1.1038, "step": 5870 }, { "epoch": 0.7959059174405205, "grad_norm": 4.016509713226346, "learning_rate": 2.105614674666556e-07, "loss": 1.0654, "step": 5871 }, { "epoch": 0.7960414830881855, "grad_norm": 3.7775324731098405, "learning_rate": 2.1029198773314693e-07, "loss": 1.114, "step": 5872 }, { "epoch": 0.7961770487358504, "grad_norm": 9.856660421031775, "learning_rate": 2.1002266028859539e-07, "loss": 1.1056, "step": 5873 }, { "epoch": 0.7963126143835152, "grad_norm": 6.041048944162777, "learning_rate": 2.0975348518493762e-07, "loss": 1.0956, "step": 5874 }, { "epoch": 0.7964481800311801, "grad_norm": 7.263057471551958, "learning_rate": 2.094844624740828e-07, "loss": 1.1077, "step": 5875 }, { "epoch": 0.7965837456788449, "grad_norm": 3.5723870121537917, "learning_rate": 2.092155922079093e-07, "loss": 1.1133, "step": 5876 }, { "epoch": 0.7967193113265099, "grad_norm": 6.772818077930999, "learning_rate": 2.0894687443826675e-07, "loss": 1.0941, "step": 5877 }, { "epoch": 0.7968548769741748, "grad_norm": 5.625071877985006, "learning_rate": 2.0867830921697527e-07, "loss": 1.1208, "step": 5878 }, { "epoch": 0.7969904426218396, "grad_norm": 13.257029551317773, "learning_rate": 2.0840989659582552e-07, "loss": 1.049, "step": 5879 }, { "epoch": 0.7971260082695045, "grad_norm": 6.030634157475134, "learning_rate": 2.081416366265787e-07, "loss": 1.0891, "step": 5880 }, { "epoch": 0.7972615739171693, "grad_norm": 4.871070165959265, "learning_rate": 2.078735293609668e-07, "loss": 1.1007, "step": 5881 }, { "epoch": 0.7973971395648343, "grad_norm": 6.697004410958652, "learning_rate": 2.0760557485069208e-07, "loss": 1.0592, "step": 5882 }, { "epoch": 0.7975327052124992, "grad_norm": 5.218720594932422, "learning_rate": 2.073377731474275e-07, "loss": 1.0868, "step": 5883 }, { "epoch": 0.797668270860164, "grad_norm": 6.190702620737587, "learning_rate": 2.0707012430281646e-07, "loss": 1.1141, "step": 5884 }, { "epoch": 0.7978038365078289, "grad_norm": 4.562138832808875, "learning_rate": 2.0680262836847294e-07, "loss": 1.0857, "step": 5885 }, { "epoch": 0.7979394021554937, "grad_norm": 4.52319235046156, "learning_rate": 2.065352853959814e-07, "loss": 1.093, "step": 5886 }, { "epoch": 0.7980749678031587, "grad_norm": 4.544949933175779, "learning_rate": 2.0626809543689682e-07, "loss": 1.0978, "step": 5887 }, { "epoch": 0.7982105334508236, "grad_norm": 5.05902790101396, "learning_rate": 2.0600105854274474e-07, "loss": 1.0636, "step": 5888 }, { "epoch": 0.7983460990984884, "grad_norm": 6.280105582256944, "learning_rate": 2.0573417476502108e-07, "loss": 1.0965, "step": 5889 }, { "epoch": 0.7984816647461533, "grad_norm": 4.859871020824227, "learning_rate": 2.0546744415519223e-07, "loss": 1.0559, "step": 5890 }, { "epoch": 0.7986172303938182, "grad_norm": 7.3982287380681, "learning_rate": 2.052008667646954e-07, "loss": 1.1066, "step": 5891 }, { "epoch": 0.7987527960414831, "grad_norm": 4.609452978879718, "learning_rate": 2.049344426449371e-07, "loss": 1.0428, "step": 5892 }, { "epoch": 0.798888361689148, "grad_norm": 8.237032983019684, "learning_rate": 2.0466817184729624e-07, "loss": 1.0667, "step": 5893 }, { "epoch": 0.7990239273368128, "grad_norm": 6.6232219314546095, "learning_rate": 2.0440205442311987e-07, "loss": 1.0895, "step": 5894 }, { "epoch": 0.7991594929844777, "grad_norm": 5.067668262668996, "learning_rate": 2.041360904237278e-07, "loss": 1.09, "step": 5895 }, { "epoch": 0.7992950586321426, "grad_norm": 6.936092385898765, "learning_rate": 2.0387027990040827e-07, "loss": 1.0825, "step": 5896 }, { "epoch": 0.7994306242798075, "grad_norm": 5.122341649858369, "learning_rate": 2.0360462290442105e-07, "loss": 1.0657, "step": 5897 }, { "epoch": 0.7995661899274724, "grad_norm": 7.801012474487165, "learning_rate": 2.033391194869959e-07, "loss": 1.1228, "step": 5898 }, { "epoch": 0.7997017555751372, "grad_norm": 3.672721011456146, "learning_rate": 2.03073769699333e-07, "loss": 1.1168, "step": 5899 }, { "epoch": 0.7998373212228022, "grad_norm": 3.872096860693514, "learning_rate": 2.0280857359260316e-07, "loss": 1.0853, "step": 5900 }, { "epoch": 0.799972886870467, "grad_norm": 4.555618044238675, "learning_rate": 2.025435312179472e-07, "loss": 1.1183, "step": 5901 }, { "epoch": 0.8001084525181319, "grad_norm": 7.18909345115964, "learning_rate": 2.0227864262647664e-07, "loss": 1.1044, "step": 5902 }, { "epoch": 0.8002440181657968, "grad_norm": 4.677128414207441, "learning_rate": 2.0201390786927286e-07, "loss": 1.0901, "step": 5903 }, { "epoch": 0.8003795838134616, "grad_norm": 5.0832426766878, "learning_rate": 2.017493269973881e-07, "loss": 1.0926, "step": 5904 }, { "epoch": 0.8005151494611266, "grad_norm": 6.1601180859471665, "learning_rate": 2.014849000618446e-07, "loss": 1.0948, "step": 5905 }, { "epoch": 0.8006507151087914, "grad_norm": 7.561650133026337, "learning_rate": 2.012206271136353e-07, "loss": 1.1027, "step": 5906 }, { "epoch": 0.8007862807564563, "grad_norm": 4.7103677910223585, "learning_rate": 2.0095650820372234e-07, "loss": 1.103, "step": 5907 }, { "epoch": 0.8009218464041212, "grad_norm": 8.616013108620153, "learning_rate": 2.006925433830401e-07, "loss": 1.0884, "step": 5908 }, { "epoch": 0.801057412051786, "grad_norm": 7.103926377127475, "learning_rate": 2.0042873270249094e-07, "loss": 1.1025, "step": 5909 }, { "epoch": 0.801192977699451, "grad_norm": 7.842771248821557, "learning_rate": 2.0016507621294975e-07, "loss": 1.0753, "step": 5910 }, { "epoch": 0.8013285433471159, "grad_norm": 6.7528351887517, "learning_rate": 1.9990157396525963e-07, "loss": 1.1055, "step": 5911 }, { "epoch": 0.8014641089947807, "grad_norm": 4.228183744340931, "learning_rate": 1.9963822601023595e-07, "loss": 1.0854, "step": 5912 }, { "epoch": 0.8015996746424456, "grad_norm": 5.517541150861771, "learning_rate": 1.9937503239866205e-07, "loss": 1.1189, "step": 5913 }, { "epoch": 0.8017352402901105, "grad_norm": 4.608685813370202, "learning_rate": 1.9911199318129403e-07, "loss": 1.1005, "step": 5914 }, { "epoch": 0.8018708059377754, "grad_norm": 6.917132372634052, "learning_rate": 1.9884910840885571e-07, "loss": 1.1031, "step": 5915 }, { "epoch": 0.8020063715854403, "grad_norm": 4.493975242096677, "learning_rate": 1.9858637813204349e-07, "loss": 1.0938, "step": 5916 }, { "epoch": 0.8021419372331051, "grad_norm": 5.599528315355499, "learning_rate": 1.983238024015217e-07, "loss": 1.0874, "step": 5917 }, { "epoch": 0.80227750288077, "grad_norm": 7.636619042826409, "learning_rate": 1.9806138126792716e-07, "loss": 1.105, "step": 5918 }, { "epoch": 0.8024130685284349, "grad_norm": 4.532627610496739, "learning_rate": 1.9779911478186485e-07, "loss": 1.1146, "step": 5919 }, { "epoch": 0.8025486341760998, "grad_norm": 6.783201824808772, "learning_rate": 1.9753700299391107e-07, "loss": 1.0737, "step": 5920 }, { "epoch": 0.8026841998237647, "grad_norm": 5.7963984024087365, "learning_rate": 1.9727504595461198e-07, "loss": 1.046, "step": 5921 }, { "epoch": 0.8028197654714295, "grad_norm": 3.9457371126616136, "learning_rate": 1.970132437144839e-07, "loss": 1.1, "step": 5922 }, { "epoch": 0.8029553311190945, "grad_norm": 5.057626160736696, "learning_rate": 1.967515963240135e-07, "loss": 1.0763, "step": 5923 }, { "epoch": 0.8030908967667593, "grad_norm": 6.441198381466982, "learning_rate": 1.9649010383365717e-07, "loss": 1.0984, "step": 5924 }, { "epoch": 0.8032264624144242, "grad_norm": 4.866644987749951, "learning_rate": 1.962287662938419e-07, "loss": 1.0679, "step": 5925 }, { "epoch": 0.8033620280620891, "grad_norm": 4.5922355873612375, "learning_rate": 1.9596758375496435e-07, "loss": 1.0906, "step": 5926 }, { "epoch": 0.8034975937097539, "grad_norm": 4.0081281610056125, "learning_rate": 1.9570655626739176e-07, "loss": 1.0921, "step": 5927 }, { "epoch": 0.8036331593574189, "grad_norm": 7.97787300081433, "learning_rate": 1.9544568388146098e-07, "loss": 1.0807, "step": 5928 }, { "epoch": 0.8037687250050837, "grad_norm": 6.173122410395234, "learning_rate": 1.951849666474793e-07, "loss": 1.0998, "step": 5929 }, { "epoch": 0.8039042906527486, "grad_norm": 3.329893714301808, "learning_rate": 1.9492440461572401e-07, "loss": 1.0807, "step": 5930 }, { "epoch": 0.8040398563004135, "grad_norm": 10.3604141584116, "learning_rate": 1.9466399783644249e-07, "loss": 1.0909, "step": 5931 }, { "epoch": 0.8041754219480783, "grad_norm": 8.587974796311043, "learning_rate": 1.9440374635985224e-07, "loss": 1.0751, "step": 5932 }, { "epoch": 0.8043109875957433, "grad_norm": 4.985545658161185, "learning_rate": 1.941436502361402e-07, "loss": 1.1176, "step": 5933 }, { "epoch": 0.8044465532434081, "grad_norm": 6.767365621954511, "learning_rate": 1.9388370951546428e-07, "loss": 1.0982, "step": 5934 }, { "epoch": 0.804582118891073, "grad_norm": 8.44607749391104, "learning_rate": 1.9362392424795183e-07, "loss": 1.1105, "step": 5935 }, { "epoch": 0.8047176845387379, "grad_norm": 5.063805034489452, "learning_rate": 1.933642944837004e-07, "loss": 1.0938, "step": 5936 }, { "epoch": 0.8048532501864027, "grad_norm": 13.609293869781583, "learning_rate": 1.9310482027277763e-07, "loss": 1.0846, "step": 5937 }, { "epoch": 0.8049888158340677, "grad_norm": 6.371651321472826, "learning_rate": 1.9284550166522108e-07, "loss": 1.0898, "step": 5938 }, { "epoch": 0.8051243814817325, "grad_norm": 10.238590307921642, "learning_rate": 1.9258633871103814e-07, "loss": 1.1068, "step": 5939 }, { "epoch": 0.8052599471293974, "grad_norm": 3.6958090391990184, "learning_rate": 1.923273314602065e-07, "loss": 1.1001, "step": 5940 }, { "epoch": 0.8053955127770623, "grad_norm": 4.902220135977056, "learning_rate": 1.920684799626736e-07, "loss": 1.0734, "step": 5941 }, { "epoch": 0.8055310784247272, "grad_norm": 8.097974175988586, "learning_rate": 1.9180978426835693e-07, "loss": 1.1002, "step": 5942 }, { "epoch": 0.8056666440723921, "grad_norm": 7.946918297187738, "learning_rate": 1.9155124442714387e-07, "loss": 1.1281, "step": 5943 }, { "epoch": 0.8058022097200569, "grad_norm": 5.598372590226196, "learning_rate": 1.912928604888918e-07, "loss": 1.0754, "step": 5944 }, { "epoch": 0.8059377753677218, "grad_norm": 4.233745805450167, "learning_rate": 1.91034632503428e-07, "loss": 1.0901, "step": 5945 }, { "epoch": 0.8060733410153867, "grad_norm": 4.549925447270411, "learning_rate": 1.907765605205498e-07, "loss": 1.1274, "step": 5946 }, { "epoch": 0.8062089066630516, "grad_norm": 4.419354914068982, "learning_rate": 1.9051864459002454e-07, "loss": 1.1076, "step": 5947 }, { "epoch": 0.8063444723107165, "grad_norm": 4.732557861684577, "learning_rate": 1.9026088476158851e-07, "loss": 1.0687, "step": 5948 }, { "epoch": 0.8064800379583813, "grad_norm": 5.984094897536739, "learning_rate": 1.9000328108494967e-07, "loss": 1.0737, "step": 5949 }, { "epoch": 0.8066156036060462, "grad_norm": 5.648347500018019, "learning_rate": 1.897458336097838e-07, "loss": 1.1547, "step": 5950 }, { "epoch": 0.8067511692537112, "grad_norm": 10.42716300061308, "learning_rate": 1.8948854238573874e-07, "loss": 1.1058, "step": 5951 }, { "epoch": 0.806886734901376, "grad_norm": 5.939744421925454, "learning_rate": 1.8923140746242994e-07, "loss": 1.1406, "step": 5952 }, { "epoch": 0.8070223005490409, "grad_norm": 4.7957803209777445, "learning_rate": 1.8897442888944492e-07, "loss": 1.1374, "step": 5953 }, { "epoch": 0.8071578661967057, "grad_norm": 10.13472723301925, "learning_rate": 1.8871760671633895e-07, "loss": 1.0358, "step": 5954 }, { "epoch": 0.8072934318443706, "grad_norm": 4.3852775232180745, "learning_rate": 1.884609409926391e-07, "loss": 1.0924, "step": 5955 }, { "epoch": 0.8074289974920356, "grad_norm": 3.691090888557621, "learning_rate": 1.882044317678404e-07, "loss": 1.0986, "step": 5956 }, { "epoch": 0.8075645631397004, "grad_norm": 49.28238867430964, "learning_rate": 1.8794807909140963e-07, "loss": 1.1199, "step": 5957 }, { "epoch": 0.8077001287873653, "grad_norm": 8.204488180915275, "learning_rate": 1.8769188301278126e-07, "loss": 1.1504, "step": 5958 }, { "epoch": 0.8078356944350301, "grad_norm": 6.689201286896421, "learning_rate": 1.8743584358136188e-07, "loss": 1.0808, "step": 5959 }, { "epoch": 0.807971260082695, "grad_norm": 4.014812860034225, "learning_rate": 1.8717996084652587e-07, "loss": 1.1208, "step": 5960 }, { "epoch": 0.80810682573036, "grad_norm": 6.383866530738087, "learning_rate": 1.8692423485761833e-07, "loss": 1.0666, "step": 5961 }, { "epoch": 0.8082423913780248, "grad_norm": 4.599181812201494, "learning_rate": 1.86668665663954e-07, "loss": 1.0642, "step": 5962 }, { "epoch": 0.8083779570256897, "grad_norm": 6.5308882722753205, "learning_rate": 1.8641325331481762e-07, "loss": 1.111, "step": 5963 }, { "epoch": 0.8085135226733545, "grad_norm": 4.327198921852239, "learning_rate": 1.861579978594632e-07, "loss": 1.0736, "step": 5964 }, { "epoch": 0.8086490883210194, "grad_norm": 3.9768209532766634, "learning_rate": 1.859028993471148e-07, "loss": 1.0844, "step": 5965 }, { "epoch": 0.8087846539686844, "grad_norm": 4.940830590022097, "learning_rate": 1.8564795782696607e-07, "loss": 1.1083, "step": 5966 }, { "epoch": 0.8089202196163492, "grad_norm": 13.441639371977137, "learning_rate": 1.8539317334818072e-07, "loss": 1.0712, "step": 5967 }, { "epoch": 0.8090557852640141, "grad_norm": 5.244693278789301, "learning_rate": 1.8513854595989198e-07, "loss": 1.0745, "step": 5968 }, { "epoch": 0.8091913509116789, "grad_norm": 4.497113691074843, "learning_rate": 1.848840757112019e-07, "loss": 1.0807, "step": 5969 }, { "epoch": 0.8093269165593439, "grad_norm": 3.666309208718205, "learning_rate": 1.8462976265118436e-07, "loss": 1.1047, "step": 5970 }, { "epoch": 0.8094624822070088, "grad_norm": 8.728013622040574, "learning_rate": 1.8437560682888043e-07, "loss": 1.09, "step": 5971 }, { "epoch": 0.8095980478546736, "grad_norm": 4.082305569044029, "learning_rate": 1.8412160829330304e-07, "loss": 1.1077, "step": 5972 }, { "epoch": 0.8097336135023385, "grad_norm": 4.079485501612253, "learning_rate": 1.8386776709343278e-07, "loss": 1.0329, "step": 5973 }, { "epoch": 0.8098691791500033, "grad_norm": 5.019251706263421, "learning_rate": 1.8361408327822203e-07, "loss": 1.0937, "step": 5974 }, { "epoch": 0.8100047447976683, "grad_norm": 5.180164197417773, "learning_rate": 1.8336055689659091e-07, "loss": 1.1045, "step": 5975 }, { "epoch": 0.8101403104453332, "grad_norm": 4.782619162831452, "learning_rate": 1.831071879974302e-07, "loss": 1.1055, "step": 5976 }, { "epoch": 0.810275876092998, "grad_norm": 12.479986305698135, "learning_rate": 1.8285397662960022e-07, "loss": 1.1022, "step": 5977 }, { "epoch": 0.8104114417406629, "grad_norm": 4.067135638736605, "learning_rate": 1.8260092284193062e-07, "loss": 1.091, "step": 5978 }, { "epoch": 0.8105470073883277, "grad_norm": 9.559012507926802, "learning_rate": 1.823480266832209e-07, "loss": 1.1121, "step": 5979 }, { "epoch": 0.8106825730359927, "grad_norm": 4.745742481555867, "learning_rate": 1.8209528820224008e-07, "loss": 1.1113, "step": 5980 }, { "epoch": 0.8108181386836576, "grad_norm": 4.3186869987396905, "learning_rate": 1.8184270744772678e-07, "loss": 1.0899, "step": 5981 }, { "epoch": 0.8109537043313224, "grad_norm": 5.041064556280628, "learning_rate": 1.815902844683892e-07, "loss": 1.1037, "step": 5982 }, { "epoch": 0.8110892699789873, "grad_norm": 12.550427353057929, "learning_rate": 1.8133801931290516e-07, "loss": 1.093, "step": 5983 }, { "epoch": 0.8112248356266522, "grad_norm": 4.3007791668121635, "learning_rate": 1.8108591202992195e-07, "loss": 1.1197, "step": 5984 }, { "epoch": 0.8113604012743171, "grad_norm": 7.016067669316853, "learning_rate": 1.808339626680565e-07, "loss": 1.088, "step": 5985 }, { "epoch": 0.811495966921982, "grad_norm": 8.71829076582116, "learning_rate": 1.8058217127589526e-07, "loss": 1.0713, "step": 5986 }, { "epoch": 0.8116315325696468, "grad_norm": 5.029942480841334, "learning_rate": 1.8033053790199415e-07, "loss": 1.1479, "step": 5987 }, { "epoch": 0.8117670982173117, "grad_norm": 5.338208309904585, "learning_rate": 1.8007906259487904e-07, "loss": 1.0713, "step": 5988 }, { "epoch": 0.8119026638649767, "grad_norm": 13.6920694033676, "learning_rate": 1.7982774540304402e-07, "loss": 1.0675, "step": 5989 }, { "epoch": 0.8120382295126415, "grad_norm": 4.786575359252179, "learning_rate": 1.7957658637495488e-07, "loss": 1.0916, "step": 5990 }, { "epoch": 0.8121737951603064, "grad_norm": 5.292752975830434, "learning_rate": 1.7932558555904453e-07, "loss": 1.0578, "step": 5991 }, { "epoch": 0.8123093608079712, "grad_norm": 8.557145449728068, "learning_rate": 1.790747430037174e-07, "loss": 1.0954, "step": 5992 }, { "epoch": 0.8124449264556362, "grad_norm": 4.6237802631480305, "learning_rate": 1.7882405875734564e-07, "loss": 1.1223, "step": 5993 }, { "epoch": 0.8125804921033011, "grad_norm": 4.321934073385472, "learning_rate": 1.785735328682727e-07, "loss": 1.0714, "step": 5994 }, { "epoch": 0.8127160577509659, "grad_norm": 3.773005047205251, "learning_rate": 1.7832316538480973e-07, "loss": 1.0802, "step": 5995 }, { "epoch": 0.8128516233986308, "grad_norm": 5.660215775848428, "learning_rate": 1.7807295635523845e-07, "loss": 1.0531, "step": 5996 }, { "epoch": 0.8129871890462956, "grad_norm": 5.012782660816186, "learning_rate": 1.7782290582780958e-07, "loss": 1.1076, "step": 5997 }, { "epoch": 0.8131227546939606, "grad_norm": 4.695125912972874, "learning_rate": 1.7757301385074342e-07, "loss": 1.0438, "step": 5998 }, { "epoch": 0.8132583203416255, "grad_norm": 4.830248328504129, "learning_rate": 1.7732328047222978e-07, "loss": 1.0883, "step": 5999 }, { "epoch": 0.8133938859892903, "grad_norm": 4.0830141221114955, "learning_rate": 1.7707370574042769e-07, "loss": 1.0757, "step": 6000 }, { "epoch": 0.8135294516369552, "grad_norm": 8.570483023191311, "learning_rate": 1.7682428970346553e-07, "loss": 1.1461, "step": 6001 }, { "epoch": 0.81366501728462, "grad_norm": 6.159498151730787, "learning_rate": 1.765750324094415e-07, "loss": 1.1071, "step": 6002 }, { "epoch": 0.813800582932285, "grad_norm": 4.718531441490299, "learning_rate": 1.763259339064226e-07, "loss": 1.104, "step": 6003 }, { "epoch": 0.8139361485799499, "grad_norm": 5.034144541354849, "learning_rate": 1.7607699424244582e-07, "loss": 1.1217, "step": 6004 }, { "epoch": 0.8140717142276147, "grad_norm": 6.288741030029713, "learning_rate": 1.7582821346551711e-07, "loss": 1.0732, "step": 6005 }, { "epoch": 0.8142072798752796, "grad_norm": 10.925041044866072, "learning_rate": 1.7557959162361148e-07, "loss": 1.0619, "step": 6006 }, { "epoch": 0.8143428455229444, "grad_norm": 6.0796112741987915, "learning_rate": 1.753311287646745e-07, "loss": 1.154, "step": 6007 }, { "epoch": 0.8144784111706094, "grad_norm": 4.16442143840779, "learning_rate": 1.7508282493661918e-07, "loss": 1.0883, "step": 6008 }, { "epoch": 0.8146139768182743, "grad_norm": 8.419095540957786, "learning_rate": 1.7483468018733017e-07, "loss": 1.0666, "step": 6009 }, { "epoch": 0.8147495424659391, "grad_norm": 4.759389733380771, "learning_rate": 1.7458669456465914e-07, "loss": 1.0324, "step": 6010 }, { "epoch": 0.814885108113604, "grad_norm": 5.172324016477012, "learning_rate": 1.7433886811642916e-07, "loss": 1.0884, "step": 6011 }, { "epoch": 0.8150206737612689, "grad_norm": 6.402337355635893, "learning_rate": 1.740912008904305e-07, "loss": 1.0964, "step": 6012 }, { "epoch": 0.8151562394089338, "grad_norm": 8.454828192994585, "learning_rate": 1.7384369293442501e-07, "loss": 1.1072, "step": 6013 }, { "epoch": 0.8152918050565987, "grad_norm": 4.557928113068966, "learning_rate": 1.7359634429614145e-07, "loss": 1.07, "step": 6014 }, { "epoch": 0.8154273707042635, "grad_norm": 4.1717136282366205, "learning_rate": 1.7334915502328028e-07, "loss": 1.129, "step": 6015 }, { "epoch": 0.8155629363519284, "grad_norm": 4.606592413879796, "learning_rate": 1.7310212516350908e-07, "loss": 1.1151, "step": 6016 }, { "epoch": 0.8156985019995933, "grad_norm": 4.550516799419651, "learning_rate": 1.7285525476446594e-07, "loss": 1.069, "step": 6017 }, { "epoch": 0.8158340676472582, "grad_norm": 3.9723497673614987, "learning_rate": 1.7260854387375778e-07, "loss": 1.0606, "step": 6018 }, { "epoch": 0.8159696332949231, "grad_norm": 6.149795735256934, "learning_rate": 1.7236199253896089e-07, "loss": 1.0845, "step": 6019 }, { "epoch": 0.8161051989425879, "grad_norm": 5.665229987268497, "learning_rate": 1.7211560080762078e-07, "loss": 1.108, "step": 6020 }, { "epoch": 0.8162407645902529, "grad_norm": 5.579978652386997, "learning_rate": 1.718693687272521e-07, "loss": 1.0784, "step": 6021 }, { "epoch": 0.8163763302379177, "grad_norm": 5.668774564434714, "learning_rate": 1.716232963453389e-07, "loss": 1.1363, "step": 6022 }, { "epoch": 0.8165118958855826, "grad_norm": 5.027919585325825, "learning_rate": 1.7137738370933408e-07, "loss": 1.1068, "step": 6023 }, { "epoch": 0.8166474615332475, "grad_norm": 5.163646364864996, "learning_rate": 1.7113163086666016e-07, "loss": 1.1044, "step": 6024 }, { "epoch": 0.8167830271809123, "grad_norm": 4.2390869429756695, "learning_rate": 1.7088603786470845e-07, "loss": 1.0661, "step": 6025 }, { "epoch": 0.8169185928285773, "grad_norm": 5.959822403542378, "learning_rate": 1.7064060475083975e-07, "loss": 1.1015, "step": 6026 }, { "epoch": 0.8170541584762421, "grad_norm": 5.336159833501374, "learning_rate": 1.7039533157238394e-07, "loss": 1.0609, "step": 6027 }, { "epoch": 0.817189724123907, "grad_norm": 6.004209509413609, "learning_rate": 1.7015021837663979e-07, "loss": 1.0927, "step": 6028 }, { "epoch": 0.8173252897715719, "grad_norm": 4.707093012539199, "learning_rate": 1.6990526521087567e-07, "loss": 1.1038, "step": 6029 }, { "epoch": 0.8174608554192367, "grad_norm": 4.718860457979358, "learning_rate": 1.696604721223288e-07, "loss": 1.084, "step": 6030 }, { "epoch": 0.8175964210669017, "grad_norm": 4.752601181998204, "learning_rate": 1.6941583915820578e-07, "loss": 1.1234, "step": 6031 }, { "epoch": 0.8177319867145665, "grad_norm": 4.0137262250234516, "learning_rate": 1.6917136636568176e-07, "loss": 1.0987, "step": 6032 }, { "epoch": 0.8178675523622314, "grad_norm": 7.074588825882387, "learning_rate": 1.6892705379190153e-07, "loss": 1.0598, "step": 6033 }, { "epoch": 0.8180031180098963, "grad_norm": 6.384321112072404, "learning_rate": 1.6868290148397878e-07, "loss": 1.0995, "step": 6034 }, { "epoch": 0.8181386836575611, "grad_norm": 5.2693814815254605, "learning_rate": 1.6843890948899665e-07, "loss": 1.1484, "step": 6035 }, { "epoch": 0.8182742493052261, "grad_norm": 7.814434564210597, "learning_rate": 1.6819507785400677e-07, "loss": 1.1384, "step": 6036 }, { "epoch": 0.8184098149528909, "grad_norm": 7.37605949278328, "learning_rate": 1.6795140662603026e-07, "loss": 1.1175, "step": 6037 }, { "epoch": 0.8185453806005558, "grad_norm": 3.6851869055289077, "learning_rate": 1.6770789585205725e-07, "loss": 1.0786, "step": 6038 }, { "epoch": 0.8186809462482207, "grad_norm": 6.277374130151843, "learning_rate": 1.6746454557904677e-07, "loss": 1.0407, "step": 6039 }, { "epoch": 0.8188165118958856, "grad_norm": 4.554024718465152, "learning_rate": 1.6722135585392706e-07, "loss": 1.1139, "step": 6040 }, { "epoch": 0.8189520775435505, "grad_norm": 4.35996426758938, "learning_rate": 1.6697832672359525e-07, "loss": 1.1076, "step": 6041 }, { "epoch": 0.8190876431912153, "grad_norm": 4.105713073406539, "learning_rate": 1.6673545823491774e-07, "loss": 1.094, "step": 6042 }, { "epoch": 0.8192232088388802, "grad_norm": 6.3216639383462665, "learning_rate": 1.6649275043472965e-07, "loss": 1.1056, "step": 6043 }, { "epoch": 0.8193587744865451, "grad_norm": 5.073837145556742, "learning_rate": 1.6625020336983565e-07, "loss": 1.0722, "step": 6044 }, { "epoch": 0.81949434013421, "grad_norm": 12.006354860242114, "learning_rate": 1.6600781708700816e-07, "loss": 1.0861, "step": 6045 }, { "epoch": 0.8196299057818749, "grad_norm": 7.913080474074769, "learning_rate": 1.6576559163299053e-07, "loss": 1.0704, "step": 6046 }, { "epoch": 0.8197654714295397, "grad_norm": 4.691695496017785, "learning_rate": 1.6552352705449302e-07, "loss": 1.0819, "step": 6047 }, { "epoch": 0.8199010370772046, "grad_norm": 20.68337342612684, "learning_rate": 1.6528162339819685e-07, "loss": 1.07, "step": 6048 }, { "epoch": 0.8200366027248696, "grad_norm": 5.572687303382168, "learning_rate": 1.6503988071075026e-07, "loss": 1.1045, "step": 6049 }, { "epoch": 0.8201721683725344, "grad_norm": 4.988924885895556, "learning_rate": 1.647982990387724e-07, "loss": 1.0673, "step": 6050 }, { "epoch": 0.8203077340201993, "grad_norm": 4.309790074875189, "learning_rate": 1.6455687842884936e-07, "loss": 1.046, "step": 6051 }, { "epoch": 0.8204432996678641, "grad_norm": 4.099563441232673, "learning_rate": 1.643156189275382e-07, "loss": 1.1448, "step": 6052 }, { "epoch": 0.820578865315529, "grad_norm": 8.814397839052097, "learning_rate": 1.6407452058136294e-07, "loss": 1.0824, "step": 6053 }, { "epoch": 0.820714430963194, "grad_norm": 6.4502411534107305, "learning_rate": 1.6383358343681852e-07, "loss": 1.1378, "step": 6054 }, { "epoch": 0.8208499966108588, "grad_norm": 5.912188256834623, "learning_rate": 1.6359280754036675e-07, "loss": 1.0845, "step": 6055 }, { "epoch": 0.8209855622585237, "grad_norm": 8.001136080453156, "learning_rate": 1.6335219293844038e-07, "loss": 1.0807, "step": 6056 }, { "epoch": 0.8211211279061885, "grad_norm": 6.38108900112304, "learning_rate": 1.6311173967743918e-07, "loss": 1.1307, "step": 6057 }, { "epoch": 0.8212566935538534, "grad_norm": 4.406627366951647, "learning_rate": 1.6287144780373308e-07, "loss": 1.1087, "step": 6058 }, { "epoch": 0.8213922592015184, "grad_norm": 4.316120126218616, "learning_rate": 1.6263131736366032e-07, "loss": 1.1156, "step": 6059 }, { "epoch": 0.8215278248491832, "grad_norm": 5.159512050153321, "learning_rate": 1.623913484035282e-07, "loss": 1.0917, "step": 6060 }, { "epoch": 0.8216633904968481, "grad_norm": 8.354916402850085, "learning_rate": 1.6215154096961292e-07, "loss": 1.0847, "step": 6061 }, { "epoch": 0.8217989561445129, "grad_norm": 21.861795287085222, "learning_rate": 1.619118951081594e-07, "loss": 1.0967, "step": 6062 }, { "epoch": 0.8219345217921779, "grad_norm": 3.78564337733093, "learning_rate": 1.616724108653813e-07, "loss": 1.0827, "step": 6063 }, { "epoch": 0.8220700874398428, "grad_norm": 4.606746485098543, "learning_rate": 1.614330882874616e-07, "loss": 1.1126, "step": 6064 }, { "epoch": 0.8222056530875076, "grad_norm": 5.059810239482121, "learning_rate": 1.611939274205515e-07, "loss": 1.1101, "step": 6065 }, { "epoch": 0.8223412187351725, "grad_norm": 4.564067870511866, "learning_rate": 1.6095492831077128e-07, "loss": 1.1105, "step": 6066 }, { "epoch": 0.8224767843828373, "grad_norm": 6.243165453601421, "learning_rate": 1.6071609100421048e-07, "loss": 1.0814, "step": 6067 }, { "epoch": 0.8226123500305023, "grad_norm": 4.939177640950315, "learning_rate": 1.6047741554692606e-07, "loss": 1.1347, "step": 6068 }, { "epoch": 0.8227479156781672, "grad_norm": 4.061749845162622, "learning_rate": 1.6023890198494584e-07, "loss": 1.1177, "step": 6069 }, { "epoch": 0.822883481325832, "grad_norm": 4.2170880226086815, "learning_rate": 1.6000055036426407e-07, "loss": 1.0962, "step": 6070 }, { "epoch": 0.8230190469734969, "grad_norm": 6.128787834211982, "learning_rate": 1.5976236073084627e-07, "loss": 1.0605, "step": 6071 }, { "epoch": 0.8231546126211619, "grad_norm": 3.7880789808973736, "learning_rate": 1.595243331306244e-07, "loss": 1.1311, "step": 6072 }, { "epoch": 0.8232901782688267, "grad_norm": 4.029062461875393, "learning_rate": 1.592864676095006e-07, "loss": 1.0885, "step": 6073 }, { "epoch": 0.8234257439164916, "grad_norm": 3.9177881846725984, "learning_rate": 1.5904876421334534e-07, "loss": 1.0575, "step": 6074 }, { "epoch": 0.8235613095641564, "grad_norm": 4.196531887319385, "learning_rate": 1.5881122298799788e-07, "loss": 1.099, "step": 6075 }, { "epoch": 0.8236968752118213, "grad_norm": 5.2213235537584195, "learning_rate": 1.585738439792661e-07, "loss": 1.1034, "step": 6076 }, { "epoch": 0.8238324408594863, "grad_norm": 4.568699006077439, "learning_rate": 1.5833662723292662e-07, "loss": 1.0849, "step": 6077 }, { "epoch": 0.8239680065071511, "grad_norm": 8.017244411516176, "learning_rate": 1.5809957279472496e-07, "loss": 1.0982, "step": 6078 }, { "epoch": 0.824103572154816, "grad_norm": 7.063299894056158, "learning_rate": 1.578626807103751e-07, "loss": 1.0647, "step": 6079 }, { "epoch": 0.8242391378024808, "grad_norm": 4.416718314494453, "learning_rate": 1.5762595102555987e-07, "loss": 1.0831, "step": 6080 }, { "epoch": 0.8243747034501457, "grad_norm": 5.5514941259142, "learning_rate": 1.5738938378593068e-07, "loss": 1.1166, "step": 6081 }, { "epoch": 0.8245102690978107, "grad_norm": 4.515075148874208, "learning_rate": 1.5715297903710767e-07, "loss": 1.0934, "step": 6082 }, { "epoch": 0.8246458347454755, "grad_norm": 5.171182156339941, "learning_rate": 1.5691673682467967e-07, "loss": 1.0989, "step": 6083 }, { "epoch": 0.8247814003931404, "grad_norm": 5.079015473578724, "learning_rate": 1.5668065719420398e-07, "loss": 1.1157, "step": 6084 }, { "epoch": 0.8249169660408052, "grad_norm": 4.027972822271686, "learning_rate": 1.564447401912069e-07, "loss": 1.0604, "step": 6085 }, { "epoch": 0.8250525316884701, "grad_norm": 3.6203490662565683, "learning_rate": 1.5620898586118292e-07, "loss": 1.1015, "step": 6086 }, { "epoch": 0.8251880973361351, "grad_norm": 14.833045651891478, "learning_rate": 1.5597339424959588e-07, "loss": 1.1095, "step": 6087 }, { "epoch": 0.8253236629837999, "grad_norm": 10.734561888153303, "learning_rate": 1.557379654018769e-07, "loss": 1.1121, "step": 6088 }, { "epoch": 0.8254592286314648, "grad_norm": 6.120959758232375, "learning_rate": 1.555026993634275e-07, "loss": 1.063, "step": 6089 }, { "epoch": 0.8255947942791296, "grad_norm": 4.992793965602128, "learning_rate": 1.5526759617961614e-07, "loss": 1.104, "step": 6090 }, { "epoch": 0.8257303599267946, "grad_norm": 5.617082594713395, "learning_rate": 1.5503265589578128e-07, "loss": 1.1524, "step": 6091 }, { "epoch": 0.8258659255744595, "grad_norm": 4.532818820001655, "learning_rate": 1.5479787855722858e-07, "loss": 1.0535, "step": 6092 }, { "epoch": 0.8260014912221243, "grad_norm": 4.540198389180418, "learning_rate": 1.5456326420923382e-07, "loss": 1.0947, "step": 6093 }, { "epoch": 0.8261370568697892, "grad_norm": 5.595358274716888, "learning_rate": 1.543288128970399e-07, "loss": 1.1044, "step": 6094 }, { "epoch": 0.826272622517454, "grad_norm": 5.20255732682625, "learning_rate": 1.5409452466585903e-07, "loss": 1.1105, "step": 6095 }, { "epoch": 0.826408188165119, "grad_norm": 10.048056843018454, "learning_rate": 1.5386039956087194e-07, "loss": 1.0972, "step": 6096 }, { "epoch": 0.8265437538127839, "grad_norm": 5.451413593353702, "learning_rate": 1.5362643762722782e-07, "loss": 1.0716, "step": 6097 }, { "epoch": 0.8266793194604487, "grad_norm": 4.947262963008348, "learning_rate": 1.5339263891004427e-07, "loss": 1.1132, "step": 6098 }, { "epoch": 0.8268148851081136, "grad_norm": 6.287624443489907, "learning_rate": 1.5315900345440757e-07, "loss": 1.1085, "step": 6099 }, { "epoch": 0.8269504507557784, "grad_norm": 7.154391766675998, "learning_rate": 1.5292553130537255e-07, "loss": 1.0664, "step": 6100 }, { "epoch": 0.8270860164034434, "grad_norm": 5.423231425211706, "learning_rate": 1.526922225079623e-07, "loss": 1.0949, "step": 6101 }, { "epoch": 0.8272215820511083, "grad_norm": 5.13904335499947, "learning_rate": 1.524590771071691e-07, "loss": 1.1035, "step": 6102 }, { "epoch": 0.8273571476987731, "grad_norm": 4.22132553615924, "learning_rate": 1.5222609514795225e-07, "loss": 1.0991, "step": 6103 }, { "epoch": 0.827492713346438, "grad_norm": 6.974748460946203, "learning_rate": 1.5199327667524154e-07, "loss": 1.1066, "step": 6104 }, { "epoch": 0.8276282789941029, "grad_norm": 3.9730239324290073, "learning_rate": 1.5176062173393312e-07, "loss": 1.0592, "step": 6105 }, { "epoch": 0.8277638446417678, "grad_norm": 5.2793054564080935, "learning_rate": 1.5152813036889378e-07, "loss": 1.0931, "step": 6106 }, { "epoch": 0.8278994102894327, "grad_norm": 4.009581457996126, "learning_rate": 1.5129580262495656e-07, "loss": 1.109, "step": 6107 }, { "epoch": 0.8280349759370975, "grad_norm": 6.704828639179938, "learning_rate": 1.5106363854692493e-07, "loss": 1.137, "step": 6108 }, { "epoch": 0.8281705415847624, "grad_norm": 5.4380786950834406, "learning_rate": 1.5083163817956913e-07, "loss": 1.0865, "step": 6109 }, { "epoch": 0.8283061072324273, "grad_norm": 4.913090652378196, "learning_rate": 1.5059980156762942e-07, "loss": 1.0821, "step": 6110 }, { "epoch": 0.8284416728800922, "grad_norm": 6.457356099728259, "learning_rate": 1.5036812875581274e-07, "loss": 1.0705, "step": 6111 }, { "epoch": 0.8285772385277571, "grad_norm": 4.4028063160299, "learning_rate": 1.5013661978879632e-07, "loss": 1.1102, "step": 6112 }, { "epoch": 0.8287128041754219, "grad_norm": 3.795179738003743, "learning_rate": 1.4990527471122382e-07, "loss": 1.0687, "step": 6113 }, { "epoch": 0.8288483698230869, "grad_norm": 4.564539294869655, "learning_rate": 1.4967409356770945e-07, "loss": 1.1151, "step": 6114 }, { "epoch": 0.8289839354707517, "grad_norm": 10.532769394913316, "learning_rate": 1.4944307640283382e-07, "loss": 1.0991, "step": 6115 }, { "epoch": 0.8291195011184166, "grad_norm": 4.60179417050778, "learning_rate": 1.4921222326114692e-07, "loss": 1.114, "step": 6116 }, { "epoch": 0.8292550667660815, "grad_norm": 8.762683130785716, "learning_rate": 1.4898153418716708e-07, "loss": 1.1156, "step": 6117 }, { "epoch": 0.8293906324137463, "grad_norm": 4.675711593712247, "learning_rate": 1.4875100922538087e-07, "loss": 1.0368, "step": 6118 }, { "epoch": 0.8295261980614113, "grad_norm": 4.877092278454099, "learning_rate": 1.4852064842024325e-07, "loss": 1.0949, "step": 6119 }, { "epoch": 0.8296617637090761, "grad_norm": 5.601079074279611, "learning_rate": 1.4829045181617727e-07, "loss": 1.1004, "step": 6120 }, { "epoch": 0.829797329356741, "grad_norm": 8.225799990903644, "learning_rate": 1.4806041945757474e-07, "loss": 1.1341, "step": 6121 }, { "epoch": 0.8299328950044059, "grad_norm": 6.248441799482834, "learning_rate": 1.4783055138879562e-07, "loss": 1.1161, "step": 6122 }, { "epoch": 0.8300684606520707, "grad_norm": 5.620128728340819, "learning_rate": 1.476008476541679e-07, "loss": 1.1422, "step": 6123 }, { "epoch": 0.8302040262997357, "grad_norm": 5.071835845063034, "learning_rate": 1.473713082979884e-07, "loss": 1.0561, "step": 6124 }, { "epoch": 0.8303395919474005, "grad_norm": 4.127962912863783, "learning_rate": 1.4714193336452174e-07, "loss": 1.0773, "step": 6125 }, { "epoch": 0.8304751575950654, "grad_norm": 5.81223701630325, "learning_rate": 1.4691272289800115e-07, "loss": 1.0762, "step": 6126 }, { "epoch": 0.8306107232427303, "grad_norm": 4.781284455586856, "learning_rate": 1.4668367694262817e-07, "loss": 1.1316, "step": 6127 }, { "epoch": 0.8307462888903951, "grad_norm": 5.096110049895988, "learning_rate": 1.4645479554257267e-07, "loss": 1.0877, "step": 6128 }, { "epoch": 0.8308818545380601, "grad_norm": 5.518573436102921, "learning_rate": 1.4622607874197214e-07, "loss": 1.0932, "step": 6129 }, { "epoch": 0.8310174201857249, "grad_norm": 8.278727770794282, "learning_rate": 1.4599752658493304e-07, "loss": 1.0957, "step": 6130 }, { "epoch": 0.8311529858333898, "grad_norm": 4.602933649315932, "learning_rate": 1.457691391155298e-07, "loss": 1.1057, "step": 6131 }, { "epoch": 0.8312885514810547, "grad_norm": 5.094609446139635, "learning_rate": 1.4554091637780518e-07, "loss": 1.1193, "step": 6132 }, { "epoch": 0.8314241171287196, "grad_norm": 5.7925522438129535, "learning_rate": 1.4531285841577024e-07, "loss": 1.0991, "step": 6133 }, { "epoch": 0.8315596827763845, "grad_norm": 6.620928250548275, "learning_rate": 1.4508496527340398e-07, "loss": 1.0356, "step": 6134 }, { "epoch": 0.8316952484240493, "grad_norm": 3.4528851928400384, "learning_rate": 1.448572369946539e-07, "loss": 1.1016, "step": 6135 }, { "epoch": 0.8318308140717142, "grad_norm": 4.12127365280749, "learning_rate": 1.446296736234356e-07, "loss": 1.0733, "step": 6136 }, { "epoch": 0.8319663797193791, "grad_norm": 4.114104488428055, "learning_rate": 1.444022752036328e-07, "loss": 1.1247, "step": 6137 }, { "epoch": 0.832101945367044, "grad_norm": 5.925919884565816, "learning_rate": 1.4417504177909767e-07, "loss": 1.0918, "step": 6138 }, { "epoch": 0.8322375110147089, "grad_norm": 4.395268602760095, "learning_rate": 1.4394797339365017e-07, "loss": 1.0924, "step": 6139 }, { "epoch": 0.8323730766623737, "grad_norm": 4.285966438287784, "learning_rate": 1.437210700910787e-07, "loss": 1.1131, "step": 6140 }, { "epoch": 0.8325086423100386, "grad_norm": 13.583080020079372, "learning_rate": 1.4349433191513994e-07, "loss": 1.1098, "step": 6141 }, { "epoch": 0.8326442079577036, "grad_norm": 3.77477179837348, "learning_rate": 1.4326775890955833e-07, "loss": 1.0916, "step": 6142 }, { "epoch": 0.8327797736053684, "grad_norm": 11.31839662406937, "learning_rate": 1.4304135111802707e-07, "loss": 1.1163, "step": 6143 }, { "epoch": 0.8329153392530333, "grad_norm": 4.985654693388461, "learning_rate": 1.4281510858420632e-07, "loss": 1.0985, "step": 6144 }, { "epoch": 0.8330509049006981, "grad_norm": 24.126753103118848, "learning_rate": 1.4258903135172605e-07, "loss": 1.1215, "step": 6145 }, { "epoch": 0.833186470548363, "grad_norm": 6.932971863473776, "learning_rate": 1.423631194641828e-07, "loss": 1.0643, "step": 6146 }, { "epoch": 0.833322036196028, "grad_norm": 6.163719239546204, "learning_rate": 1.421373729651425e-07, "loss": 1.0749, "step": 6147 }, { "epoch": 0.8334576018436928, "grad_norm": 4.3849090773670865, "learning_rate": 1.4191179189813796e-07, "loss": 1.0548, "step": 6148 }, { "epoch": 0.8335931674913577, "grad_norm": 4.838181165291054, "learning_rate": 1.4168637630667135e-07, "loss": 1.1074, "step": 6149 }, { "epoch": 0.8337287331390226, "grad_norm": 4.951329022247143, "learning_rate": 1.4146112623421158e-07, "loss": 1.1112, "step": 6150 }, { "epoch": 0.8338642987866874, "grad_norm": 5.965797822724177, "learning_rate": 1.4123604172419713e-07, "loss": 1.0797, "step": 6151 }, { "epoch": 0.8339998644343524, "grad_norm": 4.02185419659132, "learning_rate": 1.410111228200329e-07, "loss": 1.1673, "step": 6152 }, { "epoch": 0.8341354300820172, "grad_norm": 6.021159762801849, "learning_rate": 1.407863695650936e-07, "loss": 1.1201, "step": 6153 }, { "epoch": 0.8342709957296821, "grad_norm": 31.313469204645237, "learning_rate": 1.405617820027204e-07, "loss": 1.0476, "step": 6154 }, { "epoch": 0.834406561377347, "grad_norm": 4.544493967703673, "learning_rate": 1.4033736017622388e-07, "loss": 1.1306, "step": 6155 }, { "epoch": 0.8345421270250118, "grad_norm": 4.327870540511658, "learning_rate": 1.4011310412888145e-07, "loss": 1.1108, "step": 6156 }, { "epoch": 0.8346776926726768, "grad_norm": 7.01133608696199, "learning_rate": 1.398890139039395e-07, "loss": 1.088, "step": 6157 }, { "epoch": 0.8348132583203416, "grad_norm": 5.128166700588417, "learning_rate": 1.3966508954461175e-07, "loss": 1.0597, "step": 6158 }, { "epoch": 0.8349488239680065, "grad_norm": 5.24463037897709, "learning_rate": 1.3944133109408053e-07, "loss": 1.1332, "step": 6159 }, { "epoch": 0.8350843896156714, "grad_norm": 4.341404067116691, "learning_rate": 1.3921773859549569e-07, "loss": 1.0945, "step": 6160 }, { "epoch": 0.8352199552633363, "grad_norm": 5.872947842184885, "learning_rate": 1.389943120919753e-07, "loss": 1.1237, "step": 6161 }, { "epoch": 0.8353555209110012, "grad_norm": 5.727676919685892, "learning_rate": 1.3877105162660564e-07, "loss": 1.09, "step": 6162 }, { "epoch": 0.835491086558666, "grad_norm": 5.555582974601035, "learning_rate": 1.385479572424404e-07, "loss": 1.0762, "step": 6163 }, { "epoch": 0.8356266522063309, "grad_norm": 4.359371848146416, "learning_rate": 1.3832502898250174e-07, "loss": 1.094, "step": 6164 }, { "epoch": 0.8357622178539958, "grad_norm": 5.356527592688117, "learning_rate": 1.3810226688977967e-07, "loss": 1.0733, "step": 6165 }, { "epoch": 0.8358977835016607, "grad_norm": 4.206307452258038, "learning_rate": 1.378796710072322e-07, "loss": 1.0988, "step": 6166 }, { "epoch": 0.8360333491493256, "grad_norm": 5.981059598585987, "learning_rate": 1.3765724137778456e-07, "loss": 1.1085, "step": 6167 }, { "epoch": 0.8361689147969904, "grad_norm": 6.655863073259258, "learning_rate": 1.3743497804433147e-07, "loss": 1.0922, "step": 6168 }, { "epoch": 0.8363044804446553, "grad_norm": 5.206358912476174, "learning_rate": 1.3721288104973372e-07, "loss": 1.0781, "step": 6169 }, { "epoch": 0.8364400460923203, "grad_norm": 5.443346396783627, "learning_rate": 1.3699095043682184e-07, "loss": 1.056, "step": 6170 }, { "epoch": 0.8365756117399851, "grad_norm": 4.534427292026618, "learning_rate": 1.3676918624839285e-07, "loss": 1.1093, "step": 6171 }, { "epoch": 0.83671117738765, "grad_norm": 3.7631749046681824, "learning_rate": 1.3654758852721226e-07, "loss": 1.0939, "step": 6172 }, { "epoch": 0.8368467430353148, "grad_norm": 5.752470252195518, "learning_rate": 1.363261573160136e-07, "loss": 1.1432, "step": 6173 }, { "epoch": 0.8369823086829797, "grad_norm": 7.277695655259127, "learning_rate": 1.3610489265749801e-07, "loss": 1.0979, "step": 6174 }, { "epoch": 0.8371178743306447, "grad_norm": 5.005477487119326, "learning_rate": 1.3588379459433485e-07, "loss": 1.0432, "step": 6175 }, { "epoch": 0.8372534399783095, "grad_norm": 3.9222890689150742, "learning_rate": 1.3566286316916087e-07, "loss": 1.1017, "step": 6176 }, { "epoch": 0.8373890056259744, "grad_norm": 4.420739838554449, "learning_rate": 1.354420984245811e-07, "loss": 1.1012, "step": 6177 }, { "epoch": 0.8375245712736392, "grad_norm": 11.823636237316892, "learning_rate": 1.3522150040316826e-07, "loss": 1.1145, "step": 6178 }, { "epoch": 0.8376601369213041, "grad_norm": 6.8181046941151955, "learning_rate": 1.350010691474629e-07, "loss": 1.0977, "step": 6179 }, { "epoch": 0.8377957025689691, "grad_norm": 5.454544912722694, "learning_rate": 1.3478080469997344e-07, "loss": 1.1293, "step": 6180 }, { "epoch": 0.8379312682166339, "grad_norm": 4.237398494944032, "learning_rate": 1.3456070710317624e-07, "loss": 1.0724, "step": 6181 }, { "epoch": 0.8380668338642988, "grad_norm": 5.833221023408641, "learning_rate": 1.3434077639951525e-07, "loss": 1.0965, "step": 6182 }, { "epoch": 0.8382023995119636, "grad_norm": 5.197949384915497, "learning_rate": 1.341210126314024e-07, "loss": 1.0711, "step": 6183 }, { "epoch": 0.8383379651596286, "grad_norm": 8.03780487905834, "learning_rate": 1.3390141584121772e-07, "loss": 1.078, "step": 6184 }, { "epoch": 0.8384735308072935, "grad_norm": 4.356313187869925, "learning_rate": 1.33681986071308e-07, "loss": 1.1187, "step": 6185 }, { "epoch": 0.8386090964549583, "grad_norm": 6.106032888481173, "learning_rate": 1.3346272336398934e-07, "loss": 1.0988, "step": 6186 }, { "epoch": 0.8387446621026232, "grad_norm": 4.138503008932861, "learning_rate": 1.3324362776154408e-07, "loss": 1.1095, "step": 6187 }, { "epoch": 0.838880227750288, "grad_norm": 6.629525212583091, "learning_rate": 1.3302469930622383e-07, "loss": 1.1304, "step": 6188 }, { "epoch": 0.839015793397953, "grad_norm": 6.254427211290928, "learning_rate": 1.3280593804024642e-07, "loss": 1.0963, "step": 6189 }, { "epoch": 0.8391513590456179, "grad_norm": 4.97327686124048, "learning_rate": 1.3258734400579908e-07, "loss": 1.0836, "step": 6190 }, { "epoch": 0.8392869246932827, "grad_norm": 5.721520041944915, "learning_rate": 1.323689172450353e-07, "loss": 1.1015, "step": 6191 }, { "epoch": 0.8394224903409476, "grad_norm": 5.654879388110266, "learning_rate": 1.3215065780007718e-07, "loss": 1.0687, "step": 6192 }, { "epoch": 0.8395580559886124, "grad_norm": 3.9277274962466198, "learning_rate": 1.3193256571301426e-07, "loss": 1.0904, "step": 6193 }, { "epoch": 0.8396936216362774, "grad_norm": 6.639186731216732, "learning_rate": 1.3171464102590392e-07, "loss": 1.0477, "step": 6194 }, { "epoch": 0.8398291872839423, "grad_norm": 5.407876554881997, "learning_rate": 1.3149688378077128e-07, "loss": 1.1225, "step": 6195 }, { "epoch": 0.8399647529316071, "grad_norm": 6.186586597295336, "learning_rate": 1.3127929401960903e-07, "loss": 1.0945, "step": 6196 }, { "epoch": 0.840100318579272, "grad_norm": 7.676009300082096, "learning_rate": 1.3106187178437768e-07, "loss": 1.0865, "step": 6197 }, { "epoch": 0.8402358842269368, "grad_norm": 6.348929738027081, "learning_rate": 1.3084461711700544e-07, "loss": 1.0585, "step": 6198 }, { "epoch": 0.8403714498746018, "grad_norm": 4.621083443285548, "learning_rate": 1.3062753005938798e-07, "loss": 1.0898, "step": 6199 }, { "epoch": 0.8405070155222667, "grad_norm": 5.514191794042898, "learning_rate": 1.30410610653389e-07, "loss": 1.1179, "step": 6200 }, { "epoch": 0.8406425811699315, "grad_norm": 5.369318733498287, "learning_rate": 1.3019385894083988e-07, "loss": 1.0905, "step": 6201 }, { "epoch": 0.8407781468175964, "grad_norm": 8.28413571625134, "learning_rate": 1.2997727496353872e-07, "loss": 1.1273, "step": 6202 }, { "epoch": 0.8409137124652613, "grad_norm": 5.777730489915281, "learning_rate": 1.2976085876325303e-07, "loss": 1.1239, "step": 6203 }, { "epoch": 0.8410492781129262, "grad_norm": 6.6625580871282235, "learning_rate": 1.2954461038171603e-07, "loss": 1.0708, "step": 6204 }, { "epoch": 0.8411848437605911, "grad_norm": 4.950845069674152, "learning_rate": 1.2932852986063046e-07, "loss": 1.1116, "step": 6205 }, { "epoch": 0.8413204094082559, "grad_norm": 3.6566128441606724, "learning_rate": 1.2911261724166468e-07, "loss": 1.1008, "step": 6206 }, { "epoch": 0.8414559750559208, "grad_norm": 5.90562571661524, "learning_rate": 1.2889687256645686e-07, "loss": 1.0902, "step": 6207 }, { "epoch": 0.8415915407035857, "grad_norm": 4.530520072217708, "learning_rate": 1.286812958766106e-07, "loss": 1.0833, "step": 6208 }, { "epoch": 0.8417271063512506, "grad_norm": 4.23951962756918, "learning_rate": 1.284658872136991e-07, "loss": 1.1097, "step": 6209 }, { "epoch": 0.8418626719989155, "grad_norm": 4.487445301418404, "learning_rate": 1.2825064661926133e-07, "loss": 1.1163, "step": 6210 }, { "epoch": 0.8419982376465803, "grad_norm": 5.534379241543296, "learning_rate": 1.280355741348056e-07, "loss": 1.1349, "step": 6211 }, { "epoch": 0.8421338032942453, "grad_norm": 3.8044672615418036, "learning_rate": 1.278206698018064e-07, "loss": 1.0984, "step": 6212 }, { "epoch": 0.8422693689419101, "grad_norm": 6.901884950833638, "learning_rate": 1.2760593366170635e-07, "loss": 1.0838, "step": 6213 }, { "epoch": 0.842404934589575, "grad_norm": 4.629121911041336, "learning_rate": 1.273913657559158e-07, "loss": 1.095, "step": 6214 }, { "epoch": 0.8425405002372399, "grad_norm": 5.157246056773757, "learning_rate": 1.271769661258124e-07, "loss": 1.121, "step": 6215 }, { "epoch": 0.8426760658849047, "grad_norm": 4.604536439382431, "learning_rate": 1.2696273481274144e-07, "loss": 1.1107, "step": 6216 }, { "epoch": 0.8428116315325697, "grad_norm": 8.29253378790574, "learning_rate": 1.2674867185801575e-07, "loss": 1.1436, "step": 6217 }, { "epoch": 0.8429471971802345, "grad_norm": 6.715641196836751, "learning_rate": 1.2653477730291563e-07, "loss": 1.0984, "step": 6218 }, { "epoch": 0.8430827628278994, "grad_norm": 5.87244585876108, "learning_rate": 1.2632105118868896e-07, "loss": 1.1261, "step": 6219 }, { "epoch": 0.8432183284755643, "grad_norm": 4.788261219070887, "learning_rate": 1.2610749355655125e-07, "loss": 1.119, "step": 6220 }, { "epoch": 0.8433538941232291, "grad_norm": 6.518363659741, "learning_rate": 1.2589410444768522e-07, "loss": 1.0762, "step": 6221 }, { "epoch": 0.8434894597708941, "grad_norm": 4.530345132872542, "learning_rate": 1.256808839032415e-07, "loss": 1.1184, "step": 6222 }, { "epoch": 0.8436250254185589, "grad_norm": 6.992752584830884, "learning_rate": 1.2546783196433774e-07, "loss": 1.051, "step": 6223 }, { "epoch": 0.8437605910662238, "grad_norm": 6.064976764892199, "learning_rate": 1.2525494867205954e-07, "loss": 1.137, "step": 6224 }, { "epoch": 0.8438961567138887, "grad_norm": 4.493058141256548, "learning_rate": 1.2504223406745963e-07, "loss": 1.0862, "step": 6225 }, { "epoch": 0.8440317223615535, "grad_norm": 10.041495842317998, "learning_rate": 1.2482968819155837e-07, "loss": 1.0557, "step": 6226 }, { "epoch": 0.8441672880092185, "grad_norm": 6.767143533062967, "learning_rate": 1.2461731108534378e-07, "loss": 1.1029, "step": 6227 }, { "epoch": 0.8443028536568834, "grad_norm": 8.823892600558557, "learning_rate": 1.244051027897708e-07, "loss": 1.11, "step": 6228 }, { "epoch": 0.8444384193045482, "grad_norm": 4.981947981331535, "learning_rate": 1.2419306334576207e-07, "loss": 1.0563, "step": 6229 }, { "epoch": 0.8445739849522131, "grad_norm": 4.875762032174318, "learning_rate": 1.2398119279420793e-07, "loss": 1.0934, "step": 6230 }, { "epoch": 0.844709550599878, "grad_norm": 13.172122333844198, "learning_rate": 1.2376949117596592e-07, "loss": 1.0982, "step": 6231 }, { "epoch": 0.8448451162475429, "grad_norm": 10.875947706429766, "learning_rate": 1.2355795853186102e-07, "loss": 1.0657, "step": 6232 }, { "epoch": 0.8449806818952078, "grad_norm": 4.74294142428932, "learning_rate": 1.233465949026855e-07, "loss": 1.0942, "step": 6233 }, { "epoch": 0.8451162475428726, "grad_norm": 4.296655772422533, "learning_rate": 1.2313540032919935e-07, "loss": 1.0537, "step": 6234 }, { "epoch": 0.8452518131905375, "grad_norm": 6.499813379860282, "learning_rate": 1.2292437485212957e-07, "loss": 1.1633, "step": 6235 }, { "epoch": 0.8453873788382024, "grad_norm": 4.090080329620769, "learning_rate": 1.2271351851217104e-07, "loss": 1.0739, "step": 6236 }, { "epoch": 0.8455229444858673, "grad_norm": 4.202095810940515, "learning_rate": 1.225028313499855e-07, "loss": 1.1159, "step": 6237 }, { "epoch": 0.8456585101335322, "grad_norm": 5.786361489452508, "learning_rate": 1.222923134062025e-07, "loss": 1.0898, "step": 6238 }, { "epoch": 0.845794075781197, "grad_norm": 8.994665435810058, "learning_rate": 1.220819647214185e-07, "loss": 1.0914, "step": 6239 }, { "epoch": 0.845929641428862, "grad_norm": 5.386110668889333, "learning_rate": 1.2187178533619803e-07, "loss": 1.0798, "step": 6240 }, { "epoch": 0.8460652070765268, "grad_norm": 5.4724644237973425, "learning_rate": 1.216617752910718e-07, "loss": 1.0869, "step": 6241 }, { "epoch": 0.8462007727241917, "grad_norm": 5.532266461359799, "learning_rate": 1.2145193462653946e-07, "loss": 1.0465, "step": 6242 }, { "epoch": 0.8463363383718566, "grad_norm": 7.2548222034448635, "learning_rate": 1.212422633830663e-07, "loss": 1.1504, "step": 6243 }, { "epoch": 0.8464719040195214, "grad_norm": 8.120195127713194, "learning_rate": 1.2103276160108656e-07, "loss": 1.1166, "step": 6244 }, { "epoch": 0.8466074696671864, "grad_norm": 7.889910583050529, "learning_rate": 1.208234293210002e-07, "loss": 1.0802, "step": 6245 }, { "epoch": 0.8467430353148512, "grad_norm": 4.5118474094859975, "learning_rate": 1.2061426658317608e-07, "loss": 1.0972, "step": 6246 }, { "epoch": 0.8468786009625161, "grad_norm": 4.841848675437327, "learning_rate": 1.2040527342794872e-07, "loss": 1.0829, "step": 6247 }, { "epoch": 0.847014166610181, "grad_norm": 5.416566936125796, "learning_rate": 1.2019644989562184e-07, "loss": 1.107, "step": 6248 }, { "epoch": 0.8471497322578458, "grad_norm": 7.207349273848568, "learning_rate": 1.1998779602646436e-07, "loss": 1.1426, "step": 6249 }, { "epoch": 0.8472852979055108, "grad_norm": 4.569037023392002, "learning_rate": 1.1977931186071443e-07, "loss": 1.0436, "step": 6250 }, { "epoch": 0.8474208635531756, "grad_norm": 3.8228378880419784, "learning_rate": 1.1957099743857568e-07, "loss": 1.1075, "step": 6251 }, { "epoch": 0.8475564292008405, "grad_norm": 3.3910116868515945, "learning_rate": 1.1936285280022096e-07, "loss": 1.0883, "step": 6252 }, { "epoch": 0.8476919948485054, "grad_norm": 4.7333811761275735, "learning_rate": 1.1915487798578816e-07, "loss": 1.0597, "step": 6253 }, { "epoch": 0.8478275604961703, "grad_norm": 6.588058944071082, "learning_rate": 1.1894707303538476e-07, "loss": 1.0784, "step": 6254 }, { "epoch": 0.8479631261438352, "grad_norm": 6.566261953142074, "learning_rate": 1.1873943798908336e-07, "loss": 1.088, "step": 6255 }, { "epoch": 0.8480986917915, "grad_norm": 5.337930546816417, "learning_rate": 1.1853197288692518e-07, "loss": 1.124, "step": 6256 }, { "epoch": 0.8482342574391649, "grad_norm": 6.0240727868228054, "learning_rate": 1.183246777689182e-07, "loss": 1.0552, "step": 6257 }, { "epoch": 0.8483698230868298, "grad_norm": 5.557988293178676, "learning_rate": 1.1811755267503754e-07, "loss": 1.1018, "step": 6258 }, { "epoch": 0.8485053887344947, "grad_norm": 4.39601834847595, "learning_rate": 1.179105976452256e-07, "loss": 1.096, "step": 6259 }, { "epoch": 0.8486409543821596, "grad_norm": 13.366227913702145, "learning_rate": 1.1770381271939223e-07, "loss": 1.1206, "step": 6260 }, { "epoch": 0.8487765200298244, "grad_norm": 7.072186676325146, "learning_rate": 1.1749719793741409e-07, "loss": 1.102, "step": 6261 }, { "epoch": 0.8489120856774893, "grad_norm": 4.955973678691072, "learning_rate": 1.172907533391353e-07, "loss": 1.1131, "step": 6262 }, { "epoch": 0.8490476513251543, "grad_norm": 5.232643919337755, "learning_rate": 1.1708447896436724e-07, "loss": 1.0834, "step": 6263 }, { "epoch": 0.8491832169728191, "grad_norm": 5.719645001368853, "learning_rate": 1.1687837485288766e-07, "loss": 1.0836, "step": 6264 }, { "epoch": 0.849318782620484, "grad_norm": 3.536186022489581, "learning_rate": 1.1667244104444308e-07, "loss": 1.054, "step": 6265 }, { "epoch": 0.8494543482681488, "grad_norm": 3.517292997898538, "learning_rate": 1.1646667757874507e-07, "loss": 1.0691, "step": 6266 }, { "epoch": 0.8495899139158137, "grad_norm": 9.295454215879568, "learning_rate": 1.1626108449547467e-07, "loss": 1.0705, "step": 6267 }, { "epoch": 0.8497254795634787, "grad_norm": 5.352131917286746, "learning_rate": 1.1605566183427807e-07, "loss": 1.0842, "step": 6268 }, { "epoch": 0.8498610452111435, "grad_norm": 6.444973595574692, "learning_rate": 1.1585040963476966e-07, "loss": 1.1174, "step": 6269 }, { "epoch": 0.8499966108588084, "grad_norm": 13.585373592346254, "learning_rate": 1.156453279365307e-07, "loss": 1.0698, "step": 6270 }, { "epoch": 0.8501321765064732, "grad_norm": 5.000380146636008, "learning_rate": 1.1544041677910954e-07, "loss": 1.0633, "step": 6271 }, { "epoch": 0.8502677421541381, "grad_norm": 7.0441753817938, "learning_rate": 1.152356762020218e-07, "loss": 1.1156, "step": 6272 }, { "epoch": 0.8504033078018031, "grad_norm": 4.776969216766899, "learning_rate": 1.1503110624474987e-07, "loss": 1.0885, "step": 6273 }, { "epoch": 0.8505388734494679, "grad_norm": 4.4483664689843945, "learning_rate": 1.1482670694674367e-07, "loss": 1.1118, "step": 6274 }, { "epoch": 0.8506744390971328, "grad_norm": 5.994000389930738, "learning_rate": 1.146224783474199e-07, "loss": 1.0486, "step": 6275 }, { "epoch": 0.8508100047447976, "grad_norm": 4.750016646033573, "learning_rate": 1.1441842048616234e-07, "loss": 1.0994, "step": 6276 }, { "epoch": 0.8509455703924625, "grad_norm": 4.004048972228791, "learning_rate": 1.1421453340232213e-07, "loss": 1.084, "step": 6277 }, { "epoch": 0.8510811360401275, "grad_norm": 8.689635787431964, "learning_rate": 1.140108171352172e-07, "loss": 1.0738, "step": 6278 }, { "epoch": 0.8512167016877923, "grad_norm": 4.189055860959875, "learning_rate": 1.1380727172413262e-07, "loss": 1.0991, "step": 6279 }, { "epoch": 0.8513522673354572, "grad_norm": 4.88779711577883, "learning_rate": 1.1360389720832042e-07, "loss": 1.0957, "step": 6280 }, { "epoch": 0.851487832983122, "grad_norm": 4.845742866228685, "learning_rate": 1.1340069362699988e-07, "loss": 1.1162, "step": 6281 }, { "epoch": 0.851623398630787, "grad_norm": 4.039615741835147, "learning_rate": 1.1319766101935724e-07, "loss": 1.1115, "step": 6282 }, { "epoch": 0.8517589642784519, "grad_norm": 3.962161340862719, "learning_rate": 1.1299479942454592e-07, "loss": 1.0739, "step": 6283 }, { "epoch": 0.8518945299261167, "grad_norm": 3.9445225527750964, "learning_rate": 1.1279210888168544e-07, "loss": 1.083, "step": 6284 }, { "epoch": 0.8520300955737816, "grad_norm": 5.358120636638443, "learning_rate": 1.1258958942986396e-07, "loss": 1.0942, "step": 6285 }, { "epoch": 0.8521656612214464, "grad_norm": 8.067340415441661, "learning_rate": 1.1238724110813502e-07, "loss": 1.1307, "step": 6286 }, { "epoch": 0.8523012268691114, "grad_norm": 6.268942113887616, "learning_rate": 1.1218506395552063e-07, "loss": 1.0682, "step": 6287 }, { "epoch": 0.8524367925167763, "grad_norm": 17.647947355597395, "learning_rate": 1.1198305801100827e-07, "loss": 1.1024, "step": 6288 }, { "epoch": 0.8525723581644411, "grad_norm": 4.375499141837151, "learning_rate": 1.11781223313554e-07, "loss": 1.1165, "step": 6289 }, { "epoch": 0.852707923812106, "grad_norm": 4.9089606490223625, "learning_rate": 1.1157955990207946e-07, "loss": 1.0434, "step": 6290 }, { "epoch": 0.8528434894597708, "grad_norm": 5.108119362645076, "learning_rate": 1.1137806781547398e-07, "loss": 1.1184, "step": 6291 }, { "epoch": 0.8529790551074358, "grad_norm": 6.24014154198228, "learning_rate": 1.1117674709259372e-07, "loss": 1.1, "step": 6292 }, { "epoch": 0.8531146207551007, "grad_norm": 5.483413822721751, "learning_rate": 1.1097559777226196e-07, "loss": 1.0706, "step": 6293 }, { "epoch": 0.8532501864027655, "grad_norm": 5.35431455678248, "learning_rate": 1.1077461989326864e-07, "loss": 1.1146, "step": 6294 }, { "epoch": 0.8533857520504304, "grad_norm": 5.067948164900857, "learning_rate": 1.1057381349437067e-07, "loss": 1.0613, "step": 6295 }, { "epoch": 0.8535213176980952, "grad_norm": 4.277377721355515, "learning_rate": 1.1037317861429208e-07, "loss": 1.1101, "step": 6296 }, { "epoch": 0.8536568833457602, "grad_norm": 4.730726024565679, "learning_rate": 1.1017271529172367e-07, "loss": 1.1004, "step": 6297 }, { "epoch": 0.8537924489934251, "grad_norm": 4.478686364141669, "learning_rate": 1.0997242356532333e-07, "loss": 1.096, "step": 6298 }, { "epoch": 0.8539280146410899, "grad_norm": 4.0846396620859124, "learning_rate": 1.0977230347371568e-07, "loss": 1.0541, "step": 6299 }, { "epoch": 0.8540635802887548, "grad_norm": 9.482879865955889, "learning_rate": 1.0957235505549233e-07, "loss": 1.1011, "step": 6300 }, { "epoch": 0.8541991459364197, "grad_norm": 3.9162036955486186, "learning_rate": 1.0937257834921144e-07, "loss": 1.0702, "step": 6301 }, { "epoch": 0.8543347115840846, "grad_norm": 3.724265737368211, "learning_rate": 1.0917297339339892e-07, "loss": 1.0842, "step": 6302 }, { "epoch": 0.8544702772317495, "grad_norm": 4.880136565862914, "learning_rate": 1.0897354022654648e-07, "loss": 1.0728, "step": 6303 }, { "epoch": 0.8546058428794143, "grad_norm": 4.497501210516567, "learning_rate": 1.0877427888711377e-07, "loss": 1.0776, "step": 6304 }, { "epoch": 0.8547414085270792, "grad_norm": 5.700864080447357, "learning_rate": 1.0857518941352605e-07, "loss": 1.1254, "step": 6305 }, { "epoch": 0.8548769741747442, "grad_norm": 7.033004506687652, "learning_rate": 1.0837627184417697e-07, "loss": 1.1202, "step": 6306 }, { "epoch": 0.855012539822409, "grad_norm": 8.970339624823628, "learning_rate": 1.0817752621742537e-07, "loss": 1.0771, "step": 6307 }, { "epoch": 0.8551481054700739, "grad_norm": 10.497858619621018, "learning_rate": 1.0797895257159872e-07, "loss": 1.1378, "step": 6308 }, { "epoch": 0.8552836711177387, "grad_norm": 4.168117131437056, "learning_rate": 1.077805509449895e-07, "loss": 1.1514, "step": 6309 }, { "epoch": 0.8554192367654037, "grad_norm": 7.389029184098027, "learning_rate": 1.0758232137585854e-07, "loss": 1.0696, "step": 6310 }, { "epoch": 0.8555548024130686, "grad_norm": 8.102918234821628, "learning_rate": 1.073842639024325e-07, "loss": 1.0902, "step": 6311 }, { "epoch": 0.8556903680607334, "grad_norm": 6.220450555179414, "learning_rate": 1.0718637856290525e-07, "loss": 1.1041, "step": 6312 }, { "epoch": 0.8558259337083983, "grad_norm": 12.071404563643362, "learning_rate": 1.069886653954375e-07, "loss": 1.1319, "step": 6313 }, { "epoch": 0.8559614993560631, "grad_norm": 5.488921790524747, "learning_rate": 1.0679112443815652e-07, "loss": 1.0952, "step": 6314 }, { "epoch": 0.8560970650037281, "grad_norm": 4.839357554665643, "learning_rate": 1.0659375572915674e-07, "loss": 1.1121, "step": 6315 }, { "epoch": 0.856232630651393, "grad_norm": 4.734063572650314, "learning_rate": 1.0639655930649894e-07, "loss": 1.0471, "step": 6316 }, { "epoch": 0.8563681962990578, "grad_norm": 3.9517506696557656, "learning_rate": 1.0619953520821112e-07, "loss": 1.1203, "step": 6317 }, { "epoch": 0.8565037619467227, "grad_norm": 5.93219804976245, "learning_rate": 1.0600268347228757e-07, "loss": 1.1204, "step": 6318 }, { "epoch": 0.8566393275943875, "grad_norm": 89.7728979172213, "learning_rate": 1.0580600413668983e-07, "loss": 1.0992, "step": 6319 }, { "epoch": 0.8567748932420525, "grad_norm": 4.534189578366474, "learning_rate": 1.0560949723934587e-07, "loss": 1.1278, "step": 6320 }, { "epoch": 0.8569104588897174, "grad_norm": 4.740570996066388, "learning_rate": 1.0541316281815038e-07, "loss": 1.123, "step": 6321 }, { "epoch": 0.8570460245373822, "grad_norm": 5.33599821327672, "learning_rate": 1.0521700091096508e-07, "loss": 1.0895, "step": 6322 }, { "epoch": 0.8571815901850471, "grad_norm": 16.590982079605364, "learning_rate": 1.0502101155561816e-07, "loss": 1.113, "step": 6323 }, { "epoch": 0.857317155832712, "grad_norm": 6.664563461199032, "learning_rate": 1.0482519478990481e-07, "loss": 1.1089, "step": 6324 }, { "epoch": 0.8574527214803769, "grad_norm": 4.550381476269426, "learning_rate": 1.0462955065158618e-07, "loss": 1.0691, "step": 6325 }, { "epoch": 0.8575882871280418, "grad_norm": 4.842130539226926, "learning_rate": 1.0443407917839141e-07, "loss": 1.06, "step": 6326 }, { "epoch": 0.8577238527757066, "grad_norm": 12.922738768986413, "learning_rate": 1.0423878040801514e-07, "loss": 1.0902, "step": 6327 }, { "epoch": 0.8578594184233715, "grad_norm": 26.256260452736626, "learning_rate": 1.0404365437811946e-07, "loss": 1.064, "step": 6328 }, { "epoch": 0.8579949840710364, "grad_norm": 5.243628421345465, "learning_rate": 1.0384870112633271e-07, "loss": 1.0265, "step": 6329 }, { "epoch": 0.8581305497187013, "grad_norm": 4.399283178089424, "learning_rate": 1.0365392069025014e-07, "loss": 1.0961, "step": 6330 }, { "epoch": 0.8582661153663662, "grad_norm": 6.336289057563389, "learning_rate": 1.034593131074336e-07, "loss": 1.0962, "step": 6331 }, { "epoch": 0.858401681014031, "grad_norm": 4.267474002175087, "learning_rate": 1.0326487841541176e-07, "loss": 1.0983, "step": 6332 }, { "epoch": 0.858537246661696, "grad_norm": 9.37235173126667, "learning_rate": 1.030706166516796e-07, "loss": 1.0916, "step": 6333 }, { "epoch": 0.8586728123093608, "grad_norm": 4.296717113410337, "learning_rate": 1.0287652785369916e-07, "loss": 1.1361, "step": 6334 }, { "epoch": 0.8588083779570257, "grad_norm": 4.559732914880882, "learning_rate": 1.0268261205889894e-07, "loss": 1.0399, "step": 6335 }, { "epoch": 0.8589439436046906, "grad_norm": 4.869546534186785, "learning_rate": 1.0248886930467393e-07, "loss": 1.069, "step": 6336 }, { "epoch": 0.8590795092523554, "grad_norm": 13.363052210477012, "learning_rate": 1.022952996283859e-07, "loss": 1.0867, "step": 6337 }, { "epoch": 0.8592150749000204, "grad_norm": 4.131829931754184, "learning_rate": 1.0210190306736333e-07, "loss": 1.0848, "step": 6338 }, { "epoch": 0.8593506405476852, "grad_norm": 4.837676676549356, "learning_rate": 1.0190867965890137e-07, "loss": 1.085, "step": 6339 }, { "epoch": 0.8594862061953501, "grad_norm": 4.6384272419145445, "learning_rate": 1.0171562944026102e-07, "loss": 1.1087, "step": 6340 }, { "epoch": 0.859621771843015, "grad_norm": 4.438526727870587, "learning_rate": 1.0152275244867137e-07, "loss": 1.1159, "step": 6341 }, { "epoch": 0.8597573374906798, "grad_norm": 6.317008283907724, "learning_rate": 1.0133004872132623e-07, "loss": 1.1039, "step": 6342 }, { "epoch": 0.8598929031383448, "grad_norm": 9.407392636037224, "learning_rate": 1.0113751829538808e-07, "loss": 1.1227, "step": 6343 }, { "epoch": 0.8600284687860096, "grad_norm": 11.258795618159267, "learning_rate": 1.009451612079838e-07, "loss": 1.068, "step": 6344 }, { "epoch": 0.8601640344336745, "grad_norm": 3.5996549112447727, "learning_rate": 1.0075297749620904e-07, "loss": 1.0995, "step": 6345 }, { "epoch": 0.8602996000813394, "grad_norm": 5.417534048371695, "learning_rate": 1.0056096719712382e-07, "loss": 1.0798, "step": 6346 }, { "epoch": 0.8604351657290042, "grad_norm": 3.2167976577330983, "learning_rate": 1.0036913034775673e-07, "loss": 1.0834, "step": 6347 }, { "epoch": 0.8605707313766692, "grad_norm": 14.369645388574945, "learning_rate": 1.0017746698510122e-07, "loss": 1.1108, "step": 6348 }, { "epoch": 0.860706297024334, "grad_norm": 4.07093229903813, "learning_rate": 9.998597714611889e-08, "loss": 1.1029, "step": 6349 }, { "epoch": 0.8608418626719989, "grad_norm": 6.489459903874416, "learning_rate": 9.979466086773614e-08, "loss": 1.1029, "step": 6350 }, { "epoch": 0.8609774283196638, "grad_norm": 4.083519401133808, "learning_rate": 9.960351818684764e-08, "loss": 1.0781, "step": 6351 }, { "epoch": 0.8611129939673287, "grad_norm": 4.210920631612709, "learning_rate": 9.941254914031316e-08, "loss": 1.0983, "step": 6352 }, { "epoch": 0.8612485596149936, "grad_norm": 4.485413961800772, "learning_rate": 9.922175376495979e-08, "loss": 1.0742, "step": 6353 }, { "epoch": 0.8613841252626584, "grad_norm": 3.889773195880232, "learning_rate": 9.903113209758096e-08, "loss": 1.1267, "step": 6354 }, { "epoch": 0.8615196909103233, "grad_norm": 5.4656265085926785, "learning_rate": 9.88406841749364e-08, "loss": 1.0856, "step": 6355 }, { "epoch": 0.8616552565579882, "grad_norm": 4.3420774767539045, "learning_rate": 9.865041003375263e-08, "loss": 1.1088, "step": 6356 }, { "epoch": 0.8617908222056531, "grad_norm": 7.278964379207196, "learning_rate": 9.846030971072239e-08, "loss": 1.1028, "step": 6357 }, { "epoch": 0.861926387853318, "grad_norm": 9.78618194964203, "learning_rate": 9.827038324250514e-08, "loss": 1.0797, "step": 6358 }, { "epoch": 0.8620619535009828, "grad_norm": 4.628288400654533, "learning_rate": 9.80806306657267e-08, "loss": 1.1203, "step": 6359 }, { "epoch": 0.8621975191486477, "grad_norm": 5.096757089465211, "learning_rate": 9.789105201697923e-08, "loss": 1.0659, "step": 6360 }, { "epoch": 0.8623330847963127, "grad_norm": 6.495736529451225, "learning_rate": 9.77016473328216e-08, "loss": 1.0785, "step": 6361 }, { "epoch": 0.8624686504439775, "grad_norm": 4.391276802281098, "learning_rate": 9.751241664977927e-08, "loss": 1.143, "step": 6362 }, { "epoch": 0.8626042160916424, "grad_norm": 4.34472927572954, "learning_rate": 9.732336000434304e-08, "loss": 1.1247, "step": 6363 }, { "epoch": 0.8627397817393072, "grad_norm": 3.754694913047418, "learning_rate": 9.713447743297198e-08, "loss": 1.0981, "step": 6364 }, { "epoch": 0.8628753473869721, "grad_norm": 4.16107251365751, "learning_rate": 9.694576897208984e-08, "loss": 1.0807, "step": 6365 }, { "epoch": 0.8630109130346371, "grad_norm": 4.093971905993207, "learning_rate": 9.675723465808827e-08, "loss": 1.1063, "step": 6366 }, { "epoch": 0.8631464786823019, "grad_norm": 4.494275374552961, "learning_rate": 9.656887452732399e-08, "loss": 1.0992, "step": 6367 }, { "epoch": 0.8632820443299668, "grad_norm": 6.4561150543154255, "learning_rate": 9.638068861612091e-08, "loss": 1.0772, "step": 6368 }, { "epoch": 0.8634176099776316, "grad_norm": 4.680371773364999, "learning_rate": 9.619267696076938e-08, "loss": 1.0774, "step": 6369 }, { "epoch": 0.8635531756252965, "grad_norm": 4.432427650517396, "learning_rate": 9.600483959752592e-08, "loss": 1.1074, "step": 6370 }, { "epoch": 0.8636887412729615, "grad_norm": 4.932957931559542, "learning_rate": 9.581717656261335e-08, "loss": 1.098, "step": 6371 }, { "epoch": 0.8638243069206263, "grad_norm": 5.683476025190331, "learning_rate": 9.562968789222114e-08, "loss": 1.0637, "step": 6372 }, { "epoch": 0.8639598725682912, "grad_norm": 8.786516895364112, "learning_rate": 9.544237362250495e-08, "loss": 1.1162, "step": 6373 }, { "epoch": 0.864095438215956, "grad_norm": 4.5749021151628275, "learning_rate": 9.525523378958688e-08, "loss": 1.0394, "step": 6374 }, { "epoch": 0.864231003863621, "grad_norm": 5.484363477404481, "learning_rate": 9.50682684295554e-08, "loss": 1.0561, "step": 6375 }, { "epoch": 0.8643665695112859, "grad_norm": 4.269189309895092, "learning_rate": 9.488147757846521e-08, "loss": 1.0865, "step": 6376 }, { "epoch": 0.8645021351589507, "grad_norm": 5.124897870067812, "learning_rate": 9.46948612723375e-08, "loss": 1.0924, "step": 6377 }, { "epoch": 0.8646377008066156, "grad_norm": 4.745107880165996, "learning_rate": 9.450841954715971e-08, "loss": 1.1222, "step": 6378 }, { "epoch": 0.8647732664542804, "grad_norm": 6.670181173191833, "learning_rate": 9.432215243888575e-08, "loss": 1.0754, "step": 6379 }, { "epoch": 0.8649088321019454, "grad_norm": 10.212547316991396, "learning_rate": 9.413605998343566e-08, "loss": 1.0839, "step": 6380 }, { "epoch": 0.8650443977496103, "grad_norm": 3.9394026618512266, "learning_rate": 9.395014221669595e-08, "loss": 1.0746, "step": 6381 }, { "epoch": 0.8651799633972751, "grad_norm": 14.132182969328206, "learning_rate": 9.376439917451962e-08, "loss": 1.1113, "step": 6382 }, { "epoch": 0.86531552904494, "grad_norm": 4.700157222117008, "learning_rate": 9.357883089272512e-08, "loss": 1.1061, "step": 6383 }, { "epoch": 0.8654510946926048, "grad_norm": 4.101363191886897, "learning_rate": 9.33934374070986e-08, "loss": 1.0952, "step": 6384 }, { "epoch": 0.8655866603402698, "grad_norm": 18.60606551988984, "learning_rate": 9.320821875339091e-08, "loss": 1.0961, "step": 6385 }, { "epoch": 0.8657222259879347, "grad_norm": 5.842931319394384, "learning_rate": 9.302317496732092e-08, "loss": 1.0785, "step": 6386 }, { "epoch": 0.8658577916355995, "grad_norm": 6.290191946675568, "learning_rate": 9.283830608457199e-08, "loss": 1.0713, "step": 6387 }, { "epoch": 0.8659933572832644, "grad_norm": 4.443780403713145, "learning_rate": 9.265361214079548e-08, "loss": 1.1288, "step": 6388 }, { "epoch": 0.8661289229309294, "grad_norm": 4.949507031553907, "learning_rate": 9.246909317160744e-08, "loss": 1.0725, "step": 6389 }, { "epoch": 0.8662644885785942, "grad_norm": 4.694659608125236, "learning_rate": 9.228474921259121e-08, "loss": 1.1137, "step": 6390 }, { "epoch": 0.8664000542262591, "grad_norm": 4.4115906702396, "learning_rate": 9.210058029929602e-08, "loss": 1.1002, "step": 6391 }, { "epoch": 0.8665356198739239, "grad_norm": 18.36527199488011, "learning_rate": 9.191658646723732e-08, "loss": 1.1128, "step": 6392 }, { "epoch": 0.8666711855215888, "grad_norm": 27.337478610647988, "learning_rate": 9.173276775189709e-08, "loss": 1.0874, "step": 6393 }, { "epoch": 0.8668067511692538, "grad_norm": 5.392852664669087, "learning_rate": 9.154912418872306e-08, "loss": 1.1197, "step": 6394 }, { "epoch": 0.8669423168169186, "grad_norm": 6.144249463373787, "learning_rate": 9.136565581312961e-08, "loss": 1.0811, "step": 6395 }, { "epoch": 0.8670778824645835, "grad_norm": 13.073795170602942, "learning_rate": 9.118236266049705e-08, "loss": 1.0969, "step": 6396 }, { "epoch": 0.8672134481122483, "grad_norm": 5.378809950128747, "learning_rate": 9.099924476617216e-08, "loss": 1.0728, "step": 6397 }, { "epoch": 0.8673490137599132, "grad_norm": 4.745399461262382, "learning_rate": 9.081630216546766e-08, "loss": 1.1121, "step": 6398 }, { "epoch": 0.8674845794075782, "grad_norm": 6.076164182094861, "learning_rate": 9.063353489366287e-08, "loss": 1.0945, "step": 6399 }, { "epoch": 0.867620145055243, "grad_norm": 5.678055379792364, "learning_rate": 9.045094298600232e-08, "loss": 1.1086, "step": 6400 }, { "epoch": 0.8677557107029079, "grad_norm": 6.673816480781233, "learning_rate": 9.026852647769822e-08, "loss": 1.0846, "step": 6401 }, { "epoch": 0.8678912763505727, "grad_norm": 6.976097095911414, "learning_rate": 9.008628540392749e-08, "loss": 1.0916, "step": 6402 }, { "epoch": 0.8680268419982377, "grad_norm": 10.745807351996183, "learning_rate": 8.990421979983465e-08, "loss": 1.1062, "step": 6403 }, { "epoch": 0.8681624076459026, "grad_norm": 5.1858133288343495, "learning_rate": 8.972232970052873e-08, "loss": 1.0926, "step": 6404 }, { "epoch": 0.8682979732935674, "grad_norm": 5.026449041387194, "learning_rate": 8.954061514108657e-08, "loss": 1.1207, "step": 6405 }, { "epoch": 0.8684335389412323, "grad_norm": 6.208120188439161, "learning_rate": 8.93590761565497e-08, "loss": 1.1045, "step": 6406 }, { "epoch": 0.8685691045888971, "grad_norm": 3.597339277863684, "learning_rate": 8.917771278192709e-08, "loss": 1.0886, "step": 6407 }, { "epoch": 0.8687046702365621, "grad_norm": 6.1360010778412075, "learning_rate": 8.899652505219279e-08, "loss": 1.034, "step": 6408 }, { "epoch": 0.868840235884227, "grad_norm": 10.515899423366214, "learning_rate": 8.881551300228785e-08, "loss": 1.0824, "step": 6409 }, { "epoch": 0.8689758015318918, "grad_norm": 5.961050711477499, "learning_rate": 8.863467666711865e-08, "loss": 1.0821, "step": 6410 }, { "epoch": 0.8691113671795567, "grad_norm": 5.897750495476286, "learning_rate": 8.845401608155822e-08, "loss": 1.0869, "step": 6411 }, { "epoch": 0.8692469328272215, "grad_norm": 5.383406111110248, "learning_rate": 8.827353128044535e-08, "loss": 1.1176, "step": 6412 }, { "epoch": 0.8693824984748865, "grad_norm": 4.538965274732983, "learning_rate": 8.809322229858529e-08, "loss": 1.0787, "step": 6413 }, { "epoch": 0.8695180641225514, "grad_norm": 6.436626541067294, "learning_rate": 8.791308917074925e-08, "loss": 1.0857, "step": 6414 }, { "epoch": 0.8696536297702162, "grad_norm": 5.037420862165741, "learning_rate": 8.773313193167431e-08, "loss": 1.0641, "step": 6415 }, { "epoch": 0.8697891954178811, "grad_norm": 14.962930792324615, "learning_rate": 8.755335061606383e-08, "loss": 1.0482, "step": 6416 }, { "epoch": 0.869924761065546, "grad_norm": 4.435011925734513, "learning_rate": 8.737374525858743e-08, "loss": 1.1039, "step": 6417 }, { "epoch": 0.8700603267132109, "grad_norm": 6.949929105722671, "learning_rate": 8.719431589388026e-08, "loss": 1.0466, "step": 6418 }, { "epoch": 0.8701958923608758, "grad_norm": 5.5016722066761, "learning_rate": 8.701506255654411e-08, "loss": 1.109, "step": 6419 }, { "epoch": 0.8703314580085406, "grad_norm": 5.803037365238376, "learning_rate": 8.683598528114644e-08, "loss": 1.0816, "step": 6420 }, { "epoch": 0.8704670236562055, "grad_norm": 5.287107086594329, "learning_rate": 8.665708410222095e-08, "loss": 1.0934, "step": 6421 }, { "epoch": 0.8706025893038704, "grad_norm": 6.626580252587827, "learning_rate": 8.647835905426726e-08, "loss": 1.0892, "step": 6422 }, { "epoch": 0.8707381549515353, "grad_norm": 6.3219824184769555, "learning_rate": 8.629981017175136e-08, "loss": 1.1092, "step": 6423 }, { "epoch": 0.8708737205992002, "grad_norm": 6.32447224378881, "learning_rate": 8.61214374891045e-08, "loss": 1.131, "step": 6424 }, { "epoch": 0.871009286246865, "grad_norm": 4.431712743206938, "learning_rate": 8.59432410407248e-08, "loss": 1.134, "step": 6425 }, { "epoch": 0.87114485189453, "grad_norm": 6.390900099911164, "learning_rate": 8.576522086097593e-08, "loss": 1.0867, "step": 6426 }, { "epoch": 0.8712804175421948, "grad_norm": 7.824733201598443, "learning_rate": 8.55873769841876e-08, "loss": 1.0754, "step": 6427 }, { "epoch": 0.8714159831898597, "grad_norm": 3.8854430737126884, "learning_rate": 8.540970944465575e-08, "loss": 1.1011, "step": 6428 }, { "epoch": 0.8715515488375246, "grad_norm": 4.955298074938619, "learning_rate": 8.523221827664206e-08, "loss": 1.0933, "step": 6429 }, { "epoch": 0.8716871144851894, "grad_norm": 5.605723346436638, "learning_rate": 8.505490351437438e-08, "loss": 1.083, "step": 6430 }, { "epoch": 0.8718226801328544, "grad_norm": 10.23103308919381, "learning_rate": 8.487776519204637e-08, "loss": 1.0163, "step": 6431 }, { "epoch": 0.8719582457805192, "grad_norm": 5.448141718940039, "learning_rate": 8.470080334381791e-08, "loss": 1.084, "step": 6432 }, { "epoch": 0.8720938114281841, "grad_norm": 6.2985713739483415, "learning_rate": 8.452401800381448e-08, "loss": 1.0982, "step": 6433 }, { "epoch": 0.872229377075849, "grad_norm": 4.368583551527707, "learning_rate": 8.434740920612792e-08, "loss": 1.0502, "step": 6434 }, { "epoch": 0.8723649427235138, "grad_norm": 4.596543807790981, "learning_rate": 8.417097698481568e-08, "loss": 1.0918, "step": 6435 }, { "epoch": 0.8725005083711788, "grad_norm": 18.68480006627179, "learning_rate": 8.399472137390152e-08, "loss": 1.0828, "step": 6436 }, { "epoch": 0.8726360740188436, "grad_norm": 3.8715097559626757, "learning_rate": 8.38186424073748e-08, "loss": 1.0915, "step": 6437 }, { "epoch": 0.8727716396665085, "grad_norm": 4.258646113632195, "learning_rate": 8.364274011919114e-08, "loss": 1.1049, "step": 6438 }, { "epoch": 0.8729072053141734, "grad_norm": 5.086487814455862, "learning_rate": 8.346701454327143e-08, "loss": 1.1378, "step": 6439 }, { "epoch": 0.8730427709618382, "grad_norm": 5.806287669622855, "learning_rate": 8.329146571350365e-08, "loss": 1.0797, "step": 6440 }, { "epoch": 0.8731783366095032, "grad_norm": 3.9584161684321186, "learning_rate": 8.311609366374028e-08, "loss": 1.1282, "step": 6441 }, { "epoch": 0.873313902257168, "grad_norm": 3.791005750203046, "learning_rate": 8.294089842780117e-08, "loss": 1.0894, "step": 6442 }, { "epoch": 0.8734494679048329, "grad_norm": 5.177876766742116, "learning_rate": 8.27658800394706e-08, "loss": 1.0496, "step": 6443 }, { "epoch": 0.8735850335524978, "grad_norm": 4.580107854754112, "learning_rate": 8.259103853250027e-08, "loss": 1.0794, "step": 6444 }, { "epoch": 0.8737205992001627, "grad_norm": 4.693690729526821, "learning_rate": 8.241637394060619e-08, "loss": 1.0796, "step": 6445 }, { "epoch": 0.8738561648478276, "grad_norm": 5.082823877220271, "learning_rate": 8.224188629747175e-08, "loss": 1.0475, "step": 6446 }, { "epoch": 0.8739917304954924, "grad_norm": 4.99962691510773, "learning_rate": 8.206757563674493e-08, "loss": 1.1052, "step": 6447 }, { "epoch": 0.8741272961431573, "grad_norm": 4.971065023035394, "learning_rate": 8.189344199204073e-08, "loss": 1.1284, "step": 6448 }, { "epoch": 0.8742628617908222, "grad_norm": 10.761860539979555, "learning_rate": 8.171948539693874e-08, "loss": 1.0966, "step": 6449 }, { "epoch": 0.8743984274384871, "grad_norm": 5.997924269813314, "learning_rate": 8.154570588498599e-08, "loss": 1.1003, "step": 6450 }, { "epoch": 0.874533993086152, "grad_norm": 4.7542627861339835, "learning_rate": 8.13721034896938e-08, "loss": 1.0742, "step": 6451 }, { "epoch": 0.8746695587338168, "grad_norm": 4.8738111956110135, "learning_rate": 8.119867824454018e-08, "loss": 1.1167, "step": 6452 }, { "epoch": 0.8748051243814817, "grad_norm": 4.7184668412483255, "learning_rate": 8.102543018296892e-08, "loss": 1.1121, "step": 6453 }, { "epoch": 0.8749406900291467, "grad_norm": 4.577547022486366, "learning_rate": 8.085235933838952e-08, "loss": 1.0548, "step": 6454 }, { "epoch": 0.8750762556768115, "grad_norm": 7.403114147676685, "learning_rate": 8.067946574417739e-08, "loss": 1.0511, "step": 6455 }, { "epoch": 0.8752118213244764, "grad_norm": 4.038750558238049, "learning_rate": 8.050674943367352e-08, "loss": 1.1259, "step": 6456 }, { "epoch": 0.8753473869721412, "grad_norm": 9.417093344219337, "learning_rate": 8.033421044018496e-08, "loss": 1.1086, "step": 6457 }, { "epoch": 0.8754829526198061, "grad_norm": 4.87090628507725, "learning_rate": 8.016184879698462e-08, "loss": 1.0969, "step": 6458 }, { "epoch": 0.8756185182674711, "grad_norm": 4.5132284161998575, "learning_rate": 7.998966453731093e-08, "loss": 1.0954, "step": 6459 }, { "epoch": 0.8757540839151359, "grad_norm": 4.4978118390467605, "learning_rate": 7.981765769436833e-08, "loss": 1.0921, "step": 6460 }, { "epoch": 0.8758896495628008, "grad_norm": 5.575279163804891, "learning_rate": 7.964582830132704e-08, "loss": 1.1018, "step": 6461 }, { "epoch": 0.8760252152104656, "grad_norm": 4.48255586767327, "learning_rate": 7.94741763913227e-08, "loss": 1.1263, "step": 6462 }, { "epoch": 0.8761607808581305, "grad_norm": 5.553939788312305, "learning_rate": 7.930270199745748e-08, "loss": 1.1023, "step": 6463 }, { "epoch": 0.8762963465057955, "grad_norm": 4.633257803460365, "learning_rate": 7.913140515279837e-08, "loss": 1.0724, "step": 6464 }, { "epoch": 0.8764319121534603, "grad_norm": 3.888314005729507, "learning_rate": 7.896028589037929e-08, "loss": 1.1111, "step": 6465 }, { "epoch": 0.8765674778011252, "grad_norm": 17.97237744657706, "learning_rate": 7.87893442431985e-08, "loss": 1.1147, "step": 6466 }, { "epoch": 0.8767030434487901, "grad_norm": 6.921345298715674, "learning_rate": 7.86185802442212e-08, "loss": 1.0904, "step": 6467 }, { "epoch": 0.876838609096455, "grad_norm": 6.229295675422679, "learning_rate": 7.844799392637769e-08, "loss": 1.1292, "step": 6468 }, { "epoch": 0.8769741747441199, "grad_norm": 8.861654788109814, "learning_rate": 7.827758532256435e-08, "loss": 1.0752, "step": 6469 }, { "epoch": 0.8771097403917847, "grad_norm": 10.951515611494509, "learning_rate": 7.810735446564298e-08, "loss": 1.1091, "step": 6470 }, { "epoch": 0.8772453060394496, "grad_norm": 14.04383231871192, "learning_rate": 7.793730138844134e-08, "loss": 1.1127, "step": 6471 }, { "epoch": 0.8773808716871145, "grad_norm": 4.696999863738393, "learning_rate": 7.776742612375275e-08, "loss": 1.1009, "step": 6472 }, { "epoch": 0.8775164373347794, "grad_norm": 9.27328985274974, "learning_rate": 7.759772870433645e-08, "loss": 1.0773, "step": 6473 }, { "epoch": 0.8776520029824443, "grad_norm": 4.821766885224153, "learning_rate": 7.742820916291714e-08, "loss": 1.125, "step": 6474 }, { "epoch": 0.8777875686301091, "grad_norm": 4.385278338266494, "learning_rate": 7.725886753218536e-08, "loss": 1.0939, "step": 6475 }, { "epoch": 0.877923134277774, "grad_norm": 4.477212459047862, "learning_rate": 7.708970384479729e-08, "loss": 1.0552, "step": 6476 }, { "epoch": 0.878058699925439, "grad_norm": 6.9767284609962035, "learning_rate": 7.692071813337487e-08, "loss": 1.1317, "step": 6477 }, { "epoch": 0.8781942655731038, "grad_norm": 3.8948842102666523, "learning_rate": 7.675191043050556e-08, "loss": 1.0965, "step": 6478 }, { "epoch": 0.8783298312207687, "grad_norm": 9.784585220295615, "learning_rate": 7.658328076874287e-08, "loss": 1.0486, "step": 6479 }, { "epoch": 0.8784653968684335, "grad_norm": 7.55603228687425, "learning_rate": 7.641482918060504e-08, "loss": 1.1348, "step": 6480 }, { "epoch": 0.8786009625160984, "grad_norm": 3.9205864589131525, "learning_rate": 7.624655569857751e-08, "loss": 1.0631, "step": 6481 }, { "epoch": 0.8787365281637634, "grad_norm": 4.477793227311399, "learning_rate": 7.607846035510957e-08, "loss": 1.1224, "step": 6482 }, { "epoch": 0.8788720938114282, "grad_norm": 4.61242396914188, "learning_rate": 7.591054318261802e-08, "loss": 1.1397, "step": 6483 }, { "epoch": 0.8790076594590931, "grad_norm": 5.079631004323811, "learning_rate": 7.574280421348356e-08, "loss": 1.1198, "step": 6484 }, { "epoch": 0.8791432251067579, "grad_norm": 5.442072715833831, "learning_rate": 7.557524348005395e-08, "loss": 1.0635, "step": 6485 }, { "epoch": 0.8792787907544228, "grad_norm": 5.085058348783063, "learning_rate": 7.540786101464136e-08, "loss": 1.1122, "step": 6486 }, { "epoch": 0.8794143564020878, "grad_norm": 4.204623655084886, "learning_rate": 7.524065684952475e-08, "loss": 1.1114, "step": 6487 }, { "epoch": 0.8795499220497526, "grad_norm": 6.987751992450683, "learning_rate": 7.507363101694775e-08, "loss": 1.0952, "step": 6488 }, { "epoch": 0.8796854876974175, "grad_norm": 8.095532697558003, "learning_rate": 7.490678354912006e-08, "loss": 1.1347, "step": 6489 }, { "epoch": 0.8798210533450823, "grad_norm": 8.57719068740785, "learning_rate": 7.474011447821704e-08, "loss": 1.0885, "step": 6490 }, { "epoch": 0.8799566189927472, "grad_norm": 5.779501598566245, "learning_rate": 7.457362383637922e-08, "loss": 1.0704, "step": 6491 }, { "epoch": 0.8800921846404122, "grad_norm": 4.160964488188582, "learning_rate": 7.440731165571323e-08, "loss": 1.0864, "step": 6492 }, { "epoch": 0.880227750288077, "grad_norm": 4.408302294311316, "learning_rate": 7.42411779682911e-08, "loss": 1.0993, "step": 6493 }, { "epoch": 0.8803633159357419, "grad_norm": 4.398985495165272, "learning_rate": 7.407522280615019e-08, "loss": 1.0974, "step": 6494 }, { "epoch": 0.8804988815834067, "grad_norm": 5.679809764908166, "learning_rate": 7.39094462012938e-08, "loss": 1.1611, "step": 6495 }, { "epoch": 0.8806344472310716, "grad_norm": 13.498644709700931, "learning_rate": 7.374384818569069e-08, "loss": 1.0996, "step": 6496 }, { "epoch": 0.8807700128787366, "grad_norm": 4.7759887011267725, "learning_rate": 7.357842879127474e-08, "loss": 1.1134, "step": 6497 }, { "epoch": 0.8809055785264014, "grad_norm": 7.498207386567431, "learning_rate": 7.341318804994645e-08, "loss": 1.1213, "step": 6498 }, { "epoch": 0.8810411441740663, "grad_norm": 6.178641384085988, "learning_rate": 7.324812599357044e-08, "loss": 1.0774, "step": 6499 }, { "epoch": 0.8811767098217311, "grad_norm": 17.932575553302193, "learning_rate": 7.308324265397836e-08, "loss": 1.1033, "step": 6500 }, { "epoch": 0.8813122754693961, "grad_norm": 4.111296076775313, "learning_rate": 7.291853806296599e-08, "loss": 1.0985, "step": 6501 }, { "epoch": 0.881447841117061, "grad_norm": 5.106819116879665, "learning_rate": 7.275401225229583e-08, "loss": 1.0942, "step": 6502 }, { "epoch": 0.8815834067647258, "grad_norm": 10.691487579889431, "learning_rate": 7.258966525369492e-08, "loss": 1.1068, "step": 6503 }, { "epoch": 0.8817189724123907, "grad_norm": 4.8756928708018625, "learning_rate": 7.242549709885693e-08, "loss": 1.0546, "step": 6504 }, { "epoch": 0.8818545380600555, "grad_norm": 17.571979305326796, "learning_rate": 7.226150781943963e-08, "loss": 1.0717, "step": 6505 }, { "epoch": 0.8819901037077205, "grad_norm": 4.085741692051158, "learning_rate": 7.209769744706772e-08, "loss": 1.0622, "step": 6506 }, { "epoch": 0.8821256693553854, "grad_norm": 16.19598284258792, "learning_rate": 7.193406601333018e-08, "loss": 1.1018, "step": 6507 }, { "epoch": 0.8822612350030502, "grad_norm": 4.872767866421358, "learning_rate": 7.177061354978242e-08, "loss": 1.0979, "step": 6508 }, { "epoch": 0.8823968006507151, "grad_norm": 5.00195324319457, "learning_rate": 7.160734008794489e-08, "loss": 1.0939, "step": 6509 }, { "epoch": 0.8825323662983799, "grad_norm": 5.589996654742368, "learning_rate": 7.144424565930341e-08, "loss": 1.0627, "step": 6510 }, { "epoch": 0.8826679319460449, "grad_norm": 4.403062655675216, "learning_rate": 7.128133029530969e-08, "loss": 1.0669, "step": 6511 }, { "epoch": 0.8828034975937098, "grad_norm": 8.156424063091473, "learning_rate": 7.111859402738052e-08, "loss": 1.1016, "step": 6512 }, { "epoch": 0.8829390632413746, "grad_norm": 5.505530779178914, "learning_rate": 7.095603688689833e-08, "loss": 1.0761, "step": 6513 }, { "epoch": 0.8830746288890395, "grad_norm": 6.098383584222341, "learning_rate": 7.079365890521106e-08, "loss": 1.0664, "step": 6514 }, { "epoch": 0.8832101945367044, "grad_norm": 4.646302908188673, "learning_rate": 7.063146011363186e-08, "loss": 1.0746, "step": 6515 }, { "epoch": 0.8833457601843693, "grad_norm": 4.7513071317015445, "learning_rate": 7.046944054343961e-08, "loss": 1.1133, "step": 6516 }, { "epoch": 0.8834813258320342, "grad_norm": 5.367788488214851, "learning_rate": 7.030760022587856e-08, "loss": 1.1077, "step": 6517 }, { "epoch": 0.883616891479699, "grad_norm": 9.508857626244632, "learning_rate": 7.014593919215816e-08, "loss": 1.0884, "step": 6518 }, { "epoch": 0.8837524571273639, "grad_norm": 11.351748936443347, "learning_rate": 6.998445747345371e-08, "loss": 1.0913, "step": 6519 }, { "epoch": 0.8838880227750288, "grad_norm": 3.886839997197105, "learning_rate": 6.982315510090542e-08, "loss": 1.1104, "step": 6520 }, { "epoch": 0.8840235884226937, "grad_norm": 6.689142345927097, "learning_rate": 6.966203210561927e-08, "loss": 1.0784, "step": 6521 }, { "epoch": 0.8841591540703586, "grad_norm": 8.068348868615416, "learning_rate": 6.950108851866687e-08, "loss": 1.0716, "step": 6522 }, { "epoch": 0.8842947197180234, "grad_norm": 6.9315078175122835, "learning_rate": 6.934032437108439e-08, "loss": 1.1065, "step": 6523 }, { "epoch": 0.8844302853656884, "grad_norm": 9.491515241278876, "learning_rate": 6.917973969387424e-08, "loss": 1.1005, "step": 6524 }, { "epoch": 0.8845658510133532, "grad_norm": 6.115540113507207, "learning_rate": 6.901933451800379e-08, "loss": 1.1099, "step": 6525 }, { "epoch": 0.8847014166610181, "grad_norm": 17.732323863709713, "learning_rate": 6.885910887440593e-08, "loss": 1.075, "step": 6526 }, { "epoch": 0.884836982308683, "grad_norm": 4.875077259829453, "learning_rate": 6.869906279397897e-08, "loss": 1.1097, "step": 6527 }, { "epoch": 0.8849725479563478, "grad_norm": 5.090940899553093, "learning_rate": 6.853919630758653e-08, "loss": 1.1132, "step": 6528 }, { "epoch": 0.8851081136040128, "grad_norm": 2.962258131101913, "learning_rate": 6.837950944605763e-08, "loss": 1.0629, "step": 6529 }, { "epoch": 0.8852436792516776, "grad_norm": 4.042065260675917, "learning_rate": 6.822000224018653e-08, "loss": 1.074, "step": 6530 }, { "epoch": 0.8853792448993425, "grad_norm": 4.526803627945666, "learning_rate": 6.806067472073296e-08, "loss": 1.1075, "step": 6531 }, { "epoch": 0.8855148105470074, "grad_norm": 4.935043516710161, "learning_rate": 6.790152691842199e-08, "loss": 1.1131, "step": 6532 }, { "epoch": 0.8856503761946722, "grad_norm": 3.631441616707496, "learning_rate": 6.774255886394397e-08, "loss": 1.0701, "step": 6533 }, { "epoch": 0.8857859418423372, "grad_norm": 4.682693230668686, "learning_rate": 6.758377058795473e-08, "loss": 1.0749, "step": 6534 }, { "epoch": 0.885921507490002, "grad_norm": 6.09629834845644, "learning_rate": 6.742516212107541e-08, "loss": 1.12, "step": 6535 }, { "epoch": 0.8860570731376669, "grad_norm": 5.204885987297203, "learning_rate": 6.726673349389201e-08, "loss": 1.1025, "step": 6536 }, { "epoch": 0.8861926387853318, "grad_norm": 11.207128670263439, "learning_rate": 6.710848473695674e-08, "loss": 1.0808, "step": 6537 }, { "epoch": 0.8863282044329966, "grad_norm": 4.435449283110232, "learning_rate": 6.69504158807862e-08, "loss": 1.0635, "step": 6538 }, { "epoch": 0.8864637700806616, "grad_norm": 4.700854266847669, "learning_rate": 6.679252695586312e-08, "loss": 1.1132, "step": 6539 }, { "epoch": 0.8865993357283264, "grad_norm": 4.669535413545177, "learning_rate": 6.663481799263471e-08, "loss": 1.0423, "step": 6540 }, { "epoch": 0.8867349013759913, "grad_norm": 9.382007706002026, "learning_rate": 6.647728902151428e-08, "loss": 1.0788, "step": 6541 }, { "epoch": 0.8868704670236562, "grad_norm": 4.777989691190316, "learning_rate": 6.631994007287966e-08, "loss": 1.0983, "step": 6542 }, { "epoch": 0.887006032671321, "grad_norm": 3.412396395376187, "learning_rate": 6.616277117707492e-08, "loss": 1.0935, "step": 6543 }, { "epoch": 0.887141598318986, "grad_norm": 4.668818687326266, "learning_rate": 6.600578236440812e-08, "loss": 1.1803, "step": 6544 }, { "epoch": 0.8872771639666509, "grad_norm": 4.469413314823168, "learning_rate": 6.584897366515407e-08, "loss": 1.0778, "step": 6545 }, { "epoch": 0.8874127296143157, "grad_norm": 7.1510368209410275, "learning_rate": 6.569234510955135e-08, "loss": 1.098, "step": 6546 }, { "epoch": 0.8875482952619806, "grad_norm": 3.9592849385962, "learning_rate": 6.553589672780524e-08, "loss": 1.0901, "step": 6547 }, { "epoch": 0.8876838609096455, "grad_norm": 4.264817914064028, "learning_rate": 6.537962855008483e-08, "loss": 1.0781, "step": 6548 }, { "epoch": 0.8878194265573104, "grad_norm": 3.9864427745682454, "learning_rate": 6.522354060652602e-08, "loss": 1.1047, "step": 6549 }, { "epoch": 0.8879549922049753, "grad_norm": 5.448552917201004, "learning_rate": 6.50676329272285e-08, "loss": 1.0626, "step": 6550 }, { "epoch": 0.8880905578526401, "grad_norm": 6.310192873997657, "learning_rate": 6.491190554225811e-08, "loss": 1.0423, "step": 6551 }, { "epoch": 0.888226123500305, "grad_norm": 5.9389636388931075, "learning_rate": 6.475635848164562e-08, "loss": 1.0913, "step": 6552 }, { "epoch": 0.8883616891479699, "grad_norm": 4.640377142321369, "learning_rate": 6.460099177538703e-08, "loss": 1.0893, "step": 6553 }, { "epoch": 0.8884972547956348, "grad_norm": 4.804963121835348, "learning_rate": 6.444580545344358e-08, "loss": 1.0793, "step": 6554 }, { "epoch": 0.8886328204432997, "grad_norm": 5.042860580028391, "learning_rate": 6.429079954574168e-08, "loss": 1.0949, "step": 6555 }, { "epoch": 0.8887683860909645, "grad_norm": 7.105817129890064, "learning_rate": 6.413597408217309e-08, "loss": 1.1009, "step": 6556 }, { "epoch": 0.8889039517386295, "grad_norm": 8.376014672120077, "learning_rate": 6.398132909259457e-08, "loss": 1.0484, "step": 6557 }, { "epoch": 0.8890395173862943, "grad_norm": 3.546417386105895, "learning_rate": 6.382686460682851e-08, "loss": 1.0952, "step": 6558 }, { "epoch": 0.8891750830339592, "grad_norm": 4.961556785159372, "learning_rate": 6.367258065466152e-08, "loss": 1.0783, "step": 6559 }, { "epoch": 0.8893106486816241, "grad_norm": 7.125107942657434, "learning_rate": 6.35184772658468e-08, "loss": 1.0691, "step": 6560 }, { "epoch": 0.8894462143292889, "grad_norm": 6.877830844753448, "learning_rate": 6.336455447010126e-08, "loss": 1.1124, "step": 6561 }, { "epoch": 0.8895817799769539, "grad_norm": 4.046659588852112, "learning_rate": 6.321081229710834e-08, "loss": 1.0997, "step": 6562 }, { "epoch": 0.8897173456246187, "grad_norm": 7.554719174896079, "learning_rate": 6.305725077651558e-08, "loss": 1.1112, "step": 6563 }, { "epoch": 0.8898529112722836, "grad_norm": 4.248406448623946, "learning_rate": 6.290386993793617e-08, "loss": 1.0715, "step": 6564 }, { "epoch": 0.8899884769199485, "grad_norm": 7.298447831826002, "learning_rate": 6.275066981094857e-08, "loss": 1.0697, "step": 6565 }, { "epoch": 0.8901240425676133, "grad_norm": 6.892939267786237, "learning_rate": 6.259765042509602e-08, "loss": 1.0907, "step": 6566 }, { "epoch": 0.8902596082152783, "grad_norm": 4.7452979735769345, "learning_rate": 6.244481180988714e-08, "loss": 1.0711, "step": 6567 }, { "epoch": 0.8903951738629431, "grad_norm": 4.578676753400205, "learning_rate": 6.229215399479582e-08, "loss": 1.1227, "step": 6568 }, { "epoch": 0.890530739510608, "grad_norm": 4.529256228133703, "learning_rate": 6.213967700926071e-08, "loss": 1.0738, "step": 6569 }, { "epoch": 0.8906663051582729, "grad_norm": 4.689434636106902, "learning_rate": 6.198738088268585e-08, "loss": 1.0974, "step": 6570 }, { "epoch": 0.8908018708059378, "grad_norm": 4.295870536024485, "learning_rate": 6.183526564444042e-08, "loss": 1.1167, "step": 6571 }, { "epoch": 0.8909374364536027, "grad_norm": 9.875330838880384, "learning_rate": 6.16833313238585e-08, "loss": 1.0841, "step": 6572 }, { "epoch": 0.8910730021012675, "grad_norm": 10.869555196468712, "learning_rate": 6.153157795023956e-08, "loss": 1.1324, "step": 6573 }, { "epoch": 0.8912085677489324, "grad_norm": 3.9898039807391825, "learning_rate": 6.138000555284806e-08, "loss": 1.0764, "step": 6574 }, { "epoch": 0.8913441333965973, "grad_norm": 7.020813730427769, "learning_rate": 6.12286141609134e-08, "loss": 1.0745, "step": 6575 }, { "epoch": 0.8914796990442622, "grad_norm": 16.647660374469076, "learning_rate": 6.107740380363036e-08, "loss": 1.0984, "step": 6576 }, { "epoch": 0.8916152646919271, "grad_norm": 5.04773353465022, "learning_rate": 6.092637451015847e-08, "loss": 1.1568, "step": 6577 }, { "epoch": 0.8917508303395919, "grad_norm": 8.274720453501214, "learning_rate": 6.07755263096229e-08, "loss": 1.0908, "step": 6578 }, { "epoch": 0.8918863959872568, "grad_norm": 4.749947524995323, "learning_rate": 6.062485923111293e-08, "loss": 1.0889, "step": 6579 }, { "epoch": 0.8920219616349218, "grad_norm": 4.740145291585795, "learning_rate": 6.047437330368421e-08, "loss": 1.1153, "step": 6580 }, { "epoch": 0.8921575272825866, "grad_norm": 10.4430936089392, "learning_rate": 6.032406855635619e-08, "loss": 1.1528, "step": 6581 }, { "epoch": 0.8922930929302515, "grad_norm": 4.745236855573327, "learning_rate": 6.017394501811445e-08, "loss": 1.1284, "step": 6582 }, { "epoch": 0.8924286585779163, "grad_norm": 13.403330147459824, "learning_rate": 6.002400271790864e-08, "loss": 1.0432, "step": 6583 }, { "epoch": 0.8925642242255812, "grad_norm": 4.6691028233404825, "learning_rate": 5.987424168465439e-08, "loss": 1.1204, "step": 6584 }, { "epoch": 0.8926997898732462, "grad_norm": 9.711371435726967, "learning_rate": 5.972466194723159e-08, "loss": 1.1046, "step": 6585 }, { "epoch": 0.892835355520911, "grad_norm": 6.444266421798227, "learning_rate": 5.957526353448572e-08, "loss": 1.1253, "step": 6586 }, { "epoch": 0.8929709211685759, "grad_norm": 4.338226097450068, "learning_rate": 5.9426046475226975e-08, "loss": 1.1063, "step": 6587 }, { "epoch": 0.8931064868162407, "grad_norm": 5.4274713269567, "learning_rate": 5.9277010798230666e-08, "loss": 1.1041, "step": 6588 }, { "epoch": 0.8932420524639056, "grad_norm": 4.063764024903415, "learning_rate": 5.912815653223724e-08, "loss": 1.0364, "step": 6589 }, { "epoch": 0.8933776181115706, "grad_norm": 4.2192468124067055, "learning_rate": 5.897948370595207e-08, "loss": 1.0791, "step": 6590 }, { "epoch": 0.8935131837592354, "grad_norm": 5.110357897595751, "learning_rate": 5.8830992348045563e-08, "loss": 1.1043, "step": 6591 }, { "epoch": 0.8936487494069003, "grad_norm": 6.102307148844779, "learning_rate": 5.8682682487152915e-08, "loss": 1.0658, "step": 6592 }, { "epoch": 0.8937843150545651, "grad_norm": 7.367416527251729, "learning_rate": 5.8534554151874805e-08, "loss": 1.087, "step": 6593 }, { "epoch": 0.89391988070223, "grad_norm": 3.780779935622281, "learning_rate": 5.8386607370776274e-08, "loss": 1.083, "step": 6594 }, { "epoch": 0.894055446349895, "grad_norm": 4.6321203388590595, "learning_rate": 5.823884217238817e-08, "loss": 1.0651, "step": 6595 }, { "epoch": 0.8941910119975598, "grad_norm": 8.045961751018192, "learning_rate": 5.809125858520514e-08, "loss": 1.0629, "step": 6596 }, { "epoch": 0.8943265776452247, "grad_norm": 3.770821238662544, "learning_rate": 5.794385663768819e-08, "loss": 1.1011, "step": 6597 }, { "epoch": 0.8944621432928895, "grad_norm": 4.334426192164891, "learning_rate": 5.7796636358262155e-08, "loss": 1.0821, "step": 6598 }, { "epoch": 0.8945977089405545, "grad_norm": 7.397987959466974, "learning_rate": 5.764959777531775e-08, "loss": 1.0999, "step": 6599 }, { "epoch": 0.8947332745882194, "grad_norm": 5.317708786491681, "learning_rate": 5.750274091720964e-08, "loss": 1.1025, "step": 6600 }, { "epoch": 0.8948688402358842, "grad_norm": 4.5837626042353214, "learning_rate": 5.7356065812258604e-08, "loss": 1.0848, "step": 6601 }, { "epoch": 0.8950044058835491, "grad_norm": 4.996004309064138, "learning_rate": 5.720957248874925e-08, "loss": 1.0925, "step": 6602 }, { "epoch": 0.8951399715312139, "grad_norm": 13.807180584047833, "learning_rate": 5.706326097493219e-08, "loss": 1.0704, "step": 6603 }, { "epoch": 0.8952755371788789, "grad_norm": 6.044336476874707, "learning_rate": 5.691713129902187e-08, "loss": 1.0991, "step": 6604 }, { "epoch": 0.8954111028265438, "grad_norm": 4.593439171663866, "learning_rate": 5.677118348919874e-08, "loss": 1.094, "step": 6605 }, { "epoch": 0.8955466684742086, "grad_norm": 5.11841245243035, "learning_rate": 5.662541757360739e-08, "loss": 1.1029, "step": 6606 }, { "epoch": 0.8956822341218735, "grad_norm": 5.184832479475487, "learning_rate": 5.6479833580357796e-08, "loss": 1.0845, "step": 6607 }, { "epoch": 0.8958177997695383, "grad_norm": 5.683519294008223, "learning_rate": 5.633443153752448e-08, "loss": 1.0659, "step": 6608 }, { "epoch": 0.8959533654172033, "grad_norm": 4.2332209166913914, "learning_rate": 5.6189211473147256e-08, "loss": 1.0871, "step": 6609 }, { "epoch": 0.8960889310648682, "grad_norm": 7.9409057399778264, "learning_rate": 5.60441734152306e-08, "loss": 1.1054, "step": 6610 }, { "epoch": 0.896224496712533, "grad_norm": 5.191553668009181, "learning_rate": 5.5899317391744025e-08, "loss": 1.1035, "step": 6611 }, { "epoch": 0.8963600623601979, "grad_norm": 7.205725462115379, "learning_rate": 5.575464343062175e-08, "loss": 1.1187, "step": 6612 }, { "epoch": 0.8964956280078628, "grad_norm": 5.571062619093133, "learning_rate": 5.561015155976312e-08, "loss": 1.1096, "step": 6613 }, { "epoch": 0.8966311936555277, "grad_norm": 4.887313537611279, "learning_rate": 5.546584180703207e-08, "loss": 1.106, "step": 6614 }, { "epoch": 0.8967667593031926, "grad_norm": 4.033163821167741, "learning_rate": 5.5321714200257884e-08, "loss": 1.0704, "step": 6615 }, { "epoch": 0.8969023249508574, "grad_norm": 6.9582948334107595, "learning_rate": 5.5177768767234236e-08, "loss": 1.0677, "step": 6616 }, { "epoch": 0.8970378905985223, "grad_norm": 4.6334222763788, "learning_rate": 5.50340055357198e-08, "loss": 1.094, "step": 6617 }, { "epoch": 0.8971734562461872, "grad_norm": 6.095413807580434, "learning_rate": 5.4890424533438394e-08, "loss": 1.0923, "step": 6618 }, { "epoch": 0.8973090218938521, "grad_norm": 5.81644220195943, "learning_rate": 5.4747025788078546e-08, "loss": 1.122, "step": 6619 }, { "epoch": 0.897444587541517, "grad_norm": 4.8959433413558635, "learning_rate": 5.460380932729303e-08, "loss": 1.1003, "step": 6620 }, { "epoch": 0.8975801531891818, "grad_norm": 4.364450190742219, "learning_rate": 5.4460775178700736e-08, "loss": 1.0602, "step": 6621 }, { "epoch": 0.8977157188368468, "grad_norm": 4.543586679795936, "learning_rate": 5.431792336988417e-08, "loss": 1.0789, "step": 6622 }, { "epoch": 0.8978512844845117, "grad_norm": 5.5222728795394564, "learning_rate": 5.417525392839129e-08, "loss": 1.1184, "step": 6623 }, { "epoch": 0.8979868501321765, "grad_norm": 5.504418751542574, "learning_rate": 5.4032766881734745e-08, "loss": 1.072, "step": 6624 }, { "epoch": 0.8981224157798414, "grad_norm": 6.8602996253608515, "learning_rate": 5.3890462257392246e-08, "loss": 1.0906, "step": 6625 }, { "epoch": 0.8982579814275062, "grad_norm": 13.218149217971474, "learning_rate": 5.3748340082805824e-08, "loss": 1.0928, "step": 6626 }, { "epoch": 0.8983935470751712, "grad_norm": 5.132436325412049, "learning_rate": 5.360640038538278e-08, "loss": 1.0779, "step": 6627 }, { "epoch": 0.8985291127228361, "grad_norm": 4.9567802010516795, "learning_rate": 5.3464643192495104e-08, "loss": 1.0931, "step": 6628 }, { "epoch": 0.8986646783705009, "grad_norm": 6.9319341502412675, "learning_rate": 5.33230685314795e-08, "loss": 1.0727, "step": 6629 }, { "epoch": 0.8988002440181658, "grad_norm": 8.665559016928388, "learning_rate": 5.3181676429637447e-08, "loss": 1.0654, "step": 6630 }, { "epoch": 0.8989358096658306, "grad_norm": 5.470315488194879, "learning_rate": 5.304046691423536e-08, "loss": 1.0931, "step": 6631 }, { "epoch": 0.8990713753134956, "grad_norm": 5.967424096368395, "learning_rate": 5.289944001250446e-08, "loss": 1.115, "step": 6632 }, { "epoch": 0.8992069409611605, "grad_norm": 4.176191331615289, "learning_rate": 5.275859575164054e-08, "loss": 1.135, "step": 6633 }, { "epoch": 0.8993425066088253, "grad_norm": 5.443868965150337, "learning_rate": 5.2617934158804557e-08, "loss": 1.1134, "step": 6634 }, { "epoch": 0.8994780722564902, "grad_norm": 4.186038460586741, "learning_rate": 5.247745526112146e-08, "loss": 1.1135, "step": 6635 }, { "epoch": 0.899613637904155, "grad_norm": 4.216650683556953, "learning_rate": 5.233715908568215e-08, "loss": 1.0804, "step": 6636 }, { "epoch": 0.89974920355182, "grad_norm": 4.566230354721171, "learning_rate": 5.219704565954097e-08, "loss": 1.0897, "step": 6637 }, { "epoch": 0.8998847691994849, "grad_norm": 5.103893754151057, "learning_rate": 5.2057115009718434e-08, "loss": 1.1103, "step": 6638 }, { "epoch": 0.9000203348471497, "grad_norm": 4.797484235832523, "learning_rate": 5.191736716319828e-08, "loss": 1.1202, "step": 6639 }, { "epoch": 0.9001559004948146, "grad_norm": 7.2004630623211385, "learning_rate": 5.17778021469305e-08, "loss": 1.0854, "step": 6640 }, { "epoch": 0.9002914661424795, "grad_norm": 6.726243885827549, "learning_rate": 5.1638419987828365e-08, "loss": 1.1032, "step": 6641 }, { "epoch": 0.9004270317901444, "grad_norm": 8.191085623653066, "learning_rate": 5.149922071277146e-08, "loss": 1.049, "step": 6642 }, { "epoch": 0.9005625974378093, "grad_norm": 6.058096851887038, "learning_rate": 5.136020434860244e-08, "loss": 1.0645, "step": 6643 }, { "epoch": 0.9006981630854741, "grad_norm": 38.79998101183933, "learning_rate": 5.122137092213019e-08, "loss": 1.0889, "step": 6644 }, { "epoch": 0.900833728733139, "grad_norm": 4.963431572382984, "learning_rate": 5.108272046012718e-08, "loss": 1.1269, "step": 6645 }, { "epoch": 0.9009692943808039, "grad_norm": 6.010126231599855, "learning_rate": 5.094425298933136e-08, "loss": 1.1079, "step": 6646 }, { "epoch": 0.9011048600284688, "grad_norm": 4.505946383842033, "learning_rate": 5.080596853644492e-08, "loss": 1.0736, "step": 6647 }, { "epoch": 0.9012404256761337, "grad_norm": 4.9940162378995865, "learning_rate": 5.066786712813498e-08, "loss": 1.1272, "step": 6648 }, { "epoch": 0.9013759913237985, "grad_norm": 6.727583759932212, "learning_rate": 5.052994879103323e-08, "loss": 1.1015, "step": 6649 }, { "epoch": 0.9015115569714635, "grad_norm": 4.163698210505032, "learning_rate": 5.0392213551736176e-08, "loss": 1.0979, "step": 6650 }, { "epoch": 0.9016471226191283, "grad_norm": 20.76950378447314, "learning_rate": 5.0254661436805015e-08, "loss": 1.0839, "step": 6651 }, { "epoch": 0.9017826882667932, "grad_norm": 4.1223452437922505, "learning_rate": 5.0117292472765635e-08, "loss": 1.0888, "step": 6652 }, { "epoch": 0.9019182539144581, "grad_norm": 3.6370346649028704, "learning_rate": 4.9980106686108416e-08, "loss": 1.0939, "step": 6653 }, { "epoch": 0.9020538195621229, "grad_norm": 5.9522574453959844, "learning_rate": 4.9843104103288625e-08, "loss": 1.0717, "step": 6654 }, { "epoch": 0.9021893852097879, "grad_norm": 4.330816165425003, "learning_rate": 4.9706284750726135e-08, "loss": 1.0778, "step": 6655 }, { "epoch": 0.9023249508574527, "grad_norm": 4.282289072084844, "learning_rate": 4.956964865480551e-08, "loss": 1.0947, "step": 6656 }, { "epoch": 0.9024605165051176, "grad_norm": 4.825974098770866, "learning_rate": 4.9433195841875995e-08, "loss": 1.081, "step": 6657 }, { "epoch": 0.9025960821527825, "grad_norm": 8.378807162517658, "learning_rate": 4.9296926338251e-08, "loss": 1.0619, "step": 6658 }, { "epoch": 0.9027316478004473, "grad_norm": 5.3367730854192095, "learning_rate": 4.916084017020972e-08, "loss": 1.0849, "step": 6659 }, { "epoch": 0.9028672134481123, "grad_norm": 4.35593072649091, "learning_rate": 4.9024937363994714e-08, "loss": 1.1376, "step": 6660 }, { "epoch": 0.9030027790957771, "grad_norm": 4.815076701360549, "learning_rate": 4.888921794581424e-08, "loss": 1.0901, "step": 6661 }, { "epoch": 0.903138344743442, "grad_norm": 3.8742469370627335, "learning_rate": 4.875368194184026e-08, "loss": 1.0866, "step": 6662 }, { "epoch": 0.9032739103911069, "grad_norm": 4.533605118550536, "learning_rate": 4.8618329378210085e-08, "loss": 1.1318, "step": 6663 }, { "epoch": 0.9034094760387718, "grad_norm": 3.934332135516149, "learning_rate": 4.848316028102539e-08, "loss": 1.0943, "step": 6664 }, { "epoch": 0.9035450416864367, "grad_norm": 8.07354193447449, "learning_rate": 4.834817467635233e-08, "loss": 1.1122, "step": 6665 }, { "epoch": 0.9036806073341015, "grad_norm": 5.0699083497714055, "learning_rate": 4.821337259022196e-08, "loss": 1.0361, "step": 6666 }, { "epoch": 0.9038161729817664, "grad_norm": 4.3497985637216425, "learning_rate": 4.807875404862971e-08, "loss": 1.0989, "step": 6667 }, { "epoch": 0.9039517386294313, "grad_norm": 7.945078133760138, "learning_rate": 4.794431907753571e-08, "loss": 1.0891, "step": 6668 }, { "epoch": 0.9040873042770962, "grad_norm": 27.940998455880006, "learning_rate": 4.781006770286478e-08, "loss": 1.0831, "step": 6669 }, { "epoch": 0.9042228699247611, "grad_norm": 4.5178874621026255, "learning_rate": 4.767599995050609e-08, "loss": 1.1087, "step": 6670 }, { "epoch": 0.9043584355724259, "grad_norm": 5.309884512087542, "learning_rate": 4.7542115846313734e-08, "loss": 1.077, "step": 6671 }, { "epoch": 0.9044940012200908, "grad_norm": 6.178280177131304, "learning_rate": 4.740841541610596e-08, "loss": 1.0934, "step": 6672 }, { "epoch": 0.9046295668677558, "grad_norm": 5.905566833106348, "learning_rate": 4.727489868566603e-08, "loss": 1.1067, "step": 6673 }, { "epoch": 0.9047651325154206, "grad_norm": 6.804294001272577, "learning_rate": 4.714156568074157e-08, "loss": 1.1052, "step": 6674 }, { "epoch": 0.9049006981630855, "grad_norm": 6.060756351205914, "learning_rate": 4.700841642704478e-08, "loss": 1.0895, "step": 6675 }, { "epoch": 0.9050362638107503, "grad_norm": 4.194130377902384, "learning_rate": 4.687545095025225e-08, "loss": 1.0798, "step": 6676 }, { "epoch": 0.9051718294584152, "grad_norm": 4.04362904899519, "learning_rate": 4.6742669276005786e-08, "loss": 1.1166, "step": 6677 }, { "epoch": 0.9053073951060802, "grad_norm": 4.143362843403936, "learning_rate": 4.661007142991069e-08, "loss": 1.0746, "step": 6678 }, { "epoch": 0.905442960753745, "grad_norm": 6.522883340834855, "learning_rate": 4.6477657437537953e-08, "loss": 1.0577, "step": 6679 }, { "epoch": 0.9055785264014099, "grad_norm": 5.802976598245987, "learning_rate": 4.634542732442204e-08, "loss": 1.0845, "step": 6680 }, { "epoch": 0.9057140920490747, "grad_norm": 7.607703133111589, "learning_rate": 4.62133811160631e-08, "loss": 1.0683, "step": 6681 }, { "epoch": 0.9058496576967396, "grad_norm": 20.10799978725817, "learning_rate": 4.608151883792466e-08, "loss": 1.0552, "step": 6682 }, { "epoch": 0.9059852233444046, "grad_norm": 8.731559835931478, "learning_rate": 4.5949840515435715e-08, "loss": 1.123, "step": 6683 }, { "epoch": 0.9061207889920694, "grad_norm": 6.347873314396063, "learning_rate": 4.581834617398916e-08, "loss": 1.0747, "step": 6684 }, { "epoch": 0.9062563546397343, "grad_norm": 4.537636756864557, "learning_rate": 4.568703583894262e-08, "loss": 1.0704, "step": 6685 }, { "epoch": 0.9063919202873991, "grad_norm": 5.341872594721073, "learning_rate": 4.555590953561839e-08, "loss": 1.0909, "step": 6686 }, { "epoch": 0.906527485935064, "grad_norm": 6.54149917224897, "learning_rate": 4.542496728930301e-08, "loss": 1.0945, "step": 6687 }, { "epoch": 0.906663051582729, "grad_norm": 4.5850940406007785, "learning_rate": 4.529420912524773e-08, "loss": 1.0684, "step": 6688 }, { "epoch": 0.9067986172303938, "grad_norm": 5.247508503726573, "learning_rate": 4.516363506866827e-08, "loss": 1.0598, "step": 6689 }, { "epoch": 0.9069341828780587, "grad_norm": 4.400328825734223, "learning_rate": 4.503324514474483e-08, "loss": 1.1001, "step": 6690 }, { "epoch": 0.9070697485257235, "grad_norm": 5.602019919535275, "learning_rate": 4.4903039378621945e-08, "loss": 1.0729, "step": 6691 }, { "epoch": 0.9072053141733885, "grad_norm": 9.151481185181902, "learning_rate": 4.477301779540887e-08, "loss": 1.1043, "step": 6692 }, { "epoch": 0.9073408798210534, "grad_norm": 4.03637565335155, "learning_rate": 4.4643180420179113e-08, "loss": 1.0819, "step": 6693 }, { "epoch": 0.9074764454687182, "grad_norm": 4.644872490301521, "learning_rate": 4.451352727797109e-08, "loss": 1.077, "step": 6694 }, { "epoch": 0.9076120111163831, "grad_norm": 4.177726343033467, "learning_rate": 4.4384058393786895e-08, "loss": 1.0994, "step": 6695 }, { "epoch": 0.9077475767640479, "grad_norm": 6.26212559884093, "learning_rate": 4.425477379259424e-08, "loss": 1.0842, "step": 6696 }, { "epoch": 0.9078831424117129, "grad_norm": 11.29118602421191, "learning_rate": 4.412567349932384e-08, "loss": 1.101, "step": 6697 }, { "epoch": 0.9080187080593778, "grad_norm": 3.893031174507414, "learning_rate": 4.399675753887244e-08, "loss": 1.0952, "step": 6698 }, { "epoch": 0.9081542737070426, "grad_norm": 9.717000016210832, "learning_rate": 4.386802593609984e-08, "loss": 1.1112, "step": 6699 }, { "epoch": 0.9082898393547075, "grad_norm": 22.944146525657008, "learning_rate": 4.37394787158315e-08, "loss": 1.0725, "step": 6700 }, { "epoch": 0.9084254050023723, "grad_norm": 6.828260053640877, "learning_rate": 4.3611115902856044e-08, "loss": 1.0873, "step": 6701 }, { "epoch": 0.9085609706500373, "grad_norm": 10.030248019690703, "learning_rate": 4.3482937521928e-08, "loss": 1.1036, "step": 6702 }, { "epoch": 0.9086965362977022, "grad_norm": 3.9561315225827407, "learning_rate": 4.335494359776493e-08, "loss": 1.1092, "step": 6703 }, { "epoch": 0.908832101945367, "grad_norm": 4.561343590123654, "learning_rate": 4.322713415504975e-08, "loss": 1.0801, "step": 6704 }, { "epoch": 0.9089676675930319, "grad_norm": 7.14086482998081, "learning_rate": 4.3099509218429416e-08, "loss": 1.0531, "step": 6705 }, { "epoch": 0.9091032332406969, "grad_norm": 5.543365971648295, "learning_rate": 4.297206881251547e-08, "loss": 1.0916, "step": 6706 }, { "epoch": 0.9092387988883617, "grad_norm": 4.131154832850755, "learning_rate": 4.284481296188369e-08, "loss": 1.1039, "step": 6707 }, { "epoch": 0.9093743645360266, "grad_norm": 7.266313207362654, "learning_rate": 4.271774169107445e-08, "loss": 1.0688, "step": 6708 }, { "epoch": 0.9095099301836914, "grad_norm": 6.520531136894563, "learning_rate": 4.259085502459236e-08, "loss": 1.1028, "step": 6709 }, { "epoch": 0.9096454958313563, "grad_norm": 5.106476557677859, "learning_rate": 4.246415298690653e-08, "loss": 1.0936, "step": 6710 }, { "epoch": 0.9097810614790213, "grad_norm": 4.417541876622405, "learning_rate": 4.2337635602450514e-08, "loss": 1.0699, "step": 6711 }, { "epoch": 0.9099166271266861, "grad_norm": 5.568721445300284, "learning_rate": 4.2211302895622136e-08, "loss": 1.0861, "step": 6712 }, { "epoch": 0.910052192774351, "grad_norm": 6.623464951769584, "learning_rate": 4.208515489078368e-08, "loss": 1.0789, "step": 6713 }, { "epoch": 0.9101877584220158, "grad_norm": 4.792477814260207, "learning_rate": 4.19591916122618e-08, "loss": 1.1044, "step": 6714 }, { "epoch": 0.9103233240696808, "grad_norm": 6.163562249802779, "learning_rate": 4.18334130843474e-08, "loss": 1.0683, "step": 6715 }, { "epoch": 0.9104588897173457, "grad_norm": 5.6694838785432395, "learning_rate": 4.1707819331296076e-08, "loss": 1.1161, "step": 6716 }, { "epoch": 0.9105944553650105, "grad_norm": 10.507905183289795, "learning_rate": 4.158241037732746e-08, "loss": 1.0919, "step": 6717 }, { "epoch": 0.9107300210126754, "grad_norm": 5.40712375952366, "learning_rate": 4.1457186246625863e-08, "loss": 1.099, "step": 6718 }, { "epoch": 0.9108655866603402, "grad_norm": 5.529073629368636, "learning_rate": 4.133214696333942e-08, "loss": 1.0902, "step": 6719 }, { "epoch": 0.9110011523080052, "grad_norm": 7.814173673776146, "learning_rate": 4.1207292551581284e-08, "loss": 1.0961, "step": 6720 }, { "epoch": 0.9111367179556701, "grad_norm": 6.458378198274736, "learning_rate": 4.1082623035428424e-08, "loss": 1.111, "step": 6721 }, { "epoch": 0.9112722836033349, "grad_norm": 3.754071578577251, "learning_rate": 4.095813843892259e-08, "loss": 1.0766, "step": 6722 }, { "epoch": 0.9114078492509998, "grad_norm": 6.007033893190165, "learning_rate": 4.08338387860695e-08, "loss": 1.108, "step": 6723 }, { "epoch": 0.9115434148986646, "grad_norm": 4.158771455095877, "learning_rate": 4.0709724100839395e-08, "loss": 1.0994, "step": 6724 }, { "epoch": 0.9116789805463296, "grad_norm": 4.114677811765838, "learning_rate": 4.058579440716681e-08, "loss": 1.0874, "step": 6725 }, { "epoch": 0.9118145461939945, "grad_norm": 8.751832600905711, "learning_rate": 4.046204972895062e-08, "loss": 1.1057, "step": 6726 }, { "epoch": 0.9119501118416593, "grad_norm": 6.156102046708923, "learning_rate": 4.0338490090053966e-08, "loss": 1.1062, "step": 6727 }, { "epoch": 0.9120856774893242, "grad_norm": 4.97138509983432, "learning_rate": 4.0215115514304456e-08, "loss": 1.1135, "step": 6728 }, { "epoch": 0.912221243136989, "grad_norm": 4.807063475153926, "learning_rate": 4.009192602549383e-08, "loss": 1.1158, "step": 6729 }, { "epoch": 0.912356808784654, "grad_norm": 4.238836767587013, "learning_rate": 3.996892164737819e-08, "loss": 1.0948, "step": 6730 }, { "epoch": 0.9124923744323189, "grad_norm": 5.035660900188386, "learning_rate": 3.9846102403678027e-08, "loss": 1.0819, "step": 6731 }, { "epoch": 0.9126279400799837, "grad_norm": 4.723362619942653, "learning_rate": 3.972346831807793e-08, "loss": 1.1291, "step": 6732 }, { "epoch": 0.9127635057276486, "grad_norm": 12.54520647930711, "learning_rate": 3.960101941422711e-08, "loss": 1.0867, "step": 6733 }, { "epoch": 0.9128990713753135, "grad_norm": 6.761932076054819, "learning_rate": 3.947875571573867e-08, "loss": 1.1007, "step": 6734 }, { "epoch": 0.9130346370229784, "grad_norm": 5.786970321561242, "learning_rate": 3.93566772461904e-08, "loss": 1.0835, "step": 6735 }, { "epoch": 0.9131702026706433, "grad_norm": 5.095758378648759, "learning_rate": 3.923478402912395e-08, "loss": 1.0886, "step": 6736 }, { "epoch": 0.9133057683183081, "grad_norm": 5.801536317650991, "learning_rate": 3.911307608804582e-08, "loss": 1.0657, "step": 6737 }, { "epoch": 0.913441333965973, "grad_norm": 4.704604910153083, "learning_rate": 3.899155344642579e-08, "loss": 1.1285, "step": 6738 }, { "epoch": 0.9135768996136379, "grad_norm": 4.3539233953502485, "learning_rate": 3.887021612769936e-08, "loss": 1.0999, "step": 6739 }, { "epoch": 0.9137124652613028, "grad_norm": 5.859496163265717, "learning_rate": 3.8749064155264685e-08, "loss": 1.083, "step": 6740 }, { "epoch": 0.9138480309089677, "grad_norm": 5.395797881491602, "learning_rate": 3.862809755248564e-08, "loss": 1.1007, "step": 6741 }, { "epoch": 0.9139835965566325, "grad_norm": 4.842503215604368, "learning_rate": 3.850731634268911e-08, "loss": 1.0839, "step": 6742 }, { "epoch": 0.9141191622042975, "grad_norm": 7.6195280349947465, "learning_rate": 3.838672054916725e-08, "loss": 1.1147, "step": 6743 }, { "epoch": 0.9142547278519623, "grad_norm": 4.68538814038237, "learning_rate": 3.826631019517568e-08, "loss": 1.089, "step": 6744 }, { "epoch": 0.9143902934996272, "grad_norm": 4.690691590134507, "learning_rate": 3.814608530393493e-08, "loss": 1.1352, "step": 6745 }, { "epoch": 0.9145258591472921, "grad_norm": 5.339021381858261, "learning_rate": 3.802604589862912e-08, "loss": 1.0995, "step": 6746 }, { "epoch": 0.9146614247949569, "grad_norm": 6.185351852830596, "learning_rate": 3.790619200240697e-08, "loss": 1.0565, "step": 6747 }, { "epoch": 0.9147969904426219, "grad_norm": 4.783177469785058, "learning_rate": 3.7786523638381306e-08, "loss": 1.1043, "step": 6748 }, { "epoch": 0.9149325560902867, "grad_norm": 8.175203537671434, "learning_rate": 3.766704082962935e-08, "loss": 1.105, "step": 6749 }, { "epoch": 0.9150681217379516, "grad_norm": 8.50898659017956, "learning_rate": 3.754774359919244e-08, "loss": 1.1278, "step": 6750 }, { "epoch": 0.9152036873856165, "grad_norm": 4.069288467074857, "learning_rate": 3.7428631970076065e-08, "loss": 1.1191, "step": 6751 }, { "epoch": 0.9153392530332813, "grad_norm": 4.231022928092326, "learning_rate": 3.730970596524985e-08, "loss": 1.0858, "step": 6752 }, { "epoch": 0.9154748186809463, "grad_norm": 6.629575771241378, "learning_rate": 3.719096560764778e-08, "loss": 1.0631, "step": 6753 }, { "epoch": 0.9156103843286111, "grad_norm": 5.666800170496258, "learning_rate": 3.707241092016811e-08, "loss": 1.0884, "step": 6754 }, { "epoch": 0.915745949976276, "grad_norm": 6.322747578313881, "learning_rate": 3.69540419256732e-08, "loss": 1.1338, "step": 6755 }, { "epoch": 0.9158815156239409, "grad_norm": 4.178000213094222, "learning_rate": 3.683585864698946e-08, "loss": 1.1224, "step": 6756 }, { "epoch": 0.9160170812716057, "grad_norm": 3.730561430264897, "learning_rate": 3.6717861106907447e-08, "loss": 1.1148, "step": 6757 }, { "epoch": 0.9161526469192707, "grad_norm": 14.89138016311524, "learning_rate": 3.66000493281825e-08, "loss": 1.0671, "step": 6758 }, { "epoch": 0.9162882125669355, "grad_norm": 4.685358116783908, "learning_rate": 3.648242333353324e-08, "loss": 1.111, "step": 6759 }, { "epoch": 0.9164237782146004, "grad_norm": 6.490944786011275, "learning_rate": 3.6364983145643066e-08, "loss": 1.0946, "step": 6760 }, { "epoch": 0.9165593438622653, "grad_norm": 6.033109502677444, "learning_rate": 3.624772878715954e-08, "loss": 1.0866, "step": 6761 }, { "epoch": 0.9166949095099302, "grad_norm": 7.160297516763189, "learning_rate": 3.6130660280694005e-08, "loss": 1.0414, "step": 6762 }, { "epoch": 0.9168304751575951, "grad_norm": 6.330224590494688, "learning_rate": 3.6013777648822406e-08, "loss": 1.0554, "step": 6763 }, { "epoch": 0.9169660408052599, "grad_norm": 5.378641852924643, "learning_rate": 3.58970809140845e-08, "loss": 1.096, "step": 6764 }, { "epoch": 0.9171016064529248, "grad_norm": 9.550985333632466, "learning_rate": 3.5780570098984273e-08, "loss": 1.0987, "step": 6765 }, { "epoch": 0.9172371721005897, "grad_norm": 25.087564447009466, "learning_rate": 3.5664245225990206e-08, "loss": 1.0801, "step": 6766 }, { "epoch": 0.9173727377482546, "grad_norm": 8.07215572236466, "learning_rate": 3.554810631753436e-08, "loss": 1.0882, "step": 6767 }, { "epoch": 0.9175083033959195, "grad_norm": 5.964450082168694, "learning_rate": 3.543215339601324e-08, "loss": 1.0759, "step": 6768 }, { "epoch": 0.9176438690435843, "grad_norm": 8.338374116482235, "learning_rate": 3.531638648378754e-08, "loss": 1.0702, "step": 6769 }, { "epoch": 0.9177794346912492, "grad_norm": 4.411316845365287, "learning_rate": 3.520080560318195e-08, "loss": 1.0939, "step": 6770 }, { "epoch": 0.9179150003389142, "grad_norm": 7.357102306903869, "learning_rate": 3.508541077648541e-08, "loss": 1.1404, "step": 6771 }, { "epoch": 0.918050565986579, "grad_norm": 4.095869584200528, "learning_rate": 3.497020202595069e-08, "loss": 1.0814, "step": 6772 }, { "epoch": 0.9181861316342439, "grad_norm": 4.8485782958271955, "learning_rate": 3.485517937379512e-08, "loss": 1.0722, "step": 6773 }, { "epoch": 0.9183216972819087, "grad_norm": 4.682780068627706, "learning_rate": 3.474034284219995e-08, "loss": 1.0829, "step": 6774 }, { "epoch": 0.9184572629295736, "grad_norm": 5.708537588432172, "learning_rate": 3.462569245331004e-08, "loss": 1.0942, "step": 6775 }, { "epoch": 0.9185928285772386, "grad_norm": 4.9294849184184235, "learning_rate": 3.451122822923547e-08, "loss": 1.0668, "step": 6776 }, { "epoch": 0.9187283942249034, "grad_norm": 6.829977926148105, "learning_rate": 3.4396950192049134e-08, "loss": 1.0458, "step": 6777 }, { "epoch": 0.9188639598725683, "grad_norm": 8.205487393033946, "learning_rate": 3.4282858363789194e-08, "loss": 1.0634, "step": 6778 }, { "epoch": 0.9189995255202331, "grad_norm": 5.622283310100331, "learning_rate": 3.4168952766456924e-08, "loss": 1.0849, "step": 6779 }, { "epoch": 0.919135091167898, "grad_norm": 6.034564275971058, "learning_rate": 3.405523342201855e-08, "loss": 1.0825, "step": 6780 }, { "epoch": 0.919270656815563, "grad_norm": 5.133178672712271, "learning_rate": 3.39417003524034e-08, "loss": 1.1108, "step": 6781 }, { "epoch": 0.9194062224632278, "grad_norm": 4.3436394833715495, "learning_rate": 3.3828353579505975e-08, "loss": 1.0773, "step": 6782 }, { "epoch": 0.9195417881108927, "grad_norm": 3.5450171321091877, "learning_rate": 3.3715193125184005e-08, "loss": 1.1118, "step": 6783 }, { "epoch": 0.9196773537585576, "grad_norm": 5.260724280028162, "learning_rate": 3.3602219011259595e-08, "loss": 1.0749, "step": 6784 }, { "epoch": 0.9198129194062225, "grad_norm": 5.513426993101331, "learning_rate": 3.3489431259518975e-08, "loss": 1.0797, "step": 6785 }, { "epoch": 0.9199484850538874, "grad_norm": 4.018175066759089, "learning_rate": 3.337682989171242e-08, "loss": 1.1342, "step": 6786 }, { "epoch": 0.9200840507015522, "grad_norm": 4.818273639552952, "learning_rate": 3.326441492955412e-08, "loss": 1.0691, "step": 6787 }, { "epoch": 0.9202196163492171, "grad_norm": 5.959348441340349, "learning_rate": 3.3152186394722506e-08, "loss": 1.0939, "step": 6788 }, { "epoch": 0.920355181996882, "grad_norm": 6.171099581337683, "learning_rate": 3.304014430885982e-08, "loss": 1.0914, "step": 6789 }, { "epoch": 0.9204907476445469, "grad_norm": 5.3213392416736625, "learning_rate": 3.292828869357267e-08, "loss": 1.1285, "step": 6790 }, { "epoch": 0.9206263132922118, "grad_norm": 16.85051815257343, "learning_rate": 3.281661957043147e-08, "loss": 1.1124, "step": 6791 }, { "epoch": 0.9207618789398766, "grad_norm": 3.5654609805135906, "learning_rate": 3.270513696097055e-08, "loss": 1.1077, "step": 6792 }, { "epoch": 0.9208974445875415, "grad_norm": 10.924325812703213, "learning_rate": 3.2593840886688815e-08, "loss": 1.089, "step": 6793 }, { "epoch": 0.9210330102352065, "grad_norm": 4.171146555213543, "learning_rate": 3.248273136904844e-08, "loss": 1.0584, "step": 6794 }, { "epoch": 0.9211685758828713, "grad_norm": 4.6985732135193174, "learning_rate": 3.23718084294764e-08, "loss": 1.084, "step": 6795 }, { "epoch": 0.9213041415305362, "grad_norm": 7.544022391618123, "learning_rate": 3.226107208936279e-08, "loss": 1.0703, "step": 6796 }, { "epoch": 0.921439707178201, "grad_norm": 3.6007990158168366, "learning_rate": 3.2150522370062886e-08, "loss": 1.0724, "step": 6797 }, { "epoch": 0.9215752728258659, "grad_norm": 5.0641841289154454, "learning_rate": 3.204015929289483e-08, "loss": 1.0814, "step": 6798 }, { "epoch": 0.9217108384735309, "grad_norm": 4.562988883481651, "learning_rate": 3.1929982879141613e-08, "loss": 1.0994, "step": 6799 }, { "epoch": 0.9218464041211957, "grad_norm": 5.012049184486893, "learning_rate": 3.181999315004946e-08, "loss": 1.0636, "step": 6800 }, { "epoch": 0.9219819697688606, "grad_norm": 4.305287716968441, "learning_rate": 3.171019012682952e-08, "loss": 1.1203, "step": 6801 }, { "epoch": 0.9221175354165254, "grad_norm": 5.309873672699087, "learning_rate": 3.160057383065606e-08, "loss": 1.0682, "step": 6802 }, { "epoch": 0.9222531010641903, "grad_norm": 5.492005953898744, "learning_rate": 3.149114428266786e-08, "loss": 1.0997, "step": 6803 }, { "epoch": 0.9223886667118553, "grad_norm": 3.9652863556087175, "learning_rate": 3.138190150396758e-08, "loss": 1.0967, "step": 6804 }, { "epoch": 0.9225242323595201, "grad_norm": 4.081378168707408, "learning_rate": 3.1272845515621816e-08, "loss": 1.1372, "step": 6805 }, { "epoch": 0.922659798007185, "grad_norm": 8.813475507548286, "learning_rate": 3.116397633866108e-08, "loss": 1.1034, "step": 6806 }, { "epoch": 0.9227953636548498, "grad_norm": 5.394971451133652, "learning_rate": 3.1055293994080024e-08, "loss": 1.1156, "step": 6807 }, { "epoch": 0.9229309293025147, "grad_norm": 8.202200629774453, "learning_rate": 3.09467985028371e-08, "loss": 1.1362, "step": 6808 }, { "epoch": 0.9230664949501797, "grad_norm": 4.665174854779358, "learning_rate": 3.08384898858548e-08, "loss": 1.0923, "step": 6809 }, { "epoch": 0.9232020605978445, "grad_norm": 5.461827363340418, "learning_rate": 3.073036816401975e-08, "loss": 1.095, "step": 6810 }, { "epoch": 0.9233376262455094, "grad_norm": 5.727415934591515, "learning_rate": 3.062243335818215e-08, "loss": 1.0812, "step": 6811 }, { "epoch": 0.9234731918931742, "grad_norm": 5.341773479456003, "learning_rate": 3.051468548915648e-08, "loss": 1.0939, "step": 6812 }, { "epoch": 0.9236087575408392, "grad_norm": 4.8423176615202355, "learning_rate": 3.04071245777211e-08, "loss": 1.099, "step": 6813 }, { "epoch": 0.9237443231885041, "grad_norm": 5.223393561956888, "learning_rate": 3.0299750644618205e-08, "loss": 1.0601, "step": 6814 }, { "epoch": 0.9238798888361689, "grad_norm": 5.452481974446945, "learning_rate": 3.019256371055423e-08, "loss": 1.0978, "step": 6815 }, { "epoch": 0.9240154544838338, "grad_norm": 4.392221079192663, "learning_rate": 3.0085563796198866e-08, "loss": 1.1151, "step": 6816 }, { "epoch": 0.9241510201314986, "grad_norm": 15.474023228560277, "learning_rate": 2.997875092218671e-08, "loss": 1.0901, "step": 6817 }, { "epoch": 0.9242865857791636, "grad_norm": 6.127754082706043, "learning_rate": 2.987212510911541e-08, "loss": 1.0851, "step": 6818 }, { "epoch": 0.9244221514268285, "grad_norm": 5.729623589613003, "learning_rate": 2.976568637754717e-08, "loss": 1.0755, "step": 6819 }, { "epoch": 0.9245577170744933, "grad_norm": 3.2115478352020137, "learning_rate": 2.9659434748007696e-08, "loss": 1.0289, "step": 6820 }, { "epoch": 0.9246932827221582, "grad_norm": 5.717030428209538, "learning_rate": 2.9553370240986808e-08, "loss": 1.0732, "step": 6821 }, { "epoch": 0.924828848369823, "grad_norm": 6.805590126187482, "learning_rate": 2.944749287693815e-08, "loss": 1.1074, "step": 6822 }, { "epoch": 0.924964414017488, "grad_norm": 4.312546997670548, "learning_rate": 2.9341802676279505e-08, "loss": 1.0934, "step": 6823 }, { "epoch": 0.9250999796651529, "grad_norm": 4.209992157468812, "learning_rate": 2.923629965939234e-08, "loss": 1.0898, "step": 6824 }, { "epoch": 0.9252355453128177, "grad_norm": 4.097817632937809, "learning_rate": 2.913098384662205e-08, "loss": 1.0545, "step": 6825 }, { "epoch": 0.9253711109604826, "grad_norm": 5.938551812313157, "learning_rate": 2.902585525827783e-08, "loss": 1.0835, "step": 6826 }, { "epoch": 0.9255066766081474, "grad_norm": 5.472680665883622, "learning_rate": 2.8920913914633138e-08, "loss": 1.0848, "step": 6827 }, { "epoch": 0.9256422422558124, "grad_norm": 5.174041018020188, "learning_rate": 2.881615983592489e-08, "loss": 1.1027, "step": 6828 }, { "epoch": 0.9257778079034773, "grad_norm": 4.696695304535328, "learning_rate": 2.8711593042354154e-08, "loss": 1.0779, "step": 6829 }, { "epoch": 0.9259133735511421, "grad_norm": 10.999342465303293, "learning_rate": 2.8607213554086018e-08, "loss": 1.0968, "step": 6830 }, { "epoch": 0.926048939198807, "grad_norm": 5.2650775663849005, "learning_rate": 2.8503021391248718e-08, "loss": 1.1003, "step": 6831 }, { "epoch": 0.9261845048464719, "grad_norm": 5.126664975844648, "learning_rate": 2.839901657393551e-08, "loss": 1.116, "step": 6832 }, { "epoch": 0.9263200704941368, "grad_norm": 3.9758659954839546, "learning_rate": 2.829519912220235e-08, "loss": 1.0937, "step": 6833 }, { "epoch": 0.9264556361418017, "grad_norm": 5.660644107400706, "learning_rate": 2.819156905607012e-08, "loss": 1.1222, "step": 6834 }, { "epoch": 0.9265912017894665, "grad_norm": 7.599217487627203, "learning_rate": 2.8088126395522495e-08, "loss": 1.0737, "step": 6835 }, { "epoch": 0.9267267674371314, "grad_norm": 4.906725373797268, "learning_rate": 2.7984871160508185e-08, "loss": 1.0637, "step": 6836 }, { "epoch": 0.9268623330847963, "grad_norm": 5.377860140967681, "learning_rate": 2.7881803370938595e-08, "loss": 1.0631, "step": 6837 }, { "epoch": 0.9269978987324612, "grad_norm": 4.35742285055028, "learning_rate": 2.777892304669005e-08, "loss": 1.1097, "step": 6838 }, { "epoch": 0.9271334643801261, "grad_norm": 7.8016897294777525, "learning_rate": 2.7676230207601793e-08, "loss": 1.1361, "step": 6839 }, { "epoch": 0.9272690300277909, "grad_norm": 9.675414048832002, "learning_rate": 2.757372487347753e-08, "loss": 1.0959, "step": 6840 }, { "epoch": 0.9274045956754559, "grad_norm": 7.694729364002845, "learning_rate": 2.747140706408446e-08, "loss": 1.1048, "step": 6841 }, { "epoch": 0.9275401613231207, "grad_norm": 4.872204449306823, "learning_rate": 2.7369276799154017e-08, "loss": 1.1161, "step": 6842 }, { "epoch": 0.9276757269707856, "grad_norm": 9.084464650115914, "learning_rate": 2.7267334098381e-08, "loss": 1.091, "step": 6843 }, { "epoch": 0.9278112926184505, "grad_norm": 3.9937137409636065, "learning_rate": 2.7165578981424354e-08, "loss": 1.1088, "step": 6844 }, { "epoch": 0.9279468582661153, "grad_norm": 5.406064183537715, "learning_rate": 2.70640114679066e-08, "loss": 1.1056, "step": 6845 }, { "epoch": 0.9280824239137803, "grad_norm": 5.48173080651546, "learning_rate": 2.696263157741441e-08, "loss": 1.1161, "step": 6846 }, { "epoch": 0.9282179895614451, "grad_norm": 36.20567929054046, "learning_rate": 2.6861439329498026e-08, "loss": 1.1007, "step": 6847 }, { "epoch": 0.92835355520911, "grad_norm": 4.006615166720146, "learning_rate": 2.6760434743671623e-08, "loss": 1.0795, "step": 6848 }, { "epoch": 0.9284891208567749, "grad_norm": 4.480023859029842, "learning_rate": 2.665961783941306e-08, "loss": 1.1138, "step": 6849 }, { "epoch": 0.9286246865044397, "grad_norm": 9.015024914605242, "learning_rate": 2.6558988636164127e-08, "loss": 1.1037, "step": 6850 }, { "epoch": 0.9287602521521047, "grad_norm": 8.361339092942154, "learning_rate": 2.645854715333029e-08, "loss": 1.115, "step": 6851 }, { "epoch": 0.9288958177997695, "grad_norm": 5.55388143870181, "learning_rate": 2.6358293410281062e-08, "loss": 1.1031, "step": 6852 }, { "epoch": 0.9290313834474344, "grad_norm": 6.183257281507733, "learning_rate": 2.6258227426349533e-08, "loss": 1.1075, "step": 6853 }, { "epoch": 0.9291669490950993, "grad_norm": 3.411982888157858, "learning_rate": 2.6158349220832375e-08, "loss": 1.0798, "step": 6854 }, { "epoch": 0.9293025147427642, "grad_norm": 4.032160033172195, "learning_rate": 2.605865881299074e-08, "loss": 1.0705, "step": 6855 }, { "epoch": 0.9294380803904291, "grad_norm": 3.645897446470299, "learning_rate": 2.5959156222048805e-08, "loss": 1.0995, "step": 6856 }, { "epoch": 0.9295736460380939, "grad_norm": 4.872674350423866, "learning_rate": 2.585984146719511e-08, "loss": 1.0946, "step": 6857 }, { "epoch": 0.9297092116857588, "grad_norm": 5.720695772893044, "learning_rate": 2.5760714567581554e-08, "loss": 1.1042, "step": 6858 }, { "epoch": 0.9298447773334237, "grad_norm": 5.04795924627134, "learning_rate": 2.566177554232396e-08, "loss": 1.0719, "step": 6859 }, { "epoch": 0.9299803429810886, "grad_norm": 5.381532275931082, "learning_rate": 2.5563024410501954e-08, "loss": 1.1036, "step": 6860 }, { "epoch": 0.9301159086287535, "grad_norm": 5.5919991735836785, "learning_rate": 2.546446119115908e-08, "loss": 1.1294, "step": 6861 }, { "epoch": 0.9302514742764184, "grad_norm": 5.5786677140501455, "learning_rate": 2.5366085903302247e-08, "loss": 1.1303, "step": 6862 }, { "epoch": 0.9303870399240832, "grad_norm": 4.503632783369354, "learning_rate": 2.5267898565902503e-08, "loss": 1.0649, "step": 6863 }, { "epoch": 0.9305226055717482, "grad_norm": 3.844224214711318, "learning_rate": 2.5169899197894363e-08, "loss": 1.0893, "step": 6864 }, { "epoch": 0.930658171219413, "grad_norm": 4.160228440822478, "learning_rate": 2.507208781817638e-08, "loss": 1.1171, "step": 6865 }, { "epoch": 0.9307937368670779, "grad_norm": 9.283728146413036, "learning_rate": 2.4974464445610688e-08, "loss": 1.1063, "step": 6866 }, { "epoch": 0.9309293025147428, "grad_norm": 4.8926012446528375, "learning_rate": 2.4877029099023116e-08, "loss": 1.0916, "step": 6867 }, { "epoch": 0.9310648681624076, "grad_norm": 5.993456889222534, "learning_rate": 2.4779781797203303e-08, "loss": 1.1064, "step": 6868 }, { "epoch": 0.9312004338100726, "grad_norm": 4.615138107316933, "learning_rate": 2.468272255890469e-08, "loss": 1.0573, "step": 6869 }, { "epoch": 0.9313359994577374, "grad_norm": 3.684942289014095, "learning_rate": 2.4585851402844305e-08, "loss": 1.0773, "step": 6870 }, { "epoch": 0.9314715651054023, "grad_norm": 5.534914718244278, "learning_rate": 2.4489168347703093e-08, "loss": 1.1101, "step": 6871 }, { "epoch": 0.9316071307530672, "grad_norm": 4.236880591277273, "learning_rate": 2.4392673412125476e-08, "loss": 1.0973, "step": 6872 }, { "epoch": 0.931742696400732, "grad_norm": 4.357080599379889, "learning_rate": 2.429636661472001e-08, "loss": 1.0839, "step": 6873 }, { "epoch": 0.931878262048397, "grad_norm": 3.5222851921009135, "learning_rate": 2.4200247974058175e-08, "loss": 1.0929, "step": 6874 }, { "epoch": 0.9320138276960618, "grad_norm": 6.2419835691461, "learning_rate": 2.4104317508676363e-08, "loss": 1.0957, "step": 6875 }, { "epoch": 0.9321493933437267, "grad_norm": 5.538889163861948, "learning_rate": 2.4008575237073335e-08, "loss": 1.1118, "step": 6876 }, { "epoch": 0.9322849589913916, "grad_norm": 5.9614025722538, "learning_rate": 2.3913021177712876e-08, "loss": 1.0929, "step": 6877 }, { "epoch": 0.9324205246390564, "grad_norm": 7.068358305627641, "learning_rate": 2.3817655349021247e-08, "loss": 1.1015, "step": 6878 }, { "epoch": 0.9325560902867214, "grad_norm": 11.61835027649795, "learning_rate": 2.3722477769389515e-08, "loss": 1.0916, "step": 6879 }, { "epoch": 0.9326916559343862, "grad_norm": 7.94261279026853, "learning_rate": 2.362748845717155e-08, "loss": 1.099, "step": 6880 }, { "epoch": 0.9328272215820511, "grad_norm": 5.045236117713097, "learning_rate": 2.3532687430685373e-08, "loss": 1.0601, "step": 6881 }, { "epoch": 0.932962787229716, "grad_norm": 26.13912876616065, "learning_rate": 2.3438074708212795e-08, "loss": 1.1033, "step": 6882 }, { "epoch": 0.9330983528773809, "grad_norm": 11.189628431799573, "learning_rate": 2.3343650307998896e-08, "loss": 1.0869, "step": 6883 }, { "epoch": 0.9332339185250458, "grad_norm": 7.883999599271442, "learning_rate": 2.3249414248252775e-08, "loss": 1.113, "step": 6884 }, { "epoch": 0.9333694841727106, "grad_norm": 4.098976768364471, "learning_rate": 2.3155366547147115e-08, "loss": 1.105, "step": 6885 }, { "epoch": 0.9335050498203755, "grad_norm": 8.65341842487518, "learning_rate": 2.30615072228183e-08, "loss": 1.0734, "step": 6886 }, { "epoch": 0.9336406154680404, "grad_norm": 5.4645789995873875, "learning_rate": 2.2967836293366405e-08, "loss": 1.0384, "step": 6887 }, { "epoch": 0.9337761811157053, "grad_norm": 3.871143357876909, "learning_rate": 2.287435377685498e-08, "loss": 1.1005, "step": 6888 }, { "epoch": 0.9339117467633702, "grad_norm": 5.8181802562468645, "learning_rate": 2.2781059691311498e-08, "loss": 1.1282, "step": 6889 }, { "epoch": 0.934047312411035, "grad_norm": 4.891148386717184, "learning_rate": 2.268795405472701e-08, "loss": 1.1171, "step": 6890 }, { "epoch": 0.9341828780586999, "grad_norm": 4.565246356531831, "learning_rate": 2.259503688505593e-08, "loss": 1.1181, "step": 6891 }, { "epoch": 0.9343184437063649, "grad_norm": 4.493263327930127, "learning_rate": 2.2502308200217037e-08, "loss": 1.0737, "step": 6892 }, { "epoch": 0.9344540093540297, "grad_norm": 8.212324992524646, "learning_rate": 2.2409768018092024e-08, "loss": 1.0862, "step": 6893 }, { "epoch": 0.9345895750016946, "grad_norm": 5.789003241282313, "learning_rate": 2.231741635652673e-08, "loss": 1.0831, "step": 6894 }, { "epoch": 0.9347251406493594, "grad_norm": 4.319835463385474, "learning_rate": 2.222525323333013e-08, "loss": 1.0732, "step": 6895 }, { "epoch": 0.9348607062970243, "grad_norm": 6.213245124501671, "learning_rate": 2.2133278666275567e-08, "loss": 1.0779, "step": 6896 }, { "epoch": 0.9349962719446893, "grad_norm": 5.017773054232104, "learning_rate": 2.2041492673099182e-08, "loss": 1.1129, "step": 6897 }, { "epoch": 0.9351318375923541, "grad_norm": 5.472048781721232, "learning_rate": 2.1949895271501596e-08, "loss": 1.0585, "step": 6898 }, { "epoch": 0.935267403240019, "grad_norm": 5.107136953714731, "learning_rate": 2.1858486479146344e-08, "loss": 1.0872, "step": 6899 }, { "epoch": 0.9354029688876838, "grad_norm": 4.6363275982961705, "learning_rate": 2.1767266313661102e-08, "loss": 1.0983, "step": 6900 }, { "epoch": 0.9355385345353487, "grad_norm": 5.354462065545632, "learning_rate": 2.1676234792636693e-08, "loss": 1.0998, "step": 6901 }, { "epoch": 0.9356741001830137, "grad_norm": 4.829312369794265, "learning_rate": 2.1585391933628073e-08, "loss": 1.0761, "step": 6902 }, { "epoch": 0.9358096658306785, "grad_norm": 5.3715234057329715, "learning_rate": 2.1494737754153558e-08, "loss": 1.1313, "step": 6903 }, { "epoch": 0.9359452314783434, "grad_norm": 11.786735574798852, "learning_rate": 2.1404272271694945e-08, "loss": 1.1147, "step": 6904 }, { "epoch": 0.9360807971260082, "grad_norm": 4.4987944606479715, "learning_rate": 2.1313995503697833e-08, "loss": 1.0994, "step": 6905 }, { "epoch": 0.9362163627736731, "grad_norm": 11.037529159408622, "learning_rate": 2.122390746757141e-08, "loss": 1.0963, "step": 6906 }, { "epoch": 0.9363519284213381, "grad_norm": 4.552813501189932, "learning_rate": 2.1134008180688445e-08, "loss": 1.0749, "step": 6907 }, { "epoch": 0.9364874940690029, "grad_norm": 5.894701772177948, "learning_rate": 2.1044297660385292e-08, "loss": 1.1043, "step": 6908 }, { "epoch": 0.9366230597166678, "grad_norm": 4.9579178146536025, "learning_rate": 2.0954775923961997e-08, "loss": 1.088, "step": 6909 }, { "epoch": 0.9367586253643326, "grad_norm": 11.803496932630166, "learning_rate": 2.086544298868198e-08, "loss": 1.1273, "step": 6910 }, { "epoch": 0.9368941910119976, "grad_norm": 3.887870842747934, "learning_rate": 2.077629887177257e-08, "loss": 1.1277, "step": 6911 }, { "epoch": 0.9370297566596625, "grad_norm": 4.512430076456689, "learning_rate": 2.0687343590424232e-08, "loss": 1.0618, "step": 6912 }, { "epoch": 0.9371653223073273, "grad_norm": 4.601911239365299, "learning_rate": 2.0598577161791587e-08, "loss": 1.0801, "step": 6913 }, { "epoch": 0.9373008879549922, "grad_norm": 6.257179454236033, "learning_rate": 2.050999960299249e-08, "loss": 1.1305, "step": 6914 }, { "epoch": 0.937436453602657, "grad_norm": 3.8778703412397997, "learning_rate": 2.0421610931108168e-08, "loss": 1.1394, "step": 6915 }, { "epoch": 0.937572019250322, "grad_norm": 5.459865489919403, "learning_rate": 2.033341116318399e-08, "loss": 1.066, "step": 6916 }, { "epoch": 0.9377075848979869, "grad_norm": 24.355299714012546, "learning_rate": 2.0245400316228344e-08, "loss": 1.121, "step": 6917 }, { "epoch": 0.9378431505456517, "grad_norm": 6.962376722107119, "learning_rate": 2.015757840721366e-08, "loss": 1.0923, "step": 6918 }, { "epoch": 0.9379787161933166, "grad_norm": 4.084104823976338, "learning_rate": 2.006994545307539e-08, "loss": 1.078, "step": 6919 }, { "epoch": 0.9381142818409814, "grad_norm": 6.606172663962169, "learning_rate": 1.998250147071323e-08, "loss": 1.0915, "step": 6920 }, { "epoch": 0.9382498474886464, "grad_norm": 6.693059291325755, "learning_rate": 1.9895246476989703e-08, "loss": 1.0875, "step": 6921 }, { "epoch": 0.9383854131363113, "grad_norm": 5.885343740725741, "learning_rate": 1.9808180488731564e-08, "loss": 1.1094, "step": 6922 }, { "epoch": 0.9385209787839761, "grad_norm": 6.260157866898991, "learning_rate": 1.9721303522728605e-08, "loss": 1.126, "step": 6923 }, { "epoch": 0.938656544431641, "grad_norm": 9.105943147436662, "learning_rate": 1.9634615595734316e-08, "loss": 1.1033, "step": 6924 }, { "epoch": 0.9387921100793059, "grad_norm": 4.12476873939611, "learning_rate": 1.954811672446599e-08, "loss": 1.0866, "step": 6925 }, { "epoch": 0.9389276757269708, "grad_norm": 5.27150911658637, "learning_rate": 1.9461806925604064e-08, "loss": 1.1139, "step": 6926 }, { "epoch": 0.9390632413746357, "grad_norm": 3.8248944032223204, "learning_rate": 1.9375686215792886e-08, "loss": 1.0714, "step": 6927 }, { "epoch": 0.9391988070223005, "grad_norm": 4.233596782353899, "learning_rate": 1.9289754611639954e-08, "loss": 1.0987, "step": 6928 }, { "epoch": 0.9393343726699654, "grad_norm": 5.321998731676607, "learning_rate": 1.9204012129716672e-08, "loss": 1.0833, "step": 6929 }, { "epoch": 0.9394699383176303, "grad_norm": 4.0459337980184955, "learning_rate": 1.911845878655749e-08, "loss": 1.091, "step": 6930 }, { "epoch": 0.9396055039652952, "grad_norm": 4.85111042737005, "learning_rate": 1.9033094598661204e-08, "loss": 1.0664, "step": 6931 }, { "epoch": 0.9397410696129601, "grad_norm": 3.4234686594733623, "learning_rate": 1.89479195824892e-08, "loss": 1.1044, "step": 6932 }, { "epoch": 0.9398766352606249, "grad_norm": 5.079443723966666, "learning_rate": 1.8862933754467013e-08, "loss": 1.1301, "step": 6933 }, { "epoch": 0.9400122009082899, "grad_norm": 4.965588845014908, "learning_rate": 1.8778137130983307e-08, "loss": 1.1024, "step": 6934 }, { "epoch": 0.9401477665559547, "grad_norm": 4.9166217700143084, "learning_rate": 1.8693529728390667e-08, "loss": 1.1072, "step": 6935 }, { "epoch": 0.9402833322036196, "grad_norm": 3.8135921309024763, "learning_rate": 1.860911156300482e-08, "loss": 1.0509, "step": 6936 }, { "epoch": 0.9404188978512845, "grad_norm": 5.429345998694769, "learning_rate": 1.8524882651105188e-08, "loss": 1.0394, "step": 6937 }, { "epoch": 0.9405544634989493, "grad_norm": 6.741566734923746, "learning_rate": 1.844084300893456e-08, "loss": 1.0684, "step": 6938 }, { "epoch": 0.9406900291466143, "grad_norm": 6.887143409014916, "learning_rate": 1.835699265269963e-08, "loss": 1.0977, "step": 6939 }, { "epoch": 0.9408255947942791, "grad_norm": 6.531568419915424, "learning_rate": 1.827333159856981e-08, "loss": 1.1204, "step": 6940 }, { "epoch": 0.940961160441944, "grad_norm": 4.354048972558569, "learning_rate": 1.8189859862678848e-08, "loss": 1.1313, "step": 6941 }, { "epoch": 0.9410967260896089, "grad_norm": 7.885907894706996, "learning_rate": 1.8106577461123428e-08, "loss": 1.0572, "step": 6942 }, { "epoch": 0.9412322917372737, "grad_norm": 5.305614302202391, "learning_rate": 1.802348440996393e-08, "loss": 1.0829, "step": 6943 }, { "epoch": 0.9413678573849387, "grad_norm": 4.7870206584105475, "learning_rate": 1.794058072522431e-08, "loss": 1.0979, "step": 6944 }, { "epoch": 0.9415034230326036, "grad_norm": 5.729533779645332, "learning_rate": 1.7857866422891665e-08, "loss": 1.1151, "step": 6945 }, { "epoch": 0.9416389886802684, "grad_norm": 5.109354648878771, "learning_rate": 1.777534151891702e-08, "loss": 1.1202, "step": 6946 }, { "epoch": 0.9417745543279333, "grad_norm": 4.470637213366263, "learning_rate": 1.7693006029214418e-08, "loss": 1.0813, "step": 6947 }, { "epoch": 0.9419101199755981, "grad_norm": 11.29051599551648, "learning_rate": 1.7610859969661827e-08, "loss": 1.0799, "step": 6948 }, { "epoch": 0.9420456856232631, "grad_norm": 5.05699898551089, "learning_rate": 1.7528903356100466e-08, "loss": 1.075, "step": 6949 }, { "epoch": 0.942181251270928, "grad_norm": 3.953973100280741, "learning_rate": 1.74471362043348e-08, "loss": 1.0942, "step": 6950 }, { "epoch": 0.9423168169185928, "grad_norm": 4.306270662398794, "learning_rate": 1.7365558530133218e-08, "loss": 1.072, "step": 6951 }, { "epoch": 0.9424523825662577, "grad_norm": 5.049714840169096, "learning_rate": 1.7284170349227246e-08, "loss": 1.0705, "step": 6952 }, { "epoch": 0.9425879482139226, "grad_norm": 6.472256779482177, "learning_rate": 1.7202971677311774e-08, "loss": 1.0534, "step": 6953 }, { "epoch": 0.9427235138615875, "grad_norm": 4.480224755065001, "learning_rate": 1.712196253004572e-08, "loss": 1.1258, "step": 6954 }, { "epoch": 0.9428590795092524, "grad_norm": 5.408322631010855, "learning_rate": 1.704114292305059e-08, "loss": 1.0965, "step": 6955 }, { "epoch": 0.9429946451569172, "grad_norm": 9.021898620568853, "learning_rate": 1.6960512871912246e-08, "loss": 1.0654, "step": 6956 }, { "epoch": 0.9431302108045821, "grad_norm": 6.413534695144754, "learning_rate": 1.6880072392179146e-08, "loss": 1.0835, "step": 6957 }, { "epoch": 0.943265776452247, "grad_norm": 7.747310538859518, "learning_rate": 1.6799821499363987e-08, "loss": 1.0926, "step": 6958 }, { "epoch": 0.9434013420999119, "grad_norm": 7.355267083315255, "learning_rate": 1.671976020894228e-08, "loss": 1.1066, "step": 6959 }, { "epoch": 0.9435369077475768, "grad_norm": 7.92058373446269, "learning_rate": 1.663988853635323e-08, "loss": 1.08, "step": 6960 }, { "epoch": 0.9436724733952416, "grad_norm": 4.752346969813602, "learning_rate": 1.6560206496999517e-08, "loss": 1.1271, "step": 6961 }, { "epoch": 0.9438080390429066, "grad_norm": 4.922138839989447, "learning_rate": 1.6480714106247186e-08, "loss": 1.0701, "step": 6962 }, { "epoch": 0.9439436046905714, "grad_norm": 6.93832069671361, "learning_rate": 1.6401411379425746e-08, "loss": 1.1074, "step": 6963 }, { "epoch": 0.9440791703382363, "grad_norm": 3.91935089367354, "learning_rate": 1.6322298331827967e-08, "loss": 1.0406, "step": 6964 }, { "epoch": 0.9442147359859012, "grad_norm": 4.934170397487387, "learning_rate": 1.624337497871042e-08, "loss": 1.1319, "step": 6965 }, { "epoch": 0.944350301633566, "grad_norm": 5.6518324129447866, "learning_rate": 1.6164641335292606e-08, "loss": 1.0457, "step": 6966 }, { "epoch": 0.944485867281231, "grad_norm": 5.212319817081558, "learning_rate": 1.6086097416757816e-08, "loss": 1.1186, "step": 6967 }, { "epoch": 0.9446214329288958, "grad_norm": 6.917815687294895, "learning_rate": 1.60077432382526e-08, "loss": 1.0515, "step": 6968 }, { "epoch": 0.9447569985765607, "grad_norm": 5.426922009187765, "learning_rate": 1.5929578814886878e-08, "loss": 1.0533, "step": 6969 }, { "epoch": 0.9448925642242256, "grad_norm": 5.579116874913109, "learning_rate": 1.5851604161734256e-08, "loss": 1.1047, "step": 6970 }, { "epoch": 0.9450281298718904, "grad_norm": 4.064615346322368, "learning_rate": 1.5773819293831148e-08, "loss": 1.0808, "step": 6971 }, { "epoch": 0.9451636955195554, "grad_norm": 4.596637990423629, "learning_rate": 1.5696224226178224e-08, "loss": 1.0794, "step": 6972 }, { "epoch": 0.9452992611672202, "grad_norm": 9.471598997243548, "learning_rate": 1.5618818973738625e-08, "loss": 1.1308, "step": 6973 }, { "epoch": 0.9454348268148851, "grad_norm": 5.282060603318505, "learning_rate": 1.554160355143974e-08, "loss": 1.0615, "step": 6974 }, { "epoch": 0.94557039246255, "grad_norm": 5.33631500158193, "learning_rate": 1.5464577974171554e-08, "loss": 1.0624, "step": 6975 }, { "epoch": 0.9457059581102149, "grad_norm": 6.986783416069441, "learning_rate": 1.5387742256788294e-08, "loss": 1.1289, "step": 6976 }, { "epoch": 0.9458415237578798, "grad_norm": 4.444567462817856, "learning_rate": 1.531109641410666e-08, "loss": 1.0974, "step": 6977 }, { "epoch": 0.9459770894055446, "grad_norm": 4.215162255172278, "learning_rate": 1.523464046090761e-08, "loss": 1.07, "step": 6978 }, { "epoch": 0.9461126550532095, "grad_norm": 5.706301423877493, "learning_rate": 1.5158374411934793e-08, "loss": 1.0765, "step": 6979 }, { "epoch": 0.9462482207008744, "grad_norm": 6.322557717047872, "learning_rate": 1.5082298281895666e-08, "loss": 1.0676, "step": 6980 }, { "epoch": 0.9463837863485393, "grad_norm": 4.34501311087722, "learning_rate": 1.500641208546072e-08, "loss": 1.0719, "step": 6981 }, { "epoch": 0.9465193519962042, "grad_norm": 4.070949050947197, "learning_rate": 1.493071583726424e-08, "loss": 1.1009, "step": 6982 }, { "epoch": 0.946654917643869, "grad_norm": 12.168521275725388, "learning_rate": 1.4855209551903559e-08, "loss": 1.0793, "step": 6983 }, { "epoch": 0.9467904832915339, "grad_norm": 7.136916487499293, "learning_rate": 1.4779893243939356e-08, "loss": 1.1115, "step": 6984 }, { "epoch": 0.9469260489391989, "grad_norm": 9.299537565476877, "learning_rate": 1.4704766927895907e-08, "loss": 1.1108, "step": 6985 }, { "epoch": 0.9470616145868637, "grad_norm": 6.344546273084193, "learning_rate": 1.462983061826084e-08, "loss": 1.0715, "step": 6986 }, { "epoch": 0.9471971802345286, "grad_norm": 4.887192555774669, "learning_rate": 1.4555084329484713e-08, "loss": 1.1072, "step": 6987 }, { "epoch": 0.9473327458821934, "grad_norm": 8.209705893218013, "learning_rate": 1.4480528075982102e-08, "loss": 1.0995, "step": 6988 }, { "epoch": 0.9474683115298583, "grad_norm": 5.384769126428216, "learning_rate": 1.4406161872130396e-08, "loss": 1.1155, "step": 6989 }, { "epoch": 0.9476038771775233, "grad_norm": 9.257009940062396, "learning_rate": 1.4331985732270457e-08, "loss": 1.1133, "step": 6990 }, { "epoch": 0.9477394428251881, "grad_norm": 5.64044599965465, "learning_rate": 1.4257999670706844e-08, "loss": 1.1171, "step": 6991 }, { "epoch": 0.947875008472853, "grad_norm": 4.11801002361015, "learning_rate": 1.418420370170681e-08, "loss": 1.0641, "step": 6992 }, { "epoch": 0.9480105741205178, "grad_norm": 5.939962653193406, "learning_rate": 1.4110597839501748e-08, "loss": 1.09, "step": 6993 }, { "epoch": 0.9481461397681827, "grad_norm": 3.772851825464852, "learning_rate": 1.4037182098285639e-08, "loss": 1.0932, "step": 6994 }, { "epoch": 0.9482817054158477, "grad_norm": 5.627294846279736, "learning_rate": 1.3963956492216377e-08, "loss": 1.1196, "step": 6995 }, { "epoch": 0.9484172710635125, "grad_norm": 15.616835734283601, "learning_rate": 1.389092103541456e-08, "loss": 1.0875, "step": 6996 }, { "epoch": 0.9485528367111774, "grad_norm": 13.655063891394875, "learning_rate": 1.3818075741965029e-08, "loss": 1.1141, "step": 6997 }, { "epoch": 0.9486884023588422, "grad_norm": 6.006854614070361, "learning_rate": 1.3745420625914995e-08, "loss": 1.0807, "step": 6998 }, { "epoch": 0.9488239680065071, "grad_norm": 11.119708086262156, "learning_rate": 1.3672955701275579e-08, "loss": 1.0937, "step": 6999 }, { "epoch": 0.9489595336541721, "grad_norm": 4.5049827853214115, "learning_rate": 1.360068098202105e-08, "loss": 1.0382, "step": 7000 }, { "epoch": 0.9490950993018369, "grad_norm": 4.853199685538841, "learning_rate": 1.3528596482089039e-08, "loss": 1.0504, "step": 7001 }, { "epoch": 0.9492306649495018, "grad_norm": 5.450303631306061, "learning_rate": 1.3456702215380534e-08, "loss": 1.0707, "step": 7002 }, { "epoch": 0.9493662305971666, "grad_norm": 4.067378793347342, "learning_rate": 1.3384998195759667e-08, "loss": 1.103, "step": 7003 }, { "epoch": 0.9495017962448316, "grad_norm": 5.381108086356234, "learning_rate": 1.3313484437053935e-08, "loss": 1.1215, "step": 7004 }, { "epoch": 0.9496373618924965, "grad_norm": 3.802214533273828, "learning_rate": 1.3242160953054415e-08, "loss": 1.0862, "step": 7005 }, { "epoch": 0.9497729275401613, "grad_norm": 4.964951101140285, "learning_rate": 1.3171027757515107e-08, "loss": 1.1067, "step": 7006 }, { "epoch": 0.9499084931878262, "grad_norm": 5.250447850349493, "learning_rate": 1.3100084864153593e-08, "loss": 1.1287, "step": 7007 }, { "epoch": 0.950044058835491, "grad_norm": 6.053048471865102, "learning_rate": 1.3029332286650596e-08, "loss": 1.0942, "step": 7008 }, { "epoch": 0.950179624483156, "grad_norm": 6.557793661318121, "learning_rate": 1.295877003865009e-08, "loss": 1.0653, "step": 7009 }, { "epoch": 0.9503151901308209, "grad_norm": 3.44173076720694, "learning_rate": 1.2888398133759637e-08, "loss": 1.1169, "step": 7010 }, { "epoch": 0.9504507557784857, "grad_norm": 5.291854592324938, "learning_rate": 1.2818216585549824e-08, "loss": 1.0548, "step": 7011 }, { "epoch": 0.9505863214261506, "grad_norm": 5.6797897742858225, "learning_rate": 1.2748225407554603e-08, "loss": 1.1085, "step": 7012 }, { "epoch": 0.9507218870738154, "grad_norm": 4.141946879601515, "learning_rate": 1.2678424613271288e-08, "loss": 1.1166, "step": 7013 }, { "epoch": 0.9508574527214804, "grad_norm": 4.487871288737486, "learning_rate": 1.2608814216160223e-08, "loss": 1.0947, "step": 7014 }, { "epoch": 0.9509930183691453, "grad_norm": 14.711105195534735, "learning_rate": 1.253939422964545e-08, "loss": 1.0709, "step": 7015 }, { "epoch": 0.9511285840168101, "grad_norm": 7.822452918082913, "learning_rate": 1.2470164667113926e-08, "loss": 1.0887, "step": 7016 }, { "epoch": 0.951264149664475, "grad_norm": 6.004309544167734, "learning_rate": 1.2401125541915968e-08, "loss": 1.099, "step": 7017 }, { "epoch": 0.9513997153121398, "grad_norm": 7.227177771082765, "learning_rate": 1.2332276867365377e-08, "loss": 1.073, "step": 7018 }, { "epoch": 0.9515352809598048, "grad_norm": 4.433597204852437, "learning_rate": 1.2263618656739083e-08, "loss": 1.1005, "step": 7019 }, { "epoch": 0.9516708466074697, "grad_norm": 5.437100509873848, "learning_rate": 1.2195150923277054e-08, "loss": 1.0624, "step": 7020 }, { "epoch": 0.9518064122551345, "grad_norm": 6.621498223515069, "learning_rate": 1.2126873680183058e-08, "loss": 1.0873, "step": 7021 }, { "epoch": 0.9519419779027994, "grad_norm": 5.2832172457687845, "learning_rate": 1.2058786940623678e-08, "loss": 1.0773, "step": 7022 }, { "epoch": 0.9520775435504644, "grad_norm": 5.856766529694817, "learning_rate": 1.1990890717728852e-08, "loss": 1.0978, "step": 7023 }, { "epoch": 0.9522131091981292, "grad_norm": 6.382664481688382, "learning_rate": 1.1923185024591775e-08, "loss": 1.0652, "step": 7024 }, { "epoch": 0.9523486748457941, "grad_norm": 6.142766318840027, "learning_rate": 1.1855669874269225e-08, "loss": 1.093, "step": 7025 }, { "epoch": 0.9524842404934589, "grad_norm": 5.593473431002213, "learning_rate": 1.1788345279780786e-08, "loss": 1.083, "step": 7026 }, { "epoch": 0.9526198061411238, "grad_norm": 4.483356051592548, "learning_rate": 1.1721211254109408e-08, "loss": 1.0983, "step": 7027 }, { "epoch": 0.9527553717887888, "grad_norm": 7.3699565563420135, "learning_rate": 1.1654267810201512e-08, "loss": 1.1096, "step": 7028 }, { "epoch": 0.9528909374364536, "grad_norm": 10.985758542104396, "learning_rate": 1.1587514960966437e-08, "loss": 1.1325, "step": 7029 }, { "epoch": 0.9530265030841185, "grad_norm": 5.440265390818984, "learning_rate": 1.1520952719277222e-08, "loss": 1.1374, "step": 7030 }, { "epoch": 0.9531620687317833, "grad_norm": 5.479095325007168, "learning_rate": 1.1454581097969595e-08, "loss": 1.0959, "step": 7031 }, { "epoch": 0.9532976343794483, "grad_norm": 4.668148998682819, "learning_rate": 1.1388400109842878e-08, "loss": 1.079, "step": 7032 }, { "epoch": 0.9534332000271132, "grad_norm": 5.43838943514591, "learning_rate": 1.1322409767659525e-08, "loss": 1.1118, "step": 7033 }, { "epoch": 0.953568765674778, "grad_norm": 11.522451484403996, "learning_rate": 1.1256610084145468e-08, "loss": 1.0585, "step": 7034 }, { "epoch": 0.9537043313224429, "grad_norm": 5.117540163574867, "learning_rate": 1.1191001071989336e-08, "loss": 1.1036, "step": 7035 }, { "epoch": 0.9538398969701077, "grad_norm": 9.368557750677285, "learning_rate": 1.1125582743843564e-08, "loss": 1.0716, "step": 7036 }, { "epoch": 0.9539754626177727, "grad_norm": 4.528898856509254, "learning_rate": 1.1060355112323395e-08, "loss": 1.1464, "step": 7037 }, { "epoch": 0.9541110282654376, "grad_norm": 22.497717773430395, "learning_rate": 1.0995318190007652e-08, "loss": 1.092, "step": 7038 }, { "epoch": 0.9542465939131024, "grad_norm": 6.0590841838245515, "learning_rate": 1.0930471989437862e-08, "loss": 1.0551, "step": 7039 }, { "epoch": 0.9543821595607673, "grad_norm": 5.366164169870183, "learning_rate": 1.0865816523119464e-08, "loss": 1.1228, "step": 7040 }, { "epoch": 0.9545177252084321, "grad_norm": 3.932416472723915, "learning_rate": 1.0801351803520598e-08, "loss": 1.0759, "step": 7041 }, { "epoch": 0.9546532908560971, "grad_norm": 9.463248228056793, "learning_rate": 1.0737077843072762e-08, "loss": 1.082, "step": 7042 }, { "epoch": 0.954788856503762, "grad_norm": 7.268418011340451, "learning_rate": 1.0672994654170598e-08, "loss": 1.0959, "step": 7043 }, { "epoch": 0.9549244221514268, "grad_norm": 5.685395460617145, "learning_rate": 1.060910224917222e-08, "loss": 1.0788, "step": 7044 }, { "epoch": 0.9550599877990917, "grad_norm": 4.743174289673366, "learning_rate": 1.054540064039866e-08, "loss": 1.0906, "step": 7045 }, { "epoch": 0.9551955534467566, "grad_norm": 7.361899476049103, "learning_rate": 1.0481889840134428e-08, "loss": 1.0861, "step": 7046 }, { "epoch": 0.9553311190944215, "grad_norm": 5.8548033522002285, "learning_rate": 1.0418569860626836e-08, "loss": 1.0798, "step": 7047 }, { "epoch": 0.9554666847420864, "grad_norm": 4.351166987162971, "learning_rate": 1.0355440714086782e-08, "loss": 1.1073, "step": 7048 }, { "epoch": 0.9556022503897512, "grad_norm": 4.50909033086094, "learning_rate": 1.0292502412688198e-08, "loss": 1.0913, "step": 7049 }, { "epoch": 0.9557378160374161, "grad_norm": 4.481549357075193, "learning_rate": 1.0229754968568261e-08, "loss": 1.0946, "step": 7050 }, { "epoch": 0.955873381685081, "grad_norm": 7.657575930885257, "learning_rate": 1.0167198393827403e-08, "loss": 1.081, "step": 7051 }, { "epoch": 0.9560089473327459, "grad_norm": 4.010943679431255, "learning_rate": 1.0104832700528975e-08, "loss": 1.0872, "step": 7052 }, { "epoch": 0.9561445129804108, "grad_norm": 6.461299724506083, "learning_rate": 1.0042657900699803e-08, "loss": 1.1153, "step": 7053 }, { "epoch": 0.9562800786280756, "grad_norm": 4.509329815920603, "learning_rate": 9.980674006329848e-09, "loss": 1.0786, "step": 7054 }, { "epoch": 0.9564156442757406, "grad_norm": 4.7201866923575455, "learning_rate": 9.918881029372106e-09, "loss": 1.0651, "step": 7055 }, { "epoch": 0.9565512099234054, "grad_norm": 5.462145368981539, "learning_rate": 9.857278981742934e-09, "loss": 1.1303, "step": 7056 }, { "epoch": 0.9566867755710703, "grad_norm": 5.794315854476765, "learning_rate": 9.795867875321829e-09, "loss": 1.1126, "step": 7057 }, { "epoch": 0.9568223412187352, "grad_norm": 4.659804838297236, "learning_rate": 9.734647721951427e-09, "loss": 1.1012, "step": 7058 }, { "epoch": 0.9569579068664, "grad_norm": 3.9606097532823816, "learning_rate": 9.673618533437511e-09, "loss": 1.0957, "step": 7059 }, { "epoch": 0.957093472514065, "grad_norm": 11.669034501631034, "learning_rate": 9.612780321549108e-09, "loss": 1.117, "step": 7060 }, { "epoch": 0.9572290381617298, "grad_norm": 6.231787300350207, "learning_rate": 9.552133098018389e-09, "loss": 1.1057, "step": 7061 }, { "epoch": 0.9573646038093947, "grad_norm": 4.907284279740157, "learning_rate": 9.491676874540666e-09, "loss": 1.0677, "step": 7062 }, { "epoch": 0.9575001694570596, "grad_norm": 4.779956698374788, "learning_rate": 9.431411662774502e-09, "loss": 1.1295, "step": 7063 }, { "epoch": 0.9576357351047244, "grad_norm": 3.7568444894244672, "learning_rate": 9.37133747434149e-09, "loss": 1.1136, "step": 7064 }, { "epoch": 0.9577713007523894, "grad_norm": 10.661498004303207, "learning_rate": 9.311454320826473e-09, "loss": 1.1113, "step": 7065 }, { "epoch": 0.9579068664000542, "grad_norm": 6.357073655447834, "learning_rate": 9.251762213777437e-09, "loss": 1.0987, "step": 7066 }, { "epoch": 0.9580424320477191, "grad_norm": 4.243869273499131, "learning_rate": 9.192261164705617e-09, "loss": 1.1168, "step": 7067 }, { "epoch": 0.958177997695384, "grad_norm": 4.0030336147162515, "learning_rate": 9.132951185085281e-09, "loss": 1.09, "step": 7068 }, { "epoch": 0.9583135633430488, "grad_norm": 4.800632980142025, "learning_rate": 9.073832286353944e-09, "loss": 1.102, "step": 7069 }, { "epoch": 0.9584491289907138, "grad_norm": 6.043477194696921, "learning_rate": 9.014904479912044e-09, "loss": 1.1031, "step": 7070 }, { "epoch": 0.9585846946383786, "grad_norm": 5.614836758710009, "learning_rate": 8.956167777123602e-09, "loss": 1.0841, "step": 7071 }, { "epoch": 0.9587202602860435, "grad_norm": 9.547173159155928, "learning_rate": 8.897622189315224e-09, "loss": 1.1005, "step": 7072 }, { "epoch": 0.9588558259337084, "grad_norm": 4.340368773978432, "learning_rate": 8.839267727777211e-09, "loss": 1.121, "step": 7073 }, { "epoch": 0.9589913915813733, "grad_norm": 10.284536860675368, "learning_rate": 8.781104403762563e-09, "loss": 1.0645, "step": 7074 }, { "epoch": 0.9591269572290382, "grad_norm": 5.916632313204043, "learning_rate": 8.723132228487861e-09, "loss": 1.1309, "step": 7075 }, { "epoch": 0.959262522876703, "grad_norm": 6.672221768804276, "learning_rate": 8.665351213132278e-09, "loss": 1.0775, "step": 7076 }, { "epoch": 0.9593980885243679, "grad_norm": 6.751524050063776, "learning_rate": 8.607761368838785e-09, "loss": 1.0741, "step": 7077 }, { "epoch": 0.9595336541720328, "grad_norm": 10.256330776358302, "learning_rate": 8.550362706712832e-09, "loss": 1.1013, "step": 7078 }, { "epoch": 0.9596692198196977, "grad_norm": 4.583102617051773, "learning_rate": 8.493155237823347e-09, "loss": 1.114, "step": 7079 }, { "epoch": 0.9598047854673626, "grad_norm": 5.2179827556791425, "learning_rate": 8.4361389732025e-09, "loss": 1.0831, "step": 7080 }, { "epoch": 0.9599403511150274, "grad_norm": 5.0710366138230185, "learning_rate": 8.379313923845277e-09, "loss": 1.0895, "step": 7081 }, { "epoch": 0.9600759167626923, "grad_norm": 4.717248108127909, "learning_rate": 8.322680100710022e-09, "loss": 1.0804, "step": 7082 }, { "epoch": 0.9602114824103573, "grad_norm": 3.948598705054701, "learning_rate": 8.266237514718e-09, "loss": 1.1094, "step": 7083 }, { "epoch": 0.9603470480580221, "grad_norm": 6.73722392301204, "learning_rate": 8.209986176753948e-09, "loss": 1.0615, "step": 7084 }, { "epoch": 0.960482613705687, "grad_norm": 5.401423942535459, "learning_rate": 8.153926097665186e-09, "loss": 1.0926, "step": 7085 }, { "epoch": 0.9606181793533518, "grad_norm": 5.842402602098256, "learning_rate": 8.098057288262738e-09, "loss": 1.0833, "step": 7086 }, { "epoch": 0.9607537450010167, "grad_norm": 5.136927081973162, "learning_rate": 8.042379759320317e-09, "loss": 1.124, "step": 7087 }, { "epoch": 0.9608893106486817, "grad_norm": 6.692598408802029, "learning_rate": 7.986893521574888e-09, "loss": 1.0855, "step": 7088 }, { "epoch": 0.9610248762963465, "grad_norm": 4.4248844565862795, "learning_rate": 7.931598585726562e-09, "loss": 1.1029, "step": 7089 }, { "epoch": 0.9611604419440114, "grad_norm": 4.28060965580701, "learning_rate": 7.876494962438585e-09, "loss": 1.1108, "step": 7090 }, { "epoch": 0.9612960075916762, "grad_norm": 6.882983741363048, "learning_rate": 7.821582662337123e-09, "loss": 1.0885, "step": 7091 }, { "epoch": 0.9614315732393411, "grad_norm": 5.690688798992504, "learning_rate": 7.766861696011816e-09, "loss": 1.0875, "step": 7092 }, { "epoch": 0.9615671388870061, "grad_norm": 12.490579226773065, "learning_rate": 7.712332074014893e-09, "loss": 1.0715, "step": 7093 }, { "epoch": 0.9617027045346709, "grad_norm": 3.734431338907193, "learning_rate": 7.657993806862162e-09, "loss": 1.1127, "step": 7094 }, { "epoch": 0.9618382701823358, "grad_norm": 5.20817437252271, "learning_rate": 7.603846905032129e-09, "loss": 1.0738, "step": 7095 }, { "epoch": 0.9619738358300006, "grad_norm": 5.1411619089905845, "learning_rate": 7.549891378966888e-09, "loss": 1.1073, "step": 7096 }, { "epoch": 0.9621094014776655, "grad_norm": 6.030797577232647, "learning_rate": 7.496127239071003e-09, "loss": 1.1127, "step": 7097 }, { "epoch": 0.9622449671253305, "grad_norm": 3.8635508153213163, "learning_rate": 7.442554495712738e-09, "loss": 1.0881, "step": 7098 }, { "epoch": 0.9623805327729953, "grad_norm": 4.005662239831764, "learning_rate": 7.3891731592230496e-09, "loss": 1.0705, "step": 7099 }, { "epoch": 0.9625160984206602, "grad_norm": 3.800667313568065, "learning_rate": 7.335983239896148e-09, "loss": 1.0917, "step": 7100 }, { "epoch": 0.9626516640683251, "grad_norm": 5.12534377617325, "learning_rate": 7.282984747989163e-09, "loss": 1.0891, "step": 7101 }, { "epoch": 0.96278722971599, "grad_norm": 5.976676714509997, "learning_rate": 7.230177693722583e-09, "loss": 1.1041, "step": 7102 }, { "epoch": 0.9629227953636549, "grad_norm": 3.96603048062147, "learning_rate": 7.17756208727982e-09, "loss": 1.1019, "step": 7103 }, { "epoch": 0.9630583610113197, "grad_norm": 4.75931556368224, "learning_rate": 7.125137938807424e-09, "loss": 1.0803, "step": 7104 }, { "epoch": 0.9631939266589846, "grad_norm": 4.293328798158134, "learning_rate": 7.072905258414752e-09, "loss": 1.0851, "step": 7105 }, { "epoch": 0.9633294923066495, "grad_norm": 6.153011087213776, "learning_rate": 7.020864056174635e-09, "loss": 1.0592, "step": 7106 }, { "epoch": 0.9634650579543144, "grad_norm": 7.923505546336438, "learning_rate": 6.969014342122825e-09, "loss": 1.077, "step": 7107 }, { "epoch": 0.9636006236019793, "grad_norm": 20.68703143203049, "learning_rate": 6.9173561262581e-09, "loss": 1.1212, "step": 7108 }, { "epoch": 0.9637361892496441, "grad_norm": 5.514632814732578, "learning_rate": 6.86588941854227e-09, "loss": 1.1039, "step": 7109 }, { "epoch": 0.963871754897309, "grad_norm": 4.038729738831953, "learning_rate": 6.814614228900506e-09, "loss": 1.0841, "step": 7110 }, { "epoch": 0.964007320544974, "grad_norm": 5.293410029331567, "learning_rate": 6.763530567220455e-09, "loss": 1.1302, "step": 7111 }, { "epoch": 0.9641428861926388, "grad_norm": 12.006387881676883, "learning_rate": 6.712638443353569e-09, "loss": 1.0847, "step": 7112 }, { "epoch": 0.9642784518403037, "grad_norm": 4.6801186954450635, "learning_rate": 6.661937867113665e-09, "loss": 1.1073, "step": 7113 }, { "epoch": 0.9644140174879685, "grad_norm": 8.924368061954397, "learning_rate": 6.611428848278256e-09, "loss": 1.0989, "step": 7114 }, { "epoch": 0.9645495831356334, "grad_norm": 4.710015949873337, "learning_rate": 6.5611113965873265e-09, "loss": 1.1572, "step": 7115 }, { "epoch": 0.9646851487832984, "grad_norm": 5.473567177595665, "learning_rate": 6.51098552174445e-09, "loss": 1.1355, "step": 7116 }, { "epoch": 0.9648207144309632, "grad_norm": 5.489783064938204, "learning_rate": 6.461051233415782e-09, "loss": 1.0889, "step": 7117 }, { "epoch": 0.9649562800786281, "grad_norm": 4.5175475485640835, "learning_rate": 6.4113085412309535e-09, "loss": 1.1029, "step": 7118 }, { "epoch": 0.9650918457262929, "grad_norm": 5.05551198065102, "learning_rate": 6.361757454782291e-09, "loss": 1.0826, "step": 7119 }, { "epoch": 0.9652274113739578, "grad_norm": 6.566454066669898, "learning_rate": 6.312397983625483e-09, "loss": 1.117, "step": 7120 }, { "epoch": 0.9653629770216228, "grad_norm": 6.309671760372006, "learning_rate": 6.2632301372789185e-09, "loss": 1.1035, "step": 7121 }, { "epoch": 0.9654985426692876, "grad_norm": 5.391494597501845, "learning_rate": 6.214253925224455e-09, "loss": 1.0923, "step": 7122 }, { "epoch": 0.9656341083169525, "grad_norm": 5.926723798113411, "learning_rate": 6.165469356906539e-09, "loss": 1.1029, "step": 7123 }, { "epoch": 0.9657696739646173, "grad_norm": 5.759189407012342, "learning_rate": 6.116876441733087e-09, "loss": 1.1159, "step": 7124 }, { "epoch": 0.9659052396122823, "grad_norm": 10.208255833340305, "learning_rate": 6.068475189074829e-09, "loss": 1.093, "step": 7125 }, { "epoch": 0.9660408052599472, "grad_norm": 4.755195329142922, "learning_rate": 6.020265608265407e-09, "loss": 1.101, "step": 7126 }, { "epoch": 0.966176370907612, "grad_norm": 7.540676946416274, "learning_rate": 5.97224770860183e-09, "loss": 1.1281, "step": 7127 }, { "epoch": 0.9663119365552769, "grad_norm": 9.17845113810601, "learning_rate": 5.924421499343801e-09, "loss": 1.0754, "step": 7128 }, { "epoch": 0.9664475022029417, "grad_norm": 8.654740790363778, "learning_rate": 5.8767869897145e-09, "loss": 1.1226, "step": 7129 }, { "epoch": 0.9665830678506067, "grad_norm": 4.568548509538622, "learning_rate": 5.8293441888994655e-09, "loss": 1.0721, "step": 7130 }, { "epoch": 0.9667186334982716, "grad_norm": 5.6419722764483415, "learning_rate": 5.7820931060481585e-09, "loss": 1.0722, "step": 7131 }, { "epoch": 0.9668541991459364, "grad_norm": 3.503828104420856, "learning_rate": 5.735033750272067e-09, "loss": 1.084, "step": 7132 }, { "epoch": 0.9669897647936013, "grad_norm": 5.11035470451689, "learning_rate": 5.68816613064671e-09, "loss": 1.1143, "step": 7133 }, { "epoch": 0.9671253304412661, "grad_norm": 4.096001731510628, "learning_rate": 5.6414902562096356e-09, "loss": 1.1171, "step": 7134 }, { "epoch": 0.9672608960889311, "grad_norm": 9.012218312204164, "learning_rate": 5.595006135962421e-09, "loss": 1.0952, "step": 7135 }, { "epoch": 0.967396461736596, "grad_norm": 5.011178646812501, "learning_rate": 5.548713778868786e-09, "loss": 1.1195, "step": 7136 }, { "epoch": 0.9675320273842608, "grad_norm": 7.7701747564684585, "learning_rate": 5.502613193856031e-09, "loss": 1.0646, "step": 7137 }, { "epoch": 0.9676675930319257, "grad_norm": 4.657692129663843, "learning_rate": 5.45670438981416e-09, "loss": 1.104, "step": 7138 }, { "epoch": 0.9678031586795905, "grad_norm": 5.448582650919136, "learning_rate": 5.4109873755964205e-09, "loss": 1.0943, "step": 7139 }, { "epoch": 0.9679387243272555, "grad_norm": 17.085359343114717, "learning_rate": 5.365462160018985e-09, "loss": 1.1004, "step": 7140 }, { "epoch": 0.9680742899749204, "grad_norm": 3.934696458467899, "learning_rate": 5.3201287518610525e-09, "loss": 1.0809, "step": 7141 }, { "epoch": 0.9682098556225852, "grad_norm": 4.204971046007864, "learning_rate": 5.274987159864741e-09, "loss": 1.0822, "step": 7142 }, { "epoch": 0.9683454212702501, "grad_norm": 11.614582840683772, "learning_rate": 5.2300373927351984e-09, "loss": 1.1424, "step": 7143 }, { "epoch": 0.968480986917915, "grad_norm": 5.3128496553412985, "learning_rate": 5.185279459140823e-09, "loss": 1.1169, "step": 7144 }, { "epoch": 0.9686165525655799, "grad_norm": 7.6414896807658055, "learning_rate": 5.140713367712601e-09, "loss": 1.1031, "step": 7145 }, { "epoch": 0.9687521182132448, "grad_norm": 6.3557434574062235, "learning_rate": 5.09633912704488e-09, "loss": 1.1159, "step": 7146 }, { "epoch": 0.9688876838609096, "grad_norm": 5.319400535483431, "learning_rate": 5.052156745694924e-09, "loss": 1.0961, "step": 7147 }, { "epoch": 0.9690232495085745, "grad_norm": 5.333199500337712, "learning_rate": 5.00816623218292e-09, "loss": 1.1466, "step": 7148 }, { "epoch": 0.9691588151562394, "grad_norm": 5.295627520111719, "learning_rate": 4.964367594991969e-09, "loss": 1.0857, "step": 7149 }, { "epoch": 0.9692943808039043, "grad_norm": 5.5439622842449285, "learning_rate": 4.920760842568539e-09, "loss": 1.0919, "step": 7150 }, { "epoch": 0.9694299464515692, "grad_norm": 5.335020758534068, "learning_rate": 4.877345983321568e-09, "loss": 1.1184, "step": 7151 }, { "epoch": 0.969565512099234, "grad_norm": 4.9675187709672715, "learning_rate": 4.834123025623471e-09, "loss": 1.1358, "step": 7152 }, { "epoch": 0.969701077746899, "grad_norm": 4.055006712761432, "learning_rate": 4.791091977809358e-09, "loss": 1.0883, "step": 7153 }, { "epoch": 0.9698366433945638, "grad_norm": 5.649288475008575, "learning_rate": 4.7482528481774805e-09, "loss": 1.0291, "step": 7154 }, { "epoch": 0.9699722090422287, "grad_norm": 4.5045558193700375, "learning_rate": 4.705605644988897e-09, "loss": 1.119, "step": 7155 }, { "epoch": 0.9701077746898936, "grad_norm": 5.735760031581613, "learning_rate": 4.663150376468028e-09, "loss": 1.0821, "step": 7156 }, { "epoch": 0.9702433403375584, "grad_norm": 6.275037463466455, "learning_rate": 4.62088705080177e-09, "loss": 1.1229, "step": 7157 }, { "epoch": 0.9703789059852234, "grad_norm": 3.9445724928109263, "learning_rate": 4.5788156761404906e-09, "loss": 1.1069, "step": 7158 }, { "epoch": 0.9705144716328882, "grad_norm": 5.524463520510005, "learning_rate": 4.536936260597257e-09, "loss": 1.0961, "step": 7159 }, { "epoch": 0.9706500372805531, "grad_norm": 6.7166453730527715, "learning_rate": 4.495248812248054e-09, "loss": 1.1026, "step": 7160 }, { "epoch": 0.970785602928218, "grad_norm": 7.14952102984992, "learning_rate": 4.453753339132116e-09, "loss": 1.0819, "step": 7161 }, { "epoch": 0.9709211685758828, "grad_norm": 6.746965812884899, "learning_rate": 4.412449849251598e-09, "loss": 1.0814, "step": 7162 }, { "epoch": 0.9710567342235478, "grad_norm": 5.239067529434925, "learning_rate": 4.371338350571352e-09, "loss": 1.0917, "step": 7163 }, { "epoch": 0.9711922998712126, "grad_norm": 5.021706649981924, "learning_rate": 4.3304188510194795e-09, "loss": 1.1007, "step": 7164 }, { "epoch": 0.9713278655188775, "grad_norm": 5.9434645185441015, "learning_rate": 4.289691358486891e-09, "loss": 1.1172, "step": 7165 }, { "epoch": 0.9714634311665424, "grad_norm": 5.169710476872213, "learning_rate": 4.249155880827859e-09, "loss": 1.0861, "step": 7166 }, { "epoch": 0.9715989968142072, "grad_norm": 4.160010868780353, "learning_rate": 4.2088124258590205e-09, "loss": 1.0952, "step": 7167 }, { "epoch": 0.9717345624618722, "grad_norm": 6.792848016996787, "learning_rate": 4.168661001360485e-09, "loss": 1.0889, "step": 7168 }, { "epoch": 0.971870128109537, "grad_norm": 6.4816276989265145, "learning_rate": 4.128701615074947e-09, "loss": 1.0874, "step": 7169 }, { "epoch": 0.9720056937572019, "grad_norm": 4.475234617204447, "learning_rate": 4.088934274708466e-09, "loss": 1.097, "step": 7170 }, { "epoch": 0.9721412594048668, "grad_norm": 24.269683578214412, "learning_rate": 4.049358987929685e-09, "loss": 1.1007, "step": 7171 }, { "epoch": 0.9722768250525317, "grad_norm": 7.1579394954451026, "learning_rate": 4.00997576237061e-09, "loss": 1.1126, "step": 7172 }, { "epoch": 0.9724123907001966, "grad_norm": 4.823660514296992, "learning_rate": 3.970784605625721e-09, "loss": 1.0583, "step": 7173 }, { "epoch": 0.9725479563478614, "grad_norm": 12.635762865127115, "learning_rate": 3.931785525252862e-09, "loss": 1.0689, "step": 7174 }, { "epoch": 0.9726835219955263, "grad_norm": 4.981030346424309, "learning_rate": 3.892978528772684e-09, "loss": 1.0773, "step": 7175 }, { "epoch": 0.9728190876431912, "grad_norm": 4.335939815136743, "learning_rate": 3.854363623668866e-09, "loss": 1.0905, "step": 7176 }, { "epoch": 0.9729546532908561, "grad_norm": 5.454973429543767, "learning_rate": 3.815940817387786e-09, "loss": 1.0995, "step": 7177 }, { "epoch": 0.973090218938521, "grad_norm": 4.245279538285368, "learning_rate": 3.777710117339183e-09, "loss": 1.0724, "step": 7178 }, { "epoch": 0.9732257845861859, "grad_norm": 4.875296137386539, "learning_rate": 3.739671530895605e-09, "loss": 1.0988, "step": 7179 }, { "epoch": 0.9733613502338507, "grad_norm": 5.440446814740574, "learning_rate": 3.7018250653921834e-09, "loss": 1.0887, "step": 7180 }, { "epoch": 0.9734969158815157, "grad_norm": 7.277937483938916, "learning_rate": 3.6641707281276357e-09, "loss": 1.0855, "step": 7181 }, { "epoch": 0.9736324815291805, "grad_norm": 15.818771122680943, "learning_rate": 3.6267085263631537e-09, "loss": 1.0741, "step": 7182 }, { "epoch": 0.9737680471768454, "grad_norm": 4.561274883107086, "learning_rate": 3.589438467322958e-09, "loss": 1.0978, "step": 7183 }, { "epoch": 0.9739036128245103, "grad_norm": 4.170355552885138, "learning_rate": 3.5523605581944115e-09, "loss": 1.1028, "step": 7184 }, { "epoch": 0.9740391784721751, "grad_norm": 5.238765018092359, "learning_rate": 3.5154748061276828e-09, "loss": 1.1156, "step": 7185 }, { "epoch": 0.9741747441198401, "grad_norm": 8.736825768400447, "learning_rate": 3.47878121823586e-09, "loss": 1.0994, "step": 7186 }, { "epoch": 0.9743103097675049, "grad_norm": 4.7668351058509915, "learning_rate": 3.4422798015949496e-09, "loss": 1.0894, "step": 7187 }, { "epoch": 0.9744458754151698, "grad_norm": 5.296684244543857, "learning_rate": 3.405970563244098e-09, "loss": 1.0915, "step": 7188 }, { "epoch": 0.9745814410628347, "grad_norm": 4.9883472996968425, "learning_rate": 3.36985351018515e-09, "loss": 1.0377, "step": 7189 }, { "epoch": 0.9747170067104995, "grad_norm": 5.531240782077325, "learning_rate": 3.3339286493830886e-09, "loss": 1.1236, "step": 7190 }, { "epoch": 0.9748525723581645, "grad_norm": 4.062642826381888, "learning_rate": 3.2981959877657063e-09, "loss": 1.0958, "step": 7191 }, { "epoch": 0.9749881380058293, "grad_norm": 10.29840532390479, "learning_rate": 3.2626555322236014e-09, "loss": 1.1141, "step": 7192 }, { "epoch": 0.9751237036534942, "grad_norm": 5.511101660446167, "learning_rate": 3.227307289610737e-09, "loss": 1.0684, "step": 7193 }, { "epoch": 0.9752592693011591, "grad_norm": 4.609491763136235, "learning_rate": 3.192151266743548e-09, "loss": 1.1253, "step": 7194 }, { "epoch": 0.975394834948824, "grad_norm": 3.989379837511294, "learning_rate": 3.157187470401723e-09, "loss": 1.1177, "step": 7195 }, { "epoch": 0.9755304005964889, "grad_norm": 7.898368263547551, "learning_rate": 3.122415907327647e-09, "loss": 1.112, "step": 7196 }, { "epoch": 0.9756659662441537, "grad_norm": 3.754661038074206, "learning_rate": 3.0878365842268437e-09, "loss": 1.1108, "step": 7197 }, { "epoch": 0.9758015318918186, "grad_norm": 5.603037040913452, "learning_rate": 3.053449507767536e-09, "loss": 1.09, "step": 7198 }, { "epoch": 0.9759370975394835, "grad_norm": 4.343022624954639, "learning_rate": 3.019254684581085e-09, "loss": 1.1286, "step": 7199 }, { "epoch": 0.9760726631871484, "grad_norm": 4.632887684530066, "learning_rate": 2.985252121261661e-09, "loss": 1.0908, "step": 7200 }, { "epoch": 0.9762082288348133, "grad_norm": 4.387305554630456, "learning_rate": 2.951441824366463e-09, "loss": 1.0844, "step": 7201 }, { "epoch": 0.9763437944824781, "grad_norm": 4.169178536284164, "learning_rate": 2.9178238004154975e-09, "loss": 1.0824, "step": 7202 }, { "epoch": 0.976479360130143, "grad_norm": 4.376592146224193, "learning_rate": 2.88439805589169e-09, "loss": 1.0837, "step": 7203 }, { "epoch": 0.976614925777808, "grad_norm": 5.3290511301329975, "learning_rate": 2.851164597240996e-09, "loss": 1.1005, "step": 7204 }, { "epoch": 0.9767504914254728, "grad_norm": 4.328550552085893, "learning_rate": 2.8181234308721767e-09, "loss": 1.0676, "step": 7205 }, { "epoch": 0.9768860570731377, "grad_norm": 4.259342830792652, "learning_rate": 2.7852745631570253e-09, "loss": 1.0835, "step": 7206 }, { "epoch": 0.9770216227208025, "grad_norm": 8.01540108637579, "learning_rate": 2.7526180004300294e-09, "loss": 1.1102, "step": 7207 }, { "epoch": 0.9771571883684674, "grad_norm": 4.171905052049607, "learning_rate": 2.720153748988929e-09, "loss": 1.0892, "step": 7208 }, { "epoch": 0.9772927540161324, "grad_norm": 3.2620052998106153, "learning_rate": 2.6878818150941616e-09, "loss": 1.1031, "step": 7209 }, { "epoch": 0.9774283196637972, "grad_norm": 7.341892507035918, "learning_rate": 2.655802204968971e-09, "loss": 1.105, "step": 7210 }, { "epoch": 0.9775638853114621, "grad_norm": 5.64296733053247, "learning_rate": 2.6239149247999635e-09, "loss": 1.077, "step": 7211 }, { "epoch": 0.9776994509591269, "grad_norm": 3.9362250211288314, "learning_rate": 2.592219980735999e-09, "loss": 1.0943, "step": 7212 }, { "epoch": 0.9778350166067918, "grad_norm": 7.17917838638432, "learning_rate": 2.5607173788894097e-09, "loss": 1.1071, "step": 7213 }, { "epoch": 0.9779705822544568, "grad_norm": 6.352848808071177, "learning_rate": 2.5294071253351146e-09, "loss": 1.0874, "step": 7214 }, { "epoch": 0.9781061479021216, "grad_norm": 5.335259356679355, "learning_rate": 2.498289226111061e-09, "loss": 1.06, "step": 7215 }, { "epoch": 0.9782417135497865, "grad_norm": 4.556506313122588, "learning_rate": 2.467363687218227e-09, "loss": 1.1327, "step": 7216 }, { "epoch": 0.9783772791974513, "grad_norm": 4.142784710052318, "learning_rate": 2.436630514620286e-09, "loss": 1.0806, "step": 7217 }, { "epoch": 0.9785128448451162, "grad_norm": 5.182008142243423, "learning_rate": 2.4060897142438308e-09, "loss": 1.1102, "step": 7218 }, { "epoch": 0.9786484104927812, "grad_norm": 13.93851600936423, "learning_rate": 2.3757412919783725e-09, "loss": 1.122, "step": 7219 }, { "epoch": 0.978783976140446, "grad_norm": 4.37851978918041, "learning_rate": 2.345585253676452e-09, "loss": 1.1121, "step": 7220 }, { "epoch": 0.9789195417881109, "grad_norm": 4.0410470756044266, "learning_rate": 2.3156216051535284e-09, "loss": 1.0599, "step": 7221 }, { "epoch": 0.9790551074357757, "grad_norm": 4.383789037159345, "learning_rate": 2.285850352187646e-09, "loss": 1.0883, "step": 7222 }, { "epoch": 0.9791906730834407, "grad_norm": 5.34920553570851, "learning_rate": 2.2562715005201016e-09, "loss": 1.0768, "step": 7223 }, { "epoch": 0.9793262387311056, "grad_norm": 5.456815687580125, "learning_rate": 2.226885055854777e-09, "loss": 1.123, "step": 7224 }, { "epoch": 0.9794618043787704, "grad_norm": 4.484179481434788, "learning_rate": 2.1976910238588055e-09, "loss": 1.1059, "step": 7225 }, { "epoch": 0.9795973700264353, "grad_norm": 5.532348841066092, "learning_rate": 2.168689410162017e-09, "loss": 1.0768, "step": 7226 }, { "epoch": 0.9797329356741001, "grad_norm": 3.73687927938667, "learning_rate": 2.1398802203569375e-09, "loss": 1.1125, "step": 7227 }, { "epoch": 0.9798685013217651, "grad_norm": 4.942180433296325, "learning_rate": 2.111263459999457e-09, "loss": 1.0717, "step": 7228 }, { "epoch": 0.98000406696943, "grad_norm": 4.7051805233456525, "learning_rate": 2.0828391346078277e-09, "loss": 1.0748, "step": 7229 }, { "epoch": 0.9801396326170948, "grad_norm": 7.93828036618153, "learning_rate": 2.054607249663665e-09, "loss": 1.1071, "step": 7230 }, { "epoch": 0.9802751982647597, "grad_norm": 6.217824071452354, "learning_rate": 2.0265678106111685e-09, "loss": 1.1253, "step": 7231 }, { "epoch": 0.9804107639124245, "grad_norm": 15.652669941425588, "learning_rate": 1.9987208228575693e-09, "loss": 1.083, "step": 7232 }, { "epoch": 0.9805463295600895, "grad_norm": 8.543548606440842, "learning_rate": 1.971066291772905e-09, "loss": 1.1186, "step": 7233 }, { "epoch": 0.9806818952077544, "grad_norm": 5.385379811101531, "learning_rate": 1.9436042226901315e-09, "loss": 1.0991, "step": 7234 }, { "epoch": 0.9808174608554192, "grad_norm": 3.8261997601020465, "learning_rate": 1.9163346209051246e-09, "loss": 1.0821, "step": 7235 }, { "epoch": 0.9809530265030841, "grad_norm": 7.138566365869714, "learning_rate": 1.889257491676677e-09, "loss": 1.0678, "step": 7236 }, { "epoch": 0.981088592150749, "grad_norm": 4.88421511416083, "learning_rate": 1.8623728402261674e-09, "loss": 1.0693, "step": 7237 }, { "epoch": 0.9812241577984139, "grad_norm": 6.250721081958892, "learning_rate": 1.8356806717383377e-09, "loss": 1.1135, "step": 7238 }, { "epoch": 0.9813597234460788, "grad_norm": 5.45409699634398, "learning_rate": 1.809180991360404e-09, "loss": 1.0946, "step": 7239 }, { "epoch": 0.9814952890937436, "grad_norm": 4.142937406672712, "learning_rate": 1.7828738042027225e-09, "loss": 1.0981, "step": 7240 }, { "epoch": 0.9816308547414085, "grad_norm": 6.636459562012845, "learning_rate": 1.7567591153383466e-09, "loss": 1.1071, "step": 7241 }, { "epoch": 0.9817664203890734, "grad_norm": 11.46828789234797, "learning_rate": 1.7308369298033587e-09, "loss": 1.1169, "step": 7242 }, { "epoch": 0.9819019860367383, "grad_norm": 5.27302576952218, "learning_rate": 1.7051072525965382e-09, "loss": 1.0904, "step": 7243 }, { "epoch": 0.9820375516844032, "grad_norm": 6.268527480308323, "learning_rate": 1.6795700886798049e-09, "loss": 1.1025, "step": 7244 }, { "epoch": 0.982173117332068, "grad_norm": 4.454846512854601, "learning_rate": 1.6542254429776636e-09, "loss": 1.0387, "step": 7245 }, { "epoch": 0.982308682979733, "grad_norm": 4.230119564867262, "learning_rate": 1.6290733203776497e-09, "loss": 1.121, "step": 7246 }, { "epoch": 0.9824442486273978, "grad_norm": 4.289430545897311, "learning_rate": 1.6041137257303272e-09, "loss": 1.0857, "step": 7247 }, { "epoch": 0.9825798142750627, "grad_norm": 4.587830530303657, "learning_rate": 1.5793466638486242e-09, "loss": 1.1123, "step": 7248 }, { "epoch": 0.9827153799227276, "grad_norm": 21.50410375224658, "learning_rate": 1.554772139509053e-09, "loss": 1.0853, "step": 7249 }, { "epoch": 0.9828509455703924, "grad_norm": 12.627138345252353, "learning_rate": 1.5303901574502675e-09, "loss": 1.066, "step": 7250 }, { "epoch": 0.9829865112180574, "grad_norm": 3.5356071983584467, "learning_rate": 1.5062007223743956e-09, "loss": 1.1089, "step": 7251 }, { "epoch": 0.9831220768657222, "grad_norm": 6.620877029474317, "learning_rate": 1.482203838946039e-09, "loss": 1.0894, "step": 7252 }, { "epoch": 0.9832576425133871, "grad_norm": 4.331397870152994, "learning_rate": 1.4583995117929404e-09, "loss": 1.1269, "step": 7253 }, { "epoch": 0.983393208161052, "grad_norm": 6.092679126515824, "learning_rate": 1.434787745505317e-09, "loss": 1.1217, "step": 7254 }, { "epoch": 0.9835287738087168, "grad_norm": 8.726184691037158, "learning_rate": 1.4113685446368595e-09, "loss": 1.0729, "step": 7255 }, { "epoch": 0.9836643394563818, "grad_norm": 5.804484064477394, "learning_rate": 1.388141913703511e-09, "loss": 1.0982, "step": 7256 }, { "epoch": 0.9837999051040466, "grad_norm": 6.1197461426793085, "learning_rate": 1.3651078571844664e-09, "loss": 1.0625, "step": 7257 }, { "epoch": 0.9839354707517115, "grad_norm": 4.440404349218565, "learning_rate": 1.3422663795215062e-09, "loss": 1.1027, "step": 7258 }, { "epoch": 0.9840710363993764, "grad_norm": 4.393306570533547, "learning_rate": 1.3196174851196617e-09, "loss": 1.0714, "step": 7259 }, { "epoch": 0.9842066020470412, "grad_norm": 5.674490675628539, "learning_rate": 1.2971611783465507e-09, "loss": 1.0495, "step": 7260 }, { "epoch": 0.9843421676947062, "grad_norm": 5.614304076546377, "learning_rate": 1.274897463532487e-09, "loss": 1.0948, "step": 7261 }, { "epoch": 0.9844777333423711, "grad_norm": 4.159603057836971, "learning_rate": 1.2528263449710363e-09, "loss": 1.1086, "step": 7262 }, { "epoch": 0.9846132989900359, "grad_norm": 4.930329991329258, "learning_rate": 1.2309478269184602e-09, "loss": 1.0903, "step": 7263 }, { "epoch": 0.9847488646377008, "grad_norm": 4.611882193709822, "learning_rate": 1.2092619135937177e-09, "loss": 1.1259, "step": 7264 }, { "epoch": 0.9848844302853657, "grad_norm": 4.707554094439277, "learning_rate": 1.1877686091787963e-09, "loss": 1.0707, "step": 7265 }, { "epoch": 0.9850199959330306, "grad_norm": 4.795339098555899, "learning_rate": 1.1664679178186032e-09, "loss": 1.0582, "step": 7266 }, { "epoch": 0.9851555615806955, "grad_norm": 5.707566032850582, "learning_rate": 1.1453598436208522e-09, "loss": 1.1139, "step": 7267 }, { "epoch": 0.9852911272283603, "grad_norm": 3.9194204055449826, "learning_rate": 1.1244443906558432e-09, "loss": 1.0937, "step": 7268 }, { "epoch": 0.9854266928760252, "grad_norm": 4.910023746369269, "learning_rate": 1.1037215629571272e-09, "loss": 1.1088, "step": 7269 }, { "epoch": 0.9855622585236901, "grad_norm": 5.06808799456456, "learning_rate": 1.0831913645209522e-09, "loss": 1.0982, "step": 7270 }, { "epoch": 0.985697824171355, "grad_norm": 6.101539331766897, "learning_rate": 1.0628537993063736e-09, "loss": 1.1476, "step": 7271 }, { "epoch": 0.9858333898190199, "grad_norm": 4.826851020409591, "learning_rate": 1.042708871235143e-09, "loss": 1.0958, "step": 7272 }, { "epoch": 0.9859689554666847, "grad_norm": 7.783813431317606, "learning_rate": 1.0227565841923746e-09, "loss": 1.0613, "step": 7273 }, { "epoch": 0.9861045211143497, "grad_norm": 5.060948082846267, "learning_rate": 1.002996942025547e-09, "loss": 1.1008, "step": 7274 }, { "epoch": 0.9862400867620145, "grad_norm": 5.572672638962776, "learning_rate": 9.834299485450559e-10, "loss": 1.1059, "step": 7275 }, { "epoch": 0.9863756524096794, "grad_norm": 8.854810064403008, "learning_rate": 9.640556075244388e-10, "loss": 1.1197, "step": 7276 }, { "epoch": 0.9865112180573443, "grad_norm": 4.446969323381799, "learning_rate": 9.448739226997072e-10, "loss": 1.0991, "step": 7277 }, { "epoch": 0.9866467837050091, "grad_norm": 5.429229171136425, "learning_rate": 9.258848977700129e-10, "loss": 1.1042, "step": 7278 }, { "epoch": 0.9867823493526741, "grad_norm": 7.975984919174487, "learning_rate": 9.070885363972047e-10, "loss": 1.1228, "step": 7279 }, { "epoch": 0.9869179150003389, "grad_norm": 4.710401872494041, "learning_rate": 8.884848422060498e-10, "loss": 1.1118, "step": 7280 }, { "epoch": 0.9870534806480038, "grad_norm": 5.42446677039369, "learning_rate": 8.700738187840118e-10, "loss": 1.1051, "step": 7281 }, { "epoch": 0.9871890462956687, "grad_norm": 7.424310937379188, "learning_rate": 8.518554696815838e-10, "loss": 1.1303, "step": 7282 }, { "epoch": 0.9873246119433335, "grad_norm": 4.821904930264009, "learning_rate": 8.338297984121778e-10, "loss": 1.0849, "step": 7283 }, { "epoch": 0.9874601775909985, "grad_norm": 5.518811940377126, "learning_rate": 8.159968084515689e-10, "loss": 1.086, "step": 7284 }, { "epoch": 0.9875957432386633, "grad_norm": 4.282287768107756, "learning_rate": 7.983565032390061e-10, "loss": 1.0697, "step": 7285 }, { "epoch": 0.9877313088863282, "grad_norm": 6.399835185335807, "learning_rate": 7.809088861762125e-10, "loss": 1.0832, "step": 7286 }, { "epoch": 0.9878668745339931, "grad_norm": 3.7519144065872063, "learning_rate": 7.636539606277192e-10, "loss": 1.1509, "step": 7287 }, { "epoch": 0.988002440181658, "grad_norm": 4.862494845597184, "learning_rate": 7.465917299210866e-10, "loss": 1.0843, "step": 7288 }, { "epoch": 0.9881380058293229, "grad_norm": 3.752499409929051, "learning_rate": 7.297221973465717e-10, "loss": 1.0978, "step": 7289 }, { "epoch": 0.9882735714769877, "grad_norm": 6.879509364406475, "learning_rate": 7.130453661573499e-10, "loss": 1.0981, "step": 7290 }, { "epoch": 0.9884091371246526, "grad_norm": 9.74975111323311, "learning_rate": 6.965612395695153e-10, "loss": 1.1163, "step": 7291 }, { "epoch": 0.9885447027723175, "grad_norm": 7.250800878150794, "learning_rate": 6.802698207617474e-10, "loss": 1.1038, "step": 7292 }, { "epoch": 0.9886802684199824, "grad_norm": 6.40564970852915, "learning_rate": 6.641711128758665e-10, "loss": 1.0708, "step": 7293 }, { "epoch": 0.9888158340676473, "grad_norm": 3.8876409741997775, "learning_rate": 6.48265119016278e-10, "loss": 1.0696, "step": 7294 }, { "epoch": 0.9889513997153121, "grad_norm": 5.142705623425465, "learning_rate": 6.325518422503063e-10, "loss": 1.0926, "step": 7295 }, { "epoch": 0.989086965362977, "grad_norm": 5.947229277100162, "learning_rate": 6.170312856083048e-10, "loss": 1.1207, "step": 7296 }, { "epoch": 0.989222531010642, "grad_norm": 6.3868720946549695, "learning_rate": 6.017034520831021e-10, "loss": 1.1311, "step": 7297 }, { "epoch": 0.9893580966583068, "grad_norm": 4.204891870333813, "learning_rate": 5.865683446305558e-10, "loss": 1.097, "step": 7298 }, { "epoch": 0.9894936623059717, "grad_norm": 4.034329859876725, "learning_rate": 5.716259661695533e-10, "loss": 1.0971, "step": 7299 }, { "epoch": 0.9896292279536365, "grad_norm": 7.207837883811374, "learning_rate": 5.568763195813453e-10, "loss": 1.0713, "step": 7300 }, { "epoch": 0.9897647936013014, "grad_norm": 6.168858698666201, "learning_rate": 5.423194077104343e-10, "loss": 1.057, "step": 7301 }, { "epoch": 0.9899003592489664, "grad_norm": 5.456784686615142, "learning_rate": 5.279552333640191e-10, "loss": 1.0561, "step": 7302 }, { "epoch": 0.9900359248966312, "grad_norm": 4.14671786447015, "learning_rate": 5.137837993121064e-10, "loss": 1.0719, "step": 7303 }, { "epoch": 0.9901714905442961, "grad_norm": 6.614819885367848, "learning_rate": 4.998051082875099e-10, "loss": 1.1754, "step": 7304 }, { "epoch": 0.9903070561919609, "grad_norm": 13.536902392804706, "learning_rate": 4.860191629859623e-10, "loss": 1.1215, "step": 7305 }, { "epoch": 0.9904426218396258, "grad_norm": 5.95858594127814, "learning_rate": 4.724259660658924e-10, "loss": 1.0882, "step": 7306 }, { "epoch": 0.9905781874872908, "grad_norm": 5.964919343243653, "learning_rate": 4.5902552014864815e-10, "loss": 1.074, "step": 7307 }, { "epoch": 0.9907137531349556, "grad_norm": 4.857399079260704, "learning_rate": 4.458178278184954e-10, "loss": 1.0891, "step": 7308 }, { "epoch": 0.9908493187826205, "grad_norm": 19.32164621616366, "learning_rate": 4.328028916222859e-10, "loss": 1.0974, "step": 7309 }, { "epoch": 0.9909848844302853, "grad_norm": 7.088787163176644, "learning_rate": 4.199807140700118e-10, "loss": 1.1125, "step": 7310 }, { "epoch": 0.9911204500779502, "grad_norm": 4.949079280247408, "learning_rate": 4.073512976342508e-10, "loss": 1.0855, "step": 7311 }, { "epoch": 0.9912560157256152, "grad_norm": 5.56264577506791, "learning_rate": 3.9491464475049916e-10, "loss": 1.1142, "step": 7312 }, { "epoch": 0.99139158137328, "grad_norm": 4.967909153689259, "learning_rate": 3.826707578170607e-10, "loss": 1.0971, "step": 7313 }, { "epoch": 0.9915271470209449, "grad_norm": 4.581804266028414, "learning_rate": 3.7061963919504667e-10, "loss": 1.1121, "step": 7314 }, { "epoch": 0.9916627126686097, "grad_norm": 5.62021568451383, "learning_rate": 3.5876129120837596e-10, "loss": 1.1116, "step": 7315 }, { "epoch": 0.9917982783162747, "grad_norm": 5.184235733869374, "learning_rate": 3.470957161439969e-10, "loss": 1.0893, "step": 7316 }, { "epoch": 0.9919338439639396, "grad_norm": 4.6190106193282565, "learning_rate": 3.3562291625133245e-10, "loss": 1.1181, "step": 7317 }, { "epoch": 0.9920694096116044, "grad_norm": 9.49111935839781, "learning_rate": 3.24342893742946e-10, "loss": 1.0678, "step": 7318 }, { "epoch": 0.9922049752592693, "grad_norm": 4.020357541676827, "learning_rate": 3.1325565079409755e-10, "loss": 1.0928, "step": 7319 }, { "epoch": 0.9923405409069341, "grad_norm": 5.5146398694691205, "learning_rate": 3.023611895428546e-10, "loss": 1.0585, "step": 7320 }, { "epoch": 0.9924761065545991, "grad_norm": 5.005447943525821, "learning_rate": 2.9165951209020325e-10, "loss": 1.0847, "step": 7321 }, { "epoch": 0.992611672202264, "grad_norm": 6.005868284518075, "learning_rate": 2.8115062049971493e-10, "loss": 1.0627, "step": 7322 }, { "epoch": 0.9927472378499288, "grad_norm": 4.954239983635507, "learning_rate": 2.7083451679799084e-10, "loss": 1.0593, "step": 7323 }, { "epoch": 0.9928828034975937, "grad_norm": 4.339810403127429, "learning_rate": 2.6071120297443963e-10, "loss": 1.1152, "step": 7324 }, { "epoch": 0.9930183691452585, "grad_norm": 5.24399891292011, "learning_rate": 2.507806809813884e-10, "loss": 1.0744, "step": 7325 }, { "epoch": 0.9931539347929235, "grad_norm": 4.335189845667627, "learning_rate": 2.410429527336388e-10, "loss": 1.0992, "step": 7326 }, { "epoch": 0.9932895004405884, "grad_norm": 3.7180376596466775, "learning_rate": 2.3149802010913322e-10, "loss": 1.1075, "step": 7327 }, { "epoch": 0.9934250660882532, "grad_norm": 4.976369204928567, "learning_rate": 2.221458849486213e-10, "loss": 1.1224, "step": 7328 }, { "epoch": 0.9935606317359181, "grad_norm": 5.309769767876014, "learning_rate": 2.1298654905543834e-10, "loss": 1.0799, "step": 7329 }, { "epoch": 0.993696197383583, "grad_norm": 4.663162823011079, "learning_rate": 2.0402001419594917e-10, "loss": 1.0887, "step": 7330 }, { "epoch": 0.9938317630312479, "grad_norm": 4.72109192834578, "learning_rate": 1.9524628209943718e-10, "loss": 1.1315, "step": 7331 }, { "epoch": 0.9939673286789128, "grad_norm": 5.517319623722868, "learning_rate": 1.8666535445754917e-10, "loss": 1.1023, "step": 7332 }, { "epoch": 0.9941028943265776, "grad_norm": 7.590972250014197, "learning_rate": 1.7827723292518358e-10, "loss": 1.0881, "step": 7333 }, { "epoch": 0.9942384599742425, "grad_norm": 6.454141713174682, "learning_rate": 1.7008191912004645e-10, "loss": 1.0972, "step": 7334 }, { "epoch": 0.9943740256219074, "grad_norm": 3.5555101468706307, "learning_rate": 1.6207941462242912e-10, "loss": 1.0921, "step": 7335 }, { "epoch": 0.9945095912695723, "grad_norm": 3.7250338712816333, "learning_rate": 1.5426972097543068e-10, "loss": 1.0764, "step": 7336 }, { "epoch": 0.9946451569172372, "grad_norm": 43.57779612268436, "learning_rate": 1.4665283968529062e-10, "loss": 1.0417, "step": 7337 }, { "epoch": 0.994780722564902, "grad_norm": 4.673221516048316, "learning_rate": 1.3922877222083407e-10, "loss": 1.1082, "step": 7338 }, { "epoch": 0.994916288212567, "grad_norm": 6.255006972626996, "learning_rate": 1.3199752001369359e-10, "loss": 1.1231, "step": 7339 }, { "epoch": 0.9950518538602319, "grad_norm": 8.058655869894073, "learning_rate": 1.2495908445830928e-10, "loss": 1.0601, "step": 7340 }, { "epoch": 0.9951874195078967, "grad_norm": 6.019529618861766, "learning_rate": 1.1811346691203982e-10, "loss": 1.0659, "step": 7341 }, { "epoch": 0.9953229851555616, "grad_norm": 5.719403502457392, "learning_rate": 1.1146066869494042e-10, "loss": 1.0992, "step": 7342 }, { "epoch": 0.9954585508032264, "grad_norm": 4.485671213220754, "learning_rate": 1.0500069109009579e-10, "loss": 1.1083, "step": 7343 }, { "epoch": 0.9955941164508914, "grad_norm": 5.098113038548876, "learning_rate": 9.873353534317619e-11, "loss": 1.0688, "step": 7344 }, { "epoch": 0.9957296820985563, "grad_norm": 3.991237892590682, "learning_rate": 9.265920266265936e-11, "loss": 1.092, "step": 7345 }, { "epoch": 0.9958652477462211, "grad_norm": 6.590459617280725, "learning_rate": 8.677769422005266e-11, "loss": 1.1132, "step": 7346 }, { "epoch": 0.996000813393886, "grad_norm": 4.562626457438091, "learning_rate": 8.108901114955991e-11, "loss": 1.0924, "step": 7347 }, { "epoch": 0.9961363790415508, "grad_norm": 5.041403364998656, "learning_rate": 7.559315454819249e-11, "loss": 1.0711, "step": 7348 }, { "epoch": 0.9962719446892158, "grad_norm": 9.834989166320119, "learning_rate": 7.029012547576929e-11, "loss": 1.1037, "step": 7349 }, { "epoch": 0.9964075103368807, "grad_norm": 22.63675256439717, "learning_rate": 6.517992495491676e-11, "loss": 1.1103, "step": 7350 }, { "epoch": 0.9965430759845455, "grad_norm": 9.593663979689707, "learning_rate": 6.026255397106884e-11, "loss": 1.0973, "step": 7351 }, { "epoch": 0.9966786416322104, "grad_norm": 5.029323712078444, "learning_rate": 5.553801347257803e-11, "loss": 1.1101, "step": 7352 }, { "epoch": 0.9968142072798752, "grad_norm": 5.634119346268222, "learning_rate": 5.1006304370493355e-11, "loss": 1.0842, "step": 7353 }, { "epoch": 0.9969497729275402, "grad_norm": 9.91063398089406, "learning_rate": 4.6667427538782386e-11, "loss": 1.0605, "step": 7354 }, { "epoch": 0.9970853385752051, "grad_norm": 4.852382068089261, "learning_rate": 4.252138381399817e-11, "loss": 1.0961, "step": 7355 }, { "epoch": 0.9972209042228699, "grad_norm": 6.866342541632946, "learning_rate": 3.856817399594536e-11, "loss": 1.0981, "step": 7356 }, { "epoch": 0.9973564698705348, "grad_norm": 6.106934753664064, "learning_rate": 3.4807798846681055e-11, "loss": 1.1166, "step": 7357 }, { "epoch": 0.9974920355181996, "grad_norm": 4.6914263050054785, "learning_rate": 3.124025909151395e-11, "loss": 1.1093, "step": 7358 }, { "epoch": 0.9976276011658646, "grad_norm": 6.075315149583405, "learning_rate": 2.7865555418338238e-11, "loss": 1.1509, "step": 7359 }, { "epoch": 0.9977631668135295, "grad_norm": 4.102790085309159, "learning_rate": 2.4683688477966647e-11, "loss": 1.1045, "step": 7360 }, { "epoch": 0.9978987324611943, "grad_norm": 6.103459858766425, "learning_rate": 2.1694658884130468e-11, "loss": 1.1148, "step": 7361 }, { "epoch": 0.9980342981088592, "grad_norm": 5.138834879006109, "learning_rate": 1.8898467213146473e-11, "loss": 1.1085, "step": 7362 }, { "epoch": 0.9981698637565241, "grad_norm": 5.2740169111134225, "learning_rate": 1.6295114004138965e-11, "loss": 1.0991, "step": 7363 }, { "epoch": 0.998305429404189, "grad_norm": 4.79903045886579, "learning_rate": 1.3884599759261818e-11, "loss": 1.0891, "step": 7364 }, { "epoch": 0.9984409950518539, "grad_norm": 5.139239120993949, "learning_rate": 1.1666924943254386e-11, "loss": 1.1052, "step": 7365 }, { "epoch": 0.9985765606995187, "grad_norm": 4.279013865242631, "learning_rate": 9.642089983885604e-12, "loss": 1.1095, "step": 7366 }, { "epoch": 0.9987121263471836, "grad_norm": 40.431018277187725, "learning_rate": 7.810095271620908e-12, "loss": 1.0699, "step": 7367 }, { "epoch": 0.9988476919948485, "grad_norm": 5.294595924507272, "learning_rate": 6.170941159733267e-12, "loss": 1.101, "step": 7368 }, { "epoch": 0.9989832576425134, "grad_norm": 5.83121807229278, "learning_rate": 4.724627964303174e-12, "loss": 1.0871, "step": 7369 }, { "epoch": 0.9991188232901783, "grad_norm": 3.9639726745893817, "learning_rate": 3.4711559642186527e-12, "loss": 1.0795, "step": 7370 }, { "epoch": 0.9992543889378431, "grad_norm": 9.225585843340703, "learning_rate": 2.4105254012862784e-12, "loss": 1.0795, "step": 7371 }, { "epoch": 0.9993899545855081, "grad_norm": 4.807983155386636, "learning_rate": 1.5427364800091325e-12, "loss": 1.1144, "step": 7372 }, { "epoch": 0.9995255202331729, "grad_norm": 18.889312291586773, "learning_rate": 8.67789367586802e-13, "loss": 1.0757, "step": 7373 }, { "epoch": 0.9996610858808378, "grad_norm": 3.7838995515773286, "learning_rate": 3.856841943594702e-13, "loss": 1.109, "step": 7374 }, { "epoch": 0.9997966515285027, "grad_norm": 4.343433279669052, "learning_rate": 9.642105325280425e-14, "loss": 1.0564, "step": 7375 }, { "epoch": 0.9999322171761675, "grad_norm": 4.9217634399587, "learning_rate": 0.0, "loss": 1.1175, "step": 7376 }, { "epoch": 0.9999322171761675, "step": 7376, "total_flos": 3.1045960116627046e+17, "train_loss": 1.1601818115844384, "train_runtime": 64981.4666, "train_samples_per_second": 10.898, "train_steps_per_second": 0.114 } ], "logging_steps": 1.0, "max_steps": 7376, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.1045960116627046e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }