{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.992769342010122, "global_step": 6910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.894356005788712e-07, "loss": 5.3281, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.788712011577424e-07, "loss": 5.5625, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.683068017366137e-07, "loss": 5.5938, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.1577424023154849e-06, "loss": 5.625, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.447178002894356e-06, "loss": 5.7031, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.7366136034732275e-06, "loss": 5.7812, "step": 6 }, { "epoch": 0.01, "learning_rate": 2.0260492040520984e-06, "loss": 6.6719, "step": 7 }, { "epoch": 0.01, "learning_rate": 2.3154848046309697e-06, "loss": 6.125, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.604920405209841e-06, "loss": 5.3438, "step": 9 }, { "epoch": 0.01, "learning_rate": 2.894356005788712e-06, "loss": 5.0938, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.1837916063675832e-06, "loss": 5.0625, "step": 11 }, { "epoch": 0.02, "learning_rate": 3.473227206946455e-06, "loss": 5.1875, "step": 12 }, { "epoch": 0.02, "learning_rate": 3.762662807525326e-06, "loss": 5.7656, "step": 13 }, { "epoch": 0.02, "learning_rate": 4.052098408104197e-06, "loss": 5.5156, "step": 14 }, { "epoch": 0.02, "learning_rate": 4.341534008683068e-06, "loss": 5.1719, "step": 15 }, { "epoch": 0.02, "learning_rate": 4.630969609261939e-06, "loss": 4.6719, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.920405209840811e-06, "loss": 4.2969, "step": 17 }, { "epoch": 0.03, "learning_rate": 5.209840810419682e-06, "loss": 4.3594, "step": 18 }, { "epoch": 0.03, "learning_rate": 5.499276410998553e-06, "loss": 4.4062, "step": 19 }, { "epoch": 0.03, "learning_rate": 5.788712011577424e-06, "loss": 3.6875, "step": 20 }, { "epoch": 0.03, "learning_rate": 6.078147612156296e-06, "loss": 3.6172, "step": 21 }, { "epoch": 0.03, "learning_rate": 6.3675832127351665e-06, "loss": 3.4297, "step": 22 }, { "epoch": 0.03, "learning_rate": 6.657018813314038e-06, "loss": 2.3203, "step": 23 }, { "epoch": 0.03, "learning_rate": 6.94645441389291e-06, "loss": 2.5, "step": 24 }, { "epoch": 0.04, "learning_rate": 7.2358900144717804e-06, "loss": 3.5859, "step": 25 }, { "epoch": 0.04, "learning_rate": 7.525325615050652e-06, "loss": 2.9141, "step": 26 }, { "epoch": 0.04, "learning_rate": 7.814761215629521e-06, "loss": 2.2188, "step": 27 }, { "epoch": 0.04, "learning_rate": 8.104196816208394e-06, "loss": 3.4844, "step": 28 }, { "epoch": 0.04, "learning_rate": 8.393632416787266e-06, "loss": 2.2109, "step": 29 }, { "epoch": 0.04, "learning_rate": 8.683068017366136e-06, "loss": 1.9023, "step": 30 }, { "epoch": 0.04, "learning_rate": 8.972503617945008e-06, "loss": 2.2031, "step": 31 }, { "epoch": 0.05, "learning_rate": 9.261939218523879e-06, "loss": 2.3516, "step": 32 }, { "epoch": 0.05, "learning_rate": 9.55137481910275e-06, "loss": 2.8203, "step": 33 }, { "epoch": 0.05, "learning_rate": 9.840810419681621e-06, "loss": 1.7383, "step": 34 }, { "epoch": 0.05, "learning_rate": 1.0130246020260492e-05, "loss": 2.3672, "step": 35 }, { "epoch": 0.05, "learning_rate": 1.0419681620839364e-05, "loss": 3.1172, "step": 36 }, { "epoch": 0.05, "learning_rate": 1.0709117221418236e-05, "loss": 1.5898, "step": 37 }, { "epoch": 0.05, "learning_rate": 1.0998552821997107e-05, "loss": 2.5781, "step": 38 }, { "epoch": 0.06, "learning_rate": 1.1287988422575977e-05, "loss": 1.1875, "step": 39 }, { "epoch": 0.06, "learning_rate": 1.1577424023154848e-05, "loss": 0.9941, "step": 40 }, { "epoch": 0.06, "learning_rate": 1.186685962373372e-05, "loss": 1.5625, "step": 41 }, { "epoch": 0.06, "learning_rate": 1.2156295224312592e-05, "loss": 2.1562, "step": 42 }, { "epoch": 0.06, "learning_rate": 1.2445730824891462e-05, "loss": 1.4414, "step": 43 }, { "epoch": 0.06, "learning_rate": 1.2735166425470333e-05, "loss": 0.6738, "step": 44 }, { "epoch": 0.07, "learning_rate": 1.3024602026049203e-05, "loss": 0.3584, "step": 45 }, { "epoch": 0.07, "learning_rate": 1.3314037626628076e-05, "loss": 0.3916, "step": 46 }, { "epoch": 0.07, "learning_rate": 1.3603473227206948e-05, "loss": 1.0889, "step": 47 }, { "epoch": 0.07, "learning_rate": 1.389290882778582e-05, "loss": 0.21, "step": 48 }, { "epoch": 0.07, "learning_rate": 1.4182344428364689e-05, "loss": 1.3892, "step": 49 }, { "epoch": 0.07, "learning_rate": 1.4471780028943561e-05, "loss": 0.5454, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.4761215629522431e-05, "loss": 1.0078, "step": 51 }, { "epoch": 0.08, "learning_rate": 1.5050651230101304e-05, "loss": 1.1934, "step": 52 }, { "epoch": 0.08, "learning_rate": 1.5340086830680174e-05, "loss": 0.1655, "step": 53 }, { "epoch": 0.08, "learning_rate": 1.5629522431259043e-05, "loss": 0.8237, "step": 54 }, { "epoch": 0.08, "learning_rate": 1.5918958031837915e-05, "loss": 0.1689, "step": 55 }, { "epoch": 0.08, "learning_rate": 1.6208393632416787e-05, "loss": 0.9995, "step": 56 }, { "epoch": 0.08, "learning_rate": 1.649782923299566e-05, "loss": 0.8347, "step": 57 }, { "epoch": 0.08, "learning_rate": 1.678726483357453e-05, "loss": 1.3672, "step": 58 }, { "epoch": 0.09, "learning_rate": 1.70767004341534e-05, "loss": 1.1719, "step": 59 }, { "epoch": 0.09, "learning_rate": 1.7366136034732272e-05, "loss": 1.0591, "step": 60 }, { "epoch": 0.09, "learning_rate": 1.7655571635311145e-05, "loss": 0.5669, "step": 61 }, { "epoch": 0.09, "learning_rate": 1.7945007235890017e-05, "loss": 0.8821, "step": 62 }, { "epoch": 0.09, "learning_rate": 1.823444283646889e-05, "loss": 1.2715, "step": 63 }, { "epoch": 0.09, "learning_rate": 1.8523878437047758e-05, "loss": 1.3438, "step": 64 }, { "epoch": 0.09, "learning_rate": 1.881331403762663e-05, "loss": 0.7327, "step": 65 }, { "epoch": 0.1, "learning_rate": 1.91027496382055e-05, "loss": 0.0759, "step": 66 }, { "epoch": 0.1, "learning_rate": 1.939218523878437e-05, "loss": 0.468, "step": 67 }, { "epoch": 0.1, "learning_rate": 1.9681620839363243e-05, "loss": 0.5835, "step": 68 }, { "epoch": 0.1, "learning_rate": 1.9971056439942115e-05, "loss": 1.5748, "step": 69 }, { "epoch": 0.1, "learning_rate": 2.0260492040520984e-05, "loss": 0.92, "step": 70 }, { "epoch": 0.1, "learning_rate": 2.0549927641099856e-05, "loss": 1.1442, "step": 71 }, { "epoch": 0.1, "learning_rate": 2.0839363241678728e-05, "loss": 0.0486, "step": 72 }, { "epoch": 0.11, "learning_rate": 2.11287988422576e-05, "loss": 1.2773, "step": 73 }, { "epoch": 0.11, "learning_rate": 2.1418234442836473e-05, "loss": 0.4969, "step": 74 }, { "epoch": 0.11, "learning_rate": 2.170767004341534e-05, "loss": 0.0969, "step": 75 }, { "epoch": 0.11, "learning_rate": 2.1997105643994213e-05, "loss": 0.1355, "step": 76 }, { "epoch": 0.11, "learning_rate": 2.2286541244573082e-05, "loss": 0.1102, "step": 77 }, { "epoch": 0.11, "learning_rate": 2.2575976845151954e-05, "loss": 0.4373, "step": 78 }, { "epoch": 0.11, "learning_rate": 2.2865412445730827e-05, "loss": 0.6619, "step": 79 }, { "epoch": 0.12, "learning_rate": 2.3154848046309695e-05, "loss": 0.5676, "step": 80 }, { "epoch": 0.12, "learning_rate": 2.3444283646888568e-05, "loss": 0.6392, "step": 81 }, { "epoch": 0.12, "learning_rate": 2.373371924746744e-05, "loss": 0.4504, "step": 82 }, { "epoch": 0.12, "learning_rate": 2.4023154848046312e-05, "loss": 0.0518, "step": 83 }, { "epoch": 0.12, "learning_rate": 2.4312590448625184e-05, "loss": 0.1421, "step": 84 }, { "epoch": 0.12, "learning_rate": 2.4602026049204053e-05, "loss": 0.0485, "step": 85 }, { "epoch": 0.12, "learning_rate": 2.4891461649782925e-05, "loss": 0.7021, "step": 86 }, { "epoch": 0.13, "learning_rate": 2.5180897250361797e-05, "loss": 0.7388, "step": 87 }, { "epoch": 0.13, "learning_rate": 2.5470332850940666e-05, "loss": 0.0339, "step": 88 }, { "epoch": 0.13, "learning_rate": 2.5759768451519538e-05, "loss": 0.2732, "step": 89 }, { "epoch": 0.13, "learning_rate": 2.6049204052098407e-05, "loss": 0.2972, "step": 90 }, { "epoch": 0.13, "learning_rate": 2.6338639652677282e-05, "loss": 0.3364, "step": 91 }, { "epoch": 0.13, "learning_rate": 2.662807525325615e-05, "loss": 1.4609, "step": 92 }, { "epoch": 0.13, "learning_rate": 2.691751085383502e-05, "loss": 0.0658, "step": 93 }, { "epoch": 0.14, "learning_rate": 2.7206946454413896e-05, "loss": 0.3535, "step": 94 }, { "epoch": 0.14, "learning_rate": 2.7496382054992764e-05, "loss": 1.1055, "step": 95 }, { "epoch": 0.14, "learning_rate": 2.778581765557164e-05, "loss": 1.4414, "step": 96 }, { "epoch": 0.14, "learning_rate": 2.807525325615051e-05, "loss": 1.875, "step": 97 }, { "epoch": 0.14, "learning_rate": 2.8364688856729377e-05, "loss": 0.8364, "step": 98 }, { "epoch": 0.14, "learning_rate": 2.865412445730825e-05, "loss": 0.4307, "step": 99 }, { "epoch": 0.14, "learning_rate": 2.8943560057887122e-05, "loss": 0.7852, "step": 100 }, { "epoch": 0.15, "learning_rate": 2.9232995658465994e-05, "loss": 0.2856, "step": 101 }, { "epoch": 0.15, "learning_rate": 2.9522431259044863e-05, "loss": 0.4302, "step": 102 }, { "epoch": 0.15, "learning_rate": 2.981186685962373e-05, "loss": 1.0903, "step": 103 }, { "epoch": 0.15, "learning_rate": 3.0101302460202607e-05, "loss": 1.3477, "step": 104 }, { "epoch": 0.15, "learning_rate": 3.0390738060781476e-05, "loss": 0.5117, "step": 105 }, { "epoch": 0.15, "learning_rate": 3.068017366136035e-05, "loss": 0.5107, "step": 106 }, { "epoch": 0.15, "learning_rate": 3.096960926193922e-05, "loss": 0.0619, "step": 107 }, { "epoch": 0.16, "learning_rate": 3.1259044862518086e-05, "loss": 0.7822, "step": 108 }, { "epoch": 0.16, "learning_rate": 3.1548480463096964e-05, "loss": 1.0176, "step": 109 }, { "epoch": 0.16, "learning_rate": 3.183791606367583e-05, "loss": 0.0381, "step": 110 }, { "epoch": 0.16, "learning_rate": 3.212735166425471e-05, "loss": 0.0771, "step": 111 }, { "epoch": 0.16, "learning_rate": 3.2416787264833574e-05, "loss": 0.6042, "step": 112 }, { "epoch": 0.16, "learning_rate": 3.2706222865412446e-05, "loss": 0.0258, "step": 113 }, { "epoch": 0.16, "learning_rate": 3.299565846599132e-05, "loss": 0.6804, "step": 114 }, { "epoch": 0.17, "learning_rate": 3.328509406657019e-05, "loss": 0.0977, "step": 115 }, { "epoch": 0.17, "learning_rate": 3.357452966714906e-05, "loss": 0.0099, "step": 116 }, { "epoch": 0.17, "learning_rate": 3.3863965267727935e-05, "loss": 0.4032, "step": 117 }, { "epoch": 0.17, "learning_rate": 3.41534008683068e-05, "loss": 0.6332, "step": 118 }, { "epoch": 0.17, "learning_rate": 3.444283646888567e-05, "loss": 0.8965, "step": 119 }, { "epoch": 0.17, "learning_rate": 3.4732272069464545e-05, "loss": 0.4485, "step": 120 }, { "epoch": 0.17, "learning_rate": 3.502170767004342e-05, "loss": 0.0394, "step": 121 }, { "epoch": 0.18, "learning_rate": 3.531114327062229e-05, "loss": 0.0372, "step": 122 }, { "epoch": 0.18, "learning_rate": 3.5600578871201154e-05, "loss": 0.3734, "step": 123 }, { "epoch": 0.18, "learning_rate": 3.5890014471780033e-05, "loss": 0.8356, "step": 124 }, { "epoch": 0.18, "learning_rate": 3.61794500723589e-05, "loss": 0.0643, "step": 125 }, { "epoch": 0.18, "learning_rate": 3.646888567293778e-05, "loss": 0.0152, "step": 126 }, { "epoch": 0.18, "learning_rate": 3.675832127351664e-05, "loss": 0.2809, "step": 127 }, { "epoch": 0.19, "learning_rate": 3.7047756874095515e-05, "loss": 0.2233, "step": 128 }, { "epoch": 0.19, "learning_rate": 3.733719247467439e-05, "loss": 0.5156, "step": 129 }, { "epoch": 0.19, "learning_rate": 3.762662807525326e-05, "loss": 0.6548, "step": 130 }, { "epoch": 0.19, "learning_rate": 3.791606367583213e-05, "loss": 1.1465, "step": 131 }, { "epoch": 0.19, "learning_rate": 3.8205499276411e-05, "loss": 0.2869, "step": 132 }, { "epoch": 0.19, "learning_rate": 3.8494934876989876e-05, "loss": 0.045, "step": 133 }, { "epoch": 0.19, "learning_rate": 3.878437047756874e-05, "loss": 0.7078, "step": 134 }, { "epoch": 0.2, "learning_rate": 3.9073806078147614e-05, "loss": 0.0347, "step": 135 }, { "epoch": 0.2, "learning_rate": 3.9363241678726486e-05, "loss": 0.0981, "step": 136 }, { "epoch": 0.2, "learning_rate": 3.965267727930536e-05, "loss": 0.2703, "step": 137 }, { "epoch": 0.2, "learning_rate": 3.994211287988423e-05, "loss": 0.2527, "step": 138 }, { "epoch": 0.2, "learning_rate": 4.02315484804631e-05, "loss": 0.0697, "step": 139 }, { "epoch": 0.2, "learning_rate": 4.052098408104197e-05, "loss": 0.0642, "step": 140 }, { "epoch": 0.2, "learning_rate": 4.081041968162084e-05, "loss": 0.7895, "step": 141 }, { "epoch": 0.21, "learning_rate": 4.109985528219971e-05, "loss": 0.4514, "step": 142 }, { "epoch": 0.21, "learning_rate": 4.1389290882778584e-05, "loss": 0.8613, "step": 143 }, { "epoch": 0.21, "learning_rate": 4.1678726483357456e-05, "loss": 0.467, "step": 144 }, { "epoch": 0.21, "learning_rate": 4.196816208393632e-05, "loss": 0.0636, "step": 145 }, { "epoch": 0.21, "learning_rate": 4.22575976845152e-05, "loss": 0.5093, "step": 146 }, { "epoch": 0.21, "learning_rate": 4.2547033285094066e-05, "loss": 0.8398, "step": 147 }, { "epoch": 0.21, "learning_rate": 4.2836468885672945e-05, "loss": 0.262, "step": 148 }, { "epoch": 0.22, "learning_rate": 4.312590448625181e-05, "loss": 0.7925, "step": 149 }, { "epoch": 0.22, "learning_rate": 4.341534008683068e-05, "loss": 0.0974, "step": 150 }, { "epoch": 0.22, "learning_rate": 4.3704775687409555e-05, "loss": 0.2759, "step": 151 }, { "epoch": 0.22, "learning_rate": 4.399421128798843e-05, "loss": 0.3937, "step": 152 }, { "epoch": 0.22, "learning_rate": 4.42836468885673e-05, "loss": 0.0422, "step": 153 }, { "epoch": 0.22, "learning_rate": 4.4573082489146165e-05, "loss": 0.8379, "step": 154 }, { "epoch": 0.22, "learning_rate": 4.486251808972504e-05, "loss": 0.4929, "step": 155 }, { "epoch": 0.23, "learning_rate": 4.515195369030391e-05, "loss": 0.0918, "step": 156 }, { "epoch": 0.23, "learning_rate": 4.544138929088278e-05, "loss": 0.0916, "step": 157 }, { "epoch": 0.23, "learning_rate": 4.573082489146165e-05, "loss": 0.0862, "step": 158 }, { "epoch": 0.23, "learning_rate": 4.6020260492040525e-05, "loss": 0.1279, "step": 159 }, { "epoch": 0.23, "learning_rate": 4.630969609261939e-05, "loss": 0.9927, "step": 160 }, { "epoch": 0.23, "learning_rate": 4.659913169319827e-05, "loss": 0.3762, "step": 161 }, { "epoch": 0.23, "learning_rate": 4.6888567293777135e-05, "loss": 0.3406, "step": 162 }, { "epoch": 0.24, "learning_rate": 4.717800289435601e-05, "loss": 0.926, "step": 163 }, { "epoch": 0.24, "learning_rate": 4.746743849493488e-05, "loss": 0.0336, "step": 164 }, { "epoch": 0.24, "learning_rate": 4.775687409551375e-05, "loss": 0.9177, "step": 165 }, { "epoch": 0.24, "learning_rate": 4.8046309696092624e-05, "loss": 0.1737, "step": 166 }, { "epoch": 0.24, "learning_rate": 4.833574529667149e-05, "loss": 1.1484, "step": 167 }, { "epoch": 0.24, "learning_rate": 4.862518089725037e-05, "loss": 0.7409, "step": 168 }, { "epoch": 0.24, "learning_rate": 4.8914616497829233e-05, "loss": 0.9434, "step": 169 }, { "epoch": 0.25, "learning_rate": 4.9204052098408106e-05, "loss": 0.9844, "step": 170 }, { "epoch": 0.25, "learning_rate": 4.949348769898698e-05, "loss": 0.0173, "step": 171 }, { "epoch": 0.25, "learning_rate": 4.978292329956585e-05, "loss": 0.5267, "step": 172 }, { "epoch": 0.25, "learning_rate": 5.007235890014472e-05, "loss": 0.3511, "step": 173 }, { "epoch": 0.25, "learning_rate": 5.0361794500723594e-05, "loss": 0.113, "step": 174 }, { "epoch": 0.25, "learning_rate": 5.065123010130246e-05, "loss": 1.1113, "step": 175 }, { "epoch": 0.25, "learning_rate": 5.094066570188133e-05, "loss": 0.9424, "step": 176 }, { "epoch": 0.26, "learning_rate": 5.123010130246021e-05, "loss": 0.2178, "step": 177 }, { "epoch": 0.26, "learning_rate": 5.1519536903039076e-05, "loss": 0.0814, "step": 178 }, { "epoch": 0.26, "learning_rate": 5.180897250361795e-05, "loss": 0.104, "step": 179 }, { "epoch": 0.26, "learning_rate": 5.2098408104196814e-05, "loss": 0.585, "step": 180 }, { "epoch": 0.26, "learning_rate": 5.2387843704775686e-05, "loss": 0.6309, "step": 181 }, { "epoch": 0.26, "learning_rate": 5.2677279305354565e-05, "loss": 0.1016, "step": 182 }, { "epoch": 0.26, "learning_rate": 5.296671490593344e-05, "loss": 0.0828, "step": 183 }, { "epoch": 0.27, "learning_rate": 5.32561505065123e-05, "loss": 0.0729, "step": 184 }, { "epoch": 0.27, "learning_rate": 5.3545586107091175e-05, "loss": 0.0723, "step": 185 }, { "epoch": 0.27, "learning_rate": 5.383502170767004e-05, "loss": 0.655, "step": 186 }, { "epoch": 0.27, "learning_rate": 5.412445730824892e-05, "loss": 0.0266, "step": 187 }, { "epoch": 0.27, "learning_rate": 5.441389290882779e-05, "loss": 0.3758, "step": 188 }, { "epoch": 0.27, "learning_rate": 5.4703328509406656e-05, "loss": 0.8789, "step": 189 }, { "epoch": 0.27, "learning_rate": 5.499276410998553e-05, "loss": 0.9509, "step": 190 }, { "epoch": 0.28, "learning_rate": 5.5282199710564394e-05, "loss": 0.4947, "step": 191 }, { "epoch": 0.28, "learning_rate": 5.557163531114328e-05, "loss": 0.4727, "step": 192 }, { "epoch": 0.28, "learning_rate": 5.5861070911722145e-05, "loss": 0.4902, "step": 193 }, { "epoch": 0.28, "learning_rate": 5.615050651230102e-05, "loss": 0.1271, "step": 194 }, { "epoch": 0.28, "learning_rate": 5.643994211287988e-05, "loss": 0.5073, "step": 195 }, { "epoch": 0.28, "learning_rate": 5.6729377713458755e-05, "loss": 0.6868, "step": 196 }, { "epoch": 0.28, "learning_rate": 5.7018813314037634e-05, "loss": 0.0614, "step": 197 }, { "epoch": 0.29, "learning_rate": 5.73082489146165e-05, "loss": 0.0624, "step": 198 }, { "epoch": 0.29, "learning_rate": 5.759768451519537e-05, "loss": 0.0533, "step": 199 }, { "epoch": 0.29, "learning_rate": 5.7887120115774244e-05, "loss": 0.6312, "step": 200 }, { "epoch": 0.29, "learning_rate": 5.817655571635311e-05, "loss": 0.6699, "step": 201 }, { "epoch": 0.29, "learning_rate": 5.846599131693199e-05, "loss": 1.2324, "step": 202 }, { "epoch": 0.29, "learning_rate": 5.875542691751086e-05, "loss": 0.1017, "step": 203 }, { "epoch": 0.3, "learning_rate": 5.9044862518089725e-05, "loss": 0.0933, "step": 204 }, { "epoch": 0.3, "learning_rate": 5.93342981186686e-05, "loss": 0.0483, "step": 205 }, { "epoch": 0.3, "learning_rate": 5.962373371924746e-05, "loss": 0.0848, "step": 206 }, { "epoch": 0.3, "learning_rate": 5.991316931982635e-05, "loss": 0.5275, "step": 207 }, { "epoch": 0.3, "learning_rate": 6.0202604920405214e-05, "loss": 0.7949, "step": 208 }, { "epoch": 0.3, "learning_rate": 6.0492040520984086e-05, "loss": 1.2207, "step": 209 }, { "epoch": 0.3, "learning_rate": 6.078147612156295e-05, "loss": 0.582, "step": 210 }, { "epoch": 0.31, "learning_rate": 6.107091172214182e-05, "loss": 0.6777, "step": 211 }, { "epoch": 0.31, "learning_rate": 6.13603473227207e-05, "loss": 0.4133, "step": 212 }, { "epoch": 0.31, "learning_rate": 6.164978292329957e-05, "loss": 0.1567, "step": 213 }, { "epoch": 0.31, "learning_rate": 6.193921852387844e-05, "loss": 0.4307, "step": 214 }, { "epoch": 0.31, "learning_rate": 6.22286541244573e-05, "loss": 0.4292, "step": 215 }, { "epoch": 0.31, "learning_rate": 6.251808972503617e-05, "loss": 0.0686, "step": 216 }, { "epoch": 0.31, "learning_rate": 6.280752532561506e-05, "loss": 0.3237, "step": 217 }, { "epoch": 0.32, "learning_rate": 6.309696092619393e-05, "loss": 0.4565, "step": 218 }, { "epoch": 0.32, "learning_rate": 6.33863965267728e-05, "loss": 0.0735, "step": 219 }, { "epoch": 0.32, "learning_rate": 6.367583212735166e-05, "loss": 0.9447, "step": 220 }, { "epoch": 0.32, "learning_rate": 6.396526772793054e-05, "loss": 1.2578, "step": 221 }, { "epoch": 0.32, "learning_rate": 6.425470332850942e-05, "loss": 0.0218, "step": 222 }, { "epoch": 0.32, "learning_rate": 6.454413892908828e-05, "loss": 0.4785, "step": 223 }, { "epoch": 0.32, "learning_rate": 6.483357452966715e-05, "loss": 0.6276, "step": 224 }, { "epoch": 0.33, "learning_rate": 6.512301013024601e-05, "loss": 0.0713, "step": 225 }, { "epoch": 0.33, "learning_rate": 6.541244573082489e-05, "loss": 0.0536, "step": 226 }, { "epoch": 0.33, "learning_rate": 6.570188133140377e-05, "loss": 0.1152, "step": 227 }, { "epoch": 0.33, "learning_rate": 6.599131693198264e-05, "loss": 0.7061, "step": 228 }, { "epoch": 0.33, "learning_rate": 6.62807525325615e-05, "loss": 0.0188, "step": 229 }, { "epoch": 0.33, "learning_rate": 6.657018813314038e-05, "loss": 0.049, "step": 230 }, { "epoch": 0.33, "learning_rate": 6.685962373371925e-05, "loss": 0.0456, "step": 231 }, { "epoch": 0.34, "learning_rate": 6.714905933429813e-05, "loss": 0.0203, "step": 232 }, { "epoch": 0.34, "learning_rate": 6.743849493487699e-05, "loss": 0.1289, "step": 233 }, { "epoch": 0.34, "learning_rate": 6.772793053545587e-05, "loss": 1.2441, "step": 234 }, { "epoch": 0.34, "learning_rate": 6.801736613603474e-05, "loss": 0.6691, "step": 235 }, { "epoch": 0.34, "learning_rate": 6.83068017366136e-05, "loss": 0.6297, "step": 236 }, { "epoch": 0.34, "learning_rate": 6.859623733719248e-05, "loss": 0.7148, "step": 237 }, { "epoch": 0.34, "learning_rate": 6.888567293777135e-05, "loss": 0.0471, "step": 238 }, { "epoch": 0.35, "learning_rate": 6.917510853835022e-05, "loss": 0.026, "step": 239 }, { "epoch": 0.35, "learning_rate": 6.946454413892909e-05, "loss": 0.3683, "step": 240 }, { "epoch": 0.35, "learning_rate": 6.975397973950795e-05, "loss": 0.0953, "step": 241 }, { "epoch": 0.35, "learning_rate": 7.004341534008683e-05, "loss": 0.4937, "step": 242 }, { "epoch": 0.35, "learning_rate": 7.033285094066571e-05, "loss": 0.0798, "step": 243 }, { "epoch": 0.35, "learning_rate": 7.062228654124458e-05, "loss": 0.042, "step": 244 }, { "epoch": 0.35, "learning_rate": 7.091172214182344e-05, "loss": 0.0323, "step": 245 }, { "epoch": 0.36, "learning_rate": 7.120115774240231e-05, "loss": 0.7539, "step": 246 }, { "epoch": 0.36, "learning_rate": 7.149059334298119e-05, "loss": 0.0509, "step": 247 }, { "epoch": 0.36, "learning_rate": 7.178002894356007e-05, "loss": 0.2661, "step": 248 }, { "epoch": 0.36, "learning_rate": 7.206946454413893e-05, "loss": 0.0911, "step": 249 }, { "epoch": 0.36, "learning_rate": 7.23589001447178e-05, "loss": 0.0252, "step": 250 }, { "epoch": 0.36, "learning_rate": 7.264833574529666e-05, "loss": 0.8215, "step": 251 }, { "epoch": 0.36, "learning_rate": 7.293777134587556e-05, "loss": 0.6487, "step": 252 }, { "epoch": 0.37, "learning_rate": 7.322720694645442e-05, "loss": 0.2935, "step": 253 }, { "epoch": 0.37, "learning_rate": 7.351664254703329e-05, "loss": 0.083, "step": 254 }, { "epoch": 0.37, "learning_rate": 7.380607814761215e-05, "loss": 1.0977, "step": 255 }, { "epoch": 0.37, "learning_rate": 7.409551374819103e-05, "loss": 0.0552, "step": 256 }, { "epoch": 0.37, "learning_rate": 7.438494934876991e-05, "loss": 0.0669, "step": 257 }, { "epoch": 0.37, "learning_rate": 7.467438494934877e-05, "loss": 1.0615, "step": 258 }, { "epoch": 0.37, "learning_rate": 7.496382054992764e-05, "loss": 0.209, "step": 259 }, { "epoch": 0.38, "learning_rate": 7.525325615050652e-05, "loss": 0.5072, "step": 260 }, { "epoch": 0.38, "learning_rate": 7.55426917510854e-05, "loss": 0.5352, "step": 261 }, { "epoch": 0.38, "learning_rate": 7.583212735166426e-05, "loss": 0.0402, "step": 262 }, { "epoch": 0.38, "learning_rate": 7.612156295224313e-05, "loss": 0.2839, "step": 263 }, { "epoch": 0.38, "learning_rate": 7.6410998552822e-05, "loss": 0.1694, "step": 264 }, { "epoch": 0.38, "learning_rate": 7.670043415340087e-05, "loss": 0.7488, "step": 265 }, { "epoch": 0.38, "learning_rate": 7.698986975397975e-05, "loss": 0.5178, "step": 266 }, { "epoch": 0.39, "learning_rate": 7.727930535455862e-05, "loss": 1.1289, "step": 267 }, { "epoch": 0.39, "learning_rate": 7.756874095513748e-05, "loss": 0.0543, "step": 268 }, { "epoch": 0.39, "learning_rate": 7.785817655571636e-05, "loss": 0.541, "step": 269 }, { "epoch": 0.39, "learning_rate": 7.814761215629523e-05, "loss": 0.8145, "step": 270 }, { "epoch": 0.39, "learning_rate": 7.84370477568741e-05, "loss": 0.5225, "step": 271 }, { "epoch": 0.39, "learning_rate": 7.872648335745297e-05, "loss": 0.0215, "step": 272 }, { "epoch": 0.39, "learning_rate": 7.901591895803184e-05, "loss": 0.6094, "step": 273 }, { "epoch": 0.4, "learning_rate": 7.930535455861072e-05, "loss": 1.0547, "step": 274 }, { "epoch": 0.4, "learning_rate": 7.959479015918958e-05, "loss": 0.6432, "step": 275 }, { "epoch": 0.4, "learning_rate": 7.988422575976846e-05, "loss": 1.4297, "step": 276 }, { "epoch": 0.4, "learning_rate": 8.017366136034733e-05, "loss": 0.438, "step": 277 }, { "epoch": 0.4, "learning_rate": 8.04630969609262e-05, "loss": 0.0575, "step": 278 }, { "epoch": 0.4, "learning_rate": 8.075253256150507e-05, "loss": 0.058, "step": 279 }, { "epoch": 0.4, "learning_rate": 8.104196816208394e-05, "loss": 0.9927, "step": 280 }, { "epoch": 0.41, "learning_rate": 8.133140376266281e-05, "loss": 0.2661, "step": 281 }, { "epoch": 0.41, "learning_rate": 8.162083936324168e-05, "loss": 0.5847, "step": 282 }, { "epoch": 0.41, "learning_rate": 8.191027496382056e-05, "loss": 0.0798, "step": 283 }, { "epoch": 0.41, "learning_rate": 8.219971056439942e-05, "loss": 0.0494, "step": 284 }, { "epoch": 0.41, "learning_rate": 8.248914616497829e-05, "loss": 0.3375, "step": 285 }, { "epoch": 0.41, "learning_rate": 8.277858176555717e-05, "loss": 0.0316, "step": 286 }, { "epoch": 0.42, "learning_rate": 8.306801736613605e-05, "loss": 0.3951, "step": 287 }, { "epoch": 0.42, "learning_rate": 8.335745296671491e-05, "loss": 0.0169, "step": 288 }, { "epoch": 0.42, "learning_rate": 8.364688856729378e-05, "loss": 1.4922, "step": 289 }, { "epoch": 0.42, "learning_rate": 8.393632416787264e-05, "loss": 0.0526, "step": 290 }, { "epoch": 0.42, "learning_rate": 8.422575976845152e-05, "loss": 0.7547, "step": 291 }, { "epoch": 0.42, "learning_rate": 8.45151953690304e-05, "loss": 0.1838, "step": 292 }, { "epoch": 0.42, "learning_rate": 8.480463096960927e-05, "loss": 0.0503, "step": 293 }, { "epoch": 0.43, "learning_rate": 8.509406657018813e-05, "loss": 0.189, "step": 294 }, { "epoch": 0.43, "learning_rate": 8.538350217076701e-05, "loss": 0.502, "step": 295 }, { "epoch": 0.43, "learning_rate": 8.567293777134589e-05, "loss": 0.2156, "step": 296 }, { "epoch": 0.43, "learning_rate": 8.596237337192476e-05, "loss": 0.0355, "step": 297 }, { "epoch": 0.43, "learning_rate": 8.625180897250362e-05, "loss": 0.2061, "step": 298 }, { "epoch": 0.43, "learning_rate": 8.654124457308249e-05, "loss": 0.2424, "step": 299 }, { "epoch": 0.43, "learning_rate": 8.683068017366137e-05, "loss": 0.5928, "step": 300 }, { "epoch": 0.44, "learning_rate": 8.712011577424024e-05, "loss": 0.6719, "step": 301 }, { "epoch": 0.44, "learning_rate": 8.740955137481911e-05, "loss": 0.0627, "step": 302 }, { "epoch": 0.44, "learning_rate": 8.769898697539797e-05, "loss": 0.5321, "step": 303 }, { "epoch": 0.44, "learning_rate": 8.798842257597685e-05, "loss": 0.0397, "step": 304 }, { "epoch": 0.44, "learning_rate": 8.827785817655572e-05, "loss": 1.0449, "step": 305 }, { "epoch": 0.44, "learning_rate": 8.85672937771346e-05, "loss": 0.1439, "step": 306 }, { "epoch": 0.44, "learning_rate": 8.885672937771346e-05, "loss": 0.548, "step": 307 }, { "epoch": 0.45, "learning_rate": 8.914616497829233e-05, "loss": 0.4983, "step": 308 }, { "epoch": 0.45, "learning_rate": 8.943560057887121e-05, "loss": 0.5488, "step": 309 }, { "epoch": 0.45, "learning_rate": 8.972503617945007e-05, "loss": 0.0569, "step": 310 }, { "epoch": 0.45, "learning_rate": 9.001447178002895e-05, "loss": 0.4834, "step": 311 }, { "epoch": 0.45, "learning_rate": 9.030390738060782e-05, "loss": 0.6709, "step": 312 }, { "epoch": 0.45, "learning_rate": 9.05933429811867e-05, "loss": 0.0659, "step": 313 }, { "epoch": 0.45, "learning_rate": 9.088277858176556e-05, "loss": 0.521, "step": 314 }, { "epoch": 0.46, "learning_rate": 9.117221418234443e-05, "loss": 0.0502, "step": 315 }, { "epoch": 0.46, "learning_rate": 9.14616497829233e-05, "loss": 0.3198, "step": 316 }, { "epoch": 0.46, "learning_rate": 9.175108538350217e-05, "loss": 0.0432, "step": 317 }, { "epoch": 0.46, "learning_rate": 9.204052098408105e-05, "loss": 0.1003, "step": 318 }, { "epoch": 0.46, "learning_rate": 9.232995658465992e-05, "loss": 0.5898, "step": 319 }, { "epoch": 0.46, "learning_rate": 9.261939218523878e-05, "loss": 0.0759, "step": 320 }, { "epoch": 0.46, "learning_rate": 9.290882778581766e-05, "loss": 0.1221, "step": 321 }, { "epoch": 0.47, "learning_rate": 9.319826338639654e-05, "loss": 0.0482, "step": 322 }, { "epoch": 0.47, "learning_rate": 9.34876989869754e-05, "loss": 0.8018, "step": 323 }, { "epoch": 0.47, "learning_rate": 9.377713458755427e-05, "loss": 0.4397, "step": 324 }, { "epoch": 0.47, "learning_rate": 9.406657018813314e-05, "loss": 0.1709, "step": 325 }, { "epoch": 0.47, "learning_rate": 9.435600578871201e-05, "loss": 0.094, "step": 326 }, { "epoch": 0.47, "learning_rate": 9.46454413892909e-05, "loss": 0.1226, "step": 327 }, { "epoch": 0.47, "learning_rate": 9.493487698986976e-05, "loss": 1.1562, "step": 328 }, { "epoch": 0.48, "learning_rate": 9.522431259044862e-05, "loss": 0.3064, "step": 329 }, { "epoch": 0.48, "learning_rate": 9.55137481910275e-05, "loss": 0.5471, "step": 330 }, { "epoch": 0.48, "learning_rate": 9.580318379160638e-05, "loss": 0.3022, "step": 331 }, { "epoch": 0.48, "learning_rate": 9.609261939218525e-05, "loss": 0.2545, "step": 332 }, { "epoch": 0.48, "learning_rate": 9.638205499276411e-05, "loss": 0.3213, "step": 333 }, { "epoch": 0.48, "learning_rate": 9.667149059334298e-05, "loss": 0.1443, "step": 334 }, { "epoch": 0.48, "learning_rate": 9.696092619392186e-05, "loss": 0.2532, "step": 335 }, { "epoch": 0.49, "learning_rate": 9.725036179450074e-05, "loss": 0.2549, "step": 336 }, { "epoch": 0.49, "learning_rate": 9.75397973950796e-05, "loss": 0.0463, "step": 337 }, { "epoch": 0.49, "learning_rate": 9.782923299565847e-05, "loss": 0.0223, "step": 338 }, { "epoch": 0.49, "learning_rate": 9.811866859623735e-05, "loss": 0.048, "step": 339 }, { "epoch": 0.49, "learning_rate": 9.840810419681621e-05, "loss": 1.1521, "step": 340 }, { "epoch": 0.49, "learning_rate": 9.869753979739509e-05, "loss": 0.1086, "step": 341 }, { "epoch": 0.49, "learning_rate": 9.898697539797396e-05, "loss": 0.8707, "step": 342 }, { "epoch": 0.5, "learning_rate": 9.927641099855282e-05, "loss": 0.1023, "step": 343 }, { "epoch": 0.5, "learning_rate": 9.95658465991317e-05, "loss": 0.0659, "step": 344 }, { "epoch": 0.5, "learning_rate": 9.985528219971057e-05, "loss": 0.5345, "step": 345 }, { "epoch": 0.5, "learning_rate": 0.00010014471780028944, "loss": 0.0257, "step": 346 }, { "epoch": 0.5, "learning_rate": 0.0001004341534008683, "loss": 0.0636, "step": 347 }, { "epoch": 0.5, "learning_rate": 0.00010072358900144719, "loss": 0.0141, "step": 348 }, { "epoch": 0.5, "learning_rate": 0.00010101302460202607, "loss": 0.0201, "step": 349 }, { "epoch": 0.51, "learning_rate": 0.00010130246020260492, "loss": 0.0502, "step": 350 }, { "epoch": 0.51, "learning_rate": 0.0001015918958031838, "loss": 0.2144, "step": 351 }, { "epoch": 0.51, "learning_rate": 0.00010188133140376266, "loss": 0.3022, "step": 352 }, { "epoch": 0.51, "learning_rate": 0.00010217076700434154, "loss": 0.4375, "step": 353 }, { "epoch": 0.51, "learning_rate": 0.00010246020260492042, "loss": 0.0182, "step": 354 }, { "epoch": 0.51, "learning_rate": 0.00010274963820549927, "loss": 0.0703, "step": 355 }, { "epoch": 0.51, "learning_rate": 0.00010303907380607815, "loss": 0.3344, "step": 356 }, { "epoch": 0.52, "learning_rate": 0.00010332850940665702, "loss": 0.9077, "step": 357 }, { "epoch": 0.52, "learning_rate": 0.0001036179450072359, "loss": 0.2336, "step": 358 }, { "epoch": 0.52, "learning_rate": 0.00010390738060781478, "loss": 0.709, "step": 359 }, { "epoch": 0.52, "learning_rate": 0.00010419681620839363, "loss": 0.3228, "step": 360 }, { "epoch": 0.52, "learning_rate": 0.0001044862518089725, "loss": 0.2303, "step": 361 }, { "epoch": 0.52, "learning_rate": 0.00010477568740955137, "loss": 0.0859, "step": 362 }, { "epoch": 0.52, "learning_rate": 0.00010506512301013025, "loss": 0.1255, "step": 363 }, { "epoch": 0.53, "learning_rate": 0.00010535455861070913, "loss": 0.5679, "step": 364 }, { "epoch": 0.53, "learning_rate": 0.000105643994211288, "loss": 0.3262, "step": 365 }, { "epoch": 0.53, "learning_rate": 0.00010593342981186687, "loss": 0.064, "step": 366 }, { "epoch": 0.53, "learning_rate": 0.00010622286541244573, "loss": 0.0355, "step": 367 }, { "epoch": 0.53, "learning_rate": 0.0001065123010130246, "loss": 0.582, "step": 368 }, { "epoch": 0.53, "learning_rate": 0.00010680173661360348, "loss": 0.2993, "step": 369 }, { "epoch": 0.54, "learning_rate": 0.00010709117221418235, "loss": 0.5687, "step": 370 }, { "epoch": 0.54, "learning_rate": 0.00010738060781476123, "loss": 0.2557, "step": 371 }, { "epoch": 0.54, "learning_rate": 0.00010767004341534008, "loss": 0.1971, "step": 372 }, { "epoch": 0.54, "learning_rate": 0.00010795947901591896, "loss": 0.1919, "step": 373 }, { "epoch": 0.54, "learning_rate": 0.00010824891461649784, "loss": 0.0218, "step": 374 }, { "epoch": 0.54, "learning_rate": 0.0001085383502170767, "loss": 0.0707, "step": 375 }, { "epoch": 0.54, "learning_rate": 0.00010882778581765558, "loss": 0.4258, "step": 376 }, { "epoch": 0.55, "learning_rate": 0.00010911722141823443, "loss": 0.1049, "step": 377 }, { "epoch": 0.55, "learning_rate": 0.00010940665701881331, "loss": 0.9863, "step": 378 }, { "epoch": 0.55, "learning_rate": 0.0001096960926193922, "loss": 0.283, "step": 379 }, { "epoch": 0.55, "learning_rate": 0.00010998552821997106, "loss": 0.8438, "step": 380 }, { "epoch": 0.55, "learning_rate": 0.00011027496382054994, "loss": 0.1466, "step": 381 }, { "epoch": 0.55, "learning_rate": 0.00011056439942112879, "loss": 0.7773, "step": 382 }, { "epoch": 0.55, "learning_rate": 0.00011085383502170768, "loss": 0.4629, "step": 383 }, { "epoch": 0.56, "learning_rate": 0.00011114327062228656, "loss": 0.6543, "step": 384 }, { "epoch": 0.56, "learning_rate": 0.00011143270622286541, "loss": 0.0559, "step": 385 }, { "epoch": 0.56, "learning_rate": 0.00011172214182344429, "loss": 0.0292, "step": 386 }, { "epoch": 0.56, "learning_rate": 0.00011201157742402316, "loss": 0.0813, "step": 387 }, { "epoch": 0.56, "learning_rate": 0.00011230101302460203, "loss": 0.7441, "step": 388 }, { "epoch": 0.56, "learning_rate": 0.00011259044862518091, "loss": 0.3094, "step": 389 }, { "epoch": 0.56, "learning_rate": 0.00011287988422575977, "loss": 0.178, "step": 390 }, { "epoch": 0.57, "learning_rate": 0.00011316931982633864, "loss": 0.1458, "step": 391 }, { "epoch": 0.57, "learning_rate": 0.00011345875542691751, "loss": 0.7825, "step": 392 }, { "epoch": 0.57, "learning_rate": 0.00011374819102749639, "loss": 0.2544, "step": 393 }, { "epoch": 0.57, "learning_rate": 0.00011403762662807527, "loss": 0.2808, "step": 394 }, { "epoch": 0.57, "learning_rate": 0.00011432706222865412, "loss": 0.0527, "step": 395 }, { "epoch": 0.57, "learning_rate": 0.000114616497829233, "loss": 0.1658, "step": 396 }, { "epoch": 0.57, "learning_rate": 0.00011490593342981186, "loss": 0.606, "step": 397 }, { "epoch": 0.58, "learning_rate": 0.00011519536903039074, "loss": 0.3398, "step": 398 }, { "epoch": 0.58, "learning_rate": 0.00011548480463096962, "loss": 0.0579, "step": 399 }, { "epoch": 0.58, "learning_rate": 0.00011577424023154849, "loss": 0.6958, "step": 400 }, { "epoch": 0.58, "learning_rate": 0.00011606367583212737, "loss": 0.1011, "step": 401 }, { "epoch": 0.58, "learning_rate": 0.00011635311143270622, "loss": 0.457, "step": 402 }, { "epoch": 0.58, "learning_rate": 0.0001166425470332851, "loss": 0.1709, "step": 403 }, { "epoch": 0.58, "learning_rate": 0.00011693198263386398, "loss": 0.2092, "step": 404 }, { "epoch": 0.59, "learning_rate": 0.00011722141823444284, "loss": 0.2481, "step": 405 }, { "epoch": 0.59, "learning_rate": 0.00011751085383502172, "loss": 0.4773, "step": 406 }, { "epoch": 0.59, "learning_rate": 0.00011780028943560057, "loss": 0.2705, "step": 407 }, { "epoch": 0.59, "learning_rate": 0.00011808972503617945, "loss": 0.502, "step": 408 }, { "epoch": 0.59, "learning_rate": 0.00011837916063675833, "loss": 0.1675, "step": 409 }, { "epoch": 0.59, "learning_rate": 0.0001186685962373372, "loss": 0.259, "step": 410 }, { "epoch": 0.59, "learning_rate": 0.00011895803183791607, "loss": 0.9707, "step": 411 }, { "epoch": 0.6, "learning_rate": 0.00011924746743849493, "loss": 0.3896, "step": 412 }, { "epoch": 0.6, "learning_rate": 0.0001195369030390738, "loss": 0.2913, "step": 413 }, { "epoch": 0.6, "learning_rate": 0.0001198263386396527, "loss": 0.915, "step": 414 }, { "epoch": 0.6, "learning_rate": 0.00012011577424023155, "loss": 0.2295, "step": 415 }, { "epoch": 0.6, "learning_rate": 0.00012040520984081043, "loss": 0.7373, "step": 416 }, { "epoch": 0.6, "learning_rate": 0.0001206946454413893, "loss": 0.0531, "step": 417 }, { "epoch": 0.6, "learning_rate": 0.00012098408104196817, "loss": 0.064, "step": 418 }, { "epoch": 0.61, "learning_rate": 0.00012127351664254705, "loss": 0.105, "step": 419 }, { "epoch": 0.61, "learning_rate": 0.0001215629522431259, "loss": 0.3093, "step": 420 }, { "epoch": 0.61, "learning_rate": 0.00012185238784370478, "loss": 0.1482, "step": 421 }, { "epoch": 0.61, "learning_rate": 0.00012214182344428363, "loss": 0.0113, "step": 422 }, { "epoch": 0.61, "learning_rate": 0.0001224312590448625, "loss": 0.2091, "step": 423 }, { "epoch": 0.61, "learning_rate": 0.0001227206946454414, "loss": 0.0088, "step": 424 }, { "epoch": 0.61, "learning_rate": 0.00012301013024602027, "loss": 0.0457, "step": 425 }, { "epoch": 0.62, "learning_rate": 0.00012329956584659915, "loss": 0.3201, "step": 426 }, { "epoch": 0.62, "learning_rate": 0.000123589001447178, "loss": 0.5742, "step": 427 }, { "epoch": 0.62, "learning_rate": 0.00012387843704775688, "loss": 0.3484, "step": 428 }, { "epoch": 0.62, "learning_rate": 0.00012416787264833576, "loss": 0.0786, "step": 429 }, { "epoch": 0.62, "learning_rate": 0.0001244573082489146, "loss": 0.489, "step": 430 }, { "epoch": 0.62, "learning_rate": 0.0001247467438494935, "loss": 0.3994, "step": 431 }, { "epoch": 0.62, "learning_rate": 0.00012503617945007234, "loss": 0.5774, "step": 432 }, { "epoch": 0.63, "learning_rate": 0.00012532561505065125, "loss": 0.1578, "step": 433 }, { "epoch": 0.63, "learning_rate": 0.00012561505065123013, "loss": 0.0563, "step": 434 }, { "epoch": 0.63, "learning_rate": 0.00012590448625180898, "loss": 0.3621, "step": 435 }, { "epoch": 0.63, "learning_rate": 0.00012619392185238786, "loss": 0.0391, "step": 436 }, { "epoch": 0.63, "learning_rate": 0.0001264833574529667, "loss": 0.9502, "step": 437 }, { "epoch": 0.63, "learning_rate": 0.0001267727930535456, "loss": 0.2712, "step": 438 }, { "epoch": 0.63, "learning_rate": 0.00012706222865412447, "loss": 0.0184, "step": 439 }, { "epoch": 0.64, "learning_rate": 0.00012735166425470332, "loss": 0.049, "step": 440 }, { "epoch": 0.64, "learning_rate": 0.0001276410998552822, "loss": 0.4179, "step": 441 }, { "epoch": 0.64, "learning_rate": 0.00012793053545586108, "loss": 0.0219, "step": 442 }, { "epoch": 0.64, "learning_rate": 0.00012821997105643996, "loss": 0.3604, "step": 443 }, { "epoch": 0.64, "learning_rate": 0.00012850940665701884, "loss": 0.4763, "step": 444 }, { "epoch": 0.64, "learning_rate": 0.0001287988422575977, "loss": 0.4058, "step": 445 }, { "epoch": 0.64, "learning_rate": 0.00012908827785817657, "loss": 0.127, "step": 446 }, { "epoch": 0.65, "learning_rate": 0.00012937771345875542, "loss": 0.0172, "step": 447 }, { "epoch": 0.65, "learning_rate": 0.0001296671490593343, "loss": 0.3008, "step": 448 }, { "epoch": 0.65, "learning_rate": 0.00012995658465991318, "loss": 0.8535, "step": 449 }, { "epoch": 0.65, "learning_rate": 0.00013024602026049203, "loss": 0.5227, "step": 450 }, { "epoch": 0.65, "learning_rate": 0.00013053545586107093, "loss": 0.6855, "step": 451 }, { "epoch": 0.65, "learning_rate": 0.00013082489146164979, "loss": 0.0535, "step": 452 }, { "epoch": 0.66, "learning_rate": 0.00013111432706222866, "loss": 0.396, "step": 453 }, { "epoch": 0.66, "learning_rate": 0.00013140376266280754, "loss": 0.4805, "step": 454 }, { "epoch": 0.66, "learning_rate": 0.0001316931982633864, "loss": 0.0789, "step": 455 }, { "epoch": 0.66, "learning_rate": 0.00013198263386396527, "loss": 0.0667, "step": 456 }, { "epoch": 0.66, "learning_rate": 0.00013227206946454413, "loss": 0.8062, "step": 457 }, { "epoch": 0.66, "learning_rate": 0.000132561505065123, "loss": 0.0636, "step": 458 }, { "epoch": 0.66, "learning_rate": 0.00013285094066570188, "loss": 0.3038, "step": 459 }, { "epoch": 0.67, "learning_rate": 0.00013314037626628076, "loss": 1.5527, "step": 460 }, { "epoch": 0.67, "learning_rate": 0.00013342981186685964, "loss": 0.6377, "step": 461 }, { "epoch": 0.67, "learning_rate": 0.0001337192474674385, "loss": 0.2294, "step": 462 }, { "epoch": 0.67, "learning_rate": 0.00013400868306801737, "loss": 0.8262, "step": 463 }, { "epoch": 0.67, "learning_rate": 0.00013429811866859625, "loss": 0.0189, "step": 464 }, { "epoch": 0.67, "learning_rate": 0.0001345875542691751, "loss": 0.4209, "step": 465 }, { "epoch": 0.67, "learning_rate": 0.00013487698986975398, "loss": 0.1531, "step": 466 }, { "epoch": 0.68, "learning_rate": 0.00013516642547033283, "loss": 0.0201, "step": 467 }, { "epoch": 0.68, "learning_rate": 0.00013545586107091174, "loss": 0.2681, "step": 468 }, { "epoch": 0.68, "learning_rate": 0.00013574529667149062, "loss": 0.3821, "step": 469 }, { "epoch": 0.68, "learning_rate": 0.00013603473227206947, "loss": 0.047, "step": 470 }, { "epoch": 0.68, "learning_rate": 0.00013632416787264835, "loss": 0.0713, "step": 471 }, { "epoch": 0.68, "learning_rate": 0.0001366136034732272, "loss": 0.4363, "step": 472 }, { "epoch": 0.68, "learning_rate": 0.00013690303907380608, "loss": 0.0745, "step": 473 }, { "epoch": 0.69, "learning_rate": 0.00013719247467438496, "loss": 0.0414, "step": 474 }, { "epoch": 0.69, "learning_rate": 0.0001374819102749638, "loss": 0.0825, "step": 475 }, { "epoch": 0.69, "learning_rate": 0.0001377713458755427, "loss": 0.0565, "step": 476 }, { "epoch": 0.69, "learning_rate": 0.00013806078147612157, "loss": 0.0571, "step": 477 }, { "epoch": 0.69, "learning_rate": 0.00013835021707670045, "loss": 0.0342, "step": 478 }, { "epoch": 0.69, "learning_rate": 0.00013863965267727933, "loss": 0.6047, "step": 479 }, { "epoch": 0.69, "learning_rate": 0.00013892908827785818, "loss": 0.0559, "step": 480 }, { "epoch": 0.7, "learning_rate": 0.00013921852387843706, "loss": 0.1201, "step": 481 }, { "epoch": 0.7, "learning_rate": 0.0001395079594790159, "loss": 0.4338, "step": 482 }, { "epoch": 0.7, "learning_rate": 0.0001397973950795948, "loss": 0.2939, "step": 483 }, { "epoch": 0.7, "learning_rate": 0.00014008683068017367, "loss": 0.9219, "step": 484 }, { "epoch": 0.7, "learning_rate": 0.00014037626628075252, "loss": 0.0867, "step": 485 }, { "epoch": 0.7, "learning_rate": 0.00014066570188133143, "loss": 0.0616, "step": 486 }, { "epoch": 0.7, "learning_rate": 0.00014095513748191028, "loss": 0.4156, "step": 487 }, { "epoch": 0.71, "learning_rate": 0.00014124457308248916, "loss": 0.4788, "step": 488 }, { "epoch": 0.71, "learning_rate": 0.00014153400868306804, "loss": 0.1907, "step": 489 }, { "epoch": 0.71, "learning_rate": 0.0001418234442836469, "loss": 0.0765, "step": 490 }, { "epoch": 0.71, "learning_rate": 0.00014211287988422577, "loss": 0.3845, "step": 491 }, { "epoch": 0.71, "learning_rate": 0.00014240231548480462, "loss": 0.8008, "step": 492 }, { "epoch": 0.71, "learning_rate": 0.0001426917510853835, "loss": 0.1536, "step": 493 }, { "epoch": 0.71, "learning_rate": 0.00014298118668596238, "loss": 0.3577, "step": 494 }, { "epoch": 0.72, "learning_rate": 0.00014327062228654125, "loss": 0.3896, "step": 495 }, { "epoch": 0.72, "learning_rate": 0.00014356005788712013, "loss": 0.0389, "step": 496 }, { "epoch": 0.72, "learning_rate": 0.00014384949348769899, "loss": 0.1799, "step": 497 }, { "epoch": 0.72, "learning_rate": 0.00014413892908827786, "loss": 0.7617, "step": 498 }, { "epoch": 0.72, "learning_rate": 0.00014442836468885674, "loss": 0.6846, "step": 499 }, { "epoch": 0.72, "learning_rate": 0.0001447178002894356, "loss": 0.0329, "step": 500 }, { "epoch": 0.72, "learning_rate": 0.00014500723589001447, "loss": 0.4768, "step": 501 }, { "epoch": 0.73, "learning_rate": 0.00014529667149059333, "loss": 0.55, "step": 502 }, { "epoch": 0.73, "learning_rate": 0.00014558610709117223, "loss": 0.2052, "step": 503 }, { "epoch": 0.73, "learning_rate": 0.0001458755426917511, "loss": 0.0249, "step": 504 }, { "epoch": 0.73, "learning_rate": 0.00014616497829232996, "loss": 0.051, "step": 505 }, { "epoch": 0.73, "learning_rate": 0.00014645441389290884, "loss": 0.25, "step": 506 }, { "epoch": 0.73, "learning_rate": 0.0001467438494934877, "loss": 0.2465, "step": 507 }, { "epoch": 0.73, "learning_rate": 0.00014703328509406657, "loss": 0.1636, "step": 508 }, { "epoch": 0.74, "learning_rate": 0.00014732272069464545, "loss": 0.3411, "step": 509 }, { "epoch": 0.74, "learning_rate": 0.0001476121562952243, "loss": 0.5214, "step": 510 }, { "epoch": 0.74, "learning_rate": 0.00014790159189580318, "loss": 0.1299, "step": 511 }, { "epoch": 0.74, "learning_rate": 0.00014819102749638206, "loss": 0.752, "step": 512 }, { "epoch": 0.74, "learning_rate": 0.00014848046309696094, "loss": 0.0596, "step": 513 }, { "epoch": 0.74, "learning_rate": 0.00014876989869753982, "loss": 0.6367, "step": 514 }, { "epoch": 0.74, "learning_rate": 0.00014905933429811867, "loss": 0.5713, "step": 515 }, { "epoch": 0.75, "learning_rate": 0.00014934876989869755, "loss": 0.9316, "step": 516 }, { "epoch": 0.75, "learning_rate": 0.0001496382054992764, "loss": 0.6392, "step": 517 }, { "epoch": 0.75, "learning_rate": 0.00014992764109985528, "loss": 0.1814, "step": 518 }, { "epoch": 0.75, "learning_rate": 0.00015021707670043416, "loss": 0.3689, "step": 519 }, { "epoch": 0.75, "learning_rate": 0.00015050651230101304, "loss": 0.0558, "step": 520 }, { "epoch": 0.75, "learning_rate": 0.00015079594790159192, "loss": 0.0455, "step": 521 }, { "epoch": 0.75, "learning_rate": 0.0001510853835021708, "loss": 0.4558, "step": 522 }, { "epoch": 0.76, "learning_rate": 0.00015137481910274965, "loss": 0.0275, "step": 523 }, { "epoch": 0.76, "learning_rate": 0.00015166425470332853, "loss": 0.3442, "step": 524 }, { "epoch": 0.76, "learning_rate": 0.00015195369030390738, "loss": 1.0703, "step": 525 }, { "epoch": 0.76, "learning_rate": 0.00015224312590448626, "loss": 0.1638, "step": 526 }, { "epoch": 0.76, "learning_rate": 0.00015253256150506514, "loss": 0.3613, "step": 527 }, { "epoch": 0.76, "learning_rate": 0.000152821997105644, "loss": 0.2158, "step": 528 }, { "epoch": 0.77, "learning_rate": 0.00015311143270622287, "loss": 0.0121, "step": 529 }, { "epoch": 0.77, "learning_rate": 0.00015340086830680175, "loss": 0.2, "step": 530 }, { "epoch": 0.77, "learning_rate": 0.00015369030390738063, "loss": 0.4082, "step": 531 }, { "epoch": 0.77, "learning_rate": 0.0001539797395079595, "loss": 0.3195, "step": 532 }, { "epoch": 0.77, "learning_rate": 0.00015426917510853836, "loss": 0.4443, "step": 533 }, { "epoch": 0.77, "learning_rate": 0.00015455861070911724, "loss": 0.7148, "step": 534 }, { "epoch": 0.77, "learning_rate": 0.0001548480463096961, "loss": 0.446, "step": 535 }, { "epoch": 0.78, "learning_rate": 0.00015513748191027497, "loss": 0.0763, "step": 536 }, { "epoch": 0.78, "learning_rate": 0.00015542691751085385, "loss": 0.6726, "step": 537 }, { "epoch": 0.78, "learning_rate": 0.00015571635311143272, "loss": 0.5931, "step": 538 }, { "epoch": 0.78, "learning_rate": 0.0001560057887120116, "loss": 0.5104, "step": 539 }, { "epoch": 0.78, "learning_rate": 0.00015629522431259045, "loss": 0.1196, "step": 540 }, { "epoch": 0.78, "learning_rate": 0.00015658465991316933, "loss": 0.3542, "step": 541 }, { "epoch": 0.78, "learning_rate": 0.0001568740955137482, "loss": 0.2173, "step": 542 }, { "epoch": 0.79, "learning_rate": 0.00015716353111432706, "loss": 0.04, "step": 543 }, { "epoch": 0.79, "learning_rate": 0.00015745296671490594, "loss": 0.342, "step": 544 }, { "epoch": 0.79, "learning_rate": 0.0001577424023154848, "loss": 0.0574, "step": 545 }, { "epoch": 0.79, "learning_rate": 0.00015803183791606367, "loss": 0.5027, "step": 546 }, { "epoch": 0.79, "learning_rate": 0.00015832127351664255, "loss": 0.2007, "step": 547 }, { "epoch": 0.79, "learning_rate": 0.00015861070911722143, "loss": 0.4073, "step": 548 }, { "epoch": 0.79, "learning_rate": 0.0001589001447178003, "loss": 0.0255, "step": 549 }, { "epoch": 0.8, "learning_rate": 0.00015918958031837916, "loss": 0.2272, "step": 550 }, { "epoch": 0.8, "learning_rate": 0.00015947901591895804, "loss": 0.2021, "step": 551 }, { "epoch": 0.8, "learning_rate": 0.00015976845151953692, "loss": 0.3438, "step": 552 }, { "epoch": 0.8, "learning_rate": 0.00016005788712011577, "loss": 0.1482, "step": 553 }, { "epoch": 0.8, "learning_rate": 0.00016034732272069465, "loss": 0.5955, "step": 554 }, { "epoch": 0.8, "learning_rate": 0.00016063675832127353, "loss": 0.0479, "step": 555 }, { "epoch": 0.8, "learning_rate": 0.0001609261939218524, "loss": 0.4019, "step": 556 }, { "epoch": 0.81, "learning_rate": 0.0001612156295224313, "loss": 0.4027, "step": 557 }, { "epoch": 0.81, "learning_rate": 0.00016150506512301014, "loss": 0.1664, "step": 558 }, { "epoch": 0.81, "learning_rate": 0.00016179450072358902, "loss": 0.0526, "step": 559 }, { "epoch": 0.81, "learning_rate": 0.00016208393632416787, "loss": 0.0497, "step": 560 }, { "epoch": 0.81, "learning_rate": 0.00016237337192474675, "loss": 0.5215, "step": 561 }, { "epoch": 0.81, "learning_rate": 0.00016266280752532563, "loss": 0.5849, "step": 562 }, { "epoch": 0.81, "learning_rate": 0.00016295224312590448, "loss": 0.472, "step": 563 }, { "epoch": 0.82, "learning_rate": 0.00016324167872648336, "loss": 0.2003, "step": 564 }, { "epoch": 0.82, "learning_rate": 0.00016353111432706224, "loss": 0.6387, "step": 565 }, { "epoch": 0.82, "learning_rate": 0.00016382054992764112, "loss": 0.309, "step": 566 }, { "epoch": 0.82, "learning_rate": 0.00016410998552822, "loss": 0.3745, "step": 567 }, { "epoch": 0.82, "learning_rate": 0.00016439942112879885, "loss": 0.8184, "step": 568 }, { "epoch": 0.82, "learning_rate": 0.00016468885672937773, "loss": 0.0325, "step": 569 }, { "epoch": 0.82, "learning_rate": 0.00016497829232995658, "loss": 0.2354, "step": 570 }, { "epoch": 0.83, "learning_rate": 0.00016526772793053546, "loss": 0.0662, "step": 571 }, { "epoch": 0.83, "learning_rate": 0.00016555716353111434, "loss": 0.0291, "step": 572 }, { "epoch": 0.83, "learning_rate": 0.00016584659913169322, "loss": 0.0197, "step": 573 }, { "epoch": 0.83, "learning_rate": 0.0001661360347322721, "loss": 0.4701, "step": 574 }, { "epoch": 0.83, "learning_rate": 0.00016642547033285095, "loss": 0.4367, "step": 575 }, { "epoch": 0.83, "learning_rate": 0.00016671490593342983, "loss": 0.9717, "step": 576 }, { "epoch": 0.83, "learning_rate": 0.0001670043415340087, "loss": 0.349, "step": 577 }, { "epoch": 0.84, "learning_rate": 0.00016729377713458756, "loss": 0.1098, "step": 578 }, { "epoch": 0.84, "learning_rate": 0.00016758321273516644, "loss": 0.5322, "step": 579 }, { "epoch": 0.84, "learning_rate": 0.0001678726483357453, "loss": 0.3179, "step": 580 }, { "epoch": 0.84, "learning_rate": 0.00016816208393632417, "loss": 0.0468, "step": 581 }, { "epoch": 0.84, "learning_rate": 0.00016845151953690305, "loss": 0.5146, "step": 582 }, { "epoch": 0.84, "learning_rate": 0.00016874095513748192, "loss": 0.5583, "step": 583 }, { "epoch": 0.84, "learning_rate": 0.0001690303907380608, "loss": 0.6935, "step": 584 }, { "epoch": 0.85, "learning_rate": 0.00016931982633863965, "loss": 0.1995, "step": 585 }, { "epoch": 0.85, "learning_rate": 0.00016960926193921853, "loss": 0.0502, "step": 586 }, { "epoch": 0.85, "learning_rate": 0.0001698986975397974, "loss": 1.0508, "step": 587 }, { "epoch": 0.85, "learning_rate": 0.00017018813314037626, "loss": 0.5117, "step": 588 }, { "epoch": 0.85, "learning_rate": 0.00017047756874095514, "loss": 0.948, "step": 589 }, { "epoch": 0.85, "learning_rate": 0.00017076700434153402, "loss": 0.3545, "step": 590 }, { "epoch": 0.85, "learning_rate": 0.0001710564399421129, "loss": 0.7126, "step": 591 }, { "epoch": 0.86, "learning_rate": 0.00017134587554269178, "loss": 0.0356, "step": 592 }, { "epoch": 0.86, "learning_rate": 0.00017163531114327063, "loss": 0.4066, "step": 593 }, { "epoch": 0.86, "learning_rate": 0.0001719247467438495, "loss": 0.6196, "step": 594 }, { "epoch": 0.86, "learning_rate": 0.00017221418234442836, "loss": 0.3135, "step": 595 }, { "epoch": 0.86, "learning_rate": 0.00017250361794500724, "loss": 0.2045, "step": 596 }, { "epoch": 0.86, "learning_rate": 0.00017279305354558612, "loss": 0.1234, "step": 597 }, { "epoch": 0.86, "learning_rate": 0.00017308248914616497, "loss": 0.0611, "step": 598 }, { "epoch": 0.87, "learning_rate": 0.00017337192474674385, "loss": 0.3921, "step": 599 }, { "epoch": 0.87, "learning_rate": 0.00017366136034732273, "loss": 0.7344, "step": 600 }, { "epoch": 0.87, "learning_rate": 0.0001739507959479016, "loss": 0.6831, "step": 601 }, { "epoch": 0.87, "learning_rate": 0.0001742402315484805, "loss": 0.1262, "step": 602 }, { "epoch": 0.87, "learning_rate": 0.00017452966714905934, "loss": 0.2593, "step": 603 }, { "epoch": 0.87, "learning_rate": 0.00017481910274963822, "loss": 0.979, "step": 604 }, { "epoch": 0.87, "learning_rate": 0.00017510853835021707, "loss": 0.1428, "step": 605 }, { "epoch": 0.88, "learning_rate": 0.00017539797395079595, "loss": 0.0601, "step": 606 }, { "epoch": 0.88, "learning_rate": 0.00017568740955137483, "loss": 0.082, "step": 607 }, { "epoch": 0.88, "learning_rate": 0.0001759768451519537, "loss": 1.3086, "step": 608 }, { "epoch": 0.88, "learning_rate": 0.0001762662807525326, "loss": 0.0571, "step": 609 }, { "epoch": 0.88, "learning_rate": 0.00017655571635311144, "loss": 0.5908, "step": 610 }, { "epoch": 0.88, "learning_rate": 0.00017684515195369032, "loss": 0.1278, "step": 611 }, { "epoch": 0.89, "learning_rate": 0.0001771345875542692, "loss": 0.5269, "step": 612 }, { "epoch": 0.89, "learning_rate": 0.00017742402315484805, "loss": 0.3262, "step": 613 }, { "epoch": 0.89, "learning_rate": 0.00017771345875542693, "loss": 0.0226, "step": 614 }, { "epoch": 0.89, "learning_rate": 0.00017800289435600578, "loss": 0.4878, "step": 615 }, { "epoch": 0.89, "learning_rate": 0.00017829232995658466, "loss": 0.2938, "step": 616 }, { "epoch": 0.89, "learning_rate": 0.00017858176555716354, "loss": 0.0129, "step": 617 }, { "epoch": 0.89, "learning_rate": 0.00017887120115774242, "loss": 0.4194, "step": 618 }, { "epoch": 0.9, "learning_rate": 0.0001791606367583213, "loss": 0.0166, "step": 619 }, { "epoch": 0.9, "learning_rate": 0.00017945007235890015, "loss": 0.1968, "step": 620 }, { "epoch": 0.9, "learning_rate": 0.00017973950795947903, "loss": 0.0948, "step": 621 }, { "epoch": 0.9, "learning_rate": 0.0001800289435600579, "loss": 0.2634, "step": 622 }, { "epoch": 0.9, "learning_rate": 0.00018031837916063676, "loss": 0.2871, "step": 623 }, { "epoch": 0.9, "learning_rate": 0.00018060781476121564, "loss": 0.4049, "step": 624 }, { "epoch": 0.9, "learning_rate": 0.00018089725036179451, "loss": 0.2789, "step": 625 }, { "epoch": 0.91, "learning_rate": 0.0001811866859623734, "loss": 0.022, "step": 626 }, { "epoch": 0.91, "learning_rate": 0.00018147612156295227, "loss": 0.0571, "step": 627 }, { "epoch": 0.91, "learning_rate": 0.00018176555716353112, "loss": 0.1159, "step": 628 }, { "epoch": 0.91, "learning_rate": 0.00018205499276411, "loss": 0.0524, "step": 629 }, { "epoch": 0.91, "learning_rate": 0.00018234442836468885, "loss": 0.2393, "step": 630 }, { "epoch": 0.91, "learning_rate": 0.00018263386396526773, "loss": 0.5359, "step": 631 }, { "epoch": 0.91, "learning_rate": 0.0001829232995658466, "loss": 0.2319, "step": 632 }, { "epoch": 0.92, "learning_rate": 0.00018321273516642546, "loss": 0.1389, "step": 633 }, { "epoch": 0.92, "learning_rate": 0.00018350217076700434, "loss": 0.2798, "step": 634 }, { "epoch": 0.92, "learning_rate": 0.00018379160636758322, "loss": 0.0769, "step": 635 }, { "epoch": 0.92, "learning_rate": 0.0001840810419681621, "loss": 0.1543, "step": 636 }, { "epoch": 0.92, "learning_rate": 0.00018437047756874098, "loss": 0.0673, "step": 637 }, { "epoch": 0.92, "learning_rate": 0.00018465991316931983, "loss": 0.2407, "step": 638 }, { "epoch": 0.92, "learning_rate": 0.0001849493487698987, "loss": 0.0511, "step": 639 }, { "epoch": 0.93, "learning_rate": 0.00018523878437047756, "loss": 0.1172, "step": 640 }, { "epoch": 0.93, "learning_rate": 0.00018552821997105644, "loss": 0.4808, "step": 641 }, { "epoch": 0.93, "learning_rate": 0.00018581765557163532, "loss": 0.1897, "step": 642 }, { "epoch": 0.93, "learning_rate": 0.0001861070911722142, "loss": 0.0951, "step": 643 }, { "epoch": 0.93, "learning_rate": 0.00018639652677279308, "loss": 0.0674, "step": 644 }, { "epoch": 0.93, "learning_rate": 0.00018668596237337193, "loss": 0.0357, "step": 645 }, { "epoch": 0.93, "learning_rate": 0.0001869753979739508, "loss": 0.3633, "step": 646 }, { "epoch": 0.94, "learning_rate": 0.0001872648335745297, "loss": 0.0094, "step": 647 }, { "epoch": 0.94, "learning_rate": 0.00018755426917510854, "loss": 0.3036, "step": 648 }, { "epoch": 0.94, "learning_rate": 0.00018784370477568742, "loss": 0.464, "step": 649 }, { "epoch": 0.94, "learning_rate": 0.00018813314037626627, "loss": 0.1885, "step": 650 }, { "epoch": 0.94, "learning_rate": 0.00018842257597684515, "loss": 0.4131, "step": 651 }, { "epoch": 0.94, "learning_rate": 0.00018871201157742403, "loss": 0.6436, "step": 652 }, { "epoch": 0.94, "learning_rate": 0.0001890014471780029, "loss": 0.061, "step": 653 }, { "epoch": 0.95, "learning_rate": 0.0001892908827785818, "loss": 0.0765, "step": 654 }, { "epoch": 0.95, "learning_rate": 0.00018958031837916064, "loss": 0.0526, "step": 655 }, { "epoch": 0.95, "learning_rate": 0.00018986975397973952, "loss": 0.6934, "step": 656 }, { "epoch": 0.95, "learning_rate": 0.0001901591895803184, "loss": 0.0625, "step": 657 }, { "epoch": 0.95, "learning_rate": 0.00019044862518089725, "loss": 0.228, "step": 658 }, { "epoch": 0.95, "learning_rate": 0.00019073806078147613, "loss": 1.2227, "step": 659 }, { "epoch": 0.95, "learning_rate": 0.000191027496382055, "loss": 0.6377, "step": 660 }, { "epoch": 0.96, "learning_rate": 0.00019131693198263389, "loss": 0.0963, "step": 661 }, { "epoch": 0.96, "learning_rate": 0.00019160636758321276, "loss": 0.0621, "step": 662 }, { "epoch": 0.96, "learning_rate": 0.00019189580318379162, "loss": 0.3513, "step": 663 }, { "epoch": 0.96, "learning_rate": 0.0001921852387843705, "loss": 0.0582, "step": 664 }, { "epoch": 0.96, "learning_rate": 0.00019247467438494935, "loss": 0.0352, "step": 665 }, { "epoch": 0.96, "learning_rate": 0.00019276410998552823, "loss": 0.7373, "step": 666 }, { "epoch": 0.96, "learning_rate": 0.0001930535455861071, "loss": 0.022, "step": 667 }, { "epoch": 0.97, "learning_rate": 0.00019334298118668596, "loss": 0.7031, "step": 668 }, { "epoch": 0.97, "learning_rate": 0.00019363241678726484, "loss": 0.1364, "step": 669 }, { "epoch": 0.97, "learning_rate": 0.00019392185238784371, "loss": 0.052, "step": 670 }, { "epoch": 0.97, "learning_rate": 0.0001942112879884226, "loss": 0.2917, "step": 671 }, { "epoch": 0.97, "learning_rate": 0.00019450072358900147, "loss": 0.4948, "step": 672 }, { "epoch": 0.97, "learning_rate": 0.00019479015918958032, "loss": 0.0405, "step": 673 }, { "epoch": 0.97, "learning_rate": 0.0001950795947901592, "loss": 0.2135, "step": 674 }, { "epoch": 0.98, "learning_rate": 0.00019536903039073805, "loss": 0.2072, "step": 675 }, { "epoch": 0.98, "learning_rate": 0.00019565846599131693, "loss": 0.5903, "step": 676 }, { "epoch": 0.98, "learning_rate": 0.0001959479015918958, "loss": 0.0421, "step": 677 }, { "epoch": 0.98, "learning_rate": 0.0001962373371924747, "loss": 0.2101, "step": 678 }, { "epoch": 0.98, "learning_rate": 0.00019652677279305357, "loss": 0.229, "step": 679 }, { "epoch": 0.98, "learning_rate": 0.00019681620839363242, "loss": 0.1355, "step": 680 }, { "epoch": 0.98, "learning_rate": 0.0001971056439942113, "loss": 0.7979, "step": 681 }, { "epoch": 0.99, "learning_rate": 0.00019739507959479018, "loss": 0.6934, "step": 682 }, { "epoch": 0.99, "learning_rate": 0.00019768451519536903, "loss": 0.6152, "step": 683 }, { "epoch": 0.99, "learning_rate": 0.0001979739507959479, "loss": 0.0513, "step": 684 }, { "epoch": 0.99, "learning_rate": 0.00019826338639652676, "loss": 0.1348, "step": 685 }, { "epoch": 0.99, "learning_rate": 0.00019855282199710564, "loss": 0.0345, "step": 686 }, { "epoch": 0.99, "learning_rate": 0.00019884225759768452, "loss": 0.4137, "step": 687 }, { "epoch": 0.99, "learning_rate": 0.0001991316931982634, "loss": 0.4524, "step": 688 }, { "epoch": 1.0, "learning_rate": 0.00019942112879884228, "loss": 0.4225, "step": 689 }, { "epoch": 1.0, "learning_rate": 0.00019971056439942113, "loss": 0.0942, "step": 690 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 0.0252, "step": 691 }, { "epoch": 1.0, "learning_rate": 0.00019996784048882457, "loss": 0.3408, "step": 692 }, { "epoch": 1.0, "learning_rate": 0.00019993568097764915, "loss": 0.3481, "step": 693 }, { "epoch": 1.0, "learning_rate": 0.0001999035214664737, "loss": 0.386, "step": 694 }, { "epoch": 1.01, "learning_rate": 0.0001998713619552983, "loss": 0.2822, "step": 695 }, { "epoch": 1.01, "learning_rate": 0.00019983920244412288, "loss": 0.0194, "step": 696 }, { "epoch": 1.01, "learning_rate": 0.00019980704293294743, "loss": 0.3878, "step": 697 }, { "epoch": 1.01, "learning_rate": 0.000199774883421772, "loss": 0.417, "step": 698 }, { "epoch": 1.01, "learning_rate": 0.00019974272391059657, "loss": 0.3496, "step": 699 }, { "epoch": 1.01, "learning_rate": 0.00019971056439942113, "loss": 0.2631, "step": 700 }, { "epoch": 1.01, "learning_rate": 0.00019967840488824571, "loss": 0.321, "step": 701 }, { "epoch": 1.02, "learning_rate": 0.00019964624537707027, "loss": 0.3857, "step": 702 }, { "epoch": 1.02, "learning_rate": 0.00019961408586589486, "loss": 0.0278, "step": 703 }, { "epoch": 1.02, "learning_rate": 0.0001995819263547194, "loss": 0.0323, "step": 704 }, { "epoch": 1.02, "learning_rate": 0.000199549766843544, "loss": 0.0267, "step": 705 }, { "epoch": 1.02, "learning_rate": 0.00019951760733236855, "loss": 0.0706, "step": 706 }, { "epoch": 1.02, "learning_rate": 0.0001994854478211931, "loss": 0.0236, "step": 707 }, { "epoch": 1.02, "learning_rate": 0.0001994532883100177, "loss": 0.1183, "step": 708 }, { "epoch": 1.03, "learning_rate": 0.00019942112879884228, "loss": 0.0308, "step": 709 }, { "epoch": 1.03, "learning_rate": 0.00019938896928766684, "loss": 0.1538, "step": 710 }, { "epoch": 1.03, "learning_rate": 0.0001993568097764914, "loss": 0.3545, "step": 711 }, { "epoch": 1.03, "learning_rate": 0.00019932465026531598, "loss": 0.3336, "step": 712 }, { "epoch": 1.03, "learning_rate": 0.00019929249075414053, "loss": 0.1243, "step": 713 }, { "epoch": 1.03, "learning_rate": 0.00019926033124296512, "loss": 0.6566, "step": 714 }, { "epoch": 1.03, "learning_rate": 0.0001992281717317897, "loss": 0.3848, "step": 715 }, { "epoch": 1.04, "learning_rate": 0.00019919601222061426, "loss": 0.0545, "step": 716 }, { "epoch": 1.04, "learning_rate": 0.00019916385270943882, "loss": 0.0125, "step": 717 }, { "epoch": 1.04, "learning_rate": 0.0001991316931982634, "loss": 0.1392, "step": 718 }, { "epoch": 1.04, "learning_rate": 0.00019909953368708796, "loss": 0.0367, "step": 719 }, { "epoch": 1.04, "learning_rate": 0.00019906737417591254, "loss": 0.4795, "step": 720 }, { "epoch": 1.04, "learning_rate": 0.00019903521466473712, "loss": 0.7667, "step": 721 }, { "epoch": 1.04, "learning_rate": 0.00019900305515356168, "loss": 0.0344, "step": 722 }, { "epoch": 1.05, "learning_rate": 0.00019897089564238624, "loss": 0.0776, "step": 723 }, { "epoch": 1.05, "learning_rate": 0.00019893873613121082, "loss": 0.0356, "step": 724 }, { "epoch": 1.05, "learning_rate": 0.00019890657662003538, "loss": 0.0447, "step": 725 }, { "epoch": 1.05, "learning_rate": 0.00019887441710885994, "loss": 0.0123, "step": 726 }, { "epoch": 1.05, "learning_rate": 0.00019884225759768452, "loss": 0.6309, "step": 727 }, { "epoch": 1.05, "learning_rate": 0.0001988100980865091, "loss": 0.0251, "step": 728 }, { "epoch": 1.05, "learning_rate": 0.00019877793857533366, "loss": 0.0093, "step": 729 }, { "epoch": 1.06, "learning_rate": 0.00019874577906415825, "loss": 0.4271, "step": 730 }, { "epoch": 1.06, "learning_rate": 0.0001987136195529828, "loss": 0.2076, "step": 731 }, { "epoch": 1.06, "learning_rate": 0.00019868146004180736, "loss": 0.5195, "step": 732 }, { "epoch": 1.06, "learning_rate": 0.00019864930053063194, "loss": 0.227, "step": 733 }, { "epoch": 1.06, "learning_rate": 0.00019861714101945653, "loss": 0.0182, "step": 734 }, { "epoch": 1.06, "learning_rate": 0.00019858498150828109, "loss": 0.0089, "step": 735 }, { "epoch": 1.06, "learning_rate": 0.00019855282199710564, "loss": 0.7885, "step": 736 }, { "epoch": 1.07, "learning_rate": 0.00019852066248593023, "loss": 0.2239, "step": 737 }, { "epoch": 1.07, "learning_rate": 0.00019848850297475478, "loss": 0.3374, "step": 738 }, { "epoch": 1.07, "learning_rate": 0.00019845634346357934, "loss": 0.2722, "step": 739 }, { "epoch": 1.07, "learning_rate": 0.00019842418395240395, "loss": 0.5312, "step": 740 }, { "epoch": 1.07, "learning_rate": 0.0001983920244412285, "loss": 0.2378, "step": 741 }, { "epoch": 1.07, "learning_rate": 0.00019835986493005307, "loss": 1.2676, "step": 742 }, { "epoch": 1.07, "learning_rate": 0.00019832770541887765, "loss": 0.0294, "step": 743 }, { "epoch": 1.08, "learning_rate": 0.0001982955459077022, "loss": 0.2363, "step": 744 }, { "epoch": 1.08, "learning_rate": 0.00019826338639652676, "loss": 0.0473, "step": 745 }, { "epoch": 1.08, "learning_rate": 0.00019823122688535137, "loss": 0.3779, "step": 746 }, { "epoch": 1.08, "learning_rate": 0.00019819906737417593, "loss": 0.1676, "step": 747 }, { "epoch": 1.08, "learning_rate": 0.0001981669078630005, "loss": 0.2056, "step": 748 }, { "epoch": 1.08, "learning_rate": 0.00019813474835182507, "loss": 0.0484, "step": 749 }, { "epoch": 1.08, "learning_rate": 0.00019810258884064963, "loss": 0.6602, "step": 750 }, { "epoch": 1.09, "learning_rate": 0.00019807042932947419, "loss": 0.4297, "step": 751 }, { "epoch": 1.09, "learning_rate": 0.00019803826981829877, "loss": 0.2424, "step": 752 }, { "epoch": 1.09, "learning_rate": 0.00019800611030712335, "loss": 0.149, "step": 753 }, { "epoch": 1.09, "learning_rate": 0.0001979739507959479, "loss": 0.2679, "step": 754 }, { "epoch": 1.09, "learning_rate": 0.0001979417912847725, "loss": 0.1884, "step": 755 }, { "epoch": 1.09, "learning_rate": 0.00019790963177359705, "loss": 0.543, "step": 756 }, { "epoch": 1.09, "learning_rate": 0.0001978774722624216, "loss": 0.1717, "step": 757 }, { "epoch": 1.1, "learning_rate": 0.0001978453127512462, "loss": 0.0874, "step": 758 }, { "epoch": 1.1, "learning_rate": 0.00019781315324007078, "loss": 0.1072, "step": 759 }, { "epoch": 1.1, "learning_rate": 0.00019778099372889533, "loss": 0.0767, "step": 760 }, { "epoch": 1.1, "learning_rate": 0.0001977488342177199, "loss": 0.3069, "step": 761 }, { "epoch": 1.1, "learning_rate": 0.00019771667470654448, "loss": 0.4237, "step": 762 }, { "epoch": 1.1, "learning_rate": 0.00019768451519536903, "loss": 0.0507, "step": 763 }, { "epoch": 1.1, "learning_rate": 0.0001976523556841936, "loss": 0.395, "step": 764 }, { "epoch": 1.11, "learning_rate": 0.0001976201961730182, "loss": 0.0614, "step": 765 }, { "epoch": 1.11, "learning_rate": 0.00019758803666184276, "loss": 0.3135, "step": 766 }, { "epoch": 1.11, "learning_rate": 0.00019755587715066731, "loss": 0.0808, "step": 767 }, { "epoch": 1.11, "learning_rate": 0.0001975237176394919, "loss": 0.4873, "step": 768 }, { "epoch": 1.11, "learning_rate": 0.00019749155812831646, "loss": 0.0158, "step": 769 }, { "epoch": 1.11, "learning_rate": 0.000197459398617141, "loss": 0.5422, "step": 770 }, { "epoch": 1.11, "learning_rate": 0.0001974272391059656, "loss": 0.1547, "step": 771 }, { "epoch": 1.12, "learning_rate": 0.00019739507959479018, "loss": 0.46, "step": 772 }, { "epoch": 1.12, "learning_rate": 0.00019736292008361474, "loss": 0.208, "step": 773 }, { "epoch": 1.12, "learning_rate": 0.00019733076057243932, "loss": 0.0283, "step": 774 }, { "epoch": 1.12, "learning_rate": 0.00019729860106126388, "loss": 0.038, "step": 775 }, { "epoch": 1.12, "learning_rate": 0.00019726644155008844, "loss": 0.9277, "step": 776 }, { "epoch": 1.12, "learning_rate": 0.00019723428203891302, "loss": 0.0224, "step": 777 }, { "epoch": 1.13, "learning_rate": 0.0001972021225277376, "loss": 0.1313, "step": 778 }, { "epoch": 1.13, "learning_rate": 0.00019716996301656216, "loss": 0.213, "step": 779 }, { "epoch": 1.13, "learning_rate": 0.00019713780350538674, "loss": 0.0822, "step": 780 }, { "epoch": 1.13, "learning_rate": 0.0001971056439942113, "loss": 0.071, "step": 781 }, { "epoch": 1.13, "learning_rate": 0.00019707348448303586, "loss": 0.352, "step": 782 }, { "epoch": 1.13, "learning_rate": 0.00019704132497186044, "loss": 0.189, "step": 783 }, { "epoch": 1.13, "learning_rate": 0.000197009165460685, "loss": 0.7429, "step": 784 }, { "epoch": 1.14, "learning_rate": 0.00019697700594950958, "loss": 0.3916, "step": 785 }, { "epoch": 1.14, "learning_rate": 0.00019694484643833414, "loss": 0.5176, "step": 786 }, { "epoch": 1.14, "learning_rate": 0.00019691268692715872, "loss": 0.3162, "step": 787 }, { "epoch": 1.14, "learning_rate": 0.00019688052741598328, "loss": 0.0395, "step": 788 }, { "epoch": 1.14, "learning_rate": 0.00019684836790480784, "loss": 0.1326, "step": 789 }, { "epoch": 1.14, "learning_rate": 0.00019681620839363242, "loss": 0.1671, "step": 790 }, { "epoch": 1.14, "learning_rate": 0.000196784048882457, "loss": 0.5352, "step": 791 }, { "epoch": 1.15, "learning_rate": 0.00019675188937128156, "loss": 0.0375, "step": 792 }, { "epoch": 1.15, "learning_rate": 0.00019671972986010615, "loss": 0.1194, "step": 793 }, { "epoch": 1.15, "learning_rate": 0.0001966875703489307, "loss": 0.25, "step": 794 }, { "epoch": 1.15, "learning_rate": 0.00019665541083775526, "loss": 0.0228, "step": 795 }, { "epoch": 1.15, "learning_rate": 0.00019662325132657985, "loss": 0.0889, "step": 796 }, { "epoch": 1.15, "learning_rate": 0.0001965910918154044, "loss": 0.1284, "step": 797 }, { "epoch": 1.15, "learning_rate": 0.000196558932304229, "loss": 0.0215, "step": 798 }, { "epoch": 1.16, "learning_rate": 0.00019652677279305357, "loss": 0.5572, "step": 799 }, { "epoch": 1.16, "learning_rate": 0.00019649461328187813, "loss": 0.1245, "step": 800 }, { "epoch": 1.16, "learning_rate": 0.00019646245377070268, "loss": 0.0206, "step": 801 }, { "epoch": 1.16, "learning_rate": 0.00019643029425952727, "loss": 0.0119, "step": 802 }, { "epoch": 1.16, "learning_rate": 0.00019639813474835183, "loss": 0.0217, "step": 803 }, { "epoch": 1.16, "learning_rate": 0.0001963659752371764, "loss": 0.3288, "step": 804 }, { "epoch": 1.16, "learning_rate": 0.000196333815726001, "loss": 0.0081, "step": 805 }, { "epoch": 1.17, "learning_rate": 0.00019630165621482555, "loss": 0.0589, "step": 806 }, { "epoch": 1.17, "learning_rate": 0.0001962694967036501, "loss": 0.126, "step": 807 }, { "epoch": 1.17, "learning_rate": 0.0001962373371924747, "loss": 0.5117, "step": 808 }, { "epoch": 1.17, "learning_rate": 0.00019620517768129925, "loss": 0.1243, "step": 809 }, { "epoch": 1.17, "learning_rate": 0.00019617301817012383, "loss": 0.021, "step": 810 }, { "epoch": 1.17, "learning_rate": 0.0001961408586589484, "loss": 0.7065, "step": 811 }, { "epoch": 1.17, "learning_rate": 0.00019610869914777297, "loss": 0.0422, "step": 812 }, { "epoch": 1.18, "learning_rate": 0.00019607653963659753, "loss": 0.3604, "step": 813 }, { "epoch": 1.18, "learning_rate": 0.0001960443801254221, "loss": 0.0692, "step": 814 }, { "epoch": 1.18, "learning_rate": 0.00019601222061424667, "loss": 0.5098, "step": 815 }, { "epoch": 1.18, "learning_rate": 0.00019598006110307123, "loss": 0.3054, "step": 816 }, { "epoch": 1.18, "learning_rate": 0.0001959479015918958, "loss": 0.3528, "step": 817 }, { "epoch": 1.18, "learning_rate": 0.0001959157420807204, "loss": 0.2984, "step": 818 }, { "epoch": 1.18, "learning_rate": 0.00019588358256954495, "loss": 0.0104, "step": 819 }, { "epoch": 1.19, "learning_rate": 0.0001958514230583695, "loss": 0.2346, "step": 820 }, { "epoch": 1.19, "learning_rate": 0.0001958192635471941, "loss": 0.1403, "step": 821 }, { "epoch": 1.19, "learning_rate": 0.00019578710403601865, "loss": 0.2032, "step": 822 }, { "epoch": 1.19, "learning_rate": 0.00019575494452484324, "loss": 0.3615, "step": 823 }, { "epoch": 1.19, "learning_rate": 0.00019572278501366782, "loss": 0.0606, "step": 824 }, { "epoch": 1.19, "learning_rate": 0.00019569062550249238, "loss": 0.0425, "step": 825 }, { "epoch": 1.19, "learning_rate": 0.00019565846599131693, "loss": 0.0184, "step": 826 }, { "epoch": 1.2, "learning_rate": 0.00019562630648014152, "loss": 0.2004, "step": 827 }, { "epoch": 1.2, "learning_rate": 0.00019559414696896607, "loss": 0.3335, "step": 828 }, { "epoch": 1.2, "learning_rate": 0.00019556198745779063, "loss": 0.2844, "step": 829 }, { "epoch": 1.2, "learning_rate": 0.00019552982794661522, "loss": 0.4512, "step": 830 }, { "epoch": 1.2, "learning_rate": 0.0001954976684354398, "loss": 0.5098, "step": 831 }, { "epoch": 1.2, "learning_rate": 0.00019546550892426436, "loss": 0.1321, "step": 832 }, { "epoch": 1.2, "learning_rate": 0.00019543334941308894, "loss": 0.1355, "step": 833 }, { "epoch": 1.21, "learning_rate": 0.0001954011899019135, "loss": 0.5576, "step": 834 }, { "epoch": 1.21, "learning_rate": 0.00019536903039073805, "loss": 0.4808, "step": 835 }, { "epoch": 1.21, "learning_rate": 0.00019533687087956264, "loss": 0.0499, "step": 836 }, { "epoch": 1.21, "learning_rate": 0.00019530471136838722, "loss": 0.022, "step": 837 }, { "epoch": 1.21, "learning_rate": 0.00019527255185721178, "loss": 0.0367, "step": 838 }, { "epoch": 1.21, "learning_rate": 0.00019524039234603634, "loss": 0.0673, "step": 839 }, { "epoch": 1.21, "learning_rate": 0.00019520823283486092, "loss": 0.0881, "step": 840 }, { "epoch": 1.22, "learning_rate": 0.00019517607332368548, "loss": 0.624, "step": 841 }, { "epoch": 1.22, "learning_rate": 0.00019514391381251006, "loss": 0.0612, "step": 842 }, { "epoch": 1.22, "learning_rate": 0.00019511175430133465, "loss": 0.1339, "step": 843 }, { "epoch": 1.22, "learning_rate": 0.0001950795947901592, "loss": 0.4902, "step": 844 }, { "epoch": 1.22, "learning_rate": 0.00019504743527898376, "loss": 0.0575, "step": 845 }, { "epoch": 1.22, "learning_rate": 0.00019501527576780834, "loss": 0.0995, "step": 846 }, { "epoch": 1.22, "learning_rate": 0.0001949831162566329, "loss": 0.015, "step": 847 }, { "epoch": 1.23, "learning_rate": 0.00019495095674545746, "loss": 0.5713, "step": 848 }, { "epoch": 1.23, "learning_rate": 0.00019491879723428207, "loss": 0.1938, "step": 849 }, { "epoch": 1.23, "learning_rate": 0.00019488663772310663, "loss": 0.1047, "step": 850 }, { "epoch": 1.23, "learning_rate": 0.00019485447821193118, "loss": 0.0495, "step": 851 }, { "epoch": 1.23, "learning_rate": 0.00019482231870075577, "loss": 0.5898, "step": 852 }, { "epoch": 1.23, "learning_rate": 0.00019479015918958032, "loss": 0.786, "step": 853 }, { "epoch": 1.23, "learning_rate": 0.00019475799967840488, "loss": 0.5889, "step": 854 }, { "epoch": 1.24, "learning_rate": 0.00019472584016722947, "loss": 0.6982, "step": 855 }, { "epoch": 1.24, "learning_rate": 0.00019469368065605405, "loss": 0.7012, "step": 856 }, { "epoch": 1.24, "learning_rate": 0.0001946615211448786, "loss": 0.0637, "step": 857 }, { "epoch": 1.24, "learning_rate": 0.0001946293616337032, "loss": 0.0812, "step": 858 }, { "epoch": 1.24, "learning_rate": 0.00019459720212252775, "loss": 0.3584, "step": 859 }, { "epoch": 1.24, "learning_rate": 0.0001945650426113523, "loss": 0.0645, "step": 860 }, { "epoch": 1.25, "learning_rate": 0.0001945328831001769, "loss": 0.0497, "step": 861 }, { "epoch": 1.25, "learning_rate": 0.00019450072358900147, "loss": 0.2715, "step": 862 }, { "epoch": 1.25, "learning_rate": 0.00019446856407782603, "loss": 0.0211, "step": 863 }, { "epoch": 1.25, "learning_rate": 0.00019443640456665059, "loss": 0.1919, "step": 864 }, { "epoch": 1.25, "learning_rate": 0.00019440424505547517, "loss": 0.2264, "step": 865 }, { "epoch": 1.25, "learning_rate": 0.00019437208554429973, "loss": 0.3857, "step": 866 }, { "epoch": 1.25, "learning_rate": 0.00019433992603312428, "loss": 0.3017, "step": 867 }, { "epoch": 1.26, "learning_rate": 0.0001943077665219489, "loss": 0.0172, "step": 868 }, { "epoch": 1.26, "learning_rate": 0.00019427560701077345, "loss": 0.9082, "step": 869 }, { "epoch": 1.26, "learning_rate": 0.000194243447499598, "loss": 0.492, "step": 870 }, { "epoch": 1.26, "learning_rate": 0.0001942112879884226, "loss": 0.0222, "step": 871 }, { "epoch": 1.26, "learning_rate": 0.00019417912847724715, "loss": 0.0169, "step": 872 }, { "epoch": 1.26, "learning_rate": 0.0001941469689660717, "loss": 0.009, "step": 873 }, { "epoch": 1.26, "learning_rate": 0.0001941148094548963, "loss": 0.073, "step": 874 }, { "epoch": 1.27, "learning_rate": 0.00019408264994372088, "loss": 0.0075, "step": 875 }, { "epoch": 1.27, "learning_rate": 0.00019405049043254543, "loss": 0.0551, "step": 876 }, { "epoch": 1.27, "learning_rate": 0.00019401833092137002, "loss": 0.3218, "step": 877 }, { "epoch": 1.27, "learning_rate": 0.00019398617141019457, "loss": 0.3012, "step": 878 }, { "epoch": 1.27, "learning_rate": 0.00019395401189901913, "loss": 0.0089, "step": 879 }, { "epoch": 1.27, "learning_rate": 0.00019392185238784371, "loss": 0.2744, "step": 880 }, { "epoch": 1.27, "learning_rate": 0.0001938896928766683, "loss": 0.1006, "step": 881 }, { "epoch": 1.28, "learning_rate": 0.00019385753336549286, "loss": 0.0089, "step": 882 }, { "epoch": 1.28, "learning_rate": 0.00019382537385431744, "loss": 0.1519, "step": 883 }, { "epoch": 1.28, "learning_rate": 0.000193793214343142, "loss": 0.3051, "step": 884 }, { "epoch": 1.28, "learning_rate": 0.00019376105483196655, "loss": 0.1578, "step": 885 }, { "epoch": 1.28, "learning_rate": 0.00019372889532079114, "loss": 0.7026, "step": 886 }, { "epoch": 1.28, "learning_rate": 0.0001936967358096157, "loss": 0.6621, "step": 887 }, { "epoch": 1.28, "learning_rate": 0.00019366457629844028, "loss": 0.259, "step": 888 }, { "epoch": 1.29, "learning_rate": 0.00019363241678726484, "loss": 0.3672, "step": 889 }, { "epoch": 1.29, "learning_rate": 0.00019360025727608942, "loss": 0.0356, "step": 890 }, { "epoch": 1.29, "learning_rate": 0.00019356809776491398, "loss": 0.0346, "step": 891 }, { "epoch": 1.29, "learning_rate": 0.00019353593825373853, "loss": 0.5264, "step": 892 }, { "epoch": 1.29, "learning_rate": 0.00019350377874256312, "loss": 0.1868, "step": 893 }, { "epoch": 1.29, "learning_rate": 0.0001934716192313877, "loss": 0.2217, "step": 894 }, { "epoch": 1.29, "learning_rate": 0.00019343945972021226, "loss": 0.0532, "step": 895 }, { "epoch": 1.3, "learning_rate": 0.00019340730020903684, "loss": 0.3773, "step": 896 }, { "epoch": 1.3, "learning_rate": 0.0001933751406978614, "loss": 0.6562, "step": 897 }, { "epoch": 1.3, "learning_rate": 0.00019334298118668596, "loss": 0.1519, "step": 898 }, { "epoch": 1.3, "learning_rate": 0.00019331082167551054, "loss": 0.3264, "step": 899 }, { "epoch": 1.3, "learning_rate": 0.0001932786621643351, "loss": 0.2769, "step": 900 }, { "epoch": 1.3, "learning_rate": 0.00019324650265315968, "loss": 0.7891, "step": 901 }, { "epoch": 1.3, "learning_rate": 0.00019321434314198427, "loss": 0.076, "step": 902 }, { "epoch": 1.31, "learning_rate": 0.00019318218363080882, "loss": 0.5869, "step": 903 }, { "epoch": 1.31, "learning_rate": 0.00019315002411963338, "loss": 0.0352, "step": 904 }, { "epoch": 1.31, "learning_rate": 0.00019311786460845796, "loss": 0.5879, "step": 905 }, { "epoch": 1.31, "learning_rate": 0.00019308570509728252, "loss": 0.0486, "step": 906 }, { "epoch": 1.31, "learning_rate": 0.0001930535455861071, "loss": 0.0986, "step": 907 }, { "epoch": 1.31, "learning_rate": 0.0001930213860749317, "loss": 0.0199, "step": 908 }, { "epoch": 1.31, "learning_rate": 0.00019298922656375625, "loss": 0.1453, "step": 909 }, { "epoch": 1.32, "learning_rate": 0.0001929570670525808, "loss": 0.9766, "step": 910 }, { "epoch": 1.32, "learning_rate": 0.0001929249075414054, "loss": 0.2535, "step": 911 }, { "epoch": 1.32, "learning_rate": 0.00019289274803022994, "loss": 0.4243, "step": 912 }, { "epoch": 1.32, "learning_rate": 0.00019286058851905453, "loss": 0.0386, "step": 913 }, { "epoch": 1.32, "learning_rate": 0.00019282842900787908, "loss": 0.4512, "step": 914 }, { "epoch": 1.32, "learning_rate": 0.00019279626949670367, "loss": 0.0309, "step": 915 }, { "epoch": 1.32, "learning_rate": 0.00019276410998552823, "loss": 0.3744, "step": 916 }, { "epoch": 1.33, "learning_rate": 0.00019273195047435278, "loss": 0.09, "step": 917 }, { "epoch": 1.33, "learning_rate": 0.00019269979096317737, "loss": 0.681, "step": 918 }, { "epoch": 1.33, "learning_rate": 0.00019266763145200192, "loss": 0.3566, "step": 919 }, { "epoch": 1.33, "learning_rate": 0.0001926354719408265, "loss": 0.0121, "step": 920 }, { "epoch": 1.33, "learning_rate": 0.0001926033124296511, "loss": 0.47, "step": 921 }, { "epoch": 1.33, "learning_rate": 0.00019257115291847565, "loss": 0.562, "step": 922 }, { "epoch": 1.33, "learning_rate": 0.0001925389934073002, "loss": 0.0125, "step": 923 }, { "epoch": 1.34, "learning_rate": 0.0001925068338961248, "loss": 0.2288, "step": 924 }, { "epoch": 1.34, "learning_rate": 0.00019247467438494935, "loss": 0.0442, "step": 925 }, { "epoch": 1.34, "learning_rate": 0.00019244251487377393, "loss": 0.126, "step": 926 }, { "epoch": 1.34, "learning_rate": 0.00019241035536259851, "loss": 0.0255, "step": 927 }, { "epoch": 1.34, "learning_rate": 0.00019237819585142307, "loss": 0.4468, "step": 928 }, { "epoch": 1.34, "learning_rate": 0.00019234603634024763, "loss": 0.5752, "step": 929 }, { "epoch": 1.34, "learning_rate": 0.0001923138768290722, "loss": 0.8468, "step": 930 }, { "epoch": 1.35, "learning_rate": 0.00019228171731789677, "loss": 0.2637, "step": 931 }, { "epoch": 1.35, "learning_rate": 0.00019224955780672133, "loss": 0.4243, "step": 932 }, { "epoch": 1.35, "learning_rate": 0.00019221739829554594, "loss": 0.2211, "step": 933 }, { "epoch": 1.35, "learning_rate": 0.0001921852387843705, "loss": 0.9297, "step": 934 }, { "epoch": 1.35, "learning_rate": 0.00019215307927319505, "loss": 0.3627, "step": 935 }, { "epoch": 1.35, "learning_rate": 0.00019212091976201964, "loss": 0.7842, "step": 936 }, { "epoch": 1.36, "learning_rate": 0.0001920887602508442, "loss": 0.0265, "step": 937 }, { "epoch": 1.36, "learning_rate": 0.00019205660073966875, "loss": 0.069, "step": 938 }, { "epoch": 1.36, "learning_rate": 0.00019202444122849333, "loss": 0.3796, "step": 939 }, { "epoch": 1.36, "learning_rate": 0.00019199228171731792, "loss": 0.0178, "step": 940 }, { "epoch": 1.36, "learning_rate": 0.00019196012220614248, "loss": 0.1887, "step": 941 }, { "epoch": 1.36, "learning_rate": 0.00019192796269496703, "loss": 0.006, "step": 942 }, { "epoch": 1.36, "learning_rate": 0.00019189580318379162, "loss": 0.1221, "step": 943 }, { "epoch": 1.37, "learning_rate": 0.00019186364367261617, "loss": 0.4207, "step": 944 }, { "epoch": 1.37, "learning_rate": 0.00019183148416144076, "loss": 0.2934, "step": 945 }, { "epoch": 1.37, "learning_rate": 0.00019179932465026534, "loss": 0.295, "step": 946 }, { "epoch": 1.37, "learning_rate": 0.0001917671651390899, "loss": 0.8662, "step": 947 }, { "epoch": 1.37, "learning_rate": 0.00019173500562791446, "loss": 0.2792, "step": 948 }, { "epoch": 1.37, "learning_rate": 0.00019170284611673904, "loss": 0.2147, "step": 949 }, { "epoch": 1.37, "learning_rate": 0.0001916706866055636, "loss": 0.3335, "step": 950 }, { "epoch": 1.38, "learning_rate": 0.00019163852709438815, "loss": 0.0969, "step": 951 }, { "epoch": 1.38, "learning_rate": 0.00019160636758321276, "loss": 0.2739, "step": 952 }, { "epoch": 1.38, "learning_rate": 0.00019157420807203732, "loss": 0.4346, "step": 953 }, { "epoch": 1.38, "learning_rate": 0.00019154204856086188, "loss": 0.0239, "step": 954 }, { "epoch": 1.38, "learning_rate": 0.00019150988904968646, "loss": 0.2833, "step": 955 }, { "epoch": 1.38, "learning_rate": 0.00019147772953851102, "loss": 0.024, "step": 956 }, { "epoch": 1.38, "learning_rate": 0.00019144557002733558, "loss": 0.0626, "step": 957 }, { "epoch": 1.39, "learning_rate": 0.0001914134105161602, "loss": 0.0427, "step": 958 }, { "epoch": 1.39, "learning_rate": 0.00019138125100498474, "loss": 0.353, "step": 959 }, { "epoch": 1.39, "learning_rate": 0.0001913490914938093, "loss": 0.3364, "step": 960 }, { "epoch": 1.39, "learning_rate": 0.00019131693198263389, "loss": 0.3345, "step": 961 }, { "epoch": 1.39, "learning_rate": 0.00019128477247145844, "loss": 0.0337, "step": 962 }, { "epoch": 1.39, "learning_rate": 0.000191252612960283, "loss": 0.0411, "step": 963 }, { "epoch": 1.39, "learning_rate": 0.00019122045344910758, "loss": 0.3207, "step": 964 }, { "epoch": 1.4, "learning_rate": 0.00019118829393793217, "loss": 0.2686, "step": 965 }, { "epoch": 1.4, "learning_rate": 0.00019115613442675672, "loss": 0.2738, "step": 966 }, { "epoch": 1.4, "learning_rate": 0.00019112397491558128, "loss": 0.072, "step": 967 }, { "epoch": 1.4, "learning_rate": 0.00019109181540440587, "loss": 0.2651, "step": 968 }, { "epoch": 1.4, "learning_rate": 0.00019105965589323042, "loss": 0.0262, "step": 969 }, { "epoch": 1.4, "learning_rate": 0.000191027496382055, "loss": 0.2111, "step": 970 }, { "epoch": 1.4, "learning_rate": 0.0001909953368708796, "loss": 0.0267, "step": 971 }, { "epoch": 1.41, "learning_rate": 0.00019096317735970415, "loss": 0.198, "step": 972 }, { "epoch": 1.41, "learning_rate": 0.0001909310178485287, "loss": 0.203, "step": 973 }, { "epoch": 1.41, "learning_rate": 0.0001908988583373533, "loss": 0.0707, "step": 974 }, { "epoch": 1.41, "learning_rate": 0.00019086669882617785, "loss": 0.0169, "step": 975 }, { "epoch": 1.41, "learning_rate": 0.0001908345393150024, "loss": 0.3423, "step": 976 }, { "epoch": 1.41, "learning_rate": 0.00019080237980382699, "loss": 1.1914, "step": 977 }, { "epoch": 1.41, "learning_rate": 0.00019077022029265157, "loss": 0.6133, "step": 978 }, { "epoch": 1.42, "learning_rate": 0.00019073806078147613, "loss": 0.1249, "step": 979 }, { "epoch": 1.42, "learning_rate": 0.0001907059012703007, "loss": 0.9961, "step": 980 }, { "epoch": 1.42, "learning_rate": 0.00019067374175912527, "loss": 0.6514, "step": 981 }, { "epoch": 1.42, "learning_rate": 0.00019064158224794983, "loss": 0.2559, "step": 982 }, { "epoch": 1.42, "learning_rate": 0.0001906094227367744, "loss": 1.457, "step": 983 }, { "epoch": 1.42, "learning_rate": 0.000190577263225599, "loss": 0.0573, "step": 984 }, { "epoch": 1.42, "learning_rate": 0.00019054510371442355, "loss": 0.2492, "step": 985 }, { "epoch": 1.43, "learning_rate": 0.00019051294420324813, "loss": 0.1501, "step": 986 }, { "epoch": 1.43, "learning_rate": 0.0001904807846920727, "loss": 0.308, "step": 987 }, { "epoch": 1.43, "learning_rate": 0.00019044862518089725, "loss": 0.2319, "step": 988 }, { "epoch": 1.43, "learning_rate": 0.00019041646566972183, "loss": 0.0435, "step": 989 }, { "epoch": 1.43, "learning_rate": 0.0001903843061585464, "loss": 0.2747, "step": 990 }, { "epoch": 1.43, "learning_rate": 0.00019035214664737097, "loss": 0.069, "step": 991 }, { "epoch": 1.43, "learning_rate": 0.00019031998713619553, "loss": 0.0928, "step": 992 }, { "epoch": 1.44, "learning_rate": 0.00019028782762502011, "loss": 0.0871, "step": 993 }, { "epoch": 1.44, "learning_rate": 0.00019025566811384467, "loss": 0.2117, "step": 994 }, { "epoch": 1.44, "learning_rate": 0.00019022350860266926, "loss": 0.0742, "step": 995 }, { "epoch": 1.44, "learning_rate": 0.0001901913490914938, "loss": 0.0276, "step": 996 }, { "epoch": 1.44, "learning_rate": 0.0001901591895803184, "loss": 0.0288, "step": 997 }, { "epoch": 1.44, "learning_rate": 0.00019012703006914295, "loss": 0.0482, "step": 998 }, { "epoch": 1.44, "learning_rate": 0.00019009487055796754, "loss": 0.0405, "step": 999 }, { "epoch": 1.45, "learning_rate": 0.0001900627110467921, "loss": 0.4185, "step": 1000 }, { "epoch": 1.45, "learning_rate": 0.00019003055153561665, "loss": 0.052, "step": 1001 }, { "epoch": 1.45, "learning_rate": 0.00018999839202444124, "loss": 0.2536, "step": 1002 }, { "epoch": 1.45, "learning_rate": 0.00018996623251326582, "loss": 0.0666, "step": 1003 }, { "epoch": 1.45, "learning_rate": 0.00018993407300209038, "loss": 0.1761, "step": 1004 }, { "epoch": 1.45, "learning_rate": 0.00018990191349091496, "loss": 0.1955, "step": 1005 }, { "epoch": 1.45, "learning_rate": 0.00018986975397973952, "loss": 0.6914, "step": 1006 }, { "epoch": 1.46, "learning_rate": 0.00018983759446856407, "loss": 0.3228, "step": 1007 }, { "epoch": 1.46, "learning_rate": 0.00018980543495738866, "loss": 0.3789, "step": 1008 }, { "epoch": 1.46, "learning_rate": 0.00018977327544621322, "loss": 0.196, "step": 1009 }, { "epoch": 1.46, "learning_rate": 0.0001897411159350378, "loss": 0.5684, "step": 1010 }, { "epoch": 1.46, "learning_rate": 0.00018970895642386238, "loss": 0.1248, "step": 1011 }, { "epoch": 1.46, "learning_rate": 0.00018967679691268694, "loss": 0.4219, "step": 1012 }, { "epoch": 1.46, "learning_rate": 0.0001896446374015115, "loss": 0.4936, "step": 1013 }, { "epoch": 1.47, "learning_rate": 0.00018961247789033608, "loss": 0.085, "step": 1014 }, { "epoch": 1.47, "learning_rate": 0.00018958031837916064, "loss": 0.7246, "step": 1015 }, { "epoch": 1.47, "learning_rate": 0.00018954815886798522, "loss": 0.1851, "step": 1016 }, { "epoch": 1.47, "learning_rate": 0.00018951599935680978, "loss": 0.6416, "step": 1017 }, { "epoch": 1.47, "learning_rate": 0.00018948383984563436, "loss": 0.2969, "step": 1018 }, { "epoch": 1.47, "learning_rate": 0.00018945168033445892, "loss": 0.0389, "step": 1019 }, { "epoch": 1.48, "learning_rate": 0.0001894195208232835, "loss": 0.7307, "step": 1020 }, { "epoch": 1.48, "learning_rate": 0.00018938736131210806, "loss": 0.5123, "step": 1021 }, { "epoch": 1.48, "learning_rate": 0.00018935520180093262, "loss": 0.6025, "step": 1022 }, { "epoch": 1.48, "learning_rate": 0.0001893230422897572, "loss": 0.136, "step": 1023 }, { "epoch": 1.48, "learning_rate": 0.0001892908827785818, "loss": 0.1534, "step": 1024 }, { "epoch": 1.48, "learning_rate": 0.00018925872326740634, "loss": 0.2471, "step": 1025 }, { "epoch": 1.48, "learning_rate": 0.0001892265637562309, "loss": 0.2319, "step": 1026 }, { "epoch": 1.49, "learning_rate": 0.00018919440424505548, "loss": 0.2034, "step": 1027 }, { "epoch": 1.49, "learning_rate": 0.00018916224473388004, "loss": 0.5246, "step": 1028 }, { "epoch": 1.49, "learning_rate": 0.00018913008522270463, "loss": 0.0864, "step": 1029 }, { "epoch": 1.49, "learning_rate": 0.0001890979257115292, "loss": 0.328, "step": 1030 }, { "epoch": 1.49, "learning_rate": 0.00018906576620035377, "loss": 0.0555, "step": 1031 }, { "epoch": 1.49, "learning_rate": 0.00018903360668917832, "loss": 0.2856, "step": 1032 }, { "epoch": 1.49, "learning_rate": 0.0001890014471780029, "loss": 0.0161, "step": 1033 }, { "epoch": 1.5, "learning_rate": 0.00018896928766682746, "loss": 0.5574, "step": 1034 }, { "epoch": 1.5, "learning_rate": 0.00018893712815565202, "loss": 0.0394, "step": 1035 }, { "epoch": 1.5, "learning_rate": 0.00018890496864447663, "loss": 0.5786, "step": 1036 }, { "epoch": 1.5, "learning_rate": 0.0001888728091333012, "loss": 0.6406, "step": 1037 }, { "epoch": 1.5, "learning_rate": 0.00018884064962212575, "loss": 0.0483, "step": 1038 }, { "epoch": 1.5, "learning_rate": 0.00018880849011095033, "loss": 0.3685, "step": 1039 }, { "epoch": 1.5, "learning_rate": 0.0001887763305997749, "loss": 0.5945, "step": 1040 }, { "epoch": 1.51, "learning_rate": 0.00018874417108859944, "loss": 0.22, "step": 1041 }, { "epoch": 1.51, "learning_rate": 0.00018871201157742403, "loss": 0.24, "step": 1042 }, { "epoch": 1.51, "learning_rate": 0.0001886798520662486, "loss": 0.319, "step": 1043 }, { "epoch": 1.51, "learning_rate": 0.00018864769255507317, "loss": 0.1765, "step": 1044 }, { "epoch": 1.51, "learning_rate": 0.00018861553304389775, "loss": 0.3809, "step": 1045 }, { "epoch": 1.51, "learning_rate": 0.0001885833735327223, "loss": 0.1314, "step": 1046 }, { "epoch": 1.51, "learning_rate": 0.00018855121402154687, "loss": 0.3179, "step": 1047 }, { "epoch": 1.52, "learning_rate": 0.00018851905451037145, "loss": 0.2455, "step": 1048 }, { "epoch": 1.52, "learning_rate": 0.00018848689499919604, "loss": 0.3326, "step": 1049 }, { "epoch": 1.52, "learning_rate": 0.0001884547354880206, "loss": 0.4805, "step": 1050 }, { "epoch": 1.52, "learning_rate": 0.00018842257597684515, "loss": 0.4333, "step": 1051 }, { "epoch": 1.52, "learning_rate": 0.00018839041646566973, "loss": 0.3871, "step": 1052 }, { "epoch": 1.52, "learning_rate": 0.0001883582569544943, "loss": 0.0124, "step": 1053 }, { "epoch": 1.52, "learning_rate": 0.00018832609744331885, "loss": 0.3501, "step": 1054 }, { "epoch": 1.53, "learning_rate": 0.00018829393793214346, "loss": 0.0182, "step": 1055 }, { "epoch": 1.53, "learning_rate": 0.00018826177842096802, "loss": 0.5273, "step": 1056 }, { "epoch": 1.53, "learning_rate": 0.00018822961890979257, "loss": 1.0527, "step": 1057 }, { "epoch": 1.53, "learning_rate": 0.00018819745939861716, "loss": 0.0105, "step": 1058 }, { "epoch": 1.53, "learning_rate": 0.00018816529988744171, "loss": 0.0106, "step": 1059 }, { "epoch": 1.53, "learning_rate": 0.00018813314037626627, "loss": 0.1983, "step": 1060 }, { "epoch": 1.53, "learning_rate": 0.00018810098086509088, "loss": 0.4026, "step": 1061 }, { "epoch": 1.54, "learning_rate": 0.00018806882135391544, "loss": 0.035, "step": 1062 }, { "epoch": 1.54, "learning_rate": 0.00018803666184274, "loss": 0.4431, "step": 1063 }, { "epoch": 1.54, "learning_rate": 0.00018800450233156458, "loss": 0.2834, "step": 1064 }, { "epoch": 1.54, "learning_rate": 0.00018797234282038914, "loss": 0.6074, "step": 1065 }, { "epoch": 1.54, "learning_rate": 0.0001879401833092137, "loss": 0.0211, "step": 1066 }, { "epoch": 1.54, "learning_rate": 0.00018790802379803828, "loss": 0.0605, "step": 1067 }, { "epoch": 1.54, "learning_rate": 0.00018787586428686286, "loss": 0.1231, "step": 1068 }, { "epoch": 1.55, "learning_rate": 0.00018784370477568742, "loss": 0.2887, "step": 1069 }, { "epoch": 1.55, "learning_rate": 0.000187811545264512, "loss": 0.6699, "step": 1070 }, { "epoch": 1.55, "learning_rate": 0.00018777938575333656, "loss": 0.5364, "step": 1071 }, { "epoch": 1.55, "learning_rate": 0.00018774722624216112, "loss": 0.2706, "step": 1072 }, { "epoch": 1.55, "learning_rate": 0.0001877150667309857, "loss": 0.312, "step": 1073 }, { "epoch": 1.55, "learning_rate": 0.00018768290721981029, "loss": 0.5498, "step": 1074 }, { "epoch": 1.55, "learning_rate": 0.00018765074770863484, "loss": 0.7256, "step": 1075 }, { "epoch": 1.56, "learning_rate": 0.0001876185881974594, "loss": 0.1799, "step": 1076 }, { "epoch": 1.56, "learning_rate": 0.00018758642868628398, "loss": 0.1133, "step": 1077 }, { "epoch": 1.56, "learning_rate": 0.00018755426917510854, "loss": 0.0159, "step": 1078 }, { "epoch": 1.56, "learning_rate": 0.0001875221096639331, "loss": 0.3236, "step": 1079 }, { "epoch": 1.56, "learning_rate": 0.00018748995015275768, "loss": 0.1868, "step": 1080 }, { "epoch": 1.56, "learning_rate": 0.00018745779064158227, "loss": 0.6394, "step": 1081 }, { "epoch": 1.56, "learning_rate": 0.00018742563113040682, "loss": 0.498, "step": 1082 }, { "epoch": 1.57, "learning_rate": 0.0001873934716192314, "loss": 0.1713, "step": 1083 }, { "epoch": 1.57, "learning_rate": 0.00018736131210805596, "loss": 0.0169, "step": 1084 }, { "epoch": 1.57, "learning_rate": 0.00018732915259688052, "loss": 0.6252, "step": 1085 }, { "epoch": 1.57, "learning_rate": 0.0001872969930857051, "loss": 0.0166, "step": 1086 }, { "epoch": 1.57, "learning_rate": 0.0001872648335745297, "loss": 0.2162, "step": 1087 }, { "epoch": 1.57, "learning_rate": 0.00018723267406335425, "loss": 0.2617, "step": 1088 }, { "epoch": 1.57, "learning_rate": 0.00018720051455217883, "loss": 0.5088, "step": 1089 }, { "epoch": 1.58, "learning_rate": 0.0001871683550410034, "loss": 0.0519, "step": 1090 }, { "epoch": 1.58, "learning_rate": 0.00018713619552982794, "loss": 0.1472, "step": 1091 }, { "epoch": 1.58, "learning_rate": 0.00018710403601865253, "loss": 0.2544, "step": 1092 }, { "epoch": 1.58, "learning_rate": 0.0001870718765074771, "loss": 0.0144, "step": 1093 }, { "epoch": 1.58, "learning_rate": 0.00018703971699630167, "loss": 0.4844, "step": 1094 }, { "epoch": 1.58, "learning_rate": 0.00018700755748512625, "loss": 0.9346, "step": 1095 }, { "epoch": 1.58, "learning_rate": 0.0001869753979739508, "loss": 0.5234, "step": 1096 }, { "epoch": 1.59, "learning_rate": 0.00018694323846277537, "loss": 0.0341, "step": 1097 }, { "epoch": 1.59, "learning_rate": 0.00018691107895159995, "loss": 0.1488, "step": 1098 }, { "epoch": 1.59, "learning_rate": 0.0001868789194404245, "loss": 0.0436, "step": 1099 }, { "epoch": 1.59, "learning_rate": 0.0001868467599292491, "loss": 0.0294, "step": 1100 }, { "epoch": 1.59, "learning_rate": 0.00018681460041807365, "loss": 0.7031, "step": 1101 }, { "epoch": 1.59, "learning_rate": 0.00018678244090689823, "loss": 0.1275, "step": 1102 }, { "epoch": 1.6, "learning_rate": 0.0001867502813957228, "loss": 0.1259, "step": 1103 }, { "epoch": 1.6, "learning_rate": 0.00018671812188454735, "loss": 0.1619, "step": 1104 }, { "epoch": 1.6, "learning_rate": 0.00018668596237337193, "loss": 0.9824, "step": 1105 }, { "epoch": 1.6, "learning_rate": 0.00018665380286219651, "loss": 0.2413, "step": 1106 }, { "epoch": 1.6, "learning_rate": 0.00018662164335102107, "loss": 0.0363, "step": 1107 }, { "epoch": 1.6, "learning_rate": 0.00018658948383984566, "loss": 0.5703, "step": 1108 }, { "epoch": 1.6, "learning_rate": 0.0001865573243286702, "loss": 0.0309, "step": 1109 }, { "epoch": 1.61, "learning_rate": 0.00018652516481749477, "loss": 0.4009, "step": 1110 }, { "epoch": 1.61, "learning_rate": 0.00018649300530631935, "loss": 0.4575, "step": 1111 }, { "epoch": 1.61, "learning_rate": 0.0001864608457951439, "loss": 0.0128, "step": 1112 }, { "epoch": 1.61, "learning_rate": 0.0001864286862839685, "loss": 0.3362, "step": 1113 }, { "epoch": 1.61, "learning_rate": 0.00018639652677279308, "loss": 0.0386, "step": 1114 }, { "epoch": 1.61, "learning_rate": 0.00018636436726161764, "loss": 0.1039, "step": 1115 }, { "epoch": 1.61, "learning_rate": 0.0001863322077504422, "loss": 1.1211, "step": 1116 }, { "epoch": 1.62, "learning_rate": 0.00018630004823926678, "loss": 0.3276, "step": 1117 }, { "epoch": 1.62, "learning_rate": 0.00018626788872809133, "loss": 0.0742, "step": 1118 }, { "epoch": 1.62, "learning_rate": 0.00018623572921691592, "loss": 0.2123, "step": 1119 }, { "epoch": 1.62, "learning_rate": 0.00018620356970574047, "loss": 0.3115, "step": 1120 }, { "epoch": 1.62, "learning_rate": 0.00018617141019456506, "loss": 0.0569, "step": 1121 }, { "epoch": 1.62, "learning_rate": 0.00018613925068338962, "loss": 0.3574, "step": 1122 }, { "epoch": 1.62, "learning_rate": 0.0001861070911722142, "loss": 0.0503, "step": 1123 }, { "epoch": 1.63, "learning_rate": 0.00018607493166103876, "loss": 0.2169, "step": 1124 }, { "epoch": 1.63, "learning_rate": 0.00018604277214986331, "loss": 0.4248, "step": 1125 }, { "epoch": 1.63, "learning_rate": 0.0001860106126386879, "loss": 0.5635, "step": 1126 }, { "epoch": 1.63, "learning_rate": 0.00018597845312751248, "loss": 0.4785, "step": 1127 }, { "epoch": 1.63, "learning_rate": 0.00018594629361633704, "loss": 0.0535, "step": 1128 }, { "epoch": 1.63, "learning_rate": 0.0001859141341051616, "loss": 0.2972, "step": 1129 }, { "epoch": 1.63, "learning_rate": 0.00018588197459398618, "loss": 0.2052, "step": 1130 }, { "epoch": 1.64, "learning_rate": 0.00018584981508281074, "loss": 0.0143, "step": 1131 }, { "epoch": 1.64, "learning_rate": 0.00018581765557163532, "loss": 0.3059, "step": 1132 }, { "epoch": 1.64, "learning_rate": 0.0001857854960604599, "loss": 0.0747, "step": 1133 }, { "epoch": 1.64, "learning_rate": 0.00018575333654928446, "loss": 0.3878, "step": 1134 }, { "epoch": 1.64, "learning_rate": 0.00018572117703810902, "loss": 0.4272, "step": 1135 }, { "epoch": 1.64, "learning_rate": 0.0001856890175269336, "loss": 0.041, "step": 1136 }, { "epoch": 1.64, "learning_rate": 0.00018565685801575816, "loss": 0.4455, "step": 1137 }, { "epoch": 1.65, "learning_rate": 0.00018562469850458274, "loss": 0.6536, "step": 1138 }, { "epoch": 1.65, "learning_rate": 0.00018559253899340733, "loss": 0.4277, "step": 1139 }, { "epoch": 1.65, "learning_rate": 0.00018556037948223188, "loss": 0.093, "step": 1140 }, { "epoch": 1.65, "learning_rate": 0.00018552821997105644, "loss": 0.2984, "step": 1141 }, { "epoch": 1.65, "learning_rate": 0.00018549606045988103, "loss": 0.0369, "step": 1142 }, { "epoch": 1.65, "learning_rate": 0.00018546390094870558, "loss": 0.0335, "step": 1143 }, { "epoch": 1.65, "learning_rate": 0.00018543174143753014, "loss": 0.3027, "step": 1144 }, { "epoch": 1.66, "learning_rate": 0.00018539958192635472, "loss": 0.4346, "step": 1145 }, { "epoch": 1.66, "learning_rate": 0.0001853674224151793, "loss": 0.3199, "step": 1146 }, { "epoch": 1.66, "learning_rate": 0.00018533526290400387, "loss": 0.2645, "step": 1147 }, { "epoch": 1.66, "learning_rate": 0.00018530310339282845, "loss": 0.5078, "step": 1148 }, { "epoch": 1.66, "learning_rate": 0.000185270943881653, "loss": 0.0616, "step": 1149 }, { "epoch": 1.66, "learning_rate": 0.00018523878437047756, "loss": 0.3135, "step": 1150 }, { "epoch": 1.66, "learning_rate": 0.00018520662485930215, "loss": 0.0333, "step": 1151 }, { "epoch": 1.67, "learning_rate": 0.00018517446534812673, "loss": 0.0086, "step": 1152 }, { "epoch": 1.67, "learning_rate": 0.0001851423058369513, "loss": 0.0484, "step": 1153 }, { "epoch": 1.67, "learning_rate": 0.00018511014632577585, "loss": 0.1115, "step": 1154 }, { "epoch": 1.67, "learning_rate": 0.00018507798681460043, "loss": 0.7109, "step": 1155 }, { "epoch": 1.67, "learning_rate": 0.00018504582730342499, "loss": 0.0742, "step": 1156 }, { "epoch": 1.67, "learning_rate": 0.00018501366779224954, "loss": 0.4846, "step": 1157 }, { "epoch": 1.67, "learning_rate": 0.00018498150828107415, "loss": 0.5688, "step": 1158 }, { "epoch": 1.68, "learning_rate": 0.0001849493487698987, "loss": 0.0156, "step": 1159 }, { "epoch": 1.68, "learning_rate": 0.00018491718925872327, "loss": 0.8037, "step": 1160 }, { "epoch": 1.68, "learning_rate": 0.00018488502974754785, "loss": 0.1136, "step": 1161 }, { "epoch": 1.68, "learning_rate": 0.0001848528702363724, "loss": 0.0808, "step": 1162 }, { "epoch": 1.68, "learning_rate": 0.00018482071072519697, "loss": 0.0624, "step": 1163 }, { "epoch": 1.68, "learning_rate": 0.00018478855121402158, "loss": 0.0318, "step": 1164 }, { "epoch": 1.68, "learning_rate": 0.00018475639170284613, "loss": 0.0629, "step": 1165 }, { "epoch": 1.69, "learning_rate": 0.0001847242321916707, "loss": 0.0481, "step": 1166 }, { "epoch": 1.69, "learning_rate": 0.00018469207268049528, "loss": 0.2267, "step": 1167 }, { "epoch": 1.69, "learning_rate": 0.00018465991316931983, "loss": 1.2656, "step": 1168 }, { "epoch": 1.69, "learning_rate": 0.0001846277536581444, "loss": 0.0562, "step": 1169 }, { "epoch": 1.69, "learning_rate": 0.00018459559414696897, "loss": 0.2952, "step": 1170 }, { "epoch": 1.69, "learning_rate": 0.00018456343463579356, "loss": 0.2499, "step": 1171 }, { "epoch": 1.69, "learning_rate": 0.00018453127512461811, "loss": 0.4171, "step": 1172 }, { "epoch": 1.7, "learning_rate": 0.0001844991156134427, "loss": 0.1648, "step": 1173 }, { "epoch": 1.7, "learning_rate": 0.00018446695610226726, "loss": 0.1066, "step": 1174 }, { "epoch": 1.7, "learning_rate": 0.0001844347965910918, "loss": 0.0715, "step": 1175 }, { "epoch": 1.7, "learning_rate": 0.0001844026370799164, "loss": 0.2234, "step": 1176 }, { "epoch": 1.7, "learning_rate": 0.00018437047756874098, "loss": 0.6753, "step": 1177 }, { "epoch": 1.7, "learning_rate": 0.00018433831805756554, "loss": 0.012, "step": 1178 }, { "epoch": 1.7, "learning_rate": 0.0001843061585463901, "loss": 0.2018, "step": 1179 }, { "epoch": 1.71, "learning_rate": 0.00018427399903521468, "loss": 0.0097, "step": 1180 }, { "epoch": 1.71, "learning_rate": 0.00018424183952403924, "loss": 0.8384, "step": 1181 }, { "epoch": 1.71, "learning_rate": 0.0001842096800128638, "loss": 0.2432, "step": 1182 }, { "epoch": 1.71, "learning_rate": 0.0001841775205016884, "loss": 0.583, "step": 1183 }, { "epoch": 1.71, "learning_rate": 0.00018414536099051296, "loss": 0.0136, "step": 1184 }, { "epoch": 1.71, "learning_rate": 0.00018411320147933752, "loss": 0.2087, "step": 1185 }, { "epoch": 1.72, "learning_rate": 0.0001840810419681621, "loss": 0.6973, "step": 1186 }, { "epoch": 1.72, "learning_rate": 0.00018404888245698666, "loss": 0.3066, "step": 1187 }, { "epoch": 1.72, "learning_rate": 0.00018401672294581122, "loss": 0.3223, "step": 1188 }, { "epoch": 1.72, "learning_rate": 0.0001839845634346358, "loss": 0.1769, "step": 1189 }, { "epoch": 1.72, "learning_rate": 0.00018395240392346038, "loss": 0.2669, "step": 1190 }, { "epoch": 1.72, "learning_rate": 0.00018392024441228494, "loss": 0.0819, "step": 1191 }, { "epoch": 1.72, "learning_rate": 0.00018388808490110952, "loss": 0.2836, "step": 1192 }, { "epoch": 1.73, "learning_rate": 0.00018385592538993408, "loss": 0.219, "step": 1193 }, { "epoch": 1.73, "learning_rate": 0.00018382376587875864, "loss": 0.5264, "step": 1194 }, { "epoch": 1.73, "learning_rate": 0.00018379160636758322, "loss": 0.0989, "step": 1195 }, { "epoch": 1.73, "learning_rate": 0.0001837594468564078, "loss": 0.3771, "step": 1196 }, { "epoch": 1.73, "learning_rate": 0.00018372728734523236, "loss": 0.1754, "step": 1197 }, { "epoch": 1.73, "learning_rate": 0.00018369512783405695, "loss": 0.0994, "step": 1198 }, { "epoch": 1.73, "learning_rate": 0.0001836629683228815, "loss": 0.0092, "step": 1199 }, { "epoch": 1.74, "learning_rate": 0.00018363080881170606, "loss": 0.75, "step": 1200 }, { "epoch": 1.74, "learning_rate": 0.00018359864930053065, "loss": 0.2956, "step": 1201 }, { "epoch": 1.74, "learning_rate": 0.0001835664897893552, "loss": 0.0122, "step": 1202 }, { "epoch": 1.74, "learning_rate": 0.0001835343302781798, "loss": 0.4326, "step": 1203 }, { "epoch": 1.74, "learning_rate": 0.00018350217076700434, "loss": 0.228, "step": 1204 }, { "epoch": 1.74, "learning_rate": 0.00018347001125582893, "loss": 0.0306, "step": 1205 }, { "epoch": 1.74, "learning_rate": 0.00018343785174465348, "loss": 0.0407, "step": 1206 }, { "epoch": 1.75, "learning_rate": 0.00018340569223347804, "loss": 0.1704, "step": 1207 }, { "epoch": 1.75, "learning_rate": 0.00018337353272230263, "loss": 0.5367, "step": 1208 }, { "epoch": 1.75, "learning_rate": 0.0001833413732111272, "loss": 0.1538, "step": 1209 }, { "epoch": 1.75, "learning_rate": 0.00018330921369995177, "loss": 0.2784, "step": 1210 }, { "epoch": 1.75, "learning_rate": 0.00018327705418877635, "loss": 0.0811, "step": 1211 }, { "epoch": 1.75, "learning_rate": 0.0001832448946776009, "loss": 0.0466, "step": 1212 }, { "epoch": 1.75, "learning_rate": 0.00018321273516642546, "loss": 0.0638, "step": 1213 }, { "epoch": 1.76, "learning_rate": 0.00018318057565525005, "loss": 0.0652, "step": 1214 }, { "epoch": 1.76, "learning_rate": 0.0001831484161440746, "loss": 0.5955, "step": 1215 }, { "epoch": 1.76, "learning_rate": 0.0001831162566328992, "loss": 0.4305, "step": 1216 }, { "epoch": 1.76, "learning_rate": 0.00018308409712172377, "loss": 0.4907, "step": 1217 }, { "epoch": 1.76, "learning_rate": 0.00018305193761054833, "loss": 0.0278, "step": 1218 }, { "epoch": 1.76, "learning_rate": 0.0001830197780993729, "loss": 0.0686, "step": 1219 }, { "epoch": 1.76, "learning_rate": 0.00018298761858819747, "loss": 0.3708, "step": 1220 }, { "epoch": 1.77, "learning_rate": 0.00018295545907702203, "loss": 0.4863, "step": 1221 }, { "epoch": 1.77, "learning_rate": 0.0001829232995658466, "loss": 0.2507, "step": 1222 }, { "epoch": 1.77, "learning_rate": 0.0001828911400546712, "loss": 0.1841, "step": 1223 }, { "epoch": 1.77, "learning_rate": 0.00018285898054349575, "loss": 0.8984, "step": 1224 }, { "epoch": 1.77, "learning_rate": 0.0001828268210323203, "loss": 0.6284, "step": 1225 }, { "epoch": 1.77, "learning_rate": 0.0001827946615211449, "loss": 0.3675, "step": 1226 }, { "epoch": 1.77, "learning_rate": 0.00018276250200996945, "loss": 0.5925, "step": 1227 }, { "epoch": 1.78, "learning_rate": 0.000182730342498794, "loss": 0.4229, "step": 1228 }, { "epoch": 1.78, "learning_rate": 0.0001826981829876186, "loss": 0.0353, "step": 1229 }, { "epoch": 1.78, "learning_rate": 0.00018266602347644318, "loss": 0.0444, "step": 1230 }, { "epoch": 1.78, "learning_rate": 0.00018263386396526773, "loss": 0.2043, "step": 1231 }, { "epoch": 1.78, "learning_rate": 0.0001826017044540923, "loss": 0.5171, "step": 1232 }, { "epoch": 1.78, "learning_rate": 0.00018256954494291687, "loss": 0.3355, "step": 1233 }, { "epoch": 1.78, "learning_rate": 0.00018253738543174143, "loss": 0.7305, "step": 1234 }, { "epoch": 1.79, "learning_rate": 0.00018250522592056602, "loss": 0.0635, "step": 1235 }, { "epoch": 1.79, "learning_rate": 0.0001824730664093906, "loss": 0.0251, "step": 1236 }, { "epoch": 1.79, "learning_rate": 0.00018244090689821516, "loss": 0.1512, "step": 1237 }, { "epoch": 1.79, "learning_rate": 0.00018240874738703971, "loss": 0.0754, "step": 1238 }, { "epoch": 1.79, "learning_rate": 0.0001823765878758643, "loss": 0.1655, "step": 1239 }, { "epoch": 1.79, "learning_rate": 0.00018234442836468885, "loss": 0.3105, "step": 1240 }, { "epoch": 1.79, "learning_rate": 0.00018231226885351344, "loss": 0.1657, "step": 1241 }, { "epoch": 1.8, "learning_rate": 0.00018228010934233802, "loss": 0.0334, "step": 1242 }, { "epoch": 1.8, "learning_rate": 0.00018224794983116258, "loss": 0.2773, "step": 1243 }, { "epoch": 1.8, "learning_rate": 0.00018221579031998714, "loss": 0.3327, "step": 1244 }, { "epoch": 1.8, "learning_rate": 0.00018218363080881172, "loss": 0.4805, "step": 1245 }, { "epoch": 1.8, "learning_rate": 0.00018215147129763628, "loss": 0.323, "step": 1246 }, { "epoch": 1.8, "learning_rate": 0.00018211931178646083, "loss": 0.0529, "step": 1247 }, { "epoch": 1.8, "learning_rate": 0.00018208715227528545, "loss": 0.0443, "step": 1248 }, { "epoch": 1.81, "learning_rate": 0.00018205499276411, "loss": 0.3828, "step": 1249 }, { "epoch": 1.81, "learning_rate": 0.00018202283325293456, "loss": 0.6518, "step": 1250 }, { "epoch": 1.81, "learning_rate": 0.00018199067374175914, "loss": 0.7816, "step": 1251 }, { "epoch": 1.81, "learning_rate": 0.0001819585142305837, "loss": 0.2339, "step": 1252 }, { "epoch": 1.81, "learning_rate": 0.00018192635471940826, "loss": 0.2413, "step": 1253 }, { "epoch": 1.81, "learning_rate": 0.00018189419520823284, "loss": 0.2589, "step": 1254 }, { "epoch": 1.81, "learning_rate": 0.00018186203569705743, "loss": 0.2373, "step": 1255 }, { "epoch": 1.82, "learning_rate": 0.00018182987618588198, "loss": 0.1313, "step": 1256 }, { "epoch": 1.82, "learning_rate": 0.00018179771667470654, "loss": 1.1133, "step": 1257 }, { "epoch": 1.82, "learning_rate": 0.00018176555716353112, "loss": 0.0735, "step": 1258 }, { "epoch": 1.82, "learning_rate": 0.00018173339765235568, "loss": 0.0523, "step": 1259 }, { "epoch": 1.82, "learning_rate": 0.00018170123814118027, "loss": 0.1436, "step": 1260 }, { "epoch": 1.82, "learning_rate": 0.00018166907863000485, "loss": 0.1799, "step": 1261 }, { "epoch": 1.83, "learning_rate": 0.0001816369191188294, "loss": 0.7617, "step": 1262 }, { "epoch": 1.83, "learning_rate": 0.00018160475960765396, "loss": 0.0324, "step": 1263 }, { "epoch": 1.83, "learning_rate": 0.00018157260009647855, "loss": 0.1908, "step": 1264 }, { "epoch": 1.83, "learning_rate": 0.0001815404405853031, "loss": 0.3379, "step": 1265 }, { "epoch": 1.83, "learning_rate": 0.00018150828107412766, "loss": 0.0172, "step": 1266 }, { "epoch": 1.83, "learning_rate": 0.00018147612156295227, "loss": 0.0453, "step": 1267 }, { "epoch": 1.83, "learning_rate": 0.00018144396205177683, "loss": 0.1499, "step": 1268 }, { "epoch": 1.84, "learning_rate": 0.00018141180254060139, "loss": 0.0984, "step": 1269 }, { "epoch": 1.84, "learning_rate": 0.00018137964302942597, "loss": 0.4248, "step": 1270 }, { "epoch": 1.84, "learning_rate": 0.00018134748351825053, "loss": 0.5939, "step": 1271 }, { "epoch": 1.84, "learning_rate": 0.00018131532400707508, "loss": 0.1431, "step": 1272 }, { "epoch": 1.84, "learning_rate": 0.00018128316449589967, "loss": 0.458, "step": 1273 }, { "epoch": 1.84, "learning_rate": 0.00018125100498472425, "loss": 0.5557, "step": 1274 }, { "epoch": 1.84, "learning_rate": 0.0001812188454735488, "loss": 0.2148, "step": 1275 }, { "epoch": 1.85, "learning_rate": 0.0001811866859623734, "loss": 0.046, "step": 1276 }, { "epoch": 1.85, "learning_rate": 0.00018115452645119795, "loss": 0.7124, "step": 1277 }, { "epoch": 1.85, "learning_rate": 0.0001811223669400225, "loss": 0.0284, "step": 1278 }, { "epoch": 1.85, "learning_rate": 0.0001810902074288471, "loss": 0.3447, "step": 1279 }, { "epoch": 1.85, "learning_rate": 0.00018105804791767168, "loss": 0.0402, "step": 1280 }, { "epoch": 1.85, "learning_rate": 0.00018102588840649623, "loss": 0.0349, "step": 1281 }, { "epoch": 1.85, "learning_rate": 0.0001809937288953208, "loss": 0.0183, "step": 1282 }, { "epoch": 1.86, "learning_rate": 0.00018096156938414537, "loss": 0.0629, "step": 1283 }, { "epoch": 1.86, "learning_rate": 0.00018092940987296993, "loss": 0.0102, "step": 1284 }, { "epoch": 1.86, "learning_rate": 0.00018089725036179451, "loss": 0.4517, "step": 1285 }, { "epoch": 1.86, "learning_rate": 0.0001808650908506191, "loss": 0.998, "step": 1286 }, { "epoch": 1.86, "learning_rate": 0.00018083293133944366, "loss": 0.2278, "step": 1287 }, { "epoch": 1.86, "learning_rate": 0.0001808007718282682, "loss": 0.1328, "step": 1288 }, { "epoch": 1.86, "learning_rate": 0.0001807686123170928, "loss": 0.7578, "step": 1289 }, { "epoch": 1.87, "learning_rate": 0.00018073645280591735, "loss": 0.0531, "step": 1290 }, { "epoch": 1.87, "learning_rate": 0.0001807042932947419, "loss": 0.4336, "step": 1291 }, { "epoch": 1.87, "learning_rate": 0.0001806721337835665, "loss": 0.2167, "step": 1292 }, { "epoch": 1.87, "learning_rate": 0.00018063997427239108, "loss": 0.2501, "step": 1293 }, { "epoch": 1.87, "learning_rate": 0.00018060781476121564, "loss": 0.4304, "step": 1294 }, { "epoch": 1.87, "learning_rate": 0.00018057565525004022, "loss": 0.8184, "step": 1295 }, { "epoch": 1.87, "learning_rate": 0.00018054349573886478, "loss": 0.0942, "step": 1296 }, { "epoch": 1.88, "learning_rate": 0.00018051133622768933, "loss": 1.062, "step": 1297 }, { "epoch": 1.88, "learning_rate": 0.00018047917671651392, "loss": 0.0394, "step": 1298 }, { "epoch": 1.88, "learning_rate": 0.0001804470172053385, "loss": 0.0308, "step": 1299 }, { "epoch": 1.88, "learning_rate": 0.00018041485769416306, "loss": 0.1382, "step": 1300 }, { "epoch": 1.88, "learning_rate": 0.00018038269818298764, "loss": 0.2758, "step": 1301 }, { "epoch": 1.88, "learning_rate": 0.0001803505386718122, "loss": 0.2295, "step": 1302 }, { "epoch": 1.88, "learning_rate": 0.00018031837916063676, "loss": 0.0132, "step": 1303 }, { "epoch": 1.89, "learning_rate": 0.00018028621964946134, "loss": 0.2671, "step": 1304 }, { "epoch": 1.89, "learning_rate": 0.0001802540601382859, "loss": 0.2633, "step": 1305 }, { "epoch": 1.89, "learning_rate": 0.00018022190062711048, "loss": 0.0056, "step": 1306 }, { "epoch": 1.89, "learning_rate": 0.00018018974111593504, "loss": 0.1809, "step": 1307 }, { "epoch": 1.89, "learning_rate": 0.00018015758160475962, "loss": 0.6835, "step": 1308 }, { "epoch": 1.89, "learning_rate": 0.00018012542209358418, "loss": 0.4326, "step": 1309 }, { "epoch": 1.89, "learning_rate": 0.00018009326258240876, "loss": 0.2063, "step": 1310 }, { "epoch": 1.9, "learning_rate": 0.00018006110307123332, "loss": 0.7178, "step": 1311 }, { "epoch": 1.9, "learning_rate": 0.0001800289435600579, "loss": 0.0146, "step": 1312 }, { "epoch": 1.9, "learning_rate": 0.00017999678404888246, "loss": 0.041, "step": 1313 }, { "epoch": 1.9, "learning_rate": 0.00017996462453770705, "loss": 0.5034, "step": 1314 }, { "epoch": 1.9, "learning_rate": 0.0001799324650265316, "loss": 0.7383, "step": 1315 }, { "epoch": 1.9, "learning_rate": 0.00017990030551535616, "loss": 0.0416, "step": 1316 }, { "epoch": 1.9, "learning_rate": 0.00017986814600418074, "loss": 0.5586, "step": 1317 }, { "epoch": 1.91, "learning_rate": 0.0001798359864930053, "loss": 0.3043, "step": 1318 }, { "epoch": 1.91, "learning_rate": 0.00017980382698182988, "loss": 0.2427, "step": 1319 }, { "epoch": 1.91, "learning_rate": 0.00017977166747065447, "loss": 0.4307, "step": 1320 }, { "epoch": 1.91, "learning_rate": 0.00017973950795947903, "loss": 0.0485, "step": 1321 }, { "epoch": 1.91, "learning_rate": 0.00017970734844830358, "loss": 0.7734, "step": 1322 }, { "epoch": 1.91, "learning_rate": 0.00017967518893712817, "loss": 0.0746, "step": 1323 }, { "epoch": 1.91, "learning_rate": 0.00017964302942595272, "loss": 0.0929, "step": 1324 }, { "epoch": 1.92, "learning_rate": 0.0001796108699147773, "loss": 0.6484, "step": 1325 }, { "epoch": 1.92, "learning_rate": 0.0001795787104036019, "loss": 0.365, "step": 1326 }, { "epoch": 1.92, "learning_rate": 0.00017954655089242645, "loss": 0.0298, "step": 1327 }, { "epoch": 1.92, "learning_rate": 0.000179514391381251, "loss": 0.2339, "step": 1328 }, { "epoch": 1.92, "learning_rate": 0.0001794822318700756, "loss": 0.0439, "step": 1329 }, { "epoch": 1.92, "learning_rate": 0.00017945007235890015, "loss": 0.477, "step": 1330 }, { "epoch": 1.92, "learning_rate": 0.00017941791284772473, "loss": 0.0862, "step": 1331 }, { "epoch": 1.93, "learning_rate": 0.0001793857533365493, "loss": 0.1543, "step": 1332 }, { "epoch": 1.93, "learning_rate": 0.00017935359382537387, "loss": 0.6621, "step": 1333 }, { "epoch": 1.93, "learning_rate": 0.00017932143431419843, "loss": 0.0511, "step": 1334 }, { "epoch": 1.93, "learning_rate": 0.000179289274803023, "loss": 0.0583, "step": 1335 }, { "epoch": 1.93, "learning_rate": 0.00017925711529184757, "loss": 0.0653, "step": 1336 }, { "epoch": 1.93, "learning_rate": 0.00017922495578067213, "loss": 0.3071, "step": 1337 }, { "epoch": 1.93, "learning_rate": 0.0001791927962694967, "loss": 0.0223, "step": 1338 }, { "epoch": 1.94, "learning_rate": 0.0001791606367583213, "loss": 0.022, "step": 1339 }, { "epoch": 1.94, "learning_rate": 0.00017912847724714585, "loss": 0.4209, "step": 1340 }, { "epoch": 1.94, "learning_rate": 0.0001790963177359704, "loss": 0.351, "step": 1341 }, { "epoch": 1.94, "learning_rate": 0.000179064158224795, "loss": 0.019, "step": 1342 }, { "epoch": 1.94, "learning_rate": 0.00017903199871361955, "loss": 0.4434, "step": 1343 }, { "epoch": 1.94, "learning_rate": 0.00017899983920244413, "loss": 0.1196, "step": 1344 }, { "epoch": 1.95, "learning_rate": 0.00017896767969126872, "loss": 0.0281, "step": 1345 }, { "epoch": 1.95, "learning_rate": 0.00017893552018009327, "loss": 0.025, "step": 1346 }, { "epoch": 1.95, "learning_rate": 0.00017890336066891783, "loss": 0.2275, "step": 1347 }, { "epoch": 1.95, "learning_rate": 0.00017887120115774242, "loss": 0.091, "step": 1348 }, { "epoch": 1.95, "learning_rate": 0.00017883904164656697, "loss": 0.2425, "step": 1349 }, { "epoch": 1.95, "learning_rate": 0.00017880688213539153, "loss": 0.0718, "step": 1350 }, { "epoch": 1.95, "learning_rate": 0.00017877472262421614, "loss": 0.0259, "step": 1351 }, { "epoch": 1.96, "learning_rate": 0.0001787425631130407, "loss": 0.3746, "step": 1352 }, { "epoch": 1.96, "learning_rate": 0.00017871040360186525, "loss": 0.1061, "step": 1353 }, { "epoch": 1.96, "learning_rate": 0.00017867824409068984, "loss": 0.6211, "step": 1354 }, { "epoch": 1.96, "learning_rate": 0.0001786460845795144, "loss": 0.1288, "step": 1355 }, { "epoch": 1.96, "learning_rate": 0.00017861392506833895, "loss": 0.25, "step": 1356 }, { "epoch": 1.96, "learning_rate": 0.00017858176555716354, "loss": 0.1924, "step": 1357 }, { "epoch": 1.96, "learning_rate": 0.00017854960604598812, "loss": 0.0574, "step": 1358 }, { "epoch": 1.97, "learning_rate": 0.00017851744653481268, "loss": 0.0112, "step": 1359 }, { "epoch": 1.97, "learning_rate": 0.00017848528702363726, "loss": 0.0092, "step": 1360 }, { "epoch": 1.97, "learning_rate": 0.00017845312751246182, "loss": 0.4004, "step": 1361 }, { "epoch": 1.97, "learning_rate": 0.00017842096800128638, "loss": 0.3518, "step": 1362 }, { "epoch": 1.97, "learning_rate": 0.00017838880849011096, "loss": 0.0601, "step": 1363 }, { "epoch": 1.97, "learning_rate": 0.00017835664897893554, "loss": 0.1061, "step": 1364 }, { "epoch": 1.97, "learning_rate": 0.0001783244894677601, "loss": 0.4639, "step": 1365 }, { "epoch": 1.98, "learning_rate": 0.00017829232995658466, "loss": 0.2524, "step": 1366 }, { "epoch": 1.98, "learning_rate": 0.00017826017044540924, "loss": 0.0637, "step": 1367 }, { "epoch": 1.98, "learning_rate": 0.0001782280109342338, "loss": 0.0394, "step": 1368 }, { "epoch": 1.98, "learning_rate": 0.00017819585142305836, "loss": 0.0407, "step": 1369 }, { "epoch": 1.98, "learning_rate": 0.00017816369191188297, "loss": 0.7366, "step": 1370 }, { "epoch": 1.98, "learning_rate": 0.00017813153240070752, "loss": 0.2137, "step": 1371 }, { "epoch": 1.98, "learning_rate": 0.00017809937288953208, "loss": 0.3064, "step": 1372 }, { "epoch": 1.99, "learning_rate": 0.00017806721337835667, "loss": 0.0546, "step": 1373 }, { "epoch": 1.99, "learning_rate": 0.00017803505386718122, "loss": 0.457, "step": 1374 }, { "epoch": 1.99, "learning_rate": 0.00017800289435600578, "loss": 0.2714, "step": 1375 }, { "epoch": 1.99, "learning_rate": 0.0001779707348448304, "loss": 0.0291, "step": 1376 }, { "epoch": 1.99, "learning_rate": 0.00017793857533365495, "loss": 0.0305, "step": 1377 }, { "epoch": 1.99, "learning_rate": 0.0001779064158224795, "loss": 1.0703, "step": 1378 }, { "epoch": 1.99, "learning_rate": 0.0001778742563113041, "loss": 0.3367, "step": 1379 }, { "epoch": 2.0, "learning_rate": 0.00017784209680012865, "loss": 0.0811, "step": 1380 }, { "epoch": 2.0, "learning_rate": 0.0001778099372889532, "loss": 0.0503, "step": 1381 }, { "epoch": 2.0, "learning_rate": 0.00017777777777777779, "loss": 0.3013, "step": 1382 }, { "epoch": 2.0, "learning_rate": 0.00017774561826660237, "loss": 0.4683, "step": 1383 }, { "epoch": 2.0, "learning_rate": 0.00017771345875542693, "loss": 0.1169, "step": 1384 }, { "epoch": 2.0, "learning_rate": 0.0001776812992442515, "loss": 0.333, "step": 1385 }, { "epoch": 2.0, "learning_rate": 0.00017764913973307607, "loss": 0.0984, "step": 1386 }, { "epoch": 2.01, "learning_rate": 0.00017761698022190063, "loss": 0.209, "step": 1387 }, { "epoch": 2.01, "learning_rate": 0.0001775848207107252, "loss": 0.0814, "step": 1388 }, { "epoch": 2.01, "learning_rate": 0.0001775526611995498, "loss": 0.4619, "step": 1389 }, { "epoch": 2.01, "learning_rate": 0.00017752050168837435, "loss": 0.2397, "step": 1390 }, { "epoch": 2.01, "learning_rate": 0.0001774883421771989, "loss": 0.1909, "step": 1391 }, { "epoch": 2.01, "learning_rate": 0.0001774561826660235, "loss": 0.0117, "step": 1392 }, { "epoch": 2.01, "learning_rate": 0.00017742402315484805, "loss": 0.0206, "step": 1393 }, { "epoch": 2.02, "learning_rate": 0.0001773918636436726, "loss": 0.178, "step": 1394 }, { "epoch": 2.02, "learning_rate": 0.0001773597041324972, "loss": 0.1544, "step": 1395 }, { "epoch": 2.02, "learning_rate": 0.00017732754462132177, "loss": 0.5376, "step": 1396 }, { "epoch": 2.02, "learning_rate": 0.00017729538511014633, "loss": 0.232, "step": 1397 }, { "epoch": 2.02, "learning_rate": 0.00017726322559897091, "loss": 0.3546, "step": 1398 }, { "epoch": 2.02, "learning_rate": 0.00017723106608779547, "loss": 0.4072, "step": 1399 }, { "epoch": 2.02, "learning_rate": 0.00017719890657662003, "loss": 0.5112, "step": 1400 }, { "epoch": 2.03, "learning_rate": 0.0001771667470654446, "loss": 0.5982, "step": 1401 }, { "epoch": 2.03, "learning_rate": 0.0001771345875542692, "loss": 0.0368, "step": 1402 }, { "epoch": 2.03, "learning_rate": 0.00017710242804309375, "loss": 0.3184, "step": 1403 }, { "epoch": 2.03, "learning_rate": 0.00017707026853191834, "loss": 0.1766, "step": 1404 }, { "epoch": 2.03, "learning_rate": 0.0001770381090207429, "loss": 0.0089, "step": 1405 }, { "epoch": 2.03, "learning_rate": 0.00017700594950956745, "loss": 0.0193, "step": 1406 }, { "epoch": 2.03, "learning_rate": 0.00017697378999839204, "loss": 0.0452, "step": 1407 }, { "epoch": 2.04, "learning_rate": 0.0001769416304872166, "loss": 0.4775, "step": 1408 }, { "epoch": 2.04, "learning_rate": 0.00017690947097604118, "loss": 0.4712, "step": 1409 }, { "epoch": 2.04, "learning_rate": 0.00017687731146486573, "loss": 0.2158, "step": 1410 }, { "epoch": 2.04, "learning_rate": 0.00017684515195369032, "loss": 0.0389, "step": 1411 }, { "epoch": 2.04, "learning_rate": 0.00017681299244251487, "loss": 0.4897, "step": 1412 }, { "epoch": 2.04, "learning_rate": 0.00017678083293133946, "loss": 0.0214, "step": 1413 }, { "epoch": 2.04, "learning_rate": 0.00017674867342016402, "loss": 0.4355, "step": 1414 }, { "epoch": 2.05, "learning_rate": 0.0001767165139089886, "loss": 0.1139, "step": 1415 }, { "epoch": 2.05, "learning_rate": 0.00017668435439781316, "loss": 0.3818, "step": 1416 }, { "epoch": 2.05, "learning_rate": 0.00017665219488663774, "loss": 0.1379, "step": 1417 }, { "epoch": 2.05, "learning_rate": 0.0001766200353754623, "loss": 0.3631, "step": 1418 }, { "epoch": 2.05, "learning_rate": 0.00017658787586428685, "loss": 0.1336, "step": 1419 }, { "epoch": 2.05, "learning_rate": 0.00017655571635311144, "loss": 0.0152, "step": 1420 }, { "epoch": 2.05, "learning_rate": 0.00017652355684193602, "loss": 0.0558, "step": 1421 }, { "epoch": 2.06, "learning_rate": 0.00017649139733076058, "loss": 0.0133, "step": 1422 }, { "epoch": 2.06, "learning_rate": 0.00017645923781958516, "loss": 0.0412, "step": 1423 }, { "epoch": 2.06, "learning_rate": 0.00017642707830840972, "loss": 0.6738, "step": 1424 }, { "epoch": 2.06, "learning_rate": 0.00017639491879723428, "loss": 0.0502, "step": 1425 }, { "epoch": 2.06, "learning_rate": 0.00017636275928605886, "loss": 0.4653, "step": 1426 }, { "epoch": 2.06, "learning_rate": 0.00017633059977488342, "loss": 0.1294, "step": 1427 }, { "epoch": 2.07, "learning_rate": 0.000176298440263708, "loss": 0.1364, "step": 1428 }, { "epoch": 2.07, "learning_rate": 0.0001762662807525326, "loss": 0.1282, "step": 1429 }, { "epoch": 2.07, "learning_rate": 0.00017623412124135714, "loss": 0.0245, "step": 1430 }, { "epoch": 2.07, "learning_rate": 0.0001762019617301817, "loss": 0.1396, "step": 1431 }, { "epoch": 2.07, "learning_rate": 0.00017616980221900628, "loss": 0.4551, "step": 1432 }, { "epoch": 2.07, "learning_rate": 0.00017613764270783084, "loss": 0.4282, "step": 1433 }, { "epoch": 2.07, "learning_rate": 0.00017610548319665543, "loss": 0.2305, "step": 1434 }, { "epoch": 2.08, "learning_rate": 0.00017607332368547998, "loss": 0.4277, "step": 1435 }, { "epoch": 2.08, "learning_rate": 0.00017604116417430457, "loss": 0.2585, "step": 1436 }, { "epoch": 2.08, "learning_rate": 0.00017600900466312912, "loss": 0.0382, "step": 1437 }, { "epoch": 2.08, "learning_rate": 0.0001759768451519537, "loss": 0.0378, "step": 1438 }, { "epoch": 2.08, "learning_rate": 0.00017594468564077826, "loss": 0.0575, "step": 1439 }, { "epoch": 2.08, "learning_rate": 0.00017591252612960282, "loss": 0.394, "step": 1440 }, { "epoch": 2.08, "learning_rate": 0.0001758803666184274, "loss": 0.0131, "step": 1441 }, { "epoch": 2.09, "learning_rate": 0.000175848207107252, "loss": 0.415, "step": 1442 }, { "epoch": 2.09, "learning_rate": 0.00017581604759607655, "loss": 0.2232, "step": 1443 }, { "epoch": 2.09, "learning_rate": 0.0001757838880849011, "loss": 0.2194, "step": 1444 }, { "epoch": 2.09, "learning_rate": 0.0001757517285737257, "loss": 0.0507, "step": 1445 }, { "epoch": 2.09, "learning_rate": 0.00017571956906255024, "loss": 0.0937, "step": 1446 }, { "epoch": 2.09, "learning_rate": 0.00017568740955137483, "loss": 0.0722, "step": 1447 }, { "epoch": 2.09, "learning_rate": 0.0001756552500401994, "loss": 0.2513, "step": 1448 }, { "epoch": 2.1, "learning_rate": 0.00017562309052902397, "loss": 0.5186, "step": 1449 }, { "epoch": 2.1, "learning_rate": 0.00017559093101784853, "loss": 0.4209, "step": 1450 }, { "epoch": 2.1, "learning_rate": 0.0001755587715066731, "loss": 0.2663, "step": 1451 }, { "epoch": 2.1, "learning_rate": 0.00017552661199549767, "loss": 0.2131, "step": 1452 }, { "epoch": 2.1, "learning_rate": 0.00017549445248432222, "loss": 0.0486, "step": 1453 }, { "epoch": 2.1, "learning_rate": 0.00017546229297314684, "loss": 0.067, "step": 1454 }, { "epoch": 2.1, "learning_rate": 0.0001754301334619714, "loss": 0.0178, "step": 1455 }, { "epoch": 2.11, "learning_rate": 0.00017539797395079595, "loss": 0.0964, "step": 1456 }, { "epoch": 2.11, "learning_rate": 0.00017536581443962053, "loss": 0.7441, "step": 1457 }, { "epoch": 2.11, "learning_rate": 0.0001753336549284451, "loss": 0.2605, "step": 1458 }, { "epoch": 2.11, "learning_rate": 0.00017530149541726965, "loss": 0.8633, "step": 1459 }, { "epoch": 2.11, "learning_rate": 0.00017526933590609423, "loss": 0.1082, "step": 1460 }, { "epoch": 2.11, "learning_rate": 0.00017523717639491882, "loss": 0.0782, "step": 1461 }, { "epoch": 2.11, "learning_rate": 0.00017520501688374337, "loss": 0.3411, "step": 1462 }, { "epoch": 2.12, "learning_rate": 0.00017517285737256796, "loss": 0.0447, "step": 1463 }, { "epoch": 2.12, "learning_rate": 0.00017514069786139251, "loss": 0.1625, "step": 1464 }, { "epoch": 2.12, "learning_rate": 0.00017510853835021707, "loss": 0.3384, "step": 1465 }, { "epoch": 2.12, "learning_rate": 0.00017507637883904166, "loss": 0.1653, "step": 1466 }, { "epoch": 2.12, "learning_rate": 0.00017504421932786624, "loss": 0.2932, "step": 1467 }, { "epoch": 2.12, "learning_rate": 0.0001750120598166908, "loss": 0.0236, "step": 1468 }, { "epoch": 2.12, "learning_rate": 0.00017497990030551535, "loss": 0.1301, "step": 1469 }, { "epoch": 2.13, "learning_rate": 0.00017494774079433994, "loss": 0.308, "step": 1470 }, { "epoch": 2.13, "learning_rate": 0.0001749155812831645, "loss": 0.2839, "step": 1471 }, { "epoch": 2.13, "learning_rate": 0.00017488342177198905, "loss": 0.7676, "step": 1472 }, { "epoch": 2.13, "learning_rate": 0.00017485126226081366, "loss": 0.0378, "step": 1473 }, { "epoch": 2.13, "learning_rate": 0.00017481910274963822, "loss": 0.0452, "step": 1474 }, { "epoch": 2.13, "learning_rate": 0.00017478694323846278, "loss": 0.1438, "step": 1475 }, { "epoch": 2.13, "learning_rate": 0.00017475478372728736, "loss": 0.0236, "step": 1476 }, { "epoch": 2.14, "learning_rate": 0.00017472262421611192, "loss": 0.0728, "step": 1477 }, { "epoch": 2.14, "learning_rate": 0.00017469046470493647, "loss": 0.1371, "step": 1478 }, { "epoch": 2.14, "learning_rate": 0.00017465830519376109, "loss": 0.1774, "step": 1479 }, { "epoch": 2.14, "learning_rate": 0.00017462614568258564, "loss": 0.0717, "step": 1480 }, { "epoch": 2.14, "learning_rate": 0.0001745939861714102, "loss": 0.3423, "step": 1481 }, { "epoch": 2.14, "learning_rate": 0.00017456182666023478, "loss": 0.0059, "step": 1482 }, { "epoch": 2.14, "learning_rate": 0.00017452966714905934, "loss": 0.0123, "step": 1483 }, { "epoch": 2.15, "learning_rate": 0.0001744975076378839, "loss": 0.0057, "step": 1484 }, { "epoch": 2.15, "learning_rate": 0.00017446534812670848, "loss": 0.1255, "step": 1485 }, { "epoch": 2.15, "learning_rate": 0.00017443318861553307, "loss": 0.011, "step": 1486 }, { "epoch": 2.15, "learning_rate": 0.00017440102910435762, "loss": 0.3121, "step": 1487 }, { "epoch": 2.15, "learning_rate": 0.0001743688695931822, "loss": 0.1583, "step": 1488 }, { "epoch": 2.15, "learning_rate": 0.00017433671008200676, "loss": 0.2458, "step": 1489 }, { "epoch": 2.15, "learning_rate": 0.00017430455057083132, "loss": 0.0442, "step": 1490 }, { "epoch": 2.16, "learning_rate": 0.0001742723910596559, "loss": 0.114, "step": 1491 }, { "epoch": 2.16, "learning_rate": 0.0001742402315484805, "loss": 0.3412, "step": 1492 }, { "epoch": 2.16, "learning_rate": 0.00017420807203730505, "loss": 0.127, "step": 1493 }, { "epoch": 2.16, "learning_rate": 0.0001741759125261296, "loss": 0.5459, "step": 1494 }, { "epoch": 2.16, "learning_rate": 0.00017414375301495419, "loss": 0.3782, "step": 1495 }, { "epoch": 2.16, "learning_rate": 0.00017411159350377874, "loss": 0.012, "step": 1496 }, { "epoch": 2.16, "learning_rate": 0.0001740794339926033, "loss": 0.2576, "step": 1497 }, { "epoch": 2.17, "learning_rate": 0.00017404727448142788, "loss": 0.0093, "step": 1498 }, { "epoch": 2.17, "learning_rate": 0.00017401511497025247, "loss": 0.0206, "step": 1499 }, { "epoch": 2.17, "learning_rate": 0.00017398295545907703, "loss": 0.3408, "step": 1500 }, { "epoch": 2.17, "learning_rate": 0.0001739507959479016, "loss": 0.2435, "step": 1501 }, { "epoch": 2.17, "learning_rate": 0.00017391863643672617, "loss": 0.2646, "step": 1502 }, { "epoch": 2.17, "learning_rate": 0.00017388647692555072, "loss": 0.427, "step": 1503 }, { "epoch": 2.17, "learning_rate": 0.0001738543174143753, "loss": 0.1873, "step": 1504 }, { "epoch": 2.18, "learning_rate": 0.0001738221579031999, "loss": 0.2759, "step": 1505 }, { "epoch": 2.18, "learning_rate": 0.00017378999839202445, "loss": 0.175, "step": 1506 }, { "epoch": 2.18, "learning_rate": 0.00017375783888084903, "loss": 0.0141, "step": 1507 }, { "epoch": 2.18, "learning_rate": 0.0001737256793696736, "loss": 0.4731, "step": 1508 }, { "epoch": 2.18, "learning_rate": 0.00017369351985849815, "loss": 0.0511, "step": 1509 }, { "epoch": 2.18, "learning_rate": 0.00017366136034732273, "loss": 0.0182, "step": 1510 }, { "epoch": 2.19, "learning_rate": 0.00017362920083614731, "loss": 0.1714, "step": 1511 }, { "epoch": 2.19, "learning_rate": 0.00017359704132497187, "loss": 0.0319, "step": 1512 }, { "epoch": 2.19, "learning_rate": 0.00017356488181379646, "loss": 0.4464, "step": 1513 }, { "epoch": 2.19, "learning_rate": 0.000173532722302621, "loss": 0.1361, "step": 1514 }, { "epoch": 2.19, "learning_rate": 0.00017350056279144557, "loss": 0.0107, "step": 1515 }, { "epoch": 2.19, "learning_rate": 0.00017346840328027015, "loss": 0.0139, "step": 1516 }, { "epoch": 2.19, "learning_rate": 0.0001734362437690947, "loss": 0.1436, "step": 1517 }, { "epoch": 2.2, "learning_rate": 0.0001734040842579193, "loss": 0.1785, "step": 1518 }, { "epoch": 2.2, "learning_rate": 0.00017337192474674385, "loss": 0.243, "step": 1519 }, { "epoch": 2.2, "learning_rate": 0.00017333976523556844, "loss": 0.2253, "step": 1520 }, { "epoch": 2.2, "learning_rate": 0.000173307605724393, "loss": 0.2687, "step": 1521 }, { "epoch": 2.2, "learning_rate": 0.00017327544621321755, "loss": 0.0188, "step": 1522 }, { "epoch": 2.2, "learning_rate": 0.00017324328670204213, "loss": 0.2349, "step": 1523 }, { "epoch": 2.2, "learning_rate": 0.00017321112719086672, "loss": 0.0208, "step": 1524 }, { "epoch": 2.21, "learning_rate": 0.00017317896767969127, "loss": 0.2988, "step": 1525 }, { "epoch": 2.21, "learning_rate": 0.00017314680816851586, "loss": 0.1503, "step": 1526 }, { "epoch": 2.21, "learning_rate": 0.00017311464865734042, "loss": 0.2207, "step": 1527 }, { "epoch": 2.21, "learning_rate": 0.00017308248914616497, "loss": 0.0321, "step": 1528 }, { "epoch": 2.21, "learning_rate": 0.00017305032963498956, "loss": 0.2839, "step": 1529 }, { "epoch": 2.21, "learning_rate": 0.00017301817012381411, "loss": 0.0342, "step": 1530 }, { "epoch": 2.21, "learning_rate": 0.0001729860106126387, "loss": 0.3098, "step": 1531 }, { "epoch": 2.22, "learning_rate": 0.00017295385110146328, "loss": 0.1734, "step": 1532 }, { "epoch": 2.22, "learning_rate": 0.00017292169159028784, "loss": 0.318, "step": 1533 }, { "epoch": 2.22, "learning_rate": 0.0001728895320791124, "loss": 0.0157, "step": 1534 }, { "epoch": 2.22, "learning_rate": 0.00017285737256793698, "loss": 0.5957, "step": 1535 }, { "epoch": 2.22, "learning_rate": 0.00017282521305676154, "loss": 0.3463, "step": 1536 }, { "epoch": 2.22, "learning_rate": 0.00017279305354558612, "loss": 0.1742, "step": 1537 }, { "epoch": 2.22, "learning_rate": 0.0001727608940344107, "loss": 0.0176, "step": 1538 }, { "epoch": 2.23, "learning_rate": 0.00017272873452323526, "loss": 0.2388, "step": 1539 }, { "epoch": 2.23, "learning_rate": 0.00017269657501205982, "loss": 0.028, "step": 1540 }, { "epoch": 2.23, "learning_rate": 0.0001726644155008844, "loss": 0.2358, "step": 1541 }, { "epoch": 2.23, "learning_rate": 0.00017263225598970896, "loss": 0.2746, "step": 1542 }, { "epoch": 2.23, "learning_rate": 0.00017260009647853352, "loss": 0.1541, "step": 1543 }, { "epoch": 2.23, "learning_rate": 0.0001725679369673581, "loss": 0.2676, "step": 1544 }, { "epoch": 2.23, "learning_rate": 0.00017253577745618268, "loss": 0.1106, "step": 1545 }, { "epoch": 2.24, "learning_rate": 0.00017250361794500724, "loss": 0.208, "step": 1546 }, { "epoch": 2.24, "learning_rate": 0.0001724714584338318, "loss": 0.3488, "step": 1547 }, { "epoch": 2.24, "learning_rate": 0.00017243929892265638, "loss": 0.0475, "step": 1548 }, { "epoch": 2.24, "learning_rate": 0.00017240713941148094, "loss": 0.2738, "step": 1549 }, { "epoch": 2.24, "learning_rate": 0.00017237497990030552, "loss": 0.0111, "step": 1550 }, { "epoch": 2.24, "learning_rate": 0.0001723428203891301, "loss": 0.3933, "step": 1551 }, { "epoch": 2.24, "learning_rate": 0.00017231066087795466, "loss": 0.2181, "step": 1552 }, { "epoch": 2.25, "learning_rate": 0.00017227850136677922, "loss": 0.0124, "step": 1553 }, { "epoch": 2.25, "learning_rate": 0.0001722463418556038, "loss": 0.1319, "step": 1554 }, { "epoch": 2.25, "learning_rate": 0.00017221418234442836, "loss": 0.2083, "step": 1555 }, { "epoch": 2.25, "learning_rate": 0.00017218202283325292, "loss": 0.0042, "step": 1556 }, { "epoch": 2.25, "learning_rate": 0.00017214986332207753, "loss": 0.0425, "step": 1557 }, { "epoch": 2.25, "learning_rate": 0.0001721177038109021, "loss": 0.1321, "step": 1558 }, { "epoch": 2.25, "learning_rate": 0.00017208554429972664, "loss": 0.064, "step": 1559 }, { "epoch": 2.26, "learning_rate": 0.00017205338478855123, "loss": 0.4538, "step": 1560 }, { "epoch": 2.26, "learning_rate": 0.00017202122527737579, "loss": 0.2072, "step": 1561 }, { "epoch": 2.26, "learning_rate": 0.00017198906576620034, "loss": 0.0178, "step": 1562 }, { "epoch": 2.26, "learning_rate": 0.00017195690625502495, "loss": 0.2131, "step": 1563 }, { "epoch": 2.26, "learning_rate": 0.0001719247467438495, "loss": 0.0525, "step": 1564 }, { "epoch": 2.26, "learning_rate": 0.00017189258723267407, "loss": 0.022, "step": 1565 }, { "epoch": 2.26, "learning_rate": 0.00017186042772149865, "loss": 0.3335, "step": 1566 }, { "epoch": 2.27, "learning_rate": 0.0001718282682103232, "loss": 0.206, "step": 1567 }, { "epoch": 2.27, "learning_rate": 0.00017179610869914777, "loss": 0.2913, "step": 1568 }, { "epoch": 2.27, "learning_rate": 0.00017176394918797235, "loss": 0.2917, "step": 1569 }, { "epoch": 2.27, "learning_rate": 0.00017173178967679693, "loss": 0.0248, "step": 1570 }, { "epoch": 2.27, "learning_rate": 0.0001716996301656215, "loss": 0.5918, "step": 1571 }, { "epoch": 2.27, "learning_rate": 0.00017166747065444605, "loss": 0.1367, "step": 1572 }, { "epoch": 2.27, "learning_rate": 0.00017163531114327063, "loss": 0.0558, "step": 1573 }, { "epoch": 2.28, "learning_rate": 0.0001716031516320952, "loss": 0.1682, "step": 1574 }, { "epoch": 2.28, "learning_rate": 0.00017157099212091977, "loss": 0.4746, "step": 1575 }, { "epoch": 2.28, "learning_rate": 0.00017153883260974436, "loss": 0.0621, "step": 1576 }, { "epoch": 2.28, "learning_rate": 0.00017150667309856891, "loss": 0.1, "step": 1577 }, { "epoch": 2.28, "learning_rate": 0.00017147451358739347, "loss": 0.4941, "step": 1578 }, { "epoch": 2.28, "learning_rate": 0.00017144235407621806, "loss": 0.3308, "step": 1579 }, { "epoch": 2.28, "learning_rate": 0.0001714101945650426, "loss": 0.3972, "step": 1580 }, { "epoch": 2.29, "learning_rate": 0.00017137803505386717, "loss": 0.1355, "step": 1581 }, { "epoch": 2.29, "learning_rate": 0.00017134587554269178, "loss": 0.2352, "step": 1582 }, { "epoch": 2.29, "learning_rate": 0.00017131371603151634, "loss": 0.4062, "step": 1583 }, { "epoch": 2.29, "learning_rate": 0.0001712815565203409, "loss": 0.2715, "step": 1584 }, { "epoch": 2.29, "learning_rate": 0.00017124939700916548, "loss": 0.2617, "step": 1585 }, { "epoch": 2.29, "learning_rate": 0.00017121723749799004, "loss": 0.3064, "step": 1586 }, { "epoch": 2.3, "learning_rate": 0.0001711850779868146, "loss": 0.0356, "step": 1587 }, { "epoch": 2.3, "learning_rate": 0.00017115291847563918, "loss": 0.2171, "step": 1588 }, { "epoch": 2.3, "learning_rate": 0.00017112075896446376, "loss": 0.0757, "step": 1589 }, { "epoch": 2.3, "learning_rate": 0.00017108859945328832, "loss": 0.2402, "step": 1590 }, { "epoch": 2.3, "learning_rate": 0.0001710564399421129, "loss": 0.0119, "step": 1591 }, { "epoch": 2.3, "learning_rate": 0.00017102428043093746, "loss": 0.2251, "step": 1592 }, { "epoch": 2.3, "learning_rate": 0.00017099212091976202, "loss": 0.1385, "step": 1593 }, { "epoch": 2.31, "learning_rate": 0.0001709599614085866, "loss": 0.2054, "step": 1594 }, { "epoch": 2.31, "learning_rate": 0.00017092780189741118, "loss": 0.1274, "step": 1595 }, { "epoch": 2.31, "learning_rate": 0.00017089564238623574, "loss": 0.1765, "step": 1596 }, { "epoch": 2.31, "learning_rate": 0.0001708634828750603, "loss": 0.6494, "step": 1597 }, { "epoch": 2.31, "learning_rate": 0.00017083132336388488, "loss": 0.0504, "step": 1598 }, { "epoch": 2.31, "learning_rate": 0.00017079916385270944, "loss": 0.2953, "step": 1599 }, { "epoch": 2.31, "learning_rate": 0.00017076700434153402, "loss": 0.3096, "step": 1600 }, { "epoch": 2.32, "learning_rate": 0.00017073484483035858, "loss": 0.0234, "step": 1601 }, { "epoch": 2.32, "learning_rate": 0.00017070268531918316, "loss": 0.1962, "step": 1602 }, { "epoch": 2.32, "learning_rate": 0.00017067052580800772, "loss": 0.1768, "step": 1603 }, { "epoch": 2.32, "learning_rate": 0.0001706383662968323, "loss": 0.012, "step": 1604 }, { "epoch": 2.32, "learning_rate": 0.00017060620678565686, "loss": 0.2014, "step": 1605 }, { "epoch": 2.32, "learning_rate": 0.00017057404727448142, "loss": 0.2881, "step": 1606 }, { "epoch": 2.32, "learning_rate": 0.000170541887763306, "loss": 0.158, "step": 1607 }, { "epoch": 2.33, "learning_rate": 0.0001705097282521306, "loss": 0.4688, "step": 1608 }, { "epoch": 2.33, "learning_rate": 0.00017047756874095514, "loss": 0.0106, "step": 1609 }, { "epoch": 2.33, "learning_rate": 0.00017044540922977973, "loss": 0.008, "step": 1610 }, { "epoch": 2.33, "learning_rate": 0.00017041324971860428, "loss": 0.2302, "step": 1611 }, { "epoch": 2.33, "learning_rate": 0.00017038109020742884, "loss": 0.2566, "step": 1612 }, { "epoch": 2.33, "learning_rate": 0.00017034893069625343, "loss": 0.0629, "step": 1613 }, { "epoch": 2.33, "learning_rate": 0.000170316771185078, "loss": 0.0368, "step": 1614 }, { "epoch": 2.34, "learning_rate": 0.00017028461167390257, "loss": 0.2354, "step": 1615 }, { "epoch": 2.34, "learning_rate": 0.00017025245216272715, "loss": 0.0455, "step": 1616 }, { "epoch": 2.34, "learning_rate": 0.0001702202926515517, "loss": 0.0187, "step": 1617 }, { "epoch": 2.34, "learning_rate": 0.00017018813314037626, "loss": 0.2166, "step": 1618 }, { "epoch": 2.34, "learning_rate": 0.00017015597362920085, "loss": 0.1858, "step": 1619 }, { "epoch": 2.34, "learning_rate": 0.0001701238141180254, "loss": 0.1926, "step": 1620 }, { "epoch": 2.34, "learning_rate": 0.00017009165460685, "loss": 0.0081, "step": 1621 }, { "epoch": 2.35, "learning_rate": 0.00017005949509567455, "loss": 0.0862, "step": 1622 }, { "epoch": 2.35, "learning_rate": 0.00017002733558449913, "loss": 0.0774, "step": 1623 }, { "epoch": 2.35, "learning_rate": 0.0001699951760733237, "loss": 0.204, "step": 1624 }, { "epoch": 2.35, "learning_rate": 0.00016996301656214827, "loss": 0.1578, "step": 1625 }, { "epoch": 2.35, "learning_rate": 0.00016993085705097283, "loss": 0.3604, "step": 1626 }, { "epoch": 2.35, "learning_rate": 0.0001698986975397974, "loss": 0.0543, "step": 1627 }, { "epoch": 2.35, "learning_rate": 0.00016986653802862197, "loss": 0.0081, "step": 1628 }, { "epoch": 2.36, "learning_rate": 0.00016983437851744655, "loss": 0.0377, "step": 1629 }, { "epoch": 2.36, "learning_rate": 0.0001698022190062711, "loss": 0.0327, "step": 1630 }, { "epoch": 2.36, "learning_rate": 0.00016977005949509567, "loss": 0.5742, "step": 1631 }, { "epoch": 2.36, "learning_rate": 0.00016973789998392025, "loss": 0.1239, "step": 1632 }, { "epoch": 2.36, "learning_rate": 0.0001697057404727448, "loss": 0.2592, "step": 1633 }, { "epoch": 2.36, "learning_rate": 0.0001696735809615694, "loss": 0.5004, "step": 1634 }, { "epoch": 2.36, "learning_rate": 0.00016964142145039398, "loss": 0.051, "step": 1635 }, { "epoch": 2.37, "learning_rate": 0.00016960926193921853, "loss": 0.0735, "step": 1636 }, { "epoch": 2.37, "learning_rate": 0.0001695771024280431, "loss": 0.3551, "step": 1637 }, { "epoch": 2.37, "learning_rate": 0.00016954494291686767, "loss": 0.4069, "step": 1638 }, { "epoch": 2.37, "learning_rate": 0.00016951278340569223, "loss": 0.0287, "step": 1639 }, { "epoch": 2.37, "learning_rate": 0.00016948062389451682, "loss": 0.0249, "step": 1640 }, { "epoch": 2.37, "learning_rate": 0.0001694484643833414, "loss": 0.1749, "step": 1641 }, { "epoch": 2.37, "learning_rate": 0.00016941630487216596, "loss": 0.0287, "step": 1642 }, { "epoch": 2.38, "learning_rate": 0.00016938414536099051, "loss": 0.5527, "step": 1643 }, { "epoch": 2.38, "learning_rate": 0.0001693519858498151, "loss": 0.0955, "step": 1644 }, { "epoch": 2.38, "learning_rate": 0.00016931982633863965, "loss": 0.0499, "step": 1645 }, { "epoch": 2.38, "learning_rate": 0.0001692876668274642, "loss": 0.1488, "step": 1646 }, { "epoch": 2.38, "learning_rate": 0.0001692555073162888, "loss": 0.0203, "step": 1647 }, { "epoch": 2.38, "learning_rate": 0.00016922334780511338, "loss": 0.1826, "step": 1648 }, { "epoch": 2.38, "learning_rate": 0.00016919118829393794, "loss": 0.0136, "step": 1649 }, { "epoch": 2.39, "learning_rate": 0.00016915902878276252, "loss": 0.5833, "step": 1650 }, { "epoch": 2.39, "learning_rate": 0.00016912686927158708, "loss": 0.1033, "step": 1651 }, { "epoch": 2.39, "learning_rate": 0.00016909470976041163, "loss": 0.0281, "step": 1652 }, { "epoch": 2.39, "learning_rate": 0.00016906255024923622, "loss": 0.0058, "step": 1653 }, { "epoch": 2.39, "learning_rate": 0.0001690303907380608, "loss": 0.1567, "step": 1654 }, { "epoch": 2.39, "learning_rate": 0.00016899823122688536, "loss": 0.2866, "step": 1655 }, { "epoch": 2.39, "learning_rate": 0.00016896607171570992, "loss": 0.2544, "step": 1656 }, { "epoch": 2.4, "learning_rate": 0.0001689339122045345, "loss": 0.0928, "step": 1657 }, { "epoch": 2.4, "learning_rate": 0.00016890175269335906, "loss": 0.008, "step": 1658 }, { "epoch": 2.4, "learning_rate": 0.00016886959318218364, "loss": 0.0258, "step": 1659 }, { "epoch": 2.4, "learning_rate": 0.00016883743367100823, "loss": 0.1749, "step": 1660 }, { "epoch": 2.4, "learning_rate": 0.00016880527415983278, "loss": 0.3221, "step": 1661 }, { "epoch": 2.4, "learning_rate": 0.00016877311464865734, "loss": 0.3672, "step": 1662 }, { "epoch": 2.4, "learning_rate": 0.00016874095513748192, "loss": 0.2632, "step": 1663 }, { "epoch": 2.41, "learning_rate": 0.00016870879562630648, "loss": 0.2772, "step": 1664 }, { "epoch": 2.41, "learning_rate": 0.00016867663611513104, "loss": 0.2218, "step": 1665 }, { "epoch": 2.41, "learning_rate": 0.00016864447660395565, "loss": 0.0139, "step": 1666 }, { "epoch": 2.41, "learning_rate": 0.0001686123170927802, "loss": 0.0659, "step": 1667 }, { "epoch": 2.41, "learning_rate": 0.00016858015758160476, "loss": 0.0154, "step": 1668 }, { "epoch": 2.41, "learning_rate": 0.00016854799807042935, "loss": 0.007, "step": 1669 }, { "epoch": 2.42, "learning_rate": 0.0001685158385592539, "loss": 0.0093, "step": 1670 }, { "epoch": 2.42, "learning_rate": 0.00016848367904807846, "loss": 0.2729, "step": 1671 }, { "epoch": 2.42, "learning_rate": 0.00016845151953690305, "loss": 0.2322, "step": 1672 }, { "epoch": 2.42, "learning_rate": 0.00016841936002572763, "loss": 0.2107, "step": 1673 }, { "epoch": 2.42, "learning_rate": 0.00016838720051455219, "loss": 0.0215, "step": 1674 }, { "epoch": 2.42, "learning_rate": 0.00016835504100337677, "loss": 0.0486, "step": 1675 }, { "epoch": 2.42, "learning_rate": 0.00016832288149220133, "loss": 0.1523, "step": 1676 }, { "epoch": 2.43, "learning_rate": 0.00016829072198102588, "loss": 0.0129, "step": 1677 }, { "epoch": 2.43, "learning_rate": 0.00016825856246985047, "loss": 0.0858, "step": 1678 }, { "epoch": 2.43, "learning_rate": 0.00016822640295867505, "loss": 0.4778, "step": 1679 }, { "epoch": 2.43, "learning_rate": 0.0001681942434474996, "loss": 0.3769, "step": 1680 }, { "epoch": 2.43, "learning_rate": 0.00016816208393632417, "loss": 0.3506, "step": 1681 }, { "epoch": 2.43, "learning_rate": 0.00016812992442514875, "loss": 0.2158, "step": 1682 }, { "epoch": 2.43, "learning_rate": 0.0001680977649139733, "loss": 0.0997, "step": 1683 }, { "epoch": 2.44, "learning_rate": 0.00016806560540279786, "loss": 0.0117, "step": 1684 }, { "epoch": 2.44, "learning_rate": 0.00016803344589162248, "loss": 0.2346, "step": 1685 }, { "epoch": 2.44, "learning_rate": 0.00016800128638044703, "loss": 0.7344, "step": 1686 }, { "epoch": 2.44, "learning_rate": 0.0001679691268692716, "loss": 0.1329, "step": 1687 }, { "epoch": 2.44, "learning_rate": 0.00016793696735809617, "loss": 0.4749, "step": 1688 }, { "epoch": 2.44, "learning_rate": 0.00016790480784692073, "loss": 0.1582, "step": 1689 }, { "epoch": 2.44, "learning_rate": 0.0001678726483357453, "loss": 0.2216, "step": 1690 }, { "epoch": 2.45, "learning_rate": 0.00016784048882456987, "loss": 0.0297, "step": 1691 }, { "epoch": 2.45, "learning_rate": 0.00016780832931339446, "loss": 0.2529, "step": 1692 }, { "epoch": 2.45, "learning_rate": 0.000167776169802219, "loss": 0.0373, "step": 1693 }, { "epoch": 2.45, "learning_rate": 0.0001677440102910436, "loss": 0.347, "step": 1694 }, { "epoch": 2.45, "learning_rate": 0.00016771185077986815, "loss": 0.1392, "step": 1695 }, { "epoch": 2.45, "learning_rate": 0.0001676796912686927, "loss": 0.0433, "step": 1696 }, { "epoch": 2.45, "learning_rate": 0.0001676475317575173, "loss": 0.0459, "step": 1697 }, { "epoch": 2.46, "learning_rate": 0.00016761537224634188, "loss": 0.2012, "step": 1698 }, { "epoch": 2.46, "learning_rate": 0.00016758321273516644, "loss": 0.0261, "step": 1699 }, { "epoch": 2.46, "learning_rate": 0.000167551053223991, "loss": 0.1572, "step": 1700 }, { "epoch": 2.46, "learning_rate": 0.00016751889371281558, "loss": 0.3038, "step": 1701 }, { "epoch": 2.46, "learning_rate": 0.00016748673420164013, "loss": 0.0451, "step": 1702 }, { "epoch": 2.46, "learning_rate": 0.00016745457469046472, "loss": 0.0455, "step": 1703 }, { "epoch": 2.46, "learning_rate": 0.0001674224151792893, "loss": 0.3909, "step": 1704 }, { "epoch": 2.47, "learning_rate": 0.00016739025566811386, "loss": 0.017, "step": 1705 }, { "epoch": 2.47, "learning_rate": 0.00016735809615693842, "loss": 0.4316, "step": 1706 }, { "epoch": 2.47, "learning_rate": 0.000167325936645763, "loss": 0.0135, "step": 1707 }, { "epoch": 2.47, "learning_rate": 0.00016729377713458756, "loss": 0.3873, "step": 1708 }, { "epoch": 2.47, "learning_rate": 0.0001672616176234121, "loss": 0.0713, "step": 1709 }, { "epoch": 2.47, "learning_rate": 0.0001672294581122367, "loss": 0.3954, "step": 1710 }, { "epoch": 2.47, "learning_rate": 0.00016719729860106128, "loss": 0.171, "step": 1711 }, { "epoch": 2.48, "learning_rate": 0.00016716513908988584, "loss": 0.272, "step": 1712 }, { "epoch": 2.48, "learning_rate": 0.00016713297957871042, "loss": 0.2456, "step": 1713 }, { "epoch": 2.48, "learning_rate": 0.00016710082006753498, "loss": 0.3362, "step": 1714 }, { "epoch": 2.48, "learning_rate": 0.00016706866055635954, "loss": 0.1274, "step": 1715 }, { "epoch": 2.48, "learning_rate": 0.00016703650104518412, "loss": 0.0745, "step": 1716 }, { "epoch": 2.48, "learning_rate": 0.0001670043415340087, "loss": 0.1982, "step": 1717 }, { "epoch": 2.48, "learning_rate": 0.00016697218202283326, "loss": 0.4167, "step": 1718 }, { "epoch": 2.49, "learning_rate": 0.00016694002251165785, "loss": 0.1379, "step": 1719 }, { "epoch": 2.49, "learning_rate": 0.0001669078630004824, "loss": 0.0204, "step": 1720 }, { "epoch": 2.49, "learning_rate": 0.00016687570348930696, "loss": 0.2314, "step": 1721 }, { "epoch": 2.49, "learning_rate": 0.00016684354397813154, "loss": 0.094, "step": 1722 }, { "epoch": 2.49, "learning_rate": 0.0001668113844669561, "loss": 0.0729, "step": 1723 }, { "epoch": 2.49, "learning_rate": 0.00016677922495578068, "loss": 0.0383, "step": 1724 }, { "epoch": 2.49, "learning_rate": 0.00016674706544460524, "loss": 0.4639, "step": 1725 }, { "epoch": 2.5, "learning_rate": 0.00016671490593342983, "loss": 0.2704, "step": 1726 }, { "epoch": 2.5, "learning_rate": 0.00016668274642225438, "loss": 0.2522, "step": 1727 }, { "epoch": 2.5, "learning_rate": 0.00016665058691107897, "loss": 0.1787, "step": 1728 }, { "epoch": 2.5, "learning_rate": 0.00016661842739990352, "loss": 0.2781, "step": 1729 }, { "epoch": 2.5, "learning_rate": 0.0001665862678887281, "loss": 0.0497, "step": 1730 }, { "epoch": 2.5, "learning_rate": 0.00016655410837755266, "loss": 0.2639, "step": 1731 }, { "epoch": 2.5, "learning_rate": 0.00016652194886637725, "loss": 0.0533, "step": 1732 }, { "epoch": 2.51, "learning_rate": 0.0001664897893552018, "loss": 0.0938, "step": 1733 }, { "epoch": 2.51, "learning_rate": 0.00016645762984402636, "loss": 0.1753, "step": 1734 }, { "epoch": 2.51, "learning_rate": 0.00016642547033285095, "loss": 0.286, "step": 1735 }, { "epoch": 2.51, "learning_rate": 0.0001663933108216755, "loss": 0.458, "step": 1736 }, { "epoch": 2.51, "learning_rate": 0.0001663611513105001, "loss": 0.2529, "step": 1737 }, { "epoch": 2.51, "learning_rate": 0.00016632899179932467, "loss": 0.0336, "step": 1738 }, { "epoch": 2.51, "learning_rate": 0.00016629683228814923, "loss": 0.0185, "step": 1739 }, { "epoch": 2.52, "learning_rate": 0.00016626467277697379, "loss": 0.5234, "step": 1740 }, { "epoch": 2.52, "learning_rate": 0.00016623251326579837, "loss": 0.3333, "step": 1741 }, { "epoch": 2.52, "learning_rate": 0.00016620035375462293, "loss": 0.5117, "step": 1742 }, { "epoch": 2.52, "learning_rate": 0.0001661681942434475, "loss": 0.0285, "step": 1743 }, { "epoch": 2.52, "learning_rate": 0.0001661360347322721, "loss": 0.3501, "step": 1744 }, { "epoch": 2.52, "learning_rate": 0.00016610387522109665, "loss": 0.2922, "step": 1745 }, { "epoch": 2.52, "learning_rate": 0.0001660717157099212, "loss": 0.0815, "step": 1746 }, { "epoch": 2.53, "learning_rate": 0.0001660395561987458, "loss": 0.1758, "step": 1747 }, { "epoch": 2.53, "learning_rate": 0.00016600739668757035, "loss": 0.0198, "step": 1748 }, { "epoch": 2.53, "learning_rate": 0.00016597523717639493, "loss": 0.3853, "step": 1749 }, { "epoch": 2.53, "learning_rate": 0.0001659430776652195, "loss": 0.2095, "step": 1750 }, { "epoch": 2.53, "learning_rate": 0.00016591091815404407, "loss": 0.5811, "step": 1751 }, { "epoch": 2.53, "learning_rate": 0.00016587875864286863, "loss": 0.1929, "step": 1752 }, { "epoch": 2.54, "learning_rate": 0.00016584659913169322, "loss": 0.1608, "step": 1753 }, { "epoch": 2.54, "learning_rate": 0.00016581443962051777, "loss": 0.2416, "step": 1754 }, { "epoch": 2.54, "learning_rate": 0.00016578228010934233, "loss": 0.4052, "step": 1755 }, { "epoch": 2.54, "learning_rate": 0.00016575012059816691, "loss": 0.2181, "step": 1756 }, { "epoch": 2.54, "learning_rate": 0.0001657179610869915, "loss": 0.3525, "step": 1757 }, { "epoch": 2.54, "learning_rate": 0.00016568580157581605, "loss": 0.158, "step": 1758 }, { "epoch": 2.54, "learning_rate": 0.0001656536420646406, "loss": 0.4443, "step": 1759 }, { "epoch": 2.55, "learning_rate": 0.0001656214825534652, "loss": 0.0185, "step": 1760 }, { "epoch": 2.55, "learning_rate": 0.00016558932304228975, "loss": 0.192, "step": 1761 }, { "epoch": 2.55, "learning_rate": 0.00016555716353111434, "loss": 0.0534, "step": 1762 }, { "epoch": 2.55, "learning_rate": 0.00016552500401993892, "loss": 0.0596, "step": 1763 }, { "epoch": 2.55, "learning_rate": 0.00016549284450876348, "loss": 0.1501, "step": 1764 }, { "epoch": 2.55, "learning_rate": 0.00016546068499758803, "loss": 0.3413, "step": 1765 }, { "epoch": 2.55, "learning_rate": 0.00016542852548641262, "loss": 0.0476, "step": 1766 }, { "epoch": 2.56, "learning_rate": 0.00016539636597523718, "loss": 0.0573, "step": 1767 }, { "epoch": 2.56, "learning_rate": 0.00016536420646406173, "loss": 0.0079, "step": 1768 }, { "epoch": 2.56, "learning_rate": 0.00016533204695288634, "loss": 0.4541, "step": 1769 }, { "epoch": 2.56, "learning_rate": 0.0001652998874417109, "loss": 0.082, "step": 1770 }, { "epoch": 2.56, "learning_rate": 0.00016526772793053546, "loss": 0.0681, "step": 1771 }, { "epoch": 2.56, "learning_rate": 0.00016523556841936004, "loss": 0.189, "step": 1772 }, { "epoch": 2.56, "learning_rate": 0.0001652034089081846, "loss": 0.1296, "step": 1773 }, { "epoch": 2.57, "learning_rate": 0.00016517124939700916, "loss": 0.5586, "step": 1774 }, { "epoch": 2.57, "learning_rate": 0.00016513908988583374, "loss": 0.024, "step": 1775 }, { "epoch": 2.57, "learning_rate": 0.00016510693037465832, "loss": 0.1633, "step": 1776 }, { "epoch": 2.57, "learning_rate": 0.00016507477086348288, "loss": 0.3184, "step": 1777 }, { "epoch": 2.57, "learning_rate": 0.00016504261135230747, "loss": 0.4739, "step": 1778 }, { "epoch": 2.57, "learning_rate": 0.00016501045184113202, "loss": 0.1322, "step": 1779 }, { "epoch": 2.57, "learning_rate": 0.00016497829232995658, "loss": 0.2874, "step": 1780 }, { "epoch": 2.58, "learning_rate": 0.00016494613281878116, "loss": 0.1083, "step": 1781 }, { "epoch": 2.58, "learning_rate": 0.00016491397330760575, "loss": 0.2228, "step": 1782 }, { "epoch": 2.58, "learning_rate": 0.0001648818137964303, "loss": 0.1611, "step": 1783 }, { "epoch": 2.58, "learning_rate": 0.00016484965428525486, "loss": 0.3369, "step": 1784 }, { "epoch": 2.58, "learning_rate": 0.00016481749477407945, "loss": 0.0122, "step": 1785 }, { "epoch": 2.58, "learning_rate": 0.000164785335262904, "loss": 0.0317, "step": 1786 }, { "epoch": 2.58, "learning_rate": 0.00016475317575172856, "loss": 0.2966, "step": 1787 }, { "epoch": 2.59, "learning_rate": 0.00016472101624055317, "loss": 0.146, "step": 1788 }, { "epoch": 2.59, "learning_rate": 0.00016468885672937773, "loss": 0.0584, "step": 1789 }, { "epoch": 2.59, "learning_rate": 0.00016465669721820228, "loss": 0.1643, "step": 1790 }, { "epoch": 2.59, "learning_rate": 0.00016462453770702687, "loss": 0.2365, "step": 1791 }, { "epoch": 2.59, "learning_rate": 0.00016459237819585143, "loss": 0.0609, "step": 1792 }, { "epoch": 2.59, "learning_rate": 0.00016456021868467598, "loss": 0.0488, "step": 1793 }, { "epoch": 2.59, "learning_rate": 0.0001645280591735006, "loss": 0.5989, "step": 1794 }, { "epoch": 2.6, "learning_rate": 0.00016449589966232515, "loss": 0.1776, "step": 1795 }, { "epoch": 2.6, "learning_rate": 0.0001644637401511497, "loss": 0.1645, "step": 1796 }, { "epoch": 2.6, "learning_rate": 0.0001644315806399743, "loss": 0.0588, "step": 1797 }, { "epoch": 2.6, "learning_rate": 0.00016439942112879885, "loss": 0.0313, "step": 1798 }, { "epoch": 2.6, "learning_rate": 0.0001643672616176234, "loss": 0.2765, "step": 1799 }, { "epoch": 2.6, "learning_rate": 0.000164335102106448, "loss": 0.0788, "step": 1800 }, { "epoch": 2.6, "learning_rate": 0.00016430294259527257, "loss": 0.137, "step": 1801 }, { "epoch": 2.61, "learning_rate": 0.00016427078308409713, "loss": 0.0137, "step": 1802 }, { "epoch": 2.61, "learning_rate": 0.00016423862357292171, "loss": 0.4941, "step": 1803 }, { "epoch": 2.61, "learning_rate": 0.00016420646406174627, "loss": 0.0987, "step": 1804 }, { "epoch": 2.61, "learning_rate": 0.00016417430455057083, "loss": 0.0638, "step": 1805 }, { "epoch": 2.61, "learning_rate": 0.0001641421450393954, "loss": 0.0332, "step": 1806 }, { "epoch": 2.61, "learning_rate": 0.00016410998552822, "loss": 0.0114, "step": 1807 }, { "epoch": 2.61, "learning_rate": 0.00016407782601704455, "loss": 0.0179, "step": 1808 }, { "epoch": 2.62, "learning_rate": 0.0001640456665058691, "loss": 0.5625, "step": 1809 }, { "epoch": 2.62, "learning_rate": 0.0001640135069946937, "loss": 0.0345, "step": 1810 }, { "epoch": 2.62, "learning_rate": 0.00016398134748351825, "loss": 0.1685, "step": 1811 }, { "epoch": 2.62, "learning_rate": 0.0001639491879723428, "loss": 0.1735, "step": 1812 }, { "epoch": 2.62, "learning_rate": 0.0001639170284611674, "loss": 0.1466, "step": 1813 }, { "epoch": 2.62, "learning_rate": 0.00016388486894999198, "loss": 0.2954, "step": 1814 }, { "epoch": 2.62, "learning_rate": 0.00016385270943881653, "loss": 0.1772, "step": 1815 }, { "epoch": 2.63, "learning_rate": 0.00016382054992764112, "loss": 0.1562, "step": 1816 }, { "epoch": 2.63, "learning_rate": 0.00016378839041646567, "loss": 0.1105, "step": 1817 }, { "epoch": 2.63, "learning_rate": 0.00016375623090529023, "loss": 0.1384, "step": 1818 }, { "epoch": 2.63, "learning_rate": 0.00016372407139411482, "loss": 0.2349, "step": 1819 }, { "epoch": 2.63, "learning_rate": 0.0001636919118829394, "loss": 0.1816, "step": 1820 }, { "epoch": 2.63, "learning_rate": 0.00016365975237176396, "loss": 0.2266, "step": 1821 }, { "epoch": 2.63, "learning_rate": 0.00016362759286058854, "loss": 0.0424, "step": 1822 }, { "epoch": 2.64, "learning_rate": 0.0001635954333494131, "loss": 0.087, "step": 1823 }, { "epoch": 2.64, "learning_rate": 0.00016356327383823765, "loss": 0.2419, "step": 1824 }, { "epoch": 2.64, "learning_rate": 0.00016353111432706224, "loss": 0.6549, "step": 1825 }, { "epoch": 2.64, "learning_rate": 0.0001634989548158868, "loss": 0.1173, "step": 1826 }, { "epoch": 2.64, "learning_rate": 0.00016346679530471138, "loss": 0.0299, "step": 1827 }, { "epoch": 2.64, "learning_rate": 0.00016343463579353596, "loss": 0.4004, "step": 1828 }, { "epoch": 2.64, "learning_rate": 0.00016340247628236052, "loss": 0.0555, "step": 1829 }, { "epoch": 2.65, "learning_rate": 0.00016337031677118508, "loss": 0.0529, "step": 1830 }, { "epoch": 2.65, "learning_rate": 0.00016333815726000966, "loss": 0.0267, "step": 1831 }, { "epoch": 2.65, "learning_rate": 0.00016330599774883422, "loss": 0.0415, "step": 1832 }, { "epoch": 2.65, "learning_rate": 0.0001632738382376588, "loss": 0.3406, "step": 1833 }, { "epoch": 2.65, "learning_rate": 0.00016324167872648336, "loss": 0.5527, "step": 1834 }, { "epoch": 2.65, "learning_rate": 0.00016320951921530794, "loss": 0.1747, "step": 1835 }, { "epoch": 2.66, "learning_rate": 0.0001631773597041325, "loss": 0.1993, "step": 1836 }, { "epoch": 2.66, "learning_rate": 0.00016314520019295706, "loss": 0.0702, "step": 1837 }, { "epoch": 2.66, "learning_rate": 0.00016311304068178164, "loss": 0.0735, "step": 1838 }, { "epoch": 2.66, "learning_rate": 0.00016308088117060623, "loss": 0.2521, "step": 1839 }, { "epoch": 2.66, "learning_rate": 0.00016304872165943078, "loss": 0.2911, "step": 1840 }, { "epoch": 2.66, "learning_rate": 0.00016301656214825537, "loss": 0.0173, "step": 1841 }, { "epoch": 2.66, "learning_rate": 0.00016298440263707992, "loss": 0.0466, "step": 1842 }, { "epoch": 2.67, "learning_rate": 0.00016295224312590448, "loss": 0.25, "step": 1843 }, { "epoch": 2.67, "learning_rate": 0.00016292008361472906, "loss": 0.0391, "step": 1844 }, { "epoch": 2.67, "learning_rate": 0.00016288792410355362, "loss": 0.038, "step": 1845 }, { "epoch": 2.67, "learning_rate": 0.0001628557645923782, "loss": 0.1174, "step": 1846 }, { "epoch": 2.67, "learning_rate": 0.0001628236050812028, "loss": 0.4521, "step": 1847 }, { "epoch": 2.67, "learning_rate": 0.00016279144557002735, "loss": 0.3975, "step": 1848 }, { "epoch": 2.67, "learning_rate": 0.0001627592860588519, "loss": 0.124, "step": 1849 }, { "epoch": 2.68, "learning_rate": 0.0001627271265476765, "loss": 0.0085, "step": 1850 }, { "epoch": 2.68, "learning_rate": 0.00016269496703650104, "loss": 0.1329, "step": 1851 }, { "epoch": 2.68, "learning_rate": 0.00016266280752532563, "loss": 0.1874, "step": 1852 }, { "epoch": 2.68, "learning_rate": 0.0001626306480141502, "loss": 0.3021, "step": 1853 }, { "epoch": 2.68, "learning_rate": 0.00016259848850297477, "loss": 0.0092, "step": 1854 }, { "epoch": 2.68, "learning_rate": 0.00016256632899179933, "loss": 0.2493, "step": 1855 }, { "epoch": 2.68, "learning_rate": 0.0001625341694806239, "loss": 0.2693, "step": 1856 }, { "epoch": 2.69, "learning_rate": 0.00016250200996944847, "loss": 0.6055, "step": 1857 }, { "epoch": 2.69, "learning_rate": 0.00016246985045827302, "loss": 0.2944, "step": 1858 }, { "epoch": 2.69, "learning_rate": 0.0001624376909470976, "loss": 0.0911, "step": 1859 }, { "epoch": 2.69, "learning_rate": 0.0001624055314359222, "loss": 0.048, "step": 1860 }, { "epoch": 2.69, "learning_rate": 0.00016237337192474675, "loss": 0.2705, "step": 1861 }, { "epoch": 2.69, "learning_rate": 0.0001623412124135713, "loss": 0.2197, "step": 1862 }, { "epoch": 2.69, "learning_rate": 0.0001623090529023959, "loss": 0.5763, "step": 1863 }, { "epoch": 2.7, "learning_rate": 0.00016227689339122045, "loss": 0.207, "step": 1864 }, { "epoch": 2.7, "learning_rate": 0.00016224473388004503, "loss": 0.3257, "step": 1865 }, { "epoch": 2.7, "learning_rate": 0.00016221257436886962, "loss": 0.0339, "step": 1866 }, { "epoch": 2.7, "learning_rate": 0.00016218041485769417, "loss": 0.293, "step": 1867 }, { "epoch": 2.7, "learning_rate": 0.00016214825534651873, "loss": 0.3452, "step": 1868 }, { "epoch": 2.7, "learning_rate": 0.00016211609583534331, "loss": 0.4609, "step": 1869 }, { "epoch": 2.7, "learning_rate": 0.00016208393632416787, "loss": 0.2551, "step": 1870 }, { "epoch": 2.71, "learning_rate": 0.00016205177681299243, "loss": 0.1295, "step": 1871 }, { "epoch": 2.71, "learning_rate": 0.00016201961730181704, "loss": 0.0521, "step": 1872 }, { "epoch": 2.71, "learning_rate": 0.0001619874577906416, "loss": 0.1946, "step": 1873 }, { "epoch": 2.71, "learning_rate": 0.00016195529827946615, "loss": 0.0767, "step": 1874 }, { "epoch": 2.71, "learning_rate": 0.00016192313876829074, "loss": 0.2735, "step": 1875 }, { "epoch": 2.71, "learning_rate": 0.0001618909792571153, "loss": 0.2793, "step": 1876 }, { "epoch": 2.71, "learning_rate": 0.00016185881974593985, "loss": 0.1774, "step": 1877 }, { "epoch": 2.72, "learning_rate": 0.00016182666023476446, "loss": 0.2725, "step": 1878 }, { "epoch": 2.72, "learning_rate": 0.00016179450072358902, "loss": 0.0097, "step": 1879 }, { "epoch": 2.72, "learning_rate": 0.00016176234121241358, "loss": 0.1431, "step": 1880 }, { "epoch": 2.72, "learning_rate": 0.00016173018170123816, "loss": 0.3501, "step": 1881 }, { "epoch": 2.72, "learning_rate": 0.00016169802219006272, "loss": 0.3003, "step": 1882 }, { "epoch": 2.72, "learning_rate": 0.00016166586267888727, "loss": 0.2848, "step": 1883 }, { "epoch": 2.72, "learning_rate": 0.00016163370316771186, "loss": 0.0099, "step": 1884 }, { "epoch": 2.73, "learning_rate": 0.00016160154365653644, "loss": 0.122, "step": 1885 }, { "epoch": 2.73, "learning_rate": 0.000161569384145361, "loss": 0.4888, "step": 1886 }, { "epoch": 2.73, "learning_rate": 0.00016153722463418556, "loss": 0.0296, "step": 1887 }, { "epoch": 2.73, "learning_rate": 0.00016150506512301014, "loss": 0.3306, "step": 1888 }, { "epoch": 2.73, "learning_rate": 0.0001614729056118347, "loss": 0.3263, "step": 1889 }, { "epoch": 2.73, "learning_rate": 0.00016144074610065928, "loss": 0.1064, "step": 1890 }, { "epoch": 2.73, "learning_rate": 0.00016140858658948387, "loss": 0.4111, "step": 1891 }, { "epoch": 2.74, "learning_rate": 0.00016137642707830842, "loss": 0.0181, "step": 1892 }, { "epoch": 2.74, "learning_rate": 0.00016134426756713298, "loss": 0.2513, "step": 1893 }, { "epoch": 2.74, "learning_rate": 0.00016131210805595756, "loss": 0.3185, "step": 1894 }, { "epoch": 2.74, "learning_rate": 0.00016127994854478212, "loss": 0.1161, "step": 1895 }, { "epoch": 2.74, "learning_rate": 0.00016124778903360668, "loss": 0.1246, "step": 1896 }, { "epoch": 2.74, "learning_rate": 0.0001612156295224313, "loss": 0.0745, "step": 1897 }, { "epoch": 2.74, "learning_rate": 0.00016118347001125585, "loss": 0.5488, "step": 1898 }, { "epoch": 2.75, "learning_rate": 0.0001611513105000804, "loss": 0.0399, "step": 1899 }, { "epoch": 2.75, "learning_rate": 0.00016111915098890499, "loss": 0.3871, "step": 1900 }, { "epoch": 2.75, "learning_rate": 0.00016108699147772954, "loss": 0.6504, "step": 1901 }, { "epoch": 2.75, "learning_rate": 0.0001610548319665541, "loss": 0.0786, "step": 1902 }, { "epoch": 2.75, "learning_rate": 0.00016102267245537868, "loss": 0.0549, "step": 1903 }, { "epoch": 2.75, "learning_rate": 0.00016099051294420327, "loss": 0.3979, "step": 1904 }, { "epoch": 2.75, "learning_rate": 0.00016095835343302783, "loss": 0.7207, "step": 1905 }, { "epoch": 2.76, "learning_rate": 0.0001609261939218524, "loss": 0.061, "step": 1906 }, { "epoch": 2.76, "learning_rate": 0.00016089403441067697, "loss": 0.2457, "step": 1907 }, { "epoch": 2.76, "learning_rate": 0.00016086187489950152, "loss": 0.21, "step": 1908 }, { "epoch": 2.76, "learning_rate": 0.0001608297153883261, "loss": 0.1514, "step": 1909 }, { "epoch": 2.76, "learning_rate": 0.0001607975558771507, "loss": 0.3848, "step": 1910 }, { "epoch": 2.76, "learning_rate": 0.00016076539636597525, "loss": 0.2281, "step": 1911 }, { "epoch": 2.77, "learning_rate": 0.0001607332368547998, "loss": 0.3379, "step": 1912 }, { "epoch": 2.77, "learning_rate": 0.0001607010773436244, "loss": 0.2444, "step": 1913 }, { "epoch": 2.77, "learning_rate": 0.00016066891783244895, "loss": 0.3932, "step": 1914 }, { "epoch": 2.77, "learning_rate": 0.00016063675832127353, "loss": 0.0188, "step": 1915 }, { "epoch": 2.77, "learning_rate": 0.0001606045988100981, "loss": 0.1398, "step": 1916 }, { "epoch": 2.77, "learning_rate": 0.00016057243929892267, "loss": 0.377, "step": 1917 }, { "epoch": 2.77, "learning_rate": 0.00016054027978774723, "loss": 0.074, "step": 1918 }, { "epoch": 2.78, "learning_rate": 0.0001605081202765718, "loss": 0.1083, "step": 1919 }, { "epoch": 2.78, "learning_rate": 0.00016047596076539637, "loss": 0.2512, "step": 1920 }, { "epoch": 2.78, "learning_rate": 0.00016044380125422093, "loss": 0.1392, "step": 1921 }, { "epoch": 2.78, "learning_rate": 0.0001604116417430455, "loss": 0.0454, "step": 1922 }, { "epoch": 2.78, "learning_rate": 0.0001603794822318701, "loss": 0.0328, "step": 1923 }, { "epoch": 2.78, "learning_rate": 0.00016034732272069465, "loss": 0.1243, "step": 1924 }, { "epoch": 2.78, "learning_rate": 0.00016031516320951924, "loss": 0.5527, "step": 1925 }, { "epoch": 2.79, "learning_rate": 0.0001602830036983438, "loss": 0.4385, "step": 1926 }, { "epoch": 2.79, "learning_rate": 0.00016025084418716835, "loss": 0.0394, "step": 1927 }, { "epoch": 2.79, "learning_rate": 0.00016021868467599293, "loss": 0.2795, "step": 1928 }, { "epoch": 2.79, "learning_rate": 0.0001601865251648175, "loss": 0.3065, "step": 1929 }, { "epoch": 2.79, "learning_rate": 0.00016015436565364207, "loss": 0.0449, "step": 1930 }, { "epoch": 2.79, "learning_rate": 0.00016012220614246666, "loss": 0.0168, "step": 1931 }, { "epoch": 2.79, "learning_rate": 0.00016009004663129122, "loss": 0.0139, "step": 1932 }, { "epoch": 2.8, "learning_rate": 0.00016005788712011577, "loss": 0.013, "step": 1933 }, { "epoch": 2.8, "learning_rate": 0.00016002572760894036, "loss": 0.7988, "step": 1934 }, { "epoch": 2.8, "learning_rate": 0.00015999356809776491, "loss": 0.2734, "step": 1935 }, { "epoch": 2.8, "learning_rate": 0.0001599614085865895, "loss": 0.1261, "step": 1936 }, { "epoch": 2.8, "learning_rate": 0.00015992924907541405, "loss": 0.019, "step": 1937 }, { "epoch": 2.8, "learning_rate": 0.00015989708956423864, "loss": 0.0402, "step": 1938 }, { "epoch": 2.8, "learning_rate": 0.0001598649300530632, "loss": 0.3096, "step": 1939 }, { "epoch": 2.81, "learning_rate": 0.00015983277054188778, "loss": 0.0116, "step": 1940 }, { "epoch": 2.81, "learning_rate": 0.00015980061103071234, "loss": 0.0453, "step": 1941 }, { "epoch": 2.81, "learning_rate": 0.00015976845151953692, "loss": 0.2426, "step": 1942 }, { "epoch": 2.81, "learning_rate": 0.00015973629200836148, "loss": 0.0579, "step": 1943 }, { "epoch": 2.81, "learning_rate": 0.00015970413249718606, "loss": 0.2295, "step": 1944 }, { "epoch": 2.81, "learning_rate": 0.00015967197298601062, "loss": 0.2825, "step": 1945 }, { "epoch": 2.81, "learning_rate": 0.00015963981347483518, "loss": 0.4873, "step": 1946 }, { "epoch": 2.82, "learning_rate": 0.00015960765396365976, "loss": 0.5137, "step": 1947 }, { "epoch": 2.82, "learning_rate": 0.00015957549445248432, "loss": 0.2691, "step": 1948 }, { "epoch": 2.82, "learning_rate": 0.0001595433349413089, "loss": 0.0272, "step": 1949 }, { "epoch": 2.82, "learning_rate": 0.00015951117543013348, "loss": 0.113, "step": 1950 }, { "epoch": 2.82, "learning_rate": 0.00015947901591895804, "loss": 0.2876, "step": 1951 }, { "epoch": 2.82, "learning_rate": 0.0001594468564077826, "loss": 0.3877, "step": 1952 }, { "epoch": 2.82, "learning_rate": 0.00015941469689660718, "loss": 0.3262, "step": 1953 }, { "epoch": 2.83, "learning_rate": 0.00015938253738543174, "loss": 0.2032, "step": 1954 }, { "epoch": 2.83, "learning_rate": 0.00015935037787425632, "loss": 0.2339, "step": 1955 }, { "epoch": 2.83, "learning_rate": 0.0001593182183630809, "loss": 0.3164, "step": 1956 }, { "epoch": 2.83, "learning_rate": 0.00015928605885190546, "loss": 0.1805, "step": 1957 }, { "epoch": 2.83, "learning_rate": 0.00015925389934073002, "loss": 0.0377, "step": 1958 }, { "epoch": 2.83, "learning_rate": 0.0001592217398295546, "loss": 0.1407, "step": 1959 }, { "epoch": 2.83, "learning_rate": 0.00015918958031837916, "loss": 0.1158, "step": 1960 }, { "epoch": 2.84, "learning_rate": 0.00015915742080720372, "loss": 0.3525, "step": 1961 }, { "epoch": 2.84, "learning_rate": 0.0001591252612960283, "loss": 0.1221, "step": 1962 }, { "epoch": 2.84, "learning_rate": 0.0001590931017848529, "loss": 0.0285, "step": 1963 }, { "epoch": 2.84, "learning_rate": 0.00015906094227367744, "loss": 0.0173, "step": 1964 }, { "epoch": 2.84, "learning_rate": 0.00015902878276250203, "loss": 0.8516, "step": 1965 }, { "epoch": 2.84, "learning_rate": 0.00015899662325132659, "loss": 0.0987, "step": 1966 }, { "epoch": 2.84, "learning_rate": 0.00015896446374015114, "loss": 0.02, "step": 1967 }, { "epoch": 2.85, "learning_rate": 0.00015893230422897573, "loss": 0.1711, "step": 1968 }, { "epoch": 2.85, "learning_rate": 0.0001589001447178003, "loss": 0.3748, "step": 1969 }, { "epoch": 2.85, "learning_rate": 0.00015886798520662487, "loss": 0.5713, "step": 1970 }, { "epoch": 2.85, "learning_rate": 0.00015883582569544942, "loss": 0.1522, "step": 1971 }, { "epoch": 2.85, "learning_rate": 0.000158803666184274, "loss": 0.2998, "step": 1972 }, { "epoch": 2.85, "learning_rate": 0.00015877150667309857, "loss": 0.1785, "step": 1973 }, { "epoch": 2.85, "learning_rate": 0.00015873934716192312, "loss": 0.0141, "step": 1974 }, { "epoch": 2.86, "learning_rate": 0.00015870718765074773, "loss": 0.4346, "step": 1975 }, { "epoch": 2.86, "learning_rate": 0.0001586750281395723, "loss": 0.0159, "step": 1976 }, { "epoch": 2.86, "learning_rate": 0.00015864286862839685, "loss": 0.0084, "step": 1977 }, { "epoch": 2.86, "learning_rate": 0.00015861070911722143, "loss": 0.4219, "step": 1978 }, { "epoch": 2.86, "learning_rate": 0.000158578549606046, "loss": 0.1133, "step": 1979 }, { "epoch": 2.86, "learning_rate": 0.00015854639009487055, "loss": 0.0104, "step": 1980 }, { "epoch": 2.86, "learning_rate": 0.00015851423058369516, "loss": 0.0461, "step": 1981 }, { "epoch": 2.87, "learning_rate": 0.00015848207107251971, "loss": 0.3641, "step": 1982 }, { "epoch": 2.87, "learning_rate": 0.00015844991156134427, "loss": 0.194, "step": 1983 }, { "epoch": 2.87, "learning_rate": 0.00015841775205016886, "loss": 0.0982, "step": 1984 }, { "epoch": 2.87, "learning_rate": 0.0001583855925389934, "loss": 0.5241, "step": 1985 }, { "epoch": 2.87, "learning_rate": 0.00015835343302781797, "loss": 0.0928, "step": 1986 }, { "epoch": 2.87, "learning_rate": 0.00015832127351664255, "loss": 0.0137, "step": 1987 }, { "epoch": 2.87, "learning_rate": 0.00015828911400546714, "loss": 0.2435, "step": 1988 }, { "epoch": 2.88, "learning_rate": 0.0001582569544942917, "loss": 0.037, "step": 1989 }, { "epoch": 2.88, "learning_rate": 0.00015822479498311625, "loss": 0.216, "step": 1990 }, { "epoch": 2.88, "learning_rate": 0.00015819263547194084, "loss": 0.4902, "step": 1991 }, { "epoch": 2.88, "learning_rate": 0.0001581604759607654, "loss": 0.0137, "step": 1992 }, { "epoch": 2.88, "learning_rate": 0.00015812831644958998, "loss": 0.026, "step": 1993 }, { "epoch": 2.88, "learning_rate": 0.00015809615693841456, "loss": 0.0464, "step": 1994 }, { "epoch": 2.89, "learning_rate": 0.00015806399742723912, "loss": 0.0305, "step": 1995 }, { "epoch": 2.89, "learning_rate": 0.00015803183791606367, "loss": 0.2808, "step": 1996 }, { "epoch": 2.89, "learning_rate": 0.00015799967840488826, "loss": 0.3354, "step": 1997 }, { "epoch": 2.89, "learning_rate": 0.00015796751889371282, "loss": 0.0187, "step": 1998 }, { "epoch": 2.89, "learning_rate": 0.00015793535938253737, "loss": 0.0598, "step": 1999 }, { "epoch": 2.89, "learning_rate": 0.00015790319987136198, "loss": 0.1438, "step": 2000 }, { "epoch": 2.89, "learning_rate": 0.00015787104036018654, "loss": 0.0119, "step": 2001 }, { "epoch": 2.9, "learning_rate": 0.0001578388808490111, "loss": 0.017, "step": 2002 }, { "epoch": 2.9, "learning_rate": 0.00015780672133783568, "loss": 0.3076, "step": 2003 }, { "epoch": 2.9, "learning_rate": 0.00015777456182666024, "loss": 0.0094, "step": 2004 }, { "epoch": 2.9, "learning_rate": 0.0001577424023154848, "loss": 0.3273, "step": 2005 }, { "epoch": 2.9, "learning_rate": 0.00015771024280430938, "loss": 0.4968, "step": 2006 }, { "epoch": 2.9, "learning_rate": 0.00015767808329313396, "loss": 0.2585, "step": 2007 }, { "epoch": 2.9, "learning_rate": 0.00015764592378195852, "loss": 0.1998, "step": 2008 }, { "epoch": 2.91, "learning_rate": 0.0001576137642707831, "loss": 0.0549, "step": 2009 }, { "epoch": 2.91, "learning_rate": 0.00015758160475960766, "loss": 0.0504, "step": 2010 }, { "epoch": 2.91, "learning_rate": 0.00015754944524843222, "loss": 0.0888, "step": 2011 }, { "epoch": 2.91, "learning_rate": 0.0001575172857372568, "loss": 0.2949, "step": 2012 }, { "epoch": 2.91, "learning_rate": 0.0001574851262260814, "loss": 0.2532, "step": 2013 }, { "epoch": 2.91, "learning_rate": 0.00015745296671490594, "loss": 0.2325, "step": 2014 }, { "epoch": 2.91, "learning_rate": 0.0001574208072037305, "loss": 0.0394, "step": 2015 }, { "epoch": 2.92, "learning_rate": 0.00015738864769255508, "loss": 0.252, "step": 2016 }, { "epoch": 2.92, "learning_rate": 0.00015735648818137964, "loss": 0.1149, "step": 2017 }, { "epoch": 2.92, "learning_rate": 0.00015732432867020423, "loss": 0.0218, "step": 2018 }, { "epoch": 2.92, "learning_rate": 0.00015729216915902878, "loss": 0.4497, "step": 2019 }, { "epoch": 2.92, "learning_rate": 0.00015726000964785337, "loss": 0.2221, "step": 2020 }, { "epoch": 2.92, "learning_rate": 0.00015722785013667792, "loss": 0.1936, "step": 2021 }, { "epoch": 2.92, "learning_rate": 0.0001571956906255025, "loss": 0.065, "step": 2022 }, { "epoch": 2.93, "learning_rate": 0.00015716353111432706, "loss": 0.1867, "step": 2023 }, { "epoch": 2.93, "learning_rate": 0.00015713137160315162, "loss": 0.1152, "step": 2024 }, { "epoch": 2.93, "learning_rate": 0.0001570992120919762, "loss": 0.0612, "step": 2025 }, { "epoch": 2.93, "learning_rate": 0.0001570670525808008, "loss": 0.0903, "step": 2026 }, { "epoch": 2.93, "learning_rate": 0.00015703489306962535, "loss": 0.0605, "step": 2027 }, { "epoch": 2.93, "learning_rate": 0.00015700273355844993, "loss": 0.0685, "step": 2028 }, { "epoch": 2.93, "learning_rate": 0.0001569705740472745, "loss": 0.2529, "step": 2029 }, { "epoch": 2.94, "learning_rate": 0.00015693841453609904, "loss": 0.3046, "step": 2030 }, { "epoch": 2.94, "learning_rate": 0.00015690625502492363, "loss": 0.1602, "step": 2031 }, { "epoch": 2.94, "learning_rate": 0.0001568740955137482, "loss": 0.4082, "step": 2032 }, { "epoch": 2.94, "learning_rate": 0.00015684193600257277, "loss": 0.0274, "step": 2033 }, { "epoch": 2.94, "learning_rate": 0.00015680977649139735, "loss": 0.0233, "step": 2034 }, { "epoch": 2.94, "learning_rate": 0.0001567776169802219, "loss": 0.4481, "step": 2035 }, { "epoch": 2.94, "learning_rate": 0.00015674545746904647, "loss": 0.3701, "step": 2036 }, { "epoch": 2.95, "learning_rate": 0.00015671329795787105, "loss": 0.0446, "step": 2037 }, { "epoch": 2.95, "learning_rate": 0.0001566811384466956, "loss": 0.5459, "step": 2038 }, { "epoch": 2.95, "learning_rate": 0.0001566489789355202, "loss": 0.4189, "step": 2039 }, { "epoch": 2.95, "learning_rate": 0.00015661681942434475, "loss": 0.1179, "step": 2040 }, { "epoch": 2.95, "learning_rate": 0.00015658465991316933, "loss": 0.205, "step": 2041 }, { "epoch": 2.95, "learning_rate": 0.0001565525004019939, "loss": 0.2819, "step": 2042 }, { "epoch": 2.95, "learning_rate": 0.00015652034089081847, "loss": 0.2593, "step": 2043 }, { "epoch": 2.96, "learning_rate": 0.00015648818137964303, "loss": 0.5488, "step": 2044 }, { "epoch": 2.96, "learning_rate": 0.00015645602186846762, "loss": 0.2734, "step": 2045 }, { "epoch": 2.96, "learning_rate": 0.00015642386235729217, "loss": 0.1224, "step": 2046 }, { "epoch": 2.96, "learning_rate": 0.00015639170284611676, "loss": 0.5303, "step": 2047 }, { "epoch": 2.96, "learning_rate": 0.00015635954333494131, "loss": 0.2375, "step": 2048 }, { "epoch": 2.96, "learning_rate": 0.00015632738382376587, "loss": 0.2517, "step": 2049 }, { "epoch": 2.96, "learning_rate": 0.00015629522431259045, "loss": 0.2017, "step": 2050 }, { "epoch": 2.97, "learning_rate": 0.000156263064801415, "loss": 0.2212, "step": 2051 }, { "epoch": 2.97, "learning_rate": 0.0001562309052902396, "loss": 0.3268, "step": 2052 }, { "epoch": 2.97, "learning_rate": 0.00015619874577906418, "loss": 0.0243, "step": 2053 }, { "epoch": 2.97, "learning_rate": 0.00015616658626788874, "loss": 0.3549, "step": 2054 }, { "epoch": 2.97, "learning_rate": 0.0001561344267567133, "loss": 0.1644, "step": 2055 }, { "epoch": 2.97, "learning_rate": 0.00015610226724553788, "loss": 0.4922, "step": 2056 }, { "epoch": 2.97, "learning_rate": 0.00015607010773436243, "loss": 0.583, "step": 2057 }, { "epoch": 2.98, "learning_rate": 0.00015603794822318702, "loss": 0.2036, "step": 2058 }, { "epoch": 2.98, "learning_rate": 0.0001560057887120116, "loss": 0.2704, "step": 2059 }, { "epoch": 2.98, "learning_rate": 0.00015597362920083616, "loss": 0.0529, "step": 2060 }, { "epoch": 2.98, "learning_rate": 0.00015594146968966072, "loss": 0.4307, "step": 2061 }, { "epoch": 2.98, "learning_rate": 0.0001559093101784853, "loss": 0.0286, "step": 2062 }, { "epoch": 2.98, "learning_rate": 0.00015587715066730986, "loss": 0.3091, "step": 2063 }, { "epoch": 2.98, "learning_rate": 0.00015584499115613441, "loss": 0.2855, "step": 2064 }, { "epoch": 2.99, "learning_rate": 0.000155812831644959, "loss": 0.5498, "step": 2065 }, { "epoch": 2.99, "learning_rate": 0.00015578067213378358, "loss": 0.0745, "step": 2066 }, { "epoch": 2.99, "learning_rate": 0.00015574851262260814, "loss": 0.015, "step": 2067 }, { "epoch": 2.99, "learning_rate": 0.00015571635311143272, "loss": 0.2207, "step": 2068 }, { "epoch": 2.99, "learning_rate": 0.00015568419360025728, "loss": 0.0756, "step": 2069 }, { "epoch": 2.99, "learning_rate": 0.00015565203408908184, "loss": 0.157, "step": 2070 }, { "epoch": 2.99, "learning_rate": 0.00015561987457790642, "loss": 0.0165, "step": 2071 }, { "epoch": 3.0, "learning_rate": 0.000155587715066731, "loss": 0.3521, "step": 2072 }, { "epoch": 3.0, "learning_rate": 0.00015555555555555556, "loss": 0.0047, "step": 2073 }, { "epoch": 3.0, "learning_rate": 0.00015552339604438012, "loss": 0.6113, "step": 2074 }, { "epoch": 3.0, "learning_rate": 0.0001554912365332047, "loss": 0.02, "step": 2075 }, { "epoch": 3.0, "learning_rate": 0.00015545907702202926, "loss": 0.3555, "step": 2076 }, { "epoch": 3.0, "learning_rate": 0.00015542691751085385, "loss": 0.1415, "step": 2077 }, { "epoch": 3.01, "learning_rate": 0.00015539475799967843, "loss": 0.183, "step": 2078 }, { "epoch": 3.01, "learning_rate": 0.00015536259848850299, "loss": 0.12, "step": 2079 }, { "epoch": 3.01, "learning_rate": 0.00015533043897732754, "loss": 0.0127, "step": 2080 }, { "epoch": 3.01, "learning_rate": 0.00015529827946615213, "loss": 0.0058, "step": 2081 }, { "epoch": 3.01, "learning_rate": 0.00015526611995497668, "loss": 0.1831, "step": 2082 }, { "epoch": 3.01, "learning_rate": 0.00015523396044380124, "loss": 0.2087, "step": 2083 }, { "epoch": 3.01, "learning_rate": 0.00015520180093262585, "loss": 0.0527, "step": 2084 }, { "epoch": 3.02, "learning_rate": 0.0001551696414214504, "loss": 0.0683, "step": 2085 }, { "epoch": 3.02, "learning_rate": 0.00015513748191027497, "loss": 0.0721, "step": 2086 }, { "epoch": 3.02, "learning_rate": 0.00015510532239909955, "loss": 0.1737, "step": 2087 }, { "epoch": 3.02, "learning_rate": 0.0001550731628879241, "loss": 0.061, "step": 2088 }, { "epoch": 3.02, "learning_rate": 0.00015504100337674866, "loss": 0.0925, "step": 2089 }, { "epoch": 3.02, "learning_rate": 0.00015500884386557325, "loss": 0.0186, "step": 2090 }, { "epoch": 3.02, "learning_rate": 0.00015497668435439783, "loss": 0.1965, "step": 2091 }, { "epoch": 3.03, "learning_rate": 0.0001549445248432224, "loss": 0.0649, "step": 2092 }, { "epoch": 3.03, "learning_rate": 0.00015491236533204697, "loss": 0.0434, "step": 2093 }, { "epoch": 3.03, "learning_rate": 0.00015488020582087153, "loss": 0.0138, "step": 2094 }, { "epoch": 3.03, "learning_rate": 0.0001548480463096961, "loss": 0.1797, "step": 2095 }, { "epoch": 3.03, "learning_rate": 0.00015481588679852067, "loss": 0.1164, "step": 2096 }, { "epoch": 3.03, "learning_rate": 0.00015478372728734526, "loss": 0.0092, "step": 2097 }, { "epoch": 3.03, "learning_rate": 0.0001547515677761698, "loss": 0.0776, "step": 2098 }, { "epoch": 3.04, "learning_rate": 0.00015471940826499437, "loss": 0.1714, "step": 2099 }, { "epoch": 3.04, "learning_rate": 0.00015468724875381895, "loss": 0.1658, "step": 2100 }, { "epoch": 3.04, "learning_rate": 0.0001546550892426435, "loss": 0.0601, "step": 2101 }, { "epoch": 3.04, "learning_rate": 0.00015462292973146807, "loss": 0.0217, "step": 2102 }, { "epoch": 3.04, "learning_rate": 0.00015459077022029268, "loss": 0.0279, "step": 2103 }, { "epoch": 3.04, "learning_rate": 0.00015455861070911724, "loss": 0.124, "step": 2104 }, { "epoch": 3.04, "learning_rate": 0.0001545264511979418, "loss": 0.0349, "step": 2105 }, { "epoch": 3.05, "learning_rate": 0.00015449429168676638, "loss": 0.2842, "step": 2106 }, { "epoch": 3.05, "learning_rate": 0.00015446213217559093, "loss": 0.085, "step": 2107 }, { "epoch": 3.05, "learning_rate": 0.0001544299726644155, "loss": 0.251, "step": 2108 }, { "epoch": 3.05, "learning_rate": 0.00015439781315324007, "loss": 0.0368, "step": 2109 }, { "epoch": 3.05, "learning_rate": 0.00015436565364206466, "loss": 0.271, "step": 2110 }, { "epoch": 3.05, "learning_rate": 0.00015433349413088922, "loss": 0.0139, "step": 2111 }, { "epoch": 3.05, "learning_rate": 0.0001543013346197138, "loss": 0.1753, "step": 2112 }, { "epoch": 3.06, "learning_rate": 0.00015426917510853836, "loss": 0.1248, "step": 2113 }, { "epoch": 3.06, "learning_rate": 0.0001542370155973629, "loss": 0.1971, "step": 2114 }, { "epoch": 3.06, "learning_rate": 0.0001542048560861875, "loss": 0.2129, "step": 2115 }, { "epoch": 3.06, "learning_rate": 0.00015417269657501208, "loss": 0.0497, "step": 2116 }, { "epoch": 3.06, "learning_rate": 0.00015414053706383664, "loss": 0.1599, "step": 2117 }, { "epoch": 3.06, "learning_rate": 0.00015410837755266122, "loss": 0.2656, "step": 2118 }, { "epoch": 3.06, "learning_rate": 0.00015407621804148578, "loss": 0.025, "step": 2119 }, { "epoch": 3.07, "learning_rate": 0.00015404405853031034, "loss": 0.1426, "step": 2120 }, { "epoch": 3.07, "learning_rate": 0.00015401189901913492, "loss": 0.012, "step": 2121 }, { "epoch": 3.07, "learning_rate": 0.0001539797395079595, "loss": 0.097, "step": 2122 }, { "epoch": 3.07, "learning_rate": 0.00015394757999678406, "loss": 0.0129, "step": 2123 }, { "epoch": 3.07, "learning_rate": 0.00015391542048560862, "loss": 0.0309, "step": 2124 }, { "epoch": 3.07, "learning_rate": 0.0001538832609744332, "loss": 0.0213, "step": 2125 }, { "epoch": 3.07, "learning_rate": 0.00015385110146325776, "loss": 0.0036, "step": 2126 }, { "epoch": 3.08, "learning_rate": 0.00015381894195208232, "loss": 0.2964, "step": 2127 }, { "epoch": 3.08, "learning_rate": 0.0001537867824409069, "loss": 0.2764, "step": 2128 }, { "epoch": 3.08, "learning_rate": 0.00015375462292973148, "loss": 0.0239, "step": 2129 }, { "epoch": 3.08, "learning_rate": 0.00015372246341855604, "loss": 0.0202, "step": 2130 }, { "epoch": 3.08, "learning_rate": 0.00015369030390738063, "loss": 0.0266, "step": 2131 }, { "epoch": 3.08, "learning_rate": 0.00015365814439620518, "loss": 0.1533, "step": 2132 }, { "epoch": 3.08, "learning_rate": 0.00015362598488502974, "loss": 0.2759, "step": 2133 }, { "epoch": 3.09, "learning_rate": 0.00015359382537385432, "loss": 0.2373, "step": 2134 }, { "epoch": 3.09, "learning_rate": 0.0001535616658626789, "loss": 0.0333, "step": 2135 }, { "epoch": 3.09, "learning_rate": 0.00015352950635150346, "loss": 0.0421, "step": 2136 }, { "epoch": 3.09, "learning_rate": 0.00015349734684032805, "loss": 0.097, "step": 2137 }, { "epoch": 3.09, "learning_rate": 0.0001534651873291526, "loss": 0.0835, "step": 2138 }, { "epoch": 3.09, "learning_rate": 0.00015343302781797716, "loss": 0.1277, "step": 2139 }, { "epoch": 3.09, "learning_rate": 0.00015340086830680175, "loss": 0.334, "step": 2140 }, { "epoch": 3.1, "learning_rate": 0.0001533687087956263, "loss": 0.2192, "step": 2141 }, { "epoch": 3.1, "learning_rate": 0.0001533365492844509, "loss": 0.056, "step": 2142 }, { "epoch": 3.1, "learning_rate": 0.00015330438977327547, "loss": 0.073, "step": 2143 }, { "epoch": 3.1, "learning_rate": 0.00015327223026210003, "loss": 0.1313, "step": 2144 }, { "epoch": 3.1, "learning_rate": 0.00015324007075092459, "loss": 0.102, "step": 2145 }, { "epoch": 3.1, "learning_rate": 0.00015320791123974917, "loss": 0.1021, "step": 2146 }, { "epoch": 3.1, "learning_rate": 0.00015317575172857373, "loss": 0.0301, "step": 2147 }, { "epoch": 3.11, "learning_rate": 0.0001531435922173983, "loss": 0.1536, "step": 2148 }, { "epoch": 3.11, "learning_rate": 0.00015311143270622287, "loss": 0.0132, "step": 2149 }, { "epoch": 3.11, "learning_rate": 0.00015307927319504745, "loss": 0.319, "step": 2150 }, { "epoch": 3.11, "learning_rate": 0.000153047113683872, "loss": 0.1758, "step": 2151 }, { "epoch": 3.11, "learning_rate": 0.00015301495417269657, "loss": 0.1602, "step": 2152 }, { "epoch": 3.11, "learning_rate": 0.00015298279466152115, "loss": 0.0742, "step": 2153 }, { "epoch": 3.11, "learning_rate": 0.0001529506351503457, "loss": 0.0083, "step": 2154 }, { "epoch": 3.12, "learning_rate": 0.0001529184756391703, "loss": 0.0659, "step": 2155 }, { "epoch": 3.12, "learning_rate": 0.00015288631612799487, "loss": 0.1758, "step": 2156 }, { "epoch": 3.12, "learning_rate": 0.00015285415661681943, "loss": 0.1928, "step": 2157 }, { "epoch": 3.12, "learning_rate": 0.000152821997105644, "loss": 0.0133, "step": 2158 }, { "epoch": 3.12, "learning_rate": 0.00015278983759446857, "loss": 0.1567, "step": 2159 }, { "epoch": 3.12, "learning_rate": 0.00015275767808329313, "loss": 0.2162, "step": 2160 }, { "epoch": 3.13, "learning_rate": 0.00015272551857211771, "loss": 0.252, "step": 2161 }, { "epoch": 3.13, "learning_rate": 0.0001526933590609423, "loss": 0.0325, "step": 2162 }, { "epoch": 3.13, "learning_rate": 0.00015266119954976685, "loss": 0.016, "step": 2163 }, { "epoch": 3.13, "learning_rate": 0.0001526290400385914, "loss": 0.4502, "step": 2164 }, { "epoch": 3.13, "learning_rate": 0.000152596880527416, "loss": 0.0421, "step": 2165 }, { "epoch": 3.13, "learning_rate": 0.00015256472101624055, "loss": 0.112, "step": 2166 }, { "epoch": 3.13, "learning_rate": 0.00015253256150506514, "loss": 0.0369, "step": 2167 }, { "epoch": 3.14, "learning_rate": 0.00015250040199388972, "loss": 0.3315, "step": 2168 }, { "epoch": 3.14, "learning_rate": 0.00015246824248271428, "loss": 0.0422, "step": 2169 }, { "epoch": 3.14, "learning_rate": 0.00015243608297153883, "loss": 0.0102, "step": 2170 }, { "epoch": 3.14, "learning_rate": 0.00015240392346036342, "loss": 0.007, "step": 2171 }, { "epoch": 3.14, "learning_rate": 0.00015237176394918798, "loss": 0.1807, "step": 2172 }, { "epoch": 3.14, "learning_rate": 0.00015233960443801253, "loss": 0.2046, "step": 2173 }, { "epoch": 3.14, "learning_rate": 0.00015230744492683712, "loss": 0.0137, "step": 2174 }, { "epoch": 3.15, "learning_rate": 0.0001522752854156617, "loss": 0.2129, "step": 2175 }, { "epoch": 3.15, "learning_rate": 0.00015224312590448626, "loss": 0.0042, "step": 2176 }, { "epoch": 3.15, "learning_rate": 0.00015221096639331081, "loss": 0.1875, "step": 2177 }, { "epoch": 3.15, "learning_rate": 0.0001521788068821354, "loss": 0.1357, "step": 2178 }, { "epoch": 3.15, "learning_rate": 0.00015214664737095996, "loss": 0.0496, "step": 2179 }, { "epoch": 3.15, "learning_rate": 0.00015211448785978454, "loss": 0.2524, "step": 2180 }, { "epoch": 3.15, "learning_rate": 0.00015208232834860912, "loss": 0.3174, "step": 2181 }, { "epoch": 3.16, "learning_rate": 0.00015205016883743368, "loss": 0.2973, "step": 2182 }, { "epoch": 3.16, "learning_rate": 0.00015201800932625824, "loss": 0.2039, "step": 2183 }, { "epoch": 3.16, "learning_rate": 0.00015198584981508282, "loss": 0.0242, "step": 2184 }, { "epoch": 3.16, "learning_rate": 0.00015195369030390738, "loss": 0.1917, "step": 2185 }, { "epoch": 3.16, "learning_rate": 0.00015192153079273194, "loss": 0.0186, "step": 2186 }, { "epoch": 3.16, "learning_rate": 0.00015188937128155655, "loss": 0.0708, "step": 2187 }, { "epoch": 3.16, "learning_rate": 0.0001518572117703811, "loss": 0.0344, "step": 2188 }, { "epoch": 3.17, "learning_rate": 0.00015182505225920566, "loss": 0.0535, "step": 2189 }, { "epoch": 3.17, "learning_rate": 0.00015179289274803025, "loss": 0.1399, "step": 2190 }, { "epoch": 3.17, "learning_rate": 0.0001517607332368548, "loss": 0.0433, "step": 2191 }, { "epoch": 3.17, "learning_rate": 0.00015172857372567936, "loss": 0.2455, "step": 2192 }, { "epoch": 3.17, "learning_rate": 0.00015169641421450397, "loss": 0.259, "step": 2193 }, { "epoch": 3.17, "learning_rate": 0.00015166425470332853, "loss": 0.0286, "step": 2194 }, { "epoch": 3.17, "learning_rate": 0.00015163209519215308, "loss": 0.1496, "step": 2195 }, { "epoch": 3.18, "learning_rate": 0.00015159993568097767, "loss": 0.1963, "step": 2196 }, { "epoch": 3.18, "learning_rate": 0.00015156777616980223, "loss": 0.0679, "step": 2197 }, { "epoch": 3.18, "learning_rate": 0.00015153561665862678, "loss": 0.0514, "step": 2198 }, { "epoch": 3.18, "learning_rate": 0.00015150345714745137, "loss": 0.1929, "step": 2199 }, { "epoch": 3.18, "learning_rate": 0.00015147129763627595, "loss": 0.2537, "step": 2200 }, { "epoch": 3.18, "learning_rate": 0.0001514391381251005, "loss": 0.0592, "step": 2201 }, { "epoch": 3.18, "learning_rate": 0.00015140697861392506, "loss": 0.0889, "step": 2202 }, { "epoch": 3.19, "learning_rate": 0.00015137481910274965, "loss": 0.0225, "step": 2203 }, { "epoch": 3.19, "learning_rate": 0.0001513426595915742, "loss": 0.1571, "step": 2204 }, { "epoch": 3.19, "learning_rate": 0.0001513105000803988, "loss": 0.0239, "step": 2205 }, { "epoch": 3.19, "learning_rate": 0.00015127834056922337, "loss": 0.0629, "step": 2206 }, { "epoch": 3.19, "learning_rate": 0.00015124618105804793, "loss": 0.0769, "step": 2207 }, { "epoch": 3.19, "learning_rate": 0.0001512140215468725, "loss": 0.1362, "step": 2208 }, { "epoch": 3.19, "learning_rate": 0.00015118186203569707, "loss": 0.0548, "step": 2209 }, { "epoch": 3.2, "learning_rate": 0.00015114970252452163, "loss": 0.0979, "step": 2210 }, { "epoch": 3.2, "learning_rate": 0.00015111754301334619, "loss": 0.2207, "step": 2211 }, { "epoch": 3.2, "learning_rate": 0.0001510853835021708, "loss": 0.1075, "step": 2212 }, { "epoch": 3.2, "learning_rate": 0.00015105322399099535, "loss": 0.1743, "step": 2213 }, { "epoch": 3.2, "learning_rate": 0.0001510210644798199, "loss": 0.1801, "step": 2214 }, { "epoch": 3.2, "learning_rate": 0.0001509889049686445, "loss": 0.2422, "step": 2215 }, { "epoch": 3.2, "learning_rate": 0.00015095674545746905, "loss": 0.0664, "step": 2216 }, { "epoch": 3.21, "learning_rate": 0.0001509245859462936, "loss": 0.1568, "step": 2217 }, { "epoch": 3.21, "learning_rate": 0.0001508924264351182, "loss": 0.0115, "step": 2218 }, { "epoch": 3.21, "learning_rate": 0.00015086026692394278, "loss": 0.1263, "step": 2219 }, { "epoch": 3.21, "learning_rate": 0.00015082810741276733, "loss": 0.2305, "step": 2220 }, { "epoch": 3.21, "learning_rate": 0.00015079594790159192, "loss": 0.0483, "step": 2221 }, { "epoch": 3.21, "learning_rate": 0.00015076378839041647, "loss": 0.0881, "step": 2222 }, { "epoch": 3.21, "learning_rate": 0.00015073162887924103, "loss": 0.2249, "step": 2223 }, { "epoch": 3.22, "learning_rate": 0.00015069946936806562, "loss": 0.2402, "step": 2224 }, { "epoch": 3.22, "learning_rate": 0.0001506673098568902, "loss": 0.0283, "step": 2225 }, { "epoch": 3.22, "learning_rate": 0.00015063515034571476, "loss": 0.252, "step": 2226 }, { "epoch": 3.22, "learning_rate": 0.0001506029908345393, "loss": 0.1022, "step": 2227 }, { "epoch": 3.22, "learning_rate": 0.0001505708313233639, "loss": 0.1093, "step": 2228 }, { "epoch": 3.22, "learning_rate": 0.00015053867181218845, "loss": 0.1129, "step": 2229 }, { "epoch": 3.22, "learning_rate": 0.00015050651230101304, "loss": 0.0269, "step": 2230 }, { "epoch": 3.23, "learning_rate": 0.0001504743527898376, "loss": 0.1677, "step": 2231 }, { "epoch": 3.23, "learning_rate": 0.00015044219327866218, "loss": 0.0752, "step": 2232 }, { "epoch": 3.23, "learning_rate": 0.00015041003376748674, "loss": 0.0987, "step": 2233 }, { "epoch": 3.23, "learning_rate": 0.00015037787425631132, "loss": 0.3164, "step": 2234 }, { "epoch": 3.23, "learning_rate": 0.00015034571474513588, "loss": 0.147, "step": 2235 }, { "epoch": 3.23, "learning_rate": 0.00015031355523396043, "loss": 0.1199, "step": 2236 }, { "epoch": 3.23, "learning_rate": 0.00015028139572278502, "loss": 0.1531, "step": 2237 }, { "epoch": 3.24, "learning_rate": 0.0001502492362116096, "loss": 0.0872, "step": 2238 }, { "epoch": 3.24, "learning_rate": 0.00015021707670043416, "loss": 0.1255, "step": 2239 }, { "epoch": 3.24, "learning_rate": 0.00015018491718925874, "loss": 0.0168, "step": 2240 }, { "epoch": 3.24, "learning_rate": 0.0001501527576780833, "loss": 0.1337, "step": 2241 }, { "epoch": 3.24, "learning_rate": 0.00015012059816690786, "loss": 0.0651, "step": 2242 }, { "epoch": 3.24, "learning_rate": 0.00015008843865573244, "loss": 0.0089, "step": 2243 }, { "epoch": 3.25, "learning_rate": 0.000150056279144557, "loss": 0.1321, "step": 2244 }, { "epoch": 3.25, "learning_rate": 0.00015002411963338158, "loss": 0.0795, "step": 2245 }, { "epoch": 3.25, "learning_rate": 0.00014999196012220617, "loss": 0.12, "step": 2246 }, { "epoch": 3.25, "learning_rate": 0.00014995980061103072, "loss": 0.0311, "step": 2247 }, { "epoch": 3.25, "learning_rate": 0.00014992764109985528, "loss": 0.167, "step": 2248 }, { "epoch": 3.25, "learning_rate": 0.00014989548158867986, "loss": 0.1797, "step": 2249 }, { "epoch": 3.25, "learning_rate": 0.00014986332207750442, "loss": 0.0106, "step": 2250 }, { "epoch": 3.26, "learning_rate": 0.000149831162566329, "loss": 0.1895, "step": 2251 }, { "epoch": 3.26, "learning_rate": 0.00014979900305515356, "loss": 0.1045, "step": 2252 }, { "epoch": 3.26, "learning_rate": 0.00014976684354397815, "loss": 0.1655, "step": 2253 }, { "epoch": 3.26, "learning_rate": 0.0001497346840328027, "loss": 0.0066, "step": 2254 }, { "epoch": 3.26, "learning_rate": 0.00014970252452162726, "loss": 0.1371, "step": 2255 }, { "epoch": 3.26, "learning_rate": 0.00014967036501045184, "loss": 0.3135, "step": 2256 }, { "epoch": 3.26, "learning_rate": 0.0001496382054992764, "loss": 0.1536, "step": 2257 }, { "epoch": 3.27, "learning_rate": 0.00014960604598810099, "loss": 0.2502, "step": 2258 }, { "epoch": 3.27, "learning_rate": 0.00014957388647692557, "loss": 0.127, "step": 2259 }, { "epoch": 3.27, "learning_rate": 0.00014954172696575013, "loss": 0.0455, "step": 2260 }, { "epoch": 3.27, "learning_rate": 0.00014950956745457468, "loss": 0.0283, "step": 2261 }, { "epoch": 3.27, "learning_rate": 0.00014947740794339927, "loss": 0.0894, "step": 2262 }, { "epoch": 3.27, "learning_rate": 0.00014944524843222382, "loss": 0.1685, "step": 2263 }, { "epoch": 3.27, "learning_rate": 0.0001494130889210484, "loss": 0.2177, "step": 2264 }, { "epoch": 3.28, "learning_rate": 0.000149380929409873, "loss": 0.1078, "step": 2265 }, { "epoch": 3.28, "learning_rate": 0.00014934876989869755, "loss": 0.2177, "step": 2266 }, { "epoch": 3.28, "learning_rate": 0.0001493166103875221, "loss": 0.0515, "step": 2267 }, { "epoch": 3.28, "learning_rate": 0.0001492844508763467, "loss": 0.0804, "step": 2268 }, { "epoch": 3.28, "learning_rate": 0.00014925229136517125, "loss": 0.1871, "step": 2269 }, { "epoch": 3.28, "learning_rate": 0.00014922013185399583, "loss": 0.1134, "step": 2270 }, { "epoch": 3.28, "learning_rate": 0.00014918797234282042, "loss": 0.3008, "step": 2271 }, { "epoch": 3.29, "learning_rate": 0.00014915581283164497, "loss": 0.0806, "step": 2272 }, { "epoch": 3.29, "learning_rate": 0.00014912365332046953, "loss": 0.2502, "step": 2273 }, { "epoch": 3.29, "learning_rate": 0.00014909149380929411, "loss": 0.0444, "step": 2274 }, { "epoch": 3.29, "learning_rate": 0.00014905933429811867, "loss": 0.0165, "step": 2275 }, { "epoch": 3.29, "learning_rate": 0.00014902717478694323, "loss": 0.1357, "step": 2276 }, { "epoch": 3.29, "learning_rate": 0.0001489950152757678, "loss": 0.0804, "step": 2277 }, { "epoch": 3.29, "learning_rate": 0.0001489628557645924, "loss": 0.083, "step": 2278 }, { "epoch": 3.3, "learning_rate": 0.00014893069625341695, "loss": 0.2651, "step": 2279 }, { "epoch": 3.3, "learning_rate": 0.0001488985367422415, "loss": 0.0897, "step": 2280 }, { "epoch": 3.3, "learning_rate": 0.0001488663772310661, "loss": 0.2312, "step": 2281 }, { "epoch": 3.3, "learning_rate": 0.00014883421771989065, "loss": 0.1382, "step": 2282 }, { "epoch": 3.3, "learning_rate": 0.00014880205820871524, "loss": 0.1403, "step": 2283 }, { "epoch": 3.3, "learning_rate": 0.00014876989869753982, "loss": 0.0115, "step": 2284 }, { "epoch": 3.3, "learning_rate": 0.00014873773918636438, "loss": 0.1068, "step": 2285 }, { "epoch": 3.31, "learning_rate": 0.00014870557967518893, "loss": 0.2568, "step": 2286 }, { "epoch": 3.31, "learning_rate": 0.00014867342016401352, "loss": 0.1916, "step": 2287 }, { "epoch": 3.31, "learning_rate": 0.00014864126065283807, "loss": 0.0672, "step": 2288 }, { "epoch": 3.31, "learning_rate": 0.00014860910114166263, "loss": 0.2488, "step": 2289 }, { "epoch": 3.31, "learning_rate": 0.00014857694163048724, "loss": 0.1151, "step": 2290 }, { "epoch": 3.31, "learning_rate": 0.0001485447821193118, "loss": 0.2559, "step": 2291 }, { "epoch": 3.31, "learning_rate": 0.00014851262260813636, "loss": 0.0839, "step": 2292 }, { "epoch": 3.32, "learning_rate": 0.00014848046309696094, "loss": 0.2759, "step": 2293 }, { "epoch": 3.32, "learning_rate": 0.0001484483035857855, "loss": 0.0214, "step": 2294 }, { "epoch": 3.32, "learning_rate": 0.00014841614407461005, "loss": 0.2426, "step": 2295 }, { "epoch": 3.32, "learning_rate": 0.00014838398456343467, "loss": 0.0507, "step": 2296 }, { "epoch": 3.32, "learning_rate": 0.00014835182505225922, "loss": 0.0613, "step": 2297 }, { "epoch": 3.32, "learning_rate": 0.00014831966554108378, "loss": 0.1235, "step": 2298 }, { "epoch": 3.32, "learning_rate": 0.00014828750602990836, "loss": 0.0665, "step": 2299 }, { "epoch": 3.33, "learning_rate": 0.00014825534651873292, "loss": 0.1319, "step": 2300 }, { "epoch": 3.33, "learning_rate": 0.00014822318700755748, "loss": 0.0274, "step": 2301 }, { "epoch": 3.33, "learning_rate": 0.00014819102749638206, "loss": 0.2178, "step": 2302 }, { "epoch": 3.33, "learning_rate": 0.00014815886798520665, "loss": 0.239, "step": 2303 }, { "epoch": 3.33, "learning_rate": 0.0001481267084740312, "loss": 0.1039, "step": 2304 }, { "epoch": 3.33, "learning_rate": 0.00014809454896285576, "loss": 0.0701, "step": 2305 }, { "epoch": 3.33, "learning_rate": 0.00014806238945168034, "loss": 0.2457, "step": 2306 }, { "epoch": 3.34, "learning_rate": 0.0001480302299405049, "loss": 0.017, "step": 2307 }, { "epoch": 3.34, "learning_rate": 0.00014799807042932948, "loss": 0.061, "step": 2308 }, { "epoch": 3.34, "learning_rate": 0.00014796591091815407, "loss": 0.25, "step": 2309 }, { "epoch": 3.34, "learning_rate": 0.00014793375140697863, "loss": 0.0047, "step": 2310 }, { "epoch": 3.34, "learning_rate": 0.00014790159189580318, "loss": 0.1229, "step": 2311 }, { "epoch": 3.34, "learning_rate": 0.00014786943238462777, "loss": 0.1846, "step": 2312 }, { "epoch": 3.34, "learning_rate": 0.00014783727287345232, "loss": 0.144, "step": 2313 }, { "epoch": 3.35, "learning_rate": 0.00014780511336227688, "loss": 0.3208, "step": 2314 }, { "epoch": 3.35, "learning_rate": 0.0001477729538511015, "loss": 0.3176, "step": 2315 }, { "epoch": 3.35, "learning_rate": 0.00014774079433992605, "loss": 0.1576, "step": 2316 }, { "epoch": 3.35, "learning_rate": 0.0001477086348287506, "loss": 0.2107, "step": 2317 }, { "epoch": 3.35, "learning_rate": 0.0001476764753175752, "loss": 0.105, "step": 2318 }, { "epoch": 3.35, "learning_rate": 0.00014764431580639975, "loss": 0.0339, "step": 2319 }, { "epoch": 3.36, "learning_rate": 0.0001476121562952243, "loss": 0.046, "step": 2320 }, { "epoch": 3.36, "learning_rate": 0.0001475799967840489, "loss": 0.0342, "step": 2321 }, { "epoch": 3.36, "learning_rate": 0.00014754783727287347, "loss": 0.1483, "step": 2322 }, { "epoch": 3.36, "learning_rate": 0.00014751567776169803, "loss": 0.2056, "step": 2323 }, { "epoch": 3.36, "learning_rate": 0.0001474835182505226, "loss": 0.215, "step": 2324 }, { "epoch": 3.36, "learning_rate": 0.00014745135873934717, "loss": 0.0714, "step": 2325 }, { "epoch": 3.36, "learning_rate": 0.00014741919922817173, "loss": 0.2939, "step": 2326 }, { "epoch": 3.37, "learning_rate": 0.0001473870397169963, "loss": 0.0181, "step": 2327 }, { "epoch": 3.37, "learning_rate": 0.0001473548802058209, "loss": 0.049, "step": 2328 }, { "epoch": 3.37, "learning_rate": 0.00014732272069464545, "loss": 0.1611, "step": 2329 }, { "epoch": 3.37, "learning_rate": 0.00014729056118347, "loss": 0.1524, "step": 2330 }, { "epoch": 3.37, "learning_rate": 0.0001472584016722946, "loss": 0.1208, "step": 2331 }, { "epoch": 3.37, "learning_rate": 0.00014722624216111915, "loss": 0.0706, "step": 2332 }, { "epoch": 3.37, "learning_rate": 0.00014719408264994373, "loss": 0.0977, "step": 2333 }, { "epoch": 3.38, "learning_rate": 0.0001471619231387683, "loss": 0.2532, "step": 2334 }, { "epoch": 3.38, "learning_rate": 0.00014712976362759287, "loss": 0.0044, "step": 2335 }, { "epoch": 3.38, "learning_rate": 0.00014709760411641743, "loss": 0.0038, "step": 2336 }, { "epoch": 3.38, "learning_rate": 0.00014706544460524202, "loss": 0.3496, "step": 2337 }, { "epoch": 3.38, "learning_rate": 0.00014703328509406657, "loss": 0.1411, "step": 2338 }, { "epoch": 3.38, "learning_rate": 0.00014700112558289113, "loss": 0.0596, "step": 2339 }, { "epoch": 3.38, "learning_rate": 0.00014696896607171571, "loss": 0.1354, "step": 2340 }, { "epoch": 3.39, "learning_rate": 0.0001469368065605403, "loss": 0.0434, "step": 2341 }, { "epoch": 3.39, "learning_rate": 0.00014690464704936485, "loss": 0.2293, "step": 2342 }, { "epoch": 3.39, "learning_rate": 0.00014687248753818944, "loss": 0.103, "step": 2343 }, { "epoch": 3.39, "learning_rate": 0.000146840328027014, "loss": 0.113, "step": 2344 }, { "epoch": 3.39, "learning_rate": 0.00014680816851583855, "loss": 0.1689, "step": 2345 }, { "epoch": 3.39, "learning_rate": 0.00014677600900466314, "loss": 0.095, "step": 2346 }, { "epoch": 3.39, "learning_rate": 0.0001467438494934877, "loss": 0.1616, "step": 2347 }, { "epoch": 3.4, "learning_rate": 0.00014671168998231228, "loss": 0.1036, "step": 2348 }, { "epoch": 3.4, "learning_rate": 0.00014667953047113686, "loss": 0.025, "step": 2349 }, { "epoch": 3.4, "learning_rate": 0.00014664737095996142, "loss": 0.1931, "step": 2350 }, { "epoch": 3.4, "learning_rate": 0.00014661521144878598, "loss": 0.0475, "step": 2351 }, { "epoch": 3.4, "learning_rate": 0.00014658305193761056, "loss": 0.0693, "step": 2352 }, { "epoch": 3.4, "learning_rate": 0.00014655089242643512, "loss": 0.0176, "step": 2353 }, { "epoch": 3.4, "learning_rate": 0.0001465187329152597, "loss": 0.0444, "step": 2354 }, { "epoch": 3.41, "learning_rate": 0.00014648657340408426, "loss": 0.1219, "step": 2355 }, { "epoch": 3.41, "learning_rate": 0.00014645441389290884, "loss": 0.083, "step": 2356 }, { "epoch": 3.41, "learning_rate": 0.0001464222543817334, "loss": 0.048, "step": 2357 }, { "epoch": 3.41, "learning_rate": 0.00014639009487055798, "loss": 0.0598, "step": 2358 }, { "epoch": 3.41, "learning_rate": 0.00014635793535938254, "loss": 0.2192, "step": 2359 }, { "epoch": 3.41, "learning_rate": 0.00014632577584820712, "loss": 0.0372, "step": 2360 }, { "epoch": 3.41, "learning_rate": 0.00014629361633703168, "loss": 0.0825, "step": 2361 }, { "epoch": 3.42, "learning_rate": 0.00014626145682585626, "loss": 0.2793, "step": 2362 }, { "epoch": 3.42, "learning_rate": 0.00014622929731468082, "loss": 0.0414, "step": 2363 }, { "epoch": 3.42, "learning_rate": 0.00014619713780350538, "loss": 0.2427, "step": 2364 }, { "epoch": 3.42, "learning_rate": 0.00014616497829232996, "loss": 0.0898, "step": 2365 }, { "epoch": 3.42, "learning_rate": 0.00014613281878115452, "loss": 0.1904, "step": 2366 }, { "epoch": 3.42, "learning_rate": 0.0001461006592699791, "loss": 0.0643, "step": 2367 }, { "epoch": 3.42, "learning_rate": 0.0001460684997588037, "loss": 0.129, "step": 2368 }, { "epoch": 3.43, "learning_rate": 0.00014603634024762824, "loss": 0.3325, "step": 2369 }, { "epoch": 3.43, "learning_rate": 0.0001460041807364528, "loss": 0.3403, "step": 2370 }, { "epoch": 3.43, "learning_rate": 0.00014597202122527739, "loss": 0.0083, "step": 2371 }, { "epoch": 3.43, "learning_rate": 0.00014593986171410194, "loss": 0.0469, "step": 2372 }, { "epoch": 3.43, "learning_rate": 0.00014590770220292653, "loss": 0.153, "step": 2373 }, { "epoch": 3.43, "learning_rate": 0.0001458755426917511, "loss": 0.1052, "step": 2374 }, { "epoch": 3.43, "learning_rate": 0.00014584338318057567, "loss": 0.2611, "step": 2375 }, { "epoch": 3.44, "learning_rate": 0.00014581122366940022, "loss": 0.4355, "step": 2376 }, { "epoch": 3.44, "learning_rate": 0.0001457790641582248, "loss": 0.0771, "step": 2377 }, { "epoch": 3.44, "learning_rate": 0.00014574690464704937, "loss": 0.0085, "step": 2378 }, { "epoch": 3.44, "learning_rate": 0.00014571474513587392, "loss": 0.1448, "step": 2379 }, { "epoch": 3.44, "learning_rate": 0.0001456825856246985, "loss": 0.1914, "step": 2380 }, { "epoch": 3.44, "learning_rate": 0.0001456504261135231, "loss": 0.0477, "step": 2381 }, { "epoch": 3.44, "learning_rate": 0.00014561826660234765, "loss": 0.1934, "step": 2382 }, { "epoch": 3.45, "learning_rate": 0.00014558610709117223, "loss": 0.1792, "step": 2383 }, { "epoch": 3.45, "learning_rate": 0.0001455539475799968, "loss": 0.05, "step": 2384 }, { "epoch": 3.45, "learning_rate": 0.00014552178806882135, "loss": 0.1281, "step": 2385 }, { "epoch": 3.45, "learning_rate": 0.00014548962855764593, "loss": 0.2778, "step": 2386 }, { "epoch": 3.45, "learning_rate": 0.00014545746904647051, "loss": 0.0759, "step": 2387 }, { "epoch": 3.45, "learning_rate": 0.00014542530953529507, "loss": 0.0454, "step": 2388 }, { "epoch": 3.45, "learning_rate": 0.00014539315002411963, "loss": 0.1728, "step": 2389 }, { "epoch": 3.46, "learning_rate": 0.0001453609905129442, "loss": 0.0707, "step": 2390 }, { "epoch": 3.46, "learning_rate": 0.00014532883100176877, "loss": 0.1086, "step": 2391 }, { "epoch": 3.46, "learning_rate": 0.00014529667149059333, "loss": 0.0909, "step": 2392 }, { "epoch": 3.46, "learning_rate": 0.00014526451197941794, "loss": 0.0963, "step": 2393 }, { "epoch": 3.46, "learning_rate": 0.0001452323524682425, "loss": 0.194, "step": 2394 }, { "epoch": 3.46, "learning_rate": 0.00014520019295706705, "loss": 0.012, "step": 2395 }, { "epoch": 3.46, "learning_rate": 0.00014516803344589164, "loss": 0.0109, "step": 2396 }, { "epoch": 3.47, "learning_rate": 0.0001451358739347162, "loss": 0.078, "step": 2397 }, { "epoch": 3.47, "learning_rate": 0.00014510371442354075, "loss": 0.0479, "step": 2398 }, { "epoch": 3.47, "learning_rate": 0.00014507155491236536, "loss": 0.1395, "step": 2399 }, { "epoch": 3.47, "learning_rate": 0.00014503939540118992, "loss": 0.1588, "step": 2400 }, { "epoch": 3.47, "learning_rate": 0.00014500723589001447, "loss": 0.0574, "step": 2401 }, { "epoch": 3.47, "learning_rate": 0.00014497507637883906, "loss": 0.1456, "step": 2402 }, { "epoch": 3.48, "learning_rate": 0.00014494291686766362, "loss": 0.1139, "step": 2403 }, { "epoch": 3.48, "learning_rate": 0.00014491075735648817, "loss": 0.1553, "step": 2404 }, { "epoch": 3.48, "learning_rate": 0.00014487859784531276, "loss": 0.066, "step": 2405 }, { "epoch": 3.48, "learning_rate": 0.00014484643833413734, "loss": 0.0324, "step": 2406 }, { "epoch": 3.48, "learning_rate": 0.0001448142788229619, "loss": 0.0213, "step": 2407 }, { "epoch": 3.48, "learning_rate": 0.00014478211931178648, "loss": 0.3984, "step": 2408 }, { "epoch": 3.48, "learning_rate": 0.00014474995980061104, "loss": 0.0739, "step": 2409 }, { "epoch": 3.49, "learning_rate": 0.0001447178002894356, "loss": 0.1646, "step": 2410 }, { "epoch": 3.49, "learning_rate": 0.00014468564077826018, "loss": 0.14, "step": 2411 }, { "epoch": 3.49, "learning_rate": 0.00014465348126708476, "loss": 0.1953, "step": 2412 }, { "epoch": 3.49, "learning_rate": 0.00014462132175590932, "loss": 0.0742, "step": 2413 }, { "epoch": 3.49, "learning_rate": 0.00014458916224473388, "loss": 0.165, "step": 2414 }, { "epoch": 3.49, "learning_rate": 0.00014455700273355846, "loss": 0.0695, "step": 2415 }, { "epoch": 3.49, "learning_rate": 0.00014452484322238302, "loss": 0.0086, "step": 2416 }, { "epoch": 3.5, "learning_rate": 0.00014449268371120758, "loss": 0.0104, "step": 2417 }, { "epoch": 3.5, "learning_rate": 0.00014446052420003219, "loss": 0.2061, "step": 2418 }, { "epoch": 3.5, "learning_rate": 0.00014442836468885674, "loss": 0.0661, "step": 2419 }, { "epoch": 3.5, "learning_rate": 0.0001443962051776813, "loss": 0.1223, "step": 2420 }, { "epoch": 3.5, "learning_rate": 0.00014436404566650588, "loss": 0.1023, "step": 2421 }, { "epoch": 3.5, "learning_rate": 0.00014433188615533044, "loss": 0.2482, "step": 2422 }, { "epoch": 3.5, "learning_rate": 0.000144299726644155, "loss": 0.0273, "step": 2423 }, { "epoch": 3.51, "learning_rate": 0.00014426756713297958, "loss": 0.0416, "step": 2424 }, { "epoch": 3.51, "learning_rate": 0.00014423540762180417, "loss": 0.109, "step": 2425 }, { "epoch": 3.51, "learning_rate": 0.00014420324811062872, "loss": 0.0549, "step": 2426 }, { "epoch": 3.51, "learning_rate": 0.0001441710885994533, "loss": 0.117, "step": 2427 }, { "epoch": 3.51, "learning_rate": 0.00014413892908827786, "loss": 0.2542, "step": 2428 }, { "epoch": 3.51, "learning_rate": 0.00014410676957710242, "loss": 0.1802, "step": 2429 }, { "epoch": 3.51, "learning_rate": 0.000144074610065927, "loss": 0.282, "step": 2430 }, { "epoch": 3.52, "learning_rate": 0.0001440424505547516, "loss": 0.1406, "step": 2431 }, { "epoch": 3.52, "learning_rate": 0.00014401029104357615, "loss": 0.2694, "step": 2432 }, { "epoch": 3.52, "learning_rate": 0.00014397813153240073, "loss": 0.1134, "step": 2433 }, { "epoch": 3.52, "learning_rate": 0.0001439459720212253, "loss": 0.1541, "step": 2434 }, { "epoch": 3.52, "learning_rate": 0.00014391381251004984, "loss": 0.1364, "step": 2435 }, { "epoch": 3.52, "learning_rate": 0.00014388165299887443, "loss": 0.014, "step": 2436 }, { "epoch": 3.52, "learning_rate": 0.00014384949348769899, "loss": 0.129, "step": 2437 }, { "epoch": 3.53, "learning_rate": 0.00014381733397652357, "loss": 0.0565, "step": 2438 }, { "epoch": 3.53, "learning_rate": 0.00014378517446534813, "loss": 0.397, "step": 2439 }, { "epoch": 3.53, "learning_rate": 0.0001437530149541727, "loss": 0.1969, "step": 2440 }, { "epoch": 3.53, "learning_rate": 0.00014372085544299727, "loss": 0.0239, "step": 2441 }, { "epoch": 3.53, "learning_rate": 0.00014368869593182182, "loss": 0.2764, "step": 2442 }, { "epoch": 3.53, "learning_rate": 0.0001436565364206464, "loss": 0.024, "step": 2443 }, { "epoch": 3.53, "learning_rate": 0.000143624376909471, "loss": 0.1076, "step": 2444 }, { "epoch": 3.54, "learning_rate": 0.00014359221739829555, "loss": 0.0045, "step": 2445 }, { "epoch": 3.54, "learning_rate": 0.00014356005788712013, "loss": 0.1488, "step": 2446 }, { "epoch": 3.54, "learning_rate": 0.0001435278983759447, "loss": 0.1208, "step": 2447 }, { "epoch": 3.54, "learning_rate": 0.00014349573886476925, "loss": 0.225, "step": 2448 }, { "epoch": 3.54, "learning_rate": 0.00014346357935359383, "loss": 0.092, "step": 2449 }, { "epoch": 3.54, "learning_rate": 0.00014343141984241842, "loss": 0.1454, "step": 2450 }, { "epoch": 3.54, "learning_rate": 0.00014339926033124297, "loss": 0.0631, "step": 2451 }, { "epoch": 3.55, "learning_rate": 0.00014336710082006756, "loss": 0.2091, "step": 2452 }, { "epoch": 3.55, "learning_rate": 0.00014333494130889211, "loss": 0.1223, "step": 2453 }, { "epoch": 3.55, "learning_rate": 0.00014330278179771667, "loss": 0.0366, "step": 2454 }, { "epoch": 3.55, "learning_rate": 0.00014327062228654125, "loss": 0.0574, "step": 2455 }, { "epoch": 3.55, "learning_rate": 0.0001432384627753658, "loss": 0.2998, "step": 2456 }, { "epoch": 3.55, "learning_rate": 0.0001432063032641904, "loss": 0.0232, "step": 2457 }, { "epoch": 3.55, "learning_rate": 0.00014317414375301498, "loss": 0.0276, "step": 2458 }, { "epoch": 3.56, "learning_rate": 0.00014314198424183954, "loss": 0.025, "step": 2459 }, { "epoch": 3.56, "learning_rate": 0.0001431098247306641, "loss": 0.0738, "step": 2460 }, { "epoch": 3.56, "learning_rate": 0.00014307766521948868, "loss": 0.0136, "step": 2461 }, { "epoch": 3.56, "learning_rate": 0.00014304550570831323, "loss": 0.0225, "step": 2462 }, { "epoch": 3.56, "learning_rate": 0.00014301334619713782, "loss": 0.2814, "step": 2463 }, { "epoch": 3.56, "learning_rate": 0.00014298118668596238, "loss": 0.1664, "step": 2464 }, { "epoch": 3.56, "learning_rate": 0.00014294902717478696, "loss": 0.249, "step": 2465 }, { "epoch": 3.57, "learning_rate": 0.00014291686766361152, "loss": 0.0632, "step": 2466 }, { "epoch": 3.57, "learning_rate": 0.00014288470815243607, "loss": 0.0151, "step": 2467 }, { "epoch": 3.57, "learning_rate": 0.00014285254864126066, "loss": 0.1273, "step": 2468 }, { "epoch": 3.57, "learning_rate": 0.00014282038913008521, "loss": 0.1478, "step": 2469 }, { "epoch": 3.57, "learning_rate": 0.0001427882296189098, "loss": 0.081, "step": 2470 }, { "epoch": 3.57, "learning_rate": 0.00014275607010773438, "loss": 0.2267, "step": 2471 }, { "epoch": 3.57, "learning_rate": 0.00014272391059655894, "loss": 0.2105, "step": 2472 }, { "epoch": 3.58, "learning_rate": 0.0001426917510853835, "loss": 0.1763, "step": 2473 }, { "epoch": 3.58, "learning_rate": 0.00014265959157420808, "loss": 0.2033, "step": 2474 }, { "epoch": 3.58, "learning_rate": 0.00014262743206303264, "loss": 0.2751, "step": 2475 }, { "epoch": 3.58, "learning_rate": 0.00014259527255185722, "loss": 0.2607, "step": 2476 }, { "epoch": 3.58, "learning_rate": 0.0001425631130406818, "loss": 0.0376, "step": 2477 }, { "epoch": 3.58, "learning_rate": 0.00014253095352950636, "loss": 0.0496, "step": 2478 }, { "epoch": 3.58, "learning_rate": 0.00014249879401833092, "loss": 0.3325, "step": 2479 }, { "epoch": 3.59, "learning_rate": 0.0001424666345071555, "loss": 0.0139, "step": 2480 }, { "epoch": 3.59, "learning_rate": 0.00014243447499598006, "loss": 0.1357, "step": 2481 }, { "epoch": 3.59, "learning_rate": 0.00014240231548480462, "loss": 0.0439, "step": 2482 }, { "epoch": 3.59, "learning_rate": 0.00014237015597362923, "loss": 0.3174, "step": 2483 }, { "epoch": 3.59, "learning_rate": 0.00014233799646245379, "loss": 0.0307, "step": 2484 }, { "epoch": 3.59, "learning_rate": 0.00014230583695127834, "loss": 0.1641, "step": 2485 }, { "epoch": 3.6, "learning_rate": 0.00014227367744010293, "loss": 0.1063, "step": 2486 }, { "epoch": 3.6, "learning_rate": 0.00014224151792892748, "loss": 0.1777, "step": 2487 }, { "epoch": 3.6, "learning_rate": 0.00014220935841775204, "loss": 0.0412, "step": 2488 }, { "epoch": 3.6, "learning_rate": 0.00014217719890657663, "loss": 0.1049, "step": 2489 }, { "epoch": 3.6, "learning_rate": 0.0001421450393954012, "loss": 0.0356, "step": 2490 }, { "epoch": 3.6, "learning_rate": 0.00014211287988422577, "loss": 0.0391, "step": 2491 }, { "epoch": 3.6, "learning_rate": 0.00014208072037305032, "loss": 0.0954, "step": 2492 }, { "epoch": 3.61, "learning_rate": 0.0001420485608618749, "loss": 0.0277, "step": 2493 }, { "epoch": 3.61, "learning_rate": 0.00014201640135069946, "loss": 0.1258, "step": 2494 }, { "epoch": 3.61, "learning_rate": 0.00014198424183952405, "loss": 0.1572, "step": 2495 }, { "epoch": 3.61, "learning_rate": 0.00014195208232834863, "loss": 0.0323, "step": 2496 }, { "epoch": 3.61, "learning_rate": 0.0001419199228171732, "loss": 0.0321, "step": 2497 }, { "epoch": 3.61, "learning_rate": 0.00014188776330599775, "loss": 0.1465, "step": 2498 }, { "epoch": 3.61, "learning_rate": 0.00014185560379482233, "loss": 0.0353, "step": 2499 }, { "epoch": 3.62, "learning_rate": 0.0001418234442836469, "loss": 0.0626, "step": 2500 }, { "epoch": 3.62, "learning_rate": 0.00014179128477247144, "loss": 0.0428, "step": 2501 }, { "epoch": 3.62, "learning_rate": 0.00014175912526129606, "loss": 0.0217, "step": 2502 }, { "epoch": 3.62, "learning_rate": 0.0001417269657501206, "loss": 0.1519, "step": 2503 }, { "epoch": 3.62, "learning_rate": 0.00014169480623894517, "loss": 0.1953, "step": 2504 }, { "epoch": 3.62, "learning_rate": 0.00014166264672776975, "loss": 0.2417, "step": 2505 }, { "epoch": 3.62, "learning_rate": 0.0001416304872165943, "loss": 0.0337, "step": 2506 }, { "epoch": 3.63, "learning_rate": 0.00014159832770541887, "loss": 0.1387, "step": 2507 }, { "epoch": 3.63, "learning_rate": 0.00014156616819424345, "loss": 0.2074, "step": 2508 }, { "epoch": 3.63, "learning_rate": 0.00014153400868306804, "loss": 0.0554, "step": 2509 }, { "epoch": 3.63, "learning_rate": 0.0001415018491718926, "loss": 0.3789, "step": 2510 }, { "epoch": 3.63, "learning_rate": 0.00014146968966071718, "loss": 0.0323, "step": 2511 }, { "epoch": 3.63, "learning_rate": 0.00014143753014954173, "loss": 0.5098, "step": 2512 }, { "epoch": 3.63, "learning_rate": 0.0001414053706383663, "loss": 0.1738, "step": 2513 }, { "epoch": 3.64, "learning_rate": 0.00014137321112719087, "loss": 0.0403, "step": 2514 }, { "epoch": 3.64, "learning_rate": 0.00014134105161601546, "loss": 0.141, "step": 2515 }, { "epoch": 3.64, "learning_rate": 0.00014130889210484002, "loss": 0.1518, "step": 2516 }, { "epoch": 3.64, "learning_rate": 0.00014127673259366457, "loss": 0.2151, "step": 2517 }, { "epoch": 3.64, "learning_rate": 0.00014124457308248916, "loss": 0.015, "step": 2518 }, { "epoch": 3.64, "learning_rate": 0.0001412124135713137, "loss": 0.0858, "step": 2519 }, { "epoch": 3.64, "learning_rate": 0.0001411802540601383, "loss": 0.1735, "step": 2520 }, { "epoch": 3.65, "learning_rate": 0.00014114809454896288, "loss": 0.1978, "step": 2521 }, { "epoch": 3.65, "learning_rate": 0.00014111593503778744, "loss": 0.0113, "step": 2522 }, { "epoch": 3.65, "learning_rate": 0.000141083775526612, "loss": 0.003, "step": 2523 }, { "epoch": 3.65, "learning_rate": 0.00014105161601543658, "loss": 0.0547, "step": 2524 }, { "epoch": 3.65, "learning_rate": 0.00014101945650426114, "loss": 0.0878, "step": 2525 }, { "epoch": 3.65, "learning_rate": 0.0001409872969930857, "loss": 0.3672, "step": 2526 }, { "epoch": 3.65, "learning_rate": 0.00014095513748191028, "loss": 0.2368, "step": 2527 }, { "epoch": 3.66, "learning_rate": 0.00014092297797073486, "loss": 0.2212, "step": 2528 }, { "epoch": 3.66, "learning_rate": 0.00014089081845955942, "loss": 0.007, "step": 2529 }, { "epoch": 3.66, "learning_rate": 0.000140858658948384, "loss": 0.2922, "step": 2530 }, { "epoch": 3.66, "learning_rate": 0.00014082649943720856, "loss": 0.0599, "step": 2531 }, { "epoch": 3.66, "learning_rate": 0.00014079433992603312, "loss": 0.2712, "step": 2532 }, { "epoch": 3.66, "learning_rate": 0.0001407621804148577, "loss": 0.0172, "step": 2533 }, { "epoch": 3.66, "learning_rate": 0.00014073002090368228, "loss": 0.2765, "step": 2534 }, { "epoch": 3.67, "learning_rate": 0.00014069786139250684, "loss": 0.0279, "step": 2535 }, { "epoch": 3.67, "learning_rate": 0.00014066570188133143, "loss": 0.0845, "step": 2536 }, { "epoch": 3.67, "learning_rate": 0.00014063354237015598, "loss": 0.2998, "step": 2537 }, { "epoch": 3.67, "learning_rate": 0.00014060138285898054, "loss": 0.2195, "step": 2538 }, { "epoch": 3.67, "learning_rate": 0.00014056922334780512, "loss": 0.3013, "step": 2539 }, { "epoch": 3.67, "learning_rate": 0.0001405370638366297, "loss": 0.2228, "step": 2540 }, { "epoch": 3.67, "learning_rate": 0.00014050490432545426, "loss": 0.1082, "step": 2541 }, { "epoch": 3.68, "learning_rate": 0.00014047274481427882, "loss": 0.0708, "step": 2542 }, { "epoch": 3.68, "learning_rate": 0.0001404405853031034, "loss": 0.0754, "step": 2543 }, { "epoch": 3.68, "learning_rate": 0.00014040842579192796, "loss": 0.1901, "step": 2544 }, { "epoch": 3.68, "learning_rate": 0.00014037626628075252, "loss": 0.1733, "step": 2545 }, { "epoch": 3.68, "learning_rate": 0.0001403441067695771, "loss": 0.121, "step": 2546 }, { "epoch": 3.68, "learning_rate": 0.0001403119472584017, "loss": 0.2085, "step": 2547 }, { "epoch": 3.68, "learning_rate": 0.00014027978774722624, "loss": 0.1233, "step": 2548 }, { "epoch": 3.69, "learning_rate": 0.00014024762823605083, "loss": 0.0298, "step": 2549 }, { "epoch": 3.69, "learning_rate": 0.00014021546872487539, "loss": 0.2759, "step": 2550 }, { "epoch": 3.69, "learning_rate": 0.00014018330921369994, "loss": 0.1448, "step": 2551 }, { "epoch": 3.69, "learning_rate": 0.00014015114970252453, "loss": 0.1897, "step": 2552 }, { "epoch": 3.69, "learning_rate": 0.0001401189901913491, "loss": 0.0074, "step": 2553 }, { "epoch": 3.69, "learning_rate": 0.00014008683068017367, "loss": 0.0731, "step": 2554 }, { "epoch": 3.69, "learning_rate": 0.00014005467116899825, "loss": 0.0331, "step": 2555 }, { "epoch": 3.7, "learning_rate": 0.0001400225116578228, "loss": 0.0887, "step": 2556 }, { "epoch": 3.7, "learning_rate": 0.00013999035214664737, "loss": 0.3906, "step": 2557 }, { "epoch": 3.7, "learning_rate": 0.00013995819263547195, "loss": 0.0381, "step": 2558 }, { "epoch": 3.7, "learning_rate": 0.0001399260331242965, "loss": 0.1776, "step": 2559 }, { "epoch": 3.7, "learning_rate": 0.0001398938736131211, "loss": 0.1151, "step": 2560 }, { "epoch": 3.7, "learning_rate": 0.00013986171410194567, "loss": 0.0053, "step": 2561 }, { "epoch": 3.7, "learning_rate": 0.00013982955459077023, "loss": 0.3486, "step": 2562 }, { "epoch": 3.71, "learning_rate": 0.0001397973950795948, "loss": 0.2056, "step": 2563 }, { "epoch": 3.71, "learning_rate": 0.00013976523556841937, "loss": 0.103, "step": 2564 }, { "epoch": 3.71, "learning_rate": 0.00013973307605724393, "loss": 0.0137, "step": 2565 }, { "epoch": 3.71, "learning_rate": 0.00013970091654606851, "loss": 0.111, "step": 2566 }, { "epoch": 3.71, "learning_rate": 0.00013966875703489307, "loss": 0.0178, "step": 2567 }, { "epoch": 3.71, "learning_rate": 0.00013963659752371765, "loss": 0.0841, "step": 2568 }, { "epoch": 3.72, "learning_rate": 0.0001396044380125422, "loss": 0.293, "step": 2569 }, { "epoch": 3.72, "learning_rate": 0.00013957227850136677, "loss": 0.2329, "step": 2570 }, { "epoch": 3.72, "learning_rate": 0.00013954011899019135, "loss": 0.0773, "step": 2571 }, { "epoch": 3.72, "learning_rate": 0.0001395079594790159, "loss": 0.2578, "step": 2572 }, { "epoch": 3.72, "learning_rate": 0.0001394757999678405, "loss": 0.052, "step": 2573 }, { "epoch": 3.72, "learning_rate": 0.00013944364045666508, "loss": 0.0087, "step": 2574 }, { "epoch": 3.72, "learning_rate": 0.00013941148094548963, "loss": 0.3438, "step": 2575 }, { "epoch": 3.73, "learning_rate": 0.0001393793214343142, "loss": 0.2544, "step": 2576 }, { "epoch": 3.73, "learning_rate": 0.00013934716192313878, "loss": 0.1897, "step": 2577 }, { "epoch": 3.73, "learning_rate": 0.00013931500241196333, "loss": 0.1146, "step": 2578 }, { "epoch": 3.73, "learning_rate": 0.00013928284290078792, "loss": 0.0984, "step": 2579 }, { "epoch": 3.73, "learning_rate": 0.0001392506833896125, "loss": 0.074, "step": 2580 }, { "epoch": 3.73, "learning_rate": 0.00013921852387843706, "loss": 0.3896, "step": 2581 }, { "epoch": 3.73, "learning_rate": 0.00013918636436726161, "loss": 0.0078, "step": 2582 }, { "epoch": 3.74, "learning_rate": 0.0001391542048560862, "loss": 0.0721, "step": 2583 }, { "epoch": 3.74, "learning_rate": 0.00013912204534491076, "loss": 0.0859, "step": 2584 }, { "epoch": 3.74, "learning_rate": 0.00013908988583373534, "loss": 0.1468, "step": 2585 }, { "epoch": 3.74, "learning_rate": 0.00013905772632255992, "loss": 0.0035, "step": 2586 }, { "epoch": 3.74, "learning_rate": 0.00013902556681138448, "loss": 0.0611, "step": 2587 }, { "epoch": 3.74, "learning_rate": 0.00013899340730020904, "loss": 0.1444, "step": 2588 }, { "epoch": 3.74, "learning_rate": 0.00013896124778903362, "loss": 0.1697, "step": 2589 }, { "epoch": 3.75, "learning_rate": 0.00013892908827785818, "loss": 0.1718, "step": 2590 }, { "epoch": 3.75, "learning_rate": 0.00013889692876668274, "loss": 0.0135, "step": 2591 }, { "epoch": 3.75, "learning_rate": 0.00013886476925550732, "loss": 0.0086, "step": 2592 }, { "epoch": 3.75, "learning_rate": 0.0001388326097443319, "loss": 0.2661, "step": 2593 }, { "epoch": 3.75, "learning_rate": 0.00013880045023315646, "loss": 0.0434, "step": 2594 }, { "epoch": 3.75, "learning_rate": 0.00013876829072198102, "loss": 0.2144, "step": 2595 }, { "epoch": 3.75, "learning_rate": 0.0001387361312108056, "loss": 0.1537, "step": 2596 }, { "epoch": 3.76, "learning_rate": 0.00013870397169963016, "loss": 0.0781, "step": 2597 }, { "epoch": 3.76, "learning_rate": 0.00013867181218845474, "loss": 0.0903, "step": 2598 }, { "epoch": 3.76, "learning_rate": 0.00013863965267727933, "loss": 0.0192, "step": 2599 }, { "epoch": 3.76, "learning_rate": 0.00013860749316610388, "loss": 0.2544, "step": 2600 }, { "epoch": 3.76, "learning_rate": 0.00013857533365492844, "loss": 0.3428, "step": 2601 }, { "epoch": 3.76, "learning_rate": 0.00013854317414375303, "loss": 0.2545, "step": 2602 }, { "epoch": 3.76, "learning_rate": 0.00013851101463257758, "loss": 0.0393, "step": 2603 }, { "epoch": 3.77, "learning_rate": 0.00013847885512140214, "loss": 0.064, "step": 2604 }, { "epoch": 3.77, "learning_rate": 0.00013844669561022675, "loss": 0.0376, "step": 2605 }, { "epoch": 3.77, "learning_rate": 0.0001384145360990513, "loss": 0.0293, "step": 2606 }, { "epoch": 3.77, "learning_rate": 0.00013838237658787586, "loss": 0.4336, "step": 2607 }, { "epoch": 3.77, "learning_rate": 0.00013835021707670045, "loss": 0.1915, "step": 2608 }, { "epoch": 3.77, "learning_rate": 0.000138318057565525, "loss": 0.161, "step": 2609 }, { "epoch": 3.77, "learning_rate": 0.00013828589805434956, "loss": 0.0375, "step": 2610 }, { "epoch": 3.78, "learning_rate": 0.00013825373854317417, "loss": 0.1117, "step": 2611 }, { "epoch": 3.78, "learning_rate": 0.00013822157903199873, "loss": 0.1504, "step": 2612 }, { "epoch": 3.78, "learning_rate": 0.0001381894195208233, "loss": 0.2334, "step": 2613 }, { "epoch": 3.78, "learning_rate": 0.00013815726000964787, "loss": 0.2437, "step": 2614 }, { "epoch": 3.78, "learning_rate": 0.00013812510049847243, "loss": 0.1284, "step": 2615 }, { "epoch": 3.78, "learning_rate": 0.00013809294098729699, "loss": 0.1729, "step": 2616 }, { "epoch": 3.78, "learning_rate": 0.00013806078147612157, "loss": 0.017, "step": 2617 }, { "epoch": 3.79, "learning_rate": 0.00013802862196494615, "loss": 0.0602, "step": 2618 }, { "epoch": 3.79, "learning_rate": 0.0001379964624537707, "loss": 0.0143, "step": 2619 }, { "epoch": 3.79, "learning_rate": 0.00013796430294259527, "loss": 0.1401, "step": 2620 }, { "epoch": 3.79, "learning_rate": 0.00013793214343141985, "loss": 0.3111, "step": 2621 }, { "epoch": 3.79, "learning_rate": 0.0001378999839202444, "loss": 0.2465, "step": 2622 }, { "epoch": 3.79, "learning_rate": 0.000137867824409069, "loss": 0.1692, "step": 2623 }, { "epoch": 3.79, "learning_rate": 0.00013783566489789358, "loss": 0.1254, "step": 2624 }, { "epoch": 3.8, "learning_rate": 0.00013780350538671813, "loss": 0.2031, "step": 2625 }, { "epoch": 3.8, "learning_rate": 0.0001377713458755427, "loss": 0.1836, "step": 2626 }, { "epoch": 3.8, "learning_rate": 0.00013773918636436727, "loss": 0.0214, "step": 2627 }, { "epoch": 3.8, "learning_rate": 0.00013770702685319183, "loss": 0.3643, "step": 2628 }, { "epoch": 3.8, "learning_rate": 0.0001376748673420164, "loss": 0.0388, "step": 2629 }, { "epoch": 3.8, "learning_rate": 0.00013764270783084097, "loss": 0.135, "step": 2630 }, { "epoch": 3.8, "learning_rate": 0.00013761054831966556, "loss": 0.1223, "step": 2631 }, { "epoch": 3.81, "learning_rate": 0.0001375783888084901, "loss": 0.0628, "step": 2632 }, { "epoch": 3.81, "learning_rate": 0.0001375462292973147, "loss": 0.0469, "step": 2633 }, { "epoch": 3.81, "learning_rate": 0.00013751406978613925, "loss": 0.0178, "step": 2634 }, { "epoch": 3.81, "learning_rate": 0.0001374819102749638, "loss": 0.0163, "step": 2635 }, { "epoch": 3.81, "learning_rate": 0.0001374497507637884, "loss": 0.158, "step": 2636 }, { "epoch": 3.81, "learning_rate": 0.00013741759125261298, "loss": 0.2888, "step": 2637 }, { "epoch": 3.81, "learning_rate": 0.00013738543174143754, "loss": 0.0836, "step": 2638 }, { "epoch": 3.82, "learning_rate": 0.00013735327223026212, "loss": 0.2397, "step": 2639 }, { "epoch": 3.82, "learning_rate": 0.00013732111271908668, "loss": 0.4287, "step": 2640 }, { "epoch": 3.82, "learning_rate": 0.00013728895320791123, "loss": 0.2916, "step": 2641 }, { "epoch": 3.82, "learning_rate": 0.00013725679369673582, "loss": 0.0308, "step": 2642 }, { "epoch": 3.82, "learning_rate": 0.0001372246341855604, "loss": 0.1259, "step": 2643 }, { "epoch": 3.82, "learning_rate": 0.00013719247467438496, "loss": 0.3623, "step": 2644 }, { "epoch": 3.83, "learning_rate": 0.00013716031516320952, "loss": 0.2725, "step": 2645 }, { "epoch": 3.83, "learning_rate": 0.0001371281556520341, "loss": 0.1414, "step": 2646 }, { "epoch": 3.83, "learning_rate": 0.00013709599614085866, "loss": 0.1588, "step": 2647 }, { "epoch": 3.83, "learning_rate": 0.00013706383662968324, "loss": 0.1205, "step": 2648 }, { "epoch": 3.83, "learning_rate": 0.0001370316771185078, "loss": 0.1737, "step": 2649 }, { "epoch": 3.83, "learning_rate": 0.00013699951760733238, "loss": 0.0642, "step": 2650 }, { "epoch": 3.83, "learning_rate": 0.00013696735809615694, "loss": 0.0712, "step": 2651 }, { "epoch": 3.84, "learning_rate": 0.00013693519858498152, "loss": 0.0101, "step": 2652 }, { "epoch": 3.84, "learning_rate": 0.00013690303907380608, "loss": 0.2517, "step": 2653 }, { "epoch": 3.84, "learning_rate": 0.00013687087956263064, "loss": 0.0915, "step": 2654 }, { "epoch": 3.84, "learning_rate": 0.00013683872005145522, "loss": 0.0109, "step": 2655 }, { "epoch": 3.84, "learning_rate": 0.0001368065605402798, "loss": 0.08, "step": 2656 }, { "epoch": 3.84, "learning_rate": 0.00013677440102910436, "loss": 0.1438, "step": 2657 }, { "epoch": 3.84, "learning_rate": 0.00013674224151792895, "loss": 0.0546, "step": 2658 }, { "epoch": 3.85, "learning_rate": 0.0001367100820067535, "loss": 0.2761, "step": 2659 }, { "epoch": 3.85, "learning_rate": 0.00013667792249557806, "loss": 0.1412, "step": 2660 }, { "epoch": 3.85, "learning_rate": 0.00013664576298440264, "loss": 0.3975, "step": 2661 }, { "epoch": 3.85, "learning_rate": 0.0001366136034732272, "loss": 0.1272, "step": 2662 }, { "epoch": 3.85, "learning_rate": 0.00013658144396205179, "loss": 0.0806, "step": 2663 }, { "epoch": 3.85, "learning_rate": 0.00013654928445087637, "loss": 0.1996, "step": 2664 }, { "epoch": 3.85, "learning_rate": 0.00013651712493970093, "loss": 0.1963, "step": 2665 }, { "epoch": 3.86, "learning_rate": 0.00013648496542852548, "loss": 0.043, "step": 2666 }, { "epoch": 3.86, "learning_rate": 0.00013645280591735007, "loss": 0.0738, "step": 2667 }, { "epoch": 3.86, "learning_rate": 0.00013642064640617462, "loss": 0.2234, "step": 2668 }, { "epoch": 3.86, "learning_rate": 0.0001363884868949992, "loss": 0.2429, "step": 2669 }, { "epoch": 3.86, "learning_rate": 0.00013635632738382377, "loss": 0.1432, "step": 2670 }, { "epoch": 3.86, "learning_rate": 0.00013632416787264835, "loss": 0.1307, "step": 2671 }, { "epoch": 3.86, "learning_rate": 0.0001362920083614729, "loss": 0.0442, "step": 2672 }, { "epoch": 3.87, "learning_rate": 0.0001362598488502975, "loss": 0.1423, "step": 2673 }, { "epoch": 3.87, "learning_rate": 0.00013622768933912205, "loss": 0.0301, "step": 2674 }, { "epoch": 3.87, "learning_rate": 0.0001361955298279466, "loss": 0.1074, "step": 2675 }, { "epoch": 3.87, "learning_rate": 0.0001361633703167712, "loss": 0.1718, "step": 2676 }, { "epoch": 3.87, "learning_rate": 0.00013613121080559577, "loss": 0.1346, "step": 2677 }, { "epoch": 3.87, "learning_rate": 0.00013609905129442033, "loss": 0.2354, "step": 2678 }, { "epoch": 3.87, "learning_rate": 0.0001360668917832449, "loss": 0.0293, "step": 2679 }, { "epoch": 3.88, "learning_rate": 0.00013603473227206947, "loss": 0.2172, "step": 2680 }, { "epoch": 3.88, "learning_rate": 0.00013600257276089403, "loss": 0.0549, "step": 2681 }, { "epoch": 3.88, "learning_rate": 0.0001359704132497186, "loss": 0.061, "step": 2682 }, { "epoch": 3.88, "learning_rate": 0.0001359382537385432, "loss": 0.2646, "step": 2683 }, { "epoch": 3.88, "learning_rate": 0.00013590609422736775, "loss": 0.2695, "step": 2684 }, { "epoch": 3.88, "learning_rate": 0.0001358739347161923, "loss": 0.0085, "step": 2685 }, { "epoch": 3.88, "learning_rate": 0.0001358417752050169, "loss": 0.0308, "step": 2686 }, { "epoch": 3.89, "learning_rate": 0.00013580961569384145, "loss": 0.0105, "step": 2687 }, { "epoch": 3.89, "learning_rate": 0.00013577745618266604, "loss": 0.15, "step": 2688 }, { "epoch": 3.89, "learning_rate": 0.00013574529667149062, "loss": 0.0389, "step": 2689 }, { "epoch": 3.89, "learning_rate": 0.00013571313716031518, "loss": 0.1042, "step": 2690 }, { "epoch": 3.89, "learning_rate": 0.00013568097764913973, "loss": 0.5225, "step": 2691 }, { "epoch": 3.89, "learning_rate": 0.00013564881813796432, "loss": 0.3247, "step": 2692 }, { "epoch": 3.89, "learning_rate": 0.00013561665862678887, "loss": 0.2188, "step": 2693 }, { "epoch": 3.9, "learning_rate": 0.00013558449911561343, "loss": 0.1084, "step": 2694 }, { "epoch": 3.9, "learning_rate": 0.00013555233960443802, "loss": 0.0742, "step": 2695 }, { "epoch": 3.9, "learning_rate": 0.0001355201800932626, "loss": 0.1572, "step": 2696 }, { "epoch": 3.9, "learning_rate": 0.00013548802058208716, "loss": 0.022, "step": 2697 }, { "epoch": 3.9, "learning_rate": 0.00013545586107091174, "loss": 0.0238, "step": 2698 }, { "epoch": 3.9, "learning_rate": 0.0001354237015597363, "loss": 0.0622, "step": 2699 }, { "epoch": 3.9, "learning_rate": 0.00013539154204856085, "loss": 0.189, "step": 2700 }, { "epoch": 3.91, "learning_rate": 0.00013535938253738544, "loss": 0.0329, "step": 2701 }, { "epoch": 3.91, "learning_rate": 0.00013532722302621002, "loss": 0.1896, "step": 2702 }, { "epoch": 3.91, "learning_rate": 0.00013529506351503458, "loss": 0.2559, "step": 2703 }, { "epoch": 3.91, "learning_rate": 0.00013526290400385914, "loss": 0.0652, "step": 2704 }, { "epoch": 3.91, "learning_rate": 0.00013523074449268372, "loss": 0.0107, "step": 2705 }, { "epoch": 3.91, "learning_rate": 0.00013519858498150828, "loss": 0.0195, "step": 2706 }, { "epoch": 3.91, "learning_rate": 0.00013516642547033283, "loss": 0.2856, "step": 2707 }, { "epoch": 3.92, "learning_rate": 0.00013513426595915745, "loss": 0.313, "step": 2708 }, { "epoch": 3.92, "learning_rate": 0.000135102106447982, "loss": 0.0421, "step": 2709 }, { "epoch": 3.92, "learning_rate": 0.00013506994693680656, "loss": 0.1102, "step": 2710 }, { "epoch": 3.92, "learning_rate": 0.00013503778742563114, "loss": 0.1168, "step": 2711 }, { "epoch": 3.92, "learning_rate": 0.0001350056279144557, "loss": 0.144, "step": 2712 }, { "epoch": 3.92, "learning_rate": 0.00013497346840328026, "loss": 0.0374, "step": 2713 }, { "epoch": 3.92, "learning_rate": 0.00013494130889210487, "loss": 0.0984, "step": 2714 }, { "epoch": 3.93, "learning_rate": 0.00013490914938092943, "loss": 0.0265, "step": 2715 }, { "epoch": 3.93, "learning_rate": 0.00013487698986975398, "loss": 0.0994, "step": 2716 }, { "epoch": 3.93, "learning_rate": 0.00013484483035857857, "loss": 0.0246, "step": 2717 }, { "epoch": 3.93, "learning_rate": 0.00013481267084740312, "loss": 0.1948, "step": 2718 }, { "epoch": 3.93, "learning_rate": 0.00013478051133622768, "loss": 0.1774, "step": 2719 }, { "epoch": 3.93, "learning_rate": 0.00013474835182505226, "loss": 0.0889, "step": 2720 }, { "epoch": 3.93, "learning_rate": 0.00013471619231387685, "loss": 0.2098, "step": 2721 }, { "epoch": 3.94, "learning_rate": 0.0001346840328027014, "loss": 0.1232, "step": 2722 }, { "epoch": 3.94, "learning_rate": 0.000134651873291526, "loss": 0.1697, "step": 2723 }, { "epoch": 3.94, "learning_rate": 0.00013461971378035055, "loss": 0.0459, "step": 2724 }, { "epoch": 3.94, "learning_rate": 0.0001345875542691751, "loss": 0.1804, "step": 2725 }, { "epoch": 3.94, "learning_rate": 0.0001345553947579997, "loss": 0.1339, "step": 2726 }, { "epoch": 3.94, "learning_rate": 0.00013452323524682427, "loss": 0.2424, "step": 2727 }, { "epoch": 3.95, "learning_rate": 0.00013449107573564883, "loss": 0.0598, "step": 2728 }, { "epoch": 3.95, "learning_rate": 0.00013445891622447339, "loss": 0.1699, "step": 2729 }, { "epoch": 3.95, "learning_rate": 0.00013442675671329797, "loss": 0.1, "step": 2730 }, { "epoch": 3.95, "learning_rate": 0.00013439459720212253, "loss": 0.2554, "step": 2731 }, { "epoch": 3.95, "learning_rate": 0.00013436243769094708, "loss": 0.2988, "step": 2732 }, { "epoch": 3.95, "learning_rate": 0.0001343302781797717, "loss": 0.2961, "step": 2733 }, { "epoch": 3.95, "learning_rate": 0.00013429811866859625, "loss": 0.0289, "step": 2734 }, { "epoch": 3.96, "learning_rate": 0.0001342659591574208, "loss": 0.0309, "step": 2735 }, { "epoch": 3.96, "learning_rate": 0.0001342337996462454, "loss": 0.0965, "step": 2736 }, { "epoch": 3.96, "learning_rate": 0.00013420164013506995, "loss": 0.088, "step": 2737 }, { "epoch": 3.96, "learning_rate": 0.0001341694806238945, "loss": 0.1633, "step": 2738 }, { "epoch": 3.96, "learning_rate": 0.0001341373211127191, "loss": 0.0501, "step": 2739 }, { "epoch": 3.96, "learning_rate": 0.00013410516160154367, "loss": 0.0489, "step": 2740 }, { "epoch": 3.96, "learning_rate": 0.00013407300209036823, "loss": 0.2336, "step": 2741 }, { "epoch": 3.97, "learning_rate": 0.00013404084257919282, "loss": 0.1605, "step": 2742 }, { "epoch": 3.97, "learning_rate": 0.00013400868306801737, "loss": 0.0494, "step": 2743 }, { "epoch": 3.97, "learning_rate": 0.00013397652355684193, "loss": 0.1282, "step": 2744 }, { "epoch": 3.97, "learning_rate": 0.0001339443640456665, "loss": 0.0048, "step": 2745 }, { "epoch": 3.97, "learning_rate": 0.0001339122045344911, "loss": 0.0749, "step": 2746 }, { "epoch": 3.97, "learning_rate": 0.00013388004502331565, "loss": 0.0113, "step": 2747 }, { "epoch": 3.97, "learning_rate": 0.00013384788551214024, "loss": 0.1257, "step": 2748 }, { "epoch": 3.98, "learning_rate": 0.0001338157260009648, "loss": 0.1504, "step": 2749 }, { "epoch": 3.98, "learning_rate": 0.00013378356648978935, "loss": 0.1152, "step": 2750 }, { "epoch": 3.98, "learning_rate": 0.00013375140697861394, "loss": 0.0057, "step": 2751 }, { "epoch": 3.98, "learning_rate": 0.0001337192474674385, "loss": 0.0202, "step": 2752 }, { "epoch": 3.98, "learning_rate": 0.00013368708795626308, "loss": 0.1018, "step": 2753 }, { "epoch": 3.98, "learning_rate": 0.00013365492844508763, "loss": 0.0255, "step": 2754 }, { "epoch": 3.98, "learning_rate": 0.00013362276893391222, "loss": 0.0334, "step": 2755 }, { "epoch": 3.99, "learning_rate": 0.00013359060942273678, "loss": 0.0118, "step": 2756 }, { "epoch": 3.99, "learning_rate": 0.00013355844991156133, "loss": 0.1422, "step": 2757 }, { "epoch": 3.99, "learning_rate": 0.00013352629040038592, "loss": 0.1011, "step": 2758 }, { "epoch": 3.99, "learning_rate": 0.0001334941308892105, "loss": 0.1716, "step": 2759 }, { "epoch": 3.99, "learning_rate": 0.00013346197137803506, "loss": 0.1792, "step": 2760 }, { "epoch": 3.99, "learning_rate": 0.00013342981186685964, "loss": 0.0273, "step": 2761 }, { "epoch": 3.99, "learning_rate": 0.0001333976523556842, "loss": 0.0128, "step": 2762 }, { "epoch": 4.0, "learning_rate": 0.00013336549284450876, "loss": 0.035, "step": 2763 }, { "epoch": 4.0, "learning_rate": 0.00013333333333333334, "loss": 0.023, "step": 2764 }, { "epoch": 4.0, "learning_rate": 0.0001333011738221579, "loss": 0.06, "step": 2765 }, { "epoch": 4.0, "learning_rate": 0.00013326901431098248, "loss": 0.1799, "step": 2766 }, { "epoch": 4.0, "learning_rate": 0.00013323685479980706, "loss": 0.0686, "step": 2767 }, { "epoch": 4.0, "learning_rate": 0.00013320469528863162, "loss": 0.1179, "step": 2768 }, { "epoch": 4.0, "learning_rate": 0.00013317253577745618, "loss": 0.0988, "step": 2769 }, { "epoch": 4.01, "learning_rate": 0.00013314037626628076, "loss": 0.0282, "step": 2770 }, { "epoch": 4.01, "learning_rate": 0.00013310821675510532, "loss": 0.146, "step": 2771 }, { "epoch": 4.01, "learning_rate": 0.0001330760572439299, "loss": 0.0303, "step": 2772 }, { "epoch": 4.01, "learning_rate": 0.0001330438977327545, "loss": 0.0592, "step": 2773 }, { "epoch": 4.01, "learning_rate": 0.00013301173822157904, "loss": 0.0385, "step": 2774 }, { "epoch": 4.01, "learning_rate": 0.0001329795787104036, "loss": 0.0346, "step": 2775 }, { "epoch": 4.01, "learning_rate": 0.00013294741919922819, "loss": 0.0203, "step": 2776 }, { "epoch": 4.02, "learning_rate": 0.00013291525968805274, "loss": 0.0518, "step": 2777 }, { "epoch": 4.02, "learning_rate": 0.00013288310017687733, "loss": 0.0085, "step": 2778 }, { "epoch": 4.02, "learning_rate": 0.00013285094066570188, "loss": 0.0712, "step": 2779 }, { "epoch": 4.02, "learning_rate": 0.00013281878115452647, "loss": 0.0099, "step": 2780 }, { "epoch": 4.02, "learning_rate": 0.00013278662164335102, "loss": 0.0529, "step": 2781 }, { "epoch": 4.02, "learning_rate": 0.00013275446213217558, "loss": 0.0137, "step": 2782 }, { "epoch": 4.02, "learning_rate": 0.00013272230262100017, "loss": 0.0247, "step": 2783 }, { "epoch": 4.03, "learning_rate": 0.00013269014310982472, "loss": 0.157, "step": 2784 }, { "epoch": 4.03, "learning_rate": 0.0001326579835986493, "loss": 0.0727, "step": 2785 }, { "epoch": 4.03, "learning_rate": 0.0001326258240874739, "loss": 0.0894, "step": 2786 }, { "epoch": 4.03, "learning_rate": 0.00013259366457629845, "loss": 0.0852, "step": 2787 }, { "epoch": 4.03, "learning_rate": 0.000132561505065123, "loss": 0.0137, "step": 2788 }, { "epoch": 4.03, "learning_rate": 0.0001325293455539476, "loss": 0.0555, "step": 2789 }, { "epoch": 4.03, "learning_rate": 0.00013249718604277215, "loss": 0.0683, "step": 2790 }, { "epoch": 4.04, "learning_rate": 0.00013246502653159673, "loss": 0.146, "step": 2791 }, { "epoch": 4.04, "learning_rate": 0.00013243286702042131, "loss": 0.1023, "step": 2792 }, { "epoch": 4.04, "learning_rate": 0.00013240070750924587, "loss": 0.1013, "step": 2793 }, { "epoch": 4.04, "learning_rate": 0.00013236854799807043, "loss": 0.0159, "step": 2794 }, { "epoch": 4.04, "learning_rate": 0.000132336388486895, "loss": 0.1562, "step": 2795 }, { "epoch": 4.04, "learning_rate": 0.00013230422897571957, "loss": 0.0916, "step": 2796 }, { "epoch": 4.04, "learning_rate": 0.00013227206946454413, "loss": 0.0656, "step": 2797 }, { "epoch": 4.05, "learning_rate": 0.0001322399099533687, "loss": 0.065, "step": 2798 }, { "epoch": 4.05, "learning_rate": 0.0001322077504421933, "loss": 0.0615, "step": 2799 }, { "epoch": 4.05, "learning_rate": 0.00013217559093101785, "loss": 0.1175, "step": 2800 }, { "epoch": 4.05, "learning_rate": 0.00013214343141984244, "loss": 0.1191, "step": 2801 }, { "epoch": 4.05, "learning_rate": 0.000132111271908667, "loss": 0.2065, "step": 2802 }, { "epoch": 4.05, "learning_rate": 0.00013207911239749155, "loss": 0.0631, "step": 2803 }, { "epoch": 4.05, "learning_rate": 0.00013204695288631613, "loss": 0.0476, "step": 2804 }, { "epoch": 4.06, "learning_rate": 0.00013201479337514072, "loss": 0.1284, "step": 2805 }, { "epoch": 4.06, "learning_rate": 0.00013198263386396527, "loss": 0.1031, "step": 2806 }, { "epoch": 4.06, "learning_rate": 0.00013195047435278983, "loss": 0.0455, "step": 2807 }, { "epoch": 4.06, "learning_rate": 0.00013191831484161442, "loss": 0.0962, "step": 2808 }, { "epoch": 4.06, "learning_rate": 0.00013188615533043897, "loss": 0.0145, "step": 2809 }, { "epoch": 4.06, "learning_rate": 0.00013185399581926356, "loss": 0.0927, "step": 2810 }, { "epoch": 4.07, "learning_rate": 0.00013182183630808814, "loss": 0.1124, "step": 2811 }, { "epoch": 4.07, "learning_rate": 0.0001317896767969127, "loss": 0.0252, "step": 2812 }, { "epoch": 4.07, "learning_rate": 0.00013175751728573725, "loss": 0.0813, "step": 2813 }, { "epoch": 4.07, "learning_rate": 0.00013172535777456184, "loss": 0.0279, "step": 2814 }, { "epoch": 4.07, "learning_rate": 0.0001316931982633864, "loss": 0.1022, "step": 2815 }, { "epoch": 4.07, "learning_rate": 0.00013166103875221095, "loss": 0.149, "step": 2816 }, { "epoch": 4.07, "learning_rate": 0.00013162887924103556, "loss": 0.0593, "step": 2817 }, { "epoch": 4.08, "learning_rate": 0.00013159671972986012, "loss": 0.026, "step": 2818 }, { "epoch": 4.08, "learning_rate": 0.00013156456021868468, "loss": 0.0951, "step": 2819 }, { "epoch": 4.08, "learning_rate": 0.00013153240070750926, "loss": 0.0235, "step": 2820 }, { "epoch": 4.08, "learning_rate": 0.00013150024119633382, "loss": 0.0885, "step": 2821 }, { "epoch": 4.08, "learning_rate": 0.00013146808168515838, "loss": 0.0973, "step": 2822 }, { "epoch": 4.08, "learning_rate": 0.00013143592217398296, "loss": 0.011, "step": 2823 }, { "epoch": 4.08, "learning_rate": 0.00013140376266280754, "loss": 0.1362, "step": 2824 }, { "epoch": 4.09, "learning_rate": 0.0001313716031516321, "loss": 0.1348, "step": 2825 }, { "epoch": 4.09, "learning_rate": 0.00013133944364045668, "loss": 0.1865, "step": 2826 }, { "epoch": 4.09, "learning_rate": 0.00013130728412928124, "loss": 0.0811, "step": 2827 }, { "epoch": 4.09, "learning_rate": 0.0001312751246181058, "loss": 0.2988, "step": 2828 }, { "epoch": 4.09, "learning_rate": 0.00013124296510693038, "loss": 0.0668, "step": 2829 }, { "epoch": 4.09, "learning_rate": 0.00013121080559575497, "loss": 0.1145, "step": 2830 }, { "epoch": 4.09, "learning_rate": 0.00013117864608457952, "loss": 0.0825, "step": 2831 }, { "epoch": 4.1, "learning_rate": 0.00013114648657340408, "loss": 0.0658, "step": 2832 }, { "epoch": 4.1, "learning_rate": 0.00013111432706222866, "loss": 0.0279, "step": 2833 }, { "epoch": 4.1, "learning_rate": 0.00013108216755105322, "loss": 0.1161, "step": 2834 }, { "epoch": 4.1, "learning_rate": 0.00013105000803987778, "loss": 0.0884, "step": 2835 }, { "epoch": 4.1, "learning_rate": 0.0001310178485287024, "loss": 0.0322, "step": 2836 }, { "epoch": 4.1, "learning_rate": 0.00013098568901752695, "loss": 0.0104, "step": 2837 }, { "epoch": 4.1, "learning_rate": 0.0001309535295063515, "loss": 0.1492, "step": 2838 }, { "epoch": 4.11, "learning_rate": 0.0001309213699951761, "loss": 0.1538, "step": 2839 }, { "epoch": 4.11, "learning_rate": 0.00013088921048400064, "loss": 0.0629, "step": 2840 }, { "epoch": 4.11, "learning_rate": 0.0001308570509728252, "loss": 0.0055, "step": 2841 }, { "epoch": 4.11, "learning_rate": 0.00013082489146164979, "loss": 0.0378, "step": 2842 }, { "epoch": 4.11, "learning_rate": 0.00013079273195047437, "loss": 0.0759, "step": 2843 }, { "epoch": 4.11, "learning_rate": 0.00013076057243929893, "loss": 0.0969, "step": 2844 }, { "epoch": 4.11, "learning_rate": 0.0001307284129281235, "loss": 0.0681, "step": 2845 }, { "epoch": 4.12, "learning_rate": 0.00013069625341694807, "loss": 0.0856, "step": 2846 }, { "epoch": 4.12, "learning_rate": 0.00013066409390577262, "loss": 0.0071, "step": 2847 }, { "epoch": 4.12, "learning_rate": 0.0001306319343945972, "loss": 0.0203, "step": 2848 }, { "epoch": 4.12, "learning_rate": 0.0001305997748834218, "loss": 0.0363, "step": 2849 }, { "epoch": 4.12, "learning_rate": 0.00013056761537224635, "loss": 0.0092, "step": 2850 }, { "epoch": 4.12, "learning_rate": 0.00013053545586107093, "loss": 0.0652, "step": 2851 }, { "epoch": 4.12, "learning_rate": 0.0001305032963498955, "loss": 0.0487, "step": 2852 }, { "epoch": 4.13, "learning_rate": 0.00013047113683872005, "loss": 0.0103, "step": 2853 }, { "epoch": 4.13, "learning_rate": 0.00013043897732754463, "loss": 0.0459, "step": 2854 }, { "epoch": 4.13, "learning_rate": 0.0001304068178163692, "loss": 0.0503, "step": 2855 }, { "epoch": 4.13, "learning_rate": 0.00013037465830519377, "loss": 0.1184, "step": 2856 }, { "epoch": 4.13, "learning_rate": 0.00013034249879401833, "loss": 0.1365, "step": 2857 }, { "epoch": 4.13, "learning_rate": 0.00013031033928284291, "loss": 0.0912, "step": 2858 }, { "epoch": 4.13, "learning_rate": 0.00013027817977166747, "loss": 0.1053, "step": 2859 }, { "epoch": 4.14, "learning_rate": 0.00013024602026049203, "loss": 0.1394, "step": 2860 }, { "epoch": 4.14, "learning_rate": 0.0001302138607493166, "loss": 0.0623, "step": 2861 }, { "epoch": 4.14, "learning_rate": 0.0001301817012381412, "loss": 0.0495, "step": 2862 }, { "epoch": 4.14, "learning_rate": 0.00013014954172696575, "loss": 0.0733, "step": 2863 }, { "epoch": 4.14, "learning_rate": 0.00013011738221579034, "loss": 0.0553, "step": 2864 }, { "epoch": 4.14, "learning_rate": 0.0001300852227046149, "loss": 0.0267, "step": 2865 }, { "epoch": 4.14, "learning_rate": 0.00013005306319343945, "loss": 0.2041, "step": 2866 }, { "epoch": 4.15, "learning_rate": 0.00013002090368226403, "loss": 0.0453, "step": 2867 }, { "epoch": 4.15, "learning_rate": 0.00012998874417108862, "loss": 0.1534, "step": 2868 }, { "epoch": 4.15, "learning_rate": 0.00012995658465991318, "loss": 0.0668, "step": 2869 }, { "epoch": 4.15, "learning_rate": 0.00012992442514873776, "loss": 0.1655, "step": 2870 }, { "epoch": 4.15, "learning_rate": 0.00012989226563756232, "loss": 0.1758, "step": 2871 }, { "epoch": 4.15, "learning_rate": 0.00012986010612638687, "loss": 0.0076, "step": 2872 }, { "epoch": 4.15, "learning_rate": 0.00012982794661521146, "loss": 0.0241, "step": 2873 }, { "epoch": 4.16, "learning_rate": 0.00012979578710403601, "loss": 0.045, "step": 2874 }, { "epoch": 4.16, "learning_rate": 0.0001297636275928606, "loss": 0.125, "step": 2875 }, { "epoch": 4.16, "learning_rate": 0.00012973146808168518, "loss": 0.0752, "step": 2876 }, { "epoch": 4.16, "learning_rate": 0.00012969930857050974, "loss": 0.0187, "step": 2877 }, { "epoch": 4.16, "learning_rate": 0.0001296671490593343, "loss": 0.0217, "step": 2878 }, { "epoch": 4.16, "learning_rate": 0.00012963498954815888, "loss": 0.0326, "step": 2879 }, { "epoch": 4.16, "learning_rate": 0.00012960283003698344, "loss": 0.0155, "step": 2880 }, { "epoch": 4.17, "learning_rate": 0.00012957067052580802, "loss": 0.0717, "step": 2881 }, { "epoch": 4.17, "learning_rate": 0.00012953851101463258, "loss": 0.0037, "step": 2882 }, { "epoch": 4.17, "learning_rate": 0.00012950635150345716, "loss": 0.0782, "step": 2883 }, { "epoch": 4.17, "learning_rate": 0.00012947419199228172, "loss": 0.155, "step": 2884 }, { "epoch": 4.17, "learning_rate": 0.00012944203248110628, "loss": 0.0768, "step": 2885 }, { "epoch": 4.17, "learning_rate": 0.00012940987296993086, "loss": 0.0446, "step": 2886 }, { "epoch": 4.17, "learning_rate": 0.00012937771345875542, "loss": 0.0237, "step": 2887 }, { "epoch": 4.18, "learning_rate": 0.00012934555394758, "loss": 0.0082, "step": 2888 }, { "epoch": 4.18, "learning_rate": 0.00012931339443640459, "loss": 0.1724, "step": 2889 }, { "epoch": 4.18, "learning_rate": 0.00012928123492522914, "loss": 0.1616, "step": 2890 }, { "epoch": 4.18, "learning_rate": 0.0001292490754140537, "loss": 0.0657, "step": 2891 }, { "epoch": 4.18, "learning_rate": 0.00012921691590287828, "loss": 0.2979, "step": 2892 }, { "epoch": 4.18, "learning_rate": 0.00012918475639170284, "loss": 0.0326, "step": 2893 }, { "epoch": 4.19, "learning_rate": 0.00012915259688052742, "loss": 0.0265, "step": 2894 }, { "epoch": 4.19, "learning_rate": 0.000129120437369352, "loss": 0.0746, "step": 2895 }, { "epoch": 4.19, "learning_rate": 0.00012908827785817657, "loss": 0.0663, "step": 2896 }, { "epoch": 4.19, "learning_rate": 0.00012905611834700112, "loss": 0.0587, "step": 2897 }, { "epoch": 4.19, "learning_rate": 0.0001290239588358257, "loss": 0.0138, "step": 2898 }, { "epoch": 4.19, "learning_rate": 0.00012899179932465026, "loss": 0.067, "step": 2899 }, { "epoch": 4.19, "learning_rate": 0.00012895963981347482, "loss": 0.0708, "step": 2900 }, { "epoch": 4.2, "learning_rate": 0.00012892748030229943, "loss": 0.0207, "step": 2901 }, { "epoch": 4.2, "learning_rate": 0.000128895320791124, "loss": 0.146, "step": 2902 }, { "epoch": 4.2, "learning_rate": 0.00012886316127994855, "loss": 0.1227, "step": 2903 }, { "epoch": 4.2, "learning_rate": 0.00012883100176877313, "loss": 0.1053, "step": 2904 }, { "epoch": 4.2, "learning_rate": 0.0001287988422575977, "loss": 0.0703, "step": 2905 }, { "epoch": 4.2, "learning_rate": 0.00012876668274642224, "loss": 0.074, "step": 2906 }, { "epoch": 4.2, "learning_rate": 0.00012873452323524683, "loss": 0.2832, "step": 2907 }, { "epoch": 4.21, "learning_rate": 0.0001287023637240714, "loss": 0.1479, "step": 2908 }, { "epoch": 4.21, "learning_rate": 0.00012867020421289597, "loss": 0.1494, "step": 2909 }, { "epoch": 4.21, "learning_rate": 0.00012863804470172053, "loss": 0.029, "step": 2910 }, { "epoch": 4.21, "learning_rate": 0.0001286058851905451, "loss": 0.012, "step": 2911 }, { "epoch": 4.21, "learning_rate": 0.00012857372567936967, "loss": 0.0056, "step": 2912 }, { "epoch": 4.21, "learning_rate": 0.00012854156616819425, "loss": 0.0258, "step": 2913 }, { "epoch": 4.21, "learning_rate": 0.00012850940665701884, "loss": 0.1733, "step": 2914 }, { "epoch": 4.22, "learning_rate": 0.0001284772471458434, "loss": 0.0892, "step": 2915 }, { "epoch": 4.22, "learning_rate": 0.00012844508763466795, "loss": 0.0356, "step": 2916 }, { "epoch": 4.22, "learning_rate": 0.00012841292812349253, "loss": 0.0195, "step": 2917 }, { "epoch": 4.22, "learning_rate": 0.0001283807686123171, "loss": 0.0845, "step": 2918 }, { "epoch": 4.22, "learning_rate": 0.00012834860910114165, "loss": 0.1233, "step": 2919 }, { "epoch": 4.22, "learning_rate": 0.00012831644958996626, "loss": 0.2241, "step": 2920 }, { "epoch": 4.22, "learning_rate": 0.00012828429007879082, "loss": 0.0223, "step": 2921 }, { "epoch": 4.23, "learning_rate": 0.00012825213056761537, "loss": 0.0144, "step": 2922 }, { "epoch": 4.23, "learning_rate": 0.00012821997105643996, "loss": 0.084, "step": 2923 }, { "epoch": 4.23, "learning_rate": 0.0001281878115452645, "loss": 0.0709, "step": 2924 }, { "epoch": 4.23, "learning_rate": 0.00012815565203408907, "loss": 0.292, "step": 2925 }, { "epoch": 4.23, "learning_rate": 0.00012812349252291368, "loss": 0.1943, "step": 2926 }, { "epoch": 4.23, "learning_rate": 0.00012809133301173824, "loss": 0.1029, "step": 2927 }, { "epoch": 4.23, "learning_rate": 0.0001280591735005628, "loss": 0.0654, "step": 2928 }, { "epoch": 4.24, "learning_rate": 0.00012802701398938738, "loss": 0.0672, "step": 2929 }, { "epoch": 4.24, "learning_rate": 0.00012799485447821194, "loss": 0.0692, "step": 2930 }, { "epoch": 4.24, "learning_rate": 0.0001279626949670365, "loss": 0.043, "step": 2931 }, { "epoch": 4.24, "learning_rate": 0.00012793053545586108, "loss": 0.0998, "step": 2932 }, { "epoch": 4.24, "learning_rate": 0.00012789837594468566, "loss": 0.0591, "step": 2933 }, { "epoch": 4.24, "learning_rate": 0.00012786621643351022, "loss": 0.1001, "step": 2934 }, { "epoch": 4.24, "learning_rate": 0.00012783405692233478, "loss": 0.0824, "step": 2935 }, { "epoch": 4.25, "learning_rate": 0.00012780189741115936, "loss": 0.0649, "step": 2936 }, { "epoch": 4.25, "learning_rate": 0.00012776973789998392, "loss": 0.0632, "step": 2937 }, { "epoch": 4.25, "learning_rate": 0.0001277375783888085, "loss": 0.2466, "step": 2938 }, { "epoch": 4.25, "learning_rate": 0.00012770541887763308, "loss": 0.2349, "step": 2939 }, { "epoch": 4.25, "learning_rate": 0.00012767325936645764, "loss": 0.0661, "step": 2940 }, { "epoch": 4.25, "learning_rate": 0.0001276410998552822, "loss": 0.0994, "step": 2941 }, { "epoch": 4.25, "learning_rate": 0.00012760894034410678, "loss": 0.0406, "step": 2942 }, { "epoch": 4.26, "learning_rate": 0.00012757678083293134, "loss": 0.1845, "step": 2943 }, { "epoch": 4.26, "learning_rate": 0.0001275446213217559, "loss": 0.3096, "step": 2944 }, { "epoch": 4.26, "learning_rate": 0.00012751246181058048, "loss": 0.0142, "step": 2945 }, { "epoch": 4.26, "learning_rate": 0.00012748030229940506, "loss": 0.0319, "step": 2946 }, { "epoch": 4.26, "learning_rate": 0.00012744814278822962, "loss": 0.0804, "step": 2947 }, { "epoch": 4.26, "learning_rate": 0.0001274159832770542, "loss": 0.0957, "step": 2948 }, { "epoch": 4.26, "learning_rate": 0.00012738382376587876, "loss": 0.3096, "step": 2949 }, { "epoch": 4.27, "learning_rate": 0.00012735166425470332, "loss": 0.1389, "step": 2950 }, { "epoch": 4.27, "learning_rate": 0.0001273195047435279, "loss": 0.0387, "step": 2951 }, { "epoch": 4.27, "learning_rate": 0.0001272873452323525, "loss": 0.168, "step": 2952 }, { "epoch": 4.27, "learning_rate": 0.00012725518572117704, "loss": 0.1748, "step": 2953 }, { "epoch": 4.27, "learning_rate": 0.00012722302621000163, "loss": 0.038, "step": 2954 }, { "epoch": 4.27, "learning_rate": 0.00012719086669882619, "loss": 0.0212, "step": 2955 }, { "epoch": 4.27, "learning_rate": 0.00012715870718765074, "loss": 0.024, "step": 2956 }, { "epoch": 4.28, "learning_rate": 0.00012712654767647533, "loss": 0.151, "step": 2957 }, { "epoch": 4.28, "learning_rate": 0.00012709438816529988, "loss": 0.1545, "step": 2958 }, { "epoch": 4.28, "learning_rate": 0.00012706222865412447, "loss": 0.1324, "step": 2959 }, { "epoch": 4.28, "learning_rate": 0.00012703006914294902, "loss": 0.0176, "step": 2960 }, { "epoch": 4.28, "learning_rate": 0.0001269979096317736, "loss": 0.0132, "step": 2961 }, { "epoch": 4.28, "learning_rate": 0.00012696575012059817, "loss": 0.0636, "step": 2962 }, { "epoch": 4.28, "learning_rate": 0.00012693359060942275, "loss": 0.1572, "step": 2963 }, { "epoch": 4.29, "learning_rate": 0.0001269014310982473, "loss": 0.0467, "step": 2964 }, { "epoch": 4.29, "learning_rate": 0.0001268692715870719, "loss": 0.1252, "step": 2965 }, { "epoch": 4.29, "learning_rate": 0.00012683711207589645, "loss": 0.0265, "step": 2966 }, { "epoch": 4.29, "learning_rate": 0.00012680495256472103, "loss": 0.0124, "step": 2967 }, { "epoch": 4.29, "learning_rate": 0.0001267727930535456, "loss": 0.0205, "step": 2968 }, { "epoch": 4.29, "learning_rate": 0.00012674063354237015, "loss": 0.0665, "step": 2969 }, { "epoch": 4.3, "learning_rate": 0.00012670847403119473, "loss": 0.0825, "step": 2970 }, { "epoch": 4.3, "learning_rate": 0.00012667631452001931, "loss": 0.075, "step": 2971 }, { "epoch": 4.3, "learning_rate": 0.00012664415500884387, "loss": 0.0536, "step": 2972 }, { "epoch": 4.3, "learning_rate": 0.00012661199549766845, "loss": 0.0973, "step": 2973 }, { "epoch": 4.3, "learning_rate": 0.000126579835986493, "loss": 0.0587, "step": 2974 }, { "epoch": 4.3, "learning_rate": 0.00012654767647531757, "loss": 0.1216, "step": 2975 }, { "epoch": 4.3, "learning_rate": 0.00012651551696414215, "loss": 0.1453, "step": 2976 }, { "epoch": 4.31, "learning_rate": 0.0001264833574529667, "loss": 0.0196, "step": 2977 }, { "epoch": 4.31, "learning_rate": 0.0001264511979417913, "loss": 0.0232, "step": 2978 }, { "epoch": 4.31, "learning_rate": 0.00012641903843061588, "loss": 0.0514, "step": 2979 }, { "epoch": 4.31, "learning_rate": 0.00012638687891944043, "loss": 0.1265, "step": 2980 }, { "epoch": 4.31, "learning_rate": 0.000126354719408265, "loss": 0.0283, "step": 2981 }, { "epoch": 4.31, "learning_rate": 0.00012632255989708958, "loss": 0.2331, "step": 2982 }, { "epoch": 4.31, "learning_rate": 0.00012629040038591413, "loss": 0.1095, "step": 2983 }, { "epoch": 4.32, "learning_rate": 0.00012625824087473872, "loss": 0.0069, "step": 2984 }, { "epoch": 4.32, "learning_rate": 0.00012622608136356327, "loss": 0.0309, "step": 2985 }, { "epoch": 4.32, "learning_rate": 0.00012619392185238786, "loss": 0.1372, "step": 2986 }, { "epoch": 4.32, "learning_rate": 0.00012616176234121241, "loss": 0.1316, "step": 2987 }, { "epoch": 4.32, "learning_rate": 0.000126129602830037, "loss": 0.0494, "step": 2988 }, { "epoch": 4.32, "learning_rate": 0.00012609744331886156, "loss": 0.1191, "step": 2989 }, { "epoch": 4.32, "learning_rate": 0.0001260652838076861, "loss": 0.1021, "step": 2990 }, { "epoch": 4.33, "learning_rate": 0.0001260331242965107, "loss": 0.0847, "step": 2991 }, { "epoch": 4.33, "learning_rate": 0.00012600096478533528, "loss": 0.0675, "step": 2992 }, { "epoch": 4.33, "learning_rate": 0.00012596880527415984, "loss": 0.1481, "step": 2993 }, { "epoch": 4.33, "learning_rate": 0.0001259366457629844, "loss": 0.0088, "step": 2994 }, { "epoch": 4.33, "learning_rate": 0.00012590448625180898, "loss": 0.2358, "step": 2995 }, { "epoch": 4.33, "learning_rate": 0.00012587232674063354, "loss": 0.011, "step": 2996 }, { "epoch": 4.33, "learning_rate": 0.00012584016722945812, "loss": 0.0259, "step": 2997 }, { "epoch": 4.34, "learning_rate": 0.0001258080077182827, "loss": 0.0108, "step": 2998 }, { "epoch": 4.34, "learning_rate": 0.00012577584820710726, "loss": 0.0401, "step": 2999 }, { "epoch": 4.34, "learning_rate": 0.00012574368869593182, "loss": 0.1401, "step": 3000 }, { "epoch": 4.34, "learning_rate": 0.0001257115291847564, "loss": 0.0763, "step": 3001 }, { "epoch": 4.34, "learning_rate": 0.00012567936967358096, "loss": 0.1997, "step": 3002 }, { "epoch": 4.34, "learning_rate": 0.00012564721016240552, "loss": 0.0175, "step": 3003 }, { "epoch": 4.34, "learning_rate": 0.00012561505065123013, "loss": 0.0631, "step": 3004 }, { "epoch": 4.35, "learning_rate": 0.00012558289114005468, "loss": 0.1621, "step": 3005 }, { "epoch": 4.35, "learning_rate": 0.00012555073162887924, "loss": 0.0155, "step": 3006 }, { "epoch": 4.35, "learning_rate": 0.00012551857211770383, "loss": 0.1589, "step": 3007 }, { "epoch": 4.35, "learning_rate": 0.00012548641260652838, "loss": 0.0344, "step": 3008 }, { "epoch": 4.35, "learning_rate": 0.00012545425309535294, "loss": 0.0843, "step": 3009 }, { "epoch": 4.35, "learning_rate": 0.00012542209358417752, "loss": 0.041, "step": 3010 }, { "epoch": 4.35, "learning_rate": 0.0001253899340730021, "loss": 0.0448, "step": 3011 }, { "epoch": 4.36, "learning_rate": 0.00012535777456182666, "loss": 0.0165, "step": 3012 }, { "epoch": 4.36, "learning_rate": 0.00012532561505065125, "loss": 0.0914, "step": 3013 }, { "epoch": 4.36, "learning_rate": 0.0001252934555394758, "loss": 0.059, "step": 3014 }, { "epoch": 4.36, "learning_rate": 0.00012526129602830036, "loss": 0.2056, "step": 3015 }, { "epoch": 4.36, "learning_rate": 0.00012522913651712495, "loss": 0.025, "step": 3016 }, { "epoch": 4.36, "learning_rate": 0.00012519697700594953, "loss": 0.1689, "step": 3017 }, { "epoch": 4.36, "learning_rate": 0.0001251648174947741, "loss": 0.0738, "step": 3018 }, { "epoch": 4.37, "learning_rate": 0.00012513265798359864, "loss": 0.1239, "step": 3019 }, { "epoch": 4.37, "learning_rate": 0.00012510049847242323, "loss": 0.1052, "step": 3020 }, { "epoch": 4.37, "learning_rate": 0.00012506833896124779, "loss": 0.0781, "step": 3021 }, { "epoch": 4.37, "learning_rate": 0.00012503617945007234, "loss": 0.1748, "step": 3022 }, { "epoch": 4.37, "learning_rate": 0.00012500401993889695, "loss": 0.045, "step": 3023 }, { "epoch": 4.37, "learning_rate": 0.0001249718604277215, "loss": 0.0972, "step": 3024 }, { "epoch": 4.37, "learning_rate": 0.00012493970091654607, "loss": 0.106, "step": 3025 }, { "epoch": 4.38, "learning_rate": 0.00012490754140537065, "loss": 0.0561, "step": 3026 }, { "epoch": 4.38, "learning_rate": 0.0001248753818941952, "loss": 0.1469, "step": 3027 }, { "epoch": 4.38, "learning_rate": 0.00012484322238301977, "loss": 0.1316, "step": 3028 }, { "epoch": 4.38, "learning_rate": 0.00012481106287184438, "loss": 0.0822, "step": 3029 }, { "epoch": 4.38, "learning_rate": 0.00012477890336066893, "loss": 0.0131, "step": 3030 }, { "epoch": 4.38, "learning_rate": 0.0001247467438494935, "loss": 0.2401, "step": 3031 }, { "epoch": 4.38, "learning_rate": 0.00012471458433831807, "loss": 0.0603, "step": 3032 }, { "epoch": 4.39, "learning_rate": 0.00012468242482714263, "loss": 0.0343, "step": 3033 }, { "epoch": 4.39, "learning_rate": 0.0001246502653159672, "loss": 0.1592, "step": 3034 }, { "epoch": 4.39, "learning_rate": 0.00012461810580479177, "loss": 0.1494, "step": 3035 }, { "epoch": 4.39, "learning_rate": 0.00012458594629361636, "loss": 0.0659, "step": 3036 }, { "epoch": 4.39, "learning_rate": 0.0001245537867824409, "loss": 0.1936, "step": 3037 }, { "epoch": 4.39, "learning_rate": 0.0001245216272712655, "loss": 0.0666, "step": 3038 }, { "epoch": 4.39, "learning_rate": 0.00012448946776009005, "loss": 0.1023, "step": 3039 }, { "epoch": 4.4, "learning_rate": 0.0001244573082489146, "loss": 0.1145, "step": 3040 }, { "epoch": 4.4, "learning_rate": 0.0001244251487377392, "loss": 0.0669, "step": 3041 }, { "epoch": 4.4, "learning_rate": 0.00012439298922656378, "loss": 0.125, "step": 3042 }, { "epoch": 4.4, "learning_rate": 0.00012436082971538834, "loss": 0.2695, "step": 3043 }, { "epoch": 4.4, "learning_rate": 0.0001243286702042129, "loss": 0.1206, "step": 3044 }, { "epoch": 4.4, "learning_rate": 0.00012429651069303748, "loss": 0.0237, "step": 3045 }, { "epoch": 4.4, "learning_rate": 0.00012426435118186203, "loss": 0.0243, "step": 3046 }, { "epoch": 4.41, "learning_rate": 0.0001242321916706866, "loss": 0.0237, "step": 3047 }, { "epoch": 4.41, "learning_rate": 0.00012420003215951118, "loss": 0.1255, "step": 3048 }, { "epoch": 4.41, "learning_rate": 0.00012416787264833576, "loss": 0.1887, "step": 3049 }, { "epoch": 4.41, "learning_rate": 0.00012413571313716032, "loss": 0.009, "step": 3050 }, { "epoch": 4.41, "learning_rate": 0.0001241035536259849, "loss": 0.1335, "step": 3051 }, { "epoch": 4.41, "learning_rate": 0.00012407139411480946, "loss": 0.1479, "step": 3052 }, { "epoch": 4.42, "learning_rate": 0.00012403923460363401, "loss": 0.0038, "step": 3053 }, { "epoch": 4.42, "learning_rate": 0.0001240070750924586, "loss": 0.1455, "step": 3054 }, { "epoch": 4.42, "learning_rate": 0.00012397491558128318, "loss": 0.0109, "step": 3055 }, { "epoch": 4.42, "learning_rate": 0.00012394275607010774, "loss": 0.1047, "step": 3056 }, { "epoch": 4.42, "learning_rate": 0.00012391059655893232, "loss": 0.0931, "step": 3057 }, { "epoch": 4.42, "learning_rate": 0.00012387843704775688, "loss": 0.1758, "step": 3058 }, { "epoch": 4.42, "learning_rate": 0.00012384627753658144, "loss": 0.0183, "step": 3059 }, { "epoch": 4.43, "learning_rate": 0.00012381411802540602, "loss": 0.0834, "step": 3060 }, { "epoch": 4.43, "learning_rate": 0.0001237819585142306, "loss": 0.0075, "step": 3061 }, { "epoch": 4.43, "learning_rate": 0.00012374979900305516, "loss": 0.106, "step": 3062 }, { "epoch": 4.43, "learning_rate": 0.00012371763949187975, "loss": 0.0263, "step": 3063 }, { "epoch": 4.43, "learning_rate": 0.0001236854799807043, "loss": 0.081, "step": 3064 }, { "epoch": 4.43, "learning_rate": 0.00012365332046952886, "loss": 0.0067, "step": 3065 }, { "epoch": 4.43, "learning_rate": 0.00012362116095835344, "loss": 0.0057, "step": 3066 }, { "epoch": 4.44, "learning_rate": 0.000123589001447178, "loss": 0.109, "step": 3067 }, { "epoch": 4.44, "learning_rate": 0.00012355684193600259, "loss": 0.0513, "step": 3068 }, { "epoch": 4.44, "learning_rate": 0.00012352468242482714, "loss": 0.1738, "step": 3069 }, { "epoch": 4.44, "learning_rate": 0.00012349252291365173, "loss": 0.084, "step": 3070 }, { "epoch": 4.44, "learning_rate": 0.00012346036340247628, "loss": 0.0084, "step": 3071 }, { "epoch": 4.44, "learning_rate": 0.00012342820389130084, "loss": 0.1714, "step": 3072 }, { "epoch": 4.44, "learning_rate": 0.00012339604438012542, "loss": 0.1261, "step": 3073 }, { "epoch": 4.45, "learning_rate": 0.00012336388486895, "loss": 0.0707, "step": 3074 }, { "epoch": 4.45, "learning_rate": 0.00012333172535777457, "loss": 0.0038, "step": 3075 }, { "epoch": 4.45, "learning_rate": 0.00012329956584659915, "loss": 0.082, "step": 3076 }, { "epoch": 4.45, "learning_rate": 0.0001232674063354237, "loss": 0.228, "step": 3077 }, { "epoch": 4.45, "learning_rate": 0.00012323524682424826, "loss": 0.1146, "step": 3078 }, { "epoch": 4.45, "learning_rate": 0.00012320308731307285, "loss": 0.0909, "step": 3079 }, { "epoch": 4.45, "learning_rate": 0.0001231709278018974, "loss": 0.1385, "step": 3080 }, { "epoch": 4.46, "learning_rate": 0.000123138768290722, "loss": 0.1851, "step": 3081 }, { "epoch": 4.46, "learning_rate": 0.00012310660877954657, "loss": 0.1282, "step": 3082 }, { "epoch": 4.46, "learning_rate": 0.00012307444926837113, "loss": 0.0516, "step": 3083 }, { "epoch": 4.46, "learning_rate": 0.0001230422897571957, "loss": 0.087, "step": 3084 }, { "epoch": 4.46, "learning_rate": 0.00012301013024602027, "loss": 0.0964, "step": 3085 }, { "epoch": 4.46, "learning_rate": 0.00012297797073484483, "loss": 0.0458, "step": 3086 }, { "epoch": 4.46, "learning_rate": 0.0001229458112236694, "loss": 0.0728, "step": 3087 }, { "epoch": 4.47, "learning_rate": 0.00012291365171249397, "loss": 0.0543, "step": 3088 }, { "epoch": 4.47, "learning_rate": 0.00012288149220131855, "loss": 0.117, "step": 3089 }, { "epoch": 4.47, "learning_rate": 0.0001228493326901431, "loss": 0.089, "step": 3090 }, { "epoch": 4.47, "learning_rate": 0.0001228171731789677, "loss": 0.103, "step": 3091 }, { "epoch": 4.47, "learning_rate": 0.00012278501366779225, "loss": 0.1855, "step": 3092 }, { "epoch": 4.47, "learning_rate": 0.0001227528541566168, "loss": 0.0225, "step": 3093 }, { "epoch": 4.47, "learning_rate": 0.0001227206946454414, "loss": 0.1587, "step": 3094 }, { "epoch": 4.48, "learning_rate": 0.00012268853513426598, "loss": 0.046, "step": 3095 }, { "epoch": 4.48, "learning_rate": 0.00012265637562309053, "loss": 0.1155, "step": 3096 }, { "epoch": 4.48, "learning_rate": 0.0001226242161119151, "loss": 0.0156, "step": 3097 }, { "epoch": 4.48, "learning_rate": 0.00012259205660073967, "loss": 0.0701, "step": 3098 }, { "epoch": 4.48, "learning_rate": 0.00012255989708956423, "loss": 0.0561, "step": 3099 }, { "epoch": 4.48, "learning_rate": 0.00012252773757838881, "loss": 0.0229, "step": 3100 }, { "epoch": 4.48, "learning_rate": 0.0001224955780672134, "loss": 0.0362, "step": 3101 }, { "epoch": 4.49, "learning_rate": 0.00012246341855603796, "loss": 0.2048, "step": 3102 }, { "epoch": 4.49, "learning_rate": 0.0001224312590448625, "loss": 0.0489, "step": 3103 }, { "epoch": 4.49, "learning_rate": 0.0001223990995336871, "loss": 0.057, "step": 3104 }, { "epoch": 4.49, "learning_rate": 0.00012236694002251165, "loss": 0.0464, "step": 3105 }, { "epoch": 4.49, "learning_rate": 0.00012233478051133624, "loss": 0.0452, "step": 3106 }, { "epoch": 4.49, "learning_rate": 0.00012230262100016082, "loss": 0.0036, "step": 3107 }, { "epoch": 4.49, "learning_rate": 0.00012227046148898538, "loss": 0.0276, "step": 3108 }, { "epoch": 4.5, "learning_rate": 0.00012223830197780994, "loss": 0.1462, "step": 3109 }, { "epoch": 4.5, "learning_rate": 0.00012220614246663452, "loss": 0.0097, "step": 3110 }, { "epoch": 4.5, "learning_rate": 0.00012217398295545908, "loss": 0.0901, "step": 3111 }, { "epoch": 4.5, "learning_rate": 0.00012214182344428363, "loss": 0.2214, "step": 3112 }, { "epoch": 4.5, "learning_rate": 0.00012210966393310822, "loss": 0.0298, "step": 3113 }, { "epoch": 4.5, "learning_rate": 0.0001220775044219328, "loss": 0.1808, "step": 3114 }, { "epoch": 4.5, "learning_rate": 0.00012204534491075736, "loss": 0.012, "step": 3115 }, { "epoch": 4.51, "learning_rate": 0.00012201318539958193, "loss": 0.1572, "step": 3116 }, { "epoch": 4.51, "learning_rate": 0.0001219810258884065, "loss": 0.1045, "step": 3117 }, { "epoch": 4.51, "learning_rate": 0.00012194886637723106, "loss": 0.0372, "step": 3118 }, { "epoch": 4.51, "learning_rate": 0.00012191670686605565, "loss": 0.058, "step": 3119 }, { "epoch": 4.51, "learning_rate": 0.00012188454735488023, "loss": 0.0297, "step": 3120 }, { "epoch": 4.51, "learning_rate": 0.00012185238784370478, "loss": 0.0112, "step": 3121 }, { "epoch": 4.51, "learning_rate": 0.00012182022833252935, "loss": 0.0329, "step": 3122 }, { "epoch": 4.52, "learning_rate": 0.00012178806882135392, "loss": 0.0151, "step": 3123 }, { "epoch": 4.52, "learning_rate": 0.00012175590931017848, "loss": 0.271, "step": 3124 }, { "epoch": 4.52, "learning_rate": 0.00012172374979900305, "loss": 0.1106, "step": 3125 }, { "epoch": 4.52, "learning_rate": 0.00012169159028782763, "loss": 0.0121, "step": 3126 }, { "epoch": 4.52, "learning_rate": 0.0001216594307766522, "loss": 0.0177, "step": 3127 }, { "epoch": 4.52, "learning_rate": 0.00012162727126547678, "loss": 0.0875, "step": 3128 }, { "epoch": 4.52, "learning_rate": 0.00012159511175430133, "loss": 0.016, "step": 3129 }, { "epoch": 4.53, "learning_rate": 0.0001215629522431259, "loss": 0.127, "step": 3130 }, { "epoch": 4.53, "learning_rate": 0.00012153079273195047, "loss": 0.1212, "step": 3131 }, { "epoch": 4.53, "learning_rate": 0.00012149863322077506, "loss": 0.2024, "step": 3132 }, { "epoch": 4.53, "learning_rate": 0.00012146647370959963, "loss": 0.0374, "step": 3133 }, { "epoch": 4.53, "learning_rate": 0.0001214343141984242, "loss": 0.001, "step": 3134 }, { "epoch": 4.53, "learning_rate": 0.00012140215468724876, "loss": 0.1068, "step": 3135 }, { "epoch": 4.54, "learning_rate": 0.00012136999517607333, "loss": 0.1196, "step": 3136 }, { "epoch": 4.54, "learning_rate": 0.0001213378356648979, "loss": 0.0833, "step": 3137 }, { "epoch": 4.54, "learning_rate": 0.00012130567615372245, "loss": 0.0392, "step": 3138 }, { "epoch": 4.54, "learning_rate": 0.00012127351664254705, "loss": 0.0611, "step": 3139 }, { "epoch": 4.54, "learning_rate": 0.00012124135713137161, "loss": 0.1672, "step": 3140 }, { "epoch": 4.54, "learning_rate": 0.00012120919762019618, "loss": 0.0349, "step": 3141 }, { "epoch": 4.54, "learning_rate": 0.00012117703810902075, "loss": 0.1315, "step": 3142 }, { "epoch": 4.55, "learning_rate": 0.0001211448785978453, "loss": 0.1345, "step": 3143 }, { "epoch": 4.55, "learning_rate": 0.00012111271908666988, "loss": 0.0016, "step": 3144 }, { "epoch": 4.55, "learning_rate": 0.00012108055957549447, "loss": 0.0394, "step": 3145 }, { "epoch": 4.55, "learning_rate": 0.00012104840006431903, "loss": 0.0068, "step": 3146 }, { "epoch": 4.55, "learning_rate": 0.0001210162405531436, "loss": 0.0362, "step": 3147 }, { "epoch": 4.55, "learning_rate": 0.00012098408104196817, "loss": 0.0562, "step": 3148 }, { "epoch": 4.55, "learning_rate": 0.00012095192153079273, "loss": 0.0021, "step": 3149 }, { "epoch": 4.56, "learning_rate": 0.0001209197620196173, "loss": 0.123, "step": 3150 }, { "epoch": 4.56, "learning_rate": 0.00012088760250844188, "loss": 0.0248, "step": 3151 }, { "epoch": 4.56, "learning_rate": 0.00012085544299726645, "loss": 0.1709, "step": 3152 }, { "epoch": 4.56, "learning_rate": 0.00012082328348609103, "loss": 0.2107, "step": 3153 }, { "epoch": 4.56, "learning_rate": 0.00012079112397491558, "loss": 0.0949, "step": 3154 }, { "epoch": 4.56, "learning_rate": 0.00012075896446374015, "loss": 0.0022, "step": 3155 }, { "epoch": 4.56, "learning_rate": 0.00012072680495256472, "loss": 0.241, "step": 3156 }, { "epoch": 4.57, "learning_rate": 0.0001206946454413893, "loss": 0.0357, "step": 3157 }, { "epoch": 4.57, "learning_rate": 0.00012066248593021388, "loss": 0.1113, "step": 3158 }, { "epoch": 4.57, "learning_rate": 0.00012063032641903845, "loss": 0.0668, "step": 3159 }, { "epoch": 4.57, "learning_rate": 0.000120598166907863, "loss": 0.0134, "step": 3160 }, { "epoch": 4.57, "learning_rate": 0.00012056600739668758, "loss": 0.0934, "step": 3161 }, { "epoch": 4.57, "learning_rate": 0.00012053384788551215, "loss": 0.114, "step": 3162 }, { "epoch": 4.57, "learning_rate": 0.0001205016883743367, "loss": 0.0467, "step": 3163 }, { "epoch": 4.58, "learning_rate": 0.0001204695288631613, "loss": 0.0459, "step": 3164 }, { "epoch": 4.58, "learning_rate": 0.00012043736935198586, "loss": 0.0158, "step": 3165 }, { "epoch": 4.58, "learning_rate": 0.00012040520984081043, "loss": 0.0537, "step": 3166 }, { "epoch": 4.58, "learning_rate": 0.000120373050329635, "loss": 0.0104, "step": 3167 }, { "epoch": 4.58, "learning_rate": 0.00012034089081845956, "loss": 0.1179, "step": 3168 }, { "epoch": 4.58, "learning_rate": 0.00012030873130728413, "loss": 0.0654, "step": 3169 }, { "epoch": 4.58, "learning_rate": 0.0001202765717961087, "loss": 0.1226, "step": 3170 }, { "epoch": 4.59, "learning_rate": 0.00012024441228493328, "loss": 0.0804, "step": 3171 }, { "epoch": 4.59, "learning_rate": 0.00012021225277375785, "loss": 0.0593, "step": 3172 }, { "epoch": 4.59, "learning_rate": 0.00012018009326258242, "loss": 0.0451, "step": 3173 }, { "epoch": 4.59, "learning_rate": 0.00012014793375140698, "loss": 0.1097, "step": 3174 }, { "epoch": 4.59, "learning_rate": 0.00012011577424023155, "loss": 0.1182, "step": 3175 }, { "epoch": 4.59, "learning_rate": 0.00012008361472905612, "loss": 0.0321, "step": 3176 }, { "epoch": 4.59, "learning_rate": 0.0001200514552178807, "loss": 0.1045, "step": 3177 }, { "epoch": 4.6, "learning_rate": 0.00012001929570670527, "loss": 0.147, "step": 3178 }, { "epoch": 4.6, "learning_rate": 0.00011998713619552983, "loss": 0.0089, "step": 3179 }, { "epoch": 4.6, "learning_rate": 0.0001199549766843544, "loss": 0.2695, "step": 3180 }, { "epoch": 4.6, "learning_rate": 0.00011992281717317897, "loss": 0.0244, "step": 3181 }, { "epoch": 4.6, "learning_rate": 0.00011989065766200353, "loss": 0.0593, "step": 3182 }, { "epoch": 4.6, "learning_rate": 0.0001198584981508281, "loss": 0.0263, "step": 3183 }, { "epoch": 4.6, "learning_rate": 0.0001198263386396527, "loss": 0.2083, "step": 3184 }, { "epoch": 4.61, "learning_rate": 0.00011979417912847725, "loss": 0.0807, "step": 3185 }, { "epoch": 4.61, "learning_rate": 0.00011976201961730182, "loss": 0.1112, "step": 3186 }, { "epoch": 4.61, "learning_rate": 0.0001197298601061264, "loss": 0.012, "step": 3187 }, { "epoch": 4.61, "learning_rate": 0.00011969770059495095, "loss": 0.1014, "step": 3188 }, { "epoch": 4.61, "learning_rate": 0.00011966554108377552, "loss": 0.0051, "step": 3189 }, { "epoch": 4.61, "learning_rate": 0.00011963338157260011, "loss": 0.0085, "step": 3190 }, { "epoch": 4.61, "learning_rate": 0.00011960122206142468, "loss": 0.12, "step": 3191 }, { "epoch": 4.62, "learning_rate": 0.00011956906255024925, "loss": 0.1223, "step": 3192 }, { "epoch": 4.62, "learning_rate": 0.0001195369030390738, "loss": 0.1169, "step": 3193 }, { "epoch": 4.62, "learning_rate": 0.00011950474352789838, "loss": 0.0567, "step": 3194 }, { "epoch": 4.62, "learning_rate": 0.00011947258401672295, "loss": 0.2517, "step": 3195 }, { "epoch": 4.62, "learning_rate": 0.00011944042450554753, "loss": 0.0841, "step": 3196 }, { "epoch": 4.62, "learning_rate": 0.0001194082649943721, "loss": 0.0198, "step": 3197 }, { "epoch": 4.62, "learning_rate": 0.00011937610548319667, "loss": 0.1187, "step": 3198 }, { "epoch": 4.63, "learning_rate": 0.00011934394597202123, "loss": 0.1026, "step": 3199 }, { "epoch": 4.63, "learning_rate": 0.0001193117864608458, "loss": 0.0437, "step": 3200 }, { "epoch": 4.63, "learning_rate": 0.00011927962694967037, "loss": 0.249, "step": 3201 }, { "epoch": 4.63, "learning_rate": 0.00011924746743849493, "loss": 0.0095, "step": 3202 }, { "epoch": 4.63, "learning_rate": 0.00011921530792731952, "loss": 0.0851, "step": 3203 }, { "epoch": 4.63, "learning_rate": 0.00011918314841614408, "loss": 0.1836, "step": 3204 }, { "epoch": 4.63, "learning_rate": 0.00011915098890496865, "loss": 0.1199, "step": 3205 }, { "epoch": 4.64, "learning_rate": 0.00011911882939379322, "loss": 0.1057, "step": 3206 }, { "epoch": 4.64, "learning_rate": 0.00011908666988261778, "loss": 0.026, "step": 3207 }, { "epoch": 4.64, "learning_rate": 0.00011905451037144235, "loss": 0.1266, "step": 3208 }, { "epoch": 4.64, "learning_rate": 0.00011902235086026695, "loss": 0.1238, "step": 3209 }, { "epoch": 4.64, "learning_rate": 0.0001189901913490915, "loss": 0.0536, "step": 3210 }, { "epoch": 4.64, "learning_rate": 0.00011895803183791607, "loss": 0.0358, "step": 3211 }, { "epoch": 4.64, "learning_rate": 0.00011892587232674064, "loss": 0.0096, "step": 3212 }, { "epoch": 4.65, "learning_rate": 0.0001188937128155652, "loss": 0.0303, "step": 3213 }, { "epoch": 4.65, "learning_rate": 0.00011886155330438977, "loss": 0.002, "step": 3214 }, { "epoch": 4.65, "learning_rate": 0.00011882939379321434, "loss": 0.0452, "step": 3215 }, { "epoch": 4.65, "learning_rate": 0.00011879723428203893, "loss": 0.0098, "step": 3216 }, { "epoch": 4.65, "learning_rate": 0.0001187650747708635, "loss": 0.0583, "step": 3217 }, { "epoch": 4.65, "learning_rate": 0.00011873291525968805, "loss": 0.0849, "step": 3218 }, { "epoch": 4.66, "learning_rate": 0.00011870075574851262, "loss": 0.1161, "step": 3219 }, { "epoch": 4.66, "learning_rate": 0.0001186685962373372, "loss": 0.0693, "step": 3220 }, { "epoch": 4.66, "learning_rate": 0.00011863643672616177, "loss": 0.0009, "step": 3221 }, { "epoch": 4.66, "learning_rate": 0.00011860427721498635, "loss": 0.1709, "step": 3222 }, { "epoch": 4.66, "learning_rate": 0.00011857211770381092, "loss": 0.0888, "step": 3223 }, { "epoch": 4.66, "learning_rate": 0.00011853995819263548, "loss": 0.2039, "step": 3224 }, { "epoch": 4.66, "learning_rate": 0.00011850779868146005, "loss": 0.0234, "step": 3225 }, { "epoch": 4.67, "learning_rate": 0.00011847563917028462, "loss": 0.0831, "step": 3226 }, { "epoch": 4.67, "learning_rate": 0.00011844347965910918, "loss": 0.1071, "step": 3227 }, { "epoch": 4.67, "learning_rate": 0.00011841132014793375, "loss": 0.0547, "step": 3228 }, { "epoch": 4.67, "learning_rate": 0.00011837916063675833, "loss": 0.1079, "step": 3229 }, { "epoch": 4.67, "learning_rate": 0.0001183470011255829, "loss": 0.1572, "step": 3230 }, { "epoch": 4.67, "learning_rate": 0.00011831484161440747, "loss": 0.1424, "step": 3231 }, { "epoch": 4.67, "learning_rate": 0.00011828268210323203, "loss": 0.1217, "step": 3232 }, { "epoch": 4.68, "learning_rate": 0.0001182505225920566, "loss": 0.0394, "step": 3233 }, { "epoch": 4.68, "learning_rate": 0.00011821836308088117, "loss": 0.0112, "step": 3234 }, { "epoch": 4.68, "learning_rate": 0.00011818620356970575, "loss": 0.0126, "step": 3235 }, { "epoch": 4.68, "learning_rate": 0.00011815404405853032, "loss": 0.0036, "step": 3236 }, { "epoch": 4.68, "learning_rate": 0.0001181218845473549, "loss": 0.0685, "step": 3237 }, { "epoch": 4.68, "learning_rate": 0.00011808972503617945, "loss": 0.1091, "step": 3238 }, { "epoch": 4.68, "learning_rate": 0.00011805756552500402, "loss": 0.0106, "step": 3239 }, { "epoch": 4.69, "learning_rate": 0.00011802540601382859, "loss": 0.1321, "step": 3240 }, { "epoch": 4.69, "learning_rate": 0.00011799324650265318, "loss": 0.0059, "step": 3241 }, { "epoch": 4.69, "learning_rate": 0.00011796108699147775, "loss": 0.0701, "step": 3242 }, { "epoch": 4.69, "learning_rate": 0.0001179289274803023, "loss": 0.0402, "step": 3243 }, { "epoch": 4.69, "learning_rate": 0.00011789676796912687, "loss": 0.1011, "step": 3244 }, { "epoch": 4.69, "learning_rate": 0.00011786460845795144, "loss": 0.0107, "step": 3245 }, { "epoch": 4.69, "learning_rate": 0.00011783244894677601, "loss": 0.2283, "step": 3246 }, { "epoch": 4.7, "learning_rate": 0.00011780028943560057, "loss": 0.1047, "step": 3247 }, { "epoch": 4.7, "learning_rate": 0.00011776812992442517, "loss": 0.0382, "step": 3248 }, { "epoch": 4.7, "learning_rate": 0.00011773597041324973, "loss": 0.0144, "step": 3249 }, { "epoch": 4.7, "learning_rate": 0.0001177038109020743, "loss": 0.0202, "step": 3250 }, { "epoch": 4.7, "learning_rate": 0.00011767165139089887, "loss": 0.2681, "step": 3251 }, { "epoch": 4.7, "learning_rate": 0.00011763949187972342, "loss": 0.1631, "step": 3252 }, { "epoch": 4.7, "learning_rate": 0.000117607332368548, "loss": 0.2761, "step": 3253 }, { "epoch": 4.71, "learning_rate": 0.00011757517285737258, "loss": 0.0179, "step": 3254 }, { "epoch": 4.71, "learning_rate": 0.00011754301334619715, "loss": 0.0215, "step": 3255 }, { "epoch": 4.71, "learning_rate": 0.00011751085383502172, "loss": 0.0018, "step": 3256 }, { "epoch": 4.71, "learning_rate": 0.00011747869432384628, "loss": 0.0269, "step": 3257 }, { "epoch": 4.71, "learning_rate": 0.00011744653481267085, "loss": 0.0287, "step": 3258 }, { "epoch": 4.71, "learning_rate": 0.00011741437530149542, "loss": 0.0102, "step": 3259 }, { "epoch": 4.71, "learning_rate": 0.00011738221579031999, "loss": 0.1592, "step": 3260 }, { "epoch": 4.72, "learning_rate": 0.00011735005627914457, "loss": 0.0215, "step": 3261 }, { "epoch": 4.72, "learning_rate": 0.00011731789676796914, "loss": 0.0673, "step": 3262 }, { "epoch": 4.72, "learning_rate": 0.0001172857372567937, "loss": 0.134, "step": 3263 }, { "epoch": 4.72, "learning_rate": 0.00011725357774561827, "loss": 0.167, "step": 3264 }, { "epoch": 4.72, "learning_rate": 0.00011722141823444284, "loss": 0.1841, "step": 3265 }, { "epoch": 4.72, "learning_rate": 0.0001171892587232674, "loss": 0.0464, "step": 3266 }, { "epoch": 4.72, "learning_rate": 0.000117157099212092, "loss": 0.161, "step": 3267 }, { "epoch": 4.73, "learning_rate": 0.00011712493970091655, "loss": 0.1211, "step": 3268 }, { "epoch": 4.73, "learning_rate": 0.00011709278018974112, "loss": 0.0263, "step": 3269 }, { "epoch": 4.73, "learning_rate": 0.0001170606206785657, "loss": 0.0316, "step": 3270 }, { "epoch": 4.73, "learning_rate": 0.00011702846116739026, "loss": 0.0596, "step": 3271 }, { "epoch": 4.73, "learning_rate": 0.00011699630165621482, "loss": 0.1389, "step": 3272 }, { "epoch": 4.73, "learning_rate": 0.00011696414214503939, "loss": 0.0166, "step": 3273 }, { "epoch": 4.73, "learning_rate": 0.00011693198263386398, "loss": 0.1619, "step": 3274 }, { "epoch": 4.74, "learning_rate": 0.00011689982312268855, "loss": 0.0945, "step": 3275 }, { "epoch": 4.74, "learning_rate": 0.00011686766361151312, "loss": 0.1208, "step": 3276 }, { "epoch": 4.74, "learning_rate": 0.00011683550410033767, "loss": 0.0483, "step": 3277 }, { "epoch": 4.74, "learning_rate": 0.00011680334458916224, "loss": 0.0106, "step": 3278 }, { "epoch": 4.74, "learning_rate": 0.00011677118507798681, "loss": 0.0903, "step": 3279 }, { "epoch": 4.74, "learning_rate": 0.0001167390255668114, "loss": 0.1836, "step": 3280 }, { "epoch": 4.74, "learning_rate": 0.00011670686605563597, "loss": 0.0889, "step": 3281 }, { "epoch": 4.75, "learning_rate": 0.00011667470654446053, "loss": 0.1365, "step": 3282 }, { "epoch": 4.75, "learning_rate": 0.0001166425470332851, "loss": 0.2114, "step": 3283 }, { "epoch": 4.75, "learning_rate": 0.00011661038752210967, "loss": 0.076, "step": 3284 }, { "epoch": 4.75, "learning_rate": 0.00011657822801093424, "loss": 0.0027, "step": 3285 }, { "epoch": 4.75, "learning_rate": 0.0001165460684997588, "loss": 0.012, "step": 3286 }, { "epoch": 4.75, "learning_rate": 0.00011651390898858339, "loss": 0.0945, "step": 3287 }, { "epoch": 4.75, "learning_rate": 0.00011648174947740795, "loss": 0.0659, "step": 3288 }, { "epoch": 4.76, "learning_rate": 0.00011644958996623252, "loss": 0.0052, "step": 3289 }, { "epoch": 4.76, "learning_rate": 0.00011641743045505709, "loss": 0.2563, "step": 3290 }, { "epoch": 4.76, "learning_rate": 0.00011638527094388165, "loss": 0.1082, "step": 3291 }, { "epoch": 4.76, "learning_rate": 0.00011635311143270622, "loss": 0.1584, "step": 3292 }, { "epoch": 4.76, "learning_rate": 0.0001163209519215308, "loss": 0.021, "step": 3293 }, { "epoch": 4.76, "learning_rate": 0.00011628879241035537, "loss": 0.137, "step": 3294 }, { "epoch": 4.77, "learning_rate": 0.00011625663289917994, "loss": 0.0308, "step": 3295 }, { "epoch": 4.77, "learning_rate": 0.00011622447338800451, "loss": 0.1475, "step": 3296 }, { "epoch": 4.77, "learning_rate": 0.00011619231387682907, "loss": 0.1608, "step": 3297 }, { "epoch": 4.77, "learning_rate": 0.00011616015436565364, "loss": 0.0366, "step": 3298 }, { "epoch": 4.77, "learning_rate": 0.00011612799485447822, "loss": 0.1346, "step": 3299 }, { "epoch": 4.77, "learning_rate": 0.0001160958353433028, "loss": 0.1233, "step": 3300 }, { "epoch": 4.77, "learning_rate": 0.00011606367583212737, "loss": 0.0077, "step": 3301 }, { "epoch": 4.78, "learning_rate": 0.00011603151632095192, "loss": 0.0181, "step": 3302 }, { "epoch": 4.78, "learning_rate": 0.0001159993568097765, "loss": 0.032, "step": 3303 }, { "epoch": 4.78, "learning_rate": 0.00011596719729860106, "loss": 0.0104, "step": 3304 }, { "epoch": 4.78, "learning_rate": 0.00011593503778742562, "loss": 0.1782, "step": 3305 }, { "epoch": 4.78, "learning_rate": 0.00011590287827625022, "loss": 0.1862, "step": 3306 }, { "epoch": 4.78, "learning_rate": 0.00011587071876507478, "loss": 0.0157, "step": 3307 }, { "epoch": 4.78, "learning_rate": 0.00011583855925389935, "loss": 0.197, "step": 3308 }, { "epoch": 4.79, "learning_rate": 0.00011580639974272392, "loss": 0.0056, "step": 3309 }, { "epoch": 4.79, "learning_rate": 0.00011577424023154849, "loss": 0.1137, "step": 3310 }, { "epoch": 4.79, "learning_rate": 0.00011574208072037304, "loss": 0.1819, "step": 3311 }, { "epoch": 4.79, "learning_rate": 0.00011570992120919764, "loss": 0.0659, "step": 3312 }, { "epoch": 4.79, "learning_rate": 0.0001156777616980222, "loss": 0.165, "step": 3313 }, { "epoch": 4.79, "learning_rate": 0.00011564560218684677, "loss": 0.0401, "step": 3314 }, { "epoch": 4.79, "learning_rate": 0.00011561344267567134, "loss": 0.035, "step": 3315 }, { "epoch": 4.8, "learning_rate": 0.0001155812831644959, "loss": 0.1002, "step": 3316 }, { "epoch": 4.8, "learning_rate": 0.00011554912365332047, "loss": 0.1362, "step": 3317 }, { "epoch": 4.8, "learning_rate": 0.00011551696414214504, "loss": 0.0072, "step": 3318 }, { "epoch": 4.8, "learning_rate": 0.00011548480463096962, "loss": 0.0773, "step": 3319 }, { "epoch": 4.8, "learning_rate": 0.00011545264511979419, "loss": 0.0059, "step": 3320 }, { "epoch": 4.8, "learning_rate": 0.00011542048560861875, "loss": 0.037, "step": 3321 }, { "epoch": 4.8, "learning_rate": 0.00011538832609744332, "loss": 0.0677, "step": 3322 }, { "epoch": 4.81, "learning_rate": 0.00011535616658626789, "loss": 0.059, "step": 3323 }, { "epoch": 4.81, "learning_rate": 0.00011532400707509246, "loss": 0.0963, "step": 3324 }, { "epoch": 4.81, "learning_rate": 0.00011529184756391704, "loss": 0.0848, "step": 3325 }, { "epoch": 4.81, "learning_rate": 0.00011525968805274162, "loss": 0.0204, "step": 3326 }, { "epoch": 4.81, "learning_rate": 0.00011522752854156617, "loss": 0.0409, "step": 3327 }, { "epoch": 4.81, "learning_rate": 0.00011519536903039074, "loss": 0.0273, "step": 3328 }, { "epoch": 4.81, "learning_rate": 0.00011516320951921531, "loss": 0.0704, "step": 3329 }, { "epoch": 4.82, "learning_rate": 0.00011513105000803987, "loss": 0.0218, "step": 3330 }, { "epoch": 4.82, "learning_rate": 0.00011509889049686444, "loss": 0.1216, "step": 3331 }, { "epoch": 4.82, "learning_rate": 0.00011506673098568902, "loss": 0.0228, "step": 3332 }, { "epoch": 4.82, "learning_rate": 0.0001150345714745136, "loss": 0.0019, "step": 3333 }, { "epoch": 4.82, "learning_rate": 0.00011500241196333817, "loss": 0.0899, "step": 3334 }, { "epoch": 4.82, "learning_rate": 0.00011497025245216274, "loss": 0.1477, "step": 3335 }, { "epoch": 4.82, "learning_rate": 0.0001149380929409873, "loss": 0.0353, "step": 3336 }, { "epoch": 4.83, "learning_rate": 0.00011490593342981186, "loss": 0.1909, "step": 3337 }, { "epoch": 4.83, "learning_rate": 0.00011487377391863645, "loss": 0.0306, "step": 3338 }, { "epoch": 4.83, "learning_rate": 0.00011484161440746102, "loss": 0.165, "step": 3339 }, { "epoch": 4.83, "learning_rate": 0.00011480945489628559, "loss": 0.1074, "step": 3340 }, { "epoch": 4.83, "learning_rate": 0.00011477729538511015, "loss": 0.0317, "step": 3341 }, { "epoch": 4.83, "learning_rate": 0.00011474513587393472, "loss": 0.0163, "step": 3342 }, { "epoch": 4.83, "learning_rate": 0.00011471297636275929, "loss": 0.0335, "step": 3343 }, { "epoch": 4.84, "learning_rate": 0.00011468081685158387, "loss": 0.1794, "step": 3344 }, { "epoch": 4.84, "learning_rate": 0.00011464865734040844, "loss": 0.0469, "step": 3345 }, { "epoch": 4.84, "learning_rate": 0.000114616497829233, "loss": 0.0563, "step": 3346 }, { "epoch": 4.84, "learning_rate": 0.00011458433831805757, "loss": 0.024, "step": 3347 }, { "epoch": 4.84, "learning_rate": 0.00011455217880688214, "loss": 0.127, "step": 3348 }, { "epoch": 4.84, "learning_rate": 0.00011452001929570671, "loss": 0.0812, "step": 3349 }, { "epoch": 4.84, "learning_rate": 0.00011448785978453127, "loss": 0.0544, "step": 3350 }, { "epoch": 4.85, "learning_rate": 0.00011445570027335586, "loss": 0.0784, "step": 3351 }, { "epoch": 4.85, "learning_rate": 0.00011442354076218042, "loss": 0.1711, "step": 3352 }, { "epoch": 4.85, "learning_rate": 0.00011439138125100499, "loss": 0.1221, "step": 3353 }, { "epoch": 4.85, "learning_rate": 0.00011435922173982956, "loss": 0.0096, "step": 3354 }, { "epoch": 4.85, "learning_rate": 0.00011432706222865412, "loss": 0.0137, "step": 3355 }, { "epoch": 4.85, "learning_rate": 0.00011429490271747869, "loss": 0.0561, "step": 3356 }, { "epoch": 4.85, "learning_rate": 0.00011426274320630327, "loss": 0.1108, "step": 3357 }, { "epoch": 4.86, "learning_rate": 0.00011423058369512784, "loss": 0.2173, "step": 3358 }, { "epoch": 4.86, "learning_rate": 0.00011419842418395242, "loss": 0.1934, "step": 3359 }, { "epoch": 4.86, "learning_rate": 0.00011416626467277699, "loss": 0.0114, "step": 3360 }, { "epoch": 4.86, "learning_rate": 0.00011413410516160154, "loss": 0.0213, "step": 3361 }, { "epoch": 4.86, "learning_rate": 0.00011410194565042611, "loss": 0.0022, "step": 3362 }, { "epoch": 4.86, "learning_rate": 0.00011406978613925068, "loss": 0.1525, "step": 3363 }, { "epoch": 4.86, "learning_rate": 0.00011403762662807527, "loss": 0.0498, "step": 3364 }, { "epoch": 4.87, "learning_rate": 0.00011400546711689984, "loss": 0.1116, "step": 3365 }, { "epoch": 4.87, "learning_rate": 0.0001139733076057244, "loss": 0.0052, "step": 3366 }, { "epoch": 4.87, "learning_rate": 0.00011394114809454897, "loss": 0.0176, "step": 3367 }, { "epoch": 4.87, "learning_rate": 0.00011390898858337354, "loss": 0.0986, "step": 3368 }, { "epoch": 4.87, "learning_rate": 0.00011387682907219809, "loss": 0.1611, "step": 3369 }, { "epoch": 4.87, "learning_rate": 0.00011384466956102269, "loss": 0.0601, "step": 3370 }, { "epoch": 4.87, "learning_rate": 0.00011381251004984725, "loss": 0.0545, "step": 3371 }, { "epoch": 4.88, "learning_rate": 0.00011378035053867182, "loss": 0.2225, "step": 3372 }, { "epoch": 4.88, "learning_rate": 0.00011374819102749639, "loss": 0.2065, "step": 3373 }, { "epoch": 4.88, "learning_rate": 0.00011371603151632096, "loss": 0.0424, "step": 3374 }, { "epoch": 4.88, "learning_rate": 0.00011368387200514552, "loss": 0.0454, "step": 3375 }, { "epoch": 4.88, "learning_rate": 0.00011365171249397009, "loss": 0.1577, "step": 3376 }, { "epoch": 4.88, "learning_rate": 0.00011361955298279467, "loss": 0.0411, "step": 3377 }, { "epoch": 4.89, "learning_rate": 0.00011358739347161924, "loss": 0.0115, "step": 3378 }, { "epoch": 4.89, "learning_rate": 0.00011355523396044381, "loss": 0.1567, "step": 3379 }, { "epoch": 4.89, "learning_rate": 0.00011352307444926837, "loss": 0.1769, "step": 3380 }, { "epoch": 4.89, "learning_rate": 0.00011349091493809294, "loss": 0.1533, "step": 3381 }, { "epoch": 4.89, "learning_rate": 0.00011345875542691751, "loss": 0.0079, "step": 3382 }, { "epoch": 4.89, "learning_rate": 0.0001134265959157421, "loss": 0.1816, "step": 3383 }, { "epoch": 4.89, "learning_rate": 0.00011339443640456666, "loss": 0.1786, "step": 3384 }, { "epoch": 4.9, "learning_rate": 0.00011336227689339123, "loss": 0.0121, "step": 3385 }, { "epoch": 4.9, "learning_rate": 0.00011333011738221579, "loss": 0.0194, "step": 3386 }, { "epoch": 4.9, "learning_rate": 0.00011329795787104036, "loss": 0.0862, "step": 3387 }, { "epoch": 4.9, "learning_rate": 0.00011326579835986493, "loss": 0.023, "step": 3388 }, { "epoch": 4.9, "learning_rate": 0.00011323363884868952, "loss": 0.1265, "step": 3389 }, { "epoch": 4.9, "learning_rate": 0.00011320147933751409, "loss": 0.0904, "step": 3390 }, { "epoch": 4.9, "learning_rate": 0.00011316931982633864, "loss": 0.0191, "step": 3391 }, { "epoch": 4.91, "learning_rate": 0.00011313716031516321, "loss": 0.1133, "step": 3392 }, { "epoch": 4.91, "learning_rate": 0.00011310500080398779, "loss": 0.0426, "step": 3393 }, { "epoch": 4.91, "learning_rate": 0.00011307284129281234, "loss": 0.0226, "step": 3394 }, { "epoch": 4.91, "learning_rate": 0.00011304068178163691, "loss": 0.1357, "step": 3395 }, { "epoch": 4.91, "learning_rate": 0.0001130085222704615, "loss": 0.0037, "step": 3396 }, { "epoch": 4.91, "learning_rate": 0.00011297636275928607, "loss": 0.0677, "step": 3397 }, { "epoch": 4.91, "learning_rate": 0.00011294420324811064, "loss": 0.0869, "step": 3398 }, { "epoch": 4.92, "learning_rate": 0.00011291204373693521, "loss": 0.0239, "step": 3399 }, { "epoch": 4.92, "learning_rate": 0.00011287988422575977, "loss": 0.1428, "step": 3400 }, { "epoch": 4.92, "learning_rate": 0.00011284772471458434, "loss": 0.0458, "step": 3401 }, { "epoch": 4.92, "learning_rate": 0.00011281556520340892, "loss": 0.1279, "step": 3402 }, { "epoch": 4.92, "learning_rate": 0.00011278340569223349, "loss": 0.0145, "step": 3403 }, { "epoch": 4.92, "learning_rate": 0.00011275124618105806, "loss": 0.0284, "step": 3404 }, { "epoch": 4.92, "learning_rate": 0.00011271908666988262, "loss": 0.2061, "step": 3405 }, { "epoch": 4.93, "learning_rate": 0.00011268692715870719, "loss": 0.2148, "step": 3406 }, { "epoch": 4.93, "learning_rate": 0.00011265476764753176, "loss": 0.0876, "step": 3407 }, { "epoch": 4.93, "learning_rate": 0.00011262260813635632, "loss": 0.2207, "step": 3408 }, { "epoch": 4.93, "learning_rate": 0.00011259044862518091, "loss": 0.093, "step": 3409 }, { "epoch": 4.93, "learning_rate": 0.00011255828911400548, "loss": 0.1113, "step": 3410 }, { "epoch": 4.93, "learning_rate": 0.00011252612960283004, "loss": 0.0114, "step": 3411 }, { "epoch": 4.93, "learning_rate": 0.00011249397009165461, "loss": 0.1272, "step": 3412 }, { "epoch": 4.94, "learning_rate": 0.00011246181058047918, "loss": 0.158, "step": 3413 }, { "epoch": 4.94, "learning_rate": 0.00011242965106930374, "loss": 0.0859, "step": 3414 }, { "epoch": 4.94, "learning_rate": 0.00011239749155812834, "loss": 0.1205, "step": 3415 }, { "epoch": 4.94, "learning_rate": 0.0001123653320469529, "loss": 0.0398, "step": 3416 }, { "epoch": 4.94, "learning_rate": 0.00011233317253577746, "loss": 0.0214, "step": 3417 }, { "epoch": 4.94, "learning_rate": 0.00011230101302460203, "loss": 0.0473, "step": 3418 }, { "epoch": 4.94, "learning_rate": 0.00011226885351342659, "loss": 0.059, "step": 3419 }, { "epoch": 4.95, "learning_rate": 0.00011223669400225116, "loss": 0.1176, "step": 3420 }, { "epoch": 4.95, "learning_rate": 0.00011220453449107573, "loss": 0.1257, "step": 3421 }, { "epoch": 4.95, "learning_rate": 0.00011217237497990032, "loss": 0.0118, "step": 3422 }, { "epoch": 4.95, "learning_rate": 0.00011214021546872489, "loss": 0.0152, "step": 3423 }, { "epoch": 4.95, "learning_rate": 0.00011210805595754946, "loss": 0.0901, "step": 3424 }, { "epoch": 4.95, "learning_rate": 0.00011207589644637401, "loss": 0.0112, "step": 3425 }, { "epoch": 4.95, "learning_rate": 0.00011204373693519859, "loss": 0.0093, "step": 3426 }, { "epoch": 4.96, "learning_rate": 0.00011201157742402316, "loss": 0.087, "step": 3427 }, { "epoch": 4.96, "learning_rate": 0.00011197941791284774, "loss": 0.1232, "step": 3428 }, { "epoch": 4.96, "learning_rate": 0.00011194725840167231, "loss": 0.1061, "step": 3429 }, { "epoch": 4.96, "learning_rate": 0.00011191509889049687, "loss": 0.024, "step": 3430 }, { "epoch": 4.96, "learning_rate": 0.00011188293937932144, "loss": 0.0859, "step": 3431 }, { "epoch": 4.96, "learning_rate": 0.00011185077986814601, "loss": 0.0286, "step": 3432 }, { "epoch": 4.96, "learning_rate": 0.00011181862035697057, "loss": 0.1498, "step": 3433 }, { "epoch": 4.97, "learning_rate": 0.00011178646084579516, "loss": 0.0741, "step": 3434 }, { "epoch": 4.97, "learning_rate": 0.00011175430133461972, "loss": 0.0199, "step": 3435 }, { "epoch": 4.97, "learning_rate": 0.00011172214182344429, "loss": 0.0536, "step": 3436 }, { "epoch": 4.97, "learning_rate": 0.00011168998231226886, "loss": 0.0418, "step": 3437 }, { "epoch": 4.97, "learning_rate": 0.00011165782280109343, "loss": 0.0745, "step": 3438 }, { "epoch": 4.97, "learning_rate": 0.00011162566328991799, "loss": 0.1021, "step": 3439 }, { "epoch": 4.97, "learning_rate": 0.00011159350377874256, "loss": 0.0029, "step": 3440 }, { "epoch": 4.98, "learning_rate": 0.00011156134426756714, "loss": 0.0058, "step": 3441 }, { "epoch": 4.98, "learning_rate": 0.00011152918475639171, "loss": 0.0101, "step": 3442 }, { "epoch": 4.98, "learning_rate": 0.00011149702524521628, "loss": 0.0162, "step": 3443 }, { "epoch": 4.98, "learning_rate": 0.00011146486573404084, "loss": 0.0014, "step": 3444 }, { "epoch": 4.98, "learning_rate": 0.00011143270622286541, "loss": 0.0255, "step": 3445 }, { "epoch": 4.98, "learning_rate": 0.00011140054671168998, "loss": 0.0141, "step": 3446 }, { "epoch": 4.98, "learning_rate": 0.00011136838720051457, "loss": 0.0119, "step": 3447 }, { "epoch": 4.99, "learning_rate": 0.00011133622768933914, "loss": 0.0092, "step": 3448 }, { "epoch": 4.99, "learning_rate": 0.00011130406817816371, "loss": 0.0736, "step": 3449 }, { "epoch": 4.99, "learning_rate": 0.00011127190866698826, "loss": 0.1072, "step": 3450 }, { "epoch": 4.99, "learning_rate": 0.00011123974915581283, "loss": 0.0139, "step": 3451 }, { "epoch": 4.99, "learning_rate": 0.0001112075896446374, "loss": 0.0187, "step": 3452 }, { "epoch": 4.99, "learning_rate": 0.00011117543013346196, "loss": 0.0154, "step": 3453 }, { "epoch": 4.99, "learning_rate": 0.00011114327062228656, "loss": 0.001, "step": 3454 }, { "epoch": 5.0, "learning_rate": 0.00011111111111111112, "loss": 0.0146, "step": 3455 }, { "epoch": 5.0, "learning_rate": 0.00011107895159993569, "loss": 0.1416, "step": 3456 }, { "epoch": 5.0, "learning_rate": 0.00011104679208876026, "loss": 0.0823, "step": 3457 }, { "epoch": 5.0, "learning_rate": 0.00011101463257758481, "loss": 0.0003, "step": 3458 }, { "epoch": 5.0, "learning_rate": 0.00011098247306640938, "loss": 0.0021, "step": 3459 }, { "epoch": 5.0, "learning_rate": 0.00011095031355523397, "loss": 0.0128, "step": 3460 }, { "epoch": 5.01, "learning_rate": 0.00011091815404405854, "loss": 0.0748, "step": 3461 }, { "epoch": 5.01, "learning_rate": 0.00011088599453288311, "loss": 0.0283, "step": 3462 }, { "epoch": 5.01, "learning_rate": 0.00011085383502170768, "loss": 0.0089, "step": 3463 }, { "epoch": 5.01, "learning_rate": 0.00011082167551053224, "loss": 0.0033, "step": 3464 }, { "epoch": 5.01, "learning_rate": 0.00011078951599935681, "loss": 0.0216, "step": 3465 }, { "epoch": 5.01, "learning_rate": 0.00011075735648818138, "loss": 0.0742, "step": 3466 }, { "epoch": 5.01, "learning_rate": 0.00011072519697700596, "loss": 0.0464, "step": 3467 }, { "epoch": 5.02, "learning_rate": 0.00011069303746583053, "loss": 0.0029, "step": 3468 }, { "epoch": 5.02, "learning_rate": 0.00011066087795465509, "loss": 0.0112, "step": 3469 }, { "epoch": 5.02, "learning_rate": 0.00011062871844347966, "loss": 0.0459, "step": 3470 }, { "epoch": 5.02, "learning_rate": 0.00011059655893230423, "loss": 0.1023, "step": 3471 }, { "epoch": 5.02, "learning_rate": 0.00011056439942112879, "loss": 0.0056, "step": 3472 }, { "epoch": 5.02, "learning_rate": 0.00011053223990995339, "loss": 0.0555, "step": 3473 }, { "epoch": 5.02, "learning_rate": 0.00011050008039877796, "loss": 0.0039, "step": 3474 }, { "epoch": 5.03, "learning_rate": 0.00011046792088760251, "loss": 0.0802, "step": 3475 }, { "epoch": 5.03, "learning_rate": 0.00011043576137642708, "loss": 0.0959, "step": 3476 }, { "epoch": 5.03, "learning_rate": 0.00011040360186525165, "loss": 0.0032, "step": 3477 }, { "epoch": 5.03, "learning_rate": 0.00011037144235407621, "loss": 0.0699, "step": 3478 }, { "epoch": 5.03, "learning_rate": 0.00011033928284290081, "loss": 0.0049, "step": 3479 }, { "epoch": 5.03, "learning_rate": 0.00011030712333172537, "loss": 0.1417, "step": 3480 }, { "epoch": 5.03, "learning_rate": 0.00011027496382054994, "loss": 0.0177, "step": 3481 }, { "epoch": 5.04, "learning_rate": 0.0001102428043093745, "loss": 0.0202, "step": 3482 }, { "epoch": 5.04, "learning_rate": 0.00011021064479819906, "loss": 0.1257, "step": 3483 }, { "epoch": 5.04, "learning_rate": 0.00011017848528702363, "loss": 0.0087, "step": 3484 }, { "epoch": 5.04, "learning_rate": 0.0001101463257758482, "loss": 0.0484, "step": 3485 }, { "epoch": 5.04, "learning_rate": 0.00011011416626467279, "loss": 0.0444, "step": 3486 }, { "epoch": 5.04, "learning_rate": 0.00011008200675349736, "loss": 0.118, "step": 3487 }, { "epoch": 5.04, "learning_rate": 0.00011004984724232193, "loss": 0.0774, "step": 3488 }, { "epoch": 5.05, "learning_rate": 0.00011001768773114649, "loss": 0.0127, "step": 3489 }, { "epoch": 5.05, "learning_rate": 0.00010998552821997106, "loss": 0.0308, "step": 3490 }, { "epoch": 5.05, "learning_rate": 0.00010995336870879563, "loss": 0.026, "step": 3491 }, { "epoch": 5.05, "learning_rate": 0.00010992120919762021, "loss": 0.0363, "step": 3492 }, { "epoch": 5.05, "learning_rate": 0.00010988904968644478, "loss": 0.0382, "step": 3493 }, { "epoch": 5.05, "learning_rate": 0.00010985689017526934, "loss": 0.0124, "step": 3494 }, { "epoch": 5.05, "learning_rate": 0.00010982473066409391, "loss": 0.0321, "step": 3495 }, { "epoch": 5.06, "learning_rate": 0.00010979257115291848, "loss": 0.0271, "step": 3496 }, { "epoch": 5.06, "learning_rate": 0.00010976041164174304, "loss": 0.0049, "step": 3497 }, { "epoch": 5.06, "learning_rate": 0.00010972825213056761, "loss": 0.1907, "step": 3498 }, { "epoch": 5.06, "learning_rate": 0.0001096960926193922, "loss": 0.0617, "step": 3499 }, { "epoch": 5.06, "learning_rate": 0.00010966393310821676, "loss": 0.0015, "step": 3500 }, { "epoch": 5.06, "learning_rate": 0.00010963177359704133, "loss": 0.0085, "step": 3501 }, { "epoch": 5.06, "learning_rate": 0.0001095996140858659, "loss": 0.0366, "step": 3502 }, { "epoch": 5.07, "learning_rate": 0.00010956745457469046, "loss": 0.0118, "step": 3503 }, { "epoch": 5.07, "learning_rate": 0.00010953529506351503, "loss": 0.063, "step": 3504 }, { "epoch": 5.07, "learning_rate": 0.00010950313555233961, "loss": 0.0116, "step": 3505 }, { "epoch": 5.07, "learning_rate": 0.00010947097604116419, "loss": 0.0113, "step": 3506 }, { "epoch": 5.07, "learning_rate": 0.00010943881652998876, "loss": 0.062, "step": 3507 }, { "epoch": 5.07, "learning_rate": 0.00010940665701881331, "loss": 0.0054, "step": 3508 }, { "epoch": 5.07, "learning_rate": 0.00010937449750763788, "loss": 0.113, "step": 3509 }, { "epoch": 5.08, "learning_rate": 0.00010934233799646245, "loss": 0.0588, "step": 3510 }, { "epoch": 5.08, "learning_rate": 0.00010931017848528702, "loss": 0.1572, "step": 3511 }, { "epoch": 5.08, "learning_rate": 0.00010927801897411161, "loss": 0.0124, "step": 3512 }, { "epoch": 5.08, "learning_rate": 0.00010924585946293618, "loss": 0.1279, "step": 3513 }, { "epoch": 5.08, "learning_rate": 0.00010921369995176074, "loss": 0.007, "step": 3514 }, { "epoch": 5.08, "learning_rate": 0.0001091815404405853, "loss": 0.0463, "step": 3515 }, { "epoch": 5.08, "learning_rate": 0.00010914938092940988, "loss": 0.0131, "step": 3516 }, { "epoch": 5.09, "learning_rate": 0.00010911722141823443, "loss": 0.1027, "step": 3517 }, { "epoch": 5.09, "learning_rate": 0.00010908506190705903, "loss": 0.0484, "step": 3518 }, { "epoch": 5.09, "learning_rate": 0.00010905290239588359, "loss": 0.0016, "step": 3519 }, { "epoch": 5.09, "learning_rate": 0.00010902074288470816, "loss": 0.0063, "step": 3520 }, { "epoch": 5.09, "learning_rate": 0.00010898858337353273, "loss": 0.0544, "step": 3521 }, { "epoch": 5.09, "learning_rate": 0.00010895642386235729, "loss": 0.0453, "step": 3522 }, { "epoch": 5.09, "learning_rate": 0.00010892426435118186, "loss": 0.0369, "step": 3523 }, { "epoch": 5.1, "learning_rate": 0.00010889210484000645, "loss": 0.0325, "step": 3524 }, { "epoch": 5.1, "learning_rate": 0.00010885994532883101, "loss": 0.0135, "step": 3525 }, { "epoch": 5.1, "learning_rate": 0.00010882778581765558, "loss": 0.0237, "step": 3526 }, { "epoch": 5.1, "learning_rate": 0.00010879562630648015, "loss": 0.1377, "step": 3527 }, { "epoch": 5.1, "learning_rate": 0.00010876346679530471, "loss": 0.0163, "step": 3528 }, { "epoch": 5.1, "learning_rate": 0.00010873130728412928, "loss": 0.0663, "step": 3529 }, { "epoch": 5.1, "learning_rate": 0.00010869914777295385, "loss": 0.0075, "step": 3530 }, { "epoch": 5.11, "learning_rate": 0.00010866698826177843, "loss": 0.0183, "step": 3531 }, { "epoch": 5.11, "learning_rate": 0.000108634828750603, "loss": 0.1311, "step": 3532 }, { "epoch": 5.11, "learning_rate": 0.00010860266923942756, "loss": 0.0321, "step": 3533 }, { "epoch": 5.11, "learning_rate": 0.00010857050972825213, "loss": 0.0177, "step": 3534 }, { "epoch": 5.11, "learning_rate": 0.0001085383502170767, "loss": 0.0413, "step": 3535 }, { "epoch": 5.11, "learning_rate": 0.00010850619070590127, "loss": 0.0012, "step": 3536 }, { "epoch": 5.11, "learning_rate": 0.00010847403119472586, "loss": 0.027, "step": 3537 }, { "epoch": 5.12, "learning_rate": 0.00010844187168355043, "loss": 0.0486, "step": 3538 }, { "epoch": 5.12, "learning_rate": 0.00010840971217237499, "loss": 0.0388, "step": 3539 }, { "epoch": 5.12, "learning_rate": 0.00010837755266119956, "loss": 0.0203, "step": 3540 }, { "epoch": 5.12, "learning_rate": 0.00010834539315002413, "loss": 0.1855, "step": 3541 }, { "epoch": 5.12, "learning_rate": 0.00010831323363884868, "loss": 0.1627, "step": 3542 }, { "epoch": 5.12, "learning_rate": 0.00010828107412767325, "loss": 0.1836, "step": 3543 }, { "epoch": 5.13, "learning_rate": 0.00010824891461649784, "loss": 0.0114, "step": 3544 }, { "epoch": 5.13, "learning_rate": 0.00010821675510532241, "loss": 0.0381, "step": 3545 }, { "epoch": 5.13, "learning_rate": 0.00010818459559414698, "loss": 0.0021, "step": 3546 }, { "epoch": 5.13, "learning_rate": 0.00010815243608297154, "loss": 0.0488, "step": 3547 }, { "epoch": 5.13, "learning_rate": 0.0001081202765717961, "loss": 0.084, "step": 3548 }, { "epoch": 5.13, "learning_rate": 0.00010808811706062068, "loss": 0.0801, "step": 3549 }, { "epoch": 5.13, "learning_rate": 0.00010805595754944526, "loss": 0.009, "step": 3550 }, { "epoch": 5.14, "learning_rate": 0.00010802379803826983, "loss": 0.0217, "step": 3551 }, { "epoch": 5.14, "learning_rate": 0.0001079916385270944, "loss": 0.1034, "step": 3552 }, { "epoch": 5.14, "learning_rate": 0.00010795947901591896, "loss": 0.0074, "step": 3553 }, { "epoch": 5.14, "learning_rate": 0.00010792731950474353, "loss": 0.0073, "step": 3554 }, { "epoch": 5.14, "learning_rate": 0.0001078951599935681, "loss": 0.0347, "step": 3555 }, { "epoch": 5.14, "learning_rate": 0.00010786300048239266, "loss": 0.0096, "step": 3556 }, { "epoch": 5.14, "learning_rate": 0.00010783084097121725, "loss": 0.0926, "step": 3557 }, { "epoch": 5.15, "learning_rate": 0.00010779868146004181, "loss": 0.0865, "step": 3558 }, { "epoch": 5.15, "learning_rate": 0.00010776652194886638, "loss": 0.0015, "step": 3559 }, { "epoch": 5.15, "learning_rate": 0.00010773436243769095, "loss": 0.0687, "step": 3560 }, { "epoch": 5.15, "learning_rate": 0.00010770220292651552, "loss": 0.0201, "step": 3561 }, { "epoch": 5.15, "learning_rate": 0.00010767004341534008, "loss": 0.0847, "step": 3562 }, { "epoch": 5.15, "learning_rate": 0.00010763788390416468, "loss": 0.118, "step": 3563 }, { "epoch": 5.15, "learning_rate": 0.00010760572439298923, "loss": 0.0553, "step": 3564 }, { "epoch": 5.16, "learning_rate": 0.0001075735648818138, "loss": 0.0298, "step": 3565 }, { "epoch": 5.16, "learning_rate": 0.00010754140537063838, "loss": 0.0544, "step": 3566 }, { "epoch": 5.16, "learning_rate": 0.00010750924585946293, "loss": 0.1294, "step": 3567 }, { "epoch": 5.16, "learning_rate": 0.0001074770863482875, "loss": 0.0237, "step": 3568 }, { "epoch": 5.16, "learning_rate": 0.00010744492683711209, "loss": 0.0419, "step": 3569 }, { "epoch": 5.16, "learning_rate": 0.00010741276732593666, "loss": 0.0197, "step": 3570 }, { "epoch": 5.16, "learning_rate": 0.00010738060781476123, "loss": 0.1235, "step": 3571 }, { "epoch": 5.17, "learning_rate": 0.00010734844830358579, "loss": 0.0557, "step": 3572 }, { "epoch": 5.17, "learning_rate": 0.00010731628879241036, "loss": 0.0676, "step": 3573 }, { "epoch": 5.17, "learning_rate": 0.00010728412928123493, "loss": 0.0238, "step": 3574 }, { "epoch": 5.17, "learning_rate": 0.0001072519697700595, "loss": 0.0654, "step": 3575 }, { "epoch": 5.17, "learning_rate": 0.00010721981025888408, "loss": 0.1082, "step": 3576 }, { "epoch": 5.17, "learning_rate": 0.00010718765074770865, "loss": 0.0219, "step": 3577 }, { "epoch": 5.17, "learning_rate": 0.00010715549123653321, "loss": 0.0705, "step": 3578 }, { "epoch": 5.18, "learning_rate": 0.00010712333172535778, "loss": 0.004, "step": 3579 }, { "epoch": 5.18, "learning_rate": 0.00010709117221418235, "loss": 0.0497, "step": 3580 }, { "epoch": 5.18, "learning_rate": 0.0001070590127030069, "loss": 0.0288, "step": 3581 }, { "epoch": 5.18, "learning_rate": 0.0001070268531918315, "loss": 0.0069, "step": 3582 }, { "epoch": 5.18, "learning_rate": 0.00010699469368065606, "loss": 0.0952, "step": 3583 }, { "epoch": 5.18, "learning_rate": 0.00010696253416948063, "loss": 0.0187, "step": 3584 }, { "epoch": 5.18, "learning_rate": 0.0001069303746583052, "loss": 0.0955, "step": 3585 }, { "epoch": 5.19, "learning_rate": 0.00010689821514712977, "loss": 0.103, "step": 3586 }, { "epoch": 5.19, "learning_rate": 0.00010686605563595433, "loss": 0.0037, "step": 3587 }, { "epoch": 5.19, "learning_rate": 0.0001068338961247789, "loss": 0.0026, "step": 3588 }, { "epoch": 5.19, "learning_rate": 0.00010680173661360348, "loss": 0.0206, "step": 3589 }, { "epoch": 5.19, "learning_rate": 0.00010676957710242805, "loss": 0.0178, "step": 3590 }, { "epoch": 5.19, "learning_rate": 0.00010673741759125262, "loss": 0.0173, "step": 3591 }, { "epoch": 5.19, "learning_rate": 0.00010670525808007718, "loss": 0.0281, "step": 3592 }, { "epoch": 5.2, "learning_rate": 0.00010667309856890175, "loss": 0.0075, "step": 3593 }, { "epoch": 5.2, "learning_rate": 0.00010664093905772632, "loss": 0.0119, "step": 3594 }, { "epoch": 5.2, "learning_rate": 0.00010660877954655091, "loss": 0.0583, "step": 3595 }, { "epoch": 5.2, "learning_rate": 0.00010657662003537548, "loss": 0.1653, "step": 3596 }, { "epoch": 5.2, "learning_rate": 0.00010654446052420003, "loss": 0.0016, "step": 3597 }, { "epoch": 5.2, "learning_rate": 0.0001065123010130246, "loss": 0.0715, "step": 3598 }, { "epoch": 5.2, "learning_rate": 0.00010648014150184918, "loss": 0.0664, "step": 3599 }, { "epoch": 5.21, "learning_rate": 0.00010644798199067375, "loss": 0.0658, "step": 3600 }, { "epoch": 5.21, "learning_rate": 0.0001064158224794983, "loss": 0.0613, "step": 3601 }, { "epoch": 5.21, "learning_rate": 0.0001063836629683229, "loss": 0.0577, "step": 3602 }, { "epoch": 5.21, "learning_rate": 0.00010635150345714746, "loss": 0.0018, "step": 3603 }, { "epoch": 5.21, "learning_rate": 0.00010631934394597203, "loss": 0.0065, "step": 3604 }, { "epoch": 5.21, "learning_rate": 0.0001062871844347966, "loss": 0.0005, "step": 3605 }, { "epoch": 5.21, "learning_rate": 0.00010625502492362116, "loss": 0.013, "step": 3606 }, { "epoch": 5.22, "learning_rate": 0.00010622286541244573, "loss": 0.0228, "step": 3607 }, { "epoch": 5.22, "learning_rate": 0.00010619070590127031, "loss": 0.0197, "step": 3608 }, { "epoch": 5.22, "learning_rate": 0.00010615854639009488, "loss": 0.0833, "step": 3609 }, { "epoch": 5.22, "learning_rate": 0.00010612638687891945, "loss": 0.0176, "step": 3610 }, { "epoch": 5.22, "learning_rate": 0.00010609422736774401, "loss": 0.0699, "step": 3611 }, { "epoch": 5.22, "learning_rate": 0.00010606206785656858, "loss": 0.004, "step": 3612 }, { "epoch": 5.22, "learning_rate": 0.00010602990834539315, "loss": 0.1128, "step": 3613 }, { "epoch": 5.23, "learning_rate": 0.00010599774883421773, "loss": 0.0725, "step": 3614 }, { "epoch": 5.23, "learning_rate": 0.0001059655893230423, "loss": 0.0675, "step": 3615 }, { "epoch": 5.23, "learning_rate": 0.00010593342981186687, "loss": 0.015, "step": 3616 }, { "epoch": 5.23, "learning_rate": 0.00010590127030069143, "loss": 0.0089, "step": 3617 }, { "epoch": 5.23, "learning_rate": 0.000105869110789516, "loss": 0.0435, "step": 3618 }, { "epoch": 5.23, "learning_rate": 0.00010583695127834057, "loss": 0.0175, "step": 3619 }, { "epoch": 5.23, "learning_rate": 0.00010580479176716513, "loss": 0.0738, "step": 3620 }, { "epoch": 5.24, "learning_rate": 0.00010577263225598973, "loss": 0.0852, "step": 3621 }, { "epoch": 5.24, "learning_rate": 0.00010574047274481428, "loss": 0.0322, "step": 3622 }, { "epoch": 5.24, "learning_rate": 0.00010570831323363885, "loss": 0.1085, "step": 3623 }, { "epoch": 5.24, "learning_rate": 0.00010567615372246342, "loss": 0.0935, "step": 3624 }, { "epoch": 5.24, "learning_rate": 0.000105643994211288, "loss": 0.046, "step": 3625 }, { "epoch": 5.24, "learning_rate": 0.00010561183470011255, "loss": 0.0067, "step": 3626 }, { "epoch": 5.25, "learning_rate": 0.00010557967518893715, "loss": 0.0695, "step": 3627 }, { "epoch": 5.25, "learning_rate": 0.0001055475156777617, "loss": 0.0543, "step": 3628 }, { "epoch": 5.25, "learning_rate": 0.00010551535616658628, "loss": 0.0783, "step": 3629 }, { "epoch": 5.25, "learning_rate": 0.00010548319665541085, "loss": 0.1373, "step": 3630 }, { "epoch": 5.25, "learning_rate": 0.0001054510371442354, "loss": 0.0126, "step": 3631 }, { "epoch": 5.25, "learning_rate": 0.00010541887763305998, "loss": 0.0039, "step": 3632 }, { "epoch": 5.25, "learning_rate": 0.00010538671812188455, "loss": 0.1631, "step": 3633 }, { "epoch": 5.26, "learning_rate": 0.00010535455861070913, "loss": 0.0278, "step": 3634 }, { "epoch": 5.26, "learning_rate": 0.0001053223990995337, "loss": 0.0014, "step": 3635 }, { "epoch": 5.26, "learning_rate": 0.00010529023958835826, "loss": 0.0963, "step": 3636 }, { "epoch": 5.26, "learning_rate": 0.00010525808007718283, "loss": 0.0739, "step": 3637 }, { "epoch": 5.26, "learning_rate": 0.0001052259205660074, "loss": 0.1128, "step": 3638 }, { "epoch": 5.26, "learning_rate": 0.00010519376105483197, "loss": 0.0658, "step": 3639 }, { "epoch": 5.26, "learning_rate": 0.00010516160154365655, "loss": 0.0011, "step": 3640 }, { "epoch": 5.27, "learning_rate": 0.00010512944203248112, "loss": 0.0293, "step": 3641 }, { "epoch": 5.27, "learning_rate": 0.00010509728252130568, "loss": 0.0743, "step": 3642 }, { "epoch": 5.27, "learning_rate": 0.00010506512301013025, "loss": 0.0627, "step": 3643 }, { "epoch": 5.27, "learning_rate": 0.00010503296349895482, "loss": 0.0025, "step": 3644 }, { "epoch": 5.27, "learning_rate": 0.00010500080398777938, "loss": 0.0079, "step": 3645 }, { "epoch": 5.27, "learning_rate": 0.00010496864447660395, "loss": 0.0242, "step": 3646 }, { "epoch": 5.27, "learning_rate": 0.00010493648496542853, "loss": 0.0104, "step": 3647 }, { "epoch": 5.28, "learning_rate": 0.0001049043254542531, "loss": 0.0138, "step": 3648 }, { "epoch": 5.28, "learning_rate": 0.00010487216594307767, "loss": 0.0366, "step": 3649 }, { "epoch": 5.28, "learning_rate": 0.00010484000643190224, "loss": 0.0379, "step": 3650 }, { "epoch": 5.28, "learning_rate": 0.0001048078469207268, "loss": 0.0279, "step": 3651 }, { "epoch": 5.28, "learning_rate": 0.00010477568740955137, "loss": 0.041, "step": 3652 }, { "epoch": 5.28, "learning_rate": 0.00010474352789837596, "loss": 0.0381, "step": 3653 }, { "epoch": 5.28, "learning_rate": 0.00010471136838720053, "loss": 0.0122, "step": 3654 }, { "epoch": 5.29, "learning_rate": 0.0001046792088760251, "loss": 0.0011, "step": 3655 }, { "epoch": 5.29, "learning_rate": 0.00010464704936484965, "loss": 0.0344, "step": 3656 }, { "epoch": 5.29, "learning_rate": 0.00010461488985367422, "loss": 0.0058, "step": 3657 }, { "epoch": 5.29, "learning_rate": 0.0001045827303424988, "loss": 0.044, "step": 3658 }, { "epoch": 5.29, "learning_rate": 0.00010455057083132335, "loss": 0.0014, "step": 3659 }, { "epoch": 5.29, "learning_rate": 0.00010451841132014795, "loss": 0.0141, "step": 3660 }, { "epoch": 5.29, "learning_rate": 0.0001044862518089725, "loss": 0.0003, "step": 3661 }, { "epoch": 5.3, "learning_rate": 0.00010445409229779708, "loss": 0.0139, "step": 3662 }, { "epoch": 5.3, "learning_rate": 0.00010442193278662165, "loss": 0.1423, "step": 3663 }, { "epoch": 5.3, "learning_rate": 0.00010438977327544622, "loss": 0.013, "step": 3664 }, { "epoch": 5.3, "learning_rate": 0.00010435761376427077, "loss": 0.0166, "step": 3665 }, { "epoch": 5.3, "learning_rate": 0.00010432545425309537, "loss": 0.0243, "step": 3666 }, { "epoch": 5.3, "learning_rate": 0.00010429329474191993, "loss": 0.0272, "step": 3667 }, { "epoch": 5.3, "learning_rate": 0.0001042611352307445, "loss": 0.049, "step": 3668 }, { "epoch": 5.31, "learning_rate": 0.00010422897571956907, "loss": 0.0623, "step": 3669 }, { "epoch": 5.31, "learning_rate": 0.00010419681620839363, "loss": 0.0977, "step": 3670 }, { "epoch": 5.31, "learning_rate": 0.0001041646566972182, "loss": 0.0042, "step": 3671 }, { "epoch": 5.31, "learning_rate": 0.00010413249718604278, "loss": 0.0055, "step": 3672 }, { "epoch": 5.31, "learning_rate": 0.00010410033767486735, "loss": 0.0009, "step": 3673 }, { "epoch": 5.31, "learning_rate": 0.00010406817816369192, "loss": 0.0038, "step": 3674 }, { "epoch": 5.31, "learning_rate": 0.0001040360186525165, "loss": 0.0582, "step": 3675 }, { "epoch": 5.32, "learning_rate": 0.00010400385914134105, "loss": 0.175, "step": 3676 }, { "epoch": 5.32, "learning_rate": 0.00010397169963016562, "loss": 0.0062, "step": 3677 }, { "epoch": 5.32, "learning_rate": 0.00010393954011899019, "loss": 0.0052, "step": 3678 }, { "epoch": 5.32, "learning_rate": 0.00010390738060781478, "loss": 0.0734, "step": 3679 }, { "epoch": 5.32, "learning_rate": 0.00010387522109663935, "loss": 0.1667, "step": 3680 }, { "epoch": 5.32, "learning_rate": 0.0001038430615854639, "loss": 0.0698, "step": 3681 }, { "epoch": 5.32, "learning_rate": 0.00010381090207428847, "loss": 0.0544, "step": 3682 }, { "epoch": 5.33, "learning_rate": 0.00010377874256311304, "loss": 0.0926, "step": 3683 }, { "epoch": 5.33, "learning_rate": 0.0001037465830519376, "loss": 0.0192, "step": 3684 }, { "epoch": 5.33, "learning_rate": 0.0001037144235407622, "loss": 0.0342, "step": 3685 }, { "epoch": 5.33, "learning_rate": 0.00010368226402958676, "loss": 0.0476, "step": 3686 }, { "epoch": 5.33, "learning_rate": 0.00010365010451841133, "loss": 0.0613, "step": 3687 }, { "epoch": 5.33, "learning_rate": 0.0001036179450072359, "loss": 0.0038, "step": 3688 }, { "epoch": 5.33, "learning_rate": 0.00010358578549606047, "loss": 0.066, "step": 3689 }, { "epoch": 5.34, "learning_rate": 0.00010355362598488502, "loss": 0.0728, "step": 3690 }, { "epoch": 5.34, "learning_rate": 0.0001035214664737096, "loss": 0.0632, "step": 3691 }, { "epoch": 5.34, "learning_rate": 0.00010348930696253418, "loss": 0.016, "step": 3692 }, { "epoch": 5.34, "learning_rate": 0.00010345714745135875, "loss": 0.0028, "step": 3693 }, { "epoch": 5.34, "learning_rate": 0.00010342498794018332, "loss": 0.0226, "step": 3694 }, { "epoch": 5.34, "learning_rate": 0.00010339282842900788, "loss": 0.015, "step": 3695 }, { "epoch": 5.34, "learning_rate": 0.00010336066891783245, "loss": 0.1165, "step": 3696 }, { "epoch": 5.35, "learning_rate": 0.00010332850940665702, "loss": 0.1357, "step": 3697 }, { "epoch": 5.35, "learning_rate": 0.0001032963498954816, "loss": 0.0324, "step": 3698 }, { "epoch": 5.35, "learning_rate": 0.00010326419038430617, "loss": 0.0636, "step": 3699 }, { "epoch": 5.35, "learning_rate": 0.00010323203087313074, "loss": 0.1609, "step": 3700 }, { "epoch": 5.35, "learning_rate": 0.0001031998713619553, "loss": 0.0018, "step": 3701 }, { "epoch": 5.35, "learning_rate": 0.00010316771185077987, "loss": 0.0273, "step": 3702 }, { "epoch": 5.36, "learning_rate": 0.00010313555233960444, "loss": 0.0206, "step": 3703 }, { "epoch": 5.36, "learning_rate": 0.000103103392828429, "loss": 0.0774, "step": 3704 }, { "epoch": 5.36, "learning_rate": 0.0001030712333172536, "loss": 0.0391, "step": 3705 }, { "epoch": 5.36, "learning_rate": 0.00010303907380607815, "loss": 0.1055, "step": 3706 }, { "epoch": 5.36, "learning_rate": 0.00010300691429490272, "loss": 0.049, "step": 3707 }, { "epoch": 5.36, "learning_rate": 0.0001029747547837273, "loss": 0.0161, "step": 3708 }, { "epoch": 5.36, "learning_rate": 0.00010294259527255185, "loss": 0.0056, "step": 3709 }, { "epoch": 5.37, "learning_rate": 0.00010291043576137642, "loss": 0.0835, "step": 3710 }, { "epoch": 5.37, "learning_rate": 0.000102878276250201, "loss": 0.0106, "step": 3711 }, { "epoch": 5.37, "learning_rate": 0.00010284611673902558, "loss": 0.0347, "step": 3712 }, { "epoch": 5.37, "learning_rate": 0.00010281395722785015, "loss": 0.0724, "step": 3713 }, { "epoch": 5.37, "learning_rate": 0.00010278179771667472, "loss": 0.0172, "step": 3714 }, { "epoch": 5.37, "learning_rate": 0.00010274963820549927, "loss": 0.0116, "step": 3715 }, { "epoch": 5.37, "learning_rate": 0.00010271747869432384, "loss": 0.05, "step": 3716 }, { "epoch": 5.38, "learning_rate": 0.00010268531918314843, "loss": 0.0186, "step": 3717 }, { "epoch": 5.38, "learning_rate": 0.000102653159671973, "loss": 0.0073, "step": 3718 }, { "epoch": 5.38, "learning_rate": 0.00010262100016079757, "loss": 0.0396, "step": 3719 }, { "epoch": 5.38, "learning_rate": 0.00010258884064962213, "loss": 0.0004, "step": 3720 }, { "epoch": 5.38, "learning_rate": 0.0001025566811384467, "loss": 0.0648, "step": 3721 }, { "epoch": 5.38, "learning_rate": 0.00010252452162727127, "loss": 0.0886, "step": 3722 }, { "epoch": 5.38, "learning_rate": 0.00010249236211609582, "loss": 0.0335, "step": 3723 }, { "epoch": 5.39, "learning_rate": 0.00010246020260492042, "loss": 0.0204, "step": 3724 }, { "epoch": 5.39, "learning_rate": 0.00010242804309374498, "loss": 0.1017, "step": 3725 }, { "epoch": 5.39, "learning_rate": 0.00010239588358256955, "loss": 0.0573, "step": 3726 }, { "epoch": 5.39, "learning_rate": 0.00010236372407139412, "loss": 0.0275, "step": 3727 }, { "epoch": 5.39, "learning_rate": 0.00010233156456021869, "loss": 0.0464, "step": 3728 }, { "epoch": 5.39, "learning_rate": 0.00010229940504904325, "loss": 0.0062, "step": 3729 }, { "epoch": 5.39, "learning_rate": 0.00010226724553786784, "loss": 0.1086, "step": 3730 }, { "epoch": 5.4, "learning_rate": 0.0001022350860266924, "loss": 0.006, "step": 3731 }, { "epoch": 5.4, "learning_rate": 0.00010220292651551697, "loss": 0.0089, "step": 3732 }, { "epoch": 5.4, "learning_rate": 0.00010217076700434154, "loss": 0.0508, "step": 3733 }, { "epoch": 5.4, "learning_rate": 0.0001021386074931661, "loss": 0.0007, "step": 3734 }, { "epoch": 5.4, "learning_rate": 0.00010210644798199067, "loss": 0.0469, "step": 3735 }, { "epoch": 5.4, "learning_rate": 0.00010207428847081524, "loss": 0.0042, "step": 3736 }, { "epoch": 5.4, "learning_rate": 0.00010204212895963982, "loss": 0.001, "step": 3737 }, { "epoch": 5.41, "learning_rate": 0.0001020099694484644, "loss": 0.094, "step": 3738 }, { "epoch": 5.41, "learning_rate": 0.00010197780993728897, "loss": 0.0014, "step": 3739 }, { "epoch": 5.41, "learning_rate": 0.00010194565042611352, "loss": 0.011, "step": 3740 }, { "epoch": 5.41, "learning_rate": 0.00010191349091493809, "loss": 0.0685, "step": 3741 }, { "epoch": 5.41, "learning_rate": 0.00010188133140376266, "loss": 0.0596, "step": 3742 }, { "epoch": 5.41, "learning_rate": 0.00010184917189258725, "loss": 0.0454, "step": 3743 }, { "epoch": 5.41, "learning_rate": 0.00010181701238141182, "loss": 0.0569, "step": 3744 }, { "epoch": 5.42, "learning_rate": 0.00010178485287023638, "loss": 0.0476, "step": 3745 }, { "epoch": 5.42, "learning_rate": 0.00010175269335906095, "loss": 0.0287, "step": 3746 }, { "epoch": 5.42, "learning_rate": 0.00010172053384788552, "loss": 0.0228, "step": 3747 }, { "epoch": 5.42, "learning_rate": 0.00010168837433671007, "loss": 0.0137, "step": 3748 }, { "epoch": 5.42, "learning_rate": 0.00010165621482553464, "loss": 0.1128, "step": 3749 }, { "epoch": 5.42, "learning_rate": 0.00010162405531435923, "loss": 0.0004, "step": 3750 }, { "epoch": 5.42, "learning_rate": 0.0001015918958031838, "loss": 0.0817, "step": 3751 }, { "epoch": 5.43, "learning_rate": 0.00010155973629200837, "loss": 0.0684, "step": 3752 }, { "epoch": 5.43, "learning_rate": 0.00010152757678083294, "loss": 0.0209, "step": 3753 }, { "epoch": 5.43, "learning_rate": 0.0001014954172696575, "loss": 0.09, "step": 3754 }, { "epoch": 5.43, "learning_rate": 0.00010146325775848207, "loss": 0.1208, "step": 3755 }, { "epoch": 5.43, "learning_rate": 0.00010143109824730665, "loss": 0.0384, "step": 3756 }, { "epoch": 5.43, "learning_rate": 0.00010139893873613122, "loss": 0.0333, "step": 3757 }, { "epoch": 5.43, "learning_rate": 0.00010136677922495579, "loss": 0.0222, "step": 3758 }, { "epoch": 5.44, "learning_rate": 0.00010133461971378035, "loss": 0.0796, "step": 3759 }, { "epoch": 5.44, "learning_rate": 0.00010130246020260492, "loss": 0.0061, "step": 3760 }, { "epoch": 5.44, "learning_rate": 0.00010127030069142949, "loss": 0.0298, "step": 3761 }, { "epoch": 5.44, "learning_rate": 0.00010123814118025407, "loss": 0.0225, "step": 3762 }, { "epoch": 5.44, "learning_rate": 0.00010120598166907864, "loss": 0.0448, "step": 3763 }, { "epoch": 5.44, "learning_rate": 0.00010117382215790321, "loss": 0.0302, "step": 3764 }, { "epoch": 5.44, "learning_rate": 0.00010114166264672777, "loss": 0.1109, "step": 3765 }, { "epoch": 5.45, "learning_rate": 0.00010110950313555234, "loss": 0.1968, "step": 3766 }, { "epoch": 5.45, "learning_rate": 0.00010107734362437691, "loss": 0.0386, "step": 3767 }, { "epoch": 5.45, "learning_rate": 0.00010104518411320147, "loss": 0.0024, "step": 3768 }, { "epoch": 5.45, "learning_rate": 0.00010101302460202607, "loss": 0.0103, "step": 3769 }, { "epoch": 5.45, "learning_rate": 0.00010098086509085062, "loss": 0.0546, "step": 3770 }, { "epoch": 5.45, "learning_rate": 0.0001009487055796752, "loss": 0.0258, "step": 3771 }, { "epoch": 5.45, "learning_rate": 0.00010091654606849977, "loss": 0.0323, "step": 3772 }, { "epoch": 5.46, "learning_rate": 0.00010088438655732432, "loss": 0.0331, "step": 3773 }, { "epoch": 5.46, "learning_rate": 0.00010085222704614889, "loss": 0.0568, "step": 3774 }, { "epoch": 5.46, "learning_rate": 0.00010082006753497348, "loss": 0.0183, "step": 3775 }, { "epoch": 5.46, "learning_rate": 0.00010078790802379805, "loss": 0.155, "step": 3776 }, { "epoch": 5.46, "learning_rate": 0.00010075574851262262, "loss": 0.0251, "step": 3777 }, { "epoch": 5.46, "learning_rate": 0.00010072358900144719, "loss": 0.042, "step": 3778 }, { "epoch": 5.46, "learning_rate": 0.00010069142949027175, "loss": 0.0383, "step": 3779 }, { "epoch": 5.47, "learning_rate": 0.00010065926997909632, "loss": 0.0033, "step": 3780 }, { "epoch": 5.47, "learning_rate": 0.00010062711046792089, "loss": 0.0316, "step": 3781 }, { "epoch": 5.47, "learning_rate": 0.00010059495095674547, "loss": 0.065, "step": 3782 }, { "epoch": 5.47, "learning_rate": 0.00010056279144557004, "loss": 0.0416, "step": 3783 }, { "epoch": 5.47, "learning_rate": 0.0001005306319343946, "loss": 0.0268, "step": 3784 }, { "epoch": 5.47, "learning_rate": 0.00010049847242321917, "loss": 0.0988, "step": 3785 }, { "epoch": 5.48, "learning_rate": 0.00010046631291204374, "loss": 0.0322, "step": 3786 }, { "epoch": 5.48, "learning_rate": 0.0001004341534008683, "loss": 0.0439, "step": 3787 }, { "epoch": 5.48, "learning_rate": 0.0001004019938896929, "loss": 0.0184, "step": 3788 }, { "epoch": 5.48, "learning_rate": 0.00010036983437851746, "loss": 0.0398, "step": 3789 }, { "epoch": 5.48, "learning_rate": 0.00010033767486734202, "loss": 0.0322, "step": 3790 }, { "epoch": 5.48, "learning_rate": 0.00010030551535616659, "loss": 0.0725, "step": 3791 }, { "epoch": 5.48, "learning_rate": 0.00010027335584499116, "loss": 0.0023, "step": 3792 }, { "epoch": 5.49, "learning_rate": 0.00010024119633381572, "loss": 0.0107, "step": 3793 }, { "epoch": 5.49, "learning_rate": 0.00010020903682264029, "loss": 0.1041, "step": 3794 }, { "epoch": 5.49, "learning_rate": 0.00010017687731146487, "loss": 0.1348, "step": 3795 }, { "epoch": 5.49, "learning_rate": 0.00010014471780028944, "loss": 0.0354, "step": 3796 }, { "epoch": 5.49, "learning_rate": 0.00010011255828911401, "loss": 0.0713, "step": 3797 }, { "epoch": 5.49, "learning_rate": 0.00010008039877793857, "loss": 0.0472, "step": 3798 }, { "epoch": 5.49, "learning_rate": 0.00010004823926676314, "loss": 0.0009, "step": 3799 }, { "epoch": 5.5, "learning_rate": 0.00010001607975558771, "loss": 0.042, "step": 3800 }, { "epoch": 5.5, "learning_rate": 9.998392024441228e-05, "loss": 0.026, "step": 3801 }, { "epoch": 5.5, "learning_rate": 9.995176073323685e-05, "loss": 0.0794, "step": 3802 }, { "epoch": 5.5, "learning_rate": 9.991960122206144e-05, "loss": 0.0039, "step": 3803 }, { "epoch": 5.5, "learning_rate": 9.9887441710886e-05, "loss": 0.1085, "step": 3804 }, { "epoch": 5.5, "learning_rate": 9.985528219971057e-05, "loss": 0.0104, "step": 3805 }, { "epoch": 5.5, "learning_rate": 9.982312268853514e-05, "loss": 0.0299, "step": 3806 }, { "epoch": 5.51, "learning_rate": 9.97909631773597e-05, "loss": 0.064, "step": 3807 }, { "epoch": 5.51, "learning_rate": 9.975880366618428e-05, "loss": 0.0356, "step": 3808 }, { "epoch": 5.51, "learning_rate": 9.972664415500885e-05, "loss": 0.0124, "step": 3809 }, { "epoch": 5.51, "learning_rate": 9.969448464383342e-05, "loss": 0.0448, "step": 3810 }, { "epoch": 5.51, "learning_rate": 9.966232513265799e-05, "loss": 0.005, "step": 3811 }, { "epoch": 5.51, "learning_rate": 9.963016562148256e-05, "loss": 0.028, "step": 3812 }, { "epoch": 5.51, "learning_rate": 9.959800611030713e-05, "loss": 0.0088, "step": 3813 }, { "epoch": 5.52, "learning_rate": 9.95658465991317e-05, "loss": 0.0635, "step": 3814 }, { "epoch": 5.52, "learning_rate": 9.953368708795627e-05, "loss": 0.0482, "step": 3815 }, { "epoch": 5.52, "learning_rate": 9.950152757678084e-05, "loss": 0.0423, "step": 3816 }, { "epoch": 5.52, "learning_rate": 9.946936806560541e-05, "loss": 0.0104, "step": 3817 }, { "epoch": 5.52, "learning_rate": 9.943720855442997e-05, "loss": 0.0939, "step": 3818 }, { "epoch": 5.52, "learning_rate": 9.940504904325455e-05, "loss": 0.1426, "step": 3819 }, { "epoch": 5.52, "learning_rate": 9.937288953207912e-05, "loss": 0.0778, "step": 3820 }, { "epoch": 5.53, "learning_rate": 9.934073002090368e-05, "loss": 0.0005, "step": 3821 }, { "epoch": 5.53, "learning_rate": 9.930857050972826e-05, "loss": 0.0166, "step": 3822 }, { "epoch": 5.53, "learning_rate": 9.927641099855282e-05, "loss": 0.1753, "step": 3823 }, { "epoch": 5.53, "learning_rate": 9.924425148737739e-05, "loss": 0.0354, "step": 3824 }, { "epoch": 5.53, "learning_rate": 9.921209197620198e-05, "loss": 0.0719, "step": 3825 }, { "epoch": 5.53, "learning_rate": 9.917993246502653e-05, "loss": 0.1179, "step": 3826 }, { "epoch": 5.53, "learning_rate": 9.91477729538511e-05, "loss": 0.036, "step": 3827 }, { "epoch": 5.54, "learning_rate": 9.911561344267569e-05, "loss": 0.0023, "step": 3828 }, { "epoch": 5.54, "learning_rate": 9.908345393150024e-05, "loss": 0.0628, "step": 3829 }, { "epoch": 5.54, "learning_rate": 9.905129442032481e-05, "loss": 0.1398, "step": 3830 }, { "epoch": 5.54, "learning_rate": 9.901913490914939e-05, "loss": 0.0011, "step": 3831 }, { "epoch": 5.54, "learning_rate": 9.898697539797396e-05, "loss": 0.0019, "step": 3832 }, { "epoch": 5.54, "learning_rate": 9.895481588679853e-05, "loss": 0.0627, "step": 3833 }, { "epoch": 5.54, "learning_rate": 9.89226563756231e-05, "loss": 0.0923, "step": 3834 }, { "epoch": 5.55, "learning_rate": 9.889049686444767e-05, "loss": 0.1775, "step": 3835 }, { "epoch": 5.55, "learning_rate": 9.885833735327224e-05, "loss": 0.0267, "step": 3836 }, { "epoch": 5.55, "learning_rate": 9.88261778420968e-05, "loss": 0.0767, "step": 3837 }, { "epoch": 5.55, "learning_rate": 9.879401833092138e-05, "loss": 0.0206, "step": 3838 }, { "epoch": 5.55, "learning_rate": 9.876185881974595e-05, "loss": 0.0162, "step": 3839 }, { "epoch": 5.55, "learning_rate": 9.87296993085705e-05, "loss": 0.1787, "step": 3840 }, { "epoch": 5.55, "learning_rate": 9.869753979739509e-05, "loss": 0.0177, "step": 3841 }, { "epoch": 5.56, "learning_rate": 9.866538028621966e-05, "loss": 0.0903, "step": 3842 }, { "epoch": 5.56, "learning_rate": 9.863322077504422e-05, "loss": 0.1145, "step": 3843 }, { "epoch": 5.56, "learning_rate": 9.86010612638688e-05, "loss": 0.0725, "step": 3844 }, { "epoch": 5.56, "learning_rate": 9.856890175269337e-05, "loss": 0.0339, "step": 3845 }, { "epoch": 5.56, "learning_rate": 9.853674224151793e-05, "loss": 0.1057, "step": 3846 }, { "epoch": 5.56, "learning_rate": 9.85045827303425e-05, "loss": 0.0336, "step": 3847 }, { "epoch": 5.56, "learning_rate": 9.847242321916707e-05, "loss": 0.0545, "step": 3848 }, { "epoch": 5.57, "learning_rate": 9.844026370799164e-05, "loss": 0.096, "step": 3849 }, { "epoch": 5.57, "learning_rate": 9.840810419681621e-05, "loss": 0.1375, "step": 3850 }, { "epoch": 5.57, "learning_rate": 9.837594468564078e-05, "loss": 0.1203, "step": 3851 }, { "epoch": 5.57, "learning_rate": 9.834378517446535e-05, "loss": 0.0724, "step": 3852 }, { "epoch": 5.57, "learning_rate": 9.831162566328992e-05, "loss": 0.0542, "step": 3853 }, { "epoch": 5.57, "learning_rate": 9.82794661521145e-05, "loss": 0.1094, "step": 3854 }, { "epoch": 5.57, "learning_rate": 9.824730664093906e-05, "loss": 0.0134, "step": 3855 }, { "epoch": 5.58, "learning_rate": 9.821514712976363e-05, "loss": 0.0789, "step": 3856 }, { "epoch": 5.58, "learning_rate": 9.81829876185882e-05, "loss": 0.0124, "step": 3857 }, { "epoch": 5.58, "learning_rate": 9.815082810741278e-05, "loss": 0.0107, "step": 3858 }, { "epoch": 5.58, "learning_rate": 9.811866859623735e-05, "loss": 0.063, "step": 3859 }, { "epoch": 5.58, "learning_rate": 9.808650908506192e-05, "loss": 0.0774, "step": 3860 }, { "epoch": 5.58, "learning_rate": 9.805434957388649e-05, "loss": 0.0945, "step": 3861 }, { "epoch": 5.58, "learning_rate": 9.802219006271104e-05, "loss": 0.0331, "step": 3862 }, { "epoch": 5.59, "learning_rate": 9.799003055153561e-05, "loss": 0.0111, "step": 3863 }, { "epoch": 5.59, "learning_rate": 9.79578710403602e-05, "loss": 0.0023, "step": 3864 }, { "epoch": 5.59, "learning_rate": 9.792571152918476e-05, "loss": 0.1092, "step": 3865 }, { "epoch": 5.59, "learning_rate": 9.789355201800933e-05, "loss": 0.021, "step": 3866 }, { "epoch": 5.59, "learning_rate": 9.786139250683391e-05, "loss": 0.0427, "step": 3867 }, { "epoch": 5.59, "learning_rate": 9.782923299565847e-05, "loss": 0.0223, "step": 3868 }, { "epoch": 5.6, "learning_rate": 9.779707348448304e-05, "loss": 0.0439, "step": 3869 }, { "epoch": 5.6, "learning_rate": 9.776491397330761e-05, "loss": 0.0004, "step": 3870 }, { "epoch": 5.6, "learning_rate": 9.773275446213218e-05, "loss": 0.0463, "step": 3871 }, { "epoch": 5.6, "learning_rate": 9.770059495095675e-05, "loss": 0.0428, "step": 3872 }, { "epoch": 5.6, "learning_rate": 9.766843543978132e-05, "loss": 0.0007, "step": 3873 }, { "epoch": 5.6, "learning_rate": 9.763627592860589e-05, "loss": 0.009, "step": 3874 }, { "epoch": 5.6, "learning_rate": 9.760411641743046e-05, "loss": 0.1479, "step": 3875 }, { "epoch": 5.61, "learning_rate": 9.757195690625503e-05, "loss": 0.0605, "step": 3876 }, { "epoch": 5.61, "learning_rate": 9.75397973950796e-05, "loss": 0.0679, "step": 3877 }, { "epoch": 5.61, "learning_rate": 9.750763788390417e-05, "loss": 0.0319, "step": 3878 }, { "epoch": 5.61, "learning_rate": 9.747547837272873e-05, "loss": 0.0075, "step": 3879 }, { "epoch": 5.61, "learning_rate": 9.744331886155331e-05, "loss": 0.1591, "step": 3880 }, { "epoch": 5.61, "learning_rate": 9.741115935037788e-05, "loss": 0.0093, "step": 3881 }, { "epoch": 5.61, "learning_rate": 9.737899983920244e-05, "loss": 0.005, "step": 3882 }, { "epoch": 5.62, "learning_rate": 9.734684032802702e-05, "loss": 0.0053, "step": 3883 }, { "epoch": 5.62, "learning_rate": 9.73146808168516e-05, "loss": 0.1351, "step": 3884 }, { "epoch": 5.62, "learning_rate": 9.728252130567615e-05, "loss": 0.0707, "step": 3885 }, { "epoch": 5.62, "learning_rate": 9.725036179450074e-05, "loss": 0.0108, "step": 3886 }, { "epoch": 5.62, "learning_rate": 9.721820228332529e-05, "loss": 0.0157, "step": 3887 }, { "epoch": 5.62, "learning_rate": 9.718604277214986e-05, "loss": 0.0488, "step": 3888 }, { "epoch": 5.62, "learning_rate": 9.715388326097445e-05, "loss": 0.051, "step": 3889 }, { "epoch": 5.63, "learning_rate": 9.7121723749799e-05, "loss": 0.0181, "step": 3890 }, { "epoch": 5.63, "learning_rate": 9.708956423862358e-05, "loss": 0.046, "step": 3891 }, { "epoch": 5.63, "learning_rate": 9.705740472744815e-05, "loss": 0.0869, "step": 3892 }, { "epoch": 5.63, "learning_rate": 9.702524521627272e-05, "loss": 0.0817, "step": 3893 }, { "epoch": 5.63, "learning_rate": 9.699308570509729e-05, "loss": 0.0027, "step": 3894 }, { "epoch": 5.63, "learning_rate": 9.696092619392186e-05, "loss": 0.0194, "step": 3895 }, { "epoch": 5.63, "learning_rate": 9.692876668274643e-05, "loss": 0.0079, "step": 3896 }, { "epoch": 5.64, "learning_rate": 9.6896607171571e-05, "loss": 0.006, "step": 3897 }, { "epoch": 5.64, "learning_rate": 9.686444766039557e-05, "loss": 0.0731, "step": 3898 }, { "epoch": 5.64, "learning_rate": 9.683228814922014e-05, "loss": 0.0573, "step": 3899 }, { "epoch": 5.64, "learning_rate": 9.680012863804471e-05, "loss": 0.1375, "step": 3900 }, { "epoch": 5.64, "learning_rate": 9.676796912686927e-05, "loss": 0.0343, "step": 3901 }, { "epoch": 5.64, "learning_rate": 9.673580961569385e-05, "loss": 0.1231, "step": 3902 }, { "epoch": 5.64, "learning_rate": 9.670365010451842e-05, "loss": 0.0695, "step": 3903 }, { "epoch": 5.65, "learning_rate": 9.667149059334298e-05, "loss": 0.0255, "step": 3904 }, { "epoch": 5.65, "learning_rate": 9.663933108216755e-05, "loss": 0.0656, "step": 3905 }, { "epoch": 5.65, "learning_rate": 9.660717157099213e-05, "loss": 0.0264, "step": 3906 }, { "epoch": 5.65, "learning_rate": 9.657501205981669e-05, "loss": 0.0685, "step": 3907 }, { "epoch": 5.65, "learning_rate": 9.654285254864126e-05, "loss": 0.0936, "step": 3908 }, { "epoch": 5.65, "learning_rate": 9.651069303746584e-05, "loss": 0.0148, "step": 3909 }, { "epoch": 5.65, "learning_rate": 9.64785335262904e-05, "loss": 0.0212, "step": 3910 }, { "epoch": 5.66, "learning_rate": 9.644637401511497e-05, "loss": 0.0103, "step": 3911 }, { "epoch": 5.66, "learning_rate": 9.641421450393954e-05, "loss": 0.001, "step": 3912 }, { "epoch": 5.66, "learning_rate": 9.638205499276411e-05, "loss": 0.0766, "step": 3913 }, { "epoch": 5.66, "learning_rate": 9.634989548158868e-05, "loss": 0.0121, "step": 3914 }, { "epoch": 5.66, "learning_rate": 9.631773597041325e-05, "loss": 0.0839, "step": 3915 }, { "epoch": 5.66, "learning_rate": 9.628557645923782e-05, "loss": 0.146, "step": 3916 }, { "epoch": 5.66, "learning_rate": 9.62534169480624e-05, "loss": 0.02, "step": 3917 }, { "epoch": 5.67, "learning_rate": 9.622125743688697e-05, "loss": 0.0059, "step": 3918 }, { "epoch": 5.67, "learning_rate": 9.618909792571154e-05, "loss": 0.0008, "step": 3919 }, { "epoch": 5.67, "learning_rate": 9.61569384145361e-05, "loss": 0.1636, "step": 3920 }, { "epoch": 5.67, "learning_rate": 9.612477890336066e-05, "loss": 0.0062, "step": 3921 }, { "epoch": 5.67, "learning_rate": 9.609261939218525e-05, "loss": 0.0413, "step": 3922 }, { "epoch": 5.67, "learning_rate": 9.606045988100982e-05, "loss": 0.0076, "step": 3923 }, { "epoch": 5.67, "learning_rate": 9.602830036983437e-05, "loss": 0.0714, "step": 3924 }, { "epoch": 5.68, "learning_rate": 9.599614085865896e-05, "loss": 0.1099, "step": 3925 }, { "epoch": 5.68, "learning_rate": 9.596398134748352e-05, "loss": 0.0954, "step": 3926 }, { "epoch": 5.68, "learning_rate": 9.593182183630809e-05, "loss": 0.001, "step": 3927 }, { "epoch": 5.68, "learning_rate": 9.589966232513267e-05, "loss": 0.0509, "step": 3928 }, { "epoch": 5.68, "learning_rate": 9.586750281395723e-05, "loss": 0.0172, "step": 3929 }, { "epoch": 5.68, "learning_rate": 9.58353433027818e-05, "loss": 0.0486, "step": 3930 }, { "epoch": 5.68, "learning_rate": 9.580318379160638e-05, "loss": 0.0169, "step": 3931 }, { "epoch": 5.69, "learning_rate": 9.577102428043094e-05, "loss": 0.0935, "step": 3932 }, { "epoch": 5.69, "learning_rate": 9.573886476925551e-05, "loss": 0.0384, "step": 3933 }, { "epoch": 5.69, "learning_rate": 9.57067052580801e-05, "loss": 0.1775, "step": 3934 }, { "epoch": 5.69, "learning_rate": 9.567454574690465e-05, "loss": 0.093, "step": 3935 }, { "epoch": 5.69, "learning_rate": 9.564238623572922e-05, "loss": 0.0078, "step": 3936 }, { "epoch": 5.69, "learning_rate": 9.561022672455379e-05, "loss": 0.0322, "step": 3937 }, { "epoch": 5.69, "learning_rate": 9.557806721337836e-05, "loss": 0.0169, "step": 3938 }, { "epoch": 5.7, "learning_rate": 9.554590770220293e-05, "loss": 0.0044, "step": 3939 }, { "epoch": 5.7, "learning_rate": 9.55137481910275e-05, "loss": 0.0286, "step": 3940 }, { "epoch": 5.7, "learning_rate": 9.548158867985207e-05, "loss": 0.0123, "step": 3941 }, { "epoch": 5.7, "learning_rate": 9.544942916867664e-05, "loss": 0.0192, "step": 3942 }, { "epoch": 5.7, "learning_rate": 9.54172696575012e-05, "loss": 0.0638, "step": 3943 }, { "epoch": 5.7, "learning_rate": 9.538511014632579e-05, "loss": 0.045, "step": 3944 }, { "epoch": 5.7, "learning_rate": 9.535295063515036e-05, "loss": 0.0082, "step": 3945 }, { "epoch": 5.71, "learning_rate": 9.532079112397491e-05, "loss": 0.0329, "step": 3946 }, { "epoch": 5.71, "learning_rate": 9.52886316127995e-05, "loss": 0.02, "step": 3947 }, { "epoch": 5.71, "learning_rate": 9.525647210162407e-05, "loss": 0.0005, "step": 3948 }, { "epoch": 5.71, "learning_rate": 9.522431259044862e-05, "loss": 0.0131, "step": 3949 }, { "epoch": 5.71, "learning_rate": 9.51921530792732e-05, "loss": 0.087, "step": 3950 }, { "epoch": 5.71, "learning_rate": 9.515999356809777e-05, "loss": 0.1202, "step": 3951 }, { "epoch": 5.72, "learning_rate": 9.512783405692234e-05, "loss": 0.0343, "step": 3952 }, { "epoch": 5.72, "learning_rate": 9.50956745457469e-05, "loss": 0.0952, "step": 3953 }, { "epoch": 5.72, "learning_rate": 9.506351503457148e-05, "loss": 0.0333, "step": 3954 }, { "epoch": 5.72, "learning_rate": 9.503135552339605e-05, "loss": 0.1196, "step": 3955 }, { "epoch": 5.72, "learning_rate": 9.499919601222062e-05, "loss": 0.0555, "step": 3956 }, { "epoch": 5.72, "learning_rate": 9.496703650104519e-05, "loss": 0.0209, "step": 3957 }, { "epoch": 5.72, "learning_rate": 9.493487698986976e-05, "loss": 0.0521, "step": 3958 }, { "epoch": 5.73, "learning_rate": 9.490271747869433e-05, "loss": 0.1033, "step": 3959 }, { "epoch": 5.73, "learning_rate": 9.48705579675189e-05, "loss": 0.0678, "step": 3960 }, { "epoch": 5.73, "learning_rate": 9.483839845634347e-05, "loss": 0.0147, "step": 3961 }, { "epoch": 5.73, "learning_rate": 9.480623894516804e-05, "loss": 0.3096, "step": 3962 }, { "epoch": 5.73, "learning_rate": 9.477407943399261e-05, "loss": 0.1233, "step": 3963 }, { "epoch": 5.73, "learning_rate": 9.474191992281718e-05, "loss": 0.0362, "step": 3964 }, { "epoch": 5.73, "learning_rate": 9.470976041164175e-05, "loss": 0.0868, "step": 3965 }, { "epoch": 5.74, "learning_rate": 9.467760090046631e-05, "loss": 0.0198, "step": 3966 }, { "epoch": 5.74, "learning_rate": 9.46454413892909e-05, "loss": 0.0512, "step": 3967 }, { "epoch": 5.74, "learning_rate": 9.461328187811545e-05, "loss": 0.0659, "step": 3968 }, { "epoch": 5.74, "learning_rate": 9.458112236694002e-05, "loss": 0.0583, "step": 3969 }, { "epoch": 5.74, "learning_rate": 9.45489628557646e-05, "loss": 0.0457, "step": 3970 }, { "epoch": 5.74, "learning_rate": 9.451680334458916e-05, "loss": 0.0453, "step": 3971 }, { "epoch": 5.74, "learning_rate": 9.448464383341373e-05, "loss": 0.1051, "step": 3972 }, { "epoch": 5.75, "learning_rate": 9.445248432223832e-05, "loss": 0.0337, "step": 3973 }, { "epoch": 5.75, "learning_rate": 9.442032481106287e-05, "loss": 0.0132, "step": 3974 }, { "epoch": 5.75, "learning_rate": 9.438816529988744e-05, "loss": 0.0255, "step": 3975 }, { "epoch": 5.75, "learning_rate": 9.435600578871201e-05, "loss": 0.0305, "step": 3976 }, { "epoch": 5.75, "learning_rate": 9.432384627753659e-05, "loss": 0.0135, "step": 3977 }, { "epoch": 5.75, "learning_rate": 9.429168676636116e-05, "loss": 0.0244, "step": 3978 }, { "epoch": 5.75, "learning_rate": 9.425952725518573e-05, "loss": 0.0435, "step": 3979 }, { "epoch": 5.76, "learning_rate": 9.42273677440103e-05, "loss": 0.0299, "step": 3980 }, { "epoch": 5.76, "learning_rate": 9.419520823283487e-05, "loss": 0.0247, "step": 3981 }, { "epoch": 5.76, "learning_rate": 9.416304872165942e-05, "loss": 0.0401, "step": 3982 }, { "epoch": 5.76, "learning_rate": 9.413088921048401e-05, "loss": 0.092, "step": 3983 }, { "epoch": 5.76, "learning_rate": 9.409872969930858e-05, "loss": 0.1001, "step": 3984 }, { "epoch": 5.76, "learning_rate": 9.406657018813314e-05, "loss": 0.0417, "step": 3985 }, { "epoch": 5.76, "learning_rate": 9.403441067695772e-05, "loss": 0.1111, "step": 3986 }, { "epoch": 5.77, "learning_rate": 9.400225116578229e-05, "loss": 0.0703, "step": 3987 }, { "epoch": 5.77, "learning_rate": 9.397009165460685e-05, "loss": 0.0939, "step": 3988 }, { "epoch": 5.77, "learning_rate": 9.393793214343143e-05, "loss": 0.1139, "step": 3989 }, { "epoch": 5.77, "learning_rate": 9.3905772632256e-05, "loss": 0.0901, "step": 3990 }, { "epoch": 5.77, "learning_rate": 9.387361312108056e-05, "loss": 0.0144, "step": 3991 }, { "epoch": 5.77, "learning_rate": 9.384145360990514e-05, "loss": 0.012, "step": 3992 }, { "epoch": 5.77, "learning_rate": 9.38092940987297e-05, "loss": 0.017, "step": 3993 }, { "epoch": 5.78, "learning_rate": 9.377713458755427e-05, "loss": 0.0543, "step": 3994 }, { "epoch": 5.78, "learning_rate": 9.374497507637884e-05, "loss": 0.0774, "step": 3995 }, { "epoch": 5.78, "learning_rate": 9.371281556520341e-05, "loss": 0.0413, "step": 3996 }, { "epoch": 5.78, "learning_rate": 9.368065605402798e-05, "loss": 0.0659, "step": 3997 }, { "epoch": 5.78, "learning_rate": 9.364849654285255e-05, "loss": 0.0488, "step": 3998 }, { "epoch": 5.78, "learning_rate": 9.361633703167712e-05, "loss": 0.0495, "step": 3999 }, { "epoch": 5.78, "learning_rate": 9.35841775205017e-05, "loss": 0.0439, "step": 4000 }, { "epoch": 5.79, "learning_rate": 9.355201800932626e-05, "loss": 0.0541, "step": 4001 }, { "epoch": 5.79, "learning_rate": 9.351985849815083e-05, "loss": 0.1075, "step": 4002 }, { "epoch": 5.79, "learning_rate": 9.34876989869754e-05, "loss": 0.0176, "step": 4003 }, { "epoch": 5.79, "learning_rate": 9.345553947579998e-05, "loss": 0.038, "step": 4004 }, { "epoch": 5.79, "learning_rate": 9.342337996462455e-05, "loss": 0.0551, "step": 4005 }, { "epoch": 5.79, "learning_rate": 9.339122045344912e-05, "loss": 0.0279, "step": 4006 }, { "epoch": 5.79, "learning_rate": 9.335906094227367e-05, "loss": 0.0266, "step": 4007 }, { "epoch": 5.8, "learning_rate": 9.332690143109826e-05, "loss": 0.0013, "step": 4008 }, { "epoch": 5.8, "learning_rate": 9.329474191992283e-05, "loss": 0.0042, "step": 4009 }, { "epoch": 5.8, "learning_rate": 9.326258240874738e-05, "loss": 0.0453, "step": 4010 }, { "epoch": 5.8, "learning_rate": 9.323042289757196e-05, "loss": 0.0405, "step": 4011 }, { "epoch": 5.8, "learning_rate": 9.319826338639654e-05, "loss": 0.0674, "step": 4012 }, { "epoch": 5.8, "learning_rate": 9.31661038752211e-05, "loss": 0.0075, "step": 4013 }, { "epoch": 5.8, "learning_rate": 9.313394436404567e-05, "loss": 0.0465, "step": 4014 }, { "epoch": 5.81, "learning_rate": 9.310178485287024e-05, "loss": 0.0237, "step": 4015 }, { "epoch": 5.81, "learning_rate": 9.306962534169481e-05, "loss": 0.0253, "step": 4016 }, { "epoch": 5.81, "learning_rate": 9.303746583051938e-05, "loss": 0.0052, "step": 4017 }, { "epoch": 5.81, "learning_rate": 9.300530631934395e-05, "loss": 0.0436, "step": 4018 }, { "epoch": 5.81, "learning_rate": 9.297314680816852e-05, "loss": 0.099, "step": 4019 }, { "epoch": 5.81, "learning_rate": 9.294098729699309e-05, "loss": 0.0073, "step": 4020 }, { "epoch": 5.81, "learning_rate": 9.290882778581766e-05, "loss": 0.0417, "step": 4021 }, { "epoch": 5.82, "learning_rate": 9.287666827464223e-05, "loss": 0.0123, "step": 4022 }, { "epoch": 5.82, "learning_rate": 9.28445087634668e-05, "loss": 0.0198, "step": 4023 }, { "epoch": 5.82, "learning_rate": 9.281234925229137e-05, "loss": 0.0675, "step": 4024 }, { "epoch": 5.82, "learning_rate": 9.278018974111594e-05, "loss": 0.0874, "step": 4025 }, { "epoch": 5.82, "learning_rate": 9.274803022994051e-05, "loss": 0.103, "step": 4026 }, { "epoch": 5.82, "learning_rate": 9.271587071876507e-05, "loss": 0.003, "step": 4027 }, { "epoch": 5.83, "learning_rate": 9.268371120758965e-05, "loss": 0.0515, "step": 4028 }, { "epoch": 5.83, "learning_rate": 9.265155169641422e-05, "loss": 0.0028, "step": 4029 }, { "epoch": 5.83, "learning_rate": 9.261939218523878e-05, "loss": 0.0136, "step": 4030 }, { "epoch": 5.83, "learning_rate": 9.258723267406337e-05, "loss": 0.0036, "step": 4031 }, { "epoch": 5.83, "learning_rate": 9.255507316288792e-05, "loss": 0.013, "step": 4032 }, { "epoch": 5.83, "learning_rate": 9.252291365171249e-05, "loss": 0.0633, "step": 4033 }, { "epoch": 5.83, "learning_rate": 9.249075414053708e-05, "loss": 0.0088, "step": 4034 }, { "epoch": 5.84, "learning_rate": 9.245859462936163e-05, "loss": 0.0145, "step": 4035 }, { "epoch": 5.84, "learning_rate": 9.24264351181862e-05, "loss": 0.0431, "step": 4036 }, { "epoch": 5.84, "learning_rate": 9.239427560701079e-05, "loss": 0.0009, "step": 4037 }, { "epoch": 5.84, "learning_rate": 9.236211609583535e-05, "loss": 0.0764, "step": 4038 }, { "epoch": 5.84, "learning_rate": 9.232995658465992e-05, "loss": 0.0778, "step": 4039 }, { "epoch": 5.84, "learning_rate": 9.229779707348449e-05, "loss": 0.0015, "step": 4040 }, { "epoch": 5.84, "learning_rate": 9.226563756230906e-05, "loss": 0.0788, "step": 4041 }, { "epoch": 5.85, "learning_rate": 9.223347805113363e-05, "loss": 0.0288, "step": 4042 }, { "epoch": 5.85, "learning_rate": 9.22013185399582e-05, "loss": 0.0029, "step": 4043 }, { "epoch": 5.85, "learning_rate": 9.216915902878277e-05, "loss": 0.0494, "step": 4044 }, { "epoch": 5.85, "learning_rate": 9.213699951760734e-05, "loss": 0.0545, "step": 4045 }, { "epoch": 5.85, "learning_rate": 9.21048400064319e-05, "loss": 0.0052, "step": 4046 }, { "epoch": 5.85, "learning_rate": 9.207268049525648e-05, "loss": 0.0007, "step": 4047 }, { "epoch": 5.85, "learning_rate": 9.204052098408105e-05, "loss": 0.0311, "step": 4048 }, { "epoch": 5.86, "learning_rate": 9.200836147290561e-05, "loss": 0.0019, "step": 4049 }, { "epoch": 5.86, "learning_rate": 9.197620196173019e-05, "loss": 0.0493, "step": 4050 }, { "epoch": 5.86, "learning_rate": 9.194404245055476e-05, "loss": 0.0469, "step": 4051 }, { "epoch": 5.86, "learning_rate": 9.191188293937932e-05, "loss": 0.0005, "step": 4052 }, { "epoch": 5.86, "learning_rate": 9.18797234282039e-05, "loss": 0.0133, "step": 4053 }, { "epoch": 5.86, "learning_rate": 9.184756391702847e-05, "loss": 0.0082, "step": 4054 }, { "epoch": 5.86, "learning_rate": 9.181540440585303e-05, "loss": 0.1074, "step": 4055 }, { "epoch": 5.87, "learning_rate": 9.17832448946776e-05, "loss": 0.0155, "step": 4056 }, { "epoch": 5.87, "learning_rate": 9.175108538350217e-05, "loss": 0.0316, "step": 4057 }, { "epoch": 5.87, "learning_rate": 9.171892587232674e-05, "loss": 0.0285, "step": 4058 }, { "epoch": 5.87, "learning_rate": 9.168676636115131e-05, "loss": 0.0782, "step": 4059 }, { "epoch": 5.87, "learning_rate": 9.165460684997588e-05, "loss": 0.1307, "step": 4060 }, { "epoch": 5.87, "learning_rate": 9.162244733880045e-05, "loss": 0.0977, "step": 4061 }, { "epoch": 5.87, "learning_rate": 9.159028782762502e-05, "loss": 0.0294, "step": 4062 }, { "epoch": 5.88, "learning_rate": 9.15581283164496e-05, "loss": 0.0175, "step": 4063 }, { "epoch": 5.88, "learning_rate": 9.152596880527417e-05, "loss": 0.0576, "step": 4064 }, { "epoch": 5.88, "learning_rate": 9.149380929409874e-05, "loss": 0.0012, "step": 4065 }, { "epoch": 5.88, "learning_rate": 9.14616497829233e-05, "loss": 0.0708, "step": 4066 }, { "epoch": 5.88, "learning_rate": 9.142949027174788e-05, "loss": 0.0037, "step": 4067 }, { "epoch": 5.88, "learning_rate": 9.139733076057245e-05, "loss": 0.0989, "step": 4068 }, { "epoch": 5.88, "learning_rate": 9.1365171249397e-05, "loss": 0.1206, "step": 4069 }, { "epoch": 5.89, "learning_rate": 9.133301173822159e-05, "loss": 0.0879, "step": 4070 }, { "epoch": 5.89, "learning_rate": 9.130085222704615e-05, "loss": 0.0039, "step": 4071 }, { "epoch": 5.89, "learning_rate": 9.126869271587072e-05, "loss": 0.018, "step": 4072 }, { "epoch": 5.89, "learning_rate": 9.12365332046953e-05, "loss": 0.0397, "step": 4073 }, { "epoch": 5.89, "learning_rate": 9.120437369351986e-05, "loss": 0.0534, "step": 4074 }, { "epoch": 5.89, "learning_rate": 9.117221418234443e-05, "loss": 0.0229, "step": 4075 }, { "epoch": 5.89, "learning_rate": 9.114005467116901e-05, "loss": 0.0095, "step": 4076 }, { "epoch": 5.9, "learning_rate": 9.110789515999357e-05, "loss": 0.0817, "step": 4077 }, { "epoch": 5.9, "learning_rate": 9.107573564881814e-05, "loss": 0.0783, "step": 4078 }, { "epoch": 5.9, "learning_rate": 9.104357613764272e-05, "loss": 0.0346, "step": 4079 }, { "epoch": 5.9, "learning_rate": 9.101141662646728e-05, "loss": 0.0791, "step": 4080 }, { "epoch": 5.9, "learning_rate": 9.097925711529185e-05, "loss": 0.0457, "step": 4081 }, { "epoch": 5.9, "learning_rate": 9.094709760411642e-05, "loss": 0.0386, "step": 4082 }, { "epoch": 5.9, "learning_rate": 9.091493809294099e-05, "loss": 0.0472, "step": 4083 }, { "epoch": 5.91, "learning_rate": 9.088277858176556e-05, "loss": 0.0752, "step": 4084 }, { "epoch": 5.91, "learning_rate": 9.085061907059013e-05, "loss": 0.0874, "step": 4085 }, { "epoch": 5.91, "learning_rate": 9.08184595594147e-05, "loss": 0.0231, "step": 4086 }, { "epoch": 5.91, "learning_rate": 9.078630004823927e-05, "loss": 0.0179, "step": 4087 }, { "epoch": 5.91, "learning_rate": 9.075414053706383e-05, "loss": 0.0738, "step": 4088 }, { "epoch": 5.91, "learning_rate": 9.072198102588841e-05, "loss": 0.0272, "step": 4089 }, { "epoch": 5.91, "learning_rate": 9.068982151471299e-05, "loss": 0.0848, "step": 4090 }, { "epoch": 5.92, "learning_rate": 9.065766200353754e-05, "loss": 0.0175, "step": 4091 }, { "epoch": 5.92, "learning_rate": 9.062550249236213e-05, "loss": 0.0896, "step": 4092 }, { "epoch": 5.92, "learning_rate": 9.05933429811867e-05, "loss": 0.0064, "step": 4093 }, { "epoch": 5.92, "learning_rate": 9.056118347001125e-05, "loss": 0.1038, "step": 4094 }, { "epoch": 5.92, "learning_rate": 9.052902395883584e-05, "loss": 0.1006, "step": 4095 }, { "epoch": 5.92, "learning_rate": 9.04968644476604e-05, "loss": 0.1094, "step": 4096 }, { "epoch": 5.92, "learning_rate": 9.046470493648497e-05, "loss": 0.015, "step": 4097 }, { "epoch": 5.93, "learning_rate": 9.043254542530955e-05, "loss": 0.0579, "step": 4098 }, { "epoch": 5.93, "learning_rate": 9.04003859141341e-05, "loss": 0.02, "step": 4099 }, { "epoch": 5.93, "learning_rate": 9.036822640295868e-05, "loss": 0.0903, "step": 4100 }, { "epoch": 5.93, "learning_rate": 9.033606689178325e-05, "loss": 0.0065, "step": 4101 }, { "epoch": 5.93, "learning_rate": 9.030390738060782e-05, "loss": 0.085, "step": 4102 }, { "epoch": 5.93, "learning_rate": 9.027174786943239e-05, "loss": 0.0085, "step": 4103 }, { "epoch": 5.93, "learning_rate": 9.023958835825696e-05, "loss": 0.0375, "step": 4104 }, { "epoch": 5.94, "learning_rate": 9.020742884708153e-05, "loss": 0.032, "step": 4105 }, { "epoch": 5.94, "learning_rate": 9.01752693359061e-05, "loss": 0.1562, "step": 4106 }, { "epoch": 5.94, "learning_rate": 9.014310982473067e-05, "loss": 0.0633, "step": 4107 }, { "epoch": 5.94, "learning_rate": 9.011095031355524e-05, "loss": 0.0037, "step": 4108 }, { "epoch": 5.94, "learning_rate": 9.007879080237981e-05, "loss": 0.0053, "step": 4109 }, { "epoch": 5.94, "learning_rate": 9.004663129120438e-05, "loss": 0.1316, "step": 4110 }, { "epoch": 5.95, "learning_rate": 9.001447178002895e-05, "loss": 0.0345, "step": 4111 }, { "epoch": 5.95, "learning_rate": 8.998231226885352e-05, "loss": 0.0282, "step": 4112 }, { "epoch": 5.95, "learning_rate": 8.995015275767808e-05, "loss": 0.0178, "step": 4113 }, { "epoch": 5.95, "learning_rate": 8.991799324650265e-05, "loss": 0.1552, "step": 4114 }, { "epoch": 5.95, "learning_rate": 8.988583373532723e-05, "loss": 0.0069, "step": 4115 }, { "epoch": 5.95, "learning_rate": 8.985367422415179e-05, "loss": 0.0261, "step": 4116 }, { "epoch": 5.95, "learning_rate": 8.982151471297636e-05, "loss": 0.0876, "step": 4117 }, { "epoch": 5.96, "learning_rate": 8.978935520180095e-05, "loss": 0.0541, "step": 4118 }, { "epoch": 5.96, "learning_rate": 8.97571956906255e-05, "loss": 0.0661, "step": 4119 }, { "epoch": 5.96, "learning_rate": 8.972503617945007e-05, "loss": 0.0376, "step": 4120 }, { "epoch": 5.96, "learning_rate": 8.969287666827464e-05, "loss": 0.0121, "step": 4121 }, { "epoch": 5.96, "learning_rate": 8.966071715709921e-05, "loss": 0.0403, "step": 4122 }, { "epoch": 5.96, "learning_rate": 8.962855764592378e-05, "loss": 0.0569, "step": 4123 }, { "epoch": 5.96, "learning_rate": 8.959639813474836e-05, "loss": 0.0113, "step": 4124 }, { "epoch": 5.97, "learning_rate": 8.956423862357293e-05, "loss": 0.1667, "step": 4125 }, { "epoch": 5.97, "learning_rate": 8.95320791123975e-05, "loss": 0.0543, "step": 4126 }, { "epoch": 5.97, "learning_rate": 8.949991960122207e-05, "loss": 0.0787, "step": 4127 }, { "epoch": 5.97, "learning_rate": 8.946776009004664e-05, "loss": 0.0945, "step": 4128 }, { "epoch": 5.97, "learning_rate": 8.943560057887121e-05, "loss": 0.0522, "step": 4129 }, { "epoch": 5.97, "learning_rate": 8.940344106769576e-05, "loss": 0.071, "step": 4130 }, { "epoch": 5.97, "learning_rate": 8.937128155652035e-05, "loss": 0.0023, "step": 4131 }, { "epoch": 5.98, "learning_rate": 8.933912204534492e-05, "loss": 0.0134, "step": 4132 }, { "epoch": 5.98, "learning_rate": 8.930696253416948e-05, "loss": 0.1431, "step": 4133 }, { "epoch": 5.98, "learning_rate": 8.927480302299406e-05, "loss": 0.0663, "step": 4134 }, { "epoch": 5.98, "learning_rate": 8.924264351181863e-05, "loss": 0.1484, "step": 4135 }, { "epoch": 5.98, "learning_rate": 8.921048400064319e-05, "loss": 0.0338, "step": 4136 }, { "epoch": 5.98, "learning_rate": 8.917832448946777e-05, "loss": 0.0256, "step": 4137 }, { "epoch": 5.98, "learning_rate": 8.914616497829233e-05, "loss": 0.0184, "step": 4138 }, { "epoch": 5.99, "learning_rate": 8.91140054671169e-05, "loss": 0.1091, "step": 4139 }, { "epoch": 5.99, "learning_rate": 8.908184595594148e-05, "loss": 0.0308, "step": 4140 }, { "epoch": 5.99, "learning_rate": 8.904968644476604e-05, "loss": 0.115, "step": 4141 }, { "epoch": 5.99, "learning_rate": 8.901752693359061e-05, "loss": 0.0253, "step": 4142 }, { "epoch": 5.99, "learning_rate": 8.89853674224152e-05, "loss": 0.0158, "step": 4143 }, { "epoch": 5.99, "learning_rate": 8.895320791123975e-05, "loss": 0.0548, "step": 4144 }, { "epoch": 5.99, "learning_rate": 8.892104840006432e-05, "loss": 0.0258, "step": 4145 }, { "epoch": 6.0, "learning_rate": 8.888888888888889e-05, "loss": 0.0777, "step": 4146 }, { "epoch": 6.0, "learning_rate": 8.885672937771346e-05, "loss": 0.0923, "step": 4147 }, { "epoch": 6.0, "learning_rate": 8.882456986653803e-05, "loss": 0.0565, "step": 4148 }, { "epoch": 6.0, "learning_rate": 8.87924103553626e-05, "loss": 0.0017, "step": 4149 }, { "epoch": 6.0, "learning_rate": 8.876025084418718e-05, "loss": 0.0086, "step": 4150 }, { "epoch": 6.0, "learning_rate": 8.872809133301175e-05, "loss": 0.0111, "step": 4151 }, { "epoch": 6.0, "learning_rate": 8.86959318218363e-05, "loss": 0.0059, "step": 4152 }, { "epoch": 6.01, "learning_rate": 8.866377231066089e-05, "loss": 0.0173, "step": 4153 }, { "epoch": 6.01, "learning_rate": 8.863161279948546e-05, "loss": 0.0042, "step": 4154 }, { "epoch": 6.01, "learning_rate": 8.859945328831001e-05, "loss": 0.0256, "step": 4155 }, { "epoch": 6.01, "learning_rate": 8.85672937771346e-05, "loss": 0.0371, "step": 4156 }, { "epoch": 6.01, "learning_rate": 8.853513426595917e-05, "loss": 0.004, "step": 4157 }, { "epoch": 6.01, "learning_rate": 8.850297475478373e-05, "loss": 0.0002, "step": 4158 }, { "epoch": 6.01, "learning_rate": 8.84708152436083e-05, "loss": 0.0013, "step": 4159 }, { "epoch": 6.02, "learning_rate": 8.843865573243287e-05, "loss": 0.0146, "step": 4160 }, { "epoch": 6.02, "learning_rate": 8.840649622125744e-05, "loss": 0.0254, "step": 4161 }, { "epoch": 6.02, "learning_rate": 8.837433671008201e-05, "loss": 0.0113, "step": 4162 }, { "epoch": 6.02, "learning_rate": 8.834217719890658e-05, "loss": 0.0507, "step": 4163 }, { "epoch": 6.02, "learning_rate": 8.831001768773115e-05, "loss": 0.0594, "step": 4164 }, { "epoch": 6.02, "learning_rate": 8.827785817655572e-05, "loss": 0.0524, "step": 4165 }, { "epoch": 6.02, "learning_rate": 8.824569866538029e-05, "loss": 0.0481, "step": 4166 }, { "epoch": 6.03, "learning_rate": 8.821353915420486e-05, "loss": 0.0177, "step": 4167 }, { "epoch": 6.03, "learning_rate": 8.818137964302943e-05, "loss": 0.0452, "step": 4168 }, { "epoch": 6.03, "learning_rate": 8.8149220131854e-05, "loss": 0.0389, "step": 4169 }, { "epoch": 6.03, "learning_rate": 8.811706062067857e-05, "loss": 0.057, "step": 4170 }, { "epoch": 6.03, "learning_rate": 8.808490110950314e-05, "loss": 0.0012, "step": 4171 }, { "epoch": 6.03, "learning_rate": 8.805274159832771e-05, "loss": 0.0026, "step": 4172 }, { "epoch": 6.03, "learning_rate": 8.802058208715228e-05, "loss": 0.0062, "step": 4173 }, { "epoch": 6.04, "learning_rate": 8.798842257597685e-05, "loss": 0.0345, "step": 4174 }, { "epoch": 6.04, "learning_rate": 8.795626306480141e-05, "loss": 0.0843, "step": 4175 }, { "epoch": 6.04, "learning_rate": 8.7924103553626e-05, "loss": 0.0494, "step": 4176 }, { "epoch": 6.04, "learning_rate": 8.789194404245055e-05, "loss": 0.0333, "step": 4177 }, { "epoch": 6.04, "learning_rate": 8.785978453127512e-05, "loss": 0.0086, "step": 4178 }, { "epoch": 6.04, "learning_rate": 8.78276250200997e-05, "loss": 0.0021, "step": 4179 }, { "epoch": 6.04, "learning_rate": 8.779546550892426e-05, "loss": 0.004, "step": 4180 }, { "epoch": 6.05, "learning_rate": 8.776330599774883e-05, "loss": 0.0093, "step": 4181 }, { "epoch": 6.05, "learning_rate": 8.773114648657342e-05, "loss": 0.0341, "step": 4182 }, { "epoch": 6.05, "learning_rate": 8.769898697539797e-05, "loss": 0.0622, "step": 4183 }, { "epoch": 6.05, "learning_rate": 8.766682746422255e-05, "loss": 0.0159, "step": 4184 }, { "epoch": 6.05, "learning_rate": 8.763466795304712e-05, "loss": 0.0172, "step": 4185 }, { "epoch": 6.05, "learning_rate": 8.760250844187169e-05, "loss": 0.0492, "step": 4186 }, { "epoch": 6.05, "learning_rate": 8.757034893069626e-05, "loss": 0.0633, "step": 4187 }, { "epoch": 6.06, "learning_rate": 8.753818941952083e-05, "loss": 0.001, "step": 4188 }, { "epoch": 6.06, "learning_rate": 8.75060299083454e-05, "loss": 0.0452, "step": 4189 }, { "epoch": 6.06, "learning_rate": 8.747387039716997e-05, "loss": 0.0029, "step": 4190 }, { "epoch": 6.06, "learning_rate": 8.744171088599453e-05, "loss": 0.013, "step": 4191 }, { "epoch": 6.06, "learning_rate": 8.740955137481911e-05, "loss": 0.028, "step": 4192 }, { "epoch": 6.06, "learning_rate": 8.737739186364368e-05, "loss": 0.0095, "step": 4193 }, { "epoch": 6.07, "learning_rate": 8.734523235246824e-05, "loss": 0.0014, "step": 4194 }, { "epoch": 6.07, "learning_rate": 8.731307284129282e-05, "loss": 0.0235, "step": 4195 }, { "epoch": 6.07, "learning_rate": 8.728091333011739e-05, "loss": 0.0003, "step": 4196 }, { "epoch": 6.07, "learning_rate": 8.724875381894195e-05, "loss": 0.0123, "step": 4197 }, { "epoch": 6.07, "learning_rate": 8.721659430776653e-05, "loss": 0.0081, "step": 4198 }, { "epoch": 6.07, "learning_rate": 8.71844347965911e-05, "loss": 0.0053, "step": 4199 }, { "epoch": 6.07, "learning_rate": 8.715227528541566e-05, "loss": 0.05, "step": 4200 }, { "epoch": 6.08, "learning_rate": 8.712011577424024e-05, "loss": 0.0035, "step": 4201 }, { "epoch": 6.08, "learning_rate": 8.70879562630648e-05, "loss": 0.0033, "step": 4202 }, { "epoch": 6.08, "learning_rate": 8.705579675188937e-05, "loss": 0.0796, "step": 4203 }, { "epoch": 6.08, "learning_rate": 8.702363724071394e-05, "loss": 0.0023, "step": 4204 }, { "epoch": 6.08, "learning_rate": 8.699147772953851e-05, "loss": 0.0051, "step": 4205 }, { "epoch": 6.08, "learning_rate": 8.695931821836308e-05, "loss": 0.0036, "step": 4206 }, { "epoch": 6.08, "learning_rate": 8.692715870718765e-05, "loss": 0.0483, "step": 4207 }, { "epoch": 6.09, "learning_rate": 8.689499919601222e-05, "loss": 0.044, "step": 4208 }, { "epoch": 6.09, "learning_rate": 8.68628396848368e-05, "loss": 0.0312, "step": 4209 }, { "epoch": 6.09, "learning_rate": 8.683068017366137e-05, "loss": 0.0537, "step": 4210 }, { "epoch": 6.09, "learning_rate": 8.679852066248594e-05, "loss": 0.0945, "step": 4211 }, { "epoch": 6.09, "learning_rate": 8.67663611513105e-05, "loss": 0.0078, "step": 4212 }, { "epoch": 6.09, "learning_rate": 8.673420164013508e-05, "loss": 0.0635, "step": 4213 }, { "epoch": 6.09, "learning_rate": 8.670204212895965e-05, "loss": 0.0618, "step": 4214 }, { "epoch": 6.1, "learning_rate": 8.666988261778422e-05, "loss": 0.0465, "step": 4215 }, { "epoch": 6.1, "learning_rate": 8.663772310660877e-05, "loss": 0.0021, "step": 4216 }, { "epoch": 6.1, "learning_rate": 8.660556359543336e-05, "loss": 0.0224, "step": 4217 }, { "epoch": 6.1, "learning_rate": 8.657340408425793e-05, "loss": 0.033, "step": 4218 }, { "epoch": 6.1, "learning_rate": 8.654124457308249e-05, "loss": 0.0623, "step": 4219 }, { "epoch": 6.1, "learning_rate": 8.650908506190706e-05, "loss": 0.0273, "step": 4220 }, { "epoch": 6.1, "learning_rate": 8.647692555073164e-05, "loss": 0.0501, "step": 4221 }, { "epoch": 6.11, "learning_rate": 8.64447660395562e-05, "loss": 0.0297, "step": 4222 }, { "epoch": 6.11, "learning_rate": 8.641260652838077e-05, "loss": 0.0002, "step": 4223 }, { "epoch": 6.11, "learning_rate": 8.638044701720535e-05, "loss": 0.0103, "step": 4224 }, { "epoch": 6.11, "learning_rate": 8.634828750602991e-05, "loss": 0.0701, "step": 4225 }, { "epoch": 6.11, "learning_rate": 8.631612799485448e-05, "loss": 0.0225, "step": 4226 }, { "epoch": 6.11, "learning_rate": 8.628396848367905e-05, "loss": 0.0564, "step": 4227 }, { "epoch": 6.11, "learning_rate": 8.625180897250362e-05, "loss": 0.0698, "step": 4228 }, { "epoch": 6.12, "learning_rate": 8.621964946132819e-05, "loss": 0.0581, "step": 4229 }, { "epoch": 6.12, "learning_rate": 8.618748995015276e-05, "loss": 0.0164, "step": 4230 }, { "epoch": 6.12, "learning_rate": 8.615533043897733e-05, "loss": 0.1338, "step": 4231 }, { "epoch": 6.12, "learning_rate": 8.61231709278019e-05, "loss": 0.1035, "step": 4232 }, { "epoch": 6.12, "learning_rate": 8.609101141662646e-05, "loss": 0.0101, "step": 4233 }, { "epoch": 6.12, "learning_rate": 8.605885190545104e-05, "loss": 0.0017, "step": 4234 }, { "epoch": 6.12, "learning_rate": 8.602669239427561e-05, "loss": 0.0366, "step": 4235 }, { "epoch": 6.13, "learning_rate": 8.599453288310017e-05, "loss": 0.0696, "step": 4236 }, { "epoch": 6.13, "learning_rate": 8.596237337192476e-05, "loss": 0.0594, "step": 4237 }, { "epoch": 6.13, "learning_rate": 8.593021386074933e-05, "loss": 0.0298, "step": 4238 }, { "epoch": 6.13, "learning_rate": 8.589805434957388e-05, "loss": 0.018, "step": 4239 }, { "epoch": 6.13, "learning_rate": 8.586589483839847e-05, "loss": 0.0037, "step": 4240 }, { "epoch": 6.13, "learning_rate": 8.583373532722302e-05, "loss": 0.0101, "step": 4241 }, { "epoch": 6.13, "learning_rate": 8.58015758160476e-05, "loss": 0.0196, "step": 4242 }, { "epoch": 6.14, "learning_rate": 8.576941630487218e-05, "loss": 0.0289, "step": 4243 }, { "epoch": 6.14, "learning_rate": 8.573725679369674e-05, "loss": 0.0147, "step": 4244 }, { "epoch": 6.14, "learning_rate": 8.57050972825213e-05, "loss": 0.0352, "step": 4245 }, { "epoch": 6.14, "learning_rate": 8.567293777134589e-05, "loss": 0.041, "step": 4246 }, { "epoch": 6.14, "learning_rate": 8.564077826017045e-05, "loss": 0.1059, "step": 4247 }, { "epoch": 6.14, "learning_rate": 8.560861874899502e-05, "loss": 0.006, "step": 4248 }, { "epoch": 6.14, "learning_rate": 8.557645923781959e-05, "loss": 0.0121, "step": 4249 }, { "epoch": 6.15, "learning_rate": 8.554429972664416e-05, "loss": 0.0084, "step": 4250 }, { "epoch": 6.15, "learning_rate": 8.551214021546873e-05, "loss": 0.0441, "step": 4251 }, { "epoch": 6.15, "learning_rate": 8.54799807042933e-05, "loss": 0.0009, "step": 4252 }, { "epoch": 6.15, "learning_rate": 8.544782119311787e-05, "loss": 0.0025, "step": 4253 }, { "epoch": 6.15, "learning_rate": 8.541566168194244e-05, "loss": 0.0052, "step": 4254 }, { "epoch": 6.15, "learning_rate": 8.538350217076701e-05, "loss": 0.0588, "step": 4255 }, { "epoch": 6.15, "learning_rate": 8.535134265959158e-05, "loss": 0.0263, "step": 4256 }, { "epoch": 6.16, "learning_rate": 8.531918314841615e-05, "loss": 0.0477, "step": 4257 }, { "epoch": 6.16, "learning_rate": 8.528702363724071e-05, "loss": 0.0952, "step": 4258 }, { "epoch": 6.16, "learning_rate": 8.52548641260653e-05, "loss": 0.0071, "step": 4259 }, { "epoch": 6.16, "learning_rate": 8.522270461488986e-05, "loss": 0.0804, "step": 4260 }, { "epoch": 6.16, "learning_rate": 8.519054510371442e-05, "loss": 0.0199, "step": 4261 }, { "epoch": 6.16, "learning_rate": 8.5158385592539e-05, "loss": 0.0429, "step": 4262 }, { "epoch": 6.16, "learning_rate": 8.512622608136358e-05, "loss": 0.0523, "step": 4263 }, { "epoch": 6.17, "learning_rate": 8.509406657018813e-05, "loss": 0.0538, "step": 4264 }, { "epoch": 6.17, "learning_rate": 8.50619070590127e-05, "loss": 0.0286, "step": 4265 }, { "epoch": 6.17, "learning_rate": 8.502974754783727e-05, "loss": 0.0084, "step": 4266 }, { "epoch": 6.17, "learning_rate": 8.499758803666184e-05, "loss": 0.0813, "step": 4267 }, { "epoch": 6.17, "learning_rate": 8.496542852548641e-05, "loss": 0.0345, "step": 4268 }, { "epoch": 6.17, "learning_rate": 8.493326901431098e-05, "loss": 0.1299, "step": 4269 }, { "epoch": 6.17, "learning_rate": 8.490110950313556e-05, "loss": 0.0033, "step": 4270 }, { "epoch": 6.18, "learning_rate": 8.486894999196013e-05, "loss": 0.0126, "step": 4271 }, { "epoch": 6.18, "learning_rate": 8.48367904807847e-05, "loss": 0.0123, "step": 4272 }, { "epoch": 6.18, "learning_rate": 8.480463096960927e-05, "loss": 0.0803, "step": 4273 }, { "epoch": 6.18, "learning_rate": 8.477247145843384e-05, "loss": 0.032, "step": 4274 }, { "epoch": 6.18, "learning_rate": 8.474031194725841e-05, "loss": 0.0294, "step": 4275 }, { "epoch": 6.18, "learning_rate": 8.470815243608298e-05, "loss": 0.0107, "step": 4276 }, { "epoch": 6.19, "learning_rate": 8.467599292490755e-05, "loss": 0.0504, "step": 4277 }, { "epoch": 6.19, "learning_rate": 8.46438334137321e-05, "loss": 0.0654, "step": 4278 }, { "epoch": 6.19, "learning_rate": 8.461167390255669e-05, "loss": 0.0236, "step": 4279 }, { "epoch": 6.19, "learning_rate": 8.457951439138126e-05, "loss": 0.1341, "step": 4280 }, { "epoch": 6.19, "learning_rate": 8.454735488020582e-05, "loss": 0.0062, "step": 4281 }, { "epoch": 6.19, "learning_rate": 8.45151953690304e-05, "loss": 0.0582, "step": 4282 }, { "epoch": 6.19, "learning_rate": 8.448303585785496e-05, "loss": 0.0496, "step": 4283 }, { "epoch": 6.2, "learning_rate": 8.445087634667953e-05, "loss": 0.0925, "step": 4284 }, { "epoch": 6.2, "learning_rate": 8.441871683550411e-05, "loss": 0.0791, "step": 4285 }, { "epoch": 6.2, "learning_rate": 8.438655732432867e-05, "loss": 0.0048, "step": 4286 }, { "epoch": 6.2, "learning_rate": 8.435439781315324e-05, "loss": 0.014, "step": 4287 }, { "epoch": 6.2, "learning_rate": 8.432223830197782e-05, "loss": 0.0074, "step": 4288 }, { "epoch": 6.2, "learning_rate": 8.429007879080238e-05, "loss": 0.0079, "step": 4289 }, { "epoch": 6.2, "learning_rate": 8.425791927962695e-05, "loss": 0.0145, "step": 4290 }, { "epoch": 6.21, "learning_rate": 8.422575976845152e-05, "loss": 0.0217, "step": 4291 }, { "epoch": 6.21, "learning_rate": 8.419360025727609e-05, "loss": 0.0053, "step": 4292 }, { "epoch": 6.21, "learning_rate": 8.416144074610066e-05, "loss": 0.0504, "step": 4293 }, { "epoch": 6.21, "learning_rate": 8.412928123492523e-05, "loss": 0.0085, "step": 4294 }, { "epoch": 6.21, "learning_rate": 8.40971217237498e-05, "loss": 0.0401, "step": 4295 }, { "epoch": 6.21, "learning_rate": 8.406496221257438e-05, "loss": 0.0022, "step": 4296 }, { "epoch": 6.21, "learning_rate": 8.403280270139893e-05, "loss": 0.0296, "step": 4297 }, { "epoch": 6.22, "learning_rate": 8.400064319022352e-05, "loss": 0.0032, "step": 4298 }, { "epoch": 6.22, "learning_rate": 8.396848367904809e-05, "loss": 0.0001, "step": 4299 }, { "epoch": 6.22, "learning_rate": 8.393632416787264e-05, "loss": 0.0699, "step": 4300 }, { "epoch": 6.22, "learning_rate": 8.390416465669723e-05, "loss": 0.0702, "step": 4301 }, { "epoch": 6.22, "learning_rate": 8.38720051455218e-05, "loss": 0.0135, "step": 4302 }, { "epoch": 6.22, "learning_rate": 8.383984563434636e-05, "loss": 0.0008, "step": 4303 }, { "epoch": 6.22, "learning_rate": 8.380768612317094e-05, "loss": 0.0453, "step": 4304 }, { "epoch": 6.23, "learning_rate": 8.37755266119955e-05, "loss": 0.0017, "step": 4305 }, { "epoch": 6.23, "learning_rate": 8.374336710082007e-05, "loss": 0.0894, "step": 4306 }, { "epoch": 6.23, "learning_rate": 8.371120758964465e-05, "loss": 0.0066, "step": 4307 }, { "epoch": 6.23, "learning_rate": 8.367904807846921e-05, "loss": 0.0378, "step": 4308 }, { "epoch": 6.23, "learning_rate": 8.364688856729378e-05, "loss": 0.1074, "step": 4309 }, { "epoch": 6.23, "learning_rate": 8.361472905611835e-05, "loss": 0.0005, "step": 4310 }, { "epoch": 6.23, "learning_rate": 8.358256954494292e-05, "loss": 0.0576, "step": 4311 }, { "epoch": 6.24, "learning_rate": 8.355041003376749e-05, "loss": 0.0828, "step": 4312 }, { "epoch": 6.24, "learning_rate": 8.351825052259206e-05, "loss": 0.0005, "step": 4313 }, { "epoch": 6.24, "learning_rate": 8.348609101141663e-05, "loss": 0.0073, "step": 4314 }, { "epoch": 6.24, "learning_rate": 8.34539315002412e-05, "loss": 0.1302, "step": 4315 }, { "epoch": 6.24, "learning_rate": 8.342177198906577e-05, "loss": 0.0004, "step": 4316 }, { "epoch": 6.24, "learning_rate": 8.338961247789034e-05, "loss": 0.0071, "step": 4317 }, { "epoch": 6.24, "learning_rate": 8.335745296671491e-05, "loss": 0.0519, "step": 4318 }, { "epoch": 6.25, "learning_rate": 8.332529345553948e-05, "loss": 0.0285, "step": 4319 }, { "epoch": 6.25, "learning_rate": 8.329313394436405e-05, "loss": 0.0077, "step": 4320 }, { "epoch": 6.25, "learning_rate": 8.326097443318862e-05, "loss": 0.0914, "step": 4321 }, { "epoch": 6.25, "learning_rate": 8.322881492201318e-05, "loss": 0.0055, "step": 4322 }, { "epoch": 6.25, "learning_rate": 8.319665541083775e-05, "loss": 0.063, "step": 4323 }, { "epoch": 6.25, "learning_rate": 8.316449589966234e-05, "loss": 0.023, "step": 4324 }, { "epoch": 6.25, "learning_rate": 8.313233638848689e-05, "loss": 0.0024, "step": 4325 }, { "epoch": 6.26, "learning_rate": 8.310017687731146e-05, "loss": 0.0941, "step": 4326 }, { "epoch": 6.26, "learning_rate": 8.306801736613605e-05, "loss": 0.0193, "step": 4327 }, { "epoch": 6.26, "learning_rate": 8.30358578549606e-05, "loss": 0.0004, "step": 4328 }, { "epoch": 6.26, "learning_rate": 8.300369834378517e-05, "loss": 0.0638, "step": 4329 }, { "epoch": 6.26, "learning_rate": 8.297153883260975e-05, "loss": 0.0648, "step": 4330 }, { "epoch": 6.26, "learning_rate": 8.293937932143432e-05, "loss": 0.0677, "step": 4331 }, { "epoch": 6.26, "learning_rate": 8.290721981025889e-05, "loss": 0.0474, "step": 4332 }, { "epoch": 6.27, "learning_rate": 8.287506029908346e-05, "loss": 0.0046, "step": 4333 }, { "epoch": 6.27, "learning_rate": 8.284290078790803e-05, "loss": 0.0063, "step": 4334 }, { "epoch": 6.27, "learning_rate": 8.28107412767326e-05, "loss": 0.045, "step": 4335 }, { "epoch": 6.27, "learning_rate": 8.277858176555717e-05, "loss": 0.0062, "step": 4336 }, { "epoch": 6.27, "learning_rate": 8.274642225438174e-05, "loss": 0.0335, "step": 4337 }, { "epoch": 6.27, "learning_rate": 8.271426274320631e-05, "loss": 0.0229, "step": 4338 }, { "epoch": 6.27, "learning_rate": 8.268210323203087e-05, "loss": 0.0336, "step": 4339 }, { "epoch": 6.28, "learning_rate": 8.264994372085545e-05, "loss": 0.0768, "step": 4340 }, { "epoch": 6.28, "learning_rate": 8.261778420968002e-05, "loss": 0.0133, "step": 4341 }, { "epoch": 6.28, "learning_rate": 8.258562469850458e-05, "loss": 0.001, "step": 4342 }, { "epoch": 6.28, "learning_rate": 8.255346518732916e-05, "loss": 0.0367, "step": 4343 }, { "epoch": 6.28, "learning_rate": 8.252130567615373e-05, "loss": 0.0186, "step": 4344 }, { "epoch": 6.28, "learning_rate": 8.248914616497829e-05, "loss": 0.0868, "step": 4345 }, { "epoch": 6.28, "learning_rate": 8.245698665380287e-05, "loss": 0.1079, "step": 4346 }, { "epoch": 6.29, "learning_rate": 8.242482714262743e-05, "loss": 0.1953, "step": 4347 }, { "epoch": 6.29, "learning_rate": 8.2392667631452e-05, "loss": 0.0463, "step": 4348 }, { "epoch": 6.29, "learning_rate": 8.236050812027659e-05, "loss": 0.0155, "step": 4349 }, { "epoch": 6.29, "learning_rate": 8.232834860910114e-05, "loss": 0.001, "step": 4350 }, { "epoch": 6.29, "learning_rate": 8.229618909792571e-05, "loss": 0.04, "step": 4351 }, { "epoch": 6.29, "learning_rate": 8.22640295867503e-05, "loss": 0.0059, "step": 4352 }, { "epoch": 6.3, "learning_rate": 8.223187007557485e-05, "loss": 0.0024, "step": 4353 }, { "epoch": 6.3, "learning_rate": 8.219971056439942e-05, "loss": 0.0022, "step": 4354 }, { "epoch": 6.3, "learning_rate": 8.2167551053224e-05, "loss": 0.0793, "step": 4355 }, { "epoch": 6.3, "learning_rate": 8.213539154204857e-05, "loss": 0.0113, "step": 4356 }, { "epoch": 6.3, "learning_rate": 8.210323203087314e-05, "loss": 0.0513, "step": 4357 }, { "epoch": 6.3, "learning_rate": 8.20710725196977e-05, "loss": 0.0318, "step": 4358 }, { "epoch": 6.3, "learning_rate": 8.203891300852228e-05, "loss": 0.0026, "step": 4359 }, { "epoch": 6.31, "learning_rate": 8.200675349734685e-05, "loss": 0.001, "step": 4360 }, { "epoch": 6.31, "learning_rate": 8.19745939861714e-05, "loss": 0.0275, "step": 4361 }, { "epoch": 6.31, "learning_rate": 8.194243447499599e-05, "loss": 0.0701, "step": 4362 }, { "epoch": 6.31, "learning_rate": 8.191027496382056e-05, "loss": 0.0083, "step": 4363 }, { "epoch": 6.31, "learning_rate": 8.187811545264512e-05, "loss": 0.0927, "step": 4364 }, { "epoch": 6.31, "learning_rate": 8.18459559414697e-05, "loss": 0.0031, "step": 4365 }, { "epoch": 6.31, "learning_rate": 8.181379643029427e-05, "loss": 0.0457, "step": 4366 }, { "epoch": 6.32, "learning_rate": 8.178163691911883e-05, "loss": 0.0011, "step": 4367 }, { "epoch": 6.32, "learning_rate": 8.17494774079434e-05, "loss": 0.0103, "step": 4368 }, { "epoch": 6.32, "learning_rate": 8.171731789676798e-05, "loss": 0.0547, "step": 4369 }, { "epoch": 6.32, "learning_rate": 8.168515838559254e-05, "loss": 0.0073, "step": 4370 }, { "epoch": 6.32, "learning_rate": 8.165299887441711e-05, "loss": 0.0151, "step": 4371 }, { "epoch": 6.32, "learning_rate": 8.162083936324168e-05, "loss": 0.0089, "step": 4372 }, { "epoch": 6.32, "learning_rate": 8.158867985206625e-05, "loss": 0.006, "step": 4373 }, { "epoch": 6.33, "learning_rate": 8.155652034089082e-05, "loss": 0.0598, "step": 4374 }, { "epoch": 6.33, "learning_rate": 8.152436082971539e-05, "loss": 0.0733, "step": 4375 }, { "epoch": 6.33, "learning_rate": 8.149220131853996e-05, "loss": 0.0135, "step": 4376 }, { "epoch": 6.33, "learning_rate": 8.146004180736453e-05, "loss": 0.0493, "step": 4377 }, { "epoch": 6.33, "learning_rate": 8.14278822961891e-05, "loss": 0.0075, "step": 4378 }, { "epoch": 6.33, "learning_rate": 8.139572278501367e-05, "loss": 0.0164, "step": 4379 }, { "epoch": 6.33, "learning_rate": 8.136356327383824e-05, "loss": 0.0086, "step": 4380 }, { "epoch": 6.34, "learning_rate": 8.133140376266281e-05, "loss": 0.0016, "step": 4381 }, { "epoch": 6.34, "learning_rate": 8.129924425148738e-05, "loss": 0.0186, "step": 4382 }, { "epoch": 6.34, "learning_rate": 8.126708474031196e-05, "loss": 0.01, "step": 4383 }, { "epoch": 6.34, "learning_rate": 8.123492522913651e-05, "loss": 0.0791, "step": 4384 }, { "epoch": 6.34, "learning_rate": 8.12027657179611e-05, "loss": 0.0284, "step": 4385 }, { "epoch": 6.34, "learning_rate": 8.117060620678565e-05, "loss": 0.0194, "step": 4386 }, { "epoch": 6.34, "learning_rate": 8.113844669561022e-05, "loss": 0.0123, "step": 4387 }, { "epoch": 6.35, "learning_rate": 8.110628718443481e-05, "loss": 0.0802, "step": 4388 }, { "epoch": 6.35, "learning_rate": 8.107412767325936e-05, "loss": 0.0127, "step": 4389 }, { "epoch": 6.35, "learning_rate": 8.104196816208394e-05, "loss": 0.0262, "step": 4390 }, { "epoch": 6.35, "learning_rate": 8.100980865090852e-05, "loss": 0.0229, "step": 4391 }, { "epoch": 6.35, "learning_rate": 8.097764913973308e-05, "loss": 0.0743, "step": 4392 }, { "epoch": 6.35, "learning_rate": 8.094548962855765e-05, "loss": 0.0089, "step": 4393 }, { "epoch": 6.35, "learning_rate": 8.091333011738223e-05, "loss": 0.0002, "step": 4394 }, { "epoch": 6.36, "learning_rate": 8.088117060620679e-05, "loss": 0.0024, "step": 4395 }, { "epoch": 6.36, "learning_rate": 8.084901109503136e-05, "loss": 0.0009, "step": 4396 }, { "epoch": 6.36, "learning_rate": 8.081685158385593e-05, "loss": 0.0021, "step": 4397 }, { "epoch": 6.36, "learning_rate": 8.07846920726805e-05, "loss": 0.0074, "step": 4398 }, { "epoch": 6.36, "learning_rate": 8.075253256150507e-05, "loss": 0.0005, "step": 4399 }, { "epoch": 6.36, "learning_rate": 8.072037305032964e-05, "loss": 0.0131, "step": 4400 }, { "epoch": 6.36, "learning_rate": 8.068821353915421e-05, "loss": 0.1079, "step": 4401 }, { "epoch": 6.37, "learning_rate": 8.065605402797878e-05, "loss": 0.0132, "step": 4402 }, { "epoch": 6.37, "learning_rate": 8.062389451680334e-05, "loss": 0.0193, "step": 4403 }, { "epoch": 6.37, "learning_rate": 8.059173500562792e-05, "loss": 0.0477, "step": 4404 }, { "epoch": 6.37, "learning_rate": 8.055957549445249e-05, "loss": 0.0054, "step": 4405 }, { "epoch": 6.37, "learning_rate": 8.052741598327705e-05, "loss": 0.0217, "step": 4406 }, { "epoch": 6.37, "learning_rate": 8.049525647210163e-05, "loss": 0.0745, "step": 4407 }, { "epoch": 6.37, "learning_rate": 8.04630969609262e-05, "loss": 0.0816, "step": 4408 }, { "epoch": 6.38, "learning_rate": 8.043093744975076e-05, "loss": 0.0045, "step": 4409 }, { "epoch": 6.38, "learning_rate": 8.039877793857535e-05, "loss": 0.0106, "step": 4410 }, { "epoch": 6.38, "learning_rate": 8.03666184273999e-05, "loss": 0.0001, "step": 4411 }, { "epoch": 6.38, "learning_rate": 8.033445891622447e-05, "loss": 0.0349, "step": 4412 }, { "epoch": 6.38, "learning_rate": 8.030229940504904e-05, "loss": 0.0467, "step": 4413 }, { "epoch": 6.38, "learning_rate": 8.027013989387361e-05, "loss": 0.0001, "step": 4414 }, { "epoch": 6.38, "learning_rate": 8.023798038269818e-05, "loss": 0.0014, "step": 4415 }, { "epoch": 6.39, "learning_rate": 8.020582087152276e-05, "loss": 0.0371, "step": 4416 }, { "epoch": 6.39, "learning_rate": 8.017366136034733e-05, "loss": 0.0438, "step": 4417 }, { "epoch": 6.39, "learning_rate": 8.01415018491719e-05, "loss": 0.0073, "step": 4418 }, { "epoch": 6.39, "learning_rate": 8.010934233799647e-05, "loss": 0.0071, "step": 4419 }, { "epoch": 6.39, "learning_rate": 8.007718282682104e-05, "loss": 0.1196, "step": 4420 }, { "epoch": 6.39, "learning_rate": 8.004502331564561e-05, "loss": 0.0181, "step": 4421 }, { "epoch": 6.39, "learning_rate": 8.001286380447018e-05, "loss": 0.0035, "step": 4422 }, { "epoch": 6.4, "learning_rate": 7.998070429329475e-05, "loss": 0.0035, "step": 4423 }, { "epoch": 6.4, "learning_rate": 7.994854478211932e-05, "loss": 0.046, "step": 4424 }, { "epoch": 6.4, "learning_rate": 7.991638527094389e-05, "loss": 0.0065, "step": 4425 }, { "epoch": 6.4, "learning_rate": 7.988422575976846e-05, "loss": 0.0001, "step": 4426 }, { "epoch": 6.4, "learning_rate": 7.985206624859303e-05, "loss": 0.0752, "step": 4427 }, { "epoch": 6.4, "learning_rate": 7.981990673741759e-05, "loss": 0.0214, "step": 4428 }, { "epoch": 6.4, "learning_rate": 7.978774722624216e-05, "loss": 0.0234, "step": 4429 }, { "epoch": 6.41, "learning_rate": 7.975558771506674e-05, "loss": 0.0464, "step": 4430 }, { "epoch": 6.41, "learning_rate": 7.97234282038913e-05, "loss": 0.0011, "step": 4431 }, { "epoch": 6.41, "learning_rate": 7.969126869271587e-05, "loss": 0.1292, "step": 4432 }, { "epoch": 6.41, "learning_rate": 7.965910918154045e-05, "loss": 0.0952, "step": 4433 }, { "epoch": 6.41, "learning_rate": 7.962694967036501e-05, "loss": 0.0064, "step": 4434 }, { "epoch": 6.41, "learning_rate": 7.959479015918958e-05, "loss": 0.0521, "step": 4435 }, { "epoch": 6.42, "learning_rate": 7.956263064801415e-05, "loss": 0.0292, "step": 4436 }, { "epoch": 6.42, "learning_rate": 7.953047113683872e-05, "loss": 0.0275, "step": 4437 }, { "epoch": 6.42, "learning_rate": 7.949831162566329e-05, "loss": 0.0, "step": 4438 }, { "epoch": 6.42, "learning_rate": 7.946615211448786e-05, "loss": 0.0027, "step": 4439 }, { "epoch": 6.42, "learning_rate": 7.943399260331243e-05, "loss": 0.0023, "step": 4440 }, { "epoch": 6.42, "learning_rate": 7.9401833092137e-05, "loss": 0.001, "step": 4441 }, { "epoch": 6.42, "learning_rate": 7.936967358096156e-05, "loss": 0.0147, "step": 4442 }, { "epoch": 6.43, "learning_rate": 7.933751406978615e-05, "loss": 0.0153, "step": 4443 }, { "epoch": 6.43, "learning_rate": 7.930535455861072e-05, "loss": 0.0002, "step": 4444 }, { "epoch": 6.43, "learning_rate": 7.927319504743527e-05, "loss": 0.0118, "step": 4445 }, { "epoch": 6.43, "learning_rate": 7.924103553625986e-05, "loss": 0.0002, "step": 4446 }, { "epoch": 6.43, "learning_rate": 7.920887602508443e-05, "loss": 0.0009, "step": 4447 }, { "epoch": 6.43, "learning_rate": 7.917671651390898e-05, "loss": 0.0015, "step": 4448 }, { "epoch": 6.43, "learning_rate": 7.914455700273357e-05, "loss": 0.0018, "step": 4449 }, { "epoch": 6.44, "learning_rate": 7.911239749155813e-05, "loss": 0.0061, "step": 4450 }, { "epoch": 6.44, "learning_rate": 7.90802379803827e-05, "loss": 0.0474, "step": 4451 }, { "epoch": 6.44, "learning_rate": 7.904807846920728e-05, "loss": 0.0292, "step": 4452 }, { "epoch": 6.44, "learning_rate": 7.901591895803184e-05, "loss": 0.0728, "step": 4453 }, { "epoch": 6.44, "learning_rate": 7.898375944685641e-05, "loss": 0.0003, "step": 4454 }, { "epoch": 6.44, "learning_rate": 7.895159993568099e-05, "loss": 0.0226, "step": 4455 }, { "epoch": 6.44, "learning_rate": 7.891944042450555e-05, "loss": 0.0645, "step": 4456 }, { "epoch": 6.45, "learning_rate": 7.888728091333012e-05, "loss": 0.0034, "step": 4457 }, { "epoch": 6.45, "learning_rate": 7.885512140215469e-05, "loss": 0.0038, "step": 4458 }, { "epoch": 6.45, "learning_rate": 7.882296189097926e-05, "loss": 0.1216, "step": 4459 }, { "epoch": 6.45, "learning_rate": 7.879080237980383e-05, "loss": 0.0427, "step": 4460 }, { "epoch": 6.45, "learning_rate": 7.87586428686284e-05, "loss": 0.0014, "step": 4461 }, { "epoch": 6.45, "learning_rate": 7.872648335745297e-05, "loss": 0.0203, "step": 4462 }, { "epoch": 6.45, "learning_rate": 7.869432384627754e-05, "loss": 0.0763, "step": 4463 }, { "epoch": 6.46, "learning_rate": 7.866216433510211e-05, "loss": 0.0491, "step": 4464 }, { "epoch": 6.46, "learning_rate": 7.863000482392668e-05, "loss": 0.0252, "step": 4465 }, { "epoch": 6.46, "learning_rate": 7.859784531275125e-05, "loss": 0.0003, "step": 4466 }, { "epoch": 6.46, "learning_rate": 7.856568580157581e-05, "loss": 0.0079, "step": 4467 }, { "epoch": 6.46, "learning_rate": 7.85335262904004e-05, "loss": 0.167, "step": 4468 }, { "epoch": 6.46, "learning_rate": 7.850136677922497e-05, "loss": 0.0, "step": 4469 }, { "epoch": 6.46, "learning_rate": 7.846920726804952e-05, "loss": 0.1235, "step": 4470 }, { "epoch": 6.47, "learning_rate": 7.84370477568741e-05, "loss": 0.0901, "step": 4471 }, { "epoch": 6.47, "learning_rate": 7.840488824569868e-05, "loss": 0.0669, "step": 4472 }, { "epoch": 6.47, "learning_rate": 7.837272873452323e-05, "loss": 0.0008, "step": 4473 }, { "epoch": 6.47, "learning_rate": 7.83405692233478e-05, "loss": 0.0197, "step": 4474 }, { "epoch": 6.47, "learning_rate": 7.830840971217237e-05, "loss": 0.0582, "step": 4475 }, { "epoch": 6.47, "learning_rate": 7.827625020099695e-05, "loss": 0.0068, "step": 4476 }, { "epoch": 6.47, "learning_rate": 7.824409068982152e-05, "loss": 0.0044, "step": 4477 }, { "epoch": 6.48, "learning_rate": 7.821193117864609e-05, "loss": 0.0002, "step": 4478 }, { "epoch": 6.48, "learning_rate": 7.817977166747066e-05, "loss": 0.0459, "step": 4479 }, { "epoch": 6.48, "learning_rate": 7.814761215629523e-05, "loss": 0.0461, "step": 4480 }, { "epoch": 6.48, "learning_rate": 7.81154526451198e-05, "loss": 0.0094, "step": 4481 }, { "epoch": 6.48, "learning_rate": 7.808329313394437e-05, "loss": 0.0052, "step": 4482 }, { "epoch": 6.48, "learning_rate": 7.805113362276894e-05, "loss": 0.0027, "step": 4483 }, { "epoch": 6.48, "learning_rate": 7.801897411159351e-05, "loss": 0.0523, "step": 4484 }, { "epoch": 6.49, "learning_rate": 7.798681460041808e-05, "loss": 0.0139, "step": 4485 }, { "epoch": 6.49, "learning_rate": 7.795465508924265e-05, "loss": 0.1003, "step": 4486 }, { "epoch": 6.49, "learning_rate": 7.792249557806721e-05, "loss": 0.0558, "step": 4487 }, { "epoch": 6.49, "learning_rate": 7.789033606689179e-05, "loss": 0.0277, "step": 4488 }, { "epoch": 6.49, "learning_rate": 7.785817655571636e-05, "loss": 0.0651, "step": 4489 }, { "epoch": 6.49, "learning_rate": 7.782601704454092e-05, "loss": 0.045, "step": 4490 }, { "epoch": 6.49, "learning_rate": 7.77938575333655e-05, "loss": 0.0139, "step": 4491 }, { "epoch": 6.5, "learning_rate": 7.776169802219006e-05, "loss": 0.012, "step": 4492 }, { "epoch": 6.5, "learning_rate": 7.772953851101463e-05, "loss": 0.0135, "step": 4493 }, { "epoch": 6.5, "learning_rate": 7.769737899983921e-05, "loss": 0.0748, "step": 4494 }, { "epoch": 6.5, "learning_rate": 7.766521948866377e-05, "loss": 0.0501, "step": 4495 }, { "epoch": 6.5, "learning_rate": 7.763305997748834e-05, "loss": 0.1151, "step": 4496 }, { "epoch": 6.5, "learning_rate": 7.760090046631293e-05, "loss": 0.0025, "step": 4497 }, { "epoch": 6.5, "learning_rate": 7.756874095513748e-05, "loss": 0.0103, "step": 4498 }, { "epoch": 6.51, "learning_rate": 7.753658144396205e-05, "loss": 0.0481, "step": 4499 }, { "epoch": 6.51, "learning_rate": 7.750442193278662e-05, "loss": 0.0282, "step": 4500 }, { "epoch": 6.51, "learning_rate": 7.74722624216112e-05, "loss": 0.0801, "step": 4501 }, { "epoch": 6.51, "learning_rate": 7.744010291043577e-05, "loss": 0.0289, "step": 4502 }, { "epoch": 6.51, "learning_rate": 7.740794339926034e-05, "loss": 0.019, "step": 4503 }, { "epoch": 6.51, "learning_rate": 7.73757838880849e-05, "loss": 0.023, "step": 4504 }, { "epoch": 6.51, "learning_rate": 7.734362437690948e-05, "loss": 0.1052, "step": 4505 }, { "epoch": 6.52, "learning_rate": 7.731146486573403e-05, "loss": 0.0067, "step": 4506 }, { "epoch": 6.52, "learning_rate": 7.727930535455862e-05, "loss": 0.0665, "step": 4507 }, { "epoch": 6.52, "learning_rate": 7.724714584338319e-05, "loss": 0.0075, "step": 4508 }, { "epoch": 6.52, "learning_rate": 7.721498633220775e-05, "loss": 0.0453, "step": 4509 }, { "epoch": 6.52, "learning_rate": 7.718282682103233e-05, "loss": 0.0812, "step": 4510 }, { "epoch": 6.52, "learning_rate": 7.71506673098569e-05, "loss": 0.0689, "step": 4511 }, { "epoch": 6.52, "learning_rate": 7.711850779868146e-05, "loss": 0.0284, "step": 4512 }, { "epoch": 6.53, "learning_rate": 7.708634828750604e-05, "loss": 0.0954, "step": 4513 }, { "epoch": 6.53, "learning_rate": 7.705418877633061e-05, "loss": 0.0034, "step": 4514 }, { "epoch": 6.53, "learning_rate": 7.702202926515517e-05, "loss": 0.0424, "step": 4515 }, { "epoch": 6.53, "learning_rate": 7.698986975397975e-05, "loss": 0.0119, "step": 4516 }, { "epoch": 6.53, "learning_rate": 7.695771024280431e-05, "loss": 0.0236, "step": 4517 }, { "epoch": 6.53, "learning_rate": 7.692555073162888e-05, "loss": 0.0135, "step": 4518 }, { "epoch": 6.54, "learning_rate": 7.689339122045345e-05, "loss": 0.0138, "step": 4519 }, { "epoch": 6.54, "learning_rate": 7.686123170927802e-05, "loss": 0.064, "step": 4520 }, { "epoch": 6.54, "learning_rate": 7.682907219810259e-05, "loss": 0.0139, "step": 4521 }, { "epoch": 6.54, "learning_rate": 7.679691268692716e-05, "loss": 0.0964, "step": 4522 }, { "epoch": 6.54, "learning_rate": 7.676475317575173e-05, "loss": 0.0704, "step": 4523 }, { "epoch": 6.54, "learning_rate": 7.67325936645763e-05, "loss": 0.0048, "step": 4524 }, { "epoch": 6.54, "learning_rate": 7.670043415340087e-05, "loss": 0.0054, "step": 4525 }, { "epoch": 6.55, "learning_rate": 7.666827464222544e-05, "loss": 0.0083, "step": 4526 }, { "epoch": 6.55, "learning_rate": 7.663611513105001e-05, "loss": 0.0113, "step": 4527 }, { "epoch": 6.55, "learning_rate": 7.660395561987458e-05, "loss": 0.0542, "step": 4528 }, { "epoch": 6.55, "learning_rate": 7.657179610869916e-05, "loss": 0.0802, "step": 4529 }, { "epoch": 6.55, "learning_rate": 7.653963659752373e-05, "loss": 0.0642, "step": 4530 }, { "epoch": 6.55, "learning_rate": 7.650747708634828e-05, "loss": 0.0169, "step": 4531 }, { "epoch": 6.55, "learning_rate": 7.647531757517285e-05, "loss": 0.0106, "step": 4532 }, { "epoch": 6.56, "learning_rate": 7.644315806399744e-05, "loss": 0.0151, "step": 4533 }, { "epoch": 6.56, "learning_rate": 7.6410998552822e-05, "loss": 0.0406, "step": 4534 }, { "epoch": 6.56, "learning_rate": 7.637883904164656e-05, "loss": 0.1053, "step": 4535 }, { "epoch": 6.56, "learning_rate": 7.634667953047115e-05, "loss": 0.0271, "step": 4536 }, { "epoch": 6.56, "learning_rate": 7.63145200192957e-05, "loss": 0.065, "step": 4537 }, { "epoch": 6.56, "learning_rate": 7.628236050812028e-05, "loss": 0.0002, "step": 4538 }, { "epoch": 6.56, "learning_rate": 7.625020099694486e-05, "loss": 0.0608, "step": 4539 }, { "epoch": 6.57, "learning_rate": 7.621804148576942e-05, "loss": 0.0161, "step": 4540 }, { "epoch": 6.57, "learning_rate": 7.618588197459399e-05, "loss": 0.0107, "step": 4541 }, { "epoch": 6.57, "learning_rate": 7.615372246341856e-05, "loss": 0.0091, "step": 4542 }, { "epoch": 6.57, "learning_rate": 7.612156295224313e-05, "loss": 0.0464, "step": 4543 }, { "epoch": 6.57, "learning_rate": 7.60894034410677e-05, "loss": 0.0938, "step": 4544 }, { "epoch": 6.57, "learning_rate": 7.605724392989227e-05, "loss": 0.051, "step": 4545 }, { "epoch": 6.57, "learning_rate": 7.602508441871684e-05, "loss": 0.044, "step": 4546 }, { "epoch": 6.58, "learning_rate": 7.599292490754141e-05, "loss": 0.0054, "step": 4547 }, { "epoch": 6.58, "learning_rate": 7.596076539636597e-05, "loss": 0.051, "step": 4548 }, { "epoch": 6.58, "learning_rate": 7.592860588519055e-05, "loss": 0.0141, "step": 4549 }, { "epoch": 6.58, "learning_rate": 7.589644637401512e-05, "loss": 0.0857, "step": 4550 }, { "epoch": 6.58, "learning_rate": 7.586428686283968e-05, "loss": 0.1123, "step": 4551 }, { "epoch": 6.58, "learning_rate": 7.583212735166426e-05, "loss": 0.0249, "step": 4552 }, { "epoch": 6.58, "learning_rate": 7.579996784048883e-05, "loss": 0.0944, "step": 4553 }, { "epoch": 6.59, "learning_rate": 7.576780832931339e-05, "loss": 0.0021, "step": 4554 }, { "epoch": 6.59, "learning_rate": 7.573564881813798e-05, "loss": 0.0195, "step": 4555 }, { "epoch": 6.59, "learning_rate": 7.570348930696253e-05, "loss": 0.0061, "step": 4556 }, { "epoch": 6.59, "learning_rate": 7.56713297957871e-05, "loss": 0.0201, "step": 4557 }, { "epoch": 6.59, "learning_rate": 7.563917028461169e-05, "loss": 0.0694, "step": 4558 }, { "epoch": 6.59, "learning_rate": 7.560701077343624e-05, "loss": 0.0281, "step": 4559 }, { "epoch": 6.59, "learning_rate": 7.557485126226081e-05, "loss": 0.0273, "step": 4560 }, { "epoch": 6.6, "learning_rate": 7.55426917510854e-05, "loss": 0.0342, "step": 4561 }, { "epoch": 6.6, "learning_rate": 7.551053223990996e-05, "loss": 0.0615, "step": 4562 }, { "epoch": 6.6, "learning_rate": 7.547837272873453e-05, "loss": 0.0567, "step": 4563 }, { "epoch": 6.6, "learning_rate": 7.54462132175591e-05, "loss": 0.0157, "step": 4564 }, { "epoch": 6.6, "learning_rate": 7.541405370638367e-05, "loss": 0.0563, "step": 4565 }, { "epoch": 6.6, "learning_rate": 7.538189419520824e-05, "loss": 0.0012, "step": 4566 }, { "epoch": 6.6, "learning_rate": 7.534973468403281e-05, "loss": 0.0142, "step": 4567 }, { "epoch": 6.61, "learning_rate": 7.531757517285738e-05, "loss": 0.0049, "step": 4568 }, { "epoch": 6.61, "learning_rate": 7.528541566168195e-05, "loss": 0.0049, "step": 4569 }, { "epoch": 6.61, "learning_rate": 7.525325615050652e-05, "loss": 0.0144, "step": 4570 }, { "epoch": 6.61, "learning_rate": 7.522109663933109e-05, "loss": 0.0506, "step": 4571 }, { "epoch": 6.61, "learning_rate": 7.518893712815566e-05, "loss": 0.0471, "step": 4572 }, { "epoch": 6.61, "learning_rate": 7.515677761698022e-05, "loss": 0.0414, "step": 4573 }, { "epoch": 6.61, "learning_rate": 7.51246181058048e-05, "loss": 0.0284, "step": 4574 }, { "epoch": 6.62, "learning_rate": 7.509245859462937e-05, "loss": 0.0346, "step": 4575 }, { "epoch": 6.62, "learning_rate": 7.506029908345393e-05, "loss": 0.0914, "step": 4576 }, { "epoch": 6.62, "learning_rate": 7.50281395722785e-05, "loss": 0.0075, "step": 4577 }, { "epoch": 6.62, "learning_rate": 7.499598006110308e-05, "loss": 0.016, "step": 4578 }, { "epoch": 6.62, "learning_rate": 7.496382054992764e-05, "loss": 0.0263, "step": 4579 }, { "epoch": 6.62, "learning_rate": 7.493166103875221e-05, "loss": 0.0229, "step": 4580 }, { "epoch": 6.62, "learning_rate": 7.489950152757678e-05, "loss": 0.0088, "step": 4581 }, { "epoch": 6.63, "learning_rate": 7.486734201640135e-05, "loss": 0.0308, "step": 4582 }, { "epoch": 6.63, "learning_rate": 7.483518250522592e-05, "loss": 0.0169, "step": 4583 }, { "epoch": 6.63, "learning_rate": 7.480302299405049e-05, "loss": 0.0702, "step": 4584 }, { "epoch": 6.63, "learning_rate": 7.477086348287506e-05, "loss": 0.0084, "step": 4585 }, { "epoch": 6.63, "learning_rate": 7.473870397169963e-05, "loss": 0.0199, "step": 4586 }, { "epoch": 6.63, "learning_rate": 7.47065444605242e-05, "loss": 0.0009, "step": 4587 }, { "epoch": 6.63, "learning_rate": 7.467438494934877e-05, "loss": 0.0122, "step": 4588 }, { "epoch": 6.64, "learning_rate": 7.464222543817335e-05, "loss": 0.0718, "step": 4589 }, { "epoch": 6.64, "learning_rate": 7.461006592699792e-05, "loss": 0.0271, "step": 4590 }, { "epoch": 6.64, "learning_rate": 7.457790641582249e-05, "loss": 0.0028, "step": 4591 }, { "epoch": 6.64, "learning_rate": 7.454574690464706e-05, "loss": 0.0333, "step": 4592 }, { "epoch": 6.64, "learning_rate": 7.451358739347161e-05, "loss": 0.036, "step": 4593 }, { "epoch": 6.64, "learning_rate": 7.44814278822962e-05, "loss": 0.072, "step": 4594 }, { "epoch": 6.64, "learning_rate": 7.444926837112075e-05, "loss": 0.046, "step": 4595 }, { "epoch": 6.65, "learning_rate": 7.441710885994533e-05, "loss": 0.0512, "step": 4596 }, { "epoch": 6.65, "learning_rate": 7.438494934876991e-05, "loss": 0.0503, "step": 4597 }, { "epoch": 6.65, "learning_rate": 7.435278983759447e-05, "loss": 0.0047, "step": 4598 }, { "epoch": 6.65, "learning_rate": 7.432063032641904e-05, "loss": 0.0022, "step": 4599 }, { "epoch": 6.65, "learning_rate": 7.428847081524362e-05, "loss": 0.0164, "step": 4600 }, { "epoch": 6.65, "learning_rate": 7.425631130406818e-05, "loss": 0.0479, "step": 4601 }, { "epoch": 6.66, "learning_rate": 7.422415179289275e-05, "loss": 0.0333, "step": 4602 }, { "epoch": 6.66, "learning_rate": 7.419199228171733e-05, "loss": 0.1096, "step": 4603 }, { "epoch": 6.66, "learning_rate": 7.415983277054189e-05, "loss": 0.0249, "step": 4604 }, { "epoch": 6.66, "learning_rate": 7.412767325936646e-05, "loss": 0.0168, "step": 4605 }, { "epoch": 6.66, "learning_rate": 7.409551374819103e-05, "loss": 0.0212, "step": 4606 }, { "epoch": 6.66, "learning_rate": 7.40633542370156e-05, "loss": 0.1777, "step": 4607 }, { "epoch": 6.66, "learning_rate": 7.403119472584017e-05, "loss": 0.008, "step": 4608 }, { "epoch": 6.67, "learning_rate": 7.399903521466474e-05, "loss": 0.013, "step": 4609 }, { "epoch": 6.67, "learning_rate": 7.396687570348931e-05, "loss": 0.0514, "step": 4610 }, { "epoch": 6.67, "learning_rate": 7.393471619231388e-05, "loss": 0.0133, "step": 4611 }, { "epoch": 6.67, "learning_rate": 7.390255668113844e-05, "loss": 0.0227, "step": 4612 }, { "epoch": 6.67, "learning_rate": 7.387039716996302e-05, "loss": 0.0102, "step": 4613 }, { "epoch": 6.67, "learning_rate": 7.38382376587876e-05, "loss": 0.0007, "step": 4614 }, { "epoch": 6.67, "learning_rate": 7.380607814761215e-05, "loss": 0.0169, "step": 4615 }, { "epoch": 6.68, "learning_rate": 7.377391863643674e-05, "loss": 0.0243, "step": 4616 }, { "epoch": 6.68, "learning_rate": 7.37417591252613e-05, "loss": 0.0068, "step": 4617 }, { "epoch": 6.68, "learning_rate": 7.370959961408586e-05, "loss": 0.0594, "step": 4618 }, { "epoch": 6.68, "learning_rate": 7.367744010291045e-05, "loss": 0.0581, "step": 4619 }, { "epoch": 6.68, "learning_rate": 7.3645280591735e-05, "loss": 0.0069, "step": 4620 }, { "epoch": 6.68, "learning_rate": 7.361312108055957e-05, "loss": 0.0546, "step": 4621 }, { "epoch": 6.68, "learning_rate": 7.358096156938415e-05, "loss": 0.0106, "step": 4622 }, { "epoch": 6.69, "learning_rate": 7.354880205820872e-05, "loss": 0.0549, "step": 4623 }, { "epoch": 6.69, "learning_rate": 7.351664254703329e-05, "loss": 0.0047, "step": 4624 }, { "epoch": 6.69, "learning_rate": 7.348448303585786e-05, "loss": 0.0148, "step": 4625 }, { "epoch": 6.69, "learning_rate": 7.345232352468243e-05, "loss": 0.0362, "step": 4626 }, { "epoch": 6.69, "learning_rate": 7.3420164013507e-05, "loss": 0.0633, "step": 4627 }, { "epoch": 6.69, "learning_rate": 7.338800450233157e-05, "loss": 0.1009, "step": 4628 }, { "epoch": 6.69, "learning_rate": 7.335584499115614e-05, "loss": 0.0018, "step": 4629 }, { "epoch": 6.7, "learning_rate": 7.332368547998071e-05, "loss": 0.0577, "step": 4630 }, { "epoch": 6.7, "learning_rate": 7.329152596880528e-05, "loss": 0.0076, "step": 4631 }, { "epoch": 6.7, "learning_rate": 7.325936645762985e-05, "loss": 0.0031, "step": 4632 }, { "epoch": 6.7, "learning_rate": 7.322720694645442e-05, "loss": 0.0001, "step": 4633 }, { "epoch": 6.7, "learning_rate": 7.319504743527899e-05, "loss": 0.003, "step": 4634 }, { "epoch": 6.7, "learning_rate": 7.316288792410356e-05, "loss": 0.0002, "step": 4635 }, { "epoch": 6.7, "learning_rate": 7.313072841292813e-05, "loss": 0.0374, "step": 4636 }, { "epoch": 6.71, "learning_rate": 7.309856890175269e-05, "loss": 0.0646, "step": 4637 }, { "epoch": 6.71, "learning_rate": 7.306640939057726e-05, "loss": 0.0099, "step": 4638 }, { "epoch": 6.71, "learning_rate": 7.303424987940184e-05, "loss": 0.0033, "step": 4639 }, { "epoch": 6.71, "learning_rate": 7.30020903682264e-05, "loss": 0.0821, "step": 4640 }, { "epoch": 6.71, "learning_rate": 7.296993085705097e-05, "loss": 0.0121, "step": 4641 }, { "epoch": 6.71, "learning_rate": 7.293777134587556e-05, "loss": 0.0446, "step": 4642 }, { "epoch": 6.71, "learning_rate": 7.290561183470011e-05, "loss": 0.0641, "step": 4643 }, { "epoch": 6.72, "learning_rate": 7.287345232352468e-05, "loss": 0.0153, "step": 4644 }, { "epoch": 6.72, "learning_rate": 7.284129281234925e-05, "loss": 0.0239, "step": 4645 }, { "epoch": 6.72, "learning_rate": 7.280913330117382e-05, "loss": 0.0003, "step": 4646 }, { "epoch": 6.72, "learning_rate": 7.27769737899984e-05, "loss": 0.0094, "step": 4647 }, { "epoch": 6.72, "learning_rate": 7.274481427882297e-05, "loss": 0.0095, "step": 4648 }, { "epoch": 6.72, "learning_rate": 7.271265476764754e-05, "loss": 0.074, "step": 4649 }, { "epoch": 6.72, "learning_rate": 7.26804952564721e-05, "loss": 0.0049, "step": 4650 }, { "epoch": 6.73, "learning_rate": 7.264833574529666e-05, "loss": 0.0024, "step": 4651 }, { "epoch": 6.73, "learning_rate": 7.261617623412125e-05, "loss": 0.0071, "step": 4652 }, { "epoch": 6.73, "learning_rate": 7.258401672294582e-05, "loss": 0.0026, "step": 4653 }, { "epoch": 6.73, "learning_rate": 7.255185721177037e-05, "loss": 0.1013, "step": 4654 }, { "epoch": 6.73, "learning_rate": 7.251969770059496e-05, "loss": 0.0128, "step": 4655 }, { "epoch": 6.73, "learning_rate": 7.248753818941953e-05, "loss": 0.0542, "step": 4656 }, { "epoch": 6.73, "learning_rate": 7.245537867824409e-05, "loss": 0.1428, "step": 4657 }, { "epoch": 6.74, "learning_rate": 7.242321916706867e-05, "loss": 0.0079, "step": 4658 }, { "epoch": 6.74, "learning_rate": 7.239105965589324e-05, "loss": 0.0426, "step": 4659 }, { "epoch": 6.74, "learning_rate": 7.23589001447178e-05, "loss": 0.0059, "step": 4660 }, { "epoch": 6.74, "learning_rate": 7.232674063354238e-05, "loss": 0.0161, "step": 4661 }, { "epoch": 6.74, "learning_rate": 7.229458112236694e-05, "loss": 0.0059, "step": 4662 }, { "epoch": 6.74, "learning_rate": 7.226242161119151e-05, "loss": 0.0038, "step": 4663 }, { "epoch": 6.74, "learning_rate": 7.223026210001609e-05, "loss": 0.0119, "step": 4664 }, { "epoch": 6.75, "learning_rate": 7.219810258884065e-05, "loss": 0.0133, "step": 4665 }, { "epoch": 6.75, "learning_rate": 7.216594307766522e-05, "loss": 0.0786, "step": 4666 }, { "epoch": 6.75, "learning_rate": 7.213378356648979e-05, "loss": 0.0001, "step": 4667 }, { "epoch": 6.75, "learning_rate": 7.210162405531436e-05, "loss": 0.0483, "step": 4668 }, { "epoch": 6.75, "learning_rate": 7.206946454413893e-05, "loss": 0.0496, "step": 4669 }, { "epoch": 6.75, "learning_rate": 7.20373050329635e-05, "loss": 0.0044, "step": 4670 }, { "epoch": 6.75, "learning_rate": 7.200514552178807e-05, "loss": 0.0033, "step": 4671 }, { "epoch": 6.76, "learning_rate": 7.197298601061264e-05, "loss": 0.0078, "step": 4672 }, { "epoch": 6.76, "learning_rate": 7.194082649943721e-05, "loss": 0.075, "step": 4673 }, { "epoch": 6.76, "learning_rate": 7.190866698826178e-05, "loss": 0.0043, "step": 4674 }, { "epoch": 6.76, "learning_rate": 7.187650747708636e-05, "loss": 0.0036, "step": 4675 }, { "epoch": 6.76, "learning_rate": 7.184434796591091e-05, "loss": 0.0154, "step": 4676 }, { "epoch": 6.76, "learning_rate": 7.18121884547355e-05, "loss": 0.034, "step": 4677 }, { "epoch": 6.77, "learning_rate": 7.178002894356007e-05, "loss": 0.0288, "step": 4678 }, { "epoch": 6.77, "learning_rate": 7.174786943238462e-05, "loss": 0.0279, "step": 4679 }, { "epoch": 6.77, "learning_rate": 7.171570992120921e-05, "loss": 0.0348, "step": 4680 }, { "epoch": 6.77, "learning_rate": 7.168355041003378e-05, "loss": 0.0079, "step": 4681 }, { "epoch": 6.77, "learning_rate": 7.165139089885834e-05, "loss": 0.0018, "step": 4682 }, { "epoch": 6.77, "learning_rate": 7.16192313876829e-05, "loss": 0.0883, "step": 4683 }, { "epoch": 6.77, "learning_rate": 7.158707187650749e-05, "loss": 0.0184, "step": 4684 }, { "epoch": 6.78, "learning_rate": 7.155491236533205e-05, "loss": 0.002, "step": 4685 }, { "epoch": 6.78, "learning_rate": 7.152275285415662e-05, "loss": 0.0056, "step": 4686 }, { "epoch": 6.78, "learning_rate": 7.149059334298119e-05, "loss": 0.0005, "step": 4687 }, { "epoch": 6.78, "learning_rate": 7.145843383180576e-05, "loss": 0.0377, "step": 4688 }, { "epoch": 6.78, "learning_rate": 7.142627432063033e-05, "loss": 0.0309, "step": 4689 }, { "epoch": 6.78, "learning_rate": 7.13941148094549e-05, "loss": 0.0191, "step": 4690 }, { "epoch": 6.78, "learning_rate": 7.136195529827947e-05, "loss": 0.0563, "step": 4691 }, { "epoch": 6.79, "learning_rate": 7.132979578710404e-05, "loss": 0.0153, "step": 4692 }, { "epoch": 6.79, "learning_rate": 7.129763627592861e-05, "loss": 0.005, "step": 4693 }, { "epoch": 6.79, "learning_rate": 7.126547676475318e-05, "loss": 0.0061, "step": 4694 }, { "epoch": 6.79, "learning_rate": 7.123331725357775e-05, "loss": 0.0065, "step": 4695 }, { "epoch": 6.79, "learning_rate": 7.120115774240231e-05, "loss": 0.0004, "step": 4696 }, { "epoch": 6.79, "learning_rate": 7.116899823122689e-05, "loss": 0.0078, "step": 4697 }, { "epoch": 6.79, "learning_rate": 7.113683872005146e-05, "loss": 0.019, "step": 4698 }, { "epoch": 6.8, "learning_rate": 7.110467920887602e-05, "loss": 0.0038, "step": 4699 }, { "epoch": 6.8, "learning_rate": 7.10725196977006e-05, "loss": 0.0596, "step": 4700 }, { "epoch": 6.8, "learning_rate": 7.104036018652516e-05, "loss": 0.0063, "step": 4701 }, { "epoch": 6.8, "learning_rate": 7.100820067534973e-05, "loss": 0.0576, "step": 4702 }, { "epoch": 6.8, "learning_rate": 7.097604116417432e-05, "loss": 0.0342, "step": 4703 }, { "epoch": 6.8, "learning_rate": 7.094388165299887e-05, "loss": 0.0048, "step": 4704 }, { "epoch": 6.8, "learning_rate": 7.091172214182344e-05, "loss": 0.0444, "step": 4705 }, { "epoch": 6.81, "learning_rate": 7.087956263064803e-05, "loss": 0.0374, "step": 4706 }, { "epoch": 6.81, "learning_rate": 7.084740311947258e-05, "loss": 0.0515, "step": 4707 }, { "epoch": 6.81, "learning_rate": 7.081524360829716e-05, "loss": 0.0452, "step": 4708 }, { "epoch": 6.81, "learning_rate": 7.078308409712173e-05, "loss": 0.0325, "step": 4709 }, { "epoch": 6.81, "learning_rate": 7.07509245859463e-05, "loss": 0.0079, "step": 4710 }, { "epoch": 6.81, "learning_rate": 7.071876507477087e-05, "loss": 0.0735, "step": 4711 }, { "epoch": 6.81, "learning_rate": 7.068660556359544e-05, "loss": 0.125, "step": 4712 }, { "epoch": 6.82, "learning_rate": 7.065444605242001e-05, "loss": 0.0323, "step": 4713 }, { "epoch": 6.82, "learning_rate": 7.062228654124458e-05, "loss": 0.1091, "step": 4714 }, { "epoch": 6.82, "learning_rate": 7.059012703006915e-05, "loss": 0.0024, "step": 4715 }, { "epoch": 6.82, "learning_rate": 7.055796751889372e-05, "loss": 0.0011, "step": 4716 }, { "epoch": 6.82, "learning_rate": 7.052580800771829e-05, "loss": 0.0271, "step": 4717 }, { "epoch": 6.82, "learning_rate": 7.049364849654285e-05, "loss": 0.0011, "step": 4718 }, { "epoch": 6.82, "learning_rate": 7.046148898536743e-05, "loss": 0.0122, "step": 4719 }, { "epoch": 6.83, "learning_rate": 7.0429329474192e-05, "loss": 0.0033, "step": 4720 }, { "epoch": 6.83, "learning_rate": 7.039716996301656e-05, "loss": 0.0029, "step": 4721 }, { "epoch": 6.83, "learning_rate": 7.036501045184114e-05, "loss": 0.1697, "step": 4722 }, { "epoch": 6.83, "learning_rate": 7.033285094066571e-05, "loss": 0.0158, "step": 4723 }, { "epoch": 6.83, "learning_rate": 7.030069142949027e-05, "loss": 0.0604, "step": 4724 }, { "epoch": 6.83, "learning_rate": 7.026853191831485e-05, "loss": 0.1119, "step": 4725 }, { "epoch": 6.83, "learning_rate": 7.023637240713941e-05, "loss": 0.0291, "step": 4726 }, { "epoch": 6.84, "learning_rate": 7.020421289596398e-05, "loss": 0.0082, "step": 4727 }, { "epoch": 6.84, "learning_rate": 7.017205338478855e-05, "loss": 0.0634, "step": 4728 }, { "epoch": 6.84, "learning_rate": 7.013989387361312e-05, "loss": 0.0797, "step": 4729 }, { "epoch": 6.84, "learning_rate": 7.010773436243769e-05, "loss": 0.0036, "step": 4730 }, { "epoch": 6.84, "learning_rate": 7.007557485126226e-05, "loss": 0.0095, "step": 4731 }, { "epoch": 6.84, "learning_rate": 7.004341534008683e-05, "loss": 0.0071, "step": 4732 }, { "epoch": 6.84, "learning_rate": 7.00112558289114e-05, "loss": 0.0034, "step": 4733 }, { "epoch": 6.85, "learning_rate": 6.997909631773597e-05, "loss": 0.017, "step": 4734 }, { "epoch": 6.85, "learning_rate": 6.994693680656055e-05, "loss": 0.0061, "step": 4735 }, { "epoch": 6.85, "learning_rate": 6.991477729538512e-05, "loss": 0.0029, "step": 4736 }, { "epoch": 6.85, "learning_rate": 6.988261778420969e-05, "loss": 0.0264, "step": 4737 }, { "epoch": 6.85, "learning_rate": 6.985045827303426e-05, "loss": 0.0623, "step": 4738 }, { "epoch": 6.85, "learning_rate": 6.981829876185883e-05, "loss": 0.0218, "step": 4739 }, { "epoch": 6.85, "learning_rate": 6.978613925068338e-05, "loss": 0.0098, "step": 4740 }, { "epoch": 6.86, "learning_rate": 6.975397973950795e-05, "loss": 0.005, "step": 4741 }, { "epoch": 6.86, "learning_rate": 6.972182022833254e-05, "loss": 0.0558, "step": 4742 }, { "epoch": 6.86, "learning_rate": 6.96896607171571e-05, "loss": 0.0504, "step": 4743 }, { "epoch": 6.86, "learning_rate": 6.965750120598167e-05, "loss": 0.0298, "step": 4744 }, { "epoch": 6.86, "learning_rate": 6.962534169480625e-05, "loss": 0.0354, "step": 4745 }, { "epoch": 6.86, "learning_rate": 6.959318218363081e-05, "loss": 0.032, "step": 4746 }, { "epoch": 6.86, "learning_rate": 6.956102267245538e-05, "loss": 0.0058, "step": 4747 }, { "epoch": 6.87, "learning_rate": 6.952886316127996e-05, "loss": 0.0229, "step": 4748 }, { "epoch": 6.87, "learning_rate": 6.949670365010452e-05, "loss": 0.0113, "step": 4749 }, { "epoch": 6.87, "learning_rate": 6.946454413892909e-05, "loss": 0.0503, "step": 4750 }, { "epoch": 6.87, "learning_rate": 6.943238462775366e-05, "loss": 0.0222, "step": 4751 }, { "epoch": 6.87, "learning_rate": 6.940022511657823e-05, "loss": 0.0112, "step": 4752 }, { "epoch": 6.87, "learning_rate": 6.93680656054028e-05, "loss": 0.0093, "step": 4753 }, { "epoch": 6.87, "learning_rate": 6.933590609422737e-05, "loss": 0.0102, "step": 4754 }, { "epoch": 6.88, "learning_rate": 6.930374658305194e-05, "loss": 0.035, "step": 4755 }, { "epoch": 6.88, "learning_rate": 6.927158707187651e-05, "loss": 0.0366, "step": 4756 }, { "epoch": 6.88, "learning_rate": 6.923942756070107e-05, "loss": 0.017, "step": 4757 }, { "epoch": 6.88, "learning_rate": 6.920726804952565e-05, "loss": 0.0933, "step": 4758 }, { "epoch": 6.88, "learning_rate": 6.917510853835022e-05, "loss": 0.0312, "step": 4759 }, { "epoch": 6.88, "learning_rate": 6.914294902717478e-05, "loss": 0.0005, "step": 4760 }, { "epoch": 6.89, "learning_rate": 6.911078951599937e-05, "loss": 0.0287, "step": 4761 }, { "epoch": 6.89, "learning_rate": 6.907863000482394e-05, "loss": 0.0367, "step": 4762 }, { "epoch": 6.89, "learning_rate": 6.904647049364849e-05, "loss": 0.0767, "step": 4763 }, { "epoch": 6.89, "learning_rate": 6.901431098247308e-05, "loss": 0.0061, "step": 4764 }, { "epoch": 6.89, "learning_rate": 6.898215147129763e-05, "loss": 0.0138, "step": 4765 }, { "epoch": 6.89, "learning_rate": 6.89499919601222e-05, "loss": 0.0972, "step": 4766 }, { "epoch": 6.89, "learning_rate": 6.891783244894679e-05, "loss": 0.0329, "step": 4767 }, { "epoch": 6.9, "learning_rate": 6.888567293777135e-05, "loss": 0.0266, "step": 4768 }, { "epoch": 6.9, "learning_rate": 6.885351342659592e-05, "loss": 0.0265, "step": 4769 }, { "epoch": 6.9, "learning_rate": 6.882135391542049e-05, "loss": 0.0977, "step": 4770 }, { "epoch": 6.9, "learning_rate": 6.878919440424506e-05, "loss": 0.0043, "step": 4771 }, { "epoch": 6.9, "learning_rate": 6.875703489306963e-05, "loss": 0.0268, "step": 4772 }, { "epoch": 6.9, "learning_rate": 6.87248753818942e-05, "loss": 0.027, "step": 4773 }, { "epoch": 6.9, "learning_rate": 6.869271587071877e-05, "loss": 0.0079, "step": 4774 }, { "epoch": 6.91, "learning_rate": 6.866055635954334e-05, "loss": 0.1165, "step": 4775 }, { "epoch": 6.91, "learning_rate": 6.862839684836791e-05, "loss": 0.0034, "step": 4776 }, { "epoch": 6.91, "learning_rate": 6.859623733719248e-05, "loss": 0.0684, "step": 4777 }, { "epoch": 6.91, "learning_rate": 6.856407782601705e-05, "loss": 0.0369, "step": 4778 }, { "epoch": 6.91, "learning_rate": 6.853191831484162e-05, "loss": 0.1068, "step": 4779 }, { "epoch": 6.91, "learning_rate": 6.849975880366619e-05, "loss": 0.0857, "step": 4780 }, { "epoch": 6.91, "learning_rate": 6.846759929249076e-05, "loss": 0.0077, "step": 4781 }, { "epoch": 6.92, "learning_rate": 6.843543978131532e-05, "loss": 0.0448, "step": 4782 }, { "epoch": 6.92, "learning_rate": 6.84032802701399e-05, "loss": 0.0044, "step": 4783 }, { "epoch": 6.92, "learning_rate": 6.837112075896447e-05, "loss": 0.1012, "step": 4784 }, { "epoch": 6.92, "learning_rate": 6.833896124778903e-05, "loss": 0.0835, "step": 4785 }, { "epoch": 6.92, "learning_rate": 6.83068017366136e-05, "loss": 0.0253, "step": 4786 }, { "epoch": 6.92, "learning_rate": 6.827464222543818e-05, "loss": 0.003, "step": 4787 }, { "epoch": 6.92, "learning_rate": 6.824248271426274e-05, "loss": 0.125, "step": 4788 }, { "epoch": 6.93, "learning_rate": 6.821032320308731e-05, "loss": 0.0409, "step": 4789 }, { "epoch": 6.93, "learning_rate": 6.817816369191188e-05, "loss": 0.0415, "step": 4790 }, { "epoch": 6.93, "learning_rate": 6.814600418073645e-05, "loss": 0.0244, "step": 4791 }, { "epoch": 6.93, "learning_rate": 6.811384466956102e-05, "loss": 0.0039, "step": 4792 }, { "epoch": 6.93, "learning_rate": 6.80816851583856e-05, "loss": 0.0004, "step": 4793 }, { "epoch": 6.93, "learning_rate": 6.804952564721016e-05, "loss": 0.0092, "step": 4794 }, { "epoch": 6.93, "learning_rate": 6.801736613603474e-05, "loss": 0.0684, "step": 4795 }, { "epoch": 6.94, "learning_rate": 6.79852066248593e-05, "loss": 0.0019, "step": 4796 }, { "epoch": 6.94, "learning_rate": 6.795304711368388e-05, "loss": 0.005, "step": 4797 }, { "epoch": 6.94, "learning_rate": 6.792088760250845e-05, "loss": 0.0001, "step": 4798 }, { "epoch": 6.94, "learning_rate": 6.788872809133302e-05, "loss": 0.1151, "step": 4799 }, { "epoch": 6.94, "learning_rate": 6.785656858015759e-05, "loss": 0.0639, "step": 4800 }, { "epoch": 6.94, "learning_rate": 6.782440906898216e-05, "loss": 0.0111, "step": 4801 }, { "epoch": 6.94, "learning_rate": 6.779224955780672e-05, "loss": 0.0795, "step": 4802 }, { "epoch": 6.95, "learning_rate": 6.77600900466313e-05, "loss": 0.0624, "step": 4803 }, { "epoch": 6.95, "learning_rate": 6.772793053545587e-05, "loss": 0.0641, "step": 4804 }, { "epoch": 6.95, "learning_rate": 6.769577102428043e-05, "loss": 0.0359, "step": 4805 }, { "epoch": 6.95, "learning_rate": 6.766361151310501e-05, "loss": 0.0095, "step": 4806 }, { "epoch": 6.95, "learning_rate": 6.763145200192957e-05, "loss": 0.0332, "step": 4807 }, { "epoch": 6.95, "learning_rate": 6.759929249075414e-05, "loss": 0.0401, "step": 4808 }, { "epoch": 6.95, "learning_rate": 6.756713297957872e-05, "loss": 0.0066, "step": 4809 }, { "epoch": 6.96, "learning_rate": 6.753497346840328e-05, "loss": 0.0016, "step": 4810 }, { "epoch": 6.96, "learning_rate": 6.750281395722785e-05, "loss": 0.0132, "step": 4811 }, { "epoch": 6.96, "learning_rate": 6.747065444605243e-05, "loss": 0.0564, "step": 4812 }, { "epoch": 6.96, "learning_rate": 6.743849493487699e-05, "loss": 0.0042, "step": 4813 }, { "epoch": 6.96, "learning_rate": 6.740633542370156e-05, "loss": 0.0006, "step": 4814 }, { "epoch": 6.96, "learning_rate": 6.737417591252613e-05, "loss": 0.0014, "step": 4815 }, { "epoch": 6.96, "learning_rate": 6.73420164013507e-05, "loss": 0.0212, "step": 4816 }, { "epoch": 6.97, "learning_rate": 6.730985689017527e-05, "loss": 0.0362, "step": 4817 }, { "epoch": 6.97, "learning_rate": 6.727769737899984e-05, "loss": 0.0641, "step": 4818 }, { "epoch": 6.97, "learning_rate": 6.724553786782441e-05, "loss": 0.0686, "step": 4819 }, { "epoch": 6.97, "learning_rate": 6.721337835664898e-05, "loss": 0.0009, "step": 4820 }, { "epoch": 6.97, "learning_rate": 6.718121884547354e-05, "loss": 0.0597, "step": 4821 }, { "epoch": 6.97, "learning_rate": 6.714905933429813e-05, "loss": 0.0004, "step": 4822 }, { "epoch": 6.97, "learning_rate": 6.71168998231227e-05, "loss": 0.103, "step": 4823 }, { "epoch": 6.98, "learning_rate": 6.708474031194725e-05, "loss": 0.0001, "step": 4824 }, { "epoch": 6.98, "learning_rate": 6.705258080077184e-05, "loss": 0.0291, "step": 4825 }, { "epoch": 6.98, "learning_rate": 6.702042128959641e-05, "loss": 0.0735, "step": 4826 }, { "epoch": 6.98, "learning_rate": 6.698826177842096e-05, "loss": 0.0007, "step": 4827 }, { "epoch": 6.98, "learning_rate": 6.695610226724555e-05, "loss": 0.0099, "step": 4828 }, { "epoch": 6.98, "learning_rate": 6.692394275607012e-05, "loss": 0.0062, "step": 4829 }, { "epoch": 6.98, "learning_rate": 6.689178324489468e-05, "loss": 0.0456, "step": 4830 }, { "epoch": 6.99, "learning_rate": 6.685962373371925e-05, "loss": 0.0007, "step": 4831 }, { "epoch": 6.99, "learning_rate": 6.682746422254382e-05, "loss": 0.0564, "step": 4832 }, { "epoch": 6.99, "learning_rate": 6.679530471136839e-05, "loss": 0.0333, "step": 4833 }, { "epoch": 6.99, "learning_rate": 6.676314520019296e-05, "loss": 0.0177, "step": 4834 }, { "epoch": 6.99, "learning_rate": 6.673098568901753e-05, "loss": 0.0151, "step": 4835 }, { "epoch": 6.99, "learning_rate": 6.66988261778421e-05, "loss": 0.0354, "step": 4836 }, { "epoch": 6.99, "learning_rate": 6.666666666666667e-05, "loss": 0.0631, "step": 4837 }, { "epoch": 7.0, "learning_rate": 6.663450715549124e-05, "loss": 0.0302, "step": 4838 }, { "epoch": 7.0, "learning_rate": 6.660234764431581e-05, "loss": 0.0148, "step": 4839 }, { "epoch": 7.0, "learning_rate": 6.657018813314038e-05, "loss": 0.125, "step": 4840 }, { "epoch": 7.0, "learning_rate": 6.653802862196495e-05, "loss": 0.0004, "step": 4841 }, { "epoch": 7.0, "learning_rate": 6.650586911078952e-05, "loss": 0.0049, "step": 4842 }, { "epoch": 7.0, "learning_rate": 6.647370959961409e-05, "loss": 0.0255, "step": 4843 }, { "epoch": 7.01, "learning_rate": 6.644155008843866e-05, "loss": 0.001, "step": 4844 }, { "epoch": 7.01, "learning_rate": 6.640939057726323e-05, "loss": 0.0016, "step": 4845 }, { "epoch": 7.01, "learning_rate": 6.637723106608779e-05, "loss": 0.0197, "step": 4846 }, { "epoch": 7.01, "learning_rate": 6.634507155491236e-05, "loss": 0.0001, "step": 4847 }, { "epoch": 7.01, "learning_rate": 6.631291204373695e-05, "loss": 0.0503, "step": 4848 }, { "epoch": 7.01, "learning_rate": 6.62807525325615e-05, "loss": 0.023, "step": 4849 }, { "epoch": 7.01, "learning_rate": 6.624859302138607e-05, "loss": 0.0252, "step": 4850 }, { "epoch": 7.02, "learning_rate": 6.621643351021066e-05, "loss": 0.0195, "step": 4851 }, { "epoch": 7.02, "learning_rate": 6.618427399903521e-05, "loss": 0.0074, "step": 4852 }, { "epoch": 7.02, "learning_rate": 6.615211448785978e-05, "loss": 0.006, "step": 4853 }, { "epoch": 7.02, "learning_rate": 6.611995497668436e-05, "loss": 0.0055, "step": 4854 }, { "epoch": 7.02, "learning_rate": 6.608779546550893e-05, "loss": 0.014, "step": 4855 }, { "epoch": 7.02, "learning_rate": 6.60556359543335e-05, "loss": 0.0487, "step": 4856 }, { "epoch": 7.02, "learning_rate": 6.602347644315807e-05, "loss": 0.0562, "step": 4857 }, { "epoch": 7.03, "learning_rate": 6.599131693198264e-05, "loss": 0.0113, "step": 4858 }, { "epoch": 7.03, "learning_rate": 6.595915742080721e-05, "loss": 0.0052, "step": 4859 }, { "epoch": 7.03, "learning_rate": 6.592699790963178e-05, "loss": 0.0156, "step": 4860 }, { "epoch": 7.03, "learning_rate": 6.589483839845635e-05, "loss": 0.0703, "step": 4861 }, { "epoch": 7.03, "learning_rate": 6.586267888728092e-05, "loss": 0.0393, "step": 4862 }, { "epoch": 7.03, "learning_rate": 6.583051937610548e-05, "loss": 0.0021, "step": 4863 }, { "epoch": 7.03, "learning_rate": 6.579835986493006e-05, "loss": 0.0977, "step": 4864 }, { "epoch": 7.04, "learning_rate": 6.576620035375463e-05, "loss": 0.0354, "step": 4865 }, { "epoch": 7.04, "learning_rate": 6.573404084257919e-05, "loss": 0.0003, "step": 4866 }, { "epoch": 7.04, "learning_rate": 6.570188133140377e-05, "loss": 0.001, "step": 4867 }, { "epoch": 7.04, "learning_rate": 6.566972182022834e-05, "loss": 0.0194, "step": 4868 }, { "epoch": 7.04, "learning_rate": 6.56375623090529e-05, "loss": 0.0349, "step": 4869 }, { "epoch": 7.04, "learning_rate": 6.560540279787748e-05, "loss": 0.0123, "step": 4870 }, { "epoch": 7.04, "learning_rate": 6.557324328670204e-05, "loss": 0.0374, "step": 4871 }, { "epoch": 7.05, "learning_rate": 6.554108377552661e-05, "loss": 0.0436, "step": 4872 }, { "epoch": 7.05, "learning_rate": 6.55089242643512e-05, "loss": 0.0396, "step": 4873 }, { "epoch": 7.05, "learning_rate": 6.547676475317575e-05, "loss": 0.0008, "step": 4874 }, { "epoch": 7.05, "learning_rate": 6.544460524200032e-05, "loss": 0.0161, "step": 4875 }, { "epoch": 7.05, "learning_rate": 6.541244573082489e-05, "loss": 0.0422, "step": 4876 }, { "epoch": 7.05, "learning_rate": 6.538028621964946e-05, "loss": 0.0041, "step": 4877 }, { "epoch": 7.05, "learning_rate": 6.534812670847403e-05, "loss": 0.0036, "step": 4878 }, { "epoch": 7.06, "learning_rate": 6.53159671972986e-05, "loss": 0.0136, "step": 4879 }, { "epoch": 7.06, "learning_rate": 6.528380768612317e-05, "loss": 0.021, "step": 4880 }, { "epoch": 7.06, "learning_rate": 6.525164817494775e-05, "loss": 0.023, "step": 4881 }, { "epoch": 7.06, "learning_rate": 6.521948866377232e-05, "loss": 0.0005, "step": 4882 }, { "epoch": 7.06, "learning_rate": 6.518732915259689e-05, "loss": 0.0211, "step": 4883 }, { "epoch": 7.06, "learning_rate": 6.515516964142146e-05, "loss": 0.0305, "step": 4884 }, { "epoch": 7.06, "learning_rate": 6.512301013024601e-05, "loss": 0.0039, "step": 4885 }, { "epoch": 7.07, "learning_rate": 6.50908506190706e-05, "loss": 0.005, "step": 4886 }, { "epoch": 7.07, "learning_rate": 6.505869110789517e-05, "loss": 0.0004, "step": 4887 }, { "epoch": 7.07, "learning_rate": 6.502653159671973e-05, "loss": 0.0068, "step": 4888 }, { "epoch": 7.07, "learning_rate": 6.499437208554431e-05, "loss": 0.0873, "step": 4889 }, { "epoch": 7.07, "learning_rate": 6.496221257436888e-05, "loss": 0.001, "step": 4890 }, { "epoch": 7.07, "learning_rate": 6.493005306319344e-05, "loss": 0.0006, "step": 4891 }, { "epoch": 7.07, "learning_rate": 6.489789355201801e-05, "loss": 0.0076, "step": 4892 }, { "epoch": 7.08, "learning_rate": 6.486573404084259e-05, "loss": 0.0376, "step": 4893 }, { "epoch": 7.08, "learning_rate": 6.483357452966715e-05, "loss": 0.0482, "step": 4894 }, { "epoch": 7.08, "learning_rate": 6.480141501849172e-05, "loss": 0.0325, "step": 4895 }, { "epoch": 7.08, "learning_rate": 6.476925550731629e-05, "loss": 0.0486, "step": 4896 }, { "epoch": 7.08, "learning_rate": 6.473709599614086e-05, "loss": 0.0172, "step": 4897 }, { "epoch": 7.08, "learning_rate": 6.470493648496543e-05, "loss": 0.0347, "step": 4898 }, { "epoch": 7.08, "learning_rate": 6.467277697379e-05, "loss": 0.0104, "step": 4899 }, { "epoch": 7.09, "learning_rate": 6.464061746261457e-05, "loss": 0.0011, "step": 4900 }, { "epoch": 7.09, "learning_rate": 6.460845795143914e-05, "loss": 0.0069, "step": 4901 }, { "epoch": 7.09, "learning_rate": 6.457629844026371e-05, "loss": 0.0036, "step": 4902 }, { "epoch": 7.09, "learning_rate": 6.454413892908828e-05, "loss": 0.0139, "step": 4903 }, { "epoch": 7.09, "learning_rate": 6.451197941791285e-05, "loss": 0.0329, "step": 4904 }, { "epoch": 7.09, "learning_rate": 6.447981990673741e-05, "loss": 0.0693, "step": 4905 }, { "epoch": 7.09, "learning_rate": 6.4447660395562e-05, "loss": 0.0472, "step": 4906 }, { "epoch": 7.1, "learning_rate": 6.441550088438657e-05, "loss": 0.0475, "step": 4907 }, { "epoch": 7.1, "learning_rate": 6.438334137321112e-05, "loss": 0.0138, "step": 4908 }, { "epoch": 7.1, "learning_rate": 6.43511818620357e-05, "loss": 0.0328, "step": 4909 }, { "epoch": 7.1, "learning_rate": 6.431902235086026e-05, "loss": 0.0443, "step": 4910 }, { "epoch": 7.1, "learning_rate": 6.428686283968483e-05, "loss": 0.0206, "step": 4911 }, { "epoch": 7.1, "learning_rate": 6.425470332850942e-05, "loss": 0.0093, "step": 4912 }, { "epoch": 7.1, "learning_rate": 6.422254381733397e-05, "loss": 0.0256, "step": 4913 }, { "epoch": 7.11, "learning_rate": 6.419038430615855e-05, "loss": 0.0013, "step": 4914 }, { "epoch": 7.11, "learning_rate": 6.415822479498313e-05, "loss": 0.0504, "step": 4915 }, { "epoch": 7.11, "learning_rate": 6.412606528380769e-05, "loss": 0.0515, "step": 4916 }, { "epoch": 7.11, "learning_rate": 6.409390577263226e-05, "loss": 0.0023, "step": 4917 }, { "epoch": 7.11, "learning_rate": 6.406174626145684e-05, "loss": 0.0062, "step": 4918 }, { "epoch": 7.11, "learning_rate": 6.40295867502814e-05, "loss": 0.0272, "step": 4919 }, { "epoch": 7.11, "learning_rate": 6.399742723910597e-05, "loss": 0.053, "step": 4920 }, { "epoch": 7.12, "learning_rate": 6.396526772793054e-05, "loss": 0.0023, "step": 4921 }, { "epoch": 7.12, "learning_rate": 6.393310821675511e-05, "loss": 0.0177, "step": 4922 }, { "epoch": 7.12, "learning_rate": 6.390094870557968e-05, "loss": 0.0174, "step": 4923 }, { "epoch": 7.12, "learning_rate": 6.386878919440425e-05, "loss": 0.005, "step": 4924 }, { "epoch": 7.12, "learning_rate": 6.383662968322882e-05, "loss": 0.0213, "step": 4925 }, { "epoch": 7.12, "learning_rate": 6.380447017205339e-05, "loss": 0.0562, "step": 4926 }, { "epoch": 7.13, "learning_rate": 6.377231066087795e-05, "loss": 0.0031, "step": 4927 }, { "epoch": 7.13, "learning_rate": 6.374015114970253e-05, "loss": 0.0363, "step": 4928 }, { "epoch": 7.13, "learning_rate": 6.37079916385271e-05, "loss": 0.0084, "step": 4929 }, { "epoch": 7.13, "learning_rate": 6.367583212735166e-05, "loss": 0.0046, "step": 4930 }, { "epoch": 7.13, "learning_rate": 6.364367261617624e-05, "loss": 0.0043, "step": 4931 }, { "epoch": 7.13, "learning_rate": 6.361151310500081e-05, "loss": 0.0211, "step": 4932 }, { "epoch": 7.13, "learning_rate": 6.357935359382537e-05, "loss": 0.0372, "step": 4933 }, { "epoch": 7.14, "learning_rate": 6.354719408264994e-05, "loss": 0.0016, "step": 4934 }, { "epoch": 7.14, "learning_rate": 6.351503457147451e-05, "loss": 0.0306, "step": 4935 }, { "epoch": 7.14, "learning_rate": 6.348287506029908e-05, "loss": 0.0471, "step": 4936 }, { "epoch": 7.14, "learning_rate": 6.345071554912365e-05, "loss": 0.0524, "step": 4937 }, { "epoch": 7.14, "learning_rate": 6.341855603794822e-05, "loss": 0.0095, "step": 4938 }, { "epoch": 7.14, "learning_rate": 6.33863965267728e-05, "loss": 0.0469, "step": 4939 }, { "epoch": 7.14, "learning_rate": 6.335423701559736e-05, "loss": 0.0757, "step": 4940 }, { "epoch": 7.15, "learning_rate": 6.332207750442194e-05, "loss": 0.0003, "step": 4941 }, { "epoch": 7.15, "learning_rate": 6.32899179932465e-05, "loss": 0.0019, "step": 4942 }, { "epoch": 7.15, "learning_rate": 6.325775848207108e-05, "loss": 0.0112, "step": 4943 }, { "epoch": 7.15, "learning_rate": 6.322559897089565e-05, "loss": 0.0696, "step": 4944 }, { "epoch": 7.15, "learning_rate": 6.319343945972022e-05, "loss": 0.0232, "step": 4945 }, { "epoch": 7.15, "learning_rate": 6.316127994854479e-05, "loss": 0.0001, "step": 4946 }, { "epoch": 7.15, "learning_rate": 6.312912043736936e-05, "loss": 0.0374, "step": 4947 }, { "epoch": 7.16, "learning_rate": 6.309696092619393e-05, "loss": 0.0505, "step": 4948 }, { "epoch": 7.16, "learning_rate": 6.30648014150185e-05, "loss": 0.05, "step": 4949 }, { "epoch": 7.16, "learning_rate": 6.303264190384306e-05, "loss": 0.0077, "step": 4950 }, { "epoch": 7.16, "learning_rate": 6.300048239266764e-05, "loss": 0.0001, "step": 4951 }, { "epoch": 7.16, "learning_rate": 6.29683228814922e-05, "loss": 0.0598, "step": 4952 }, { "epoch": 7.16, "learning_rate": 6.293616337031677e-05, "loss": 0.0384, "step": 4953 }, { "epoch": 7.16, "learning_rate": 6.290400385914135e-05, "loss": 0.0054, "step": 4954 }, { "epoch": 7.17, "learning_rate": 6.287184434796591e-05, "loss": 0.0559, "step": 4955 }, { "epoch": 7.17, "learning_rate": 6.283968483679048e-05, "loss": 0.0011, "step": 4956 }, { "epoch": 7.17, "learning_rate": 6.280752532561506e-05, "loss": 0.0077, "step": 4957 }, { "epoch": 7.17, "learning_rate": 6.277536581443962e-05, "loss": 0.0146, "step": 4958 }, { "epoch": 7.17, "learning_rate": 6.274320630326419e-05, "loss": 0.0006, "step": 4959 }, { "epoch": 7.17, "learning_rate": 6.271104679208876e-05, "loss": 0.002, "step": 4960 }, { "epoch": 7.17, "learning_rate": 6.267888728091333e-05, "loss": 0.002, "step": 4961 }, { "epoch": 7.18, "learning_rate": 6.26467277697379e-05, "loss": 0.0001, "step": 4962 }, { "epoch": 7.18, "learning_rate": 6.261456825856247e-05, "loss": 0.0158, "step": 4963 }, { "epoch": 7.18, "learning_rate": 6.258240874738704e-05, "loss": 0.0335, "step": 4964 }, { "epoch": 7.18, "learning_rate": 6.255024923621161e-05, "loss": 0.0591, "step": 4965 }, { "epoch": 7.18, "learning_rate": 6.251808972503617e-05, "loss": 0.0396, "step": 4966 }, { "epoch": 7.18, "learning_rate": 6.248593021386076e-05, "loss": 0.0, "step": 4967 }, { "epoch": 7.18, "learning_rate": 6.245377070268533e-05, "loss": 0.0027, "step": 4968 }, { "epoch": 7.19, "learning_rate": 6.242161119150988e-05, "loss": 0.0495, "step": 4969 }, { "epoch": 7.19, "learning_rate": 6.238945168033447e-05, "loss": 0.0225, "step": 4970 }, { "epoch": 7.19, "learning_rate": 6.235729216915904e-05, "loss": 0.0231, "step": 4971 }, { "epoch": 7.19, "learning_rate": 6.23251326579836e-05, "loss": 0.0335, "step": 4972 }, { "epoch": 7.19, "learning_rate": 6.229297314680818e-05, "loss": 0.0008, "step": 4973 }, { "epoch": 7.19, "learning_rate": 6.226081363563275e-05, "loss": 0.001, "step": 4974 }, { "epoch": 7.19, "learning_rate": 6.22286541244573e-05, "loss": 0.0187, "step": 4975 }, { "epoch": 7.2, "learning_rate": 6.219649461328189e-05, "loss": 0.0694, "step": 4976 }, { "epoch": 7.2, "learning_rate": 6.216433510210645e-05, "loss": 0.0698, "step": 4977 }, { "epoch": 7.2, "learning_rate": 6.213217559093102e-05, "loss": 0.0129, "step": 4978 }, { "epoch": 7.2, "learning_rate": 6.210001607975559e-05, "loss": 0.0004, "step": 4979 }, { "epoch": 7.2, "learning_rate": 6.206785656858016e-05, "loss": 0.0008, "step": 4980 }, { "epoch": 7.2, "learning_rate": 6.203569705740473e-05, "loss": 0.0153, "step": 4981 }, { "epoch": 7.2, "learning_rate": 6.20035375462293e-05, "loss": 0.0008, "step": 4982 }, { "epoch": 7.21, "learning_rate": 6.197137803505387e-05, "loss": 0.0004, "step": 4983 }, { "epoch": 7.21, "learning_rate": 6.193921852387844e-05, "loss": 0.0885, "step": 4984 }, { "epoch": 7.21, "learning_rate": 6.190705901270301e-05, "loss": 0.0883, "step": 4985 }, { "epoch": 7.21, "learning_rate": 6.187489950152758e-05, "loss": 0.0113, "step": 4986 }, { "epoch": 7.21, "learning_rate": 6.184273999035215e-05, "loss": 0.0087, "step": 4987 }, { "epoch": 7.21, "learning_rate": 6.181058047917672e-05, "loss": 0.0035, "step": 4988 }, { "epoch": 7.21, "learning_rate": 6.177842096800129e-05, "loss": 0.0002, "step": 4989 }, { "epoch": 7.22, "learning_rate": 6.174626145682586e-05, "loss": 0.0427, "step": 4990 }, { "epoch": 7.22, "learning_rate": 6.171410194565042e-05, "loss": 0.0047, "step": 4991 }, { "epoch": 7.22, "learning_rate": 6.1681942434475e-05, "loss": 0.002, "step": 4992 }, { "epoch": 7.22, "learning_rate": 6.164978292329957e-05, "loss": 0.0043, "step": 4993 }, { "epoch": 7.22, "learning_rate": 6.161762341212413e-05, "loss": 0.0341, "step": 4994 }, { "epoch": 7.22, "learning_rate": 6.15854639009487e-05, "loss": 0.043, "step": 4995 }, { "epoch": 7.22, "learning_rate": 6.155330438977329e-05, "loss": 0.0013, "step": 4996 }, { "epoch": 7.23, "learning_rate": 6.152114487859784e-05, "loss": 0.0464, "step": 4997 }, { "epoch": 7.23, "learning_rate": 6.148898536742241e-05, "loss": 0.0851, "step": 4998 }, { "epoch": 7.23, "learning_rate": 6.145682585624698e-05, "loss": 0.0441, "step": 4999 }, { "epoch": 7.23, "learning_rate": 6.142466634507155e-05, "loss": 0.0108, "step": 5000 }, { "epoch": 7.23, "learning_rate": 6.139250683389613e-05, "loss": 0.0053, "step": 5001 }, { "epoch": 7.23, "learning_rate": 6.13603473227207e-05, "loss": 0.0043, "step": 5002 }, { "epoch": 7.23, "learning_rate": 6.132818781154527e-05, "loss": 0.0012, "step": 5003 }, { "epoch": 7.24, "learning_rate": 6.129602830036984e-05, "loss": 0.0023, "step": 5004 }, { "epoch": 7.24, "learning_rate": 6.126386878919441e-05, "loss": 0.0002, "step": 5005 }, { "epoch": 7.24, "learning_rate": 6.123170927801898e-05, "loss": 0.0221, "step": 5006 }, { "epoch": 7.24, "learning_rate": 6.119954976684355e-05, "loss": 0.0629, "step": 5007 }, { "epoch": 7.24, "learning_rate": 6.116739025566812e-05, "loss": 0.0218, "step": 5008 }, { "epoch": 7.24, "learning_rate": 6.113523074449269e-05, "loss": 0.0085, "step": 5009 }, { "epoch": 7.25, "learning_rate": 6.110307123331726e-05, "loss": 0.0664, "step": 5010 }, { "epoch": 7.25, "learning_rate": 6.107091172214182e-05, "loss": 0.0899, "step": 5011 }, { "epoch": 7.25, "learning_rate": 6.10387522109664e-05, "loss": 0.0346, "step": 5012 }, { "epoch": 7.25, "learning_rate": 6.1006592699790965e-05, "loss": 0.0105, "step": 5013 }, { "epoch": 7.25, "learning_rate": 6.097443318861553e-05, "loss": 0.0002, "step": 5014 }, { "epoch": 7.25, "learning_rate": 6.094227367744011e-05, "loss": 0.0045, "step": 5015 }, { "epoch": 7.25, "learning_rate": 6.0910114166264676e-05, "loss": 0.0307, "step": 5016 }, { "epoch": 7.26, "learning_rate": 6.087795465508924e-05, "loss": 0.0901, "step": 5017 }, { "epoch": 7.26, "learning_rate": 6.084579514391382e-05, "loss": 0.0015, "step": 5018 }, { "epoch": 7.26, "learning_rate": 6.081363563273839e-05, "loss": 0.0001, "step": 5019 }, { "epoch": 7.26, "learning_rate": 6.078147612156295e-05, "loss": 0.056, "step": 5020 }, { "epoch": 7.26, "learning_rate": 6.074931661038753e-05, "loss": 0.0523, "step": 5021 }, { "epoch": 7.26, "learning_rate": 6.07171570992121e-05, "loss": 0.0005, "step": 5022 }, { "epoch": 7.26, "learning_rate": 6.068499758803666e-05, "loss": 0.0001, "step": 5023 }, { "epoch": 7.27, "learning_rate": 6.065283807686123e-05, "loss": 0.0155, "step": 5024 }, { "epoch": 7.27, "learning_rate": 6.0620678565685804e-05, "loss": 0.0067, "step": 5025 }, { "epoch": 7.27, "learning_rate": 6.0588519054510375e-05, "loss": 0.0214, "step": 5026 }, { "epoch": 7.27, "learning_rate": 6.055635954333494e-05, "loss": 0.049, "step": 5027 }, { "epoch": 7.27, "learning_rate": 6.0524200032159516e-05, "loss": 0.0041, "step": 5028 }, { "epoch": 7.27, "learning_rate": 6.0492040520984086e-05, "loss": 0.0106, "step": 5029 }, { "epoch": 7.27, "learning_rate": 6.045988100980865e-05, "loss": 0.0501, "step": 5030 }, { "epoch": 7.28, "learning_rate": 6.042772149863323e-05, "loss": 0.0502, "step": 5031 }, { "epoch": 7.28, "learning_rate": 6.039556198745779e-05, "loss": 0.1036, "step": 5032 }, { "epoch": 7.28, "learning_rate": 6.036340247628236e-05, "loss": 0.0496, "step": 5033 }, { "epoch": 7.28, "learning_rate": 6.033124296510694e-05, "loss": 0.04, "step": 5034 }, { "epoch": 7.28, "learning_rate": 6.02990834539315e-05, "loss": 0.0054, "step": 5035 }, { "epoch": 7.28, "learning_rate": 6.026692394275607e-05, "loss": 0.0393, "step": 5036 }, { "epoch": 7.28, "learning_rate": 6.023476443158065e-05, "loss": 0.0001, "step": 5037 }, { "epoch": 7.29, "learning_rate": 6.0202604920405214e-05, "loss": 0.0708, "step": 5038 }, { "epoch": 7.29, "learning_rate": 6.017044540922978e-05, "loss": 0.0583, "step": 5039 }, { "epoch": 7.29, "learning_rate": 6.013828589805435e-05, "loss": 0.0008, "step": 5040 }, { "epoch": 7.29, "learning_rate": 6.0106126386878926e-05, "loss": 0.0571, "step": 5041 }, { "epoch": 7.29, "learning_rate": 6.007396687570349e-05, "loss": 0.0024, "step": 5042 }, { "epoch": 7.29, "learning_rate": 6.004180736452806e-05, "loss": 0.0018, "step": 5043 }, { "epoch": 7.29, "learning_rate": 6.000964785335264e-05, "loss": 0.0032, "step": 5044 }, { "epoch": 7.3, "learning_rate": 5.99774883421772e-05, "loss": 0.0336, "step": 5045 }, { "epoch": 7.3, "learning_rate": 5.9945328831001765e-05, "loss": 0.0312, "step": 5046 }, { "epoch": 7.3, "learning_rate": 5.991316931982635e-05, "loss": 0.0001, "step": 5047 }, { "epoch": 7.3, "learning_rate": 5.988100980865091e-05, "loss": 0.0901, "step": 5048 }, { "epoch": 7.3, "learning_rate": 5.9848850297475476e-05, "loss": 0.0234, "step": 5049 }, { "epoch": 7.3, "learning_rate": 5.9816690786300053e-05, "loss": 0.0376, "step": 5050 }, { "epoch": 7.3, "learning_rate": 5.9784531275124624e-05, "loss": 0.0004, "step": 5051 }, { "epoch": 7.31, "learning_rate": 5.975237176394919e-05, "loss": 0.0042, "step": 5052 }, { "epoch": 7.31, "learning_rate": 5.9720212252773765e-05, "loss": 0.013, "step": 5053 }, { "epoch": 7.31, "learning_rate": 5.9688052741598335e-05, "loss": 0.013, "step": 5054 }, { "epoch": 7.31, "learning_rate": 5.96558932304229e-05, "loss": 0.0037, "step": 5055 }, { "epoch": 7.31, "learning_rate": 5.962373371924746e-05, "loss": 0.061, "step": 5056 }, { "epoch": 7.31, "learning_rate": 5.959157420807204e-05, "loss": 0.0009, "step": 5057 }, { "epoch": 7.31, "learning_rate": 5.955941469689661e-05, "loss": 0.025, "step": 5058 }, { "epoch": 7.32, "learning_rate": 5.9527255185721174e-05, "loss": 0.0071, "step": 5059 }, { "epoch": 7.32, "learning_rate": 5.949509567454575e-05, "loss": 0.0077, "step": 5060 }, { "epoch": 7.32, "learning_rate": 5.946293616337032e-05, "loss": 0.049, "step": 5061 }, { "epoch": 7.32, "learning_rate": 5.9430776652194886e-05, "loss": 0.0, "step": 5062 }, { "epoch": 7.32, "learning_rate": 5.939861714101946e-05, "loss": 0.0679, "step": 5063 }, { "epoch": 7.32, "learning_rate": 5.936645762984403e-05, "loss": 0.041, "step": 5064 }, { "epoch": 7.32, "learning_rate": 5.93342981186686e-05, "loss": 0.0698, "step": 5065 }, { "epoch": 7.33, "learning_rate": 5.9302138607493175e-05, "loss": 0.0048, "step": 5066 }, { "epoch": 7.33, "learning_rate": 5.926997909631774e-05, "loss": 0.0075, "step": 5067 }, { "epoch": 7.33, "learning_rate": 5.923781958514231e-05, "loss": 0.0639, "step": 5068 }, { "epoch": 7.33, "learning_rate": 5.920566007396687e-05, "loss": 0.0012, "step": 5069 }, { "epoch": 7.33, "learning_rate": 5.917350056279145e-05, "loss": 0.0568, "step": 5070 }, { "epoch": 7.33, "learning_rate": 5.9141341051616014e-05, "loss": 0.0244, "step": 5071 }, { "epoch": 7.33, "learning_rate": 5.9109181540440584e-05, "loss": 0.0137, "step": 5072 }, { "epoch": 7.34, "learning_rate": 5.907702202926516e-05, "loss": 0.0024, "step": 5073 }, { "epoch": 7.34, "learning_rate": 5.9044862518089725e-05, "loss": 0.0108, "step": 5074 }, { "epoch": 7.34, "learning_rate": 5.9012703006914296e-05, "loss": 0.0051, "step": 5075 }, { "epoch": 7.34, "learning_rate": 5.898054349573887e-05, "loss": 0.0466, "step": 5076 }, { "epoch": 7.34, "learning_rate": 5.894838398456344e-05, "loss": 0.0105, "step": 5077 }, { "epoch": 7.34, "learning_rate": 5.891622447338801e-05, "loss": 0.0004, "step": 5078 }, { "epoch": 7.34, "learning_rate": 5.8884064962212585e-05, "loss": 0.0261, "step": 5079 }, { "epoch": 7.35, "learning_rate": 5.885190545103715e-05, "loss": 0.0029, "step": 5080 }, { "epoch": 7.35, "learning_rate": 5.881974593986171e-05, "loss": 0.014, "step": 5081 }, { "epoch": 7.35, "learning_rate": 5.878758642868629e-05, "loss": 0.0248, "step": 5082 }, { "epoch": 7.35, "learning_rate": 5.875542691751086e-05, "loss": 0.0009, "step": 5083 }, { "epoch": 7.35, "learning_rate": 5.8723267406335424e-05, "loss": 0.0862, "step": 5084 }, { "epoch": 7.35, "learning_rate": 5.8691107895159994e-05, "loss": 0.0762, "step": 5085 }, { "epoch": 7.36, "learning_rate": 5.865894838398457e-05, "loss": 0.0049, "step": 5086 }, { "epoch": 7.36, "learning_rate": 5.8626788872809135e-05, "loss": 0.0085, "step": 5087 }, { "epoch": 7.36, "learning_rate": 5.85946293616337e-05, "loss": 0.0668, "step": 5088 }, { "epoch": 7.36, "learning_rate": 5.8562469850458276e-05, "loss": 0.0, "step": 5089 }, { "epoch": 7.36, "learning_rate": 5.853031033928285e-05, "loss": 0.0582, "step": 5090 }, { "epoch": 7.36, "learning_rate": 5.849815082810741e-05, "loss": 0.0007, "step": 5091 }, { "epoch": 7.36, "learning_rate": 5.846599131693199e-05, "loss": 0.005, "step": 5092 }, { "epoch": 7.37, "learning_rate": 5.843383180575656e-05, "loss": 0.0002, "step": 5093 }, { "epoch": 7.37, "learning_rate": 5.840167229458112e-05, "loss": 0.0543, "step": 5094 }, { "epoch": 7.37, "learning_rate": 5.83695127834057e-05, "loss": 0.0405, "step": 5095 }, { "epoch": 7.37, "learning_rate": 5.833735327223026e-05, "loss": 0.0019, "step": 5096 }, { "epoch": 7.37, "learning_rate": 5.8305193761054834e-05, "loss": 0.006, "step": 5097 }, { "epoch": 7.37, "learning_rate": 5.82730342498794e-05, "loss": 0.0143, "step": 5098 }, { "epoch": 7.37, "learning_rate": 5.8240874738703975e-05, "loss": 0.0117, "step": 5099 }, { "epoch": 7.38, "learning_rate": 5.8208715227528545e-05, "loss": 0.0486, "step": 5100 }, { "epoch": 7.38, "learning_rate": 5.817655571635311e-05, "loss": 0.0049, "step": 5101 }, { "epoch": 7.38, "learning_rate": 5.8144396205177686e-05, "loss": 0.001, "step": 5102 }, { "epoch": 7.38, "learning_rate": 5.811223669400226e-05, "loss": 0.0012, "step": 5103 }, { "epoch": 7.38, "learning_rate": 5.808007718282682e-05, "loss": 0.0586, "step": 5104 }, { "epoch": 7.38, "learning_rate": 5.80479176716514e-05, "loss": 0.0435, "step": 5105 }, { "epoch": 7.38, "learning_rate": 5.801575816047596e-05, "loss": 0.1699, "step": 5106 }, { "epoch": 7.39, "learning_rate": 5.798359864930053e-05, "loss": 0.0004, "step": 5107 }, { "epoch": 7.39, "learning_rate": 5.795143913812511e-05, "loss": 0.0168, "step": 5108 }, { "epoch": 7.39, "learning_rate": 5.791927962694967e-05, "loss": 0.0012, "step": 5109 }, { "epoch": 7.39, "learning_rate": 5.7887120115774244e-05, "loss": 0.0009, "step": 5110 }, { "epoch": 7.39, "learning_rate": 5.785496060459882e-05, "loss": 0.0701, "step": 5111 }, { "epoch": 7.39, "learning_rate": 5.7822801093423385e-05, "loss": 0.0113, "step": 5112 }, { "epoch": 7.39, "learning_rate": 5.779064158224795e-05, "loss": 0.032, "step": 5113 }, { "epoch": 7.4, "learning_rate": 5.775848207107252e-05, "loss": 0.032, "step": 5114 }, { "epoch": 7.4, "learning_rate": 5.7726322559897096e-05, "loss": 0.0001, "step": 5115 }, { "epoch": 7.4, "learning_rate": 5.769416304872166e-05, "loss": 0.0162, "step": 5116 }, { "epoch": 7.4, "learning_rate": 5.766200353754623e-05, "loss": 0.0221, "step": 5117 }, { "epoch": 7.4, "learning_rate": 5.762984402637081e-05, "loss": 0.0033, "step": 5118 }, { "epoch": 7.4, "learning_rate": 5.759768451519537e-05, "loss": 0.0078, "step": 5119 }, { "epoch": 7.4, "learning_rate": 5.7565525004019935e-05, "loss": 0.0109, "step": 5120 }, { "epoch": 7.41, "learning_rate": 5.753336549284451e-05, "loss": 0.0004, "step": 5121 }, { "epoch": 7.41, "learning_rate": 5.750120598166908e-05, "loss": 0.0224, "step": 5122 }, { "epoch": 7.41, "learning_rate": 5.746904647049365e-05, "loss": 0.0066, "step": 5123 }, { "epoch": 7.41, "learning_rate": 5.7436886959318224e-05, "loss": 0.0533, "step": 5124 }, { "epoch": 7.41, "learning_rate": 5.7404727448142794e-05, "loss": 0.1218, "step": 5125 }, { "epoch": 7.41, "learning_rate": 5.737256793696736e-05, "loss": 0.1104, "step": 5126 }, { "epoch": 7.41, "learning_rate": 5.7340408425791935e-05, "loss": 0.0076, "step": 5127 }, { "epoch": 7.42, "learning_rate": 5.73082489146165e-05, "loss": 0.0071, "step": 5128 }, { "epoch": 7.42, "learning_rate": 5.727608940344107e-05, "loss": 0.0003, "step": 5129 }, { "epoch": 7.42, "learning_rate": 5.7243929892265633e-05, "loss": 0.0155, "step": 5130 }, { "epoch": 7.42, "learning_rate": 5.721177038109021e-05, "loss": 0.0826, "step": 5131 }, { "epoch": 7.42, "learning_rate": 5.717961086991478e-05, "loss": 0.0013, "step": 5132 }, { "epoch": 7.42, "learning_rate": 5.7147451358739345e-05, "loss": 0.0357, "step": 5133 }, { "epoch": 7.42, "learning_rate": 5.711529184756392e-05, "loss": 0.0001, "step": 5134 }, { "epoch": 7.43, "learning_rate": 5.708313233638849e-05, "loss": 0.0447, "step": 5135 }, { "epoch": 7.43, "learning_rate": 5.7050972825213057e-05, "loss": 0.0012, "step": 5136 }, { "epoch": 7.43, "learning_rate": 5.7018813314037634e-05, "loss": 0.0011, "step": 5137 }, { "epoch": 7.43, "learning_rate": 5.69866538028622e-05, "loss": 0.0139, "step": 5138 }, { "epoch": 7.43, "learning_rate": 5.695449429168677e-05, "loss": 0.0068, "step": 5139 }, { "epoch": 7.43, "learning_rate": 5.6922334780511345e-05, "loss": 0.0014, "step": 5140 }, { "epoch": 7.43, "learning_rate": 5.689017526933591e-05, "loss": 0.0165, "step": 5141 }, { "epoch": 7.44, "learning_rate": 5.685801575816048e-05, "loss": 0.0002, "step": 5142 }, { "epoch": 7.44, "learning_rate": 5.682585624698504e-05, "loss": 0.0328, "step": 5143 }, { "epoch": 7.44, "learning_rate": 5.679369673580962e-05, "loss": 0.0048, "step": 5144 }, { "epoch": 7.44, "learning_rate": 5.6761537224634184e-05, "loss": 0.0011, "step": 5145 }, { "epoch": 7.44, "learning_rate": 5.6729377713458755e-05, "loss": 0.0396, "step": 5146 }, { "epoch": 7.44, "learning_rate": 5.669721820228333e-05, "loss": 0.0012, "step": 5147 }, { "epoch": 7.44, "learning_rate": 5.6665058691107896e-05, "loss": 0.0203, "step": 5148 }, { "epoch": 7.45, "learning_rate": 5.6632899179932466e-05, "loss": 0.0062, "step": 5149 }, { "epoch": 7.45, "learning_rate": 5.6600739668757044e-05, "loss": 0.0099, "step": 5150 }, { "epoch": 7.45, "learning_rate": 5.656858015758161e-05, "loss": 0.0014, "step": 5151 }, { "epoch": 7.45, "learning_rate": 5.653642064640617e-05, "loss": 0.0612, "step": 5152 }, { "epoch": 7.45, "learning_rate": 5.650426113523075e-05, "loss": 0.0357, "step": 5153 }, { "epoch": 7.45, "learning_rate": 5.647210162405532e-05, "loss": 0.0015, "step": 5154 }, { "epoch": 7.45, "learning_rate": 5.643994211287988e-05, "loss": 0.0097, "step": 5155 }, { "epoch": 7.46, "learning_rate": 5.640778260170446e-05, "loss": 0.0402, "step": 5156 }, { "epoch": 7.46, "learning_rate": 5.637562309052903e-05, "loss": 0.0, "step": 5157 }, { "epoch": 7.46, "learning_rate": 5.6343463579353594e-05, "loss": 0.0958, "step": 5158 }, { "epoch": 7.46, "learning_rate": 5.631130406817816e-05, "loss": 0.0046, "step": 5159 }, { "epoch": 7.46, "learning_rate": 5.627914455700274e-05, "loss": 0.0472, "step": 5160 }, { "epoch": 7.46, "learning_rate": 5.6246985045827306e-05, "loss": 0.0044, "step": 5161 }, { "epoch": 7.46, "learning_rate": 5.621482553465187e-05, "loss": 0.1125, "step": 5162 }, { "epoch": 7.47, "learning_rate": 5.618266602347645e-05, "loss": 0.0007, "step": 5163 }, { "epoch": 7.47, "learning_rate": 5.615050651230102e-05, "loss": 0.0024, "step": 5164 }, { "epoch": 7.47, "learning_rate": 5.611834700112558e-05, "loss": 0.0829, "step": 5165 }, { "epoch": 7.47, "learning_rate": 5.608618748995016e-05, "loss": 0.0075, "step": 5166 }, { "epoch": 7.47, "learning_rate": 5.605402797877473e-05, "loss": 0.0199, "step": 5167 }, { "epoch": 7.47, "learning_rate": 5.602186846759929e-05, "loss": 0.0339, "step": 5168 }, { "epoch": 7.48, "learning_rate": 5.598970895642387e-05, "loss": 0.0879, "step": 5169 }, { "epoch": 7.48, "learning_rate": 5.5957549445248434e-05, "loss": 0.0083, "step": 5170 }, { "epoch": 7.48, "learning_rate": 5.5925389934073004e-05, "loss": 0.0031, "step": 5171 }, { "epoch": 7.48, "learning_rate": 5.589323042289758e-05, "loss": 0.0044, "step": 5172 }, { "epoch": 7.48, "learning_rate": 5.5861070911722145e-05, "loss": 0.0393, "step": 5173 }, { "epoch": 7.48, "learning_rate": 5.5828911400546716e-05, "loss": 0.002, "step": 5174 }, { "epoch": 7.48, "learning_rate": 5.579675188937128e-05, "loss": 0.066, "step": 5175 }, { "epoch": 7.49, "learning_rate": 5.576459237819586e-05, "loss": 0.0193, "step": 5176 }, { "epoch": 7.49, "learning_rate": 5.573243286702042e-05, "loss": 0.0179, "step": 5177 }, { "epoch": 7.49, "learning_rate": 5.570027335584499e-05, "loss": 0.0037, "step": 5178 }, { "epoch": 7.49, "learning_rate": 5.566811384466957e-05, "loss": 0.0704, "step": 5179 }, { "epoch": 7.49, "learning_rate": 5.563595433349413e-05, "loss": 0.0281, "step": 5180 }, { "epoch": 7.49, "learning_rate": 5.56037948223187e-05, "loss": 0.0633, "step": 5181 }, { "epoch": 7.49, "learning_rate": 5.557163531114328e-05, "loss": 0.0077, "step": 5182 }, { "epoch": 7.5, "learning_rate": 5.5539475799967843e-05, "loss": 0.0031, "step": 5183 }, { "epoch": 7.5, "learning_rate": 5.550731628879241e-05, "loss": 0.0434, "step": 5184 }, { "epoch": 7.5, "learning_rate": 5.5475156777616984e-05, "loss": 0.0052, "step": 5185 }, { "epoch": 7.5, "learning_rate": 5.5442997266441555e-05, "loss": 0.0402, "step": 5186 }, { "epoch": 7.5, "learning_rate": 5.541083775526612e-05, "loss": 0.0532, "step": 5187 }, { "epoch": 7.5, "learning_rate": 5.537867824409069e-05, "loss": 0.0118, "step": 5188 }, { "epoch": 7.5, "learning_rate": 5.5346518732915267e-05, "loss": 0.012, "step": 5189 }, { "epoch": 7.51, "learning_rate": 5.531435922173983e-05, "loss": 0.0181, "step": 5190 }, { "epoch": 7.51, "learning_rate": 5.5282199710564394e-05, "loss": 0.0867, "step": 5191 }, { "epoch": 7.51, "learning_rate": 5.525004019938898e-05, "loss": 0.0718, "step": 5192 }, { "epoch": 7.51, "learning_rate": 5.521788068821354e-05, "loss": 0.0131, "step": 5193 }, { "epoch": 7.51, "learning_rate": 5.5185721177038106e-05, "loss": 0.0003, "step": 5194 }, { "epoch": 7.51, "learning_rate": 5.515356166586268e-05, "loss": 0.0155, "step": 5195 }, { "epoch": 7.51, "learning_rate": 5.512140215468725e-05, "loss": 0.0153, "step": 5196 }, { "epoch": 7.52, "learning_rate": 5.508924264351182e-05, "loss": 0.0368, "step": 5197 }, { "epoch": 7.52, "learning_rate": 5.5057083132336394e-05, "loss": 0.0226, "step": 5198 }, { "epoch": 7.52, "learning_rate": 5.5024923621160965e-05, "loss": 0.0003, "step": 5199 }, { "epoch": 7.52, "learning_rate": 5.499276410998553e-05, "loss": 0.0425, "step": 5200 }, { "epoch": 7.52, "learning_rate": 5.4960604598810106e-05, "loss": 0.0065, "step": 5201 }, { "epoch": 7.52, "learning_rate": 5.492844508763467e-05, "loss": 0.008, "step": 5202 }, { "epoch": 7.52, "learning_rate": 5.489628557645924e-05, "loss": 0.1279, "step": 5203 }, { "epoch": 7.53, "learning_rate": 5.4864126065283804e-05, "loss": 0.0321, "step": 5204 }, { "epoch": 7.53, "learning_rate": 5.483196655410838e-05, "loss": 0.0027, "step": 5205 }, { "epoch": 7.53, "learning_rate": 5.479980704293295e-05, "loss": 0.0363, "step": 5206 }, { "epoch": 7.53, "learning_rate": 5.4767647531757515e-05, "loss": 0.0059, "step": 5207 }, { "epoch": 7.53, "learning_rate": 5.473548802058209e-05, "loss": 0.0576, "step": 5208 }, { "epoch": 7.53, "learning_rate": 5.4703328509406656e-05, "loss": 0.0035, "step": 5209 }, { "epoch": 7.53, "learning_rate": 5.467116899823123e-05, "loss": 0.1033, "step": 5210 }, { "epoch": 7.54, "learning_rate": 5.4639009487055804e-05, "loss": 0.0077, "step": 5211 }, { "epoch": 7.54, "learning_rate": 5.460684997588037e-05, "loss": 0.0003, "step": 5212 }, { "epoch": 7.54, "learning_rate": 5.457469046470494e-05, "loss": 0.0153, "step": 5213 }, { "epoch": 7.54, "learning_rate": 5.4542530953529516e-05, "loss": 0.0074, "step": 5214 }, { "epoch": 7.54, "learning_rate": 5.451037144235408e-05, "loss": 0.0172, "step": 5215 }, { "epoch": 7.54, "learning_rate": 5.447821193117864e-05, "loss": 0.0105, "step": 5216 }, { "epoch": 7.54, "learning_rate": 5.444605242000323e-05, "loss": 0.027, "step": 5217 }, { "epoch": 7.55, "learning_rate": 5.441389290882779e-05, "loss": 0.0027, "step": 5218 }, { "epoch": 7.55, "learning_rate": 5.4381733397652355e-05, "loss": 0.0434, "step": 5219 }, { "epoch": 7.55, "learning_rate": 5.4349573886476925e-05, "loss": 0.0977, "step": 5220 }, { "epoch": 7.55, "learning_rate": 5.43174143753015e-05, "loss": 0.0046, "step": 5221 }, { "epoch": 7.55, "learning_rate": 5.4285254864126066e-05, "loss": 0.0012, "step": 5222 }, { "epoch": 7.55, "learning_rate": 5.425309535295064e-05, "loss": 0.0041, "step": 5223 }, { "epoch": 7.55, "learning_rate": 5.4220935841775214e-05, "loss": 0.0, "step": 5224 }, { "epoch": 7.56, "learning_rate": 5.418877633059978e-05, "loss": 0.0019, "step": 5225 }, { "epoch": 7.56, "learning_rate": 5.415661681942434e-05, "loss": 0.0187, "step": 5226 }, { "epoch": 7.56, "learning_rate": 5.412445730824892e-05, "loss": 0.1328, "step": 5227 }, { "epoch": 7.56, "learning_rate": 5.409229779707349e-05, "loss": 0.0, "step": 5228 }, { "epoch": 7.56, "learning_rate": 5.406013828589805e-05, "loss": 0.043, "step": 5229 }, { "epoch": 7.56, "learning_rate": 5.402797877472263e-05, "loss": 0.0564, "step": 5230 }, { "epoch": 7.56, "learning_rate": 5.39958192635472e-05, "loss": 0.1058, "step": 5231 }, { "epoch": 7.57, "learning_rate": 5.3963659752371765e-05, "loss": 0.0017, "step": 5232 }, { "epoch": 7.57, "learning_rate": 5.393150024119633e-05, "loss": 0.0001, "step": 5233 }, { "epoch": 7.57, "learning_rate": 5.3899340730020906e-05, "loss": 0.0067, "step": 5234 }, { "epoch": 7.57, "learning_rate": 5.3867181218845476e-05, "loss": 0.014, "step": 5235 }, { "epoch": 7.57, "learning_rate": 5.383502170767004e-05, "loss": 0.0757, "step": 5236 }, { "epoch": 7.57, "learning_rate": 5.380286219649462e-05, "loss": 0.0313, "step": 5237 }, { "epoch": 7.57, "learning_rate": 5.377070268531919e-05, "loss": 0.0104, "step": 5238 }, { "epoch": 7.58, "learning_rate": 5.373854317414375e-05, "loss": 0.0, "step": 5239 }, { "epoch": 7.58, "learning_rate": 5.370638366296833e-05, "loss": 0.0052, "step": 5240 }, { "epoch": 7.58, "learning_rate": 5.367422415179289e-05, "loss": 0.0089, "step": 5241 }, { "epoch": 7.58, "learning_rate": 5.364206464061746e-05, "loss": 0.0534, "step": 5242 }, { "epoch": 7.58, "learning_rate": 5.360990512944204e-05, "loss": 0.0, "step": 5243 }, { "epoch": 7.58, "learning_rate": 5.3577745618266604e-05, "loss": 0.0048, "step": 5244 }, { "epoch": 7.58, "learning_rate": 5.3545586107091175e-05, "loss": 0.0044, "step": 5245 }, { "epoch": 7.59, "learning_rate": 5.351342659591575e-05, "loss": 0.0056, "step": 5246 }, { "epoch": 7.59, "learning_rate": 5.3481267084740316e-05, "loss": 0.0007, "step": 5247 }, { "epoch": 7.59, "learning_rate": 5.3449107573564886e-05, "loss": 0.0003, "step": 5248 }, { "epoch": 7.59, "learning_rate": 5.341694806238945e-05, "loss": 0.001, "step": 5249 }, { "epoch": 7.59, "learning_rate": 5.338478855121403e-05, "loss": 0.0003, "step": 5250 }, { "epoch": 7.59, "learning_rate": 5.335262904003859e-05, "loss": 0.0384, "step": 5251 }, { "epoch": 7.6, "learning_rate": 5.332046952886316e-05, "loss": 0.0, "step": 5252 }, { "epoch": 7.6, "learning_rate": 5.328831001768774e-05, "loss": 0.0757, "step": 5253 }, { "epoch": 7.6, "learning_rate": 5.32561505065123e-05, "loss": 0.0318, "step": 5254 }, { "epoch": 7.6, "learning_rate": 5.322399099533687e-05, "loss": 0.0266, "step": 5255 }, { "epoch": 7.6, "learning_rate": 5.319183148416145e-05, "loss": 0.004, "step": 5256 }, { "epoch": 7.6, "learning_rate": 5.3159671972986014e-05, "loss": 0.0003, "step": 5257 }, { "epoch": 7.6, "learning_rate": 5.312751246181058e-05, "loss": 0.0822, "step": 5258 }, { "epoch": 7.61, "learning_rate": 5.3095352950635155e-05, "loss": 0.0003, "step": 5259 }, { "epoch": 7.61, "learning_rate": 5.3063193439459725e-05, "loss": 0.0051, "step": 5260 }, { "epoch": 7.61, "learning_rate": 5.303103392828429e-05, "loss": 0.069, "step": 5261 }, { "epoch": 7.61, "learning_rate": 5.2998874417108867e-05, "loss": 0.0134, "step": 5262 }, { "epoch": 7.61, "learning_rate": 5.296671490593344e-05, "loss": 0.0442, "step": 5263 }, { "epoch": 7.61, "learning_rate": 5.2934555394758e-05, "loss": 0.0024, "step": 5264 }, { "epoch": 7.61, "learning_rate": 5.2902395883582564e-05, "loss": 0.0346, "step": 5265 }, { "epoch": 7.62, "learning_rate": 5.287023637240714e-05, "loss": 0.0178, "step": 5266 }, { "epoch": 7.62, "learning_rate": 5.283807686123171e-05, "loss": 0.0051, "step": 5267 }, { "epoch": 7.62, "learning_rate": 5.2805917350056276e-05, "loss": 0.0535, "step": 5268 }, { "epoch": 7.62, "learning_rate": 5.277375783888085e-05, "loss": 0.0826, "step": 5269 }, { "epoch": 7.62, "learning_rate": 5.2741598327705424e-05, "loss": 0.0526, "step": 5270 }, { "epoch": 7.62, "learning_rate": 5.270943881652999e-05, "loss": 0.053, "step": 5271 }, { "epoch": 7.62, "learning_rate": 5.2677279305354565e-05, "loss": 0.0035, "step": 5272 }, { "epoch": 7.63, "learning_rate": 5.264511979417913e-05, "loss": 0.0, "step": 5273 }, { "epoch": 7.63, "learning_rate": 5.26129602830037e-05, "loss": 0.0477, "step": 5274 }, { "epoch": 7.63, "learning_rate": 5.2580800771828276e-05, "loss": 0.0034, "step": 5275 }, { "epoch": 7.63, "learning_rate": 5.254864126065284e-05, "loss": 0.002, "step": 5276 }, { "epoch": 7.63, "learning_rate": 5.251648174947741e-05, "loss": 0.0372, "step": 5277 }, { "epoch": 7.63, "learning_rate": 5.2484322238301974e-05, "loss": 0.0759, "step": 5278 }, { "epoch": 7.63, "learning_rate": 5.245216272712655e-05, "loss": 0.0011, "step": 5279 }, { "epoch": 7.64, "learning_rate": 5.242000321595112e-05, "loss": 0.0188, "step": 5280 }, { "epoch": 7.64, "learning_rate": 5.2387843704775686e-05, "loss": 0.0084, "step": 5281 }, { "epoch": 7.64, "learning_rate": 5.235568419360026e-05, "loss": 0.0286, "step": 5282 }, { "epoch": 7.64, "learning_rate": 5.232352468242483e-05, "loss": 0.0751, "step": 5283 }, { "epoch": 7.64, "learning_rate": 5.22913651712494e-05, "loss": 0.0032, "step": 5284 }, { "epoch": 7.64, "learning_rate": 5.2259205660073975e-05, "loss": 0.0054, "step": 5285 }, { "epoch": 7.64, "learning_rate": 5.222704614889854e-05, "loss": 0.0035, "step": 5286 }, { "epoch": 7.65, "learning_rate": 5.219488663772311e-05, "loss": 0.0012, "step": 5287 }, { "epoch": 7.65, "learning_rate": 5.2162727126547686e-05, "loss": 0.0, "step": 5288 }, { "epoch": 7.65, "learning_rate": 5.213056761537225e-05, "loss": 0.0004, "step": 5289 }, { "epoch": 7.65, "learning_rate": 5.2098408104196814e-05, "loss": 0.0105, "step": 5290 }, { "epoch": 7.65, "learning_rate": 5.206624859302139e-05, "loss": 0.0, "step": 5291 }, { "epoch": 7.65, "learning_rate": 5.203408908184596e-05, "loss": 0.0235, "step": 5292 }, { "epoch": 7.65, "learning_rate": 5.2001929570670525e-05, "loss": 0.0009, "step": 5293 }, { "epoch": 7.66, "learning_rate": 5.1969770059495096e-05, "loss": 0.0396, "step": 5294 }, { "epoch": 7.66, "learning_rate": 5.193761054831967e-05, "loss": 0.0767, "step": 5295 }, { "epoch": 7.66, "learning_rate": 5.190545103714424e-05, "loss": 0.0176, "step": 5296 }, { "epoch": 7.66, "learning_rate": 5.18732915259688e-05, "loss": 0.0063, "step": 5297 }, { "epoch": 7.66, "learning_rate": 5.184113201479338e-05, "loss": 0.0978, "step": 5298 }, { "epoch": 7.66, "learning_rate": 5.180897250361795e-05, "loss": 0.0192, "step": 5299 }, { "epoch": 7.66, "learning_rate": 5.177681299244251e-05, "loss": 0.0195, "step": 5300 }, { "epoch": 7.67, "learning_rate": 5.174465348126709e-05, "loss": 0.0085, "step": 5301 }, { "epoch": 7.67, "learning_rate": 5.171249397009166e-05, "loss": 0.1357, "step": 5302 }, { "epoch": 7.67, "learning_rate": 5.1680334458916224e-05, "loss": 0.0158, "step": 5303 }, { "epoch": 7.67, "learning_rate": 5.16481749477408e-05, "loss": 0.0059, "step": 5304 }, { "epoch": 7.67, "learning_rate": 5.161601543656537e-05, "loss": 0.0232, "step": 5305 }, { "epoch": 7.67, "learning_rate": 5.1583855925389935e-05, "loss": 0.0051, "step": 5306 }, { "epoch": 7.67, "learning_rate": 5.15516964142145e-05, "loss": 0.0154, "step": 5307 }, { "epoch": 7.68, "learning_rate": 5.1519536903039076e-05, "loss": 0.004, "step": 5308 }, { "epoch": 7.68, "learning_rate": 5.148737739186365e-05, "loss": 0.0716, "step": 5309 }, { "epoch": 7.68, "learning_rate": 5.145521788068821e-05, "loss": 0.0001, "step": 5310 }, { "epoch": 7.68, "learning_rate": 5.142305836951279e-05, "loss": 0.0073, "step": 5311 }, { "epoch": 7.68, "learning_rate": 5.139089885833736e-05, "loss": 0.0055, "step": 5312 }, { "epoch": 7.68, "learning_rate": 5.135873934716192e-05, "loss": 0.0278, "step": 5313 }, { "epoch": 7.68, "learning_rate": 5.13265798359865e-05, "loss": 0.0248, "step": 5314 }, { "epoch": 7.69, "learning_rate": 5.129442032481106e-05, "loss": 0.0003, "step": 5315 }, { "epoch": 7.69, "learning_rate": 5.1262260813635634e-05, "loss": 0.0028, "step": 5316 }, { "epoch": 7.69, "learning_rate": 5.123010130246021e-05, "loss": 0.0271, "step": 5317 }, { "epoch": 7.69, "learning_rate": 5.1197941791284775e-05, "loss": 0.0006, "step": 5318 }, { "epoch": 7.69, "learning_rate": 5.1165782280109345e-05, "loss": 0.0001, "step": 5319 }, { "epoch": 7.69, "learning_rate": 5.113362276893392e-05, "loss": 0.0, "step": 5320 }, { "epoch": 7.69, "learning_rate": 5.1101463257758486e-05, "loss": 0.0602, "step": 5321 }, { "epoch": 7.7, "learning_rate": 5.106930374658305e-05, "loss": 0.0391, "step": 5322 }, { "epoch": 7.7, "learning_rate": 5.103714423540762e-05, "loss": 0.0013, "step": 5323 }, { "epoch": 7.7, "learning_rate": 5.10049847242322e-05, "loss": 0.0075, "step": 5324 }, { "epoch": 7.7, "learning_rate": 5.097282521305676e-05, "loss": 0.0041, "step": 5325 }, { "epoch": 7.7, "learning_rate": 5.094066570188133e-05, "loss": 0.0152, "step": 5326 }, { "epoch": 7.7, "learning_rate": 5.090850619070591e-05, "loss": 0.0001, "step": 5327 }, { "epoch": 7.7, "learning_rate": 5.087634667953047e-05, "loss": 0.0412, "step": 5328 }, { "epoch": 7.71, "learning_rate": 5.084418716835504e-05, "loss": 0.0066, "step": 5329 }, { "epoch": 7.71, "learning_rate": 5.0812027657179614e-05, "loss": 0.0072, "step": 5330 }, { "epoch": 7.71, "learning_rate": 5.0779868146004184e-05, "loss": 0.0294, "step": 5331 }, { "epoch": 7.71, "learning_rate": 5.074770863482875e-05, "loss": 0.055, "step": 5332 }, { "epoch": 7.71, "learning_rate": 5.0715549123653325e-05, "loss": 0.0095, "step": 5333 }, { "epoch": 7.71, "learning_rate": 5.0683389612477896e-05, "loss": 0.0005, "step": 5334 }, { "epoch": 7.72, "learning_rate": 5.065123010130246e-05, "loss": 0.0283, "step": 5335 }, { "epoch": 7.72, "learning_rate": 5.061907059012704e-05, "loss": 0.0164, "step": 5336 }, { "epoch": 7.72, "learning_rate": 5.058691107895161e-05, "loss": 0.005, "step": 5337 }, { "epoch": 7.72, "learning_rate": 5.055475156777617e-05, "loss": 0.0034, "step": 5338 }, { "epoch": 7.72, "learning_rate": 5.0522592056600735e-05, "loss": 0.0023, "step": 5339 }, { "epoch": 7.72, "learning_rate": 5.049043254542531e-05, "loss": 0.0002, "step": 5340 }, { "epoch": 7.72, "learning_rate": 5.045827303424988e-05, "loss": 0.0005, "step": 5341 }, { "epoch": 7.73, "learning_rate": 5.0426113523074446e-05, "loss": 0.0012, "step": 5342 }, { "epoch": 7.73, "learning_rate": 5.0393954011899024e-05, "loss": 0.0001, "step": 5343 }, { "epoch": 7.73, "learning_rate": 5.0361794500723594e-05, "loss": 0.0015, "step": 5344 }, { "epoch": 7.73, "learning_rate": 5.032963498954816e-05, "loss": 0.0031, "step": 5345 }, { "epoch": 7.73, "learning_rate": 5.0297475478372735e-05, "loss": 0.0292, "step": 5346 }, { "epoch": 7.73, "learning_rate": 5.02653159671973e-05, "loss": 0.0084, "step": 5347 }, { "epoch": 7.73, "learning_rate": 5.023315645602187e-05, "loss": 0.0226, "step": 5348 }, { "epoch": 7.74, "learning_rate": 5.020099694484645e-05, "loss": 0.0117, "step": 5349 }, { "epoch": 7.74, "learning_rate": 5.016883743367101e-05, "loss": 0.0019, "step": 5350 }, { "epoch": 7.74, "learning_rate": 5.013667792249558e-05, "loss": 0.0294, "step": 5351 }, { "epoch": 7.74, "learning_rate": 5.0104518411320145e-05, "loss": 0.0, "step": 5352 }, { "epoch": 7.74, "learning_rate": 5.007235890014472e-05, "loss": 0.0134, "step": 5353 }, { "epoch": 7.74, "learning_rate": 5.0040199388969286e-05, "loss": 0.004, "step": 5354 }, { "epoch": 7.74, "learning_rate": 5.0008039877793856e-05, "loss": 0.0109, "step": 5355 }, { "epoch": 7.75, "learning_rate": 4.997588036661843e-05, "loss": 0.0542, "step": 5356 }, { "epoch": 7.75, "learning_rate": 4.9943720855443e-05, "loss": 0.0196, "step": 5357 }, { "epoch": 7.75, "learning_rate": 4.991156134426757e-05, "loss": 0.0066, "step": 5358 }, { "epoch": 7.75, "learning_rate": 4.987940183309214e-05, "loss": 0.0039, "step": 5359 }, { "epoch": 7.75, "learning_rate": 4.984724232191671e-05, "loss": 0.0378, "step": 5360 }, { "epoch": 7.75, "learning_rate": 4.981508281074128e-05, "loss": 0.0, "step": 5361 }, { "epoch": 7.75, "learning_rate": 4.978292329956585e-05, "loss": 0.0004, "step": 5362 }, { "epoch": 7.76, "learning_rate": 4.975076378839042e-05, "loss": 0.0052, "step": 5363 }, { "epoch": 7.76, "learning_rate": 4.9718604277214984e-05, "loss": 0.0013, "step": 5364 }, { "epoch": 7.76, "learning_rate": 4.968644476603956e-05, "loss": 0.0218, "step": 5365 }, { "epoch": 7.76, "learning_rate": 4.965428525486413e-05, "loss": 0.0, "step": 5366 }, { "epoch": 7.76, "learning_rate": 4.9622125743688696e-05, "loss": 0.0206, "step": 5367 }, { "epoch": 7.76, "learning_rate": 4.9589966232513266e-05, "loss": 0.0072, "step": 5368 }, { "epoch": 7.76, "learning_rate": 4.9557806721337844e-05, "loss": 0.0131, "step": 5369 }, { "epoch": 7.77, "learning_rate": 4.952564721016241e-05, "loss": 0.0023, "step": 5370 }, { "epoch": 7.77, "learning_rate": 4.949348769898698e-05, "loss": 0.0029, "step": 5371 }, { "epoch": 7.77, "learning_rate": 4.946132818781155e-05, "loss": 0.0079, "step": 5372 }, { "epoch": 7.77, "learning_rate": 4.942916867663612e-05, "loss": 0.0399, "step": 5373 }, { "epoch": 7.77, "learning_rate": 4.939700916546069e-05, "loss": 0.0068, "step": 5374 }, { "epoch": 7.77, "learning_rate": 4.936484965428525e-05, "loss": 0.03, "step": 5375 }, { "epoch": 7.77, "learning_rate": 4.933269014310983e-05, "loss": 0.0205, "step": 5376 }, { "epoch": 7.78, "learning_rate": 4.93005306319344e-05, "loss": 0.0269, "step": 5377 }, { "epoch": 7.78, "learning_rate": 4.9268371120758965e-05, "loss": 0.0069, "step": 5378 }, { "epoch": 7.78, "learning_rate": 4.9236211609583535e-05, "loss": 0.0127, "step": 5379 }, { "epoch": 7.78, "learning_rate": 4.9204052098408106e-05, "loss": 0.0808, "step": 5380 }, { "epoch": 7.78, "learning_rate": 4.9171892587232676e-05, "loss": 0.0026, "step": 5381 }, { "epoch": 7.78, "learning_rate": 4.913973307605725e-05, "loss": 0.0233, "step": 5382 }, { "epoch": 7.78, "learning_rate": 4.910757356488182e-05, "loss": 0.0457, "step": 5383 }, { "epoch": 7.79, "learning_rate": 4.907541405370639e-05, "loss": 0.0001, "step": 5384 }, { "epoch": 7.79, "learning_rate": 4.904325454253096e-05, "loss": 0.0691, "step": 5385 }, { "epoch": 7.79, "learning_rate": 4.901109503135552e-05, "loss": 0.0002, "step": 5386 }, { "epoch": 7.79, "learning_rate": 4.89789355201801e-05, "loss": 0.0204, "step": 5387 }, { "epoch": 7.79, "learning_rate": 4.894677600900466e-05, "loss": 0.0049, "step": 5388 }, { "epoch": 7.79, "learning_rate": 4.8914616497829233e-05, "loss": 0.0238, "step": 5389 }, { "epoch": 7.79, "learning_rate": 4.8882456986653804e-05, "loss": 0.0016, "step": 5390 }, { "epoch": 7.8, "learning_rate": 4.8850297475478374e-05, "loss": 0.003, "step": 5391 }, { "epoch": 7.8, "learning_rate": 4.8818137964302945e-05, "loss": 0.0058, "step": 5392 }, { "epoch": 7.8, "learning_rate": 4.8785978453127516e-05, "loss": 0.0002, "step": 5393 }, { "epoch": 7.8, "learning_rate": 4.8753818941952086e-05, "loss": 0.0121, "step": 5394 }, { "epoch": 7.8, "learning_rate": 4.8721659430776657e-05, "loss": 0.0004, "step": 5395 }, { "epoch": 7.8, "learning_rate": 4.868949991960122e-05, "loss": 0.0032, "step": 5396 }, { "epoch": 7.8, "learning_rate": 4.86573404084258e-05, "loss": 0.0459, "step": 5397 }, { "epoch": 7.81, "learning_rate": 4.862518089725037e-05, "loss": 0.0405, "step": 5398 }, { "epoch": 7.81, "learning_rate": 4.859302138607493e-05, "loss": 0.0084, "step": 5399 }, { "epoch": 7.81, "learning_rate": 4.85608618748995e-05, "loss": 0.0023, "step": 5400 }, { "epoch": 7.81, "learning_rate": 4.852870236372407e-05, "loss": 0.0613, "step": 5401 }, { "epoch": 7.81, "learning_rate": 4.849654285254864e-05, "loss": 0.0159, "step": 5402 }, { "epoch": 7.81, "learning_rate": 4.8464383341373214e-05, "loss": 0.0188, "step": 5403 }, { "epoch": 7.81, "learning_rate": 4.8432223830197784e-05, "loss": 0.0011, "step": 5404 }, { "epoch": 7.82, "learning_rate": 4.8400064319022355e-05, "loss": 0.0033, "step": 5405 }, { "epoch": 7.82, "learning_rate": 4.8367904807846925e-05, "loss": 0.0779, "step": 5406 }, { "epoch": 7.82, "learning_rate": 4.833574529667149e-05, "loss": 0.0096, "step": 5407 }, { "epoch": 7.82, "learning_rate": 4.8303585785496066e-05, "loss": 0.0278, "step": 5408 }, { "epoch": 7.82, "learning_rate": 4.827142627432063e-05, "loss": 0.0471, "step": 5409 }, { "epoch": 7.82, "learning_rate": 4.82392667631452e-05, "loss": 0.0064, "step": 5410 }, { "epoch": 7.83, "learning_rate": 4.820710725196977e-05, "loss": 0.001, "step": 5411 }, { "epoch": 7.83, "learning_rate": 4.817494774079434e-05, "loss": 0.1589, "step": 5412 }, { "epoch": 7.83, "learning_rate": 4.814278822961891e-05, "loss": 0.068, "step": 5413 }, { "epoch": 7.83, "learning_rate": 4.811062871844348e-05, "loss": 0.0023, "step": 5414 }, { "epoch": 7.83, "learning_rate": 4.807846920726805e-05, "loss": 0.0397, "step": 5415 }, { "epoch": 7.83, "learning_rate": 4.8046309696092624e-05, "loss": 0.0596, "step": 5416 }, { "epoch": 7.83, "learning_rate": 4.801415018491719e-05, "loss": 0.0009, "step": 5417 }, { "epoch": 7.84, "learning_rate": 4.798199067374176e-05, "loss": 0.0002, "step": 5418 }, { "epoch": 7.84, "learning_rate": 4.7949831162566335e-05, "loss": 0.0327, "step": 5419 }, { "epoch": 7.84, "learning_rate": 4.79176716513909e-05, "loss": 0.0874, "step": 5420 }, { "epoch": 7.84, "learning_rate": 4.788551214021547e-05, "loss": 0.0045, "step": 5421 }, { "epoch": 7.84, "learning_rate": 4.785335262904005e-05, "loss": 0.004, "step": 5422 }, { "epoch": 7.84, "learning_rate": 4.782119311786461e-05, "loss": 0.0553, "step": 5423 }, { "epoch": 7.84, "learning_rate": 4.778903360668918e-05, "loss": 0.0469, "step": 5424 }, { "epoch": 7.85, "learning_rate": 4.775687409551375e-05, "loss": 0.0003, "step": 5425 }, { "epoch": 7.85, "learning_rate": 4.772471458433832e-05, "loss": 0.0319, "step": 5426 }, { "epoch": 7.85, "learning_rate": 4.769255507316289e-05, "loss": 0.0974, "step": 5427 }, { "epoch": 7.85, "learning_rate": 4.7660395561987456e-05, "loss": 0.0018, "step": 5428 }, { "epoch": 7.85, "learning_rate": 4.7628236050812034e-05, "loss": 0.0022, "step": 5429 }, { "epoch": 7.85, "learning_rate": 4.75960765396366e-05, "loss": 0.0007, "step": 5430 }, { "epoch": 7.85, "learning_rate": 4.756391702846117e-05, "loss": 0.0031, "step": 5431 }, { "epoch": 7.86, "learning_rate": 4.753175751728574e-05, "loss": 0.0137, "step": 5432 }, { "epoch": 7.86, "learning_rate": 4.749959800611031e-05, "loss": 0.2048, "step": 5433 }, { "epoch": 7.86, "learning_rate": 4.746743849493488e-05, "loss": 0.0135, "step": 5434 }, { "epoch": 7.86, "learning_rate": 4.743527898375945e-05, "loss": 0.0204, "step": 5435 }, { "epoch": 7.86, "learning_rate": 4.740311947258402e-05, "loss": 0.0001, "step": 5436 }, { "epoch": 7.86, "learning_rate": 4.737095996140859e-05, "loss": 0.0033, "step": 5437 }, { "epoch": 7.86, "learning_rate": 4.7338800450233155e-05, "loss": 0.0335, "step": 5438 }, { "epoch": 7.87, "learning_rate": 4.7306640939057725e-05, "loss": 0.0041, "step": 5439 }, { "epoch": 7.87, "learning_rate": 4.72744814278823e-05, "loss": 0.0552, "step": 5440 }, { "epoch": 7.87, "learning_rate": 4.7242321916706866e-05, "loss": 0.0167, "step": 5441 }, { "epoch": 7.87, "learning_rate": 4.721016240553144e-05, "loss": 0.055, "step": 5442 }, { "epoch": 7.87, "learning_rate": 4.717800289435601e-05, "loss": 0.0112, "step": 5443 }, { "epoch": 7.87, "learning_rate": 4.714584338318058e-05, "loss": 0.0021, "step": 5444 }, { "epoch": 7.87, "learning_rate": 4.711368387200515e-05, "loss": 0.0315, "step": 5445 }, { "epoch": 7.88, "learning_rate": 4.708152436082971e-05, "loss": 0.058, "step": 5446 }, { "epoch": 7.88, "learning_rate": 4.704936484965429e-05, "loss": 0.0173, "step": 5447 }, { "epoch": 7.88, "learning_rate": 4.701720533847886e-05, "loss": 0.0612, "step": 5448 }, { "epoch": 7.88, "learning_rate": 4.6985045827303424e-05, "loss": 0.0427, "step": 5449 }, { "epoch": 7.88, "learning_rate": 4.6952886316128e-05, "loss": 0.06, "step": 5450 }, { "epoch": 7.88, "learning_rate": 4.692072680495257e-05, "loss": 0.0146, "step": 5451 }, { "epoch": 7.88, "learning_rate": 4.6888567293777135e-05, "loss": 0.0493, "step": 5452 }, { "epoch": 7.89, "learning_rate": 4.6856407782601706e-05, "loss": 0.0233, "step": 5453 }, { "epoch": 7.89, "learning_rate": 4.6824248271426276e-05, "loss": 0.0005, "step": 5454 }, { "epoch": 7.89, "learning_rate": 4.679208876025085e-05, "loss": 0.0173, "step": 5455 }, { "epoch": 7.89, "learning_rate": 4.675992924907542e-05, "loss": 0.0796, "step": 5456 }, { "epoch": 7.89, "learning_rate": 4.672776973789999e-05, "loss": 0.0044, "step": 5457 }, { "epoch": 7.89, "learning_rate": 4.669561022672456e-05, "loss": 0.0019, "step": 5458 }, { "epoch": 7.89, "learning_rate": 4.666345071554913e-05, "loss": 0.0292, "step": 5459 }, { "epoch": 7.9, "learning_rate": 4.663129120437369e-05, "loss": 0.0066, "step": 5460 }, { "epoch": 7.9, "learning_rate": 4.659913169319827e-05, "loss": 0.0588, "step": 5461 }, { "epoch": 7.9, "learning_rate": 4.6566972182022833e-05, "loss": 0.0029, "step": 5462 }, { "epoch": 7.9, "learning_rate": 4.6534812670847404e-05, "loss": 0.066, "step": 5463 }, { "epoch": 7.9, "learning_rate": 4.6502653159671974e-05, "loss": 0.0012, "step": 5464 }, { "epoch": 7.9, "learning_rate": 4.6470493648496545e-05, "loss": 0.052, "step": 5465 }, { "epoch": 7.9, "learning_rate": 4.6438334137321115e-05, "loss": 0.0073, "step": 5466 }, { "epoch": 7.91, "learning_rate": 4.6406174626145686e-05, "loss": 0.0015, "step": 5467 }, { "epoch": 7.91, "learning_rate": 4.6374015114970256e-05, "loss": 0.0001, "step": 5468 }, { "epoch": 7.91, "learning_rate": 4.634185560379483e-05, "loss": 0.054, "step": 5469 }, { "epoch": 7.91, "learning_rate": 4.630969609261939e-05, "loss": 0.0006, "step": 5470 }, { "epoch": 7.91, "learning_rate": 4.627753658144396e-05, "loss": 0.0075, "step": 5471 }, { "epoch": 7.91, "learning_rate": 4.624537707026854e-05, "loss": 0.0701, "step": 5472 }, { "epoch": 7.91, "learning_rate": 4.62132175590931e-05, "loss": 0.0331, "step": 5473 }, { "epoch": 7.92, "learning_rate": 4.618105804791767e-05, "loss": 0.064, "step": 5474 }, { "epoch": 7.92, "learning_rate": 4.614889853674224e-05, "loss": 0.0027, "step": 5475 }, { "epoch": 7.92, "learning_rate": 4.6116739025566814e-05, "loss": 0.0266, "step": 5476 }, { "epoch": 7.92, "learning_rate": 4.6084579514391384e-05, "loss": 0.059, "step": 5477 }, { "epoch": 7.92, "learning_rate": 4.605242000321595e-05, "loss": 0.0002, "step": 5478 }, { "epoch": 7.92, "learning_rate": 4.6020260492040525e-05, "loss": 0.0409, "step": 5479 }, { "epoch": 7.92, "learning_rate": 4.5988100980865096e-05, "loss": 0.0609, "step": 5480 }, { "epoch": 7.93, "learning_rate": 4.595594146968966e-05, "loss": 0.0045, "step": 5481 }, { "epoch": 7.93, "learning_rate": 4.592378195851424e-05, "loss": 0.0333, "step": 5482 }, { "epoch": 7.93, "learning_rate": 4.58916224473388e-05, "loss": 0.0164, "step": 5483 }, { "epoch": 7.93, "learning_rate": 4.585946293616337e-05, "loss": 0.0001, "step": 5484 }, { "epoch": 7.93, "learning_rate": 4.582730342498794e-05, "loss": 0.0235, "step": 5485 }, { "epoch": 7.93, "learning_rate": 4.579514391381251e-05, "loss": 0.0313, "step": 5486 }, { "epoch": 7.93, "learning_rate": 4.576298440263708e-05, "loss": 0.0022, "step": 5487 }, { "epoch": 7.94, "learning_rate": 4.573082489146165e-05, "loss": 0.0065, "step": 5488 }, { "epoch": 7.94, "learning_rate": 4.5698665380286224e-05, "loss": 0.0107, "step": 5489 }, { "epoch": 7.94, "learning_rate": 4.5666505869110794e-05, "loss": 0.0002, "step": 5490 }, { "epoch": 7.94, "learning_rate": 4.563434635793536e-05, "loss": 0.0548, "step": 5491 }, { "epoch": 7.94, "learning_rate": 4.560218684675993e-05, "loss": 0.0069, "step": 5492 }, { "epoch": 7.94, "learning_rate": 4.5570027335584506e-05, "loss": 0.0122, "step": 5493 }, { "epoch": 7.95, "learning_rate": 4.553786782440907e-05, "loss": 0.0064, "step": 5494 }, { "epoch": 7.95, "learning_rate": 4.550570831323364e-05, "loss": 0.1196, "step": 5495 }, { "epoch": 7.95, "learning_rate": 4.547354880205821e-05, "loss": 0.0, "step": 5496 }, { "epoch": 7.95, "learning_rate": 4.544138929088278e-05, "loss": 0.0294, "step": 5497 }, { "epoch": 7.95, "learning_rate": 4.540922977970735e-05, "loss": 0.0138, "step": 5498 }, { "epoch": 7.95, "learning_rate": 4.5377070268531915e-05, "loss": 0.0018, "step": 5499 }, { "epoch": 7.95, "learning_rate": 4.534491075735649e-05, "loss": 0.0454, "step": 5500 }, { "epoch": 7.96, "learning_rate": 4.531275124618106e-05, "loss": 0.0003, "step": 5501 }, { "epoch": 7.96, "learning_rate": 4.528059173500563e-05, "loss": 0.1141, "step": 5502 }, { "epoch": 7.96, "learning_rate": 4.52484322238302e-05, "loss": 0.0114, "step": 5503 }, { "epoch": 7.96, "learning_rate": 4.5216272712654775e-05, "loss": 0.012, "step": 5504 }, { "epoch": 7.96, "learning_rate": 4.518411320147934e-05, "loss": 0.0494, "step": 5505 }, { "epoch": 7.96, "learning_rate": 4.515195369030391e-05, "loss": 0.0344, "step": 5506 }, { "epoch": 7.96, "learning_rate": 4.511979417912848e-05, "loss": 0.0719, "step": 5507 }, { "epoch": 7.97, "learning_rate": 4.508763466795305e-05, "loss": 0.0512, "step": 5508 }, { "epoch": 7.97, "learning_rate": 4.505547515677762e-05, "loss": 0.01, "step": 5509 }, { "epoch": 7.97, "learning_rate": 4.502331564560219e-05, "loss": 0.0066, "step": 5510 }, { "epoch": 7.97, "learning_rate": 4.499115613442676e-05, "loss": 0.0048, "step": 5511 }, { "epoch": 7.97, "learning_rate": 4.4958996623251325e-05, "loss": 0.0066, "step": 5512 }, { "epoch": 7.97, "learning_rate": 4.4926837112075896e-05, "loss": 0.0896, "step": 5513 }, { "epoch": 7.97, "learning_rate": 4.489467760090047e-05, "loss": 0.0517, "step": 5514 }, { "epoch": 7.98, "learning_rate": 4.486251808972504e-05, "loss": 0.0013, "step": 5515 }, { "epoch": 7.98, "learning_rate": 4.483035857854961e-05, "loss": 0.0421, "step": 5516 }, { "epoch": 7.98, "learning_rate": 4.479819906737418e-05, "loss": 0.0118, "step": 5517 }, { "epoch": 7.98, "learning_rate": 4.476603955619875e-05, "loss": 0.074, "step": 5518 }, { "epoch": 7.98, "learning_rate": 4.473388004502332e-05, "loss": 0.0369, "step": 5519 }, { "epoch": 7.98, "learning_rate": 4.470172053384788e-05, "loss": 0.0052, "step": 5520 }, { "epoch": 7.98, "learning_rate": 4.466956102267246e-05, "loss": 0.0456, "step": 5521 }, { "epoch": 7.99, "learning_rate": 4.463740151149703e-05, "loss": 0.009, "step": 5522 }, { "epoch": 7.99, "learning_rate": 4.4605242000321594e-05, "loss": 0.0002, "step": 5523 }, { "epoch": 7.99, "learning_rate": 4.4573082489146165e-05, "loss": 0.0618, "step": 5524 }, { "epoch": 7.99, "learning_rate": 4.454092297797074e-05, "loss": 0.0438, "step": 5525 }, { "epoch": 7.99, "learning_rate": 4.4508763466795306e-05, "loss": 0.0016, "step": 5526 }, { "epoch": 7.99, "learning_rate": 4.4476603955619876e-05, "loss": 0.017, "step": 5527 }, { "epoch": 7.99, "learning_rate": 4.4444444444444447e-05, "loss": 0.0654, "step": 5528 }, { "epoch": 8.0, "learning_rate": 4.441228493326902e-05, "loss": 0.0001, "step": 5529 }, { "epoch": 8.0, "learning_rate": 4.438012542209359e-05, "loss": 0.0176, "step": 5530 }, { "epoch": 8.0, "learning_rate": 4.434796591091815e-05, "loss": 0.0081, "step": 5531 }, { "epoch": 8.0, "learning_rate": 4.431580639974273e-05, "loss": 0.0391, "step": 5532 }, { "epoch": 8.0, "learning_rate": 4.42836468885673e-05, "loss": 0.0115, "step": 5533 }, { "epoch": 8.0, "learning_rate": 4.425148737739186e-05, "loss": 0.0005, "step": 5534 }, { "epoch": 8.0, "learning_rate": 4.421932786621643e-05, "loss": 0.0103, "step": 5535 }, { "epoch": 8.01, "learning_rate": 4.4187168355041004e-05, "loss": 0.0006, "step": 5536 }, { "epoch": 8.01, "learning_rate": 4.4155008843865574e-05, "loss": 0.0812, "step": 5537 }, { "epoch": 8.01, "learning_rate": 4.4122849332690145e-05, "loss": 0.0002, "step": 5538 }, { "epoch": 8.01, "learning_rate": 4.4090689821514715e-05, "loss": 0.0003, "step": 5539 }, { "epoch": 8.01, "learning_rate": 4.4058530310339286e-05, "loss": 0.011, "step": 5540 }, { "epoch": 8.01, "learning_rate": 4.4026370799163856e-05, "loss": 0.0282, "step": 5541 }, { "epoch": 8.01, "learning_rate": 4.399421128798843e-05, "loss": 0.051, "step": 5542 }, { "epoch": 8.02, "learning_rate": 4.3962051776813e-05, "loss": 0.0207, "step": 5543 }, { "epoch": 8.02, "learning_rate": 4.392989226563756e-05, "loss": 0.0497, "step": 5544 }, { "epoch": 8.02, "learning_rate": 4.389773275446213e-05, "loss": 0.0004, "step": 5545 }, { "epoch": 8.02, "learning_rate": 4.386557324328671e-05, "loss": 0.0024, "step": 5546 }, { "epoch": 8.02, "learning_rate": 4.383341373211127e-05, "loss": 0.0064, "step": 5547 }, { "epoch": 8.02, "learning_rate": 4.380125422093584e-05, "loss": 0.0068, "step": 5548 }, { "epoch": 8.02, "learning_rate": 4.3769094709760414e-05, "loss": 0.0258, "step": 5549 }, { "epoch": 8.03, "learning_rate": 4.3736935198584984e-05, "loss": 0.0067, "step": 5550 }, { "epoch": 8.03, "learning_rate": 4.3704775687409555e-05, "loss": 0.0002, "step": 5551 }, { "epoch": 8.03, "learning_rate": 4.367261617623412e-05, "loss": 0.0018, "step": 5552 }, { "epoch": 8.03, "learning_rate": 4.3640456665058696e-05, "loss": 0.0149, "step": 5553 }, { "epoch": 8.03, "learning_rate": 4.3608297153883266e-05, "loss": 0.0079, "step": 5554 }, { "epoch": 8.03, "learning_rate": 4.357613764270783e-05, "loss": 0.0328, "step": 5555 }, { "epoch": 8.03, "learning_rate": 4.35439781315324e-05, "loss": 0.0522, "step": 5556 }, { "epoch": 8.04, "learning_rate": 4.351181862035697e-05, "loss": 0.0459, "step": 5557 }, { "epoch": 8.04, "learning_rate": 4.347965910918154e-05, "loss": 0.0157, "step": 5558 }, { "epoch": 8.04, "learning_rate": 4.344749959800611e-05, "loss": 0.0312, "step": 5559 }, { "epoch": 8.04, "learning_rate": 4.341534008683068e-05, "loss": 0.004, "step": 5560 }, { "epoch": 8.04, "learning_rate": 4.338318057565525e-05, "loss": 0.0734, "step": 5561 }, { "epoch": 8.04, "learning_rate": 4.3351021064479824e-05, "loss": 0.0062, "step": 5562 }, { "epoch": 8.04, "learning_rate": 4.331886155330439e-05, "loss": 0.0, "step": 5563 }, { "epoch": 8.05, "learning_rate": 4.3286702042128965e-05, "loss": 0.014, "step": 5564 }, { "epoch": 8.05, "learning_rate": 4.325454253095353e-05, "loss": 0.0001, "step": 5565 }, { "epoch": 8.05, "learning_rate": 4.32223830197781e-05, "loss": 0.0238, "step": 5566 }, { "epoch": 8.05, "learning_rate": 4.3190223508602676e-05, "loss": 0.0352, "step": 5567 }, { "epoch": 8.05, "learning_rate": 4.315806399742724e-05, "loss": 0.0432, "step": 5568 }, { "epoch": 8.05, "learning_rate": 4.312590448625181e-05, "loss": 0.0707, "step": 5569 }, { "epoch": 8.05, "learning_rate": 4.309374497507638e-05, "loss": 0.0004, "step": 5570 }, { "epoch": 8.06, "learning_rate": 4.306158546390095e-05, "loss": 0.0037, "step": 5571 }, { "epoch": 8.06, "learning_rate": 4.302942595272552e-05, "loss": 0.0017, "step": 5572 }, { "epoch": 8.06, "learning_rate": 4.2997266441550086e-05, "loss": 0.0637, "step": 5573 }, { "epoch": 8.06, "learning_rate": 4.296510693037466e-05, "loss": 0.0316, "step": 5574 }, { "epoch": 8.06, "learning_rate": 4.2932947419199234e-05, "loss": 0.0172, "step": 5575 }, { "epoch": 8.06, "learning_rate": 4.29007879080238e-05, "loss": 0.0029, "step": 5576 }, { "epoch": 8.07, "learning_rate": 4.286862839684837e-05, "loss": 0.0023, "step": 5577 }, { "epoch": 8.07, "learning_rate": 4.2836468885672945e-05, "loss": 0.0405, "step": 5578 }, { "epoch": 8.07, "learning_rate": 4.280430937449751e-05, "loss": 0.0066, "step": 5579 }, { "epoch": 8.07, "learning_rate": 4.277214986332208e-05, "loss": 0.0384, "step": 5580 }, { "epoch": 8.07, "learning_rate": 4.273999035214665e-05, "loss": 0.0035, "step": 5581 }, { "epoch": 8.07, "learning_rate": 4.270783084097122e-05, "loss": 0.0048, "step": 5582 }, { "epoch": 8.07, "learning_rate": 4.267567132979579e-05, "loss": 0.0894, "step": 5583 }, { "epoch": 8.08, "learning_rate": 4.2643511818620355e-05, "loss": 0.003, "step": 5584 }, { "epoch": 8.08, "learning_rate": 4.261135230744493e-05, "loss": 0.0081, "step": 5585 }, { "epoch": 8.08, "learning_rate": 4.25791927962695e-05, "loss": 0.0474, "step": 5586 }, { "epoch": 8.08, "learning_rate": 4.2547033285094066e-05, "loss": 0.0414, "step": 5587 }, { "epoch": 8.08, "learning_rate": 4.251487377391864e-05, "loss": 0.0176, "step": 5588 }, { "epoch": 8.08, "learning_rate": 4.248271426274321e-05, "loss": 0.08, "step": 5589 }, { "epoch": 8.08, "learning_rate": 4.245055475156778e-05, "loss": 0.022, "step": 5590 }, { "epoch": 8.09, "learning_rate": 4.241839524039235e-05, "loss": 0.0469, "step": 5591 }, { "epoch": 8.09, "learning_rate": 4.238623572921692e-05, "loss": 0.0714, "step": 5592 }, { "epoch": 8.09, "learning_rate": 4.235407621804149e-05, "loss": 0.0525, "step": 5593 }, { "epoch": 8.09, "learning_rate": 4.232191670686605e-05, "loss": 0.0057, "step": 5594 }, { "epoch": 8.09, "learning_rate": 4.228975719569063e-05, "loss": 0.0371, "step": 5595 }, { "epoch": 8.09, "learning_rate": 4.22575976845152e-05, "loss": 0.0371, "step": 5596 }, { "epoch": 8.09, "learning_rate": 4.2225438173339764e-05, "loss": 0.0004, "step": 5597 }, { "epoch": 8.1, "learning_rate": 4.2193278662164335e-05, "loss": 0.034, "step": 5598 }, { "epoch": 8.1, "learning_rate": 4.216111915098891e-05, "loss": 0.0374, "step": 5599 }, { "epoch": 8.1, "learning_rate": 4.2128959639813476e-05, "loss": 0.0133, "step": 5600 }, { "epoch": 8.1, "learning_rate": 4.2096800128638047e-05, "loss": 0.063, "step": 5601 }, { "epoch": 8.1, "learning_rate": 4.206464061746262e-05, "loss": 0.0322, "step": 5602 }, { "epoch": 8.1, "learning_rate": 4.203248110628719e-05, "loss": 0.0041, "step": 5603 }, { "epoch": 8.1, "learning_rate": 4.200032159511176e-05, "loss": 0.0286, "step": 5604 }, { "epoch": 8.11, "learning_rate": 4.196816208393632e-05, "loss": 0.007, "step": 5605 }, { "epoch": 8.11, "learning_rate": 4.19360025727609e-05, "loss": 0.0645, "step": 5606 }, { "epoch": 8.11, "learning_rate": 4.190384306158547e-05, "loss": 0.0153, "step": 5607 }, { "epoch": 8.11, "learning_rate": 4.187168355041003e-05, "loss": 0.0148, "step": 5608 }, { "epoch": 8.11, "learning_rate": 4.1839524039234604e-05, "loss": 0.0001, "step": 5609 }, { "epoch": 8.11, "learning_rate": 4.1807364528059174e-05, "loss": 0.0004, "step": 5610 }, { "epoch": 8.11, "learning_rate": 4.1775205016883745e-05, "loss": 0.0007, "step": 5611 }, { "epoch": 8.12, "learning_rate": 4.1743045505708315e-05, "loss": 0.0806, "step": 5612 }, { "epoch": 8.12, "learning_rate": 4.1710885994532886e-05, "loss": 0.0006, "step": 5613 }, { "epoch": 8.12, "learning_rate": 4.1678726483357456e-05, "loss": 0.0138, "step": 5614 }, { "epoch": 8.12, "learning_rate": 4.164656697218203e-05, "loss": 0.0256, "step": 5615 }, { "epoch": 8.12, "learning_rate": 4.161440746100659e-05, "loss": 0.0613, "step": 5616 }, { "epoch": 8.12, "learning_rate": 4.158224794983117e-05, "loss": 0.0319, "step": 5617 }, { "epoch": 8.12, "learning_rate": 4.155008843865573e-05, "loss": 0.0002, "step": 5618 }, { "epoch": 8.13, "learning_rate": 4.15179289274803e-05, "loss": 0.0121, "step": 5619 }, { "epoch": 8.13, "learning_rate": 4.148576941630487e-05, "loss": 0.0001, "step": 5620 }, { "epoch": 8.13, "learning_rate": 4.145360990512944e-05, "loss": 0.0056, "step": 5621 }, { "epoch": 8.13, "learning_rate": 4.1421450393954014e-05, "loss": 0.0397, "step": 5622 }, { "epoch": 8.13, "learning_rate": 4.1389290882778584e-05, "loss": 0.0772, "step": 5623 }, { "epoch": 8.13, "learning_rate": 4.1357131371603155e-05, "loss": 0.0189, "step": 5624 }, { "epoch": 8.13, "learning_rate": 4.1324971860427725e-05, "loss": 0.0058, "step": 5625 }, { "epoch": 8.14, "learning_rate": 4.129281234925229e-05, "loss": 0.0372, "step": 5626 }, { "epoch": 8.14, "learning_rate": 4.1260652838076866e-05, "loss": 0.0001, "step": 5627 }, { "epoch": 8.14, "learning_rate": 4.122849332690144e-05, "loss": 0.0234, "step": 5628 }, { "epoch": 8.14, "learning_rate": 4.1196333815726e-05, "loss": 0.0036, "step": 5629 }, { "epoch": 8.14, "learning_rate": 4.116417430455057e-05, "loss": 0.0247, "step": 5630 }, { "epoch": 8.14, "learning_rate": 4.113201479337515e-05, "loss": 0.018, "step": 5631 }, { "epoch": 8.14, "learning_rate": 4.109985528219971e-05, "loss": 0.0232, "step": 5632 }, { "epoch": 8.15, "learning_rate": 4.106769577102428e-05, "loss": 0.0192, "step": 5633 }, { "epoch": 8.15, "learning_rate": 4.103553625984885e-05, "loss": 0.02, "step": 5634 }, { "epoch": 8.15, "learning_rate": 4.1003376748673424e-05, "loss": 0.0406, "step": 5635 }, { "epoch": 8.15, "learning_rate": 4.0971217237497994e-05, "loss": 0.0232, "step": 5636 }, { "epoch": 8.15, "learning_rate": 4.093905772632256e-05, "loss": 0.0126, "step": 5637 }, { "epoch": 8.15, "learning_rate": 4.0906898215147135e-05, "loss": 0.0301, "step": 5638 }, { "epoch": 8.15, "learning_rate": 4.08747387039717e-05, "loss": 0.0006, "step": 5639 }, { "epoch": 8.16, "learning_rate": 4.084257919279627e-05, "loss": 0.0022, "step": 5640 }, { "epoch": 8.16, "learning_rate": 4.081041968162084e-05, "loss": 0.0002, "step": 5641 }, { "epoch": 8.16, "learning_rate": 4.077826017044541e-05, "loss": 0.0273, "step": 5642 }, { "epoch": 8.16, "learning_rate": 4.074610065926998e-05, "loss": 0.0005, "step": 5643 }, { "epoch": 8.16, "learning_rate": 4.071394114809455e-05, "loss": 0.0164, "step": 5644 }, { "epoch": 8.16, "learning_rate": 4.068178163691912e-05, "loss": 0.0001, "step": 5645 }, { "epoch": 8.16, "learning_rate": 4.064962212574369e-05, "loss": 0.0509, "step": 5646 }, { "epoch": 8.17, "learning_rate": 4.0617462614568256e-05, "loss": 0.0002, "step": 5647 }, { "epoch": 8.17, "learning_rate": 4.058530310339283e-05, "loss": 0.0072, "step": 5648 }, { "epoch": 8.17, "learning_rate": 4.0553143592217404e-05, "loss": 0.0012, "step": 5649 }, { "epoch": 8.17, "learning_rate": 4.052098408104197e-05, "loss": 0.0126, "step": 5650 }, { "epoch": 8.17, "learning_rate": 4.048882456986654e-05, "loss": 0.0208, "step": 5651 }, { "epoch": 8.17, "learning_rate": 4.0456665058691116e-05, "loss": 0.0223, "step": 5652 }, { "epoch": 8.17, "learning_rate": 4.042450554751568e-05, "loss": 0.0107, "step": 5653 }, { "epoch": 8.18, "learning_rate": 4.039234603634025e-05, "loss": 0.0004, "step": 5654 }, { "epoch": 8.18, "learning_rate": 4.036018652516482e-05, "loss": 0.0413, "step": 5655 }, { "epoch": 8.18, "learning_rate": 4.032802701398939e-05, "loss": 0.0282, "step": 5656 }, { "epoch": 8.18, "learning_rate": 4.029586750281396e-05, "loss": 0.0491, "step": 5657 }, { "epoch": 8.18, "learning_rate": 4.0263707991638525e-05, "loss": 0.0052, "step": 5658 }, { "epoch": 8.18, "learning_rate": 4.02315484804631e-05, "loss": 0.0307, "step": 5659 }, { "epoch": 8.19, "learning_rate": 4.019938896928767e-05, "loss": 0.0, "step": 5660 }, { "epoch": 8.19, "learning_rate": 4.0167229458112237e-05, "loss": 0.0128, "step": 5661 }, { "epoch": 8.19, "learning_rate": 4.013506994693681e-05, "loss": 0.0743, "step": 5662 }, { "epoch": 8.19, "learning_rate": 4.010291043576138e-05, "loss": 0.0005, "step": 5663 }, { "epoch": 8.19, "learning_rate": 4.007075092458595e-05, "loss": 0.0002, "step": 5664 }, { "epoch": 8.19, "learning_rate": 4.003859141341052e-05, "loss": 0.0808, "step": 5665 }, { "epoch": 8.19, "learning_rate": 4.000643190223509e-05, "loss": 0.0454, "step": 5666 }, { "epoch": 8.2, "learning_rate": 3.997427239105966e-05, "loss": 0.0024, "step": 5667 }, { "epoch": 8.2, "learning_rate": 3.994211287988423e-05, "loss": 0.0009, "step": 5668 }, { "epoch": 8.2, "learning_rate": 3.9909953368708794e-05, "loss": 0.0137, "step": 5669 }, { "epoch": 8.2, "learning_rate": 3.987779385753337e-05, "loss": 0.0018, "step": 5670 }, { "epoch": 8.2, "learning_rate": 3.9845634346357935e-05, "loss": 0.0928, "step": 5671 }, { "epoch": 8.2, "learning_rate": 3.9813474835182505e-05, "loss": 0.013, "step": 5672 }, { "epoch": 8.2, "learning_rate": 3.9781315324007076e-05, "loss": 0.0031, "step": 5673 }, { "epoch": 8.21, "learning_rate": 3.9749155812831646e-05, "loss": 0.0069, "step": 5674 }, { "epoch": 8.21, "learning_rate": 3.971699630165622e-05, "loss": 0.0027, "step": 5675 }, { "epoch": 8.21, "learning_rate": 3.968483679048078e-05, "loss": 0.0152, "step": 5676 }, { "epoch": 8.21, "learning_rate": 3.965267727930536e-05, "loss": 0.0001, "step": 5677 }, { "epoch": 8.21, "learning_rate": 3.962051776812993e-05, "loss": 0.0064, "step": 5678 }, { "epoch": 8.21, "learning_rate": 3.958835825695449e-05, "loss": 0.0007, "step": 5679 }, { "epoch": 8.21, "learning_rate": 3.955619874577906e-05, "loss": 0.0248, "step": 5680 }, { "epoch": 8.22, "learning_rate": 3.952403923460364e-05, "loss": 0.0003, "step": 5681 }, { "epoch": 8.22, "learning_rate": 3.9491879723428204e-05, "loss": 0.0088, "step": 5682 }, { "epoch": 8.22, "learning_rate": 3.9459720212252774e-05, "loss": 0.0643, "step": 5683 }, { "epoch": 8.22, "learning_rate": 3.9427560701077345e-05, "loss": 0.0007, "step": 5684 }, { "epoch": 8.22, "learning_rate": 3.9395401189901915e-05, "loss": 0.0572, "step": 5685 }, { "epoch": 8.22, "learning_rate": 3.9363241678726486e-05, "loss": 0.0267, "step": 5686 }, { "epoch": 8.22, "learning_rate": 3.9331082167551056e-05, "loss": 0.0001, "step": 5687 }, { "epoch": 8.23, "learning_rate": 3.929892265637563e-05, "loss": 0.0145, "step": 5688 }, { "epoch": 8.23, "learning_rate": 3.92667631452002e-05, "loss": 0.0072, "step": 5689 }, { "epoch": 8.23, "learning_rate": 3.923460363402476e-05, "loss": 0.026, "step": 5690 }, { "epoch": 8.23, "learning_rate": 3.920244412284934e-05, "loss": 0.0158, "step": 5691 }, { "epoch": 8.23, "learning_rate": 3.91702846116739e-05, "loss": 0.0002, "step": 5692 }, { "epoch": 8.23, "learning_rate": 3.913812510049847e-05, "loss": 0.0013, "step": 5693 }, { "epoch": 8.23, "learning_rate": 3.910596558932304e-05, "loss": 0.0025, "step": 5694 }, { "epoch": 8.24, "learning_rate": 3.9073806078147614e-05, "loss": 0.002, "step": 5695 }, { "epoch": 8.24, "learning_rate": 3.9041646566972184e-05, "loss": 0.0342, "step": 5696 }, { "epoch": 8.24, "learning_rate": 3.9009487055796755e-05, "loss": 0.0541, "step": 5697 }, { "epoch": 8.24, "learning_rate": 3.8977327544621325e-05, "loss": 0.0187, "step": 5698 }, { "epoch": 8.24, "learning_rate": 3.8945168033445896e-05, "loss": 0.0001, "step": 5699 }, { "epoch": 8.24, "learning_rate": 3.891300852227046e-05, "loss": 0.0011, "step": 5700 }, { "epoch": 8.24, "learning_rate": 3.888084901109503e-05, "loss": 0.0401, "step": 5701 }, { "epoch": 8.25, "learning_rate": 3.884868949991961e-05, "loss": 0.0095, "step": 5702 }, { "epoch": 8.25, "learning_rate": 3.881652998874417e-05, "loss": 0.0198, "step": 5703 }, { "epoch": 8.25, "learning_rate": 3.878437047756874e-05, "loss": 0.0427, "step": 5704 }, { "epoch": 8.25, "learning_rate": 3.875221096639331e-05, "loss": 0.0037, "step": 5705 }, { "epoch": 8.25, "learning_rate": 3.872005145521788e-05, "loss": 0.0125, "step": 5706 }, { "epoch": 8.25, "learning_rate": 3.868789194404245e-05, "loss": 0.0111, "step": 5707 }, { "epoch": 8.25, "learning_rate": 3.865573243286702e-05, "loss": 0.0361, "step": 5708 }, { "epoch": 8.26, "learning_rate": 3.8623572921691594e-05, "loss": 0.0041, "step": 5709 }, { "epoch": 8.26, "learning_rate": 3.8591413410516165e-05, "loss": 0.1292, "step": 5710 }, { "epoch": 8.26, "learning_rate": 3.855925389934073e-05, "loss": 0.0086, "step": 5711 }, { "epoch": 8.26, "learning_rate": 3.8527094388165306e-05, "loss": 0.0001, "step": 5712 }, { "epoch": 8.26, "learning_rate": 3.8494934876989876e-05, "loss": 0.0618, "step": 5713 }, { "epoch": 8.26, "learning_rate": 3.846277536581444e-05, "loss": 0.0002, "step": 5714 }, { "epoch": 8.26, "learning_rate": 3.843061585463901e-05, "loss": 0.0884, "step": 5715 }, { "epoch": 8.27, "learning_rate": 3.839845634346358e-05, "loss": 0.0002, "step": 5716 }, { "epoch": 8.27, "learning_rate": 3.836629683228815e-05, "loss": 0.0113, "step": 5717 }, { "epoch": 8.27, "learning_rate": 3.833413732111272e-05, "loss": 0.0441, "step": 5718 }, { "epoch": 8.27, "learning_rate": 3.830197780993729e-05, "loss": 0.0002, "step": 5719 }, { "epoch": 8.27, "learning_rate": 3.826981829876186e-05, "loss": 0.0181, "step": 5720 }, { "epoch": 8.27, "learning_rate": 3.823765878758643e-05, "loss": 0.0002, "step": 5721 }, { "epoch": 8.27, "learning_rate": 3.8205499276411e-05, "loss": 0.0001, "step": 5722 }, { "epoch": 8.28, "learning_rate": 3.8173339765235574e-05, "loss": 0.0127, "step": 5723 }, { "epoch": 8.28, "learning_rate": 3.814118025406014e-05, "loss": 0.0111, "step": 5724 }, { "epoch": 8.28, "learning_rate": 3.810902074288471e-05, "loss": 0.0028, "step": 5725 }, { "epoch": 8.28, "learning_rate": 3.807686123170928e-05, "loss": 0.0315, "step": 5726 }, { "epoch": 8.28, "learning_rate": 3.804470172053385e-05, "loss": 0.0016, "step": 5727 }, { "epoch": 8.28, "learning_rate": 3.801254220935842e-05, "loss": 0.0241, "step": 5728 }, { "epoch": 8.28, "learning_rate": 3.7980382698182984e-05, "loss": 0.0429, "step": 5729 }, { "epoch": 8.29, "learning_rate": 3.794822318700756e-05, "loss": 0.0003, "step": 5730 }, { "epoch": 8.29, "learning_rate": 3.791606367583213e-05, "loss": 0.0006, "step": 5731 }, { "epoch": 8.29, "learning_rate": 3.7883904164656696e-05, "loss": 0.0105, "step": 5732 }, { "epoch": 8.29, "learning_rate": 3.7851744653481266e-05, "loss": 0.0, "step": 5733 }, { "epoch": 8.29, "learning_rate": 3.781958514230584e-05, "loss": 0.0252, "step": 5734 }, { "epoch": 8.29, "learning_rate": 3.778742563113041e-05, "loss": 0.0525, "step": 5735 }, { "epoch": 8.3, "learning_rate": 3.775526611995498e-05, "loss": 0.0049, "step": 5736 }, { "epoch": 8.3, "learning_rate": 3.772310660877955e-05, "loss": 0.0427, "step": 5737 }, { "epoch": 8.3, "learning_rate": 3.769094709760412e-05, "loss": 0.0121, "step": 5738 }, { "epoch": 8.3, "learning_rate": 3.765878758642869e-05, "loss": 0.0257, "step": 5739 }, { "epoch": 8.3, "learning_rate": 3.762662807525326e-05, "loss": 0.0011, "step": 5740 }, { "epoch": 8.3, "learning_rate": 3.759446856407783e-05, "loss": 0.0001, "step": 5741 }, { "epoch": 8.3, "learning_rate": 3.75623090529024e-05, "loss": 0.0001, "step": 5742 }, { "epoch": 8.31, "learning_rate": 3.7530149541726964e-05, "loss": 0.0002, "step": 5743 }, { "epoch": 8.31, "learning_rate": 3.749799003055154e-05, "loss": 0.0008, "step": 5744 }, { "epoch": 8.31, "learning_rate": 3.7465830519376105e-05, "loss": 0.0, "step": 5745 }, { "epoch": 8.31, "learning_rate": 3.7433671008200676e-05, "loss": 0.0004, "step": 5746 }, { "epoch": 8.31, "learning_rate": 3.7401511497025246e-05, "loss": 0.0658, "step": 5747 }, { "epoch": 8.31, "learning_rate": 3.736935198584982e-05, "loss": 0.0, "step": 5748 }, { "epoch": 8.31, "learning_rate": 3.733719247467439e-05, "loss": 0.0152, "step": 5749 }, { "epoch": 8.32, "learning_rate": 3.730503296349896e-05, "loss": 0.0023, "step": 5750 }, { "epoch": 8.32, "learning_rate": 3.727287345232353e-05, "loss": 0.0117, "step": 5751 }, { "epoch": 8.32, "learning_rate": 3.72407139411481e-05, "loss": 0.0226, "step": 5752 }, { "epoch": 8.32, "learning_rate": 3.720855442997266e-05, "loss": 0.0003, "step": 5753 }, { "epoch": 8.32, "learning_rate": 3.717639491879723e-05, "loss": 0.0017, "step": 5754 }, { "epoch": 8.32, "learning_rate": 3.714423540762181e-05, "loss": 0.0293, "step": 5755 }, { "epoch": 8.32, "learning_rate": 3.7112075896446374e-05, "loss": 0.0529, "step": 5756 }, { "epoch": 8.33, "learning_rate": 3.7079916385270945e-05, "loss": 0.0414, "step": 5757 }, { "epoch": 8.33, "learning_rate": 3.7047756874095515e-05, "loss": 0.0005, "step": 5758 }, { "epoch": 8.33, "learning_rate": 3.7015597362920086e-05, "loss": 0.0695, "step": 5759 }, { "epoch": 8.33, "learning_rate": 3.6983437851744656e-05, "loss": 0.066, "step": 5760 }, { "epoch": 8.33, "learning_rate": 3.695127834056922e-05, "loss": 0.0004, "step": 5761 }, { "epoch": 8.33, "learning_rate": 3.69191188293938e-05, "loss": 0.0547, "step": 5762 }, { "epoch": 8.33, "learning_rate": 3.688695931821837e-05, "loss": 0.0156, "step": 5763 }, { "epoch": 8.34, "learning_rate": 3.685479980704293e-05, "loss": 0.0219, "step": 5764 }, { "epoch": 8.34, "learning_rate": 3.68226402958675e-05, "loss": 0.0008, "step": 5765 }, { "epoch": 8.34, "learning_rate": 3.679048078469207e-05, "loss": 0.098, "step": 5766 }, { "epoch": 8.34, "learning_rate": 3.675832127351664e-05, "loss": 0.0001, "step": 5767 }, { "epoch": 8.34, "learning_rate": 3.6726161762341214e-05, "loss": 0.0001, "step": 5768 }, { "epoch": 8.34, "learning_rate": 3.6694002251165784e-05, "loss": 0.0092, "step": 5769 }, { "epoch": 8.34, "learning_rate": 3.6661842739990355e-05, "loss": 0.0528, "step": 5770 }, { "epoch": 8.35, "learning_rate": 3.6629683228814925e-05, "loss": 0.036, "step": 5771 }, { "epoch": 8.35, "learning_rate": 3.6597523717639496e-05, "loss": 0.0002, "step": 5772 }, { "epoch": 8.35, "learning_rate": 3.6565364206464066e-05, "loss": 0.0, "step": 5773 }, { "epoch": 8.35, "learning_rate": 3.653320469528863e-05, "loss": 0.001, "step": 5774 }, { "epoch": 8.35, "learning_rate": 3.65010451841132e-05, "loss": 0.0126, "step": 5775 }, { "epoch": 8.35, "learning_rate": 3.646888567293778e-05, "loss": 0.1199, "step": 5776 }, { "epoch": 8.35, "learning_rate": 3.643672616176234e-05, "loss": 0.0736, "step": 5777 }, { "epoch": 8.36, "learning_rate": 3.640456665058691e-05, "loss": 0.0383, "step": 5778 }, { "epoch": 8.36, "learning_rate": 3.637240713941148e-05, "loss": 0.0035, "step": 5779 }, { "epoch": 8.36, "learning_rate": 3.634024762823605e-05, "loss": 0.0003, "step": 5780 }, { "epoch": 8.36, "learning_rate": 3.6308088117060624e-05, "loss": 0.0103, "step": 5781 }, { "epoch": 8.36, "learning_rate": 3.627592860588519e-05, "loss": 0.0003, "step": 5782 }, { "epoch": 8.36, "learning_rate": 3.6243769094709765e-05, "loss": 0.0136, "step": 5783 }, { "epoch": 8.36, "learning_rate": 3.6211609583534335e-05, "loss": 0.0535, "step": 5784 }, { "epoch": 8.37, "learning_rate": 3.61794500723589e-05, "loss": 0.0009, "step": 5785 }, { "epoch": 8.37, "learning_rate": 3.614729056118347e-05, "loss": 0.0008, "step": 5786 }, { "epoch": 8.37, "learning_rate": 3.6115131050008047e-05, "loss": 0.0659, "step": 5787 }, { "epoch": 8.37, "learning_rate": 3.608297153883261e-05, "loss": 0.0511, "step": 5788 }, { "epoch": 8.37, "learning_rate": 3.605081202765718e-05, "loss": 0.0004, "step": 5789 }, { "epoch": 8.37, "learning_rate": 3.601865251648175e-05, "loss": 0.0205, "step": 5790 }, { "epoch": 8.37, "learning_rate": 3.598649300530632e-05, "loss": 0.0112, "step": 5791 }, { "epoch": 8.38, "learning_rate": 3.595433349413089e-05, "loss": 0.0014, "step": 5792 }, { "epoch": 8.38, "learning_rate": 3.5922173982955456e-05, "loss": 0.0012, "step": 5793 }, { "epoch": 8.38, "learning_rate": 3.5890014471780033e-05, "loss": 0.0227, "step": 5794 }, { "epoch": 8.38, "learning_rate": 3.5857854960604604e-05, "loss": 0.0171, "step": 5795 }, { "epoch": 8.38, "learning_rate": 3.582569544942917e-05, "loss": 0.0005, "step": 5796 }, { "epoch": 8.38, "learning_rate": 3.5793535938253745e-05, "loss": 0.0076, "step": 5797 }, { "epoch": 8.38, "learning_rate": 3.576137642707831e-05, "loss": 0.0026, "step": 5798 }, { "epoch": 8.39, "learning_rate": 3.572921691590288e-05, "loss": 0.0025, "step": 5799 }, { "epoch": 8.39, "learning_rate": 3.569705740472745e-05, "loss": 0.0214, "step": 5800 }, { "epoch": 8.39, "learning_rate": 3.566489789355202e-05, "loss": 0.018, "step": 5801 }, { "epoch": 8.39, "learning_rate": 3.563273838237659e-05, "loss": 0.0192, "step": 5802 }, { "epoch": 8.39, "learning_rate": 3.5600578871201154e-05, "loss": 0.0005, "step": 5803 }, { "epoch": 8.39, "learning_rate": 3.556841936002573e-05, "loss": 0.0027, "step": 5804 }, { "epoch": 8.39, "learning_rate": 3.55362598488503e-05, "loss": 0.0284, "step": 5805 }, { "epoch": 8.4, "learning_rate": 3.5504100337674866e-05, "loss": 0.0004, "step": 5806 }, { "epoch": 8.4, "learning_rate": 3.5471940826499437e-05, "loss": 0.0157, "step": 5807 }, { "epoch": 8.4, "learning_rate": 3.5439781315324014e-05, "loss": 0.0014, "step": 5808 }, { "epoch": 8.4, "learning_rate": 3.540762180414858e-05, "loss": 0.0109, "step": 5809 }, { "epoch": 8.4, "learning_rate": 3.537546229297315e-05, "loss": 0.0053, "step": 5810 }, { "epoch": 8.4, "learning_rate": 3.534330278179772e-05, "loss": 0.035, "step": 5811 }, { "epoch": 8.4, "learning_rate": 3.531114327062229e-05, "loss": 0.0004, "step": 5812 }, { "epoch": 8.41, "learning_rate": 3.527898375944686e-05, "loss": 0.0003, "step": 5813 }, { "epoch": 8.41, "learning_rate": 3.524682424827142e-05, "loss": 0.0381, "step": 5814 }, { "epoch": 8.41, "learning_rate": 3.5214664737096e-05, "loss": 0.0002, "step": 5815 }, { "epoch": 8.41, "learning_rate": 3.518250522592057e-05, "loss": 0.0184, "step": 5816 }, { "epoch": 8.41, "learning_rate": 3.5150345714745135e-05, "loss": 0.0313, "step": 5817 }, { "epoch": 8.41, "learning_rate": 3.5118186203569705e-05, "loss": 0.0002, "step": 5818 }, { "epoch": 8.42, "learning_rate": 3.5086026692394276e-05, "loss": 0.0006, "step": 5819 }, { "epoch": 8.42, "learning_rate": 3.5053867181218846e-05, "loss": 0.0007, "step": 5820 }, { "epoch": 8.42, "learning_rate": 3.502170767004342e-05, "loss": 0.0777, "step": 5821 }, { "epoch": 8.42, "learning_rate": 3.498954815886799e-05, "loss": 0.0158, "step": 5822 }, { "epoch": 8.42, "learning_rate": 3.495738864769256e-05, "loss": 0.013, "step": 5823 }, { "epoch": 8.42, "learning_rate": 3.492522913651713e-05, "loss": 0.0012, "step": 5824 }, { "epoch": 8.42, "learning_rate": 3.489306962534169e-05, "loss": 0.0404, "step": 5825 }, { "epoch": 8.43, "learning_rate": 3.486091011416627e-05, "loss": 0.0006, "step": 5826 }, { "epoch": 8.43, "learning_rate": 3.482875060299083e-05, "loss": 0.0129, "step": 5827 }, { "epoch": 8.43, "learning_rate": 3.4796591091815404e-05, "loss": 0.0006, "step": 5828 }, { "epoch": 8.43, "learning_rate": 3.476443158063998e-05, "loss": 0.02, "step": 5829 }, { "epoch": 8.43, "learning_rate": 3.4732272069464545e-05, "loss": 0.0005, "step": 5830 }, { "epoch": 8.43, "learning_rate": 3.4700112558289115e-05, "loss": 0.0429, "step": 5831 }, { "epoch": 8.43, "learning_rate": 3.4667953047113686e-05, "loss": 0.0402, "step": 5832 }, { "epoch": 8.44, "learning_rate": 3.4635793535938256e-05, "loss": 0.0186, "step": 5833 }, { "epoch": 8.44, "learning_rate": 3.460363402476283e-05, "loss": 0.0041, "step": 5834 }, { "epoch": 8.44, "learning_rate": 3.457147451358739e-05, "loss": 0.0001, "step": 5835 }, { "epoch": 8.44, "learning_rate": 3.453931500241197e-05, "loss": 0.0001, "step": 5836 }, { "epoch": 8.44, "learning_rate": 3.450715549123654e-05, "loss": 0.012, "step": 5837 }, { "epoch": 8.44, "learning_rate": 3.44749959800611e-05, "loss": 0.0115, "step": 5838 }, { "epoch": 8.44, "learning_rate": 3.444283646888567e-05, "loss": 0.006, "step": 5839 }, { "epoch": 8.45, "learning_rate": 3.441067695771024e-05, "loss": 0.0104, "step": 5840 }, { "epoch": 8.45, "learning_rate": 3.4378517446534814e-05, "loss": 0.0001, "step": 5841 }, { "epoch": 8.45, "learning_rate": 3.4346357935359384e-05, "loss": 0.0005, "step": 5842 }, { "epoch": 8.45, "learning_rate": 3.4314198424183955e-05, "loss": 0.0503, "step": 5843 }, { "epoch": 8.45, "learning_rate": 3.4282038913008525e-05, "loss": 0.0002, "step": 5844 }, { "epoch": 8.45, "learning_rate": 3.4249879401833096e-05, "loss": 0.0244, "step": 5845 }, { "epoch": 8.45, "learning_rate": 3.421771989065766e-05, "loss": 0.0338, "step": 5846 }, { "epoch": 8.46, "learning_rate": 3.418556037948224e-05, "loss": 0.0379, "step": 5847 }, { "epoch": 8.46, "learning_rate": 3.41534008683068e-05, "loss": 0.0011, "step": 5848 }, { "epoch": 8.46, "learning_rate": 3.412124135713137e-05, "loss": 0.0004, "step": 5849 }, { "epoch": 8.46, "learning_rate": 3.408908184595594e-05, "loss": 0.0025, "step": 5850 }, { "epoch": 8.46, "learning_rate": 3.405692233478051e-05, "loss": 0.006, "step": 5851 }, { "epoch": 8.46, "learning_rate": 3.402476282360508e-05, "loss": 0.0001, "step": 5852 }, { "epoch": 8.46, "learning_rate": 3.399260331242965e-05, "loss": 0.0006, "step": 5853 }, { "epoch": 8.47, "learning_rate": 3.3960443801254223e-05, "loss": 0.0406, "step": 5854 }, { "epoch": 8.47, "learning_rate": 3.3928284290078794e-05, "loss": 0.0072, "step": 5855 }, { "epoch": 8.47, "learning_rate": 3.389612477890336e-05, "loss": 0.0007, "step": 5856 }, { "epoch": 8.47, "learning_rate": 3.3863965267727935e-05, "loss": 0.0017, "step": 5857 }, { "epoch": 8.47, "learning_rate": 3.3831805756552506e-05, "loss": 0.0988, "step": 5858 }, { "epoch": 8.47, "learning_rate": 3.379964624537707e-05, "loss": 0.0189, "step": 5859 }, { "epoch": 8.47, "learning_rate": 3.376748673420164e-05, "loss": 0.0258, "step": 5860 }, { "epoch": 8.48, "learning_rate": 3.373532722302622e-05, "loss": 0.0029, "step": 5861 }, { "epoch": 8.48, "learning_rate": 3.370316771185078e-05, "loss": 0.0469, "step": 5862 }, { "epoch": 8.48, "learning_rate": 3.367100820067535e-05, "loss": 0.0732, "step": 5863 }, { "epoch": 8.48, "learning_rate": 3.363884868949992e-05, "loss": 0.0092, "step": 5864 }, { "epoch": 8.48, "learning_rate": 3.360668917832449e-05, "loss": 0.0762, "step": 5865 }, { "epoch": 8.48, "learning_rate": 3.357452966714906e-05, "loss": 0.0, "step": 5866 }, { "epoch": 8.48, "learning_rate": 3.3542370155973627e-05, "loss": 0.0016, "step": 5867 }, { "epoch": 8.49, "learning_rate": 3.3510210644798204e-05, "loss": 0.0074, "step": 5868 }, { "epoch": 8.49, "learning_rate": 3.3478051133622774e-05, "loss": 0.0019, "step": 5869 }, { "epoch": 8.49, "learning_rate": 3.344589162244734e-05, "loss": 0.0214, "step": 5870 }, { "epoch": 8.49, "learning_rate": 3.341373211127191e-05, "loss": 0.0039, "step": 5871 }, { "epoch": 8.49, "learning_rate": 3.338157260009648e-05, "loss": 0.0023, "step": 5872 }, { "epoch": 8.49, "learning_rate": 3.334941308892105e-05, "loss": 0.0489, "step": 5873 }, { "epoch": 8.49, "learning_rate": 3.331725357774562e-05, "loss": 0.0002, "step": 5874 }, { "epoch": 8.5, "learning_rate": 3.328509406657019e-05, "loss": 0.0056, "step": 5875 }, { "epoch": 8.5, "learning_rate": 3.325293455539476e-05, "loss": 0.003, "step": 5876 }, { "epoch": 8.5, "learning_rate": 3.322077504421933e-05, "loss": 0.0002, "step": 5877 }, { "epoch": 8.5, "learning_rate": 3.3188615533043895e-05, "loss": 0.0013, "step": 5878 }, { "epoch": 8.5, "learning_rate": 3.315645602186847e-05, "loss": 0.0245, "step": 5879 }, { "epoch": 8.5, "learning_rate": 3.3124296510693036e-05, "loss": 0.0001, "step": 5880 }, { "epoch": 8.5, "learning_rate": 3.309213699951761e-05, "loss": 0.0435, "step": 5881 }, { "epoch": 8.51, "learning_rate": 3.305997748834218e-05, "loss": 0.0221, "step": 5882 }, { "epoch": 8.51, "learning_rate": 3.302781797716675e-05, "loss": 0.0308, "step": 5883 }, { "epoch": 8.51, "learning_rate": 3.299565846599132e-05, "loss": 0.0608, "step": 5884 }, { "epoch": 8.51, "learning_rate": 3.296349895481589e-05, "loss": 0.0042, "step": 5885 }, { "epoch": 8.51, "learning_rate": 3.293133944364046e-05, "loss": 0.0009, "step": 5886 }, { "epoch": 8.51, "learning_rate": 3.289917993246503e-05, "loss": 0.0309, "step": 5887 }, { "epoch": 8.51, "learning_rate": 3.2867020421289594e-05, "loss": 0.0012, "step": 5888 }, { "epoch": 8.52, "learning_rate": 3.283486091011417e-05, "loss": 0.0364, "step": 5889 }, { "epoch": 8.52, "learning_rate": 3.280270139893874e-05, "loss": 0.014, "step": 5890 }, { "epoch": 8.52, "learning_rate": 3.2770541887763305e-05, "loss": 0.0365, "step": 5891 }, { "epoch": 8.52, "learning_rate": 3.2738382376587876e-05, "loss": 0.0008, "step": 5892 }, { "epoch": 8.52, "learning_rate": 3.2706222865412446e-05, "loss": 0.001, "step": 5893 }, { "epoch": 8.52, "learning_rate": 3.267406335423702e-05, "loss": 0.0021, "step": 5894 }, { "epoch": 8.52, "learning_rate": 3.264190384306159e-05, "loss": 0.0474, "step": 5895 }, { "epoch": 8.53, "learning_rate": 3.260974433188616e-05, "loss": 0.0502, "step": 5896 }, { "epoch": 8.53, "learning_rate": 3.257758482071073e-05, "loss": 0.0005, "step": 5897 }, { "epoch": 8.53, "learning_rate": 3.25454253095353e-05, "loss": 0.0013, "step": 5898 }, { "epoch": 8.53, "learning_rate": 3.251326579835986e-05, "loss": 0.025, "step": 5899 }, { "epoch": 8.53, "learning_rate": 3.248110628718444e-05, "loss": 0.0156, "step": 5900 }, { "epoch": 8.53, "learning_rate": 3.2448946776009004e-05, "loss": 0.0, "step": 5901 }, { "epoch": 8.54, "learning_rate": 3.2416787264833574e-05, "loss": 0.0142, "step": 5902 }, { "epoch": 8.54, "learning_rate": 3.2384627753658145e-05, "loss": 0.0123, "step": 5903 }, { "epoch": 8.54, "learning_rate": 3.2352468242482715e-05, "loss": 0.0023, "step": 5904 }, { "epoch": 8.54, "learning_rate": 3.2320308731307286e-05, "loss": 0.0028, "step": 5905 }, { "epoch": 8.54, "learning_rate": 3.2288149220131856e-05, "loss": 0.0446, "step": 5906 }, { "epoch": 8.54, "learning_rate": 3.225598970895643e-05, "loss": 0.0011, "step": 5907 }, { "epoch": 8.54, "learning_rate": 3.2223830197781e-05, "loss": 0.0797, "step": 5908 }, { "epoch": 8.55, "learning_rate": 3.219167068660556e-05, "loss": 0.041, "step": 5909 }, { "epoch": 8.55, "learning_rate": 3.215951117543013e-05, "loss": 0.0101, "step": 5910 }, { "epoch": 8.55, "learning_rate": 3.212735166425471e-05, "loss": 0.0572, "step": 5911 }, { "epoch": 8.55, "learning_rate": 3.209519215307927e-05, "loss": 0.0084, "step": 5912 }, { "epoch": 8.55, "learning_rate": 3.206303264190384e-05, "loss": 0.0415, "step": 5913 }, { "epoch": 8.55, "learning_rate": 3.203087313072842e-05, "loss": 0.0002, "step": 5914 }, { "epoch": 8.55, "learning_rate": 3.1998713619552984e-05, "loss": 0.0014, "step": 5915 }, { "epoch": 8.56, "learning_rate": 3.1966554108377555e-05, "loss": 0.0473, "step": 5916 }, { "epoch": 8.56, "learning_rate": 3.1934394597202125e-05, "loss": 0.001, "step": 5917 }, { "epoch": 8.56, "learning_rate": 3.1902235086026696e-05, "loss": 0.007, "step": 5918 }, { "epoch": 8.56, "learning_rate": 3.1870075574851266e-05, "loss": 0.0648, "step": 5919 }, { "epoch": 8.56, "learning_rate": 3.183791606367583e-05, "loss": 0.0002, "step": 5920 }, { "epoch": 8.56, "learning_rate": 3.180575655250041e-05, "loss": 0.0, "step": 5921 }, { "epoch": 8.56, "learning_rate": 3.177359704132497e-05, "loss": 0.0137, "step": 5922 }, { "epoch": 8.57, "learning_rate": 3.174143753014954e-05, "loss": 0.0374, "step": 5923 }, { "epoch": 8.57, "learning_rate": 3.170927801897411e-05, "loss": 0.0369, "step": 5924 }, { "epoch": 8.57, "learning_rate": 3.167711850779868e-05, "loss": 0.0012, "step": 5925 }, { "epoch": 8.57, "learning_rate": 3.164495899662325e-05, "loss": 0.0001, "step": 5926 }, { "epoch": 8.57, "learning_rate": 3.1612799485447823e-05, "loss": 0.0, "step": 5927 }, { "epoch": 8.57, "learning_rate": 3.1580639974272394e-05, "loss": 0.0073, "step": 5928 }, { "epoch": 8.57, "learning_rate": 3.1548480463096964e-05, "loss": 0.0381, "step": 5929 }, { "epoch": 8.58, "learning_rate": 3.151632095192153e-05, "loss": 0.0, "step": 5930 }, { "epoch": 8.58, "learning_rate": 3.14841614407461e-05, "loss": 0.0364, "step": 5931 }, { "epoch": 8.58, "learning_rate": 3.1452001929570676e-05, "loss": 0.0, "step": 5932 }, { "epoch": 8.58, "learning_rate": 3.141984241839524e-05, "loss": 0.0298, "step": 5933 }, { "epoch": 8.58, "learning_rate": 3.138768290721981e-05, "loss": 0.005, "step": 5934 }, { "epoch": 8.58, "learning_rate": 3.135552339604438e-05, "loss": 0.0699, "step": 5935 }, { "epoch": 8.58, "learning_rate": 3.132336388486895e-05, "loss": 0.0803, "step": 5936 }, { "epoch": 8.59, "learning_rate": 3.129120437369352e-05, "loss": 0.0003, "step": 5937 }, { "epoch": 8.59, "learning_rate": 3.1259044862518086e-05, "loss": 0.0002, "step": 5938 }, { "epoch": 8.59, "learning_rate": 3.122688535134266e-05, "loss": 0.0591, "step": 5939 }, { "epoch": 8.59, "learning_rate": 3.119472584016723e-05, "loss": 0.0064, "step": 5940 }, { "epoch": 8.59, "learning_rate": 3.11625663289918e-05, "loss": 0.0705, "step": 5941 }, { "epoch": 8.59, "learning_rate": 3.1130406817816374e-05, "loss": 0.0036, "step": 5942 }, { "epoch": 8.59, "learning_rate": 3.1098247306640945e-05, "loss": 0.0003, "step": 5943 }, { "epoch": 8.6, "learning_rate": 3.106608779546551e-05, "loss": 0.0001, "step": 5944 }, { "epoch": 8.6, "learning_rate": 3.103392828429008e-05, "loss": 0.0353, "step": 5945 }, { "epoch": 8.6, "learning_rate": 3.100176877311465e-05, "loss": 0.0, "step": 5946 }, { "epoch": 8.6, "learning_rate": 3.096960926193922e-05, "loss": 0.0008, "step": 5947 }, { "epoch": 8.6, "learning_rate": 3.093744975076379e-05, "loss": 0.0369, "step": 5948 }, { "epoch": 8.6, "learning_rate": 3.090529023958836e-05, "loss": 0.0003, "step": 5949 }, { "epoch": 8.6, "learning_rate": 3.087313072841293e-05, "loss": 0.0468, "step": 5950 }, { "epoch": 8.61, "learning_rate": 3.08409712172375e-05, "loss": 0.0063, "step": 5951 }, { "epoch": 8.61, "learning_rate": 3.0808811706062066e-05, "loss": 0.0005, "step": 5952 }, { "epoch": 8.61, "learning_rate": 3.077665219488664e-05, "loss": 0.0087, "step": 5953 }, { "epoch": 8.61, "learning_rate": 3.074449268371121e-05, "loss": 0.0462, "step": 5954 }, { "epoch": 8.61, "learning_rate": 3.071233317253578e-05, "loss": 0.0, "step": 5955 }, { "epoch": 8.61, "learning_rate": 3.068017366136035e-05, "loss": 0.0053, "step": 5956 }, { "epoch": 8.61, "learning_rate": 3.064801415018492e-05, "loss": 0.0044, "step": 5957 }, { "epoch": 8.62, "learning_rate": 3.061585463900949e-05, "loss": 0.0137, "step": 5958 }, { "epoch": 8.62, "learning_rate": 3.058369512783406e-05, "loss": 0.0001, "step": 5959 }, { "epoch": 8.62, "learning_rate": 3.055153561665863e-05, "loss": 0.0, "step": 5960 }, { "epoch": 8.62, "learning_rate": 3.05193761054832e-05, "loss": 0.0119, "step": 5961 }, { "epoch": 8.62, "learning_rate": 3.0487216594307764e-05, "loss": 0.0003, "step": 5962 }, { "epoch": 8.62, "learning_rate": 3.0455057083132338e-05, "loss": 0.0695, "step": 5963 }, { "epoch": 8.62, "learning_rate": 3.042289757195691e-05, "loss": 0.0168, "step": 5964 }, { "epoch": 8.63, "learning_rate": 3.0390738060781476e-05, "loss": 0.0315, "step": 5965 }, { "epoch": 8.63, "learning_rate": 3.035857854960605e-05, "loss": 0.0109, "step": 5966 }, { "epoch": 8.63, "learning_rate": 3.0326419038430613e-05, "loss": 0.0039, "step": 5967 }, { "epoch": 8.63, "learning_rate": 3.0294259527255187e-05, "loss": 0.0036, "step": 5968 }, { "epoch": 8.63, "learning_rate": 3.0262100016079758e-05, "loss": 0.0064, "step": 5969 }, { "epoch": 8.63, "learning_rate": 3.0229940504904325e-05, "loss": 0.0118, "step": 5970 }, { "epoch": 8.63, "learning_rate": 3.0197780993728895e-05, "loss": 0.0031, "step": 5971 }, { "epoch": 8.64, "learning_rate": 3.016562148255347e-05, "loss": 0.0194, "step": 5972 }, { "epoch": 8.64, "learning_rate": 3.0133461971378037e-05, "loss": 0.0245, "step": 5973 }, { "epoch": 8.64, "learning_rate": 3.0101302460202607e-05, "loss": 0.0224, "step": 5974 }, { "epoch": 8.64, "learning_rate": 3.0069142949027174e-05, "loss": 0.0379, "step": 5975 }, { "epoch": 8.64, "learning_rate": 3.0036983437851745e-05, "loss": 0.0001, "step": 5976 }, { "epoch": 8.64, "learning_rate": 3.000482392667632e-05, "loss": 0.0824, "step": 5977 }, { "epoch": 8.64, "learning_rate": 2.9972664415500882e-05, "loss": 0.0198, "step": 5978 }, { "epoch": 8.65, "learning_rate": 2.9940504904325456e-05, "loss": 0.0002, "step": 5979 }, { "epoch": 8.65, "learning_rate": 2.9908345393150027e-05, "loss": 0.0593, "step": 5980 }, { "epoch": 8.65, "learning_rate": 2.9876185881974594e-05, "loss": 0.0021, "step": 5981 }, { "epoch": 8.65, "learning_rate": 2.9844026370799168e-05, "loss": 0.0396, "step": 5982 }, { "epoch": 8.65, "learning_rate": 2.981186685962373e-05, "loss": 0.0016, "step": 5983 }, { "epoch": 8.65, "learning_rate": 2.9779707348448305e-05, "loss": 0.0299, "step": 5984 }, { "epoch": 8.66, "learning_rate": 2.9747547837272876e-05, "loss": 0.03, "step": 5985 }, { "epoch": 8.66, "learning_rate": 2.9715388326097443e-05, "loss": 0.0041, "step": 5986 }, { "epoch": 8.66, "learning_rate": 2.9683228814922014e-05, "loss": 0.0, "step": 5987 }, { "epoch": 8.66, "learning_rate": 2.9651069303746587e-05, "loss": 0.0004, "step": 5988 }, { "epoch": 8.66, "learning_rate": 2.9618909792571155e-05, "loss": 0.0234, "step": 5989 }, { "epoch": 8.66, "learning_rate": 2.9586750281395725e-05, "loss": 0.0319, "step": 5990 }, { "epoch": 8.66, "learning_rate": 2.9554590770220292e-05, "loss": 0.0259, "step": 5991 }, { "epoch": 8.67, "learning_rate": 2.9522431259044863e-05, "loss": 0.0487, "step": 5992 }, { "epoch": 8.67, "learning_rate": 2.9490271747869437e-05, "loss": 0.0689, "step": 5993 }, { "epoch": 8.67, "learning_rate": 2.9458112236694004e-05, "loss": 0.0013, "step": 5994 }, { "epoch": 8.67, "learning_rate": 2.9425952725518574e-05, "loss": 0.0007, "step": 5995 }, { "epoch": 8.67, "learning_rate": 2.9393793214343145e-05, "loss": 0.0064, "step": 5996 }, { "epoch": 8.67, "learning_rate": 2.9361633703167712e-05, "loss": 0.0007, "step": 5997 }, { "epoch": 8.67, "learning_rate": 2.9329474191992286e-05, "loss": 0.045, "step": 5998 }, { "epoch": 8.68, "learning_rate": 2.929731468081685e-05, "loss": 0.0006, "step": 5999 }, { "epoch": 8.68, "learning_rate": 2.9265155169641423e-05, "loss": 0.0165, "step": 6000 }, { "epoch": 8.68, "learning_rate": 2.9232995658465994e-05, "loss": 0.0524, "step": 6001 }, { "epoch": 8.68, "learning_rate": 2.920083614729056e-05, "loss": 0.0043, "step": 6002 }, { "epoch": 8.68, "learning_rate": 2.916867663611513e-05, "loss": 0.0101, "step": 6003 }, { "epoch": 8.68, "learning_rate": 2.91365171249397e-05, "loss": 0.032, "step": 6004 }, { "epoch": 8.68, "learning_rate": 2.9104357613764273e-05, "loss": 0.003, "step": 6005 }, { "epoch": 8.69, "learning_rate": 2.9072198102588843e-05, "loss": 0.0073, "step": 6006 }, { "epoch": 8.69, "learning_rate": 2.904003859141341e-05, "loss": 0.0313, "step": 6007 }, { "epoch": 8.69, "learning_rate": 2.900787908023798e-05, "loss": 0.0005, "step": 6008 }, { "epoch": 8.69, "learning_rate": 2.8975719569062555e-05, "loss": 0.0194, "step": 6009 }, { "epoch": 8.69, "learning_rate": 2.8943560057887122e-05, "loss": 0.0, "step": 6010 }, { "epoch": 8.69, "learning_rate": 2.8911400546711692e-05, "loss": 0.0532, "step": 6011 }, { "epoch": 8.69, "learning_rate": 2.887924103553626e-05, "loss": 0.0001, "step": 6012 }, { "epoch": 8.7, "learning_rate": 2.884708152436083e-05, "loss": 0.0003, "step": 6013 }, { "epoch": 8.7, "learning_rate": 2.8814922013185404e-05, "loss": 0.0482, "step": 6014 }, { "epoch": 8.7, "learning_rate": 2.8782762502009968e-05, "loss": 0.0002, "step": 6015 }, { "epoch": 8.7, "learning_rate": 2.875060299083454e-05, "loss": 0.002, "step": 6016 }, { "epoch": 8.7, "learning_rate": 2.8718443479659112e-05, "loss": 0.0243, "step": 6017 }, { "epoch": 8.7, "learning_rate": 2.868628396848368e-05, "loss": 0.0002, "step": 6018 }, { "epoch": 8.7, "learning_rate": 2.865412445730825e-05, "loss": 0.0045, "step": 6019 }, { "epoch": 8.71, "learning_rate": 2.8621964946132817e-05, "loss": 0.0003, "step": 6020 }, { "epoch": 8.71, "learning_rate": 2.858980543495739e-05, "loss": 0.0073, "step": 6021 }, { "epoch": 8.71, "learning_rate": 2.855764592378196e-05, "loss": 0.002, "step": 6022 }, { "epoch": 8.71, "learning_rate": 2.8525486412606528e-05, "loss": 0.005, "step": 6023 }, { "epoch": 8.71, "learning_rate": 2.84933269014311e-05, "loss": 0.0154, "step": 6024 }, { "epoch": 8.71, "learning_rate": 2.8461167390255673e-05, "loss": 0.0388, "step": 6025 }, { "epoch": 8.71, "learning_rate": 2.842900787908024e-05, "loss": 0.0448, "step": 6026 }, { "epoch": 8.72, "learning_rate": 2.839684836790481e-05, "loss": 0.0356, "step": 6027 }, { "epoch": 8.72, "learning_rate": 2.8364688856729377e-05, "loss": 0.0113, "step": 6028 }, { "epoch": 8.72, "learning_rate": 2.8332529345553948e-05, "loss": 0.0461, "step": 6029 }, { "epoch": 8.72, "learning_rate": 2.8300369834378522e-05, "loss": 0.0332, "step": 6030 }, { "epoch": 8.72, "learning_rate": 2.8268210323203086e-05, "loss": 0.0171, "step": 6031 }, { "epoch": 8.72, "learning_rate": 2.823605081202766e-05, "loss": 0.0049, "step": 6032 }, { "epoch": 8.72, "learning_rate": 2.820389130085223e-05, "loss": 0.0, "step": 6033 }, { "epoch": 8.73, "learning_rate": 2.8171731789676797e-05, "loss": 0.0186, "step": 6034 }, { "epoch": 8.73, "learning_rate": 2.813957227850137e-05, "loss": 0.0231, "step": 6035 }, { "epoch": 8.73, "learning_rate": 2.8107412767325935e-05, "loss": 0.0508, "step": 6036 }, { "epoch": 8.73, "learning_rate": 2.807525325615051e-05, "loss": 0.0541, "step": 6037 }, { "epoch": 8.73, "learning_rate": 2.804309374497508e-05, "loss": 0.0475, "step": 6038 }, { "epoch": 8.73, "learning_rate": 2.8010934233799646e-05, "loss": 0.0043, "step": 6039 }, { "epoch": 8.73, "learning_rate": 2.7978774722624217e-05, "loss": 0.0308, "step": 6040 }, { "epoch": 8.74, "learning_rate": 2.794661521144879e-05, "loss": 0.0039, "step": 6041 }, { "epoch": 8.74, "learning_rate": 2.7914455700273358e-05, "loss": 0.0476, "step": 6042 }, { "epoch": 8.74, "learning_rate": 2.788229618909793e-05, "loss": 0.0001, "step": 6043 }, { "epoch": 8.74, "learning_rate": 2.7850136677922495e-05, "loss": 0.0, "step": 6044 }, { "epoch": 8.74, "learning_rate": 2.7817977166747066e-05, "loss": 0.0, "step": 6045 }, { "epoch": 8.74, "learning_rate": 2.778581765557164e-05, "loss": 0.0317, "step": 6046 }, { "epoch": 8.74, "learning_rate": 2.7753658144396204e-05, "loss": 0.0364, "step": 6047 }, { "epoch": 8.75, "learning_rate": 2.7721498633220778e-05, "loss": 0.0383, "step": 6048 }, { "epoch": 8.75, "learning_rate": 2.7689339122045345e-05, "loss": 0.0739, "step": 6049 }, { "epoch": 8.75, "learning_rate": 2.7657179610869915e-05, "loss": 0.0, "step": 6050 }, { "epoch": 8.75, "learning_rate": 2.762502009969449e-05, "loss": 0.0188, "step": 6051 }, { "epoch": 8.75, "learning_rate": 2.7592860588519053e-05, "loss": 0.0397, "step": 6052 }, { "epoch": 8.75, "learning_rate": 2.7560701077343627e-05, "loss": 0.0013, "step": 6053 }, { "epoch": 8.75, "learning_rate": 2.7528541566168197e-05, "loss": 0.0, "step": 6054 }, { "epoch": 8.76, "learning_rate": 2.7496382054992764e-05, "loss": 0.0273, "step": 6055 }, { "epoch": 8.76, "learning_rate": 2.7464222543817335e-05, "loss": 0.0471, "step": 6056 }, { "epoch": 8.76, "learning_rate": 2.7432063032641902e-05, "loss": 0.0026, "step": 6057 }, { "epoch": 8.76, "learning_rate": 2.7399903521466476e-05, "loss": 0.0021, "step": 6058 }, { "epoch": 8.76, "learning_rate": 2.7367744010291046e-05, "loss": 0.0619, "step": 6059 }, { "epoch": 8.76, "learning_rate": 2.7335584499115613e-05, "loss": 0.0103, "step": 6060 }, { "epoch": 8.77, "learning_rate": 2.7303424987940184e-05, "loss": 0.0536, "step": 6061 }, { "epoch": 8.77, "learning_rate": 2.7271265476764758e-05, "loss": 0.0396, "step": 6062 }, { "epoch": 8.77, "learning_rate": 2.723910596558932e-05, "loss": 0.0206, "step": 6063 }, { "epoch": 8.77, "learning_rate": 2.7206946454413896e-05, "loss": 0.0009, "step": 6064 }, { "epoch": 8.77, "learning_rate": 2.7174786943238463e-05, "loss": 0.0366, "step": 6065 }, { "epoch": 8.77, "learning_rate": 2.7142627432063033e-05, "loss": 0.0006, "step": 6066 }, { "epoch": 8.77, "learning_rate": 2.7110467920887607e-05, "loss": 0.0079, "step": 6067 }, { "epoch": 8.78, "learning_rate": 2.707830840971217e-05, "loss": 0.0814, "step": 6068 }, { "epoch": 8.78, "learning_rate": 2.7046148898536745e-05, "loss": 0.0268, "step": 6069 }, { "epoch": 8.78, "learning_rate": 2.7013989387361315e-05, "loss": 0.0001, "step": 6070 }, { "epoch": 8.78, "learning_rate": 2.6981829876185882e-05, "loss": 0.0, "step": 6071 }, { "epoch": 8.78, "learning_rate": 2.6949670365010453e-05, "loss": 0.006, "step": 6072 }, { "epoch": 8.78, "learning_rate": 2.691751085383502e-05, "loss": 0.0125, "step": 6073 }, { "epoch": 8.78, "learning_rate": 2.6885351342659594e-05, "loss": 0.005, "step": 6074 }, { "epoch": 8.79, "learning_rate": 2.6853191831484164e-05, "loss": 0.0019, "step": 6075 }, { "epoch": 8.79, "learning_rate": 2.682103232030873e-05, "loss": 0.0074, "step": 6076 }, { "epoch": 8.79, "learning_rate": 2.6788872809133302e-05, "loss": 0.0001, "step": 6077 }, { "epoch": 8.79, "learning_rate": 2.6756713297957876e-05, "loss": 0.0045, "step": 6078 }, { "epoch": 8.79, "learning_rate": 2.6724553786782443e-05, "loss": 0.0384, "step": 6079 }, { "epoch": 8.79, "learning_rate": 2.6692394275607014e-05, "loss": 0.0014, "step": 6080 }, { "epoch": 8.79, "learning_rate": 2.666023476443158e-05, "loss": 0.0405, "step": 6081 }, { "epoch": 8.8, "learning_rate": 2.662807525325615e-05, "loss": 0.0085, "step": 6082 }, { "epoch": 8.8, "learning_rate": 2.6595915742080725e-05, "loss": 0.0972, "step": 6083 }, { "epoch": 8.8, "learning_rate": 2.656375623090529e-05, "loss": 0.0235, "step": 6084 }, { "epoch": 8.8, "learning_rate": 2.6531596719729863e-05, "loss": 0.0027, "step": 6085 }, { "epoch": 8.8, "learning_rate": 2.6499437208554433e-05, "loss": 0.0708, "step": 6086 }, { "epoch": 8.8, "learning_rate": 2.6467277697379e-05, "loss": 0.0147, "step": 6087 }, { "epoch": 8.8, "learning_rate": 2.643511818620357e-05, "loss": 0.0005, "step": 6088 }, { "epoch": 8.81, "learning_rate": 2.6402958675028138e-05, "loss": 0.0003, "step": 6089 }, { "epoch": 8.81, "learning_rate": 2.6370799163852712e-05, "loss": 0.0262, "step": 6090 }, { "epoch": 8.81, "learning_rate": 2.6338639652677282e-05, "loss": 0.0097, "step": 6091 }, { "epoch": 8.81, "learning_rate": 2.630648014150185e-05, "loss": 0.0, "step": 6092 }, { "epoch": 8.81, "learning_rate": 2.627432063032642e-05, "loss": 0.0515, "step": 6093 }, { "epoch": 8.81, "learning_rate": 2.6242161119150987e-05, "loss": 0.0061, "step": 6094 }, { "epoch": 8.81, "learning_rate": 2.621000160797556e-05, "loss": 0.0133, "step": 6095 }, { "epoch": 8.82, "learning_rate": 2.617784209680013e-05, "loss": 0.0349, "step": 6096 }, { "epoch": 8.82, "learning_rate": 2.61456825856247e-05, "loss": 0.0095, "step": 6097 }, { "epoch": 8.82, "learning_rate": 2.611352307444927e-05, "loss": 0.0008, "step": 6098 }, { "epoch": 8.82, "learning_rate": 2.6081363563273843e-05, "loss": 0.0211, "step": 6099 }, { "epoch": 8.82, "learning_rate": 2.6049204052098407e-05, "loss": 0.0036, "step": 6100 }, { "epoch": 8.82, "learning_rate": 2.601704454092298e-05, "loss": 0.046, "step": 6101 }, { "epoch": 8.82, "learning_rate": 2.5984885029747548e-05, "loss": 0.0044, "step": 6102 }, { "epoch": 8.83, "learning_rate": 2.595272551857212e-05, "loss": 0.0332, "step": 6103 }, { "epoch": 8.83, "learning_rate": 2.592056600739669e-05, "loss": 0.002, "step": 6104 }, { "epoch": 8.83, "learning_rate": 2.5888406496221256e-05, "loss": 0.0203, "step": 6105 }, { "epoch": 8.83, "learning_rate": 2.585624698504583e-05, "loss": 0.0002, "step": 6106 }, { "epoch": 8.83, "learning_rate": 2.58240874738704e-05, "loss": 0.0223, "step": 6107 }, { "epoch": 8.83, "learning_rate": 2.5791927962694968e-05, "loss": 0.0294, "step": 6108 }, { "epoch": 8.83, "learning_rate": 2.5759768451519538e-05, "loss": 0.0471, "step": 6109 }, { "epoch": 8.84, "learning_rate": 2.5727608940344105e-05, "loss": 0.0005, "step": 6110 }, { "epoch": 8.84, "learning_rate": 2.569544942916868e-05, "loss": 0.0004, "step": 6111 }, { "epoch": 8.84, "learning_rate": 2.566328991799325e-05, "loss": 0.0083, "step": 6112 }, { "epoch": 8.84, "learning_rate": 2.5631130406817817e-05, "loss": 0.0606, "step": 6113 }, { "epoch": 8.84, "learning_rate": 2.5598970895642387e-05, "loss": 0.0006, "step": 6114 }, { "epoch": 8.84, "learning_rate": 2.556681138446696e-05, "loss": 0.0203, "step": 6115 }, { "epoch": 8.84, "learning_rate": 2.5534651873291525e-05, "loss": 0.0133, "step": 6116 }, { "epoch": 8.85, "learning_rate": 2.55024923621161e-05, "loss": 0.0002, "step": 6117 }, { "epoch": 8.85, "learning_rate": 2.5470332850940666e-05, "loss": 0.0248, "step": 6118 }, { "epoch": 8.85, "learning_rate": 2.5438173339765236e-05, "loss": 0.0797, "step": 6119 }, { "epoch": 8.85, "learning_rate": 2.5406013828589807e-05, "loss": 0.0, "step": 6120 }, { "epoch": 8.85, "learning_rate": 2.5373854317414374e-05, "loss": 0.0295, "step": 6121 }, { "epoch": 8.85, "learning_rate": 2.5341694806238948e-05, "loss": 0.0503, "step": 6122 }, { "epoch": 8.85, "learning_rate": 2.530953529506352e-05, "loss": 0.0686, "step": 6123 }, { "epoch": 8.86, "learning_rate": 2.5277375783888086e-05, "loss": 0.0083, "step": 6124 }, { "epoch": 8.86, "learning_rate": 2.5245216272712656e-05, "loss": 0.0001, "step": 6125 }, { "epoch": 8.86, "learning_rate": 2.5213056761537223e-05, "loss": 0.0092, "step": 6126 }, { "epoch": 8.86, "learning_rate": 2.5180897250361797e-05, "loss": 0.0203, "step": 6127 }, { "epoch": 8.86, "learning_rate": 2.5148737739186368e-05, "loss": 0.0049, "step": 6128 }, { "epoch": 8.86, "learning_rate": 2.5116578228010935e-05, "loss": 0.0016, "step": 6129 }, { "epoch": 8.86, "learning_rate": 2.5084418716835505e-05, "loss": 0.0191, "step": 6130 }, { "epoch": 8.87, "learning_rate": 2.5052259205660072e-05, "loss": 0.0104, "step": 6131 }, { "epoch": 8.87, "learning_rate": 2.5020099694484643e-05, "loss": 0.0004, "step": 6132 }, { "epoch": 8.87, "learning_rate": 2.4987940183309213e-05, "loss": 0.0043, "step": 6133 }, { "epoch": 8.87, "learning_rate": 2.4955780672133784e-05, "loss": 0.059, "step": 6134 }, { "epoch": 8.87, "learning_rate": 2.4923621160958354e-05, "loss": 0.0455, "step": 6135 }, { "epoch": 8.87, "learning_rate": 2.4891461649782925e-05, "loss": 0.0032, "step": 6136 }, { "epoch": 8.87, "learning_rate": 2.4859302138607492e-05, "loss": 0.0343, "step": 6137 }, { "epoch": 8.88, "learning_rate": 2.4827142627432066e-05, "loss": 0.035, "step": 6138 }, { "epoch": 8.88, "learning_rate": 2.4794983116256633e-05, "loss": 0.0, "step": 6139 }, { "epoch": 8.88, "learning_rate": 2.4762823605081204e-05, "loss": 0.0366, "step": 6140 }, { "epoch": 8.88, "learning_rate": 2.4730664093905774e-05, "loss": 0.048, "step": 6141 }, { "epoch": 8.88, "learning_rate": 2.4698504582730345e-05, "loss": 0.0424, "step": 6142 }, { "epoch": 8.88, "learning_rate": 2.4666345071554915e-05, "loss": 0.0177, "step": 6143 }, { "epoch": 8.89, "learning_rate": 2.4634185560379482e-05, "loss": 0.0, "step": 6144 }, { "epoch": 8.89, "learning_rate": 2.4602026049204053e-05, "loss": 0.0111, "step": 6145 }, { "epoch": 8.89, "learning_rate": 2.4569866538028623e-05, "loss": 0.0462, "step": 6146 }, { "epoch": 8.89, "learning_rate": 2.4537707026853194e-05, "loss": 0.0008, "step": 6147 }, { "epoch": 8.89, "learning_rate": 2.450554751567776e-05, "loss": 0.0593, "step": 6148 }, { "epoch": 8.89, "learning_rate": 2.447338800450233e-05, "loss": 0.0002, "step": 6149 }, { "epoch": 8.89, "learning_rate": 2.4441228493326902e-05, "loss": 0.0077, "step": 6150 }, { "epoch": 8.9, "learning_rate": 2.4409068982151473e-05, "loss": 0.025, "step": 6151 }, { "epoch": 8.9, "learning_rate": 2.4376909470976043e-05, "loss": 0.0131, "step": 6152 }, { "epoch": 8.9, "learning_rate": 2.434474995980061e-05, "loss": 0.0, "step": 6153 }, { "epoch": 8.9, "learning_rate": 2.4312590448625184e-05, "loss": 0.0302, "step": 6154 }, { "epoch": 8.9, "learning_rate": 2.428043093744975e-05, "loss": 0.0001, "step": 6155 }, { "epoch": 8.9, "learning_rate": 2.424827142627432e-05, "loss": 0.0019, "step": 6156 }, { "epoch": 8.9, "learning_rate": 2.4216111915098892e-05, "loss": 0.0, "step": 6157 }, { "epoch": 8.91, "learning_rate": 2.4183952403923463e-05, "loss": 0.0176, "step": 6158 }, { "epoch": 8.91, "learning_rate": 2.4151792892748033e-05, "loss": 0.0008, "step": 6159 }, { "epoch": 8.91, "learning_rate": 2.41196333815726e-05, "loss": 0.0579, "step": 6160 }, { "epoch": 8.91, "learning_rate": 2.408747387039717e-05, "loss": 0.0162, "step": 6161 }, { "epoch": 8.91, "learning_rate": 2.405531435922174e-05, "loss": 0.0325, "step": 6162 }, { "epoch": 8.91, "learning_rate": 2.4023154848046312e-05, "loss": 0.0003, "step": 6163 }, { "epoch": 8.91, "learning_rate": 2.399099533687088e-05, "loss": 0.0002, "step": 6164 }, { "epoch": 8.92, "learning_rate": 2.395883582569545e-05, "loss": 0.012, "step": 6165 }, { "epoch": 8.92, "learning_rate": 2.3926676314520023e-05, "loss": 0.0016, "step": 6166 }, { "epoch": 8.92, "learning_rate": 2.389451680334459e-05, "loss": 0.0011, "step": 6167 }, { "epoch": 8.92, "learning_rate": 2.386235729216916e-05, "loss": 0.0006, "step": 6168 }, { "epoch": 8.92, "learning_rate": 2.3830197780993728e-05, "loss": 0.0427, "step": 6169 }, { "epoch": 8.92, "learning_rate": 2.37980382698183e-05, "loss": 0.0006, "step": 6170 }, { "epoch": 8.92, "learning_rate": 2.376587875864287e-05, "loss": 0.0023, "step": 6171 }, { "epoch": 8.93, "learning_rate": 2.373371924746744e-05, "loss": 0.0006, "step": 6172 }, { "epoch": 8.93, "learning_rate": 2.370155973629201e-05, "loss": 0.0001, "step": 6173 }, { "epoch": 8.93, "learning_rate": 2.3669400225116577e-05, "loss": 0.0, "step": 6174 }, { "epoch": 8.93, "learning_rate": 2.363724071394115e-05, "loss": 0.0009, "step": 6175 }, { "epoch": 8.93, "learning_rate": 2.360508120276572e-05, "loss": 0.0186, "step": 6176 }, { "epoch": 8.93, "learning_rate": 2.357292169159029e-05, "loss": 0.0713, "step": 6177 }, { "epoch": 8.93, "learning_rate": 2.3540762180414856e-05, "loss": 0.0107, "step": 6178 }, { "epoch": 8.94, "learning_rate": 2.350860266923943e-05, "loss": 0.0904, "step": 6179 }, { "epoch": 8.94, "learning_rate": 2.3476443158064e-05, "loss": 0.0014, "step": 6180 }, { "epoch": 8.94, "learning_rate": 2.3444283646888568e-05, "loss": 0.0012, "step": 6181 }, { "epoch": 8.94, "learning_rate": 2.3412124135713138e-05, "loss": 0.0259, "step": 6182 }, { "epoch": 8.94, "learning_rate": 2.337996462453771e-05, "loss": 0.0084, "step": 6183 }, { "epoch": 8.94, "learning_rate": 2.334780511336228e-05, "loss": 0.0026, "step": 6184 }, { "epoch": 8.94, "learning_rate": 2.3315645602186846e-05, "loss": 0.0466, "step": 6185 }, { "epoch": 8.95, "learning_rate": 2.3283486091011417e-05, "loss": 0.0049, "step": 6186 }, { "epoch": 8.95, "learning_rate": 2.3251326579835987e-05, "loss": 0.0002, "step": 6187 }, { "epoch": 8.95, "learning_rate": 2.3219167068660558e-05, "loss": 0.0392, "step": 6188 }, { "epoch": 8.95, "learning_rate": 2.3187007557485128e-05, "loss": 0.0, "step": 6189 }, { "epoch": 8.95, "learning_rate": 2.3154848046309695e-05, "loss": 0.0011, "step": 6190 }, { "epoch": 8.95, "learning_rate": 2.312268853513427e-05, "loss": 0.0272, "step": 6191 }, { "epoch": 8.95, "learning_rate": 2.3090529023958836e-05, "loss": 0.0405, "step": 6192 }, { "epoch": 8.96, "learning_rate": 2.3058369512783407e-05, "loss": 0.0387, "step": 6193 }, { "epoch": 8.96, "learning_rate": 2.3026210001607974e-05, "loss": 0.0535, "step": 6194 }, { "epoch": 8.96, "learning_rate": 2.2994050490432548e-05, "loss": 0.0076, "step": 6195 }, { "epoch": 8.96, "learning_rate": 2.296189097925712e-05, "loss": 0.0001, "step": 6196 }, { "epoch": 8.96, "learning_rate": 2.2929731468081686e-05, "loss": 0.0045, "step": 6197 }, { "epoch": 8.96, "learning_rate": 2.2897571956906256e-05, "loss": 0.0141, "step": 6198 }, { "epoch": 8.96, "learning_rate": 2.2865412445730827e-05, "loss": 0.0037, "step": 6199 }, { "epoch": 8.97, "learning_rate": 2.2833252934555397e-05, "loss": 0.0171, "step": 6200 }, { "epoch": 8.97, "learning_rate": 2.2801093423379964e-05, "loss": 0.0, "step": 6201 }, { "epoch": 8.97, "learning_rate": 2.2768933912204535e-05, "loss": 0.1466, "step": 6202 }, { "epoch": 8.97, "learning_rate": 2.2736774401029105e-05, "loss": 0.0148, "step": 6203 }, { "epoch": 8.97, "learning_rate": 2.2704614889853676e-05, "loss": 0.0095, "step": 6204 }, { "epoch": 8.97, "learning_rate": 2.2672455378678246e-05, "loss": 0.0999, "step": 6205 }, { "epoch": 8.97, "learning_rate": 2.2640295867502813e-05, "loss": 0.0362, "step": 6206 }, { "epoch": 8.98, "learning_rate": 2.2608136356327387e-05, "loss": 0.0199, "step": 6207 }, { "epoch": 8.98, "learning_rate": 2.2575976845151954e-05, "loss": 0.0002, "step": 6208 }, { "epoch": 8.98, "learning_rate": 2.2543817333976525e-05, "loss": 0.0003, "step": 6209 }, { "epoch": 8.98, "learning_rate": 2.2511657822801095e-05, "loss": 0.0072, "step": 6210 }, { "epoch": 8.98, "learning_rate": 2.2479498311625663e-05, "loss": 0.0147, "step": 6211 }, { "epoch": 8.98, "learning_rate": 2.2447338800450236e-05, "loss": 0.0006, "step": 6212 }, { "epoch": 8.98, "learning_rate": 2.2415179289274804e-05, "loss": 0.0206, "step": 6213 }, { "epoch": 8.99, "learning_rate": 2.2383019778099374e-05, "loss": 0.0253, "step": 6214 }, { "epoch": 8.99, "learning_rate": 2.235086026692394e-05, "loss": 0.0226, "step": 6215 }, { "epoch": 8.99, "learning_rate": 2.2318700755748515e-05, "loss": 0.0713, "step": 6216 }, { "epoch": 8.99, "learning_rate": 2.2286541244573082e-05, "loss": 0.0011, "step": 6217 }, { "epoch": 8.99, "learning_rate": 2.2254381733397653e-05, "loss": 0.001, "step": 6218 }, { "epoch": 8.99, "learning_rate": 2.2222222222222223e-05, "loss": 0.0002, "step": 6219 }, { "epoch": 8.99, "learning_rate": 2.2190062711046794e-05, "loss": 0.0046, "step": 6220 }, { "epoch": 9.0, "learning_rate": 2.2157903199871364e-05, "loss": 0.0684, "step": 6221 }, { "epoch": 9.0, "learning_rate": 2.212574368869593e-05, "loss": 0.0601, "step": 6222 }, { "epoch": 9.0, "learning_rate": 2.2093584177520502e-05, "loss": 0.0049, "step": 6223 }, { "epoch": 9.0, "learning_rate": 2.2061424666345072e-05, "loss": 0.04, "step": 6224 }, { "epoch": 9.0, "learning_rate": 2.2029265155169643e-05, "loss": 0.001, "step": 6225 }, { "epoch": 9.0, "learning_rate": 2.1997105643994213e-05, "loss": 0.0211, "step": 6226 }, { "epoch": 9.01, "learning_rate": 2.196494613281878e-05, "loss": 0.0142, "step": 6227 }, { "epoch": 9.01, "learning_rate": 2.1932786621643355e-05, "loss": 0.0021, "step": 6228 }, { "epoch": 9.01, "learning_rate": 2.190062711046792e-05, "loss": 0.0311, "step": 6229 }, { "epoch": 9.01, "learning_rate": 2.1868467599292492e-05, "loss": 0.0389, "step": 6230 }, { "epoch": 9.01, "learning_rate": 2.183630808811706e-05, "loss": 0.0071, "step": 6231 }, { "epoch": 9.01, "learning_rate": 2.1804148576941633e-05, "loss": 0.0059, "step": 6232 }, { "epoch": 9.01, "learning_rate": 2.17719890657662e-05, "loss": 0.0093, "step": 6233 }, { "epoch": 9.02, "learning_rate": 2.173982955459077e-05, "loss": 0.0006, "step": 6234 }, { "epoch": 9.02, "learning_rate": 2.170767004341534e-05, "loss": 0.0, "step": 6235 }, { "epoch": 9.02, "learning_rate": 2.1675510532239912e-05, "loss": 0.0008, "step": 6236 }, { "epoch": 9.02, "learning_rate": 2.1643351021064482e-05, "loss": 0.0366, "step": 6237 }, { "epoch": 9.02, "learning_rate": 2.161119150988905e-05, "loss": 0.0, "step": 6238 }, { "epoch": 9.02, "learning_rate": 2.157903199871362e-05, "loss": 0.0, "step": 6239 }, { "epoch": 9.02, "learning_rate": 2.154687248753819e-05, "loss": 0.0013, "step": 6240 }, { "epoch": 9.03, "learning_rate": 2.151471297636276e-05, "loss": 0.0034, "step": 6241 }, { "epoch": 9.03, "learning_rate": 2.148255346518733e-05, "loss": 0.0004, "step": 6242 }, { "epoch": 9.03, "learning_rate": 2.14503939540119e-05, "loss": 0.04, "step": 6243 }, { "epoch": 9.03, "learning_rate": 2.1418234442836473e-05, "loss": 0.0197, "step": 6244 }, { "epoch": 9.03, "learning_rate": 2.138607493166104e-05, "loss": 0.038, "step": 6245 }, { "epoch": 9.03, "learning_rate": 2.135391542048561e-05, "loss": 0.0001, "step": 6246 }, { "epoch": 9.03, "learning_rate": 2.1321755909310177e-05, "loss": 0.0072, "step": 6247 }, { "epoch": 9.04, "learning_rate": 2.128959639813475e-05, "loss": 0.0021, "step": 6248 }, { "epoch": 9.04, "learning_rate": 2.125743688695932e-05, "loss": 0.001, "step": 6249 }, { "epoch": 9.04, "learning_rate": 2.122527737578389e-05, "loss": 0.0032, "step": 6250 }, { "epoch": 9.04, "learning_rate": 2.119311786460846e-05, "loss": 0.1011, "step": 6251 }, { "epoch": 9.04, "learning_rate": 2.1160958353433026e-05, "loss": 0.015, "step": 6252 }, { "epoch": 9.04, "learning_rate": 2.11287988422576e-05, "loss": 0.0004, "step": 6253 }, { "epoch": 9.04, "learning_rate": 2.1096639331082167e-05, "loss": 0.0099, "step": 6254 }, { "epoch": 9.05, "learning_rate": 2.1064479819906738e-05, "loss": 0.0297, "step": 6255 }, { "epoch": 9.05, "learning_rate": 2.103232030873131e-05, "loss": 0.0217, "step": 6256 }, { "epoch": 9.05, "learning_rate": 2.100016079755588e-05, "loss": 0.0002, "step": 6257 }, { "epoch": 9.05, "learning_rate": 2.096800128638045e-05, "loss": 0.0108, "step": 6258 }, { "epoch": 9.05, "learning_rate": 2.0935841775205017e-05, "loss": 0.0365, "step": 6259 }, { "epoch": 9.05, "learning_rate": 2.0903682264029587e-05, "loss": 0.063, "step": 6260 }, { "epoch": 9.05, "learning_rate": 2.0871522752854158e-05, "loss": 0.0002, "step": 6261 }, { "epoch": 9.06, "learning_rate": 2.0839363241678728e-05, "loss": 0.0001, "step": 6262 }, { "epoch": 9.06, "learning_rate": 2.0807203730503295e-05, "loss": 0.0066, "step": 6263 }, { "epoch": 9.06, "learning_rate": 2.0775044219327866e-05, "loss": 0.0134, "step": 6264 }, { "epoch": 9.06, "learning_rate": 2.0742884708152436e-05, "loss": 0.0304, "step": 6265 }, { "epoch": 9.06, "learning_rate": 2.0710725196977007e-05, "loss": 0.0143, "step": 6266 }, { "epoch": 9.06, "learning_rate": 2.0678565685801577e-05, "loss": 0.0379, "step": 6267 }, { "epoch": 9.06, "learning_rate": 2.0646406174626145e-05, "loss": 0.0732, "step": 6268 }, { "epoch": 9.07, "learning_rate": 2.061424666345072e-05, "loss": 0.0361, "step": 6269 }, { "epoch": 9.07, "learning_rate": 2.0582087152275286e-05, "loss": 0.0332, "step": 6270 }, { "epoch": 9.07, "learning_rate": 2.0549927641099856e-05, "loss": 0.0, "step": 6271 }, { "epoch": 9.07, "learning_rate": 2.0517768129924427e-05, "loss": 0.0033, "step": 6272 }, { "epoch": 9.07, "learning_rate": 2.0485608618748997e-05, "loss": 0.0464, "step": 6273 }, { "epoch": 9.07, "learning_rate": 2.0453449107573568e-05, "loss": 0.0, "step": 6274 }, { "epoch": 9.07, "learning_rate": 2.0421289596398135e-05, "loss": 0.0354, "step": 6275 }, { "epoch": 9.08, "learning_rate": 2.0389130085222705e-05, "loss": 0.0251, "step": 6276 }, { "epoch": 9.08, "learning_rate": 2.0356970574047276e-05, "loss": 0.0047, "step": 6277 }, { "epoch": 9.08, "learning_rate": 2.0324811062871846e-05, "loss": 0.0004, "step": 6278 }, { "epoch": 9.08, "learning_rate": 2.0292651551696413e-05, "loss": 0.0369, "step": 6279 }, { "epoch": 9.08, "learning_rate": 2.0260492040520984e-05, "loss": 0.0096, "step": 6280 }, { "epoch": 9.08, "learning_rate": 2.0228332529345558e-05, "loss": 0.0004, "step": 6281 }, { "epoch": 9.08, "learning_rate": 2.0196173018170125e-05, "loss": 0.0408, "step": 6282 }, { "epoch": 9.09, "learning_rate": 2.0164013506994695e-05, "loss": 0.0025, "step": 6283 }, { "epoch": 9.09, "learning_rate": 2.0131853995819263e-05, "loss": 0.0012, "step": 6284 }, { "epoch": 9.09, "learning_rate": 2.0099694484643836e-05, "loss": 0.0003, "step": 6285 }, { "epoch": 9.09, "learning_rate": 2.0067534973468404e-05, "loss": 0.0188, "step": 6286 }, { "epoch": 9.09, "learning_rate": 2.0035375462292974e-05, "loss": 0.0226, "step": 6287 }, { "epoch": 9.09, "learning_rate": 2.0003215951117545e-05, "loss": 0.0103, "step": 6288 }, { "epoch": 9.09, "learning_rate": 1.9971056439942115e-05, "loss": 0.0289, "step": 6289 }, { "epoch": 9.1, "learning_rate": 1.9938896928766686e-05, "loss": 0.049, "step": 6290 }, { "epoch": 9.1, "learning_rate": 1.9906737417591253e-05, "loss": 0.0421, "step": 6291 }, { "epoch": 9.1, "learning_rate": 1.9874577906415823e-05, "loss": 0.008, "step": 6292 }, { "epoch": 9.1, "learning_rate": 1.984241839524039e-05, "loss": 0.0002, "step": 6293 }, { "epoch": 9.1, "learning_rate": 1.9810258884064964e-05, "loss": 0.0337, "step": 6294 }, { "epoch": 9.1, "learning_rate": 1.977809937288953e-05, "loss": 0.0, "step": 6295 }, { "epoch": 9.1, "learning_rate": 1.9745939861714102e-05, "loss": 0.0028, "step": 6296 }, { "epoch": 9.11, "learning_rate": 1.9713780350538672e-05, "loss": 0.0144, "step": 6297 }, { "epoch": 9.11, "learning_rate": 1.9681620839363243e-05, "loss": 0.0041, "step": 6298 }, { "epoch": 9.11, "learning_rate": 1.9649461328187813e-05, "loss": 0.0141, "step": 6299 }, { "epoch": 9.11, "learning_rate": 1.961730181701238e-05, "loss": 0.0002, "step": 6300 }, { "epoch": 9.11, "learning_rate": 1.958514230583695e-05, "loss": 0.0239, "step": 6301 }, { "epoch": 9.11, "learning_rate": 1.955298279466152e-05, "loss": 0.0011, "step": 6302 }, { "epoch": 9.11, "learning_rate": 1.9520823283486092e-05, "loss": 0.0019, "step": 6303 }, { "epoch": 9.12, "learning_rate": 1.9488663772310663e-05, "loss": 0.0001, "step": 6304 }, { "epoch": 9.12, "learning_rate": 1.945650426113523e-05, "loss": 0.0205, "step": 6305 }, { "epoch": 9.12, "learning_rate": 1.9424344749959804e-05, "loss": 0.0069, "step": 6306 }, { "epoch": 9.12, "learning_rate": 1.939218523878437e-05, "loss": 0.0003, "step": 6307 }, { "epoch": 9.12, "learning_rate": 1.936002572760894e-05, "loss": 0.0442, "step": 6308 }, { "epoch": 9.12, "learning_rate": 1.932786621643351e-05, "loss": 0.0479, "step": 6309 }, { "epoch": 9.13, "learning_rate": 1.9295706705258082e-05, "loss": 0.0707, "step": 6310 }, { "epoch": 9.13, "learning_rate": 1.9263547194082653e-05, "loss": 0.0001, "step": 6311 }, { "epoch": 9.13, "learning_rate": 1.923138768290722e-05, "loss": 0.0111, "step": 6312 }, { "epoch": 9.13, "learning_rate": 1.919922817173179e-05, "loss": 0.0424, "step": 6313 }, { "epoch": 9.13, "learning_rate": 1.916706866055636e-05, "loss": 0.0005, "step": 6314 }, { "epoch": 9.13, "learning_rate": 1.913490914938093e-05, "loss": 0.0017, "step": 6315 }, { "epoch": 9.13, "learning_rate": 1.91027496382055e-05, "loss": 0.0004, "step": 6316 }, { "epoch": 9.14, "learning_rate": 1.907059012703007e-05, "loss": 0.0004, "step": 6317 }, { "epoch": 9.14, "learning_rate": 1.903843061585464e-05, "loss": 0.0042, "step": 6318 }, { "epoch": 9.14, "learning_rate": 1.900627110467921e-05, "loss": 0.0071, "step": 6319 }, { "epoch": 9.14, "learning_rate": 1.897411159350378e-05, "loss": 0.0312, "step": 6320 }, { "epoch": 9.14, "learning_rate": 1.8941952082328348e-05, "loss": 0.0006, "step": 6321 }, { "epoch": 9.14, "learning_rate": 1.890979257115292e-05, "loss": 0.0004, "step": 6322 }, { "epoch": 9.14, "learning_rate": 1.887763305997749e-05, "loss": 0.0623, "step": 6323 }, { "epoch": 9.15, "learning_rate": 1.884547354880206e-05, "loss": 0.004, "step": 6324 }, { "epoch": 9.15, "learning_rate": 1.881331403762663e-05, "loss": 0.0001, "step": 6325 }, { "epoch": 9.15, "learning_rate": 1.87811545264512e-05, "loss": 0.0, "step": 6326 }, { "epoch": 9.15, "learning_rate": 1.874899501527577e-05, "loss": 0.0069, "step": 6327 }, { "epoch": 9.15, "learning_rate": 1.8716835504100338e-05, "loss": 0.0002, "step": 6328 }, { "epoch": 9.15, "learning_rate": 1.868467599292491e-05, "loss": 0.0049, "step": 6329 }, { "epoch": 9.15, "learning_rate": 1.865251648174948e-05, "loss": 0.0008, "step": 6330 }, { "epoch": 9.16, "learning_rate": 1.862035697057405e-05, "loss": 0.088, "step": 6331 }, { "epoch": 9.16, "learning_rate": 1.8588197459398617e-05, "loss": 0.0225, "step": 6332 }, { "epoch": 9.16, "learning_rate": 1.8556037948223187e-05, "loss": 0.0278, "step": 6333 }, { "epoch": 9.16, "learning_rate": 1.8523878437047758e-05, "loss": 0.0383, "step": 6334 }, { "epoch": 9.16, "learning_rate": 1.8491718925872328e-05, "loss": 0.0054, "step": 6335 }, { "epoch": 9.16, "learning_rate": 1.84595594146969e-05, "loss": 0.0025, "step": 6336 }, { "epoch": 9.16, "learning_rate": 1.8427399903521466e-05, "loss": 0.0007, "step": 6337 }, { "epoch": 9.17, "learning_rate": 1.8395240392346036e-05, "loss": 0.0006, "step": 6338 }, { "epoch": 9.17, "learning_rate": 1.8363080881170607e-05, "loss": 0.0029, "step": 6339 }, { "epoch": 9.17, "learning_rate": 1.8330921369995177e-05, "loss": 0.0369, "step": 6340 }, { "epoch": 9.17, "learning_rate": 1.8298761858819748e-05, "loss": 0.0687, "step": 6341 }, { "epoch": 9.17, "learning_rate": 1.8266602347644315e-05, "loss": 0.0, "step": 6342 }, { "epoch": 9.17, "learning_rate": 1.823444283646889e-05, "loss": 0.0, "step": 6343 }, { "epoch": 9.17, "learning_rate": 1.8202283325293456e-05, "loss": 0.0001, "step": 6344 }, { "epoch": 9.18, "learning_rate": 1.8170123814118027e-05, "loss": 0.0269, "step": 6345 }, { "epoch": 9.18, "learning_rate": 1.8137964302942594e-05, "loss": 0.0508, "step": 6346 }, { "epoch": 9.18, "learning_rate": 1.8105804791767168e-05, "loss": 0.0011, "step": 6347 }, { "epoch": 9.18, "learning_rate": 1.8073645280591735e-05, "loss": 0.0532, "step": 6348 }, { "epoch": 9.18, "learning_rate": 1.8041485769416305e-05, "loss": 0.0041, "step": 6349 }, { "epoch": 9.18, "learning_rate": 1.8009326258240876e-05, "loss": 0.0001, "step": 6350 }, { "epoch": 9.18, "learning_rate": 1.7977166747065446e-05, "loss": 0.0274, "step": 6351 }, { "epoch": 9.19, "learning_rate": 1.7945007235890017e-05, "loss": 0.0286, "step": 6352 }, { "epoch": 9.19, "learning_rate": 1.7912847724714584e-05, "loss": 0.0006, "step": 6353 }, { "epoch": 9.19, "learning_rate": 1.7880688213539154e-05, "loss": 0.026, "step": 6354 }, { "epoch": 9.19, "learning_rate": 1.7848528702363725e-05, "loss": 0.039, "step": 6355 }, { "epoch": 9.19, "learning_rate": 1.7816369191188295e-05, "loss": 0.0132, "step": 6356 }, { "epoch": 9.19, "learning_rate": 1.7784209680012866e-05, "loss": 0.0623, "step": 6357 }, { "epoch": 9.19, "learning_rate": 1.7752050168837433e-05, "loss": 0.0005, "step": 6358 }, { "epoch": 9.2, "learning_rate": 1.7719890657662007e-05, "loss": 0.0001, "step": 6359 }, { "epoch": 9.2, "learning_rate": 1.7687731146486574e-05, "loss": 0.0012, "step": 6360 }, { "epoch": 9.2, "learning_rate": 1.7655571635311145e-05, "loss": 0.0293, "step": 6361 }, { "epoch": 9.2, "learning_rate": 1.762341212413571e-05, "loss": 0.0271, "step": 6362 }, { "epoch": 9.2, "learning_rate": 1.7591252612960286e-05, "loss": 0.0016, "step": 6363 }, { "epoch": 9.2, "learning_rate": 1.7559093101784853e-05, "loss": 0.0091, "step": 6364 }, { "epoch": 9.2, "learning_rate": 1.7526933590609423e-05, "loss": 0.0497, "step": 6365 }, { "epoch": 9.21, "learning_rate": 1.7494774079433994e-05, "loss": 0.0017, "step": 6366 }, { "epoch": 9.21, "learning_rate": 1.7462614568258564e-05, "loss": 0.0001, "step": 6367 }, { "epoch": 9.21, "learning_rate": 1.7430455057083135e-05, "loss": 0.0001, "step": 6368 }, { "epoch": 9.21, "learning_rate": 1.7398295545907702e-05, "loss": 0.0253, "step": 6369 }, { "epoch": 9.21, "learning_rate": 1.7366136034732272e-05, "loss": 0.0006, "step": 6370 }, { "epoch": 9.21, "learning_rate": 1.7333976523556843e-05, "loss": 0.002, "step": 6371 }, { "epoch": 9.21, "learning_rate": 1.7301817012381413e-05, "loss": 0.0084, "step": 6372 }, { "epoch": 9.22, "learning_rate": 1.7269657501205984e-05, "loss": 0.0074, "step": 6373 }, { "epoch": 9.22, "learning_rate": 1.723749799003055e-05, "loss": 0.0018, "step": 6374 }, { "epoch": 9.22, "learning_rate": 1.720533847885512e-05, "loss": 0.0601, "step": 6375 }, { "epoch": 9.22, "learning_rate": 1.7173178967679692e-05, "loss": 0.0244, "step": 6376 }, { "epoch": 9.22, "learning_rate": 1.7141019456504263e-05, "loss": 0.0034, "step": 6377 }, { "epoch": 9.22, "learning_rate": 1.710885994532883e-05, "loss": 0.0112, "step": 6378 }, { "epoch": 9.22, "learning_rate": 1.70767004341534e-05, "loss": 0.0002, "step": 6379 }, { "epoch": 9.23, "learning_rate": 1.704454092297797e-05, "loss": 0.0, "step": 6380 }, { "epoch": 9.23, "learning_rate": 1.701238141180254e-05, "loss": 0.016, "step": 6381 }, { "epoch": 9.23, "learning_rate": 1.6980221900627112e-05, "loss": 0.0002, "step": 6382 }, { "epoch": 9.23, "learning_rate": 1.694806238945168e-05, "loss": 0.0243, "step": 6383 }, { "epoch": 9.23, "learning_rate": 1.6915902878276253e-05, "loss": 0.0055, "step": 6384 }, { "epoch": 9.23, "learning_rate": 1.688374336710082e-05, "loss": 0.001, "step": 6385 }, { "epoch": 9.23, "learning_rate": 1.685158385592539e-05, "loss": 0.0095, "step": 6386 }, { "epoch": 9.24, "learning_rate": 1.681942434474996e-05, "loss": 0.0496, "step": 6387 }, { "epoch": 9.24, "learning_rate": 1.678726483357453e-05, "loss": 0.0075, "step": 6388 }, { "epoch": 9.24, "learning_rate": 1.6755105322399102e-05, "loss": 0.0001, "step": 6389 }, { "epoch": 9.24, "learning_rate": 1.672294581122367e-05, "loss": 0.002, "step": 6390 }, { "epoch": 9.24, "learning_rate": 1.669078630004824e-05, "loss": 0.0002, "step": 6391 }, { "epoch": 9.24, "learning_rate": 1.665862678887281e-05, "loss": 0.0134, "step": 6392 }, { "epoch": 9.25, "learning_rate": 1.662646727769738e-05, "loss": 0.0166, "step": 6393 }, { "epoch": 9.25, "learning_rate": 1.6594307766521948e-05, "loss": 0.0391, "step": 6394 }, { "epoch": 9.25, "learning_rate": 1.6562148255346518e-05, "loss": 0.0162, "step": 6395 }, { "epoch": 9.25, "learning_rate": 1.652998874417109e-05, "loss": 0.0004, "step": 6396 }, { "epoch": 9.25, "learning_rate": 1.649782923299566e-05, "loss": 0.0146, "step": 6397 }, { "epoch": 9.25, "learning_rate": 1.646566972182023e-05, "loss": 0.0427, "step": 6398 }, { "epoch": 9.25, "learning_rate": 1.6433510210644797e-05, "loss": 0.0294, "step": 6399 }, { "epoch": 9.26, "learning_rate": 1.640135069946937e-05, "loss": 0.0005, "step": 6400 }, { "epoch": 9.26, "learning_rate": 1.6369191188293938e-05, "loss": 0.0326, "step": 6401 }, { "epoch": 9.26, "learning_rate": 1.633703167711851e-05, "loss": 0.0212, "step": 6402 }, { "epoch": 9.26, "learning_rate": 1.630487216594308e-05, "loss": 0.0011, "step": 6403 }, { "epoch": 9.26, "learning_rate": 1.627271265476765e-05, "loss": 0.0067, "step": 6404 }, { "epoch": 9.26, "learning_rate": 1.624055314359222e-05, "loss": 0.0604, "step": 6405 }, { "epoch": 9.26, "learning_rate": 1.6208393632416787e-05, "loss": 0.0681, "step": 6406 }, { "epoch": 9.27, "learning_rate": 1.6176234121241358e-05, "loss": 0.0433, "step": 6407 }, { "epoch": 9.27, "learning_rate": 1.6144074610065928e-05, "loss": 0.0235, "step": 6408 }, { "epoch": 9.27, "learning_rate": 1.61119150988905e-05, "loss": 0.0047, "step": 6409 }, { "epoch": 9.27, "learning_rate": 1.6079755587715066e-05, "loss": 0.0021, "step": 6410 }, { "epoch": 9.27, "learning_rate": 1.6047596076539636e-05, "loss": 0.0003, "step": 6411 }, { "epoch": 9.27, "learning_rate": 1.601543656536421e-05, "loss": 0.0266, "step": 6412 }, { "epoch": 9.27, "learning_rate": 1.5983277054188777e-05, "loss": 0.0001, "step": 6413 }, { "epoch": 9.28, "learning_rate": 1.5951117543013348e-05, "loss": 0.0001, "step": 6414 }, { "epoch": 9.28, "learning_rate": 1.5918958031837915e-05, "loss": 0.0472, "step": 6415 }, { "epoch": 9.28, "learning_rate": 1.5886798520662485e-05, "loss": 0.001, "step": 6416 }, { "epoch": 9.28, "learning_rate": 1.5854639009487056e-05, "loss": 0.0, "step": 6417 }, { "epoch": 9.28, "learning_rate": 1.5822479498311626e-05, "loss": 0.0037, "step": 6418 }, { "epoch": 9.28, "learning_rate": 1.5790319987136197e-05, "loss": 0.0536, "step": 6419 }, { "epoch": 9.28, "learning_rate": 1.5758160475960764e-05, "loss": 0.0344, "step": 6420 }, { "epoch": 9.29, "learning_rate": 1.5726000964785338e-05, "loss": 0.0004, "step": 6421 }, { "epoch": 9.29, "learning_rate": 1.5693841453609905e-05, "loss": 0.0086, "step": 6422 }, { "epoch": 9.29, "learning_rate": 1.5661681942434476e-05, "loss": 0.0554, "step": 6423 }, { "epoch": 9.29, "learning_rate": 1.5629522431259043e-05, "loss": 0.0036, "step": 6424 }, { "epoch": 9.29, "learning_rate": 1.5597362920083617e-05, "loss": 0.0506, "step": 6425 }, { "epoch": 9.29, "learning_rate": 1.5565203408908187e-05, "loss": 0.0003, "step": 6426 }, { "epoch": 9.29, "learning_rate": 1.5533043897732754e-05, "loss": 0.019, "step": 6427 }, { "epoch": 9.3, "learning_rate": 1.5500884386557325e-05, "loss": 0.0023, "step": 6428 }, { "epoch": 9.3, "learning_rate": 1.5468724875381895e-05, "loss": 0.0002, "step": 6429 }, { "epoch": 9.3, "learning_rate": 1.5436565364206466e-05, "loss": 0.0018, "step": 6430 }, { "epoch": 9.3, "learning_rate": 1.5404405853031033e-05, "loss": 0.0001, "step": 6431 }, { "epoch": 9.3, "learning_rate": 1.5372246341855603e-05, "loss": 0.0027, "step": 6432 }, { "epoch": 9.3, "learning_rate": 1.5340086830680174e-05, "loss": 0.0002, "step": 6433 }, { "epoch": 9.3, "learning_rate": 1.5307927319504745e-05, "loss": 0.0006, "step": 6434 }, { "epoch": 9.31, "learning_rate": 1.5275767808329315e-05, "loss": 0.0323, "step": 6435 }, { "epoch": 9.31, "learning_rate": 1.5243608297153882e-05, "loss": 0.0408, "step": 6436 }, { "epoch": 9.31, "learning_rate": 1.5211448785978454e-05, "loss": 0.0443, "step": 6437 }, { "epoch": 9.31, "learning_rate": 1.5179289274803025e-05, "loss": 0.0623, "step": 6438 }, { "epoch": 9.31, "learning_rate": 1.5147129763627594e-05, "loss": 0.0081, "step": 6439 }, { "epoch": 9.31, "learning_rate": 1.5114970252452162e-05, "loss": 0.0, "step": 6440 }, { "epoch": 9.31, "learning_rate": 1.5082810741276735e-05, "loss": 0.0225, "step": 6441 }, { "epoch": 9.32, "learning_rate": 1.5050651230101304e-05, "loss": 0.0004, "step": 6442 }, { "epoch": 9.32, "learning_rate": 1.5018491718925872e-05, "loss": 0.0036, "step": 6443 }, { "epoch": 9.32, "learning_rate": 1.4986332207750441e-05, "loss": 0.0128, "step": 6444 }, { "epoch": 9.32, "learning_rate": 1.4954172696575013e-05, "loss": 0.0004, "step": 6445 }, { "epoch": 9.32, "learning_rate": 1.4922013185399584e-05, "loss": 0.0001, "step": 6446 }, { "epoch": 9.32, "learning_rate": 1.4889853674224153e-05, "loss": 0.0058, "step": 6447 }, { "epoch": 9.32, "learning_rate": 1.4857694163048722e-05, "loss": 0.0774, "step": 6448 }, { "epoch": 9.33, "learning_rate": 1.4825534651873294e-05, "loss": 0.008, "step": 6449 }, { "epoch": 9.33, "learning_rate": 1.4793375140697863e-05, "loss": 0.0533, "step": 6450 }, { "epoch": 9.33, "learning_rate": 1.4761215629522431e-05, "loss": 0.0109, "step": 6451 }, { "epoch": 9.33, "learning_rate": 1.4729056118347002e-05, "loss": 0.0141, "step": 6452 }, { "epoch": 9.33, "learning_rate": 1.4696896607171572e-05, "loss": 0.0372, "step": 6453 }, { "epoch": 9.33, "learning_rate": 1.4664737095996143e-05, "loss": 0.0006, "step": 6454 }, { "epoch": 9.33, "learning_rate": 1.4632577584820712e-05, "loss": 0.0178, "step": 6455 }, { "epoch": 9.34, "learning_rate": 1.460041807364528e-05, "loss": 0.0039, "step": 6456 }, { "epoch": 9.34, "learning_rate": 1.456825856246985e-05, "loss": 0.0001, "step": 6457 }, { "epoch": 9.34, "learning_rate": 1.4536099051294422e-05, "loss": 0.0347, "step": 6458 }, { "epoch": 9.34, "learning_rate": 1.450393954011899e-05, "loss": 0.0311, "step": 6459 }, { "epoch": 9.34, "learning_rate": 1.4471780028943561e-05, "loss": 0.0199, "step": 6460 }, { "epoch": 9.34, "learning_rate": 1.443962051776813e-05, "loss": 0.0333, "step": 6461 }, { "epoch": 9.34, "learning_rate": 1.4407461006592702e-05, "loss": 0.0001, "step": 6462 }, { "epoch": 9.35, "learning_rate": 1.437530149541727e-05, "loss": 0.0, "step": 6463 }, { "epoch": 9.35, "learning_rate": 1.434314198424184e-05, "loss": 0.0004, "step": 6464 }, { "epoch": 9.35, "learning_rate": 1.4310982473066408e-05, "loss": 0.0202, "step": 6465 }, { "epoch": 9.35, "learning_rate": 1.427882296189098e-05, "loss": 0.0496, "step": 6466 }, { "epoch": 9.35, "learning_rate": 1.424666345071555e-05, "loss": 0.0225, "step": 6467 }, { "epoch": 9.35, "learning_rate": 1.421450393954012e-05, "loss": 0.0498, "step": 6468 }, { "epoch": 9.36, "learning_rate": 1.4182344428364689e-05, "loss": 0.0826, "step": 6469 }, { "epoch": 9.36, "learning_rate": 1.4150184917189261e-05, "loss": 0.0011, "step": 6470 }, { "epoch": 9.36, "learning_rate": 1.411802540601383e-05, "loss": 0.0015, "step": 6471 }, { "epoch": 9.36, "learning_rate": 1.4085865894838399e-05, "loss": 0.0017, "step": 6472 }, { "epoch": 9.36, "learning_rate": 1.4053706383662967e-05, "loss": 0.0035, "step": 6473 }, { "epoch": 9.36, "learning_rate": 1.402154687248754e-05, "loss": 0.0659, "step": 6474 }, { "epoch": 9.36, "learning_rate": 1.3989387361312108e-05, "loss": 0.0004, "step": 6475 }, { "epoch": 9.37, "learning_rate": 1.3957227850136679e-05, "loss": 0.014, "step": 6476 }, { "epoch": 9.37, "learning_rate": 1.3925068338961248e-05, "loss": 0.0139, "step": 6477 }, { "epoch": 9.37, "learning_rate": 1.389290882778582e-05, "loss": 0.0004, "step": 6478 }, { "epoch": 9.37, "learning_rate": 1.3860749316610389e-05, "loss": 0.0004, "step": 6479 }, { "epoch": 9.37, "learning_rate": 1.3828589805434958e-05, "loss": 0.02, "step": 6480 }, { "epoch": 9.37, "learning_rate": 1.3796430294259526e-05, "loss": 0.0004, "step": 6481 }, { "epoch": 9.37, "learning_rate": 1.3764270783084099e-05, "loss": 0.0261, "step": 6482 }, { "epoch": 9.38, "learning_rate": 1.3732111271908667e-05, "loss": 0.0478, "step": 6483 }, { "epoch": 9.38, "learning_rate": 1.3699951760733238e-05, "loss": 0.0055, "step": 6484 }, { "epoch": 9.38, "learning_rate": 1.3667792249557807e-05, "loss": 0.0223, "step": 6485 }, { "epoch": 9.38, "learning_rate": 1.3635632738382379e-05, "loss": 0.0697, "step": 6486 }, { "epoch": 9.38, "learning_rate": 1.3603473227206948e-05, "loss": 0.0002, "step": 6487 }, { "epoch": 9.38, "learning_rate": 1.3571313716031517e-05, "loss": 0.0008, "step": 6488 }, { "epoch": 9.38, "learning_rate": 1.3539154204856085e-05, "loss": 0.0042, "step": 6489 }, { "epoch": 9.39, "learning_rate": 1.3506994693680658e-05, "loss": 0.0698, "step": 6490 }, { "epoch": 9.39, "learning_rate": 1.3474835182505226e-05, "loss": 0.0029, "step": 6491 }, { "epoch": 9.39, "learning_rate": 1.3442675671329797e-05, "loss": 0.0534, "step": 6492 }, { "epoch": 9.39, "learning_rate": 1.3410516160154366e-05, "loss": 0.026, "step": 6493 }, { "epoch": 9.39, "learning_rate": 1.3378356648978938e-05, "loss": 0.0053, "step": 6494 }, { "epoch": 9.39, "learning_rate": 1.3346197137803507e-05, "loss": 0.0144, "step": 6495 }, { "epoch": 9.39, "learning_rate": 1.3314037626628076e-05, "loss": 0.0034, "step": 6496 }, { "epoch": 9.4, "learning_rate": 1.3281878115452644e-05, "loss": 0.0256, "step": 6497 }, { "epoch": 9.4, "learning_rate": 1.3249718604277217e-05, "loss": 0.0024, "step": 6498 }, { "epoch": 9.4, "learning_rate": 1.3217559093101785e-05, "loss": 0.0002, "step": 6499 }, { "epoch": 9.4, "learning_rate": 1.3185399581926356e-05, "loss": 0.0325, "step": 6500 }, { "epoch": 9.4, "learning_rate": 1.3153240070750925e-05, "loss": 0.0003, "step": 6501 }, { "epoch": 9.4, "learning_rate": 1.3121080559575494e-05, "loss": 0.0292, "step": 6502 }, { "epoch": 9.4, "learning_rate": 1.3088921048400066e-05, "loss": 0.0337, "step": 6503 }, { "epoch": 9.41, "learning_rate": 1.3056761537224635e-05, "loss": 0.0112, "step": 6504 }, { "epoch": 9.41, "learning_rate": 1.3024602026049203e-05, "loss": 0.0014, "step": 6505 }, { "epoch": 9.41, "learning_rate": 1.2992442514873774e-05, "loss": 0.002, "step": 6506 }, { "epoch": 9.41, "learning_rate": 1.2960283003698344e-05, "loss": 0.0059, "step": 6507 }, { "epoch": 9.41, "learning_rate": 1.2928123492522915e-05, "loss": 0.0, "step": 6508 }, { "epoch": 9.41, "learning_rate": 1.2895963981347484e-05, "loss": 0.0559, "step": 6509 }, { "epoch": 9.41, "learning_rate": 1.2863804470172053e-05, "loss": 0.0027, "step": 6510 }, { "epoch": 9.42, "learning_rate": 1.2831644958996625e-05, "loss": 0.0064, "step": 6511 }, { "epoch": 9.42, "learning_rate": 1.2799485447821194e-05, "loss": 0.0268, "step": 6512 }, { "epoch": 9.42, "learning_rate": 1.2767325936645762e-05, "loss": 0.0092, "step": 6513 }, { "epoch": 9.42, "learning_rate": 1.2735166425470333e-05, "loss": 0.0013, "step": 6514 }, { "epoch": 9.42, "learning_rate": 1.2703006914294903e-05, "loss": 0.0063, "step": 6515 }, { "epoch": 9.42, "learning_rate": 1.2670847403119474e-05, "loss": 0.0866, "step": 6516 }, { "epoch": 9.42, "learning_rate": 1.2638687891944043e-05, "loss": 0.0015, "step": 6517 }, { "epoch": 9.43, "learning_rate": 1.2606528380768612e-05, "loss": 0.0572, "step": 6518 }, { "epoch": 9.43, "learning_rate": 1.2574368869593184e-05, "loss": 0.0385, "step": 6519 }, { "epoch": 9.43, "learning_rate": 1.2542209358417753e-05, "loss": 0.0006, "step": 6520 }, { "epoch": 9.43, "learning_rate": 1.2510049847242321e-05, "loss": 0.0, "step": 6521 }, { "epoch": 9.43, "learning_rate": 1.2477890336066892e-05, "loss": 0.0007, "step": 6522 }, { "epoch": 9.43, "learning_rate": 1.2445730824891462e-05, "loss": 0.0003, "step": 6523 }, { "epoch": 9.43, "learning_rate": 1.2413571313716033e-05, "loss": 0.0, "step": 6524 }, { "epoch": 9.44, "learning_rate": 1.2381411802540602e-05, "loss": 0.0413, "step": 6525 }, { "epoch": 9.44, "learning_rate": 1.2349252291365172e-05, "loss": 0.0089, "step": 6526 }, { "epoch": 9.44, "learning_rate": 1.2317092780189741e-05, "loss": 0.0074, "step": 6527 }, { "epoch": 9.44, "learning_rate": 1.2284933269014312e-05, "loss": 0.0547, "step": 6528 }, { "epoch": 9.44, "learning_rate": 1.225277375783888e-05, "loss": 0.0015, "step": 6529 }, { "epoch": 9.44, "learning_rate": 1.2220614246663451e-05, "loss": 0.0134, "step": 6530 }, { "epoch": 9.44, "learning_rate": 1.2188454735488022e-05, "loss": 0.0252, "step": 6531 }, { "epoch": 9.45, "learning_rate": 1.2156295224312592e-05, "loss": 0.0007, "step": 6532 }, { "epoch": 9.45, "learning_rate": 1.212413571313716e-05, "loss": 0.0383, "step": 6533 }, { "epoch": 9.45, "learning_rate": 1.2091976201961731e-05, "loss": 0.0001, "step": 6534 }, { "epoch": 9.45, "learning_rate": 1.20598166907863e-05, "loss": 0.0432, "step": 6535 }, { "epoch": 9.45, "learning_rate": 1.202765717961087e-05, "loss": 0.0379, "step": 6536 }, { "epoch": 9.45, "learning_rate": 1.199549766843544e-05, "loss": 0.0184, "step": 6537 }, { "epoch": 9.45, "learning_rate": 1.1963338157260012e-05, "loss": 0.0031, "step": 6538 }, { "epoch": 9.46, "learning_rate": 1.193117864608458e-05, "loss": 0.0289, "step": 6539 }, { "epoch": 9.46, "learning_rate": 1.189901913490915e-05, "loss": 0.0155, "step": 6540 }, { "epoch": 9.46, "learning_rate": 1.186685962373372e-05, "loss": 0.0186, "step": 6541 }, { "epoch": 9.46, "learning_rate": 1.1834700112558289e-05, "loss": 0.0002, "step": 6542 }, { "epoch": 9.46, "learning_rate": 1.180254060138286e-05, "loss": 0.0003, "step": 6543 }, { "epoch": 9.46, "learning_rate": 1.1770381090207428e-05, "loss": 0.0138, "step": 6544 }, { "epoch": 9.46, "learning_rate": 1.1738221579032e-05, "loss": 0.0176, "step": 6545 }, { "epoch": 9.47, "learning_rate": 1.1706062067856569e-05, "loss": 0.0444, "step": 6546 }, { "epoch": 9.47, "learning_rate": 1.167390255668114e-05, "loss": 0.0214, "step": 6547 }, { "epoch": 9.47, "learning_rate": 1.1641743045505708e-05, "loss": 0.0096, "step": 6548 }, { "epoch": 9.47, "learning_rate": 1.1609583534330279e-05, "loss": 0.0001, "step": 6549 }, { "epoch": 9.47, "learning_rate": 1.1577424023154848e-05, "loss": 0.0001, "step": 6550 }, { "epoch": 9.47, "learning_rate": 1.1545264511979418e-05, "loss": 0.0105, "step": 6551 }, { "epoch": 9.48, "learning_rate": 1.1513105000803987e-05, "loss": 0.0076, "step": 6552 }, { "epoch": 9.48, "learning_rate": 1.148094548962856e-05, "loss": 0.0337, "step": 6553 }, { "epoch": 9.48, "learning_rate": 1.1448785978453128e-05, "loss": 0.0111, "step": 6554 }, { "epoch": 9.48, "learning_rate": 1.1416626467277699e-05, "loss": 0.0106, "step": 6555 }, { "epoch": 9.48, "learning_rate": 1.1384466956102267e-05, "loss": 0.0586, "step": 6556 }, { "epoch": 9.48, "learning_rate": 1.1352307444926838e-05, "loss": 0.057, "step": 6557 }, { "epoch": 9.48, "learning_rate": 1.1320147933751407e-05, "loss": 0.0015, "step": 6558 }, { "epoch": 9.49, "learning_rate": 1.1287988422575977e-05, "loss": 0.001, "step": 6559 }, { "epoch": 9.49, "learning_rate": 1.1255828911400548e-05, "loss": 0.0298, "step": 6560 }, { "epoch": 9.49, "learning_rate": 1.1223669400225118e-05, "loss": 0.044, "step": 6561 }, { "epoch": 9.49, "learning_rate": 1.1191509889049687e-05, "loss": 0.0107, "step": 6562 }, { "epoch": 9.49, "learning_rate": 1.1159350377874258e-05, "loss": 0.0659, "step": 6563 }, { "epoch": 9.49, "learning_rate": 1.1127190866698826e-05, "loss": 0.0154, "step": 6564 }, { "epoch": 9.49, "learning_rate": 1.1095031355523397e-05, "loss": 0.0, "step": 6565 }, { "epoch": 9.5, "learning_rate": 1.1062871844347966e-05, "loss": 0.0285, "step": 6566 }, { "epoch": 9.5, "learning_rate": 1.1030712333172536e-05, "loss": 0.0006, "step": 6567 }, { "epoch": 9.5, "learning_rate": 1.0998552821997107e-05, "loss": 0.0009, "step": 6568 }, { "epoch": 9.5, "learning_rate": 1.0966393310821677e-05, "loss": 0.0253, "step": 6569 }, { "epoch": 9.5, "learning_rate": 1.0934233799646246e-05, "loss": 0.0005, "step": 6570 }, { "epoch": 9.5, "learning_rate": 1.0902074288470817e-05, "loss": 0.0002, "step": 6571 }, { "epoch": 9.5, "learning_rate": 1.0869914777295385e-05, "loss": 0.0039, "step": 6572 }, { "epoch": 9.51, "learning_rate": 1.0837755266119956e-05, "loss": 0.0007, "step": 6573 }, { "epoch": 9.51, "learning_rate": 1.0805595754944525e-05, "loss": 0.0, "step": 6574 }, { "epoch": 9.51, "learning_rate": 1.0773436243769095e-05, "loss": 0.0025, "step": 6575 }, { "epoch": 9.51, "learning_rate": 1.0741276732593666e-05, "loss": 0.0537, "step": 6576 }, { "epoch": 9.51, "learning_rate": 1.0709117221418236e-05, "loss": 0.011, "step": 6577 }, { "epoch": 9.51, "learning_rate": 1.0676957710242805e-05, "loss": 0.0391, "step": 6578 }, { "epoch": 9.51, "learning_rate": 1.0644798199067376e-05, "loss": 0.0254, "step": 6579 }, { "epoch": 9.52, "learning_rate": 1.0612638687891944e-05, "loss": 0.0013, "step": 6580 }, { "epoch": 9.52, "learning_rate": 1.0580479176716513e-05, "loss": 0.0069, "step": 6581 }, { "epoch": 9.52, "learning_rate": 1.0548319665541084e-05, "loss": 0.0024, "step": 6582 }, { "epoch": 9.52, "learning_rate": 1.0516160154365654e-05, "loss": 0.0595, "step": 6583 }, { "epoch": 9.52, "learning_rate": 1.0484000643190225e-05, "loss": 0.0118, "step": 6584 }, { "epoch": 9.52, "learning_rate": 1.0451841132014794e-05, "loss": 0.0112, "step": 6585 }, { "epoch": 9.52, "learning_rate": 1.0419681620839364e-05, "loss": 0.0005, "step": 6586 }, { "epoch": 9.53, "learning_rate": 1.0387522109663933e-05, "loss": 0.0054, "step": 6587 }, { "epoch": 9.53, "learning_rate": 1.0355362598488503e-05, "loss": 0.0003, "step": 6588 }, { "epoch": 9.53, "learning_rate": 1.0323203087313072e-05, "loss": 0.0358, "step": 6589 }, { "epoch": 9.53, "learning_rate": 1.0291043576137643e-05, "loss": 0.0039, "step": 6590 }, { "epoch": 9.53, "learning_rate": 1.0258884064962213e-05, "loss": 0.0092, "step": 6591 }, { "epoch": 9.53, "learning_rate": 1.0226724553786784e-05, "loss": 0.0003, "step": 6592 }, { "epoch": 9.53, "learning_rate": 1.0194565042611353e-05, "loss": 0.0051, "step": 6593 }, { "epoch": 9.54, "learning_rate": 1.0162405531435923e-05, "loss": 0.0, "step": 6594 }, { "epoch": 9.54, "learning_rate": 1.0130246020260492e-05, "loss": 0.0002, "step": 6595 }, { "epoch": 9.54, "learning_rate": 1.0098086509085062e-05, "loss": 0.0006, "step": 6596 }, { "epoch": 9.54, "learning_rate": 1.0065926997909631e-05, "loss": 0.0281, "step": 6597 }, { "epoch": 9.54, "learning_rate": 1.0033767486734202e-05, "loss": 0.0117, "step": 6598 }, { "epoch": 9.54, "learning_rate": 1.0001607975558772e-05, "loss": 0.01, "step": 6599 }, { "epoch": 9.54, "learning_rate": 9.969448464383343e-06, "loss": 0.0001, "step": 6600 }, { "epoch": 9.55, "learning_rate": 9.937288953207912e-06, "loss": 0.0197, "step": 6601 }, { "epoch": 9.55, "learning_rate": 9.905129442032482e-06, "loss": 0.0146, "step": 6602 }, { "epoch": 9.55, "learning_rate": 9.872969930857051e-06, "loss": 0.0561, "step": 6603 }, { "epoch": 9.55, "learning_rate": 9.840810419681621e-06, "loss": 0.0004, "step": 6604 }, { "epoch": 9.55, "learning_rate": 9.80865090850619e-06, "loss": 0.0546, "step": 6605 }, { "epoch": 9.55, "learning_rate": 9.77649139733076e-06, "loss": 0.0427, "step": 6606 }, { "epoch": 9.55, "learning_rate": 9.744331886155331e-06, "loss": 0.062, "step": 6607 }, { "epoch": 9.56, "learning_rate": 9.712172374979902e-06, "loss": 0.0144, "step": 6608 }, { "epoch": 9.56, "learning_rate": 9.68001286380447e-06, "loss": 0.0042, "step": 6609 }, { "epoch": 9.56, "learning_rate": 9.647853352629041e-06, "loss": 0.0005, "step": 6610 }, { "epoch": 9.56, "learning_rate": 9.61569384145361e-06, "loss": 0.0002, "step": 6611 }, { "epoch": 9.56, "learning_rate": 9.58353433027818e-06, "loss": 0.0019, "step": 6612 }, { "epoch": 9.56, "learning_rate": 9.55137481910275e-06, "loss": 0.0001, "step": 6613 }, { "epoch": 9.56, "learning_rate": 9.51921530792732e-06, "loss": 0.0264, "step": 6614 }, { "epoch": 9.57, "learning_rate": 9.48705579675189e-06, "loss": 0.0547, "step": 6615 }, { "epoch": 9.57, "learning_rate": 9.45489628557646e-06, "loss": 0.0019, "step": 6616 }, { "epoch": 9.57, "learning_rate": 9.42273677440103e-06, "loss": 0.0001, "step": 6617 }, { "epoch": 9.57, "learning_rate": 9.3905772632256e-06, "loss": 0.0062, "step": 6618 }, { "epoch": 9.57, "learning_rate": 9.358417752050169e-06, "loss": 0.0363, "step": 6619 }, { "epoch": 9.57, "learning_rate": 9.32625824087474e-06, "loss": 0.0189, "step": 6620 }, { "epoch": 9.57, "learning_rate": 9.294098729699308e-06, "loss": 0.0288, "step": 6621 }, { "epoch": 9.58, "learning_rate": 9.261939218523879e-06, "loss": 0.0669, "step": 6622 }, { "epoch": 9.58, "learning_rate": 9.22977970734845e-06, "loss": 0.0011, "step": 6623 }, { "epoch": 9.58, "learning_rate": 9.197620196173018e-06, "loss": 0.0002, "step": 6624 }, { "epoch": 9.58, "learning_rate": 9.165460684997589e-06, "loss": 0.0001, "step": 6625 }, { "epoch": 9.58, "learning_rate": 9.133301173822157e-06, "loss": 0.0001, "step": 6626 }, { "epoch": 9.58, "learning_rate": 9.101141662646728e-06, "loss": 0.0, "step": 6627 }, { "epoch": 9.58, "learning_rate": 9.068982151471297e-06, "loss": 0.0501, "step": 6628 }, { "epoch": 9.59, "learning_rate": 9.036822640295867e-06, "loss": 0.0516, "step": 6629 }, { "epoch": 9.59, "learning_rate": 9.004663129120438e-06, "loss": 0.0141, "step": 6630 }, { "epoch": 9.59, "learning_rate": 8.972503617945008e-06, "loss": 0.0042, "step": 6631 }, { "epoch": 9.59, "learning_rate": 8.940344106769577e-06, "loss": 0.0181, "step": 6632 }, { "epoch": 9.59, "learning_rate": 8.908184595594148e-06, "loss": 0.0764, "step": 6633 }, { "epoch": 9.59, "learning_rate": 8.876025084418717e-06, "loss": 0.0026, "step": 6634 }, { "epoch": 9.6, "learning_rate": 8.843865573243287e-06, "loss": 0.0015, "step": 6635 }, { "epoch": 9.6, "learning_rate": 8.811706062067856e-06, "loss": 0.0368, "step": 6636 }, { "epoch": 9.6, "learning_rate": 8.779546550892426e-06, "loss": 0.0005, "step": 6637 }, { "epoch": 9.6, "learning_rate": 8.747387039716997e-06, "loss": 0.0052, "step": 6638 }, { "epoch": 9.6, "learning_rate": 8.715227528541567e-06, "loss": 0.0008, "step": 6639 }, { "epoch": 9.6, "learning_rate": 8.683068017366136e-06, "loss": 0.0, "step": 6640 }, { "epoch": 9.6, "learning_rate": 8.650908506190707e-06, "loss": 0.0001, "step": 6641 }, { "epoch": 9.61, "learning_rate": 8.618748995015276e-06, "loss": 0.0001, "step": 6642 }, { "epoch": 9.61, "learning_rate": 8.586589483839846e-06, "loss": 0.0059, "step": 6643 }, { "epoch": 9.61, "learning_rate": 8.554429972664415e-06, "loss": 0.0004, "step": 6644 }, { "epoch": 9.61, "learning_rate": 8.522270461488985e-06, "loss": 0.0227, "step": 6645 }, { "epoch": 9.61, "learning_rate": 8.490110950313556e-06, "loss": 0.017, "step": 6646 }, { "epoch": 9.61, "learning_rate": 8.457951439138126e-06, "loss": 0.0214, "step": 6647 }, { "epoch": 9.61, "learning_rate": 8.425791927962695e-06, "loss": 0.0078, "step": 6648 }, { "epoch": 9.62, "learning_rate": 8.393632416787266e-06, "loss": 0.0016, "step": 6649 }, { "epoch": 9.62, "learning_rate": 8.361472905611835e-06, "loss": 0.021, "step": 6650 }, { "epoch": 9.62, "learning_rate": 8.329313394436405e-06, "loss": 0.0013, "step": 6651 }, { "epoch": 9.62, "learning_rate": 8.297153883260974e-06, "loss": 0.0005, "step": 6652 }, { "epoch": 9.62, "learning_rate": 8.264994372085544e-06, "loss": 0.0358, "step": 6653 }, { "epoch": 9.62, "learning_rate": 8.232834860910115e-06, "loss": 0.0012, "step": 6654 }, { "epoch": 9.62, "learning_rate": 8.200675349734685e-06, "loss": 0.0018, "step": 6655 }, { "epoch": 9.63, "learning_rate": 8.168515838559254e-06, "loss": 0.0003, "step": 6656 }, { "epoch": 9.63, "learning_rate": 8.136356327383825e-06, "loss": 0.0224, "step": 6657 }, { "epoch": 9.63, "learning_rate": 8.104196816208394e-06, "loss": 0.0408, "step": 6658 }, { "epoch": 9.63, "learning_rate": 8.072037305032964e-06, "loss": 0.0, "step": 6659 }, { "epoch": 9.63, "learning_rate": 8.039877793857533e-06, "loss": 0.0337, "step": 6660 }, { "epoch": 9.63, "learning_rate": 8.007718282682105e-06, "loss": 0.016, "step": 6661 }, { "epoch": 9.63, "learning_rate": 7.975558771506674e-06, "loss": 0.0, "step": 6662 }, { "epoch": 9.64, "learning_rate": 7.943399260331243e-06, "loss": 0.0191, "step": 6663 }, { "epoch": 9.64, "learning_rate": 7.911239749155813e-06, "loss": 0.0275, "step": 6664 }, { "epoch": 9.64, "learning_rate": 7.879080237980382e-06, "loss": 0.0091, "step": 6665 }, { "epoch": 9.64, "learning_rate": 7.846920726804953e-06, "loss": 0.0272, "step": 6666 }, { "epoch": 9.64, "learning_rate": 7.814761215629521e-06, "loss": 0.0158, "step": 6667 }, { "epoch": 9.64, "learning_rate": 7.782601704454094e-06, "loss": 0.0003, "step": 6668 }, { "epoch": 9.64, "learning_rate": 7.750442193278662e-06, "loss": 0.0359, "step": 6669 }, { "epoch": 9.65, "learning_rate": 7.718282682103233e-06, "loss": 0.0036, "step": 6670 }, { "epoch": 9.65, "learning_rate": 7.686123170927802e-06, "loss": 0.0051, "step": 6671 }, { "epoch": 9.65, "learning_rate": 7.653963659752372e-06, "loss": 0.0002, "step": 6672 }, { "epoch": 9.65, "learning_rate": 7.621804148576941e-06, "loss": 0.014, "step": 6673 }, { "epoch": 9.65, "learning_rate": 7.5896446374015124e-06, "loss": 0.0813, "step": 6674 }, { "epoch": 9.65, "learning_rate": 7.557485126226081e-06, "loss": 0.0131, "step": 6675 }, { "epoch": 9.65, "learning_rate": 7.525325615050652e-06, "loss": 0.0001, "step": 6676 }, { "epoch": 9.66, "learning_rate": 7.493166103875221e-06, "loss": 0.0471, "step": 6677 }, { "epoch": 9.66, "learning_rate": 7.461006592699792e-06, "loss": 0.0095, "step": 6678 }, { "epoch": 9.66, "learning_rate": 7.428847081524361e-06, "loss": 0.0218, "step": 6679 }, { "epoch": 9.66, "learning_rate": 7.396687570348931e-06, "loss": 0.0019, "step": 6680 }, { "epoch": 9.66, "learning_rate": 7.364528059173501e-06, "loss": 0.0135, "step": 6681 }, { "epoch": 9.66, "learning_rate": 7.3323685479980714e-06, "loss": 0.0, "step": 6682 }, { "epoch": 9.66, "learning_rate": 7.30020903682264e-06, "loss": 0.0407, "step": 6683 }, { "epoch": 9.67, "learning_rate": 7.268049525647211e-06, "loss": 0.0022, "step": 6684 }, { "epoch": 9.67, "learning_rate": 7.2358900144717804e-06, "loss": 0.0757, "step": 6685 }, { "epoch": 9.67, "learning_rate": 7.203730503296351e-06, "loss": 0.0091, "step": 6686 }, { "epoch": 9.67, "learning_rate": 7.17157099212092e-06, "loss": 0.0196, "step": 6687 }, { "epoch": 9.67, "learning_rate": 7.13941148094549e-06, "loss": 0.0002, "step": 6688 }, { "epoch": 9.67, "learning_rate": 7.10725196977006e-06, "loss": 0.0009, "step": 6689 }, { "epoch": 9.67, "learning_rate": 7.0750924585946305e-06, "loss": 0.0002, "step": 6690 }, { "epoch": 9.68, "learning_rate": 7.042932947419199e-06, "loss": 0.0803, "step": 6691 }, { "epoch": 9.68, "learning_rate": 7.01077343624377e-06, "loss": 0.0421, "step": 6692 }, { "epoch": 9.68, "learning_rate": 6.9786139250683395e-06, "loss": 0.0004, "step": 6693 }, { "epoch": 9.68, "learning_rate": 6.94645441389291e-06, "loss": 0.0003, "step": 6694 }, { "epoch": 9.68, "learning_rate": 6.914294902717479e-06, "loss": 0.0005, "step": 6695 }, { "epoch": 9.68, "learning_rate": 6.882135391542049e-06, "loss": 0.0201, "step": 6696 }, { "epoch": 9.68, "learning_rate": 6.849975880366619e-06, "loss": 0.0085, "step": 6697 }, { "epoch": 9.69, "learning_rate": 6.8178163691911895e-06, "loss": 0.0003, "step": 6698 }, { "epoch": 9.69, "learning_rate": 6.785656858015758e-06, "loss": 0.0192, "step": 6699 }, { "epoch": 9.69, "learning_rate": 6.753497346840329e-06, "loss": 0.0614, "step": 6700 }, { "epoch": 9.69, "learning_rate": 6.7213378356648985e-06, "loss": 0.0, "step": 6701 }, { "epoch": 9.69, "learning_rate": 6.689178324489469e-06, "loss": 0.0018, "step": 6702 }, { "epoch": 9.69, "learning_rate": 6.657018813314038e-06, "loss": 0.0478, "step": 6703 }, { "epoch": 9.69, "learning_rate": 6.624859302138608e-06, "loss": 0.0475, "step": 6704 }, { "epoch": 9.7, "learning_rate": 6.592699790963178e-06, "loss": 0.0501, "step": 6705 }, { "epoch": 9.7, "learning_rate": 6.560540279787747e-06, "loss": 0.0026, "step": 6706 }, { "epoch": 9.7, "learning_rate": 6.528380768612317e-06, "loss": 0.0649, "step": 6707 }, { "epoch": 9.7, "learning_rate": 6.496221257436887e-06, "loss": 0.0598, "step": 6708 }, { "epoch": 9.7, "learning_rate": 6.4640617462614575e-06, "loss": 0.0062, "step": 6709 }, { "epoch": 9.7, "learning_rate": 6.431902235086026e-06, "loss": 0.0574, "step": 6710 }, { "epoch": 9.7, "learning_rate": 6.399742723910597e-06, "loss": 0.0507, "step": 6711 }, { "epoch": 9.71, "learning_rate": 6.3675832127351665e-06, "loss": 0.0012, "step": 6712 }, { "epoch": 9.71, "learning_rate": 6.335423701559737e-06, "loss": 0.0173, "step": 6713 }, { "epoch": 9.71, "learning_rate": 6.303264190384306e-06, "loss": 0.0365, "step": 6714 }, { "epoch": 9.71, "learning_rate": 6.271104679208876e-06, "loss": 0.0002, "step": 6715 }, { "epoch": 9.71, "learning_rate": 6.238945168033446e-06, "loss": 0.0011, "step": 6716 }, { "epoch": 9.71, "learning_rate": 6.2067856568580165e-06, "loss": 0.0002, "step": 6717 }, { "epoch": 9.72, "learning_rate": 6.174626145682586e-06, "loss": 0.0, "step": 6718 }, { "epoch": 9.72, "learning_rate": 6.142466634507156e-06, "loss": 0.0003, "step": 6719 }, { "epoch": 9.72, "learning_rate": 6.1103071233317255e-06, "loss": 0.0123, "step": 6720 }, { "epoch": 9.72, "learning_rate": 6.078147612156296e-06, "loss": 0.0017, "step": 6721 }, { "epoch": 9.72, "learning_rate": 6.045988100980866e-06, "loss": 0.027, "step": 6722 }, { "epoch": 9.72, "learning_rate": 6.013828589805435e-06, "loss": 0.0053, "step": 6723 }, { "epoch": 9.72, "learning_rate": 5.981669078630006e-06, "loss": 0.004, "step": 6724 }, { "epoch": 9.73, "learning_rate": 5.949509567454575e-06, "loss": 0.028, "step": 6725 }, { "epoch": 9.73, "learning_rate": 5.917350056279144e-06, "loss": 0.0013, "step": 6726 }, { "epoch": 9.73, "learning_rate": 5.885190545103714e-06, "loss": 0.0194, "step": 6727 }, { "epoch": 9.73, "learning_rate": 5.8530310339282845e-06, "loss": 0.0001, "step": 6728 }, { "epoch": 9.73, "learning_rate": 5.820871522752854e-06, "loss": 0.0241, "step": 6729 }, { "epoch": 9.73, "learning_rate": 5.788712011577424e-06, "loss": 0.0012, "step": 6730 }, { "epoch": 9.73, "learning_rate": 5.7565525004019935e-06, "loss": 0.0, "step": 6731 }, { "epoch": 9.74, "learning_rate": 5.724392989226564e-06, "loss": 0.0007, "step": 6732 }, { "epoch": 9.74, "learning_rate": 5.692233478051134e-06, "loss": 0.0001, "step": 6733 }, { "epoch": 9.74, "learning_rate": 5.660073966875703e-06, "loss": 0.0106, "step": 6734 }, { "epoch": 9.74, "learning_rate": 5.627914455700274e-06, "loss": 0.0002, "step": 6735 }, { "epoch": 9.74, "learning_rate": 5.5957549445248435e-06, "loss": 0.0302, "step": 6736 }, { "epoch": 9.74, "learning_rate": 5.563595433349413e-06, "loss": 0.0, "step": 6737 }, { "epoch": 9.74, "learning_rate": 5.531435922173983e-06, "loss": 0.0136, "step": 6738 }, { "epoch": 9.75, "learning_rate": 5.499276410998553e-06, "loss": 0.0087, "step": 6739 }, { "epoch": 9.75, "learning_rate": 5.467116899823123e-06, "loss": 0.0008, "step": 6740 }, { "epoch": 9.75, "learning_rate": 5.434957388647693e-06, "loss": 0.055, "step": 6741 }, { "epoch": 9.75, "learning_rate": 5.402797877472262e-06, "loss": 0.0063, "step": 6742 }, { "epoch": 9.75, "learning_rate": 5.370638366296833e-06, "loss": 0.0023, "step": 6743 }, { "epoch": 9.75, "learning_rate": 5.3384788551214025e-06, "loss": 0.0401, "step": 6744 }, { "epoch": 9.75, "learning_rate": 5.306319343945972e-06, "loss": 0.0358, "step": 6745 }, { "epoch": 9.76, "learning_rate": 5.274159832770542e-06, "loss": 0.0413, "step": 6746 }, { "epoch": 9.76, "learning_rate": 5.242000321595112e-06, "loss": 0.0003, "step": 6747 }, { "epoch": 9.76, "learning_rate": 5.209840810419682e-06, "loss": 0.0029, "step": 6748 }, { "epoch": 9.76, "learning_rate": 5.177681299244252e-06, "loss": 0.0313, "step": 6749 }, { "epoch": 9.76, "learning_rate": 5.145521788068821e-06, "loss": 0.0449, "step": 6750 }, { "epoch": 9.76, "learning_rate": 5.113362276893392e-06, "loss": 0.0057, "step": 6751 }, { "epoch": 9.76, "learning_rate": 5.0812027657179616e-06, "loss": 0.0603, "step": 6752 }, { "epoch": 9.77, "learning_rate": 5.049043254542531e-06, "loss": 0.0295, "step": 6753 }, { "epoch": 9.77, "learning_rate": 5.016883743367101e-06, "loss": 0.0779, "step": 6754 }, { "epoch": 9.77, "learning_rate": 4.984724232191671e-06, "loss": 0.0092, "step": 6755 }, { "epoch": 9.77, "learning_rate": 4.952564721016241e-06, "loss": 0.0002, "step": 6756 }, { "epoch": 9.77, "learning_rate": 4.920405209840811e-06, "loss": 0.0007, "step": 6757 }, { "epoch": 9.77, "learning_rate": 4.88824569866538e-06, "loss": 0.0139, "step": 6758 }, { "epoch": 9.77, "learning_rate": 4.856086187489951e-06, "loss": 0.003, "step": 6759 }, { "epoch": 9.78, "learning_rate": 4.8239266763145206e-06, "loss": 0.0001, "step": 6760 }, { "epoch": 9.78, "learning_rate": 4.79176716513909e-06, "loss": 0.0083, "step": 6761 }, { "epoch": 9.78, "learning_rate": 4.75960765396366e-06, "loss": 0.0262, "step": 6762 }, { "epoch": 9.78, "learning_rate": 4.72744814278823e-06, "loss": 0.0002, "step": 6763 }, { "epoch": 9.78, "learning_rate": 4.6952886316128e-06, "loss": 0.0086, "step": 6764 }, { "epoch": 9.78, "learning_rate": 4.66312912043737e-06, "loss": 0.0005, "step": 6765 }, { "epoch": 9.78, "learning_rate": 4.630969609261939e-06, "loss": 0.0038, "step": 6766 }, { "epoch": 9.79, "learning_rate": 4.598810098086509e-06, "loss": 0.0361, "step": 6767 }, { "epoch": 9.79, "learning_rate": 4.566650586911079e-06, "loss": 0.0007, "step": 6768 }, { "epoch": 9.79, "learning_rate": 4.534491075735648e-06, "loss": 0.0235, "step": 6769 }, { "epoch": 9.79, "learning_rate": 4.502331564560219e-06, "loss": 0.0007, "step": 6770 }, { "epoch": 9.79, "learning_rate": 4.470172053384789e-06, "loss": 0.0001, "step": 6771 }, { "epoch": 9.79, "learning_rate": 4.438012542209358e-06, "loss": 0.0269, "step": 6772 }, { "epoch": 9.79, "learning_rate": 4.405853031033928e-06, "loss": 0.0012, "step": 6773 }, { "epoch": 9.8, "learning_rate": 4.3736935198584984e-06, "loss": 0.0104, "step": 6774 }, { "epoch": 9.8, "learning_rate": 4.341534008683068e-06, "loss": 0.0151, "step": 6775 }, { "epoch": 9.8, "learning_rate": 4.309374497507638e-06, "loss": 0.0009, "step": 6776 }, { "epoch": 9.8, "learning_rate": 4.2772149863322074e-06, "loss": 0.0003, "step": 6777 }, { "epoch": 9.8, "learning_rate": 4.245055475156778e-06, "loss": 0.013, "step": 6778 }, { "epoch": 9.8, "learning_rate": 4.212895963981348e-06, "loss": 0.0, "step": 6779 }, { "epoch": 9.8, "learning_rate": 4.180736452805917e-06, "loss": 0.0012, "step": 6780 }, { "epoch": 9.81, "learning_rate": 4.148576941630487e-06, "loss": 0.0004, "step": 6781 }, { "epoch": 9.81, "learning_rate": 4.1164174304550574e-06, "loss": 0.0515, "step": 6782 }, { "epoch": 9.81, "learning_rate": 4.084257919279627e-06, "loss": 0.0315, "step": 6783 }, { "epoch": 9.81, "learning_rate": 4.052098408104197e-06, "loss": 0.0002, "step": 6784 }, { "epoch": 9.81, "learning_rate": 4.0199388969287664e-06, "loss": 0.0001, "step": 6785 }, { "epoch": 9.81, "learning_rate": 3.987779385753337e-06, "loss": 0.0046, "step": 6786 }, { "epoch": 9.81, "learning_rate": 3.955619874577907e-06, "loss": 0.0001, "step": 6787 }, { "epoch": 9.82, "learning_rate": 3.923460363402476e-06, "loss": 0.0006, "step": 6788 }, { "epoch": 9.82, "learning_rate": 3.891300852227047e-06, "loss": 0.0217, "step": 6789 }, { "epoch": 9.82, "learning_rate": 3.8591413410516165e-06, "loss": 0.0063, "step": 6790 }, { "epoch": 9.82, "learning_rate": 3.826981829876186e-06, "loss": 0.0003, "step": 6791 }, { "epoch": 9.82, "learning_rate": 3.7948223187007562e-06, "loss": 0.0003, "step": 6792 }, { "epoch": 9.82, "learning_rate": 3.762662807525326e-06, "loss": 0.0, "step": 6793 }, { "epoch": 9.83, "learning_rate": 3.730503296349896e-06, "loss": 0.0037, "step": 6794 }, { "epoch": 9.83, "learning_rate": 3.6983437851744656e-06, "loss": 0.0014, "step": 6795 }, { "epoch": 9.83, "learning_rate": 3.6661842739990357e-06, "loss": 0.0003, "step": 6796 }, { "epoch": 9.83, "learning_rate": 3.6340247628236054e-06, "loss": 0.0102, "step": 6797 }, { "epoch": 9.83, "learning_rate": 3.6018652516481755e-06, "loss": 0.0042, "step": 6798 }, { "epoch": 9.83, "learning_rate": 3.569705740472745e-06, "loss": 0.0103, "step": 6799 }, { "epoch": 9.83, "learning_rate": 3.5375462292973152e-06, "loss": 0.0002, "step": 6800 }, { "epoch": 9.84, "learning_rate": 3.505386718121885e-06, "loss": 0.0009, "step": 6801 }, { "epoch": 9.84, "learning_rate": 3.473227206946455e-06, "loss": 0.0049, "step": 6802 }, { "epoch": 9.84, "learning_rate": 3.4410676957710246e-06, "loss": 0.0819, "step": 6803 }, { "epoch": 9.84, "learning_rate": 3.4089081845955947e-06, "loss": 0.0381, "step": 6804 }, { "epoch": 9.84, "learning_rate": 3.3767486734201644e-06, "loss": 0.0475, "step": 6805 }, { "epoch": 9.84, "learning_rate": 3.3445891622447345e-06, "loss": 0.0, "step": 6806 }, { "epoch": 9.84, "learning_rate": 3.312429651069304e-06, "loss": 0.0127, "step": 6807 }, { "epoch": 9.85, "learning_rate": 3.2802701398938734e-06, "loss": 0.0054, "step": 6808 }, { "epoch": 9.85, "learning_rate": 3.2481106287184435e-06, "loss": 0.0087, "step": 6809 }, { "epoch": 9.85, "learning_rate": 3.215951117543013e-06, "loss": 0.0058, "step": 6810 }, { "epoch": 9.85, "learning_rate": 3.1837916063675832e-06, "loss": 0.0002, "step": 6811 }, { "epoch": 9.85, "learning_rate": 3.151632095192153e-06, "loss": 0.0174, "step": 6812 }, { "epoch": 9.85, "learning_rate": 3.119472584016723e-06, "loss": 0.0769, "step": 6813 }, { "epoch": 9.85, "learning_rate": 3.087313072841293e-06, "loss": 0.0075, "step": 6814 }, { "epoch": 9.86, "learning_rate": 3.0551535616658627e-06, "loss": 0.0002, "step": 6815 }, { "epoch": 9.86, "learning_rate": 3.022994050490433e-06, "loss": 0.0974, "step": 6816 }, { "epoch": 9.86, "learning_rate": 2.990834539315003e-06, "loss": 0.0698, "step": 6817 }, { "epoch": 9.86, "learning_rate": 2.958675028139572e-06, "loss": 0.0002, "step": 6818 }, { "epoch": 9.86, "learning_rate": 2.9265155169641423e-06, "loss": 0.0005, "step": 6819 }, { "epoch": 9.86, "learning_rate": 2.894356005788712e-06, "loss": 0.0014, "step": 6820 }, { "epoch": 9.86, "learning_rate": 2.862196494613282e-06, "loss": 0.0241, "step": 6821 }, { "epoch": 9.87, "learning_rate": 2.8300369834378517e-06, "loss": 0.0162, "step": 6822 }, { "epoch": 9.87, "learning_rate": 2.7978774722624218e-06, "loss": 0.0, "step": 6823 }, { "epoch": 9.87, "learning_rate": 2.7657179610869914e-06, "loss": 0.0632, "step": 6824 }, { "epoch": 9.87, "learning_rate": 2.7335584499115615e-06, "loss": 0.0283, "step": 6825 }, { "epoch": 9.87, "learning_rate": 2.701398938736131e-06, "loss": 0.0188, "step": 6826 }, { "epoch": 9.87, "learning_rate": 2.6692394275607013e-06, "loss": 0.0042, "step": 6827 }, { "epoch": 9.87, "learning_rate": 2.637079916385271e-06, "loss": 0.0, "step": 6828 }, { "epoch": 9.88, "learning_rate": 2.604920405209841e-06, "loss": 0.0234, "step": 6829 }, { "epoch": 9.88, "learning_rate": 2.5727608940344107e-06, "loss": 0.0066, "step": 6830 }, { "epoch": 9.88, "learning_rate": 2.5406013828589808e-06, "loss": 0.0002, "step": 6831 }, { "epoch": 9.88, "learning_rate": 2.5084418716835504e-06, "loss": 0.0067, "step": 6832 }, { "epoch": 9.88, "learning_rate": 2.4762823605081205e-06, "loss": 0.0472, "step": 6833 }, { "epoch": 9.88, "learning_rate": 2.44412284933269e-06, "loss": 0.0212, "step": 6834 }, { "epoch": 9.88, "learning_rate": 2.4119633381572603e-06, "loss": 0.0588, "step": 6835 }, { "epoch": 9.89, "learning_rate": 2.37980382698183e-06, "loss": 0.0001, "step": 6836 }, { "epoch": 9.89, "learning_rate": 2.3476443158064e-06, "loss": 0.0049, "step": 6837 }, { "epoch": 9.89, "learning_rate": 2.3154848046309697e-06, "loss": 0.005, "step": 6838 }, { "epoch": 9.89, "learning_rate": 2.2833252934555394e-06, "loss": 0.0035, "step": 6839 }, { "epoch": 9.89, "learning_rate": 2.2511657822801095e-06, "loss": 0.0002, "step": 6840 }, { "epoch": 9.89, "learning_rate": 2.219006271104679e-06, "loss": 0.0054, "step": 6841 }, { "epoch": 9.89, "learning_rate": 2.1868467599292492e-06, "loss": 0.0663, "step": 6842 }, { "epoch": 9.9, "learning_rate": 2.154687248753819e-06, "loss": 0.0003, "step": 6843 }, { "epoch": 9.9, "learning_rate": 2.122527737578389e-06, "loss": 0.0001, "step": 6844 }, { "epoch": 9.9, "learning_rate": 2.0903682264029586e-06, "loss": 0.0149, "step": 6845 }, { "epoch": 9.9, "learning_rate": 2.0582087152275287e-06, "loss": 0.0197, "step": 6846 }, { "epoch": 9.9, "learning_rate": 2.0260492040520984e-06, "loss": 0.0663, "step": 6847 }, { "epoch": 9.9, "learning_rate": 1.9938896928766685e-06, "loss": 0.043, "step": 6848 }, { "epoch": 9.9, "learning_rate": 1.961730181701238e-06, "loss": 0.0036, "step": 6849 }, { "epoch": 9.91, "learning_rate": 1.9295706705258082e-06, "loss": 0.0006, "step": 6850 }, { "epoch": 9.91, "learning_rate": 1.8974111593503781e-06, "loss": 0.0061, "step": 6851 }, { "epoch": 9.91, "learning_rate": 1.865251648174948e-06, "loss": 0.0383, "step": 6852 }, { "epoch": 9.91, "learning_rate": 1.8330921369995179e-06, "loss": 0.0002, "step": 6853 }, { "epoch": 9.91, "learning_rate": 1.8009326258240877e-06, "loss": 0.0, "step": 6854 }, { "epoch": 9.91, "learning_rate": 1.7687731146486576e-06, "loss": 0.0184, "step": 6855 }, { "epoch": 9.91, "learning_rate": 1.7366136034732275e-06, "loss": 0.0017, "step": 6856 }, { "epoch": 9.92, "learning_rate": 1.7044540922977974e-06, "loss": 0.0263, "step": 6857 }, { "epoch": 9.92, "learning_rate": 1.6722945811223672e-06, "loss": 0.0013, "step": 6858 }, { "epoch": 9.92, "learning_rate": 1.6401350699469367e-06, "loss": 0.0201, "step": 6859 }, { "epoch": 9.92, "learning_rate": 1.6079755587715066e-06, "loss": 0.0096, "step": 6860 }, { "epoch": 9.92, "learning_rate": 1.5758160475960765e-06, "loss": 0.0138, "step": 6861 }, { "epoch": 9.92, "learning_rate": 1.5436565364206465e-06, "loss": 0.01, "step": 6862 }, { "epoch": 9.92, "learning_rate": 1.5114970252452164e-06, "loss": 0.0003, "step": 6863 }, { "epoch": 9.93, "learning_rate": 1.479337514069786e-06, "loss": 0.0519, "step": 6864 }, { "epoch": 9.93, "learning_rate": 1.447178002894356e-06, "loss": 0.0005, "step": 6865 }, { "epoch": 9.93, "learning_rate": 1.4150184917189258e-06, "loss": 0.0432, "step": 6866 }, { "epoch": 9.93, "learning_rate": 1.3828589805434957e-06, "loss": 0.0002, "step": 6867 }, { "epoch": 9.93, "learning_rate": 1.3506994693680656e-06, "loss": 0.0001, "step": 6868 }, { "epoch": 9.93, "learning_rate": 1.3185399581926355e-06, "loss": 0.0003, "step": 6869 }, { "epoch": 9.93, "learning_rate": 1.2863804470172053e-06, "loss": 0.0001, "step": 6870 }, { "epoch": 9.94, "learning_rate": 1.2542209358417752e-06, "loss": 0.0, "step": 6871 }, { "epoch": 9.94, "learning_rate": 1.222061424666345e-06, "loss": 0.0411, "step": 6872 }, { "epoch": 9.94, "learning_rate": 1.189901913490915e-06, "loss": 0.0221, "step": 6873 }, { "epoch": 9.94, "learning_rate": 1.1577424023154849e-06, "loss": 0.0118, "step": 6874 }, { "epoch": 9.94, "learning_rate": 1.1255828911400547e-06, "loss": 0.0001, "step": 6875 }, { "epoch": 9.94, "learning_rate": 1.0934233799646246e-06, "loss": 0.0224, "step": 6876 }, { "epoch": 9.95, "learning_rate": 1.0612638687891945e-06, "loss": 0.0076, "step": 6877 }, { "epoch": 9.95, "learning_rate": 1.0291043576137644e-06, "loss": 0.0178, "step": 6878 }, { "epoch": 9.95, "learning_rate": 9.969448464383342e-07, "loss": 0.0214, "step": 6879 }, { "epoch": 9.95, "learning_rate": 9.647853352629041e-07, "loss": 0.012, "step": 6880 }, { "epoch": 9.95, "learning_rate": 9.32625824087474e-07, "loss": 0.0002, "step": 6881 }, { "epoch": 9.95, "learning_rate": 9.004663129120439e-07, "loss": 0.0303, "step": 6882 }, { "epoch": 9.95, "learning_rate": 8.683068017366137e-07, "loss": 0.0462, "step": 6883 }, { "epoch": 9.96, "learning_rate": 8.361472905611836e-07, "loss": 0.0025, "step": 6884 }, { "epoch": 9.96, "learning_rate": 8.039877793857533e-07, "loss": 0.0119, "step": 6885 }, { "epoch": 9.96, "learning_rate": 7.718282682103233e-07, "loss": 0.0005, "step": 6886 }, { "epoch": 9.96, "learning_rate": 7.39668757034893e-07, "loss": 0.0239, "step": 6887 }, { "epoch": 9.96, "learning_rate": 7.075092458594629e-07, "loss": 0.0432, "step": 6888 }, { "epoch": 9.96, "learning_rate": 6.753497346840328e-07, "loss": 0.0311, "step": 6889 }, { "epoch": 9.96, "learning_rate": 6.431902235086027e-07, "loss": 0.0004, "step": 6890 }, { "epoch": 9.97, "learning_rate": 6.110307123331725e-07, "loss": 0.0014, "step": 6891 }, { "epoch": 9.97, "learning_rate": 5.788712011577424e-07, "loss": 0.0, "step": 6892 }, { "epoch": 9.97, "learning_rate": 5.467116899823123e-07, "loss": 0.0296, "step": 6893 }, { "epoch": 9.97, "learning_rate": 5.145521788068822e-07, "loss": 0.0104, "step": 6894 }, { "epoch": 9.97, "learning_rate": 4.823926676314521e-07, "loss": 0.0, "step": 6895 }, { "epoch": 9.97, "learning_rate": 4.5023315645602193e-07, "loss": 0.0004, "step": 6896 }, { "epoch": 9.97, "learning_rate": 4.180736452805918e-07, "loss": 0.0317, "step": 6897 }, { "epoch": 9.98, "learning_rate": 3.8591413410516164e-07, "loss": 0.0009, "step": 6898 }, { "epoch": 9.98, "learning_rate": 3.5375462292973146e-07, "loss": 0.0588, "step": 6899 }, { "epoch": 9.98, "learning_rate": 3.2159511175430134e-07, "loss": 0.0007, "step": 6900 }, { "epoch": 9.98, "learning_rate": 2.894356005788712e-07, "loss": 0.0295, "step": 6901 }, { "epoch": 9.98, "learning_rate": 2.572760894034411e-07, "loss": 0.0574, "step": 6902 }, { "epoch": 9.98, "learning_rate": 2.2511657822801097e-07, "loss": 0.0482, "step": 6903 }, { "epoch": 9.98, "learning_rate": 1.9295706705258082e-07, "loss": 0.0005, "step": 6904 }, { "epoch": 9.99, "learning_rate": 1.6079755587715067e-07, "loss": 0.035, "step": 6905 }, { "epoch": 9.99, "learning_rate": 1.2863804470172055e-07, "loss": 0.0175, "step": 6906 }, { "epoch": 9.99, "learning_rate": 9.647853352629041e-08, "loss": 0.0177, "step": 6907 }, { "epoch": 9.99, "learning_rate": 6.431902235086027e-08, "loss": 0.0254, "step": 6908 }, { "epoch": 9.99, "learning_rate": 3.2159511175430136e-08, "loss": 0.0063, "step": 6909 }, { "epoch": 9.99, "learning_rate": 0.0, "loss": 0.0, "step": 6910 }, { "epoch": 9.99, "step": 6910, "total_flos": 695877609873408.0, "train_loss": 0.14138391603609243, "train_runtime": 47099.76, "train_samples_per_second": 2.348, "train_steps_per_second": 0.147 } ], "max_steps": 6910, "num_train_epochs": 10, "total_flos": 695877609873408.0, "trial_name": null, "trial_params": null }