|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 2523, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6576, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6592, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.6936, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.6209, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.7266, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.583, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0, |
|
"loss": 0.5872, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0, |
|
"loss": 0.6971, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0, |
|
"loss": 0.6075, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.6744, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.5986, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.6547, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.5527, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.4789, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.5411, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.5627, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0666666666666667e-05, |
|
"loss": 0.5674, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.5058, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.5078, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4666666666666666e-05, |
|
"loss": 0.5258, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.5611, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 0.4715, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.866666666666667e-05, |
|
"loss": 0.7397, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5526, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.999996823967381e-05, |
|
"loss": 0.5387, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9999872958896982e-05, |
|
"loss": 0.5412, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9999714158274743e-05, |
|
"loss": 0.487, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9999491838815805e-05, |
|
"loss": 0.5345, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.999920600193236e-05, |
|
"loss": 0.5062, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9998856649440058e-05, |
|
"loss": 0.5077, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.999844378355801e-05, |
|
"loss": 0.5029, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.999796740690877e-05, |
|
"loss": 0.5374, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9997427522518315e-05, |
|
"loss": 0.5321, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.999682413381602e-05, |
|
"loss": 0.4695, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9996157244634647e-05, |
|
"loss": 0.4798, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9995799988672346e-05, |
|
"loss": 0.4682, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.999503785684114e-05, |
|
"loss": 0.5138, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9994212235877407e-05, |
|
"loss": 0.5271, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.999332313102555e-05, |
|
"loss": 0.5526, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.999237054793322e-05, |
|
"loss": 0.5385, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9991354492651283e-05, |
|
"loss": 0.5259, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9990274971633787e-05, |
|
"loss": 0.5054, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9989131991737928e-05, |
|
"loss": 0.4698, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.998792556022398e-05, |
|
"loss": 0.4909, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.998665568475528e-05, |
|
"loss": 0.462, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.998532237339816e-05, |
|
"loss": 0.4898, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9983925634621894e-05, |
|
"loss": 0.4771, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.998246547729867e-05, |
|
"loss": 0.4774, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.998094191070349e-05, |
|
"loss": 0.421, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.997935494451416e-05, |
|
"loss": 0.4991, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9977704588811183e-05, |
|
"loss": 0.5349, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9975990854077733e-05, |
|
"loss": 0.4717, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9974213751199556e-05, |
|
"loss": 0.5024, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9972373291464933e-05, |
|
"loss": 0.5904, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9970469486564585e-05, |
|
"loss": 0.4817, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.99685023485916e-05, |
|
"loss": 0.488, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9966471890041374e-05, |
|
"loss": 0.5262, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9964378123811502e-05, |
|
"loss": 0.4721, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9962221063201734e-05, |
|
"loss": 0.4594, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.996000072191385e-05, |
|
"loss": 0.5068, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9957717114051608e-05, |
|
"loss": 0.5169, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9955370254120635e-05, |
|
"loss": 0.4781, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9952960157028335e-05, |
|
"loss": 0.4682, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9950486838083807e-05, |
|
"loss": 0.4754, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.994795031299773e-05, |
|
"loss": 0.479, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9945350597882275e-05, |
|
"loss": 0.5381, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9942687709251006e-05, |
|
"loss": 0.5457, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.993996166401877e-05, |
|
"loss": 0.4653, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9937172479501573e-05, |
|
"loss": 0.4686, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9934320173416502e-05, |
|
"loss": 0.5098, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9931404763881598e-05, |
|
"loss": 0.4823, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.992842626941572e-05, |
|
"loss": 0.4999, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.992538470893847e-05, |
|
"loss": 0.4868, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.992228010177003e-05, |
|
"loss": 0.4668, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9919112467631074e-05, |
|
"loss": 0.4955, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.991588182664262e-05, |
|
"loss": 0.4855, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.99125881993259e-05, |
|
"loss": 0.4646, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9909231606602266e-05, |
|
"loss": 0.5182, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9905812069793002e-05, |
|
"loss": 0.4857, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.990232961061924e-05, |
|
"loss": 0.5048, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.989878425120177e-05, |
|
"loss": 0.4597, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9895176014060964e-05, |
|
"loss": 0.4387, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9891504922116572e-05, |
|
"loss": 0.4556, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9887770998687607e-05, |
|
"loss": 0.4937, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9883974267492202e-05, |
|
"loss": 0.5738, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9880114752647434e-05, |
|
"loss": 0.4769, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9876192478669197e-05, |
|
"loss": 0.5109, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.987220747047203e-05, |
|
"loss": 0.4926, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9868159753368964e-05, |
|
"loss": 0.4726, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9864049353071365e-05, |
|
"loss": 0.4675, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.985987629568876e-05, |
|
"loss": 0.4723, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9855640607728684e-05, |
|
"loss": 0.4215, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9851342316096503e-05, |
|
"loss": 0.505, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.984698144809525e-05, |
|
"loss": 0.5278, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9842558031425434e-05, |
|
"loss": 0.4836, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.983807209418489e-05, |
|
"loss": 0.445, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9833523664868587e-05, |
|
"loss": 0.4585, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.982891277236845e-05, |
|
"loss": 0.4934, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.982423944597315e-05, |
|
"loss": 0.4899, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.981950371536798e-05, |
|
"loss": 0.5321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9814705610634602e-05, |
|
"loss": 0.4883, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.980984516225089e-05, |
|
"loss": 0.4805, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9804922401090732e-05, |
|
"loss": 0.4507, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9799937358423826e-05, |
|
"loss": 0.4872, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9794890065915486e-05, |
|
"loss": 0.5495, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9789780555626444e-05, |
|
"loss": 0.4936, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9784608860012652e-05, |
|
"loss": 0.4769, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9779375011925046e-05, |
|
"loss": 0.4714, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9774079044609373e-05, |
|
"loss": 0.5064, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.976872099170597e-05, |
|
"loss": 0.4629, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.976330088724953e-05, |
|
"loss": 0.4149, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9757818765668916e-05, |
|
"loss": 0.4565, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9752274661786916e-05, |
|
"loss": 0.4233, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9746668610820047e-05, |
|
"loss": 0.4557, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9741000648378303e-05, |
|
"loss": 0.5186, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9735270810464958e-05, |
|
"loss": 0.4425, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.972947913347631e-05, |
|
"loss": 0.5274, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9723625654201472e-05, |
|
"loss": 0.4087, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.971771040982213e-05, |
|
"loss": 0.4412, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9711733437912293e-05, |
|
"loss": 0.4998, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9705694776438084e-05, |
|
"loss": 0.5343, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9699594463757475e-05, |
|
"loss": 0.4365, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9693432538620046e-05, |
|
"loss": 0.4877, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9687209040166748e-05, |
|
"loss": 0.4361, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.968092400792965e-05, |
|
"loss": 0.4555, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.967457748183169e-05, |
|
"loss": 0.4353, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.966816950218641e-05, |
|
"loss": 0.3476, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9661700109697718e-05, |
|
"loss": 0.4342, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9655169345459622e-05, |
|
"loss": 0.4603, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.964857725095595e-05, |
|
"loss": 0.4514, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.964192386806013e-05, |
|
"loss": 0.4889, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9635209239034872e-05, |
|
"loss": 0.5255, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.962843340653195e-05, |
|
"loss": 0.5325, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9621596413591885e-05, |
|
"loss": 0.5163, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.96146983036437e-05, |
|
"loss": 0.3708, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.960773912050465e-05, |
|
"loss": 0.5378, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.960071890837991e-05, |
|
"loss": 0.4389, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9593637711862335e-05, |
|
"loss": 0.4565, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9586495575932137e-05, |
|
"loss": 0.5446, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.957929254595664e-05, |
|
"loss": 0.5079, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.957202866768995e-05, |
|
"loss": 0.3997, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9564703987272703e-05, |
|
"loss": 0.4934, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9557318551231745e-05, |
|
"loss": 0.5076, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9549872406479843e-05, |
|
"loss": 0.4487, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9542365600315403e-05, |
|
"loss": 0.5515, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.953479818042214e-05, |
|
"loss": 0.5262, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.95271701948688e-05, |
|
"loss": 0.4953, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.951948169210885e-05, |
|
"loss": 0.5127, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9511732720980156e-05, |
|
"loss": 0.4796, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.950392333070469e-05, |
|
"loss": 0.5016, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9496053570888205e-05, |
|
"loss": 0.5114, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9488123491519935e-05, |
|
"loss": 0.4471, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9480133142972257e-05, |
|
"loss": 0.4427, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.947208257600039e-05, |
|
"loss": 0.457, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9463971841742057e-05, |
|
"loss": 0.5193, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.945580099171717e-05, |
|
"loss": 0.5337, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9447570077827503e-05, |
|
"loss": 0.4758, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9439279152356363e-05, |
|
"loss": 0.4223, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.943092826796824e-05, |
|
"loss": 0.4909, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9422517477708506e-05, |
|
"loss": 0.4615, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9414046835003043e-05, |
|
"loss": 0.3863, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.940551639365793e-05, |
|
"loss": 0.4676, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9396926207859085e-05, |
|
"loss": 0.488, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.938827633217193e-05, |
|
"loss": 0.4888, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9379566821541034e-05, |
|
"loss": 0.4754, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9370797731289784e-05, |
|
"loss": 0.7974, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.936196911712001e-05, |
|
"loss": 0.3944, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9353081035111644e-05, |
|
"loss": 0.4883, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9344133541722368e-05, |
|
"loss": 0.5429, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9335126693787237e-05, |
|
"loss": 0.4573, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9326060548518342e-05, |
|
"loss": 0.4276, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9316935163504424e-05, |
|
"loss": 0.5089, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.930775059671053e-05, |
|
"loss": 0.4285, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9298506906477623e-05, |
|
"loss": 0.4438, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9289204151522227e-05, |
|
"loss": 0.4644, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.927984239093605e-05, |
|
"loss": 0.3904, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9270421684185603e-05, |
|
"loss": 0.5486, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9260942091111836e-05, |
|
"loss": 0.5429, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9251403671929738e-05, |
|
"loss": 0.4597, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9241806487227967e-05, |
|
"loss": 0.4721, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.923215059796847e-05, |
|
"loss": 0.4689, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.922243606548609e-05, |
|
"loss": 0.4716, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9212662951488162e-05, |
|
"loss": 0.4993, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9202831318054153e-05, |
|
"loss": 0.4557, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9192941227635232e-05, |
|
"loss": 0.4701, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.91829927430539e-05, |
|
"loss": 0.4032, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9172985927503584e-05, |
|
"loss": 0.4743, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9162920844548227e-05, |
|
"loss": 0.4491, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9152797558121894e-05, |
|
"loss": 0.429, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9142616132528356e-05, |
|
"loss": 0.4707, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.91323766324407e-05, |
|
"loss": 0.4371, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.912207912290089e-05, |
|
"loss": 0.5172, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9111723669319385e-05, |
|
"loss": 0.4482, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.91013103374747e-05, |
|
"loss": 0.4701, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9090839193513e-05, |
|
"loss": 0.4737, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9080310303947668e-05, |
|
"loss": 0.4922, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9069723735658903e-05, |
|
"loss": 0.4081, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9059079555893277e-05, |
|
"loss": 0.486, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9048377832263314e-05, |
|
"loss": 0.4674, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.903761863274706e-05, |
|
"loss": 0.4528, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.902680202568765e-05, |
|
"loss": 0.4716, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9015928079792884e-05, |
|
"loss": 0.5213, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9004996864134767e-05, |
|
"loss": 0.4836, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8994008448149103e-05, |
|
"loss": 0.5513, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8982962901635022e-05, |
|
"loss": 0.4634, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8971860294754554e-05, |
|
"loss": 0.5111, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8960700698032194e-05, |
|
"loss": 0.4474, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.894948418235441e-05, |
|
"loss": 0.4785, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8938210818969257e-05, |
|
"loss": 0.4977, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8926880679485865e-05, |
|
"loss": 0.4346, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8915493835874026e-05, |
|
"loss": 0.4139, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8904050360463708e-05, |
|
"loss": 0.4975, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.8892550325944617e-05, |
|
"loss": 0.4766, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.888099380536572e-05, |
|
"loss": 0.484, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.886938087213479e-05, |
|
"loss": 0.4301, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.885771160001794e-05, |
|
"loss": 0.4474, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.8845986063139144e-05, |
|
"loss": 0.4445, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.8834204335979777e-05, |
|
"loss": 0.4422, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.8822366493378143e-05, |
|
"loss": 0.5337, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.8810472610528987e-05, |
|
"loss": 0.4704, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8798522762983026e-05, |
|
"loss": 0.458, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8786517026646474e-05, |
|
"loss": 0.3866, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8774455477780557e-05, |
|
"loss": 0.4939, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8762338193001013e-05, |
|
"loss": 0.496, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8750165249277625e-05, |
|
"loss": 0.4171, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.873793672393373e-05, |
|
"loss": 0.4582, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8725652694645714e-05, |
|
"loss": 0.4762, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.871331323944254e-05, |
|
"loss": 0.3859, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8700918436705226e-05, |
|
"loss": 0.4165, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.868846836516637e-05, |
|
"loss": 0.3933, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.8675963103909636e-05, |
|
"loss": 0.4746, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.866340273236926e-05, |
|
"loss": 0.4893, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.8650787330329546e-05, |
|
"loss": 0.516, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.8638116977924346e-05, |
|
"loss": 0.4391, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.862539175563657e-05, |
|
"loss": 0.3998, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.861261174429765e-05, |
|
"loss": 0.4525, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.8599777025087068e-05, |
|
"loss": 0.4023, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.858688767953178e-05, |
|
"loss": 0.397, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8573943789505762e-05, |
|
"loss": 0.4845, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8560945437229443e-05, |
|
"loss": 0.4518, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8547892705269207e-05, |
|
"loss": 0.5362, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8534785676536856e-05, |
|
"loss": 0.4601, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8521624434289094e-05, |
|
"loss": 0.4814, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.850840906212699e-05, |
|
"loss": 0.4707, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.849513964399545e-05, |
|
"loss": 0.4144, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8481816264182678e-05, |
|
"loss": 0.434, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8468439007319663e-05, |
|
"loss": 0.4782, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8455007958379604e-05, |
|
"loss": 0.3848, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8441523202677406e-05, |
|
"loss": 0.4541, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8427984825869114e-05, |
|
"loss": 0.4708, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8414392913951382e-05, |
|
"loss": 0.5103, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8400747553260915e-05, |
|
"loss": 0.4201, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8387048830473948e-05, |
|
"loss": 0.4586, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8373296832605647e-05, |
|
"loss": 0.4667, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8359491647009608e-05, |
|
"loss": 0.4846, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.834563336137727e-05, |
|
"loss": 0.5255, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8331722063737365e-05, |
|
"loss": 0.482, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8317757842455363e-05, |
|
"loss": 0.4211, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.830374078623291e-05, |
|
"loss": 0.4852, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8289670984107263e-05, |
|
"loss": 0.4299, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8275548525450722e-05, |
|
"loss": 0.5044, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8261373499970064e-05, |
|
"loss": 0.4072, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8247145997705977e-05, |
|
"loss": 0.4478, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.823286610903248e-05, |
|
"loss": 0.4962, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8218533924656367e-05, |
|
"loss": 0.4658, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8204149535616596e-05, |
|
"loss": 0.4124, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8189713033283755e-05, |
|
"loss": 0.4149, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.817522450935944e-05, |
|
"loss": 0.4327, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8160684055875704e-05, |
|
"loss": 0.4469, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8146091765194458e-05, |
|
"loss": 0.44, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8131447730006885e-05, |
|
"loss": 0.4911, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8116752043332848e-05, |
|
"loss": 0.4848, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.810200479852031e-05, |
|
"loss": 0.4297, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8087206089244728e-05, |
|
"loss": 0.4205, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8072356009508473e-05, |
|
"loss": 0.3892, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.805745465364022e-05, |
|
"loss": 0.4519, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8042502116294355e-05, |
|
"loss": 0.4376, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8027498492450367e-05, |
|
"loss": 0.4538, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8012443877412253e-05, |
|
"loss": 0.4672, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.799733836680791e-05, |
|
"loss": 0.4034, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7982182056588536e-05, |
|
"loss": 0.4613, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.796697504302799e-05, |
|
"loss": 0.4664, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.795171742272222e-05, |
|
"loss": 0.4271, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7936409292588627e-05, |
|
"loss": 0.4741, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.792105074986545e-05, |
|
"loss": 0.3175, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7905641892111152e-05, |
|
"loss": 0.4154, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7890182817203806e-05, |
|
"loss": 0.4558, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7874673623340463e-05, |
|
"loss": 0.465, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.785911440903653e-05, |
|
"loss": 0.4688, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7843505273125164e-05, |
|
"loss": 0.4411, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7827846314756604e-05, |
|
"loss": 0.4286, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7812137633397577e-05, |
|
"loss": 0.4425, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7796379328830652e-05, |
|
"loss": 0.4126, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.778057150115361e-05, |
|
"loss": 0.479, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.77647142507788e-05, |
|
"loss": 0.3994, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7748807678432514e-05, |
|
"loss": 0.4574, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7732851885154336e-05, |
|
"loss": 0.3901, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7716846972296505e-05, |
|
"loss": 0.435, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7700793041523272e-05, |
|
"loss": 0.4337, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7684690194810256e-05, |
|
"loss": 0.4196, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7668538534443782e-05, |
|
"loss": 0.4508, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7652338163020257e-05, |
|
"loss": 0.4583, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.76360891834455e-05, |
|
"loss": 0.4499, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7619791698934077e-05, |
|
"loss": 0.4263, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7603445813008685e-05, |
|
"loss": 0.3721, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7587051629499452e-05, |
|
"loss": 0.3788, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7570609252543302e-05, |
|
"loss": 0.4405, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.755411878658329e-05, |
|
"loss": 0.4939, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7537580336367925e-05, |
|
"loss": 0.4188, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7520994006950526e-05, |
|
"loss": 0.4425, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7504359903688537e-05, |
|
"loss": 0.37, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.748767813224287e-05, |
|
"loss": 0.4633, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.747094879857722e-05, |
|
"loss": 0.3729, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7454172008957417e-05, |
|
"loss": 0.4312, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7437347869950713e-05, |
|
"loss": 0.4367, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7420476488425138e-05, |
|
"loss": 0.4091, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.740355797154881e-05, |
|
"loss": 0.469, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7386592426789252e-05, |
|
"loss": 0.4872, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7369579961912712e-05, |
|
"loss": 0.4932, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7352520684983474e-05, |
|
"loss": 0.3848, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7335414704363178e-05, |
|
"loss": 0.3694, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7318262128710132e-05, |
|
"loss": 0.5099, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7301063066978617e-05, |
|
"loss": 0.4407, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.728381762841819e-05, |
|
"loss": 0.4409, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7266525922573e-05, |
|
"loss": 0.4444, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.72491880592811e-05, |
|
"loss": 0.4079, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7231804148673717e-05, |
|
"loss": 0.4502, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7214374301174594e-05, |
|
"loss": 0.49, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.719689862749926e-05, |
|
"loss": 0.4778, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7179377238654325e-05, |
|
"loss": 0.3734, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.716181024593681e-05, |
|
"loss": 0.4956, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.714419776093338e-05, |
|
"loss": 0.3712, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.7126539895519698e-05, |
|
"loss": 0.3779, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.710883676185968e-05, |
|
"loss": 0.4457, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.709108847240478e-05, |
|
"loss": 0.4161, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.7073295139893296e-05, |
|
"loss": 0.4459, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.705545687734963e-05, |
|
"loss": 0.4465, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.7037573798083598e-05, |
|
"loss": 0.4284, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.701964601568968e-05, |
|
"loss": 0.3806, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7001673644046322e-05, |
|
"loss": 0.4591, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.6983656797315197e-05, |
|
"loss": 0.4809, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.6965595589940496e-05, |
|
"loss": 0.3811, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6947490136648182e-05, |
|
"loss": 0.4223, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6929340552445283e-05, |
|
"loss": 0.4698, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6911146952619132e-05, |
|
"loss": 0.4059, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.689290945273667e-05, |
|
"loss": 0.4451, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.6874628168643683e-05, |
|
"loss": 0.3428, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.685630321646408e-05, |
|
"loss": 0.4552, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.683793471259915e-05, |
|
"loss": 0.6389, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.681952277372683e-05, |
|
"loss": 0.4356, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.680106751680096e-05, |
|
"loss": 0.4199, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6782569059050535e-05, |
|
"loss": 0.4369, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.676402751797896e-05, |
|
"loss": 0.3924, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.674544301136332e-05, |
|
"loss": 0.386, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.672681565725361e-05, |
|
"loss": 0.464, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6708145573972005e-05, |
|
"loss": 0.4597, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6689432880112078e-05, |
|
"loss": 0.4164, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6670677694538096e-05, |
|
"loss": 0.3761, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6651880136384215e-05, |
|
"loss": 0.4499, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6633040325053746e-05, |
|
"loss": 0.438, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.661415838021841e-05, |
|
"loss": 0.4526, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.659523442181754e-05, |
|
"loss": 0.4427, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6576268570057363e-05, |
|
"loss": 0.5268, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.655726094541021e-05, |
|
"loss": 0.4135, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.653821166861374e-05, |
|
"loss": 0.42, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6519120860670215e-05, |
|
"loss": 0.449, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6499988642845686e-05, |
|
"loss": 0.4751, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6480815136669248e-05, |
|
"loss": 0.3826, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6461600463932266e-05, |
|
"loss": 0.4712, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6442344746687594e-05, |
|
"loss": 0.4128, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.64230481072488e-05, |
|
"loss": 0.4679, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.640371066818941e-05, |
|
"loss": 0.4768, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.638433255234208e-05, |
|
"loss": 0.4785, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6364913882797875e-05, |
|
"loss": 0.4334, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6345454782905454e-05, |
|
"loss": 0.4015, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6325955376270286e-05, |
|
"loss": 0.439, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.630641578675387e-05, |
|
"loss": 0.3951, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6296630962191733e-05, |
|
"loss": 0.5453, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.62770313311519e-05, |
|
"loss": 0.3902, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.625739182799955e-05, |
|
"loss": 0.3943, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6237712577486092e-05, |
|
"loss": 0.3312, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.62179937046154e-05, |
|
"loss": 0.4366, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6198235334643045e-05, |
|
"loss": 0.3924, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6178437593075487e-05, |
|
"loss": 0.378, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6158600605669264e-05, |
|
"loss": 0.4624, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.613872449843022e-05, |
|
"loss": 0.4411, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6118809397612678e-05, |
|
"loss": 0.4695, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6098855429718662e-05, |
|
"loss": 0.4348, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.607886272149708e-05, |
|
"loss": 0.4048, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6058831399942917e-05, |
|
"loss": 0.3485, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6038761592296435e-05, |
|
"loss": 0.4146, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6018653426042357e-05, |
|
"loss": 0.4398, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5998507028909074e-05, |
|
"loss": 0.5815, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.597832252886781e-05, |
|
"loss": 0.4502, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5958100054131828e-05, |
|
"loss": 0.4275, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5937839733155603e-05, |
|
"loss": 0.4269, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.591754169463402e-05, |
|
"loss": 0.4211, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5897206067501544e-05, |
|
"loss": 0.4194, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5876832980931405e-05, |
|
"loss": 0.3833, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5856422564334772e-05, |
|
"loss": 0.4176, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5835974947359952e-05, |
|
"loss": 0.5327, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.581549025989154e-05, |
|
"loss": 0.4776, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.5794968632049598e-05, |
|
"loss": 0.3573, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.5774410194188856e-05, |
|
"loss": 0.464, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.5753815076897848e-05, |
|
"loss": 0.4549, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.57331834109981e-05, |
|
"loss": 0.463, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5712515327543307e-05, |
|
"loss": 0.4438, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5691810957818475e-05, |
|
"loss": 0.4306, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5671070433339116e-05, |
|
"loss": 0.4135, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5650293885850393e-05, |
|
"loss": 0.4706, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5629481447326297e-05, |
|
"loss": 0.4427, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5608633249968783e-05, |
|
"loss": 0.4661, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.558774942620697e-05, |
|
"loss": 0.3674, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5566830108696265e-05, |
|
"loss": 0.4204, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5545875430317546e-05, |
|
"loss": 0.4685, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5524885524176287e-05, |
|
"loss": 0.4583, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.550386052360174e-05, |
|
"loss": 0.4306, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.548280056214609e-05, |
|
"loss": 0.4203, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.546170577358358e-05, |
|
"loss": 0.4664, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.544057629190969e-05, |
|
"loss": 0.3553, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.541941225134025e-05, |
|
"loss": 0.368, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5398213786310643e-05, |
|
"loss": 0.3176, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.537698103147489e-05, |
|
"loss": 0.2801, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.5355714121704846e-05, |
|
"loss": 0.2576, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.53344131920893e-05, |
|
"loss": 0.2689, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.531307837793315e-05, |
|
"loss": 0.3045, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.529170981475653e-05, |
|
"loss": 0.2506, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5270307638293943e-05, |
|
"loss": 0.2546, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.524887198449341e-05, |
|
"loss": 0.2853, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5227402989515607e-05, |
|
"loss": 0.2772, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5205900789732986e-05, |
|
"loss": 0.2763, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5184365521728928e-05, |
|
"loss": 0.2578, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5162797322296855e-05, |
|
"loss": 0.3121, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5141196328439377e-05, |
|
"loss": 0.3037, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5119562677367421e-05, |
|
"loss": 0.2877, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5097896506499349e-05, |
|
"loss": 0.2856, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5076197953460087e-05, |
|
"loss": 0.3417, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5054467156080262e-05, |
|
"loss": 0.285, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5032704252395315e-05, |
|
"loss": 0.3137, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5010909380644636e-05, |
|
"loss": 0.2204, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4989082679270668e-05, |
|
"loss": 0.2808, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.496722428691804e-05, |
|
"loss": 0.2691, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4945334342432688e-05, |
|
"loss": 0.2638, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.492341298486097e-05, |
|
"loss": 0.26, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.490146035344878e-05, |
|
"loss": 0.2764, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4879476587640657e-05, |
|
"loss": 0.2558, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4868473072968645e-05, |
|
"loss": 0.5349, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.4846442867457533e-05, |
|
"loss": 0.2937, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.4824381877025154e-05, |
|
"loss": 0.2684, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.4802290241804355e-05, |
|
"loss": 0.2491, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.478016810212265e-05, |
|
"loss": 0.2634, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.4758015598501308e-05, |
|
"loss": 0.2889, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.473583287165448e-05, |
|
"loss": 0.2843, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4713620062488296e-05, |
|
"loss": 0.2705, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4691377312099965e-05, |
|
"loss": 0.2765, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4669104761776892e-05, |
|
"loss": 0.2595, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4646802552995767e-05, |
|
"loss": 0.2101, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4624470827421675e-05, |
|
"loss": 0.263, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4602109726907197e-05, |
|
"loss": 0.2592, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4579719393491496e-05, |
|
"loss": 0.2732, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.455729996939944e-05, |
|
"loss": 0.3056, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.4534851597040666e-05, |
|
"loss": 0.2886, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.45123744190087e-05, |
|
"loss": 0.2493, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.4489868578080046e-05, |
|
"loss": 0.271, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4467334217213274e-05, |
|
"loss": 0.2752, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4444771479548115e-05, |
|
"loss": 0.3108, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4422180508404544e-05, |
|
"loss": 0.2946, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.439956144728189e-05, |
|
"loss": 0.2401, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4376914439857905e-05, |
|
"loss": 0.3501, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4354239629987857e-05, |
|
"loss": 0.2895, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4331537161703612e-05, |
|
"loss": 0.2632, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4308807179212736e-05, |
|
"loss": 0.261, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4286049826897559e-05, |
|
"loss": 0.3207, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.4263265249314269e-05, |
|
"loss": 0.2592, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.4240453591191984e-05, |
|
"loss": 0.2468, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.4217614997431847e-05, |
|
"loss": 0.2483, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.41947496131061e-05, |
|
"loss": 0.2657, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4171857583457154e-05, |
|
"loss": 0.2389, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4148939053896669e-05, |
|
"loss": 0.2404, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4125994170004644e-05, |
|
"loss": 0.2539, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4103023077528482e-05, |
|
"loss": 0.2721, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.4080025922382056e-05, |
|
"loss": 0.3314, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.4057002850644796e-05, |
|
"loss": 0.2668, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.4033954008560758e-05, |
|
"loss": 0.2295, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.401087954253769e-05, |
|
"loss": 0.284, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.3987779599146105e-05, |
|
"loss": 0.2595, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.396465432511835e-05, |
|
"loss": 0.2849, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.3941503867347672e-05, |
|
"loss": 0.271, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.3918328372887295e-05, |
|
"loss": 0.2943, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.3895127988949471e-05, |
|
"loss": 0.2751, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.3871902862904544e-05, |
|
"loss": 0.276, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.3848653142280037e-05, |
|
"loss": 0.2251, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.3825378974759696e-05, |
|
"loss": 0.2722, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.3802080508182543e-05, |
|
"loss": 0.2927, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.377875789054196e-05, |
|
"loss": 0.2473, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.376708757136279e-05, |
|
"loss": 0.3166, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.3743729004949972e-05, |
|
"loss": 0.3079, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.3720346658126286e-05, |
|
"loss": 0.2695, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3696940679417918e-05, |
|
"loss": 0.3125, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3673511217501172e-05, |
|
"loss": 0.2874, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3650058421201517e-05, |
|
"loss": 0.31, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.362658243949265e-05, |
|
"loss": 0.2795, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3603083421495535e-05, |
|
"loss": 0.2693, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3579561516477467e-05, |
|
"loss": 0.2659, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.355601687385112e-05, |
|
"loss": 0.2909, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3532449643173604e-05, |
|
"loss": 0.262, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3508859974145504e-05, |
|
"loss": 0.2538, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3485248016609937e-05, |
|
"loss": 0.2674, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3461613920551598e-05, |
|
"loss": 0.2863, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3437957836095804e-05, |
|
"loss": 0.3213, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3414279913507548e-05, |
|
"loss": 0.2932, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3390580303190541e-05, |
|
"loss": 0.2604, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3366859155686253e-05, |
|
"loss": 0.275, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3343116621672959e-05, |
|
"loss": 0.2625, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3319352851964787e-05, |
|
"loss": 0.2664, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3295567997510747e-05, |
|
"loss": 0.2567, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3271762209393793e-05, |
|
"loss": 0.2469, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3247935638829838e-05, |
|
"loss": 0.2596, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3224088437166818e-05, |
|
"loss": 0.3033, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.320022075588371e-05, |
|
"loss": 0.3582, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3176332746589587e-05, |
|
"loss": 0.2339, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3152424561022634e-05, |
|
"loss": 0.2622, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3128496351049216e-05, |
|
"loss": 0.2388, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3104548268662873e-05, |
|
"loss": 0.2322, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3080580465983397e-05, |
|
"loss": 0.3108, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3056593095255825e-05, |
|
"loss": 0.2339, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3032586308849512e-05, |
|
"loss": 0.2731, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3008560259257117e-05, |
|
"loss": 0.2677, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.2984515099093687e-05, |
|
"loss": 0.2907, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.2960450981095643e-05, |
|
"loss": 0.2836, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.2936368058119828e-05, |
|
"loss": 0.2621, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.2912266483142545e-05, |
|
"loss": 0.3009, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.2888146409258575e-05, |
|
"loss": 0.252, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.2864007989680194e-05, |
|
"loss": 0.3354, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.2839851377736216e-05, |
|
"loss": 0.2908, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.281567672687102e-05, |
|
"loss": 0.284, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2791484190643571e-05, |
|
"loss": 0.2882, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2767273922726427e-05, |
|
"loss": 0.3096, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2743046076904795e-05, |
|
"loss": 0.2674, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.271880080707553e-05, |
|
"loss": 0.2207, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2694538267246168e-05, |
|
"loss": 0.2605, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2670258611533947e-05, |
|
"loss": 0.2598, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2645961994164822e-05, |
|
"loss": 0.2856, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2621648569472491e-05, |
|
"loss": 0.2436, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2597318491897416e-05, |
|
"loss": 0.2606, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.257297191598584e-05, |
|
"loss": 0.2602, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2548608996388792e-05, |
|
"loss": 0.2465, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2524229887861132e-05, |
|
"loss": 0.2536, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.2499834745260553e-05, |
|
"loss": 0.2859, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.2475423723546584e-05, |
|
"loss": 0.2539, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.245099697777963e-05, |
|
"loss": 0.2299, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.2426554663119975e-05, |
|
"loss": 0.4215, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.2402096934826794e-05, |
|
"loss": 0.2575, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.237762394825718e-05, |
|
"loss": 0.3257, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.2353135858865128e-05, |
|
"loss": 0.2778, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.232863282220059e-05, |
|
"loss": 0.2592, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.230411499390845e-05, |
|
"loss": 0.2539, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2279582529727552e-05, |
|
"loss": 0.2831, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2255035585489705e-05, |
|
"loss": 0.2806, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2230474317118708e-05, |
|
"loss": 0.2777, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2205898880629336e-05, |
|
"loss": 0.3334, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2181309432126366e-05, |
|
"loss": 0.302, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2156706127803578e-05, |
|
"loss": 0.2659, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2132089123942764e-05, |
|
"loss": 0.297, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2107458576912743e-05, |
|
"loss": 0.3207, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.2082814643168357e-05, |
|
"loss": 0.2224, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.2058157479249475e-05, |
|
"loss": 0.295, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.2033487241780014e-05, |
|
"loss": 0.2238, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.2008804087466931e-05, |
|
"loss": 0.277, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.1984108173099238e-05, |
|
"loss": 0.2906, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.1959399655546989e-05, |
|
"loss": 0.2649, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.1934678691760296e-05, |
|
"loss": 0.3147, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.190994543876834e-05, |
|
"loss": 0.2761, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.188520005367836e-05, |
|
"loss": 0.2252, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1860442693674648e-05, |
|
"loss": 0.2521, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1835673516017571e-05, |
|
"loss": 0.2618, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1810892678042565e-05, |
|
"loss": 0.2869, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1786100337159132e-05, |
|
"loss": 0.2124, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.177369990233723e-05, |
|
"loss": 0.3826, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.1748890602393521e-05, |
|
"loss": 0.2805, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.172407019338261e-05, |
|
"loss": 0.2467, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.1699238832965358e-05, |
|
"loss": 0.2405, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1674396678872186e-05, |
|
"loss": 0.3017, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.164954388890207e-05, |
|
"loss": 0.2584, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.162468062092156e-05, |
|
"loss": 0.3141, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1599807032863756e-05, |
|
"loss": 0.3254, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1574923282727314e-05, |
|
"loss": 0.2703, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1550029528575428e-05, |
|
"loss": 0.2207, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.152512592853486e-05, |
|
"loss": 0.2634, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1500212640794895e-05, |
|
"loss": 0.3368, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1475289823606364e-05, |
|
"loss": 0.2535, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1450357635280628e-05, |
|
"loss": 0.287, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1425416234188578e-05, |
|
"loss": 0.3052, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1400465778759611e-05, |
|
"loss": 0.2909, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1375506427480658e-05, |
|
"loss": 0.2904, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.135053833889514e-05, |
|
"loss": 0.339, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1325561671601987e-05, |
|
"loss": 0.292, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1300576584254617e-05, |
|
"loss": 0.2424, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.127558323555994e-05, |
|
"loss": 0.267, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.125058178427733e-05, |
|
"loss": 0.2641, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1225572389217643e-05, |
|
"loss": 0.269, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1200555209242182e-05, |
|
"loss": 0.2903, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1175530403261716e-05, |
|
"loss": 0.2622, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1150498130235435e-05, |
|
"loss": 0.3199, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1125458549169977e-05, |
|
"loss": 0.2469, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1100411819118387e-05, |
|
"loss": 0.2781, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1075358099179136e-05, |
|
"loss": 0.3293, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1050297548495084e-05, |
|
"loss": 0.2065, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1025230326252484e-05, |
|
"loss": 0.2548, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1000156591679971e-05, |
|
"loss": 0.3063, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0975076504047535e-05, |
|
"loss": 0.3099, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0949990222665532e-05, |
|
"loss": 0.2805, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0924897906883663e-05, |
|
"loss": 0.288, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0899799716089949e-05, |
|
"loss": 0.3014, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0874695809709737e-05, |
|
"loss": 0.2768, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0849586347204677e-05, |
|
"loss": 0.2894, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0824471488071714e-05, |
|
"loss": 0.2718, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0799351391842074e-05, |
|
"loss": 0.2476, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0774226218080244e-05, |
|
"loss": 0.2318, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0749096126382965e-05, |
|
"loss": 0.2545, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0723961276378225e-05, |
|
"loss": 0.2708, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0698821827724225e-05, |
|
"loss": 0.3471, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0673677940108386e-05, |
|
"loss": 0.2528, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0648529773246324e-05, |
|
"loss": 0.2625, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0623377486880831e-05, |
|
"loss": 0.2634, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0598221240780874e-05, |
|
"loss": 0.2506, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0573061194740568e-05, |
|
"loss": 0.2659, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.054789750857817e-05, |
|
"loss": 0.239, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.052273034213505e-05, |
|
"loss": 0.2465, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0497559855274699e-05, |
|
"loss": 0.2512, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0472386207881684e-05, |
|
"loss": 0.303, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0447209559860658e-05, |
|
"loss": 0.2542, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0422030071135336e-05, |
|
"loss": 0.2995, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0396847901647469e-05, |
|
"loss": 0.2597, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.037166321135584e-05, |
|
"loss": 0.2773, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0346476160235246e-05, |
|
"loss": 0.2771, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0321286908275476e-05, |
|
"loss": 0.2906, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.0296095615480309e-05, |
|
"loss": 0.3002, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.0270902441866474e-05, |
|
"loss": 0.2267, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.0245707547462654e-05, |
|
"loss": 0.2545, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.0220511092308463e-05, |
|
"loss": 0.2412, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0195313236453431e-05, |
|
"loss": 0.2567, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0170114139955975e-05, |
|
"loss": 0.2589, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0144913962882406e-05, |
|
"loss": 0.2834, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0119712865305891e-05, |
|
"loss": 0.2504, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0094511007305445e-05, |
|
"loss": 0.2788, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0069308548964915e-05, |
|
"loss": 0.2664, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0044105650371961e-05, |
|
"loss": 0.2695, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0018902471617037e-05, |
|
"loss": 0.2309, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.993699172792381e-06, |
|
"loss": 0.2949, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.96849591399099e-06, |
|
"loss": 0.3248, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.943292855305611e-06, |
|
"loss": 0.2719, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.918090156827712e-06, |
|
"loss": 0.2417, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.892887978646483e-06, |
|
"loss": 0.2371, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.867686480847801e-06, |
|
"loss": 0.271, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.842485823513222e-06, |
|
"loss": 0.2791, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.817286166718971e-06, |
|
"loss": 0.2759, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.792087670534908e-06, |
|
"loss": 0.2867, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.766890495023522e-06, |
|
"loss": 0.2964, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.741694800238923e-06, |
|
"loss": 0.2466, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.716500746225802e-06, |
|
"loss": 0.2745, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.691308493018439e-06, |
|
"loss": 0.2429, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.666118200639667e-06, |
|
"loss": 0.2561, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.640930029099863e-06, |
|
"loss": 0.2462, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.615744138395941e-06, |
|
"loss": 0.2294, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.590560688510323e-06, |
|
"loss": 0.2462, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.565379839409916e-06, |
|
"loss": 0.2755, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.540201751045127e-06, |
|
"loss": 0.2623, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.515026583348811e-06, |
|
"loss": 0.3047, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.489854496235278e-06, |
|
"loss": 0.2489, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.464685649599266e-06, |
|
"loss": 0.23, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.439520203314927e-06, |
|
"loss": 0.2517, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.414358317234826e-06, |
|
"loss": 0.3041, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.3892001511889e-06, |
|
"loss": 0.2922, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.364045864983454e-06, |
|
"loss": 0.2451, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.338895618400168e-06, |
|
"loss": 0.2457, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.313749571195041e-06, |
|
"loss": 0.2488, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.28860788309741e-06, |
|
"loss": 0.2599, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.263470713808917e-06, |
|
"loss": 0.2192, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.238338223002496e-06, |
|
"loss": 0.2296, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.213210570321374e-06, |
|
"loss": 0.2907, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.188087915378037e-06, |
|
"loss": 0.2644, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.162970417753229e-06, |
|
"loss": 0.2307, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.137858236994932e-06, |
|
"loss": 0.2493, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.112751532617361e-06, |
|
"loss": 0.2546, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.087650464099937e-06, |
|
"loss": 0.2835, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.062555190886287e-06, |
|
"loss": 0.2701, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.037465872383219e-06, |
|
"loss": 0.2514, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.012382667959724e-06, |
|
"loss": 0.2423, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.987305736945955e-06, |
|
"loss": 0.2289, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.962235238632208e-06, |
|
"loss": 0.2504, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.937171332267927e-06, |
|
"loss": 0.2912, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.912114177060681e-06, |
|
"loss": 0.2368, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.887063932175156e-06, |
|
"loss": 0.2823, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.862020756732141e-06, |
|
"loss": 0.2289, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.836984809807514e-06, |
|
"loss": 0.2332, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.811956250431253e-06, |
|
"loss": 0.2627, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.786935237586394e-06, |
|
"loss": 0.2613, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.761921930208044e-06, |
|
"loss": 0.2353, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.73691648718236e-06, |
|
"loss": 0.2668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.71191906734555e-06, |
|
"loss": 0.2292, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.686929829482862e-06, |
|
"loss": 0.2577, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.661948932327558e-06, |
|
"loss": 0.2177, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.636976534559926e-06, |
|
"loss": 0.2849, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.61201279480627e-06, |
|
"loss": 0.2575, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.587057871637891e-06, |
|
"loss": 0.267, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.562111923570091e-06, |
|
"loss": 0.2572, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.537175109061154e-06, |
|
"loss": 0.2546, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.512247586511354e-06, |
|
"loss": 0.3069, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.487329514261948e-06, |
|
"loss": 0.2915, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.46242105059415e-06, |
|
"loss": 0.3033, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.437522353728147e-06, |
|
"loss": 0.2389, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.412633581822086e-06, |
|
"loss": 0.4257, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.387754892971073e-06, |
|
"loss": 0.2206, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.36288644520616e-06, |
|
"loss": 0.2217, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.338028396493345e-06, |
|
"loss": 0.27, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.313180904732578e-06, |
|
"loss": 0.266, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.288344127756755e-06, |
|
"loss": 0.2922, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.263518223330698e-06, |
|
"loss": 0.2122, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.238703349150169e-06, |
|
"loss": 0.2822, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.213899662840871e-06, |
|
"loss": 0.2687, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.189107321957437e-06, |
|
"loss": 0.2783, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.164326483982434e-06, |
|
"loss": 0.2499, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.139557306325359e-06, |
|
"loss": 0.2408, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.114799946321647e-06, |
|
"loss": 0.2584, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.090054561231659e-06, |
|
"loss": 0.237, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.065321308239706e-06, |
|
"loss": 0.2553, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.040600344453013e-06, |
|
"loss": 0.2224, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.015891826900764e-06, |
|
"loss": 0.3115, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.99119591253307e-06, |
|
"loss": 0.223, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.966512758219991e-06, |
|
"loss": 0.2604, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.941842520750529e-06, |
|
"loss": 0.262, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.91718535683165e-06, |
|
"loss": 0.2583, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.892541423087258e-06, |
|
"loss": 0.2318, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.867910876057238e-06, |
|
"loss": 0.2489, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.843293872196425e-06, |
|
"loss": 0.2609, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.818690567873637e-06, |
|
"loss": 0.2592, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.794101119370668e-06, |
|
"loss": 0.269, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.769525682881295e-06, |
|
"loss": 0.2532, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.744964414510297e-06, |
|
"loss": 0.2223, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.720417470272455e-06, |
|
"loss": 0.2525, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.695885006091552e-06, |
|
"loss": 0.2701, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.67136717779941e-06, |
|
"loss": 0.3059, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.646864141134874e-06, |
|
"loss": 0.2591, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.622376051742824e-06, |
|
"loss": 0.2618, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.5979030651732065e-06, |
|
"loss": 0.2814, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.573445336880029e-06, |
|
"loss": 0.2389, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.549003022220374e-06, |
|
"loss": 0.2347, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.524576276453422e-06, |
|
"loss": 0.2642, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.500165254739453e-06, |
|
"loss": 0.2591, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.475770112138867e-06, |
|
"loss": 0.2591, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.4513910036112105e-06, |
|
"loss": 0.2878, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.427028084014163e-06, |
|
"loss": 0.305, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.402681508102585e-06, |
|
"loss": 0.263, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.378351430527511e-06, |
|
"loss": 0.2868, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.35403800583518e-06, |
|
"loss": 0.2299, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.329741388466056e-06, |
|
"loss": 0.2942, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.305461732753836e-06, |
|
"loss": 0.2993, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.281199192924473e-06, |
|
"loss": 0.263, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.256953923095209e-06, |
|
"loss": 0.2591, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.232726077273575e-06, |
|
"loss": 0.2612, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.208515809356434e-06, |
|
"loss": 0.2354, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.184323273128981e-06, |
|
"loss": 0.2504, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.160148622263786e-06, |
|
"loss": 0.2437, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.135992010319812e-06, |
|
"loss": 0.2154, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.123920516899151e-06, |
|
"loss": 0.2963, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.0997912510091335e-06, |
|
"loss": 0.2307, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.075680407434289e-06, |
|
"loss": 0.2826, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.051588139328276e-06, |
|
"loss": 0.2344, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.0275145997267544e-06, |
|
"loss": 0.3482, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.0034599415464135e-06, |
|
"loss": 0.2818, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.979424317584014e-06, |
|
"loss": 0.3444, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.955407880515404e-06, |
|
"loss": 0.2712, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.931410782894563e-06, |
|
"loss": 0.2794, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.907433177152618e-06, |
|
"loss": 0.2701, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.883475215596882e-06, |
|
"loss": 0.2943, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.859537050409895e-06, |
|
"loss": 0.2488, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.835618833648443e-06, |
|
"loss": 0.2451, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.8117207172425996e-06, |
|
"loss": 0.3123, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.787842852994757e-06, |
|
"loss": 0.2522, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.763985392578667e-06, |
|
"loss": 0.2766, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.740148487538476e-06, |
|
"loss": 0.2473, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.716332289287759e-06, |
|
"loss": 0.214, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.692536949108562e-06, |
|
"loss": 0.2742, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.6687626181504315e-06, |
|
"loss": 0.2348, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.64500944742948e-06, |
|
"loss": 0.2363, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.6212775878273925e-06, |
|
"loss": 0.2686, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.59756719009049e-06, |
|
"loss": 0.2852, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.5738784048287615e-06, |
|
"loss": 0.2272, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.550211382514922e-06, |
|
"loss": 0.2975, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.526566273483439e-06, |
|
"loss": 0.2563, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.502943227929586e-06, |
|
"loss": 0.2971, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.479342395908487e-06, |
|
"loss": 0.2601, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.455763927334177e-06, |
|
"loss": 0.258, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.432207971978619e-06, |
|
"loss": 0.2953, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.4086746794707795e-06, |
|
"loss": 0.2961, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.385164199295666e-06, |
|
"loss": 0.262, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.3616766807933875e-06, |
|
"loss": 0.2151, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.338212273158188e-06, |
|
"loss": 0.2525, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.314771125437517e-06, |
|
"loss": 0.2393, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.291353386531074e-06, |
|
"loss": 0.2758, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.2679592051898685e-06, |
|
"loss": 0.2312, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.244588730015264e-06, |
|
"loss": 0.2428, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.221242109458043e-06, |
|
"loss": 0.2551, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.197919491817459e-06, |
|
"loss": 0.2715, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.174621025240307e-06, |
|
"loss": 0.2359, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.151346857719964e-06, |
|
"loss": 0.2671, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.128097137095458e-06, |
|
"loss": 0.1967, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.104872011050534e-06, |
|
"loss": 0.2563, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.081671627112704e-06, |
|
"loss": 0.2613, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.0584961326523285e-06, |
|
"loss": 0.2962, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.0353456748816545e-06, |
|
"loss": 0.2087, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.012220400853899e-06, |
|
"loss": 0.2902, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.989120457462314e-06, |
|
"loss": 0.2565, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.9660459914392465e-06, |
|
"loss": 0.243, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.942997149355208e-06, |
|
"loss": 0.2895, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.9199740776179494e-06, |
|
"loss": 0.3008, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.89697692247152e-06, |
|
"loss": 0.2124, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.874005829995358e-06, |
|
"loss": 0.2094, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.851060946103334e-06, |
|
"loss": 0.2355, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.828142416542852e-06, |
|
"loss": 0.304, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.8052503868939005e-06, |
|
"loss": 0.213, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.782385002568153e-06, |
|
"loss": 0.2471, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.759546408808019e-06, |
|
"loss": 0.2432, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.736734750685737e-06, |
|
"loss": 0.2565, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.713950173102441e-06, |
|
"loss": 0.2502, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.691192820787266e-06, |
|
"loss": 0.2105, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.6684628382963905e-06, |
|
"loss": 0.2437, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.645760370012149e-06, |
|
"loss": 0.2149, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.623085560142099e-06, |
|
"loss": 0.29, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.60043855271811e-06, |
|
"loss": 0.2718, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.577819491595457e-06, |
|
"loss": 0.2147, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.555228520451891e-06, |
|
"loss": 0.1925, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.53266578278673e-06, |
|
"loss": 0.2404, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.510131421919955e-06, |
|
"loss": 0.2405, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.487625580991303e-06, |
|
"loss": 0.1999, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.465148402959339e-06, |
|
"loss": 0.2185, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.442700030600565e-06, |
|
"loss": 0.3006, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.420280606508503e-06, |
|
"loss": 0.2406, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.397890273092807e-06, |
|
"loss": 0.2317, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.375529172578329e-06, |
|
"loss": 0.2616, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.353197447004239e-06, |
|
"loss": 0.1408, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.33089523822311e-06, |
|
"loss": 0.1778, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.308622687900038e-06, |
|
"loss": 0.1207, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.286379937511707e-06, |
|
"loss": 0.1327, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.264167128345523e-06, |
|
"loss": 0.125, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.241984401498693e-06, |
|
"loss": 0.17, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.219831897877353e-06, |
|
"loss": 0.159, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.197709758195648e-06, |
|
"loss": 0.1156, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.175618122974851e-06, |
|
"loss": 0.1356, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.153557132542473e-06, |
|
"loss": 0.1645, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.131526927031356e-06, |
|
"loss": 0.1626, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.109527646378815e-06, |
|
"loss": 0.1278, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.087559430325708e-06, |
|
"loss": 0.136, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.0656224184155764e-06, |
|
"loss": 0.1141, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.043716749993757e-06, |
|
"loss": 0.1444, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.02184256420648e-06, |
|
"loss": 0.1151, |
|
"step": 1714 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.1474, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.978189196119716e-06, |
|
"loss": 0.121, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.9564102911092646e-06, |
|
"loss": 0.1284, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.934663423309685e-06, |
|
"loss": 0.153, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.912948730858492e-06, |
|
"loss": 0.145, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.891266351688829e-06, |
|
"loss": 0.1167, |
|
"step": 1726 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.869616423528588e-06, |
|
"loss": 0.1338, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.847999083899522e-06, |
|
"loss": 0.1208, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.826414470116382e-06, |
|
"loss": 0.1403, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.804862719286044e-06, |
|
"loss": 0.1463, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.783343968306631e-06, |
|
"loss": 0.1276, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.7618583538666605e-06, |
|
"loss": 0.1242, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.740406012444153e-06, |
|
"loss": 0.1402, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.718987080305778e-06, |
|
"loss": 0.3846, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.697601693505996e-06, |
|
"loss": 0.161, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.6762499878861764e-06, |
|
"loss": 0.1102, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.654932099073746e-06, |
|
"loss": 0.1343, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.633648162481326e-06, |
|
"loss": 0.146, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.612398313305867e-06, |
|
"loss": 0.1533, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.5911826865277975e-06, |
|
"loss": 0.1346, |
|
"step": 1754 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.570001416910168e-06, |
|
"loss": 0.1579, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.548854638997778e-06, |
|
"loss": 0.1382, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.527742487116349e-06, |
|
"loss": 0.133, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.506665095371642e-06, |
|
"loss": 0.121, |
|
"step": 1762 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.485622597648624e-06, |
|
"loss": 0.1225, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.464615127610615e-06, |
|
"loss": 0.1139, |
|
"step": 1766 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.443642818698434e-06, |
|
"loss": 0.1329, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.4227058041295515e-06, |
|
"loss": 0.1131, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.401804216897258e-06, |
|
"loss": 0.1301, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.380938189769791e-06, |
|
"loss": 0.1443, |
|
"step": 1774 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.3601078552895245e-06, |
|
"loss": 0.1306, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.339313345772098e-06, |
|
"loss": 0.1501, |
|
"step": 1778 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.318554793305592e-06, |
|
"loss": 0.1076, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.297832329749687e-06, |
|
"loss": 0.1671, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.277146086734823e-06, |
|
"loss": 0.1364, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.2564961956613605e-06, |
|
"loss": 0.1347, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.235882787698763e-06, |
|
"loss": 0.1667, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.2153059937847355e-06, |
|
"loss": 0.1366, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.194765944624423e-06, |
|
"loss": 0.1142, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.174262770689552e-06, |
|
"loss": 0.1188, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.153796602217623e-06, |
|
"loss": 0.1068, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.133367569211074e-06, |
|
"loss": 0.1359, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.112975801436454e-06, |
|
"loss": 0.1584, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.092621428423601e-06, |
|
"loss": 0.1308, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.07230457946482e-06, |
|
"loss": 0.1324, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.052025383614061e-06, |
|
"loss": 0.1339, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.031783969686105e-06, |
|
"loss": 0.1178, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.011580466255729e-06, |
|
"loss": 0.127, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.991415001656906e-06, |
|
"loss": 0.1334, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.971287703981982e-06, |
|
"loss": 0.1201, |
|
"step": 1814 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.9511987010808635e-06, |
|
"loss": 0.1435, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.931148120560211e-06, |
|
"loss": 0.1394, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.911136089782613e-06, |
|
"loss": 0.1229, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.8911627358658e-06, |
|
"loss": 0.123, |
|
"step": 1822 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.871228185681822e-06, |
|
"loss": 0.1433, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.8513325658562395e-06, |
|
"loss": 0.1238, |
|
"step": 1826 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.831476002767327e-06, |
|
"loss": 0.1456, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.811658622545268e-06, |
|
"loss": 0.1478, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.7918805510713553e-06, |
|
"loss": 0.1269, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.7721419139771886e-06, |
|
"loss": 0.1379, |
|
"step": 1834 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.7524428366438757e-06, |
|
"loss": 0.1365, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.7327834442012433e-06, |
|
"loss": 0.1263, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.7131638615270404e-06, |
|
"loss": 0.1055, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.6935842132461307e-06, |
|
"loss": 0.1053, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.6740446237297177e-06, |
|
"loss": 0.1259, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.6545452170945496e-06, |
|
"loss": 0.1428, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.635086117202128e-06, |
|
"loss": 0.1375, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.6156674476579266e-06, |
|
"loss": 0.1469, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.5962893318105963e-06, |
|
"loss": 0.123, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.576951892751197e-06, |
|
"loss": 0.128, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.5576552533124074e-06, |
|
"loss": 0.1403, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.538399536067736e-06, |
|
"loss": 0.1392, |
|
"step": 1858 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.5191848633307545e-06, |
|
"loss": 0.1259, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.5000113571543183e-06, |
|
"loss": 0.1252, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.480879139329789e-06, |
|
"loss": 0.1641, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.4617883313862633e-06, |
|
"loss": 0.1396, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.4427390545897955e-06, |
|
"loss": 0.1284, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.423731429942636e-06, |
|
"loss": 0.1255, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.4047655781824605e-06, |
|
"loss": 0.1381, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.3858416197815947e-06, |
|
"loss": 0.1587, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.3669596749462562e-06, |
|
"loss": 0.1148, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.3481198636157908e-06, |
|
"loss": 0.1187, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.3293223054619073e-06, |
|
"loss": 0.1328, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.3105671198879243e-06, |
|
"loss": 0.1166, |
|
"step": 1882 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.2918544260279985e-06, |
|
"loss": 0.133, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.2731843427463894e-06, |
|
"loss": 0.127, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.254556988636678e-06, |
|
"loss": 0.1678, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.2359724820210394e-06, |
|
"loss": 0.1156, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.2174309409494675e-06, |
|
"loss": 0.1384, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.198932483199041e-06, |
|
"loss": 0.1324, |
|
"step": 1894 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.180477226273172e-06, |
|
"loss": 0.1498, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.162065287400855e-06, |
|
"loss": 0.1482, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.1436967835359245e-06, |
|
"loss": 0.1001, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.1253718313563207e-06, |
|
"loss": 0.1328, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.1070905472633307e-06, |
|
"loss": 0.1343, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.0888530473808677e-06, |
|
"loss": 0.1721, |
|
"step": 1906 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.070659447554719e-06, |
|
"loss": 0.1211, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.052509863351818e-06, |
|
"loss": 0.1267, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.0344044100595073e-06, |
|
"loss": 0.1257, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.016343202684807e-06, |
|
"loss": 0.1769, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.9983263559536813e-06, |
|
"loss": 0.1398, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.9803539843103226e-06, |
|
"loss": 0.0818, |
|
"step": 1918 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.962426201916402e-06, |
|
"loss": 0.1552, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.9445431226503683e-06, |
|
"loss": 0.1296, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.926704860106706e-06, |
|
"loss": 0.1082, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.9089115275952217e-06, |
|
"loss": 0.143, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.891163238140323e-06, |
|
"loss": 0.1019, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.8734601044803056e-06, |
|
"loss": 0.1256, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.855802239066623e-06, |
|
"loss": 0.1159, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.8381897540631964e-06, |
|
"loss": 0.1187, |
|
"step": 1934 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.820622761345676e-06, |
|
"loss": 0.1375, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.8031013725007415e-06, |
|
"loss": 0.1305, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.785625698825406e-06, |
|
"loss": 0.1443, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.768195851326285e-06, |
|
"loss": 0.1351, |
|
"step": 1942 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.750811940718906e-06, |
|
"loss": 0.1378, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.733474077427004e-06, |
|
"loss": 0.0981, |
|
"step": 1946 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.716182371581814e-06, |
|
"loss": 0.146, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.6989369330213865e-06, |
|
"loss": 0.1286, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.681737871289869e-06, |
|
"loss": 0.1551, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.6645852956368214e-06, |
|
"loss": 0.1166, |
|
"step": 1954 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.647479315016528e-06, |
|
"loss": 0.1181, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.6304200380872913e-06, |
|
"loss": 0.1341, |
|
"step": 1958 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.61340757321075e-06, |
|
"loss": 0.1196, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.596442028451194e-06, |
|
"loss": 0.1364, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.579523511574864e-06, |
|
"loss": 0.1209, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.56265213004929e-06, |
|
"loss": 0.1174, |
|
"step": 1966 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.5458279910425865e-06, |
|
"loss": 0.1383, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.5290512014227774e-06, |
|
"loss": 0.1044, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.5123218677571313e-06, |
|
"loss": 0.1163, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.4956400963114647e-06, |
|
"loss": 0.137, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.479005993049478e-06, |
|
"loss": 0.1591, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.4624196636320795e-06, |
|
"loss": 0.137, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.445881213416713e-06, |
|
"loss": 0.1583, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.429390747456699e-06, |
|
"loss": 0.1252, |
|
"step": 1982 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.412948370500551e-06, |
|
"loss": 0.1552, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.3965541869913188e-06, |
|
"loss": 0.1481, |
|
"step": 1986 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.3802083010659238e-06, |
|
"loss": 0.1243, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.3639108165545057e-06, |
|
"loss": 0.1273, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.3476618369797457e-06, |
|
"loss": 0.1403, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.331461465556222e-06, |
|
"loss": 0.1391, |
|
"step": 1994 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.315309805189748e-06, |
|
"loss": 0.1376, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.299206958476731e-06, |
|
"loss": 0.1253, |
|
"step": 1998 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.2831530277034985e-06, |
|
"loss": 0.131, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.2671481148456685e-06, |
|
"loss": 0.1377, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.251192321567488e-06, |
|
"loss": 0.1077, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.235285749221201e-06, |
|
"loss": 0.1253, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.219428498846393e-06, |
|
"loss": 0.1271, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.2036206711693508e-06, |
|
"loss": 0.1449, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.1878623666024233e-06, |
|
"loss": 0.1024, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.1721536852433976e-06, |
|
"loss": 0.1141, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.1564947268748382e-06, |
|
"loss": 0.1023, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.1408855909634696e-06, |
|
"loss": 0.1113, |
|
"step": 2018 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.125326376659539e-06, |
|
"loss": 0.1467, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.1098171827961965e-06, |
|
"loss": 0.1194, |
|
"step": 2022 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.094358107888852e-06, |
|
"loss": 0.1676, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.0789492501345553e-06, |
|
"loss": 0.1367, |
|
"step": 2026 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.0635907074113737e-06, |
|
"loss": 0.1231, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.0482825772777804e-06, |
|
"loss": 0.1626, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.0330249569720116e-06, |
|
"loss": 0.1117, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.0178179434114674e-06, |
|
"loss": 0.1303, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.00266163319209e-06, |
|
"loss": 0.1336, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9875561225877482e-06, |
|
"loss": 0.1143, |
|
"step": 2038 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.972501507549637e-06, |
|
"loss": 0.1449, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.957497883705649e-06, |
|
"loss": 0.1331, |
|
"step": 2042 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.9425453463597798e-06, |
|
"loss": 0.0957, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.927643990491528e-06, |
|
"loss": 0.1182, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.912793910755275e-06, |
|
"loss": 0.1394, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8979952014796954e-06, |
|
"loss": 0.1155, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.883247956667157e-06, |
|
"loss": 0.1681, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8685522699931169e-06, |
|
"loss": 0.1517, |
|
"step": 2054 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8539082348055427e-06, |
|
"loss": 0.1491, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.839315944124298e-06, |
|
"loss": 0.1276, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8247754906405624e-06, |
|
"loss": 0.1343, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8102869667162494e-06, |
|
"loss": 0.1477, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.7958504643834062e-06, |
|
"loss": 0.1584, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7814660753436386e-06, |
|
"loss": 0.1316, |
|
"step": 2066 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7671338909675218e-06, |
|
"loss": 0.1373, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7528540022940288e-06, |
|
"loss": 0.131, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7386265000299385e-06, |
|
"loss": 0.1206, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7244514745492813e-06, |
|
"loss": 0.117, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.71032901589274e-06, |
|
"loss": 0.1368, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.6962592137670897e-06, |
|
"loss": 0.1176, |
|
"step": 2078 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.6822421575446378e-06, |
|
"loss": 0.1501, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.6682779362626378e-06, |
|
"loss": 0.1326, |
|
"step": 2082 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.6543666386227343e-06, |
|
"loss": 0.1357, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.6405083529903954e-06, |
|
"loss": 0.1039, |
|
"step": 2086 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.6267031673943546e-06, |
|
"loss": 0.1407, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.6129511695260558e-06, |
|
"loss": 0.1312, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5992524467390858e-06, |
|
"loss": 0.1198, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5856070860486205e-06, |
|
"loss": 0.1091, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5720151741308875e-06, |
|
"loss": 0.119, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5584767973225967e-06, |
|
"loss": 0.1316, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.544992041620398e-06, |
|
"loss": 0.1108, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.531560992680341e-06, |
|
"loss": 0.1267, |
|
"step": 2102 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.5181837358173223e-06, |
|
"loss": 0.1292, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.5048603560045549e-06, |
|
"loss": 0.124, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4915909378730143e-06, |
|
"loss": 0.1466, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4783755657109079e-06, |
|
"loss": 0.103, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4652143234631465e-06, |
|
"loss": 0.1478, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4521072947307957e-06, |
|
"loss": 0.1196, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.4390545627705588e-06, |
|
"loss": 0.1203, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.426056210494241e-06, |
|
"loss": 0.125, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.413112320468223e-06, |
|
"loss": 0.1612, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.400222974912936e-06, |
|
"loss": 0.1226, |
|
"step": 2122 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.3873882557023488e-06, |
|
"loss": 0.1304, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.3746082443634311e-06, |
|
"loss": 0.1172, |
|
"step": 2126 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.361883022075653e-06, |
|
"loss": 0.1441, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.3492126696704544e-06, |
|
"loss": 0.1232, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3365972676307403e-06, |
|
"loss": 0.1127, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3240368960903671e-06, |
|
"loss": 0.1298, |
|
"step": 2134 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3115316348336348e-06, |
|
"loss": 0.1358, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.2990815632947763e-06, |
|
"loss": 0.1689, |
|
"step": 2138 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.2866867605574628e-06, |
|
"loss": 0.1101, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.2743473053542842e-06, |
|
"loss": 0.1308, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.262063276066272e-06, |
|
"loss": 0.1472, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.2498347507223763e-06, |
|
"loss": 0.1298, |
|
"step": 2146 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.237661806998991e-06, |
|
"loss": 0.1323, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.2255445222194462e-06, |
|
"loss": 0.0947, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.2134829733535269e-06, |
|
"loss": 0.1199, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.2014772370169747e-06, |
|
"loss": 0.1284, |
|
"step": 2154 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.1895273894710157e-06, |
|
"loss": 0.1323, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.177633506621857e-06, |
|
"loss": 0.1188, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.1657956640202217e-06, |
|
"loss": 0.1448, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.1540139368608572e-06, |
|
"loss": 0.1819, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.142288399982061e-06, |
|
"loss": 0.2198, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.1306191278652112e-06, |
|
"loss": 0.1341, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.1190061946342835e-06, |
|
"loss": 0.2365, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.1074496740553853e-06, |
|
"loss": 0.1264, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.0959496395362946e-06, |
|
"loss": 0.1328, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.0845061641259757e-06, |
|
"loss": 0.1076, |
|
"step": 2174 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.0731193205141354e-06, |
|
"loss": 0.1372, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.0617891810307458e-06, |
|
"loss": 0.1536, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.050515817645591e-06, |
|
"loss": 0.1243, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.039299301967811e-06, |
|
"loss": 0.2138, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.0281397052454457e-06, |
|
"loss": 0.1357, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.0170370983649792e-06, |
|
"loss": 0.1623, |
|
"step": 2186 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.005991551850899e-06, |
|
"loss": 0.1314, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.950031358652313e-07, |
|
"loss": 0.1163, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.84071920207118e-07, |
|
"loss": 0.1045, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.73197974312351e-07, |
|
"loss": 0.1449, |
|
"step": 2194 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.623813672529437e-07, |
|
"loss": 0.1287, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.516221677366888e-07, |
|
"loss": 0.1193, |
|
"step": 2198 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.409204441067254e-07, |
|
"loss": 0.1306, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.302762643411e-07, |
|
"loss": 0.1151, |
|
"step": 2202 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.196896960523349e-07, |
|
"loss": 0.1287, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.091608064870028e-07, |
|
"loss": 0.099, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.986896625253006e-07, |
|
"loss": 0.1151, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.882763306806163e-07, |
|
"loss": 0.1466, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.779208770991121e-07, |
|
"loss": 0.1133, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.676233675593038e-07, |
|
"loss": 0.157, |
|
"step": 2214 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.573838674716461e-07, |
|
"loss": 0.1184, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.472024418781099e-07, |
|
"loss": 0.125, |
|
"step": 2218 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.370791554517743e-07, |
|
"loss": 0.146, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.270140724964159e-07, |
|
"loss": 0.0981, |
|
"step": 2222 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.170072569460996e-07, |
|
"loss": 0.1288, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.070587723647705e-07, |
|
"loss": 0.1714, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.971686819458502e-07, |
|
"loss": 0.1147, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.873370485118381e-07, |
|
"loss": 0.1307, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.77563934513913e-07, |
|
"loss": 0.1278, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.678494020315308e-07, |
|
"loss": 0.118, |
|
"step": 2234 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.581935127720352e-07, |
|
"loss": 0.1289, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.485963280702646e-07, |
|
"loss": 0.1139, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.390579088881655e-07, |
|
"loss": 0.1164, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.295783158143976e-07, |
|
"loss": 0.0974, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.201576090639529e-07, |
|
"loss": 0.1444, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.107958484777755e-07, |
|
"loss": 0.1599, |
|
"step": 2246 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.014930935223807e-07, |
|
"loss": 0.1482, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.922494032894744e-07, |
|
"loss": 0.1095, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.830648364955772e-07, |
|
"loss": 0.1398, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.739394514816622e-07, |
|
"loss": 0.1333, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.648733062127643e-07, |
|
"loss": 0.1209, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.558664582776341e-07, |
|
"loss": 0.1019, |
|
"step": 2258 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.469189648883567e-07, |
|
"loss": 0.1099, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.380308828799919e-07, |
|
"loss": 0.1176, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.292022687102184e-07, |
|
"loss": 0.1138, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.204331784589679e-07, |
|
"loss": 0.1311, |
|
"step": 2266 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.117236678280736e-07, |
|
"loss": 0.1296, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.030737921409169e-07, |
|
"loss": 0.1377, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.9448360634207e-07, |
|
"loss": 0.1579, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.859531649969563e-07, |
|
"loss": 0.1133, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.774825222914948e-07, |
|
"loss": 0.1324, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.690717320317595e-07, |
|
"loss": 0.1227, |
|
"step": 2278 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.60720847643641e-07, |
|
"loss": 0.116, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.524299221724993e-07, |
|
"loss": 0.1574, |
|
"step": 2282 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.44199008282833e-07, |
|
"loss": 0.1149, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.360281582579474e-07, |
|
"loss": 0.0964, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.279174239996132e-07, |
|
"loss": 0.1096, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.198668570277443e-07, |
|
"loss": 0.1395, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.11876508480067e-07, |
|
"loss": 0.1232, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.039464291117968e-07, |
|
"loss": 0.1222, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.960766692953145e-07, |
|
"loss": 0.16, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.882672790198473e-07, |
|
"loss": 0.1558, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.805183078911524e-07, |
|
"loss": 0.1193, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.728298051312008e-07, |
|
"loss": 0.1342, |
|
"step": 2302 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.652018195778629e-07, |
|
"loss": 0.1598, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.576343996845989e-07, |
|
"loss": 0.1324, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.5012759352015766e-07, |
|
"loss": 0.0991, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.4268144876825846e-07, |
|
"loss": 0.1399, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.352960127272987e-07, |
|
"loss": 0.1098, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.2797133231005207e-07, |
|
"loss": 0.1343, |
|
"step": 2314 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.207074540433631e-07, |
|
"loss": 0.1038, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.1350442406786317e-07, |
|
"loss": 0.1445, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.063622881376683e-07, |
|
"loss": 0.1484, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.9928109162008953e-07, |
|
"loss": 0.1116, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.922608794953531e-07, |
|
"loss": 0.1271, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.8530169635630055e-07, |
|
"loss": 0.1471, |
|
"step": 2326 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.7840358640812036e-07, |
|
"loss": 0.1074, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.715665934680546e-07, |
|
"loss": 0.1571, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.64790760965128e-07, |
|
"loss": 0.0928, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.580761319398729e-07, |
|
"loss": 0.1362, |
|
"step": 2334 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.514227490440503e-07, |
|
"loss": 0.1168, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.4483065454038123e-07, |
|
"loss": 0.1497, |
|
"step": 2338 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.3829989030228163e-07, |
|
"loss": 0.1244, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.3183049781359e-07, |
|
"loss": 0.1058, |
|
"step": 2342 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.2542251816831237e-07, |
|
"loss": 0.1158, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.190759920703512e-07, |
|
"loss": 0.1208, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.127909598332535e-07, |
|
"loss": 0.1214, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.065674613799574e-07, |
|
"loss": 0.1258, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.0040553624252844e-07, |
|
"loss": 0.136, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.9430522356191814e-07, |
|
"loss": 0.1553, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.88266562087709e-07, |
|
"loss": 0.164, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.822895901778744e-07, |
|
"loss": 0.1372, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.7637434579853016e-07, |
|
"loss": 0.1196, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.7052086652369356e-07, |
|
"loss": 0.1351, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.6472918953504566e-07, |
|
"loss": 0.1407, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.589993516216993e-07, |
|
"loss": 0.1181, |
|
"step": 2366 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.5333138917995714e-07, |
|
"loss": 0.1484, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.477253382130862e-07, |
|
"loss": 0.125, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.4218123433108696e-07, |
|
"loss": 0.1309, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.366991127504714e-07, |
|
"loss": 0.1325, |
|
"step": 2374 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.3127900829403305e-07, |
|
"loss": 0.1297, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.259209553906272e-07, |
|
"loss": 0.1242, |
|
"step": 2378 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.2062498807495669e-07, |
|
"loss": 0.155, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1539113998735094e-07, |
|
"loss": 0.1266, |
|
"step": 2382 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.10219444373555e-07, |
|
"loss": 0.1459, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.051099340845164e-07, |
|
"loss": 0.1544, |
|
"step": 2386 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.000626415761786e-07, |
|
"loss": 0.1347, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9507759890927125e-07, |
|
"loss": 0.146, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9015483774911249e-07, |
|
"loss": 0.1342, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.8529438936540022e-07, |
|
"loss": 0.1166, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.8049628463202128e-07, |
|
"loss": 0.1313, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.7576055402685034e-07, |
|
"loss": 0.1251, |
|
"step": 2398 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.710872276315556e-07, |
|
"loss": 0.1077, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.664763351314125e-07, |
|
"loss": 0.1095, |
|
"step": 2402 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.619279058151102e-07, |
|
"loss": 0.0939, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.5744196857456874e-07, |
|
"loss": 0.1118, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.5301855190475445e-07, |
|
"loss": 0.1477, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.4865768390349812e-07, |
|
"loss": 0.1073, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.4435939227131712e-07, |
|
"loss": 0.1194, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.4012370431124133e-07, |
|
"loss": 0.1222, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3595064692863757e-07, |
|
"loss": 0.1367, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.3184024663103755e-07, |
|
"loss": 0.1182, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.277925295279725e-07, |
|
"loss": 0.1297, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2380752133080433e-07, |
|
"loss": 0.1067, |
|
"step": 2422 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.198852473525669e-07, |
|
"loss": 0.1483, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1602573250779958e-07, |
|
"loss": 0.1534, |
|
"step": 2426 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1222900131239279e-07, |
|
"loss": 0.1177, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0849507788343038e-07, |
|
"loss": 0.2059, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0482398593903764e-07, |
|
"loss": 0.1369, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0121574879823015e-07, |
|
"loss": 0.1178, |
|
"step": 2434 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.767038938076511e-08, |
|
"loss": 0.1323, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.418793020699813e-08, |
|
"loss": 0.1272, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.076839339773547e-08, |
|
"loss": 0.112, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.741180067409982e-08, |
|
"loss": 0.1444, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.411817335738482e-08, |
|
"loss": 0.1269, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.088753236892843e-08, |
|
"loss": 0.1206, |
|
"step": 2446 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 7.771989822997206e-08, |
|
"loss": 0.1349, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 7.461529106153387e-08, |
|
"loss": 0.1141, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.15737305842823e-08, |
|
"loss": 0.1265, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 6.859523611840612e-08, |
|
"loss": 0.12, |
|
"step": 2454 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 6.567982658349792e-08, |
|
"loss": 0.1161, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 6.282752049842855e-08, |
|
"loss": 0.1389, |
|
"step": 2458 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.003833598123287e-08, |
|
"loss": 0.1444, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.731229074899203e-08, |
|
"loss": 0.11, |
|
"step": 2462 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.464940211772574e-08, |
|
"loss": 0.1272, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.204968700227242e-08, |
|
"loss": 0.1016, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.951316191619593e-08, |
|
"loss": 0.1521, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.703984297166564e-08, |
|
"loss": 0.142, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.4629745879367634e-08, |
|
"loss": 0.1034, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.228288594839369e-08, |
|
"loss": 0.118, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.999927808615245e-08, |
|
"loss": 0.1166, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.777893679827061e-08, |
|
"loss": 0.1518, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.562187618849855e-08, |
|
"loss": 0.1538, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.352810995862932e-08, |
|
"loss": 0.1053, |
|
"step": 2482 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.1497651408399774e-08, |
|
"loss": 0.1095, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.9530513435416243e-08, |
|
"loss": 0.1574, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.762670853506677e-08, |
|
"loss": 0.1147, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.578624880044567e-08, |
|
"loss": 0.143, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.4009145922271327e-08, |
|
"loss": 0.0848, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.2295411188819616e-08, |
|
"loss": 0.1291, |
|
"step": 2494 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.0645055485842837e-08, |
|
"loss": 0.132, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.9058089296509762e-08, |
|
"loss": 0.106, |
|
"step": 2498 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.753452270133238e-08, |
|
"loss": 0.119, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.6074365378105915e-08, |
|
"loss": 0.1213, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.4677626601843353e-08, |
|
"loss": 0.1392, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.3344315244722128e-08, |
|
"loss": 0.1121, |
|
"step": 2506 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.2074439776021962e-08, |
|
"loss": 0.1163, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.0868008262076013e-08, |
|
"loss": 0.149, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.725028366214251e-09, |
|
"loss": 0.1174, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.64550734872016e-09, |
|
"loss": 0.1226, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.629452066783006e-09, |
|
"loss": 0.1374, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 6.6768689744500796e-09, |
|
"loss": 0.112, |
|
"step": 2518 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.787764122592299e-09, |
|
"loss": 0.1154, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.9621431588620096e-09, |
|
"loss": 0.1275, |
|
"step": 2522 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2523, |
|
"total_flos": 2232048114991104.0, |
|
"train_loss": 0.2896275484415448, |
|
"train_runtime": 144327.3604, |
|
"train_samples_per_second": 0.559, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 2523, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"total_flos": 2232048114991104.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|