MoTCoder-15B-v1.0 / trainer_state.json
JingyaoLi's picture
Upload 17 files
c0e7739
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2523,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.6576,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.6592,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 0.6936,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 0.6209,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 0.7266,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 0.583,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 0.0,
"loss": 0.5872,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 0.0,
"loss": 0.6971,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 0.0,
"loss": 0.6075,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.6744,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 2.666666666666667e-06,
"loss": 0.5986,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 4.000000000000001e-06,
"loss": 0.6547,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 5.333333333333334e-06,
"loss": 0.5527,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 6.666666666666667e-06,
"loss": 0.4789,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5411,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 9.333333333333334e-06,
"loss": 0.5627,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.5674,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 1.2e-05,
"loss": 0.5058,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.5078,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 1.4666666666666666e-05,
"loss": 0.5258,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.5611,
"step": 42
},
{
"epoch": 0.05,
"learning_rate": 1.7333333333333336e-05,
"loss": 0.4715,
"step": 44
},
{
"epoch": 0.05,
"learning_rate": 1.866666666666667e-05,
"loss": 0.7397,
"step": 46
},
{
"epoch": 0.06,
"learning_rate": 2e-05,
"loss": 0.5526,
"step": 48
},
{
"epoch": 0.06,
"learning_rate": 1.999996823967381e-05,
"loss": 0.5387,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.9999872958896982e-05,
"loss": 0.5412,
"step": 52
},
{
"epoch": 0.06,
"learning_rate": 1.9999714158274743e-05,
"loss": 0.487,
"step": 54
},
{
"epoch": 0.07,
"learning_rate": 1.9999491838815805e-05,
"loss": 0.5345,
"step": 56
},
{
"epoch": 0.07,
"learning_rate": 1.999920600193236e-05,
"loss": 0.5062,
"step": 58
},
{
"epoch": 0.07,
"learning_rate": 1.9998856649440058e-05,
"loss": 0.5077,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.999844378355801e-05,
"loss": 0.5029,
"step": 62
},
{
"epoch": 0.08,
"learning_rate": 1.999796740690877e-05,
"loss": 0.5374,
"step": 64
},
{
"epoch": 0.08,
"learning_rate": 1.9997427522518315e-05,
"loss": 0.5321,
"step": 66
},
{
"epoch": 0.08,
"learning_rate": 1.999682413381602e-05,
"loss": 0.4695,
"step": 68
},
{
"epoch": 0.08,
"learning_rate": 1.9996157244634647e-05,
"loss": 0.4798,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.9995799988672346e-05,
"loss": 0.4682,
"step": 72
},
{
"epoch": 0.09,
"learning_rate": 1.999503785684114e-05,
"loss": 0.5138,
"step": 74
},
{
"epoch": 0.09,
"learning_rate": 1.9994212235877407e-05,
"loss": 0.5271,
"step": 76
},
{
"epoch": 0.09,
"learning_rate": 1.999332313102555e-05,
"loss": 0.5526,
"step": 78
},
{
"epoch": 0.1,
"learning_rate": 1.999237054793322e-05,
"loss": 0.5385,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 1.9991354492651283e-05,
"loss": 0.5259,
"step": 82
},
{
"epoch": 0.1,
"learning_rate": 1.9990274971633787e-05,
"loss": 0.5054,
"step": 84
},
{
"epoch": 0.1,
"learning_rate": 1.9989131991737928e-05,
"loss": 0.4698,
"step": 86
},
{
"epoch": 0.1,
"learning_rate": 1.998792556022398e-05,
"loss": 0.4909,
"step": 88
},
{
"epoch": 0.11,
"learning_rate": 1.998665568475528e-05,
"loss": 0.462,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 1.998532237339816e-05,
"loss": 0.4898,
"step": 92
},
{
"epoch": 0.11,
"learning_rate": 1.9983925634621894e-05,
"loss": 0.4771,
"step": 94
},
{
"epoch": 0.11,
"learning_rate": 1.998246547729867e-05,
"loss": 0.4774,
"step": 96
},
{
"epoch": 0.12,
"learning_rate": 1.998094191070349e-05,
"loss": 0.421,
"step": 98
},
{
"epoch": 0.12,
"learning_rate": 1.997935494451416e-05,
"loss": 0.4991,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 1.9977704588811183e-05,
"loss": 0.5349,
"step": 102
},
{
"epoch": 0.12,
"learning_rate": 1.9975990854077733e-05,
"loss": 0.4717,
"step": 104
},
{
"epoch": 0.13,
"learning_rate": 1.9974213751199556e-05,
"loss": 0.5024,
"step": 106
},
{
"epoch": 0.13,
"learning_rate": 1.9972373291464933e-05,
"loss": 0.5904,
"step": 108
},
{
"epoch": 0.13,
"learning_rate": 1.9970469486564585e-05,
"loss": 0.4817,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 1.99685023485916e-05,
"loss": 0.488,
"step": 112
},
{
"epoch": 0.14,
"learning_rate": 1.9966471890041374e-05,
"loss": 0.5262,
"step": 114
},
{
"epoch": 0.14,
"learning_rate": 1.9964378123811502e-05,
"loss": 0.4721,
"step": 116
},
{
"epoch": 0.14,
"learning_rate": 1.9962221063201734e-05,
"loss": 0.4594,
"step": 118
},
{
"epoch": 0.14,
"learning_rate": 1.996000072191385e-05,
"loss": 0.5068,
"step": 120
},
{
"epoch": 0.15,
"learning_rate": 1.9957717114051608e-05,
"loss": 0.5169,
"step": 122
},
{
"epoch": 0.15,
"learning_rate": 1.9955370254120635e-05,
"loss": 0.4781,
"step": 124
},
{
"epoch": 0.15,
"learning_rate": 1.9952960157028335e-05,
"loss": 0.4682,
"step": 126
},
{
"epoch": 0.15,
"learning_rate": 1.9950486838083807e-05,
"loss": 0.4754,
"step": 128
},
{
"epoch": 0.15,
"learning_rate": 1.994795031299773e-05,
"loss": 0.479,
"step": 130
},
{
"epoch": 0.16,
"learning_rate": 1.9945350597882275e-05,
"loss": 0.5381,
"step": 132
},
{
"epoch": 0.16,
"learning_rate": 1.9942687709251006e-05,
"loss": 0.5457,
"step": 134
},
{
"epoch": 0.16,
"learning_rate": 1.993996166401877e-05,
"loss": 0.4653,
"step": 136
},
{
"epoch": 0.16,
"learning_rate": 1.9937172479501573e-05,
"loss": 0.4686,
"step": 138
},
{
"epoch": 0.17,
"learning_rate": 1.9934320173416502e-05,
"loss": 0.5098,
"step": 140
},
{
"epoch": 0.17,
"learning_rate": 1.9931404763881598e-05,
"loss": 0.4823,
"step": 142
},
{
"epoch": 0.17,
"learning_rate": 1.992842626941572e-05,
"loss": 0.4999,
"step": 144
},
{
"epoch": 0.17,
"learning_rate": 1.992538470893847e-05,
"loss": 0.4868,
"step": 146
},
{
"epoch": 0.18,
"learning_rate": 1.992228010177003e-05,
"loss": 0.4668,
"step": 148
},
{
"epoch": 0.18,
"learning_rate": 1.9919112467631074e-05,
"loss": 0.4955,
"step": 150
},
{
"epoch": 0.18,
"learning_rate": 1.991588182664262e-05,
"loss": 0.4855,
"step": 152
},
{
"epoch": 0.18,
"learning_rate": 1.99125881993259e-05,
"loss": 0.4646,
"step": 154
},
{
"epoch": 0.19,
"learning_rate": 1.9909231606602266e-05,
"loss": 0.5182,
"step": 156
},
{
"epoch": 0.19,
"learning_rate": 1.9905812069793002e-05,
"loss": 0.4857,
"step": 158
},
{
"epoch": 0.19,
"learning_rate": 1.990232961061924e-05,
"loss": 0.5048,
"step": 160
},
{
"epoch": 0.19,
"learning_rate": 1.989878425120177e-05,
"loss": 0.4597,
"step": 162
},
{
"epoch": 0.2,
"learning_rate": 1.9895176014060964e-05,
"loss": 0.4387,
"step": 164
},
{
"epoch": 0.2,
"learning_rate": 1.9891504922116572e-05,
"loss": 0.4556,
"step": 166
},
{
"epoch": 0.2,
"learning_rate": 1.9887770998687607e-05,
"loss": 0.4937,
"step": 168
},
{
"epoch": 0.2,
"learning_rate": 1.9883974267492202e-05,
"loss": 0.5738,
"step": 170
},
{
"epoch": 0.2,
"learning_rate": 1.9880114752647434e-05,
"loss": 0.4769,
"step": 172
},
{
"epoch": 0.21,
"learning_rate": 1.9876192478669197e-05,
"loss": 0.5109,
"step": 174
},
{
"epoch": 0.21,
"learning_rate": 1.987220747047203e-05,
"loss": 0.4926,
"step": 176
},
{
"epoch": 0.21,
"learning_rate": 1.9868159753368964e-05,
"loss": 0.4726,
"step": 178
},
{
"epoch": 0.21,
"learning_rate": 1.9864049353071365e-05,
"loss": 0.4675,
"step": 180
},
{
"epoch": 0.22,
"learning_rate": 1.985987629568876e-05,
"loss": 0.4723,
"step": 182
},
{
"epoch": 0.22,
"learning_rate": 1.9855640607728684e-05,
"loss": 0.4215,
"step": 184
},
{
"epoch": 0.22,
"learning_rate": 1.9851342316096503e-05,
"loss": 0.505,
"step": 186
},
{
"epoch": 0.22,
"learning_rate": 1.984698144809525e-05,
"loss": 0.5278,
"step": 188
},
{
"epoch": 0.23,
"learning_rate": 1.9842558031425434e-05,
"loss": 0.4836,
"step": 190
},
{
"epoch": 0.23,
"learning_rate": 1.983807209418489e-05,
"loss": 0.445,
"step": 192
},
{
"epoch": 0.23,
"learning_rate": 1.9833523664868587e-05,
"loss": 0.4585,
"step": 194
},
{
"epoch": 0.23,
"learning_rate": 1.982891277236845e-05,
"loss": 0.4934,
"step": 196
},
{
"epoch": 0.24,
"learning_rate": 1.982423944597315e-05,
"loss": 0.4899,
"step": 198
},
{
"epoch": 0.24,
"learning_rate": 1.981950371536798e-05,
"loss": 0.5321,
"step": 200
},
{
"epoch": 0.24,
"learning_rate": 1.9814705610634602e-05,
"loss": 0.4883,
"step": 202
},
{
"epoch": 0.24,
"learning_rate": 1.980984516225089e-05,
"loss": 0.4805,
"step": 204
},
{
"epoch": 0.24,
"learning_rate": 1.9804922401090732e-05,
"loss": 0.4507,
"step": 206
},
{
"epoch": 0.25,
"learning_rate": 1.9799937358423826e-05,
"loss": 0.4872,
"step": 208
},
{
"epoch": 0.25,
"learning_rate": 1.9794890065915486e-05,
"loss": 0.5495,
"step": 210
},
{
"epoch": 0.25,
"learning_rate": 1.9789780555626444e-05,
"loss": 0.4936,
"step": 212
},
{
"epoch": 0.25,
"learning_rate": 1.9784608860012652e-05,
"loss": 0.4769,
"step": 214
},
{
"epoch": 0.26,
"learning_rate": 1.9779375011925046e-05,
"loss": 0.4714,
"step": 216
},
{
"epoch": 0.26,
"learning_rate": 1.9774079044609373e-05,
"loss": 0.5064,
"step": 218
},
{
"epoch": 0.26,
"learning_rate": 1.976872099170597e-05,
"loss": 0.4629,
"step": 220
},
{
"epoch": 0.26,
"learning_rate": 1.976330088724953e-05,
"loss": 0.4149,
"step": 222
},
{
"epoch": 0.27,
"learning_rate": 1.9757818765668916e-05,
"loss": 0.4565,
"step": 224
},
{
"epoch": 0.27,
"learning_rate": 1.9752274661786916e-05,
"loss": 0.4233,
"step": 226
},
{
"epoch": 0.27,
"learning_rate": 1.9746668610820047e-05,
"loss": 0.4557,
"step": 228
},
{
"epoch": 0.27,
"learning_rate": 1.9741000648378303e-05,
"loss": 0.5186,
"step": 230
},
{
"epoch": 0.28,
"learning_rate": 1.9735270810464958e-05,
"loss": 0.4425,
"step": 232
},
{
"epoch": 0.28,
"learning_rate": 1.972947913347631e-05,
"loss": 0.5274,
"step": 234
},
{
"epoch": 0.28,
"learning_rate": 1.9723625654201472e-05,
"loss": 0.4087,
"step": 236
},
{
"epoch": 0.28,
"learning_rate": 1.971771040982213e-05,
"loss": 0.4412,
"step": 238
},
{
"epoch": 0.29,
"learning_rate": 1.9711733437912293e-05,
"loss": 0.4998,
"step": 240
},
{
"epoch": 0.29,
"learning_rate": 1.9705694776438084e-05,
"loss": 0.5343,
"step": 242
},
{
"epoch": 0.29,
"learning_rate": 1.9699594463757475e-05,
"loss": 0.4365,
"step": 244
},
{
"epoch": 0.29,
"learning_rate": 1.9693432538620046e-05,
"loss": 0.4877,
"step": 246
},
{
"epoch": 0.29,
"learning_rate": 1.9687209040166748e-05,
"loss": 0.4361,
"step": 248
},
{
"epoch": 0.3,
"learning_rate": 1.968092400792965e-05,
"loss": 0.4555,
"step": 250
},
{
"epoch": 0.3,
"learning_rate": 1.967457748183169e-05,
"loss": 0.4353,
"step": 252
},
{
"epoch": 0.3,
"learning_rate": 1.966816950218641e-05,
"loss": 0.3476,
"step": 254
},
{
"epoch": 0.3,
"learning_rate": 1.9661700109697718e-05,
"loss": 0.4342,
"step": 256
},
{
"epoch": 0.31,
"learning_rate": 1.9655169345459622e-05,
"loss": 0.4603,
"step": 258
},
{
"epoch": 0.31,
"learning_rate": 1.964857725095595e-05,
"loss": 0.4514,
"step": 260
},
{
"epoch": 0.31,
"learning_rate": 1.964192386806013e-05,
"loss": 0.4889,
"step": 262
},
{
"epoch": 0.31,
"learning_rate": 1.9635209239034872e-05,
"loss": 0.5255,
"step": 264
},
{
"epoch": 0.32,
"learning_rate": 1.962843340653195e-05,
"loss": 0.5325,
"step": 266
},
{
"epoch": 0.32,
"learning_rate": 1.9621596413591885e-05,
"loss": 0.5163,
"step": 268
},
{
"epoch": 0.32,
"learning_rate": 1.96146983036437e-05,
"loss": 0.3708,
"step": 270
},
{
"epoch": 0.32,
"learning_rate": 1.960773912050465e-05,
"loss": 0.5378,
"step": 272
},
{
"epoch": 0.33,
"learning_rate": 1.960071890837991e-05,
"loss": 0.4389,
"step": 274
},
{
"epoch": 0.33,
"learning_rate": 1.9593637711862335e-05,
"loss": 0.4565,
"step": 276
},
{
"epoch": 0.33,
"learning_rate": 1.9586495575932137e-05,
"loss": 0.5446,
"step": 278
},
{
"epoch": 0.33,
"learning_rate": 1.957929254595664e-05,
"loss": 0.5079,
"step": 280
},
{
"epoch": 0.34,
"learning_rate": 1.957202866768995e-05,
"loss": 0.3997,
"step": 282
},
{
"epoch": 0.34,
"learning_rate": 1.9564703987272703e-05,
"loss": 0.4934,
"step": 284
},
{
"epoch": 0.34,
"learning_rate": 1.9557318551231745e-05,
"loss": 0.5076,
"step": 286
},
{
"epoch": 0.34,
"learning_rate": 1.9549872406479843e-05,
"loss": 0.4487,
"step": 288
},
{
"epoch": 0.34,
"learning_rate": 1.9542365600315403e-05,
"loss": 0.5515,
"step": 290
},
{
"epoch": 0.35,
"learning_rate": 1.953479818042214e-05,
"loss": 0.5262,
"step": 292
},
{
"epoch": 0.35,
"learning_rate": 1.95271701948688e-05,
"loss": 0.4953,
"step": 294
},
{
"epoch": 0.35,
"learning_rate": 1.951948169210885e-05,
"loss": 0.5127,
"step": 296
},
{
"epoch": 0.35,
"learning_rate": 1.9511732720980156e-05,
"loss": 0.4796,
"step": 298
},
{
"epoch": 0.36,
"learning_rate": 1.950392333070469e-05,
"loss": 0.5016,
"step": 300
},
{
"epoch": 0.36,
"learning_rate": 1.9496053570888205e-05,
"loss": 0.5114,
"step": 302
},
{
"epoch": 0.36,
"learning_rate": 1.9488123491519935e-05,
"loss": 0.4471,
"step": 304
},
{
"epoch": 0.36,
"learning_rate": 1.9480133142972257e-05,
"loss": 0.4427,
"step": 306
},
{
"epoch": 0.37,
"learning_rate": 1.947208257600039e-05,
"loss": 0.457,
"step": 308
},
{
"epoch": 0.37,
"learning_rate": 1.9463971841742057e-05,
"loss": 0.5193,
"step": 310
},
{
"epoch": 0.37,
"learning_rate": 1.945580099171717e-05,
"loss": 0.5337,
"step": 312
},
{
"epoch": 0.37,
"learning_rate": 1.9447570077827503e-05,
"loss": 0.4758,
"step": 314
},
{
"epoch": 0.38,
"learning_rate": 1.9439279152356363e-05,
"loss": 0.4223,
"step": 316
},
{
"epoch": 0.38,
"learning_rate": 1.943092826796824e-05,
"loss": 0.4909,
"step": 318
},
{
"epoch": 0.38,
"learning_rate": 1.9422517477708506e-05,
"loss": 0.4615,
"step": 320
},
{
"epoch": 0.38,
"learning_rate": 1.9414046835003043e-05,
"loss": 0.3863,
"step": 322
},
{
"epoch": 0.39,
"learning_rate": 1.940551639365793e-05,
"loss": 0.4676,
"step": 324
},
{
"epoch": 0.39,
"learning_rate": 1.9396926207859085e-05,
"loss": 0.488,
"step": 326
},
{
"epoch": 0.39,
"learning_rate": 1.938827633217193e-05,
"loss": 0.4888,
"step": 328
},
{
"epoch": 0.39,
"learning_rate": 1.9379566821541034e-05,
"loss": 0.4754,
"step": 330
},
{
"epoch": 0.39,
"learning_rate": 1.9370797731289784e-05,
"loss": 0.7974,
"step": 332
},
{
"epoch": 0.4,
"learning_rate": 1.936196911712001e-05,
"loss": 0.3944,
"step": 334
},
{
"epoch": 0.4,
"learning_rate": 1.9353081035111644e-05,
"loss": 0.4883,
"step": 336
},
{
"epoch": 0.4,
"learning_rate": 1.9344133541722368e-05,
"loss": 0.5429,
"step": 338
},
{
"epoch": 0.4,
"learning_rate": 1.9335126693787237e-05,
"loss": 0.4573,
"step": 340
},
{
"epoch": 0.41,
"learning_rate": 1.9326060548518342e-05,
"loss": 0.4276,
"step": 342
},
{
"epoch": 0.41,
"learning_rate": 1.9316935163504424e-05,
"loss": 0.5089,
"step": 344
},
{
"epoch": 0.41,
"learning_rate": 1.930775059671053e-05,
"loss": 0.4285,
"step": 346
},
{
"epoch": 0.41,
"learning_rate": 1.9298506906477623e-05,
"loss": 0.4438,
"step": 348
},
{
"epoch": 0.42,
"learning_rate": 1.9289204151522227e-05,
"loss": 0.4644,
"step": 350
},
{
"epoch": 0.42,
"learning_rate": 1.927984239093605e-05,
"loss": 0.3904,
"step": 352
},
{
"epoch": 0.42,
"learning_rate": 1.9270421684185603e-05,
"loss": 0.5486,
"step": 354
},
{
"epoch": 0.42,
"learning_rate": 1.9260942091111836e-05,
"loss": 0.5429,
"step": 356
},
{
"epoch": 0.43,
"learning_rate": 1.9251403671929738e-05,
"loss": 0.4597,
"step": 358
},
{
"epoch": 0.43,
"learning_rate": 1.9241806487227967e-05,
"loss": 0.4721,
"step": 360
},
{
"epoch": 0.43,
"learning_rate": 1.923215059796847e-05,
"loss": 0.4689,
"step": 362
},
{
"epoch": 0.43,
"learning_rate": 1.922243606548609e-05,
"loss": 0.4716,
"step": 364
},
{
"epoch": 0.44,
"learning_rate": 1.9212662951488162e-05,
"loss": 0.4993,
"step": 366
},
{
"epoch": 0.44,
"learning_rate": 1.9202831318054153e-05,
"loss": 0.4557,
"step": 368
},
{
"epoch": 0.44,
"learning_rate": 1.9192941227635232e-05,
"loss": 0.4701,
"step": 370
},
{
"epoch": 0.44,
"learning_rate": 1.91829927430539e-05,
"loss": 0.4032,
"step": 372
},
{
"epoch": 0.44,
"learning_rate": 1.9172985927503584e-05,
"loss": 0.4743,
"step": 374
},
{
"epoch": 0.45,
"learning_rate": 1.9162920844548227e-05,
"loss": 0.4491,
"step": 376
},
{
"epoch": 0.45,
"learning_rate": 1.9152797558121894e-05,
"loss": 0.429,
"step": 378
},
{
"epoch": 0.45,
"learning_rate": 1.9142616132528356e-05,
"loss": 0.4707,
"step": 380
},
{
"epoch": 0.45,
"learning_rate": 1.91323766324407e-05,
"loss": 0.4371,
"step": 382
},
{
"epoch": 0.46,
"learning_rate": 1.912207912290089e-05,
"loss": 0.5172,
"step": 384
},
{
"epoch": 0.46,
"learning_rate": 1.9111723669319385e-05,
"loss": 0.4482,
"step": 386
},
{
"epoch": 0.46,
"learning_rate": 1.91013103374747e-05,
"loss": 0.4701,
"step": 388
},
{
"epoch": 0.46,
"learning_rate": 1.9090839193513e-05,
"loss": 0.4737,
"step": 390
},
{
"epoch": 0.47,
"learning_rate": 1.9080310303947668e-05,
"loss": 0.4922,
"step": 392
},
{
"epoch": 0.47,
"learning_rate": 1.9069723735658903e-05,
"loss": 0.4081,
"step": 394
},
{
"epoch": 0.47,
"learning_rate": 1.9059079555893277e-05,
"loss": 0.486,
"step": 396
},
{
"epoch": 0.47,
"learning_rate": 1.9048377832263314e-05,
"loss": 0.4674,
"step": 398
},
{
"epoch": 0.48,
"learning_rate": 1.903761863274706e-05,
"loss": 0.4528,
"step": 400
},
{
"epoch": 0.48,
"learning_rate": 1.902680202568765e-05,
"loss": 0.4716,
"step": 402
},
{
"epoch": 0.48,
"learning_rate": 1.9015928079792884e-05,
"loss": 0.5213,
"step": 404
},
{
"epoch": 0.48,
"learning_rate": 1.9004996864134767e-05,
"loss": 0.4836,
"step": 406
},
{
"epoch": 0.49,
"learning_rate": 1.8994008448149103e-05,
"loss": 0.5513,
"step": 408
},
{
"epoch": 0.49,
"learning_rate": 1.8982962901635022e-05,
"loss": 0.4634,
"step": 410
},
{
"epoch": 0.49,
"learning_rate": 1.8971860294754554e-05,
"loss": 0.5111,
"step": 412
},
{
"epoch": 0.49,
"learning_rate": 1.8960700698032194e-05,
"loss": 0.4474,
"step": 414
},
{
"epoch": 0.49,
"learning_rate": 1.894948418235441e-05,
"loss": 0.4785,
"step": 416
},
{
"epoch": 0.5,
"learning_rate": 1.8938210818969257e-05,
"loss": 0.4977,
"step": 418
},
{
"epoch": 0.5,
"learning_rate": 1.8926880679485865e-05,
"loss": 0.4346,
"step": 420
},
{
"epoch": 0.5,
"learning_rate": 1.8915493835874026e-05,
"loss": 0.4139,
"step": 422
},
{
"epoch": 0.5,
"learning_rate": 1.8904050360463708e-05,
"loss": 0.4975,
"step": 424
},
{
"epoch": 0.51,
"learning_rate": 1.8892550325944617e-05,
"loss": 0.4766,
"step": 426
},
{
"epoch": 0.51,
"learning_rate": 1.888099380536572e-05,
"loss": 0.484,
"step": 428
},
{
"epoch": 0.51,
"learning_rate": 1.886938087213479e-05,
"loss": 0.4301,
"step": 430
},
{
"epoch": 0.51,
"learning_rate": 1.885771160001794e-05,
"loss": 0.4474,
"step": 432
},
{
"epoch": 0.52,
"learning_rate": 1.8845986063139144e-05,
"loss": 0.4445,
"step": 434
},
{
"epoch": 0.52,
"learning_rate": 1.8834204335979777e-05,
"loss": 0.4422,
"step": 436
},
{
"epoch": 0.52,
"learning_rate": 1.8822366493378143e-05,
"loss": 0.5337,
"step": 438
},
{
"epoch": 0.52,
"learning_rate": 1.8810472610528987e-05,
"loss": 0.4704,
"step": 440
},
{
"epoch": 0.53,
"learning_rate": 1.8798522762983026e-05,
"loss": 0.458,
"step": 442
},
{
"epoch": 0.53,
"learning_rate": 1.8786517026646474e-05,
"loss": 0.3866,
"step": 444
},
{
"epoch": 0.53,
"learning_rate": 1.8774455477780557e-05,
"loss": 0.4939,
"step": 446
},
{
"epoch": 0.53,
"learning_rate": 1.8762338193001013e-05,
"loss": 0.496,
"step": 448
},
{
"epoch": 0.54,
"learning_rate": 1.8750165249277625e-05,
"loss": 0.4171,
"step": 450
},
{
"epoch": 0.54,
"learning_rate": 1.873793672393373e-05,
"loss": 0.4582,
"step": 452
},
{
"epoch": 0.54,
"learning_rate": 1.8725652694645714e-05,
"loss": 0.4762,
"step": 454
},
{
"epoch": 0.54,
"learning_rate": 1.871331323944254e-05,
"loss": 0.3859,
"step": 456
},
{
"epoch": 0.54,
"learning_rate": 1.8700918436705226e-05,
"loss": 0.4165,
"step": 458
},
{
"epoch": 0.55,
"learning_rate": 1.868846836516637e-05,
"loss": 0.3933,
"step": 460
},
{
"epoch": 0.55,
"learning_rate": 1.8675963103909636e-05,
"loss": 0.4746,
"step": 462
},
{
"epoch": 0.55,
"learning_rate": 1.866340273236926e-05,
"loss": 0.4893,
"step": 464
},
{
"epoch": 0.55,
"learning_rate": 1.8650787330329546e-05,
"loss": 0.516,
"step": 466
},
{
"epoch": 0.56,
"learning_rate": 1.8638116977924346e-05,
"loss": 0.4391,
"step": 468
},
{
"epoch": 0.56,
"learning_rate": 1.862539175563657e-05,
"loss": 0.3998,
"step": 470
},
{
"epoch": 0.56,
"learning_rate": 1.861261174429765e-05,
"loss": 0.4525,
"step": 472
},
{
"epoch": 0.56,
"learning_rate": 1.8599777025087068e-05,
"loss": 0.4023,
"step": 474
},
{
"epoch": 0.57,
"learning_rate": 1.858688767953178e-05,
"loss": 0.397,
"step": 476
},
{
"epoch": 0.57,
"learning_rate": 1.8573943789505762e-05,
"loss": 0.4845,
"step": 478
},
{
"epoch": 0.57,
"learning_rate": 1.8560945437229443e-05,
"loss": 0.4518,
"step": 480
},
{
"epoch": 0.57,
"learning_rate": 1.8547892705269207e-05,
"loss": 0.5362,
"step": 482
},
{
"epoch": 0.58,
"learning_rate": 1.8534785676536856e-05,
"loss": 0.4601,
"step": 484
},
{
"epoch": 0.58,
"learning_rate": 1.8521624434289094e-05,
"loss": 0.4814,
"step": 486
},
{
"epoch": 0.58,
"learning_rate": 1.850840906212699e-05,
"loss": 0.4707,
"step": 488
},
{
"epoch": 0.58,
"learning_rate": 1.849513964399545e-05,
"loss": 0.4144,
"step": 490
},
{
"epoch": 0.59,
"learning_rate": 1.8481816264182678e-05,
"loss": 0.434,
"step": 492
},
{
"epoch": 0.59,
"learning_rate": 1.8468439007319663e-05,
"loss": 0.4782,
"step": 494
},
{
"epoch": 0.59,
"learning_rate": 1.8455007958379604e-05,
"loss": 0.3848,
"step": 496
},
{
"epoch": 0.59,
"learning_rate": 1.8441523202677406e-05,
"loss": 0.4541,
"step": 498
},
{
"epoch": 0.59,
"learning_rate": 1.8427984825869114e-05,
"loss": 0.4708,
"step": 500
},
{
"epoch": 0.6,
"learning_rate": 1.8414392913951382e-05,
"loss": 0.5103,
"step": 502
},
{
"epoch": 0.6,
"learning_rate": 1.8400747553260915e-05,
"loss": 0.4201,
"step": 504
},
{
"epoch": 0.6,
"learning_rate": 1.8387048830473948e-05,
"loss": 0.4586,
"step": 506
},
{
"epoch": 0.6,
"learning_rate": 1.8373296832605647e-05,
"loss": 0.4667,
"step": 508
},
{
"epoch": 0.61,
"learning_rate": 1.8359491647009608e-05,
"loss": 0.4846,
"step": 510
},
{
"epoch": 0.61,
"learning_rate": 1.834563336137727e-05,
"loss": 0.5255,
"step": 512
},
{
"epoch": 0.61,
"learning_rate": 1.8331722063737365e-05,
"loss": 0.482,
"step": 514
},
{
"epoch": 0.61,
"learning_rate": 1.8317757842455363e-05,
"loss": 0.4211,
"step": 516
},
{
"epoch": 0.62,
"learning_rate": 1.830374078623291e-05,
"loss": 0.4852,
"step": 518
},
{
"epoch": 0.62,
"learning_rate": 1.8289670984107263e-05,
"loss": 0.4299,
"step": 520
},
{
"epoch": 0.62,
"learning_rate": 1.8275548525450722e-05,
"loss": 0.5044,
"step": 522
},
{
"epoch": 0.62,
"learning_rate": 1.8261373499970064e-05,
"loss": 0.4072,
"step": 524
},
{
"epoch": 0.63,
"learning_rate": 1.8247145997705977e-05,
"loss": 0.4478,
"step": 526
},
{
"epoch": 0.63,
"learning_rate": 1.823286610903248e-05,
"loss": 0.4962,
"step": 528
},
{
"epoch": 0.63,
"learning_rate": 1.8218533924656367e-05,
"loss": 0.4658,
"step": 530
},
{
"epoch": 0.63,
"learning_rate": 1.8204149535616596e-05,
"loss": 0.4124,
"step": 532
},
{
"epoch": 0.63,
"learning_rate": 1.8189713033283755e-05,
"loss": 0.4149,
"step": 534
},
{
"epoch": 0.64,
"learning_rate": 1.817522450935944e-05,
"loss": 0.4327,
"step": 536
},
{
"epoch": 0.64,
"learning_rate": 1.8160684055875704e-05,
"loss": 0.4469,
"step": 538
},
{
"epoch": 0.64,
"learning_rate": 1.8146091765194458e-05,
"loss": 0.44,
"step": 540
},
{
"epoch": 0.64,
"learning_rate": 1.8131447730006885e-05,
"loss": 0.4911,
"step": 542
},
{
"epoch": 0.65,
"learning_rate": 1.8116752043332848e-05,
"loss": 0.4848,
"step": 544
},
{
"epoch": 0.65,
"learning_rate": 1.810200479852031e-05,
"loss": 0.4297,
"step": 546
},
{
"epoch": 0.65,
"learning_rate": 1.8087206089244728e-05,
"loss": 0.4205,
"step": 548
},
{
"epoch": 0.65,
"learning_rate": 1.8072356009508473e-05,
"loss": 0.3892,
"step": 550
},
{
"epoch": 0.66,
"learning_rate": 1.805745465364022e-05,
"loss": 0.4519,
"step": 552
},
{
"epoch": 0.66,
"learning_rate": 1.8042502116294355e-05,
"loss": 0.4376,
"step": 554
},
{
"epoch": 0.66,
"learning_rate": 1.8027498492450367e-05,
"loss": 0.4538,
"step": 556
},
{
"epoch": 0.66,
"learning_rate": 1.8012443877412253e-05,
"loss": 0.4672,
"step": 558
},
{
"epoch": 0.67,
"learning_rate": 1.799733836680791e-05,
"loss": 0.4034,
"step": 560
},
{
"epoch": 0.67,
"learning_rate": 1.7982182056588536e-05,
"loss": 0.4613,
"step": 562
},
{
"epoch": 0.67,
"learning_rate": 1.796697504302799e-05,
"loss": 0.4664,
"step": 564
},
{
"epoch": 0.67,
"learning_rate": 1.795171742272222e-05,
"loss": 0.4271,
"step": 566
},
{
"epoch": 0.68,
"learning_rate": 1.7936409292588627e-05,
"loss": 0.4741,
"step": 568
},
{
"epoch": 0.68,
"learning_rate": 1.792105074986545e-05,
"loss": 0.3175,
"step": 570
},
{
"epoch": 0.68,
"learning_rate": 1.7905641892111152e-05,
"loss": 0.4154,
"step": 572
},
{
"epoch": 0.68,
"learning_rate": 1.7890182817203806e-05,
"loss": 0.4558,
"step": 574
},
{
"epoch": 0.68,
"learning_rate": 1.7874673623340463e-05,
"loss": 0.465,
"step": 576
},
{
"epoch": 0.69,
"learning_rate": 1.785911440903653e-05,
"loss": 0.4688,
"step": 578
},
{
"epoch": 0.69,
"learning_rate": 1.7843505273125164e-05,
"loss": 0.4411,
"step": 580
},
{
"epoch": 0.69,
"learning_rate": 1.7827846314756604e-05,
"loss": 0.4286,
"step": 582
},
{
"epoch": 0.69,
"learning_rate": 1.7812137633397577e-05,
"loss": 0.4425,
"step": 584
},
{
"epoch": 0.7,
"learning_rate": 1.7796379328830652e-05,
"loss": 0.4126,
"step": 586
},
{
"epoch": 0.7,
"learning_rate": 1.778057150115361e-05,
"loss": 0.479,
"step": 588
},
{
"epoch": 0.7,
"learning_rate": 1.77647142507788e-05,
"loss": 0.3994,
"step": 590
},
{
"epoch": 0.7,
"learning_rate": 1.7748807678432514e-05,
"loss": 0.4574,
"step": 592
},
{
"epoch": 0.71,
"learning_rate": 1.7732851885154336e-05,
"loss": 0.3901,
"step": 594
},
{
"epoch": 0.71,
"learning_rate": 1.7716846972296505e-05,
"loss": 0.435,
"step": 596
},
{
"epoch": 0.71,
"learning_rate": 1.7700793041523272e-05,
"loss": 0.4337,
"step": 598
},
{
"epoch": 0.71,
"learning_rate": 1.7684690194810256e-05,
"loss": 0.4196,
"step": 600
},
{
"epoch": 0.72,
"learning_rate": 1.7668538534443782e-05,
"loss": 0.4508,
"step": 602
},
{
"epoch": 0.72,
"learning_rate": 1.7652338163020257e-05,
"loss": 0.4583,
"step": 604
},
{
"epoch": 0.72,
"learning_rate": 1.76360891834455e-05,
"loss": 0.4499,
"step": 606
},
{
"epoch": 0.72,
"learning_rate": 1.7619791698934077e-05,
"loss": 0.4263,
"step": 608
},
{
"epoch": 0.73,
"learning_rate": 1.7603445813008685e-05,
"loss": 0.3721,
"step": 610
},
{
"epoch": 0.73,
"learning_rate": 1.7587051629499452e-05,
"loss": 0.3788,
"step": 612
},
{
"epoch": 0.73,
"learning_rate": 1.7570609252543302e-05,
"loss": 0.4405,
"step": 614
},
{
"epoch": 0.73,
"learning_rate": 1.755411878658329e-05,
"loss": 0.4939,
"step": 616
},
{
"epoch": 0.73,
"learning_rate": 1.7537580336367925e-05,
"loss": 0.4188,
"step": 618
},
{
"epoch": 0.74,
"learning_rate": 1.7520994006950526e-05,
"loss": 0.4425,
"step": 620
},
{
"epoch": 0.74,
"learning_rate": 1.7504359903688537e-05,
"loss": 0.37,
"step": 622
},
{
"epoch": 0.74,
"learning_rate": 1.748767813224287e-05,
"loss": 0.4633,
"step": 624
},
{
"epoch": 0.74,
"learning_rate": 1.747094879857722e-05,
"loss": 0.3729,
"step": 626
},
{
"epoch": 0.75,
"learning_rate": 1.7454172008957417e-05,
"loss": 0.4312,
"step": 628
},
{
"epoch": 0.75,
"learning_rate": 1.7437347869950713e-05,
"loss": 0.4367,
"step": 630
},
{
"epoch": 0.75,
"learning_rate": 1.7420476488425138e-05,
"loss": 0.4091,
"step": 632
},
{
"epoch": 0.75,
"learning_rate": 1.740355797154881e-05,
"loss": 0.469,
"step": 634
},
{
"epoch": 0.76,
"learning_rate": 1.7386592426789252e-05,
"loss": 0.4872,
"step": 636
},
{
"epoch": 0.76,
"learning_rate": 1.7369579961912712e-05,
"loss": 0.4932,
"step": 638
},
{
"epoch": 0.76,
"learning_rate": 1.7352520684983474e-05,
"loss": 0.3848,
"step": 640
},
{
"epoch": 0.76,
"learning_rate": 1.7335414704363178e-05,
"loss": 0.3694,
"step": 642
},
{
"epoch": 0.77,
"learning_rate": 1.7318262128710132e-05,
"loss": 0.5099,
"step": 644
},
{
"epoch": 0.77,
"learning_rate": 1.7301063066978617e-05,
"loss": 0.4407,
"step": 646
},
{
"epoch": 0.77,
"learning_rate": 1.728381762841819e-05,
"loss": 0.4409,
"step": 648
},
{
"epoch": 0.77,
"learning_rate": 1.7266525922573e-05,
"loss": 0.4444,
"step": 650
},
{
"epoch": 0.78,
"learning_rate": 1.72491880592811e-05,
"loss": 0.4079,
"step": 652
},
{
"epoch": 0.78,
"learning_rate": 1.7231804148673717e-05,
"loss": 0.4502,
"step": 654
},
{
"epoch": 0.78,
"learning_rate": 1.7214374301174594e-05,
"loss": 0.49,
"step": 656
},
{
"epoch": 0.78,
"learning_rate": 1.719689862749926e-05,
"loss": 0.4778,
"step": 658
},
{
"epoch": 0.78,
"learning_rate": 1.7179377238654325e-05,
"loss": 0.3734,
"step": 660
},
{
"epoch": 0.79,
"learning_rate": 1.716181024593681e-05,
"loss": 0.4956,
"step": 662
},
{
"epoch": 0.79,
"learning_rate": 1.714419776093338e-05,
"loss": 0.3712,
"step": 664
},
{
"epoch": 0.79,
"learning_rate": 1.7126539895519698e-05,
"loss": 0.3779,
"step": 666
},
{
"epoch": 0.79,
"learning_rate": 1.710883676185968e-05,
"loss": 0.4457,
"step": 668
},
{
"epoch": 0.8,
"learning_rate": 1.709108847240478e-05,
"loss": 0.4161,
"step": 670
},
{
"epoch": 0.8,
"learning_rate": 1.7073295139893296e-05,
"loss": 0.4459,
"step": 672
},
{
"epoch": 0.8,
"learning_rate": 1.705545687734963e-05,
"loss": 0.4465,
"step": 674
},
{
"epoch": 0.8,
"learning_rate": 1.7037573798083598e-05,
"loss": 0.4284,
"step": 676
},
{
"epoch": 0.81,
"learning_rate": 1.701964601568968e-05,
"loss": 0.3806,
"step": 678
},
{
"epoch": 0.81,
"learning_rate": 1.7001673644046322e-05,
"loss": 0.4591,
"step": 680
},
{
"epoch": 0.81,
"learning_rate": 1.6983656797315197e-05,
"loss": 0.4809,
"step": 682
},
{
"epoch": 0.81,
"learning_rate": 1.6965595589940496e-05,
"loss": 0.3811,
"step": 684
},
{
"epoch": 0.82,
"learning_rate": 1.6947490136648182e-05,
"loss": 0.4223,
"step": 686
},
{
"epoch": 0.82,
"learning_rate": 1.6929340552445283e-05,
"loss": 0.4698,
"step": 688
},
{
"epoch": 0.82,
"learning_rate": 1.6911146952619132e-05,
"loss": 0.4059,
"step": 690
},
{
"epoch": 0.82,
"learning_rate": 1.689290945273667e-05,
"loss": 0.4451,
"step": 692
},
{
"epoch": 0.83,
"learning_rate": 1.6874628168643683e-05,
"loss": 0.3428,
"step": 694
},
{
"epoch": 0.83,
"learning_rate": 1.685630321646408e-05,
"loss": 0.4552,
"step": 696
},
{
"epoch": 0.83,
"learning_rate": 1.683793471259915e-05,
"loss": 0.6389,
"step": 698
},
{
"epoch": 0.83,
"learning_rate": 1.681952277372683e-05,
"loss": 0.4356,
"step": 700
},
{
"epoch": 0.83,
"learning_rate": 1.680106751680096e-05,
"loss": 0.4199,
"step": 702
},
{
"epoch": 0.84,
"learning_rate": 1.6782569059050535e-05,
"loss": 0.4369,
"step": 704
},
{
"epoch": 0.84,
"learning_rate": 1.676402751797896e-05,
"loss": 0.3924,
"step": 706
},
{
"epoch": 0.84,
"learning_rate": 1.674544301136332e-05,
"loss": 0.386,
"step": 708
},
{
"epoch": 0.84,
"learning_rate": 1.672681565725361e-05,
"loss": 0.464,
"step": 710
},
{
"epoch": 0.85,
"learning_rate": 1.6708145573972005e-05,
"loss": 0.4597,
"step": 712
},
{
"epoch": 0.85,
"learning_rate": 1.6689432880112078e-05,
"loss": 0.4164,
"step": 714
},
{
"epoch": 0.85,
"learning_rate": 1.6670677694538096e-05,
"loss": 0.3761,
"step": 716
},
{
"epoch": 0.85,
"learning_rate": 1.6651880136384215e-05,
"loss": 0.4499,
"step": 718
},
{
"epoch": 0.86,
"learning_rate": 1.6633040325053746e-05,
"loss": 0.438,
"step": 720
},
{
"epoch": 0.86,
"learning_rate": 1.661415838021841e-05,
"loss": 0.4526,
"step": 722
},
{
"epoch": 0.86,
"learning_rate": 1.659523442181754e-05,
"loss": 0.4427,
"step": 724
},
{
"epoch": 0.86,
"learning_rate": 1.6576268570057363e-05,
"loss": 0.5268,
"step": 726
},
{
"epoch": 0.87,
"learning_rate": 1.655726094541021e-05,
"loss": 0.4135,
"step": 728
},
{
"epoch": 0.87,
"learning_rate": 1.653821166861374e-05,
"loss": 0.42,
"step": 730
},
{
"epoch": 0.87,
"learning_rate": 1.6519120860670215e-05,
"loss": 0.449,
"step": 732
},
{
"epoch": 0.87,
"learning_rate": 1.6499988642845686e-05,
"loss": 0.4751,
"step": 734
},
{
"epoch": 0.88,
"learning_rate": 1.6480815136669248e-05,
"loss": 0.3826,
"step": 736
},
{
"epoch": 0.88,
"learning_rate": 1.6461600463932266e-05,
"loss": 0.4712,
"step": 738
},
{
"epoch": 0.88,
"learning_rate": 1.6442344746687594e-05,
"loss": 0.4128,
"step": 740
},
{
"epoch": 0.88,
"learning_rate": 1.64230481072488e-05,
"loss": 0.4679,
"step": 742
},
{
"epoch": 0.88,
"learning_rate": 1.640371066818941e-05,
"loss": 0.4768,
"step": 744
},
{
"epoch": 0.89,
"learning_rate": 1.638433255234208e-05,
"loss": 0.4785,
"step": 746
},
{
"epoch": 0.89,
"learning_rate": 1.6364913882797875e-05,
"loss": 0.4334,
"step": 748
},
{
"epoch": 0.89,
"learning_rate": 1.6345454782905454e-05,
"loss": 0.4015,
"step": 750
},
{
"epoch": 0.89,
"learning_rate": 1.6325955376270286e-05,
"loss": 0.439,
"step": 752
},
{
"epoch": 0.9,
"learning_rate": 1.630641578675387e-05,
"loss": 0.3951,
"step": 754
},
{
"epoch": 0.9,
"learning_rate": 1.6296630962191733e-05,
"loss": 0.5453,
"step": 756
},
{
"epoch": 0.9,
"learning_rate": 1.62770313311519e-05,
"loss": 0.3902,
"step": 758
},
{
"epoch": 0.9,
"learning_rate": 1.625739182799955e-05,
"loss": 0.3943,
"step": 760
},
{
"epoch": 0.91,
"learning_rate": 1.6237712577486092e-05,
"loss": 0.3312,
"step": 762
},
{
"epoch": 0.91,
"learning_rate": 1.62179937046154e-05,
"loss": 0.4366,
"step": 764
},
{
"epoch": 0.91,
"learning_rate": 1.6198235334643045e-05,
"loss": 0.3924,
"step": 766
},
{
"epoch": 0.91,
"learning_rate": 1.6178437593075487e-05,
"loss": 0.378,
"step": 768
},
{
"epoch": 0.92,
"learning_rate": 1.6158600605669264e-05,
"loss": 0.4624,
"step": 770
},
{
"epoch": 0.92,
"learning_rate": 1.613872449843022e-05,
"loss": 0.4411,
"step": 772
},
{
"epoch": 0.92,
"learning_rate": 1.6118809397612678e-05,
"loss": 0.4695,
"step": 774
},
{
"epoch": 0.92,
"learning_rate": 1.6098855429718662e-05,
"loss": 0.4348,
"step": 776
},
{
"epoch": 0.93,
"learning_rate": 1.607886272149708e-05,
"loss": 0.4048,
"step": 778
},
{
"epoch": 0.93,
"learning_rate": 1.6058831399942917e-05,
"loss": 0.3485,
"step": 780
},
{
"epoch": 0.93,
"learning_rate": 1.6038761592296435e-05,
"loss": 0.4146,
"step": 782
},
{
"epoch": 0.93,
"learning_rate": 1.6018653426042357e-05,
"loss": 0.4398,
"step": 784
},
{
"epoch": 0.93,
"learning_rate": 1.5998507028909074e-05,
"loss": 0.5815,
"step": 786
},
{
"epoch": 0.94,
"learning_rate": 1.597832252886781e-05,
"loss": 0.4502,
"step": 788
},
{
"epoch": 0.94,
"learning_rate": 1.5958100054131828e-05,
"loss": 0.4275,
"step": 790
},
{
"epoch": 0.94,
"learning_rate": 1.5937839733155603e-05,
"loss": 0.4269,
"step": 792
},
{
"epoch": 0.94,
"learning_rate": 1.591754169463402e-05,
"loss": 0.4211,
"step": 794
},
{
"epoch": 0.95,
"learning_rate": 1.5897206067501544e-05,
"loss": 0.4194,
"step": 796
},
{
"epoch": 0.95,
"learning_rate": 1.5876832980931405e-05,
"loss": 0.3833,
"step": 798
},
{
"epoch": 0.95,
"learning_rate": 1.5856422564334772e-05,
"loss": 0.4176,
"step": 800
},
{
"epoch": 0.95,
"learning_rate": 1.5835974947359952e-05,
"loss": 0.5327,
"step": 802
},
{
"epoch": 0.96,
"learning_rate": 1.581549025989154e-05,
"loss": 0.4776,
"step": 804
},
{
"epoch": 0.96,
"learning_rate": 1.5794968632049598e-05,
"loss": 0.3573,
"step": 806
},
{
"epoch": 0.96,
"learning_rate": 1.5774410194188856e-05,
"loss": 0.464,
"step": 808
},
{
"epoch": 0.96,
"learning_rate": 1.5753815076897848e-05,
"loss": 0.4549,
"step": 810
},
{
"epoch": 0.97,
"learning_rate": 1.57331834109981e-05,
"loss": 0.463,
"step": 812
},
{
"epoch": 0.97,
"learning_rate": 1.5712515327543307e-05,
"loss": 0.4438,
"step": 814
},
{
"epoch": 0.97,
"learning_rate": 1.5691810957818475e-05,
"loss": 0.4306,
"step": 816
},
{
"epoch": 0.97,
"learning_rate": 1.5671070433339116e-05,
"loss": 0.4135,
"step": 818
},
{
"epoch": 0.98,
"learning_rate": 1.5650293885850393e-05,
"loss": 0.4706,
"step": 820
},
{
"epoch": 0.98,
"learning_rate": 1.5629481447326297e-05,
"loss": 0.4427,
"step": 822
},
{
"epoch": 0.98,
"learning_rate": 1.5608633249968783e-05,
"loss": 0.4661,
"step": 824
},
{
"epoch": 0.98,
"learning_rate": 1.558774942620697e-05,
"loss": 0.3674,
"step": 826
},
{
"epoch": 0.98,
"learning_rate": 1.5566830108696265e-05,
"loss": 0.4204,
"step": 828
},
{
"epoch": 0.99,
"learning_rate": 1.5545875430317546e-05,
"loss": 0.4685,
"step": 830
},
{
"epoch": 0.99,
"learning_rate": 1.5524885524176287e-05,
"loss": 0.4583,
"step": 832
},
{
"epoch": 0.99,
"learning_rate": 1.550386052360174e-05,
"loss": 0.4306,
"step": 834
},
{
"epoch": 0.99,
"learning_rate": 1.548280056214609e-05,
"loss": 0.4203,
"step": 836
},
{
"epoch": 1.0,
"learning_rate": 1.546170577358358e-05,
"loss": 0.4664,
"step": 838
},
{
"epoch": 1.0,
"learning_rate": 1.544057629190969e-05,
"loss": 0.3553,
"step": 840
},
{
"epoch": 1.0,
"learning_rate": 1.541941225134025e-05,
"loss": 0.368,
"step": 842
},
{
"epoch": 1.0,
"learning_rate": 1.5398213786310643e-05,
"loss": 0.3176,
"step": 844
},
{
"epoch": 1.01,
"learning_rate": 1.537698103147489e-05,
"loss": 0.2801,
"step": 846
},
{
"epoch": 1.01,
"learning_rate": 1.5355714121704846e-05,
"loss": 0.2576,
"step": 848
},
{
"epoch": 1.01,
"learning_rate": 1.53344131920893e-05,
"loss": 0.2689,
"step": 850
},
{
"epoch": 1.01,
"learning_rate": 1.531307837793315e-05,
"loss": 0.3045,
"step": 852
},
{
"epoch": 1.02,
"learning_rate": 1.529170981475653e-05,
"loss": 0.2506,
"step": 854
},
{
"epoch": 1.02,
"learning_rate": 1.5270307638293943e-05,
"loss": 0.2546,
"step": 856
},
{
"epoch": 1.02,
"learning_rate": 1.524887198449341e-05,
"loss": 0.2853,
"step": 858
},
{
"epoch": 1.02,
"learning_rate": 1.5227402989515607e-05,
"loss": 0.2772,
"step": 860
},
{
"epoch": 1.02,
"learning_rate": 1.5205900789732986e-05,
"loss": 0.2763,
"step": 862
},
{
"epoch": 1.03,
"learning_rate": 1.5184365521728928e-05,
"loss": 0.2578,
"step": 864
},
{
"epoch": 1.03,
"learning_rate": 1.5162797322296855e-05,
"loss": 0.3121,
"step": 866
},
{
"epoch": 1.03,
"learning_rate": 1.5141196328439377e-05,
"loss": 0.3037,
"step": 868
},
{
"epoch": 1.03,
"learning_rate": 1.5119562677367421e-05,
"loss": 0.2877,
"step": 870
},
{
"epoch": 1.04,
"learning_rate": 1.5097896506499349e-05,
"loss": 0.2856,
"step": 872
},
{
"epoch": 1.04,
"learning_rate": 1.5076197953460087e-05,
"loss": 0.3417,
"step": 874
},
{
"epoch": 1.04,
"learning_rate": 1.5054467156080262e-05,
"loss": 0.285,
"step": 876
},
{
"epoch": 1.04,
"learning_rate": 1.5032704252395315e-05,
"loss": 0.3137,
"step": 878
},
{
"epoch": 1.05,
"learning_rate": 1.5010909380644636e-05,
"loss": 0.2204,
"step": 880
},
{
"epoch": 1.05,
"learning_rate": 1.4989082679270668e-05,
"loss": 0.2808,
"step": 882
},
{
"epoch": 1.05,
"learning_rate": 1.496722428691804e-05,
"loss": 0.2691,
"step": 884
},
{
"epoch": 1.05,
"learning_rate": 1.4945334342432688e-05,
"loss": 0.2638,
"step": 886
},
{
"epoch": 1.06,
"learning_rate": 1.492341298486097e-05,
"loss": 0.26,
"step": 888
},
{
"epoch": 1.06,
"learning_rate": 1.490146035344878e-05,
"loss": 0.2764,
"step": 890
},
{
"epoch": 1.06,
"learning_rate": 1.4879476587640657e-05,
"loss": 0.2558,
"step": 892
},
{
"epoch": 1.06,
"learning_rate": 1.4868473072968645e-05,
"loss": 0.5349,
"step": 894
},
{
"epoch": 1.07,
"learning_rate": 1.4846442867457533e-05,
"loss": 0.2937,
"step": 896
},
{
"epoch": 1.07,
"learning_rate": 1.4824381877025154e-05,
"loss": 0.2684,
"step": 898
},
{
"epoch": 1.07,
"learning_rate": 1.4802290241804355e-05,
"loss": 0.2491,
"step": 900
},
{
"epoch": 1.07,
"learning_rate": 1.478016810212265e-05,
"loss": 0.2634,
"step": 902
},
{
"epoch": 1.07,
"learning_rate": 1.4758015598501308e-05,
"loss": 0.2889,
"step": 904
},
{
"epoch": 1.08,
"learning_rate": 1.473583287165448e-05,
"loss": 0.2843,
"step": 906
},
{
"epoch": 1.08,
"learning_rate": 1.4713620062488296e-05,
"loss": 0.2705,
"step": 908
},
{
"epoch": 1.08,
"learning_rate": 1.4691377312099965e-05,
"loss": 0.2765,
"step": 910
},
{
"epoch": 1.08,
"learning_rate": 1.4669104761776892e-05,
"loss": 0.2595,
"step": 912
},
{
"epoch": 1.09,
"learning_rate": 1.4646802552995767e-05,
"loss": 0.2101,
"step": 914
},
{
"epoch": 1.09,
"learning_rate": 1.4624470827421675e-05,
"loss": 0.263,
"step": 916
},
{
"epoch": 1.09,
"learning_rate": 1.4602109726907197e-05,
"loss": 0.2592,
"step": 918
},
{
"epoch": 1.09,
"learning_rate": 1.4579719393491496e-05,
"loss": 0.2732,
"step": 920
},
{
"epoch": 1.1,
"learning_rate": 1.455729996939944e-05,
"loss": 0.3056,
"step": 922
},
{
"epoch": 1.1,
"learning_rate": 1.4534851597040666e-05,
"loss": 0.2886,
"step": 924
},
{
"epoch": 1.1,
"learning_rate": 1.45123744190087e-05,
"loss": 0.2493,
"step": 926
},
{
"epoch": 1.1,
"learning_rate": 1.4489868578080046e-05,
"loss": 0.271,
"step": 928
},
{
"epoch": 1.11,
"learning_rate": 1.4467334217213274e-05,
"loss": 0.2752,
"step": 930
},
{
"epoch": 1.11,
"learning_rate": 1.4444771479548115e-05,
"loss": 0.3108,
"step": 932
},
{
"epoch": 1.11,
"learning_rate": 1.4422180508404544e-05,
"loss": 0.2946,
"step": 934
},
{
"epoch": 1.11,
"learning_rate": 1.439956144728189e-05,
"loss": 0.2401,
"step": 936
},
{
"epoch": 1.12,
"learning_rate": 1.4376914439857905e-05,
"loss": 0.3501,
"step": 938
},
{
"epoch": 1.12,
"learning_rate": 1.4354239629987857e-05,
"loss": 0.2895,
"step": 940
},
{
"epoch": 1.12,
"learning_rate": 1.4331537161703612e-05,
"loss": 0.2632,
"step": 942
},
{
"epoch": 1.12,
"learning_rate": 1.4308807179212736e-05,
"loss": 0.261,
"step": 944
},
{
"epoch": 1.12,
"learning_rate": 1.4286049826897559e-05,
"loss": 0.3207,
"step": 946
},
{
"epoch": 1.13,
"learning_rate": 1.4263265249314269e-05,
"loss": 0.2592,
"step": 948
},
{
"epoch": 1.13,
"learning_rate": 1.4240453591191984e-05,
"loss": 0.2468,
"step": 950
},
{
"epoch": 1.13,
"learning_rate": 1.4217614997431847e-05,
"loss": 0.2483,
"step": 952
},
{
"epoch": 1.13,
"learning_rate": 1.41947496131061e-05,
"loss": 0.2657,
"step": 954
},
{
"epoch": 1.14,
"learning_rate": 1.4171857583457154e-05,
"loss": 0.2389,
"step": 956
},
{
"epoch": 1.14,
"learning_rate": 1.4148939053896669e-05,
"loss": 0.2404,
"step": 958
},
{
"epoch": 1.14,
"learning_rate": 1.4125994170004644e-05,
"loss": 0.2539,
"step": 960
},
{
"epoch": 1.14,
"learning_rate": 1.4103023077528482e-05,
"loss": 0.2721,
"step": 962
},
{
"epoch": 1.15,
"learning_rate": 1.4080025922382056e-05,
"loss": 0.3314,
"step": 964
},
{
"epoch": 1.15,
"learning_rate": 1.4057002850644796e-05,
"loss": 0.2668,
"step": 966
},
{
"epoch": 1.15,
"learning_rate": 1.4033954008560758e-05,
"loss": 0.2295,
"step": 968
},
{
"epoch": 1.15,
"learning_rate": 1.401087954253769e-05,
"loss": 0.284,
"step": 970
},
{
"epoch": 1.16,
"learning_rate": 1.3987779599146105e-05,
"loss": 0.2595,
"step": 972
},
{
"epoch": 1.16,
"learning_rate": 1.396465432511835e-05,
"loss": 0.2849,
"step": 974
},
{
"epoch": 1.16,
"learning_rate": 1.3941503867347672e-05,
"loss": 0.271,
"step": 976
},
{
"epoch": 1.16,
"learning_rate": 1.3918328372887295e-05,
"loss": 0.2943,
"step": 978
},
{
"epoch": 1.17,
"learning_rate": 1.3895127988949471e-05,
"loss": 0.2751,
"step": 980
},
{
"epoch": 1.17,
"learning_rate": 1.3871902862904544e-05,
"loss": 0.276,
"step": 982
},
{
"epoch": 1.17,
"learning_rate": 1.3848653142280037e-05,
"loss": 0.2251,
"step": 984
},
{
"epoch": 1.17,
"learning_rate": 1.3825378974759696e-05,
"loss": 0.2722,
"step": 986
},
{
"epoch": 1.17,
"learning_rate": 1.3802080508182543e-05,
"loss": 0.2927,
"step": 988
},
{
"epoch": 1.18,
"learning_rate": 1.377875789054196e-05,
"loss": 0.2473,
"step": 990
},
{
"epoch": 1.18,
"learning_rate": 1.376708757136279e-05,
"loss": 0.3166,
"step": 992
},
{
"epoch": 1.18,
"learning_rate": 1.3743729004949972e-05,
"loss": 0.3079,
"step": 994
},
{
"epoch": 1.18,
"learning_rate": 1.3720346658126286e-05,
"loss": 0.2695,
"step": 996
},
{
"epoch": 1.19,
"learning_rate": 1.3696940679417918e-05,
"loss": 0.3125,
"step": 998
},
{
"epoch": 1.19,
"learning_rate": 1.3673511217501172e-05,
"loss": 0.2874,
"step": 1000
},
{
"epoch": 1.19,
"learning_rate": 1.3650058421201517e-05,
"loss": 0.31,
"step": 1002
},
{
"epoch": 1.19,
"learning_rate": 1.362658243949265e-05,
"loss": 0.2795,
"step": 1004
},
{
"epoch": 1.2,
"learning_rate": 1.3603083421495535e-05,
"loss": 0.2693,
"step": 1006
},
{
"epoch": 1.2,
"learning_rate": 1.3579561516477467e-05,
"loss": 0.2659,
"step": 1008
},
{
"epoch": 1.2,
"learning_rate": 1.355601687385112e-05,
"loss": 0.2909,
"step": 1010
},
{
"epoch": 1.2,
"learning_rate": 1.3532449643173604e-05,
"loss": 0.262,
"step": 1012
},
{
"epoch": 1.21,
"learning_rate": 1.3508859974145504e-05,
"loss": 0.2538,
"step": 1014
},
{
"epoch": 1.21,
"learning_rate": 1.3485248016609937e-05,
"loss": 0.2674,
"step": 1016
},
{
"epoch": 1.21,
"learning_rate": 1.3461613920551598e-05,
"loss": 0.2863,
"step": 1018
},
{
"epoch": 1.21,
"learning_rate": 1.3437957836095804e-05,
"loss": 0.3213,
"step": 1020
},
{
"epoch": 1.22,
"learning_rate": 1.3414279913507548e-05,
"loss": 0.2932,
"step": 1022
},
{
"epoch": 1.22,
"learning_rate": 1.3390580303190541e-05,
"loss": 0.2604,
"step": 1024
},
{
"epoch": 1.22,
"learning_rate": 1.3366859155686253e-05,
"loss": 0.275,
"step": 1026
},
{
"epoch": 1.22,
"learning_rate": 1.3343116621672959e-05,
"loss": 0.2625,
"step": 1028
},
{
"epoch": 1.22,
"learning_rate": 1.3319352851964787e-05,
"loss": 0.2664,
"step": 1030
},
{
"epoch": 1.23,
"learning_rate": 1.3295567997510747e-05,
"loss": 0.2567,
"step": 1032
},
{
"epoch": 1.23,
"learning_rate": 1.3271762209393793e-05,
"loss": 0.2469,
"step": 1034
},
{
"epoch": 1.23,
"learning_rate": 1.3247935638829838e-05,
"loss": 0.2596,
"step": 1036
},
{
"epoch": 1.23,
"learning_rate": 1.3224088437166818e-05,
"loss": 0.3033,
"step": 1038
},
{
"epoch": 1.24,
"learning_rate": 1.320022075588371e-05,
"loss": 0.3582,
"step": 1040
},
{
"epoch": 1.24,
"learning_rate": 1.3176332746589587e-05,
"loss": 0.2339,
"step": 1042
},
{
"epoch": 1.24,
"learning_rate": 1.3152424561022634e-05,
"loss": 0.2622,
"step": 1044
},
{
"epoch": 1.24,
"learning_rate": 1.3128496351049216e-05,
"loss": 0.2388,
"step": 1046
},
{
"epoch": 1.25,
"learning_rate": 1.3104548268662873e-05,
"loss": 0.2322,
"step": 1048
},
{
"epoch": 1.25,
"learning_rate": 1.3080580465983397e-05,
"loss": 0.3108,
"step": 1050
},
{
"epoch": 1.25,
"learning_rate": 1.3056593095255825e-05,
"loss": 0.2339,
"step": 1052
},
{
"epoch": 1.25,
"learning_rate": 1.3032586308849512e-05,
"loss": 0.2731,
"step": 1054
},
{
"epoch": 1.26,
"learning_rate": 1.3008560259257117e-05,
"loss": 0.2677,
"step": 1056
},
{
"epoch": 1.26,
"learning_rate": 1.2984515099093687e-05,
"loss": 0.2907,
"step": 1058
},
{
"epoch": 1.26,
"learning_rate": 1.2960450981095643e-05,
"loss": 0.2836,
"step": 1060
},
{
"epoch": 1.26,
"learning_rate": 1.2936368058119828e-05,
"loss": 0.2621,
"step": 1062
},
{
"epoch": 1.27,
"learning_rate": 1.2912266483142545e-05,
"loss": 0.3009,
"step": 1064
},
{
"epoch": 1.27,
"learning_rate": 1.2888146409258575e-05,
"loss": 0.252,
"step": 1066
},
{
"epoch": 1.27,
"learning_rate": 1.2864007989680194e-05,
"loss": 0.3354,
"step": 1068
},
{
"epoch": 1.27,
"learning_rate": 1.2839851377736216e-05,
"loss": 0.2908,
"step": 1070
},
{
"epoch": 1.27,
"learning_rate": 1.281567672687102e-05,
"loss": 0.284,
"step": 1072
},
{
"epoch": 1.28,
"learning_rate": 1.2791484190643571e-05,
"loss": 0.2882,
"step": 1074
},
{
"epoch": 1.28,
"learning_rate": 1.2767273922726427e-05,
"loss": 0.3096,
"step": 1076
},
{
"epoch": 1.28,
"learning_rate": 1.2743046076904795e-05,
"loss": 0.2674,
"step": 1078
},
{
"epoch": 1.28,
"learning_rate": 1.271880080707553e-05,
"loss": 0.2207,
"step": 1080
},
{
"epoch": 1.29,
"learning_rate": 1.2694538267246168e-05,
"loss": 0.2605,
"step": 1082
},
{
"epoch": 1.29,
"learning_rate": 1.2670258611533947e-05,
"loss": 0.2598,
"step": 1084
},
{
"epoch": 1.29,
"learning_rate": 1.2645961994164822e-05,
"loss": 0.2856,
"step": 1086
},
{
"epoch": 1.29,
"learning_rate": 1.2621648569472491e-05,
"loss": 0.2436,
"step": 1088
},
{
"epoch": 1.3,
"learning_rate": 1.2597318491897416e-05,
"loss": 0.2606,
"step": 1090
},
{
"epoch": 1.3,
"learning_rate": 1.257297191598584e-05,
"loss": 0.2602,
"step": 1092
},
{
"epoch": 1.3,
"learning_rate": 1.2548608996388792e-05,
"loss": 0.2465,
"step": 1094
},
{
"epoch": 1.3,
"learning_rate": 1.2524229887861132e-05,
"loss": 0.2536,
"step": 1096
},
{
"epoch": 1.31,
"learning_rate": 1.2499834745260553e-05,
"loss": 0.2859,
"step": 1098
},
{
"epoch": 1.31,
"learning_rate": 1.2475423723546584e-05,
"loss": 0.2539,
"step": 1100
},
{
"epoch": 1.31,
"learning_rate": 1.245099697777963e-05,
"loss": 0.2299,
"step": 1102
},
{
"epoch": 1.31,
"learning_rate": 1.2426554663119975e-05,
"loss": 0.4215,
"step": 1104
},
{
"epoch": 1.32,
"learning_rate": 1.2402096934826794e-05,
"loss": 0.2575,
"step": 1106
},
{
"epoch": 1.32,
"learning_rate": 1.237762394825718e-05,
"loss": 0.3257,
"step": 1108
},
{
"epoch": 1.32,
"learning_rate": 1.2353135858865128e-05,
"loss": 0.2778,
"step": 1110
},
{
"epoch": 1.32,
"learning_rate": 1.232863282220059e-05,
"loss": 0.2592,
"step": 1112
},
{
"epoch": 1.32,
"learning_rate": 1.230411499390845e-05,
"loss": 0.2539,
"step": 1114
},
{
"epoch": 1.33,
"learning_rate": 1.2279582529727552e-05,
"loss": 0.2831,
"step": 1116
},
{
"epoch": 1.33,
"learning_rate": 1.2255035585489705e-05,
"loss": 0.2806,
"step": 1118
},
{
"epoch": 1.33,
"learning_rate": 1.2230474317118708e-05,
"loss": 0.2777,
"step": 1120
},
{
"epoch": 1.33,
"learning_rate": 1.2205898880629336e-05,
"loss": 0.3334,
"step": 1122
},
{
"epoch": 1.34,
"learning_rate": 1.2181309432126366e-05,
"loss": 0.302,
"step": 1124
},
{
"epoch": 1.34,
"learning_rate": 1.2156706127803578e-05,
"loss": 0.2659,
"step": 1126
},
{
"epoch": 1.34,
"learning_rate": 1.2132089123942764e-05,
"loss": 0.297,
"step": 1128
},
{
"epoch": 1.34,
"learning_rate": 1.2107458576912743e-05,
"loss": 0.3207,
"step": 1130
},
{
"epoch": 1.35,
"learning_rate": 1.2082814643168357e-05,
"loss": 0.2224,
"step": 1132
},
{
"epoch": 1.35,
"learning_rate": 1.2058157479249475e-05,
"loss": 0.295,
"step": 1134
},
{
"epoch": 1.35,
"learning_rate": 1.2033487241780014e-05,
"loss": 0.2238,
"step": 1136
},
{
"epoch": 1.35,
"learning_rate": 1.2008804087466931e-05,
"loss": 0.277,
"step": 1138
},
{
"epoch": 1.36,
"learning_rate": 1.1984108173099238e-05,
"loss": 0.2906,
"step": 1140
},
{
"epoch": 1.36,
"learning_rate": 1.1959399655546989e-05,
"loss": 0.2649,
"step": 1142
},
{
"epoch": 1.36,
"learning_rate": 1.1934678691760296e-05,
"loss": 0.3147,
"step": 1144
},
{
"epoch": 1.36,
"learning_rate": 1.190994543876834e-05,
"loss": 0.2761,
"step": 1146
},
{
"epoch": 1.37,
"learning_rate": 1.188520005367836e-05,
"loss": 0.2252,
"step": 1148
},
{
"epoch": 1.37,
"learning_rate": 1.1860442693674648e-05,
"loss": 0.2521,
"step": 1150
},
{
"epoch": 1.37,
"learning_rate": 1.1835673516017571e-05,
"loss": 0.2618,
"step": 1152
},
{
"epoch": 1.37,
"learning_rate": 1.1810892678042565e-05,
"loss": 0.2869,
"step": 1154
},
{
"epoch": 1.37,
"learning_rate": 1.1786100337159132e-05,
"loss": 0.2124,
"step": 1156
},
{
"epoch": 1.38,
"learning_rate": 1.177369990233723e-05,
"loss": 0.3826,
"step": 1158
},
{
"epoch": 1.38,
"learning_rate": 1.1748890602393521e-05,
"loss": 0.2805,
"step": 1160
},
{
"epoch": 1.38,
"learning_rate": 1.172407019338261e-05,
"loss": 0.2467,
"step": 1162
},
{
"epoch": 1.38,
"learning_rate": 1.1699238832965358e-05,
"loss": 0.2405,
"step": 1164
},
{
"epoch": 1.39,
"learning_rate": 1.1674396678872186e-05,
"loss": 0.3017,
"step": 1166
},
{
"epoch": 1.39,
"learning_rate": 1.164954388890207e-05,
"loss": 0.2584,
"step": 1168
},
{
"epoch": 1.39,
"learning_rate": 1.162468062092156e-05,
"loss": 0.3141,
"step": 1170
},
{
"epoch": 1.39,
"learning_rate": 1.1599807032863756e-05,
"loss": 0.3254,
"step": 1172
},
{
"epoch": 1.4,
"learning_rate": 1.1574923282727314e-05,
"loss": 0.2703,
"step": 1174
},
{
"epoch": 1.4,
"learning_rate": 1.1550029528575428e-05,
"loss": 0.2207,
"step": 1176
},
{
"epoch": 1.4,
"learning_rate": 1.152512592853486e-05,
"loss": 0.2634,
"step": 1178
},
{
"epoch": 1.4,
"learning_rate": 1.1500212640794895e-05,
"loss": 0.3368,
"step": 1180
},
{
"epoch": 1.41,
"learning_rate": 1.1475289823606364e-05,
"loss": 0.2535,
"step": 1182
},
{
"epoch": 1.41,
"learning_rate": 1.1450357635280628e-05,
"loss": 0.287,
"step": 1184
},
{
"epoch": 1.41,
"learning_rate": 1.1425416234188578e-05,
"loss": 0.3052,
"step": 1186
},
{
"epoch": 1.41,
"learning_rate": 1.1400465778759611e-05,
"loss": 0.2909,
"step": 1188
},
{
"epoch": 1.41,
"learning_rate": 1.1375506427480658e-05,
"loss": 0.2904,
"step": 1190
},
{
"epoch": 1.42,
"learning_rate": 1.135053833889514e-05,
"loss": 0.339,
"step": 1192
},
{
"epoch": 1.42,
"learning_rate": 1.1325561671601987e-05,
"loss": 0.292,
"step": 1194
},
{
"epoch": 1.42,
"learning_rate": 1.1300576584254617e-05,
"loss": 0.2424,
"step": 1196
},
{
"epoch": 1.42,
"learning_rate": 1.127558323555994e-05,
"loss": 0.267,
"step": 1198
},
{
"epoch": 1.43,
"learning_rate": 1.125058178427733e-05,
"loss": 0.2641,
"step": 1200
},
{
"epoch": 1.43,
"learning_rate": 1.1225572389217643e-05,
"loss": 0.269,
"step": 1202
},
{
"epoch": 1.43,
"learning_rate": 1.1200555209242182e-05,
"loss": 0.2903,
"step": 1204
},
{
"epoch": 1.43,
"learning_rate": 1.1175530403261716e-05,
"loss": 0.2622,
"step": 1206
},
{
"epoch": 1.44,
"learning_rate": 1.1150498130235435e-05,
"loss": 0.3199,
"step": 1208
},
{
"epoch": 1.44,
"learning_rate": 1.1125458549169977e-05,
"loss": 0.2469,
"step": 1210
},
{
"epoch": 1.44,
"learning_rate": 1.1100411819118387e-05,
"loss": 0.2781,
"step": 1212
},
{
"epoch": 1.44,
"learning_rate": 1.1075358099179136e-05,
"loss": 0.3293,
"step": 1214
},
{
"epoch": 1.45,
"learning_rate": 1.1050297548495084e-05,
"loss": 0.2065,
"step": 1216
},
{
"epoch": 1.45,
"learning_rate": 1.1025230326252484e-05,
"loss": 0.2548,
"step": 1218
},
{
"epoch": 1.45,
"learning_rate": 1.1000156591679971e-05,
"loss": 0.3063,
"step": 1220
},
{
"epoch": 1.45,
"learning_rate": 1.0975076504047535e-05,
"loss": 0.3099,
"step": 1222
},
{
"epoch": 1.46,
"learning_rate": 1.0949990222665532e-05,
"loss": 0.2805,
"step": 1224
},
{
"epoch": 1.46,
"learning_rate": 1.0924897906883663e-05,
"loss": 0.288,
"step": 1226
},
{
"epoch": 1.46,
"learning_rate": 1.0899799716089949e-05,
"loss": 0.3014,
"step": 1228
},
{
"epoch": 1.46,
"learning_rate": 1.0874695809709737e-05,
"loss": 0.2768,
"step": 1230
},
{
"epoch": 1.46,
"learning_rate": 1.0849586347204677e-05,
"loss": 0.2894,
"step": 1232
},
{
"epoch": 1.47,
"learning_rate": 1.0824471488071714e-05,
"loss": 0.2718,
"step": 1234
},
{
"epoch": 1.47,
"learning_rate": 1.0799351391842074e-05,
"loss": 0.2476,
"step": 1236
},
{
"epoch": 1.47,
"learning_rate": 1.0774226218080244e-05,
"loss": 0.2318,
"step": 1238
},
{
"epoch": 1.47,
"learning_rate": 1.0749096126382965e-05,
"loss": 0.2545,
"step": 1240
},
{
"epoch": 1.48,
"learning_rate": 1.0723961276378225e-05,
"loss": 0.2708,
"step": 1242
},
{
"epoch": 1.48,
"learning_rate": 1.0698821827724225e-05,
"loss": 0.3471,
"step": 1244
},
{
"epoch": 1.48,
"learning_rate": 1.0673677940108386e-05,
"loss": 0.2528,
"step": 1246
},
{
"epoch": 1.48,
"learning_rate": 1.0648529773246324e-05,
"loss": 0.2625,
"step": 1248
},
{
"epoch": 1.49,
"learning_rate": 1.0623377486880831e-05,
"loss": 0.2634,
"step": 1250
},
{
"epoch": 1.49,
"learning_rate": 1.0598221240780874e-05,
"loss": 0.2506,
"step": 1252
},
{
"epoch": 1.49,
"learning_rate": 1.0573061194740568e-05,
"loss": 0.2659,
"step": 1254
},
{
"epoch": 1.49,
"learning_rate": 1.054789750857817e-05,
"loss": 0.239,
"step": 1256
},
{
"epoch": 1.5,
"learning_rate": 1.052273034213505e-05,
"loss": 0.2465,
"step": 1258
},
{
"epoch": 1.5,
"learning_rate": 1.0497559855274699e-05,
"loss": 0.2512,
"step": 1260
},
{
"epoch": 1.5,
"learning_rate": 1.0472386207881684e-05,
"loss": 0.303,
"step": 1262
},
{
"epoch": 1.5,
"learning_rate": 1.0447209559860658e-05,
"loss": 0.2542,
"step": 1264
},
{
"epoch": 1.51,
"learning_rate": 1.0422030071135336e-05,
"loss": 0.2995,
"step": 1266
},
{
"epoch": 1.51,
"learning_rate": 1.0396847901647469e-05,
"loss": 0.2597,
"step": 1268
},
{
"epoch": 1.51,
"learning_rate": 1.037166321135584e-05,
"loss": 0.2773,
"step": 1270
},
{
"epoch": 1.51,
"learning_rate": 1.0346476160235246e-05,
"loss": 0.2771,
"step": 1272
},
{
"epoch": 1.51,
"learning_rate": 1.0321286908275476e-05,
"loss": 0.2906,
"step": 1274
},
{
"epoch": 1.52,
"learning_rate": 1.0296095615480309e-05,
"loss": 0.3002,
"step": 1276
},
{
"epoch": 1.52,
"learning_rate": 1.0270902441866474e-05,
"loss": 0.2267,
"step": 1278
},
{
"epoch": 1.52,
"learning_rate": 1.0245707547462654e-05,
"loss": 0.2545,
"step": 1280
},
{
"epoch": 1.52,
"learning_rate": 1.0220511092308463e-05,
"loss": 0.2412,
"step": 1282
},
{
"epoch": 1.53,
"learning_rate": 1.0195313236453431e-05,
"loss": 0.2567,
"step": 1284
},
{
"epoch": 1.53,
"learning_rate": 1.0170114139955975e-05,
"loss": 0.2589,
"step": 1286
},
{
"epoch": 1.53,
"learning_rate": 1.0144913962882406e-05,
"loss": 0.2834,
"step": 1288
},
{
"epoch": 1.53,
"learning_rate": 1.0119712865305891e-05,
"loss": 0.2504,
"step": 1290
},
{
"epoch": 1.54,
"learning_rate": 1.0094511007305445e-05,
"loss": 0.2788,
"step": 1292
},
{
"epoch": 1.54,
"learning_rate": 1.0069308548964915e-05,
"loss": 0.2664,
"step": 1294
},
{
"epoch": 1.54,
"learning_rate": 1.0044105650371961e-05,
"loss": 0.2695,
"step": 1296
},
{
"epoch": 1.54,
"learning_rate": 1.0018902471617037e-05,
"loss": 0.2309,
"step": 1298
},
{
"epoch": 1.55,
"learning_rate": 9.993699172792381e-06,
"loss": 0.2949,
"step": 1300
},
{
"epoch": 1.55,
"learning_rate": 9.96849591399099e-06,
"loss": 0.3248,
"step": 1302
},
{
"epoch": 1.55,
"learning_rate": 9.943292855305611e-06,
"loss": 0.2719,
"step": 1304
},
{
"epoch": 1.55,
"learning_rate": 9.918090156827712e-06,
"loss": 0.2417,
"step": 1306
},
{
"epoch": 1.56,
"learning_rate": 9.892887978646483e-06,
"loss": 0.2371,
"step": 1308
},
{
"epoch": 1.56,
"learning_rate": 9.867686480847801e-06,
"loss": 0.271,
"step": 1310
},
{
"epoch": 1.56,
"learning_rate": 9.842485823513222e-06,
"loss": 0.2791,
"step": 1312
},
{
"epoch": 1.56,
"learning_rate": 9.817286166718971e-06,
"loss": 0.2759,
"step": 1314
},
{
"epoch": 1.56,
"learning_rate": 9.792087670534908e-06,
"loss": 0.2867,
"step": 1316
},
{
"epoch": 1.57,
"learning_rate": 9.766890495023522e-06,
"loss": 0.2964,
"step": 1318
},
{
"epoch": 1.57,
"learning_rate": 9.741694800238923e-06,
"loss": 0.2466,
"step": 1320
},
{
"epoch": 1.57,
"learning_rate": 9.716500746225802e-06,
"loss": 0.2745,
"step": 1322
},
{
"epoch": 1.57,
"learning_rate": 9.691308493018439e-06,
"loss": 0.2429,
"step": 1324
},
{
"epoch": 1.58,
"learning_rate": 9.666118200639667e-06,
"loss": 0.2561,
"step": 1326
},
{
"epoch": 1.58,
"learning_rate": 9.640930029099863e-06,
"loss": 0.2462,
"step": 1328
},
{
"epoch": 1.58,
"learning_rate": 9.615744138395941e-06,
"loss": 0.2294,
"step": 1330
},
{
"epoch": 1.58,
"learning_rate": 9.590560688510323e-06,
"loss": 0.2462,
"step": 1332
},
{
"epoch": 1.59,
"learning_rate": 9.565379839409916e-06,
"loss": 0.2755,
"step": 1334
},
{
"epoch": 1.59,
"learning_rate": 9.540201751045127e-06,
"loss": 0.2623,
"step": 1336
},
{
"epoch": 1.59,
"learning_rate": 9.515026583348811e-06,
"loss": 0.3047,
"step": 1338
},
{
"epoch": 1.59,
"learning_rate": 9.489854496235278e-06,
"loss": 0.2489,
"step": 1340
},
{
"epoch": 1.6,
"learning_rate": 9.464685649599266e-06,
"loss": 0.23,
"step": 1342
},
{
"epoch": 1.6,
"learning_rate": 9.439520203314927e-06,
"loss": 0.2517,
"step": 1344
},
{
"epoch": 1.6,
"learning_rate": 9.414358317234826e-06,
"loss": 0.3041,
"step": 1346
},
{
"epoch": 1.6,
"learning_rate": 9.3892001511889e-06,
"loss": 0.2922,
"step": 1348
},
{
"epoch": 1.61,
"learning_rate": 9.364045864983454e-06,
"loss": 0.2451,
"step": 1350
},
{
"epoch": 1.61,
"learning_rate": 9.338895618400168e-06,
"loss": 0.2457,
"step": 1352
},
{
"epoch": 1.61,
"learning_rate": 9.313749571195041e-06,
"loss": 0.2488,
"step": 1354
},
{
"epoch": 1.61,
"learning_rate": 9.28860788309741e-06,
"loss": 0.2599,
"step": 1356
},
{
"epoch": 1.61,
"learning_rate": 9.263470713808917e-06,
"loss": 0.2192,
"step": 1358
},
{
"epoch": 1.62,
"learning_rate": 9.238338223002496e-06,
"loss": 0.2296,
"step": 1360
},
{
"epoch": 1.62,
"learning_rate": 9.213210570321374e-06,
"loss": 0.2907,
"step": 1362
},
{
"epoch": 1.62,
"learning_rate": 9.188087915378037e-06,
"loss": 0.2644,
"step": 1364
},
{
"epoch": 1.62,
"learning_rate": 9.162970417753229e-06,
"loss": 0.2307,
"step": 1366
},
{
"epoch": 1.63,
"learning_rate": 9.137858236994932e-06,
"loss": 0.2493,
"step": 1368
},
{
"epoch": 1.63,
"learning_rate": 9.112751532617361e-06,
"loss": 0.2546,
"step": 1370
},
{
"epoch": 1.63,
"learning_rate": 9.087650464099937e-06,
"loss": 0.2835,
"step": 1372
},
{
"epoch": 1.63,
"learning_rate": 9.062555190886287e-06,
"loss": 0.2701,
"step": 1374
},
{
"epoch": 1.64,
"learning_rate": 9.037465872383219e-06,
"loss": 0.2514,
"step": 1376
},
{
"epoch": 1.64,
"learning_rate": 9.012382667959724e-06,
"loss": 0.2423,
"step": 1378
},
{
"epoch": 1.64,
"learning_rate": 8.987305736945955e-06,
"loss": 0.2289,
"step": 1380
},
{
"epoch": 1.64,
"learning_rate": 8.962235238632208e-06,
"loss": 0.2504,
"step": 1382
},
{
"epoch": 1.65,
"learning_rate": 8.937171332267927e-06,
"loss": 0.2912,
"step": 1384
},
{
"epoch": 1.65,
"learning_rate": 8.912114177060681e-06,
"loss": 0.2368,
"step": 1386
},
{
"epoch": 1.65,
"learning_rate": 8.887063932175156e-06,
"loss": 0.2823,
"step": 1388
},
{
"epoch": 1.65,
"learning_rate": 8.862020756732141e-06,
"loss": 0.2289,
"step": 1390
},
{
"epoch": 1.66,
"learning_rate": 8.836984809807514e-06,
"loss": 0.2332,
"step": 1392
},
{
"epoch": 1.66,
"learning_rate": 8.811956250431253e-06,
"loss": 0.2627,
"step": 1394
},
{
"epoch": 1.66,
"learning_rate": 8.786935237586394e-06,
"loss": 0.2613,
"step": 1396
},
{
"epoch": 1.66,
"learning_rate": 8.761921930208044e-06,
"loss": 0.2353,
"step": 1398
},
{
"epoch": 1.66,
"learning_rate": 8.73691648718236e-06,
"loss": 0.2668,
"step": 1400
},
{
"epoch": 1.67,
"learning_rate": 8.71191906734555e-06,
"loss": 0.2292,
"step": 1402
},
{
"epoch": 1.67,
"learning_rate": 8.686929829482862e-06,
"loss": 0.2577,
"step": 1404
},
{
"epoch": 1.67,
"learning_rate": 8.661948932327558e-06,
"loss": 0.2177,
"step": 1406
},
{
"epoch": 1.67,
"learning_rate": 8.636976534559926e-06,
"loss": 0.2849,
"step": 1408
},
{
"epoch": 1.68,
"learning_rate": 8.61201279480627e-06,
"loss": 0.2575,
"step": 1410
},
{
"epoch": 1.68,
"learning_rate": 8.587057871637891e-06,
"loss": 0.267,
"step": 1412
},
{
"epoch": 1.68,
"learning_rate": 8.562111923570091e-06,
"loss": 0.2572,
"step": 1414
},
{
"epoch": 1.68,
"learning_rate": 8.537175109061154e-06,
"loss": 0.2546,
"step": 1416
},
{
"epoch": 1.69,
"learning_rate": 8.512247586511354e-06,
"loss": 0.3069,
"step": 1418
},
{
"epoch": 1.69,
"learning_rate": 8.487329514261948e-06,
"loss": 0.2915,
"step": 1420
},
{
"epoch": 1.69,
"learning_rate": 8.46242105059415e-06,
"loss": 0.3033,
"step": 1422
},
{
"epoch": 1.69,
"learning_rate": 8.437522353728147e-06,
"loss": 0.2389,
"step": 1424
},
{
"epoch": 1.7,
"learning_rate": 8.412633581822086e-06,
"loss": 0.4257,
"step": 1426
},
{
"epoch": 1.7,
"learning_rate": 8.387754892971073e-06,
"loss": 0.2206,
"step": 1428
},
{
"epoch": 1.7,
"learning_rate": 8.36288644520616e-06,
"loss": 0.2217,
"step": 1430
},
{
"epoch": 1.7,
"learning_rate": 8.338028396493345e-06,
"loss": 0.27,
"step": 1432
},
{
"epoch": 1.71,
"learning_rate": 8.313180904732578e-06,
"loss": 0.266,
"step": 1434
},
{
"epoch": 1.71,
"learning_rate": 8.288344127756755e-06,
"loss": 0.2922,
"step": 1436
},
{
"epoch": 1.71,
"learning_rate": 8.263518223330698e-06,
"loss": 0.2122,
"step": 1438
},
{
"epoch": 1.71,
"learning_rate": 8.238703349150169e-06,
"loss": 0.2822,
"step": 1440
},
{
"epoch": 1.71,
"learning_rate": 8.213899662840871e-06,
"loss": 0.2687,
"step": 1442
},
{
"epoch": 1.72,
"learning_rate": 8.189107321957437e-06,
"loss": 0.2783,
"step": 1444
},
{
"epoch": 1.72,
"learning_rate": 8.164326483982434e-06,
"loss": 0.2499,
"step": 1446
},
{
"epoch": 1.72,
"learning_rate": 8.139557306325359e-06,
"loss": 0.2408,
"step": 1448
},
{
"epoch": 1.72,
"learning_rate": 8.114799946321647e-06,
"loss": 0.2584,
"step": 1450
},
{
"epoch": 1.73,
"learning_rate": 8.090054561231659e-06,
"loss": 0.237,
"step": 1452
},
{
"epoch": 1.73,
"learning_rate": 8.065321308239706e-06,
"loss": 0.2553,
"step": 1454
},
{
"epoch": 1.73,
"learning_rate": 8.040600344453013e-06,
"loss": 0.2224,
"step": 1456
},
{
"epoch": 1.73,
"learning_rate": 8.015891826900764e-06,
"loss": 0.3115,
"step": 1458
},
{
"epoch": 1.74,
"learning_rate": 7.99119591253307e-06,
"loss": 0.223,
"step": 1460
},
{
"epoch": 1.74,
"learning_rate": 7.966512758219991e-06,
"loss": 0.2604,
"step": 1462
},
{
"epoch": 1.74,
"learning_rate": 7.941842520750529e-06,
"loss": 0.262,
"step": 1464
},
{
"epoch": 1.74,
"learning_rate": 7.91718535683165e-06,
"loss": 0.2583,
"step": 1466
},
{
"epoch": 1.75,
"learning_rate": 7.892541423087258e-06,
"loss": 0.2318,
"step": 1468
},
{
"epoch": 1.75,
"learning_rate": 7.867910876057238e-06,
"loss": 0.2489,
"step": 1470
},
{
"epoch": 1.75,
"learning_rate": 7.843293872196425e-06,
"loss": 0.2609,
"step": 1472
},
{
"epoch": 1.75,
"learning_rate": 7.818690567873637e-06,
"loss": 0.2592,
"step": 1474
},
{
"epoch": 1.76,
"learning_rate": 7.794101119370668e-06,
"loss": 0.269,
"step": 1476
},
{
"epoch": 1.76,
"learning_rate": 7.769525682881295e-06,
"loss": 0.2532,
"step": 1478
},
{
"epoch": 1.76,
"learning_rate": 7.744964414510297e-06,
"loss": 0.2223,
"step": 1480
},
{
"epoch": 1.76,
"learning_rate": 7.720417470272455e-06,
"loss": 0.2525,
"step": 1482
},
{
"epoch": 1.76,
"learning_rate": 7.695885006091552e-06,
"loss": 0.2701,
"step": 1484
},
{
"epoch": 1.77,
"learning_rate": 7.67136717779941e-06,
"loss": 0.3059,
"step": 1486
},
{
"epoch": 1.77,
"learning_rate": 7.646864141134874e-06,
"loss": 0.2591,
"step": 1488
},
{
"epoch": 1.77,
"learning_rate": 7.622376051742824e-06,
"loss": 0.2618,
"step": 1490
},
{
"epoch": 1.77,
"learning_rate": 7.5979030651732065e-06,
"loss": 0.2814,
"step": 1492
},
{
"epoch": 1.78,
"learning_rate": 7.573445336880029e-06,
"loss": 0.2389,
"step": 1494
},
{
"epoch": 1.78,
"learning_rate": 7.549003022220374e-06,
"loss": 0.2347,
"step": 1496
},
{
"epoch": 1.78,
"learning_rate": 7.524576276453422e-06,
"loss": 0.2642,
"step": 1498
},
{
"epoch": 1.78,
"learning_rate": 7.500165254739453e-06,
"loss": 0.2591,
"step": 1500
},
{
"epoch": 1.79,
"learning_rate": 7.475770112138867e-06,
"loss": 0.2591,
"step": 1502
},
{
"epoch": 1.79,
"learning_rate": 7.4513910036112105e-06,
"loss": 0.2878,
"step": 1504
},
{
"epoch": 1.79,
"learning_rate": 7.427028084014163e-06,
"loss": 0.305,
"step": 1506
},
{
"epoch": 1.79,
"learning_rate": 7.402681508102585e-06,
"loss": 0.263,
"step": 1508
},
{
"epoch": 1.8,
"learning_rate": 7.378351430527511e-06,
"loss": 0.2868,
"step": 1510
},
{
"epoch": 1.8,
"learning_rate": 7.35403800583518e-06,
"loss": 0.2299,
"step": 1512
},
{
"epoch": 1.8,
"learning_rate": 7.329741388466056e-06,
"loss": 0.2942,
"step": 1514
},
{
"epoch": 1.8,
"learning_rate": 7.305461732753836e-06,
"loss": 0.2993,
"step": 1516
},
{
"epoch": 1.8,
"learning_rate": 7.281199192924473e-06,
"loss": 0.263,
"step": 1518
},
{
"epoch": 1.81,
"learning_rate": 7.256953923095209e-06,
"loss": 0.2591,
"step": 1520
},
{
"epoch": 1.81,
"learning_rate": 7.232726077273575e-06,
"loss": 0.2612,
"step": 1522
},
{
"epoch": 1.81,
"learning_rate": 7.208515809356434e-06,
"loss": 0.2354,
"step": 1524
},
{
"epoch": 1.81,
"learning_rate": 7.184323273128981e-06,
"loss": 0.2504,
"step": 1526
},
{
"epoch": 1.82,
"learning_rate": 7.160148622263786e-06,
"loss": 0.2437,
"step": 1528
},
{
"epoch": 1.82,
"learning_rate": 7.135992010319812e-06,
"loss": 0.2154,
"step": 1530
},
{
"epoch": 1.82,
"learning_rate": 7.123920516899151e-06,
"loss": 0.2963,
"step": 1532
},
{
"epoch": 1.82,
"learning_rate": 7.0997912510091335e-06,
"loss": 0.2307,
"step": 1534
},
{
"epoch": 1.83,
"learning_rate": 7.075680407434289e-06,
"loss": 0.2826,
"step": 1536
},
{
"epoch": 1.83,
"learning_rate": 7.051588139328276e-06,
"loss": 0.2344,
"step": 1538
},
{
"epoch": 1.83,
"learning_rate": 7.0275145997267544e-06,
"loss": 0.3482,
"step": 1540
},
{
"epoch": 1.83,
"learning_rate": 7.0034599415464135e-06,
"loss": 0.2818,
"step": 1542
},
{
"epoch": 1.84,
"learning_rate": 6.979424317584014e-06,
"loss": 0.3444,
"step": 1544
},
{
"epoch": 1.84,
"learning_rate": 6.955407880515404e-06,
"loss": 0.2712,
"step": 1546
},
{
"epoch": 1.84,
"learning_rate": 6.931410782894563e-06,
"loss": 0.2794,
"step": 1548
},
{
"epoch": 1.84,
"learning_rate": 6.907433177152618e-06,
"loss": 0.2701,
"step": 1550
},
{
"epoch": 1.85,
"learning_rate": 6.883475215596882e-06,
"loss": 0.2943,
"step": 1552
},
{
"epoch": 1.85,
"learning_rate": 6.859537050409895e-06,
"loss": 0.2488,
"step": 1554
},
{
"epoch": 1.85,
"learning_rate": 6.835618833648443e-06,
"loss": 0.2451,
"step": 1556
},
{
"epoch": 1.85,
"learning_rate": 6.8117207172425996e-06,
"loss": 0.3123,
"step": 1558
},
{
"epoch": 1.85,
"learning_rate": 6.787842852994757e-06,
"loss": 0.2522,
"step": 1560
},
{
"epoch": 1.86,
"learning_rate": 6.763985392578667e-06,
"loss": 0.2766,
"step": 1562
},
{
"epoch": 1.86,
"learning_rate": 6.740148487538476e-06,
"loss": 0.2473,
"step": 1564
},
{
"epoch": 1.86,
"learning_rate": 6.716332289287759e-06,
"loss": 0.214,
"step": 1566
},
{
"epoch": 1.86,
"learning_rate": 6.692536949108562e-06,
"loss": 0.2742,
"step": 1568
},
{
"epoch": 1.87,
"learning_rate": 6.6687626181504315e-06,
"loss": 0.2348,
"step": 1570
},
{
"epoch": 1.87,
"learning_rate": 6.64500944742948e-06,
"loss": 0.2363,
"step": 1572
},
{
"epoch": 1.87,
"learning_rate": 6.6212775878273925e-06,
"loss": 0.2686,
"step": 1574
},
{
"epoch": 1.87,
"learning_rate": 6.59756719009049e-06,
"loss": 0.2852,
"step": 1576
},
{
"epoch": 1.88,
"learning_rate": 6.5738784048287615e-06,
"loss": 0.2272,
"step": 1578
},
{
"epoch": 1.88,
"learning_rate": 6.550211382514922e-06,
"loss": 0.2975,
"step": 1580
},
{
"epoch": 1.88,
"learning_rate": 6.526566273483439e-06,
"loss": 0.2563,
"step": 1582
},
{
"epoch": 1.88,
"learning_rate": 6.502943227929586e-06,
"loss": 0.2971,
"step": 1584
},
{
"epoch": 1.89,
"learning_rate": 6.479342395908487e-06,
"loss": 0.2601,
"step": 1586
},
{
"epoch": 1.89,
"learning_rate": 6.455763927334177e-06,
"loss": 0.258,
"step": 1588
},
{
"epoch": 1.89,
"learning_rate": 6.432207971978619e-06,
"loss": 0.2953,
"step": 1590
},
{
"epoch": 1.89,
"learning_rate": 6.4086746794707795e-06,
"loss": 0.2961,
"step": 1592
},
{
"epoch": 1.9,
"learning_rate": 6.385164199295666e-06,
"loss": 0.262,
"step": 1594
},
{
"epoch": 1.9,
"learning_rate": 6.3616766807933875e-06,
"loss": 0.2151,
"step": 1596
},
{
"epoch": 1.9,
"learning_rate": 6.338212273158188e-06,
"loss": 0.2525,
"step": 1598
},
{
"epoch": 1.9,
"learning_rate": 6.314771125437517e-06,
"loss": 0.2393,
"step": 1600
},
{
"epoch": 1.9,
"learning_rate": 6.291353386531074e-06,
"loss": 0.2758,
"step": 1602
},
{
"epoch": 1.91,
"learning_rate": 6.2679592051898685e-06,
"loss": 0.2312,
"step": 1604
},
{
"epoch": 1.91,
"learning_rate": 6.244588730015264e-06,
"loss": 0.2428,
"step": 1606
},
{
"epoch": 1.91,
"learning_rate": 6.221242109458043e-06,
"loss": 0.2551,
"step": 1608
},
{
"epoch": 1.91,
"learning_rate": 6.197919491817459e-06,
"loss": 0.2715,
"step": 1610
},
{
"epoch": 1.92,
"learning_rate": 6.174621025240307e-06,
"loss": 0.2359,
"step": 1612
},
{
"epoch": 1.92,
"learning_rate": 6.151346857719964e-06,
"loss": 0.2671,
"step": 1614
},
{
"epoch": 1.92,
"learning_rate": 6.128097137095458e-06,
"loss": 0.1967,
"step": 1616
},
{
"epoch": 1.92,
"learning_rate": 6.104872011050534e-06,
"loss": 0.2563,
"step": 1618
},
{
"epoch": 1.93,
"learning_rate": 6.081671627112704e-06,
"loss": 0.2613,
"step": 1620
},
{
"epoch": 1.93,
"learning_rate": 6.0584961326523285e-06,
"loss": 0.2962,
"step": 1622
},
{
"epoch": 1.93,
"learning_rate": 6.0353456748816545e-06,
"loss": 0.2087,
"step": 1624
},
{
"epoch": 1.93,
"learning_rate": 6.012220400853899e-06,
"loss": 0.2902,
"step": 1626
},
{
"epoch": 1.94,
"learning_rate": 5.989120457462314e-06,
"loss": 0.2565,
"step": 1628
},
{
"epoch": 1.94,
"learning_rate": 5.9660459914392465e-06,
"loss": 0.243,
"step": 1630
},
{
"epoch": 1.94,
"learning_rate": 5.942997149355208e-06,
"loss": 0.2895,
"step": 1632
},
{
"epoch": 1.94,
"learning_rate": 5.9199740776179494e-06,
"loss": 0.3008,
"step": 1634
},
{
"epoch": 1.95,
"learning_rate": 5.89697692247152e-06,
"loss": 0.2124,
"step": 1636
},
{
"epoch": 1.95,
"learning_rate": 5.874005829995358e-06,
"loss": 0.2094,
"step": 1638
},
{
"epoch": 1.95,
"learning_rate": 5.851060946103334e-06,
"loss": 0.2355,
"step": 1640
},
{
"epoch": 1.95,
"learning_rate": 5.828142416542852e-06,
"loss": 0.304,
"step": 1642
},
{
"epoch": 1.95,
"learning_rate": 5.8052503868939005e-06,
"loss": 0.213,
"step": 1644
},
{
"epoch": 1.96,
"learning_rate": 5.782385002568153e-06,
"loss": 0.2471,
"step": 1646
},
{
"epoch": 1.96,
"learning_rate": 5.759546408808019e-06,
"loss": 0.2432,
"step": 1648
},
{
"epoch": 1.96,
"learning_rate": 5.736734750685737e-06,
"loss": 0.2565,
"step": 1650
},
{
"epoch": 1.96,
"learning_rate": 5.713950173102441e-06,
"loss": 0.2502,
"step": 1652
},
{
"epoch": 1.97,
"learning_rate": 5.691192820787266e-06,
"loss": 0.2105,
"step": 1654
},
{
"epoch": 1.97,
"learning_rate": 5.6684628382963905e-06,
"loss": 0.2437,
"step": 1656
},
{
"epoch": 1.97,
"learning_rate": 5.645760370012149e-06,
"loss": 0.2149,
"step": 1658
},
{
"epoch": 1.97,
"learning_rate": 5.623085560142099e-06,
"loss": 0.29,
"step": 1660
},
{
"epoch": 1.98,
"learning_rate": 5.60043855271811e-06,
"loss": 0.2718,
"step": 1662
},
{
"epoch": 1.98,
"learning_rate": 5.577819491595457e-06,
"loss": 0.2147,
"step": 1664
},
{
"epoch": 1.98,
"learning_rate": 5.555228520451891e-06,
"loss": 0.1925,
"step": 1666
},
{
"epoch": 1.98,
"learning_rate": 5.53266578278673e-06,
"loss": 0.2404,
"step": 1668
},
{
"epoch": 1.99,
"learning_rate": 5.510131421919955e-06,
"loss": 0.2405,
"step": 1670
},
{
"epoch": 1.99,
"learning_rate": 5.487625580991303e-06,
"loss": 0.1999,
"step": 1672
},
{
"epoch": 1.99,
"learning_rate": 5.465148402959339e-06,
"loss": 0.2185,
"step": 1674
},
{
"epoch": 1.99,
"learning_rate": 5.442700030600565e-06,
"loss": 0.3006,
"step": 1676
},
{
"epoch": 2.0,
"learning_rate": 5.420280606508503e-06,
"loss": 0.2406,
"step": 1678
},
{
"epoch": 2.0,
"learning_rate": 5.397890273092807e-06,
"loss": 0.2317,
"step": 1680
},
{
"epoch": 2.0,
"learning_rate": 5.375529172578329e-06,
"loss": 0.2616,
"step": 1682
},
{
"epoch": 2.0,
"learning_rate": 5.353197447004239e-06,
"loss": 0.1408,
"step": 1684
},
{
"epoch": 2.0,
"learning_rate": 5.33089523822311e-06,
"loss": 0.1778,
"step": 1686
},
{
"epoch": 2.01,
"learning_rate": 5.308622687900038e-06,
"loss": 0.1207,
"step": 1688
},
{
"epoch": 2.01,
"learning_rate": 5.286379937511707e-06,
"loss": 0.1327,
"step": 1690
},
{
"epoch": 2.01,
"learning_rate": 5.264167128345523e-06,
"loss": 0.125,
"step": 1692
},
{
"epoch": 2.01,
"learning_rate": 5.241984401498693e-06,
"loss": 0.17,
"step": 1694
},
{
"epoch": 2.02,
"learning_rate": 5.219831897877353e-06,
"loss": 0.159,
"step": 1696
},
{
"epoch": 2.02,
"learning_rate": 5.197709758195648e-06,
"loss": 0.1156,
"step": 1698
},
{
"epoch": 2.02,
"learning_rate": 5.175618122974851e-06,
"loss": 0.1356,
"step": 1700
},
{
"epoch": 2.02,
"learning_rate": 5.153557132542473e-06,
"loss": 0.1645,
"step": 1702
},
{
"epoch": 2.03,
"learning_rate": 5.131526927031356e-06,
"loss": 0.1626,
"step": 1704
},
{
"epoch": 2.03,
"learning_rate": 5.109527646378815e-06,
"loss": 0.1278,
"step": 1706
},
{
"epoch": 2.03,
"learning_rate": 5.087559430325708e-06,
"loss": 0.136,
"step": 1708
},
{
"epoch": 2.03,
"learning_rate": 5.0656224184155764e-06,
"loss": 0.1141,
"step": 1710
},
{
"epoch": 2.04,
"learning_rate": 5.043716749993757e-06,
"loss": 0.1444,
"step": 1712
},
{
"epoch": 2.04,
"learning_rate": 5.02184256420648e-06,
"loss": 0.1151,
"step": 1714
},
{
"epoch": 2.04,
"learning_rate": 5.000000000000003e-06,
"loss": 0.1474,
"step": 1716
},
{
"epoch": 2.04,
"learning_rate": 4.978189196119716e-06,
"loss": 0.121,
"step": 1718
},
{
"epoch": 2.05,
"learning_rate": 4.9564102911092646e-06,
"loss": 0.1284,
"step": 1720
},
{
"epoch": 2.05,
"learning_rate": 4.934663423309685e-06,
"loss": 0.153,
"step": 1722
},
{
"epoch": 2.05,
"learning_rate": 4.912948730858492e-06,
"loss": 0.145,
"step": 1724
},
{
"epoch": 2.05,
"learning_rate": 4.891266351688829e-06,
"loss": 0.1167,
"step": 1726
},
{
"epoch": 2.05,
"learning_rate": 4.869616423528588e-06,
"loss": 0.1338,
"step": 1728
},
{
"epoch": 2.06,
"learning_rate": 4.847999083899522e-06,
"loss": 0.1208,
"step": 1730
},
{
"epoch": 2.06,
"learning_rate": 4.826414470116382e-06,
"loss": 0.1403,
"step": 1732
},
{
"epoch": 2.06,
"learning_rate": 4.804862719286044e-06,
"loss": 0.1463,
"step": 1734
},
{
"epoch": 2.06,
"learning_rate": 4.783343968306631e-06,
"loss": 0.1276,
"step": 1736
},
{
"epoch": 2.07,
"learning_rate": 4.7618583538666605e-06,
"loss": 0.1242,
"step": 1738
},
{
"epoch": 2.07,
"learning_rate": 4.740406012444153e-06,
"loss": 0.1402,
"step": 1740
},
{
"epoch": 2.07,
"learning_rate": 4.718987080305778e-06,
"loss": 0.3846,
"step": 1742
},
{
"epoch": 2.07,
"learning_rate": 4.697601693505996e-06,
"loss": 0.161,
"step": 1744
},
{
"epoch": 2.08,
"learning_rate": 4.6762499878861764e-06,
"loss": 0.1102,
"step": 1746
},
{
"epoch": 2.08,
"learning_rate": 4.654932099073746e-06,
"loss": 0.1343,
"step": 1748
},
{
"epoch": 2.08,
"learning_rate": 4.633648162481326e-06,
"loss": 0.146,
"step": 1750
},
{
"epoch": 2.08,
"learning_rate": 4.612398313305867e-06,
"loss": 0.1533,
"step": 1752
},
{
"epoch": 2.09,
"learning_rate": 4.5911826865277975e-06,
"loss": 0.1346,
"step": 1754
},
{
"epoch": 2.09,
"learning_rate": 4.570001416910168e-06,
"loss": 0.1579,
"step": 1756
},
{
"epoch": 2.09,
"learning_rate": 4.548854638997778e-06,
"loss": 0.1382,
"step": 1758
},
{
"epoch": 2.09,
"learning_rate": 4.527742487116349e-06,
"loss": 0.133,
"step": 1760
},
{
"epoch": 2.1,
"learning_rate": 4.506665095371642e-06,
"loss": 0.121,
"step": 1762
},
{
"epoch": 2.1,
"learning_rate": 4.485622597648624e-06,
"loss": 0.1225,
"step": 1764
},
{
"epoch": 2.1,
"learning_rate": 4.464615127610615e-06,
"loss": 0.1139,
"step": 1766
},
{
"epoch": 2.1,
"learning_rate": 4.443642818698434e-06,
"loss": 0.1329,
"step": 1768
},
{
"epoch": 2.1,
"learning_rate": 4.4227058041295515e-06,
"loss": 0.1131,
"step": 1770
},
{
"epoch": 2.11,
"learning_rate": 4.401804216897258e-06,
"loss": 0.1301,
"step": 1772
},
{
"epoch": 2.11,
"learning_rate": 4.380938189769791e-06,
"loss": 0.1443,
"step": 1774
},
{
"epoch": 2.11,
"learning_rate": 4.3601078552895245e-06,
"loss": 0.1306,
"step": 1776
},
{
"epoch": 2.11,
"learning_rate": 4.339313345772098e-06,
"loss": 0.1501,
"step": 1778
},
{
"epoch": 2.12,
"learning_rate": 4.318554793305592e-06,
"loss": 0.1076,
"step": 1780
},
{
"epoch": 2.12,
"learning_rate": 4.297832329749687e-06,
"loss": 0.1671,
"step": 1782
},
{
"epoch": 2.12,
"learning_rate": 4.277146086734823e-06,
"loss": 0.1364,
"step": 1784
},
{
"epoch": 2.12,
"learning_rate": 4.2564961956613605e-06,
"loss": 0.1347,
"step": 1786
},
{
"epoch": 2.13,
"learning_rate": 4.235882787698763e-06,
"loss": 0.1667,
"step": 1788
},
{
"epoch": 2.13,
"learning_rate": 4.2153059937847355e-06,
"loss": 0.1366,
"step": 1790
},
{
"epoch": 2.13,
"learning_rate": 4.194765944624423e-06,
"loss": 0.1142,
"step": 1792
},
{
"epoch": 2.13,
"learning_rate": 4.174262770689552e-06,
"loss": 0.1188,
"step": 1794
},
{
"epoch": 2.14,
"learning_rate": 4.153796602217623e-06,
"loss": 0.1068,
"step": 1796
},
{
"epoch": 2.14,
"learning_rate": 4.133367569211074e-06,
"loss": 0.1359,
"step": 1798
},
{
"epoch": 2.14,
"learning_rate": 4.112975801436454e-06,
"loss": 0.1584,
"step": 1800
},
{
"epoch": 2.14,
"learning_rate": 4.092621428423601e-06,
"loss": 0.1308,
"step": 1802
},
{
"epoch": 2.15,
"learning_rate": 4.07230457946482e-06,
"loss": 0.1324,
"step": 1804
},
{
"epoch": 2.15,
"learning_rate": 4.052025383614061e-06,
"loss": 0.1339,
"step": 1806
},
{
"epoch": 2.15,
"learning_rate": 4.031783969686105e-06,
"loss": 0.1178,
"step": 1808
},
{
"epoch": 2.15,
"learning_rate": 4.011580466255729e-06,
"loss": 0.127,
"step": 1810
},
{
"epoch": 2.15,
"learning_rate": 3.991415001656906e-06,
"loss": 0.1334,
"step": 1812
},
{
"epoch": 2.16,
"learning_rate": 3.971287703981982e-06,
"loss": 0.1201,
"step": 1814
},
{
"epoch": 2.16,
"learning_rate": 3.9511987010808635e-06,
"loss": 0.1435,
"step": 1816
},
{
"epoch": 2.16,
"learning_rate": 3.931148120560211e-06,
"loss": 0.1394,
"step": 1818
},
{
"epoch": 2.16,
"learning_rate": 3.911136089782613e-06,
"loss": 0.1229,
"step": 1820
},
{
"epoch": 2.17,
"learning_rate": 3.8911627358658e-06,
"loss": 0.123,
"step": 1822
},
{
"epoch": 2.17,
"learning_rate": 3.871228185681822e-06,
"loss": 0.1433,
"step": 1824
},
{
"epoch": 2.17,
"learning_rate": 3.8513325658562395e-06,
"loss": 0.1238,
"step": 1826
},
{
"epoch": 2.17,
"learning_rate": 3.831476002767327e-06,
"loss": 0.1456,
"step": 1828
},
{
"epoch": 2.18,
"learning_rate": 3.811658622545268e-06,
"loss": 0.1478,
"step": 1830
},
{
"epoch": 2.18,
"learning_rate": 3.7918805510713553e-06,
"loss": 0.1269,
"step": 1832
},
{
"epoch": 2.18,
"learning_rate": 3.7721419139771886e-06,
"loss": 0.1379,
"step": 1834
},
{
"epoch": 2.18,
"learning_rate": 3.7524428366438757e-06,
"loss": 0.1365,
"step": 1836
},
{
"epoch": 2.19,
"learning_rate": 3.7327834442012433e-06,
"loss": 0.1263,
"step": 1838
},
{
"epoch": 2.19,
"learning_rate": 3.7131638615270404e-06,
"loss": 0.1055,
"step": 1840
},
{
"epoch": 2.19,
"learning_rate": 3.6935842132461307e-06,
"loss": 0.1053,
"step": 1842
},
{
"epoch": 2.19,
"learning_rate": 3.6740446237297177e-06,
"loss": 0.1259,
"step": 1844
},
{
"epoch": 2.2,
"learning_rate": 3.6545452170945496e-06,
"loss": 0.1428,
"step": 1846
},
{
"epoch": 2.2,
"learning_rate": 3.635086117202128e-06,
"loss": 0.1375,
"step": 1848
},
{
"epoch": 2.2,
"learning_rate": 3.6156674476579266e-06,
"loss": 0.1469,
"step": 1850
},
{
"epoch": 2.2,
"learning_rate": 3.5962893318105963e-06,
"loss": 0.123,
"step": 1852
},
{
"epoch": 2.2,
"learning_rate": 3.576951892751197e-06,
"loss": 0.128,
"step": 1854
},
{
"epoch": 2.21,
"learning_rate": 3.5576552533124074e-06,
"loss": 0.1403,
"step": 1856
},
{
"epoch": 2.21,
"learning_rate": 3.538399536067736e-06,
"loss": 0.1392,
"step": 1858
},
{
"epoch": 2.21,
"learning_rate": 3.5191848633307545e-06,
"loss": 0.1259,
"step": 1860
},
{
"epoch": 2.21,
"learning_rate": 3.5000113571543183e-06,
"loss": 0.1252,
"step": 1862
},
{
"epoch": 2.22,
"learning_rate": 3.480879139329789e-06,
"loss": 0.1641,
"step": 1864
},
{
"epoch": 2.22,
"learning_rate": 3.4617883313862633e-06,
"loss": 0.1396,
"step": 1866
},
{
"epoch": 2.22,
"learning_rate": 3.4427390545897955e-06,
"loss": 0.1284,
"step": 1868
},
{
"epoch": 2.22,
"learning_rate": 3.423731429942636e-06,
"loss": 0.1255,
"step": 1870
},
{
"epoch": 2.23,
"learning_rate": 3.4047655781824605e-06,
"loss": 0.1381,
"step": 1872
},
{
"epoch": 2.23,
"learning_rate": 3.3858416197815947e-06,
"loss": 0.1587,
"step": 1874
},
{
"epoch": 2.23,
"learning_rate": 3.3669596749462562e-06,
"loss": 0.1148,
"step": 1876
},
{
"epoch": 2.23,
"learning_rate": 3.3481198636157908e-06,
"loss": 0.1187,
"step": 1878
},
{
"epoch": 2.24,
"learning_rate": 3.3293223054619073e-06,
"loss": 0.1328,
"step": 1880
},
{
"epoch": 2.24,
"learning_rate": 3.3105671198879243e-06,
"loss": 0.1166,
"step": 1882
},
{
"epoch": 2.24,
"learning_rate": 3.2918544260279985e-06,
"loss": 0.133,
"step": 1884
},
{
"epoch": 2.24,
"learning_rate": 3.2731843427463894e-06,
"loss": 0.127,
"step": 1886
},
{
"epoch": 2.24,
"learning_rate": 3.254556988636678e-06,
"loss": 0.1678,
"step": 1888
},
{
"epoch": 2.25,
"learning_rate": 3.2359724820210394e-06,
"loss": 0.1156,
"step": 1890
},
{
"epoch": 2.25,
"learning_rate": 3.2174309409494675e-06,
"loss": 0.1384,
"step": 1892
},
{
"epoch": 2.25,
"learning_rate": 3.198932483199041e-06,
"loss": 0.1324,
"step": 1894
},
{
"epoch": 2.25,
"learning_rate": 3.180477226273172e-06,
"loss": 0.1498,
"step": 1896
},
{
"epoch": 2.26,
"learning_rate": 3.162065287400855e-06,
"loss": 0.1482,
"step": 1898
},
{
"epoch": 2.26,
"learning_rate": 3.1436967835359245e-06,
"loss": 0.1001,
"step": 1900
},
{
"epoch": 2.26,
"learning_rate": 3.1253718313563207e-06,
"loss": 0.1328,
"step": 1902
},
{
"epoch": 2.26,
"learning_rate": 3.1070905472633307e-06,
"loss": 0.1343,
"step": 1904
},
{
"epoch": 2.27,
"learning_rate": 3.0888530473808677e-06,
"loss": 0.1721,
"step": 1906
},
{
"epoch": 2.27,
"learning_rate": 3.070659447554719e-06,
"loss": 0.1211,
"step": 1908
},
{
"epoch": 2.27,
"learning_rate": 3.052509863351818e-06,
"loss": 0.1267,
"step": 1910
},
{
"epoch": 2.27,
"learning_rate": 3.0344044100595073e-06,
"loss": 0.1257,
"step": 1912
},
{
"epoch": 2.28,
"learning_rate": 3.016343202684807e-06,
"loss": 0.1769,
"step": 1914
},
{
"epoch": 2.28,
"learning_rate": 2.9983263559536813e-06,
"loss": 0.1398,
"step": 1916
},
{
"epoch": 2.28,
"learning_rate": 2.9803539843103226e-06,
"loss": 0.0818,
"step": 1918
},
{
"epoch": 2.28,
"learning_rate": 2.962426201916402e-06,
"loss": 0.1552,
"step": 1920
},
{
"epoch": 2.29,
"learning_rate": 2.9445431226503683e-06,
"loss": 0.1296,
"step": 1922
},
{
"epoch": 2.29,
"learning_rate": 2.926704860106706e-06,
"loss": 0.1082,
"step": 1924
},
{
"epoch": 2.29,
"learning_rate": 2.9089115275952217e-06,
"loss": 0.143,
"step": 1926
},
{
"epoch": 2.29,
"learning_rate": 2.891163238140323e-06,
"loss": 0.1019,
"step": 1928
},
{
"epoch": 2.29,
"learning_rate": 2.8734601044803056e-06,
"loss": 0.1256,
"step": 1930
},
{
"epoch": 2.3,
"learning_rate": 2.855802239066623e-06,
"loss": 0.1159,
"step": 1932
},
{
"epoch": 2.3,
"learning_rate": 2.8381897540631964e-06,
"loss": 0.1187,
"step": 1934
},
{
"epoch": 2.3,
"learning_rate": 2.820622761345676e-06,
"loss": 0.1375,
"step": 1936
},
{
"epoch": 2.3,
"learning_rate": 2.8031013725007415e-06,
"loss": 0.1305,
"step": 1938
},
{
"epoch": 2.31,
"learning_rate": 2.785625698825406e-06,
"loss": 0.1443,
"step": 1940
},
{
"epoch": 2.31,
"learning_rate": 2.768195851326285e-06,
"loss": 0.1351,
"step": 1942
},
{
"epoch": 2.31,
"learning_rate": 2.750811940718906e-06,
"loss": 0.1378,
"step": 1944
},
{
"epoch": 2.31,
"learning_rate": 2.733474077427004e-06,
"loss": 0.0981,
"step": 1946
},
{
"epoch": 2.32,
"learning_rate": 2.716182371581814e-06,
"loss": 0.146,
"step": 1948
},
{
"epoch": 2.32,
"learning_rate": 2.6989369330213865e-06,
"loss": 0.1286,
"step": 1950
},
{
"epoch": 2.32,
"learning_rate": 2.681737871289869e-06,
"loss": 0.1551,
"step": 1952
},
{
"epoch": 2.32,
"learning_rate": 2.6645852956368214e-06,
"loss": 0.1166,
"step": 1954
},
{
"epoch": 2.33,
"learning_rate": 2.647479315016528e-06,
"loss": 0.1181,
"step": 1956
},
{
"epoch": 2.33,
"learning_rate": 2.6304200380872913e-06,
"loss": 0.1341,
"step": 1958
},
{
"epoch": 2.33,
"learning_rate": 2.61340757321075e-06,
"loss": 0.1196,
"step": 1960
},
{
"epoch": 2.33,
"learning_rate": 2.596442028451194e-06,
"loss": 0.1364,
"step": 1962
},
{
"epoch": 2.34,
"learning_rate": 2.579523511574864e-06,
"loss": 0.1209,
"step": 1964
},
{
"epoch": 2.34,
"learning_rate": 2.56265213004929e-06,
"loss": 0.1174,
"step": 1966
},
{
"epoch": 2.34,
"learning_rate": 2.5458279910425865e-06,
"loss": 0.1383,
"step": 1968
},
{
"epoch": 2.34,
"learning_rate": 2.5290512014227774e-06,
"loss": 0.1044,
"step": 1970
},
{
"epoch": 2.34,
"learning_rate": 2.5123218677571313e-06,
"loss": 0.1163,
"step": 1972
},
{
"epoch": 2.35,
"learning_rate": 2.4956400963114647e-06,
"loss": 0.137,
"step": 1974
},
{
"epoch": 2.35,
"learning_rate": 2.479005993049478e-06,
"loss": 0.1591,
"step": 1976
},
{
"epoch": 2.35,
"learning_rate": 2.4624196636320795e-06,
"loss": 0.137,
"step": 1978
},
{
"epoch": 2.35,
"learning_rate": 2.445881213416713e-06,
"loss": 0.1583,
"step": 1980
},
{
"epoch": 2.36,
"learning_rate": 2.429390747456699e-06,
"loss": 0.1252,
"step": 1982
},
{
"epoch": 2.36,
"learning_rate": 2.412948370500551e-06,
"loss": 0.1552,
"step": 1984
},
{
"epoch": 2.36,
"learning_rate": 2.3965541869913188e-06,
"loss": 0.1481,
"step": 1986
},
{
"epoch": 2.36,
"learning_rate": 2.3802083010659238e-06,
"loss": 0.1243,
"step": 1988
},
{
"epoch": 2.37,
"learning_rate": 2.3639108165545057e-06,
"loss": 0.1273,
"step": 1990
},
{
"epoch": 2.37,
"learning_rate": 2.3476618369797457e-06,
"loss": 0.1403,
"step": 1992
},
{
"epoch": 2.37,
"learning_rate": 2.331461465556222e-06,
"loss": 0.1391,
"step": 1994
},
{
"epoch": 2.37,
"learning_rate": 2.315309805189748e-06,
"loss": 0.1376,
"step": 1996
},
{
"epoch": 2.38,
"learning_rate": 2.299206958476731e-06,
"loss": 0.1253,
"step": 1998
},
{
"epoch": 2.38,
"learning_rate": 2.2831530277034985e-06,
"loss": 0.131,
"step": 2000
},
{
"epoch": 2.38,
"learning_rate": 2.2671481148456685e-06,
"loss": 0.1377,
"step": 2002
},
{
"epoch": 2.38,
"learning_rate": 2.251192321567488e-06,
"loss": 0.1077,
"step": 2004
},
{
"epoch": 2.39,
"learning_rate": 2.235285749221201e-06,
"loss": 0.1253,
"step": 2006
},
{
"epoch": 2.39,
"learning_rate": 2.219428498846393e-06,
"loss": 0.1271,
"step": 2008
},
{
"epoch": 2.39,
"learning_rate": 2.2036206711693508e-06,
"loss": 0.1449,
"step": 2010
},
{
"epoch": 2.39,
"learning_rate": 2.1878623666024233e-06,
"loss": 0.1024,
"step": 2012
},
{
"epoch": 2.39,
"learning_rate": 2.1721536852433976e-06,
"loss": 0.1141,
"step": 2014
},
{
"epoch": 2.4,
"learning_rate": 2.1564947268748382e-06,
"loss": 0.1023,
"step": 2016
},
{
"epoch": 2.4,
"learning_rate": 2.1408855909634696e-06,
"loss": 0.1113,
"step": 2018
},
{
"epoch": 2.4,
"learning_rate": 2.125326376659539e-06,
"loss": 0.1467,
"step": 2020
},
{
"epoch": 2.4,
"learning_rate": 2.1098171827961965e-06,
"loss": 0.1194,
"step": 2022
},
{
"epoch": 2.41,
"learning_rate": 2.094358107888852e-06,
"loss": 0.1676,
"step": 2024
},
{
"epoch": 2.41,
"learning_rate": 2.0789492501345553e-06,
"loss": 0.1367,
"step": 2026
},
{
"epoch": 2.41,
"learning_rate": 2.0635907074113737e-06,
"loss": 0.1231,
"step": 2028
},
{
"epoch": 2.41,
"learning_rate": 2.0482825772777804e-06,
"loss": 0.1626,
"step": 2030
},
{
"epoch": 2.42,
"learning_rate": 2.0330249569720116e-06,
"loss": 0.1117,
"step": 2032
},
{
"epoch": 2.42,
"learning_rate": 2.0178179434114674e-06,
"loss": 0.1303,
"step": 2034
},
{
"epoch": 2.42,
"learning_rate": 2.00266163319209e-06,
"loss": 0.1336,
"step": 2036
},
{
"epoch": 2.42,
"learning_rate": 1.9875561225877482e-06,
"loss": 0.1143,
"step": 2038
},
{
"epoch": 2.43,
"learning_rate": 1.972501507549637e-06,
"loss": 0.1449,
"step": 2040
},
{
"epoch": 2.43,
"learning_rate": 1.957497883705649e-06,
"loss": 0.1331,
"step": 2042
},
{
"epoch": 2.43,
"learning_rate": 1.9425453463597798e-06,
"loss": 0.0957,
"step": 2044
},
{
"epoch": 2.43,
"learning_rate": 1.927643990491528e-06,
"loss": 0.1182,
"step": 2046
},
{
"epoch": 2.44,
"learning_rate": 1.912793910755275e-06,
"loss": 0.1394,
"step": 2048
},
{
"epoch": 2.44,
"learning_rate": 1.8979952014796954e-06,
"loss": 0.1155,
"step": 2050
},
{
"epoch": 2.44,
"learning_rate": 1.883247956667157e-06,
"loss": 0.1681,
"step": 2052
},
{
"epoch": 2.44,
"learning_rate": 1.8685522699931169e-06,
"loss": 0.1517,
"step": 2054
},
{
"epoch": 2.44,
"learning_rate": 1.8539082348055427e-06,
"loss": 0.1491,
"step": 2056
},
{
"epoch": 2.45,
"learning_rate": 1.839315944124298e-06,
"loss": 0.1276,
"step": 2058
},
{
"epoch": 2.45,
"learning_rate": 1.8247754906405624e-06,
"loss": 0.1343,
"step": 2060
},
{
"epoch": 2.45,
"learning_rate": 1.8102869667162494e-06,
"loss": 0.1477,
"step": 2062
},
{
"epoch": 2.45,
"learning_rate": 1.7958504643834062e-06,
"loss": 0.1584,
"step": 2064
},
{
"epoch": 2.46,
"learning_rate": 1.7814660753436386e-06,
"loss": 0.1316,
"step": 2066
},
{
"epoch": 2.46,
"learning_rate": 1.7671338909675218e-06,
"loss": 0.1373,
"step": 2068
},
{
"epoch": 2.46,
"learning_rate": 1.7528540022940288e-06,
"loss": 0.131,
"step": 2070
},
{
"epoch": 2.46,
"learning_rate": 1.7386265000299385e-06,
"loss": 0.1206,
"step": 2072
},
{
"epoch": 2.47,
"learning_rate": 1.7244514745492813e-06,
"loss": 0.117,
"step": 2074
},
{
"epoch": 2.47,
"learning_rate": 1.71032901589274e-06,
"loss": 0.1368,
"step": 2076
},
{
"epoch": 2.47,
"learning_rate": 1.6962592137670897e-06,
"loss": 0.1176,
"step": 2078
},
{
"epoch": 2.47,
"learning_rate": 1.6822421575446378e-06,
"loss": 0.1501,
"step": 2080
},
{
"epoch": 2.48,
"learning_rate": 1.6682779362626378e-06,
"loss": 0.1326,
"step": 2082
},
{
"epoch": 2.48,
"learning_rate": 1.6543666386227343e-06,
"loss": 0.1357,
"step": 2084
},
{
"epoch": 2.48,
"learning_rate": 1.6405083529903954e-06,
"loss": 0.1039,
"step": 2086
},
{
"epoch": 2.48,
"learning_rate": 1.6267031673943546e-06,
"loss": 0.1407,
"step": 2088
},
{
"epoch": 2.49,
"learning_rate": 1.6129511695260558e-06,
"loss": 0.1312,
"step": 2090
},
{
"epoch": 2.49,
"learning_rate": 1.5992524467390858e-06,
"loss": 0.1198,
"step": 2092
},
{
"epoch": 2.49,
"learning_rate": 1.5856070860486205e-06,
"loss": 0.1091,
"step": 2094
},
{
"epoch": 2.49,
"learning_rate": 1.5720151741308875e-06,
"loss": 0.119,
"step": 2096
},
{
"epoch": 2.49,
"learning_rate": 1.5584767973225967e-06,
"loss": 0.1316,
"step": 2098
},
{
"epoch": 2.5,
"learning_rate": 1.544992041620398e-06,
"loss": 0.1108,
"step": 2100
},
{
"epoch": 2.5,
"learning_rate": 1.531560992680341e-06,
"loss": 0.1267,
"step": 2102
},
{
"epoch": 2.5,
"learning_rate": 1.5181837358173223e-06,
"loss": 0.1292,
"step": 2104
},
{
"epoch": 2.5,
"learning_rate": 1.5048603560045549e-06,
"loss": 0.124,
"step": 2106
},
{
"epoch": 2.51,
"learning_rate": 1.4915909378730143e-06,
"loss": 0.1466,
"step": 2108
},
{
"epoch": 2.51,
"learning_rate": 1.4783755657109079e-06,
"loss": 0.103,
"step": 2110
},
{
"epoch": 2.51,
"learning_rate": 1.4652143234631465e-06,
"loss": 0.1478,
"step": 2112
},
{
"epoch": 2.51,
"learning_rate": 1.4521072947307957e-06,
"loss": 0.1196,
"step": 2114
},
{
"epoch": 2.52,
"learning_rate": 1.4390545627705588e-06,
"loss": 0.1203,
"step": 2116
},
{
"epoch": 2.52,
"learning_rate": 1.426056210494241e-06,
"loss": 0.125,
"step": 2118
},
{
"epoch": 2.52,
"learning_rate": 1.413112320468223e-06,
"loss": 0.1612,
"step": 2120
},
{
"epoch": 2.52,
"learning_rate": 1.400222974912936e-06,
"loss": 0.1226,
"step": 2122
},
{
"epoch": 2.53,
"learning_rate": 1.3873882557023488e-06,
"loss": 0.1304,
"step": 2124
},
{
"epoch": 2.53,
"learning_rate": 1.3746082443634311e-06,
"loss": 0.1172,
"step": 2126
},
{
"epoch": 2.53,
"learning_rate": 1.361883022075653e-06,
"loss": 0.1441,
"step": 2128
},
{
"epoch": 2.53,
"learning_rate": 1.3492126696704544e-06,
"loss": 0.1232,
"step": 2130
},
{
"epoch": 2.54,
"learning_rate": 1.3365972676307403e-06,
"loss": 0.1127,
"step": 2132
},
{
"epoch": 2.54,
"learning_rate": 1.3240368960903671e-06,
"loss": 0.1298,
"step": 2134
},
{
"epoch": 2.54,
"learning_rate": 1.3115316348336348e-06,
"loss": 0.1358,
"step": 2136
},
{
"epoch": 2.54,
"learning_rate": 1.2990815632947763e-06,
"loss": 0.1689,
"step": 2138
},
{
"epoch": 2.54,
"learning_rate": 1.2866867605574628e-06,
"loss": 0.1101,
"step": 2140
},
{
"epoch": 2.55,
"learning_rate": 1.2743473053542842e-06,
"loss": 0.1308,
"step": 2142
},
{
"epoch": 2.55,
"learning_rate": 1.262063276066272e-06,
"loss": 0.1472,
"step": 2144
},
{
"epoch": 2.55,
"learning_rate": 1.2498347507223763e-06,
"loss": 0.1298,
"step": 2146
},
{
"epoch": 2.55,
"learning_rate": 1.237661806998991e-06,
"loss": 0.1323,
"step": 2148
},
{
"epoch": 2.56,
"learning_rate": 1.2255445222194462e-06,
"loss": 0.0947,
"step": 2150
},
{
"epoch": 2.56,
"learning_rate": 1.2134829733535269e-06,
"loss": 0.1199,
"step": 2152
},
{
"epoch": 2.56,
"learning_rate": 1.2014772370169747e-06,
"loss": 0.1284,
"step": 2154
},
{
"epoch": 2.56,
"learning_rate": 1.1895273894710157e-06,
"loss": 0.1323,
"step": 2156
},
{
"epoch": 2.57,
"learning_rate": 1.177633506621857e-06,
"loss": 0.1188,
"step": 2158
},
{
"epoch": 2.57,
"learning_rate": 1.1657956640202217e-06,
"loss": 0.1448,
"step": 2160
},
{
"epoch": 2.57,
"learning_rate": 1.1540139368608572e-06,
"loss": 0.1819,
"step": 2162
},
{
"epoch": 2.57,
"learning_rate": 1.142288399982061e-06,
"loss": 0.2198,
"step": 2164
},
{
"epoch": 2.58,
"learning_rate": 1.1306191278652112e-06,
"loss": 0.1341,
"step": 2166
},
{
"epoch": 2.58,
"learning_rate": 1.1190061946342835e-06,
"loss": 0.2365,
"step": 2168
},
{
"epoch": 2.58,
"learning_rate": 1.1074496740553853e-06,
"loss": 0.1264,
"step": 2170
},
{
"epoch": 2.58,
"learning_rate": 1.0959496395362946e-06,
"loss": 0.1328,
"step": 2172
},
{
"epoch": 2.59,
"learning_rate": 1.0845061641259757e-06,
"loss": 0.1076,
"step": 2174
},
{
"epoch": 2.59,
"learning_rate": 1.0731193205141354e-06,
"loss": 0.1372,
"step": 2176
},
{
"epoch": 2.59,
"learning_rate": 1.0617891810307458e-06,
"loss": 0.1536,
"step": 2178
},
{
"epoch": 2.59,
"learning_rate": 1.050515817645591e-06,
"loss": 0.1243,
"step": 2180
},
{
"epoch": 2.59,
"learning_rate": 1.039299301967811e-06,
"loss": 0.2138,
"step": 2182
},
{
"epoch": 2.6,
"learning_rate": 1.0281397052454457e-06,
"loss": 0.1357,
"step": 2184
},
{
"epoch": 2.6,
"learning_rate": 1.0170370983649792e-06,
"loss": 0.1623,
"step": 2186
},
{
"epoch": 2.6,
"learning_rate": 1.005991551850899e-06,
"loss": 0.1314,
"step": 2188
},
{
"epoch": 2.6,
"learning_rate": 9.950031358652313e-07,
"loss": 0.1163,
"step": 2190
},
{
"epoch": 2.61,
"learning_rate": 9.84071920207118e-07,
"loss": 0.1045,
"step": 2192
},
{
"epoch": 2.61,
"learning_rate": 9.73197974312351e-07,
"loss": 0.1449,
"step": 2194
},
{
"epoch": 2.61,
"learning_rate": 9.623813672529437e-07,
"loss": 0.1287,
"step": 2196
},
{
"epoch": 2.61,
"learning_rate": 9.516221677366888e-07,
"loss": 0.1193,
"step": 2198
},
{
"epoch": 2.62,
"learning_rate": 9.409204441067254e-07,
"loss": 0.1306,
"step": 2200
},
{
"epoch": 2.62,
"learning_rate": 9.302762643411e-07,
"loss": 0.1151,
"step": 2202
},
{
"epoch": 2.62,
"learning_rate": 9.196896960523349e-07,
"loss": 0.1287,
"step": 2204
},
{
"epoch": 2.62,
"learning_rate": 9.091608064870028e-07,
"loss": 0.099,
"step": 2206
},
{
"epoch": 2.63,
"learning_rate": 8.986896625253006e-07,
"loss": 0.1151,
"step": 2208
},
{
"epoch": 2.63,
"learning_rate": 8.882763306806163e-07,
"loss": 0.1466,
"step": 2210
},
{
"epoch": 2.63,
"learning_rate": 8.779208770991121e-07,
"loss": 0.1133,
"step": 2212
},
{
"epoch": 2.63,
"learning_rate": 8.676233675593038e-07,
"loss": 0.157,
"step": 2214
},
{
"epoch": 2.63,
"learning_rate": 8.573838674716461e-07,
"loss": 0.1184,
"step": 2216
},
{
"epoch": 2.64,
"learning_rate": 8.472024418781099e-07,
"loss": 0.125,
"step": 2218
},
{
"epoch": 2.64,
"learning_rate": 8.370791554517743e-07,
"loss": 0.146,
"step": 2220
},
{
"epoch": 2.64,
"learning_rate": 8.270140724964159e-07,
"loss": 0.0981,
"step": 2222
},
{
"epoch": 2.64,
"learning_rate": 8.170072569460996e-07,
"loss": 0.1288,
"step": 2224
},
{
"epoch": 2.65,
"learning_rate": 8.070587723647705e-07,
"loss": 0.1714,
"step": 2226
},
{
"epoch": 2.65,
"learning_rate": 7.971686819458502e-07,
"loss": 0.1147,
"step": 2228
},
{
"epoch": 2.65,
"learning_rate": 7.873370485118381e-07,
"loss": 0.1307,
"step": 2230
},
{
"epoch": 2.65,
"learning_rate": 7.77563934513913e-07,
"loss": 0.1278,
"step": 2232
},
{
"epoch": 2.66,
"learning_rate": 7.678494020315308e-07,
"loss": 0.118,
"step": 2234
},
{
"epoch": 2.66,
"learning_rate": 7.581935127720352e-07,
"loss": 0.1289,
"step": 2236
},
{
"epoch": 2.66,
"learning_rate": 7.485963280702646e-07,
"loss": 0.1139,
"step": 2238
},
{
"epoch": 2.66,
"learning_rate": 7.390579088881655e-07,
"loss": 0.1164,
"step": 2240
},
{
"epoch": 2.67,
"learning_rate": 7.295783158143976e-07,
"loss": 0.0974,
"step": 2242
},
{
"epoch": 2.67,
"learning_rate": 7.201576090639529e-07,
"loss": 0.1444,
"step": 2244
},
{
"epoch": 2.67,
"learning_rate": 7.107958484777755e-07,
"loss": 0.1599,
"step": 2246
},
{
"epoch": 2.67,
"learning_rate": 7.014930935223807e-07,
"loss": 0.1482,
"step": 2248
},
{
"epoch": 2.68,
"learning_rate": 6.922494032894744e-07,
"loss": 0.1095,
"step": 2250
},
{
"epoch": 2.68,
"learning_rate": 6.830648364955772e-07,
"loss": 0.1398,
"step": 2252
},
{
"epoch": 2.68,
"learning_rate": 6.739394514816622e-07,
"loss": 0.1333,
"step": 2254
},
{
"epoch": 2.68,
"learning_rate": 6.648733062127643e-07,
"loss": 0.1209,
"step": 2256
},
{
"epoch": 2.68,
"learning_rate": 6.558664582776341e-07,
"loss": 0.1019,
"step": 2258
},
{
"epoch": 2.69,
"learning_rate": 6.469189648883567e-07,
"loss": 0.1099,
"step": 2260
},
{
"epoch": 2.69,
"learning_rate": 6.380308828799919e-07,
"loss": 0.1176,
"step": 2262
},
{
"epoch": 2.69,
"learning_rate": 6.292022687102184e-07,
"loss": 0.1138,
"step": 2264
},
{
"epoch": 2.69,
"learning_rate": 6.204331784589679e-07,
"loss": 0.1311,
"step": 2266
},
{
"epoch": 2.7,
"learning_rate": 6.117236678280736e-07,
"loss": 0.1296,
"step": 2268
},
{
"epoch": 2.7,
"learning_rate": 6.030737921409169e-07,
"loss": 0.1377,
"step": 2270
},
{
"epoch": 2.7,
"learning_rate": 5.9448360634207e-07,
"loss": 0.1579,
"step": 2272
},
{
"epoch": 2.7,
"learning_rate": 5.859531649969563e-07,
"loss": 0.1133,
"step": 2274
},
{
"epoch": 2.71,
"learning_rate": 5.774825222914948e-07,
"loss": 0.1324,
"step": 2276
},
{
"epoch": 2.71,
"learning_rate": 5.690717320317595e-07,
"loss": 0.1227,
"step": 2278
},
{
"epoch": 2.71,
"learning_rate": 5.60720847643641e-07,
"loss": 0.116,
"step": 2280
},
{
"epoch": 2.71,
"learning_rate": 5.524299221724993e-07,
"loss": 0.1574,
"step": 2282
},
{
"epoch": 2.72,
"learning_rate": 5.44199008282833e-07,
"loss": 0.1149,
"step": 2284
},
{
"epoch": 2.72,
"learning_rate": 5.360281582579474e-07,
"loss": 0.0964,
"step": 2286
},
{
"epoch": 2.72,
"learning_rate": 5.279174239996132e-07,
"loss": 0.1096,
"step": 2288
},
{
"epoch": 2.72,
"learning_rate": 5.198668570277443e-07,
"loss": 0.1395,
"step": 2290
},
{
"epoch": 2.73,
"learning_rate": 5.11876508480067e-07,
"loss": 0.1232,
"step": 2292
},
{
"epoch": 2.73,
"learning_rate": 5.039464291117968e-07,
"loss": 0.1222,
"step": 2294
},
{
"epoch": 2.73,
"learning_rate": 4.960766692953145e-07,
"loss": 0.16,
"step": 2296
},
{
"epoch": 2.73,
"learning_rate": 4.882672790198473e-07,
"loss": 0.1558,
"step": 2298
},
{
"epoch": 2.73,
"learning_rate": 4.805183078911524e-07,
"loss": 0.1193,
"step": 2300
},
{
"epoch": 2.74,
"learning_rate": 4.728298051312008e-07,
"loss": 0.1342,
"step": 2302
},
{
"epoch": 2.74,
"learning_rate": 4.652018195778629e-07,
"loss": 0.1598,
"step": 2304
},
{
"epoch": 2.74,
"learning_rate": 4.576343996845989e-07,
"loss": 0.1324,
"step": 2306
},
{
"epoch": 2.74,
"learning_rate": 4.5012759352015766e-07,
"loss": 0.0991,
"step": 2308
},
{
"epoch": 2.75,
"learning_rate": 4.4268144876825846e-07,
"loss": 0.1399,
"step": 2310
},
{
"epoch": 2.75,
"learning_rate": 4.352960127272987e-07,
"loss": 0.1098,
"step": 2312
},
{
"epoch": 2.75,
"learning_rate": 4.2797133231005207e-07,
"loss": 0.1343,
"step": 2314
},
{
"epoch": 2.75,
"learning_rate": 4.207074540433631e-07,
"loss": 0.1038,
"step": 2316
},
{
"epoch": 2.76,
"learning_rate": 4.1350442406786317e-07,
"loss": 0.1445,
"step": 2318
},
{
"epoch": 2.76,
"learning_rate": 4.063622881376683e-07,
"loss": 0.1484,
"step": 2320
},
{
"epoch": 2.76,
"learning_rate": 3.9928109162008953e-07,
"loss": 0.1116,
"step": 2322
},
{
"epoch": 2.76,
"learning_rate": 3.922608794953531e-07,
"loss": 0.1271,
"step": 2324
},
{
"epoch": 2.77,
"learning_rate": 3.8530169635630055e-07,
"loss": 0.1471,
"step": 2326
},
{
"epoch": 2.77,
"learning_rate": 3.7840358640812036e-07,
"loss": 0.1074,
"step": 2328
},
{
"epoch": 2.77,
"learning_rate": 3.715665934680546e-07,
"loss": 0.1571,
"step": 2330
},
{
"epoch": 2.77,
"learning_rate": 3.64790760965128e-07,
"loss": 0.0928,
"step": 2332
},
{
"epoch": 2.78,
"learning_rate": 3.580761319398729e-07,
"loss": 0.1362,
"step": 2334
},
{
"epoch": 2.78,
"learning_rate": 3.514227490440503e-07,
"loss": 0.1168,
"step": 2336
},
{
"epoch": 2.78,
"learning_rate": 3.4483065454038123e-07,
"loss": 0.1497,
"step": 2338
},
{
"epoch": 2.78,
"learning_rate": 3.3829989030228163e-07,
"loss": 0.1244,
"step": 2340
},
{
"epoch": 2.78,
"learning_rate": 3.3183049781359e-07,
"loss": 0.1058,
"step": 2342
},
{
"epoch": 2.79,
"learning_rate": 3.2542251816831237e-07,
"loss": 0.1158,
"step": 2344
},
{
"epoch": 2.79,
"learning_rate": 3.190759920703512e-07,
"loss": 0.1208,
"step": 2346
},
{
"epoch": 2.79,
"learning_rate": 3.127909598332535e-07,
"loss": 0.1214,
"step": 2348
},
{
"epoch": 2.79,
"learning_rate": 3.065674613799574e-07,
"loss": 0.1258,
"step": 2350
},
{
"epoch": 2.8,
"learning_rate": 3.0040553624252844e-07,
"loss": 0.136,
"step": 2352
},
{
"epoch": 2.8,
"learning_rate": 2.9430522356191814e-07,
"loss": 0.1553,
"step": 2354
},
{
"epoch": 2.8,
"learning_rate": 2.88266562087709e-07,
"loss": 0.164,
"step": 2356
},
{
"epoch": 2.8,
"learning_rate": 2.822895901778744e-07,
"loss": 0.1372,
"step": 2358
},
{
"epoch": 2.81,
"learning_rate": 2.7637434579853016e-07,
"loss": 0.1196,
"step": 2360
},
{
"epoch": 2.81,
"learning_rate": 2.7052086652369356e-07,
"loss": 0.1351,
"step": 2362
},
{
"epoch": 2.81,
"learning_rate": 2.6472918953504566e-07,
"loss": 0.1407,
"step": 2364
},
{
"epoch": 2.81,
"learning_rate": 2.589993516216993e-07,
"loss": 0.1181,
"step": 2366
},
{
"epoch": 2.82,
"learning_rate": 2.5333138917995714e-07,
"loss": 0.1484,
"step": 2368
},
{
"epoch": 2.82,
"learning_rate": 2.477253382130862e-07,
"loss": 0.125,
"step": 2370
},
{
"epoch": 2.82,
"learning_rate": 2.4218123433108696e-07,
"loss": 0.1309,
"step": 2372
},
{
"epoch": 2.82,
"learning_rate": 2.366991127504714e-07,
"loss": 0.1325,
"step": 2374
},
{
"epoch": 2.83,
"learning_rate": 2.3127900829403305e-07,
"loss": 0.1297,
"step": 2376
},
{
"epoch": 2.83,
"learning_rate": 2.259209553906272e-07,
"loss": 0.1242,
"step": 2378
},
{
"epoch": 2.83,
"learning_rate": 2.2062498807495669e-07,
"loss": 0.155,
"step": 2380
},
{
"epoch": 2.83,
"learning_rate": 2.1539113998735094e-07,
"loss": 0.1266,
"step": 2382
},
{
"epoch": 2.83,
"learning_rate": 2.10219444373555e-07,
"loss": 0.1459,
"step": 2384
},
{
"epoch": 2.84,
"learning_rate": 2.051099340845164e-07,
"loss": 0.1544,
"step": 2386
},
{
"epoch": 2.84,
"learning_rate": 2.000626415761786e-07,
"loss": 0.1347,
"step": 2388
},
{
"epoch": 2.84,
"learning_rate": 1.9507759890927125e-07,
"loss": 0.146,
"step": 2390
},
{
"epoch": 2.84,
"learning_rate": 1.9015483774911249e-07,
"loss": 0.1342,
"step": 2392
},
{
"epoch": 2.85,
"learning_rate": 1.8529438936540022e-07,
"loss": 0.1166,
"step": 2394
},
{
"epoch": 2.85,
"learning_rate": 1.8049628463202128e-07,
"loss": 0.1313,
"step": 2396
},
{
"epoch": 2.85,
"learning_rate": 1.7576055402685034e-07,
"loss": 0.1251,
"step": 2398
},
{
"epoch": 2.85,
"learning_rate": 1.710872276315556e-07,
"loss": 0.1077,
"step": 2400
},
{
"epoch": 2.86,
"learning_rate": 1.664763351314125e-07,
"loss": 0.1095,
"step": 2402
},
{
"epoch": 2.86,
"learning_rate": 1.619279058151102e-07,
"loss": 0.0939,
"step": 2404
},
{
"epoch": 2.86,
"learning_rate": 1.5744196857456874e-07,
"loss": 0.1118,
"step": 2406
},
{
"epoch": 2.86,
"learning_rate": 1.5301855190475445e-07,
"loss": 0.1477,
"step": 2408
},
{
"epoch": 2.87,
"learning_rate": 1.4865768390349812e-07,
"loss": 0.1073,
"step": 2410
},
{
"epoch": 2.87,
"learning_rate": 1.4435939227131712e-07,
"loss": 0.1194,
"step": 2412
},
{
"epoch": 2.87,
"learning_rate": 1.4012370431124133e-07,
"loss": 0.1222,
"step": 2414
},
{
"epoch": 2.87,
"learning_rate": 1.3595064692863757e-07,
"loss": 0.1367,
"step": 2416
},
{
"epoch": 2.88,
"learning_rate": 1.3184024663103755e-07,
"loss": 0.1182,
"step": 2418
},
{
"epoch": 2.88,
"learning_rate": 1.277925295279725e-07,
"loss": 0.1297,
"step": 2420
},
{
"epoch": 2.88,
"learning_rate": 1.2380752133080433e-07,
"loss": 0.1067,
"step": 2422
},
{
"epoch": 2.88,
"learning_rate": 1.198852473525669e-07,
"loss": 0.1483,
"step": 2424
},
{
"epoch": 2.88,
"learning_rate": 1.1602573250779958e-07,
"loss": 0.1534,
"step": 2426
},
{
"epoch": 2.89,
"learning_rate": 1.1222900131239279e-07,
"loss": 0.1177,
"step": 2428
},
{
"epoch": 2.89,
"learning_rate": 1.0849507788343038e-07,
"loss": 0.2059,
"step": 2430
},
{
"epoch": 2.89,
"learning_rate": 1.0482398593903764e-07,
"loss": 0.1369,
"step": 2432
},
{
"epoch": 2.89,
"learning_rate": 1.0121574879823015e-07,
"loss": 0.1178,
"step": 2434
},
{
"epoch": 2.9,
"learning_rate": 9.767038938076511e-08,
"loss": 0.1323,
"step": 2436
},
{
"epoch": 2.9,
"learning_rate": 9.418793020699813e-08,
"loss": 0.1272,
"step": 2438
},
{
"epoch": 2.9,
"learning_rate": 9.076839339773547e-08,
"loss": 0.112,
"step": 2440
},
{
"epoch": 2.9,
"learning_rate": 8.741180067409982e-08,
"loss": 0.1444,
"step": 2442
},
{
"epoch": 2.91,
"learning_rate": 8.411817335738482e-08,
"loss": 0.1269,
"step": 2444
},
{
"epoch": 2.91,
"learning_rate": 8.088753236892843e-08,
"loss": 0.1206,
"step": 2446
},
{
"epoch": 2.91,
"learning_rate": 7.771989822997206e-08,
"loss": 0.1349,
"step": 2448
},
{
"epoch": 2.91,
"learning_rate": 7.461529106153387e-08,
"loss": 0.1141,
"step": 2450
},
{
"epoch": 2.92,
"learning_rate": 7.15737305842823e-08,
"loss": 0.1265,
"step": 2452
},
{
"epoch": 2.92,
"learning_rate": 6.859523611840612e-08,
"loss": 0.12,
"step": 2454
},
{
"epoch": 2.92,
"learning_rate": 6.567982658349792e-08,
"loss": 0.1161,
"step": 2456
},
{
"epoch": 2.92,
"learning_rate": 6.282752049842855e-08,
"loss": 0.1389,
"step": 2458
},
{
"epoch": 2.93,
"learning_rate": 6.003833598123287e-08,
"loss": 0.1444,
"step": 2460
},
{
"epoch": 2.93,
"learning_rate": 5.731229074899203e-08,
"loss": 0.11,
"step": 2462
},
{
"epoch": 2.93,
"learning_rate": 5.464940211772574e-08,
"loss": 0.1272,
"step": 2464
},
{
"epoch": 2.93,
"learning_rate": 5.204968700227242e-08,
"loss": 0.1016,
"step": 2466
},
{
"epoch": 2.93,
"learning_rate": 4.951316191619593e-08,
"loss": 0.1521,
"step": 2468
},
{
"epoch": 2.94,
"learning_rate": 4.703984297166564e-08,
"loss": 0.142,
"step": 2470
},
{
"epoch": 2.94,
"learning_rate": 4.4629745879367634e-08,
"loss": 0.1034,
"step": 2472
},
{
"epoch": 2.94,
"learning_rate": 4.228288594839369e-08,
"loss": 0.118,
"step": 2474
},
{
"epoch": 2.94,
"learning_rate": 3.999927808615245e-08,
"loss": 0.1166,
"step": 2476
},
{
"epoch": 2.95,
"learning_rate": 3.777893679827061e-08,
"loss": 0.1518,
"step": 2478
},
{
"epoch": 2.95,
"learning_rate": 3.562187618849855e-08,
"loss": 0.1538,
"step": 2480
},
{
"epoch": 2.95,
"learning_rate": 3.352810995862932e-08,
"loss": 0.1053,
"step": 2482
},
{
"epoch": 2.95,
"learning_rate": 3.1497651408399774e-08,
"loss": 0.1095,
"step": 2484
},
{
"epoch": 2.96,
"learning_rate": 2.9530513435416243e-08,
"loss": 0.1574,
"step": 2486
},
{
"epoch": 2.96,
"learning_rate": 2.762670853506677e-08,
"loss": 0.1147,
"step": 2488
},
{
"epoch": 2.96,
"learning_rate": 2.578624880044567e-08,
"loss": 0.143,
"step": 2490
},
{
"epoch": 2.96,
"learning_rate": 2.4009145922271327e-08,
"loss": 0.0848,
"step": 2492
},
{
"epoch": 2.97,
"learning_rate": 2.2295411188819616e-08,
"loss": 0.1291,
"step": 2494
},
{
"epoch": 2.97,
"learning_rate": 2.0645055485842837e-08,
"loss": 0.132,
"step": 2496
},
{
"epoch": 2.97,
"learning_rate": 1.9058089296509762e-08,
"loss": 0.106,
"step": 2498
},
{
"epoch": 2.97,
"learning_rate": 1.753452270133238e-08,
"loss": 0.119,
"step": 2500
},
{
"epoch": 2.98,
"learning_rate": 1.6074365378105915e-08,
"loss": 0.1213,
"step": 2502
},
{
"epoch": 2.98,
"learning_rate": 1.4677626601843353e-08,
"loss": 0.1392,
"step": 2504
},
{
"epoch": 2.98,
"learning_rate": 1.3344315244722128e-08,
"loss": 0.1121,
"step": 2506
},
{
"epoch": 2.98,
"learning_rate": 1.2074439776021962e-08,
"loss": 0.1163,
"step": 2508
},
{
"epoch": 2.98,
"learning_rate": 1.0868008262076013e-08,
"loss": 0.149,
"step": 2510
},
{
"epoch": 2.99,
"learning_rate": 9.725028366214251e-09,
"loss": 0.1174,
"step": 2512
},
{
"epoch": 2.99,
"learning_rate": 8.64550734872016e-09,
"loss": 0.1226,
"step": 2514
},
{
"epoch": 2.99,
"learning_rate": 7.629452066783006e-09,
"loss": 0.1374,
"step": 2516
},
{
"epoch": 2.99,
"learning_rate": 6.6768689744500796e-09,
"loss": 0.112,
"step": 2518
},
{
"epoch": 3.0,
"learning_rate": 5.787764122592299e-09,
"loss": 0.1154,
"step": 2520
},
{
"epoch": 3.0,
"learning_rate": 4.9621431588620096e-09,
"loss": 0.1275,
"step": 2522
},
{
"epoch": 3.0,
"step": 2523,
"total_flos": 2232048114991104.0,
"train_loss": 0.2896275484415448,
"train_runtime": 144327.3604,
"train_samples_per_second": 0.559,
"train_steps_per_second": 0.017
}
],
"logging_steps": 2,
"max_steps": 2523,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 2232048114991104.0,
"trial_name": null,
"trial_params": null
}