{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.57446808510638, "eval_steps": 500, "global_step": 675, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.9999906714572185e-05, "loss": 2.0607, "step": 1 }, { "epoch": 0.21, "learning_rate": 4.999766789911305e-05, "loss": 2.0068, "step": 5 }, { "epoch": 0.43, "learning_rate": 4.999067203154777e-05, "loss": 1.8243, "step": 10 }, { "epoch": 0.55, "eval_loss": 1.6684231758117676, "eval_runtime": 4.0605, "eval_samples_per_second": 20.441, "eval_steps_per_second": 1.478, "step": 13 }, { "epoch": 1.06, "learning_rate": 4.9979013702509664e-05, "loss": 1.7052, "step": 15 }, { "epoch": 1.28, "learning_rate": 4.99626950870707e-05, "loss": 1.6022, "step": 20 }, { "epoch": 1.49, "learning_rate": 4.994171922976348e-05, "loss": 1.5291, "step": 25 }, { "epoch": 1.57, "eval_loss": 1.400346040725708, "eval_runtime": 3.9709, "eval_samples_per_second": 20.902, "eval_steps_per_second": 1.511, "step": 27 }, { "epoch": 2.13, "learning_rate": 4.991609004401324e-05, "loss": 1.4168, "step": 30 }, { "epoch": 2.34, "learning_rate": 4.988581231140772e-05, "loss": 1.312, "step": 35 }, { "epoch": 2.55, "learning_rate": 4.985089168080509e-05, "loss": 1.2355, "step": 40 }, { "epoch": 2.55, "eval_loss": 1.1807862520217896, "eval_runtime": 3.9823, "eval_samples_per_second": 20.842, "eval_steps_per_second": 1.507, "step": 40 }, { "epoch": 3.19, "learning_rate": 4.981133466728004e-05, "loss": 1.1754, "step": 45 }, { "epoch": 3.4, "learning_rate": 4.976714865090827e-05, "loss": 1.1393, "step": 50 }, { "epoch": 3.57, "eval_loss": 1.0949289798736572, "eval_runtime": 3.9683, "eval_samples_per_second": 20.916, "eval_steps_per_second": 1.512, "step": 54 }, { "epoch": 4.04, "learning_rate": 4.9718341875389625e-05, "loss": 1.123, "step": 55 }, { "epoch": 4.26, "learning_rate": 4.966492344651005e-05, "loss": 1.0919, "step": 60 }, { "epoch": 4.47, "learning_rate": 4.960690333044279e-05, "loss": 1.0659, "step": 65 }, { "epoch": 4.55, "eval_loss": 1.045737624168396, "eval_runtime": 3.9807, "eval_samples_per_second": 20.85, "eval_steps_per_second": 1.507, "step": 67 }, { "epoch": 5.11, "learning_rate": 4.9544292351888966e-05, "loss": 1.0601, "step": 70 }, { "epoch": 5.32, "learning_rate": 4.947710219205808e-05, "loss": 1.0387, "step": 75 }, { "epoch": 5.53, "learning_rate": 4.9405345386488614e-05, "loss": 1.0196, "step": 80 }, { "epoch": 5.57, "eval_loss": 1.0065257549285889, "eval_runtime": 3.9679, "eval_samples_per_second": 20.918, "eval_steps_per_second": 1.512, "step": 81 }, { "epoch": 6.17, "learning_rate": 4.9329035322709386e-05, "loss": 0.9975, "step": 85 }, { "epoch": 6.38, "learning_rate": 4.924818623774178e-05, "loss": 0.9831, "step": 90 }, { "epoch": 6.55, "eval_loss": 0.9685614109039307, "eval_runtime": 3.9721, "eval_samples_per_second": 20.896, "eval_steps_per_second": 1.511, "step": 94 }, { "epoch": 7.02, "learning_rate": 4.916281321544362e-05, "loss": 0.9791, "step": 95 }, { "epoch": 7.23, "learning_rate": 4.907293218369499e-05, "loss": 0.9494, "step": 100 }, { "epoch": 7.45, "learning_rate": 4.897855991142658e-05, "loss": 0.9281, "step": 105 }, { "epoch": 7.57, "eval_loss": 0.9254654049873352, "eval_runtime": 3.9691, "eval_samples_per_second": 20.911, "eval_steps_per_second": 1.512, "step": 108 }, { "epoch": 8.09, "learning_rate": 4.88797140054912e-05, "loss": 0.9186, "step": 110 }, { "epoch": 8.3, "learning_rate": 4.877641290737884e-05, "loss": 0.8878, "step": 115 }, { "epoch": 8.51, "learning_rate": 4.8668675889776095e-05, "loss": 0.8678, "step": 120 }, { "epoch": 8.55, "eval_loss": 0.8814197182655334, "eval_runtime": 3.978, "eval_samples_per_second": 20.865, "eval_steps_per_second": 1.508, "step": 121 }, { "epoch": 9.15, "learning_rate": 4.855652305297052e-05, "loss": 0.8535, "step": 125 }, { "epoch": 9.36, "learning_rate": 4.843997532110051e-05, "loss": 0.816, "step": 130 }, { "epoch": 9.57, "learning_rate": 4.831905443825159e-05, "loss": 0.8054, "step": 135 }, { "epoch": 9.57, "eval_loss": 0.8274821639060974, "eval_runtime": 3.9679, "eval_samples_per_second": 20.918, "eval_steps_per_second": 1.512, "step": 135 }, { "epoch": 10.21, "learning_rate": 4.819378296439961e-05, "loss": 0.7717, "step": 140 }, { "epoch": 10.43, "learning_rate": 4.806418427120179e-05, "loss": 0.7683, "step": 145 }, { "epoch": 10.55, "eval_loss": 0.7860919237136841, "eval_runtime": 3.9806, "eval_samples_per_second": 20.851, "eval_steps_per_second": 1.507, "step": 148 }, { "epoch": 11.06, "learning_rate": 4.793028253763633e-05, "loss": 0.741, "step": 150 }, { "epoch": 11.28, "learning_rate": 4.779210274549134e-05, "loss": 0.7057, "step": 155 }, { "epoch": 11.49, "learning_rate": 4.76496706747041e-05, "loss": 0.6906, "step": 160 }, { "epoch": 11.57, "eval_loss": 0.7272327542304993, "eval_runtime": 3.9685, "eval_samples_per_second": 20.915, "eval_steps_per_second": 1.512, "step": 162 }, { "epoch": 12.13, "learning_rate": 4.750301289855128e-05, "loss": 0.673, "step": 165 }, { "epoch": 12.34, "learning_rate": 4.735215677869128e-05, "loss": 0.6411, "step": 170 }, { "epoch": 12.55, "learning_rate": 4.719713046005938e-05, "loss": 0.6246, "step": 175 }, { "epoch": 12.55, "eval_loss": 0.6794944405555725, "eval_runtime": 3.9823, "eval_samples_per_second": 20.842, "eval_steps_per_second": 1.507, "step": 175 }, { "epoch": 13.19, "learning_rate": 4.703796286561679e-05, "loss": 0.6086, "step": 180 }, { "epoch": 13.4, "learning_rate": 4.687468369095457e-05, "loss": 0.5813, "step": 185 }, { "epoch": 13.57, "eval_loss": 0.6364239454269409, "eval_runtime": 3.9657, "eval_samples_per_second": 20.93, "eval_steps_per_second": 1.513, "step": 189 }, { "epoch": 14.04, "learning_rate": 4.6707323398753346e-05, "loss": 0.5586, "step": 190 }, { "epoch": 14.26, "learning_rate": 4.65359132131e-05, "loss": 0.5556, "step": 195 }, { "epoch": 14.47, "learning_rate": 4.6360485113662216e-05, "loss": 0.5253, "step": 200 }, { "epoch": 14.55, "eval_loss": 0.6078140139579773, "eval_runtime": 3.9649, "eval_samples_per_second": 20.934, "eval_steps_per_second": 1.513, "step": 202 }, { "epoch": 15.11, "learning_rate": 4.618107182972209e-05, "loss": 0.5329, "step": 205 }, { "epoch": 15.32, "learning_rate": 4.599770683406991e-05, "loss": 0.4948, "step": 210 }, { "epoch": 15.53, "learning_rate": 4.581042433675921e-05, "loss": 0.5149, "step": 215 }, { "epoch": 15.57, "eval_loss": 0.5811336636543274, "eval_runtime": 3.9681, "eval_samples_per_second": 20.917, "eval_steps_per_second": 1.512, "step": 216 }, { "epoch": 16.17, "learning_rate": 4.5619259278724214e-05, "loss": 0.4857, "step": 220 }, { "epoch": 16.38, "learning_rate": 4.542424732526105e-05, "loss": 0.4949, "step": 225 }, { "epoch": 16.55, "eval_loss": 0.5605343580245972, "eval_runtime": 3.9813, "eval_samples_per_second": 20.847, "eval_steps_per_second": 1.507, "step": 229 }, { "epoch": 17.02, "learning_rate": 4.522542485937369e-05, "loss": 0.4737, "step": 230 }, { "epoch": 17.23, "learning_rate": 4.5022828974986044e-05, "loss": 0.4675, "step": 235 }, { "epoch": 17.45, "learning_rate": 4.4816497470021454e-05, "loss": 0.4644, "step": 240 }, { "epoch": 17.57, "eval_loss": 0.5462371110916138, "eval_runtime": 3.9643, "eval_samples_per_second": 20.937, "eval_steps_per_second": 1.514, "step": 243 }, { "epoch": 18.09, "learning_rate": 4.4606468839350785e-05, "loss": 0.4565, "step": 245 }, { "epoch": 18.3, "learning_rate": 4.43927822676105e-05, "loss": 0.4365, "step": 250 }, { "epoch": 18.51, "learning_rate": 4.417547762189207e-05, "loss": 0.458, "step": 255 }, { "epoch": 18.55, "eval_loss": 0.5346133708953857, "eval_runtime": 3.9682, "eval_samples_per_second": 20.916, "eval_steps_per_second": 1.512, "step": 256 }, { "epoch": 19.15, "learning_rate": 4.395459544430407e-05, "loss": 0.449, "step": 260 }, { "epoch": 19.36, "learning_rate": 4.373017694440827e-05, "loss": 0.4401, "step": 265 }, { "epoch": 19.57, "learning_rate": 4.35022639915313e-05, "loss": 0.4294, "step": 270 }, { "epoch": 19.57, "eval_loss": 0.5201942920684814, "eval_runtime": 3.9685, "eval_samples_per_second": 20.915, "eval_steps_per_second": 1.512, "step": 270 }, { "epoch": 20.21, "learning_rate": 4.3270899106953105e-05, "loss": 0.4367, "step": 275 }, { "epoch": 20.43, "learning_rate": 4.3036125455973896e-05, "loss": 0.4143, "step": 280 }, { "epoch": 20.55, "eval_loss": 0.5177348852157593, "eval_runtime": 3.9827, "eval_samples_per_second": 20.84, "eval_steps_per_second": 1.507, "step": 283 }, { "epoch": 21.06, "learning_rate": 4.279798683986084e-05, "loss": 0.4244, "step": 285 }, { "epoch": 21.28, "learning_rate": 4.2556527687676186e-05, "loss": 0.4096, "step": 290 }, { "epoch": 21.49, "learning_rate": 4.231179304798815e-05, "loss": 0.4161, "step": 295 }, { "epoch": 21.57, "eval_loss": 0.5107729434967041, "eval_runtime": 3.9674, "eval_samples_per_second": 20.92, "eval_steps_per_second": 1.512, "step": 297 }, { "epoch": 22.13, "learning_rate": 4.206382858046636e-05, "loss": 0.4075, "step": 300 }, { "epoch": 22.34, "learning_rate": 4.181268054736318e-05, "loss": 0.4002, "step": 305 }, { "epoch": 22.55, "learning_rate": 4.1558395804882695e-05, "loss": 0.4128, "step": 310 }, { "epoch": 22.55, "eval_loss": 0.5056843757629395, "eval_runtime": 3.9783, "eval_samples_per_second": 20.863, "eval_steps_per_second": 1.508, "step": 310 }, { "epoch": 23.19, "learning_rate": 4.130102179443877e-05, "loss": 0.3928, "step": 315 }, { "epoch": 23.4, "learning_rate": 4.1040606533804024e-05, "loss": 0.4055, "step": 320 }, { "epoch": 23.57, "eval_loss": 0.5070647597312927, "eval_runtime": 3.9713, "eval_samples_per_second": 20.9, "eval_steps_per_second": 1.511, "step": 324 }, { "epoch": 24.04, "learning_rate": 4.077719860815132e-05, "loss": 0.4021, "step": 325 }, { "epoch": 24.26, "learning_rate": 4.051084716098921e-05, "loss": 0.3999, "step": 330 }, { "epoch": 24.47, "learning_rate": 4.0241601884993366e-05, "loss": 0.3937, "step": 335 }, { "epoch": 24.55, "eval_loss": 0.505768895149231, "eval_runtime": 3.9783, "eval_samples_per_second": 20.863, "eval_steps_per_second": 1.508, "step": 337 }, { "epoch": 25.11, "learning_rate": 3.996951301273557e-05, "loss": 0.3927, "step": 340 }, { "epoch": 25.32, "learning_rate": 3.969463130731183e-05, "loss": 0.377, "step": 345 }, { "epoch": 25.53, "learning_rate": 3.941700805287168e-05, "loss": 0.3967, "step": 350 }, { "epoch": 25.57, "eval_loss": 0.5016723871231079, "eval_runtime": 3.9654, "eval_samples_per_second": 20.931, "eval_steps_per_second": 1.513, "step": 351 }, { "epoch": 26.17, "learning_rate": 3.913669504505015e-05, "loss": 0.3859, "step": 355 }, { "epoch": 26.38, "learning_rate": 3.885374458130438e-05, "loss": 0.3754, "step": 360 }, { "epoch": 26.55, "eval_loss": 0.4997941851615906, "eval_runtime": 3.9766, "eval_samples_per_second": 20.872, "eval_steps_per_second": 1.509, "step": 364 }, { "epoch": 27.02, "learning_rate": 3.856820945115655e-05, "loss": 0.3853, "step": 365 }, { "epoch": 27.23, "learning_rate": 3.828014292634509e-05, "loss": 0.3852, "step": 370 }, { "epoch": 27.45, "learning_rate": 3.798959875088584e-05, "loss": 0.3742, "step": 375 }, { "epoch": 27.57, "eval_loss": 0.5018514394760132, "eval_runtime": 3.9719, "eval_samples_per_second": 20.897, "eval_steps_per_second": 1.511, "step": 378 }, { "epoch": 28.09, "learning_rate": 3.769663113104516e-05, "loss": 0.3706, "step": 380 }, { "epoch": 28.3, "learning_rate": 3.74012947252267e-05, "loss": 0.3759, "step": 385 }, { "epoch": 28.51, "learning_rate": 3.7103644633774014e-05, "loss": 0.3756, "step": 390 }, { "epoch": 28.55, "eval_loss": 0.5018544793128967, "eval_runtime": 3.9835, "eval_samples_per_second": 20.836, "eval_steps_per_second": 1.506, "step": 391 }, { "epoch": 29.15, "learning_rate": 3.680373638869047e-05, "loss": 0.3688, "step": 395 }, { "epoch": 29.36, "learning_rate": 3.6501625943278805e-05, "loss": 0.3764, "step": 400 }, { "epoch": 29.57, "learning_rate": 3.619736966170205e-05, "loss": 0.3652, "step": 405 }, { "epoch": 29.57, "eval_loss": 0.5060749650001526, "eval_runtime": 3.9634, "eval_samples_per_second": 20.942, "eval_steps_per_second": 1.514, "step": 405 }, { "epoch": 30.21, "learning_rate": 3.589102430846773e-05, "loss": 0.3688, "step": 410 }, { "epoch": 30.43, "learning_rate": 3.5582647037837445e-05, "loss": 0.3597, "step": 415 }, { "epoch": 30.55, "eval_loss": 0.5076042413711548, "eval_runtime": 3.9772, "eval_samples_per_second": 20.869, "eval_steps_per_second": 1.509, "step": 418 }, { "epoch": 31.06, "learning_rate": 3.527229538316371e-05, "loss": 0.3635, "step": 420 }, { "epoch": 31.28, "learning_rate": 3.496002724615604e-05, "loss": 0.3564, "step": 425 }, { "epoch": 31.49, "learning_rate": 3.464590088607839e-05, "loss": 0.3609, "step": 430 }, { "epoch": 31.57, "eval_loss": 0.5078853964805603, "eval_runtime": 3.9577, "eval_samples_per_second": 20.972, "eval_steps_per_second": 1.516, "step": 432 }, { "epoch": 32.13, "learning_rate": 3.4329974908879783e-05, "loss": 0.3679, "step": 435 }, { "epoch": 32.34, "learning_rate": 3.401230825626037e-05, "loss": 0.3498, "step": 440 }, { "epoch": 32.55, "learning_rate": 3.369296019467473e-05, "loss": 0.3581, "step": 445 }, { "epoch": 32.55, "eval_loss": 0.5108149647712708, "eval_runtime": 3.9725, "eval_samples_per_second": 20.894, "eval_steps_per_second": 1.51, "step": 445 }, { "epoch": 33.19, "learning_rate": 3.3371990304274656e-05, "loss": 0.3623, "step": 450 }, { "epoch": 33.4, "learning_rate": 3.304945846779346e-05, "loss": 0.3426, "step": 455 }, { "epoch": 33.57, "eval_loss": 0.511660635471344, "eval_runtime": 3.9737, "eval_samples_per_second": 20.887, "eval_steps_per_second": 1.51, "step": 459 }, { "epoch": 34.04, "learning_rate": 3.272542485937369e-05, "loss": 0.3469, "step": 460 }, { "epoch": 34.26, "learning_rate": 3.239994993334059e-05, "loss": 0.3513, "step": 465 }, { "epoch": 34.47, "learning_rate": 3.207309441292325e-05, "loss": 0.3481, "step": 470 }, { "epoch": 34.55, "eval_loss": 0.5140624046325684, "eval_runtime": 3.9754, "eval_samples_per_second": 20.878, "eval_steps_per_second": 1.509, "step": 472 }, { "epoch": 35.11, "learning_rate": 3.1744919278925605e-05, "loss": 0.3408, "step": 475 }, { "epoch": 35.32, "learning_rate": 3.1415485758349346e-05, "loss": 0.3469, "step": 480 }, { "epoch": 35.53, "learning_rate": 3.1084855312970896e-05, "loss": 0.3435, "step": 485 }, { "epoch": 35.57, "eval_loss": 0.5150405168533325, "eval_runtime": 3.9656, "eval_samples_per_second": 20.93, "eval_steps_per_second": 1.513, "step": 486 }, { "epoch": 36.17, "learning_rate": 3.075308962787466e-05, "loss": 0.3419, "step": 490 }, { "epoch": 36.38, "learning_rate": 3.0420250599944523e-05, "loss": 0.3317, "step": 495 }, { "epoch": 36.55, "eval_loss": 0.524531900882721, "eval_runtime": 3.975, "eval_samples_per_second": 20.881, "eval_steps_per_second": 1.509, "step": 499 }, { "epoch": 37.02, "learning_rate": 3.008640032631585e-05, "loss": 0.342, "step": 500 }, { "epoch": 37.23, "learning_rate": 2.9751601092790184e-05, "loss": 0.3352, "step": 505 }, { "epoch": 37.45, "learning_rate": 2.9415915362214692e-05, "loss": 0.3387, "step": 510 }, { "epoch": 37.57, "eval_loss": 0.5238548517227173, "eval_runtime": 3.9677, "eval_samples_per_second": 20.919, "eval_steps_per_second": 1.512, "step": 513 }, { "epoch": 38.09, "learning_rate": 2.907940576282856e-05, "loss": 0.3257, "step": 515 }, { "epoch": 38.3, "learning_rate": 2.874213507657861e-05, "loss": 0.3316, "step": 520 }, { "epoch": 38.51, "learning_rate": 2.840416622740617e-05, "loss": 0.332, "step": 525 }, { "epoch": 38.55, "eval_loss": 0.5318763852119446, "eval_runtime": 3.9794, "eval_samples_per_second": 20.858, "eval_steps_per_second": 1.508, "step": 526 }, { "epoch": 39.15, "learning_rate": 2.8065562269507463e-05, "loss": 0.3178, "step": 530 }, { "epoch": 39.36, "learning_rate": 2.7726386375569748e-05, "loss": 0.3287, "step": 535 }, { "epoch": 39.57, "learning_rate": 2.7386701824985255e-05, "loss": 0.3334, "step": 540 }, { "epoch": 39.57, "eval_loss": 0.5342416167259216, "eval_runtime": 3.9681, "eval_samples_per_second": 20.917, "eval_steps_per_second": 1.512, "step": 540 }, { "epoch": 40.21, "learning_rate": 2.7046571992045334e-05, "loss": 0.3303, "step": 545 }, { "epoch": 40.43, "learning_rate": 2.6706060334116777e-05, "loss": 0.323, "step": 550 }, { "epoch": 40.55, "eval_loss": 0.538831353187561, "eval_runtime": 3.9807, "eval_samples_per_second": 20.851, "eval_steps_per_second": 1.507, "step": 553 }, { "epoch": 41.06, "learning_rate": 2.636523037980275e-05, "loss": 0.3103, "step": 555 }, { "epoch": 41.28, "learning_rate": 2.6024145717090358e-05, "loss": 0.3232, "step": 560 }, { "epoch": 41.49, "learning_rate": 2.5682869981487152e-05, "loss": 0.3144, "step": 565 }, { "epoch": 41.57, "eval_loss": 0.5423159599304199, "eval_runtime": 3.9774, "eval_samples_per_second": 20.868, "eval_steps_per_second": 1.509, "step": 567 }, { "epoch": 42.13, "learning_rate": 2.5341466844148775e-05, "loss": 0.3109, "step": 570 }, { "epoch": 42.34, "learning_rate": 2.5e-05, "loss": 0.3162, "step": 575 }, { "epoch": 42.55, "learning_rate": 2.4658533155851228e-05, "loss": 0.3092, "step": 580 }, { "epoch": 42.55, "eval_loss": 0.54653400182724, "eval_runtime": 3.9802, "eval_samples_per_second": 20.853, "eval_steps_per_second": 1.507, "step": 580 }, { "epoch": 43.19, "learning_rate": 2.431713001851286e-05, "loss": 0.3175, "step": 585 }, { "epoch": 43.4, "learning_rate": 2.3975854282909644e-05, "loss": 0.3084, "step": 590 }, { "epoch": 43.57, "eval_loss": 0.5480648875236511, "eval_runtime": 3.9672, "eval_samples_per_second": 20.921, "eval_steps_per_second": 1.512, "step": 594 }, { "epoch": 44.04, "learning_rate": 2.3634769620197254e-05, "loss": 0.3039, "step": 595 }, { "epoch": 44.26, "learning_rate": 2.329393966588323e-05, "loss": 0.2977, "step": 600 }, { "epoch": 44.47, "learning_rate": 2.295342800795468e-05, "loss": 0.3091, "step": 605 }, { "epoch": 44.55, "eval_loss": 0.5604838728904724, "eval_runtime": 3.9783, "eval_samples_per_second": 20.863, "eval_steps_per_second": 1.508, "step": 607 }, { "epoch": 45.11, "learning_rate": 2.261329817501475e-05, "loss": 0.3087, "step": 610 }, { "epoch": 45.32, "learning_rate": 2.2273613624430255e-05, "loss": 0.2994, "step": 615 }, { "epoch": 45.53, "learning_rate": 2.1934437730492543e-05, "loss": 0.3044, "step": 620 }, { "epoch": 45.57, "eval_loss": 0.560636579990387, "eval_runtime": 3.9626, "eval_samples_per_second": 20.946, "eval_steps_per_second": 1.514, "step": 621 }, { "epoch": 46.17, "learning_rate": 2.159583377259384e-05, "loss": 0.2867, "step": 625 }, { "epoch": 46.38, "learning_rate": 2.1257864923421404e-05, "loss": 0.303, "step": 630 }, { "epoch": 46.55, "eval_loss": 0.5683414340019226, "eval_runtime": 3.9884, "eval_samples_per_second": 20.81, "eval_steps_per_second": 1.504, "step": 634 }, { "epoch": 47.02, "learning_rate": 2.092059423717145e-05, "loss": 0.2971, "step": 635 }, { "epoch": 47.23, "learning_rate": 2.0584084637785317e-05, "loss": 0.2923, "step": 640 }, { "epoch": 47.45, "learning_rate": 2.0248398907209826e-05, "loss": 0.2896, "step": 645 }, { "epoch": 47.57, "eval_loss": 0.572201669216156, "eval_runtime": 3.9766, "eval_samples_per_second": 20.872, "eval_steps_per_second": 1.509, "step": 648 }, { "epoch": 48.09, "learning_rate": 1.991359967368416e-05, "loss": 0.2963, "step": 650 }, { "epoch": 48.3, "learning_rate": 1.957974940005548e-05, "loss": 0.2849, "step": 655 }, { "epoch": 48.51, "learning_rate": 1.9246910372125342e-05, "loss": 0.2854, "step": 660 }, { "epoch": 48.55, "eval_loss": 0.5778339505195618, "eval_runtime": 3.9935, "eval_samples_per_second": 20.784, "eval_steps_per_second": 1.502, "step": 661 }, { "epoch": 49.15, "learning_rate": 1.8915144687029106e-05, "loss": 0.2884, "step": 665 }, { "epoch": 49.36, "learning_rate": 1.8584514241650666e-05, "loss": 0.2767, "step": 670 }, { "epoch": 49.57, "learning_rate": 1.825508072107439e-05, "loss": 0.291, "step": 675 }, { "epoch": 49.57, "eval_loss": 0.5825899839401245, "eval_runtime": 3.9815, "eval_samples_per_second": 20.846, "eval_steps_per_second": 1.507, "step": 675 }, { "epoch": 49.57, "step": 675, "total_flos": 6306831714484224.0, "train_loss": 0.5519325028525458, "train_runtime": 6501.4714, "train_samples_per_second": 5.737, "train_steps_per_second": 0.177 } ], "logging_steps": 5, "max_steps": 1150, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6306831714484224.0, "trial_name": null, "trial_params": null }