diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,29163 @@ +{ + "best_metric": 5.630408763885498, + "best_model_checkpoint": "Ghazal-L/checkpoint-7791000", + "epoch": 2.4790997910777524, + "eval_steps": 500, + "global_step": 9431500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999978722176e-05, + "loss": 8.6812, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999989221511e-05, + "loss": 7.9277, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999739178886e-05, + "loss": 7.5087, + "step": 15000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999951960287e-05, + "loss": 7.2437, + "step": 20000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999233550944e-05, + "loss": 7.0395, + "step": 25000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999888083142e-05, + "loss": 6.8838, + "step": 30000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998461726527e-05, + "loss": 6.7713, + "step": 35000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999797600462e-05, + "loss": 6.6587, + "step": 40000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999742374291e-05, + "loss": 6.5715, + "step": 45000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999680507188e-05, + "loss": 6.4858, + "step": 50000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996119600115e-05, + "loss": 6.4163, + "step": 55000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995367719034e-05, + "loss": 6.3559, + "step": 60000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994549298155e-05, + "loss": 6.2706, + "step": 65000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993664337485e-05, + "loss": 6.2375, + "step": 70000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999271303407e-05, + "loss": 6.1624, + "step": 75000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999169500715e-05, + "loss": 6.132, + "step": 80000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999061044045e-05, + "loss": 6.0399, + "step": 85000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998945933397e-05, + "loss": 6.0234, + "step": 90000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998824168772e-05, + "loss": 5.9853, + "step": 95000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998695776529e-05, + "loss": 5.9428, + "step": 100000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999856067759e-05, + "loss": 5.927, + "step": 105000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998418951035e-05, + "loss": 5.8845, + "step": 110000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998270600857e-05, + "loss": 5.8365, + "step": 115000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998115535997e-05, + "loss": 5.8354, + "step": 120000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997953847514e-05, + "loss": 5.8035, + "step": 125000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997785505056e-05, + "loss": 5.772, + "step": 130000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997610508625e-05, + "loss": 5.7249, + "step": 135000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999742885822e-05, + "loss": 5.7336, + "step": 140000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999724055384e-05, + "loss": 5.6806, + "step": 145000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997045595489e-05, + "loss": 5.6693, + "step": 150000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996844024168e-05, + "loss": 5.6532, + "step": 155000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999966357168684e-05, + "loss": 5.6243, + "step": 160000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996420840268e-05, + "loss": 5.6043, + "step": 165000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996199267362e-05, + "loss": 5.5726, + "step": 170000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995971040484e-05, + "loss": 5.5617, + "step": 175000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999957361119776e-05, + "loss": 5.562, + "step": 180000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999954945758306e-05, + "loss": 5.5073, + "step": 185000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995246385716e-05, + "loss": 5.5126, + "step": 190000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994991593288e-05, + "loss": 5.5045, + "step": 195000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994730043585e-05, + "loss": 5.4829, + "step": 200000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994461891568e-05, + "loss": 5.4754, + "step": 205000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994187085588e-05, + "loss": 5.4791, + "step": 210000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993905625642e-05, + "loss": 5.4468, + "step": 215000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993617511732e-05, + "loss": 5.4435, + "step": 220000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993322743859e-05, + "loss": 5.4306, + "step": 225000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993021382997e-05, + "loss": 5.4099, + "step": 230000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992713246226e-05, + "loss": 5.4021, + "step": 235000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992398516467e-05, + "loss": 5.3877, + "step": 240000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992077197718e-05, + "loss": 5.3769, + "step": 245000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991749161371e-05, + "loss": 5.3719, + "step": 250000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999914144710673e-05, + "loss": 5.3718, + "step": 255000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999910730578436e-05, + "loss": 5.3472, + "step": 260000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990725058294e-05, + "loss": 5.3371, + "step": 265000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990370476414e-05, + "loss": 5.3296, + "step": 270000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990009097329e-05, + "loss": 5.3426, + "step": 275000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989641210204e-05, + "loss": 5.3135, + "step": 280000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989266596169e-05, + "loss": 5.3124, + "step": 285000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999888852512325e-05, + "loss": 5.3119, + "step": 290000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988497327964e-05, + "loss": 5.3043, + "step": 295000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999881027507466e-05, + "loss": 5.3108, + "step": 300000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99998770151958e-05, + "loss": 5.2972, + "step": 305000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999987293552191e-05, + "loss": 5.265, + "step": 310000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999868790954055e-05, + "loss": 5.2639, + "step": 315000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999986457817462e-05, + "loss": 5.2773, + "step": 320000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99998602996918e-05, + "loss": 5.2507, + "step": 325000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999985595466956e-05, + "loss": 5.2486, + "step": 330000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99998515431079e-05, + "loss": 5.2741, + "step": 335000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999984706500683e-05, + "loss": 5.2326, + "step": 340000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999984251945041e-05, + "loss": 5.2543, + "step": 345000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999837909186514e-05, + "loss": 5.2391, + "step": 350000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999983323052472e-05, + "loss": 5.2242, + "step": 355000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999982848625283e-05, + "loss": 5.2275, + "step": 360000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999823675441594e-05, + "loss": 5.2365, + "step": 365000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99998187971085e-05, + "loss": 5.2114, + "step": 370000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999981385420113e-05, + "loss": 5.2166, + "step": 375000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999808843771935e-05, + "loss": 5.2035, + "step": 380000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999980376578098e-05, + "loss": 5.1739, + "step": 385000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999979862329567e-05, + "loss": 5.2064, + "step": 390000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999979341219955e-05, + "loss": 5.2046, + "step": 395000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999978813559995e-05, + "loss": 5.1882, + "step": 400000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99997827924611e-05, + "loss": 5.1813, + "step": 405000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999977738387206e-05, + "loss": 5.1692, + "step": 410000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999977190656576e-05, + "loss": 5.1503, + "step": 415000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999766363809286e-05, + "loss": 5.1606, + "step": 420000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999976075338466e-05, + "loss": 5.1564, + "step": 425000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999755077536534e-05, + "loss": 5.1689, + "step": 430000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999974933514925e-05, + "loss": 5.157, + "step": 435000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999974352622283e-05, + "loss": 5.1639, + "step": 440000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99997376507573e-05, + "loss": 5.1809, + "step": 445000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999973170875266e-05, + "loss": 5.1484, + "step": 450000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999725700208924e-05, + "loss": 5.1759, + "step": 455000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999719623903964e-05, + "loss": 5.1526, + "step": 460000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999713483504266e-05, + "loss": 5.1495, + "step": 465000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999970727659215e-05, + "loss": 5.1589, + "step": 470000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999970099938135e-05, + "loss": 5.1373, + "step": 475000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999969466067993e-05, + "loss": 5.155, + "step": 480000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999688251626596e-05, + "loss": 5.1218, + "step": 485000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999968177861174e-05, + "loss": 5.1333, + "step": 490000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999967523776921e-05, + "loss": 5.1422, + "step": 495000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999966862773041e-05, + "loss": 5.1352, + "step": 500000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999966195378339e-05, + "loss": 5.1379, + "step": 505000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999655214652764e-05, + "loss": 5.1377, + "step": 510000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999964840627267e-05, + "loss": 5.1024, + "step": 515000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999964153409091e-05, + "loss": 5.1358, + "step": 520000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999963459400171e-05, + "loss": 5.1273, + "step": 525000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999962758596512e-05, + "loss": 5.1146, + "step": 530000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999620514206824e-05, + "loss": 5.119, + "step": 535000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999961337306601e-05, + "loss": 5.109, + "step": 540000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999606168256764e-05, + "loss": 5.098, + "step": 545000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99995988954736e-05, + "loss": 5.1051, + "step": 550000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999959155615169e-05, + "loss": 5.1003, + "step": 555000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958415029108e-05, + "loss": 5.0914, + "step": 560000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999576677891766e-05, + "loss": 5.0988, + "step": 565000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999956913743872e-05, + "loss": 5.1097, + "step": 570000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999956153194876e-05, + "loss": 5.0982, + "step": 575000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999955385992017e-05, + "loss": 5.0839, + "step": 580000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999954612135296e-05, + "loss": 5.0799, + "step": 585000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999953831781546e-05, + "loss": 5.0851, + "step": 590000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999953044460278e-05, + "loss": 5.0826, + "step": 595000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999952250641985e-05, + "loss": 5.0825, + "step": 600000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999951450330663e-05, + "loss": 5.0746, + "step": 605000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999950643043841e-05, + "loss": 5.0877, + "step": 610000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999949829263996e-05, + "loss": 5.0676, + "step": 615000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999949008830302e-05, + "loss": 5.0661, + "step": 620000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999948181742765e-05, + "loss": 5.0697, + "step": 625000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999473478339056e-05, + "loss": 5.1043, + "step": 630000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999465074373544e-05, + "loss": 5.0648, + "step": 635000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999945660216822e-05, + "loss": 5.0778, + "step": 640000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99994480633979e-05, + "loss": 5.0595, + "step": 645000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999439459790695e-05, + "loss": 5.1031, + "step": 650000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999430791386565e-05, + "loss": 5.067, + "step": 655000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999942205471607e-05, + "loss": 5.0772, + "step": 660000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999941325150732e-05, + "loss": 5.0867, + "step": 665000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999404379979006e-05, + "loss": 5.0644, + "step": 670000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999939544368049e-05, + "loss": 5.0614, + "step": 675000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999938644084376e-05, + "loss": 5.0724, + "step": 680000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999377369647616e-05, + "loss": 5.0891, + "step": 685000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999368235555845e-05, + "loss": 5.0768, + "step": 690000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999935903125681e-05, + "loss": 5.0706, + "step": 695000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999349764115444e-05, + "loss": 5.0672, + "step": 700000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999340426713607e-05, + "loss": 5.0637, + "step": 705000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999933102652275e-05, + "loss": 5.07, + "step": 710000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999932155601822e-05, + "loss": 5.0515, + "step": 715000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999931202277797e-05, + "loss": 5.0552, + "step": 720000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999930241917085e-05, + "loss": 5.0438, + "step": 725000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999929275094023e-05, + "loss": 5.0637, + "step": 730000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999283016171696e-05, + "loss": 5.079, + "step": 735000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999927321289756e-05, + "loss": 5.0587, + "step": 740000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999926334305893e-05, + "loss": 5.0578, + "step": 745000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999925340865016e-05, + "loss": 5.0345, + "step": 750000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999924340569593e-05, + "loss": 5.042, + "step": 755000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999923333819825e-05, + "loss": 5.0546, + "step": 760000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99992232041628e-05, + "loss": 5.0553, + "step": 765000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999213001542037e-05, + "loss": 5.0472, + "step": 770000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999920273853963e-05, + "loss": 5.0469, + "step": 775000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999192400755945e-05, + "loss": 5.0548, + "step": 780000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999918200055641e-05, + "loss": 5.0352, + "step": 785000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999917153592009e-05, + "loss": 5.0211, + "step": 790000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999161000544494e-05, + "loss": 5.046, + "step": 795000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999150400732165e-05, + "loss": 5.0437, + "step": 800000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999913973652307e-05, + "loss": 5.0523, + "step": 805000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999912900149492e-05, + "loss": 5.0482, + "step": 810000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999911820207005e-05, + "loss": 5.0536, + "step": 815000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999910733392701e-05, + "loss": 5.0585, + "step": 820000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999096403607983e-05, + "loss": 5.049, + "step": 825000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999908540457083e-05, + "loss": 5.0388, + "step": 830000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999907433677567e-05, + "loss": 5.0137, + "step": 835000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999906320688447e-05, + "loss": 5.0421, + "step": 840000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905200823532e-05, + "loss": 5.0273, + "step": 845000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999904074078827e-05, + "loss": 5.0336, + "step": 850000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999029409051265e-05, + "loss": 5.044, + "step": 855000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999901801077705e-05, + "loss": 5.0324, + "step": 860000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999900654596564e-05, + "loss": 5.024, + "step": 865000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998995016930915e-05, + "loss": 5.0345, + "step": 870000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998983416731374e-05, + "loss": 5.0391, + "step": 875000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999897175230858e-05, + "loss": 5.0455, + "step": 880000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998960021348724e-05, + "loss": 5.0014, + "step": 885000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999894822621893e-05, + "loss": 5.0323, + "step": 890000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998936359817934e-05, + "loss": 5.0319, + "step": 895000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999892442685334e-05, + "loss": 5.02, + "step": 900000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998912432139255e-05, + "loss": 4.9959, + "step": 905000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999890036607419e-05, + "loss": 5.0338, + "step": 910000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999888823587929e-05, + "loss": 5.0296, + "step": 915000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999887603914754e-05, + "loss": 5.0287, + "step": 920000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999886377587899e-05, + "loss": 5.0292, + "step": 925000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998851443600044e-05, + "loss": 5.0319, + "step": 930000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999883904724465e-05, + "loss": 5.0326, + "step": 935000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998826584352544e-05, + "loss": 5.0304, + "step": 940000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999881405241021e-05, + "loss": 5.0287, + "step": 945000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999880145895832e-05, + "loss": 5.0116, + "step": 950000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999878879391611e-05, + "loss": 5.012, + "step": 955000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999877606486417e-05, + "loss": 5.0323, + "step": 960000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998763269275696e-05, + "loss": 5.005, + "step": 965000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998750407150696e-05, + "loss": 5.0243, + "step": 970000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999873747848923e-05, + "loss": 5.0379, + "step": 975000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999872448589805e-05, + "loss": 5.0232, + "step": 980000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998711421557005e-05, + "loss": 5.0341, + "step": 985000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999869829328632e-05, + "loss": 5.0154, + "step": 990000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999868509847928e-05, + "loss": 5.0392, + "step": 995000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999867183447596e-05, + "loss": 5.0064, + "step": 1000000 + }, + { + "epoch": 0.02, + "eval_loss": 5.773796558380127, + "eval_runtime": 79570.5409, + "eval_samples_per_second": 139.284, + "eval_steps_per_second": 27.857, + "step": 1000000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998658506583055e-05, + "loss": 5.0198, + "step": 1005000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998645114840526e-05, + "loss": 5.0128, + "step": 1010000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999863165118858e-05, + "loss": 5.008, + "step": 1015000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999861812097383e-05, + "loss": 5.0197, + "step": 1020000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998604526922896e-05, + "loss": 5.0019, + "step": 1025000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999859086633587e-05, + "loss": 5.0375, + "step": 1030000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998577139212795e-05, + "loss": 4.9895, + "step": 1035000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999856334555371e-05, + "loss": 4.9852, + "step": 1040000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998549485358634e-05, + "loss": 4.9984, + "step": 1045000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998535558627615e-05, + "loss": 5.0064, + "step": 1050000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998521568167126e-05, + "loss": 5.008, + "step": 1055000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998507505557915e-05, + "loss": 4.9982, + "step": 1060000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99984933792193e-05, + "loss": 5.0093, + "step": 1065000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999847918349852e-05, + "loss": 5.0144, + "step": 1070000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999846492693473e-05, + "loss": 5.0098, + "step": 1075000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999845060098885e-05, + "loss": 5.0126, + "step": 1080000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998436208507295e-05, + "loss": 4.993, + "step": 1085000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999842175238971e-05, + "loss": 5.008, + "step": 1090000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99984072239373e-05, + "loss": 5.0045, + "step": 1095000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998392631848933e-05, + "loss": 5.0101, + "step": 1100000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999837797322505e-05, + "loss": 4.991, + "step": 1105000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999836324511281e-05, + "loss": 4.994, + "step": 1110000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999834845340468e-05, + "loss": 5.0062, + "step": 1115000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999833359218165e-05, + "loss": 4.9875, + "step": 1120000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999831866738943e-05, + "loss": 5.0169, + "step": 1125000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998303679068e-05, + "loss": 5.0026, + "step": 1130000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998288618199054e-05, + "loss": 5.0003, + "step": 1135000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999827349683371e-05, + "loss": 5.0057, + "step": 1140000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998258305913745e-05, + "loss": 5.013, + "step": 1145000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99982430484586e-05, + "loss": 4.9979, + "step": 1150000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998227724468336e-05, + "loss": 5.0058, + "step": 1155000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999821233394298e-05, + "loss": 5.0132, + "step": 1160000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998196876882586e-05, + "loss": 5.0015, + "step": 1165000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999818135328719e-05, + "loss": 4.9969, + "step": 1170000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999816576003089e-05, + "loss": 4.984, + "step": 1175000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99981501033523e-05, + "loss": 4.9961, + "step": 1180000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999813438013882e-05, + "loss": 5.0074, + "step": 1185000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999811859039052e-05, + "loss": 5.0037, + "step": 1190000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999810273410741e-05, + "loss": 4.9901, + "step": 1195000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998086811289546e-05, + "loss": 4.9886, + "step": 1200000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999807081873117e-05, + "loss": 4.9975, + "step": 1205000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999805476604974e-05, + "loss": 4.9744, + "step": 1210000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998038640395454e-05, + "loss": 4.9892, + "step": 1215000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99980224514257e-05, + "loss": 4.9925, + "step": 1220000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99980061959214e-05, + "loss": 4.9752, + "step": 1225000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999798987388261e-05, + "loss": 5.0102, + "step": 1230000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997973482023686e-05, + "loss": 4.9919, + "step": 1235000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999795702690272e-05, + "loss": 4.9995, + "step": 1240000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999794050524739e-05, + "loss": 4.979, + "step": 1245000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999792391373212e-05, + "loss": 5.0021, + "step": 1250000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999790726233381e-05, + "loss": 4.9852, + "step": 1255000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999789053772341e-05, + "loss": 4.9791, + "step": 1260000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997873749917744e-05, + "loss": 4.9933, + "step": 1265000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999785689557794e-05, + "loss": 5.0006, + "step": 1270000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999783997470404e-05, + "loss": 4.989, + "step": 1275000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997822990701585e-05, + "loss": 5.0154, + "step": 1280000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997805933354134e-05, + "loss": 4.9763, + "step": 1285000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997788809446075e-05, + "loss": 4.9917, + "step": 1290000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999777162242293e-05, + "loss": 4.9771, + "step": 1295000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99977543688659e-05, + "loss": 4.9696, + "step": 1300000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997737045302987e-05, + "loss": 4.9729, + "step": 1305000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997719662150425e-05, + "loss": 4.9715, + "step": 1310000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999770220899206e-05, + "loss": 4.9749, + "step": 1315000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997684689300004e-05, + "loss": 4.9947, + "step": 1320000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999766710307432e-05, + "loss": 4.9725, + "step": 1325000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999764945031503e-05, + "loss": 4.9766, + "step": 1330000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997631731022206e-05, + "loss": 4.9832, + "step": 1335000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999761394519587e-05, + "loss": 4.9818, + "step": 1340000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99975960928361e-05, + "loss": 4.9924, + "step": 1345000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997578177534776e-05, + "loss": 5.0034, + "step": 1350000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997560188516365e-05, + "loss": 4.9997, + "step": 1355000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997542136556516e-05, + "loss": 5.0057, + "step": 1360000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99975240144316e-05, + "loss": 4.9847, + "step": 1365000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999750583303709e-05, + "loss": 4.9622, + "step": 1370000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997487581477596e-05, + "loss": 4.9807, + "step": 1375000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999746925971325e-05, + "loss": 4.9725, + "step": 1380000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999745087507427e-05, + "loss": 4.9928, + "step": 1385000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999743242390227e-05, + "loss": 4.9988, + "step": 1390000 + }, + { + "epoch": 0.02, + "learning_rate": 4.99974139061973e-05, + "loss": 4.9705, + "step": 1395000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997395321959415e-05, + "loss": 4.9875, + "step": 1400000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999737667118866e-05, + "loss": 4.9837, + "step": 1405000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999735795388508e-05, + "loss": 4.9921, + "step": 1410000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999733917004874e-05, + "loss": 5.0016, + "step": 1415000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997320323457906e-05, + "loss": 4.9757, + "step": 1420000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999730140656948e-05, + "loss": 4.9827, + "step": 1425000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997282426953294e-05, + "loss": 4.9861, + "step": 1430000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999726337319482e-05, + "loss": 4.9724, + "step": 1435000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997244256708686e-05, + "loss": 4.9922, + "step": 1440000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999722507369008e-05, + "loss": 4.9678, + "step": 1445000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997205824139066e-05, + "loss": 4.9937, + "step": 1450000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999718650805568e-05, + "loss": 4.986, + "step": 1455000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999716712155525e-05, + "loss": 4.9758, + "step": 1460000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997147672393976e-05, + "loss": 4.9773, + "step": 1465000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999712816061185e-05, + "loss": 4.9762, + "step": 1470000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999710857447485e-05, + "loss": 4.9724, + "step": 1475000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999708892177912e-05, + "loss": 4.9772, + "step": 1480000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999706920647601e-05, + "loss": 4.9673, + "step": 1485000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997049424640896e-05, + "loss": 4.9662, + "step": 1490000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997029580251743e-05, + "loss": 4.9891, + "step": 1495000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999700966137486e-05, + "loss": 4.9728, + "step": 1500000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9996989675939515e-05, + "loss": 5.0021, + "step": 1505000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999696963198146e-05, + "loss": 4.9789, + "step": 1510000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999694951748713e-05, + "loss": 4.9816, + "step": 1515000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999692933241664e-05, + "loss": 4.9701, + "step": 1520000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999690908484568e-05, + "loss": 4.9786, + "step": 1525000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999688876667205e-05, + "loss": 4.9885, + "step": 1530000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999686839010909e-05, + "loss": 4.9945, + "step": 1535000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999684793884584e-05, + "loss": 4.9886, + "step": 1540000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99968274251356e-05, + "loss": 4.9804, + "step": 1545000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996806844894e-05, + "loss": 4.9786, + "step": 1550000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996786198121085e-05, + "loss": 4.9756, + "step": 1555000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996765484816925e-05, + "loss": 4.9923, + "step": 1560000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999674470914586e-05, + "loss": 4.9675, + "step": 1565000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999672385861509e-05, + "loss": 4.9914, + "step": 1570000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996702949908427e-05, + "loss": 4.9874, + "step": 1575000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996681966288915e-05, + "loss": 4.9805, + "step": 1580000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996660924546876e-05, + "loss": 4.9592, + "step": 1585000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99966398120697e-05, + "loss": 4.9766, + "step": 1590000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999661862881752e-05, + "loss": 4.9733, + "step": 1595000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999659738326537e-05, + "loss": 4.9693, + "step": 1600000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999657607545326e-05, + "loss": 4.9853, + "step": 1605000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996554696852985e-05, + "loss": 4.958, + "step": 1610000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999653324742468e-05, + "loss": 4.9694, + "step": 1615000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996511740060606e-05, + "loss": 4.9776, + "step": 1620000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999649016186861e-05, + "loss": 4.9912, + "step": 1625000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999646852148348e-05, + "loss": 4.9824, + "step": 1630000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996446805893296e-05, + "loss": 4.9809, + "step": 1635000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999642502374613e-05, + "loss": 4.9915, + "step": 1640000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999640318379659e-05, + "loss": 4.9845, + "step": 1645000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996381272952867e-05, + "loss": 4.9653, + "step": 1650000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999635929557897e-05, + "loss": 4.9847, + "step": 1655000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999633725167497e-05, + "loss": 4.9643, + "step": 1660000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99963151368104e-05, + "loss": 4.9823, + "step": 1665000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996292959833035e-05, + "loss": 4.9676, + "step": 1670000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996270716325734e-05, + "loss": 4.969, + "step": 1675000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996248406288554e-05, + "loss": 4.9867, + "step": 1680000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999622602972156e-05, + "loss": 4.964, + "step": 1685000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996203586624806e-05, + "loss": 4.9894, + "step": 1690000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999618108150874e-05, + "loss": 4.9831, + "step": 1695000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996158505365965e-05, + "loss": 4.9843, + "step": 1700000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999613586269361e-05, + "loss": 4.9802, + "step": 1705000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999611315349173e-05, + "loss": 4.9813, + "step": 1710000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996090377760404e-05, + "loss": 4.9926, + "step": 1715000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999606753092274e-05, + "loss": 4.9658, + "step": 1720000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999604462211936e-05, + "loss": 4.9687, + "step": 1725000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999602164678672e-05, + "loss": 4.9948, + "step": 1730000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999599860492486e-05, + "loss": 4.9653, + "step": 1735000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995975496533836e-05, + "loss": 4.9896, + "step": 1740000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995952316970235e-05, + "loss": 4.9673, + "step": 1745000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995929080164594e-05, + "loss": 4.9952, + "step": 1750000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995905767516375e-05, + "loss": 4.9687, + "step": 1755000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995882397679486e-05, + "loss": 4.9719, + "step": 1760000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995858951946894e-05, + "loss": 4.9668, + "step": 1765000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995835449079e-05, + "loss": 4.9714, + "step": 1770000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995811870262294e-05, + "loss": 4.9448, + "step": 1775000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999578822962698e-05, + "loss": 4.9636, + "step": 1780000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995764527213054e-05, + "loss": 4.9678, + "step": 1785000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999574074877063e-05, + "loss": 4.9727, + "step": 1790000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999571690854973e-05, + "loss": 4.9729, + "step": 1795000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999569300180042e-05, + "loss": 4.9859, + "step": 1800000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995669023719547e-05, + "loss": 4.9872, + "step": 1805000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995644993533394e-05, + "loss": 4.9602, + "step": 1810000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999562088238272e-05, + "loss": 4.9965, + "step": 1815000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995596709520443e-05, + "loss": 4.9745, + "step": 1820000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999557246527361e-05, + "loss": 4.9714, + "step": 1825000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99955481642117e-05, + "loss": 4.9821, + "step": 1830000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999552379176537e-05, + "loss": 4.9864, + "step": 1835000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999549934789474e-05, + "loss": 4.9616, + "step": 1840000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995474847289105e-05, + "loss": 4.988, + "step": 1845000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995450275259297e-05, + "loss": 4.942, + "step": 1850000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995425636701806e-05, + "loss": 4.9557, + "step": 1855000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995400931616674e-05, + "loss": 4.9699, + "step": 1860000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995376160003995e-05, + "loss": 4.96, + "step": 1865000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999535131688754e-05, + "loss": 4.981, + "step": 1870000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995326412206615e-05, + "loss": 4.9691, + "step": 1875000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999530145100412e-05, + "loss": 4.9658, + "step": 1880000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995276408278954e-05, + "loss": 4.974, + "step": 1885000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999525130402946e-05, + "loss": 4.9582, + "step": 1890000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995226133252794e-05, + "loss": 4.9645, + "step": 1895000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999520090100517e-05, + "loss": 4.9447, + "step": 1900000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999517559718768e-05, + "loss": 4.9521, + "step": 1905000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995150226843245e-05, + "loss": 4.9638, + "step": 1910000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995124789971906e-05, + "loss": 4.9675, + "step": 1915000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995099286573746e-05, + "loss": 4.9582, + "step": 1920000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999507372177151e-05, + "loss": 4.9748, + "step": 1925000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995048085333206e-05, + "loss": 4.9858, + "step": 1930000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999502238236828e-05, + "loss": 4.9623, + "step": 1935000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999499661287681e-05, + "loss": 4.959, + "step": 1940000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994970776858845e-05, + "loss": 4.9808, + "step": 1945000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994944869125225e-05, + "loss": 4.9774, + "step": 1950000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994918905243735e-05, + "loss": 4.9379, + "step": 1955000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999489286964673e-05, + "loss": 4.9793, + "step": 1960000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994866762294347e-05, + "loss": 4.9586, + "step": 1965000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999484059887416e-05, + "loss": 4.9688, + "step": 1970000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999481435844294e-05, + "loss": 4.9639, + "step": 1975000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994788051459107e-05, + "loss": 4.9492, + "step": 1980000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994761683205125e-05, + "loss": 4.9648, + "step": 1985000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994735248425283e-05, + "loss": 4.9681, + "step": 1990000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99947087524287e-05, + "loss": 4.9756, + "step": 1995000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999468218461066e-05, + "loss": 4.9719, + "step": 2000000 + }, + { + "epoch": 0.03, + "eval_loss": 5.719051361083984, + "eval_runtime": 82549.8142, + "eval_samples_per_second": 134.257, + "eval_steps_per_second": 26.851, + "step": 2000000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994655544931316e-05, + "loss": 4.9601, + "step": 2005000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999462884404875e-05, + "loss": 4.9826, + "step": 2010000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994602082002936e-05, + "loss": 4.9646, + "step": 2015000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994575248082746e-05, + "loss": 4.9528, + "step": 2020000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999454834763717e-05, + "loss": 4.9636, + "step": 2025000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999452138606852e-05, + "loss": 4.9577, + "step": 2030000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999449434175471e-05, + "loss": 4.9785, + "step": 2035000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994467241720164e-05, + "loss": 4.969, + "step": 2040000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994440075160553e-05, + "loss": 4.9579, + "step": 2045000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994412836620476e-05, + "loss": 4.9636, + "step": 2050000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994385536997614e-05, + "loss": 4.945, + "step": 2055000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999435817084989e-05, + "loss": 4.9918, + "step": 2060000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999433073817738e-05, + "loss": 4.9331, + "step": 2065000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994303244488855e-05, + "loss": 4.943, + "step": 2070000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999427567325829e-05, + "loss": 4.9542, + "step": 2075000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999424805208249e-05, + "loss": 4.9669, + "step": 2080000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999422034778957e-05, + "loss": 4.9442, + "step": 2085000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999419258806948e-05, + "loss": 4.9446, + "step": 2090000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999416475070117e-05, + "loss": 4.9613, + "step": 2095000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994136852370523e-05, + "loss": 4.9802, + "step": 2100000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999410888751567e-05, + "loss": 4.9685, + "step": 2105000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99940808561367e-05, + "loss": 4.9578, + "step": 2110000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999405276386218e-05, + "loss": 4.9618, + "step": 2115000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9994024593806696e-05, + "loss": 4.9633, + "step": 2120000 + }, + { + "epoch": 0.03, + "learning_rate": 4.999399636285581e-05, + "loss": 4.9497, + "step": 2125000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99939680653811e-05, + "loss": 4.9753, + "step": 2130000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999393970138264e-05, + "loss": 4.9638, + "step": 2135000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993911270860516e-05, + "loss": 4.9457, + "step": 2140000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999388276810644e-05, + "loss": 4.9599, + "step": 2145000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999385421024554e-05, + "loss": 4.9752, + "step": 2150000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999382558015284e-05, + "loss": 4.9622, + "step": 2155000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999379687778851e-05, + "loss": 4.9644, + "step": 2160000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999376810887425e-05, + "loss": 4.9546, + "step": 2165000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993739279185063e-05, + "loss": 4.9692, + "step": 2170000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993710388760946e-05, + "loss": 4.9523, + "step": 2175000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999368142023735e-05, + "loss": 4.9523, + "step": 2180000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99936523967938e-05, + "loss": 4.9624, + "step": 2185000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993623301025824e-05, + "loss": 4.9495, + "step": 2190000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999359413289357e-05, + "loss": 4.9382, + "step": 2195000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99935649040667e-05, + "loss": 4.9535, + "step": 2200000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999353561458523e-05, + "loss": 4.9521, + "step": 2205000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999350624684502e-05, + "loss": 4.9546, + "step": 2210000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999347681845037e-05, + "loss": 4.9624, + "step": 2215000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999344732353327e-05, + "loss": 4.9458, + "step": 2220000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993417762093806e-05, + "loss": 4.9396, + "step": 2225000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999338813413207e-05, + "loss": 4.9561, + "step": 2230000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999335843964812e-05, + "loss": 4.929, + "step": 2235000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999332867864205e-05, + "loss": 4.9538, + "step": 2240000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999329884513938e-05, + "loss": 4.9436, + "step": 2245000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999326895107599e-05, + "loss": 4.9516, + "step": 2250000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993238996491875e-05, + "loss": 4.9513, + "step": 2255000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999320896338361e-05, + "loss": 4.9562, + "step": 2260000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99931788697548e-05, + "loss": 4.9438, + "step": 2265000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999314871564543e-05, + "loss": 4.9385, + "step": 2270000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99931184829323e-05, + "loss": 4.9435, + "step": 2275000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99930881958065e-05, + "loss": 4.9489, + "step": 2280000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993057836104884e-05, + "loss": 4.9593, + "step": 2285000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999302740988195e-05, + "loss": 4.969, + "step": 2290000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999299691103012e-05, + "loss": 4.9469, + "step": 2295000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999296634563051e-05, + "loss": 4.9486, + "step": 2300000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992935713683184e-05, + "loss": 4.9685, + "step": 2305000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999290503363099e-05, + "loss": 4.9482, + "step": 2310000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992874268628445e-05, + "loss": 4.9626, + "step": 2315000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999284344942683e-05, + "loss": 4.9382, + "step": 2320000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992812551356086e-05, + "loss": 4.9518, + "step": 2325000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999278158673804e-05, + "loss": 4.9347, + "step": 2330000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992750568001026e-05, + "loss": 4.9461, + "step": 2335000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992719476542707e-05, + "loss": 4.9411, + "step": 2340000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999268831856395e-05, + "loss": 4.9451, + "step": 2345000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992657087810803e-05, + "loss": 4.9322, + "step": 2350000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999262579051077e-05, + "loss": 4.9449, + "step": 2355000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99925944392253e-05, + "loss": 4.9457, + "step": 2360000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992563015152374e-05, + "loss": 4.9532, + "step": 2365000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999253152455943e-05, + "loss": 4.9344, + "step": 2370000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999249996744657e-05, + "loss": 4.9077, + "step": 2375000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999246834381387e-05, + "loss": 4.9646, + "step": 2380000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999243664731419e-05, + "loss": 4.9412, + "step": 2385000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999240489062875e-05, + "loss": 4.9373, + "step": 2390000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999237307379756e-05, + "loss": 4.9372, + "step": 2395000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999234118408634e-05, + "loss": 4.9428, + "step": 2400000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999230922145524e-05, + "loss": 4.9434, + "step": 2405000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992277198691964e-05, + "loss": 4.96, + "step": 2410000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999224510940943e-05, + "loss": 4.9458, + "step": 2415000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999221294716735e-05, + "loss": 4.9492, + "step": 2420000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999218073128698e-05, + "loss": 4.952, + "step": 2425000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992148435980214e-05, + "loss": 4.9514, + "step": 2430000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992116080608245e-05, + "loss": 4.9313, + "step": 2435000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999208365871745e-05, + "loss": 4.9407, + "step": 2440000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999205117030793e-05, + "loss": 4.9555, + "step": 2445000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9992018608859515e-05, + "loss": 4.9404, + "step": 2450000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999198598739948e-05, + "loss": 4.9434, + "step": 2455000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999195329287409e-05, + "loss": 4.9536, + "step": 2460000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991920538363895e-05, + "loss": 4.9463, + "step": 2465000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991887723908884e-05, + "loss": 4.9408, + "step": 2470000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991854836375476e-05, + "loss": 4.93, + "step": 2475000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991821875723846e-05, + "loss": 4.9485, + "step": 2480000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991788855140976e-05, + "loss": 4.9414, + "step": 2485000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999175576141343e-05, + "loss": 4.9339, + "step": 2490000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991722614421475e-05, + "loss": 4.9528, + "step": 2495000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999168939428502e-05, + "loss": 4.9414, + "step": 2500000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999165610763089e-05, + "loss": 4.9255, + "step": 2505000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99916227477792e-05, + "loss": 4.9432, + "step": 2510000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999158932807666e-05, + "loss": 4.9629, + "step": 2515000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99915558418567e-05, + "loss": 4.9463, + "step": 2520000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999152228239953e-05, + "loss": 4.9545, + "step": 2525000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999148866313169e-05, + "loss": 4.937, + "step": 2530000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991454970600206e-05, + "loss": 4.9416, + "step": 2535000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999142121828485e-05, + "loss": 4.9435, + "step": 2540000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999138739945254e-05, + "loss": 4.9542, + "step": 2545000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999135352088977e-05, + "loss": 4.9355, + "step": 2550000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999131956223735e-05, + "loss": 4.9257, + "step": 2555000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999128554385466e-05, + "loss": 4.9365, + "step": 2560000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991251458955366e-05, + "loss": 4.9443, + "step": 2565000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991217307539553e-05, + "loss": 4.9337, + "step": 2570000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999118310331327e-05, + "loss": 4.9442, + "step": 2575000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991148812025036e-05, + "loss": 4.942, + "step": 2580000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999111445419393e-05, + "loss": 4.9333, + "step": 2585000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999108004360586e-05, + "loss": 4.9404, + "step": 2590000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999104555271592e-05, + "loss": 4.9378, + "step": 2595000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999101100220292e-05, + "loss": 4.9194, + "step": 2600000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999097639210687e-05, + "loss": 4.9526, + "step": 2605000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999094170857552e-05, + "loss": 4.9416, + "step": 2610000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999090695156903e-05, + "loss": 4.9486, + "step": 2615000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999087213499306e-05, + "loss": 4.9481, + "step": 2620000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999083725888766e-05, + "loss": 4.9471, + "step": 2625000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9990802302294724e-05, + "loss": 4.9297, + "step": 2630000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999076728617252e-05, + "loss": 4.9311, + "step": 2635000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999073219650909e-05, + "loss": 4.9628, + "step": 2640000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999069705438252e-05, + "loss": 4.9256, + "step": 2645000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999066183871492e-05, + "loss": 4.9357, + "step": 2650000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999062655653236e-05, + "loss": 4.9444, + "step": 2655000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999059120783495e-05, + "loss": 4.9479, + "step": 2660000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9990555792622775e-05, + "loss": 4.9257, + "step": 2665000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999052031089594e-05, + "loss": 4.9425, + "step": 2670000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999048476265453e-05, + "loss": 4.9507, + "step": 2675000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999044914076619e-05, + "loss": 4.9667, + "step": 2680000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999041345948262e-05, + "loss": 4.9325, + "step": 2685000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999037771168475e-05, + "loss": 4.9479, + "step": 2690000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9990341897372705e-05, + "loss": 4.9516, + "step": 2695000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999030602373225e-05, + "loss": 4.9275, + "step": 2700000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99902700692064e-05, + "loss": 4.9423, + "step": 2705000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999023406256465e-05, + "loss": 4.9466, + "step": 2710000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9990197982210095e-05, + "loss": 4.9467, + "step": 2715000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999016183534183e-05, + "loss": 4.9458, + "step": 2720000 + }, + { + "epoch": 0.04, + "learning_rate": 4.99901256147077e-05, + "loss": 4.9614, + "step": 2725000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999008934206453e-05, + "loss": 4.9259, + "step": 2730000 + }, + { + "epoch": 0.04, + "learning_rate": 4.999005299565569e-05, + "loss": 4.9341, + "step": 2735000 + }, + { + "epoch": 0.05, + "learning_rate": 4.999001657544138e-05, + "loss": 4.943, + "step": 2740000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998998009599268e-05, + "loss": 4.9582, + "step": 2745000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989943557349615e-05, + "loss": 4.9442, + "step": 2750000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998990693755597e-05, + "loss": 4.9486, + "step": 2755000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998987026591355e-05, + "loss": 4.9424, + "step": 2760000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998983352042621e-05, + "loss": 4.9407, + "step": 2765000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998979670842613e-05, + "loss": 4.9422, + "step": 2770000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989759829913397e-05, + "loss": 4.9472, + "step": 2775000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989722892286726e-05, + "loss": 4.9282, + "step": 2780000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998968587335039e-05, + "loss": 4.9236, + "step": 2785000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998964880272554e-05, + "loss": 4.9466, + "step": 2790000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998961165817652e-05, + "loss": 4.9366, + "step": 2795000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998957444711535e-05, + "loss": 4.9328, + "step": 2800000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998953716954212e-05, + "loss": 4.9401, + "step": 2805000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989499832935395e-05, + "loss": 4.9316, + "step": 2810000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989462422351663e-05, + "loss": 4.9366, + "step": 2815000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998942494525618e-05, + "loss": 4.9367, + "step": 2820000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989387394130655e-05, + "loss": 4.9562, + "step": 2825000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998934978399865e-05, + "loss": 4.9364, + "step": 2830000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998931210735519e-05, + "loss": 4.9502, + "step": 2835000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989274364200364e-05, + "loss": 4.939, + "step": 2840000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9989236554534294e-05, + "loss": 4.92, + "step": 2845000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998919867835707e-05, + "loss": 4.9283, + "step": 2850000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998916072807056e-05, + "loss": 4.9325, + "step": 2855000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998912272646955e-05, + "loss": 4.9461, + "step": 2860000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998908464313463e-05, + "loss": 4.9508, + "step": 2865000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998904650090048e-05, + "loss": 4.9215, + "step": 2870000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998900829980715e-05, + "loss": 4.937, + "step": 2875000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998897002456512e-05, + "loss": 4.9482, + "step": 2880000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998893168281264e-05, + "loss": 4.9501, + "step": 2885000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998889326685844e-05, + "loss": 4.9536, + "step": 2890000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998885479977676e-05, + "loss": 4.9603, + "step": 2895000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988816258493566e-05, + "loss": 4.9065, + "step": 2900000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988777642969024e-05, + "loss": 4.9258, + "step": 2905000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998873897639716e-05, + "loss": 4.926, + "step": 2910000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988700227826246e-05, + "loss": 4.8974, + "step": 2915000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998866142826144e-05, + "loss": 4.9423, + "step": 2920000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998862254664456e-05, + "loss": 4.9384, + "step": 2925000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988583606289376e-05, + "loss": 4.9316, + "step": 2930000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988544599424784e-05, + "loss": 4.9531, + "step": 2935000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998850552605088e-05, + "loss": 4.9239, + "step": 2940000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998846637833e-05, + "loss": 4.9559, + "step": 2945000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998842717192447e-05, + "loss": 4.9289, + "step": 2950000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998838789900995e-05, + "loss": 4.9373, + "step": 2955000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988348559586525e-05, + "loss": 4.9325, + "step": 2960000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998830914576331e-05, + "loss": 4.9291, + "step": 2965000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988269673309104e-05, + "loss": 4.9247, + "step": 2970000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998823013434632e-05, + "loss": 4.9331, + "step": 2975000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988190528875056e-05, + "loss": 4.9405, + "step": 2980000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998815084895119e-05, + "loss": 4.9216, + "step": 2985000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998811111840752e-05, + "loss": 4.9334, + "step": 2990000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998807130544062e-05, + "loss": 4.9352, + "step": 2995000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9988031433923196e-05, + "loss": 4.9369, + "step": 3000000 + }, + { + "epoch": 0.05, + "eval_loss": 5.712026596069336, + "eval_runtime": 87990.2621, + "eval_samples_per_second": 125.956, + "eval_steps_per_second": 25.191, + "step": 3000000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987991495897836e-05, + "loss": 4.9361, + "step": 3005000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987951491364635e-05, + "loss": 4.9279, + "step": 3010000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998791142032369e-05, + "loss": 4.9049, + "step": 3015000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998787128277512e-05, + "loss": 4.9403, + "step": 3020000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987831078719035e-05, + "loss": 4.9221, + "step": 3025000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998779080815553e-05, + "loss": 4.9696, + "step": 3030000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998775047108472e-05, + "loss": 4.9318, + "step": 3035000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998771008368791e-05, + "loss": 4.9252, + "step": 3040000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998766960552552e-05, + "loss": 4.9268, + "step": 3045000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998762906894674e-05, + "loss": 4.9176, + "step": 3050000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998758846586109e-05, + "loss": 4.9199, + "step": 3055000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998754779626866e-05, + "loss": 4.9302, + "step": 3060000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987507076483844e-05, + "loss": 4.9306, + "step": 3065000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998746625756395e-05, + "loss": 4.938, + "step": 3070000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998742538026813e-05, + "loss": 4.9427, + "step": 3075000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998738445283347e-05, + "loss": 4.9361, + "step": 3080000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987343442498474e-05, + "loss": 4.9058, + "step": 3085000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987302382078084e-05, + "loss": 4.9278, + "step": 3090000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987261238704355e-05, + "loss": 4.9324, + "step": 3095000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998722003704839e-05, + "loss": 4.9023, + "step": 3100000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998717877715024e-05, + "loss": 4.9327, + "step": 3105000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998713744249611e-05, + "loss": 4.9507, + "step": 3110000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998709603304623e-05, + "loss": 4.9325, + "step": 3115000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987054557064264e-05, + "loss": 4.9206, + "step": 3120000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9987013031183975e-05, + "loss": 4.927, + "step": 3125000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998697143049494e-05, + "loss": 4.9498, + "step": 3130000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986929763300786e-05, + "loss": 4.9347, + "step": 3135000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986888021244875e-05, + "loss": 4.9294, + "step": 3140000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986846221027494e-05, + "loss": 4.9208, + "step": 3145000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998680436268866e-05, + "loss": 4.9293, + "step": 3150000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998676242947511e-05, + "loss": 4.9128, + "step": 3155000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986720421347034e-05, + "loss": 4.9213, + "step": 3160000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998667836353441e-05, + "loss": 4.9289, + "step": 3165000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986636222370914e-05, + "loss": 4.931, + "step": 3170000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986594023126456e-05, + "loss": 4.9282, + "step": 3175000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986551757377863e-05, + "loss": 4.9166, + "step": 3180000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998650942512527e-05, + "loss": 4.9288, + "step": 3185000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998646703485857e-05, + "loss": 4.9499, + "step": 3190000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998642456110851e-05, + "loss": 4.9204, + "step": 3195000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986382029344566e-05, + "loss": 4.9312, + "step": 3200000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986339422547345e-05, + "loss": 4.9217, + "step": 3205000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986296766306115e-05, + "loss": 4.9027, + "step": 3210000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998625402647551e-05, + "loss": 4.9325, + "step": 3215000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998621124582399e-05, + "loss": 4.8991, + "step": 3220000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986168372973765e-05, + "loss": 4.9357, + "step": 3225000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9986125442190176e-05, + "loss": 4.9251, + "step": 3230000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998608243629417e-05, + "loss": 4.9276, + "step": 3235000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998603938111451e-05, + "loss": 4.9316, + "step": 3240000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998599625082265e-05, + "loss": 4.9187, + "step": 3245000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985953054028264e-05, + "loss": 4.927, + "step": 3250000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985909790731457e-05, + "loss": 4.9324, + "step": 3255000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985866469608434e-05, + "loss": 4.9167, + "step": 3260000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998582306463107e-05, + "loss": 4.9196, + "step": 3265000 + }, + { + "epoch": 0.05, + "learning_rate": 4.99857796018277e-05, + "loss": 4.9038, + "step": 3270000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985736072522395e-05, + "loss": 4.9262, + "step": 3275000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998569248544454e-05, + "loss": 4.9168, + "step": 3280000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998564882314898e-05, + "loss": 4.9056, + "step": 3285000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998560508559589e-05, + "loss": 4.9094, + "step": 3290000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998556129028392e-05, + "loss": 4.914, + "step": 3295000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985517428470584e-05, + "loss": 4.9264, + "step": 3300000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998547349136016e-05, + "loss": 4.9283, + "step": 3305000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998542950534025e-05, + "loss": 4.9277, + "step": 3310000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998538543520106e-05, + "loss": 4.9495, + "step": 3315000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998534130737009e-05, + "loss": 4.9314, + "step": 3320000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998529711303834e-05, + "loss": 4.9316, + "step": 3325000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985252861068276e-05, + "loss": 4.9105, + "step": 3330000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985208524872937e-05, + "loss": 4.9296, + "step": 3335000 + }, + { + "epoch": 0.05, + "learning_rate": 4.998516413103953e-05, + "loss": 4.9421, + "step": 3340000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9985119670705816e-05, + "loss": 4.9085, + "step": 3345000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9985075143871895e-05, + "loss": 4.9252, + "step": 3350000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998503055053789e-05, + "loss": 4.9306, + "step": 3355000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998498589070394e-05, + "loss": 4.9169, + "step": 3360000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9984941164370134e-05, + "loss": 4.923, + "step": 3365000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9984896371536615e-05, + "loss": 4.9267, + "step": 3370000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9984851512203496e-05, + "loss": 4.9252, + "step": 3375000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998480658637089e-05, + "loss": 4.9304, + "step": 3380000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998476159403892e-05, + "loss": 4.9267, + "step": 3385000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998471654422973e-05, + "loss": 4.9305, + "step": 3390000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998467140987736e-05, + "loss": 4.9249, + "step": 3395000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998462621804802e-05, + "loss": 4.9292, + "step": 3400000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998458096878174e-05, + "loss": 4.9303, + "step": 3405000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9984535643968045e-05, + "loss": 4.9385, + "step": 3410000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998449025265572e-05, + "loss": 4.9136, + "step": 3415000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998444479484487e-05, + "loss": 4.9172, + "step": 3420000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998439926142046e-05, + "loss": 4.9149, + "step": 3425000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998435367972809e-05, + "loss": 4.9248, + "step": 3430000 + }, + { + "epoch": 0.06, + "learning_rate": 4.99843080224224e-05, + "loss": 4.938, + "step": 3435000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998426230777375e-05, + "loss": 4.9206, + "step": 3440000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998421650831704e-05, + "loss": 4.9468, + "step": 3445000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998417064233592e-05, + "loss": 4.8991, + "step": 3450000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998412471902551e-05, + "loss": 4.927, + "step": 3455000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998407872921756e-05, + "loss": 4.9184, + "step": 3460000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998403267291217e-05, + "loss": 4.9265, + "step": 3465000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983986568579285e-05, + "loss": 4.9239, + "step": 3470000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998394036080961e-05, + "loss": 4.9122, + "step": 3475000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983894114274195e-05, + "loss": 4.9104, + "step": 3480000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983847782718816e-05, + "loss": 4.9235, + "step": 3485000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998380139392814e-05, + "loss": 4.9364, + "step": 3490000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983754947942206e-05, + "loss": 4.9282, + "step": 3495000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998370841685685e-05, + "loss": 4.9226, + "step": 3500000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998366182857648e-05, + "loss": 4.9129, + "step": 3505000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983615183141135e-05, + "loss": 4.931, + "step": 3510000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998356845252692e-05, + "loss": 4.9167, + "step": 3515000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998352166475797e-05, + "loss": 4.9115, + "step": 3520000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998347481049308e-05, + "loss": 4.9167, + "step": 3525000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998342788973237e-05, + "loss": 4.9318, + "step": 3530000 + }, + { + "epoch": 0.06, + "learning_rate": 4.99833808930681e-05, + "loss": 4.9451, + "step": 3535000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998333383930281e-05, + "loss": 4.93, + "step": 3540000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983286719042096e-05, + "loss": 4.9041, + "step": 3545000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998323954173383e-05, + "loss": 4.9116, + "step": 3550000 + }, + { + "epoch": 0.06, + "learning_rate": 4.99831922884959e-05, + "loss": 4.914, + "step": 3555000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983144968762904e-05, + "loss": 4.9201, + "step": 3560000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983097582534974e-05, + "loss": 4.925, + "step": 3565000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998305012981222e-05, + "loss": 4.9243, + "step": 3570000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9983002601080476e-05, + "loss": 4.9237, + "step": 3575000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982955005827555e-05, + "loss": 4.922, + "step": 3580000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998290736313542e-05, + "loss": 4.9136, + "step": 3585000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998285964442138e-05, + "loss": 4.9179, + "step": 3590000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998281185921314e-05, + "loss": 4.9133, + "step": 3595000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998276399793003e-05, + "loss": 4.9165, + "step": 3600000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998271607972051e-05, + "loss": 4.932, + "step": 3605000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998266810462462e-05, + "loss": 4.9317, + "step": 3610000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998262004382019e-05, + "loss": 4.9294, + "step": 3615000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998257191649561e-05, + "loss": 4.9324, + "step": 3620000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998252374194571e-05, + "loss": 4.9324, + "step": 3625000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998247548160782e-05, + "loss": 4.9123, + "step": 3630000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998242716442411e-05, + "loss": 4.9102, + "step": 3635000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998237877106012e-05, + "loss": 4.9151, + "step": 3640000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998233031117661e-05, + "loss": 4.9093, + "step": 3645000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982281813915333e-05, + "loss": 4.9394, + "step": 3650000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982233221033106e-05, + "loss": 4.9352, + "step": 3655000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982184571372234e-05, + "loss": 4.9184, + "step": 3660000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998213585521898e-05, + "loss": 4.9127, + "step": 3665000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982087072573466e-05, + "loss": 4.9108, + "step": 3670000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9982038233216225e-05, + "loss": 4.928, + "step": 3675000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981989307806196e-05, + "loss": 4.9343, + "step": 3680000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981940325684696e-05, + "loss": 4.9287, + "step": 3685000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998189127707147e-05, + "loss": 4.9036, + "step": 3690000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998184217180023e-05, + "loss": 4.914, + "step": 3695000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998179299021723e-05, + "loss": 4.9034, + "step": 3700000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998174373228266e-05, + "loss": 4.9301, + "step": 3705000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998169443745082e-05, + "loss": 4.9152, + "step": 3710000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998164504652066e-05, + "loss": 4.9493, + "step": 3715000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981595598973076e-05, + "loss": 4.9314, + "step": 3720000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998154607502125e-05, + "loss": 4.9165, + "step": 3725000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998149650440557e-05, + "loss": 4.9222, + "step": 3730000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998144685738593e-05, + "loss": 4.9132, + "step": 3735000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998139714387585e-05, + "loss": 4.9146, + "step": 3740000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981347353908855e-05, + "loss": 4.8965, + "step": 3745000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998129751738498e-05, + "loss": 4.9208, + "step": 3750000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981247594411195e-05, + "loss": 4.9253, + "step": 3755000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981197624934004e-05, + "loss": 4.9077, + "step": 3760000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9981147568953965e-05, + "loss": 4.8947, + "step": 3765000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998109745649085e-05, + "loss": 4.9191, + "step": 3770000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998104727753824e-05, + "loss": 4.9135, + "step": 3775000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998099704215603e-05, + "loss": 4.9328, + "step": 3780000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998094673023814e-05, + "loss": 4.9244, + "step": 3785000 + }, + { + "epoch": 0.06, + "learning_rate": 4.99808963417448e-05, + "loss": 4.9095, + "step": 3790000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998084589683556e-05, + "loss": 4.9019, + "step": 3795000 + }, + { + "epoch": 0.06, + "learning_rate": 4.99807953854375e-05, + "loss": 4.916, + "step": 3800000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998074480755075e-05, + "loss": 4.9155, + "step": 3805000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9980694153035866e-05, + "loss": 4.9176, + "step": 3810000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998064344215885e-05, + "loss": 4.907, + "step": 3815000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998059267495973e-05, + "loss": 4.9276, + "step": 3820000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998054182094008e-05, + "loss": 4.9121, + "step": 3825000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998049092079141e-05, + "loss": 4.9073, + "step": 3830000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998043993376928e-05, + "loss": 4.9338, + "step": 3835000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998038889045219e-05, + "loss": 4.9215, + "step": 3840000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998033779088021e-05, + "loss": 4.9176, + "step": 3845000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998028660435534e-05, + "loss": 4.9032, + "step": 3850000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9980235371835153e-05, + "loss": 4.9017, + "step": 3855000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998018406258177e-05, + "loss": 4.9277, + "step": 3860000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9980132676555404e-05, + "loss": 4.9127, + "step": 3865000 + }, + { + "epoch": 0.06, + "learning_rate": 4.998008124461394e-05, + "loss": 4.9288, + "step": 3870000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9980029735899784e-05, + "loss": 4.9136, + "step": 3875000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9979978160698975e-05, + "loss": 4.9349, + "step": 3880000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9979926519011656e-05, + "loss": 4.9386, + "step": 3885000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997987482119038e-05, + "loss": 4.9066, + "step": 3890000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997982304654375e-05, + "loss": 4.9161, + "step": 3895000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997977120541102e-05, + "loss": 4.8986, + "step": 3900000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9979719297792326e-05, + "loss": 4.9414, + "step": 3905000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997966733409344e-05, + "loss": 4.9378, + "step": 3910000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997961528309761e-05, + "loss": 4.9176, + "step": 3915000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997956317602186e-05, + "loss": 4.9413, + "step": 3920000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997951100246071e-05, + "loss": 4.9116, + "step": 3925000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997945876241428e-05, + "loss": 4.9077, + "step": 3930000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997940645588273e-05, + "loss": 4.9164, + "step": 3935000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997935408286618e-05, + "loss": 4.9314, + "step": 3940000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9979301643364784e-05, + "loss": 4.9021, + "step": 3945000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997924913737867e-05, + "loss": 4.9233, + "step": 3950000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997919656490799e-05, + "loss": 4.919, + "step": 3955000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997914392595288e-05, + "loss": 4.931, + "step": 3960000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9979091220513474e-05, + "loss": 4.9163, + "step": 3965000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997903844858991e-05, + "loss": 4.9235, + "step": 3970000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978985620760905e-05, + "loss": 4.922, + "step": 3975000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997893271588277e-05, + "loss": 4.9297, + "step": 3980000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978879733915735e-05, + "loss": 4.9327, + "step": 3985000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997882669605698e-05, + "loss": 4.9429, + "step": 3990000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997877359171477e-05, + "loss": 4.9304, + "step": 3995000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997872042088926e-05, + "loss": 4.9064, + "step": 4000000 + }, + { + "epoch": 0.07, + "eval_loss": 5.7131476402282715, + "eval_runtime": 89252.7005, + "eval_samples_per_second": 124.174, + "eval_steps_per_second": 24.835, + "step": 4000000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997866718358058e-05, + "loss": 4.9185, + "step": 4005000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978613890460555e-05, + "loss": 4.9265, + "step": 4010000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997856050951429e-05, + "loss": 4.9214, + "step": 4015000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978507072756974e-05, + "loss": 4.9284, + "step": 4020000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978453569517057e-05, + "loss": 4.9222, + "step": 4025000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997839999979468e-05, + "loss": 4.9012, + "step": 4030000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997834636359e-05, + "loss": 4.9201, + "step": 4035000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997829266090315e-05, + "loss": 4.9297, + "step": 4040000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978238902499056e-05, + "loss": 4.9262, + "step": 4045000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997818506686159e-05, + "loss": 4.9268, + "step": 4050000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978131164742385e-05, + "loss": 4.9287, + "step": 4055000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9978077206946275e-05, + "loss": 4.9217, + "step": 4060000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997802317187733e-05, + "loss": 4.9243, + "step": 4065000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997796908115836e-05, + "loss": 4.9359, + "step": 4070000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977914913140236e-05, + "loss": 4.8912, + "step": 4075000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997786067864109e-05, + "loss": 4.9456, + "step": 4080000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977806366789875e-05, + "loss": 4.9221, + "step": 4085000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977751999315824e-05, + "loss": 4.9373, + "step": 4090000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997769756536118e-05, + "loss": 4.9233, + "step": 4095000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99776430649261e-05, + "loss": 4.9114, + "step": 4100000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997758849801072e-05, + "loss": 4.933, + "step": 4105000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997753386461518e-05, + "loss": 4.9103, + "step": 4110000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997747916473964e-05, + "loss": 4.9157, + "step": 4115000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977424387419933e-05, + "loss": 4.9396, + "step": 4120000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997736956554913e-05, + "loss": 4.92, + "step": 4125000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977314666234434e-05, + "loss": 4.9271, + "step": 4130000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977259700440324e-05, + "loss": 4.9224, + "step": 4135000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997720466816694e-05, + "loss": 4.9348, + "step": 4140000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977149558383614e-05, + "loss": 4.933, + "step": 4145000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997709440418292e-05, + "loss": 4.9052, + "step": 4150000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9977039183529996e-05, + "loss": 4.9324, + "step": 4155000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976983874283555e-05, + "loss": 4.9387, + "step": 4160000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976928509615986e-05, + "loss": 4.9388, + "step": 4165000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997687307847002e-05, + "loss": 4.9191, + "step": 4170000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997681756973518e-05, + "loss": 4.9155, + "step": 4175000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976762016743485e-05, + "loss": 4.9267, + "step": 4180000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997670638616322e-05, + "loss": 4.9189, + "step": 4185000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997665068910515e-05, + "loss": 4.934, + "step": 4190000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997659492556942e-05, + "loss": 4.945, + "step": 4195000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976539095556184e-05, + "loss": 4.9301, + "step": 4200000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976483199065596e-05, + "loss": 4.9186, + "step": 4205000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997642723609778e-05, + "loss": 4.9274, + "step": 4210000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997637119543589e-05, + "loss": 4.9078, + "step": 4215000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997631509950082e-05, + "loss": 4.915, + "step": 4220000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99762589483326e-05, + "loss": 4.9113, + "step": 4225000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997620271945744e-05, + "loss": 4.9002, + "step": 4230000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976146412835595e-05, + "loss": 4.9034, + "step": 4235000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997609005099436e-05, + "loss": 4.9529, + "step": 4240000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9976033622676966e-05, + "loss": 4.9164, + "step": 4245000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997597712788356e-05, + "loss": 4.9256, + "step": 4250000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997592056661428e-05, + "loss": 4.9165, + "step": 4255000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997586392753256e-05, + "loss": 4.914, + "step": 4260000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997580723329871e-05, + "loss": 4.9178, + "step": 4265000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997575047258944e-05, + "loss": 4.9151, + "step": 4270000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975693645404924e-05, + "loss": 4.9352, + "step": 4275000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99756367517453e-05, + "loss": 4.9225, + "step": 4280000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975579803013945e-05, + "loss": 4.909, + "step": 4285000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997552276500131e-05, + "loss": 4.9245, + "step": 4290000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997546567191727e-05, + "loss": 4.9402, + "step": 4295000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997540850091558e-05, + "loss": 4.9305, + "step": 4300000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997535127486938e-05, + "loss": 4.9365, + "step": 4305000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975293993818726e-05, + "loss": 4.9068, + "step": 4310000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997523663483758e-05, + "loss": 4.917, + "step": 4315000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975179209382545e-05, + "loss": 4.9151, + "step": 4320000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975121728963405e-05, + "loss": 4.9292, + "step": 4325000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997506415905141e-05, + "loss": 4.9071, + "step": 4330000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975006534175614e-05, + "loss": 4.9275, + "step": 4335000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997494884282654e-05, + "loss": 4.9266, + "step": 4340000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997489108500434e-05, + "loss": 4.9184, + "step": 4345000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974833260709164e-05, + "loss": 4.9157, + "step": 4350000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997477538153061e-05, + "loss": 4.9291, + "step": 4355000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974717412700514e-05, + "loss": 4.9364, + "step": 4360000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997465940060339e-05, + "loss": 4.9038, + "step": 4365000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997460132206049e-05, + "loss": 4.9008, + "step": 4370000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997454315378674e-05, + "loss": 4.8946, + "step": 4375000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974484930670265e-05, + "loss": 4.9404, + "step": 4380000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974426652751126e-05, + "loss": 4.9428, + "step": 4385000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974368296704325e-05, + "loss": 4.8944, + "step": 4390000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997430987418594e-05, + "loss": 4.9099, + "step": 4395000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974251385196133e-05, + "loss": 4.9063, + "step": 4400000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9974192818012634e-05, + "loss": 4.9214, + "step": 4405000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997413420780287e-05, + "loss": 4.9207, + "step": 4410000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997407550765069e-05, + "loss": 4.9138, + "step": 4415000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997401675276343e-05, + "loss": 4.9118, + "step": 4420000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973957931405535e-05, + "loss": 4.9215, + "step": 4425000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997389905536608e-05, + "loss": 4.9255, + "step": 4430000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973840089278427e-05, + "loss": 4.9152, + "step": 4435000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997378108032506e-05, + "loss": 4.9019, + "step": 4440000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973721993099445e-05, + "loss": 4.9177, + "step": 4445000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997366283940397e-05, + "loss": 4.9176, + "step": 4450000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997360363109421e-05, + "loss": 4.9307, + "step": 4455000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997354434447279e-05, + "loss": 4.9169, + "step": 4460000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997348499138197e-05, + "loss": 4.9188, + "step": 4465000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997342557182193e-05, + "loss": 4.9217, + "step": 4470000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973366085792815e-05, + "loss": 4.917, + "step": 4475000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99733065452167e-05, + "loss": 4.9257, + "step": 4480000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997324692626321e-05, + "loss": 4.9169, + "step": 4485000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997318722889262e-05, + "loss": 4.8994, + "step": 4490000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973127476988794e-05, + "loss": 4.9202, + "step": 4495000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9973067658616696e-05, + "loss": 4.9199, + "step": 4500000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997300777377648e-05, + "loss": 4.9212, + "step": 4505000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99729478224683e-05, + "loss": 4.9335, + "step": 4510000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9972887816707326e-05, + "loss": 4.9245, + "step": 4515000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99728277204487e-05, + "loss": 4.9058, + "step": 4520000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99727675697376e-05, + "loss": 4.9293, + "step": 4525000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997270735255918e-05, + "loss": 4.9101, + "step": 4530000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997264706891359e-05, + "loss": 4.8956, + "step": 4535000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9972586718801014e-05, + "loss": 4.9183, + "step": 4540000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997252630222159e-05, + "loss": 4.9539, + "step": 4545000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997246581917549e-05, + "loss": 4.9095, + "step": 4550000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997240526966287e-05, + "loss": 4.9115, + "step": 4555000 + }, + { + "epoch": 0.07, + "learning_rate": 4.99723446536839e-05, + "loss": 4.9332, + "step": 4560000 + }, + { + "epoch": 0.07, + "learning_rate": 4.997228395909073e-05, + "loss": 4.9382, + "step": 4565000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997222321016623e-05, + "loss": 4.9363, + "step": 4570000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9972162382601264e-05, + "loss": 4.9139, + "step": 4575000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997210151291977e-05, + "loss": 4.9042, + "step": 4580000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9972040552396946e-05, + "loss": 4.9126, + "step": 4585000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971979549811124e-05, + "loss": 4.9085, + "step": 4590000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997191845633109e-05, + "loss": 4.9193, + "step": 4595000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997185730860049e-05, + "loss": 4.9203, + "step": 4600000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997179609440499e-05, + "loss": 4.9222, + "step": 4605000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997173481374475e-05, + "loss": 4.9331, + "step": 4610000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997167345433896e-05, + "loss": 4.9181, + "step": 4615000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997161204073645e-05, + "loss": 4.9076, + "step": 4620000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971550548362114e-05, + "loss": 4.9337, + "step": 4625000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997148901413885e-05, + "loss": 4.9546, + "step": 4630000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997142740114409e-05, + "loss": 4.9294, + "step": 4635000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997136572168558e-05, + "loss": 4.9112, + "step": 4640000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971303975763485e-05, + "loss": 4.9278, + "step": 4645000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997124217575203e-05, + "loss": 4.9031, + "step": 4650000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971180296916536e-05, + "loss": 4.9047, + "step": 4655000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971118351617955e-05, + "loss": 4.9296, + "step": 4660000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9971056327442466e-05, + "loss": 4.9256, + "step": 4665000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997099424920488e-05, + "loss": 4.9212, + "step": 4670000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970932116945266e-05, + "loss": 4.9109, + "step": 4675000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997086990579595e-05, + "loss": 4.9353, + "step": 4680000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970807640651505e-05, + "loss": 4.9037, + "step": 4685000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997074528411066e-05, + "loss": 4.9395, + "step": 4690000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970682873575026e-05, + "loss": 4.9152, + "step": 4695000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997062039657762e-05, + "loss": 4.9204, + "step": 4700000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970557853118616e-05, + "loss": 4.9222, + "step": 4705000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997049524319816e-05, + "loss": 4.9172, + "step": 4710000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970432566816436e-05, + "loss": 4.9059, + "step": 4715000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997036982397361e-05, + "loss": 4.9106, + "step": 4720000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997030702724338e-05, + "loss": 4.945, + "step": 4725000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997024415149213e-05, + "loss": 4.9166, + "step": 4730000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970181196680155e-05, + "loss": 4.9219, + "step": 4735000 + }, + { + "epoch": 0.08, + "learning_rate": 4.997011818799457e-05, + "loss": 4.9137, + "step": 4740000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9970055112848724e-05, + "loss": 4.9339, + "step": 4745000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996999197124277e-05, + "loss": 4.9207, + "step": 4750000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996992876317689e-05, + "loss": 4.9344, + "step": 4755000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9969865488651235e-05, + "loss": 4.9365, + "step": 4760000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99698021603459e-05, + "loss": 4.9161, + "step": 4765000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9969738740221314e-05, + "loss": 4.9314, + "step": 4770000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9969675266317386e-05, + "loss": 4.9272, + "step": 4775000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996961172595436e-05, + "loss": 4.9435, + "step": 4780000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996954811913241e-05, + "loss": 4.9358, + "step": 4785000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9969484433105306e-05, + "loss": 4.9237, + "step": 4790000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996942070611243e-05, + "loss": 4.9297, + "step": 4795000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9969356899914735e-05, + "loss": 4.9316, + "step": 4800000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99692930272588e-05, + "loss": 4.9147, + "step": 4805000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99692290753452e-05, + "loss": 4.9103, + "step": 4810000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99691650569471e-05, + "loss": 4.9392, + "step": 4815000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996910098489087e-05, + "loss": 4.921, + "step": 4820000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996903684637706e-05, + "loss": 4.9247, + "step": 4825000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996897266711143e-05, + "loss": 4.9239, + "step": 4830000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996890838284354e-05, + "loss": 4.9129, + "step": 4835000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996884404497137e-05, + "loss": 4.9093, + "step": 4840000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996877964064233e-05, + "loss": 4.9133, + "step": 4845000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996871516985658e-05, + "loss": 4.9149, + "step": 4850000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9968650619695035e-05, + "loss": 4.9068, + "step": 4855000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996858601598309e-05, + "loss": 4.924, + "step": 4860000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9968521345814955e-05, + "loss": 4.9329, + "step": 4865000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9968456635109106e-05, + "loss": 4.9235, + "step": 4870000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9968391806110796e-05, + "loss": 4.9145, + "step": 4875000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996832693657513e-05, + "loss": 4.9195, + "step": 4880000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996826200058394e-05, + "loss": 4.9141, + "step": 4885000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996819701114978e-05, + "loss": 4.9266, + "step": 4890000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996813192923578e-05, + "loss": 4.9243, + "step": 4895000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996806679387913e-05, + "loss": 4.9194, + "step": 4900000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9968001592067685e-05, + "loss": 4.9293, + "step": 4905000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996793632380159e-05, + "loss": 4.9222, + "step": 4910000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996787100215986e-05, + "loss": 4.9123, + "step": 4915000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967805600998316e-05, + "loss": 4.9448, + "step": 4920000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967740120277244e-05, + "loss": 4.905, + "step": 4925000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996767458619436e-05, + "loss": 4.9361, + "step": 4930000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967608985657696e-05, + "loss": 4.9311, + "step": 4935000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967543318667445e-05, + "loss": 4.92, + "step": 4940000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967477598382376e-05, + "loss": 4.9233, + "step": 4945000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996741179849876e-05, + "loss": 4.9099, + "step": 4950000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996734593216209e-05, + "loss": 4.9289, + "step": 4955000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967279986174034e-05, + "loss": 4.9286, + "step": 4960000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996721398691846e-05, + "loss": 4.9171, + "step": 4965000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967147921210346e-05, + "loss": 4.9352, + "step": 4970000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967081789049855e-05, + "loss": 4.9172, + "step": 4975000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967015590437196e-05, + "loss": 4.9076, + "step": 4980000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966949325372506e-05, + "loss": 4.9229, + "step": 4985000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966882993855993e-05, + "loss": 4.9203, + "step": 4990000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966816609179375e-05, + "loss": 4.9071, + "step": 4995000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996675013146817e-05, + "loss": 4.9428, + "step": 5000000 + }, + { + "epoch": 0.08, + "eval_loss": 5.677032947540283, + "eval_runtime": 85195.0572, + "eval_samples_per_second": 130.089, + "eval_steps_per_second": 26.018, + "step": 5000000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996668361391535e-05, + "loss": 4.9122, + "step": 5005000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996661701660655e-05, + "loss": 4.902, + "step": 5010000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996655035284681e-05, + "loss": 4.9102, + "step": 5015000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99664836226363e-05, + "loss": 4.9285, + "step": 5020000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996641682597518e-05, + "loss": 4.9177, + "step": 5025000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966349962863656e-05, + "loss": 4.9285, + "step": 5030000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99662830466998e-05, + "loss": 4.9162, + "step": 5035000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966216050701264e-05, + "loss": 4.9232, + "step": 5040000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996614898825285e-05, + "loss": 4.9128, + "step": 5045000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996608187279251e-05, + "loss": 4.9258, + "step": 5050000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9966014677458165e-05, + "loss": 4.923, + "step": 5055000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9965947415674464e-05, + "loss": 4.9133, + "step": 5060000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996588007396391e-05, + "loss": 4.9094, + "step": 5065000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996581269275973e-05, + "loss": 4.9235, + "step": 5070000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996574523162906e-05, + "loss": 4.916, + "step": 5075000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996567770404976e-05, + "loss": 4.943, + "step": 5080000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996561011002201e-05, + "loss": 4.9123, + "step": 5085000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996554244954599e-05, + "loss": 4.9294, + "step": 5090000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9965474722621876e-05, + "loss": 4.9273, + "step": 5095000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996540692924985e-05, + "loss": 4.938, + "step": 5100000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99653390694301e-05, + "loss": 4.937, + "step": 5105000 + }, + { + "epoch": 0.08, + "learning_rate": 4.99652711431628e-05, + "loss": 4.9338, + "step": 5110000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996520315044813e-05, + "loss": 4.9217, + "step": 5115000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9965135091286275e-05, + "loss": 4.9369, + "step": 5120000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996506696567741e-05, + "loss": 4.9121, + "step": 5125000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996499878727224e-05, + "loss": 4.9089, + "step": 5130000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9964930528783196e-05, + "loss": 4.9013, + "step": 5135000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9964862203847704e-05, + "loss": 4.9176, + "step": 5140000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996479381246592e-05, + "loss": 4.9345, + "step": 5145000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996472534093435e-05, + "loss": 4.9277, + "step": 5150000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996465683036425e-05, + "loss": 4.9382, + "step": 5155000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996458823964473e-05, + "loss": 4.9261, + "step": 5160000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996451958247965e-05, + "loss": 4.9125, + "step": 5165000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9964450858869205e-05, + "loss": 4.9254, + "step": 5170000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996438208258374e-05, + "loss": 4.9183, + "step": 5175000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9964313226096405e-05, + "loss": 4.9256, + "step": 5180000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996424428936749e-05, + "loss": 4.9332, + "step": 5185000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996417531378745e-05, + "loss": 4.9342, + "step": 5190000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996410624414287e-05, + "loss": 4.9092, + "step": 5195000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996403712186406e-05, + "loss": 4.9144, + "step": 5200000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996396793314117e-05, + "loss": 4.9206, + "step": 5205000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9963898691837605e-05, + "loss": 4.9171, + "step": 5210000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9963829370240395e-05, + "loss": 4.9223, + "step": 5215000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996375996830984e-05, + "loss": 4.9195, + "step": 5220000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996369051381246e-05, + "loss": 4.9448, + "step": 5225000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996362099287192e-05, + "loss": 4.9378, + "step": 5230000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99635514054884e-05, + "loss": 4.9403, + "step": 5235000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9963481765605066e-05, + "loss": 4.9432, + "step": 5240000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996341203139317e-05, + "loss": 4.9481, + "step": 5245000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996334224468183e-05, + "loss": 4.9374, + "step": 5250000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996327239152826e-05, + "loss": 4.9241, + "step": 5255000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9963202485928804e-05, + "loss": 4.9357, + "step": 5260000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996313248589516e-05, + "loss": 4.9083, + "step": 5265000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9963062433416e-05, + "loss": 4.9228, + "step": 5270000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996299230045931e-05, + "loss": 4.911, + "step": 5275000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9962922115084065e-05, + "loss": 4.9283, + "step": 5280000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9962851863267704e-05, + "loss": 4.9118, + "step": 5285000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9962781559086337e-05, + "loss": 4.906, + "step": 5290000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99627111744016e-05, + "loss": 4.9237, + "step": 5295000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996264070917379e-05, + "loss": 4.9426, + "step": 5300000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996257019159484e-05, + "loss": 4.9223, + "step": 5305000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99624996075757e-05, + "loss": 4.9339, + "step": 5310000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996242895711657e-05, + "loss": 4.9214, + "step": 5315000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996235824021764e-05, + "loss": 4.9167, + "step": 5320000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9962287456879094e-05, + "loss": 4.9186, + "step": 5325000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996221660710113e-05, + "loss": 4.9382, + "step": 5330000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9962145690883925e-05, + "loss": 4.9006, + "step": 5335000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996207472243651e-05, + "loss": 4.9277, + "step": 5340000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996200365913254e-05, + "loss": 4.9307, + "step": 5345000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996193254359874e-05, + "loss": 4.9336, + "step": 5350000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996186137587521e-05, + "loss": 4.9094, + "step": 5355000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996179012747793e-05, + "loss": 4.9238, + "step": 5360000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996171881264256e-05, + "loss": 4.8896, + "step": 5365000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996164743136926e-05, + "loss": 4.9159, + "step": 5370000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996157598365824e-05, + "loss": 4.919, + "step": 5375000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99615044838249e-05, + "loss": 4.9258, + "step": 5380000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9961432888923795e-05, + "loss": 4.9135, + "step": 5385000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9961361241900764e-05, + "loss": 4.9231, + "step": 5390000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9961289528440744e-05, + "loss": 4.9105, + "step": 5395000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996121776291234e-05, + "loss": 4.9226, + "step": 5400000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996114590221061e-05, + "loss": 4.9253, + "step": 5405000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996107400383582e-05, + "loss": 4.9183, + "step": 5410000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9961002024643174e-05, + "loss": 4.9047, + "step": 5415000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9960929979014515e-05, + "loss": 4.9159, + "step": 5420000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9960857866950046e-05, + "loss": 4.917, + "step": 5425000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996078568844995e-05, + "loss": 4.9087, + "step": 5430000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996071344351443e-05, + "loss": 4.9094, + "step": 5435000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996064111766896e-05, + "loss": 4.9254, + "step": 5440000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996056873984987e-05, + "loss": 4.9297, + "step": 5445000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996049629559592e-05, + "loss": 4.9362, + "step": 5450000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996042378490731e-05, + "loss": 4.916, + "step": 5455000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9960351207784225e-05, + "loss": 4.909, + "step": 5460000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99602785496857e-05, + "loss": 4.9215, + "step": 5465000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9960205839680976e-05, + "loss": 4.944, + "step": 5470000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996013307781013e-05, + "loss": 4.8969, + "step": 5475000 + }, + { + "epoch": 0.09, + "learning_rate": 4.996006023495112e-05, + "loss": 4.9181, + "step": 5480000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995998732565861e-05, + "loss": 4.9069, + "step": 5485000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995991434993279e-05, + "loss": 4.9199, + "step": 5490000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995984129315294e-05, + "loss": 4.918, + "step": 5495000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959768184547806e-05, + "loss": 4.9056, + "step": 5500000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995969500950995e-05, + "loss": 4.9039, + "step": 5505000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959621782700374e-05, + "loss": 4.9216, + "step": 5510000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959548460136865e-05, + "loss": 4.9275, + "step": 5515000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995947508580201e-05, + "loss": 4.9196, + "step": 5520000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959401630334534e-05, + "loss": 4.9126, + "step": 5525000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995932813783667e-05, + "loss": 4.9173, + "step": 5530000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995925456420659e-05, + "loss": 4.9423, + "step": 5535000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959180924145146e-05, + "loss": 4.9511, + "step": 5540000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959107217652536e-05, + "loss": 4.9235, + "step": 5545000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9959033444728975e-05, + "loss": 4.915, + "step": 5550000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9958959620155066e-05, + "loss": 4.9189, + "step": 5555000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995888569958974e-05, + "loss": 4.924, + "step": 5560000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995881172737446e-05, + "loss": 4.9233, + "step": 5565000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995873768872901e-05, + "loss": 4.8955, + "step": 5570000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995866358365359e-05, + "loss": 4.8917, + "step": 5575000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995858939730149e-05, + "loss": 4.9227, + "step": 5580000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99585151593534e-05, + "loss": 4.9236, + "step": 5585000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995844085497593e-05, + "loss": 4.9225, + "step": 5590000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9958366484169264e-05, + "loss": 4.9129, + "step": 5595000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9958292032033563e-05, + "loss": 4.9159, + "step": 5600000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9958217543269164e-05, + "loss": 4.9225, + "step": 5605000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995814297317612e-05, + "loss": 4.9161, + "step": 5610000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9958068336654686e-05, + "loss": 4.9218, + "step": 5615000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957993618751835e-05, + "loss": 4.9026, + "step": 5620000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995791884936091e-05, + "loss": 4.914, + "step": 5625000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957844013542185e-05, + "loss": 4.9279, + "step": 5630000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995776911129587e-05, + "loss": 4.9127, + "step": 5635000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957694142622135e-05, + "loss": 4.9347, + "step": 5640000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995761910752121e-05, + "loss": 4.933, + "step": 5645000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957544005993284e-05, + "loss": 4.9166, + "step": 5650000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995746883803856e-05, + "loss": 4.9364, + "step": 5655000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995739360365722e-05, + "loss": 4.9375, + "step": 5660000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995731830284949e-05, + "loss": 4.9149, + "step": 5665000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995724293561555e-05, + "loss": 4.9096, + "step": 5670000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957167501955614e-05, + "loss": 4.9111, + "step": 5675000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9957092016982585e-05, + "loss": 4.9225, + "step": 5680000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995701643535854e-05, + "loss": 4.9161, + "step": 5685000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9956940802421815e-05, + "loss": 4.8988, + "step": 5690000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995686511821246e-05, + "loss": 4.9165, + "step": 5695000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995678935243883e-05, + "loss": 4.8971, + "step": 5700000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99567135202404e-05, + "loss": 4.917, + "step": 5705000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995663762161738e-05, + "loss": 4.9228, + "step": 5710000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995656164136424e-05, + "loss": 4.9218, + "step": 5715000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995648562509838e-05, + "loss": 4.9217, + "step": 5720000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99564095272028e-05, + "loss": 4.9123, + "step": 5725000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9956333362883434e-05, + "loss": 4.9256, + "step": 5730000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995625713214049e-05, + "loss": 4.9086, + "step": 5735000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995618083497417e-05, + "loss": 4.9183, + "step": 5740000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995610447138468e-05, + "loss": 4.9188, + "step": 5745000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995602804137222e-05, + "loss": 4.9211, + "step": 5750000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9955951544936986e-05, + "loss": 4.9245, + "step": 5755000 + }, + { + "epoch": 0.09, + "learning_rate": 4.99558749820792e-05, + "loss": 4.9131, + "step": 5760000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995579833746041e-05, + "loss": 4.9239, + "step": 5765000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995572164174481e-05, + "loss": 4.9015, + "step": 5770000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995564487960725e-05, + "loss": 4.9251, + "step": 5775000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9955568051047964e-05, + "loss": 4.9102, + "step": 5780000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995549115606712e-05, + "loss": 4.9056, + "step": 5785000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995541419466494e-05, + "loss": 4.9051, + "step": 5790000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995533716684163e-05, + "loss": 4.9242, + "step": 5795000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9955260088029054e-05, + "loss": 4.9139, + "step": 5800000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995518292737738e-05, + "loss": 4.9185, + "step": 5805000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9955105700305197e-05, + "loss": 4.9296, + "step": 5810000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995502840681269e-05, + "loss": 4.8918, + "step": 5815000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995495106238489e-05, + "loss": 4.8975, + "step": 5820000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9954873636065674e-05, + "loss": 4.8996, + "step": 5825000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995479614332676e-05, + "loss": 4.8957, + "step": 5830000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995471858416836e-05, + "loss": 4.9077, + "step": 5835000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995464095859067e-05, + "loss": 4.9201, + "step": 5840000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995456326659391e-05, + "loss": 4.9425, + "step": 5845000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995448550817828e-05, + "loss": 4.8956, + "step": 5850000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9954407683343996e-05, + "loss": 4.9371, + "step": 5855000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9954329807682375e-05, + "loss": 4.9212, + "step": 5860000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995425183442026e-05, + "loss": 4.9269, + "step": 5865000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995417379471352e-05, + "loss": 4.9197, + "step": 5870000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995409571982436e-05, + "loss": 4.9239, + "step": 5875000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9954017547255596e-05, + "loss": 4.8961, + "step": 5880000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9953939339557975e-05, + "loss": 4.9359, + "step": 5885000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995386104979886e-05, + "loss": 4.9142, + "step": 5890000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995378269362275e-05, + "loss": 4.938, + "step": 5895000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9953704271029855e-05, + "loss": 4.9212, + "step": 5900000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995362578202037e-05, + "loss": 4.9258, + "step": 5905000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995354722659451e-05, + "loss": 4.9309, + "step": 5910000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9953468604752496e-05, + "loss": 4.919, + "step": 5915000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995338991649452e-05, + "loss": 4.9288, + "step": 5920000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99533111618208e-05, + "loss": 4.9206, + "step": 5925000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99532323565087e-05, + "loss": 4.912, + "step": 5930000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9953153469017416e-05, + "loss": 4.9242, + "step": 5935000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9953074499307264e-05, + "loss": 4.9023, + "step": 5940000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995299547897265e-05, + "loss": 4.9384, + "step": 5945000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995291639222336e-05, + "loss": 4.9035, + "step": 5950000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995283722321596e-05, + "loss": 4.9171, + "step": 5955000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995275800362461e-05, + "loss": 4.9029, + "step": 5960000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99526787176192e-05, + "loss": 4.9086, + "step": 5965000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995259936519994e-05, + "loss": 4.9193, + "step": 5970000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99525199622638e-05, + "loss": 4.8984, + "step": 5975000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99524404611207e-05, + "loss": 4.9028, + "step": 5980000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995236090946114e-05, + "loss": 4.9133, + "step": 5985000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995228130732521e-05, + "loss": 4.9212, + "step": 5990000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9952201622853144e-05, + "loss": 4.9333, + "step": 5995000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9952121871968496e-05, + "loss": 4.9389, + "step": 6000000 + }, + { + "epoch": 0.1, + "eval_loss": 5.650006294250488, + "eval_runtime": 85085.7097, + "eval_samples_per_second": 130.256, + "eval_steps_per_second": 26.051, + "step": 6000000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995204205467148e-05, + "loss": 4.9401, + "step": 6005000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995196215497252e-05, + "loss": 4.9246, + "step": 6010000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995188220483811e-05, + "loss": 4.9318, + "step": 6015000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9951802204308315e-05, + "loss": 4.9152, + "step": 6020000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995172212136394e-05, + "loss": 4.8964, + "step": 6025000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995164198805119e-05, + "loss": 4.9242, + "step": 6030000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995156175624149e-05, + "loss": 4.9058, + "step": 6035000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9951481474063835e-05, + "loss": 4.9334, + "step": 6040000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995140112547552e-05, + "loss": 4.922, + "step": 6045000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995132071047674e-05, + "loss": 4.9226, + "step": 6050000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99512402451771e-05, + "loss": 4.8979, + "step": 6055000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9951159713494e-05, + "loss": 4.9251, + "step": 6060000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995107908315579e-05, + "loss": 4.9086, + "step": 6065000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995099840253064e-05, + "loss": 4.8939, + "step": 6070000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995091767165862e-05, + "loss": 4.9283, + "step": 6075000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995083685822821e-05, + "loss": 4.9162, + "step": 6080000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995075597838885e-05, + "loss": 4.9214, + "step": 6085000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995067503214076e-05, + "loss": 4.9047, + "step": 6090000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99505940356998e-05, + "loss": 4.9354, + "step": 6095000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9950512940419226e-05, + "loss": 4.9343, + "step": 6100000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9950431794946215e-05, + "loss": 4.9173, + "step": 6105000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995035058306534e-05, + "loss": 4.9254, + "step": 6110000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995026932104561e-05, + "loss": 4.9172, + "step": 6115000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995018796008083e-05, + "loss": 4.9204, + "step": 6120000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995010654897763e-05, + "loss": 4.8992, + "step": 6125000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995002507146743e-05, + "loss": 4.91, + "step": 6130000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9949943527550434e-05, + "loss": 4.9096, + "step": 6135000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994986191722687e-05, + "loss": 4.913, + "step": 6140000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9949780240496946e-05, + "loss": 4.9158, + "step": 6145000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994969849736089e-05, + "loss": 4.913, + "step": 6150000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994961668781891e-05, + "loss": 4.8981, + "step": 6155000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994953481187123e-05, + "loss": 4.9225, + "step": 6160000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994945286951806e-05, + "loss": 4.925, + "step": 6165000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994937086075963e-05, + "loss": 4.9059, + "step": 6170000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99492887691679e-05, + "loss": 4.914, + "step": 6175000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9949206644027836e-05, + "loss": 4.9309, + "step": 6180000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994912443605491e-05, + "loss": 4.9199, + "step": 6185000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9949042194613796e-05, + "loss": 4.9042, + "step": 6190000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99489598373775e-05, + "loss": 4.903, + "step": 6195000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994887741371067e-05, + "loss": 4.9259, + "step": 6200000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948794923613523e-05, + "loss": 4.9206, + "step": 6205000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948712383607545e-05, + "loss": 4.9163, + "step": 6210000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994862981026733e-05, + "loss": 4.935, + "step": 6215000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994854712093374e-05, + "loss": 4.896, + "step": 6220000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948464381731816e-05, + "loss": 4.9025, + "step": 6225000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994838155955287e-05, + "loss": 4.9165, + "step": 6230000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994829870412029e-05, + "loss": 4.9194, + "step": 6235000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994821576571112e-05, + "loss": 4.9007, + "step": 6240000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994813274428573e-05, + "loss": 4.8997, + "step": 6245000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994804968968709e-05, + "loss": 4.9116, + "step": 6250000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994796655207266e-05, + "loss": 4.908, + "step": 6255000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994788334805692e-05, + "loss": 4.9162, + "step": 6260000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99478000776401e-05, + "loss": 4.8971, + "step": 6265000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99477167408224e-05, + "loss": 4.9241, + "step": 6270000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994763333760407e-05, + "loss": 4.9324, + "step": 6275000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9947549867985307e-05, + "loss": 4.9231, + "step": 6280000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994746633196634e-05, + "loss": 4.9082, + "step": 6285000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9947382712813577e-05, + "loss": 4.9048, + "step": 6290000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9947299043981596e-05, + "loss": 4.9052, + "step": 6295000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994721530875007e-05, + "loss": 4.918, + "step": 6300000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994713150711924e-05, + "loss": 4.916, + "step": 6305000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9947047622302364e-05, + "loss": 4.9241, + "step": 6310000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9946963704660546e-05, + "loss": 4.8937, + "step": 6315000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994687972064664e-05, + "loss": 4.9021, + "step": 6320000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994679563660728e-05, + "loss": 4.9262, + "step": 6325000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994671151982334e-05, + "loss": 4.9182, + "step": 6330000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994662731981461e-05, + "loss": 4.9193, + "step": 6335000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9946543036541474e-05, + "loss": 4.9181, + "step": 6340000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994645868684424e-05, + "loss": 4.9237, + "step": 6345000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994637430450963e-05, + "loss": 4.8969, + "step": 6350000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994628982199145e-05, + "loss": 4.9031, + "step": 6355000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9946205289969674e-05, + "loss": 4.9138, + "step": 6360000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994612069155128e-05, + "loss": 4.9229, + "step": 6365000 + }, + { + "epoch": 0.1, + "learning_rate": 4.99460360097901e-05, + "loss": 4.8962, + "step": 6370000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994595127856584e-05, + "loss": 4.9141, + "step": 6375000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994586648094564e-05, + "loss": 4.9073, + "step": 6380000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994578161692972e-05, + "loss": 4.9301, + "step": 6385000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994569670351782e-05, + "loss": 4.9162, + "step": 6390000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9945611706724415e-05, + "loss": 4.9297, + "step": 6395000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9945526660562055e-05, + "loss": 4.8963, + "step": 6400000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994544149691333e-05, + "loss": 4.9434, + "step": 6405000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994535631797485e-05, + "loss": 4.9127, + "step": 6410000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9945271055602636e-05, + "loss": 4.912, + "step": 6415000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994518570975706e-05, + "loss": 4.9208, + "step": 6420000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9945100297491e-05, + "loss": 4.9039, + "step": 6425000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994501485301628e-05, + "loss": 4.9135, + "step": 6430000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99449293250556e-05, + "loss": 4.9357, + "step": 6435000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994484371356933e-05, + "loss": 4.926, + "step": 6440000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944758069954786e-05, + "loss": 4.9108, + "step": 6445000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944672325656175e-05, + "loss": 4.9273, + "step": 6450000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994458653211067e-05, + "loss": 4.9349, + "step": 6455000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944500672172844e-05, + "loss": 4.9072, + "step": 6460000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944414728644156e-05, + "loss": 4.9072, + "step": 6465000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994432873590911e-05, + "loss": 4.9296, + "step": 6470000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994424269400779e-05, + "loss": 4.9294, + "step": 6475000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944156568502996e-05, + "loss": 4.9098, + "step": 6480000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994407035935512e-05, + "loss": 4.9125, + "step": 6485000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994398410105493e-05, + "loss": 4.9203, + "step": 6490000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994389777636403e-05, + "loss": 4.9125, + "step": 6495000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994381138528266e-05, + "loss": 4.9206, + "step": 6500000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994372492781103e-05, + "loss": 4.9204, + "step": 6505000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994363838663104e-05, + "loss": 4.928, + "step": 6510000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9943551796366316e-05, + "loss": 4.9159, + "step": 6515000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994346513971203e-05, + "loss": 4.904, + "step": 6520000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994337843402661e-05, + "loss": 4.9203, + "step": 6525000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994329164460718e-05, + "loss": 4.8918, + "step": 6530000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994320478879888e-05, + "loss": 4.917, + "step": 6535000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9943117883999985e-05, + "loss": 4.9088, + "step": 6540000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994303089542793e-05, + "loss": 4.933, + "step": 6545000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99429438230431e-05, + "loss": 4.9257, + "step": 6550000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9942856719119526e-05, + "loss": 4.914, + "step": 6555000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994276951393248e-05, + "loss": 4.9507, + "step": 6560000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994268225979583e-05, + "loss": 4.9125, + "step": 6565000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9942594956749676e-05, + "loss": 4.9312, + "step": 6570000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994250756985205e-05, + "loss": 4.9083, + "step": 6575000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9942420099063355e-05, + "loss": 4.908, + "step": 6580000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994233256186154e-05, + "loss": 4.9238, + "step": 6585000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9942244975777705e-05, + "loss": 4.9361, + "step": 6590000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994215734085194e-05, + "loss": 4.8937, + "step": 6595000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994206962200946e-05, + "loss": 4.9202, + "step": 6600000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941981836781374e-05, + "loss": 4.9152, + "step": 6605000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941893985167885e-05, + "loss": 4.9454, + "step": 6610000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941806049571986e-05, + "loss": 4.9135, + "step": 6615000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941718082785726e-05, + "loss": 4.9423, + "step": 6620000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941630032017505e-05, + "loss": 4.9067, + "step": 6625000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941541914864845e-05, + "loss": 4.9064, + "step": 6630000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941453748978375e-05, + "loss": 4.9141, + "step": 6635000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9941365481407124e-05, + "loss": 4.9089, + "step": 6640000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994127716510253e-05, + "loss": 4.9154, + "step": 6645000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994118876472418e-05, + "loss": 4.9237, + "step": 6650000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994110033334306e-05, + "loss": 4.9165, + "step": 6655000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994101180017184e-05, + "loss": 4.8951, + "step": 6660000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994092323605145e-05, + "loss": 4.9051, + "step": 6665000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99408345700883e-05, + "loss": 4.9211, + "step": 6670000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994074585547292e-05, + "loss": 4.9143, + "step": 6675000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9940657092245395e-05, + "loss": 4.9334, + "step": 6680000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9940568227096126e-05, + "loss": 4.9227, + "step": 6685000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9940479331131684e-05, + "loss": 4.894, + "step": 6690000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994039035100264e-05, + "loss": 4.8943, + "step": 6695000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9940301304492444e-05, + "loss": 4.9329, + "step": 6700000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9940212191601346e-05, + "loss": 4.8919, + "step": 6705000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994012301232957e-05, + "loss": 4.9021, + "step": 6710000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994003376667736e-05, + "loss": 4.9328, + "step": 6715000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993994445464496e-05, + "loss": 4.919, + "step": 6720000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993985505834312e-05, + "loss": 4.898, + "step": 6725000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993976561353775e-05, + "loss": 4.936, + "step": 6730000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993967612026894e-05, + "loss": 4.9333, + "step": 6735000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9939586524788804e-05, + "loss": 4.9107, + "step": 6740000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9939496880845704e-05, + "loss": 4.9171, + "step": 6745000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993940718847972e-05, + "loss": 4.9042, + "step": 6750000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993931739382344e-05, + "loss": 4.9168, + "step": 6755000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993922755074475e-05, + "loss": 4.9343, + "step": 6760000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993913764128801e-05, + "loss": 4.918, + "step": 6765000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993904764744445e-05, + "loss": 4.9368, + "step": 6770000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993895762324134e-05, + "loss": 4.9378, + "step": 6775000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993886749661631e-05, + "loss": 4.9198, + "step": 6780000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993877733968534e-05, + "loss": 4.9142, + "step": 6785000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9938687080279805e-05, + "loss": 4.9213, + "step": 6790000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993859677254651e-05, + "loss": 4.9231, + "step": 6795000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993850641652554e-05, + "loss": 4.918, + "step": 6800000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993841597605302e-05, + "loss": 4.912, + "step": 6805000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9938325469204615e-05, + "loss": 4.9047, + "step": 6810000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993823487785202e-05, + "loss": 4.8998, + "step": 6815000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993814423823927e-05, + "loss": 4.9277, + "step": 6820000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993805353225136e-05, + "loss": 4.9259, + "step": 6825000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9937962759888515e-05, + "loss": 4.9087, + "step": 6830000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993787192115099e-05, + "loss": 4.9127, + "step": 6835000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993778101603902e-05, + "loss": 4.8976, + "step": 6840000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993769006276107e-05, + "loss": 4.9248, + "step": 6845000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9937599006692724e-05, + "loss": 4.9266, + "step": 6850000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99375078842241e-05, + "loss": 4.9136, + "step": 6855000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993741673185156e-05, + "loss": 4.9103, + "step": 6860000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993732547660967e-05, + "loss": 4.9216, + "step": 6865000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9937234173242856e-05, + "loss": 4.9084, + "step": 6870000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993714280350329e-05, + "loss": 4.9164, + "step": 6875000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9937051349090046e-05, + "loss": 4.9128, + "step": 6880000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9936959846592444e-05, + "loss": 4.9198, + "step": 6885000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993686825939508e-05, + "loss": 4.9275, + "step": 6890000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99367766241404e-05, + "loss": 4.9066, + "step": 6895000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993668494086851e-05, + "loss": 4.9167, + "step": 6900000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99365931728843e-05, + "loss": 4.913, + "step": 6905000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9936501320148206e-05, + "loss": 4.9104, + "step": 6910000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993640941940889e-05, + "loss": 4.9095, + "step": 6915000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993631745229902e-05, + "loss": 4.9188, + "step": 6920000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9936225400398146e-05, + "loss": 4.9321, + "step": 6925000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9936133318968615e-05, + "loss": 4.9417, + "step": 6930000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993604115274857e-05, + "loss": 4.9126, + "step": 6935000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993594892015895e-05, + "loss": 4.9049, + "step": 6940000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993585662120001e-05, + "loss": 4.9071, + "step": 6945000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9935764237384885e-05, + "loss": 4.9048, + "step": 6950000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99356718426755e-05, + "loss": 4.9314, + "step": 6955000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993557932610969e-05, + "loss": 4.9096, + "step": 6960000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9935486761675904e-05, + "loss": 4.9289, + "step": 6965000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9935394130874024e-05, + "loss": 4.9181, + "step": 6970000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993530145225779e-05, + "loss": 4.9035, + "step": 6975000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9935208707300515e-05, + "loss": 4.9133, + "step": 6980000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993511585884234e-05, + "loss": 4.9306, + "step": 6985000 + }, + { + "epoch": 0.11, + "learning_rate": 4.993502294399051e-05, + "loss": 4.9277, + "step": 6990000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9934929981351855e-05, + "loss": 4.9021, + "step": 6995000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99348369523466e-05, + "loss": 4.9221, + "step": 7000000 + }, + { + "epoch": 0.11, + "eval_loss": 5.69034481048584, + "eval_runtime": 76912.0259, + "eval_samples_per_second": 144.098, + "eval_steps_per_second": 28.82, + "step": 7000000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993474385697497e-05, + "loss": 4.9069, + "step": 7005000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993465069523723e-05, + "loss": 4.9032, + "step": 7010000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9934557448473897e-05, + "loss": 4.9113, + "step": 7015000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993446417266438e-05, + "loss": 4.9186, + "step": 7020000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9934370811829776e-05, + "loss": 4.9221, + "step": 7025000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993427738463005e-05, + "loss": 4.9145, + "step": 7030000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993418389106544e-05, + "loss": 4.9133, + "step": 7035000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993409031241009e-05, + "loss": 4.8956, + "step": 7040000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99339966861032e-05, + "loss": 4.9121, + "step": 7045000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993390299343218e-05, + "loss": 4.9088, + "step": 7050000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993380923439729e-05, + "loss": 4.9195, + "step": 7055000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9933715390219534e-05, + "loss": 4.9139, + "step": 7060000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993362149844435e-05, + "loss": 4.9004, + "step": 7065000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993352752150025e-05, + "loss": 4.9012, + "step": 7070000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993343349698577e-05, + "loss": 4.9146, + "step": 7075000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993333940610867e-05, + "loss": 4.9169, + "step": 7080000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993324524886919e-05, + "loss": 4.9012, + "step": 7085000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993315102526758e-05, + "loss": 4.9026, + "step": 7090000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9933056716431916e-05, + "loss": 4.9224, + "step": 7095000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993296236009353e-05, + "loss": 4.9079, + "step": 7100000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993286793739376e-05, + "loss": 4.8972, + "step": 7105000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993277344833286e-05, + "loss": 4.9158, + "step": 7110000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99326788929111e-05, + "loss": 4.9137, + "step": 7115000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9932584271128714e-05, + "loss": 4.905, + "step": 7120000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9932489601937804e-05, + "loss": 4.9143, + "step": 7125000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993239482848309e-05, + "loss": 4.9257, + "step": 7130000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993230000762035e-05, + "loss": 4.9141, + "step": 7135000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993220510140633e-05, + "loss": 4.905, + "step": 7140000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993211016681629e-05, + "loss": 4.9089, + "step": 7145000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993201516589371e-05, + "loss": 4.9215, + "step": 7150000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993192007960731e-05, + "loss": 4.8852, + "step": 7155000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9931824907917534e-05, + "loss": 4.9173, + "step": 7160000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993172968890091e-05, + "loss": 4.9148, + "step": 7165000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993163440352619e-05, + "loss": 4.9157, + "step": 7170000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993153905179364e-05, + "loss": 4.9195, + "step": 7175000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993144363370349e-05, + "loss": 4.9002, + "step": 7180000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9931348168367184e-05, + "loss": 4.9025, + "step": 7185000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993125259845146e-05, + "loss": 4.9177, + "step": 7190000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9931156981290075e-05, + "loss": 4.9019, + "step": 7195000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993106127862112e-05, + "loss": 4.9239, + "step": 7200000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993096554789786e-05, + "loss": 4.9051, + "step": 7205000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993086971248997e-05, + "loss": 4.9189, + "step": 7210000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993077382989056e-05, + "loss": 4.9175, + "step": 7215000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993067788093559e-05, + "loss": 4.9108, + "step": 7220000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993058186562534e-05, + "loss": 4.909, + "step": 7225000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993048576472938e-05, + "loss": 4.9133, + "step": 7230000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9930389635939965e-05, + "loss": 4.8877, + "step": 7235000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9930293421565356e-05, + "loss": 4.9071, + "step": 7240000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993019714083648e-05, + "loss": 4.892, + "step": 7245000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9930100774469815e-05, + "loss": 4.9077, + "step": 7250000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993000438031692e-05, + "loss": 4.9091, + "step": 7255000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992990791983708e-05, + "loss": 4.9224, + "step": 7260000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992981133505976e-05, + "loss": 4.9184, + "step": 7265000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992971474188696e-05, + "loss": 4.9184, + "step": 7270000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9929618063037845e-05, + "loss": 4.8741, + "step": 7275000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992952133719966e-05, + "loss": 4.9126, + "step": 7280000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9929424525659115e-05, + "loss": 4.8903, + "step": 7285000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992932762837664e-05, + "loss": 4.899, + "step": 7290000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992923070352241e-05, + "loss": 4.9158, + "step": 7295000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992913369292677e-05, + "loss": 4.9198, + "step": 7300000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992903661597994e-05, + "loss": 4.9155, + "step": 7305000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992893947268218e-05, + "loss": 4.9181, + "step": 7310000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992884226303376e-05, + "loss": 4.9229, + "step": 7315000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9928744987034926e-05, + "loss": 4.8984, + "step": 7320000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992864762520304e-05, + "loss": 4.9016, + "step": 7325000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992855019699471e-05, + "loss": 4.8943, + "step": 7330000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99284527414291e-05, + "loss": 4.908, + "step": 7335000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992835518049521e-05, + "loss": 4.9186, + "step": 7340000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9928257592257665e-05, + "loss": 4.9064, + "step": 7345000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992815989859927e-05, + "loss": 4.9151, + "step": 7350000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9928062158128275e-05, + "loss": 4.8866, + "step": 7355000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992796435130895e-05, + "loss": 4.9129, + "step": 7360000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992786647814155e-05, + "loss": 4.8822, + "step": 7365000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992776853862634e-05, + "loss": 4.9039, + "step": 7370000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992767053276357e-05, + "loss": 4.9152, + "step": 7375000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992757246055352e-05, + "loss": 4.8742, + "step": 7380000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9927474302354226e-05, + "loss": 4.9087, + "step": 7385000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992737609743711e-05, + "loss": 4.9115, + "step": 7390000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9927277806504714e-05, + "loss": 4.9003, + "step": 7395000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992717946888156e-05, + "loss": 4.9181, + "step": 7400000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992708106491242e-05, + "loss": 4.9194, + "step": 7405000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9926982574888975e-05, + "loss": 4.9147, + "step": 7410000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992688405793724e-05, + "loss": 4.9147, + "step": 7415000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992678543519659e-05, + "loss": 4.9148, + "step": 7420000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992668680532967e-05, + "loss": 4.9106, + "step": 7425000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9926588030124454e-05, + "loss": 4.9121, + "step": 7430000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992648922807165e-05, + "loss": 4.9091, + "step": 7435000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992639035967469e-05, + "loss": 4.9129, + "step": 7440000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992629142493385e-05, + "loss": 4.936, + "step": 7445000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992619242384939e-05, + "loss": 4.9241, + "step": 7450000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9926093356421575e-05, + "loss": 4.897, + "step": 7455000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992599424249199e-05, + "loss": 4.914, + "step": 7460000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992589502253692e-05, + "loss": 4.9252, + "step": 7465000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992579575608062e-05, + "loss": 4.9215, + "step": 7470000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992569640340086e-05, + "loss": 4.9119, + "step": 7475000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992559700424695e-05, + "loss": 4.9074, + "step": 7480000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9925497558658954e-05, + "loss": 4.9159, + "step": 7485000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992539802683503e-05, + "loss": 4.9173, + "step": 7490000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9925298428669865e-05, + "loss": 4.9077, + "step": 7495000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9925198764163725e-05, + "loss": 4.9113, + "step": 7500000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992509901335608e-05, + "loss": 4.9032, + "step": 7505000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992499921615551e-05, + "loss": 4.8993, + "step": 7510000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992489933262741e-05, + "loss": 4.8992, + "step": 7515000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992479940273347e-05, + "loss": 4.9129, + "step": 7520000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992469942651376e-05, + "loss": 4.9134, + "step": 7525000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9924599343926903e-05, + "loss": 4.9029, + "step": 7530000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992449921501481e-05, + "loss": 4.9137, + "step": 7535000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992439901976387e-05, + "loss": 4.9346, + "step": 7540000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9924298738107357e-05, + "loss": 4.9131, + "step": 7545000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9924198410166245e-05, + "loss": 4.9114, + "step": 7550000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992409801588708e-05, + "loss": 4.9219, + "step": 7555000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992399755527013e-05, + "loss": 4.9014, + "step": 7560000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992389702831567e-05, + "loss": 4.9037, + "step": 7565000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99237964551573e-05, + "loss": 4.929, + "step": 7570000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992369577539527e-05, + "loss": 4.9065, + "step": 7575000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992359506958976e-05, + "loss": 4.9226, + "step": 7580000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9923494277301186e-05, + "loss": 4.9068, + "step": 7585000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992339339849e-05, + "loss": 4.9234, + "step": 7590000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992329247351607e-05, + "loss": 4.909, + "step": 7595000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992319150241949e-05, + "loss": 4.9247, + "step": 7600000 + }, + { + "epoch": 0.12, + "learning_rate": 4.992309042456157e-05, + "loss": 4.9109, + "step": 7605000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992298928034201e-05, + "loss": 4.9229, + "step": 7610000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992288811026668e-05, + "loss": 4.8935, + "step": 7615000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9922786853617244e-05, + "loss": 4.9075, + "step": 7620000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992268555091286e-05, + "loss": 4.9058, + "step": 7625000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9922584181901e-05, + "loss": 4.9159, + "step": 7630000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9922482705970167e-05, + "loss": 4.9132, + "step": 7635000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992238120431761e-05, + "loss": 4.9002, + "step": 7640000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9922279595693536e-05, + "loss": 4.9065, + "step": 7645000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992217796140137e-05, + "loss": 4.9013, + "step": 7650000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9922076220085135e-05, + "loss": 4.8884, + "step": 7655000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9921974453154444e-05, + "loss": 4.909, + "step": 7660000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992187259953264e-05, + "loss": 4.9116, + "step": 7665000 + }, + { + "epoch": 0.13, + "learning_rate": 4.99217706591802e-05, + "loss": 4.9256, + "step": 7670000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992166867288169e-05, + "loss": 4.8828, + "step": 7675000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9921566620251856e-05, + "loss": 4.9144, + "step": 7680000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992146450129098e-05, + "loss": 4.9127, + "step": 7685000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992136231599934e-05, + "loss": 4.8991, + "step": 7690000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9921260064377184e-05, + "loss": 4.9079, + "step": 7695000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992115774642481e-05, + "loss": 4.9086, + "step": 7700000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992105538263416e-05, + "loss": 4.8903, + "step": 7705000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9920952911530455e-05, + "loss": 4.8963, + "step": 7710000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9920850394589026e-05, + "loss": 4.9086, + "step": 7715000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992074781131846e-05, + "loss": 4.9055, + "step": 7720000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992064514117425e-05, + "loss": 4.8936, + "step": 7725000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9920542425232954e-05, + "loss": 4.9052, + "step": 7730000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992043962239202e-05, + "loss": 4.9122, + "step": 7735000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992033677378109e-05, + "loss": 4.8984, + "step": 7740000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9920233879440245e-05, + "loss": 4.8991, + "step": 7745000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992013087757619e-05, + "loss": 4.9002, + "step": 7750000 + }, + { + "epoch": 0.13, + "learning_rate": 4.992002785060716e-05, + "loss": 4.8876, + "step": 7755000 + }, + { + "epoch": 0.13, + "learning_rate": 4.991992473670006e-05, + "loss": 4.9147, + "step": 7760000 + }, + { + "epoch": 0.13, + "learning_rate": 4.991982155646628e-05, + "loss": 4.9252, + "step": 7765000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9919718330570307e-05, + "loss": 4.8726, + "step": 7770000 + }, + { + "epoch": 0.13, + "learning_rate": 4.991961501769727e-05, + "loss": 4.9098, + "step": 7775000 + }, + { + "epoch": 0.13, + "learning_rate": 4.991951161780764e-05, + "loss": 4.9355, + "step": 7780000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9919408192973936e-05, + "loss": 4.9141, + "step": 7785000 + }, + { + "epoch": 0.13, + "learning_rate": 4.991930466040688e-05, + "loss": 4.9022, + "step": 7790000 + }, + { + "epoch": 0.2, + "learning_rate": 4.979356606294619e-05, + "loss": 4.8945, + "step": 7790500 + }, + { + "epoch": 0.2, + "eval_loss": 5.672119140625, + "eval_runtime": 179340.9606, + "eval_samples_per_second": 61.798, + "eval_steps_per_second": 7.725, + "step": 7790500 + }, + { + "epoch": 0.2, + "learning_rate": 4.979353958670229e-05, + "loss": 4.8691, + "step": 7791000 + }, + { + "epoch": 0.2, + "eval_loss": 5.630408763885498, + "eval_runtime": 182643.2798, + "eval_samples_per_second": 60.681, + "eval_steps_per_second": 7.585, + "step": 7791000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948325182178755e-05, + "loss": 4.9075, + "step": 7791500 + }, + { + "epoch": 0.1, + "eval_loss": 5.701673984527588, + "eval_runtime": 92709.3971, + "eval_samples_per_second": 119.545, + "eval_steps_per_second": 29.886, + "step": 7791500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948318548597226e-05, + "loss": 4.9375, + "step": 7792000 + }, + { + "epoch": 0.1, + "eval_loss": 5.707010746002197, + "eval_runtime": 92373.8149, + "eval_samples_per_second": 119.979, + "eval_steps_per_second": 29.995, + "step": 7792000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994831191459037e-05, + "loss": 4.9309, + "step": 7792500 + }, + { + "epoch": 0.1, + "eval_loss": 5.719743251800537, + "eval_runtime": 94469.6382, + "eval_samples_per_second": 117.317, + "eval_steps_per_second": 29.329, + "step": 7792500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9536024499286255e-05, + "loss": 4.8612, + "step": 7793000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9535965113896765e-05, + "loss": 4.8292, + "step": 7793500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953590572474267e-05, + "loss": 4.7822, + "step": 7794000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953584633182399e-05, + "loss": 4.7956, + "step": 7794500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9535786935140734e-05, + "loss": 4.7506, + "step": 7795000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95357275346929e-05, + "loss": 4.745, + "step": 7795500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953566813048051e-05, + "loss": 4.6809, + "step": 7796000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953560872250357e-05, + "loss": 4.7162, + "step": 7796500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953554931076208e-05, + "loss": 4.7217, + "step": 7797000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953548989525606e-05, + "loss": 4.7182, + "step": 7797500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953543047598551e-05, + "loss": 4.6784, + "step": 7798000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953537105295044e-05, + "loss": 4.6782, + "step": 7798500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953531162615087e-05, + "loss": 4.6857, + "step": 7799000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95352521955868e-05, + "loss": 4.6785, + "step": 7799500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953519276125824e-05, + "loss": 4.6447, + "step": 7800000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9535133323165204e-05, + "loss": 4.6579, + "step": 7800500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95350738813077e-05, + "loss": 4.6466, + "step": 7801000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953501443568573e-05, + "loss": 4.6551, + "step": 7801500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534954986299306e-05, + "loss": 4.6558, + "step": 7802000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953489553314844e-05, + "loss": 4.6364, + "step": 7802500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534836076233145e-05, + "loss": 4.6399, + "step": 7803000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953477661555342e-05, + "loss": 4.6346, + "step": 7803500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953471715110928e-05, + "loss": 4.6702, + "step": 7804000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953465768290073e-05, + "loss": 4.6617, + "step": 7804500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534598210927785e-05, + "loss": 4.665, + "step": 7805000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953453873519045e-05, + "loss": 4.587, + "step": 7805500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534479255688734e-05, + "loss": 4.6294, + "step": 7806000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534419772422656e-05, + "loss": 4.6085, + "step": 7806500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953436028539221e-05, + "loss": 4.5988, + "step": 7807000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534300794597425e-05, + "loss": 4.6167, + "step": 7807500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953424130003828e-05, + "loss": 4.5976, + "step": 7808000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534181801714806e-05, + "loss": 4.6199, + "step": 7808500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9534122299627006e-05, + "loss": 4.6038, + "step": 7809000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95340627937749e-05, + "loss": 4.6106, + "step": 7809500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953400328415848e-05, + "loss": 4.6398, + "step": 7810000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953394377077776e-05, + "loss": 4.5802, + "step": 7810500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533884253632754e-05, + "loss": 4.5946, + "step": 7811000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533824732723475e-05, + "loss": 4.5917, + "step": 7811500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953376520804992e-05, + "loss": 4.6138, + "step": 7812000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533705679612105e-05, + "loss": 4.6439, + "step": 7812500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953364614741004e-05, + "loss": 4.6253, + "step": 7813000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533586611443735e-05, + "loss": 4.6166, + "step": 7813500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533527071713194e-05, + "loss": 4.6166, + "step": 7814000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533467528218425e-05, + "loss": 4.5992, + "step": 7814500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953340798095944e-05, + "loss": 4.5926, + "step": 7815000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953334842993626e-05, + "loss": 4.5851, + "step": 7815500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533288875148874e-05, + "loss": 4.6173, + "step": 7816000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953322931659731e-05, + "loss": 4.5892, + "step": 7816500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953316975428156e-05, + "loss": 4.5904, + "step": 7817000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9533110188201636e-05, + "loss": 4.6194, + "step": 7817500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953305061835756e-05, + "loss": 4.555, + "step": 7818000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953299104474933e-05, + "loss": 4.5782, + "step": 7818500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953293146737696e-05, + "loss": 4.6522, + "step": 7819000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953287188624046e-05, + "loss": 4.5718, + "step": 7819500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532812301339825e-05, + "loss": 4.5712, + "step": 7820000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953275271267509e-05, + "loss": 4.5779, + "step": 7820500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532693120246236e-05, + "loss": 4.6231, + "step": 7821000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95326335240533e-05, + "loss": 4.5759, + "step": 7821500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953257392409626e-05, + "loss": 4.5895, + "step": 7822000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532514320375154e-05, + "loss": 4.5562, + "step": 7822500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532454712889974e-05, + "loss": 4.5577, + "step": 7823000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953239510164074e-05, + "loss": 4.5973, + "step": 7823500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532335486627444e-05, + "loss": 4.6045, + "step": 7824000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953227586785012e-05, + "loss": 4.5774, + "step": 7824500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953221624530876e-05, + "loss": 4.6098, + "step": 7825000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532156619003376e-05, + "loss": 4.6056, + "step": 7825500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9532096988933973e-05, + "loss": 4.5986, + "step": 7826000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953203735510057e-05, + "loss": 4.588, + "step": 7826500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531977717503175e-05, + "loss": 4.5583, + "step": 7827000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953191807614179e-05, + "loss": 4.5519, + "step": 7827500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953185843101643e-05, + "loss": 4.6029, + "step": 7828000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531798782127096e-05, + "loss": 4.624, + "step": 7828500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953173912947381e-05, + "loss": 4.5634, + "step": 7829000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953167947305657e-05, + "loss": 4.5747, + "step": 7829500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531619812875385e-05, + "loss": 4.5932, + "step": 7830000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531560148930275e-05, + "loss": 4.5631, + "step": 7830500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953150048122124e-05, + "loss": 4.5562, + "step": 7831000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95314408097483e-05, + "loss": 4.5914, + "step": 7831500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953138113451145e-05, + "loss": 4.5415, + "step": 7832000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95313214555107e-05, + "loss": 4.5948, + "step": 7832500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953126177274607e-05, + "loss": 4.5817, + "step": 7833000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953120208621756e-05, + "loss": 4.6046, + "step": 7833500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953114239592518e-05, + "loss": 4.5677, + "step": 7834000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531082701868944e-05, + "loss": 4.5397, + "step": 7834500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9531023004048865e-05, + "loss": 4.6294, + "step": 7835000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953096330246494e-05, + "loss": 4.5566, + "step": 7835500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953090359711719e-05, + "loss": 4.5779, + "step": 7836000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953084388800561e-05, + "loss": 4.5749, + "step": 7836500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953078417513023e-05, + "loss": 4.5291, + "step": 7837000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9530724458491036e-05, + "loss": 4.5535, + "step": 7837500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953066473808805e-05, + "loss": 4.5876, + "step": 7838000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9530605013921275e-05, + "loss": 4.6016, + "step": 7838500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953054528599073e-05, + "loss": 4.5715, + "step": 7839000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953048555429642e-05, + "loss": 4.6129, + "step": 7839500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953042581883835e-05, + "loss": 4.5554, + "step": 7840000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953036607961653e-05, + "loss": 4.5604, + "step": 7840500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953030633663097e-05, + "loss": 4.5253, + "step": 7841000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953024658988169e-05, + "loss": 4.5583, + "step": 7841500 + }, + { + "epoch": 0.31, + "learning_rate": 4.953018683936867e-05, + "loss": 4.5406, + "step": 7842000 + }, + { + "epoch": 0.31, + "learning_rate": 4.953012708509196e-05, + "loss": 4.5415, + "step": 7842500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9530067327051535e-05, + "loss": 4.547, + "step": 7843000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9530007565247414e-05, + "loss": 4.597, + "step": 7843500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529947799679615e-05, + "loss": 4.5551, + "step": 7844000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529888030348136e-05, + "loss": 4.5403, + "step": 7844500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529828257253e-05, + "loss": 4.5598, + "step": 7845000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95297684803942e-05, + "loss": 4.5137, + "step": 7845500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952970869977176e-05, + "loss": 4.5272, + "step": 7846000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952964891538567e-05, + "loss": 4.5522, + "step": 7846500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952958912723596e-05, + "loss": 4.5634, + "step": 7847000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952952933532263e-05, + "loss": 4.5496, + "step": 7847500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952946953964568e-05, + "loss": 4.5081, + "step": 7848000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952940974020514e-05, + "loss": 4.5514, + "step": 7848500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529349937001e-05, + "loss": 4.6069, + "step": 7849000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952929013003328e-05, + "loss": 4.5521, + "step": 7849500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952923031930199e-05, + "loss": 4.5415, + "step": 7850000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529170504807125e-05, + "loss": 4.5653, + "step": 7850500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9529110686548705e-05, + "loss": 4.554, + "step": 7851000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952905086452675e-05, + "loss": 4.5148, + "step": 7851500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952899103874125e-05, + "loss": 4.5553, + "step": 7852000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952893120919222e-05, + "loss": 4.5251, + "step": 7852500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952887137587968e-05, + "loss": 4.5356, + "step": 7853000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528811538803624e-05, + "loss": 4.5284, + "step": 7853500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952875169796406e-05, + "loss": 4.5529, + "step": 7854000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528691853361016e-05, + "loss": 4.5379, + "step": 7854500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952863200499448e-05, + "loss": 4.4988, + "step": 7855000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952857215286448e-05, + "loss": 4.5566, + "step": 7855500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952851229697101e-05, + "loss": 4.5513, + "step": 7856000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528452437314085e-05, + "loss": 4.5642, + "step": 7856500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528392573893726e-05, + "loss": 4.5133, + "step": 7857000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952833270670991e-05, + "loss": 4.5363, + "step": 7857500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952827283576269e-05, + "loss": 4.5583, + "step": 7858000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528212961052036e-05, + "loss": 4.5148, + "step": 7858500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528153082577977e-05, + "loss": 4.5658, + "step": 7859000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952809320034052e-05, + "loss": 4.5032, + "step": 7859500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9528033314339674e-05, + "loss": 4.5723, + "step": 7860000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952797342457544e-05, + "loss": 4.5684, + "step": 7860500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952791353104784e-05, + "loss": 4.5312, + "step": 7861000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952785363375687e-05, + "loss": 4.5507, + "step": 7861500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952779373270255e-05, + "loss": 4.5543, + "step": 7862000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9527733827884894e-05, + "loss": 4.5329, + "step": 7862500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9527673919303895e-05, + "loss": 4.5364, + "step": 7863000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952761400695957e-05, + "loss": 4.5158, + "step": 7863500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952755409085193e-05, + "loss": 4.5164, + "step": 7864000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952749417098098e-05, + "loss": 4.5927, + "step": 7864500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9527434247346735e-05, + "loss": 4.513, + "step": 7865000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95273743199492e-05, + "loss": 4.5604, + "step": 7865500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952731438878838e-05, + "loss": 4.4923, + "step": 7866000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952725445386429e-05, + "loss": 4.5298, + "step": 7866500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9527194515176944e-05, + "loss": 4.5088, + "step": 7867000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9527134572726344e-05, + "loss": 4.5497, + "step": 7867500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95270746265125e-05, + "loss": 4.55, + "step": 7868000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952701467653542e-05, + "loss": 4.5444, + "step": 7868500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952695472279512e-05, + "loss": 4.5853, + "step": 7869000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952689476529159e-05, + "loss": 4.5176, + "step": 7869500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952683480402487e-05, + "loss": 4.5614, + "step": 7870000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952677483899494e-05, + "loss": 4.5399, + "step": 7870500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952671487020183e-05, + "loss": 4.5546, + "step": 7871000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952665489764554e-05, + "loss": 4.5327, + "step": 7871500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952659492132608e-05, + "loss": 4.5615, + "step": 7872000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526534941243456e-05, + "loss": 4.5585, + "step": 7872500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526474957397684e-05, + "loss": 4.5292, + "step": 7873000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952641496978877e-05, + "loss": 4.5353, + "step": 7873500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526354978416724e-05, + "loss": 4.5504, + "step": 7874000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952629498328155e-05, + "loss": 4.4971, + "step": 7874500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952623498438327e-05, + "loss": 4.5471, + "step": 7875000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526174981721875e-05, + "loss": 4.5367, + "step": 7875500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952611497529739e-05, + "loss": 4.5159, + "step": 7876000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526054965109815e-05, + "loss": 4.5044, + "step": 7876500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952599495115916e-05, + "loss": 4.534, + "step": 7877000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952593493344544e-05, + "loss": 4.5761, + "step": 7877500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952587491196866e-05, + "loss": 4.5259, + "step": 7878000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9525814886728825e-05, + "loss": 4.5185, + "step": 7878500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952575485772596e-05, + "loss": 4.5063, + "step": 7879000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952569482496006e-05, + "loss": 4.57, + "step": 7879500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952563478843113e-05, + "loss": 4.5443, + "step": 7880000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952557474813919e-05, + "loss": 4.521, + "step": 7880500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9525514704084256e-05, + "loss": 4.5027, + "step": 7881000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9525454656266314e-05, + "loss": 4.567, + "step": 7881500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952539460468539e-05, + "loss": 4.5025, + "step": 7882000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9525334549341495e-05, + "loss": 4.5471, + "step": 7882500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952527449023463e-05, + "loss": 4.5487, + "step": 7883000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9525214427364805e-05, + "loss": 4.499, + "step": 7883500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952515436073203e-05, + "loss": 4.5616, + "step": 7884000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952509429033631e-05, + "loss": 4.5141, + "step": 7884500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952503421617768e-05, + "loss": 4.5142, + "step": 7885000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9524974138256106e-05, + "loss": 4.5276, + "step": 7885500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952491405657164e-05, + "loss": 4.5277, + "step": 7886000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952485397112426e-05, + "loss": 4.5364, + "step": 7886500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9524793881913986e-05, + "loss": 4.5209, + "step": 7887000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952473378894083e-05, + "loss": 4.494, + "step": 7887500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95246736922048e-05, + "loss": 4.5327, + "step": 7888000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95246135917059e-05, + "loss": 4.5503, + "step": 7888500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9524553487444146e-05, + "loss": 4.5413, + "step": 7889000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952449337941955e-05, + "loss": 4.5606, + "step": 7889500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952443326763211e-05, + "loss": 4.5026, + "step": 7890000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952437315208184e-05, + "loss": 4.5389, + "step": 7890500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9524313032768755e-05, + "loss": 4.5439, + "step": 7891000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952425290969286e-05, + "loss": 4.5228, + "step": 7891500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952419278285416e-05, + "loss": 4.5441, + "step": 7892000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952413265225267e-05, + "loss": 4.5475, + "step": 7892500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95240725178884e-05, + "loss": 4.5289, + "step": 7893000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9524012379761355e-05, + "loss": 4.5227, + "step": 7893500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952395223787154e-05, + "loss": 4.4732, + "step": 7894000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952389209221897e-05, + "loss": 4.5336, + "step": 7894500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952383194280366e-05, + "loss": 4.5427, + "step": 7895000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952377178962561e-05, + "loss": 4.5239, + "step": 7895500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9523711632684834e-05, + "loss": 4.5252, + "step": 7896000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952365147198134e-05, + "loss": 4.4954, + "step": 7896500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952359130751514e-05, + "loss": 4.5146, + "step": 7897000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952353113928624e-05, + "loss": 4.4922, + "step": 7897500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952347096729464e-05, + "loss": 4.5261, + "step": 7898000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952341079154037e-05, + "loss": 4.5141, + "step": 7898500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952335061202342e-05, + "loss": 4.5403, + "step": 7899000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9523290428743805e-05, + "loss": 4.5, + "step": 7899500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9523230241701545e-05, + "loss": 4.5092, + "step": 7900000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9523170050896636e-05, + "loss": 4.5334, + "step": 7900500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95231098563291e-05, + "loss": 4.5233, + "step": 7901000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952304965799893e-05, + "loss": 4.5058, + "step": 7901500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952298945590614e-05, + "loss": 4.5238, + "step": 7902000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952292925005075e-05, + "loss": 4.5025, + "step": 7902500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952286904043276e-05, + "loss": 4.55, + "step": 7903000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952280882705218e-05, + "loss": 4.5025, + "step": 7903500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522748609909025e-05, + "loss": 4.508, + "step": 7904000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522688389003296e-05, + "loss": 4.548, + "step": 7904500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522628164335006e-05, + "loss": 4.5207, + "step": 7905000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952256793590416e-05, + "loss": 4.5066, + "step": 7905500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952250770371078e-05, + "loss": 4.4974, + "step": 7906000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522447467754864e-05, + "loss": 4.5198, + "step": 7906500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952238722803642e-05, + "loss": 4.5114, + "step": 7907000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522326984555465e-05, + "loss": 4.5025, + "step": 7907500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522266737312e-05, + "loss": 4.5415, + "step": 7908000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952220648630604e-05, + "loss": 4.5222, + "step": 7908500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952214623153759e-05, + "loss": 4.5155, + "step": 7909000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952208597300667e-05, + "loss": 4.495, + "step": 7909500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9522025710713274e-05, + "loss": 4.4813, + "step": 7910000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952196544465742e-05, + "loss": 4.5345, + "step": 7910500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952190517483912e-05, + "loss": 4.499, + "step": 7911000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521844901258375e-05, + "loss": 4.4816, + "step": 7911500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95217846239152e-05, + "loss": 4.5215, + "step": 7912000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95217243428096e-05, + "loss": 4.5127, + "step": 7912500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521664057941586e-05, + "loss": 4.4809, + "step": 7913000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521603769311174e-05, + "loss": 4.5235, + "step": 7913500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521543476918365e-05, + "loss": 4.4819, + "step": 7914000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521483180763164e-05, + "loss": 4.5429, + "step": 7914500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952142288084559e-05, + "loss": 4.4964, + "step": 7915000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521362577165654e-05, + "loss": 4.5041, + "step": 7915500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952130226972336e-05, + "loss": 4.4992, + "step": 7916000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9521241958518714e-05, + "loss": 4.4952, + "step": 7916500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952118164355173e-05, + "loss": 4.4988, + "step": 7917000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952112132482241e-05, + "loss": 4.5303, + "step": 7917500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952106100233077e-05, + "loss": 4.5367, + "step": 7918000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952100067607683e-05, + "loss": 4.5378, + "step": 7918500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952094034606059e-05, + "loss": 4.5136, + "step": 7919000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952088001228204e-05, + "loss": 4.4974, + "step": 7919500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9520819674741215e-05, + "loss": 4.4957, + "step": 7920000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9520759333438114e-05, + "loss": 4.5469, + "step": 7920500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952069898837275e-05, + "loss": 4.4989, + "step": 7921000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9520638639545124e-05, + "loss": 4.5036, + "step": 7921500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952057828695526e-05, + "loss": 4.4667, + "step": 7922000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952051793060316e-05, + "loss": 4.5068, + "step": 7922500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952045757048882e-05, + "loss": 4.4679, + "step": 7923000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952039720661227e-05, + "loss": 4.5289, + "step": 7923500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952033683897351e-05, + "loss": 4.4929, + "step": 7924000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952027646757254e-05, + "loss": 4.4989, + "step": 7924500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952021609240939e-05, + "loss": 4.5143, + "step": 7925000 + }, + { + "epoch": 0.31, + "learning_rate": 4.952015571348406e-05, + "loss": 4.4872, + "step": 7925500 + }, + { + "epoch": 0.31, + "learning_rate": 4.952009533079655e-05, + "loss": 4.5158, + "step": 7926000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9520034944346875e-05, + "loss": 4.5327, + "step": 7926500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951997455413505e-05, + "loss": 4.5031, + "step": 7927000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951991416016109e-05, + "loss": 4.5474, + "step": 7927500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951985376242498e-05, + "loss": 4.5063, + "step": 7928000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951979336092675e-05, + "loss": 4.5166, + "step": 7928500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95197329556664e-05, + "loss": 4.4765, + "step": 7929000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9519672546643944e-05, + "loss": 4.4942, + "step": 7929500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951961213385938e-05, + "loss": 4.4936, + "step": 7930000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951955171731274e-05, + "loss": 4.5326, + "step": 7930500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951949129700402e-05, + "loss": 4.5455, + "step": 7931000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9519430872933224e-05, + "loss": 4.5026, + "step": 7931500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951937044510037e-05, + "loss": 4.4614, + "step": 7932000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951931001350546e-05, + "loss": 4.5074, + "step": 7932500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951924957814851e-05, + "loss": 4.4584, + "step": 7933000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951918913902953e-05, + "loss": 4.4517, + "step": 7933500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951912869614852e-05, + "loss": 4.5142, + "step": 7934000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95190682495055e-05, + "loss": 4.4971, + "step": 7934500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951900779910047e-05, + "loss": 4.5271, + "step": 7935000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951894734493344e-05, + "loss": 4.4847, + "step": 7935500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951888688700443e-05, + "loss": 4.5009, + "step": 7936000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951882642531344e-05, + "loss": 4.5195, + "step": 7936500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951876595986048e-05, + "loss": 4.4888, + "step": 7937000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951870549064556e-05, + "loss": 4.4792, + "step": 7937500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9518645017668694e-05, + "loss": 4.5384, + "step": 7938000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951858454092988e-05, + "loss": 4.4968, + "step": 7938500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9518524060429136e-05, + "loss": 4.4933, + "step": 7939000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951846357616648e-05, + "loss": 4.5054, + "step": 7939500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95184030881419e-05, + "loss": 4.5341, + "step": 7940000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951834259635542e-05, + "loss": 4.4838, + "step": 7940500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9518282100807045e-05, + "loss": 4.4883, + "step": 7941000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951822160149679e-05, + "loss": 4.5527, + "step": 7941500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9518161098424656e-05, + "loss": 4.4699, + "step": 7942000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9518100591590656e-05, + "loss": 4.535, + "step": 7942500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95180400809948e-05, + "loss": 4.5154, + "step": 7943000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951797956663709e-05, + "loss": 4.5342, + "step": 7943500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951791904851755e-05, + "loss": 4.5012, + "step": 7944000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951785852663617e-05, + "loss": 4.5285, + "step": 7944500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951779800099298e-05, + "loss": 4.4879, + "step": 7945000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951773747158797e-05, + "loss": 4.5246, + "step": 7945500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951767693842116e-05, + "loss": 4.4581, + "step": 7946000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951761640149256e-05, + "loss": 4.4953, + "step": 7946500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951755586080218e-05, + "loss": 4.5112, + "step": 7947000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951749531635003e-05, + "loss": 4.528, + "step": 7947500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9517434768136105e-05, + "loss": 4.4774, + "step": 7948000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9517374216160426e-05, + "loss": 4.519, + "step": 7948500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951731366042301e-05, + "loss": 4.4445, + "step": 7949000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9517253100923846e-05, + "loss": 4.4853, + "step": 7949500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951719253766296e-05, + "loss": 4.473, + "step": 7950000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951713197064036e-05, + "loss": 4.5106, + "step": 7950500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951707139985605e-05, + "loss": 4.536, + "step": 7951000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9517010825310036e-05, + "loss": 4.5211, + "step": 7951500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951695024700233e-05, + "loss": 4.4836, + "step": 7952000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9516889664932956e-05, + "loss": 4.5044, + "step": 7952500 + }, + { + "epoch": 0.31, + "learning_rate": 4.95168290791019e-05, + "loss": 4.5094, + "step": 7953000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951676848950918e-05, + "loss": 4.4759, + "step": 7953500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951670789615482e-05, + "loss": 4.4635, + "step": 7954000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951664729903881e-05, + "loss": 4.5016, + "step": 7954500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951658669816116e-05, + "loss": 4.4965, + "step": 7955000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951652609352189e-05, + "loss": 4.5193, + "step": 7955500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9516465485121e-05, + "loss": 4.5031, + "step": 7956000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951640487295851e-05, + "loss": 4.5122, + "step": 7956500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951634425703442e-05, + "loss": 4.5116, + "step": 7957000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951628363734874e-05, + "loss": 4.5082, + "step": 7957500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9516223013901486e-05, + "loss": 4.4694, + "step": 7958000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951616238669266e-05, + "loss": 4.4595, + "step": 7958500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9516101755722275e-05, + "loss": 4.5085, + "step": 7959000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951604112099034e-05, + "loss": 4.5149, + "step": 7959500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951598048249686e-05, + "loss": 4.5012, + "step": 7960000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515919840241854e-05, + "loss": 4.4888, + "step": 7960500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515859194225325e-05, + "loss": 4.4936, + "step": 7961000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951579854444728e-05, + "loss": 4.5286, + "step": 7961500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515737890907735e-05, + "loss": 4.4887, + "step": 7962000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951567723360668e-05, + "loss": 4.4848, + "step": 7962500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515616572544156e-05, + "loss": 4.4893, + "step": 7963000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951555590772015e-05, + "loss": 4.4959, + "step": 7963500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951549523913468e-05, + "loss": 4.4937, + "step": 7964000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951543456678774e-05, + "loss": 4.5301, + "step": 7964500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951537389067937e-05, + "loss": 4.5406, + "step": 7965000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951531321080955e-05, + "loss": 4.4682, + "step": 7965500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951525252717831e-05, + "loss": 4.5092, + "step": 7966000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515191839785644e-05, + "loss": 4.4942, + "step": 7966500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951513114863157e-05, + "loss": 4.4808, + "step": 7967000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9515070453716086e-05, + "loss": 4.4836, + "step": 7967500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951500975503921e-05, + "loss": 4.4802, + "step": 7968000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951494905260096e-05, + "loss": 4.514, + "step": 7968500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951488834640134e-05, + "loss": 4.4749, + "step": 7969000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951482763644034e-05, + "loss": 4.5292, + "step": 7969500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951476692271799e-05, + "loss": 4.4685, + "step": 7970000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951470620523429e-05, + "loss": 4.4601, + "step": 7970500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9514645483989264e-05, + "loss": 4.4976, + "step": 7971000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951458475898291e-05, + "loss": 4.4794, + "step": 7971500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951452403021524e-05, + "loss": 4.5324, + "step": 7972000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9514463297686254e-05, + "loss": 4.4912, + "step": 7972500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951440256139597e-05, + "loss": 4.4753, + "step": 7973000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95143418213444e-05, + "loss": 4.4856, + "step": 7973500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9514281077531544e-05, + "loss": 4.4803, + "step": 7974000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951422032995742e-05, + "loss": 4.4657, + "step": 7974500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951415957862203e-05, + "loss": 4.4841, + "step": 7975000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951409882352539e-05, + "loss": 4.5289, + "step": 7975500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9514038064667514e-05, + "loss": 4.4992, + "step": 7976000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9513977302048396e-05, + "loss": 4.5256, + "step": 7976500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951391653566806e-05, + "loss": 4.5201, + "step": 7977000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951385576552651e-05, + "loss": 4.4758, + "step": 7977500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951379499162374e-05, + "loss": 4.5366, + "step": 7978000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9513734213959783e-05, + "loss": 4.4562, + "step": 7978500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9513673432534636e-05, + "loss": 4.4665, + "step": 7979000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951361264734832e-05, + "loss": 4.5108, + "step": 7979500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951355185840082e-05, + "loss": 4.5309, + "step": 7980000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951349106569218e-05, + "loss": 4.4664, + "step": 7980500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951343026922237e-05, + "loss": 4.4847, + "step": 7981000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9513369468991424e-05, + "loss": 4.5175, + "step": 7981500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951330866499936e-05, + "loss": 4.4669, + "step": 7982000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951324785724616e-05, + "loss": 4.4807, + "step": 7982500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9513187045731854e-05, + "loss": 4.4902, + "step": 7983000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951312623045644e-05, + "loss": 4.4618, + "step": 7983500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951306541141993e-05, + "loss": 4.5187, + "step": 7984000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951300458862235e-05, + "loss": 4.5089, + "step": 7984500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951294376206368e-05, + "loss": 4.5166, + "step": 7985000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951288293174395e-05, + "loss": 4.5159, + "step": 7985500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951282209766316e-05, + "loss": 4.4676, + "step": 7986000 + }, + { + "epoch": 0.31, + "learning_rate": 4.951276125982133e-05, + "loss": 4.4825, + "step": 7986500 + }, + { + "epoch": 0.31, + "learning_rate": 4.951270041821845e-05, + "loss": 4.5077, + "step": 7987000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9512639572854546e-05, + "loss": 4.4714, + "step": 7987500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9512578723729633e-05, + "loss": 4.5176, + "step": 7988000 + }, + { + "epoch": 0.31, + "learning_rate": 4.95125178708437e-05, + "loss": 4.4753, + "step": 7988500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9512457014196767e-05, + "loss": 4.4964, + "step": 7989000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951239615378884e-05, + "loss": 4.4921, + "step": 7989500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951233528961994e-05, + "loss": 4.5027, + "step": 7990000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951227442169005e-05, + "loss": 4.4717, + "step": 7990500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9512213549999213e-05, + "loss": 4.5142, + "step": 7991000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951215267454742e-05, + "loss": 4.5036, + "step": 7991500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951209179533468e-05, + "loss": 4.4987, + "step": 7992000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9512030912361005e-05, + "loss": 4.503, + "step": 7992500 + }, + { + "epoch": 0.32, + "learning_rate": 4.95119700256264e-05, + "loss": 4.4978, + "step": 7993000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951190913513089e-05, + "loss": 4.5189, + "step": 7993500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951184824087446e-05, + "loss": 4.4736, + "step": 7994000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951178734285714e-05, + "loss": 4.4797, + "step": 7994500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951172644107892e-05, + "loss": 4.4886, + "step": 7995000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9511665535539833e-05, + "loss": 4.4778, + "step": 7995500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951160462623987e-05, + "loss": 4.4881, + "step": 7996000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9511543713179054e-05, + "loss": 4.4866, + "step": 7996500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951148279635738e-05, + "loss": 4.4877, + "step": 7997000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951142187577487e-05, + "loss": 4.5288, + "step": 7997500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951136095143152e-05, + "loss": 4.5034, + "step": 7998000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951130002332734e-05, + "loss": 4.5241, + "step": 7998500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951123909146236e-05, + "loss": 4.4905, + "step": 7999000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951117815583657e-05, + "loss": 4.5089, + "step": 7999500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951111721644999e-05, + "loss": 4.4696, + "step": 8000000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951105627330262e-05, + "loss": 4.5035, + "step": 8000500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951099532639447e-05, + "loss": 4.5203, + "step": 8001000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951093437572556e-05, + "loss": 4.5057, + "step": 8001500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951087342129589e-05, + "loss": 4.5162, + "step": 8002000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951081246310547e-05, + "loss": 4.4804, + "step": 8002500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510751501154314e-05, + "loss": 4.5052, + "step": 8003000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951069053544243e-05, + "loss": 4.4922, + "step": 8003500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951062956596981e-05, + "loss": 4.4925, + "step": 8004000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95105685927365e-05, + "loss": 4.4902, + "step": 8004500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510507615742475e-05, + "loss": 4.4896, + "step": 8005000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951044663498776e-05, + "loss": 4.5233, + "step": 8005500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951038565047237e-05, + "loss": 4.4947, + "step": 8006000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510324662196296e-05, + "loss": 4.51, + "step": 8006500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510263670159564e-05, + "loss": 4.4876, + "step": 8007000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510202674362175e-05, + "loss": 4.4877, + "step": 8007500 + }, + { + "epoch": 0.32, + "learning_rate": 4.951014167480414e-05, + "loss": 4.4498, + "step": 8008000 + }, + { + "epoch": 0.32, + "learning_rate": 4.951008067148547e-05, + "loss": 4.5003, + "step": 8008500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9510019664406177e-05, + "loss": 4.485, + "step": 8009000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950995865356626e-05, + "loss": 4.5162, + "step": 8009500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950989763896574e-05, + "loss": 4.4846, + "step": 8010000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950983662060462e-05, + "loss": 4.5069, + "step": 8010500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950977559848291e-05, + "loss": 4.4423, + "step": 8011000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950971457260062e-05, + "loss": 4.4861, + "step": 8011500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950965354295776e-05, + "loss": 4.4159, + "step": 8012000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9509592509554336e-05, + "loss": 4.4605, + "step": 8012500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950953147239037e-05, + "loss": 4.5167, + "step": 8013000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9509470431465846e-05, + "loss": 4.5059, + "step": 8013500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9509409386780806e-05, + "loss": 4.4802, + "step": 8014000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9509348338335224e-05, + "loss": 4.5013, + "step": 8014500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950928728612914e-05, + "loss": 4.4943, + "step": 8015000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950922623016255e-05, + "loss": 4.5184, + "step": 8015500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9509165170435465e-05, + "loss": 4.5093, + "step": 8016000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950910410694789e-05, + "loss": 4.4791, + "step": 8016500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950904303969984e-05, + "loss": 4.5041, + "step": 8017000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950898196869131e-05, + "loss": 4.4751, + "step": 8017500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950892089392234e-05, + "loss": 4.4939, + "step": 8018000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9508859815392917e-05, + "loss": 4.4985, + "step": 8018500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950879873310305e-05, + "loss": 4.5232, + "step": 8019000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950873764705276e-05, + "loss": 4.4496, + "step": 8019500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950867655724204e-05, + "loss": 4.4828, + "step": 8020000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9508615463670914e-05, + "loss": 4.4924, + "step": 8020500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950855436633939e-05, + "loss": 4.4796, + "step": 8021000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950849326524747e-05, + "loss": 4.5069, + "step": 8021500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950843216039516e-05, + "loss": 4.4914, + "step": 8022000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950837105178248e-05, + "loss": 4.4774, + "step": 8022500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950830993940944e-05, + "loss": 4.4913, + "step": 8023000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9508248823276044e-05, + "loss": 4.4705, + "step": 8023500 + }, + { + "epoch": 0.32, + "learning_rate": 4.95081877033823e-05, + "loss": 4.5312, + "step": 8024000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950812657972822e-05, + "loss": 4.449, + "step": 8024500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950806545231381e-05, + "loss": 4.4749, + "step": 8025000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950800432113909e-05, + "loss": 4.4759, + "step": 8025500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950794318620405e-05, + "loss": 4.4567, + "step": 8026000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950788204750873e-05, + "loss": 4.4977, + "step": 8026500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950782090505311e-05, + "loss": 4.467, + "step": 8027000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95077597588372e-05, + "loss": 4.4624, + "step": 8027500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950769860886103e-05, + "loss": 4.4835, + "step": 8028000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95076374551246e-05, + "loss": 4.5069, + "step": 8028500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950757629762792e-05, + "loss": 4.4601, + "step": 8029000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950751513637099e-05, + "loss": 4.4461, + "step": 8029500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9507453971353834e-05, + "loss": 4.5043, + "step": 8030000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950739280257645e-05, + "loss": 4.4759, + "step": 8030500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950733163003885e-05, + "loss": 4.4858, + "step": 8031000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950727045374105e-05, + "loss": 4.4651, + "step": 8031500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9507209273683044e-05, + "loss": 4.5038, + "step": 8032000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9507148089864867e-05, + "loss": 4.4798, + "step": 8032500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9507086902286506e-05, + "loss": 4.4895, + "step": 8033000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950702571094798e-05, + "loss": 4.4999, + "step": 8033500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506964515849286e-05, + "loss": 4.4664, + "step": 8034000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950690331699045e-05, + "loss": 4.4694, + "step": 8034500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950684211437148e-05, + "loss": 4.5031, + "step": 8035000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506780907992374e-05, + "loss": 4.4879, + "step": 8035500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950671969785315e-05, + "loss": 4.5053, + "step": 8036000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506658483953816e-05, + "loss": 4.4551, + "step": 8036500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506597266294375e-05, + "loss": 4.442, + "step": 8037000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506536044874846e-05, + "loss": 4.477, + "step": 8037500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950647481969524e-05, + "loss": 4.4709, + "step": 8038000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506413590755546e-05, + "loss": 4.4592, + "step": 8038500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506352358055795e-05, + "loss": 4.4831, + "step": 8039000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950629112159599e-05, + "loss": 4.4897, + "step": 8039500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506229881376145e-05, + "loss": 4.5002, + "step": 8040000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950616863739625e-05, + "loss": 4.4708, + "step": 8040500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950610738965634e-05, + "loss": 4.4637, + "step": 8041000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9506046138156416e-05, + "loss": 4.4638, + "step": 8041500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950598488289647e-05, + "loss": 4.4898, + "step": 8042000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950592362387654e-05, + "loss": 4.5219, + "step": 8042500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9505862361096616e-05, + "loss": 4.4601, + "step": 8043000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950580109455671e-05, + "loss": 4.4674, + "step": 8043500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950573982425684e-05, + "loss": 4.5067, + "step": 8044000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9505678550196996e-05, + "loss": 4.5069, + "step": 8044500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9505617272377216e-05, + "loss": 4.4445, + "step": 8045000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950555599079749e-05, + "loss": 4.4715, + "step": 8045500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950549470545783e-05, + "loss": 4.4692, + "step": 8046000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9505433416358245e-05, + "loss": 4.4778, + "step": 8046500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950537212349875e-05, + "loss": 4.5238, + "step": 8047000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9505310826879346e-05, + "loss": 4.5204, + "step": 8047500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950524952650005e-05, + "loss": 4.4802, + "step": 8048000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950518822236086e-05, + "loss": 4.5086, + "step": 8048500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950512691446181e-05, + "loss": 4.4653, + "step": 8049000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950506560280288e-05, + "loss": 4.4842, + "step": 8049500 + }, + { + "epoch": 0.32, + "learning_rate": 4.95050042873841e-05, + "loss": 4.5275, + "step": 8050000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950494296820547e-05, + "loss": 4.492, + "step": 8050500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9504881645267e-05, + "loss": 4.4641, + "step": 8051000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95048203185687e-05, + "loss": 4.4642, + "step": 8051500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950475898811059e-05, + "loss": 4.4888, + "step": 8052000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950469765389266e-05, + "loss": 4.4785, + "step": 8052500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950463631591493e-05, + "loss": 4.4676, + "step": 8053000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9504574974177406e-05, + "loss": 4.4979, + "step": 8053500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950451362868011e-05, + "loss": 4.4509, + "step": 8054000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950445227942304e-05, + "loss": 4.4882, + "step": 8054500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9504390926406195e-05, + "loss": 4.4834, + "step": 8055000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950432956962961e-05, + "loss": 4.477, + "step": 8055500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9504268209093273e-05, + "loss": 4.4791, + "step": 8056000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950420684479721e-05, + "loss": 4.512, + "step": 8056500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950414547674141e-05, + "loss": 4.4799, + "step": 8057000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95040841049259e-05, + "loss": 4.4918, + "step": 8057500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9504022729350675e-05, + "loss": 4.5011, + "step": 8058000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950396135001576e-05, + "loss": 4.4349, + "step": 8058500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950389996692116e-05, + "loss": 4.4999, + "step": 8059000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9503838580066874e-05, + "loss": 4.4798, + "step": 8059500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950377718945293e-05, + "loss": 4.4921, + "step": 8060000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950371579507932e-05, + "loss": 4.4675, + "step": 8060500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950365439694605e-05, + "loss": 4.4774, + "step": 8061000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950359299505315e-05, + "loss": 4.4717, + "step": 8061500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950353158940062e-05, + "loss": 4.488, + "step": 8062000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9503470179988466e-05, + "loss": 4.4681, + "step": 8062500 + }, + { + "epoch": 0.32, + "learning_rate": 4.95034087668167e-05, + "loss": 4.4715, + "step": 8063000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950334734988533e-05, + "loss": 4.4684, + "step": 8063500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950328592919437e-05, + "loss": 4.4622, + "step": 8064000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950322450474382e-05, + "loss": 4.4576, + "step": 8064500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950316307653369e-05, + "loss": 4.481, + "step": 8065000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950310164456401e-05, + "loss": 4.4719, + "step": 8065500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950304020883476e-05, + "loss": 4.5027, + "step": 8066000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950297876934597e-05, + "loss": 4.484, + "step": 8066500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950291732609764e-05, + "loss": 4.4811, + "step": 8067000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950285587908979e-05, + "loss": 4.4502, + "step": 8067500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950279442832242e-05, + "loss": 4.441, + "step": 8068000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950273297379553e-05, + "loss": 4.4814, + "step": 8068500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950267151550915e-05, + "loss": 4.4594, + "step": 8069000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950261005346328e-05, + "loss": 4.4427, + "step": 8069500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950254858765793e-05, + "loss": 4.4937, + "step": 8070000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95024871180931e-05, + "loss": 4.5078, + "step": 8070500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9502425644768815e-05, + "loss": 4.4376, + "step": 8071000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9502364167685084e-05, + "loss": 4.4555, + "step": 8071500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950230268684191e-05, + "loss": 4.4483, + "step": 8072000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950224120223929e-05, + "loss": 4.4555, + "step": 8072500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950217971387725e-05, + "loss": 4.4904, + "step": 8073000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95021182217558e-05, + "loss": 4.4773, + "step": 8073500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950205672587495e-05, + "loss": 4.5089, + "step": 8074000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9501995226234695e-05, + "loss": 4.5197, + "step": 8074500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950193372283505e-05, + "loss": 4.4798, + "step": 8075000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950187221567604e-05, + "loss": 4.4625, + "step": 8075500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950181070475766e-05, + "loss": 4.4522, + "step": 8076000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9501749190079914e-05, + "loss": 4.5039, + "step": 8076500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950168767164283e-05, + "loss": 4.4821, + "step": 8077000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95016261494464e-05, + "loss": 4.5027, + "step": 8077500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950156462349064e-05, + "loss": 4.5241, + "step": 8078000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950150309377557e-05, + "loss": 4.4424, + "step": 8078500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9501441560301174e-05, + "loss": 4.466, + "step": 8079000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950138002306749e-05, + "loss": 4.4812, + "step": 8079500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950131848207451e-05, + "loss": 4.4732, + "step": 8080000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950125693732224e-05, + "loss": 4.4908, + "step": 8080500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950119538881071e-05, + "loss": 4.5096, + "step": 8081000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9501133836539906e-05, + "loss": 4.4917, + "step": 8081500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950107228050985e-05, + "loss": 4.4799, + "step": 8082000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9501010720720555e-05, + "loss": 4.4542, + "step": 8082500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500949157172025e-05, + "loss": 4.4543, + "step": 8083000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950088758986426e-05, + "loss": 4.4934, + "step": 8083500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500826018797284e-05, + "loss": 4.4838, + "step": 8084000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950076444397111e-05, + "loss": 4.4747, + "step": 8084500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950070286538573e-05, + "loss": 4.4957, + "step": 8085000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950064128304116e-05, + "loss": 4.4788, + "step": 8085500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500579696937416e-05, + "loss": 4.4449, + "step": 8086000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95005181070745e-05, + "loss": 4.4979, + "step": 8086500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950045651345243e-05, + "loss": 4.486, + "step": 8087000 + }, + { + "epoch": 0.32, + "learning_rate": 4.95003949160712e-05, + "loss": 4.4905, + "step": 8087500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500333314930836e-05, + "loss": 4.483, + "step": 8088000 + }, + { + "epoch": 0.32, + "learning_rate": 4.950027171003134e-05, + "loss": 4.5002, + "step": 8088500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500210101372725e-05, + "loss": 4.4581, + "step": 8089000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500148488954995e-05, + "loss": 4.4992, + "step": 8089500 + }, + { + "epoch": 0.32, + "learning_rate": 4.950008687277816e-05, + "loss": 4.473, + "step": 8090000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9500025252842234e-05, + "loss": 4.4622, + "step": 8090500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949996362914723e-05, + "loss": 4.5029, + "step": 8091000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949990200169315e-05, + "loss": 4.4978, + "step": 8091500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9499840370479986e-05, + "loss": 4.4687, + "step": 8092000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949977873550778e-05, + "loss": 4.4555, + "step": 8092500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949971709677653e-05, + "loss": 4.4693, + "step": 8093000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9499655454286246e-05, + "loss": 4.4823, + "step": 8093500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9499593808036937e-05, + "loss": 4.459, + "step": 8094000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94995321580286e-05, + "loss": 4.48, + "step": 8094500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949947050426126e-05, + "loss": 4.4835, + "step": 8095000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949940884673492e-05, + "loss": 4.4512, + "step": 8095500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949934718544959e-05, + "loss": 4.4869, + "step": 8096000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949928552040528e-05, + "loss": 4.5156, + "step": 8096500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9499223851602004e-05, + "loss": 4.4707, + "step": 8097000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949916217903976e-05, + "loss": 4.4932, + "step": 8097500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949910050271857e-05, + "loss": 4.4915, + "step": 8098000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949903882263844e-05, + "loss": 4.4474, + "step": 8098500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949897713879937e-05, + "loss": 4.4597, + "step": 8099000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9498915451201375e-05, + "loss": 4.458, + "step": 8099500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949885375984448e-05, + "loss": 4.4348, + "step": 8100000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949879206472867e-05, + "loss": 4.4548, + "step": 8100500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949873036585397e-05, + "loss": 4.4491, + "step": 8101000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949866866322038e-05, + "loss": 4.4872, + "step": 8101500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949860695682792e-05, + "loss": 4.4734, + "step": 8102000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9498545246676584e-05, + "loss": 4.5067, + "step": 8102500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9498483532766404e-05, + "loss": 4.4988, + "step": 8103000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949842181509737e-05, + "loss": 4.4477, + "step": 8103500 + }, + { + "epoch": 0.32, + "learning_rate": 4.94983600936695e-05, + "loss": 4.4824, + "step": 8104000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94982983684828e-05, + "loss": 4.4823, + "step": 8104500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949823663953728e-05, + "loss": 4.4692, + "step": 8105000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9498174906832954e-05, + "loss": 4.4766, + "step": 8105500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949811317036982e-05, + "loss": 4.4846, + "step": 8106000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949805143014791e-05, + "loss": 4.476, + "step": 8106500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497989686167204e-05, + "loss": 4.5429, + "step": 8107000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497927938427734e-05, + "loss": 4.4611, + "step": 8107500 + }, + { + "epoch": 0.32, + "learning_rate": 4.94978661869295e-05, + "loss": 4.4499, + "step": 8108000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949780443167251e-05, + "loss": 4.4489, + "step": 8108500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949774267265679e-05, + "loss": 4.4932, + "step": 8109000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949768090988232e-05, + "loss": 4.4872, + "step": 8109500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497619143349136e-05, + "loss": 4.4601, + "step": 8110000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949755737305724e-05, + "loss": 4.4632, + "step": 8110500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949749559900663e-05, + "loss": 4.4866, + "step": 8111000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497433821197326e-05, + "loss": 4.4888, + "step": 8111500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949737203962933e-05, + "loss": 4.4794, + "step": 8112000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949731025430267e-05, + "loss": 4.4394, + "step": 8112500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949724846521733e-05, + "loss": 4.4767, + "step": 8113000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497186672373343e-05, + "loss": 4.4821, + "step": 8113500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497124875770705e-05, + "loss": 4.5212, + "step": 8114000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949706307540942e-05, + "loss": 4.4417, + "step": 8114500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9497001271289515e-05, + "loss": 4.4232, + "step": 8115000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9496939463410984e-05, + "loss": 4.4828, + "step": 8115500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949687765177385e-05, + "loss": 4.4672, + "step": 8116000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949681583637811e-05, + "loss": 4.482, + "step": 8116500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9496754017223776e-05, + "loss": 4.4683, + "step": 8117000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949669219431087e-05, + "loss": 4.504, + "step": 8117500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949663036763938e-05, + "loss": 4.4171, + "step": 8118000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949656853720933e-05, + "loss": 4.4762, + "step": 8118500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949650670302073e-05, + "loss": 4.483, + "step": 8119000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9496444865073587e-05, + "loss": 4.452, + "step": 8119500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949638302336791e-05, + "loss": 4.4731, + "step": 8120000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94963211779037e-05, + "loss": 4.4554, + "step": 8120500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9496259328680974e-05, + "loss": 4.445, + "step": 8121000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949619747569975e-05, + "loss": 4.4803, + "step": 8121500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949613561896003e-05, + "loss": 4.4568, + "step": 8122000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949607375846182e-05, + "loss": 4.5191, + "step": 8122500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9496011894205125e-05, + "loss": 4.4902, + "step": 8123000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9495950026189966e-05, + "loss": 4.4693, + "step": 8123500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949588815441635e-05, + "loss": 4.4779, + "step": 8124000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949582627888429e-05, + "loss": 4.4869, + "step": 8124500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949576439959378e-05, + "loss": 4.4708, + "step": 8125000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949570251654485e-05, + "loss": 4.4435, + "step": 8125500 + }, + { + "epoch": 0.32, + "learning_rate": 4.94956406297375e-05, + "loss": 4.4459, + "step": 8126000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9495578739171725e-05, + "loss": 4.4434, + "step": 8126500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949551684484755e-05, + "loss": 4.4594, + "step": 8127000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949545494676499e-05, + "loss": 4.4633, + "step": 8127500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9495393044924054e-05, + "loss": 4.4741, + "step": 8128000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949533113932474e-05, + "loss": 4.4503, + "step": 8128500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949526922996706e-05, + "loss": 4.4918, + "step": 8129000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949520731685103e-05, + "loss": 4.463, + "step": 8129500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949514539997665e-05, + "loss": 4.4741, + "step": 8130000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9495083479343934e-05, + "loss": 4.4718, + "step": 8130500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949502155495289e-05, + "loss": 4.4883, + "step": 8131000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949495962680354e-05, + "loss": 4.4618, + "step": 8131500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949489769489588e-05, + "loss": 4.4783, + "step": 8132000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949483575922992e-05, + "loss": 4.4658, + "step": 8132500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494773819805675e-05, + "loss": 4.4629, + "step": 8133000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494711876623144e-05, + "loss": 4.4267, + "step": 8133500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949464992968235e-05, + "loss": 4.4501, + "step": 8134000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494587978983296e-05, + "loss": 4.4884, + "step": 8134500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494526024526e-05, + "loss": 4.4692, + "step": 8135000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494464066310455e-05, + "loss": 4.4975, + "step": 8135500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949440210433668e-05, + "loss": 4.4657, + "step": 8136000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494340138604684e-05, + "loss": 4.4698, + "step": 8136500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9494278169114476e-05, + "loss": 4.4895, + "step": 8137000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949421619586607e-05, + "loss": 4.4651, + "step": 8137500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949415421885947e-05, + "loss": 4.4821, + "step": 8138000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949409223809468e-05, + "loss": 4.4841, + "step": 8138500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949403025357172e-05, + "loss": 4.4586, + "step": 8139000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94939682652906e-05, + "loss": 4.44, + "step": 8139500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9493906273251325e-05, + "loss": 4.4818, + "step": 8140000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9493844277453906e-05, + "loss": 4.4228, + "step": 8140500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949378227789835e-05, + "loss": 4.4735, + "step": 8141000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949372027458467e-05, + "loss": 4.4468, + "step": 8141500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949365826751287e-05, + "loss": 4.45, + "step": 8142000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949359625668296e-05, + "loss": 4.4538, + "step": 8142500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9493534242094955e-05, + "loss": 4.4524, + "step": 8143000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949347222374886e-05, + "loss": 4.489, + "step": 8143500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9493410201644694e-05, + "loss": 4.4536, + "step": 8144000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949334817578245e-05, + "loss": 4.4623, + "step": 8144500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949328614616215e-05, + "loss": 4.4926, + "step": 8145000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94932241127838e-05, + "loss": 4.4671, + "step": 8145500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949316207564741e-05, + "loss": 4.4905, + "step": 8146000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949310003475299e-05, + "loss": 4.4573, + "step": 8146500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9493037990100556e-05, + "loss": 4.4395, + "step": 8147000 + }, + { + "epoch": 0.32, + "learning_rate": 4.94929759416901e-05, + "loss": 4.495, + "step": 8147500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949291388952164e-05, + "loss": 4.4824, + "step": 8148000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949285183359519e-05, + "loss": 4.482, + "step": 8148500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9492789773910766e-05, + "loss": 4.4678, + "step": 8149000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9492727710468355e-05, + "loss": 4.4966, + "step": 8149500 + }, + { + "epoch": 0.32, + "learning_rate": 4.949266564326799e-05, + "loss": 4.4809, + "step": 8150000 + }, + { + "epoch": 0.32, + "learning_rate": 4.949260357230966e-05, + "loss": 4.4869, + "step": 8150500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9492541497593394e-05, + "loss": 4.5019, + "step": 8151000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9492479419119184e-05, + "loss": 4.4745, + "step": 8151500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9492417336887054e-05, + "loss": 4.4322, + "step": 8152000 + }, + { + "epoch": 0.54, + "learning_rate": 4.859835936525288e-05, + "loss": 4.4627, + "step": 8152500 + }, + { + "epoch": 0.54, + "learning_rate": 4.859818897265622e-05, + "loss": 4.4212, + "step": 8153000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8598018570001925e-05, + "loss": 4.4103, + "step": 8153500 + }, + { + "epoch": 0.54, + "learning_rate": 4.859784815729006e-05, + "loss": 4.4058, + "step": 8154000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909945922851138e-05, + "loss": 4.3992, + "step": 8154500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9099349403649844e-05, + "loss": 4.3872, + "step": 8155000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909923957221475e-05, + "loss": 4.4025, + "step": 8155500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909912973420613e-05, + "loss": 4.3924, + "step": 8156000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909901988962401e-05, + "loss": 4.4153, + "step": 8156500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909891003846842e-05, + "loss": 4.4105, + "step": 8157000 + }, + { + "epoch": 0.43, + "learning_rate": 4.9098800180739394e-05, + "loss": 4.4276, + "step": 8157500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909869031643696e-05, + "loss": 4.3883, + "step": 8158000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909858044556114e-05, + "loss": 4.3904, + "step": 8158500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909847056811197e-05, + "loss": 4.3956, + "step": 8159000 + }, + { + "epoch": 0.43, + "learning_rate": 4.9098360684089485e-05, + "loss": 4.437, + "step": 8159500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909825079349371e-05, + "loss": 4.3866, + "step": 8160000 + }, + { + "epoch": 0.43, + "learning_rate": 4.9098140896324674e-05, + "loss": 4.3935, + "step": 8160500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909803099258241e-05, + "loss": 4.4064, + "step": 8161000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909792108226695e-05, + "loss": 4.3843, + "step": 8161500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909781116537832e-05, + "loss": 4.3992, + "step": 8162000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909770124191655e-05, + "loss": 4.404, + "step": 8162500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909759131188166e-05, + "loss": 4.3925, + "step": 8163000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909748137527371e-05, + "loss": 4.4304, + "step": 8163500 + }, + { + "epoch": 0.43, + "learning_rate": 4.90973714320927e-05, + "loss": 4.4208, + "step": 8164000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909726148233867e-05, + "loss": 4.3684, + "step": 8164500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9097151526011656e-05, + "loss": 4.384, + "step": 8165000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909704156311167e-05, + "loss": 4.3721, + "step": 8165500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9096931593638764e-05, + "loss": 4.3738, + "step": 8166000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909682161759296e-05, + "loss": 4.4199, + "step": 8166500 + }, + { + "epoch": 0.43, + "learning_rate": 4.909671163497428e-05, + "loss": 4.3798, + "step": 8167000 + }, + { + "epoch": 0.43, + "learning_rate": 4.909660164578277e-05, + "loss": 4.3808, + "step": 8167500 + }, + { + "epoch": 0.86, + "learning_rate": 4.645119997945853e-05, + "loss": 4.3118, + "step": 8168000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6450775864685545e-05, + "loss": 4.3175, + "step": 8168500 + }, + { + "epoch": 0.86, + "learning_rate": 4.645035172650764e-05, + "loss": 4.3171, + "step": 8169000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644992756492525e-05, + "loss": 4.3011, + "step": 8169500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644950337993886e-05, + "loss": 4.2909, + "step": 8170000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6449079171548924e-05, + "loss": 4.3004, + "step": 8170500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644865493975591e-05, + "loss": 4.2675, + "step": 8171000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644823068456029e-05, + "loss": 4.284, + "step": 8171500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6447806405962504e-05, + "loss": 4.2574, + "step": 8172000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6447382103963024e-05, + "loss": 4.2643, + "step": 8172500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644695777856232e-05, + "loss": 4.2733, + "step": 8173000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6446533429760855e-05, + "loss": 4.2643, + "step": 8173500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644610905755908e-05, + "loss": 4.26, + "step": 8174000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6445684661957466e-05, + "loss": 4.262, + "step": 8174500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644526024295648e-05, + "loss": 4.2704, + "step": 8175000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644483580055658e-05, + "loss": 4.2449, + "step": 8175500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6444411334758224e-05, + "loss": 4.2446, + "step": 8176000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644398684556188e-05, + "loss": 4.2453, + "step": 8176500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644356233296802e-05, + "loss": 4.2739, + "step": 8177000 + }, + { + "epoch": 0.86, + "learning_rate": 4.64431377969771e-05, + "loss": 4.2233, + "step": 8177500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6442713237589574e-05, + "loss": 4.2228, + "step": 8178000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644228865480592e-05, + "loss": 4.2538, + "step": 8178500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644186404862659e-05, + "loss": 4.248, + "step": 8179000 + }, + { + "epoch": 0.86, + "learning_rate": 4.644143941905206e-05, + "loss": 4.2478, + "step": 8179500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644101476608278e-05, + "loss": 4.2319, + "step": 8180000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6440590089719224e-05, + "loss": 4.2364, + "step": 8180500 + }, + { + "epoch": 0.86, + "learning_rate": 4.644016538996184e-05, + "loss": 4.2213, + "step": 8181000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643974066681111e-05, + "loss": 4.2024, + "step": 8181500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643931592026749e-05, + "loss": 4.2113, + "step": 8182000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643889115033143e-05, + "loss": 4.225, + "step": 8182500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643846635700342e-05, + "loss": 4.2516, + "step": 8183000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6438041540283906e-05, + "loss": 4.2235, + "step": 8183500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643761670017335e-05, + "loss": 4.209, + "step": 8184000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643719183667223e-05, + "loss": 4.2488, + "step": 8184500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6436766949780994e-05, + "loss": 4.2455, + "step": 8185000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643634203950011e-05, + "loss": 4.2315, + "step": 8185500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643591710583004e-05, + "loss": 4.2171, + "step": 8186000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643549214877126e-05, + "loss": 4.2111, + "step": 8186500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643506716832422e-05, + "loss": 4.2297, + "step": 8187000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6434642164489386e-05, + "loss": 4.2162, + "step": 8187500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6434217137267235e-05, + "loss": 4.2278, + "step": 8188000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643379208665821e-05, + "loss": 4.2078, + "step": 8188500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643336701266278e-05, + "loss": 4.2234, + "step": 8189000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643294191528143e-05, + "loss": 4.2135, + "step": 8189500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643251679451459e-05, + "loss": 4.2373, + "step": 8190000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6432091650362747e-05, + "loss": 4.1907, + "step": 8190500 + }, + { + "epoch": 0.86, + "learning_rate": 4.643166648282636e-05, + "loss": 4.2056, + "step": 8191000 + }, + { + "epoch": 0.86, + "learning_rate": 4.643124129190589e-05, + "loss": 4.217, + "step": 8191500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6430816077601804e-05, + "loss": 4.2234, + "step": 8192000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6430390839914565e-05, + "loss": 4.1924, + "step": 8192500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6429965578844635e-05, + "loss": 4.2202, + "step": 8193000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6429540294392483e-05, + "loss": 4.2052, + "step": 8193500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642911498655856e-05, + "loss": 4.2183, + "step": 8194000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6428689655343355e-05, + "loss": 4.2244, + "step": 8194500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642826430074731e-05, + "loss": 4.205, + "step": 8195000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642783892277089e-05, + "loss": 4.212, + "step": 8195500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6427413521414574e-05, + "loss": 4.2037, + "step": 8196000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642698809667881e-05, + "loss": 4.1919, + "step": 8196500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642656264856408e-05, + "loss": 4.2078, + "step": 8197000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642613717707083e-05, + "loss": 4.1971, + "step": 8197500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642571168219953e-05, + "loss": 4.2125, + "step": 8198000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6425286163950646e-05, + "loss": 4.1942, + "step": 8198500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642486062232465e-05, + "loss": 4.208, + "step": 8199000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642443505732199e-05, + "loss": 4.1824, + "step": 8199500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642400946894314e-05, + "loss": 4.1897, + "step": 8200000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642358385718857e-05, + "loss": 4.1995, + "step": 8200500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6423158222058725e-05, + "loss": 4.1978, + "step": 8201000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642273256355409e-05, + "loss": 4.1964, + "step": 8201500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6422306881675125e-05, + "loss": 4.1949, + "step": 8202000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642188117642229e-05, + "loss": 4.1864, + "step": 8202500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642145544779605e-05, + "loss": 4.1986, + "step": 8203000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642102969579687e-05, + "loss": 4.2176, + "step": 8203500 + }, + { + "epoch": 0.86, + "learning_rate": 4.642060392042521e-05, + "loss": 4.1595, + "step": 8204000 + }, + { + "epoch": 0.86, + "learning_rate": 4.642017812168154e-05, + "loss": 4.1708, + "step": 8204500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641975229956633e-05, + "loss": 4.1812, + "step": 8205000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6419326454080034e-05, + "loss": 4.187, + "step": 8205500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641890058522312e-05, + "loss": 4.1814, + "step": 8206000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641847469299605e-05, + "loss": 4.196, + "step": 8206500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6418048777399304e-05, + "loss": 4.1815, + "step": 8207000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641762283843333e-05, + "loss": 4.2001, + "step": 8207500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641719687609859e-05, + "loss": 4.1813, + "step": 8208000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641677089039557e-05, + "loss": 4.1845, + "step": 8208500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641634488132471e-05, + "loss": 4.1917, + "step": 8209000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6415918848886486e-05, + "loss": 4.1619, + "step": 8209500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641549279308137e-05, + "loss": 4.1908, + "step": 8210000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6415066713909816e-05, + "loss": 4.1859, + "step": 8210500 + }, + { + "epoch": 0.86, + "learning_rate": 4.64146406113723e-05, + "loss": 4.1818, + "step": 8211000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641421448546927e-05, + "loss": 4.1637, + "step": 8211500 + }, + { + "epoch": 0.86, + "learning_rate": 4.64137883362012e-05, + "loss": 4.1833, + "step": 8212000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641336216356856e-05, + "loss": 4.1875, + "step": 8212500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6412935967571814e-05, + "loss": 4.1862, + "step": 8213000 + }, + { + "epoch": 0.86, + "learning_rate": 4.641250974821142e-05, + "loss": 4.1823, + "step": 8213500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6412083505487846e-05, + "loss": 4.1695, + "step": 8214000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6411657239401554e-05, + "loss": 4.165, + "step": 8214500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641123094995302e-05, + "loss": 4.1875, + "step": 8215000 + }, + { + "epoch": 0.86, + "learning_rate": 4.64108046371427e-05, + "loss": 4.1701, + "step": 8215500 + }, + { + "epoch": 0.86, + "learning_rate": 4.641037830097106e-05, + "loss": 4.1865, + "step": 8216000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640995194143857e-05, + "loss": 4.1837, + "step": 8216500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640952555854569e-05, + "loss": 4.17, + "step": 8217000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640909915229288e-05, + "loss": 4.1845, + "step": 8217500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640867272268062e-05, + "loss": 4.1769, + "step": 8218000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6408246269709364e-05, + "loss": 4.1581, + "step": 8218500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640781979337958e-05, + "loss": 4.1945, + "step": 8219000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6407393293691734e-05, + "loss": 4.1839, + "step": 8219500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640696677064629e-05, + "loss": 4.1849, + "step": 8220000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640654022424372e-05, + "loss": 4.184, + "step": 8220500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640611365448448e-05, + "loss": 4.1734, + "step": 8221000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640568706136904e-05, + "loss": 4.1759, + "step": 8221500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640526044489786e-05, + "loss": 4.1631, + "step": 8222000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640483380507142e-05, + "loss": 4.1695, + "step": 8222500 + }, + { + "epoch": 0.86, + "learning_rate": 4.6404407141890173e-05, + "loss": 4.1762, + "step": 8223000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6403980455354575e-05, + "loss": 4.1529, + "step": 8223500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640355374546512e-05, + "loss": 4.1377, + "step": 8224000 + }, + { + "epoch": 0.86, + "learning_rate": 4.6403127012222255e-05, + "loss": 4.1494, + "step": 8224500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640270025562645e-05, + "loss": 4.1721, + "step": 8225000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640227347567816e-05, + "loss": 4.1709, + "step": 8225500 + }, + { + "epoch": 0.86, + "learning_rate": 4.640184667237786e-05, + "loss": 4.1464, + "step": 8226000 + }, + { + "epoch": 0.86, + "learning_rate": 4.640141984572602e-05, + "loss": 4.1872, + "step": 8226500 + }, + { + "epoch": 0.86, + "learning_rate": 4.64009929957231e-05, + "loss": 4.149, + "step": 8227000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6400566122369574e-05, + "loss": 4.1341, + "step": 8227500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6400139225665884e-05, + "loss": 4.184, + "step": 8228000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6399712305612525e-05, + "loss": 4.16, + "step": 8228500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6399285362209944e-05, + "loss": 4.1627, + "step": 8229000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6398858395458615e-05, + "loss": 4.1495, + "step": 8229500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6398431405359e-05, + "loss": 4.1839, + "step": 8230000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639800439191157e-05, + "loss": 4.1486, + "step": 8230500 + }, + { + "epoch": 0.87, + "learning_rate": 4.639757735511678e-05, + "loss": 4.1805, + "step": 8231000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639715029497511e-05, + "loss": 4.1617, + "step": 8231500 + }, + { + "epoch": 0.87, + "learning_rate": 4.639672321148702e-05, + "loss": 4.1648, + "step": 8232000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639629610465298e-05, + "loss": 4.1248, + "step": 8232500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6395868974473445e-05, + "loss": 4.1899, + "step": 8233000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6395441820948885e-05, + "loss": 4.1415, + "step": 8233500 + }, + { + "epoch": 0.87, + "learning_rate": 4.639501464407977e-05, + "loss": 4.1576, + "step": 8234000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6394587443866565e-05, + "loss": 4.1529, + "step": 8234500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6394160220309733e-05, + "loss": 4.1592, + "step": 8235000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639373297340975e-05, + "loss": 4.1874, + "step": 8235500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6393305703167066e-05, + "loss": 4.1776, + "step": 8236000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6392878409582166e-05, + "loss": 4.1428, + "step": 8236500 + }, + { + "epoch": 0.87, + "learning_rate": 4.63924510926555e-05, + "loss": 4.1408, + "step": 8237000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639202375238755e-05, + "loss": 4.1628, + "step": 8237500 + }, + { + "epoch": 0.87, + "learning_rate": 4.639159638877876e-05, + "loss": 4.1785, + "step": 8238000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639116900182962e-05, + "loss": 4.1583, + "step": 8238500 + }, + { + "epoch": 0.87, + "learning_rate": 4.639074159154058e-05, + "loss": 4.1674, + "step": 8239000 + }, + { + "epoch": 0.87, + "learning_rate": 4.639031415791211e-05, + "loss": 4.165, + "step": 8239500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6389886700944676e-05, + "loss": 4.1641, + "step": 8240000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6389459220638755e-05, + "loss": 4.1618, + "step": 8240500 + }, + { + "epoch": 0.87, + "learning_rate": 4.638903171699479e-05, + "loss": 4.163, + "step": 8241000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638860419001328e-05, + "loss": 4.1633, + "step": 8241500 + }, + { + "epoch": 0.87, + "learning_rate": 4.638817663969467e-05, + "loss": 4.1362, + "step": 8242000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638774906603943e-05, + "loss": 4.1494, + "step": 8242500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6387321469048025e-05, + "loss": 4.1299, + "step": 8243000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638689384872092e-05, + "loss": 4.1526, + "step": 8243500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6386466205058586e-05, + "loss": 4.1739, + "step": 8244000 + }, + { + "epoch": 0.87, + "learning_rate": 4.6386038538061495e-05, + "loss": 4.152, + "step": 8244500 + }, + { + "epoch": 0.87, + "learning_rate": 4.63856108477301e-05, + "loss": 4.153, + "step": 8245000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638518313406488e-05, + "loss": 4.1407, + "step": 8245500 + }, + { + "epoch": 0.87, + "learning_rate": 4.6384755397066305e-05, + "loss": 4.1669, + "step": 8246000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638432763673482e-05, + "loss": 4.1531, + "step": 8246500 + }, + { + "epoch": 0.87, + "learning_rate": 4.638389985307091e-05, + "loss": 4.1533, + "step": 8247000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638347204607504e-05, + "loss": 4.142, + "step": 8247500 + }, + { + "epoch": 0.87, + "learning_rate": 4.638304421574767e-05, + "loss": 4.147, + "step": 8248000 + }, + { + "epoch": 0.87, + "learning_rate": 4.638261636208927e-05, + "loss": 4.1503, + "step": 8248500 + }, + { + "epoch": 1.08, + "learning_rate": 4.44256798399337e-05, + "loss": 4.1485, + "step": 8249000 + }, + { + "epoch": 1.08, + "learning_rate": 4.442503005661928e-05, + "loss": 4.1505, + "step": 8249500 + }, + { + "epoch": 1.08, + "learning_rate": 4.442438024018789e-05, + "loss": 4.1365, + "step": 8250000 + }, + { + "epoch": 1.08, + "learning_rate": 4.442373039064062e-05, + "loss": 4.1398, + "step": 8250500 + }, + { + "epoch": 1.08, + "learning_rate": 4.44230805079786e-05, + "loss": 4.1186, + "step": 8251000 + }, + { + "epoch": 1.08, + "learning_rate": 4.442243059220292e-05, + "loss": 4.1104, + "step": 8251500 + }, + { + "epoch": 1.08, + "learning_rate": 4.44217806433147e-05, + "loss": 4.1098, + "step": 8252000 + }, + { + "epoch": 1.08, + "learning_rate": 4.442113066131504e-05, + "loss": 4.1154, + "step": 8252500 + }, + { + "epoch": 1.08, + "learning_rate": 4.442048064620505e-05, + "loss": 4.1157, + "step": 8253000 + }, + { + "epoch": 1.08, + "learning_rate": 4.441983059798585e-05, + "loss": 4.1367, + "step": 8253500 + }, + { + "epoch": 1.08, + "learning_rate": 4.441918051665852e-05, + "loss": 4.1233, + "step": 8254000 + }, + { + "epoch": 1.08, + "learning_rate": 4.44185304022242e-05, + "loss": 4.1019, + "step": 8254500 + }, + { + "epoch": 1.08, + "learning_rate": 4.4417880254683975e-05, + "loss": 4.0863, + "step": 8255000 + }, + { + "epoch": 1.08, + "learning_rate": 4.441723007403897e-05, + "loss": 4.0967, + "step": 8255500 + }, + { + "epoch": 1.09, + "learning_rate": 4.441657986029028e-05, + "loss": 4.1193, + "step": 8256000 + }, + { + "epoch": 1.09, + "learning_rate": 4.441592961343902e-05, + "loss": 4.1082, + "step": 8256500 + }, + { + "epoch": 1.09, + "learning_rate": 4.44152793334863e-05, + "loss": 4.1076, + "step": 8257000 + }, + { + "epoch": 1.09, + "learning_rate": 4.441462902043323e-05, + "loss": 4.1135, + "step": 8257500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4413978674280915e-05, + "loss": 4.1101, + "step": 8258000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4413328295030466e-05, + "loss": 4.0954, + "step": 8258500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4412677882682985e-05, + "loss": 4.1386, + "step": 8259000 + }, + { + "epoch": 1.09, + "learning_rate": 4.441202743723959e-05, + "loss": 4.1114, + "step": 8259500 + }, + { + "epoch": 1.09, + "learning_rate": 4.441137695870138e-05, + "loss": 4.1035, + "step": 8260000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4410726447069474e-05, + "loss": 4.1033, + "step": 8260500 + }, + { + "epoch": 1.09, + "learning_rate": 4.441007590234497e-05, + "loss": 4.1041, + "step": 8261000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4409425324528995e-05, + "loss": 4.1006, + "step": 8261500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4408774713622645e-05, + "loss": 4.0958, + "step": 8262000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4408124069627035e-05, + "loss": 4.1115, + "step": 8262500 + }, + { + "epoch": 1.09, + "learning_rate": 4.440747339254326e-05, + "loss": 4.0921, + "step": 8263000 + }, + { + "epoch": 1.09, + "learning_rate": 4.440682268237244e-05, + "loss": 4.1148, + "step": 8263500 + }, + { + "epoch": 1.09, + "learning_rate": 4.440617193911569e-05, + "loss": 4.1071, + "step": 8264000 + }, + { + "epoch": 1.09, + "learning_rate": 4.440552116277411e-05, + "loss": 4.0899, + "step": 8264500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4404870353348824e-05, + "loss": 4.1, + "step": 8265000 + }, + { + "epoch": 1.09, + "learning_rate": 4.440421951084092e-05, + "loss": 4.0822, + "step": 8265500 + }, + { + "epoch": 1.09, + "learning_rate": 4.440356863525152e-05, + "loss": 4.1035, + "step": 8266000 + }, + { + "epoch": 1.09, + "learning_rate": 4.440291772658173e-05, + "loss": 4.1026, + "step": 8266500 + }, + { + "epoch": 1.09, + "learning_rate": 4.440226678483267e-05, + "loss": 4.1307, + "step": 8267000 + }, + { + "epoch": 1.09, + "learning_rate": 4.440161581000544e-05, + "loss": 4.1143, + "step": 8267500 + }, + { + "epoch": 1.09, + "learning_rate": 4.440096480210114e-05, + "loss": 4.1139, + "step": 8268000 + }, + { + "epoch": 1.09, + "learning_rate": 4.44003137611209e-05, + "loss": 4.1134, + "step": 8268500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439966268706582e-05, + "loss": 4.0922, + "step": 8269000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4399011579937014e-05, + "loss": 4.1066, + "step": 8269500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439836043973558e-05, + "loss": 4.1258, + "step": 8270000 + }, + { + "epoch": 1.09, + "learning_rate": 4.439770926646264e-05, + "loss": 4.0858, + "step": 8270500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439705806011931e-05, + "loss": 4.0913, + "step": 8271000 + }, + { + "epoch": 1.09, + "learning_rate": 4.439640682070668e-05, + "loss": 4.0972, + "step": 8271500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439575554822588e-05, + "loss": 4.0977, + "step": 8272000 + }, + { + "epoch": 1.09, + "learning_rate": 4.439510424267801e-05, + "loss": 4.1343, + "step": 8272500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439445290406419e-05, + "loss": 4.1221, + "step": 8273000 + }, + { + "epoch": 1.09, + "learning_rate": 4.439380153238551e-05, + "loss": 4.1244, + "step": 8273500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43931501276431e-05, + "loss": 4.1026, + "step": 8274000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4392498689838066e-05, + "loss": 4.1069, + "step": 8274500 + }, + { + "epoch": 1.09, + "learning_rate": 4.439184721897151e-05, + "loss": 4.0873, + "step": 8275000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4391195715044556e-05, + "loss": 4.0965, + "step": 8275500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43905441780583e-05, + "loss": 4.0952, + "step": 8276000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4389892608013875e-05, + "loss": 4.0886, + "step": 8276500 + }, + { + "epoch": 1.09, + "learning_rate": 4.438924100491236e-05, + "loss": 4.0944, + "step": 8277000 + }, + { + "epoch": 1.09, + "learning_rate": 4.438858936875489e-05, + "loss": 4.1055, + "step": 8277500 + }, + { + "epoch": 1.09, + "learning_rate": 4.438793769954258e-05, + "loss": 4.0761, + "step": 8278000 + }, + { + "epoch": 1.09, + "learning_rate": 4.438728599727652e-05, + "loss": 4.0893, + "step": 8278500 + }, + { + "epoch": 1.09, + "learning_rate": 4.438663426195784e-05, + "loss": 4.0663, + "step": 8279000 + }, + { + "epoch": 1.09, + "learning_rate": 4.438598249358763e-05, + "loss": 4.0905, + "step": 8279500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4385330692167026e-05, + "loss": 4.076, + "step": 8280000 + }, + { + "epoch": 1.09, + "learning_rate": 4.438467885769712e-05, + "loss": 4.1025, + "step": 8280500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4384026990179034e-05, + "loss": 4.1031, + "step": 8281000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4383375089613874e-05, + "loss": 4.0903, + "step": 8281500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4382723156002755e-05, + "loss": 4.0981, + "step": 8282000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4382071189346786e-05, + "loss": 4.0994, + "step": 8282500 + }, + { + "epoch": 1.09, + "learning_rate": 4.438141918964708e-05, + "loss": 4.0772, + "step": 8283000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4380767156904746e-05, + "loss": 4.0931, + "step": 8283500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43801150911209e-05, + "loss": 4.0956, + "step": 8284000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4379462992296645e-05, + "loss": 4.0906, + "step": 8284500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43788108604331e-05, + "loss": 4.088, + "step": 8285000 + }, + { + "epoch": 1.09, + "learning_rate": 4.437815869553138e-05, + "loss": 4.0716, + "step": 8285500 + }, + { + "epoch": 1.09, + "learning_rate": 4.437750649759259e-05, + "loss": 4.0844, + "step": 8286000 + }, + { + "epoch": 1.09, + "learning_rate": 4.437685426661784e-05, + "loss": 4.087, + "step": 8286500 + }, + { + "epoch": 1.09, + "learning_rate": 4.437620200260826e-05, + "loss": 4.0959, + "step": 8287000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4375549705564936e-05, + "loss": 4.0735, + "step": 8287500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4374897375488993e-05, + "loss": 4.0761, + "step": 8288000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4374245012381544e-05, + "loss": 4.0849, + "step": 8288500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43735926162437e-05, + "loss": 4.0775, + "step": 8289000 + }, + { + "epoch": 1.09, + "learning_rate": 4.437294018707657e-05, + "loss": 4.0742, + "step": 8289500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4372287724881275e-05, + "loss": 4.0833, + "step": 8290000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4371635229658926e-05, + "loss": 4.0826, + "step": 8290500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4370982701410626e-05, + "loss": 4.0774, + "step": 8291000 + }, + { + "epoch": 1.09, + "learning_rate": 4.437033014013748e-05, + "loss": 4.1045, + "step": 8291500 + }, + { + "epoch": 1.09, + "learning_rate": 4.436967754584063e-05, + "loss": 4.0664, + "step": 8292000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436902491852116e-05, + "loss": 4.1001, + "step": 8292500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43683722581802e-05, + "loss": 4.0852, + "step": 8293000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4367719564818866e-05, + "loss": 4.0586, + "step": 8293500 + }, + { + "epoch": 1.09, + "learning_rate": 4.436706683843825e-05, + "loss": 4.0787, + "step": 8294000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436641407903948e-05, + "loss": 4.0911, + "step": 8294500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4365761286623665e-05, + "loss": 4.0763, + "step": 8295000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436510846119192e-05, + "loss": 4.0884, + "step": 8295500 + }, + { + "epoch": 1.09, + "learning_rate": 4.436445560274535e-05, + "loss": 4.1023, + "step": 8296000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436380271128507e-05, + "loss": 4.0866, + "step": 8296500 + }, + { + "epoch": 1.09, + "learning_rate": 4.436314978681221e-05, + "loss": 4.0769, + "step": 8297000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436249682932787e-05, + "loss": 4.0816, + "step": 8297500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4361843838833164e-05, + "loss": 4.0757, + "step": 8298000 + }, + { + "epoch": 1.09, + "learning_rate": 4.436119081532919e-05, + "loss": 4.1179, + "step": 8298500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4360537758817086e-05, + "loss": 4.0841, + "step": 8299000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4359884669297955e-05, + "loss": 4.0695, + "step": 8299500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435923154677292e-05, + "loss": 4.0807, + "step": 8300000 + }, + { + "epoch": 1.09, + "learning_rate": 4.435857839124308e-05, + "loss": 4.0825, + "step": 8300500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435792520270955e-05, + "loss": 4.0732, + "step": 8301000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4357271981173446e-05, + "loss": 4.0716, + "step": 8301500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435661872663589e-05, + "loss": 4.0742, + "step": 8302000 + }, + { + "epoch": 1.09, + "learning_rate": 4.435596543909799e-05, + "loss": 4.0815, + "step": 8302500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435531211856085e-05, + "loss": 4.0918, + "step": 8303000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4354658765025605e-05, + "loss": 4.0811, + "step": 8303500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435400537849335e-05, + "loss": 4.0635, + "step": 8304000 + }, + { + "epoch": 1.09, + "learning_rate": 4.43533519589652e-05, + "loss": 4.0569, + "step": 8304500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435269850644228e-05, + "loss": 4.0601, + "step": 8305000 + }, + { + "epoch": 1.09, + "learning_rate": 4.43520450209257e-05, + "loss": 4.0822, + "step": 8305500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435139150241656e-05, + "loss": 4.0685, + "step": 8306000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4350737950916e-05, + "loss": 4.0683, + "step": 8306500 + }, + { + "epoch": 1.09, + "learning_rate": 4.435008436642512e-05, + "loss": 4.0606, + "step": 8307000 + }, + { + "epoch": 1.09, + "learning_rate": 4.434943074894503e-05, + "loss": 4.0591, + "step": 8307500 + }, + { + "epoch": 1.09, + "learning_rate": 4.434877709847686e-05, + "loss": 4.0767, + "step": 8308000 + }, + { + "epoch": 1.09, + "learning_rate": 4.43481234150217e-05, + "loss": 4.0687, + "step": 8308500 + }, + { + "epoch": 1.09, + "learning_rate": 4.434746969858069e-05, + "loss": 4.0791, + "step": 8309000 + }, + { + "epoch": 1.09, + "learning_rate": 4.434681594915493e-05, + "loss": 4.0561, + "step": 8309500 + }, + { + "epoch": 1.09, + "learning_rate": 4.434616216674553e-05, + "loss": 4.0761, + "step": 8310000 + }, + { + "epoch": 1.09, + "learning_rate": 4.434550835135362e-05, + "loss": 4.0798, + "step": 8310500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4344854502980304e-05, + "loss": 4.0644, + "step": 8311000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4344200621626694e-05, + "loss": 4.0927, + "step": 8311500 + }, + { + "epoch": 1.09, + "learning_rate": 4.434354670729392e-05, + "loss": 4.0724, + "step": 8312000 + }, + { + "epoch": 1.09, + "learning_rate": 4.434289275998308e-05, + "loss": 4.0834, + "step": 8312500 + }, + { + "epoch": 1.09, + "learning_rate": 4.43422387796953e-05, + "loss": 4.0682, + "step": 8313000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4341584766431696e-05, + "loss": 4.0688, + "step": 8313500 + }, + { + "epoch": 1.09, + "learning_rate": 4.434093072019337e-05, + "loss": 4.0732, + "step": 8314000 + }, + { + "epoch": 1.09, + "learning_rate": 4.434027664098145e-05, + "loss": 4.0547, + "step": 8314500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4339622528797036e-05, + "loss": 4.0685, + "step": 8315000 + }, + { + "epoch": 1.09, + "learning_rate": 4.433896838364127e-05, + "loss": 4.0843, + "step": 8315500 + }, + { + "epoch": 1.09, + "learning_rate": 4.433831420551523e-05, + "loss": 4.0786, + "step": 8316000 + }, + { + "epoch": 1.09, + "learning_rate": 4.433765999442006e-05, + "loss": 4.0747, + "step": 8316500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4337005750356875e-05, + "loss": 4.091, + "step": 8317000 + }, + { + "epoch": 1.09, + "learning_rate": 4.433635147332678e-05, + "loss": 4.0477, + "step": 8317500 + }, + { + "epoch": 1.09, + "learning_rate": 4.433569716333088e-05, + "loss": 4.0656, + "step": 8318000 + }, + { + "epoch": 1.09, + "learning_rate": 4.433504282037032e-05, + "loss": 4.0623, + "step": 8318500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4334388444446184e-05, + "loss": 4.0826, + "step": 8319000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4333734035559614e-05, + "loss": 4.0703, + "step": 8319500 + }, + { + "epoch": 1.09, + "learning_rate": 4.433307959371171e-05, + "loss": 4.0551, + "step": 8320000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4332425118903596e-05, + "loss": 4.0749, + "step": 8320500 + }, + { + "epoch": 1.09, + "learning_rate": 4.433177061113638e-05, + "loss": 4.08, + "step": 8321000 + }, + { + "epoch": 1.09, + "learning_rate": 4.433111607041118e-05, + "loss": 4.1058, + "step": 8321500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4330461496729114e-05, + "loss": 4.066, + "step": 8322000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4329806890091305e-05, + "loss": 4.0675, + "step": 8322500 + }, + { + "epoch": 1.09, + "learning_rate": 4.432915225049885e-05, + "loss": 4.0599, + "step": 8323000 + }, + { + "epoch": 1.09, + "learning_rate": 4.432849757795289e-05, + "loss": 4.0672, + "step": 8323500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4327842872454514e-05, + "loss": 4.0674, + "step": 8324000 + }, + { + "epoch": 1.09, + "learning_rate": 4.432718813400486e-05, + "loss": 4.0739, + "step": 8324500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4326533362605034e-05, + "loss": 4.0556, + "step": 8325000 + }, + { + "epoch": 1.09, + "learning_rate": 4.432587855825615e-05, + "loss": 4.0582, + "step": 8325500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4325223720959333e-05, + "loss": 4.0759, + "step": 8326000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4324568850715696e-05, + "loss": 4.0725, + "step": 8326500 + }, + { + "epoch": 1.09, + "learning_rate": 4.432391394752635e-05, + "loss": 4.0591, + "step": 8327000 + }, + { + "epoch": 1.09, + "learning_rate": 4.432325901139243e-05, + "loss": 4.0371, + "step": 8327500 + }, + { + "epoch": 1.09, + "learning_rate": 4.432260404231503e-05, + "loss": 4.047, + "step": 8328000 + }, + { + "epoch": 1.09, + "learning_rate": 4.432194904029527e-05, + "loss": 4.0747, + "step": 8328500 + }, + { + "epoch": 1.09, + "learning_rate": 4.432129400533428e-05, + "loss": 4.0524, + "step": 8329000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4320638937433156e-05, + "loss": 4.0345, + "step": 8329500 + }, + { + "epoch": 1.09, + "learning_rate": 4.431998383659304e-05, + "loss": 4.068, + "step": 8330000 + }, + { + "epoch": 1.09, + "learning_rate": 4.4319328702815035e-05, + "loss": 4.0742, + "step": 8330500 + }, + { + "epoch": 1.09, + "learning_rate": 4.431867353610026e-05, + "loss": 4.0534, + "step": 8331000 + }, + { + "epoch": 1.09, + "learning_rate": 4.431801833644983e-05, + "loss": 4.0748, + "step": 8331500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431736310386486e-05, + "loss": 4.0643, + "step": 8332000 + }, + { + "epoch": 1.1, + "learning_rate": 4.431670783834647e-05, + "loss": 4.0658, + "step": 8332500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431605253989578e-05, + "loss": 4.0618, + "step": 8333000 + }, + { + "epoch": 1.1, + "learning_rate": 4.431539720851391e-05, + "loss": 4.0434, + "step": 8333500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431474184420196e-05, + "loss": 4.0638, + "step": 8334000 + }, + { + "epoch": 1.1, + "learning_rate": 4.431408644696107e-05, + "loss": 4.0475, + "step": 8334500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431343101679234e-05, + "loss": 4.0613, + "step": 8335000 + }, + { + "epoch": 1.1, + "learning_rate": 4.43127755536969e-05, + "loss": 4.067, + "step": 8335500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431212005767586e-05, + "loss": 4.089, + "step": 8336000 + }, + { + "epoch": 1.1, + "learning_rate": 4.431146452873034e-05, + "loss": 4.0638, + "step": 8336500 + }, + { + "epoch": 1.1, + "learning_rate": 4.431080896686145e-05, + "loss": 4.0669, + "step": 8337000 + }, + { + "epoch": 1.1, + "learning_rate": 4.431015337207032e-05, + "loss": 4.0628, + "step": 8337500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4309497744358065e-05, + "loss": 4.0771, + "step": 8338000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4308842083725785e-05, + "loss": 4.0556, + "step": 8338500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4308186390174626e-05, + "loss": 4.0672, + "step": 8339000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4307530663705694e-05, + "loss": 4.0406, + "step": 8339500 + }, + { + "epoch": 1.1, + "learning_rate": 4.43068749043201e-05, + "loss": 4.0707, + "step": 8340000 + }, + { + "epoch": 1.1, + "learning_rate": 4.430621911201897e-05, + "loss": 4.0715, + "step": 8340500 + }, + { + "epoch": 1.1, + "learning_rate": 4.430556328680342e-05, + "loss": 4.0723, + "step": 8341000 + }, + { + "epoch": 1.1, + "learning_rate": 4.430490742867456e-05, + "loss": 4.0499, + "step": 8341500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4304251537633524e-05, + "loss": 4.0753, + "step": 8342000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4303595613681415e-05, + "loss": 4.0638, + "step": 8342500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4302939656819365e-05, + "loss": 4.0487, + "step": 8343000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4302283667048486e-05, + "loss": 4.0545, + "step": 8343500 + }, + { + "epoch": 1.1, + "learning_rate": 4.430162764436989e-05, + "loss": 4.0545, + "step": 8344000 + }, + { + "epoch": 1.1, + "learning_rate": 4.43009715887847e-05, + "loss": 4.0485, + "step": 8344500 + }, + { + "epoch": 1.1, + "learning_rate": 4.430031550029404e-05, + "loss": 4.083, + "step": 8345000 + }, + { + "epoch": 1.1, + "learning_rate": 4.429965937889902e-05, + "loss": 4.0525, + "step": 8345500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4299003224600766e-05, + "loss": 4.0537, + "step": 8346000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42983470374004e-05, + "loss": 4.0351, + "step": 8346500 + }, + { + "epoch": 1.1, + "learning_rate": 4.429769081729903e-05, + "loss": 4.0731, + "step": 8347000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4297034564297774e-05, + "loss": 4.0593, + "step": 8347500 + }, + { + "epoch": 1.1, + "learning_rate": 4.429637827839775e-05, + "loss": 4.0533, + "step": 8348000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42957219596001e-05, + "loss": 4.0674, + "step": 8348500 + }, + { + "epoch": 1.1, + "learning_rate": 4.429506560790592e-05, + "loss": 4.04, + "step": 8349000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4294409223316325e-05, + "loss": 4.0644, + "step": 8349500 + }, + { + "epoch": 1.1, + "learning_rate": 4.429375280583245e-05, + "loss": 4.0298, + "step": 8350000 + }, + { + "epoch": 1.1, + "learning_rate": 4.429309635545541e-05, + "loss": 4.0459, + "step": 8350500 + }, + { + "epoch": 1.1, + "learning_rate": 4.429243987218632e-05, + "loss": 4.0629, + "step": 8351000 + }, + { + "epoch": 1.1, + "learning_rate": 4.429178335602629e-05, + "loss": 4.0626, + "step": 8351500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4291126806976466e-05, + "loss": 4.0482, + "step": 8352000 + }, + { + "epoch": 1.1, + "learning_rate": 4.429047022503795e-05, + "loss": 4.0492, + "step": 8352500 + }, + { + "epoch": 1.1, + "learning_rate": 4.428981361021186e-05, + "loss": 4.0696, + "step": 8353000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4289156962499316e-05, + "loss": 4.0571, + "step": 8353500 + }, + { + "epoch": 1.1, + "learning_rate": 4.428850028190144e-05, + "loss": 4.0508, + "step": 8354000 + }, + { + "epoch": 1.1, + "learning_rate": 4.428784356841936e-05, + "loss": 4.0606, + "step": 8354500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4287186822054176e-05, + "loss": 4.0473, + "step": 8355000 + }, + { + "epoch": 1.1, + "learning_rate": 4.428653004280703e-05, + "loss": 4.056, + "step": 8355500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4285873230679023e-05, + "loss": 4.0409, + "step": 8356000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4285216385671277e-05, + "loss": 4.0446, + "step": 8356500 + }, + { + "epoch": 1.1, + "learning_rate": 4.428455950778493e-05, + "loss": 4.0449, + "step": 8357000 + }, + { + "epoch": 1.1, + "learning_rate": 4.428390259702108e-05, + "loss": 4.0399, + "step": 8357500 + }, + { + "epoch": 1.1, + "learning_rate": 4.428324565338086e-05, + "loss": 4.0538, + "step": 8358000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4282588676865386e-05, + "loss": 4.0645, + "step": 8358500 + }, + { + "epoch": 1.1, + "learning_rate": 4.428193166747577e-05, + "loss": 4.0245, + "step": 8359000 + }, + { + "epoch": 1.1, + "learning_rate": 4.428127462521315e-05, + "loss": 4.0333, + "step": 8359500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4280617550078636e-05, + "loss": 4.0613, + "step": 8360000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427996044207334e-05, + "loss": 4.042, + "step": 8360500 + }, + { + "epoch": 1.1, + "learning_rate": 4.42793033011984e-05, + "loss": 4.0665, + "step": 8361000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4278646127454924e-05, + "loss": 4.0506, + "step": 8361500 + }, + { + "epoch": 1.1, + "learning_rate": 4.427798892084404e-05, + "loss": 4.0404, + "step": 8362000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427733168136685e-05, + "loss": 4.0574, + "step": 8362500 + }, + { + "epoch": 1.1, + "learning_rate": 4.42766744090245e-05, + "loss": 4.0629, + "step": 8363000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42760171038181e-05, + "loss": 4.0507, + "step": 8363500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4275359765748766e-05, + "loss": 4.0508, + "step": 8364000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427470239481762e-05, + "loss": 4.0421, + "step": 8364500 + }, + { + "epoch": 1.1, + "learning_rate": 4.427404499102579e-05, + "loss": 4.0299, + "step": 8365000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427338755437438e-05, + "loss": 4.0512, + "step": 8365500 + }, + { + "epoch": 1.1, + "learning_rate": 4.427273008486453e-05, + "loss": 4.0497, + "step": 8366000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427207258249736e-05, + "loss": 4.0578, + "step": 8366500 + }, + { + "epoch": 1.1, + "learning_rate": 4.427141504727398e-05, + "loss": 4.0455, + "step": 8367000 + }, + { + "epoch": 1.1, + "learning_rate": 4.427075747919552e-05, + "loss": 4.0398, + "step": 8367500 + }, + { + "epoch": 1.1, + "learning_rate": 4.427009987826309e-05, + "loss": 4.0651, + "step": 8368000 + }, + { + "epoch": 1.1, + "learning_rate": 4.426944224447782e-05, + "loss": 4.0536, + "step": 8368500 + }, + { + "epoch": 1.1, + "learning_rate": 4.426878457784083e-05, + "loss": 4.0574, + "step": 8369000 + }, + { + "epoch": 1.1, + "learning_rate": 4.426812687835323e-05, + "loss": 4.0248, + "step": 8369500 + }, + { + "epoch": 1.1, + "learning_rate": 4.426746914601616e-05, + "loss": 4.0348, + "step": 8370000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4266811380830734e-05, + "loss": 4.0382, + "step": 8370500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4266153582798076e-05, + "loss": 4.0661, + "step": 8371000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42654957519193e-05, + "loss": 4.0479, + "step": 8371500 + }, + { + "epoch": 1.1, + "learning_rate": 4.426483788819552e-05, + "loss": 4.0218, + "step": 8372000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4264179991627886e-05, + "loss": 4.0576, + "step": 8372500 + }, + { + "epoch": 1.1, + "learning_rate": 4.426352206221749e-05, + "loss": 4.0534, + "step": 8373000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4262864099965474e-05, + "loss": 4.0347, + "step": 8373500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4262206104872946e-05, + "loss": 4.0626, + "step": 8374000 + }, + { + "epoch": 1.1, + "learning_rate": 4.426154807694103e-05, + "loss": 4.0475, + "step": 8374500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4260890016170856e-05, + "loss": 4.0622, + "step": 8375000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4260231922563544e-05, + "loss": 4.058, + "step": 8375500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4259573796120214e-05, + "loss": 4.0512, + "step": 8376000 + }, + { + "epoch": 1.1, + "learning_rate": 4.425891563684198e-05, + "loss": 4.0553, + "step": 8376500 + }, + { + "epoch": 1.1, + "learning_rate": 4.425825744472998e-05, + "loss": 4.0388, + "step": 8377000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4257599219785316e-05, + "loss": 4.0526, + "step": 8377500 + }, + { + "epoch": 1.1, + "learning_rate": 4.425694096200913e-05, + "loss": 4.047, + "step": 8378000 + }, + { + "epoch": 1.1, + "learning_rate": 4.425628267140254e-05, + "loss": 4.0462, + "step": 8378500 + }, + { + "epoch": 1.1, + "learning_rate": 4.425562434796665e-05, + "loss": 4.0416, + "step": 8379000 + }, + { + "epoch": 1.1, + "learning_rate": 4.425496599170261e-05, + "loss": 4.0342, + "step": 8379500 + }, + { + "epoch": 1.1, + "learning_rate": 4.425430760261152e-05, + "loss": 4.0301, + "step": 8380000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4253649180694516e-05, + "loss": 4.0588, + "step": 8380500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4252990725952715e-05, + "loss": 4.0368, + "step": 8381000 + }, + { + "epoch": 1.1, + "learning_rate": 4.425233223838724e-05, + "loss": 4.0364, + "step": 8381500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4251673717999216e-05, + "loss": 4.0199, + "step": 8382000 + }, + { + "epoch": 1.1, + "learning_rate": 4.425101516478975e-05, + "loss": 4.0468, + "step": 8382500 + }, + { + "epoch": 1.1, + "learning_rate": 4.425035657876e-05, + "loss": 4.0419, + "step": 8383000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4249697959911054e-05, + "loss": 4.0331, + "step": 8383500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4249039308244054e-05, + "loss": 4.0523, + "step": 8384000 + }, + { + "epoch": 1.1, + "learning_rate": 4.424838062376012e-05, + "loss": 4.0468, + "step": 8384500 + }, + { + "epoch": 1.1, + "learning_rate": 4.424772190646036e-05, + "loss": 4.0308, + "step": 8385000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4247063156345915e-05, + "loss": 4.042, + "step": 8385500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4246404373417905e-05, + "loss": 4.0316, + "step": 8386000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4245745557677445e-05, + "loss": 4.053, + "step": 8386500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4245086709125666e-05, + "loss": 4.0413, + "step": 8387000 + }, + { + "epoch": 1.1, + "learning_rate": 4.424442782776369e-05, + "loss": 4.038, + "step": 8387500 + }, + { + "epoch": 1.1, + "learning_rate": 4.424376891359264e-05, + "loss": 4.0379, + "step": 8388000 + }, + { + "epoch": 1.1, + "learning_rate": 4.424310996661363e-05, + "loss": 4.0296, + "step": 8388500 + }, + { + "epoch": 1.1, + "learning_rate": 4.42424509868278e-05, + "loss": 4.0518, + "step": 8389000 + }, + { + "epoch": 1.1, + "learning_rate": 4.424179197423627e-05, + "loss": 4.0423, + "step": 8389500 + }, + { + "epoch": 1.1, + "learning_rate": 4.424113292884015e-05, + "loss": 4.0356, + "step": 8390000 + }, + { + "epoch": 1.1, + "learning_rate": 4.424047385064057e-05, + "loss": 4.0207, + "step": 8390500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4239814739638664e-05, + "loss": 4.0333, + "step": 8391000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4239155595835533e-05, + "loss": 4.046, + "step": 8391500 + }, + { + "epoch": 1.1, + "learning_rate": 4.423849641923233e-05, + "loss": 4.0309, + "step": 8392000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4237837209830166e-05, + "loss": 4.0318, + "step": 8392500 + }, + { + "epoch": 1.1, + "learning_rate": 4.423717796763015e-05, + "loss": 4.0432, + "step": 8393000 + }, + { + "epoch": 1.1, + "learning_rate": 4.423651869263343e-05, + "loss": 4.0458, + "step": 8393500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4235859384841114e-05, + "loss": 4.0588, + "step": 8394000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4235200044254334e-05, + "loss": 4.044, + "step": 8394500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4234540670874205e-05, + "loss": 4.0382, + "step": 8395000 + }, + { + "epoch": 1.1, + "learning_rate": 4.423388126470187e-05, + "loss": 4.0365, + "step": 8395500 + }, + { + "epoch": 1.1, + "learning_rate": 4.423322182573842e-05, + "loss": 4.0423, + "step": 8396000 + }, + { + "epoch": 1.1, + "learning_rate": 4.423256235398502e-05, + "loss": 4.0239, + "step": 8396500 + }, + { + "epoch": 1.1, + "learning_rate": 4.423190284944276e-05, + "loss": 4.0683, + "step": 8397000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4231243312112785e-05, + "loss": 4.0509, + "step": 8397500 + }, + { + "epoch": 1.1, + "learning_rate": 4.423058374199621e-05, + "loss": 4.0236, + "step": 8398000 + }, + { + "epoch": 1.1, + "learning_rate": 4.422992413909417e-05, + "loss": 4.0415, + "step": 8398500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4229264503407776e-05, + "loss": 4.0213, + "step": 8399000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4228604834938146e-05, + "loss": 4.035, + "step": 8399500 + }, + { + "epoch": 1.1, + "learning_rate": 4.422794513368643e-05, + "loss": 4.0323, + "step": 8400000 + }, + { + "epoch": 1.1, + "learning_rate": 4.422728539965374e-05, + "loss": 4.0373, + "step": 8400500 + }, + { + "epoch": 1.1, + "learning_rate": 4.42266256328412e-05, + "loss": 4.0082, + "step": 8401000 + }, + { + "epoch": 1.1, + "learning_rate": 4.422596583324993e-05, + "loss": 4.047, + "step": 8401500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4225306000881065e-05, + "loss": 4.0258, + "step": 8402000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4224646135735725e-05, + "loss": 4.0366, + "step": 8402500 + }, + { + "epoch": 1.1, + "learning_rate": 4.422398623781503e-05, + "loss": 4.0512, + "step": 8403000 + }, + { + "epoch": 1.1, + "learning_rate": 4.422332630712012e-05, + "loss": 4.0398, + "step": 8403500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4222666343652095e-05, + "loss": 4.0331, + "step": 8404000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4222006347412116e-05, + "loss": 4.0338, + "step": 8404500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4221346318401266e-05, + "loss": 4.0332, + "step": 8405000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42206862566207e-05, + "loss": 4.0509, + "step": 8405500 + }, + { + "epoch": 1.1, + "learning_rate": 4.422002616207154e-05, + "loss": 4.037, + "step": 8406000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4219366034754906e-05, + "loss": 4.0033, + "step": 8406500 + }, + { + "epoch": 1.1, + "learning_rate": 4.4218705874671915e-05, + "loss": 4.0139, + "step": 8407000 + }, + { + "epoch": 1.1, + "learning_rate": 4.42180456818237e-05, + "loss": 4.0488, + "step": 8407500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4217385456211403e-05, + "loss": 4.0341, + "step": 8408000 + }, + { + "epoch": 1.11, + "learning_rate": 4.421672519783613e-05, + "loss": 4.0345, + "step": 8408500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4216064906699005e-05, + "loss": 4.0228, + "step": 8409000 + }, + { + "epoch": 1.11, + "learning_rate": 4.421540458280116e-05, + "loss": 4.0475, + "step": 8409500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4214744226143725e-05, + "loss": 4.0177, + "step": 8410000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4214083836727817e-05, + "loss": 4.0192, + "step": 8410500 + }, + { + "epoch": 1.11, + "learning_rate": 4.421342341455457e-05, + "loss": 4.0186, + "step": 8411000 + }, + { + "epoch": 1.11, + "learning_rate": 4.42127629596251e-05, + "loss": 4.0087, + "step": 8411500 + }, + { + "epoch": 1.11, + "learning_rate": 4.421210247194055e-05, + "loss": 4.0494, + "step": 8412000 + }, + { + "epoch": 1.11, + "learning_rate": 4.421144195150203e-05, + "loss": 4.0446, + "step": 8412500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4210781398310665e-05, + "loss": 4.0303, + "step": 8413000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4210120812367595e-05, + "loss": 4.0094, + "step": 8413500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4209460193673936e-05, + "loss": 4.0433, + "step": 8414000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420879954223082e-05, + "loss": 4.0177, + "step": 8414500 + }, + { + "epoch": 1.11, + "learning_rate": 4.420813885803936e-05, + "loss": 4.043, + "step": 8415000 + }, + { + "epoch": 1.11, + "learning_rate": 4.42074781411007e-05, + "loss": 4.0275, + "step": 8415500 + }, + { + "epoch": 1.11, + "learning_rate": 4.420681739141596e-05, + "loss": 4.0168, + "step": 8416000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420615660898626e-05, + "loss": 4.0269, + "step": 8416500 + }, + { + "epoch": 1.11, + "learning_rate": 4.420549579381274e-05, + "loss": 4.0325, + "step": 8417000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420483494589651e-05, + "loss": 4.0188, + "step": 8417500 + }, + { + "epoch": 1.11, + "learning_rate": 4.420417406523871e-05, + "loss": 4.027, + "step": 8418000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420351315184047e-05, + "loss": 4.0325, + "step": 8418500 + }, + { + "epoch": 1.11, + "learning_rate": 4.42028522057029e-05, + "loss": 4.0337, + "step": 8419000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420219122682714e-05, + "loss": 4.0555, + "step": 8419500 + }, + { + "epoch": 1.11, + "learning_rate": 4.42015302152143e-05, + "loss": 4.0359, + "step": 8420000 + }, + { + "epoch": 1.11, + "learning_rate": 4.420086917086553e-05, + "loss": 4.0212, + "step": 8420500 + }, + { + "epoch": 1.11, + "learning_rate": 4.420020809378195e-05, + "loss": 4.024, + "step": 8421000 + }, + { + "epoch": 1.11, + "learning_rate": 4.419954698396468e-05, + "loss": 4.0351, + "step": 8421500 + }, + { + "epoch": 1.11, + "learning_rate": 4.419888584141485e-05, + "loss": 4.0235, + "step": 8422000 + }, + { + "epoch": 1.11, + "learning_rate": 4.419822466613358e-05, + "loss": 4.0274, + "step": 8422500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4197563458122015e-05, + "loss": 4.0073, + "step": 8423000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4196902217381264e-05, + "loss": 4.028, + "step": 8423500 + }, + { + "epoch": 1.11, + "learning_rate": 4.419624094391247e-05, + "loss": 4.0487, + "step": 8424000 + }, + { + "epoch": 1.11, + "learning_rate": 4.419557963771675e-05, + "loss": 4.034, + "step": 8424500 + }, + { + "epoch": 1.11, + "learning_rate": 4.419491829879524e-05, + "loss": 4.017, + "step": 8425000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4194256927149055e-05, + "loss": 4.0268, + "step": 8425500 + }, + { + "epoch": 1.11, + "learning_rate": 4.419359552277933e-05, + "loss": 4.0447, + "step": 8426000 + }, + { + "epoch": 1.11, + "learning_rate": 4.419293408568719e-05, + "loss": 4.0375, + "step": 8426500 + }, + { + "epoch": 1.11, + "learning_rate": 4.419227261587377e-05, + "loss": 4.0254, + "step": 8427000 + }, + { + "epoch": 1.11, + "learning_rate": 4.419161111334019e-05, + "loss": 4.0191, + "step": 8427500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4190949578087584e-05, + "loss": 4.0155, + "step": 8428000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4190288010117066e-05, + "loss": 4.0207, + "step": 8428500 + }, + { + "epoch": 1.11, + "learning_rate": 4.418962640942977e-05, + "loss": 4.0252, + "step": 8429000 + }, + { + "epoch": 1.11, + "learning_rate": 4.418896477602684e-05, + "loss": 4.0195, + "step": 8429500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4188303109909396e-05, + "loss": 4.0314, + "step": 8430000 + }, + { + "epoch": 1.11, + "learning_rate": 4.418764141107855e-05, + "loss": 4.0404, + "step": 8430500 + }, + { + "epoch": 1.11, + "learning_rate": 4.418697967953545e-05, + "loss": 4.0426, + "step": 8431000 + }, + { + "epoch": 1.11, + "learning_rate": 4.418631791528122e-05, + "loss": 4.0608, + "step": 8431500 + }, + { + "epoch": 1.11, + "learning_rate": 4.418565611831697e-05, + "loss": 4.0522, + "step": 8432000 + }, + { + "epoch": 1.11, + "learning_rate": 4.418499428864385e-05, + "loss": 4.0392, + "step": 8432500 + }, + { + "epoch": 1.11, + "learning_rate": 4.418433242626298e-05, + "loss": 4.0357, + "step": 8433000 + }, + { + "epoch": 1.11, + "learning_rate": 4.418367053117549e-05, + "loss": 4.0203, + "step": 8433500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4183008603382506e-05, + "loss": 4.0461, + "step": 8434000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4182346642885164e-05, + "loss": 4.0218, + "step": 8434500 + }, + { + "epoch": 1.11, + "learning_rate": 4.418168464968458e-05, + "loss": 4.026, + "step": 8435000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4181022623781886e-05, + "loss": 4.0377, + "step": 8435500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4180360565178226e-05, + "loss": 4.024, + "step": 8436000 + }, + { + "epoch": 1.11, + "learning_rate": 4.417969847387471e-05, + "loss": 4.0286, + "step": 8436500 + }, + { + "epoch": 1.11, + "learning_rate": 4.417903634987247e-05, + "loss": 4.0377, + "step": 8437000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4178374193172646e-05, + "loss": 4.0328, + "step": 8437500 + }, + { + "epoch": 1.11, + "learning_rate": 4.417771200377636e-05, + "loss": 4.0229, + "step": 8438000 + }, + { + "epoch": 1.11, + "learning_rate": 4.417704978168473e-05, + "loss": 3.9991, + "step": 8438500 + }, + { + "epoch": 1.11, + "learning_rate": 4.41763875268989e-05, + "loss": 4.0287, + "step": 8439000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4175725239419995e-05, + "loss": 4.03, + "step": 8439500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4175062919249146e-05, + "loss": 4.0068, + "step": 8440000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4174400566387474e-05, + "loss": 4.0088, + "step": 8440500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4173738180836124e-05, + "loss": 4.0229, + "step": 8441000 + }, + { + "epoch": 1.11, + "learning_rate": 4.417307576259621e-05, + "loss": 4.047, + "step": 8441500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4172413311668856e-05, + "loss": 4.0335, + "step": 8442000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4171750828055214e-05, + "loss": 4.0019, + "step": 8442500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4171088311756394e-05, + "loss": 4.0153, + "step": 8443000 + }, + { + "epoch": 1.11, + "learning_rate": 4.417042576277354e-05, + "loss": 4.0127, + "step": 8443500 + }, + { + "epoch": 1.11, + "learning_rate": 4.416976318110776e-05, + "loss": 4.0468, + "step": 8444000 + }, + { + "epoch": 1.11, + "learning_rate": 4.416910056676021e-05, + "loss": 4.0376, + "step": 8444500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4168437919732006e-05, + "loss": 4.0275, + "step": 8445000 + }, + { + "epoch": 1.11, + "learning_rate": 4.416777524002428e-05, + "loss": 4.0275, + "step": 8445500 + }, + { + "epoch": 1.11, + "learning_rate": 4.416711252763815e-05, + "loss": 4.0137, + "step": 8446000 + }, + { + "epoch": 1.11, + "learning_rate": 4.416644978257477e-05, + "loss": 4.0229, + "step": 8446500 + }, + { + "epoch": 1.11, + "learning_rate": 4.416578700483526e-05, + "loss": 4.0216, + "step": 8447000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4165124194420733e-05, + "loss": 4.0161, + "step": 8447500 + }, + { + "epoch": 1.11, + "learning_rate": 4.416446135133234e-05, + "loss": 4.0408, + "step": 8448000 + }, + { + "epoch": 1.11, + "learning_rate": 4.41637984755712e-05, + "loss": 4.0432, + "step": 8448500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4163135567138445e-05, + "loss": 4.0181, + "step": 8449000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4162472626035214e-05, + "loss": 4.0137, + "step": 8449500 + }, + { + "epoch": 1.11, + "learning_rate": 4.416180965226263e-05, + "loss": 4.0309, + "step": 8450000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4161146645821815e-05, + "loss": 4.0187, + "step": 8450500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4160483606713924e-05, + "loss": 4.0313, + "step": 8451000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415982053494006e-05, + "loss": 4.0165, + "step": 8451500 + }, + { + "epoch": 1.11, + "learning_rate": 4.415915743050136e-05, + "loss": 4.0201, + "step": 8452000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415849429339897e-05, + "loss": 4.0262, + "step": 8452500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4157831123634e-05, + "loss": 4.0245, + "step": 8453000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415716792120759e-05, + "loss": 4.0427, + "step": 8453500 + }, + { + "epoch": 1.11, + "learning_rate": 4.415650468612088e-05, + "loss": 4.0236, + "step": 8454000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415584141837499e-05, + "loss": 4.0243, + "step": 8454500 + }, + { + "epoch": 1.11, + "learning_rate": 4.415517811797104e-05, + "loss": 4.0353, + "step": 8455000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4154514784910186e-05, + "loss": 4.0222, + "step": 8455500 + }, + { + "epoch": 1.11, + "learning_rate": 4.415385141919354e-05, + "loss": 4.0406, + "step": 8456000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415318802082224e-05, + "loss": 4.0091, + "step": 8456500 + }, + { + "epoch": 1.11, + "learning_rate": 4.415252458979743e-05, + "loss": 4.0207, + "step": 8457000 + }, + { + "epoch": 1.11, + "learning_rate": 4.415186112612021e-05, + "loss": 4.026, + "step": 8457500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4151197629791734e-05, + "loss": 4.0182, + "step": 8458000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4150534100813126e-05, + "loss": 4.0169, + "step": 8458500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4149870539185524e-05, + "loss": 4.0219, + "step": 8459000 + }, + { + "epoch": 1.11, + "learning_rate": 4.414920694491005e-05, + "loss": 3.9987, + "step": 8459500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4148543317987834e-05, + "loss": 4.0043, + "step": 8460000 + }, + { + "epoch": 1.11, + "learning_rate": 4.414787965842002e-05, + "loss": 4.0165, + "step": 8460500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4147215966207736e-05, + "loss": 4.0004, + "step": 8461000 + }, + { + "epoch": 1.11, + "learning_rate": 4.41465522413521e-05, + "loss": 4.0011, + "step": 8461500 + }, + { + "epoch": 1.11, + "learning_rate": 4.414588848385425e-05, + "loss": 4.0268, + "step": 8462000 + }, + { + "epoch": 1.11, + "learning_rate": 4.414522469371534e-05, + "loss": 4.0225, + "step": 8462500 + }, + { + "epoch": 1.11, + "learning_rate": 4.414456087093646e-05, + "loss": 4.0323, + "step": 8463000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4143897015518776e-05, + "loss": 4.0256, + "step": 8463500 + }, + { + "epoch": 1.11, + "learning_rate": 4.414323312746341e-05, + "loss": 4.0217, + "step": 8464000 + }, + { + "epoch": 1.11, + "learning_rate": 4.414256920677149e-05, + "loss": 4.0251, + "step": 8464500 + }, + { + "epoch": 1.11, + "learning_rate": 4.414190525344415e-05, + "loss": 4.016, + "step": 8465000 + }, + { + "epoch": 1.11, + "learning_rate": 4.414124126748251e-05, + "loss": 4.0521, + "step": 8465500 + }, + { + "epoch": 1.11, + "learning_rate": 4.414057724888773e-05, + "loss": 4.0315, + "step": 8466000 + }, + { + "epoch": 1.11, + "learning_rate": 4.413991319766092e-05, + "loss": 4.0078, + "step": 8466500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413924911380321e-05, + "loss": 4.022, + "step": 8467000 + }, + { + "epoch": 1.11, + "learning_rate": 4.413858499731575e-05, + "loss": 4.0239, + "step": 8467500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413792084819967e-05, + "loss": 4.0186, + "step": 8468000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4137256666456075e-05, + "loss": 4.0302, + "step": 8468500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413659245208613e-05, + "loss": 4.0022, + "step": 8469000 + }, + { + "epoch": 1.11, + "learning_rate": 4.413592820509095e-05, + "loss": 4.0075, + "step": 8469500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413526392547167e-05, + "loss": 4.0303, + "step": 8470000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4134599613229424e-05, + "loss": 4.0231, + "step": 8470500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4133935268365344e-05, + "loss": 4.0003, + "step": 8471000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4133270890880574e-05, + "loss": 4.021, + "step": 8471500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413260648077623e-05, + "loss": 4.0161, + "step": 8472000 + }, + { + "epoch": 1.11, + "learning_rate": 4.413194203805344e-05, + "loss": 4.0049, + "step": 8472500 + }, + { + "epoch": 1.11, + "learning_rate": 4.413127756271336e-05, + "loss": 4.02, + "step": 8473000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4130613054757105e-05, + "loss": 4.003, + "step": 8473500 + }, + { + "epoch": 1.11, + "learning_rate": 4.412994851418582e-05, + "loss": 4.0071, + "step": 8474000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4129283941000626e-05, + "loss": 4.0442, + "step": 8474500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4128619335202665e-05, + "loss": 4.0235, + "step": 8475000 + }, + { + "epoch": 1.11, + "learning_rate": 4.412795469679306e-05, + "loss": 4.0145, + "step": 8475500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4127290025772956e-05, + "loss": 4.0175, + "step": 8476000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4126625322143476e-05, + "loss": 4.005, + "step": 8476500 + }, + { + "epoch": 1.11, + "learning_rate": 4.412596058590577e-05, + "loss": 4.0082, + "step": 8477000 + }, + { + "epoch": 1.11, + "learning_rate": 4.412529581706094e-05, + "loss": 4.0201, + "step": 8477500 + }, + { + "epoch": 1.11, + "learning_rate": 4.412463101561015e-05, + "loss": 4.0171, + "step": 8478000 + }, + { + "epoch": 1.11, + "learning_rate": 4.412396618155452e-05, + "loss": 4.0097, + "step": 8478500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4123301314895185e-05, + "loss": 4.0288, + "step": 8479000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4122636415633275e-05, + "loss": 4.0192, + "step": 8479500 + }, + { + "epoch": 1.11, + "learning_rate": 4.412197148376994e-05, + "loss": 4.0098, + "step": 8480000 + }, + { + "epoch": 1.11, + "learning_rate": 4.412130651930629e-05, + "loss": 4.0242, + "step": 8480500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4120641522243474e-05, + "loss": 4.0082, + "step": 8481000 + }, + { + "epoch": 1.11, + "learning_rate": 4.411997649258262e-05, + "loss": 4.0088, + "step": 8481500 + }, + { + "epoch": 1.11, + "learning_rate": 4.411931143032486e-05, + "loss": 4.0076, + "step": 8482000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4118646335471336e-05, + "loss": 4.0222, + "step": 8482500 + }, + { + "epoch": 1.11, + "learning_rate": 4.4117981208023175e-05, + "loss": 4.0059, + "step": 8483000 + }, + { + "epoch": 1.11, + "learning_rate": 4.411731604798151e-05, + "loss": 3.9991, + "step": 8483500 + }, + { + "epoch": 1.12, + "learning_rate": 4.411665085534748e-05, + "loss": 4.0293, + "step": 8484000 + }, + { + "epoch": 1.12, + "learning_rate": 4.411598563012222e-05, + "loss": 4.0198, + "step": 8484500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4115320372306856e-05, + "loss": 3.9984, + "step": 8485000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4114655081902526e-05, + "loss": 3.9902, + "step": 8485500 + }, + { + "epoch": 1.12, + "learning_rate": 4.411398975891037e-05, + "loss": 4.0228, + "step": 8486000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4113324403331525e-05, + "loss": 4.0272, + "step": 8486500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4112659015167104e-05, + "loss": 4.0137, + "step": 8487000 + }, + { + "epoch": 1.12, + "learning_rate": 4.411199359441826e-05, + "loss": 4.0107, + "step": 8487500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4111328141086126e-05, + "loss": 4.0102, + "step": 8488000 + }, + { + "epoch": 1.12, + "learning_rate": 4.411066265517183e-05, + "loss": 4.0096, + "step": 8488500 + }, + { + "epoch": 1.12, + "learning_rate": 4.410999713667652e-05, + "loss": 4.0064, + "step": 8489000 + }, + { + "epoch": 1.12, + "learning_rate": 4.41093315856013e-05, + "loss": 4.0134, + "step": 8489500 + }, + { + "epoch": 1.12, + "learning_rate": 4.410866600194734e-05, + "loss": 4.0394, + "step": 8490000 + }, + { + "epoch": 1.12, + "learning_rate": 4.410800038571576e-05, + "loss": 4.0248, + "step": 8490500 + }, + { + "epoch": 1.12, + "learning_rate": 4.410733473690769e-05, + "loss": 4.0038, + "step": 8491000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4106669055524274e-05, + "loss": 4.025, + "step": 8491500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4106003341566635e-05, + "loss": 4.0145, + "step": 8492000 + }, + { + "epoch": 1.12, + "learning_rate": 4.410533759503593e-05, + "loss": 4.0113, + "step": 8492500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4104671815933264e-05, + "loss": 3.9919, + "step": 8493000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4104006004259796e-05, + "loss": 4.0132, + "step": 8493500 + }, + { + "epoch": 1.12, + "learning_rate": 4.410334016001665e-05, + "loss": 3.9847, + "step": 8494000 + }, + { + "epoch": 1.12, + "learning_rate": 4.410267428320497e-05, + "loss": 4.0119, + "step": 8494500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4102008373825876e-05, + "loss": 3.9958, + "step": 8495000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4101342431880516e-05, + "loss": 4.0252, + "step": 8495500 + }, + { + "epoch": 1.12, + "learning_rate": 4.410067645737001e-05, + "loss": 4.0245, + "step": 8496000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4100010450295524e-05, + "loss": 4.0238, + "step": 8496500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4099344410658175e-05, + "loss": 4.0035, + "step": 8497000 + }, + { + "epoch": 1.12, + "learning_rate": 4.409867833845909e-05, + "loss": 4.0088, + "step": 8497500 + }, + { + "epoch": 1.12, + "learning_rate": 4.40980122336994e-05, + "loss": 3.9887, + "step": 8498000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4097346096380275e-05, + "loss": 4.011, + "step": 8498500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4096679926502815e-05, + "loss": 3.9817, + "step": 8499000 + }, + { + "epoch": 1.12, + "learning_rate": 4.409601372406817e-05, + "loss": 3.9932, + "step": 8499500 + }, + { + "epoch": 1.12, + "learning_rate": 4.409534748907749e-05, + "loss": 3.9895, + "step": 8500000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4094681221531884e-05, + "loss": 4.0153, + "step": 8500500 + }, + { + "epoch": 1.12, + "learning_rate": 4.40940149214325e-05, + "loss": 3.9989, + "step": 8501000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4093348588780474e-05, + "loss": 4.0128, + "step": 8501500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4092682223576945e-05, + "loss": 4.0088, + "step": 8502000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4092015825823044e-05, + "loss": 3.999, + "step": 8502500 + }, + { + "epoch": 1.12, + "learning_rate": 4.409134939551991e-05, + "loss": 4.0183, + "step": 8503000 + }, + { + "epoch": 1.12, + "learning_rate": 4.409068293266868e-05, + "loss": 4.0231, + "step": 8503500 + }, + { + "epoch": 1.12, + "learning_rate": 4.409001643727049e-05, + "loss": 3.9966, + "step": 8504000 + }, + { + "epoch": 1.12, + "learning_rate": 4.408934990932647e-05, + "loss": 4.003, + "step": 8504500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408868334883777e-05, + "loss": 4.0012, + "step": 8505000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4088016755805506e-05, + "loss": 3.9908, + "step": 8505500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408735013023083e-05, + "loss": 4.0071, + "step": 8506000 + }, + { + "epoch": 1.12, + "learning_rate": 4.408668347211487e-05, + "loss": 4.0045, + "step": 8506500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408601678145878e-05, + "loss": 3.9915, + "step": 8507000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4085350058263676e-05, + "loss": 4.0206, + "step": 8507500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408468330253071e-05, + "loss": 4.0096, + "step": 8508000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4084016514260994e-05, + "loss": 4.0197, + "step": 8508500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4083349693455695e-05, + "loss": 3.9946, + "step": 8509000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4082682840115935e-05, + "loss": 3.9974, + "step": 8509500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408201595424285e-05, + "loss": 4.0129, + "step": 8510000 + }, + { + "epoch": 1.12, + "learning_rate": 4.408134903583758e-05, + "loss": 3.9988, + "step": 8510500 + }, + { + "epoch": 1.12, + "learning_rate": 4.408068208490127e-05, + "loss": 3.9979, + "step": 8511000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4080015101435034e-05, + "loss": 3.9991, + "step": 8511500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407934808544004e-05, + "loss": 4.0185, + "step": 8512000 + }, + { + "epoch": 1.12, + "learning_rate": 4.40786810369174e-05, + "loss": 4.0116, + "step": 8512500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407801395586826e-05, + "loss": 3.9901, + "step": 8513000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4077346842293755e-05, + "loss": 4.0152, + "step": 8513500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407667969619503e-05, + "loss": 4.0028, + "step": 8514000 + }, + { + "epoch": 1.12, + "learning_rate": 4.407601251757322e-05, + "loss": 4.0168, + "step": 8514500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407534530642945e-05, + "loss": 3.9999, + "step": 8515000 + }, + { + "epoch": 1.12, + "learning_rate": 4.407467806276488e-05, + "loss": 4.0113, + "step": 8515500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407401078658062e-05, + "loss": 3.9749, + "step": 8516000 + }, + { + "epoch": 1.12, + "learning_rate": 4.407334347787783e-05, + "loss": 3.9993, + "step": 8516500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4072676136657636e-05, + "loss": 4.0107, + "step": 8517000 + }, + { + "epoch": 1.12, + "learning_rate": 4.407200876292118e-05, + "loss": 4.0146, + "step": 8517500 + }, + { + "epoch": 1.12, + "learning_rate": 4.407134135666961e-05, + "loss": 4.0151, + "step": 8518000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4070673917904036e-05, + "loss": 4.0369, + "step": 8518500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4070006446625626e-05, + "loss": 4.0036, + "step": 8519000 + }, + { + "epoch": 1.12, + "learning_rate": 4.40693389428355e-05, + "loss": 4.0278, + "step": 8519500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4068671406534806e-05, + "loss": 3.995, + "step": 8520000 + }, + { + "epoch": 1.12, + "learning_rate": 4.406800383772467e-05, + "loss": 4.0194, + "step": 8520500 + }, + { + "epoch": 1.12, + "learning_rate": 4.406733623640624e-05, + "loss": 4.0171, + "step": 8521000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4066668602580664e-05, + "loss": 4.0082, + "step": 8521500 + }, + { + "epoch": 1.12, + "learning_rate": 4.406600093624905e-05, + "loss": 4.0036, + "step": 8522000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4065333237412564e-05, + "loss": 4.0183, + "step": 8522500 + }, + { + "epoch": 1.12, + "learning_rate": 4.406466550607233e-05, + "loss": 4.0147, + "step": 8523000 + }, + { + "epoch": 1.12, + "learning_rate": 4.406399774222949e-05, + "loss": 3.9993, + "step": 8523500 + }, + { + "epoch": 1.12, + "learning_rate": 4.406332994588519e-05, + "loss": 4.0252, + "step": 8524000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4062662117040556e-05, + "loss": 4.0163, + "step": 8524500 + }, + { + "epoch": 1.12, + "learning_rate": 4.406199425569673e-05, + "loss": 3.9896, + "step": 8525000 + }, + { + "epoch": 1.12, + "learning_rate": 4.406132636185486e-05, + "loss": 4.0159, + "step": 8525500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4060658435516066e-05, + "loss": 4.009, + "step": 8526000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4059990476681514e-05, + "loss": 4.012, + "step": 8526500 + }, + { + "epoch": 1.12, + "learning_rate": 4.405932248535232e-05, + "loss": 4.0257, + "step": 8527000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4058654461529625e-05, + "loss": 4.0275, + "step": 8527500 + }, + { + "epoch": 1.12, + "learning_rate": 4.405798640521457e-05, + "loss": 3.9924, + "step": 8528000 + }, + { + "epoch": 1.12, + "learning_rate": 4.405731831640831e-05, + "loss": 4.0022, + "step": 8528500 + }, + { + "epoch": 1.12, + "learning_rate": 4.405665019511196e-05, + "loss": 4.0066, + "step": 8529000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4055982041326674e-05, + "loss": 3.9975, + "step": 8529500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4055313855053584e-05, + "loss": 4.0189, + "step": 8530000 + }, + { + "epoch": 1.12, + "learning_rate": 4.405464563629383e-05, + "loss": 4.0239, + "step": 8530500 + }, + { + "epoch": 1.12, + "learning_rate": 4.405397738504856e-05, + "loss": 3.9998, + "step": 8531000 + }, + { + "epoch": 1.12, + "learning_rate": 4.405330910131891e-05, + "loss": 3.9863, + "step": 8531500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4052640785106004e-05, + "loss": 3.994, + "step": 8532000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4051972436411006e-05, + "loss": 3.9984, + "step": 8532500 + }, + { + "epoch": 1.12, + "learning_rate": 4.405130405523503e-05, + "loss": 3.9977, + "step": 8533000 + }, + { + "epoch": 1.12, + "learning_rate": 4.405063564157923e-05, + "loss": 3.9808, + "step": 8533500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404996719544475e-05, + "loss": 4.0107, + "step": 8534000 + }, + { + "epoch": 1.12, + "learning_rate": 4.404929871683272e-05, + "loss": 4.0111, + "step": 8534500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404863020574428e-05, + "loss": 3.9792, + "step": 8535000 + }, + { + "epoch": 1.12, + "learning_rate": 4.404796166218058e-05, + "loss": 4.019, + "step": 8535500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404729308614275e-05, + "loss": 3.9849, + "step": 8536000 + }, + { + "epoch": 1.12, + "learning_rate": 4.404662447763193e-05, + "loss": 4.0189, + "step": 8536500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404595583664927e-05, + "loss": 4.0243, + "step": 8537000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4045287163195894e-05, + "loss": 4.0228, + "step": 8537500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404461845727294e-05, + "loss": 4.02, + "step": 8538000 + }, + { + "epoch": 1.12, + "learning_rate": 4.404394971888158e-05, + "loss": 4.016, + "step": 8538500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404328094802292e-05, + "loss": 4.0095, + "step": 8539000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4042612144698113e-05, + "loss": 4.0281, + "step": 8539500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4041943308908295e-05, + "loss": 4.013, + "step": 8540000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4041274440654624e-05, + "loss": 4.0271, + "step": 8540500 + }, + { + "epoch": 1.12, + "learning_rate": 4.404060553993821e-05, + "loss": 3.9879, + "step": 8541000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403993660676022e-05, + "loss": 4.0261, + "step": 8541500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4039267641121786e-05, + "loss": 3.999, + "step": 8542000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403859864302403e-05, + "loss": 4.0061, + "step": 8542500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4037929612468125e-05, + "loss": 4.0143, + "step": 8543000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403726054945519e-05, + "loss": 4.0098, + "step": 8543500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4036591453986376e-05, + "loss": 4.0009, + "step": 8544000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403592232606281e-05, + "loss": 4.004, + "step": 8544500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4035253165685655e-05, + "loss": 4.0084, + "step": 8545000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4034583972856026e-05, + "loss": 4.0116, + "step": 8545500 + }, + { + "epoch": 1.12, + "learning_rate": 4.403391474757508e-05, + "loss": 4.0041, + "step": 8546000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403324548984396e-05, + "loss": 4.0242, + "step": 8546500 + }, + { + "epoch": 1.12, + "learning_rate": 4.40325761996638e-05, + "loss": 4.02, + "step": 8547000 + }, + { + "epoch": 1.12, + "learning_rate": 4.403190687703573e-05, + "loss": 4.0194, + "step": 8547500 + }, + { + "epoch": 1.12, + "learning_rate": 4.403123752196092e-05, + "loss": 3.997, + "step": 8548000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4030568134440484e-05, + "loss": 4.0182, + "step": 8548500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402989871447558e-05, + "loss": 3.9891, + "step": 8549000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4029229262067347e-05, + "loss": 4.0183, + "step": 8549500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4028559777216916e-05, + "loss": 4.0134, + "step": 8550000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4027890259925435e-05, + "loss": 4.0028, + "step": 8550500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402722071019405e-05, + "loss": 4.0026, + "step": 8551000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4026551128023896e-05, + "loss": 4.0086, + "step": 8551500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4025881513416115e-05, + "loss": 4.0062, + "step": 8552000 + }, + { + "epoch": 1.12, + "learning_rate": 4.402521186637185e-05, + "loss": 4.0103, + "step": 8552500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402454218689224e-05, + "loss": 4.0019, + "step": 8553000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4023872474978435e-05, + "loss": 4.0017, + "step": 8553500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402320273063156e-05, + "loss": 4.0191, + "step": 8554000 + }, + { + "epoch": 1.12, + "learning_rate": 4.402253295385278e-05, + "loss": 4.0053, + "step": 8554500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402186314464323e-05, + "loss": 4.0266, + "step": 8555000 + }, + { + "epoch": 1.12, + "learning_rate": 4.402119330300403e-05, + "loss": 4.0, + "step": 8555500 + }, + { + "epoch": 1.12, + "learning_rate": 4.402052342893635e-05, + "loss": 4.0201, + "step": 8556000 + }, + { + "epoch": 1.12, + "learning_rate": 4.4019853522441316e-05, + "loss": 4.0357, + "step": 8556500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4019183583520076e-05, + "loss": 4.0167, + "step": 8557000 + }, + { + "epoch": 1.12, + "learning_rate": 4.401851361217377e-05, + "loss": 4.0055, + "step": 8557500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4017843608403554e-05, + "loss": 4.0055, + "step": 8558000 + }, + { + "epoch": 1.12, + "learning_rate": 4.401717357221054e-05, + "loss": 3.9995, + "step": 8558500 + }, + { + "epoch": 1.12, + "learning_rate": 4.40165035035959e-05, + "loss": 3.9901, + "step": 8559000 + }, + { + "epoch": 1.12, + "learning_rate": 4.401583340256076e-05, + "loss": 4.0089, + "step": 8559500 + }, + { + "epoch": 1.13, + "learning_rate": 4.401516326910626e-05, + "loss": 3.9998, + "step": 8560000 + }, + { + "epoch": 1.13, + "learning_rate": 4.4014493103233554e-05, + "loss": 4.0007, + "step": 8560500 + }, + { + "epoch": 1.13, + "learning_rate": 4.401382290494378e-05, + "loss": 3.9875, + "step": 8561000 + }, + { + "epoch": 1.13, + "learning_rate": 4.401315267423808e-05, + "loss": 3.9991, + "step": 8561500 + }, + { + "epoch": 1.13, + "learning_rate": 4.4012482411117595e-05, + "loss": 3.9874, + "step": 8562000 + }, + { + "epoch": 1.13, + "learning_rate": 4.401181211558347e-05, + "loss": 4.0205, + "step": 8562500 + }, + { + "epoch": 1.13, + "learning_rate": 4.401114178763686e-05, + "loss": 4.0056, + "step": 8563000 + }, + { + "epoch": 1.13, + "learning_rate": 4.401047142727888e-05, + "loss": 4.03, + "step": 8563500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400980103451069e-05, + "loss": 3.9863, + "step": 8564000 + }, + { + "epoch": 1.13, + "learning_rate": 4.400913060933344e-05, + "loss": 4.0275, + "step": 8564500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400846015174826e-05, + "loss": 4.0221, + "step": 8565000 + }, + { + "epoch": 1.13, + "learning_rate": 4.400778966175629e-05, + "loss": 3.9991, + "step": 8565500 + }, + { + "epoch": 1.13, + "learning_rate": 4.4007119139358685e-05, + "loss": 4.0087, + "step": 8566000 + }, + { + "epoch": 1.13, + "learning_rate": 4.4006448584556584e-05, + "loss": 4.0141, + "step": 8566500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400577799735113e-05, + "loss": 3.997, + "step": 8567000 + }, + { + "epoch": 1.13, + "learning_rate": 4.4005107377743466e-05, + "loss": 3.9981, + "step": 8567500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400443672573473e-05, + "loss": 4.0111, + "step": 8568000 + }, + { + "epoch": 1.13, + "learning_rate": 4.400376604132608e-05, + "loss": 4.001, + "step": 8568500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400309532451864e-05, + "loss": 3.989, + "step": 8569000 + }, + { + "epoch": 1.13, + "learning_rate": 4.400242457531357e-05, + "loss": 3.9895, + "step": 8569500 + }, + { + "epoch": 1.13, + "learning_rate": 4.4001753793712e-05, + "loss": 4.0103, + "step": 8570000 + }, + { + "epoch": 1.13, + "learning_rate": 4.4001082979715094e-05, + "loss": 4.009, + "step": 8570500 + }, + { + "epoch": 1.13, + "learning_rate": 4.400041213332397e-05, + "loss": 4.0036, + "step": 8571000 + }, + { + "epoch": 1.13, + "learning_rate": 4.399974125453978e-05, + "loss": 3.9899, + "step": 8571500 + }, + { + "epoch": 1.13, + "learning_rate": 4.399907034336368e-05, + "loss": 3.9889, + "step": 8572000 + }, + { + "epoch": 1.13, + "learning_rate": 4.39983993997968e-05, + "loss": 3.9849, + "step": 8572500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39977284238403e-05, + "loss": 4.0004, + "step": 8573000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3997057415495304e-05, + "loss": 3.9836, + "step": 8573500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3996386374762964e-05, + "loss": 4.0012, + "step": 8574000 + }, + { + "epoch": 1.13, + "learning_rate": 4.399571530164443e-05, + "loss": 3.9993, + "step": 8574500 + }, + { + "epoch": 1.13, + "learning_rate": 4.399504419614084e-05, + "loss": 3.9948, + "step": 8575000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3994373058253335e-05, + "loss": 3.9766, + "step": 8575500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3993701887983075e-05, + "loss": 3.9879, + "step": 8576000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3993030685331185e-05, + "loss": 4.0035, + "step": 8576500 + }, + { + "epoch": 1.13, + "learning_rate": 4.399235945029881e-05, + "loss": 4.0052, + "step": 8577000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3991688182887114e-05, + "loss": 3.9801, + "step": 8577500 + }, + { + "epoch": 1.13, + "learning_rate": 4.399101688309722e-05, + "loss": 4.0018, + "step": 8578000 + }, + { + "epoch": 1.13, + "learning_rate": 4.399034555093029e-05, + "loss": 4.0007, + "step": 8578500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398967418638745e-05, + "loss": 4.0079, + "step": 8579000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398900278946986e-05, + "loss": 4.0056, + "step": 8579500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3988331360178655e-05, + "loss": 4.007, + "step": 8580000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398765989851499e-05, + "loss": 4.0089, + "step": 8580500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398698840448001e-05, + "loss": 4.0092, + "step": 8581000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398631687807484e-05, + "loss": 4.0228, + "step": 8581500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398564531930064e-05, + "loss": 4.0063, + "step": 8582000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398497372815856e-05, + "loss": 3.9995, + "step": 8582500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398430210464973e-05, + "loss": 4.0028, + "step": 8583000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398363044877531e-05, + "loss": 4.0179, + "step": 8583500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3982958760536444e-05, + "loss": 4.0031, + "step": 8584000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3982287039934254e-05, + "loss": 4.0167, + "step": 8584500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398161528696991e-05, + "loss": 4.0072, + "step": 8585000 + }, + { + "epoch": 1.13, + "learning_rate": 4.398094350164456e-05, + "loss": 3.9958, + "step": 8585500 + }, + { + "epoch": 1.13, + "learning_rate": 4.398027168395933e-05, + "loss": 4.0112, + "step": 8586000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397959983391537e-05, + "loss": 3.9953, + "step": 8586500 + }, + { + "epoch": 1.13, + "learning_rate": 4.397892795151384e-05, + "loss": 3.9968, + "step": 8587000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397825603675587e-05, + "loss": 3.9935, + "step": 8587500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39775840896426e-05, + "loss": 4.012, + "step": 8588000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3976912110175204e-05, + "loss": 3.9766, + "step": 8588500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3976240098354806e-05, + "loss": 3.9942, + "step": 8589000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397556805418255e-05, + "loss": 4.0112, + "step": 8589500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3974895977659595e-05, + "loss": 4.0154, + "step": 8590000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397422386878707e-05, + "loss": 3.9928, + "step": 8590500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3973551727566134e-05, + "loss": 4.0091, + "step": 8591000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397287955399794e-05, + "loss": 4.0147, + "step": 8591500 + }, + { + "epoch": 1.13, + "learning_rate": 4.397220734808361e-05, + "loss": 3.9914, + "step": 8592000 + }, + { + "epoch": 1.13, + "learning_rate": 4.39715351098243e-05, + "loss": 4.0008, + "step": 8592500 + }, + { + "epoch": 1.13, + "learning_rate": 4.397086283922117e-05, + "loss": 4.0094, + "step": 8593000 + }, + { + "epoch": 1.13, + "learning_rate": 4.397019053627534e-05, + "loss": 4.0058, + "step": 8593500 + }, + { + "epoch": 1.13, + "learning_rate": 4.396951820098799e-05, + "loss": 3.9852, + "step": 8594000 + }, + { + "epoch": 1.13, + "learning_rate": 4.396884583336024e-05, + "loss": 3.9884, + "step": 8594500 + }, + { + "epoch": 1.13, + "learning_rate": 4.396817343339325e-05, + "loss": 4.0184, + "step": 8595000 + }, + { + "epoch": 1.13, + "learning_rate": 4.396750100108815e-05, + "loss": 3.9988, + "step": 8595500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3966828536446095e-05, + "loss": 3.9828, + "step": 8596000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3966156039468244e-05, + "loss": 3.9878, + "step": 8596500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3965483510155724e-05, + "loss": 4.002, + "step": 8597000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3964810948509694e-05, + "loss": 4.0079, + "step": 8597500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39641383545313e-05, + "loss": 4.0075, + "step": 8598000 + }, + { + "epoch": 1.13, + "learning_rate": 4.396346572822168e-05, + "loss": 3.9927, + "step": 8598500 + }, + { + "epoch": 1.13, + "learning_rate": 4.396279306958199e-05, + "loss": 4.0077, + "step": 8599000 + }, + { + "epoch": 1.13, + "learning_rate": 4.396212037861337e-05, + "loss": 4.0016, + "step": 8599500 + }, + { + "epoch": 1.13, + "learning_rate": 4.396144765531697e-05, + "loss": 3.9992, + "step": 8600000 + }, + { + "epoch": 1.13, + "learning_rate": 4.396077489969394e-05, + "loss": 4.0166, + "step": 8600500 + }, + { + "epoch": 1.13, + "learning_rate": 4.396010211174543e-05, + "loss": 3.9859, + "step": 8601000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3959429291472566e-05, + "loss": 4.0006, + "step": 8601500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395875643887651e-05, + "loss": 4.0089, + "step": 8602000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3958083553958425e-05, + "loss": 4.033, + "step": 8602500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395741063671943e-05, + "loss": 3.9774, + "step": 8603000 + }, + { + "epoch": 1.13, + "learning_rate": 4.395673768716069e-05, + "loss": 3.9762, + "step": 8603500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395606470528334e-05, + "loss": 4.0021, + "step": 8604000 + }, + { + "epoch": 1.13, + "learning_rate": 4.395539169108854e-05, + "loss": 4.0158, + "step": 8604500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395471864457743e-05, + "loss": 4.0107, + "step": 8605000 + }, + { + "epoch": 1.13, + "learning_rate": 4.395404556575116e-05, + "loss": 3.992, + "step": 8605500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395337245461087e-05, + "loss": 4.0294, + "step": 8606000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3952699311157717e-05, + "loss": 3.998, + "step": 8606500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395202613539285e-05, + "loss": 3.9763, + "step": 8607000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3951352927317416e-05, + "loss": 3.9959, + "step": 8607500 + }, + { + "epoch": 1.13, + "learning_rate": 4.395067968693255e-05, + "loss": 4.0014, + "step": 8608000 + }, + { + "epoch": 1.13, + "learning_rate": 4.395000641423941e-05, + "loss": 4.0091, + "step": 8608500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3949333109239135e-05, + "loss": 3.9993, + "step": 8609000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3948659771932895e-05, + "loss": 4.0137, + "step": 8609500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3947986402321814e-05, + "loss": 4.0021, + "step": 8610000 + }, + { + "epoch": 1.13, + "learning_rate": 4.394731300040705e-05, + "loss": 4.009, + "step": 8610500 + }, + { + "epoch": 1.13, + "learning_rate": 4.394663956618976e-05, + "loss": 3.9956, + "step": 8611000 + }, + { + "epoch": 1.13, + "learning_rate": 4.394596609967107e-05, + "loss": 4.0309, + "step": 8611500 + }, + { + "epoch": 1.13, + "learning_rate": 4.394529260085215e-05, + "loss": 3.9829, + "step": 8612000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3944619069734133e-05, + "loss": 3.9858, + "step": 8612500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3943945506318174e-05, + "loss": 4.002, + "step": 8613000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3943271910605423e-05, + "loss": 4.0056, + "step": 8613500 + }, + { + "epoch": 1.13, + "learning_rate": 4.394259828259703e-05, + "loss": 4.0019, + "step": 8614000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3941924622294126e-05, + "loss": 4.004, + "step": 8614500 + }, + { + "epoch": 1.13, + "learning_rate": 4.394125092969788e-05, + "loss": 3.9863, + "step": 8615000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3940577204809444e-05, + "loss": 4.0039, + "step": 8615500 + }, + { + "epoch": 1.13, + "learning_rate": 4.393990344762994e-05, + "loss": 4.0008, + "step": 8616000 + }, + { + "epoch": 1.13, + "learning_rate": 4.393922965816054e-05, + "loss": 3.992, + "step": 8616500 + }, + { + "epoch": 1.13, + "learning_rate": 4.393855583640238e-05, + "loss": 4.0031, + "step": 8617000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3937881982356625e-05, + "loss": 4.0098, + "step": 8617500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3937208096024404e-05, + "loss": 3.9969, + "step": 8618000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3936534177406885e-05, + "loss": 4.0082, + "step": 8618500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39358602265052e-05, + "loss": 4.0128, + "step": 8619000 + }, + { + "epoch": 1.13, + "learning_rate": 4.39351862433205e-05, + "loss": 4.0012, + "step": 8619500 + }, + { + "epoch": 1.13, + "learning_rate": 4.393451222785394e-05, + "loss": 4.0099, + "step": 8620000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3933838180106676e-05, + "loss": 3.9977, + "step": 8620500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3933164100079846e-05, + "loss": 3.9953, + "step": 8621000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3932489987774597e-05, + "loss": 4.0218, + "step": 8621500 + }, + { + "epoch": 1.13, + "learning_rate": 4.393181584319209e-05, + "loss": 4.0049, + "step": 8622000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3931141666333466e-05, + "loss": 4.0025, + "step": 8622500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3930467457199876e-05, + "loss": 4.0194, + "step": 8623000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392979321579247e-05, + "loss": 3.9936, + "step": 8623500 + }, + { + "epoch": 1.13, + "learning_rate": 4.392911894211239e-05, + "loss": 3.9989, + "step": 8624000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392844463616081e-05, + "loss": 4.0103, + "step": 8624500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3927770297938846e-05, + "loss": 3.9964, + "step": 8625000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392709592744767e-05, + "loss": 3.9844, + "step": 8625500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3926421524688424e-05, + "loss": 4.0056, + "step": 8626000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392574708966226e-05, + "loss": 4.0079, + "step": 8626500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3925072622370334e-05, + "loss": 4.0054, + "step": 8627000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3924398122813784e-05, + "loss": 3.9993, + "step": 8627500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3923723590993756e-05, + "loss": 4.0003, + "step": 8628000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392304902691142e-05, + "loss": 3.9971, + "step": 8628500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3922374430567915e-05, + "loss": 3.998, + "step": 8629000 + }, + { + "epoch": 1.13, + "learning_rate": 4.392169980196439e-05, + "loss": 3.9885, + "step": 8629500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3921025141102e-05, + "loss": 3.9953, + "step": 8630000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3920350447981886e-05, + "loss": 3.9826, + "step": 8630500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39196757226052e-05, + "loss": 3.9714, + "step": 8631000 + }, + { + "epoch": 1.13, + "learning_rate": 4.39190009649731e-05, + "loss": 3.9948, + "step": 8631500 + }, + { + "epoch": 1.13, + "learning_rate": 4.391832617508674e-05, + "loss": 3.997, + "step": 8632000 + }, + { + "epoch": 1.13, + "learning_rate": 4.391765135294725e-05, + "loss": 3.9932, + "step": 8632500 + }, + { + "epoch": 1.13, + "learning_rate": 4.391697649855581e-05, + "loss": 4.0104, + "step": 8633000 + }, + { + "epoch": 1.13, + "learning_rate": 4.3916301611913536e-05, + "loss": 4.0044, + "step": 8633500 + }, + { + "epoch": 1.13, + "learning_rate": 4.39156266930216e-05, + "loss": 4.026, + "step": 8634000 + }, + { + "epoch": 1.13, + "learning_rate": 4.391495174188116e-05, + "loss": 3.9984, + "step": 8634500 + }, + { + "epoch": 1.13, + "learning_rate": 4.391427675849335e-05, + "loss": 4.0089, + "step": 8635000 + }, + { + "epoch": 1.13, + "learning_rate": 4.391360174285933e-05, + "loss": 4.0041, + "step": 8635500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3912926694980235e-05, + "loss": 3.9979, + "step": 8636000 + }, + { + "epoch": 1.14, + "learning_rate": 4.391225161485724e-05, + "loss": 4.0057, + "step": 8636500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3911576502491484e-05, + "loss": 4.0023, + "step": 8637000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3910901357884115e-05, + "loss": 4.0285, + "step": 8637500 + }, + { + "epoch": 1.14, + "learning_rate": 4.391022618103629e-05, + "loss": 3.9979, + "step": 8638000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390955097194915e-05, + "loss": 4.0063, + "step": 8638500 + }, + { + "epoch": 1.14, + "learning_rate": 4.390887573062387e-05, + "loss": 3.9853, + "step": 8639000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390820045706157e-05, + "loss": 3.9849, + "step": 8639500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3907525151263416e-05, + "loss": 4.0054, + "step": 8640000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3906849813230565e-05, + "loss": 4.002, + "step": 8640500 + }, + { + "epoch": 1.14, + "learning_rate": 4.390617444296415e-05, + "loss": 4.0143, + "step": 8641000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3905499040465345e-05, + "loss": 3.9962, + "step": 8641500 + }, + { + "epoch": 1.14, + "learning_rate": 4.39048236057353e-05, + "loss": 4.0019, + "step": 8642000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390414813877515e-05, + "loss": 3.996, + "step": 8642500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3903472639586055e-05, + "loss": 3.9979, + "step": 8643000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390279710816916e-05, + "loss": 4.0219, + "step": 8643500 + }, + { + "epoch": 1.14, + "learning_rate": 4.390212154452563e-05, + "loss": 4.0124, + "step": 8644000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390144594865661e-05, + "loss": 4.0183, + "step": 8644500 + }, + { + "epoch": 1.14, + "learning_rate": 4.390077032056325e-05, + "loss": 4.0135, + "step": 8645000 + }, + { + "epoch": 1.14, + "learning_rate": 4.390009466024669e-05, + "loss": 3.9872, + "step": 8645500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3899418967708116e-05, + "loss": 4.0227, + "step": 8646000 + }, + { + "epoch": 1.14, + "learning_rate": 4.389874324294865e-05, + "loss": 3.9978, + "step": 8646500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3898067485969455e-05, + "loss": 4.0126, + "step": 8647000 + }, + { + "epoch": 1.14, + "learning_rate": 4.389739169677168e-05, + "loss": 4.0117, + "step": 8647500 + }, + { + "epoch": 1.14, + "learning_rate": 4.389671587535647e-05, + "loss": 3.9826, + "step": 8648000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3896040021724996e-05, + "loss": 3.9957, + "step": 8648500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38953641358784e-05, + "loss": 4.0002, + "step": 8649000 + }, + { + "epoch": 1.14, + "learning_rate": 4.389468821781783e-05, + "loss": 4.0138, + "step": 8649500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3894012267544446e-05, + "loss": 3.9924, + "step": 8650000 + }, + { + "epoch": 1.14, + "learning_rate": 4.389333628505939e-05, + "loss": 3.9955, + "step": 8650500 + }, + { + "epoch": 1.14, + "learning_rate": 4.389266027036383e-05, + "loss": 3.9913, + "step": 8651000 + }, + { + "epoch": 1.14, + "learning_rate": 4.38919842234589e-05, + "loss": 4.011, + "step": 8651500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3891308144345764e-05, + "loss": 3.9752, + "step": 8652000 + }, + { + "epoch": 1.14, + "learning_rate": 4.389063203302558e-05, + "loss": 4.0098, + "step": 8652500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38899558894995e-05, + "loss": 4.0046, + "step": 8653000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388927971376865e-05, + "loss": 3.9915, + "step": 8653500 + }, + { + "epoch": 1.14, + "learning_rate": 4.388860350583422e-05, + "loss": 4.0083, + "step": 8654000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388792726569734e-05, + "loss": 3.9929, + "step": 8654500 + }, + { + "epoch": 1.14, + "learning_rate": 4.388725099335916e-05, + "loss": 3.996, + "step": 8655000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388657468882086e-05, + "loss": 4.0113, + "step": 8655500 + }, + { + "epoch": 1.14, + "learning_rate": 4.388589835208356e-05, + "loss": 4.0118, + "step": 8656000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3885221983148433e-05, + "loss": 4.0112, + "step": 8656500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3884545582016626e-05, + "loss": 3.9962, + "step": 8657000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388386914868929e-05, + "loss": 3.9931, + "step": 8657500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38831926831676e-05, + "loss": 3.9895, + "step": 8658000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388251618545267e-05, + "loss": 4.0014, + "step": 8658500 + }, + { + "epoch": 1.14, + "learning_rate": 4.388183965554568e-05, + "loss": 4.0117, + "step": 8659000 + }, + { + "epoch": 1.14, + "learning_rate": 4.388116309344777e-05, + "loss": 3.993, + "step": 8659500 + }, + { + "epoch": 1.14, + "learning_rate": 4.388048649916011e-05, + "loss": 3.9994, + "step": 8660000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3879809872683854e-05, + "loss": 4.0047, + "step": 8660500 + }, + { + "epoch": 1.14, + "learning_rate": 4.387913321402013e-05, + "loss": 3.9992, + "step": 8661000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3878456523170106e-05, + "loss": 4.0141, + "step": 8661500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3877779800134947e-05, + "loss": 3.9981, + "step": 8662000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3877103044915794e-05, + "loss": 4.0054, + "step": 8662500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38764262575138e-05, + "loss": 4.0113, + "step": 8663000 + }, + { + "epoch": 1.14, + "learning_rate": 4.387574943793013e-05, + "loss": 4.0168, + "step": 8663500 + }, + { + "epoch": 1.14, + "learning_rate": 4.387507258616593e-05, + "loss": 4.014, + "step": 8664000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3874395702222346e-05, + "loss": 3.981, + "step": 8664500 + }, + { + "epoch": 1.14, + "learning_rate": 4.387371878610055e-05, + "loss": 4.0021, + "step": 8665000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3873041837801674e-05, + "loss": 4.0042, + "step": 8665500 + }, + { + "epoch": 1.14, + "learning_rate": 4.387236485732689e-05, + "loss": 4.0108, + "step": 8666000 + }, + { + "epoch": 1.14, + "learning_rate": 4.387168784467735e-05, + "loss": 4.0102, + "step": 8666500 + }, + { + "epoch": 1.14, + "learning_rate": 4.387101079985421e-05, + "loss": 4.0102, + "step": 8667000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3870333722858604e-05, + "loss": 4.0084, + "step": 8667500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386965661369171e-05, + "loss": 3.9905, + "step": 8668000 + }, + { + "epoch": 1.14, + "learning_rate": 4.386897947235468e-05, + "loss": 4.0071, + "step": 8668500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3868302298848654e-05, + "loss": 4.0106, + "step": 8669000 + }, + { + "epoch": 1.14, + "learning_rate": 4.386762509317479e-05, + "loss": 4.0155, + "step": 8669500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386694785533425e-05, + "loss": 3.9992, + "step": 8670000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3866270585328195e-05, + "loss": 4.0181, + "step": 8670500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386559328315776e-05, + "loss": 4.0034, + "step": 8671000 + }, + { + "epoch": 1.14, + "learning_rate": 4.386491594882412e-05, + "loss": 4.0024, + "step": 8671500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386423858232841e-05, + "loss": 4.0228, + "step": 8672000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3863561183671794e-05, + "loss": 3.9992, + "step": 8672500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3862883752855436e-05, + "loss": 3.9982, + "step": 8673000 + }, + { + "epoch": 1.14, + "learning_rate": 4.386220628988048e-05, + "loss": 4.0034, + "step": 8673500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386152879474809e-05, + "loss": 4.0067, + "step": 8674000 + }, + { + "epoch": 1.14, + "learning_rate": 4.38608512674594e-05, + "loss": 3.9911, + "step": 8674500 + }, + { + "epoch": 1.14, + "learning_rate": 4.386017370801559e-05, + "loss": 4.0084, + "step": 8675000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3859496116417806e-05, + "loss": 3.9945, + "step": 8675500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38588184926672e-05, + "loss": 3.9931, + "step": 8676000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3858140836764925e-05, + "loss": 3.9839, + "step": 8676500 + }, + { + "epoch": 1.14, + "learning_rate": 4.385746314871214e-05, + "loss": 3.9969, + "step": 8677000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3856785428510006e-05, + "loss": 3.9864, + "step": 8677500 + }, + { + "epoch": 1.14, + "learning_rate": 4.385610767615968e-05, + "loss": 4.0064, + "step": 8678000 + }, + { + "epoch": 1.14, + "learning_rate": 4.385542989166229e-05, + "loss": 3.9971, + "step": 8678500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3854752075019026e-05, + "loss": 4.0061, + "step": 8679000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3854074226231034e-05, + "loss": 3.9941, + "step": 8679500 + }, + { + "epoch": 1.14, + "learning_rate": 4.385339634529946e-05, + "loss": 3.9904, + "step": 8680000 + }, + { + "epoch": 1.14, + "learning_rate": 4.385271843222547e-05, + "loss": 4.0035, + "step": 8680500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38520404870102e-05, + "loss": 3.9906, + "step": 8681000 + }, + { + "epoch": 1.14, + "learning_rate": 4.385136250965484e-05, + "loss": 3.9901, + "step": 8681500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3850684500160514e-05, + "loss": 3.9993, + "step": 8682000 + }, + { + "epoch": 1.14, + "learning_rate": 4.385000645852839e-05, + "loss": 4.0064, + "step": 8682500 + }, + { + "epoch": 1.14, + "learning_rate": 4.384932838475964e-05, + "loss": 4.0008, + "step": 8683000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3848650278855385e-05, + "loss": 3.9982, + "step": 8683500 + }, + { + "epoch": 1.14, + "learning_rate": 4.384797214081681e-05, + "loss": 4.0289, + "step": 8684000 + }, + { + "epoch": 1.14, + "learning_rate": 4.384729397064506e-05, + "loss": 4.0068, + "step": 8684500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38466157683413e-05, + "loss": 4.0085, + "step": 8685000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3845937533906674e-05, + "loss": 3.9898, + "step": 8685500 + }, + { + "epoch": 1.14, + "learning_rate": 4.384525926734234e-05, + "loss": 4.0111, + "step": 8686000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3844580968649466e-05, + "loss": 3.9869, + "step": 8686500 + }, + { + "epoch": 1.14, + "learning_rate": 4.384390263782919e-05, + "loss": 3.9909, + "step": 8687000 + }, + { + "epoch": 1.14, + "learning_rate": 4.384322427488269e-05, + "loss": 4.003, + "step": 8687500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3842545879811105e-05, + "loss": 4.0025, + "step": 8688000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3841867452615593e-05, + "loss": 4.0129, + "step": 8688500 + }, + { + "epoch": 1.14, + "learning_rate": 4.384118899329732e-05, + "loss": 3.9924, + "step": 8689000 + }, + { + "epoch": 1.14, + "learning_rate": 4.384051050185744e-05, + "loss": 4.0122, + "step": 8689500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3839831978297106e-05, + "loss": 4.0201, + "step": 8690000 + }, + { + "epoch": 1.14, + "learning_rate": 4.383915342261748e-05, + "loss": 3.9802, + "step": 8690500 + }, + { + "epoch": 1.14, + "learning_rate": 4.383847483481971e-05, + "loss": 3.9924, + "step": 8691000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3837796214904955e-05, + "loss": 3.9762, + "step": 8691500 + }, + { + "epoch": 1.14, + "learning_rate": 4.383711756287439e-05, + "loss": 3.9975, + "step": 8692000 + }, + { + "epoch": 1.14, + "learning_rate": 4.383643887872915e-05, + "loss": 3.9949, + "step": 8692500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3835760162470394e-05, + "loss": 3.9917, + "step": 8693000 + }, + { + "epoch": 1.14, + "learning_rate": 4.383508141409929e-05, + "loss": 3.9999, + "step": 8693500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3834402633616986e-05, + "loss": 4.0152, + "step": 8694000 + }, + { + "epoch": 1.14, + "learning_rate": 4.383372382102465e-05, + "loss": 4.0033, + "step": 8694500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3833044976323425e-05, + "loss": 4.0153, + "step": 8695000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3832366099514477e-05, + "loss": 4.0003, + "step": 8695500 + }, + { + "epoch": 1.14, + "learning_rate": 4.383168719059896e-05, + "loss": 3.9785, + "step": 8696000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3831008249578044e-05, + "loss": 4.0195, + "step": 8696500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3830329276452864e-05, + "loss": 4.0129, + "step": 8697000 + }, + { + "epoch": 1.14, + "learning_rate": 4.382965027122459e-05, + "loss": 3.9948, + "step": 8697500 + }, + { + "epoch": 1.14, + "learning_rate": 4.382897123389439e-05, + "loss": 3.9903, + "step": 8698000 + }, + { + "epoch": 1.14, + "learning_rate": 4.38282921644634e-05, + "loss": 3.9911, + "step": 8698500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3827613062932795e-05, + "loss": 3.9906, + "step": 8699000 + }, + { + "epoch": 1.14, + "learning_rate": 4.382693392930372e-05, + "loss": 3.9865, + "step": 8699500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3826254763577344e-05, + "loss": 3.991, + "step": 8700000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3825575565754814e-05, + "loss": 4.0136, + "step": 8700500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38248963358373e-05, + "loss": 3.9931, + "step": 8701000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3824217073825955e-05, + "loss": 3.988, + "step": 8701500 + }, + { + "epoch": 1.14, + "learning_rate": 4.382353777972193e-05, + "loss": 4.0006, + "step": 8702000 + }, + { + "epoch": 1.14, + "learning_rate": 4.382285845352639e-05, + "loss": 3.9845, + "step": 8702500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38221790952405e-05, + "loss": 4.0057, + "step": 8703000 + }, + { + "epoch": 1.14, + "learning_rate": 4.38214997048654e-05, + "loss": 4.008, + "step": 8703500 + }, + { + "epoch": 1.14, + "learning_rate": 4.382082028240226e-05, + "loss": 3.9853, + "step": 8704000 + }, + { + "epoch": 1.14, + "learning_rate": 4.382014082785224e-05, + "loss": 3.9974, + "step": 8704500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381946134121649e-05, + "loss": 3.9958, + "step": 8705000 + }, + { + "epoch": 1.14, + "learning_rate": 4.381878182249619e-05, + "loss": 3.9973, + "step": 8705500 + }, + { + "epoch": 1.14, + "learning_rate": 4.3818102271692464e-05, + "loss": 3.99, + "step": 8706000 + }, + { + "epoch": 1.14, + "learning_rate": 4.38174226888065e-05, + "loss": 4.0017, + "step": 8706500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381674307383944e-05, + "loss": 3.9949, + "step": 8707000 + }, + { + "epoch": 1.14, + "learning_rate": 4.3816063426792446e-05, + "loss": 3.9778, + "step": 8707500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381538374766668e-05, + "loss": 4.0064, + "step": 8708000 + }, + { + "epoch": 1.14, + "learning_rate": 4.381470403646331e-05, + "loss": 3.9965, + "step": 8708500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381402429318347e-05, + "loss": 3.9998, + "step": 8709000 + }, + { + "epoch": 1.14, + "learning_rate": 4.381334451782834e-05, + "loss": 3.9917, + "step": 8709500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381266471039906e-05, + "loss": 3.9848, + "step": 8710000 + }, + { + "epoch": 1.14, + "learning_rate": 4.381198487089682e-05, + "loss": 3.998, + "step": 8710500 + }, + { + "epoch": 1.14, + "learning_rate": 4.381130499932276e-05, + "loss": 3.9974, + "step": 8711000 + }, + { + "epoch": 1.14, + "learning_rate": 4.381062509567803e-05, + "loss": 4.0166, + "step": 8711500 + }, + { + "epoch": 1.14, + "learning_rate": 4.38099451599638e-05, + "loss": 3.9703, + "step": 8712000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3809265192181223e-05, + "loss": 4.0003, + "step": 8712500 + }, + { + "epoch": 1.15, + "learning_rate": 4.380858519233147e-05, + "loss": 3.9847, + "step": 8713000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3807905160415685e-05, + "loss": 3.9888, + "step": 8713500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3807225096435045e-05, + "loss": 4.0163, + "step": 8714000 + }, + { + "epoch": 1.15, + "learning_rate": 4.380654500039069e-05, + "loss": 3.9894, + "step": 8714500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3805864872283794e-05, + "loss": 3.9884, + "step": 8715000 + }, + { + "epoch": 1.15, + "learning_rate": 4.380518471211551e-05, + "loss": 3.9938, + "step": 8715500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3804504519887005e-05, + "loss": 4.0024, + "step": 8716000 + }, + { + "epoch": 1.15, + "learning_rate": 4.380382429559943e-05, + "loss": 3.9936, + "step": 8716500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3803144039253954e-05, + "loss": 3.9955, + "step": 8717000 + }, + { + "epoch": 1.15, + "learning_rate": 4.380246375085172e-05, + "loss": 4.0132, + "step": 8717500 + }, + { + "epoch": 1.15, + "learning_rate": 4.38017834303939e-05, + "loss": 4.0156, + "step": 8718000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3801103077881655e-05, + "loss": 3.9882, + "step": 8718500 + }, + { + "epoch": 1.15, + "learning_rate": 4.380042269331614e-05, + "loss": 3.9888, + "step": 8719000 + }, + { + "epoch": 1.15, + "learning_rate": 4.379974227669852e-05, + "loss": 4.038, + "step": 8719500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379906182802995e-05, + "loss": 4.0215, + "step": 8720000 + }, + { + "epoch": 1.15, + "learning_rate": 4.379838134731159e-05, + "loss": 3.9897, + "step": 8720500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379770083454461e-05, + "loss": 4.0055, + "step": 8721000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3797020289730156e-05, + "loss": 4.0059, + "step": 8721500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379633971286939e-05, + "loss": 4.0049, + "step": 8722000 + }, + { + "epoch": 1.15, + "learning_rate": 4.379565910396349e-05, + "loss": 3.9992, + "step": 8722500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379497846301359e-05, + "loss": 3.9903, + "step": 8723000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3794297790020874e-05, + "loss": 3.9896, + "step": 8723500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379361708498649e-05, + "loss": 3.9853, + "step": 8724000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37929363479116e-05, + "loss": 3.9874, + "step": 8724500 + }, + { + "epoch": 1.15, + "learning_rate": 4.379225557879736e-05, + "loss": 3.9751, + "step": 8725000 + }, + { + "epoch": 1.15, + "learning_rate": 4.379157477764494e-05, + "loss": 4.0004, + "step": 8725500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3790893944455494e-05, + "loss": 3.9891, + "step": 8726000 + }, + { + "epoch": 1.15, + "learning_rate": 4.379021307923019e-05, + "loss": 3.9959, + "step": 8726500 + }, + { + "epoch": 1.15, + "learning_rate": 4.378953218197018e-05, + "loss": 3.9955, + "step": 8727000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378885125267663e-05, + "loss": 3.9773, + "step": 8727500 + }, + { + "epoch": 1.15, + "learning_rate": 4.37881702913507e-05, + "loss": 3.9943, + "step": 8728000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378748929799355e-05, + "loss": 3.9817, + "step": 8728500 + }, + { + "epoch": 1.15, + "learning_rate": 4.378680827260634e-05, + "loss": 3.9905, + "step": 8729000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3786127215190236e-05, + "loss": 3.9804, + "step": 8729500 + }, + { + "epoch": 1.15, + "learning_rate": 4.378544612574639e-05, + "loss": 3.9879, + "step": 8730000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378476500427598e-05, + "loss": 3.9879, + "step": 8730500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3784083850780146e-05, + "loss": 3.9909, + "step": 8731000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378340266526007e-05, + "loss": 3.9889, + "step": 8731500 + }, + { + "epoch": 1.15, + "learning_rate": 4.378272144771689e-05, + "loss": 3.9936, + "step": 8732000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378204019815179e-05, + "loss": 3.9981, + "step": 8732500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3781358916565915e-05, + "loss": 4.0112, + "step": 8733000 + }, + { + "epoch": 1.15, + "learning_rate": 4.378067760296043e-05, + "loss": 4.0074, + "step": 8733500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377999625733651e-05, + "loss": 3.9862, + "step": 8734000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37793148796953e-05, + "loss": 4.0057, + "step": 8734500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377863347003797e-05, + "loss": 4.003, + "step": 8735000 + }, + { + "epoch": 1.15, + "learning_rate": 4.377795202836568e-05, + "loss": 3.9918, + "step": 8735500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377727055467958e-05, + "loss": 4.0089, + "step": 8736000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3776589048980866e-05, + "loss": 3.9968, + "step": 8736500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377590751127067e-05, + "loss": 3.9875, + "step": 8737000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3775225941550144e-05, + "loss": 3.9736, + "step": 8737500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377454433982048e-05, + "loss": 4.0025, + "step": 8738000 + }, + { + "epoch": 1.15, + "learning_rate": 4.377386270608282e-05, + "loss": 4.0016, + "step": 8738500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377318104033834e-05, + "loss": 3.9998, + "step": 8739000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37724993425882e-05, + "loss": 3.9941, + "step": 8739500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377181761283355e-05, + "loss": 3.9705, + "step": 8740000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3771135851075565e-05, + "loss": 3.9685, + "step": 8740500 + }, + { + "epoch": 1.15, + "learning_rate": 4.377045405731539e-05, + "loss": 3.9845, + "step": 8741000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376977223155421e-05, + "loss": 4.0025, + "step": 8741500 + }, + { + "epoch": 1.15, + "learning_rate": 4.376909037379317e-05, + "loss": 4.0082, + "step": 8742000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3768408484033455e-05, + "loss": 3.9765, + "step": 8742500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3767726562276194e-05, + "loss": 3.9811, + "step": 8743000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376704460852258e-05, + "loss": 3.9953, + "step": 8743500 + }, + { + "epoch": 1.15, + "learning_rate": 4.376636262277375e-05, + "loss": 4.0027, + "step": 8744000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376568060503089e-05, + "loss": 4.0136, + "step": 8744500 + }, + { + "epoch": 1.15, + "learning_rate": 4.376499855529514e-05, + "loss": 3.9806, + "step": 8745000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376431647356769e-05, + "loss": 3.9951, + "step": 8745500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3763634359849685e-05, + "loss": 4.0033, + "step": 8746000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3762952214142285e-05, + "loss": 3.9999, + "step": 8746500 + }, + { + "epoch": 1.15, + "learning_rate": 4.376227003644666e-05, + "loss": 3.9814, + "step": 8747000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376158782676397e-05, + "loss": 3.979, + "step": 8747500 + }, + { + "epoch": 1.15, + "learning_rate": 4.376090558509539e-05, + "loss": 3.993, + "step": 8748000 + }, + { + "epoch": 1.15, + "learning_rate": 4.376022331144206e-05, + "loss": 3.9829, + "step": 8748500 + }, + { + "epoch": 1.15, + "learning_rate": 4.375954100580517e-05, + "loss": 3.9786, + "step": 8749000 + }, + { + "epoch": 1.15, + "learning_rate": 4.375885866818587e-05, + "loss": 3.9976, + "step": 8749500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3758176298585306e-05, + "loss": 4.0138, + "step": 8750000 + }, + { + "epoch": 1.15, + "learning_rate": 4.375749389700467e-05, + "loss": 3.9731, + "step": 8750500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3756811463445116e-05, + "loss": 3.9841, + "step": 8751000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37561289979078e-05, + "loss": 3.9777, + "step": 8751500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3755446500393886e-05, + "loss": 4.0008, + "step": 8752000 + }, + { + "epoch": 1.15, + "learning_rate": 4.375476397090455e-05, + "loss": 3.9818, + "step": 8752500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3754081409440944e-05, + "loss": 3.995, + "step": 8753000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3753398816004234e-05, + "loss": 3.9793, + "step": 8753500 + }, + { + "epoch": 1.15, + "learning_rate": 4.375271619059559e-05, + "loss": 3.9898, + "step": 8754000 + }, + { + "epoch": 1.15, + "learning_rate": 4.375203353321617e-05, + "loss": 3.9641, + "step": 8754500 + }, + { + "epoch": 1.15, + "learning_rate": 4.375135084386714e-05, + "loss": 3.9823, + "step": 8755000 + }, + { + "epoch": 1.15, + "learning_rate": 4.375066812254966e-05, + "loss": 3.973, + "step": 8755500 + }, + { + "epoch": 1.15, + "learning_rate": 4.374998536926489e-05, + "loss": 3.982, + "step": 8756000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3749302584014015e-05, + "loss": 4.0092, + "step": 8756500 + }, + { + "epoch": 1.15, + "learning_rate": 4.374861976679818e-05, + "loss": 4.0116, + "step": 8757000 + }, + { + "epoch": 1.15, + "learning_rate": 4.374793691761855e-05, + "loss": 3.9862, + "step": 8757500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3747254036476296e-05, + "loss": 3.9801, + "step": 8758000 + }, + { + "epoch": 1.15, + "learning_rate": 4.374657112337257e-05, + "loss": 3.9877, + "step": 8758500 + }, + { + "epoch": 1.15, + "learning_rate": 4.374588817830856e-05, + "loss": 3.984, + "step": 8759000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37452052012854e-05, + "loss": 3.9955, + "step": 8759500 + }, + { + "epoch": 1.15, + "learning_rate": 4.374452219230428e-05, + "loss": 3.9707, + "step": 8760000 + }, + { + "epoch": 1.15, + "learning_rate": 4.374383915136635e-05, + "loss": 3.9901, + "step": 8760500 + }, + { + "epoch": 1.15, + "learning_rate": 4.374315607847279e-05, + "loss": 3.9895, + "step": 8761000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3742472973624745e-05, + "loss": 3.9872, + "step": 8761500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3741789836823387e-05, + "loss": 3.9964, + "step": 8762000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3741106668069885e-05, + "loss": 3.9948, + "step": 8762500 + }, + { + "epoch": 1.15, + "learning_rate": 4.37404234673654e-05, + "loss": 4.0069, + "step": 8763000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37397402347111e-05, + "loss": 3.9929, + "step": 8763500 + }, + { + "epoch": 1.15, + "learning_rate": 4.373905697010814e-05, + "loss": 4.0116, + "step": 8764000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37383736735577e-05, + "loss": 4.0026, + "step": 8764500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3737690345060934e-05, + "loss": 3.9992, + "step": 8765000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3737006984619004e-05, + "loss": 3.9944, + "step": 8765500 + }, + { + "epoch": 1.15, + "learning_rate": 4.373632359223308e-05, + "loss": 4.002, + "step": 8766000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3735640167904344e-05, + "loss": 3.999, + "step": 8766500 + }, + { + "epoch": 1.15, + "learning_rate": 4.373495671163393e-05, + "loss": 3.9906, + "step": 8767000 + }, + { + "epoch": 1.15, + "learning_rate": 4.373427322342303e-05, + "loss": 3.9986, + "step": 8767500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3733589703272795e-05, + "loss": 3.9958, + "step": 8768000 + }, + { + "epoch": 1.15, + "learning_rate": 4.373290615118438e-05, + "loss": 4.0039, + "step": 8768500 + }, + { + "epoch": 1.15, + "learning_rate": 4.373222256715898e-05, + "loss": 3.9657, + "step": 8769000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3731538951197736e-05, + "loss": 3.9694, + "step": 8769500 + }, + { + "epoch": 1.15, + "learning_rate": 4.373085530330182e-05, + "loss": 3.9845, + "step": 8770000 + }, + { + "epoch": 1.15, + "learning_rate": 4.37301716234724e-05, + "loss": 4.0024, + "step": 8770500 + }, + { + "epoch": 1.15, + "learning_rate": 4.372948791171064e-05, + "loss": 3.9809, + "step": 8771000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3728804168017704e-05, + "loss": 3.9996, + "step": 8771500 + }, + { + "epoch": 1.15, + "learning_rate": 4.372812039239477e-05, + "loss": 3.9857, + "step": 8772000 + }, + { + "epoch": 1.15, + "learning_rate": 4.372743658484298e-05, + "loss": 4.0082, + "step": 8772500 + }, + { + "epoch": 1.15, + "learning_rate": 4.372675274536352e-05, + "loss": 3.9937, + "step": 8773000 + }, + { + "epoch": 1.15, + "learning_rate": 4.372606887395755e-05, + "loss": 3.9793, + "step": 8773500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3725384970626224e-05, + "loss": 3.955, + "step": 8774000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3724701035370735e-05, + "loss": 3.9812, + "step": 8774500 + }, + { + "epoch": 1.15, + "learning_rate": 4.372401706819223e-05, + "loss": 4.0048, + "step": 8775000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3723333069091867e-05, + "loss": 3.9915, + "step": 8775500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3722649038070825e-05, + "loss": 4.0041, + "step": 8776000 + }, + { + "epoch": 1.15, + "learning_rate": 4.372196497513028e-05, + "loss": 3.9915, + "step": 8776500 + }, + { + "epoch": 1.15, + "learning_rate": 4.372128088027138e-05, + "loss": 3.9839, + "step": 8777000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3720596753495294e-05, + "loss": 3.9959, + "step": 8777500 + }, + { + "epoch": 1.15, + "learning_rate": 4.37199125948032e-05, + "loss": 3.9763, + "step": 8778000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3719228404196245e-05, + "loss": 3.9735, + "step": 8778500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3718544181675616e-05, + "loss": 3.9881, + "step": 8779000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3717859927242466e-05, + "loss": 3.9961, + "step": 8779500 + }, + { + "epoch": 1.15, + "learning_rate": 4.371717564089797e-05, + "loss": 3.991, + "step": 8780000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3716491322643296e-05, + "loss": 3.9952, + "step": 8780500 + }, + { + "epoch": 1.15, + "learning_rate": 4.371580697247959e-05, + "loss": 3.9968, + "step": 8781000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3715122590408044e-05, + "loss": 3.9779, + "step": 8781500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3714438176429814e-05, + "loss": 4.0111, + "step": 8782000 + }, + { + "epoch": 1.15, + "learning_rate": 4.371375373054607e-05, + "loss": 3.9668, + "step": 8782500 + }, + { + "epoch": 1.15, + "learning_rate": 4.3713069252757974e-05, + "loss": 3.9827, + "step": 8783000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3712384743066695e-05, + "loss": 3.9911, + "step": 8783500 + }, + { + "epoch": 1.15, + "learning_rate": 4.371170020147339e-05, + "loss": 4.0026, + "step": 8784000 + }, + { + "epoch": 1.15, + "learning_rate": 4.371101562797925e-05, + "loss": 3.9774, + "step": 8784500 + }, + { + "epoch": 1.15, + "learning_rate": 4.371033102258543e-05, + "loss": 3.9933, + "step": 8785000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3709646385293094e-05, + "loss": 3.983, + "step": 8785500 + }, + { + "epoch": 1.15, + "learning_rate": 4.37089617161034e-05, + "loss": 3.9812, + "step": 8786000 + }, + { + "epoch": 1.15, + "learning_rate": 4.370827701501754e-05, + "loss": 3.9868, + "step": 8786500 + }, + { + "epoch": 1.15, + "learning_rate": 4.370759228203666e-05, + "loss": 3.9999, + "step": 8787000 + }, + { + "epoch": 1.15, + "learning_rate": 4.3706907517161935e-05, + "loss": 4.0006, + "step": 8787500 + }, + { + "epoch": 1.15, + "learning_rate": 4.370622272039454e-05, + "loss": 3.9927, + "step": 8788000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3705537891735624e-05, + "loss": 3.9891, + "step": 8788500 + }, + { + "epoch": 1.16, + "learning_rate": 4.370485303118638e-05, + "loss": 3.9834, + "step": 8789000 + }, + { + "epoch": 1.16, + "learning_rate": 4.370416813874795e-05, + "loss": 3.9751, + "step": 8789500 + }, + { + "epoch": 1.16, + "learning_rate": 4.370348321442152e-05, + "loss": 3.9976, + "step": 8790000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3702798258208235e-05, + "loss": 3.9846, + "step": 8790500 + }, + { + "epoch": 1.16, + "learning_rate": 4.370211327010929e-05, + "loss": 3.9979, + "step": 8791000 + }, + { + "epoch": 1.16, + "learning_rate": 4.370142825012584e-05, + "loss": 3.9775, + "step": 8791500 + }, + { + "epoch": 1.16, + "learning_rate": 4.370074319825905e-05, + "loss": 3.9618, + "step": 8792000 + }, + { + "epoch": 1.16, + "learning_rate": 4.370005811451009e-05, + "loss": 3.9896, + "step": 8792500 + }, + { + "epoch": 1.16, + "learning_rate": 4.369937299888014e-05, + "loss": 3.9812, + "step": 8793000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3698687851370345e-05, + "loss": 3.9854, + "step": 8793500 + }, + { + "epoch": 1.16, + "learning_rate": 4.36980026719819e-05, + "loss": 3.9836, + "step": 8794000 + }, + { + "epoch": 1.16, + "learning_rate": 4.369731746071595e-05, + "loss": 3.9705, + "step": 8794500 + }, + { + "epoch": 1.16, + "learning_rate": 4.369663221757367e-05, + "loss": 3.9991, + "step": 8795000 + }, + { + "epoch": 1.16, + "learning_rate": 4.369594694255623e-05, + "loss": 3.9942, + "step": 8795500 + }, + { + "epoch": 1.16, + "learning_rate": 4.369526163566481e-05, + "loss": 3.9439, + "step": 8796000 + }, + { + "epoch": 1.16, + "learning_rate": 4.369457629690056e-05, + "loss": 3.9858, + "step": 8796500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3693890926264644e-05, + "loss": 3.9832, + "step": 8797000 + }, + { + "epoch": 1.16, + "learning_rate": 4.369320552375826e-05, + "loss": 3.9569, + "step": 8797500 + }, + { + "epoch": 1.16, + "learning_rate": 4.369252008938255e-05, + "loss": 3.9928, + "step": 8798000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3691834623138694e-05, + "loss": 3.9775, + "step": 8798500 + }, + { + "epoch": 1.16, + "learning_rate": 4.369114912502785e-05, + "loss": 3.9751, + "step": 8799000 + }, + { + "epoch": 1.16, + "learning_rate": 4.36904635950512e-05, + "loss": 4.0054, + "step": 8799500 + }, + { + "epoch": 1.16, + "learning_rate": 4.368977803320991e-05, + "loss": 3.9791, + "step": 8800000 + }, + { + "epoch": 1.16, + "learning_rate": 4.368909243950514e-05, + "loss": 3.9819, + "step": 8800500 + }, + { + "epoch": 1.16, + "learning_rate": 4.368840681393807e-05, + "loss": 3.9738, + "step": 8801000 + }, + { + "epoch": 1.16, + "learning_rate": 4.368772115650986e-05, + "loss": 3.9782, + "step": 8801500 + }, + { + "epoch": 1.16, + "learning_rate": 4.368703546722168e-05, + "loss": 3.9597, + "step": 8802000 + }, + { + "epoch": 1.16, + "learning_rate": 4.368634974607471e-05, + "loss": 3.9735, + "step": 8802500 + }, + { + "epoch": 1.16, + "learning_rate": 4.36856639930701e-05, + "loss": 3.9651, + "step": 8803000 + }, + { + "epoch": 1.16, + "learning_rate": 4.368497820820904e-05, + "loss": 3.9907, + "step": 8803500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3684292391492675e-05, + "loss": 3.9911, + "step": 8804000 + }, + { + "epoch": 1.16, + "learning_rate": 4.36836065429222e-05, + "loss": 3.9723, + "step": 8804500 + }, + { + "epoch": 1.16, + "learning_rate": 4.368292066249877e-05, + "loss": 3.9766, + "step": 8805000 + }, + { + "epoch": 1.16, + "learning_rate": 4.368223475022355e-05, + "loss": 3.9776, + "step": 8805500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3681548806097725e-05, + "loss": 3.9873, + "step": 8806000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3680862830122456e-05, + "loss": 3.9805, + "step": 8806500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3680176822298904e-05, + "loss": 3.9859, + "step": 8807000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3679490782628255e-05, + "loss": 3.9824, + "step": 8807500 + }, + { + "epoch": 1.16, + "learning_rate": 4.367880471111167e-05, + "loss": 3.966, + "step": 8808000 + }, + { + "epoch": 1.16, + "learning_rate": 4.367811860775031e-05, + "loss": 3.9599, + "step": 8808500 + }, + { + "epoch": 1.16, + "learning_rate": 4.367743247254535e-05, + "loss": 3.9733, + "step": 8809000 + }, + { + "epoch": 1.16, + "learning_rate": 4.367674630549797e-05, + "loss": 4.004, + "step": 8809500 + }, + { + "epoch": 1.16, + "learning_rate": 4.367606010660934e-05, + "loss": 3.984, + "step": 8810000 + }, + { + "epoch": 1.16, + "learning_rate": 4.367537387588062e-05, + "loss": 3.963, + "step": 8810500 + }, + { + "epoch": 1.16, + "learning_rate": 4.367468761331298e-05, + "loss": 3.9955, + "step": 8811000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3674001318907584e-05, + "loss": 3.9743, + "step": 8811500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3673314992665625e-05, + "loss": 3.9611, + "step": 8812000 + }, + { + "epoch": 1.16, + "learning_rate": 4.367262863458825e-05, + "loss": 3.9688, + "step": 8812500 + }, + { + "epoch": 1.16, + "learning_rate": 4.367194224467665e-05, + "loss": 3.9778, + "step": 8813000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3671255822931966e-05, + "loss": 3.9867, + "step": 8813500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3670569369355395e-05, + "loss": 3.9822, + "step": 8814000 + }, + { + "epoch": 1.16, + "learning_rate": 4.36698828839481e-05, + "loss": 3.9733, + "step": 8814500 + }, + { + "epoch": 1.16, + "learning_rate": 4.366919636671125e-05, + "loss": 3.9774, + "step": 8815000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366850981764601e-05, + "loss": 3.9765, + "step": 8815500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3667823236753554e-05, + "loss": 3.9911, + "step": 8816000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366713662403506e-05, + "loss": 3.9703, + "step": 8816500 + }, + { + "epoch": 1.16, + "learning_rate": 4.366644997949169e-05, + "loss": 4.0031, + "step": 8817000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366576330312461e-05, + "loss": 3.9796, + "step": 8817500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3665076594935e-05, + "loss": 3.994, + "step": 8818000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366438985492403e-05, + "loss": 3.9878, + "step": 8818500 + }, + { + "epoch": 1.16, + "learning_rate": 4.366370308309287e-05, + "loss": 3.9897, + "step": 8819000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366301627944269e-05, + "loss": 3.9655, + "step": 8819500 + }, + { + "epoch": 1.16, + "learning_rate": 4.366232944397467e-05, + "loss": 3.962, + "step": 8820000 + }, + { + "epoch": 1.16, + "learning_rate": 4.366164257668995e-05, + "loss": 3.9616, + "step": 8820500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3660955677589743e-05, + "loss": 3.979, + "step": 8821000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3660268746675195e-05, + "loss": 3.9704, + "step": 8821500 + }, + { + "epoch": 1.16, + "learning_rate": 4.365958178394747e-05, + "loss": 3.9854, + "step": 8822000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3658894789407764e-05, + "loss": 3.9767, + "step": 8822500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3658207763057234e-05, + "loss": 3.9855, + "step": 8823000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3657520704897046e-05, + "loss": 3.9705, + "step": 8823500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3656833614928385e-05, + "loss": 3.9826, + "step": 8824000 + }, + { + "epoch": 1.16, + "learning_rate": 4.365614649315242e-05, + "loss": 3.9881, + "step": 8824500 + }, + { + "epoch": 1.16, + "learning_rate": 4.36554593395703e-05, + "loss": 3.9796, + "step": 8825000 + }, + { + "epoch": 1.16, + "learning_rate": 4.365477215418323e-05, + "loss": 3.9841, + "step": 8825500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3654084936992355e-05, + "loss": 3.9962, + "step": 8826000 + }, + { + "epoch": 1.16, + "learning_rate": 4.365339768799886e-05, + "loss": 3.9631, + "step": 8826500 + }, + { + "epoch": 1.16, + "learning_rate": 4.365271040720393e-05, + "loss": 3.993, + "step": 8827000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3652023094608706e-05, + "loss": 3.9702, + "step": 8827500 + }, + { + "epoch": 1.16, + "learning_rate": 4.365133575021438e-05, + "loss": 3.9829, + "step": 8828000 + }, + { + "epoch": 1.16, + "learning_rate": 4.365064837402211e-05, + "loss": 3.9893, + "step": 8828500 + }, + { + "epoch": 1.16, + "learning_rate": 4.364996096603309e-05, + "loss": 3.9657, + "step": 8829000 + }, + { + "epoch": 1.16, + "learning_rate": 4.364927352624847e-05, + "loss": 3.9631, + "step": 8829500 + }, + { + "epoch": 1.16, + "learning_rate": 4.364858605466943e-05, + "loss": 3.9845, + "step": 8830000 + }, + { + "epoch": 1.16, + "learning_rate": 4.364789855129714e-05, + "loss": 3.9666, + "step": 8830500 + }, + { + "epoch": 1.16, + "learning_rate": 4.364721101613278e-05, + "loss": 3.9656, + "step": 8831000 + }, + { + "epoch": 1.16, + "learning_rate": 4.364652344917751e-05, + "loss": 3.9801, + "step": 8831500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3645835850432515e-05, + "loss": 3.955, + "step": 8832000 + }, + { + "epoch": 1.16, + "learning_rate": 4.364514821989896e-05, + "loss": 3.9932, + "step": 8832500 + }, + { + "epoch": 1.16, + "learning_rate": 4.364446055757802e-05, + "loss": 3.9635, + "step": 8833000 + }, + { + "epoch": 1.16, + "learning_rate": 4.364377286347087e-05, + "loss": 3.9885, + "step": 8833500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3643085137578664e-05, + "loss": 3.9721, + "step": 8834000 + }, + { + "epoch": 1.16, + "learning_rate": 4.36423973799026e-05, + "loss": 3.985, + "step": 8834500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3641709590443835e-05, + "loss": 3.9629, + "step": 8835000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3641021769203544e-05, + "loss": 3.9926, + "step": 8835500 + }, + { + "epoch": 1.16, + "learning_rate": 4.364033391618291e-05, + "loss": 3.9852, + "step": 8836000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3639646031383086e-05, + "loss": 3.9813, + "step": 8836500 + }, + { + "epoch": 1.16, + "learning_rate": 4.363895811480526e-05, + "loss": 4.0005, + "step": 8837000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3638270166450604e-05, + "loss": 3.9866, + "step": 8837500 + }, + { + "epoch": 1.16, + "learning_rate": 4.363758218632028e-05, + "loss": 3.9807, + "step": 8838000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3636894174415474e-05, + "loss": 3.9925, + "step": 8838500 + }, + { + "epoch": 1.16, + "learning_rate": 4.363620613073736e-05, + "loss": 3.9815, + "step": 8839000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3635518055287096e-05, + "loss": 3.9753, + "step": 8839500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3634829948065855e-05, + "loss": 3.9819, + "step": 8840000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3634141809074834e-05, + "loss": 4.002, + "step": 8840500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3633453638315186e-05, + "loss": 4.0025, + "step": 8841000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3632765435788095e-05, + "loss": 3.9618, + "step": 8841500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3632077201494714e-05, + "loss": 3.9659, + "step": 8842000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3631388935436235e-05, + "loss": 3.9667, + "step": 8842500 + }, + { + "epoch": 1.16, + "learning_rate": 4.363070063761384e-05, + "loss": 3.9722, + "step": 8843000 + }, + { + "epoch": 1.16, + "learning_rate": 4.363001230802868e-05, + "loss": 3.9916, + "step": 8843500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362932394668193e-05, + "loss": 3.9761, + "step": 8844000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3628635553574785e-05, + "loss": 3.9866, + "step": 8844500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3627947128708396e-05, + "loss": 3.9617, + "step": 8845000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362725867208395e-05, + "loss": 3.96, + "step": 8845500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362657018370262e-05, + "loss": 3.9752, + "step": 8846000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362588166356557e-05, + "loss": 3.9768, + "step": 8846500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362519311167398e-05, + "loss": 3.9839, + "step": 8847000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362450452802903e-05, + "loss": 3.9709, + "step": 8847500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362381591263188e-05, + "loss": 3.9695, + "step": 8848000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362312726548372e-05, + "loss": 3.9869, + "step": 8848500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362243858658571e-05, + "loss": 3.9913, + "step": 8849000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362174987593904e-05, + "loss": 3.9628, + "step": 8849500 + }, + { + "epoch": 1.16, + "learning_rate": 4.362106113354486e-05, + "loss": 3.9752, + "step": 8850000 + }, + { + "epoch": 1.16, + "learning_rate": 4.362037235940437e-05, + "loss": 3.9783, + "step": 8850500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361968355351873e-05, + "loss": 3.9836, + "step": 8851000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3618994715889113e-05, + "loss": 3.9718, + "step": 8851500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361830584651671e-05, + "loss": 3.9662, + "step": 8852000 + }, + { + "epoch": 1.16, + "learning_rate": 4.361761694540266e-05, + "loss": 3.9695, + "step": 8852500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361692801254817e-05, + "loss": 3.9687, + "step": 8853000 + }, + { + "epoch": 1.16, + "learning_rate": 4.361623904795442e-05, + "loss": 3.9638, + "step": 8853500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361555005162256e-05, + "loss": 3.9711, + "step": 8854000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3614861023553765e-05, + "loss": 3.9791, + "step": 8854500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361417196374923e-05, + "loss": 3.9677, + "step": 8855000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3613482872210116e-05, + "loss": 3.9713, + "step": 8855500 + }, + { + "epoch": 1.16, + "learning_rate": 4.36127937489376e-05, + "loss": 3.9674, + "step": 8856000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3612104593932855e-05, + "loss": 3.963, + "step": 8856500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361141540719706e-05, + "loss": 3.9747, + "step": 8857000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3610726188731385e-05, + "loss": 3.9753, + "step": 8857500 + }, + { + "epoch": 1.16, + "learning_rate": 4.361003693853701e-05, + "loss": 3.9716, + "step": 8858000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3609347656615114e-05, + "loss": 3.9815, + "step": 8858500 + }, + { + "epoch": 1.16, + "learning_rate": 4.360865834296686e-05, + "loss": 3.983, + "step": 8859000 + }, + { + "epoch": 1.16, + "learning_rate": 4.360796899759343e-05, + "loss": 3.9798, + "step": 8859500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3607279620496e-05, + "loss": 3.9629, + "step": 8860000 + }, + { + "epoch": 1.16, + "learning_rate": 4.3606590211675745e-05, + "loss": 3.9812, + "step": 8860500 + }, + { + "epoch": 1.16, + "learning_rate": 4.360590077113384e-05, + "loss": 3.9728, + "step": 8861000 + }, + { + "epoch": 1.16, + "learning_rate": 4.360521129887146e-05, + "loss": 3.9837, + "step": 8861500 + }, + { + "epoch": 1.16, + "learning_rate": 4.360452179488977e-05, + "loss": 3.9733, + "step": 8862000 + }, + { + "epoch": 1.16, + "learning_rate": 4.360383225918997e-05, + "loss": 3.9826, + "step": 8862500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3603142691773215e-05, + "loss": 3.972, + "step": 8863000 + }, + { + "epoch": 1.16, + "learning_rate": 4.360245309264069e-05, + "loss": 3.9882, + "step": 8863500 + }, + { + "epoch": 1.16, + "learning_rate": 4.3601763461793566e-05, + "loss": 3.9738, + "step": 8864000 + }, + { + "epoch": 1.17, + "learning_rate": 4.360107379923302e-05, + "loss": 3.9905, + "step": 8864500 + }, + { + "epoch": 1.17, + "learning_rate": 4.360038410496022e-05, + "loss": 3.9671, + "step": 8865000 + }, + { + "epoch": 1.17, + "learning_rate": 4.359969437897636e-05, + "loss": 3.9516, + "step": 8865500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3599004621282605e-05, + "loss": 3.961, + "step": 8866000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3598314831880135e-05, + "loss": 3.9736, + "step": 8866500 + }, + { + "epoch": 1.17, + "learning_rate": 4.359762501077012e-05, + "loss": 3.975, + "step": 8867000 + }, + { + "epoch": 1.17, + "learning_rate": 4.359693515795373e-05, + "loss": 3.9954, + "step": 8867500 + }, + { + "epoch": 1.17, + "learning_rate": 4.359624527343216e-05, + "loss": 3.9642, + "step": 8868000 + }, + { + "epoch": 1.17, + "learning_rate": 4.359555535720658e-05, + "loss": 3.9994, + "step": 8868500 + }, + { + "epoch": 1.17, + "learning_rate": 4.359486540927815e-05, + "loss": 3.9743, + "step": 8869000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3594175429648066e-05, + "loss": 3.9743, + "step": 8869500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3593485418317495e-05, + "loss": 3.963, + "step": 8870000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3592795375287624e-05, + "loss": 3.9547, + "step": 8870500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3592105300559605e-05, + "loss": 3.9598, + "step": 8871000 + }, + { + "epoch": 1.17, + "learning_rate": 4.359141519413464e-05, + "loss": 3.9632, + "step": 8871500 + }, + { + "epoch": 1.17, + "learning_rate": 4.359072505601389e-05, + "loss": 3.9643, + "step": 8872000 + }, + { + "epoch": 1.17, + "learning_rate": 4.359003488619854e-05, + "loss": 3.9799, + "step": 8872500 + }, + { + "epoch": 1.17, + "learning_rate": 4.358934468468977e-05, + "loss": 3.9834, + "step": 8873000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358865445148874e-05, + "loss": 3.9793, + "step": 8873500 + }, + { + "epoch": 1.17, + "learning_rate": 4.358796418659664e-05, + "loss": 3.9793, + "step": 8874000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358727389001465e-05, + "loss": 3.9753, + "step": 8874500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3586583561743943e-05, + "loss": 3.9742, + "step": 8875000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358589320178569e-05, + "loss": 3.9893, + "step": 8875500 + }, + { + "epoch": 1.17, + "learning_rate": 4.358520281014107e-05, + "loss": 3.965, + "step": 8876000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358451238681126e-05, + "loss": 3.9826, + "step": 8876500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3583821931797444e-05, + "loss": 3.9782, + "step": 8877000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358313144510079e-05, + "loss": 3.9601, + "step": 8877500 + }, + { + "epoch": 1.17, + "learning_rate": 4.358244092672248e-05, + "loss": 3.9732, + "step": 8878000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358175037666369e-05, + "loss": 3.9816, + "step": 8878500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3581059794925604e-05, + "loss": 3.9743, + "step": 8879000 + }, + { + "epoch": 1.17, + "learning_rate": 4.358036918150939e-05, + "loss": 3.9781, + "step": 8879500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357967853641623e-05, + "loss": 3.9845, + "step": 8880000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35789878596473e-05, + "loss": 4.0015, + "step": 8880500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357829715120377e-05, + "loss": 3.9742, + "step": 8881000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357760641108684e-05, + "loss": 3.9849, + "step": 8881500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357691563929766e-05, + "loss": 3.9849, + "step": 8882000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357622483583742e-05, + "loss": 3.9689, + "step": 8882500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357553400070731e-05, + "loss": 3.9774, + "step": 8883000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357484313390848e-05, + "loss": 3.9658, + "step": 8883500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3574152235442134e-05, + "loss": 3.9914, + "step": 8884000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357346130530944e-05, + "loss": 3.9615, + "step": 8884500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357277034351158e-05, + "loss": 3.9903, + "step": 8885000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357207935004971e-05, + "loss": 3.9728, + "step": 8885500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3571388324925045e-05, + "loss": 3.9613, + "step": 8886000 + }, + { + "epoch": 1.17, + "learning_rate": 4.357069726813873e-05, + "loss": 3.9884, + "step": 8886500 + }, + { + "epoch": 1.17, + "learning_rate": 4.357000617969197e-05, + "loss": 4.0045, + "step": 8887000 + }, + { + "epoch": 1.17, + "learning_rate": 4.356931505958592e-05, + "loss": 3.986, + "step": 8887500 + }, + { + "epoch": 1.17, + "learning_rate": 4.356862390782177e-05, + "loss": 3.9841, + "step": 8888000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35679327244007e-05, + "loss": 3.9844, + "step": 8888500 + }, + { + "epoch": 1.17, + "learning_rate": 4.356724150932388e-05, + "loss": 3.9724, + "step": 8889000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35665502625925e-05, + "loss": 3.9942, + "step": 8889500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3565858984207725e-05, + "loss": 3.9739, + "step": 8890000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3565167674170745e-05, + "loss": 3.9846, + "step": 8890500 + }, + { + "epoch": 1.17, + "learning_rate": 4.356447633248273e-05, + "loss": 3.9652, + "step": 8891000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3563784959144874e-05, + "loss": 3.9686, + "step": 8891500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3563093554158326e-05, + "loss": 3.9505, + "step": 8892000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35624021175243e-05, + "loss": 3.9775, + "step": 8892500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3561710649243945e-05, + "loss": 3.9721, + "step": 8893000 + }, + { + "epoch": 1.17, + "learning_rate": 4.356101914931846e-05, + "loss": 3.9753, + "step": 8893500 + }, + { + "epoch": 1.17, + "learning_rate": 4.356032761774902e-05, + "loss": 3.9861, + "step": 8894000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35596360545368e-05, + "loss": 3.9781, + "step": 8894500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355894445968297e-05, + "loss": 3.9682, + "step": 8895000 + }, + { + "epoch": 1.17, + "learning_rate": 4.355825283318873e-05, + "loss": 3.9857, + "step": 8895500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355756117505525e-05, + "loss": 3.9674, + "step": 8896000 + }, + { + "epoch": 1.17, + "learning_rate": 4.355686948528369e-05, + "loss": 3.9741, + "step": 8896500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355617776387526e-05, + "loss": 3.9511, + "step": 8897000 + }, + { + "epoch": 1.17, + "learning_rate": 4.355548601083113e-05, + "loss": 3.9672, + "step": 8897500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355479422615246e-05, + "loss": 3.9739, + "step": 8898000 + }, + { + "epoch": 1.17, + "learning_rate": 4.355410240984045e-05, + "loss": 3.973, + "step": 8898500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3553410561896276e-05, + "loss": 3.9719, + "step": 8899000 + }, + { + "epoch": 1.17, + "learning_rate": 4.355271868232112e-05, + "loss": 3.9621, + "step": 8899500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355202677111615e-05, + "loss": 3.9671, + "step": 8900000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3551334828282556e-05, + "loss": 3.9597, + "step": 8900500 + }, + { + "epoch": 1.17, + "learning_rate": 4.355064285382151e-05, + "loss": 3.9588, + "step": 8901000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35499508477342e-05, + "loss": 3.9554, + "step": 8901500 + }, + { + "epoch": 1.17, + "learning_rate": 4.35492588100218e-05, + "loss": 3.9634, + "step": 8902000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354856674068549e-05, + "loss": 3.9576, + "step": 8902500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354787463972646e-05, + "loss": 3.9708, + "step": 8903000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354718250714587e-05, + "loss": 3.9644, + "step": 8903500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354649034294492e-05, + "loss": 3.9703, + "step": 8904000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354579814712477e-05, + "loss": 3.9626, + "step": 8904500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354510591968663e-05, + "loss": 3.9612, + "step": 8905000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354441366063164e-05, + "loss": 3.9538, + "step": 8905500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354372136996102e-05, + "loss": 3.9645, + "step": 8906000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354302904767592e-05, + "loss": 3.9486, + "step": 8906500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354233669377754e-05, + "loss": 3.9762, + "step": 8907000 + }, + { + "epoch": 1.17, + "learning_rate": 4.354164430826705e-05, + "loss": 3.9599, + "step": 8907500 + }, + { + "epoch": 1.17, + "learning_rate": 4.354095189114563e-05, + "loss": 3.977, + "step": 8908000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3540259442414465e-05, + "loss": 3.9743, + "step": 8908500 + }, + { + "epoch": 1.17, + "learning_rate": 4.353956696207474e-05, + "loss": 3.9719, + "step": 8909000 + }, + { + "epoch": 1.17, + "learning_rate": 4.353887445012762e-05, + "loss": 3.9622, + "step": 8909500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3538181906574306e-05, + "loss": 3.9691, + "step": 8910000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3537489331415957e-05, + "loss": 3.9587, + "step": 8910500 + }, + { + "epoch": 1.17, + "learning_rate": 4.353679672465377e-05, + "loss": 3.9726, + "step": 8911000 + }, + { + "epoch": 1.17, + "learning_rate": 4.353610408628893e-05, + "loss": 3.9606, + "step": 8911500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3535411416322596e-05, + "loss": 3.9888, + "step": 8912000 + }, + { + "epoch": 1.17, + "learning_rate": 4.353471871475597e-05, + "loss": 3.9918, + "step": 8912500 + }, + { + "epoch": 1.17, + "learning_rate": 4.353402598159022e-05, + "loss": 3.9631, + "step": 8913000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3533333216826525e-05, + "loss": 3.9815, + "step": 8913500 + }, + { + "epoch": 1.17, + "learning_rate": 4.353264042046608e-05, + "loss": 3.9622, + "step": 8914000 + }, + { + "epoch": 1.17, + "learning_rate": 4.353194759251006e-05, + "loss": 3.9668, + "step": 8914500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3531254732959634e-05, + "loss": 3.9899, + "step": 8915000 + }, + { + "epoch": 1.17, + "learning_rate": 4.353056184181601e-05, + "loss": 3.9776, + "step": 8915500 + }, + { + "epoch": 1.17, + "learning_rate": 4.352986891908034e-05, + "loss": 3.975, + "step": 8916000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3529175964753825e-05, + "loss": 3.9698, + "step": 8916500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3528482978837636e-05, + "loss": 3.9711, + "step": 8917000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3527789961332966e-05, + "loss": 3.9685, + "step": 8917500 + }, + { + "epoch": 1.17, + "learning_rate": 4.352709691224098e-05, + "loss": 3.9764, + "step": 8918000 + }, + { + "epoch": 1.17, + "learning_rate": 4.352640383156288e-05, + "loss": 3.9707, + "step": 8918500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3525710719299826e-05, + "loss": 3.9657, + "step": 8919000 + }, + { + "epoch": 1.17, + "learning_rate": 4.352501757545301e-05, + "loss": 3.9564, + "step": 8919500 + }, + { + "epoch": 1.17, + "learning_rate": 4.352432440002361e-05, + "loss": 3.9643, + "step": 8920000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3523631193012824e-05, + "loss": 3.9633, + "step": 8920500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3522937954421815e-05, + "loss": 3.9494, + "step": 8921000 + }, + { + "epoch": 1.17, + "learning_rate": 4.352224468425177e-05, + "loss": 3.9863, + "step": 8921500 + }, + { + "epoch": 1.17, + "learning_rate": 4.352155138250387e-05, + "loss": 3.9632, + "step": 8922000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3520858049179306e-05, + "loss": 3.9863, + "step": 8922500 + }, + { + "epoch": 1.17, + "learning_rate": 4.352016468427924e-05, + "loss": 3.976, + "step": 8923000 + }, + { + "epoch": 1.17, + "learning_rate": 4.351947128780488e-05, + "loss": 3.9792, + "step": 8923500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3518777859757386e-05, + "loss": 3.9583, + "step": 8924000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3518084400137946e-05, + "loss": 3.9747, + "step": 8924500 + }, + { + "epoch": 1.17, + "learning_rate": 4.351739090894776e-05, + "loss": 3.9598, + "step": 8925000 + }, + { + "epoch": 1.17, + "learning_rate": 4.351669738618799e-05, + "loss": 3.9739, + "step": 8925500 + }, + { + "epoch": 1.17, + "learning_rate": 4.351600383185982e-05, + "loss": 3.9735, + "step": 8926000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3515310245964437e-05, + "loss": 3.9753, + "step": 8926500 + }, + { + "epoch": 1.17, + "learning_rate": 4.351461662850302e-05, + "loss": 3.9512, + "step": 8927000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3513922979476764e-05, + "loss": 3.9847, + "step": 8927500 + }, + { + "epoch": 1.17, + "learning_rate": 4.351322929888684e-05, + "loss": 3.9586, + "step": 8928000 + }, + { + "epoch": 1.17, + "learning_rate": 4.351253558673443e-05, + "loss": 3.954, + "step": 8928500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3511841843020726e-05, + "loss": 3.9708, + "step": 8929000 + }, + { + "epoch": 1.17, + "learning_rate": 4.351114806774689e-05, + "loss": 3.9578, + "step": 8929500 + }, + { + "epoch": 1.17, + "learning_rate": 4.351045426091414e-05, + "loss": 3.9643, + "step": 8930000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3509760422523614e-05, + "loss": 3.9497, + "step": 8930500 + }, + { + "epoch": 1.17, + "learning_rate": 4.350906655257653e-05, + "loss": 3.9612, + "step": 8931000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3508372651074065e-05, + "loss": 3.967, + "step": 8931500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3507678718017385e-05, + "loss": 3.9602, + "step": 8932000 + }, + { + "epoch": 1.17, + "learning_rate": 4.35069847534077e-05, + "loss": 3.9886, + "step": 8932500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3506290757246174e-05, + "loss": 3.9642, + "step": 8933000 + }, + { + "epoch": 1.17, + "learning_rate": 4.350559672953398e-05, + "loss": 3.9782, + "step": 8933500 + }, + { + "epoch": 1.17, + "learning_rate": 4.350490267027233e-05, + "loss": 3.9624, + "step": 8934000 + }, + { + "epoch": 1.17, + "learning_rate": 4.350420857946238e-05, + "loss": 3.9866, + "step": 8934500 + }, + { + "epoch": 1.17, + "learning_rate": 4.350351445710534e-05, + "loss": 3.9691, + "step": 8935000 + }, + { + "epoch": 1.17, + "learning_rate": 4.350282030320237e-05, + "loss": 3.9791, + "step": 8935500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3502126117754665e-05, + "loss": 3.9886, + "step": 8936000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3501431900763406e-05, + "loss": 3.9391, + "step": 8936500 + }, + { + "epoch": 1.17, + "learning_rate": 4.350073765222978e-05, + "loss": 3.9858, + "step": 8937000 + }, + { + "epoch": 1.17, + "learning_rate": 4.350004337215496e-05, + "loss": 3.9818, + "step": 8937500 + }, + { + "epoch": 1.17, + "learning_rate": 4.349934906054015e-05, + "loss": 3.9752, + "step": 8938000 + }, + { + "epoch": 1.17, + "learning_rate": 4.34986547173865e-05, + "loss": 3.956, + "step": 8938500 + }, + { + "epoch": 1.17, + "learning_rate": 4.349796034269523e-05, + "loss": 3.9607, + "step": 8939000 + }, + { + "epoch": 1.17, + "learning_rate": 4.3497265936467516e-05, + "loss": 3.9986, + "step": 8939500 + }, + { + "epoch": 1.17, + "learning_rate": 4.349657149870452e-05, + "loss": 3.9432, + "step": 8940000 + }, + { + "epoch": 1.18, + "learning_rate": 4.349587702940744e-05, + "loss": 3.9663, + "step": 8940500 + }, + { + "epoch": 1.18, + "learning_rate": 4.349518252857746e-05, + "loss": 3.9601, + "step": 8941000 + }, + { + "epoch": 1.18, + "learning_rate": 4.349448799621577e-05, + "loss": 3.9578, + "step": 8941500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3493793432323544e-05, + "loss": 3.9768, + "step": 8942000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3493098836901966e-05, + "loss": 3.9669, + "step": 8942500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3492404209952235e-05, + "loss": 3.9858, + "step": 8943000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3491709551475525e-05, + "loss": 3.9753, + "step": 8943500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3491014861473015e-05, + "loss": 3.9732, + "step": 8944000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3490320139945897e-05, + "loss": 3.9916, + "step": 8944500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348962538689534e-05, + "loss": 3.9589, + "step": 8945000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348893060232255e-05, + "loss": 3.9634, + "step": 8945500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348823578622871e-05, + "loss": 3.9864, + "step": 8946000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348754093861499e-05, + "loss": 3.966, + "step": 8946500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348684605948258e-05, + "loss": 3.974, + "step": 8947000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348615114883268e-05, + "loss": 3.9907, + "step": 8947500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348545620666644e-05, + "loss": 3.9787, + "step": 8948000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348476123298508e-05, + "loss": 3.9798, + "step": 8948500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348406622778977e-05, + "loss": 3.9661, + "step": 8949000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3483371191081693e-05, + "loss": 3.979, + "step": 8949500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3482676122862045e-05, + "loss": 3.9508, + "step": 8950000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348198102313199e-05, + "loss": 3.9547, + "step": 8950500 + }, + { + "epoch": 1.18, + "learning_rate": 4.348128589189273e-05, + "loss": 3.9501, + "step": 8951000 + }, + { + "epoch": 1.18, + "learning_rate": 4.348059072914545e-05, + "loss": 3.9715, + "step": 8951500 + }, + { + "epoch": 1.18, + "learning_rate": 4.347989553489132e-05, + "loss": 3.9733, + "step": 8952000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3479200309131554e-05, + "loss": 3.9656, + "step": 8952500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3478505051867304e-05, + "loss": 3.9963, + "step": 8953000 + }, + { + "epoch": 1.18, + "learning_rate": 4.347780976309978e-05, + "loss": 3.9636, + "step": 8953500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3477114442830155e-05, + "loss": 3.9606, + "step": 8954000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3476419091059614e-05, + "loss": 3.9635, + "step": 8954500 + }, + { + "epoch": 1.18, + "learning_rate": 4.347572370778935e-05, + "loss": 3.9744, + "step": 8955000 + }, + { + "epoch": 1.18, + "learning_rate": 4.347502829302055e-05, + "loss": 3.9725, + "step": 8955500 + }, + { + "epoch": 1.18, + "learning_rate": 4.347433284675439e-05, + "loss": 3.979, + "step": 8956000 + }, + { + "epoch": 1.18, + "learning_rate": 4.347363736899206e-05, + "loss": 3.9792, + "step": 8956500 + }, + { + "epoch": 1.18, + "learning_rate": 4.347294185973474e-05, + "loss": 3.973, + "step": 8957000 + }, + { + "epoch": 1.18, + "learning_rate": 4.347224631898362e-05, + "loss": 3.9863, + "step": 8957500 + }, + { + "epoch": 1.18, + "learning_rate": 4.34715507467399e-05, + "loss": 3.9802, + "step": 8958000 + }, + { + "epoch": 1.18, + "learning_rate": 4.347085514300474e-05, + "loss": 3.986, + "step": 8958500 + }, + { + "epoch": 1.18, + "learning_rate": 4.347015950777935e-05, + "loss": 3.9551, + "step": 8959000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3469463841064895e-05, + "loss": 3.9546, + "step": 8959500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346876814286258e-05, + "loss": 3.9652, + "step": 8960000 + }, + { + "epoch": 1.18, + "learning_rate": 4.346807241317358e-05, + "loss": 3.9436, + "step": 8960500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346737665199907e-05, + "loss": 3.9657, + "step": 8961000 + }, + { + "epoch": 1.18, + "learning_rate": 4.346668085934026e-05, + "loss": 3.988, + "step": 8961500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346598503519833e-05, + "loss": 3.9757, + "step": 8962000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3465289179574456e-05, + "loss": 3.964, + "step": 8962500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346459329246983e-05, + "loss": 3.9442, + "step": 8963000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3463897373885636e-05, + "loss": 3.982, + "step": 8963500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346320142382306e-05, + "loss": 3.9631, + "step": 8964000 + }, + { + "epoch": 1.18, + "learning_rate": 4.346250544228331e-05, + "loss": 3.9816, + "step": 8964500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346180942926754e-05, + "loss": 3.9713, + "step": 8965000 + }, + { + "epoch": 1.18, + "learning_rate": 4.346111338477695e-05, + "loss": 3.9701, + "step": 8965500 + }, + { + "epoch": 1.18, + "learning_rate": 4.346041730881273e-05, + "loss": 3.9667, + "step": 8966000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3459721201376056e-05, + "loss": 3.9714, + "step": 8966500 + }, + { + "epoch": 1.18, + "learning_rate": 4.345902506246813e-05, + "loss": 3.9519, + "step": 8967000 + }, + { + "epoch": 1.18, + "learning_rate": 4.345832889209014e-05, + "loss": 3.9761, + "step": 8967500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3457632690243255e-05, + "loss": 3.9818, + "step": 8968000 + }, + { + "epoch": 1.18, + "learning_rate": 4.345693645692866e-05, + "loss": 3.9833, + "step": 8968500 + }, + { + "epoch": 1.18, + "learning_rate": 4.345624019214757e-05, + "loss": 3.9729, + "step": 8969000 + }, + { + "epoch": 1.18, + "learning_rate": 4.345554389590115e-05, + "loss": 3.9698, + "step": 8969500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3454847568190593e-05, + "loss": 3.9694, + "step": 8970000 + }, + { + "epoch": 1.18, + "learning_rate": 4.345415120901709e-05, + "loss": 3.9654, + "step": 8970500 + }, + { + "epoch": 1.18, + "learning_rate": 4.345345481838182e-05, + "loss": 3.9703, + "step": 8971000 + }, + { + "epoch": 1.18, + "learning_rate": 4.345275839628597e-05, + "loss": 3.9665, + "step": 8971500 + }, + { + "epoch": 1.18, + "learning_rate": 4.345206194273074e-05, + "loss": 3.9791, + "step": 8972000 + }, + { + "epoch": 1.18, + "learning_rate": 4.34513654577173e-05, + "loss": 3.9571, + "step": 8972500 + }, + { + "epoch": 1.18, + "learning_rate": 4.345066894124685e-05, + "loss": 3.9856, + "step": 8973000 + }, + { + "epoch": 1.18, + "learning_rate": 4.344997239332057e-05, + "loss": 3.9573, + "step": 8973500 + }, + { + "epoch": 1.18, + "learning_rate": 4.344927581393966e-05, + "loss": 3.9717, + "step": 8974000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3448579203105286e-05, + "loss": 3.9784, + "step": 8974500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3447882560818656e-05, + "loss": 3.9853, + "step": 8975000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3447185887080955e-05, + "loss": 3.9801, + "step": 8975500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3446489181893354e-05, + "loss": 3.9862, + "step": 8976000 + }, + { + "epoch": 1.18, + "learning_rate": 4.344579244525706e-05, + "loss": 3.9632, + "step": 8976500 + }, + { + "epoch": 1.18, + "learning_rate": 4.344509567717325e-05, + "loss": 3.9781, + "step": 8977000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3444398877643114e-05, + "loss": 3.9771, + "step": 8977500 + }, + { + "epoch": 1.18, + "learning_rate": 4.344370204666785e-05, + "loss": 3.9705, + "step": 8978000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3443005184248624e-05, + "loss": 3.9742, + "step": 8978500 + }, + { + "epoch": 1.18, + "learning_rate": 4.344230829038665e-05, + "loss": 3.9838, + "step": 8979000 + }, + { + "epoch": 1.18, + "learning_rate": 4.344161136508309e-05, + "loss": 3.9771, + "step": 8979500 + }, + { + "epoch": 1.18, + "learning_rate": 4.344091440833915e-05, + "loss": 3.9625, + "step": 8980000 + }, + { + "epoch": 1.18, + "learning_rate": 4.344021742015602e-05, + "loss": 3.9786, + "step": 8980500 + }, + { + "epoch": 1.18, + "learning_rate": 4.343952040053488e-05, + "loss": 3.9827, + "step": 8981000 + }, + { + "epoch": 1.18, + "learning_rate": 4.343882334947692e-05, + "loss": 3.9505, + "step": 8981500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3438126266983326e-05, + "loss": 3.9583, + "step": 8982000 + }, + { + "epoch": 1.18, + "learning_rate": 4.343742915305529e-05, + "loss": 4.0005, + "step": 8982500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3436732007694e-05, + "loss": 3.9588, + "step": 8983000 + }, + { + "epoch": 1.18, + "learning_rate": 4.343603483090064e-05, + "loss": 3.9804, + "step": 8983500 + }, + { + "epoch": 1.18, + "learning_rate": 4.343533762267641e-05, + "loss": 3.9768, + "step": 8984000 + }, + { + "epoch": 1.18, + "learning_rate": 4.343464038302249e-05, + "loss": 3.9567, + "step": 8984500 + }, + { + "epoch": 1.18, + "learning_rate": 4.343394311194007e-05, + "loss": 3.9488, + "step": 8985000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3433245809430335e-05, + "loss": 3.9793, + "step": 8985500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3432548475494474e-05, + "loss": 3.9613, + "step": 8986000 + }, + { + "epoch": 1.18, + "learning_rate": 4.343185111013369e-05, + "loss": 3.9753, + "step": 8986500 + }, + { + "epoch": 1.18, + "learning_rate": 4.343115371334915e-05, + "loss": 3.9699, + "step": 8987000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3430456285142064e-05, + "loss": 3.9551, + "step": 8987500 + }, + { + "epoch": 1.18, + "learning_rate": 4.342975882551361e-05, + "loss": 3.9688, + "step": 8988000 + }, + { + "epoch": 1.18, + "learning_rate": 4.342906133446497e-05, + "loss": 3.9638, + "step": 8988500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3428363811997345e-05, + "loss": 3.9863, + "step": 8989000 + }, + { + "epoch": 1.18, + "learning_rate": 4.342766625811193e-05, + "loss": 3.9623, + "step": 8989500 + }, + { + "epoch": 1.18, + "learning_rate": 4.342696867280989e-05, + "loss": 3.9846, + "step": 8990000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3426271056092435e-05, + "loss": 3.9685, + "step": 8990500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3425573407960753e-05, + "loss": 3.9957, + "step": 8991000 + }, + { + "epoch": 1.18, + "learning_rate": 4.342487572841603e-05, + "loss": 3.9803, + "step": 8991500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3424178017459446e-05, + "loss": 3.9788, + "step": 8992000 + }, + { + "epoch": 1.18, + "learning_rate": 4.342348027509221e-05, + "loss": 3.9492, + "step": 8992500 + }, + { + "epoch": 1.18, + "learning_rate": 4.342278250131549e-05, + "loss": 3.973, + "step": 8993000 + }, + { + "epoch": 1.18, + "learning_rate": 4.342208469613049e-05, + "loss": 3.9676, + "step": 8993500 + }, + { + "epoch": 1.18, + "learning_rate": 4.34213868595384e-05, + "loss": 3.95, + "step": 8994000 + }, + { + "epoch": 1.18, + "learning_rate": 4.34206889915404e-05, + "loss": 3.9799, + "step": 8994500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3419991092137686e-05, + "loss": 3.9864, + "step": 8995000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3419293161331445e-05, + "loss": 3.9616, + "step": 8995500 + }, + { + "epoch": 1.18, + "learning_rate": 4.341859519912288e-05, + "loss": 3.9717, + "step": 8996000 + }, + { + "epoch": 1.18, + "learning_rate": 4.341789720551316e-05, + "loss": 3.9756, + "step": 8996500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3417199180503486e-05, + "loss": 3.967, + "step": 8997000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3416501124095046e-05, + "loss": 3.9544, + "step": 8997500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3415803036289026e-05, + "loss": 3.9672, + "step": 8998000 + }, + { + "epoch": 1.18, + "learning_rate": 4.341510491708663e-05, + "loss": 3.9603, + "step": 8998500 + }, + { + "epoch": 1.18, + "learning_rate": 4.341440676648904e-05, + "loss": 3.953, + "step": 8999000 + }, + { + "epoch": 1.18, + "learning_rate": 4.341370858449744e-05, + "loss": 3.9824, + "step": 8999500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3413010371113025e-05, + "loss": 3.967, + "step": 9000000 + }, + { + "epoch": 1.18, + "learning_rate": 4.341231212633699e-05, + "loss": 3.9881, + "step": 9000500 + }, + { + "epoch": 1.18, + "learning_rate": 4.341161385017052e-05, + "loss": 3.9702, + "step": 9001000 + }, + { + "epoch": 1.18, + "learning_rate": 4.341091554261481e-05, + "loss": 3.9699, + "step": 9001500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3410217203671044e-05, + "loss": 3.9595, + "step": 9002000 + }, + { + "epoch": 1.18, + "learning_rate": 4.340951883334041e-05, + "loss": 3.9766, + "step": 9002500 + }, + { + "epoch": 1.18, + "learning_rate": 4.340882043162412e-05, + "loss": 3.9614, + "step": 9003000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3408121998523346e-05, + "loss": 3.9778, + "step": 9003500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3407423534039274e-05, + "loss": 3.9756, + "step": 9004000 + }, + { + "epoch": 1.18, + "learning_rate": 4.34067250381731e-05, + "loss": 3.967, + "step": 9004500 + }, + { + "epoch": 1.18, + "learning_rate": 4.340602651092603e-05, + "loss": 3.9663, + "step": 9005000 + }, + { + "epoch": 1.18, + "learning_rate": 4.340532795229924e-05, + "loss": 3.9644, + "step": 9005500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3404629362293915e-05, + "loss": 3.9721, + "step": 9006000 + }, + { + "epoch": 1.18, + "learning_rate": 4.340393074091126e-05, + "loss": 3.9815, + "step": 9006500 + }, + { + "epoch": 1.18, + "learning_rate": 4.340323208815246e-05, + "loss": 3.9796, + "step": 9007000 + }, + { + "epoch": 1.18, + "learning_rate": 4.340253340401871e-05, + "loss": 3.9718, + "step": 9007500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3401834688511193e-05, + "loss": 3.981, + "step": 9008000 + }, + { + "epoch": 1.18, + "learning_rate": 4.340113594163111e-05, + "loss": 3.9767, + "step": 9008500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3400437163379645e-05, + "loss": 3.9709, + "step": 9009000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339973835375799e-05, + "loss": 3.9731, + "step": 9009500 + }, + { + "epoch": 1.18, + "learning_rate": 4.339903951276734e-05, + "loss": 3.9811, + "step": 9010000 + }, + { + "epoch": 1.18, + "learning_rate": 4.3398340640408886e-05, + "loss": 3.9597, + "step": 9010500 + }, + { + "epoch": 1.18, + "learning_rate": 4.339764173668382e-05, + "loss": 3.9607, + "step": 9011000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339694280159332e-05, + "loss": 3.9662, + "step": 9011500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3396243835138606e-05, + "loss": 3.9974, + "step": 9012000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339554483732084e-05, + "loss": 3.9634, + "step": 9012500 + }, + { + "epoch": 1.18, + "learning_rate": 4.339484580814123e-05, + "loss": 3.9776, + "step": 9013000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339414674760096e-05, + "loss": 3.9767, + "step": 9013500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3393447655701234e-05, + "loss": 3.9933, + "step": 9014000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339274853244324e-05, + "loss": 3.9749, + "step": 9014500 + }, + { + "epoch": 1.18, + "learning_rate": 4.3392049377828155e-05, + "loss": 3.975, + "step": 9015000 + }, + { + "epoch": 1.18, + "learning_rate": 4.339135019185718e-05, + "loss": 3.9587, + "step": 9015500 + }, + { + "epoch": 1.18, + "learning_rate": 4.339065097453152e-05, + "loss": 3.9604, + "step": 9016000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338995172585235e-05, + "loss": 3.9593, + "step": 9016500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338925244582087e-05, + "loss": 3.9665, + "step": 9017000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338855313443826e-05, + "loss": 3.9513, + "step": 9017500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3387853791705734e-05, + "loss": 3.9707, + "step": 9018000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338715441762446e-05, + "loss": 3.969, + "step": 9018500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338645501219566e-05, + "loss": 3.9697, + "step": 9019000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338575557542051e-05, + "loss": 3.9701, + "step": 9019500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338505610730018e-05, + "loss": 3.9543, + "step": 9020000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33843566078359e-05, + "loss": 3.9865, + "step": 9020500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338365707702885e-05, + "loss": 3.9698, + "step": 9021000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338295751488021e-05, + "loss": 3.9519, + "step": 9021500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338225792139119e-05, + "loss": 3.9951, + "step": 9022000 + }, + { + "epoch": 1.19, + "learning_rate": 4.338155829656297e-05, + "loss": 3.982, + "step": 9022500 + }, + { + "epoch": 1.19, + "learning_rate": 4.338085864039675e-05, + "loss": 3.9833, + "step": 9023000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3380158952893716e-05, + "loss": 3.9612, + "step": 9023500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3379459234055066e-05, + "loss": 3.9538, + "step": 9024000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337875948388199e-05, + "loss": 3.9771, + "step": 9024500 + }, + { + "epoch": 1.19, + "learning_rate": 4.33780597023757e-05, + "loss": 4.0031, + "step": 9025000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337735988953735e-05, + "loss": 3.9898, + "step": 9025500 + }, + { + "epoch": 1.19, + "learning_rate": 4.337666004536817e-05, + "loss": 3.968, + "step": 9026000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337596016986933e-05, + "loss": 3.9847, + "step": 9026500 + }, + { + "epoch": 1.19, + "learning_rate": 4.337526026304203e-05, + "loss": 3.951, + "step": 9027000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337456032488747e-05, + "loss": 3.9811, + "step": 9027500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3373860355406845e-05, + "loss": 3.9718, + "step": 9028000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337316035460133e-05, + "loss": 3.9716, + "step": 9028500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3372460322472134e-05, + "loss": 3.9692, + "step": 9029000 + }, + { + "epoch": 1.19, + "learning_rate": 4.337176025902045e-05, + "loss": 3.9518, + "step": 9029500 + }, + { + "epoch": 1.19, + "learning_rate": 4.337106016424746e-05, + "loss": 3.9888, + "step": 9030000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3370360038154367e-05, + "loss": 3.9712, + "step": 9030500 + }, + { + "epoch": 1.19, + "learning_rate": 4.336965988074237e-05, + "loss": 3.9773, + "step": 9031000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3368959692012656e-05, + "loss": 3.973, + "step": 9031500 + }, + { + "epoch": 1.19, + "learning_rate": 4.336825947196641e-05, + "loss": 3.974, + "step": 9032000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3367559220604834e-05, + "loss": 3.9751, + "step": 9032500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3366858937929135e-05, + "loss": 3.9829, + "step": 9033000 + }, + { + "epoch": 1.19, + "learning_rate": 4.336615862394048e-05, + "loss": 3.9792, + "step": 9033500 + }, + { + "epoch": 1.19, + "learning_rate": 4.336545827864009e-05, + "loss": 3.9783, + "step": 9034000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3364757902029134e-05, + "loss": 3.9426, + "step": 9034500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3364057494108826e-05, + "loss": 3.9671, + "step": 9035000 + }, + { + "epoch": 1.19, + "learning_rate": 4.336335705488034e-05, + "loss": 3.983, + "step": 9035500 + }, + { + "epoch": 1.19, + "learning_rate": 4.336265658434489e-05, + "loss": 3.9726, + "step": 9036000 + }, + { + "epoch": 1.19, + "learning_rate": 4.336195608250366e-05, + "loss": 3.9801, + "step": 9036500 + }, + { + "epoch": 1.19, + "learning_rate": 4.336125554935785e-05, + "loss": 3.948, + "step": 9037000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3360554984908654e-05, + "loss": 3.9775, + "step": 9037500 + }, + { + "epoch": 1.19, + "learning_rate": 4.335985438915726e-05, + "loss": 3.9859, + "step": 9038000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3359153762104865e-05, + "loss": 3.9682, + "step": 9038500 + }, + { + "epoch": 1.19, + "learning_rate": 4.335845310375267e-05, + "loss": 3.9701, + "step": 9039000 + }, + { + "epoch": 1.19, + "learning_rate": 4.335775241410186e-05, + "loss": 3.9877, + "step": 9039500 + }, + { + "epoch": 1.19, + "learning_rate": 4.335705169315363e-05, + "loss": 3.9645, + "step": 9040000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3356350940909175e-05, + "loss": 3.9663, + "step": 9040500 + }, + { + "epoch": 1.19, + "learning_rate": 4.33556501573697e-05, + "loss": 3.9625, + "step": 9041000 + }, + { + "epoch": 1.19, + "learning_rate": 4.335494934253639e-05, + "loss": 3.9811, + "step": 9041500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3354248496410446e-05, + "loss": 3.9798, + "step": 9042000 + }, + { + "epoch": 1.19, + "learning_rate": 4.335354761899305e-05, + "loss": 3.9878, + "step": 9042500 + }, + { + "epoch": 1.19, + "learning_rate": 4.335284671028541e-05, + "loss": 3.9768, + "step": 9043000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3352145770288724e-05, + "loss": 3.9648, + "step": 9043500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3351444799004173e-05, + "loss": 3.9618, + "step": 9044000 + }, + { + "epoch": 1.19, + "learning_rate": 4.335074379643295e-05, + "loss": 3.974, + "step": 9044500 + }, + { + "epoch": 1.19, + "learning_rate": 4.335004276257627e-05, + "loss": 3.9839, + "step": 9045000 + }, + { + "epoch": 1.19, + "learning_rate": 4.334934169743532e-05, + "loss": 3.9732, + "step": 9045500 + }, + { + "epoch": 1.19, + "learning_rate": 4.334864060101129e-05, + "loss": 3.9876, + "step": 9046000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3347939473305374e-05, + "loss": 3.9863, + "step": 9046500 + }, + { + "epoch": 1.19, + "learning_rate": 4.334723831431877e-05, + "loss": 3.9844, + "step": 9047000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3346537124052675e-05, + "loss": 3.9874, + "step": 9047500 + }, + { + "epoch": 1.19, + "learning_rate": 4.334583590250829e-05, + "loss": 3.9627, + "step": 9048000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33451346496868e-05, + "loss": 3.9805, + "step": 9048500 + }, + { + "epoch": 1.19, + "learning_rate": 4.33444333655894e-05, + "loss": 3.9829, + "step": 9049000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33437320502173e-05, + "loss": 3.9865, + "step": 9049500 + }, + { + "epoch": 1.19, + "learning_rate": 4.334303070357168e-05, + "loss": 3.9818, + "step": 9050000 + }, + { + "epoch": 1.19, + "learning_rate": 4.334232932565374e-05, + "loss": 3.9908, + "step": 9050500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3341627916464684e-05, + "loss": 3.9739, + "step": 9051000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33409264760057e-05, + "loss": 3.9612, + "step": 9051500 + }, + { + "epoch": 1.19, + "learning_rate": 4.334022500427798e-05, + "loss": 3.9794, + "step": 9052000 + }, + { + "epoch": 1.19, + "learning_rate": 4.333952350128273e-05, + "loss": 3.9562, + "step": 9052500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333882196702114e-05, + "loss": 3.9606, + "step": 9053000 + }, + { + "epoch": 1.19, + "learning_rate": 4.333812040149441e-05, + "loss": 3.9693, + "step": 9053500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333741880470372e-05, + "loss": 3.9632, + "step": 9054000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3336717176650294e-05, + "loss": 3.9613, + "step": 9054500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333601551733531e-05, + "loss": 3.9568, + "step": 9055000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3335313826759966e-05, + "loss": 3.9743, + "step": 9055500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333461210492546e-05, + "loss": 3.9686, + "step": 9056000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3333910351832986e-05, + "loss": 3.967, + "step": 9056500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3333208567483745e-05, + "loss": 3.9606, + "step": 9057000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3332506751878935e-05, + "loss": 3.9725, + "step": 9057500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333180490501975e-05, + "loss": 3.9484, + "step": 9058000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3331103026907375e-05, + "loss": 3.979, + "step": 9058500 + }, + { + "epoch": 1.19, + "learning_rate": 4.333040111754302e-05, + "loss": 3.9683, + "step": 9059000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3329699176927886e-05, + "loss": 3.98, + "step": 9059500 + }, + { + "epoch": 1.19, + "learning_rate": 4.332899720506315e-05, + "loss": 3.9852, + "step": 9060000 + }, + { + "epoch": 1.19, + "learning_rate": 4.332829520195003e-05, + "loss": 3.973, + "step": 9060500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3327593167589706e-05, + "loss": 3.9794, + "step": 9061000 + }, + { + "epoch": 1.19, + "learning_rate": 4.332689110198339e-05, + "loss": 3.9818, + "step": 9061500 + }, + { + "epoch": 1.19, + "learning_rate": 4.332618900513227e-05, + "loss": 3.971, + "step": 9062000 + }, + { + "epoch": 1.19, + "learning_rate": 4.332548687703754e-05, + "loss": 3.9754, + "step": 9062500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3324784717700413e-05, + "loss": 3.9911, + "step": 9063000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3324082527122066e-05, + "loss": 4.0017, + "step": 9063500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3323380305303706e-05, + "loss": 3.9617, + "step": 9064000 + }, + { + "epoch": 1.19, + "learning_rate": 4.332267805224653e-05, + "loss": 3.9561, + "step": 9064500 + }, + { + "epoch": 1.19, + "learning_rate": 4.332197576795174e-05, + "loss": 3.9788, + "step": 9065000 + }, + { + "epoch": 1.19, + "learning_rate": 4.332127345242052e-05, + "loss": 3.9695, + "step": 9065500 + }, + { + "epoch": 1.19, + "learning_rate": 4.332057110565407e-05, + "loss": 3.9738, + "step": 9066000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33198687276536e-05, + "loss": 3.9693, + "step": 9066500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3319166318420295e-05, + "loss": 3.9857, + "step": 9067000 + }, + { + "epoch": 1.19, + "learning_rate": 4.331846387795536e-05, + "loss": 4.002, + "step": 9067500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3317761406259994e-05, + "loss": 3.9611, + "step": 9068000 + }, + { + "epoch": 1.19, + "learning_rate": 4.331705890333538e-05, + "loss": 3.9572, + "step": 9068500 + }, + { + "epoch": 1.19, + "learning_rate": 4.331635636918273e-05, + "loss": 3.9573, + "step": 9069000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3315653803803246e-05, + "loss": 3.967, + "step": 9069500 + }, + { + "epoch": 1.19, + "learning_rate": 4.331495120719811e-05, + "loss": 3.9734, + "step": 9070000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3314248579368534e-05, + "loss": 3.9732, + "step": 9070500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3313545920315705e-05, + "loss": 3.9731, + "step": 9071000 + }, + { + "epoch": 1.19, + "learning_rate": 4.331284323004082e-05, + "loss": 3.9621, + "step": 9071500 + }, + { + "epoch": 1.19, + "learning_rate": 4.331214050854509e-05, + "loss": 3.9736, + "step": 9072000 + }, + { + "epoch": 1.19, + "learning_rate": 4.33114377558297e-05, + "loss": 3.9723, + "step": 9072500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3310734971895854e-05, + "loss": 3.9612, + "step": 9073000 + }, + { + "epoch": 1.19, + "learning_rate": 4.331003215674475e-05, + "loss": 3.9764, + "step": 9073500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3309329310377586e-05, + "loss": 3.9721, + "step": 9074000 + }, + { + "epoch": 1.19, + "learning_rate": 4.330862643279556e-05, + "loss": 3.9691, + "step": 9074500 + }, + { + "epoch": 1.19, + "learning_rate": 4.330792352399987e-05, + "loss": 3.961, + "step": 9075000 + }, + { + "epoch": 1.19, + "learning_rate": 4.330722058399172e-05, + "loss": 3.9779, + "step": 9075500 + }, + { + "epoch": 1.19, + "learning_rate": 4.33065176127723e-05, + "loss": 3.9806, + "step": 9076000 + }, + { + "epoch": 1.19, + "learning_rate": 4.330581461034281e-05, + "loss": 3.984, + "step": 9076500 + }, + { + "epoch": 1.19, + "learning_rate": 4.330511157670445e-05, + "loss": 3.9462, + "step": 9077000 + }, + { + "epoch": 1.19, + "learning_rate": 4.330440851185842e-05, + "loss": 3.9583, + "step": 9077500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3303705415805916e-05, + "loss": 3.9734, + "step": 9078000 + }, + { + "epoch": 1.19, + "learning_rate": 4.330300228854813e-05, + "loss": 3.9807, + "step": 9078500 + }, + { + "epoch": 1.19, + "learning_rate": 4.330229913008628e-05, + "loss": 3.9806, + "step": 9079000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3301595940421554e-05, + "loss": 3.9663, + "step": 9079500 + }, + { + "epoch": 1.19, + "learning_rate": 4.330089271955515e-05, + "loss": 3.9794, + "step": 9080000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3300189467488264e-05, + "loss": 3.9936, + "step": 9080500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329948618422209e-05, + "loss": 3.967, + "step": 9081000 + }, + { + "epoch": 1.19, + "learning_rate": 4.329878286975785e-05, + "loss": 3.9635, + "step": 9081500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329807952409672e-05, + "loss": 3.9502, + "step": 9082000 + }, + { + "epoch": 1.19, + "learning_rate": 4.329737614723991e-05, + "loss": 3.9615, + "step": 9082500 + }, + { + "epoch": 1.19, + "learning_rate": 4.3296672739188616e-05, + "loss": 3.9731, + "step": 9083000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3295969299944025e-05, + "loss": 3.9835, + "step": 9083500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329526582950737e-05, + "loss": 3.9787, + "step": 9084000 + }, + { + "epoch": 1.19, + "learning_rate": 4.329456232787982e-05, + "loss": 3.9643, + "step": 9084500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329385879506258e-05, + "loss": 3.9526, + "step": 9085000 + }, + { + "epoch": 1.19, + "learning_rate": 4.329315523105686e-05, + "loss": 3.9561, + "step": 9085500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329245163586385e-05, + "loss": 3.9601, + "step": 9086000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3291748009484745e-05, + "loss": 3.9834, + "step": 9086500 + }, + { + "epoch": 1.19, + "learning_rate": 4.329104435192076e-05, + "loss": 3.9706, + "step": 9087000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3290340663173086e-05, + "loss": 3.9964, + "step": 9087500 + }, + { + "epoch": 1.19, + "learning_rate": 4.328963694324292e-05, + "loss": 3.9685, + "step": 9088000 + }, + { + "epoch": 1.19, + "learning_rate": 4.328893319213146e-05, + "loss": 3.9632, + "step": 9088500 + }, + { + "epoch": 1.19, + "learning_rate": 4.328822940983992e-05, + "loss": 3.9826, + "step": 9089000 + }, + { + "epoch": 1.19, + "learning_rate": 4.328752559636949e-05, + "loss": 3.9623, + "step": 9089500 + }, + { + "epoch": 1.19, + "learning_rate": 4.328682175172136e-05, + "loss": 3.9613, + "step": 9090000 + }, + { + "epoch": 1.19, + "learning_rate": 4.3286117875896746e-05, + "loss": 3.9816, + "step": 9090500 + }, + { + "epoch": 1.19, + "learning_rate": 4.328541396889685e-05, + "loss": 3.9773, + "step": 9091000 + }, + { + "epoch": 1.19, + "learning_rate": 4.328471003072285e-05, + "loss": 3.9649, + "step": 9091500 + }, + { + "epoch": 1.19, + "learning_rate": 4.328400606137598e-05, + "loss": 3.9704, + "step": 9092000 + }, + { + "epoch": 1.19, + "learning_rate": 4.32833020608574e-05, + "loss": 3.9635, + "step": 9092500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3282598029168344e-05, + "loss": 3.9675, + "step": 9093000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3281893966309996e-05, + "loss": 3.9683, + "step": 9093500 + }, + { + "epoch": 1.2, + "learning_rate": 4.328118987228355e-05, + "loss": 3.9838, + "step": 9094000 + }, + { + "epoch": 1.2, + "learning_rate": 4.328048574709023e-05, + "loss": 3.971, + "step": 9094500 + }, + { + "epoch": 1.2, + "learning_rate": 4.327978159073122e-05, + "loss": 3.9468, + "step": 9095000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3279077403207714e-05, + "loss": 3.9714, + "step": 9095500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3278373184520925e-05, + "loss": 3.9664, + "step": 9096000 + }, + { + "epoch": 1.2, + "learning_rate": 4.327766893467206e-05, + "loss": 3.985, + "step": 9096500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3276964653662285e-05, + "loss": 3.9486, + "step": 9097000 + }, + { + "epoch": 1.2, + "learning_rate": 4.327626034149285e-05, + "loss": 3.9552, + "step": 9097500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3275555998164916e-05, + "loss": 3.96, + "step": 9098000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3274851623679704e-05, + "loss": 3.9547, + "step": 9098500 + }, + { + "epoch": 1.2, + "learning_rate": 4.327414721803841e-05, + "loss": 3.9798, + "step": 9099000 + }, + { + "epoch": 1.2, + "learning_rate": 4.327344278124224e-05, + "loss": 3.9605, + "step": 9099500 + }, + { + "epoch": 1.2, + "learning_rate": 4.327273831329238e-05, + "loss": 3.9824, + "step": 9100000 + }, + { + "epoch": 1.2, + "learning_rate": 4.327203381419005e-05, + "loss": 3.9864, + "step": 9100500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3271329283936435e-05, + "loss": 3.9456, + "step": 9101000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3270624722532736e-05, + "loss": 3.98, + "step": 9101500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326992012998018e-05, + "loss": 3.9774, + "step": 9102000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326921550627993e-05, + "loss": 3.9559, + "step": 9102500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326851085143322e-05, + "loss": 3.9866, + "step": 9103000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3267806165441224e-05, + "loss": 3.9772, + "step": 9103500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326710144830517e-05, + "loss": 3.9581, + "step": 9104000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326639670002624e-05, + "loss": 3.9718, + "step": 9104500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326569192060564e-05, + "loss": 4.0073, + "step": 9105000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326498711004459e-05, + "loss": 3.976, + "step": 9105500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3264282268344255e-05, + "loss": 3.9654, + "step": 9106000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326357739550587e-05, + "loss": 3.9626, + "step": 9106500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3262872491530616e-05, + "loss": 3.9624, + "step": 9107000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326216755641971e-05, + "loss": 3.9572, + "step": 9107500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326146259017434e-05, + "loss": 3.9782, + "step": 9108000 + }, + { + "epoch": 1.2, + "learning_rate": 4.326075759279571e-05, + "loss": 3.977, + "step": 9108500 + }, + { + "epoch": 1.2, + "learning_rate": 4.326005256428503e-05, + "loss": 3.9786, + "step": 9109000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3259347504643497e-05, + "loss": 3.9986, + "step": 9109500 + }, + { + "epoch": 1.2, + "learning_rate": 4.325864241387232e-05, + "loss": 3.9518, + "step": 9110000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3257937291972697e-05, + "loss": 3.9597, + "step": 9110500 + }, + { + "epoch": 1.2, + "learning_rate": 4.325723213894581e-05, + "loss": 3.9803, + "step": 9111000 + }, + { + "epoch": 1.2, + "learning_rate": 4.32565269547929e-05, + "loss": 3.9495, + "step": 9111500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3255821739515136e-05, + "loss": 3.9468, + "step": 9112000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3255116493113745e-05, + "loss": 3.9807, + "step": 9112500 + }, + { + "epoch": 1.2, + "learning_rate": 4.325441121558991e-05, + "loss": 3.9674, + "step": 9113000 + }, + { + "epoch": 1.2, + "learning_rate": 4.325370590694484e-05, + "loss": 3.9462, + "step": 9113500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3253000567179734e-05, + "loss": 3.9872, + "step": 9114000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3252295196295805e-05, + "loss": 3.9698, + "step": 9114500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3251589794294244e-05, + "loss": 3.9811, + "step": 9115000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3250884361176264e-05, + "loss": 3.9603, + "step": 9115500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3250178896943064e-05, + "loss": 3.9738, + "step": 9116000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324947340159584e-05, + "loss": 3.9509, + "step": 9116500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32487678751358e-05, + "loss": 3.9647, + "step": 9117000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324806231756415e-05, + "loss": 3.9666, + "step": 9117500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3247356728882085e-05, + "loss": 3.9581, + "step": 9118000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3246651109090816e-05, + "loss": 3.9317, + "step": 9118500 + }, + { + "epoch": 1.2, + "learning_rate": 4.324594545819155e-05, + "loss": 3.9524, + "step": 9119000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324523977618547e-05, + "loss": 3.9774, + "step": 9119500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32445340630738e-05, + "loss": 3.9702, + "step": 9120000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324382831885773e-05, + "loss": 3.9657, + "step": 9120500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3243122543538474e-05, + "loss": 3.9607, + "step": 9121000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324241673711722e-05, + "loss": 3.9714, + "step": 9121500 + }, + { + "epoch": 1.2, + "learning_rate": 4.324171089959519e-05, + "loss": 3.986, + "step": 9122000 + }, + { + "epoch": 1.2, + "learning_rate": 4.324100503097357e-05, + "loss": 3.9645, + "step": 9122500 + }, + { + "epoch": 1.2, + "learning_rate": 4.324029913125358e-05, + "loss": 3.9524, + "step": 9123000 + }, + { + "epoch": 1.2, + "learning_rate": 4.323959320043641e-05, + "loss": 3.9771, + "step": 9123500 + }, + { + "epoch": 1.2, + "learning_rate": 4.323888723852326e-05, + "loss": 3.9756, + "step": 9124000 + }, + { + "epoch": 1.2, + "learning_rate": 4.323818124551535e-05, + "loss": 3.9553, + "step": 9124500 + }, + { + "epoch": 1.2, + "learning_rate": 4.323747522141388e-05, + "loss": 3.953, + "step": 9125000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3236769166220046e-05, + "loss": 3.9872, + "step": 9125500 + }, + { + "epoch": 1.2, + "learning_rate": 4.323606307993505e-05, + "loss": 3.9739, + "step": 9126000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3235356962560105e-05, + "loss": 4.0027, + "step": 9126500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3234650814096414e-05, + "loss": 3.9665, + "step": 9127000 + }, + { + "epoch": 1.2, + "learning_rate": 4.323394463454518e-05, + "loss": 3.9494, + "step": 9127500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3233238423907595e-05, + "loss": 3.9468, + "step": 9128000 + }, + { + "epoch": 1.2, + "learning_rate": 4.323253218218487e-05, + "loss": 3.9638, + "step": 9128500 + }, + { + "epoch": 1.2, + "learning_rate": 4.323182590937822e-05, + "loss": 3.9483, + "step": 9129000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3231119605488836e-05, + "loss": 3.9747, + "step": 9129500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3230413270517936e-05, + "loss": 3.9658, + "step": 9130000 + }, + { + "epoch": 1.2, + "learning_rate": 4.32297069044667e-05, + "loss": 3.9515, + "step": 9130500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3229000507336356e-05, + "loss": 3.9743, + "step": 9131000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3228294079128094e-05, + "loss": 3.9677, + "step": 9131500 + }, + { + "epoch": 1.2, + "learning_rate": 4.322758761984312e-05, + "loss": 3.9785, + "step": 9132000 + }, + { + "epoch": 1.2, + "learning_rate": 4.322688112948265e-05, + "loss": 3.9589, + "step": 9132500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3226174608047885e-05, + "loss": 3.9588, + "step": 9133000 + }, + { + "epoch": 1.2, + "learning_rate": 4.322546805554002e-05, + "loss": 3.9616, + "step": 9133500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3224761471960266e-05, + "loss": 3.9514, + "step": 9134000 + }, + { + "epoch": 1.2, + "learning_rate": 4.322405485730983e-05, + "loss": 3.967, + "step": 9134500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32233482115899e-05, + "loss": 3.9471, + "step": 9135000 + }, + { + "epoch": 1.2, + "learning_rate": 4.322264153480171e-05, + "loss": 3.9635, + "step": 9135500 + }, + { + "epoch": 1.2, + "learning_rate": 4.322193482694644e-05, + "loss": 3.9826, + "step": 9136000 + }, + { + "epoch": 1.2, + "learning_rate": 4.32212280880253e-05, + "loss": 3.9392, + "step": 9136500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32205213180395e-05, + "loss": 3.9564, + "step": 9137000 + }, + { + "epoch": 1.2, + "learning_rate": 4.321981451699025e-05, + "loss": 3.9828, + "step": 9137500 + }, + { + "epoch": 1.2, + "learning_rate": 4.321910768487875e-05, + "loss": 3.9467, + "step": 9138000 + }, + { + "epoch": 1.2, + "learning_rate": 4.321840082170619e-05, + "loss": 3.9696, + "step": 9138500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32176939274738e-05, + "loss": 3.968, + "step": 9139000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3216987002182764e-05, + "loss": 3.965, + "step": 9139500 + }, + { + "epoch": 1.2, + "learning_rate": 4.321628004583431e-05, + "loss": 3.9452, + "step": 9140000 + }, + { + "epoch": 1.2, + "learning_rate": 4.321557305842961e-05, + "loss": 3.9772, + "step": 9140500 + }, + { + "epoch": 1.2, + "learning_rate": 4.321486603996991e-05, + "loss": 3.9495, + "step": 9141000 + }, + { + "epoch": 1.2, + "learning_rate": 4.321415899045639e-05, + "loss": 3.9705, + "step": 9141500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3213451909890254e-05, + "loss": 3.9589, + "step": 9142000 + }, + { + "epoch": 1.2, + "learning_rate": 4.321274479827272e-05, + "loss": 3.9691, + "step": 9142500 + }, + { + "epoch": 1.2, + "learning_rate": 4.321203765560498e-05, + "loss": 3.9711, + "step": 9143000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3211330481888244e-05, + "loss": 3.9548, + "step": 9143500 + }, + { + "epoch": 1.2, + "learning_rate": 4.321062327712373e-05, + "loss": 3.9694, + "step": 9144000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320991604131263e-05, + "loss": 3.9556, + "step": 9144500 + }, + { + "epoch": 1.2, + "learning_rate": 4.320920877445616e-05, + "loss": 3.956, + "step": 9145000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320850147655551e-05, + "loss": 3.9569, + "step": 9145500 + }, + { + "epoch": 1.2, + "learning_rate": 4.320779414761189e-05, + "loss": 3.9767, + "step": 9146000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3207086787626525e-05, + "loss": 3.9809, + "step": 9146500 + }, + { + "epoch": 1.2, + "learning_rate": 4.32063793966006e-05, + "loss": 3.9745, + "step": 9147000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320567197453533e-05, + "loss": 3.9471, + "step": 9147500 + }, + { + "epoch": 1.2, + "learning_rate": 4.320496452143192e-05, + "loss": 3.9566, + "step": 9148000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320425703729158e-05, + "loss": 3.9673, + "step": 9148500 + }, + { + "epoch": 1.2, + "learning_rate": 4.320354952211551e-05, + "loss": 3.9408, + "step": 9149000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3202841975904915e-05, + "loss": 3.9755, + "step": 9149500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3202134398661e-05, + "loss": 3.9671, + "step": 9150000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320142679038498e-05, + "loss": 3.9625, + "step": 9150500 + }, + { + "epoch": 1.2, + "learning_rate": 4.320071915107806e-05, + "loss": 3.9567, + "step": 9151000 + }, + { + "epoch": 1.2, + "learning_rate": 4.320001148074144e-05, + "loss": 3.9397, + "step": 9151500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3199303779376326e-05, + "loss": 3.9607, + "step": 9152000 + }, + { + "epoch": 1.2, + "learning_rate": 4.319859604698393e-05, + "loss": 3.9597, + "step": 9152500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319788828356546e-05, + "loss": 3.9442, + "step": 9153000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3197180489122115e-05, + "loss": 3.9495, + "step": 9153500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319647266365511e-05, + "loss": 3.9595, + "step": 9154000 + }, + { + "epoch": 1.2, + "learning_rate": 4.319576480716565e-05, + "loss": 3.9665, + "step": 9154500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319505691965493e-05, + "loss": 3.9585, + "step": 9155000 + }, + { + "epoch": 1.2, + "learning_rate": 4.319434900112418e-05, + "loss": 3.9578, + "step": 9155500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319364105157459e-05, + "loss": 3.9694, + "step": 9156000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3192933071007356e-05, + "loss": 3.9663, + "step": 9156500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319222505942371e-05, + "loss": 3.9694, + "step": 9157000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3191517016824854e-05, + "loss": 3.9883, + "step": 9157500 + }, + { + "epoch": 1.2, + "learning_rate": 4.319080894321198e-05, + "loss": 3.9676, + "step": 9158000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3190100838586306e-05, + "loss": 3.967, + "step": 9158500 + }, + { + "epoch": 1.2, + "learning_rate": 4.318939270294904e-05, + "loss": 3.9649, + "step": 9159000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3188684536301396e-05, + "loss": 3.9712, + "step": 9159500 + }, + { + "epoch": 1.2, + "learning_rate": 4.318797633864456e-05, + "loss": 3.9472, + "step": 9160000 + }, + { + "epoch": 1.2, + "learning_rate": 4.318726810997976e-05, + "loss": 3.9681, + "step": 9160500 + }, + { + "epoch": 1.2, + "learning_rate": 4.318655985030819e-05, + "loss": 3.9629, + "step": 9161000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3185851559631065e-05, + "loss": 3.9489, + "step": 9161500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3185143237949585e-05, + "loss": 3.9666, + "step": 9162000 + }, + { + "epoch": 1.2, + "learning_rate": 4.318443488526497e-05, + "loss": 3.977, + "step": 9162500 + }, + { + "epoch": 1.2, + "learning_rate": 4.3183726501578416e-05, + "loss": 3.9497, + "step": 9163000 + }, + { + "epoch": 1.2, + "learning_rate": 4.318301808689114e-05, + "loss": 3.9712, + "step": 9163500 + }, + { + "epoch": 1.2, + "learning_rate": 4.318230964120434e-05, + "loss": 3.9621, + "step": 9164000 + }, + { + "epoch": 1.2, + "learning_rate": 4.318160116451924e-05, + "loss": 3.9493, + "step": 9164500 + }, + { + "epoch": 1.2, + "learning_rate": 4.318089265683702e-05, + "loss": 3.9416, + "step": 9165000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3180184118158914e-05, + "loss": 3.9697, + "step": 9165500 + }, + { + "epoch": 1.2, + "learning_rate": 4.317947554848612e-05, + "loss": 3.9563, + "step": 9166000 + }, + { + "epoch": 1.2, + "learning_rate": 4.317876694781985e-05, + "loss": 3.9343, + "step": 9166500 + }, + { + "epoch": 1.2, + "learning_rate": 4.31780583161613e-05, + "loss": 3.9528, + "step": 9167000 + }, + { + "epoch": 1.2, + "learning_rate": 4.3177349653511694e-05, + "loss": 3.9509, + "step": 9167500 + }, + { + "epoch": 1.2, + "learning_rate": 4.317664095987223e-05, + "loss": 3.968, + "step": 9168000 + }, + { + "epoch": 1.2, + "learning_rate": 4.317593223524412e-05, + "loss": 3.9775, + "step": 9168500 + }, + { + "epoch": 1.21, + "learning_rate": 4.317522347962857e-05, + "loss": 3.9453, + "step": 9169000 + }, + { + "epoch": 1.21, + "learning_rate": 4.317451469302679e-05, + "loss": 3.9554, + "step": 9169500 + }, + { + "epoch": 1.21, + "learning_rate": 4.317380587543999e-05, + "loss": 3.9633, + "step": 9170000 + }, + { + "epoch": 1.21, + "learning_rate": 4.317309702686938e-05, + "loss": 3.9682, + "step": 9170500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3172388147316154e-05, + "loss": 3.9566, + "step": 9171000 + }, + { + "epoch": 1.21, + "learning_rate": 4.317167923678154e-05, + "loss": 3.957, + "step": 9171500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3170970295266736e-05, + "loss": 3.958, + "step": 9172000 + }, + { + "epoch": 1.21, + "learning_rate": 4.317026132277295e-05, + "loss": 3.9699, + "step": 9172500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3169552319301405e-05, + "loss": 3.9777, + "step": 9173000 + }, + { + "epoch": 1.21, + "learning_rate": 4.316884328485329e-05, + "loss": 3.9647, + "step": 9173500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3168134219429825e-05, + "loss": 3.9737, + "step": 9174000 + }, + { + "epoch": 1.21, + "learning_rate": 4.316742512303221e-05, + "loss": 3.9626, + "step": 9174500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3166715995661665e-05, + "loss": 3.9438, + "step": 9175000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3166006837319386e-05, + "loss": 3.9544, + "step": 9175500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3165297648006595e-05, + "loss": 3.9618, + "step": 9176000 + }, + { + "epoch": 1.21, + "learning_rate": 4.31645884277245e-05, + "loss": 3.9504, + "step": 9176500 + }, + { + "epoch": 1.21, + "learning_rate": 4.316387917647431e-05, + "loss": 3.981, + "step": 9177000 + }, + { + "epoch": 1.21, + "learning_rate": 4.316316989425722e-05, + "loss": 3.9561, + "step": 9177500 + }, + { + "epoch": 1.21, + "learning_rate": 4.316246058107445e-05, + "loss": 3.9532, + "step": 9178000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3161751236927214e-05, + "loss": 3.9271, + "step": 9178500 + }, + { + "epoch": 1.21, + "learning_rate": 4.316104186181671e-05, + "loss": 3.9562, + "step": 9179000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3160332455744154e-05, + "loss": 3.9557, + "step": 9179500 + }, + { + "epoch": 1.21, + "learning_rate": 4.315962301871076e-05, + "loss": 3.9552, + "step": 9180000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3158913550717736e-05, + "loss": 3.9865, + "step": 9180500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3158204051766284e-05, + "loss": 3.9356, + "step": 9181000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3157494521857614e-05, + "loss": 3.9464, + "step": 9181500 + }, + { + "epoch": 1.21, + "learning_rate": 4.315678496099293e-05, + "loss": 3.9577, + "step": 9182000 + }, + { + "epoch": 1.21, + "learning_rate": 4.315607536917347e-05, + "loss": 3.9614, + "step": 9182500 + }, + { + "epoch": 1.21, + "learning_rate": 4.315536574640042e-05, + "loss": 3.9623, + "step": 9183000 + }, + { + "epoch": 1.21, + "learning_rate": 4.315465609267499e-05, + "loss": 3.9659, + "step": 9183500 + }, + { + "epoch": 1.21, + "learning_rate": 4.315394640799839e-05, + "loss": 3.9692, + "step": 9184000 + }, + { + "epoch": 1.21, + "learning_rate": 4.315323669237184e-05, + "loss": 3.9501, + "step": 9184500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3152526945796543e-05, + "loss": 3.958, + "step": 9185000 + }, + { + "epoch": 1.21, + "learning_rate": 4.315181716827371e-05, + "loss": 3.9824, + "step": 9185500 + }, + { + "epoch": 1.21, + "learning_rate": 4.315110735980455e-05, + "loss": 3.9682, + "step": 9186000 + }, + { + "epoch": 1.21, + "learning_rate": 4.315039752039028e-05, + "loss": 3.9552, + "step": 9186500 + }, + { + "epoch": 1.21, + "learning_rate": 4.314968765003211e-05, + "loss": 3.9533, + "step": 9187000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314897774873124e-05, + "loss": 3.9526, + "step": 9187500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3148267816488876e-05, + "loss": 3.9678, + "step": 9188000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3147557853306245e-05, + "loss": 3.9776, + "step": 9188500 + }, + { + "epoch": 1.21, + "learning_rate": 4.314684785918454e-05, + "loss": 3.956, + "step": 9189000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314613783412499e-05, + "loss": 3.9759, + "step": 9189500 + }, + { + "epoch": 1.21, + "learning_rate": 4.314542777812879e-05, + "loss": 3.9521, + "step": 9190000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314471769119717e-05, + "loss": 3.9561, + "step": 9190500 + }, + { + "epoch": 1.21, + "learning_rate": 4.314400757333132e-05, + "loss": 3.9715, + "step": 9191000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314329742453246e-05, + "loss": 3.9833, + "step": 9191500 + }, + { + "epoch": 1.21, + "learning_rate": 4.31425872448018e-05, + "loss": 3.963, + "step": 9192000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314187703414054e-05, + "loss": 3.959, + "step": 9192500 + }, + { + "epoch": 1.21, + "learning_rate": 4.314116679254991e-05, + "loss": 3.9727, + "step": 9193000 + }, + { + "epoch": 1.21, + "learning_rate": 4.314045652003111e-05, + "loss": 3.9552, + "step": 9193500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3139746216585365e-05, + "loss": 3.9705, + "step": 9194000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3139035882213855e-05, + "loss": 3.954, + "step": 9194500 + }, + { + "epoch": 1.21, + "learning_rate": 4.313832551691782e-05, + "loss": 3.9619, + "step": 9195000 + }, + { + "epoch": 1.21, + "learning_rate": 4.313761512069846e-05, + "loss": 3.9488, + "step": 9195500 + }, + { + "epoch": 1.21, + "learning_rate": 4.313690469355698e-05, + "loss": 3.9547, + "step": 9196000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3136194235494606e-05, + "loss": 3.9589, + "step": 9196500 + }, + { + "epoch": 1.21, + "learning_rate": 4.313548374651254e-05, + "loss": 3.9656, + "step": 9197000 + }, + { + "epoch": 1.21, + "learning_rate": 4.313477322661199e-05, + "loss": 3.9454, + "step": 9197500 + }, + { + "epoch": 1.21, + "learning_rate": 4.313406267579417e-05, + "loss": 3.9638, + "step": 9198000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3133352094060296e-05, + "loss": 3.9742, + "step": 9198500 + }, + { + "epoch": 1.21, + "learning_rate": 4.313264148141158e-05, + "loss": 3.982, + "step": 9199000 + }, + { + "epoch": 1.21, + "learning_rate": 4.313193083784923e-05, + "loss": 3.9658, + "step": 9199500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3131220163374454e-05, + "loss": 3.9772, + "step": 9200000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3130509457988466e-05, + "loss": 3.9482, + "step": 9200500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3129798721692484e-05, + "loss": 3.9409, + "step": 9201000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3129087954487714e-05, + "loss": 3.9524, + "step": 9201500 + }, + { + "epoch": 1.21, + "learning_rate": 4.312837715637536e-05, + "loss": 3.9367, + "step": 9202000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3127666327356656e-05, + "loss": 3.9579, + "step": 9202500 + }, + { + "epoch": 1.21, + "learning_rate": 4.312695546743279e-05, + "loss": 3.9663, + "step": 9203000 + }, + { + "epoch": 1.21, + "learning_rate": 4.312624457660499e-05, + "loss": 3.9644, + "step": 9203500 + }, + { + "epoch": 1.21, + "learning_rate": 4.312553365487446e-05, + "loss": 3.9318, + "step": 9204000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3124822702242414e-05, + "loss": 3.9421, + "step": 9204500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3124111718710064e-05, + "loss": 3.9674, + "step": 9205000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3123400704278617e-05, + "loss": 3.9721, + "step": 9205500 + }, + { + "epoch": 1.21, + "learning_rate": 4.31226896589493e-05, + "loss": 3.9533, + "step": 9206000 + }, + { + "epoch": 1.21, + "learning_rate": 4.312197858272331e-05, + "loss": 3.9662, + "step": 9206500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3121267475601864e-05, + "loss": 3.9494, + "step": 9207000 + }, + { + "epoch": 1.21, + "learning_rate": 4.312055633758618e-05, + "loss": 3.9626, + "step": 9207500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311984516867747e-05, + "loss": 3.9455, + "step": 9208000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3119133968876925e-05, + "loss": 3.9401, + "step": 9208500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311842273818578e-05, + "loss": 3.9525, + "step": 9209000 + }, + { + "epoch": 1.21, + "learning_rate": 4.311771147660525e-05, + "loss": 3.9475, + "step": 9209500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3117000184136534e-05, + "loss": 3.9397, + "step": 9210000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3116288860780854e-05, + "loss": 3.9544, + "step": 9210500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311557750653942e-05, + "loss": 3.9528, + "step": 9211000 + }, + { + "epoch": 1.21, + "learning_rate": 4.311486612141344e-05, + "loss": 3.9679, + "step": 9211500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311415470540413e-05, + "loss": 3.9494, + "step": 9212000 + }, + { + "epoch": 1.21, + "learning_rate": 4.311344325851271e-05, + "loss": 3.967, + "step": 9212500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311273178074038e-05, + "loss": 3.9579, + "step": 9213000 + }, + { + "epoch": 1.21, + "learning_rate": 4.311202027208836e-05, + "loss": 3.9575, + "step": 9213500 + }, + { + "epoch": 1.21, + "learning_rate": 4.311130873255787e-05, + "loss": 3.9536, + "step": 9214000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3110597162150104e-05, + "loss": 3.9563, + "step": 9214500 + }, + { + "epoch": 1.21, + "learning_rate": 4.310988556086629e-05, + "loss": 3.9554, + "step": 9215000 + }, + { + "epoch": 1.21, + "learning_rate": 4.310917392870765e-05, + "loss": 3.957, + "step": 9215500 + }, + { + "epoch": 1.21, + "learning_rate": 4.310846226567537e-05, + "loss": 3.9948, + "step": 9216000 + }, + { + "epoch": 1.21, + "learning_rate": 4.310775057177069e-05, + "loss": 3.9624, + "step": 9216500 + }, + { + "epoch": 1.21, + "learning_rate": 4.31070388469948e-05, + "loss": 3.965, + "step": 9217000 + }, + { + "epoch": 1.21, + "learning_rate": 4.310632709134893e-05, + "loss": 3.9426, + "step": 9217500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3105615304834293e-05, + "loss": 3.9602, + "step": 9218000 + }, + { + "epoch": 1.21, + "learning_rate": 4.310490348745209e-05, + "loss": 3.9432, + "step": 9218500 + }, + { + "epoch": 1.21, + "learning_rate": 4.310419163920355e-05, + "loss": 3.9552, + "step": 9219000 + }, + { + "epoch": 1.21, + "learning_rate": 4.310347976008987e-05, + "loss": 3.9453, + "step": 9219500 + }, + { + "epoch": 1.21, + "learning_rate": 4.310276785011228e-05, + "loss": 3.9624, + "step": 9220000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3102055909271985e-05, + "loss": 3.9472, + "step": 9220500 + }, + { + "epoch": 1.21, + "learning_rate": 4.31013439375702e-05, + "loss": 3.9618, + "step": 9221000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3100631935008144e-05, + "loss": 3.9477, + "step": 9221500 + }, + { + "epoch": 1.21, + "learning_rate": 4.309991990158702e-05, + "loss": 3.9693, + "step": 9222000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3099207837308055e-05, + "loss": 3.9577, + "step": 9222500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3098495742172444e-05, + "loss": 3.9629, + "step": 9223000 + }, + { + "epoch": 1.21, + "learning_rate": 4.309778361618143e-05, + "loss": 3.962, + "step": 9223500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3097071459336194e-05, + "loss": 3.962, + "step": 9224000 + }, + { + "epoch": 1.21, + "learning_rate": 4.309635927163797e-05, + "loss": 3.9593, + "step": 9224500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3095647053087976e-05, + "loss": 3.9521, + "step": 9225000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3094934803687404e-05, + "loss": 3.974, + "step": 9225500 + }, + { + "epoch": 1.21, + "learning_rate": 4.309422252343749e-05, + "loss": 3.9599, + "step": 9226000 + }, + { + "epoch": 1.21, + "learning_rate": 4.309351021233945e-05, + "loss": 3.9499, + "step": 9226500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3092797870394475e-05, + "loss": 3.9684, + "step": 9227000 + }, + { + "epoch": 1.21, + "learning_rate": 4.309208549760381e-05, + "loss": 3.9488, + "step": 9227500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3091373093968634e-05, + "loss": 3.9772, + "step": 9228000 + }, + { + "epoch": 1.21, + "learning_rate": 4.30906606594902e-05, + "loss": 3.9455, + "step": 9228500 + }, + { + "epoch": 1.21, + "learning_rate": 4.308994819416969e-05, + "loss": 3.9537, + "step": 9229000 + }, + { + "epoch": 1.21, + "learning_rate": 4.308923569800834e-05, + "loss": 3.974, + "step": 9229500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3088523171007355e-05, + "loss": 3.9539, + "step": 9230000 + }, + { + "epoch": 1.21, + "learning_rate": 4.308781061316794e-05, + "loss": 3.9702, + "step": 9230500 + }, + { + "epoch": 1.21, + "learning_rate": 4.308709802449134e-05, + "loss": 3.9562, + "step": 9231000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3086385404978745e-05, + "loss": 3.9699, + "step": 9231500 + }, + { + "epoch": 1.21, + "learning_rate": 4.308567275463138e-05, + "loss": 3.9523, + "step": 9232000 + }, + { + "epoch": 1.21, + "learning_rate": 4.308496007345045e-05, + "loss": 3.97, + "step": 9232500 + }, + { + "epoch": 1.21, + "learning_rate": 4.308424736143718e-05, + "loss": 3.9329, + "step": 9233000 + }, + { + "epoch": 1.21, + "learning_rate": 4.308353461859278e-05, + "loss": 3.9388, + "step": 9233500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3082821844918466e-05, + "loss": 3.9497, + "step": 9234000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3082109040415455e-05, + "loss": 3.955, + "step": 9234500 + }, + { + "epoch": 1.21, + "learning_rate": 4.308139620508496e-05, + "loss": 3.9521, + "step": 9235000 + }, + { + "epoch": 1.21, + "learning_rate": 4.308068333892819e-05, + "loss": 3.9591, + "step": 9235500 + }, + { + "epoch": 1.21, + "learning_rate": 4.307997044194638e-05, + "loss": 3.9653, + "step": 9236000 + }, + { + "epoch": 1.21, + "learning_rate": 4.307925751414073e-05, + "loss": 3.9699, + "step": 9236500 + }, + { + "epoch": 1.21, + "learning_rate": 4.307854455551245e-05, + "loss": 3.9536, + "step": 9237000 + }, + { + "epoch": 1.21, + "learning_rate": 4.307783156606277e-05, + "loss": 3.9642, + "step": 9237500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3077118545792893e-05, + "loss": 3.9521, + "step": 9238000 + }, + { + "epoch": 1.21, + "learning_rate": 4.307640549470405e-05, + "loss": 3.9657, + "step": 9238500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3075692412797444e-05, + "loss": 3.9536, + "step": 9239000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3074979300074296e-05, + "loss": 3.9734, + "step": 9239500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3074266156535824e-05, + "loss": 3.9389, + "step": 9240000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3073552982183236e-05, + "loss": 3.9594, + "step": 9240500 + }, + { + "epoch": 1.21, + "learning_rate": 4.307283977701775e-05, + "loss": 3.9491, + "step": 9241000 + }, + { + "epoch": 1.21, + "learning_rate": 4.307212654104058e-05, + "loss": 3.9496, + "step": 9241500 + }, + { + "epoch": 1.21, + "learning_rate": 4.3071413274252956e-05, + "loss": 3.9475, + "step": 9242000 + }, + { + "epoch": 1.21, + "learning_rate": 4.307069997665608e-05, + "loss": 3.9561, + "step": 9242500 + }, + { + "epoch": 1.21, + "learning_rate": 4.306998664825117e-05, + "loss": 3.9547, + "step": 9243000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3069273289039447e-05, + "loss": 3.9609, + "step": 9243500 + }, + { + "epoch": 1.21, + "learning_rate": 4.306855989902212e-05, + "loss": 3.9543, + "step": 9244000 + }, + { + "epoch": 1.21, + "learning_rate": 4.306784647820041e-05, + "loss": 3.9478, + "step": 9244500 + }, + { + "epoch": 1.22, + "learning_rate": 4.306713302657553e-05, + "loss": 3.957, + "step": 9245000 + }, + { + "epoch": 1.22, + "learning_rate": 4.306641954414871e-05, + "loss": 3.9291, + "step": 9245500 + }, + { + "epoch": 1.22, + "learning_rate": 4.306570603092114e-05, + "loss": 3.948, + "step": 9246000 + }, + { + "epoch": 1.22, + "learning_rate": 4.306499248689406e-05, + "loss": 3.9704, + "step": 9246500 + }, + { + "epoch": 1.22, + "learning_rate": 4.306427891206868e-05, + "loss": 3.9487, + "step": 9247000 + }, + { + "epoch": 1.22, + "learning_rate": 4.306356530644621e-05, + "loss": 3.9422, + "step": 9247500 + }, + { + "epoch": 1.22, + "learning_rate": 4.306285167002788e-05, + "loss": 3.947, + "step": 9248000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3062138002814887e-05, + "loss": 3.9373, + "step": 9248500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3061424304808465e-05, + "loss": 3.9737, + "step": 9249000 + }, + { + "epoch": 1.22, + "learning_rate": 4.306071057600983e-05, + "loss": 3.9558, + "step": 9249500 + }, + { + "epoch": 1.22, + "learning_rate": 4.305999681642018e-05, + "loss": 3.9559, + "step": 9250000 + }, + { + "epoch": 1.22, + "learning_rate": 4.305928302604075e-05, + "loss": 3.9668, + "step": 9250500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3058569204872763e-05, + "loss": 3.9586, + "step": 9251000 + }, + { + "epoch": 1.22, + "learning_rate": 4.305785535291741e-05, + "loss": 3.9438, + "step": 9251500 + }, + { + "epoch": 1.22, + "learning_rate": 4.305714147017593e-05, + "loss": 3.9475, + "step": 9252000 + }, + { + "epoch": 1.22, + "learning_rate": 4.305642755664954e-05, + "loss": 3.963, + "step": 9252500 + }, + { + "epoch": 1.22, + "learning_rate": 4.305571361233944e-05, + "loss": 3.9414, + "step": 9253000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3054999637246865e-05, + "loss": 3.9572, + "step": 9253500 + }, + { + "epoch": 1.22, + "learning_rate": 4.305428563137302e-05, + "loss": 3.9474, + "step": 9254000 + }, + { + "epoch": 1.22, + "learning_rate": 4.305357159471912e-05, + "loss": 3.9532, + "step": 9254500 + }, + { + "epoch": 1.22, + "learning_rate": 4.30528575272864e-05, + "loss": 3.9572, + "step": 9255000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3052143429076064e-05, + "loss": 3.9464, + "step": 9255500 + }, + { + "epoch": 1.22, + "learning_rate": 4.305142930008933e-05, + "loss": 3.9584, + "step": 9256000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3050715140327425e-05, + "loss": 3.9564, + "step": 9256500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3050000949791545e-05, + "loss": 3.9387, + "step": 9257000 + }, + { + "epoch": 1.22, + "learning_rate": 4.304928672848294e-05, + "loss": 3.9679, + "step": 9257500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3048572476402795e-05, + "loss": 3.952, + "step": 9258000 + }, + { + "epoch": 1.22, + "learning_rate": 4.304785819355235e-05, + "loss": 3.9776, + "step": 9258500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3047143879932806e-05, + "loss": 3.9527, + "step": 9259000 + }, + { + "epoch": 1.22, + "learning_rate": 4.30464295355454e-05, + "loss": 3.9634, + "step": 9259500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3045715160391334e-05, + "loss": 3.9487, + "step": 9260000 + }, + { + "epoch": 1.22, + "learning_rate": 4.304500075447183e-05, + "loss": 3.9707, + "step": 9260500 + }, + { + "epoch": 1.22, + "learning_rate": 4.304428631778811e-05, + "loss": 3.9637, + "step": 9261000 + }, + { + "epoch": 1.22, + "learning_rate": 4.304357185034139e-05, + "loss": 3.9708, + "step": 9261500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3042857352132884e-05, + "loss": 3.9361, + "step": 9262000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3042142823163824e-05, + "loss": 3.9577, + "step": 9262500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3041428263435406e-05, + "loss": 3.9546, + "step": 9263000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3040713672948864e-05, + "loss": 3.9392, + "step": 9263500 + }, + { + "epoch": 1.22, + "learning_rate": 4.303999905170542e-05, + "loss": 3.9548, + "step": 9264000 + }, + { + "epoch": 1.22, + "learning_rate": 4.303928439970627e-05, + "loss": 3.9514, + "step": 9264500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3038569716952654e-05, + "loss": 3.9483, + "step": 9265000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3037855003445785e-05, + "loss": 3.9726, + "step": 9265500 + }, + { + "epoch": 1.22, + "learning_rate": 4.303714025918688e-05, + "loss": 3.9342, + "step": 9266000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3036425484177153e-05, + "loss": 3.9503, + "step": 9266500 + }, + { + "epoch": 1.22, + "learning_rate": 4.303571067841783e-05, + "loss": 3.9438, + "step": 9267000 + }, + { + "epoch": 1.22, + "learning_rate": 4.303499584191013e-05, + "loss": 3.9605, + "step": 9267500 + }, + { + "epoch": 1.22, + "learning_rate": 4.303428097465526e-05, + "loss": 3.9521, + "step": 9268000 + }, + { + "epoch": 1.22, + "learning_rate": 4.303356607665446e-05, + "loss": 3.97, + "step": 9268500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3032851147908926e-05, + "loss": 3.9461, + "step": 9269000 + }, + { + "epoch": 1.22, + "learning_rate": 4.303213618841989e-05, + "loss": 3.939, + "step": 9269500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3031421198188566e-05, + "loss": 3.9616, + "step": 9270000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3030706177216177e-05, + "loss": 3.9531, + "step": 9270500 + }, + { + "epoch": 1.22, + "learning_rate": 4.302999112550394e-05, + "loss": 3.9514, + "step": 9271000 + }, + { + "epoch": 1.22, + "learning_rate": 4.302927604305307e-05, + "loss": 3.9708, + "step": 9271500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3028560929864794e-05, + "loss": 3.9706, + "step": 9272000 + }, + { + "epoch": 1.22, + "learning_rate": 4.302784578594032e-05, + "loss": 3.9519, + "step": 9272500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3027130611280885e-05, + "loss": 3.9344, + "step": 9273000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3026415405887685e-05, + "loss": 3.9576, + "step": 9273500 + }, + { + "epoch": 1.22, + "learning_rate": 4.302570016976196e-05, + "loss": 3.9451, + "step": 9274000 + }, + { + "epoch": 1.22, + "learning_rate": 4.302498490290492e-05, + "loss": 3.9593, + "step": 9274500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3024269605317794e-05, + "loss": 3.9468, + "step": 9275000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3023554277001776e-05, + "loss": 3.9286, + "step": 9275500 + }, + { + "epoch": 1.22, + "learning_rate": 4.302283891795812e-05, + "loss": 3.958, + "step": 9276000 + }, + { + "epoch": 1.22, + "learning_rate": 4.302212352818802e-05, + "loss": 3.9546, + "step": 9276500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3021408107692694e-05, + "loss": 3.972, + "step": 9277000 + }, + { + "epoch": 1.22, + "learning_rate": 4.302069265647338e-05, + "loss": 3.9634, + "step": 9277500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3019977174531297e-05, + "loss": 3.9334, + "step": 9278000 + }, + { + "epoch": 1.22, + "learning_rate": 4.301926166186765e-05, + "loss": 3.9557, + "step": 9278500 + }, + { + "epoch": 1.22, + "learning_rate": 4.301854611848367e-05, + "loss": 3.966, + "step": 9279000 + }, + { + "epoch": 1.22, + "learning_rate": 4.301783054438057e-05, + "loss": 3.9459, + "step": 9279500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3017114939559564e-05, + "loss": 3.948, + "step": 9280000 + }, + { + "epoch": 1.22, + "learning_rate": 4.301639930402189e-05, + "loss": 3.9633, + "step": 9280500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3015683637768764e-05, + "loss": 3.9439, + "step": 9281000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3014967940801396e-05, + "loss": 3.9546, + "step": 9281500 + }, + { + "epoch": 1.22, + "learning_rate": 4.301425221312101e-05, + "loss": 3.9505, + "step": 9282000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3013536454728834e-05, + "loss": 3.9704, + "step": 9282500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3012820665626067e-05, + "loss": 3.9572, + "step": 9283000 + }, + { + "epoch": 1.22, + "learning_rate": 4.301210484581395e-05, + "loss": 3.9363, + "step": 9283500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3011388995293705e-05, + "loss": 3.9678, + "step": 9284000 + }, + { + "epoch": 1.22, + "learning_rate": 4.301067311406654e-05, + "loss": 3.9505, + "step": 9284500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300995720213368e-05, + "loss": 3.9548, + "step": 9285000 + }, + { + "epoch": 1.22, + "learning_rate": 4.300924125949635e-05, + "loss": 3.9426, + "step": 9285500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300852528615575e-05, + "loss": 3.9533, + "step": 9286000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3007809282113135e-05, + "loss": 3.9507, + "step": 9286500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300709324736971e-05, + "loss": 3.9592, + "step": 9287000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3006377181926684e-05, + "loss": 3.9395, + "step": 9287500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300566108578529e-05, + "loss": 3.9419, + "step": 9288000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3004944958946734e-05, + "loss": 3.9648, + "step": 9288500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300422880141227e-05, + "loss": 3.9714, + "step": 9289000 + }, + { + "epoch": 1.22, + "learning_rate": 4.3003512613183084e-05, + "loss": 3.9432, + "step": 9289500 + }, + { + "epoch": 1.22, + "learning_rate": 4.300279639426041e-05, + "loss": 3.9548, + "step": 9290000 + }, + { + "epoch": 1.22, + "learning_rate": 4.300208014464548e-05, + "loss": 3.9547, + "step": 9290500 + }, + { + "epoch": 1.22, + "learning_rate": 4.3001363864339494e-05, + "loss": 3.9624, + "step": 9291000 + }, + { + "epoch": 1.22, + "learning_rate": 4.30006475533437e-05, + "loss": 3.9408, + "step": 9291500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299993121165929e-05, + "loss": 3.9396, + "step": 9292000 + }, + { + "epoch": 1.22, + "learning_rate": 4.299921483928751e-05, + "loss": 3.9536, + "step": 9292500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299849843622955e-05, + "loss": 3.9419, + "step": 9293000 + }, + { + "epoch": 1.22, + "learning_rate": 4.2997782002486666e-05, + "loss": 3.9645, + "step": 9293500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299706553806007e-05, + "loss": 3.958, + "step": 9294000 + }, + { + "epoch": 1.22, + "learning_rate": 4.299634904295097e-05, + "loss": 3.9767, + "step": 9294500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299563251716059e-05, + "loss": 3.977, + "step": 9295000 + }, + { + "epoch": 1.22, + "learning_rate": 4.2994915960690164e-05, + "loss": 3.9661, + "step": 9295500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299419937354091e-05, + "loss": 3.9416, + "step": 9296000 + }, + { + "epoch": 1.22, + "learning_rate": 4.2993482755714046e-05, + "loss": 3.9455, + "step": 9296500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299276610721079e-05, + "loss": 3.9505, + "step": 9297000 + }, + { + "epoch": 1.22, + "learning_rate": 4.299204942803237e-05, + "loss": 3.9595, + "step": 9297500 + }, + { + "epoch": 1.22, + "learning_rate": 4.299133271818001e-05, + "loss": 3.9435, + "step": 9298000 + }, + { + "epoch": 1.22, + "learning_rate": 4.2990615977654924e-05, + "loss": 3.9543, + "step": 9298500 + }, + { + "epoch": 1.22, + "learning_rate": 4.298989920645834e-05, + "loss": 3.9622, + "step": 9299000 + }, + { + "epoch": 2.44, + "learning_rate": 2.5887841376303117e-05, + "loss": 3.9039, + "step": 9299500 + }, + { + "epoch": 2.44, + "learning_rate": 2.5885778122934457e-05, + "loss": 3.8688, + "step": 9300000 + }, + { + "epoch": 2.44, + "learning_rate": 2.5883714863524973e-05, + "loss": 3.8688, + "step": 9300500 + }, + { + "epoch": 2.44, + "learning_rate": 2.588165159808872e-05, + "loss": 3.8524, + "step": 9301000 + }, + { + "epoch": 2.44, + "learning_rate": 2.587958832663977e-05, + "loss": 3.8366, + "step": 9301500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5877525049192207e-05, + "loss": 3.8272, + "step": 9302000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5875461765760095e-05, + "loss": 3.8267, + "step": 9302500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5873398476357496e-05, + "loss": 3.8313, + "step": 9303000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5871335180998503e-05, + "loss": 3.8385, + "step": 9303500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5869271879697164e-05, + "loss": 3.8201, + "step": 9304000 + }, + { + "epoch": 2.45, + "learning_rate": 2.586720857246757e-05, + "loss": 3.8152, + "step": 9304500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5865145259323776e-05, + "loss": 3.8057, + "step": 9305000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5863081940279865e-05, + "loss": 3.8022, + "step": 9305500 + }, + { + "epoch": 2.45, + "learning_rate": 2.58610186153499e-05, + "loss": 3.7993, + "step": 9306000 + }, + { + "epoch": 2.45, + "learning_rate": 2.585895528454796e-05, + "loss": 3.8109, + "step": 9306500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5856891947888105e-05, + "loss": 3.7997, + "step": 9307000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5854828605384417e-05, + "loss": 3.7914, + "step": 9307500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5852765257050978e-05, + "loss": 3.8103, + "step": 9308000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5850701902901837e-05, + "loss": 3.7866, + "step": 9308500 + }, + { + "epoch": 2.45, + "learning_rate": 2.584863854295107e-05, + "loss": 3.8196, + "step": 9309000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5846575177212763e-05, + "loss": 3.786, + "step": 9309500 + }, + { + "epoch": 2.45, + "learning_rate": 2.584451180570097e-05, + "loss": 3.7941, + "step": 9310000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5842448428429776e-05, + "loss": 3.7814, + "step": 9310500 + }, + { + "epoch": 2.45, + "learning_rate": 2.584038504541325e-05, + "loss": 3.7887, + "step": 9311000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5838321656665464e-05, + "loss": 3.7795, + "step": 9311500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5836258262200486e-05, + "loss": 3.7817, + "step": 9312000 + }, + { + "epoch": 2.45, + "learning_rate": 2.583419486203239e-05, + "loss": 3.7737, + "step": 9312500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5832131456175253e-05, + "loss": 3.7661, + "step": 9313000 + }, + { + "epoch": 2.45, + "learning_rate": 2.583006804464313e-05, + "loss": 3.7788, + "step": 9313500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5828004627450113e-05, + "loss": 3.779, + "step": 9314000 + }, + { + "epoch": 2.45, + "learning_rate": 2.582594120461027e-05, + "loss": 3.7712, + "step": 9314500 + }, + { + "epoch": 2.45, + "learning_rate": 2.582387777613766e-05, + "loss": 3.7671, + "step": 9315000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5821814342046374e-05, + "loss": 3.7739, + "step": 9315500 + }, + { + "epoch": 2.45, + "learning_rate": 2.581975090235047e-05, + "loss": 3.7802, + "step": 9316000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5817687457064026e-05, + "loss": 3.7662, + "step": 9316500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5815624006201118e-05, + "loss": 3.7655, + "step": 9317000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5813560549775805e-05, + "loss": 3.777, + "step": 9317500 + }, + { + "epoch": 2.45, + "learning_rate": 2.581149708780218e-05, + "loss": 3.7671, + "step": 9318000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5809433620294287e-05, + "loss": 3.7565, + "step": 9318500 + }, + { + "epoch": 2.45, + "learning_rate": 2.580737014726623e-05, + "loss": 3.7713, + "step": 9319000 + }, + { + "epoch": 2.45, + "learning_rate": 2.580530666873206e-05, + "loss": 3.7624, + "step": 9319500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5803243184705856e-05, + "loss": 3.774, + "step": 9320000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5801179695201694e-05, + "loss": 3.7551, + "step": 9320500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5799116200233637e-05, + "loss": 3.7488, + "step": 9321000 + }, + { + "epoch": 2.45, + "learning_rate": 2.579705269981577e-05, + "loss": 3.7623, + "step": 9321500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5794989193962155e-05, + "loss": 3.7537, + "step": 9322000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5792925682686868e-05, + "loss": 3.7595, + "step": 9322500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5790862166003994e-05, + "loss": 3.7594, + "step": 9323000 + }, + { + "epoch": 2.45, + "learning_rate": 2.578879864392758e-05, + "loss": 3.7637, + "step": 9323500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5786735116471724e-05, + "loss": 3.757, + "step": 9324000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5784671583650483e-05, + "loss": 3.7589, + "step": 9324500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5782608045477936e-05, + "loss": 3.7465, + "step": 9325000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5780544501968156e-05, + "loss": 3.7458, + "step": 9325500 + }, + { + "epoch": 2.45, + "learning_rate": 2.577848095313522e-05, + "loss": 3.7516, + "step": 9326000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5776417398993185e-05, + "loss": 3.7508, + "step": 9326500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5774353839556147e-05, + "loss": 3.7395, + "step": 9327000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5772290274838163e-05, + "loss": 3.7365, + "step": 9327500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5770226704853307e-05, + "loss": 3.7496, + "step": 9328000 + }, + { + "epoch": 2.45, + "learning_rate": 2.576816312961566e-05, + "loss": 3.7548, + "step": 9328500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5766099549139287e-05, + "loss": 3.7433, + "step": 9329000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5764035963438264e-05, + "loss": 3.7534, + "step": 9329500 + }, + { + "epoch": 2.45, + "learning_rate": 2.576197237252667e-05, + "loss": 3.7554, + "step": 9330000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5759908776418574e-05, + "loss": 3.7504, + "step": 9330500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5757845175128042e-05, + "loss": 3.7509, + "step": 9331000 + }, + { + "epoch": 2.45, + "learning_rate": 2.575578156866916e-05, + "loss": 3.7519, + "step": 9331500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5753717957055988e-05, + "loss": 3.736, + "step": 9332000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5751654340302612e-05, + "loss": 3.7577, + "step": 9332500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5749590718423104e-05, + "loss": 3.7571, + "step": 9333000 + }, + { + "epoch": 2.45, + "learning_rate": 2.574752709143153e-05, + "loss": 3.7508, + "step": 9333500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5745463459341967e-05, + "loss": 3.7432, + "step": 9334000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5743399822168484e-05, + "loss": 3.7531, + "step": 9334500 + }, + { + "epoch": 2.45, + "learning_rate": 2.574133617992517e-05, + "loss": 3.7479, + "step": 9335000 + }, + { + "epoch": 2.45, + "learning_rate": 2.573927253262608e-05, + "loss": 3.7398, + "step": 9335500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5737208880285303e-05, + "loss": 3.7442, + "step": 9336000 + }, + { + "epoch": 2.45, + "learning_rate": 2.57351452229169e-05, + "loss": 3.7452, + "step": 9336500 + }, + { + "epoch": 2.45, + "learning_rate": 2.573308156053495e-05, + "loss": 3.7448, + "step": 9337000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5731017893153537e-05, + "loss": 3.7461, + "step": 9337500 + }, + { + "epoch": 2.45, + "learning_rate": 2.572895422078671e-05, + "loss": 3.7483, + "step": 9338000 + }, + { + "epoch": 2.45, + "learning_rate": 2.572689054344857e-05, + "loss": 3.7416, + "step": 9338500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5724826861153172e-05, + "loss": 3.7339, + "step": 9339000 + }, + { + "epoch": 2.45, + "learning_rate": 2.57227631739146e-05, + "loss": 3.7422, + "step": 9339500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5720699481746922e-05, + "loss": 3.7122, + "step": 9340000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5718635784664214e-05, + "loss": 3.7345, + "step": 9340500 + }, + { + "epoch": 2.46, + "learning_rate": 2.571657208268055e-05, + "loss": 3.7357, + "step": 9341000 + }, + { + "epoch": 2.46, + "learning_rate": 2.571450837581001e-05, + "loss": 3.7184, + "step": 9341500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5712444664066653e-05, + "loss": 3.7609, + "step": 9342000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5710380947464576e-05, + "loss": 3.7325, + "step": 9342500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5708317226017825e-05, + "loss": 3.733, + "step": 9343000 + }, + { + "epoch": 2.46, + "learning_rate": 2.57062534997405e-05, + "loss": 3.7329, + "step": 9343500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5704189768646665e-05, + "loss": 3.7518, + "step": 9344000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5702126032750383e-05, + "loss": 3.7378, + "step": 9344500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5700062292065753e-05, + "loss": 3.7433, + "step": 9345000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5697998546606822e-05, + "loss": 3.7264, + "step": 9345500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5695934796387684e-05, + "loss": 3.7299, + "step": 9346000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5693871041422407e-05, + "loss": 3.7308, + "step": 9346500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5691807281725062e-05, + "loss": 3.7292, + "step": 9347000 + }, + { + "epoch": 2.46, + "learning_rate": 2.568974351730973e-05, + "loss": 3.7279, + "step": 9347500 + }, + { + "epoch": 2.46, + "learning_rate": 2.568767974819048e-05, + "loss": 3.7339, + "step": 9348000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5685615974381387e-05, + "loss": 3.7229, + "step": 9348500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5683552195896533e-05, + "loss": 3.73, + "step": 9349000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5681488412749982e-05, + "loss": 3.7291, + "step": 9349500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5679424624955812e-05, + "loss": 3.7229, + "step": 9350000 + }, + { + "epoch": 2.46, + "learning_rate": 2.56773608325281e-05, + "loss": 3.7169, + "step": 9350500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5675297035480926e-05, + "loss": 3.737, + "step": 9351000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5673233233828348e-05, + "loss": 3.729, + "step": 9351500 + }, + { + "epoch": 2.46, + "learning_rate": 2.567116942758446e-05, + "loss": 3.7137, + "step": 9352000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5669105616763322e-05, + "loss": 3.7384, + "step": 9352500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5667041801379015e-05, + "loss": 3.732, + "step": 9353000 + }, + { + "epoch": 2.46, + "learning_rate": 2.566497798144562e-05, + "loss": 3.727, + "step": 9353500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5662914156977197e-05, + "loss": 3.725, + "step": 9354000 + }, + { + "epoch": 2.46, + "learning_rate": 2.566085032798783e-05, + "loss": 3.7306, + "step": 9354500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5658786494491598e-05, + "loss": 3.7239, + "step": 9355000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5656722656502564e-05, + "loss": 3.7314, + "step": 9355500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5654658814034816e-05, + "loss": 3.7289, + "step": 9356000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5652594967102418e-05, + "loss": 3.7309, + "step": 9356500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5650531115719455e-05, + "loss": 3.7278, + "step": 9357000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5648467259899988e-05, + "loss": 3.7328, + "step": 9357500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5646403399658114e-05, + "loss": 3.735, + "step": 9358000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5644339535007888e-05, + "loss": 3.7266, + "step": 9358500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5642275665963393e-05, + "loss": 3.7167, + "step": 9359000 + }, + { + "epoch": 1.23, + "learning_rate": 4.290294387560506e-05, + "loss": 3.7927, + "step": 9359500 + }, + { + "epoch": 1.23, + "learning_rate": 4.29022233716699e-05, + "loss": 3.8237, + "step": 9360000 + }, + { + "epoch": 1.23, + "learning_rate": 4.290150283721394e-05, + "loss": 3.8322, + "step": 9360500 + }, + { + "epoch": 1.23, + "learning_rate": 4.290078227223839e-05, + "loss": 3.8385, + "step": 9361000 + }, + { + "epoch": 2.95, + "learning_rate": 1.7998866419044223e-05, + "loss": 3.8023, + "step": 9361500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5629892360204977e-05, + "loss": 3.777, + "step": 9362000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5627828460794694e-05, + "loss": 3.7604, + "step": 9362500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5625764557102754e-05, + "loss": 3.7465, + "step": 9363000 + }, + { + "epoch": 2.46, + "learning_rate": 2.562370064914322e-05, + "loss": 3.7316, + "step": 9363500 + }, + { + "epoch": 2.46, + "learning_rate": 2.562163673693017e-05, + "loss": 3.7617, + "step": 9364000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5619572820477683e-05, + "loss": 3.7349, + "step": 9364500 + }, + { + "epoch": 2.46, + "learning_rate": 2.561750889979982e-05, + "loss": 3.7371, + "step": 9365000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5615444974910673e-05, + "loss": 3.7308, + "step": 9365500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5613381045824316e-05, + "loss": 3.7355, + "step": 9366000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5611317112554812e-05, + "loss": 3.743, + "step": 9366500 + }, + { + "epoch": 2.46, + "learning_rate": 2.560925317511625e-05, + "loss": 3.7564, + "step": 9367000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5607189233522695e-05, + "loss": 3.7354, + "step": 9367500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5605125287788234e-05, + "loss": 3.7207, + "step": 9368000 + }, + { + "epoch": 2.46, + "learning_rate": 2.560306133792693e-05, + "loss": 3.744, + "step": 9368500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5600997383952875e-05, + "loss": 3.729, + "step": 9369000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5598933425880133e-05, + "loss": 3.7462, + "step": 9369500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5596869463722777e-05, + "loss": 3.73, + "step": 9370000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5594805497494896e-05, + "loss": 3.7372, + "step": 9370500 + }, + { + "epoch": 2.46, + "learning_rate": 2.559274152721055e-05, + "loss": 3.7377, + "step": 9371000 + }, + { + "epoch": 2.46, + "learning_rate": 2.559067755288383e-05, + "loss": 3.7157, + "step": 9371500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5588613574528797e-05, + "loss": 3.717, + "step": 9372000 + }, + { + "epoch": 2.46, + "learning_rate": 2.558654959215954e-05, + "loss": 3.7229, + "step": 9372500 + }, + { + "epoch": 2.46, + "learning_rate": 2.558448560579013e-05, + "loss": 3.7186, + "step": 9373000 + }, + { + "epoch": 2.46, + "learning_rate": 2.558242161543464e-05, + "loss": 3.7397, + "step": 9373500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5580357621107148e-05, + "loss": 3.7292, + "step": 9374000 + }, + { + "epoch": 2.46, + "learning_rate": 2.557829362282173e-05, + "loss": 3.7216, + "step": 9374500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5576229620592462e-05, + "loss": 3.7303, + "step": 9375000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5574165614433425e-05, + "loss": 3.7209, + "step": 9375500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5572101604358684e-05, + "loss": 3.7135, + "step": 9376000 + }, + { + "epoch": 2.46, + "learning_rate": 2.557003759038233e-05, + "loss": 3.727, + "step": 9376500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5567973572518428e-05, + "loss": 3.7132, + "step": 9377000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5565909550781053e-05, + "loss": 3.7086, + "step": 9377500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5563845525184287e-05, + "loss": 3.7131, + "step": 9378000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5561781495742205e-05, + "loss": 3.7012, + "step": 9378500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5559717462468885e-05, + "loss": 3.7163, + "step": 9379000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5557653425378397e-05, + "loss": 3.7185, + "step": 9379500 + }, + { + "epoch": 2.47, + "learning_rate": 2.555558938448483e-05, + "loss": 3.7267, + "step": 9380000 + }, + { + "epoch": 2.47, + "learning_rate": 2.555352533980225e-05, + "loss": 3.7152, + "step": 9380500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5551461291344732e-05, + "loss": 3.7235, + "step": 9381000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5549397239126354e-05, + "loss": 3.7106, + "step": 9381500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5547333183161197e-05, + "loss": 3.7094, + "step": 9382000 + }, + { + "epoch": 2.47, + "learning_rate": 2.554526912346334e-05, + "loss": 3.7159, + "step": 9382500 + }, + { + "epoch": 2.47, + "learning_rate": 2.554320506004685e-05, + "loss": 3.7121, + "step": 9383000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5541140992925798e-05, + "loss": 3.7216, + "step": 9383500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5539076922114285e-05, + "loss": 3.7356, + "step": 9384000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5537012847626364e-05, + "loss": 3.7126, + "step": 9384500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5534948769476123e-05, + "loss": 3.7227, + "step": 9385000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5532884687677637e-05, + "loss": 3.7129, + "step": 9385500 + }, + { + "epoch": 2.47, + "learning_rate": 2.553082060224498e-05, + "loss": 3.7193, + "step": 9386000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5528756513192236e-05, + "loss": 3.7288, + "step": 9386500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5526692420533466e-05, + "loss": 3.7164, + "step": 9387000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5524628324282762e-05, + "loss": 3.7244, + "step": 9387500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5522564224454197e-05, + "loss": 3.6996, + "step": 9388000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5520500121061846e-05, + "loss": 3.7156, + "step": 9388500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5518436014119783e-05, + "loss": 3.7223, + "step": 9389000 + }, + { + "epoch": 2.47, + "learning_rate": 2.551637190364209e-05, + "loss": 3.7081, + "step": 9389500 + }, + { + "epoch": 2.47, + "learning_rate": 2.551430778964284e-05, + "loss": 3.7187, + "step": 9390000 + }, + { + "epoch": 2.47, + "learning_rate": 2.551224367213611e-05, + "loss": 3.7019, + "step": 9390500 + }, + { + "epoch": 2.47, + "learning_rate": 2.551017955113598e-05, + "loss": 3.7131, + "step": 9391000 + }, + { + "epoch": 2.47, + "learning_rate": 2.550811542665653e-05, + "loss": 3.7236, + "step": 9391500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5506051298711826e-05, + "loss": 3.7088, + "step": 9392000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5503987167315956e-05, + "loss": 3.7092, + "step": 9392500 + }, + { + "epoch": 2.47, + "learning_rate": 2.550192303248299e-05, + "loss": 3.6995, + "step": 9393000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5499858894227002e-05, + "loss": 3.6963, + "step": 9393500 + }, + { + "epoch": 2.47, + "learning_rate": 2.549779475256208e-05, + "loss": 3.7109, + "step": 9394000 + }, + { + "epoch": 2.47, + "learning_rate": 2.54957306075023e-05, + "loss": 3.7258, + "step": 9394500 + }, + { + "epoch": 2.47, + "learning_rate": 2.549366645906172e-05, + "loss": 3.7092, + "step": 9395000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5491602307254443e-05, + "loss": 3.7211, + "step": 9395500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5489538152094535e-05, + "loss": 3.6946, + "step": 9396000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5487473993596067e-05, + "loss": 3.7034, + "step": 9396500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5485409831773123e-05, + "loss": 3.6997, + "step": 9397000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5483345666639778e-05, + "loss": 3.7013, + "step": 9397500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5481281498210106e-05, + "loss": 3.7127, + "step": 9398000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5479217326498195e-05, + "loss": 3.715, + "step": 9398500 + }, + { + "epoch": 2.47, + "learning_rate": 2.547715315151812e-05, + "loss": 3.7097, + "step": 9399000 + }, + { + "epoch": 2.47, + "learning_rate": 2.547508897328394e-05, + "loss": 3.7098, + "step": 9399500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5473024791809757e-05, + "loss": 3.7104, + "step": 9400000 + }, + { + "epoch": 2.47, + "learning_rate": 2.547096060710963e-05, + "loss": 3.7117, + "step": 9400500 + }, + { + "epoch": 2.47, + "learning_rate": 2.546889641919765e-05, + "loss": 3.6973, + "step": 9401000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5466832228087882e-05, + "loss": 3.7018, + "step": 9401500 + }, + { + "epoch": 2.47, + "learning_rate": 2.546476803379442e-05, + "loss": 3.6948, + "step": 9402000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5462703836331324e-05, + "loss": 3.7109, + "step": 9402500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5460639635712675e-05, + "loss": 3.7168, + "step": 9403000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5458575431952558e-05, + "loss": 3.6981, + "step": 9403500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5456511225065042e-05, + "loss": 3.7157, + "step": 9404000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5454447015064213e-05, + "loss": 3.7119, + "step": 9404500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5452382801964148e-05, + "loss": 3.695, + "step": 9405000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5450318585778915e-05, + "loss": 3.7007, + "step": 9405500 + }, + { + "epoch": 2.47, + "learning_rate": 2.54482543665226e-05, + "loss": 3.7039, + "step": 9406000 + }, + { + "epoch": 2.47, + "learning_rate": 2.544619014420927e-05, + "loss": 3.7073, + "step": 9406500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5444125918853024e-05, + "loss": 3.7055, + "step": 9407000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5442061690467918e-05, + "loss": 3.7114, + "step": 9407500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5439997459068038e-05, + "loss": 3.7028, + "step": 9408000 + }, + { + "epoch": 2.47, + "learning_rate": 2.543793322466746e-05, + "loss": 3.7069, + "step": 9408500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5435868987280266e-05, + "loss": 3.7, + "step": 9409000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5433804746920532e-05, + "loss": 3.7035, + "step": 9409500 + }, + { + "epoch": 2.47, + "learning_rate": 2.543174050360233e-05, + "loss": 3.6934, + "step": 9410000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5429676257339742e-05, + "loss": 3.7084, + "step": 9410500 + }, + { + "epoch": 2.47, + "learning_rate": 2.542761200814685e-05, + "loss": 3.7189, + "step": 9411000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5425547756037727e-05, + "loss": 3.7037, + "step": 9411500 + }, + { + "epoch": 2.47, + "learning_rate": 2.542348350102645e-05, + "loss": 3.7109, + "step": 9412000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5421419243127105e-05, + "loss": 3.7088, + "step": 9412500 + }, + { + "epoch": 2.47, + "learning_rate": 2.541935498235375e-05, + "loss": 3.7136, + "step": 9413000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5417290718720487e-05, + "loss": 3.7052, + "step": 9413500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5415226452241375e-05, + "loss": 3.7064, + "step": 9414000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5413162182930506e-05, + "loss": 3.7024, + "step": 9414500 + }, + { + "epoch": 2.47, + "learning_rate": 2.541109791080195e-05, + "loss": 3.6935, + "step": 9415000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5409033635869782e-05, + "loss": 3.7194, + "step": 9415500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5406969358148093e-05, + "loss": 3.7062, + "step": 9416000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5404905077650954e-05, + "loss": 3.7031, + "step": 9416500 + }, + { + "epoch": 2.48, + "learning_rate": 2.540284079439243e-05, + "loss": 3.7061, + "step": 9417000 + }, + { + "epoch": 2.48, + "learning_rate": 2.540077650838662e-05, + "loss": 3.6979, + "step": 9417500 + }, + { + "epoch": 2.48, + "learning_rate": 2.539871221964759e-05, + "loss": 3.7088, + "step": 9418000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5396647928189415e-05, + "loss": 3.6954, + "step": 9418500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5394583634026187e-05, + "loss": 3.7017, + "step": 9419000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5392519337171978e-05, + "loss": 3.7028, + "step": 9419500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5390455037640854e-05, + "loss": 3.6962, + "step": 9420000 + }, + { + "epoch": 2.48, + "learning_rate": 2.538839073544691e-05, + "loss": 3.7057, + "step": 9420500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5386326430604214e-05, + "loss": 3.6997, + "step": 9421000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5384262123126847e-05, + "loss": 3.695, + "step": 9421500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5382197813028892e-05, + "loss": 3.7038, + "step": 9422000 + }, + { + "epoch": 2.48, + "learning_rate": 2.538013350032441e-05, + "loss": 3.706, + "step": 9422500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5378069185027502e-05, + "loss": 3.7139, + "step": 9423000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5376004867152236e-05, + "loss": 3.7052, + "step": 9423500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5373940546712687e-05, + "loss": 3.6919, + "step": 9424000 + }, + { + "epoch": 2.48, + "learning_rate": 2.537187622372294e-05, + "loss": 3.6975, + "step": 9424500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5369811898197067e-05, + "loss": 3.6903, + "step": 9425000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5367747570149152e-05, + "loss": 3.685, + "step": 9425500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5365683239593268e-05, + "loss": 3.7019, + "step": 9426000 + }, + { + "epoch": 2.48, + "learning_rate": 2.53636189065435e-05, + "loss": 3.7149, + "step": 9426500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5361554571013918e-05, + "loss": 3.7002, + "step": 9427000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5359490233018607e-05, + "loss": 3.7061, + "step": 9427500 + }, + { + "epoch": 2.48, + "learning_rate": 2.535742589257164e-05, + "loss": 3.6874, + "step": 9428000 + }, + { + "epoch": 2.48, + "learning_rate": 2.53553615496871e-05, + "loss": 3.7131, + "step": 9428500 + }, + { + "epoch": 2.48, + "learning_rate": 2.535329720437906e-05, + "loss": 3.7035, + "step": 9429000 + }, + { + "epoch": 2.48, + "learning_rate": 2.535123285666161e-05, + "loss": 3.6984, + "step": 9429500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5349168506548817e-05, + "loss": 3.7162, + "step": 9430000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5347104154054762e-05, + "loss": 3.7038, + "step": 9430500 + }, + { + "epoch": 2.48, + "learning_rate": 2.534503979919352e-05, + "loss": 3.7025, + "step": 9431000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5342975441979182e-05, + "loss": 3.6859, + "step": 9431500 + } + ], + "logging_steps": 500, + "max_steps": 19022025, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 7.18259224341547e+19, + "train_batch_size": null, + "trial_name": null, + "trial_params": null +}