{ "best_metric": 5.630408763885498, "best_model_checkpoint": "Ghazal-L/checkpoint-7791000", "epoch": 2.4790997910777524, "eval_steps": 500, "global_step": 9431500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999999978722176e-05, "loss": 8.6812, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.999999989221511e-05, "loss": 7.9277, "step": 10000 }, { "epoch": 0.0, "learning_rate": 4.9999999739178886e-05, "loss": 7.5087, "step": 15000 }, { "epoch": 0.0, "learning_rate": 4.999999951960287e-05, "loss": 7.2437, "step": 20000 }, { "epoch": 0.0, "learning_rate": 4.9999999233550944e-05, "loss": 7.0395, "step": 25000 }, { "epoch": 0.0, "learning_rate": 4.999999888083142e-05, "loss": 6.8838, "step": 30000 }, { "epoch": 0.0, "learning_rate": 4.9999998461726527e-05, "loss": 6.7713, "step": 35000 }, { "epoch": 0.0, "learning_rate": 4.999999797600462e-05, "loss": 6.6587, "step": 40000 }, { "epoch": 0.0, "learning_rate": 4.999999742374291e-05, "loss": 6.5715, "step": 45000 }, { "epoch": 0.0, "learning_rate": 4.999999680507188e-05, "loss": 6.4858, "step": 50000 }, { "epoch": 0.0, "learning_rate": 4.9999996119600115e-05, "loss": 6.4163, "step": 55000 }, { "epoch": 0.0, "learning_rate": 4.9999995367719034e-05, "loss": 6.3559, "step": 60000 }, { "epoch": 0.0, "learning_rate": 4.9999994549298155e-05, "loss": 6.2706, "step": 65000 }, { "epoch": 0.0, "learning_rate": 4.9999993664337485e-05, "loss": 6.2375, "step": 70000 }, { "epoch": 0.0, "learning_rate": 4.999999271303407e-05, "loss": 6.1624, "step": 75000 }, { "epoch": 0.0, "learning_rate": 4.999999169500715e-05, "loss": 6.132, "step": 80000 }, { "epoch": 0.0, "learning_rate": 4.999999061044045e-05, "loss": 6.0399, "step": 85000 }, { "epoch": 0.0, "learning_rate": 4.999998945933397e-05, "loss": 6.0234, "step": 90000 }, { "epoch": 0.0, "learning_rate": 4.999998824168772e-05, "loss": 5.9853, "step": 95000 }, { "epoch": 0.0, "learning_rate": 4.999998695776529e-05, "loss": 5.9428, "step": 100000 }, { "epoch": 0.0, "learning_rate": 4.99999856067759e-05, "loss": 5.927, "step": 105000 }, { "epoch": 0.0, "learning_rate": 4.999998418951035e-05, "loss": 5.8845, "step": 110000 }, { "epoch": 0.0, "learning_rate": 4.999998270600857e-05, "loss": 5.8365, "step": 115000 }, { "epoch": 0.0, "learning_rate": 4.999998115535997e-05, "loss": 5.8354, "step": 120000 }, { "epoch": 0.0, "learning_rate": 4.999997953847514e-05, "loss": 5.8035, "step": 125000 }, { "epoch": 0.0, "learning_rate": 4.999997785505056e-05, "loss": 5.772, "step": 130000 }, { "epoch": 0.0, "learning_rate": 4.999997610508625e-05, "loss": 5.7249, "step": 135000 }, { "epoch": 0.0, "learning_rate": 4.99999742885822e-05, "loss": 5.7336, "step": 140000 }, { "epoch": 0.0, "learning_rate": 4.99999724055384e-05, "loss": 5.6806, "step": 145000 }, { "epoch": 0.0, "learning_rate": 4.999997045595489e-05, "loss": 5.6693, "step": 150000 }, { "epoch": 0.0, "learning_rate": 4.999996844024168e-05, "loss": 5.6532, "step": 155000 }, { "epoch": 0.0, "learning_rate": 4.9999966357168684e-05, "loss": 5.6243, "step": 160000 }, { "epoch": 0.0, "learning_rate": 4.999996420840268e-05, "loss": 5.6043, "step": 165000 }, { "epoch": 0.0, "learning_rate": 4.999996199267362e-05, "loss": 5.5726, "step": 170000 }, { "epoch": 0.0, "learning_rate": 4.999995971040484e-05, "loss": 5.5617, "step": 175000 }, { "epoch": 0.0, "learning_rate": 4.9999957361119776e-05, "loss": 5.562, "step": 180000 }, { "epoch": 0.0, "learning_rate": 4.9999954945758306e-05, "loss": 5.5073, "step": 185000 }, { "epoch": 0.0, "learning_rate": 4.999995246385716e-05, "loss": 5.5126, "step": 190000 }, { "epoch": 0.0, "learning_rate": 4.999994991593288e-05, "loss": 5.5045, "step": 195000 }, { "epoch": 0.0, "learning_rate": 4.999994730043585e-05, "loss": 5.4829, "step": 200000 }, { "epoch": 0.0, "learning_rate": 4.999994461891568e-05, "loss": 5.4754, "step": 205000 }, { "epoch": 0.0, "learning_rate": 4.999994187085588e-05, "loss": 5.4791, "step": 210000 }, { "epoch": 0.0, "learning_rate": 4.999993905625642e-05, "loss": 5.4468, "step": 215000 }, { "epoch": 0.0, "learning_rate": 4.999993617511732e-05, "loss": 5.4435, "step": 220000 }, { "epoch": 0.0, "learning_rate": 4.999993322743859e-05, "loss": 5.4306, "step": 225000 }, { "epoch": 0.0, "learning_rate": 4.999993021382997e-05, "loss": 5.4099, "step": 230000 }, { "epoch": 0.0, "learning_rate": 4.999992713246226e-05, "loss": 5.4021, "step": 235000 }, { "epoch": 0.0, "learning_rate": 4.999992398516467e-05, "loss": 5.3877, "step": 240000 }, { "epoch": 0.0, "learning_rate": 4.999992077197718e-05, "loss": 5.3769, "step": 245000 }, { "epoch": 0.0, "learning_rate": 4.999991749161371e-05, "loss": 5.3719, "step": 250000 }, { "epoch": 0.0, "learning_rate": 4.9999914144710673e-05, "loss": 5.3718, "step": 255000 }, { "epoch": 0.0, "learning_rate": 4.9999910730578436e-05, "loss": 5.3472, "step": 260000 }, { "epoch": 0.0, "learning_rate": 4.999990725058294e-05, "loss": 5.3371, "step": 265000 }, { "epoch": 0.0, "learning_rate": 4.999990370476414e-05, "loss": 5.3296, "step": 270000 }, { "epoch": 0.0, "learning_rate": 4.999990009097329e-05, "loss": 5.3426, "step": 275000 }, { "epoch": 0.0, "learning_rate": 4.999989641210204e-05, "loss": 5.3135, "step": 280000 }, { "epoch": 0.0, "learning_rate": 4.999989266596169e-05, "loss": 5.3124, "step": 285000 }, { "epoch": 0.0, "learning_rate": 4.9999888852512325e-05, "loss": 5.3119, "step": 290000 }, { "epoch": 0.0, "learning_rate": 4.999988497327964e-05, "loss": 5.3043, "step": 295000 }, { "epoch": 0.0, "learning_rate": 4.9999881027507466e-05, "loss": 5.3108, "step": 300000 }, { "epoch": 0.01, "learning_rate": 4.99998770151958e-05, "loss": 5.2972, "step": 305000 }, { "epoch": 0.01, "learning_rate": 4.999987293552191e-05, "loss": 5.265, "step": 310000 }, { "epoch": 0.01, "learning_rate": 4.9999868790954055e-05, "loss": 5.2639, "step": 315000 }, { "epoch": 0.01, "learning_rate": 4.999986457817462e-05, "loss": 5.2773, "step": 320000 }, { "epoch": 0.01, "learning_rate": 4.99998602996918e-05, "loss": 5.2507, "step": 325000 }, { "epoch": 0.01, "learning_rate": 4.999985595466956e-05, "loss": 5.2486, "step": 330000 }, { "epoch": 0.01, "learning_rate": 4.99998515431079e-05, "loss": 5.2741, "step": 335000 }, { "epoch": 0.01, "learning_rate": 4.999984706500683e-05, "loss": 5.2326, "step": 340000 }, { "epoch": 0.01, "learning_rate": 4.999984251945041e-05, "loss": 5.2543, "step": 345000 }, { "epoch": 0.01, "learning_rate": 4.9999837909186514e-05, "loss": 5.2391, "step": 350000 }, { "epoch": 0.01, "learning_rate": 4.999983323052472e-05, "loss": 5.2242, "step": 355000 }, { "epoch": 0.01, "learning_rate": 4.999982848625283e-05, "loss": 5.2275, "step": 360000 }, { "epoch": 0.01, "learning_rate": 4.9999823675441594e-05, "loss": 5.2365, "step": 365000 }, { "epoch": 0.01, "learning_rate": 4.99998187971085e-05, "loss": 5.2114, "step": 370000 }, { "epoch": 0.01, "learning_rate": 4.999981385420113e-05, "loss": 5.2166, "step": 375000 }, { "epoch": 0.01, "learning_rate": 4.9999808843771935e-05, "loss": 5.2035, "step": 380000 }, { "epoch": 0.01, "learning_rate": 4.999980376578098e-05, "loss": 5.1739, "step": 385000 }, { "epoch": 0.01, "learning_rate": 4.999979862329567e-05, "loss": 5.2064, "step": 390000 }, { "epoch": 0.01, "learning_rate": 4.999979341219955e-05, "loss": 5.2046, "step": 395000 }, { "epoch": 0.01, "learning_rate": 4.999978813559995e-05, "loss": 5.1882, "step": 400000 }, { "epoch": 0.01, "learning_rate": 4.99997827924611e-05, "loss": 5.1813, "step": 405000 }, { "epoch": 0.01, "learning_rate": 4.999977738387206e-05, "loss": 5.1692, "step": 410000 }, { "epoch": 0.01, "learning_rate": 4.999977190656576e-05, "loss": 5.1503, "step": 415000 }, { "epoch": 0.01, "learning_rate": 4.9999766363809286e-05, "loss": 5.1606, "step": 420000 }, { "epoch": 0.01, "learning_rate": 4.999976075338466e-05, "loss": 5.1564, "step": 425000 }, { "epoch": 0.01, "learning_rate": 4.9999755077536534e-05, "loss": 5.1689, "step": 430000 }, { "epoch": 0.01, "learning_rate": 4.999974933514925e-05, "loss": 5.157, "step": 435000 }, { "epoch": 0.01, "learning_rate": 4.999974352622283e-05, "loss": 5.1639, "step": 440000 }, { "epoch": 0.01, "learning_rate": 4.99997376507573e-05, "loss": 5.1809, "step": 445000 }, { "epoch": 0.01, "learning_rate": 4.999973170875266e-05, "loss": 5.1484, "step": 450000 }, { "epoch": 0.01, "learning_rate": 4.9999725700208924e-05, "loss": 5.1759, "step": 455000 }, { "epoch": 0.01, "learning_rate": 4.9999719623903964e-05, "loss": 5.1526, "step": 460000 }, { "epoch": 0.01, "learning_rate": 4.9999713483504266e-05, "loss": 5.1495, "step": 465000 }, { "epoch": 0.01, "learning_rate": 4.999970727659215e-05, "loss": 5.1589, "step": 470000 }, { "epoch": 0.01, "learning_rate": 4.999970099938135e-05, "loss": 5.1373, "step": 475000 }, { "epoch": 0.01, "learning_rate": 4.999969466067993e-05, "loss": 5.155, "step": 480000 }, { "epoch": 0.01, "learning_rate": 4.9999688251626596e-05, "loss": 5.1218, "step": 485000 }, { "epoch": 0.01, "learning_rate": 4.999968177861174e-05, "loss": 5.1333, "step": 490000 }, { "epoch": 0.01, "learning_rate": 4.999967523776921e-05, "loss": 5.1422, "step": 495000 }, { "epoch": 0.01, "learning_rate": 4.999966862773041e-05, "loss": 5.1352, "step": 500000 }, { "epoch": 0.01, "learning_rate": 4.999966195378339e-05, "loss": 5.1379, "step": 505000 }, { "epoch": 0.01, "learning_rate": 4.9999655214652764e-05, "loss": 5.1377, "step": 510000 }, { "epoch": 0.01, "learning_rate": 4.999964840627267e-05, "loss": 5.1024, "step": 515000 }, { "epoch": 0.01, "learning_rate": 4.999964153409091e-05, "loss": 5.1358, "step": 520000 }, { "epoch": 0.01, "learning_rate": 4.999963459400171e-05, "loss": 5.1273, "step": 525000 }, { "epoch": 0.01, "learning_rate": 4.999962758596512e-05, "loss": 5.1146, "step": 530000 }, { "epoch": 0.01, "learning_rate": 4.9999620514206824e-05, "loss": 5.119, "step": 535000 }, { "epoch": 0.01, "learning_rate": 4.999961337306601e-05, "loss": 5.109, "step": 540000 }, { "epoch": 0.01, "learning_rate": 4.9999606168256764e-05, "loss": 5.098, "step": 545000 }, { "epoch": 0.01, "learning_rate": 4.99995988954736e-05, "loss": 5.1051, "step": 550000 }, { "epoch": 0.01, "learning_rate": 4.999959155615169e-05, "loss": 5.1003, "step": 555000 }, { "epoch": 0.01, "learning_rate": 4.999958415029108e-05, "loss": 5.0914, "step": 560000 }, { "epoch": 0.01, "learning_rate": 4.9999576677891766e-05, "loss": 5.0988, "step": 565000 }, { "epoch": 0.01, "learning_rate": 4.999956913743872e-05, "loss": 5.1097, "step": 570000 }, { "epoch": 0.01, "learning_rate": 4.999956153194876e-05, "loss": 5.0982, "step": 575000 }, { "epoch": 0.01, "learning_rate": 4.999955385992017e-05, "loss": 5.0839, "step": 580000 }, { "epoch": 0.01, "learning_rate": 4.999954612135296e-05, "loss": 5.0799, "step": 585000 }, { "epoch": 0.01, "learning_rate": 4.999953831781546e-05, "loss": 5.0851, "step": 590000 }, { "epoch": 0.01, "learning_rate": 4.999953044460278e-05, "loss": 5.0826, "step": 595000 }, { "epoch": 0.01, "learning_rate": 4.999952250641985e-05, "loss": 5.0825, "step": 600000 }, { "epoch": 0.01, "learning_rate": 4.999951450330663e-05, "loss": 5.0746, "step": 605000 }, { "epoch": 0.01, "learning_rate": 4.999950643043841e-05, "loss": 5.0877, "step": 610000 }, { "epoch": 0.01, "learning_rate": 4.999949829263996e-05, "loss": 5.0676, "step": 615000 }, { "epoch": 0.01, "learning_rate": 4.999949008830302e-05, "loss": 5.0661, "step": 620000 }, { "epoch": 0.01, "learning_rate": 4.999948181742765e-05, "loss": 5.0697, "step": 625000 }, { "epoch": 0.01, "learning_rate": 4.9999473478339056e-05, "loss": 5.1043, "step": 630000 }, { "epoch": 0.01, "learning_rate": 4.9999465074373544e-05, "loss": 5.0648, "step": 635000 }, { "epoch": 0.01, "learning_rate": 4.999945660216822e-05, "loss": 5.0778, "step": 640000 }, { "epoch": 0.01, "learning_rate": 4.99994480633979e-05, "loss": 5.0595, "step": 645000 }, { "epoch": 0.01, "learning_rate": 4.9999439459790695e-05, "loss": 5.1031, "step": 650000 }, { "epoch": 0.01, "learning_rate": 4.9999430791386565e-05, "loss": 5.067, "step": 655000 }, { "epoch": 0.01, "learning_rate": 4.999942205471607e-05, "loss": 5.0772, "step": 660000 }, { "epoch": 0.01, "learning_rate": 4.999941325150732e-05, "loss": 5.0867, "step": 665000 }, { "epoch": 0.01, "learning_rate": 4.9999404379979006e-05, "loss": 5.0644, "step": 670000 }, { "epoch": 0.01, "learning_rate": 4.999939544368049e-05, "loss": 5.0614, "step": 675000 }, { "epoch": 0.01, "learning_rate": 4.999938644084376e-05, "loss": 5.0724, "step": 680000 }, { "epoch": 0.01, "learning_rate": 4.9999377369647616e-05, "loss": 5.0891, "step": 685000 }, { "epoch": 0.01, "learning_rate": 4.9999368235555845e-05, "loss": 5.0768, "step": 690000 }, { "epoch": 0.01, "learning_rate": 4.999935903125681e-05, "loss": 5.0706, "step": 695000 }, { "epoch": 0.01, "learning_rate": 4.9999349764115444e-05, "loss": 5.0672, "step": 700000 }, { "epoch": 0.01, "learning_rate": 4.9999340426713607e-05, "loss": 5.0637, "step": 705000 }, { "epoch": 0.01, "learning_rate": 4.999933102652275e-05, "loss": 5.07, "step": 710000 }, { "epoch": 0.01, "learning_rate": 4.999932155601822e-05, "loss": 5.0515, "step": 715000 }, { "epoch": 0.01, "learning_rate": 4.999931202277797e-05, "loss": 5.0552, "step": 720000 }, { "epoch": 0.01, "learning_rate": 4.999930241917085e-05, "loss": 5.0438, "step": 725000 }, { "epoch": 0.01, "learning_rate": 4.999929275094023e-05, "loss": 5.0637, "step": 730000 }, { "epoch": 0.01, "learning_rate": 4.9999283016171696e-05, "loss": 5.079, "step": 735000 }, { "epoch": 0.01, "learning_rate": 4.999927321289756e-05, "loss": 5.0587, "step": 740000 }, { "epoch": 0.01, "learning_rate": 4.999926334305893e-05, "loss": 5.0578, "step": 745000 }, { "epoch": 0.01, "learning_rate": 4.999925340865016e-05, "loss": 5.0345, "step": 750000 }, { "epoch": 0.01, "learning_rate": 4.999924340569593e-05, "loss": 5.042, "step": 755000 }, { "epoch": 0.01, "learning_rate": 4.999923333819825e-05, "loss": 5.0546, "step": 760000 }, { "epoch": 0.01, "learning_rate": 4.99992232041628e-05, "loss": 5.0553, "step": 765000 }, { "epoch": 0.01, "learning_rate": 4.9999213001542037e-05, "loss": 5.0472, "step": 770000 }, { "epoch": 0.01, "learning_rate": 4.999920273853963e-05, "loss": 5.0469, "step": 775000 }, { "epoch": 0.01, "learning_rate": 4.9999192400755945e-05, "loss": 5.0548, "step": 780000 }, { "epoch": 0.01, "learning_rate": 4.999918200055641e-05, "loss": 5.0352, "step": 785000 }, { "epoch": 0.01, "learning_rate": 4.999917153592009e-05, "loss": 5.0211, "step": 790000 }, { "epoch": 0.01, "learning_rate": 4.9999161000544494e-05, "loss": 5.046, "step": 795000 }, { "epoch": 0.01, "learning_rate": 4.9999150400732165e-05, "loss": 5.0437, "step": 800000 }, { "epoch": 0.01, "learning_rate": 4.999913973652307e-05, "loss": 5.0523, "step": 805000 }, { "epoch": 0.01, "learning_rate": 4.999912900149492e-05, "loss": 5.0482, "step": 810000 }, { "epoch": 0.01, "learning_rate": 4.999911820207005e-05, "loss": 5.0536, "step": 815000 }, { "epoch": 0.01, "learning_rate": 4.999910733392701e-05, "loss": 5.0585, "step": 820000 }, { "epoch": 0.01, "learning_rate": 4.9999096403607983e-05, "loss": 5.049, "step": 825000 }, { "epoch": 0.01, "learning_rate": 4.999908540457083e-05, "loss": 5.0388, "step": 830000 }, { "epoch": 0.01, "learning_rate": 4.999907433677567e-05, "loss": 5.0137, "step": 835000 }, { "epoch": 0.01, "learning_rate": 4.999906320688447e-05, "loss": 5.0421, "step": 840000 }, { "epoch": 0.01, "learning_rate": 4.999905200823532e-05, "loss": 5.0273, "step": 845000 }, { "epoch": 0.01, "learning_rate": 4.999904074078827e-05, "loss": 5.0336, "step": 850000 }, { "epoch": 0.01, "learning_rate": 4.9999029409051265e-05, "loss": 5.044, "step": 855000 }, { "epoch": 0.01, "learning_rate": 4.999901801077705e-05, "loss": 5.0324, "step": 860000 }, { "epoch": 0.01, "learning_rate": 4.999900654596564e-05, "loss": 5.024, "step": 865000 }, { "epoch": 0.01, "learning_rate": 4.9998995016930915e-05, "loss": 5.0345, "step": 870000 }, { "epoch": 0.01, "learning_rate": 4.9998983416731374e-05, "loss": 5.0391, "step": 875000 }, { "epoch": 0.01, "learning_rate": 4.999897175230858e-05, "loss": 5.0455, "step": 880000 }, { "epoch": 0.01, "learning_rate": 4.9998960021348724e-05, "loss": 5.0014, "step": 885000 }, { "epoch": 0.01, "learning_rate": 4.999894822621893e-05, "loss": 5.0323, "step": 890000 }, { "epoch": 0.01, "learning_rate": 4.9998936359817934e-05, "loss": 5.0319, "step": 895000 }, { "epoch": 0.01, "learning_rate": 4.999892442685334e-05, "loss": 5.02, "step": 900000 }, { "epoch": 0.01, "learning_rate": 4.9998912432139255e-05, "loss": 4.9959, "step": 905000 }, { "epoch": 0.01, "learning_rate": 4.999890036607419e-05, "loss": 5.0338, "step": 910000 }, { "epoch": 0.02, "learning_rate": 4.999888823587929e-05, "loss": 5.0296, "step": 915000 }, { "epoch": 0.02, "learning_rate": 4.999887603914754e-05, "loss": 5.0287, "step": 920000 }, { "epoch": 0.02, "learning_rate": 4.999886377587899e-05, "loss": 5.0292, "step": 925000 }, { "epoch": 0.02, "learning_rate": 4.9998851443600044e-05, "loss": 5.0319, "step": 930000 }, { "epoch": 0.02, "learning_rate": 4.999883904724465e-05, "loss": 5.0326, "step": 935000 }, { "epoch": 0.02, "learning_rate": 4.9998826584352544e-05, "loss": 5.0304, "step": 940000 }, { "epoch": 0.02, "learning_rate": 4.999881405241021e-05, "loss": 5.0287, "step": 945000 }, { "epoch": 0.02, "learning_rate": 4.999880145895832e-05, "loss": 5.0116, "step": 950000 }, { "epoch": 0.02, "learning_rate": 4.999878879391611e-05, "loss": 5.012, "step": 955000 }, { "epoch": 0.02, "learning_rate": 4.999877606486417e-05, "loss": 5.0323, "step": 960000 }, { "epoch": 0.02, "learning_rate": 4.9998763269275696e-05, "loss": 5.005, "step": 965000 }, { "epoch": 0.02, "learning_rate": 4.9998750407150696e-05, "loss": 5.0243, "step": 970000 }, { "epoch": 0.02, "learning_rate": 4.999873747848923e-05, "loss": 5.0379, "step": 975000 }, { "epoch": 0.02, "learning_rate": 4.999872448589805e-05, "loss": 5.0232, "step": 980000 }, { "epoch": 0.02, "learning_rate": 4.9998711421557005e-05, "loss": 5.0341, "step": 985000 }, { "epoch": 0.02, "learning_rate": 4.999869829328632e-05, "loss": 5.0154, "step": 990000 }, { "epoch": 0.02, "learning_rate": 4.999868509847928e-05, "loss": 5.0392, "step": 995000 }, { "epoch": 0.02, "learning_rate": 4.999867183447596e-05, "loss": 5.0064, "step": 1000000 }, { "epoch": 0.02, "eval_loss": 5.773796558380127, "eval_runtime": 79570.5409, "eval_samples_per_second": 139.284, "eval_steps_per_second": 27.857, "step": 1000000 }, { "epoch": 0.02, "learning_rate": 4.9998658506583055e-05, "loss": 5.0198, "step": 1005000 }, { "epoch": 0.02, "learning_rate": 4.9998645114840526e-05, "loss": 5.0128, "step": 1010000 }, { "epoch": 0.02, "learning_rate": 4.999863165118858e-05, "loss": 5.008, "step": 1015000 }, { "epoch": 0.02, "learning_rate": 4.999861812097383e-05, "loss": 5.0197, "step": 1020000 }, { "epoch": 0.02, "learning_rate": 4.9998604526922896e-05, "loss": 5.0019, "step": 1025000 }, { "epoch": 0.02, "learning_rate": 4.999859086633587e-05, "loss": 5.0375, "step": 1030000 }, { "epoch": 0.02, "learning_rate": 4.9998577139212795e-05, "loss": 4.9895, "step": 1035000 }, { "epoch": 0.02, "learning_rate": 4.999856334555371e-05, "loss": 4.9852, "step": 1040000 }, { "epoch": 0.02, "learning_rate": 4.9998549485358634e-05, "loss": 4.9984, "step": 1045000 }, { "epoch": 0.02, "learning_rate": 4.9998535558627615e-05, "loss": 5.0064, "step": 1050000 }, { "epoch": 0.02, "learning_rate": 4.9998521568167126e-05, "loss": 5.008, "step": 1055000 }, { "epoch": 0.02, "learning_rate": 4.9998507505557915e-05, "loss": 4.9982, "step": 1060000 }, { "epoch": 0.02, "learning_rate": 4.99984933792193e-05, "loss": 5.0093, "step": 1065000 }, { "epoch": 0.02, "learning_rate": 4.999847918349852e-05, "loss": 5.0144, "step": 1070000 }, { "epoch": 0.02, "learning_rate": 4.999846492693473e-05, "loss": 5.0098, "step": 1075000 }, { "epoch": 0.02, "learning_rate": 4.999845060098885e-05, "loss": 5.0126, "step": 1080000 }, { "epoch": 0.02, "learning_rate": 4.9998436208507295e-05, "loss": 4.993, "step": 1085000 }, { "epoch": 0.02, "learning_rate": 4.999842175238971e-05, "loss": 5.008, "step": 1090000 }, { "epoch": 0.02, "learning_rate": 4.99984072239373e-05, "loss": 5.0045, "step": 1095000 }, { "epoch": 0.02, "learning_rate": 4.9998392631848933e-05, "loss": 5.0101, "step": 1100000 }, { "epoch": 0.02, "learning_rate": 4.999837797322505e-05, "loss": 4.991, "step": 1105000 }, { "epoch": 0.02, "learning_rate": 4.999836324511281e-05, "loss": 4.994, "step": 1110000 }, { "epoch": 0.02, "learning_rate": 4.999834845340468e-05, "loss": 5.0062, "step": 1115000 }, { "epoch": 0.02, "learning_rate": 4.999833359218165e-05, "loss": 4.9875, "step": 1120000 }, { "epoch": 0.02, "learning_rate": 4.999831866738943e-05, "loss": 5.0169, "step": 1125000 }, { "epoch": 0.02, "learning_rate": 4.9998303679068e-05, "loss": 5.0026, "step": 1130000 }, { "epoch": 0.02, "learning_rate": 4.9998288618199054e-05, "loss": 5.0003, "step": 1135000 }, { "epoch": 0.02, "learning_rate": 4.999827349683371e-05, "loss": 5.0057, "step": 1140000 }, { "epoch": 0.02, "learning_rate": 4.9998258305913745e-05, "loss": 5.013, "step": 1145000 }, { "epoch": 0.02, "learning_rate": 4.99982430484586e-05, "loss": 4.9979, "step": 1150000 }, { "epoch": 0.02, "learning_rate": 4.9998227724468336e-05, "loss": 5.0058, "step": 1155000 }, { "epoch": 0.02, "learning_rate": 4.999821233394298e-05, "loss": 5.0132, "step": 1160000 }, { "epoch": 0.02, "learning_rate": 4.9998196876882586e-05, "loss": 5.0015, "step": 1165000 }, { "epoch": 0.02, "learning_rate": 4.999818135328719e-05, "loss": 4.9969, "step": 1170000 }, { "epoch": 0.02, "learning_rate": 4.999816576003089e-05, "loss": 4.984, "step": 1175000 }, { "epoch": 0.02, "learning_rate": 4.99981501033523e-05, "loss": 4.9961, "step": 1180000 }, { "epoch": 0.02, "learning_rate": 4.999813438013882e-05, "loss": 5.0074, "step": 1185000 }, { "epoch": 0.02, "learning_rate": 4.999811859039052e-05, "loss": 5.0037, "step": 1190000 }, { "epoch": 0.02, "learning_rate": 4.999810273410741e-05, "loss": 4.9901, "step": 1195000 }, { "epoch": 0.02, "learning_rate": 4.9998086811289546e-05, "loss": 4.9886, "step": 1200000 }, { "epoch": 0.02, "learning_rate": 4.999807081873117e-05, "loss": 4.9975, "step": 1205000 }, { "epoch": 0.02, "learning_rate": 4.999805476604974e-05, "loss": 4.9744, "step": 1210000 }, { "epoch": 0.02, "learning_rate": 4.9998038640395454e-05, "loss": 4.9892, "step": 1215000 }, { "epoch": 0.02, "learning_rate": 4.99980224514257e-05, "loss": 4.9925, "step": 1220000 }, { "epoch": 0.02, "learning_rate": 4.99980061959214e-05, "loss": 4.9752, "step": 1225000 }, { "epoch": 0.02, "learning_rate": 4.999798987388261e-05, "loss": 5.0102, "step": 1230000 }, { "epoch": 0.02, "learning_rate": 4.9997973482023686e-05, "loss": 4.9919, "step": 1235000 }, { "epoch": 0.02, "learning_rate": 4.999795702690272e-05, "loss": 4.9995, "step": 1240000 }, { "epoch": 0.02, "learning_rate": 4.999794050524739e-05, "loss": 4.979, "step": 1245000 }, { "epoch": 0.02, "learning_rate": 4.999792391373212e-05, "loss": 5.0021, "step": 1250000 }, { "epoch": 0.02, "learning_rate": 4.999790726233381e-05, "loss": 4.9852, "step": 1255000 }, { "epoch": 0.02, "learning_rate": 4.999789053772341e-05, "loss": 4.9791, "step": 1260000 }, { "epoch": 0.02, "learning_rate": 4.9997873749917744e-05, "loss": 4.9933, "step": 1265000 }, { "epoch": 0.02, "learning_rate": 4.999785689557794e-05, "loss": 5.0006, "step": 1270000 }, { "epoch": 0.02, "learning_rate": 4.999783997470404e-05, "loss": 4.989, "step": 1275000 }, { "epoch": 0.02, "learning_rate": 4.9997822990701585e-05, "loss": 5.0154, "step": 1280000 }, { "epoch": 0.02, "learning_rate": 4.9997805933354134e-05, "loss": 4.9763, "step": 1285000 }, { "epoch": 0.02, "learning_rate": 4.9997788809446075e-05, "loss": 4.9917, "step": 1290000 }, { "epoch": 0.02, "learning_rate": 4.999777162242293e-05, "loss": 4.9771, "step": 1295000 }, { "epoch": 0.02, "learning_rate": 4.99977543688659e-05, "loss": 4.9696, "step": 1300000 }, { "epoch": 0.02, "learning_rate": 4.9997737045302987e-05, "loss": 4.9729, "step": 1305000 }, { "epoch": 0.02, "learning_rate": 4.9997719662150425e-05, "loss": 4.9715, "step": 1310000 }, { "epoch": 0.02, "learning_rate": 4.999770220899206e-05, "loss": 4.9749, "step": 1315000 }, { "epoch": 0.02, "learning_rate": 4.9997684689300004e-05, "loss": 4.9947, "step": 1320000 }, { "epoch": 0.02, "learning_rate": 4.999766710307432e-05, "loss": 4.9725, "step": 1325000 }, { "epoch": 0.02, "learning_rate": 4.999764945031503e-05, "loss": 4.9766, "step": 1330000 }, { "epoch": 0.02, "learning_rate": 4.9997631731022206e-05, "loss": 4.9832, "step": 1335000 }, { "epoch": 0.02, "learning_rate": 4.999761394519587e-05, "loss": 4.9818, "step": 1340000 }, { "epoch": 0.02, "learning_rate": 4.99975960928361e-05, "loss": 4.9924, "step": 1345000 }, { "epoch": 0.02, "learning_rate": 4.9997578177534776e-05, "loss": 5.0034, "step": 1350000 }, { "epoch": 0.02, "learning_rate": 4.9997560188516365e-05, "loss": 4.9997, "step": 1355000 }, { "epoch": 0.02, "learning_rate": 4.9997542136556516e-05, "loss": 5.0057, "step": 1360000 }, { "epoch": 0.02, "learning_rate": 4.99975240144316e-05, "loss": 4.9847, "step": 1365000 }, { "epoch": 0.02, "learning_rate": 4.999750583303709e-05, "loss": 4.9622, "step": 1370000 }, { "epoch": 0.02, "learning_rate": 4.9997487581477596e-05, "loss": 4.9807, "step": 1375000 }, { "epoch": 0.02, "learning_rate": 4.999746925971325e-05, "loss": 4.9725, "step": 1380000 }, { "epoch": 0.02, "learning_rate": 4.999745087507427e-05, "loss": 4.9928, "step": 1385000 }, { "epoch": 0.02, "learning_rate": 4.999743242390227e-05, "loss": 4.9988, "step": 1390000 }, { "epoch": 0.02, "learning_rate": 4.99974139061973e-05, "loss": 4.9705, "step": 1395000 }, { "epoch": 0.02, "learning_rate": 4.9997395321959415e-05, "loss": 4.9875, "step": 1400000 }, { "epoch": 0.02, "learning_rate": 4.999737667118866e-05, "loss": 4.9837, "step": 1405000 }, { "epoch": 0.02, "learning_rate": 4.999735795388508e-05, "loss": 4.9921, "step": 1410000 }, { "epoch": 0.02, "learning_rate": 4.999733917004874e-05, "loss": 5.0016, "step": 1415000 }, { "epoch": 0.02, "learning_rate": 4.9997320323457906e-05, "loss": 4.9757, "step": 1420000 }, { "epoch": 0.02, "learning_rate": 4.999730140656948e-05, "loss": 4.9827, "step": 1425000 }, { "epoch": 0.02, "learning_rate": 4.9997282426953294e-05, "loss": 4.9861, "step": 1430000 }, { "epoch": 0.02, "learning_rate": 4.999726337319482e-05, "loss": 4.9724, "step": 1435000 }, { "epoch": 0.02, "learning_rate": 4.9997244256708686e-05, "loss": 4.9922, "step": 1440000 }, { "epoch": 0.02, "learning_rate": 4.999722507369008e-05, "loss": 4.9678, "step": 1445000 }, { "epoch": 0.02, "learning_rate": 4.9997205824139066e-05, "loss": 4.9937, "step": 1450000 }, { "epoch": 0.02, "learning_rate": 4.999718650805568e-05, "loss": 4.986, "step": 1455000 }, { "epoch": 0.02, "learning_rate": 4.999716712155525e-05, "loss": 4.9758, "step": 1460000 }, { "epoch": 0.02, "learning_rate": 4.9997147672393976e-05, "loss": 4.9773, "step": 1465000 }, { "epoch": 0.02, "learning_rate": 4.999712816061185e-05, "loss": 4.9762, "step": 1470000 }, { "epoch": 0.02, "learning_rate": 4.999710857447485e-05, "loss": 4.9724, "step": 1475000 }, { "epoch": 0.02, "learning_rate": 4.999708892177912e-05, "loss": 4.9772, "step": 1480000 }, { "epoch": 0.02, "learning_rate": 4.999706920647601e-05, "loss": 4.9673, "step": 1485000 }, { "epoch": 0.02, "learning_rate": 4.9997049424640896e-05, "loss": 4.9662, "step": 1490000 }, { "epoch": 0.02, "learning_rate": 4.9997029580251743e-05, "loss": 4.9891, "step": 1495000 }, { "epoch": 0.02, "learning_rate": 4.999700966137486e-05, "loss": 4.9728, "step": 1500000 }, { "epoch": 0.02, "learning_rate": 4.9996989675939515e-05, "loss": 5.0021, "step": 1505000 }, { "epoch": 0.02, "learning_rate": 4.999696963198146e-05, "loss": 4.9789, "step": 1510000 }, { "epoch": 0.02, "learning_rate": 4.999694951748713e-05, "loss": 4.9816, "step": 1515000 }, { "epoch": 0.02, "learning_rate": 4.999692933241664e-05, "loss": 4.9701, "step": 1520000 }, { "epoch": 0.03, "learning_rate": 4.999690908484568e-05, "loss": 4.9786, "step": 1525000 }, { "epoch": 0.03, "learning_rate": 4.999688876667205e-05, "loss": 4.9885, "step": 1530000 }, { "epoch": 0.03, "learning_rate": 4.999686839010909e-05, "loss": 4.9945, "step": 1535000 }, { "epoch": 0.03, "learning_rate": 4.999684793884584e-05, "loss": 4.9886, "step": 1540000 }, { "epoch": 0.03, "learning_rate": 4.99968274251356e-05, "loss": 4.9804, "step": 1545000 }, { "epoch": 0.03, "learning_rate": 4.9996806844894e-05, "loss": 4.9786, "step": 1550000 }, { "epoch": 0.03, "learning_rate": 4.9996786198121085e-05, "loss": 4.9756, "step": 1555000 }, { "epoch": 0.03, "learning_rate": 4.9996765484816925e-05, "loss": 4.9923, "step": 1560000 }, { "epoch": 0.03, "learning_rate": 4.999674470914586e-05, "loss": 4.9675, "step": 1565000 }, { "epoch": 0.03, "learning_rate": 4.999672385861509e-05, "loss": 4.9914, "step": 1570000 }, { "epoch": 0.03, "learning_rate": 4.9996702949908427e-05, "loss": 4.9874, "step": 1575000 }, { "epoch": 0.03, "learning_rate": 4.9996681966288915e-05, "loss": 4.9805, "step": 1580000 }, { "epoch": 0.03, "learning_rate": 4.9996660924546876e-05, "loss": 4.9592, "step": 1585000 }, { "epoch": 0.03, "learning_rate": 4.99966398120697e-05, "loss": 4.9766, "step": 1590000 }, { "epoch": 0.03, "learning_rate": 4.999661862881752e-05, "loss": 4.9733, "step": 1595000 }, { "epoch": 0.03, "learning_rate": 4.999659738326537e-05, "loss": 4.9693, "step": 1600000 }, { "epoch": 0.03, "learning_rate": 4.999657607545326e-05, "loss": 4.9853, "step": 1605000 }, { "epoch": 0.03, "learning_rate": 4.9996554696852985e-05, "loss": 4.958, "step": 1610000 }, { "epoch": 0.03, "learning_rate": 4.999653324742468e-05, "loss": 4.9694, "step": 1615000 }, { "epoch": 0.03, "learning_rate": 4.9996511740060606e-05, "loss": 4.9776, "step": 1620000 }, { "epoch": 0.03, "learning_rate": 4.999649016186861e-05, "loss": 4.9912, "step": 1625000 }, { "epoch": 0.03, "learning_rate": 4.999646852148348e-05, "loss": 4.9824, "step": 1630000 }, { "epoch": 0.03, "learning_rate": 4.9996446805893296e-05, "loss": 4.9809, "step": 1635000 }, { "epoch": 0.03, "learning_rate": 4.999642502374613e-05, "loss": 4.9915, "step": 1640000 }, { "epoch": 0.03, "learning_rate": 4.999640318379659e-05, "loss": 4.9845, "step": 1645000 }, { "epoch": 0.03, "learning_rate": 4.9996381272952867e-05, "loss": 4.9653, "step": 1650000 }, { "epoch": 0.03, "learning_rate": 4.999635929557897e-05, "loss": 4.9847, "step": 1655000 }, { "epoch": 0.03, "learning_rate": 4.999633725167497e-05, "loss": 4.9643, "step": 1660000 }, { "epoch": 0.03, "learning_rate": 4.99963151368104e-05, "loss": 4.9823, "step": 1665000 }, { "epoch": 0.03, "learning_rate": 4.9996292959833035e-05, "loss": 4.9676, "step": 1670000 }, { "epoch": 0.03, "learning_rate": 4.9996270716325734e-05, "loss": 4.969, "step": 1675000 }, { "epoch": 0.03, "learning_rate": 4.9996248406288554e-05, "loss": 4.9867, "step": 1680000 }, { "epoch": 0.03, "learning_rate": 4.999622602972156e-05, "loss": 4.964, "step": 1685000 }, { "epoch": 0.03, "learning_rate": 4.9996203586624806e-05, "loss": 4.9894, "step": 1690000 }, { "epoch": 0.03, "learning_rate": 4.999618108150874e-05, "loss": 4.9831, "step": 1695000 }, { "epoch": 0.03, "learning_rate": 4.9996158505365965e-05, "loss": 4.9843, "step": 1700000 }, { "epoch": 0.03, "learning_rate": 4.999613586269361e-05, "loss": 4.9802, "step": 1705000 }, { "epoch": 0.03, "learning_rate": 4.999611315349173e-05, "loss": 4.9813, "step": 1710000 }, { "epoch": 0.03, "learning_rate": 4.9996090377760404e-05, "loss": 4.9926, "step": 1715000 }, { "epoch": 0.03, "learning_rate": 4.999606753092274e-05, "loss": 4.9658, "step": 1720000 }, { "epoch": 0.03, "learning_rate": 4.999604462211936e-05, "loss": 4.9687, "step": 1725000 }, { "epoch": 0.03, "learning_rate": 4.999602164678672e-05, "loss": 4.9948, "step": 1730000 }, { "epoch": 0.03, "learning_rate": 4.999599860492486e-05, "loss": 4.9653, "step": 1735000 }, { "epoch": 0.03, "learning_rate": 4.9995975496533836e-05, "loss": 4.9896, "step": 1740000 }, { "epoch": 0.03, "learning_rate": 4.9995952316970235e-05, "loss": 4.9673, "step": 1745000 }, { "epoch": 0.03, "learning_rate": 4.9995929080164594e-05, "loss": 4.9952, "step": 1750000 }, { "epoch": 0.03, "learning_rate": 4.9995905767516375e-05, "loss": 4.9687, "step": 1755000 }, { "epoch": 0.03, "learning_rate": 4.9995882397679486e-05, "loss": 4.9719, "step": 1760000 }, { "epoch": 0.03, "learning_rate": 4.9995858951946894e-05, "loss": 4.9668, "step": 1765000 }, { "epoch": 0.03, "learning_rate": 4.9995835449079e-05, "loss": 4.9714, "step": 1770000 }, { "epoch": 0.03, "learning_rate": 4.9995811870262294e-05, "loss": 4.9448, "step": 1775000 }, { "epoch": 0.03, "learning_rate": 4.999578822962698e-05, "loss": 4.9636, "step": 1780000 }, { "epoch": 0.03, "learning_rate": 4.9995764527213054e-05, "loss": 4.9678, "step": 1785000 }, { "epoch": 0.03, "learning_rate": 4.999574074877063e-05, "loss": 4.9727, "step": 1790000 }, { "epoch": 0.03, "learning_rate": 4.999571690854973e-05, "loss": 4.9729, "step": 1795000 }, { "epoch": 0.03, "learning_rate": 4.999569300180042e-05, "loss": 4.9859, "step": 1800000 }, { "epoch": 0.03, "learning_rate": 4.9995669023719547e-05, "loss": 4.9872, "step": 1805000 }, { "epoch": 0.03, "learning_rate": 4.9995644993533394e-05, "loss": 4.9602, "step": 1810000 }, { "epoch": 0.03, "learning_rate": 4.999562088238272e-05, "loss": 4.9965, "step": 1815000 }, { "epoch": 0.03, "learning_rate": 4.9995596709520443e-05, "loss": 4.9745, "step": 1820000 }, { "epoch": 0.03, "learning_rate": 4.999557246527361e-05, "loss": 4.9714, "step": 1825000 }, { "epoch": 0.03, "learning_rate": 4.99955481642117e-05, "loss": 4.9821, "step": 1830000 }, { "epoch": 0.03, "learning_rate": 4.999552379176537e-05, "loss": 4.9864, "step": 1835000 }, { "epoch": 0.03, "learning_rate": 4.999549934789474e-05, "loss": 4.9616, "step": 1840000 }, { "epoch": 0.03, "learning_rate": 4.9995474847289105e-05, "loss": 4.988, "step": 1845000 }, { "epoch": 0.03, "learning_rate": 4.9995450275259297e-05, "loss": 4.942, "step": 1850000 }, { "epoch": 0.03, "learning_rate": 4.9995425636701806e-05, "loss": 4.9557, "step": 1855000 }, { "epoch": 0.03, "learning_rate": 4.9995400931616674e-05, "loss": 4.9699, "step": 1860000 }, { "epoch": 0.03, "learning_rate": 4.9995376160003995e-05, "loss": 4.96, "step": 1865000 }, { "epoch": 0.03, "learning_rate": 4.999535131688754e-05, "loss": 4.981, "step": 1870000 }, { "epoch": 0.03, "learning_rate": 4.9995326412206615e-05, "loss": 4.9691, "step": 1875000 }, { "epoch": 0.03, "learning_rate": 4.999530145100412e-05, "loss": 4.9658, "step": 1880000 }, { "epoch": 0.03, "learning_rate": 4.9995276408278954e-05, "loss": 4.974, "step": 1885000 }, { "epoch": 0.03, "learning_rate": 4.999525130402946e-05, "loss": 4.9582, "step": 1890000 }, { "epoch": 0.03, "learning_rate": 4.9995226133252794e-05, "loss": 4.9645, "step": 1895000 }, { "epoch": 0.03, "learning_rate": 4.999520090100517e-05, "loss": 4.9447, "step": 1900000 }, { "epoch": 0.03, "learning_rate": 4.999517559718768e-05, "loss": 4.9521, "step": 1905000 }, { "epoch": 0.03, "learning_rate": 4.9995150226843245e-05, "loss": 4.9638, "step": 1910000 }, { "epoch": 0.03, "learning_rate": 4.9995124789971906e-05, "loss": 4.9675, "step": 1915000 }, { "epoch": 0.03, "learning_rate": 4.9995099286573746e-05, "loss": 4.9582, "step": 1920000 }, { "epoch": 0.03, "learning_rate": 4.999507372177151e-05, "loss": 4.9748, "step": 1925000 }, { "epoch": 0.03, "learning_rate": 4.9995048085333206e-05, "loss": 4.9858, "step": 1930000 }, { "epoch": 0.03, "learning_rate": 4.999502238236828e-05, "loss": 4.9623, "step": 1935000 }, { "epoch": 0.03, "learning_rate": 4.999499661287681e-05, "loss": 4.959, "step": 1940000 }, { "epoch": 0.03, "learning_rate": 4.9994970776858845e-05, "loss": 4.9808, "step": 1945000 }, { "epoch": 0.03, "learning_rate": 4.9994944869125225e-05, "loss": 4.9774, "step": 1950000 }, { "epoch": 0.03, "learning_rate": 4.9994918905243735e-05, "loss": 4.9379, "step": 1955000 }, { "epoch": 0.03, "learning_rate": 4.999489286964673e-05, "loss": 4.9793, "step": 1960000 }, { "epoch": 0.03, "learning_rate": 4.9994866762294347e-05, "loss": 4.9586, "step": 1965000 }, { "epoch": 0.03, "learning_rate": 4.999484059887416e-05, "loss": 4.9688, "step": 1970000 }, { "epoch": 0.03, "learning_rate": 4.999481435844294e-05, "loss": 4.9639, "step": 1975000 }, { "epoch": 0.03, "learning_rate": 4.9994788051459107e-05, "loss": 4.9492, "step": 1980000 }, { "epoch": 0.03, "learning_rate": 4.9994761683205125e-05, "loss": 4.9648, "step": 1985000 }, { "epoch": 0.03, "learning_rate": 4.9994735248425283e-05, "loss": 4.9681, "step": 1990000 }, { "epoch": 0.03, "learning_rate": 4.99947087524287e-05, "loss": 4.9756, "step": 1995000 }, { "epoch": 0.03, "learning_rate": 4.999468218461066e-05, "loss": 4.9719, "step": 2000000 }, { "epoch": 0.03, "eval_loss": 5.719051361083984, "eval_runtime": 82549.8142, "eval_samples_per_second": 134.257, "eval_steps_per_second": 26.851, "step": 2000000 }, { "epoch": 0.03, "learning_rate": 4.9994655544931316e-05, "loss": 4.9601, "step": 2005000 }, { "epoch": 0.03, "learning_rate": 4.999462884404875e-05, "loss": 4.9826, "step": 2010000 }, { "epoch": 0.03, "learning_rate": 4.9994602082002936e-05, "loss": 4.9646, "step": 2015000 }, { "epoch": 0.03, "learning_rate": 4.9994575248082746e-05, "loss": 4.9528, "step": 2020000 }, { "epoch": 0.03, "learning_rate": 4.999454834763717e-05, "loss": 4.9636, "step": 2025000 }, { "epoch": 0.03, "learning_rate": 4.999452138606852e-05, "loss": 4.9577, "step": 2030000 }, { "epoch": 0.03, "learning_rate": 4.999449434175471e-05, "loss": 4.9785, "step": 2035000 }, { "epoch": 0.03, "learning_rate": 4.9994467241720164e-05, "loss": 4.969, "step": 2040000 }, { "epoch": 0.03, "learning_rate": 4.9994440075160553e-05, "loss": 4.9579, "step": 2045000 }, { "epoch": 0.03, "learning_rate": 4.9994412836620476e-05, "loss": 4.9636, "step": 2050000 }, { "epoch": 0.03, "learning_rate": 4.9994385536997614e-05, "loss": 4.945, "step": 2055000 }, { "epoch": 0.03, "learning_rate": 4.999435817084989e-05, "loss": 4.9918, "step": 2060000 }, { "epoch": 0.03, "learning_rate": 4.999433073817738e-05, "loss": 4.9331, "step": 2065000 }, { "epoch": 0.03, "learning_rate": 4.9994303244488855e-05, "loss": 4.943, "step": 2070000 }, { "epoch": 0.03, "learning_rate": 4.999427567325829e-05, "loss": 4.9542, "step": 2075000 }, { "epoch": 0.03, "learning_rate": 4.999424805208249e-05, "loss": 4.9669, "step": 2080000 }, { "epoch": 0.03, "learning_rate": 4.999422034778957e-05, "loss": 4.9442, "step": 2085000 }, { "epoch": 0.03, "learning_rate": 4.999419258806948e-05, "loss": 4.9446, "step": 2090000 }, { "epoch": 0.03, "learning_rate": 4.999416475070117e-05, "loss": 4.9613, "step": 2095000 }, { "epoch": 0.03, "learning_rate": 4.9994136852370523e-05, "loss": 4.9802, "step": 2100000 }, { "epoch": 0.03, "learning_rate": 4.999410888751567e-05, "loss": 4.9685, "step": 2105000 }, { "epoch": 0.03, "learning_rate": 4.99940808561367e-05, "loss": 4.9578, "step": 2110000 }, { "epoch": 0.03, "learning_rate": 4.999405276386218e-05, "loss": 4.9618, "step": 2115000 }, { "epoch": 0.03, "learning_rate": 4.9994024593806696e-05, "loss": 4.9633, "step": 2120000 }, { "epoch": 0.03, "learning_rate": 4.999399636285581e-05, "loss": 4.9497, "step": 2125000 }, { "epoch": 0.03, "learning_rate": 4.99939680653811e-05, "loss": 4.9753, "step": 2130000 }, { "epoch": 0.04, "learning_rate": 4.999393970138264e-05, "loss": 4.9638, "step": 2135000 }, { "epoch": 0.04, "learning_rate": 4.9993911270860516e-05, "loss": 4.9457, "step": 2140000 }, { "epoch": 0.04, "learning_rate": 4.999388276810644e-05, "loss": 4.9599, "step": 2145000 }, { "epoch": 0.04, "learning_rate": 4.999385421024554e-05, "loss": 4.9752, "step": 2150000 }, { "epoch": 0.04, "learning_rate": 4.999382558015284e-05, "loss": 4.9622, "step": 2155000 }, { "epoch": 0.04, "learning_rate": 4.999379687778851e-05, "loss": 4.9644, "step": 2160000 }, { "epoch": 0.04, "learning_rate": 4.999376810887425e-05, "loss": 4.9546, "step": 2165000 }, { "epoch": 0.04, "learning_rate": 4.9993739279185063e-05, "loss": 4.9692, "step": 2170000 }, { "epoch": 0.04, "learning_rate": 4.9993710388760946e-05, "loss": 4.9523, "step": 2175000 }, { "epoch": 0.04, "learning_rate": 4.999368142023735e-05, "loss": 4.9523, "step": 2180000 }, { "epoch": 0.04, "learning_rate": 4.99936523967938e-05, "loss": 4.9624, "step": 2185000 }, { "epoch": 0.04, "learning_rate": 4.9993623301025824e-05, "loss": 4.9495, "step": 2190000 }, { "epoch": 0.04, "learning_rate": 4.999359413289357e-05, "loss": 4.9382, "step": 2195000 }, { "epoch": 0.04, "learning_rate": 4.99935649040667e-05, "loss": 4.9535, "step": 2200000 }, { "epoch": 0.04, "learning_rate": 4.999353561458523e-05, "loss": 4.9521, "step": 2205000 }, { "epoch": 0.04, "learning_rate": 4.999350624684502e-05, "loss": 4.9546, "step": 2210000 }, { "epoch": 0.04, "learning_rate": 4.999347681845037e-05, "loss": 4.9624, "step": 2215000 }, { "epoch": 0.04, "learning_rate": 4.999344732353327e-05, "loss": 4.9458, "step": 2220000 }, { "epoch": 0.04, "learning_rate": 4.9993417762093806e-05, "loss": 4.9396, "step": 2225000 }, { "epoch": 0.04, "learning_rate": 4.999338813413207e-05, "loss": 4.9561, "step": 2230000 }, { "epoch": 0.04, "learning_rate": 4.999335843964812e-05, "loss": 4.929, "step": 2235000 }, { "epoch": 0.04, "learning_rate": 4.999332867864205e-05, "loss": 4.9538, "step": 2240000 }, { "epoch": 0.04, "learning_rate": 4.999329884513938e-05, "loss": 4.9436, "step": 2245000 }, { "epoch": 0.04, "learning_rate": 4.999326895107599e-05, "loss": 4.9516, "step": 2250000 }, { "epoch": 0.04, "learning_rate": 4.9993238996491875e-05, "loss": 4.9513, "step": 2255000 }, { "epoch": 0.04, "learning_rate": 4.999320896338361e-05, "loss": 4.9562, "step": 2260000 }, { "epoch": 0.04, "learning_rate": 4.99931788697548e-05, "loss": 4.9438, "step": 2265000 }, { "epoch": 0.04, "learning_rate": 4.999314871564543e-05, "loss": 4.9385, "step": 2270000 }, { "epoch": 0.04, "learning_rate": 4.99931184829323e-05, "loss": 4.9435, "step": 2275000 }, { "epoch": 0.04, "learning_rate": 4.99930881958065e-05, "loss": 4.9489, "step": 2280000 }, { "epoch": 0.04, "learning_rate": 4.9993057836104884e-05, "loss": 4.9593, "step": 2285000 }, { "epoch": 0.04, "learning_rate": 4.999302740988195e-05, "loss": 4.969, "step": 2290000 }, { "epoch": 0.04, "learning_rate": 4.999299691103012e-05, "loss": 4.9469, "step": 2295000 }, { "epoch": 0.04, "learning_rate": 4.999296634563051e-05, "loss": 4.9486, "step": 2300000 }, { "epoch": 0.04, "learning_rate": 4.9992935713683184e-05, "loss": 4.9685, "step": 2305000 }, { "epoch": 0.04, "learning_rate": 4.999290503363099e-05, "loss": 4.9482, "step": 2310000 }, { "epoch": 0.04, "learning_rate": 4.9992874268628445e-05, "loss": 4.9626, "step": 2315000 }, { "epoch": 0.04, "learning_rate": 4.999284344942683e-05, "loss": 4.9382, "step": 2320000 }, { "epoch": 0.04, "learning_rate": 4.9992812551356086e-05, "loss": 4.9518, "step": 2325000 }, { "epoch": 0.04, "learning_rate": 4.999278158673804e-05, "loss": 4.9347, "step": 2330000 }, { "epoch": 0.04, "learning_rate": 4.9992750568001026e-05, "loss": 4.9461, "step": 2335000 }, { "epoch": 0.04, "learning_rate": 4.9992719476542707e-05, "loss": 4.9411, "step": 2340000 }, { "epoch": 0.04, "learning_rate": 4.999268831856395e-05, "loss": 4.9451, "step": 2345000 }, { "epoch": 0.04, "learning_rate": 4.9992657087810803e-05, "loss": 4.9322, "step": 2350000 }, { "epoch": 0.04, "learning_rate": 4.999262579051077e-05, "loss": 4.9449, "step": 2355000 }, { "epoch": 0.04, "learning_rate": 4.99925944392253e-05, "loss": 4.9457, "step": 2360000 }, { "epoch": 0.04, "learning_rate": 4.9992563015152374e-05, "loss": 4.9532, "step": 2365000 }, { "epoch": 0.04, "learning_rate": 4.999253152455943e-05, "loss": 4.9344, "step": 2370000 }, { "epoch": 0.04, "learning_rate": 4.999249996744657e-05, "loss": 4.9077, "step": 2375000 }, { "epoch": 0.04, "learning_rate": 4.999246834381387e-05, "loss": 4.9646, "step": 2380000 }, { "epoch": 0.04, "learning_rate": 4.999243664731419e-05, "loss": 4.9412, "step": 2385000 }, { "epoch": 0.04, "learning_rate": 4.999240489062875e-05, "loss": 4.9373, "step": 2390000 }, { "epoch": 0.04, "learning_rate": 4.999237307379756e-05, "loss": 4.9372, "step": 2395000 }, { "epoch": 0.04, "learning_rate": 4.999234118408634e-05, "loss": 4.9428, "step": 2400000 }, { "epoch": 0.04, "learning_rate": 4.999230922145524e-05, "loss": 4.9434, "step": 2405000 }, { "epoch": 0.04, "learning_rate": 4.9992277198691964e-05, "loss": 4.96, "step": 2410000 }, { "epoch": 0.04, "learning_rate": 4.999224510940943e-05, "loss": 4.9458, "step": 2415000 }, { "epoch": 0.04, "learning_rate": 4.999221294716735e-05, "loss": 4.9492, "step": 2420000 }, { "epoch": 0.04, "learning_rate": 4.999218073128698e-05, "loss": 4.952, "step": 2425000 }, { "epoch": 0.04, "learning_rate": 4.9992148435980214e-05, "loss": 4.9514, "step": 2430000 }, { "epoch": 0.04, "learning_rate": 4.9992116080608245e-05, "loss": 4.9313, "step": 2435000 }, { "epoch": 0.04, "learning_rate": 4.999208365871745e-05, "loss": 4.9407, "step": 2440000 }, { "epoch": 0.04, "learning_rate": 4.999205117030793e-05, "loss": 4.9555, "step": 2445000 }, { "epoch": 0.04, "learning_rate": 4.9992018608859515e-05, "loss": 4.9404, "step": 2450000 }, { "epoch": 0.04, "learning_rate": 4.999198598739948e-05, "loss": 4.9434, "step": 2455000 }, { "epoch": 0.04, "learning_rate": 4.999195329287409e-05, "loss": 4.9536, "step": 2460000 }, { "epoch": 0.04, "learning_rate": 4.9991920538363895e-05, "loss": 4.9463, "step": 2465000 }, { "epoch": 0.04, "learning_rate": 4.9991887723908884e-05, "loss": 4.9408, "step": 2470000 }, { "epoch": 0.04, "learning_rate": 4.9991854836375476e-05, "loss": 4.93, "step": 2475000 }, { "epoch": 0.04, "learning_rate": 4.9991821875723846e-05, "loss": 4.9485, "step": 2480000 }, { "epoch": 0.04, "learning_rate": 4.9991788855140976e-05, "loss": 4.9414, "step": 2485000 }, { "epoch": 0.04, "learning_rate": 4.999175576141343e-05, "loss": 4.9339, "step": 2490000 }, { "epoch": 0.04, "learning_rate": 4.9991722614421475e-05, "loss": 4.9528, "step": 2495000 }, { "epoch": 0.04, "learning_rate": 4.999168939428502e-05, "loss": 4.9414, "step": 2500000 }, { "epoch": 0.04, "learning_rate": 4.999165610763089e-05, "loss": 4.9255, "step": 2505000 }, { "epoch": 0.04, "learning_rate": 4.99916227477792e-05, "loss": 4.9432, "step": 2510000 }, { "epoch": 0.04, "learning_rate": 4.999158932807666e-05, "loss": 4.9629, "step": 2515000 }, { "epoch": 0.04, "learning_rate": 4.99915558418567e-05, "loss": 4.9463, "step": 2520000 }, { "epoch": 0.04, "learning_rate": 4.999152228239953e-05, "loss": 4.9545, "step": 2525000 }, { "epoch": 0.04, "learning_rate": 4.999148866313169e-05, "loss": 4.937, "step": 2530000 }, { "epoch": 0.04, "learning_rate": 4.9991454970600206e-05, "loss": 4.9416, "step": 2535000 }, { "epoch": 0.04, "learning_rate": 4.999142121828485e-05, "loss": 4.9435, "step": 2540000 }, { "epoch": 0.04, "learning_rate": 4.999138739945254e-05, "loss": 4.9542, "step": 2545000 }, { "epoch": 0.04, "learning_rate": 4.999135352088977e-05, "loss": 4.9355, "step": 2550000 }, { "epoch": 0.04, "learning_rate": 4.999131956223735e-05, "loss": 4.9257, "step": 2555000 }, { "epoch": 0.04, "learning_rate": 4.999128554385466e-05, "loss": 4.9365, "step": 2560000 }, { "epoch": 0.04, "learning_rate": 4.9991251458955366e-05, "loss": 4.9443, "step": 2565000 }, { "epoch": 0.04, "learning_rate": 4.9991217307539553e-05, "loss": 4.9337, "step": 2570000 }, { "epoch": 0.04, "learning_rate": 4.999118310331327e-05, "loss": 4.9442, "step": 2575000 }, { "epoch": 0.04, "learning_rate": 4.9991148812025036e-05, "loss": 4.942, "step": 2580000 }, { "epoch": 0.04, "learning_rate": 4.999111445419393e-05, "loss": 4.9333, "step": 2585000 }, { "epoch": 0.04, "learning_rate": 4.999108004360586e-05, "loss": 4.9404, "step": 2590000 }, { "epoch": 0.04, "learning_rate": 4.999104555271592e-05, "loss": 4.9378, "step": 2595000 }, { "epoch": 0.04, "learning_rate": 4.999101100220292e-05, "loss": 4.9194, "step": 2600000 }, { "epoch": 0.04, "learning_rate": 4.999097639210687e-05, "loss": 4.9526, "step": 2605000 }, { "epoch": 0.04, "learning_rate": 4.999094170857552e-05, "loss": 4.9416, "step": 2610000 }, { "epoch": 0.04, "learning_rate": 4.999090695156903e-05, "loss": 4.9486, "step": 2615000 }, { "epoch": 0.04, "learning_rate": 4.999087213499306e-05, "loss": 4.9481, "step": 2620000 }, { "epoch": 0.04, "learning_rate": 4.999083725888766e-05, "loss": 4.9471, "step": 2625000 }, { "epoch": 0.04, "learning_rate": 4.9990802302294724e-05, "loss": 4.9297, "step": 2630000 }, { "epoch": 0.04, "learning_rate": 4.999076728617252e-05, "loss": 4.9311, "step": 2635000 }, { "epoch": 0.04, "learning_rate": 4.999073219650909e-05, "loss": 4.9628, "step": 2640000 }, { "epoch": 0.04, "learning_rate": 4.999069705438252e-05, "loss": 4.9256, "step": 2645000 }, { "epoch": 0.04, "learning_rate": 4.999066183871492e-05, "loss": 4.9357, "step": 2650000 }, { "epoch": 0.04, "learning_rate": 4.999062655653236e-05, "loss": 4.9444, "step": 2655000 }, { "epoch": 0.04, "learning_rate": 4.999059120783495e-05, "loss": 4.9479, "step": 2660000 }, { "epoch": 0.04, "learning_rate": 4.9990555792622775e-05, "loss": 4.9257, "step": 2665000 }, { "epoch": 0.04, "learning_rate": 4.999052031089594e-05, "loss": 4.9425, "step": 2670000 }, { "epoch": 0.04, "learning_rate": 4.999048476265453e-05, "loss": 4.9507, "step": 2675000 }, { "epoch": 0.04, "learning_rate": 4.999044914076619e-05, "loss": 4.9667, "step": 2680000 }, { "epoch": 0.04, "learning_rate": 4.999041345948262e-05, "loss": 4.9325, "step": 2685000 }, { "epoch": 0.04, "learning_rate": 4.999037771168475e-05, "loss": 4.9479, "step": 2690000 }, { "epoch": 0.04, "learning_rate": 4.9990341897372705e-05, "loss": 4.9516, "step": 2695000 }, { "epoch": 0.04, "learning_rate": 4.999030602373225e-05, "loss": 4.9275, "step": 2700000 }, { "epoch": 0.04, "learning_rate": 4.99902700692064e-05, "loss": 4.9423, "step": 2705000 }, { "epoch": 0.04, "learning_rate": 4.999023406256465e-05, "loss": 4.9466, "step": 2710000 }, { "epoch": 0.04, "learning_rate": 4.9990197982210095e-05, "loss": 4.9467, "step": 2715000 }, { "epoch": 0.04, "learning_rate": 4.999016183534183e-05, "loss": 4.9458, "step": 2720000 }, { "epoch": 0.04, "learning_rate": 4.99901256147077e-05, "loss": 4.9614, "step": 2725000 }, { "epoch": 0.04, "learning_rate": 4.999008934206453e-05, "loss": 4.9259, "step": 2730000 }, { "epoch": 0.04, "learning_rate": 4.999005299565569e-05, "loss": 4.9341, "step": 2735000 }, { "epoch": 0.05, "learning_rate": 4.999001657544138e-05, "loss": 4.943, "step": 2740000 }, { "epoch": 0.05, "learning_rate": 4.998998009599268e-05, "loss": 4.9582, "step": 2745000 }, { "epoch": 0.05, "learning_rate": 4.9989943557349615e-05, "loss": 4.9442, "step": 2750000 }, { "epoch": 0.05, "learning_rate": 4.998990693755597e-05, "loss": 4.9486, "step": 2755000 }, { "epoch": 0.05, "learning_rate": 4.998987026591355e-05, "loss": 4.9424, "step": 2760000 }, { "epoch": 0.05, "learning_rate": 4.998983352042621e-05, "loss": 4.9407, "step": 2765000 }, { "epoch": 0.05, "learning_rate": 4.998979670842613e-05, "loss": 4.9422, "step": 2770000 }, { "epoch": 0.05, "learning_rate": 4.9989759829913397e-05, "loss": 4.9472, "step": 2775000 }, { "epoch": 0.05, "learning_rate": 4.9989722892286726e-05, "loss": 4.9282, "step": 2780000 }, { "epoch": 0.05, "learning_rate": 4.998968587335039e-05, "loss": 4.9236, "step": 2785000 }, { "epoch": 0.05, "learning_rate": 4.998964880272554e-05, "loss": 4.9466, "step": 2790000 }, { "epoch": 0.05, "learning_rate": 4.998961165817652e-05, "loss": 4.9366, "step": 2795000 }, { "epoch": 0.05, "learning_rate": 4.998957444711535e-05, "loss": 4.9328, "step": 2800000 }, { "epoch": 0.05, "learning_rate": 4.998953716954212e-05, "loss": 4.9401, "step": 2805000 }, { "epoch": 0.05, "learning_rate": 4.9989499832935395e-05, "loss": 4.9316, "step": 2810000 }, { "epoch": 0.05, "learning_rate": 4.9989462422351663e-05, "loss": 4.9366, "step": 2815000 }, { "epoch": 0.05, "learning_rate": 4.998942494525618e-05, "loss": 4.9367, "step": 2820000 }, { "epoch": 0.05, "learning_rate": 4.9989387394130655e-05, "loss": 4.9562, "step": 2825000 }, { "epoch": 0.05, "learning_rate": 4.998934978399865e-05, "loss": 4.9364, "step": 2830000 }, { "epoch": 0.05, "learning_rate": 4.998931210735519e-05, "loss": 4.9502, "step": 2835000 }, { "epoch": 0.05, "learning_rate": 4.9989274364200364e-05, "loss": 4.939, "step": 2840000 }, { "epoch": 0.05, "learning_rate": 4.9989236554534294e-05, "loss": 4.92, "step": 2845000 }, { "epoch": 0.05, "learning_rate": 4.998919867835707e-05, "loss": 4.9283, "step": 2850000 }, { "epoch": 0.05, "learning_rate": 4.998916072807056e-05, "loss": 4.9325, "step": 2855000 }, { "epoch": 0.05, "learning_rate": 4.998912272646955e-05, "loss": 4.9461, "step": 2860000 }, { "epoch": 0.05, "learning_rate": 4.998908464313463e-05, "loss": 4.9508, "step": 2865000 }, { "epoch": 0.05, "learning_rate": 4.998904650090048e-05, "loss": 4.9215, "step": 2870000 }, { "epoch": 0.05, "learning_rate": 4.998900829980715e-05, "loss": 4.937, "step": 2875000 }, { "epoch": 0.05, "learning_rate": 4.998897002456512e-05, "loss": 4.9482, "step": 2880000 }, { "epoch": 0.05, "learning_rate": 4.998893168281264e-05, "loss": 4.9501, "step": 2885000 }, { "epoch": 0.05, "learning_rate": 4.998889326685844e-05, "loss": 4.9536, "step": 2890000 }, { "epoch": 0.05, "learning_rate": 4.998885479977676e-05, "loss": 4.9603, "step": 2895000 }, { "epoch": 0.05, "learning_rate": 4.9988816258493566e-05, "loss": 4.9065, "step": 2900000 }, { "epoch": 0.05, "learning_rate": 4.9988777642969024e-05, "loss": 4.9258, "step": 2905000 }, { "epoch": 0.05, "learning_rate": 4.998873897639716e-05, "loss": 4.926, "step": 2910000 }, { "epoch": 0.05, "learning_rate": 4.9988700227826246e-05, "loss": 4.8974, "step": 2915000 }, { "epoch": 0.05, "learning_rate": 4.998866142826144e-05, "loss": 4.9423, "step": 2920000 }, { "epoch": 0.05, "learning_rate": 4.998862254664456e-05, "loss": 4.9384, "step": 2925000 }, { "epoch": 0.05, "learning_rate": 4.9988583606289376e-05, "loss": 4.9316, "step": 2930000 }, { "epoch": 0.05, "learning_rate": 4.9988544599424784e-05, "loss": 4.9531, "step": 2935000 }, { "epoch": 0.05, "learning_rate": 4.998850552605088e-05, "loss": 4.9239, "step": 2940000 }, { "epoch": 0.05, "learning_rate": 4.998846637833e-05, "loss": 4.9559, "step": 2945000 }, { "epoch": 0.05, "learning_rate": 4.998842717192447e-05, "loss": 4.9289, "step": 2950000 }, { "epoch": 0.05, "learning_rate": 4.998838789900995e-05, "loss": 4.9373, "step": 2955000 }, { "epoch": 0.05, "learning_rate": 4.9988348559586525e-05, "loss": 4.9325, "step": 2960000 }, { "epoch": 0.05, "learning_rate": 4.998830914576331e-05, "loss": 4.9291, "step": 2965000 }, { "epoch": 0.05, "learning_rate": 4.9988269673309104e-05, "loss": 4.9247, "step": 2970000 }, { "epoch": 0.05, "learning_rate": 4.998823013434632e-05, "loss": 4.9331, "step": 2975000 }, { "epoch": 0.05, "learning_rate": 4.9988190528875056e-05, "loss": 4.9405, "step": 2980000 }, { "epoch": 0.05, "learning_rate": 4.998815084895119e-05, "loss": 4.9216, "step": 2985000 }, { "epoch": 0.05, "learning_rate": 4.998811111840752e-05, "loss": 4.9334, "step": 2990000 }, { "epoch": 0.05, "learning_rate": 4.998807130544062e-05, "loss": 4.9352, "step": 2995000 }, { "epoch": 0.05, "learning_rate": 4.9988031433923196e-05, "loss": 4.9369, "step": 3000000 }, { "epoch": 0.05, "eval_loss": 5.712026596069336, "eval_runtime": 87990.2621, "eval_samples_per_second": 125.956, "eval_steps_per_second": 25.191, "step": 3000000 }, { "epoch": 0.05, "learning_rate": 4.9987991495897836e-05, "loss": 4.9361, "step": 3005000 }, { "epoch": 0.05, "learning_rate": 4.9987951491364635e-05, "loss": 4.9279, "step": 3010000 }, { "epoch": 0.05, "learning_rate": 4.998791142032369e-05, "loss": 4.9049, "step": 3015000 }, { "epoch": 0.05, "learning_rate": 4.998787128277512e-05, "loss": 4.9403, "step": 3020000 }, { "epoch": 0.05, "learning_rate": 4.9987831078719035e-05, "loss": 4.9221, "step": 3025000 }, { "epoch": 0.05, "learning_rate": 4.998779080815553e-05, "loss": 4.9696, "step": 3030000 }, { "epoch": 0.05, "learning_rate": 4.998775047108472e-05, "loss": 4.9318, "step": 3035000 }, { "epoch": 0.05, "learning_rate": 4.998771008368791e-05, "loss": 4.9252, "step": 3040000 }, { "epoch": 0.05, "learning_rate": 4.998766960552552e-05, "loss": 4.9268, "step": 3045000 }, { "epoch": 0.05, "learning_rate": 4.998762906894674e-05, "loss": 4.9176, "step": 3050000 }, { "epoch": 0.05, "learning_rate": 4.998758846586109e-05, "loss": 4.9199, "step": 3055000 }, { "epoch": 0.05, "learning_rate": 4.998754779626866e-05, "loss": 4.9302, "step": 3060000 }, { "epoch": 0.05, "learning_rate": 4.9987507076483844e-05, "loss": 4.9306, "step": 3065000 }, { "epoch": 0.05, "learning_rate": 4.998746625756395e-05, "loss": 4.938, "step": 3070000 }, { "epoch": 0.05, "learning_rate": 4.998742538026813e-05, "loss": 4.9427, "step": 3075000 }, { "epoch": 0.05, "learning_rate": 4.998738445283347e-05, "loss": 4.9361, "step": 3080000 }, { "epoch": 0.05, "learning_rate": 4.9987343442498474e-05, "loss": 4.9058, "step": 3085000 }, { "epoch": 0.05, "learning_rate": 4.9987302382078084e-05, "loss": 4.9278, "step": 3090000 }, { "epoch": 0.05, "learning_rate": 4.9987261238704355e-05, "loss": 4.9324, "step": 3095000 }, { "epoch": 0.05, "learning_rate": 4.998722003704839e-05, "loss": 4.9023, "step": 3100000 }, { "epoch": 0.05, "learning_rate": 4.998717877715024e-05, "loss": 4.9327, "step": 3105000 }, { "epoch": 0.05, "learning_rate": 4.998713744249611e-05, "loss": 4.9507, "step": 3110000 }, { "epoch": 0.05, "learning_rate": 4.998709603304623e-05, "loss": 4.9325, "step": 3115000 }, { "epoch": 0.05, "learning_rate": 4.9987054557064264e-05, "loss": 4.9206, "step": 3120000 }, { "epoch": 0.05, "learning_rate": 4.9987013031183975e-05, "loss": 4.927, "step": 3125000 }, { "epoch": 0.05, "learning_rate": 4.998697143049494e-05, "loss": 4.9498, "step": 3130000 }, { "epoch": 0.05, "learning_rate": 4.9986929763300786e-05, "loss": 4.9347, "step": 3135000 }, { "epoch": 0.05, "learning_rate": 4.9986888021244875e-05, "loss": 4.9294, "step": 3140000 }, { "epoch": 0.05, "learning_rate": 4.9986846221027494e-05, "loss": 4.9208, "step": 3145000 }, { "epoch": 0.05, "learning_rate": 4.998680436268866e-05, "loss": 4.9293, "step": 3150000 }, { "epoch": 0.05, "learning_rate": 4.998676242947511e-05, "loss": 4.9128, "step": 3155000 }, { "epoch": 0.05, "learning_rate": 4.9986720421347034e-05, "loss": 4.9213, "step": 3160000 }, { "epoch": 0.05, "learning_rate": 4.998667836353441e-05, "loss": 4.9289, "step": 3165000 }, { "epoch": 0.05, "learning_rate": 4.9986636222370914e-05, "loss": 4.931, "step": 3170000 }, { "epoch": 0.05, "learning_rate": 4.9986594023126456e-05, "loss": 4.9282, "step": 3175000 }, { "epoch": 0.05, "learning_rate": 4.9986551757377863e-05, "loss": 4.9166, "step": 3180000 }, { "epoch": 0.05, "learning_rate": 4.998650942512527e-05, "loss": 4.9288, "step": 3185000 }, { "epoch": 0.05, "learning_rate": 4.998646703485857e-05, "loss": 4.9499, "step": 3190000 }, { "epoch": 0.05, "learning_rate": 4.998642456110851e-05, "loss": 4.9204, "step": 3195000 }, { "epoch": 0.05, "learning_rate": 4.9986382029344566e-05, "loss": 4.9312, "step": 3200000 }, { "epoch": 0.05, "learning_rate": 4.9986339422547345e-05, "loss": 4.9217, "step": 3205000 }, { "epoch": 0.05, "learning_rate": 4.9986296766306115e-05, "loss": 4.9027, "step": 3210000 }, { "epoch": 0.05, "learning_rate": 4.998625402647551e-05, "loss": 4.9325, "step": 3215000 }, { "epoch": 0.05, "learning_rate": 4.998621124582399e-05, "loss": 4.8991, "step": 3220000 }, { "epoch": 0.05, "learning_rate": 4.9986168372973765e-05, "loss": 4.9357, "step": 3225000 }, { "epoch": 0.05, "learning_rate": 4.9986125442190176e-05, "loss": 4.9251, "step": 3230000 }, { "epoch": 0.05, "learning_rate": 4.998608243629417e-05, "loss": 4.9276, "step": 3235000 }, { "epoch": 0.05, "learning_rate": 4.998603938111451e-05, "loss": 4.9316, "step": 3240000 }, { "epoch": 0.05, "learning_rate": 4.998599625082265e-05, "loss": 4.9187, "step": 3245000 }, { "epoch": 0.05, "learning_rate": 4.9985953054028264e-05, "loss": 4.927, "step": 3250000 }, { "epoch": 0.05, "learning_rate": 4.9985909790731457e-05, "loss": 4.9324, "step": 3255000 }, { "epoch": 0.05, "learning_rate": 4.9985866469608434e-05, "loss": 4.9167, "step": 3260000 }, { "epoch": 0.05, "learning_rate": 4.998582306463107e-05, "loss": 4.9196, "step": 3265000 }, { "epoch": 0.05, "learning_rate": 4.99857796018277e-05, "loss": 4.9038, "step": 3270000 }, { "epoch": 0.05, "learning_rate": 4.9985736072522395e-05, "loss": 4.9262, "step": 3275000 }, { "epoch": 0.05, "learning_rate": 4.998569248544454e-05, "loss": 4.9168, "step": 3280000 }, { "epoch": 0.05, "learning_rate": 4.998564882314898e-05, "loss": 4.9056, "step": 3285000 }, { "epoch": 0.05, "learning_rate": 4.998560508559589e-05, "loss": 4.9094, "step": 3290000 }, { "epoch": 0.05, "learning_rate": 4.998556129028392e-05, "loss": 4.914, "step": 3295000 }, { "epoch": 0.05, "learning_rate": 4.9985517428470584e-05, "loss": 4.9264, "step": 3300000 }, { "epoch": 0.05, "learning_rate": 4.998547349136016e-05, "loss": 4.9283, "step": 3305000 }, { "epoch": 0.05, "learning_rate": 4.998542950534025e-05, "loss": 4.9277, "step": 3310000 }, { "epoch": 0.05, "learning_rate": 4.998538543520106e-05, "loss": 4.9495, "step": 3315000 }, { "epoch": 0.05, "learning_rate": 4.998534130737009e-05, "loss": 4.9314, "step": 3320000 }, { "epoch": 0.05, "learning_rate": 4.998529711303834e-05, "loss": 4.9316, "step": 3325000 }, { "epoch": 0.05, "learning_rate": 4.9985252861068276e-05, "loss": 4.9105, "step": 3330000 }, { "epoch": 0.05, "learning_rate": 4.9985208524872937e-05, "loss": 4.9296, "step": 3335000 }, { "epoch": 0.05, "learning_rate": 4.998516413103953e-05, "loss": 4.9421, "step": 3340000 }, { "epoch": 0.05, "learning_rate": 4.9985119670705816e-05, "loss": 4.9085, "step": 3345000 }, { "epoch": 0.06, "learning_rate": 4.9985075143871895e-05, "loss": 4.9252, "step": 3350000 }, { "epoch": 0.06, "learning_rate": 4.998503055053789e-05, "loss": 4.9306, "step": 3355000 }, { "epoch": 0.06, "learning_rate": 4.998498589070394e-05, "loss": 4.9169, "step": 3360000 }, { "epoch": 0.06, "learning_rate": 4.9984941164370134e-05, "loss": 4.923, "step": 3365000 }, { "epoch": 0.06, "learning_rate": 4.9984896371536615e-05, "loss": 4.9267, "step": 3370000 }, { "epoch": 0.06, "learning_rate": 4.9984851512203496e-05, "loss": 4.9252, "step": 3375000 }, { "epoch": 0.06, "learning_rate": 4.998480658637089e-05, "loss": 4.9304, "step": 3380000 }, { "epoch": 0.06, "learning_rate": 4.998476159403892e-05, "loss": 4.9267, "step": 3385000 }, { "epoch": 0.06, "learning_rate": 4.998471654422973e-05, "loss": 4.9305, "step": 3390000 }, { "epoch": 0.06, "learning_rate": 4.998467140987736e-05, "loss": 4.9249, "step": 3395000 }, { "epoch": 0.06, "learning_rate": 4.998462621804802e-05, "loss": 4.9292, "step": 3400000 }, { "epoch": 0.06, "learning_rate": 4.998458096878174e-05, "loss": 4.9303, "step": 3405000 }, { "epoch": 0.06, "learning_rate": 4.9984535643968045e-05, "loss": 4.9385, "step": 3410000 }, { "epoch": 0.06, "learning_rate": 4.998449025265572e-05, "loss": 4.9136, "step": 3415000 }, { "epoch": 0.06, "learning_rate": 4.998444479484487e-05, "loss": 4.9172, "step": 3420000 }, { "epoch": 0.06, "learning_rate": 4.998439926142046e-05, "loss": 4.9149, "step": 3425000 }, { "epoch": 0.06, "learning_rate": 4.998435367972809e-05, "loss": 4.9248, "step": 3430000 }, { "epoch": 0.06, "learning_rate": 4.99843080224224e-05, "loss": 4.938, "step": 3435000 }, { "epoch": 0.06, "learning_rate": 4.998426230777375e-05, "loss": 4.9206, "step": 3440000 }, { "epoch": 0.06, "learning_rate": 4.998421650831704e-05, "loss": 4.9468, "step": 3445000 }, { "epoch": 0.06, "learning_rate": 4.998417064233592e-05, "loss": 4.8991, "step": 3450000 }, { "epoch": 0.06, "learning_rate": 4.998412471902551e-05, "loss": 4.927, "step": 3455000 }, { "epoch": 0.06, "learning_rate": 4.998407872921756e-05, "loss": 4.9184, "step": 3460000 }, { "epoch": 0.06, "learning_rate": 4.998403267291217e-05, "loss": 4.9265, "step": 3465000 }, { "epoch": 0.06, "learning_rate": 4.9983986568579285e-05, "loss": 4.9239, "step": 3470000 }, { "epoch": 0.06, "learning_rate": 4.998394036080961e-05, "loss": 4.9122, "step": 3475000 }, { "epoch": 0.06, "learning_rate": 4.9983894114274195e-05, "loss": 4.9104, "step": 3480000 }, { "epoch": 0.06, "learning_rate": 4.9983847782718816e-05, "loss": 4.9235, "step": 3485000 }, { "epoch": 0.06, "learning_rate": 4.998380139392814e-05, "loss": 4.9364, "step": 3490000 }, { "epoch": 0.06, "learning_rate": 4.9983754947942206e-05, "loss": 4.9282, "step": 3495000 }, { "epoch": 0.06, "learning_rate": 4.998370841685685e-05, "loss": 4.9226, "step": 3500000 }, { "epoch": 0.06, "learning_rate": 4.998366182857648e-05, "loss": 4.9129, "step": 3505000 }, { "epoch": 0.06, "learning_rate": 4.9983615183141135e-05, "loss": 4.931, "step": 3510000 }, { "epoch": 0.06, "learning_rate": 4.998356845252692e-05, "loss": 4.9167, "step": 3515000 }, { "epoch": 0.06, "learning_rate": 4.998352166475797e-05, "loss": 4.9115, "step": 3520000 }, { "epoch": 0.06, "learning_rate": 4.998347481049308e-05, "loss": 4.9167, "step": 3525000 }, { "epoch": 0.06, "learning_rate": 4.998342788973237e-05, "loss": 4.9318, "step": 3530000 }, { "epoch": 0.06, "learning_rate": 4.99833808930681e-05, "loss": 4.9451, "step": 3535000 }, { "epoch": 0.06, "learning_rate": 4.998333383930281e-05, "loss": 4.93, "step": 3540000 }, { "epoch": 0.06, "learning_rate": 4.9983286719042096e-05, "loss": 4.9041, "step": 3545000 }, { "epoch": 0.06, "learning_rate": 4.998323954173383e-05, "loss": 4.9116, "step": 3550000 }, { "epoch": 0.06, "learning_rate": 4.99831922884959e-05, "loss": 4.914, "step": 3555000 }, { "epoch": 0.06, "learning_rate": 4.9983144968762904e-05, "loss": 4.9201, "step": 3560000 }, { "epoch": 0.06, "learning_rate": 4.9983097582534974e-05, "loss": 4.925, "step": 3565000 }, { "epoch": 0.06, "learning_rate": 4.998305012981222e-05, "loss": 4.9243, "step": 3570000 }, { "epoch": 0.06, "learning_rate": 4.9983002601080476e-05, "loss": 4.9237, "step": 3575000 }, { "epoch": 0.06, "learning_rate": 4.9982955005827555e-05, "loss": 4.922, "step": 3580000 }, { "epoch": 0.06, "learning_rate": 4.998290736313542e-05, "loss": 4.9136, "step": 3585000 }, { "epoch": 0.06, "learning_rate": 4.998285964442138e-05, "loss": 4.9179, "step": 3590000 }, { "epoch": 0.06, "learning_rate": 4.998281185921314e-05, "loss": 4.9133, "step": 3595000 }, { "epoch": 0.06, "learning_rate": 4.998276399793003e-05, "loss": 4.9165, "step": 3600000 }, { "epoch": 0.06, "learning_rate": 4.998271607972051e-05, "loss": 4.932, "step": 3605000 }, { "epoch": 0.06, "learning_rate": 4.998266810462462e-05, "loss": 4.9317, "step": 3610000 }, { "epoch": 0.06, "learning_rate": 4.998262004382019e-05, "loss": 4.9294, "step": 3615000 }, { "epoch": 0.06, "learning_rate": 4.998257191649561e-05, "loss": 4.9324, "step": 3620000 }, { "epoch": 0.06, "learning_rate": 4.998252374194571e-05, "loss": 4.9324, "step": 3625000 }, { "epoch": 0.06, "learning_rate": 4.998247548160782e-05, "loss": 4.9123, "step": 3630000 }, { "epoch": 0.06, "learning_rate": 4.998242716442411e-05, "loss": 4.9102, "step": 3635000 }, { "epoch": 0.06, "learning_rate": 4.998237877106012e-05, "loss": 4.9151, "step": 3640000 }, { "epoch": 0.06, "learning_rate": 4.998233031117661e-05, "loss": 4.9093, "step": 3645000 }, { "epoch": 0.06, "learning_rate": 4.9982281813915333e-05, "loss": 4.9394, "step": 3650000 }, { "epoch": 0.06, "learning_rate": 4.9982233221033106e-05, "loss": 4.9352, "step": 3655000 }, { "epoch": 0.06, "learning_rate": 4.9982184571372234e-05, "loss": 4.9184, "step": 3660000 }, { "epoch": 0.06, "learning_rate": 4.998213585521898e-05, "loss": 4.9127, "step": 3665000 }, { "epoch": 0.06, "learning_rate": 4.9982087072573466e-05, "loss": 4.9108, "step": 3670000 }, { "epoch": 0.06, "learning_rate": 4.9982038233216225e-05, "loss": 4.928, "step": 3675000 }, { "epoch": 0.06, "learning_rate": 4.9981989307806196e-05, "loss": 4.9343, "step": 3680000 }, { "epoch": 0.06, "learning_rate": 4.9981940325684696e-05, "loss": 4.9287, "step": 3685000 }, { "epoch": 0.06, "learning_rate": 4.998189127707147e-05, "loss": 4.9036, "step": 3690000 }, { "epoch": 0.06, "learning_rate": 4.998184217180023e-05, "loss": 4.914, "step": 3695000 }, { "epoch": 0.06, "learning_rate": 4.998179299021723e-05, "loss": 4.9034, "step": 3700000 }, { "epoch": 0.06, "learning_rate": 4.998174373228266e-05, "loss": 4.9301, "step": 3705000 }, { "epoch": 0.06, "learning_rate": 4.998169443745082e-05, "loss": 4.9152, "step": 3710000 }, { "epoch": 0.06, "learning_rate": 4.998164504652066e-05, "loss": 4.9493, "step": 3715000 }, { "epoch": 0.06, "learning_rate": 4.9981595598973076e-05, "loss": 4.9314, "step": 3720000 }, { "epoch": 0.06, "learning_rate": 4.998154607502125e-05, "loss": 4.9165, "step": 3725000 }, { "epoch": 0.06, "learning_rate": 4.998149650440557e-05, "loss": 4.9222, "step": 3730000 }, { "epoch": 0.06, "learning_rate": 4.998144685738593e-05, "loss": 4.9132, "step": 3735000 }, { "epoch": 0.06, "learning_rate": 4.998139714387585e-05, "loss": 4.9146, "step": 3740000 }, { "epoch": 0.06, "learning_rate": 4.9981347353908855e-05, "loss": 4.8965, "step": 3745000 }, { "epoch": 0.06, "learning_rate": 4.998129751738498e-05, "loss": 4.9208, "step": 3750000 }, { "epoch": 0.06, "learning_rate": 4.9981247594411195e-05, "loss": 4.9253, "step": 3755000 }, { "epoch": 0.06, "learning_rate": 4.9981197624934004e-05, "loss": 4.9077, "step": 3760000 }, { "epoch": 0.06, "learning_rate": 4.9981147568953965e-05, "loss": 4.8947, "step": 3765000 }, { "epoch": 0.06, "learning_rate": 4.998109745649085e-05, "loss": 4.9191, "step": 3770000 }, { "epoch": 0.06, "learning_rate": 4.998104727753824e-05, "loss": 4.9135, "step": 3775000 }, { "epoch": 0.06, "learning_rate": 4.998099704215603e-05, "loss": 4.9328, "step": 3780000 }, { "epoch": 0.06, "learning_rate": 4.998094673023814e-05, "loss": 4.9244, "step": 3785000 }, { "epoch": 0.06, "learning_rate": 4.99808963417448e-05, "loss": 4.9095, "step": 3790000 }, { "epoch": 0.06, "learning_rate": 4.998084589683556e-05, "loss": 4.9019, "step": 3795000 }, { "epoch": 0.06, "learning_rate": 4.99807953854375e-05, "loss": 4.916, "step": 3800000 }, { "epoch": 0.06, "learning_rate": 4.998074480755075e-05, "loss": 4.9155, "step": 3805000 }, { "epoch": 0.06, "learning_rate": 4.9980694153035866e-05, "loss": 4.9176, "step": 3810000 }, { "epoch": 0.06, "learning_rate": 4.998064344215885e-05, "loss": 4.907, "step": 3815000 }, { "epoch": 0.06, "learning_rate": 4.998059267495973e-05, "loss": 4.9276, "step": 3820000 }, { "epoch": 0.06, "learning_rate": 4.998054182094008e-05, "loss": 4.9121, "step": 3825000 }, { "epoch": 0.06, "learning_rate": 4.998049092079141e-05, "loss": 4.9073, "step": 3830000 }, { "epoch": 0.06, "learning_rate": 4.998043993376928e-05, "loss": 4.9338, "step": 3835000 }, { "epoch": 0.06, "learning_rate": 4.998038889045219e-05, "loss": 4.9215, "step": 3840000 }, { "epoch": 0.06, "learning_rate": 4.998033779088021e-05, "loss": 4.9176, "step": 3845000 }, { "epoch": 0.06, "learning_rate": 4.998028660435534e-05, "loss": 4.9032, "step": 3850000 }, { "epoch": 0.06, "learning_rate": 4.9980235371835153e-05, "loss": 4.9017, "step": 3855000 }, { "epoch": 0.06, "learning_rate": 4.998018406258177e-05, "loss": 4.9277, "step": 3860000 }, { "epoch": 0.06, "learning_rate": 4.9980132676555404e-05, "loss": 4.9127, "step": 3865000 }, { "epoch": 0.06, "learning_rate": 4.998008124461394e-05, "loss": 4.9288, "step": 3870000 }, { "epoch": 0.06, "learning_rate": 4.9980029735899784e-05, "loss": 4.9136, "step": 3875000 }, { "epoch": 0.06, "learning_rate": 4.9979978160698975e-05, "loss": 4.9349, "step": 3880000 }, { "epoch": 0.06, "learning_rate": 4.9979926519011656e-05, "loss": 4.9386, "step": 3885000 }, { "epoch": 0.06, "learning_rate": 4.997987482119038e-05, "loss": 4.9066, "step": 3890000 }, { "epoch": 0.06, "learning_rate": 4.997982304654375e-05, "loss": 4.9161, "step": 3895000 }, { "epoch": 0.06, "learning_rate": 4.997977120541102e-05, "loss": 4.8986, "step": 3900000 }, { "epoch": 0.06, "learning_rate": 4.9979719297792326e-05, "loss": 4.9414, "step": 3905000 }, { "epoch": 0.06, "learning_rate": 4.997966733409344e-05, "loss": 4.9378, "step": 3910000 }, { "epoch": 0.06, "learning_rate": 4.997961528309761e-05, "loss": 4.9176, "step": 3915000 }, { "epoch": 0.06, "learning_rate": 4.997956317602186e-05, "loss": 4.9413, "step": 3920000 }, { "epoch": 0.06, "learning_rate": 4.997951100246071e-05, "loss": 4.9116, "step": 3925000 }, { "epoch": 0.06, "learning_rate": 4.997945876241428e-05, "loss": 4.9077, "step": 3930000 }, { "epoch": 0.06, "learning_rate": 4.997940645588273e-05, "loss": 4.9164, "step": 3935000 }, { "epoch": 0.06, "learning_rate": 4.997935408286618e-05, "loss": 4.9314, "step": 3940000 }, { "epoch": 0.06, "learning_rate": 4.9979301643364784e-05, "loss": 4.9021, "step": 3945000 }, { "epoch": 0.06, "learning_rate": 4.997924913737867e-05, "loss": 4.9233, "step": 3950000 }, { "epoch": 0.06, "learning_rate": 4.997919656490799e-05, "loss": 4.919, "step": 3955000 }, { "epoch": 0.07, "learning_rate": 4.997914392595288e-05, "loss": 4.931, "step": 3960000 }, { "epoch": 0.07, "learning_rate": 4.9979091220513474e-05, "loss": 4.9163, "step": 3965000 }, { "epoch": 0.07, "learning_rate": 4.997903844858991e-05, "loss": 4.9235, "step": 3970000 }, { "epoch": 0.07, "learning_rate": 4.9978985620760905e-05, "loss": 4.922, "step": 3975000 }, { "epoch": 0.07, "learning_rate": 4.997893271588277e-05, "loss": 4.9297, "step": 3980000 }, { "epoch": 0.07, "learning_rate": 4.9978879733915735e-05, "loss": 4.9327, "step": 3985000 }, { "epoch": 0.07, "learning_rate": 4.997882669605698e-05, "loss": 4.9429, "step": 3990000 }, { "epoch": 0.07, "learning_rate": 4.997877359171477e-05, "loss": 4.9304, "step": 3995000 }, { "epoch": 0.07, "learning_rate": 4.997872042088926e-05, "loss": 4.9064, "step": 4000000 }, { "epoch": 0.07, "eval_loss": 5.7131476402282715, "eval_runtime": 89252.7005, "eval_samples_per_second": 124.174, "eval_steps_per_second": 24.835, "step": 4000000 }, { "epoch": 0.07, "learning_rate": 4.997866718358058e-05, "loss": 4.9185, "step": 4005000 }, { "epoch": 0.07, "learning_rate": 4.9978613890460555e-05, "loss": 4.9265, "step": 4010000 }, { "epoch": 0.07, "learning_rate": 4.997856050951429e-05, "loss": 4.9214, "step": 4015000 }, { "epoch": 0.07, "learning_rate": 4.9978507072756974e-05, "loss": 4.9284, "step": 4020000 }, { "epoch": 0.07, "learning_rate": 4.9978453569517057e-05, "loss": 4.9222, "step": 4025000 }, { "epoch": 0.07, "learning_rate": 4.997839999979468e-05, "loss": 4.9012, "step": 4030000 }, { "epoch": 0.07, "learning_rate": 4.997834636359e-05, "loss": 4.9201, "step": 4035000 }, { "epoch": 0.07, "learning_rate": 4.997829266090315e-05, "loss": 4.9297, "step": 4040000 }, { "epoch": 0.07, "learning_rate": 4.9978238902499056e-05, "loss": 4.9262, "step": 4045000 }, { "epoch": 0.07, "learning_rate": 4.997818506686159e-05, "loss": 4.9268, "step": 4050000 }, { "epoch": 0.07, "learning_rate": 4.9978131164742385e-05, "loss": 4.9287, "step": 4055000 }, { "epoch": 0.07, "learning_rate": 4.9978077206946275e-05, "loss": 4.9217, "step": 4060000 }, { "epoch": 0.07, "learning_rate": 4.997802317187733e-05, "loss": 4.9243, "step": 4065000 }, { "epoch": 0.07, "learning_rate": 4.997796908115836e-05, "loss": 4.9359, "step": 4070000 }, { "epoch": 0.07, "learning_rate": 4.9977914913140236e-05, "loss": 4.8912, "step": 4075000 }, { "epoch": 0.07, "learning_rate": 4.997786067864109e-05, "loss": 4.9456, "step": 4080000 }, { "epoch": 0.07, "learning_rate": 4.9977806366789875e-05, "loss": 4.9221, "step": 4085000 }, { "epoch": 0.07, "learning_rate": 4.9977751999315824e-05, "loss": 4.9373, "step": 4090000 }, { "epoch": 0.07, "learning_rate": 4.997769756536118e-05, "loss": 4.9233, "step": 4095000 }, { "epoch": 0.07, "learning_rate": 4.99776430649261e-05, "loss": 4.9114, "step": 4100000 }, { "epoch": 0.07, "learning_rate": 4.997758849801072e-05, "loss": 4.933, "step": 4105000 }, { "epoch": 0.07, "learning_rate": 4.997753386461518e-05, "loss": 4.9103, "step": 4110000 }, { "epoch": 0.07, "learning_rate": 4.997747916473964e-05, "loss": 4.9157, "step": 4115000 }, { "epoch": 0.07, "learning_rate": 4.9977424387419933e-05, "loss": 4.9396, "step": 4120000 }, { "epoch": 0.07, "learning_rate": 4.997736956554913e-05, "loss": 4.92, "step": 4125000 }, { "epoch": 0.07, "learning_rate": 4.9977314666234434e-05, "loss": 4.9271, "step": 4130000 }, { "epoch": 0.07, "learning_rate": 4.9977259700440324e-05, "loss": 4.9224, "step": 4135000 }, { "epoch": 0.07, "learning_rate": 4.997720466816694e-05, "loss": 4.9348, "step": 4140000 }, { "epoch": 0.07, "learning_rate": 4.9977149558383614e-05, "loss": 4.933, "step": 4145000 }, { "epoch": 0.07, "learning_rate": 4.997709440418292e-05, "loss": 4.9052, "step": 4150000 }, { "epoch": 0.07, "learning_rate": 4.9977039183529996e-05, "loss": 4.9324, "step": 4155000 }, { "epoch": 0.07, "learning_rate": 4.9976983874283555e-05, "loss": 4.9387, "step": 4160000 }, { "epoch": 0.07, "learning_rate": 4.9976928509615986e-05, "loss": 4.9388, "step": 4165000 }, { "epoch": 0.07, "learning_rate": 4.997687307847002e-05, "loss": 4.9191, "step": 4170000 }, { "epoch": 0.07, "learning_rate": 4.997681756973518e-05, "loss": 4.9155, "step": 4175000 }, { "epoch": 0.07, "learning_rate": 4.9976762016743485e-05, "loss": 4.9267, "step": 4180000 }, { "epoch": 0.07, "learning_rate": 4.997670638616322e-05, "loss": 4.9189, "step": 4185000 }, { "epoch": 0.07, "learning_rate": 4.997665068910515e-05, "loss": 4.934, "step": 4190000 }, { "epoch": 0.07, "learning_rate": 4.997659492556942e-05, "loss": 4.945, "step": 4195000 }, { "epoch": 0.07, "learning_rate": 4.9976539095556184e-05, "loss": 4.9301, "step": 4200000 }, { "epoch": 0.07, "learning_rate": 4.9976483199065596e-05, "loss": 4.9186, "step": 4205000 }, { "epoch": 0.07, "learning_rate": 4.997642723609778e-05, "loss": 4.9274, "step": 4210000 }, { "epoch": 0.07, "learning_rate": 4.997637119543589e-05, "loss": 4.9078, "step": 4215000 }, { "epoch": 0.07, "learning_rate": 4.997631509950082e-05, "loss": 4.915, "step": 4220000 }, { "epoch": 0.07, "learning_rate": 4.99762589483326e-05, "loss": 4.9113, "step": 4225000 }, { "epoch": 0.07, "learning_rate": 4.997620271945744e-05, "loss": 4.9002, "step": 4230000 }, { "epoch": 0.07, "learning_rate": 4.9976146412835595e-05, "loss": 4.9034, "step": 4235000 }, { "epoch": 0.07, "learning_rate": 4.997609005099436e-05, "loss": 4.9529, "step": 4240000 }, { "epoch": 0.07, "learning_rate": 4.9976033622676966e-05, "loss": 4.9164, "step": 4245000 }, { "epoch": 0.07, "learning_rate": 4.997597712788356e-05, "loss": 4.9256, "step": 4250000 }, { "epoch": 0.07, "learning_rate": 4.997592056661428e-05, "loss": 4.9165, "step": 4255000 }, { "epoch": 0.07, "learning_rate": 4.997586392753256e-05, "loss": 4.914, "step": 4260000 }, { "epoch": 0.07, "learning_rate": 4.997580723329871e-05, "loss": 4.9178, "step": 4265000 }, { "epoch": 0.07, "learning_rate": 4.997575047258944e-05, "loss": 4.9151, "step": 4270000 }, { "epoch": 0.07, "learning_rate": 4.9975693645404924e-05, "loss": 4.9352, "step": 4275000 }, { "epoch": 0.07, "learning_rate": 4.99756367517453e-05, "loss": 4.9225, "step": 4280000 }, { "epoch": 0.07, "learning_rate": 4.9975579803013945e-05, "loss": 4.909, "step": 4285000 }, { "epoch": 0.07, "learning_rate": 4.997552276500131e-05, "loss": 4.9245, "step": 4290000 }, { "epoch": 0.07, "learning_rate": 4.997546567191727e-05, "loss": 4.9402, "step": 4295000 }, { "epoch": 0.07, "learning_rate": 4.997540850091558e-05, "loss": 4.9305, "step": 4300000 }, { "epoch": 0.07, "learning_rate": 4.997535127486938e-05, "loss": 4.9365, "step": 4305000 }, { "epoch": 0.07, "learning_rate": 4.9975293993818726e-05, "loss": 4.9068, "step": 4310000 }, { "epoch": 0.07, "learning_rate": 4.997523663483758e-05, "loss": 4.917, "step": 4315000 }, { "epoch": 0.07, "learning_rate": 4.9975179209382545e-05, "loss": 4.9151, "step": 4320000 }, { "epoch": 0.07, "learning_rate": 4.9975121728963405e-05, "loss": 4.9292, "step": 4325000 }, { "epoch": 0.07, "learning_rate": 4.997506415905141e-05, "loss": 4.9071, "step": 4330000 }, { "epoch": 0.07, "learning_rate": 4.9975006534175614e-05, "loss": 4.9275, "step": 4335000 }, { "epoch": 0.07, "learning_rate": 4.997494884282654e-05, "loss": 4.9266, "step": 4340000 }, { "epoch": 0.07, "learning_rate": 4.997489108500434e-05, "loss": 4.9184, "step": 4345000 }, { "epoch": 0.07, "learning_rate": 4.9974833260709164e-05, "loss": 4.9157, "step": 4350000 }, { "epoch": 0.07, "learning_rate": 4.997477538153061e-05, "loss": 4.9291, "step": 4355000 }, { "epoch": 0.07, "learning_rate": 4.9974717412700514e-05, "loss": 4.9364, "step": 4360000 }, { "epoch": 0.07, "learning_rate": 4.997465940060339e-05, "loss": 4.9038, "step": 4365000 }, { "epoch": 0.07, "learning_rate": 4.997460132206049e-05, "loss": 4.9008, "step": 4370000 }, { "epoch": 0.07, "learning_rate": 4.997454315378674e-05, "loss": 4.8946, "step": 4375000 }, { "epoch": 0.07, "learning_rate": 4.9974484930670265e-05, "loss": 4.9404, "step": 4380000 }, { "epoch": 0.07, "learning_rate": 4.9974426652751126e-05, "loss": 4.9428, "step": 4385000 }, { "epoch": 0.07, "learning_rate": 4.9974368296704325e-05, "loss": 4.8944, "step": 4390000 }, { "epoch": 0.07, "learning_rate": 4.997430987418594e-05, "loss": 4.9099, "step": 4395000 }, { "epoch": 0.07, "learning_rate": 4.9974251385196133e-05, "loss": 4.9063, "step": 4400000 }, { "epoch": 0.07, "learning_rate": 4.9974192818012634e-05, "loss": 4.9214, "step": 4405000 }, { "epoch": 0.07, "learning_rate": 4.997413420780287e-05, "loss": 4.9207, "step": 4410000 }, { "epoch": 0.07, "learning_rate": 4.997407550765069e-05, "loss": 4.9138, "step": 4415000 }, { "epoch": 0.07, "learning_rate": 4.997401675276343e-05, "loss": 4.9118, "step": 4420000 }, { "epoch": 0.07, "learning_rate": 4.9973957931405535e-05, "loss": 4.9215, "step": 4425000 }, { "epoch": 0.07, "learning_rate": 4.997389905536608e-05, "loss": 4.9255, "step": 4430000 }, { "epoch": 0.07, "learning_rate": 4.9973840089278427e-05, "loss": 4.9152, "step": 4435000 }, { "epoch": 0.07, "learning_rate": 4.997378108032506e-05, "loss": 4.9019, "step": 4440000 }, { "epoch": 0.07, "learning_rate": 4.9973721993099445e-05, "loss": 4.9177, "step": 4445000 }, { "epoch": 0.07, "learning_rate": 4.997366283940397e-05, "loss": 4.9176, "step": 4450000 }, { "epoch": 0.07, "learning_rate": 4.997360363109421e-05, "loss": 4.9307, "step": 4455000 }, { "epoch": 0.07, "learning_rate": 4.997354434447279e-05, "loss": 4.9169, "step": 4460000 }, { "epoch": 0.07, "learning_rate": 4.997348499138197e-05, "loss": 4.9188, "step": 4465000 }, { "epoch": 0.07, "learning_rate": 4.997342557182193e-05, "loss": 4.9217, "step": 4470000 }, { "epoch": 0.07, "learning_rate": 4.9973366085792815e-05, "loss": 4.917, "step": 4475000 }, { "epoch": 0.07, "learning_rate": 4.99733065452167e-05, "loss": 4.9257, "step": 4480000 }, { "epoch": 0.07, "learning_rate": 4.997324692626321e-05, "loss": 4.9169, "step": 4485000 }, { "epoch": 0.07, "learning_rate": 4.997318722889262e-05, "loss": 4.8994, "step": 4490000 }, { "epoch": 0.07, "learning_rate": 4.9973127476988794e-05, "loss": 4.9202, "step": 4495000 }, { "epoch": 0.07, "learning_rate": 4.9973067658616696e-05, "loss": 4.9199, "step": 4500000 }, { "epoch": 0.07, "learning_rate": 4.997300777377648e-05, "loss": 4.9212, "step": 4505000 }, { "epoch": 0.07, "learning_rate": 4.99729478224683e-05, "loss": 4.9335, "step": 4510000 }, { "epoch": 0.07, "learning_rate": 4.9972887816707326e-05, "loss": 4.9245, "step": 4515000 }, { "epoch": 0.07, "learning_rate": 4.99728277204487e-05, "loss": 4.9058, "step": 4520000 }, { "epoch": 0.07, "learning_rate": 4.99727675697376e-05, "loss": 4.9293, "step": 4525000 }, { "epoch": 0.07, "learning_rate": 4.997270735255918e-05, "loss": 4.9101, "step": 4530000 }, { "epoch": 0.07, "learning_rate": 4.997264706891359e-05, "loss": 4.8956, "step": 4535000 }, { "epoch": 0.07, "learning_rate": 4.9972586718801014e-05, "loss": 4.9183, "step": 4540000 }, { "epoch": 0.07, "learning_rate": 4.997252630222159e-05, "loss": 4.9539, "step": 4545000 }, { "epoch": 0.07, "learning_rate": 4.997246581917549e-05, "loss": 4.9095, "step": 4550000 }, { "epoch": 0.07, "learning_rate": 4.997240526966287e-05, "loss": 4.9115, "step": 4555000 }, { "epoch": 0.07, "learning_rate": 4.99723446536839e-05, "loss": 4.9332, "step": 4560000 }, { "epoch": 0.07, "learning_rate": 4.997228395909073e-05, "loss": 4.9382, "step": 4565000 }, { "epoch": 0.08, "learning_rate": 4.997222321016623e-05, "loss": 4.9363, "step": 4570000 }, { "epoch": 0.08, "learning_rate": 4.9972162382601264e-05, "loss": 4.9139, "step": 4575000 }, { "epoch": 0.08, "learning_rate": 4.997210151291977e-05, "loss": 4.9042, "step": 4580000 }, { "epoch": 0.08, "learning_rate": 4.9972040552396946e-05, "loss": 4.9126, "step": 4585000 }, { "epoch": 0.08, "learning_rate": 4.9971979549811124e-05, "loss": 4.9085, "step": 4590000 }, { "epoch": 0.08, "learning_rate": 4.997191845633109e-05, "loss": 4.9193, "step": 4595000 }, { "epoch": 0.08, "learning_rate": 4.997185730860049e-05, "loss": 4.9203, "step": 4600000 }, { "epoch": 0.08, "learning_rate": 4.997179609440499e-05, "loss": 4.9222, "step": 4605000 }, { "epoch": 0.08, "learning_rate": 4.997173481374475e-05, "loss": 4.9331, "step": 4610000 }, { "epoch": 0.08, "learning_rate": 4.997167345433896e-05, "loss": 4.9181, "step": 4615000 }, { "epoch": 0.08, "learning_rate": 4.997161204073645e-05, "loss": 4.9076, "step": 4620000 }, { "epoch": 0.08, "learning_rate": 4.9971550548362114e-05, "loss": 4.9337, "step": 4625000 }, { "epoch": 0.08, "learning_rate": 4.997148901413885e-05, "loss": 4.9546, "step": 4630000 }, { "epoch": 0.08, "learning_rate": 4.997142740114409e-05, "loss": 4.9294, "step": 4635000 }, { "epoch": 0.08, "learning_rate": 4.997136572168558e-05, "loss": 4.9112, "step": 4640000 }, { "epoch": 0.08, "learning_rate": 4.9971303975763485e-05, "loss": 4.9278, "step": 4645000 }, { "epoch": 0.08, "learning_rate": 4.997124217575203e-05, "loss": 4.9031, "step": 4650000 }, { "epoch": 0.08, "learning_rate": 4.9971180296916536e-05, "loss": 4.9047, "step": 4655000 }, { "epoch": 0.08, "learning_rate": 4.9971118351617955e-05, "loss": 4.9296, "step": 4660000 }, { "epoch": 0.08, "learning_rate": 4.9971056327442466e-05, "loss": 4.9256, "step": 4665000 }, { "epoch": 0.08, "learning_rate": 4.997099424920488e-05, "loss": 4.9212, "step": 4670000 }, { "epoch": 0.08, "learning_rate": 4.9970932116945266e-05, "loss": 4.9109, "step": 4675000 }, { "epoch": 0.08, "learning_rate": 4.997086990579595e-05, "loss": 4.9353, "step": 4680000 }, { "epoch": 0.08, "learning_rate": 4.9970807640651505e-05, "loss": 4.9037, "step": 4685000 }, { "epoch": 0.08, "learning_rate": 4.997074528411066e-05, "loss": 4.9395, "step": 4690000 }, { "epoch": 0.08, "learning_rate": 4.9970682873575026e-05, "loss": 4.9152, "step": 4695000 }, { "epoch": 0.08, "learning_rate": 4.997062039657762e-05, "loss": 4.9204, "step": 4700000 }, { "epoch": 0.08, "learning_rate": 4.9970557853118616e-05, "loss": 4.9222, "step": 4705000 }, { "epoch": 0.08, "learning_rate": 4.997049524319816e-05, "loss": 4.9172, "step": 4710000 }, { "epoch": 0.08, "learning_rate": 4.9970432566816436e-05, "loss": 4.9059, "step": 4715000 }, { "epoch": 0.08, "learning_rate": 4.997036982397361e-05, "loss": 4.9106, "step": 4720000 }, { "epoch": 0.08, "learning_rate": 4.997030702724338e-05, "loss": 4.945, "step": 4725000 }, { "epoch": 0.08, "learning_rate": 4.997024415149213e-05, "loss": 4.9166, "step": 4730000 }, { "epoch": 0.08, "learning_rate": 4.9970181196680155e-05, "loss": 4.9219, "step": 4735000 }, { "epoch": 0.08, "learning_rate": 4.997011818799457e-05, "loss": 4.9137, "step": 4740000 }, { "epoch": 0.08, "learning_rate": 4.9970055112848724e-05, "loss": 4.9339, "step": 4745000 }, { "epoch": 0.08, "learning_rate": 4.996999197124277e-05, "loss": 4.9207, "step": 4750000 }, { "epoch": 0.08, "learning_rate": 4.996992876317689e-05, "loss": 4.9344, "step": 4755000 }, { "epoch": 0.08, "learning_rate": 4.9969865488651235e-05, "loss": 4.9365, "step": 4760000 }, { "epoch": 0.08, "learning_rate": 4.99698021603459e-05, "loss": 4.9161, "step": 4765000 }, { "epoch": 0.08, "learning_rate": 4.9969738740221314e-05, "loss": 4.9314, "step": 4770000 }, { "epoch": 0.08, "learning_rate": 4.9969675266317386e-05, "loss": 4.9272, "step": 4775000 }, { "epoch": 0.08, "learning_rate": 4.996961172595436e-05, "loss": 4.9435, "step": 4780000 }, { "epoch": 0.08, "learning_rate": 4.996954811913241e-05, "loss": 4.9358, "step": 4785000 }, { "epoch": 0.08, "learning_rate": 4.9969484433105306e-05, "loss": 4.9237, "step": 4790000 }, { "epoch": 0.08, "learning_rate": 4.996942070611243e-05, "loss": 4.9297, "step": 4795000 }, { "epoch": 0.08, "learning_rate": 4.9969356899914735e-05, "loss": 4.9316, "step": 4800000 }, { "epoch": 0.08, "learning_rate": 4.99692930272588e-05, "loss": 4.9147, "step": 4805000 }, { "epoch": 0.08, "learning_rate": 4.99692290753452e-05, "loss": 4.9103, "step": 4810000 }, { "epoch": 0.08, "learning_rate": 4.99691650569471e-05, "loss": 4.9392, "step": 4815000 }, { "epoch": 0.08, "learning_rate": 4.996910098489087e-05, "loss": 4.921, "step": 4820000 }, { "epoch": 0.08, "learning_rate": 4.996903684637706e-05, "loss": 4.9247, "step": 4825000 }, { "epoch": 0.08, "learning_rate": 4.996897266711143e-05, "loss": 4.9239, "step": 4830000 }, { "epoch": 0.08, "learning_rate": 4.996890838284354e-05, "loss": 4.9129, "step": 4835000 }, { "epoch": 0.08, "learning_rate": 4.996884404497137e-05, "loss": 4.9093, "step": 4840000 }, { "epoch": 0.08, "learning_rate": 4.996877964064233e-05, "loss": 4.9133, "step": 4845000 }, { "epoch": 0.08, "learning_rate": 4.996871516985658e-05, "loss": 4.9149, "step": 4850000 }, { "epoch": 0.08, "learning_rate": 4.9968650619695035e-05, "loss": 4.9068, "step": 4855000 }, { "epoch": 0.08, "learning_rate": 4.996858601598309e-05, "loss": 4.924, "step": 4860000 }, { "epoch": 0.08, "learning_rate": 4.9968521345814955e-05, "loss": 4.9329, "step": 4865000 }, { "epoch": 0.08, "learning_rate": 4.9968456635109106e-05, "loss": 4.9235, "step": 4870000 }, { "epoch": 0.08, "learning_rate": 4.9968391806110796e-05, "loss": 4.9145, "step": 4875000 }, { "epoch": 0.08, "learning_rate": 4.996832693657513e-05, "loss": 4.9195, "step": 4880000 }, { "epoch": 0.08, "learning_rate": 4.996826200058394e-05, "loss": 4.9141, "step": 4885000 }, { "epoch": 0.08, "learning_rate": 4.996819701114978e-05, "loss": 4.9266, "step": 4890000 }, { "epoch": 0.08, "learning_rate": 4.996813192923578e-05, "loss": 4.9243, "step": 4895000 }, { "epoch": 0.08, "learning_rate": 4.996806679387913e-05, "loss": 4.9194, "step": 4900000 }, { "epoch": 0.08, "learning_rate": 4.9968001592067685e-05, "loss": 4.9293, "step": 4905000 }, { "epoch": 0.08, "learning_rate": 4.996793632380159e-05, "loss": 4.9222, "step": 4910000 }, { "epoch": 0.08, "learning_rate": 4.996787100215986e-05, "loss": 4.9123, "step": 4915000 }, { "epoch": 0.08, "learning_rate": 4.9967805600998316e-05, "loss": 4.9448, "step": 4920000 }, { "epoch": 0.08, "learning_rate": 4.9967740120277244e-05, "loss": 4.905, "step": 4925000 }, { "epoch": 0.08, "learning_rate": 4.996767458619436e-05, "loss": 4.9361, "step": 4930000 }, { "epoch": 0.08, "learning_rate": 4.9967608985657696e-05, "loss": 4.9311, "step": 4935000 }, { "epoch": 0.08, "learning_rate": 4.9967543318667445e-05, "loss": 4.92, "step": 4940000 }, { "epoch": 0.08, "learning_rate": 4.9967477598382376e-05, "loss": 4.9233, "step": 4945000 }, { "epoch": 0.08, "learning_rate": 4.996741179849876e-05, "loss": 4.9099, "step": 4950000 }, { "epoch": 0.08, "learning_rate": 4.996734593216209e-05, "loss": 4.9289, "step": 4955000 }, { "epoch": 0.08, "learning_rate": 4.9967279986174034e-05, "loss": 4.9286, "step": 4960000 }, { "epoch": 0.08, "learning_rate": 4.996721398691846e-05, "loss": 4.9171, "step": 4965000 }, { "epoch": 0.08, "learning_rate": 4.9967147921210346e-05, "loss": 4.9352, "step": 4970000 }, { "epoch": 0.08, "learning_rate": 4.9967081789049855e-05, "loss": 4.9172, "step": 4975000 }, { "epoch": 0.08, "learning_rate": 4.9967015590437196e-05, "loss": 4.9076, "step": 4980000 }, { "epoch": 0.08, "learning_rate": 4.9966949325372506e-05, "loss": 4.9229, "step": 4985000 }, { "epoch": 0.08, "learning_rate": 4.9966882993855993e-05, "loss": 4.9203, "step": 4990000 }, { "epoch": 0.08, "learning_rate": 4.9966816609179375e-05, "loss": 4.9071, "step": 4995000 }, { "epoch": 0.08, "learning_rate": 4.996675013146817e-05, "loss": 4.9428, "step": 5000000 }, { "epoch": 0.08, "eval_loss": 5.677032947540283, "eval_runtime": 85195.0572, "eval_samples_per_second": 130.089, "eval_steps_per_second": 26.018, "step": 5000000 }, { "epoch": 0.08, "learning_rate": 4.996668361391535e-05, "loss": 4.9122, "step": 5005000 }, { "epoch": 0.08, "learning_rate": 4.996661701660655e-05, "loss": 4.902, "step": 5010000 }, { "epoch": 0.08, "learning_rate": 4.996655035284681e-05, "loss": 4.9102, "step": 5015000 }, { "epoch": 0.08, "learning_rate": 4.99664836226363e-05, "loss": 4.9285, "step": 5020000 }, { "epoch": 0.08, "learning_rate": 4.996641682597518e-05, "loss": 4.9177, "step": 5025000 }, { "epoch": 0.08, "learning_rate": 4.9966349962863656e-05, "loss": 4.9285, "step": 5030000 }, { "epoch": 0.08, "learning_rate": 4.99662830466998e-05, "loss": 4.9162, "step": 5035000 }, { "epoch": 0.08, "learning_rate": 4.9966216050701264e-05, "loss": 4.9232, "step": 5040000 }, { "epoch": 0.08, "learning_rate": 4.996614898825285e-05, "loss": 4.9128, "step": 5045000 }, { "epoch": 0.08, "learning_rate": 4.996608187279251e-05, "loss": 4.9258, "step": 5050000 }, { "epoch": 0.08, "learning_rate": 4.9966014677458165e-05, "loss": 4.923, "step": 5055000 }, { "epoch": 0.08, "learning_rate": 4.9965947415674464e-05, "loss": 4.9133, "step": 5060000 }, { "epoch": 0.08, "learning_rate": 4.996588007396391e-05, "loss": 4.9094, "step": 5065000 }, { "epoch": 0.08, "learning_rate": 4.996581269275973e-05, "loss": 4.9235, "step": 5070000 }, { "epoch": 0.08, "learning_rate": 4.996574523162906e-05, "loss": 4.916, "step": 5075000 }, { "epoch": 0.08, "learning_rate": 4.996567770404976e-05, "loss": 4.943, "step": 5080000 }, { "epoch": 0.08, "learning_rate": 4.996561011002201e-05, "loss": 4.9123, "step": 5085000 }, { "epoch": 0.08, "learning_rate": 4.996554244954599e-05, "loss": 4.9294, "step": 5090000 }, { "epoch": 0.08, "learning_rate": 4.9965474722621876e-05, "loss": 4.9273, "step": 5095000 }, { "epoch": 0.08, "learning_rate": 4.996540692924985e-05, "loss": 4.938, "step": 5100000 }, { "epoch": 0.08, "learning_rate": 4.99653390694301e-05, "loss": 4.937, "step": 5105000 }, { "epoch": 0.08, "learning_rate": 4.99652711431628e-05, "loss": 4.9338, "step": 5110000 }, { "epoch": 0.08, "learning_rate": 4.996520315044813e-05, "loss": 4.9217, "step": 5115000 }, { "epoch": 0.08, "learning_rate": 4.9965135091286275e-05, "loss": 4.9369, "step": 5120000 }, { "epoch": 0.08, "learning_rate": 4.996506696567741e-05, "loss": 4.9121, "step": 5125000 }, { "epoch": 0.08, "learning_rate": 4.996499878727224e-05, "loss": 4.9089, "step": 5130000 }, { "epoch": 0.08, "learning_rate": 4.9964930528783196e-05, "loss": 4.9013, "step": 5135000 }, { "epoch": 0.08, "learning_rate": 4.9964862203847704e-05, "loss": 4.9176, "step": 5140000 }, { "epoch": 0.08, "learning_rate": 4.996479381246592e-05, "loss": 4.9345, "step": 5145000 }, { "epoch": 0.08, "learning_rate": 4.996472534093435e-05, "loss": 4.9277, "step": 5150000 }, { "epoch": 0.08, "learning_rate": 4.996465683036425e-05, "loss": 4.9382, "step": 5155000 }, { "epoch": 0.08, "learning_rate": 4.996458823964473e-05, "loss": 4.9261, "step": 5160000 }, { "epoch": 0.08, "learning_rate": 4.996451958247965e-05, "loss": 4.9125, "step": 5165000 }, { "epoch": 0.08, "learning_rate": 4.9964450858869205e-05, "loss": 4.9254, "step": 5170000 }, { "epoch": 0.09, "learning_rate": 4.996438208258374e-05, "loss": 4.9183, "step": 5175000 }, { "epoch": 0.09, "learning_rate": 4.9964313226096405e-05, "loss": 4.9256, "step": 5180000 }, { "epoch": 0.09, "learning_rate": 4.996424428936749e-05, "loss": 4.9332, "step": 5185000 }, { "epoch": 0.09, "learning_rate": 4.996417531378745e-05, "loss": 4.9342, "step": 5190000 }, { "epoch": 0.09, "learning_rate": 4.996410624414287e-05, "loss": 4.9092, "step": 5195000 }, { "epoch": 0.09, "learning_rate": 4.996403712186406e-05, "loss": 4.9144, "step": 5200000 }, { "epoch": 0.09, "learning_rate": 4.996396793314117e-05, "loss": 4.9206, "step": 5205000 }, { "epoch": 0.09, "learning_rate": 4.9963898691837605e-05, "loss": 4.9171, "step": 5210000 }, { "epoch": 0.09, "learning_rate": 4.9963829370240395e-05, "loss": 4.9223, "step": 5215000 }, { "epoch": 0.09, "learning_rate": 4.996375996830984e-05, "loss": 4.9195, "step": 5220000 }, { "epoch": 0.09, "learning_rate": 4.996369051381246e-05, "loss": 4.9448, "step": 5225000 }, { "epoch": 0.09, "learning_rate": 4.996362099287192e-05, "loss": 4.9378, "step": 5230000 }, { "epoch": 0.09, "learning_rate": 4.99635514054884e-05, "loss": 4.9403, "step": 5235000 }, { "epoch": 0.09, "learning_rate": 4.9963481765605066e-05, "loss": 4.9432, "step": 5240000 }, { "epoch": 0.09, "learning_rate": 4.996341203139317e-05, "loss": 4.9481, "step": 5245000 }, { "epoch": 0.09, "learning_rate": 4.996334224468183e-05, "loss": 4.9374, "step": 5250000 }, { "epoch": 0.09, "learning_rate": 4.996327239152826e-05, "loss": 4.9241, "step": 5255000 }, { "epoch": 0.09, "learning_rate": 4.9963202485928804e-05, "loss": 4.9357, "step": 5260000 }, { "epoch": 0.09, "learning_rate": 4.996313248589516e-05, "loss": 4.9083, "step": 5265000 }, { "epoch": 0.09, "learning_rate": 4.9963062433416e-05, "loss": 4.9228, "step": 5270000 }, { "epoch": 0.09, "learning_rate": 4.996299230045931e-05, "loss": 4.911, "step": 5275000 }, { "epoch": 0.09, "learning_rate": 4.9962922115084065e-05, "loss": 4.9283, "step": 5280000 }, { "epoch": 0.09, "learning_rate": 4.9962851863267704e-05, "loss": 4.9118, "step": 5285000 }, { "epoch": 0.09, "learning_rate": 4.9962781559086337e-05, "loss": 4.906, "step": 5290000 }, { "epoch": 0.09, "learning_rate": 4.99627111744016e-05, "loss": 4.9237, "step": 5295000 }, { "epoch": 0.09, "learning_rate": 4.996264070917379e-05, "loss": 4.9426, "step": 5300000 }, { "epoch": 0.09, "learning_rate": 4.996257019159484e-05, "loss": 4.9223, "step": 5305000 }, { "epoch": 0.09, "learning_rate": 4.99624996075757e-05, "loss": 4.9339, "step": 5310000 }, { "epoch": 0.09, "learning_rate": 4.996242895711657e-05, "loss": 4.9214, "step": 5315000 }, { "epoch": 0.09, "learning_rate": 4.996235824021764e-05, "loss": 4.9167, "step": 5320000 }, { "epoch": 0.09, "learning_rate": 4.9962287456879094e-05, "loss": 4.9186, "step": 5325000 }, { "epoch": 0.09, "learning_rate": 4.996221660710113e-05, "loss": 4.9382, "step": 5330000 }, { "epoch": 0.09, "learning_rate": 4.9962145690883925e-05, "loss": 4.9006, "step": 5335000 }, { "epoch": 0.09, "learning_rate": 4.996207472243651e-05, "loss": 4.9277, "step": 5340000 }, { "epoch": 0.09, "learning_rate": 4.996200365913254e-05, "loss": 4.9307, "step": 5345000 }, { "epoch": 0.09, "learning_rate": 4.996193254359874e-05, "loss": 4.9336, "step": 5350000 }, { "epoch": 0.09, "learning_rate": 4.996186137587521e-05, "loss": 4.9094, "step": 5355000 }, { "epoch": 0.09, "learning_rate": 4.996179012747793e-05, "loss": 4.9238, "step": 5360000 }, { "epoch": 0.09, "learning_rate": 4.996171881264256e-05, "loss": 4.8896, "step": 5365000 }, { "epoch": 0.09, "learning_rate": 4.996164743136926e-05, "loss": 4.9159, "step": 5370000 }, { "epoch": 0.09, "learning_rate": 4.996157598365824e-05, "loss": 4.919, "step": 5375000 }, { "epoch": 0.09, "learning_rate": 4.99615044838249e-05, "loss": 4.9258, "step": 5380000 }, { "epoch": 0.09, "learning_rate": 4.9961432888923795e-05, "loss": 4.9135, "step": 5385000 }, { "epoch": 0.09, "learning_rate": 4.9961361241900764e-05, "loss": 4.9231, "step": 5390000 }, { "epoch": 0.09, "learning_rate": 4.9961289528440744e-05, "loss": 4.9105, "step": 5395000 }, { "epoch": 0.09, "learning_rate": 4.996121776291234e-05, "loss": 4.9226, "step": 5400000 }, { "epoch": 0.09, "learning_rate": 4.996114590221061e-05, "loss": 4.9253, "step": 5405000 }, { "epoch": 0.09, "learning_rate": 4.996107400383582e-05, "loss": 4.9183, "step": 5410000 }, { "epoch": 0.09, "learning_rate": 4.9961002024643174e-05, "loss": 4.9047, "step": 5415000 }, { "epoch": 0.09, "learning_rate": 4.9960929979014515e-05, "loss": 4.9159, "step": 5420000 }, { "epoch": 0.09, "learning_rate": 4.9960857866950046e-05, "loss": 4.917, "step": 5425000 }, { "epoch": 0.09, "learning_rate": 4.996078568844995e-05, "loss": 4.9087, "step": 5430000 }, { "epoch": 0.09, "learning_rate": 4.996071344351443e-05, "loss": 4.9094, "step": 5435000 }, { "epoch": 0.09, "learning_rate": 4.996064111766896e-05, "loss": 4.9254, "step": 5440000 }, { "epoch": 0.09, "learning_rate": 4.996056873984987e-05, "loss": 4.9297, "step": 5445000 }, { "epoch": 0.09, "learning_rate": 4.996049629559592e-05, "loss": 4.9362, "step": 5450000 }, { "epoch": 0.09, "learning_rate": 4.996042378490731e-05, "loss": 4.916, "step": 5455000 }, { "epoch": 0.09, "learning_rate": 4.9960351207784225e-05, "loss": 4.909, "step": 5460000 }, { "epoch": 0.09, "learning_rate": 4.99602785496857e-05, "loss": 4.9215, "step": 5465000 }, { "epoch": 0.09, "learning_rate": 4.9960205839680976e-05, "loss": 4.944, "step": 5470000 }, { "epoch": 0.09, "learning_rate": 4.996013307781013e-05, "loss": 4.8969, "step": 5475000 }, { "epoch": 0.09, "learning_rate": 4.996006023495112e-05, "loss": 4.9181, "step": 5480000 }, { "epoch": 0.09, "learning_rate": 4.995998732565861e-05, "loss": 4.9069, "step": 5485000 }, { "epoch": 0.09, "learning_rate": 4.995991434993279e-05, "loss": 4.9199, "step": 5490000 }, { "epoch": 0.09, "learning_rate": 4.995984129315294e-05, "loss": 4.918, "step": 5495000 }, { "epoch": 0.09, "learning_rate": 4.9959768184547806e-05, "loss": 4.9056, "step": 5500000 }, { "epoch": 0.09, "learning_rate": 4.995969500950995e-05, "loss": 4.9039, "step": 5505000 }, { "epoch": 0.09, "learning_rate": 4.9959621782700374e-05, "loss": 4.9216, "step": 5510000 }, { "epoch": 0.09, "learning_rate": 4.9959548460136865e-05, "loss": 4.9275, "step": 5515000 }, { "epoch": 0.09, "learning_rate": 4.995947508580201e-05, "loss": 4.9196, "step": 5520000 }, { "epoch": 0.09, "learning_rate": 4.9959401630334534e-05, "loss": 4.9126, "step": 5525000 }, { "epoch": 0.09, "learning_rate": 4.995932813783667e-05, "loss": 4.9173, "step": 5530000 }, { "epoch": 0.09, "learning_rate": 4.995925456420659e-05, "loss": 4.9423, "step": 5535000 }, { "epoch": 0.09, "learning_rate": 4.9959180924145146e-05, "loss": 4.9511, "step": 5540000 }, { "epoch": 0.09, "learning_rate": 4.9959107217652536e-05, "loss": 4.9235, "step": 5545000 }, { "epoch": 0.09, "learning_rate": 4.9959033444728975e-05, "loss": 4.915, "step": 5550000 }, { "epoch": 0.09, "learning_rate": 4.9958959620155066e-05, "loss": 4.9189, "step": 5555000 }, { "epoch": 0.09, "learning_rate": 4.995888569958974e-05, "loss": 4.924, "step": 5560000 }, { "epoch": 0.09, "learning_rate": 4.995881172737446e-05, "loss": 4.9233, "step": 5565000 }, { "epoch": 0.09, "learning_rate": 4.995873768872901e-05, "loss": 4.8955, "step": 5570000 }, { "epoch": 0.09, "learning_rate": 4.995866358365359e-05, "loss": 4.8917, "step": 5575000 }, { "epoch": 0.09, "learning_rate": 4.995858939730149e-05, "loss": 4.9227, "step": 5580000 }, { "epoch": 0.09, "learning_rate": 4.99585151593534e-05, "loss": 4.9236, "step": 5585000 }, { "epoch": 0.09, "learning_rate": 4.995844085497593e-05, "loss": 4.9225, "step": 5590000 }, { "epoch": 0.09, "learning_rate": 4.9958366484169264e-05, "loss": 4.9129, "step": 5595000 }, { "epoch": 0.09, "learning_rate": 4.9958292032033563e-05, "loss": 4.9159, "step": 5600000 }, { "epoch": 0.09, "learning_rate": 4.9958217543269164e-05, "loss": 4.9225, "step": 5605000 }, { "epoch": 0.09, "learning_rate": 4.995814297317612e-05, "loss": 4.9161, "step": 5610000 }, { "epoch": 0.09, "learning_rate": 4.9958068336654686e-05, "loss": 4.9218, "step": 5615000 }, { "epoch": 0.09, "learning_rate": 4.9957993618751835e-05, "loss": 4.9026, "step": 5620000 }, { "epoch": 0.09, "learning_rate": 4.995791884936091e-05, "loss": 4.914, "step": 5625000 }, { "epoch": 0.09, "learning_rate": 4.9957844013542185e-05, "loss": 4.9279, "step": 5630000 }, { "epoch": 0.09, "learning_rate": 4.995776911129587e-05, "loss": 4.9127, "step": 5635000 }, { "epoch": 0.09, "learning_rate": 4.9957694142622135e-05, "loss": 4.9347, "step": 5640000 }, { "epoch": 0.09, "learning_rate": 4.995761910752121e-05, "loss": 4.933, "step": 5645000 }, { "epoch": 0.09, "learning_rate": 4.9957544005993284e-05, "loss": 4.9166, "step": 5650000 }, { "epoch": 0.09, "learning_rate": 4.995746883803856e-05, "loss": 4.9364, "step": 5655000 }, { "epoch": 0.09, "learning_rate": 4.995739360365722e-05, "loss": 4.9375, "step": 5660000 }, { "epoch": 0.09, "learning_rate": 4.995731830284949e-05, "loss": 4.9149, "step": 5665000 }, { "epoch": 0.09, "learning_rate": 4.995724293561555e-05, "loss": 4.9096, "step": 5670000 }, { "epoch": 0.09, "learning_rate": 4.9957167501955614e-05, "loss": 4.9111, "step": 5675000 }, { "epoch": 0.09, "learning_rate": 4.9957092016982585e-05, "loss": 4.9225, "step": 5680000 }, { "epoch": 0.09, "learning_rate": 4.995701643535854e-05, "loss": 4.9161, "step": 5685000 }, { "epoch": 0.09, "learning_rate": 4.9956940802421815e-05, "loss": 4.8988, "step": 5690000 }, { "epoch": 0.09, "learning_rate": 4.995686511821246e-05, "loss": 4.9165, "step": 5695000 }, { "epoch": 0.09, "learning_rate": 4.995678935243883e-05, "loss": 4.8971, "step": 5700000 }, { "epoch": 0.09, "learning_rate": 4.99567135202404e-05, "loss": 4.917, "step": 5705000 }, { "epoch": 0.09, "learning_rate": 4.995663762161738e-05, "loss": 4.9228, "step": 5710000 }, { "epoch": 0.09, "learning_rate": 4.995656164136424e-05, "loss": 4.9218, "step": 5715000 }, { "epoch": 0.09, "learning_rate": 4.995648562509838e-05, "loss": 4.9217, "step": 5720000 }, { "epoch": 0.09, "learning_rate": 4.99564095272028e-05, "loss": 4.9123, "step": 5725000 }, { "epoch": 0.09, "learning_rate": 4.9956333362883434e-05, "loss": 4.9256, "step": 5730000 }, { "epoch": 0.09, "learning_rate": 4.995625713214049e-05, "loss": 4.9086, "step": 5735000 }, { "epoch": 0.09, "learning_rate": 4.995618083497417e-05, "loss": 4.9183, "step": 5740000 }, { "epoch": 0.09, "learning_rate": 4.995610447138468e-05, "loss": 4.9188, "step": 5745000 }, { "epoch": 0.09, "learning_rate": 4.995602804137222e-05, "loss": 4.9211, "step": 5750000 }, { "epoch": 0.09, "learning_rate": 4.9955951544936986e-05, "loss": 4.9245, "step": 5755000 }, { "epoch": 0.09, "learning_rate": 4.99558749820792e-05, "loss": 4.9131, "step": 5760000 }, { "epoch": 0.09, "learning_rate": 4.995579833746041e-05, "loss": 4.9239, "step": 5765000 }, { "epoch": 0.09, "learning_rate": 4.995572164174481e-05, "loss": 4.9015, "step": 5770000 }, { "epoch": 0.09, "learning_rate": 4.995564487960725e-05, "loss": 4.9251, "step": 5775000 }, { "epoch": 0.09, "learning_rate": 4.9955568051047964e-05, "loss": 4.9102, "step": 5780000 }, { "epoch": 0.1, "learning_rate": 4.995549115606712e-05, "loss": 4.9056, "step": 5785000 }, { "epoch": 0.1, "learning_rate": 4.995541419466494e-05, "loss": 4.9051, "step": 5790000 }, { "epoch": 0.1, "learning_rate": 4.995533716684163e-05, "loss": 4.9242, "step": 5795000 }, { "epoch": 0.1, "learning_rate": 4.9955260088029054e-05, "loss": 4.9139, "step": 5800000 }, { "epoch": 0.1, "learning_rate": 4.995518292737738e-05, "loss": 4.9185, "step": 5805000 }, { "epoch": 0.1, "learning_rate": 4.9955105700305197e-05, "loss": 4.9296, "step": 5810000 }, { "epoch": 0.1, "learning_rate": 4.995502840681269e-05, "loss": 4.8918, "step": 5815000 }, { "epoch": 0.1, "learning_rate": 4.995495106238489e-05, "loss": 4.8975, "step": 5820000 }, { "epoch": 0.1, "learning_rate": 4.9954873636065674e-05, "loss": 4.8996, "step": 5825000 }, { "epoch": 0.1, "learning_rate": 4.995479614332676e-05, "loss": 4.8957, "step": 5830000 }, { "epoch": 0.1, "learning_rate": 4.995471858416836e-05, "loss": 4.9077, "step": 5835000 }, { "epoch": 0.1, "learning_rate": 4.995464095859067e-05, "loss": 4.9201, "step": 5840000 }, { "epoch": 0.1, "learning_rate": 4.995456326659391e-05, "loss": 4.9425, "step": 5845000 }, { "epoch": 0.1, "learning_rate": 4.995448550817828e-05, "loss": 4.8956, "step": 5850000 }, { "epoch": 0.1, "learning_rate": 4.9954407683343996e-05, "loss": 4.9371, "step": 5855000 }, { "epoch": 0.1, "learning_rate": 4.9954329807682375e-05, "loss": 4.9212, "step": 5860000 }, { "epoch": 0.1, "learning_rate": 4.995425183442026e-05, "loss": 4.9269, "step": 5865000 }, { "epoch": 0.1, "learning_rate": 4.995417379471352e-05, "loss": 4.9197, "step": 5870000 }, { "epoch": 0.1, "learning_rate": 4.995409571982436e-05, "loss": 4.9239, "step": 5875000 }, { "epoch": 0.1, "learning_rate": 4.9954017547255596e-05, "loss": 4.8961, "step": 5880000 }, { "epoch": 0.1, "learning_rate": 4.9953939339557975e-05, "loss": 4.9359, "step": 5885000 }, { "epoch": 0.1, "learning_rate": 4.995386104979886e-05, "loss": 4.9142, "step": 5890000 }, { "epoch": 0.1, "learning_rate": 4.995378269362275e-05, "loss": 4.938, "step": 5895000 }, { "epoch": 0.1, "learning_rate": 4.9953704271029855e-05, "loss": 4.9212, "step": 5900000 }, { "epoch": 0.1, "learning_rate": 4.995362578202037e-05, "loss": 4.9258, "step": 5905000 }, { "epoch": 0.1, "learning_rate": 4.995354722659451e-05, "loss": 4.9309, "step": 5910000 }, { "epoch": 0.1, "learning_rate": 4.9953468604752496e-05, "loss": 4.919, "step": 5915000 }, { "epoch": 0.1, "learning_rate": 4.995338991649452e-05, "loss": 4.9288, "step": 5920000 }, { "epoch": 0.1, "learning_rate": 4.99533111618208e-05, "loss": 4.9206, "step": 5925000 }, { "epoch": 0.1, "learning_rate": 4.99532323565087e-05, "loss": 4.912, "step": 5930000 }, { "epoch": 0.1, "learning_rate": 4.9953153469017416e-05, "loss": 4.9242, "step": 5935000 }, { "epoch": 0.1, "learning_rate": 4.9953074499307264e-05, "loss": 4.9023, "step": 5940000 }, { "epoch": 0.1, "learning_rate": 4.995299547897265e-05, "loss": 4.9384, "step": 5945000 }, { "epoch": 0.1, "learning_rate": 4.995291639222336e-05, "loss": 4.9035, "step": 5950000 }, { "epoch": 0.1, "learning_rate": 4.995283722321596e-05, "loss": 4.9171, "step": 5955000 }, { "epoch": 0.1, "learning_rate": 4.995275800362461e-05, "loss": 4.9029, "step": 5960000 }, { "epoch": 0.1, "learning_rate": 4.99526787176192e-05, "loss": 4.9086, "step": 5965000 }, { "epoch": 0.1, "learning_rate": 4.995259936519994e-05, "loss": 4.9193, "step": 5970000 }, { "epoch": 0.1, "learning_rate": 4.99525199622638e-05, "loss": 4.8984, "step": 5975000 }, { "epoch": 0.1, "learning_rate": 4.99524404611207e-05, "loss": 4.9028, "step": 5980000 }, { "epoch": 0.1, "learning_rate": 4.995236090946114e-05, "loss": 4.9133, "step": 5985000 }, { "epoch": 0.1, "learning_rate": 4.995228130732521e-05, "loss": 4.9212, "step": 5990000 }, { "epoch": 0.1, "learning_rate": 4.9952201622853144e-05, "loss": 4.9333, "step": 5995000 }, { "epoch": 0.1, "learning_rate": 4.9952121871968496e-05, "loss": 4.9389, "step": 6000000 }, { "epoch": 0.1, "eval_loss": 5.650006294250488, "eval_runtime": 85085.7097, "eval_samples_per_second": 130.256, "eval_steps_per_second": 26.051, "step": 6000000 }, { "epoch": 0.1, "learning_rate": 4.995204205467148e-05, "loss": 4.9401, "step": 6005000 }, { "epoch": 0.1, "learning_rate": 4.995196215497252e-05, "loss": 4.9246, "step": 6010000 }, { "epoch": 0.1, "learning_rate": 4.995188220483811e-05, "loss": 4.9318, "step": 6015000 }, { "epoch": 0.1, "learning_rate": 4.9951802204308315e-05, "loss": 4.9152, "step": 6020000 }, { "epoch": 0.1, "learning_rate": 4.995172212136394e-05, "loss": 4.8964, "step": 6025000 }, { "epoch": 0.1, "learning_rate": 4.995164198805119e-05, "loss": 4.9242, "step": 6030000 }, { "epoch": 0.1, "learning_rate": 4.995156175624149e-05, "loss": 4.9058, "step": 6035000 }, { "epoch": 0.1, "learning_rate": 4.9951481474063835e-05, "loss": 4.9334, "step": 6040000 }, { "epoch": 0.1, "learning_rate": 4.995140112547552e-05, "loss": 4.922, "step": 6045000 }, { "epoch": 0.1, "learning_rate": 4.995132071047674e-05, "loss": 4.9226, "step": 6050000 }, { "epoch": 0.1, "learning_rate": 4.99512402451771e-05, "loss": 4.8979, "step": 6055000 }, { "epoch": 0.1, "learning_rate": 4.9951159713494e-05, "loss": 4.9251, "step": 6060000 }, { "epoch": 0.1, "learning_rate": 4.995107908315579e-05, "loss": 4.9086, "step": 6065000 }, { "epoch": 0.1, "learning_rate": 4.995099840253064e-05, "loss": 4.8939, "step": 6070000 }, { "epoch": 0.1, "learning_rate": 4.995091767165862e-05, "loss": 4.9283, "step": 6075000 }, { "epoch": 0.1, "learning_rate": 4.995083685822821e-05, "loss": 4.9162, "step": 6080000 }, { "epoch": 0.1, "learning_rate": 4.995075597838885e-05, "loss": 4.9214, "step": 6085000 }, { "epoch": 0.1, "learning_rate": 4.995067503214076e-05, "loss": 4.9047, "step": 6090000 }, { "epoch": 0.1, "learning_rate": 4.99505940356998e-05, "loss": 4.9354, "step": 6095000 }, { "epoch": 0.1, "learning_rate": 4.9950512940419226e-05, "loss": 4.9343, "step": 6100000 }, { "epoch": 0.1, "learning_rate": 4.9950431794946215e-05, "loss": 4.9173, "step": 6105000 }, { "epoch": 0.1, "learning_rate": 4.995035058306534e-05, "loss": 4.9254, "step": 6110000 }, { "epoch": 0.1, "learning_rate": 4.995026932104561e-05, "loss": 4.9172, "step": 6115000 }, { "epoch": 0.1, "learning_rate": 4.995018796008083e-05, "loss": 4.9204, "step": 6120000 }, { "epoch": 0.1, "learning_rate": 4.995010654897763e-05, "loss": 4.8992, "step": 6125000 }, { "epoch": 0.1, "learning_rate": 4.995002507146743e-05, "loss": 4.91, "step": 6130000 }, { "epoch": 0.1, "learning_rate": 4.9949943527550434e-05, "loss": 4.9096, "step": 6135000 }, { "epoch": 0.1, "learning_rate": 4.994986191722687e-05, "loss": 4.913, "step": 6140000 }, { "epoch": 0.1, "learning_rate": 4.9949780240496946e-05, "loss": 4.9158, "step": 6145000 }, { "epoch": 0.1, "learning_rate": 4.994969849736089e-05, "loss": 4.913, "step": 6150000 }, { "epoch": 0.1, "learning_rate": 4.994961668781891e-05, "loss": 4.8981, "step": 6155000 }, { "epoch": 0.1, "learning_rate": 4.994953481187123e-05, "loss": 4.9225, "step": 6160000 }, { "epoch": 0.1, "learning_rate": 4.994945286951806e-05, "loss": 4.925, "step": 6165000 }, { "epoch": 0.1, "learning_rate": 4.994937086075963e-05, "loss": 4.9059, "step": 6170000 }, { "epoch": 0.1, "learning_rate": 4.99492887691679e-05, "loss": 4.914, "step": 6175000 }, { "epoch": 0.1, "learning_rate": 4.9949206644027836e-05, "loss": 4.9309, "step": 6180000 }, { "epoch": 0.1, "learning_rate": 4.994912443605491e-05, "loss": 4.9199, "step": 6185000 }, { "epoch": 0.1, "learning_rate": 4.9949042194613796e-05, "loss": 4.9042, "step": 6190000 }, { "epoch": 0.1, "learning_rate": 4.99489598373775e-05, "loss": 4.903, "step": 6195000 }, { "epoch": 0.1, "learning_rate": 4.994887741371067e-05, "loss": 4.9259, "step": 6200000 }, { "epoch": 0.1, "learning_rate": 4.9948794923613523e-05, "loss": 4.9206, "step": 6205000 }, { "epoch": 0.1, "learning_rate": 4.9948712383607545e-05, "loss": 4.9163, "step": 6210000 }, { "epoch": 0.1, "learning_rate": 4.994862981026733e-05, "loss": 4.935, "step": 6215000 }, { "epoch": 0.1, "learning_rate": 4.994854712093374e-05, "loss": 4.896, "step": 6220000 }, { "epoch": 0.1, "learning_rate": 4.9948464381731816e-05, "loss": 4.9025, "step": 6225000 }, { "epoch": 0.1, "learning_rate": 4.994838155955287e-05, "loss": 4.9165, "step": 6230000 }, { "epoch": 0.1, "learning_rate": 4.994829870412029e-05, "loss": 4.9194, "step": 6235000 }, { "epoch": 0.1, "learning_rate": 4.994821576571112e-05, "loss": 4.9007, "step": 6240000 }, { "epoch": 0.1, "learning_rate": 4.994813274428573e-05, "loss": 4.8997, "step": 6245000 }, { "epoch": 0.1, "learning_rate": 4.994804968968709e-05, "loss": 4.9116, "step": 6250000 }, { "epoch": 0.1, "learning_rate": 4.994796655207266e-05, "loss": 4.908, "step": 6255000 }, { "epoch": 0.1, "learning_rate": 4.994788334805692e-05, "loss": 4.9162, "step": 6260000 }, { "epoch": 0.1, "learning_rate": 4.99478000776401e-05, "loss": 4.8971, "step": 6265000 }, { "epoch": 0.1, "learning_rate": 4.99477167408224e-05, "loss": 4.9241, "step": 6270000 }, { "epoch": 0.1, "learning_rate": 4.994763333760407e-05, "loss": 4.9324, "step": 6275000 }, { "epoch": 0.1, "learning_rate": 4.9947549867985307e-05, "loss": 4.9231, "step": 6280000 }, { "epoch": 0.1, "learning_rate": 4.994746633196634e-05, "loss": 4.9082, "step": 6285000 }, { "epoch": 0.1, "learning_rate": 4.9947382712813577e-05, "loss": 4.9048, "step": 6290000 }, { "epoch": 0.1, "learning_rate": 4.9947299043981596e-05, "loss": 4.9052, "step": 6295000 }, { "epoch": 0.1, "learning_rate": 4.994721530875007e-05, "loss": 4.918, "step": 6300000 }, { "epoch": 0.1, "learning_rate": 4.994713150711924e-05, "loss": 4.916, "step": 6305000 }, { "epoch": 0.1, "learning_rate": 4.9947047622302364e-05, "loss": 4.9241, "step": 6310000 }, { "epoch": 0.1, "learning_rate": 4.9946963704660546e-05, "loss": 4.8937, "step": 6315000 }, { "epoch": 0.1, "learning_rate": 4.994687972064664e-05, "loss": 4.9021, "step": 6320000 }, { "epoch": 0.1, "learning_rate": 4.994679563660728e-05, "loss": 4.9262, "step": 6325000 }, { "epoch": 0.1, "learning_rate": 4.994671151982334e-05, "loss": 4.9182, "step": 6330000 }, { "epoch": 0.1, "learning_rate": 4.994662731981461e-05, "loss": 4.9193, "step": 6335000 }, { "epoch": 0.1, "learning_rate": 4.9946543036541474e-05, "loss": 4.9181, "step": 6340000 }, { "epoch": 0.1, "learning_rate": 4.994645868684424e-05, "loss": 4.9237, "step": 6345000 }, { "epoch": 0.1, "learning_rate": 4.994637430450963e-05, "loss": 4.8969, "step": 6350000 }, { "epoch": 0.1, "learning_rate": 4.994628982199145e-05, "loss": 4.9031, "step": 6355000 }, { "epoch": 0.1, "learning_rate": 4.9946205289969674e-05, "loss": 4.9138, "step": 6360000 }, { "epoch": 0.1, "learning_rate": 4.994612069155128e-05, "loss": 4.9229, "step": 6365000 }, { "epoch": 0.1, "learning_rate": 4.99460360097901e-05, "loss": 4.8962, "step": 6370000 }, { "epoch": 0.1, "learning_rate": 4.994595127856584e-05, "loss": 4.9141, "step": 6375000 }, { "epoch": 0.1, "learning_rate": 4.994586648094564e-05, "loss": 4.9073, "step": 6380000 }, { "epoch": 0.1, "learning_rate": 4.994578161692972e-05, "loss": 4.9301, "step": 6385000 }, { "epoch": 0.1, "learning_rate": 4.994569670351782e-05, "loss": 4.9162, "step": 6390000 }, { "epoch": 0.11, "learning_rate": 4.9945611706724415e-05, "loss": 4.9297, "step": 6395000 }, { "epoch": 0.11, "learning_rate": 4.9945526660562055e-05, "loss": 4.8963, "step": 6400000 }, { "epoch": 0.11, "learning_rate": 4.994544149691333e-05, "loss": 4.9434, "step": 6405000 }, { "epoch": 0.11, "learning_rate": 4.994535631797485e-05, "loss": 4.9127, "step": 6410000 }, { "epoch": 0.11, "learning_rate": 4.9945271055602636e-05, "loss": 4.912, "step": 6415000 }, { "epoch": 0.11, "learning_rate": 4.994518570975706e-05, "loss": 4.9208, "step": 6420000 }, { "epoch": 0.11, "learning_rate": 4.9945100297491e-05, "loss": 4.9039, "step": 6425000 }, { "epoch": 0.11, "learning_rate": 4.994501485301628e-05, "loss": 4.9135, "step": 6430000 }, { "epoch": 0.11, "learning_rate": 4.99449293250556e-05, "loss": 4.9357, "step": 6435000 }, { "epoch": 0.11, "learning_rate": 4.994484371356933e-05, "loss": 4.926, "step": 6440000 }, { "epoch": 0.11, "learning_rate": 4.9944758069954786e-05, "loss": 4.9108, "step": 6445000 }, { "epoch": 0.11, "learning_rate": 4.9944672325656175e-05, "loss": 4.9273, "step": 6450000 }, { "epoch": 0.11, "learning_rate": 4.994458653211067e-05, "loss": 4.9349, "step": 6455000 }, { "epoch": 0.11, "learning_rate": 4.9944500672172844e-05, "loss": 4.9072, "step": 6460000 }, { "epoch": 0.11, "learning_rate": 4.9944414728644156e-05, "loss": 4.9072, "step": 6465000 }, { "epoch": 0.11, "learning_rate": 4.994432873590911e-05, "loss": 4.9296, "step": 6470000 }, { "epoch": 0.11, "learning_rate": 4.994424269400779e-05, "loss": 4.9294, "step": 6475000 }, { "epoch": 0.11, "learning_rate": 4.9944156568502996e-05, "loss": 4.9098, "step": 6480000 }, { "epoch": 0.11, "learning_rate": 4.994407035935512e-05, "loss": 4.9125, "step": 6485000 }, { "epoch": 0.11, "learning_rate": 4.994398410105493e-05, "loss": 4.9203, "step": 6490000 }, { "epoch": 0.11, "learning_rate": 4.994389777636403e-05, "loss": 4.9125, "step": 6495000 }, { "epoch": 0.11, "learning_rate": 4.994381138528266e-05, "loss": 4.9206, "step": 6500000 }, { "epoch": 0.11, "learning_rate": 4.994372492781103e-05, "loss": 4.9204, "step": 6505000 }, { "epoch": 0.11, "learning_rate": 4.994363838663104e-05, "loss": 4.928, "step": 6510000 }, { "epoch": 0.11, "learning_rate": 4.9943551796366316e-05, "loss": 4.9159, "step": 6515000 }, { "epoch": 0.11, "learning_rate": 4.994346513971203e-05, "loss": 4.904, "step": 6520000 }, { "epoch": 0.11, "learning_rate": 4.994337843402661e-05, "loss": 4.9203, "step": 6525000 }, { "epoch": 0.11, "learning_rate": 4.994329164460718e-05, "loss": 4.8918, "step": 6530000 }, { "epoch": 0.11, "learning_rate": 4.994320478879888e-05, "loss": 4.917, "step": 6535000 }, { "epoch": 0.11, "learning_rate": 4.9943117883999985e-05, "loss": 4.9088, "step": 6540000 }, { "epoch": 0.11, "learning_rate": 4.994303089542793e-05, "loss": 4.933, "step": 6545000 }, { "epoch": 0.11, "learning_rate": 4.99429438230431e-05, "loss": 4.9257, "step": 6550000 }, { "epoch": 0.11, "learning_rate": 4.9942856719119526e-05, "loss": 4.914, "step": 6555000 }, { "epoch": 0.11, "learning_rate": 4.994276951393248e-05, "loss": 4.9507, "step": 6560000 }, { "epoch": 0.11, "learning_rate": 4.994268225979583e-05, "loss": 4.9125, "step": 6565000 }, { "epoch": 0.11, "learning_rate": 4.9942594956749676e-05, "loss": 4.9312, "step": 6570000 }, { "epoch": 0.11, "learning_rate": 4.994250756985205e-05, "loss": 4.9083, "step": 6575000 }, { "epoch": 0.11, "learning_rate": 4.9942420099063355e-05, "loss": 4.908, "step": 6580000 }, { "epoch": 0.11, "learning_rate": 4.994233256186154e-05, "loss": 4.9238, "step": 6585000 }, { "epoch": 0.11, "learning_rate": 4.9942244975777705e-05, "loss": 4.9361, "step": 6590000 }, { "epoch": 0.11, "learning_rate": 4.994215734085194e-05, "loss": 4.8937, "step": 6595000 }, { "epoch": 0.11, "learning_rate": 4.994206962200946e-05, "loss": 4.9202, "step": 6600000 }, { "epoch": 0.11, "learning_rate": 4.9941981836781374e-05, "loss": 4.9152, "step": 6605000 }, { "epoch": 0.11, "learning_rate": 4.9941893985167885e-05, "loss": 4.9454, "step": 6610000 }, { "epoch": 0.11, "learning_rate": 4.9941806049571986e-05, "loss": 4.9135, "step": 6615000 }, { "epoch": 0.11, "learning_rate": 4.9941718082785726e-05, "loss": 4.9423, "step": 6620000 }, { "epoch": 0.11, "learning_rate": 4.9941630032017505e-05, "loss": 4.9067, "step": 6625000 }, { "epoch": 0.11, "learning_rate": 4.9941541914864845e-05, "loss": 4.9064, "step": 6630000 }, { "epoch": 0.11, "learning_rate": 4.9941453748978375e-05, "loss": 4.9141, "step": 6635000 }, { "epoch": 0.11, "learning_rate": 4.9941365481407124e-05, "loss": 4.9089, "step": 6640000 }, { "epoch": 0.11, "learning_rate": 4.994127716510253e-05, "loss": 4.9154, "step": 6645000 }, { "epoch": 0.11, "learning_rate": 4.994118876472418e-05, "loss": 4.9237, "step": 6650000 }, { "epoch": 0.11, "learning_rate": 4.994110033334306e-05, "loss": 4.9165, "step": 6655000 }, { "epoch": 0.11, "learning_rate": 4.994101180017184e-05, "loss": 4.8951, "step": 6660000 }, { "epoch": 0.11, "learning_rate": 4.994092323605145e-05, "loss": 4.9051, "step": 6665000 }, { "epoch": 0.11, "learning_rate": 4.99408345700883e-05, "loss": 4.9211, "step": 6670000 }, { "epoch": 0.11, "learning_rate": 4.994074585547292e-05, "loss": 4.9143, "step": 6675000 }, { "epoch": 0.11, "learning_rate": 4.9940657092245395e-05, "loss": 4.9334, "step": 6680000 }, { "epoch": 0.11, "learning_rate": 4.9940568227096126e-05, "loss": 4.9227, "step": 6685000 }, { "epoch": 0.11, "learning_rate": 4.9940479331131684e-05, "loss": 4.894, "step": 6690000 }, { "epoch": 0.11, "learning_rate": 4.994039035100264e-05, "loss": 4.8943, "step": 6695000 }, { "epoch": 0.11, "learning_rate": 4.9940301304492444e-05, "loss": 4.9329, "step": 6700000 }, { "epoch": 0.11, "learning_rate": 4.9940212191601346e-05, "loss": 4.8919, "step": 6705000 }, { "epoch": 0.11, "learning_rate": 4.994012301232957e-05, "loss": 4.9021, "step": 6710000 }, { "epoch": 0.11, "learning_rate": 4.994003376667736e-05, "loss": 4.9328, "step": 6715000 }, { "epoch": 0.11, "learning_rate": 4.993994445464496e-05, "loss": 4.919, "step": 6720000 }, { "epoch": 0.11, "learning_rate": 4.993985505834312e-05, "loss": 4.898, "step": 6725000 }, { "epoch": 0.11, "learning_rate": 4.993976561353775e-05, "loss": 4.936, "step": 6730000 }, { "epoch": 0.11, "learning_rate": 4.993967612026894e-05, "loss": 4.9333, "step": 6735000 }, { "epoch": 0.11, "learning_rate": 4.9939586524788804e-05, "loss": 4.9107, "step": 6740000 }, { "epoch": 0.11, "learning_rate": 4.9939496880845704e-05, "loss": 4.9171, "step": 6745000 }, { "epoch": 0.11, "learning_rate": 4.993940718847972e-05, "loss": 4.9042, "step": 6750000 }, { "epoch": 0.11, "learning_rate": 4.993931739382344e-05, "loss": 4.9168, "step": 6755000 }, { "epoch": 0.11, "learning_rate": 4.993922755074475e-05, "loss": 4.9343, "step": 6760000 }, { "epoch": 0.11, "learning_rate": 4.993913764128801e-05, "loss": 4.918, "step": 6765000 }, { "epoch": 0.11, "learning_rate": 4.993904764744445e-05, "loss": 4.9368, "step": 6770000 }, { "epoch": 0.11, "learning_rate": 4.993895762324134e-05, "loss": 4.9378, "step": 6775000 }, { "epoch": 0.11, "learning_rate": 4.993886749661631e-05, "loss": 4.9198, "step": 6780000 }, { "epoch": 0.11, "learning_rate": 4.993877733968534e-05, "loss": 4.9142, "step": 6785000 }, { "epoch": 0.11, "learning_rate": 4.9938687080279805e-05, "loss": 4.9213, "step": 6790000 }, { "epoch": 0.11, "learning_rate": 4.993859677254651e-05, "loss": 4.9231, "step": 6795000 }, { "epoch": 0.11, "learning_rate": 4.993850641652554e-05, "loss": 4.918, "step": 6800000 }, { "epoch": 0.11, "learning_rate": 4.993841597605302e-05, "loss": 4.912, "step": 6805000 }, { "epoch": 0.11, "learning_rate": 4.9938325469204615e-05, "loss": 4.9047, "step": 6810000 }, { "epoch": 0.11, "learning_rate": 4.993823487785202e-05, "loss": 4.8998, "step": 6815000 }, { "epoch": 0.11, "learning_rate": 4.993814423823927e-05, "loss": 4.9277, "step": 6820000 }, { "epoch": 0.11, "learning_rate": 4.993805353225136e-05, "loss": 4.9259, "step": 6825000 }, { "epoch": 0.11, "learning_rate": 4.9937962759888515e-05, "loss": 4.9087, "step": 6830000 }, { "epoch": 0.11, "learning_rate": 4.993787192115099e-05, "loss": 4.9127, "step": 6835000 }, { "epoch": 0.11, "learning_rate": 4.993778101603902e-05, "loss": 4.8976, "step": 6840000 }, { "epoch": 0.11, "learning_rate": 4.993769006276107e-05, "loss": 4.9248, "step": 6845000 }, { "epoch": 0.11, "learning_rate": 4.9937599006692724e-05, "loss": 4.9266, "step": 6850000 }, { "epoch": 0.11, "learning_rate": 4.99375078842241e-05, "loss": 4.9136, "step": 6855000 }, { "epoch": 0.11, "learning_rate": 4.993741673185156e-05, "loss": 4.9103, "step": 6860000 }, { "epoch": 0.11, "learning_rate": 4.993732547660967e-05, "loss": 4.9216, "step": 6865000 }, { "epoch": 0.11, "learning_rate": 4.9937234173242856e-05, "loss": 4.9084, "step": 6870000 }, { "epoch": 0.11, "learning_rate": 4.993714280350329e-05, "loss": 4.9164, "step": 6875000 }, { "epoch": 0.11, "learning_rate": 4.9937051349090046e-05, "loss": 4.9128, "step": 6880000 }, { "epoch": 0.11, "learning_rate": 4.9936959846592444e-05, "loss": 4.9198, "step": 6885000 }, { "epoch": 0.11, "learning_rate": 4.993686825939508e-05, "loss": 4.9275, "step": 6890000 }, { "epoch": 0.11, "learning_rate": 4.99367766241404e-05, "loss": 4.9066, "step": 6895000 }, { "epoch": 0.11, "learning_rate": 4.993668494086851e-05, "loss": 4.9167, "step": 6900000 }, { "epoch": 0.11, "learning_rate": 4.99365931728843e-05, "loss": 4.913, "step": 6905000 }, { "epoch": 0.11, "learning_rate": 4.9936501320148206e-05, "loss": 4.9104, "step": 6910000 }, { "epoch": 0.11, "learning_rate": 4.993640941940889e-05, "loss": 4.9095, "step": 6915000 }, { "epoch": 0.11, "learning_rate": 4.993631745229902e-05, "loss": 4.9188, "step": 6920000 }, { "epoch": 0.11, "learning_rate": 4.9936225400398146e-05, "loss": 4.9321, "step": 6925000 }, { "epoch": 0.11, "learning_rate": 4.9936133318968615e-05, "loss": 4.9417, "step": 6930000 }, { "epoch": 0.11, "learning_rate": 4.993604115274857e-05, "loss": 4.9126, "step": 6935000 }, { "epoch": 0.11, "learning_rate": 4.993594892015895e-05, "loss": 4.9049, "step": 6940000 }, { "epoch": 0.11, "learning_rate": 4.993585662120001e-05, "loss": 4.9071, "step": 6945000 }, { "epoch": 0.11, "learning_rate": 4.9935764237384885e-05, "loss": 4.9048, "step": 6950000 }, { "epoch": 0.11, "learning_rate": 4.99356718426755e-05, "loss": 4.9314, "step": 6955000 }, { "epoch": 0.11, "learning_rate": 4.993557932610969e-05, "loss": 4.9096, "step": 6960000 }, { "epoch": 0.11, "learning_rate": 4.9935486761675904e-05, "loss": 4.9289, "step": 6965000 }, { "epoch": 0.11, "learning_rate": 4.9935394130874024e-05, "loss": 4.9181, "step": 6970000 }, { "epoch": 0.11, "learning_rate": 4.993530145225779e-05, "loss": 4.9035, "step": 6975000 }, { "epoch": 0.11, "learning_rate": 4.9935208707300515e-05, "loss": 4.9133, "step": 6980000 }, { "epoch": 0.11, "learning_rate": 4.993511585884234e-05, "loss": 4.9306, "step": 6985000 }, { "epoch": 0.11, "learning_rate": 4.993502294399051e-05, "loss": 4.9277, "step": 6990000 }, { "epoch": 0.11, "learning_rate": 4.9934929981351855e-05, "loss": 4.9021, "step": 6995000 }, { "epoch": 0.11, "learning_rate": 4.99348369523466e-05, "loss": 4.9221, "step": 7000000 }, { "epoch": 0.11, "eval_loss": 5.69034481048584, "eval_runtime": 76912.0259, "eval_samples_per_second": 144.098, "eval_steps_per_second": 28.82, "step": 7000000 }, { "epoch": 0.12, "learning_rate": 4.993474385697497e-05, "loss": 4.9069, "step": 7005000 }, { "epoch": 0.12, "learning_rate": 4.993465069523723e-05, "loss": 4.9032, "step": 7010000 }, { "epoch": 0.12, "learning_rate": 4.9934557448473897e-05, "loss": 4.9113, "step": 7015000 }, { "epoch": 0.12, "learning_rate": 4.993446417266438e-05, "loss": 4.9186, "step": 7020000 }, { "epoch": 0.12, "learning_rate": 4.9934370811829776e-05, "loss": 4.9221, "step": 7025000 }, { "epoch": 0.12, "learning_rate": 4.993427738463005e-05, "loss": 4.9145, "step": 7030000 }, { "epoch": 0.12, "learning_rate": 4.993418389106544e-05, "loss": 4.9133, "step": 7035000 }, { "epoch": 0.12, "learning_rate": 4.993409031241009e-05, "loss": 4.8956, "step": 7040000 }, { "epoch": 0.12, "learning_rate": 4.99339966861032e-05, "loss": 4.9121, "step": 7045000 }, { "epoch": 0.12, "learning_rate": 4.993390299343218e-05, "loss": 4.9088, "step": 7050000 }, { "epoch": 0.12, "learning_rate": 4.993380923439729e-05, "loss": 4.9195, "step": 7055000 }, { "epoch": 0.12, "learning_rate": 4.9933715390219534e-05, "loss": 4.9139, "step": 7060000 }, { "epoch": 0.12, "learning_rate": 4.993362149844435e-05, "loss": 4.9004, "step": 7065000 }, { "epoch": 0.12, "learning_rate": 4.993352752150025e-05, "loss": 4.9012, "step": 7070000 }, { "epoch": 0.12, "learning_rate": 4.993343349698577e-05, "loss": 4.9146, "step": 7075000 }, { "epoch": 0.12, "learning_rate": 4.993333940610867e-05, "loss": 4.9169, "step": 7080000 }, { "epoch": 0.12, "learning_rate": 4.993324524886919e-05, "loss": 4.9012, "step": 7085000 }, { "epoch": 0.12, "learning_rate": 4.993315102526758e-05, "loss": 4.9026, "step": 7090000 }, { "epoch": 0.12, "learning_rate": 4.9933056716431916e-05, "loss": 4.9224, "step": 7095000 }, { "epoch": 0.12, "learning_rate": 4.993296236009353e-05, "loss": 4.9079, "step": 7100000 }, { "epoch": 0.12, "learning_rate": 4.993286793739376e-05, "loss": 4.8972, "step": 7105000 }, { "epoch": 0.12, "learning_rate": 4.993277344833286e-05, "loss": 4.9158, "step": 7110000 }, { "epoch": 0.12, "learning_rate": 4.99326788929111e-05, "loss": 4.9137, "step": 7115000 }, { "epoch": 0.12, "learning_rate": 4.9932584271128714e-05, "loss": 4.905, "step": 7120000 }, { "epoch": 0.12, "learning_rate": 4.9932489601937804e-05, "loss": 4.9143, "step": 7125000 }, { "epoch": 0.12, "learning_rate": 4.993239482848309e-05, "loss": 4.9257, "step": 7130000 }, { "epoch": 0.12, "learning_rate": 4.993230000762035e-05, "loss": 4.9141, "step": 7135000 }, { "epoch": 0.12, "learning_rate": 4.993220510140633e-05, "loss": 4.905, "step": 7140000 }, { "epoch": 0.12, "learning_rate": 4.993211016681629e-05, "loss": 4.9089, "step": 7145000 }, { "epoch": 0.12, "learning_rate": 4.993201516589371e-05, "loss": 4.9215, "step": 7150000 }, { "epoch": 0.12, "learning_rate": 4.993192007960731e-05, "loss": 4.8852, "step": 7155000 }, { "epoch": 0.12, "learning_rate": 4.9931824907917534e-05, "loss": 4.9173, "step": 7160000 }, { "epoch": 0.12, "learning_rate": 4.993172968890091e-05, "loss": 4.9148, "step": 7165000 }, { "epoch": 0.12, "learning_rate": 4.993163440352619e-05, "loss": 4.9157, "step": 7170000 }, { "epoch": 0.12, "learning_rate": 4.993153905179364e-05, "loss": 4.9195, "step": 7175000 }, { "epoch": 0.12, "learning_rate": 4.993144363370349e-05, "loss": 4.9002, "step": 7180000 }, { "epoch": 0.12, "learning_rate": 4.9931348168367184e-05, "loss": 4.9025, "step": 7185000 }, { "epoch": 0.12, "learning_rate": 4.993125259845146e-05, "loss": 4.9177, "step": 7190000 }, { "epoch": 0.12, "learning_rate": 4.9931156981290075e-05, "loss": 4.9019, "step": 7195000 }, { "epoch": 0.12, "learning_rate": 4.993106127862112e-05, "loss": 4.9239, "step": 7200000 }, { "epoch": 0.12, "learning_rate": 4.993096554789786e-05, "loss": 4.9051, "step": 7205000 }, { "epoch": 0.12, "learning_rate": 4.993086971248997e-05, "loss": 4.9189, "step": 7210000 }, { "epoch": 0.12, "learning_rate": 4.993077382989056e-05, "loss": 4.9175, "step": 7215000 }, { "epoch": 0.12, "learning_rate": 4.993067788093559e-05, "loss": 4.9108, "step": 7220000 }, { "epoch": 0.12, "learning_rate": 4.993058186562534e-05, "loss": 4.909, "step": 7225000 }, { "epoch": 0.12, "learning_rate": 4.993048576472938e-05, "loss": 4.9133, "step": 7230000 }, { "epoch": 0.12, "learning_rate": 4.9930389635939965e-05, "loss": 4.8877, "step": 7235000 }, { "epoch": 0.12, "learning_rate": 4.9930293421565356e-05, "loss": 4.9071, "step": 7240000 }, { "epoch": 0.12, "learning_rate": 4.993019714083648e-05, "loss": 4.892, "step": 7245000 }, { "epoch": 0.12, "learning_rate": 4.9930100774469815e-05, "loss": 4.9077, "step": 7250000 }, { "epoch": 0.12, "learning_rate": 4.993000438031692e-05, "loss": 4.9091, "step": 7255000 }, { "epoch": 0.12, "learning_rate": 4.992990791983708e-05, "loss": 4.9224, "step": 7260000 }, { "epoch": 0.12, "learning_rate": 4.992981133505976e-05, "loss": 4.9184, "step": 7265000 }, { "epoch": 0.12, "learning_rate": 4.992971474188696e-05, "loss": 4.9184, "step": 7270000 }, { "epoch": 0.12, "learning_rate": 4.9929618063037845e-05, "loss": 4.8741, "step": 7275000 }, { "epoch": 0.12, "learning_rate": 4.992952133719966e-05, "loss": 4.9126, "step": 7280000 }, { "epoch": 0.12, "learning_rate": 4.9929424525659115e-05, "loss": 4.8903, "step": 7285000 }, { "epoch": 0.12, "learning_rate": 4.992932762837664e-05, "loss": 4.899, "step": 7290000 }, { "epoch": 0.12, "learning_rate": 4.992923070352241e-05, "loss": 4.9158, "step": 7295000 }, { "epoch": 0.12, "learning_rate": 4.992913369292677e-05, "loss": 4.9198, "step": 7300000 }, { "epoch": 0.12, "learning_rate": 4.992903661597994e-05, "loss": 4.9155, "step": 7305000 }, { "epoch": 0.12, "learning_rate": 4.992893947268218e-05, "loss": 4.9181, "step": 7310000 }, { "epoch": 0.12, "learning_rate": 4.992884226303376e-05, "loss": 4.9229, "step": 7315000 }, { "epoch": 0.12, "learning_rate": 4.9928744987034926e-05, "loss": 4.8984, "step": 7320000 }, { "epoch": 0.12, "learning_rate": 4.992864762520304e-05, "loss": 4.9016, "step": 7325000 }, { "epoch": 0.12, "learning_rate": 4.992855019699471e-05, "loss": 4.8943, "step": 7330000 }, { "epoch": 0.12, "learning_rate": 4.99284527414291e-05, "loss": 4.908, "step": 7335000 }, { "epoch": 0.12, "learning_rate": 4.992835518049521e-05, "loss": 4.9186, "step": 7340000 }, { "epoch": 0.12, "learning_rate": 4.9928257592257665e-05, "loss": 4.9064, "step": 7345000 }, { "epoch": 0.12, "learning_rate": 4.992815989859927e-05, "loss": 4.9151, "step": 7350000 }, { "epoch": 0.12, "learning_rate": 4.9928062158128275e-05, "loss": 4.8866, "step": 7355000 }, { "epoch": 0.12, "learning_rate": 4.992796435130895e-05, "loss": 4.9129, "step": 7360000 }, { "epoch": 0.12, "learning_rate": 4.992786647814155e-05, "loss": 4.8822, "step": 7365000 }, { "epoch": 0.12, "learning_rate": 4.992776853862634e-05, "loss": 4.9039, "step": 7370000 }, { "epoch": 0.12, "learning_rate": 4.992767053276357e-05, "loss": 4.9152, "step": 7375000 }, { "epoch": 0.12, "learning_rate": 4.992757246055352e-05, "loss": 4.8742, "step": 7380000 }, { "epoch": 0.12, "learning_rate": 4.9927474302354226e-05, "loss": 4.9087, "step": 7385000 }, { "epoch": 0.12, "learning_rate": 4.992737609743711e-05, "loss": 4.9115, "step": 7390000 }, { "epoch": 0.12, "learning_rate": 4.9927277806504714e-05, "loss": 4.9003, "step": 7395000 }, { "epoch": 0.12, "learning_rate": 4.992717946888156e-05, "loss": 4.9181, "step": 7400000 }, { "epoch": 0.12, "learning_rate": 4.992708106491242e-05, "loss": 4.9194, "step": 7405000 }, { "epoch": 0.12, "learning_rate": 4.9926982574888975e-05, "loss": 4.9147, "step": 7410000 }, { "epoch": 0.12, "learning_rate": 4.992688405793724e-05, "loss": 4.9147, "step": 7415000 }, { "epoch": 0.12, "learning_rate": 4.992678543519659e-05, "loss": 4.9148, "step": 7420000 }, { "epoch": 0.12, "learning_rate": 4.992668680532967e-05, "loss": 4.9106, "step": 7425000 }, { "epoch": 0.12, "learning_rate": 4.9926588030124454e-05, "loss": 4.9121, "step": 7430000 }, { "epoch": 0.12, "learning_rate": 4.992648922807165e-05, "loss": 4.9091, "step": 7435000 }, { "epoch": 0.12, "learning_rate": 4.992639035967469e-05, "loss": 4.9129, "step": 7440000 }, { "epoch": 0.12, "learning_rate": 4.992629142493385e-05, "loss": 4.936, "step": 7445000 }, { "epoch": 0.12, "learning_rate": 4.992619242384939e-05, "loss": 4.9241, "step": 7450000 }, { "epoch": 0.12, "learning_rate": 4.9926093356421575e-05, "loss": 4.897, "step": 7455000 }, { "epoch": 0.12, "learning_rate": 4.992599424249199e-05, "loss": 4.914, "step": 7460000 }, { "epoch": 0.12, "learning_rate": 4.992589502253692e-05, "loss": 4.9252, "step": 7465000 }, { "epoch": 0.12, "learning_rate": 4.992579575608062e-05, "loss": 4.9215, "step": 7470000 }, { "epoch": 0.12, "learning_rate": 4.992569640340086e-05, "loss": 4.9119, "step": 7475000 }, { "epoch": 0.12, "learning_rate": 4.992559700424695e-05, "loss": 4.9074, "step": 7480000 }, { "epoch": 0.12, "learning_rate": 4.9925497558658954e-05, "loss": 4.9159, "step": 7485000 }, { "epoch": 0.12, "learning_rate": 4.992539802683503e-05, "loss": 4.9173, "step": 7490000 }, { "epoch": 0.12, "learning_rate": 4.9925298428669865e-05, "loss": 4.9077, "step": 7495000 }, { "epoch": 0.12, "learning_rate": 4.9925198764163725e-05, "loss": 4.9113, "step": 7500000 }, { "epoch": 0.12, "learning_rate": 4.992509901335608e-05, "loss": 4.9032, "step": 7505000 }, { "epoch": 0.12, "learning_rate": 4.992499921615551e-05, "loss": 4.8993, "step": 7510000 }, { "epoch": 0.12, "learning_rate": 4.992489933262741e-05, "loss": 4.8992, "step": 7515000 }, { "epoch": 0.12, "learning_rate": 4.992479940273347e-05, "loss": 4.9129, "step": 7520000 }, { "epoch": 0.12, "learning_rate": 4.992469942651376e-05, "loss": 4.9134, "step": 7525000 }, { "epoch": 0.12, "learning_rate": 4.9924599343926903e-05, "loss": 4.9029, "step": 7530000 }, { "epoch": 0.12, "learning_rate": 4.992449921501481e-05, "loss": 4.9137, "step": 7535000 }, { "epoch": 0.12, "learning_rate": 4.992439901976387e-05, "loss": 4.9346, "step": 7540000 }, { "epoch": 0.12, "learning_rate": 4.9924298738107357e-05, "loss": 4.9131, "step": 7545000 }, { "epoch": 0.12, "learning_rate": 4.9924198410166245e-05, "loss": 4.9114, "step": 7550000 }, { "epoch": 0.12, "learning_rate": 4.992409801588708e-05, "loss": 4.9219, "step": 7555000 }, { "epoch": 0.12, "learning_rate": 4.992399755527013e-05, "loss": 4.9014, "step": 7560000 }, { "epoch": 0.12, "learning_rate": 4.992389702831567e-05, "loss": 4.9037, "step": 7565000 }, { "epoch": 0.12, "learning_rate": 4.99237964551573e-05, "loss": 4.929, "step": 7570000 }, { "epoch": 0.12, "learning_rate": 4.992369577539527e-05, "loss": 4.9065, "step": 7575000 }, { "epoch": 0.12, "learning_rate": 4.992359506958976e-05, "loss": 4.9226, "step": 7580000 }, { "epoch": 0.12, "learning_rate": 4.9923494277301186e-05, "loss": 4.9068, "step": 7585000 }, { "epoch": 0.12, "learning_rate": 4.992339339849e-05, "loss": 4.9234, "step": 7590000 }, { "epoch": 0.12, "learning_rate": 4.992329247351607e-05, "loss": 4.909, "step": 7595000 }, { "epoch": 0.12, "learning_rate": 4.992319150241949e-05, "loss": 4.9247, "step": 7600000 }, { "epoch": 0.12, "learning_rate": 4.992309042456157e-05, "loss": 4.9109, "step": 7605000 }, { "epoch": 0.13, "learning_rate": 4.992298928034201e-05, "loss": 4.9229, "step": 7610000 }, { "epoch": 0.13, "learning_rate": 4.992288811026668e-05, "loss": 4.8935, "step": 7615000 }, { "epoch": 0.13, "learning_rate": 4.9922786853617244e-05, "loss": 4.9075, "step": 7620000 }, { "epoch": 0.13, "learning_rate": 4.992268555091286e-05, "loss": 4.9058, "step": 7625000 }, { "epoch": 0.13, "learning_rate": 4.9922584181901e-05, "loss": 4.9159, "step": 7630000 }, { "epoch": 0.13, "learning_rate": 4.9922482705970167e-05, "loss": 4.9132, "step": 7635000 }, { "epoch": 0.13, "learning_rate": 4.992238120431761e-05, "loss": 4.9002, "step": 7640000 }, { "epoch": 0.13, "learning_rate": 4.9922279595693536e-05, "loss": 4.9065, "step": 7645000 }, { "epoch": 0.13, "learning_rate": 4.992217796140137e-05, "loss": 4.9013, "step": 7650000 }, { "epoch": 0.13, "learning_rate": 4.9922076220085135e-05, "loss": 4.8884, "step": 7655000 }, { "epoch": 0.13, "learning_rate": 4.9921974453154444e-05, "loss": 4.909, "step": 7660000 }, { "epoch": 0.13, "learning_rate": 4.992187259953264e-05, "loss": 4.9116, "step": 7665000 }, { "epoch": 0.13, "learning_rate": 4.99217706591802e-05, "loss": 4.9256, "step": 7670000 }, { "epoch": 0.13, "learning_rate": 4.992166867288169e-05, "loss": 4.8828, "step": 7675000 }, { "epoch": 0.13, "learning_rate": 4.9921566620251856e-05, "loss": 4.9144, "step": 7680000 }, { "epoch": 0.13, "learning_rate": 4.992146450129098e-05, "loss": 4.9127, "step": 7685000 }, { "epoch": 0.13, "learning_rate": 4.992136231599934e-05, "loss": 4.8991, "step": 7690000 }, { "epoch": 0.13, "learning_rate": 4.9921260064377184e-05, "loss": 4.9079, "step": 7695000 }, { "epoch": 0.13, "learning_rate": 4.992115774642481e-05, "loss": 4.9086, "step": 7700000 }, { "epoch": 0.13, "learning_rate": 4.992105538263416e-05, "loss": 4.8903, "step": 7705000 }, { "epoch": 0.13, "learning_rate": 4.9920952911530455e-05, "loss": 4.8963, "step": 7710000 }, { "epoch": 0.13, "learning_rate": 4.9920850394589026e-05, "loss": 4.9086, "step": 7715000 }, { "epoch": 0.13, "learning_rate": 4.992074781131846e-05, "loss": 4.9055, "step": 7720000 }, { "epoch": 0.13, "learning_rate": 4.992064514117425e-05, "loss": 4.8936, "step": 7725000 }, { "epoch": 0.13, "learning_rate": 4.9920542425232954e-05, "loss": 4.9052, "step": 7730000 }, { "epoch": 0.13, "learning_rate": 4.992043962239202e-05, "loss": 4.9122, "step": 7735000 }, { "epoch": 0.13, "learning_rate": 4.992033677378109e-05, "loss": 4.8984, "step": 7740000 }, { "epoch": 0.13, "learning_rate": 4.9920233879440245e-05, "loss": 4.8991, "step": 7745000 }, { "epoch": 0.13, "learning_rate": 4.992013087757619e-05, "loss": 4.9002, "step": 7750000 }, { "epoch": 0.13, "learning_rate": 4.992002785060716e-05, "loss": 4.8876, "step": 7755000 }, { "epoch": 0.13, "learning_rate": 4.991992473670006e-05, "loss": 4.9147, "step": 7760000 }, { "epoch": 0.13, "learning_rate": 4.991982155646628e-05, "loss": 4.9252, "step": 7765000 }, { "epoch": 0.13, "learning_rate": 4.9919718330570307e-05, "loss": 4.8726, "step": 7770000 }, { "epoch": 0.13, "learning_rate": 4.991961501769727e-05, "loss": 4.9098, "step": 7775000 }, { "epoch": 0.13, "learning_rate": 4.991951161780764e-05, "loss": 4.9355, "step": 7780000 }, { "epoch": 0.13, "learning_rate": 4.9919408192973936e-05, "loss": 4.9141, "step": 7785000 }, { "epoch": 0.13, "learning_rate": 4.991930466040688e-05, "loss": 4.9022, "step": 7790000 }, { "epoch": 0.2, "learning_rate": 4.979356606294619e-05, "loss": 4.8945, "step": 7790500 }, { "epoch": 0.2, "eval_loss": 5.672119140625, "eval_runtime": 179340.9606, "eval_samples_per_second": 61.798, "eval_steps_per_second": 7.725, "step": 7790500 }, { "epoch": 0.2, "learning_rate": 4.979353958670229e-05, "loss": 4.8691, "step": 7791000 }, { "epoch": 0.2, "eval_loss": 5.630408763885498, "eval_runtime": 182643.2798, "eval_samples_per_second": 60.681, "eval_steps_per_second": 7.585, "step": 7791000 }, { "epoch": 0.1, "learning_rate": 4.9948325182178755e-05, "loss": 4.9075, "step": 7791500 }, { "epoch": 0.1, "eval_loss": 5.701673984527588, "eval_runtime": 92709.3971, "eval_samples_per_second": 119.545, "eval_steps_per_second": 29.886, "step": 7791500 }, { "epoch": 0.1, "learning_rate": 4.9948318548597226e-05, "loss": 4.9375, "step": 7792000 }, { "epoch": 0.1, "eval_loss": 5.707010746002197, "eval_runtime": 92373.8149, "eval_samples_per_second": 119.979, "eval_steps_per_second": 29.995, "step": 7792000 }, { "epoch": 0.1, "learning_rate": 4.994831191459037e-05, "loss": 4.9309, "step": 7792500 }, { "epoch": 0.1, "eval_loss": 5.719743251800537, "eval_runtime": 94469.6382, "eval_samples_per_second": 117.317, "eval_steps_per_second": 29.329, "step": 7792500 }, { "epoch": 0.31, "learning_rate": 4.9536024499286255e-05, "loss": 4.8612, "step": 7793000 }, { "epoch": 0.31, "learning_rate": 4.9535965113896765e-05, "loss": 4.8292, "step": 7793500 }, { "epoch": 0.31, "learning_rate": 4.953590572474267e-05, "loss": 4.7822, "step": 7794000 }, { "epoch": 0.31, "learning_rate": 4.953584633182399e-05, "loss": 4.7956, "step": 7794500 }, { "epoch": 0.31, "learning_rate": 4.9535786935140734e-05, "loss": 4.7506, "step": 7795000 }, { "epoch": 0.31, "learning_rate": 4.95357275346929e-05, "loss": 4.745, "step": 7795500 }, { "epoch": 0.31, "learning_rate": 4.953566813048051e-05, "loss": 4.6809, "step": 7796000 }, { "epoch": 0.31, "learning_rate": 4.953560872250357e-05, "loss": 4.7162, "step": 7796500 }, { "epoch": 0.31, "learning_rate": 4.953554931076208e-05, "loss": 4.7217, "step": 7797000 }, { "epoch": 0.31, "learning_rate": 4.953548989525606e-05, "loss": 4.7182, "step": 7797500 }, { "epoch": 0.31, "learning_rate": 4.953543047598551e-05, "loss": 4.6784, "step": 7798000 }, { "epoch": 0.31, "learning_rate": 4.953537105295044e-05, "loss": 4.6782, "step": 7798500 }, { "epoch": 0.31, "learning_rate": 4.953531162615087e-05, "loss": 4.6857, "step": 7799000 }, { "epoch": 0.31, "learning_rate": 4.95352521955868e-05, "loss": 4.6785, "step": 7799500 }, { "epoch": 0.31, "learning_rate": 4.953519276125824e-05, "loss": 4.6447, "step": 7800000 }, { "epoch": 0.31, "learning_rate": 4.9535133323165204e-05, "loss": 4.6579, "step": 7800500 }, { "epoch": 0.31, "learning_rate": 4.95350738813077e-05, "loss": 4.6466, "step": 7801000 }, { "epoch": 0.31, "learning_rate": 4.953501443568573e-05, "loss": 4.6551, "step": 7801500 }, { "epoch": 0.31, "learning_rate": 4.9534954986299306e-05, "loss": 4.6558, "step": 7802000 }, { "epoch": 0.31, "learning_rate": 4.953489553314844e-05, "loss": 4.6364, "step": 7802500 }, { "epoch": 0.31, "learning_rate": 4.9534836076233145e-05, "loss": 4.6399, "step": 7803000 }, { "epoch": 0.31, "learning_rate": 4.953477661555342e-05, "loss": 4.6346, "step": 7803500 }, { "epoch": 0.31, "learning_rate": 4.953471715110928e-05, "loss": 4.6702, "step": 7804000 }, { "epoch": 0.31, "learning_rate": 4.953465768290073e-05, "loss": 4.6617, "step": 7804500 }, { "epoch": 0.31, "learning_rate": 4.9534598210927785e-05, "loss": 4.665, "step": 7805000 }, { "epoch": 0.31, "learning_rate": 4.953453873519045e-05, "loss": 4.587, "step": 7805500 }, { "epoch": 0.31, "learning_rate": 4.9534479255688734e-05, "loss": 4.6294, "step": 7806000 }, { "epoch": 0.31, "learning_rate": 4.9534419772422656e-05, "loss": 4.6085, "step": 7806500 }, { "epoch": 0.31, "learning_rate": 4.953436028539221e-05, "loss": 4.5988, "step": 7807000 }, { "epoch": 0.31, "learning_rate": 4.9534300794597425e-05, "loss": 4.6167, "step": 7807500 }, { "epoch": 0.31, "learning_rate": 4.953424130003828e-05, "loss": 4.5976, "step": 7808000 }, { "epoch": 0.31, "learning_rate": 4.9534181801714806e-05, "loss": 4.6199, "step": 7808500 }, { "epoch": 0.31, "learning_rate": 4.9534122299627006e-05, "loss": 4.6038, "step": 7809000 }, { "epoch": 0.31, "learning_rate": 4.95340627937749e-05, "loss": 4.6106, "step": 7809500 }, { "epoch": 0.31, "learning_rate": 4.953400328415848e-05, "loss": 4.6398, "step": 7810000 }, { "epoch": 0.31, "learning_rate": 4.953394377077776e-05, "loss": 4.5802, "step": 7810500 }, { "epoch": 0.31, "learning_rate": 4.9533884253632754e-05, "loss": 4.5946, "step": 7811000 }, { "epoch": 0.31, "learning_rate": 4.9533824732723475e-05, "loss": 4.5917, "step": 7811500 }, { "epoch": 0.31, "learning_rate": 4.953376520804992e-05, "loss": 4.6138, "step": 7812000 }, { "epoch": 0.31, "learning_rate": 4.9533705679612105e-05, "loss": 4.6439, "step": 7812500 }, { "epoch": 0.31, "learning_rate": 4.953364614741004e-05, "loss": 4.6253, "step": 7813000 }, { "epoch": 0.31, "learning_rate": 4.9533586611443735e-05, "loss": 4.6166, "step": 7813500 }, { "epoch": 0.31, "learning_rate": 4.9533527071713194e-05, "loss": 4.6166, "step": 7814000 }, { "epoch": 0.31, "learning_rate": 4.9533467528218425e-05, "loss": 4.5992, "step": 7814500 }, { "epoch": 0.31, "learning_rate": 4.953340798095944e-05, "loss": 4.5926, "step": 7815000 }, { "epoch": 0.31, "learning_rate": 4.953334842993626e-05, "loss": 4.5851, "step": 7815500 }, { "epoch": 0.31, "learning_rate": 4.9533288875148874e-05, "loss": 4.6173, "step": 7816000 }, { "epoch": 0.31, "learning_rate": 4.953322931659731e-05, "loss": 4.5892, "step": 7816500 }, { "epoch": 0.31, "learning_rate": 4.953316975428156e-05, "loss": 4.5904, "step": 7817000 }, { "epoch": 0.31, "learning_rate": 4.9533110188201636e-05, "loss": 4.6194, "step": 7817500 }, { "epoch": 0.31, "learning_rate": 4.953305061835756e-05, "loss": 4.555, "step": 7818000 }, { "epoch": 0.31, "learning_rate": 4.953299104474933e-05, "loss": 4.5782, "step": 7818500 }, { "epoch": 0.31, "learning_rate": 4.953293146737696e-05, "loss": 4.6522, "step": 7819000 }, { "epoch": 0.31, "learning_rate": 4.953287188624046e-05, "loss": 4.5718, "step": 7819500 }, { "epoch": 0.31, "learning_rate": 4.9532812301339825e-05, "loss": 4.5712, "step": 7820000 }, { "epoch": 0.31, "learning_rate": 4.953275271267509e-05, "loss": 4.5779, "step": 7820500 }, { "epoch": 0.31, "learning_rate": 4.9532693120246236e-05, "loss": 4.6231, "step": 7821000 }, { "epoch": 0.31, "learning_rate": 4.95326335240533e-05, "loss": 4.5759, "step": 7821500 }, { "epoch": 0.31, "learning_rate": 4.953257392409626e-05, "loss": 4.5895, "step": 7822000 }, { "epoch": 0.31, "learning_rate": 4.9532514320375154e-05, "loss": 4.5562, "step": 7822500 }, { "epoch": 0.31, "learning_rate": 4.9532454712889974e-05, "loss": 4.5577, "step": 7823000 }, { "epoch": 0.31, "learning_rate": 4.953239510164074e-05, "loss": 4.5973, "step": 7823500 }, { "epoch": 0.31, "learning_rate": 4.9532335486627444e-05, "loss": 4.6045, "step": 7824000 }, { "epoch": 0.31, "learning_rate": 4.953227586785012e-05, "loss": 4.5774, "step": 7824500 }, { "epoch": 0.31, "learning_rate": 4.953221624530876e-05, "loss": 4.6098, "step": 7825000 }, { "epoch": 0.31, "learning_rate": 4.9532156619003376e-05, "loss": 4.6056, "step": 7825500 }, { "epoch": 0.31, "learning_rate": 4.9532096988933973e-05, "loss": 4.5986, "step": 7826000 }, { "epoch": 0.31, "learning_rate": 4.953203735510057e-05, "loss": 4.588, "step": 7826500 }, { "epoch": 0.31, "learning_rate": 4.9531977717503175e-05, "loss": 4.5583, "step": 7827000 }, { "epoch": 0.31, "learning_rate": 4.953191807614179e-05, "loss": 4.5519, "step": 7827500 }, { "epoch": 0.31, "learning_rate": 4.953185843101643e-05, "loss": 4.6029, "step": 7828000 }, { "epoch": 0.31, "learning_rate": 4.9531798782127096e-05, "loss": 4.624, "step": 7828500 }, { "epoch": 0.31, "learning_rate": 4.953173912947381e-05, "loss": 4.5634, "step": 7829000 }, { "epoch": 0.31, "learning_rate": 4.953167947305657e-05, "loss": 4.5747, "step": 7829500 }, { "epoch": 0.31, "learning_rate": 4.9531619812875385e-05, "loss": 4.5932, "step": 7830000 }, { "epoch": 0.31, "learning_rate": 4.9531560148930275e-05, "loss": 4.5631, "step": 7830500 }, { "epoch": 0.31, "learning_rate": 4.953150048122124e-05, "loss": 4.5562, "step": 7831000 }, { "epoch": 0.31, "learning_rate": 4.95314408097483e-05, "loss": 4.5914, "step": 7831500 }, { "epoch": 0.31, "learning_rate": 4.953138113451145e-05, "loss": 4.5415, "step": 7832000 }, { "epoch": 0.31, "learning_rate": 4.95313214555107e-05, "loss": 4.5948, "step": 7832500 }, { "epoch": 0.31, "learning_rate": 4.953126177274607e-05, "loss": 4.5817, "step": 7833000 }, { "epoch": 0.31, "learning_rate": 4.953120208621756e-05, "loss": 4.6046, "step": 7833500 }, { "epoch": 0.31, "learning_rate": 4.953114239592518e-05, "loss": 4.5677, "step": 7834000 }, { "epoch": 0.31, "learning_rate": 4.9531082701868944e-05, "loss": 4.5397, "step": 7834500 }, { "epoch": 0.31, "learning_rate": 4.9531023004048865e-05, "loss": 4.6294, "step": 7835000 }, { "epoch": 0.31, "learning_rate": 4.953096330246494e-05, "loss": 4.5566, "step": 7835500 }, { "epoch": 0.31, "learning_rate": 4.953090359711719e-05, "loss": 4.5779, "step": 7836000 }, { "epoch": 0.31, "learning_rate": 4.953084388800561e-05, "loss": 4.5749, "step": 7836500 }, { "epoch": 0.31, "learning_rate": 4.953078417513023e-05, "loss": 4.5291, "step": 7837000 }, { "epoch": 0.31, "learning_rate": 4.9530724458491036e-05, "loss": 4.5535, "step": 7837500 }, { "epoch": 0.31, "learning_rate": 4.953066473808805e-05, "loss": 4.5876, "step": 7838000 }, { "epoch": 0.31, "learning_rate": 4.9530605013921275e-05, "loss": 4.6016, "step": 7838500 }, { "epoch": 0.31, "learning_rate": 4.953054528599073e-05, "loss": 4.5715, "step": 7839000 }, { "epoch": 0.31, "learning_rate": 4.953048555429642e-05, "loss": 4.6129, "step": 7839500 }, { "epoch": 0.31, "learning_rate": 4.953042581883835e-05, "loss": 4.5554, "step": 7840000 }, { "epoch": 0.31, "learning_rate": 4.953036607961653e-05, "loss": 4.5604, "step": 7840500 }, { "epoch": 0.31, "learning_rate": 4.953030633663097e-05, "loss": 4.5253, "step": 7841000 }, { "epoch": 0.31, "learning_rate": 4.953024658988169e-05, "loss": 4.5583, "step": 7841500 }, { "epoch": 0.31, "learning_rate": 4.953018683936867e-05, "loss": 4.5406, "step": 7842000 }, { "epoch": 0.31, "learning_rate": 4.953012708509196e-05, "loss": 4.5415, "step": 7842500 }, { "epoch": 0.31, "learning_rate": 4.9530067327051535e-05, "loss": 4.547, "step": 7843000 }, { "epoch": 0.31, "learning_rate": 4.9530007565247414e-05, "loss": 4.597, "step": 7843500 }, { "epoch": 0.31, "learning_rate": 4.9529947799679615e-05, "loss": 4.5551, "step": 7844000 }, { "epoch": 0.31, "learning_rate": 4.9529888030348136e-05, "loss": 4.5403, "step": 7844500 }, { "epoch": 0.31, "learning_rate": 4.9529828257253e-05, "loss": 4.5598, "step": 7845000 }, { "epoch": 0.31, "learning_rate": 4.95297684803942e-05, "loss": 4.5137, "step": 7845500 }, { "epoch": 0.31, "learning_rate": 4.952970869977176e-05, "loss": 4.5272, "step": 7846000 }, { "epoch": 0.31, "learning_rate": 4.952964891538567e-05, "loss": 4.5522, "step": 7846500 }, { "epoch": 0.31, "learning_rate": 4.952958912723596e-05, "loss": 4.5634, "step": 7847000 }, { "epoch": 0.31, "learning_rate": 4.952952933532263e-05, "loss": 4.5496, "step": 7847500 }, { "epoch": 0.31, "learning_rate": 4.952946953964568e-05, "loss": 4.5081, "step": 7848000 }, { "epoch": 0.31, "learning_rate": 4.952940974020514e-05, "loss": 4.5514, "step": 7848500 }, { "epoch": 0.31, "learning_rate": 4.9529349937001e-05, "loss": 4.6069, "step": 7849000 }, { "epoch": 0.31, "learning_rate": 4.952929013003328e-05, "loss": 4.5521, "step": 7849500 }, { "epoch": 0.31, "learning_rate": 4.952923031930199e-05, "loss": 4.5415, "step": 7850000 }, { "epoch": 0.31, "learning_rate": 4.9529170504807125e-05, "loss": 4.5653, "step": 7850500 }, { "epoch": 0.31, "learning_rate": 4.9529110686548705e-05, "loss": 4.554, "step": 7851000 }, { "epoch": 0.31, "learning_rate": 4.952905086452675e-05, "loss": 4.5148, "step": 7851500 }, { "epoch": 0.31, "learning_rate": 4.952899103874125e-05, "loss": 4.5553, "step": 7852000 }, { "epoch": 0.31, "learning_rate": 4.952893120919222e-05, "loss": 4.5251, "step": 7852500 }, { "epoch": 0.31, "learning_rate": 4.952887137587968e-05, "loss": 4.5356, "step": 7853000 }, { "epoch": 0.31, "learning_rate": 4.9528811538803624e-05, "loss": 4.5284, "step": 7853500 }, { "epoch": 0.31, "learning_rate": 4.952875169796406e-05, "loss": 4.5529, "step": 7854000 }, { "epoch": 0.31, "learning_rate": 4.9528691853361016e-05, "loss": 4.5379, "step": 7854500 }, { "epoch": 0.31, "learning_rate": 4.952863200499448e-05, "loss": 4.4988, "step": 7855000 }, { "epoch": 0.31, "learning_rate": 4.952857215286448e-05, "loss": 4.5566, "step": 7855500 }, { "epoch": 0.31, "learning_rate": 4.952851229697101e-05, "loss": 4.5513, "step": 7856000 }, { "epoch": 0.31, "learning_rate": 4.9528452437314085e-05, "loss": 4.5642, "step": 7856500 }, { "epoch": 0.31, "learning_rate": 4.9528392573893726e-05, "loss": 4.5133, "step": 7857000 }, { "epoch": 0.31, "learning_rate": 4.952833270670991e-05, "loss": 4.5363, "step": 7857500 }, { "epoch": 0.31, "learning_rate": 4.952827283576269e-05, "loss": 4.5583, "step": 7858000 }, { "epoch": 0.31, "learning_rate": 4.9528212961052036e-05, "loss": 4.5148, "step": 7858500 }, { "epoch": 0.31, "learning_rate": 4.9528153082577977e-05, "loss": 4.5658, "step": 7859000 }, { "epoch": 0.31, "learning_rate": 4.952809320034052e-05, "loss": 4.5032, "step": 7859500 }, { "epoch": 0.31, "learning_rate": 4.9528033314339674e-05, "loss": 4.5723, "step": 7860000 }, { "epoch": 0.31, "learning_rate": 4.952797342457544e-05, "loss": 4.5684, "step": 7860500 }, { "epoch": 0.31, "learning_rate": 4.952791353104784e-05, "loss": 4.5312, "step": 7861000 }, { "epoch": 0.31, "learning_rate": 4.952785363375687e-05, "loss": 4.5507, "step": 7861500 }, { "epoch": 0.31, "learning_rate": 4.952779373270255e-05, "loss": 4.5543, "step": 7862000 }, { "epoch": 0.31, "learning_rate": 4.9527733827884894e-05, "loss": 4.5329, "step": 7862500 }, { "epoch": 0.31, "learning_rate": 4.9527673919303895e-05, "loss": 4.5364, "step": 7863000 }, { "epoch": 0.31, "learning_rate": 4.952761400695957e-05, "loss": 4.5158, "step": 7863500 }, { "epoch": 0.31, "learning_rate": 4.952755409085193e-05, "loss": 4.5164, "step": 7864000 }, { "epoch": 0.31, "learning_rate": 4.952749417098098e-05, "loss": 4.5927, "step": 7864500 }, { "epoch": 0.31, "learning_rate": 4.9527434247346735e-05, "loss": 4.513, "step": 7865000 }, { "epoch": 0.31, "learning_rate": 4.95273743199492e-05, "loss": 4.5604, "step": 7865500 }, { "epoch": 0.31, "learning_rate": 4.952731438878838e-05, "loss": 4.4923, "step": 7866000 }, { "epoch": 0.31, "learning_rate": 4.952725445386429e-05, "loss": 4.5298, "step": 7866500 }, { "epoch": 0.31, "learning_rate": 4.9527194515176944e-05, "loss": 4.5088, "step": 7867000 }, { "epoch": 0.31, "learning_rate": 4.9527134572726344e-05, "loss": 4.5497, "step": 7867500 }, { "epoch": 0.31, "learning_rate": 4.95270746265125e-05, "loss": 4.55, "step": 7868000 }, { "epoch": 0.31, "learning_rate": 4.952701467653542e-05, "loss": 4.5444, "step": 7868500 }, { "epoch": 0.31, "learning_rate": 4.952695472279512e-05, "loss": 4.5853, "step": 7869000 }, { "epoch": 0.31, "learning_rate": 4.952689476529159e-05, "loss": 4.5176, "step": 7869500 }, { "epoch": 0.31, "learning_rate": 4.952683480402487e-05, "loss": 4.5614, "step": 7870000 }, { "epoch": 0.31, "learning_rate": 4.952677483899494e-05, "loss": 4.5399, "step": 7870500 }, { "epoch": 0.31, "learning_rate": 4.952671487020183e-05, "loss": 4.5546, "step": 7871000 }, { "epoch": 0.31, "learning_rate": 4.952665489764554e-05, "loss": 4.5327, "step": 7871500 }, { "epoch": 0.31, "learning_rate": 4.952659492132608e-05, "loss": 4.5615, "step": 7872000 }, { "epoch": 0.31, "learning_rate": 4.9526534941243456e-05, "loss": 4.5585, "step": 7872500 }, { "epoch": 0.31, "learning_rate": 4.9526474957397684e-05, "loss": 4.5292, "step": 7873000 }, { "epoch": 0.31, "learning_rate": 4.952641496978877e-05, "loss": 4.5353, "step": 7873500 }, { "epoch": 0.31, "learning_rate": 4.9526354978416724e-05, "loss": 4.5504, "step": 7874000 }, { "epoch": 0.31, "learning_rate": 4.952629498328155e-05, "loss": 4.4971, "step": 7874500 }, { "epoch": 0.31, "learning_rate": 4.952623498438327e-05, "loss": 4.5471, "step": 7875000 }, { "epoch": 0.31, "learning_rate": 4.9526174981721875e-05, "loss": 4.5367, "step": 7875500 }, { "epoch": 0.31, "learning_rate": 4.952611497529739e-05, "loss": 4.5159, "step": 7876000 }, { "epoch": 0.31, "learning_rate": 4.9526054965109815e-05, "loss": 4.5044, "step": 7876500 }, { "epoch": 0.31, "learning_rate": 4.952599495115916e-05, "loss": 4.534, "step": 7877000 }, { "epoch": 0.31, "learning_rate": 4.952593493344544e-05, "loss": 4.5761, "step": 7877500 }, { "epoch": 0.31, "learning_rate": 4.952587491196866e-05, "loss": 4.5259, "step": 7878000 }, { "epoch": 0.31, "learning_rate": 4.9525814886728825e-05, "loss": 4.5185, "step": 7878500 }, { "epoch": 0.31, "learning_rate": 4.952575485772596e-05, "loss": 4.5063, "step": 7879000 }, { "epoch": 0.31, "learning_rate": 4.952569482496006e-05, "loss": 4.57, "step": 7879500 }, { "epoch": 0.31, "learning_rate": 4.952563478843113e-05, "loss": 4.5443, "step": 7880000 }, { "epoch": 0.31, "learning_rate": 4.952557474813919e-05, "loss": 4.521, "step": 7880500 }, { "epoch": 0.31, "learning_rate": 4.9525514704084256e-05, "loss": 4.5027, "step": 7881000 }, { "epoch": 0.31, "learning_rate": 4.9525454656266314e-05, "loss": 4.567, "step": 7881500 }, { "epoch": 0.31, "learning_rate": 4.952539460468539e-05, "loss": 4.5025, "step": 7882000 }, { "epoch": 0.31, "learning_rate": 4.9525334549341495e-05, "loss": 4.5471, "step": 7882500 }, { "epoch": 0.31, "learning_rate": 4.952527449023463e-05, "loss": 4.5487, "step": 7883000 }, { "epoch": 0.31, "learning_rate": 4.9525214427364805e-05, "loss": 4.499, "step": 7883500 }, { "epoch": 0.31, "learning_rate": 4.952515436073203e-05, "loss": 4.5616, "step": 7884000 }, { "epoch": 0.31, "learning_rate": 4.952509429033631e-05, "loss": 4.5141, "step": 7884500 }, { "epoch": 0.31, "learning_rate": 4.952503421617768e-05, "loss": 4.5142, "step": 7885000 }, { "epoch": 0.31, "learning_rate": 4.9524974138256106e-05, "loss": 4.5276, "step": 7885500 }, { "epoch": 0.31, "learning_rate": 4.952491405657164e-05, "loss": 4.5277, "step": 7886000 }, { "epoch": 0.31, "learning_rate": 4.952485397112426e-05, "loss": 4.5364, "step": 7886500 }, { "epoch": 0.31, "learning_rate": 4.9524793881913986e-05, "loss": 4.5209, "step": 7887000 }, { "epoch": 0.31, "learning_rate": 4.952473378894083e-05, "loss": 4.494, "step": 7887500 }, { "epoch": 0.31, "learning_rate": 4.95246736922048e-05, "loss": 4.5327, "step": 7888000 }, { "epoch": 0.31, "learning_rate": 4.95246135917059e-05, "loss": 4.5503, "step": 7888500 }, { "epoch": 0.31, "learning_rate": 4.9524553487444146e-05, "loss": 4.5413, "step": 7889000 }, { "epoch": 0.31, "learning_rate": 4.952449337941955e-05, "loss": 4.5606, "step": 7889500 }, { "epoch": 0.31, "learning_rate": 4.952443326763211e-05, "loss": 4.5026, "step": 7890000 }, { "epoch": 0.31, "learning_rate": 4.952437315208184e-05, "loss": 4.5389, "step": 7890500 }, { "epoch": 0.31, "learning_rate": 4.9524313032768755e-05, "loss": 4.5439, "step": 7891000 }, { "epoch": 0.31, "learning_rate": 4.952425290969286e-05, "loss": 4.5228, "step": 7891500 }, { "epoch": 0.31, "learning_rate": 4.952419278285416e-05, "loss": 4.5441, "step": 7892000 }, { "epoch": 0.31, "learning_rate": 4.952413265225267e-05, "loss": 4.5475, "step": 7892500 }, { "epoch": 0.31, "learning_rate": 4.95240725178884e-05, "loss": 4.5289, "step": 7893000 }, { "epoch": 0.31, "learning_rate": 4.9524012379761355e-05, "loss": 4.5227, "step": 7893500 }, { "epoch": 0.31, "learning_rate": 4.952395223787154e-05, "loss": 4.4732, "step": 7894000 }, { "epoch": 0.31, "learning_rate": 4.952389209221897e-05, "loss": 4.5336, "step": 7894500 }, { "epoch": 0.31, "learning_rate": 4.952383194280366e-05, "loss": 4.5427, "step": 7895000 }, { "epoch": 0.31, "learning_rate": 4.952377178962561e-05, "loss": 4.5239, "step": 7895500 }, { "epoch": 0.31, "learning_rate": 4.9523711632684834e-05, "loss": 4.5252, "step": 7896000 }, { "epoch": 0.31, "learning_rate": 4.952365147198134e-05, "loss": 4.4954, "step": 7896500 }, { "epoch": 0.31, "learning_rate": 4.952359130751514e-05, "loss": 4.5146, "step": 7897000 }, { "epoch": 0.31, "learning_rate": 4.952353113928624e-05, "loss": 4.4922, "step": 7897500 }, { "epoch": 0.31, "learning_rate": 4.952347096729464e-05, "loss": 4.5261, "step": 7898000 }, { "epoch": 0.31, "learning_rate": 4.952341079154037e-05, "loss": 4.5141, "step": 7898500 }, { "epoch": 0.31, "learning_rate": 4.952335061202342e-05, "loss": 4.5403, "step": 7899000 }, { "epoch": 0.31, "learning_rate": 4.9523290428743805e-05, "loss": 4.5, "step": 7899500 }, { "epoch": 0.31, "learning_rate": 4.9523230241701545e-05, "loss": 4.5092, "step": 7900000 }, { "epoch": 0.31, "learning_rate": 4.9523170050896636e-05, "loss": 4.5334, "step": 7900500 }, { "epoch": 0.31, "learning_rate": 4.95231098563291e-05, "loss": 4.5233, "step": 7901000 }, { "epoch": 0.31, "learning_rate": 4.952304965799893e-05, "loss": 4.5058, "step": 7901500 }, { "epoch": 0.31, "learning_rate": 4.952298945590614e-05, "loss": 4.5238, "step": 7902000 }, { "epoch": 0.31, "learning_rate": 4.952292925005075e-05, "loss": 4.5025, "step": 7902500 }, { "epoch": 0.31, "learning_rate": 4.952286904043276e-05, "loss": 4.55, "step": 7903000 }, { "epoch": 0.31, "learning_rate": 4.952280882705218e-05, "loss": 4.5025, "step": 7903500 }, { "epoch": 0.31, "learning_rate": 4.9522748609909025e-05, "loss": 4.508, "step": 7904000 }, { "epoch": 0.31, "learning_rate": 4.9522688389003296e-05, "loss": 4.548, "step": 7904500 }, { "epoch": 0.31, "learning_rate": 4.9522628164335006e-05, "loss": 4.5207, "step": 7905000 }, { "epoch": 0.31, "learning_rate": 4.952256793590416e-05, "loss": 4.5066, "step": 7905500 }, { "epoch": 0.31, "learning_rate": 4.952250770371078e-05, "loss": 4.4974, "step": 7906000 }, { "epoch": 0.31, "learning_rate": 4.9522447467754864e-05, "loss": 4.5198, "step": 7906500 }, { "epoch": 0.31, "learning_rate": 4.952238722803642e-05, "loss": 4.5114, "step": 7907000 }, { "epoch": 0.31, "learning_rate": 4.9522326984555465e-05, "loss": 4.5025, "step": 7907500 }, { "epoch": 0.31, "learning_rate": 4.9522266737312e-05, "loss": 4.5415, "step": 7908000 }, { "epoch": 0.31, "learning_rate": 4.952220648630604e-05, "loss": 4.5222, "step": 7908500 }, { "epoch": 0.31, "learning_rate": 4.952214623153759e-05, "loss": 4.5155, "step": 7909000 }, { "epoch": 0.31, "learning_rate": 4.952208597300667e-05, "loss": 4.495, "step": 7909500 }, { "epoch": 0.31, "learning_rate": 4.9522025710713274e-05, "loss": 4.4813, "step": 7910000 }, { "epoch": 0.31, "learning_rate": 4.952196544465742e-05, "loss": 4.5345, "step": 7910500 }, { "epoch": 0.31, "learning_rate": 4.952190517483912e-05, "loss": 4.499, "step": 7911000 }, { "epoch": 0.31, "learning_rate": 4.9521844901258375e-05, "loss": 4.4816, "step": 7911500 }, { "epoch": 0.31, "learning_rate": 4.95217846239152e-05, "loss": 4.5215, "step": 7912000 }, { "epoch": 0.31, "learning_rate": 4.95217243428096e-05, "loss": 4.5127, "step": 7912500 }, { "epoch": 0.31, "learning_rate": 4.9521664057941586e-05, "loss": 4.4809, "step": 7913000 }, { "epoch": 0.31, "learning_rate": 4.9521603769311174e-05, "loss": 4.5235, "step": 7913500 }, { "epoch": 0.31, "learning_rate": 4.9521543476918365e-05, "loss": 4.4819, "step": 7914000 }, { "epoch": 0.31, "learning_rate": 4.9521483180763164e-05, "loss": 4.5429, "step": 7914500 }, { "epoch": 0.31, "learning_rate": 4.952142288084559e-05, "loss": 4.4964, "step": 7915000 }, { "epoch": 0.31, "learning_rate": 4.9521362577165654e-05, "loss": 4.5041, "step": 7915500 }, { "epoch": 0.31, "learning_rate": 4.952130226972336e-05, "loss": 4.4992, "step": 7916000 }, { "epoch": 0.31, "learning_rate": 4.9521241958518714e-05, "loss": 4.4952, "step": 7916500 }, { "epoch": 0.31, "learning_rate": 4.952118164355173e-05, "loss": 4.4988, "step": 7917000 }, { "epoch": 0.31, "learning_rate": 4.952112132482241e-05, "loss": 4.5303, "step": 7917500 }, { "epoch": 0.31, "learning_rate": 4.952106100233077e-05, "loss": 4.5367, "step": 7918000 }, { "epoch": 0.31, "learning_rate": 4.952100067607683e-05, "loss": 4.5378, "step": 7918500 }, { "epoch": 0.31, "learning_rate": 4.952094034606059e-05, "loss": 4.5136, "step": 7919000 }, { "epoch": 0.31, "learning_rate": 4.952088001228204e-05, "loss": 4.4974, "step": 7919500 }, { "epoch": 0.31, "learning_rate": 4.9520819674741215e-05, "loss": 4.4957, "step": 7920000 }, { "epoch": 0.31, "learning_rate": 4.9520759333438114e-05, "loss": 4.5469, "step": 7920500 }, { "epoch": 0.31, "learning_rate": 4.952069898837275e-05, "loss": 4.4989, "step": 7921000 }, { "epoch": 0.31, "learning_rate": 4.9520638639545124e-05, "loss": 4.5036, "step": 7921500 }, { "epoch": 0.31, "learning_rate": 4.952057828695526e-05, "loss": 4.4667, "step": 7922000 }, { "epoch": 0.31, "learning_rate": 4.952051793060316e-05, "loss": 4.5068, "step": 7922500 }, { "epoch": 0.31, "learning_rate": 4.952045757048882e-05, "loss": 4.4679, "step": 7923000 }, { "epoch": 0.31, "learning_rate": 4.952039720661227e-05, "loss": 4.5289, "step": 7923500 }, { "epoch": 0.31, "learning_rate": 4.952033683897351e-05, "loss": 4.4929, "step": 7924000 }, { "epoch": 0.31, "learning_rate": 4.952027646757254e-05, "loss": 4.4989, "step": 7924500 }, { "epoch": 0.31, "learning_rate": 4.952021609240939e-05, "loss": 4.5143, "step": 7925000 }, { "epoch": 0.31, "learning_rate": 4.952015571348406e-05, "loss": 4.4872, "step": 7925500 }, { "epoch": 0.31, "learning_rate": 4.952009533079655e-05, "loss": 4.5158, "step": 7926000 }, { "epoch": 0.31, "learning_rate": 4.9520034944346875e-05, "loss": 4.5327, "step": 7926500 }, { "epoch": 0.31, "learning_rate": 4.951997455413505e-05, "loss": 4.5031, "step": 7927000 }, { "epoch": 0.31, "learning_rate": 4.951991416016109e-05, "loss": 4.5474, "step": 7927500 }, { "epoch": 0.31, "learning_rate": 4.951985376242498e-05, "loss": 4.5063, "step": 7928000 }, { "epoch": 0.31, "learning_rate": 4.951979336092675e-05, "loss": 4.5166, "step": 7928500 }, { "epoch": 0.31, "learning_rate": 4.95197329556664e-05, "loss": 4.4765, "step": 7929000 }, { "epoch": 0.31, "learning_rate": 4.9519672546643944e-05, "loss": 4.4942, "step": 7929500 }, { "epoch": 0.31, "learning_rate": 4.951961213385938e-05, "loss": 4.4936, "step": 7930000 }, { "epoch": 0.31, "learning_rate": 4.951955171731274e-05, "loss": 4.5326, "step": 7930500 }, { "epoch": 0.31, "learning_rate": 4.951949129700402e-05, "loss": 4.5455, "step": 7931000 }, { "epoch": 0.31, "learning_rate": 4.9519430872933224e-05, "loss": 4.5026, "step": 7931500 }, { "epoch": 0.31, "learning_rate": 4.951937044510037e-05, "loss": 4.4614, "step": 7932000 }, { "epoch": 0.31, "learning_rate": 4.951931001350546e-05, "loss": 4.5074, "step": 7932500 }, { "epoch": 0.31, "learning_rate": 4.951924957814851e-05, "loss": 4.4584, "step": 7933000 }, { "epoch": 0.31, "learning_rate": 4.951918913902953e-05, "loss": 4.4517, "step": 7933500 }, { "epoch": 0.31, "learning_rate": 4.951912869614852e-05, "loss": 4.5142, "step": 7934000 }, { "epoch": 0.31, "learning_rate": 4.95190682495055e-05, "loss": 4.4971, "step": 7934500 }, { "epoch": 0.31, "learning_rate": 4.951900779910047e-05, "loss": 4.5271, "step": 7935000 }, { "epoch": 0.31, "learning_rate": 4.951894734493344e-05, "loss": 4.4847, "step": 7935500 }, { "epoch": 0.31, "learning_rate": 4.951888688700443e-05, "loss": 4.5009, "step": 7936000 }, { "epoch": 0.31, "learning_rate": 4.951882642531344e-05, "loss": 4.5195, "step": 7936500 }, { "epoch": 0.31, "learning_rate": 4.951876595986048e-05, "loss": 4.4888, "step": 7937000 }, { "epoch": 0.31, "learning_rate": 4.951870549064556e-05, "loss": 4.4792, "step": 7937500 }, { "epoch": 0.31, "learning_rate": 4.9518645017668694e-05, "loss": 4.5384, "step": 7938000 }, { "epoch": 0.31, "learning_rate": 4.951858454092988e-05, "loss": 4.4968, "step": 7938500 }, { "epoch": 0.31, "learning_rate": 4.9518524060429136e-05, "loss": 4.4933, "step": 7939000 }, { "epoch": 0.31, "learning_rate": 4.951846357616648e-05, "loss": 4.5054, "step": 7939500 }, { "epoch": 0.31, "learning_rate": 4.95184030881419e-05, "loss": 4.5341, "step": 7940000 }, { "epoch": 0.31, "learning_rate": 4.951834259635542e-05, "loss": 4.4838, "step": 7940500 }, { "epoch": 0.31, "learning_rate": 4.9518282100807045e-05, "loss": 4.4883, "step": 7941000 }, { "epoch": 0.31, "learning_rate": 4.951822160149679e-05, "loss": 4.5527, "step": 7941500 }, { "epoch": 0.31, "learning_rate": 4.9518161098424656e-05, "loss": 4.4699, "step": 7942000 }, { "epoch": 0.31, "learning_rate": 4.9518100591590656e-05, "loss": 4.535, "step": 7942500 }, { "epoch": 0.31, "learning_rate": 4.95180400809948e-05, "loss": 4.5154, "step": 7943000 }, { "epoch": 0.31, "learning_rate": 4.951797956663709e-05, "loss": 4.5342, "step": 7943500 }, { "epoch": 0.31, "learning_rate": 4.951791904851755e-05, "loss": 4.5012, "step": 7944000 }, { "epoch": 0.31, "learning_rate": 4.951785852663617e-05, "loss": 4.5285, "step": 7944500 }, { "epoch": 0.31, "learning_rate": 4.951779800099298e-05, "loss": 4.4879, "step": 7945000 }, { "epoch": 0.31, "learning_rate": 4.951773747158797e-05, "loss": 4.5246, "step": 7945500 }, { "epoch": 0.31, "learning_rate": 4.951767693842116e-05, "loss": 4.4581, "step": 7946000 }, { "epoch": 0.31, "learning_rate": 4.951761640149256e-05, "loss": 4.4953, "step": 7946500 }, { "epoch": 0.31, "learning_rate": 4.951755586080218e-05, "loss": 4.5112, "step": 7947000 }, { "epoch": 0.31, "learning_rate": 4.951749531635003e-05, "loss": 4.528, "step": 7947500 }, { "epoch": 0.31, "learning_rate": 4.9517434768136105e-05, "loss": 4.4774, "step": 7948000 }, { "epoch": 0.31, "learning_rate": 4.9517374216160426e-05, "loss": 4.519, "step": 7948500 }, { "epoch": 0.31, "learning_rate": 4.951731366042301e-05, "loss": 4.4445, "step": 7949000 }, { "epoch": 0.31, "learning_rate": 4.9517253100923846e-05, "loss": 4.4853, "step": 7949500 }, { "epoch": 0.31, "learning_rate": 4.951719253766296e-05, "loss": 4.473, "step": 7950000 }, { "epoch": 0.31, "learning_rate": 4.951713197064036e-05, "loss": 4.5106, "step": 7950500 }, { "epoch": 0.31, "learning_rate": 4.951707139985605e-05, "loss": 4.536, "step": 7951000 }, { "epoch": 0.31, "learning_rate": 4.9517010825310036e-05, "loss": 4.5211, "step": 7951500 }, { "epoch": 0.31, "learning_rate": 4.951695024700233e-05, "loss": 4.4836, "step": 7952000 }, { "epoch": 0.31, "learning_rate": 4.9516889664932956e-05, "loss": 4.5044, "step": 7952500 }, { "epoch": 0.31, "learning_rate": 4.95168290791019e-05, "loss": 4.5094, "step": 7953000 }, { "epoch": 0.31, "learning_rate": 4.951676848950918e-05, "loss": 4.4759, "step": 7953500 }, { "epoch": 0.31, "learning_rate": 4.951670789615482e-05, "loss": 4.4635, "step": 7954000 }, { "epoch": 0.31, "learning_rate": 4.951664729903881e-05, "loss": 4.5016, "step": 7954500 }, { "epoch": 0.31, "learning_rate": 4.951658669816116e-05, "loss": 4.4965, "step": 7955000 }, { "epoch": 0.31, "learning_rate": 4.951652609352189e-05, "loss": 4.5193, "step": 7955500 }, { "epoch": 0.31, "learning_rate": 4.9516465485121e-05, "loss": 4.5031, "step": 7956000 }, { "epoch": 0.31, "learning_rate": 4.951640487295851e-05, "loss": 4.5122, "step": 7956500 }, { "epoch": 0.31, "learning_rate": 4.951634425703442e-05, "loss": 4.5116, "step": 7957000 }, { "epoch": 0.31, "learning_rate": 4.951628363734874e-05, "loss": 4.5082, "step": 7957500 }, { "epoch": 0.31, "learning_rate": 4.9516223013901486e-05, "loss": 4.4694, "step": 7958000 }, { "epoch": 0.31, "learning_rate": 4.951616238669266e-05, "loss": 4.4595, "step": 7958500 }, { "epoch": 0.31, "learning_rate": 4.9516101755722275e-05, "loss": 4.5085, "step": 7959000 }, { "epoch": 0.31, "learning_rate": 4.951604112099034e-05, "loss": 4.5149, "step": 7959500 }, { "epoch": 0.31, "learning_rate": 4.951598048249686e-05, "loss": 4.5012, "step": 7960000 }, { "epoch": 0.31, "learning_rate": 4.9515919840241854e-05, "loss": 4.4888, "step": 7960500 }, { "epoch": 0.31, "learning_rate": 4.9515859194225325e-05, "loss": 4.4936, "step": 7961000 }, { "epoch": 0.31, "learning_rate": 4.951579854444728e-05, "loss": 4.5286, "step": 7961500 }, { "epoch": 0.31, "learning_rate": 4.9515737890907735e-05, "loss": 4.4887, "step": 7962000 }, { "epoch": 0.31, "learning_rate": 4.951567723360668e-05, "loss": 4.4848, "step": 7962500 }, { "epoch": 0.31, "learning_rate": 4.9515616572544156e-05, "loss": 4.4893, "step": 7963000 }, { "epoch": 0.31, "learning_rate": 4.951555590772015e-05, "loss": 4.4959, "step": 7963500 }, { "epoch": 0.31, "learning_rate": 4.951549523913468e-05, "loss": 4.4937, "step": 7964000 }, { "epoch": 0.31, "learning_rate": 4.951543456678774e-05, "loss": 4.5301, "step": 7964500 }, { "epoch": 0.31, "learning_rate": 4.951537389067937e-05, "loss": 4.5406, "step": 7965000 }, { "epoch": 0.31, "learning_rate": 4.951531321080955e-05, "loss": 4.4682, "step": 7965500 }, { "epoch": 0.31, "learning_rate": 4.951525252717831e-05, "loss": 4.5092, "step": 7966000 }, { "epoch": 0.31, "learning_rate": 4.9515191839785644e-05, "loss": 4.4942, "step": 7966500 }, { "epoch": 0.31, "learning_rate": 4.951513114863157e-05, "loss": 4.4808, "step": 7967000 }, { "epoch": 0.31, "learning_rate": 4.9515070453716086e-05, "loss": 4.4836, "step": 7967500 }, { "epoch": 0.31, "learning_rate": 4.951500975503921e-05, "loss": 4.4802, "step": 7968000 }, { "epoch": 0.31, "learning_rate": 4.951494905260096e-05, "loss": 4.514, "step": 7968500 }, { "epoch": 0.31, "learning_rate": 4.951488834640134e-05, "loss": 4.4749, "step": 7969000 }, { "epoch": 0.31, "learning_rate": 4.951482763644034e-05, "loss": 4.5292, "step": 7969500 }, { "epoch": 0.31, "learning_rate": 4.951476692271799e-05, "loss": 4.4685, "step": 7970000 }, { "epoch": 0.31, "learning_rate": 4.951470620523429e-05, "loss": 4.4601, "step": 7970500 }, { "epoch": 0.31, "learning_rate": 4.9514645483989264e-05, "loss": 4.4976, "step": 7971000 }, { "epoch": 0.31, "learning_rate": 4.951458475898291e-05, "loss": 4.4794, "step": 7971500 }, { "epoch": 0.31, "learning_rate": 4.951452403021524e-05, "loss": 4.5324, "step": 7972000 }, { "epoch": 0.31, "learning_rate": 4.9514463297686254e-05, "loss": 4.4912, "step": 7972500 }, { "epoch": 0.31, "learning_rate": 4.951440256139597e-05, "loss": 4.4753, "step": 7973000 }, { "epoch": 0.31, "learning_rate": 4.95143418213444e-05, "loss": 4.4856, "step": 7973500 }, { "epoch": 0.31, "learning_rate": 4.9514281077531544e-05, "loss": 4.4803, "step": 7974000 }, { "epoch": 0.31, "learning_rate": 4.951422032995742e-05, "loss": 4.4657, "step": 7974500 }, { "epoch": 0.31, "learning_rate": 4.951415957862203e-05, "loss": 4.4841, "step": 7975000 }, { "epoch": 0.31, "learning_rate": 4.951409882352539e-05, "loss": 4.5289, "step": 7975500 }, { "epoch": 0.31, "learning_rate": 4.9514038064667514e-05, "loss": 4.4992, "step": 7976000 }, { "epoch": 0.31, "learning_rate": 4.9513977302048396e-05, "loss": 4.5256, "step": 7976500 }, { "epoch": 0.31, "learning_rate": 4.951391653566806e-05, "loss": 4.5201, "step": 7977000 }, { "epoch": 0.31, "learning_rate": 4.951385576552651e-05, "loss": 4.4758, "step": 7977500 }, { "epoch": 0.31, "learning_rate": 4.951379499162374e-05, "loss": 4.5366, "step": 7978000 }, { "epoch": 0.31, "learning_rate": 4.9513734213959783e-05, "loss": 4.4562, "step": 7978500 }, { "epoch": 0.31, "learning_rate": 4.9513673432534636e-05, "loss": 4.4665, "step": 7979000 }, { "epoch": 0.31, "learning_rate": 4.951361264734832e-05, "loss": 4.5108, "step": 7979500 }, { "epoch": 0.31, "learning_rate": 4.951355185840082e-05, "loss": 4.5309, "step": 7980000 }, { "epoch": 0.31, "learning_rate": 4.951349106569218e-05, "loss": 4.4664, "step": 7980500 }, { "epoch": 0.31, "learning_rate": 4.951343026922237e-05, "loss": 4.4847, "step": 7981000 }, { "epoch": 0.31, "learning_rate": 4.9513369468991424e-05, "loss": 4.5175, "step": 7981500 }, { "epoch": 0.31, "learning_rate": 4.951330866499936e-05, "loss": 4.4669, "step": 7982000 }, { "epoch": 0.31, "learning_rate": 4.951324785724616e-05, "loss": 4.4807, "step": 7982500 }, { "epoch": 0.31, "learning_rate": 4.9513187045731854e-05, "loss": 4.4902, "step": 7983000 }, { "epoch": 0.31, "learning_rate": 4.951312623045644e-05, "loss": 4.4618, "step": 7983500 }, { "epoch": 0.31, "learning_rate": 4.951306541141993e-05, "loss": 4.5187, "step": 7984000 }, { "epoch": 0.31, "learning_rate": 4.951300458862235e-05, "loss": 4.5089, "step": 7984500 }, { "epoch": 0.31, "learning_rate": 4.951294376206368e-05, "loss": 4.5166, "step": 7985000 }, { "epoch": 0.31, "learning_rate": 4.951288293174395e-05, "loss": 4.5159, "step": 7985500 }, { "epoch": 0.31, "learning_rate": 4.951282209766316e-05, "loss": 4.4676, "step": 7986000 }, { "epoch": 0.31, "learning_rate": 4.951276125982133e-05, "loss": 4.4825, "step": 7986500 }, { "epoch": 0.31, "learning_rate": 4.951270041821845e-05, "loss": 4.5077, "step": 7987000 }, { "epoch": 0.31, "learning_rate": 4.9512639572854546e-05, "loss": 4.4714, "step": 7987500 }, { "epoch": 0.31, "learning_rate": 4.9512578723729633e-05, "loss": 4.5176, "step": 7988000 }, { "epoch": 0.31, "learning_rate": 4.95125178708437e-05, "loss": 4.4753, "step": 7988500 }, { "epoch": 0.31, "learning_rate": 4.9512457014196767e-05, "loss": 4.4964, "step": 7989000 }, { "epoch": 0.32, "learning_rate": 4.951239615378884e-05, "loss": 4.4921, "step": 7989500 }, { "epoch": 0.32, "learning_rate": 4.951233528961994e-05, "loss": 4.5027, "step": 7990000 }, { "epoch": 0.32, "learning_rate": 4.951227442169005e-05, "loss": 4.4717, "step": 7990500 }, { "epoch": 0.32, "learning_rate": 4.9512213549999213e-05, "loss": 4.5142, "step": 7991000 }, { "epoch": 0.32, "learning_rate": 4.951215267454742e-05, "loss": 4.5036, "step": 7991500 }, { "epoch": 0.32, "learning_rate": 4.951209179533468e-05, "loss": 4.4987, "step": 7992000 }, { "epoch": 0.32, "learning_rate": 4.9512030912361005e-05, "loss": 4.503, "step": 7992500 }, { "epoch": 0.32, "learning_rate": 4.95119700256264e-05, "loss": 4.4978, "step": 7993000 }, { "epoch": 0.32, "learning_rate": 4.951190913513089e-05, "loss": 4.5189, "step": 7993500 }, { "epoch": 0.32, "learning_rate": 4.951184824087446e-05, "loss": 4.4736, "step": 7994000 }, { "epoch": 0.32, "learning_rate": 4.951178734285714e-05, "loss": 4.4797, "step": 7994500 }, { "epoch": 0.32, "learning_rate": 4.951172644107892e-05, "loss": 4.4886, "step": 7995000 }, { "epoch": 0.32, "learning_rate": 4.9511665535539833e-05, "loss": 4.4778, "step": 7995500 }, { "epoch": 0.32, "learning_rate": 4.951160462623987e-05, "loss": 4.4881, "step": 7996000 }, { "epoch": 0.32, "learning_rate": 4.9511543713179054e-05, "loss": 4.4866, "step": 7996500 }, { "epoch": 0.32, "learning_rate": 4.951148279635738e-05, "loss": 4.4877, "step": 7997000 }, { "epoch": 0.32, "learning_rate": 4.951142187577487e-05, "loss": 4.5288, "step": 7997500 }, { "epoch": 0.32, "learning_rate": 4.951136095143152e-05, "loss": 4.5034, "step": 7998000 }, { "epoch": 0.32, "learning_rate": 4.951130002332734e-05, "loss": 4.5241, "step": 7998500 }, { "epoch": 0.32, "learning_rate": 4.951123909146236e-05, "loss": 4.4905, "step": 7999000 }, { "epoch": 0.32, "learning_rate": 4.951117815583657e-05, "loss": 4.5089, "step": 7999500 }, { "epoch": 0.32, "learning_rate": 4.951111721644999e-05, "loss": 4.4696, "step": 8000000 }, { "epoch": 0.32, "learning_rate": 4.951105627330262e-05, "loss": 4.5035, "step": 8000500 }, { "epoch": 0.32, "learning_rate": 4.951099532639447e-05, "loss": 4.5203, "step": 8001000 }, { "epoch": 0.32, "learning_rate": 4.951093437572556e-05, "loss": 4.5057, "step": 8001500 }, { "epoch": 0.32, "learning_rate": 4.951087342129589e-05, "loss": 4.5162, "step": 8002000 }, { "epoch": 0.32, "learning_rate": 4.951081246310547e-05, "loss": 4.4804, "step": 8002500 }, { "epoch": 0.32, "learning_rate": 4.9510751501154314e-05, "loss": 4.5052, "step": 8003000 }, { "epoch": 0.32, "learning_rate": 4.951069053544243e-05, "loss": 4.4922, "step": 8003500 }, { "epoch": 0.32, "learning_rate": 4.951062956596981e-05, "loss": 4.4925, "step": 8004000 }, { "epoch": 0.32, "learning_rate": 4.95105685927365e-05, "loss": 4.4902, "step": 8004500 }, { "epoch": 0.32, "learning_rate": 4.9510507615742475e-05, "loss": 4.4896, "step": 8005000 }, { "epoch": 0.32, "learning_rate": 4.951044663498776e-05, "loss": 4.5233, "step": 8005500 }, { "epoch": 0.32, "learning_rate": 4.951038565047237e-05, "loss": 4.4947, "step": 8006000 }, { "epoch": 0.32, "learning_rate": 4.9510324662196296e-05, "loss": 4.51, "step": 8006500 }, { "epoch": 0.32, "learning_rate": 4.9510263670159564e-05, "loss": 4.4876, "step": 8007000 }, { "epoch": 0.32, "learning_rate": 4.9510202674362175e-05, "loss": 4.4877, "step": 8007500 }, { "epoch": 0.32, "learning_rate": 4.951014167480414e-05, "loss": 4.4498, "step": 8008000 }, { "epoch": 0.32, "learning_rate": 4.951008067148547e-05, "loss": 4.5003, "step": 8008500 }, { "epoch": 0.32, "learning_rate": 4.9510019664406177e-05, "loss": 4.485, "step": 8009000 }, { "epoch": 0.32, "learning_rate": 4.950995865356626e-05, "loss": 4.5162, "step": 8009500 }, { "epoch": 0.32, "learning_rate": 4.950989763896574e-05, "loss": 4.4846, "step": 8010000 }, { "epoch": 0.32, "learning_rate": 4.950983662060462e-05, "loss": 4.5069, "step": 8010500 }, { "epoch": 0.32, "learning_rate": 4.950977559848291e-05, "loss": 4.4423, "step": 8011000 }, { "epoch": 0.32, "learning_rate": 4.950971457260062e-05, "loss": 4.4861, "step": 8011500 }, { "epoch": 0.32, "learning_rate": 4.950965354295776e-05, "loss": 4.4159, "step": 8012000 }, { "epoch": 0.32, "learning_rate": 4.9509592509554336e-05, "loss": 4.4605, "step": 8012500 }, { "epoch": 0.32, "learning_rate": 4.950953147239037e-05, "loss": 4.5167, "step": 8013000 }, { "epoch": 0.32, "learning_rate": 4.9509470431465846e-05, "loss": 4.5059, "step": 8013500 }, { "epoch": 0.32, "learning_rate": 4.9509409386780806e-05, "loss": 4.4802, "step": 8014000 }, { "epoch": 0.32, "learning_rate": 4.9509348338335224e-05, "loss": 4.5013, "step": 8014500 }, { "epoch": 0.32, "learning_rate": 4.950928728612914e-05, "loss": 4.4943, "step": 8015000 }, { "epoch": 0.32, "learning_rate": 4.950922623016255e-05, "loss": 4.5184, "step": 8015500 }, { "epoch": 0.32, "learning_rate": 4.9509165170435465e-05, "loss": 4.5093, "step": 8016000 }, { "epoch": 0.32, "learning_rate": 4.950910410694789e-05, "loss": 4.4791, "step": 8016500 }, { "epoch": 0.32, "learning_rate": 4.950904303969984e-05, "loss": 4.5041, "step": 8017000 }, { "epoch": 0.32, "learning_rate": 4.950898196869131e-05, "loss": 4.4751, "step": 8017500 }, { "epoch": 0.32, "learning_rate": 4.950892089392234e-05, "loss": 4.4939, "step": 8018000 }, { "epoch": 0.32, "learning_rate": 4.9508859815392917e-05, "loss": 4.4985, "step": 8018500 }, { "epoch": 0.32, "learning_rate": 4.950879873310305e-05, "loss": 4.5232, "step": 8019000 }, { "epoch": 0.32, "learning_rate": 4.950873764705276e-05, "loss": 4.4496, "step": 8019500 }, { "epoch": 0.32, "learning_rate": 4.950867655724204e-05, "loss": 4.4828, "step": 8020000 }, { "epoch": 0.32, "learning_rate": 4.9508615463670914e-05, "loss": 4.4924, "step": 8020500 }, { "epoch": 0.32, "learning_rate": 4.950855436633939e-05, "loss": 4.4796, "step": 8021000 }, { "epoch": 0.32, "learning_rate": 4.950849326524747e-05, "loss": 4.5069, "step": 8021500 }, { "epoch": 0.32, "learning_rate": 4.950843216039516e-05, "loss": 4.4914, "step": 8022000 }, { "epoch": 0.32, "learning_rate": 4.950837105178248e-05, "loss": 4.4774, "step": 8022500 }, { "epoch": 0.32, "learning_rate": 4.950830993940944e-05, "loss": 4.4913, "step": 8023000 }, { "epoch": 0.32, "learning_rate": 4.9508248823276044e-05, "loss": 4.4705, "step": 8023500 }, { "epoch": 0.32, "learning_rate": 4.95081877033823e-05, "loss": 4.5312, "step": 8024000 }, { "epoch": 0.32, "learning_rate": 4.950812657972822e-05, "loss": 4.449, "step": 8024500 }, { "epoch": 0.32, "learning_rate": 4.950806545231381e-05, "loss": 4.4749, "step": 8025000 }, { "epoch": 0.32, "learning_rate": 4.950800432113909e-05, "loss": 4.4759, "step": 8025500 }, { "epoch": 0.32, "learning_rate": 4.950794318620405e-05, "loss": 4.4567, "step": 8026000 }, { "epoch": 0.32, "learning_rate": 4.950788204750873e-05, "loss": 4.4977, "step": 8026500 }, { "epoch": 0.32, "learning_rate": 4.950782090505311e-05, "loss": 4.467, "step": 8027000 }, { "epoch": 0.32, "learning_rate": 4.95077597588372e-05, "loss": 4.4624, "step": 8027500 }, { "epoch": 0.32, "learning_rate": 4.950769860886103e-05, "loss": 4.4835, "step": 8028000 }, { "epoch": 0.32, "learning_rate": 4.95076374551246e-05, "loss": 4.5069, "step": 8028500 }, { "epoch": 0.32, "learning_rate": 4.950757629762792e-05, "loss": 4.4601, "step": 8029000 }, { "epoch": 0.32, "learning_rate": 4.950751513637099e-05, "loss": 4.4461, "step": 8029500 }, { "epoch": 0.32, "learning_rate": 4.9507453971353834e-05, "loss": 4.5043, "step": 8030000 }, { "epoch": 0.32, "learning_rate": 4.950739280257645e-05, "loss": 4.4759, "step": 8030500 }, { "epoch": 0.32, "learning_rate": 4.950733163003885e-05, "loss": 4.4858, "step": 8031000 }, { "epoch": 0.32, "learning_rate": 4.950727045374105e-05, "loss": 4.4651, "step": 8031500 }, { "epoch": 0.32, "learning_rate": 4.9507209273683044e-05, "loss": 4.5038, "step": 8032000 }, { "epoch": 0.32, "learning_rate": 4.9507148089864867e-05, "loss": 4.4798, "step": 8032500 }, { "epoch": 0.32, "learning_rate": 4.9507086902286506e-05, "loss": 4.4895, "step": 8033000 }, { "epoch": 0.32, "learning_rate": 4.950702571094798e-05, "loss": 4.4999, "step": 8033500 }, { "epoch": 0.32, "learning_rate": 4.9506964515849286e-05, "loss": 4.4664, "step": 8034000 }, { "epoch": 0.32, "learning_rate": 4.950690331699045e-05, "loss": 4.4694, "step": 8034500 }, { "epoch": 0.32, "learning_rate": 4.950684211437148e-05, "loss": 4.5031, "step": 8035000 }, { "epoch": 0.32, "learning_rate": 4.9506780907992374e-05, "loss": 4.4879, "step": 8035500 }, { "epoch": 0.32, "learning_rate": 4.950671969785315e-05, "loss": 4.5053, "step": 8036000 }, { "epoch": 0.32, "learning_rate": 4.9506658483953816e-05, "loss": 4.4551, "step": 8036500 }, { "epoch": 0.32, "learning_rate": 4.9506597266294375e-05, "loss": 4.442, "step": 8037000 }, { "epoch": 0.32, "learning_rate": 4.9506536044874846e-05, "loss": 4.477, "step": 8037500 }, { "epoch": 0.32, "learning_rate": 4.950647481969524e-05, "loss": 4.4709, "step": 8038000 }, { "epoch": 0.32, "learning_rate": 4.9506413590755546e-05, "loss": 4.4592, "step": 8038500 }, { "epoch": 0.32, "learning_rate": 4.9506352358055795e-05, "loss": 4.4831, "step": 8039000 }, { "epoch": 0.32, "learning_rate": 4.950629112159599e-05, "loss": 4.4897, "step": 8039500 }, { "epoch": 0.32, "learning_rate": 4.9506229881376145e-05, "loss": 4.5002, "step": 8040000 }, { "epoch": 0.32, "learning_rate": 4.950616863739625e-05, "loss": 4.4708, "step": 8040500 }, { "epoch": 0.32, "learning_rate": 4.950610738965634e-05, "loss": 4.4637, "step": 8041000 }, { "epoch": 0.32, "learning_rate": 4.9506046138156416e-05, "loss": 4.4638, "step": 8041500 }, { "epoch": 0.32, "learning_rate": 4.950598488289647e-05, "loss": 4.4898, "step": 8042000 }, { "epoch": 0.32, "learning_rate": 4.950592362387654e-05, "loss": 4.5219, "step": 8042500 }, { "epoch": 0.32, "learning_rate": 4.9505862361096616e-05, "loss": 4.4601, "step": 8043000 }, { "epoch": 0.32, "learning_rate": 4.950580109455671e-05, "loss": 4.4674, "step": 8043500 }, { "epoch": 0.32, "learning_rate": 4.950573982425684e-05, "loss": 4.5067, "step": 8044000 }, { "epoch": 0.32, "learning_rate": 4.9505678550196996e-05, "loss": 4.5069, "step": 8044500 }, { "epoch": 0.32, "learning_rate": 4.9505617272377216e-05, "loss": 4.4445, "step": 8045000 }, { "epoch": 0.32, "learning_rate": 4.950555599079749e-05, "loss": 4.4715, "step": 8045500 }, { "epoch": 0.32, "learning_rate": 4.950549470545783e-05, "loss": 4.4692, "step": 8046000 }, { "epoch": 0.32, "learning_rate": 4.9505433416358245e-05, "loss": 4.4778, "step": 8046500 }, { "epoch": 0.32, "learning_rate": 4.950537212349875e-05, "loss": 4.5238, "step": 8047000 }, { "epoch": 0.32, "learning_rate": 4.9505310826879346e-05, "loss": 4.5204, "step": 8047500 }, { "epoch": 0.32, "learning_rate": 4.950524952650005e-05, "loss": 4.4802, "step": 8048000 }, { "epoch": 0.32, "learning_rate": 4.950518822236086e-05, "loss": 4.5086, "step": 8048500 }, { "epoch": 0.32, "learning_rate": 4.950512691446181e-05, "loss": 4.4653, "step": 8049000 }, { "epoch": 0.32, "learning_rate": 4.950506560280288e-05, "loss": 4.4842, "step": 8049500 }, { "epoch": 0.32, "learning_rate": 4.95050042873841e-05, "loss": 4.5275, "step": 8050000 }, { "epoch": 0.32, "learning_rate": 4.950494296820547e-05, "loss": 4.492, "step": 8050500 }, { "epoch": 0.32, "learning_rate": 4.9504881645267e-05, "loss": 4.4641, "step": 8051000 }, { "epoch": 0.32, "learning_rate": 4.95048203185687e-05, "loss": 4.4642, "step": 8051500 }, { "epoch": 0.32, "learning_rate": 4.950475898811059e-05, "loss": 4.4888, "step": 8052000 }, { "epoch": 0.32, "learning_rate": 4.950469765389266e-05, "loss": 4.4785, "step": 8052500 }, { "epoch": 0.32, "learning_rate": 4.950463631591493e-05, "loss": 4.4676, "step": 8053000 }, { "epoch": 0.32, "learning_rate": 4.9504574974177406e-05, "loss": 4.4979, "step": 8053500 }, { "epoch": 0.32, "learning_rate": 4.950451362868011e-05, "loss": 4.4509, "step": 8054000 }, { "epoch": 0.32, "learning_rate": 4.950445227942304e-05, "loss": 4.4882, "step": 8054500 }, { "epoch": 0.32, "learning_rate": 4.9504390926406195e-05, "loss": 4.4834, "step": 8055000 }, { "epoch": 0.32, "learning_rate": 4.950432956962961e-05, "loss": 4.477, "step": 8055500 }, { "epoch": 0.32, "learning_rate": 4.9504268209093273e-05, "loss": 4.4791, "step": 8056000 }, { "epoch": 0.32, "learning_rate": 4.950420684479721e-05, "loss": 4.512, "step": 8056500 }, { "epoch": 0.32, "learning_rate": 4.950414547674141e-05, "loss": 4.4799, "step": 8057000 }, { "epoch": 0.32, "learning_rate": 4.95040841049259e-05, "loss": 4.4918, "step": 8057500 }, { "epoch": 0.32, "learning_rate": 4.9504022729350675e-05, "loss": 4.5011, "step": 8058000 }, { "epoch": 0.32, "learning_rate": 4.950396135001576e-05, "loss": 4.4349, "step": 8058500 }, { "epoch": 0.32, "learning_rate": 4.950389996692116e-05, "loss": 4.4999, "step": 8059000 }, { "epoch": 0.32, "learning_rate": 4.9503838580066874e-05, "loss": 4.4798, "step": 8059500 }, { "epoch": 0.32, "learning_rate": 4.950377718945293e-05, "loss": 4.4921, "step": 8060000 }, { "epoch": 0.32, "learning_rate": 4.950371579507932e-05, "loss": 4.4675, "step": 8060500 }, { "epoch": 0.32, "learning_rate": 4.950365439694605e-05, "loss": 4.4774, "step": 8061000 }, { "epoch": 0.32, "learning_rate": 4.950359299505315e-05, "loss": 4.4717, "step": 8061500 }, { "epoch": 0.32, "learning_rate": 4.950353158940062e-05, "loss": 4.488, "step": 8062000 }, { "epoch": 0.32, "learning_rate": 4.9503470179988466e-05, "loss": 4.4681, "step": 8062500 }, { "epoch": 0.32, "learning_rate": 4.95034087668167e-05, "loss": 4.4715, "step": 8063000 }, { "epoch": 0.32, "learning_rate": 4.950334734988533e-05, "loss": 4.4684, "step": 8063500 }, { "epoch": 0.32, "learning_rate": 4.950328592919437e-05, "loss": 4.4622, "step": 8064000 }, { "epoch": 0.32, "learning_rate": 4.950322450474382e-05, "loss": 4.4576, "step": 8064500 }, { "epoch": 0.32, "learning_rate": 4.950316307653369e-05, "loss": 4.481, "step": 8065000 }, { "epoch": 0.32, "learning_rate": 4.950310164456401e-05, "loss": 4.4719, "step": 8065500 }, { "epoch": 0.32, "learning_rate": 4.950304020883476e-05, "loss": 4.5027, "step": 8066000 }, { "epoch": 0.32, "learning_rate": 4.950297876934597e-05, "loss": 4.484, "step": 8066500 }, { "epoch": 0.32, "learning_rate": 4.950291732609764e-05, "loss": 4.4811, "step": 8067000 }, { "epoch": 0.32, "learning_rate": 4.950285587908979e-05, "loss": 4.4502, "step": 8067500 }, { "epoch": 0.32, "learning_rate": 4.950279442832242e-05, "loss": 4.441, "step": 8068000 }, { "epoch": 0.32, "learning_rate": 4.950273297379553e-05, "loss": 4.4814, "step": 8068500 }, { "epoch": 0.32, "learning_rate": 4.950267151550915e-05, "loss": 4.4594, "step": 8069000 }, { "epoch": 0.32, "learning_rate": 4.950261005346328e-05, "loss": 4.4427, "step": 8069500 }, { "epoch": 0.32, "learning_rate": 4.950254858765793e-05, "loss": 4.4937, "step": 8070000 }, { "epoch": 0.32, "learning_rate": 4.95024871180931e-05, "loss": 4.5078, "step": 8070500 }, { "epoch": 0.32, "learning_rate": 4.9502425644768815e-05, "loss": 4.4376, "step": 8071000 }, { "epoch": 0.32, "learning_rate": 4.9502364167685084e-05, "loss": 4.4555, "step": 8071500 }, { "epoch": 0.32, "learning_rate": 4.950230268684191e-05, "loss": 4.4483, "step": 8072000 }, { "epoch": 0.32, "learning_rate": 4.950224120223929e-05, "loss": 4.4555, "step": 8072500 }, { "epoch": 0.32, "learning_rate": 4.950217971387725e-05, "loss": 4.4904, "step": 8073000 }, { "epoch": 0.32, "learning_rate": 4.95021182217558e-05, "loss": 4.4773, "step": 8073500 }, { "epoch": 0.32, "learning_rate": 4.950205672587495e-05, "loss": 4.5089, "step": 8074000 }, { "epoch": 0.32, "learning_rate": 4.9501995226234695e-05, "loss": 4.5197, "step": 8074500 }, { "epoch": 0.32, "learning_rate": 4.950193372283505e-05, "loss": 4.4798, "step": 8075000 }, { "epoch": 0.32, "learning_rate": 4.950187221567604e-05, "loss": 4.4625, "step": 8075500 }, { "epoch": 0.32, "learning_rate": 4.950181070475766e-05, "loss": 4.4522, "step": 8076000 }, { "epoch": 0.32, "learning_rate": 4.9501749190079914e-05, "loss": 4.5039, "step": 8076500 }, { "epoch": 0.32, "learning_rate": 4.950168767164283e-05, "loss": 4.4821, "step": 8077000 }, { "epoch": 0.32, "learning_rate": 4.95016261494464e-05, "loss": 4.5027, "step": 8077500 }, { "epoch": 0.32, "learning_rate": 4.950156462349064e-05, "loss": 4.5241, "step": 8078000 }, { "epoch": 0.32, "learning_rate": 4.950150309377557e-05, "loss": 4.4424, "step": 8078500 }, { "epoch": 0.32, "learning_rate": 4.9501441560301174e-05, "loss": 4.466, "step": 8079000 }, { "epoch": 0.32, "learning_rate": 4.950138002306749e-05, "loss": 4.4812, "step": 8079500 }, { "epoch": 0.32, "learning_rate": 4.950131848207451e-05, "loss": 4.4732, "step": 8080000 }, { "epoch": 0.32, "learning_rate": 4.950125693732224e-05, "loss": 4.4908, "step": 8080500 }, { "epoch": 0.32, "learning_rate": 4.950119538881071e-05, "loss": 4.5096, "step": 8081000 }, { "epoch": 0.32, "learning_rate": 4.9501133836539906e-05, "loss": 4.4917, "step": 8081500 }, { "epoch": 0.32, "learning_rate": 4.950107228050985e-05, "loss": 4.4799, "step": 8082000 }, { "epoch": 0.32, "learning_rate": 4.9501010720720555e-05, "loss": 4.4542, "step": 8082500 }, { "epoch": 0.32, "learning_rate": 4.9500949157172025e-05, "loss": 4.4543, "step": 8083000 }, { "epoch": 0.32, "learning_rate": 4.950088758986426e-05, "loss": 4.4934, "step": 8083500 }, { "epoch": 0.32, "learning_rate": 4.9500826018797284e-05, "loss": 4.4838, "step": 8084000 }, { "epoch": 0.32, "learning_rate": 4.950076444397111e-05, "loss": 4.4747, "step": 8084500 }, { "epoch": 0.32, "learning_rate": 4.950070286538573e-05, "loss": 4.4957, "step": 8085000 }, { "epoch": 0.32, "learning_rate": 4.950064128304116e-05, "loss": 4.4788, "step": 8085500 }, { "epoch": 0.32, "learning_rate": 4.9500579696937416e-05, "loss": 4.4449, "step": 8086000 }, { "epoch": 0.32, "learning_rate": 4.95005181070745e-05, "loss": 4.4979, "step": 8086500 }, { "epoch": 0.32, "learning_rate": 4.950045651345243e-05, "loss": 4.486, "step": 8087000 }, { "epoch": 0.32, "learning_rate": 4.95003949160712e-05, "loss": 4.4905, "step": 8087500 }, { "epoch": 0.32, "learning_rate": 4.9500333314930836e-05, "loss": 4.483, "step": 8088000 }, { "epoch": 0.32, "learning_rate": 4.950027171003134e-05, "loss": 4.5002, "step": 8088500 }, { "epoch": 0.32, "learning_rate": 4.9500210101372725e-05, "loss": 4.4581, "step": 8089000 }, { "epoch": 0.32, "learning_rate": 4.9500148488954995e-05, "loss": 4.4992, "step": 8089500 }, { "epoch": 0.32, "learning_rate": 4.950008687277816e-05, "loss": 4.473, "step": 8090000 }, { "epoch": 0.32, "learning_rate": 4.9500025252842234e-05, "loss": 4.4622, "step": 8090500 }, { "epoch": 0.32, "learning_rate": 4.949996362914723e-05, "loss": 4.5029, "step": 8091000 }, { "epoch": 0.32, "learning_rate": 4.949990200169315e-05, "loss": 4.4978, "step": 8091500 }, { "epoch": 0.32, "learning_rate": 4.9499840370479986e-05, "loss": 4.4687, "step": 8092000 }, { "epoch": 0.32, "learning_rate": 4.949977873550778e-05, "loss": 4.4555, "step": 8092500 }, { "epoch": 0.32, "learning_rate": 4.949971709677653e-05, "loss": 4.4693, "step": 8093000 }, { "epoch": 0.32, "learning_rate": 4.9499655454286246e-05, "loss": 4.4823, "step": 8093500 }, { "epoch": 0.32, "learning_rate": 4.9499593808036937e-05, "loss": 4.459, "step": 8094000 }, { "epoch": 0.32, "learning_rate": 4.94995321580286e-05, "loss": 4.48, "step": 8094500 }, { "epoch": 0.32, "learning_rate": 4.949947050426126e-05, "loss": 4.4835, "step": 8095000 }, { "epoch": 0.32, "learning_rate": 4.949940884673492e-05, "loss": 4.4512, "step": 8095500 }, { "epoch": 0.32, "learning_rate": 4.949934718544959e-05, "loss": 4.4869, "step": 8096000 }, { "epoch": 0.32, "learning_rate": 4.949928552040528e-05, "loss": 4.5156, "step": 8096500 }, { "epoch": 0.32, "learning_rate": 4.9499223851602004e-05, "loss": 4.4707, "step": 8097000 }, { "epoch": 0.32, "learning_rate": 4.949916217903976e-05, "loss": 4.4932, "step": 8097500 }, { "epoch": 0.32, "learning_rate": 4.949910050271857e-05, "loss": 4.4915, "step": 8098000 }, { "epoch": 0.32, "learning_rate": 4.949903882263844e-05, "loss": 4.4474, "step": 8098500 }, { "epoch": 0.32, "learning_rate": 4.949897713879937e-05, "loss": 4.4597, "step": 8099000 }, { "epoch": 0.32, "learning_rate": 4.9498915451201375e-05, "loss": 4.458, "step": 8099500 }, { "epoch": 0.32, "learning_rate": 4.949885375984448e-05, "loss": 4.4348, "step": 8100000 }, { "epoch": 0.32, "learning_rate": 4.949879206472867e-05, "loss": 4.4548, "step": 8100500 }, { "epoch": 0.32, "learning_rate": 4.949873036585397e-05, "loss": 4.4491, "step": 8101000 }, { "epoch": 0.32, "learning_rate": 4.949866866322038e-05, "loss": 4.4872, "step": 8101500 }, { "epoch": 0.32, "learning_rate": 4.949860695682792e-05, "loss": 4.4734, "step": 8102000 }, { "epoch": 0.32, "learning_rate": 4.9498545246676584e-05, "loss": 4.5067, "step": 8102500 }, { "epoch": 0.32, "learning_rate": 4.9498483532766404e-05, "loss": 4.4988, "step": 8103000 }, { "epoch": 0.32, "learning_rate": 4.949842181509737e-05, "loss": 4.4477, "step": 8103500 }, { "epoch": 0.32, "learning_rate": 4.94983600936695e-05, "loss": 4.4824, "step": 8104000 }, { "epoch": 0.32, "learning_rate": 4.94982983684828e-05, "loss": 4.4823, "step": 8104500 }, { "epoch": 0.32, "learning_rate": 4.949823663953728e-05, "loss": 4.4692, "step": 8105000 }, { "epoch": 0.32, "learning_rate": 4.9498174906832954e-05, "loss": 4.4766, "step": 8105500 }, { "epoch": 0.32, "learning_rate": 4.949811317036982e-05, "loss": 4.4846, "step": 8106000 }, { "epoch": 0.32, "learning_rate": 4.949805143014791e-05, "loss": 4.476, "step": 8106500 }, { "epoch": 0.32, "learning_rate": 4.9497989686167204e-05, "loss": 4.5429, "step": 8107000 }, { "epoch": 0.32, "learning_rate": 4.9497927938427734e-05, "loss": 4.4611, "step": 8107500 }, { "epoch": 0.32, "learning_rate": 4.94978661869295e-05, "loss": 4.4499, "step": 8108000 }, { "epoch": 0.32, "learning_rate": 4.949780443167251e-05, "loss": 4.4489, "step": 8108500 }, { "epoch": 0.32, "learning_rate": 4.949774267265679e-05, "loss": 4.4932, "step": 8109000 }, { "epoch": 0.32, "learning_rate": 4.949768090988232e-05, "loss": 4.4872, "step": 8109500 }, { "epoch": 0.32, "learning_rate": 4.9497619143349136e-05, "loss": 4.4601, "step": 8110000 }, { "epoch": 0.32, "learning_rate": 4.949755737305724e-05, "loss": 4.4632, "step": 8110500 }, { "epoch": 0.32, "learning_rate": 4.949749559900663e-05, "loss": 4.4866, "step": 8111000 }, { "epoch": 0.32, "learning_rate": 4.9497433821197326e-05, "loss": 4.4888, "step": 8111500 }, { "epoch": 0.32, "learning_rate": 4.949737203962933e-05, "loss": 4.4794, "step": 8112000 }, { "epoch": 0.32, "learning_rate": 4.949731025430267e-05, "loss": 4.4394, "step": 8112500 }, { "epoch": 0.32, "learning_rate": 4.949724846521733e-05, "loss": 4.4767, "step": 8113000 }, { "epoch": 0.32, "learning_rate": 4.9497186672373343e-05, "loss": 4.4821, "step": 8113500 }, { "epoch": 0.32, "learning_rate": 4.9497124875770705e-05, "loss": 4.5212, "step": 8114000 }, { "epoch": 0.32, "learning_rate": 4.949706307540942e-05, "loss": 4.4417, "step": 8114500 }, { "epoch": 0.32, "learning_rate": 4.9497001271289515e-05, "loss": 4.4232, "step": 8115000 }, { "epoch": 0.32, "learning_rate": 4.9496939463410984e-05, "loss": 4.4828, "step": 8115500 }, { "epoch": 0.32, "learning_rate": 4.949687765177385e-05, "loss": 4.4672, "step": 8116000 }, { "epoch": 0.32, "learning_rate": 4.949681583637811e-05, "loss": 4.482, "step": 8116500 }, { "epoch": 0.32, "learning_rate": 4.9496754017223776e-05, "loss": 4.4683, "step": 8117000 }, { "epoch": 0.32, "learning_rate": 4.949669219431087e-05, "loss": 4.504, "step": 8117500 }, { "epoch": 0.32, "learning_rate": 4.949663036763938e-05, "loss": 4.4171, "step": 8118000 }, { "epoch": 0.32, "learning_rate": 4.949656853720933e-05, "loss": 4.4762, "step": 8118500 }, { "epoch": 0.32, "learning_rate": 4.949650670302073e-05, "loss": 4.483, "step": 8119000 }, { "epoch": 0.32, "learning_rate": 4.9496444865073587e-05, "loss": 4.452, "step": 8119500 }, { "epoch": 0.32, "learning_rate": 4.949638302336791e-05, "loss": 4.4731, "step": 8120000 }, { "epoch": 0.32, "learning_rate": 4.94963211779037e-05, "loss": 4.4554, "step": 8120500 }, { "epoch": 0.32, "learning_rate": 4.9496259328680974e-05, "loss": 4.445, "step": 8121000 }, { "epoch": 0.32, "learning_rate": 4.949619747569975e-05, "loss": 4.4803, "step": 8121500 }, { "epoch": 0.32, "learning_rate": 4.949613561896003e-05, "loss": 4.4568, "step": 8122000 }, { "epoch": 0.32, "learning_rate": 4.949607375846182e-05, "loss": 4.5191, "step": 8122500 }, { "epoch": 0.32, "learning_rate": 4.9496011894205125e-05, "loss": 4.4902, "step": 8123000 }, { "epoch": 0.32, "learning_rate": 4.9495950026189966e-05, "loss": 4.4693, "step": 8123500 }, { "epoch": 0.32, "learning_rate": 4.949588815441635e-05, "loss": 4.4779, "step": 8124000 }, { "epoch": 0.32, "learning_rate": 4.949582627888429e-05, "loss": 4.4869, "step": 8124500 }, { "epoch": 0.32, "learning_rate": 4.949576439959378e-05, "loss": 4.4708, "step": 8125000 }, { "epoch": 0.32, "learning_rate": 4.949570251654485e-05, "loss": 4.4435, "step": 8125500 }, { "epoch": 0.32, "learning_rate": 4.94956406297375e-05, "loss": 4.4459, "step": 8126000 }, { "epoch": 0.32, "learning_rate": 4.9495578739171725e-05, "loss": 4.4434, "step": 8126500 }, { "epoch": 0.32, "learning_rate": 4.949551684484755e-05, "loss": 4.4594, "step": 8127000 }, { "epoch": 0.32, "learning_rate": 4.949545494676499e-05, "loss": 4.4633, "step": 8127500 }, { "epoch": 0.32, "learning_rate": 4.9495393044924054e-05, "loss": 4.4741, "step": 8128000 }, { "epoch": 0.32, "learning_rate": 4.949533113932474e-05, "loss": 4.4503, "step": 8128500 }, { "epoch": 0.32, "learning_rate": 4.949526922996706e-05, "loss": 4.4918, "step": 8129000 }, { "epoch": 0.32, "learning_rate": 4.949520731685103e-05, "loss": 4.463, "step": 8129500 }, { "epoch": 0.32, "learning_rate": 4.949514539997665e-05, "loss": 4.4741, "step": 8130000 }, { "epoch": 0.32, "learning_rate": 4.9495083479343934e-05, "loss": 4.4718, "step": 8130500 }, { "epoch": 0.32, "learning_rate": 4.949502155495289e-05, "loss": 4.4883, "step": 8131000 }, { "epoch": 0.32, "learning_rate": 4.949495962680354e-05, "loss": 4.4618, "step": 8131500 }, { "epoch": 0.32, "learning_rate": 4.949489769489588e-05, "loss": 4.4783, "step": 8132000 }, { "epoch": 0.32, "learning_rate": 4.949483575922992e-05, "loss": 4.4658, "step": 8132500 }, { "epoch": 0.32, "learning_rate": 4.9494773819805675e-05, "loss": 4.4629, "step": 8133000 }, { "epoch": 0.32, "learning_rate": 4.9494711876623144e-05, "loss": 4.4267, "step": 8133500 }, { "epoch": 0.32, "learning_rate": 4.949464992968235e-05, "loss": 4.4501, "step": 8134000 }, { "epoch": 0.32, "learning_rate": 4.9494587978983296e-05, "loss": 4.4884, "step": 8134500 }, { "epoch": 0.32, "learning_rate": 4.9494526024526e-05, "loss": 4.4692, "step": 8135000 }, { "epoch": 0.32, "learning_rate": 4.9494464066310455e-05, "loss": 4.4975, "step": 8135500 }, { "epoch": 0.32, "learning_rate": 4.949440210433668e-05, "loss": 4.4657, "step": 8136000 }, { "epoch": 0.32, "learning_rate": 4.9494340138604684e-05, "loss": 4.4698, "step": 8136500 }, { "epoch": 0.32, "learning_rate": 4.9494278169114476e-05, "loss": 4.4895, "step": 8137000 }, { "epoch": 0.32, "learning_rate": 4.949421619586607e-05, "loss": 4.4651, "step": 8137500 }, { "epoch": 0.32, "learning_rate": 4.949415421885947e-05, "loss": 4.4821, "step": 8138000 }, { "epoch": 0.32, "learning_rate": 4.949409223809468e-05, "loss": 4.4841, "step": 8138500 }, { "epoch": 0.32, "learning_rate": 4.949403025357172e-05, "loss": 4.4586, "step": 8139000 }, { "epoch": 0.32, "learning_rate": 4.94939682652906e-05, "loss": 4.44, "step": 8139500 }, { "epoch": 0.32, "learning_rate": 4.9493906273251325e-05, "loss": 4.4818, "step": 8140000 }, { "epoch": 0.32, "learning_rate": 4.9493844277453906e-05, "loss": 4.4228, "step": 8140500 }, { "epoch": 0.32, "learning_rate": 4.949378227789835e-05, "loss": 4.4735, "step": 8141000 }, { "epoch": 0.32, "learning_rate": 4.949372027458467e-05, "loss": 4.4468, "step": 8141500 }, { "epoch": 0.32, "learning_rate": 4.949365826751287e-05, "loss": 4.45, "step": 8142000 }, { "epoch": 0.32, "learning_rate": 4.949359625668296e-05, "loss": 4.4538, "step": 8142500 }, { "epoch": 0.32, "learning_rate": 4.9493534242094955e-05, "loss": 4.4524, "step": 8143000 }, { "epoch": 0.32, "learning_rate": 4.949347222374886e-05, "loss": 4.489, "step": 8143500 }, { "epoch": 0.32, "learning_rate": 4.9493410201644694e-05, "loss": 4.4536, "step": 8144000 }, { "epoch": 0.32, "learning_rate": 4.949334817578245e-05, "loss": 4.4623, "step": 8144500 }, { "epoch": 0.32, "learning_rate": 4.949328614616215e-05, "loss": 4.4926, "step": 8145000 }, { "epoch": 0.32, "learning_rate": 4.94932241127838e-05, "loss": 4.4671, "step": 8145500 }, { "epoch": 0.32, "learning_rate": 4.949316207564741e-05, "loss": 4.4905, "step": 8146000 }, { "epoch": 0.32, "learning_rate": 4.949310003475299e-05, "loss": 4.4573, "step": 8146500 }, { "epoch": 0.32, "learning_rate": 4.9493037990100556e-05, "loss": 4.4395, "step": 8147000 }, { "epoch": 0.32, "learning_rate": 4.94929759416901e-05, "loss": 4.495, "step": 8147500 }, { "epoch": 0.32, "learning_rate": 4.949291388952164e-05, "loss": 4.4824, "step": 8148000 }, { "epoch": 0.32, "learning_rate": 4.949285183359519e-05, "loss": 4.482, "step": 8148500 }, { "epoch": 0.32, "learning_rate": 4.9492789773910766e-05, "loss": 4.4678, "step": 8149000 }, { "epoch": 0.32, "learning_rate": 4.9492727710468355e-05, "loss": 4.4966, "step": 8149500 }, { "epoch": 0.32, "learning_rate": 4.949266564326799e-05, "loss": 4.4809, "step": 8150000 }, { "epoch": 0.32, "learning_rate": 4.949260357230966e-05, "loss": 4.4869, "step": 8150500 }, { "epoch": 0.32, "learning_rate": 4.9492541497593394e-05, "loss": 4.5019, "step": 8151000 }, { "epoch": 0.32, "learning_rate": 4.9492479419119184e-05, "loss": 4.4745, "step": 8151500 }, { "epoch": 0.32, "learning_rate": 4.9492417336887054e-05, "loss": 4.4322, "step": 8152000 }, { "epoch": 0.54, "learning_rate": 4.859835936525288e-05, "loss": 4.4627, "step": 8152500 }, { "epoch": 0.54, "learning_rate": 4.859818897265622e-05, "loss": 4.4212, "step": 8153000 }, { "epoch": 0.54, "learning_rate": 4.8598018570001925e-05, "loss": 4.4103, "step": 8153500 }, { "epoch": 0.54, "learning_rate": 4.859784815729006e-05, "loss": 4.4058, "step": 8154000 }, { "epoch": 0.43, "learning_rate": 4.909945922851138e-05, "loss": 4.3992, "step": 8154500 }, { "epoch": 0.43, "learning_rate": 4.9099349403649844e-05, "loss": 4.3872, "step": 8155000 }, { "epoch": 0.43, "learning_rate": 4.909923957221475e-05, "loss": 4.4025, "step": 8155500 }, { "epoch": 0.43, "learning_rate": 4.909912973420613e-05, "loss": 4.3924, "step": 8156000 }, { "epoch": 0.43, "learning_rate": 4.909901988962401e-05, "loss": 4.4153, "step": 8156500 }, { "epoch": 0.43, "learning_rate": 4.909891003846842e-05, "loss": 4.4105, "step": 8157000 }, { "epoch": 0.43, "learning_rate": 4.9098800180739394e-05, "loss": 4.4276, "step": 8157500 }, { "epoch": 0.43, "learning_rate": 4.909869031643696e-05, "loss": 4.3883, "step": 8158000 }, { "epoch": 0.43, "learning_rate": 4.909858044556114e-05, "loss": 4.3904, "step": 8158500 }, { "epoch": 0.43, "learning_rate": 4.909847056811197e-05, "loss": 4.3956, "step": 8159000 }, { "epoch": 0.43, "learning_rate": 4.9098360684089485e-05, "loss": 4.437, "step": 8159500 }, { "epoch": 0.43, "learning_rate": 4.909825079349371e-05, "loss": 4.3866, "step": 8160000 }, { "epoch": 0.43, "learning_rate": 4.9098140896324674e-05, "loss": 4.3935, "step": 8160500 }, { "epoch": 0.43, "learning_rate": 4.909803099258241e-05, "loss": 4.4064, "step": 8161000 }, { "epoch": 0.43, "learning_rate": 4.909792108226695e-05, "loss": 4.3843, "step": 8161500 }, { "epoch": 0.43, "learning_rate": 4.909781116537832e-05, "loss": 4.3992, "step": 8162000 }, { "epoch": 0.43, "learning_rate": 4.909770124191655e-05, "loss": 4.404, "step": 8162500 }, { "epoch": 0.43, "learning_rate": 4.909759131188166e-05, "loss": 4.3925, "step": 8163000 }, { "epoch": 0.43, "learning_rate": 4.909748137527371e-05, "loss": 4.4304, "step": 8163500 }, { "epoch": 0.43, "learning_rate": 4.90973714320927e-05, "loss": 4.4208, "step": 8164000 }, { "epoch": 0.43, "learning_rate": 4.909726148233867e-05, "loss": 4.3684, "step": 8164500 }, { "epoch": 0.43, "learning_rate": 4.9097151526011656e-05, "loss": 4.384, "step": 8165000 }, { "epoch": 0.43, "learning_rate": 4.909704156311167e-05, "loss": 4.3721, "step": 8165500 }, { "epoch": 0.43, "learning_rate": 4.9096931593638764e-05, "loss": 4.3738, "step": 8166000 }, { "epoch": 0.43, "learning_rate": 4.909682161759296e-05, "loss": 4.4199, "step": 8166500 }, { "epoch": 0.43, "learning_rate": 4.909671163497428e-05, "loss": 4.3798, "step": 8167000 }, { "epoch": 0.43, "learning_rate": 4.909660164578277e-05, "loss": 4.3808, "step": 8167500 }, { "epoch": 0.86, "learning_rate": 4.645119997945853e-05, "loss": 4.3118, "step": 8168000 }, { "epoch": 0.86, "learning_rate": 4.6450775864685545e-05, "loss": 4.3175, "step": 8168500 }, { "epoch": 0.86, "learning_rate": 4.645035172650764e-05, "loss": 4.3171, "step": 8169000 }, { "epoch": 0.86, "learning_rate": 4.644992756492525e-05, "loss": 4.3011, "step": 8169500 }, { "epoch": 0.86, "learning_rate": 4.644950337993886e-05, "loss": 4.2909, "step": 8170000 }, { "epoch": 0.86, "learning_rate": 4.6449079171548924e-05, "loss": 4.3004, "step": 8170500 }, { "epoch": 0.86, "learning_rate": 4.644865493975591e-05, "loss": 4.2675, "step": 8171000 }, { "epoch": 0.86, "learning_rate": 4.644823068456029e-05, "loss": 4.284, "step": 8171500 }, { "epoch": 0.86, "learning_rate": 4.6447806405962504e-05, "loss": 4.2574, "step": 8172000 }, { "epoch": 0.86, "learning_rate": 4.6447382103963024e-05, "loss": 4.2643, "step": 8172500 }, { "epoch": 0.86, "learning_rate": 4.644695777856232e-05, "loss": 4.2733, "step": 8173000 }, { "epoch": 0.86, "learning_rate": 4.6446533429760855e-05, "loss": 4.2643, "step": 8173500 }, { "epoch": 0.86, "learning_rate": 4.644610905755908e-05, "loss": 4.26, "step": 8174000 }, { "epoch": 0.86, "learning_rate": 4.6445684661957466e-05, "loss": 4.262, "step": 8174500 }, { "epoch": 0.86, "learning_rate": 4.644526024295648e-05, "loss": 4.2704, "step": 8175000 }, { "epoch": 0.86, "learning_rate": 4.644483580055658e-05, "loss": 4.2449, "step": 8175500 }, { "epoch": 0.86, "learning_rate": 4.6444411334758224e-05, "loss": 4.2446, "step": 8176000 }, { "epoch": 0.86, "learning_rate": 4.644398684556188e-05, "loss": 4.2453, "step": 8176500 }, { "epoch": 0.86, "learning_rate": 4.644356233296802e-05, "loss": 4.2739, "step": 8177000 }, { "epoch": 0.86, "learning_rate": 4.64431377969771e-05, "loss": 4.2233, "step": 8177500 }, { "epoch": 0.86, "learning_rate": 4.6442713237589574e-05, "loss": 4.2228, "step": 8178000 }, { "epoch": 0.86, "learning_rate": 4.644228865480592e-05, "loss": 4.2538, "step": 8178500 }, { "epoch": 0.86, "learning_rate": 4.644186404862659e-05, "loss": 4.248, "step": 8179000 }, { "epoch": 0.86, "learning_rate": 4.644143941905206e-05, "loss": 4.2478, "step": 8179500 }, { "epoch": 0.86, "learning_rate": 4.644101476608278e-05, "loss": 4.2319, "step": 8180000 }, { "epoch": 0.86, "learning_rate": 4.6440590089719224e-05, "loss": 4.2364, "step": 8180500 }, { "epoch": 0.86, "learning_rate": 4.644016538996184e-05, "loss": 4.2213, "step": 8181000 }, { "epoch": 0.86, "learning_rate": 4.643974066681111e-05, "loss": 4.2024, "step": 8181500 }, { "epoch": 0.86, "learning_rate": 4.643931592026749e-05, "loss": 4.2113, "step": 8182000 }, { "epoch": 0.86, "learning_rate": 4.643889115033143e-05, "loss": 4.225, "step": 8182500 }, { "epoch": 0.86, "learning_rate": 4.643846635700342e-05, "loss": 4.2516, "step": 8183000 }, { "epoch": 0.86, "learning_rate": 4.6438041540283906e-05, "loss": 4.2235, "step": 8183500 }, { "epoch": 0.86, "learning_rate": 4.643761670017335e-05, "loss": 4.209, "step": 8184000 }, { "epoch": 0.86, "learning_rate": 4.643719183667223e-05, "loss": 4.2488, "step": 8184500 }, { "epoch": 0.86, "learning_rate": 4.6436766949780994e-05, "loss": 4.2455, "step": 8185000 }, { "epoch": 0.86, "learning_rate": 4.643634203950011e-05, "loss": 4.2315, "step": 8185500 }, { "epoch": 0.86, "learning_rate": 4.643591710583004e-05, "loss": 4.2171, "step": 8186000 }, { "epoch": 0.86, "learning_rate": 4.643549214877126e-05, "loss": 4.2111, "step": 8186500 }, { "epoch": 0.86, "learning_rate": 4.643506716832422e-05, "loss": 4.2297, "step": 8187000 }, { "epoch": 0.86, "learning_rate": 4.6434642164489386e-05, "loss": 4.2162, "step": 8187500 }, { "epoch": 0.86, "learning_rate": 4.6434217137267235e-05, "loss": 4.2278, "step": 8188000 }, { "epoch": 0.86, "learning_rate": 4.643379208665821e-05, "loss": 4.2078, "step": 8188500 }, { "epoch": 0.86, "learning_rate": 4.643336701266278e-05, "loss": 4.2234, "step": 8189000 }, { "epoch": 0.86, "learning_rate": 4.643294191528143e-05, "loss": 4.2135, "step": 8189500 }, { "epoch": 0.86, "learning_rate": 4.643251679451459e-05, "loss": 4.2373, "step": 8190000 }, { "epoch": 0.86, "learning_rate": 4.6432091650362747e-05, "loss": 4.1907, "step": 8190500 }, { "epoch": 0.86, "learning_rate": 4.643166648282636e-05, "loss": 4.2056, "step": 8191000 }, { "epoch": 0.86, "learning_rate": 4.643124129190589e-05, "loss": 4.217, "step": 8191500 }, { "epoch": 0.86, "learning_rate": 4.6430816077601804e-05, "loss": 4.2234, "step": 8192000 }, { "epoch": 0.86, "learning_rate": 4.6430390839914565e-05, "loss": 4.1924, "step": 8192500 }, { "epoch": 0.86, "learning_rate": 4.6429965578844635e-05, "loss": 4.2202, "step": 8193000 }, { "epoch": 0.86, "learning_rate": 4.6429540294392483e-05, "loss": 4.2052, "step": 8193500 }, { "epoch": 0.86, "learning_rate": 4.642911498655856e-05, "loss": 4.2183, "step": 8194000 }, { "epoch": 0.86, "learning_rate": 4.6428689655343355e-05, "loss": 4.2244, "step": 8194500 }, { "epoch": 0.86, "learning_rate": 4.642826430074731e-05, "loss": 4.205, "step": 8195000 }, { "epoch": 0.86, "learning_rate": 4.642783892277089e-05, "loss": 4.212, "step": 8195500 }, { "epoch": 0.86, "learning_rate": 4.6427413521414574e-05, "loss": 4.2037, "step": 8196000 }, { "epoch": 0.86, "learning_rate": 4.642698809667881e-05, "loss": 4.1919, "step": 8196500 }, { "epoch": 0.86, "learning_rate": 4.642656264856408e-05, "loss": 4.2078, "step": 8197000 }, { "epoch": 0.86, "learning_rate": 4.642613717707083e-05, "loss": 4.1971, "step": 8197500 }, { "epoch": 0.86, "learning_rate": 4.642571168219953e-05, "loss": 4.2125, "step": 8198000 }, { "epoch": 0.86, "learning_rate": 4.6425286163950646e-05, "loss": 4.1942, "step": 8198500 }, { "epoch": 0.86, "learning_rate": 4.642486062232465e-05, "loss": 4.208, "step": 8199000 }, { "epoch": 0.86, "learning_rate": 4.642443505732199e-05, "loss": 4.1824, "step": 8199500 }, { "epoch": 0.86, "learning_rate": 4.642400946894314e-05, "loss": 4.1897, "step": 8200000 }, { "epoch": 0.86, "learning_rate": 4.642358385718857e-05, "loss": 4.1995, "step": 8200500 }, { "epoch": 0.86, "learning_rate": 4.6423158222058725e-05, "loss": 4.1978, "step": 8201000 }, { "epoch": 0.86, "learning_rate": 4.642273256355409e-05, "loss": 4.1964, "step": 8201500 }, { "epoch": 0.86, "learning_rate": 4.6422306881675125e-05, "loss": 4.1949, "step": 8202000 }, { "epoch": 0.86, "learning_rate": 4.642188117642229e-05, "loss": 4.1864, "step": 8202500 }, { "epoch": 0.86, "learning_rate": 4.642145544779605e-05, "loss": 4.1986, "step": 8203000 }, { "epoch": 0.86, "learning_rate": 4.642102969579687e-05, "loss": 4.2176, "step": 8203500 }, { "epoch": 0.86, "learning_rate": 4.642060392042521e-05, "loss": 4.1595, "step": 8204000 }, { "epoch": 0.86, "learning_rate": 4.642017812168154e-05, "loss": 4.1708, "step": 8204500 }, { "epoch": 0.86, "learning_rate": 4.641975229956633e-05, "loss": 4.1812, "step": 8205000 }, { "epoch": 0.86, "learning_rate": 4.6419326454080034e-05, "loss": 4.187, "step": 8205500 }, { "epoch": 0.86, "learning_rate": 4.641890058522312e-05, "loss": 4.1814, "step": 8206000 }, { "epoch": 0.86, "learning_rate": 4.641847469299605e-05, "loss": 4.196, "step": 8206500 }, { "epoch": 0.86, "learning_rate": 4.6418048777399304e-05, "loss": 4.1815, "step": 8207000 }, { "epoch": 0.86, "learning_rate": 4.641762283843333e-05, "loss": 4.2001, "step": 8207500 }, { "epoch": 0.86, "learning_rate": 4.641719687609859e-05, "loss": 4.1813, "step": 8208000 }, { "epoch": 0.86, "learning_rate": 4.641677089039557e-05, "loss": 4.1845, "step": 8208500 }, { "epoch": 0.86, "learning_rate": 4.641634488132471e-05, "loss": 4.1917, "step": 8209000 }, { "epoch": 0.86, "learning_rate": 4.6415918848886486e-05, "loss": 4.1619, "step": 8209500 }, { "epoch": 0.86, "learning_rate": 4.641549279308137e-05, "loss": 4.1908, "step": 8210000 }, { "epoch": 0.86, "learning_rate": 4.6415066713909816e-05, "loss": 4.1859, "step": 8210500 }, { "epoch": 0.86, "learning_rate": 4.64146406113723e-05, "loss": 4.1818, "step": 8211000 }, { "epoch": 0.86, "learning_rate": 4.641421448546927e-05, "loss": 4.1637, "step": 8211500 }, { "epoch": 0.86, "learning_rate": 4.64137883362012e-05, "loss": 4.1833, "step": 8212000 }, { "epoch": 0.86, "learning_rate": 4.641336216356856e-05, "loss": 4.1875, "step": 8212500 }, { "epoch": 0.86, "learning_rate": 4.6412935967571814e-05, "loss": 4.1862, "step": 8213000 }, { "epoch": 0.86, "learning_rate": 4.641250974821142e-05, "loss": 4.1823, "step": 8213500 }, { "epoch": 0.86, "learning_rate": 4.6412083505487846e-05, "loss": 4.1695, "step": 8214000 }, { "epoch": 0.86, "learning_rate": 4.6411657239401554e-05, "loss": 4.165, "step": 8214500 }, { "epoch": 0.86, "learning_rate": 4.641123094995302e-05, "loss": 4.1875, "step": 8215000 }, { "epoch": 0.86, "learning_rate": 4.64108046371427e-05, "loss": 4.1701, "step": 8215500 }, { "epoch": 0.86, "learning_rate": 4.641037830097106e-05, "loss": 4.1865, "step": 8216000 }, { "epoch": 0.86, "learning_rate": 4.640995194143857e-05, "loss": 4.1837, "step": 8216500 }, { "epoch": 0.86, "learning_rate": 4.640952555854569e-05, "loss": 4.17, "step": 8217000 }, { "epoch": 0.86, "learning_rate": 4.640909915229288e-05, "loss": 4.1845, "step": 8217500 }, { "epoch": 0.86, "learning_rate": 4.640867272268062e-05, "loss": 4.1769, "step": 8218000 }, { "epoch": 0.86, "learning_rate": 4.6408246269709364e-05, "loss": 4.1581, "step": 8218500 }, { "epoch": 0.86, "learning_rate": 4.640781979337958e-05, "loss": 4.1945, "step": 8219000 }, { "epoch": 0.86, "learning_rate": 4.6407393293691734e-05, "loss": 4.1839, "step": 8219500 }, { "epoch": 0.86, "learning_rate": 4.640696677064629e-05, "loss": 4.1849, "step": 8220000 }, { "epoch": 0.86, "learning_rate": 4.640654022424372e-05, "loss": 4.184, "step": 8220500 }, { "epoch": 0.86, "learning_rate": 4.640611365448448e-05, "loss": 4.1734, "step": 8221000 }, { "epoch": 0.86, "learning_rate": 4.640568706136904e-05, "loss": 4.1759, "step": 8221500 }, { "epoch": 0.86, "learning_rate": 4.640526044489786e-05, "loss": 4.1631, "step": 8222000 }, { "epoch": 0.86, "learning_rate": 4.640483380507142e-05, "loss": 4.1695, "step": 8222500 }, { "epoch": 0.86, "learning_rate": 4.6404407141890173e-05, "loss": 4.1762, "step": 8223000 }, { "epoch": 0.86, "learning_rate": 4.6403980455354575e-05, "loss": 4.1529, "step": 8223500 }, { "epoch": 0.86, "learning_rate": 4.640355374546512e-05, "loss": 4.1377, "step": 8224000 }, { "epoch": 0.86, "learning_rate": 4.6403127012222255e-05, "loss": 4.1494, "step": 8224500 }, { "epoch": 0.86, "learning_rate": 4.640270025562645e-05, "loss": 4.1721, "step": 8225000 }, { "epoch": 0.86, "learning_rate": 4.640227347567816e-05, "loss": 4.1709, "step": 8225500 }, { "epoch": 0.86, "learning_rate": 4.640184667237786e-05, "loss": 4.1464, "step": 8226000 }, { "epoch": 0.86, "learning_rate": 4.640141984572602e-05, "loss": 4.1872, "step": 8226500 }, { "epoch": 0.86, "learning_rate": 4.64009929957231e-05, "loss": 4.149, "step": 8227000 }, { "epoch": 0.87, "learning_rate": 4.6400566122369574e-05, "loss": 4.1341, "step": 8227500 }, { "epoch": 0.87, "learning_rate": 4.6400139225665884e-05, "loss": 4.184, "step": 8228000 }, { "epoch": 0.87, "learning_rate": 4.6399712305612525e-05, "loss": 4.16, "step": 8228500 }, { "epoch": 0.87, "learning_rate": 4.6399285362209944e-05, "loss": 4.1627, "step": 8229000 }, { "epoch": 0.87, "learning_rate": 4.6398858395458615e-05, "loss": 4.1495, "step": 8229500 }, { "epoch": 0.87, "learning_rate": 4.6398431405359e-05, "loss": 4.1839, "step": 8230000 }, { "epoch": 0.87, "learning_rate": 4.639800439191157e-05, "loss": 4.1486, "step": 8230500 }, { "epoch": 0.87, "learning_rate": 4.639757735511678e-05, "loss": 4.1805, "step": 8231000 }, { "epoch": 0.87, "learning_rate": 4.639715029497511e-05, "loss": 4.1617, "step": 8231500 }, { "epoch": 0.87, "learning_rate": 4.639672321148702e-05, "loss": 4.1648, "step": 8232000 }, { "epoch": 0.87, "learning_rate": 4.639629610465298e-05, "loss": 4.1248, "step": 8232500 }, { "epoch": 0.87, "learning_rate": 4.6395868974473445e-05, "loss": 4.1899, "step": 8233000 }, { "epoch": 0.87, "learning_rate": 4.6395441820948885e-05, "loss": 4.1415, "step": 8233500 }, { "epoch": 0.87, "learning_rate": 4.639501464407977e-05, "loss": 4.1576, "step": 8234000 }, { "epoch": 0.87, "learning_rate": 4.6394587443866565e-05, "loss": 4.1529, "step": 8234500 }, { "epoch": 0.87, "learning_rate": 4.6394160220309733e-05, "loss": 4.1592, "step": 8235000 }, { "epoch": 0.87, "learning_rate": 4.639373297340975e-05, "loss": 4.1874, "step": 8235500 }, { "epoch": 0.87, "learning_rate": 4.6393305703167066e-05, "loss": 4.1776, "step": 8236000 }, { "epoch": 0.87, "learning_rate": 4.6392878409582166e-05, "loss": 4.1428, "step": 8236500 }, { "epoch": 0.87, "learning_rate": 4.63924510926555e-05, "loss": 4.1408, "step": 8237000 }, { "epoch": 0.87, "learning_rate": 4.639202375238755e-05, "loss": 4.1628, "step": 8237500 }, { "epoch": 0.87, "learning_rate": 4.639159638877876e-05, "loss": 4.1785, "step": 8238000 }, { "epoch": 0.87, "learning_rate": 4.639116900182962e-05, "loss": 4.1583, "step": 8238500 }, { "epoch": 0.87, "learning_rate": 4.639074159154058e-05, "loss": 4.1674, "step": 8239000 }, { "epoch": 0.87, "learning_rate": 4.639031415791211e-05, "loss": 4.165, "step": 8239500 }, { "epoch": 0.87, "learning_rate": 4.6389886700944676e-05, "loss": 4.1641, "step": 8240000 }, { "epoch": 0.87, "learning_rate": 4.6389459220638755e-05, "loss": 4.1618, "step": 8240500 }, { "epoch": 0.87, "learning_rate": 4.638903171699479e-05, "loss": 4.163, "step": 8241000 }, { "epoch": 0.87, "learning_rate": 4.638860419001328e-05, "loss": 4.1633, "step": 8241500 }, { "epoch": 0.87, "learning_rate": 4.638817663969467e-05, "loss": 4.1362, "step": 8242000 }, { "epoch": 0.87, "learning_rate": 4.638774906603943e-05, "loss": 4.1494, "step": 8242500 }, { "epoch": 0.87, "learning_rate": 4.6387321469048025e-05, "loss": 4.1299, "step": 8243000 }, { "epoch": 0.87, "learning_rate": 4.638689384872092e-05, "loss": 4.1526, "step": 8243500 }, { "epoch": 0.87, "learning_rate": 4.6386466205058586e-05, "loss": 4.1739, "step": 8244000 }, { "epoch": 0.87, "learning_rate": 4.6386038538061495e-05, "loss": 4.152, "step": 8244500 }, { "epoch": 0.87, "learning_rate": 4.63856108477301e-05, "loss": 4.153, "step": 8245000 }, { "epoch": 0.87, "learning_rate": 4.638518313406488e-05, "loss": 4.1407, "step": 8245500 }, { "epoch": 0.87, "learning_rate": 4.6384755397066305e-05, "loss": 4.1669, "step": 8246000 }, { "epoch": 0.87, "learning_rate": 4.638432763673482e-05, "loss": 4.1531, "step": 8246500 }, { "epoch": 0.87, "learning_rate": 4.638389985307091e-05, "loss": 4.1533, "step": 8247000 }, { "epoch": 0.87, "learning_rate": 4.638347204607504e-05, "loss": 4.142, "step": 8247500 }, { "epoch": 0.87, "learning_rate": 4.638304421574767e-05, "loss": 4.147, "step": 8248000 }, { "epoch": 0.87, "learning_rate": 4.638261636208927e-05, "loss": 4.1503, "step": 8248500 }, { "epoch": 1.08, "learning_rate": 4.44256798399337e-05, "loss": 4.1485, "step": 8249000 }, { "epoch": 1.08, "learning_rate": 4.442503005661928e-05, "loss": 4.1505, "step": 8249500 }, { "epoch": 1.08, "learning_rate": 4.442438024018789e-05, "loss": 4.1365, "step": 8250000 }, { "epoch": 1.08, "learning_rate": 4.442373039064062e-05, "loss": 4.1398, "step": 8250500 }, { "epoch": 1.08, "learning_rate": 4.44230805079786e-05, "loss": 4.1186, "step": 8251000 }, { "epoch": 1.08, "learning_rate": 4.442243059220292e-05, "loss": 4.1104, "step": 8251500 }, { "epoch": 1.08, "learning_rate": 4.44217806433147e-05, "loss": 4.1098, "step": 8252000 }, { "epoch": 1.08, "learning_rate": 4.442113066131504e-05, "loss": 4.1154, "step": 8252500 }, { "epoch": 1.08, "learning_rate": 4.442048064620505e-05, "loss": 4.1157, "step": 8253000 }, { "epoch": 1.08, "learning_rate": 4.441983059798585e-05, "loss": 4.1367, "step": 8253500 }, { "epoch": 1.08, "learning_rate": 4.441918051665852e-05, "loss": 4.1233, "step": 8254000 }, { "epoch": 1.08, "learning_rate": 4.44185304022242e-05, "loss": 4.1019, "step": 8254500 }, { "epoch": 1.08, "learning_rate": 4.4417880254683975e-05, "loss": 4.0863, "step": 8255000 }, { "epoch": 1.08, "learning_rate": 4.441723007403897e-05, "loss": 4.0967, "step": 8255500 }, { "epoch": 1.09, "learning_rate": 4.441657986029028e-05, "loss": 4.1193, "step": 8256000 }, { "epoch": 1.09, "learning_rate": 4.441592961343902e-05, "loss": 4.1082, "step": 8256500 }, { "epoch": 1.09, "learning_rate": 4.44152793334863e-05, "loss": 4.1076, "step": 8257000 }, { "epoch": 1.09, "learning_rate": 4.441462902043323e-05, "loss": 4.1135, "step": 8257500 }, { "epoch": 1.09, "learning_rate": 4.4413978674280915e-05, "loss": 4.1101, "step": 8258000 }, { "epoch": 1.09, "learning_rate": 4.4413328295030466e-05, "loss": 4.0954, "step": 8258500 }, { "epoch": 1.09, "learning_rate": 4.4412677882682985e-05, "loss": 4.1386, "step": 8259000 }, { "epoch": 1.09, "learning_rate": 4.441202743723959e-05, "loss": 4.1114, "step": 8259500 }, { "epoch": 1.09, "learning_rate": 4.441137695870138e-05, "loss": 4.1035, "step": 8260000 }, { "epoch": 1.09, "learning_rate": 4.4410726447069474e-05, "loss": 4.1033, "step": 8260500 }, { "epoch": 1.09, "learning_rate": 4.441007590234497e-05, "loss": 4.1041, "step": 8261000 }, { "epoch": 1.09, "learning_rate": 4.4409425324528995e-05, "loss": 4.1006, "step": 8261500 }, { "epoch": 1.09, "learning_rate": 4.4408774713622645e-05, "loss": 4.0958, "step": 8262000 }, { "epoch": 1.09, "learning_rate": 4.4408124069627035e-05, "loss": 4.1115, "step": 8262500 }, { "epoch": 1.09, "learning_rate": 4.440747339254326e-05, "loss": 4.0921, "step": 8263000 }, { "epoch": 1.09, "learning_rate": 4.440682268237244e-05, "loss": 4.1148, "step": 8263500 }, { "epoch": 1.09, "learning_rate": 4.440617193911569e-05, "loss": 4.1071, "step": 8264000 }, { "epoch": 1.09, "learning_rate": 4.440552116277411e-05, "loss": 4.0899, "step": 8264500 }, { "epoch": 1.09, "learning_rate": 4.4404870353348824e-05, "loss": 4.1, "step": 8265000 }, { "epoch": 1.09, "learning_rate": 4.440421951084092e-05, "loss": 4.0822, "step": 8265500 }, { "epoch": 1.09, "learning_rate": 4.440356863525152e-05, "loss": 4.1035, "step": 8266000 }, { "epoch": 1.09, "learning_rate": 4.440291772658173e-05, "loss": 4.1026, "step": 8266500 }, { "epoch": 1.09, "learning_rate": 4.440226678483267e-05, "loss": 4.1307, "step": 8267000 }, { "epoch": 1.09, "learning_rate": 4.440161581000544e-05, "loss": 4.1143, "step": 8267500 }, { "epoch": 1.09, "learning_rate": 4.440096480210114e-05, "loss": 4.1139, "step": 8268000 }, { "epoch": 1.09, "learning_rate": 4.44003137611209e-05, "loss": 4.1134, "step": 8268500 }, { "epoch": 1.09, "learning_rate": 4.439966268706582e-05, "loss": 4.0922, "step": 8269000 }, { "epoch": 1.09, "learning_rate": 4.4399011579937014e-05, "loss": 4.1066, "step": 8269500 }, { "epoch": 1.09, "learning_rate": 4.439836043973558e-05, "loss": 4.1258, "step": 8270000 }, { "epoch": 1.09, "learning_rate": 4.439770926646264e-05, "loss": 4.0858, "step": 8270500 }, { "epoch": 1.09, "learning_rate": 4.439705806011931e-05, "loss": 4.0913, "step": 8271000 }, { "epoch": 1.09, "learning_rate": 4.439640682070668e-05, "loss": 4.0972, "step": 8271500 }, { "epoch": 1.09, "learning_rate": 4.439575554822588e-05, "loss": 4.0977, "step": 8272000 }, { "epoch": 1.09, "learning_rate": 4.439510424267801e-05, "loss": 4.1343, "step": 8272500 }, { "epoch": 1.09, "learning_rate": 4.439445290406419e-05, "loss": 4.1221, "step": 8273000 }, { "epoch": 1.09, "learning_rate": 4.439380153238551e-05, "loss": 4.1244, "step": 8273500 }, { "epoch": 1.09, "learning_rate": 4.43931501276431e-05, "loss": 4.1026, "step": 8274000 }, { "epoch": 1.09, "learning_rate": 4.4392498689838066e-05, "loss": 4.1069, "step": 8274500 }, { "epoch": 1.09, "learning_rate": 4.439184721897151e-05, "loss": 4.0873, "step": 8275000 }, { "epoch": 1.09, "learning_rate": 4.4391195715044556e-05, "loss": 4.0965, "step": 8275500 }, { "epoch": 1.09, "learning_rate": 4.43905441780583e-05, "loss": 4.0952, "step": 8276000 }, { "epoch": 1.09, "learning_rate": 4.4389892608013875e-05, "loss": 4.0886, "step": 8276500 }, { "epoch": 1.09, "learning_rate": 4.438924100491236e-05, "loss": 4.0944, "step": 8277000 }, { "epoch": 1.09, "learning_rate": 4.438858936875489e-05, "loss": 4.1055, "step": 8277500 }, { "epoch": 1.09, "learning_rate": 4.438793769954258e-05, "loss": 4.0761, "step": 8278000 }, { "epoch": 1.09, "learning_rate": 4.438728599727652e-05, "loss": 4.0893, "step": 8278500 }, { "epoch": 1.09, "learning_rate": 4.438663426195784e-05, "loss": 4.0663, "step": 8279000 }, { "epoch": 1.09, "learning_rate": 4.438598249358763e-05, "loss": 4.0905, "step": 8279500 }, { "epoch": 1.09, "learning_rate": 4.4385330692167026e-05, "loss": 4.076, "step": 8280000 }, { "epoch": 1.09, "learning_rate": 4.438467885769712e-05, "loss": 4.1025, "step": 8280500 }, { "epoch": 1.09, "learning_rate": 4.4384026990179034e-05, "loss": 4.1031, "step": 8281000 }, { "epoch": 1.09, "learning_rate": 4.4383375089613874e-05, "loss": 4.0903, "step": 8281500 }, { "epoch": 1.09, "learning_rate": 4.4382723156002755e-05, "loss": 4.0981, "step": 8282000 }, { "epoch": 1.09, "learning_rate": 4.4382071189346786e-05, "loss": 4.0994, "step": 8282500 }, { "epoch": 1.09, "learning_rate": 4.438141918964708e-05, "loss": 4.0772, "step": 8283000 }, { "epoch": 1.09, "learning_rate": 4.4380767156904746e-05, "loss": 4.0931, "step": 8283500 }, { "epoch": 1.09, "learning_rate": 4.43801150911209e-05, "loss": 4.0956, "step": 8284000 }, { "epoch": 1.09, "learning_rate": 4.4379462992296645e-05, "loss": 4.0906, "step": 8284500 }, { "epoch": 1.09, "learning_rate": 4.43788108604331e-05, "loss": 4.088, "step": 8285000 }, { "epoch": 1.09, "learning_rate": 4.437815869553138e-05, "loss": 4.0716, "step": 8285500 }, { "epoch": 1.09, "learning_rate": 4.437750649759259e-05, "loss": 4.0844, "step": 8286000 }, { "epoch": 1.09, "learning_rate": 4.437685426661784e-05, "loss": 4.087, "step": 8286500 }, { "epoch": 1.09, "learning_rate": 4.437620200260826e-05, "loss": 4.0959, "step": 8287000 }, { "epoch": 1.09, "learning_rate": 4.4375549705564936e-05, "loss": 4.0735, "step": 8287500 }, { "epoch": 1.09, "learning_rate": 4.4374897375488993e-05, "loss": 4.0761, "step": 8288000 }, { "epoch": 1.09, "learning_rate": 4.4374245012381544e-05, "loss": 4.0849, "step": 8288500 }, { "epoch": 1.09, "learning_rate": 4.43735926162437e-05, "loss": 4.0775, "step": 8289000 }, { "epoch": 1.09, "learning_rate": 4.437294018707657e-05, "loss": 4.0742, "step": 8289500 }, { "epoch": 1.09, "learning_rate": 4.4372287724881275e-05, "loss": 4.0833, "step": 8290000 }, { "epoch": 1.09, "learning_rate": 4.4371635229658926e-05, "loss": 4.0826, "step": 8290500 }, { "epoch": 1.09, "learning_rate": 4.4370982701410626e-05, "loss": 4.0774, "step": 8291000 }, { "epoch": 1.09, "learning_rate": 4.437033014013748e-05, "loss": 4.1045, "step": 8291500 }, { "epoch": 1.09, "learning_rate": 4.436967754584063e-05, "loss": 4.0664, "step": 8292000 }, { "epoch": 1.09, "learning_rate": 4.436902491852116e-05, "loss": 4.1001, "step": 8292500 }, { "epoch": 1.09, "learning_rate": 4.43683722581802e-05, "loss": 4.0852, "step": 8293000 }, { "epoch": 1.09, "learning_rate": 4.4367719564818866e-05, "loss": 4.0586, "step": 8293500 }, { "epoch": 1.09, "learning_rate": 4.436706683843825e-05, "loss": 4.0787, "step": 8294000 }, { "epoch": 1.09, "learning_rate": 4.436641407903948e-05, "loss": 4.0911, "step": 8294500 }, { "epoch": 1.09, "learning_rate": 4.4365761286623665e-05, "loss": 4.0763, "step": 8295000 }, { "epoch": 1.09, "learning_rate": 4.436510846119192e-05, "loss": 4.0884, "step": 8295500 }, { "epoch": 1.09, "learning_rate": 4.436445560274535e-05, "loss": 4.1023, "step": 8296000 }, { "epoch": 1.09, "learning_rate": 4.436380271128507e-05, "loss": 4.0866, "step": 8296500 }, { "epoch": 1.09, "learning_rate": 4.436314978681221e-05, "loss": 4.0769, "step": 8297000 }, { "epoch": 1.09, "learning_rate": 4.436249682932787e-05, "loss": 4.0816, "step": 8297500 }, { "epoch": 1.09, "learning_rate": 4.4361843838833164e-05, "loss": 4.0757, "step": 8298000 }, { "epoch": 1.09, "learning_rate": 4.436119081532919e-05, "loss": 4.1179, "step": 8298500 }, { "epoch": 1.09, "learning_rate": 4.4360537758817086e-05, "loss": 4.0841, "step": 8299000 }, { "epoch": 1.09, "learning_rate": 4.4359884669297955e-05, "loss": 4.0695, "step": 8299500 }, { "epoch": 1.09, "learning_rate": 4.435923154677292e-05, "loss": 4.0807, "step": 8300000 }, { "epoch": 1.09, "learning_rate": 4.435857839124308e-05, "loss": 4.0825, "step": 8300500 }, { "epoch": 1.09, "learning_rate": 4.435792520270955e-05, "loss": 4.0732, "step": 8301000 }, { "epoch": 1.09, "learning_rate": 4.4357271981173446e-05, "loss": 4.0716, "step": 8301500 }, { "epoch": 1.09, "learning_rate": 4.435661872663589e-05, "loss": 4.0742, "step": 8302000 }, { "epoch": 1.09, "learning_rate": 4.435596543909799e-05, "loss": 4.0815, "step": 8302500 }, { "epoch": 1.09, "learning_rate": 4.435531211856085e-05, "loss": 4.0918, "step": 8303000 }, { "epoch": 1.09, "learning_rate": 4.4354658765025605e-05, "loss": 4.0811, "step": 8303500 }, { "epoch": 1.09, "learning_rate": 4.435400537849335e-05, "loss": 4.0635, "step": 8304000 }, { "epoch": 1.09, "learning_rate": 4.43533519589652e-05, "loss": 4.0569, "step": 8304500 }, { "epoch": 1.09, "learning_rate": 4.435269850644228e-05, "loss": 4.0601, "step": 8305000 }, { "epoch": 1.09, "learning_rate": 4.43520450209257e-05, "loss": 4.0822, "step": 8305500 }, { "epoch": 1.09, "learning_rate": 4.435139150241656e-05, "loss": 4.0685, "step": 8306000 }, { "epoch": 1.09, "learning_rate": 4.4350737950916e-05, "loss": 4.0683, "step": 8306500 }, { "epoch": 1.09, "learning_rate": 4.435008436642512e-05, "loss": 4.0606, "step": 8307000 }, { "epoch": 1.09, "learning_rate": 4.434943074894503e-05, "loss": 4.0591, "step": 8307500 }, { "epoch": 1.09, "learning_rate": 4.434877709847686e-05, "loss": 4.0767, "step": 8308000 }, { "epoch": 1.09, "learning_rate": 4.43481234150217e-05, "loss": 4.0687, "step": 8308500 }, { "epoch": 1.09, "learning_rate": 4.434746969858069e-05, "loss": 4.0791, "step": 8309000 }, { "epoch": 1.09, "learning_rate": 4.434681594915493e-05, "loss": 4.0561, "step": 8309500 }, { "epoch": 1.09, "learning_rate": 4.434616216674553e-05, "loss": 4.0761, "step": 8310000 }, { "epoch": 1.09, "learning_rate": 4.434550835135362e-05, "loss": 4.0798, "step": 8310500 }, { "epoch": 1.09, "learning_rate": 4.4344854502980304e-05, "loss": 4.0644, "step": 8311000 }, { "epoch": 1.09, "learning_rate": 4.4344200621626694e-05, "loss": 4.0927, "step": 8311500 }, { "epoch": 1.09, "learning_rate": 4.434354670729392e-05, "loss": 4.0724, "step": 8312000 }, { "epoch": 1.09, "learning_rate": 4.434289275998308e-05, "loss": 4.0834, "step": 8312500 }, { "epoch": 1.09, "learning_rate": 4.43422387796953e-05, "loss": 4.0682, "step": 8313000 }, { "epoch": 1.09, "learning_rate": 4.4341584766431696e-05, "loss": 4.0688, "step": 8313500 }, { "epoch": 1.09, "learning_rate": 4.434093072019337e-05, "loss": 4.0732, "step": 8314000 }, { "epoch": 1.09, "learning_rate": 4.434027664098145e-05, "loss": 4.0547, "step": 8314500 }, { "epoch": 1.09, "learning_rate": 4.4339622528797036e-05, "loss": 4.0685, "step": 8315000 }, { "epoch": 1.09, "learning_rate": 4.433896838364127e-05, "loss": 4.0843, "step": 8315500 }, { "epoch": 1.09, "learning_rate": 4.433831420551523e-05, "loss": 4.0786, "step": 8316000 }, { "epoch": 1.09, "learning_rate": 4.433765999442006e-05, "loss": 4.0747, "step": 8316500 }, { "epoch": 1.09, "learning_rate": 4.4337005750356875e-05, "loss": 4.091, "step": 8317000 }, { "epoch": 1.09, "learning_rate": 4.433635147332678e-05, "loss": 4.0477, "step": 8317500 }, { "epoch": 1.09, "learning_rate": 4.433569716333088e-05, "loss": 4.0656, "step": 8318000 }, { "epoch": 1.09, "learning_rate": 4.433504282037032e-05, "loss": 4.0623, "step": 8318500 }, { "epoch": 1.09, "learning_rate": 4.4334388444446184e-05, "loss": 4.0826, "step": 8319000 }, { "epoch": 1.09, "learning_rate": 4.4333734035559614e-05, "loss": 4.0703, "step": 8319500 }, { "epoch": 1.09, "learning_rate": 4.433307959371171e-05, "loss": 4.0551, "step": 8320000 }, { "epoch": 1.09, "learning_rate": 4.4332425118903596e-05, "loss": 4.0749, "step": 8320500 }, { "epoch": 1.09, "learning_rate": 4.433177061113638e-05, "loss": 4.08, "step": 8321000 }, { "epoch": 1.09, "learning_rate": 4.433111607041118e-05, "loss": 4.1058, "step": 8321500 }, { "epoch": 1.09, "learning_rate": 4.4330461496729114e-05, "loss": 4.066, "step": 8322000 }, { "epoch": 1.09, "learning_rate": 4.4329806890091305e-05, "loss": 4.0675, "step": 8322500 }, { "epoch": 1.09, "learning_rate": 4.432915225049885e-05, "loss": 4.0599, "step": 8323000 }, { "epoch": 1.09, "learning_rate": 4.432849757795289e-05, "loss": 4.0672, "step": 8323500 }, { "epoch": 1.09, "learning_rate": 4.4327842872454514e-05, "loss": 4.0674, "step": 8324000 }, { "epoch": 1.09, "learning_rate": 4.432718813400486e-05, "loss": 4.0739, "step": 8324500 }, { "epoch": 1.09, "learning_rate": 4.4326533362605034e-05, "loss": 4.0556, "step": 8325000 }, { "epoch": 1.09, "learning_rate": 4.432587855825615e-05, "loss": 4.0582, "step": 8325500 }, { "epoch": 1.09, "learning_rate": 4.4325223720959333e-05, "loss": 4.0759, "step": 8326000 }, { "epoch": 1.09, "learning_rate": 4.4324568850715696e-05, "loss": 4.0725, "step": 8326500 }, { "epoch": 1.09, "learning_rate": 4.432391394752635e-05, "loss": 4.0591, "step": 8327000 }, { "epoch": 1.09, "learning_rate": 4.432325901139243e-05, "loss": 4.0371, "step": 8327500 }, { "epoch": 1.09, "learning_rate": 4.432260404231503e-05, "loss": 4.047, "step": 8328000 }, { "epoch": 1.09, "learning_rate": 4.432194904029527e-05, "loss": 4.0747, "step": 8328500 }, { "epoch": 1.09, "learning_rate": 4.432129400533428e-05, "loss": 4.0524, "step": 8329000 }, { "epoch": 1.09, "learning_rate": 4.4320638937433156e-05, "loss": 4.0345, "step": 8329500 }, { "epoch": 1.09, "learning_rate": 4.431998383659304e-05, "loss": 4.068, "step": 8330000 }, { "epoch": 1.09, "learning_rate": 4.4319328702815035e-05, "loss": 4.0742, "step": 8330500 }, { "epoch": 1.09, "learning_rate": 4.431867353610026e-05, "loss": 4.0534, "step": 8331000 }, { "epoch": 1.09, "learning_rate": 4.431801833644983e-05, "loss": 4.0748, "step": 8331500 }, { "epoch": 1.1, "learning_rate": 4.431736310386486e-05, "loss": 4.0643, "step": 8332000 }, { "epoch": 1.1, "learning_rate": 4.431670783834647e-05, "loss": 4.0658, "step": 8332500 }, { "epoch": 1.1, "learning_rate": 4.431605253989578e-05, "loss": 4.0618, "step": 8333000 }, { "epoch": 1.1, "learning_rate": 4.431539720851391e-05, "loss": 4.0434, "step": 8333500 }, { "epoch": 1.1, "learning_rate": 4.431474184420196e-05, "loss": 4.0638, "step": 8334000 }, { "epoch": 1.1, "learning_rate": 4.431408644696107e-05, "loss": 4.0475, "step": 8334500 }, { "epoch": 1.1, "learning_rate": 4.431343101679234e-05, "loss": 4.0613, "step": 8335000 }, { "epoch": 1.1, "learning_rate": 4.43127755536969e-05, "loss": 4.067, "step": 8335500 }, { "epoch": 1.1, "learning_rate": 4.431212005767586e-05, "loss": 4.089, "step": 8336000 }, { "epoch": 1.1, "learning_rate": 4.431146452873034e-05, "loss": 4.0638, "step": 8336500 }, { "epoch": 1.1, "learning_rate": 4.431080896686145e-05, "loss": 4.0669, "step": 8337000 }, { "epoch": 1.1, "learning_rate": 4.431015337207032e-05, "loss": 4.0628, "step": 8337500 }, { "epoch": 1.1, "learning_rate": 4.4309497744358065e-05, "loss": 4.0771, "step": 8338000 }, { "epoch": 1.1, "learning_rate": 4.4308842083725785e-05, "loss": 4.0556, "step": 8338500 }, { "epoch": 1.1, "learning_rate": 4.4308186390174626e-05, "loss": 4.0672, "step": 8339000 }, { "epoch": 1.1, "learning_rate": 4.4307530663705694e-05, "loss": 4.0406, "step": 8339500 }, { "epoch": 1.1, "learning_rate": 4.43068749043201e-05, "loss": 4.0707, "step": 8340000 }, { "epoch": 1.1, "learning_rate": 4.430621911201897e-05, "loss": 4.0715, "step": 8340500 }, { "epoch": 1.1, "learning_rate": 4.430556328680342e-05, "loss": 4.0723, "step": 8341000 }, { "epoch": 1.1, "learning_rate": 4.430490742867456e-05, "loss": 4.0499, "step": 8341500 }, { "epoch": 1.1, "learning_rate": 4.4304251537633524e-05, "loss": 4.0753, "step": 8342000 }, { "epoch": 1.1, "learning_rate": 4.4303595613681415e-05, "loss": 4.0638, "step": 8342500 }, { "epoch": 1.1, "learning_rate": 4.4302939656819365e-05, "loss": 4.0487, "step": 8343000 }, { "epoch": 1.1, "learning_rate": 4.4302283667048486e-05, "loss": 4.0545, "step": 8343500 }, { "epoch": 1.1, "learning_rate": 4.430162764436989e-05, "loss": 4.0545, "step": 8344000 }, { "epoch": 1.1, "learning_rate": 4.43009715887847e-05, "loss": 4.0485, "step": 8344500 }, { "epoch": 1.1, "learning_rate": 4.430031550029404e-05, "loss": 4.083, "step": 8345000 }, { "epoch": 1.1, "learning_rate": 4.429965937889902e-05, "loss": 4.0525, "step": 8345500 }, { "epoch": 1.1, "learning_rate": 4.4299003224600766e-05, "loss": 4.0537, "step": 8346000 }, { "epoch": 1.1, "learning_rate": 4.42983470374004e-05, "loss": 4.0351, "step": 8346500 }, { "epoch": 1.1, "learning_rate": 4.429769081729903e-05, "loss": 4.0731, "step": 8347000 }, { "epoch": 1.1, "learning_rate": 4.4297034564297774e-05, "loss": 4.0593, "step": 8347500 }, { "epoch": 1.1, "learning_rate": 4.429637827839775e-05, "loss": 4.0533, "step": 8348000 }, { "epoch": 1.1, "learning_rate": 4.42957219596001e-05, "loss": 4.0674, "step": 8348500 }, { "epoch": 1.1, "learning_rate": 4.429506560790592e-05, "loss": 4.04, "step": 8349000 }, { "epoch": 1.1, "learning_rate": 4.4294409223316325e-05, "loss": 4.0644, "step": 8349500 }, { "epoch": 1.1, "learning_rate": 4.429375280583245e-05, "loss": 4.0298, "step": 8350000 }, { "epoch": 1.1, "learning_rate": 4.429309635545541e-05, "loss": 4.0459, "step": 8350500 }, { "epoch": 1.1, "learning_rate": 4.429243987218632e-05, "loss": 4.0629, "step": 8351000 }, { "epoch": 1.1, "learning_rate": 4.429178335602629e-05, "loss": 4.0626, "step": 8351500 }, { "epoch": 1.1, "learning_rate": 4.4291126806976466e-05, "loss": 4.0482, "step": 8352000 }, { "epoch": 1.1, "learning_rate": 4.429047022503795e-05, "loss": 4.0492, "step": 8352500 }, { "epoch": 1.1, "learning_rate": 4.428981361021186e-05, "loss": 4.0696, "step": 8353000 }, { "epoch": 1.1, "learning_rate": 4.4289156962499316e-05, "loss": 4.0571, "step": 8353500 }, { "epoch": 1.1, "learning_rate": 4.428850028190144e-05, "loss": 4.0508, "step": 8354000 }, { "epoch": 1.1, "learning_rate": 4.428784356841936e-05, "loss": 4.0606, "step": 8354500 }, { "epoch": 1.1, "learning_rate": 4.4287186822054176e-05, "loss": 4.0473, "step": 8355000 }, { "epoch": 1.1, "learning_rate": 4.428653004280703e-05, "loss": 4.056, "step": 8355500 }, { "epoch": 1.1, "learning_rate": 4.4285873230679023e-05, "loss": 4.0409, "step": 8356000 }, { "epoch": 1.1, "learning_rate": 4.4285216385671277e-05, "loss": 4.0446, "step": 8356500 }, { "epoch": 1.1, "learning_rate": 4.428455950778493e-05, "loss": 4.0449, "step": 8357000 }, { "epoch": 1.1, "learning_rate": 4.428390259702108e-05, "loss": 4.0399, "step": 8357500 }, { "epoch": 1.1, "learning_rate": 4.428324565338086e-05, "loss": 4.0538, "step": 8358000 }, { "epoch": 1.1, "learning_rate": 4.4282588676865386e-05, "loss": 4.0645, "step": 8358500 }, { "epoch": 1.1, "learning_rate": 4.428193166747577e-05, "loss": 4.0245, "step": 8359000 }, { "epoch": 1.1, "learning_rate": 4.428127462521315e-05, "loss": 4.0333, "step": 8359500 }, { "epoch": 1.1, "learning_rate": 4.4280617550078636e-05, "loss": 4.0613, "step": 8360000 }, { "epoch": 1.1, "learning_rate": 4.427996044207334e-05, "loss": 4.042, "step": 8360500 }, { "epoch": 1.1, "learning_rate": 4.42793033011984e-05, "loss": 4.0665, "step": 8361000 }, { "epoch": 1.1, "learning_rate": 4.4278646127454924e-05, "loss": 4.0506, "step": 8361500 }, { "epoch": 1.1, "learning_rate": 4.427798892084404e-05, "loss": 4.0404, "step": 8362000 }, { "epoch": 1.1, "learning_rate": 4.427733168136685e-05, "loss": 4.0574, "step": 8362500 }, { "epoch": 1.1, "learning_rate": 4.42766744090245e-05, "loss": 4.0629, "step": 8363000 }, { "epoch": 1.1, "learning_rate": 4.42760171038181e-05, "loss": 4.0507, "step": 8363500 }, { "epoch": 1.1, "learning_rate": 4.4275359765748766e-05, "loss": 4.0508, "step": 8364000 }, { "epoch": 1.1, "learning_rate": 4.427470239481762e-05, "loss": 4.0421, "step": 8364500 }, { "epoch": 1.1, "learning_rate": 4.427404499102579e-05, "loss": 4.0299, "step": 8365000 }, { "epoch": 1.1, "learning_rate": 4.427338755437438e-05, "loss": 4.0512, "step": 8365500 }, { "epoch": 1.1, "learning_rate": 4.427273008486453e-05, "loss": 4.0497, "step": 8366000 }, { "epoch": 1.1, "learning_rate": 4.427207258249736e-05, "loss": 4.0578, "step": 8366500 }, { "epoch": 1.1, "learning_rate": 4.427141504727398e-05, "loss": 4.0455, "step": 8367000 }, { "epoch": 1.1, "learning_rate": 4.427075747919552e-05, "loss": 4.0398, "step": 8367500 }, { "epoch": 1.1, "learning_rate": 4.427009987826309e-05, "loss": 4.0651, "step": 8368000 }, { "epoch": 1.1, "learning_rate": 4.426944224447782e-05, "loss": 4.0536, "step": 8368500 }, { "epoch": 1.1, "learning_rate": 4.426878457784083e-05, "loss": 4.0574, "step": 8369000 }, { "epoch": 1.1, "learning_rate": 4.426812687835323e-05, "loss": 4.0248, "step": 8369500 }, { "epoch": 1.1, "learning_rate": 4.426746914601616e-05, "loss": 4.0348, "step": 8370000 }, { "epoch": 1.1, "learning_rate": 4.4266811380830734e-05, "loss": 4.0382, "step": 8370500 }, { "epoch": 1.1, "learning_rate": 4.4266153582798076e-05, "loss": 4.0661, "step": 8371000 }, { "epoch": 1.1, "learning_rate": 4.42654957519193e-05, "loss": 4.0479, "step": 8371500 }, { "epoch": 1.1, "learning_rate": 4.426483788819552e-05, "loss": 4.0218, "step": 8372000 }, { "epoch": 1.1, "learning_rate": 4.4264179991627886e-05, "loss": 4.0576, "step": 8372500 }, { "epoch": 1.1, "learning_rate": 4.426352206221749e-05, "loss": 4.0534, "step": 8373000 }, { "epoch": 1.1, "learning_rate": 4.4262864099965474e-05, "loss": 4.0347, "step": 8373500 }, { "epoch": 1.1, "learning_rate": 4.4262206104872946e-05, "loss": 4.0626, "step": 8374000 }, { "epoch": 1.1, "learning_rate": 4.426154807694103e-05, "loss": 4.0475, "step": 8374500 }, { "epoch": 1.1, "learning_rate": 4.4260890016170856e-05, "loss": 4.0622, "step": 8375000 }, { "epoch": 1.1, "learning_rate": 4.4260231922563544e-05, "loss": 4.058, "step": 8375500 }, { "epoch": 1.1, "learning_rate": 4.4259573796120214e-05, "loss": 4.0512, "step": 8376000 }, { "epoch": 1.1, "learning_rate": 4.425891563684198e-05, "loss": 4.0553, "step": 8376500 }, { "epoch": 1.1, "learning_rate": 4.425825744472998e-05, "loss": 4.0388, "step": 8377000 }, { "epoch": 1.1, "learning_rate": 4.4257599219785316e-05, "loss": 4.0526, "step": 8377500 }, { "epoch": 1.1, "learning_rate": 4.425694096200913e-05, "loss": 4.047, "step": 8378000 }, { "epoch": 1.1, "learning_rate": 4.425628267140254e-05, "loss": 4.0462, "step": 8378500 }, { "epoch": 1.1, "learning_rate": 4.425562434796665e-05, "loss": 4.0416, "step": 8379000 }, { "epoch": 1.1, "learning_rate": 4.425496599170261e-05, "loss": 4.0342, "step": 8379500 }, { "epoch": 1.1, "learning_rate": 4.425430760261152e-05, "loss": 4.0301, "step": 8380000 }, { "epoch": 1.1, "learning_rate": 4.4253649180694516e-05, "loss": 4.0588, "step": 8380500 }, { "epoch": 1.1, "learning_rate": 4.4252990725952715e-05, "loss": 4.0368, "step": 8381000 }, { "epoch": 1.1, "learning_rate": 4.425233223838724e-05, "loss": 4.0364, "step": 8381500 }, { "epoch": 1.1, "learning_rate": 4.4251673717999216e-05, "loss": 4.0199, "step": 8382000 }, { "epoch": 1.1, "learning_rate": 4.425101516478975e-05, "loss": 4.0468, "step": 8382500 }, { "epoch": 1.1, "learning_rate": 4.425035657876e-05, "loss": 4.0419, "step": 8383000 }, { "epoch": 1.1, "learning_rate": 4.4249697959911054e-05, "loss": 4.0331, "step": 8383500 }, { "epoch": 1.1, "learning_rate": 4.4249039308244054e-05, "loss": 4.0523, "step": 8384000 }, { "epoch": 1.1, "learning_rate": 4.424838062376012e-05, "loss": 4.0468, "step": 8384500 }, { "epoch": 1.1, "learning_rate": 4.424772190646036e-05, "loss": 4.0308, "step": 8385000 }, { "epoch": 1.1, "learning_rate": 4.4247063156345915e-05, "loss": 4.042, "step": 8385500 }, { "epoch": 1.1, "learning_rate": 4.4246404373417905e-05, "loss": 4.0316, "step": 8386000 }, { "epoch": 1.1, "learning_rate": 4.4245745557677445e-05, "loss": 4.053, "step": 8386500 }, { "epoch": 1.1, "learning_rate": 4.4245086709125666e-05, "loss": 4.0413, "step": 8387000 }, { "epoch": 1.1, "learning_rate": 4.424442782776369e-05, "loss": 4.038, "step": 8387500 }, { "epoch": 1.1, "learning_rate": 4.424376891359264e-05, "loss": 4.0379, "step": 8388000 }, { "epoch": 1.1, "learning_rate": 4.424310996661363e-05, "loss": 4.0296, "step": 8388500 }, { "epoch": 1.1, "learning_rate": 4.42424509868278e-05, "loss": 4.0518, "step": 8389000 }, { "epoch": 1.1, "learning_rate": 4.424179197423627e-05, "loss": 4.0423, "step": 8389500 }, { "epoch": 1.1, "learning_rate": 4.424113292884015e-05, "loss": 4.0356, "step": 8390000 }, { "epoch": 1.1, "learning_rate": 4.424047385064057e-05, "loss": 4.0207, "step": 8390500 }, { "epoch": 1.1, "learning_rate": 4.4239814739638664e-05, "loss": 4.0333, "step": 8391000 }, { "epoch": 1.1, "learning_rate": 4.4239155595835533e-05, "loss": 4.046, "step": 8391500 }, { "epoch": 1.1, "learning_rate": 4.423849641923233e-05, "loss": 4.0309, "step": 8392000 }, { "epoch": 1.1, "learning_rate": 4.4237837209830166e-05, "loss": 4.0318, "step": 8392500 }, { "epoch": 1.1, "learning_rate": 4.423717796763015e-05, "loss": 4.0432, "step": 8393000 }, { "epoch": 1.1, "learning_rate": 4.423651869263343e-05, "loss": 4.0458, "step": 8393500 }, { "epoch": 1.1, "learning_rate": 4.4235859384841114e-05, "loss": 4.0588, "step": 8394000 }, { "epoch": 1.1, "learning_rate": 4.4235200044254334e-05, "loss": 4.044, "step": 8394500 }, { "epoch": 1.1, "learning_rate": 4.4234540670874205e-05, "loss": 4.0382, "step": 8395000 }, { "epoch": 1.1, "learning_rate": 4.423388126470187e-05, "loss": 4.0365, "step": 8395500 }, { "epoch": 1.1, "learning_rate": 4.423322182573842e-05, "loss": 4.0423, "step": 8396000 }, { "epoch": 1.1, "learning_rate": 4.423256235398502e-05, "loss": 4.0239, "step": 8396500 }, { "epoch": 1.1, "learning_rate": 4.423190284944276e-05, "loss": 4.0683, "step": 8397000 }, { "epoch": 1.1, "learning_rate": 4.4231243312112785e-05, "loss": 4.0509, "step": 8397500 }, { "epoch": 1.1, "learning_rate": 4.423058374199621e-05, "loss": 4.0236, "step": 8398000 }, { "epoch": 1.1, "learning_rate": 4.422992413909417e-05, "loss": 4.0415, "step": 8398500 }, { "epoch": 1.1, "learning_rate": 4.4229264503407776e-05, "loss": 4.0213, "step": 8399000 }, { "epoch": 1.1, "learning_rate": 4.4228604834938146e-05, "loss": 4.035, "step": 8399500 }, { "epoch": 1.1, "learning_rate": 4.422794513368643e-05, "loss": 4.0323, "step": 8400000 }, { "epoch": 1.1, "learning_rate": 4.422728539965374e-05, "loss": 4.0373, "step": 8400500 }, { "epoch": 1.1, "learning_rate": 4.42266256328412e-05, "loss": 4.0082, "step": 8401000 }, { "epoch": 1.1, "learning_rate": 4.422596583324993e-05, "loss": 4.047, "step": 8401500 }, { "epoch": 1.1, "learning_rate": 4.4225306000881065e-05, "loss": 4.0258, "step": 8402000 }, { "epoch": 1.1, "learning_rate": 4.4224646135735725e-05, "loss": 4.0366, "step": 8402500 }, { "epoch": 1.1, "learning_rate": 4.422398623781503e-05, "loss": 4.0512, "step": 8403000 }, { "epoch": 1.1, "learning_rate": 4.422332630712012e-05, "loss": 4.0398, "step": 8403500 }, { "epoch": 1.1, "learning_rate": 4.4222666343652095e-05, "loss": 4.0331, "step": 8404000 }, { "epoch": 1.1, "learning_rate": 4.4222006347412116e-05, "loss": 4.0338, "step": 8404500 }, { "epoch": 1.1, "learning_rate": 4.4221346318401266e-05, "loss": 4.0332, "step": 8405000 }, { "epoch": 1.1, "learning_rate": 4.42206862566207e-05, "loss": 4.0509, "step": 8405500 }, { "epoch": 1.1, "learning_rate": 4.422002616207154e-05, "loss": 4.037, "step": 8406000 }, { "epoch": 1.1, "learning_rate": 4.4219366034754906e-05, "loss": 4.0033, "step": 8406500 }, { "epoch": 1.1, "learning_rate": 4.4218705874671915e-05, "loss": 4.0139, "step": 8407000 }, { "epoch": 1.1, "learning_rate": 4.42180456818237e-05, "loss": 4.0488, "step": 8407500 }, { "epoch": 1.11, "learning_rate": 4.4217385456211403e-05, "loss": 4.0341, "step": 8408000 }, { "epoch": 1.11, "learning_rate": 4.421672519783613e-05, "loss": 4.0345, "step": 8408500 }, { "epoch": 1.11, "learning_rate": 4.4216064906699005e-05, "loss": 4.0228, "step": 8409000 }, { "epoch": 1.11, "learning_rate": 4.421540458280116e-05, "loss": 4.0475, "step": 8409500 }, { "epoch": 1.11, "learning_rate": 4.4214744226143725e-05, "loss": 4.0177, "step": 8410000 }, { "epoch": 1.11, "learning_rate": 4.4214083836727817e-05, "loss": 4.0192, "step": 8410500 }, { "epoch": 1.11, "learning_rate": 4.421342341455457e-05, "loss": 4.0186, "step": 8411000 }, { "epoch": 1.11, "learning_rate": 4.42127629596251e-05, "loss": 4.0087, "step": 8411500 }, { "epoch": 1.11, "learning_rate": 4.421210247194055e-05, "loss": 4.0494, "step": 8412000 }, { "epoch": 1.11, "learning_rate": 4.421144195150203e-05, "loss": 4.0446, "step": 8412500 }, { "epoch": 1.11, "learning_rate": 4.4210781398310665e-05, "loss": 4.0303, "step": 8413000 }, { "epoch": 1.11, "learning_rate": 4.4210120812367595e-05, "loss": 4.0094, "step": 8413500 }, { "epoch": 1.11, "learning_rate": 4.4209460193673936e-05, "loss": 4.0433, "step": 8414000 }, { "epoch": 1.11, "learning_rate": 4.420879954223082e-05, "loss": 4.0177, "step": 8414500 }, { "epoch": 1.11, "learning_rate": 4.420813885803936e-05, "loss": 4.043, "step": 8415000 }, { "epoch": 1.11, "learning_rate": 4.42074781411007e-05, "loss": 4.0275, "step": 8415500 }, { "epoch": 1.11, "learning_rate": 4.420681739141596e-05, "loss": 4.0168, "step": 8416000 }, { "epoch": 1.11, "learning_rate": 4.420615660898626e-05, "loss": 4.0269, "step": 8416500 }, { "epoch": 1.11, "learning_rate": 4.420549579381274e-05, "loss": 4.0325, "step": 8417000 }, { "epoch": 1.11, "learning_rate": 4.420483494589651e-05, "loss": 4.0188, "step": 8417500 }, { "epoch": 1.11, "learning_rate": 4.420417406523871e-05, "loss": 4.027, "step": 8418000 }, { "epoch": 1.11, "learning_rate": 4.420351315184047e-05, "loss": 4.0325, "step": 8418500 }, { "epoch": 1.11, "learning_rate": 4.42028522057029e-05, "loss": 4.0337, "step": 8419000 }, { "epoch": 1.11, "learning_rate": 4.420219122682714e-05, "loss": 4.0555, "step": 8419500 }, { "epoch": 1.11, "learning_rate": 4.42015302152143e-05, "loss": 4.0359, "step": 8420000 }, { "epoch": 1.11, "learning_rate": 4.420086917086553e-05, "loss": 4.0212, "step": 8420500 }, { "epoch": 1.11, "learning_rate": 4.420020809378195e-05, "loss": 4.024, "step": 8421000 }, { "epoch": 1.11, "learning_rate": 4.419954698396468e-05, "loss": 4.0351, "step": 8421500 }, { "epoch": 1.11, "learning_rate": 4.419888584141485e-05, "loss": 4.0235, "step": 8422000 }, { "epoch": 1.11, "learning_rate": 4.419822466613358e-05, "loss": 4.0274, "step": 8422500 }, { "epoch": 1.11, "learning_rate": 4.4197563458122015e-05, "loss": 4.0073, "step": 8423000 }, { "epoch": 1.11, "learning_rate": 4.4196902217381264e-05, "loss": 4.028, "step": 8423500 }, { "epoch": 1.11, "learning_rate": 4.419624094391247e-05, "loss": 4.0487, "step": 8424000 }, { "epoch": 1.11, "learning_rate": 4.419557963771675e-05, "loss": 4.034, "step": 8424500 }, { "epoch": 1.11, "learning_rate": 4.419491829879524e-05, "loss": 4.017, "step": 8425000 }, { "epoch": 1.11, "learning_rate": 4.4194256927149055e-05, "loss": 4.0268, "step": 8425500 }, { "epoch": 1.11, "learning_rate": 4.419359552277933e-05, "loss": 4.0447, "step": 8426000 }, { "epoch": 1.11, "learning_rate": 4.419293408568719e-05, "loss": 4.0375, "step": 8426500 }, { "epoch": 1.11, "learning_rate": 4.419227261587377e-05, "loss": 4.0254, "step": 8427000 }, { "epoch": 1.11, "learning_rate": 4.419161111334019e-05, "loss": 4.0191, "step": 8427500 }, { "epoch": 1.11, "learning_rate": 4.4190949578087584e-05, "loss": 4.0155, "step": 8428000 }, { "epoch": 1.11, "learning_rate": 4.4190288010117066e-05, "loss": 4.0207, "step": 8428500 }, { "epoch": 1.11, "learning_rate": 4.418962640942977e-05, "loss": 4.0252, "step": 8429000 }, { "epoch": 1.11, "learning_rate": 4.418896477602684e-05, "loss": 4.0195, "step": 8429500 }, { "epoch": 1.11, "learning_rate": 4.4188303109909396e-05, "loss": 4.0314, "step": 8430000 }, { "epoch": 1.11, "learning_rate": 4.418764141107855e-05, "loss": 4.0404, "step": 8430500 }, { "epoch": 1.11, "learning_rate": 4.418697967953545e-05, "loss": 4.0426, "step": 8431000 }, { "epoch": 1.11, "learning_rate": 4.418631791528122e-05, "loss": 4.0608, "step": 8431500 }, { "epoch": 1.11, "learning_rate": 4.418565611831697e-05, "loss": 4.0522, "step": 8432000 }, { "epoch": 1.11, "learning_rate": 4.418499428864385e-05, "loss": 4.0392, "step": 8432500 }, { "epoch": 1.11, "learning_rate": 4.418433242626298e-05, "loss": 4.0357, "step": 8433000 }, { "epoch": 1.11, "learning_rate": 4.418367053117549e-05, "loss": 4.0203, "step": 8433500 }, { "epoch": 1.11, "learning_rate": 4.4183008603382506e-05, "loss": 4.0461, "step": 8434000 }, { "epoch": 1.11, "learning_rate": 4.4182346642885164e-05, "loss": 4.0218, "step": 8434500 }, { "epoch": 1.11, "learning_rate": 4.418168464968458e-05, "loss": 4.026, "step": 8435000 }, { "epoch": 1.11, "learning_rate": 4.4181022623781886e-05, "loss": 4.0377, "step": 8435500 }, { "epoch": 1.11, "learning_rate": 4.4180360565178226e-05, "loss": 4.024, "step": 8436000 }, { "epoch": 1.11, "learning_rate": 4.417969847387471e-05, "loss": 4.0286, "step": 8436500 }, { "epoch": 1.11, "learning_rate": 4.417903634987247e-05, "loss": 4.0377, "step": 8437000 }, { "epoch": 1.11, "learning_rate": 4.4178374193172646e-05, "loss": 4.0328, "step": 8437500 }, { "epoch": 1.11, "learning_rate": 4.417771200377636e-05, "loss": 4.0229, "step": 8438000 }, { "epoch": 1.11, "learning_rate": 4.417704978168473e-05, "loss": 3.9991, "step": 8438500 }, { "epoch": 1.11, "learning_rate": 4.41763875268989e-05, "loss": 4.0287, "step": 8439000 }, { "epoch": 1.11, "learning_rate": 4.4175725239419995e-05, "loss": 4.03, "step": 8439500 }, { "epoch": 1.11, "learning_rate": 4.4175062919249146e-05, "loss": 4.0068, "step": 8440000 }, { "epoch": 1.11, "learning_rate": 4.4174400566387474e-05, "loss": 4.0088, "step": 8440500 }, { "epoch": 1.11, "learning_rate": 4.4173738180836124e-05, "loss": 4.0229, "step": 8441000 }, { "epoch": 1.11, "learning_rate": 4.417307576259621e-05, "loss": 4.047, "step": 8441500 }, { "epoch": 1.11, "learning_rate": 4.4172413311668856e-05, "loss": 4.0335, "step": 8442000 }, { "epoch": 1.11, "learning_rate": 4.4171750828055214e-05, "loss": 4.0019, "step": 8442500 }, { "epoch": 1.11, "learning_rate": 4.4171088311756394e-05, "loss": 4.0153, "step": 8443000 }, { "epoch": 1.11, "learning_rate": 4.417042576277354e-05, "loss": 4.0127, "step": 8443500 }, { "epoch": 1.11, "learning_rate": 4.416976318110776e-05, "loss": 4.0468, "step": 8444000 }, { "epoch": 1.11, "learning_rate": 4.416910056676021e-05, "loss": 4.0376, "step": 8444500 }, { "epoch": 1.11, "learning_rate": 4.4168437919732006e-05, "loss": 4.0275, "step": 8445000 }, { "epoch": 1.11, "learning_rate": 4.416777524002428e-05, "loss": 4.0275, "step": 8445500 }, { "epoch": 1.11, "learning_rate": 4.416711252763815e-05, "loss": 4.0137, "step": 8446000 }, { "epoch": 1.11, "learning_rate": 4.416644978257477e-05, "loss": 4.0229, "step": 8446500 }, { "epoch": 1.11, "learning_rate": 4.416578700483526e-05, "loss": 4.0216, "step": 8447000 }, { "epoch": 1.11, "learning_rate": 4.4165124194420733e-05, "loss": 4.0161, "step": 8447500 }, { "epoch": 1.11, "learning_rate": 4.416446135133234e-05, "loss": 4.0408, "step": 8448000 }, { "epoch": 1.11, "learning_rate": 4.41637984755712e-05, "loss": 4.0432, "step": 8448500 }, { "epoch": 1.11, "learning_rate": 4.4163135567138445e-05, "loss": 4.0181, "step": 8449000 }, { "epoch": 1.11, "learning_rate": 4.4162472626035214e-05, "loss": 4.0137, "step": 8449500 }, { "epoch": 1.11, "learning_rate": 4.416180965226263e-05, "loss": 4.0309, "step": 8450000 }, { "epoch": 1.11, "learning_rate": 4.4161146645821815e-05, "loss": 4.0187, "step": 8450500 }, { "epoch": 1.11, "learning_rate": 4.4160483606713924e-05, "loss": 4.0313, "step": 8451000 }, { "epoch": 1.11, "learning_rate": 4.415982053494006e-05, "loss": 4.0165, "step": 8451500 }, { "epoch": 1.11, "learning_rate": 4.415915743050136e-05, "loss": 4.0201, "step": 8452000 }, { "epoch": 1.11, "learning_rate": 4.415849429339897e-05, "loss": 4.0262, "step": 8452500 }, { "epoch": 1.11, "learning_rate": 4.4157831123634e-05, "loss": 4.0245, "step": 8453000 }, { "epoch": 1.11, "learning_rate": 4.415716792120759e-05, "loss": 4.0427, "step": 8453500 }, { "epoch": 1.11, "learning_rate": 4.415650468612088e-05, "loss": 4.0236, "step": 8454000 }, { "epoch": 1.11, "learning_rate": 4.415584141837499e-05, "loss": 4.0243, "step": 8454500 }, { "epoch": 1.11, "learning_rate": 4.415517811797104e-05, "loss": 4.0353, "step": 8455000 }, { "epoch": 1.11, "learning_rate": 4.4154514784910186e-05, "loss": 4.0222, "step": 8455500 }, { "epoch": 1.11, "learning_rate": 4.415385141919354e-05, "loss": 4.0406, "step": 8456000 }, { "epoch": 1.11, "learning_rate": 4.415318802082224e-05, "loss": 4.0091, "step": 8456500 }, { "epoch": 1.11, "learning_rate": 4.415252458979743e-05, "loss": 4.0207, "step": 8457000 }, { "epoch": 1.11, "learning_rate": 4.415186112612021e-05, "loss": 4.026, "step": 8457500 }, { "epoch": 1.11, "learning_rate": 4.4151197629791734e-05, "loss": 4.0182, "step": 8458000 }, { "epoch": 1.11, "learning_rate": 4.4150534100813126e-05, "loss": 4.0169, "step": 8458500 }, { "epoch": 1.11, "learning_rate": 4.4149870539185524e-05, "loss": 4.0219, "step": 8459000 }, { "epoch": 1.11, "learning_rate": 4.414920694491005e-05, "loss": 3.9987, "step": 8459500 }, { "epoch": 1.11, "learning_rate": 4.4148543317987834e-05, "loss": 4.0043, "step": 8460000 }, { "epoch": 1.11, "learning_rate": 4.414787965842002e-05, "loss": 4.0165, "step": 8460500 }, { "epoch": 1.11, "learning_rate": 4.4147215966207736e-05, "loss": 4.0004, "step": 8461000 }, { "epoch": 1.11, "learning_rate": 4.41465522413521e-05, "loss": 4.0011, "step": 8461500 }, { "epoch": 1.11, "learning_rate": 4.414588848385425e-05, "loss": 4.0268, "step": 8462000 }, { "epoch": 1.11, "learning_rate": 4.414522469371534e-05, "loss": 4.0225, "step": 8462500 }, { "epoch": 1.11, "learning_rate": 4.414456087093646e-05, "loss": 4.0323, "step": 8463000 }, { "epoch": 1.11, "learning_rate": 4.4143897015518776e-05, "loss": 4.0256, "step": 8463500 }, { "epoch": 1.11, "learning_rate": 4.414323312746341e-05, "loss": 4.0217, "step": 8464000 }, { "epoch": 1.11, "learning_rate": 4.414256920677149e-05, "loss": 4.0251, "step": 8464500 }, { "epoch": 1.11, "learning_rate": 4.414190525344415e-05, "loss": 4.016, "step": 8465000 }, { "epoch": 1.11, "learning_rate": 4.414124126748251e-05, "loss": 4.0521, "step": 8465500 }, { "epoch": 1.11, "learning_rate": 4.414057724888773e-05, "loss": 4.0315, "step": 8466000 }, { "epoch": 1.11, "learning_rate": 4.413991319766092e-05, "loss": 4.0078, "step": 8466500 }, { "epoch": 1.11, "learning_rate": 4.413924911380321e-05, "loss": 4.022, "step": 8467000 }, { "epoch": 1.11, "learning_rate": 4.413858499731575e-05, "loss": 4.0239, "step": 8467500 }, { "epoch": 1.11, "learning_rate": 4.413792084819967e-05, "loss": 4.0186, "step": 8468000 }, { "epoch": 1.11, "learning_rate": 4.4137256666456075e-05, "loss": 4.0302, "step": 8468500 }, { "epoch": 1.11, "learning_rate": 4.413659245208613e-05, "loss": 4.0022, "step": 8469000 }, { "epoch": 1.11, "learning_rate": 4.413592820509095e-05, "loss": 4.0075, "step": 8469500 }, { "epoch": 1.11, "learning_rate": 4.413526392547167e-05, "loss": 4.0303, "step": 8470000 }, { "epoch": 1.11, "learning_rate": 4.4134599613229424e-05, "loss": 4.0231, "step": 8470500 }, { "epoch": 1.11, "learning_rate": 4.4133935268365344e-05, "loss": 4.0003, "step": 8471000 }, { "epoch": 1.11, "learning_rate": 4.4133270890880574e-05, "loss": 4.021, "step": 8471500 }, { "epoch": 1.11, "learning_rate": 4.413260648077623e-05, "loss": 4.0161, "step": 8472000 }, { "epoch": 1.11, "learning_rate": 4.413194203805344e-05, "loss": 4.0049, "step": 8472500 }, { "epoch": 1.11, "learning_rate": 4.413127756271336e-05, "loss": 4.02, "step": 8473000 }, { "epoch": 1.11, "learning_rate": 4.4130613054757105e-05, "loss": 4.003, "step": 8473500 }, { "epoch": 1.11, "learning_rate": 4.412994851418582e-05, "loss": 4.0071, "step": 8474000 }, { "epoch": 1.11, "learning_rate": 4.4129283941000626e-05, "loss": 4.0442, "step": 8474500 }, { "epoch": 1.11, "learning_rate": 4.4128619335202665e-05, "loss": 4.0235, "step": 8475000 }, { "epoch": 1.11, "learning_rate": 4.412795469679306e-05, "loss": 4.0145, "step": 8475500 }, { "epoch": 1.11, "learning_rate": 4.4127290025772956e-05, "loss": 4.0175, "step": 8476000 }, { "epoch": 1.11, "learning_rate": 4.4126625322143476e-05, "loss": 4.005, "step": 8476500 }, { "epoch": 1.11, "learning_rate": 4.412596058590577e-05, "loss": 4.0082, "step": 8477000 }, { "epoch": 1.11, "learning_rate": 4.412529581706094e-05, "loss": 4.0201, "step": 8477500 }, { "epoch": 1.11, "learning_rate": 4.412463101561015e-05, "loss": 4.0171, "step": 8478000 }, { "epoch": 1.11, "learning_rate": 4.412396618155452e-05, "loss": 4.0097, "step": 8478500 }, { "epoch": 1.11, "learning_rate": 4.4123301314895185e-05, "loss": 4.0288, "step": 8479000 }, { "epoch": 1.11, "learning_rate": 4.4122636415633275e-05, "loss": 4.0192, "step": 8479500 }, { "epoch": 1.11, "learning_rate": 4.412197148376994e-05, "loss": 4.0098, "step": 8480000 }, { "epoch": 1.11, "learning_rate": 4.412130651930629e-05, "loss": 4.0242, "step": 8480500 }, { "epoch": 1.11, "learning_rate": 4.4120641522243474e-05, "loss": 4.0082, "step": 8481000 }, { "epoch": 1.11, "learning_rate": 4.411997649258262e-05, "loss": 4.0088, "step": 8481500 }, { "epoch": 1.11, "learning_rate": 4.411931143032486e-05, "loss": 4.0076, "step": 8482000 }, { "epoch": 1.11, "learning_rate": 4.4118646335471336e-05, "loss": 4.0222, "step": 8482500 }, { "epoch": 1.11, "learning_rate": 4.4117981208023175e-05, "loss": 4.0059, "step": 8483000 }, { "epoch": 1.11, "learning_rate": 4.411731604798151e-05, "loss": 3.9991, "step": 8483500 }, { "epoch": 1.12, "learning_rate": 4.411665085534748e-05, "loss": 4.0293, "step": 8484000 }, { "epoch": 1.12, "learning_rate": 4.411598563012222e-05, "loss": 4.0198, "step": 8484500 }, { "epoch": 1.12, "learning_rate": 4.4115320372306856e-05, "loss": 3.9984, "step": 8485000 }, { "epoch": 1.12, "learning_rate": 4.4114655081902526e-05, "loss": 3.9902, "step": 8485500 }, { "epoch": 1.12, "learning_rate": 4.411398975891037e-05, "loss": 4.0228, "step": 8486000 }, { "epoch": 1.12, "learning_rate": 4.4113324403331525e-05, "loss": 4.0272, "step": 8486500 }, { "epoch": 1.12, "learning_rate": 4.4112659015167104e-05, "loss": 4.0137, "step": 8487000 }, { "epoch": 1.12, "learning_rate": 4.411199359441826e-05, "loss": 4.0107, "step": 8487500 }, { "epoch": 1.12, "learning_rate": 4.4111328141086126e-05, "loss": 4.0102, "step": 8488000 }, { "epoch": 1.12, "learning_rate": 4.411066265517183e-05, "loss": 4.0096, "step": 8488500 }, { "epoch": 1.12, "learning_rate": 4.410999713667652e-05, "loss": 4.0064, "step": 8489000 }, { "epoch": 1.12, "learning_rate": 4.41093315856013e-05, "loss": 4.0134, "step": 8489500 }, { "epoch": 1.12, "learning_rate": 4.410866600194734e-05, "loss": 4.0394, "step": 8490000 }, { "epoch": 1.12, "learning_rate": 4.410800038571576e-05, "loss": 4.0248, "step": 8490500 }, { "epoch": 1.12, "learning_rate": 4.410733473690769e-05, "loss": 4.0038, "step": 8491000 }, { "epoch": 1.12, "learning_rate": 4.4106669055524274e-05, "loss": 4.025, "step": 8491500 }, { "epoch": 1.12, "learning_rate": 4.4106003341566635e-05, "loss": 4.0145, "step": 8492000 }, { "epoch": 1.12, "learning_rate": 4.410533759503593e-05, "loss": 4.0113, "step": 8492500 }, { "epoch": 1.12, "learning_rate": 4.4104671815933264e-05, "loss": 3.9919, "step": 8493000 }, { "epoch": 1.12, "learning_rate": 4.4104006004259796e-05, "loss": 4.0132, "step": 8493500 }, { "epoch": 1.12, "learning_rate": 4.410334016001665e-05, "loss": 3.9847, "step": 8494000 }, { "epoch": 1.12, "learning_rate": 4.410267428320497e-05, "loss": 4.0119, "step": 8494500 }, { "epoch": 1.12, "learning_rate": 4.4102008373825876e-05, "loss": 3.9958, "step": 8495000 }, { "epoch": 1.12, "learning_rate": 4.4101342431880516e-05, "loss": 4.0252, "step": 8495500 }, { "epoch": 1.12, "learning_rate": 4.410067645737001e-05, "loss": 4.0245, "step": 8496000 }, { "epoch": 1.12, "learning_rate": 4.4100010450295524e-05, "loss": 4.0238, "step": 8496500 }, { "epoch": 1.12, "learning_rate": 4.4099344410658175e-05, "loss": 4.0035, "step": 8497000 }, { "epoch": 1.12, "learning_rate": 4.409867833845909e-05, "loss": 4.0088, "step": 8497500 }, { "epoch": 1.12, "learning_rate": 4.40980122336994e-05, "loss": 3.9887, "step": 8498000 }, { "epoch": 1.12, "learning_rate": 4.4097346096380275e-05, "loss": 4.011, "step": 8498500 }, { "epoch": 1.12, "learning_rate": 4.4096679926502815e-05, "loss": 3.9817, "step": 8499000 }, { "epoch": 1.12, "learning_rate": 4.409601372406817e-05, "loss": 3.9932, "step": 8499500 }, { "epoch": 1.12, "learning_rate": 4.409534748907749e-05, "loss": 3.9895, "step": 8500000 }, { "epoch": 1.12, "learning_rate": 4.4094681221531884e-05, "loss": 4.0153, "step": 8500500 }, { "epoch": 1.12, "learning_rate": 4.40940149214325e-05, "loss": 3.9989, "step": 8501000 }, { "epoch": 1.12, "learning_rate": 4.4093348588780474e-05, "loss": 4.0128, "step": 8501500 }, { "epoch": 1.12, "learning_rate": 4.4092682223576945e-05, "loss": 4.0088, "step": 8502000 }, { "epoch": 1.12, "learning_rate": 4.4092015825823044e-05, "loss": 3.999, "step": 8502500 }, { "epoch": 1.12, "learning_rate": 4.409134939551991e-05, "loss": 4.0183, "step": 8503000 }, { "epoch": 1.12, "learning_rate": 4.409068293266868e-05, "loss": 4.0231, "step": 8503500 }, { "epoch": 1.12, "learning_rate": 4.409001643727049e-05, "loss": 3.9966, "step": 8504000 }, { "epoch": 1.12, "learning_rate": 4.408934990932647e-05, "loss": 4.003, "step": 8504500 }, { "epoch": 1.12, "learning_rate": 4.408868334883777e-05, "loss": 4.0012, "step": 8505000 }, { "epoch": 1.12, "learning_rate": 4.4088016755805506e-05, "loss": 3.9908, "step": 8505500 }, { "epoch": 1.12, "learning_rate": 4.408735013023083e-05, "loss": 4.0071, "step": 8506000 }, { "epoch": 1.12, "learning_rate": 4.408668347211487e-05, "loss": 4.0045, "step": 8506500 }, { "epoch": 1.12, "learning_rate": 4.408601678145878e-05, "loss": 3.9915, "step": 8507000 }, { "epoch": 1.12, "learning_rate": 4.4085350058263676e-05, "loss": 4.0206, "step": 8507500 }, { "epoch": 1.12, "learning_rate": 4.408468330253071e-05, "loss": 4.0096, "step": 8508000 }, { "epoch": 1.12, "learning_rate": 4.4084016514260994e-05, "loss": 4.0197, "step": 8508500 }, { "epoch": 1.12, "learning_rate": 4.4083349693455695e-05, "loss": 3.9946, "step": 8509000 }, { "epoch": 1.12, "learning_rate": 4.4082682840115935e-05, "loss": 3.9974, "step": 8509500 }, { "epoch": 1.12, "learning_rate": 4.408201595424285e-05, "loss": 4.0129, "step": 8510000 }, { "epoch": 1.12, "learning_rate": 4.408134903583758e-05, "loss": 3.9988, "step": 8510500 }, { "epoch": 1.12, "learning_rate": 4.408068208490127e-05, "loss": 3.9979, "step": 8511000 }, { "epoch": 1.12, "learning_rate": 4.4080015101435034e-05, "loss": 3.9991, "step": 8511500 }, { "epoch": 1.12, "learning_rate": 4.407934808544004e-05, "loss": 4.0185, "step": 8512000 }, { "epoch": 1.12, "learning_rate": 4.40786810369174e-05, "loss": 4.0116, "step": 8512500 }, { "epoch": 1.12, "learning_rate": 4.407801395586826e-05, "loss": 3.9901, "step": 8513000 }, { "epoch": 1.12, "learning_rate": 4.4077346842293755e-05, "loss": 4.0152, "step": 8513500 }, { "epoch": 1.12, "learning_rate": 4.407667969619503e-05, "loss": 4.0028, "step": 8514000 }, { "epoch": 1.12, "learning_rate": 4.407601251757322e-05, "loss": 4.0168, "step": 8514500 }, { "epoch": 1.12, "learning_rate": 4.407534530642945e-05, "loss": 3.9999, "step": 8515000 }, { "epoch": 1.12, "learning_rate": 4.407467806276488e-05, "loss": 4.0113, "step": 8515500 }, { "epoch": 1.12, "learning_rate": 4.407401078658062e-05, "loss": 3.9749, "step": 8516000 }, { "epoch": 1.12, "learning_rate": 4.407334347787783e-05, "loss": 3.9993, "step": 8516500 }, { "epoch": 1.12, "learning_rate": 4.4072676136657636e-05, "loss": 4.0107, "step": 8517000 }, { "epoch": 1.12, "learning_rate": 4.407200876292118e-05, "loss": 4.0146, "step": 8517500 }, { "epoch": 1.12, "learning_rate": 4.407134135666961e-05, "loss": 4.0151, "step": 8518000 }, { "epoch": 1.12, "learning_rate": 4.4070673917904036e-05, "loss": 4.0369, "step": 8518500 }, { "epoch": 1.12, "learning_rate": 4.4070006446625626e-05, "loss": 4.0036, "step": 8519000 }, { "epoch": 1.12, "learning_rate": 4.40693389428355e-05, "loss": 4.0278, "step": 8519500 }, { "epoch": 1.12, "learning_rate": 4.4068671406534806e-05, "loss": 3.995, "step": 8520000 }, { "epoch": 1.12, "learning_rate": 4.406800383772467e-05, "loss": 4.0194, "step": 8520500 }, { "epoch": 1.12, "learning_rate": 4.406733623640624e-05, "loss": 4.0171, "step": 8521000 }, { "epoch": 1.12, "learning_rate": 4.4066668602580664e-05, "loss": 4.0082, "step": 8521500 }, { "epoch": 1.12, "learning_rate": 4.406600093624905e-05, "loss": 4.0036, "step": 8522000 }, { "epoch": 1.12, "learning_rate": 4.4065333237412564e-05, "loss": 4.0183, "step": 8522500 }, { "epoch": 1.12, "learning_rate": 4.406466550607233e-05, "loss": 4.0147, "step": 8523000 }, { "epoch": 1.12, "learning_rate": 4.406399774222949e-05, "loss": 3.9993, "step": 8523500 }, { "epoch": 1.12, "learning_rate": 4.406332994588519e-05, "loss": 4.0252, "step": 8524000 }, { "epoch": 1.12, "learning_rate": 4.4062662117040556e-05, "loss": 4.0163, "step": 8524500 }, { "epoch": 1.12, "learning_rate": 4.406199425569673e-05, "loss": 3.9896, "step": 8525000 }, { "epoch": 1.12, "learning_rate": 4.406132636185486e-05, "loss": 4.0159, "step": 8525500 }, { "epoch": 1.12, "learning_rate": 4.4060658435516066e-05, "loss": 4.009, "step": 8526000 }, { "epoch": 1.12, "learning_rate": 4.4059990476681514e-05, "loss": 4.012, "step": 8526500 }, { "epoch": 1.12, "learning_rate": 4.405932248535232e-05, "loss": 4.0257, "step": 8527000 }, { "epoch": 1.12, "learning_rate": 4.4058654461529625e-05, "loss": 4.0275, "step": 8527500 }, { "epoch": 1.12, "learning_rate": 4.405798640521457e-05, "loss": 3.9924, "step": 8528000 }, { "epoch": 1.12, "learning_rate": 4.405731831640831e-05, "loss": 4.0022, "step": 8528500 }, { "epoch": 1.12, "learning_rate": 4.405665019511196e-05, "loss": 4.0066, "step": 8529000 }, { "epoch": 1.12, "learning_rate": 4.4055982041326674e-05, "loss": 3.9975, "step": 8529500 }, { "epoch": 1.12, "learning_rate": 4.4055313855053584e-05, "loss": 4.0189, "step": 8530000 }, { "epoch": 1.12, "learning_rate": 4.405464563629383e-05, "loss": 4.0239, "step": 8530500 }, { "epoch": 1.12, "learning_rate": 4.405397738504856e-05, "loss": 3.9998, "step": 8531000 }, { "epoch": 1.12, "learning_rate": 4.405330910131891e-05, "loss": 3.9863, "step": 8531500 }, { "epoch": 1.12, "learning_rate": 4.4052640785106004e-05, "loss": 3.994, "step": 8532000 }, { "epoch": 1.12, "learning_rate": 4.4051972436411006e-05, "loss": 3.9984, "step": 8532500 }, { "epoch": 1.12, "learning_rate": 4.405130405523503e-05, "loss": 3.9977, "step": 8533000 }, { "epoch": 1.12, "learning_rate": 4.405063564157923e-05, "loss": 3.9808, "step": 8533500 }, { "epoch": 1.12, "learning_rate": 4.404996719544475e-05, "loss": 4.0107, "step": 8534000 }, { "epoch": 1.12, "learning_rate": 4.404929871683272e-05, "loss": 4.0111, "step": 8534500 }, { "epoch": 1.12, "learning_rate": 4.404863020574428e-05, "loss": 3.9792, "step": 8535000 }, { "epoch": 1.12, "learning_rate": 4.404796166218058e-05, "loss": 4.019, "step": 8535500 }, { "epoch": 1.12, "learning_rate": 4.404729308614275e-05, "loss": 3.9849, "step": 8536000 }, { "epoch": 1.12, "learning_rate": 4.404662447763193e-05, "loss": 4.0189, "step": 8536500 }, { "epoch": 1.12, "learning_rate": 4.404595583664927e-05, "loss": 4.0243, "step": 8537000 }, { "epoch": 1.12, "learning_rate": 4.4045287163195894e-05, "loss": 4.0228, "step": 8537500 }, { "epoch": 1.12, "learning_rate": 4.404461845727294e-05, "loss": 4.02, "step": 8538000 }, { "epoch": 1.12, "learning_rate": 4.404394971888158e-05, "loss": 4.016, "step": 8538500 }, { "epoch": 1.12, "learning_rate": 4.404328094802292e-05, "loss": 4.0095, "step": 8539000 }, { "epoch": 1.12, "learning_rate": 4.4042612144698113e-05, "loss": 4.0281, "step": 8539500 }, { "epoch": 1.12, "learning_rate": 4.4041943308908295e-05, "loss": 4.013, "step": 8540000 }, { "epoch": 1.12, "learning_rate": 4.4041274440654624e-05, "loss": 4.0271, "step": 8540500 }, { "epoch": 1.12, "learning_rate": 4.404060553993821e-05, "loss": 3.9879, "step": 8541000 }, { "epoch": 1.12, "learning_rate": 4.403993660676022e-05, "loss": 4.0261, "step": 8541500 }, { "epoch": 1.12, "learning_rate": 4.4039267641121786e-05, "loss": 3.999, "step": 8542000 }, { "epoch": 1.12, "learning_rate": 4.403859864302403e-05, "loss": 4.0061, "step": 8542500 }, { "epoch": 1.12, "learning_rate": 4.4037929612468125e-05, "loss": 4.0143, "step": 8543000 }, { "epoch": 1.12, "learning_rate": 4.403726054945519e-05, "loss": 4.0098, "step": 8543500 }, { "epoch": 1.12, "learning_rate": 4.4036591453986376e-05, "loss": 4.0009, "step": 8544000 }, { "epoch": 1.12, "learning_rate": 4.403592232606281e-05, "loss": 4.004, "step": 8544500 }, { "epoch": 1.12, "learning_rate": 4.4035253165685655e-05, "loss": 4.0084, "step": 8545000 }, { "epoch": 1.12, "learning_rate": 4.4034583972856026e-05, "loss": 4.0116, "step": 8545500 }, { "epoch": 1.12, "learning_rate": 4.403391474757508e-05, "loss": 4.0041, "step": 8546000 }, { "epoch": 1.12, "learning_rate": 4.403324548984396e-05, "loss": 4.0242, "step": 8546500 }, { "epoch": 1.12, "learning_rate": 4.40325761996638e-05, "loss": 4.02, "step": 8547000 }, { "epoch": 1.12, "learning_rate": 4.403190687703573e-05, "loss": 4.0194, "step": 8547500 }, { "epoch": 1.12, "learning_rate": 4.403123752196092e-05, "loss": 3.997, "step": 8548000 }, { "epoch": 1.12, "learning_rate": 4.4030568134440484e-05, "loss": 4.0182, "step": 8548500 }, { "epoch": 1.12, "learning_rate": 4.402989871447558e-05, "loss": 3.9891, "step": 8549000 }, { "epoch": 1.12, "learning_rate": 4.4029229262067347e-05, "loss": 4.0183, "step": 8549500 }, { "epoch": 1.12, "learning_rate": 4.4028559777216916e-05, "loss": 4.0134, "step": 8550000 }, { "epoch": 1.12, "learning_rate": 4.4027890259925435e-05, "loss": 4.0028, "step": 8550500 }, { "epoch": 1.12, "learning_rate": 4.402722071019405e-05, "loss": 4.0026, "step": 8551000 }, { "epoch": 1.12, "learning_rate": 4.4026551128023896e-05, "loss": 4.0086, "step": 8551500 }, { "epoch": 1.12, "learning_rate": 4.4025881513416115e-05, "loss": 4.0062, "step": 8552000 }, { "epoch": 1.12, "learning_rate": 4.402521186637185e-05, "loss": 4.0103, "step": 8552500 }, { "epoch": 1.12, "learning_rate": 4.402454218689224e-05, "loss": 4.0019, "step": 8553000 }, { "epoch": 1.12, "learning_rate": 4.4023872474978435e-05, "loss": 4.0017, "step": 8553500 }, { "epoch": 1.12, "learning_rate": 4.402320273063156e-05, "loss": 4.0191, "step": 8554000 }, { "epoch": 1.12, "learning_rate": 4.402253295385278e-05, "loss": 4.0053, "step": 8554500 }, { "epoch": 1.12, "learning_rate": 4.402186314464323e-05, "loss": 4.0266, "step": 8555000 }, { "epoch": 1.12, "learning_rate": 4.402119330300403e-05, "loss": 4.0, "step": 8555500 }, { "epoch": 1.12, "learning_rate": 4.402052342893635e-05, "loss": 4.0201, "step": 8556000 }, { "epoch": 1.12, "learning_rate": 4.4019853522441316e-05, "loss": 4.0357, "step": 8556500 }, { "epoch": 1.12, "learning_rate": 4.4019183583520076e-05, "loss": 4.0167, "step": 8557000 }, { "epoch": 1.12, "learning_rate": 4.401851361217377e-05, "loss": 4.0055, "step": 8557500 }, { "epoch": 1.12, "learning_rate": 4.4017843608403554e-05, "loss": 4.0055, "step": 8558000 }, { "epoch": 1.12, "learning_rate": 4.401717357221054e-05, "loss": 3.9995, "step": 8558500 }, { "epoch": 1.12, "learning_rate": 4.40165035035959e-05, "loss": 3.9901, "step": 8559000 }, { "epoch": 1.12, "learning_rate": 4.401583340256076e-05, "loss": 4.0089, "step": 8559500 }, { "epoch": 1.13, "learning_rate": 4.401516326910626e-05, "loss": 3.9998, "step": 8560000 }, { "epoch": 1.13, "learning_rate": 4.4014493103233554e-05, "loss": 4.0007, "step": 8560500 }, { "epoch": 1.13, "learning_rate": 4.401382290494378e-05, "loss": 3.9875, "step": 8561000 }, { "epoch": 1.13, "learning_rate": 4.401315267423808e-05, "loss": 3.9991, "step": 8561500 }, { "epoch": 1.13, "learning_rate": 4.4012482411117595e-05, "loss": 3.9874, "step": 8562000 }, { "epoch": 1.13, "learning_rate": 4.401181211558347e-05, "loss": 4.0205, "step": 8562500 }, { "epoch": 1.13, "learning_rate": 4.401114178763686e-05, "loss": 4.0056, "step": 8563000 }, { "epoch": 1.13, "learning_rate": 4.401047142727888e-05, "loss": 4.03, "step": 8563500 }, { "epoch": 1.13, "learning_rate": 4.400980103451069e-05, "loss": 3.9863, "step": 8564000 }, { "epoch": 1.13, "learning_rate": 4.400913060933344e-05, "loss": 4.0275, "step": 8564500 }, { "epoch": 1.13, "learning_rate": 4.400846015174826e-05, "loss": 4.0221, "step": 8565000 }, { "epoch": 1.13, "learning_rate": 4.400778966175629e-05, "loss": 3.9991, "step": 8565500 }, { "epoch": 1.13, "learning_rate": 4.4007119139358685e-05, "loss": 4.0087, "step": 8566000 }, { "epoch": 1.13, "learning_rate": 4.4006448584556584e-05, "loss": 4.0141, "step": 8566500 }, { "epoch": 1.13, "learning_rate": 4.400577799735113e-05, "loss": 3.997, "step": 8567000 }, { "epoch": 1.13, "learning_rate": 4.4005107377743466e-05, "loss": 3.9981, "step": 8567500 }, { "epoch": 1.13, "learning_rate": 4.400443672573473e-05, "loss": 4.0111, "step": 8568000 }, { "epoch": 1.13, "learning_rate": 4.400376604132608e-05, "loss": 4.001, "step": 8568500 }, { "epoch": 1.13, "learning_rate": 4.400309532451864e-05, "loss": 3.989, "step": 8569000 }, { "epoch": 1.13, "learning_rate": 4.400242457531357e-05, "loss": 3.9895, "step": 8569500 }, { "epoch": 1.13, "learning_rate": 4.4001753793712e-05, "loss": 4.0103, "step": 8570000 }, { "epoch": 1.13, "learning_rate": 4.4001082979715094e-05, "loss": 4.009, "step": 8570500 }, { "epoch": 1.13, "learning_rate": 4.400041213332397e-05, "loss": 4.0036, "step": 8571000 }, { "epoch": 1.13, "learning_rate": 4.399974125453978e-05, "loss": 3.9899, "step": 8571500 }, { "epoch": 1.13, "learning_rate": 4.399907034336368e-05, "loss": 3.9889, "step": 8572000 }, { "epoch": 1.13, "learning_rate": 4.39983993997968e-05, "loss": 3.9849, "step": 8572500 }, { "epoch": 1.13, "learning_rate": 4.39977284238403e-05, "loss": 4.0004, "step": 8573000 }, { "epoch": 1.13, "learning_rate": 4.3997057415495304e-05, "loss": 3.9836, "step": 8573500 }, { "epoch": 1.13, "learning_rate": 4.3996386374762964e-05, "loss": 4.0012, "step": 8574000 }, { "epoch": 1.13, "learning_rate": 4.399571530164443e-05, "loss": 3.9993, "step": 8574500 }, { "epoch": 1.13, "learning_rate": 4.399504419614084e-05, "loss": 3.9948, "step": 8575000 }, { "epoch": 1.13, "learning_rate": 4.3994373058253335e-05, "loss": 3.9766, "step": 8575500 }, { "epoch": 1.13, "learning_rate": 4.3993701887983075e-05, "loss": 3.9879, "step": 8576000 }, { "epoch": 1.13, "learning_rate": 4.3993030685331185e-05, "loss": 4.0035, "step": 8576500 }, { "epoch": 1.13, "learning_rate": 4.399235945029881e-05, "loss": 4.0052, "step": 8577000 }, { "epoch": 1.13, "learning_rate": 4.3991688182887114e-05, "loss": 3.9801, "step": 8577500 }, { "epoch": 1.13, "learning_rate": 4.399101688309722e-05, "loss": 4.0018, "step": 8578000 }, { "epoch": 1.13, "learning_rate": 4.399034555093029e-05, "loss": 4.0007, "step": 8578500 }, { "epoch": 1.13, "learning_rate": 4.398967418638745e-05, "loss": 4.0079, "step": 8579000 }, { "epoch": 1.13, "learning_rate": 4.398900278946986e-05, "loss": 4.0056, "step": 8579500 }, { "epoch": 1.13, "learning_rate": 4.3988331360178655e-05, "loss": 4.007, "step": 8580000 }, { "epoch": 1.13, "learning_rate": 4.398765989851499e-05, "loss": 4.0089, "step": 8580500 }, { "epoch": 1.13, "learning_rate": 4.398698840448001e-05, "loss": 4.0092, "step": 8581000 }, { "epoch": 1.13, "learning_rate": 4.398631687807484e-05, "loss": 4.0228, "step": 8581500 }, { "epoch": 1.13, "learning_rate": 4.398564531930064e-05, "loss": 4.0063, "step": 8582000 }, { "epoch": 1.13, "learning_rate": 4.398497372815856e-05, "loss": 3.9995, "step": 8582500 }, { "epoch": 1.13, "learning_rate": 4.398430210464973e-05, "loss": 4.0028, "step": 8583000 }, { "epoch": 1.13, "learning_rate": 4.398363044877531e-05, "loss": 4.0179, "step": 8583500 }, { "epoch": 1.13, "learning_rate": 4.3982958760536444e-05, "loss": 4.0031, "step": 8584000 }, { "epoch": 1.13, "learning_rate": 4.3982287039934254e-05, "loss": 4.0167, "step": 8584500 }, { "epoch": 1.13, "learning_rate": 4.398161528696991e-05, "loss": 4.0072, "step": 8585000 }, { "epoch": 1.13, "learning_rate": 4.398094350164456e-05, "loss": 3.9958, "step": 8585500 }, { "epoch": 1.13, "learning_rate": 4.398027168395933e-05, "loss": 4.0112, "step": 8586000 }, { "epoch": 1.13, "learning_rate": 4.397959983391537e-05, "loss": 3.9953, "step": 8586500 }, { "epoch": 1.13, "learning_rate": 4.397892795151384e-05, "loss": 3.9968, "step": 8587000 }, { "epoch": 1.13, "learning_rate": 4.397825603675587e-05, "loss": 3.9935, "step": 8587500 }, { "epoch": 1.13, "learning_rate": 4.39775840896426e-05, "loss": 4.012, "step": 8588000 }, { "epoch": 1.13, "learning_rate": 4.3976912110175204e-05, "loss": 3.9766, "step": 8588500 }, { "epoch": 1.13, "learning_rate": 4.3976240098354806e-05, "loss": 3.9942, "step": 8589000 }, { "epoch": 1.13, "learning_rate": 4.397556805418255e-05, "loss": 4.0112, "step": 8589500 }, { "epoch": 1.13, "learning_rate": 4.3974895977659595e-05, "loss": 4.0154, "step": 8590000 }, { "epoch": 1.13, "learning_rate": 4.397422386878707e-05, "loss": 3.9928, "step": 8590500 }, { "epoch": 1.13, "learning_rate": 4.3973551727566134e-05, "loss": 4.0091, "step": 8591000 }, { "epoch": 1.13, "learning_rate": 4.397287955399794e-05, "loss": 4.0147, "step": 8591500 }, { "epoch": 1.13, "learning_rate": 4.397220734808361e-05, "loss": 3.9914, "step": 8592000 }, { "epoch": 1.13, "learning_rate": 4.39715351098243e-05, "loss": 4.0008, "step": 8592500 }, { "epoch": 1.13, "learning_rate": 4.397086283922117e-05, "loss": 4.0094, "step": 8593000 }, { "epoch": 1.13, "learning_rate": 4.397019053627534e-05, "loss": 4.0058, "step": 8593500 }, { "epoch": 1.13, "learning_rate": 4.396951820098799e-05, "loss": 3.9852, "step": 8594000 }, { "epoch": 1.13, "learning_rate": 4.396884583336024e-05, "loss": 3.9884, "step": 8594500 }, { "epoch": 1.13, "learning_rate": 4.396817343339325e-05, "loss": 4.0184, "step": 8595000 }, { "epoch": 1.13, "learning_rate": 4.396750100108815e-05, "loss": 3.9988, "step": 8595500 }, { "epoch": 1.13, "learning_rate": 4.3966828536446095e-05, "loss": 3.9828, "step": 8596000 }, { "epoch": 1.13, "learning_rate": 4.3966156039468244e-05, "loss": 3.9878, "step": 8596500 }, { "epoch": 1.13, "learning_rate": 4.3965483510155724e-05, "loss": 4.002, "step": 8597000 }, { "epoch": 1.13, "learning_rate": 4.3964810948509694e-05, "loss": 4.0079, "step": 8597500 }, { "epoch": 1.13, "learning_rate": 4.39641383545313e-05, "loss": 4.0075, "step": 8598000 }, { "epoch": 1.13, "learning_rate": 4.396346572822168e-05, "loss": 3.9927, "step": 8598500 }, { "epoch": 1.13, "learning_rate": 4.396279306958199e-05, "loss": 4.0077, "step": 8599000 }, { "epoch": 1.13, "learning_rate": 4.396212037861337e-05, "loss": 4.0016, "step": 8599500 }, { "epoch": 1.13, "learning_rate": 4.396144765531697e-05, "loss": 3.9992, "step": 8600000 }, { "epoch": 1.13, "learning_rate": 4.396077489969394e-05, "loss": 4.0166, "step": 8600500 }, { "epoch": 1.13, "learning_rate": 4.396010211174543e-05, "loss": 3.9859, "step": 8601000 }, { "epoch": 1.13, "learning_rate": 4.3959429291472566e-05, "loss": 4.0006, "step": 8601500 }, { "epoch": 1.13, "learning_rate": 4.395875643887651e-05, "loss": 4.0089, "step": 8602000 }, { "epoch": 1.13, "learning_rate": 4.3958083553958425e-05, "loss": 4.033, "step": 8602500 }, { "epoch": 1.13, "learning_rate": 4.395741063671943e-05, "loss": 3.9774, "step": 8603000 }, { "epoch": 1.13, "learning_rate": 4.395673768716069e-05, "loss": 3.9762, "step": 8603500 }, { "epoch": 1.13, "learning_rate": 4.395606470528334e-05, "loss": 4.0021, "step": 8604000 }, { "epoch": 1.13, "learning_rate": 4.395539169108854e-05, "loss": 4.0158, "step": 8604500 }, { "epoch": 1.13, "learning_rate": 4.395471864457743e-05, "loss": 4.0107, "step": 8605000 }, { "epoch": 1.13, "learning_rate": 4.395404556575116e-05, "loss": 3.992, "step": 8605500 }, { "epoch": 1.13, "learning_rate": 4.395337245461087e-05, "loss": 4.0294, "step": 8606000 }, { "epoch": 1.13, "learning_rate": 4.3952699311157717e-05, "loss": 3.998, "step": 8606500 }, { "epoch": 1.13, "learning_rate": 4.395202613539285e-05, "loss": 3.9763, "step": 8607000 }, { "epoch": 1.13, "learning_rate": 4.3951352927317416e-05, "loss": 3.9959, "step": 8607500 }, { "epoch": 1.13, "learning_rate": 4.395067968693255e-05, "loss": 4.0014, "step": 8608000 }, { "epoch": 1.13, "learning_rate": 4.395000641423941e-05, "loss": 4.0091, "step": 8608500 }, { "epoch": 1.13, "learning_rate": 4.3949333109239135e-05, "loss": 3.9993, "step": 8609000 }, { "epoch": 1.13, "learning_rate": 4.3948659771932895e-05, "loss": 4.0137, "step": 8609500 }, { "epoch": 1.13, "learning_rate": 4.3947986402321814e-05, "loss": 4.0021, "step": 8610000 }, { "epoch": 1.13, "learning_rate": 4.394731300040705e-05, "loss": 4.009, "step": 8610500 }, { "epoch": 1.13, "learning_rate": 4.394663956618976e-05, "loss": 3.9956, "step": 8611000 }, { "epoch": 1.13, "learning_rate": 4.394596609967107e-05, "loss": 4.0309, "step": 8611500 }, { "epoch": 1.13, "learning_rate": 4.394529260085215e-05, "loss": 3.9829, "step": 8612000 }, { "epoch": 1.13, "learning_rate": 4.3944619069734133e-05, "loss": 3.9858, "step": 8612500 }, { "epoch": 1.13, "learning_rate": 4.3943945506318174e-05, "loss": 4.002, "step": 8613000 }, { "epoch": 1.13, "learning_rate": 4.3943271910605423e-05, "loss": 4.0056, "step": 8613500 }, { "epoch": 1.13, "learning_rate": 4.394259828259703e-05, "loss": 4.0019, "step": 8614000 }, { "epoch": 1.13, "learning_rate": 4.3941924622294126e-05, "loss": 4.004, "step": 8614500 }, { "epoch": 1.13, "learning_rate": 4.394125092969788e-05, "loss": 3.9863, "step": 8615000 }, { "epoch": 1.13, "learning_rate": 4.3940577204809444e-05, "loss": 4.0039, "step": 8615500 }, { "epoch": 1.13, "learning_rate": 4.393990344762994e-05, "loss": 4.0008, "step": 8616000 }, { "epoch": 1.13, "learning_rate": 4.393922965816054e-05, "loss": 3.992, "step": 8616500 }, { "epoch": 1.13, "learning_rate": 4.393855583640238e-05, "loss": 4.0031, "step": 8617000 }, { "epoch": 1.13, "learning_rate": 4.3937881982356625e-05, "loss": 4.0098, "step": 8617500 }, { "epoch": 1.13, "learning_rate": 4.3937208096024404e-05, "loss": 3.9969, "step": 8618000 }, { "epoch": 1.13, "learning_rate": 4.3936534177406885e-05, "loss": 4.0082, "step": 8618500 }, { "epoch": 1.13, "learning_rate": 4.39358602265052e-05, "loss": 4.0128, "step": 8619000 }, { "epoch": 1.13, "learning_rate": 4.39351862433205e-05, "loss": 4.0012, "step": 8619500 }, { "epoch": 1.13, "learning_rate": 4.393451222785394e-05, "loss": 4.0099, "step": 8620000 }, { "epoch": 1.13, "learning_rate": 4.3933838180106676e-05, "loss": 3.9977, "step": 8620500 }, { "epoch": 1.13, "learning_rate": 4.3933164100079846e-05, "loss": 3.9953, "step": 8621000 }, { "epoch": 1.13, "learning_rate": 4.3932489987774597e-05, "loss": 4.0218, "step": 8621500 }, { "epoch": 1.13, "learning_rate": 4.393181584319209e-05, "loss": 4.0049, "step": 8622000 }, { "epoch": 1.13, "learning_rate": 4.3931141666333466e-05, "loss": 4.0025, "step": 8622500 }, { "epoch": 1.13, "learning_rate": 4.3930467457199876e-05, "loss": 4.0194, "step": 8623000 }, { "epoch": 1.13, "learning_rate": 4.392979321579247e-05, "loss": 3.9936, "step": 8623500 }, { "epoch": 1.13, "learning_rate": 4.392911894211239e-05, "loss": 3.9989, "step": 8624000 }, { "epoch": 1.13, "learning_rate": 4.392844463616081e-05, "loss": 4.0103, "step": 8624500 }, { "epoch": 1.13, "learning_rate": 4.3927770297938846e-05, "loss": 3.9964, "step": 8625000 }, { "epoch": 1.13, "learning_rate": 4.392709592744767e-05, "loss": 3.9844, "step": 8625500 }, { "epoch": 1.13, "learning_rate": 4.3926421524688424e-05, "loss": 4.0056, "step": 8626000 }, { "epoch": 1.13, "learning_rate": 4.392574708966226e-05, "loss": 4.0079, "step": 8626500 }, { "epoch": 1.13, "learning_rate": 4.3925072622370334e-05, "loss": 4.0054, "step": 8627000 }, { "epoch": 1.13, "learning_rate": 4.3924398122813784e-05, "loss": 3.9993, "step": 8627500 }, { "epoch": 1.13, "learning_rate": 4.3923723590993756e-05, "loss": 4.0003, "step": 8628000 }, { "epoch": 1.13, "learning_rate": 4.392304902691142e-05, "loss": 3.9971, "step": 8628500 }, { "epoch": 1.13, "learning_rate": 4.3922374430567915e-05, "loss": 3.998, "step": 8629000 }, { "epoch": 1.13, "learning_rate": 4.392169980196439e-05, "loss": 3.9885, "step": 8629500 }, { "epoch": 1.13, "learning_rate": 4.3921025141102e-05, "loss": 3.9953, "step": 8630000 }, { "epoch": 1.13, "learning_rate": 4.3920350447981886e-05, "loss": 3.9826, "step": 8630500 }, { "epoch": 1.13, "learning_rate": 4.39196757226052e-05, "loss": 3.9714, "step": 8631000 }, { "epoch": 1.13, "learning_rate": 4.39190009649731e-05, "loss": 3.9948, "step": 8631500 }, { "epoch": 1.13, "learning_rate": 4.391832617508674e-05, "loss": 3.997, "step": 8632000 }, { "epoch": 1.13, "learning_rate": 4.391765135294725e-05, "loss": 3.9932, "step": 8632500 }, { "epoch": 1.13, "learning_rate": 4.391697649855581e-05, "loss": 4.0104, "step": 8633000 }, { "epoch": 1.13, "learning_rate": 4.3916301611913536e-05, "loss": 4.0044, "step": 8633500 }, { "epoch": 1.13, "learning_rate": 4.39156266930216e-05, "loss": 4.026, "step": 8634000 }, { "epoch": 1.13, "learning_rate": 4.391495174188116e-05, "loss": 3.9984, "step": 8634500 }, { "epoch": 1.13, "learning_rate": 4.391427675849335e-05, "loss": 4.0089, "step": 8635000 }, { "epoch": 1.13, "learning_rate": 4.391360174285933e-05, "loss": 4.0041, "step": 8635500 }, { "epoch": 1.13, "learning_rate": 4.3912926694980235e-05, "loss": 3.9979, "step": 8636000 }, { "epoch": 1.14, "learning_rate": 4.391225161485724e-05, "loss": 4.0057, "step": 8636500 }, { "epoch": 1.14, "learning_rate": 4.3911576502491484e-05, "loss": 4.0023, "step": 8637000 }, { "epoch": 1.14, "learning_rate": 4.3910901357884115e-05, "loss": 4.0285, "step": 8637500 }, { "epoch": 1.14, "learning_rate": 4.391022618103629e-05, "loss": 3.9979, "step": 8638000 }, { "epoch": 1.14, "learning_rate": 4.390955097194915e-05, "loss": 4.0063, "step": 8638500 }, { "epoch": 1.14, "learning_rate": 4.390887573062387e-05, "loss": 3.9853, "step": 8639000 }, { "epoch": 1.14, "learning_rate": 4.390820045706157e-05, "loss": 3.9849, "step": 8639500 }, { "epoch": 1.14, "learning_rate": 4.3907525151263416e-05, "loss": 4.0054, "step": 8640000 }, { "epoch": 1.14, "learning_rate": 4.3906849813230565e-05, "loss": 4.002, "step": 8640500 }, { "epoch": 1.14, "learning_rate": 4.390617444296415e-05, "loss": 4.0143, "step": 8641000 }, { "epoch": 1.14, "learning_rate": 4.3905499040465345e-05, "loss": 3.9962, "step": 8641500 }, { "epoch": 1.14, "learning_rate": 4.39048236057353e-05, "loss": 4.0019, "step": 8642000 }, { "epoch": 1.14, "learning_rate": 4.390414813877515e-05, "loss": 3.996, "step": 8642500 }, { "epoch": 1.14, "learning_rate": 4.3903472639586055e-05, "loss": 3.9979, "step": 8643000 }, { "epoch": 1.14, "learning_rate": 4.390279710816916e-05, "loss": 4.0219, "step": 8643500 }, { "epoch": 1.14, "learning_rate": 4.390212154452563e-05, "loss": 4.0124, "step": 8644000 }, { "epoch": 1.14, "learning_rate": 4.390144594865661e-05, "loss": 4.0183, "step": 8644500 }, { "epoch": 1.14, "learning_rate": 4.390077032056325e-05, "loss": 4.0135, "step": 8645000 }, { "epoch": 1.14, "learning_rate": 4.390009466024669e-05, "loss": 3.9872, "step": 8645500 }, { "epoch": 1.14, "learning_rate": 4.3899418967708116e-05, "loss": 4.0227, "step": 8646000 }, { "epoch": 1.14, "learning_rate": 4.389874324294865e-05, "loss": 3.9978, "step": 8646500 }, { "epoch": 1.14, "learning_rate": 4.3898067485969455e-05, "loss": 4.0126, "step": 8647000 }, { "epoch": 1.14, "learning_rate": 4.389739169677168e-05, "loss": 4.0117, "step": 8647500 }, { "epoch": 1.14, "learning_rate": 4.389671587535647e-05, "loss": 3.9826, "step": 8648000 }, { "epoch": 1.14, "learning_rate": 4.3896040021724996e-05, "loss": 3.9957, "step": 8648500 }, { "epoch": 1.14, "learning_rate": 4.38953641358784e-05, "loss": 4.0002, "step": 8649000 }, { "epoch": 1.14, "learning_rate": 4.389468821781783e-05, "loss": 4.0138, "step": 8649500 }, { "epoch": 1.14, "learning_rate": 4.3894012267544446e-05, "loss": 3.9924, "step": 8650000 }, { "epoch": 1.14, "learning_rate": 4.389333628505939e-05, "loss": 3.9955, "step": 8650500 }, { "epoch": 1.14, "learning_rate": 4.389266027036383e-05, "loss": 3.9913, "step": 8651000 }, { "epoch": 1.14, "learning_rate": 4.38919842234589e-05, "loss": 4.011, "step": 8651500 }, { "epoch": 1.14, "learning_rate": 4.3891308144345764e-05, "loss": 3.9752, "step": 8652000 }, { "epoch": 1.14, "learning_rate": 4.389063203302558e-05, "loss": 4.0098, "step": 8652500 }, { "epoch": 1.14, "learning_rate": 4.38899558894995e-05, "loss": 4.0046, "step": 8653000 }, { "epoch": 1.14, "learning_rate": 4.388927971376865e-05, "loss": 3.9915, "step": 8653500 }, { "epoch": 1.14, "learning_rate": 4.388860350583422e-05, "loss": 4.0083, "step": 8654000 }, { "epoch": 1.14, "learning_rate": 4.388792726569734e-05, "loss": 3.9929, "step": 8654500 }, { "epoch": 1.14, "learning_rate": 4.388725099335916e-05, "loss": 3.996, "step": 8655000 }, { "epoch": 1.14, "learning_rate": 4.388657468882086e-05, "loss": 4.0113, "step": 8655500 }, { "epoch": 1.14, "learning_rate": 4.388589835208356e-05, "loss": 4.0118, "step": 8656000 }, { "epoch": 1.14, "learning_rate": 4.3885221983148433e-05, "loss": 4.0112, "step": 8656500 }, { "epoch": 1.14, "learning_rate": 4.3884545582016626e-05, "loss": 3.9962, "step": 8657000 }, { "epoch": 1.14, "learning_rate": 4.388386914868929e-05, "loss": 3.9931, "step": 8657500 }, { "epoch": 1.14, "learning_rate": 4.38831926831676e-05, "loss": 3.9895, "step": 8658000 }, { "epoch": 1.14, "learning_rate": 4.388251618545267e-05, "loss": 4.0014, "step": 8658500 }, { "epoch": 1.14, "learning_rate": 4.388183965554568e-05, "loss": 4.0117, "step": 8659000 }, { "epoch": 1.14, "learning_rate": 4.388116309344777e-05, "loss": 3.993, "step": 8659500 }, { "epoch": 1.14, "learning_rate": 4.388048649916011e-05, "loss": 3.9994, "step": 8660000 }, { "epoch": 1.14, "learning_rate": 4.3879809872683854e-05, "loss": 4.0047, "step": 8660500 }, { "epoch": 1.14, "learning_rate": 4.387913321402013e-05, "loss": 3.9992, "step": 8661000 }, { "epoch": 1.14, "learning_rate": 4.3878456523170106e-05, "loss": 4.0141, "step": 8661500 }, { "epoch": 1.14, "learning_rate": 4.3877779800134947e-05, "loss": 3.9981, "step": 8662000 }, { "epoch": 1.14, "learning_rate": 4.3877103044915794e-05, "loss": 4.0054, "step": 8662500 }, { "epoch": 1.14, "learning_rate": 4.38764262575138e-05, "loss": 4.0113, "step": 8663000 }, { "epoch": 1.14, "learning_rate": 4.387574943793013e-05, "loss": 4.0168, "step": 8663500 }, { "epoch": 1.14, "learning_rate": 4.387507258616593e-05, "loss": 4.014, "step": 8664000 }, { "epoch": 1.14, "learning_rate": 4.3874395702222346e-05, "loss": 3.981, "step": 8664500 }, { "epoch": 1.14, "learning_rate": 4.387371878610055e-05, "loss": 4.0021, "step": 8665000 }, { "epoch": 1.14, "learning_rate": 4.3873041837801674e-05, "loss": 4.0042, "step": 8665500 }, { "epoch": 1.14, "learning_rate": 4.387236485732689e-05, "loss": 4.0108, "step": 8666000 }, { "epoch": 1.14, "learning_rate": 4.387168784467735e-05, "loss": 4.0102, "step": 8666500 }, { "epoch": 1.14, "learning_rate": 4.387101079985421e-05, "loss": 4.0102, "step": 8667000 }, { "epoch": 1.14, "learning_rate": 4.3870333722858604e-05, "loss": 4.0084, "step": 8667500 }, { "epoch": 1.14, "learning_rate": 4.386965661369171e-05, "loss": 3.9905, "step": 8668000 }, { "epoch": 1.14, "learning_rate": 4.386897947235468e-05, "loss": 4.0071, "step": 8668500 }, { "epoch": 1.14, "learning_rate": 4.3868302298848654e-05, "loss": 4.0106, "step": 8669000 }, { "epoch": 1.14, "learning_rate": 4.386762509317479e-05, "loss": 4.0155, "step": 8669500 }, { "epoch": 1.14, "learning_rate": 4.386694785533425e-05, "loss": 3.9992, "step": 8670000 }, { "epoch": 1.14, "learning_rate": 4.3866270585328195e-05, "loss": 4.0181, "step": 8670500 }, { "epoch": 1.14, "learning_rate": 4.386559328315776e-05, "loss": 4.0034, "step": 8671000 }, { "epoch": 1.14, "learning_rate": 4.386491594882412e-05, "loss": 4.0024, "step": 8671500 }, { "epoch": 1.14, "learning_rate": 4.386423858232841e-05, "loss": 4.0228, "step": 8672000 }, { "epoch": 1.14, "learning_rate": 4.3863561183671794e-05, "loss": 3.9992, "step": 8672500 }, { "epoch": 1.14, "learning_rate": 4.3862883752855436e-05, "loss": 3.9982, "step": 8673000 }, { "epoch": 1.14, "learning_rate": 4.386220628988048e-05, "loss": 4.0034, "step": 8673500 }, { "epoch": 1.14, "learning_rate": 4.386152879474809e-05, "loss": 4.0067, "step": 8674000 }, { "epoch": 1.14, "learning_rate": 4.38608512674594e-05, "loss": 3.9911, "step": 8674500 }, { "epoch": 1.14, "learning_rate": 4.386017370801559e-05, "loss": 4.0084, "step": 8675000 }, { "epoch": 1.14, "learning_rate": 4.3859496116417806e-05, "loss": 3.9945, "step": 8675500 }, { "epoch": 1.14, "learning_rate": 4.38588184926672e-05, "loss": 3.9931, "step": 8676000 }, { "epoch": 1.14, "learning_rate": 4.3858140836764925e-05, "loss": 3.9839, "step": 8676500 }, { "epoch": 1.14, "learning_rate": 4.385746314871214e-05, "loss": 3.9969, "step": 8677000 }, { "epoch": 1.14, "learning_rate": 4.3856785428510006e-05, "loss": 3.9864, "step": 8677500 }, { "epoch": 1.14, "learning_rate": 4.385610767615968e-05, "loss": 4.0064, "step": 8678000 }, { "epoch": 1.14, "learning_rate": 4.385542989166229e-05, "loss": 3.9971, "step": 8678500 }, { "epoch": 1.14, "learning_rate": 4.3854752075019026e-05, "loss": 4.0061, "step": 8679000 }, { "epoch": 1.14, "learning_rate": 4.3854074226231034e-05, "loss": 3.9941, "step": 8679500 }, { "epoch": 1.14, "learning_rate": 4.385339634529946e-05, "loss": 3.9904, "step": 8680000 }, { "epoch": 1.14, "learning_rate": 4.385271843222547e-05, "loss": 4.0035, "step": 8680500 }, { "epoch": 1.14, "learning_rate": 4.38520404870102e-05, "loss": 3.9906, "step": 8681000 }, { "epoch": 1.14, "learning_rate": 4.385136250965484e-05, "loss": 3.9901, "step": 8681500 }, { "epoch": 1.14, "learning_rate": 4.3850684500160514e-05, "loss": 3.9993, "step": 8682000 }, { "epoch": 1.14, "learning_rate": 4.385000645852839e-05, "loss": 4.0064, "step": 8682500 }, { "epoch": 1.14, "learning_rate": 4.384932838475964e-05, "loss": 4.0008, "step": 8683000 }, { "epoch": 1.14, "learning_rate": 4.3848650278855385e-05, "loss": 3.9982, "step": 8683500 }, { "epoch": 1.14, "learning_rate": 4.384797214081681e-05, "loss": 4.0289, "step": 8684000 }, { "epoch": 1.14, "learning_rate": 4.384729397064506e-05, "loss": 4.0068, "step": 8684500 }, { "epoch": 1.14, "learning_rate": 4.38466157683413e-05, "loss": 4.0085, "step": 8685000 }, { "epoch": 1.14, "learning_rate": 4.3845937533906674e-05, "loss": 3.9898, "step": 8685500 }, { "epoch": 1.14, "learning_rate": 4.384525926734234e-05, "loss": 4.0111, "step": 8686000 }, { "epoch": 1.14, "learning_rate": 4.3844580968649466e-05, "loss": 3.9869, "step": 8686500 }, { "epoch": 1.14, "learning_rate": 4.384390263782919e-05, "loss": 3.9909, "step": 8687000 }, { "epoch": 1.14, "learning_rate": 4.384322427488269e-05, "loss": 4.003, "step": 8687500 }, { "epoch": 1.14, "learning_rate": 4.3842545879811105e-05, "loss": 4.0025, "step": 8688000 }, { "epoch": 1.14, "learning_rate": 4.3841867452615593e-05, "loss": 4.0129, "step": 8688500 }, { "epoch": 1.14, "learning_rate": 4.384118899329732e-05, "loss": 3.9924, "step": 8689000 }, { "epoch": 1.14, "learning_rate": 4.384051050185744e-05, "loss": 4.0122, "step": 8689500 }, { "epoch": 1.14, "learning_rate": 4.3839831978297106e-05, "loss": 4.0201, "step": 8690000 }, { "epoch": 1.14, "learning_rate": 4.383915342261748e-05, "loss": 3.9802, "step": 8690500 }, { "epoch": 1.14, "learning_rate": 4.383847483481971e-05, "loss": 3.9924, "step": 8691000 }, { "epoch": 1.14, "learning_rate": 4.3837796214904955e-05, "loss": 3.9762, "step": 8691500 }, { "epoch": 1.14, "learning_rate": 4.383711756287439e-05, "loss": 3.9975, "step": 8692000 }, { "epoch": 1.14, "learning_rate": 4.383643887872915e-05, "loss": 3.9949, "step": 8692500 }, { "epoch": 1.14, "learning_rate": 4.3835760162470394e-05, "loss": 3.9917, "step": 8693000 }, { "epoch": 1.14, "learning_rate": 4.383508141409929e-05, "loss": 3.9999, "step": 8693500 }, { "epoch": 1.14, "learning_rate": 4.3834402633616986e-05, "loss": 4.0152, "step": 8694000 }, { "epoch": 1.14, "learning_rate": 4.383372382102465e-05, "loss": 4.0033, "step": 8694500 }, { "epoch": 1.14, "learning_rate": 4.3833044976323425e-05, "loss": 4.0153, "step": 8695000 }, { "epoch": 1.14, "learning_rate": 4.3832366099514477e-05, "loss": 4.0003, "step": 8695500 }, { "epoch": 1.14, "learning_rate": 4.383168719059896e-05, "loss": 3.9785, "step": 8696000 }, { "epoch": 1.14, "learning_rate": 4.3831008249578044e-05, "loss": 4.0195, "step": 8696500 }, { "epoch": 1.14, "learning_rate": 4.3830329276452864e-05, "loss": 4.0129, "step": 8697000 }, { "epoch": 1.14, "learning_rate": 4.382965027122459e-05, "loss": 3.9948, "step": 8697500 }, { "epoch": 1.14, "learning_rate": 4.382897123389439e-05, "loss": 3.9903, "step": 8698000 }, { "epoch": 1.14, "learning_rate": 4.38282921644634e-05, "loss": 3.9911, "step": 8698500 }, { "epoch": 1.14, "learning_rate": 4.3827613062932795e-05, "loss": 3.9906, "step": 8699000 }, { "epoch": 1.14, "learning_rate": 4.382693392930372e-05, "loss": 3.9865, "step": 8699500 }, { "epoch": 1.14, "learning_rate": 4.3826254763577344e-05, "loss": 3.991, "step": 8700000 }, { "epoch": 1.14, "learning_rate": 4.3825575565754814e-05, "loss": 4.0136, "step": 8700500 }, { "epoch": 1.14, "learning_rate": 4.38248963358373e-05, "loss": 3.9931, "step": 8701000 }, { "epoch": 1.14, "learning_rate": 4.3824217073825955e-05, "loss": 3.988, "step": 8701500 }, { "epoch": 1.14, "learning_rate": 4.382353777972193e-05, "loss": 4.0006, "step": 8702000 }, { "epoch": 1.14, "learning_rate": 4.382285845352639e-05, "loss": 3.9845, "step": 8702500 }, { "epoch": 1.14, "learning_rate": 4.38221790952405e-05, "loss": 4.0057, "step": 8703000 }, { "epoch": 1.14, "learning_rate": 4.38214997048654e-05, "loss": 4.008, "step": 8703500 }, { "epoch": 1.14, "learning_rate": 4.382082028240226e-05, "loss": 3.9853, "step": 8704000 }, { "epoch": 1.14, "learning_rate": 4.382014082785224e-05, "loss": 3.9974, "step": 8704500 }, { "epoch": 1.14, "learning_rate": 4.381946134121649e-05, "loss": 3.9958, "step": 8705000 }, { "epoch": 1.14, "learning_rate": 4.381878182249619e-05, "loss": 3.9973, "step": 8705500 }, { "epoch": 1.14, "learning_rate": 4.3818102271692464e-05, "loss": 3.99, "step": 8706000 }, { "epoch": 1.14, "learning_rate": 4.38174226888065e-05, "loss": 4.0017, "step": 8706500 }, { "epoch": 1.14, "learning_rate": 4.381674307383944e-05, "loss": 3.9949, "step": 8707000 }, { "epoch": 1.14, "learning_rate": 4.3816063426792446e-05, "loss": 3.9778, "step": 8707500 }, { "epoch": 1.14, "learning_rate": 4.381538374766668e-05, "loss": 4.0064, "step": 8708000 }, { "epoch": 1.14, "learning_rate": 4.381470403646331e-05, "loss": 3.9965, "step": 8708500 }, { "epoch": 1.14, "learning_rate": 4.381402429318347e-05, "loss": 3.9998, "step": 8709000 }, { "epoch": 1.14, "learning_rate": 4.381334451782834e-05, "loss": 3.9917, "step": 8709500 }, { "epoch": 1.14, "learning_rate": 4.381266471039906e-05, "loss": 3.9848, "step": 8710000 }, { "epoch": 1.14, "learning_rate": 4.381198487089682e-05, "loss": 3.998, "step": 8710500 }, { "epoch": 1.14, "learning_rate": 4.381130499932276e-05, "loss": 3.9974, "step": 8711000 }, { "epoch": 1.14, "learning_rate": 4.381062509567803e-05, "loss": 4.0166, "step": 8711500 }, { "epoch": 1.14, "learning_rate": 4.38099451599638e-05, "loss": 3.9703, "step": 8712000 }, { "epoch": 1.15, "learning_rate": 4.3809265192181223e-05, "loss": 4.0003, "step": 8712500 }, { "epoch": 1.15, "learning_rate": 4.380858519233147e-05, "loss": 3.9847, "step": 8713000 }, { "epoch": 1.15, "learning_rate": 4.3807905160415685e-05, "loss": 3.9888, "step": 8713500 }, { "epoch": 1.15, "learning_rate": 4.3807225096435045e-05, "loss": 4.0163, "step": 8714000 }, { "epoch": 1.15, "learning_rate": 4.380654500039069e-05, "loss": 3.9894, "step": 8714500 }, { "epoch": 1.15, "learning_rate": 4.3805864872283794e-05, "loss": 3.9884, "step": 8715000 }, { "epoch": 1.15, "learning_rate": 4.380518471211551e-05, "loss": 3.9938, "step": 8715500 }, { "epoch": 1.15, "learning_rate": 4.3804504519887005e-05, "loss": 4.0024, "step": 8716000 }, { "epoch": 1.15, "learning_rate": 4.380382429559943e-05, "loss": 3.9936, "step": 8716500 }, { "epoch": 1.15, "learning_rate": 4.3803144039253954e-05, "loss": 3.9955, "step": 8717000 }, { "epoch": 1.15, "learning_rate": 4.380246375085172e-05, "loss": 4.0132, "step": 8717500 }, { "epoch": 1.15, "learning_rate": 4.38017834303939e-05, "loss": 4.0156, "step": 8718000 }, { "epoch": 1.15, "learning_rate": 4.3801103077881655e-05, "loss": 3.9882, "step": 8718500 }, { "epoch": 1.15, "learning_rate": 4.380042269331614e-05, "loss": 3.9888, "step": 8719000 }, { "epoch": 1.15, "learning_rate": 4.379974227669852e-05, "loss": 4.038, "step": 8719500 }, { "epoch": 1.15, "learning_rate": 4.379906182802995e-05, "loss": 4.0215, "step": 8720000 }, { "epoch": 1.15, "learning_rate": 4.379838134731159e-05, "loss": 3.9897, "step": 8720500 }, { "epoch": 1.15, "learning_rate": 4.379770083454461e-05, "loss": 4.0055, "step": 8721000 }, { "epoch": 1.15, "learning_rate": 4.3797020289730156e-05, "loss": 4.0059, "step": 8721500 }, { "epoch": 1.15, "learning_rate": 4.379633971286939e-05, "loss": 4.0049, "step": 8722000 }, { "epoch": 1.15, "learning_rate": 4.379565910396349e-05, "loss": 3.9992, "step": 8722500 }, { "epoch": 1.15, "learning_rate": 4.379497846301359e-05, "loss": 3.9903, "step": 8723000 }, { "epoch": 1.15, "learning_rate": 4.3794297790020874e-05, "loss": 3.9896, "step": 8723500 }, { "epoch": 1.15, "learning_rate": 4.379361708498649e-05, "loss": 3.9853, "step": 8724000 }, { "epoch": 1.15, "learning_rate": 4.37929363479116e-05, "loss": 3.9874, "step": 8724500 }, { "epoch": 1.15, "learning_rate": 4.379225557879736e-05, "loss": 3.9751, "step": 8725000 }, { "epoch": 1.15, "learning_rate": 4.379157477764494e-05, "loss": 4.0004, "step": 8725500 }, { "epoch": 1.15, "learning_rate": 4.3790893944455494e-05, "loss": 3.9891, "step": 8726000 }, { "epoch": 1.15, "learning_rate": 4.379021307923019e-05, "loss": 3.9959, "step": 8726500 }, { "epoch": 1.15, "learning_rate": 4.378953218197018e-05, "loss": 3.9955, "step": 8727000 }, { "epoch": 1.15, "learning_rate": 4.378885125267663e-05, "loss": 3.9773, "step": 8727500 }, { "epoch": 1.15, "learning_rate": 4.37881702913507e-05, "loss": 3.9943, "step": 8728000 }, { "epoch": 1.15, "learning_rate": 4.378748929799355e-05, "loss": 3.9817, "step": 8728500 }, { "epoch": 1.15, "learning_rate": 4.378680827260634e-05, "loss": 3.9905, "step": 8729000 }, { "epoch": 1.15, "learning_rate": 4.3786127215190236e-05, "loss": 3.9804, "step": 8729500 }, { "epoch": 1.15, "learning_rate": 4.378544612574639e-05, "loss": 3.9879, "step": 8730000 }, { "epoch": 1.15, "learning_rate": 4.378476500427598e-05, "loss": 3.9879, "step": 8730500 }, { "epoch": 1.15, "learning_rate": 4.3784083850780146e-05, "loss": 3.9909, "step": 8731000 }, { "epoch": 1.15, "learning_rate": 4.378340266526007e-05, "loss": 3.9889, "step": 8731500 }, { "epoch": 1.15, "learning_rate": 4.378272144771689e-05, "loss": 3.9936, "step": 8732000 }, { "epoch": 1.15, "learning_rate": 4.378204019815179e-05, "loss": 3.9981, "step": 8732500 }, { "epoch": 1.15, "learning_rate": 4.3781358916565915e-05, "loss": 4.0112, "step": 8733000 }, { "epoch": 1.15, "learning_rate": 4.378067760296043e-05, "loss": 4.0074, "step": 8733500 }, { "epoch": 1.15, "learning_rate": 4.377999625733651e-05, "loss": 3.9862, "step": 8734000 }, { "epoch": 1.15, "learning_rate": 4.37793148796953e-05, "loss": 4.0057, "step": 8734500 }, { "epoch": 1.15, "learning_rate": 4.377863347003797e-05, "loss": 4.003, "step": 8735000 }, { "epoch": 1.15, "learning_rate": 4.377795202836568e-05, "loss": 3.9918, "step": 8735500 }, { "epoch": 1.15, "learning_rate": 4.377727055467958e-05, "loss": 4.0089, "step": 8736000 }, { "epoch": 1.15, "learning_rate": 4.3776589048980866e-05, "loss": 3.9968, "step": 8736500 }, { "epoch": 1.15, "learning_rate": 4.377590751127067e-05, "loss": 3.9875, "step": 8737000 }, { "epoch": 1.15, "learning_rate": 4.3775225941550144e-05, "loss": 3.9736, "step": 8737500 }, { "epoch": 1.15, "learning_rate": 4.377454433982048e-05, "loss": 4.0025, "step": 8738000 }, { "epoch": 1.15, "learning_rate": 4.377386270608282e-05, "loss": 4.0016, "step": 8738500 }, { "epoch": 1.15, "learning_rate": 4.377318104033834e-05, "loss": 3.9998, "step": 8739000 }, { "epoch": 1.15, "learning_rate": 4.37724993425882e-05, "loss": 3.9941, "step": 8739500 }, { "epoch": 1.15, "learning_rate": 4.377181761283355e-05, "loss": 3.9705, "step": 8740000 }, { "epoch": 1.15, "learning_rate": 4.3771135851075565e-05, "loss": 3.9685, "step": 8740500 }, { "epoch": 1.15, "learning_rate": 4.377045405731539e-05, "loss": 3.9845, "step": 8741000 }, { "epoch": 1.15, "learning_rate": 4.376977223155421e-05, "loss": 4.0025, "step": 8741500 }, { "epoch": 1.15, "learning_rate": 4.376909037379317e-05, "loss": 4.0082, "step": 8742000 }, { "epoch": 1.15, "learning_rate": 4.3768408484033455e-05, "loss": 3.9765, "step": 8742500 }, { "epoch": 1.15, "learning_rate": 4.3767726562276194e-05, "loss": 3.9811, "step": 8743000 }, { "epoch": 1.15, "learning_rate": 4.376704460852258e-05, "loss": 3.9953, "step": 8743500 }, { "epoch": 1.15, "learning_rate": 4.376636262277375e-05, "loss": 4.0027, "step": 8744000 }, { "epoch": 1.15, "learning_rate": 4.376568060503089e-05, "loss": 4.0136, "step": 8744500 }, { "epoch": 1.15, "learning_rate": 4.376499855529514e-05, "loss": 3.9806, "step": 8745000 }, { "epoch": 1.15, "learning_rate": 4.376431647356769e-05, "loss": 3.9951, "step": 8745500 }, { "epoch": 1.15, "learning_rate": 4.3763634359849685e-05, "loss": 4.0033, "step": 8746000 }, { "epoch": 1.15, "learning_rate": 4.3762952214142285e-05, "loss": 3.9999, "step": 8746500 }, { "epoch": 1.15, "learning_rate": 4.376227003644666e-05, "loss": 3.9814, "step": 8747000 }, { "epoch": 1.15, "learning_rate": 4.376158782676397e-05, "loss": 3.979, "step": 8747500 }, { "epoch": 1.15, "learning_rate": 4.376090558509539e-05, "loss": 3.993, "step": 8748000 }, { "epoch": 1.15, "learning_rate": 4.376022331144206e-05, "loss": 3.9829, "step": 8748500 }, { "epoch": 1.15, "learning_rate": 4.375954100580517e-05, "loss": 3.9786, "step": 8749000 }, { "epoch": 1.15, "learning_rate": 4.375885866818587e-05, "loss": 3.9976, "step": 8749500 }, { "epoch": 1.15, "learning_rate": 4.3758176298585306e-05, "loss": 4.0138, "step": 8750000 }, { "epoch": 1.15, "learning_rate": 4.375749389700467e-05, "loss": 3.9731, "step": 8750500 }, { "epoch": 1.15, "learning_rate": 4.3756811463445116e-05, "loss": 3.9841, "step": 8751000 }, { "epoch": 1.15, "learning_rate": 4.37561289979078e-05, "loss": 3.9777, "step": 8751500 }, { "epoch": 1.15, "learning_rate": 4.3755446500393886e-05, "loss": 4.0008, "step": 8752000 }, { "epoch": 1.15, "learning_rate": 4.375476397090455e-05, "loss": 3.9818, "step": 8752500 }, { "epoch": 1.15, "learning_rate": 4.3754081409440944e-05, "loss": 3.995, "step": 8753000 }, { "epoch": 1.15, "learning_rate": 4.3753398816004234e-05, "loss": 3.9793, "step": 8753500 }, { "epoch": 1.15, "learning_rate": 4.375271619059559e-05, "loss": 3.9898, "step": 8754000 }, { "epoch": 1.15, "learning_rate": 4.375203353321617e-05, "loss": 3.9641, "step": 8754500 }, { "epoch": 1.15, "learning_rate": 4.375135084386714e-05, "loss": 3.9823, "step": 8755000 }, { "epoch": 1.15, "learning_rate": 4.375066812254966e-05, "loss": 3.973, "step": 8755500 }, { "epoch": 1.15, "learning_rate": 4.374998536926489e-05, "loss": 3.982, "step": 8756000 }, { "epoch": 1.15, "learning_rate": 4.3749302584014015e-05, "loss": 4.0092, "step": 8756500 }, { "epoch": 1.15, "learning_rate": 4.374861976679818e-05, "loss": 4.0116, "step": 8757000 }, { "epoch": 1.15, "learning_rate": 4.374793691761855e-05, "loss": 3.9862, "step": 8757500 }, { "epoch": 1.15, "learning_rate": 4.3747254036476296e-05, "loss": 3.9801, "step": 8758000 }, { "epoch": 1.15, "learning_rate": 4.374657112337257e-05, "loss": 3.9877, "step": 8758500 }, { "epoch": 1.15, "learning_rate": 4.374588817830856e-05, "loss": 3.984, "step": 8759000 }, { "epoch": 1.15, "learning_rate": 4.37452052012854e-05, "loss": 3.9955, "step": 8759500 }, { "epoch": 1.15, "learning_rate": 4.374452219230428e-05, "loss": 3.9707, "step": 8760000 }, { "epoch": 1.15, "learning_rate": 4.374383915136635e-05, "loss": 3.9901, "step": 8760500 }, { "epoch": 1.15, "learning_rate": 4.374315607847279e-05, "loss": 3.9895, "step": 8761000 }, { "epoch": 1.15, "learning_rate": 4.3742472973624745e-05, "loss": 3.9872, "step": 8761500 }, { "epoch": 1.15, "learning_rate": 4.3741789836823387e-05, "loss": 3.9964, "step": 8762000 }, { "epoch": 1.15, "learning_rate": 4.3741106668069885e-05, "loss": 3.9948, "step": 8762500 }, { "epoch": 1.15, "learning_rate": 4.37404234673654e-05, "loss": 4.0069, "step": 8763000 }, { "epoch": 1.15, "learning_rate": 4.37397402347111e-05, "loss": 3.9929, "step": 8763500 }, { "epoch": 1.15, "learning_rate": 4.373905697010814e-05, "loss": 4.0116, "step": 8764000 }, { "epoch": 1.15, "learning_rate": 4.37383736735577e-05, "loss": 4.0026, "step": 8764500 }, { "epoch": 1.15, "learning_rate": 4.3737690345060934e-05, "loss": 3.9992, "step": 8765000 }, { "epoch": 1.15, "learning_rate": 4.3737006984619004e-05, "loss": 3.9944, "step": 8765500 }, { "epoch": 1.15, "learning_rate": 4.373632359223308e-05, "loss": 4.002, "step": 8766000 }, { "epoch": 1.15, "learning_rate": 4.3735640167904344e-05, "loss": 3.999, "step": 8766500 }, { "epoch": 1.15, "learning_rate": 4.373495671163393e-05, "loss": 3.9906, "step": 8767000 }, { "epoch": 1.15, "learning_rate": 4.373427322342303e-05, "loss": 3.9986, "step": 8767500 }, { "epoch": 1.15, "learning_rate": 4.3733589703272795e-05, "loss": 3.9958, "step": 8768000 }, { "epoch": 1.15, "learning_rate": 4.373290615118438e-05, "loss": 4.0039, "step": 8768500 }, { "epoch": 1.15, "learning_rate": 4.373222256715898e-05, "loss": 3.9657, "step": 8769000 }, { "epoch": 1.15, "learning_rate": 4.3731538951197736e-05, "loss": 3.9694, "step": 8769500 }, { "epoch": 1.15, "learning_rate": 4.373085530330182e-05, "loss": 3.9845, "step": 8770000 }, { "epoch": 1.15, "learning_rate": 4.37301716234724e-05, "loss": 4.0024, "step": 8770500 }, { "epoch": 1.15, "learning_rate": 4.372948791171064e-05, "loss": 3.9809, "step": 8771000 }, { "epoch": 1.15, "learning_rate": 4.3728804168017704e-05, "loss": 3.9996, "step": 8771500 }, { "epoch": 1.15, "learning_rate": 4.372812039239477e-05, "loss": 3.9857, "step": 8772000 }, { "epoch": 1.15, "learning_rate": 4.372743658484298e-05, "loss": 4.0082, "step": 8772500 }, { "epoch": 1.15, "learning_rate": 4.372675274536352e-05, "loss": 3.9937, "step": 8773000 }, { "epoch": 1.15, "learning_rate": 4.372606887395755e-05, "loss": 3.9793, "step": 8773500 }, { "epoch": 1.15, "learning_rate": 4.3725384970626224e-05, "loss": 3.955, "step": 8774000 }, { "epoch": 1.15, "learning_rate": 4.3724701035370735e-05, "loss": 3.9812, "step": 8774500 }, { "epoch": 1.15, "learning_rate": 4.372401706819223e-05, "loss": 4.0048, "step": 8775000 }, { "epoch": 1.15, "learning_rate": 4.3723333069091867e-05, "loss": 3.9915, "step": 8775500 }, { "epoch": 1.15, "learning_rate": 4.3722649038070825e-05, "loss": 4.0041, "step": 8776000 }, { "epoch": 1.15, "learning_rate": 4.372196497513028e-05, "loss": 3.9915, "step": 8776500 }, { "epoch": 1.15, "learning_rate": 4.372128088027138e-05, "loss": 3.9839, "step": 8777000 }, { "epoch": 1.15, "learning_rate": 4.3720596753495294e-05, "loss": 3.9959, "step": 8777500 }, { "epoch": 1.15, "learning_rate": 4.37199125948032e-05, "loss": 3.9763, "step": 8778000 }, { "epoch": 1.15, "learning_rate": 4.3719228404196245e-05, "loss": 3.9735, "step": 8778500 }, { "epoch": 1.15, "learning_rate": 4.3718544181675616e-05, "loss": 3.9881, "step": 8779000 }, { "epoch": 1.15, "learning_rate": 4.3717859927242466e-05, "loss": 3.9961, "step": 8779500 }, { "epoch": 1.15, "learning_rate": 4.371717564089797e-05, "loss": 3.991, "step": 8780000 }, { "epoch": 1.15, "learning_rate": 4.3716491322643296e-05, "loss": 3.9952, "step": 8780500 }, { "epoch": 1.15, "learning_rate": 4.371580697247959e-05, "loss": 3.9968, "step": 8781000 }, { "epoch": 1.15, "learning_rate": 4.3715122590408044e-05, "loss": 3.9779, "step": 8781500 }, { "epoch": 1.15, "learning_rate": 4.3714438176429814e-05, "loss": 4.0111, "step": 8782000 }, { "epoch": 1.15, "learning_rate": 4.371375373054607e-05, "loss": 3.9668, "step": 8782500 }, { "epoch": 1.15, "learning_rate": 4.3713069252757974e-05, "loss": 3.9827, "step": 8783000 }, { "epoch": 1.15, "learning_rate": 4.3712384743066695e-05, "loss": 3.9911, "step": 8783500 }, { "epoch": 1.15, "learning_rate": 4.371170020147339e-05, "loss": 4.0026, "step": 8784000 }, { "epoch": 1.15, "learning_rate": 4.371101562797925e-05, "loss": 3.9774, "step": 8784500 }, { "epoch": 1.15, "learning_rate": 4.371033102258543e-05, "loss": 3.9933, "step": 8785000 }, { "epoch": 1.15, "learning_rate": 4.3709646385293094e-05, "loss": 3.983, "step": 8785500 }, { "epoch": 1.15, "learning_rate": 4.37089617161034e-05, "loss": 3.9812, "step": 8786000 }, { "epoch": 1.15, "learning_rate": 4.370827701501754e-05, "loss": 3.9868, "step": 8786500 }, { "epoch": 1.15, "learning_rate": 4.370759228203666e-05, "loss": 3.9999, "step": 8787000 }, { "epoch": 1.15, "learning_rate": 4.3706907517161935e-05, "loss": 4.0006, "step": 8787500 }, { "epoch": 1.15, "learning_rate": 4.370622272039454e-05, "loss": 3.9927, "step": 8788000 }, { "epoch": 1.16, "learning_rate": 4.3705537891735624e-05, "loss": 3.9891, "step": 8788500 }, { "epoch": 1.16, "learning_rate": 4.370485303118638e-05, "loss": 3.9834, "step": 8789000 }, { "epoch": 1.16, "learning_rate": 4.370416813874795e-05, "loss": 3.9751, "step": 8789500 }, { "epoch": 1.16, "learning_rate": 4.370348321442152e-05, "loss": 3.9976, "step": 8790000 }, { "epoch": 1.16, "learning_rate": 4.3702798258208235e-05, "loss": 3.9846, "step": 8790500 }, { "epoch": 1.16, "learning_rate": 4.370211327010929e-05, "loss": 3.9979, "step": 8791000 }, { "epoch": 1.16, "learning_rate": 4.370142825012584e-05, "loss": 3.9775, "step": 8791500 }, { "epoch": 1.16, "learning_rate": 4.370074319825905e-05, "loss": 3.9618, "step": 8792000 }, { "epoch": 1.16, "learning_rate": 4.370005811451009e-05, "loss": 3.9896, "step": 8792500 }, { "epoch": 1.16, "learning_rate": 4.369937299888014e-05, "loss": 3.9812, "step": 8793000 }, { "epoch": 1.16, "learning_rate": 4.3698687851370345e-05, "loss": 3.9854, "step": 8793500 }, { "epoch": 1.16, "learning_rate": 4.36980026719819e-05, "loss": 3.9836, "step": 8794000 }, { "epoch": 1.16, "learning_rate": 4.369731746071595e-05, "loss": 3.9705, "step": 8794500 }, { "epoch": 1.16, "learning_rate": 4.369663221757367e-05, "loss": 3.9991, "step": 8795000 }, { "epoch": 1.16, "learning_rate": 4.369594694255623e-05, "loss": 3.9942, "step": 8795500 }, { "epoch": 1.16, "learning_rate": 4.369526163566481e-05, "loss": 3.9439, "step": 8796000 }, { "epoch": 1.16, "learning_rate": 4.369457629690056e-05, "loss": 3.9858, "step": 8796500 }, { "epoch": 1.16, "learning_rate": 4.3693890926264644e-05, "loss": 3.9832, "step": 8797000 }, { "epoch": 1.16, "learning_rate": 4.369320552375826e-05, "loss": 3.9569, "step": 8797500 }, { "epoch": 1.16, "learning_rate": 4.369252008938255e-05, "loss": 3.9928, "step": 8798000 }, { "epoch": 1.16, "learning_rate": 4.3691834623138694e-05, "loss": 3.9775, "step": 8798500 }, { "epoch": 1.16, "learning_rate": 4.369114912502785e-05, "loss": 3.9751, "step": 8799000 }, { "epoch": 1.16, "learning_rate": 4.36904635950512e-05, "loss": 4.0054, "step": 8799500 }, { "epoch": 1.16, "learning_rate": 4.368977803320991e-05, "loss": 3.9791, "step": 8800000 }, { "epoch": 1.16, "learning_rate": 4.368909243950514e-05, "loss": 3.9819, "step": 8800500 }, { "epoch": 1.16, "learning_rate": 4.368840681393807e-05, "loss": 3.9738, "step": 8801000 }, { "epoch": 1.16, "learning_rate": 4.368772115650986e-05, "loss": 3.9782, "step": 8801500 }, { "epoch": 1.16, "learning_rate": 4.368703546722168e-05, "loss": 3.9597, "step": 8802000 }, { "epoch": 1.16, "learning_rate": 4.368634974607471e-05, "loss": 3.9735, "step": 8802500 }, { "epoch": 1.16, "learning_rate": 4.36856639930701e-05, "loss": 3.9651, "step": 8803000 }, { "epoch": 1.16, "learning_rate": 4.368497820820904e-05, "loss": 3.9907, "step": 8803500 }, { "epoch": 1.16, "learning_rate": 4.3684292391492675e-05, "loss": 3.9911, "step": 8804000 }, { "epoch": 1.16, "learning_rate": 4.36836065429222e-05, "loss": 3.9723, "step": 8804500 }, { "epoch": 1.16, "learning_rate": 4.368292066249877e-05, "loss": 3.9766, "step": 8805000 }, { "epoch": 1.16, "learning_rate": 4.368223475022355e-05, "loss": 3.9776, "step": 8805500 }, { "epoch": 1.16, "learning_rate": 4.3681548806097725e-05, "loss": 3.9873, "step": 8806000 }, { "epoch": 1.16, "learning_rate": 4.3680862830122456e-05, "loss": 3.9805, "step": 8806500 }, { "epoch": 1.16, "learning_rate": 4.3680176822298904e-05, "loss": 3.9859, "step": 8807000 }, { "epoch": 1.16, "learning_rate": 4.3679490782628255e-05, "loss": 3.9824, "step": 8807500 }, { "epoch": 1.16, "learning_rate": 4.367880471111167e-05, "loss": 3.966, "step": 8808000 }, { "epoch": 1.16, "learning_rate": 4.367811860775031e-05, "loss": 3.9599, "step": 8808500 }, { "epoch": 1.16, "learning_rate": 4.367743247254535e-05, "loss": 3.9733, "step": 8809000 }, { "epoch": 1.16, "learning_rate": 4.367674630549797e-05, "loss": 4.004, "step": 8809500 }, { "epoch": 1.16, "learning_rate": 4.367606010660934e-05, "loss": 3.984, "step": 8810000 }, { "epoch": 1.16, "learning_rate": 4.367537387588062e-05, "loss": 3.963, "step": 8810500 }, { "epoch": 1.16, "learning_rate": 4.367468761331298e-05, "loss": 3.9955, "step": 8811000 }, { "epoch": 1.16, "learning_rate": 4.3674001318907584e-05, "loss": 3.9743, "step": 8811500 }, { "epoch": 1.16, "learning_rate": 4.3673314992665625e-05, "loss": 3.9611, "step": 8812000 }, { "epoch": 1.16, "learning_rate": 4.367262863458825e-05, "loss": 3.9688, "step": 8812500 }, { "epoch": 1.16, "learning_rate": 4.367194224467665e-05, "loss": 3.9778, "step": 8813000 }, { "epoch": 1.16, "learning_rate": 4.3671255822931966e-05, "loss": 3.9867, "step": 8813500 }, { "epoch": 1.16, "learning_rate": 4.3670569369355395e-05, "loss": 3.9822, "step": 8814000 }, { "epoch": 1.16, "learning_rate": 4.36698828839481e-05, "loss": 3.9733, "step": 8814500 }, { "epoch": 1.16, "learning_rate": 4.366919636671125e-05, "loss": 3.9774, "step": 8815000 }, { "epoch": 1.16, "learning_rate": 4.366850981764601e-05, "loss": 3.9765, "step": 8815500 }, { "epoch": 1.16, "learning_rate": 4.3667823236753554e-05, "loss": 3.9911, "step": 8816000 }, { "epoch": 1.16, "learning_rate": 4.366713662403506e-05, "loss": 3.9703, "step": 8816500 }, { "epoch": 1.16, "learning_rate": 4.366644997949169e-05, "loss": 4.0031, "step": 8817000 }, { "epoch": 1.16, "learning_rate": 4.366576330312461e-05, "loss": 3.9796, "step": 8817500 }, { "epoch": 1.16, "learning_rate": 4.3665076594935e-05, "loss": 3.994, "step": 8818000 }, { "epoch": 1.16, "learning_rate": 4.366438985492403e-05, "loss": 3.9878, "step": 8818500 }, { "epoch": 1.16, "learning_rate": 4.366370308309287e-05, "loss": 3.9897, "step": 8819000 }, { "epoch": 1.16, "learning_rate": 4.366301627944269e-05, "loss": 3.9655, "step": 8819500 }, { "epoch": 1.16, "learning_rate": 4.366232944397467e-05, "loss": 3.962, "step": 8820000 }, { "epoch": 1.16, "learning_rate": 4.366164257668995e-05, "loss": 3.9616, "step": 8820500 }, { "epoch": 1.16, "learning_rate": 4.3660955677589743e-05, "loss": 3.979, "step": 8821000 }, { "epoch": 1.16, "learning_rate": 4.3660268746675195e-05, "loss": 3.9704, "step": 8821500 }, { "epoch": 1.16, "learning_rate": 4.365958178394747e-05, "loss": 3.9854, "step": 8822000 }, { "epoch": 1.16, "learning_rate": 4.3658894789407764e-05, "loss": 3.9767, "step": 8822500 }, { "epoch": 1.16, "learning_rate": 4.3658207763057234e-05, "loss": 3.9855, "step": 8823000 }, { "epoch": 1.16, "learning_rate": 4.3657520704897046e-05, "loss": 3.9705, "step": 8823500 }, { "epoch": 1.16, "learning_rate": 4.3656833614928385e-05, "loss": 3.9826, "step": 8824000 }, { "epoch": 1.16, "learning_rate": 4.365614649315242e-05, "loss": 3.9881, "step": 8824500 }, { "epoch": 1.16, "learning_rate": 4.36554593395703e-05, "loss": 3.9796, "step": 8825000 }, { "epoch": 1.16, "learning_rate": 4.365477215418323e-05, "loss": 3.9841, "step": 8825500 }, { "epoch": 1.16, "learning_rate": 4.3654084936992355e-05, "loss": 3.9962, "step": 8826000 }, { "epoch": 1.16, "learning_rate": 4.365339768799886e-05, "loss": 3.9631, "step": 8826500 }, { "epoch": 1.16, "learning_rate": 4.365271040720393e-05, "loss": 3.993, "step": 8827000 }, { "epoch": 1.16, "learning_rate": 4.3652023094608706e-05, "loss": 3.9702, "step": 8827500 }, { "epoch": 1.16, "learning_rate": 4.365133575021438e-05, "loss": 3.9829, "step": 8828000 }, { "epoch": 1.16, "learning_rate": 4.365064837402211e-05, "loss": 3.9893, "step": 8828500 }, { "epoch": 1.16, "learning_rate": 4.364996096603309e-05, "loss": 3.9657, "step": 8829000 }, { "epoch": 1.16, "learning_rate": 4.364927352624847e-05, "loss": 3.9631, "step": 8829500 }, { "epoch": 1.16, "learning_rate": 4.364858605466943e-05, "loss": 3.9845, "step": 8830000 }, { "epoch": 1.16, "learning_rate": 4.364789855129714e-05, "loss": 3.9666, "step": 8830500 }, { "epoch": 1.16, "learning_rate": 4.364721101613278e-05, "loss": 3.9656, "step": 8831000 }, { "epoch": 1.16, "learning_rate": 4.364652344917751e-05, "loss": 3.9801, "step": 8831500 }, { "epoch": 1.16, "learning_rate": 4.3645835850432515e-05, "loss": 3.955, "step": 8832000 }, { "epoch": 1.16, "learning_rate": 4.364514821989896e-05, "loss": 3.9932, "step": 8832500 }, { "epoch": 1.16, "learning_rate": 4.364446055757802e-05, "loss": 3.9635, "step": 8833000 }, { "epoch": 1.16, "learning_rate": 4.364377286347087e-05, "loss": 3.9885, "step": 8833500 }, { "epoch": 1.16, "learning_rate": 4.3643085137578664e-05, "loss": 3.9721, "step": 8834000 }, { "epoch": 1.16, "learning_rate": 4.36423973799026e-05, "loss": 3.985, "step": 8834500 }, { "epoch": 1.16, "learning_rate": 4.3641709590443835e-05, "loss": 3.9629, "step": 8835000 }, { "epoch": 1.16, "learning_rate": 4.3641021769203544e-05, "loss": 3.9926, "step": 8835500 }, { "epoch": 1.16, "learning_rate": 4.364033391618291e-05, "loss": 3.9852, "step": 8836000 }, { "epoch": 1.16, "learning_rate": 4.3639646031383086e-05, "loss": 3.9813, "step": 8836500 }, { "epoch": 1.16, "learning_rate": 4.363895811480526e-05, "loss": 4.0005, "step": 8837000 }, { "epoch": 1.16, "learning_rate": 4.3638270166450604e-05, "loss": 3.9866, "step": 8837500 }, { "epoch": 1.16, "learning_rate": 4.363758218632028e-05, "loss": 3.9807, "step": 8838000 }, { "epoch": 1.16, "learning_rate": 4.3636894174415474e-05, "loss": 3.9925, "step": 8838500 }, { "epoch": 1.16, "learning_rate": 4.363620613073736e-05, "loss": 3.9815, "step": 8839000 }, { "epoch": 1.16, "learning_rate": 4.3635518055287096e-05, "loss": 3.9753, "step": 8839500 }, { "epoch": 1.16, "learning_rate": 4.3634829948065855e-05, "loss": 3.9819, "step": 8840000 }, { "epoch": 1.16, "learning_rate": 4.3634141809074834e-05, "loss": 4.002, "step": 8840500 }, { "epoch": 1.16, "learning_rate": 4.3633453638315186e-05, "loss": 4.0025, "step": 8841000 }, { "epoch": 1.16, "learning_rate": 4.3632765435788095e-05, "loss": 3.9618, "step": 8841500 }, { "epoch": 1.16, "learning_rate": 4.3632077201494714e-05, "loss": 3.9659, "step": 8842000 }, { "epoch": 1.16, "learning_rate": 4.3631388935436235e-05, "loss": 3.9667, "step": 8842500 }, { "epoch": 1.16, "learning_rate": 4.363070063761384e-05, "loss": 3.9722, "step": 8843000 }, { "epoch": 1.16, "learning_rate": 4.363001230802868e-05, "loss": 3.9916, "step": 8843500 }, { "epoch": 1.16, "learning_rate": 4.362932394668193e-05, "loss": 3.9761, "step": 8844000 }, { "epoch": 1.16, "learning_rate": 4.3628635553574785e-05, "loss": 3.9866, "step": 8844500 }, { "epoch": 1.16, "learning_rate": 4.3627947128708396e-05, "loss": 3.9617, "step": 8845000 }, { "epoch": 1.16, "learning_rate": 4.362725867208395e-05, "loss": 3.96, "step": 8845500 }, { "epoch": 1.16, "learning_rate": 4.362657018370262e-05, "loss": 3.9752, "step": 8846000 }, { "epoch": 1.16, "learning_rate": 4.362588166356557e-05, "loss": 3.9768, "step": 8846500 }, { "epoch": 1.16, "learning_rate": 4.362519311167398e-05, "loss": 3.9839, "step": 8847000 }, { "epoch": 1.16, "learning_rate": 4.362450452802903e-05, "loss": 3.9709, "step": 8847500 }, { "epoch": 1.16, "learning_rate": 4.362381591263188e-05, "loss": 3.9695, "step": 8848000 }, { "epoch": 1.16, "learning_rate": 4.362312726548372e-05, "loss": 3.9869, "step": 8848500 }, { "epoch": 1.16, "learning_rate": 4.362243858658571e-05, "loss": 3.9913, "step": 8849000 }, { "epoch": 1.16, "learning_rate": 4.362174987593904e-05, "loss": 3.9628, "step": 8849500 }, { "epoch": 1.16, "learning_rate": 4.362106113354486e-05, "loss": 3.9752, "step": 8850000 }, { "epoch": 1.16, "learning_rate": 4.362037235940437e-05, "loss": 3.9783, "step": 8850500 }, { "epoch": 1.16, "learning_rate": 4.361968355351873e-05, "loss": 3.9836, "step": 8851000 }, { "epoch": 1.16, "learning_rate": 4.3618994715889113e-05, "loss": 3.9718, "step": 8851500 }, { "epoch": 1.16, "learning_rate": 4.361830584651671e-05, "loss": 3.9662, "step": 8852000 }, { "epoch": 1.16, "learning_rate": 4.361761694540266e-05, "loss": 3.9695, "step": 8852500 }, { "epoch": 1.16, "learning_rate": 4.361692801254817e-05, "loss": 3.9687, "step": 8853000 }, { "epoch": 1.16, "learning_rate": 4.361623904795442e-05, "loss": 3.9638, "step": 8853500 }, { "epoch": 1.16, "learning_rate": 4.361555005162256e-05, "loss": 3.9711, "step": 8854000 }, { "epoch": 1.16, "learning_rate": 4.3614861023553765e-05, "loss": 3.9791, "step": 8854500 }, { "epoch": 1.16, "learning_rate": 4.361417196374923e-05, "loss": 3.9677, "step": 8855000 }, { "epoch": 1.16, "learning_rate": 4.3613482872210116e-05, "loss": 3.9713, "step": 8855500 }, { "epoch": 1.16, "learning_rate": 4.36127937489376e-05, "loss": 3.9674, "step": 8856000 }, { "epoch": 1.16, "learning_rate": 4.3612104593932855e-05, "loss": 3.963, "step": 8856500 }, { "epoch": 1.16, "learning_rate": 4.361141540719706e-05, "loss": 3.9747, "step": 8857000 }, { "epoch": 1.16, "learning_rate": 4.3610726188731385e-05, "loss": 3.9753, "step": 8857500 }, { "epoch": 1.16, "learning_rate": 4.361003693853701e-05, "loss": 3.9716, "step": 8858000 }, { "epoch": 1.16, "learning_rate": 4.3609347656615114e-05, "loss": 3.9815, "step": 8858500 }, { "epoch": 1.16, "learning_rate": 4.360865834296686e-05, "loss": 3.983, "step": 8859000 }, { "epoch": 1.16, "learning_rate": 4.360796899759343e-05, "loss": 3.9798, "step": 8859500 }, { "epoch": 1.16, "learning_rate": 4.3607279620496e-05, "loss": 3.9629, "step": 8860000 }, { "epoch": 1.16, "learning_rate": 4.3606590211675745e-05, "loss": 3.9812, "step": 8860500 }, { "epoch": 1.16, "learning_rate": 4.360590077113384e-05, "loss": 3.9728, "step": 8861000 }, { "epoch": 1.16, "learning_rate": 4.360521129887146e-05, "loss": 3.9837, "step": 8861500 }, { "epoch": 1.16, "learning_rate": 4.360452179488977e-05, "loss": 3.9733, "step": 8862000 }, { "epoch": 1.16, "learning_rate": 4.360383225918997e-05, "loss": 3.9826, "step": 8862500 }, { "epoch": 1.16, "learning_rate": 4.3603142691773215e-05, "loss": 3.972, "step": 8863000 }, { "epoch": 1.16, "learning_rate": 4.360245309264069e-05, "loss": 3.9882, "step": 8863500 }, { "epoch": 1.16, "learning_rate": 4.3601763461793566e-05, "loss": 3.9738, "step": 8864000 }, { "epoch": 1.17, "learning_rate": 4.360107379923302e-05, "loss": 3.9905, "step": 8864500 }, { "epoch": 1.17, "learning_rate": 4.360038410496022e-05, "loss": 3.9671, "step": 8865000 }, { "epoch": 1.17, "learning_rate": 4.359969437897636e-05, "loss": 3.9516, "step": 8865500 }, { "epoch": 1.17, "learning_rate": 4.3599004621282605e-05, "loss": 3.961, "step": 8866000 }, { "epoch": 1.17, "learning_rate": 4.3598314831880135e-05, "loss": 3.9736, "step": 8866500 }, { "epoch": 1.17, "learning_rate": 4.359762501077012e-05, "loss": 3.975, "step": 8867000 }, { "epoch": 1.17, "learning_rate": 4.359693515795373e-05, "loss": 3.9954, "step": 8867500 }, { "epoch": 1.17, "learning_rate": 4.359624527343216e-05, "loss": 3.9642, "step": 8868000 }, { "epoch": 1.17, "learning_rate": 4.359555535720658e-05, "loss": 3.9994, "step": 8868500 }, { "epoch": 1.17, "learning_rate": 4.359486540927815e-05, "loss": 3.9743, "step": 8869000 }, { "epoch": 1.17, "learning_rate": 4.3594175429648066e-05, "loss": 3.9743, "step": 8869500 }, { "epoch": 1.17, "learning_rate": 4.3593485418317495e-05, "loss": 3.963, "step": 8870000 }, { "epoch": 1.17, "learning_rate": 4.3592795375287624e-05, "loss": 3.9547, "step": 8870500 }, { "epoch": 1.17, "learning_rate": 4.3592105300559605e-05, "loss": 3.9598, "step": 8871000 }, { "epoch": 1.17, "learning_rate": 4.359141519413464e-05, "loss": 3.9632, "step": 8871500 }, { "epoch": 1.17, "learning_rate": 4.359072505601389e-05, "loss": 3.9643, "step": 8872000 }, { "epoch": 1.17, "learning_rate": 4.359003488619854e-05, "loss": 3.9799, "step": 8872500 }, { "epoch": 1.17, "learning_rate": 4.358934468468977e-05, "loss": 3.9834, "step": 8873000 }, { "epoch": 1.17, "learning_rate": 4.358865445148874e-05, "loss": 3.9793, "step": 8873500 }, { "epoch": 1.17, "learning_rate": 4.358796418659664e-05, "loss": 3.9793, "step": 8874000 }, { "epoch": 1.17, "learning_rate": 4.358727389001465e-05, "loss": 3.9753, "step": 8874500 }, { "epoch": 1.17, "learning_rate": 4.3586583561743943e-05, "loss": 3.9742, "step": 8875000 }, { "epoch": 1.17, "learning_rate": 4.358589320178569e-05, "loss": 3.9893, "step": 8875500 }, { "epoch": 1.17, "learning_rate": 4.358520281014107e-05, "loss": 3.965, "step": 8876000 }, { "epoch": 1.17, "learning_rate": 4.358451238681126e-05, "loss": 3.9826, "step": 8876500 }, { "epoch": 1.17, "learning_rate": 4.3583821931797444e-05, "loss": 3.9782, "step": 8877000 }, { "epoch": 1.17, "learning_rate": 4.358313144510079e-05, "loss": 3.9601, "step": 8877500 }, { "epoch": 1.17, "learning_rate": 4.358244092672248e-05, "loss": 3.9732, "step": 8878000 }, { "epoch": 1.17, "learning_rate": 4.358175037666369e-05, "loss": 3.9816, "step": 8878500 }, { "epoch": 1.17, "learning_rate": 4.3581059794925604e-05, "loss": 3.9743, "step": 8879000 }, { "epoch": 1.17, "learning_rate": 4.358036918150939e-05, "loss": 3.9781, "step": 8879500 }, { "epoch": 1.17, "learning_rate": 4.357967853641623e-05, "loss": 3.9845, "step": 8880000 }, { "epoch": 1.17, "learning_rate": 4.35789878596473e-05, "loss": 4.0015, "step": 8880500 }, { "epoch": 1.17, "learning_rate": 4.357829715120377e-05, "loss": 3.9742, "step": 8881000 }, { "epoch": 1.17, "learning_rate": 4.357760641108684e-05, "loss": 3.9849, "step": 8881500 }, { "epoch": 1.17, "learning_rate": 4.357691563929766e-05, "loss": 3.9849, "step": 8882000 }, { "epoch": 1.17, "learning_rate": 4.357622483583742e-05, "loss": 3.9689, "step": 8882500 }, { "epoch": 1.17, "learning_rate": 4.357553400070731e-05, "loss": 3.9774, "step": 8883000 }, { "epoch": 1.17, "learning_rate": 4.357484313390848e-05, "loss": 3.9658, "step": 8883500 }, { "epoch": 1.17, "learning_rate": 4.3574152235442134e-05, "loss": 3.9914, "step": 8884000 }, { "epoch": 1.17, "learning_rate": 4.357346130530944e-05, "loss": 3.9615, "step": 8884500 }, { "epoch": 1.17, "learning_rate": 4.357277034351158e-05, "loss": 3.9903, "step": 8885000 }, { "epoch": 1.17, "learning_rate": 4.357207935004971e-05, "loss": 3.9728, "step": 8885500 }, { "epoch": 1.17, "learning_rate": 4.3571388324925045e-05, "loss": 3.9613, "step": 8886000 }, { "epoch": 1.17, "learning_rate": 4.357069726813873e-05, "loss": 3.9884, "step": 8886500 }, { "epoch": 1.17, "learning_rate": 4.357000617969197e-05, "loss": 4.0045, "step": 8887000 }, { "epoch": 1.17, "learning_rate": 4.356931505958592e-05, "loss": 3.986, "step": 8887500 }, { "epoch": 1.17, "learning_rate": 4.356862390782177e-05, "loss": 3.9841, "step": 8888000 }, { "epoch": 1.17, "learning_rate": 4.35679327244007e-05, "loss": 3.9844, "step": 8888500 }, { "epoch": 1.17, "learning_rate": 4.356724150932388e-05, "loss": 3.9724, "step": 8889000 }, { "epoch": 1.17, "learning_rate": 4.35665502625925e-05, "loss": 3.9942, "step": 8889500 }, { "epoch": 1.17, "learning_rate": 4.3565858984207725e-05, "loss": 3.9739, "step": 8890000 }, { "epoch": 1.17, "learning_rate": 4.3565167674170745e-05, "loss": 3.9846, "step": 8890500 }, { "epoch": 1.17, "learning_rate": 4.356447633248273e-05, "loss": 3.9652, "step": 8891000 }, { "epoch": 1.17, "learning_rate": 4.3563784959144874e-05, "loss": 3.9686, "step": 8891500 }, { "epoch": 1.17, "learning_rate": 4.3563093554158326e-05, "loss": 3.9505, "step": 8892000 }, { "epoch": 1.17, "learning_rate": 4.35624021175243e-05, "loss": 3.9775, "step": 8892500 }, { "epoch": 1.17, "learning_rate": 4.3561710649243945e-05, "loss": 3.9721, "step": 8893000 }, { "epoch": 1.17, "learning_rate": 4.356101914931846e-05, "loss": 3.9753, "step": 8893500 }, { "epoch": 1.17, "learning_rate": 4.356032761774902e-05, "loss": 3.9861, "step": 8894000 }, { "epoch": 1.17, "learning_rate": 4.35596360545368e-05, "loss": 3.9781, "step": 8894500 }, { "epoch": 1.17, "learning_rate": 4.355894445968297e-05, "loss": 3.9682, "step": 8895000 }, { "epoch": 1.17, "learning_rate": 4.355825283318873e-05, "loss": 3.9857, "step": 8895500 }, { "epoch": 1.17, "learning_rate": 4.355756117505525e-05, "loss": 3.9674, "step": 8896000 }, { "epoch": 1.17, "learning_rate": 4.355686948528369e-05, "loss": 3.9741, "step": 8896500 }, { "epoch": 1.17, "learning_rate": 4.355617776387526e-05, "loss": 3.9511, "step": 8897000 }, { "epoch": 1.17, "learning_rate": 4.355548601083113e-05, "loss": 3.9672, "step": 8897500 }, { "epoch": 1.17, "learning_rate": 4.355479422615246e-05, "loss": 3.9739, "step": 8898000 }, { "epoch": 1.17, "learning_rate": 4.355410240984045e-05, "loss": 3.973, "step": 8898500 }, { "epoch": 1.17, "learning_rate": 4.3553410561896276e-05, "loss": 3.9719, "step": 8899000 }, { "epoch": 1.17, "learning_rate": 4.355271868232112e-05, "loss": 3.9621, "step": 8899500 }, { "epoch": 1.17, "learning_rate": 4.355202677111615e-05, "loss": 3.9671, "step": 8900000 }, { "epoch": 1.17, "learning_rate": 4.3551334828282556e-05, "loss": 3.9597, "step": 8900500 }, { "epoch": 1.17, "learning_rate": 4.355064285382151e-05, "loss": 3.9588, "step": 8901000 }, { "epoch": 1.17, "learning_rate": 4.35499508477342e-05, "loss": 3.9554, "step": 8901500 }, { "epoch": 1.17, "learning_rate": 4.35492588100218e-05, "loss": 3.9634, "step": 8902000 }, { "epoch": 1.17, "learning_rate": 4.354856674068549e-05, "loss": 3.9576, "step": 8902500 }, { "epoch": 1.17, "learning_rate": 4.354787463972646e-05, "loss": 3.9708, "step": 8903000 }, { "epoch": 1.17, "learning_rate": 4.354718250714587e-05, "loss": 3.9644, "step": 8903500 }, { "epoch": 1.17, "learning_rate": 4.354649034294492e-05, "loss": 3.9703, "step": 8904000 }, { "epoch": 1.17, "learning_rate": 4.354579814712477e-05, "loss": 3.9626, "step": 8904500 }, { "epoch": 1.17, "learning_rate": 4.354510591968663e-05, "loss": 3.9612, "step": 8905000 }, { "epoch": 1.17, "learning_rate": 4.354441366063164e-05, "loss": 3.9538, "step": 8905500 }, { "epoch": 1.17, "learning_rate": 4.354372136996102e-05, "loss": 3.9645, "step": 8906000 }, { "epoch": 1.17, "learning_rate": 4.354302904767592e-05, "loss": 3.9486, "step": 8906500 }, { "epoch": 1.17, "learning_rate": 4.354233669377754e-05, "loss": 3.9762, "step": 8907000 }, { "epoch": 1.17, "learning_rate": 4.354164430826705e-05, "loss": 3.9599, "step": 8907500 }, { "epoch": 1.17, "learning_rate": 4.354095189114563e-05, "loss": 3.977, "step": 8908000 }, { "epoch": 1.17, "learning_rate": 4.3540259442414465e-05, "loss": 3.9743, "step": 8908500 }, { "epoch": 1.17, "learning_rate": 4.353956696207474e-05, "loss": 3.9719, "step": 8909000 }, { "epoch": 1.17, "learning_rate": 4.353887445012762e-05, "loss": 3.9622, "step": 8909500 }, { "epoch": 1.17, "learning_rate": 4.3538181906574306e-05, "loss": 3.9691, "step": 8910000 }, { "epoch": 1.17, "learning_rate": 4.3537489331415957e-05, "loss": 3.9587, "step": 8910500 }, { "epoch": 1.17, "learning_rate": 4.353679672465377e-05, "loss": 3.9726, "step": 8911000 }, { "epoch": 1.17, "learning_rate": 4.353610408628893e-05, "loss": 3.9606, "step": 8911500 }, { "epoch": 1.17, "learning_rate": 4.3535411416322596e-05, "loss": 3.9888, "step": 8912000 }, { "epoch": 1.17, "learning_rate": 4.353471871475597e-05, "loss": 3.9918, "step": 8912500 }, { "epoch": 1.17, "learning_rate": 4.353402598159022e-05, "loss": 3.9631, "step": 8913000 }, { "epoch": 1.17, "learning_rate": 4.3533333216826525e-05, "loss": 3.9815, "step": 8913500 }, { "epoch": 1.17, "learning_rate": 4.353264042046608e-05, "loss": 3.9622, "step": 8914000 }, { "epoch": 1.17, "learning_rate": 4.353194759251006e-05, "loss": 3.9668, "step": 8914500 }, { "epoch": 1.17, "learning_rate": 4.3531254732959634e-05, "loss": 3.9899, "step": 8915000 }, { "epoch": 1.17, "learning_rate": 4.353056184181601e-05, "loss": 3.9776, "step": 8915500 }, { "epoch": 1.17, "learning_rate": 4.352986891908034e-05, "loss": 3.975, "step": 8916000 }, { "epoch": 1.17, "learning_rate": 4.3529175964753825e-05, "loss": 3.9698, "step": 8916500 }, { "epoch": 1.17, "learning_rate": 4.3528482978837636e-05, "loss": 3.9711, "step": 8917000 }, { "epoch": 1.17, "learning_rate": 4.3527789961332966e-05, "loss": 3.9685, "step": 8917500 }, { "epoch": 1.17, "learning_rate": 4.352709691224098e-05, "loss": 3.9764, "step": 8918000 }, { "epoch": 1.17, "learning_rate": 4.352640383156288e-05, "loss": 3.9707, "step": 8918500 }, { "epoch": 1.17, "learning_rate": 4.3525710719299826e-05, "loss": 3.9657, "step": 8919000 }, { "epoch": 1.17, "learning_rate": 4.352501757545301e-05, "loss": 3.9564, "step": 8919500 }, { "epoch": 1.17, "learning_rate": 4.352432440002361e-05, "loss": 3.9643, "step": 8920000 }, { "epoch": 1.17, "learning_rate": 4.3523631193012824e-05, "loss": 3.9633, "step": 8920500 }, { "epoch": 1.17, "learning_rate": 4.3522937954421815e-05, "loss": 3.9494, "step": 8921000 }, { "epoch": 1.17, "learning_rate": 4.352224468425177e-05, "loss": 3.9863, "step": 8921500 }, { "epoch": 1.17, "learning_rate": 4.352155138250387e-05, "loss": 3.9632, "step": 8922000 }, { "epoch": 1.17, "learning_rate": 4.3520858049179306e-05, "loss": 3.9863, "step": 8922500 }, { "epoch": 1.17, "learning_rate": 4.352016468427924e-05, "loss": 3.976, "step": 8923000 }, { "epoch": 1.17, "learning_rate": 4.351947128780488e-05, "loss": 3.9792, "step": 8923500 }, { "epoch": 1.17, "learning_rate": 4.3518777859757386e-05, "loss": 3.9583, "step": 8924000 }, { "epoch": 1.17, "learning_rate": 4.3518084400137946e-05, "loss": 3.9747, "step": 8924500 }, { "epoch": 1.17, "learning_rate": 4.351739090894776e-05, "loss": 3.9598, "step": 8925000 }, { "epoch": 1.17, "learning_rate": 4.351669738618799e-05, "loss": 3.9739, "step": 8925500 }, { "epoch": 1.17, "learning_rate": 4.351600383185982e-05, "loss": 3.9735, "step": 8926000 }, { "epoch": 1.17, "learning_rate": 4.3515310245964437e-05, "loss": 3.9753, "step": 8926500 }, { "epoch": 1.17, "learning_rate": 4.351461662850302e-05, "loss": 3.9512, "step": 8927000 }, { "epoch": 1.17, "learning_rate": 4.3513922979476764e-05, "loss": 3.9847, "step": 8927500 }, { "epoch": 1.17, "learning_rate": 4.351322929888684e-05, "loss": 3.9586, "step": 8928000 }, { "epoch": 1.17, "learning_rate": 4.351253558673443e-05, "loss": 3.954, "step": 8928500 }, { "epoch": 1.17, "learning_rate": 4.3511841843020726e-05, "loss": 3.9708, "step": 8929000 }, { "epoch": 1.17, "learning_rate": 4.351114806774689e-05, "loss": 3.9578, "step": 8929500 }, { "epoch": 1.17, "learning_rate": 4.351045426091414e-05, "loss": 3.9643, "step": 8930000 }, { "epoch": 1.17, "learning_rate": 4.3509760422523614e-05, "loss": 3.9497, "step": 8930500 }, { "epoch": 1.17, "learning_rate": 4.350906655257653e-05, "loss": 3.9612, "step": 8931000 }, { "epoch": 1.17, "learning_rate": 4.3508372651074065e-05, "loss": 3.967, "step": 8931500 }, { "epoch": 1.17, "learning_rate": 4.3507678718017385e-05, "loss": 3.9602, "step": 8932000 }, { "epoch": 1.17, "learning_rate": 4.35069847534077e-05, "loss": 3.9886, "step": 8932500 }, { "epoch": 1.17, "learning_rate": 4.3506290757246174e-05, "loss": 3.9642, "step": 8933000 }, { "epoch": 1.17, "learning_rate": 4.350559672953398e-05, "loss": 3.9782, "step": 8933500 }, { "epoch": 1.17, "learning_rate": 4.350490267027233e-05, "loss": 3.9624, "step": 8934000 }, { "epoch": 1.17, "learning_rate": 4.350420857946238e-05, "loss": 3.9866, "step": 8934500 }, { "epoch": 1.17, "learning_rate": 4.350351445710534e-05, "loss": 3.9691, "step": 8935000 }, { "epoch": 1.17, "learning_rate": 4.350282030320237e-05, "loss": 3.9791, "step": 8935500 }, { "epoch": 1.17, "learning_rate": 4.3502126117754665e-05, "loss": 3.9886, "step": 8936000 }, { "epoch": 1.17, "learning_rate": 4.3501431900763406e-05, "loss": 3.9391, "step": 8936500 }, { "epoch": 1.17, "learning_rate": 4.350073765222978e-05, "loss": 3.9858, "step": 8937000 }, { "epoch": 1.17, "learning_rate": 4.350004337215496e-05, "loss": 3.9818, "step": 8937500 }, { "epoch": 1.17, "learning_rate": 4.349934906054015e-05, "loss": 3.9752, "step": 8938000 }, { "epoch": 1.17, "learning_rate": 4.34986547173865e-05, "loss": 3.956, "step": 8938500 }, { "epoch": 1.17, "learning_rate": 4.349796034269523e-05, "loss": 3.9607, "step": 8939000 }, { "epoch": 1.17, "learning_rate": 4.3497265936467516e-05, "loss": 3.9986, "step": 8939500 }, { "epoch": 1.17, "learning_rate": 4.349657149870452e-05, "loss": 3.9432, "step": 8940000 }, { "epoch": 1.18, "learning_rate": 4.349587702940744e-05, "loss": 3.9663, "step": 8940500 }, { "epoch": 1.18, "learning_rate": 4.349518252857746e-05, "loss": 3.9601, "step": 8941000 }, { "epoch": 1.18, "learning_rate": 4.349448799621577e-05, "loss": 3.9578, "step": 8941500 }, { "epoch": 1.18, "learning_rate": 4.3493793432323544e-05, "loss": 3.9768, "step": 8942000 }, { "epoch": 1.18, "learning_rate": 4.3493098836901966e-05, "loss": 3.9669, "step": 8942500 }, { "epoch": 1.18, "learning_rate": 4.3492404209952235e-05, "loss": 3.9858, "step": 8943000 }, { "epoch": 1.18, "learning_rate": 4.3491709551475525e-05, "loss": 3.9753, "step": 8943500 }, { "epoch": 1.18, "learning_rate": 4.3491014861473015e-05, "loss": 3.9732, "step": 8944000 }, { "epoch": 1.18, "learning_rate": 4.3490320139945897e-05, "loss": 3.9916, "step": 8944500 }, { "epoch": 1.18, "learning_rate": 4.348962538689534e-05, "loss": 3.9589, "step": 8945000 }, { "epoch": 1.18, "learning_rate": 4.348893060232255e-05, "loss": 3.9634, "step": 8945500 }, { "epoch": 1.18, "learning_rate": 4.348823578622871e-05, "loss": 3.9864, "step": 8946000 }, { "epoch": 1.18, "learning_rate": 4.348754093861499e-05, "loss": 3.966, "step": 8946500 }, { "epoch": 1.18, "learning_rate": 4.348684605948258e-05, "loss": 3.974, "step": 8947000 }, { "epoch": 1.18, "learning_rate": 4.348615114883268e-05, "loss": 3.9907, "step": 8947500 }, { "epoch": 1.18, "learning_rate": 4.348545620666644e-05, "loss": 3.9787, "step": 8948000 }, { "epoch": 1.18, "learning_rate": 4.348476123298508e-05, "loss": 3.9798, "step": 8948500 }, { "epoch": 1.18, "learning_rate": 4.348406622778977e-05, "loss": 3.9661, "step": 8949000 }, { "epoch": 1.18, "learning_rate": 4.3483371191081693e-05, "loss": 3.979, "step": 8949500 }, { "epoch": 1.18, "learning_rate": 4.3482676122862045e-05, "loss": 3.9508, "step": 8950000 }, { "epoch": 1.18, "learning_rate": 4.348198102313199e-05, "loss": 3.9547, "step": 8950500 }, { "epoch": 1.18, "learning_rate": 4.348128589189273e-05, "loss": 3.9501, "step": 8951000 }, { "epoch": 1.18, "learning_rate": 4.348059072914545e-05, "loss": 3.9715, "step": 8951500 }, { "epoch": 1.18, "learning_rate": 4.347989553489132e-05, "loss": 3.9733, "step": 8952000 }, { "epoch": 1.18, "learning_rate": 4.3479200309131554e-05, "loss": 3.9656, "step": 8952500 }, { "epoch": 1.18, "learning_rate": 4.3478505051867304e-05, "loss": 3.9963, "step": 8953000 }, { "epoch": 1.18, "learning_rate": 4.347780976309978e-05, "loss": 3.9636, "step": 8953500 }, { "epoch": 1.18, "learning_rate": 4.3477114442830155e-05, "loss": 3.9606, "step": 8954000 }, { "epoch": 1.18, "learning_rate": 4.3476419091059614e-05, "loss": 3.9635, "step": 8954500 }, { "epoch": 1.18, "learning_rate": 4.347572370778935e-05, "loss": 3.9744, "step": 8955000 }, { "epoch": 1.18, "learning_rate": 4.347502829302055e-05, "loss": 3.9725, "step": 8955500 }, { "epoch": 1.18, "learning_rate": 4.347433284675439e-05, "loss": 3.979, "step": 8956000 }, { "epoch": 1.18, "learning_rate": 4.347363736899206e-05, "loss": 3.9792, "step": 8956500 }, { "epoch": 1.18, "learning_rate": 4.347294185973474e-05, "loss": 3.973, "step": 8957000 }, { "epoch": 1.18, "learning_rate": 4.347224631898362e-05, "loss": 3.9863, "step": 8957500 }, { "epoch": 1.18, "learning_rate": 4.34715507467399e-05, "loss": 3.9802, "step": 8958000 }, { "epoch": 1.18, "learning_rate": 4.347085514300474e-05, "loss": 3.986, "step": 8958500 }, { "epoch": 1.18, "learning_rate": 4.347015950777935e-05, "loss": 3.9551, "step": 8959000 }, { "epoch": 1.18, "learning_rate": 4.3469463841064895e-05, "loss": 3.9546, "step": 8959500 }, { "epoch": 1.18, "learning_rate": 4.346876814286258e-05, "loss": 3.9652, "step": 8960000 }, { "epoch": 1.18, "learning_rate": 4.346807241317358e-05, "loss": 3.9436, "step": 8960500 }, { "epoch": 1.18, "learning_rate": 4.346737665199907e-05, "loss": 3.9657, "step": 8961000 }, { "epoch": 1.18, "learning_rate": 4.346668085934026e-05, "loss": 3.988, "step": 8961500 }, { "epoch": 1.18, "learning_rate": 4.346598503519833e-05, "loss": 3.9757, "step": 8962000 }, { "epoch": 1.18, "learning_rate": 4.3465289179574456e-05, "loss": 3.964, "step": 8962500 }, { "epoch": 1.18, "learning_rate": 4.346459329246983e-05, "loss": 3.9442, "step": 8963000 }, { "epoch": 1.18, "learning_rate": 4.3463897373885636e-05, "loss": 3.982, "step": 8963500 }, { "epoch": 1.18, "learning_rate": 4.346320142382306e-05, "loss": 3.9631, "step": 8964000 }, { "epoch": 1.18, "learning_rate": 4.346250544228331e-05, "loss": 3.9816, "step": 8964500 }, { "epoch": 1.18, "learning_rate": 4.346180942926754e-05, "loss": 3.9713, "step": 8965000 }, { "epoch": 1.18, "learning_rate": 4.346111338477695e-05, "loss": 3.9701, "step": 8965500 }, { "epoch": 1.18, "learning_rate": 4.346041730881273e-05, "loss": 3.9667, "step": 8966000 }, { "epoch": 1.18, "learning_rate": 4.3459721201376056e-05, "loss": 3.9714, "step": 8966500 }, { "epoch": 1.18, "learning_rate": 4.345902506246813e-05, "loss": 3.9519, "step": 8967000 }, { "epoch": 1.18, "learning_rate": 4.345832889209014e-05, "loss": 3.9761, "step": 8967500 }, { "epoch": 1.18, "learning_rate": 4.3457632690243255e-05, "loss": 3.9818, "step": 8968000 }, { "epoch": 1.18, "learning_rate": 4.345693645692866e-05, "loss": 3.9833, "step": 8968500 }, { "epoch": 1.18, "learning_rate": 4.345624019214757e-05, "loss": 3.9729, "step": 8969000 }, { "epoch": 1.18, "learning_rate": 4.345554389590115e-05, "loss": 3.9698, "step": 8969500 }, { "epoch": 1.18, "learning_rate": 4.3454847568190593e-05, "loss": 3.9694, "step": 8970000 }, { "epoch": 1.18, "learning_rate": 4.345415120901709e-05, "loss": 3.9654, "step": 8970500 }, { "epoch": 1.18, "learning_rate": 4.345345481838182e-05, "loss": 3.9703, "step": 8971000 }, { "epoch": 1.18, "learning_rate": 4.345275839628597e-05, "loss": 3.9665, "step": 8971500 }, { "epoch": 1.18, "learning_rate": 4.345206194273074e-05, "loss": 3.9791, "step": 8972000 }, { "epoch": 1.18, "learning_rate": 4.34513654577173e-05, "loss": 3.9571, "step": 8972500 }, { "epoch": 1.18, "learning_rate": 4.345066894124685e-05, "loss": 3.9856, "step": 8973000 }, { "epoch": 1.18, "learning_rate": 4.344997239332057e-05, "loss": 3.9573, "step": 8973500 }, { "epoch": 1.18, "learning_rate": 4.344927581393966e-05, "loss": 3.9717, "step": 8974000 }, { "epoch": 1.18, "learning_rate": 4.3448579203105286e-05, "loss": 3.9784, "step": 8974500 }, { "epoch": 1.18, "learning_rate": 4.3447882560818656e-05, "loss": 3.9853, "step": 8975000 }, { "epoch": 1.18, "learning_rate": 4.3447185887080955e-05, "loss": 3.9801, "step": 8975500 }, { "epoch": 1.18, "learning_rate": 4.3446489181893354e-05, "loss": 3.9862, "step": 8976000 }, { "epoch": 1.18, "learning_rate": 4.344579244525706e-05, "loss": 3.9632, "step": 8976500 }, { "epoch": 1.18, "learning_rate": 4.344509567717325e-05, "loss": 3.9781, "step": 8977000 }, { "epoch": 1.18, "learning_rate": 4.3444398877643114e-05, "loss": 3.9771, "step": 8977500 }, { "epoch": 1.18, "learning_rate": 4.344370204666785e-05, "loss": 3.9705, "step": 8978000 }, { "epoch": 1.18, "learning_rate": 4.3443005184248624e-05, "loss": 3.9742, "step": 8978500 }, { "epoch": 1.18, "learning_rate": 4.344230829038665e-05, "loss": 3.9838, "step": 8979000 }, { "epoch": 1.18, "learning_rate": 4.344161136508309e-05, "loss": 3.9771, "step": 8979500 }, { "epoch": 1.18, "learning_rate": 4.344091440833915e-05, "loss": 3.9625, "step": 8980000 }, { "epoch": 1.18, "learning_rate": 4.344021742015602e-05, "loss": 3.9786, "step": 8980500 }, { "epoch": 1.18, "learning_rate": 4.343952040053488e-05, "loss": 3.9827, "step": 8981000 }, { "epoch": 1.18, "learning_rate": 4.343882334947692e-05, "loss": 3.9505, "step": 8981500 }, { "epoch": 1.18, "learning_rate": 4.3438126266983326e-05, "loss": 3.9583, "step": 8982000 }, { "epoch": 1.18, "learning_rate": 4.343742915305529e-05, "loss": 4.0005, "step": 8982500 }, { "epoch": 1.18, "learning_rate": 4.3436732007694e-05, "loss": 3.9588, "step": 8983000 }, { "epoch": 1.18, "learning_rate": 4.343603483090064e-05, "loss": 3.9804, "step": 8983500 }, { "epoch": 1.18, "learning_rate": 4.343533762267641e-05, "loss": 3.9768, "step": 8984000 }, { "epoch": 1.18, "learning_rate": 4.343464038302249e-05, "loss": 3.9567, "step": 8984500 }, { "epoch": 1.18, "learning_rate": 4.343394311194007e-05, "loss": 3.9488, "step": 8985000 }, { "epoch": 1.18, "learning_rate": 4.3433245809430335e-05, "loss": 3.9793, "step": 8985500 }, { "epoch": 1.18, "learning_rate": 4.3432548475494474e-05, "loss": 3.9613, "step": 8986000 }, { "epoch": 1.18, "learning_rate": 4.343185111013369e-05, "loss": 3.9753, "step": 8986500 }, { "epoch": 1.18, "learning_rate": 4.343115371334915e-05, "loss": 3.9699, "step": 8987000 }, { "epoch": 1.18, "learning_rate": 4.3430456285142064e-05, "loss": 3.9551, "step": 8987500 }, { "epoch": 1.18, "learning_rate": 4.342975882551361e-05, "loss": 3.9688, "step": 8988000 }, { "epoch": 1.18, "learning_rate": 4.342906133446497e-05, "loss": 3.9638, "step": 8988500 }, { "epoch": 1.18, "learning_rate": 4.3428363811997345e-05, "loss": 3.9863, "step": 8989000 }, { "epoch": 1.18, "learning_rate": 4.342766625811193e-05, "loss": 3.9623, "step": 8989500 }, { "epoch": 1.18, "learning_rate": 4.342696867280989e-05, "loss": 3.9846, "step": 8990000 }, { "epoch": 1.18, "learning_rate": 4.3426271056092435e-05, "loss": 3.9685, "step": 8990500 }, { "epoch": 1.18, "learning_rate": 4.3425573407960753e-05, "loss": 3.9957, "step": 8991000 }, { "epoch": 1.18, "learning_rate": 4.342487572841603e-05, "loss": 3.9803, "step": 8991500 }, { "epoch": 1.18, "learning_rate": 4.3424178017459446e-05, "loss": 3.9788, "step": 8992000 }, { "epoch": 1.18, "learning_rate": 4.342348027509221e-05, "loss": 3.9492, "step": 8992500 }, { "epoch": 1.18, "learning_rate": 4.342278250131549e-05, "loss": 3.973, "step": 8993000 }, { "epoch": 1.18, "learning_rate": 4.342208469613049e-05, "loss": 3.9676, "step": 8993500 }, { "epoch": 1.18, "learning_rate": 4.34213868595384e-05, "loss": 3.95, "step": 8994000 }, { "epoch": 1.18, "learning_rate": 4.34206889915404e-05, "loss": 3.9799, "step": 8994500 }, { "epoch": 1.18, "learning_rate": 4.3419991092137686e-05, "loss": 3.9864, "step": 8995000 }, { "epoch": 1.18, "learning_rate": 4.3419293161331445e-05, "loss": 3.9616, "step": 8995500 }, { "epoch": 1.18, "learning_rate": 4.341859519912288e-05, "loss": 3.9717, "step": 8996000 }, { "epoch": 1.18, "learning_rate": 4.341789720551316e-05, "loss": 3.9756, "step": 8996500 }, { "epoch": 1.18, "learning_rate": 4.3417199180503486e-05, "loss": 3.967, "step": 8997000 }, { "epoch": 1.18, "learning_rate": 4.3416501124095046e-05, "loss": 3.9544, "step": 8997500 }, { "epoch": 1.18, "learning_rate": 4.3415803036289026e-05, "loss": 3.9672, "step": 8998000 }, { "epoch": 1.18, "learning_rate": 4.341510491708663e-05, "loss": 3.9603, "step": 8998500 }, { "epoch": 1.18, "learning_rate": 4.341440676648904e-05, "loss": 3.953, "step": 8999000 }, { "epoch": 1.18, "learning_rate": 4.341370858449744e-05, "loss": 3.9824, "step": 8999500 }, { "epoch": 1.18, "learning_rate": 4.3413010371113025e-05, "loss": 3.967, "step": 9000000 }, { "epoch": 1.18, "learning_rate": 4.341231212633699e-05, "loss": 3.9881, "step": 9000500 }, { "epoch": 1.18, "learning_rate": 4.341161385017052e-05, "loss": 3.9702, "step": 9001000 }, { "epoch": 1.18, "learning_rate": 4.341091554261481e-05, "loss": 3.9699, "step": 9001500 }, { "epoch": 1.18, "learning_rate": 4.3410217203671044e-05, "loss": 3.9595, "step": 9002000 }, { "epoch": 1.18, "learning_rate": 4.340951883334041e-05, "loss": 3.9766, "step": 9002500 }, { "epoch": 1.18, "learning_rate": 4.340882043162412e-05, "loss": 3.9614, "step": 9003000 }, { "epoch": 1.18, "learning_rate": 4.3408121998523346e-05, "loss": 3.9778, "step": 9003500 }, { "epoch": 1.18, "learning_rate": 4.3407423534039274e-05, "loss": 3.9756, "step": 9004000 }, { "epoch": 1.18, "learning_rate": 4.34067250381731e-05, "loss": 3.967, "step": 9004500 }, { "epoch": 1.18, "learning_rate": 4.340602651092603e-05, "loss": 3.9663, "step": 9005000 }, { "epoch": 1.18, "learning_rate": 4.340532795229924e-05, "loss": 3.9644, "step": 9005500 }, { "epoch": 1.18, "learning_rate": 4.3404629362293915e-05, "loss": 3.9721, "step": 9006000 }, { "epoch": 1.18, "learning_rate": 4.340393074091126e-05, "loss": 3.9815, "step": 9006500 }, { "epoch": 1.18, "learning_rate": 4.340323208815246e-05, "loss": 3.9796, "step": 9007000 }, { "epoch": 1.18, "learning_rate": 4.340253340401871e-05, "loss": 3.9718, "step": 9007500 }, { "epoch": 1.18, "learning_rate": 4.3401834688511193e-05, "loss": 3.981, "step": 9008000 }, { "epoch": 1.18, "learning_rate": 4.340113594163111e-05, "loss": 3.9767, "step": 9008500 }, { "epoch": 1.18, "learning_rate": 4.3400437163379645e-05, "loss": 3.9709, "step": 9009000 }, { "epoch": 1.18, "learning_rate": 4.339973835375799e-05, "loss": 3.9731, "step": 9009500 }, { "epoch": 1.18, "learning_rate": 4.339903951276734e-05, "loss": 3.9811, "step": 9010000 }, { "epoch": 1.18, "learning_rate": 4.3398340640408886e-05, "loss": 3.9597, "step": 9010500 }, { "epoch": 1.18, "learning_rate": 4.339764173668382e-05, "loss": 3.9607, "step": 9011000 }, { "epoch": 1.18, "learning_rate": 4.339694280159332e-05, "loss": 3.9662, "step": 9011500 }, { "epoch": 1.18, "learning_rate": 4.3396243835138606e-05, "loss": 3.9974, "step": 9012000 }, { "epoch": 1.18, "learning_rate": 4.339554483732084e-05, "loss": 3.9634, "step": 9012500 }, { "epoch": 1.18, "learning_rate": 4.339484580814123e-05, "loss": 3.9776, "step": 9013000 }, { "epoch": 1.18, "learning_rate": 4.339414674760096e-05, "loss": 3.9767, "step": 9013500 }, { "epoch": 1.18, "learning_rate": 4.3393447655701234e-05, "loss": 3.9933, "step": 9014000 }, { "epoch": 1.18, "learning_rate": 4.339274853244324e-05, "loss": 3.9749, "step": 9014500 }, { "epoch": 1.18, "learning_rate": 4.3392049377828155e-05, "loss": 3.975, "step": 9015000 }, { "epoch": 1.18, "learning_rate": 4.339135019185718e-05, "loss": 3.9587, "step": 9015500 }, { "epoch": 1.18, "learning_rate": 4.339065097453152e-05, "loss": 3.9604, "step": 9016000 }, { "epoch": 1.19, "learning_rate": 4.338995172585235e-05, "loss": 3.9593, "step": 9016500 }, { "epoch": 1.19, "learning_rate": 4.338925244582087e-05, "loss": 3.9665, "step": 9017000 }, { "epoch": 1.19, "learning_rate": 4.338855313443826e-05, "loss": 3.9513, "step": 9017500 }, { "epoch": 1.19, "learning_rate": 4.3387853791705734e-05, "loss": 3.9707, "step": 9018000 }, { "epoch": 1.19, "learning_rate": 4.338715441762446e-05, "loss": 3.969, "step": 9018500 }, { "epoch": 1.19, "learning_rate": 4.338645501219566e-05, "loss": 3.9697, "step": 9019000 }, { "epoch": 1.19, "learning_rate": 4.338575557542051e-05, "loss": 3.9701, "step": 9019500 }, { "epoch": 1.19, "learning_rate": 4.338505610730018e-05, "loss": 3.9543, "step": 9020000 }, { "epoch": 1.19, "learning_rate": 4.33843566078359e-05, "loss": 3.9865, "step": 9020500 }, { "epoch": 1.19, "learning_rate": 4.338365707702885e-05, "loss": 3.9698, "step": 9021000 }, { "epoch": 1.19, "learning_rate": 4.338295751488021e-05, "loss": 3.9519, "step": 9021500 }, { "epoch": 1.19, "learning_rate": 4.338225792139119e-05, "loss": 3.9951, "step": 9022000 }, { "epoch": 1.19, "learning_rate": 4.338155829656297e-05, "loss": 3.982, "step": 9022500 }, { "epoch": 1.19, "learning_rate": 4.338085864039675e-05, "loss": 3.9833, "step": 9023000 }, { "epoch": 1.19, "learning_rate": 4.3380158952893716e-05, "loss": 3.9612, "step": 9023500 }, { "epoch": 1.19, "learning_rate": 4.3379459234055066e-05, "loss": 3.9538, "step": 9024000 }, { "epoch": 1.19, "learning_rate": 4.337875948388199e-05, "loss": 3.9771, "step": 9024500 }, { "epoch": 1.19, "learning_rate": 4.33780597023757e-05, "loss": 4.0031, "step": 9025000 }, { "epoch": 1.19, "learning_rate": 4.337735988953735e-05, "loss": 3.9898, "step": 9025500 }, { "epoch": 1.19, "learning_rate": 4.337666004536817e-05, "loss": 3.968, "step": 9026000 }, { "epoch": 1.19, "learning_rate": 4.337596016986933e-05, "loss": 3.9847, "step": 9026500 }, { "epoch": 1.19, "learning_rate": 4.337526026304203e-05, "loss": 3.951, "step": 9027000 }, { "epoch": 1.19, "learning_rate": 4.337456032488747e-05, "loss": 3.9811, "step": 9027500 }, { "epoch": 1.19, "learning_rate": 4.3373860355406845e-05, "loss": 3.9718, "step": 9028000 }, { "epoch": 1.19, "learning_rate": 4.337316035460133e-05, "loss": 3.9716, "step": 9028500 }, { "epoch": 1.19, "learning_rate": 4.3372460322472134e-05, "loss": 3.9692, "step": 9029000 }, { "epoch": 1.19, "learning_rate": 4.337176025902045e-05, "loss": 3.9518, "step": 9029500 }, { "epoch": 1.19, "learning_rate": 4.337106016424746e-05, "loss": 3.9888, "step": 9030000 }, { "epoch": 1.19, "learning_rate": 4.3370360038154367e-05, "loss": 3.9712, "step": 9030500 }, { "epoch": 1.19, "learning_rate": 4.336965988074237e-05, "loss": 3.9773, "step": 9031000 }, { "epoch": 1.19, "learning_rate": 4.3368959692012656e-05, "loss": 3.973, "step": 9031500 }, { "epoch": 1.19, "learning_rate": 4.336825947196641e-05, "loss": 3.974, "step": 9032000 }, { "epoch": 1.19, "learning_rate": 4.3367559220604834e-05, "loss": 3.9751, "step": 9032500 }, { "epoch": 1.19, "learning_rate": 4.3366858937929135e-05, "loss": 3.9829, "step": 9033000 }, { "epoch": 1.19, "learning_rate": 4.336615862394048e-05, "loss": 3.9792, "step": 9033500 }, { "epoch": 1.19, "learning_rate": 4.336545827864009e-05, "loss": 3.9783, "step": 9034000 }, { "epoch": 1.19, "learning_rate": 4.3364757902029134e-05, "loss": 3.9426, "step": 9034500 }, { "epoch": 1.19, "learning_rate": 4.3364057494108826e-05, "loss": 3.9671, "step": 9035000 }, { "epoch": 1.19, "learning_rate": 4.336335705488034e-05, "loss": 3.983, "step": 9035500 }, { "epoch": 1.19, "learning_rate": 4.336265658434489e-05, "loss": 3.9726, "step": 9036000 }, { "epoch": 1.19, "learning_rate": 4.336195608250366e-05, "loss": 3.9801, "step": 9036500 }, { "epoch": 1.19, "learning_rate": 4.336125554935785e-05, "loss": 3.948, "step": 9037000 }, { "epoch": 1.19, "learning_rate": 4.3360554984908654e-05, "loss": 3.9775, "step": 9037500 }, { "epoch": 1.19, "learning_rate": 4.335985438915726e-05, "loss": 3.9859, "step": 9038000 }, { "epoch": 1.19, "learning_rate": 4.3359153762104865e-05, "loss": 3.9682, "step": 9038500 }, { "epoch": 1.19, "learning_rate": 4.335845310375267e-05, "loss": 3.9701, "step": 9039000 }, { "epoch": 1.19, "learning_rate": 4.335775241410186e-05, "loss": 3.9877, "step": 9039500 }, { "epoch": 1.19, "learning_rate": 4.335705169315363e-05, "loss": 3.9645, "step": 9040000 }, { "epoch": 1.19, "learning_rate": 4.3356350940909175e-05, "loss": 3.9663, "step": 9040500 }, { "epoch": 1.19, "learning_rate": 4.33556501573697e-05, "loss": 3.9625, "step": 9041000 }, { "epoch": 1.19, "learning_rate": 4.335494934253639e-05, "loss": 3.9811, "step": 9041500 }, { "epoch": 1.19, "learning_rate": 4.3354248496410446e-05, "loss": 3.9798, "step": 9042000 }, { "epoch": 1.19, "learning_rate": 4.335354761899305e-05, "loss": 3.9878, "step": 9042500 }, { "epoch": 1.19, "learning_rate": 4.335284671028541e-05, "loss": 3.9768, "step": 9043000 }, { "epoch": 1.19, "learning_rate": 4.3352145770288724e-05, "loss": 3.9648, "step": 9043500 }, { "epoch": 1.19, "learning_rate": 4.3351444799004173e-05, "loss": 3.9618, "step": 9044000 }, { "epoch": 1.19, "learning_rate": 4.335074379643295e-05, "loss": 3.974, "step": 9044500 }, { "epoch": 1.19, "learning_rate": 4.335004276257627e-05, "loss": 3.9839, "step": 9045000 }, { "epoch": 1.19, "learning_rate": 4.334934169743532e-05, "loss": 3.9732, "step": 9045500 }, { "epoch": 1.19, "learning_rate": 4.334864060101129e-05, "loss": 3.9876, "step": 9046000 }, { "epoch": 1.19, "learning_rate": 4.3347939473305374e-05, "loss": 3.9863, "step": 9046500 }, { "epoch": 1.19, "learning_rate": 4.334723831431877e-05, "loss": 3.9844, "step": 9047000 }, { "epoch": 1.19, "learning_rate": 4.3346537124052675e-05, "loss": 3.9874, "step": 9047500 }, { "epoch": 1.19, "learning_rate": 4.334583590250829e-05, "loss": 3.9627, "step": 9048000 }, { "epoch": 1.19, "learning_rate": 4.33451346496868e-05, "loss": 3.9805, "step": 9048500 }, { "epoch": 1.19, "learning_rate": 4.33444333655894e-05, "loss": 3.9829, "step": 9049000 }, { "epoch": 1.19, "learning_rate": 4.33437320502173e-05, "loss": 3.9865, "step": 9049500 }, { "epoch": 1.19, "learning_rate": 4.334303070357168e-05, "loss": 3.9818, "step": 9050000 }, { "epoch": 1.19, "learning_rate": 4.334232932565374e-05, "loss": 3.9908, "step": 9050500 }, { "epoch": 1.19, "learning_rate": 4.3341627916464684e-05, "loss": 3.9739, "step": 9051000 }, { "epoch": 1.19, "learning_rate": 4.33409264760057e-05, "loss": 3.9612, "step": 9051500 }, { "epoch": 1.19, "learning_rate": 4.334022500427798e-05, "loss": 3.9794, "step": 9052000 }, { "epoch": 1.19, "learning_rate": 4.333952350128273e-05, "loss": 3.9562, "step": 9052500 }, { "epoch": 1.19, "learning_rate": 4.333882196702114e-05, "loss": 3.9606, "step": 9053000 }, { "epoch": 1.19, "learning_rate": 4.333812040149441e-05, "loss": 3.9693, "step": 9053500 }, { "epoch": 1.19, "learning_rate": 4.333741880470372e-05, "loss": 3.9632, "step": 9054000 }, { "epoch": 1.19, "learning_rate": 4.3336717176650294e-05, "loss": 3.9613, "step": 9054500 }, { "epoch": 1.19, "learning_rate": 4.333601551733531e-05, "loss": 3.9568, "step": 9055000 }, { "epoch": 1.19, "learning_rate": 4.3335313826759966e-05, "loss": 3.9743, "step": 9055500 }, { "epoch": 1.19, "learning_rate": 4.333461210492546e-05, "loss": 3.9686, "step": 9056000 }, { "epoch": 1.19, "learning_rate": 4.3333910351832986e-05, "loss": 3.967, "step": 9056500 }, { "epoch": 1.19, "learning_rate": 4.3333208567483745e-05, "loss": 3.9606, "step": 9057000 }, { "epoch": 1.19, "learning_rate": 4.3332506751878935e-05, "loss": 3.9725, "step": 9057500 }, { "epoch": 1.19, "learning_rate": 4.333180490501975e-05, "loss": 3.9484, "step": 9058000 }, { "epoch": 1.19, "learning_rate": 4.3331103026907375e-05, "loss": 3.979, "step": 9058500 }, { "epoch": 1.19, "learning_rate": 4.333040111754302e-05, "loss": 3.9683, "step": 9059000 }, { "epoch": 1.19, "learning_rate": 4.3329699176927886e-05, "loss": 3.98, "step": 9059500 }, { "epoch": 1.19, "learning_rate": 4.332899720506315e-05, "loss": 3.9852, "step": 9060000 }, { "epoch": 1.19, "learning_rate": 4.332829520195003e-05, "loss": 3.973, "step": 9060500 }, { "epoch": 1.19, "learning_rate": 4.3327593167589706e-05, "loss": 3.9794, "step": 9061000 }, { "epoch": 1.19, "learning_rate": 4.332689110198339e-05, "loss": 3.9818, "step": 9061500 }, { "epoch": 1.19, "learning_rate": 4.332618900513227e-05, "loss": 3.971, "step": 9062000 }, { "epoch": 1.19, "learning_rate": 4.332548687703754e-05, "loss": 3.9754, "step": 9062500 }, { "epoch": 1.19, "learning_rate": 4.3324784717700413e-05, "loss": 3.9911, "step": 9063000 }, { "epoch": 1.19, "learning_rate": 4.3324082527122066e-05, "loss": 4.0017, "step": 9063500 }, { "epoch": 1.19, "learning_rate": 4.3323380305303706e-05, "loss": 3.9617, "step": 9064000 }, { "epoch": 1.19, "learning_rate": 4.332267805224653e-05, "loss": 3.9561, "step": 9064500 }, { "epoch": 1.19, "learning_rate": 4.332197576795174e-05, "loss": 3.9788, "step": 9065000 }, { "epoch": 1.19, "learning_rate": 4.332127345242052e-05, "loss": 3.9695, "step": 9065500 }, { "epoch": 1.19, "learning_rate": 4.332057110565407e-05, "loss": 3.9738, "step": 9066000 }, { "epoch": 1.19, "learning_rate": 4.33198687276536e-05, "loss": 3.9693, "step": 9066500 }, { "epoch": 1.19, "learning_rate": 4.3319166318420295e-05, "loss": 3.9857, "step": 9067000 }, { "epoch": 1.19, "learning_rate": 4.331846387795536e-05, "loss": 4.002, "step": 9067500 }, { "epoch": 1.19, "learning_rate": 4.3317761406259994e-05, "loss": 3.9611, "step": 9068000 }, { "epoch": 1.19, "learning_rate": 4.331705890333538e-05, "loss": 3.9572, "step": 9068500 }, { "epoch": 1.19, "learning_rate": 4.331635636918273e-05, "loss": 3.9573, "step": 9069000 }, { "epoch": 1.19, "learning_rate": 4.3315653803803246e-05, "loss": 3.967, "step": 9069500 }, { "epoch": 1.19, "learning_rate": 4.331495120719811e-05, "loss": 3.9734, "step": 9070000 }, { "epoch": 1.19, "learning_rate": 4.3314248579368534e-05, "loss": 3.9732, "step": 9070500 }, { "epoch": 1.19, "learning_rate": 4.3313545920315705e-05, "loss": 3.9731, "step": 9071000 }, { "epoch": 1.19, "learning_rate": 4.331284323004082e-05, "loss": 3.9621, "step": 9071500 }, { "epoch": 1.19, "learning_rate": 4.331214050854509e-05, "loss": 3.9736, "step": 9072000 }, { "epoch": 1.19, "learning_rate": 4.33114377558297e-05, "loss": 3.9723, "step": 9072500 }, { "epoch": 1.19, "learning_rate": 4.3310734971895854e-05, "loss": 3.9612, "step": 9073000 }, { "epoch": 1.19, "learning_rate": 4.331003215674475e-05, "loss": 3.9764, "step": 9073500 }, { "epoch": 1.19, "learning_rate": 4.3309329310377586e-05, "loss": 3.9721, "step": 9074000 }, { "epoch": 1.19, "learning_rate": 4.330862643279556e-05, "loss": 3.9691, "step": 9074500 }, { "epoch": 1.19, "learning_rate": 4.330792352399987e-05, "loss": 3.961, "step": 9075000 }, { "epoch": 1.19, "learning_rate": 4.330722058399172e-05, "loss": 3.9779, "step": 9075500 }, { "epoch": 1.19, "learning_rate": 4.33065176127723e-05, "loss": 3.9806, "step": 9076000 }, { "epoch": 1.19, "learning_rate": 4.330581461034281e-05, "loss": 3.984, "step": 9076500 }, { "epoch": 1.19, "learning_rate": 4.330511157670445e-05, "loss": 3.9462, "step": 9077000 }, { "epoch": 1.19, "learning_rate": 4.330440851185842e-05, "loss": 3.9583, "step": 9077500 }, { "epoch": 1.19, "learning_rate": 4.3303705415805916e-05, "loss": 3.9734, "step": 9078000 }, { "epoch": 1.19, "learning_rate": 4.330300228854813e-05, "loss": 3.9807, "step": 9078500 }, { "epoch": 1.19, "learning_rate": 4.330229913008628e-05, "loss": 3.9806, "step": 9079000 }, { "epoch": 1.19, "learning_rate": 4.3301595940421554e-05, "loss": 3.9663, "step": 9079500 }, { "epoch": 1.19, "learning_rate": 4.330089271955515e-05, "loss": 3.9794, "step": 9080000 }, { "epoch": 1.19, "learning_rate": 4.3300189467488264e-05, "loss": 3.9936, "step": 9080500 }, { "epoch": 1.19, "learning_rate": 4.329948618422209e-05, "loss": 3.967, "step": 9081000 }, { "epoch": 1.19, "learning_rate": 4.329878286975785e-05, "loss": 3.9635, "step": 9081500 }, { "epoch": 1.19, "learning_rate": 4.329807952409672e-05, "loss": 3.9502, "step": 9082000 }, { "epoch": 1.19, "learning_rate": 4.329737614723991e-05, "loss": 3.9615, "step": 9082500 }, { "epoch": 1.19, "learning_rate": 4.3296672739188616e-05, "loss": 3.9731, "step": 9083000 }, { "epoch": 1.19, "learning_rate": 4.3295969299944025e-05, "loss": 3.9835, "step": 9083500 }, { "epoch": 1.19, "learning_rate": 4.329526582950737e-05, "loss": 3.9787, "step": 9084000 }, { "epoch": 1.19, "learning_rate": 4.329456232787982e-05, "loss": 3.9643, "step": 9084500 }, { "epoch": 1.19, "learning_rate": 4.329385879506258e-05, "loss": 3.9526, "step": 9085000 }, { "epoch": 1.19, "learning_rate": 4.329315523105686e-05, "loss": 3.9561, "step": 9085500 }, { "epoch": 1.19, "learning_rate": 4.329245163586385e-05, "loss": 3.9601, "step": 9086000 }, { "epoch": 1.19, "learning_rate": 4.3291748009484745e-05, "loss": 3.9834, "step": 9086500 }, { "epoch": 1.19, "learning_rate": 4.329104435192076e-05, "loss": 3.9706, "step": 9087000 }, { "epoch": 1.19, "learning_rate": 4.3290340663173086e-05, "loss": 3.9964, "step": 9087500 }, { "epoch": 1.19, "learning_rate": 4.328963694324292e-05, "loss": 3.9685, "step": 9088000 }, { "epoch": 1.19, "learning_rate": 4.328893319213146e-05, "loss": 3.9632, "step": 9088500 }, { "epoch": 1.19, "learning_rate": 4.328822940983992e-05, "loss": 3.9826, "step": 9089000 }, { "epoch": 1.19, "learning_rate": 4.328752559636949e-05, "loss": 3.9623, "step": 9089500 }, { "epoch": 1.19, "learning_rate": 4.328682175172136e-05, "loss": 3.9613, "step": 9090000 }, { "epoch": 1.19, "learning_rate": 4.3286117875896746e-05, "loss": 3.9816, "step": 9090500 }, { "epoch": 1.19, "learning_rate": 4.328541396889685e-05, "loss": 3.9773, "step": 9091000 }, { "epoch": 1.19, "learning_rate": 4.328471003072285e-05, "loss": 3.9649, "step": 9091500 }, { "epoch": 1.19, "learning_rate": 4.328400606137598e-05, "loss": 3.9704, "step": 9092000 }, { "epoch": 1.19, "learning_rate": 4.32833020608574e-05, "loss": 3.9635, "step": 9092500 }, { "epoch": 1.2, "learning_rate": 4.3282598029168344e-05, "loss": 3.9675, "step": 9093000 }, { "epoch": 1.2, "learning_rate": 4.3281893966309996e-05, "loss": 3.9683, "step": 9093500 }, { "epoch": 1.2, "learning_rate": 4.328118987228355e-05, "loss": 3.9838, "step": 9094000 }, { "epoch": 1.2, "learning_rate": 4.328048574709023e-05, "loss": 3.971, "step": 9094500 }, { "epoch": 1.2, "learning_rate": 4.327978159073122e-05, "loss": 3.9468, "step": 9095000 }, { "epoch": 1.2, "learning_rate": 4.3279077403207714e-05, "loss": 3.9714, "step": 9095500 }, { "epoch": 1.2, "learning_rate": 4.3278373184520925e-05, "loss": 3.9664, "step": 9096000 }, { "epoch": 1.2, "learning_rate": 4.327766893467206e-05, "loss": 3.985, "step": 9096500 }, { "epoch": 1.2, "learning_rate": 4.3276964653662285e-05, "loss": 3.9486, "step": 9097000 }, { "epoch": 1.2, "learning_rate": 4.327626034149285e-05, "loss": 3.9552, "step": 9097500 }, { "epoch": 1.2, "learning_rate": 4.3275555998164916e-05, "loss": 3.96, "step": 9098000 }, { "epoch": 1.2, "learning_rate": 4.3274851623679704e-05, "loss": 3.9547, "step": 9098500 }, { "epoch": 1.2, "learning_rate": 4.327414721803841e-05, "loss": 3.9798, "step": 9099000 }, { "epoch": 1.2, "learning_rate": 4.327344278124224e-05, "loss": 3.9605, "step": 9099500 }, { "epoch": 1.2, "learning_rate": 4.327273831329238e-05, "loss": 3.9824, "step": 9100000 }, { "epoch": 1.2, "learning_rate": 4.327203381419005e-05, "loss": 3.9864, "step": 9100500 }, { "epoch": 1.2, "learning_rate": 4.3271329283936435e-05, "loss": 3.9456, "step": 9101000 }, { "epoch": 1.2, "learning_rate": 4.3270624722532736e-05, "loss": 3.98, "step": 9101500 }, { "epoch": 1.2, "learning_rate": 4.326992012998018e-05, "loss": 3.9774, "step": 9102000 }, { "epoch": 1.2, "learning_rate": 4.326921550627993e-05, "loss": 3.9559, "step": 9102500 }, { "epoch": 1.2, "learning_rate": 4.326851085143322e-05, "loss": 3.9866, "step": 9103000 }, { "epoch": 1.2, "learning_rate": 4.3267806165441224e-05, "loss": 3.9772, "step": 9103500 }, { "epoch": 1.2, "learning_rate": 4.326710144830517e-05, "loss": 3.9581, "step": 9104000 }, { "epoch": 1.2, "learning_rate": 4.326639670002624e-05, "loss": 3.9718, "step": 9104500 }, { "epoch": 1.2, "learning_rate": 4.326569192060564e-05, "loss": 4.0073, "step": 9105000 }, { "epoch": 1.2, "learning_rate": 4.326498711004459e-05, "loss": 3.976, "step": 9105500 }, { "epoch": 1.2, "learning_rate": 4.3264282268344255e-05, "loss": 3.9654, "step": 9106000 }, { "epoch": 1.2, "learning_rate": 4.326357739550587e-05, "loss": 3.9626, "step": 9106500 }, { "epoch": 1.2, "learning_rate": 4.3262872491530616e-05, "loss": 3.9624, "step": 9107000 }, { "epoch": 1.2, "learning_rate": 4.326216755641971e-05, "loss": 3.9572, "step": 9107500 }, { "epoch": 1.2, "learning_rate": 4.326146259017434e-05, "loss": 3.9782, "step": 9108000 }, { "epoch": 1.2, "learning_rate": 4.326075759279571e-05, "loss": 3.977, "step": 9108500 }, { "epoch": 1.2, "learning_rate": 4.326005256428503e-05, "loss": 3.9786, "step": 9109000 }, { "epoch": 1.2, "learning_rate": 4.3259347504643497e-05, "loss": 3.9986, "step": 9109500 }, { "epoch": 1.2, "learning_rate": 4.325864241387232e-05, "loss": 3.9518, "step": 9110000 }, { "epoch": 1.2, "learning_rate": 4.3257937291972697e-05, "loss": 3.9597, "step": 9110500 }, { "epoch": 1.2, "learning_rate": 4.325723213894581e-05, "loss": 3.9803, "step": 9111000 }, { "epoch": 1.2, "learning_rate": 4.32565269547929e-05, "loss": 3.9495, "step": 9111500 }, { "epoch": 1.2, "learning_rate": 4.3255821739515136e-05, "loss": 3.9468, "step": 9112000 }, { "epoch": 1.2, "learning_rate": 4.3255116493113745e-05, "loss": 3.9807, "step": 9112500 }, { "epoch": 1.2, "learning_rate": 4.325441121558991e-05, "loss": 3.9674, "step": 9113000 }, { "epoch": 1.2, "learning_rate": 4.325370590694484e-05, "loss": 3.9462, "step": 9113500 }, { "epoch": 1.2, "learning_rate": 4.3253000567179734e-05, "loss": 3.9872, "step": 9114000 }, { "epoch": 1.2, "learning_rate": 4.3252295196295805e-05, "loss": 3.9698, "step": 9114500 }, { "epoch": 1.2, "learning_rate": 4.3251589794294244e-05, "loss": 3.9811, "step": 9115000 }, { "epoch": 1.2, "learning_rate": 4.3250884361176264e-05, "loss": 3.9603, "step": 9115500 }, { "epoch": 1.2, "learning_rate": 4.3250178896943064e-05, "loss": 3.9738, "step": 9116000 }, { "epoch": 1.2, "learning_rate": 4.324947340159584e-05, "loss": 3.9509, "step": 9116500 }, { "epoch": 1.2, "learning_rate": 4.32487678751358e-05, "loss": 3.9647, "step": 9117000 }, { "epoch": 1.2, "learning_rate": 4.324806231756415e-05, "loss": 3.9666, "step": 9117500 }, { "epoch": 1.2, "learning_rate": 4.3247356728882085e-05, "loss": 3.9581, "step": 9118000 }, { "epoch": 1.2, "learning_rate": 4.3246651109090816e-05, "loss": 3.9317, "step": 9118500 }, { "epoch": 1.2, "learning_rate": 4.324594545819155e-05, "loss": 3.9524, "step": 9119000 }, { "epoch": 1.2, "learning_rate": 4.324523977618547e-05, "loss": 3.9774, "step": 9119500 }, { "epoch": 1.2, "learning_rate": 4.32445340630738e-05, "loss": 3.9702, "step": 9120000 }, { "epoch": 1.2, "learning_rate": 4.324382831885773e-05, "loss": 3.9657, "step": 9120500 }, { "epoch": 1.2, "learning_rate": 4.3243122543538474e-05, "loss": 3.9607, "step": 9121000 }, { "epoch": 1.2, "learning_rate": 4.324241673711722e-05, "loss": 3.9714, "step": 9121500 }, { "epoch": 1.2, "learning_rate": 4.324171089959519e-05, "loss": 3.986, "step": 9122000 }, { "epoch": 1.2, "learning_rate": 4.324100503097357e-05, "loss": 3.9645, "step": 9122500 }, { "epoch": 1.2, "learning_rate": 4.324029913125358e-05, "loss": 3.9524, "step": 9123000 }, { "epoch": 1.2, "learning_rate": 4.323959320043641e-05, "loss": 3.9771, "step": 9123500 }, { "epoch": 1.2, "learning_rate": 4.323888723852326e-05, "loss": 3.9756, "step": 9124000 }, { "epoch": 1.2, "learning_rate": 4.323818124551535e-05, "loss": 3.9553, "step": 9124500 }, { "epoch": 1.2, "learning_rate": 4.323747522141388e-05, "loss": 3.953, "step": 9125000 }, { "epoch": 1.2, "learning_rate": 4.3236769166220046e-05, "loss": 3.9872, "step": 9125500 }, { "epoch": 1.2, "learning_rate": 4.323606307993505e-05, "loss": 3.9739, "step": 9126000 }, { "epoch": 1.2, "learning_rate": 4.3235356962560105e-05, "loss": 4.0027, "step": 9126500 }, { "epoch": 1.2, "learning_rate": 4.3234650814096414e-05, "loss": 3.9665, "step": 9127000 }, { "epoch": 1.2, "learning_rate": 4.323394463454518e-05, "loss": 3.9494, "step": 9127500 }, { "epoch": 1.2, "learning_rate": 4.3233238423907595e-05, "loss": 3.9468, "step": 9128000 }, { "epoch": 1.2, "learning_rate": 4.323253218218487e-05, "loss": 3.9638, "step": 9128500 }, { "epoch": 1.2, "learning_rate": 4.323182590937822e-05, "loss": 3.9483, "step": 9129000 }, { "epoch": 1.2, "learning_rate": 4.3231119605488836e-05, "loss": 3.9747, "step": 9129500 }, { "epoch": 1.2, "learning_rate": 4.3230413270517936e-05, "loss": 3.9658, "step": 9130000 }, { "epoch": 1.2, "learning_rate": 4.32297069044667e-05, "loss": 3.9515, "step": 9130500 }, { "epoch": 1.2, "learning_rate": 4.3229000507336356e-05, "loss": 3.9743, "step": 9131000 }, { "epoch": 1.2, "learning_rate": 4.3228294079128094e-05, "loss": 3.9677, "step": 9131500 }, { "epoch": 1.2, "learning_rate": 4.322758761984312e-05, "loss": 3.9785, "step": 9132000 }, { "epoch": 1.2, "learning_rate": 4.322688112948265e-05, "loss": 3.9589, "step": 9132500 }, { "epoch": 1.2, "learning_rate": 4.3226174608047885e-05, "loss": 3.9588, "step": 9133000 }, { "epoch": 1.2, "learning_rate": 4.322546805554002e-05, "loss": 3.9616, "step": 9133500 }, { "epoch": 1.2, "learning_rate": 4.3224761471960266e-05, "loss": 3.9514, "step": 9134000 }, { "epoch": 1.2, "learning_rate": 4.322405485730983e-05, "loss": 3.967, "step": 9134500 }, { "epoch": 1.2, "learning_rate": 4.32233482115899e-05, "loss": 3.9471, "step": 9135000 }, { "epoch": 1.2, "learning_rate": 4.322264153480171e-05, "loss": 3.9635, "step": 9135500 }, { "epoch": 1.2, "learning_rate": 4.322193482694644e-05, "loss": 3.9826, "step": 9136000 }, { "epoch": 1.2, "learning_rate": 4.32212280880253e-05, "loss": 3.9392, "step": 9136500 }, { "epoch": 1.2, "learning_rate": 4.32205213180395e-05, "loss": 3.9564, "step": 9137000 }, { "epoch": 1.2, "learning_rate": 4.321981451699025e-05, "loss": 3.9828, "step": 9137500 }, { "epoch": 1.2, "learning_rate": 4.321910768487875e-05, "loss": 3.9467, "step": 9138000 }, { "epoch": 1.2, "learning_rate": 4.321840082170619e-05, "loss": 3.9696, "step": 9138500 }, { "epoch": 1.2, "learning_rate": 4.32176939274738e-05, "loss": 3.968, "step": 9139000 }, { "epoch": 1.2, "learning_rate": 4.3216987002182764e-05, "loss": 3.965, "step": 9139500 }, { "epoch": 1.2, "learning_rate": 4.321628004583431e-05, "loss": 3.9452, "step": 9140000 }, { "epoch": 1.2, "learning_rate": 4.321557305842961e-05, "loss": 3.9772, "step": 9140500 }, { "epoch": 1.2, "learning_rate": 4.321486603996991e-05, "loss": 3.9495, "step": 9141000 }, { "epoch": 1.2, "learning_rate": 4.321415899045639e-05, "loss": 3.9705, "step": 9141500 }, { "epoch": 1.2, "learning_rate": 4.3213451909890254e-05, "loss": 3.9589, "step": 9142000 }, { "epoch": 1.2, "learning_rate": 4.321274479827272e-05, "loss": 3.9691, "step": 9142500 }, { "epoch": 1.2, "learning_rate": 4.321203765560498e-05, "loss": 3.9711, "step": 9143000 }, { "epoch": 1.2, "learning_rate": 4.3211330481888244e-05, "loss": 3.9548, "step": 9143500 }, { "epoch": 1.2, "learning_rate": 4.321062327712373e-05, "loss": 3.9694, "step": 9144000 }, { "epoch": 1.2, "learning_rate": 4.320991604131263e-05, "loss": 3.9556, "step": 9144500 }, { "epoch": 1.2, "learning_rate": 4.320920877445616e-05, "loss": 3.956, "step": 9145000 }, { "epoch": 1.2, "learning_rate": 4.320850147655551e-05, "loss": 3.9569, "step": 9145500 }, { "epoch": 1.2, "learning_rate": 4.320779414761189e-05, "loss": 3.9767, "step": 9146000 }, { "epoch": 1.2, "learning_rate": 4.3207086787626525e-05, "loss": 3.9809, "step": 9146500 }, { "epoch": 1.2, "learning_rate": 4.32063793966006e-05, "loss": 3.9745, "step": 9147000 }, { "epoch": 1.2, "learning_rate": 4.320567197453533e-05, "loss": 3.9471, "step": 9147500 }, { "epoch": 1.2, "learning_rate": 4.320496452143192e-05, "loss": 3.9566, "step": 9148000 }, { "epoch": 1.2, "learning_rate": 4.320425703729158e-05, "loss": 3.9673, "step": 9148500 }, { "epoch": 1.2, "learning_rate": 4.320354952211551e-05, "loss": 3.9408, "step": 9149000 }, { "epoch": 1.2, "learning_rate": 4.3202841975904915e-05, "loss": 3.9755, "step": 9149500 }, { "epoch": 1.2, "learning_rate": 4.3202134398661e-05, "loss": 3.9671, "step": 9150000 }, { "epoch": 1.2, "learning_rate": 4.320142679038498e-05, "loss": 3.9625, "step": 9150500 }, { "epoch": 1.2, "learning_rate": 4.320071915107806e-05, "loss": 3.9567, "step": 9151000 }, { "epoch": 1.2, "learning_rate": 4.320001148074144e-05, "loss": 3.9397, "step": 9151500 }, { "epoch": 1.2, "learning_rate": 4.3199303779376326e-05, "loss": 3.9607, "step": 9152000 }, { "epoch": 1.2, "learning_rate": 4.319859604698393e-05, "loss": 3.9597, "step": 9152500 }, { "epoch": 1.2, "learning_rate": 4.319788828356546e-05, "loss": 3.9442, "step": 9153000 }, { "epoch": 1.2, "learning_rate": 4.3197180489122115e-05, "loss": 3.9495, "step": 9153500 }, { "epoch": 1.2, "learning_rate": 4.319647266365511e-05, "loss": 3.9595, "step": 9154000 }, { "epoch": 1.2, "learning_rate": 4.319576480716565e-05, "loss": 3.9665, "step": 9154500 }, { "epoch": 1.2, "learning_rate": 4.319505691965493e-05, "loss": 3.9585, "step": 9155000 }, { "epoch": 1.2, "learning_rate": 4.319434900112418e-05, "loss": 3.9578, "step": 9155500 }, { "epoch": 1.2, "learning_rate": 4.319364105157459e-05, "loss": 3.9694, "step": 9156000 }, { "epoch": 1.2, "learning_rate": 4.3192933071007356e-05, "loss": 3.9663, "step": 9156500 }, { "epoch": 1.2, "learning_rate": 4.319222505942371e-05, "loss": 3.9694, "step": 9157000 }, { "epoch": 1.2, "learning_rate": 4.3191517016824854e-05, "loss": 3.9883, "step": 9157500 }, { "epoch": 1.2, "learning_rate": 4.319080894321198e-05, "loss": 3.9676, "step": 9158000 }, { "epoch": 1.2, "learning_rate": 4.3190100838586306e-05, "loss": 3.967, "step": 9158500 }, { "epoch": 1.2, "learning_rate": 4.318939270294904e-05, "loss": 3.9649, "step": 9159000 }, { "epoch": 1.2, "learning_rate": 4.3188684536301396e-05, "loss": 3.9712, "step": 9159500 }, { "epoch": 1.2, "learning_rate": 4.318797633864456e-05, "loss": 3.9472, "step": 9160000 }, { "epoch": 1.2, "learning_rate": 4.318726810997976e-05, "loss": 3.9681, "step": 9160500 }, { "epoch": 1.2, "learning_rate": 4.318655985030819e-05, "loss": 3.9629, "step": 9161000 }, { "epoch": 1.2, "learning_rate": 4.3185851559631065e-05, "loss": 3.9489, "step": 9161500 }, { "epoch": 1.2, "learning_rate": 4.3185143237949585e-05, "loss": 3.9666, "step": 9162000 }, { "epoch": 1.2, "learning_rate": 4.318443488526497e-05, "loss": 3.977, "step": 9162500 }, { "epoch": 1.2, "learning_rate": 4.3183726501578416e-05, "loss": 3.9497, "step": 9163000 }, { "epoch": 1.2, "learning_rate": 4.318301808689114e-05, "loss": 3.9712, "step": 9163500 }, { "epoch": 1.2, "learning_rate": 4.318230964120434e-05, "loss": 3.9621, "step": 9164000 }, { "epoch": 1.2, "learning_rate": 4.318160116451924e-05, "loss": 3.9493, "step": 9164500 }, { "epoch": 1.2, "learning_rate": 4.318089265683702e-05, "loss": 3.9416, "step": 9165000 }, { "epoch": 1.2, "learning_rate": 4.3180184118158914e-05, "loss": 3.9697, "step": 9165500 }, { "epoch": 1.2, "learning_rate": 4.317947554848612e-05, "loss": 3.9563, "step": 9166000 }, { "epoch": 1.2, "learning_rate": 4.317876694781985e-05, "loss": 3.9343, "step": 9166500 }, { "epoch": 1.2, "learning_rate": 4.31780583161613e-05, "loss": 3.9528, "step": 9167000 }, { "epoch": 1.2, "learning_rate": 4.3177349653511694e-05, "loss": 3.9509, "step": 9167500 }, { "epoch": 1.2, "learning_rate": 4.317664095987223e-05, "loss": 3.968, "step": 9168000 }, { "epoch": 1.2, "learning_rate": 4.317593223524412e-05, "loss": 3.9775, "step": 9168500 }, { "epoch": 1.21, "learning_rate": 4.317522347962857e-05, "loss": 3.9453, "step": 9169000 }, { "epoch": 1.21, "learning_rate": 4.317451469302679e-05, "loss": 3.9554, "step": 9169500 }, { "epoch": 1.21, "learning_rate": 4.317380587543999e-05, "loss": 3.9633, "step": 9170000 }, { "epoch": 1.21, "learning_rate": 4.317309702686938e-05, "loss": 3.9682, "step": 9170500 }, { "epoch": 1.21, "learning_rate": 4.3172388147316154e-05, "loss": 3.9566, "step": 9171000 }, { "epoch": 1.21, "learning_rate": 4.317167923678154e-05, "loss": 3.957, "step": 9171500 }, { "epoch": 1.21, "learning_rate": 4.3170970295266736e-05, "loss": 3.958, "step": 9172000 }, { "epoch": 1.21, "learning_rate": 4.317026132277295e-05, "loss": 3.9699, "step": 9172500 }, { "epoch": 1.21, "learning_rate": 4.3169552319301405e-05, "loss": 3.9777, "step": 9173000 }, { "epoch": 1.21, "learning_rate": 4.316884328485329e-05, "loss": 3.9647, "step": 9173500 }, { "epoch": 1.21, "learning_rate": 4.3168134219429825e-05, "loss": 3.9737, "step": 9174000 }, { "epoch": 1.21, "learning_rate": 4.316742512303221e-05, "loss": 3.9626, "step": 9174500 }, { "epoch": 1.21, "learning_rate": 4.3166715995661665e-05, "loss": 3.9438, "step": 9175000 }, { "epoch": 1.21, "learning_rate": 4.3166006837319386e-05, "loss": 3.9544, "step": 9175500 }, { "epoch": 1.21, "learning_rate": 4.3165297648006595e-05, "loss": 3.9618, "step": 9176000 }, { "epoch": 1.21, "learning_rate": 4.31645884277245e-05, "loss": 3.9504, "step": 9176500 }, { "epoch": 1.21, "learning_rate": 4.316387917647431e-05, "loss": 3.981, "step": 9177000 }, { "epoch": 1.21, "learning_rate": 4.316316989425722e-05, "loss": 3.9561, "step": 9177500 }, { "epoch": 1.21, "learning_rate": 4.316246058107445e-05, "loss": 3.9532, "step": 9178000 }, { "epoch": 1.21, "learning_rate": 4.3161751236927214e-05, "loss": 3.9271, "step": 9178500 }, { "epoch": 1.21, "learning_rate": 4.316104186181671e-05, "loss": 3.9562, "step": 9179000 }, { "epoch": 1.21, "learning_rate": 4.3160332455744154e-05, "loss": 3.9557, "step": 9179500 }, { "epoch": 1.21, "learning_rate": 4.315962301871076e-05, "loss": 3.9552, "step": 9180000 }, { "epoch": 1.21, "learning_rate": 4.3158913550717736e-05, "loss": 3.9865, "step": 9180500 }, { "epoch": 1.21, "learning_rate": 4.3158204051766284e-05, "loss": 3.9356, "step": 9181000 }, { "epoch": 1.21, "learning_rate": 4.3157494521857614e-05, "loss": 3.9464, "step": 9181500 }, { "epoch": 1.21, "learning_rate": 4.315678496099293e-05, "loss": 3.9577, "step": 9182000 }, { "epoch": 1.21, "learning_rate": 4.315607536917347e-05, "loss": 3.9614, "step": 9182500 }, { "epoch": 1.21, "learning_rate": 4.315536574640042e-05, "loss": 3.9623, "step": 9183000 }, { "epoch": 1.21, "learning_rate": 4.315465609267499e-05, "loss": 3.9659, "step": 9183500 }, { "epoch": 1.21, "learning_rate": 4.315394640799839e-05, "loss": 3.9692, "step": 9184000 }, { "epoch": 1.21, "learning_rate": 4.315323669237184e-05, "loss": 3.9501, "step": 9184500 }, { "epoch": 1.21, "learning_rate": 4.3152526945796543e-05, "loss": 3.958, "step": 9185000 }, { "epoch": 1.21, "learning_rate": 4.315181716827371e-05, "loss": 3.9824, "step": 9185500 }, { "epoch": 1.21, "learning_rate": 4.315110735980455e-05, "loss": 3.9682, "step": 9186000 }, { "epoch": 1.21, "learning_rate": 4.315039752039028e-05, "loss": 3.9552, "step": 9186500 }, { "epoch": 1.21, "learning_rate": 4.314968765003211e-05, "loss": 3.9533, "step": 9187000 }, { "epoch": 1.21, "learning_rate": 4.314897774873124e-05, "loss": 3.9526, "step": 9187500 }, { "epoch": 1.21, "learning_rate": 4.3148267816488876e-05, "loss": 3.9678, "step": 9188000 }, { "epoch": 1.21, "learning_rate": 4.3147557853306245e-05, "loss": 3.9776, "step": 9188500 }, { "epoch": 1.21, "learning_rate": 4.314684785918454e-05, "loss": 3.956, "step": 9189000 }, { "epoch": 1.21, "learning_rate": 4.314613783412499e-05, "loss": 3.9759, "step": 9189500 }, { "epoch": 1.21, "learning_rate": 4.314542777812879e-05, "loss": 3.9521, "step": 9190000 }, { "epoch": 1.21, "learning_rate": 4.314471769119717e-05, "loss": 3.9561, "step": 9190500 }, { "epoch": 1.21, "learning_rate": 4.314400757333132e-05, "loss": 3.9715, "step": 9191000 }, { "epoch": 1.21, "learning_rate": 4.314329742453246e-05, "loss": 3.9833, "step": 9191500 }, { "epoch": 1.21, "learning_rate": 4.31425872448018e-05, "loss": 3.963, "step": 9192000 }, { "epoch": 1.21, "learning_rate": 4.314187703414054e-05, "loss": 3.959, "step": 9192500 }, { "epoch": 1.21, "learning_rate": 4.314116679254991e-05, "loss": 3.9727, "step": 9193000 }, { "epoch": 1.21, "learning_rate": 4.314045652003111e-05, "loss": 3.9552, "step": 9193500 }, { "epoch": 1.21, "learning_rate": 4.3139746216585365e-05, "loss": 3.9705, "step": 9194000 }, { "epoch": 1.21, "learning_rate": 4.3139035882213855e-05, "loss": 3.954, "step": 9194500 }, { "epoch": 1.21, "learning_rate": 4.313832551691782e-05, "loss": 3.9619, "step": 9195000 }, { "epoch": 1.21, "learning_rate": 4.313761512069846e-05, "loss": 3.9488, "step": 9195500 }, { "epoch": 1.21, "learning_rate": 4.313690469355698e-05, "loss": 3.9547, "step": 9196000 }, { "epoch": 1.21, "learning_rate": 4.3136194235494606e-05, "loss": 3.9589, "step": 9196500 }, { "epoch": 1.21, "learning_rate": 4.313548374651254e-05, "loss": 3.9656, "step": 9197000 }, { "epoch": 1.21, "learning_rate": 4.313477322661199e-05, "loss": 3.9454, "step": 9197500 }, { "epoch": 1.21, "learning_rate": 4.313406267579417e-05, "loss": 3.9638, "step": 9198000 }, { "epoch": 1.21, "learning_rate": 4.3133352094060296e-05, "loss": 3.9742, "step": 9198500 }, { "epoch": 1.21, "learning_rate": 4.313264148141158e-05, "loss": 3.982, "step": 9199000 }, { "epoch": 1.21, "learning_rate": 4.313193083784923e-05, "loss": 3.9658, "step": 9199500 }, { "epoch": 1.21, "learning_rate": 4.3131220163374454e-05, "loss": 3.9772, "step": 9200000 }, { "epoch": 1.21, "learning_rate": 4.3130509457988466e-05, "loss": 3.9482, "step": 9200500 }, { "epoch": 1.21, "learning_rate": 4.3129798721692484e-05, "loss": 3.9409, "step": 9201000 }, { "epoch": 1.21, "learning_rate": 4.3129087954487714e-05, "loss": 3.9524, "step": 9201500 }, { "epoch": 1.21, "learning_rate": 4.312837715637536e-05, "loss": 3.9367, "step": 9202000 }, { "epoch": 1.21, "learning_rate": 4.3127666327356656e-05, "loss": 3.9579, "step": 9202500 }, { "epoch": 1.21, "learning_rate": 4.312695546743279e-05, "loss": 3.9663, "step": 9203000 }, { "epoch": 1.21, "learning_rate": 4.312624457660499e-05, "loss": 3.9644, "step": 9203500 }, { "epoch": 1.21, "learning_rate": 4.312553365487446e-05, "loss": 3.9318, "step": 9204000 }, { "epoch": 1.21, "learning_rate": 4.3124822702242414e-05, "loss": 3.9421, "step": 9204500 }, { "epoch": 1.21, "learning_rate": 4.3124111718710064e-05, "loss": 3.9674, "step": 9205000 }, { "epoch": 1.21, "learning_rate": 4.3123400704278617e-05, "loss": 3.9721, "step": 9205500 }, { "epoch": 1.21, "learning_rate": 4.31226896589493e-05, "loss": 3.9533, "step": 9206000 }, { "epoch": 1.21, "learning_rate": 4.312197858272331e-05, "loss": 3.9662, "step": 9206500 }, { "epoch": 1.21, "learning_rate": 4.3121267475601864e-05, "loss": 3.9494, "step": 9207000 }, { "epoch": 1.21, "learning_rate": 4.312055633758618e-05, "loss": 3.9626, "step": 9207500 }, { "epoch": 1.21, "learning_rate": 4.311984516867747e-05, "loss": 3.9455, "step": 9208000 }, { "epoch": 1.21, "learning_rate": 4.3119133968876925e-05, "loss": 3.9401, "step": 9208500 }, { "epoch": 1.21, "learning_rate": 4.311842273818578e-05, "loss": 3.9525, "step": 9209000 }, { "epoch": 1.21, "learning_rate": 4.311771147660525e-05, "loss": 3.9475, "step": 9209500 }, { "epoch": 1.21, "learning_rate": 4.3117000184136534e-05, "loss": 3.9397, "step": 9210000 }, { "epoch": 1.21, "learning_rate": 4.3116288860780854e-05, "loss": 3.9544, "step": 9210500 }, { "epoch": 1.21, "learning_rate": 4.311557750653942e-05, "loss": 3.9528, "step": 9211000 }, { "epoch": 1.21, "learning_rate": 4.311486612141344e-05, "loss": 3.9679, "step": 9211500 }, { "epoch": 1.21, "learning_rate": 4.311415470540413e-05, "loss": 3.9494, "step": 9212000 }, { "epoch": 1.21, "learning_rate": 4.311344325851271e-05, "loss": 3.967, "step": 9212500 }, { "epoch": 1.21, "learning_rate": 4.311273178074038e-05, "loss": 3.9579, "step": 9213000 }, { "epoch": 1.21, "learning_rate": 4.311202027208836e-05, "loss": 3.9575, "step": 9213500 }, { "epoch": 1.21, "learning_rate": 4.311130873255787e-05, "loss": 3.9536, "step": 9214000 }, { "epoch": 1.21, "learning_rate": 4.3110597162150104e-05, "loss": 3.9563, "step": 9214500 }, { "epoch": 1.21, "learning_rate": 4.310988556086629e-05, "loss": 3.9554, "step": 9215000 }, { "epoch": 1.21, "learning_rate": 4.310917392870765e-05, "loss": 3.957, "step": 9215500 }, { "epoch": 1.21, "learning_rate": 4.310846226567537e-05, "loss": 3.9948, "step": 9216000 }, { "epoch": 1.21, "learning_rate": 4.310775057177069e-05, "loss": 3.9624, "step": 9216500 }, { "epoch": 1.21, "learning_rate": 4.31070388469948e-05, "loss": 3.965, "step": 9217000 }, { "epoch": 1.21, "learning_rate": 4.310632709134893e-05, "loss": 3.9426, "step": 9217500 }, { "epoch": 1.21, "learning_rate": 4.3105615304834293e-05, "loss": 3.9602, "step": 9218000 }, { "epoch": 1.21, "learning_rate": 4.310490348745209e-05, "loss": 3.9432, "step": 9218500 }, { "epoch": 1.21, "learning_rate": 4.310419163920355e-05, "loss": 3.9552, "step": 9219000 }, { "epoch": 1.21, "learning_rate": 4.310347976008987e-05, "loss": 3.9453, "step": 9219500 }, { "epoch": 1.21, "learning_rate": 4.310276785011228e-05, "loss": 3.9624, "step": 9220000 }, { "epoch": 1.21, "learning_rate": 4.3102055909271985e-05, "loss": 3.9472, "step": 9220500 }, { "epoch": 1.21, "learning_rate": 4.31013439375702e-05, "loss": 3.9618, "step": 9221000 }, { "epoch": 1.21, "learning_rate": 4.3100631935008144e-05, "loss": 3.9477, "step": 9221500 }, { "epoch": 1.21, "learning_rate": 4.309991990158702e-05, "loss": 3.9693, "step": 9222000 }, { "epoch": 1.21, "learning_rate": 4.3099207837308055e-05, "loss": 3.9577, "step": 9222500 }, { "epoch": 1.21, "learning_rate": 4.3098495742172444e-05, "loss": 3.9629, "step": 9223000 }, { "epoch": 1.21, "learning_rate": 4.309778361618143e-05, "loss": 3.962, "step": 9223500 }, { "epoch": 1.21, "learning_rate": 4.3097071459336194e-05, "loss": 3.962, "step": 9224000 }, { "epoch": 1.21, "learning_rate": 4.309635927163797e-05, "loss": 3.9593, "step": 9224500 }, { "epoch": 1.21, "learning_rate": 4.3095647053087976e-05, "loss": 3.9521, "step": 9225000 }, { "epoch": 1.21, "learning_rate": 4.3094934803687404e-05, "loss": 3.974, "step": 9225500 }, { "epoch": 1.21, "learning_rate": 4.309422252343749e-05, "loss": 3.9599, "step": 9226000 }, { "epoch": 1.21, "learning_rate": 4.309351021233945e-05, "loss": 3.9499, "step": 9226500 }, { "epoch": 1.21, "learning_rate": 4.3092797870394475e-05, "loss": 3.9684, "step": 9227000 }, { "epoch": 1.21, "learning_rate": 4.309208549760381e-05, "loss": 3.9488, "step": 9227500 }, { "epoch": 1.21, "learning_rate": 4.3091373093968634e-05, "loss": 3.9772, "step": 9228000 }, { "epoch": 1.21, "learning_rate": 4.30906606594902e-05, "loss": 3.9455, "step": 9228500 }, { "epoch": 1.21, "learning_rate": 4.308994819416969e-05, "loss": 3.9537, "step": 9229000 }, { "epoch": 1.21, "learning_rate": 4.308923569800834e-05, "loss": 3.974, "step": 9229500 }, { "epoch": 1.21, "learning_rate": 4.3088523171007355e-05, "loss": 3.9539, "step": 9230000 }, { "epoch": 1.21, "learning_rate": 4.308781061316794e-05, "loss": 3.9702, "step": 9230500 }, { "epoch": 1.21, "learning_rate": 4.308709802449134e-05, "loss": 3.9562, "step": 9231000 }, { "epoch": 1.21, "learning_rate": 4.3086385404978745e-05, "loss": 3.9699, "step": 9231500 }, { "epoch": 1.21, "learning_rate": 4.308567275463138e-05, "loss": 3.9523, "step": 9232000 }, { "epoch": 1.21, "learning_rate": 4.308496007345045e-05, "loss": 3.97, "step": 9232500 }, { "epoch": 1.21, "learning_rate": 4.308424736143718e-05, "loss": 3.9329, "step": 9233000 }, { "epoch": 1.21, "learning_rate": 4.308353461859278e-05, "loss": 3.9388, "step": 9233500 }, { "epoch": 1.21, "learning_rate": 4.3082821844918466e-05, "loss": 3.9497, "step": 9234000 }, { "epoch": 1.21, "learning_rate": 4.3082109040415455e-05, "loss": 3.955, "step": 9234500 }, { "epoch": 1.21, "learning_rate": 4.308139620508496e-05, "loss": 3.9521, "step": 9235000 }, { "epoch": 1.21, "learning_rate": 4.308068333892819e-05, "loss": 3.9591, "step": 9235500 }, { "epoch": 1.21, "learning_rate": 4.307997044194638e-05, "loss": 3.9653, "step": 9236000 }, { "epoch": 1.21, "learning_rate": 4.307925751414073e-05, "loss": 3.9699, "step": 9236500 }, { "epoch": 1.21, "learning_rate": 4.307854455551245e-05, "loss": 3.9536, "step": 9237000 }, { "epoch": 1.21, "learning_rate": 4.307783156606277e-05, "loss": 3.9642, "step": 9237500 }, { "epoch": 1.21, "learning_rate": 4.3077118545792893e-05, "loss": 3.9521, "step": 9238000 }, { "epoch": 1.21, "learning_rate": 4.307640549470405e-05, "loss": 3.9657, "step": 9238500 }, { "epoch": 1.21, "learning_rate": 4.3075692412797444e-05, "loss": 3.9536, "step": 9239000 }, { "epoch": 1.21, "learning_rate": 4.3074979300074296e-05, "loss": 3.9734, "step": 9239500 }, { "epoch": 1.21, "learning_rate": 4.3074266156535824e-05, "loss": 3.9389, "step": 9240000 }, { "epoch": 1.21, "learning_rate": 4.3073552982183236e-05, "loss": 3.9594, "step": 9240500 }, { "epoch": 1.21, "learning_rate": 4.307283977701775e-05, "loss": 3.9491, "step": 9241000 }, { "epoch": 1.21, "learning_rate": 4.307212654104058e-05, "loss": 3.9496, "step": 9241500 }, { "epoch": 1.21, "learning_rate": 4.3071413274252956e-05, "loss": 3.9475, "step": 9242000 }, { "epoch": 1.21, "learning_rate": 4.307069997665608e-05, "loss": 3.9561, "step": 9242500 }, { "epoch": 1.21, "learning_rate": 4.306998664825117e-05, "loss": 3.9547, "step": 9243000 }, { "epoch": 1.21, "learning_rate": 4.3069273289039447e-05, "loss": 3.9609, "step": 9243500 }, { "epoch": 1.21, "learning_rate": 4.306855989902212e-05, "loss": 3.9543, "step": 9244000 }, { "epoch": 1.21, "learning_rate": 4.306784647820041e-05, "loss": 3.9478, "step": 9244500 }, { "epoch": 1.22, "learning_rate": 4.306713302657553e-05, "loss": 3.957, "step": 9245000 }, { "epoch": 1.22, "learning_rate": 4.306641954414871e-05, "loss": 3.9291, "step": 9245500 }, { "epoch": 1.22, "learning_rate": 4.306570603092114e-05, "loss": 3.948, "step": 9246000 }, { "epoch": 1.22, "learning_rate": 4.306499248689406e-05, "loss": 3.9704, "step": 9246500 }, { "epoch": 1.22, "learning_rate": 4.306427891206868e-05, "loss": 3.9487, "step": 9247000 }, { "epoch": 1.22, "learning_rate": 4.306356530644621e-05, "loss": 3.9422, "step": 9247500 }, { "epoch": 1.22, "learning_rate": 4.306285167002788e-05, "loss": 3.947, "step": 9248000 }, { "epoch": 1.22, "learning_rate": 4.3062138002814887e-05, "loss": 3.9373, "step": 9248500 }, { "epoch": 1.22, "learning_rate": 4.3061424304808465e-05, "loss": 3.9737, "step": 9249000 }, { "epoch": 1.22, "learning_rate": 4.306071057600983e-05, "loss": 3.9558, "step": 9249500 }, { "epoch": 1.22, "learning_rate": 4.305999681642018e-05, "loss": 3.9559, "step": 9250000 }, { "epoch": 1.22, "learning_rate": 4.305928302604075e-05, "loss": 3.9668, "step": 9250500 }, { "epoch": 1.22, "learning_rate": 4.3058569204872763e-05, "loss": 3.9586, "step": 9251000 }, { "epoch": 1.22, "learning_rate": 4.305785535291741e-05, "loss": 3.9438, "step": 9251500 }, { "epoch": 1.22, "learning_rate": 4.305714147017593e-05, "loss": 3.9475, "step": 9252000 }, { "epoch": 1.22, "learning_rate": 4.305642755664954e-05, "loss": 3.963, "step": 9252500 }, { "epoch": 1.22, "learning_rate": 4.305571361233944e-05, "loss": 3.9414, "step": 9253000 }, { "epoch": 1.22, "learning_rate": 4.3054999637246865e-05, "loss": 3.9572, "step": 9253500 }, { "epoch": 1.22, "learning_rate": 4.305428563137302e-05, "loss": 3.9474, "step": 9254000 }, { "epoch": 1.22, "learning_rate": 4.305357159471912e-05, "loss": 3.9532, "step": 9254500 }, { "epoch": 1.22, "learning_rate": 4.30528575272864e-05, "loss": 3.9572, "step": 9255000 }, { "epoch": 1.22, "learning_rate": 4.3052143429076064e-05, "loss": 3.9464, "step": 9255500 }, { "epoch": 1.22, "learning_rate": 4.305142930008933e-05, "loss": 3.9584, "step": 9256000 }, { "epoch": 1.22, "learning_rate": 4.3050715140327425e-05, "loss": 3.9564, "step": 9256500 }, { "epoch": 1.22, "learning_rate": 4.3050000949791545e-05, "loss": 3.9387, "step": 9257000 }, { "epoch": 1.22, "learning_rate": 4.304928672848294e-05, "loss": 3.9679, "step": 9257500 }, { "epoch": 1.22, "learning_rate": 4.3048572476402795e-05, "loss": 3.952, "step": 9258000 }, { "epoch": 1.22, "learning_rate": 4.304785819355235e-05, "loss": 3.9776, "step": 9258500 }, { "epoch": 1.22, "learning_rate": 4.3047143879932806e-05, "loss": 3.9527, "step": 9259000 }, { "epoch": 1.22, "learning_rate": 4.30464295355454e-05, "loss": 3.9634, "step": 9259500 }, { "epoch": 1.22, "learning_rate": 4.3045715160391334e-05, "loss": 3.9487, "step": 9260000 }, { "epoch": 1.22, "learning_rate": 4.304500075447183e-05, "loss": 3.9707, "step": 9260500 }, { "epoch": 1.22, "learning_rate": 4.304428631778811e-05, "loss": 3.9637, "step": 9261000 }, { "epoch": 1.22, "learning_rate": 4.304357185034139e-05, "loss": 3.9708, "step": 9261500 }, { "epoch": 1.22, "learning_rate": 4.3042857352132884e-05, "loss": 3.9361, "step": 9262000 }, { "epoch": 1.22, "learning_rate": 4.3042142823163824e-05, "loss": 3.9577, "step": 9262500 }, { "epoch": 1.22, "learning_rate": 4.3041428263435406e-05, "loss": 3.9546, "step": 9263000 }, { "epoch": 1.22, "learning_rate": 4.3040713672948864e-05, "loss": 3.9392, "step": 9263500 }, { "epoch": 1.22, "learning_rate": 4.303999905170542e-05, "loss": 3.9548, "step": 9264000 }, { "epoch": 1.22, "learning_rate": 4.303928439970627e-05, "loss": 3.9514, "step": 9264500 }, { "epoch": 1.22, "learning_rate": 4.3038569716952654e-05, "loss": 3.9483, "step": 9265000 }, { "epoch": 1.22, "learning_rate": 4.3037855003445785e-05, "loss": 3.9726, "step": 9265500 }, { "epoch": 1.22, "learning_rate": 4.303714025918688e-05, "loss": 3.9342, "step": 9266000 }, { "epoch": 1.22, "learning_rate": 4.3036425484177153e-05, "loss": 3.9503, "step": 9266500 }, { "epoch": 1.22, "learning_rate": 4.303571067841783e-05, "loss": 3.9438, "step": 9267000 }, { "epoch": 1.22, "learning_rate": 4.303499584191013e-05, "loss": 3.9605, "step": 9267500 }, { "epoch": 1.22, "learning_rate": 4.303428097465526e-05, "loss": 3.9521, "step": 9268000 }, { "epoch": 1.22, "learning_rate": 4.303356607665446e-05, "loss": 3.97, "step": 9268500 }, { "epoch": 1.22, "learning_rate": 4.3032851147908926e-05, "loss": 3.9461, "step": 9269000 }, { "epoch": 1.22, "learning_rate": 4.303213618841989e-05, "loss": 3.939, "step": 9269500 }, { "epoch": 1.22, "learning_rate": 4.3031421198188566e-05, "loss": 3.9616, "step": 9270000 }, { "epoch": 1.22, "learning_rate": 4.3030706177216177e-05, "loss": 3.9531, "step": 9270500 }, { "epoch": 1.22, "learning_rate": 4.302999112550394e-05, "loss": 3.9514, "step": 9271000 }, { "epoch": 1.22, "learning_rate": 4.302927604305307e-05, "loss": 3.9708, "step": 9271500 }, { "epoch": 1.22, "learning_rate": 4.3028560929864794e-05, "loss": 3.9706, "step": 9272000 }, { "epoch": 1.22, "learning_rate": 4.302784578594032e-05, "loss": 3.9519, "step": 9272500 }, { "epoch": 1.22, "learning_rate": 4.3027130611280885e-05, "loss": 3.9344, "step": 9273000 }, { "epoch": 1.22, "learning_rate": 4.3026415405887685e-05, "loss": 3.9576, "step": 9273500 }, { "epoch": 1.22, "learning_rate": 4.302570016976196e-05, "loss": 3.9451, "step": 9274000 }, { "epoch": 1.22, "learning_rate": 4.302498490290492e-05, "loss": 3.9593, "step": 9274500 }, { "epoch": 1.22, "learning_rate": 4.3024269605317794e-05, "loss": 3.9468, "step": 9275000 }, { "epoch": 1.22, "learning_rate": 4.3023554277001776e-05, "loss": 3.9286, "step": 9275500 }, { "epoch": 1.22, "learning_rate": 4.302283891795812e-05, "loss": 3.958, "step": 9276000 }, { "epoch": 1.22, "learning_rate": 4.302212352818802e-05, "loss": 3.9546, "step": 9276500 }, { "epoch": 1.22, "learning_rate": 4.3021408107692694e-05, "loss": 3.972, "step": 9277000 }, { "epoch": 1.22, "learning_rate": 4.302069265647338e-05, "loss": 3.9634, "step": 9277500 }, { "epoch": 1.22, "learning_rate": 4.3019977174531297e-05, "loss": 3.9334, "step": 9278000 }, { "epoch": 1.22, "learning_rate": 4.301926166186765e-05, "loss": 3.9557, "step": 9278500 }, { "epoch": 1.22, "learning_rate": 4.301854611848367e-05, "loss": 3.966, "step": 9279000 }, { "epoch": 1.22, "learning_rate": 4.301783054438057e-05, "loss": 3.9459, "step": 9279500 }, { "epoch": 1.22, "learning_rate": 4.3017114939559564e-05, "loss": 3.948, "step": 9280000 }, { "epoch": 1.22, "learning_rate": 4.301639930402189e-05, "loss": 3.9633, "step": 9280500 }, { "epoch": 1.22, "learning_rate": 4.3015683637768764e-05, "loss": 3.9439, "step": 9281000 }, { "epoch": 1.22, "learning_rate": 4.3014967940801396e-05, "loss": 3.9546, "step": 9281500 }, { "epoch": 1.22, "learning_rate": 4.301425221312101e-05, "loss": 3.9505, "step": 9282000 }, { "epoch": 1.22, "learning_rate": 4.3013536454728834e-05, "loss": 3.9704, "step": 9282500 }, { "epoch": 1.22, "learning_rate": 4.3012820665626067e-05, "loss": 3.9572, "step": 9283000 }, { "epoch": 1.22, "learning_rate": 4.301210484581395e-05, "loss": 3.9363, "step": 9283500 }, { "epoch": 1.22, "learning_rate": 4.3011388995293705e-05, "loss": 3.9678, "step": 9284000 }, { "epoch": 1.22, "learning_rate": 4.301067311406654e-05, "loss": 3.9505, "step": 9284500 }, { "epoch": 1.22, "learning_rate": 4.300995720213368e-05, "loss": 3.9548, "step": 9285000 }, { "epoch": 1.22, "learning_rate": 4.300924125949635e-05, "loss": 3.9426, "step": 9285500 }, { "epoch": 1.22, "learning_rate": 4.300852528615575e-05, "loss": 3.9533, "step": 9286000 }, { "epoch": 1.22, "learning_rate": 4.3007809282113135e-05, "loss": 3.9507, "step": 9286500 }, { "epoch": 1.22, "learning_rate": 4.300709324736971e-05, "loss": 3.9592, "step": 9287000 }, { "epoch": 1.22, "learning_rate": 4.3006377181926684e-05, "loss": 3.9395, "step": 9287500 }, { "epoch": 1.22, "learning_rate": 4.300566108578529e-05, "loss": 3.9419, "step": 9288000 }, { "epoch": 1.22, "learning_rate": 4.3004944958946734e-05, "loss": 3.9648, "step": 9288500 }, { "epoch": 1.22, "learning_rate": 4.300422880141227e-05, "loss": 3.9714, "step": 9289000 }, { "epoch": 1.22, "learning_rate": 4.3003512613183084e-05, "loss": 3.9432, "step": 9289500 }, { "epoch": 1.22, "learning_rate": 4.300279639426041e-05, "loss": 3.9548, "step": 9290000 }, { "epoch": 1.22, "learning_rate": 4.300208014464548e-05, "loss": 3.9547, "step": 9290500 }, { "epoch": 1.22, "learning_rate": 4.3001363864339494e-05, "loss": 3.9624, "step": 9291000 }, { "epoch": 1.22, "learning_rate": 4.30006475533437e-05, "loss": 3.9408, "step": 9291500 }, { "epoch": 1.22, "learning_rate": 4.299993121165929e-05, "loss": 3.9396, "step": 9292000 }, { "epoch": 1.22, "learning_rate": 4.299921483928751e-05, "loss": 3.9536, "step": 9292500 }, { "epoch": 1.22, "learning_rate": 4.299849843622955e-05, "loss": 3.9419, "step": 9293000 }, { "epoch": 1.22, "learning_rate": 4.2997782002486666e-05, "loss": 3.9645, "step": 9293500 }, { "epoch": 1.22, "learning_rate": 4.299706553806007e-05, "loss": 3.958, "step": 9294000 }, { "epoch": 1.22, "learning_rate": 4.299634904295097e-05, "loss": 3.9767, "step": 9294500 }, { "epoch": 1.22, "learning_rate": 4.299563251716059e-05, "loss": 3.977, "step": 9295000 }, { "epoch": 1.22, "learning_rate": 4.2994915960690164e-05, "loss": 3.9661, "step": 9295500 }, { "epoch": 1.22, "learning_rate": 4.299419937354091e-05, "loss": 3.9416, "step": 9296000 }, { "epoch": 1.22, "learning_rate": 4.2993482755714046e-05, "loss": 3.9455, "step": 9296500 }, { "epoch": 1.22, "learning_rate": 4.299276610721079e-05, "loss": 3.9505, "step": 9297000 }, { "epoch": 1.22, "learning_rate": 4.299204942803237e-05, "loss": 3.9595, "step": 9297500 }, { "epoch": 1.22, "learning_rate": 4.299133271818001e-05, "loss": 3.9435, "step": 9298000 }, { "epoch": 1.22, "learning_rate": 4.2990615977654924e-05, "loss": 3.9543, "step": 9298500 }, { "epoch": 1.22, "learning_rate": 4.298989920645834e-05, "loss": 3.9622, "step": 9299000 }, { "epoch": 2.44, "learning_rate": 2.5887841376303117e-05, "loss": 3.9039, "step": 9299500 }, { "epoch": 2.44, "learning_rate": 2.5885778122934457e-05, "loss": 3.8688, "step": 9300000 }, { "epoch": 2.44, "learning_rate": 2.5883714863524973e-05, "loss": 3.8688, "step": 9300500 }, { "epoch": 2.44, "learning_rate": 2.588165159808872e-05, "loss": 3.8524, "step": 9301000 }, { "epoch": 2.44, "learning_rate": 2.587958832663977e-05, "loss": 3.8366, "step": 9301500 }, { "epoch": 2.45, "learning_rate": 2.5877525049192207e-05, "loss": 3.8272, "step": 9302000 }, { "epoch": 2.45, "learning_rate": 2.5875461765760095e-05, "loss": 3.8267, "step": 9302500 }, { "epoch": 2.45, "learning_rate": 2.5873398476357496e-05, "loss": 3.8313, "step": 9303000 }, { "epoch": 2.45, "learning_rate": 2.5871335180998503e-05, "loss": 3.8385, "step": 9303500 }, { "epoch": 2.45, "learning_rate": 2.5869271879697164e-05, "loss": 3.8201, "step": 9304000 }, { "epoch": 2.45, "learning_rate": 2.586720857246757e-05, "loss": 3.8152, "step": 9304500 }, { "epoch": 2.45, "learning_rate": 2.5865145259323776e-05, "loss": 3.8057, "step": 9305000 }, { "epoch": 2.45, "learning_rate": 2.5863081940279865e-05, "loss": 3.8022, "step": 9305500 }, { "epoch": 2.45, "learning_rate": 2.58610186153499e-05, "loss": 3.7993, "step": 9306000 }, { "epoch": 2.45, "learning_rate": 2.585895528454796e-05, "loss": 3.8109, "step": 9306500 }, { "epoch": 2.45, "learning_rate": 2.5856891947888105e-05, "loss": 3.7997, "step": 9307000 }, { "epoch": 2.45, "learning_rate": 2.5854828605384417e-05, "loss": 3.7914, "step": 9307500 }, { "epoch": 2.45, "learning_rate": 2.5852765257050978e-05, "loss": 3.8103, "step": 9308000 }, { "epoch": 2.45, "learning_rate": 2.5850701902901837e-05, "loss": 3.7866, "step": 9308500 }, { "epoch": 2.45, "learning_rate": 2.584863854295107e-05, "loss": 3.8196, "step": 9309000 }, { "epoch": 2.45, "learning_rate": 2.5846575177212763e-05, "loss": 3.786, "step": 9309500 }, { "epoch": 2.45, "learning_rate": 2.584451180570097e-05, "loss": 3.7941, "step": 9310000 }, { "epoch": 2.45, "learning_rate": 2.5842448428429776e-05, "loss": 3.7814, "step": 9310500 }, { "epoch": 2.45, "learning_rate": 2.584038504541325e-05, "loss": 3.7887, "step": 9311000 }, { "epoch": 2.45, "learning_rate": 2.5838321656665464e-05, "loss": 3.7795, "step": 9311500 }, { "epoch": 2.45, "learning_rate": 2.5836258262200486e-05, "loss": 3.7817, "step": 9312000 }, { "epoch": 2.45, "learning_rate": 2.583419486203239e-05, "loss": 3.7737, "step": 9312500 }, { "epoch": 2.45, "learning_rate": 2.5832131456175253e-05, "loss": 3.7661, "step": 9313000 }, { "epoch": 2.45, "learning_rate": 2.583006804464313e-05, "loss": 3.7788, "step": 9313500 }, { "epoch": 2.45, "learning_rate": 2.5828004627450113e-05, "loss": 3.779, "step": 9314000 }, { "epoch": 2.45, "learning_rate": 2.582594120461027e-05, "loss": 3.7712, "step": 9314500 }, { "epoch": 2.45, "learning_rate": 2.582387777613766e-05, "loss": 3.7671, "step": 9315000 }, { "epoch": 2.45, "learning_rate": 2.5821814342046374e-05, "loss": 3.7739, "step": 9315500 }, { "epoch": 2.45, "learning_rate": 2.581975090235047e-05, "loss": 3.7802, "step": 9316000 }, { "epoch": 2.45, "learning_rate": 2.5817687457064026e-05, "loss": 3.7662, "step": 9316500 }, { "epoch": 2.45, "learning_rate": 2.5815624006201118e-05, "loss": 3.7655, "step": 9317000 }, { "epoch": 2.45, "learning_rate": 2.5813560549775805e-05, "loss": 3.777, "step": 9317500 }, { "epoch": 2.45, "learning_rate": 2.581149708780218e-05, "loss": 3.7671, "step": 9318000 }, { "epoch": 2.45, "learning_rate": 2.5809433620294287e-05, "loss": 3.7565, "step": 9318500 }, { "epoch": 2.45, "learning_rate": 2.580737014726623e-05, "loss": 3.7713, "step": 9319000 }, { "epoch": 2.45, "learning_rate": 2.580530666873206e-05, "loss": 3.7624, "step": 9319500 }, { "epoch": 2.45, "learning_rate": 2.5803243184705856e-05, "loss": 3.774, "step": 9320000 }, { "epoch": 2.45, "learning_rate": 2.5801179695201694e-05, "loss": 3.7551, "step": 9320500 }, { "epoch": 2.45, "learning_rate": 2.5799116200233637e-05, "loss": 3.7488, "step": 9321000 }, { "epoch": 2.45, "learning_rate": 2.579705269981577e-05, "loss": 3.7623, "step": 9321500 }, { "epoch": 2.45, "learning_rate": 2.5794989193962155e-05, "loss": 3.7537, "step": 9322000 }, { "epoch": 2.45, "learning_rate": 2.5792925682686868e-05, "loss": 3.7595, "step": 9322500 }, { "epoch": 2.45, "learning_rate": 2.5790862166003994e-05, "loss": 3.7594, "step": 9323000 }, { "epoch": 2.45, "learning_rate": 2.578879864392758e-05, "loss": 3.7637, "step": 9323500 }, { "epoch": 2.45, "learning_rate": 2.5786735116471724e-05, "loss": 3.757, "step": 9324000 }, { "epoch": 2.45, "learning_rate": 2.5784671583650483e-05, "loss": 3.7589, "step": 9324500 }, { "epoch": 2.45, "learning_rate": 2.5782608045477936e-05, "loss": 3.7465, "step": 9325000 }, { "epoch": 2.45, "learning_rate": 2.5780544501968156e-05, "loss": 3.7458, "step": 9325500 }, { "epoch": 2.45, "learning_rate": 2.577848095313522e-05, "loss": 3.7516, "step": 9326000 }, { "epoch": 2.45, "learning_rate": 2.5776417398993185e-05, "loss": 3.7508, "step": 9326500 }, { "epoch": 2.45, "learning_rate": 2.5774353839556147e-05, "loss": 3.7395, "step": 9327000 }, { "epoch": 2.45, "learning_rate": 2.5772290274838163e-05, "loss": 3.7365, "step": 9327500 }, { "epoch": 2.45, "learning_rate": 2.5770226704853307e-05, "loss": 3.7496, "step": 9328000 }, { "epoch": 2.45, "learning_rate": 2.576816312961566e-05, "loss": 3.7548, "step": 9328500 }, { "epoch": 2.45, "learning_rate": 2.5766099549139287e-05, "loss": 3.7433, "step": 9329000 }, { "epoch": 2.45, "learning_rate": 2.5764035963438264e-05, "loss": 3.7534, "step": 9329500 }, { "epoch": 2.45, "learning_rate": 2.576197237252667e-05, "loss": 3.7554, "step": 9330000 }, { "epoch": 2.45, "learning_rate": 2.5759908776418574e-05, "loss": 3.7504, "step": 9330500 }, { "epoch": 2.45, "learning_rate": 2.5757845175128042e-05, "loss": 3.7509, "step": 9331000 }, { "epoch": 2.45, "learning_rate": 2.575578156866916e-05, "loss": 3.7519, "step": 9331500 }, { "epoch": 2.45, "learning_rate": 2.5753717957055988e-05, "loss": 3.736, "step": 9332000 }, { "epoch": 2.45, "learning_rate": 2.5751654340302612e-05, "loss": 3.7577, "step": 9332500 }, { "epoch": 2.45, "learning_rate": 2.5749590718423104e-05, "loss": 3.7571, "step": 9333000 }, { "epoch": 2.45, "learning_rate": 2.574752709143153e-05, "loss": 3.7508, "step": 9333500 }, { "epoch": 2.45, "learning_rate": 2.5745463459341967e-05, "loss": 3.7432, "step": 9334000 }, { "epoch": 2.45, "learning_rate": 2.5743399822168484e-05, "loss": 3.7531, "step": 9334500 }, { "epoch": 2.45, "learning_rate": 2.574133617992517e-05, "loss": 3.7479, "step": 9335000 }, { "epoch": 2.45, "learning_rate": 2.573927253262608e-05, "loss": 3.7398, "step": 9335500 }, { "epoch": 2.45, "learning_rate": 2.5737208880285303e-05, "loss": 3.7442, "step": 9336000 }, { "epoch": 2.45, "learning_rate": 2.57351452229169e-05, "loss": 3.7452, "step": 9336500 }, { "epoch": 2.45, "learning_rate": 2.573308156053495e-05, "loss": 3.7448, "step": 9337000 }, { "epoch": 2.45, "learning_rate": 2.5731017893153537e-05, "loss": 3.7461, "step": 9337500 }, { "epoch": 2.45, "learning_rate": 2.572895422078671e-05, "loss": 3.7483, "step": 9338000 }, { "epoch": 2.45, "learning_rate": 2.572689054344857e-05, "loss": 3.7416, "step": 9338500 }, { "epoch": 2.45, "learning_rate": 2.5724826861153172e-05, "loss": 3.7339, "step": 9339000 }, { "epoch": 2.45, "learning_rate": 2.57227631739146e-05, "loss": 3.7422, "step": 9339500 }, { "epoch": 2.46, "learning_rate": 2.5720699481746922e-05, "loss": 3.7122, "step": 9340000 }, { "epoch": 2.46, "learning_rate": 2.5718635784664214e-05, "loss": 3.7345, "step": 9340500 }, { "epoch": 2.46, "learning_rate": 2.571657208268055e-05, "loss": 3.7357, "step": 9341000 }, { "epoch": 2.46, "learning_rate": 2.571450837581001e-05, "loss": 3.7184, "step": 9341500 }, { "epoch": 2.46, "learning_rate": 2.5712444664066653e-05, "loss": 3.7609, "step": 9342000 }, { "epoch": 2.46, "learning_rate": 2.5710380947464576e-05, "loss": 3.7325, "step": 9342500 }, { "epoch": 2.46, "learning_rate": 2.5708317226017825e-05, "loss": 3.733, "step": 9343000 }, { "epoch": 2.46, "learning_rate": 2.57062534997405e-05, "loss": 3.7329, "step": 9343500 }, { "epoch": 2.46, "learning_rate": 2.5704189768646665e-05, "loss": 3.7518, "step": 9344000 }, { "epoch": 2.46, "learning_rate": 2.5702126032750383e-05, "loss": 3.7378, "step": 9344500 }, { "epoch": 2.46, "learning_rate": 2.5700062292065753e-05, "loss": 3.7433, "step": 9345000 }, { "epoch": 2.46, "learning_rate": 2.5697998546606822e-05, "loss": 3.7264, "step": 9345500 }, { "epoch": 2.46, "learning_rate": 2.5695934796387684e-05, "loss": 3.7299, "step": 9346000 }, { "epoch": 2.46, "learning_rate": 2.5693871041422407e-05, "loss": 3.7308, "step": 9346500 }, { "epoch": 2.46, "learning_rate": 2.5691807281725062e-05, "loss": 3.7292, "step": 9347000 }, { "epoch": 2.46, "learning_rate": 2.568974351730973e-05, "loss": 3.7279, "step": 9347500 }, { "epoch": 2.46, "learning_rate": 2.568767974819048e-05, "loss": 3.7339, "step": 9348000 }, { "epoch": 2.46, "learning_rate": 2.5685615974381387e-05, "loss": 3.7229, "step": 9348500 }, { "epoch": 2.46, "learning_rate": 2.5683552195896533e-05, "loss": 3.73, "step": 9349000 }, { "epoch": 2.46, "learning_rate": 2.5681488412749982e-05, "loss": 3.7291, "step": 9349500 }, { "epoch": 2.46, "learning_rate": 2.5679424624955812e-05, "loss": 3.7229, "step": 9350000 }, { "epoch": 2.46, "learning_rate": 2.56773608325281e-05, "loss": 3.7169, "step": 9350500 }, { "epoch": 2.46, "learning_rate": 2.5675297035480926e-05, "loss": 3.737, "step": 9351000 }, { "epoch": 2.46, "learning_rate": 2.5673233233828348e-05, "loss": 3.729, "step": 9351500 }, { "epoch": 2.46, "learning_rate": 2.567116942758446e-05, "loss": 3.7137, "step": 9352000 }, { "epoch": 2.46, "learning_rate": 2.5669105616763322e-05, "loss": 3.7384, "step": 9352500 }, { "epoch": 2.46, "learning_rate": 2.5667041801379015e-05, "loss": 3.732, "step": 9353000 }, { "epoch": 2.46, "learning_rate": 2.566497798144562e-05, "loss": 3.727, "step": 9353500 }, { "epoch": 2.46, "learning_rate": 2.5662914156977197e-05, "loss": 3.725, "step": 9354000 }, { "epoch": 2.46, "learning_rate": 2.566085032798783e-05, "loss": 3.7306, "step": 9354500 }, { "epoch": 2.46, "learning_rate": 2.5658786494491598e-05, "loss": 3.7239, "step": 9355000 }, { "epoch": 2.46, "learning_rate": 2.5656722656502564e-05, "loss": 3.7314, "step": 9355500 }, { "epoch": 2.46, "learning_rate": 2.5654658814034816e-05, "loss": 3.7289, "step": 9356000 }, { "epoch": 2.46, "learning_rate": 2.5652594967102418e-05, "loss": 3.7309, "step": 9356500 }, { "epoch": 2.46, "learning_rate": 2.5650531115719455e-05, "loss": 3.7278, "step": 9357000 }, { "epoch": 2.46, "learning_rate": 2.5648467259899988e-05, "loss": 3.7328, "step": 9357500 }, { "epoch": 2.46, "learning_rate": 2.5646403399658114e-05, "loss": 3.735, "step": 9358000 }, { "epoch": 2.46, "learning_rate": 2.5644339535007888e-05, "loss": 3.7266, "step": 9358500 }, { "epoch": 2.46, "learning_rate": 2.5642275665963393e-05, "loss": 3.7167, "step": 9359000 }, { "epoch": 1.23, "learning_rate": 4.290294387560506e-05, "loss": 3.7927, "step": 9359500 }, { "epoch": 1.23, "learning_rate": 4.29022233716699e-05, "loss": 3.8237, "step": 9360000 }, { "epoch": 1.23, "learning_rate": 4.290150283721394e-05, "loss": 3.8322, "step": 9360500 }, { "epoch": 1.23, "learning_rate": 4.290078227223839e-05, "loss": 3.8385, "step": 9361000 }, { "epoch": 2.95, "learning_rate": 1.7998866419044223e-05, "loss": 3.8023, "step": 9361500 }, { "epoch": 2.46, "learning_rate": 2.5629892360204977e-05, "loss": 3.777, "step": 9362000 }, { "epoch": 2.46, "learning_rate": 2.5627828460794694e-05, "loss": 3.7604, "step": 9362500 }, { "epoch": 2.46, "learning_rate": 2.5625764557102754e-05, "loss": 3.7465, "step": 9363000 }, { "epoch": 2.46, "learning_rate": 2.562370064914322e-05, "loss": 3.7316, "step": 9363500 }, { "epoch": 2.46, "learning_rate": 2.562163673693017e-05, "loss": 3.7617, "step": 9364000 }, { "epoch": 2.46, "learning_rate": 2.5619572820477683e-05, "loss": 3.7349, "step": 9364500 }, { "epoch": 2.46, "learning_rate": 2.561750889979982e-05, "loss": 3.7371, "step": 9365000 }, { "epoch": 2.46, "learning_rate": 2.5615444974910673e-05, "loss": 3.7308, "step": 9365500 }, { "epoch": 2.46, "learning_rate": 2.5613381045824316e-05, "loss": 3.7355, "step": 9366000 }, { "epoch": 2.46, "learning_rate": 2.5611317112554812e-05, "loss": 3.743, "step": 9366500 }, { "epoch": 2.46, "learning_rate": 2.560925317511625e-05, "loss": 3.7564, "step": 9367000 }, { "epoch": 2.46, "learning_rate": 2.5607189233522695e-05, "loss": 3.7354, "step": 9367500 }, { "epoch": 2.46, "learning_rate": 2.5605125287788234e-05, "loss": 3.7207, "step": 9368000 }, { "epoch": 2.46, "learning_rate": 2.560306133792693e-05, "loss": 3.744, "step": 9368500 }, { "epoch": 2.46, "learning_rate": 2.5600997383952875e-05, "loss": 3.729, "step": 9369000 }, { "epoch": 2.46, "learning_rate": 2.5598933425880133e-05, "loss": 3.7462, "step": 9369500 }, { "epoch": 2.46, "learning_rate": 2.5596869463722777e-05, "loss": 3.73, "step": 9370000 }, { "epoch": 2.46, "learning_rate": 2.5594805497494896e-05, "loss": 3.7372, "step": 9370500 }, { "epoch": 2.46, "learning_rate": 2.559274152721055e-05, "loss": 3.7377, "step": 9371000 }, { "epoch": 2.46, "learning_rate": 2.559067755288383e-05, "loss": 3.7157, "step": 9371500 }, { "epoch": 2.46, "learning_rate": 2.5588613574528797e-05, "loss": 3.717, "step": 9372000 }, { "epoch": 2.46, "learning_rate": 2.558654959215954e-05, "loss": 3.7229, "step": 9372500 }, { "epoch": 2.46, "learning_rate": 2.558448560579013e-05, "loss": 3.7186, "step": 9373000 }, { "epoch": 2.46, "learning_rate": 2.558242161543464e-05, "loss": 3.7397, "step": 9373500 }, { "epoch": 2.46, "learning_rate": 2.5580357621107148e-05, "loss": 3.7292, "step": 9374000 }, { "epoch": 2.46, "learning_rate": 2.557829362282173e-05, "loss": 3.7216, "step": 9374500 }, { "epoch": 2.46, "learning_rate": 2.5576229620592462e-05, "loss": 3.7303, "step": 9375000 }, { "epoch": 2.46, "learning_rate": 2.5574165614433425e-05, "loss": 3.7209, "step": 9375500 }, { "epoch": 2.46, "learning_rate": 2.5572101604358684e-05, "loss": 3.7135, "step": 9376000 }, { "epoch": 2.46, "learning_rate": 2.557003759038233e-05, "loss": 3.727, "step": 9376500 }, { "epoch": 2.46, "learning_rate": 2.5567973572518428e-05, "loss": 3.7132, "step": 9377000 }, { "epoch": 2.46, "learning_rate": 2.5565909550781053e-05, "loss": 3.7086, "step": 9377500 }, { "epoch": 2.47, "learning_rate": 2.5563845525184287e-05, "loss": 3.7131, "step": 9378000 }, { "epoch": 2.47, "learning_rate": 2.5561781495742205e-05, "loss": 3.7012, "step": 9378500 }, { "epoch": 2.47, "learning_rate": 2.5559717462468885e-05, "loss": 3.7163, "step": 9379000 }, { "epoch": 2.47, "learning_rate": 2.5557653425378397e-05, "loss": 3.7185, "step": 9379500 }, { "epoch": 2.47, "learning_rate": 2.555558938448483e-05, "loss": 3.7267, "step": 9380000 }, { "epoch": 2.47, "learning_rate": 2.555352533980225e-05, "loss": 3.7152, "step": 9380500 }, { "epoch": 2.47, "learning_rate": 2.5551461291344732e-05, "loss": 3.7235, "step": 9381000 }, { "epoch": 2.47, "learning_rate": 2.5549397239126354e-05, "loss": 3.7106, "step": 9381500 }, { "epoch": 2.47, "learning_rate": 2.5547333183161197e-05, "loss": 3.7094, "step": 9382000 }, { "epoch": 2.47, "learning_rate": 2.554526912346334e-05, "loss": 3.7159, "step": 9382500 }, { "epoch": 2.47, "learning_rate": 2.554320506004685e-05, "loss": 3.7121, "step": 9383000 }, { "epoch": 2.47, "learning_rate": 2.5541140992925798e-05, "loss": 3.7216, "step": 9383500 }, { "epoch": 2.47, "learning_rate": 2.5539076922114285e-05, "loss": 3.7356, "step": 9384000 }, { "epoch": 2.47, "learning_rate": 2.5537012847626364e-05, "loss": 3.7126, "step": 9384500 }, { "epoch": 2.47, "learning_rate": 2.5534948769476123e-05, "loss": 3.7227, "step": 9385000 }, { "epoch": 2.47, "learning_rate": 2.5532884687677637e-05, "loss": 3.7129, "step": 9385500 }, { "epoch": 2.47, "learning_rate": 2.553082060224498e-05, "loss": 3.7193, "step": 9386000 }, { "epoch": 2.47, "learning_rate": 2.5528756513192236e-05, "loss": 3.7288, "step": 9386500 }, { "epoch": 2.47, "learning_rate": 2.5526692420533466e-05, "loss": 3.7164, "step": 9387000 }, { "epoch": 2.47, "learning_rate": 2.5524628324282762e-05, "loss": 3.7244, "step": 9387500 }, { "epoch": 2.47, "learning_rate": 2.5522564224454197e-05, "loss": 3.6996, "step": 9388000 }, { "epoch": 2.47, "learning_rate": 2.5520500121061846e-05, "loss": 3.7156, "step": 9388500 }, { "epoch": 2.47, "learning_rate": 2.5518436014119783e-05, "loss": 3.7223, "step": 9389000 }, { "epoch": 2.47, "learning_rate": 2.551637190364209e-05, "loss": 3.7081, "step": 9389500 }, { "epoch": 2.47, "learning_rate": 2.551430778964284e-05, "loss": 3.7187, "step": 9390000 }, { "epoch": 2.47, "learning_rate": 2.551224367213611e-05, "loss": 3.7019, "step": 9390500 }, { "epoch": 2.47, "learning_rate": 2.551017955113598e-05, "loss": 3.7131, "step": 9391000 }, { "epoch": 2.47, "learning_rate": 2.550811542665653e-05, "loss": 3.7236, "step": 9391500 }, { "epoch": 2.47, "learning_rate": 2.5506051298711826e-05, "loss": 3.7088, "step": 9392000 }, { "epoch": 2.47, "learning_rate": 2.5503987167315956e-05, "loss": 3.7092, "step": 9392500 }, { "epoch": 2.47, "learning_rate": 2.550192303248299e-05, "loss": 3.6995, "step": 9393000 }, { "epoch": 2.47, "learning_rate": 2.5499858894227002e-05, "loss": 3.6963, "step": 9393500 }, { "epoch": 2.47, "learning_rate": 2.549779475256208e-05, "loss": 3.7109, "step": 9394000 }, { "epoch": 2.47, "learning_rate": 2.54957306075023e-05, "loss": 3.7258, "step": 9394500 }, { "epoch": 2.47, "learning_rate": 2.549366645906172e-05, "loss": 3.7092, "step": 9395000 }, { "epoch": 2.47, "learning_rate": 2.5491602307254443e-05, "loss": 3.7211, "step": 9395500 }, { "epoch": 2.47, "learning_rate": 2.5489538152094535e-05, "loss": 3.6946, "step": 9396000 }, { "epoch": 2.47, "learning_rate": 2.5487473993596067e-05, "loss": 3.7034, "step": 9396500 }, { "epoch": 2.47, "learning_rate": 2.5485409831773123e-05, "loss": 3.6997, "step": 9397000 }, { "epoch": 2.47, "learning_rate": 2.5483345666639778e-05, "loss": 3.7013, "step": 9397500 }, { "epoch": 2.47, "learning_rate": 2.5481281498210106e-05, "loss": 3.7127, "step": 9398000 }, { "epoch": 2.47, "learning_rate": 2.5479217326498195e-05, "loss": 3.715, "step": 9398500 }, { "epoch": 2.47, "learning_rate": 2.547715315151812e-05, "loss": 3.7097, "step": 9399000 }, { "epoch": 2.47, "learning_rate": 2.547508897328394e-05, "loss": 3.7098, "step": 9399500 }, { "epoch": 2.47, "learning_rate": 2.5473024791809757e-05, "loss": 3.7104, "step": 9400000 }, { "epoch": 2.47, "learning_rate": 2.547096060710963e-05, "loss": 3.7117, "step": 9400500 }, { "epoch": 2.47, "learning_rate": 2.546889641919765e-05, "loss": 3.6973, "step": 9401000 }, { "epoch": 2.47, "learning_rate": 2.5466832228087882e-05, "loss": 3.7018, "step": 9401500 }, { "epoch": 2.47, "learning_rate": 2.546476803379442e-05, "loss": 3.6948, "step": 9402000 }, { "epoch": 2.47, "learning_rate": 2.5462703836331324e-05, "loss": 3.7109, "step": 9402500 }, { "epoch": 2.47, "learning_rate": 2.5460639635712675e-05, "loss": 3.7168, "step": 9403000 }, { "epoch": 2.47, "learning_rate": 2.5458575431952558e-05, "loss": 3.6981, "step": 9403500 }, { "epoch": 2.47, "learning_rate": 2.5456511225065042e-05, "loss": 3.7157, "step": 9404000 }, { "epoch": 2.47, "learning_rate": 2.5454447015064213e-05, "loss": 3.7119, "step": 9404500 }, { "epoch": 2.47, "learning_rate": 2.5452382801964148e-05, "loss": 3.695, "step": 9405000 }, { "epoch": 2.47, "learning_rate": 2.5450318585778915e-05, "loss": 3.7007, "step": 9405500 }, { "epoch": 2.47, "learning_rate": 2.54482543665226e-05, "loss": 3.7039, "step": 9406000 }, { "epoch": 2.47, "learning_rate": 2.544619014420927e-05, "loss": 3.7073, "step": 9406500 }, { "epoch": 2.47, "learning_rate": 2.5444125918853024e-05, "loss": 3.7055, "step": 9407000 }, { "epoch": 2.47, "learning_rate": 2.5442061690467918e-05, "loss": 3.7114, "step": 9407500 }, { "epoch": 2.47, "learning_rate": 2.5439997459068038e-05, "loss": 3.7028, "step": 9408000 }, { "epoch": 2.47, "learning_rate": 2.543793322466746e-05, "loss": 3.7069, "step": 9408500 }, { "epoch": 2.47, "learning_rate": 2.5435868987280266e-05, "loss": 3.7, "step": 9409000 }, { "epoch": 2.47, "learning_rate": 2.5433804746920532e-05, "loss": 3.7035, "step": 9409500 }, { "epoch": 2.47, "learning_rate": 2.543174050360233e-05, "loss": 3.6934, "step": 9410000 }, { "epoch": 2.47, "learning_rate": 2.5429676257339742e-05, "loss": 3.7084, "step": 9410500 }, { "epoch": 2.47, "learning_rate": 2.542761200814685e-05, "loss": 3.7189, "step": 9411000 }, { "epoch": 2.47, "learning_rate": 2.5425547756037727e-05, "loss": 3.7037, "step": 9411500 }, { "epoch": 2.47, "learning_rate": 2.542348350102645e-05, "loss": 3.7109, "step": 9412000 }, { "epoch": 2.47, "learning_rate": 2.5421419243127105e-05, "loss": 3.7088, "step": 9412500 }, { "epoch": 2.47, "learning_rate": 2.541935498235375e-05, "loss": 3.7136, "step": 9413000 }, { "epoch": 2.47, "learning_rate": 2.5417290718720487e-05, "loss": 3.7052, "step": 9413500 }, { "epoch": 2.47, "learning_rate": 2.5415226452241375e-05, "loss": 3.7064, "step": 9414000 }, { "epoch": 2.47, "learning_rate": 2.5413162182930506e-05, "loss": 3.7024, "step": 9414500 }, { "epoch": 2.47, "learning_rate": 2.541109791080195e-05, "loss": 3.6935, "step": 9415000 }, { "epoch": 2.47, "learning_rate": 2.5409033635869782e-05, "loss": 3.7194, "step": 9415500 }, { "epoch": 2.48, "learning_rate": 2.5406969358148093e-05, "loss": 3.7062, "step": 9416000 }, { "epoch": 2.48, "learning_rate": 2.5404905077650954e-05, "loss": 3.7031, "step": 9416500 }, { "epoch": 2.48, "learning_rate": 2.540284079439243e-05, "loss": 3.7061, "step": 9417000 }, { "epoch": 2.48, "learning_rate": 2.540077650838662e-05, "loss": 3.6979, "step": 9417500 }, { "epoch": 2.48, "learning_rate": 2.539871221964759e-05, "loss": 3.7088, "step": 9418000 }, { "epoch": 2.48, "learning_rate": 2.5396647928189415e-05, "loss": 3.6954, "step": 9418500 }, { "epoch": 2.48, "learning_rate": 2.5394583634026187e-05, "loss": 3.7017, "step": 9419000 }, { "epoch": 2.48, "learning_rate": 2.5392519337171978e-05, "loss": 3.7028, "step": 9419500 }, { "epoch": 2.48, "learning_rate": 2.5390455037640854e-05, "loss": 3.6962, "step": 9420000 }, { "epoch": 2.48, "learning_rate": 2.538839073544691e-05, "loss": 3.7057, "step": 9420500 }, { "epoch": 2.48, "learning_rate": 2.5386326430604214e-05, "loss": 3.6997, "step": 9421000 }, { "epoch": 2.48, "learning_rate": 2.5384262123126847e-05, "loss": 3.695, "step": 9421500 }, { "epoch": 2.48, "learning_rate": 2.5382197813028892e-05, "loss": 3.7038, "step": 9422000 }, { "epoch": 2.48, "learning_rate": 2.538013350032441e-05, "loss": 3.706, "step": 9422500 }, { "epoch": 2.48, "learning_rate": 2.5378069185027502e-05, "loss": 3.7139, "step": 9423000 }, { "epoch": 2.48, "learning_rate": 2.5376004867152236e-05, "loss": 3.7052, "step": 9423500 }, { "epoch": 2.48, "learning_rate": 2.5373940546712687e-05, "loss": 3.6919, "step": 9424000 }, { "epoch": 2.48, "learning_rate": 2.537187622372294e-05, "loss": 3.6975, "step": 9424500 }, { "epoch": 2.48, "learning_rate": 2.5369811898197067e-05, "loss": 3.6903, "step": 9425000 }, { "epoch": 2.48, "learning_rate": 2.5367747570149152e-05, "loss": 3.685, "step": 9425500 }, { "epoch": 2.48, "learning_rate": 2.5365683239593268e-05, "loss": 3.7019, "step": 9426000 }, { "epoch": 2.48, "learning_rate": 2.53636189065435e-05, "loss": 3.7149, "step": 9426500 }, { "epoch": 2.48, "learning_rate": 2.5361554571013918e-05, "loss": 3.7002, "step": 9427000 }, { "epoch": 2.48, "learning_rate": 2.5359490233018607e-05, "loss": 3.7061, "step": 9427500 }, { "epoch": 2.48, "learning_rate": 2.535742589257164e-05, "loss": 3.6874, "step": 9428000 }, { "epoch": 2.48, "learning_rate": 2.53553615496871e-05, "loss": 3.7131, "step": 9428500 }, { "epoch": 2.48, "learning_rate": 2.535329720437906e-05, "loss": 3.7035, "step": 9429000 }, { "epoch": 2.48, "learning_rate": 2.535123285666161e-05, "loss": 3.6984, "step": 9429500 }, { "epoch": 2.48, "learning_rate": 2.5349168506548817e-05, "loss": 3.7162, "step": 9430000 }, { "epoch": 2.48, "learning_rate": 2.5347104154054762e-05, "loss": 3.7038, "step": 9430500 }, { "epoch": 2.48, "learning_rate": 2.534503979919352e-05, "loss": 3.7025, "step": 9431000 }, { "epoch": 2.48, "learning_rate": 2.5342975441979182e-05, "loss": 3.6859, "step": 9431500 } ], "logging_steps": 500, "max_steps": 19022025, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 7.18259224341547e+19, "train_batch_size": null, "trial_name": null, "trial_params": null }