{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.0, "eval_steps": 500, "global_step": 804146, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.0001342882721575649, "loss": 12.1916, "step": 3000 }, { "epoch": 0.18, "learning_rate": 0.0002685765443151298, "loss": 2.3149, "step": 6000 }, { "epoch": 0.27, "learning_rate": 0.00029896083615458125, "loss": 1.9954, "step": 9000 }, { "epoch": 0.36, "learning_rate": 0.00029760422539032704, "loss": 1.8429, "step": 12000 }, { "epoch": 0.45, "learning_rate": 0.0002962476146260728, "loss": 1.7657, "step": 15000 }, { "epoch": 0.54, "learning_rate": 0.0002948910038618186, "loss": 1.6944, "step": 18000 }, { "epoch": 0.63, "learning_rate": 0.0002935343930975644, "loss": 1.6484, "step": 21000 }, { "epoch": 0.72, "learning_rate": 0.0002921777823333102, "loss": 1.6187, "step": 24000 }, { "epoch": 0.81, "learning_rate": 0.00029082117156905597, "loss": 1.5982, "step": 27000 }, { "epoch": 0.9, "learning_rate": 0.00028946456080480176, "loss": 1.5673, "step": 30000 }, { "epoch": 0.98, "learning_rate": 0.00028810795004054755, "loss": 1.5474, "step": 33000 }, { "epoch": 1.07, "learning_rate": 0.0002867513392762934, "loss": 1.4948, "step": 36000 }, { "epoch": 1.16, "learning_rate": 0.0002853947285120391, "loss": 1.4913, "step": 39000 }, { "epoch": 1.25, "learning_rate": 0.00028403811774778496, "loss": 1.4821, "step": 42000 }, { "epoch": 1.34, "learning_rate": 0.0002826815069835307, "loss": 1.4474, "step": 45000 }, { "epoch": 1.43, "learning_rate": 0.00028132489621927653, "loss": 1.4602, "step": 48000 }, { "epoch": 1.52, "learning_rate": 0.00027996828545502226, "loss": 1.456, "step": 51000 }, { "epoch": 1.61, "learning_rate": 0.0002786116746907681, "loss": 1.4377, "step": 54000 }, { "epoch": 1.7, "learning_rate": 0.0002772550639265139, "loss": 1.4301, "step": 57000 }, { "epoch": 1.79, "learning_rate": 0.0002758984531622597, "loss": 1.425, "step": 60000 }, { "epoch": 1.88, "learning_rate": 0.00027454184239800546, "loss": 1.4153, "step": 63000 }, { "epoch": 1.97, "learning_rate": 0.00027318523163375125, "loss": 1.4046, "step": 66000 }, { "epoch": 2.06, "learning_rate": 0.00027182862086949704, "loss": 1.3565, "step": 69000 }, { "epoch": 2.15, "learning_rate": 0.0002704720101052428, "loss": 1.3488, "step": 72000 }, { "epoch": 2.24, "learning_rate": 0.0002691153993409886, "loss": 1.339, "step": 75000 }, { "epoch": 2.33, "learning_rate": 0.0002677587885767344, "loss": 1.3513, "step": 78000 }, { "epoch": 2.42, "learning_rate": 0.0002664021778124802, "loss": 1.3491, "step": 81000 }, { "epoch": 2.51, "learning_rate": 0.00026504556704822597, "loss": 1.3345, "step": 84000 }, { "epoch": 2.6, "learning_rate": 0.00026368895628397175, "loss": 1.3313, "step": 87000 }, { "epoch": 2.69, "learning_rate": 0.00026233234551971754, "loss": 1.3319, "step": 90000 }, { "epoch": 2.78, "learning_rate": 0.00026097573475546333, "loss": 1.3185, "step": 93000 }, { "epoch": 2.87, "learning_rate": 0.00025961912399120917, "loss": 1.325, "step": 96000 }, { "epoch": 2.95, "learning_rate": 0.0002582625132269549, "loss": 1.3048, "step": 99000 }, { "epoch": 3.04, "learning_rate": 0.00025690590246270074, "loss": 1.2797, "step": 102000 }, { "epoch": 3.13, "learning_rate": 0.00025554929169844653, "loss": 1.2646, "step": 105000 }, { "epoch": 3.22, "learning_rate": 0.0002541926809341923, "loss": 1.2595, "step": 108000 }, { "epoch": 3.31, "learning_rate": 0.0002528360701699381, "loss": 1.2574, "step": 111000 }, { "epoch": 3.4, "learning_rate": 0.0002514794594056839, "loss": 1.2623, "step": 114000 }, { "epoch": 3.49, "learning_rate": 0.0002501228486414297, "loss": 1.2548, "step": 117000 }, { "epoch": 3.58, "learning_rate": 0.00024876623787717546, "loss": 1.2615, "step": 120000 }, { "epoch": 3.67, "learning_rate": 0.00024740962711292125, "loss": 1.2558, "step": 123000 }, { "epoch": 3.76, "learning_rate": 0.00024605301634866703, "loss": 1.2537, "step": 126000 }, { "epoch": 3.85, "learning_rate": 0.0002446964055844128, "loss": 1.2393, "step": 129000 }, { "epoch": 3.94, "learning_rate": 0.0002433397948201586, "loss": 1.2438, "step": 132000 }, { "epoch": 4.03, "learning_rate": 0.00024198318405590442, "loss": 1.2277, "step": 135000 }, { "epoch": 4.12, "learning_rate": 0.00024062657329165018, "loss": 1.1934, "step": 138000 }, { "epoch": 4.21, "learning_rate": 0.000239269962527396, "loss": 1.1857, "step": 141000 }, { "epoch": 4.3, "learning_rate": 0.00023791335176314175, "loss": 1.1902, "step": 144000 }, { "epoch": 4.39, "learning_rate": 0.00023655674099888756, "loss": 1.1901, "step": 147000 }, { "epoch": 4.48, "learning_rate": 0.00023520013023463332, "loss": 1.1948, "step": 150000 }, { "epoch": 4.57, "learning_rate": 0.00023384351947037914, "loss": 1.1941, "step": 153000 }, { "epoch": 4.66, "learning_rate": 0.00023248690870612495, "loss": 1.1903, "step": 156000 }, { "epoch": 4.75, "learning_rate": 0.0002311302979418707, "loss": 1.1877, "step": 159000 }, { "epoch": 4.83, "learning_rate": 0.00022977368717761652, "loss": 1.1974, "step": 162000 }, { "epoch": 4.92, "learning_rate": 0.00022841707641336228, "loss": 1.189, "step": 165000 }, { "epoch": 5.01, "learning_rate": 0.0002270604656491081, "loss": 1.1851, "step": 168000 }, { "epoch": 5.1, "learning_rate": 0.00022570385488485386, "loss": 1.1479, "step": 171000 }, { "epoch": 5.19, "learning_rate": 0.00022434724412059967, "loss": 1.1374, "step": 174000 }, { "epoch": 5.28, "learning_rate": 0.00022299063335634543, "loss": 1.1343, "step": 177000 }, { "epoch": 5.37, "learning_rate": 0.00022163402259209124, "loss": 1.1306, "step": 180000 }, { "epoch": 5.46, "learning_rate": 0.00022027741182783706, "loss": 1.1399, "step": 183000 }, { "epoch": 5.55, "learning_rate": 0.00021892080106358282, "loss": 1.1457, "step": 186000 }, { "epoch": 5.64, "learning_rate": 0.00021756419029932863, "loss": 1.1469, "step": 189000 }, { "epoch": 5.73, "learning_rate": 0.0002162075795350744, "loss": 1.1448, "step": 192000 }, { "epoch": 5.82, "learning_rate": 0.0002148509687708202, "loss": 1.1397, "step": 195000 }, { "epoch": 5.91, "learning_rate": 0.00021349435800656596, "loss": 1.1441, "step": 198000 }, { "epoch": 6.0, "learning_rate": 0.00021213774724231177, "loss": 1.1453, "step": 201000 }, { "epoch": 6.09, "learning_rate": 0.00021078113647805753, "loss": 1.0897, "step": 204000 }, { "epoch": 6.18, "learning_rate": 0.00020942452571380335, "loss": 1.0956, "step": 207000 }, { "epoch": 6.27, "learning_rate": 0.0002080679149495491, "loss": 1.0947, "step": 210000 }, { "epoch": 6.36, "learning_rate": 0.00020671130418529492, "loss": 1.0961, "step": 213000 }, { "epoch": 6.45, "learning_rate": 0.00020535469342104073, "loss": 1.1117, "step": 216000 }, { "epoch": 6.54, "learning_rate": 0.0002039980826567865, "loss": 1.1032, "step": 219000 }, { "epoch": 6.63, "learning_rate": 0.0002026414718925323, "loss": 1.0983, "step": 222000 }, { "epoch": 6.72, "learning_rate": 0.00020128486112827807, "loss": 1.0885, "step": 225000 }, { "epoch": 6.8, "learning_rate": 0.00019992825036402388, "loss": 1.0867, "step": 228000 }, { "epoch": 6.89, "learning_rate": 0.00019857163959976964, "loss": 1.0993, "step": 231000 }, { "epoch": 6.98, "learning_rate": 0.00019721502883551545, "loss": 1.1021, "step": 234000 }, { "epoch": 7.07, "learning_rate": 0.00019585841807126124, "loss": 1.0519, "step": 237000 }, { "epoch": 7.16, "learning_rate": 0.00019450180730700702, "loss": 1.0594, "step": 240000 }, { "epoch": 7.25, "learning_rate": 0.00019314519654275284, "loss": 1.0555, "step": 243000 }, { "epoch": 7.34, "learning_rate": 0.0001917885857784986, "loss": 1.057, "step": 246000 }, { "epoch": 7.43, "learning_rate": 0.0001904319750142444, "loss": 1.0585, "step": 249000 }, { "epoch": 7.52, "learning_rate": 0.00018907536424999017, "loss": 1.0534, "step": 252000 }, { "epoch": 7.61, "learning_rate": 0.00018771875348573598, "loss": 1.0655, "step": 255000 }, { "epoch": 7.7, "learning_rate": 0.00018636214272148174, "loss": 1.056, "step": 258000 }, { "epoch": 7.79, "learning_rate": 0.00018500553195722756, "loss": 1.0638, "step": 261000 }, { "epoch": 7.88, "learning_rate": 0.00018364892119297334, "loss": 1.0521, "step": 264000 }, { "epoch": 7.97, "learning_rate": 0.00018229231042871913, "loss": 1.0633, "step": 267000 }, { "epoch": 8.06, "learning_rate": 0.00018093569966446492, "loss": 1.0345, "step": 270000 }, { "epoch": 8.15, "learning_rate": 0.0001795790889002107, "loss": 1.0186, "step": 273000 }, { "epoch": 8.24, "learning_rate": 0.00017822247813595652, "loss": 1.0141, "step": 276000 }, { "epoch": 8.33, "learning_rate": 0.00017686586737170228, "loss": 1.0184, "step": 279000 }, { "epoch": 8.42, "learning_rate": 0.0001755092566074481, "loss": 1.0184, "step": 282000 }, { "epoch": 8.51, "learning_rate": 0.00017415264584319385, "loss": 1.0222, "step": 285000 }, { "epoch": 8.6, "learning_rate": 0.00017279603507893966, "loss": 1.0176, "step": 288000 }, { "epoch": 8.68, "learning_rate": 0.00017143942431468545, "loss": 1.0236, "step": 291000 }, { "epoch": 8.77, "learning_rate": 0.00017008281355043123, "loss": 1.0278, "step": 294000 }, { "epoch": 8.86, "learning_rate": 0.00016872620278617702, "loss": 1.0076, "step": 297000 }, { "epoch": 8.95, "learning_rate": 0.0001673695920219228, "loss": 1.0248, "step": 300000 }, { "epoch": 9.04, "learning_rate": 0.00016601298125766862, "loss": 0.9915, "step": 303000 }, { "epoch": 9.13, "learning_rate": 0.00016465637049341438, "loss": 0.9759, "step": 306000 }, { "epoch": 9.22, "learning_rate": 0.0001632997597291602, "loss": 0.9813, "step": 309000 }, { "epoch": 9.31, "learning_rate": 0.00016194314896490595, "loss": 0.9853, "step": 312000 }, { "epoch": 9.4, "learning_rate": 0.00016058653820065177, "loss": 0.9808, "step": 315000 }, { "epoch": 9.49, "learning_rate": 0.00015922992743639755, "loss": 0.9759, "step": 318000 }, { "epoch": 9.58, "learning_rate": 0.00015787331667214334, "loss": 0.9852, "step": 321000 }, { "epoch": 9.67, "learning_rate": 0.00015651670590788913, "loss": 0.9796, "step": 324000 }, { "epoch": 9.76, "learning_rate": 0.0001551600951436349, "loss": 0.9871, "step": 327000 }, { "epoch": 9.85, "learning_rate": 0.0001538034843793807, "loss": 0.9953, "step": 330000 }, { "epoch": 9.94, "learning_rate": 0.00015244687361512649, "loss": 0.9883, "step": 333000 }, { "epoch": 10.03, "learning_rate": 0.0001510902628508723, "loss": 0.9735, "step": 336000 }, { "epoch": 10.12, "learning_rate": 0.00014973365208661809, "loss": 0.9509, "step": 339000 }, { "epoch": 10.21, "learning_rate": 0.00014837704132236387, "loss": 0.9448, "step": 342000 }, { "epoch": 10.3, "learning_rate": 0.00014702043055810966, "loss": 0.9395, "step": 345000 }, { "epoch": 10.39, "learning_rate": 0.00014566381979385544, "loss": 0.9438, "step": 348000 }, { "epoch": 10.48, "learning_rate": 0.00014430720902960123, "loss": 0.9498, "step": 351000 }, { "epoch": 10.57, "learning_rate": 0.00014295059826534702, "loss": 0.9481, "step": 354000 }, { "epoch": 10.65, "learning_rate": 0.0001415939875010928, "loss": 0.9509, "step": 357000 }, { "epoch": 10.74, "learning_rate": 0.0001402373767368386, "loss": 0.9527, "step": 360000 }, { "epoch": 10.83, "learning_rate": 0.0001388807659725844, "loss": 0.944, "step": 363000 }, { "epoch": 10.92, "learning_rate": 0.0001375241552083302, "loss": 0.9427, "step": 366000 }, { "epoch": 11.01, "learning_rate": 0.00013616754444407598, "loss": 0.9511, "step": 369000 }, { "epoch": 11.1, "learning_rate": 0.00013481093367982176, "loss": 0.901, "step": 372000 }, { "epoch": 11.19, "learning_rate": 0.00013345432291556755, "loss": 0.9175, "step": 375000 }, { "epoch": 11.28, "learning_rate": 0.00013209771215131334, "loss": 0.9061, "step": 378000 }, { "epoch": 11.37, "learning_rate": 0.00013074110138705912, "loss": 0.9175, "step": 381000 }, { "epoch": 11.46, "learning_rate": 0.0001293844906228049, "loss": 0.9175, "step": 384000 }, { "epoch": 11.55, "learning_rate": 0.0001280278798585507, "loss": 0.9149, "step": 387000 }, { "epoch": 11.64, "learning_rate": 0.0001266712690942965, "loss": 0.9155, "step": 390000 }, { "epoch": 11.73, "learning_rate": 0.0001253146583300423, "loss": 0.9129, "step": 393000 }, { "epoch": 11.82, "learning_rate": 0.00012395804756578808, "loss": 0.9178, "step": 396000 }, { "epoch": 11.91, "learning_rate": 0.00012260143680153387, "loss": 0.912, "step": 399000 }, { "epoch": 12.0, "learning_rate": 0.00012124482603727964, "loss": 0.9217, "step": 402000 }, { "epoch": 12.09, "learning_rate": 0.00011988821527302545, "loss": 0.8778, "step": 405000 }, { "epoch": 12.18, "learning_rate": 0.00011853160450877124, "loss": 0.8741, "step": 408000 }, { "epoch": 12.27, "learning_rate": 0.00011717499374451703, "loss": 0.8786, "step": 411000 }, { "epoch": 12.36, "learning_rate": 0.00011581838298026281, "loss": 0.8837, "step": 414000 }, { "epoch": 12.45, "learning_rate": 0.0001144617722160086, "loss": 0.883, "step": 417000 }, { "epoch": 12.53, "learning_rate": 0.00011310516145175439, "loss": 0.8764, "step": 420000 }, { "epoch": 12.62, "learning_rate": 0.00011174855068750017, "loss": 0.8881, "step": 423000 }, { "epoch": 12.71, "learning_rate": 0.00011039193992324596, "loss": 0.8844, "step": 426000 }, { "epoch": 12.8, "learning_rate": 0.00010903532915899175, "loss": 0.8838, "step": 429000 }, { "epoch": 12.89, "learning_rate": 0.00010767871839473755, "loss": 0.8799, "step": 432000 }, { "epoch": 12.98, "learning_rate": 0.00010632210763048335, "loss": 0.8766, "step": 435000 }, { "epoch": 13.07, "learning_rate": 0.00010496549686622913, "loss": 0.8562, "step": 438000 }, { "epoch": 13.16, "learning_rate": 0.00010360888610197492, "loss": 0.8445, "step": 441000 }, { "epoch": 13.25, "learning_rate": 0.0001022522753377207, "loss": 0.8422, "step": 444000 }, { "epoch": 13.34, "learning_rate": 0.00010089566457346649, "loss": 0.8405, "step": 447000 }, { "epoch": 13.43, "learning_rate": 9.953905380921228e-05, "loss": 0.8456, "step": 450000 }, { "epoch": 13.52, "learning_rate": 9.818244304495806e-05, "loss": 0.8516, "step": 453000 }, { "epoch": 13.61, "learning_rate": 9.682583228070386e-05, "loss": 0.8514, "step": 456000 }, { "epoch": 13.7, "learning_rate": 9.546922151644965e-05, "loss": 0.8465, "step": 459000 }, { "epoch": 13.79, "learning_rate": 9.411261075219544e-05, "loss": 0.8499, "step": 462000 }, { "epoch": 13.88, "learning_rate": 9.275599998794124e-05, "loss": 0.8582, "step": 465000 }, { "epoch": 13.97, "learning_rate": 9.139938922368702e-05, "loss": 0.8544, "step": 468000 }, { "epoch": 14.06, "learning_rate": 9.004277845943281e-05, "loss": 0.8226, "step": 471000 }, { "epoch": 14.15, "learning_rate": 8.86861676951786e-05, "loss": 0.8132, "step": 474000 }, { "epoch": 14.24, "learning_rate": 8.732955693092438e-05, "loss": 0.8196, "step": 477000 }, { "epoch": 14.33, "learning_rate": 8.597294616667018e-05, "loss": 0.8221, "step": 480000 }, { "epoch": 14.42, "learning_rate": 8.461633540241597e-05, "loss": 0.8155, "step": 483000 }, { "epoch": 14.5, "learning_rate": 8.325972463816176e-05, "loss": 0.8219, "step": 486000 }, { "epoch": 14.59, "learning_rate": 8.190311387390754e-05, "loss": 0.8171, "step": 489000 }, { "epoch": 14.68, "learning_rate": 8.054650310965333e-05, "loss": 0.8116, "step": 492000 }, { "epoch": 14.77, "learning_rate": 7.918989234539913e-05, "loss": 0.8213, "step": 495000 }, { "epoch": 14.86, "learning_rate": 7.783328158114491e-05, "loss": 0.8154, "step": 498000 }, { "epoch": 14.95, "learning_rate": 7.64766708168907e-05, "loss": 0.824, "step": 501000 }, { "epoch": 15.04, "learning_rate": 7.512006005263649e-05, "loss": 0.8068, "step": 504000 }, { "epoch": 15.13, "learning_rate": 7.376344928838229e-05, "loss": 0.7813, "step": 507000 }, { "epoch": 15.22, "learning_rate": 7.240683852412807e-05, "loss": 0.7947, "step": 510000 }, { "epoch": 15.31, "learning_rate": 7.105022775987386e-05, "loss": 0.7899, "step": 513000 }, { "epoch": 15.4, "learning_rate": 6.969361699561965e-05, "loss": 0.7885, "step": 516000 }, { "epoch": 15.49, "learning_rate": 6.833700623136545e-05, "loss": 0.7963, "step": 519000 }, { "epoch": 15.58, "learning_rate": 6.698039546711123e-05, "loss": 0.787, "step": 522000 }, { "epoch": 15.67, "learning_rate": 6.562378470285702e-05, "loss": 0.7877, "step": 525000 }, { "epoch": 15.76, "learning_rate": 6.42671739386028e-05, "loss": 0.7949, "step": 528000 }, { "epoch": 15.85, "learning_rate": 6.291056317434859e-05, "loss": 0.7835, "step": 531000 }, { "epoch": 15.94, "learning_rate": 6.155395241009439e-05, "loss": 0.7904, "step": 534000 }, { "epoch": 16.03, "learning_rate": 6.019734164584017e-05, "loss": 0.7797, "step": 537000 }, { "epoch": 16.12, "learning_rate": 5.8840730881585965e-05, "loss": 0.7606, "step": 540000 }, { "epoch": 16.21, "learning_rate": 5.748412011733175e-05, "loss": 0.7671, "step": 543000 }, { "epoch": 16.3, "learning_rate": 5.6127509353077545e-05, "loss": 0.764, "step": 546000 }, { "epoch": 16.38, "learning_rate": 5.477089858882333e-05, "loss": 0.758, "step": 549000 }, { "epoch": 16.47, "learning_rate": 5.3414287824569124e-05, "loss": 0.7518, "step": 552000 }, { "epoch": 16.56, "learning_rate": 5.205767706031491e-05, "loss": 0.7644, "step": 555000 }, { "epoch": 16.65, "learning_rate": 5.07010662960607e-05, "loss": 0.7577, "step": 558000 }, { "epoch": 16.74, "learning_rate": 4.934445553180649e-05, "loss": 0.762, "step": 561000 }, { "epoch": 16.83, "learning_rate": 4.7987844767552283e-05, "loss": 0.7548, "step": 564000 }, { "epoch": 16.92, "learning_rate": 4.663123400329807e-05, "loss": 0.7567, "step": 567000 }, { "epoch": 17.01, "learning_rate": 4.5274623239043856e-05, "loss": 0.7613, "step": 570000 }, { "epoch": 17.1, "learning_rate": 4.391801247478964e-05, "loss": 0.7346, "step": 573000 }, { "epoch": 17.19, "learning_rate": 4.256140171053544e-05, "loss": 0.7323, "step": 576000 }, { "epoch": 17.28, "learning_rate": 4.120479094628123e-05, "loss": 0.7322, "step": 579000 }, { "epoch": 17.37, "learning_rate": 3.9848180182027016e-05, "loss": 0.7456, "step": 582000 }, { "epoch": 17.46, "learning_rate": 3.84915694177728e-05, "loss": 0.7324, "step": 585000 }, { "epoch": 17.55, "learning_rate": 3.7134958653518595e-05, "loss": 0.7414, "step": 588000 }, { "epoch": 17.64, "learning_rate": 3.577834788926438e-05, "loss": 0.7334, "step": 591000 }, { "epoch": 17.73, "learning_rate": 3.442173712501017e-05, "loss": 0.731, "step": 594000 }, { "epoch": 17.82, "learning_rate": 3.306512636075596e-05, "loss": 0.7488, "step": 597000 }, { "epoch": 17.91, "learning_rate": 3.170851559650175e-05, "loss": 0.7287, "step": 600000 }, { "epoch": 18.0, "learning_rate": 3.035190483224754e-05, "loss": 0.7361, "step": 603000 }, { "epoch": 18.09, "learning_rate": 2.8995294067993327e-05, "loss": 0.7173, "step": 606000 }, { "epoch": 18.18, "learning_rate": 2.763868330373912e-05, "loss": 0.7173, "step": 609000 }, { "epoch": 18.27, "learning_rate": 2.6282072539484907e-05, "loss": 0.7138, "step": 612000 }, { "epoch": 18.35, "learning_rate": 2.4925461775230697e-05, "loss": 0.71, "step": 615000 }, { "epoch": 18.44, "learning_rate": 2.3568851010976486e-05, "loss": 0.7128, "step": 618000 }, { "epoch": 18.53, "learning_rate": 2.2212240246722276e-05, "loss": 0.7168, "step": 621000 }, { "epoch": 18.62, "learning_rate": 2.0855629482468066e-05, "loss": 0.7184, "step": 624000 }, { "epoch": 18.71, "learning_rate": 1.9499018718213856e-05, "loss": 0.7099, "step": 627000 }, { "epoch": 18.8, "learning_rate": 1.8142407953959646e-05, "loss": 0.7047, "step": 630000 }, { "epoch": 18.89, "learning_rate": 1.6785797189705435e-05, "loss": 0.7131, "step": 633000 }, { "epoch": 18.98, "learning_rate": 1.5429186425451222e-05, "loss": 0.7151, "step": 636000 }, { "epoch": 19.07, "learning_rate": 1.4072575661197012e-05, "loss": 0.7058, "step": 639000 }, { "epoch": 19.16, "learning_rate": 1.2715964896942801e-05, "loss": 0.6982, "step": 642000 }, { "epoch": 19.25, "learning_rate": 1.1359354132688591e-05, "loss": 0.6983, "step": 645000 }, { "epoch": 19.34, "learning_rate": 1.0002743368434381e-05, "loss": 0.6932, "step": 648000 }, { "epoch": 19.43, "learning_rate": 8.64613260418017e-06, "loss": 0.7025, "step": 651000 }, { "epoch": 19.52, "learning_rate": 7.289521839925958e-06, "loss": 0.6945, "step": 654000 }, { "epoch": 19.61, "learning_rate": 5.932911075671748e-06, "loss": 0.7039, "step": 657000 }, { "epoch": 19.7, "learning_rate": 4.576300311417537e-06, "loss": 0.6921, "step": 660000 }, { "epoch": 19.79, "learning_rate": 3.2196895471633263e-06, "loss": 0.6914, "step": 663000 }, { "epoch": 19.88, "learning_rate": 1.863078782909116e-06, "loss": 0.6969, "step": 666000 }, { "epoch": 19.97, "learning_rate": 5.064680186549053e-07, "loss": 0.6948, "step": 669000 }, { "epoch": 20.0, "step": 670120, "total_flos": 1.67362005330918e+21, "train_loss": 1.0658713307571661, "train_runtime": 426567.9926, "train_samples_per_second": 25.136, "train_steps_per_second": 1.571 }, { "epoch": 20.06, "learning_rate": 5.053313149301143e-07, "loss": 0.6923, "step": 672000 }, { "epoch": 20.15, "learning_rate": 5.035174260075754e-07, "loss": 0.6958, "step": 675000 }, { "epoch": 20.24, "learning_rate": 5.017035370850364e-07, "loss": 0.6885, "step": 678000 }, { "epoch": 20.32, "learning_rate": 4.998896481624976e-07, "loss": 0.6976, "step": 681000 }, { "epoch": 20.41, "learning_rate": 4.980757592399586e-07, "loss": 0.6997, "step": 684000 }, { "epoch": 20.5, "learning_rate": 4.962618703174198e-07, "loss": 0.6856, "step": 687000 }, { "epoch": 20.59, "learning_rate": 4.944479813948808e-07, "loss": 0.6866, "step": 690000 }, { "epoch": 20.68, "learning_rate": 4.926340924723419e-07, "loss": 0.6981, "step": 693000 }, { "epoch": 20.77, "learning_rate": 4.90820203549803e-07, "loss": 0.695, "step": 696000 }, { "epoch": 20.86, "learning_rate": 4.890063146272641e-07, "loss": 0.6946, "step": 699000 }, { "epoch": 20.95, "learning_rate": 4.871924257047252e-07, "loss": 0.6912, "step": 702000 }, { "epoch": 21.04, "learning_rate": 4.853785367821862e-07, "loss": 0.6885, "step": 705000 }, { "epoch": 21.13, "learning_rate": 4.835646478596474e-07, "loss": 0.6896, "step": 708000 }, { "epoch": 21.22, "learning_rate": 4.817507589371084e-07, "loss": 0.694, "step": 711000 }, { "epoch": 21.31, "learning_rate": 4.799368700145696e-07, "loss": 0.6885, "step": 714000 }, { "epoch": 21.4, "learning_rate": 4.781229810920306e-07, "loss": 0.6924, "step": 717000 }, { "epoch": 21.49, "learning_rate": 4.763090921694917e-07, "loss": 0.6928, "step": 720000 }, { "epoch": 21.58, "learning_rate": 4.744952032469528e-07, "loss": 0.6943, "step": 723000 }, { "epoch": 21.67, "learning_rate": 4.726813143244139e-07, "loss": 0.6893, "step": 726000 }, { "epoch": 21.76, "learning_rate": 4.70867425401875e-07, "loss": 0.6895, "step": 729000 }, { "epoch": 21.85, "learning_rate": 4.690535364793361e-07, "loss": 0.6949, "step": 732000 }, { "epoch": 21.94, "learning_rate": 4.6723964755679714e-07, "loss": 0.6919, "step": 735000 }, { "epoch": 22.03, "learning_rate": 4.6542575863425824e-07, "loss": 0.6885, "step": 738000 }, { "epoch": 22.12, "learning_rate": 4.636118697117194e-07, "loss": 0.6922, "step": 741000 }, { "epoch": 22.2, "learning_rate": 4.617979807891805e-07, "loss": 0.6863, "step": 744000 }, { "epoch": 22.29, "learning_rate": 4.599840918666416e-07, "loss": 0.6899, "step": 747000 }, { "epoch": 22.38, "learning_rate": 4.581702029441027e-07, "loss": 0.6844, "step": 750000 }, { "epoch": 22.47, "learning_rate": 4.563563140215638e-07, "loss": 0.6956, "step": 753000 }, { "epoch": 22.56, "learning_rate": 4.545424250990249e-07, "loss": 0.6916, "step": 756000 }, { "epoch": 22.65, "learning_rate": 4.527285361764859e-07, "loss": 0.6828, "step": 759000 }, { "epoch": 22.74, "learning_rate": 4.50914647253947e-07, "loss": 0.6865, "step": 762000 }, { "epoch": 22.83, "learning_rate": 4.491007583314081e-07, "loss": 0.6916, "step": 765000 }, { "epoch": 22.92, "learning_rate": 4.472868694088692e-07, "loss": 0.6876, "step": 768000 }, { "epoch": 23.01, "learning_rate": 4.454729804863303e-07, "loss": 0.6936, "step": 771000 }, { "epoch": 23.1, "learning_rate": 4.436590915637914e-07, "loss": 0.6908, "step": 774000 }, { "epoch": 23.19, "learning_rate": 4.418452026412525e-07, "loss": 0.6864, "step": 777000 }, { "epoch": 23.28, "learning_rate": 4.400313137187136e-07, "loss": 0.6823, "step": 780000 }, { "epoch": 23.37, "learning_rate": 4.3821742479617464e-07, "loss": 0.6914, "step": 783000 }, { "epoch": 23.46, "learning_rate": 4.3640353587363574e-07, "loss": 0.6915, "step": 786000 }, { "epoch": 23.55, "learning_rate": 4.3458964695109684e-07, "loss": 0.6873, "step": 789000 }, { "epoch": 23.64, "learning_rate": 4.3277575802855793e-07, "loss": 0.69, "step": 792000 }, { "epoch": 23.73, "learning_rate": 4.3096186910601903e-07, "loss": 0.6872, "step": 795000 }, { "epoch": 23.82, "learning_rate": 4.2914798018348013e-07, "loss": 0.6925, "step": 798000 }, { "epoch": 23.91, "learning_rate": 4.273340912609412e-07, "loss": 0.6877, "step": 801000 }, { "epoch": 24.0, "learning_rate": 4.2552020233840227e-07, "loss": 0.6908, "step": 804000 } ], "logging_steps": 3000, "max_steps": 837650, "num_train_epochs": 25, "save_steps": 500, "total_flos": 2.0083483479397598e+21, "trial_name": null, "trial_params": null }