{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 1181220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 7.61872513332769e-05, "loss": 11.8677, "step": 3000 }, { "epoch": 0.15, "learning_rate": 0.0001523745026665538, "loss": 1.9864, "step": 6000 }, { "epoch": 0.23, "learning_rate": 0.00022856175399983065, "loss": 1.5124, "step": 9000 }, { "epoch": 0.3, "learning_rate": 0.0002999520269675143, "loss": 1.4304, "step": 12000 }, { "epoch": 0.38, "learning_rate": 0.00029918240612549776, "loss": 1.3803, "step": 15000 }, { "epoch": 0.46, "learning_rate": 0.00029841278528348127, "loss": 1.3166, "step": 18000 }, { "epoch": 0.53, "learning_rate": 0.0002976431644414648, "loss": 1.2798, "step": 21000 }, { "epoch": 0.61, "learning_rate": 0.00029687354359944824, "loss": 1.2517, "step": 24000 }, { "epoch": 0.69, "learning_rate": 0.0002961039227574317, "loss": 1.2085, "step": 27000 }, { "epoch": 0.76, "learning_rate": 0.0002953343019154152, "loss": 1.197, "step": 30000 }, { "epoch": 0.84, "learning_rate": 0.0002945646810733987, "loss": 1.1841, "step": 33000 }, { "epoch": 0.91, "learning_rate": 0.0002937950602313822, "loss": 1.1658, "step": 36000 }, { "epoch": 0.99, "learning_rate": 0.0002930254393893657, "loss": 1.1471, "step": 39000 }, { "epoch": 1.07, "learning_rate": 0.0002922558185473492, "loss": 1.0998, "step": 42000 }, { "epoch": 1.14, "learning_rate": 0.0002914861977053327, "loss": 1.0865, "step": 45000 }, { "epoch": 1.22, "learning_rate": 0.0002907165768633162, "loss": 1.0697, "step": 48000 }, { "epoch": 1.3, "learning_rate": 0.00028994695602129963, "loss": 1.0537, "step": 51000 }, { "epoch": 1.37, "learning_rate": 0.00028917733517928315, "loss": 1.0509, "step": 54000 }, { "epoch": 1.45, "learning_rate": 0.00028840771433726666, "loss": 1.039, "step": 57000 }, { "epoch": 1.52, "learning_rate": 0.0002876380934952501, "loss": 1.0224, "step": 60000 }, { "epoch": 1.6, "learning_rate": 0.00028686847265323363, "loss": 1.0209, "step": 63000 }, { "epoch": 1.68, "learning_rate": 0.00028609885181121714, "loss": 1.0103, "step": 66000 }, { "epoch": 1.75, "learning_rate": 0.0002853292309692006, "loss": 1.0058, "step": 69000 }, { "epoch": 1.83, "learning_rate": 0.00028455961012718406, "loss": 0.9954, "step": 72000 }, { "epoch": 1.9, "learning_rate": 0.00028378998928516757, "loss": 0.9815, "step": 75000 }, { "epoch": 1.98, "learning_rate": 0.0002830203684431511, "loss": 0.9832, "step": 78000 }, { "epoch": 2.06, "learning_rate": 0.00028225074760113454, "loss": 0.9556, "step": 81000 }, { "epoch": 2.13, "learning_rate": 0.00028148112675911805, "loss": 0.9396, "step": 84000 }, { "epoch": 2.21, "learning_rate": 0.00028071150591710156, "loss": 0.9337, "step": 87000 }, { "epoch": 2.29, "learning_rate": 0.000279941885075085, "loss": 0.9211, "step": 90000 }, { "epoch": 2.36, "learning_rate": 0.00027917226423306853, "loss": 0.9296, "step": 93000 }, { "epoch": 2.44, "learning_rate": 0.00027840264339105205, "loss": 0.9216, "step": 96000 }, { "epoch": 2.51, "learning_rate": 0.0002776330225490355, "loss": 0.9132, "step": 99000 }, { "epoch": 2.59, "learning_rate": 0.000276863401707019, "loss": 0.9041, "step": 102000 }, { "epoch": 2.67, "learning_rate": 0.0002760937808650025, "loss": 0.9056, "step": 105000 }, { "epoch": 2.74, "learning_rate": 0.000275324160022986, "loss": 0.9025, "step": 108000 }, { "epoch": 2.82, "learning_rate": 0.0002745545391809695, "loss": 0.9021, "step": 111000 }, { "epoch": 2.9, "learning_rate": 0.00027378491833895296, "loss": 0.8958, "step": 114000 }, { "epoch": 2.97, "learning_rate": 0.00027301529749693647, "loss": 0.888, "step": 117000 }, { "epoch": 3.05, "learning_rate": 0.00027224567665492, "loss": 0.8677, "step": 120000 }, { "epoch": 3.12, "learning_rate": 0.00027147605581290344, "loss": 0.8561, "step": 123000 }, { "epoch": 3.2, "learning_rate": 0.0002707064349708869, "loss": 0.8507, "step": 126000 }, { "epoch": 3.28, "learning_rate": 0.0002699368141288704, "loss": 0.8409, "step": 129000 }, { "epoch": 3.35, "learning_rate": 0.0002691671932868539, "loss": 0.8394, "step": 132000 }, { "epoch": 3.43, "learning_rate": 0.0002683975724448374, "loss": 0.8426, "step": 135000 }, { "epoch": 3.5, "learning_rate": 0.0002676279516028209, "loss": 0.8401, "step": 138000 }, { "epoch": 3.58, "learning_rate": 0.0002668583307608044, "loss": 0.8449, "step": 141000 }, { "epoch": 3.66, "learning_rate": 0.00026608870991878786, "loss": 0.838, "step": 144000 }, { "epoch": 3.73, "learning_rate": 0.0002653190890767714, "loss": 0.834, "step": 147000 }, { "epoch": 3.81, "learning_rate": 0.00026454946823475483, "loss": 0.8239, "step": 150000 }, { "epoch": 3.89, "learning_rate": 0.00026377984739273835, "loss": 0.8228, "step": 153000 }, { "epoch": 3.96, "learning_rate": 0.00026301022655072186, "loss": 0.8277, "step": 156000 }, { "epoch": 4.04, "learning_rate": 0.0002622406057087053, "loss": 0.8003, "step": 159000 }, { "epoch": 4.11, "learning_rate": 0.00026147098486668883, "loss": 0.7925, "step": 162000 }, { "epoch": 4.19, "learning_rate": 0.00026070136402467234, "loss": 0.7911, "step": 165000 }, { "epoch": 4.27, "learning_rate": 0.0002599317431826558, "loss": 0.7944, "step": 168000 }, { "epoch": 4.34, "learning_rate": 0.00025916212234063926, "loss": 0.795, "step": 171000 }, { "epoch": 4.42, "learning_rate": 0.00025839250149862277, "loss": 0.7868, "step": 174000 }, { "epoch": 4.5, "learning_rate": 0.0002576228806566063, "loss": 0.7821, "step": 177000 }, { "epoch": 4.57, "learning_rate": 0.00025685325981458974, "loss": 0.7859, "step": 180000 }, { "epoch": 4.65, "learning_rate": 0.00025608363897257325, "loss": 0.7765, "step": 183000 }, { "epoch": 4.72, "learning_rate": 0.00025531401813055676, "loss": 0.775, "step": 186000 }, { "epoch": 4.8, "learning_rate": 0.0002545443972885402, "loss": 0.7768, "step": 189000 }, { "epoch": 4.88, "learning_rate": 0.00025377477644652373, "loss": 0.7757, "step": 192000 }, { "epoch": 4.95, "learning_rate": 0.0002530051556045072, "loss": 0.7701, "step": 195000 }, { "epoch": 5.03, "learning_rate": 0.0002522355347624907, "loss": 0.7557, "step": 198000 }, { "epoch": 5.1, "learning_rate": 0.0002514659139204742, "loss": 0.7312, "step": 201000 }, { "epoch": 5.18, "learning_rate": 0.0002506962930784577, "loss": 0.7372, "step": 204000 }, { "epoch": 5.26, "learning_rate": 0.0002499266722364412, "loss": 0.7383, "step": 207000 }, { "epoch": 5.33, "learning_rate": 0.0002491570513944247, "loss": 0.7398, "step": 210000 }, { "epoch": 5.41, "learning_rate": 0.00024838743055240816, "loss": 0.7229, "step": 213000 }, { "epoch": 5.49, "learning_rate": 0.00024761780971039167, "loss": 0.733, "step": 216000 }, { "epoch": 5.56, "learning_rate": 0.00024684818886837513, "loss": 0.7318, "step": 219000 }, { "epoch": 5.64, "learning_rate": 0.00024607856802635864, "loss": 0.7243, "step": 222000 }, { "epoch": 5.71, "learning_rate": 0.0002453089471843421, "loss": 0.73, "step": 225000 }, { "epoch": 5.79, "learning_rate": 0.0002445393263423256, "loss": 0.7289, "step": 228000 }, { "epoch": 5.87, "learning_rate": 0.00024376970550030912, "loss": 0.7331, "step": 231000 }, { "epoch": 5.94, "learning_rate": 0.0002430000846582926, "loss": 0.7203, "step": 234000 }, { "epoch": 6.02, "learning_rate": 0.0002422304638162761, "loss": 0.7235, "step": 237000 }, { "epoch": 6.1, "learning_rate": 0.0002414608429742596, "loss": 0.6931, "step": 240000 }, { "epoch": 6.17, "learning_rate": 0.0002406912221322431, "loss": 0.6933, "step": 243000 }, { "epoch": 6.25, "learning_rate": 0.00023992160129022655, "loss": 0.697, "step": 246000 }, { "epoch": 6.32, "learning_rate": 0.00023915198044821003, "loss": 0.6926, "step": 249000 }, { "epoch": 6.4, "learning_rate": 0.00023838235960619355, "loss": 0.6953, "step": 252000 }, { "epoch": 6.48, "learning_rate": 0.00023761273876417703, "loss": 0.6934, "step": 255000 }, { "epoch": 6.55, "learning_rate": 0.00023684311792216054, "loss": 0.6926, "step": 258000 }, { "epoch": 6.63, "learning_rate": 0.00023607349708014403, "loss": 0.6969, "step": 261000 }, { "epoch": 6.7, "learning_rate": 0.00023530387623812751, "loss": 0.6866, "step": 264000 }, { "epoch": 6.78, "learning_rate": 0.00023453425539611103, "loss": 0.6929, "step": 267000 }, { "epoch": 6.86, "learning_rate": 0.00023376463455409448, "loss": 0.6914, "step": 270000 }, { "epoch": 6.93, "learning_rate": 0.00023299501371207797, "loss": 0.6866, "step": 273000 }, { "epoch": 7.01, "learning_rate": 0.00023222539287006145, "loss": 0.6806, "step": 276000 }, { "epoch": 7.09, "learning_rate": 0.00023145577202804497, "loss": 0.6561, "step": 279000 }, { "epoch": 7.16, "learning_rate": 0.00023068615118602845, "loss": 0.6596, "step": 282000 }, { "epoch": 7.24, "learning_rate": 0.00022991653034401196, "loss": 0.6621, "step": 285000 }, { "epoch": 7.31, "learning_rate": 0.00022914690950199545, "loss": 0.6644, "step": 288000 }, { "epoch": 7.39, "learning_rate": 0.00022837728865997893, "loss": 0.6642, "step": 291000 }, { "epoch": 7.47, "learning_rate": 0.0002276076678179624, "loss": 0.6621, "step": 294000 }, { "epoch": 7.54, "learning_rate": 0.0002268380469759459, "loss": 0.6596, "step": 297000 }, { "epoch": 7.62, "learning_rate": 0.0002260684261339294, "loss": 0.6616, "step": 300000 }, { "epoch": 7.7, "learning_rate": 0.00022529880529191288, "loss": 0.666, "step": 303000 }, { "epoch": 7.77, "learning_rate": 0.0002245291844498964, "loss": 0.6645, "step": 306000 }, { "epoch": 7.85, "learning_rate": 0.00022375956360787987, "loss": 0.6616, "step": 309000 }, { "epoch": 7.92, "learning_rate": 0.00022298994276586339, "loss": 0.6593, "step": 312000 }, { "epoch": 8.0, "learning_rate": 0.00022222032192384687, "loss": 0.6622, "step": 315000 }, { "epoch": 8.08, "learning_rate": 0.00022145070108183033, "loss": 0.6294, "step": 318000 }, { "epoch": 8.15, "learning_rate": 0.0002206810802398138, "loss": 0.6308, "step": 321000 }, { "epoch": 8.23, "learning_rate": 0.00021991145939779733, "loss": 0.6335, "step": 324000 }, { "epoch": 8.3, "learning_rate": 0.0002191418385557808, "loss": 0.629, "step": 327000 }, { "epoch": 8.38, "learning_rate": 0.0002183722177137643, "loss": 0.6354, "step": 330000 }, { "epoch": 8.46, "learning_rate": 0.0002176025968717478, "loss": 0.6303, "step": 333000 }, { "epoch": 8.53, "learning_rate": 0.0002168329760297313, "loss": 0.6338, "step": 336000 }, { "epoch": 8.61, "learning_rate": 0.0002160633551877148, "loss": 0.6317, "step": 339000 }, { "epoch": 8.69, "learning_rate": 0.00021529373434569826, "loss": 0.6339, "step": 342000 }, { "epoch": 8.76, "learning_rate": 0.00021452411350368175, "loss": 0.6344, "step": 345000 }, { "epoch": 8.84, "learning_rate": 0.00021375449266166523, "loss": 0.631, "step": 348000 }, { "epoch": 8.91, "learning_rate": 0.00021298487181964875, "loss": 0.6273, "step": 351000 }, { "epoch": 8.99, "learning_rate": 0.00021221525097763223, "loss": 0.6276, "step": 354000 }, { "epoch": 9.07, "learning_rate": 0.00021144563013561572, "loss": 0.6045, "step": 357000 }, { "epoch": 9.14, "learning_rate": 0.00021067600929359923, "loss": 0.6012, "step": 360000 }, { "epoch": 9.22, "learning_rate": 0.00020990638845158271, "loss": 0.599, "step": 363000 }, { "epoch": 9.3, "learning_rate": 0.00020913676760956623, "loss": 0.6073, "step": 366000 }, { "epoch": 9.37, "learning_rate": 0.00020836714676754968, "loss": 0.6053, "step": 369000 }, { "epoch": 9.45, "learning_rate": 0.00020759752592553317, "loss": 0.6032, "step": 372000 }, { "epoch": 9.52, "learning_rate": 0.00020682790508351666, "loss": 0.6035, "step": 375000 }, { "epoch": 9.6, "learning_rate": 0.00020605828424150017, "loss": 0.6058, "step": 378000 }, { "epoch": 9.68, "learning_rate": 0.00020528866339948365, "loss": 0.6045, "step": 381000 }, { "epoch": 9.75, "learning_rate": 0.00020451904255746714, "loss": 0.6034, "step": 384000 }, { "epoch": 9.83, "learning_rate": 0.00020374942171545065, "loss": 0.6016, "step": 387000 }, { "epoch": 9.91, "learning_rate": 0.00020297980087343414, "loss": 0.599, "step": 390000 }, { "epoch": 9.98, "learning_rate": 0.0002022101800314176, "loss": 0.6076, "step": 393000 }, { "epoch": 10.06, "learning_rate": 0.0002014405591894011, "loss": 0.5825, "step": 396000 }, { "epoch": 10.13, "learning_rate": 0.0002006709383473846, "loss": 0.5748, "step": 399000 }, { "epoch": 10.21, "learning_rate": 0.00019990131750536808, "loss": 0.5793, "step": 402000 }, { "epoch": 10.29, "learning_rate": 0.0001991316966633516, "loss": 0.5767, "step": 405000 }, { "epoch": 10.36, "learning_rate": 0.00019836207582133507, "loss": 0.5839, "step": 408000 }, { "epoch": 10.44, "learning_rate": 0.00019759245497931856, "loss": 0.5834, "step": 411000 }, { "epoch": 10.51, "learning_rate": 0.00019682283413730207, "loss": 0.5824, "step": 414000 }, { "epoch": 10.59, "learning_rate": 0.00019605321329528553, "loss": 0.5817, "step": 417000 }, { "epoch": 10.67, "learning_rate": 0.00019528359245326901, "loss": 0.5785, "step": 420000 }, { "epoch": 10.74, "learning_rate": 0.00019451397161125253, "loss": 0.5777, "step": 423000 }, { "epoch": 10.82, "learning_rate": 0.000193744350769236, "loss": 0.5787, "step": 426000 }, { "epoch": 10.9, "learning_rate": 0.0001929747299272195, "loss": 0.5803, "step": 429000 }, { "epoch": 10.97, "learning_rate": 0.000192205109085203, "loss": 0.5772, "step": 432000 }, { "epoch": 11.05, "learning_rate": 0.0001914354882431865, "loss": 0.5645, "step": 435000 }, { "epoch": 11.12, "learning_rate": 0.00019066586740116998, "loss": 0.5497, "step": 438000 }, { "epoch": 11.2, "learning_rate": 0.00018989624655915346, "loss": 0.5556, "step": 441000 }, { "epoch": 11.28, "learning_rate": 0.00018912662571713695, "loss": 0.5562, "step": 444000 }, { "epoch": 11.35, "learning_rate": 0.00018835700487512043, "loss": 0.5523, "step": 447000 }, { "epoch": 11.43, "learning_rate": 0.00018758738403310395, "loss": 0.5595, "step": 450000 }, { "epoch": 11.51, "learning_rate": 0.00018681776319108743, "loss": 0.5638, "step": 453000 }, { "epoch": 11.58, "learning_rate": 0.00018604814234907092, "loss": 0.5593, "step": 456000 }, { "epoch": 11.66, "learning_rate": 0.00018527852150705443, "loss": 0.5588, "step": 459000 }, { "epoch": 11.73, "learning_rate": 0.00018450890066503791, "loss": 0.5549, "step": 462000 }, { "epoch": 11.81, "learning_rate": 0.00018373927982302137, "loss": 0.5659, "step": 465000 }, { "epoch": 11.89, "learning_rate": 0.00018296965898100489, "loss": 0.5567, "step": 468000 }, { "epoch": 11.96, "learning_rate": 0.00018220003813898837, "loss": 0.5594, "step": 471000 }, { "epoch": 12.04, "learning_rate": 0.00018143041729697186, "loss": 0.5468, "step": 474000 }, { "epoch": 12.11, "learning_rate": 0.00018066079645495537, "loss": 0.5331, "step": 477000 }, { "epoch": 12.19, "learning_rate": 0.00017989117561293885, "loss": 0.5387, "step": 480000 }, { "epoch": 12.27, "learning_rate": 0.00017912155477092234, "loss": 0.54, "step": 483000 }, { "epoch": 12.34, "learning_rate": 0.00017835193392890585, "loss": 0.5396, "step": 486000 }, { "epoch": 12.42, "learning_rate": 0.00017758231308688934, "loss": 0.5388, "step": 489000 }, { "epoch": 12.5, "learning_rate": 0.0001768126922448728, "loss": 0.5422, "step": 492000 }, { "epoch": 12.57, "learning_rate": 0.0001760430714028563, "loss": 0.5353, "step": 495000 }, { "epoch": 12.65, "learning_rate": 0.0001752734505608398, "loss": 0.536, "step": 498000 }, { "epoch": 12.72, "learning_rate": 0.00017450382971882328, "loss": 0.5418, "step": 501000 }, { "epoch": 12.8, "learning_rate": 0.0001737342088768068, "loss": 0.5449, "step": 504000 }, { "epoch": 12.88, "learning_rate": 0.00017296458803479027, "loss": 0.5385, "step": 507000 }, { "epoch": 12.95, "learning_rate": 0.00017219496719277376, "loss": 0.5465, "step": 510000 }, { "epoch": 13.03, "learning_rate": 0.00017142534635075727, "loss": 0.5263, "step": 513000 }, { "epoch": 13.11, "learning_rate": 0.00017065572550874073, "loss": 0.5144, "step": 516000 }, { "epoch": 13.18, "learning_rate": 0.00016988610466672421, "loss": 0.5184, "step": 519000 }, { "epoch": 13.26, "learning_rate": 0.00016911648382470773, "loss": 0.5191, "step": 522000 }, { "epoch": 13.33, "learning_rate": 0.0001683468629826912, "loss": 0.5202, "step": 525000 }, { "epoch": 13.41, "learning_rate": 0.0001675772421406747, "loss": 0.5225, "step": 528000 }, { "epoch": 13.49, "learning_rate": 0.0001668076212986582, "loss": 0.5247, "step": 531000 }, { "epoch": 13.56, "learning_rate": 0.0001660380004566417, "loss": 0.521, "step": 534000 }, { "epoch": 13.64, "learning_rate": 0.00016526837961462518, "loss": 0.5251, "step": 537000 }, { "epoch": 13.71, "learning_rate": 0.00016449875877260866, "loss": 0.5247, "step": 540000 }, { "epoch": 13.79, "learning_rate": 0.00016372913793059215, "loss": 0.5219, "step": 543000 }, { "epoch": 13.87, "learning_rate": 0.00016295951708857564, "loss": 0.522, "step": 546000 }, { "epoch": 13.94, "learning_rate": 0.00016218989624655915, "loss": 0.5207, "step": 549000 }, { "epoch": 14.02, "learning_rate": 0.00016142027540454263, "loss": 0.5167, "step": 552000 }, { "epoch": 14.1, "learning_rate": 0.00016065065456252612, "loss": 0.5018, "step": 555000 }, { "epoch": 14.17, "learning_rate": 0.00015988103372050963, "loss": 0.5001, "step": 558000 }, { "epoch": 14.25, "learning_rate": 0.00015911141287849312, "loss": 0.4997, "step": 561000 }, { "epoch": 14.32, "learning_rate": 0.00015834179203647657, "loss": 0.5007, "step": 564000 }, { "epoch": 14.4, "learning_rate": 0.00015757217119446009, "loss": 0.5028, "step": 567000 }, { "epoch": 14.48, "learning_rate": 0.00015680255035244357, "loss": 0.5004, "step": 570000 }, { "epoch": 14.55, "learning_rate": 0.00015603292951042706, "loss": 0.5028, "step": 573000 }, { "epoch": 14.63, "learning_rate": 0.00015526330866841057, "loss": 0.5053, "step": 576000 }, { "epoch": 14.71, "learning_rate": 0.00015449368782639405, "loss": 0.5058, "step": 579000 }, { "epoch": 14.78, "learning_rate": 0.00015372406698437754, "loss": 0.5037, "step": 582000 }, { "epoch": 14.86, "learning_rate": 0.00015295444614236105, "loss": 0.5051, "step": 585000 }, { "epoch": 14.93, "learning_rate": 0.0001521848253003445, "loss": 0.5041, "step": 588000 }, { "epoch": 15.01, "learning_rate": 0.000151415204458328, "loss": 0.5031, "step": 591000 }, { "epoch": 15.09, "learning_rate": 0.0001506455836163115, "loss": 0.4792, "step": 594000 }, { "epoch": 15.16, "learning_rate": 0.000149875962774295, "loss": 0.4834, "step": 597000 }, { "epoch": 15.24, "learning_rate": 0.00014910634193227848, "loss": 0.4833, "step": 600000 }, { "epoch": 15.31, "learning_rate": 0.000148336721090262, "loss": 0.4821, "step": 603000 }, { "epoch": 15.39, "learning_rate": 0.00014756710024824547, "loss": 0.4869, "step": 606000 }, { "epoch": 15.47, "learning_rate": 0.00014679747940622896, "loss": 0.4823, "step": 609000 }, { "epoch": 15.54, "learning_rate": 0.00014602785856421244, "loss": 0.486, "step": 612000 }, { "epoch": 15.62, "learning_rate": 0.00014525823772219593, "loss": 0.4847, "step": 615000 }, { "epoch": 15.7, "learning_rate": 0.00014448861688017944, "loss": 0.4897, "step": 618000 }, { "epoch": 15.77, "learning_rate": 0.00014371899603816293, "loss": 0.4886, "step": 621000 }, { "epoch": 15.85, "learning_rate": 0.0001429493751961464, "loss": 0.491, "step": 624000 }, { "epoch": 15.92, "learning_rate": 0.0001421797543541299, "loss": 0.488, "step": 627000 }, { "epoch": 16.0, "learning_rate": 0.0001414101335121134, "loss": 0.4881, "step": 630000 }, { "epoch": 16.08, "learning_rate": 0.00014064051267009687, "loss": 0.4638, "step": 633000 }, { "epoch": 16.15, "learning_rate": 0.00013987089182808038, "loss": 0.4701, "step": 636000 }, { "epoch": 16.23, "learning_rate": 0.00013910127098606387, "loss": 0.4688, "step": 639000 }, { "epoch": 16.31, "learning_rate": 0.00013833165014404735, "loss": 0.4663, "step": 642000 }, { "epoch": 16.38, "learning_rate": 0.00013756202930203084, "loss": 0.4669, "step": 645000 }, { "epoch": 16.46, "learning_rate": 0.00013679240846001435, "loss": 0.4658, "step": 648000 }, { "epoch": 16.53, "learning_rate": 0.00013602278761799783, "loss": 0.463, "step": 651000 }, { "epoch": 16.61, "learning_rate": 0.00013525316677598132, "loss": 0.4683, "step": 654000 }, { "epoch": 16.69, "learning_rate": 0.0001344835459339648, "loss": 0.4669, "step": 657000 }, { "epoch": 16.76, "learning_rate": 0.0001337139250919483, "loss": 0.4691, "step": 660000 }, { "epoch": 16.84, "learning_rate": 0.0001329443042499318, "loss": 0.4701, "step": 663000 }, { "epoch": 16.91, "learning_rate": 0.00013217468340791529, "loss": 0.4698, "step": 666000 }, { "epoch": 16.99, "learning_rate": 0.00013140506256589877, "loss": 0.4738, "step": 669000 }, { "epoch": 17.07, "learning_rate": 0.00013063544172388226, "loss": 0.4584, "step": 672000 }, { "epoch": 17.14, "learning_rate": 0.00012986582088186577, "loss": 0.4488, "step": 675000 }, { "epoch": 17.22, "learning_rate": 0.00012909620003984925, "loss": 0.4494, "step": 678000 }, { "epoch": 17.3, "learning_rate": 0.00012832657919783274, "loss": 0.4521, "step": 681000 }, { "epoch": 17.37, "learning_rate": 0.00012755695835581622, "loss": 0.4551, "step": 684000 }, { "epoch": 17.45, "learning_rate": 0.0001267873375137997, "loss": 0.4511, "step": 687000 }, { "epoch": 17.52, "learning_rate": 0.00012601771667178322, "loss": 0.4536, "step": 690000 }, { "epoch": 17.6, "learning_rate": 0.00012524809582976668, "loss": 0.4532, "step": 693000 }, { "epoch": 17.68, "learning_rate": 0.0001244784749877502, "loss": 0.4541, "step": 696000 }, { "epoch": 17.75, "learning_rate": 0.00012370885414573368, "loss": 0.4535, "step": 699000 }, { "epoch": 17.83, "learning_rate": 0.0001229392333037172, "loss": 0.4516, "step": 702000 }, { "epoch": 17.91, "learning_rate": 0.00012216961246170065, "loss": 0.4519, "step": 705000 }, { "epoch": 17.98, "learning_rate": 0.00012139999161968415, "loss": 0.4577, "step": 708000 }, { "epoch": 18.06, "learning_rate": 0.00012063037077766764, "loss": 0.437, "step": 711000 }, { "epoch": 18.13, "learning_rate": 0.00011986074993565114, "loss": 0.437, "step": 714000 }, { "epoch": 18.21, "learning_rate": 0.00011909112909363462, "loss": 0.4363, "step": 717000 }, { "epoch": 18.29, "learning_rate": 0.00011832150825161811, "loss": 0.4365, "step": 720000 }, { "epoch": 18.36, "learning_rate": 0.00011755188740960161, "loss": 0.4351, "step": 723000 }, { "epoch": 18.44, "learning_rate": 0.0001167822665675851, "loss": 0.4356, "step": 726000 }, { "epoch": 18.51, "learning_rate": 0.0001160126457255686, "loss": 0.4404, "step": 729000 }, { "epoch": 18.59, "learning_rate": 0.00011524302488355208, "loss": 0.4386, "step": 732000 }, { "epoch": 18.67, "learning_rate": 0.00011447340404153557, "loss": 0.4345, "step": 735000 }, { "epoch": 18.74, "learning_rate": 0.00011370378319951907, "loss": 0.4362, "step": 738000 }, { "epoch": 18.82, "learning_rate": 0.00011293416235750256, "loss": 0.4397, "step": 741000 }, { "epoch": 18.9, "learning_rate": 0.00011216454151548604, "loss": 0.4408, "step": 744000 }, { "epoch": 18.97, "learning_rate": 0.00011139492067346953, "loss": 0.4406, "step": 747000 }, { "epoch": 19.05, "learning_rate": 0.00011062529983145303, "loss": 0.4269, "step": 750000 }, { "epoch": 19.12, "learning_rate": 0.00010985567898943652, "loss": 0.4185, "step": 753000 }, { "epoch": 19.2, "learning_rate": 0.00010908605814742, "loss": 0.4227, "step": 756000 }, { "epoch": 19.28, "learning_rate": 0.0001083164373054035, "loss": 0.4208, "step": 759000 }, { "epoch": 19.35, "learning_rate": 0.00010754681646338699, "loss": 0.4222, "step": 762000 }, { "epoch": 19.43, "learning_rate": 0.00010677719562137049, "loss": 0.4209, "step": 765000 }, { "epoch": 19.51, "learning_rate": 0.00010600757477935397, "loss": 0.4235, "step": 768000 }, { "epoch": 19.58, "learning_rate": 0.00010523795393733746, "loss": 0.4229, "step": 771000 }, { "epoch": 19.66, "learning_rate": 0.00010446833309532096, "loss": 0.4263, "step": 774000 }, { "epoch": 19.73, "learning_rate": 0.00010369871225330445, "loss": 0.4247, "step": 777000 }, { "epoch": 19.81, "learning_rate": 0.00010292909141128793, "loss": 0.4224, "step": 780000 }, { "epoch": 19.89, "learning_rate": 0.00010215947056927142, "loss": 0.422, "step": 783000 }, { "epoch": 19.96, "learning_rate": 0.00010138984972725492, "loss": 0.4247, "step": 786000 }, { "epoch": 20.04, "learning_rate": 0.00010062022888523841, "loss": 0.4111, "step": 789000 }, { "epoch": 20.11, "learning_rate": 9.98506080432219e-05, "loss": 0.4058, "step": 792000 }, { "epoch": 20.19, "learning_rate": 9.908098720120539e-05, "loss": 0.4069, "step": 795000 }, { "epoch": 20.27, "learning_rate": 9.831136635918888e-05, "loss": 0.4046, "step": 798000 }, { "epoch": 20.34, "learning_rate": 9.754174551717238e-05, "loss": 0.4051, "step": 801000 }, { "epoch": 20.42, "learning_rate": 9.677212467515585e-05, "loss": 0.4079, "step": 804000 }, { "epoch": 20.5, "learning_rate": 9.600250383313935e-05, "loss": 0.4045, "step": 807000 }, { "epoch": 20.57, "learning_rate": 9.523288299112285e-05, "loss": 0.4083, "step": 810000 }, { "epoch": 20.65, "learning_rate": 9.446326214910634e-05, "loss": 0.408, "step": 813000 }, { "epoch": 20.72, "learning_rate": 9.369364130708982e-05, "loss": 0.4074, "step": 816000 }, { "epoch": 20.8, "learning_rate": 9.292402046507331e-05, "loss": 0.41, "step": 819000 }, { "epoch": 20.88, "learning_rate": 9.215439962305681e-05, "loss": 0.4093, "step": 822000 }, { "epoch": 20.95, "learning_rate": 9.13847787810403e-05, "loss": 0.411, "step": 825000 }, { "epoch": 21.03, "learning_rate": 9.061515793902378e-05, "loss": 0.4029, "step": 828000 }, { "epoch": 21.11, "learning_rate": 8.984553709700728e-05, "loss": 0.3928, "step": 831000 }, { "epoch": 21.18, "learning_rate": 8.907591625499077e-05, "loss": 0.3953, "step": 834000 }, { "epoch": 21.26, "learning_rate": 8.830629541297427e-05, "loss": 0.3899, "step": 837000 }, { "epoch": 21.33, "learning_rate": 8.753667457095774e-05, "loss": 0.3938, "step": 840000 }, { "epoch": 21.41, "learning_rate": 8.676705372894124e-05, "loss": 0.3958, "step": 843000 }, { "epoch": 21.49, "learning_rate": 8.599743288692473e-05, "loss": 0.3952, "step": 846000 }, { "epoch": 21.56, "learning_rate": 8.522781204490823e-05, "loss": 0.3959, "step": 849000 }, { "epoch": 21.64, "learning_rate": 8.445819120289172e-05, "loss": 0.3989, "step": 852000 }, { "epoch": 21.71, "learning_rate": 8.36885703608752e-05, "loss": 0.3976, "step": 855000 }, { "epoch": 21.79, "learning_rate": 8.29189495188587e-05, "loss": 0.3935, "step": 858000 }, { "epoch": 21.87, "learning_rate": 8.214932867684219e-05, "loss": 0.3951, "step": 861000 }, { "epoch": 21.94, "learning_rate": 8.137970783482569e-05, "loss": 0.3976, "step": 864000 }, { "epoch": 22.02, "learning_rate": 8.061008699280916e-05, "loss": 0.388, "step": 867000 }, { "epoch": 22.1, "learning_rate": 7.984046615079266e-05, "loss": 0.3745, "step": 870000 }, { "epoch": 22.17, "learning_rate": 7.907084530877616e-05, "loss": 0.382, "step": 873000 }, { "epoch": 22.25, "learning_rate": 7.830122446675965e-05, "loss": 0.3833, "step": 876000 }, { "epoch": 22.32, "learning_rate": 7.753160362474313e-05, "loss": 0.3837, "step": 879000 }, { "epoch": 22.4, "learning_rate": 7.676198278272662e-05, "loss": 0.3792, "step": 882000 }, { "epoch": 22.48, "learning_rate": 7.599236194071012e-05, "loss": 0.3826, "step": 885000 }, { "epoch": 22.55, "learning_rate": 7.522274109869361e-05, "loss": 0.3802, "step": 888000 }, { "epoch": 22.63, "learning_rate": 7.44531202566771e-05, "loss": 0.3796, "step": 891000 }, { "epoch": 22.71, "learning_rate": 7.368349941466058e-05, "loss": 0.383, "step": 894000 }, { "epoch": 22.78, "learning_rate": 7.291387857264408e-05, "loss": 0.3804, "step": 897000 }, { "epoch": 22.86, "learning_rate": 7.214425773062756e-05, "loss": 0.3802, "step": 900000 }, { "epoch": 22.93, "learning_rate": 7.137463688861106e-05, "loss": 0.383, "step": 903000 }, { "epoch": 23.01, "learning_rate": 7.060501604659455e-05, "loss": 0.3775, "step": 906000 }, { "epoch": 23.09, "learning_rate": 6.983539520457805e-05, "loss": 0.3663, "step": 909000 }, { "epoch": 23.16, "learning_rate": 6.906577436256153e-05, "loss": 0.3634, "step": 912000 }, { "epoch": 23.24, "learning_rate": 6.829615352054503e-05, "loss": 0.3691, "step": 915000 }, { "epoch": 23.31, "learning_rate": 6.752653267852851e-05, "loss": 0.3721, "step": 918000 }, { "epoch": 23.39, "learning_rate": 6.6756911836512e-05, "loss": 0.3694, "step": 921000 }, { "epoch": 23.47, "learning_rate": 6.59872909944955e-05, "loss": 0.371, "step": 924000 }, { "epoch": 23.54, "learning_rate": 6.521767015247898e-05, "loss": 0.3668, "step": 927000 }, { "epoch": 23.62, "learning_rate": 6.444804931046247e-05, "loss": 0.3701, "step": 930000 }, { "epoch": 23.7, "learning_rate": 6.367842846844597e-05, "loss": 0.37, "step": 933000 }, { "epoch": 23.77, "learning_rate": 6.290880762642945e-05, "loss": 0.3682, "step": 936000 }, { "epoch": 23.85, "learning_rate": 6.213918678441295e-05, "loss": 0.3698, "step": 939000 }, { "epoch": 23.92, "learning_rate": 6.136956594239644e-05, "loss": 0.3681, "step": 942000 }, { "epoch": 24.0, "learning_rate": 6.0599945100379935e-05, "loss": 0.3685, "step": 945000 }, { "epoch": 24.08, "learning_rate": 5.983032425836342e-05, "loss": 0.3552, "step": 948000 }, { "epoch": 24.15, "learning_rate": 5.906070341634691e-05, "loss": 0.3553, "step": 951000 }, { "epoch": 24.23, "learning_rate": 5.82910825743304e-05, "loss": 0.3523, "step": 954000 }, { "epoch": 24.31, "learning_rate": 5.7521461732313896e-05, "loss": 0.3557, "step": 957000 }, { "epoch": 24.38, "learning_rate": 5.675184089029738e-05, "loss": 0.3573, "step": 960000 }, { "epoch": 24.46, "learning_rate": 5.5982220048280873e-05, "loss": 0.357, "step": 963000 }, { "epoch": 24.53, "learning_rate": 5.5212599206264365e-05, "loss": 0.358, "step": 966000 }, { "epoch": 24.61, "learning_rate": 5.444297836424786e-05, "loss": 0.3572, "step": 969000 }, { "epoch": 24.69, "learning_rate": 5.367335752223134e-05, "loss": 0.3589, "step": 972000 }, { "epoch": 24.76, "learning_rate": 5.290373668021484e-05, "loss": 0.3585, "step": 975000 }, { "epoch": 24.84, "learning_rate": 5.213411583819833e-05, "loss": 0.3544, "step": 978000 }, { "epoch": 24.91, "learning_rate": 5.136449499618182e-05, "loss": 0.3542, "step": 981000 }, { "epoch": 24.99, "learning_rate": 5.059487415416532e-05, "loss": 0.358, "step": 984000 }, { "epoch": 25.07, "learning_rate": 4.98252533121488e-05, "loss": 0.3488, "step": 987000 }, { "epoch": 25.14, "learning_rate": 4.9055632470132294e-05, "loss": 0.346, "step": 990000 }, { "epoch": 25.22, "learning_rate": 4.8286011628115786e-05, "loss": 0.3437, "step": 993000 }, { "epoch": 25.3, "learning_rate": 4.751639078609928e-05, "loss": 0.3474, "step": 996000 }, { "epoch": 25.37, "learning_rate": 4.674676994408276e-05, "loss": 0.3452, "step": 999000 }, { "epoch": 25.45, "learning_rate": 4.597714910206626e-05, "loss": 0.3439, "step": 1002000 }, { "epoch": 25.52, "learning_rate": 4.520752826004975e-05, "loss": 0.3475, "step": 1005000 }, { "epoch": 25.6, "learning_rate": 4.443790741803324e-05, "loss": 0.3441, "step": 1008000 }, { "epoch": 25.68, "learning_rate": 4.366828657601673e-05, "loss": 0.345, "step": 1011000 }, { "epoch": 25.75, "learning_rate": 4.289866573400022e-05, "loss": 0.3467, "step": 1014000 }, { "epoch": 25.83, "learning_rate": 4.212904489198371e-05, "loss": 0.3473, "step": 1017000 }, { "epoch": 25.91, "learning_rate": 4.135942404996721e-05, "loss": 0.3435, "step": 1020000 }, { "epoch": 25.98, "learning_rate": 4.058980320795069e-05, "loss": 0.3441, "step": 1023000 }, { "epoch": 26.06, "learning_rate": 3.9820182365934184e-05, "loss": 0.3416, "step": 1026000 }, { "epoch": 26.13, "learning_rate": 3.9050561523917676e-05, "loss": 0.3348, "step": 1029000 }, { "epoch": 26.21, "learning_rate": 3.828094068190117e-05, "loss": 0.3382, "step": 1032000 }, { "epoch": 26.29, "learning_rate": 3.751131983988465e-05, "loss": 0.3383, "step": 1035000 }, { "epoch": 26.36, "learning_rate": 3.6741698997868145e-05, "loss": 0.3358, "step": 1038000 }, { "epoch": 26.44, "learning_rate": 3.597207815585164e-05, "loss": 0.3355, "step": 1041000 }, { "epoch": 26.51, "learning_rate": 3.520245731383513e-05, "loss": 0.3377, "step": 1044000 }, { "epoch": 26.59, "learning_rate": 3.443283647181862e-05, "loss": 0.3358, "step": 1047000 }, { "epoch": 26.67, "learning_rate": 3.366321562980211e-05, "loss": 0.336, "step": 1050000 }, { "epoch": 26.74, "learning_rate": 3.2893594787785605e-05, "loss": 0.336, "step": 1053000 }, { "epoch": 26.82, "learning_rate": 3.21239739457691e-05, "loss": 0.3329, "step": 1056000 }, { "epoch": 26.9, "learning_rate": 3.135435310375258e-05, "loss": 0.3364, "step": 1059000 }, { "epoch": 26.97, "learning_rate": 3.0584732261736074e-05, "loss": 0.3354, "step": 1062000 }, { "epoch": 27.05, "learning_rate": 2.9815111419719566e-05, "loss": 0.3311, "step": 1065000 }, { "epoch": 27.12, "learning_rate": 2.9045490577703058e-05, "loss": 0.3287, "step": 1068000 }, { "epoch": 27.2, "learning_rate": 2.8275869735686546e-05, "loss": 0.3288, "step": 1071000 }, { "epoch": 27.28, "learning_rate": 2.7506248893670038e-05, "loss": 0.3261, "step": 1074000 }, { "epoch": 27.35, "learning_rate": 2.673662805165353e-05, "loss": 0.3272, "step": 1077000 }, { "epoch": 27.43, "learning_rate": 2.596700720963702e-05, "loss": 0.3306, "step": 1080000 }, { "epoch": 27.51, "learning_rate": 2.519738636762051e-05, "loss": 0.3268, "step": 1083000 }, { "epoch": 27.58, "learning_rate": 2.4427765525604003e-05, "loss": 0.3256, "step": 1086000 }, { "epoch": 27.66, "learning_rate": 2.365814468358749e-05, "loss": 0.325, "step": 1089000 }, { "epoch": 27.73, "learning_rate": 2.2888523841570983e-05, "loss": 0.3258, "step": 1092000 }, { "epoch": 27.81, "learning_rate": 2.2118902999554475e-05, "loss": 0.3249, "step": 1095000 }, { "epoch": 27.89, "learning_rate": 2.1349282157537964e-05, "loss": 0.3272, "step": 1098000 }, { "epoch": 27.96, "learning_rate": 2.0579661315521456e-05, "loss": 0.3233, "step": 1101000 }, { "epoch": 28.04, "learning_rate": 1.9810040473504944e-05, "loss": 0.3239, "step": 1104000 }, { "epoch": 28.11, "learning_rate": 1.9040419631488436e-05, "loss": 0.3223, "step": 1107000 }, { "epoch": 28.19, "learning_rate": 1.8270798789471928e-05, "loss": 0.3188, "step": 1110000 }, { "epoch": 28.27, "learning_rate": 1.7501177947455417e-05, "loss": 0.3191, "step": 1113000 }, { "epoch": 28.34, "learning_rate": 1.673155710543891e-05, "loss": 0.3201, "step": 1116000 }, { "epoch": 28.42, "learning_rate": 1.59619362634224e-05, "loss": 0.32, "step": 1119000 }, { "epoch": 28.5, "learning_rate": 1.519231542140589e-05, "loss": 0.318, "step": 1122000 }, { "epoch": 28.57, "learning_rate": 1.4422694579389381e-05, "loss": 0.3171, "step": 1125000 }, { "epoch": 28.65, "learning_rate": 1.3653073737372871e-05, "loss": 0.3195, "step": 1128000 }, { "epoch": 28.72, "learning_rate": 1.2883452895356363e-05, "loss": 0.3199, "step": 1131000 }, { "epoch": 28.8, "learning_rate": 1.2113832053339853e-05, "loss": 0.3201, "step": 1134000 }, { "epoch": 28.88, "learning_rate": 1.1344211211323345e-05, "loss": 0.3207, "step": 1137000 }, { "epoch": 28.95, "learning_rate": 1.0574590369306837e-05, "loss": 0.3172, "step": 1140000 }, { "epoch": 29.03, "learning_rate": 9.804969527290328e-06, "loss": 0.3143, "step": 1143000 }, { "epoch": 29.11, "learning_rate": 9.035348685273818e-06, "loss": 0.3172, "step": 1146000 }, { "epoch": 29.18, "learning_rate": 8.265727843257308e-06, "loss": 0.3168, "step": 1149000 }, { "epoch": 29.26, "learning_rate": 7.4961070012408e-06, "loss": 0.3138, "step": 1152000 }, { "epoch": 29.33, "learning_rate": 6.72648615922429e-06, "loss": 0.3136, "step": 1155000 }, { "epoch": 29.41, "learning_rate": 5.956865317207781e-06, "loss": 0.317, "step": 1158000 }, { "epoch": 29.49, "learning_rate": 5.187244475191272e-06, "loss": 0.316, "step": 1161000 }, { "epoch": 29.56, "learning_rate": 4.417623633174763e-06, "loss": 0.3103, "step": 1164000 }, { "epoch": 29.64, "learning_rate": 3.6480027911582534e-06, "loss": 0.3164, "step": 1167000 }, { "epoch": 29.72, "learning_rate": 2.878381949141744e-06, "loss": 0.315, "step": 1170000 }, { "epoch": 29.79, "learning_rate": 2.108761107125235e-06, "loss": 0.3147, "step": 1173000 }, { "epoch": 29.87, "learning_rate": 1.3391402651087259e-06, "loss": 0.3113, "step": 1176000 }, { "epoch": 29.94, "learning_rate": 5.695194230922169e-07, "loss": 0.3131, "step": 1179000 } ], "logging_steps": 3000, "max_steps": 1181220, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.398747739778959e+21, "trial_name": null, "trial_params": null }