{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.0,
"eval_steps": 500,
"global_step": 1141846,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 7.61872513332769e-05,
"loss": 11.8677,
"step": 3000
},
{
"epoch": 0.15,
"learning_rate": 0.0001523745026665538,
"loss": 1.9864,
"step": 6000
},
{
"epoch": 0.23,
"learning_rate": 0.00022856175399983065,
"loss": 1.5124,
"step": 9000
},
{
"epoch": 0.3,
"learning_rate": 0.0002999520269675143,
"loss": 1.4304,
"step": 12000
},
{
"epoch": 0.38,
"learning_rate": 0.00029918240612549776,
"loss": 1.3803,
"step": 15000
},
{
"epoch": 0.46,
"learning_rate": 0.00029841278528348127,
"loss": 1.3166,
"step": 18000
},
{
"epoch": 0.53,
"learning_rate": 0.0002976431644414648,
"loss": 1.2798,
"step": 21000
},
{
"epoch": 0.61,
"learning_rate": 0.00029687354359944824,
"loss": 1.2517,
"step": 24000
},
{
"epoch": 0.69,
"learning_rate": 0.0002961039227574317,
"loss": 1.2085,
"step": 27000
},
{
"epoch": 0.76,
"learning_rate": 0.0002953343019154152,
"loss": 1.197,
"step": 30000
},
{
"epoch": 0.84,
"learning_rate": 0.0002945646810733987,
"loss": 1.1841,
"step": 33000
},
{
"epoch": 0.91,
"learning_rate": 0.0002937950602313822,
"loss": 1.1658,
"step": 36000
},
{
"epoch": 0.99,
"learning_rate": 0.0002930254393893657,
"loss": 1.1471,
"step": 39000
},
{
"epoch": 1.07,
"learning_rate": 0.0002922558185473492,
"loss": 1.0998,
"step": 42000
},
{
"epoch": 1.14,
"learning_rate": 0.0002914861977053327,
"loss": 1.0865,
"step": 45000
},
{
"epoch": 1.22,
"learning_rate": 0.0002907165768633162,
"loss": 1.0697,
"step": 48000
},
{
"epoch": 1.3,
"learning_rate": 0.00028994695602129963,
"loss": 1.0537,
"step": 51000
},
{
"epoch": 1.37,
"learning_rate": 0.00028917733517928315,
"loss": 1.0509,
"step": 54000
},
{
"epoch": 1.45,
"learning_rate": 0.00028840771433726666,
"loss": 1.039,
"step": 57000
},
{
"epoch": 1.52,
"learning_rate": 0.0002876380934952501,
"loss": 1.0224,
"step": 60000
},
{
"epoch": 1.6,
"learning_rate": 0.00028686847265323363,
"loss": 1.0209,
"step": 63000
},
{
"epoch": 1.68,
"learning_rate": 0.00028609885181121714,
"loss": 1.0103,
"step": 66000
},
{
"epoch": 1.75,
"learning_rate": 0.0002853292309692006,
"loss": 1.0058,
"step": 69000
},
{
"epoch": 1.83,
"learning_rate": 0.00028455961012718406,
"loss": 0.9954,
"step": 72000
},
{
"epoch": 1.9,
"learning_rate": 0.00028378998928516757,
"loss": 0.9815,
"step": 75000
},
{
"epoch": 1.98,
"learning_rate": 0.0002830203684431511,
"loss": 0.9832,
"step": 78000
},
{
"epoch": 2.06,
"learning_rate": 0.00028225074760113454,
"loss": 0.9556,
"step": 81000
},
{
"epoch": 2.13,
"learning_rate": 0.00028148112675911805,
"loss": 0.9396,
"step": 84000
},
{
"epoch": 2.21,
"learning_rate": 0.00028071150591710156,
"loss": 0.9337,
"step": 87000
},
{
"epoch": 2.29,
"learning_rate": 0.000279941885075085,
"loss": 0.9211,
"step": 90000
},
{
"epoch": 2.36,
"learning_rate": 0.00027917226423306853,
"loss": 0.9296,
"step": 93000
},
{
"epoch": 2.44,
"learning_rate": 0.00027840264339105205,
"loss": 0.9216,
"step": 96000
},
{
"epoch": 2.51,
"learning_rate": 0.0002776330225490355,
"loss": 0.9132,
"step": 99000
},
{
"epoch": 2.59,
"learning_rate": 0.000276863401707019,
"loss": 0.9041,
"step": 102000
},
{
"epoch": 2.67,
"learning_rate": 0.0002760937808650025,
"loss": 0.9056,
"step": 105000
},
{
"epoch": 2.74,
"learning_rate": 0.000275324160022986,
"loss": 0.9025,
"step": 108000
},
{
"epoch": 2.82,
"learning_rate": 0.0002745545391809695,
"loss": 0.9021,
"step": 111000
},
{
"epoch": 2.9,
"learning_rate": 0.00027378491833895296,
"loss": 0.8958,
"step": 114000
},
{
"epoch": 2.97,
"learning_rate": 0.00027301529749693647,
"loss": 0.888,
"step": 117000
},
{
"epoch": 3.05,
"learning_rate": 0.00027224567665492,
"loss": 0.8677,
"step": 120000
},
{
"epoch": 3.12,
"learning_rate": 0.00027147605581290344,
"loss": 0.8561,
"step": 123000
},
{
"epoch": 3.2,
"learning_rate": 0.0002707064349708869,
"loss": 0.8507,
"step": 126000
},
{
"epoch": 3.28,
"learning_rate": 0.0002699368141288704,
"loss": 0.8409,
"step": 129000
},
{
"epoch": 3.35,
"learning_rate": 0.0002691671932868539,
"loss": 0.8394,
"step": 132000
},
{
"epoch": 3.43,
"learning_rate": 0.0002683975724448374,
"loss": 0.8426,
"step": 135000
},
{
"epoch": 3.5,
"learning_rate": 0.0002676279516028209,
"loss": 0.8401,
"step": 138000
},
{
"epoch": 3.58,
"learning_rate": 0.0002668583307608044,
"loss": 0.8449,
"step": 141000
},
{
"epoch": 3.66,
"learning_rate": 0.00026608870991878786,
"loss": 0.838,
"step": 144000
},
{
"epoch": 3.73,
"learning_rate": 0.0002653190890767714,
"loss": 0.834,
"step": 147000
},
{
"epoch": 3.81,
"learning_rate": 0.00026454946823475483,
"loss": 0.8239,
"step": 150000
},
{
"epoch": 3.89,
"learning_rate": 0.00026377984739273835,
"loss": 0.8228,
"step": 153000
},
{
"epoch": 3.96,
"learning_rate": 0.00026301022655072186,
"loss": 0.8277,
"step": 156000
},
{
"epoch": 4.04,
"learning_rate": 0.0002622406057087053,
"loss": 0.8003,
"step": 159000
},
{
"epoch": 4.11,
"learning_rate": 0.00026147098486668883,
"loss": 0.7925,
"step": 162000
},
{
"epoch": 4.19,
"learning_rate": 0.00026070136402467234,
"loss": 0.7911,
"step": 165000
},
{
"epoch": 4.27,
"learning_rate": 0.0002599317431826558,
"loss": 0.7944,
"step": 168000
},
{
"epoch": 4.34,
"learning_rate": 0.00025916212234063926,
"loss": 0.795,
"step": 171000
},
{
"epoch": 4.42,
"learning_rate": 0.00025839250149862277,
"loss": 0.7868,
"step": 174000
},
{
"epoch": 4.5,
"learning_rate": 0.0002576228806566063,
"loss": 0.7821,
"step": 177000
},
{
"epoch": 4.57,
"learning_rate": 0.00025685325981458974,
"loss": 0.7859,
"step": 180000
},
{
"epoch": 4.65,
"learning_rate": 0.00025608363897257325,
"loss": 0.7765,
"step": 183000
},
{
"epoch": 4.72,
"learning_rate": 0.00025531401813055676,
"loss": 0.775,
"step": 186000
},
{
"epoch": 4.8,
"learning_rate": 0.0002545443972885402,
"loss": 0.7768,
"step": 189000
},
{
"epoch": 4.88,
"learning_rate": 0.00025377477644652373,
"loss": 0.7757,
"step": 192000
},
{
"epoch": 4.95,
"learning_rate": 0.0002530051556045072,
"loss": 0.7701,
"step": 195000
},
{
"epoch": 5.03,
"learning_rate": 0.0002522355347624907,
"loss": 0.7557,
"step": 198000
},
{
"epoch": 5.1,
"learning_rate": 0.0002514659139204742,
"loss": 0.7312,
"step": 201000
},
{
"epoch": 5.18,
"learning_rate": 0.0002506962930784577,
"loss": 0.7372,
"step": 204000
},
{
"epoch": 5.26,
"learning_rate": 0.0002499266722364412,
"loss": 0.7383,
"step": 207000
},
{
"epoch": 5.33,
"learning_rate": 0.0002491570513944247,
"loss": 0.7398,
"step": 210000
},
{
"epoch": 5.41,
"learning_rate": 0.00024838743055240816,
"loss": 0.7229,
"step": 213000
},
{
"epoch": 5.49,
"learning_rate": 0.00024761780971039167,
"loss": 0.733,
"step": 216000
},
{
"epoch": 5.56,
"learning_rate": 0.00024684818886837513,
"loss": 0.7318,
"step": 219000
},
{
"epoch": 5.64,
"learning_rate": 0.00024607856802635864,
"loss": 0.7243,
"step": 222000
},
{
"epoch": 5.71,
"learning_rate": 0.0002453089471843421,
"loss": 0.73,
"step": 225000
},
{
"epoch": 5.79,
"learning_rate": 0.0002445393263423256,
"loss": 0.7289,
"step": 228000
},
{
"epoch": 5.87,
"learning_rate": 0.00024376970550030912,
"loss": 0.7331,
"step": 231000
},
{
"epoch": 5.94,
"learning_rate": 0.0002430000846582926,
"loss": 0.7203,
"step": 234000
},
{
"epoch": 6.02,
"learning_rate": 0.0002422304638162761,
"loss": 0.7235,
"step": 237000
},
{
"epoch": 6.1,
"learning_rate": 0.0002414608429742596,
"loss": 0.6931,
"step": 240000
},
{
"epoch": 6.17,
"learning_rate": 0.0002406912221322431,
"loss": 0.6933,
"step": 243000
},
{
"epoch": 6.25,
"learning_rate": 0.00023992160129022655,
"loss": 0.697,
"step": 246000
},
{
"epoch": 6.32,
"learning_rate": 0.00023915198044821003,
"loss": 0.6926,
"step": 249000
},
{
"epoch": 6.4,
"learning_rate": 0.00023838235960619355,
"loss": 0.6953,
"step": 252000
},
{
"epoch": 6.48,
"learning_rate": 0.00023761273876417703,
"loss": 0.6934,
"step": 255000
},
{
"epoch": 6.55,
"learning_rate": 0.00023684311792216054,
"loss": 0.6926,
"step": 258000
},
{
"epoch": 6.63,
"learning_rate": 0.00023607349708014403,
"loss": 0.6969,
"step": 261000
},
{
"epoch": 6.7,
"learning_rate": 0.00023530387623812751,
"loss": 0.6866,
"step": 264000
},
{
"epoch": 6.78,
"learning_rate": 0.00023453425539611103,
"loss": 0.6929,
"step": 267000
},
{
"epoch": 6.86,
"learning_rate": 0.00023376463455409448,
"loss": 0.6914,
"step": 270000
},
{
"epoch": 6.93,
"learning_rate": 0.00023299501371207797,
"loss": 0.6866,
"step": 273000
},
{
"epoch": 7.01,
"learning_rate": 0.00023222539287006145,
"loss": 0.6806,
"step": 276000
},
{
"epoch": 7.09,
"learning_rate": 0.00023145577202804497,
"loss": 0.6561,
"step": 279000
},
{
"epoch": 7.16,
"learning_rate": 0.00023068615118602845,
"loss": 0.6596,
"step": 282000
},
{
"epoch": 7.24,
"learning_rate": 0.00022991653034401196,
"loss": 0.6621,
"step": 285000
},
{
"epoch": 7.31,
"learning_rate": 0.00022914690950199545,
"loss": 0.6644,
"step": 288000
},
{
"epoch": 7.39,
"learning_rate": 0.00022837728865997893,
"loss": 0.6642,
"step": 291000
},
{
"epoch": 7.47,
"learning_rate": 0.0002276076678179624,
"loss": 0.6621,
"step": 294000
},
{
"epoch": 7.54,
"learning_rate": 0.0002268380469759459,
"loss": 0.6596,
"step": 297000
},
{
"epoch": 7.62,
"learning_rate": 0.0002260684261339294,
"loss": 0.6616,
"step": 300000
},
{
"epoch": 7.7,
"learning_rate": 0.00022529880529191288,
"loss": 0.666,
"step": 303000
},
{
"epoch": 7.77,
"learning_rate": 0.0002245291844498964,
"loss": 0.6645,
"step": 306000
},
{
"epoch": 7.85,
"learning_rate": 0.00022375956360787987,
"loss": 0.6616,
"step": 309000
},
{
"epoch": 7.92,
"learning_rate": 0.00022298994276586339,
"loss": 0.6593,
"step": 312000
},
{
"epoch": 8.0,
"learning_rate": 0.00022222032192384687,
"loss": 0.6622,
"step": 315000
},
{
"epoch": 8.08,
"learning_rate": 0.00022145070108183033,
"loss": 0.6294,
"step": 318000
},
{
"epoch": 8.15,
"learning_rate": 0.0002206810802398138,
"loss": 0.6308,
"step": 321000
},
{
"epoch": 8.23,
"learning_rate": 0.00021991145939779733,
"loss": 0.6335,
"step": 324000
},
{
"epoch": 8.3,
"learning_rate": 0.0002191418385557808,
"loss": 0.629,
"step": 327000
},
{
"epoch": 8.38,
"learning_rate": 0.0002183722177137643,
"loss": 0.6354,
"step": 330000
},
{
"epoch": 8.46,
"learning_rate": 0.0002176025968717478,
"loss": 0.6303,
"step": 333000
},
{
"epoch": 8.53,
"learning_rate": 0.0002168329760297313,
"loss": 0.6338,
"step": 336000
},
{
"epoch": 8.61,
"learning_rate": 0.0002160633551877148,
"loss": 0.6317,
"step": 339000
},
{
"epoch": 8.69,
"learning_rate": 0.00021529373434569826,
"loss": 0.6339,
"step": 342000
},
{
"epoch": 8.76,
"learning_rate": 0.00021452411350368175,
"loss": 0.6344,
"step": 345000
},
{
"epoch": 8.84,
"learning_rate": 0.00021375449266166523,
"loss": 0.631,
"step": 348000
},
{
"epoch": 8.91,
"learning_rate": 0.00021298487181964875,
"loss": 0.6273,
"step": 351000
},
{
"epoch": 8.99,
"learning_rate": 0.00021221525097763223,
"loss": 0.6276,
"step": 354000
},
{
"epoch": 9.07,
"learning_rate": 0.00021144563013561572,
"loss": 0.6045,
"step": 357000
},
{
"epoch": 9.14,
"learning_rate": 0.00021067600929359923,
"loss": 0.6012,
"step": 360000
},
{
"epoch": 9.22,
"learning_rate": 0.00020990638845158271,
"loss": 0.599,
"step": 363000
},
{
"epoch": 9.3,
"learning_rate": 0.00020913676760956623,
"loss": 0.6073,
"step": 366000
},
{
"epoch": 9.37,
"learning_rate": 0.00020836714676754968,
"loss": 0.6053,
"step": 369000
},
{
"epoch": 9.45,
"learning_rate": 0.00020759752592553317,
"loss": 0.6032,
"step": 372000
},
{
"epoch": 9.52,
"learning_rate": 0.00020682790508351666,
"loss": 0.6035,
"step": 375000
},
{
"epoch": 9.6,
"learning_rate": 0.00020605828424150017,
"loss": 0.6058,
"step": 378000
},
{
"epoch": 9.68,
"learning_rate": 0.00020528866339948365,
"loss": 0.6045,
"step": 381000
},
{
"epoch": 9.75,
"learning_rate": 0.00020451904255746714,
"loss": 0.6034,
"step": 384000
},
{
"epoch": 9.83,
"learning_rate": 0.00020374942171545065,
"loss": 0.6016,
"step": 387000
},
{
"epoch": 9.91,
"learning_rate": 0.00020297980087343414,
"loss": 0.599,
"step": 390000
},
{
"epoch": 9.98,
"learning_rate": 0.0002022101800314176,
"loss": 0.6076,
"step": 393000
},
{
"epoch": 10.06,
"learning_rate": 0.0002014405591894011,
"loss": 0.5825,
"step": 396000
},
{
"epoch": 10.13,
"learning_rate": 0.0002006709383473846,
"loss": 0.5748,
"step": 399000
},
{
"epoch": 10.21,
"learning_rate": 0.00019990131750536808,
"loss": 0.5793,
"step": 402000
},
{
"epoch": 10.29,
"learning_rate": 0.0001991316966633516,
"loss": 0.5767,
"step": 405000
},
{
"epoch": 10.36,
"learning_rate": 0.00019836207582133507,
"loss": 0.5839,
"step": 408000
},
{
"epoch": 10.44,
"learning_rate": 0.00019759245497931856,
"loss": 0.5834,
"step": 411000
},
{
"epoch": 10.51,
"learning_rate": 0.00019682283413730207,
"loss": 0.5824,
"step": 414000
},
{
"epoch": 10.59,
"learning_rate": 0.00019605321329528553,
"loss": 0.5817,
"step": 417000
},
{
"epoch": 10.67,
"learning_rate": 0.00019528359245326901,
"loss": 0.5785,
"step": 420000
},
{
"epoch": 10.74,
"learning_rate": 0.00019451397161125253,
"loss": 0.5777,
"step": 423000
},
{
"epoch": 10.82,
"learning_rate": 0.000193744350769236,
"loss": 0.5787,
"step": 426000
},
{
"epoch": 10.9,
"learning_rate": 0.0001929747299272195,
"loss": 0.5803,
"step": 429000
},
{
"epoch": 10.97,
"learning_rate": 0.000192205109085203,
"loss": 0.5772,
"step": 432000
},
{
"epoch": 11.05,
"learning_rate": 0.0001914354882431865,
"loss": 0.5645,
"step": 435000
},
{
"epoch": 11.12,
"learning_rate": 0.00019066586740116998,
"loss": 0.5497,
"step": 438000
},
{
"epoch": 11.2,
"learning_rate": 0.00018989624655915346,
"loss": 0.5556,
"step": 441000
},
{
"epoch": 11.28,
"learning_rate": 0.00018912662571713695,
"loss": 0.5562,
"step": 444000
},
{
"epoch": 11.35,
"learning_rate": 0.00018835700487512043,
"loss": 0.5523,
"step": 447000
},
{
"epoch": 11.43,
"learning_rate": 0.00018758738403310395,
"loss": 0.5595,
"step": 450000
},
{
"epoch": 11.51,
"learning_rate": 0.00018681776319108743,
"loss": 0.5638,
"step": 453000
},
{
"epoch": 11.58,
"learning_rate": 0.00018604814234907092,
"loss": 0.5593,
"step": 456000
},
{
"epoch": 11.66,
"learning_rate": 0.00018527852150705443,
"loss": 0.5588,
"step": 459000
},
{
"epoch": 11.73,
"learning_rate": 0.00018450890066503791,
"loss": 0.5549,
"step": 462000
},
{
"epoch": 11.81,
"learning_rate": 0.00018373927982302137,
"loss": 0.5659,
"step": 465000
},
{
"epoch": 11.89,
"learning_rate": 0.00018296965898100489,
"loss": 0.5567,
"step": 468000
},
{
"epoch": 11.96,
"learning_rate": 0.00018220003813898837,
"loss": 0.5594,
"step": 471000
},
{
"epoch": 12.04,
"learning_rate": 0.00018143041729697186,
"loss": 0.5468,
"step": 474000
},
{
"epoch": 12.11,
"learning_rate": 0.00018066079645495537,
"loss": 0.5331,
"step": 477000
},
{
"epoch": 12.19,
"learning_rate": 0.00017989117561293885,
"loss": 0.5387,
"step": 480000
},
{
"epoch": 12.27,
"learning_rate": 0.00017912155477092234,
"loss": 0.54,
"step": 483000
},
{
"epoch": 12.34,
"learning_rate": 0.00017835193392890585,
"loss": 0.5396,
"step": 486000
},
{
"epoch": 12.42,
"learning_rate": 0.00017758231308688934,
"loss": 0.5388,
"step": 489000
},
{
"epoch": 12.5,
"learning_rate": 0.0001768126922448728,
"loss": 0.5422,
"step": 492000
},
{
"epoch": 12.57,
"learning_rate": 0.0001760430714028563,
"loss": 0.5353,
"step": 495000
},
{
"epoch": 12.65,
"learning_rate": 0.0001752734505608398,
"loss": 0.536,
"step": 498000
},
{
"epoch": 12.72,
"learning_rate": 0.00017450382971882328,
"loss": 0.5418,
"step": 501000
},
{
"epoch": 12.8,
"learning_rate": 0.0001737342088768068,
"loss": 0.5449,
"step": 504000
},
{
"epoch": 12.88,
"learning_rate": 0.00017296458803479027,
"loss": 0.5385,
"step": 507000
},
{
"epoch": 12.95,
"learning_rate": 0.00017219496719277376,
"loss": 0.5465,
"step": 510000
},
{
"epoch": 13.03,
"learning_rate": 0.00017142534635075727,
"loss": 0.5263,
"step": 513000
},
{
"epoch": 13.11,
"learning_rate": 0.00017065572550874073,
"loss": 0.5144,
"step": 516000
},
{
"epoch": 13.18,
"learning_rate": 0.00016988610466672421,
"loss": 0.5184,
"step": 519000
},
{
"epoch": 13.26,
"learning_rate": 0.00016911648382470773,
"loss": 0.5191,
"step": 522000
},
{
"epoch": 13.33,
"learning_rate": 0.0001683468629826912,
"loss": 0.5202,
"step": 525000
},
{
"epoch": 13.41,
"learning_rate": 0.0001675772421406747,
"loss": 0.5225,
"step": 528000
},
{
"epoch": 13.49,
"learning_rate": 0.0001668076212986582,
"loss": 0.5247,
"step": 531000
},
{
"epoch": 13.56,
"learning_rate": 0.0001660380004566417,
"loss": 0.521,
"step": 534000
},
{
"epoch": 13.64,
"learning_rate": 0.00016526837961462518,
"loss": 0.5251,
"step": 537000
},
{
"epoch": 13.71,
"learning_rate": 0.00016449875877260866,
"loss": 0.5247,
"step": 540000
},
{
"epoch": 13.79,
"learning_rate": 0.00016372913793059215,
"loss": 0.5219,
"step": 543000
},
{
"epoch": 13.87,
"learning_rate": 0.00016295951708857564,
"loss": 0.522,
"step": 546000
},
{
"epoch": 13.94,
"learning_rate": 0.00016218989624655915,
"loss": 0.5207,
"step": 549000
},
{
"epoch": 14.02,
"learning_rate": 0.00016142027540454263,
"loss": 0.5167,
"step": 552000
},
{
"epoch": 14.1,
"learning_rate": 0.00016065065456252612,
"loss": 0.5018,
"step": 555000
},
{
"epoch": 14.17,
"learning_rate": 0.00015988103372050963,
"loss": 0.5001,
"step": 558000
},
{
"epoch": 14.25,
"learning_rate": 0.00015911141287849312,
"loss": 0.4997,
"step": 561000
},
{
"epoch": 14.32,
"learning_rate": 0.00015834179203647657,
"loss": 0.5007,
"step": 564000
},
{
"epoch": 14.4,
"learning_rate": 0.00015757217119446009,
"loss": 0.5028,
"step": 567000
},
{
"epoch": 14.48,
"learning_rate": 0.00015680255035244357,
"loss": 0.5004,
"step": 570000
},
{
"epoch": 14.55,
"learning_rate": 0.00015603292951042706,
"loss": 0.5028,
"step": 573000
},
{
"epoch": 14.63,
"learning_rate": 0.00015526330866841057,
"loss": 0.5053,
"step": 576000
},
{
"epoch": 14.71,
"learning_rate": 0.00015449368782639405,
"loss": 0.5058,
"step": 579000
},
{
"epoch": 14.78,
"learning_rate": 0.00015372406698437754,
"loss": 0.5037,
"step": 582000
},
{
"epoch": 14.86,
"learning_rate": 0.00015295444614236105,
"loss": 0.5051,
"step": 585000
},
{
"epoch": 14.93,
"learning_rate": 0.0001521848253003445,
"loss": 0.5041,
"step": 588000
},
{
"epoch": 15.01,
"learning_rate": 0.000151415204458328,
"loss": 0.5031,
"step": 591000
},
{
"epoch": 15.09,
"learning_rate": 0.0001506455836163115,
"loss": 0.4792,
"step": 594000
},
{
"epoch": 15.16,
"learning_rate": 0.000149875962774295,
"loss": 0.4834,
"step": 597000
},
{
"epoch": 15.24,
"learning_rate": 0.00014910634193227848,
"loss": 0.4833,
"step": 600000
},
{
"epoch": 15.31,
"learning_rate": 0.000148336721090262,
"loss": 0.4821,
"step": 603000
},
{
"epoch": 15.39,
"learning_rate": 0.00014756710024824547,
"loss": 0.4869,
"step": 606000
},
{
"epoch": 15.47,
"learning_rate": 0.00014679747940622896,
"loss": 0.4823,
"step": 609000
},
{
"epoch": 15.54,
"learning_rate": 0.00014602785856421244,
"loss": 0.486,
"step": 612000
},
{
"epoch": 15.62,
"learning_rate": 0.00014525823772219593,
"loss": 0.4847,
"step": 615000
},
{
"epoch": 15.7,
"learning_rate": 0.00014448861688017944,
"loss": 0.4897,
"step": 618000
},
{
"epoch": 15.77,
"learning_rate": 0.00014371899603816293,
"loss": 0.4886,
"step": 621000
},
{
"epoch": 15.85,
"learning_rate": 0.0001429493751961464,
"loss": 0.491,
"step": 624000
},
{
"epoch": 15.92,
"learning_rate": 0.0001421797543541299,
"loss": 0.488,
"step": 627000
},
{
"epoch": 16.0,
"learning_rate": 0.0001414101335121134,
"loss": 0.4881,
"step": 630000
},
{
"epoch": 16.08,
"learning_rate": 0.00014064051267009687,
"loss": 0.4638,
"step": 633000
},
{
"epoch": 16.15,
"learning_rate": 0.00013987089182808038,
"loss": 0.4701,
"step": 636000
},
{
"epoch": 16.23,
"learning_rate": 0.00013910127098606387,
"loss": 0.4688,
"step": 639000
},
{
"epoch": 16.31,
"learning_rate": 0.00013833165014404735,
"loss": 0.4663,
"step": 642000
},
{
"epoch": 16.38,
"learning_rate": 0.00013756202930203084,
"loss": 0.4669,
"step": 645000
},
{
"epoch": 16.46,
"learning_rate": 0.00013679240846001435,
"loss": 0.4658,
"step": 648000
},
{
"epoch": 16.53,
"learning_rate": 0.00013602278761799783,
"loss": 0.463,
"step": 651000
},
{
"epoch": 16.61,
"learning_rate": 0.00013525316677598132,
"loss": 0.4683,
"step": 654000
},
{
"epoch": 16.69,
"learning_rate": 0.0001344835459339648,
"loss": 0.4669,
"step": 657000
},
{
"epoch": 16.76,
"learning_rate": 0.0001337139250919483,
"loss": 0.4691,
"step": 660000
},
{
"epoch": 16.84,
"learning_rate": 0.0001329443042499318,
"loss": 0.4701,
"step": 663000
},
{
"epoch": 16.91,
"learning_rate": 0.00013217468340791529,
"loss": 0.4698,
"step": 666000
},
{
"epoch": 16.99,
"learning_rate": 0.00013140506256589877,
"loss": 0.4738,
"step": 669000
},
{
"epoch": 17.07,
"learning_rate": 0.00013063544172388226,
"loss": 0.4584,
"step": 672000
},
{
"epoch": 17.14,
"learning_rate": 0.00012986582088186577,
"loss": 0.4488,
"step": 675000
},
{
"epoch": 17.22,
"learning_rate": 0.00012909620003984925,
"loss": 0.4494,
"step": 678000
},
{
"epoch": 17.3,
"learning_rate": 0.00012832657919783274,
"loss": 0.4521,
"step": 681000
},
{
"epoch": 17.37,
"learning_rate": 0.00012755695835581622,
"loss": 0.4551,
"step": 684000
},
{
"epoch": 17.45,
"learning_rate": 0.0001267873375137997,
"loss": 0.4511,
"step": 687000
},
{
"epoch": 17.52,
"learning_rate": 0.00012601771667178322,
"loss": 0.4536,
"step": 690000
},
{
"epoch": 17.6,
"learning_rate": 0.00012524809582976668,
"loss": 0.4532,
"step": 693000
},
{
"epoch": 17.68,
"learning_rate": 0.0001244784749877502,
"loss": 0.4541,
"step": 696000
},
{
"epoch": 17.75,
"learning_rate": 0.00012370885414573368,
"loss": 0.4535,
"step": 699000
},
{
"epoch": 17.83,
"learning_rate": 0.0001229392333037172,
"loss": 0.4516,
"step": 702000
},
{
"epoch": 17.91,
"learning_rate": 0.00012216961246170065,
"loss": 0.4519,
"step": 705000
},
{
"epoch": 17.98,
"learning_rate": 0.00012139999161968415,
"loss": 0.4577,
"step": 708000
},
{
"epoch": 18.06,
"learning_rate": 0.00012063037077766764,
"loss": 0.437,
"step": 711000
},
{
"epoch": 18.13,
"learning_rate": 0.00011986074993565114,
"loss": 0.437,
"step": 714000
},
{
"epoch": 18.21,
"learning_rate": 0.00011909112909363462,
"loss": 0.4363,
"step": 717000
},
{
"epoch": 18.29,
"learning_rate": 0.00011832150825161811,
"loss": 0.4365,
"step": 720000
},
{
"epoch": 18.36,
"learning_rate": 0.00011755188740960161,
"loss": 0.4351,
"step": 723000
},
{
"epoch": 18.44,
"learning_rate": 0.0001167822665675851,
"loss": 0.4356,
"step": 726000
},
{
"epoch": 18.51,
"learning_rate": 0.0001160126457255686,
"loss": 0.4404,
"step": 729000
},
{
"epoch": 18.59,
"learning_rate": 0.00011524302488355208,
"loss": 0.4386,
"step": 732000
},
{
"epoch": 18.67,
"learning_rate": 0.00011447340404153557,
"loss": 0.4345,
"step": 735000
},
{
"epoch": 18.74,
"learning_rate": 0.00011370378319951907,
"loss": 0.4362,
"step": 738000
},
{
"epoch": 18.82,
"learning_rate": 0.00011293416235750256,
"loss": 0.4397,
"step": 741000
},
{
"epoch": 18.9,
"learning_rate": 0.00011216454151548604,
"loss": 0.4408,
"step": 744000
},
{
"epoch": 18.97,
"learning_rate": 0.00011139492067346953,
"loss": 0.4406,
"step": 747000
},
{
"epoch": 19.05,
"learning_rate": 0.00011062529983145303,
"loss": 0.4269,
"step": 750000
},
{
"epoch": 19.12,
"learning_rate": 0.00010985567898943652,
"loss": 0.4185,
"step": 753000
},
{
"epoch": 19.2,
"learning_rate": 0.00010908605814742,
"loss": 0.4227,
"step": 756000
},
{
"epoch": 19.28,
"learning_rate": 0.0001083164373054035,
"loss": 0.4208,
"step": 759000
},
{
"epoch": 19.35,
"learning_rate": 0.00010754681646338699,
"loss": 0.4222,
"step": 762000
},
{
"epoch": 19.43,
"learning_rate": 0.00010677719562137049,
"loss": 0.4209,
"step": 765000
},
{
"epoch": 19.51,
"learning_rate": 0.00010600757477935397,
"loss": 0.4235,
"step": 768000
},
{
"epoch": 19.58,
"learning_rate": 0.00010523795393733746,
"loss": 0.4229,
"step": 771000
},
{
"epoch": 19.66,
"learning_rate": 0.00010446833309532096,
"loss": 0.4263,
"step": 774000
},
{
"epoch": 19.73,
"learning_rate": 0.00010369871225330445,
"loss": 0.4247,
"step": 777000
},
{
"epoch": 19.81,
"learning_rate": 0.00010292909141128793,
"loss": 0.4224,
"step": 780000
},
{
"epoch": 19.89,
"learning_rate": 0.00010215947056927142,
"loss": 0.422,
"step": 783000
},
{
"epoch": 19.96,
"learning_rate": 0.00010138984972725492,
"loss": 0.4247,
"step": 786000
},
{
"epoch": 20.04,
"learning_rate": 0.00010062022888523841,
"loss": 0.4111,
"step": 789000
},
{
"epoch": 20.11,
"learning_rate": 9.98506080432219e-05,
"loss": 0.4058,
"step": 792000
},
{
"epoch": 20.19,
"learning_rate": 9.908098720120539e-05,
"loss": 0.4069,
"step": 795000
},
{
"epoch": 20.27,
"learning_rate": 9.831136635918888e-05,
"loss": 0.4046,
"step": 798000
},
{
"epoch": 20.34,
"learning_rate": 9.754174551717238e-05,
"loss": 0.4051,
"step": 801000
},
{
"epoch": 20.42,
"learning_rate": 9.677212467515585e-05,
"loss": 0.4079,
"step": 804000
},
{
"epoch": 20.5,
"learning_rate": 9.600250383313935e-05,
"loss": 0.4045,
"step": 807000
},
{
"epoch": 20.57,
"learning_rate": 9.523288299112285e-05,
"loss": 0.4083,
"step": 810000
},
{
"epoch": 20.65,
"learning_rate": 9.446326214910634e-05,
"loss": 0.408,
"step": 813000
},
{
"epoch": 20.72,
"learning_rate": 9.369364130708982e-05,
"loss": 0.4074,
"step": 816000
},
{
"epoch": 20.8,
"learning_rate": 9.292402046507331e-05,
"loss": 0.41,
"step": 819000
},
{
"epoch": 20.88,
"learning_rate": 9.215439962305681e-05,
"loss": 0.4093,
"step": 822000
},
{
"epoch": 20.95,
"learning_rate": 9.13847787810403e-05,
"loss": 0.411,
"step": 825000
},
{
"epoch": 21.03,
"learning_rate": 9.061515793902378e-05,
"loss": 0.4029,
"step": 828000
},
{
"epoch": 21.11,
"learning_rate": 8.984553709700728e-05,
"loss": 0.3928,
"step": 831000
},
{
"epoch": 21.18,
"learning_rate": 8.907591625499077e-05,
"loss": 0.3953,
"step": 834000
},
{
"epoch": 21.26,
"learning_rate": 8.830629541297427e-05,
"loss": 0.3899,
"step": 837000
},
{
"epoch": 21.33,
"learning_rate": 8.753667457095774e-05,
"loss": 0.3938,
"step": 840000
},
{
"epoch": 21.41,
"learning_rate": 8.676705372894124e-05,
"loss": 0.3958,
"step": 843000
},
{
"epoch": 21.49,
"learning_rate": 8.599743288692473e-05,
"loss": 0.3952,
"step": 846000
},
{
"epoch": 21.56,
"learning_rate": 8.522781204490823e-05,
"loss": 0.3959,
"step": 849000
},
{
"epoch": 21.64,
"learning_rate": 8.445819120289172e-05,
"loss": 0.3989,
"step": 852000
},
{
"epoch": 21.71,
"learning_rate": 8.36885703608752e-05,
"loss": 0.3976,
"step": 855000
},
{
"epoch": 21.79,
"learning_rate": 8.29189495188587e-05,
"loss": 0.3935,
"step": 858000
},
{
"epoch": 21.87,
"learning_rate": 8.214932867684219e-05,
"loss": 0.3951,
"step": 861000
},
{
"epoch": 21.94,
"learning_rate": 8.137970783482569e-05,
"loss": 0.3976,
"step": 864000
},
{
"epoch": 22.02,
"learning_rate": 8.061008699280916e-05,
"loss": 0.388,
"step": 867000
},
{
"epoch": 22.1,
"learning_rate": 7.984046615079266e-05,
"loss": 0.3745,
"step": 870000
},
{
"epoch": 22.17,
"learning_rate": 7.907084530877616e-05,
"loss": 0.382,
"step": 873000
},
{
"epoch": 22.25,
"learning_rate": 7.830122446675965e-05,
"loss": 0.3833,
"step": 876000
},
{
"epoch": 22.32,
"learning_rate": 7.753160362474313e-05,
"loss": 0.3837,
"step": 879000
},
{
"epoch": 22.4,
"learning_rate": 7.676198278272662e-05,
"loss": 0.3792,
"step": 882000
},
{
"epoch": 22.48,
"learning_rate": 7.599236194071012e-05,
"loss": 0.3826,
"step": 885000
},
{
"epoch": 22.55,
"learning_rate": 7.522274109869361e-05,
"loss": 0.3802,
"step": 888000
},
{
"epoch": 22.63,
"learning_rate": 7.44531202566771e-05,
"loss": 0.3796,
"step": 891000
},
{
"epoch": 22.71,
"learning_rate": 7.368349941466058e-05,
"loss": 0.383,
"step": 894000
},
{
"epoch": 22.78,
"learning_rate": 7.291387857264408e-05,
"loss": 0.3804,
"step": 897000
},
{
"epoch": 22.86,
"learning_rate": 7.214425773062756e-05,
"loss": 0.3802,
"step": 900000
},
{
"epoch": 22.93,
"learning_rate": 7.137463688861106e-05,
"loss": 0.383,
"step": 903000
},
{
"epoch": 23.01,
"learning_rate": 7.060501604659455e-05,
"loss": 0.3775,
"step": 906000
},
{
"epoch": 23.09,
"learning_rate": 6.983539520457805e-05,
"loss": 0.3663,
"step": 909000
},
{
"epoch": 23.16,
"learning_rate": 6.906577436256153e-05,
"loss": 0.3634,
"step": 912000
},
{
"epoch": 23.24,
"learning_rate": 6.829615352054503e-05,
"loss": 0.3691,
"step": 915000
},
{
"epoch": 23.31,
"learning_rate": 6.752653267852851e-05,
"loss": 0.3721,
"step": 918000
},
{
"epoch": 23.39,
"learning_rate": 6.6756911836512e-05,
"loss": 0.3694,
"step": 921000
},
{
"epoch": 23.47,
"learning_rate": 6.59872909944955e-05,
"loss": 0.371,
"step": 924000
},
{
"epoch": 23.54,
"learning_rate": 6.521767015247898e-05,
"loss": 0.3668,
"step": 927000
},
{
"epoch": 23.62,
"learning_rate": 6.444804931046247e-05,
"loss": 0.3701,
"step": 930000
},
{
"epoch": 23.7,
"learning_rate": 6.367842846844597e-05,
"loss": 0.37,
"step": 933000
},
{
"epoch": 23.77,
"learning_rate": 6.290880762642945e-05,
"loss": 0.3682,
"step": 936000
},
{
"epoch": 23.85,
"learning_rate": 6.213918678441295e-05,
"loss": 0.3698,
"step": 939000
},
{
"epoch": 23.92,
"learning_rate": 6.136956594239644e-05,
"loss": 0.3681,
"step": 942000
},
{
"epoch": 24.0,
"learning_rate": 6.0599945100379935e-05,
"loss": 0.3685,
"step": 945000
},
{
"epoch": 24.08,
"learning_rate": 5.983032425836342e-05,
"loss": 0.3552,
"step": 948000
},
{
"epoch": 24.15,
"learning_rate": 5.906070341634691e-05,
"loss": 0.3553,
"step": 951000
},
{
"epoch": 24.23,
"learning_rate": 5.82910825743304e-05,
"loss": 0.3523,
"step": 954000
},
{
"epoch": 24.31,
"learning_rate": 5.7521461732313896e-05,
"loss": 0.3557,
"step": 957000
},
{
"epoch": 24.38,
"learning_rate": 5.675184089029738e-05,
"loss": 0.3573,
"step": 960000
},
{
"epoch": 24.46,
"learning_rate": 5.5982220048280873e-05,
"loss": 0.357,
"step": 963000
},
{
"epoch": 24.53,
"learning_rate": 5.5212599206264365e-05,
"loss": 0.358,
"step": 966000
},
{
"epoch": 24.61,
"learning_rate": 5.444297836424786e-05,
"loss": 0.3572,
"step": 969000
},
{
"epoch": 24.69,
"learning_rate": 5.367335752223134e-05,
"loss": 0.3589,
"step": 972000
},
{
"epoch": 24.76,
"learning_rate": 5.290373668021484e-05,
"loss": 0.3585,
"step": 975000
},
{
"epoch": 24.84,
"learning_rate": 5.213411583819833e-05,
"loss": 0.3544,
"step": 978000
},
{
"epoch": 24.91,
"learning_rate": 5.136449499618182e-05,
"loss": 0.3542,
"step": 981000
},
{
"epoch": 24.99,
"learning_rate": 5.059487415416532e-05,
"loss": 0.358,
"step": 984000
},
{
"epoch": 25.07,
"learning_rate": 4.98252533121488e-05,
"loss": 0.3488,
"step": 987000
},
{
"epoch": 25.14,
"learning_rate": 4.9055632470132294e-05,
"loss": 0.346,
"step": 990000
},
{
"epoch": 25.22,
"learning_rate": 4.8286011628115786e-05,
"loss": 0.3437,
"step": 993000
},
{
"epoch": 25.3,
"learning_rate": 4.751639078609928e-05,
"loss": 0.3474,
"step": 996000
},
{
"epoch": 25.37,
"learning_rate": 4.674676994408276e-05,
"loss": 0.3452,
"step": 999000
},
{
"epoch": 25.45,
"learning_rate": 4.597714910206626e-05,
"loss": 0.3439,
"step": 1002000
},
{
"epoch": 25.52,
"learning_rate": 4.520752826004975e-05,
"loss": 0.3475,
"step": 1005000
},
{
"epoch": 25.6,
"learning_rate": 4.443790741803324e-05,
"loss": 0.3441,
"step": 1008000
},
{
"epoch": 25.68,
"learning_rate": 4.366828657601673e-05,
"loss": 0.345,
"step": 1011000
},
{
"epoch": 25.75,
"learning_rate": 4.289866573400022e-05,
"loss": 0.3467,
"step": 1014000
},
{
"epoch": 25.83,
"learning_rate": 4.212904489198371e-05,
"loss": 0.3473,
"step": 1017000
},
{
"epoch": 25.91,
"learning_rate": 4.135942404996721e-05,
"loss": 0.3435,
"step": 1020000
},
{
"epoch": 25.98,
"learning_rate": 4.058980320795069e-05,
"loss": 0.3441,
"step": 1023000
},
{
"epoch": 26.06,
"learning_rate": 3.9820182365934184e-05,
"loss": 0.3416,
"step": 1026000
},
{
"epoch": 26.13,
"learning_rate": 3.9050561523917676e-05,
"loss": 0.3348,
"step": 1029000
},
{
"epoch": 26.21,
"learning_rate": 3.828094068190117e-05,
"loss": 0.3382,
"step": 1032000
},
{
"epoch": 26.29,
"learning_rate": 3.751131983988465e-05,
"loss": 0.3383,
"step": 1035000
},
{
"epoch": 26.36,
"learning_rate": 3.6741698997868145e-05,
"loss": 0.3358,
"step": 1038000
},
{
"epoch": 26.44,
"learning_rate": 3.597207815585164e-05,
"loss": 0.3355,
"step": 1041000
},
{
"epoch": 26.51,
"learning_rate": 3.520245731383513e-05,
"loss": 0.3377,
"step": 1044000
},
{
"epoch": 26.59,
"learning_rate": 3.443283647181862e-05,
"loss": 0.3358,
"step": 1047000
},
{
"epoch": 26.67,
"learning_rate": 3.366321562980211e-05,
"loss": 0.336,
"step": 1050000
},
{
"epoch": 26.74,
"learning_rate": 3.2893594787785605e-05,
"loss": 0.336,
"step": 1053000
},
{
"epoch": 26.82,
"learning_rate": 3.21239739457691e-05,
"loss": 0.3329,
"step": 1056000
},
{
"epoch": 26.9,
"learning_rate": 3.135435310375258e-05,
"loss": 0.3364,
"step": 1059000
},
{
"epoch": 26.97,
"learning_rate": 3.0584732261736074e-05,
"loss": 0.3354,
"step": 1062000
},
{
"epoch": 27.05,
"learning_rate": 2.9815111419719566e-05,
"loss": 0.3311,
"step": 1065000
},
{
"epoch": 27.12,
"learning_rate": 2.9045490577703058e-05,
"loss": 0.3287,
"step": 1068000
},
{
"epoch": 27.2,
"learning_rate": 2.8275869735686546e-05,
"loss": 0.3288,
"step": 1071000
},
{
"epoch": 27.28,
"learning_rate": 2.7506248893670038e-05,
"loss": 0.3261,
"step": 1074000
},
{
"epoch": 27.35,
"learning_rate": 2.673662805165353e-05,
"loss": 0.3272,
"step": 1077000
},
{
"epoch": 27.43,
"learning_rate": 2.596700720963702e-05,
"loss": 0.3306,
"step": 1080000
},
{
"epoch": 27.51,
"learning_rate": 2.519738636762051e-05,
"loss": 0.3268,
"step": 1083000
},
{
"epoch": 27.58,
"learning_rate": 2.4427765525604003e-05,
"loss": 0.3256,
"step": 1086000
},
{
"epoch": 27.66,
"learning_rate": 2.365814468358749e-05,
"loss": 0.325,
"step": 1089000
},
{
"epoch": 27.73,
"learning_rate": 2.2888523841570983e-05,
"loss": 0.3258,
"step": 1092000
},
{
"epoch": 27.81,
"learning_rate": 2.2118902999554475e-05,
"loss": 0.3249,
"step": 1095000
},
{
"epoch": 27.89,
"learning_rate": 2.1349282157537964e-05,
"loss": 0.3272,
"step": 1098000
},
{
"epoch": 27.96,
"learning_rate": 2.0579661315521456e-05,
"loss": 0.3233,
"step": 1101000
},
{
"epoch": 28.04,
"learning_rate": 1.9810040473504944e-05,
"loss": 0.3239,
"step": 1104000
},
{
"epoch": 28.11,
"learning_rate": 1.9040419631488436e-05,
"loss": 0.3223,
"step": 1107000
},
{
"epoch": 28.19,
"learning_rate": 1.8270798789471928e-05,
"loss": 0.3188,
"step": 1110000
},
{
"epoch": 28.27,
"learning_rate": 1.7501177947455417e-05,
"loss": 0.3191,
"step": 1113000
},
{
"epoch": 28.34,
"learning_rate": 1.673155710543891e-05,
"loss": 0.3201,
"step": 1116000
},
{
"epoch": 28.42,
"learning_rate": 1.59619362634224e-05,
"loss": 0.32,
"step": 1119000
},
{
"epoch": 28.5,
"learning_rate": 1.519231542140589e-05,
"loss": 0.318,
"step": 1122000
},
{
"epoch": 28.57,
"learning_rate": 1.4422694579389381e-05,
"loss": 0.3171,
"step": 1125000
},
{
"epoch": 28.65,
"learning_rate": 1.3653073737372871e-05,
"loss": 0.3195,
"step": 1128000
},
{
"epoch": 28.72,
"learning_rate": 1.2883452895356363e-05,
"loss": 0.3199,
"step": 1131000
},
{
"epoch": 28.8,
"learning_rate": 1.2113832053339853e-05,
"loss": 0.3201,
"step": 1134000
},
{
"epoch": 28.88,
"learning_rate": 1.1344211211323345e-05,
"loss": 0.3207,
"step": 1137000
},
{
"epoch": 28.95,
"learning_rate": 1.0574590369306837e-05,
"loss": 0.3172,
"step": 1140000
}
],
"logging_steps": 3000,
"max_steps": 1181220,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.3187897195637338e+21,
"trial_name": null,
"trial_params": null
}