zhko_xlsr_100p_run1 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
f121c04
raw
history blame
29 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 689340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 0.00013054830287206266,
"loss": 18.218,
"step": 3000
},
{
"epoch": 0.17,
"learning_rate": 0.0002610966057441253,
"loss": 4.1185,
"step": 6000
},
{
"epoch": 0.26,
"learning_rate": 0.0002990742124651621,
"loss": 3.2662,
"step": 9000
},
{
"epoch": 0.35,
"learning_rate": 0.00029775542680299976,
"loss": 3.0539,
"step": 12000
},
{
"epoch": 0.44,
"learning_rate": 0.0002964366411408375,
"loss": 2.9401,
"step": 15000
},
{
"epoch": 0.52,
"learning_rate": 0.0002951178554786752,
"loss": 2.8804,
"step": 18000
},
{
"epoch": 0.61,
"learning_rate": 0.00029379906981651295,
"loss": 2.8248,
"step": 21000
},
{
"epoch": 0.7,
"learning_rate": 0.0002924802841543507,
"loss": 2.7855,
"step": 24000
},
{
"epoch": 0.78,
"learning_rate": 0.0002911614984921884,
"loss": 2.7762,
"step": 27000
},
{
"epoch": 0.87,
"learning_rate": 0.0002898427128300261,
"loss": 2.7404,
"step": 30000
},
{
"epoch": 0.96,
"learning_rate": 0.0002885239271678638,
"loss": 2.7029,
"step": 33000
},
{
"epoch": 1.04,
"learning_rate": 0.00028720514150570154,
"loss": 2.6612,
"step": 36000
},
{
"epoch": 1.13,
"learning_rate": 0.0002858863558435392,
"loss": 2.6141,
"step": 39000
},
{
"epoch": 1.22,
"learning_rate": 0.000284567570181377,
"loss": 2.5986,
"step": 42000
},
{
"epoch": 1.31,
"learning_rate": 0.00028324878451921467,
"loss": 2.5744,
"step": 45000
},
{
"epoch": 1.39,
"learning_rate": 0.0002819299988570524,
"loss": 2.5785,
"step": 48000
},
{
"epoch": 1.48,
"learning_rate": 0.00028061121319489013,
"loss": 2.5643,
"step": 51000
},
{
"epoch": 1.57,
"learning_rate": 0.0002792924275327278,
"loss": 2.569,
"step": 54000
},
{
"epoch": 1.65,
"learning_rate": 0.00027797364187056553,
"loss": 2.5455,
"step": 57000
},
{
"epoch": 1.74,
"learning_rate": 0.00027665485620840326,
"loss": 2.5417,
"step": 60000
},
{
"epoch": 1.83,
"learning_rate": 0.000275336070546241,
"loss": 2.5095,
"step": 63000
},
{
"epoch": 1.91,
"learning_rate": 0.0002740172848840787,
"loss": 2.5052,
"step": 66000
},
{
"epoch": 2.0,
"learning_rate": 0.00027269849922191645,
"loss": 2.5086,
"step": 69000
},
{
"epoch": 2.09,
"learning_rate": 0.0002713797135597541,
"loss": 2.4391,
"step": 72000
},
{
"epoch": 2.18,
"learning_rate": 0.00027006092789759185,
"loss": 2.4283,
"step": 75000
},
{
"epoch": 2.26,
"learning_rate": 0.0002687421422354296,
"loss": 2.4079,
"step": 78000
},
{
"epoch": 2.35,
"learning_rate": 0.0002674233565732673,
"loss": 2.4065,
"step": 81000
},
{
"epoch": 2.44,
"learning_rate": 0.00026610457091110504,
"loss": 2.3893,
"step": 84000
},
{
"epoch": 2.52,
"learning_rate": 0.00026478578524894277,
"loss": 2.4166,
"step": 87000
},
{
"epoch": 2.61,
"learning_rate": 0.00026346699958678044,
"loss": 2.3907,
"step": 90000
},
{
"epoch": 2.7,
"learning_rate": 0.00026214821392461817,
"loss": 2.3829,
"step": 93000
},
{
"epoch": 2.79,
"learning_rate": 0.0002608294282624559,
"loss": 2.3767,
"step": 96000
},
{
"epoch": 2.87,
"learning_rate": 0.00025951064260029363,
"loss": 2.3518,
"step": 99000
},
{
"epoch": 2.96,
"learning_rate": 0.00025819185693813136,
"loss": 2.3734,
"step": 102000
},
{
"epoch": 3.05,
"learning_rate": 0.0002568730712759691,
"loss": 2.3126,
"step": 105000
},
{
"epoch": 3.13,
"learning_rate": 0.00025555428561380676,
"loss": 2.2802,
"step": 108000
},
{
"epoch": 3.22,
"learning_rate": 0.0002542354999516445,
"loss": 2.2739,
"step": 111000
},
{
"epoch": 3.31,
"learning_rate": 0.0002529167142894822,
"loss": 2.2588,
"step": 114000
},
{
"epoch": 3.39,
"learning_rate": 0.00025159792862731995,
"loss": 2.2573,
"step": 117000
},
{
"epoch": 3.48,
"learning_rate": 0.0002502791429651577,
"loss": 2.2541,
"step": 120000
},
{
"epoch": 3.57,
"learning_rate": 0.0002489603573029954,
"loss": 2.2843,
"step": 123000
},
{
"epoch": 3.66,
"learning_rate": 0.0002476415716408331,
"loss": 2.2548,
"step": 126000
},
{
"epoch": 3.74,
"learning_rate": 0.0002463227859786708,
"loss": 2.252,
"step": 129000
},
{
"epoch": 3.83,
"learning_rate": 0.00024500400031650854,
"loss": 2.2677,
"step": 132000
},
{
"epoch": 3.92,
"learning_rate": 0.00024368521465434624,
"loss": 2.2497,
"step": 135000
},
{
"epoch": 4.0,
"learning_rate": 0.00024236642899218397,
"loss": 2.2434,
"step": 138000
},
{
"epoch": 4.09,
"learning_rate": 0.0002410476433300217,
"loss": 2.1625,
"step": 141000
},
{
"epoch": 4.18,
"learning_rate": 0.0002397288576678594,
"loss": 2.1795,
"step": 144000
},
{
"epoch": 4.26,
"learning_rate": 0.00023841007200569713,
"loss": 2.1666,
"step": 147000
},
{
"epoch": 4.35,
"learning_rate": 0.00023709128634353483,
"loss": 2.1749,
"step": 150000
},
{
"epoch": 4.44,
"learning_rate": 0.00023577250068137256,
"loss": 2.1931,
"step": 153000
},
{
"epoch": 4.53,
"learning_rate": 0.0002344537150192103,
"loss": 2.1797,
"step": 156000
},
{
"epoch": 4.61,
"learning_rate": 0.000233134929357048,
"loss": 2.1573,
"step": 159000
},
{
"epoch": 4.7,
"learning_rate": 0.00023181614369488572,
"loss": 2.1811,
"step": 162000
},
{
"epoch": 4.79,
"learning_rate": 0.00023049735803272345,
"loss": 2.1539,
"step": 165000
},
{
"epoch": 4.87,
"learning_rate": 0.00022917857237056115,
"loss": 2.1657,
"step": 168000
},
{
"epoch": 4.96,
"learning_rate": 0.00022785978670839888,
"loss": 2.1662,
"step": 171000
},
{
"epoch": 5.05,
"learning_rate": 0.0002265410010462366,
"loss": 2.1237,
"step": 174000
},
{
"epoch": 5.14,
"learning_rate": 0.0002252222153840743,
"loss": 2.0812,
"step": 177000
},
{
"epoch": 5.22,
"learning_rate": 0.00022390342972191204,
"loss": 2.0917,
"step": 180000
},
{
"epoch": 5.31,
"learning_rate": 0.00022258464405974977,
"loss": 2.09,
"step": 183000
},
{
"epoch": 5.4,
"learning_rate": 0.00022126585839758747,
"loss": 2.0931,
"step": 186000
},
{
"epoch": 5.48,
"learning_rate": 0.0002199470727354252,
"loss": 2.0907,
"step": 189000
},
{
"epoch": 5.57,
"learning_rate": 0.00021862828707326293,
"loss": 2.0855,
"step": 192000
},
{
"epoch": 5.66,
"learning_rate": 0.00021730950141110063,
"loss": 2.0946,
"step": 195000
},
{
"epoch": 5.74,
"learning_rate": 0.00021599071574893836,
"loss": 2.0904,
"step": 198000
},
{
"epoch": 5.83,
"learning_rate": 0.0002146719300867761,
"loss": 2.0788,
"step": 201000
},
{
"epoch": 5.92,
"learning_rate": 0.0002133531444246138,
"loss": 2.0771,
"step": 204000
},
{
"epoch": 6.01,
"learning_rate": 0.00021203435876245152,
"loss": 2.0746,
"step": 207000
},
{
"epoch": 6.09,
"learning_rate": 0.00021071557310028925,
"loss": 2.0091,
"step": 210000
},
{
"epoch": 6.18,
"learning_rate": 0.00020939678743812695,
"loss": 2.0144,
"step": 213000
},
{
"epoch": 6.27,
"learning_rate": 0.00020807800177596468,
"loss": 2.0083,
"step": 216000
},
{
"epoch": 6.35,
"learning_rate": 0.0002067592161138024,
"loss": 2.0337,
"step": 219000
},
{
"epoch": 6.44,
"learning_rate": 0.0002054404304516401,
"loss": 2.0169,
"step": 222000
},
{
"epoch": 6.53,
"learning_rate": 0.00020412164478947784,
"loss": 2.0264,
"step": 225000
},
{
"epoch": 6.62,
"learning_rate": 0.00020280285912731557,
"loss": 2.0089,
"step": 228000
},
{
"epoch": 6.7,
"learning_rate": 0.00020148407346515327,
"loss": 2.0148,
"step": 231000
},
{
"epoch": 6.79,
"learning_rate": 0.000200165287802991,
"loss": 2.0224,
"step": 234000
},
{
"epoch": 6.88,
"learning_rate": 0.00019884650214082873,
"loss": 2.0242,
"step": 237000
},
{
"epoch": 6.96,
"learning_rate": 0.00019752771647866643,
"loss": 2.0142,
"step": 240000
},
{
"epoch": 7.05,
"learning_rate": 0.00019620893081650416,
"loss": 1.9622,
"step": 243000
},
{
"epoch": 7.14,
"learning_rate": 0.0001948901451543419,
"loss": 1.9643,
"step": 246000
},
{
"epoch": 7.22,
"learning_rate": 0.0001935713594921796,
"loss": 1.9589,
"step": 249000
},
{
"epoch": 7.31,
"learning_rate": 0.00019225257383001732,
"loss": 1.9411,
"step": 252000
},
{
"epoch": 7.4,
"learning_rate": 0.000190933788167855,
"loss": 1.956,
"step": 255000
},
{
"epoch": 7.49,
"learning_rate": 0.00018961500250569275,
"loss": 1.9596,
"step": 258000
},
{
"epoch": 7.57,
"learning_rate": 0.00018829621684353048,
"loss": 1.9373,
"step": 261000
},
{
"epoch": 7.66,
"learning_rate": 0.00018697743118136815,
"loss": 1.9532,
"step": 264000
},
{
"epoch": 7.75,
"learning_rate": 0.00018565864551920588,
"loss": 1.9669,
"step": 267000
},
{
"epoch": 7.83,
"learning_rate": 0.00018433985985704364,
"loss": 1.957,
"step": 270000
},
{
"epoch": 7.92,
"learning_rate": 0.0001830210741948813,
"loss": 1.977,
"step": 273000
},
{
"epoch": 8.01,
"learning_rate": 0.00018170228853271904,
"loss": 1.9712,
"step": 276000
},
{
"epoch": 8.09,
"learning_rate": 0.0001803835028705568,
"loss": 1.8922,
"step": 279000
},
{
"epoch": 8.18,
"learning_rate": 0.00017906471720839447,
"loss": 1.9054,
"step": 282000
},
{
"epoch": 8.27,
"learning_rate": 0.0001777459315462322,
"loss": 1.8847,
"step": 285000
},
{
"epoch": 8.36,
"learning_rate": 0.00017642714588406993,
"loss": 1.896,
"step": 288000
},
{
"epoch": 8.44,
"learning_rate": 0.00017510836022190763,
"loss": 1.9139,
"step": 291000
},
{
"epoch": 8.53,
"learning_rate": 0.00017378957455974536,
"loss": 1.9086,
"step": 294000
},
{
"epoch": 8.62,
"learning_rate": 0.0001724707888975831,
"loss": 1.9158,
"step": 297000
},
{
"epoch": 8.7,
"learning_rate": 0.0001711520032354208,
"loss": 1.908,
"step": 300000
},
{
"epoch": 8.79,
"learning_rate": 0.00016983321757325852,
"loss": 1.9034,
"step": 303000
},
{
"epoch": 8.88,
"learning_rate": 0.00016851443191109625,
"loss": 1.9022,
"step": 306000
},
{
"epoch": 8.97,
"learning_rate": 0.00016719564624893395,
"loss": 1.9045,
"step": 309000
},
{
"epoch": 9.05,
"learning_rate": 0.00016587686058677168,
"loss": 1.8607,
"step": 312000
},
{
"epoch": 9.14,
"learning_rate": 0.0001645580749246094,
"loss": 1.8439,
"step": 315000
},
{
"epoch": 9.23,
"learning_rate": 0.0001632392892624471,
"loss": 1.8252,
"step": 318000
},
{
"epoch": 9.31,
"learning_rate": 0.00016192050360028484,
"loss": 1.844,
"step": 321000
},
{
"epoch": 9.4,
"learning_rate": 0.00016060171793812257,
"loss": 1.839,
"step": 324000
},
{
"epoch": 9.49,
"learning_rate": 0.00015928293227596027,
"loss": 1.8442,
"step": 327000
},
{
"epoch": 9.57,
"learning_rate": 0.000157964146613798,
"loss": 1.8378,
"step": 330000
},
{
"epoch": 9.66,
"learning_rate": 0.00015664536095163573,
"loss": 1.8436,
"step": 333000
},
{
"epoch": 9.75,
"learning_rate": 0.00015532657528947343,
"loss": 1.8399,
"step": 336000
},
{
"epoch": 9.84,
"learning_rate": 0.00015400778962731116,
"loss": 1.8357,
"step": 339000
},
{
"epoch": 9.92,
"learning_rate": 0.0001526890039651489,
"loss": 1.8406,
"step": 342000
},
{
"epoch": 10.01,
"learning_rate": 0.0001513702183029866,
"loss": 1.8385,
"step": 345000
},
{
"epoch": 10.1,
"learning_rate": 0.00015005143264082432,
"loss": 1.7886,
"step": 348000
},
{
"epoch": 10.18,
"learning_rate": 0.00014873264697866202,
"loss": 1.7909,
"step": 351000
},
{
"epoch": 10.27,
"learning_rate": 0.00014741386131649975,
"loss": 1.794,
"step": 354000
},
{
"epoch": 10.36,
"learning_rate": 0.00014609507565433748,
"loss": 1.7712,
"step": 357000
},
{
"epoch": 10.44,
"learning_rate": 0.00014477628999217518,
"loss": 1.7875,
"step": 360000
},
{
"epoch": 10.53,
"learning_rate": 0.0001434575043300129,
"loss": 1.786,
"step": 363000
},
{
"epoch": 10.62,
"learning_rate": 0.00014213871866785064,
"loss": 1.7925,
"step": 366000
},
{
"epoch": 10.71,
"learning_rate": 0.00014081993300568834,
"loss": 1.7875,
"step": 369000
},
{
"epoch": 10.79,
"learning_rate": 0.00013950114734352607,
"loss": 1.7752,
"step": 372000
},
{
"epoch": 10.88,
"learning_rate": 0.0001381823616813638,
"loss": 1.791,
"step": 375000
},
{
"epoch": 10.97,
"learning_rate": 0.0001368635760192015,
"loss": 1.7857,
"step": 378000
},
{
"epoch": 11.05,
"learning_rate": 0.00013554479035703923,
"loss": 1.7407,
"step": 381000
},
{
"epoch": 11.14,
"learning_rate": 0.00013422600469487693,
"loss": 1.7115,
"step": 384000
},
{
"epoch": 11.23,
"learning_rate": 0.00013290721903271466,
"loss": 1.7294,
"step": 387000
},
{
"epoch": 11.32,
"learning_rate": 0.0001315884333705524,
"loss": 1.7205,
"step": 390000
},
{
"epoch": 11.4,
"learning_rate": 0.0001302696477083901,
"loss": 1.7281,
"step": 393000
},
{
"epoch": 11.49,
"learning_rate": 0.00012895086204622782,
"loss": 1.7418,
"step": 396000
},
{
"epoch": 11.58,
"learning_rate": 0.00012763207638406555,
"loss": 1.7316,
"step": 399000
},
{
"epoch": 11.66,
"learning_rate": 0.00012631329072190325,
"loss": 1.7348,
"step": 402000
},
{
"epoch": 11.75,
"learning_rate": 0.00012499450505974098,
"loss": 1.7392,
"step": 405000
},
{
"epoch": 11.84,
"learning_rate": 0.0001236757193975787,
"loss": 1.7341,
"step": 408000
},
{
"epoch": 11.92,
"learning_rate": 0.0001223569337354164,
"loss": 1.7347,
"step": 411000
},
{
"epoch": 12.01,
"learning_rate": 0.00012103814807325414,
"loss": 1.7196,
"step": 414000
},
{
"epoch": 12.1,
"learning_rate": 0.00011971936241109186,
"loss": 1.6614,
"step": 417000
},
{
"epoch": 12.19,
"learning_rate": 0.00011840057674892957,
"loss": 1.6671,
"step": 420000
},
{
"epoch": 12.27,
"learning_rate": 0.0001170817910867673,
"loss": 1.6665,
"step": 423000
},
{
"epoch": 12.36,
"learning_rate": 0.00011576300542460502,
"loss": 1.6775,
"step": 426000
},
{
"epoch": 12.45,
"learning_rate": 0.00011444421976244273,
"loss": 1.6646,
"step": 429000
},
{
"epoch": 12.53,
"learning_rate": 0.00011312543410028046,
"loss": 1.6779,
"step": 432000
},
{
"epoch": 12.62,
"learning_rate": 0.00011180664843811818,
"loss": 1.6802,
"step": 435000
},
{
"epoch": 12.71,
"learning_rate": 0.00011048786277595589,
"loss": 1.6759,
"step": 438000
},
{
"epoch": 12.79,
"learning_rate": 0.0001091690771137936,
"loss": 1.6729,
"step": 441000
},
{
"epoch": 12.88,
"learning_rate": 0.00010785029145163134,
"loss": 1.6769,
"step": 444000
},
{
"epoch": 12.97,
"learning_rate": 0.00010653150578946905,
"loss": 1.6721,
"step": 447000
},
{
"epoch": 13.06,
"learning_rate": 0.00010521272012730677,
"loss": 1.6302,
"step": 450000
},
{
"epoch": 13.14,
"learning_rate": 0.0001038939344651445,
"loss": 1.6112,
"step": 453000
},
{
"epoch": 13.23,
"learning_rate": 0.00010257514880298221,
"loss": 1.5991,
"step": 456000
},
{
"epoch": 13.32,
"learning_rate": 0.00010125636314081991,
"loss": 1.6221,
"step": 459000
},
{
"epoch": 13.4,
"learning_rate": 9.993757747865765e-05,
"loss": 1.6168,
"step": 462000
},
{
"epoch": 13.49,
"learning_rate": 9.861879181649536e-05,
"loss": 1.6166,
"step": 465000
},
{
"epoch": 13.58,
"learning_rate": 9.730000615433307e-05,
"loss": 1.6234,
"step": 468000
},
{
"epoch": 13.67,
"learning_rate": 9.59812204921708e-05,
"loss": 1.6245,
"step": 471000
},
{
"epoch": 13.75,
"learning_rate": 9.466243483000852e-05,
"loss": 1.618,
"step": 474000
},
{
"epoch": 13.84,
"learning_rate": 9.334364916784623e-05,
"loss": 1.6341,
"step": 477000
},
{
"epoch": 13.93,
"learning_rate": 9.202486350568396e-05,
"loss": 1.6353,
"step": 480000
},
{
"epoch": 14.01,
"learning_rate": 9.070607784352168e-05,
"loss": 1.6104,
"step": 483000
},
{
"epoch": 14.1,
"learning_rate": 8.938729218135939e-05,
"loss": 1.562,
"step": 486000
},
{
"epoch": 14.19,
"learning_rate": 8.806850651919711e-05,
"loss": 1.5654,
"step": 489000
},
{
"epoch": 14.27,
"learning_rate": 8.674972085703484e-05,
"loss": 1.564,
"step": 492000
},
{
"epoch": 14.36,
"learning_rate": 8.543093519487255e-05,
"loss": 1.5606,
"step": 495000
},
{
"epoch": 14.45,
"learning_rate": 8.411214953271027e-05,
"loss": 1.5764,
"step": 498000
},
{
"epoch": 14.54,
"learning_rate": 8.2793363870548e-05,
"loss": 1.5718,
"step": 501000
},
{
"epoch": 14.62,
"learning_rate": 8.147457820838571e-05,
"loss": 1.5768,
"step": 504000
},
{
"epoch": 14.71,
"learning_rate": 8.015579254622343e-05,
"loss": 1.5686,
"step": 507000
},
{
"epoch": 14.8,
"learning_rate": 7.883700688406116e-05,
"loss": 1.5753,
"step": 510000
},
{
"epoch": 14.88,
"learning_rate": 7.751822122189887e-05,
"loss": 1.5588,
"step": 513000
},
{
"epoch": 14.97,
"learning_rate": 7.619943555973659e-05,
"loss": 1.5588,
"step": 516000
},
{
"epoch": 15.06,
"learning_rate": 7.48806498975743e-05,
"loss": 1.5413,
"step": 519000
},
{
"epoch": 15.14,
"learning_rate": 7.356186423541203e-05,
"loss": 1.5102,
"step": 522000
},
{
"epoch": 15.23,
"learning_rate": 7.224307857324975e-05,
"loss": 1.5183,
"step": 525000
},
{
"epoch": 15.32,
"learning_rate": 7.092429291108746e-05,
"loss": 1.5267,
"step": 528000
},
{
"epoch": 15.41,
"learning_rate": 6.960550724892519e-05,
"loss": 1.5185,
"step": 531000
},
{
"epoch": 15.49,
"learning_rate": 6.828672158676289e-05,
"loss": 1.5195,
"step": 534000
},
{
"epoch": 15.58,
"learning_rate": 6.696793592460062e-05,
"loss": 1.5154,
"step": 537000
},
{
"epoch": 15.67,
"learning_rate": 6.564915026243834e-05,
"loss": 1.5312,
"step": 540000
},
{
"epoch": 15.75,
"learning_rate": 6.433036460027605e-05,
"loss": 1.5191,
"step": 543000
},
{
"epoch": 15.84,
"learning_rate": 6.301157893811378e-05,
"loss": 1.5209,
"step": 546000
},
{
"epoch": 15.93,
"learning_rate": 6.16927932759515e-05,
"loss": 1.5145,
"step": 549000
},
{
"epoch": 16.02,
"learning_rate": 6.037400761378922e-05,
"loss": 1.5147,
"step": 552000
},
{
"epoch": 16.1,
"learning_rate": 5.905522195162694e-05,
"loss": 1.47,
"step": 555000
},
{
"epoch": 16.19,
"learning_rate": 5.773643628946465e-05,
"loss": 1.4766,
"step": 558000
},
{
"epoch": 16.28,
"learning_rate": 5.641765062730237e-05,
"loss": 1.4688,
"step": 561000
},
{
"epoch": 16.36,
"learning_rate": 5.5098864965140094e-05,
"loss": 1.4727,
"step": 564000
},
{
"epoch": 16.45,
"learning_rate": 5.378007930297781e-05,
"loss": 1.4673,
"step": 567000
},
{
"epoch": 16.54,
"learning_rate": 5.246129364081553e-05,
"loss": 1.4728,
"step": 570000
},
{
"epoch": 16.62,
"learning_rate": 5.1142507978653254e-05,
"loss": 1.4751,
"step": 573000
},
{
"epoch": 16.71,
"learning_rate": 4.982372231649097e-05,
"loss": 1.4652,
"step": 576000
},
{
"epoch": 16.8,
"learning_rate": 4.850493665432869e-05,
"loss": 1.4783,
"step": 579000
},
{
"epoch": 16.89,
"learning_rate": 4.718615099216641e-05,
"loss": 1.4678,
"step": 582000
},
{
"epoch": 16.97,
"learning_rate": 4.586736533000413e-05,
"loss": 1.4717,
"step": 585000
},
{
"epoch": 17.06,
"learning_rate": 4.454857966784185e-05,
"loss": 1.4422,
"step": 588000
},
{
"epoch": 17.15,
"learning_rate": 4.322979400567956e-05,
"loss": 1.4209,
"step": 591000
},
{
"epoch": 17.23,
"learning_rate": 4.191100834351728e-05,
"loss": 1.4398,
"step": 594000
},
{
"epoch": 17.32,
"learning_rate": 4.0592222681355005e-05,
"loss": 1.4331,
"step": 597000
},
{
"epoch": 17.41,
"learning_rate": 3.927343701919272e-05,
"loss": 1.4253,
"step": 600000
},
{
"epoch": 17.49,
"learning_rate": 3.795465135703044e-05,
"loss": 1.4247,
"step": 603000
},
{
"epoch": 17.58,
"learning_rate": 3.6635865694868164e-05,
"loss": 1.432,
"step": 606000
},
{
"epoch": 17.67,
"learning_rate": 3.531708003270588e-05,
"loss": 1.4275,
"step": 609000
},
{
"epoch": 17.76,
"learning_rate": 3.39982943705436e-05,
"loss": 1.4305,
"step": 612000
},
{
"epoch": 17.84,
"learning_rate": 3.267950870838132e-05,
"loss": 1.4237,
"step": 615000
},
{
"epoch": 17.93,
"learning_rate": 3.136072304621904e-05,
"loss": 1.4339,
"step": 618000
},
{
"epoch": 18.02,
"learning_rate": 3.0041937384056755e-05,
"loss": 1.4083,
"step": 621000
},
{
"epoch": 18.1,
"learning_rate": 2.8723151721894477e-05,
"loss": 1.383,
"step": 624000
},
{
"epoch": 18.19,
"learning_rate": 2.7404366059732196e-05,
"loss": 1.3949,
"step": 627000
},
{
"epoch": 18.28,
"learning_rate": 2.6085580397569915e-05,
"loss": 1.3857,
"step": 630000
},
{
"epoch": 18.37,
"learning_rate": 2.4766794735407634e-05,
"loss": 1.4049,
"step": 633000
},
{
"epoch": 18.45,
"learning_rate": 2.3448009073245356e-05,
"loss": 1.3972,
"step": 636000
},
{
"epoch": 18.54,
"learning_rate": 2.2129223411083075e-05,
"loss": 1.3983,
"step": 639000
},
{
"epoch": 18.63,
"learning_rate": 2.081043774892079e-05,
"loss": 1.3907,
"step": 642000
},
{
"epoch": 18.71,
"learning_rate": 1.949165208675851e-05,
"loss": 1.3888,
"step": 645000
},
{
"epoch": 18.8,
"learning_rate": 1.817286642459623e-05,
"loss": 1.3762,
"step": 648000
},
{
"epoch": 18.89,
"learning_rate": 1.685408076243395e-05,
"loss": 1.3926,
"step": 651000
},
{
"epoch": 18.97,
"learning_rate": 1.553529510027167e-05,
"loss": 1.3874,
"step": 654000
},
{
"epoch": 19.06,
"learning_rate": 1.4216509438109387e-05,
"loss": 1.375,
"step": 657000
},
{
"epoch": 19.15,
"learning_rate": 1.2897723775947106e-05,
"loss": 1.3655,
"step": 660000
},
{
"epoch": 19.24,
"learning_rate": 1.1578938113784827e-05,
"loss": 1.3625,
"step": 663000
},
{
"epoch": 19.32,
"learning_rate": 1.0260152451622544e-05,
"loss": 1.3705,
"step": 666000
},
{
"epoch": 19.41,
"learning_rate": 8.941366789460264e-06,
"loss": 1.3672,
"step": 669000
},
{
"epoch": 19.5,
"learning_rate": 7.622581127297983e-06,
"loss": 1.3545,
"step": 672000
},
{
"epoch": 19.58,
"learning_rate": 6.303795465135703e-06,
"loss": 1.3611,
"step": 675000
},
{
"epoch": 19.67,
"learning_rate": 4.985009802973422e-06,
"loss": 1.3574,
"step": 678000
},
{
"epoch": 19.76,
"learning_rate": 3.666224140811141e-06,
"loss": 1.3673,
"step": 681000
},
{
"epoch": 19.85,
"learning_rate": 2.34743847864886e-06,
"loss": 1.3629,
"step": 684000
},
{
"epoch": 19.93,
"learning_rate": 1.028652816486579e-06,
"loss": 1.3694,
"step": 687000
},
{
"epoch": 20.0,
"step": 689340,
"total_flos": 1.193067231326117e+21,
"train_loss": 1.952947931640795,
"train_runtime": 385301.7422,
"train_samples_per_second": 28.625,
"train_steps_per_second": 1.789
}
],
"logging_steps": 3000,
"max_steps": 689340,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.193067231326117e+21,
"trial_name": null,
"trial_params": null
}