yesj1234's picture
Upload folder using huggingface_hub
b0aac66
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 28.999987503592717,
"eval_steps": 500,
"global_step": 1160333,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 7.497500833055648e-05,
"loss": 11.5654,
"step": 3000
},
{
"epoch": 0.15,
"learning_rate": 0.00014995001666111296,
"loss": 1.9313,
"step": 6000
},
{
"epoch": 0.22,
"learning_rate": 0.0002249250249916694,
"loss": 1.3886,
"step": 9000
},
{
"epoch": 0.3,
"learning_rate": 0.0002999000333222259,
"loss": 1.3023,
"step": 12000
},
{
"epoch": 0.37,
"learning_rate": 0.0002992436418962473,
"loss": 1.2746,
"step": 15000
},
{
"epoch": 0.45,
"learning_rate": 0.0002984862739685911,
"loss": 1.2224,
"step": 18000
},
{
"epoch": 0.52,
"learning_rate": 0.00029772890604093486,
"loss": 1.1809,
"step": 21000
},
{
"epoch": 0.6,
"learning_rate": 0.00029697153811327864,
"loss": 1.1405,
"step": 24000
},
{
"epoch": 0.67,
"learning_rate": 0.0002962141701856224,
"loss": 1.1178,
"step": 27000
},
{
"epoch": 0.75,
"learning_rate": 0.0002954568022579662,
"loss": 1.0984,
"step": 30000
},
{
"epoch": 0.82,
"learning_rate": 0.00029469943433031,
"loss": 1.0659,
"step": 33000
},
{
"epoch": 0.9,
"learning_rate": 0.0002939420664026538,
"loss": 1.0466,
"step": 36000
},
{
"epoch": 0.97,
"learning_rate": 0.0002931846984749976,
"loss": 1.0377,
"step": 39000
},
{
"epoch": 1.05,
"learning_rate": 0.00029242733054734134,
"loss": 1.005,
"step": 42000
},
{
"epoch": 1.12,
"learning_rate": 0.0002916699626196851,
"loss": 0.9926,
"step": 45000
},
{
"epoch": 1.2,
"learning_rate": 0.00029091259469202896,
"loss": 0.9863,
"step": 48000
},
{
"epoch": 1.27,
"learning_rate": 0.0002901552267643727,
"loss": 0.9723,
"step": 51000
},
{
"epoch": 1.35,
"learning_rate": 0.00028939785883671653,
"loss": 0.9571,
"step": 54000
},
{
"epoch": 1.42,
"learning_rate": 0.0002886404909090603,
"loss": 0.933,
"step": 57000
},
{
"epoch": 1.5,
"learning_rate": 0.00028788312298140404,
"loss": 0.9296,
"step": 60000
},
{
"epoch": 1.57,
"learning_rate": 0.0002871257550537479,
"loss": 0.9255,
"step": 63000
},
{
"epoch": 1.65,
"learning_rate": 0.00028636838712609166,
"loss": 0.9173,
"step": 66000
},
{
"epoch": 1.72,
"learning_rate": 0.0002856110191984354,
"loss": 0.9063,
"step": 69000
},
{
"epoch": 1.8,
"learning_rate": 0.00028485365127077923,
"loss": 0.9004,
"step": 72000
},
{
"epoch": 1.87,
"learning_rate": 0.000284096283343123,
"loss": 0.8997,
"step": 75000
},
{
"epoch": 1.95,
"learning_rate": 0.00028333891541546674,
"loss": 0.8943,
"step": 78000
},
{
"epoch": 2.02,
"learning_rate": 0.0002825815474878106,
"loss": 0.8797,
"step": 81000
},
{
"epoch": 2.1,
"learning_rate": 0.00028182417956015436,
"loss": 0.8501,
"step": 84000
},
{
"epoch": 2.17,
"learning_rate": 0.0002810668116324981,
"loss": 0.8479,
"step": 87000
},
{
"epoch": 2.25,
"learning_rate": 0.00028030944370484193,
"loss": 0.8451,
"step": 90000
},
{
"epoch": 2.32,
"learning_rate": 0.0002795520757771857,
"loss": 0.8315,
"step": 93000
},
{
"epoch": 2.4,
"learning_rate": 0.0002787947078495295,
"loss": 0.8353,
"step": 96000
},
{
"epoch": 2.47,
"learning_rate": 0.0002780373399218733,
"loss": 0.8271,
"step": 99000
},
{
"epoch": 2.55,
"learning_rate": 0.00027727997199421706,
"loss": 0.8129,
"step": 102000
},
{
"epoch": 2.62,
"learning_rate": 0.00027652260406656084,
"loss": 0.8125,
"step": 105000
},
{
"epoch": 2.7,
"learning_rate": 0.0002757652361389046,
"loss": 0.8119,
"step": 108000
},
{
"epoch": 2.77,
"learning_rate": 0.0002750078682112484,
"loss": 0.8,
"step": 111000
},
{
"epoch": 2.85,
"learning_rate": 0.0002742505002835922,
"loss": 0.7979,
"step": 114000
},
{
"epoch": 2.92,
"learning_rate": 0.000273493132355936,
"loss": 0.7914,
"step": 117000
},
{
"epoch": 3.0,
"learning_rate": 0.00027273576442827976,
"loss": 0.7916,
"step": 120000
},
{
"epoch": 3.07,
"learning_rate": 0.00027197839650062354,
"loss": 0.7542,
"step": 123000
},
{
"epoch": 3.15,
"learning_rate": 0.0002712210285729673,
"loss": 0.7643,
"step": 126000
},
{
"epoch": 3.22,
"learning_rate": 0.0002704636606453111,
"loss": 0.7576,
"step": 129000
},
{
"epoch": 3.3,
"learning_rate": 0.0002697062927176549,
"loss": 0.7551,
"step": 132000
},
{
"epoch": 3.37,
"learning_rate": 0.0002689489247899987,
"loss": 0.7537,
"step": 135000
},
{
"epoch": 3.45,
"learning_rate": 0.00026819155686234246,
"loss": 0.7536,
"step": 138000
},
{
"epoch": 3.52,
"learning_rate": 0.00026743418893468624,
"loss": 0.7466,
"step": 141000
},
{
"epoch": 3.6,
"learning_rate": 0.00026667682100703,
"loss": 0.7488,
"step": 144000
},
{
"epoch": 3.67,
"learning_rate": 0.0002659194530793738,
"loss": 0.7445,
"step": 147000
},
{
"epoch": 3.75,
"learning_rate": 0.0002651620851517176,
"loss": 0.7396,
"step": 150000
},
{
"epoch": 3.82,
"learning_rate": 0.0002644047172240614,
"loss": 0.7387,
"step": 153000
},
{
"epoch": 3.9,
"learning_rate": 0.00026364734929640516,
"loss": 0.7324,
"step": 156000
},
{
"epoch": 3.97,
"learning_rate": 0.00026288998136874894,
"loss": 0.7297,
"step": 159000
},
{
"epoch": 4.05,
"learning_rate": 0.0002621326134410928,
"loss": 0.7069,
"step": 162000
},
{
"epoch": 4.12,
"learning_rate": 0.0002613752455134365,
"loss": 0.7001,
"step": 165000
},
{
"epoch": 4.2,
"learning_rate": 0.0002606178775857803,
"loss": 0.6945,
"step": 168000
},
{
"epoch": 4.27,
"learning_rate": 0.00025986050965812413,
"loss": 0.7017,
"step": 171000
},
{
"epoch": 4.35,
"learning_rate": 0.00025910314173046786,
"loss": 0.7018,
"step": 174000
},
{
"epoch": 4.42,
"learning_rate": 0.00025834577380281164,
"loss": 0.6977,
"step": 177000
},
{
"epoch": 4.5,
"learning_rate": 0.0002575884058751555,
"loss": 0.6923,
"step": 180000
},
{
"epoch": 4.57,
"learning_rate": 0.0002568310379474992,
"loss": 0.6972,
"step": 183000
},
{
"epoch": 4.65,
"learning_rate": 0.000256073670019843,
"loss": 0.685,
"step": 186000
},
{
"epoch": 4.72,
"learning_rate": 0.00025531630209218683,
"loss": 0.6857,
"step": 189000
},
{
"epoch": 4.8,
"learning_rate": 0.00025455893416453056,
"loss": 0.6846,
"step": 192000
},
{
"epoch": 4.87,
"learning_rate": 0.00025380156623687434,
"loss": 0.6848,
"step": 195000
},
{
"epoch": 4.95,
"learning_rate": 0.0002530441983092182,
"loss": 0.6783,
"step": 198000
},
{
"epoch": 5.02,
"learning_rate": 0.00025228683038156196,
"loss": 0.6676,
"step": 201000
},
{
"epoch": 5.1,
"learning_rate": 0.0002515294624539057,
"loss": 0.6514,
"step": 204000
},
{
"epoch": 5.17,
"learning_rate": 0.00025077209452624953,
"loss": 0.6469,
"step": 207000
},
{
"epoch": 5.25,
"learning_rate": 0.0002500147265985933,
"loss": 0.6483,
"step": 210000
},
{
"epoch": 5.32,
"learning_rate": 0.00024925735867093704,
"loss": 0.6525,
"step": 213000
},
{
"epoch": 5.4,
"learning_rate": 0.0002484999907432809,
"loss": 0.6487,
"step": 216000
},
{
"epoch": 5.47,
"learning_rate": 0.00024774262281562466,
"loss": 0.6495,
"step": 219000
},
{
"epoch": 5.55,
"learning_rate": 0.00024698525488796845,
"loss": 0.645,
"step": 222000
},
{
"epoch": 5.62,
"learning_rate": 0.00024622788696031223,
"loss": 0.6517,
"step": 225000
},
{
"epoch": 5.7,
"learning_rate": 0.000245470519032656,
"loss": 0.6459,
"step": 228000
},
{
"epoch": 5.77,
"learning_rate": 0.0002447131511049998,
"loss": 0.6451,
"step": 231000
},
{
"epoch": 5.85,
"learning_rate": 0.00024395578317734358,
"loss": 0.6412,
"step": 234000
},
{
"epoch": 5.92,
"learning_rate": 0.00024319841524968736,
"loss": 0.6457,
"step": 237000
},
{
"epoch": 6.0,
"learning_rate": 0.00024244104732203112,
"loss": 0.64,
"step": 240000
},
{
"epoch": 6.07,
"learning_rate": 0.00024168367939437493,
"loss": 0.6137,
"step": 243000
},
{
"epoch": 6.15,
"learning_rate": 0.0002409263114667187,
"loss": 0.6131,
"step": 246000
},
{
"epoch": 6.22,
"learning_rate": 0.0002401689435390625,
"loss": 0.6153,
"step": 249000
},
{
"epoch": 6.3,
"learning_rate": 0.00023941157561140628,
"loss": 0.6193,
"step": 252000
},
{
"epoch": 6.37,
"learning_rate": 0.00023865420768375006,
"loss": 0.6161,
"step": 255000
},
{
"epoch": 6.45,
"learning_rate": 0.00023789683975609387,
"loss": 0.614,
"step": 258000
},
{
"epoch": 6.52,
"learning_rate": 0.00023713947182843763,
"loss": 0.6142,
"step": 261000
},
{
"epoch": 6.6,
"learning_rate": 0.0002363821039007814,
"loss": 0.6181,
"step": 264000
},
{
"epoch": 6.67,
"learning_rate": 0.00023562473597312522,
"loss": 0.6098,
"step": 267000
},
{
"epoch": 6.75,
"learning_rate": 0.00023486736804546898,
"loss": 0.6165,
"step": 270000
},
{
"epoch": 6.82,
"learning_rate": 0.00023411000011781276,
"loss": 0.612,
"step": 273000
},
{
"epoch": 6.9,
"learning_rate": 0.00023335263219015657,
"loss": 0.6109,
"step": 276000
},
{
"epoch": 6.97,
"learning_rate": 0.00023259526426250033,
"loss": 0.6141,
"step": 279000
},
{
"epoch": 7.05,
"learning_rate": 0.0002318378963348441,
"loss": 0.5912,
"step": 282000
},
{
"epoch": 7.12,
"learning_rate": 0.00023108052840718792,
"loss": 0.5838,
"step": 285000
},
{
"epoch": 7.2,
"learning_rate": 0.00023032316047953168,
"loss": 0.5823,
"step": 288000
},
{
"epoch": 7.27,
"learning_rate": 0.00022956579255187546,
"loss": 0.5836,
"step": 291000
},
{
"epoch": 7.35,
"learning_rate": 0.00022880842462421927,
"loss": 0.5857,
"step": 294000
},
{
"epoch": 7.42,
"learning_rate": 0.00022805105669656303,
"loss": 0.5872,
"step": 297000
},
{
"epoch": 7.5,
"learning_rate": 0.0002272936887689068,
"loss": 0.59,
"step": 300000
},
{
"epoch": 7.57,
"learning_rate": 0.00022653632084125062,
"loss": 0.5808,
"step": 303000
},
{
"epoch": 7.65,
"learning_rate": 0.0002257789529135944,
"loss": 0.5826,
"step": 306000
},
{
"epoch": 7.72,
"learning_rate": 0.00022502158498593816,
"loss": 0.5813,
"step": 309000
},
{
"epoch": 7.8,
"learning_rate": 0.00022426421705828197,
"loss": 0.5883,
"step": 312000
},
{
"epoch": 7.87,
"learning_rate": 0.00022350684913062576,
"loss": 0.5852,
"step": 315000
},
{
"epoch": 7.95,
"learning_rate": 0.00022274948120296954,
"loss": 0.5824,
"step": 318000
},
{
"epoch": 8.02,
"learning_rate": 0.00022199211327531332,
"loss": 0.575,
"step": 321000
},
{
"epoch": 8.1,
"learning_rate": 0.0002212347453476571,
"loss": 0.5561,
"step": 324000
},
{
"epoch": 8.17,
"learning_rate": 0.0002204773774200009,
"loss": 0.5596,
"step": 327000
},
{
"epoch": 8.25,
"learning_rate": 0.00021972000949234467,
"loss": 0.5603,
"step": 330000
},
{
"epoch": 8.32,
"learning_rate": 0.00021896264156468846,
"loss": 0.5575,
"step": 333000
},
{
"epoch": 8.4,
"learning_rate": 0.00021820527363703224,
"loss": 0.5545,
"step": 336000
},
{
"epoch": 8.47,
"learning_rate": 0.00021744790570937602,
"loss": 0.5629,
"step": 339000
},
{
"epoch": 8.55,
"learning_rate": 0.00021669053778171983,
"loss": 0.5572,
"step": 342000
},
{
"epoch": 8.62,
"learning_rate": 0.0002159331698540636,
"loss": 0.5602,
"step": 345000
},
{
"epoch": 8.7,
"learning_rate": 0.00021517580192640737,
"loss": 0.5521,
"step": 348000
},
{
"epoch": 8.77,
"learning_rate": 0.00021441843399875118,
"loss": 0.5582,
"step": 351000
},
{
"epoch": 8.85,
"learning_rate": 0.00021366106607109497,
"loss": 0.5539,
"step": 354000
},
{
"epoch": 8.92,
"learning_rate": 0.00021290369814343872,
"loss": 0.5589,
"step": 357000
},
{
"epoch": 9.0,
"learning_rate": 0.00021214633021578253,
"loss": 0.5549,
"step": 360000
},
{
"epoch": 9.07,
"learning_rate": 0.00021138896228812632,
"loss": 0.525,
"step": 363000
},
{
"epoch": 9.15,
"learning_rate": 0.00021063159436047007,
"loss": 0.5304,
"step": 366000
},
{
"epoch": 9.22,
"learning_rate": 0.00020987422643281388,
"loss": 0.5331,
"step": 369000
},
{
"epoch": 9.3,
"learning_rate": 0.00020911685850515766,
"loss": 0.5341,
"step": 372000
},
{
"epoch": 9.37,
"learning_rate": 0.00020835949057750142,
"loss": 0.5319,
"step": 375000
},
{
"epoch": 9.45,
"learning_rate": 0.00020760212264984523,
"loss": 0.5318,
"step": 378000
},
{
"epoch": 9.52,
"learning_rate": 0.00020684475472218901,
"loss": 0.5368,
"step": 381000
},
{
"epoch": 9.6,
"learning_rate": 0.00020608738679453277,
"loss": 0.532,
"step": 384000
},
{
"epoch": 9.67,
"learning_rate": 0.00020533001886687658,
"loss": 0.5353,
"step": 387000
},
{
"epoch": 9.75,
"learning_rate": 0.00020457265093922036,
"loss": 0.5329,
"step": 390000
},
{
"epoch": 9.82,
"learning_rate": 0.00020381528301156415,
"loss": 0.5354,
"step": 393000
},
{
"epoch": 9.9,
"learning_rate": 0.00020305791508390793,
"loss": 0.5375,
"step": 396000
},
{
"epoch": 9.97,
"learning_rate": 0.00020230054715625171,
"loss": 0.5377,
"step": 399000
},
{
"epoch": 10.05,
"learning_rate": 0.0002015431792285955,
"loss": 0.5191,
"step": 402000
},
{
"epoch": 10.12,
"learning_rate": 0.00020078581130093928,
"loss": 0.5057,
"step": 405000
},
{
"epoch": 10.2,
"learning_rate": 0.00020002844337328306,
"loss": 0.508,
"step": 408000
},
{
"epoch": 10.27,
"learning_rate": 0.00019927107544562687,
"loss": 0.5117,
"step": 411000
},
{
"epoch": 10.35,
"learning_rate": 0.00019851370751797063,
"loss": 0.5135,
"step": 414000
},
{
"epoch": 10.42,
"learning_rate": 0.00019775633959031441,
"loss": 0.5129,
"step": 417000
},
{
"epoch": 10.5,
"learning_rate": 0.00019699897166265822,
"loss": 0.511,
"step": 420000
},
{
"epoch": 10.57,
"learning_rate": 0.00019624160373500198,
"loss": 0.5095,
"step": 423000
},
{
"epoch": 10.65,
"learning_rate": 0.0001954842358073458,
"loss": 0.5129,
"step": 426000
},
{
"epoch": 10.72,
"learning_rate": 0.00019472686787968957,
"loss": 0.5124,
"step": 429000
},
{
"epoch": 10.8,
"learning_rate": 0.00019396949995203333,
"loss": 0.5138,
"step": 432000
},
{
"epoch": 10.87,
"learning_rate": 0.00019321213202437714,
"loss": 0.5125,
"step": 435000
},
{
"epoch": 10.95,
"learning_rate": 0.00019245476409672092,
"loss": 0.5153,
"step": 438000
},
{
"epoch": 11.02,
"learning_rate": 0.00019169739616906468,
"loss": 0.5022,
"step": 441000
},
{
"epoch": 11.1,
"learning_rate": 0.0001909400282414085,
"loss": 0.4908,
"step": 444000
},
{
"epoch": 11.17,
"learning_rate": 0.00019018266031375227,
"loss": 0.4908,
"step": 447000
},
{
"epoch": 11.25,
"learning_rate": 0.00018942529238609603,
"loss": 0.4907,
"step": 450000
},
{
"epoch": 11.32,
"learning_rate": 0.00018866792445843984,
"loss": 0.4895,
"step": 453000
},
{
"epoch": 11.4,
"learning_rate": 0.00018791055653078362,
"loss": 0.4948,
"step": 456000
},
{
"epoch": 11.47,
"learning_rate": 0.00018715318860312743,
"loss": 0.4908,
"step": 459000
},
{
"epoch": 11.55,
"learning_rate": 0.0001863958206754712,
"loss": 0.4931,
"step": 462000
},
{
"epoch": 11.62,
"learning_rate": 0.00018563845274781497,
"loss": 0.4945,
"step": 465000
},
{
"epoch": 11.7,
"learning_rate": 0.00018488108482015878,
"loss": 0.5005,
"step": 468000
},
{
"epoch": 11.77,
"learning_rate": 0.00018412371689250254,
"loss": 0.4974,
"step": 471000
},
{
"epoch": 11.85,
"learning_rate": 0.00018336634896484632,
"loss": 0.4997,
"step": 474000
},
{
"epoch": 11.92,
"learning_rate": 0.00018260898103719013,
"loss": 0.5027,
"step": 477000
},
{
"epoch": 12.0,
"learning_rate": 0.0001818516131095339,
"loss": 0.4962,
"step": 480000
},
{
"epoch": 12.07,
"learning_rate": 0.00018109424518187767,
"loss": 0.4766,
"step": 483000
},
{
"epoch": 12.15,
"learning_rate": 0.00018033687725422148,
"loss": 0.4782,
"step": 486000
},
{
"epoch": 12.22,
"learning_rate": 0.00017957950932656524,
"loss": 0.4793,
"step": 489000
},
{
"epoch": 12.3,
"learning_rate": 0.00017882214139890902,
"loss": 0.4769,
"step": 492000
},
{
"epoch": 12.37,
"learning_rate": 0.00017806477347125283,
"loss": 0.4801,
"step": 495000
},
{
"epoch": 12.45,
"learning_rate": 0.0001773074055435966,
"loss": 0.4783,
"step": 498000
},
{
"epoch": 12.52,
"learning_rate": 0.0001765500376159404,
"loss": 0.4804,
"step": 501000
},
{
"epoch": 12.6,
"learning_rate": 0.00017579266968828418,
"loss": 0.4758,
"step": 504000
},
{
"epoch": 12.67,
"learning_rate": 0.00017503530176062794,
"loss": 0.4761,
"step": 507000
},
{
"epoch": 12.75,
"learning_rate": 0.00017427793383297175,
"loss": 0.473,
"step": 510000
},
{
"epoch": 12.82,
"learning_rate": 0.00017352056590531553,
"loss": 0.4793,
"step": 513000
},
{
"epoch": 12.9,
"learning_rate": 0.00017276319797765932,
"loss": 0.4772,
"step": 516000
},
{
"epoch": 12.97,
"learning_rate": 0.0001720058300500031,
"loss": 0.4752,
"step": 519000
},
{
"epoch": 13.05,
"learning_rate": 0.00017124846212234688,
"loss": 0.4611,
"step": 522000
},
{
"epoch": 13.12,
"learning_rate": 0.0001704910941946907,
"loss": 0.4549,
"step": 525000
},
{
"epoch": 13.2,
"learning_rate": 0.00016973372626703445,
"loss": 0.456,
"step": 528000
},
{
"epoch": 13.27,
"learning_rate": 0.00016897635833937823,
"loss": 0.4596,
"step": 531000
},
{
"epoch": 13.35,
"learning_rate": 0.00016821899041172204,
"loss": 0.4629,
"step": 534000
},
{
"epoch": 13.42,
"learning_rate": 0.0001674616224840658,
"loss": 0.463,
"step": 537000
},
{
"epoch": 13.5,
"learning_rate": 0.00016670425455640958,
"loss": 0.461,
"step": 540000
},
{
"epoch": 13.57,
"learning_rate": 0.0001659468866287534,
"loss": 0.4631,
"step": 543000
},
{
"epoch": 13.65,
"learning_rate": 0.00016518951870109715,
"loss": 0.4627,
"step": 546000
},
{
"epoch": 13.72,
"learning_rate": 0.00016443215077344093,
"loss": 0.4639,
"step": 549000
},
{
"epoch": 13.8,
"learning_rate": 0.00016367478284578474,
"loss": 0.4663,
"step": 552000
},
{
"epoch": 13.87,
"learning_rate": 0.0001629174149181285,
"loss": 0.46,
"step": 555000
},
{
"epoch": 13.95,
"learning_rate": 0.00016216004699047228,
"loss": 0.4661,
"step": 558000
},
{
"epoch": 14.02,
"learning_rate": 0.0001614026790628161,
"loss": 0.4564,
"step": 561000
},
{
"epoch": 14.1,
"learning_rate": 0.00016064531113515988,
"loss": 0.4391,
"step": 564000
},
{
"epoch": 14.17,
"learning_rate": 0.00015988794320750363,
"loss": 0.4479,
"step": 567000
},
{
"epoch": 14.25,
"learning_rate": 0.00015913057527984744,
"loss": 0.4427,
"step": 570000
},
{
"epoch": 14.32,
"learning_rate": 0.00015837320735219123,
"loss": 0.4425,
"step": 573000
},
{
"epoch": 14.4,
"learning_rate": 0.00015761583942453498,
"loss": 0.4412,
"step": 576000
},
{
"epoch": 14.47,
"learning_rate": 0.0001568584714968788,
"loss": 0.4488,
"step": 579000
},
{
"epoch": 14.55,
"learning_rate": 0.00015610110356922258,
"loss": 0.4464,
"step": 582000
},
{
"epoch": 14.62,
"learning_rate": 0.00015534373564156636,
"loss": 0.4507,
"step": 585000
},
{
"epoch": 14.7,
"learning_rate": 0.00015458636771391014,
"loss": 0.4476,
"step": 588000
},
{
"epoch": 14.77,
"learning_rate": 0.00015382899978625393,
"loss": 0.4462,
"step": 591000
},
{
"epoch": 14.85,
"learning_rate": 0.0001530716318585977,
"loss": 0.4493,
"step": 594000
},
{
"epoch": 14.92,
"learning_rate": 0.0001523142639309415,
"loss": 0.4478,
"step": 597000
},
{
"epoch": 15.0,
"learning_rate": 0.00015155689600328528,
"loss": 0.4483,
"step": 600000
},
{
"epoch": 15.07,
"learning_rate": 0.00015079952807562906,
"loss": 0.4276,
"step": 603000
},
{
"epoch": 15.15,
"learning_rate": 0.00015004216014797284,
"loss": 0.4259,
"step": 606000
},
{
"epoch": 15.22,
"learning_rate": 0.00014928479222031665,
"loss": 0.4263,
"step": 609000
},
{
"epoch": 15.3,
"learning_rate": 0.0001485274242926604,
"loss": 0.4302,
"step": 612000
},
{
"epoch": 15.37,
"learning_rate": 0.0001477700563650042,
"loss": 0.4302,
"step": 615000
},
{
"epoch": 15.45,
"learning_rate": 0.000147012688437348,
"loss": 0.4289,
"step": 618000
},
{
"epoch": 15.52,
"learning_rate": 0.00014625532050969176,
"loss": 0.4311,
"step": 621000
},
{
"epoch": 15.6,
"learning_rate": 0.00014549795258203554,
"loss": 0.4327,
"step": 624000
},
{
"epoch": 15.67,
"learning_rate": 0.00014474058465437935,
"loss": 0.4315,
"step": 627000
},
{
"epoch": 15.75,
"learning_rate": 0.0001439832167267231,
"loss": 0.4305,
"step": 630000
},
{
"epoch": 15.82,
"learning_rate": 0.00014322584879906692,
"loss": 0.429,
"step": 633000
},
{
"epoch": 15.9,
"learning_rate": 0.0001424684808714107,
"loss": 0.4288,
"step": 636000
},
{
"epoch": 15.97,
"learning_rate": 0.00014171111294375449,
"loss": 0.4309,
"step": 639000
},
{
"epoch": 16.05,
"learning_rate": 0.00014095374501609827,
"loss": 0.4179,
"step": 642000
},
{
"epoch": 16.12,
"learning_rate": 0.00014019637708844205,
"loss": 0.4098,
"step": 645000
},
{
"epoch": 16.2,
"learning_rate": 0.00013943900916078584,
"loss": 0.4136,
"step": 648000
},
{
"epoch": 16.27,
"learning_rate": 0.00013868164123312962,
"loss": 0.4115,
"step": 651000
},
{
"epoch": 16.35,
"learning_rate": 0.0001379242733054734,
"loss": 0.4192,
"step": 654000
},
{
"epoch": 16.42,
"learning_rate": 0.00013716690537781719,
"loss": 0.4159,
"step": 657000
},
{
"epoch": 16.5,
"learning_rate": 0.00013640953745016097,
"loss": 0.4176,
"step": 660000
},
{
"epoch": 16.57,
"learning_rate": 0.00013565216952250475,
"loss": 0.4166,
"step": 663000
},
{
"epoch": 16.65,
"learning_rate": 0.00013489480159484854,
"loss": 0.4214,
"step": 666000
},
{
"epoch": 16.72,
"learning_rate": 0.00013413743366719232,
"loss": 0.4121,
"step": 669000
},
{
"epoch": 16.8,
"learning_rate": 0.0001333800657395361,
"loss": 0.414,
"step": 672000
},
{
"epoch": 16.87,
"learning_rate": 0.00013262269781187989,
"loss": 0.4159,
"step": 675000
},
{
"epoch": 16.95,
"learning_rate": 0.00013186532988422367,
"loss": 0.4135,
"step": 678000
},
{
"epoch": 17.02,
"learning_rate": 0.00013110796195656748,
"loss": 0.4098,
"step": 681000
},
{
"epoch": 17.1,
"learning_rate": 0.00013035059402891124,
"loss": 0.3947,
"step": 684000
},
{
"epoch": 17.17,
"learning_rate": 0.00012959322610125502,
"loss": 0.4026,
"step": 687000
},
{
"epoch": 17.25,
"learning_rate": 0.00012883585817359883,
"loss": 0.397,
"step": 690000
},
{
"epoch": 17.32,
"learning_rate": 0.0001280784902459426,
"loss": 0.3998,
"step": 693000
},
{
"epoch": 17.39,
"learning_rate": 0.00012732112231828637,
"loss": 0.3988,
"step": 696000
},
{
"epoch": 17.47,
"learning_rate": 0.00012656375439063018,
"loss": 0.4015,
"step": 699000
},
{
"epoch": 17.54,
"learning_rate": 0.00012580638646297396,
"loss": 0.402,
"step": 702000
},
{
"epoch": 17.62,
"learning_rate": 0.00012504901853531775,
"loss": 0.4018,
"step": 705000
},
{
"epoch": 17.69,
"learning_rate": 0.00012429165060766153,
"loss": 0.3987,
"step": 708000
},
{
"epoch": 17.77,
"learning_rate": 0.0001235342826800053,
"loss": 0.3974,
"step": 711000
},
{
"epoch": 17.84,
"learning_rate": 0.0001227769147523491,
"loss": 0.4006,
"step": 714000
},
{
"epoch": 17.92,
"learning_rate": 0.00012201954682469288,
"loss": 0.4028,
"step": 717000
},
{
"epoch": 17.99,
"learning_rate": 0.00012126217889703665,
"loss": 0.4019,
"step": 720000
},
{
"epoch": 18.07,
"learning_rate": 0.00012050481096938045,
"loss": 0.3853,
"step": 723000
},
{
"epoch": 18.14,
"learning_rate": 0.00011974744304172423,
"loss": 0.387,
"step": 726000
},
{
"epoch": 18.22,
"learning_rate": 0.00011899007511406801,
"loss": 0.3822,
"step": 729000
},
{
"epoch": 18.29,
"learning_rate": 0.0001182327071864118,
"loss": 0.3861,
"step": 732000
},
{
"epoch": 18.37,
"learning_rate": 0.00011747533925875558,
"loss": 0.3877,
"step": 735000
},
{
"epoch": 18.44,
"learning_rate": 0.00011671797133109938,
"loss": 0.3867,
"step": 738000
},
{
"epoch": 18.52,
"learning_rate": 0.00011596060340344316,
"loss": 0.388,
"step": 741000
},
{
"epoch": 18.59,
"learning_rate": 0.00011520323547578693,
"loss": 0.3889,
"step": 744000
},
{
"epoch": 18.67,
"learning_rate": 0.00011444586754813073,
"loss": 0.3867,
"step": 747000
},
{
"epoch": 18.74,
"learning_rate": 0.00011368849962047451,
"loss": 0.3897,
"step": 750000
},
{
"epoch": 18.82,
"learning_rate": 0.00011293113169281828,
"loss": 0.3873,
"step": 753000
},
{
"epoch": 18.89,
"learning_rate": 0.00011217376376516208,
"loss": 0.3881,
"step": 756000
},
{
"epoch": 18.97,
"learning_rate": 0.00011141639583750586,
"loss": 0.3915,
"step": 759000
},
{
"epoch": 19.04,
"learning_rate": 0.00011065902790984966,
"loss": 0.3766,
"step": 762000
},
{
"epoch": 19.12,
"learning_rate": 0.00010990165998219343,
"loss": 0.3757,
"step": 765000
},
{
"epoch": 19.19,
"learning_rate": 0.00010914429205453721,
"loss": 0.3705,
"step": 768000
},
{
"epoch": 19.27,
"learning_rate": 0.000108386924126881,
"loss": 0.3728,
"step": 771000
},
{
"epoch": 19.34,
"learning_rate": 0.00010762955619922477,
"loss": 0.3756,
"step": 774000
},
{
"epoch": 19.42,
"learning_rate": 0.00010687218827156856,
"loss": 0.3772,
"step": 777000
},
{
"epoch": 19.49,
"learning_rate": 0.00010611482034391236,
"loss": 0.3761,
"step": 780000
},
{
"epoch": 19.57,
"learning_rate": 0.00010535745241625614,
"loss": 0.3739,
"step": 783000
},
{
"epoch": 19.64,
"learning_rate": 0.00010460008448859992,
"loss": 0.371,
"step": 786000
},
{
"epoch": 19.72,
"learning_rate": 0.0001038427165609437,
"loss": 0.3721,
"step": 789000
},
{
"epoch": 19.79,
"learning_rate": 0.00010308534863328749,
"loss": 0.3733,
"step": 792000
},
{
"epoch": 19.87,
"learning_rate": 0.00010232798070563129,
"loss": 0.3742,
"step": 795000
},
{
"epoch": 19.94,
"learning_rate": 0.00010157061277797505,
"loss": 0.3732,
"step": 798000
},
{
"epoch": 20.02,
"learning_rate": 0.00010081324485031884,
"loss": 0.3703,
"step": 801000
},
{
"epoch": 20.09,
"learning_rate": 0.00010005587692266263,
"loss": 0.3609,
"step": 804000
},
{
"epoch": 20.17,
"learning_rate": 9.92985089950064e-05,
"loss": 0.3604,
"step": 807000
},
{
"epoch": 20.24,
"learning_rate": 9.85411410673502e-05,
"loss": 0.3614,
"step": 810000
},
{
"epoch": 20.32,
"learning_rate": 9.778377313969398e-05,
"loss": 0.3617,
"step": 813000
},
{
"epoch": 20.39,
"learning_rate": 9.702640521203775e-05,
"loss": 0.3611,
"step": 816000
},
{
"epoch": 20.47,
"learning_rate": 9.626903728438155e-05,
"loss": 0.3607,
"step": 819000
},
{
"epoch": 20.54,
"learning_rate": 9.551166935672533e-05,
"loss": 0.3605,
"step": 822000
},
{
"epoch": 20.62,
"learning_rate": 9.475430142906912e-05,
"loss": 0.3589,
"step": 825000
},
{
"epoch": 20.69,
"learning_rate": 9.39969335014129e-05,
"loss": 0.3586,
"step": 828000
},
{
"epoch": 20.77,
"learning_rate": 9.323956557375668e-05,
"loss": 0.3626,
"step": 831000
},
{
"epoch": 20.84,
"learning_rate": 9.248219764610048e-05,
"loss": 0.3616,
"step": 834000
},
{
"epoch": 20.92,
"learning_rate": 9.172482971844426e-05,
"loss": 0.3595,
"step": 837000
},
{
"epoch": 20.99,
"learning_rate": 9.096746179078803e-05,
"loss": 0.3624,
"step": 840000
},
{
"epoch": 21.07,
"learning_rate": 9.021009386313183e-05,
"loss": 0.3468,
"step": 843000
},
{
"epoch": 21.14,
"learning_rate": 8.945272593547561e-05,
"loss": 0.3475,
"step": 846000
},
{
"epoch": 21.22,
"learning_rate": 8.869535800781938e-05,
"loss": 0.3486,
"step": 849000
},
{
"epoch": 21.29,
"learning_rate": 8.793799008016318e-05,
"loss": 0.3454,
"step": 852000
},
{
"epoch": 21.37,
"learning_rate": 8.718062215250696e-05,
"loss": 0.3496,
"step": 855000
},
{
"epoch": 21.44,
"learning_rate": 8.642325422485073e-05,
"loss": 0.3478,
"step": 858000
},
{
"epoch": 21.52,
"learning_rate": 8.566588629719453e-05,
"loss": 0.346,
"step": 861000
},
{
"epoch": 21.59,
"learning_rate": 8.490851836953831e-05,
"loss": 0.3513,
"step": 864000
},
{
"epoch": 21.67,
"learning_rate": 8.415115044188211e-05,
"loss": 0.3524,
"step": 867000
},
{
"epoch": 21.74,
"learning_rate": 8.339378251422588e-05,
"loss": 0.3494,
"step": 870000
},
{
"epoch": 21.82,
"learning_rate": 8.263641458656966e-05,
"loss": 0.3521,
"step": 873000
},
{
"epoch": 21.89,
"learning_rate": 8.187904665891346e-05,
"loss": 0.3509,
"step": 876000
},
{
"epoch": 21.97,
"learning_rate": 8.112167873125724e-05,
"loss": 0.3501,
"step": 879000
},
{
"epoch": 22.04,
"learning_rate": 8.036431080360101e-05,
"loss": 0.3394,
"step": 882000
},
{
"epoch": 22.12,
"learning_rate": 7.960694287594481e-05,
"loss": 0.3357,
"step": 885000
},
{
"epoch": 22.19,
"learning_rate": 7.88495749482886e-05,
"loss": 0.3378,
"step": 888000
},
{
"epoch": 22.27,
"learning_rate": 7.809220702063239e-05,
"loss": 0.3362,
"step": 891000
},
{
"epoch": 22.34,
"learning_rate": 7.733483909297616e-05,
"loss": 0.3382,
"step": 894000
},
{
"epoch": 22.42,
"learning_rate": 7.657747116531994e-05,
"loss": 0.3407,
"step": 897000
},
{
"epoch": 22.49,
"learning_rate": 7.582010323766374e-05,
"loss": 0.3381,
"step": 900000
},
{
"epoch": 22.57,
"learning_rate": 7.506273531000751e-05,
"loss": 0.3378,
"step": 903000
},
{
"epoch": 22.64,
"learning_rate": 7.430536738235131e-05,
"loss": 0.3368,
"step": 906000
},
{
"epoch": 22.72,
"learning_rate": 7.354799945469509e-05,
"loss": 0.3384,
"step": 909000
},
{
"epoch": 22.79,
"learning_rate": 7.279063152703887e-05,
"loss": 0.3351,
"step": 912000
},
{
"epoch": 22.87,
"learning_rate": 7.203326359938266e-05,
"loss": 0.3399,
"step": 915000
},
{
"epoch": 22.94,
"learning_rate": 7.127589567172644e-05,
"loss": 0.3366,
"step": 918000
},
{
"epoch": 23.02,
"learning_rate": 7.051852774407022e-05,
"loss": 0.3313,
"step": 921000
},
{
"epoch": 23.09,
"learning_rate": 6.976115981641401e-05,
"loss": 0.3291,
"step": 924000
},
{
"epoch": 23.17,
"learning_rate": 6.900379188875779e-05,
"loss": 0.3269,
"step": 927000
},
{
"epoch": 23.24,
"learning_rate": 6.824642396110157e-05,
"loss": 0.324,
"step": 930000
},
{
"epoch": 23.32,
"learning_rate": 6.748905603344537e-05,
"loss": 0.3249,
"step": 933000
},
{
"epoch": 23.39,
"learning_rate": 6.673168810578914e-05,
"loss": 0.3257,
"step": 936000
},
{
"epoch": 23.47,
"learning_rate": 6.597432017813294e-05,
"loss": 0.3266,
"step": 939000
},
{
"epoch": 23.54,
"learning_rate": 6.521695225047672e-05,
"loss": 0.3275,
"step": 942000
},
{
"epoch": 23.62,
"learning_rate": 6.44595843228205e-05,
"loss": 0.3279,
"step": 945000
},
{
"epoch": 23.69,
"learning_rate": 6.370221639516429e-05,
"loss": 0.3255,
"step": 948000
},
{
"epoch": 23.77,
"learning_rate": 6.294484846750807e-05,
"loss": 0.3255,
"step": 951000
},
{
"epoch": 23.84,
"learning_rate": 6.218748053985185e-05,
"loss": 0.3233,
"step": 954000
},
{
"epoch": 23.92,
"learning_rate": 6.143011261219564e-05,
"loss": 0.3267,
"step": 957000
},
{
"epoch": 23.99,
"learning_rate": 6.067274468453942e-05,
"loss": 0.3234,
"step": 960000
},
{
"epoch": 24.07,
"learning_rate": 5.991537675688321e-05,
"loss": 0.3174,
"step": 963000
},
{
"epoch": 24.14,
"learning_rate": 5.9158008829226994e-05,
"loss": 0.3149,
"step": 966000
},
{
"epoch": 24.22,
"learning_rate": 5.840064090157077e-05,
"loss": 0.314,
"step": 969000
},
{
"epoch": 24.29,
"learning_rate": 5.764327297391456e-05,
"loss": 0.318,
"step": 972000
},
{
"epoch": 24.37,
"learning_rate": 5.6885905046258344e-05,
"loss": 0.3147,
"step": 975000
},
{
"epoch": 24.44,
"learning_rate": 5.6128537118602134e-05,
"loss": 0.3158,
"step": 978000
},
{
"epoch": 24.52,
"learning_rate": 5.537116919094591e-05,
"loss": 0.3164,
"step": 981000
},
{
"epoch": 24.59,
"learning_rate": 5.46138012632897e-05,
"loss": 0.3162,
"step": 984000
},
{
"epoch": 24.67,
"learning_rate": 5.3856433335633483e-05,
"loss": 0.3153,
"step": 987000
},
{
"epoch": 24.74,
"learning_rate": 5.3099065407977274e-05,
"loss": 0.3165,
"step": 990000
},
{
"epoch": 24.82,
"learning_rate": 5.234169748032105e-05,
"loss": 0.315,
"step": 993000
},
{
"epoch": 24.89,
"learning_rate": 5.158432955266483e-05,
"loss": 0.3132,
"step": 996000
},
{
"epoch": 24.97,
"learning_rate": 5.0826961625008623e-05,
"loss": 0.3163,
"step": 999000
},
{
"epoch": 25.04,
"learning_rate": 5.006959369735241e-05,
"loss": 0.3107,
"step": 1002000
},
{
"epoch": 25.12,
"learning_rate": 4.931222576969619e-05,
"loss": 0.3062,
"step": 1005000
},
{
"epoch": 25.19,
"learning_rate": 4.855485784203997e-05,
"loss": 0.3083,
"step": 1008000
},
{
"epoch": 25.27,
"learning_rate": 4.779748991438376e-05,
"loss": 0.3096,
"step": 1011000
},
{
"epoch": 25.34,
"learning_rate": 4.704012198672755e-05,
"loss": 0.3056,
"step": 1014000
},
{
"epoch": 25.42,
"learning_rate": 4.628275405907132e-05,
"loss": 0.3042,
"step": 1017000
},
{
"epoch": 25.49,
"learning_rate": 4.552538613141511e-05,
"loss": 0.3063,
"step": 1020000
},
{
"epoch": 25.57,
"learning_rate": 4.4768018203758897e-05,
"loss": 0.3025,
"step": 1023000
},
{
"epoch": 25.64,
"learning_rate": 4.4010650276102687e-05,
"loss": 0.3056,
"step": 1026000
},
{
"epoch": 25.72,
"learning_rate": 4.325328234844646e-05,
"loss": 0.3081,
"step": 1029000
},
{
"epoch": 25.79,
"learning_rate": 4.249591442079025e-05,
"loss": 0.3082,
"step": 1032000
},
{
"epoch": 25.87,
"learning_rate": 4.1738546493134036e-05,
"loss": 0.3054,
"step": 1035000
},
{
"epoch": 25.94,
"learning_rate": 4.0981178565477827e-05,
"loss": 0.3052,
"step": 1038000
},
{
"epoch": 26.02,
"learning_rate": 4.02238106378216e-05,
"loss": 0.3013,
"step": 1041000
},
{
"epoch": 26.09,
"learning_rate": 3.9466442710165386e-05,
"loss": 0.2982,
"step": 1044000
},
{
"epoch": 26.17,
"learning_rate": 3.8709074782509176e-05,
"loss": 0.2962,
"step": 1047000
},
{
"epoch": 26.24,
"learning_rate": 3.795170685485296e-05,
"loss": 0.2961,
"step": 1050000
},
{
"epoch": 26.32,
"learning_rate": 3.719433892719674e-05,
"loss": 0.2993,
"step": 1053000
},
{
"epoch": 26.39,
"learning_rate": 3.6436970999540526e-05,
"loss": 0.2941,
"step": 1056000
},
{
"epoch": 26.47,
"learning_rate": 3.5679603071884316e-05,
"loss": 0.2948,
"step": 1059000
},
{
"epoch": 26.54,
"learning_rate": 3.492223514422809e-05,
"loss": 0.2966,
"step": 1062000
},
{
"epoch": 26.62,
"learning_rate": 3.4164867216571876e-05,
"loss": 0.2964,
"step": 1065000
},
{
"epoch": 26.69,
"learning_rate": 3.3407499288915666e-05,
"loss": 0.2971,
"step": 1068000
},
{
"epoch": 26.77,
"learning_rate": 3.265013136125945e-05,
"loss": 0.2974,
"step": 1071000
},
{
"epoch": 26.84,
"learning_rate": 3.189276343360323e-05,
"loss": 0.2963,
"step": 1074000
},
{
"epoch": 26.92,
"learning_rate": 3.1135395505947016e-05,
"loss": 0.2954,
"step": 1077000
},
{
"epoch": 26.99,
"learning_rate": 3.0378027578290803e-05,
"loss": 0.2941,
"step": 1080000
},
{
"epoch": 27.07,
"learning_rate": 2.9620659650634586e-05,
"loss": 0.2896,
"step": 1083000
},
{
"epoch": 27.14,
"learning_rate": 2.8863291722978373e-05,
"loss": 0.289,
"step": 1086000
},
{
"epoch": 27.22,
"learning_rate": 2.8105923795322156e-05,
"loss": 0.2899,
"step": 1089000
},
{
"epoch": 27.29,
"learning_rate": 2.7348555867665943e-05,
"loss": 0.2878,
"step": 1092000
},
{
"epoch": 27.37,
"learning_rate": 2.6591187940009723e-05,
"loss": 0.2858,
"step": 1095000
},
{
"epoch": 27.44,
"learning_rate": 2.583382001235351e-05,
"loss": 0.2901,
"step": 1098000
},
{
"epoch": 27.52,
"learning_rate": 2.5076452084697293e-05,
"loss": 0.2885,
"step": 1101000
},
{
"epoch": 27.59,
"learning_rate": 2.431908415704108e-05,
"loss": 0.2903,
"step": 1104000
},
{
"epoch": 27.67,
"learning_rate": 2.3561716229384863e-05,
"loss": 0.2904,
"step": 1107000
},
{
"epoch": 27.74,
"learning_rate": 2.280434830172865e-05,
"loss": 0.2869,
"step": 1110000
},
{
"epoch": 27.82,
"learning_rate": 2.2046980374072432e-05,
"loss": 0.2902,
"step": 1113000
},
{
"epoch": 27.89,
"learning_rate": 2.128961244641622e-05,
"loss": 0.2867,
"step": 1116000
},
{
"epoch": 27.97,
"learning_rate": 2.053224451876e-05,
"loss": 0.2869,
"step": 1119000
},
{
"epoch": 28.04,
"learning_rate": 1.9774876591103786e-05,
"loss": 0.2844,
"step": 1122000
},
{
"epoch": 28.12,
"learning_rate": 1.901750866344757e-05,
"loss": 0.283,
"step": 1125000
},
{
"epoch": 28.19,
"learning_rate": 1.8260140735791356e-05,
"loss": 0.2816,
"step": 1128000
},
{
"epoch": 28.27,
"learning_rate": 1.750277280813514e-05,
"loss": 0.2848,
"step": 1131000
},
{
"epoch": 28.34,
"learning_rate": 1.6745404880478922e-05,
"loss": 0.2811,
"step": 1134000
},
{
"epoch": 28.42,
"learning_rate": 1.598803695282271e-05,
"loss": 0.283,
"step": 1137000
},
{
"epoch": 28.49,
"learning_rate": 1.5230669025166492e-05,
"loss": 0.2814,
"step": 1140000
},
{
"epoch": 28.57,
"learning_rate": 1.4473301097510277e-05,
"loss": 0.2793,
"step": 1143000
},
{
"epoch": 28.64,
"learning_rate": 1.3715933169854062e-05,
"loss": 0.2834,
"step": 1146000
},
{
"epoch": 28.72,
"learning_rate": 1.2958565242197847e-05,
"loss": 0.2798,
"step": 1149000
},
{
"epoch": 28.79,
"learning_rate": 1.220119731454163e-05,
"loss": 0.2807,
"step": 1152000
},
{
"epoch": 28.87,
"learning_rate": 1.1443829386885416e-05,
"loss": 0.28,
"step": 1155000
},
{
"epoch": 28.94,
"learning_rate": 1.06864614592292e-05,
"loss": 0.2762,
"step": 1158000
}
],
"logging_steps": 3000,
"max_steps": 1200330,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.386202875193427e+21,
"trial_name": null,
"trial_params": null
}