|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 1034010, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00013054830287206266, |
|
"loss": 18.218, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002610966057441253, |
|
"loss": 4.1185, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002990742124651621, |
|
"loss": 3.2662, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00029775542680299976, |
|
"loss": 3.0539, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002964366411408375, |
|
"loss": 2.9401, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002951178554786752, |
|
"loss": 2.8804, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00029379906981651295, |
|
"loss": 2.8248, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002924802841543507, |
|
"loss": 2.7855, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002911614984921884, |
|
"loss": 2.7762, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002898427128300261, |
|
"loss": 2.7404, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002885239271678638, |
|
"loss": 2.7029, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00028720514150570154, |
|
"loss": 2.6612, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002858863558435392, |
|
"loss": 2.6141, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.000284567570181377, |
|
"loss": 2.5986, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00028324878451921467, |
|
"loss": 2.5744, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002819299988570524, |
|
"loss": 2.5785, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00028061121319489013, |
|
"loss": 2.5643, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002792924275327278, |
|
"loss": 2.569, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00027797364187056553, |
|
"loss": 2.5455, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00027665485620840326, |
|
"loss": 2.5417, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.000275336070546241, |
|
"loss": 2.5095, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0002740172848840787, |
|
"loss": 2.5052, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00027269849922191645, |
|
"loss": 2.5086, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0002713797135597541, |
|
"loss": 2.4391, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00027006092789759185, |
|
"loss": 2.4283, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002687421422354296, |
|
"loss": 2.4079, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002674233565732673, |
|
"loss": 2.4065, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00026610457091110504, |
|
"loss": 2.3893, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00026478578524894277, |
|
"loss": 2.4166, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00026346699958678044, |
|
"loss": 2.3907, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00026214821392461817, |
|
"loss": 2.3829, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002608294282624559, |
|
"loss": 2.3767, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00025951064260029363, |
|
"loss": 2.3518, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00025819185693813136, |
|
"loss": 2.3734, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0002568730712759691, |
|
"loss": 2.3126, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00025555428561380676, |
|
"loss": 2.2802, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002542354999516445, |
|
"loss": 2.2739, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0002529167142894822, |
|
"loss": 2.2588, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00025159792862731995, |
|
"loss": 2.2573, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002502791429651577, |
|
"loss": 2.2541, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002489603573029954, |
|
"loss": 2.2843, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002476415716408331, |
|
"loss": 2.2548, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002463227859786708, |
|
"loss": 2.252, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00024500400031650854, |
|
"loss": 2.2677, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00024368521465434624, |
|
"loss": 2.2497, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00024236642899218397, |
|
"loss": 2.2434, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0002410476433300217, |
|
"loss": 2.1625, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.0002397288576678594, |
|
"loss": 2.1795, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00023841007200569713, |
|
"loss": 2.1666, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00023709128634353483, |
|
"loss": 2.1749, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00023577250068137256, |
|
"loss": 2.1931, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0002344537150192103, |
|
"loss": 2.1797, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.000233134929357048, |
|
"loss": 2.1573, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.00023181614369488572, |
|
"loss": 2.1811, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00023049735803272345, |
|
"loss": 2.1539, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.00022917857237056115, |
|
"loss": 2.1657, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00022785978670839888, |
|
"loss": 2.1662, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002265410010462366, |
|
"loss": 2.1237, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002252222153840743, |
|
"loss": 2.0812, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00022390342972191204, |
|
"loss": 2.0917, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00022258464405974977, |
|
"loss": 2.09, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00022126585839758747, |
|
"loss": 2.0931, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0002199470727354252, |
|
"loss": 2.0907, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00021862828707326293, |
|
"loss": 2.0855, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00021730950141110063, |
|
"loss": 2.0946, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00021599071574893836, |
|
"loss": 2.0904, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002146719300867761, |
|
"loss": 2.0788, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0002133531444246138, |
|
"loss": 2.0771, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00021203435876245152, |
|
"loss": 2.0746, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00021071557310028925, |
|
"loss": 2.0091, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00020939678743812695, |
|
"loss": 2.0144, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00020807800177596468, |
|
"loss": 2.0083, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0002067592161138024, |
|
"loss": 2.0337, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0002054404304516401, |
|
"loss": 2.0169, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00020412164478947784, |
|
"loss": 2.0264, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00020280285912731557, |
|
"loss": 2.0089, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00020148407346515327, |
|
"loss": 2.0148, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.000200165287802991, |
|
"loss": 2.0224, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00019884650214082873, |
|
"loss": 2.0242, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00019752771647866643, |
|
"loss": 2.0142, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00019620893081650416, |
|
"loss": 1.9622, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0001948901451543419, |
|
"loss": 1.9643, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0001935713594921796, |
|
"loss": 1.9589, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00019225257383001732, |
|
"loss": 1.9411, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.000190933788167855, |
|
"loss": 1.956, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00018961500250569275, |
|
"loss": 1.9596, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00018829621684353048, |
|
"loss": 1.9373, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.00018697743118136815, |
|
"loss": 1.9532, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00018565864551920588, |
|
"loss": 1.9669, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00018433985985704364, |
|
"loss": 1.957, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.0001830210741948813, |
|
"loss": 1.977, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00018170228853271904, |
|
"loss": 1.9712, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.0001803835028705568, |
|
"loss": 1.8922, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.00017906471720839447, |
|
"loss": 1.9054, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0001777459315462322, |
|
"loss": 1.8847, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.00017642714588406993, |
|
"loss": 1.896, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.00017510836022190763, |
|
"loss": 1.9139, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00017378957455974536, |
|
"loss": 1.9086, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0001724707888975831, |
|
"loss": 1.9158, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0001711520032354208, |
|
"loss": 1.908, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00016983321757325852, |
|
"loss": 1.9034, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.00016851443191109625, |
|
"loss": 1.9022, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.00016719564624893395, |
|
"loss": 1.9045, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.00016587686058677168, |
|
"loss": 1.8607, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.0001645580749246094, |
|
"loss": 1.8439, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.0001632392892624471, |
|
"loss": 1.8252, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.00016192050360028484, |
|
"loss": 1.844, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.00016060171793812257, |
|
"loss": 1.839, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.00015928293227596027, |
|
"loss": 1.8442, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.000157964146613798, |
|
"loss": 1.8378, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.00015664536095163573, |
|
"loss": 1.8436, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.00015532657528947343, |
|
"loss": 1.8399, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.00015400778962731116, |
|
"loss": 1.8357, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0001526890039651489, |
|
"loss": 1.8406, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0001513702183029866, |
|
"loss": 1.8385, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.00015005143264082432, |
|
"loss": 1.7886, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 0.00014873264697866202, |
|
"loss": 1.7909, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.00014741386131649975, |
|
"loss": 1.794, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.00014609507565433748, |
|
"loss": 1.7712, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.00014477628999217518, |
|
"loss": 1.7875, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.0001434575043300129, |
|
"loss": 1.786, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.00014213871866785064, |
|
"loss": 1.7925, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.00014081993300568834, |
|
"loss": 1.7875, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 0.00013950114734352607, |
|
"loss": 1.7752, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0001381823616813638, |
|
"loss": 1.791, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0001368635760192015, |
|
"loss": 1.7857, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 0.00013554479035703923, |
|
"loss": 1.7407, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.00013422600469487693, |
|
"loss": 1.7115, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.00013290721903271466, |
|
"loss": 1.7294, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 0.0001315884333705524, |
|
"loss": 1.7205, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.0001302696477083901, |
|
"loss": 1.7281, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.00012895086204622782, |
|
"loss": 1.7418, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 0.00012763207638406555, |
|
"loss": 1.7316, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.00012631329072190325, |
|
"loss": 1.7348, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 0.00012499450505974098, |
|
"loss": 1.7392, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 0.0001236757193975787, |
|
"loss": 1.7341, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 0.0001223569337354164, |
|
"loss": 1.7347, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.00012103814807325414, |
|
"loss": 1.7196, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.00011971936241109186, |
|
"loss": 1.6614, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 0.00011840057674892957, |
|
"loss": 1.6671, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.0001170817910867673, |
|
"loss": 1.6665, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 0.00011576300542460502, |
|
"loss": 1.6775, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.00011444421976244273, |
|
"loss": 1.6646, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.00011312543410028046, |
|
"loss": 1.6779, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00011180664843811818, |
|
"loss": 1.6802, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.00011048786277595589, |
|
"loss": 1.6759, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 0.0001091690771137936, |
|
"loss": 1.6729, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 0.00010785029145163134, |
|
"loss": 1.6769, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00010653150578946905, |
|
"loss": 1.6721, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.00010521272012730677, |
|
"loss": 1.6302, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.0001038939344651445, |
|
"loss": 1.6112, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 0.00010257514880298221, |
|
"loss": 1.5991, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.00010125636314081991, |
|
"loss": 1.6221, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 9.993757747865765e-05, |
|
"loss": 1.6168, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 9.861879181649536e-05, |
|
"loss": 1.6166, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 9.730000615433307e-05, |
|
"loss": 1.6234, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 9.59812204921708e-05, |
|
"loss": 1.6245, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 9.466243483000852e-05, |
|
"loss": 1.618, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 9.334364916784623e-05, |
|
"loss": 1.6341, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 9.202486350568396e-05, |
|
"loss": 1.6353, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 9.070607784352168e-05, |
|
"loss": 1.6104, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 8.938729218135939e-05, |
|
"loss": 1.562, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 8.806850651919711e-05, |
|
"loss": 1.5654, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 8.674972085703484e-05, |
|
"loss": 1.564, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 8.543093519487255e-05, |
|
"loss": 1.5606, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 14.45, |
|
"learning_rate": 8.411214953271027e-05, |
|
"loss": 1.5764, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 8.2793363870548e-05, |
|
"loss": 1.5718, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 8.147457820838571e-05, |
|
"loss": 1.5768, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 8.015579254622343e-05, |
|
"loss": 1.5686, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 7.883700688406116e-05, |
|
"loss": 1.5753, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 7.751822122189887e-05, |
|
"loss": 1.5588, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 7.619943555973659e-05, |
|
"loss": 1.5588, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 7.48806498975743e-05, |
|
"loss": 1.5413, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 7.356186423541203e-05, |
|
"loss": 1.5102, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 7.224307857324975e-05, |
|
"loss": 1.5183, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 7.092429291108746e-05, |
|
"loss": 1.5267, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 6.960550724892519e-05, |
|
"loss": 1.5185, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 6.828672158676289e-05, |
|
"loss": 1.5195, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 6.696793592460062e-05, |
|
"loss": 1.5154, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 6.564915026243834e-05, |
|
"loss": 1.5312, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 6.433036460027605e-05, |
|
"loss": 1.5191, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 6.301157893811378e-05, |
|
"loss": 1.5209, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 6.16927932759515e-05, |
|
"loss": 1.5145, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 6.037400761378922e-05, |
|
"loss": 1.5147, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 5.905522195162694e-05, |
|
"loss": 1.47, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 5.773643628946465e-05, |
|
"loss": 1.4766, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 5.641765062730237e-05, |
|
"loss": 1.4688, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 5.5098864965140094e-05, |
|
"loss": 1.4727, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 5.378007930297781e-05, |
|
"loss": 1.4673, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 5.246129364081553e-05, |
|
"loss": 1.4728, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 5.1142507978653254e-05, |
|
"loss": 1.4751, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 4.982372231649097e-05, |
|
"loss": 1.4652, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 4.850493665432869e-05, |
|
"loss": 1.4783, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 4.718615099216641e-05, |
|
"loss": 1.4678, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 4.586736533000413e-05, |
|
"loss": 1.4717, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 4.454857966784185e-05, |
|
"loss": 1.4422, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 4.322979400567956e-05, |
|
"loss": 1.4209, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 4.191100834351728e-05, |
|
"loss": 1.4398, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 4.0592222681355005e-05, |
|
"loss": 1.4331, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 3.927343701919272e-05, |
|
"loss": 1.4253, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 3.795465135703044e-05, |
|
"loss": 1.4247, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 3.6635865694868164e-05, |
|
"loss": 1.432, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 3.531708003270588e-05, |
|
"loss": 1.4275, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 3.39982943705436e-05, |
|
"loss": 1.4305, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 3.267950870838132e-05, |
|
"loss": 1.4237, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 3.136072304621904e-05, |
|
"loss": 1.4339, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.0041937384056755e-05, |
|
"loss": 1.4083, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 2.8723151721894477e-05, |
|
"loss": 1.383, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 2.7404366059732196e-05, |
|
"loss": 1.3949, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 2.6085580397569915e-05, |
|
"loss": 1.3857, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 2.4766794735407634e-05, |
|
"loss": 1.4049, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 2.3448009073245356e-05, |
|
"loss": 1.3972, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 2.2129223411083075e-05, |
|
"loss": 1.3983, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 2.081043774892079e-05, |
|
"loss": 1.3907, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 1.949165208675851e-05, |
|
"loss": 1.3888, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 1.817286642459623e-05, |
|
"loss": 1.3762, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 1.685408076243395e-05, |
|
"loss": 1.3926, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 1.553529510027167e-05, |
|
"loss": 1.3874, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"learning_rate": 1.4216509438109387e-05, |
|
"loss": 1.375, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 1.2897723775947106e-05, |
|
"loss": 1.3655, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 1.1578938113784827e-05, |
|
"loss": 1.3625, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 1.0260152451622544e-05, |
|
"loss": 1.3705, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 8.941366789460264e-06, |
|
"loss": 1.3672, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 7.622581127297983e-06, |
|
"loss": 1.3545, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"learning_rate": 6.303795465135703e-06, |
|
"loss": 1.3611, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 4.985009802973422e-06, |
|
"loss": 1.3574, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 3.666224140811141e-06, |
|
"loss": 1.3673, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 2.34743847864886e-06, |
|
"loss": 1.3629, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"learning_rate": 1.028652816486579e-06, |
|
"loss": 1.3694, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 689340, |
|
"total_flos": 1.193067231326117e+21, |
|
"train_loss": 1.952947931640795, |
|
"train_runtime": 385301.7422, |
|
"train_samples_per_second": 28.625, |
|
"train_steps_per_second": 1.789 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 689340, |
|
"total_flos": 1.193067231326117e+21, |
|
"train_loss": 0.0, |
|
"train_runtime": 10.1094, |
|
"train_samples_per_second": 272749.588, |
|
"train_steps_per_second": 17047.066 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 1.027864919821501e-06, |
|
"loss": 1.3347, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 1.0242835713438735e-06, |
|
"loss": 1.3627, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 1.0207022228662458e-06, |
|
"loss": 1.3556, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 20.28, |
|
"learning_rate": 1.0171208743886182e-06, |
|
"loss": 1.3565, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 20.37, |
|
"learning_rate": 1.0135395259109907e-06, |
|
"loss": 1.3619, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 1.009958177433363e-06, |
|
"loss": 1.3536, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 1.0063768289557357e-06, |
|
"loss": 1.3481, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 20.63, |
|
"learning_rate": 1.002795480478108e-06, |
|
"loss": 1.348, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"learning_rate": 9.992141320004804e-07, |
|
"loss": 1.3539, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 9.956327835228528e-07, |
|
"loss": 1.365, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 20.89, |
|
"learning_rate": 9.920514350452253e-07, |
|
"loss": 1.3384, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 9.884700865675977e-07, |
|
"loss": 1.3558, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 9.848887380899703e-07, |
|
"loss": 1.3388, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 9.813073896123426e-07, |
|
"loss": 1.3527, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 9.77726041134715e-07, |
|
"loss": 1.3458, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 21.32, |
|
"learning_rate": 9.741446926570875e-07, |
|
"loss": 1.3496, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 21.41, |
|
"learning_rate": 9.7056334417946e-07, |
|
"loss": 1.3294, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 9.669819957018323e-07, |
|
"loss": 1.3606, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"learning_rate": 9.634006472242046e-07, |
|
"loss": 1.3573, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 9.598192987465772e-07, |
|
"loss": 1.3445, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 9.562379502689496e-07, |
|
"loss": 1.3407, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"learning_rate": 9.52656601791322e-07, |
|
"loss": 1.3646, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 21.93, |
|
"learning_rate": 9.490752533136945e-07, |
|
"loss": 1.3589, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 9.454939048360669e-07, |
|
"loss": 1.3504, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"learning_rate": 9.419125563584393e-07, |
|
"loss": 1.3435, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 9.383312078808117e-07, |
|
"loss": 1.3433, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 22.28, |
|
"learning_rate": 9.347498594031841e-07, |
|
"loss": 1.35, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 22.37, |
|
"learning_rate": 9.311685109255566e-07, |
|
"loss": 1.3595, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 22.46, |
|
"learning_rate": 9.275871624479291e-07, |
|
"loss": 1.3518, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 22.54, |
|
"learning_rate": 9.240058139703014e-07, |
|
"loss": 1.3451, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 9.204244654926739e-07, |
|
"loss": 1.3378, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 22.72, |
|
"learning_rate": 9.168431170150462e-07, |
|
"loss": 1.3374, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 9.132617685374188e-07, |
|
"loss": 1.3521, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 9.096804200597912e-07, |
|
"loss": 1.3519, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"learning_rate": 9.060990715821636e-07, |
|
"loss": 1.3381, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 23.07, |
|
"learning_rate": 9.02517723104536e-07, |
|
"loss": 1.3294, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"learning_rate": 8.989363746269084e-07, |
|
"loss": 1.3473, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 8.953550261492809e-07, |
|
"loss": 1.3453, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 8.917736776716533e-07, |
|
"loss": 1.3416, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 8.881923291940258e-07, |
|
"loss": 1.3383, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 8.846109807163981e-07, |
|
"loss": 1.3419, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 8.810296322387707e-07, |
|
"loss": 1.3607, |
|
"step": 813000 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 8.77448283761143e-07, |
|
"loss": 1.3438, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 23.76, |
|
"learning_rate": 8.738669352835155e-07, |
|
"loss": 1.3464, |
|
"step": 819000 |
|
}, |
|
{ |
|
"epoch": 23.85, |
|
"learning_rate": 8.702855868058879e-07, |
|
"loss": 1.346, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 8.667042383282602e-07, |
|
"loss": 1.3413, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 8.631228898506328e-07, |
|
"loss": 1.3386, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 24.11, |
|
"learning_rate": 8.595415413730052e-07, |
|
"loss": 1.3488, |
|
"step": 831000 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 8.559601928953776e-07, |
|
"loss": 1.3415, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 8.5237884441775e-07, |
|
"loss": 1.3466, |
|
"step": 837000 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 8.487974959401224e-07, |
|
"loss": 1.3382, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"learning_rate": 8.452161474624949e-07, |
|
"loss": 1.3346, |
|
"step": 843000 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 8.416347989848674e-07, |
|
"loss": 1.3412, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 24.63, |
|
"learning_rate": 8.380534505072397e-07, |
|
"loss": 1.3498, |
|
"step": 849000 |
|
}, |
|
{ |
|
"epoch": 24.72, |
|
"learning_rate": 8.344721020296122e-07, |
|
"loss": 1.3344, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 24.81, |
|
"learning_rate": 8.308907535519846e-07, |
|
"loss": 1.3361, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 8.27309405074357e-07, |
|
"loss": 1.3399, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"learning_rate": 8.237280565967295e-07, |
|
"loss": 1.3509, |
|
"step": 861000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 861675, |
|
"total_flos": 1.491337883187736e+21, |
|
"train_loss": 0.2694074709793743, |
|
"train_runtime": 96468.0857, |
|
"train_samples_per_second": 142.914, |
|
"train_steps_per_second": 8.932 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 1.000540667186059e-06, |
|
"loss": 1.3464, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 9.976312306801892e-07, |
|
"loss": 1.3314, |
|
"step": 867000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 9.947217941743196e-07, |
|
"loss": 1.3447, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"learning_rate": 9.9181235766845e-07, |
|
"loss": 1.3379, |
|
"step": 873000 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 9.889029211625802e-07, |
|
"loss": 1.3397, |
|
"step": 876000 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"learning_rate": 9.859934846567106e-07, |
|
"loss": 1.3361, |
|
"step": 879000 |
|
}, |
|
{ |
|
"epoch": 25.59, |
|
"learning_rate": 9.830840481508408e-07, |
|
"loss": 1.3312, |
|
"step": 882000 |
|
}, |
|
{ |
|
"epoch": 25.68, |
|
"learning_rate": 9.801746116449712e-07, |
|
"loss": 1.3292, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"learning_rate": 9.772651751391016e-07, |
|
"loss": 1.3391, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"learning_rate": 9.743557386332318e-07, |
|
"loss": 1.3443, |
|
"step": 891000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 9.71446302127362e-07, |
|
"loss": 1.3419, |
|
"step": 894000 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 9.685368656214923e-07, |
|
"loss": 1.3315, |
|
"step": 897000 |
|
}, |
|
{ |
|
"epoch": 26.11, |
|
"learning_rate": 9.656274291156227e-07, |
|
"loss": 1.3189, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 9.62717992609753e-07, |
|
"loss": 1.3396, |
|
"step": 903000 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"learning_rate": 9.598085561038833e-07, |
|
"loss": 1.3383, |
|
"step": 906000 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 9.568991195980135e-07, |
|
"loss": 1.3385, |
|
"step": 909000 |
|
}, |
|
{ |
|
"epoch": 26.46, |
|
"learning_rate": 9.539896830921439e-07, |
|
"loss": 1.3352, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 9.510802465862742e-07, |
|
"loss": 1.3363, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 26.63, |
|
"learning_rate": 9.481708100804045e-07, |
|
"loss": 1.3416, |
|
"step": 918000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 9.452613735745349e-07, |
|
"loss": 1.3406, |
|
"step": 921000 |
|
}, |
|
{ |
|
"epoch": 26.81, |
|
"learning_rate": 9.42351937068665e-07, |
|
"loss": 1.3345, |
|
"step": 924000 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"learning_rate": 9.394425005627954e-07, |
|
"loss": 1.3428, |
|
"step": 927000 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"learning_rate": 9.365330640569257e-07, |
|
"loss": 1.3371, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 27.07, |
|
"learning_rate": 9.33623627551056e-07, |
|
"loss": 1.3274, |
|
"step": 933000 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"learning_rate": 9.307141910451863e-07, |
|
"loss": 1.3417, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 9.278047545393166e-07, |
|
"loss": 1.342, |
|
"step": 939000 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 9.24895318033447e-07, |
|
"loss": 1.3162, |
|
"step": 942000 |
|
}, |
|
{ |
|
"epoch": 27.42, |
|
"learning_rate": 9.219858815275773e-07, |
|
"loss": 1.3529, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 9.190764450217075e-07, |
|
"loss": 1.3195, |
|
"step": 948000 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 9.161670085158379e-07, |
|
"loss": 1.3346, |
|
"step": 951000 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 9.132575720099681e-07, |
|
"loss": 1.3384, |
|
"step": 954000 |
|
}, |
|
{ |
|
"epoch": 27.77, |
|
"learning_rate": 9.103481355040984e-07, |
|
"loss": 1.327, |
|
"step": 957000 |
|
}, |
|
{ |
|
"epoch": 27.85, |
|
"learning_rate": 9.074386989982288e-07, |
|
"loss": 1.3211, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 27.94, |
|
"learning_rate": 9.04529262492359e-07, |
|
"loss": 1.3456, |
|
"step": 963000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"learning_rate": 9.016198259864894e-07, |
|
"loss": 1.3251, |
|
"step": 966000 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 8.987103894806197e-07, |
|
"loss": 1.3312, |
|
"step": 969000 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"learning_rate": 8.9580095297475e-07, |
|
"loss": 1.329, |
|
"step": 972000 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"learning_rate": 8.928915164688803e-07, |
|
"loss": 1.3289, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 8.899820799630106e-07, |
|
"loss": 1.3237, |
|
"step": 978000 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"learning_rate": 8.870726434571408e-07, |
|
"loss": 1.3329, |
|
"step": 981000 |
|
}, |
|
{ |
|
"epoch": 28.55, |
|
"learning_rate": 8.841632069512712e-07, |
|
"loss": 1.3461, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 8.812537704454014e-07, |
|
"loss": 1.3391, |
|
"step": 987000 |
|
}, |
|
{ |
|
"epoch": 28.72, |
|
"learning_rate": 8.783443339395318e-07, |
|
"loss": 1.3301, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 8.754348974336621e-07, |
|
"loss": 1.3316, |
|
"step": 993000 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"learning_rate": 8.725254609277924e-07, |
|
"loss": 1.3266, |
|
"step": 996000 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 8.696160244219228e-07, |
|
"loss": 1.3265, |
|
"step": 999000 |
|
}, |
|
{ |
|
"epoch": 29.07, |
|
"learning_rate": 8.66706587916053e-07, |
|
"loss": 1.3303, |
|
"step": 1002000 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 8.637971514101833e-07, |
|
"loss": 1.3281, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 8.608877149043137e-07, |
|
"loss": 1.3293, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 8.579782783984439e-07, |
|
"loss": 1.3437, |
|
"step": 1011000 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 8.550688418925742e-07, |
|
"loss": 1.3268, |
|
"step": 1014000 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 8.521594053867045e-07, |
|
"loss": 1.3293, |
|
"step": 1017000 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"learning_rate": 8.492499688808348e-07, |
|
"loss": 1.3257, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 8.463405323749652e-07, |
|
"loss": 1.3142, |
|
"step": 1023000 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 8.434310958690955e-07, |
|
"loss": 1.319, |
|
"step": 1026000 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 8.405216593632257e-07, |
|
"loss": 1.3267, |
|
"step": 1029000 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"learning_rate": 8.376122228573561e-07, |
|
"loss": 1.3358, |
|
"step": 1032000 |
|
} |
|
], |
|
"logging_steps": 3000, |
|
"max_steps": 1034010, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.789602135135767e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|