|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.99962510778151, |
|
"eval_steps": 500, |
|
"global_step": 1200330, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.497500833055648e-05, |
|
"loss": 11.5654, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00014995001666111296, |
|
"loss": 1.9313, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002249250249916694, |
|
"loss": 1.3886, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002999000333222259, |
|
"loss": 1.3023, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002992436418962473, |
|
"loss": 1.2746, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002984862739685911, |
|
"loss": 1.2224, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029772890604093486, |
|
"loss": 1.1809, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00029697153811327864, |
|
"loss": 1.1405, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002962141701856224, |
|
"loss": 1.1178, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002954568022579662, |
|
"loss": 1.0984, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00029469943433031, |
|
"loss": 1.0659, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002939420664026538, |
|
"loss": 1.0466, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002931846984749976, |
|
"loss": 1.0377, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00029242733054734134, |
|
"loss": 1.005, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0002916699626196851, |
|
"loss": 0.9926, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00029091259469202896, |
|
"loss": 0.9863, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0002901552267643727, |
|
"loss": 0.9723, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00028939785883671653, |
|
"loss": 0.9571, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002886404909090603, |
|
"loss": 0.933, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00028788312298140404, |
|
"loss": 0.9296, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002871257550537479, |
|
"loss": 0.9255, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00028636838712609166, |
|
"loss": 0.9173, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0002856110191984354, |
|
"loss": 0.9063, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00028485365127077923, |
|
"loss": 0.9004, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.000284096283343123, |
|
"loss": 0.8997, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00028333891541546674, |
|
"loss": 0.8943, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0002825815474878106, |
|
"loss": 0.8797, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00028182417956015436, |
|
"loss": 0.8501, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0002810668116324981, |
|
"loss": 0.8479, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00028030944370484193, |
|
"loss": 0.8451, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002795520757771857, |
|
"loss": 0.8315, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002787947078495295, |
|
"loss": 0.8353, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0002780373399218733, |
|
"loss": 0.8271, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00027727997199421706, |
|
"loss": 0.8129, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00027652260406656084, |
|
"loss": 0.8125, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002757652361389046, |
|
"loss": 0.8119, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002750078682112484, |
|
"loss": 0.8, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0002742505002835922, |
|
"loss": 0.7979, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.000273493132355936, |
|
"loss": 0.7914, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00027273576442827976, |
|
"loss": 0.7916, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00027197839650062354, |
|
"loss": 0.7542, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0002712210285729673, |
|
"loss": 0.7643, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002704636606453111, |
|
"loss": 0.7576, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0002697062927176549, |
|
"loss": 0.7551, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0002689489247899987, |
|
"loss": 0.7537, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00026819155686234246, |
|
"loss": 0.7536, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00026743418893468624, |
|
"loss": 0.7466, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00026667682100703, |
|
"loss": 0.7488, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002659194530793738, |
|
"loss": 0.7445, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.0002651620851517176, |
|
"loss": 0.7396, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002644047172240614, |
|
"loss": 0.7387, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.00026364734929640516, |
|
"loss": 0.7324, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00026288998136874894, |
|
"loss": 0.7297, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0002621326134410928, |
|
"loss": 0.7069, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0002613752455134365, |
|
"loss": 0.7001, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0002606178775857803, |
|
"loss": 0.6945, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00025986050965812413, |
|
"loss": 0.7017, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00025910314173046786, |
|
"loss": 0.7018, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00025834577380281164, |
|
"loss": 0.6977, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0002575884058751555, |
|
"loss": 0.6923, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0002568310379474992, |
|
"loss": 0.6972, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.000256073670019843, |
|
"loss": 0.685, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00025531630209218683, |
|
"loss": 0.6857, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00025455893416453056, |
|
"loss": 0.6846, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.00025380156623687434, |
|
"loss": 0.6848, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002530441983092182, |
|
"loss": 0.6783, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00025228683038156196, |
|
"loss": 0.6676, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002515294624539057, |
|
"loss": 0.6514, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00025077209452624953, |
|
"loss": 0.6469, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0002500147265985933, |
|
"loss": 0.6483, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00024925735867093704, |
|
"loss": 0.6525, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0002484999907432809, |
|
"loss": 0.6487, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00024774262281562466, |
|
"loss": 0.6495, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00024698525488796845, |
|
"loss": 0.645, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00024622788696031223, |
|
"loss": 0.6517, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.000245470519032656, |
|
"loss": 0.6459, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0002447131511049998, |
|
"loss": 0.6451, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00024395578317734358, |
|
"loss": 0.6412, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00024319841524968736, |
|
"loss": 0.6457, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00024244104732203112, |
|
"loss": 0.64, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00024168367939437493, |
|
"loss": 0.6137, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0002409263114667187, |
|
"loss": 0.6131, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0002401689435390625, |
|
"loss": 0.6153, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00023941157561140628, |
|
"loss": 0.6193, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00023865420768375006, |
|
"loss": 0.6161, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00023789683975609387, |
|
"loss": 0.614, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00023713947182843763, |
|
"loss": 0.6142, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0002363821039007814, |
|
"loss": 0.6181, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00023562473597312522, |
|
"loss": 0.6098, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00023486736804546898, |
|
"loss": 0.6165, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00023411000011781276, |
|
"loss": 0.612, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00023335263219015657, |
|
"loss": 0.6109, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.00023259526426250033, |
|
"loss": 0.6141, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0002318378963348441, |
|
"loss": 0.5912, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00023108052840718792, |
|
"loss": 0.5838, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00023032316047953168, |
|
"loss": 0.5823, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00022956579255187546, |
|
"loss": 0.5836, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00022880842462421927, |
|
"loss": 0.5857, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00022805105669656303, |
|
"loss": 0.5872, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0002272936887689068, |
|
"loss": 0.59, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00022653632084125062, |
|
"loss": 0.5808, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0002257789529135944, |
|
"loss": 0.5826, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00022502158498593816, |
|
"loss": 0.5813, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00022426421705828197, |
|
"loss": 0.5883, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00022350684913062576, |
|
"loss": 0.5852, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00022274948120296954, |
|
"loss": 0.5824, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.00022199211327531332, |
|
"loss": 0.575, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0002212347453476571, |
|
"loss": 0.5561, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0002204773774200009, |
|
"loss": 0.5596, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.00021972000949234467, |
|
"loss": 0.5603, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.00021896264156468846, |
|
"loss": 0.5575, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.00021820527363703224, |
|
"loss": 0.5545, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00021744790570937602, |
|
"loss": 0.5629, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.00021669053778171983, |
|
"loss": 0.5572, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0002159331698540636, |
|
"loss": 0.5602, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.00021517580192640737, |
|
"loss": 0.5521, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.00021441843399875118, |
|
"loss": 0.5582, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.00021366106607109497, |
|
"loss": 0.5539, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.00021290369814343872, |
|
"loss": 0.5589, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.00021214633021578253, |
|
"loss": 0.5549, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.00021138896228812632, |
|
"loss": 0.525, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.00021063159436047007, |
|
"loss": 0.5304, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.00020987422643281388, |
|
"loss": 0.5331, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.00020911685850515766, |
|
"loss": 0.5341, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.00020835949057750142, |
|
"loss": 0.5319, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.00020760212264984523, |
|
"loss": 0.5318, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00020684475472218901, |
|
"loss": 0.5368, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.00020608738679453277, |
|
"loss": 0.532, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00020533001886687658, |
|
"loss": 0.5353, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.00020457265093922036, |
|
"loss": 0.5329, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 0.00020381528301156415, |
|
"loss": 0.5354, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.00020305791508390793, |
|
"loss": 0.5375, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.00020230054715625171, |
|
"loss": 0.5377, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0002015431792285955, |
|
"loss": 0.5191, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.00020078581130093928, |
|
"loss": 0.5057, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.00020002844337328306, |
|
"loss": 0.508, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.00019927107544562687, |
|
"loss": 0.5117, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.00019851370751797063, |
|
"loss": 0.5135, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.00019775633959031441, |
|
"loss": 0.5129, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.00019699897166265822, |
|
"loss": 0.511, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.00019624160373500198, |
|
"loss": 0.5095, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0001954842358073458, |
|
"loss": 0.5129, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.00019472686787968957, |
|
"loss": 0.5124, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.00019396949995203333, |
|
"loss": 0.5138, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.00019321213202437714, |
|
"loss": 0.5125, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 0.00019245476409672092, |
|
"loss": 0.5153, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.00019169739616906468, |
|
"loss": 0.5022, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.0001909400282414085, |
|
"loss": 0.4908, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 0.00019018266031375227, |
|
"loss": 0.4908, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 0.00018942529238609603, |
|
"loss": 0.4907, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 0.00018866792445843984, |
|
"loss": 0.4895, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.00018791055653078362, |
|
"loss": 0.4948, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 0.00018715318860312743, |
|
"loss": 0.4908, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 0.0001863958206754712, |
|
"loss": 0.4931, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 0.00018563845274781497, |
|
"loss": 0.4945, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 0.00018488108482015878, |
|
"loss": 0.5005, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 0.00018412371689250254, |
|
"loss": 0.4974, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.00018336634896484632, |
|
"loss": 0.4997, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 0.00018260898103719013, |
|
"loss": 0.5027, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001818516131095339, |
|
"loss": 0.4962, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 0.00018109424518187767, |
|
"loss": 0.4766, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 0.00018033687725422148, |
|
"loss": 0.4782, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 0.00017957950932656524, |
|
"loss": 0.4793, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.00017882214139890902, |
|
"loss": 0.4769, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 0.00017806477347125283, |
|
"loss": 0.4801, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.0001773074055435966, |
|
"loss": 0.4783, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 0.0001765500376159404, |
|
"loss": 0.4804, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 0.00017579266968828418, |
|
"loss": 0.4758, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 0.00017503530176062794, |
|
"loss": 0.4761, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.00017427793383297175, |
|
"loss": 0.473, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.00017352056590531553, |
|
"loss": 0.4793, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.00017276319797765932, |
|
"loss": 0.4772, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.0001720058300500031, |
|
"loss": 0.4752, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 0.00017124846212234688, |
|
"loss": 0.4611, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 0.0001704910941946907, |
|
"loss": 0.4549, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.00016973372626703445, |
|
"loss": 0.456, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 0.00016897635833937823, |
|
"loss": 0.4596, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 0.00016821899041172204, |
|
"loss": 0.4629, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 0.0001674616224840658, |
|
"loss": 0.463, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.00016670425455640958, |
|
"loss": 0.461, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 0.0001659468866287534, |
|
"loss": 0.4631, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 0.00016518951870109715, |
|
"loss": 0.4627, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 0.00016443215077344093, |
|
"loss": 0.4639, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.00016367478284578474, |
|
"loss": 0.4663, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 0.0001629174149181285, |
|
"loss": 0.46, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 0.00016216004699047228, |
|
"loss": 0.4661, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 0.0001614026790628161, |
|
"loss": 0.4564, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.00016064531113515988, |
|
"loss": 0.4391, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 0.00015988794320750363, |
|
"loss": 0.4479, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.00015913057527984744, |
|
"loss": 0.4427, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 0.00015837320735219123, |
|
"loss": 0.4425, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.00015761583942453498, |
|
"loss": 0.4412, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 0.0001568584714968788, |
|
"loss": 0.4488, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.00015610110356922258, |
|
"loss": 0.4464, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 0.00015534373564156636, |
|
"loss": 0.4507, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 0.00015458636771391014, |
|
"loss": 0.4476, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 0.00015382899978625393, |
|
"loss": 0.4462, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.0001530716318585977, |
|
"loss": 0.4493, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 0.0001523142639309415, |
|
"loss": 0.4478, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.00015155689600328528, |
|
"loss": 0.4483, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 0.00015079952807562906, |
|
"loss": 0.4276, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 0.00015004216014797284, |
|
"loss": 0.4259, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 0.00014928479222031665, |
|
"loss": 0.4263, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 0.0001485274242926604, |
|
"loss": 0.4302, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 0.0001477700563650042, |
|
"loss": 0.4302, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.000147012688437348, |
|
"loss": 0.4289, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.00014625532050969176, |
|
"loss": 0.4311, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 0.00014549795258203554, |
|
"loss": 0.4327, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 0.00014474058465437935, |
|
"loss": 0.4315, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 0.0001439832167267231, |
|
"loss": 0.4305, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 0.00014322584879906692, |
|
"loss": 0.429, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 0.0001424684808714107, |
|
"loss": 0.4288, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.00014171111294375449, |
|
"loss": 0.4309, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 0.00014095374501609827, |
|
"loss": 0.4179, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 0.00014019637708844205, |
|
"loss": 0.4098, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 0.00013943900916078584, |
|
"loss": 0.4136, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 0.00013868164123312962, |
|
"loss": 0.4115, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 0.0001379242733054734, |
|
"loss": 0.4192, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 0.00013716690537781719, |
|
"loss": 0.4159, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 0.00013640953745016097, |
|
"loss": 0.4176, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 0.00013565216952250475, |
|
"loss": 0.4166, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 0.00013489480159484854, |
|
"loss": 0.4214, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 0.00013413743366719232, |
|
"loss": 0.4121, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0001333800657395361, |
|
"loss": 0.414, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 16.87, |
|
"learning_rate": 0.00013262269781187989, |
|
"loss": 0.4159, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.00013186532988422367, |
|
"loss": 0.4135, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 0.00013110796195656748, |
|
"loss": 0.4098, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 0.00013035059402891124, |
|
"loss": 0.3947, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 17.17, |
|
"learning_rate": 0.00012959322610125502, |
|
"loss": 0.4026, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 0.00012883585817359883, |
|
"loss": 0.397, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 0.0001280784902459426, |
|
"loss": 0.3998, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.00012732112231828637, |
|
"loss": 0.3988, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 17.47, |
|
"learning_rate": 0.00012656375439063018, |
|
"loss": 0.4015, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 0.00012580638646297396, |
|
"loss": 0.402, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.00012504901853531775, |
|
"loss": 0.4018, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 0.00012429165060766153, |
|
"loss": 0.3987, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 0.0001235342826800053, |
|
"loss": 0.3974, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 0.0001227769147523491, |
|
"loss": 0.4006, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.00012201954682469288, |
|
"loss": 0.4028, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.00012126217889703665, |
|
"loss": 0.4019, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 0.00012050481096938045, |
|
"loss": 0.3853, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 0.00011974744304172423, |
|
"loss": 0.387, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 0.00011899007511406801, |
|
"loss": 0.3822, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 18.29, |
|
"learning_rate": 0.0001182327071864118, |
|
"loss": 0.3861, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 0.00011747533925875558, |
|
"loss": 0.3877, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 0.00011671797133109938, |
|
"loss": 0.3867, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 0.00011596060340344316, |
|
"loss": 0.388, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 0.00011520323547578693, |
|
"loss": 0.3889, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 0.00011444586754813073, |
|
"loss": 0.3867, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 0.00011368849962047451, |
|
"loss": 0.3897, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"learning_rate": 0.00011293113169281828, |
|
"loss": 0.3873, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 0.00011217376376516208, |
|
"loss": 0.3881, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 0.00011141639583750586, |
|
"loss": 0.3915, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 0.00011065902790984966, |
|
"loss": 0.3766, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 0.00010990165998219343, |
|
"loss": 0.3757, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 19.19, |
|
"learning_rate": 0.00010914429205453721, |
|
"loss": 0.3705, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 0.000108386924126881, |
|
"loss": 0.3728, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 0.00010762955619922477, |
|
"loss": 0.3756, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 0.00010687218827156856, |
|
"loss": 0.3772, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 0.00010611482034391236, |
|
"loss": 0.3761, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00010535745241625614, |
|
"loss": 0.3739, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 0.00010460008448859992, |
|
"loss": 0.371, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 0.0001038427165609437, |
|
"loss": 0.3721, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.00010308534863328749, |
|
"loss": 0.3733, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 0.00010232798070563129, |
|
"loss": 0.3742, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 0.00010157061277797505, |
|
"loss": 0.3732, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 0.00010081324485031884, |
|
"loss": 0.3703, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 0.00010005587692266263, |
|
"loss": 0.3609, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 9.92985089950064e-05, |
|
"loss": 0.3604, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"learning_rate": 9.85411410673502e-05, |
|
"loss": 0.3614, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 9.778377313969398e-05, |
|
"loss": 0.3617, |
|
"step": 813000 |
|
}, |
|
{ |
|
"epoch": 20.39, |
|
"learning_rate": 9.702640521203775e-05, |
|
"loss": 0.3611, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"learning_rate": 9.626903728438155e-05, |
|
"loss": 0.3607, |
|
"step": 819000 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 9.551166935672533e-05, |
|
"loss": 0.3605, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 9.475430142906912e-05, |
|
"loss": 0.3589, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 9.39969335014129e-05, |
|
"loss": 0.3586, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 9.323956557375668e-05, |
|
"loss": 0.3626, |
|
"step": 831000 |
|
}, |
|
{ |
|
"epoch": 20.84, |
|
"learning_rate": 9.248219764610048e-05, |
|
"loss": 0.3616, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 9.172482971844426e-05, |
|
"loss": 0.3595, |
|
"step": 837000 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 9.096746179078803e-05, |
|
"loss": 0.3624, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"learning_rate": 9.021009386313183e-05, |
|
"loss": 0.3468, |
|
"step": 843000 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"learning_rate": 8.945272593547561e-05, |
|
"loss": 0.3475, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 8.869535800781938e-05, |
|
"loss": 0.3486, |
|
"step": 849000 |
|
}, |
|
{ |
|
"epoch": 21.29, |
|
"learning_rate": 8.793799008016318e-05, |
|
"loss": 0.3454, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 8.718062215250696e-05, |
|
"loss": 0.3496, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 8.642325422485073e-05, |
|
"loss": 0.3478, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"learning_rate": 8.566588629719453e-05, |
|
"loss": 0.346, |
|
"step": 861000 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"learning_rate": 8.490851836953831e-05, |
|
"loss": 0.3513, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 8.415115044188211e-05, |
|
"loss": 0.3524, |
|
"step": 867000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 8.339378251422588e-05, |
|
"loss": 0.3494, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"learning_rate": 8.263641458656966e-05, |
|
"loss": 0.3521, |
|
"step": 873000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 8.187904665891346e-05, |
|
"loss": 0.3509, |
|
"step": 876000 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 8.112167873125724e-05, |
|
"loss": 0.3501, |
|
"step": 879000 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 8.036431080360101e-05, |
|
"loss": 0.3394, |
|
"step": 882000 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 7.960694287594481e-05, |
|
"loss": 0.3357, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 22.19, |
|
"learning_rate": 7.88495749482886e-05, |
|
"loss": 0.3378, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 7.809220702063239e-05, |
|
"loss": 0.3362, |
|
"step": 891000 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 7.733483909297616e-05, |
|
"loss": 0.3382, |
|
"step": 894000 |
|
}, |
|
{ |
|
"epoch": 22.42, |
|
"learning_rate": 7.657747116531994e-05, |
|
"loss": 0.3407, |
|
"step": 897000 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"learning_rate": 7.582010323766374e-05, |
|
"loss": 0.3381, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 7.506273531000751e-05, |
|
"loss": 0.3378, |
|
"step": 903000 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 7.430536738235131e-05, |
|
"loss": 0.3368, |
|
"step": 906000 |
|
}, |
|
{ |
|
"epoch": 22.72, |
|
"learning_rate": 7.354799945469509e-05, |
|
"loss": 0.3384, |
|
"step": 909000 |
|
}, |
|
{ |
|
"epoch": 22.79, |
|
"learning_rate": 7.279063152703887e-05, |
|
"loss": 0.3351, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 22.87, |
|
"learning_rate": 7.203326359938266e-05, |
|
"loss": 0.3399, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"learning_rate": 7.127589567172644e-05, |
|
"loss": 0.3366, |
|
"step": 918000 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 7.051852774407022e-05, |
|
"loss": 0.3313, |
|
"step": 921000 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 6.976115981641401e-05, |
|
"loss": 0.3291, |
|
"step": 924000 |
|
}, |
|
{ |
|
"epoch": 23.17, |
|
"learning_rate": 6.900379188875779e-05, |
|
"loss": 0.3269, |
|
"step": 927000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 6.824642396110157e-05, |
|
"loss": 0.324, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 6.748905603344537e-05, |
|
"loss": 0.3249, |
|
"step": 933000 |
|
}, |
|
{ |
|
"epoch": 23.39, |
|
"learning_rate": 6.673168810578914e-05, |
|
"loss": 0.3257, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 6.597432017813294e-05, |
|
"loss": 0.3266, |
|
"step": 939000 |
|
}, |
|
{ |
|
"epoch": 23.54, |
|
"learning_rate": 6.521695225047672e-05, |
|
"loss": 0.3275, |
|
"step": 942000 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 6.44595843228205e-05, |
|
"loss": 0.3279, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"learning_rate": 6.370221639516429e-05, |
|
"loss": 0.3255, |
|
"step": 948000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"learning_rate": 6.294484846750807e-05, |
|
"loss": 0.3255, |
|
"step": 951000 |
|
}, |
|
{ |
|
"epoch": 23.84, |
|
"learning_rate": 6.218748053985185e-05, |
|
"loss": 0.3233, |
|
"step": 954000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 6.143011261219564e-05, |
|
"loss": 0.3267, |
|
"step": 957000 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 6.067274468453942e-05, |
|
"loss": 0.3234, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"learning_rate": 5.991537675688321e-05, |
|
"loss": 0.3174, |
|
"step": 963000 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 5.9158008829226994e-05, |
|
"loss": 0.3149, |
|
"step": 966000 |
|
}, |
|
{ |
|
"epoch": 24.22, |
|
"learning_rate": 5.840064090157077e-05, |
|
"loss": 0.314, |
|
"step": 969000 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 5.764327297391456e-05, |
|
"loss": 0.318, |
|
"step": 972000 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 5.6885905046258344e-05, |
|
"loss": 0.3147, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 24.44, |
|
"learning_rate": 5.6128537118602134e-05, |
|
"loss": 0.3158, |
|
"step": 978000 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 5.537116919094591e-05, |
|
"loss": 0.3164, |
|
"step": 981000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 5.46138012632897e-05, |
|
"loss": 0.3162, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 5.3856433335633483e-05, |
|
"loss": 0.3153, |
|
"step": 987000 |
|
}, |
|
{ |
|
"epoch": 24.74, |
|
"learning_rate": 5.3099065407977274e-05, |
|
"loss": 0.3165, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 5.234169748032105e-05, |
|
"loss": 0.315, |
|
"step": 993000 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 5.158432955266483e-05, |
|
"loss": 0.3132, |
|
"step": 996000 |
|
}, |
|
{ |
|
"epoch": 24.97, |
|
"learning_rate": 5.0826961625008623e-05, |
|
"loss": 0.3163, |
|
"step": 999000 |
|
}, |
|
{ |
|
"epoch": 25.04, |
|
"learning_rate": 5.006959369735241e-05, |
|
"loss": 0.3107, |
|
"step": 1002000 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"learning_rate": 4.931222576969619e-05, |
|
"loss": 0.3062, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 4.855485784203997e-05, |
|
"loss": 0.3083, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 25.27, |
|
"learning_rate": 4.779748991438376e-05, |
|
"loss": 0.3096, |
|
"step": 1011000 |
|
}, |
|
{ |
|
"epoch": 25.34, |
|
"learning_rate": 4.704012198672755e-05, |
|
"loss": 0.3056, |
|
"step": 1014000 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 4.628275405907132e-05, |
|
"loss": 0.3042, |
|
"step": 1017000 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"learning_rate": 4.552538613141511e-05, |
|
"loss": 0.3063, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"learning_rate": 4.4768018203758897e-05, |
|
"loss": 0.3025, |
|
"step": 1023000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 4.4010650276102687e-05, |
|
"loss": 0.3056, |
|
"step": 1026000 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 4.325328234844646e-05, |
|
"loss": 0.3081, |
|
"step": 1029000 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"learning_rate": 4.249591442079025e-05, |
|
"loss": 0.3082, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 25.87, |
|
"learning_rate": 4.1738546493134036e-05, |
|
"loss": 0.3054, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 4.0981178565477827e-05, |
|
"loss": 0.3052, |
|
"step": 1038000 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 4.02238106378216e-05, |
|
"loss": 0.3013, |
|
"step": 1041000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 3.9466442710165386e-05, |
|
"loss": 0.2982, |
|
"step": 1044000 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 3.8709074782509176e-05, |
|
"loss": 0.2962, |
|
"step": 1047000 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"learning_rate": 3.795170685485296e-05, |
|
"loss": 0.2961, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 3.719433892719674e-05, |
|
"loss": 0.2993, |
|
"step": 1053000 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"learning_rate": 3.6436970999540526e-05, |
|
"loss": 0.2941, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"learning_rate": 3.5679603071884316e-05, |
|
"loss": 0.2948, |
|
"step": 1059000 |
|
}, |
|
{ |
|
"epoch": 26.54, |
|
"learning_rate": 3.492223514422809e-05, |
|
"loss": 0.2966, |
|
"step": 1062000 |
|
}, |
|
{ |
|
"epoch": 26.62, |
|
"learning_rate": 3.4164867216571876e-05, |
|
"loss": 0.2964, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 26.69, |
|
"learning_rate": 3.3407499288915666e-05, |
|
"loss": 0.2971, |
|
"step": 1068000 |
|
}, |
|
{ |
|
"epoch": 26.77, |
|
"learning_rate": 3.265013136125945e-05, |
|
"loss": 0.2974, |
|
"step": 1071000 |
|
}, |
|
{ |
|
"epoch": 26.84, |
|
"learning_rate": 3.189276343360323e-05, |
|
"loss": 0.2963, |
|
"step": 1074000 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 3.1135395505947016e-05, |
|
"loss": 0.2954, |
|
"step": 1077000 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 3.0378027578290803e-05, |
|
"loss": 0.2941, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 27.07, |
|
"learning_rate": 2.9620659650634586e-05, |
|
"loss": 0.2896, |
|
"step": 1083000 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"learning_rate": 2.8863291722978373e-05, |
|
"loss": 0.289, |
|
"step": 1086000 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"learning_rate": 2.8105923795322156e-05, |
|
"loss": 0.2899, |
|
"step": 1089000 |
|
}, |
|
{ |
|
"epoch": 27.29, |
|
"learning_rate": 2.7348555867665943e-05, |
|
"loss": 0.2878, |
|
"step": 1092000 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"learning_rate": 2.6591187940009723e-05, |
|
"loss": 0.2858, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 27.44, |
|
"learning_rate": 2.583382001235351e-05, |
|
"loss": 0.2901, |
|
"step": 1098000 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"learning_rate": 2.5076452084697293e-05, |
|
"loss": 0.2885, |
|
"step": 1101000 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 2.431908415704108e-05, |
|
"loss": 0.2903, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 27.67, |
|
"learning_rate": 2.3561716229384863e-05, |
|
"loss": 0.2904, |
|
"step": 1107000 |
|
}, |
|
{ |
|
"epoch": 27.74, |
|
"learning_rate": 2.280434830172865e-05, |
|
"loss": 0.2869, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 2.2046980374072432e-05, |
|
"loss": 0.2902, |
|
"step": 1113000 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 2.128961244641622e-05, |
|
"loss": 0.2867, |
|
"step": 1116000 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 2.053224451876e-05, |
|
"loss": 0.2869, |
|
"step": 1119000 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 1.9774876591103786e-05, |
|
"loss": 0.2844, |
|
"step": 1122000 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 1.901750866344757e-05, |
|
"loss": 0.283, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"learning_rate": 1.8260140735791356e-05, |
|
"loss": 0.2816, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 1.750277280813514e-05, |
|
"loss": 0.2848, |
|
"step": 1131000 |
|
}, |
|
{ |
|
"epoch": 28.34, |
|
"learning_rate": 1.6745404880478922e-05, |
|
"loss": 0.2811, |
|
"step": 1134000 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"learning_rate": 1.598803695282271e-05, |
|
"loss": 0.283, |
|
"step": 1137000 |
|
}, |
|
{ |
|
"epoch": 28.49, |
|
"learning_rate": 1.5230669025166492e-05, |
|
"loss": 0.2814, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 1.4473301097510277e-05, |
|
"loss": 0.2793, |
|
"step": 1143000 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 1.3715933169854062e-05, |
|
"loss": 0.2834, |
|
"step": 1146000 |
|
}, |
|
{ |
|
"epoch": 28.72, |
|
"learning_rate": 1.2958565242197847e-05, |
|
"loss": 0.2798, |
|
"step": 1149000 |
|
}, |
|
{ |
|
"epoch": 28.79, |
|
"learning_rate": 1.220119731454163e-05, |
|
"loss": 0.2807, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 28.87, |
|
"learning_rate": 1.1443829386885416e-05, |
|
"loss": 0.28, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 28.94, |
|
"learning_rate": 1.06864614592292e-05, |
|
"loss": 0.2762, |
|
"step": 1158000 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 9.929093531572985e-06, |
|
"loss": 0.2802, |
|
"step": 1161000 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"learning_rate": 9.171725603916769e-06, |
|
"loss": 0.2761, |
|
"step": 1164000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 8.414357676260554e-06, |
|
"loss": 0.2771, |
|
"step": 1167000 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 7.656989748604339e-06, |
|
"loss": 0.2777, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 29.32, |
|
"learning_rate": 6.899621820948123e-06, |
|
"loss": 0.2767, |
|
"step": 1173000 |
|
}, |
|
{ |
|
"epoch": 29.39, |
|
"learning_rate": 6.142253893291908e-06, |
|
"loss": 0.2773, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"learning_rate": 5.384885965635692e-06, |
|
"loss": 0.2765, |
|
"step": 1179000 |
|
}, |
|
{ |
|
"epoch": 29.54, |
|
"learning_rate": 4.627518037979477e-06, |
|
"loss": 0.2786, |
|
"step": 1182000 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"learning_rate": 3.870150110323261e-06, |
|
"loss": 0.2768, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 3.1127821826670457e-06, |
|
"loss": 0.2771, |
|
"step": 1188000 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 2.35541425501083e-06, |
|
"loss": 0.2737, |
|
"step": 1191000 |
|
}, |
|
{ |
|
"epoch": 29.84, |
|
"learning_rate": 1.5980463273546148e-06, |
|
"loss": 0.2737, |
|
"step": 1194000 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 8.406783996983991e-07, |
|
"loss": 0.2781, |
|
"step": 1197000 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 8.33104720421837e-08, |
|
"loss": 0.2745, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1200330, |
|
"total_flos": 2.468458772043806e+21, |
|
"train_loss": 0.5222103027782066, |
|
"train_runtime": 708527.1323, |
|
"train_samples_per_second": 27.106, |
|
"train_steps_per_second": 1.694 |
|
} |
|
], |
|
"logging_steps": 3000, |
|
"max_steps": 1200330, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 2.468458772043806e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|