|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 1204800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.47011952191235e-05, |
|
"loss": 11.8613, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000149402390438247, |
|
"loss": 1.9866, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002241035856573705, |
|
"loss": 1.4383, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000298804780876494, |
|
"loss": 1.3467, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002992575153929735, |
|
"loss": 1.3175, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00029850295786550764, |
|
"loss": 1.2551, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029774840033804177, |
|
"loss": 1.2058, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00029699384281057584, |
|
"loss": 1.1759, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00029623928528310997, |
|
"loss": 1.148, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002954847277556441, |
|
"loss": 1.116, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00029473017022817817, |
|
"loss": 1.0933, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00029397561270071225, |
|
"loss": 1.0788, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002932210551732464, |
|
"loss": 1.0578, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0002924664976457805, |
|
"loss": 1.0363, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0002917119401183146, |
|
"loss": 1.0198, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002909573825908487, |
|
"loss": 1.0132, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00029020282506338283, |
|
"loss": 0.9995, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002894482675359169, |
|
"loss": 0.992, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.000288693710008451, |
|
"loss": 0.9821, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0002879391524809851, |
|
"loss": 0.9878, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00028718459495351924, |
|
"loss": 0.9952, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002864300374260533, |
|
"loss": 0.9804, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00028567547989858744, |
|
"loss": 0.9736, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00028492092237112157, |
|
"loss": 0.9681, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00028416636484365564, |
|
"loss": 0.965, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00028341180731618977, |
|
"loss": 0.9519, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00028265724978872385, |
|
"loss": 0.9525, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.000281902692261258, |
|
"loss": 0.9175, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00028114813473379205, |
|
"loss": 0.9129, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0002803935772063262, |
|
"loss": 0.9167, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002796390196788603, |
|
"loss": 0.9037, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0002788844621513944, |
|
"loss": 0.8967, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0002781299046239285, |
|
"loss": 0.8896, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0002773753470964626, |
|
"loss": 0.8848, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0002766207895689967, |
|
"loss": 0.8864, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00027586623204153084, |
|
"loss": 0.8793, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0002751116745140649, |
|
"loss": 0.8727, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00027435711698659904, |
|
"loss": 0.8605, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00027360255945913317, |
|
"loss": 0.8614, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00027284800193166724, |
|
"loss": 0.8584, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00027209344440420137, |
|
"loss": 0.8322, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0002713388868767355, |
|
"loss": 0.8182, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0002705843293492696, |
|
"loss": 0.8203, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00026982977182180365, |
|
"loss": 0.824, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0002690752142943378, |
|
"loss": 0.819, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0002683206567668719, |
|
"loss": 0.8144, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.000267566099239406, |
|
"loss": 0.8158, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.0002668115417119401, |
|
"loss": 0.8148, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.00026605698418447424, |
|
"loss": 0.8188, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002653024266570083, |
|
"loss": 0.8064, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0002645478691295424, |
|
"loss": 0.8047, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0002637933116020765, |
|
"loss": 0.8008, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.00026303875407461064, |
|
"loss": 0.799, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0002622841965471447, |
|
"loss": 0.7822, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00026152963901967884, |
|
"loss": 0.7657, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00026077508149221297, |
|
"loss": 0.7627, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00026002052396474705, |
|
"loss": 0.7603, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0002592659664372812, |
|
"loss": 0.7608, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00025851140890981525, |
|
"loss": 0.7642, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0002577568513823494, |
|
"loss": 0.7607, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0002570022938548835, |
|
"loss": 0.7546, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002562477363274176, |
|
"loss": 0.7531, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0002554931787999517, |
|
"loss": 0.7572, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0002547386212724858, |
|
"loss": 0.7578, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002539840637450199, |
|
"loss": 0.7558, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.000253229506217554, |
|
"loss": 0.7556, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002524749486900881, |
|
"loss": 0.7476, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00025172039116262224, |
|
"loss": 0.721, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0002509658336351563, |
|
"loss": 0.7241, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.00025021127610769044, |
|
"loss": 0.7183, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00024945671858022457, |
|
"loss": 0.7163, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00024870216105275865, |
|
"loss": 0.716, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0002479476035252927, |
|
"loss": 0.7208, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00024719304599782685, |
|
"loss": 0.7149, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.000246438488470361, |
|
"loss": 0.7168, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00024568393094289505, |
|
"loss": 0.7131, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0002449293734154292, |
|
"loss": 0.7134, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002441748158879633, |
|
"loss": 0.7088, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.00024342025836049738, |
|
"loss": 0.7101, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00024266570083303148, |
|
"loss": 0.7106, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0002419111433055656, |
|
"loss": 0.6846, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00024115658577809968, |
|
"loss": 0.6761, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00024040202825063379, |
|
"loss": 0.6783, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00023964747072316791, |
|
"loss": 0.6815, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.00023889291319570202, |
|
"loss": 0.6864, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00023813835566823612, |
|
"loss": 0.688, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00023738379814077025, |
|
"loss": 0.6843, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00023662924061330435, |
|
"loss": 0.6842, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.00023587468308583842, |
|
"loss": 0.6874, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.00023512012555837258, |
|
"loss": 0.6777, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00023436556803090665, |
|
"loss": 0.6813, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00023361101050344075, |
|
"loss": 0.6859, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.00023285645297597488, |
|
"loss": 0.6756, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00023210189544850898, |
|
"loss": 0.6682, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00023134733792104308, |
|
"loss": 0.6456, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00023059278039357716, |
|
"loss": 0.6463, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.0002298382228661113, |
|
"loss": 0.6508, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.00022908366533864539, |
|
"loss": 0.6513, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0002283291078111795, |
|
"loss": 0.6479, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.00022757455028371362, |
|
"loss": 0.6514, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00022681999275624772, |
|
"loss": 0.648, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.00022606543522878182, |
|
"loss": 0.6467, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00022531087770131595, |
|
"loss": 0.6476, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.00022455632017385005, |
|
"loss": 0.6485, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00022380176264638412, |
|
"loss": 0.6464, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00022304720511891825, |
|
"loss": 0.644, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00022229264759145235, |
|
"loss": 0.6423, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.00022153809006398645, |
|
"loss": 0.6169, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00022078353253652058, |
|
"loss": 0.6166, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.00022002897500905468, |
|
"loss": 0.6174, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.00021927441748158878, |
|
"loss": 0.62, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00021851985995412286, |
|
"loss": 0.6217, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.00021776530242665699, |
|
"loss": 0.6187, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 0.0002170107448991911, |
|
"loss": 0.6221, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.0002162561873717252, |
|
"loss": 0.6202, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00021550162984425932, |
|
"loss": 0.6198, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.00021474707231679342, |
|
"loss": 0.6159, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.00021399251478932752, |
|
"loss": 0.615, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.00021323795726186165, |
|
"loss": 0.6188, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.00021248339973439572, |
|
"loss": 0.6167, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00021172884220692982, |
|
"loss": 0.6022, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.00021097428467946395, |
|
"loss": 0.5912, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.00021021972715199805, |
|
"loss": 0.5906, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.00020946516962453215, |
|
"loss": 0.5921, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.00020871061209706628, |
|
"loss": 0.584, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.00020795605456960038, |
|
"loss": 0.5884, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.00020720149704213446, |
|
"loss": 0.585, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.00020644693951466859, |
|
"loss": 0.5878, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.0002056923819872027, |
|
"loss": 0.5894, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.0002049378244597368, |
|
"loss": 0.5875, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.0002041832669322709, |
|
"loss": 0.5877, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.00020342870940480502, |
|
"loss": 0.5876, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.00020267415187733912, |
|
"loss": 0.5876, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0002019195943498732, |
|
"loss": 0.5885, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.00020116503682240732, |
|
"loss": 0.5606, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.00020041047929494142, |
|
"loss": 0.5618, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 0.00019965592176747552, |
|
"loss": 0.5661, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.00019890136424000965, |
|
"loss": 0.5723, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.00019814680671254375, |
|
"loss": 0.5665, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.00019739224918507786, |
|
"loss": 0.5635, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.00019663769165761198, |
|
"loss": 0.5677, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.00019588313413014606, |
|
"loss": 0.5657, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.00019512857660268016, |
|
"loss": 0.5675, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 0.0001943740190752143, |
|
"loss": 0.5646, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001936194615477484, |
|
"loss": 0.5663, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.0001928649040202825, |
|
"loss": 0.5692, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.0001921103464928166, |
|
"loss": 0.5664, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.00019135578896535072, |
|
"loss": 0.5487, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0001906012314378848, |
|
"loss": 0.5436, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 0.0001898466739104189, |
|
"loss": 0.5438, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 0.00018909211638295302, |
|
"loss": 0.5444, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.00018833755885548712, |
|
"loss": 0.5422, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 0.00018758300132802123, |
|
"loss": 0.5452, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 0.00018682844380055535, |
|
"loss": 0.545, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 0.00018607388627308946, |
|
"loss": 0.5418, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 0.00018531932874562353, |
|
"loss": 0.5457, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 0.00018456477121815766, |
|
"loss": 0.5425, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 0.00018381021369069176, |
|
"loss": 0.5435, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 0.00018305565616322586, |
|
"loss": 0.5489, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 0.00018230109863576, |
|
"loss": 0.5457, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 0.0001815465411082941, |
|
"loss": 0.5353, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.0001807919835808282, |
|
"loss": 0.5185, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 0.00018003742605336227, |
|
"loss": 0.5223, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 0.0001792828685258964, |
|
"loss": 0.5172, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 0.0001785283109984305, |
|
"loss": 0.5191, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0001777737534709646, |
|
"loss": 0.5221, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.00017701919594349872, |
|
"loss": 0.522, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 0.00017626463841603283, |
|
"loss": 0.525, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00017551008088856693, |
|
"loss": 0.5265, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.00017475552336110106, |
|
"loss": 0.526, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.00017400096583363513, |
|
"loss": 0.527, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 0.00017324640830616923, |
|
"loss": 0.5259, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 0.00017249185077870336, |
|
"loss": 0.5234, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.00017173729325123746, |
|
"loss": 0.5259, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 0.00017098273572377156, |
|
"loss": 0.5027, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 0.0001702281781963057, |
|
"loss": 0.5043, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 0.0001694736206688398, |
|
"loss": 0.5051, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 0.00016871906314137387, |
|
"loss": 0.5062, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 0.00016796450561390797, |
|
"loss": 0.5062, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 0.0001672099480864421, |
|
"loss": 0.508, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 0.0001664553905589762, |
|
"loss": 0.5086, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0001657008330315103, |
|
"loss": 0.5072, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00016494627550404443, |
|
"loss": 0.5, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 0.00016419171797657853, |
|
"loss": 0.5067, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 0.0001634371604491126, |
|
"loss": 0.5055, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 0.00016268260292164673, |
|
"loss": 0.501, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 0.00016192804539418083, |
|
"loss": 0.5068, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 0.00016117348786671493, |
|
"loss": 0.4915, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 0.00016041893033924906, |
|
"loss": 0.4865, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.00015966437281178316, |
|
"loss": 0.4881, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 0.00015890981528431726, |
|
"loss": 0.4842, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 0.0001581552577568514, |
|
"loss": 0.4873, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.00015740070022938547, |
|
"loss": 0.4861, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 0.00015664614270191957, |
|
"loss": 0.4867, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 0.00015589158517445367, |
|
"loss": 0.4902, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0001551370276469878, |
|
"loss": 0.4927, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.0001543824701195219, |
|
"loss": 0.4904, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 0.000153627912592056, |
|
"loss": 0.4909, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 0.00015287335506459013, |
|
"loss": 0.4914, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 0.00015211879753712423, |
|
"loss": 0.4864, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 0.0001513642400096583, |
|
"loss": 0.4834, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 0.00015060968248219243, |
|
"loss": 0.4687, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 0.00014985512495472653, |
|
"loss": 0.4683, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.00014910056742726066, |
|
"loss": 0.4657, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 0.00014834600989979473, |
|
"loss": 0.4727, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 0.00014759145237232886, |
|
"loss": 0.4709, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 0.00014683689484486296, |
|
"loss": 0.4703, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 0.00014608233731739707, |
|
"loss": 0.4709, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 0.00014532777978993117, |
|
"loss": 0.4726, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.00014457322226246527, |
|
"loss": 0.4684, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.0001438186647349994, |
|
"loss": 0.4725, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 0.0001430641072075335, |
|
"loss": 0.4689, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.0001423095496800676, |
|
"loss": 0.468, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001415549921526017, |
|
"loss": 0.4749, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.00014080043462513583, |
|
"loss": 0.4551, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 0.0001400458770976699, |
|
"loss": 0.4536, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.00013929131957020403, |
|
"loss": 0.4548, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 0.00013853676204273813, |
|
"loss": 0.4541, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 0.00013778220451527223, |
|
"loss": 0.4573, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 0.00013702764698780633, |
|
"loss": 0.4521, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.00013627308946034044, |
|
"loss": 0.4564, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 0.00013551853193287456, |
|
"loss": 0.4563, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 0.00013476397440540867, |
|
"loss": 0.4582, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 0.00013400941687794277, |
|
"loss": 0.4556, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.00013325485935047687, |
|
"loss": 0.4527, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 0.00013250030182301097, |
|
"loss": 0.4535, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.00013174574429554507, |
|
"loss": 0.4578, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.0001309911867680792, |
|
"loss": 0.449, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 0.0001302366292406133, |
|
"loss": 0.4389, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 0.0001294820717131474, |
|
"loss": 0.438, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 0.0001287275141856815, |
|
"loss": 0.4396, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 0.0001279729566582156, |
|
"loss": 0.437, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 0.00012721839913074973, |
|
"loss": 0.443, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 0.0001264638416032838, |
|
"loss": 0.4428, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.00012570928407581793, |
|
"loss": 0.4384, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.00012495472654835204, |
|
"loss": 0.4387, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 0.00012420016902088614, |
|
"loss": 0.4416, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 0.00012344561149342024, |
|
"loss": 0.4398, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 0.00012269105396595437, |
|
"loss": 0.437, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 0.00012193649643848845, |
|
"loss": 0.4393, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.00012118193891102257, |
|
"loss": 0.4416, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 0.00012042738138355667, |
|
"loss": 0.4216, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 0.00011967282385609077, |
|
"loss": 0.4206, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 0.00011891826632862489, |
|
"loss": 0.4223, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 0.00011816370880115899, |
|
"loss": 0.4261, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 0.0001174091512736931, |
|
"loss": 0.4238, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.0001166545937462272, |
|
"loss": 0.4224, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 0.0001159000362187613, |
|
"loss": 0.4261, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 0.00011514547869129542, |
|
"loss": 0.4287, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 0.00011439092116382951, |
|
"loss": 0.4261, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 0.00011363636363636362, |
|
"loss": 0.4259, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"learning_rate": 0.00011288180610889774, |
|
"loss": 0.4233, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 0.00011212724858143184, |
|
"loss": 0.427, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 0.00011137269105396594, |
|
"loss": 0.4302, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 0.00011061813352650005, |
|
"loss": 0.4109, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 0.00010986357599903416, |
|
"loss": 0.4088, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.00010910901847156827, |
|
"loss": 0.4078, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 0.00010835446094410236, |
|
"loss": 0.4094, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 0.00010759990341663647, |
|
"loss": 0.409, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 0.00010684534588917059, |
|
"loss": 0.4086, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 0.00010609078836170468, |
|
"loss": 0.406, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00010533623083423879, |
|
"loss": 0.4102, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"learning_rate": 0.0001045816733067729, |
|
"loss": 0.4089, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 0.000103827115779307, |
|
"loss": 0.4096, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 0.00010307255825184111, |
|
"loss": 0.4119, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 0.00010231800072437521, |
|
"loss": 0.4101, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 0.00010156344319690932, |
|
"loss": 0.4125, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 0.00010080888566944344, |
|
"loss": 0.4091, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 0.00010005432814197753, |
|
"loss": 0.3946, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 9.929977061451164e-05, |
|
"loss": 0.3959, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"learning_rate": 9.854521308704576e-05, |
|
"loss": 0.3954, |
|
"step": 813000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 9.779065555957984e-05, |
|
"loss": 0.3974, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 20.39, |
|
"learning_rate": 9.703609803211396e-05, |
|
"loss": 0.3943, |
|
"step": 819000 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"learning_rate": 9.628154050464806e-05, |
|
"loss": 0.3984, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 9.552698297718217e-05, |
|
"loss": 0.3963, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 9.477242544971629e-05, |
|
"loss": 0.3927, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 9.401786792225038e-05, |
|
"loss": 0.3955, |
|
"step": 831000 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 9.326331039478449e-05, |
|
"loss": 0.3982, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 20.84, |
|
"learning_rate": 9.25087528673186e-05, |
|
"loss": 0.3976, |
|
"step": 837000 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 9.17541953398527e-05, |
|
"loss": 0.3957, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 9.099963781238681e-05, |
|
"loss": 0.3982, |
|
"step": 843000 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"learning_rate": 9.024508028492091e-05, |
|
"loss": 0.3806, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"learning_rate": 8.949052275745503e-05, |
|
"loss": 0.3819, |
|
"step": 849000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 8.873596522998913e-05, |
|
"loss": 0.3847, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 21.29, |
|
"learning_rate": 8.798140770252323e-05, |
|
"loss": 0.3843, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 21.36, |
|
"learning_rate": 8.722685017505734e-05, |
|
"loss": 0.3859, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 8.647229264759146e-05, |
|
"loss": 0.3821, |
|
"step": 861000 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"learning_rate": 8.571773512012555e-05, |
|
"loss": 0.386, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"learning_rate": 8.496317759265966e-05, |
|
"loss": 0.3853, |
|
"step": 867000 |
|
}, |
|
{ |
|
"epoch": 21.66, |
|
"learning_rate": 8.420862006519376e-05, |
|
"loss": 0.3856, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 8.345406253772786e-05, |
|
"loss": 0.3843, |
|
"step": 873000 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 8.269950501026198e-05, |
|
"loss": 0.3845, |
|
"step": 876000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 8.194494748279608e-05, |
|
"loss": 0.3809, |
|
"step": 879000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 8.11903899553302e-05, |
|
"loss": 0.3824, |
|
"step": 882000 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 8.04358324278643e-05, |
|
"loss": 0.3801, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"learning_rate": 7.96812749003984e-05, |
|
"loss": 0.3702, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 22.19, |
|
"learning_rate": 7.892671737293251e-05, |
|
"loss": 0.3713, |
|
"step": 891000 |
|
}, |
|
{ |
|
"epoch": 22.26, |
|
"learning_rate": 7.817215984546663e-05, |
|
"loss": 0.3715, |
|
"step": 894000 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 7.741760231800071e-05, |
|
"loss": 0.3681, |
|
"step": 897000 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"learning_rate": 7.666304479053483e-05, |
|
"loss": 0.3714, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"learning_rate": 7.590848726306893e-05, |
|
"loss": 0.3725, |
|
"step": 903000 |
|
}, |
|
{ |
|
"epoch": 22.56, |
|
"learning_rate": 7.515392973560303e-05, |
|
"loss": 0.3692, |
|
"step": 906000 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 7.439937220813715e-05, |
|
"loss": 0.375, |
|
"step": 909000 |
|
}, |
|
{ |
|
"epoch": 22.71, |
|
"learning_rate": 7.364481468067125e-05, |
|
"loss": 0.3721, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"learning_rate": 7.289025715320536e-05, |
|
"loss": 0.3699, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 7.213569962573946e-05, |
|
"loss": 0.37, |
|
"step": 918000 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 7.138114209827356e-05, |
|
"loss": 0.3702, |
|
"step": 921000 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 7.062658457080768e-05, |
|
"loss": 0.3668, |
|
"step": 924000 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 6.987202704334178e-05, |
|
"loss": 0.3602, |
|
"step": 927000 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"learning_rate": 6.911746951587588e-05, |
|
"loss": 0.3557, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 6.836291198840998e-05, |
|
"loss": 0.3607, |
|
"step": 933000 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"learning_rate": 6.76083544609441e-05, |
|
"loss": 0.3571, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 23.38, |
|
"learning_rate": 6.68537969334782e-05, |
|
"loss": 0.3589, |
|
"step": 939000 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"learning_rate": 6.609923940601231e-05, |
|
"loss": 0.361, |
|
"step": 942000 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 6.534468187854641e-05, |
|
"loss": 0.3595, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 23.61, |
|
"learning_rate": 6.459012435108053e-05, |
|
"loss": 0.3554, |
|
"step": 948000 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 6.383556682361463e-05, |
|
"loss": 0.3572, |
|
"step": 951000 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"learning_rate": 6.308100929614873e-05, |
|
"loss": 0.3604, |
|
"step": 954000 |
|
}, |
|
{ |
|
"epoch": 23.83, |
|
"learning_rate": 6.232645176868283e-05, |
|
"loss": 0.3591, |
|
"step": 957000 |
|
}, |
|
{ |
|
"epoch": 23.9, |
|
"learning_rate": 6.157189424121695e-05, |
|
"loss": 0.3582, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 23.98, |
|
"learning_rate": 6.0817336713751056e-05, |
|
"loss": 0.3568, |
|
"step": 963000 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"learning_rate": 6.006277918628516e-05, |
|
"loss": 0.3495, |
|
"step": 966000 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 5.930822165881926e-05, |
|
"loss": 0.3484, |
|
"step": 969000 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 5.855366413135337e-05, |
|
"loss": 0.3449, |
|
"step": 972000 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 5.7799106603887474e-05, |
|
"loss": 0.3471, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 5.704454907642158e-05, |
|
"loss": 0.3495, |
|
"step": 978000 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 5.6289991548955684e-05, |
|
"loss": 0.3489, |
|
"step": 981000 |
|
}, |
|
{ |
|
"epoch": 24.5, |
|
"learning_rate": 5.55354340214898e-05, |
|
"loss": 0.3464, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 5.47808764940239e-05, |
|
"loss": 0.3485, |
|
"step": 987000 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"learning_rate": 5.402631896655801e-05, |
|
"loss": 0.3486, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"learning_rate": 5.327176143909211e-05, |
|
"loss": 0.3476, |
|
"step": 993000 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 5.2517203911626224e-05, |
|
"loss": 0.3502, |
|
"step": 996000 |
|
}, |
|
{ |
|
"epoch": 24.88, |
|
"learning_rate": 5.1762646384160325e-05, |
|
"loss": 0.3492, |
|
"step": 999000 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 5.1008088856694426e-05, |
|
"loss": 0.347, |
|
"step": 1002000 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 5.0253531329228534e-05, |
|
"loss": 0.343, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"learning_rate": 4.949897380176264e-05, |
|
"loss": 0.3366, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 25.17, |
|
"learning_rate": 4.874441627429675e-05, |
|
"loss": 0.3348, |
|
"step": 1011000 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"learning_rate": 4.798985874683085e-05, |
|
"loss": 0.3344, |
|
"step": 1014000 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 4.723530121936496e-05, |
|
"loss": 0.3406, |
|
"step": 1017000 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 4.648074369189907e-05, |
|
"loss": 0.3366, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 4.5726186164433176e-05, |
|
"loss": 0.3392, |
|
"step": 1023000 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"learning_rate": 4.497162863696728e-05, |
|
"loss": 0.3379, |
|
"step": 1026000 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 4.421707110950138e-05, |
|
"loss": 0.3388, |
|
"step": 1029000 |
|
}, |
|
{ |
|
"epoch": 25.7, |
|
"learning_rate": 4.346251358203549e-05, |
|
"loss": 0.3383, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 4.27079560545696e-05, |
|
"loss": 0.3372, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"learning_rate": 4.19533985271037e-05, |
|
"loss": 0.3358, |
|
"step": 1038000 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 4.1198840999637803e-05, |
|
"loss": 0.3353, |
|
"step": 1041000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.044428347217192e-05, |
|
"loss": 0.34, |
|
"step": 1044000 |
|
}, |
|
{ |
|
"epoch": 26.07, |
|
"learning_rate": 3.968972594470602e-05, |
|
"loss": 0.3282, |
|
"step": 1047000 |
|
}, |
|
{ |
|
"epoch": 26.15, |
|
"learning_rate": 3.893516841724013e-05, |
|
"loss": 0.3267, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 3.818061088977423e-05, |
|
"loss": 0.3268, |
|
"step": 1053000 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"learning_rate": 3.742605336230834e-05, |
|
"loss": 0.3248, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 3.6671495834842445e-05, |
|
"loss": 0.3268, |
|
"step": 1059000 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"learning_rate": 3.591693830737655e-05, |
|
"loss": 0.324, |
|
"step": 1062000 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"learning_rate": 3.516238077991066e-05, |
|
"loss": 0.3298, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 26.59, |
|
"learning_rate": 3.440782325244476e-05, |
|
"loss": 0.3296, |
|
"step": 1068000 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 3.365326572497887e-05, |
|
"loss": 0.3261, |
|
"step": 1071000 |
|
}, |
|
{ |
|
"epoch": 26.74, |
|
"learning_rate": 3.289870819751297e-05, |
|
"loss": 0.3284, |
|
"step": 1074000 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"learning_rate": 3.214415067004708e-05, |
|
"loss": 0.3279, |
|
"step": 1077000 |
|
}, |
|
{ |
|
"epoch": 26.89, |
|
"learning_rate": 3.138959314258119e-05, |
|
"loss": 0.3273, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"learning_rate": 3.0635035615115295e-05, |
|
"loss": 0.3275, |
|
"step": 1083000 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 2.9880478087649397e-05, |
|
"loss": 0.323, |
|
"step": 1086000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 2.9125920560183505e-05, |
|
"loss": 0.3182, |
|
"step": 1089000 |
|
}, |
|
{ |
|
"epoch": 27.19, |
|
"learning_rate": 2.837136303271761e-05, |
|
"loss": 0.3209, |
|
"step": 1092000 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 2.7616805505251717e-05, |
|
"loss": 0.3185, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 27.34, |
|
"learning_rate": 2.6862247977785822e-05, |
|
"loss": 0.3214, |
|
"step": 1098000 |
|
}, |
|
{ |
|
"epoch": 27.42, |
|
"learning_rate": 2.610769045031993e-05, |
|
"loss": 0.3164, |
|
"step": 1101000 |
|
}, |
|
{ |
|
"epoch": 27.49, |
|
"learning_rate": 2.5353132922854035e-05, |
|
"loss": 0.3206, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 2.4598575395388143e-05, |
|
"loss": 0.3175, |
|
"step": 1107000 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"learning_rate": 2.3844017867922247e-05, |
|
"loss": 0.3185, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"learning_rate": 2.3089460340456355e-05, |
|
"loss": 0.3199, |
|
"step": 1113000 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"learning_rate": 2.233490281299046e-05, |
|
"loss": 0.318, |
|
"step": 1116000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"learning_rate": 2.1580345285524568e-05, |
|
"loss": 0.319, |
|
"step": 1119000 |
|
}, |
|
{ |
|
"epoch": 27.94, |
|
"learning_rate": 2.082578775805867e-05, |
|
"loss": 0.3152, |
|
"step": 1122000 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.0071230230592777e-05, |
|
"loss": 0.3161, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 1.9316672703126882e-05, |
|
"loss": 0.3142, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"learning_rate": 1.856211517566099e-05, |
|
"loss": 0.3129, |
|
"step": 1131000 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 1.7807557648195098e-05, |
|
"loss": 0.3123, |
|
"step": 1134000 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"learning_rate": 1.7053000120729203e-05, |
|
"loss": 0.314, |
|
"step": 1137000 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.629844259326331e-05, |
|
"loss": 0.3137, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"learning_rate": 1.5543885065797415e-05, |
|
"loss": 0.3093, |
|
"step": 1143000 |
|
}, |
|
{ |
|
"epoch": 28.54, |
|
"learning_rate": 1.4789327538331522e-05, |
|
"loss": 0.3128, |
|
"step": 1146000 |
|
}, |
|
{ |
|
"epoch": 28.61, |
|
"learning_rate": 1.4034770010865628e-05, |
|
"loss": 0.3127, |
|
"step": 1149000 |
|
}, |
|
{ |
|
"epoch": 28.69, |
|
"learning_rate": 1.3280212483399733e-05, |
|
"loss": 0.3116, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 1.2525654955933839e-05, |
|
"loss": 0.3117, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 28.83, |
|
"learning_rate": 1.1771097428467945e-05, |
|
"loss": 0.3083, |
|
"step": 1158000 |
|
}, |
|
{ |
|
"epoch": 28.91, |
|
"learning_rate": 1.1016539901002052e-05, |
|
"loss": 0.3099, |
|
"step": 1161000 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 1.0261982373536158e-05, |
|
"loss": 0.3064, |
|
"step": 1164000 |
|
}, |
|
{ |
|
"epoch": 29.06, |
|
"learning_rate": 9.507424846070264e-06, |
|
"loss": 0.3065, |
|
"step": 1167000 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"learning_rate": 8.752867318604369e-06, |
|
"loss": 0.3055, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"learning_rate": 7.998309791138475e-06, |
|
"loss": 0.3074, |
|
"step": 1173000 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 7.243752263672582e-06, |
|
"loss": 0.3085, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 29.36, |
|
"learning_rate": 6.489194736206688e-06, |
|
"loss": 0.3071, |
|
"step": 1179000 |
|
}, |
|
{ |
|
"epoch": 29.43, |
|
"learning_rate": 5.734637208740793e-06, |
|
"loss": 0.3092, |
|
"step": 1182000 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 4.9800796812749e-06, |
|
"loss": 0.3074, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 4.225522153809006e-06, |
|
"loss": 0.3055, |
|
"step": 1188000 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"learning_rate": 3.4709646263431124e-06, |
|
"loss": 0.3072, |
|
"step": 1191000 |
|
}, |
|
{ |
|
"epoch": 29.73, |
|
"learning_rate": 2.7164070988772183e-06, |
|
"loss": 0.305, |
|
"step": 1194000 |
|
}, |
|
{ |
|
"epoch": 29.81, |
|
"learning_rate": 1.9618495714113242e-06, |
|
"loss": 0.3048, |
|
"step": 1197000 |
|
}, |
|
{ |
|
"epoch": 29.88, |
|
"learning_rate": 1.2072920439454301e-06, |
|
"loss": 0.3068, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"learning_rate": 4.5273451647953635e-07, |
|
"loss": 0.3059, |
|
"step": 1203000 |
|
} |
|
], |
|
"logging_steps": 3000, |
|
"max_steps": 1204800, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 2.3932840682448497e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|