{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.5530429503718315,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 3.2000000000000003e-06, "loss": 9.4799, "step": 200 },
    { "epoch": 0.06, "learning_rate": 6.533333333333333e-06, "loss": 7.4409, "step": 400 },
    { "epoch": 0.09, "learning_rate": 9.866666666666667e-06, "loss": 6.6304, "step": 600 },
    { "epoch": 0.12, "learning_rate": 1.3183333333333333e-05, "loss": 5.8163, "step": 800 },
    { "epoch": 0.15, "learning_rate": 1.6516666666666667e-05, "loss": 5.2109, "step": 1000 },
    { "epoch": 0.18, "learning_rate": 1.985e-05, "loss": 4.7702, "step": 1200 },
    { "epoch": 0.21, "learning_rate": 2.3183333333333336e-05, "loss": 4.4959, "step": 1400 },
    { "epoch": 0.24, "learning_rate": 2.6516666666666666e-05, "loss": 4.3014, "step": 1600 },
    { "epoch": 0.27, "learning_rate": 2.985e-05, "loss": 4.1589, "step": 1800 },
    { "epoch": 0.3, "learning_rate": 3.318333333333333e-05, "loss": 4.0593, "step": 2000 },
    { "epoch": 0.33, "learning_rate": 3.651666666666667e-05, "loss": 3.9601, "step": 2200 },
    { "epoch": 0.36, "learning_rate": 3.9850000000000006e-05, "loss": 3.8843, "step": 2400 },
    { "epoch": 0.39, "learning_rate": 4.318333333333334e-05, "loss": 3.8441, "step": 2600 },
    { "epoch": 0.42, "learning_rate": 4.651666666666667e-05, "loss": 3.8172, "step": 2800 },
    { "epoch": 0.46, "learning_rate": 4.9850000000000006e-05, "loss": 3.7464, "step": 3000 },
    { "epoch": 0.49, "learning_rate": 4.968108198363667e-05, "loss": 3.7161, "step": 3200 },
    { "epoch": 0.52, "learning_rate": 4.934713641676407e-05, "loss": 3.6457, "step": 3400 },
    { "epoch": 0.55, "learning_rate": 4.901319084989147e-05, "loss": 3.61, "step": 3600 },
    { "epoch": 0.58, "learning_rate": 4.867924528301887e-05, "loss": 3.579, "step": 3800 },
    { "epoch": 0.61, "learning_rate": 4.834529971614627e-05, "loss": 3.5302, "step": 4000 },
    { "epoch": 0.64, "learning_rate": 4.8011354149273667e-05, "loss": 3.5092, "step": 4200 },
    { "epoch": 0.67, "learning_rate": 4.767740858240107e-05, "loss": 3.485, "step": 4400 },
    { "epoch": 0.7, "learning_rate": 4.734346301552847e-05, "loss": 3.4616, "step": 4600 },
    { "epoch": 0.73, "learning_rate": 4.7009517448655874e-05, "loss": 3.4466, "step": 4800 },
    { "epoch": 0.76, "learning_rate": 4.667557188178327e-05, "loss": 3.4108, "step": 5000 },
    { "epoch": 0.79, "learning_rate": 4.6341626314910676e-05, "loss": 3.3687, "step": 5200 },
    { "epoch": 0.82, "learning_rate": 4.6007680748038074e-05, "loss": 3.3308, "step": 5400 },
    { "epoch": 0.85, "learning_rate": 4.567373518116547e-05, "loss": 3.3314, "step": 5600 },
    { "epoch": 0.88, "learning_rate": 4.533978961429287e-05, "loss": 3.3101, "step": 5800 },
    { "epoch": 0.91, "learning_rate": 4.500584404742027e-05, "loss": 3.3053, "step": 6000 },
    { "epoch": 0.94, "learning_rate": 4.467189848054767e-05, "loss": 3.2995, "step": 6200 },
    { "epoch": 0.97, "learning_rate": 4.433795291367508e-05, "loss": 3.2495, "step": 6400 },
    { "epoch": 1.0, "learning_rate": 4.4004007346802475e-05, "loss": 3.2186, "step": 6600 },
    { "epoch": 1.03, "learning_rate": 4.367006177992987e-05, "loss": 2.9888, "step": 6800 },
    { "epoch": 1.06, "learning_rate": 4.333611621305728e-05, "loss": 2.98, "step": 7000 },
    { "epoch": 1.09, "learning_rate": 4.3002170646184675e-05, "loss": 2.9622, "step": 7200 },
    { "epoch": 1.12, "learning_rate": 4.266822507931207e-05, "loss": 2.9751, "step": 7400 },
    { "epoch": 1.15, "learning_rate": 4.233427951243947e-05, "loss": 2.9455, "step": 7600 },
    { "epoch": 1.18, "learning_rate": 4.200033394556687e-05, "loss": 2.9657, "step": 7800 },
    { "epoch": 1.21, "learning_rate": 4.1666388378694274e-05, "loss": 2.9428, "step": 8000 },
    { "epoch": 1.24, "learning_rate": 4.133244281182168e-05, "loss": 2.96, "step": 8200 },
    { "epoch": 1.27, "learning_rate": 4.0998497244949076e-05, "loss": 2.935, "step": 8400 },
    { "epoch": 1.31, "learning_rate": 4.0664551678076474e-05, "loss": 2.9543, "step": 8600 },
    { "epoch": 1.34, "learning_rate": 4.033060611120388e-05, "loss": 2.9213, "step": 8800 },
    { "epoch": 1.37, "learning_rate": 3.9996660544331277e-05, "loss": 2.9299, "step": 9000 },
    { "epoch": 1.4, "learning_rate": 3.9662714977458674e-05, "loss": 2.9429, "step": 9200 },
    { "epoch": 1.43, "learning_rate": 3.932876941058607e-05, "loss": 2.936, "step": 9400 },
    { "epoch": 1.46, "learning_rate": 3.899649357154784e-05, "loss": 2.8932, "step": 9600 },
    { "epoch": 1.49, "learning_rate": 3.8662548004675245e-05, "loss": 2.9069, "step": 9800 },
    { "epoch": 1.52, "learning_rate": 3.832860243780264e-05, "loss": 2.8815, "step": 10000 },
    { "epoch": 1.55, "learning_rate": 3.799465687093004e-05, "loss": 2.8844, "step": 10200 },
    { "epoch": 1.58, "learning_rate": 3.766071130405744e-05, "loss": 2.8921, "step": 10400 },
    { "epoch": 1.61, "learning_rate": 3.73284354650192e-05, "loss": 2.8557, "step": 10600 },
    { "epoch": 1.64, "learning_rate": 3.6994489898146604e-05, "loss": 2.8768, "step": 10800 },
    { "epoch": 1.67, "learning_rate": 3.6660544331274e-05, "loss": 2.8527, "step": 11000 },
    { "epoch": 1.7, "learning_rate": 3.632659876440141e-05, "loss": 2.8839, "step": 11200 },
    { "epoch": 1.73, "learning_rate": 3.5992653197528805e-05, "loss": 2.8512, "step": 11400 },
    { "epoch": 1.76, "learning_rate": 3.565870763065621e-05, "loss": 2.8428, "step": 11600 },
    { "epoch": 1.79, "learning_rate": 3.532476206378361e-05, "loss": 2.8329, "step": 11800 },
    { "epoch": 1.82, "learning_rate": 3.4990816496911005e-05, "loss": 2.8266, "step": 12000 },
    { "epoch": 1.85, "learning_rate": 3.46568709300384e-05, "loss": 2.8282, "step": 12200 },
    { "epoch": 1.88, "learning_rate": 3.4324595091000164e-05, "loss": 2.8202, "step": 12400 },
    { "epoch": 1.91, "learning_rate": 3.399064952412757e-05, "loss": 2.8035, "step": 12600 },
    { "epoch": 1.94, "learning_rate": 3.365670395725497e-05, "loss": 2.8211, "step": 12800 },
    { "epoch": 1.97, "learning_rate": 3.332275839038237e-05, "loss": 2.796, "step": 13000 },
    { "epoch": 2.0, "learning_rate": 3.298881282350977e-05, "loss": 2.766, "step": 13200 },
    { "epoch": 2.03, "learning_rate": 3.265486725663717e-05, "loss": 2.5018, "step": 13400 },
    { "epoch": 2.06, "learning_rate": 3.232092168976457e-05, "loss": 2.5239, "step": 13600 },
    { "epoch": 2.09, "learning_rate": 3.198697612289197e-05, "loss": 2.4989, "step": 13800 },
    { "epoch": 2.12, "learning_rate": 3.165303055601937e-05, "loss": 2.5141, "step": 14000 },
    { "epoch": 2.16, "learning_rate": 3.1319084989146766e-05, "loss": 2.4981, "step": 14200 },
    { "epoch": 2.19, "learning_rate": 3.098513942227417e-05, "loss": 2.5149, "step": 14400 },
    { "epoch": 2.22, "learning_rate": 3.0651193855401575e-05, "loss": 2.4817, "step": 14600 },
    { "epoch": 2.25, "learning_rate": 3.031724828852897e-05, "loss": 2.5131, "step": 14800 },
    { "epoch": 2.28, "learning_rate": 2.998330272165637e-05, "loss": 2.5058, "step": 15000 },
    { "epoch": 2.31, "learning_rate": 2.964935715478377e-05, "loss": 2.5254, "step": 15200 },
    { "epoch": 2.34, "learning_rate": 2.9315411587911173e-05, "loss": 2.5074, "step": 15400 },
    { "epoch": 2.37, "learning_rate": 2.898146602103857e-05, "loss": 2.5147, "step": 15600 },
    { "epoch": 2.4, "learning_rate": 2.8647520454165972e-05, "loss": 2.4908, "step": 15800 },
    { "epoch": 2.43, "learning_rate": 2.8315244615127734e-05, "loss": 2.478, "step": 16000 },
    { "epoch": 2.46, "learning_rate": 2.7981299048255138e-05, "loss": 2.5051, "step": 16200 },
    { "epoch": 2.49, "learning_rate": 2.7647353481382536e-05, "loss": 2.5101, "step": 16400 },
    { "epoch": 2.52, "learning_rate": 2.7313407914509937e-05, "loss": 2.502, "step": 16600 },
    { "epoch": 2.55, "learning_rate": 2.6979462347637335e-05, "loss": 2.5053, "step": 16800 },
    { "epoch": 2.58, "learning_rate": 2.6645516780764733e-05, "loss": 2.4969, "step": 17000 },
    { "epoch": 2.61, "learning_rate": 2.6311571213892138e-05, "loss": 2.4882, "step": 17200 },
    { "epoch": 2.64, "learning_rate": 2.597762564701954e-05, "loss": 2.4755, "step": 17400 },
    { "epoch": 2.67, "learning_rate": 2.5643680080146937e-05, "loss": 2.4798, "step": 17600 },
    { "epoch": 2.7, "learning_rate": 2.5309734513274335e-05, "loss": 2.4795, "step": 17800 },
    { "epoch": 2.73, "learning_rate": 2.4975788946401736e-05, "loss": 2.4913, "step": 18000 },
    { "epoch": 2.76, "learning_rate": 2.464184337952914e-05, "loss": 2.4894, "step": 18200 },
    { "epoch": 2.79, "learning_rate": 2.430789781265654e-05, "loss": 2.4678, "step": 18400 },
    { "epoch": 2.82, "learning_rate": 2.3973952245783936e-05, "loss": 2.4418, "step": 18600 },
    { "epoch": 2.85, "learning_rate": 2.3640006678911338e-05, "loss": 2.4705, "step": 18800 },
    { "epoch": 2.88, "learning_rate": 2.330606111203874e-05, "loss": 2.4659, "step": 19000 },
    { "epoch": 2.91, "learning_rate": 2.297211554516614e-05, "loss": 2.4752, "step": 19200 },
    { "epoch": 2.94, "learning_rate": 2.2638169978293538e-05, "loss": 2.4694, "step": 19400 },
    { "epoch": 2.97, "learning_rate": 2.230422441142094e-05, "loss": 2.4442, "step": 19600 },
    { "epoch": 3.01, "learning_rate": 2.197027884454834e-05, "loss": 2.4312, "step": 19800 },
    { "epoch": 3.04, "learning_rate": 2.1636333277675742e-05, "loss": 2.2147, "step": 20000 },
    { "epoch": 3.07, "learning_rate": 2.130238771080314e-05, "loss": 2.2146, "step": 20200 },
    { "epoch": 3.1, "learning_rate": 2.0968442143930538e-05, "loss": 2.213, "step": 20400 },
    { "epoch": 3.13, "learning_rate": 2.0634496577057942e-05, "loss": 2.2176, "step": 20600 },
    { "epoch": 3.16, "learning_rate": 2.030055101018534e-05, "loss": 2.2169, "step": 20800 },
    { "epoch": 3.19, "learning_rate": 1.9968275171147105e-05, "loss": 2.223, "step": 21000 },
    { "epoch": 3.22, "learning_rate": 1.9634329604274503e-05, "loss": 2.2279, "step": 21200 },
    { "epoch": 3.25, "learning_rate": 1.9300384037401907e-05, "loss": 2.2322, "step": 21400 },
    { "epoch": 3.28, "learning_rate": 1.8966438470529305e-05, "loss": 2.2428, "step": 21600 },
    { "epoch": 3.31, "learning_rate": 1.8632492903656703e-05, "loss": 2.235, "step": 21800 },
    { "epoch": 3.34, "learning_rate": 1.8298547336784104e-05, "loss": 2.2323, "step": 22000 },
    { "epoch": 3.37, "learning_rate": 1.7964601769911506e-05, "loss": 2.1997, "step": 22200 },
    { "epoch": 3.4, "learning_rate": 1.7630656203038907e-05, "loss": 2.2353, "step": 22400 },
    { "epoch": 3.43, "learning_rate": 1.7296710636166305e-05, "loss": 2.2258, "step": 22600 },
    { "epoch": 3.46, "learning_rate": 1.6962765069293706e-05, "loss": 2.2269, "step": 22800 },
    { "epoch": 3.49, "learning_rate": 1.6628819502421107e-05, "loss": 2.2351, "step": 23000 },
    { "epoch": 3.52, "learning_rate": 1.629654366338287e-05, "loss": 2.2387, "step": 23200 },
    { "epoch": 3.55, "learning_rate": 1.596259809651027e-05, "loss": 2.2285, "step": 23400 },
    { "epoch": 3.58, "learning_rate": 1.5628652529637668e-05, "loss": 2.2378, "step": 23600 },
    { "epoch": 3.61, "learning_rate": 1.5294706962765072e-05, "loss": 2.2274, "step": 23800 },
    { "epoch": 3.64, "learning_rate": 1.496076139589247e-05, "loss": 2.2081, "step": 24000 },
    { "epoch": 3.67, "learning_rate": 1.4626815829019871e-05, "loss": 2.2183, "step": 24200 },
    { "epoch": 3.7, "learning_rate": 1.4292870262147271e-05, "loss": 2.2316, "step": 24400 },
    { "epoch": 3.73, "learning_rate": 1.3958924695274672e-05, "loss": 2.2066, "step": 24600 },
    { "epoch": 3.76, "learning_rate": 1.3624979128402072e-05, "loss": 2.2214, "step": 24800 },
    { "epoch": 3.79, "learning_rate": 1.329103356152947e-05, "loss": 2.2308, "step": 25000 },
    { "epoch": 3.82, "learning_rate": 1.2957087994656873e-05, "loss": 2.2389, "step": 25200 },
    { "epoch": 3.85, "learning_rate": 1.262314242778427e-05, "loss": 2.226, "step": 25400 },
    { "epoch": 3.89, "learning_rate": 1.2289196860911672e-05, "loss": 2.2305, "step": 25600 },
    { "epoch": 3.92, "learning_rate": 1.1955251294039071e-05, "loss": 2.2206, "step": 25800 },
    { "epoch": 3.95, "learning_rate": 1.1621305727166473e-05, "loss": 2.2303, "step": 26000 },
    { "epoch": 3.98, "learning_rate": 1.1287360160293872e-05, "loss": 2.218, "step": 26200 },
    { "epoch": 4.01, "learning_rate": 1.0953414593421272e-05, "loss": 2.1781, "step": 26400 },
    { "epoch": 4.04, "learning_rate": 1.0619469026548673e-05, "loss": 2.0351, "step": 26600 },
    { "epoch": 4.07, "learning_rate": 1.0285523459676073e-05, "loss": 2.0511, "step": 26800 },
    { "epoch": 4.1, "learning_rate": 9.951577892803474e-06, "loss": 2.0529, "step": 27000 },
    { "epoch": 4.13, "learning_rate": 9.617632325930873e-06, "loss": 2.0306, "step": 27200 },
    { "epoch": 4.16, "learning_rate": 9.283686759058275e-06, "loss": 2.0486, "step": 27400 },
    { "epoch": 4.19, "learning_rate": 8.949741192185674e-06, "loss": 2.0505, "step": 27600 },
    { "epoch": 4.22, "learning_rate": 8.615795625313076e-06, "loss": 2.0395, "step": 27800 },
    { "epoch": 4.25, "learning_rate": 8.281850058440473e-06, "loss": 2.0387, "step": 28000 },
    { "epoch": 4.28, "learning_rate": 7.947904491567875e-06, "loss": 2.0537, "step": 28200 },
    { "epoch": 4.31, "learning_rate": 7.615628652529638e-06, "loss": 2.0339, "step": 28400 },
    { "epoch": 4.34, "learning_rate": 7.281683085657039e-06, "loss": 2.0676, "step": 28600 },
    { "epoch": 4.37, "learning_rate": 6.947737518784439e-06, "loss": 2.0498, "step": 28800 },
    { "epoch": 4.4, "learning_rate": 6.61379195191184e-06, "loss": 2.0341, "step": 29000 },
    { "epoch": 4.43, "learning_rate": 6.279846385039238e-06, "loss": 2.0446, "step": 29200 },
    { "epoch": 4.46, "learning_rate": 5.94590081816664e-06, "loss": 2.0435, "step": 29400 },
    { "epoch": 4.49, "learning_rate": 5.611955251294039e-06, "loss": 2.0513, "step": 29600 },
    { "epoch": 4.52, "learning_rate": 5.27800968442144e-06, "loss": 2.0378, "step": 29800 },
    { "epoch": 4.55, "learning_rate": 4.94406411754884e-06, "loss": 2.0463, "step": 30000 }
  ],
  "max_steps": 32945,
  "num_train_epochs": 5,
  "total_flos": 1.0567419081666048e+17,
  "trial_name": null,
  "trial_params": null
}