|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8011204481792715, |
|
"eval_steps": 50, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.6599863767623901, |
|
"eval_runtime": 3.0665, |
|
"eval_samples_per_second": 32.611, |
|
"eval_steps_per_second": 4.239, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.5268713235855103, |
|
"eval_runtime": 3.0649, |
|
"eval_samples_per_second": 32.627, |
|
"eval_steps_per_second": 4.242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.4272453784942627, |
|
"eval_runtime": 3.0703, |
|
"eval_samples_per_second": 32.57, |
|
"eval_steps_per_second": 4.234, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.3691519498825073, |
|
"eval_runtime": 3.0755, |
|
"eval_samples_per_second": 32.515, |
|
"eval_steps_per_second": 4.227, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.3439632654190063, |
|
"eval_runtime": 3.0781, |
|
"eval_samples_per_second": 32.488, |
|
"eval_steps_per_second": 4.223, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.2951898574829102, |
|
"eval_runtime": 3.0608, |
|
"eval_samples_per_second": 32.671, |
|
"eval_steps_per_second": 4.247, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.2598179578781128, |
|
"eval_runtime": 3.0651, |
|
"eval_samples_per_second": 32.626, |
|
"eval_steps_per_second": 4.241, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.2034798860549927, |
|
"eval_runtime": 3.0667, |
|
"eval_samples_per_second": 32.608, |
|
"eval_steps_per_second": 4.239, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.1628668308258057, |
|
"eval_runtime": 3.0674, |
|
"eval_samples_per_second": 32.601, |
|
"eval_steps_per_second": 4.238, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.6585, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.1156766414642334, |
|
"eval_runtime": 3.0714, |
|
"eval_samples_per_second": 32.558, |
|
"eval_steps_per_second": 4.233, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.0700278282165527, |
|
"eval_runtime": 3.0584, |
|
"eval_samples_per_second": 32.696, |
|
"eval_steps_per_second": 4.251, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.0359002351760864, |
|
"eval_runtime": 3.0694, |
|
"eval_samples_per_second": 32.58, |
|
"eval_steps_per_second": 4.235, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.019242286682129, |
|
"eval_runtime": 3.0562, |
|
"eval_samples_per_second": 32.72, |
|
"eval_steps_per_second": 4.254, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.9841110706329346, |
|
"eval_runtime": 3.0664, |
|
"eval_samples_per_second": 32.611, |
|
"eval_steps_per_second": 4.239, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.993296205997467, |
|
"eval_runtime": 3.074, |
|
"eval_samples_per_second": 32.531, |
|
"eval_steps_per_second": 4.229, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.9330427646636963, |
|
"eval_runtime": 3.0586, |
|
"eval_samples_per_second": 32.695, |
|
"eval_steps_per_second": 4.25, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.9203641414642334, |
|
"eval_runtime": 3.0859, |
|
"eval_samples_per_second": 32.405, |
|
"eval_steps_per_second": 4.213, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.9058796763420105, |
|
"eval_runtime": 3.068, |
|
"eval_samples_per_second": 32.595, |
|
"eval_steps_per_second": 4.237, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.9017823934555054, |
|
"eval_runtime": 3.0852, |
|
"eval_samples_per_second": 32.413, |
|
"eval_steps_per_second": 4.214, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.3142, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.8834471702575684, |
|
"eval_runtime": 3.0917, |
|
"eval_samples_per_second": 32.344, |
|
"eval_steps_per_second": 4.205, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 0.8460087776184082, |
|
"eval_runtime": 3.0708, |
|
"eval_samples_per_second": 32.565, |
|
"eval_steps_per_second": 4.233, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.8248443007469177, |
|
"eval_runtime": 3.062, |
|
"eval_samples_per_second": 32.658, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.8010109066963196, |
|
"eval_runtime": 3.062, |
|
"eval_samples_per_second": 32.659, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.7615901231765747, |
|
"eval_runtime": 3.0705, |
|
"eval_samples_per_second": 32.568, |
|
"eval_steps_per_second": 4.234, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.750758945941925, |
|
"eval_runtime": 3.0726, |
|
"eval_samples_per_second": 32.545, |
|
"eval_steps_per_second": 4.231, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 0.7553930878639221, |
|
"eval_runtime": 3.0632, |
|
"eval_samples_per_second": 32.645, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.714480459690094, |
|
"eval_runtime": 3.0588, |
|
"eval_samples_per_second": 32.693, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.6865142583847046, |
|
"eval_runtime": 3.0601, |
|
"eval_samples_per_second": 32.678, |
|
"eval_steps_per_second": 4.248, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.6855456829071045, |
|
"eval_runtime": 3.0743, |
|
"eval_samples_per_second": 32.527, |
|
"eval_steps_per_second": 4.229, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.9985, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 0.6646093726158142, |
|
"eval_runtime": 3.0899, |
|
"eval_samples_per_second": 32.363, |
|
"eval_steps_per_second": 4.207, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.6673924922943115, |
|
"eval_runtime": 3.0744, |
|
"eval_samples_per_second": 32.526, |
|
"eval_steps_per_second": 4.228, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.6436119675636292, |
|
"eval_runtime": 3.0612, |
|
"eval_samples_per_second": 32.667, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 0.6476317644119263, |
|
"eval_runtime": 3.0724, |
|
"eval_samples_per_second": 32.548, |
|
"eval_steps_per_second": 4.231, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.6251476407051086, |
|
"eval_runtime": 3.0506, |
|
"eval_samples_per_second": 32.78, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.6191129088401794, |
|
"eval_runtime": 3.0634, |
|
"eval_samples_per_second": 32.643, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.6157693862915039, |
|
"eval_runtime": 3.084, |
|
"eval_samples_per_second": 32.426, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.5934409499168396, |
|
"eval_runtime": 3.0678, |
|
"eval_samples_per_second": 32.597, |
|
"eval_steps_per_second": 4.238, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.5992363691329956, |
|
"eval_runtime": 3.0628, |
|
"eval_samples_per_second": 32.649, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.5798881649971008, |
|
"eval_runtime": 3.0658, |
|
"eval_samples_per_second": 32.618, |
|
"eval_steps_per_second": 4.24, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8817, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.5694944858551025, |
|
"eval_runtime": 3.0588, |
|
"eval_samples_per_second": 32.692, |
|
"eval_steps_per_second": 4.25, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.5600036382675171, |
|
"eval_runtime": 3.073, |
|
"eval_samples_per_second": 32.542, |
|
"eval_steps_per_second": 4.23, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 0.5443231463432312, |
|
"eval_runtime": 3.0723, |
|
"eval_samples_per_second": 32.549, |
|
"eval_steps_per_second": 4.231, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.5324158668518066, |
|
"eval_runtime": 3.0759, |
|
"eval_samples_per_second": 32.511, |
|
"eval_steps_per_second": 4.226, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.5168842077255249, |
|
"eval_runtime": 3.0777, |
|
"eval_samples_per_second": 32.492, |
|
"eval_steps_per_second": 4.224, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.51568603515625, |
|
"eval_runtime": 3.0636, |
|
"eval_samples_per_second": 32.641, |
|
"eval_steps_per_second": 4.243, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.506223738193512, |
|
"eval_runtime": 3.0803, |
|
"eval_samples_per_second": 32.464, |
|
"eval_steps_per_second": 4.22, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 0.5046885013580322, |
|
"eval_runtime": 3.0565, |
|
"eval_samples_per_second": 32.717, |
|
"eval_steps_per_second": 4.253, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.4832480251789093, |
|
"eval_runtime": 3.0606, |
|
"eval_samples_per_second": 32.673, |
|
"eval_steps_per_second": 4.247, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.4789283275604248, |
|
"eval_runtime": 3.0598, |
|
"eval_samples_per_second": 32.682, |
|
"eval_steps_per_second": 4.249, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.7454, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.4778507947921753, |
|
"eval_runtime": 3.0634, |
|
"eval_samples_per_second": 32.644, |
|
"eval_steps_per_second": 4.244, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 0.4714885652065277, |
|
"eval_runtime": 3.0662, |
|
"eval_samples_per_second": 32.614, |
|
"eval_steps_per_second": 4.24, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 0.4641421437263489, |
|
"eval_runtime": 3.0498, |
|
"eval_samples_per_second": 32.789, |
|
"eval_steps_per_second": 4.263, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.45793575048446655, |
|
"eval_runtime": 3.0673, |
|
"eval_samples_per_second": 32.602, |
|
"eval_steps_per_second": 4.238, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 0.44898709654808044, |
|
"eval_runtime": 3.0751, |
|
"eval_samples_per_second": 32.519, |
|
"eval_steps_per_second": 4.227, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.44389522075653076, |
|
"eval_runtime": 3.0534, |
|
"eval_samples_per_second": 32.751, |
|
"eval_steps_per_second": 4.258, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 0.4495103061199188, |
|
"eval_runtime": 3.0657, |
|
"eval_samples_per_second": 32.619, |
|
"eval_steps_per_second": 4.24, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.4390113055706024, |
|
"eval_runtime": 3.0609, |
|
"eval_samples_per_second": 32.67, |
|
"eval_steps_per_second": 4.247, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.43222054839134216, |
|
"eval_runtime": 3.0574, |
|
"eval_samples_per_second": 32.707, |
|
"eval_steps_per_second": 4.252, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 0.4251094460487366, |
|
"eval_runtime": 3.0796, |
|
"eval_samples_per_second": 32.472, |
|
"eval_steps_per_second": 4.221, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6646, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.42413151264190674, |
|
"eval_runtime": 3.0651, |
|
"eval_samples_per_second": 32.626, |
|
"eval_steps_per_second": 4.241, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4000, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"total_flos": 4.107859357335552e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|