|
{ |
|
"best_metric": 1.2869539260864258, |
|
"best_model_checkpoint": "./checkpoints/pegasus-xsum/checkpoint-11412", |
|
"epoch": 12.0, |
|
"global_step": 11412, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-06, |
|
"loss": 8.3438, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-05, |
|
"loss": 7.5374, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.93178426774675e-06, |
|
"loss": 7.0835, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 6.375093936920166, |
|
"eval_runtime": 8.2682, |
|
"eval_samples_per_second": 60.473, |
|
"eval_steps_per_second": 7.62, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.863568535493498e-06, |
|
"loss": 6.7408, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.795352803240248e-06, |
|
"loss": 6.0547, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.727137070986997e-06, |
|
"loss": 4.6247, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.658921338733747e-06, |
|
"loss": 2.6363, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.4205403327941895, |
|
"eval_runtime": 8.2748, |
|
"eval_samples_per_second": 60.424, |
|
"eval_steps_per_second": 7.613, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.590705606480494e-06, |
|
"loss": 1.6679, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.522489874227244e-06, |
|
"loss": 1.5104, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.454274141973993e-06, |
|
"loss": 1.5184, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.386058409720743e-06, |
|
"loss": 1.4953, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.3500312566757202, |
|
"eval_runtime": 8.2581, |
|
"eval_samples_per_second": 60.546, |
|
"eval_steps_per_second": 7.629, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 9.31784267746749e-06, |
|
"loss": 1.4483, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.24962694521424e-06, |
|
"loss": 1.3758, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 9.18141121296099e-06, |
|
"loss": 1.3732, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.3236401081085205, |
|
"eval_runtime": 8.2531, |
|
"eval_samples_per_second": 60.584, |
|
"eval_steps_per_second": 7.634, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.11319548070774e-06, |
|
"loss": 1.4249, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 9.044979748454487e-06, |
|
"loss": 1.4052, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.976764016201236e-06, |
|
"loss": 1.3323, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 8.908548283947986e-06, |
|
"loss": 1.3573, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.311529517173767, |
|
"eval_runtime": 8.2933, |
|
"eval_samples_per_second": 60.29, |
|
"eval_steps_per_second": 7.597, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 8.840332551694735e-06, |
|
"loss": 1.3169, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 8.772116819441483e-06, |
|
"loss": 1.3142, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 8.703901087188233e-06, |
|
"loss": 1.3562, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 8.635685354934982e-06, |
|
"loss": 1.3171, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.3023052215576172, |
|
"eval_runtime": 8.3165, |
|
"eval_samples_per_second": 60.122, |
|
"eval_steps_per_second": 7.575, |
|
"step": 5706 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 8.567469622681732e-06, |
|
"loss": 1.3457, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 8.499253890428481e-06, |
|
"loss": 1.2753, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 8.431038158175229e-06, |
|
"loss": 1.2898, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 8.362822425921979e-06, |
|
"loss": 1.2848, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.2965139150619507, |
|
"eval_runtime": 8.2552, |
|
"eval_samples_per_second": 60.568, |
|
"eval_steps_per_second": 7.632, |
|
"step": 6657 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 8.294606693668728e-06, |
|
"loss": 1.272, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 8.226390961415478e-06, |
|
"loss": 1.2554, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 8.158175229162227e-06, |
|
"loss": 1.2676, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.2928757667541504, |
|
"eval_runtime": 8.2765, |
|
"eval_samples_per_second": 60.412, |
|
"eval_steps_per_second": 7.612, |
|
"step": 7608 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 8.089959496908975e-06, |
|
"loss": 1.305, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.021743764655724e-06, |
|
"loss": 1.2323, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.953528032402474e-06, |
|
"loss": 1.2779, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 7.885312300149223e-06, |
|
"loss": 1.2661, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.2911019325256348, |
|
"eval_runtime": 8.2542, |
|
"eval_samples_per_second": 60.575, |
|
"eval_steps_per_second": 7.632, |
|
"step": 8559 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 7.817096567895973e-06, |
|
"loss": 1.2381, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 7.74888083564272e-06, |
|
"loss": 1.2363, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 7.68066510338947e-06, |
|
"loss": 1.2129, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 7.612449371136219e-06, |
|
"loss": 1.232, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.288682460784912, |
|
"eval_runtime": 8.2411, |
|
"eval_samples_per_second": 60.671, |
|
"eval_steps_per_second": 7.645, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 7.544233638882968e-06, |
|
"loss": 1.2191, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 7.476017906629717e-06, |
|
"loss": 1.2438, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 7.4078021743764664e-06, |
|
"loss": 1.1995, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.2871848344802856, |
|
"eval_runtime": 8.2533, |
|
"eval_samples_per_second": 60.582, |
|
"eval_steps_per_second": 7.633, |
|
"step": 10461 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 7.339586442123215e-06, |
|
"loss": 1.223, |
|
"step": 10496 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 7.2713707098699646e-06, |
|
"loss": 1.1895, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 7.203154977616713e-06, |
|
"loss": 1.2133, |
|
"step": 11008 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 7.134939245363463e-06, |
|
"loss": 1.1951, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.2869539260864258, |
|
"eval_runtime": 8.2328, |
|
"eval_samples_per_second": 60.733, |
|
"eval_steps_per_second": 7.652, |
|
"step": 11412 |
|
} |
|
], |
|
"max_steps": 38040, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.3185127263515443e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|