|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 38.72884283246977, |
|
"eval_steps": 500, |
|
"global_step": 7000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 8.0967, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 3.619444444444445e-05, |
|
"loss": 2.6161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"eval_bleu": 13.5109, |
|
"eval_gen_len": 19.1966, |
|
"eval_loss": 3.176215171813965, |
|
"eval_runtime": 298.8186, |
|
"eval_samples_per_second": 9.685, |
|
"eval_steps_per_second": 1.211, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"eval_bleu": 16.2868, |
|
"eval_gen_len": 18.7985, |
|
"eval_loss": 3.037487268447876, |
|
"eval_runtime": 263.6174, |
|
"eval_samples_per_second": 10.978, |
|
"eval_steps_per_second": 1.373, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 2.9270833333333338e-05, |
|
"loss": 1.4467, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"eval_bleu": 17.6991, |
|
"eval_gen_len": 18.1949, |
|
"eval_loss": 3.132826328277588, |
|
"eval_runtime": 253.8432, |
|
"eval_samples_per_second": 11.401, |
|
"eval_steps_per_second": 1.426, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"eval_bleu": 17.9052, |
|
"eval_gen_len": 18.3117, |
|
"eval_loss": 3.2690258026123047, |
|
"eval_runtime": 250.4816, |
|
"eval_samples_per_second": 11.554, |
|
"eval_steps_per_second": 1.445, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"learning_rate": 2.232638888888889e-05, |
|
"loss": 0.6809, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"eval_bleu": 18.4075, |
|
"eval_gen_len": 18.2149, |
|
"eval_loss": 3.3850443363189697, |
|
"eval_runtime": 248.7531, |
|
"eval_samples_per_second": 11.634, |
|
"eval_steps_per_second": 1.455, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"eval_bleu": 19.0339, |
|
"eval_gen_len": 18.009, |
|
"eval_loss": 3.446467876434326, |
|
"eval_runtime": 246.4301, |
|
"eval_samples_per_second": 11.744, |
|
"eval_steps_per_second": 1.469, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 1.5381944444444445e-05, |
|
"loss": 0.3422, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"eval_bleu": 18.7281, |
|
"eval_gen_len": 17.5902, |
|
"eval_loss": 3.5680091381073, |
|
"eval_runtime": 242.436, |
|
"eval_samples_per_second": 11.937, |
|
"eval_steps_per_second": 1.493, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 30.44, |
|
"eval_bleu": 19.1534, |
|
"eval_gen_len": 18.2177, |
|
"eval_loss": 3.634960651397705, |
|
"eval_runtime": 247.9202, |
|
"eval_samples_per_second": 11.673, |
|
"eval_steps_per_second": 1.46, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.1941, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"eval_bleu": 19.2575, |
|
"eval_gen_len": 17.8784, |
|
"eval_loss": 3.7152717113494873, |
|
"eval_runtime": 241.6987, |
|
"eval_samples_per_second": 11.974, |
|
"eval_steps_per_second": 1.498, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 35.97, |
|
"eval_bleu": 19.2475, |
|
"eval_gen_len": 17.9831, |
|
"eval_loss": 3.738178253173828, |
|
"eval_runtime": 245.2172, |
|
"eval_samples_per_second": 11.802, |
|
"eval_steps_per_second": 1.476, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 38.73, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.1271, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 38.73, |
|
"eval_bleu": 19.3045, |
|
"eval_gen_len": 17.9889, |
|
"eval_loss": 3.757262706756592, |
|
"eval_runtime": 245.3062, |
|
"eval_samples_per_second": 11.798, |
|
"eval_steps_per_second": 1.476, |
|
"step": 7000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 7200, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 1.941748272070656e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|