|
{ |
|
"best_metric": 8.260101318359375, |
|
"best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/2023-11-20/16-19-50/checkpoints/checkpoint-70", |
|
"epoch": 0.56, |
|
"eval_steps": 10, |
|
"global_step": 70, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 10.8117, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 10.801591873168945, |
|
"eval_runtime": 2.6865, |
|
"eval_samples_per_second": 372.226, |
|
"eval_steps_per_second": 46.528, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2e-05, |
|
"loss": 10.7909, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 10.775726318359375, |
|
"eval_runtime": 2.779, |
|
"eval_samples_per_second": 359.838, |
|
"eval_steps_per_second": 44.98, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8e-05, |
|
"loss": 10.7524, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 10.704446792602539, |
|
"eval_runtime": 2.7821, |
|
"eval_samples_per_second": 359.435, |
|
"eval_steps_per_second": 44.929, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 10.5629, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 10.172320365905762, |
|
"eval_runtime": 2.7542, |
|
"eval_samples_per_second": 363.084, |
|
"eval_steps_per_second": 45.385, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 9.7608, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 9.128562927246094, |
|
"eval_runtime": 2.7812, |
|
"eval_samples_per_second": 359.553, |
|
"eval_steps_per_second": 44.944, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 8.9735, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 8.557924270629883, |
|
"eval_runtime": 2.6727, |
|
"eval_samples_per_second": 374.147, |
|
"eval_steps_per_second": 46.768, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 8.5033, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 8.260101318359375, |
|
"eval_runtime": 2.7328, |
|
"eval_samples_per_second": 365.926, |
|
"eval_steps_per_second": 45.741, |
|
"step": 70 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"total_flos": 4923721736640.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|