|
{ |
|
"best_metric": 2.0136280059814453, |
|
"best_model_checkpoint": "cdetr-cd45rb-s/checkpoint-2420", |
|
"epoch": 10.0, |
|
"global_step": 2420, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6006, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.214585542678833, |
|
"eval_runtime": 17.9919, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 0.723, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.2409, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.110588788986206, |
|
"eval_runtime": 17.3999, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 0.747, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.2027, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.145615339279175, |
|
"eval_runtime": 17.7242, |
|
"eval_samples_per_second": 5.642, |
|
"eval_steps_per_second": 0.733, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.183, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.0863144397735596, |
|
"eval_runtime": 17.5156, |
|
"eval_samples_per_second": 5.709, |
|
"eval_steps_per_second": 0.742, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.0628, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.0293631553649902, |
|
"eval_runtime": 17.5041, |
|
"eval_samples_per_second": 5.713, |
|
"eval_steps_per_second": 0.743, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.1588, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.0859251022338867, |
|
"eval_runtime": 17.1118, |
|
"eval_samples_per_second": 5.844, |
|
"eval_steps_per_second": 0.76, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3e-05, |
|
"loss": 2.1731, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.073065757751465, |
|
"eval_runtime": 16.6383, |
|
"eval_samples_per_second": 6.01, |
|
"eval_steps_per_second": 0.781, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1041, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.1051650047302246, |
|
"eval_runtime": 16.6295, |
|
"eval_samples_per_second": 6.013, |
|
"eval_steps_per_second": 0.782, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0383, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.037771463394165, |
|
"eval_runtime": 17.1339, |
|
"eval_samples_per_second": 5.836, |
|
"eval_steps_per_second": 0.759, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.967, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.0136280059814453, |
|
"eval_runtime": 16.8066, |
|
"eval_samples_per_second": 5.95, |
|
"eval_steps_per_second": 0.774, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2420, |
|
"total_flos": 4.8214727269632e+18, |
|
"train_loss": 1.0441367535551718, |
|
"train_runtime": 1297.7653, |
|
"train_samples_per_second": 7.444, |
|
"train_steps_per_second": 1.865 |
|
} |
|
], |
|
"max_steps": 2420, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.8214727269632e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|