|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9697275479313827, |
|
"eval_steps": 500, |
|
"global_step": 3844, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.974811083123427e-05, |
|
"loss": 1.0444, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019949622166246853, |
|
"loss": 1.002, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018895430333613683, |
|
"loss": 0.9986, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017785253714606113, |
|
"loss": 0.9637, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00016675077095598543, |
|
"loss": 0.9805, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00015564900476590973, |
|
"loss": 0.9943, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00014454723857583403, |
|
"loss": 0.9564, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013344547238575835, |
|
"loss": 0.9335, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00012234370619568265, |
|
"loss": 0.9133, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011124194000560695, |
|
"loss": 0.9323, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00010014017381553126, |
|
"loss": 0.9367, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.903840762545556e-05, |
|
"loss": 0.9114, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.793664143537988e-05, |
|
"loss": 0.8827, |
|
"step": 2574 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 6.683487524530419e-05, |
|
"loss": 0.8703, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 5.573310905522848e-05, |
|
"loss": 0.9036, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.4631342865152794e-05, |
|
"loss": 0.9028, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.352957667507709e-05, |
|
"loss": 0.8646, |
|
"step": 3366 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.2427810485001402e-05, |
|
"loss": 0.8536, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.1382113821138211e-05, |
|
"loss": 0.849, |
|
"step": 3762 |
|
} |
|
], |
|
"logging_steps": 198, |
|
"max_steps": 3964, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 2.6878066792228454e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|