|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7545638945233266, |
|
"eval_steps": 31, |
|
"global_step": 93, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008113590263691683, |
|
"eval_loss": 5.708795547485352, |
|
"eval_runtime": 34.9917, |
|
"eval_samples_per_second": 5.944, |
|
"eval_steps_per_second": 0.743, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02434077079107505, |
|
"grad_norm": 27.649152755737305, |
|
"learning_rate": 3e-05, |
|
"loss": 22.4374, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0486815415821501, |
|
"grad_norm": 24.12411880493164, |
|
"learning_rate": 6e-05, |
|
"loss": 20.3477, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.07302231237322515, |
|
"grad_norm": 22.957651138305664, |
|
"learning_rate": 9e-05, |
|
"loss": 13.9237, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0973630831643002, |
|
"grad_norm": 22.14281463623047, |
|
"learning_rate": 9.999238475781957e-05, |
|
"loss": 8.2192, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12170385395537525, |
|
"grad_norm": 15.811911582946777, |
|
"learning_rate": 9.99524110790929e-05, |
|
"loss": 5.4619, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1460446247464503, |
|
"grad_norm": 17.17688751220703, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 4.1998, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.17038539553752535, |
|
"grad_norm": 9.06302547454834, |
|
"learning_rate": 9.976980991835894e-05, |
|
"loss": 3.5102, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1947261663286004, |
|
"grad_norm": 9.071863174438477, |
|
"learning_rate": 9.962730758206611e-05, |
|
"loss": 2.9501, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.21906693711967545, |
|
"grad_norm": 10.715943336486816, |
|
"learning_rate": 9.945079316809585e-05, |
|
"loss": 2.8262, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2434077079107505, |
|
"grad_norm": 6.811004161834717, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 2.6217, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2515212981744422, |
|
"eval_loss": 0.6682190299034119, |
|
"eval_runtime": 35.2448, |
|
"eval_samples_per_second": 5.902, |
|
"eval_steps_per_second": 0.738, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.26774847870182555, |
|
"grad_norm": 5.427604675292969, |
|
"learning_rate": 9.899623523104149e-05, |
|
"loss": 2.4006, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2920892494929006, |
|
"grad_norm": 7.537416458129883, |
|
"learning_rate": 9.871850323926177e-05, |
|
"loss": 2.7397, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.31643002028397565, |
|
"grad_norm": 8.674805641174316, |
|
"learning_rate": 9.84073820189054e-05, |
|
"loss": 2.4828, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3407707910750507, |
|
"grad_norm": 8.167730331420898, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 2.5302, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.36511156186612576, |
|
"grad_norm": 10.937932014465332, |
|
"learning_rate": 9.768584753741134e-05, |
|
"loss": 2.3383, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3894523326572008, |
|
"grad_norm": 3.8246636390686035, |
|
"learning_rate": 9.727592877996585e-05, |
|
"loss": 2.212, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"grad_norm": 4.667800426483154, |
|
"learning_rate": 9.683360946241989e-05, |
|
"loss": 2.3763, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.4381338742393509, |
|
"grad_norm": 4.990477561950684, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 2.3995, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.46247464503042596, |
|
"grad_norm": 9.09072494506836, |
|
"learning_rate": 9.58530037192562e-05, |
|
"loss": 2.545, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.486815415821501, |
|
"grad_norm": 5.338695049285889, |
|
"learning_rate": 9.53153893518325e-05, |
|
"loss": 2.3183, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5030425963488844, |
|
"eval_loss": 0.5645254850387573, |
|
"eval_runtime": 13.4155, |
|
"eval_samples_per_second": 15.504, |
|
"eval_steps_per_second": 1.938, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5111561866125761, |
|
"grad_norm": 3.9001667499542236, |
|
"learning_rate": 9.474671808010126e-05, |
|
"loss": 2.3843, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5354969574036511, |
|
"grad_norm": 6.379414081573486, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 2.2963, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5598377281947262, |
|
"grad_norm": 4.97390079498291, |
|
"learning_rate": 9.351778479699499e-05, |
|
"loss": 2.2517, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.5841784989858012, |
|
"grad_norm": 4.063937187194824, |
|
"learning_rate": 9.285836503510562e-05, |
|
"loss": 2.2437, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6085192697768763, |
|
"grad_norm": 3.602717161178589, |
|
"learning_rate": 9.21695722906443e-05, |
|
"loss": 2.1695, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6328600405679513, |
|
"grad_norm": 4.555128574371338, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 2.1648, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6572008113590264, |
|
"grad_norm": 5.0544867515563965, |
|
"learning_rate": 9.070577591781597e-05, |
|
"loss": 2.0922, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.6815415821501014, |
|
"grad_norm": 4.92878532409668, |
|
"learning_rate": 8.993177550236464e-05, |
|
"loss": 2.141, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 3.6681647300720215, |
|
"learning_rate": 8.91304078426207e-05, |
|
"loss": 2.1638, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7302231237322515, |
|
"grad_norm": 4.451002597808838, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 2.1766, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7545638945233266, |
|
"grad_norm": 5.902685165405273, |
|
"learning_rate": 8.744778603945011e-05, |
|
"loss": 2.1005, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.7545638945233266, |
|
"eval_loss": 0.5568718314170837, |
|
"eval_runtime": 13.4611, |
|
"eval_samples_per_second": 15.452, |
|
"eval_steps_per_second": 1.931, |
|
"step": 93 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 31, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2373610700629606e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|