|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.08818342151675485, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003527336860670194, |
|
"grad_norm": 0.3190935552120209, |
|
"learning_rate": 1e-05, |
|
"loss": 2.057, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003527336860670194, |
|
"eval_loss": 2.136761426925659, |
|
"eval_runtime": 26.774, |
|
"eval_samples_per_second": 8.927, |
|
"eval_steps_per_second": 1.12, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007054673721340388, |
|
"grad_norm": 0.2545243203639984, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0049, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.010582010582010581, |
|
"grad_norm": 0.31179338693618774, |
|
"learning_rate": 3e-05, |
|
"loss": 2.213, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.014109347442680775, |
|
"grad_norm": 0.2565617859363556, |
|
"learning_rate": 4e-05, |
|
"loss": 1.879, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01763668430335097, |
|
"grad_norm": 0.2504461705684662, |
|
"learning_rate": 5e-05, |
|
"loss": 1.903, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01763668430335097, |
|
"eval_loss": 2.126079559326172, |
|
"eval_runtime": 25.895, |
|
"eval_samples_per_second": 9.23, |
|
"eval_steps_per_second": 1.159, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021164021164021163, |
|
"grad_norm": 0.2987203598022461, |
|
"learning_rate": 6e-05, |
|
"loss": 1.9851, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.024691358024691357, |
|
"grad_norm": 0.28110387921333313, |
|
"learning_rate": 7e-05, |
|
"loss": 1.9352, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02821869488536155, |
|
"grad_norm": 0.3136221170425415, |
|
"learning_rate": 8e-05, |
|
"loss": 2.1815, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.031746031746031744, |
|
"grad_norm": 0.30229005217552185, |
|
"learning_rate": 9e-05, |
|
"loss": 1.8252, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03527336860670194, |
|
"grad_norm": 0.3663163483142853, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1421, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03527336860670194, |
|
"eval_loss": 2.0246121883392334, |
|
"eval_runtime": 25.9348, |
|
"eval_samples_per_second": 9.215, |
|
"eval_steps_per_second": 1.157, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03880070546737213, |
|
"grad_norm": 0.3005220890045166, |
|
"learning_rate": 9.98458666866564e-05, |
|
"loss": 2.0235, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.042328042328042326, |
|
"grad_norm": 0.3517952859401703, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 2.0433, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04585537918871252, |
|
"grad_norm": 0.2727072536945343, |
|
"learning_rate": 9.861849601988383e-05, |
|
"loss": 1.8736, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04938271604938271, |
|
"grad_norm": 0.3203047215938568, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.8787, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05291005291005291, |
|
"grad_norm": 0.24644802510738373, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.8361, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05291005291005291, |
|
"eval_loss": 1.955213189125061, |
|
"eval_runtime": 25.9726, |
|
"eval_samples_per_second": 9.202, |
|
"eval_steps_per_second": 1.155, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0564373897707231, |
|
"grad_norm": 0.34007856249809265, |
|
"learning_rate": 9.45503262094184e-05, |
|
"loss": 1.9749, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.059964726631393295, |
|
"grad_norm": 0.3507412075996399, |
|
"learning_rate": 9.263200821770461e-05, |
|
"loss": 2.0134, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06349206349206349, |
|
"grad_norm": 0.3449397385120392, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.876, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06701940035273368, |
|
"grad_norm": 0.27750808000564575, |
|
"learning_rate": 8.802029828000156e-05, |
|
"loss": 1.9032, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07054673721340388, |
|
"grad_norm": 0.3935078978538513, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.9907, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07054673721340388, |
|
"eval_loss": 1.9234156608581543, |
|
"eval_runtime": 25.958, |
|
"eval_samples_per_second": 9.207, |
|
"eval_steps_per_second": 1.156, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07407407407407407, |
|
"grad_norm": 0.3393554091453552, |
|
"learning_rate": 8.247240241650918e-05, |
|
"loss": 2.202, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07760141093474426, |
|
"grad_norm": 0.3277629017829895, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.9742, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08112874779541446, |
|
"grad_norm": 0.2576124668121338, |
|
"learning_rate": 7.612492823579745e-05, |
|
"loss": 1.6614, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08465608465608465, |
|
"grad_norm": 0.2996937930583954, |
|
"learning_rate": 7.269952498697734e-05, |
|
"loss": 1.9815, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.08818342151675485, |
|
"grad_norm": 0.31329578161239624, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.9136, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08818342151675485, |
|
"eval_loss": 1.9130897521972656, |
|
"eval_runtime": 25.9489, |
|
"eval_samples_per_second": 9.21, |
|
"eval_steps_per_second": 1.156, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.852452537316147e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|