|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.010256410256410256,
  "eval_steps": 2,
  "global_step": 15,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006837606837606838,
      "grad_norm": 0.01459360308945179,
      "learning_rate": 2e-05,
      "loss": 10.3793,
      "step": 1
    },
    {
      "epoch": 0.0006837606837606838,
      "eval_loss": 10.37917423248291,
      "eval_runtime": 3.5837,
      "eval_samples_per_second": 171.888,
      "eval_steps_per_second": 85.944,
      "step": 1
    },
    {
      "epoch": 0.0013675213675213675,
      "grad_norm": 0.013831038028001785,
      "learning_rate": 4e-05,
      "loss": 10.3799,
      "step": 2
    },
    {
      "epoch": 0.0013675213675213675,
      "eval_loss": 10.379171371459961,
      "eval_runtime": 3.5514,
      "eval_samples_per_second": 173.452,
      "eval_steps_per_second": 86.726,
      "step": 2
    },
    {
      "epoch": 0.0020512820512820513,
      "grad_norm": 0.014491274021565914,
      "learning_rate": 6e-05,
      "loss": 10.3789,
      "step": 3
    },
    {
      "epoch": 0.002735042735042735,
      "grad_norm": 0.01442716270685196,
      "learning_rate": 8e-05,
      "loss": 10.3815,
      "step": 4
    },
    {
      "epoch": 0.002735042735042735,
      "eval_loss": 10.379145622253418,
      "eval_runtime": 3.5666,
      "eval_samples_per_second": 172.711,
      "eval_steps_per_second": 86.356,
      "step": 4
    },
    {
      "epoch": 0.003418803418803419,
      "grad_norm": 0.01321357674896717,
      "learning_rate": 0.0001,
      "loss": 10.3781,
      "step": 5
    },
    {
      "epoch": 0.0041025641025641026,
      "grad_norm": 0.012160197831690311,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.3803,
      "step": 6
    },
    {
      "epoch": 0.0041025641025641026,
      "eval_loss": 10.379097938537598,
      "eval_runtime": 3.6086,
      "eval_samples_per_second": 170.703,
      "eval_steps_per_second": 85.352,
      "step": 6
    },
    {
      "epoch": 0.004786324786324786,
      "grad_norm": 0.01298796571791172,
      "learning_rate": 9.045084971874738e-05,
      "loss": 10.3767,
      "step": 7
    },
    {
      "epoch": 0.00547008547008547,
      "grad_norm": 0.012850708328187466,
      "learning_rate": 7.938926261462366e-05,
      "loss": 10.3795,
      "step": 8
    },
    {
      "epoch": 0.00547008547008547,
      "eval_loss": 10.379048347473145,
      "eval_runtime": 3.5895,
      "eval_samples_per_second": 171.61,
      "eval_steps_per_second": 85.805,
      "step": 8
    },
    {
      "epoch": 0.006153846153846154,
      "grad_norm": 0.013943369500339031,
      "learning_rate": 6.545084971874738e-05,
      "loss": 10.3819,
      "step": 9
    },
    {
      "epoch": 0.006837606837606838,
      "grad_norm": 0.017278488725423813,
      "learning_rate": 5e-05,
      "loss": 10.3765,
      "step": 10
    },
    {
      "epoch": 0.006837606837606838,
      "eval_loss": 10.379008293151855,
      "eval_runtime": 3.566,
      "eval_samples_per_second": 172.744,
      "eval_steps_per_second": 86.372,
      "step": 10
    },
    {
      "epoch": 0.007521367521367521,
      "grad_norm": 0.01218665111809969,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 10.3796,
      "step": 11
    },
    {
      "epoch": 0.008205128205128205,
      "grad_norm": 0.014596718363463879,
      "learning_rate": 2.061073738537635e-05,
      "loss": 10.3781,
      "step": 12
    },
    {
      "epoch": 0.008205128205128205,
      "eval_loss": 10.378983497619629,
      "eval_runtime": 3.7412,
      "eval_samples_per_second": 164.654,
      "eval_steps_per_second": 82.327,
      "step": 12
    },
    {
      "epoch": 0.008888888888888889,
      "grad_norm": 0.016098329797387123,
      "learning_rate": 9.549150281252633e-06,
      "loss": 10.3813,
      "step": 13
    },
    {
      "epoch": 0.009572649572649573,
      "grad_norm": 0.0119888074696064,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 10.3783,
      "step": 14
    },
    {
      "epoch": 0.009572649572649573,
      "eval_loss": 10.378974914550781,
      "eval_runtime": 3.5778,
      "eval_samples_per_second": 172.173,
      "eval_steps_per_second": 86.086,
      "step": 14
    },
    {
      "epoch": 0.010256410256410256,
      "grad_norm": 0.015922540798783302,
      "learning_rate": 0.0,
      "loss": 10.3789,
      "step": 15
    }
  ],
  "logging_steps": 1,
  "max_steps": 15,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 775854489600.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|