{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.02061218179944347,
  "eval_steps": 10,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00041224363598886944,
      "eval_loss": 2.2431154251098633,
      "eval_runtime": 80.7452,
      "eval_samples_per_second": 12.657,
      "eval_steps_per_second": 6.329,
      "step": 1
    },
    {
      "epoch": 0.0020612181799443473,
      "grad_norm": 2.1363298892974854,
      "learning_rate": 5e-05,
      "loss": 1.6744,
      "step": 5
    },
    {
      "epoch": 0.0041224363598886945,
      "grad_norm": 2.858330488204956,
      "learning_rate": 0.0001,
      "loss": 1.7039,
      "step": 10
    },
    {
      "epoch": 0.0041224363598886945,
      "eval_loss": 1.7575262784957886,
      "eval_runtime": 85.318,
      "eval_samples_per_second": 11.979,
      "eval_steps_per_second": 5.989,
      "step": 10
    },
    {
      "epoch": 0.006183654539833041,
      "grad_norm": 3.944808006286621,
      "learning_rate": 9.619397662556435e-05,
      "loss": 1.2071,
      "step": 15
    },
    {
      "epoch": 0.008244872719777389,
      "grad_norm": 1.3849436044692993,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.157,
      "step": 20
    },
    {
      "epoch": 0.008244872719777389,
      "eval_loss": 1.1688629388809204,
      "eval_runtime": 85.7517,
      "eval_samples_per_second": 11.918,
      "eval_steps_per_second": 5.959,
      "step": 20
    },
    {
      "epoch": 0.010306090899721735,
      "grad_norm": 2.004284143447876,
      "learning_rate": 6.91341716182545e-05,
      "loss": 1.1034,
      "step": 25
    },
    {
      "epoch": 0.012367309079666083,
      "grad_norm": 1.3697354793548584,
      "learning_rate": 5e-05,
      "loss": 1.0457,
      "step": 30
    },
    {
      "epoch": 0.012367309079666083,
      "eval_loss": 1.1273069381713867,
      "eval_runtime": 85.6446,
      "eval_samples_per_second": 11.933,
      "eval_steps_per_second": 5.967,
      "step": 30
    },
    {
      "epoch": 0.01442852725961043,
      "grad_norm": 2.059553623199463,
      "learning_rate": 3.086582838174551e-05,
      "loss": 1.0748,
      "step": 35
    },
    {
      "epoch": 0.016489745439554778,
      "grad_norm": 1.5975080728530884,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.9761,
      "step": 40
    },
    {
      "epoch": 0.016489745439554778,
      "eval_loss": 1.1080728769302368,
      "eval_runtime": 85.784,
      "eval_samples_per_second": 11.914,
      "eval_steps_per_second": 5.957,
      "step": 40
    },
    {
      "epoch": 0.018550963619499122,
      "grad_norm": 1.6504549980163574,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 1.1147,
      "step": 45
    },
    {
      "epoch": 0.02061218179944347,
      "grad_norm": 1.9928545951843262,
      "learning_rate": 0.0,
      "loss": 0.9259,
      "step": 50
    },
    {
      "epoch": 0.02061218179944347,
      "eval_loss": 1.1036310195922852,
      "eval_runtime": 85.8891,
      "eval_samples_per_second": 11.899,
      "eval_steps_per_second": 5.95,
      "step": 50
    }
  ],
  "logging_steps": 5,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9432777665544192.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}