|
{ |
|
"best_metric": 0.7490401864051819, |
|
"best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e4l57-l/checkpoint-4000", |
|
"epoch": 0.5065856129685917, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.056287290329843524, |
|
"grad_norm": 150.1642303466797, |
|
"learning_rate": 4.929640887087696e-07, |
|
"loss": 0.3486, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.056287290329843524, |
|
"eval_loss": 1.1265727281570435, |
|
"eval_runtime": 123.7806, |
|
"eval_samples_per_second": 15.948, |
|
"eval_steps_per_second": 1.995, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11257458065968705, |
|
"grad_norm": 366.6913146972656, |
|
"learning_rate": 4.859281774175391e-07, |
|
"loss": 0.2733, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11257458065968705, |
|
"eval_loss": 0.9742079377174377, |
|
"eval_runtime": 125.8047, |
|
"eval_samples_per_second": 15.691, |
|
"eval_steps_per_second": 1.963, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16886187098953057, |
|
"grad_norm": 555.5016479492188, |
|
"learning_rate": 4.788922661263087e-07, |
|
"loss": 0.1851, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16886187098953057, |
|
"eval_loss": 0.9162159562110901, |
|
"eval_runtime": 126.7606, |
|
"eval_samples_per_second": 15.573, |
|
"eval_steps_per_second": 1.949, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2251491613193741, |
|
"grad_norm": 0.423260897397995, |
|
"learning_rate": 4.7185635483507824e-07, |
|
"loss": 0.1973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2251491613193741, |
|
"eval_loss": 0.8716973662376404, |
|
"eval_runtime": 127.6955, |
|
"eval_samples_per_second": 15.459, |
|
"eval_steps_per_second": 1.934, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2814364516492176, |
|
"grad_norm": 6.840139389038086, |
|
"learning_rate": 4.6482044354384774e-07, |
|
"loss": 0.1881, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2814364516492176, |
|
"eval_loss": 0.830047070980072, |
|
"eval_runtime": 128.0771, |
|
"eval_samples_per_second": 15.413, |
|
"eval_steps_per_second": 1.929, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.33772374197906113, |
|
"grad_norm": 729.8377685546875, |
|
"learning_rate": 4.577845322526173e-07, |
|
"loss": 0.1695, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33772374197906113, |
|
"eval_loss": 0.8265627026557922, |
|
"eval_runtime": 128.052, |
|
"eval_samples_per_second": 15.416, |
|
"eval_steps_per_second": 1.929, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.39401103230890466, |
|
"grad_norm": 7.96999938756926e-06, |
|
"learning_rate": 4.507486209613869e-07, |
|
"loss": 0.155, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39401103230890466, |
|
"eval_loss": 0.7721038460731506, |
|
"eval_runtime": 128.1871, |
|
"eval_samples_per_second": 15.399, |
|
"eval_steps_per_second": 1.927, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4502983226387482, |
|
"grad_norm": 0.006555848754942417, |
|
"learning_rate": 4.4371270967015645e-07, |
|
"loss": 0.1216, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4502983226387482, |
|
"eval_loss": 0.7490401864051819, |
|
"eval_runtime": 127.5516, |
|
"eval_samples_per_second": 15.476, |
|
"eval_steps_per_second": 1.936, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5065856129685917, |
|
"grad_norm": 0.0004515685432124883, |
|
"learning_rate": 4.36676798378926e-07, |
|
"loss": 0.1531, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5065856129685917, |
|
"eval_loss": 0.7533182501792908, |
|
"eval_runtime": 127.4797, |
|
"eval_samples_per_second": 15.485, |
|
"eval_steps_per_second": 1.938, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 35532, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1619632097372520.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|