|
{ |
|
"best_metric": 0.6521381139755249, |
|
"best_model_checkpoint": "/hy-tmp/checkpoints/zhongjing_7-13/checkpoint-6000", |
|
"epoch": 2.271221728021198, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.944240397524184e-05, |
|
"loss": 0.7616, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.7090576887130737, |
|
"eval_runtime": 1700.4285, |
|
"eval_samples_per_second": 20.932, |
|
"eval_steps_per_second": 0.328, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.7731724820150744e-05, |
|
"loss": 0.6972, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.6910683512687683, |
|
"eval_runtime": 1700.3181, |
|
"eval_samples_per_second": 20.933, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5.4934584214999246e-05, |
|
"loss": 0.6845, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.6805794835090637, |
|
"eval_runtime": 1698.9819, |
|
"eval_samples_per_second": 20.95, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.116056731749404e-05, |
|
"loss": 0.6764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.6737232804298401, |
|
"eval_runtime": 1702.5596, |
|
"eval_samples_per_second": 20.906, |
|
"eval_steps_per_second": 0.327, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.6557530919724635e-05, |
|
"loss": 0.6701, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.6684596538543701, |
|
"eval_runtime": 1702.1448, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 0.327, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.131682830461389e-05, |
|
"loss": 0.663, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.6644229292869568, |
|
"eval_runtime": 1699.1079, |
|
"eval_samples_per_second": 20.948, |
|
"eval_steps_per_second": 0.328, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.5622841602995877e-05, |
|
"loss": 0.6594, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.661072313785553, |
|
"eval_runtime": 1696.6207, |
|
"eval_samples_per_second": 20.979, |
|
"eval_steps_per_second": 0.328, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.9720460586930557e-05, |
|
"loss": 0.6561, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.6583240032196045, |
|
"eval_runtime": 1699.0104, |
|
"eval_samples_per_second": 20.949, |
|
"eval_steps_per_second": 0.328, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.381734702787557e-05, |
|
"loss": 0.6538, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.6560451984405518, |
|
"eval_runtime": 1701.2915, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 0.327, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.815645475714211e-05, |
|
"loss": 0.6525, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.654275119304657, |
|
"eval_runtime": 1698.3722, |
|
"eval_samples_per_second": 20.957, |
|
"eval_steps_per_second": 0.328, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.2969355277673462e-05, |
|
"loss": 0.6488, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.6530821919441223, |
|
"eval_runtime": 1698.6748, |
|
"eval_samples_per_second": 20.953, |
|
"eval_steps_per_second": 0.328, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.438544816619625e-06, |
|
"loss": 0.6478, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.6521381139755249, |
|
"eval_runtime": 1697.953, |
|
"eval_samples_per_second": 20.962, |
|
"eval_steps_per_second": 0.328, |
|
"step": 6000 |
|
} |
|
], |
|
"max_steps": 7923, |
|
"num_train_epochs": 3, |
|
"total_flos": 9.200694994360966e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|