|
{ |
|
"best_metric": 0.8619909882545471, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mnli/checkpoint-27612", |
|
"epoch": 14.0, |
|
"global_step": 42952, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.9907, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5485481406011208, |
|
"eval_loss": 0.9407809376716614, |
|
"eval_runtime": 15.7503, |
|
"eval_samples_per_second": 623.163, |
|
"eval_steps_per_second": 4.889, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8000325945241203e-05, |
|
"loss": 0.9094, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5818644931227712, |
|
"eval_loss": 0.9065373539924622, |
|
"eval_runtime": 15.8749, |
|
"eval_samples_per_second": 618.272, |
|
"eval_steps_per_second": 4.85, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.70006518904824e-05, |
|
"loss": 0.8828, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.587366276107998, |
|
"eval_loss": 0.8968958854675293, |
|
"eval_runtime": 15.7921, |
|
"eval_samples_per_second": 621.515, |
|
"eval_steps_per_second": 4.876, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.60013037809648e-05, |
|
"loss": 0.8627, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5967396841569027, |
|
"eval_loss": 0.882131040096283, |
|
"eval_runtime": 15.8782, |
|
"eval_samples_per_second": 618.145, |
|
"eval_steps_per_second": 4.849, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5001629726206e-05, |
|
"loss": 0.8429, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6003056546102904, |
|
"eval_loss": 0.8742825984954834, |
|
"eval_runtime": 15.8321, |
|
"eval_samples_per_second": 619.944, |
|
"eval_steps_per_second": 4.864, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.40022816166884e-05, |
|
"loss": 0.8207, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6077432501273561, |
|
"eval_loss": 0.8663310408592224, |
|
"eval_runtime": 15.7218, |
|
"eval_samples_per_second": 624.293, |
|
"eval_steps_per_second": 4.898, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.30026075619296e-05, |
|
"loss": 0.7989, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6099847172694854, |
|
"eval_loss": 0.8664844632148743, |
|
"eval_runtime": 15.7937, |
|
"eval_samples_per_second": 621.449, |
|
"eval_steps_per_second": 4.875, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2003259452412e-05, |
|
"loss": 0.7789, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6095771777890984, |
|
"eval_loss": 0.8751330971717834, |
|
"eval_runtime": 15.8764, |
|
"eval_samples_per_second": 618.214, |
|
"eval_steps_per_second": 4.85, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.100358539765319e-05, |
|
"loss": 0.7603, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6138563423331636, |
|
"eval_loss": 0.8619909882545471, |
|
"eval_runtime": 15.8877, |
|
"eval_samples_per_second": 617.774, |
|
"eval_steps_per_second": 4.847, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.000423728813559e-05, |
|
"loss": 0.7425, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6094752929190015, |
|
"eval_loss": 0.8813133835792542, |
|
"eval_runtime": 15.7795, |
|
"eval_samples_per_second": 622.011, |
|
"eval_steps_per_second": 4.88, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9004563233376796e-05, |
|
"loss": 0.7238, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6141619969434539, |
|
"eval_loss": 0.8913043737411499, |
|
"eval_runtime": 15.8959, |
|
"eval_samples_per_second": 617.456, |
|
"eval_steps_per_second": 4.844, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8005215123859196e-05, |
|
"loss": 0.7063, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6056036678553235, |
|
"eval_loss": 0.9025644063949585, |
|
"eval_runtime": 15.8221, |
|
"eval_samples_per_second": 620.334, |
|
"eval_steps_per_second": 4.867, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7005541069100394e-05, |
|
"loss": 0.6891, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5975547631176771, |
|
"eval_loss": 0.9266977310180664, |
|
"eval_runtime": 15.8778, |
|
"eval_samples_per_second": 618.16, |
|
"eval_steps_per_second": 4.85, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.600586701434159e-05, |
|
"loss": 0.6721, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6104941416199694, |
|
"eval_loss": 0.9071937203407288, |
|
"eval_runtime": 15.8196, |
|
"eval_samples_per_second": 620.435, |
|
"eval_steps_per_second": 4.867, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 42952, |
|
"total_flos": 1.7238466281812787e+17, |
|
"train_loss": 0.7986461733901361, |
|
"train_runtime": 29517.4796, |
|
"train_samples_per_second": 665.202, |
|
"train_steps_per_second": 5.197 |
|
} |
|
], |
|
"max_steps": 153400, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.7238466281812787e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|