Bert_v11 / trainer_state.json
JhonMR's picture
End of training
594d485 verified
raw
history blame
8.75 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 500,
"global_step": 2208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 276,
"train_eval_accuracy": 0.7003401360544218,
"train_eval_f1": 0.6599009769808177,
"train_eval_loss": 1.0742188692092896,
"train_eval_precision": 0.7685367987097715,
"train_eval_recall": 0.7009459009381873,
"train_loss": 1.074218988418579,
"train_runtime": 292.4292,
"train_samples_per_second": 30.161,
"train_steps_per_second": 0.944
},
{
"epoch": 1.0,
"eval_accuracy": 0.6865079365079365,
"eval_f1": 0.6448401353068663,
"eval_loss": 1.1159266233444214,
"eval_precision": 0.7389370546469851,
"eval_recall": 0.685088186510769,
"eval_runtime": 125.2302,
"eval_samples_per_second": 30.184,
"eval_steps_per_second": 0.95,
"step": 276
},
{
"epoch": 2.0,
"step": 552,
"train_eval_accuracy": 0.9049886621315193,
"train_eval_f1": 0.9047389370162652,
"train_eval_loss": 0.3874468207359314,
"train_eval_precision": 0.9101320070777226,
"train_eval_recall": 0.9053131019265812,
"train_loss": 0.3874468505382538,
"train_runtime": 292.6477,
"train_samples_per_second": 30.139,
"train_steps_per_second": 0.943
},
{
"epoch": 2.0,
"eval_accuracy": 0.873015873015873,
"eval_f1": 0.8716046163578155,
"eval_loss": 0.4920203685760498,
"eval_precision": 0.8781278884267814,
"eval_recall": 0.8725450168508944,
"eval_runtime": 125.7044,
"eval_samples_per_second": 30.071,
"eval_steps_per_second": 0.947,
"step": 552
},
{
"epoch": 3.0,
"step": 828,
"train_eval_accuracy": 0.9286848072562358,
"train_eval_f1": 0.9285171455529603,
"train_eval_loss": 0.27065399289131165,
"train_eval_precision": 0.9322649415650033,
"train_eval_recall": 0.9289123109383434,
"train_loss": 0.27065402269363403,
"train_runtime": 292.5422,
"train_samples_per_second": 30.149,
"train_steps_per_second": 0.943
},
{
"epoch": 3.0,
"eval_accuracy": 0.8973544973544973,
"eval_f1": 0.8961220197070991,
"eval_loss": 0.42477917671203613,
"eval_precision": 0.9011567773926408,
"eval_recall": 0.8971253090647534,
"eval_runtime": 125.1591,
"eval_samples_per_second": 30.202,
"eval_steps_per_second": 0.951,
"step": 828
},
{
"epoch": 4.0,
"step": 1104,
"train_eval_accuracy": 0.9471655328798186,
"train_eval_f1": 0.947517595979444,
"train_eval_loss": 0.2088230848312378,
"train_eval_precision": 0.9488916851742717,
"train_eval_recall": 0.9477037054061244,
"train_loss": 0.2088230848312378,
"train_runtime": 292.1312,
"train_samples_per_second": 30.192,
"train_steps_per_second": 0.945
},
{
"epoch": 4.0,
"eval_accuracy": 0.9010582010582011,
"eval_f1": 0.8999224906052751,
"eval_loss": 0.41053256392478943,
"eval_precision": 0.9025635496532717,
"eval_recall": 0.9002552887231217,
"eval_runtime": 125.1369,
"eval_samples_per_second": 30.207,
"eval_steps_per_second": 0.951,
"step": 1104
},
{
"epoch": 5.0,
"step": 1380,
"train_eval_accuracy": 0.9515873015873015,
"train_eval_f1": 0.9516673939530801,
"train_eval_loss": 0.17656771838665009,
"train_eval_precision": 0.9539169437133798,
"train_eval_recall": 0.9520537526614696,
"train_loss": 0.1765676885843277,
"train_runtime": 292.8194,
"train_samples_per_second": 30.121,
"train_steps_per_second": 0.943
},
{
"epoch": 5.0,
"eval_accuracy": 0.9063492063492063,
"eval_f1": 0.9046138481463505,
"eval_loss": 0.41682690382003784,
"eval_precision": 0.9088264652953383,
"eval_recall": 0.9051041885149284,
"eval_runtime": 125.6464,
"eval_samples_per_second": 30.084,
"eval_steps_per_second": 0.947,
"step": 1380
},
{
"epoch": 6.0,
"step": 1656,
"train_eval_accuracy": 0.9654195011337868,
"train_eval_f1": 0.9656476663607069,
"train_eval_loss": 0.12671761214733124,
"train_eval_precision": 0.9666620904833755,
"train_eval_recall": 0.9657485060432522,
"train_loss": 0.12671762704849243,
"train_runtime": 292.8539,
"train_samples_per_second": 30.117,
"train_steps_per_second": 0.942
},
{
"epoch": 6.0,
"eval_accuracy": 0.9084656084656084,
"eval_f1": 0.9069212907450369,
"eval_loss": 0.41428086161613464,
"eval_precision": 0.909310676401177,
"eval_recall": 0.9074800568136571,
"eval_runtime": 125.6663,
"eval_samples_per_second": 30.08,
"eval_steps_per_second": 0.947,
"step": 1656
},
{
"epoch": 7.0,
"step": 1932,
"train_eval_accuracy": 0.9712018140589569,
"train_eval_f1": 0.9713117879463059,
"train_eval_loss": 0.10472333431243896,
"train_eval_precision": 0.9726857205787973,
"train_eval_recall": 0.9716513762432712,
"train_loss": 0.10472334921360016,
"train_runtime": 293.0912,
"train_samples_per_second": 30.093,
"train_steps_per_second": 0.942
},
{
"epoch": 7.0,
"eval_accuracy": 0.9058201058201059,
"eval_f1": 0.9040971215004513,
"eval_loss": 0.4402031898498535,
"eval_precision": 0.9063334381728124,
"eval_recall": 0.9047346717153943,
"eval_runtime": 125.6961,
"eval_samples_per_second": 30.073,
"eval_steps_per_second": 0.947,
"step": 1932
},
{
"epoch": 8.0,
"step": 2208,
"train_eval_accuracy": 0.977437641723356,
"train_eval_f1": 0.9777628423711627,
"train_eval_loss": 0.08053447306156158,
"train_eval_precision": 0.9778969889316966,
"train_eval_recall": 0.9778987153746018,
"train_loss": 0.08053448051214218,
"train_runtime": 293.5192,
"train_samples_per_second": 30.049,
"train_steps_per_second": 0.94
},
{
"epoch": 8.0,
"eval_accuracy": 0.9031746031746032,
"eval_f1": 0.9017850721426728,
"eval_loss": 0.45825621485710144,
"eval_precision": 0.9036277073612298,
"eval_recall": 0.902066631184587,
"eval_runtime": 125.7628,
"eval_samples_per_second": 30.057,
"eval_steps_per_second": 0.946,
"step": 2208
},
{
"epoch": 8.0,
"step": 2208,
"total_flos": 1.85717836136448e+16,
"train_loss": 0.5860137939453125,
"train_runtime": 10564.8969,
"train_samples_per_second": 12.523,
"train_steps_per_second": 0.392
},
{
"epoch": 8.0,
"eval_accuracy": 0.9031746031746032,
"eval_f1": 0.9017850721426728,
"eval_loss": 0.45825621485710144,
"eval_precision": 0.9036277073612298,
"eval_recall": 0.902066631184587,
"eval_runtime": 125.8523,
"eval_samples_per_second": 30.035,
"eval_steps_per_second": 0.946,
"step": 2208
},
{
"epoch": 8.0,
"step": 2208,
"train_en_eval_accuracy": 0.977437641723356,
"train_en_eval_f1": 0.9777628423711627,
"train_en_eval_loss": 0.08053447306156158,
"train_en_eval_precision": 0.9778969889316966,
"train_en_eval_recall": 0.9778987153746018,
"train_en_loss": 0.08053448051214218,
"train_en_runtime": 292.6514,
"train_en_samples_per_second": 30.138,
"train_en_steps_per_second": 0.943
},
{
"epoch": 8.0,
"step": 2208,
"test_en_eval_accuracy": 0.9031746031746032,
"test_en_eval_f1": 0.9017850721426728,
"test_en_eval_loss": 0.45825621485710144,
"test_en_eval_precision": 0.9036277073612298,
"test_en_eval_recall": 0.902066631184587,
"test_en_loss": 0.45825621485710144,
"test_en_runtime": 125.7293,
"test_en_samples_per_second": 30.065,
"test_en_steps_per_second": 0.946
}
],
"logging_steps": 500,
"max_steps": 4140,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.85717836136448e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}