res_nw_yem / trainer_state.json
nlparabic's picture
End of training
c173919 verified
{
"best_metric": 0.465503990650177,
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem/checkpoint-918",
"epoch": 11.0,
"eval_steps": 500,
"global_step": 1683,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 41.373477935791016,
"learning_rate": 1.53e-05,
"loss": 7.4125,
"step": 153
},
{
"epoch": 1.0,
"eval_bleu": 0.013470228801119116,
"eval_loss": 2.3318939208984375,
"eval_rouge1": 0.4042810146176442,
"eval_rouge2": 0.1765421862019638,
"eval_rougeL": 0.40385731535628305,
"eval_runtime": 1.2315,
"eval_samples_per_second": 246.862,
"eval_steps_per_second": 30.858,
"step": 153
},
{
"epoch": 2.0,
"grad_norm": 1.7725179195404053,
"learning_rate": 3.06e-05,
"loss": 1.0077,
"step": 306
},
{
"epoch": 2.0,
"eval_bleu": 0.3375849927940314,
"eval_loss": 0.5519431233406067,
"eval_rouge1": 0.47471630348624405,
"eval_rouge2": 0.18392227454023974,
"eval_rougeL": 0.4733153598700648,
"eval_runtime": 1.1576,
"eval_samples_per_second": 262.603,
"eval_steps_per_second": 32.825,
"step": 306
},
{
"epoch": 3.0,
"grad_norm": 1.682711124420166,
"learning_rate": 4.5900000000000004e-05,
"loss": 0.483,
"step": 459
},
{
"epoch": 3.0,
"eval_bleu": 0.35824220855964284,
"eval_loss": 0.4972667694091797,
"eval_rouge1": 0.5145197705108664,
"eval_rouge2": 0.2200386914249186,
"eval_rougeL": 0.5134621462419481,
"eval_runtime": 1.2102,
"eval_samples_per_second": 251.205,
"eval_steps_per_second": 31.401,
"step": 459
},
{
"epoch": 4.0,
"grad_norm": 1.890738606452942,
"learning_rate": 4.7812500000000003e-05,
"loss": 0.3795,
"step": 612
},
{
"epoch": 4.0,
"eval_bleu": 0.3717354832918087,
"eval_loss": 0.4739198088645935,
"eval_rouge1": 0.5397330658204715,
"eval_rouge2": 0.2524237751117604,
"eval_rougeL": 0.5377765269792509,
"eval_runtime": 1.2779,
"eval_samples_per_second": 237.89,
"eval_steps_per_second": 29.736,
"step": 612
},
{
"epoch": 5.0,
"grad_norm": 1.3704463243484497,
"learning_rate": 4.482421875e-05,
"loss": 0.2901,
"step": 765
},
{
"epoch": 5.0,
"eval_bleu": 0.37954739911448565,
"eval_loss": 0.46911630034446716,
"eval_rouge1": 0.57101655669895,
"eval_rouge2": 0.29268127822000256,
"eval_rougeL": 0.567665717772471,
"eval_runtime": 1.1189,
"eval_samples_per_second": 271.684,
"eval_steps_per_second": 33.96,
"step": 765
},
{
"epoch": 6.0,
"grad_norm": 2.911377429962158,
"learning_rate": 4.18359375e-05,
"loss": 0.2239,
"step": 918
},
{
"epoch": 6.0,
"eval_bleu": 0.39471976173966783,
"eval_loss": 0.465503990650177,
"eval_rouge1": 0.5909012601210781,
"eval_rouge2": 0.32323469643447167,
"eval_rougeL": 0.5882112866531675,
"eval_runtime": 5.248,
"eval_samples_per_second": 57.927,
"eval_steps_per_second": 7.241,
"step": 918
},
{
"epoch": 7.0,
"grad_norm": 1.639866590499878,
"learning_rate": 3.884765625e-05,
"loss": 0.1763,
"step": 1071
},
{
"epoch": 7.0,
"eval_bleu": 0.4011122295711346,
"eval_loss": 0.46726807951927185,
"eval_rouge1": 0.6046871760857919,
"eval_rouge2": 0.3378049073324742,
"eval_rougeL": 0.6018660127815343,
"eval_runtime": 14.6215,
"eval_samples_per_second": 20.791,
"eval_steps_per_second": 2.599,
"step": 1071
},
{
"epoch": 8.0,
"grad_norm": 1.1884260177612305,
"learning_rate": 3.5859375e-05,
"loss": 0.1429,
"step": 1224
},
{
"epoch": 8.0,
"eval_bleu": 0.4084847317717048,
"eval_loss": 0.46664053201675415,
"eval_rouge1": 0.619977821151152,
"eval_rouge2": 0.3652452050559121,
"eval_rougeL": 0.6168287532556173,
"eval_runtime": 13.3071,
"eval_samples_per_second": 22.845,
"eval_steps_per_second": 2.856,
"step": 1224
},
{
"epoch": 9.0,
"grad_norm": 1.133341908454895,
"learning_rate": 3.287109375e-05,
"loss": 0.1206,
"step": 1377
},
{
"epoch": 9.0,
"eval_bleu": 0.412982833438841,
"eval_loss": 0.47646617889404297,
"eval_rouge1": 0.6230350047425831,
"eval_rouge2": 0.37065368099082896,
"eval_rougeL": 0.6207890023241163,
"eval_runtime": 4.1195,
"eval_samples_per_second": 73.796,
"eval_steps_per_second": 9.225,
"step": 1377
},
{
"epoch": 10.0,
"grad_norm": 1.0680757761001587,
"learning_rate": 2.9882812500000002e-05,
"loss": 0.1052,
"step": 1530
},
{
"epoch": 10.0,
"eval_bleu": 0.41481558555871684,
"eval_loss": 0.47586962580680847,
"eval_rouge1": 0.6232860618818599,
"eval_rouge2": 0.3762342947707073,
"eval_rougeL": 0.6204668025380538,
"eval_runtime": 1.7191,
"eval_samples_per_second": 176.836,
"eval_steps_per_second": 22.105,
"step": 1530
},
{
"epoch": 11.0,
"grad_norm": 1.2003881931304932,
"learning_rate": 2.689453125e-05,
"loss": 0.0955,
"step": 1683
},
{
"epoch": 11.0,
"eval_bleu": 0.4179443502693027,
"eval_loss": 0.47885996103286743,
"eval_rouge1": 0.6342804965319174,
"eval_rouge2": 0.3809817493272587,
"eval_rougeL": 0.6306668222386218,
"eval_runtime": 1.9634,
"eval_samples_per_second": 154.831,
"eval_steps_per_second": 19.354,
"step": 1683
},
{
"epoch": 11.0,
"step": 1683,
"total_flos": 875916214272000.0,
"train_loss": 0.9488435417714746,
"train_runtime": 1368.0254,
"train_samples_per_second": 17.821,
"train_steps_per_second": 2.237
}
],
"logging_steps": 500,
"max_steps": 3060,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 875916214272000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}