|
{ |
|
"best_metric": 0.465503990650177, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem/checkpoint-918", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 1683, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 41.373477935791016, |
|
"learning_rate": 1.53e-05, |
|
"loss": 7.4125, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.013470228801119116, |
|
"eval_loss": 2.3318939208984375, |
|
"eval_rouge1": 0.4042810146176442, |
|
"eval_rouge2": 0.1765421862019638, |
|
"eval_rougeL": 0.40385731535628305, |
|
"eval_runtime": 1.2315, |
|
"eval_samples_per_second": 246.862, |
|
"eval_steps_per_second": 30.858, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7725179195404053, |
|
"learning_rate": 3.06e-05, |
|
"loss": 1.0077, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.3375849927940314, |
|
"eval_loss": 0.5519431233406067, |
|
"eval_rouge1": 0.47471630348624405, |
|
"eval_rouge2": 0.18392227454023974, |
|
"eval_rougeL": 0.4733153598700648, |
|
"eval_runtime": 1.1576, |
|
"eval_samples_per_second": 262.603, |
|
"eval_steps_per_second": 32.825, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.682711124420166, |
|
"learning_rate": 4.5900000000000004e-05, |
|
"loss": 0.483, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.35824220855964284, |
|
"eval_loss": 0.4972667694091797, |
|
"eval_rouge1": 0.5145197705108664, |
|
"eval_rouge2": 0.2200386914249186, |
|
"eval_rougeL": 0.5134621462419481, |
|
"eval_runtime": 1.2102, |
|
"eval_samples_per_second": 251.205, |
|
"eval_steps_per_second": 31.401, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.890738606452942, |
|
"learning_rate": 4.7812500000000003e-05, |
|
"loss": 0.3795, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.3717354832918087, |
|
"eval_loss": 0.4739198088645935, |
|
"eval_rouge1": 0.5397330658204715, |
|
"eval_rouge2": 0.2524237751117604, |
|
"eval_rougeL": 0.5377765269792509, |
|
"eval_runtime": 1.2779, |
|
"eval_samples_per_second": 237.89, |
|
"eval_steps_per_second": 29.736, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3704463243484497, |
|
"learning_rate": 4.482421875e-05, |
|
"loss": 0.2901, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.37954739911448565, |
|
"eval_loss": 0.46911630034446716, |
|
"eval_rouge1": 0.57101655669895, |
|
"eval_rouge2": 0.29268127822000256, |
|
"eval_rougeL": 0.567665717772471, |
|
"eval_runtime": 1.1189, |
|
"eval_samples_per_second": 271.684, |
|
"eval_steps_per_second": 33.96, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.911377429962158, |
|
"learning_rate": 4.18359375e-05, |
|
"loss": 0.2239, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.39471976173966783, |
|
"eval_loss": 0.465503990650177, |
|
"eval_rouge1": 0.5909012601210781, |
|
"eval_rouge2": 0.32323469643447167, |
|
"eval_rougeL": 0.5882112866531675, |
|
"eval_runtime": 5.248, |
|
"eval_samples_per_second": 57.927, |
|
"eval_steps_per_second": 7.241, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.639866590499878, |
|
"learning_rate": 3.884765625e-05, |
|
"loss": 0.1763, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.4011122295711346, |
|
"eval_loss": 0.46726807951927185, |
|
"eval_rouge1": 0.6046871760857919, |
|
"eval_rouge2": 0.3378049073324742, |
|
"eval_rougeL": 0.6018660127815343, |
|
"eval_runtime": 14.6215, |
|
"eval_samples_per_second": 20.791, |
|
"eval_steps_per_second": 2.599, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.1884260177612305, |
|
"learning_rate": 3.5859375e-05, |
|
"loss": 0.1429, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.4084847317717048, |
|
"eval_loss": 0.46664053201675415, |
|
"eval_rouge1": 0.619977821151152, |
|
"eval_rouge2": 0.3652452050559121, |
|
"eval_rougeL": 0.6168287532556173, |
|
"eval_runtime": 13.3071, |
|
"eval_samples_per_second": 22.845, |
|
"eval_steps_per_second": 2.856, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.133341908454895, |
|
"learning_rate": 3.287109375e-05, |
|
"loss": 0.1206, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.412982833438841, |
|
"eval_loss": 0.47646617889404297, |
|
"eval_rouge1": 0.6230350047425831, |
|
"eval_rouge2": 0.37065368099082896, |
|
"eval_rougeL": 0.6207890023241163, |
|
"eval_runtime": 4.1195, |
|
"eval_samples_per_second": 73.796, |
|
"eval_steps_per_second": 9.225, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.0680757761001587, |
|
"learning_rate": 2.9882812500000002e-05, |
|
"loss": 0.1052, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.41481558555871684, |
|
"eval_loss": 0.47586962580680847, |
|
"eval_rouge1": 0.6232860618818599, |
|
"eval_rouge2": 0.3762342947707073, |
|
"eval_rougeL": 0.6204668025380538, |
|
"eval_runtime": 1.7191, |
|
"eval_samples_per_second": 176.836, |
|
"eval_steps_per_second": 22.105, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.2003881931304932, |
|
"learning_rate": 2.689453125e-05, |
|
"loss": 0.0955, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.4179443502693027, |
|
"eval_loss": 0.47885996103286743, |
|
"eval_rouge1": 0.6342804965319174, |
|
"eval_rouge2": 0.3809817493272587, |
|
"eval_rougeL": 0.6306668222386218, |
|
"eval_runtime": 1.9634, |
|
"eval_samples_per_second": 154.831, |
|
"eval_steps_per_second": 19.354, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 1683, |
|
"total_flos": 875916214272000.0, |
|
"train_loss": 0.9488435417714746, |
|
"train_runtime": 1368.0254, |
|
"train_samples_per_second": 17.821, |
|
"train_steps_per_second": 2.237 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3060, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 875916214272000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|