|
{ |
|
"best_metric": 0.7315686941146851, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-28428", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 63963, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.742945671081543, |
|
"learning_rate": 4.7667678621858235e-05, |
|
"loss": 1.1436, |
|
"step": 7107 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.1900415894207328, |
|
"eval_loss": 0.8277140259742737, |
|
"eval_rouge1": 0.5211536867388353, |
|
"eval_rouge2": 0.2576275131704426, |
|
"eval_rougeL": 0.5169189427573101, |
|
"eval_runtime": 204.5959, |
|
"eval_samples_per_second": 69.464, |
|
"eval_steps_per_second": 8.685, |
|
"step": 7107 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.031801223754883, |
|
"learning_rate": 4.515885343123411e-05, |
|
"loss": 0.7508, |
|
"step": 14214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.22138684401610842, |
|
"eval_loss": 0.7543078064918518, |
|
"eval_rouge1": 0.5674397247471176, |
|
"eval_rouge2": 0.3108337383535441, |
|
"eval_rougeL": 0.5636106781794015, |
|
"eval_runtime": 171.1246, |
|
"eval_samples_per_second": 83.051, |
|
"eval_steps_per_second": 10.384, |
|
"step": 14214 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8590487241744995, |
|
"learning_rate": 4.265002824061e-05, |
|
"loss": 0.6471, |
|
"step": 21321 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.2374960454489342, |
|
"eval_loss": 0.7337948083877563, |
|
"eval_rouge1": 0.5880985827608463, |
|
"eval_rouge2": 0.33558513842625187, |
|
"eval_rougeL": 0.5844518671510625, |
|
"eval_runtime": 68.7002, |
|
"eval_samples_per_second": 206.87, |
|
"eval_steps_per_second": 25.866, |
|
"step": 21321 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.3944027423858643, |
|
"learning_rate": 4.0141203049985884e-05, |
|
"loss": 0.5713, |
|
"step": 28428 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.24587227576979195, |
|
"eval_loss": 0.7315686941146851, |
|
"eval_rouge1": 0.6017197427075045, |
|
"eval_rouge2": 0.3518746485163118, |
|
"eval_rougeL": 0.5982542515796094, |
|
"eval_runtime": 62.0721, |
|
"eval_samples_per_second": 228.959, |
|
"eval_steps_per_second": 28.628, |
|
"step": 28428 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.199220657348633, |
|
"learning_rate": 3.763237785936176e-05, |
|
"loss": 0.5097, |
|
"step": 35535 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.24748155317226092, |
|
"eval_loss": 0.7390380501747131, |
|
"eval_rouge1": 0.6058102682046419, |
|
"eval_rouge2": 0.357170685615976, |
|
"eval_rougeL": 0.6021635755679425, |
|
"eval_runtime": 67.8747, |
|
"eval_samples_per_second": 209.386, |
|
"eval_steps_per_second": 26.181, |
|
"step": 35535 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.055725574493408, |
|
"learning_rate": 3.512355266873765e-05, |
|
"loss": 0.4573, |
|
"step": 42642 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.25030630377831276, |
|
"eval_loss": 0.748293399810791, |
|
"eval_rouge1": 0.6103116816448397, |
|
"eval_rouge2": 0.361846050958361, |
|
"eval_rougeL": 0.6066395364597333, |
|
"eval_runtime": 56.4418, |
|
"eval_samples_per_second": 251.799, |
|
"eval_steps_per_second": 31.484, |
|
"step": 42642 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.6595733165740967, |
|
"learning_rate": 3.2614727478113526e-05, |
|
"loss": 0.4118, |
|
"step": 49749 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.2494244558337241, |
|
"eval_loss": 0.7635838389396667, |
|
"eval_rouge1": 0.610621109140437, |
|
"eval_rouge2": 0.3633959713058441, |
|
"eval_rougeL": 0.6069537363647842, |
|
"eval_runtime": 173.9311, |
|
"eval_samples_per_second": 81.711, |
|
"eval_steps_per_second": 10.217, |
|
"step": 49749 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.863671064376831, |
|
"learning_rate": 3.010590228748941e-05, |
|
"loss": 0.3725, |
|
"step": 56856 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.25065847486647275, |
|
"eval_loss": 0.7796261310577393, |
|
"eval_rouge1": 0.6126587801190159, |
|
"eval_rouge2": 0.3659624175392553, |
|
"eval_rougeL": 0.6088959046619336, |
|
"eval_runtime": 170.86, |
|
"eval_samples_per_second": 83.179, |
|
"eval_steps_per_second": 10.4, |
|
"step": 56856 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.546931266784668, |
|
"learning_rate": 2.7597077096865293e-05, |
|
"loss": 0.3375, |
|
"step": 63963 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.24908190761452426, |
|
"eval_loss": 0.7973926663398743, |
|
"eval_rouge1": 0.6111967178899901, |
|
"eval_rouge2": 0.36536691181853787, |
|
"eval_rougeL": 0.6074289902749841, |
|
"eval_runtime": 173.0755, |
|
"eval_samples_per_second": 82.114, |
|
"eval_steps_per_second": 10.267, |
|
"step": 63963 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 63963, |
|
"total_flos": 3.3423104950272e+16, |
|
"train_loss": 0.5779510789562912, |
|
"train_runtime": 8660.6869, |
|
"train_samples_per_second": 131.285, |
|
"train_steps_per_second": 16.412 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 142140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3423104950272e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|