{
  "best_metric": 1.9084105491638184,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-8500",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 9220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.567398119122257,
      "grad_norm": 1.353641152381897,
      "learning_rate": 5e-05,
      "loss": 3.359,
      "step": 500
    },
    {
      "epoch": 1.567398119122257,
      "eval_bleu": 0.11424038411303619,
      "eval_loss": 3.128293514251709,
      "eval_rouge1": 0.3297614987151056,
      "eval_rouge2": 0.08429294540985294,
      "eval_rougeL": 0.2561476738686219,
      "eval_runtime": 26.8133,
      "eval_samples_per_second": 31.589,
      "eval_steps_per_second": 3.953,
      "step": 500
    },
    {
      "epoch": 3.134796238244514,
      "grad_norm": 1.156111717224121,
      "learning_rate": 2.71689497716895e-05,
      "loss": 2.9208,
      "step": 1000
    },
    {
      "epoch": 3.134796238244514,
      "eval_bleu": 0.1490666503828191,
      "eval_loss": 2.729825496673584,
      "eval_rouge1": 0.40409071928626966,
      "eval_rouge2": 0.14297878002377568,
      "eval_rougeL": 0.34083403761346187,
      "eval_runtime": 27.234,
      "eval_samples_per_second": 31.101,
      "eval_steps_per_second": 3.892,
      "step": 1000
    },
    {
      "epoch": 4.702194357366771,
      "grad_norm": 1.1165863275527954,
      "learning_rate": 4.337899543378996e-06,
      "loss": 2.619,
      "step": 1500
    },
    {
      "epoch": 4.702194357366771,
      "eval_bleu": 0.16068905926811505,
      "eval_loss": 2.6229476928710938,
      "eval_rouge1": 0.4264320027787866,
      "eval_rouge2": 0.1630682859845051,
      "eval_rougeL": 0.367472815476786,
      "eval_runtime": 27.3027,
      "eval_samples_per_second": 31.023,
      "eval_steps_per_second": 3.882,
      "step": 1500
    },
    {
      "epoch": 4.3383947939262475,
      "grad_norm": 1.10550856590271,
      "learning_rate": 4.139908256880734e-05,
      "loss": 2.4047,
      "step": 2000
    },
    {
      "epoch": 4.3383947939262475,
      "eval_bleu": 0.27212534220096674,
      "eval_loss": 2.200192451477051,
      "eval_rouge1": 0.49764917064550795,
      "eval_rouge2": 0.25417403674525624,
      "eval_rougeL": 0.4505978761161964,
      "eval_runtime": 29.8301,
      "eval_samples_per_second": 31.009,
      "eval_steps_per_second": 3.889,
      "step": 2000
    },
    {
      "epoch": 5.422993492407809,
      "grad_norm": 1.0486189126968384,
      "learning_rate": 3.8532110091743125e-05,
      "loss": 2.19,
      "step": 2500
    },
    {
      "epoch": 5.422993492407809,
      "eval_bleu": 0.2853635265097057,
      "eval_loss": 2.099168539047241,
      "eval_rouge1": 0.5205238075842558,
      "eval_rouge2": 0.27883621341002174,
      "eval_rougeL": 0.4772785679427928,
      "eval_runtime": 29.5017,
      "eval_samples_per_second": 31.354,
      "eval_steps_per_second": 3.932,
      "step": 2500
    },
    {
      "epoch": 6.507592190889371,
      "grad_norm": 1.0022239685058594,
      "learning_rate": 3.56651376146789e-05,
      "loss": 2.0473,
      "step": 3000
    },
    {
      "epoch": 6.507592190889371,
      "eval_bleu": 0.29294689624288234,
      "eval_loss": 2.0362119674682617,
      "eval_rouge1": 0.5380910185587349,
      "eval_rouge2": 0.29647105961235576,
      "eval_rougeL": 0.49649873151947865,
      "eval_runtime": 29.6658,
      "eval_samples_per_second": 31.181,
      "eval_steps_per_second": 3.91,
      "step": 3000
    },
    {
      "epoch": 7.592190889370933,
      "grad_norm": 1.1853405237197876,
      "learning_rate": 3.2798165137614676e-05,
      "loss": 1.9397,
      "step": 3500
    },
    {
      "epoch": 7.592190889370933,
      "eval_bleu": 0.2996126116957466,
      "eval_loss": 1.9933106899261475,
      "eval_rouge1": 0.5494053286639744,
      "eval_rouge2": 0.31025003697020603,
      "eval_rougeL": 0.5101736274334897,
      "eval_runtime": 29.6088,
      "eval_samples_per_second": 31.241,
      "eval_steps_per_second": 3.918,
      "step": 3500
    },
    {
      "epoch": 8.676789587852495,
      "grad_norm": 1.1255462169647217,
      "learning_rate": 2.9931192660550462e-05,
      "loss": 1.857,
      "step": 4000
    },
    {
      "epoch": 8.676789587852495,
      "eval_bleu": 0.30241485912380783,
      "eval_loss": 1.9647237062454224,
      "eval_rouge1": 0.5597611557009092,
      "eval_rouge2": 0.3191422306947157,
      "eval_rougeL": 0.5202653323875917,
      "eval_runtime": 29.9377,
      "eval_samples_per_second": 30.897,
      "eval_steps_per_second": 3.875,
      "step": 4000
    },
    {
      "epoch": 9.761388286334057,
      "grad_norm": 1.1697229146957397,
      "learning_rate": 2.7064220183486238e-05,
      "loss": 1.784,
      "step": 4500
    },
    {
      "epoch": 9.761388286334057,
      "eval_bleu": 0.3061719577143718,
      "eval_loss": 1.9443068504333496,
      "eval_rouge1": 0.567492271856554,
      "eval_rouge2": 0.3269182124324805,
      "eval_rougeL": 0.5278573882748132,
      "eval_runtime": 29.751,
      "eval_samples_per_second": 31.091,
      "eval_steps_per_second": 3.899,
      "step": 4500
    },
    {
      "epoch": 10.845986984815617,
      "grad_norm": 1.070591926574707,
      "learning_rate": 2.419724770642202e-05,
      "loss": 1.7239,
      "step": 5000
    },
    {
      "epoch": 10.845986984815617,
      "eval_bleu": 0.309858394526436,
      "eval_loss": 1.931990385055542,
      "eval_rouge1": 0.5723606535196859,
      "eval_rouge2": 0.3338521436125379,
      "eval_rougeL": 0.5341216118802655,
      "eval_runtime": 29.6886,
      "eval_samples_per_second": 31.157,
      "eval_steps_per_second": 3.907,
      "step": 5000
    },
    {
      "epoch": 11.93058568329718,
      "grad_norm": 1.0755261182785034,
      "learning_rate": 2.13302752293578e-05,
      "loss": 1.6713,
      "step": 5500
    },
    {
      "epoch": 11.93058568329718,
      "eval_bleu": 0.3115672562854492,
      "eval_loss": 1.920640230178833,
      "eval_rouge1": 0.5765467952167939,
      "eval_rouge2": 0.33826641143296676,
      "eval_rougeL": 0.5387314433190069,
      "eval_runtime": 29.7016,
      "eval_samples_per_second": 31.143,
      "eval_steps_per_second": 3.906,
      "step": 5500
    },
    {
      "epoch": 13.015184381778742,
      "grad_norm": 1.0826488733291626,
      "learning_rate": 1.8463302752293578e-05,
      "loss": 1.6263,
      "step": 6000
    },
    {
      "epoch": 13.015184381778742,
      "eval_bleu": 0.31268695772405475,
      "eval_loss": 1.916778564453125,
      "eval_rouge1": 0.5780842791223908,
      "eval_rouge2": 0.34164409810850394,
      "eval_rougeL": 0.5415509673961407,
      "eval_runtime": 29.789,
      "eval_samples_per_second": 31.052,
      "eval_steps_per_second": 3.894,
      "step": 6000
    },
    {
      "epoch": 14.099783080260304,
      "grad_norm": 1.0868735313415527,
      "learning_rate": 1.559633027522936e-05,
      "loss": 1.5869,
      "step": 6500
    },
    {
      "epoch": 14.099783080260304,
      "eval_bleu": 0.31365743559233084,
      "eval_loss": 1.9147837162017822,
      "eval_rouge1": 0.5829184758698387,
      "eval_rouge2": 0.3448101826360943,
      "eval_rougeL": 0.5450794961513086,
      "eval_runtime": 29.7645,
      "eval_samples_per_second": 31.077,
      "eval_steps_per_second": 3.897,
      "step": 6500
    },
    {
      "epoch": 15.184381778741866,
      "grad_norm": 1.0827687978744507,
      "learning_rate": 1.2729357798165138e-05,
      "loss": 1.5544,
      "step": 7000
    },
    {
      "epoch": 15.184381778741866,
      "eval_bleu": 0.315769500599606,
      "eval_loss": 1.9121257066726685,
      "eval_rouge1": 0.5844681250407762,
      "eval_rouge2": 0.34764910748110744,
      "eval_rougeL": 0.5476190296456669,
      "eval_runtime": 29.7415,
      "eval_samples_per_second": 31.101,
      "eval_steps_per_second": 3.9,
      "step": 7000
    },
    {
      "epoch": 16.268980477223426,
      "grad_norm": 1.1430450677871704,
      "learning_rate": 9.862385321100918e-06,
      "loss": 1.5307,
      "step": 7500
    },
    {
      "epoch": 16.268980477223426,
      "eval_bleu": 0.31648880861794926,
      "eval_loss": 1.9105726480484009,
      "eval_rouge1": 0.5852713451659596,
      "eval_rouge2": 0.34877835378762495,
      "eval_rougeL": 0.5486197186684263,
      "eval_runtime": 29.7345,
      "eval_samples_per_second": 31.109,
      "eval_steps_per_second": 3.901,
      "step": 7500
    },
    {
      "epoch": 17.35357917570499,
      "grad_norm": 1.0865087509155273,
      "learning_rate": 6.995412844036697e-06,
      "loss": 1.5087,
      "step": 8000
    },
    {
      "epoch": 17.35357917570499,
      "eval_bleu": 0.31692571547155524,
      "eval_loss": 1.9093118906021118,
      "eval_rouge1": 0.5860996975913157,
      "eval_rouge2": 0.3503907384934047,
      "eval_rougeL": 0.5500340150392318,
      "eval_runtime": 29.7497,
      "eval_samples_per_second": 31.093,
      "eval_steps_per_second": 3.899,
      "step": 8000
    },
    {
      "epoch": 18.43817787418655,
      "grad_norm": 1.1252211332321167,
      "learning_rate": 4.128440366972477e-06,
      "loss": 1.4937,
      "step": 8500
    },
    {
      "epoch": 18.43817787418655,
      "eval_bleu": 0.31723468269919336,
      "eval_loss": 1.9084105491638184,
      "eval_rouge1": 0.5868586694605076,
      "eval_rouge2": 0.350546625127078,
      "eval_rougeL": 0.5503666110741787,
      "eval_runtime": 29.7351,
      "eval_samples_per_second": 31.108,
      "eval_steps_per_second": 3.901,
      "step": 8500
    },
    {
      "epoch": 19.522776572668114,
      "grad_norm": 1.150936245918274,
      "learning_rate": 1.261467889908257e-06,
      "loss": 1.4824,
      "step": 9000
    },
    {
      "epoch": 19.522776572668114,
      "eval_bleu": 0.3177718226409019,
      "eval_loss": 1.9086270332336426,
      "eval_rouge1": 0.5875550437490973,
      "eval_rouge2": 0.3512666976647323,
      "eval_rougeL": 0.5509556223633276,
      "eval_runtime": 30.1604,
      "eval_samples_per_second": 30.669,
      "eval_steps_per_second": 3.846,
      "step": 9000
    },
    {
      "epoch": 20.0,
      "step": 9220,
      "total_flos": 2.8862709792768e+16,
      "train_loss": 1.4422371688478681,
      "train_runtime": 3284.8472,
      "train_samples_per_second": 22.412,
      "train_steps_per_second": 2.807
    }
  ],
  "logging_steps": 500,
  "max_steps": 9220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8862709792768e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}