|
{ |
|
"best_metric": 2.2772634029388428, |
|
"best_model_checkpoint": "/tmp/model/checkpoint-1600", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.958333333333333e-06, |
|
"loss": 3.8721, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 3.3675, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2291666666666666e-05, |
|
"loss": 3.5553, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6458333333333335e-05, |
|
"loss": 3.2187, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 3.3576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.479166666666667e-05, |
|
"loss": 3.2111, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8958333333333337e-05, |
|
"loss": 3.1881, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3125e-05, |
|
"loss": 3.0568, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.708333333333334e-05, |
|
"loss": 3.1593, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.125e-05, |
|
"loss": 2.9964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.541666666666667e-05, |
|
"loss": 3.2086, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.958333333333334e-05, |
|
"loss": 3.314, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.958333333333334e-05, |
|
"loss": 3.1285, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.912037037037037e-05, |
|
"loss": 2.8458, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.865740740740741e-05, |
|
"loss": 2.6054, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.819444444444445e-05, |
|
"loss": 3.1717, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.773148148148148e-05, |
|
"loss": 3.2007, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.726851851851852e-05, |
|
"loss": 3.0557, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6805555555555556e-05, |
|
"loss": 2.8199, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6342592592592595e-05, |
|
"loss": 2.8118, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.590277777777778e-05, |
|
"loss": 2.9147, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543981481481482e-05, |
|
"loss": 2.6742, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.497685185185185e-05, |
|
"loss": 2.8732, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4513888888888885e-05, |
|
"loss": 2.6045, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.405092592592593e-05, |
|
"loss": 2.7271, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3587962962962965e-05, |
|
"loss": 2.5203, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.3125000000000005e-05, |
|
"loss": 2.7714, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.266203703703704e-05, |
|
"loss": 3.081, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.219907407407408e-05, |
|
"loss": 2.6275, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.173611111111112e-05, |
|
"loss": 2.8672, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.127314814814815e-05, |
|
"loss": 2.6542, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0810185185185184e-05, |
|
"loss": 2.9697, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0347222222222223e-05, |
|
"loss": 2.8099, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.988425925925926e-05, |
|
"loss": 2.6577, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.94212962962963e-05, |
|
"loss": 2.7843, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8958333333333336e-05, |
|
"loss": 2.7663, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.849537037037037e-05, |
|
"loss": 2.6443, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.803240740740741e-05, |
|
"loss": 2.6549, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.756944444444445e-05, |
|
"loss": 2.4063, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.710648148148148e-05, |
|
"loss": 2.5605, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 141.655, |
|
"eval_loss": 2.4260785579681396, |
|
"eval_rouge1": 36.8904, |
|
"eval_rouge2": 14.0444, |
|
"eval_rougeL": 22.5839, |
|
"eval_rougeLsum": 33.8603, |
|
"eval_runtime": 806.3061, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.664351851851852e-05, |
|
"loss": 2.3288, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6180555555555555e-05, |
|
"loss": 2.0535, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5717592592592595e-05, |
|
"loss": 2.0489, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5254629629629635e-05, |
|
"loss": 1.9129, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.479166666666667e-05, |
|
"loss": 2.3016, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432870370370371e-05, |
|
"loss": 2.1098, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.386574074074074e-05, |
|
"loss": 2.0364, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.340277777777778e-05, |
|
"loss": 1.8088, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.293981481481482e-05, |
|
"loss": 1.9104, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.247685185185185e-05, |
|
"loss": 1.9855, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2013888888888886e-05, |
|
"loss": 2.0865, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1550925925925926e-05, |
|
"loss": 2.127, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1087962962962966e-05, |
|
"loss": 2.1918, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0625000000000006e-05, |
|
"loss": 1.9604, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.016203703703704e-05, |
|
"loss": 2.289, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9699074074074075e-05, |
|
"loss": 1.6954, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9236111111111115e-05, |
|
"loss": 2.2102, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8773148148148148e-05, |
|
"loss": 2.287, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8310185185185185e-05, |
|
"loss": 1.9731, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7847222222222224e-05, |
|
"loss": 2.2276, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.738425925925926e-05, |
|
"loss": 1.8473, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.69212962962963e-05, |
|
"loss": 1.8407, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6458333333333334e-05, |
|
"loss": 1.9851, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.599537037037037e-05, |
|
"loss": 2.2261, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.553240740740741e-05, |
|
"loss": 1.9708, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5069444444444447e-05, |
|
"loss": 1.9759, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4606481481481483e-05, |
|
"loss": 1.9717, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.414351851851852e-05, |
|
"loss": 1.8061, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3680555555555556e-05, |
|
"loss": 1.7061, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3217592592592592e-05, |
|
"loss": 2.0426, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2754629629629632e-05, |
|
"loss": 2.0017, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.229166666666667e-05, |
|
"loss": 1.979, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1828703703703705e-05, |
|
"loss": 1.9851, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.136574074074074e-05, |
|
"loss": 1.7156, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0902777777777778e-05, |
|
"loss": 2.0078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0439814814814814e-05, |
|
"loss": 1.8058, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.997685185185185e-05, |
|
"loss": 2.3492, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.951388888888889e-05, |
|
"loss": 2.169, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.9050925925925927e-05, |
|
"loss": 1.9333, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8587962962962964e-05, |
|
"loss": 2.0535, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 141.54, |
|
"eval_loss": 2.2772634029388428, |
|
"eval_rouge1": 39.2994, |
|
"eval_rouge2": 16.9505, |
|
"eval_rougeL": 24.9144, |
|
"eval_rougeLsum": 36.4135, |
|
"eval_runtime": 807.8602, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.062, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 2400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 6931077737742336.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|