|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.789564097058193, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 7.5882, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 0.0872, |
|
"eval_bp": 0.6461, |
|
"eval_counts_1": 3993, |
|
"eval_counts_2": 105, |
|
"eval_counts_3": 0, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1155, |
|
"eval_gen_len": 9.7105, |
|
"eval_loss": 5.682333469390869, |
|
"eval_precisions_1": 26.998, |
|
"eval_precisions_2": 0.8343, |
|
"eval_precisions_3": 0.0048, |
|
"eval_precisions_4": 0.0031, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1101, |
|
"eval_rouge2": 0.0077, |
|
"eval_rougeL": 0.1078, |
|
"eval_rougeLsum": 0.1076, |
|
"eval_runtime": 1951.3051, |
|
"eval_samples_per_second": 1.13, |
|
"eval_steps_per_second": 0.565, |
|
"eval_sys_len": 14790, |
|
"eval_totals_1": 14790, |
|
"eval_totals_2": 12586, |
|
"eval_totals_3": 10382, |
|
"eval_totals_4": 8178, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 5.2903, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_bleu": 0.351, |
|
"eval_bp": 0.8828, |
|
"eval_counts_1": 3827, |
|
"eval_counts_2": 229, |
|
"eval_counts_3": 32, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0964, |
|
"eval_gen_len": 16.7005, |
|
"eval_loss": 4.872079372406006, |
|
"eval_precisions_1": 20.2551, |
|
"eval_precisions_2": 1.3721, |
|
"eval_precisions_3": 0.2209, |
|
"eval_precisions_4": 0.0041, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0924, |
|
"eval_rouge2": 0.015, |
|
"eval_rougeL": 0.091, |
|
"eval_rougeLsum": 0.0909, |
|
"eval_runtime": 3438.1674, |
|
"eval_samples_per_second": 0.641, |
|
"eval_steps_per_second": 0.321, |
|
"eval_sys_len": 18894, |
|
"eval_totals_1": 18894, |
|
"eval_totals_2": 16690, |
|
"eval_totals_3": 14486, |
|
"eval_totals_4": 12282, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 4.6636, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.2933, |
|
"eval_bp": 0.6758, |
|
"eval_counts_1": 3638, |
|
"eval_counts_2": 174, |
|
"eval_counts_3": 21, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0925, |
|
"eval_gen_len": 8.9197, |
|
"eval_loss": 4.280586242675781, |
|
"eval_precisions_1": 23.8276, |
|
"eval_precisions_2": 1.3319, |
|
"eval_precisions_3": 0.1934, |
|
"eval_precisions_4": 0.0058, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0884, |
|
"eval_rouge2": 0.012, |
|
"eval_rougeL": 0.0876, |
|
"eval_rougeLsum": 0.0874, |
|
"eval_runtime": 2326.5895, |
|
"eval_samples_per_second": 0.947, |
|
"eval_steps_per_second": 0.474, |
|
"eval_sys_len": 15268, |
|
"eval_totals_1": 15268, |
|
"eval_totals_2": 13064, |
|
"eval_totals_3": 10860, |
|
"eval_totals_4": 8656, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 4.2229, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.2288, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 4274, |
|
"eval_counts_2": 240, |
|
"eval_counts_3": 24, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1023, |
|
"eval_gen_len": 24.7015, |
|
"eval_loss": 3.9210410118103027, |
|
"eval_precisions_1": 14.583, |
|
"eval_precisions_2": 0.8855, |
|
"eval_precisions_3": 0.0964, |
|
"eval_precisions_4": 0.0022, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0894, |
|
"eval_rouge2": 0.0109, |
|
"eval_rougeL": 0.0849, |
|
"eval_rougeLsum": 0.0849, |
|
"eval_runtime": 2975.0462, |
|
"eval_samples_per_second": 0.741, |
|
"eval_steps_per_second": 0.37, |
|
"eval_sys_len": 29308, |
|
"eval_totals_1": 29308, |
|
"eval_totals_2": 27104, |
|
"eval_totals_3": 24900, |
|
"eval_totals_4": 22696, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.9434, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_bleu": 0.4204, |
|
"eval_bp": 0.7465, |
|
"eval_counts_1": 3652, |
|
"eval_counts_2": 218, |
|
"eval_counts_3": 35, |
|
"eval_counts_4": 1, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0898, |
|
"eval_gen_len": 12.3049, |
|
"eval_loss": 3.690653085708618, |
|
"eval_precisions_1": 22.2114, |
|
"eval_precisions_2": 1.5311, |
|
"eval_precisions_3": 0.2908, |
|
"eval_precisions_4": 0.0102, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0856, |
|
"eval_rouge2": 0.0141, |
|
"eval_rougeL": 0.0843, |
|
"eval_rougeLsum": 0.0842, |
|
"eval_runtime": 3036.8902, |
|
"eval_samples_per_second": 0.726, |
|
"eval_steps_per_second": 0.363, |
|
"eval_sys_len": 16442, |
|
"eval_totals_1": 16442, |
|
"eval_totals_2": 14238, |
|
"eval_totals_3": 12034, |
|
"eval_totals_4": 9830, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.6152, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 1.0505, |
|
"eval_bp": 0.968, |
|
"eval_counts_1": 4103, |
|
"eval_counts_2": 341, |
|
"eval_counts_3": 77, |
|
"eval_counts_4": 11, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.112, |
|
"eval_gen_len": 14.3607, |
|
"eval_loss": 3.460298538208008, |
|
"eval_precisions_1": 19.9359, |
|
"eval_precisions_2": 1.8556, |
|
"eval_precisions_3": 0.4761, |
|
"eval_precisions_4": 0.0787, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.107, |
|
"eval_rouge2": 0.019, |
|
"eval_rougeL": 0.1023, |
|
"eval_rougeLsum": 0.1024, |
|
"eval_runtime": 3225.717, |
|
"eval_samples_per_second": 0.683, |
|
"eval_steps_per_second": 0.342, |
|
"eval_sys_len": 20581, |
|
"eval_totals_1": 20581, |
|
"eval_totals_2": 18377, |
|
"eval_totals_3": 16173, |
|
"eval_totals_4": 13969, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.3814, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 2.3489, |
|
"eval_bp": 0.8218, |
|
"eval_counts_1": 4342, |
|
"eval_counts_2": 675, |
|
"eval_counts_3": 218, |
|
"eval_counts_4": 43, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.1308, |
|
"eval_gen_len": 10.2418, |
|
"eval_loss": 3.2883455753326416, |
|
"eval_precisions_1": 24.4441, |
|
"eval_precisions_2": 4.3383, |
|
"eval_precisions_3": 1.6323, |
|
"eval_precisions_4": 0.3856, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1264, |
|
"eval_rouge2": 0.0353, |
|
"eval_rougeL": 0.1234, |
|
"eval_rougeLsum": 0.1234, |
|
"eval_runtime": 2402.3288, |
|
"eval_samples_per_second": 0.917, |
|
"eval_steps_per_second": 0.459, |
|
"eval_sys_len": 17763, |
|
"eval_totals_1": 17763, |
|
"eval_totals_2": 15559, |
|
"eval_totals_3": 13355, |
|
"eval_totals_4": 11151, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1711, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 2.6207, |
|
"eval_bp": 0.9273, |
|
"eval_counts_1": 4820, |
|
"eval_counts_2": 856, |
|
"eval_counts_3": 246, |
|
"eval_counts_4": 44, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.1547, |
|
"eval_gen_len": 14.3249, |
|
"eval_loss": 3.0987935066223145, |
|
"eval_precisions_1": 24.3939, |
|
"eval_precisions_2": 4.8761, |
|
"eval_precisions_3": 1.6025, |
|
"eval_precisions_4": 0.3347, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1503, |
|
"eval_rouge2": 0.0465, |
|
"eval_rougeL": 0.1455, |
|
"eval_rougeLsum": 0.1457, |
|
"eval_runtime": 2969.3248, |
|
"eval_samples_per_second": 0.742, |
|
"eval_steps_per_second": 0.371, |
|
"eval_sys_len": 19759, |
|
"eval_totals_1": 19759, |
|
"eval_totals_2": 17555, |
|
"eval_totals_3": 15351, |
|
"eval_totals_4": 13147, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.0147, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 3.4764, |
|
"eval_bp": 0.8739, |
|
"eval_counts_1": 5167, |
|
"eval_counts_2": 1066, |
|
"eval_counts_3": 321, |
|
"eval_counts_4": 76, |
|
"eval_exact_match": 0.0018, |
|
"eval_f1": 0.1816, |
|
"eval_gen_len": 14.3067, |
|
"eval_loss": 2.9539589881896973, |
|
"eval_precisions_1": 27.5941, |
|
"eval_precisions_2": 6.4524, |
|
"eval_precisions_3": 2.2421, |
|
"eval_precisions_4": 0.6274, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1773, |
|
"eval_rouge2": 0.0588, |
|
"eval_rougeL": 0.1721, |
|
"eval_rougeLsum": 0.1721, |
|
"eval_runtime": 2825.1201, |
|
"eval_samples_per_second": 0.78, |
|
"eval_steps_per_second": 0.39, |
|
"eval_sys_len": 18725, |
|
"eval_totals_1": 18725, |
|
"eval_totals_2": 16521, |
|
"eval_totals_3": 14317, |
|
"eval_totals_4": 12113, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7829, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_bleu": 4.5099, |
|
"eval_bp": 0.7974, |
|
"eval_counts_1": 5625, |
|
"eval_counts_2": 1267, |
|
"eval_counts_3": 420, |
|
"eval_counts_4": 124, |
|
"eval_exact_match": 0.0045, |
|
"eval_f1": 0.2159, |
|
"eval_gen_len": 12.9741, |
|
"eval_loss": 2.8288302421569824, |
|
"eval_precisions_1": 32.4638, |
|
"eval_precisions_2": 8.378, |
|
"eval_precisions_3": 3.251, |
|
"eval_precisions_4": 1.1573, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2127, |
|
"eval_rouge2": 0.0741, |
|
"eval_rougeL": 0.2067, |
|
"eval_rougeLsum": 0.2065, |
|
"eval_runtime": 2709.6941, |
|
"eval_samples_per_second": 0.813, |
|
"eval_steps_per_second": 0.407, |
|
"eval_sys_len": 17327, |
|
"eval_totals_1": 17327, |
|
"eval_totals_2": 15123, |
|
"eval_totals_3": 12919, |
|
"eval_totals_4": 10715, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.6093, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 5.5051, |
|
"eval_bp": 0.8685, |
|
"eval_counts_1": 6005, |
|
"eval_counts_2": 1469, |
|
"eval_counts_3": 528, |
|
"eval_counts_4": 181, |
|
"eval_exact_match": 0.0064, |
|
"eval_f1": 0.231, |
|
"eval_gen_len": 14.4791, |
|
"eval_loss": 2.7177300453186035, |
|
"eval_precisions_1": 32.2416, |
|
"eval_precisions_2": 8.9459, |
|
"eval_precisions_3": 3.7139, |
|
"eval_precisions_4": 1.5067, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.229, |
|
"eval_rouge2": 0.0827, |
|
"eval_rougeL": 0.2215, |
|
"eval_rougeLsum": 0.2213, |
|
"eval_runtime": 1457.0803, |
|
"eval_samples_per_second": 1.513, |
|
"eval_steps_per_second": 0.756, |
|
"eval_sys_len": 18625, |
|
"eval_totals_1": 18625, |
|
"eval_totals_2": 16421, |
|
"eval_totals_3": 14217, |
|
"eval_totals_4": 12013, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.453, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 6.6865, |
|
"eval_bp": 0.8515, |
|
"eval_counts_1": 6396, |
|
"eval_counts_2": 1744, |
|
"eval_counts_3": 664, |
|
"eval_counts_4": 246, |
|
"eval_exact_match": 0.0059, |
|
"eval_f1": 0.2565, |
|
"eval_gen_len": 13.7142, |
|
"eval_loss": 2.591360330581665, |
|
"eval_precisions_1": 34.9375, |
|
"eval_precisions_2": 10.8303, |
|
"eval_precisions_3": 4.7773, |
|
"eval_precisions_4": 2.1035, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2553, |
|
"eval_rouge2": 0.0998, |
|
"eval_rougeL": 0.2479, |
|
"eval_rougeLsum": 0.2478, |
|
"eval_runtime": 1377.6536, |
|
"eval_samples_per_second": 1.6, |
|
"eval_steps_per_second": 0.8, |
|
"eval_sys_len": 18307, |
|
"eval_totals_1": 18307, |
|
"eval_totals_2": 16103, |
|
"eval_totals_3": 13899, |
|
"eval_totals_4": 11695, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3329, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_bleu": 7.383, |
|
"eval_bp": 0.8592, |
|
"eval_counts_1": 6673, |
|
"eval_counts_2": 1888, |
|
"eval_counts_3": 741, |
|
"eval_counts_4": 291, |
|
"eval_exact_match": 0.0091, |
|
"eval_f1": 0.2749, |
|
"eval_gen_len": 14.1751, |
|
"eval_loss": 2.499257802963257, |
|
"eval_precisions_1": 36.1661, |
|
"eval_precisions_2": 11.6206, |
|
"eval_precisions_3": 5.2767, |
|
"eval_precisions_4": 2.458, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2747, |
|
"eval_rouge2": 0.1114, |
|
"eval_rougeL": 0.2652, |
|
"eval_rougeLsum": 0.2652, |
|
"eval_runtime": 1427.0765, |
|
"eval_samples_per_second": 1.544, |
|
"eval_steps_per_second": 0.772, |
|
"eval_sys_len": 18451, |
|
"eval_totals_1": 18451, |
|
"eval_totals_2": 16247, |
|
"eval_totals_3": 14043, |
|
"eval_totals_4": 11839, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1663, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 8.1343, |
|
"eval_bp": 0.8635, |
|
"eval_counts_1": 6953, |
|
"eval_counts_2": 2052, |
|
"eval_counts_3": 834, |
|
"eval_counts_4": 337, |
|
"eval_exact_match": 0.0082, |
|
"eval_f1": 0.2889, |
|
"eval_gen_len": 14.6783, |
|
"eval_loss": 2.4196276664733887, |
|
"eval_precisions_1": 37.5209, |
|
"eval_precisions_2": 12.5681, |
|
"eval_precisions_3": 5.9053, |
|
"eval_precisions_4": 2.8274, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2886, |
|
"eval_rouge2": 0.1215, |
|
"eval_rougeL": 0.2773, |
|
"eval_rougeLsum": 0.277, |
|
"eval_runtime": 1443.0194, |
|
"eval_samples_per_second": 1.527, |
|
"eval_steps_per_second": 0.764, |
|
"eval_sys_len": 18531, |
|
"eval_totals_1": 18531, |
|
"eval_totals_2": 16327, |
|
"eval_totals_3": 14123, |
|
"eval_totals_4": 11919, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0422, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_bleu": 8.4322, |
|
"eval_bp": 0.8339, |
|
"eval_counts_1": 6968, |
|
"eval_counts_2": 2089, |
|
"eval_counts_3": 862, |
|
"eval_counts_4": 365, |
|
"eval_exact_match": 0.0113, |
|
"eval_f1": 0.2951, |
|
"eval_gen_len": 13.6987, |
|
"eval_loss": 2.3703055381774902, |
|
"eval_precisions_1": 38.7456, |
|
"eval_precisions_2": 13.2383, |
|
"eval_precisions_3": 6.3494, |
|
"eval_precisions_4": 3.2096, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2961, |
|
"eval_rouge2": 0.1268, |
|
"eval_rougeL": 0.2858, |
|
"eval_rougeLsum": 0.2857, |
|
"eval_runtime": 1381.8523, |
|
"eval_samples_per_second": 1.595, |
|
"eval_steps_per_second": 0.797, |
|
"eval_sys_len": 17984, |
|
"eval_totals_1": 17984, |
|
"eval_totals_2": 15780, |
|
"eval_totals_3": 13576, |
|
"eval_totals_4": 11372, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9245, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 9.5973, |
|
"eval_bp": 0.8892, |
|
"eval_counts_1": 7500, |
|
"eval_counts_2": 2353, |
|
"eval_counts_3": 999, |
|
"eval_counts_4": 446, |
|
"eval_exact_match": 0.0132, |
|
"eval_f1": 0.314, |
|
"eval_gen_len": 14.77, |
|
"eval_loss": 2.3217406272888184, |
|
"eval_precisions_1": 39.4384, |
|
"eval_precisions_2": 13.9951, |
|
"eval_precisions_3": 6.8383, |
|
"eval_precisions_4": 3.5953, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3149, |
|
"eval_rouge2": 0.1407, |
|
"eval_rougeL": 0.3017, |
|
"eval_rougeLsum": 0.3017, |
|
"eval_runtime": 1430.5793, |
|
"eval_samples_per_second": 1.541, |
|
"eval_steps_per_second": 0.77, |
|
"eval_sys_len": 19017, |
|
"eval_totals_1": 19017, |
|
"eval_totals_2": 16813, |
|
"eval_totals_3": 14609, |
|
"eval_totals_4": 12405, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8216, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 9.9557, |
|
"eval_bp": 0.8467, |
|
"eval_counts_1": 7444, |
|
"eval_counts_2": 2357, |
|
"eval_counts_3": 1044, |
|
"eval_counts_4": 488, |
|
"eval_exact_match": 0.0132, |
|
"eval_f1": 0.3181, |
|
"eval_gen_len": 13.8031, |
|
"eval_loss": 2.27047061920166, |
|
"eval_precisions_1": 40.8584, |
|
"eval_precisions_2": 14.7175, |
|
"eval_precisions_3": 7.5592, |
|
"eval_precisions_4": 4.2044, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3201, |
|
"eval_rouge2": 0.1437, |
|
"eval_rougeL": 0.3081, |
|
"eval_rougeLsum": 0.3077, |
|
"eval_runtime": 1357.6078, |
|
"eval_samples_per_second": 1.623, |
|
"eval_steps_per_second": 0.812, |
|
"eval_sys_len": 18219, |
|
"eval_totals_1": 18219, |
|
"eval_totals_2": 16015, |
|
"eval_totals_3": 13811, |
|
"eval_totals_4": 11607, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7503, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_bleu": 10.4354, |
|
"eval_bp": 0.8498, |
|
"eval_counts_1": 7571, |
|
"eval_counts_2": 2487, |
|
"eval_counts_3": 1114, |
|
"eval_counts_4": 515, |
|
"eval_exact_match": 0.0145, |
|
"eval_f1": 0.3265, |
|
"eval_gen_len": 13.9106, |
|
"eval_loss": 2.238603353500366, |
|
"eval_precisions_1": 41.4282, |
|
"eval_precisions_2": 15.4751, |
|
"eval_precisions_3": 8.0335, |
|
"eval_precisions_4": 4.4157, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3289, |
|
"eval_rouge2": 0.1512, |
|
"eval_rougeL": 0.3153, |
|
"eval_rougeLsum": 0.3151, |
|
"eval_runtime": 1353.0462, |
|
"eval_samples_per_second": 1.629, |
|
"eval_steps_per_second": 0.814, |
|
"eval_sys_len": 18275, |
|
"eval_totals_1": 18275, |
|
"eval_totals_2": 16071, |
|
"eval_totals_3": 13867, |
|
"eval_totals_4": 11663, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6342, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 10.7447, |
|
"eval_bp": 0.8418, |
|
"eval_counts_1": 7697, |
|
"eval_counts_2": 2536, |
|
"eval_counts_3": 1155, |
|
"eval_counts_4": 537, |
|
"eval_exact_match": 0.0177, |
|
"eval_f1": 0.3313, |
|
"eval_gen_len": 13.8494, |
|
"eval_loss": 2.2182679176330566, |
|
"eval_precisions_1": 42.4568, |
|
"eval_precisions_2": 15.9246, |
|
"eval_precisions_3": 8.4178, |
|
"eval_precisions_4": 4.6627, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3342, |
|
"eval_rouge2": 0.1559, |
|
"eval_rougeL": 0.3224, |
|
"eval_rougeLsum": 0.3222, |
|
"eval_runtime": 1333.2607, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.827, |
|
"eval_sys_len": 18129, |
|
"eval_totals_1": 18129, |
|
"eval_totals_2": 15925, |
|
"eval_totals_3": 13721, |
|
"eval_totals_4": 11517, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5474, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 11.1066, |
|
"eval_bp": 0.8786, |
|
"eval_counts_1": 7879, |
|
"eval_counts_2": 2632, |
|
"eval_counts_3": 1187, |
|
"eval_counts_4": 570, |
|
"eval_exact_match": 0.0177, |
|
"eval_f1": 0.3375, |
|
"eval_gen_len": 14.5136, |
|
"eval_loss": 2.1956045627593994, |
|
"eval_precisions_1": 41.8762, |
|
"eval_precisions_2": 15.8449, |
|
"eval_precisions_3": 8.2391, |
|
"eval_precisions_4": 4.671, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3398, |
|
"eval_rouge2": 0.1607, |
|
"eval_rougeL": 0.326, |
|
"eval_rougeLsum": 0.326, |
|
"eval_runtime": 1394.5803, |
|
"eval_samples_per_second": 1.58, |
|
"eval_steps_per_second": 0.79, |
|
"eval_sys_len": 18815, |
|
"eval_totals_1": 18815, |
|
"eval_totals_2": 16611, |
|
"eval_totals_3": 14407, |
|
"eval_totals_4": 12203, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 1440, |
|
"total_flos": 8.496574887886848e+17, |
|
"train_loss": 3.111723126305474, |
|
"train_runtime": 93678.1212, |
|
"train_samples_per_second": 1.989, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8.496574887886848e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|