|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.922713610991842, |
|
"eval_steps": 500, |
|
"global_step": 2900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 5.5131, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 6.2485, |
|
"eval_bp": 0.7216, |
|
"eval_counts_1": 6032, |
|
"eval_counts_2": 1668, |
|
"eval_counts_3": 626, |
|
"eval_counts_4": 216, |
|
"eval_exact_match": 0.0018, |
|
"eval_f1": 0.2406, |
|
"eval_gen_len": 12.6166, |
|
"eval_loss": 1.8697563409805298, |
|
"eval_precisions_1": 37.6459, |
|
"eval_precisions_2": 12.0703, |
|
"eval_precisions_3": 5.3896, |
|
"eval_precisions_4": 2.2952, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2485, |
|
"eval_rouge2": 0.1011, |
|
"eval_rougeL": 0.2368, |
|
"eval_rougeLsum": 0.2366, |
|
"eval_runtime": 467.1177, |
|
"eval_samples_per_second": 4.718, |
|
"eval_steps_per_second": 1.18, |
|
"eval_sys_len": 16023, |
|
"eval_totals_1": 16023, |
|
"eval_totals_2": 13819, |
|
"eval_totals_3": 11615, |
|
"eval_totals_4": 9411, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3946, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 10.8315, |
|
"eval_bp": 0.7704, |
|
"eval_counts_1": 7325, |
|
"eval_counts_2": 2554, |
|
"eval_counts_3": 1178, |
|
"eval_counts_4": 558, |
|
"eval_exact_match": 0.0145, |
|
"eval_f1": 0.3148, |
|
"eval_gen_len": 12.2582, |
|
"eval_loss": 1.58878493309021, |
|
"eval_precisions_1": 43.4641, |
|
"eval_precisions_2": 17.4346, |
|
"eval_precisions_3": 9.4656, |
|
"eval_precisions_4": 5.4487, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3226, |
|
"eval_rouge2": 0.1585, |
|
"eval_rougeL": 0.31, |
|
"eval_rougeLsum": 0.31, |
|
"eval_runtime": 528.6481, |
|
"eval_samples_per_second": 4.169, |
|
"eval_steps_per_second": 1.042, |
|
"eval_sys_len": 16853, |
|
"eval_totals_1": 16853, |
|
"eval_totals_2": 14649, |
|
"eval_totals_3": 12445, |
|
"eval_totals_4": 10241, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0101, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 11.7891, |
|
"eval_bp": 0.7812, |
|
"eval_counts_1": 7623, |
|
"eval_counts_2": 2764, |
|
"eval_counts_3": 1304, |
|
"eval_counts_4": 629, |
|
"eval_exact_match": 0.0154, |
|
"eval_f1": 0.3315, |
|
"eval_gen_len": 12.6783, |
|
"eval_loss": 1.4997321367263794, |
|
"eval_precisions_1": 44.7307, |
|
"eval_precisions_2": 18.6278, |
|
"eval_precisions_3": 10.3214, |
|
"eval_precisions_4": 6.0307, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3403, |
|
"eval_rouge2": 0.1723, |
|
"eval_rougeL": 0.3263, |
|
"eval_rougeLsum": 0.3263, |
|
"eval_runtime": 451.1882, |
|
"eval_samples_per_second": 4.885, |
|
"eval_steps_per_second": 1.221, |
|
"eval_sys_len": 17042, |
|
"eval_totals_1": 17042, |
|
"eval_totals_2": 14838, |
|
"eval_totals_3": 12634, |
|
"eval_totals_4": 10430, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8073, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 12.6068, |
|
"eval_bp": 0.7588, |
|
"eval_counts_1": 7728, |
|
"eval_counts_2": 2916, |
|
"eval_counts_3": 1415, |
|
"eval_counts_4": 707, |
|
"eval_exact_match": 0.0168, |
|
"eval_f1": 0.3387, |
|
"eval_gen_len": 12.2963, |
|
"eval_loss": 1.4610050916671753, |
|
"eval_precisions_1": 46.4033, |
|
"eval_precisions_2": 20.1799, |
|
"eval_precisions_3": 11.5548, |
|
"eval_precisions_4": 7.0404, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3461, |
|
"eval_rouge2": 0.1818, |
|
"eval_rougeL": 0.3324, |
|
"eval_rougeLsum": 0.3326, |
|
"eval_runtime": 433.3953, |
|
"eval_samples_per_second": 5.085, |
|
"eval_steps_per_second": 1.271, |
|
"eval_sys_len": 16654, |
|
"eval_totals_1": 16654, |
|
"eval_totals_2": 14450, |
|
"eval_totals_3": 12246, |
|
"eval_totals_4": 10042, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6851, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_bleu": 13.0784, |
|
"eval_bp": 0.8004, |
|
"eval_counts_1": 7964, |
|
"eval_counts_2": 3059, |
|
"eval_counts_3": 1483, |
|
"eval_counts_4": 727, |
|
"eval_exact_match": 0.0159, |
|
"eval_f1": 0.3483, |
|
"eval_gen_len": 12.7436, |
|
"eval_loss": 1.4356882572174072, |
|
"eval_precisions_1": 45.8201, |
|
"eval_precisions_2": 20.1555, |
|
"eval_precisions_3": 11.4314, |
|
"eval_precisions_4": 6.7509, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3558, |
|
"eval_rouge2": 0.1888, |
|
"eval_rougeL": 0.3415, |
|
"eval_rougeLsum": 0.3414, |
|
"eval_runtime": 452.1483, |
|
"eval_samples_per_second": 4.875, |
|
"eval_steps_per_second": 1.219, |
|
"eval_sys_len": 17381, |
|
"eval_totals_1": 17381, |
|
"eval_totals_2": 15177, |
|
"eval_totals_3": 12973, |
|
"eval_totals_4": 10769, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5642, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 13.9065, |
|
"eval_bp": 0.7987, |
|
"eval_counts_1": 8299, |
|
"eval_counts_2": 3224, |
|
"eval_counts_3": 1592, |
|
"eval_counts_4": 788, |
|
"eval_exact_match": 0.0204, |
|
"eval_f1": 0.3736, |
|
"eval_gen_len": 12.9569, |
|
"eval_loss": 1.4003357887268066, |
|
"eval_precisions_1": 47.8301, |
|
"eval_precisions_2": 21.2847, |
|
"eval_precisions_3": 12.3001, |
|
"eval_precisions_4": 7.3377, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3814, |
|
"eval_rouge2": 0.2025, |
|
"eval_rougeL": 0.3684, |
|
"eval_rougeLsum": 0.3685, |
|
"eval_runtime": 450.2054, |
|
"eval_samples_per_second": 4.896, |
|
"eval_steps_per_second": 1.224, |
|
"eval_sys_len": 17351, |
|
"eval_totals_1": 17351, |
|
"eval_totals_2": 15147, |
|
"eval_totals_3": 12943, |
|
"eval_totals_4": 10739, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4756, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_bleu": 14.9146, |
|
"eval_bp": 0.8165, |
|
"eval_counts_1": 8640, |
|
"eval_counts_2": 3430, |
|
"eval_counts_3": 1712, |
|
"eval_counts_4": 879, |
|
"eval_exact_match": 0.025, |
|
"eval_f1": 0.3892, |
|
"eval_gen_len": 13.1084, |
|
"eval_loss": 1.3778630495071411, |
|
"eval_precisions_1": 48.8992, |
|
"eval_precisions_2": 22.1791, |
|
"eval_precisions_3": 12.91, |
|
"eval_precisions_4": 7.9497, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3971, |
|
"eval_rouge2": 0.2133, |
|
"eval_rougeL": 0.3828, |
|
"eval_rougeLsum": 0.3826, |
|
"eval_runtime": 753.2935, |
|
"eval_samples_per_second": 2.926, |
|
"eval_steps_per_second": 0.731, |
|
"eval_sys_len": 17669, |
|
"eval_totals_1": 17669, |
|
"eval_totals_2": 15465, |
|
"eval_totals_3": 13261, |
|
"eval_totals_4": 11057, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3792, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 14.8859, |
|
"eval_bp": 0.8346, |
|
"eval_counts_1": 8732, |
|
"eval_counts_2": 3417, |
|
"eval_counts_3": 1712, |
|
"eval_counts_4": 871, |
|
"eval_exact_match": 0.0245, |
|
"eval_f1": 0.3917, |
|
"eval_gen_len": 13.3748, |
|
"eval_loss": 1.362410306930542, |
|
"eval_precisions_1": 48.5219, |
|
"eval_precisions_2": 21.6375, |
|
"eval_precisions_3": 12.5994, |
|
"eval_precisions_4": 7.6511, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4003, |
|
"eval_rouge2": 0.2131, |
|
"eval_rougeL": 0.3852, |
|
"eval_rougeLsum": 0.3849, |
|
"eval_runtime": 699.0977, |
|
"eval_samples_per_second": 3.153, |
|
"eval_steps_per_second": 0.788, |
|
"eval_sys_len": 17996, |
|
"eval_totals_1": 17996, |
|
"eval_totals_2": 15792, |
|
"eval_totals_3": 13588, |
|
"eval_totals_4": 11384, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3133, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 15.3264, |
|
"eval_bp": 0.8161, |
|
"eval_counts_1": 8804, |
|
"eval_counts_2": 3500, |
|
"eval_counts_3": 1754, |
|
"eval_counts_4": 920, |
|
"eval_exact_match": 0.025, |
|
"eval_f1": 0.4, |
|
"eval_gen_len": 13.2019, |
|
"eval_loss": 1.3630096912384033, |
|
"eval_precisions_1": 49.85, |
|
"eval_precisions_2": 22.6435, |
|
"eval_precisions_3": 13.2347, |
|
"eval_precisions_4": 8.3265, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4078, |
|
"eval_rouge2": 0.219, |
|
"eval_rougeL": 0.3932, |
|
"eval_rougeLsum": 0.3935, |
|
"eval_runtime": 465.2887, |
|
"eval_samples_per_second": 4.737, |
|
"eval_steps_per_second": 1.184, |
|
"eval_sys_len": 17661, |
|
"eval_totals_1": 17661, |
|
"eval_totals_2": 15457, |
|
"eval_totals_3": 13253, |
|
"eval_totals_4": 11049, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.261, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 16.0163, |
|
"eval_bp": 0.8188, |
|
"eval_counts_1": 8910, |
|
"eval_counts_2": 3602, |
|
"eval_counts_3": 1849, |
|
"eval_counts_4": 1000, |
|
"eval_exact_match": 0.0295, |
|
"eval_f1": 0.4055, |
|
"eval_gen_len": 13.1892, |
|
"eval_loss": 1.3685479164123535, |
|
"eval_precisions_1": 50.3134, |
|
"eval_precisions_2": 23.2312, |
|
"eval_precisions_3": 13.9012, |
|
"eval_precisions_4": 9.0114, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4135, |
|
"eval_rouge2": 0.223, |
|
"eval_rougeL": 0.3991, |
|
"eval_rougeLsum": 0.3992, |
|
"eval_runtime": 491.3102, |
|
"eval_samples_per_second": 4.486, |
|
"eval_steps_per_second": 1.121, |
|
"eval_sys_len": 17709, |
|
"eval_totals_1": 17709, |
|
"eval_totals_2": 15505, |
|
"eval_totals_3": 13301, |
|
"eval_totals_4": 11097, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1897, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 16.3202, |
|
"eval_bp": 0.849, |
|
"eval_counts_1": 9096, |
|
"eval_counts_2": 3690, |
|
"eval_counts_3": 1902, |
|
"eval_counts_4": 1012, |
|
"eval_exact_match": 0.0281, |
|
"eval_f1": 0.4121, |
|
"eval_gen_len": 13.5077, |
|
"eval_loss": 1.3638867139816284, |
|
"eval_precisions_1": 49.8111, |
|
"eval_precisions_2": 22.9806, |
|
"eval_precisions_3": 13.7299, |
|
"eval_precisions_4": 8.6874, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4201, |
|
"eval_rouge2": 0.2289, |
|
"eval_rougeL": 0.4059, |
|
"eval_rougeLsum": 0.4057, |
|
"eval_runtime": 536.9399, |
|
"eval_samples_per_second": 4.105, |
|
"eval_steps_per_second": 1.026, |
|
"eval_sys_len": 18261, |
|
"eval_totals_1": 18261, |
|
"eval_totals_2": 16057, |
|
"eval_totals_3": 13853, |
|
"eval_totals_4": 11649, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1453, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_bleu": 16.4772, |
|
"eval_bp": 0.8527, |
|
"eval_counts_1": 9106, |
|
"eval_counts_2": 3735, |
|
"eval_counts_3": 1932, |
|
"eval_counts_4": 1023, |
|
"eval_exact_match": 0.0281, |
|
"eval_f1": 0.4099, |
|
"eval_gen_len": 13.8013, |
|
"eval_loss": 1.3609519004821777, |
|
"eval_precisions_1": 49.6808, |
|
"eval_precisions_2": 23.1628, |
|
"eval_precisions_3": 13.8783, |
|
"eval_precisions_4": 8.7309, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4173, |
|
"eval_rouge2": 0.2303, |
|
"eval_rougeL": 0.4026, |
|
"eval_rougeLsum": 0.4025, |
|
"eval_runtime": 617.7899, |
|
"eval_samples_per_second": 3.568, |
|
"eval_steps_per_second": 0.892, |
|
"eval_sys_len": 18329, |
|
"eval_totals_1": 18329, |
|
"eval_totals_2": 16125, |
|
"eval_totals_3": 13921, |
|
"eval_totals_4": 11717, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0858, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 16.7204, |
|
"eval_bp": 0.8649, |
|
"eval_counts_1": 9245, |
|
"eval_counts_2": 3778, |
|
"eval_counts_3": 1955, |
|
"eval_counts_4": 1049, |
|
"eval_exact_match": 0.0322, |
|
"eval_f1": 0.417, |
|
"eval_gen_len": 13.8144, |
|
"eval_loss": 1.3716095685958862, |
|
"eval_precisions_1": 49.8222, |
|
"eval_precisions_2": 23.1042, |
|
"eval_precisions_3": 13.8182, |
|
"eval_precisions_4": 8.7827, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4244, |
|
"eval_rouge2": 0.2327, |
|
"eval_rougeL": 0.409, |
|
"eval_rougeLsum": 0.409, |
|
"eval_runtime": 504.2774, |
|
"eval_samples_per_second": 4.371, |
|
"eval_steps_per_second": 1.093, |
|
"eval_sys_len": 18556, |
|
"eval_totals_1": 18556, |
|
"eval_totals_2": 16352, |
|
"eval_totals_3": 14148, |
|
"eval_totals_4": 11944, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0472, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 16.6825, |
|
"eval_bp": 0.8519, |
|
"eval_counts_1": 9166, |
|
"eval_counts_2": 3756, |
|
"eval_counts_3": 1946, |
|
"eval_counts_4": 1054, |
|
"eval_exact_match": 0.0309, |
|
"eval_f1": 0.4143, |
|
"eval_gen_len": 13.8099, |
|
"eval_loss": 1.3770091533660889, |
|
"eval_precisions_1": 50.0464, |
|
"eval_precisions_2": 23.3133, |
|
"eval_precisions_3": 13.993, |
|
"eval_precisions_4": 9.0062, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4216, |
|
"eval_rouge2": 0.2311, |
|
"eval_rougeL": 0.4068, |
|
"eval_rougeLsum": 0.4067, |
|
"eval_runtime": 581.2707, |
|
"eval_samples_per_second": 3.792, |
|
"eval_steps_per_second": 0.948, |
|
"eval_sys_len": 18315, |
|
"eval_totals_1": 18315, |
|
"eval_totals_2": 16111, |
|
"eval_totals_3": 13907, |
|
"eval_totals_4": 11703, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9953, |
|
"step": 2183 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 17.3937, |
|
"eval_bp": 0.842, |
|
"eval_counts_1": 9342, |
|
"eval_counts_2": 3926, |
|
"eval_counts_3": 2046, |
|
"eval_counts_4": 1108, |
|
"eval_exact_match": 0.0327, |
|
"eval_f1": 0.4258, |
|
"eval_gen_len": 13.5023, |
|
"eval_loss": 1.3880597352981567, |
|
"eval_precisions_1": 51.5222, |
|
"eval_precisions_2": 24.6484, |
|
"eval_precisions_3": 14.9082, |
|
"eval_precisions_4": 9.6181, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4328, |
|
"eval_rouge2": 0.2418, |
|
"eval_rougeL": 0.4171, |
|
"eval_rougeLsum": 0.4171, |
|
"eval_runtime": 718.2329, |
|
"eval_samples_per_second": 3.069, |
|
"eval_steps_per_second": 0.767, |
|
"eval_sys_len": 18132, |
|
"eval_totals_1": 18132, |
|
"eval_totals_2": 15928, |
|
"eval_totals_3": 13724, |
|
"eval_totals_4": 11520, |
|
"step": 2183 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9509, |
|
"step": 2329 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 17.1618, |
|
"eval_bp": 0.871, |
|
"eval_counts_1": 9330, |
|
"eval_counts_2": 3894, |
|
"eval_counts_3": 2024, |
|
"eval_counts_4": 1084, |
|
"eval_exact_match": 0.0313, |
|
"eval_f1": 0.4198, |
|
"eval_gen_len": 13.956, |
|
"eval_loss": 1.401639461517334, |
|
"eval_precisions_1": 49.9679, |
|
"eval_precisions_2": 23.6459, |
|
"eval_precisions_3": 14.1896, |
|
"eval_precisions_4": 8.9884, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4269, |
|
"eval_rouge2": 0.237, |
|
"eval_rougeL": 0.4123, |
|
"eval_rougeLsum": 0.4122, |
|
"eval_runtime": 632.3222, |
|
"eval_samples_per_second": 3.486, |
|
"eval_steps_per_second": 0.871, |
|
"eval_sys_len": 18672, |
|
"eval_totals_1": 18672, |
|
"eval_totals_2": 16468, |
|
"eval_totals_3": 14264, |
|
"eval_totals_4": 12060, |
|
"step": 2329 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9183, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 16.995, |
|
"eval_bp": 0.8606, |
|
"eval_counts_1": 9303, |
|
"eval_counts_2": 3824, |
|
"eval_counts_3": 1979, |
|
"eval_counts_4": 1084, |
|
"eval_exact_match": 0.0327, |
|
"eval_f1": 0.4199, |
|
"eval_gen_len": 13.7854, |
|
"eval_loss": 1.4152026176452637, |
|
"eval_precisions_1": 50.3518, |
|
"eval_precisions_2": 23.5005, |
|
"eval_precisions_3": 14.0674, |
|
"eval_precisions_4": 9.1369, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4269, |
|
"eval_rouge2": 0.2345, |
|
"eval_rougeL": 0.4121, |
|
"eval_rougeLsum": 0.4122, |
|
"eval_runtime": 466.5423, |
|
"eval_samples_per_second": 4.724, |
|
"eval_steps_per_second": 1.181, |
|
"eval_sys_len": 18476, |
|
"eval_totals_1": 18476, |
|
"eval_totals_2": 16272, |
|
"eval_totals_3": 14068, |
|
"eval_totals_4": 11864, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8696, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 16.9541, |
|
"eval_bp": 0.8554, |
|
"eval_counts_1": 9184, |
|
"eval_counts_2": 3798, |
|
"eval_counts_3": 1993, |
|
"eval_counts_4": 1085, |
|
"eval_exact_match": 0.034, |
|
"eval_f1": 0.4148, |
|
"eval_gen_len": 13.726, |
|
"eval_loss": 1.44040048122406, |
|
"eval_precisions_1": 49.9701, |
|
"eval_precisions_2": 23.4807, |
|
"eval_precisions_3": 14.2653, |
|
"eval_precisions_4": 9.2207, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4218, |
|
"eval_rouge2": 0.2333, |
|
"eval_rougeL": 0.4076, |
|
"eval_rougeLsum": 0.4074, |
|
"eval_runtime": 470.6343, |
|
"eval_samples_per_second": 4.683, |
|
"eval_steps_per_second": 1.171, |
|
"eval_sys_len": 18379, |
|
"eval_totals_1": 18379, |
|
"eval_totals_2": 16175, |
|
"eval_totals_3": 13971, |
|
"eval_totals_4": 11767, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8389, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 17.67, |
|
"eval_bp": 0.8885, |
|
"eval_counts_1": 9476, |
|
"eval_counts_2": 4000, |
|
"eval_counts_3": 2092, |
|
"eval_counts_4": 1139, |
|
"eval_exact_match": 0.0299, |
|
"eval_f1": 0.4239, |
|
"eval_gen_len": 14.2064, |
|
"eval_loss": 1.4360300302505493, |
|
"eval_precisions_1": 49.8658, |
|
"eval_precisions_2": 23.8109, |
|
"eval_precisions_3": 14.3337, |
|
"eval_precisions_4": 9.1922, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4307, |
|
"eval_rouge2": 0.2406, |
|
"eval_rougeL": 0.4161, |
|
"eval_rougeLsum": 0.416, |
|
"eval_runtime": 480.4816, |
|
"eval_samples_per_second": 4.587, |
|
"eval_steps_per_second": 1.147, |
|
"eval_sys_len": 19003, |
|
"eval_totals_1": 19003, |
|
"eval_totals_2": 16799, |
|
"eval_totals_3": 14595, |
|
"eval_totals_4": 12391, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7993, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"eval_bleu": 17.5799, |
|
"eval_bp": 0.8747, |
|
"eval_counts_1": 9464, |
|
"eval_counts_2": 3970, |
|
"eval_counts_3": 2078, |
|
"eval_counts_4": 1126, |
|
"eval_exact_match": 0.0327, |
|
"eval_f1": 0.4269, |
|
"eval_gen_len": 13.9959, |
|
"eval_loss": 1.454466700553894, |
|
"eval_precisions_1": 50.4989, |
|
"eval_precisions_2": 24.0068, |
|
"eval_precisions_3": 14.498, |
|
"eval_precisions_4": 9.2835, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4349, |
|
"eval_rouge2": 0.2424, |
|
"eval_rougeL": 0.4194, |
|
"eval_rougeLsum": 0.4192, |
|
"eval_runtime": 476.8512, |
|
"eval_samples_per_second": 4.622, |
|
"eval_steps_per_second": 1.155, |
|
"eval_sys_len": 18741, |
|
"eval_totals_1": 18741, |
|
"eval_totals_2": 16537, |
|
"eval_totals_3": 14333, |
|
"eval_totals_4": 12129, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"step": 2900, |
|
"total_flos": 4.449947965854843e+17, |
|
"train_loss": 1.5141178552035628, |
|
"train_runtime": 27637.7455, |
|
"train_samples_per_second": 6.74, |
|
"train_steps_per_second": 0.105 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2900, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.449947965854843e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|