|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.776824034334766, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.458, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 4.6384, |
|
"eval_bp": 0.6642, |
|
"eval_counts_1": 5618, |
|
"eval_counts_2": 1383, |
|
"eval_counts_3": 463, |
|
"eval_counts_4": 116, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.2226, |
|
"eval_gen_len": 11.3013, |
|
"eval_loss": 2.3696436882019043, |
|
"eval_precisions_1": 37.2546, |
|
"eval_precisions_2": 10.7409, |
|
"eval_precisions_3": 4.3385, |
|
"eval_precisions_4": 1.3699, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2266, |
|
"eval_rouge2": 0.0841, |
|
"eval_rougeL": 0.2197, |
|
"eval_rougeLsum": 0.2196, |
|
"eval_runtime": 433.2426, |
|
"eval_samples_per_second": 5.087, |
|
"eval_steps_per_second": 1.272, |
|
"eval_sys_len": 15080, |
|
"eval_totals_1": 15080, |
|
"eval_totals_2": 12876, |
|
"eval_totals_3": 10672, |
|
"eval_totals_4": 8468, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7548, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_bleu": 6.9183, |
|
"eval_bp": 0.728, |
|
"eval_counts_1": 6361, |
|
"eval_counts_2": 1807, |
|
"eval_counts_3": 700, |
|
"eval_counts_4": 254, |
|
"eval_exact_match": 0.0036, |
|
"eval_f1": 0.2635, |
|
"eval_gen_len": 12.206, |
|
"eval_loss": 2.1310036182403564, |
|
"eval_precisions_1": 39.4358, |
|
"eval_precisions_2": 12.9757, |
|
"eval_precisions_3": 5.9717, |
|
"eval_precisions_4": 2.6686, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2706, |
|
"eval_rouge2": 0.1122, |
|
"eval_rougeL": 0.2596, |
|
"eval_rougeLsum": 0.2596, |
|
"eval_runtime": 445.5518, |
|
"eval_samples_per_second": 4.947, |
|
"eval_steps_per_second": 1.237, |
|
"eval_sys_len": 16130, |
|
"eval_totals_1": 16130, |
|
"eval_totals_2": 13926, |
|
"eval_totals_3": 11722, |
|
"eval_totals_4": 9518, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5084, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_bleu": 7.616, |
|
"eval_bp": 0.7714, |
|
"eval_counts_1": 6758, |
|
"eval_counts_2": 2001, |
|
"eval_counts_3": 780, |
|
"eval_counts_4": 285, |
|
"eval_exact_match": 0.0045, |
|
"eval_f1": 0.2832, |
|
"eval_gen_len": 12.8825, |
|
"eval_loss": 2.0244088172912598, |
|
"eval_precisions_1": 40.0569, |
|
"eval_precisions_2": 13.6429, |
|
"eval_precisions_3": 6.2585, |
|
"eval_precisions_4": 2.778, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2888, |
|
"eval_rouge2": 0.1258, |
|
"eval_rougeL": 0.2766, |
|
"eval_rougeLsum": 0.2767, |
|
"eval_runtime": 693.3228, |
|
"eval_samples_per_second": 3.179, |
|
"eval_steps_per_second": 0.795, |
|
"eval_sys_len": 16871, |
|
"eval_totals_1": 16871, |
|
"eval_totals_2": 14667, |
|
"eval_totals_3": 12463, |
|
"eval_totals_4": 10259, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3562, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 8.6611, |
|
"eval_bp": 0.7671, |
|
"eval_counts_1": 7011, |
|
"eval_counts_2": 2193, |
|
"eval_counts_3": 908, |
|
"eval_counts_4": 360, |
|
"eval_exact_match": 0.0077, |
|
"eval_f1": 0.2978, |
|
"eval_gen_len": 12.9142, |
|
"eval_loss": 1.950147032737732, |
|
"eval_precisions_1": 41.7421, |
|
"eval_precisions_2": 15.0288, |
|
"eval_precisions_3": 7.3297, |
|
"eval_precisions_4": 3.535, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.303, |
|
"eval_rouge2": 0.1375, |
|
"eval_rougeL": 0.2892, |
|
"eval_rougeLsum": 0.2894, |
|
"eval_runtime": 807.954, |
|
"eval_samples_per_second": 2.728, |
|
"eval_steps_per_second": 0.682, |
|
"eval_sys_len": 16796, |
|
"eval_totals_1": 16796, |
|
"eval_totals_2": 14592, |
|
"eval_totals_3": 12388, |
|
"eval_totals_4": 10184, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2383, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 9.6159, |
|
"eval_bp": 0.762, |
|
"eval_counts_1": 7245, |
|
"eval_counts_2": 2386, |
|
"eval_counts_3": 1015, |
|
"eval_counts_4": 435, |
|
"eval_exact_match": 0.0113, |
|
"eval_f1": 0.3155, |
|
"eval_gen_len": 12.8417, |
|
"eval_loss": 1.8873705863952637, |
|
"eval_precisions_1": 43.3625, |
|
"eval_precisions_2": 16.4506, |
|
"eval_precisions_3": 8.252, |
|
"eval_precisions_4": 4.3086, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3198, |
|
"eval_rouge2": 0.1498, |
|
"eval_rougeL": 0.3077, |
|
"eval_rougeLsum": 0.3079, |
|
"eval_runtime": 789.9213, |
|
"eval_samples_per_second": 2.79, |
|
"eval_steps_per_second": 0.698, |
|
"eval_sys_len": 16708, |
|
"eval_totals_1": 16708, |
|
"eval_totals_2": 14504, |
|
"eval_totals_3": 12300, |
|
"eval_totals_4": 10096, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1576, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 9.5745, |
|
"eval_bp": 0.7796, |
|
"eval_counts_1": 7378, |
|
"eval_counts_2": 2382, |
|
"eval_counts_3": 997, |
|
"eval_counts_4": 429, |
|
"eval_exact_match": 0.0109, |
|
"eval_f1": 0.3215, |
|
"eval_gen_len": 13.2187, |
|
"eval_loss": 1.859336018562317, |
|
"eval_precisions_1": 43.3643, |
|
"eval_precisions_2": 16.0837, |
|
"eval_precisions_3": 7.9089, |
|
"eval_precisions_4": 4.1242, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.326, |
|
"eval_rouge2": 0.1497, |
|
"eval_rougeL": 0.3132, |
|
"eval_rougeLsum": 0.3132, |
|
"eval_runtime": 806.6141, |
|
"eval_samples_per_second": 2.732, |
|
"eval_steps_per_second": 0.683, |
|
"eval_sys_len": 17014, |
|
"eval_totals_1": 17014, |
|
"eval_totals_2": 14810, |
|
"eval_totals_3": 12606, |
|
"eval_totals_4": 10402, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0356, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_bleu": 10.3053, |
|
"eval_bp": 0.7787, |
|
"eval_counts_1": 7570, |
|
"eval_counts_2": 2520, |
|
"eval_counts_3": 1097, |
|
"eval_counts_4": 482, |
|
"eval_exact_match": 0.0123, |
|
"eval_f1": 0.3339, |
|
"eval_gen_len": 13.0368, |
|
"eval_loss": 1.8132530450820923, |
|
"eval_precisions_1": 44.532, |
|
"eval_precisions_2": 17.0328, |
|
"eval_precisions_3": 8.7126, |
|
"eval_precisions_4": 4.6404, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3384, |
|
"eval_rouge2": 0.158, |
|
"eval_rougeL": 0.3258, |
|
"eval_rougeLsum": 0.3257, |
|
"eval_runtime": 454.2622, |
|
"eval_samples_per_second": 4.852, |
|
"eval_steps_per_second": 1.213, |
|
"eval_sys_len": 16999, |
|
"eval_totals_1": 16999, |
|
"eval_totals_2": 14795, |
|
"eval_totals_3": 12591, |
|
"eval_totals_4": 10387, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9575, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_bleu": 10.993, |
|
"eval_bp": 0.8003, |
|
"eval_counts_1": 7764, |
|
"eval_counts_2": 2637, |
|
"eval_counts_3": 1175, |
|
"eval_counts_4": 545, |
|
"eval_exact_match": 0.0136, |
|
"eval_f1": 0.3407, |
|
"eval_gen_len": 13.4719, |
|
"eval_loss": 1.7855687141418457, |
|
"eval_precisions_1": 44.6746, |
|
"eval_precisions_2": 17.3773, |
|
"eval_precisions_3": 9.0587, |
|
"eval_precisions_4": 5.0618, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.345, |
|
"eval_rouge2": 0.1625, |
|
"eval_rougeL": 0.3322, |
|
"eval_rougeLsum": 0.3324, |
|
"eval_runtime": 470.7972, |
|
"eval_samples_per_second": 4.681, |
|
"eval_steps_per_second": 1.17, |
|
"eval_sys_len": 17379, |
|
"eval_totals_1": 17379, |
|
"eval_totals_2": 15175, |
|
"eval_totals_3": 12971, |
|
"eval_totals_4": 10767, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8889, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 10.9637, |
|
"eval_bp": 0.7846, |
|
"eval_counts_1": 7766, |
|
"eval_counts_2": 2644, |
|
"eval_counts_3": 1184, |
|
"eval_counts_4": 532, |
|
"eval_exact_match": 0.0123, |
|
"eval_f1": 0.3438, |
|
"eval_gen_len": 13.2164, |
|
"eval_loss": 1.7666170597076416, |
|
"eval_precisions_1": 45.4099, |
|
"eval_precisions_2": 17.7473, |
|
"eval_precisions_3": 9.3272, |
|
"eval_precisions_4": 5.0715, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3487, |
|
"eval_rouge2": 0.1636, |
|
"eval_rougeL": 0.3348, |
|
"eval_rougeLsum": 0.335, |
|
"eval_runtime": 461.5052, |
|
"eval_samples_per_second": 4.776, |
|
"eval_steps_per_second": 1.194, |
|
"eval_sys_len": 17102, |
|
"eval_totals_1": 17102, |
|
"eval_totals_2": 14898, |
|
"eval_totals_3": 12694, |
|
"eval_totals_4": 10490, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8201, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 11.3891, |
|
"eval_bp": 0.7877, |
|
"eval_counts_1": 7737, |
|
"eval_counts_2": 2680, |
|
"eval_counts_3": 1238, |
|
"eval_counts_4": 587, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.3406, |
|
"eval_gen_len": 13.1388, |
|
"eval_loss": 1.7414668798446655, |
|
"eval_precisions_1": 45.0979, |
|
"eval_precisions_2": 17.924, |
|
"eval_precisions_3": 9.7113, |
|
"eval_precisions_4": 5.5671, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3453, |
|
"eval_rouge2": 0.1666, |
|
"eval_rougeL": 0.3332, |
|
"eval_rougeLsum": 0.3333, |
|
"eval_runtime": 457.4345, |
|
"eval_samples_per_second": 4.818, |
|
"eval_steps_per_second": 1.205, |
|
"eval_sys_len": 17156, |
|
"eval_totals_1": 17156, |
|
"eval_totals_2": 14952, |
|
"eval_totals_3": 12748, |
|
"eval_totals_4": 10544, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7882, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 11.4047, |
|
"eval_bp": 0.7995, |
|
"eval_counts_1": 7859, |
|
"eval_counts_2": 2722, |
|
"eval_counts_3": 1241, |
|
"eval_counts_4": 572, |
|
"eval_exact_match": 0.0145, |
|
"eval_f1": 0.3473, |
|
"eval_gen_len": 13.4052, |
|
"eval_loss": 1.7331299781799316, |
|
"eval_precisions_1": 45.2603, |
|
"eval_precisions_2": 17.9551, |
|
"eval_precisions_3": 9.5786, |
|
"eval_precisions_4": 5.3199, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3524, |
|
"eval_rouge2": 0.1673, |
|
"eval_rougeL": 0.3387, |
|
"eval_rougeLsum": 0.3385, |
|
"eval_runtime": 470.5412, |
|
"eval_samples_per_second": 4.684, |
|
"eval_steps_per_second": 1.171, |
|
"eval_sys_len": 17364, |
|
"eval_totals_1": 17364, |
|
"eval_totals_2": 15160, |
|
"eval_totals_3": 12956, |
|
"eval_totals_4": 10752, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7095, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_bleu": 11.8807, |
|
"eval_bp": 0.8053, |
|
"eval_counts_1": 7968, |
|
"eval_counts_2": 2783, |
|
"eval_counts_3": 1292, |
|
"eval_counts_4": 625, |
|
"eval_exact_match": 0.0154, |
|
"eval_f1": 0.3495, |
|
"eval_gen_len": 13.4437, |
|
"eval_loss": 1.7193998098373413, |
|
"eval_precisions_1": 45.6175, |
|
"eval_precisions_2": 18.2336, |
|
"eval_precisions_3": 9.8936, |
|
"eval_precisions_4": 5.7577, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3547, |
|
"eval_rouge2": 0.1708, |
|
"eval_rougeL": 0.3418, |
|
"eval_rougeLsum": 0.3414, |
|
"eval_runtime": 472.913, |
|
"eval_samples_per_second": 4.66, |
|
"eval_steps_per_second": 1.165, |
|
"eval_sys_len": 17467, |
|
"eval_totals_1": 17467, |
|
"eval_totals_2": 15263, |
|
"eval_totals_3": 13059, |
|
"eval_totals_4": 10855, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6619, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_bleu": 11.7968, |
|
"eval_bp": 0.8034, |
|
"eval_counts_1": 8011, |
|
"eval_counts_2": 2796, |
|
"eval_counts_3": 1286, |
|
"eval_counts_4": 604, |
|
"eval_exact_match": 0.0154, |
|
"eval_f1": 0.3526, |
|
"eval_gen_len": 13.4964, |
|
"eval_loss": 1.703238606452942, |
|
"eval_precisions_1": 45.9531, |
|
"eval_precisions_2": 18.3597, |
|
"eval_precisions_3": 9.8733, |
|
"eval_precisions_4": 5.5817, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3584, |
|
"eval_rouge2": 0.1736, |
|
"eval_rougeL": 0.3454, |
|
"eval_rougeLsum": 0.3454, |
|
"eval_runtime": 460.9308, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 1.195, |
|
"eval_sys_len": 17433, |
|
"eval_totals_1": 17433, |
|
"eval_totals_2": 15229, |
|
"eval_totals_3": 13025, |
|
"eval_totals_4": 10821, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6103, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 12.235, |
|
"eval_bp": 0.8163, |
|
"eval_counts_1": 8154, |
|
"eval_counts_2": 2891, |
|
"eval_counts_3": 1347, |
|
"eval_counts_4": 636, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.3602, |
|
"eval_gen_len": 13.7223, |
|
"eval_loss": 1.7027523517608643, |
|
"eval_precisions_1": 46.1591, |
|
"eval_precisions_2": 18.6987, |
|
"eval_precisions_3": 10.1607, |
|
"eval_precisions_4": 5.7541, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3659, |
|
"eval_rouge2": 0.1795, |
|
"eval_rougeL": 0.3509, |
|
"eval_rougeLsum": 0.3508, |
|
"eval_runtime": 461.3951, |
|
"eval_samples_per_second": 4.777, |
|
"eval_steps_per_second": 1.194, |
|
"eval_sys_len": 17665, |
|
"eval_totals_1": 17665, |
|
"eval_totals_2": 15461, |
|
"eval_totals_3": 13257, |
|
"eval_totals_4": 11053, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.565, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 12.4116, |
|
"eval_bp": 0.8088, |
|
"eval_counts_1": 8135, |
|
"eval_counts_2": 2897, |
|
"eval_counts_3": 1362, |
|
"eval_counts_4": 665, |
|
"eval_exact_match": 0.02, |
|
"eval_f1": 0.3603, |
|
"eval_gen_len": 13.6107, |
|
"eval_loss": 1.6954691410064697, |
|
"eval_precisions_1": 46.4062, |
|
"eval_precisions_2": 18.9025, |
|
"eval_precisions_3": 10.3795, |
|
"eval_precisions_4": 6.0909, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3668, |
|
"eval_rouge2": 0.1808, |
|
"eval_rougeL": 0.3518, |
|
"eval_rougeLsum": 0.3516, |
|
"eval_runtime": 457.9806, |
|
"eval_samples_per_second": 4.812, |
|
"eval_steps_per_second": 1.203, |
|
"eval_sys_len": 17530, |
|
"eval_totals_1": 17530, |
|
"eval_totals_2": 15326, |
|
"eval_totals_3": 13122, |
|
"eval_totals_4": 10918, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.522, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 12.8008, |
|
"eval_bp": 0.8318, |
|
"eval_counts_1": 8271, |
|
"eval_counts_2": 2982, |
|
"eval_counts_3": 1414, |
|
"eval_counts_4": 697, |
|
"eval_exact_match": 0.0191, |
|
"eval_f1": 0.3632, |
|
"eval_gen_len": 13.9192, |
|
"eval_loss": 1.6792546510696411, |
|
"eval_precisions_1": 46.0883, |
|
"eval_precisions_2": 18.943, |
|
"eval_precisions_3": 10.4447, |
|
"eval_precisions_4": 6.1496, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3695, |
|
"eval_rouge2": 0.1828, |
|
"eval_rougeL": 0.354, |
|
"eval_rougeLsum": 0.354, |
|
"eval_runtime": 476.6232, |
|
"eval_samples_per_second": 4.624, |
|
"eval_steps_per_second": 1.156, |
|
"eval_sys_len": 17946, |
|
"eval_totals_1": 17946, |
|
"eval_totals_2": 15742, |
|
"eval_totals_3": 13538, |
|
"eval_totals_4": 11334, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5022, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_bleu": 12.6672, |
|
"eval_bp": 0.8077, |
|
"eval_counts_1": 8244, |
|
"eval_counts_2": 2967, |
|
"eval_counts_3": 1392, |
|
"eval_counts_4": 680, |
|
"eval_exact_match": 0.0191, |
|
"eval_f1": 0.366, |
|
"eval_gen_len": 13.6243, |
|
"eval_loss": 1.684873104095459, |
|
"eval_precisions_1": 47.0817, |
|
"eval_precisions_2": 19.3846, |
|
"eval_precisions_3": 10.6243, |
|
"eval_precisions_4": 6.2397, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3728, |
|
"eval_rouge2": 0.184, |
|
"eval_rougeL": 0.3569, |
|
"eval_rougeLsum": 0.3569, |
|
"eval_runtime": 453.2, |
|
"eval_samples_per_second": 4.863, |
|
"eval_steps_per_second": 1.216, |
|
"eval_sys_len": 17510, |
|
"eval_totals_1": 17510, |
|
"eval_totals_2": 15306, |
|
"eval_totals_3": 13102, |
|
"eval_totals_4": 10898, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4359, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_bleu": 13.0683, |
|
"eval_bp": 0.8278, |
|
"eval_counts_1": 8328, |
|
"eval_counts_2": 3050, |
|
"eval_counts_3": 1448, |
|
"eval_counts_4": 717, |
|
"eval_exact_match": 0.0181, |
|
"eval_f1": 0.3671, |
|
"eval_gen_len": 13.7255, |
|
"eval_loss": 1.686221718788147, |
|
"eval_precisions_1": 46.5954, |
|
"eval_precisions_2": 19.4652, |
|
"eval_precisions_3": 10.7538, |
|
"eval_precisions_4": 6.3671, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3742, |
|
"eval_rouge2": 0.1866, |
|
"eval_rougeL": 0.3582, |
|
"eval_rougeLsum": 0.3583, |
|
"eval_runtime": 451.92, |
|
"eval_samples_per_second": 4.877, |
|
"eval_steps_per_second": 1.219, |
|
"eval_sys_len": 17873, |
|
"eval_totals_1": 17873, |
|
"eval_totals_2": 15669, |
|
"eval_totals_3": 13465, |
|
"eval_totals_4": 11261, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3994, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 12.8728, |
|
"eval_bp": 0.8152, |
|
"eval_counts_1": 8272, |
|
"eval_counts_2": 2998, |
|
"eval_counts_3": 1417, |
|
"eval_counts_4": 704, |
|
"eval_exact_match": 0.0213, |
|
"eval_f1": 0.3673, |
|
"eval_gen_len": 13.6956, |
|
"eval_loss": 1.6775314807891846, |
|
"eval_precisions_1": 46.8801, |
|
"eval_precisions_2": 19.4158, |
|
"eval_precisions_3": 10.7048, |
|
"eval_precisions_4": 6.3809, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3739, |
|
"eval_rouge2": 0.1866, |
|
"eval_rougeL": 0.3583, |
|
"eval_rougeLsum": 0.3581, |
|
"eval_runtime": 818.5079, |
|
"eval_samples_per_second": 2.693, |
|
"eval_steps_per_second": 0.673, |
|
"eval_sys_len": 17645, |
|
"eval_totals_1": 17645, |
|
"eval_totals_2": 15441, |
|
"eval_totals_3": 13237, |
|
"eval_totals_4": 11033, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3609, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"eval_bleu": 13.1569, |
|
"eval_bp": 0.8251, |
|
"eval_counts_1": 8347, |
|
"eval_counts_2": 3062, |
|
"eval_counts_3": 1465, |
|
"eval_counts_4": 723, |
|
"eval_exact_match": 0.0204, |
|
"eval_f1": 0.3692, |
|
"eval_gen_len": 13.7328, |
|
"eval_loss": 1.688394546508789, |
|
"eval_precisions_1": 46.8327, |
|
"eval_precisions_2": 19.6043, |
|
"eval_precisions_3": 10.9206, |
|
"eval_precisions_4": 6.449, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3761, |
|
"eval_rouge2": 0.1886, |
|
"eval_rougeL": 0.3601, |
|
"eval_rougeLsum": 0.3596, |
|
"eval_runtime": 834.1703, |
|
"eval_samples_per_second": 2.642, |
|
"eval_steps_per_second": 0.661, |
|
"eval_sys_len": 17823, |
|
"eval_totals_1": 17823, |
|
"eval_totals_2": 15619, |
|
"eval_totals_3": 13415, |
|
"eval_totals_4": 11211, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"step": 1440, |
|
"total_flos": 2.52283256045568e+17, |
|
"train_loss": 1.9421327537960476, |
|
"train_runtime": 22435.6962, |
|
"train_samples_per_second": 8.303, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.52283256045568e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|