{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.922713610991842, "eval_steps": 500, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0001, "loss": 5.5131, "step": 145 }, { "epoch": 1.0, "eval_bleu": 6.2485, "eval_bp": 0.7216, "eval_counts_1": 6032, "eval_counts_2": 1668, "eval_counts_3": 626, "eval_counts_4": 216, "eval_exact_match": 0.0018, "eval_f1": 0.2406, "eval_gen_len": 12.6166, "eval_loss": 1.8697563409805298, "eval_precisions_1": 37.6459, "eval_precisions_2": 12.0703, "eval_precisions_3": 5.3896, "eval_precisions_4": 2.2952, "eval_ref_len": 21250, "eval_rouge1": 0.2485, "eval_rouge2": 0.1011, "eval_rougeL": 0.2368, "eval_rougeLsum": 0.2366, "eval_runtime": 467.1177, "eval_samples_per_second": 4.718, "eval_steps_per_second": 1.18, "eval_sys_len": 16023, "eval_totals_1": 16023, "eval_totals_2": 13819, "eval_totals_3": 11615, "eval_totals_4": 9411, "step": 145 }, { "epoch": 2.0, "learning_rate": 0.0001, "loss": 2.3946, "step": 291 }, { "epoch": 2.0, "eval_bleu": 10.8315, "eval_bp": 0.7704, "eval_counts_1": 7325, "eval_counts_2": 2554, "eval_counts_3": 1178, "eval_counts_4": 558, "eval_exact_match": 0.0145, "eval_f1": 0.3148, "eval_gen_len": 12.2582, "eval_loss": 1.58878493309021, "eval_precisions_1": 43.4641, "eval_precisions_2": 17.4346, "eval_precisions_3": 9.4656, "eval_precisions_4": 5.4487, "eval_ref_len": 21250, "eval_rouge1": 0.3226, "eval_rouge2": 0.1585, "eval_rougeL": 0.31, "eval_rougeLsum": 0.31, "eval_runtime": 528.6481, "eval_samples_per_second": 4.169, "eval_steps_per_second": 1.042, "eval_sys_len": 16853, "eval_totals_1": 16853, "eval_totals_2": 14649, "eval_totals_3": 12445, "eval_totals_4": 10241, "step": 291 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 2.0101, "step": 436 }, { "epoch": 3.0, "eval_bleu": 11.7891, "eval_bp": 0.7812, "eval_counts_1": 7623, "eval_counts_2": 2764, "eval_counts_3": 1304, "eval_counts_4": 629, "eval_exact_match": 0.0154, "eval_f1": 0.3315, "eval_gen_len": 12.6783, "eval_loss": 1.4997321367263794, "eval_precisions_1": 44.7307, "eval_precisions_2": 18.6278, "eval_precisions_3": 10.3214, "eval_precisions_4": 6.0307, "eval_ref_len": 21250, "eval_rouge1": 0.3403, "eval_rouge2": 0.1723, "eval_rougeL": 0.3263, "eval_rougeLsum": 0.3263, "eval_runtime": 451.1882, "eval_samples_per_second": 4.885, "eval_steps_per_second": 1.221, "eval_sys_len": 17042, "eval_totals_1": 17042, "eval_totals_2": 14838, "eval_totals_3": 12634, "eval_totals_4": 10430, "step": 436 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 1.8073, "step": 582 }, { "epoch": 4.0, "eval_bleu": 12.6068, "eval_bp": 0.7588, "eval_counts_1": 7728, "eval_counts_2": 2916, "eval_counts_3": 1415, "eval_counts_4": 707, "eval_exact_match": 0.0168, "eval_f1": 0.3387, "eval_gen_len": 12.2963, "eval_loss": 1.4610050916671753, "eval_precisions_1": 46.4033, "eval_precisions_2": 20.1799, "eval_precisions_3": 11.5548, "eval_precisions_4": 7.0404, "eval_ref_len": 21250, "eval_rouge1": 0.3461, "eval_rouge2": 0.1818, "eval_rougeL": 0.3324, "eval_rougeLsum": 0.3326, "eval_runtime": 433.3953, "eval_samples_per_second": 5.085, "eval_steps_per_second": 1.271, "eval_sys_len": 16654, "eval_totals_1": 16654, "eval_totals_2": 14450, "eval_totals_3": 12246, "eval_totals_4": 10042, "step": 582 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 1.6851, "step": 727 }, { "epoch": 4.99, "eval_bleu": 13.0784, "eval_bp": 0.8004, "eval_counts_1": 7964, "eval_counts_2": 3059, "eval_counts_3": 1483, "eval_counts_4": 727, "eval_exact_match": 0.0159, "eval_f1": 0.3483, "eval_gen_len": 12.7436, "eval_loss": 1.4356882572174072, "eval_precisions_1": 45.8201, "eval_precisions_2": 20.1555, "eval_precisions_3": 11.4314, "eval_precisions_4": 6.7509, "eval_ref_len": 21250, "eval_rouge1": 0.3558, "eval_rouge2": 0.1888, "eval_rougeL": 0.3415, "eval_rougeLsum": 0.3414, "eval_runtime": 452.1483, "eval_samples_per_second": 4.875, "eval_steps_per_second": 1.219, "eval_sys_len": 17381, "eval_totals_1": 17381, "eval_totals_2": 15177, "eval_totals_3": 12973, "eval_totals_4": 10769, "step": 727 }, { "epoch": 6.0, "learning_rate": 0.0001, "loss": 1.5642, "step": 873 }, { "epoch": 6.0, "eval_bleu": 13.9065, "eval_bp": 0.7987, "eval_counts_1": 8299, "eval_counts_2": 3224, "eval_counts_3": 1592, "eval_counts_4": 788, "eval_exact_match": 0.0204, "eval_f1": 0.3736, "eval_gen_len": 12.9569, "eval_loss": 1.4003357887268066, "eval_precisions_1": 47.8301, "eval_precisions_2": 21.2847, "eval_precisions_3": 12.3001, "eval_precisions_4": 7.3377, "eval_ref_len": 21250, "eval_rouge1": 0.3814, "eval_rouge2": 0.2025, "eval_rougeL": 0.3684, "eval_rougeLsum": 0.3685, "eval_runtime": 450.2054, "eval_samples_per_second": 4.896, "eval_steps_per_second": 1.224, "eval_sys_len": 17351, "eval_totals_1": 17351, "eval_totals_2": 15147, "eval_totals_3": 12943, "eval_totals_4": 10739, "step": 873 }, { "epoch": 6.99, "learning_rate": 0.0001, "loss": 1.4756, "step": 1018 }, { "epoch": 6.99, "eval_bleu": 14.9146, "eval_bp": 0.8165, "eval_counts_1": 8640, "eval_counts_2": 3430, "eval_counts_3": 1712, "eval_counts_4": 879, "eval_exact_match": 0.025, "eval_f1": 0.3892, "eval_gen_len": 13.1084, "eval_loss": 1.3778630495071411, "eval_precisions_1": 48.8992, "eval_precisions_2": 22.1791, "eval_precisions_3": 12.91, "eval_precisions_4": 7.9497, "eval_ref_len": 21250, "eval_rouge1": 0.3971, "eval_rouge2": 0.2133, "eval_rougeL": 0.3828, "eval_rougeLsum": 0.3826, "eval_runtime": 753.2935, "eval_samples_per_second": 2.926, "eval_steps_per_second": 0.731, "eval_sys_len": 17669, "eval_totals_1": 17669, "eval_totals_2": 15465, "eval_totals_3": 13261, "eval_totals_4": 11057, "step": 1018 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 1.3792, "step": 1164 }, { "epoch": 8.0, "eval_bleu": 14.8859, "eval_bp": 0.8346, "eval_counts_1": 8732, "eval_counts_2": 3417, "eval_counts_3": 1712, "eval_counts_4": 871, "eval_exact_match": 0.0245, "eval_f1": 0.3917, "eval_gen_len": 13.3748, "eval_loss": 1.362410306930542, "eval_precisions_1": 48.5219, "eval_precisions_2": 21.6375, "eval_precisions_3": 12.5994, "eval_precisions_4": 7.6511, "eval_ref_len": 21250, "eval_rouge1": 0.4003, "eval_rouge2": 0.2131, "eval_rougeL": 0.3852, "eval_rougeLsum": 0.3849, "eval_runtime": 699.0977, "eval_samples_per_second": 3.153, "eval_steps_per_second": 0.788, "eval_sys_len": 17996, "eval_totals_1": 17996, "eval_totals_2": 15792, "eval_totals_3": 13588, "eval_totals_4": 11384, "step": 1164 }, { "epoch": 9.0, "learning_rate": 0.0001, "loss": 1.3133, "step": 1310 }, { "epoch": 9.0, "eval_bleu": 15.3264, "eval_bp": 0.8161, "eval_counts_1": 8804, "eval_counts_2": 3500, "eval_counts_3": 1754, "eval_counts_4": 920, "eval_exact_match": 0.025, "eval_f1": 0.4, "eval_gen_len": 13.2019, "eval_loss": 1.3630096912384033, "eval_precisions_1": 49.85, "eval_precisions_2": 22.6435, "eval_precisions_3": 13.2347, "eval_precisions_4": 8.3265, "eval_ref_len": 21250, "eval_rouge1": 0.4078, "eval_rouge2": 0.219, "eval_rougeL": 0.3932, "eval_rougeLsum": 0.3935, "eval_runtime": 465.2887, "eval_samples_per_second": 4.737, "eval_steps_per_second": 1.184, "eval_sys_len": 17661, "eval_totals_1": 17661, "eval_totals_2": 15457, "eval_totals_3": 13253, "eval_totals_4": 11049, "step": 1310 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 1.261, "step": 1455 }, { "epoch": 10.0, "eval_bleu": 16.0163, "eval_bp": 0.8188, "eval_counts_1": 8910, "eval_counts_2": 3602, "eval_counts_3": 1849, "eval_counts_4": 1000, "eval_exact_match": 0.0295, "eval_f1": 0.4055, "eval_gen_len": 13.1892, "eval_loss": 1.3685479164123535, "eval_precisions_1": 50.3134, "eval_precisions_2": 23.2312, "eval_precisions_3": 13.9012, "eval_precisions_4": 9.0114, "eval_ref_len": 21250, "eval_rouge1": 0.4135, "eval_rouge2": 0.223, "eval_rougeL": 0.3991, "eval_rougeLsum": 0.3992, "eval_runtime": 491.3102, "eval_samples_per_second": 4.486, "eval_steps_per_second": 1.121, "eval_sys_len": 17709, "eval_totals_1": 17709, "eval_totals_2": 15505, "eval_totals_3": 13301, "eval_totals_4": 11097, "step": 1455 }, { "epoch": 11.0, "learning_rate": 0.0001, "loss": 1.1897, "step": 1601 }, { "epoch": 11.0, "eval_bleu": 16.3202, "eval_bp": 0.849, "eval_counts_1": 9096, "eval_counts_2": 3690, "eval_counts_3": 1902, "eval_counts_4": 1012, "eval_exact_match": 0.0281, "eval_f1": 0.4121, "eval_gen_len": 13.5077, "eval_loss": 1.3638867139816284, "eval_precisions_1": 49.8111, "eval_precisions_2": 22.9806, "eval_precisions_3": 13.7299, "eval_precisions_4": 8.6874, "eval_ref_len": 21250, "eval_rouge1": 0.4201, "eval_rouge2": 0.2289, "eval_rougeL": 0.4059, "eval_rougeLsum": 0.4057, "eval_runtime": 536.9399, "eval_samples_per_second": 4.105, "eval_steps_per_second": 1.026, "eval_sys_len": 18261, "eval_totals_1": 18261, "eval_totals_2": 16057, "eval_totals_3": 13853, "eval_totals_4": 11649, "step": 1601 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 1.1453, "step": 1746 }, { "epoch": 11.99, "eval_bleu": 16.4772, "eval_bp": 0.8527, "eval_counts_1": 9106, "eval_counts_2": 3735, "eval_counts_3": 1932, "eval_counts_4": 1023, "eval_exact_match": 0.0281, "eval_f1": 0.4099, "eval_gen_len": 13.8013, "eval_loss": 1.3609519004821777, "eval_precisions_1": 49.6808, "eval_precisions_2": 23.1628, "eval_precisions_3": 13.8783, "eval_precisions_4": 8.7309, "eval_ref_len": 21250, "eval_rouge1": 0.4173, "eval_rouge2": 0.2303, "eval_rougeL": 0.4026, "eval_rougeLsum": 0.4025, "eval_runtime": 617.7899, "eval_samples_per_second": 3.568, "eval_steps_per_second": 0.892, "eval_sys_len": 18329, "eval_totals_1": 18329, "eval_totals_2": 16125, "eval_totals_3": 13921, "eval_totals_4": 11717, "step": 1746 }, { "epoch": 13.0, "learning_rate": 0.0001, "loss": 1.0858, "step": 1892 }, { "epoch": 13.0, "eval_bleu": 16.7204, "eval_bp": 0.8649, "eval_counts_1": 9245, "eval_counts_2": 3778, "eval_counts_3": 1955, "eval_counts_4": 1049, "eval_exact_match": 0.0322, "eval_f1": 0.417, "eval_gen_len": 13.8144, "eval_loss": 1.3716095685958862, "eval_precisions_1": 49.8222, "eval_precisions_2": 23.1042, "eval_precisions_3": 13.8182, "eval_precisions_4": 8.7827, "eval_ref_len": 21250, "eval_rouge1": 0.4244, "eval_rouge2": 0.2327, "eval_rougeL": 0.409, "eval_rougeLsum": 0.409, "eval_runtime": 504.2774, "eval_samples_per_second": 4.371, "eval_steps_per_second": 1.093, "eval_sys_len": 18556, "eval_totals_1": 18556, "eval_totals_2": 16352, "eval_totals_3": 14148, "eval_totals_4": 11944, "step": 1892 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 1.0472, "step": 2037 }, { "epoch": 13.99, "eval_bleu": 16.6825, "eval_bp": 0.8519, "eval_counts_1": 9166, "eval_counts_2": 3756, "eval_counts_3": 1946, "eval_counts_4": 1054, "eval_exact_match": 0.0309, "eval_f1": 0.4143, "eval_gen_len": 13.8099, "eval_loss": 1.3770091533660889, "eval_precisions_1": 50.0464, "eval_precisions_2": 23.3133, "eval_precisions_3": 13.993, "eval_precisions_4": 9.0062, "eval_ref_len": 21250, "eval_rouge1": 0.4216, "eval_rouge2": 0.2311, "eval_rougeL": 0.4068, "eval_rougeLsum": 0.4067, "eval_runtime": 581.2707, "eval_samples_per_second": 3.792, "eval_steps_per_second": 0.948, "eval_sys_len": 18315, "eval_totals_1": 18315, "eval_totals_2": 16111, "eval_totals_3": 13907, "eval_totals_4": 11703, "step": 2037 }, { "epoch": 15.0, "learning_rate": 0.0001, "loss": 0.9953, "step": 2183 }, { "epoch": 15.0, "eval_bleu": 17.3937, "eval_bp": 0.842, "eval_counts_1": 9342, "eval_counts_2": 3926, "eval_counts_3": 2046, "eval_counts_4": 1108, "eval_exact_match": 0.0327, "eval_f1": 0.4258, "eval_gen_len": 13.5023, "eval_loss": 1.3880597352981567, "eval_precisions_1": 51.5222, "eval_precisions_2": 24.6484, "eval_precisions_3": 14.9082, "eval_precisions_4": 9.6181, "eval_ref_len": 21250, "eval_rouge1": 0.4328, "eval_rouge2": 0.2418, "eval_rougeL": 0.4171, "eval_rougeLsum": 0.4171, "eval_runtime": 718.2329, "eval_samples_per_second": 3.069, "eval_steps_per_second": 0.767, "eval_sys_len": 18132, "eval_totals_1": 18132, "eval_totals_2": 15928, "eval_totals_3": 13724, "eval_totals_4": 11520, "step": 2183 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 0.9509, "step": 2329 }, { "epoch": 16.0, "eval_bleu": 17.1618, "eval_bp": 0.871, "eval_counts_1": 9330, "eval_counts_2": 3894, "eval_counts_3": 2024, "eval_counts_4": 1084, "eval_exact_match": 0.0313, "eval_f1": 0.4198, "eval_gen_len": 13.956, "eval_loss": 1.401639461517334, "eval_precisions_1": 49.9679, "eval_precisions_2": 23.6459, "eval_precisions_3": 14.1896, "eval_precisions_4": 8.9884, "eval_ref_len": 21250, "eval_rouge1": 0.4269, "eval_rouge2": 0.237, "eval_rougeL": 0.4123, "eval_rougeLsum": 0.4122, "eval_runtime": 632.3222, "eval_samples_per_second": 3.486, "eval_steps_per_second": 0.871, "eval_sys_len": 18672, "eval_totals_1": 18672, "eval_totals_2": 16468, "eval_totals_3": 14264, "eval_totals_4": 12060, "step": 2329 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 0.9183, "step": 2474 }, { "epoch": 17.0, "eval_bleu": 16.995, "eval_bp": 0.8606, "eval_counts_1": 9303, "eval_counts_2": 3824, "eval_counts_3": 1979, "eval_counts_4": 1084, "eval_exact_match": 0.0327, "eval_f1": 0.4199, "eval_gen_len": 13.7854, "eval_loss": 1.4152026176452637, "eval_precisions_1": 50.3518, "eval_precisions_2": 23.5005, "eval_precisions_3": 14.0674, "eval_precisions_4": 9.1369, "eval_ref_len": 21250, "eval_rouge1": 0.4269, "eval_rouge2": 0.2345, "eval_rougeL": 0.4121, "eval_rougeLsum": 0.4122, "eval_runtime": 466.5423, "eval_samples_per_second": 4.724, "eval_steps_per_second": 1.181, "eval_sys_len": 18476, "eval_totals_1": 18476, "eval_totals_2": 16272, "eval_totals_3": 14068, "eval_totals_4": 11864, "step": 2474 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 0.8696, "step": 2620 }, { "epoch": 18.0, "eval_bleu": 16.9541, "eval_bp": 0.8554, "eval_counts_1": 9184, "eval_counts_2": 3798, "eval_counts_3": 1993, "eval_counts_4": 1085, "eval_exact_match": 0.034, "eval_f1": 0.4148, "eval_gen_len": 13.726, "eval_loss": 1.44040048122406, "eval_precisions_1": 49.9701, "eval_precisions_2": 23.4807, "eval_precisions_3": 14.2653, "eval_precisions_4": 9.2207, "eval_ref_len": 21250, "eval_rouge1": 0.4218, "eval_rouge2": 0.2333, "eval_rougeL": 0.4076, "eval_rougeLsum": 0.4074, "eval_runtime": 470.6343, "eval_samples_per_second": 4.683, "eval_steps_per_second": 1.171, "eval_sys_len": 18379, "eval_totals_1": 18379, "eval_totals_2": 16175, "eval_totals_3": 13971, "eval_totals_4": 11767, "step": 2620 }, { "epoch": 19.0, "learning_rate": 0.0001, "loss": 0.8389, "step": 2765 }, { "epoch": 19.0, "eval_bleu": 17.67, "eval_bp": 0.8885, "eval_counts_1": 9476, "eval_counts_2": 4000, "eval_counts_3": 2092, "eval_counts_4": 1139, "eval_exact_match": 0.0299, "eval_f1": 0.4239, "eval_gen_len": 14.2064, "eval_loss": 1.4360300302505493, "eval_precisions_1": 49.8658, "eval_precisions_2": 23.8109, "eval_precisions_3": 14.3337, "eval_precisions_4": 9.1922, "eval_ref_len": 21250, "eval_rouge1": 0.4307, "eval_rouge2": 0.2406, "eval_rougeL": 0.4161, "eval_rougeLsum": 0.416, "eval_runtime": 480.4816, "eval_samples_per_second": 4.587, "eval_steps_per_second": 1.147, "eval_sys_len": 19003, "eval_totals_1": 19003, "eval_totals_2": 16799, "eval_totals_3": 14595, "eval_totals_4": 12391, "step": 2765 }, { "epoch": 19.92, "learning_rate": 0.0001, "loss": 0.7993, "step": 2900 }, { "epoch": 19.92, "eval_bleu": 17.5799, "eval_bp": 0.8747, "eval_counts_1": 9464, "eval_counts_2": 3970, "eval_counts_3": 2078, "eval_counts_4": 1126, "eval_exact_match": 0.0327, "eval_f1": 0.4269, "eval_gen_len": 13.9959, "eval_loss": 1.454466700553894, "eval_precisions_1": 50.4989, "eval_precisions_2": 24.0068, "eval_precisions_3": 14.498, "eval_precisions_4": 9.2835, "eval_ref_len": 21250, "eval_rouge1": 0.4349, "eval_rouge2": 0.2424, "eval_rougeL": 0.4194, "eval_rougeLsum": 0.4192, "eval_runtime": 476.8512, "eval_samples_per_second": 4.622, "eval_steps_per_second": 1.155, "eval_sys_len": 18741, "eval_totals_1": 18741, "eval_totals_2": 16537, "eval_totals_3": 14333, "eval_totals_4": 12129, "step": 2900 }, { "epoch": 19.92, "step": 2900, "total_flos": 4.449947965854843e+17, "train_loss": 1.5141178552035628, "train_runtime": 27637.7455, "train_samples_per_second": 6.74, "train_steps_per_second": 0.105 } ], "logging_steps": 500, "max_steps": 2900, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4.449947965854843e+17, "trial_name": null, "trial_params": null }