{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8887123564671047, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 1.9986295347545738e-05, "loss": 4.4247, "step": 500 }, { "epoch": 0.07, "eval_gen_len": 6.267, "eval_loss": 3.5801377296447754, "eval_rouge1": 27.9882, "eval_rouge2": 8.9331, "eval_rougeL": 27.6011, "eval_rougeLsum": 27.5353, "eval_runtime": 1554.6657, "eval_samples_per_second": 1.93, "eval_steps_per_second": 0.482, "step": 500 }, { "epoch": 0.14, "learning_rate": 1.9945218953682736e-05, "loss": 2.959, "step": 1000 }, { "epoch": 0.14, "eval_gen_len": 6.267333333333333, "eval_loss": 3.238922119140625, "eval_rouge1": 34.4913, "eval_rouge2": 14.1739, "eval_rougeL": 34.2505, "eval_rougeLsum": 34.1717, "eval_runtime": 1549.8046, "eval_samples_per_second": 1.936, "eval_steps_per_second": 0.484, "step": 1000 }, { "epoch": 0.22, "learning_rate": 1.9876883405951378e-05, "loss": 2.7367, "step": 1500 }, { "epoch": 0.22, "eval_gen_len": 6.267666666666667, "eval_loss": 2.983262300491333, "eval_rouge1": 40.5052, "eval_rouge2": 18.725, "eval_rougeL": 40.1333, "eval_rougeLsum": 40.0554, "eval_runtime": 1542.1597, "eval_samples_per_second": 1.945, "eval_steps_per_second": 0.486, "step": 1500 }, { "epoch": 0.29, "learning_rate": 1.9781476007338058e-05, "loss": 2.553, "step": 2000 }, { "epoch": 0.29, "eval_gen_len": 6.267333333333333, "eval_loss": 2.781419277191162, "eval_rouge1": 45.972, "eval_rouge2": 24.75, "eval_rougeL": 45.6989, "eval_rougeLsum": 45.6257, "eval_runtime": 1547.6212, "eval_samples_per_second": 1.938, "eval_steps_per_second": 0.485, "step": 2000 }, { "epoch": 0.36, "learning_rate": 1.9659258262890683e-05, "loss": 2.3988, "step": 2500 }, { "epoch": 0.36, "eval_gen_len": 6.267666666666667, "eval_loss": 2.6185145378112793, "eval_rouge1": 51.3237, "eval_rouge2": 30.8584, "eval_rougeL": 51.0697, "eval_rougeLsum": 50.9947, "eval_runtime": 1538.5542, "eval_samples_per_second": 1.95, "eval_steps_per_second": 0.487, "step": 2500 }, { "epoch": 0.43, "learning_rate": 1.9510565162951538e-05, "loss": 2.2788, "step": 3000 }, { "epoch": 0.43, "eval_gen_len": 6.267666666666667, "eval_loss": 2.4821877479553223, "eval_rouge1": 55.054, "eval_rouge2": 35.9558, "eval_rougeL": 54.885, "eval_rougeLsum": 54.8263, "eval_runtime": 1535.9732, "eval_samples_per_second": 1.953, "eval_steps_per_second": 0.488, "step": 3000 }, { "epoch": 0.51, "learning_rate": 1.9335804264972018e-05, "loss": 2.185, "step": 3500 }, { "epoch": 0.51, "eval_gen_len": 6.267666666666667, "eval_loss": 2.372607469558716, "eval_rouge1": 58.4125, "eval_rouge2": 39.7017, "eval_rougeL": 58.2864, "eval_rougeLsum": 58.2323, "eval_runtime": 1555.498, "eval_samples_per_second": 1.929, "eval_steps_per_second": 0.482, "step": 3500 }, { "epoch": 0.58, "learning_rate": 1.913545457642601e-05, "loss": 2.1024, "step": 4000 }, { "epoch": 0.58, "eval_gen_len": 6.267666666666667, "eval_loss": 2.2966315746307373, "eval_rouge1": 60.2269, "eval_rouge2": 42.343, "eval_rougeL": 60.1064, "eval_rougeLsum": 60.015, "eval_runtime": 1539.5016, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.487, "step": 4000 }, { "epoch": 0.65, "learning_rate": 1.891006524188368e-05, "loss": 2.0395, "step": 4500 }, { "epoch": 0.65, "eval_gen_len": 6.267666666666667, "eval_loss": 2.2079367637634277, "eval_rouge1": 63.9442, "eval_rouge2": 47.1262, "eval_rougeL": 63.8226, "eval_rougeLsum": 63.783, "eval_runtime": 1534.6718, "eval_samples_per_second": 1.955, "eval_steps_per_second": 0.489, "step": 4500 }, { "epoch": 0.72, "learning_rate": 1.866025403784439e-05, "loss": 1.9929, "step": 5000 }, { "epoch": 0.72, "eval_gen_len": 6.267666666666667, "eval_loss": 2.142320156097412, "eval_rouge1": 66.0535, "eval_rouge2": 49.403, "eval_rougeL": 65.9837, "eval_rougeLsum": 65.9295, "eval_runtime": 1531.6986, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.49, "step": 5000 }, { "epoch": 0.79, "learning_rate": 1.8386705679454243e-05, "loss": 1.944, "step": 5500 }, { "epoch": 0.79, "eval_gen_len": 6.267666666666667, "eval_loss": 2.094369888305664, "eval_rouge1": 67.4662, "eval_rouge2": 50.9242, "eval_rougeL": 67.3906, "eval_rougeLsum": 67.3416, "eval_runtime": 1527.1607, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.491, "step": 5500 }, { "epoch": 0.87, "learning_rate": 1.8090169943749477e-05, "loss": 1.902, "step": 6000 }, { "epoch": 0.87, "eval_gen_len": 6.267666666666667, "eval_loss": 2.0382838249206543, "eval_rouge1": 69.3267, "eval_rouge2": 53.8395, "eval_rougeL": 69.2759, "eval_rougeLsum": 69.2151, "eval_runtime": 1532.3017, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.489, "step": 6000 }, { "epoch": 0.94, "learning_rate": 1.777145961456971e-05, "loss": 1.8712, "step": 6500 }, { "epoch": 0.94, "eval_gen_len": 6.267666666666667, "eval_loss": 2.010657548904419, "eval_rouge1": 70.2771, "eval_rouge2": 54.5208, "eval_rougeL": 70.2492, "eval_rougeLsum": 70.2095, "eval_runtime": 1563.3117, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.48, "step": 6500 }, { "epoch": 1.01, "learning_rate": 1.7431448254773943e-05, "loss": 1.8387, "step": 7000 }, { "epoch": 1.01, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9761910438537598, "eval_rouge1": 71.145, "eval_rouge2": 56.4319, "eval_rougeL": 71.1008, "eval_rougeLsum": 71.0876, "eval_runtime": 1552.4923, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.483, "step": 7000 }, { "epoch": 1.08, "learning_rate": 1.7071067811865477e-05, "loss": 1.7558, "step": 7500 }, { "epoch": 1.08, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9575979709625244, "eval_rouge1": 72.527, "eval_rouge2": 58.0461, "eval_rougeL": 72.4801, "eval_rougeLsum": 72.461, "eval_runtime": 1522.5751, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.493, "step": 7500 }, { "epoch": 1.16, "learning_rate": 1.6691306063588583e-05, "loss": 1.7363, "step": 8000 }, { "epoch": 1.16, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9305455684661865, "eval_rouge1": 73.3884, "eval_rouge2": 59.6248, "eval_rougeL": 73.3396, "eval_rougeLsum": 73.3362, "eval_runtime": 1541.3792, "eval_samples_per_second": 1.946, "eval_steps_per_second": 0.487, "step": 8000 }, { "epoch": 1.23, "learning_rate": 1.6293203910498375e-05, "loss": 1.7245, "step": 8500 }, { "epoch": 1.23, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9158198833465576, "eval_rouge1": 73.3565, "eval_rouge2": 58.7585, "eval_rougeL": 73.3541, "eval_rougeLsum": 73.3668, "eval_runtime": 1546.9117, "eval_samples_per_second": 1.939, "eval_steps_per_second": 0.485, "step": 8500 }, { "epoch": 1.3, "learning_rate": 1.5877852522924733e-05, "loss": 1.7147, "step": 9000 }, { "epoch": 1.3, "eval_gen_len": 6.267666666666667, "eval_loss": 1.898065447807312, "eval_rouge1": 74.1688, "eval_rouge2": 59.9465, "eval_rougeL": 74.1934, "eval_rougeLsum": 74.1447, "eval_runtime": 1537.688, "eval_samples_per_second": 1.951, "eval_steps_per_second": 0.488, "step": 9000 }, { "epoch": 1.37, "learning_rate": 1.5446390350150272e-05, "loss": 1.7013, "step": 9500 }, { "epoch": 1.37, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8747327327728271, "eval_rouge1": 75.1266, "eval_rouge2": 61.394, "eval_rougeL": 75.128, "eval_rougeLsum": 75.0856, "eval_runtime": 1526.768, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.491, "step": 9500 }, { "epoch": 1.44, "learning_rate": 1.5000000000000002e-05, "loss": 1.6906, "step": 10000 }, { "epoch": 1.44, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8567513227462769, "eval_rouge1": 75.8503, "eval_rouge2": 62.2772, "eval_rougeL": 75.8449, "eval_rougeLsum": 75.8138, "eval_runtime": 1518.2689, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.494, "step": 10000 }, { "epoch": 1.52, "learning_rate": 1.4539904997395468e-05, "loss": 1.6827, "step": 10500 }, { "epoch": 1.52, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8478548526763916, "eval_rouge1": 75.7687, "eval_rouge2": 62.3363, "eval_rougeL": 75.7808, "eval_rougeLsum": 75.7533, "eval_runtime": 1520.7572, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.493, "step": 10500 }, { "epoch": 1.59, "learning_rate": 1.4067366430758004e-05, "loss": 1.6808, "step": 11000 }, { "epoch": 1.59, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8268691301345825, "eval_rouge1": 76.5635, "eval_rouge2": 63.5572, "eval_rougeL": 76.5759, "eval_rougeLsum": 76.5281, "eval_runtime": 1529.0949, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.49, "step": 11000 }, { "epoch": 1.66, "learning_rate": 1.3583679495453e-05, "loss": 1.6747, "step": 11500 }, { "epoch": 1.66, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8204782009124756, "eval_rouge1": 76.0665, "eval_rouge2": 62.7626, "eval_rougeL": 76.0441, "eval_rougeLsum": 76.0155, "eval_runtime": 1521.0486, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.493, "step": 11500 }, { "epoch": 1.73, "learning_rate": 1.3090169943749475e-05, "loss": 1.6628, "step": 12000 }, { "epoch": 1.73, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8059669733047485, "eval_rouge1": 76.6687, "eval_rouge2": 63.8953, "eval_rougeL": 76.678, "eval_rougeLsum": 76.6457, "eval_runtime": 1519.6043, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.494, "step": 12000 }, { "epoch": 1.81, "learning_rate": 1.2588190451025209e-05, "loss": 1.6517, "step": 12500 }, { "epoch": 1.81, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7973886728286743, "eval_rouge1": 77.5432, "eval_rouge2": 64.831, "eval_rougeL": 77.5477, "eval_rougeLsum": 77.5112, "eval_runtime": 1514.3733, "eval_samples_per_second": 1.981, "eval_steps_per_second": 0.495, "step": 12500 }, { "epoch": 1.88, "learning_rate": 1.2079116908177592e-05, "loss": 1.6493, "step": 13000 }, { "epoch": 1.88, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7867239713668823, "eval_rouge1": 77.6195, "eval_rouge2": 64.9393, "eval_rougeL": 77.5908, "eval_rougeLsum": 77.5498, "eval_runtime": 1514.8609, "eval_samples_per_second": 1.98, "eval_steps_per_second": 0.495, "step": 13000 }, { "epoch": 1.95, "learning_rate": 1.156434465040231e-05, "loss": 1.6435, "step": 13500 }, { "epoch": 1.95, "eval_gen_len": 6.267666666666667, "eval_loss": 1.780102252960205, "eval_rouge1": 77.8512, "eval_rouge2": 65.2471, "eval_rougeL": 77.8573, "eval_rougeLsum": 77.8118, "eval_runtime": 1516.9222, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.494, "step": 13500 }, { "epoch": 2.02, "learning_rate": 1.1045284632676535e-05, "loss": 1.6189, "step": 14000 }, { "epoch": 2.02, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7743651866912842, "eval_rouge1": 78.5967, "eval_rouge2": 66.4212, "eval_rougeL": 78.6125, "eval_rougeLsum": 78.5741, "eval_runtime": 1518.2132, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.494, "step": 14000 }, { "epoch": 2.09, "learning_rate": 1.0523359562429441e-05, "loss": 1.5746, "step": 14500 }, { "epoch": 2.09, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7696142196655273, "eval_rouge1": 78.7375, "eval_rouge2": 67.0042, "eval_rougeL": 78.7704, "eval_rougeLsum": 78.6912, "eval_runtime": 1504.5573, "eval_samples_per_second": 1.994, "eval_steps_per_second": 0.498, "step": 14500 }, { "epoch": 2.17, "learning_rate": 1e-05, "loss": 1.5767, "step": 15000 }, { "epoch": 2.17, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7685412168502808, "eval_rouge1": 78.1583, "eval_rouge2": 66.0184, "eval_rougeL": 78.1256, "eval_rougeLsum": 78.1273, "eval_runtime": 1504.2381, "eval_samples_per_second": 1.994, "eval_steps_per_second": 0.499, "step": 15000 }, { "epoch": 2.24, "learning_rate": 9.476640437570562e-06, "loss": 1.5713, "step": 15500 }, { "epoch": 2.24, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7626044750213623, "eval_rouge1": 78.6909, "eval_rouge2": 66.7862, "eval_rougeL": 78.7168, "eval_rougeLsum": 78.671, "eval_runtime": 1509.684, "eval_samples_per_second": 1.987, "eval_steps_per_second": 0.497, "step": 15500 }, { "epoch": 2.31, "learning_rate": 8.954715367323468e-06, "loss": 1.5731, "step": 16000 }, { "epoch": 2.31, "eval_gen_len": 6.267666666666667, "eval_loss": 1.752744436264038, "eval_rouge1": 78.9605, "eval_rouge2": 67.084, "eval_rougeL": 78.9504, "eval_rougeLsum": 78.9289, "eval_runtime": 1512.7838, "eval_samples_per_second": 1.983, "eval_steps_per_second": 0.496, "step": 16000 }, { "epoch": 2.38, "learning_rate": 8.43565534959769e-06, "loss": 1.5683, "step": 16500 }, { "epoch": 2.38, "eval_gen_len": 6.267666666666667, "eval_loss": 1.748329758644104, "eval_rouge1": 79.0099, "eval_rouge2": 67.1613, "eval_rougeL": 79.0228, "eval_rougeLsum": 79.0093, "eval_runtime": 1516.6027, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.495, "step": 16500 }, { "epoch": 2.46, "learning_rate": 7.92088309182241e-06, "loss": 1.5684, "step": 17000 }, { "epoch": 2.46, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7434362173080444, "eval_rouge1": 78.9853, "eval_rouge2": 67.0167, "eval_rougeL": 78.9846, "eval_rougeLsum": 78.9721, "eval_runtime": 1523.8295, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.492, "step": 17000 }, { "epoch": 2.53, "learning_rate": 7.411809548974792e-06, "loss": 1.5653, "step": 17500 }, { "epoch": 2.53, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7393466234207153, "eval_rouge1": 79.413, "eval_rouge2": 67.6295, "eval_rougeL": 79.4249, "eval_rougeLsum": 79.4027, "eval_runtime": 1511.8366, "eval_samples_per_second": 1.984, "eval_steps_per_second": 0.496, "step": 17500 }, { "epoch": 2.6, "learning_rate": 6.909830056250527e-06, "loss": 1.5584, "step": 18000 }, { "epoch": 2.6, "eval_gen_len": 6.267666666666667, "eval_loss": 1.735812783241272, "eval_rouge1": 79.3295, "eval_rouge2": 67.4641, "eval_rougeL": 79.3042, "eval_rougeLsum": 79.3028, "eval_runtime": 1515.6868, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.495, "step": 18000 }, { "epoch": 2.67, "learning_rate": 6.4163205045469975e-06, "loss": 1.5567, "step": 18500 }, { "epoch": 2.67, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7302848100662231, "eval_rouge1": 80.0165, "eval_rouge2": 68.3289, "eval_rougeL": 80.0436, "eval_rougeLsum": 80.019, "eval_runtime": 1522.3815, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.493, "step": 18500 }, { "epoch": 2.74, "learning_rate": 5.932633569242e-06, "loss": 1.5564, "step": 19000 }, { "epoch": 2.74, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7278697490692139, "eval_rouge1": 79.8733, "eval_rouge2": 68.1584, "eval_rougeL": 79.868, "eval_rougeLsum": 79.8511, "eval_runtime": 1516.0739, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.495, "step": 19000 }, { "epoch": 2.82, "learning_rate": 5.460095002604533e-06, "loss": 1.553, "step": 19500 }, { "epoch": 2.82, "eval_gen_len": 6.267666666666667, "eval_loss": 1.7226474285125732, "eval_rouge1": 79.9727, "eval_rouge2": 68.2612, "eval_rougeL": 79.9901, "eval_rougeLsum": 79.9554, "eval_runtime": 1525.8223, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.492, "step": 19500 }, { "epoch": 2.89, "learning_rate": 5.000000000000003e-06, "loss": 1.5498, "step": 20000 }, { "epoch": 2.89, "eval_gen_len": 6.267666666666667, "eval_loss": 1.720814824104309, "eval_rouge1": 79.9937, "eval_rouge2": 68.221, "eval_rougeL": 79.9703, "eval_rougeLsum": 79.9696, "eval_runtime": 1519.3409, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.494, "step": 20000 } ], "logging_steps": 500, "max_steps": 30000, "num_train_epochs": 5, "save_steps": 2500, "total_flos": 8.228710189060075e+16, "trial_name": null, "trial_params": null }