{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4443561782335523, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 1.9986295347545738e-05, "loss": 4.4247, "step": 500 }, { "epoch": 0.07, "eval_gen_len": 6.267, "eval_loss": 3.5801377296447754, "eval_rouge1": 27.9882, "eval_rouge2": 8.9331, "eval_rougeL": 27.6011, "eval_rougeLsum": 27.5353, "eval_runtime": 1554.6657, "eval_samples_per_second": 1.93, "eval_steps_per_second": 0.482, "step": 500 }, { "epoch": 0.14, "learning_rate": 1.9945218953682736e-05, "loss": 2.959, "step": 1000 }, { "epoch": 0.14, "eval_gen_len": 6.267333333333333, "eval_loss": 3.238922119140625, "eval_rouge1": 34.4913, "eval_rouge2": 14.1739, "eval_rougeL": 34.2505, "eval_rougeLsum": 34.1717, "eval_runtime": 1549.8046, "eval_samples_per_second": 1.936, "eval_steps_per_second": 0.484, "step": 1000 }, { "epoch": 0.22, "learning_rate": 1.9876883405951378e-05, "loss": 2.7367, "step": 1500 }, { "epoch": 0.22, "eval_gen_len": 6.267666666666667, "eval_loss": 2.983262300491333, "eval_rouge1": 40.5052, "eval_rouge2": 18.725, "eval_rougeL": 40.1333, "eval_rougeLsum": 40.0554, "eval_runtime": 1542.1597, "eval_samples_per_second": 1.945, "eval_steps_per_second": 0.486, "step": 1500 }, { "epoch": 0.29, "learning_rate": 1.9781476007338058e-05, "loss": 2.553, "step": 2000 }, { "epoch": 0.29, "eval_gen_len": 6.267333333333333, "eval_loss": 2.781419277191162, "eval_rouge1": 45.972, "eval_rouge2": 24.75, "eval_rougeL": 45.6989, "eval_rougeLsum": 45.6257, "eval_runtime": 1547.6212, "eval_samples_per_second": 1.938, "eval_steps_per_second": 0.485, "step": 2000 }, { "epoch": 0.36, "learning_rate": 1.9659258262890683e-05, "loss": 2.3988, "step": 2500 }, { "epoch": 0.36, "eval_gen_len": 6.267666666666667, "eval_loss": 2.6185145378112793, "eval_rouge1": 51.3237, "eval_rouge2": 30.8584, "eval_rougeL": 51.0697, "eval_rougeLsum": 50.9947, "eval_runtime": 1538.5542, "eval_samples_per_second": 1.95, "eval_steps_per_second": 0.487, "step": 2500 }, { "epoch": 0.43, "learning_rate": 1.9510565162951538e-05, "loss": 2.2788, "step": 3000 }, { "epoch": 0.43, "eval_gen_len": 6.267666666666667, "eval_loss": 2.4821877479553223, "eval_rouge1": 55.054, "eval_rouge2": 35.9558, "eval_rougeL": 54.885, "eval_rougeLsum": 54.8263, "eval_runtime": 1535.9732, "eval_samples_per_second": 1.953, "eval_steps_per_second": 0.488, "step": 3000 }, { "epoch": 0.51, "learning_rate": 1.9335804264972018e-05, "loss": 2.185, "step": 3500 }, { "epoch": 0.51, "eval_gen_len": 6.267666666666667, "eval_loss": 2.372607469558716, "eval_rouge1": 58.4125, "eval_rouge2": 39.7017, "eval_rougeL": 58.2864, "eval_rougeLsum": 58.2323, "eval_runtime": 1555.498, "eval_samples_per_second": 1.929, "eval_steps_per_second": 0.482, "step": 3500 }, { "epoch": 0.58, "learning_rate": 1.913545457642601e-05, "loss": 2.1024, "step": 4000 }, { "epoch": 0.58, "eval_gen_len": 6.267666666666667, "eval_loss": 2.2966315746307373, "eval_rouge1": 60.2269, "eval_rouge2": 42.343, "eval_rougeL": 60.1064, "eval_rougeLsum": 60.015, "eval_runtime": 1539.5016, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.487, "step": 4000 }, { "epoch": 0.65, "learning_rate": 1.891006524188368e-05, "loss": 2.0395, "step": 4500 }, { "epoch": 0.65, "eval_gen_len": 6.267666666666667, "eval_loss": 2.2079367637634277, "eval_rouge1": 63.9442, "eval_rouge2": 47.1262, "eval_rougeL": 63.8226, "eval_rougeLsum": 63.783, "eval_runtime": 1534.6718, "eval_samples_per_second": 1.955, "eval_steps_per_second": 0.489, "step": 4500 }, { "epoch": 0.72, "learning_rate": 1.866025403784439e-05, "loss": 1.9929, "step": 5000 }, { "epoch": 0.72, "eval_gen_len": 6.267666666666667, "eval_loss": 2.142320156097412, "eval_rouge1": 66.0535, "eval_rouge2": 49.403, "eval_rougeL": 65.9837, "eval_rougeLsum": 65.9295, "eval_runtime": 1531.6986, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.49, "step": 5000 }, { "epoch": 0.79, "learning_rate": 1.8386705679454243e-05, "loss": 1.944, "step": 5500 }, { "epoch": 0.79, "eval_gen_len": 6.267666666666667, "eval_loss": 2.094369888305664, "eval_rouge1": 67.4662, "eval_rouge2": 50.9242, "eval_rougeL": 67.3906, "eval_rougeLsum": 67.3416, "eval_runtime": 1527.1607, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.491, "step": 5500 }, { "epoch": 0.87, "learning_rate": 1.8090169943749477e-05, "loss": 1.902, "step": 6000 }, { "epoch": 0.87, "eval_gen_len": 6.267666666666667, "eval_loss": 2.0382838249206543, "eval_rouge1": 69.3267, "eval_rouge2": 53.8395, "eval_rougeL": 69.2759, "eval_rougeLsum": 69.2151, "eval_runtime": 1532.3017, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.489, "step": 6000 }, { "epoch": 0.94, "learning_rate": 1.777145961456971e-05, "loss": 1.8712, "step": 6500 }, { "epoch": 0.94, "eval_gen_len": 6.267666666666667, "eval_loss": 2.010657548904419, "eval_rouge1": 70.2771, "eval_rouge2": 54.5208, "eval_rougeL": 70.2492, "eval_rougeLsum": 70.2095, "eval_runtime": 1563.3117, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.48, "step": 6500 }, { "epoch": 1.01, "learning_rate": 1.7431448254773943e-05, "loss": 1.8387, "step": 7000 }, { "epoch": 1.01, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9761910438537598, "eval_rouge1": 71.145, "eval_rouge2": 56.4319, "eval_rougeL": 71.1008, "eval_rougeLsum": 71.0876, "eval_runtime": 1552.4923, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.483, "step": 7000 }, { "epoch": 1.08, "learning_rate": 1.7071067811865477e-05, "loss": 1.7558, "step": 7500 }, { "epoch": 1.08, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9575979709625244, "eval_rouge1": 72.527, "eval_rouge2": 58.0461, "eval_rougeL": 72.4801, "eval_rougeLsum": 72.461, "eval_runtime": 1522.5751, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.493, "step": 7500 }, { "epoch": 1.16, "learning_rate": 1.6691306063588583e-05, "loss": 1.7363, "step": 8000 }, { "epoch": 1.16, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9305455684661865, "eval_rouge1": 73.3884, "eval_rouge2": 59.6248, "eval_rougeL": 73.3396, "eval_rougeLsum": 73.3362, "eval_runtime": 1541.3792, "eval_samples_per_second": 1.946, "eval_steps_per_second": 0.487, "step": 8000 }, { "epoch": 1.23, "learning_rate": 1.6293203910498375e-05, "loss": 1.7245, "step": 8500 }, { "epoch": 1.23, "eval_gen_len": 6.267666666666667, "eval_loss": 1.9158198833465576, "eval_rouge1": 73.3565, "eval_rouge2": 58.7585, "eval_rougeL": 73.3541, "eval_rougeLsum": 73.3668, "eval_runtime": 1546.9117, "eval_samples_per_second": 1.939, "eval_steps_per_second": 0.485, "step": 8500 }, { "epoch": 1.3, "learning_rate": 1.5877852522924733e-05, "loss": 1.7147, "step": 9000 }, { "epoch": 1.3, "eval_gen_len": 6.267666666666667, "eval_loss": 1.898065447807312, "eval_rouge1": 74.1688, "eval_rouge2": 59.9465, "eval_rougeL": 74.1934, "eval_rougeLsum": 74.1447, "eval_runtime": 1537.688, "eval_samples_per_second": 1.951, "eval_steps_per_second": 0.488, "step": 9000 }, { "epoch": 1.37, "learning_rate": 1.5446390350150272e-05, "loss": 1.7013, "step": 9500 }, { "epoch": 1.37, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8747327327728271, "eval_rouge1": 75.1266, "eval_rouge2": 61.394, "eval_rougeL": 75.128, "eval_rougeLsum": 75.0856, "eval_runtime": 1526.768, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.491, "step": 9500 }, { "epoch": 1.44, "learning_rate": 1.5000000000000002e-05, "loss": 1.6906, "step": 10000 }, { "epoch": 1.44, "eval_gen_len": 6.267666666666667, "eval_loss": 1.8567513227462769, "eval_rouge1": 75.8503, "eval_rouge2": 62.2772, "eval_rougeL": 75.8449, "eval_rougeLsum": 75.8138, "eval_runtime": 1518.2689, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.494, "step": 10000 } ], "logging_steps": 500, "max_steps": 30000, "num_train_epochs": 5, "save_steps": 2500, "total_flos": 4.113331352053805e+16, "trial_name": null, "trial_params": null }