|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.610890445583881, |
|
"eval_steps": 500, |
|
"global_step": 25000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9986295347545738e-05, |
|
"loss": 4.4247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gen_len": 6.267, |
|
"eval_loss": 3.5801377296447754, |
|
"eval_rouge1": 27.9882, |
|
"eval_rouge2": 8.9331, |
|
"eval_rougeL": 27.6011, |
|
"eval_rougeLsum": 27.5353, |
|
"eval_runtime": 1554.6657, |
|
"eval_samples_per_second": 1.93, |
|
"eval_steps_per_second": 0.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9945218953682736e-05, |
|
"loss": 2.959, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_gen_len": 6.267333333333333, |
|
"eval_loss": 3.238922119140625, |
|
"eval_rouge1": 34.4913, |
|
"eval_rouge2": 14.1739, |
|
"eval_rougeL": 34.2505, |
|
"eval_rougeLsum": 34.1717, |
|
"eval_runtime": 1549.8046, |
|
"eval_samples_per_second": 1.936, |
|
"eval_steps_per_second": 0.484, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9876883405951378e-05, |
|
"loss": 2.7367, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.983262300491333, |
|
"eval_rouge1": 40.5052, |
|
"eval_rouge2": 18.725, |
|
"eval_rougeL": 40.1333, |
|
"eval_rougeLsum": 40.0554, |
|
"eval_runtime": 1542.1597, |
|
"eval_samples_per_second": 1.945, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9781476007338058e-05, |
|
"loss": 2.553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_gen_len": 6.267333333333333, |
|
"eval_loss": 2.781419277191162, |
|
"eval_rouge1": 45.972, |
|
"eval_rouge2": 24.75, |
|
"eval_rougeL": 45.6989, |
|
"eval_rougeLsum": 45.6257, |
|
"eval_runtime": 1547.6212, |
|
"eval_samples_per_second": 1.938, |
|
"eval_steps_per_second": 0.485, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9659258262890683e-05, |
|
"loss": 2.3988, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.6185145378112793, |
|
"eval_rouge1": 51.3237, |
|
"eval_rouge2": 30.8584, |
|
"eval_rougeL": 51.0697, |
|
"eval_rougeLsum": 50.9947, |
|
"eval_runtime": 1538.5542, |
|
"eval_samples_per_second": 1.95, |
|
"eval_steps_per_second": 0.487, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9510565162951538e-05, |
|
"loss": 2.2788, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.4821877479553223, |
|
"eval_rouge1": 55.054, |
|
"eval_rouge2": 35.9558, |
|
"eval_rougeL": 54.885, |
|
"eval_rougeLsum": 54.8263, |
|
"eval_runtime": 1535.9732, |
|
"eval_samples_per_second": 1.953, |
|
"eval_steps_per_second": 0.488, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9335804264972018e-05, |
|
"loss": 2.185, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.372607469558716, |
|
"eval_rouge1": 58.4125, |
|
"eval_rouge2": 39.7017, |
|
"eval_rougeL": 58.2864, |
|
"eval_rougeLsum": 58.2323, |
|
"eval_runtime": 1555.498, |
|
"eval_samples_per_second": 1.929, |
|
"eval_steps_per_second": 0.482, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.913545457642601e-05, |
|
"loss": 2.1024, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.2966315746307373, |
|
"eval_rouge1": 60.2269, |
|
"eval_rouge2": 42.343, |
|
"eval_rougeL": 60.1064, |
|
"eval_rougeLsum": 60.015, |
|
"eval_runtime": 1539.5016, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.487, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.891006524188368e-05, |
|
"loss": 2.0395, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.2079367637634277, |
|
"eval_rouge1": 63.9442, |
|
"eval_rouge2": 47.1262, |
|
"eval_rougeL": 63.8226, |
|
"eval_rougeLsum": 63.783, |
|
"eval_runtime": 1534.6718, |
|
"eval_samples_per_second": 1.955, |
|
"eval_steps_per_second": 0.489, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.866025403784439e-05, |
|
"loss": 1.9929, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.142320156097412, |
|
"eval_rouge1": 66.0535, |
|
"eval_rouge2": 49.403, |
|
"eval_rougeL": 65.9837, |
|
"eval_rougeLsum": 65.9295, |
|
"eval_runtime": 1531.6986, |
|
"eval_samples_per_second": 1.959, |
|
"eval_steps_per_second": 0.49, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8386705679454243e-05, |
|
"loss": 1.944, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.094369888305664, |
|
"eval_rouge1": 67.4662, |
|
"eval_rouge2": 50.9242, |
|
"eval_rougeL": 67.3906, |
|
"eval_rougeLsum": 67.3416, |
|
"eval_runtime": 1527.1607, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.491, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 1.902, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.0382838249206543, |
|
"eval_rouge1": 69.3267, |
|
"eval_rouge2": 53.8395, |
|
"eval_rougeL": 69.2759, |
|
"eval_rougeLsum": 69.2151, |
|
"eval_runtime": 1532.3017, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.489, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.777145961456971e-05, |
|
"loss": 1.8712, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.010657548904419, |
|
"eval_rouge1": 70.2771, |
|
"eval_rouge2": 54.5208, |
|
"eval_rougeL": 70.2492, |
|
"eval_rougeLsum": 70.2095, |
|
"eval_runtime": 1563.3117, |
|
"eval_samples_per_second": 1.919, |
|
"eval_steps_per_second": 0.48, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.7431448254773943e-05, |
|
"loss": 1.8387, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9761910438537598, |
|
"eval_rouge1": 71.145, |
|
"eval_rouge2": 56.4319, |
|
"eval_rougeL": 71.1008, |
|
"eval_rougeLsum": 71.0876, |
|
"eval_runtime": 1552.4923, |
|
"eval_samples_per_second": 1.932, |
|
"eval_steps_per_second": 0.483, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 1.7558, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9575979709625244, |
|
"eval_rouge1": 72.527, |
|
"eval_rouge2": 58.0461, |
|
"eval_rougeL": 72.4801, |
|
"eval_rougeLsum": 72.461, |
|
"eval_runtime": 1522.5751, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.493, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.6691306063588583e-05, |
|
"loss": 1.7363, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9305455684661865, |
|
"eval_rouge1": 73.3884, |
|
"eval_rouge2": 59.6248, |
|
"eval_rougeL": 73.3396, |
|
"eval_rougeLsum": 73.3362, |
|
"eval_runtime": 1541.3792, |
|
"eval_samples_per_second": 1.946, |
|
"eval_steps_per_second": 0.487, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.6293203910498375e-05, |
|
"loss": 1.7245, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9158198833465576, |
|
"eval_rouge1": 73.3565, |
|
"eval_rouge2": 58.7585, |
|
"eval_rougeL": 73.3541, |
|
"eval_rougeLsum": 73.3668, |
|
"eval_runtime": 1546.9117, |
|
"eval_samples_per_second": 1.939, |
|
"eval_steps_per_second": 0.485, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.5877852522924733e-05, |
|
"loss": 1.7147, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.898065447807312, |
|
"eval_rouge1": 74.1688, |
|
"eval_rouge2": 59.9465, |
|
"eval_rougeL": 74.1934, |
|
"eval_rougeLsum": 74.1447, |
|
"eval_runtime": 1537.688, |
|
"eval_samples_per_second": 1.951, |
|
"eval_steps_per_second": 0.488, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5446390350150272e-05, |
|
"loss": 1.7013, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8747327327728271, |
|
"eval_rouge1": 75.1266, |
|
"eval_rouge2": 61.394, |
|
"eval_rougeL": 75.128, |
|
"eval_rougeLsum": 75.0856, |
|
"eval_runtime": 1526.768, |
|
"eval_samples_per_second": 1.965, |
|
"eval_steps_per_second": 0.491, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.6906, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8567513227462769, |
|
"eval_rouge1": 75.8503, |
|
"eval_rouge2": 62.2772, |
|
"eval_rougeL": 75.8449, |
|
"eval_rougeLsum": 75.8138, |
|
"eval_runtime": 1518.2689, |
|
"eval_samples_per_second": 1.976, |
|
"eval_steps_per_second": 0.494, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.4539904997395468e-05, |
|
"loss": 1.6827, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8478548526763916, |
|
"eval_rouge1": 75.7687, |
|
"eval_rouge2": 62.3363, |
|
"eval_rougeL": 75.7808, |
|
"eval_rougeLsum": 75.7533, |
|
"eval_runtime": 1520.7572, |
|
"eval_samples_per_second": 1.973, |
|
"eval_steps_per_second": 0.493, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.4067366430758004e-05, |
|
"loss": 1.6808, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8268691301345825, |
|
"eval_rouge1": 76.5635, |
|
"eval_rouge2": 63.5572, |
|
"eval_rougeL": 76.5759, |
|
"eval_rougeLsum": 76.5281, |
|
"eval_runtime": 1529.0949, |
|
"eval_samples_per_second": 1.962, |
|
"eval_steps_per_second": 0.49, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3583679495453e-05, |
|
"loss": 1.6747, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8204782009124756, |
|
"eval_rouge1": 76.0665, |
|
"eval_rouge2": 62.7626, |
|
"eval_rougeL": 76.0441, |
|
"eval_rougeLsum": 76.0155, |
|
"eval_runtime": 1521.0486, |
|
"eval_samples_per_second": 1.972, |
|
"eval_steps_per_second": 0.493, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 1.6628, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8059669733047485, |
|
"eval_rouge1": 76.6687, |
|
"eval_rouge2": 63.8953, |
|
"eval_rougeL": 76.678, |
|
"eval_rougeLsum": 76.6457, |
|
"eval_runtime": 1519.6043, |
|
"eval_samples_per_second": 1.974, |
|
"eval_steps_per_second": 0.494, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2588190451025209e-05, |
|
"loss": 1.6517, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7973886728286743, |
|
"eval_rouge1": 77.5432, |
|
"eval_rouge2": 64.831, |
|
"eval_rougeL": 77.5477, |
|
"eval_rougeLsum": 77.5112, |
|
"eval_runtime": 1514.3733, |
|
"eval_samples_per_second": 1.981, |
|
"eval_steps_per_second": 0.495, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2079116908177592e-05, |
|
"loss": 1.6493, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7867239713668823, |
|
"eval_rouge1": 77.6195, |
|
"eval_rouge2": 64.9393, |
|
"eval_rougeL": 77.5908, |
|
"eval_rougeLsum": 77.5498, |
|
"eval_runtime": 1514.8609, |
|
"eval_samples_per_second": 1.98, |
|
"eval_steps_per_second": 0.495, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.156434465040231e-05, |
|
"loss": 1.6435, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.780102252960205, |
|
"eval_rouge1": 77.8512, |
|
"eval_rouge2": 65.2471, |
|
"eval_rougeL": 77.8573, |
|
"eval_rougeLsum": 77.8118, |
|
"eval_runtime": 1516.9222, |
|
"eval_samples_per_second": 1.978, |
|
"eval_steps_per_second": 0.494, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.1045284632676535e-05, |
|
"loss": 1.6189, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7743651866912842, |
|
"eval_rouge1": 78.5967, |
|
"eval_rouge2": 66.4212, |
|
"eval_rougeL": 78.6125, |
|
"eval_rougeLsum": 78.5741, |
|
"eval_runtime": 1518.2132, |
|
"eval_samples_per_second": 1.976, |
|
"eval_steps_per_second": 0.494, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.0523359562429441e-05, |
|
"loss": 1.5746, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7696142196655273, |
|
"eval_rouge1": 78.7375, |
|
"eval_rouge2": 67.0042, |
|
"eval_rougeL": 78.7704, |
|
"eval_rougeLsum": 78.6912, |
|
"eval_runtime": 1504.5573, |
|
"eval_samples_per_second": 1.994, |
|
"eval_steps_per_second": 0.498, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5767, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7685412168502808, |
|
"eval_rouge1": 78.1583, |
|
"eval_rouge2": 66.0184, |
|
"eval_rougeL": 78.1256, |
|
"eval_rougeLsum": 78.1273, |
|
"eval_runtime": 1504.2381, |
|
"eval_samples_per_second": 1.994, |
|
"eval_steps_per_second": 0.499, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.476640437570562e-06, |
|
"loss": 1.5713, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7626044750213623, |
|
"eval_rouge1": 78.6909, |
|
"eval_rouge2": 66.7862, |
|
"eval_rougeL": 78.7168, |
|
"eval_rougeLsum": 78.671, |
|
"eval_runtime": 1509.684, |
|
"eval_samples_per_second": 1.987, |
|
"eval_steps_per_second": 0.497, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.954715367323468e-06, |
|
"loss": 1.5731, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.752744436264038, |
|
"eval_rouge1": 78.9605, |
|
"eval_rouge2": 67.084, |
|
"eval_rougeL": 78.9504, |
|
"eval_rougeLsum": 78.9289, |
|
"eval_runtime": 1512.7838, |
|
"eval_samples_per_second": 1.983, |
|
"eval_steps_per_second": 0.496, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.43565534959769e-06, |
|
"loss": 1.5683, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.748329758644104, |
|
"eval_rouge1": 79.0099, |
|
"eval_rouge2": 67.1613, |
|
"eval_rougeL": 79.0228, |
|
"eval_rougeLsum": 79.0093, |
|
"eval_runtime": 1516.6027, |
|
"eval_samples_per_second": 1.978, |
|
"eval_steps_per_second": 0.495, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.92088309182241e-06, |
|
"loss": 1.5684, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7434362173080444, |
|
"eval_rouge1": 78.9853, |
|
"eval_rouge2": 67.0167, |
|
"eval_rougeL": 78.9846, |
|
"eval_rougeLsum": 78.9721, |
|
"eval_runtime": 1523.8295, |
|
"eval_samples_per_second": 1.969, |
|
"eval_steps_per_second": 0.492, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.411809548974792e-06, |
|
"loss": 1.5653, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7393466234207153, |
|
"eval_rouge1": 79.413, |
|
"eval_rouge2": 67.6295, |
|
"eval_rougeL": 79.4249, |
|
"eval_rougeLsum": 79.4027, |
|
"eval_runtime": 1511.8366, |
|
"eval_samples_per_second": 1.984, |
|
"eval_steps_per_second": 0.496, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 1.5584, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.735812783241272, |
|
"eval_rouge1": 79.3295, |
|
"eval_rouge2": 67.4641, |
|
"eval_rougeL": 79.3042, |
|
"eval_rougeLsum": 79.3028, |
|
"eval_runtime": 1515.6868, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.495, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.4163205045469975e-06, |
|
"loss": 1.5567, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7302848100662231, |
|
"eval_rouge1": 80.0165, |
|
"eval_rouge2": 68.3289, |
|
"eval_rougeL": 80.0436, |
|
"eval_rougeLsum": 80.019, |
|
"eval_runtime": 1522.3815, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.493, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.932633569242e-06, |
|
"loss": 1.5564, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7278697490692139, |
|
"eval_rouge1": 79.8733, |
|
"eval_rouge2": 68.1584, |
|
"eval_rougeL": 79.868, |
|
"eval_rougeLsum": 79.8511, |
|
"eval_runtime": 1516.0739, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.495, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.460095002604533e-06, |
|
"loss": 1.553, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7226474285125732, |
|
"eval_rouge1": 79.9727, |
|
"eval_rouge2": 68.2612, |
|
"eval_rougeL": 79.9901, |
|
"eval_rougeLsum": 79.9554, |
|
"eval_runtime": 1525.8223, |
|
"eval_samples_per_second": 1.966, |
|
"eval_steps_per_second": 0.492, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 1.5498, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.720814824104309, |
|
"eval_rouge1": 79.9937, |
|
"eval_rouge2": 68.221, |
|
"eval_rougeL": 79.9703, |
|
"eval_rougeLsum": 79.9696, |
|
"eval_runtime": 1519.3409, |
|
"eval_samples_per_second": 1.975, |
|
"eval_steps_per_second": 0.494, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.5536096498497295e-06, |
|
"loss": 1.5482, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7179197072982788, |
|
"eval_rouge1": 79.8959, |
|
"eval_rouge2": 68.4771, |
|
"eval_rougeL": 79.913, |
|
"eval_rougeLsum": 79.8608, |
|
"eval_runtime": 1520.8168, |
|
"eval_samples_per_second": 1.973, |
|
"eval_steps_per_second": 0.493, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.12214747707527e-06, |
|
"loss": 1.5305, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7194596529006958, |
|
"eval_rouge1": 80.1371, |
|
"eval_rouge2": 68.4834, |
|
"eval_rougeL": 80.1291, |
|
"eval_rougeLsum": 80.1164, |
|
"eval_runtime": 1522.408, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.493, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.7067960895016277e-06, |
|
"loss": 1.5057, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7195872068405151, |
|
"eval_rouge1": 80.143, |
|
"eval_rouge2": 68.5801, |
|
"eval_rougeL": 80.1614, |
|
"eval_rougeLsum": 80.1476, |
|
"eval_runtime": 1525.9564, |
|
"eval_samples_per_second": 1.966, |
|
"eval_steps_per_second": 0.491, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.308693936411421e-06, |
|
"loss": 1.5057, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7173995971679688, |
|
"eval_rouge1": 80.3721, |
|
"eval_rouge2": 69.0264, |
|
"eval_rougeL": 80.3868, |
|
"eval_rougeLsum": 80.3636, |
|
"eval_runtime": 1526.4809, |
|
"eval_samples_per_second": 1.965, |
|
"eval_steps_per_second": 0.491, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 1.5052, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7151570320129395, |
|
"eval_rouge1": 80.3483, |
|
"eval_rouge2": 69.2677, |
|
"eval_rougeL": 80.333, |
|
"eval_rougeLsum": 80.319, |
|
"eval_runtime": 1527.1337, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.491, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.5685517452260566e-06, |
|
"loss": 1.5042, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7125614881515503, |
|
"eval_rouge1": 80.2099, |
|
"eval_rouge2": 68.9148, |
|
"eval_rougeL": 80.2083, |
|
"eval_rougeLsum": 80.176, |
|
"eval_runtime": 1523.8678, |
|
"eval_samples_per_second": 1.969, |
|
"eval_steps_per_second": 0.492, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.2285403854302912e-06, |
|
"loss": 1.5039, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7120810747146606, |
|
"eval_rouge1": 80.1778, |
|
"eval_rouge2": 68.7873, |
|
"eval_rougeL": 80.1959, |
|
"eval_rougeLsum": 80.1548, |
|
"eval_runtime": 1534.2984, |
|
"eval_samples_per_second": 1.955, |
|
"eval_steps_per_second": 0.489, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.9098300562505266e-06, |
|
"loss": 1.5021, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7112153768539429, |
|
"eval_rouge1": 80.1776, |
|
"eval_rouge2": 69.1022, |
|
"eval_rougeL": 80.1833, |
|
"eval_rougeLsum": 80.1473, |
|
"eval_runtime": 1528.5323, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.491, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.6132943205457607e-06, |
|
"loss": 1.5014, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.7083262205123901, |
|
"eval_rouge1": 80.4158, |
|
"eval_rouge2": 69.2978, |
|
"eval_rougeL": 80.4171, |
|
"eval_rougeLsum": 80.3903, |
|
"eval_runtime": 1523.1308, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.492, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.339745962155613e-06, |
|
"loss": 1.502, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.70697820186615, |
|
"eval_rouge1": 80.5089, |
|
"eval_rouge2": 69.3556, |
|
"eval_rougeL": 80.5157, |
|
"eval_rougeLsum": 80.4938, |
|
"eval_runtime": 1602.9954, |
|
"eval_samples_per_second": 1.871, |
|
"eval_steps_per_second": 0.468, |
|
"step": 25000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30000, |
|
"num_train_epochs": 5, |
|
"save_steps": 2500, |
|
"total_flos": 1.0283694363520248e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|