|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4443561782335523, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9986295347545738e-05, |
|
"loss": 4.4247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gen_len": 6.267, |
|
"eval_loss": 3.5801377296447754, |
|
"eval_rouge1": 27.9882, |
|
"eval_rouge2": 8.9331, |
|
"eval_rougeL": 27.6011, |
|
"eval_rougeLsum": 27.5353, |
|
"eval_runtime": 1554.6657, |
|
"eval_samples_per_second": 1.93, |
|
"eval_steps_per_second": 0.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9945218953682736e-05, |
|
"loss": 2.959, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_gen_len": 6.267333333333333, |
|
"eval_loss": 3.238922119140625, |
|
"eval_rouge1": 34.4913, |
|
"eval_rouge2": 14.1739, |
|
"eval_rougeL": 34.2505, |
|
"eval_rougeLsum": 34.1717, |
|
"eval_runtime": 1549.8046, |
|
"eval_samples_per_second": 1.936, |
|
"eval_steps_per_second": 0.484, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9876883405951378e-05, |
|
"loss": 2.7367, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.983262300491333, |
|
"eval_rouge1": 40.5052, |
|
"eval_rouge2": 18.725, |
|
"eval_rougeL": 40.1333, |
|
"eval_rougeLsum": 40.0554, |
|
"eval_runtime": 1542.1597, |
|
"eval_samples_per_second": 1.945, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9781476007338058e-05, |
|
"loss": 2.553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_gen_len": 6.267333333333333, |
|
"eval_loss": 2.781419277191162, |
|
"eval_rouge1": 45.972, |
|
"eval_rouge2": 24.75, |
|
"eval_rougeL": 45.6989, |
|
"eval_rougeLsum": 45.6257, |
|
"eval_runtime": 1547.6212, |
|
"eval_samples_per_second": 1.938, |
|
"eval_steps_per_second": 0.485, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9659258262890683e-05, |
|
"loss": 2.3988, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.6185145378112793, |
|
"eval_rouge1": 51.3237, |
|
"eval_rouge2": 30.8584, |
|
"eval_rougeL": 51.0697, |
|
"eval_rougeLsum": 50.9947, |
|
"eval_runtime": 1538.5542, |
|
"eval_samples_per_second": 1.95, |
|
"eval_steps_per_second": 0.487, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9510565162951538e-05, |
|
"loss": 2.2788, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.4821877479553223, |
|
"eval_rouge1": 55.054, |
|
"eval_rouge2": 35.9558, |
|
"eval_rougeL": 54.885, |
|
"eval_rougeLsum": 54.8263, |
|
"eval_runtime": 1535.9732, |
|
"eval_samples_per_second": 1.953, |
|
"eval_steps_per_second": 0.488, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9335804264972018e-05, |
|
"loss": 2.185, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.372607469558716, |
|
"eval_rouge1": 58.4125, |
|
"eval_rouge2": 39.7017, |
|
"eval_rougeL": 58.2864, |
|
"eval_rougeLsum": 58.2323, |
|
"eval_runtime": 1555.498, |
|
"eval_samples_per_second": 1.929, |
|
"eval_steps_per_second": 0.482, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.913545457642601e-05, |
|
"loss": 2.1024, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.2966315746307373, |
|
"eval_rouge1": 60.2269, |
|
"eval_rouge2": 42.343, |
|
"eval_rougeL": 60.1064, |
|
"eval_rougeLsum": 60.015, |
|
"eval_runtime": 1539.5016, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.487, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.891006524188368e-05, |
|
"loss": 2.0395, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.2079367637634277, |
|
"eval_rouge1": 63.9442, |
|
"eval_rouge2": 47.1262, |
|
"eval_rougeL": 63.8226, |
|
"eval_rougeLsum": 63.783, |
|
"eval_runtime": 1534.6718, |
|
"eval_samples_per_second": 1.955, |
|
"eval_steps_per_second": 0.489, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.866025403784439e-05, |
|
"loss": 1.9929, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.142320156097412, |
|
"eval_rouge1": 66.0535, |
|
"eval_rouge2": 49.403, |
|
"eval_rougeL": 65.9837, |
|
"eval_rougeLsum": 65.9295, |
|
"eval_runtime": 1531.6986, |
|
"eval_samples_per_second": 1.959, |
|
"eval_steps_per_second": 0.49, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8386705679454243e-05, |
|
"loss": 1.944, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.094369888305664, |
|
"eval_rouge1": 67.4662, |
|
"eval_rouge2": 50.9242, |
|
"eval_rougeL": 67.3906, |
|
"eval_rougeLsum": 67.3416, |
|
"eval_runtime": 1527.1607, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.491, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 1.902, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.0382838249206543, |
|
"eval_rouge1": 69.3267, |
|
"eval_rouge2": 53.8395, |
|
"eval_rougeL": 69.2759, |
|
"eval_rougeLsum": 69.2151, |
|
"eval_runtime": 1532.3017, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.489, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.777145961456971e-05, |
|
"loss": 1.8712, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 2.010657548904419, |
|
"eval_rouge1": 70.2771, |
|
"eval_rouge2": 54.5208, |
|
"eval_rougeL": 70.2492, |
|
"eval_rougeLsum": 70.2095, |
|
"eval_runtime": 1563.3117, |
|
"eval_samples_per_second": 1.919, |
|
"eval_steps_per_second": 0.48, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.7431448254773943e-05, |
|
"loss": 1.8387, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9761910438537598, |
|
"eval_rouge1": 71.145, |
|
"eval_rouge2": 56.4319, |
|
"eval_rougeL": 71.1008, |
|
"eval_rougeLsum": 71.0876, |
|
"eval_runtime": 1552.4923, |
|
"eval_samples_per_second": 1.932, |
|
"eval_steps_per_second": 0.483, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 1.7558, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9575979709625244, |
|
"eval_rouge1": 72.527, |
|
"eval_rouge2": 58.0461, |
|
"eval_rougeL": 72.4801, |
|
"eval_rougeLsum": 72.461, |
|
"eval_runtime": 1522.5751, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.493, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.6691306063588583e-05, |
|
"loss": 1.7363, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9305455684661865, |
|
"eval_rouge1": 73.3884, |
|
"eval_rouge2": 59.6248, |
|
"eval_rougeL": 73.3396, |
|
"eval_rougeLsum": 73.3362, |
|
"eval_runtime": 1541.3792, |
|
"eval_samples_per_second": 1.946, |
|
"eval_steps_per_second": 0.487, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.6293203910498375e-05, |
|
"loss": 1.7245, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.9158198833465576, |
|
"eval_rouge1": 73.3565, |
|
"eval_rouge2": 58.7585, |
|
"eval_rougeL": 73.3541, |
|
"eval_rougeLsum": 73.3668, |
|
"eval_runtime": 1546.9117, |
|
"eval_samples_per_second": 1.939, |
|
"eval_steps_per_second": 0.485, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.5877852522924733e-05, |
|
"loss": 1.7147, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.898065447807312, |
|
"eval_rouge1": 74.1688, |
|
"eval_rouge2": 59.9465, |
|
"eval_rougeL": 74.1934, |
|
"eval_rougeLsum": 74.1447, |
|
"eval_runtime": 1537.688, |
|
"eval_samples_per_second": 1.951, |
|
"eval_steps_per_second": 0.488, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5446390350150272e-05, |
|
"loss": 1.7013, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8747327327728271, |
|
"eval_rouge1": 75.1266, |
|
"eval_rouge2": 61.394, |
|
"eval_rougeL": 75.128, |
|
"eval_rougeLsum": 75.0856, |
|
"eval_runtime": 1526.768, |
|
"eval_samples_per_second": 1.965, |
|
"eval_steps_per_second": 0.491, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.6906, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_gen_len": 6.267666666666667, |
|
"eval_loss": 1.8567513227462769, |
|
"eval_rouge1": 75.8503, |
|
"eval_rouge2": 62.2772, |
|
"eval_rougeL": 75.8449, |
|
"eval_rougeLsum": 75.8138, |
|
"eval_runtime": 1518.2689, |
|
"eval_samples_per_second": 1.976, |
|
"eval_steps_per_second": 0.494, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30000, |
|
"num_train_epochs": 5, |
|
"save_steps": 2500, |
|
"total_flos": 4.113331352053805e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|