|
{ |
|
"best_metric": 2.132361888885498, |
|
"best_model_checkpoint": "./26-125356_megasuperkanin/checkpoint-100000", |
|
"epoch": 0.9769822970807769, |
|
"global_step": 100000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6761, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.551, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gen_len": 28.4674, |
|
"eval_loss": 2.423037052154541, |
|
"eval_rouge1": 0.214, |
|
"eval_rouge2": 0.0668, |
|
"eval_rougeL": 0.1717, |
|
"eval_rougeLsum": 0.1777, |
|
"eval_runtime": 1015.6418, |
|
"eval_samples_per_second": 2.265, |
|
"eval_steps_per_second": 0.284, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5186, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4717, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gen_len": 25.6604, |
|
"eval_loss": 2.3709843158721924, |
|
"eval_rouge1": 0.2071, |
|
"eval_rouge2": 0.0634, |
|
"eval_rougeL": 0.1686, |
|
"eval_rougeLsum": 0.1745, |
|
"eval_runtime": 951.1096, |
|
"eval_samples_per_second": 2.418, |
|
"eval_steps_per_second": 0.303, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4593, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4281, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_gen_len": 28.8296, |
|
"eval_loss": 2.3228819370269775, |
|
"eval_rouge1": 0.2137, |
|
"eval_rouge2": 0.0662, |
|
"eval_rougeL": 0.1711, |
|
"eval_rougeLsum": 0.1768, |
|
"eval_runtime": 1022.9494, |
|
"eval_samples_per_second": 2.248, |
|
"eval_steps_per_second": 0.282, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4049, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3735, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_gen_len": 29.9183, |
|
"eval_loss": 2.2881429195404053, |
|
"eval_rouge1": 0.2164, |
|
"eval_rouge2": 0.0668, |
|
"eval_rougeL": 0.1735, |
|
"eval_rougeLsum": 0.1808, |
|
"eval_runtime": 1036.2984, |
|
"eval_samples_per_second": 2.219, |
|
"eval_steps_per_second": 0.278, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3732, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.377, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_gen_len": 29.5183, |
|
"eval_loss": 2.2759358882904053, |
|
"eval_rouge1": 0.2209, |
|
"eval_rouge2": 0.0694, |
|
"eval_rougeL": 0.1782, |
|
"eval_rougeLsum": 0.1851, |
|
"eval_runtime": 1036.1071, |
|
"eval_samples_per_second": 2.22, |
|
"eval_steps_per_second": 0.278, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3513, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3444, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_gen_len": 29.3183, |
|
"eval_loss": 2.2552034854888916, |
|
"eval_rouge1": 0.2194, |
|
"eval_rouge2": 0.0679, |
|
"eval_rougeL": 0.1757, |
|
"eval_rougeLsum": 0.1829, |
|
"eval_runtime": 1037.4604, |
|
"eval_samples_per_second": 2.217, |
|
"eval_steps_per_second": 0.278, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3504, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3203, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_gen_len": 32.2061, |
|
"eval_loss": 2.235518455505371, |
|
"eval_rouge1": 0.2284, |
|
"eval_rouge2": 0.0722, |
|
"eval_rougeL": 0.1819, |
|
"eval_rougeLsum": 0.1892, |
|
"eval_runtime": 1121.1561, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 0.257, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3087, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3132, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 29.5452, |
|
"eval_loss": 2.2289836406707764, |
|
"eval_rouge1": 0.2183, |
|
"eval_rouge2": 0.0673, |
|
"eval_rougeL": 0.1759, |
|
"eval_rougeLsum": 0.1827, |
|
"eval_runtime": 1055.2895, |
|
"eval_samples_per_second": 2.179, |
|
"eval_steps_per_second": 0.273, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3063, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3116, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_gen_len": 30.2935, |
|
"eval_loss": 2.218207359313965, |
|
"eval_rouge1": 0.2239, |
|
"eval_rouge2": 0.07, |
|
"eval_rougeL": 0.1798, |
|
"eval_rougeLsum": 0.1879, |
|
"eval_runtime": 1063.5185, |
|
"eval_samples_per_second": 2.163, |
|
"eval_steps_per_second": 0.271, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3014, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2852, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_gen_len": 28.6443, |
|
"eval_loss": 2.2090706825256348, |
|
"eval_rouge1": 0.2251, |
|
"eval_rouge2": 0.0703, |
|
"eval_rougeL": 0.1812, |
|
"eval_rougeLsum": 0.1887, |
|
"eval_runtime": 1045.7282, |
|
"eval_samples_per_second": 2.199, |
|
"eval_steps_per_second": 0.275, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2963, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2683, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_gen_len": 29.9661, |
|
"eval_loss": 2.1879115104675293, |
|
"eval_rouge1": 0.2257, |
|
"eval_rouge2": 0.0716, |
|
"eval_rougeL": 0.1806, |
|
"eval_rougeLsum": 0.1876, |
|
"eval_runtime": 1061.3075, |
|
"eval_samples_per_second": 2.167, |
|
"eval_steps_per_second": 0.271, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2735, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2614, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_gen_len": 30.4435, |
|
"eval_loss": 2.1871089935302734, |
|
"eval_rouge1": 0.2316, |
|
"eval_rouge2": 0.075, |
|
"eval_rougeL": 0.1863, |
|
"eval_rougeLsum": 0.1936, |
|
"eval_runtime": 1083.7377, |
|
"eval_samples_per_second": 2.122, |
|
"eval_steps_per_second": 0.266, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2735, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.252, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_gen_len": 30.6239, |
|
"eval_loss": 2.175469160079956, |
|
"eval_rouge1": 0.226, |
|
"eval_rouge2": 0.0729, |
|
"eval_rougeL": 0.1834, |
|
"eval_rougeLsum": 0.1914, |
|
"eval_runtime": 1080.4009, |
|
"eval_samples_per_second": 2.129, |
|
"eval_steps_per_second": 0.267, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2509, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.262, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_gen_len": 30.9983, |
|
"eval_loss": 2.16789174079895, |
|
"eval_rouge1": 0.2256, |
|
"eval_rouge2": 0.0716, |
|
"eval_rougeL": 0.1815, |
|
"eval_rougeLsum": 0.1889, |
|
"eval_runtime": 1104.0224, |
|
"eval_samples_per_second": 2.083, |
|
"eval_steps_per_second": 0.261, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2398, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.228, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_gen_len": 29.9704, |
|
"eval_loss": 2.1669178009033203, |
|
"eval_rouge1": 0.2253, |
|
"eval_rouge2": 0.0725, |
|
"eval_rougeL": 0.1822, |
|
"eval_rougeLsum": 0.1894, |
|
"eval_runtime": 1052.7669, |
|
"eval_samples_per_second": 2.185, |
|
"eval_steps_per_second": 0.274, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.25, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.234, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 29.4826, |
|
"eval_loss": 2.1604671478271484, |
|
"eval_rouge1": 0.2283, |
|
"eval_rouge2": 0.0747, |
|
"eval_rougeL": 0.1855, |
|
"eval_rougeLsum": 0.1937, |
|
"eval_runtime": 1075.8159, |
|
"eval_samples_per_second": 2.138, |
|
"eval_steps_per_second": 0.268, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.236, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2289, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_gen_len": 30.0213, |
|
"eval_loss": 2.1517326831817627, |
|
"eval_rouge1": 0.2226, |
|
"eval_rouge2": 0.0705, |
|
"eval_rougeL": 0.1801, |
|
"eval_rougeLsum": 0.1873, |
|
"eval_runtime": 1072.8178, |
|
"eval_samples_per_second": 2.144, |
|
"eval_steps_per_second": 0.268, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2214, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2043, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_gen_len": 29.5361, |
|
"eval_loss": 2.1455490589141846, |
|
"eval_rouge1": 0.2265, |
|
"eval_rouge2": 0.075, |
|
"eval_rougeL": 0.1838, |
|
"eval_rougeLsum": 0.1908, |
|
"eval_runtime": 1058.731, |
|
"eval_samples_per_second": 2.172, |
|
"eval_steps_per_second": 0.272, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2419, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2259, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_gen_len": 29.6874, |
|
"eval_loss": 2.1389129161834717, |
|
"eval_rouge1": 0.2287, |
|
"eval_rouge2": 0.0713, |
|
"eval_rougeL": 0.1844, |
|
"eval_rougeLsum": 0.1911, |
|
"eval_runtime": 1069.2344, |
|
"eval_samples_per_second": 2.151, |
|
"eval_steps_per_second": 0.269, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2202, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2307, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_gen_len": 30.7513, |
|
"eval_loss": 2.132361888885498, |
|
"eval_rouge1": 0.2293, |
|
"eval_rouge2": 0.0741, |
|
"eval_rougeL": 0.1845, |
|
"eval_rougeLsum": 0.1924, |
|
"eval_runtime": 1089.9927, |
|
"eval_samples_per_second": 2.11, |
|
"eval_steps_per_second": 0.264, |
|
"step": 100000 |
|
} |
|
], |
|
"max_steps": 102356, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.8696291573252096e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|