|
{ |
|
"best_metric": 2.63411021232605, |
|
"best_model_checkpoint": "model/best_model_test_0423_small/checkpoint-55000", |
|
"epoch": 3.0, |
|
"global_step": 57807, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.827010569654195e-05, |
|
"loss": 5.8165, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gen_len": 18.0056, |
|
"eval_loss": 3.6540932655334473, |
|
"eval_rouge1": 11.6734, |
|
"eval_rouge2": 3.9865, |
|
"eval_rougeL": 11.5734, |
|
"eval_rougeLsum": 11.5375, |
|
"eval_runtime": 831.6458, |
|
"eval_samples_per_second": 5.793, |
|
"eval_steps_per_second": 2.897, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.654021139308389e-05, |
|
"loss": 4.306, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gen_len": 16.8948, |
|
"eval_loss": 3.4290754795074463, |
|
"eval_rouge1": 12.0417, |
|
"eval_rouge2": 3.8419, |
|
"eval_rougeL": 11.9231, |
|
"eval_rougeLsum": 11.9223, |
|
"eval_runtime": 769.6461, |
|
"eval_samples_per_second": 6.26, |
|
"eval_steps_per_second": 3.13, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.481031708962583e-05, |
|
"loss": 4.1091, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gen_len": 19.4016, |
|
"eval_loss": 3.364335060119629, |
|
"eval_rouge1": 13.661, |
|
"eval_rouge2": 4.5171, |
|
"eval_rougeL": 13.5123, |
|
"eval_rougeLsum": 13.5076, |
|
"eval_runtime": 875.4047, |
|
"eval_samples_per_second": 5.504, |
|
"eval_steps_per_second": 2.752, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.308042278616777e-05, |
|
"loss": 3.9637, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_gen_len": 18.4288, |
|
"eval_loss": 3.2573604583740234, |
|
"eval_rouge1": 13.8443, |
|
"eval_rouge2": 4.1761, |
|
"eval_rougeL": 13.689, |
|
"eval_rougeLsum": 13.6927, |
|
"eval_runtime": 771.3843, |
|
"eval_samples_per_second": 6.246, |
|
"eval_steps_per_second": 3.123, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.135052848270971e-05, |
|
"loss": 3.8205, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_gen_len": 21.5776, |
|
"eval_loss": 3.2433691024780273, |
|
"eval_rouge1": 13.5371, |
|
"eval_rouge2": 4.3639, |
|
"eval_rougeL": 13.3551, |
|
"eval_rougeLsum": 13.3552, |
|
"eval_runtime": 903.4907, |
|
"eval_samples_per_second": 5.333, |
|
"eval_steps_per_second": 2.666, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.962063417925165e-05, |
|
"loss": 3.7262, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_gen_len": 21.5548, |
|
"eval_loss": 3.1689953804016113, |
|
"eval_rouge1": 14.3668, |
|
"eval_rouge2": 4.8048, |
|
"eval_rougeL": 14.2191, |
|
"eval_rougeLsum": 14.1906, |
|
"eval_runtime": 870.3487, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 2.768, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.78907398757936e-05, |
|
"loss": 3.6887, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_gen_len": 20.89, |
|
"eval_loss": 3.0656516551971436, |
|
"eval_rouge1": 14.3265, |
|
"eval_rouge2": 4.436, |
|
"eval_rougeL": 14.212, |
|
"eval_rougeLsum": 14.205, |
|
"eval_runtime": 840.9965, |
|
"eval_samples_per_second": 5.729, |
|
"eval_steps_per_second": 2.864, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.616084557233554e-05, |
|
"loss": 3.6337, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_gen_len": 20.3651, |
|
"eval_loss": 3.031759262084961, |
|
"eval_rouge1": 14.6809, |
|
"eval_rouge2": 4.8345, |
|
"eval_rougeL": 14.5378, |
|
"eval_rougeLsum": 14.5331, |
|
"eval_runtime": 836.4852, |
|
"eval_samples_per_second": 5.76, |
|
"eval_steps_per_second": 2.88, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.443095126887747e-05, |
|
"loss": 3.5443, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_gen_len": 21.7742, |
|
"eval_loss": 3.0553905963897705, |
|
"eval_rouge1": 15.3372, |
|
"eval_rouge2": 4.9163, |
|
"eval_rougeL": 15.1794, |
|
"eval_rougeLsum": 15.1781, |
|
"eval_runtime": 893.8221, |
|
"eval_samples_per_second": 5.39, |
|
"eval_steps_per_second": 2.695, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.270105696541942e-05, |
|
"loss": 3.5203, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_gen_len": 20.8113, |
|
"eval_loss": 2.9792585372924805, |
|
"eval_rouge1": 14.9278, |
|
"eval_rouge2": 4.9656, |
|
"eval_rougeL": 14.7491, |
|
"eval_rougeLsum": 14.743, |
|
"eval_runtime": 848.3297, |
|
"eval_samples_per_second": 5.679, |
|
"eval_steps_per_second": 2.84, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.097116266196136e-05, |
|
"loss": 3.4936, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_gen_len": 23.4274, |
|
"eval_loss": 3.0078511238098145, |
|
"eval_rouge1": 15.7705, |
|
"eval_rouge2": 5.1453, |
|
"eval_rougeL": 15.5582, |
|
"eval_rougeLsum": 15.5756, |
|
"eval_runtime": 944.685, |
|
"eval_samples_per_second": 5.1, |
|
"eval_steps_per_second": 2.55, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.92412683585033e-05, |
|
"loss": 3.4592, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_gen_len": 22.7007, |
|
"eval_loss": 2.972140312194824, |
|
"eval_rouge1": 15.0201, |
|
"eval_rouge2": 5.1612, |
|
"eval_rougeL": 14.8508, |
|
"eval_rougeLsum": 14.8198, |
|
"eval_runtime": 914.5833, |
|
"eval_samples_per_second": 5.268, |
|
"eval_steps_per_second": 2.634, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.751137405504524e-05, |
|
"loss": 3.377, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_gen_len": 23.4427, |
|
"eval_loss": 3.01123309135437, |
|
"eval_rouge1": 15.9595, |
|
"eval_rouge2": 5.1133, |
|
"eval_rougeL": 15.78, |
|
"eval_rougeLsum": 15.7774, |
|
"eval_runtime": 950.6422, |
|
"eval_samples_per_second": 5.068, |
|
"eval_steps_per_second": 2.534, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.578147975158718e-05, |
|
"loss": 3.4158, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_gen_len": 21.6009, |
|
"eval_loss": 2.9238853454589844, |
|
"eval_rouge1": 14.7984, |
|
"eval_rouge2": 5.051, |
|
"eval_rougeL": 14.6943, |
|
"eval_rougeLsum": 14.6581, |
|
"eval_runtime": 878.6968, |
|
"eval_samples_per_second": 5.483, |
|
"eval_steps_per_second": 2.742, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.405158544812913e-05, |
|
"loss": 3.378, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 22.0828, |
|
"eval_loss": 2.889694929122925, |
|
"eval_rouge1": 16.5128, |
|
"eval_rouge2": 5.1923, |
|
"eval_rougeL": 16.3523, |
|
"eval_rougeLsum": 16.3265, |
|
"eval_runtime": 902.1756, |
|
"eval_samples_per_second": 5.34, |
|
"eval_steps_per_second": 2.67, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.232169114467106e-05, |
|
"loss": 3.3231, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_gen_len": 22.5807, |
|
"eval_loss": 2.9346752166748047, |
|
"eval_rouge1": 16.9997, |
|
"eval_rouge2": 5.5524, |
|
"eval_rougeL": 16.8534, |
|
"eval_rougeLsum": 16.8737, |
|
"eval_runtime": 895.2014, |
|
"eval_samples_per_second": 5.382, |
|
"eval_steps_per_second": 2.691, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.059179684121301e-05, |
|
"loss": 3.3268, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_gen_len": 23.6988, |
|
"eval_loss": 2.911571741104126, |
|
"eval_rouge1": 16.0261, |
|
"eval_rouge2": 5.4226, |
|
"eval_rougeL": 15.9234, |
|
"eval_rougeLsum": 15.914, |
|
"eval_runtime": 962.7416, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 2.502, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.886190253775494e-05, |
|
"loss": 3.3127, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_gen_len": 22.9481, |
|
"eval_loss": 2.861004590988159, |
|
"eval_rouge1": 16.6255, |
|
"eval_rouge2": 5.3554, |
|
"eval_rougeL": 16.4729, |
|
"eval_rougeLsum": 16.4569, |
|
"eval_runtime": 922.0145, |
|
"eval_samples_per_second": 5.226, |
|
"eval_steps_per_second": 2.613, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.713200823429688e-05, |
|
"loss": 3.2664, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_gen_len": 23.4423, |
|
"eval_loss": 2.860574722290039, |
|
"eval_rouge1": 17.7703, |
|
"eval_rouge2": 5.9475, |
|
"eval_rougeL": 17.6229, |
|
"eval_rougeLsum": 17.6259, |
|
"eval_runtime": 936.5594, |
|
"eval_samples_per_second": 5.144, |
|
"eval_steps_per_second": 2.572, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.540211393083882e-05, |
|
"loss": 3.1718, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_gen_len": 23.0093, |
|
"eval_loss": 2.8764114379882812, |
|
"eval_rouge1": 17.301, |
|
"eval_rouge2": 5.6262, |
|
"eval_rougeL": 17.122, |
|
"eval_rougeLsum": 17.1104, |
|
"eval_runtime": 908.2265, |
|
"eval_samples_per_second": 5.305, |
|
"eval_steps_per_second": 2.652, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.367221962738077e-05, |
|
"loss": 3.0987, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_gen_len": 20.9697, |
|
"eval_loss": 2.82820200920105, |
|
"eval_rouge1": 16.4718, |
|
"eval_rouge2": 5.2077, |
|
"eval_rougeL": 16.3394, |
|
"eval_rougeLsum": 16.3401, |
|
"eval_runtime": 831.2333, |
|
"eval_samples_per_second": 5.796, |
|
"eval_steps_per_second": 2.898, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.19423253239227e-05, |
|
"loss": 3.1486, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_gen_len": 22.7291, |
|
"eval_loss": 2.823465347290039, |
|
"eval_rouge1": 18.5594, |
|
"eval_rouge2": 5.9469, |
|
"eval_rougeL": 18.3882, |
|
"eval_rougeLsum": 18.3799, |
|
"eval_runtime": 901.4834, |
|
"eval_samples_per_second": 5.345, |
|
"eval_steps_per_second": 2.672, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.021243102046465e-05, |
|
"loss": 3.1435, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_gen_len": 22.9612, |
|
"eval_loss": 2.826120615005493, |
|
"eval_rouge1": 18.111, |
|
"eval_rouge2": 6.0309, |
|
"eval_rougeL": 17.9593, |
|
"eval_rougeLsum": 17.9613, |
|
"eval_runtime": 912.4414, |
|
"eval_samples_per_second": 5.28, |
|
"eval_steps_per_second": 2.64, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.84825367170066e-05, |
|
"loss": 3.1049, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_gen_len": 22.5558, |
|
"eval_loss": 2.8067939281463623, |
|
"eval_rouge1": 17.124, |
|
"eval_rouge2": 5.5675, |
|
"eval_rougeL": 16.9714, |
|
"eval_rougeLsum": 16.9876, |
|
"eval_runtime": 903.448, |
|
"eval_samples_per_second": 5.333, |
|
"eval_steps_per_second": 2.666, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.675264241354854e-05, |
|
"loss": 3.1357, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_gen_len": 23.0075, |
|
"eval_loss": 2.801447629928589, |
|
"eval_rouge1": 17.3916, |
|
"eval_rouge2": 5.8671, |
|
"eval_rougeL": 17.2148, |
|
"eval_rougeLsum": 17.2502, |
|
"eval_runtime": 917.374, |
|
"eval_samples_per_second": 5.252, |
|
"eval_steps_per_second": 2.626, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.5022748110090474e-05, |
|
"loss": 3.0904, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_gen_len": 22.1492, |
|
"eval_loss": 2.7790260314941406, |
|
"eval_rouge1": 17.419, |
|
"eval_rouge2": 5.6689, |
|
"eval_rougeL": 17.3125, |
|
"eval_rougeLsum": 17.3058, |
|
"eval_runtime": 879.4764, |
|
"eval_samples_per_second": 5.478, |
|
"eval_steps_per_second": 2.739, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.3292853806632414e-05, |
|
"loss": 3.0877, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_gen_len": 21.7522, |
|
"eval_loss": 2.7462034225463867, |
|
"eval_rouge1": 17.0605, |
|
"eval_rouge2": 5.4735, |
|
"eval_rougeL": 16.9414, |
|
"eval_rougeLsum": 16.9378, |
|
"eval_runtime": 878.5335, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 2.742, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.1562959503174354e-05, |
|
"loss": 3.0694, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_gen_len": 23.2005, |
|
"eval_loss": 2.75631046295166, |
|
"eval_rouge1": 17.752, |
|
"eval_rouge2": 5.8889, |
|
"eval_rougeL": 17.5967, |
|
"eval_rougeLsum": 17.619, |
|
"eval_runtime": 928.0873, |
|
"eval_samples_per_second": 5.191, |
|
"eval_steps_per_second": 2.596, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.98330651997163e-05, |
|
"loss": 3.0498, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_gen_len": 21.9369, |
|
"eval_loss": 2.752108335494995, |
|
"eval_rouge1": 17.9056, |
|
"eval_rouge2": 5.7754, |
|
"eval_rougeL": 17.7624, |
|
"eval_rougeLsum": 17.7836, |
|
"eval_runtime": 872.1773, |
|
"eval_samples_per_second": 5.524, |
|
"eval_steps_per_second": 2.762, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.810317089625824e-05, |
|
"loss": 3.0566, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_gen_len": 22.2358, |
|
"eval_loss": 2.7468161582946777, |
|
"eval_rouge1": 18.6531, |
|
"eval_rouge2": 6.0538, |
|
"eval_rougeL": 18.5397, |
|
"eval_rougeLsum": 18.5038, |
|
"eval_runtime": 878.1686, |
|
"eval_samples_per_second": 5.486, |
|
"eval_steps_per_second": 2.743, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.637327659280018e-05, |
|
"loss": 3.0489, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_gen_len": 22.0108, |
|
"eval_loss": 2.7450203895568848, |
|
"eval_rouge1": 18.4869, |
|
"eval_rouge2": 5.9297, |
|
"eval_rougeL": 18.3139, |
|
"eval_rougeLsum": 18.3169, |
|
"eval_runtime": 856.3376, |
|
"eval_samples_per_second": 5.626, |
|
"eval_steps_per_second": 2.813, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.464338228934212e-05, |
|
"loss": 3.0247, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_gen_len": 22.2071, |
|
"eval_loss": 2.744947671890259, |
|
"eval_rouge1": 18.5192, |
|
"eval_rouge2": 5.9966, |
|
"eval_rougeL": 18.3721, |
|
"eval_rougeLsum": 18.3569, |
|
"eval_runtime": 887.9355, |
|
"eval_samples_per_second": 5.426, |
|
"eval_steps_per_second": 2.713, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.291348798588406e-05, |
|
"loss": 2.9877, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_gen_len": 21.4595, |
|
"eval_loss": 2.7159626483917236, |
|
"eval_rouge1": 18.1655, |
|
"eval_rouge2": 5.9294, |
|
"eval_rougeL": 18.0304, |
|
"eval_rougeLsum": 18.0836, |
|
"eval_runtime": 847.8313, |
|
"eval_samples_per_second": 5.683, |
|
"eval_steps_per_second": 2.841, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.118359368242601e-05, |
|
"loss": 3.0383, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_gen_len": 22.9732, |
|
"eval_loss": 2.720228433609009, |
|
"eval_rouge1": 18.4959, |
|
"eval_rouge2": 6.2413, |
|
"eval_rougeL": 18.3363, |
|
"eval_rougeLsum": 18.3431, |
|
"eval_runtime": 911.6184, |
|
"eval_samples_per_second": 5.285, |
|
"eval_steps_per_second": 2.643, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.945369937896794e-05, |
|
"loss": 3.041, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_gen_len": 21.9435, |
|
"eval_loss": 2.6947648525238037, |
|
"eval_rouge1": 17.5306, |
|
"eval_rouge2": 5.8119, |
|
"eval_rougeL": 17.4011, |
|
"eval_rougeLsum": 17.4149, |
|
"eval_runtime": 881.522, |
|
"eval_samples_per_second": 5.466, |
|
"eval_steps_per_second": 2.733, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.772380507550989e-05, |
|
"loss": 2.9285, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_gen_len": 22.5174, |
|
"eval_loss": 2.6956820487976074, |
|
"eval_rouge1": 18.6418, |
|
"eval_rouge2": 6.1394, |
|
"eval_rougeL": 18.514, |
|
"eval_rougeLsum": 18.4823, |
|
"eval_runtime": 891.2207, |
|
"eval_samples_per_second": 5.406, |
|
"eval_steps_per_second": 2.703, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.599391077205183e-05, |
|
"loss": 3.0556, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_gen_len": 22.9315, |
|
"eval_loss": 2.7000110149383545, |
|
"eval_rouge1": 18.7387, |
|
"eval_rouge2": 6.0585, |
|
"eval_rougeL": 18.5761, |
|
"eval_rougeLsum": 18.574, |
|
"eval_runtime": 896.5509, |
|
"eval_samples_per_second": 5.374, |
|
"eval_steps_per_second": 2.687, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.426401646859377e-05, |
|
"loss": 3.0033, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_gen_len": 22.4726, |
|
"eval_loss": 2.697437047958374, |
|
"eval_rouge1": 17.9387, |
|
"eval_rouge2": 6.1387, |
|
"eval_rougeL": 17.8271, |
|
"eval_rougeLsum": 17.8111, |
|
"eval_runtime": 892.312, |
|
"eval_samples_per_second": 5.399, |
|
"eval_steps_per_second": 2.7, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.253412216513571e-05, |
|
"loss": 2.9207, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_gen_len": 23.0274, |
|
"eval_loss": 2.699842929840088, |
|
"eval_rouge1": 18.6073, |
|
"eval_rouge2": 6.1906, |
|
"eval_rougeL": 18.3891, |
|
"eval_rougeLsum": 18.4103, |
|
"eval_runtime": 911.8188, |
|
"eval_samples_per_second": 5.284, |
|
"eval_steps_per_second": 2.642, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.080422786167765e-05, |
|
"loss": 2.8922, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_gen_len": 22.0697, |
|
"eval_loss": 2.67978572845459, |
|
"eval_rouge1": 18.4017, |
|
"eval_rouge2": 6.2244, |
|
"eval_rougeL": 18.2321, |
|
"eval_rougeLsum": 18.2296, |
|
"eval_runtime": 869.5208, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 2.77, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.9074333558219595e-05, |
|
"loss": 2.8938, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_gen_len": 21.7017, |
|
"eval_loss": 2.666600227355957, |
|
"eval_rouge1": 18.8016, |
|
"eval_rouge2": 6.2066, |
|
"eval_rougeL": 18.6411, |
|
"eval_rougeLsum": 18.6353, |
|
"eval_runtime": 845.165, |
|
"eval_samples_per_second": 5.701, |
|
"eval_steps_per_second": 2.85, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7344439254761532e-05, |
|
"loss": 2.9124, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_gen_len": 21.4303, |
|
"eval_loss": 2.6605563163757324, |
|
"eval_rouge1": 18.7544, |
|
"eval_rouge2": 6.3533, |
|
"eval_rougeL": 18.5923, |
|
"eval_rougeLsum": 18.5739, |
|
"eval_runtime": 843.6756, |
|
"eval_samples_per_second": 5.711, |
|
"eval_steps_per_second": 2.855, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.5614544951303476e-05, |
|
"loss": 2.8597, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_gen_len": 22.3352, |
|
"eval_loss": 2.694676399230957, |
|
"eval_rouge1": 18.8672, |
|
"eval_rouge2": 6.4526, |
|
"eval_rougeL": 18.7416, |
|
"eval_rougeLsum": 18.7482, |
|
"eval_runtime": 884.9439, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 2.722, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.388465064784542e-05, |
|
"loss": 2.8435, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_gen_len": 21.9081, |
|
"eval_loss": 2.6738336086273193, |
|
"eval_rouge1": 18.9405, |
|
"eval_rouge2": 6.356, |
|
"eval_rougeL": 18.7791, |
|
"eval_rougeLsum": 18.7729, |
|
"eval_runtime": 862.6512, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 2.793, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.215475634438736e-05, |
|
"loss": 2.8672, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_gen_len": 21.8869, |
|
"eval_loss": 2.6733603477478027, |
|
"eval_rouge1": 18.7509, |
|
"eval_rouge2": 6.3991, |
|
"eval_rougeL": 18.6175, |
|
"eval_rougeLsum": 18.5828, |
|
"eval_runtime": 863.0328, |
|
"eval_samples_per_second": 5.583, |
|
"eval_steps_per_second": 2.791, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.04248620409293e-05, |
|
"loss": 2.899, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_gen_len": 21.7694, |
|
"eval_loss": 2.6575164794921875, |
|
"eval_rouge1": 18.5529, |
|
"eval_rouge2": 6.3489, |
|
"eval_rougeL": 18.4139, |
|
"eval_rougeLsum": 18.401, |
|
"eval_runtime": 860.1836, |
|
"eval_samples_per_second": 5.601, |
|
"eval_steps_per_second": 2.801, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.869496773747124e-05, |
|
"loss": 2.8616, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_gen_len": 21.5685, |
|
"eval_loss": 2.6484768390655518, |
|
"eval_rouge1": 18.7563, |
|
"eval_rouge2": 6.268, |
|
"eval_rougeL": 18.6368, |
|
"eval_rougeLsum": 18.6253, |
|
"eval_runtime": 854.7636, |
|
"eval_samples_per_second": 5.637, |
|
"eval_steps_per_second": 2.818, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.696507343401318e-05, |
|
"loss": 2.8937, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_gen_len": 22.3337, |
|
"eval_loss": 2.648592472076416, |
|
"eval_rouge1": 18.6525, |
|
"eval_rouge2": 6.3426, |
|
"eval_rougeL": 18.5184, |
|
"eval_rougeLsum": 18.5129, |
|
"eval_runtime": 882.9047, |
|
"eval_samples_per_second": 5.457, |
|
"eval_steps_per_second": 2.728, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5235179130555125e-05, |
|
"loss": 2.8446, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_gen_len": 22.3331, |
|
"eval_loss": 2.657186508178711, |
|
"eval_rouge1": 18.6529, |
|
"eval_rouge2": 6.2655, |
|
"eval_rougeL": 18.4915, |
|
"eval_rougeLsum": 18.4764, |
|
"eval_runtime": 873.4651, |
|
"eval_samples_per_second": 5.516, |
|
"eval_steps_per_second": 2.758, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3505284827097065e-05, |
|
"loss": 2.8676, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_gen_len": 22.132, |
|
"eval_loss": 2.6608123779296875, |
|
"eval_rouge1": 19.0913, |
|
"eval_rouge2": 6.494, |
|
"eval_rougeL": 18.929, |
|
"eval_rougeLsum": 18.9233, |
|
"eval_runtime": 867.8739, |
|
"eval_samples_per_second": 5.551, |
|
"eval_steps_per_second": 2.776, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1775390523639005e-05, |
|
"loss": 2.8794, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_gen_len": 22.2414, |
|
"eval_loss": 2.6582980155944824, |
|
"eval_rouge1": 18.7648, |
|
"eval_rouge2": 6.459, |
|
"eval_rougeL": 18.6276, |
|
"eval_rougeLsum": 18.6125, |
|
"eval_runtime": 877.0489, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 2.747, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0045496220180947e-05, |
|
"loss": 2.8836, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_gen_len": 22.2551, |
|
"eval_loss": 2.6512138843536377, |
|
"eval_rouge1": 18.7243, |
|
"eval_rouge2": 6.3865, |
|
"eval_rougeL": 18.5848, |
|
"eval_rougeLsum": 18.5763, |
|
"eval_runtime": 879.8072, |
|
"eval_samples_per_second": 5.476, |
|
"eval_steps_per_second": 2.738, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.315601916722888e-06, |
|
"loss": 2.8174, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_gen_len": 22.1243, |
|
"eval_loss": 2.640946865081787, |
|
"eval_rouge1": 18.9393, |
|
"eval_rouge2": 6.3914, |
|
"eval_rougeL": 18.7733, |
|
"eval_rougeLsum": 18.7715, |
|
"eval_runtime": 881.5366, |
|
"eval_samples_per_second": 5.465, |
|
"eval_steps_per_second": 2.733, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.58570761326483e-06, |
|
"loss": 2.8494, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_gen_len": 21.7638, |
|
"eval_loss": 2.639634132385254, |
|
"eval_rouge1": 18.6126, |
|
"eval_rouge2": 6.4389, |
|
"eval_rougeL": 18.4673, |
|
"eval_rougeLsum": 18.4516, |
|
"eval_runtime": 860.8517, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 2.798, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.855813309806771e-06, |
|
"loss": 2.9025, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_gen_len": 22.1086, |
|
"eval_loss": 2.63411021232605, |
|
"eval_rouge1": 18.7681, |
|
"eval_rouge2": 6.3762, |
|
"eval_rougeL": 18.6081, |
|
"eval_rougeLsum": 18.6173, |
|
"eval_runtime": 872.623, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 2.761, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.1259190063487117e-06, |
|
"loss": 2.8754, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_gen_len": 22.3497, |
|
"eval_loss": 2.638812780380249, |
|
"eval_rouge1": 19.0828, |
|
"eval_rouge2": 6.5203, |
|
"eval_rougeL": 18.9334, |
|
"eval_rougeLsum": 18.9285, |
|
"eval_runtime": 879.763, |
|
"eval_samples_per_second": 5.476, |
|
"eval_steps_per_second": 2.738, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.3960247028906535e-06, |
|
"loss": 2.8489, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_gen_len": 21.9321, |
|
"eval_loss": 2.637495756149292, |
|
"eval_rouge1": 18.9219, |
|
"eval_rouge2": 6.4922, |
|
"eval_rougeL": 18.763, |
|
"eval_rougeLsum": 18.7437, |
|
"eval_runtime": 865.2523, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 2.784, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 57807, |
|
"total_flos": 8.129568206380646e+16, |
|
"train_loss": 3.210809704903007, |
|
"train_runtime": 55943.4873, |
|
"train_samples_per_second": 2.067, |
|
"train_steps_per_second": 1.033 |
|
} |
|
], |
|
"max_steps": 57807, |
|
"num_train_epochs": 3, |
|
"total_flos": 8.129568206380646e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|