|
{ |
|
"best_metric": 0.5253227408142999, |
|
"best_model_checkpoint": "/experiments/cosql/1ccc8b19-d4cc-42ca-b70f-7d796303d2e5/checkpoint-1856", |
|
"epoch": 231.9922480620155, |
|
"global_step": 1856, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2653, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2224, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1652, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8035, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.642, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.537, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4739, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4251, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3945, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.352, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3245, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2948, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.287, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2686, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2745, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2495, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2298, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_exact_match": 0.3644488579940417, |
|
"eval_exec": 0.3902681231380338, |
|
"eval_loss": 0.23653237521648407, |
|
"eval_runtime": 724.5845, |
|
"eval_samples_per_second": 1.794, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2242, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.205, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1932, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1937, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1978, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1747, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1669, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1734, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1615, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1551, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1519, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1476, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1483, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1478, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1292, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1285, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_exact_match": 0.4170804369414101, |
|
"eval_exec": 0.45878848063555117, |
|
"eval_loss": 0.20163151621818542, |
|
"eval_runtime": 846.5253, |
|
"eval_samples_per_second": 1.536, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1407, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1228, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1324, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1214, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1224, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1128, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1016, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.107, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1116, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0997, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1025, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0973, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0996, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0917, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0899, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0896, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_exact_match": 0.4816285998013903, |
|
"eval_exec": 0.5104270109235353, |
|
"eval_loss": 0.19154271483421326, |
|
"eval_runtime": 788.1761, |
|
"eval_samples_per_second": 1.649, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0909, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0867, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.089, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0888, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0946, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0803, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0769, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.078, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.084, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.07, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0727, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0719, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0727, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0759, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0708, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0674, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_exact_match": 0.5014895729890765, |
|
"eval_exec": 0.5382323733862959, |
|
"eval_loss": 0.19477179646492004, |
|
"eval_runtime": 786.6121, |
|
"eval_samples_per_second": 1.653, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0673, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0716, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 33.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0658, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0564, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 34.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0604, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0632, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 35.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0611, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0577, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0602, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0556, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0584, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 38.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0537, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0538, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0517, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0542, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_exact_match": 0.5173783515392254, |
|
"eval_exec": 0.5501489572989077, |
|
"eval_loss": 0.21476374566555023, |
|
"eval_runtime": 722.7734, |
|
"eval_samples_per_second": 1.799, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 40.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.052, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0552, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 41.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0474, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0456, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0517, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0484, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 43.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0432, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 43.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0446, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 44.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0443, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0425, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 45.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0452, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 45.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0429, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 46.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0471, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0433, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0416, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 47.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0397, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 47.99, |
|
"eval_exact_match": 0.5143992055610725, |
|
"eval_exec": 0.5491559086395233, |
|
"eval_loss": 0.22859126329421997, |
|
"eval_runtime": 751.9885, |
|
"eval_samples_per_second": 1.729, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0449, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0404, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 49.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0384, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0378, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 50.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0405, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0391, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 51.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0377, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 51.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0349, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 52.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0328, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0363, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 53.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0355, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 53.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0317, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 54.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0308, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0303, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 55.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0313, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 55.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0351, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 55.99, |
|
"eval_exact_match": 0.5243296921549155, |
|
"eval_exec": 0.5561072492552135, |
|
"eval_loss": 0.2355286180973053, |
|
"eval_runtime": 746.4078, |
|
"eval_samples_per_second": 1.742, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 56.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0307, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0296, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 57.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0295, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 57.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0316, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 58.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0283, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0321, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 59.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0296, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 59.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0262, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 60.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0259, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.027, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 61.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0263, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 61.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0259, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0275, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0262, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 63.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0252, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 63.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.025, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 63.99, |
|
"eval_exact_match": 0.5094339622641509, |
|
"eval_exec": 0.5422045680238332, |
|
"eval_loss": 0.2562254071235657, |
|
"eval_runtime": 720.3713, |
|
"eval_samples_per_second": 1.805, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 64.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0256, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0272, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 65.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0293, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 65.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0231, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 66.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0232, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0253, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0224, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 67.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.021, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 68.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0206, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0229, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 69.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0243, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0211, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 70.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0208, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0225, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 71.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0209, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 71.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0225, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 71.99, |
|
"eval_exact_match": 0.5173783515392254, |
|
"eval_exec": 0.5461767626613704, |
|
"eval_loss": 0.2805687189102173, |
|
"eval_runtime": 749.8642, |
|
"eval_samples_per_second": 1.734, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 72.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0209, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 73.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0193, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0221, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 74.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0193, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0184, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 75.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0169, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 75.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0177, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0189, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0185, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 77.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0166, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 77.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0167, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 78.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0162, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 79.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0168, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0172, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"eval_exact_match": 0.5134061569016882, |
|
"eval_exec": 0.5431976166832175, |
|
"eval_loss": 0.2822588086128235, |
|
"eval_runtime": 735.1307, |
|
"eval_samples_per_second": 1.768, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 80.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.017, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0166, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 81.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0157, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 81.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0182, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 82.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0168, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 83.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 83.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0139, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 84.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0151, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 85.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 85.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0119, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 86.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0128, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0143, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 87.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0189, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 87.99, |
|
"eval_exact_match": 0.5134061569016882, |
|
"eval_exec": 0.5441906653426017, |
|
"eval_loss": 0.285916805267334, |
|
"eval_runtime": 740.5163, |
|
"eval_samples_per_second": 1.756, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 88.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0125, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0122, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 89.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0141, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0132, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 90.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 91.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0105, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 91.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0117, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 92.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0119, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0126, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 93.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0143, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 94.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0127, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0143, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 95.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0107, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0112, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"eval_exact_match": 0.506454816285998, |
|
"eval_exec": 0.5461767626613704, |
|
"eval_loss": 0.3350318670272827, |
|
"eval_runtime": 791.2409, |
|
"eval_samples_per_second": 1.643, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 96.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0113, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0107, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 97.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0112, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.011, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 98.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.012, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0122, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 99.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0113, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0101, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 100.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0089, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 100.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.011, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 101.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 101.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0097, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 102.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.011, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 102.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0085, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 103.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 103.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0094, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 103.99, |
|
"eval_exact_match": 0.5183714001986097, |
|
"eval_exec": 0.5441906653426017, |
|
"eval_loss": 0.34131428599357605, |
|
"eval_runtime": 681.4487, |
|
"eval_samples_per_second": 1.908, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 104.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0111, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 104.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0095, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 105.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 105.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 106.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0085, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 106.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 107.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0085, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 107.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0088, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 108.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0094, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 109.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 109.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0093, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 110.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0091, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 110.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0088, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 111.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 111.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 111.99, |
|
"eval_exact_match": 0.5243296921549155, |
|
"eval_exec": 0.5521350546176763, |
|
"eval_loss": 0.3550397753715515, |
|
"eval_runtime": 717.3828, |
|
"eval_samples_per_second": 1.812, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 112.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0085, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 113.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0076, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 113.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0093, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 114.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0068, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 114.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 115.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0073, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 115.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0071, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 116.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0077, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 116.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.006, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 117.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 117.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.008, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 118.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0069, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 118.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0082, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 119.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 119.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0072, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 119.99, |
|
"eval_exact_match": 0.5104270109235353, |
|
"eval_exec": 0.5431976166832175, |
|
"eval_loss": 0.3599448800086975, |
|
"eval_runtime": 731.1991, |
|
"eval_samples_per_second": 1.778, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 120.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0062, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 120.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 121.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 121.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0055, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 122.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 122.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 123.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0068, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 123.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 124.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 124.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0067, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 125.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0061, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 125.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 126.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 126.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 127.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0062, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 127.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.006, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 127.99, |
|
"eval_exact_match": 0.5203574975173784, |
|
"eval_exec": 0.5501489572989077, |
|
"eval_loss": 0.39113467931747437, |
|
"eval_runtime": 888.0535, |
|
"eval_samples_per_second": 1.464, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 128.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 128.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 129.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 129.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 130.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 130.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0053, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 131.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0063, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 131.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0095, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 132.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 132.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 133.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0067, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 133.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 134.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 134.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 135.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 135.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 135.99, |
|
"eval_exact_match": 0.5084409136047666, |
|
"eval_exec": 0.535253227408143, |
|
"eval_loss": 0.3826310336589813, |
|
"eval_runtime": 944.2579, |
|
"eval_samples_per_second": 1.377, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 136.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 137.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0055, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 137.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 138.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 138.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0051, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 139.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 139.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 140.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0087, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 140.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 141.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 141.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 142.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 142.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 143.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 143.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0052, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 143.99, |
|
"eval_exact_match": 0.506454816285998, |
|
"eval_exec": 0.5402184707050646, |
|
"eval_loss": 0.39830923080444336, |
|
"eval_runtime": 975.7311, |
|
"eval_samples_per_second": 1.332, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 144.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 144.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0051, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 145.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 145.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 146.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 146.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 147.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 147.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 148.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 148.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 149.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 149.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 150.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 150.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0063, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 151.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 151.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 151.99, |
|
"eval_exact_match": 0.44885799404170806, |
|
"eval_exec": 0.5014895729890765, |
|
"eval_loss": 0.34375911951065063, |
|
"eval_runtime": 974.1005, |
|
"eval_samples_per_second": 1.335, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 152.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 152.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 153.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 153.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 154.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 154.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 155.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 155.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 156.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 156.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 157.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 157.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 158.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 158.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 159.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 159.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 159.99, |
|
"eval_exact_match": 0.5153922542204568, |
|
"eval_exec": 0.5441906653426017, |
|
"eval_loss": 0.41516825556755066, |
|
"eval_runtime": 976.5716, |
|
"eval_samples_per_second": 1.331, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 160.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 160.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 161.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 161.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 162.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 162.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 163.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 163.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 164.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 164.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 165.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 165.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 166.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 166.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 167.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 167.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 167.99, |
|
"eval_exact_match": 0.5143992055610725, |
|
"eval_exec": 0.5382323733862959, |
|
"eval_loss": 0.4463094472885132, |
|
"eval_runtime": 955.823, |
|
"eval_samples_per_second": 1.36, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 168.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 168.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 169.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 169.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 170.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 170.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 171.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 171.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 172.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 172.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 173.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 173.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 174.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 174.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 175.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 175.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 175.99, |
|
"eval_exact_match": 0.5074478649453823, |
|
"eval_exec": 0.5342601787487586, |
|
"eval_loss": 0.4188894033432007, |
|
"eval_runtime": 941.4278, |
|
"eval_samples_per_second": 1.381, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 176.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 176.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 177.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 177.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 178.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 178.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 179.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 179.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 180.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 180.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 181.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 181.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 182.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 182.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0051, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 183.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 183.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 183.99, |
|
"eval_exact_match": 0.519364448857994, |
|
"eval_exec": 0.5441906653426017, |
|
"eval_loss": 0.422220915555954, |
|
"eval_runtime": 973.3607, |
|
"eval_samples_per_second": 1.336, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 184.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 184.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 185.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 185.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 186.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 186.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 187.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 188.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 188.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 189.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 189.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 190.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 190.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 191.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 191.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 191.99, |
|
"eval_exact_match": 0.5203574975173784, |
|
"eval_exec": 0.5491559086395233, |
|
"eval_loss": 0.44573745131492615, |
|
"eval_runtime": 974.7523, |
|
"eval_samples_per_second": 1.334, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 192.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 192.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 193.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 193.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 194.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 194.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 195.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 195.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 196.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 196.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 197.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 197.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 198.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 198.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 199.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 199.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 199.99, |
|
"eval_exact_match": 0.5114200595829196, |
|
"eval_exec": 0.5521350546176763, |
|
"eval_loss": 0.4328438639640808, |
|
"eval_runtime": 959.4133, |
|
"eval_samples_per_second": 1.355, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 200.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 200.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 201.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 201.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 202.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 202.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 203.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 203.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 204.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 205.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 205.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 206.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 206.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 207.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 207.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 207.99, |
|
"eval_exact_match": 0.5153922542204568, |
|
"eval_exec": 0.5422045680238332, |
|
"eval_loss": 0.46325209736824036, |
|
"eval_runtime": 948.9363, |
|
"eval_samples_per_second": 1.37, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 208.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 208.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 209.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 209.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 210.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 210.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 211.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 211.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 212.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 212.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 213.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 213.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 214.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 214.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 215.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 215.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 215.99, |
|
"eval_exact_match": 0.5134061569016882, |
|
"eval_exec": 0.551142005958292, |
|
"eval_loss": 0.4671519100666046, |
|
"eval_runtime": 968.39, |
|
"eval_samples_per_second": 1.342, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 216.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 216.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 217.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 217.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 218.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 218.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 219.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 219.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 220.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 220.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 221.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 221.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 222.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 222.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 223.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 223.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 223.99, |
|
"eval_exact_match": 0.5143992055610725, |
|
"eval_exec": 0.5402184707050646, |
|
"eval_loss": 0.4584057629108429, |
|
"eval_runtime": 959.2229, |
|
"eval_samples_per_second": 1.355, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 224.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 224.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 225.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 225.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 226.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 226.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 227.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 227.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 228.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 228.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 229.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0063, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 229.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 230.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 230.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 231.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 231.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 231.99, |
|
"eval_exact_match": 0.5253227408142999, |
|
"eval_exec": 0.5561072492552135, |
|
"eval_loss": 0.44773775339126587, |
|
"eval_runtime": 980.451, |
|
"eval_samples_per_second": 1.326, |
|
"step": 1856 |
|
} |
|
], |
|
"max_steps": 24576, |
|
"num_train_epochs": 3072, |
|
"total_flos": 7.797838246859244e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|