{
  "best_metric": 0.9842381786339754,
  "best_model_checkpoint": "teacher-status-van-tiny-256-2/checkpoint-420",
  "epoch": 29.714285714285715,
  "eval_steps": 500,
  "global_step": 780,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38,
      "learning_rate": 6.41025641025641e-06,
      "loss": 0.6928,
      "step": 10
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.6896,
      "step": 20
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.7700534759358288,
      "eval_f1_score": 0.8700906344410876,
      "eval_loss": 0.670672595500946,
      "eval_precision": 0.7700534759358288,
      "eval_recall": 1.0,
      "eval_runtime": 2.6151,
      "eval_samples_per_second": 143.014,
      "eval_steps_per_second": 4.589,
      "step": 26
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.923076923076923e-05,
      "loss": 0.6763,
      "step": 30
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.6376,
      "step": 40
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.205128205128206e-05,
      "loss": 0.5438,
      "step": 50
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.7700534759358288,
      "eval_f1_score": 0.8700906344410876,
      "eval_loss": 0.43023183941841125,
      "eval_precision": 0.7700534759358288,
      "eval_recall": 1.0,
      "eval_runtime": 2.5825,
      "eval_samples_per_second": 144.822,
      "eval_steps_per_second": 4.647,
      "step": 52
    },
    {
      "epoch": 2.29,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.4252,
      "step": 60
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 0.3756,
      "step": 70
    },
    {
      "epoch": 2.97,
      "eval_accuracy": 0.8850267379679144,
      "eval_f1_score": 0.9284525790349417,
      "eval_loss": 0.2762328088283539,
      "eval_precision": 0.8913738019169329,
      "eval_recall": 0.96875,
      "eval_runtime": 2.711,
      "eval_samples_per_second": 137.957,
      "eval_steps_per_second": 4.426,
      "step": 78
    },
    {
      "epoch": 3.05,
      "learning_rate": 4.985754985754986e-05,
      "loss": 0.3572,
      "step": 80
    },
    {
      "epoch": 3.43,
      "learning_rate": 4.9145299145299147e-05,
      "loss": 0.3217,
      "step": 90
    },
    {
      "epoch": 3.81,
      "learning_rate": 4.8433048433048433e-05,
      "loss": 0.3017,
      "step": 100
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9224598930481284,
      "eval_f1_score": 0.9502572898799314,
      "eval_loss": 0.20020952820777893,
      "eval_precision": 0.9389830508474576,
      "eval_recall": 0.9618055555555556,
      "eval_runtime": 2.7006,
      "eval_samples_per_second": 138.487,
      "eval_steps_per_second": 4.443,
      "step": 105
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.772079772079772e-05,
      "loss": 0.2563,
      "step": 110
    },
    {
      "epoch": 4.57,
      "learning_rate": 4.700854700854701e-05,
      "loss": 0.2548,
      "step": 120
    },
    {
      "epoch": 4.95,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.257,
      "step": 130
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.9385026737967914,
      "eval_f1_score": 0.9605488850771869,
      "eval_loss": 0.17939455807209015,
      "eval_precision": 0.9491525423728814,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 2.4102,
      "eval_samples_per_second": 155.173,
      "eval_steps_per_second": 4.979,
      "step": 131
    },
    {
      "epoch": 5.33,
      "learning_rate": 4.558404558404559e-05,
      "loss": 0.2484,
      "step": 140
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 0.2345,
      "step": 150
    },
    {
      "epoch": 5.98,
      "eval_accuracy": 0.9358288770053476,
      "eval_f1_score": 0.9581881533101045,
      "eval_loss": 0.14852212369441986,
      "eval_precision": 0.9615384615384616,
      "eval_recall": 0.9548611111111112,
      "eval_runtime": 2.3968,
      "eval_samples_per_second": 156.039,
      "eval_steps_per_second": 5.007,
      "step": 157
    },
    {
      "epoch": 6.1,
      "learning_rate": 4.415954415954416e-05,
      "loss": 0.2514,
      "step": 160
    },
    {
      "epoch": 6.48,
      "learning_rate": 4.344729344729345e-05,
      "loss": 0.2179,
      "step": 170
    },
    {
      "epoch": 6.86,
      "learning_rate": 4.2735042735042735e-05,
      "loss": 0.2318,
      "step": 180
    },
    {
      "epoch": 6.97,
      "eval_accuracy": 0.9438502673796791,
      "eval_f1_score": 0.9630931458699472,
      "eval_loss": 0.13021136820316315,
      "eval_precision": 0.9750889679715302,
      "eval_recall": 0.9513888888888888,
      "eval_runtime": 2.3767,
      "eval_samples_per_second": 157.361,
      "eval_steps_per_second": 5.049,
      "step": 183
    },
    {
      "epoch": 7.24,
      "learning_rate": 4.202279202279202e-05,
      "loss": 0.2151,
      "step": 190
    },
    {
      "epoch": 7.62,
      "learning_rate": 4.131054131054131e-05,
      "loss": 0.2311,
      "step": 200
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.05982905982906e-05,
      "loss": 0.2173,
      "step": 210
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9518716577540107,
      "eval_f1_score": 0.9688581314878894,
      "eval_loss": 0.12773053348064423,
      "eval_precision": 0.9655172413793104,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 2.3674,
      "eval_samples_per_second": 157.977,
      "eval_steps_per_second": 5.069,
      "step": 210
    },
    {
      "epoch": 8.38,
      "learning_rate": 3.988603988603989e-05,
      "loss": 0.1934,
      "step": 220
    },
    {
      "epoch": 8.76,
      "learning_rate": 3.9173789173789176e-05,
      "loss": 0.2058,
      "step": 230
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.9572192513368984,
      "eval_f1_score": 0.9722222222222222,
      "eval_loss": 0.1269279271364212,
      "eval_precision": 0.9722222222222222,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 2.2694,
      "eval_samples_per_second": 164.802,
      "eval_steps_per_second": 5.288,
      "step": 236
    },
    {
      "epoch": 9.14,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.1959,
      "step": 240
    },
    {
      "epoch": 9.52,
      "learning_rate": 3.774928774928775e-05,
      "loss": 0.2016,
      "step": 250
    },
    {
      "epoch": 9.9,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.1955,
      "step": 260
    },
    {
      "epoch": 9.98,
      "eval_accuracy": 0.9572192513368984,
      "eval_f1_score": 0.9724137931034483,
      "eval_loss": 0.11462008953094482,
      "eval_precision": 0.9657534246575342,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.2006,
      "eval_samples_per_second": 169.954,
      "eval_steps_per_second": 5.453,
      "step": 262
    },
    {
      "epoch": 10.29,
      "learning_rate": 3.6324786324786323e-05,
      "loss": 0.2038,
      "step": 270
    },
    {
      "epoch": 10.67,
      "learning_rate": 3.561253561253561e-05,
      "loss": 0.2083,
      "step": 280
    },
    {
      "epoch": 10.97,
      "eval_accuracy": 0.9652406417112299,
      "eval_f1_score": 0.9772329246935202,
      "eval_loss": 0.1083158627152443,
      "eval_precision": 0.9858657243816255,
      "eval_recall": 0.96875,
      "eval_runtime": 2.2496,
      "eval_samples_per_second": 166.25,
      "eval_steps_per_second": 5.334,
      "step": 288
    },
    {
      "epoch": 11.05,
      "learning_rate": 3.4900284900284904e-05,
      "loss": 0.2107,
      "step": 290
    },
    {
      "epoch": 11.43,
      "learning_rate": 3.418803418803419e-05,
      "loss": 0.1725,
      "step": 300
    },
    {
      "epoch": 11.81,
      "learning_rate": 3.347578347578348e-05,
      "loss": 0.1886,
      "step": 310
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9598930481283422,
      "eval_f1_score": 0.9740932642487047,
      "eval_loss": 0.10481037944555283,
      "eval_precision": 0.9690721649484536,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1714,
      "eval_samples_per_second": 172.242,
      "eval_steps_per_second": 5.526,
      "step": 315
    },
    {
      "epoch": 12.19,
      "learning_rate": 3.2763532763532764e-05,
      "loss": 0.1703,
      "step": 320
    },
    {
      "epoch": 12.57,
      "learning_rate": 3.205128205128206e-05,
      "loss": 0.1817,
      "step": 330
    },
    {
      "epoch": 12.95,
      "learning_rate": 3.133903133903134e-05,
      "loss": 0.1618,
      "step": 340
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.9625668449197861,
      "eval_f1_score": 0.9756944444444444,
      "eval_loss": 0.10334747284650803,
      "eval_precision": 0.9756944444444444,
      "eval_recall": 0.9756944444444444,
      "eval_runtime": 2.1514,
      "eval_samples_per_second": 173.842,
      "eval_steps_per_second": 5.578,
      "step": 341
    },
    {
      "epoch": 13.33,
      "learning_rate": 3.0626780626780625e-05,
      "loss": 0.1733,
      "step": 350
    },
    {
      "epoch": 13.71,
      "learning_rate": 2.9914529914529915e-05,
      "loss": 0.1908,
      "step": 360
    },
    {
      "epoch": 13.98,
      "eval_accuracy": 0.9598930481283422,
      "eval_f1_score": 0.9739130434782608,
      "eval_loss": 0.10439594089984894,
      "eval_precision": 0.975609756097561,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 2.2063,
      "eval_samples_per_second": 169.517,
      "eval_steps_per_second": 5.439,
      "step": 367
    },
    {
      "epoch": 14.1,
      "learning_rate": 2.9202279202279202e-05,
      "loss": 0.1677,
      "step": 370
    },
    {
      "epoch": 14.48,
      "learning_rate": 2.8490028490028492e-05,
      "loss": 0.1641,
      "step": 380
    },
    {
      "epoch": 14.86,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.1594,
      "step": 390
    },
    {
      "epoch": 14.97,
      "eval_accuracy": 0.9625668449197861,
      "eval_f1_score": 0.9757785467128027,
      "eval_loss": 0.09152617305517197,
      "eval_precision": 0.9724137931034482,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.146,
      "eval_samples_per_second": 174.274,
      "eval_steps_per_second": 5.592,
      "step": 393
    },
    {
      "epoch": 15.24,
      "learning_rate": 2.706552706552707e-05,
      "loss": 0.1621,
      "step": 400
    },
    {
      "epoch": 15.62,
      "learning_rate": 2.6353276353276356e-05,
      "loss": 0.1698,
      "step": 410
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.1474,
      "step": 420
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9759358288770054,
      "eval_f1_score": 0.9842381786339754,
      "eval_loss": 0.09159436821937561,
      "eval_precision": 0.9929328621908127,
      "eval_recall": 0.9756944444444444,
      "eval_runtime": 2.1573,
      "eval_samples_per_second": 173.367,
      "eval_steps_per_second": 5.563,
      "step": 420
    },
    {
      "epoch": 16.38,
      "learning_rate": 2.492877492877493e-05,
      "loss": 0.1326,
      "step": 430
    },
    {
      "epoch": 16.76,
      "learning_rate": 2.4216524216524217e-05,
      "loss": 0.1734,
      "step": 440
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.9652406417112299,
      "eval_f1_score": 0.9773123909249563,
      "eval_loss": 0.09513744711875916,
      "eval_precision": 0.9824561403508771,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 2.1734,
      "eval_samples_per_second": 172.078,
      "eval_steps_per_second": 5.521,
      "step": 446
    },
    {
      "epoch": 17.14,
      "learning_rate": 2.3504273504273504e-05,
      "loss": 0.1827,
      "step": 450
    },
    {
      "epoch": 17.52,
      "learning_rate": 2.2792022792022794e-05,
      "loss": 0.1419,
      "step": 460
    },
    {
      "epoch": 17.9,
      "learning_rate": 2.207977207977208e-05,
      "loss": 0.1484,
      "step": 470
    },
    {
      "epoch": 17.98,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9808695652173912,
      "eval_loss": 0.10494749993085861,
      "eval_precision": 0.9825783972125436,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.155,
      "eval_samples_per_second": 173.55,
      "eval_steps_per_second": 5.568,
      "step": 472
    },
    {
      "epoch": 18.29,
      "learning_rate": 2.1367521367521368e-05,
      "loss": 0.1515,
      "step": 480
    },
    {
      "epoch": 18.67,
      "learning_rate": 2.0655270655270654e-05,
      "loss": 0.1495,
      "step": 490
    },
    {
      "epoch": 18.97,
      "eval_accuracy": 0.9679144385026738,
      "eval_f1_score": 0.9790940766550522,
      "eval_loss": 0.09304243326187134,
      "eval_precision": 0.9825174825174825,
      "eval_recall": 0.9756944444444444,
      "eval_runtime": 2.1676,
      "eval_samples_per_second": 172.538,
      "eval_steps_per_second": 5.536,
      "step": 498
    },
    {
      "epoch": 19.05,
      "learning_rate": 1.9943019943019945e-05,
      "loss": 0.1525,
      "step": 500
    },
    {
      "epoch": 19.43,
      "learning_rate": 1.923076923076923e-05,
      "loss": 0.1575,
      "step": 510
    },
    {
      "epoch": 19.81,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1385,
      "step": 520
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9625668449197861,
      "eval_f1_score": 0.9758620689655172,
      "eval_loss": 0.095456063747406,
      "eval_precision": 0.9691780821917808,
      "eval_recall": 0.9826388888888888,
      "eval_runtime": 2.217,
      "eval_samples_per_second": 168.695,
      "eval_steps_per_second": 5.413,
      "step": 525
    },
    {
      "epoch": 20.19,
      "learning_rate": 1.7806267806267805e-05,
      "loss": 0.139,
      "step": 530
    },
    {
      "epoch": 20.57,
      "learning_rate": 1.7094017094017095e-05,
      "loss": 0.1567,
      "step": 540
    },
    {
      "epoch": 20.95,
      "learning_rate": 1.6381766381766382e-05,
      "loss": 0.1492,
      "step": 550
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.9598930481283422,
      "eval_f1_score": 0.9740932642487047,
      "eval_loss": 0.09114021807909012,
      "eval_precision": 0.9690721649484536,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1474,
      "eval_samples_per_second": 174.163,
      "eval_steps_per_second": 5.588,
      "step": 551
    },
    {
      "epoch": 21.33,
      "learning_rate": 1.566951566951567e-05,
      "loss": 0.1664,
      "step": 560
    },
    {
      "epoch": 21.71,
      "learning_rate": 1.4957264957264958e-05,
      "loss": 0.1401,
      "step": 570
    },
    {
      "epoch": 21.98,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9808695652173912,
      "eval_loss": 0.09274759143590927,
      "eval_precision": 0.9825783972125436,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1665,
      "eval_samples_per_second": 172.63,
      "eval_steps_per_second": 5.539,
      "step": 577
    },
    {
      "epoch": 22.1,
      "learning_rate": 1.4245014245014246e-05,
      "loss": 0.1538,
      "step": 580
    },
    {
      "epoch": 22.48,
      "learning_rate": 1.3532763532763535e-05,
      "loss": 0.1245,
      "step": 590
    },
    {
      "epoch": 22.86,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.1288,
      "step": 600
    },
    {
      "epoch": 22.97,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9808695652173912,
      "eval_loss": 0.09401033818721771,
      "eval_precision": 0.9825783972125436,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1911,
      "eval_samples_per_second": 170.692,
      "eval_steps_per_second": 5.477,
      "step": 603
    },
    {
      "epoch": 23.24,
      "learning_rate": 1.2108262108262108e-05,
      "loss": 0.1422,
      "step": 610
    },
    {
      "epoch": 23.62,
      "learning_rate": 1.1396011396011397e-05,
      "loss": 0.1262,
      "step": 620
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.0683760683760684e-05,
      "loss": 0.1304,
      "step": 630
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9652406417112299,
      "eval_f1_score": 0.9775474956822107,
      "eval_loss": 0.09132420271635056,
      "eval_precision": 0.9725085910652921,
      "eval_recall": 0.9826388888888888,
      "eval_runtime": 2.1685,
      "eval_samples_per_second": 172.467,
      "eval_steps_per_second": 5.534,
      "step": 630
    },
    {
      "epoch": 24.38,
      "learning_rate": 9.971509971509972e-06,
      "loss": 0.1542,
      "step": 640
    },
    {
      "epoch": 24.76,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.14,
      "step": 650
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.9652406417112299,
      "eval_f1_score": 0.9776247848537005,
      "eval_loss": 0.09787322580814362,
      "eval_precision": 0.9692832764505119,
      "eval_recall": 0.9861111111111112,
      "eval_runtime": 2.1719,
      "eval_samples_per_second": 172.201,
      "eval_steps_per_second": 5.525,
      "step": 656
    },
    {
      "epoch": 25.14,
      "learning_rate": 8.547008547008548e-06,
      "loss": 0.1529,
      "step": 660
    },
    {
      "epoch": 25.52,
      "learning_rate": 7.834757834757835e-06,
      "loss": 0.1367,
      "step": 670
    },
    {
      "epoch": 25.9,
      "learning_rate": 7.122507122507123e-06,
      "loss": 0.1461,
      "step": 680
    },
    {
      "epoch": 25.98,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9810017271157168,
      "eval_loss": 0.08736571669578552,
      "eval_precision": 0.9759450171821306,
      "eval_recall": 0.9861111111111112,
      "eval_runtime": 2.1664,
      "eval_samples_per_second": 172.637,
      "eval_steps_per_second": 5.539,
      "step": 682
    },
    {
      "epoch": 26.29,
      "learning_rate": 6.41025641025641e-06,
      "loss": 0.1349,
      "step": 690
    },
    {
      "epoch": 26.67,
      "learning_rate": 5.6980056980056985e-06,
      "loss": 0.1429,
      "step": 700
    },
    {
      "epoch": 26.97,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9808027923211169,
      "eval_loss": 0.08370037376880646,
      "eval_precision": 0.9859649122807017,
      "eval_recall": 0.9756944444444444,
      "eval_runtime": 2.1544,
      "eval_samples_per_second": 173.601,
      "eval_steps_per_second": 5.57,
      "step": 708
    },
    {
      "epoch": 27.05,
      "learning_rate": 4.985754985754986e-06,
      "loss": 0.134,
      "step": 710
    },
    {
      "epoch": 27.43,
      "learning_rate": 4.273504273504274e-06,
      "loss": 0.1366,
      "step": 720
    },
    {
      "epoch": 27.81,
      "learning_rate": 3.5612535612535615e-06,
      "loss": 0.1444,
      "step": 730
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9679144385026738,
      "eval_f1_score": 0.9791666666666666,
      "eval_loss": 0.08762019872665405,
      "eval_precision": 0.9791666666666666,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1635,
      "eval_samples_per_second": 172.868,
      "eval_steps_per_second": 5.547,
      "step": 735
    },
    {
      "epoch": 28.19,
      "learning_rate": 2.8490028490028492e-06,
      "loss": 0.1167,
      "step": 740
    },
    {
      "epoch": 28.57,
      "learning_rate": 2.136752136752137e-06,
      "loss": 0.1129,
      "step": 750
    },
    {
      "epoch": 28.95,
      "learning_rate": 1.4245014245014246e-06,
      "loss": 0.145,
      "step": 760
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.9705882352941176,
      "eval_f1_score": 0.9808695652173912,
      "eval_loss": 0.09031202644109726,
      "eval_precision": 0.9825783972125436,
      "eval_recall": 0.9791666666666666,
      "eval_runtime": 2.1632,
      "eval_samples_per_second": 172.893,
      "eval_steps_per_second": 5.547,
      "step": 761
    },
    {
      "epoch": 29.33,
      "learning_rate": 7.122507122507123e-07,
      "loss": 0.1334,
      "step": 770
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.0,
      "loss": 0.1445,
      "step": 780
    },
    {
      "epoch": 29.71,
      "eval_accuracy": 0.9679144385026738,
      "eval_f1_score": 0.9790940766550522,
      "eval_loss": 0.08815235644578934,
      "eval_precision": 0.9825174825174825,
      "eval_recall": 0.9756944444444444,
      "eval_runtime": 3.4052,
      "eval_samples_per_second": 109.832,
      "eval_steps_per_second": 3.524,
      "step": 780
    },
    {
      "epoch": 29.71,
      "step": 780,
      "total_flos": 4.5315392030480794e+17,
      "train_loss": 0.2131147768252935,
      "train_runtime": 1561.1697,
      "train_samples_per_second": 64.529,
      "train_steps_per_second": 0.5
    }
  ],
  "logging_steps": 10,
  "max_steps": 780,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 4.5315392030480794e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}