|
{ |
|
"best_metric": 0.6666666666666666, |
|
"best_model_checkpoint": "videomae-base-short-ssv2-finetuned-ct_cpc/checkpoint-47", |
|
"epoch": 48.01913043478261, |
|
"eval_steps": 500, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.347826086956522e-07, |
|
"loss": 2.0243, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 1.8063, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3043478260869566e-06, |
|
"loss": 1.6927, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 1.4832, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.1202621459960938, |
|
"eval_runtime": 7.2119, |
|
"eval_samples_per_second": 8.32, |
|
"eval_steps_per_second": 1.387, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 1.3436, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 1.2714, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.043478260869566e-06, |
|
"loss": 1.1191, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.9807, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.91304347826087e-06, |
|
"loss": 1.32, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.050528645515442, |
|
"eval_runtime": 6.9232, |
|
"eval_samples_per_second": 8.667, |
|
"eval_steps_per_second": 1.444, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 1.0838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.782608695652174e-06, |
|
"loss": 1.1624, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.9414, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.652173913043479e-06, |
|
"loss": 1.2583, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 1.1539, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0524587631225586, |
|
"eval_runtime": 7.8366, |
|
"eval_samples_per_second": 7.656, |
|
"eval_steps_per_second": 1.276, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 6.521739130434783e-06, |
|
"loss": 1.0695, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.9282, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.391304347826087e-06, |
|
"loss": 0.9621, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 1.2399, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0353821516036987, |
|
"eval_runtime": 7.1279, |
|
"eval_samples_per_second": 8.418, |
|
"eval_steps_per_second": 1.403, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.260869565217392e-06, |
|
"loss": 1.1451, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 1.1118, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 1.1014, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 1.1536, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9928, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0516277551651, |
|
"eval_runtime": 6.8144, |
|
"eval_samples_per_second": 8.805, |
|
"eval_steps_per_second": 1.467, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.95169082125604e-06, |
|
"loss": 1.1887, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 9.903381642512077e-06, |
|
"loss": 1.176, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 9.855072463768118e-06, |
|
"loss": 1.0889, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 9.806763285024155e-06, |
|
"loss": 0.8653, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 9.758454106280194e-06, |
|
"loss": 1.0054, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0296636819839478, |
|
"eval_runtime": 6.8135, |
|
"eval_samples_per_second": 8.806, |
|
"eval_steps_per_second": 1.468, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.710144927536233e-06, |
|
"loss": 1.1702, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 1.0608, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 9.61352657004831e-06, |
|
"loss": 1.0026, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 1.1166, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.024717092514038, |
|
"eval_runtime": 6.7961, |
|
"eval_samples_per_second": 8.829, |
|
"eval_steps_per_second": 1.471, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.516908212560388e-06, |
|
"loss": 1.0851, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.468599033816425e-06, |
|
"loss": 0.9748, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 9.420289855072464e-06, |
|
"loss": 1.0743, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 9.371980676328503e-06, |
|
"loss": 0.9153, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 9.323671497584542e-06, |
|
"loss": 1.2715, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0097945928573608, |
|
"eval_runtime": 6.7899, |
|
"eval_samples_per_second": 8.837, |
|
"eval_steps_per_second": 1.473, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.275362318840581e-06, |
|
"loss": 0.9517, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.227053140096618e-06, |
|
"loss": 0.8985, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.178743961352658e-06, |
|
"loss": 1.1341, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 1.0505, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.082125603864736e-06, |
|
"loss": 1.0317, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.040429949760437, |
|
"eval_runtime": 6.9971, |
|
"eval_samples_per_second": 8.575, |
|
"eval_steps_per_second": 1.429, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 9.033816425120775e-06, |
|
"loss": 1.1947, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 8.985507246376812e-06, |
|
"loss": 0.9361, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 8.937198067632851e-06, |
|
"loss": 0.9432, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.0958, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 8.840579710144929e-06, |
|
"loss": 1.1258, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0053784847259521, |
|
"eval_runtime": 7.1441, |
|
"eval_samples_per_second": 8.399, |
|
"eval_steps_per_second": 1.4, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 8.792270531400966e-06, |
|
"loss": 1.0206, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 8.743961352657005e-06, |
|
"loss": 1.0568, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 1.1054, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 8.647342995169082e-06, |
|
"loss": 0.8991, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0477041006088257, |
|
"eval_runtime": 7.2799, |
|
"eval_samples_per_second": 8.242, |
|
"eval_steps_per_second": 1.374, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 8.599033816425122e-06, |
|
"loss": 1.167, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 8.55072463768116e-06, |
|
"loss": 0.9754, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 8.502415458937199e-06, |
|
"loss": 1.0365, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 8.454106280193238e-06, |
|
"loss": 0.9409, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 8.405797101449275e-06, |
|
"loss": 0.8957, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0607702732086182, |
|
"eval_runtime": 6.9041, |
|
"eval_samples_per_second": 8.69, |
|
"eval_steps_per_second": 1.448, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 8.357487922705316e-06, |
|
"loss": 0.8378, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 8.309178743961353e-06, |
|
"loss": 0.8314, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 8.260869565217392e-06, |
|
"loss": 1.092, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 8.212560386473431e-06, |
|
"loss": 1.0079, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 8.164251207729469e-06, |
|
"loss": 1.078, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.0165636539459229, |
|
"eval_runtime": 7.648, |
|
"eval_samples_per_second": 7.845, |
|
"eval_steps_per_second": 1.308, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 8.115942028985508e-06, |
|
"loss": 0.8161, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 8.067632850241547e-06, |
|
"loss": 1.2458, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 8.019323671497586e-06, |
|
"loss": 0.9597, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 7.971014492753623e-06, |
|
"loss": 0.9067, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.2110427618026733, |
|
"eval_runtime": 7.2284, |
|
"eval_samples_per_second": 8.301, |
|
"eval_steps_per_second": 1.383, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.922705314009662e-06, |
|
"loss": 0.9322, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 7.874396135265701e-06, |
|
"loss": 0.993, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 0.7774, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.9399, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 1.0383, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.0380301475524902, |
|
"eval_runtime": 6.9013, |
|
"eval_samples_per_second": 8.694, |
|
"eval_steps_per_second": 1.449, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 7.681159420289856e-06, |
|
"loss": 0.7043, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 7.632850241545895e-06, |
|
"loss": 1.0185, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 7.584541062801934e-06, |
|
"loss": 0.9907, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 7.536231884057972e-06, |
|
"loss": 0.8602, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 7.48792270531401e-06, |
|
"loss": 1.2244, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.1040699481964111, |
|
"eval_runtime": 7.4605, |
|
"eval_samples_per_second": 8.042, |
|
"eval_steps_per_second": 1.34, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 7.439613526570049e-06, |
|
"loss": 0.9108, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 7.391304347826087e-06, |
|
"loss": 0.8811, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 7.342995169082127e-06, |
|
"loss": 0.8487, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 7.294685990338165e-06, |
|
"loss": 1.0364, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.0123381614685059, |
|
"eval_runtime": 6.8033, |
|
"eval_samples_per_second": 8.819, |
|
"eval_steps_per_second": 1.47, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 7.246376811594203e-06, |
|
"loss": 0.8189, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 7.1980676328502416e-06, |
|
"loss": 0.8733, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 7.149758454106281e-06, |
|
"loss": 0.6958, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 7.10144927536232e-06, |
|
"loss": 0.782, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 7.053140096618359e-06, |
|
"loss": 0.9662, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.1802695989608765, |
|
"eval_runtime": 7.2583, |
|
"eval_samples_per_second": 8.266, |
|
"eval_steps_per_second": 1.378, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 7.004830917874397e-06, |
|
"loss": 0.9205, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.8999, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 6.908212560386473e-06, |
|
"loss": 0.6096, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 6.859903381642513e-06, |
|
"loss": 0.79, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 6.811594202898551e-06, |
|
"loss": 0.9299, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.0910345315933228, |
|
"eval_runtime": 6.9398, |
|
"eval_samples_per_second": 8.646, |
|
"eval_steps_per_second": 1.441, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.8715, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 6.7149758454106285e-06, |
|
"loss": 0.8882, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.8562, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 6.6183574879227065e-06, |
|
"loss": 0.9704, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 6.570048309178745e-06, |
|
"loss": 0.877, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 1.0182613134384155, |
|
"eval_runtime": 7.0493, |
|
"eval_samples_per_second": 8.511, |
|
"eval_steps_per_second": 1.419, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.521739130434783e-06, |
|
"loss": 0.8358, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 6.473429951690822e-06, |
|
"loss": 0.7785, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 6.42512077294686e-06, |
|
"loss": 0.7644, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 6.376811594202898e-06, |
|
"loss": 0.8649, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 1.1414852142333984, |
|
"eval_runtime": 7.0701, |
|
"eval_samples_per_second": 8.486, |
|
"eval_steps_per_second": 1.414, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 6.328502415458938e-06, |
|
"loss": 0.8795, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 6.280193236714976e-06, |
|
"loss": 0.7464, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 6.2318840579710145e-06, |
|
"loss": 0.7973, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 6.1835748792270535e-06, |
|
"loss": 0.9681, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 6.135265700483092e-06, |
|
"loss": 0.8841, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.0975487232208252, |
|
"eval_runtime": 7.726, |
|
"eval_samples_per_second": 7.766, |
|
"eval_steps_per_second": 1.294, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 0.6596, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 6.03864734299517e-06, |
|
"loss": 0.7252, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 5.990338164251208e-06, |
|
"loss": 0.7564, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 5.942028985507247e-06, |
|
"loss": 0.7406, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 5.893719806763285e-06, |
|
"loss": 0.6276, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.072813868522644, |
|
"eval_runtime": 7.252, |
|
"eval_samples_per_second": 8.274, |
|
"eval_steps_per_second": 1.379, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 5.845410628019324e-06, |
|
"loss": 0.7456, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.5497, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 5.748792270531401e-06, |
|
"loss": 0.875, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 5.70048309178744e-06, |
|
"loss": 0.7974, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.1675584316253662, |
|
"eval_runtime": 6.8242, |
|
"eval_samples_per_second": 8.792, |
|
"eval_steps_per_second": 1.465, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 5.652173913043479e-06, |
|
"loss": 0.8186, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 5.603864734299518e-06, |
|
"loss": 0.7628, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.4592, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.6761, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 5.458937198067633e-06, |
|
"loss": 0.7381, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1623855829238892, |
|
"eval_runtime": 6.815, |
|
"eval_samples_per_second": 8.804, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 5.410628019323671e-06, |
|
"loss": 0.8723, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 5.362318840579711e-06, |
|
"loss": 0.6832, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 5.314009661835749e-06, |
|
"loss": 0.7768, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 5.265700483091788e-06, |
|
"loss": 0.6568, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.4807, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1929564476013184, |
|
"eval_runtime": 6.8394, |
|
"eval_samples_per_second": 8.773, |
|
"eval_steps_per_second": 1.462, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 5.169082125603865e-06, |
|
"loss": 0.7603, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 5.1207729468599045e-06, |
|
"loss": 0.647, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 5.072463768115943e-06, |
|
"loss": 0.67, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 5.024154589371981e-06, |
|
"loss": 0.6812, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.1511147022247314, |
|
"eval_runtime": 7.0933, |
|
"eval_samples_per_second": 8.459, |
|
"eval_steps_per_second": 1.41, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.97584541062802e-06, |
|
"loss": 0.6945, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.927536231884059e-06, |
|
"loss": 0.6031, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 4.879227053140097e-06, |
|
"loss": 0.659, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.659, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 4.782608695652174e-06, |
|
"loss": 0.5802, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 1.2314832210540771, |
|
"eval_runtime": 7.5214, |
|
"eval_samples_per_second": 7.977, |
|
"eval_steps_per_second": 1.33, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.7342995169082125e-06, |
|
"loss": 0.5988, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 4.6859903381642516e-06, |
|
"loss": 0.6461, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 4.637681159420291e-06, |
|
"loss": 0.729, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 4.589371980676329e-06, |
|
"loss": 0.5582, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 4.541062801932368e-06, |
|
"loss": 0.5943, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1640959978103638, |
|
"eval_runtime": 7.4811, |
|
"eval_samples_per_second": 8.02, |
|
"eval_steps_per_second": 1.337, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 4.492753623188406e-06, |
|
"loss": 0.4759, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.7748, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 4.396135265700483e-06, |
|
"loss": 0.5033, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.8771, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 4.299516908212561e-06, |
|
"loss": 0.5416, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.0962903499603271, |
|
"eval_runtime": 7.0002, |
|
"eval_samples_per_second": 8.571, |
|
"eval_steps_per_second": 1.429, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.251207729468599e-06, |
|
"loss": 0.6134, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 4.202898550724638e-06, |
|
"loss": 0.5366, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 4.154589371980677e-06, |
|
"loss": 0.5575, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"learning_rate": 4.106280193236716e-06, |
|
"loss": 0.6676, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1130536794662476, |
|
"eval_runtime": 7.3167, |
|
"eval_samples_per_second": 8.2, |
|
"eval_steps_per_second": 1.367, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 4.057971014492754e-06, |
|
"loss": 0.5169, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 4.009661835748793e-06, |
|
"loss": 0.5593, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 3.961352657004831e-06, |
|
"loss": 0.7655, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 3.91304347826087e-06, |
|
"loss": 0.6803, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.4085, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.2947934865951538, |
|
"eval_runtime": 6.9055, |
|
"eval_samples_per_second": 8.689, |
|
"eval_steps_per_second": 1.448, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3.816425120772947e-06, |
|
"loss": 0.7962, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 3.768115942028986e-06, |
|
"loss": 0.742, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 3.7198067632850245e-06, |
|
"loss": 0.4275, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"learning_rate": 3.6714975845410635e-06, |
|
"loss": 0.6044, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"learning_rate": 3.6231884057971017e-06, |
|
"loss": 0.4548, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.2788442373275757, |
|
"eval_runtime": 6.7575, |
|
"eval_samples_per_second": 8.879, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 3.5748792270531403e-06, |
|
"loss": 0.7688, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 3.5265700483091793e-06, |
|
"loss": 0.489, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.545, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 3.4299516908212565e-06, |
|
"loss": 0.4351, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1921883821487427, |
|
"eval_runtime": 6.9013, |
|
"eval_samples_per_second": 8.694, |
|
"eval_steps_per_second": 1.449, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.381642512077295e-06, |
|
"loss": 0.5118, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.5212, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 3.2850241545893724e-06, |
|
"loss": 0.3797, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 3.236714975845411e-06, |
|
"loss": 0.5177, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 3.188405797101449e-06, |
|
"loss": 0.5641, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.267711877822876, |
|
"eval_runtime": 7.3036, |
|
"eval_samples_per_second": 8.215, |
|
"eval_steps_per_second": 1.369, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.140096618357488e-06, |
|
"loss": 0.3153, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 3.0917874396135268e-06, |
|
"loss": 0.4103, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 3.043478260869566e-06, |
|
"loss": 0.503, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 2.995169082125604e-06, |
|
"loss": 0.5921, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 2.9468599033816426e-06, |
|
"loss": 0.4471, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 1.2765668630599976, |
|
"eval_runtime": 7.0911, |
|
"eval_samples_per_second": 8.461, |
|
"eval_steps_per_second": 1.41, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.4534, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.85024154589372e-06, |
|
"loss": 0.4072, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.801932367149759e-06, |
|
"loss": 0.4057, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 2.7536231884057974e-06, |
|
"loss": 0.361, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.2524678707122803, |
|
"eval_runtime": 7.3291, |
|
"eval_samples_per_second": 8.187, |
|
"eval_steps_per_second": 1.364, |
|
"step": 1739 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.7053140096618356e-06, |
|
"loss": 0.5294, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.6570048309178746e-06, |
|
"loss": 0.4, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.4891, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.5603864734299523e-06, |
|
"loss": 0.3458, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 2.5120772946859904e-06, |
|
"loss": 0.5668, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.292838454246521, |
|
"eval_runtime": 7.1147, |
|
"eval_samples_per_second": 8.433, |
|
"eval_steps_per_second": 1.406, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.4637681159420295e-06, |
|
"loss": 0.4136, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 2.4154589371980677e-06, |
|
"loss": 0.6028, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 2.3671497584541063e-06, |
|
"loss": 0.4004, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 2.3188405797101453e-06, |
|
"loss": 0.4186, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 2.270531400966184e-06, |
|
"loss": 0.5262, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.2428669929504395, |
|
"eval_runtime": 7.0978, |
|
"eval_samples_per_second": 8.453, |
|
"eval_steps_per_second": 1.409, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.4642, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 0.485, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 2.1256038647342997e-06, |
|
"loss": 0.3757, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 2.0772946859903383e-06, |
|
"loss": 0.5197, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 2.028985507246377e-06, |
|
"loss": 0.4901, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1457266807556152, |
|
"eval_runtime": 7.0788, |
|
"eval_samples_per_second": 8.476, |
|
"eval_steps_per_second": 1.413, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.9806763285024155e-06, |
|
"loss": 0.432, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.3237, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.884057971014493e-06, |
|
"loss": 0.3166, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"learning_rate": 1.8357487922705318e-06, |
|
"loss": 0.481, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.213700771331787, |
|
"eval_runtime": 7.0464, |
|
"eval_samples_per_second": 8.515, |
|
"eval_steps_per_second": 1.419, |
|
"step": 1927 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.7874396135265702e-06, |
|
"loss": 0.3313, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.4095, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6908212560386476e-06, |
|
"loss": 0.4013, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6425120772946862e-06, |
|
"loss": 0.453, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 1.5942028985507246e-06, |
|
"loss": 0.3566, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 1.1829572916030884, |
|
"eval_runtime": 7.0773, |
|
"eval_samples_per_second": 8.478, |
|
"eval_steps_per_second": 1.413, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.5458937198067634e-06, |
|
"loss": 0.342, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.497584541062802e-06, |
|
"loss": 0.2666, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"loss": 0.4153, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"learning_rate": 1.4009661835748794e-06, |
|
"loss": 0.3853, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"learning_rate": 1.3526570048309178e-06, |
|
"loss": 0.5147, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.1628528833389282, |
|
"eval_runtime": 6.9693, |
|
"eval_samples_per_second": 8.609, |
|
"eval_steps_per_second": 1.435, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 1.3043478260869566e-06, |
|
"loss": 0.5075, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.2560386473429952e-06, |
|
"loss": 0.3599, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.2077294685990338e-06, |
|
"loss": 0.3602, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 1.1594202898550726e-06, |
|
"loss": 0.4013, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.2734596729278564, |
|
"eval_runtime": 7.5005, |
|
"eval_samples_per_second": 8.0, |
|
"eval_steps_per_second": 1.333, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.1829, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.0628019323671499e-06, |
|
"loss": 0.3784, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.0144927536231885e-06, |
|
"loss": 0.3492, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.3413, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 9.178743961352659e-07, |
|
"loss": 0.3895, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 1.275781273841858, |
|
"eval_runtime": 7.0795, |
|
"eval_samples_per_second": 8.475, |
|
"eval_steps_per_second": 1.413, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.2765, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 8.212560386473431e-07, |
|
"loss": 0.3307, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 7.729468599033817e-07, |
|
"loss": 0.4263, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"learning_rate": 7.246376811594204e-07, |
|
"loss": 0.2821, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.3727, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.251878023147583, |
|
"eval_runtime": 6.9821, |
|
"eval_samples_per_second": 8.593, |
|
"eval_steps_per_second": 1.432, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 6.280193236714976e-07, |
|
"loss": 0.2848, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.3064, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 5.314009661835749e-07, |
|
"loss": 0.2802, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.2379, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.2318103313446045, |
|
"eval_runtime": 7.4568, |
|
"eval_samples_per_second": 8.046, |
|
"eval_steps_per_second": 1.341, |
|
"step": 2209 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 4.347826086956522e-07, |
|
"loss": 0.3444, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.3964, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 3.3816425120772945e-07, |
|
"loss": 0.218, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.4576, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 2.4154589371980677e-07, |
|
"loss": 0.2985, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.2594045400619507, |
|
"eval_runtime": 6.9353, |
|
"eval_samples_per_second": 8.651, |
|
"eval_steps_per_second": 1.442, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.359, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.4492753623188408e-07, |
|
"loss": 0.3572, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.4208, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 4.8309178743961356e-08, |
|
"loss": 0.3265, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 0.0, |
|
"loss": 0.2991, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.2510361671447754, |
|
"eval_runtime": 6.9183, |
|
"eval_samples_per_second": 8.673, |
|
"eval_steps_per_second": 1.445, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"step": 2300, |
|
"total_flos": 1.7136338694977618e+19, |
|
"train_loss": 0.7374556654950847, |
|
"train_runtime": 3025.2001, |
|
"train_samples_per_second": 4.562, |
|
"train_steps_per_second": 0.76 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.6507936507936508, |
|
"eval_loss": 1.1547213792800903, |
|
"eval_runtime": 9.0552, |
|
"eval_samples_per_second": 6.957, |
|
"eval_steps_per_second": 1.215, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.6507936507936508, |
|
"eval_loss": 1.1547214984893799, |
|
"eval_runtime": 7.9277, |
|
"eval_samples_per_second": 7.947, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 1.7136338694977618e+19, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|