{ "best_metric": 0.6666666666666666, "best_model_checkpoint": "videomae-base-short-ssv2-finetuned-ct_cpc/checkpoint-47", "epoch": 48.01913043478261, "eval_steps": 500, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.347826086956522e-07, "loss": 2.0243, "step": 10 }, { "epoch": 0.01, "learning_rate": 8.695652173913044e-07, "loss": 1.8063, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.3043478260869566e-06, "loss": 1.6927, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.7391304347826088e-06, "loss": 1.4832, "step": 40 }, { "epoch": 0.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.1202621459960938, "eval_runtime": 7.2119, "eval_samples_per_second": 8.32, "eval_steps_per_second": 1.387, "step": 47 }, { "epoch": 1.0, "learning_rate": 2.173913043478261e-06, "loss": 1.3436, "step": 50 }, { "epoch": 1.01, "learning_rate": 2.6086956521739132e-06, "loss": 1.2714, "step": 60 }, { "epoch": 1.01, "learning_rate": 3.043478260869566e-06, "loss": 1.1191, "step": 70 }, { "epoch": 1.01, "learning_rate": 3.4782608695652175e-06, "loss": 0.9807, "step": 80 }, { "epoch": 1.02, "learning_rate": 3.91304347826087e-06, "loss": 1.32, "step": 90 }, { "epoch": 1.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.050528645515442, "eval_runtime": 6.9232, "eval_samples_per_second": 8.667, "eval_steps_per_second": 1.444, "step": 94 }, { "epoch": 2.0, "learning_rate": 4.347826086956522e-06, "loss": 1.0838, "step": 100 }, { "epoch": 2.01, "learning_rate": 4.782608695652174e-06, "loss": 1.1624, "step": 110 }, { "epoch": 2.01, "learning_rate": 5.2173913043478265e-06, "loss": 0.9414, "step": 120 }, { "epoch": 2.02, "learning_rate": 5.652173913043479e-06, "loss": 1.2583, "step": 130 }, { "epoch": 2.02, "learning_rate": 6.086956521739132e-06, "loss": 1.1539, "step": 140 }, { "epoch": 2.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0524587631225586, "eval_runtime": 7.8366, "eval_samples_per_second": 7.656, "eval_steps_per_second": 1.276, "step": 141 }, { "epoch": 3.0, "learning_rate": 6.521739130434783e-06, "loss": 1.0695, "step": 150 }, { "epoch": 3.01, "learning_rate": 6.956521739130435e-06, "loss": 0.9282, "step": 160 }, { "epoch": 3.01, "learning_rate": 7.391304347826087e-06, "loss": 0.9621, "step": 170 }, { "epoch": 3.02, "learning_rate": 7.82608695652174e-06, "loss": 1.2399, "step": 180 }, { "epoch": 3.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0353821516036987, "eval_runtime": 7.1279, "eval_samples_per_second": 8.418, "eval_steps_per_second": 1.403, "step": 188 }, { "epoch": 4.0, "learning_rate": 8.260869565217392e-06, "loss": 1.1451, "step": 190 }, { "epoch": 4.01, "learning_rate": 8.695652173913044e-06, "loss": 1.1118, "step": 200 }, { "epoch": 4.01, "learning_rate": 9.130434782608697e-06, "loss": 1.1014, "step": 210 }, { "epoch": 4.01, "learning_rate": 9.565217391304349e-06, "loss": 1.1536, "step": 220 }, { "epoch": 4.02, "learning_rate": 1e-05, "loss": 0.9928, "step": 230 }, { "epoch": 4.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0516277551651, "eval_runtime": 6.8144, "eval_samples_per_second": 8.805, "eval_steps_per_second": 1.467, "step": 235 }, { "epoch": 5.0, "learning_rate": 9.95169082125604e-06, "loss": 1.1887, "step": 240 }, { "epoch": 5.01, "learning_rate": 9.903381642512077e-06, "loss": 1.176, "step": 250 }, { "epoch": 5.01, "learning_rate": 9.855072463768118e-06, "loss": 1.0889, "step": 260 }, { "epoch": 5.02, "learning_rate": 9.806763285024155e-06, "loss": 0.8653, "step": 270 }, { "epoch": 5.02, "learning_rate": 9.758454106280194e-06, "loss": 1.0054, "step": 280 }, { "epoch": 5.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0296636819839478, "eval_runtime": 6.8135, "eval_samples_per_second": 8.806, "eval_steps_per_second": 1.468, "step": 282 }, { "epoch": 6.0, "learning_rate": 9.710144927536233e-06, "loss": 1.1702, "step": 290 }, { "epoch": 6.01, "learning_rate": 9.66183574879227e-06, "loss": 1.0608, "step": 300 }, { "epoch": 6.01, "learning_rate": 9.61352657004831e-06, "loss": 1.0026, "step": 310 }, { "epoch": 6.02, "learning_rate": 9.565217391304349e-06, "loss": 1.1166, "step": 320 }, { "epoch": 6.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.024717092514038, "eval_runtime": 6.7961, "eval_samples_per_second": 8.829, "eval_steps_per_second": 1.471, "step": 329 }, { "epoch": 7.0, "learning_rate": 9.516908212560388e-06, "loss": 1.0851, "step": 330 }, { "epoch": 7.0, "learning_rate": 9.468599033816425e-06, "loss": 0.9748, "step": 340 }, { "epoch": 7.01, "learning_rate": 9.420289855072464e-06, "loss": 1.0743, "step": 350 }, { "epoch": 7.01, "learning_rate": 9.371980676328503e-06, "loss": 0.9153, "step": 360 }, { "epoch": 7.02, "learning_rate": 9.323671497584542e-06, "loss": 1.2715, "step": 370 }, { "epoch": 7.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0097945928573608, "eval_runtime": 6.7899, "eval_samples_per_second": 8.837, "eval_steps_per_second": 1.473, "step": 376 }, { "epoch": 8.0, "learning_rate": 9.275362318840581e-06, "loss": 0.9517, "step": 380 }, { "epoch": 8.01, "learning_rate": 9.227053140096618e-06, "loss": 0.8985, "step": 390 }, { "epoch": 8.01, "learning_rate": 9.178743961352658e-06, "loss": 1.1341, "step": 400 }, { "epoch": 8.01, "learning_rate": 9.130434782608697e-06, "loss": 1.0505, "step": 410 }, { "epoch": 8.02, "learning_rate": 9.082125603864736e-06, "loss": 1.0317, "step": 420 }, { "epoch": 8.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.040429949760437, "eval_runtime": 6.9971, "eval_samples_per_second": 8.575, "eval_steps_per_second": 1.429, "step": 423 }, { "epoch": 9.0, "learning_rate": 9.033816425120775e-06, "loss": 1.1947, "step": 430 }, { "epoch": 9.01, "learning_rate": 8.985507246376812e-06, "loss": 0.9361, "step": 440 }, { "epoch": 9.01, "learning_rate": 8.937198067632851e-06, "loss": 0.9432, "step": 450 }, { "epoch": 9.02, "learning_rate": 8.888888888888888e-06, "loss": 1.0958, "step": 460 }, { "epoch": 9.02, "learning_rate": 8.840579710144929e-06, "loss": 1.1258, "step": 470 }, { "epoch": 9.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0053784847259521, "eval_runtime": 7.1441, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.4, "step": 470 }, { "epoch": 10.0, "learning_rate": 8.792270531400966e-06, "loss": 1.0206, "step": 480 }, { "epoch": 10.01, "learning_rate": 8.743961352657005e-06, "loss": 1.0568, "step": 490 }, { "epoch": 10.01, "learning_rate": 8.695652173913044e-06, "loss": 1.1054, "step": 500 }, { "epoch": 10.02, "learning_rate": 8.647342995169082e-06, "loss": 0.8991, "step": 510 }, { "epoch": 10.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0477041006088257, "eval_runtime": 7.2799, "eval_samples_per_second": 8.242, "eval_steps_per_second": 1.374, "step": 517 }, { "epoch": 11.0, "learning_rate": 8.599033816425122e-06, "loss": 1.167, "step": 520 }, { "epoch": 11.01, "learning_rate": 8.55072463768116e-06, "loss": 0.9754, "step": 530 }, { "epoch": 11.01, "learning_rate": 8.502415458937199e-06, "loss": 1.0365, "step": 540 }, { "epoch": 11.01, "learning_rate": 8.454106280193238e-06, "loss": 0.9409, "step": 550 }, { "epoch": 11.02, "learning_rate": 8.405797101449275e-06, "loss": 0.8957, "step": 560 }, { "epoch": 11.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0607702732086182, "eval_runtime": 6.9041, "eval_samples_per_second": 8.69, "eval_steps_per_second": 1.448, "step": 564 }, { "epoch": 12.0, "learning_rate": 8.357487922705316e-06, "loss": 0.8378, "step": 570 }, { "epoch": 12.01, "learning_rate": 8.309178743961353e-06, "loss": 0.8314, "step": 580 }, { "epoch": 12.01, "learning_rate": 8.260869565217392e-06, "loss": 1.092, "step": 590 }, { "epoch": 12.02, "learning_rate": 8.212560386473431e-06, "loss": 1.0079, "step": 600 }, { "epoch": 12.02, "learning_rate": 8.164251207729469e-06, "loss": 1.078, "step": 610 }, { "epoch": 12.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0165636539459229, "eval_runtime": 7.648, "eval_samples_per_second": 7.845, "eval_steps_per_second": 1.308, "step": 611 }, { "epoch": 13.0, "learning_rate": 8.115942028985508e-06, "loss": 0.8161, "step": 620 }, { "epoch": 13.01, "learning_rate": 8.067632850241547e-06, "loss": 1.2458, "step": 630 }, { "epoch": 13.01, "learning_rate": 8.019323671497586e-06, "loss": 0.9597, "step": 640 }, { "epoch": 13.02, "learning_rate": 7.971014492753623e-06, "loss": 0.9067, "step": 650 }, { "epoch": 13.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.2110427618026733, "eval_runtime": 7.2284, "eval_samples_per_second": 8.301, "eval_steps_per_second": 1.383, "step": 658 }, { "epoch": 14.0, "learning_rate": 7.922705314009662e-06, "loss": 0.9322, "step": 660 }, { "epoch": 14.01, "learning_rate": 7.874396135265701e-06, "loss": 0.993, "step": 670 }, { "epoch": 14.01, "learning_rate": 7.82608695652174e-06, "loss": 0.7774, "step": 680 }, { "epoch": 14.01, "learning_rate": 7.77777777777778e-06, "loss": 0.9399, "step": 690 }, { "epoch": 14.02, "learning_rate": 7.729468599033817e-06, "loss": 1.0383, "step": 700 }, { "epoch": 14.02, "eval_accuracy": 0.6, "eval_loss": 1.0380301475524902, "eval_runtime": 6.9013, "eval_samples_per_second": 8.694, "eval_steps_per_second": 1.449, "step": 705 }, { "epoch": 15.0, "learning_rate": 7.681159420289856e-06, "loss": 0.7043, "step": 710 }, { "epoch": 15.01, "learning_rate": 7.632850241545895e-06, "loss": 1.0185, "step": 720 }, { "epoch": 15.01, "learning_rate": 7.584541062801934e-06, "loss": 0.9907, "step": 730 }, { "epoch": 15.02, "learning_rate": 7.536231884057972e-06, "loss": 0.8602, "step": 740 }, { "epoch": 15.02, "learning_rate": 7.48792270531401e-06, "loss": 1.2244, "step": 750 }, { "epoch": 15.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.1040699481964111, "eval_runtime": 7.4605, "eval_samples_per_second": 8.042, "eval_steps_per_second": 1.34, "step": 752 }, { "epoch": 16.0, "learning_rate": 7.439613526570049e-06, "loss": 0.9108, "step": 760 }, { "epoch": 16.01, "learning_rate": 7.391304347826087e-06, "loss": 0.8811, "step": 770 }, { "epoch": 16.01, "learning_rate": 7.342995169082127e-06, "loss": 0.8487, "step": 780 }, { "epoch": 16.02, "learning_rate": 7.294685990338165e-06, "loss": 1.0364, "step": 790 }, { "epoch": 16.02, "eval_accuracy": 0.6, "eval_loss": 1.0123381614685059, "eval_runtime": 6.8033, "eval_samples_per_second": 8.819, "eval_steps_per_second": 1.47, "step": 799 }, { "epoch": 17.0, "learning_rate": 7.246376811594203e-06, "loss": 0.8189, "step": 800 }, { "epoch": 17.0, "learning_rate": 7.1980676328502416e-06, "loss": 0.8733, "step": 810 }, { "epoch": 17.01, "learning_rate": 7.149758454106281e-06, "loss": 0.6958, "step": 820 }, { "epoch": 17.01, "learning_rate": 7.10144927536232e-06, "loss": 0.782, "step": 830 }, { "epoch": 17.02, "learning_rate": 7.053140096618359e-06, "loss": 0.9662, "step": 840 }, { "epoch": 17.02, "eval_accuracy": 0.6333333333333333, "eval_loss": 1.1802695989608765, "eval_runtime": 7.2583, "eval_samples_per_second": 8.266, "eval_steps_per_second": 1.378, "step": 846 }, { "epoch": 18.0, "learning_rate": 7.004830917874397e-06, "loss": 0.9205, "step": 850 }, { "epoch": 18.01, "learning_rate": 6.956521739130435e-06, "loss": 0.8999, "step": 860 }, { "epoch": 18.01, "learning_rate": 6.908212560386473e-06, "loss": 0.6096, "step": 870 }, { "epoch": 18.01, "learning_rate": 6.859903381642513e-06, "loss": 0.79, "step": 880 }, { "epoch": 18.02, "learning_rate": 6.811594202898551e-06, "loss": 0.9299, "step": 890 }, { "epoch": 18.02, "eval_accuracy": 0.6333333333333333, "eval_loss": 1.0910345315933228, "eval_runtime": 6.9398, "eval_samples_per_second": 8.646, "eval_steps_per_second": 1.441, "step": 893 }, { "epoch": 19.0, "learning_rate": 6.76328502415459e-06, "loss": 0.8715, "step": 900 }, { "epoch": 19.01, "learning_rate": 6.7149758454106285e-06, "loss": 0.8882, "step": 910 }, { "epoch": 19.01, "learning_rate": 6.666666666666667e-06, "loss": 0.8562, "step": 920 }, { "epoch": 19.02, "learning_rate": 6.6183574879227065e-06, "loss": 0.9704, "step": 930 }, { "epoch": 19.02, "learning_rate": 6.570048309178745e-06, "loss": 0.877, "step": 940 }, { "epoch": 19.02, "eval_accuracy": 0.65, "eval_loss": 1.0182613134384155, "eval_runtime": 7.0493, "eval_samples_per_second": 8.511, "eval_steps_per_second": 1.419, "step": 940 }, { "epoch": 20.0, "learning_rate": 6.521739130434783e-06, "loss": 0.8358, "step": 950 }, { "epoch": 20.01, "learning_rate": 6.473429951690822e-06, "loss": 0.7785, "step": 960 }, { "epoch": 20.01, "learning_rate": 6.42512077294686e-06, "loss": 0.7644, "step": 970 }, { "epoch": 20.02, "learning_rate": 6.376811594202898e-06, "loss": 0.8649, "step": 980 }, { "epoch": 20.02, "eval_accuracy": 0.6166666666666667, "eval_loss": 1.1414852142333984, "eval_runtime": 7.0701, "eval_samples_per_second": 8.486, "eval_steps_per_second": 1.414, "step": 987 }, { "epoch": 21.0, "learning_rate": 6.328502415458938e-06, "loss": 0.8795, "step": 990 }, { "epoch": 21.01, "learning_rate": 6.280193236714976e-06, "loss": 0.7464, "step": 1000 }, { "epoch": 21.01, "learning_rate": 6.2318840579710145e-06, "loss": 0.7973, "step": 1010 }, { "epoch": 21.01, "learning_rate": 6.1835748792270535e-06, "loss": 0.9681, "step": 1020 }, { "epoch": 21.02, "learning_rate": 6.135265700483092e-06, "loss": 0.8841, "step": 1030 }, { "epoch": 21.02, "eval_accuracy": 0.6333333333333333, "eval_loss": 1.0975487232208252, "eval_runtime": 7.726, "eval_samples_per_second": 7.766, "eval_steps_per_second": 1.294, "step": 1034 }, { "epoch": 22.0, "learning_rate": 6.086956521739132e-06, "loss": 0.6596, "step": 1040 }, { "epoch": 22.01, "learning_rate": 6.03864734299517e-06, "loss": 0.7252, "step": 1050 }, { "epoch": 22.01, "learning_rate": 5.990338164251208e-06, "loss": 0.7564, "step": 1060 }, { "epoch": 22.02, "learning_rate": 5.942028985507247e-06, "loss": 0.7406, "step": 1070 }, { "epoch": 22.02, "learning_rate": 5.893719806763285e-06, "loss": 0.6276, "step": 1080 }, { "epoch": 22.02, "eval_accuracy": 0.6, "eval_loss": 1.072813868522644, "eval_runtime": 7.252, "eval_samples_per_second": 8.274, "eval_steps_per_second": 1.379, "step": 1081 }, { "epoch": 23.0, "learning_rate": 5.845410628019324e-06, "loss": 0.7456, "step": 1090 }, { "epoch": 23.01, "learning_rate": 5.797101449275363e-06, "loss": 0.5497, "step": 1100 }, { "epoch": 23.01, "learning_rate": 5.748792270531401e-06, "loss": 0.875, "step": 1110 }, { "epoch": 23.02, "learning_rate": 5.70048309178744e-06, "loss": 0.7974, "step": 1120 }, { "epoch": 23.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.1675584316253662, "eval_runtime": 6.8242, "eval_samples_per_second": 8.792, "eval_steps_per_second": 1.465, "step": 1128 }, { "epoch": 24.0, "learning_rate": 5.652173913043479e-06, "loss": 0.8186, "step": 1130 }, { "epoch": 24.01, "learning_rate": 5.603864734299518e-06, "loss": 0.7628, "step": 1140 }, { "epoch": 24.01, "learning_rate": 5.555555555555557e-06, "loss": 0.4592, "step": 1150 }, { "epoch": 24.01, "learning_rate": 5.507246376811595e-06, "loss": 0.6761, "step": 1160 }, { "epoch": 24.02, "learning_rate": 5.458937198067633e-06, "loss": 0.7381, "step": 1170 }, { "epoch": 24.02, "eval_accuracy": 0.55, "eval_loss": 1.1623855829238892, "eval_runtime": 6.815, "eval_samples_per_second": 8.804, "eval_steps_per_second": 1.467, "step": 1175 }, { "epoch": 25.0, "learning_rate": 5.410628019323671e-06, "loss": 0.8723, "step": 1180 }, { "epoch": 25.01, "learning_rate": 5.362318840579711e-06, "loss": 0.6832, "step": 1190 }, { "epoch": 25.01, "learning_rate": 5.314009661835749e-06, "loss": 0.7768, "step": 1200 }, { "epoch": 25.02, "learning_rate": 5.265700483091788e-06, "loss": 0.6568, "step": 1210 }, { "epoch": 25.02, "learning_rate": 5.2173913043478265e-06, "loss": 0.4807, "step": 1220 }, { "epoch": 25.02, "eval_accuracy": 0.55, "eval_loss": 1.1929564476013184, "eval_runtime": 6.8394, "eval_samples_per_second": 8.773, "eval_steps_per_second": 1.462, "step": 1222 }, { "epoch": 26.0, "learning_rate": 5.169082125603865e-06, "loss": 0.7603, "step": 1230 }, { "epoch": 26.01, "learning_rate": 5.1207729468599045e-06, "loss": 0.647, "step": 1240 }, { "epoch": 26.01, "learning_rate": 5.072463768115943e-06, "loss": 0.67, "step": 1250 }, { "epoch": 26.02, "learning_rate": 5.024154589371981e-06, "loss": 0.6812, "step": 1260 }, { "epoch": 26.02, "eval_accuracy": 0.6, "eval_loss": 1.1511147022247314, "eval_runtime": 7.0933, "eval_samples_per_second": 8.459, "eval_steps_per_second": 1.41, "step": 1269 }, { "epoch": 27.0, "learning_rate": 4.97584541062802e-06, "loss": 0.6945, "step": 1270 }, { "epoch": 27.0, "learning_rate": 4.927536231884059e-06, "loss": 0.6031, "step": 1280 }, { "epoch": 27.01, "learning_rate": 4.879227053140097e-06, "loss": 0.659, "step": 1290 }, { "epoch": 27.01, "learning_rate": 4.830917874396135e-06, "loss": 0.659, "step": 1300 }, { "epoch": 27.02, "learning_rate": 4.782608695652174e-06, "loss": 0.5802, "step": 1310 }, { "epoch": 27.02, "eval_accuracy": 0.6166666666666667, "eval_loss": 1.2314832210540771, "eval_runtime": 7.5214, "eval_samples_per_second": 7.977, "eval_steps_per_second": 1.33, "step": 1316 }, { "epoch": 28.0, "learning_rate": 4.7342995169082125e-06, "loss": 0.5988, "step": 1320 }, { "epoch": 28.01, "learning_rate": 4.6859903381642516e-06, "loss": 0.6461, "step": 1330 }, { "epoch": 28.01, "learning_rate": 4.637681159420291e-06, "loss": 0.729, "step": 1340 }, { "epoch": 28.01, "learning_rate": 4.589371980676329e-06, "loss": 0.5582, "step": 1350 }, { "epoch": 28.02, "learning_rate": 4.541062801932368e-06, "loss": 0.5943, "step": 1360 }, { "epoch": 28.02, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.1640959978103638, "eval_runtime": 7.4811, "eval_samples_per_second": 8.02, "eval_steps_per_second": 1.337, "step": 1363 }, { "epoch": 29.0, "learning_rate": 4.492753623188406e-06, "loss": 0.4759, "step": 1370 }, { "epoch": 29.01, "learning_rate": 4.444444444444444e-06, "loss": 0.7748, "step": 1380 }, { "epoch": 29.01, "learning_rate": 4.396135265700483e-06, "loss": 0.5033, "step": 1390 }, { "epoch": 29.02, "learning_rate": 4.347826086956522e-06, "loss": 0.8771, "step": 1400 }, { "epoch": 29.02, "learning_rate": 4.299516908212561e-06, "loss": 0.5416, "step": 1410 }, { "epoch": 29.02, "eval_accuracy": 0.6, "eval_loss": 1.0962903499603271, "eval_runtime": 7.0002, "eval_samples_per_second": 8.571, "eval_steps_per_second": 1.429, "step": 1410 }, { "epoch": 30.0, "learning_rate": 4.251207729468599e-06, "loss": 0.6134, "step": 1420 }, { "epoch": 30.01, "learning_rate": 4.202898550724638e-06, "loss": 0.5366, "step": 1430 }, { "epoch": 30.01, "learning_rate": 4.154589371980677e-06, "loss": 0.5575, "step": 1440 }, { "epoch": 30.02, "learning_rate": 4.106280193236716e-06, "loss": 0.6676, "step": 1450 }, { "epoch": 30.02, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.1130536794662476, "eval_runtime": 7.3167, "eval_samples_per_second": 8.2, "eval_steps_per_second": 1.367, "step": 1457 }, { "epoch": 31.0, "learning_rate": 4.057971014492754e-06, "loss": 0.5169, "step": 1460 }, { "epoch": 31.01, "learning_rate": 4.009661835748793e-06, "loss": 0.5593, "step": 1470 }, { "epoch": 31.01, "learning_rate": 3.961352657004831e-06, "loss": 0.7655, "step": 1480 }, { "epoch": 31.01, "learning_rate": 3.91304347826087e-06, "loss": 0.6803, "step": 1490 }, { "epoch": 31.02, "learning_rate": 3.864734299516908e-06, "loss": 0.4085, "step": 1500 }, { "epoch": 31.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.2947934865951538, "eval_runtime": 6.9055, "eval_samples_per_second": 8.689, "eval_steps_per_second": 1.448, "step": 1504 }, { "epoch": 32.0, "learning_rate": 3.816425120772947e-06, "loss": 0.7962, "step": 1510 }, { "epoch": 32.01, "learning_rate": 3.768115942028986e-06, "loss": 0.742, "step": 1520 }, { "epoch": 32.01, "learning_rate": 3.7198067632850245e-06, "loss": 0.4275, "step": 1530 }, { "epoch": 32.02, "learning_rate": 3.6714975845410635e-06, "loss": 0.6044, "step": 1540 }, { "epoch": 32.02, "learning_rate": 3.6231884057971017e-06, "loss": 0.4548, "step": 1550 }, { "epoch": 32.02, "eval_accuracy": 0.6333333333333333, "eval_loss": 1.2788442373275757, "eval_runtime": 6.7575, "eval_samples_per_second": 8.879, "eval_steps_per_second": 1.48, "step": 1551 }, { "epoch": 33.0, "learning_rate": 3.5748792270531403e-06, "loss": 0.7688, "step": 1560 }, { "epoch": 33.01, "learning_rate": 3.5265700483091793e-06, "loss": 0.489, "step": 1570 }, { "epoch": 33.01, "learning_rate": 3.4782608695652175e-06, "loss": 0.545, "step": 1580 }, { "epoch": 33.02, "learning_rate": 3.4299516908212565e-06, "loss": 0.4351, "step": 1590 }, { "epoch": 33.02, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.1921883821487427, "eval_runtime": 6.9013, "eval_samples_per_second": 8.694, "eval_steps_per_second": 1.449, "step": 1598 }, { "epoch": 34.0, "learning_rate": 3.381642512077295e-06, "loss": 0.5118, "step": 1600 }, { "epoch": 34.01, "learning_rate": 3.3333333333333333e-06, "loss": 0.5212, "step": 1610 }, { "epoch": 34.01, "learning_rate": 3.2850241545893724e-06, "loss": 0.3797, "step": 1620 }, { "epoch": 34.01, "learning_rate": 3.236714975845411e-06, "loss": 0.5177, "step": 1630 }, { "epoch": 34.02, "learning_rate": 3.188405797101449e-06, "loss": 0.5641, "step": 1640 }, { "epoch": 34.02, "eval_accuracy": 0.55, "eval_loss": 1.267711877822876, "eval_runtime": 7.3036, "eval_samples_per_second": 8.215, "eval_steps_per_second": 1.369, "step": 1645 }, { "epoch": 35.0, "learning_rate": 3.140096618357488e-06, "loss": 0.3153, "step": 1650 }, { "epoch": 35.01, "learning_rate": 3.0917874396135268e-06, "loss": 0.4103, "step": 1660 }, { "epoch": 35.01, "learning_rate": 3.043478260869566e-06, "loss": 0.503, "step": 1670 }, { "epoch": 35.02, "learning_rate": 2.995169082125604e-06, "loss": 0.5921, "step": 1680 }, { "epoch": 35.02, "learning_rate": 2.9468599033816426e-06, "loss": 0.4471, "step": 1690 }, { "epoch": 35.02, "eval_accuracy": 0.6166666666666667, "eval_loss": 1.2765668630599976, "eval_runtime": 7.0911, "eval_samples_per_second": 8.461, "eval_steps_per_second": 1.41, "step": 1692 }, { "epoch": 36.0, "learning_rate": 2.8985507246376816e-06, "loss": 0.4534, "step": 1700 }, { "epoch": 36.01, "learning_rate": 2.85024154589372e-06, "loss": 0.4072, "step": 1710 }, { "epoch": 36.01, "learning_rate": 2.801932367149759e-06, "loss": 0.4057, "step": 1720 }, { "epoch": 36.02, "learning_rate": 2.7536231884057974e-06, "loss": 0.361, "step": 1730 }, { "epoch": 36.02, "eval_accuracy": 0.55, "eval_loss": 1.2524678707122803, "eval_runtime": 7.3291, "eval_samples_per_second": 8.187, "eval_steps_per_second": 1.364, "step": 1739 }, { "epoch": 37.0, "learning_rate": 2.7053140096618356e-06, "loss": 0.5294, "step": 1740 }, { "epoch": 37.0, "learning_rate": 2.6570048309178746e-06, "loss": 0.4, "step": 1750 }, { "epoch": 37.01, "learning_rate": 2.6086956521739132e-06, "loss": 0.4891, "step": 1760 }, { "epoch": 37.01, "learning_rate": 2.5603864734299523e-06, "loss": 0.3458, "step": 1770 }, { "epoch": 37.02, "learning_rate": 2.5120772946859904e-06, "loss": 0.5668, "step": 1780 }, { "epoch": 37.02, "eval_accuracy": 0.55, "eval_loss": 1.292838454246521, "eval_runtime": 7.1147, "eval_samples_per_second": 8.433, "eval_steps_per_second": 1.406, "step": 1786 }, { "epoch": 38.0, "learning_rate": 2.4637681159420295e-06, "loss": 0.4136, "step": 1790 }, { "epoch": 38.01, "learning_rate": 2.4154589371980677e-06, "loss": 0.6028, "step": 1800 }, { "epoch": 38.01, "learning_rate": 2.3671497584541063e-06, "loss": 0.4004, "step": 1810 }, { "epoch": 38.01, "learning_rate": 2.3188405797101453e-06, "loss": 0.4186, "step": 1820 }, { "epoch": 38.02, "learning_rate": 2.270531400966184e-06, "loss": 0.5262, "step": 1830 }, { "epoch": 38.02, "eval_accuracy": 0.6333333333333333, "eval_loss": 1.2428669929504395, "eval_runtime": 7.0978, "eval_samples_per_second": 8.453, "eval_steps_per_second": 1.409, "step": 1833 }, { "epoch": 39.0, "learning_rate": 2.222222222222222e-06, "loss": 0.4642, "step": 1840 }, { "epoch": 39.01, "learning_rate": 2.173913043478261e-06, "loss": 0.485, "step": 1850 }, { "epoch": 39.01, "learning_rate": 2.1256038647342997e-06, "loss": 0.3757, "step": 1860 }, { "epoch": 39.02, "learning_rate": 2.0772946859903383e-06, "loss": 0.5197, "step": 1870 }, { "epoch": 39.02, "learning_rate": 2.028985507246377e-06, "loss": 0.4901, "step": 1880 }, { "epoch": 39.02, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.1457266807556152, "eval_runtime": 7.0788, "eval_samples_per_second": 8.476, "eval_steps_per_second": 1.413, "step": 1880 }, { "epoch": 40.0, "learning_rate": 1.9806763285024155e-06, "loss": 0.432, "step": 1890 }, { "epoch": 40.01, "learning_rate": 1.932367149758454e-06, "loss": 0.3237, "step": 1900 }, { "epoch": 40.01, "learning_rate": 1.884057971014493e-06, "loss": 0.3166, "step": 1910 }, { "epoch": 40.02, "learning_rate": 1.8357487922705318e-06, "loss": 0.481, "step": 1920 }, { "epoch": 40.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.213700771331787, "eval_runtime": 7.0464, "eval_samples_per_second": 8.515, "eval_steps_per_second": 1.419, "step": 1927 }, { "epoch": 41.0, "learning_rate": 1.7874396135265702e-06, "loss": 0.3313, "step": 1930 }, { "epoch": 41.01, "learning_rate": 1.7391304347826088e-06, "loss": 0.4095, "step": 1940 }, { "epoch": 41.01, "learning_rate": 1.6908212560386476e-06, "loss": 0.4013, "step": 1950 }, { "epoch": 41.01, "learning_rate": 1.6425120772946862e-06, "loss": 0.453, "step": 1960 }, { "epoch": 41.02, "learning_rate": 1.5942028985507246e-06, "loss": 0.3566, "step": 1970 }, { "epoch": 41.02, "eval_accuracy": 0.6166666666666667, "eval_loss": 1.1829572916030884, "eval_runtime": 7.0773, "eval_samples_per_second": 8.478, "eval_steps_per_second": 1.413, "step": 1974 }, { "epoch": 42.0, "learning_rate": 1.5458937198067634e-06, "loss": 0.342, "step": 1980 }, { "epoch": 42.01, "learning_rate": 1.497584541062802e-06, "loss": 0.2666, "step": 1990 }, { "epoch": 42.01, "learning_rate": 1.4492753623188408e-06, "loss": 0.4153, "step": 2000 }, { "epoch": 42.02, "learning_rate": 1.4009661835748794e-06, "loss": 0.3853, "step": 2010 }, { "epoch": 42.02, "learning_rate": 1.3526570048309178e-06, "loss": 0.5147, "step": 2020 }, { "epoch": 42.02, "eval_accuracy": 0.6, "eval_loss": 1.1628528833389282, "eval_runtime": 6.9693, "eval_samples_per_second": 8.609, "eval_steps_per_second": 1.435, "step": 2021 }, { "epoch": 43.0, "learning_rate": 1.3043478260869566e-06, "loss": 0.5075, "step": 2030 }, { "epoch": 43.01, "learning_rate": 1.2560386473429952e-06, "loss": 0.3599, "step": 2040 }, { "epoch": 43.01, "learning_rate": 1.2077294685990338e-06, "loss": 0.3602, "step": 2050 }, { "epoch": 43.02, "learning_rate": 1.1594202898550726e-06, "loss": 0.4013, "step": 2060 }, { "epoch": 43.02, "eval_accuracy": 0.6, "eval_loss": 1.2734596729278564, "eval_runtime": 7.5005, "eval_samples_per_second": 8.0, "eval_steps_per_second": 1.333, "step": 2068 }, { "epoch": 44.0, "learning_rate": 1.111111111111111e-06, "loss": 0.1829, "step": 2070 }, { "epoch": 44.01, "learning_rate": 1.0628019323671499e-06, "loss": 0.3784, "step": 2080 }, { "epoch": 44.01, "learning_rate": 1.0144927536231885e-06, "loss": 0.3492, "step": 2090 }, { "epoch": 44.01, "learning_rate": 9.66183574879227e-07, "loss": 0.3413, "step": 2100 }, { "epoch": 44.02, "learning_rate": 9.178743961352659e-07, "loss": 0.3895, "step": 2110 }, { "epoch": 44.02, "eval_accuracy": 0.6166666666666667, "eval_loss": 1.275781273841858, "eval_runtime": 7.0795, "eval_samples_per_second": 8.475, "eval_steps_per_second": 1.413, "step": 2115 }, { "epoch": 45.0, "learning_rate": 8.695652173913044e-07, "loss": 0.2765, "step": 2120 }, { "epoch": 45.01, "learning_rate": 8.212560386473431e-07, "loss": 0.3307, "step": 2130 }, { "epoch": 45.01, "learning_rate": 7.729468599033817e-07, "loss": 0.4263, "step": 2140 }, { "epoch": 45.02, "learning_rate": 7.246376811594204e-07, "loss": 0.2821, "step": 2150 }, { "epoch": 45.02, "learning_rate": 6.763285024154589e-07, "loss": 0.3727, "step": 2160 }, { "epoch": 45.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.251878023147583, "eval_runtime": 6.9821, "eval_samples_per_second": 8.593, "eval_steps_per_second": 1.432, "step": 2162 }, { "epoch": 46.0, "learning_rate": 6.280193236714976e-07, "loss": 0.2848, "step": 2170 }, { "epoch": 46.01, "learning_rate": 5.797101449275363e-07, "loss": 0.3064, "step": 2180 }, { "epoch": 46.01, "learning_rate": 5.314009661835749e-07, "loss": 0.2802, "step": 2190 }, { "epoch": 46.02, "learning_rate": 4.830917874396135e-07, "loss": 0.2379, "step": 2200 }, { "epoch": 46.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.2318103313446045, "eval_runtime": 7.4568, "eval_samples_per_second": 8.046, "eval_steps_per_second": 1.341, "step": 2209 }, { "epoch": 47.0, "learning_rate": 4.347826086956522e-07, "loss": 0.3444, "step": 2210 }, { "epoch": 47.0, "learning_rate": 3.8647342995169085e-07, "loss": 0.3964, "step": 2220 }, { "epoch": 47.01, "learning_rate": 3.3816425120772945e-07, "loss": 0.218, "step": 2230 }, { "epoch": 47.01, "learning_rate": 2.8985507246376816e-07, "loss": 0.4576, "step": 2240 }, { "epoch": 47.02, "learning_rate": 2.4154589371980677e-07, "loss": 0.2985, "step": 2250 }, { "epoch": 47.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.2594045400619507, "eval_runtime": 6.9353, "eval_samples_per_second": 8.651, "eval_steps_per_second": 1.442, "step": 2256 }, { "epoch": 48.0, "learning_rate": 1.9323671497584542e-07, "loss": 0.359, "step": 2260 }, { "epoch": 48.01, "learning_rate": 1.4492753623188408e-07, "loss": 0.3572, "step": 2270 }, { "epoch": 48.01, "learning_rate": 9.661835748792271e-08, "loss": 0.4208, "step": 2280 }, { "epoch": 48.01, "learning_rate": 4.8309178743961356e-08, "loss": 0.3265, "step": 2290 }, { "epoch": 48.02, "learning_rate": 0.0, "loss": 0.2991, "step": 2300 }, { "epoch": 48.02, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.2510361671447754, "eval_runtime": 6.9183, "eval_samples_per_second": 8.673, "eval_steps_per_second": 1.445, "step": 2300 }, { "epoch": 48.02, "step": 2300, "total_flos": 1.7136338694977618e+19, "train_loss": 0.7374556654950847, "train_runtime": 3025.2001, "train_samples_per_second": 4.562, "train_steps_per_second": 0.76 }, { "epoch": 48.02, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.1547213792800903, "eval_runtime": 9.0552, "eval_samples_per_second": 6.957, "eval_steps_per_second": 1.215, "step": 2300 }, { "epoch": 48.02, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.1547214984893799, "eval_runtime": 7.9277, "eval_samples_per_second": 7.947, "eval_steps_per_second": 1.388, "step": 2300 } ], "logging_steps": 10, "max_steps": 2300, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 1.7136338694977618e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }