|
{ |
|
"best_metric": 0.6078431372549019, |
|
"best_model_checkpoint": "videomae-base-finetuned-kinetics-finetuned-right-hand-conflab-v3/checkpoint-826", |
|
"epoch": 7.117521367521367, |
|
"eval_steps": 500, |
|
"global_step": 936, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010683760683760684, |
|
"grad_norm": 7.551811218261719, |
|
"learning_rate": 5.319148936170213e-06, |
|
"loss": 2.0365, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021367521367521368, |
|
"grad_norm": 6.67141580581665, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 2.0409, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03205128205128205, |
|
"grad_norm": 7.671868801116943, |
|
"learning_rate": 1.595744680851064e-05, |
|
"loss": 2.0454, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.042735042735042736, |
|
"grad_norm": 8.299155235290527, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 1.9744, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.053418803418803416, |
|
"grad_norm": 8.462949752807617, |
|
"learning_rate": 2.6595744680851064e-05, |
|
"loss": 1.9464, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 10.244002342224121, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 1.937, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07478632478632478, |
|
"grad_norm": 5.489703178405762, |
|
"learning_rate": 3.723404255319149e-05, |
|
"loss": 1.8308, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 9.307482719421387, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 1.9194, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 6.383068084716797, |
|
"learning_rate": 4.787234042553192e-05, |
|
"loss": 1.8826, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10683760683760683, |
|
"grad_norm": 7.77570915222168, |
|
"learning_rate": 4.96437054631829e-05, |
|
"loss": 1.9247, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11752136752136752, |
|
"grad_norm": 5.738762378692627, |
|
"learning_rate": 4.90498812351544e-05, |
|
"loss": 1.9096, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12606837606837606, |
|
"eval_accuracy": 0.3333333333333333, |
|
"eval_loss": 1.688755750656128, |
|
"eval_runtime": 17.8148, |
|
"eval_samples_per_second": 11.451, |
|
"eval_steps_per_second": 1.459, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.0021367521367521, |
|
"grad_norm": 6.397932052612305, |
|
"learning_rate": 4.845605700712589e-05, |
|
"loss": 1.7919, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0128205128205128, |
|
"grad_norm": 6.429988384246826, |
|
"learning_rate": 4.7862232779097386e-05, |
|
"loss": 1.7007, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0235042735042734, |
|
"grad_norm": 9.103757858276367, |
|
"learning_rate": 4.7268408551068886e-05, |
|
"loss": 1.8668, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0341880341880343, |
|
"grad_norm": 8.578359603881836, |
|
"learning_rate": 4.667458432304038e-05, |
|
"loss": 1.8187, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.044871794871795, |
|
"grad_norm": 9.773784637451172, |
|
"learning_rate": 4.6080760095011874e-05, |
|
"loss": 1.7984, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0555555555555556, |
|
"grad_norm": 8.500809669494629, |
|
"learning_rate": 4.5486935866983374e-05, |
|
"loss": 1.6211, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0662393162393162, |
|
"grad_norm": 5.273374557495117, |
|
"learning_rate": 4.4893111638954874e-05, |
|
"loss": 1.6033, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 9.878400802612305, |
|
"learning_rate": 4.429928741092637e-05, |
|
"loss": 1.6822, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0876068376068375, |
|
"grad_norm": 9.135309219360352, |
|
"learning_rate": 4.370546318289787e-05, |
|
"loss": 1.5063, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0982905982905984, |
|
"grad_norm": 7.596296787261963, |
|
"learning_rate": 4.311163895486936e-05, |
|
"loss": 1.4128, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.108974358974359, |
|
"grad_norm": 7.444425582885742, |
|
"learning_rate": 4.2517814726840856e-05, |
|
"loss": 1.6052, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1196581196581197, |
|
"grad_norm": 8.690657615661621, |
|
"learning_rate": 4.1923990498812356e-05, |
|
"loss": 1.628, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.126068376068376, |
|
"eval_accuracy": 0.4117647058823529, |
|
"eval_loss": 1.6285208463668823, |
|
"eval_runtime": 14.5186, |
|
"eval_samples_per_second": 14.051, |
|
"eval_steps_per_second": 1.791, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.0042735042735043, |
|
"grad_norm": 6.66658878326416, |
|
"learning_rate": 4.133016627078385e-05, |
|
"loss": 1.5841, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.014957264957265, |
|
"grad_norm": 7.023474216461182, |
|
"learning_rate": 4.073634204275535e-05, |
|
"loss": 1.3114, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0256410256410255, |
|
"grad_norm": 5.985867500305176, |
|
"learning_rate": 4.0142517814726843e-05, |
|
"loss": 1.3695, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.036324786324786, |
|
"grad_norm": 6.7717719078063965, |
|
"learning_rate": 3.954869358669834e-05, |
|
"loss": 1.4598, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.047008547008547, |
|
"grad_norm": 12.264374732971191, |
|
"learning_rate": 3.895486935866984e-05, |
|
"loss": 1.337, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.0576923076923075, |
|
"grad_norm": 10.283348083496094, |
|
"learning_rate": 3.836104513064133e-05, |
|
"loss": 1.4704, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0683760683760686, |
|
"grad_norm": 9.656314849853516, |
|
"learning_rate": 3.7767220902612825e-05, |
|
"loss": 1.5234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0790598290598292, |
|
"grad_norm": 7.528780937194824, |
|
"learning_rate": 3.7173396674584325e-05, |
|
"loss": 1.4436, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.08974358974359, |
|
"grad_norm": 8.336195945739746, |
|
"learning_rate": 3.657957244655582e-05, |
|
"loss": 1.2449, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.1004273504273505, |
|
"grad_norm": 10.961182594299316, |
|
"learning_rate": 3.598574821852731e-05, |
|
"loss": 1.4239, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 7.471282482147217, |
|
"learning_rate": 3.539192399049881e-05, |
|
"loss": 1.4062, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.121794871794872, |
|
"grad_norm": 8.72683334350586, |
|
"learning_rate": 3.479809976247031e-05, |
|
"loss": 1.3656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.126068376068376, |
|
"eval_accuracy": 0.5147058823529411, |
|
"eval_loss": 1.3947311639785767, |
|
"eval_runtime": 14.5916, |
|
"eval_samples_per_second": 13.981, |
|
"eval_steps_per_second": 1.782, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 3.0064102564102564, |
|
"grad_norm": 10.454861640930176, |
|
"learning_rate": 3.4204275534441806e-05, |
|
"loss": 1.1968, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.017094017094017, |
|
"grad_norm": 7.894044876098633, |
|
"learning_rate": 3.361045130641331e-05, |
|
"loss": 1.0705, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.0277777777777777, |
|
"grad_norm": 11.178865432739258, |
|
"learning_rate": 3.30166270783848e-05, |
|
"loss": 1.0553, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.0384615384615383, |
|
"grad_norm": 13.55367374420166, |
|
"learning_rate": 3.24228028503563e-05, |
|
"loss": 0.8611, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.049145299145299, |
|
"grad_norm": 11.7691650390625, |
|
"learning_rate": 3.1828978622327794e-05, |
|
"loss": 1.1736, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.0598290598290596, |
|
"grad_norm": 9.843459129333496, |
|
"learning_rate": 3.123515439429929e-05, |
|
"loss": 0.9042, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.0705128205128207, |
|
"grad_norm": 12.834084510803223, |
|
"learning_rate": 3.064133016627079e-05, |
|
"loss": 1.0035, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.0811965811965814, |
|
"grad_norm": 11.492711067199707, |
|
"learning_rate": 3.0047505938242282e-05, |
|
"loss": 1.0545, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.091880341880342, |
|
"grad_norm": 11.115935325622559, |
|
"learning_rate": 2.9453681710213776e-05, |
|
"loss": 1.046, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.1025641025641026, |
|
"grad_norm": 11.434919357299805, |
|
"learning_rate": 2.8859857482185276e-05, |
|
"loss": 0.9765, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.1132478632478633, |
|
"grad_norm": 13.18517017364502, |
|
"learning_rate": 2.826603325415677e-05, |
|
"loss": 1.0194, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.123931623931624, |
|
"grad_norm": 9.168006896972656, |
|
"learning_rate": 2.7672209026128266e-05, |
|
"loss": 1.1498, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.126068376068376, |
|
"eval_accuracy": 0.5735294117647058, |
|
"eval_loss": 1.2639243602752686, |
|
"eval_runtime": 15.2962, |
|
"eval_samples_per_second": 13.337, |
|
"eval_steps_per_second": 1.7, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 4.0085470085470085, |
|
"grad_norm": 10.556476593017578, |
|
"learning_rate": 2.7078384798099763e-05, |
|
"loss": 0.7885, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.019230769230769, |
|
"grad_norm": 9.74152660369873, |
|
"learning_rate": 2.648456057007126e-05, |
|
"loss": 0.6999, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.02991452991453, |
|
"grad_norm": 8.711644172668457, |
|
"learning_rate": 2.5890736342042754e-05, |
|
"loss": 0.7566, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0405982905982905, |
|
"grad_norm": 13.662306785583496, |
|
"learning_rate": 2.5296912114014254e-05, |
|
"loss": 0.7932, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.051282051282051, |
|
"grad_norm": 9.280159950256348, |
|
"learning_rate": 2.4703087885985748e-05, |
|
"loss": 0.7025, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.061965811965812, |
|
"grad_norm": 7.840063095092773, |
|
"learning_rate": 2.4109263657957245e-05, |
|
"loss": 0.6106, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.072649572649572, |
|
"grad_norm": 3.8308825492858887, |
|
"learning_rate": 2.3515439429928742e-05, |
|
"loss": 0.6493, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.083333333333333, |
|
"grad_norm": 13.45524787902832, |
|
"learning_rate": 2.292161520190024e-05, |
|
"loss": 1.0321, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.094017094017094, |
|
"grad_norm": 13.148232460021973, |
|
"learning_rate": 2.2327790973871736e-05, |
|
"loss": 0.756, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.104700854700854, |
|
"grad_norm": 6.532124042510986, |
|
"learning_rate": 2.1733966745843233e-05, |
|
"loss": 0.8232, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.115384615384615, |
|
"grad_norm": 8.199164390563965, |
|
"learning_rate": 2.114014251781473e-05, |
|
"loss": 0.6284, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.1260683760683765, |
|
"grad_norm": 16.024168014526367, |
|
"learning_rate": 2.0546318289786223e-05, |
|
"loss": 0.6546, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.1260683760683765, |
|
"eval_accuracy": 0.5882352941176471, |
|
"eval_loss": 1.2053728103637695, |
|
"eval_runtime": 14.7556, |
|
"eval_samples_per_second": 13.825, |
|
"eval_steps_per_second": 1.762, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.010683760683761, |
|
"grad_norm": 8.120397567749023, |
|
"learning_rate": 1.995249406175772e-05, |
|
"loss": 0.5882, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.021367521367521, |
|
"grad_norm": 8.652383804321289, |
|
"learning_rate": 1.9358669833729217e-05, |
|
"loss": 0.584, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.032051282051282, |
|
"grad_norm": 13.461187362670898, |
|
"learning_rate": 1.876484560570071e-05, |
|
"loss": 0.6268, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.042735042735043, |
|
"grad_norm": 4.858776569366455, |
|
"learning_rate": 1.8171021377672208e-05, |
|
"loss": 0.503, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.053418803418803, |
|
"grad_norm": 9.537641525268555, |
|
"learning_rate": 1.7577197149643705e-05, |
|
"loss": 0.5206, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.064102564102564, |
|
"grad_norm": 12.624528884887695, |
|
"learning_rate": 1.6983372921615205e-05, |
|
"loss": 0.4366, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.0747863247863245, |
|
"grad_norm": 12.749320030212402, |
|
"learning_rate": 1.63895486935867e-05, |
|
"loss": 0.4381, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.085470085470085, |
|
"grad_norm": 10.724397659301758, |
|
"learning_rate": 1.5795724465558196e-05, |
|
"loss": 0.3677, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.096153846153846, |
|
"grad_norm": 4.257500171661377, |
|
"learning_rate": 1.5201900237529693e-05, |
|
"loss": 0.5245, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.1068376068376065, |
|
"grad_norm": 11.950453758239746, |
|
"learning_rate": 1.4608076009501186e-05, |
|
"loss": 0.3506, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.117521367521367, |
|
"grad_norm": 15.27865219116211, |
|
"learning_rate": 1.4014251781472683e-05, |
|
"loss": 0.4812, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.1260683760683765, |
|
"eval_accuracy": 0.5490196078431373, |
|
"eval_loss": 1.2694668769836426, |
|
"eval_runtime": 14.7973, |
|
"eval_samples_per_second": 13.786, |
|
"eval_steps_per_second": 1.757, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 6.002136752136752, |
|
"grad_norm": 3.3596088886260986, |
|
"learning_rate": 1.3420427553444182e-05, |
|
"loss": 0.3931, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.012820512820513, |
|
"grad_norm": 13.172847747802734, |
|
"learning_rate": 1.2826603325415679e-05, |
|
"loss": 0.2829, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.023504273504273, |
|
"grad_norm": 4.362306594848633, |
|
"learning_rate": 1.2232779097387174e-05, |
|
"loss": 0.2584, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.034188034188034, |
|
"grad_norm": 10.034035682678223, |
|
"learning_rate": 1.163895486935867e-05, |
|
"loss": 0.4665, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.044871794871795, |
|
"grad_norm": 6.337015151977539, |
|
"learning_rate": 1.1045130641330167e-05, |
|
"loss": 0.4468, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.055555555555555, |
|
"grad_norm": 8.211027145385742, |
|
"learning_rate": 1.0451306413301664e-05, |
|
"loss": 0.2763, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.066239316239316, |
|
"grad_norm": 10.967971801757812, |
|
"learning_rate": 9.857482185273159e-06, |
|
"loss": 0.4814, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.076923076923077, |
|
"grad_norm": 4.332571506500244, |
|
"learning_rate": 9.263657957244656e-06, |
|
"loss": 0.3561, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.087606837606837, |
|
"grad_norm": 21.54241180419922, |
|
"learning_rate": 8.669833729216153e-06, |
|
"loss": 0.2402, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.098290598290598, |
|
"grad_norm": 9.541482925415039, |
|
"learning_rate": 8.07600950118765e-06, |
|
"loss": 0.4022, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.108974358974359, |
|
"grad_norm": 12.201172828674316, |
|
"learning_rate": 7.482185273159145e-06, |
|
"loss": 0.3603, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.119658119658119, |
|
"grad_norm": 2.7810683250427246, |
|
"learning_rate": 6.888361045130641e-06, |
|
"loss": 0.4631, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.1260683760683765, |
|
"eval_accuracy": 0.6078431372549019, |
|
"eval_loss": 1.209458351135254, |
|
"eval_runtime": 16.1617, |
|
"eval_samples_per_second": 12.622, |
|
"eval_steps_per_second": 1.609, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 7.004273504273504, |
|
"grad_norm": 9.450981140136719, |
|
"learning_rate": 6.294536817102138e-06, |
|
"loss": 0.4533, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.014957264957265, |
|
"grad_norm": 8.65283489227295, |
|
"learning_rate": 5.700712589073634e-06, |
|
"loss": 0.2189, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.0256410256410255, |
|
"grad_norm": 2.8259940147399902, |
|
"learning_rate": 5.1068883610451305e-06, |
|
"loss": 0.266, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.036324786324786, |
|
"grad_norm": 11.976675033569336, |
|
"learning_rate": 4.513064133016627e-06, |
|
"loss": 0.3194, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.047008547008547, |
|
"grad_norm": 9.489921569824219, |
|
"learning_rate": 3.919239904988124e-06, |
|
"loss": 0.3841, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 7.0576923076923075, |
|
"grad_norm": 5.314388751983643, |
|
"learning_rate": 3.3254156769596202e-06, |
|
"loss": 0.2449, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.068376068376068, |
|
"grad_norm": 20.23029327392578, |
|
"learning_rate": 2.7315914489311168e-06, |
|
"loss": 0.314, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.079059829059829, |
|
"grad_norm": 10.66151237487793, |
|
"learning_rate": 2.137767220902613e-06, |
|
"loss": 0.254, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.089743589743589, |
|
"grad_norm": 5.35306978225708, |
|
"learning_rate": 1.5439429928741092e-06, |
|
"loss": 0.4233, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.10042735042735, |
|
"grad_norm": 6.157220363616943, |
|
"learning_rate": 9.501187648456058e-07, |
|
"loss": 0.343, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 12.305887222290039, |
|
"learning_rate": 3.5629453681710215e-07, |
|
"loss": 0.2311, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.117521367521367, |
|
"eval_accuracy": 0.5980392156862745, |
|
"eval_loss": 1.1960477828979492, |
|
"eval_runtime": 14.4874, |
|
"eval_samples_per_second": 14.081, |
|
"eval_steps_per_second": 1.795, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 7.117521367521367, |
|
"step": 936, |
|
"total_flos": 9.31358507420772e+18, |
|
"train_loss": 0.996516230269375, |
|
"train_runtime": 1255.4154, |
|
"train_samples_per_second": 5.965, |
|
"train_steps_per_second": 0.746 |
|
}, |
|
{ |
|
"epoch": 7.117521367521367, |
|
"eval_accuracy": 0.5812807881773399, |
|
"eval_loss": 1.3488672971725464, |
|
"eval_runtime": 28.5732, |
|
"eval_samples_per_second": 7.105, |
|
"eval_steps_per_second": 0.91, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 7.117521367521367, |
|
"eval_accuracy": 0.5812807881773399, |
|
"eval_loss": 1.3488672971725464, |
|
"eval_runtime": 14.5651, |
|
"eval_samples_per_second": 13.937, |
|
"eval_steps_per_second": 1.785, |
|
"step": 936 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 936, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.31358507420772e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|