|
{ |
|
"best_metric": 5.022224426269531, |
|
"best_model_checkpoint": "./ViMedical_Diseases/checkpoint-400", |
|
"epoch": 84.76821192052981, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.0024875621890547263, |
|
"eval_f1": 1.2345221781909311e-05, |
|
"eval_loss": 6.415043830871582, |
|
"eval_runtime": 0.3946, |
|
"eval_samples_per_second": 3055.887, |
|
"eval_steps_per_second": 25.339, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.0024875621890547263, |
|
"eval_f1": 1.2756729174639624e-05, |
|
"eval_loss": 6.412064075469971, |
|
"eval_runtime": 0.3736, |
|
"eval_samples_per_second": 3227.701, |
|
"eval_steps_per_second": 26.764, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.0024875621890547263, |
|
"eval_f1": 1.338598487383709e-05, |
|
"eval_loss": 6.407182693481445, |
|
"eval_runtime": 0.3753, |
|
"eval_samples_per_second": 3213.746, |
|
"eval_steps_per_second": 26.648, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_accuracy": 0.0041459369817578775, |
|
"eval_f1": 0.0005888539889881497, |
|
"eval_loss": 6.397731304168701, |
|
"eval_runtime": 0.3765, |
|
"eval_samples_per_second": 3202.793, |
|
"eval_steps_per_second": 26.557, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_accuracy": 0.0041459369817578775, |
|
"eval_f1": 0.00024318292743308673, |
|
"eval_loss": 6.378692626953125, |
|
"eval_runtime": 0.375, |
|
"eval_samples_per_second": 3215.914, |
|
"eval_steps_per_second": 26.666, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_accuracy": 0.006633499170812604, |
|
"eval_f1": 0.0006805627237639513, |
|
"eval_loss": 6.348721981048584, |
|
"eval_runtime": 0.3755, |
|
"eval_samples_per_second": 3212.024, |
|
"eval_steps_per_second": 26.634, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.00912106135986733, |
|
"eval_f1": 0.0044415843789941045, |
|
"eval_loss": 6.305507183074951, |
|
"eval_runtime": 0.3784, |
|
"eval_samples_per_second": 3187.077, |
|
"eval_steps_per_second": 26.427, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_accuracy": 0.015754560530679935, |
|
"eval_f1": 0.004968013448467957, |
|
"eval_loss": 6.288573265075684, |
|
"eval_runtime": 0.3772, |
|
"eval_samples_per_second": 3197.112, |
|
"eval_steps_per_second": 26.51, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.01658374792703151, |
|
"eval_f1": 0.004671981131856493, |
|
"eval_loss": 6.297088623046875, |
|
"eval_runtime": 0.3776, |
|
"eval_samples_per_second": 3193.556, |
|
"eval_steps_per_second": 26.481, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_accuracy": 0.013266998341625208, |
|
"eval_f1": 0.004042787456966562, |
|
"eval_loss": 6.216242790222168, |
|
"eval_runtime": 0.3779, |
|
"eval_samples_per_second": 3191.019, |
|
"eval_steps_per_second": 26.46, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"eval_accuracy": 0.020729684908789386, |
|
"eval_f1": 0.008522136976574753, |
|
"eval_loss": 6.187163352966309, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.288, |
|
"eval_steps_per_second": 26.371, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"eval_accuracy": 0.029850746268656716, |
|
"eval_f1": 0.014040979403889549, |
|
"eval_loss": 6.177770137786865, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.276, |
|
"eval_steps_per_second": 26.37, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"eval_accuracy": 0.02902155887230514, |
|
"eval_f1": 0.011104717761289505, |
|
"eval_loss": 6.158536911010742, |
|
"eval_runtime": 0.3786, |
|
"eval_samples_per_second": 3185.18, |
|
"eval_steps_per_second": 26.411, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.02570480928689884, |
|
"eval_f1": 0.01033900277156658, |
|
"eval_loss": 6.1090803146362305, |
|
"eval_runtime": 0.38, |
|
"eval_samples_per_second": 3173.403, |
|
"eval_steps_per_second": 26.313, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"eval_accuracy": 0.03316749585406302, |
|
"eval_f1": 0.015474453231986877, |
|
"eval_loss": 6.08702278137207, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.296, |
|
"eval_steps_per_second": 26.371, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"eval_accuracy": 0.04809286898839138, |
|
"eval_f1": 0.02513988397193506, |
|
"eval_loss": 6.0609517097473145, |
|
"eval_runtime": 0.3801, |
|
"eval_samples_per_second": 3172.69, |
|
"eval_steps_per_second": 26.308, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 0.03814262023217247, |
|
"eval_f1": 0.01813347312377026, |
|
"eval_loss": 6.025730609893799, |
|
"eval_runtime": 0.3785, |
|
"eval_samples_per_second": 3185.844, |
|
"eval_steps_per_second": 26.417, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"eval_accuracy": 0.05555555555555555, |
|
"eval_f1": 0.0316611803261535, |
|
"eval_loss": 6.014459133148193, |
|
"eval_runtime": 0.3787, |
|
"eval_samples_per_second": 3184.258, |
|
"eval_steps_per_second": 26.403, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_accuracy": 0.04311774461028192, |
|
"eval_f1": 0.02307944350501337, |
|
"eval_loss": 5.969913005828857, |
|
"eval_runtime": 0.3804, |
|
"eval_samples_per_second": 3169.946, |
|
"eval_steps_per_second": 26.285, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"eval_accuracy": 0.05638474295190713, |
|
"eval_f1": 0.0302836119107397, |
|
"eval_loss": 5.954006195068359, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3176.755, |
|
"eval_steps_per_second": 26.341, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_accuracy": 0.06301824212271974, |
|
"eval_f1": 0.03687442226058399, |
|
"eval_loss": 5.912012100219727, |
|
"eval_runtime": 0.3781, |
|
"eval_samples_per_second": 3189.303, |
|
"eval_steps_per_second": 26.445, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"eval_accuracy": 0.06384742951907131, |
|
"eval_f1": 0.03535293317796501, |
|
"eval_loss": 5.8892645835876465, |
|
"eval_runtime": 0.3799, |
|
"eval_samples_per_second": 3174.661, |
|
"eval_steps_per_second": 26.324, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"eval_accuracy": 0.07711442786069651, |
|
"eval_f1": 0.04528489909367382, |
|
"eval_loss": 5.885714530944824, |
|
"eval_runtime": 0.3814, |
|
"eval_samples_per_second": 3162.388, |
|
"eval_steps_per_second": 26.222, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.06550580431177445, |
|
"eval_f1": 0.03852630441009513, |
|
"eval_loss": 5.839115142822266, |
|
"eval_runtime": 0.3786, |
|
"eval_samples_per_second": 3185.788, |
|
"eval_steps_per_second": 26.416, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"eval_accuracy": 0.07877280265339967, |
|
"eval_f1": 0.050102448183655314, |
|
"eval_loss": 5.812053203582764, |
|
"eval_runtime": 0.3783, |
|
"eval_samples_per_second": 3187.772, |
|
"eval_steps_per_second": 26.433, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"eval_accuracy": 0.0845771144278607, |
|
"eval_f1": 0.05181174832064436, |
|
"eval_loss": 5.789350986480713, |
|
"eval_runtime": 0.3785, |
|
"eval_samples_per_second": 3186.218, |
|
"eval_steps_per_second": 26.42, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"eval_accuracy": 0.09286898839137644, |
|
"eval_f1": 0.0554051006900966, |
|
"eval_loss": 5.8099284172058105, |
|
"eval_runtime": 0.3795, |
|
"eval_samples_per_second": 3177.946, |
|
"eval_steps_per_second": 26.351, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"eval_accuracy": 0.08208955223880597, |
|
"eval_f1": 0.052706293384051014, |
|
"eval_loss": 5.745517253875732, |
|
"eval_runtime": 0.379, |
|
"eval_samples_per_second": 3182.087, |
|
"eval_steps_per_second": 26.385, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 28.82, |
|
"eval_accuracy": 0.08706467661691543, |
|
"eval_f1": 0.05317439161254717, |
|
"eval_loss": 5.725302696228027, |
|
"eval_runtime": 0.38, |
|
"eval_samples_per_second": 3173.908, |
|
"eval_steps_per_second": 26.318, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 29.88, |
|
"eval_accuracy": 0.09618573797678276, |
|
"eval_f1": 0.05978131215599709, |
|
"eval_loss": 5.701379299163818, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.126, |
|
"eval_steps_per_second": 26.369, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 30.94, |
|
"eval_accuracy": 0.09286898839137644, |
|
"eval_f1": 0.057675351156170285, |
|
"eval_loss": 5.6744208335876465, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.732, |
|
"eval_steps_per_second": 26.374, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 31.79, |
|
"grad_norm": 2.17041015625, |
|
"learning_rate": 1.255e-05, |
|
"loss": 6.0949, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.09950248756218906, |
|
"eval_f1": 0.06490404658367951, |
|
"eval_loss": 5.660266399383545, |
|
"eval_runtime": 0.379, |
|
"eval_samples_per_second": 3182.291, |
|
"eval_steps_per_second": 26.387, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 32.85, |
|
"eval_accuracy": 0.09867330016583747, |
|
"eval_f1": 0.0630415651434736, |
|
"eval_loss": 5.6351704597473145, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3176.904, |
|
"eval_steps_per_second": 26.342, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 33.91, |
|
"eval_accuracy": 0.09867330016583747, |
|
"eval_f1": 0.060441362912511246, |
|
"eval_loss": 5.600429058074951, |
|
"eval_runtime": 0.3801, |
|
"eval_samples_per_second": 3172.485, |
|
"eval_steps_per_second": 26.306, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.10281923714759536, |
|
"eval_f1": 0.0637386932721227, |
|
"eval_loss": 5.598119258880615, |
|
"eval_runtime": 0.3795, |
|
"eval_samples_per_second": 3177.533, |
|
"eval_steps_per_second": 26.348, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"eval_accuracy": 0.1011608623548922, |
|
"eval_f1": 0.06296000608037869, |
|
"eval_loss": 5.570890426635742, |
|
"eval_runtime": 0.3788, |
|
"eval_samples_per_second": 3183.869, |
|
"eval_steps_per_second": 26.4, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 36.87, |
|
"eval_accuracy": 0.10530679933665009, |
|
"eval_f1": 0.06954752384683803, |
|
"eval_loss": 5.540558815002441, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3176.978, |
|
"eval_steps_per_second": 26.343, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 37.93, |
|
"eval_accuracy": 0.11442786069651742, |
|
"eval_f1": 0.07287036657509441, |
|
"eval_loss": 5.520463943481445, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.006, |
|
"eval_steps_per_second": 26.368, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.10862354892205639, |
|
"eval_f1": 0.07094764147857342, |
|
"eval_loss": 5.498680114746094, |
|
"eval_runtime": 0.3784, |
|
"eval_samples_per_second": 3187.079, |
|
"eval_steps_per_second": 26.427, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 39.84, |
|
"eval_accuracy": 0.1111111111111111, |
|
"eval_f1": 0.06995636683961066, |
|
"eval_loss": 5.477349758148193, |
|
"eval_runtime": 0.3792, |
|
"eval_samples_per_second": 3180.04, |
|
"eval_steps_per_second": 26.368, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 40.9, |
|
"eval_accuracy": 0.12023217247097844, |
|
"eval_f1": 0.0803838202287807, |
|
"eval_loss": 5.464529514312744, |
|
"eval_runtime": 0.3783, |
|
"eval_samples_per_second": 3188.288, |
|
"eval_steps_per_second": 26.437, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_accuracy": 0.12189054726368159, |
|
"eval_f1": 0.0797876479133484, |
|
"eval_loss": 5.439937591552734, |
|
"eval_runtime": 0.3805, |
|
"eval_samples_per_second": 3169.807, |
|
"eval_steps_per_second": 26.284, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 42.81, |
|
"eval_accuracy": 0.11774461028192372, |
|
"eval_f1": 0.07397147599836879, |
|
"eval_loss": 5.417842388153076, |
|
"eval_runtime": 0.3791, |
|
"eval_samples_per_second": 3181.28, |
|
"eval_steps_per_second": 26.379, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"eval_accuracy": 0.12271973466003316, |
|
"eval_f1": 0.08191085838440999, |
|
"eval_loss": 5.408046245574951, |
|
"eval_runtime": 0.379, |
|
"eval_samples_per_second": 3181.692, |
|
"eval_steps_per_second": 26.382, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 44.93, |
|
"eval_accuracy": 0.1310116086235489, |
|
"eval_f1": 0.09138005527552184, |
|
"eval_loss": 5.374771595001221, |
|
"eval_runtime": 0.3787, |
|
"eval_samples_per_second": 3184.593, |
|
"eval_steps_per_second": 26.406, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 45.99, |
|
"eval_accuracy": 0.12769485903814262, |
|
"eval_f1": 0.08429454245370348, |
|
"eval_loss": 5.369958400726318, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3177.034, |
|
"eval_steps_per_second": 26.344, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 46.83, |
|
"eval_accuracy": 0.1252072968490879, |
|
"eval_f1": 0.0828030322134829, |
|
"eval_loss": 5.345793724060059, |
|
"eval_runtime": 0.3798, |
|
"eval_samples_per_second": 3175.327, |
|
"eval_steps_per_second": 26.329, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"eval_accuracy": 0.12686567164179105, |
|
"eval_f1": 0.08616638865359265, |
|
"eval_loss": 5.33195686340332, |
|
"eval_runtime": 0.3775, |
|
"eval_samples_per_second": 3194.5, |
|
"eval_steps_per_second": 26.488, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_accuracy": 0.13598673300165837, |
|
"eval_f1": 0.09754942424775702, |
|
"eval_loss": 5.319748878479004, |
|
"eval_runtime": 0.3785, |
|
"eval_samples_per_second": 3186.573, |
|
"eval_steps_per_second": 26.423, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"eval_accuracy": 0.13598673300165837, |
|
"eval_f1": 0.09534346865693065, |
|
"eval_loss": 5.305931091308594, |
|
"eval_runtime": 0.3795, |
|
"eval_samples_per_second": 3178.056, |
|
"eval_steps_per_second": 26.352, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 50.86, |
|
"eval_accuracy": 0.13018242122719734, |
|
"eval_f1": 0.08991729329826506, |
|
"eval_loss": 5.287778377532959, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3176.946, |
|
"eval_steps_per_second": 26.343, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 51.92, |
|
"eval_accuracy": 0.1384742951907131, |
|
"eval_f1": 0.0985219901693966, |
|
"eval_loss": 5.267482757568359, |
|
"eval_runtime": 0.378, |
|
"eval_samples_per_second": 3190.643, |
|
"eval_steps_per_second": 26.456, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"eval_accuracy": 0.13764510779436154, |
|
"eval_f1": 0.09503565447256919, |
|
"eval_loss": 5.257174491882324, |
|
"eval_runtime": 0.3791, |
|
"eval_samples_per_second": 3181.268, |
|
"eval_steps_per_second": 26.379, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 53.83, |
|
"eval_accuracy": 0.1417910447761194, |
|
"eval_f1": 0.09976792054154651, |
|
"eval_loss": 5.241861820220947, |
|
"eval_runtime": 0.378, |
|
"eval_samples_per_second": 3190.114, |
|
"eval_steps_per_second": 26.452, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 54.89, |
|
"eval_accuracy": 0.14925373134328357, |
|
"eval_f1": 0.10946077502309857, |
|
"eval_loss": 5.227325439453125, |
|
"eval_runtime": 0.3793, |
|
"eval_samples_per_second": 3179.266, |
|
"eval_steps_per_second": 26.362, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 55.95, |
|
"eval_accuracy": 0.1451077943615257, |
|
"eval_f1": 0.10515805134406893, |
|
"eval_loss": 5.209225654602051, |
|
"eval_runtime": 0.3796, |
|
"eval_samples_per_second": 3177.24, |
|
"eval_steps_per_second": 26.345, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 56.79, |
|
"eval_accuracy": 0.14262023217247097, |
|
"eval_f1": 0.10262149479931104, |
|
"eval_loss": 5.20381498336792, |
|
"eval_runtime": 0.38, |
|
"eval_samples_per_second": 3174.088, |
|
"eval_steps_per_second": 26.319, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 57.85, |
|
"eval_accuracy": 0.14759535655058043, |
|
"eval_f1": 0.10374650435743883, |
|
"eval_loss": 5.192402362823486, |
|
"eval_runtime": 0.3799, |
|
"eval_samples_per_second": 3174.635, |
|
"eval_steps_per_second": 26.324, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 58.91, |
|
"eval_accuracy": 0.148424543946932, |
|
"eval_f1": 0.10865541288671039, |
|
"eval_loss": 5.173834800720215, |
|
"eval_runtime": 0.3803, |
|
"eval_samples_per_second": 3171.494, |
|
"eval_steps_per_second": 26.298, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 59.97, |
|
"eval_accuracy": 0.15008291873963517, |
|
"eval_f1": 0.10904088165373273, |
|
"eval_loss": 5.164543628692627, |
|
"eval_runtime": 0.3779, |
|
"eval_samples_per_second": 3190.923, |
|
"eval_steps_per_second": 26.459, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 60.82, |
|
"eval_accuracy": 0.15008291873963517, |
|
"eval_f1": 0.10954105182677536, |
|
"eval_loss": 5.1523356437683105, |
|
"eval_runtime": 0.3794, |
|
"eval_samples_per_second": 3178.663, |
|
"eval_steps_per_second": 26.357, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 61.88, |
|
"eval_accuracy": 0.15339966832504145, |
|
"eval_f1": 0.11320704863201249, |
|
"eval_loss": 5.140935897827148, |
|
"eval_runtime": 0.3968, |
|
"eval_samples_per_second": 3039.575, |
|
"eval_steps_per_second": 25.204, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 62.94, |
|
"eval_accuracy": 0.15671641791044777, |
|
"eval_f1": 0.11619378649382489, |
|
"eval_loss": 5.13328218460083, |
|
"eval_runtime": 0.3811, |
|
"eval_samples_per_second": 3164.429, |
|
"eval_steps_per_second": 26.239, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 63.58, |
|
"grad_norm": 1.4214905500411987, |
|
"learning_rate": 5.050000000000001e-06, |
|
"loss": 5.3883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.15754560530679934, |
|
"eval_f1": 0.11639663398572815, |
|
"eval_loss": 5.12091064453125, |
|
"eval_runtime": 0.3803, |
|
"eval_samples_per_second": 3171.109, |
|
"eval_steps_per_second": 26.294, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 64.85, |
|
"eval_accuracy": 0.15754560530679934, |
|
"eval_f1": 0.11683650415743181, |
|
"eval_loss": 5.114450454711914, |
|
"eval_runtime": 0.3804, |
|
"eval_samples_per_second": 3169.982, |
|
"eval_steps_per_second": 26.285, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 65.91, |
|
"eval_accuracy": 0.1550580431177446, |
|
"eval_f1": 0.11358274444416293, |
|
"eval_loss": 5.104104995727539, |
|
"eval_runtime": 0.3928, |
|
"eval_samples_per_second": 3069.891, |
|
"eval_steps_per_second": 25.455, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 66.97, |
|
"eval_accuracy": 0.15671641791044777, |
|
"eval_f1": 0.11432989925366752, |
|
"eval_loss": 5.097550868988037, |
|
"eval_runtime": 0.3806, |
|
"eval_samples_per_second": 3168.505, |
|
"eval_steps_per_second": 26.273, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 67.81, |
|
"eval_accuracy": 0.1583747927031509, |
|
"eval_f1": 0.11862831647923934, |
|
"eval_loss": 5.090635776519775, |
|
"eval_runtime": 0.3895, |
|
"eval_samples_per_second": 3096.27, |
|
"eval_steps_per_second": 25.674, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"eval_accuracy": 0.1625207296849088, |
|
"eval_f1": 0.12161516717070989, |
|
"eval_loss": 5.080664157867432, |
|
"eval_runtime": 0.3789, |
|
"eval_samples_per_second": 3183.124, |
|
"eval_steps_per_second": 26.394, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 69.93, |
|
"eval_accuracy": 0.16169154228855723, |
|
"eval_f1": 0.12018082388719613, |
|
"eval_loss": 5.074178218841553, |
|
"eval_runtime": 0.3782, |
|
"eval_samples_per_second": 3189.028, |
|
"eval_steps_per_second": 26.443, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.1625207296849088, |
|
"eval_f1": 0.12052740787172442, |
|
"eval_loss": 5.066323757171631, |
|
"eval_runtime": 0.3784, |
|
"eval_samples_per_second": 3186.978, |
|
"eval_steps_per_second": 26.426, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 71.84, |
|
"eval_accuracy": 0.1625207296849088, |
|
"eval_f1": 0.12160006289622655, |
|
"eval_loss": 5.062046527862549, |
|
"eval_runtime": 0.3801, |
|
"eval_samples_per_second": 3172.618, |
|
"eval_steps_per_second": 26.307, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 72.9, |
|
"eval_accuracy": 0.16417910447761194, |
|
"eval_f1": 0.12152850729201306, |
|
"eval_loss": 5.056005954742432, |
|
"eval_runtime": 0.381, |
|
"eval_samples_per_second": 3165.275, |
|
"eval_steps_per_second": 26.246, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 73.96, |
|
"eval_accuracy": 0.16666666666666666, |
|
"eval_f1": 0.12561182312709163, |
|
"eval_loss": 5.050036430358887, |
|
"eval_runtime": 0.3802, |
|
"eval_samples_per_second": 3171.695, |
|
"eval_steps_per_second": 26.299, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 74.81, |
|
"eval_accuracy": 0.16832504145936983, |
|
"eval_f1": 0.12484080598728817, |
|
"eval_loss": 5.044373035430908, |
|
"eval_runtime": 0.38, |
|
"eval_samples_per_second": 3173.886, |
|
"eval_steps_per_second": 26.317, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 75.87, |
|
"eval_accuracy": 0.17081260364842454, |
|
"eval_f1": 0.12924225508300144, |
|
"eval_loss": 5.041046619415283, |
|
"eval_runtime": 0.3807, |
|
"eval_samples_per_second": 3168.064, |
|
"eval_steps_per_second": 26.269, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 76.93, |
|
"eval_accuracy": 0.16832504145936983, |
|
"eval_f1": 0.1266665967282326, |
|
"eval_loss": 5.036989688873291, |
|
"eval_runtime": 0.3914, |
|
"eval_samples_per_second": 3081.513, |
|
"eval_steps_per_second": 25.552, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 77.99, |
|
"eval_accuracy": 0.17081260364842454, |
|
"eval_f1": 0.12804840525194425, |
|
"eval_loss": 5.0318603515625, |
|
"eval_runtime": 0.3794, |
|
"eval_samples_per_second": 3178.933, |
|
"eval_steps_per_second": 26.359, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 78.83, |
|
"eval_accuracy": 0.16998341625207297, |
|
"eval_f1": 0.1290972468615459, |
|
"eval_loss": 5.03059720993042, |
|
"eval_runtime": 0.3795, |
|
"eval_samples_per_second": 3178.278, |
|
"eval_steps_per_second": 26.354, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 79.89, |
|
"eval_accuracy": 0.17164179104477612, |
|
"eval_f1": 0.12944060169078692, |
|
"eval_loss": 5.027899265289307, |
|
"eval_runtime": 0.3787, |
|
"eval_samples_per_second": 3184.745, |
|
"eval_steps_per_second": 26.408, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"eval_accuracy": 0.1724709784411277, |
|
"eval_f1": 0.13072735697861348, |
|
"eval_loss": 5.026247501373291, |
|
"eval_runtime": 0.3797, |
|
"eval_samples_per_second": 3176.404, |
|
"eval_steps_per_second": 26.338, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 81.8, |
|
"eval_accuracy": 0.16998341625207297, |
|
"eval_f1": 0.12880907562232394, |
|
"eval_loss": 5.024827003479004, |
|
"eval_runtime": 0.3798, |
|
"eval_samples_per_second": 3175.064, |
|
"eval_steps_per_second": 26.327, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 82.86, |
|
"eval_accuracy": 0.17081260364842454, |
|
"eval_f1": 0.12912358374020652, |
|
"eval_loss": 5.0235443115234375, |
|
"eval_runtime": 0.379, |
|
"eval_samples_per_second": 3182.237, |
|
"eval_steps_per_second": 26.387, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 83.92, |
|
"eval_accuracy": 0.17081260364842454, |
|
"eval_f1": 0.12920331329754803, |
|
"eval_loss": 5.022723197937012, |
|
"eval_runtime": 0.3809, |
|
"eval_samples_per_second": 3166.333, |
|
"eval_steps_per_second": 26.255, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 84.77, |
|
"eval_accuracy": 0.17081260364842454, |
|
"eval_f1": 0.12896882646345054, |
|
"eval_loss": 5.022224426269531, |
|
"eval_runtime": 0.3794, |
|
"eval_samples_per_second": 3178.549, |
|
"eval_steps_per_second": 26.356, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 2.010721223440829e+16, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|