ViMedical_Diseases / trainer_state.json
PB3002's picture
Upload 10 files
27368fa verified
raw
history blame
25 kB
{
"best_metric": 5.022224426269531,
"best_model_checkpoint": "./ViMedical_Diseases/checkpoint-400",
"epoch": 84.76821192052981,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.85,
"eval_accuracy": 0.0024875621890547263,
"eval_f1": 1.2345221781909311e-05,
"eval_loss": 6.415043830871582,
"eval_runtime": 0.3946,
"eval_samples_per_second": 3055.887,
"eval_steps_per_second": 25.339,
"step": 4
},
{
"epoch": 1.91,
"eval_accuracy": 0.0024875621890547263,
"eval_f1": 1.2756729174639624e-05,
"eval_loss": 6.412064075469971,
"eval_runtime": 0.3736,
"eval_samples_per_second": 3227.701,
"eval_steps_per_second": 26.764,
"step": 9
},
{
"epoch": 2.97,
"eval_accuracy": 0.0024875621890547263,
"eval_f1": 1.338598487383709e-05,
"eval_loss": 6.407182693481445,
"eval_runtime": 0.3753,
"eval_samples_per_second": 3213.746,
"eval_steps_per_second": 26.648,
"step": 14
},
{
"epoch": 3.81,
"eval_accuracy": 0.0041459369817578775,
"eval_f1": 0.0005888539889881497,
"eval_loss": 6.397731304168701,
"eval_runtime": 0.3765,
"eval_samples_per_second": 3202.793,
"eval_steps_per_second": 26.557,
"step": 18
},
{
"epoch": 4.87,
"eval_accuracy": 0.0041459369817578775,
"eval_f1": 0.00024318292743308673,
"eval_loss": 6.378692626953125,
"eval_runtime": 0.375,
"eval_samples_per_second": 3215.914,
"eval_steps_per_second": 26.666,
"step": 23
},
{
"epoch": 5.93,
"eval_accuracy": 0.006633499170812604,
"eval_f1": 0.0006805627237639513,
"eval_loss": 6.348721981048584,
"eval_runtime": 0.3755,
"eval_samples_per_second": 3212.024,
"eval_steps_per_second": 26.634,
"step": 28
},
{
"epoch": 6.99,
"eval_accuracy": 0.00912106135986733,
"eval_f1": 0.0044415843789941045,
"eval_loss": 6.305507183074951,
"eval_runtime": 0.3784,
"eval_samples_per_second": 3187.077,
"eval_steps_per_second": 26.427,
"step": 33
},
{
"epoch": 7.84,
"eval_accuracy": 0.015754560530679935,
"eval_f1": 0.004968013448467957,
"eval_loss": 6.288573265075684,
"eval_runtime": 0.3772,
"eval_samples_per_second": 3197.112,
"eval_steps_per_second": 26.51,
"step": 37
},
{
"epoch": 8.9,
"eval_accuracy": 0.01658374792703151,
"eval_f1": 0.004671981131856493,
"eval_loss": 6.297088623046875,
"eval_runtime": 0.3776,
"eval_samples_per_second": 3193.556,
"eval_steps_per_second": 26.481,
"step": 42
},
{
"epoch": 9.96,
"eval_accuracy": 0.013266998341625208,
"eval_f1": 0.004042787456966562,
"eval_loss": 6.216242790222168,
"eval_runtime": 0.3779,
"eval_samples_per_second": 3191.019,
"eval_steps_per_second": 26.46,
"step": 47
},
{
"epoch": 10.81,
"eval_accuracy": 0.020729684908789386,
"eval_f1": 0.008522136976574753,
"eval_loss": 6.187163352966309,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.288,
"eval_steps_per_second": 26.371,
"step": 51
},
{
"epoch": 11.87,
"eval_accuracy": 0.029850746268656716,
"eval_f1": 0.014040979403889549,
"eval_loss": 6.177770137786865,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.276,
"eval_steps_per_second": 26.37,
"step": 56
},
{
"epoch": 12.93,
"eval_accuracy": 0.02902155887230514,
"eval_f1": 0.011104717761289505,
"eval_loss": 6.158536911010742,
"eval_runtime": 0.3786,
"eval_samples_per_second": 3185.18,
"eval_steps_per_second": 26.411,
"step": 61
},
{
"epoch": 13.99,
"eval_accuracy": 0.02570480928689884,
"eval_f1": 0.01033900277156658,
"eval_loss": 6.1090803146362305,
"eval_runtime": 0.38,
"eval_samples_per_second": 3173.403,
"eval_steps_per_second": 26.313,
"step": 66
},
{
"epoch": 14.83,
"eval_accuracy": 0.03316749585406302,
"eval_f1": 0.015474453231986877,
"eval_loss": 6.08702278137207,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.296,
"eval_steps_per_second": 26.371,
"step": 70
},
{
"epoch": 15.89,
"eval_accuracy": 0.04809286898839138,
"eval_f1": 0.02513988397193506,
"eval_loss": 6.0609517097473145,
"eval_runtime": 0.3801,
"eval_samples_per_second": 3172.69,
"eval_steps_per_second": 26.308,
"step": 75
},
{
"epoch": 16.95,
"eval_accuracy": 0.03814262023217247,
"eval_f1": 0.01813347312377026,
"eval_loss": 6.025730609893799,
"eval_runtime": 0.3785,
"eval_samples_per_second": 3185.844,
"eval_steps_per_second": 26.417,
"step": 80
},
{
"epoch": 17.8,
"eval_accuracy": 0.05555555555555555,
"eval_f1": 0.0316611803261535,
"eval_loss": 6.014459133148193,
"eval_runtime": 0.3787,
"eval_samples_per_second": 3184.258,
"eval_steps_per_second": 26.403,
"step": 84
},
{
"epoch": 18.86,
"eval_accuracy": 0.04311774461028192,
"eval_f1": 0.02307944350501337,
"eval_loss": 5.969913005828857,
"eval_runtime": 0.3804,
"eval_samples_per_second": 3169.946,
"eval_steps_per_second": 26.285,
"step": 89
},
{
"epoch": 19.92,
"eval_accuracy": 0.05638474295190713,
"eval_f1": 0.0302836119107397,
"eval_loss": 5.954006195068359,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3176.755,
"eval_steps_per_second": 26.341,
"step": 94
},
{
"epoch": 20.98,
"eval_accuracy": 0.06301824212271974,
"eval_f1": 0.03687442226058399,
"eval_loss": 5.912012100219727,
"eval_runtime": 0.3781,
"eval_samples_per_second": 3189.303,
"eval_steps_per_second": 26.445,
"step": 99
},
{
"epoch": 21.83,
"eval_accuracy": 0.06384742951907131,
"eval_f1": 0.03535293317796501,
"eval_loss": 5.8892645835876465,
"eval_runtime": 0.3799,
"eval_samples_per_second": 3174.661,
"eval_steps_per_second": 26.324,
"step": 103
},
{
"epoch": 22.89,
"eval_accuracy": 0.07711442786069651,
"eval_f1": 0.04528489909367382,
"eval_loss": 5.885714530944824,
"eval_runtime": 0.3814,
"eval_samples_per_second": 3162.388,
"eval_steps_per_second": 26.222,
"step": 108
},
{
"epoch": 23.95,
"eval_accuracy": 0.06550580431177445,
"eval_f1": 0.03852630441009513,
"eval_loss": 5.839115142822266,
"eval_runtime": 0.3786,
"eval_samples_per_second": 3185.788,
"eval_steps_per_second": 26.416,
"step": 113
},
{
"epoch": 24.79,
"eval_accuracy": 0.07877280265339967,
"eval_f1": 0.050102448183655314,
"eval_loss": 5.812053203582764,
"eval_runtime": 0.3783,
"eval_samples_per_second": 3187.772,
"eval_steps_per_second": 26.433,
"step": 117
},
{
"epoch": 25.85,
"eval_accuracy": 0.0845771144278607,
"eval_f1": 0.05181174832064436,
"eval_loss": 5.789350986480713,
"eval_runtime": 0.3785,
"eval_samples_per_second": 3186.218,
"eval_steps_per_second": 26.42,
"step": 122
},
{
"epoch": 26.91,
"eval_accuracy": 0.09286898839137644,
"eval_f1": 0.0554051006900966,
"eval_loss": 5.8099284172058105,
"eval_runtime": 0.3795,
"eval_samples_per_second": 3177.946,
"eval_steps_per_second": 26.351,
"step": 127
},
{
"epoch": 27.97,
"eval_accuracy": 0.08208955223880597,
"eval_f1": 0.052706293384051014,
"eval_loss": 5.745517253875732,
"eval_runtime": 0.379,
"eval_samples_per_second": 3182.087,
"eval_steps_per_second": 26.385,
"step": 132
},
{
"epoch": 28.82,
"eval_accuracy": 0.08706467661691543,
"eval_f1": 0.05317439161254717,
"eval_loss": 5.725302696228027,
"eval_runtime": 0.38,
"eval_samples_per_second": 3173.908,
"eval_steps_per_second": 26.318,
"step": 136
},
{
"epoch": 29.88,
"eval_accuracy": 0.09618573797678276,
"eval_f1": 0.05978131215599709,
"eval_loss": 5.701379299163818,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.126,
"eval_steps_per_second": 26.369,
"step": 141
},
{
"epoch": 30.94,
"eval_accuracy": 0.09286898839137644,
"eval_f1": 0.057675351156170285,
"eval_loss": 5.6744208335876465,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.732,
"eval_steps_per_second": 26.374,
"step": 146
},
{
"epoch": 31.79,
"grad_norm": 2.17041015625,
"learning_rate": 1.255e-05,
"loss": 6.0949,
"step": 150
},
{
"epoch": 32.0,
"eval_accuracy": 0.09950248756218906,
"eval_f1": 0.06490404658367951,
"eval_loss": 5.660266399383545,
"eval_runtime": 0.379,
"eval_samples_per_second": 3182.291,
"eval_steps_per_second": 26.387,
"step": 151
},
{
"epoch": 32.85,
"eval_accuracy": 0.09867330016583747,
"eval_f1": 0.0630415651434736,
"eval_loss": 5.6351704597473145,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3176.904,
"eval_steps_per_second": 26.342,
"step": 155
},
{
"epoch": 33.91,
"eval_accuracy": 0.09867330016583747,
"eval_f1": 0.060441362912511246,
"eval_loss": 5.600429058074951,
"eval_runtime": 0.3801,
"eval_samples_per_second": 3172.485,
"eval_steps_per_second": 26.306,
"step": 160
},
{
"epoch": 34.97,
"eval_accuracy": 0.10281923714759536,
"eval_f1": 0.0637386932721227,
"eval_loss": 5.598119258880615,
"eval_runtime": 0.3795,
"eval_samples_per_second": 3177.533,
"eval_steps_per_second": 26.348,
"step": 165
},
{
"epoch": 35.81,
"eval_accuracy": 0.1011608623548922,
"eval_f1": 0.06296000608037869,
"eval_loss": 5.570890426635742,
"eval_runtime": 0.3788,
"eval_samples_per_second": 3183.869,
"eval_steps_per_second": 26.4,
"step": 169
},
{
"epoch": 36.87,
"eval_accuracy": 0.10530679933665009,
"eval_f1": 0.06954752384683803,
"eval_loss": 5.540558815002441,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3176.978,
"eval_steps_per_second": 26.343,
"step": 174
},
{
"epoch": 37.93,
"eval_accuracy": 0.11442786069651742,
"eval_f1": 0.07287036657509441,
"eval_loss": 5.520463943481445,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.006,
"eval_steps_per_second": 26.368,
"step": 179
},
{
"epoch": 38.99,
"eval_accuracy": 0.10862354892205639,
"eval_f1": 0.07094764147857342,
"eval_loss": 5.498680114746094,
"eval_runtime": 0.3784,
"eval_samples_per_second": 3187.079,
"eval_steps_per_second": 26.427,
"step": 184
},
{
"epoch": 39.84,
"eval_accuracy": 0.1111111111111111,
"eval_f1": 0.06995636683961066,
"eval_loss": 5.477349758148193,
"eval_runtime": 0.3792,
"eval_samples_per_second": 3180.04,
"eval_steps_per_second": 26.368,
"step": 188
},
{
"epoch": 40.9,
"eval_accuracy": 0.12023217247097844,
"eval_f1": 0.0803838202287807,
"eval_loss": 5.464529514312744,
"eval_runtime": 0.3783,
"eval_samples_per_second": 3188.288,
"eval_steps_per_second": 26.437,
"step": 193
},
{
"epoch": 41.96,
"eval_accuracy": 0.12189054726368159,
"eval_f1": 0.0797876479133484,
"eval_loss": 5.439937591552734,
"eval_runtime": 0.3805,
"eval_samples_per_second": 3169.807,
"eval_steps_per_second": 26.284,
"step": 198
},
{
"epoch": 42.81,
"eval_accuracy": 0.11774461028192372,
"eval_f1": 0.07397147599836879,
"eval_loss": 5.417842388153076,
"eval_runtime": 0.3791,
"eval_samples_per_second": 3181.28,
"eval_steps_per_second": 26.379,
"step": 202
},
{
"epoch": 43.87,
"eval_accuracy": 0.12271973466003316,
"eval_f1": 0.08191085838440999,
"eval_loss": 5.408046245574951,
"eval_runtime": 0.379,
"eval_samples_per_second": 3181.692,
"eval_steps_per_second": 26.382,
"step": 207
},
{
"epoch": 44.93,
"eval_accuracy": 0.1310116086235489,
"eval_f1": 0.09138005527552184,
"eval_loss": 5.374771595001221,
"eval_runtime": 0.3787,
"eval_samples_per_second": 3184.593,
"eval_steps_per_second": 26.406,
"step": 212
},
{
"epoch": 45.99,
"eval_accuracy": 0.12769485903814262,
"eval_f1": 0.08429454245370348,
"eval_loss": 5.369958400726318,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3177.034,
"eval_steps_per_second": 26.344,
"step": 217
},
{
"epoch": 46.83,
"eval_accuracy": 0.1252072968490879,
"eval_f1": 0.0828030322134829,
"eval_loss": 5.345793724060059,
"eval_runtime": 0.3798,
"eval_samples_per_second": 3175.327,
"eval_steps_per_second": 26.329,
"step": 221
},
{
"epoch": 47.89,
"eval_accuracy": 0.12686567164179105,
"eval_f1": 0.08616638865359265,
"eval_loss": 5.33195686340332,
"eval_runtime": 0.3775,
"eval_samples_per_second": 3194.5,
"eval_steps_per_second": 26.488,
"step": 226
},
{
"epoch": 48.95,
"eval_accuracy": 0.13598673300165837,
"eval_f1": 0.09754942424775702,
"eval_loss": 5.319748878479004,
"eval_runtime": 0.3785,
"eval_samples_per_second": 3186.573,
"eval_steps_per_second": 26.423,
"step": 231
},
{
"epoch": 49.8,
"eval_accuracy": 0.13598673300165837,
"eval_f1": 0.09534346865693065,
"eval_loss": 5.305931091308594,
"eval_runtime": 0.3795,
"eval_samples_per_second": 3178.056,
"eval_steps_per_second": 26.352,
"step": 235
},
{
"epoch": 50.86,
"eval_accuracy": 0.13018242122719734,
"eval_f1": 0.08991729329826506,
"eval_loss": 5.287778377532959,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3176.946,
"eval_steps_per_second": 26.343,
"step": 240
},
{
"epoch": 51.92,
"eval_accuracy": 0.1384742951907131,
"eval_f1": 0.0985219901693966,
"eval_loss": 5.267482757568359,
"eval_runtime": 0.378,
"eval_samples_per_second": 3190.643,
"eval_steps_per_second": 26.456,
"step": 245
},
{
"epoch": 52.98,
"eval_accuracy": 0.13764510779436154,
"eval_f1": 0.09503565447256919,
"eval_loss": 5.257174491882324,
"eval_runtime": 0.3791,
"eval_samples_per_second": 3181.268,
"eval_steps_per_second": 26.379,
"step": 250
},
{
"epoch": 53.83,
"eval_accuracy": 0.1417910447761194,
"eval_f1": 0.09976792054154651,
"eval_loss": 5.241861820220947,
"eval_runtime": 0.378,
"eval_samples_per_second": 3190.114,
"eval_steps_per_second": 26.452,
"step": 254
},
{
"epoch": 54.89,
"eval_accuracy": 0.14925373134328357,
"eval_f1": 0.10946077502309857,
"eval_loss": 5.227325439453125,
"eval_runtime": 0.3793,
"eval_samples_per_second": 3179.266,
"eval_steps_per_second": 26.362,
"step": 259
},
{
"epoch": 55.95,
"eval_accuracy": 0.1451077943615257,
"eval_f1": 0.10515805134406893,
"eval_loss": 5.209225654602051,
"eval_runtime": 0.3796,
"eval_samples_per_second": 3177.24,
"eval_steps_per_second": 26.345,
"step": 264
},
{
"epoch": 56.79,
"eval_accuracy": 0.14262023217247097,
"eval_f1": 0.10262149479931104,
"eval_loss": 5.20381498336792,
"eval_runtime": 0.38,
"eval_samples_per_second": 3174.088,
"eval_steps_per_second": 26.319,
"step": 268
},
{
"epoch": 57.85,
"eval_accuracy": 0.14759535655058043,
"eval_f1": 0.10374650435743883,
"eval_loss": 5.192402362823486,
"eval_runtime": 0.3799,
"eval_samples_per_second": 3174.635,
"eval_steps_per_second": 26.324,
"step": 273
},
{
"epoch": 58.91,
"eval_accuracy": 0.148424543946932,
"eval_f1": 0.10865541288671039,
"eval_loss": 5.173834800720215,
"eval_runtime": 0.3803,
"eval_samples_per_second": 3171.494,
"eval_steps_per_second": 26.298,
"step": 278
},
{
"epoch": 59.97,
"eval_accuracy": 0.15008291873963517,
"eval_f1": 0.10904088165373273,
"eval_loss": 5.164543628692627,
"eval_runtime": 0.3779,
"eval_samples_per_second": 3190.923,
"eval_steps_per_second": 26.459,
"step": 283
},
{
"epoch": 60.82,
"eval_accuracy": 0.15008291873963517,
"eval_f1": 0.10954105182677536,
"eval_loss": 5.1523356437683105,
"eval_runtime": 0.3794,
"eval_samples_per_second": 3178.663,
"eval_steps_per_second": 26.357,
"step": 287
},
{
"epoch": 61.88,
"eval_accuracy": 0.15339966832504145,
"eval_f1": 0.11320704863201249,
"eval_loss": 5.140935897827148,
"eval_runtime": 0.3968,
"eval_samples_per_second": 3039.575,
"eval_steps_per_second": 25.204,
"step": 292
},
{
"epoch": 62.94,
"eval_accuracy": 0.15671641791044777,
"eval_f1": 0.11619378649382489,
"eval_loss": 5.13328218460083,
"eval_runtime": 0.3811,
"eval_samples_per_second": 3164.429,
"eval_steps_per_second": 26.239,
"step": 297
},
{
"epoch": 63.58,
"grad_norm": 1.4214905500411987,
"learning_rate": 5.050000000000001e-06,
"loss": 5.3883,
"step": 300
},
{
"epoch": 64.0,
"eval_accuracy": 0.15754560530679934,
"eval_f1": 0.11639663398572815,
"eval_loss": 5.12091064453125,
"eval_runtime": 0.3803,
"eval_samples_per_second": 3171.109,
"eval_steps_per_second": 26.294,
"step": 302
},
{
"epoch": 64.85,
"eval_accuracy": 0.15754560530679934,
"eval_f1": 0.11683650415743181,
"eval_loss": 5.114450454711914,
"eval_runtime": 0.3804,
"eval_samples_per_second": 3169.982,
"eval_steps_per_second": 26.285,
"step": 306
},
{
"epoch": 65.91,
"eval_accuracy": 0.1550580431177446,
"eval_f1": 0.11358274444416293,
"eval_loss": 5.104104995727539,
"eval_runtime": 0.3928,
"eval_samples_per_second": 3069.891,
"eval_steps_per_second": 25.455,
"step": 311
},
{
"epoch": 66.97,
"eval_accuracy": 0.15671641791044777,
"eval_f1": 0.11432989925366752,
"eval_loss": 5.097550868988037,
"eval_runtime": 0.3806,
"eval_samples_per_second": 3168.505,
"eval_steps_per_second": 26.273,
"step": 316
},
{
"epoch": 67.81,
"eval_accuracy": 0.1583747927031509,
"eval_f1": 0.11862831647923934,
"eval_loss": 5.090635776519775,
"eval_runtime": 0.3895,
"eval_samples_per_second": 3096.27,
"eval_steps_per_second": 25.674,
"step": 320
},
{
"epoch": 68.87,
"eval_accuracy": 0.1625207296849088,
"eval_f1": 0.12161516717070989,
"eval_loss": 5.080664157867432,
"eval_runtime": 0.3789,
"eval_samples_per_second": 3183.124,
"eval_steps_per_second": 26.394,
"step": 325
},
{
"epoch": 69.93,
"eval_accuracy": 0.16169154228855723,
"eval_f1": 0.12018082388719613,
"eval_loss": 5.074178218841553,
"eval_runtime": 0.3782,
"eval_samples_per_second": 3189.028,
"eval_steps_per_second": 26.443,
"step": 330
},
{
"epoch": 70.99,
"eval_accuracy": 0.1625207296849088,
"eval_f1": 0.12052740787172442,
"eval_loss": 5.066323757171631,
"eval_runtime": 0.3784,
"eval_samples_per_second": 3186.978,
"eval_steps_per_second": 26.426,
"step": 335
},
{
"epoch": 71.84,
"eval_accuracy": 0.1625207296849088,
"eval_f1": 0.12160006289622655,
"eval_loss": 5.062046527862549,
"eval_runtime": 0.3801,
"eval_samples_per_second": 3172.618,
"eval_steps_per_second": 26.307,
"step": 339
},
{
"epoch": 72.9,
"eval_accuracy": 0.16417910447761194,
"eval_f1": 0.12152850729201306,
"eval_loss": 5.056005954742432,
"eval_runtime": 0.381,
"eval_samples_per_second": 3165.275,
"eval_steps_per_second": 26.246,
"step": 344
},
{
"epoch": 73.96,
"eval_accuracy": 0.16666666666666666,
"eval_f1": 0.12561182312709163,
"eval_loss": 5.050036430358887,
"eval_runtime": 0.3802,
"eval_samples_per_second": 3171.695,
"eval_steps_per_second": 26.299,
"step": 349
},
{
"epoch": 74.81,
"eval_accuracy": 0.16832504145936983,
"eval_f1": 0.12484080598728817,
"eval_loss": 5.044373035430908,
"eval_runtime": 0.38,
"eval_samples_per_second": 3173.886,
"eval_steps_per_second": 26.317,
"step": 353
},
{
"epoch": 75.87,
"eval_accuracy": 0.17081260364842454,
"eval_f1": 0.12924225508300144,
"eval_loss": 5.041046619415283,
"eval_runtime": 0.3807,
"eval_samples_per_second": 3168.064,
"eval_steps_per_second": 26.269,
"step": 358
},
{
"epoch": 76.93,
"eval_accuracy": 0.16832504145936983,
"eval_f1": 0.1266665967282326,
"eval_loss": 5.036989688873291,
"eval_runtime": 0.3914,
"eval_samples_per_second": 3081.513,
"eval_steps_per_second": 25.552,
"step": 363
},
{
"epoch": 77.99,
"eval_accuracy": 0.17081260364842454,
"eval_f1": 0.12804840525194425,
"eval_loss": 5.0318603515625,
"eval_runtime": 0.3794,
"eval_samples_per_second": 3178.933,
"eval_steps_per_second": 26.359,
"step": 368
},
{
"epoch": 78.83,
"eval_accuracy": 0.16998341625207297,
"eval_f1": 0.1290972468615459,
"eval_loss": 5.03059720993042,
"eval_runtime": 0.3795,
"eval_samples_per_second": 3178.278,
"eval_steps_per_second": 26.354,
"step": 372
},
{
"epoch": 79.89,
"eval_accuracy": 0.17164179104477612,
"eval_f1": 0.12944060169078692,
"eval_loss": 5.027899265289307,
"eval_runtime": 0.3787,
"eval_samples_per_second": 3184.745,
"eval_steps_per_second": 26.408,
"step": 377
},
{
"epoch": 80.95,
"eval_accuracy": 0.1724709784411277,
"eval_f1": 0.13072735697861348,
"eval_loss": 5.026247501373291,
"eval_runtime": 0.3797,
"eval_samples_per_second": 3176.404,
"eval_steps_per_second": 26.338,
"step": 382
},
{
"epoch": 81.8,
"eval_accuracy": 0.16998341625207297,
"eval_f1": 0.12880907562232394,
"eval_loss": 5.024827003479004,
"eval_runtime": 0.3798,
"eval_samples_per_second": 3175.064,
"eval_steps_per_second": 26.327,
"step": 386
},
{
"epoch": 82.86,
"eval_accuracy": 0.17081260364842454,
"eval_f1": 0.12912358374020652,
"eval_loss": 5.0235443115234375,
"eval_runtime": 0.379,
"eval_samples_per_second": 3182.237,
"eval_steps_per_second": 26.387,
"step": 391
},
{
"epoch": 83.92,
"eval_accuracy": 0.17081260364842454,
"eval_f1": 0.12920331329754803,
"eval_loss": 5.022723197937012,
"eval_runtime": 0.3809,
"eval_samples_per_second": 3166.333,
"eval_steps_per_second": 26.255,
"step": 396
},
{
"epoch": 84.77,
"eval_accuracy": 0.17081260364842454,
"eval_f1": 0.12896882646345054,
"eval_loss": 5.022224426269531,
"eval_runtime": 0.3794,
"eval_samples_per_second": 3178.549,
"eval_steps_per_second": 26.356,
"step": 400
}
],
"logging_steps": 150,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 2.010721223440829e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}