videomae-base-finetuned-ucf101 / trainer_state.json
nateraw's picture
Upload . with huggingface_hub
8fcb3b6
{
"best_metric": 0.7513241767883301,
"best_model_checkpoint": "videomae-base-finetuned-ucf101-nomixup/checkpoint-5500",
"epoch": 36.91275167785235,
"global_step": 5500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 6.7114093959731546e-06,
"loss": 4.685,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 1.3422818791946309e-05,
"loss": 4.6973,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 2.013422818791946e-05,
"loss": 4.6677,
"step": 30
},
{
"epoch": 0.27,
"learning_rate": 2.6845637583892618e-05,
"loss": 4.6252,
"step": 40
},
{
"epoch": 0.34,
"learning_rate": 3.3557046979865775e-05,
"loss": 4.6242,
"step": 50
},
{
"epoch": 0.4,
"learning_rate": 4.026845637583892e-05,
"loss": 4.5581,
"step": 60
},
{
"epoch": 0.47,
"learning_rate": 4.697986577181208e-05,
"loss": 4.5285,
"step": 70
},
{
"epoch": 0.54,
"learning_rate": 5.3691275167785237e-05,
"loss": 4.5188,
"step": 80
},
{
"epoch": 0.6,
"learning_rate": 6.040268456375839e-05,
"loss": 4.431,
"step": 90
},
{
"epoch": 0.67,
"learning_rate": 6.711409395973155e-05,
"loss": 4.3994,
"step": 100
},
{
"epoch": 0.74,
"learning_rate": 7.38255033557047e-05,
"loss": 4.3375,
"step": 110
},
{
"epoch": 0.81,
"learning_rate": 8.053691275167784e-05,
"loss": 4.2659,
"step": 120
},
{
"epoch": 0.87,
"learning_rate": 8.7248322147651e-05,
"loss": 4.1503,
"step": 130
},
{
"epoch": 0.94,
"learning_rate": 9.395973154362417e-05,
"loss": 4.0464,
"step": 140
},
{
"epoch": 1.01,
"learning_rate": 0.00010067114093959731,
"loss": 3.9801,
"step": 150
},
{
"epoch": 1.07,
"learning_rate": 0.00010738255033557047,
"loss": 3.9336,
"step": 160
},
{
"epoch": 1.14,
"learning_rate": 0.00011409395973154363,
"loss": 3.8316,
"step": 170
},
{
"epoch": 1.21,
"learning_rate": 0.00012080536912751678,
"loss": 3.7867,
"step": 180
},
{
"epoch": 1.28,
"learning_rate": 0.00012751677852348994,
"loss": 3.6977,
"step": 190
},
{
"epoch": 1.34,
"learning_rate": 0.0001342281879194631,
"loss": 3.5853,
"step": 200
},
{
"epoch": 1.41,
"learning_rate": 0.00014093959731543624,
"loss": 3.4901,
"step": 210
},
{
"epoch": 1.48,
"learning_rate": 0.0001476510067114094,
"loss": 3.4679,
"step": 220
},
{
"epoch": 1.54,
"learning_rate": 0.00015436241610738256,
"loss": 3.4984,
"step": 230
},
{
"epoch": 1.61,
"learning_rate": 0.0001610738255033557,
"loss": 3.3598,
"step": 240
},
{
"epoch": 1.68,
"learning_rate": 0.00016778523489932888,
"loss": 3.2679,
"step": 250
},
{
"epoch": 1.74,
"learning_rate": 0.000174496644295302,
"loss": 3.2981,
"step": 260
},
{
"epoch": 1.81,
"learning_rate": 0.00018120805369127517,
"loss": 3.2664,
"step": 270
},
{
"epoch": 1.88,
"learning_rate": 0.00018791946308724833,
"loss": 3.2929,
"step": 280
},
{
"epoch": 1.95,
"learning_rate": 0.00019463087248322146,
"loss": 3.2039,
"step": 290
},
{
"epoch": 2.01,
"learning_rate": 0.00020134228187919463,
"loss": 3.1763,
"step": 300
},
{
"epoch": 2.08,
"learning_rate": 0.00020805369127516779,
"loss": 3.0554,
"step": 310
},
{
"epoch": 2.15,
"learning_rate": 0.00021476510067114095,
"loss": 2.9937,
"step": 320
},
{
"epoch": 2.21,
"learning_rate": 0.0002214765100671141,
"loss": 3.0402,
"step": 330
},
{
"epoch": 2.28,
"learning_rate": 0.00022818791946308727,
"loss": 3.0012,
"step": 340
},
{
"epoch": 2.35,
"learning_rate": 0.0002348993288590604,
"loss": 2.9892,
"step": 350
},
{
"epoch": 2.42,
"learning_rate": 0.00024161073825503356,
"loss": 3.0453,
"step": 360
},
{
"epoch": 2.48,
"learning_rate": 0.0002483221476510067,
"loss": 2.9497,
"step": 370
},
{
"epoch": 2.55,
"learning_rate": 0.0002550335570469799,
"loss": 2.8924,
"step": 380
},
{
"epoch": 2.62,
"learning_rate": 0.000261744966442953,
"loss": 2.9597,
"step": 390
},
{
"epoch": 2.68,
"learning_rate": 0.0002684563758389262,
"loss": 3.0828,
"step": 400
},
{
"epoch": 2.75,
"learning_rate": 0.00027516778523489934,
"loss": 3.0353,
"step": 410
},
{
"epoch": 2.82,
"learning_rate": 0.00028187919463087247,
"loss": 2.8786,
"step": 420
},
{
"epoch": 2.89,
"learning_rate": 0.00028859060402684566,
"loss": 2.9322,
"step": 430
},
{
"epoch": 2.95,
"learning_rate": 0.0002953020134228188,
"loss": 2.7467,
"step": 440
},
{
"epoch": 3.02,
"learning_rate": 0.000302013422818792,
"loss": 2.8574,
"step": 450
},
{
"epoch": 3.09,
"learning_rate": 0.0003087248322147651,
"loss": 2.7929,
"step": 460
},
{
"epoch": 3.15,
"learning_rate": 0.00031543624161073825,
"loss": 2.8987,
"step": 470
},
{
"epoch": 3.22,
"learning_rate": 0.0003221476510067114,
"loss": 2.8908,
"step": 480
},
{
"epoch": 3.29,
"learning_rate": 0.0003288590604026846,
"loss": 2.6509,
"step": 490
},
{
"epoch": 3.36,
"learning_rate": 0.00033557046979865775,
"loss": 2.8576,
"step": 500
},
{
"epoch": 3.36,
"eval_accuracy": 0.5911017060279846,
"eval_accuracy_top5": 0.727224588394165,
"eval_loss": 1.8057622909545898,
"eval_runtime": 104.4766,
"eval_samples_per_second": 36.209,
"eval_steps_per_second": 4.527,
"step": 500
},
{
"epoch": 3.42,
"learning_rate": 0.0003422818791946309,
"loss": 2.7528,
"step": 510
},
{
"epoch": 3.49,
"learning_rate": 0.000348993288590604,
"loss": 2.7427,
"step": 520
},
{
"epoch": 3.56,
"learning_rate": 0.00035570469798657715,
"loss": 2.8489,
"step": 530
},
{
"epoch": 3.62,
"learning_rate": 0.00036241610738255034,
"loss": 2.8167,
"step": 540
},
{
"epoch": 3.69,
"learning_rate": 0.00036912751677852353,
"loss": 2.7375,
"step": 550
},
{
"epoch": 3.76,
"learning_rate": 0.00037583892617449666,
"loss": 2.8108,
"step": 560
},
{
"epoch": 3.83,
"learning_rate": 0.0003825503355704698,
"loss": 2.8261,
"step": 570
},
{
"epoch": 3.89,
"learning_rate": 0.00038926174496644293,
"loss": 2.7504,
"step": 580
},
{
"epoch": 3.96,
"learning_rate": 0.0003959731543624161,
"loss": 2.802,
"step": 590
},
{
"epoch": 4.03,
"learning_rate": 0.00040268456375838925,
"loss": 2.7507,
"step": 600
},
{
"epoch": 4.09,
"learning_rate": 0.00040939597315436244,
"loss": 2.8335,
"step": 610
},
{
"epoch": 4.16,
"learning_rate": 0.00041610738255033557,
"loss": 2.6994,
"step": 620
},
{
"epoch": 4.23,
"learning_rate": 0.00042281879194630876,
"loss": 2.8312,
"step": 630
},
{
"epoch": 4.3,
"learning_rate": 0.0004295302013422819,
"loss": 2.8237,
"step": 640
},
{
"epoch": 4.36,
"learning_rate": 0.000436241610738255,
"loss": 2.849,
"step": 650
},
{
"epoch": 4.43,
"learning_rate": 0.0004429530201342282,
"loss": 2.8233,
"step": 660
},
{
"epoch": 4.5,
"learning_rate": 0.00044966442953020135,
"loss": 2.7256,
"step": 670
},
{
"epoch": 4.56,
"learning_rate": 0.00045637583892617453,
"loss": 2.8312,
"step": 680
},
{
"epoch": 4.63,
"learning_rate": 0.00046308724832214767,
"loss": 2.7241,
"step": 690
},
{
"epoch": 4.7,
"learning_rate": 0.0004697986577181208,
"loss": 2.6984,
"step": 700
},
{
"epoch": 4.77,
"learning_rate": 0.00047651006711409394,
"loss": 2.7826,
"step": 710
},
{
"epoch": 4.83,
"learning_rate": 0.0004832214765100671,
"loss": 2.781,
"step": 720
},
{
"epoch": 4.9,
"learning_rate": 0.0004899328859060403,
"loss": 2.7601,
"step": 730
},
{
"epoch": 4.97,
"learning_rate": 0.0004966442953020134,
"loss": 2.744,
"step": 740
},
{
"epoch": 5.03,
"learning_rate": 0.0004999988659275162,
"loss": 2.8382,
"step": 750
},
{
"epoch": 5.1,
"learning_rate": 0.0004999897934093798,
"loss": 2.8987,
"step": 760
},
{
"epoch": 5.17,
"learning_rate": 0.0004999716487023506,
"loss": 2.7493,
"step": 770
},
{
"epoch": 5.23,
"learning_rate": 0.0004999444324649045,
"loss": 2.8423,
"step": 780
},
{
"epoch": 5.3,
"learning_rate": 0.0004999081456847252,
"loss": 2.8108,
"step": 790
},
{
"epoch": 5.37,
"learning_rate": 0.0004998627896786686,
"loss": 2.7675,
"step": 800
},
{
"epoch": 5.44,
"learning_rate": 0.0004998083660927148,
"loss": 2.5845,
"step": 810
},
{
"epoch": 5.5,
"learning_rate": 0.0004997448769019087,
"loss": 2.7445,
"step": 820
},
{
"epoch": 5.57,
"learning_rate": 0.0004996723244102881,
"loss": 2.6608,
"step": 830
},
{
"epoch": 5.64,
"learning_rate": 0.0004995907112508002,
"loss": 2.6423,
"step": 840
},
{
"epoch": 5.7,
"learning_rate": 0.0004995000403852056,
"loss": 2.706,
"step": 850
},
{
"epoch": 5.77,
"learning_rate": 0.0004994003151039719,
"loss": 2.7687,
"step": 860
},
{
"epoch": 5.84,
"learning_rate": 0.000499291539026153,
"loss": 2.761,
"step": 870
},
{
"epoch": 5.91,
"learning_rate": 0.0004991737160992588,
"loss": 2.6858,
"step": 880
},
{
"epoch": 5.97,
"learning_rate": 0.000499046850599111,
"loss": 2.7172,
"step": 890
},
{
"epoch": 6.04,
"learning_rate": 0.0004989109471296887,
"loss": 2.8251,
"step": 900
},
{
"epoch": 6.11,
"learning_rate": 0.000498766010622961,
"loss": 2.6963,
"step": 910
},
{
"epoch": 6.17,
"learning_rate": 0.0004986120463387084,
"loss": 2.4729,
"step": 920
},
{
"epoch": 6.24,
"learning_rate": 0.000498449059864331,
"loss": 2.7372,
"step": 930
},
{
"epoch": 6.31,
"learning_rate": 0.0004982770571146468,
"loss": 2.8074,
"step": 940
},
{
"epoch": 6.38,
"learning_rate": 0.0004980960443316762,
"loss": 2.7279,
"step": 950
},
{
"epoch": 6.44,
"learning_rate": 0.0004979060280844162,
"loss": 2.6071,
"step": 960
},
{
"epoch": 6.51,
"learning_rate": 0.0004977070152686013,
"loss": 2.7364,
"step": 970
},
{
"epoch": 6.58,
"learning_rate": 0.000497499013106454,
"loss": 2.6095,
"step": 980
},
{
"epoch": 6.64,
"learning_rate": 0.0004972820291464219,
"loss": 2.574,
"step": 990
},
{
"epoch": 6.71,
"learning_rate": 0.0004970560712629044,
"loss": 2.6651,
"step": 1000
},
{
"epoch": 6.71,
"eval_accuracy": 0.5632944703102112,
"eval_accuracy_top5": 0.7002118825912476,
"eval_loss": 1.7887717485427856,
"eval_runtime": 1057.27,
"eval_samples_per_second": 3.578,
"eval_steps_per_second": 0.447,
"step": 1000
},
{
"epoch": 6.78,
"learning_rate": 0.0004968211476559667,
"loss": 2.7145,
"step": 1010
},
{
"epoch": 6.85,
"learning_rate": 0.0004965772668510425,
"loss": 2.6658,
"step": 1020
},
{
"epoch": 6.91,
"learning_rate": 0.0004963244376986235,
"loss": 2.7473,
"step": 1030
},
{
"epoch": 6.98,
"learning_rate": 0.0004960626693739401,
"loss": 2.8268,
"step": 1040
},
{
"epoch": 7.05,
"learning_rate": 0.0004957919713766266,
"loss": 2.5706,
"step": 1050
},
{
"epoch": 7.11,
"learning_rate": 0.0004955123535303775,
"loss": 2.645,
"step": 1060
},
{
"epoch": 7.18,
"learning_rate": 0.0004952238259825911,
"loss": 2.68,
"step": 1070
},
{
"epoch": 7.25,
"learning_rate": 0.0004949263992040001,
"loss": 2.6571,
"step": 1080
},
{
"epoch": 7.32,
"learning_rate": 0.0004946200839882932,
"loss": 2.4641,
"step": 1090
},
{
"epoch": 7.38,
"learning_rate": 0.000494304891451722,
"loss": 2.6196,
"step": 1100
},
{
"epoch": 7.45,
"learning_rate": 0.0004939808330326985,
"loss": 2.5998,
"step": 1110
},
{
"epoch": 7.52,
"learning_rate": 0.0004936479204913795,
"loss": 2.6489,
"step": 1120
},
{
"epoch": 7.58,
"learning_rate": 0.0004933061659092401,
"loss": 2.6613,
"step": 1130
},
{
"epoch": 7.65,
"learning_rate": 0.0004929555816886352,
"loss": 2.533,
"step": 1140
},
{
"epoch": 7.72,
"learning_rate": 0.0004925961805523493,
"loss": 2.6853,
"step": 1150
},
{
"epoch": 7.79,
"learning_rate": 0.0004922279755431349,
"loss": 2.6887,
"step": 1160
},
{
"epoch": 7.85,
"learning_rate": 0.0004918509800232392,
"loss": 2.6397,
"step": 1170
},
{
"epoch": 7.92,
"learning_rate": 0.0004914652076739187,
"loss": 2.4045,
"step": 1180
},
{
"epoch": 7.99,
"learning_rate": 0.0004910706724949438,
"loss": 2.5858,
"step": 1190
},
{
"epoch": 8.05,
"learning_rate": 0.0004906673888040895,
"loss": 2.5493,
"step": 1200
},
{
"epoch": 8.12,
"learning_rate": 0.0004902553712366165,
"loss": 2.5056,
"step": 1210
},
{
"epoch": 8.19,
"learning_rate": 0.0004898346347447405,
"loss": 2.6127,
"step": 1220
},
{
"epoch": 8.26,
"learning_rate": 0.0004894051945970881,
"loss": 2.519,
"step": 1230
},
{
"epoch": 8.32,
"learning_rate": 0.0004889670663781443,
"loss": 2.4318,
"step": 1240
},
{
"epoch": 8.39,
"learning_rate": 0.0004885202659876861,
"loss": 2.4674,
"step": 1250
},
{
"epoch": 8.46,
"learning_rate": 0.00048806480964020554,
"loss": 2.5532,
"step": 1260
},
{
"epoch": 8.52,
"learning_rate": 0.0004876007138643216,
"loss": 2.5892,
"step": 1270
},
{
"epoch": 8.59,
"learning_rate": 0.00048712799550218,
"loss": 2.5353,
"step": 1280
},
{
"epoch": 8.66,
"learning_rate": 0.00048664667170884217,
"loss": 2.4692,
"step": 1290
},
{
"epoch": 8.72,
"learning_rate": 0.0004861567599516628,
"loss": 2.5329,
"step": 1300
},
{
"epoch": 8.79,
"learning_rate": 0.0004856582780096558,
"loss": 2.5185,
"step": 1310
},
{
"epoch": 8.86,
"learning_rate": 0.0004851512439728489,
"loss": 2.509,
"step": 1320
},
{
"epoch": 8.93,
"learning_rate": 0.0004846356762416276,
"loss": 2.4121,
"step": 1330
},
{
"epoch": 8.99,
"learning_rate": 0.00048411159352606734,
"loss": 2.528,
"step": 1340
},
{
"epoch": 9.06,
"learning_rate": 0.0004835790148452539,
"loss": 2.57,
"step": 1350
},
{
"epoch": 9.13,
"learning_rate": 0.000483037959526594,
"loss": 2.3809,
"step": 1360
},
{
"epoch": 9.19,
"learning_rate": 0.0004824884472051136,
"loss": 2.4399,
"step": 1370
},
{
"epoch": 9.26,
"learning_rate": 0.000481930497822745,
"loss": 2.4777,
"step": 1380
},
{
"epoch": 9.33,
"learning_rate": 0.00048136413162760375,
"loss": 2.6037,
"step": 1390
},
{
"epoch": 9.4,
"learning_rate": 0.00048078936917325335,
"loss": 2.4826,
"step": 1400
},
{
"epoch": 9.46,
"learning_rate": 0.0004802062313179595,
"loss": 2.5202,
"step": 1410
},
{
"epoch": 9.53,
"learning_rate": 0.0004796147392239334,
"loss": 2.5156,
"step": 1420
},
{
"epoch": 9.6,
"learning_rate": 0.00047901491435656333,
"loss": 2.4923,
"step": 1430
},
{
"epoch": 9.66,
"learning_rate": 0.000478406778483636,
"loss": 2.4254,
"step": 1440
},
{
"epoch": 9.73,
"learning_rate": 0.0004777903536745466,
"loss": 2.5741,
"step": 1450
},
{
"epoch": 9.8,
"learning_rate": 0.0004771656622994974,
"loss": 2.5019,
"step": 1460
},
{
"epoch": 9.87,
"learning_rate": 0.00047653272702868676,
"loss": 2.5268,
"step": 1470
},
{
"epoch": 9.93,
"learning_rate": 0.0004758915708314858,
"loss": 2.4928,
"step": 1480
},
{
"epoch": 10.0,
"learning_rate": 0.00047524221697560476,
"loss": 2.5171,
"step": 1490
},
{
"epoch": 10.07,
"learning_rate": 0.0004745846890262493,
"loss": 2.3321,
"step": 1500
},
{
"epoch": 10.07,
"eval_accuracy": 0.5985169410705566,
"eval_accuracy_top5": 0.7327859997749329,
"eval_loss": 1.7438325881958008,
"eval_runtime": 103.1085,
"eval_samples_per_second": 36.69,
"eval_steps_per_second": 4.587,
"step": 1500
},
{
"epoch": 10.13,
"learning_rate": 0.0004739190108452645,
"loss": 2.4042,
"step": 1510
},
{
"epoch": 10.2,
"learning_rate": 0.0004732452065902695,
"loss": 2.4647,
"step": 1520
},
{
"epoch": 10.27,
"learning_rate": 0.0004725633007137805,
"loss": 2.4076,
"step": 1530
},
{
"epoch": 10.34,
"learning_rate": 0.00047187331796232345,
"loss": 2.2986,
"step": 1540
},
{
"epoch": 10.4,
"learning_rate": 0.00047117528337553615,
"loss": 2.5126,
"step": 1550
},
{
"epoch": 10.47,
"learning_rate": 0.00047046922228525926,
"loss": 2.4857,
"step": 1560
},
{
"epoch": 10.54,
"learning_rate": 0.0004697551603146171,
"loss": 2.4434,
"step": 1570
},
{
"epoch": 10.6,
"learning_rate": 0.000469033123377088,
"loss": 2.3844,
"step": 1580
},
{
"epoch": 10.67,
"learning_rate": 0.00046830313767556365,
"loss": 2.4232,
"step": 1590
},
{
"epoch": 10.74,
"learning_rate": 0.0004675652297013984,
"loss": 2.4638,
"step": 1600
},
{
"epoch": 10.81,
"learning_rate": 0.0004668194262334475,
"loss": 2.3976,
"step": 1610
},
{
"epoch": 10.87,
"learning_rate": 0.0004660657543370958,
"loss": 2.407,
"step": 1620
},
{
"epoch": 10.94,
"learning_rate": 0.0004653042413632751,
"loss": 2.42,
"step": 1630
},
{
"epoch": 11.01,
"learning_rate": 0.0004645349149474718,
"loss": 2.4782,
"step": 1640
},
{
"epoch": 11.07,
"learning_rate": 0.00046375780300872405,
"loss": 2.3456,
"step": 1650
},
{
"epoch": 11.14,
"learning_rate": 0.00046297293374860846,
"loss": 2.4317,
"step": 1660
},
{
"epoch": 11.21,
"learning_rate": 0.00046218033565021647,
"loss": 2.315,
"step": 1670
},
{
"epoch": 11.28,
"learning_rate": 0.0004613800374771212,
"loss": 2.3308,
"step": 1680
},
{
"epoch": 11.34,
"learning_rate": 0.000460572068272333,
"loss": 2.4006,
"step": 1690
},
{
"epoch": 11.41,
"learning_rate": 0.0004597564573572461,
"loss": 2.4491,
"step": 1700
},
{
"epoch": 11.48,
"learning_rate": 0.000458933234330574,
"loss": 2.4763,
"step": 1710
},
{
"epoch": 11.54,
"learning_rate": 0.0004581024290672755,
"loss": 2.4014,
"step": 1720
},
{
"epoch": 11.61,
"learning_rate": 0.00045726407171747075,
"loss": 2.5596,
"step": 1730
},
{
"epoch": 11.68,
"learning_rate": 0.0004564181927053468,
"loss": 2.4823,
"step": 1740
},
{
"epoch": 11.74,
"learning_rate": 0.0004555648227280535,
"loss": 2.4451,
"step": 1750
},
{
"epoch": 11.81,
"learning_rate": 0.00045470399275458984,
"loss": 2.4504,
"step": 1760
},
{
"epoch": 11.88,
"learning_rate": 0.00045383573402467967,
"loss": 2.3143,
"step": 1770
},
{
"epoch": 11.95,
"learning_rate": 0.00045296007804763815,
"loss": 2.2971,
"step": 1780
},
{
"epoch": 12.01,
"learning_rate": 0.00045207705660122853,
"loss": 2.2387,
"step": 1790
},
{
"epoch": 12.08,
"learning_rate": 0.00045118670173050846,
"loss": 2.386,
"step": 1800
},
{
"epoch": 12.15,
"learning_rate": 0.0004502890457466673,
"loss": 2.3928,
"step": 1810
},
{
"epoch": 12.21,
"learning_rate": 0.00044938412122585373,
"loss": 2.3766,
"step": 1820
},
{
"epoch": 12.28,
"learning_rate": 0.00044847196100799306,
"loss": 2.3229,
"step": 1830
},
{
"epoch": 12.35,
"learning_rate": 0.000447552598195596,
"loss": 2.1737,
"step": 1840
},
{
"epoch": 12.42,
"learning_rate": 0.00044662606615255705,
"loss": 2.3663,
"step": 1850
},
{
"epoch": 12.48,
"learning_rate": 0.0004456923985029439,
"loss": 2.3396,
"step": 1860
},
{
"epoch": 12.55,
"learning_rate": 0.0004447516291297768,
"loss": 2.5236,
"step": 1870
},
{
"epoch": 12.62,
"learning_rate": 0.0004438037921737997,
"loss": 2.242,
"step": 1880
},
{
"epoch": 12.68,
"learning_rate": 0.00044284892203224024,
"loss": 2.3837,
"step": 1890
},
{
"epoch": 12.75,
"learning_rate": 0.00044188705335756253,
"loss": 2.3464,
"step": 1900
},
{
"epoch": 12.82,
"learning_rate": 0.0004409182210562087,
"loss": 2.2084,
"step": 1910
},
{
"epoch": 12.89,
"learning_rate": 0.00043994246028733296,
"loss": 2.3124,
"step": 1920
},
{
"epoch": 12.95,
"learning_rate": 0.0004389598064615249,
"loss": 2.1847,
"step": 1930
},
{
"epoch": 13.02,
"learning_rate": 0.0004379702952395249,
"loss": 2.2371,
"step": 1940
},
{
"epoch": 13.09,
"learning_rate": 0.00043697396253093,
"loss": 2.383,
"step": 1950
},
{
"epoch": 13.15,
"learning_rate": 0.0004359708444928904,
"loss": 2.2143,
"step": 1960
},
{
"epoch": 13.22,
"learning_rate": 0.0004349609775287977,
"loss": 2.2602,
"step": 1970
},
{
"epoch": 13.29,
"learning_rate": 0.0004339443982869634,
"loss": 2.2835,
"step": 1980
},
{
"epoch": 13.36,
"learning_rate": 0.00043292114365928934,
"loss": 2.2528,
"step": 1990
},
{
"epoch": 13.42,
"learning_rate": 0.00043189125077992867,
"loss": 2.3348,
"step": 2000
},
{
"epoch": 13.42,
"eval_accuracy": 0.6295021176338196,
"eval_accuracy_top5": 0.7356991767883301,
"eval_loss": 1.62944495677948,
"eval_runtime": 99.1269,
"eval_samples_per_second": 38.163,
"eval_steps_per_second": 4.772,
"step": 2000
},
{
"epoch": 13.49,
"learning_rate": 0.00043085475702393827,
"loss": 2.3693,
"step": 2010
},
{
"epoch": 13.56,
"learning_rate": 0.0004298117000059223,
"loss": 2.2296,
"step": 2020
},
{
"epoch": 13.62,
"learning_rate": 0.0004287621175786674,
"loss": 2.2473,
"step": 2030
},
{
"epoch": 13.69,
"learning_rate": 0.0004277060478317687,
"loss": 2.2507,
"step": 2040
},
{
"epoch": 13.76,
"learning_rate": 0.00042664352909024784,
"loss": 2.3107,
"step": 2050
},
{
"epoch": 13.83,
"learning_rate": 0.00042557459991316197,
"loss": 2.3385,
"step": 2060
},
{
"epoch": 13.89,
"learning_rate": 0.0004244992990922043,
"loss": 2.2102,
"step": 2070
},
{
"epoch": 13.96,
"learning_rate": 0.0004234176656502968,
"loss": 2.3437,
"step": 2080
},
{
"epoch": 14.03,
"learning_rate": 0.00042232973884017355,
"loss": 2.3511,
"step": 2090
},
{
"epoch": 14.09,
"learning_rate": 0.00042123555814295655,
"loss": 2.3496,
"step": 2100
},
{
"epoch": 14.16,
"learning_rate": 0.00042013516326672276,
"loss": 2.2043,
"step": 2110
},
{
"epoch": 14.23,
"learning_rate": 0.0004190285941450632,
"loss": 2.2971,
"step": 2120
},
{
"epoch": 14.3,
"learning_rate": 0.00041791589093563385,
"loss": 2.2116,
"step": 2130
},
{
"epoch": 14.36,
"learning_rate": 0.000416797094018698,
"loss": 2.2991,
"step": 2140
},
{
"epoch": 14.43,
"learning_rate": 0.00041567224399566104,
"loss": 2.2796,
"step": 2150
},
{
"epoch": 14.5,
"learning_rate": 0.00041454138168759726,
"loss": 2.3402,
"step": 2160
},
{
"epoch": 14.56,
"learning_rate": 0.00041340454813376784,
"loss": 2.3073,
"step": 2170
},
{
"epoch": 14.63,
"learning_rate": 0.0004122617845901322,
"loss": 2.2733,
"step": 2180
},
{
"epoch": 14.7,
"learning_rate": 0.0004111131325278502,
"loss": 2.2013,
"step": 2190
},
{
"epoch": 14.77,
"learning_rate": 0.0004099586336317779,
"loss": 2.2275,
"step": 2200
},
{
"epoch": 14.83,
"learning_rate": 0.00040879832979895376,
"loss": 2.2008,
"step": 2210
},
{
"epoch": 14.9,
"learning_rate": 0.00040763226313707924,
"loss": 2.1913,
"step": 2220
},
{
"epoch": 14.97,
"learning_rate": 0.00040646047596299,
"loss": 2.2507,
"step": 2230
},
{
"epoch": 15.03,
"learning_rate": 0.0004052830108011204,
"loss": 2.138,
"step": 2240
},
{
"epoch": 15.1,
"learning_rate": 0.00040409991038196057,
"loss": 2.2183,
"step": 2250
},
{
"epoch": 15.17,
"learning_rate": 0.00040291121764050533,
"loss": 2.2204,
"step": 2260
},
{
"epoch": 15.23,
"learning_rate": 0.000401716975714696,
"loss": 2.1662,
"step": 2270
},
{
"epoch": 15.3,
"learning_rate": 0.0004005172279438555,
"loss": 2.3825,
"step": 2280
},
{
"epoch": 15.37,
"learning_rate": 0.00039931201786711493,
"loss": 2.2073,
"step": 2290
},
{
"epoch": 15.44,
"learning_rate": 0.0003981013892218336,
"loss": 2.1114,
"step": 2300
},
{
"epoch": 15.5,
"learning_rate": 0.0003968853859420125,
"loss": 2.2326,
"step": 2310
},
{
"epoch": 15.57,
"learning_rate": 0.0003956640521566989,
"loss": 2.2042,
"step": 2320
},
{
"epoch": 15.64,
"learning_rate": 0.00039443743218838553,
"loss": 2.1792,
"step": 2330
},
{
"epoch": 15.7,
"learning_rate": 0.00039320557055140195,
"loss": 2.1393,
"step": 2340
},
{
"epoch": 15.77,
"learning_rate": 0.00039196851195029913,
"loss": 2.1661,
"step": 2350
},
{
"epoch": 15.84,
"learning_rate": 0.00039072630127822707,
"loss": 2.2561,
"step": 2360
},
{
"epoch": 15.91,
"learning_rate": 0.0003894789836153058,
"loss": 2.1827,
"step": 2370
},
{
"epoch": 15.97,
"learning_rate": 0.00038822660422698895,
"loss": 2.2583,
"step": 2380
},
{
"epoch": 16.04,
"learning_rate": 0.00038696920856242176,
"loss": 2.1149,
"step": 2390
},
{
"epoch": 16.11,
"learning_rate": 0.0003857068422527908,
"loss": 2.1045,
"step": 2400
},
{
"epoch": 16.17,
"learning_rate": 0.0003844395511096692,
"loss": 2.2076,
"step": 2410
},
{
"epoch": 16.24,
"learning_rate": 0.00038316738112335294,
"loss": 2.2332,
"step": 2420
},
{
"epoch": 16.31,
"learning_rate": 0.00038189037846119247,
"loss": 2.1815,
"step": 2430
},
{
"epoch": 16.38,
"learning_rate": 0.0003806085894659174,
"loss": 2.139,
"step": 2440
},
{
"epoch": 16.44,
"learning_rate": 0.00037932206065395427,
"loss": 2.0076,
"step": 2450
},
{
"epoch": 16.51,
"learning_rate": 0.0003780308387137387,
"loss": 2.2418,
"step": 2460
},
{
"epoch": 16.58,
"learning_rate": 0.00037673497050402143,
"loss": 2.1668,
"step": 2470
},
{
"epoch": 16.64,
"learning_rate": 0.0003754345030521669,
"loss": 2.1749,
"step": 2480
},
{
"epoch": 16.71,
"learning_rate": 0.0003741294835524475,
"loss": 2.1023,
"step": 2490
},
{
"epoch": 16.78,
"learning_rate": 0.00037281995936433026,
"loss": 2.0315,
"step": 2500
},
{
"epoch": 16.78,
"eval_accuracy": 0.6353284120559692,
"eval_accuracy_top5": 0.7513241767883301,
"eval_loss": 1.620951771736145,
"eval_runtime": 106.1983,
"eval_samples_per_second": 35.622,
"eval_steps_per_second": 4.454,
"step": 2500
},
{
"epoch": 16.85,
"learning_rate": 0.0003715059780107586,
"loss": 2.1777,
"step": 2510
},
{
"epoch": 16.91,
"learning_rate": 0.0003701875871764275,
"loss": 2.0452,
"step": 2520
},
{
"epoch": 16.98,
"learning_rate": 0.0003688648347060529,
"loss": 2.2884,
"step": 2530
},
{
"epoch": 17.05,
"learning_rate": 0.00036753776860263594,
"loss": 2.1402,
"step": 2540
},
{
"epoch": 17.11,
"learning_rate": 0.00036620643702571997,
"loss": 2.0759,
"step": 2550
},
{
"epoch": 17.18,
"learning_rate": 0.00036487088828964377,
"loss": 2.0357,
"step": 2560
},
{
"epoch": 17.25,
"learning_rate": 0.00036353117086178777,
"loss": 2.0975,
"step": 2570
},
{
"epoch": 17.32,
"learning_rate": 0.00036218733336081506,
"loss": 2.2448,
"step": 2580
},
{
"epoch": 17.38,
"learning_rate": 0.00036083942455490735,
"loss": 2.1251,
"step": 2590
},
{
"epoch": 17.45,
"learning_rate": 0.0003594874933599949,
"loss": 2.1602,
"step": 2600
},
{
"epoch": 17.52,
"learning_rate": 0.00035813158883798134,
"loss": 2.1744,
"step": 2610
},
{
"epoch": 17.58,
"learning_rate": 0.00035677176019496343,
"loss": 2.1631,
"step": 2620
},
{
"epoch": 17.65,
"learning_rate": 0.0003554080567794451,
"loss": 2.1669,
"step": 2630
},
{
"epoch": 17.72,
"learning_rate": 0.0003540405280805467,
"loss": 2.1181,
"step": 2640
},
{
"epoch": 17.79,
"learning_rate": 0.00035266922372620906,
"loss": 2.0952,
"step": 2650
},
{
"epoch": 17.85,
"learning_rate": 0.00035129419348139246,
"loss": 2.1549,
"step": 2660
},
{
"epoch": 17.92,
"learning_rate": 0.0003499154872462705,
"loss": 2.1738,
"step": 2670
},
{
"epoch": 17.99,
"learning_rate": 0.0003485331550544197,
"loss": 2.1793,
"step": 2680
},
{
"epoch": 18.05,
"learning_rate": 0.00034714724707100283,
"loss": 2.1329,
"step": 2690
},
{
"epoch": 18.12,
"learning_rate": 0.00034575781359094947,
"loss": 2.0711,
"step": 2700
},
{
"epoch": 18.19,
"learning_rate": 0.0003443649050371301,
"loss": 2.0466,
"step": 2710
},
{
"epoch": 18.26,
"learning_rate": 0.00034296857195852647,
"loss": 2.1244,
"step": 2720
},
{
"epoch": 18.32,
"learning_rate": 0.0003415688650283973,
"loss": 2.1518,
"step": 2730
},
{
"epoch": 18.39,
"learning_rate": 0.0003401658350424389,
"loss": 2.1096,
"step": 2740
},
{
"epoch": 18.46,
"learning_rate": 0.0003387595329169423,
"loss": 2.0938,
"step": 2750
},
{
"epoch": 18.52,
"learning_rate": 0.0003373500096869451,
"loss": 2.1085,
"step": 2760
},
{
"epoch": 18.59,
"learning_rate": 0.0003359373165043796,
"loss": 2.0453,
"step": 2770
},
{
"epoch": 18.66,
"learning_rate": 0.0003345215046362165,
"loss": 2.2149,
"step": 2780
},
{
"epoch": 18.72,
"learning_rate": 0.00033310262546260434,
"loss": 2.1468,
"step": 2790
},
{
"epoch": 18.79,
"learning_rate": 0.0003316807304750047,
"loss": 2.0384,
"step": 2800
},
{
"epoch": 18.86,
"learning_rate": 0.0003302558712743241,
"loss": 2.0865,
"step": 2810
},
{
"epoch": 18.93,
"learning_rate": 0.00032882809956904065,
"loss": 2.1671,
"step": 2820
},
{
"epoch": 18.99,
"learning_rate": 0.0003273974671733283,
"loss": 2.0147,
"step": 2830
},
{
"epoch": 19.06,
"learning_rate": 0.0003259640260051759,
"loss": 1.9943,
"step": 2840
},
{
"epoch": 19.13,
"learning_rate": 0.00032452782808450355,
"loss": 1.9631,
"step": 2850
},
{
"epoch": 19.19,
"learning_rate": 0.00032308892553127425,
"loss": 2.1063,
"step": 2860
},
{
"epoch": 19.26,
"learning_rate": 0.00032164737056360307,
"loss": 1.9979,
"step": 2870
},
{
"epoch": 19.33,
"learning_rate": 0.0003202032154958615,
"loss": 1.9259,
"step": 2880
},
{
"epoch": 19.4,
"learning_rate": 0.0003187565127367797,
"loss": 1.9233,
"step": 2890
},
{
"epoch": 19.46,
"learning_rate": 0.00031730731478754365,
"loss": 2.0203,
"step": 2900
},
{
"epoch": 19.53,
"learning_rate": 0.00031585567423989084,
"loss": 2.0238,
"step": 2910
},
{
"epoch": 19.6,
"learning_rate": 0.00031440164377420085,
"loss": 2.0497,
"step": 2920
},
{
"epoch": 19.66,
"learning_rate": 0.0003129452761575843,
"loss": 1.9399,
"step": 2930
},
{
"epoch": 19.73,
"learning_rate": 0.00031148662424196723,
"loss": 2.0291,
"step": 2940
},
{
"epoch": 19.8,
"learning_rate": 0.0003100257409621738,
"loss": 2.0845,
"step": 2950
},
{
"epoch": 19.87,
"learning_rate": 0.0003085626793340045,
"loss": 1.9692,
"step": 2960
},
{
"epoch": 19.93,
"learning_rate": 0.00030709749245231297,
"loss": 2.0479,
"step": 2970
},
{
"epoch": 20.0,
"learning_rate": 0.0003056302334890786,
"loss": 2.0145,
"step": 2980
},
{
"epoch": 20.07,
"learning_rate": 0.00030416095569147717,
"loss": 1.9802,
"step": 2990
},
{
"epoch": 20.13,
"learning_rate": 0.00030268971237994835,
"loss": 2.0705,
"step": 3000
},
{
"epoch": 20.13,
"eval_accuracy": 0.6464512944221497,
"eval_accuracy_top5": 0.7627118825912476,
"eval_loss": 1.5695319175720215,
"eval_runtime": 101.6469,
"eval_samples_per_second": 37.217,
"eval_steps_per_second": 4.653,
"step": 3000
},
{
"epoch": 20.2,
"learning_rate": 0.00030121655694626096,
"loss": 1.9999,
"step": 3010
},
{
"epoch": 20.27,
"learning_rate": 0.00029974154285157496,
"loss": 2.0053,
"step": 3020
},
{
"epoch": 20.34,
"learning_rate": 0.0002982647236245017,
"loss": 1.9484,
"step": 3030
},
{
"epoch": 20.4,
"learning_rate": 0.00029678615285916126,
"loss": 1.9829,
"step": 3040
},
{
"epoch": 20.47,
"learning_rate": 0.0002953058842132373,
"loss": 1.9263,
"step": 3050
},
{
"epoch": 20.54,
"learning_rate": 0.00029382397140603013,
"loss": 1.955,
"step": 3060
},
{
"epoch": 20.6,
"learning_rate": 0.00029234046821650694,
"loss": 2.0098,
"step": 3070
},
{
"epoch": 20.67,
"learning_rate": 0.0002908554284813503,
"loss": 1.9278,
"step": 3080
},
{
"epoch": 20.74,
"learning_rate": 0.00028936890609300447,
"loss": 2.0939,
"step": 3090
},
{
"epoch": 20.81,
"learning_rate": 0.00028788095499771943,
"loss": 1.9715,
"step": 3100
},
{
"epoch": 20.87,
"learning_rate": 0.0002863916291935933,
"loss": 2.0042,
"step": 3110
},
{
"epoch": 20.94,
"learning_rate": 0.00028490098272861283,
"loss": 2.0696,
"step": 3120
},
{
"epoch": 21.01,
"learning_rate": 0.0002834090696986916,
"loss": 2.0059,
"step": 3130
},
{
"epoch": 21.07,
"learning_rate": 0.00028191594424570754,
"loss": 1.9319,
"step": 3140
},
{
"epoch": 21.14,
"learning_rate": 0.00028042166055553723,
"loss": 2.0266,
"step": 3150
},
{
"epoch": 21.21,
"learning_rate": 0.00027892627285609035,
"loss": 1.9712,
"step": 3160
},
{
"epoch": 21.28,
"learning_rate": 0.0002774298354153411,
"loss": 1.9637,
"step": 3170
},
{
"epoch": 21.34,
"learning_rate": 0.0002759324025393591,
"loss": 1.9696,
"step": 3180
},
{
"epoch": 21.41,
"learning_rate": 0.00027443402857033864,
"loss": 1.9457,
"step": 3190
},
{
"epoch": 21.48,
"learning_rate": 0.00027293476788462623,
"loss": 1.9282,
"step": 3200
},
{
"epoch": 21.54,
"learning_rate": 0.000271434674890748,
"loss": 2.0554,
"step": 3210
},
{
"epoch": 21.61,
"learning_rate": 0.0002699338040274343,
"loss": 1.9161,
"step": 3220
},
{
"epoch": 21.68,
"learning_rate": 0.0002684322097616448,
"loss": 1.9126,
"step": 3230
},
{
"epoch": 21.74,
"learning_rate": 0.0002669299465865914,
"loss": 2.0167,
"step": 3240
},
{
"epoch": 21.81,
"learning_rate": 0.00026542706901976125,
"loss": 1.9754,
"step": 3250
},
{
"epoch": 21.88,
"learning_rate": 0.00026392363160093746,
"loss": 1.9503,
"step": 3260
},
{
"epoch": 21.95,
"learning_rate": 0.00026241968889022065,
"loss": 2.0577,
"step": 3270
},
{
"epoch": 22.01,
"learning_rate": 0.0002609152954660484,
"loss": 1.9624,
"step": 3280
},
{
"epoch": 22.08,
"learning_rate": 0.000259410505923215,
"loss": 1.9243,
"step": 3290
},
{
"epoch": 22.15,
"learning_rate": 0.0002579053748708897,
"loss": 1.9068,
"step": 3300
},
{
"epoch": 22.21,
"learning_rate": 0.0002563999569306355,
"loss": 1.8909,
"step": 3310
},
{
"epoch": 22.28,
"learning_rate": 0.00025489430673442635,
"loss": 1.9244,
"step": 3320
},
{
"epoch": 22.35,
"learning_rate": 0.0002533884789226651,
"loss": 1.9617,
"step": 3330
},
{
"epoch": 22.42,
"learning_rate": 0.00025188252814220004,
"loss": 1.8954,
"step": 3340
},
{
"epoch": 22.48,
"learning_rate": 0.0002503765090443422,
"loss": 1.95,
"step": 3350
},
{
"epoch": 22.55,
"learning_rate": 0.0002488704762828819,
"loss": 1.9047,
"step": 3360
},
{
"epoch": 22.62,
"learning_rate": 0.0002473644845121051,
"loss": 2.0102,
"step": 3370
},
{
"epoch": 22.68,
"learning_rate": 0.00024585858838481055,
"loss": 1.9811,
"step": 3380
},
{
"epoch": 22.75,
"learning_rate": 0.0002443528425503256,
"loss": 1.8512,
"step": 3390
},
{
"epoch": 22.82,
"learning_rate": 0.00024284730165252387,
"loss": 1.929,
"step": 3400
},
{
"epoch": 22.89,
"learning_rate": 0.00024134202032784156,
"loss": 1.8707,
"step": 3410
},
{
"epoch": 22.95,
"learning_rate": 0.00023983705320329508,
"loss": 1.8647,
"step": 3420
},
{
"epoch": 23.02,
"learning_rate": 0.00023833245489449805,
"loss": 1.9206,
"step": 3430
},
{
"epoch": 23.09,
"learning_rate": 0.00023682828000368012,
"loss": 1.8319,
"step": 3440
},
{
"epoch": 23.15,
"learning_rate": 0.00023532458311770486,
"loss": 1.8411,
"step": 3450
},
{
"epoch": 23.22,
"learning_rate": 0.0002338214188060887,
"loss": 1.7666,
"step": 3460
},
{
"epoch": 23.29,
"learning_rate": 0.0002323188416190211,
"loss": 1.8131,
"step": 3470
},
{
"epoch": 23.36,
"learning_rate": 0.00023081690608538463,
"loss": 1.9416,
"step": 3480
},
{
"epoch": 23.42,
"learning_rate": 0.00022931566671077603,
"loss": 1.8075,
"step": 3490
},
{
"epoch": 23.49,
"learning_rate": 0.0002278151779755281,
"loss": 1.9087,
"step": 3500
},
{
"epoch": 23.49,
"eval_accuracy": 0.6922669410705566,
"eval_accuracy_top5": 0.7862817645072937,
"eval_loss": 1.3863152265548706,
"eval_runtime": 99.8777,
"eval_samples_per_second": 37.876,
"eval_steps_per_second": 4.736,
"step": 3500
},
{
"epoch": 23.56,
"learning_rate": 0.00022631549433273294,
"loss": 1.9428,
"step": 3510
},
{
"epoch": 23.62,
"learning_rate": 0.0002248166702062657,
"loss": 1.8847,
"step": 3520
},
{
"epoch": 23.69,
"learning_rate": 0.00022331875998880943,
"loss": 1.9183,
"step": 3530
},
{
"epoch": 23.76,
"learning_rate": 0.0002218218180398811,
"loss": 1.9658,
"step": 3540
},
{
"epoch": 23.83,
"learning_rate": 0.0002203258986838591,
"loss": 1.8596,
"step": 3550
},
{
"epoch": 23.89,
"learning_rate": 0.00021883105620801164,
"loss": 1.9158,
"step": 3560
},
{
"epoch": 23.96,
"learning_rate": 0.0002173373448605267,
"loss": 1.8477,
"step": 3570
},
{
"epoch": 24.03,
"learning_rate": 0.0002158448188485433,
"loss": 1.9931,
"step": 3580
},
{
"epoch": 24.09,
"learning_rate": 0.00021435353233618446,
"loss": 1.8643,
"step": 3590
},
{
"epoch": 24.16,
"learning_rate": 0.00021286353944259138,
"loss": 1.8867,
"step": 3600
},
{
"epoch": 24.23,
"learning_rate": 0.00021137489423995963,
"loss": 1.9012,
"step": 3610
},
{
"epoch": 24.3,
"learning_rate": 0.00020988765075157648,
"loss": 1.8438,
"step": 3620
},
{
"epoch": 24.36,
"learning_rate": 0.00020840186294986097,
"loss": 1.8712,
"step": 3630
},
{
"epoch": 24.43,
"learning_rate": 0.00020691758475440482,
"loss": 1.8504,
"step": 3640
},
{
"epoch": 24.5,
"learning_rate": 0.0002054348700300158,
"loss": 1.9375,
"step": 3650
},
{
"epoch": 24.56,
"learning_rate": 0.00020395377258476278,
"loss": 1.8488,
"step": 3660
},
{
"epoch": 24.63,
"learning_rate": 0.0002024743461680234,
"loss": 1.8815,
"step": 3670
},
{
"epoch": 24.7,
"learning_rate": 0.00020099664446853328,
"loss": 1.8098,
"step": 3680
},
{
"epoch": 24.77,
"learning_rate": 0.00019952072111243738,
"loss": 1.8489,
"step": 3690
},
{
"epoch": 24.83,
"learning_rate": 0.00019804662966134442,
"loss": 1.9232,
"step": 3700
},
{
"epoch": 24.9,
"learning_rate": 0.0001965744236103828,
"loss": 1.8177,
"step": 3710
},
{
"epoch": 24.97,
"learning_rate": 0.0001951041563862593,
"loss": 1.9149,
"step": 3720
},
{
"epoch": 25.03,
"learning_rate": 0.00019363588134532007,
"loss": 1.8291,
"step": 3730
},
{
"epoch": 25.1,
"learning_rate": 0.0001921696517716147,
"loss": 1.883,
"step": 3740
},
{
"epoch": 25.17,
"learning_rate": 0.00019070552087496203,
"loss": 1.8461,
"step": 3750
},
{
"epoch": 25.23,
"learning_rate": 0.0001892435417890197,
"loss": 1.8295,
"step": 3760
},
{
"epoch": 25.3,
"learning_rate": 0.00018778376756935534,
"loss": 1.887,
"step": 3770
},
{
"epoch": 25.37,
"learning_rate": 0.00018632625119152163,
"loss": 1.8155,
"step": 3780
},
{
"epoch": 25.44,
"learning_rate": 0.0001848710455491336,
"loss": 1.8593,
"step": 3790
},
{
"epoch": 25.5,
"learning_rate": 0.00018341820345194932,
"loss": 1.7904,
"step": 3800
},
{
"epoch": 25.57,
"learning_rate": 0.00018196777762395275,
"loss": 1.9035,
"step": 3810
},
{
"epoch": 25.64,
"learning_rate": 0.00018051982070144135,
"loss": 1.8704,
"step": 3820
},
{
"epoch": 25.7,
"learning_rate": 0.00017907438523111528,
"loss": 1.7725,
"step": 3830
},
{
"epoch": 25.77,
"learning_rate": 0.0001776315236681706,
"loss": 1.7824,
"step": 3840
},
{
"epoch": 25.84,
"learning_rate": 0.00017619128837439546,
"loss": 1.9004,
"step": 3850
},
{
"epoch": 25.91,
"learning_rate": 0.00017475373161627035,
"loss": 1.7393,
"step": 3860
},
{
"epoch": 25.97,
"learning_rate": 0.00017331890556307095,
"loss": 1.8055,
"step": 3870
},
{
"epoch": 26.04,
"learning_rate": 0.0001718868622849752,
"loss": 1.6775,
"step": 3880
},
{
"epoch": 26.11,
"learning_rate": 0.00017045765375117299,
"loss": 1.765,
"step": 3890
},
{
"epoch": 26.17,
"learning_rate": 0.00016903133182798115,
"loss": 1.7172,
"step": 3900
},
{
"epoch": 26.24,
"learning_rate": 0.00016760794827696055,
"loss": 1.7846,
"step": 3910
},
{
"epoch": 26.31,
"learning_rate": 0.0001661875547530378,
"loss": 1.8185,
"step": 3920
},
{
"epoch": 26.38,
"learning_rate": 0.0001647702028026308,
"loss": 1.7271,
"step": 3930
},
{
"epoch": 26.44,
"learning_rate": 0.00016335594386177794,
"loss": 1.7887,
"step": 3940
},
{
"epoch": 26.51,
"learning_rate": 0.0001619448292542716,
"loss": 1.7533,
"step": 3950
},
{
"epoch": 26.58,
"learning_rate": 0.0001605369101897956,
"loss": 1.8131,
"step": 3960
},
{
"epoch": 26.64,
"learning_rate": 0.00015913223776206666,
"loss": 1.7736,
"step": 3970
},
{
"epoch": 26.71,
"learning_rate": 0.00015773086294698036,
"loss": 1.7481,
"step": 3980
},
{
"epoch": 26.78,
"learning_rate": 0.0001563328366007612,
"loss": 1.8161,
"step": 3990
},
{
"epoch": 26.85,
"learning_rate": 0.0001549382094581166,
"loss": 1.816,
"step": 4000
},
{
"epoch": 26.85,
"eval_accuracy": 0.7110699415206909,
"eval_accuracy_top5": 0.7913135886192322,
"eval_loss": 1.3816050291061401,
"eval_runtime": 1281.5963,
"eval_samples_per_second": 2.952,
"eval_steps_per_second": 0.369,
"step": 4000
},
{
"epoch": 26.91,
"learning_rate": 0.00015354703213039651,
"loss": 1.7483,
"step": 4010
},
{
"epoch": 26.98,
"learning_rate": 0.00015215935510375607,
"loss": 1.7502,
"step": 4020
},
{
"epoch": 27.05,
"learning_rate": 0.00015077522873732375,
"loss": 1.7954,
"step": 4030
},
{
"epoch": 27.11,
"learning_rate": 0.0001493947032613735,
"loss": 1.7957,
"step": 4040
},
{
"epoch": 27.18,
"learning_rate": 0.00014801782877550234,
"loss": 1.7301,
"step": 4050
},
{
"epoch": 27.25,
"learning_rate": 0.00014664465524681197,
"loss": 1.7393,
"step": 4060
},
{
"epoch": 27.32,
"learning_rate": 0.00014527523250809543,
"loss": 1.7667,
"step": 4070
},
{
"epoch": 27.38,
"learning_rate": 0.00014390961025602855,
"loss": 1.8113,
"step": 4080
},
{
"epoch": 27.45,
"learning_rate": 0.00014254783804936686,
"loss": 1.7796,
"step": 4090
},
{
"epoch": 27.52,
"learning_rate": 0.00014118996530714664,
"loss": 1.7518,
"step": 4100
},
{
"epoch": 27.58,
"learning_rate": 0.00013983604130689192,
"loss": 1.7064,
"step": 4110
},
{
"epoch": 27.65,
"learning_rate": 0.0001384861151828255,
"loss": 1.754,
"step": 4120
},
{
"epoch": 27.72,
"learning_rate": 0.00013714023592408678,
"loss": 1.7321,
"step": 4130
},
{
"epoch": 27.79,
"learning_rate": 0.0001357984523729533,
"loss": 1.787,
"step": 4140
},
{
"epoch": 27.85,
"learning_rate": 0.00013446081322306813,
"loss": 1.7389,
"step": 4150
},
{
"epoch": 27.92,
"learning_rate": 0.00013312736701767347,
"loss": 1.7504,
"step": 4160
},
{
"epoch": 27.99,
"learning_rate": 0.00013179816214784826,
"loss": 1.7239,
"step": 4170
},
{
"epoch": 28.05,
"learning_rate": 0.00013047324685075263,
"loss": 1.7251,
"step": 4180
},
{
"epoch": 28.12,
"learning_rate": 0.00012915266920787672,
"loss": 1.7394,
"step": 4190
},
{
"epoch": 28.19,
"learning_rate": 0.00012783647714329649,
"loss": 1.7623,
"step": 4200
},
{
"epoch": 28.26,
"learning_rate": 0.00012652471842193415,
"loss": 1.68,
"step": 4210
},
{
"epoch": 28.32,
"learning_rate": 0.00012521744064782476,
"loss": 1.7476,
"step": 4220
},
{
"epoch": 28.39,
"learning_rate": 0.00012391469126238884,
"loss": 1.7888,
"step": 4230
},
{
"epoch": 28.46,
"learning_rate": 0.0001226165175427105,
"loss": 1.8063,
"step": 4240
},
{
"epoch": 28.52,
"learning_rate": 0.00012132296659982206,
"loss": 1.7794,
"step": 4250
},
{
"epoch": 28.59,
"learning_rate": 0.00012003408537699384,
"loss": 1.7629,
"step": 4260
},
{
"epoch": 28.66,
"learning_rate": 0.00011874992064803114,
"loss": 1.7939,
"step": 4270
},
{
"epoch": 28.72,
"learning_rate": 0.0001174705190155766,
"loss": 1.7379,
"step": 4280
},
{
"epoch": 28.79,
"learning_rate": 0.00011619592690941886,
"loss": 1.7218,
"step": 4290
},
{
"epoch": 28.86,
"learning_rate": 0.00011492619058480783,
"loss": 1.7113,
"step": 4300
},
{
"epoch": 28.93,
"learning_rate": 0.00011366135612077571,
"loss": 1.7242,
"step": 4310
},
{
"epoch": 28.99,
"learning_rate": 0.00011240146941846526,
"loss": 1.7338,
"step": 4320
},
{
"epoch": 29.06,
"learning_rate": 0.00011114657619946372,
"loss": 1.67,
"step": 4330
},
{
"epoch": 29.13,
"learning_rate": 0.00010989672200414375,
"loss": 1.7605,
"step": 4340
},
{
"epoch": 29.19,
"learning_rate": 0.00010865195219001028,
"loss": 1.682,
"step": 4350
},
{
"epoch": 29.26,
"learning_rate": 0.00010741231193005521,
"loss": 1.6429,
"step": 4360
},
{
"epoch": 29.33,
"learning_rate": 0.00010617784621111767,
"loss": 1.6666,
"step": 4370
},
{
"epoch": 29.4,
"learning_rate": 0.0001049485998322512,
"loss": 1.6439,
"step": 4380
},
{
"epoch": 29.46,
"learning_rate": 0.00010372461740309849,
"loss": 1.6682,
"step": 4390
},
{
"epoch": 29.53,
"learning_rate": 0.00010250594334227223,
"loss": 1.8048,
"step": 4400
},
{
"epoch": 29.6,
"learning_rate": 0.00010129262187574318,
"loss": 1.7627,
"step": 4410
},
{
"epoch": 29.66,
"learning_rate": 0.00010008469703523492,
"loss": 1.8278,
"step": 4420
},
{
"epoch": 29.73,
"learning_rate": 9.888221265662655e-05,
"loss": 1.7661,
"step": 4430
},
{
"epoch": 29.8,
"learning_rate": 9.768521237836131e-05,
"loss": 1.7062,
"step": 4440
},
{
"epoch": 29.87,
"learning_rate": 9.64937396398633e-05,
"loss": 1.6505,
"step": 4450
},
{
"epoch": 29.93,
"learning_rate": 9.530783767996057e-05,
"loss": 1.688,
"step": 4460
},
{
"epoch": 30.0,
"learning_rate": 9.412754953531663e-05,
"loss": 1.6097,
"step": 4470
},
{
"epoch": 30.07,
"learning_rate": 9.295291803886818e-05,
"loss": 1.6393,
"step": 4480
},
{
"epoch": 30.13,
"learning_rate": 9.178398581827086e-05,
"loss": 1.6005,
"step": 4490
},
{
"epoch": 30.2,
"learning_rate": 9.062079529435204e-05,
"loss": 1.7324,
"step": 4500
},
{
"epoch": 30.2,
"eval_accuracy": 0.7243114113807678,
"eval_accuracy_top5": 0.8164724707603455,
"eval_loss": 1.2510910034179688,
"eval_runtime": 99.3635,
"eval_samples_per_second": 38.072,
"eval_steps_per_second": 4.76,
"step": 4500
},
{
"epoch": 30.27,
"learning_rate": 8.946338867957182e-05,
"loss": 1.6926,
"step": 4510
},
{
"epoch": 30.34,
"learning_rate": 8.831180797649071e-05,
"loss": 1.6246,
"step": 4520
},
{
"epoch": 30.4,
"learning_rate": 8.716609497624564e-05,
"loss": 1.7316,
"step": 4530
},
{
"epoch": 30.47,
"learning_rate": 8.602629125703296e-05,
"loss": 1.6764,
"step": 4540
},
{
"epoch": 30.54,
"learning_rate": 8.489243818260004e-05,
"loss": 1.664,
"step": 4550
},
{
"epoch": 30.6,
"learning_rate": 8.376457690074384e-05,
"loss": 1.6697,
"step": 4560
},
{
"epoch": 30.67,
"learning_rate": 8.264274834181793e-05,
"loss": 1.6584,
"step": 4570
},
{
"epoch": 30.74,
"learning_rate": 8.152699321724652e-05,
"loss": 1.6844,
"step": 4580
},
{
"epoch": 30.81,
"learning_rate": 8.041735201804783e-05,
"loss": 1.7346,
"step": 4590
},
{
"epoch": 30.87,
"learning_rate": 7.931386501336418e-05,
"loss": 1.7664,
"step": 4600
},
{
"epoch": 30.94,
"learning_rate": 7.821657224900064e-05,
"loss": 1.7337,
"step": 4610
},
{
"epoch": 31.01,
"learning_rate": 7.712551354597187e-05,
"loss": 1.5892,
"step": 4620
},
{
"epoch": 31.07,
"learning_rate": 7.604072849905708e-05,
"loss": 1.6845,
"step": 4630
},
{
"epoch": 31.14,
"learning_rate": 7.49622564753627e-05,
"loss": 1.6969,
"step": 4640
},
{
"epoch": 31.21,
"learning_rate": 7.38901366128944e-05,
"loss": 1.6825,
"step": 4650
},
{
"epoch": 31.28,
"learning_rate": 7.282440781913619e-05,
"loss": 1.6149,
"step": 4660
},
{
"epoch": 31.34,
"learning_rate": 7.176510876963876e-05,
"loss": 1.6943,
"step": 4670
},
{
"epoch": 31.41,
"learning_rate": 7.071227790661597e-05,
"loss": 1.7003,
"step": 4680
},
{
"epoch": 31.48,
"learning_rate": 6.966595343754936e-05,
"loss": 1.754,
"step": 4690
},
{
"epoch": 31.54,
"learning_rate": 6.862617333380214e-05,
"loss": 1.7057,
"step": 4700
},
{
"epoch": 31.61,
"learning_rate": 6.759297532924086e-05,
"loss": 1.6623,
"step": 4710
},
{
"epoch": 31.68,
"learning_rate": 6.656639691886629e-05,
"loss": 1.5663,
"step": 4720
},
{
"epoch": 31.74,
"learning_rate": 6.55464753574522e-05,
"loss": 1.62,
"step": 4730
},
{
"epoch": 31.81,
"learning_rate": 6.453324765819404e-05,
"loss": 1.6554,
"step": 4740
},
{
"epoch": 31.88,
"learning_rate": 6.352675059136531e-05,
"loss": 1.5982,
"step": 4750
},
{
"epoch": 31.95,
"learning_rate": 6.252702068298338e-05,
"loss": 1.681,
"step": 4760
},
{
"epoch": 32.01,
"learning_rate": 6.153409421348358e-05,
"loss": 1.6682,
"step": 4770
},
{
"epoch": 32.08,
"learning_rate": 6.054800721640305e-05,
"loss": 1.6059,
"step": 4780
},
{
"epoch": 32.15,
"learning_rate": 5.956879547707275e-05,
"loss": 1.661,
"step": 4790
},
{
"epoch": 32.21,
"learning_rate": 5.8596494531319045e-05,
"loss": 1.6645,
"step": 4800
},
{
"epoch": 32.28,
"learning_rate": 5.763113966417369e-05,
"loss": 1.6481,
"step": 4810
},
{
"epoch": 32.35,
"learning_rate": 5.667276590859385e-05,
"loss": 1.6029,
"step": 4820
},
{
"epoch": 32.42,
"learning_rate": 5.572140804419049e-05,
"loss": 1.6251,
"step": 4830
},
{
"epoch": 32.48,
"learning_rate": 5.477710059596599e-05,
"loss": 1.676,
"step": 4840
},
{
"epoch": 32.55,
"learning_rate": 5.3839877833061684e-05,
"loss": 1.6486,
"step": 4850
},
{
"epoch": 32.62,
"learning_rate": 5.2909773767513934e-05,
"loss": 1.6394,
"step": 4860
},
{
"epoch": 32.68,
"learning_rate": 5.198682215301989e-05,
"loss": 1.5951,
"step": 4870
},
{
"epoch": 32.75,
"learning_rate": 5.1071056483712435e-05,
"loss": 1.6508,
"step": 4880
},
{
"epoch": 32.82,
"learning_rate": 5.016250999294497e-05,
"loss": 1.615,
"step": 4890
},
{
"epoch": 32.89,
"learning_rate": 4.9261215652085105e-05,
"loss": 1.6343,
"step": 4900
},
{
"epoch": 32.95,
"learning_rate": 4.8367206169318305e-05,
"loss": 1.608,
"step": 4910
},
{
"epoch": 33.02,
"learning_rate": 4.7480513988460625e-05,
"loss": 1.5985,
"step": 4920
},
{
"epoch": 33.09,
"learning_rate": 4.660117128778163e-05,
"loss": 1.6053,
"step": 4930
},
{
"epoch": 33.15,
"learning_rate": 4.572920997883648e-05,
"loss": 1.6875,
"step": 4940
},
{
"epoch": 33.22,
"learning_rate": 4.486466170530798e-05,
"loss": 1.6692,
"step": 4950
},
{
"epoch": 33.29,
"learning_rate": 4.4007557841857865e-05,
"loss": 1.7254,
"step": 4960
},
{
"epoch": 33.36,
"learning_rate": 4.315792949298869e-05,
"loss": 1.6377,
"step": 4970
},
{
"epoch": 33.42,
"learning_rate": 4.231580749191474e-05,
"loss": 1.6221,
"step": 4980
},
{
"epoch": 33.49,
"learning_rate": 4.148122239944316e-05,
"loss": 1.7078,
"step": 4990
},
{
"epoch": 33.56,
"learning_rate": 4.0654204502864886e-05,
"loss": 1.5947,
"step": 5000
},
{
"epoch": 33.56,
"eval_accuracy": 0.7288135886192322,
"eval_accuracy_top5": 0.8172669410705566,
"eval_loss": 1.2728419303894043,
"eval_runtime": 100.0466,
"eval_samples_per_second": 37.812,
"eval_steps_per_second": 4.728,
"step": 5000
},
{
"epoch": 33.62,
"learning_rate": 3.983478381485558e-05,
"loss": 1.6253,
"step": 5010
},
{
"epoch": 33.69,
"learning_rate": 3.902299007238627e-05,
"loss": 1.6119,
"step": 5020
},
{
"epoch": 33.76,
"learning_rate": 3.8218852735644404e-05,
"loss": 1.5703,
"step": 5030
},
{
"epoch": 33.83,
"learning_rate": 3.7422400986964724e-05,
"loss": 1.6635,
"step": 5040
},
{
"epoch": 33.89,
"learning_rate": 3.6633663729770004e-05,
"loss": 1.6116,
"step": 5050
},
{
"epoch": 33.96,
"learning_rate": 3.585266958752248e-05,
"loss": 1.5966,
"step": 5060
},
{
"epoch": 34.03,
"learning_rate": 3.507944690268469e-05,
"loss": 1.656,
"step": 5070
},
{
"epoch": 34.09,
"learning_rate": 3.4314023735691286e-05,
"loss": 1.6135,
"step": 5080
},
{
"epoch": 34.16,
"learning_rate": 3.355642786393051e-05,
"loss": 1.6592,
"step": 5090
},
{
"epoch": 34.23,
"learning_rate": 3.2806686780736336e-05,
"loss": 1.5753,
"step": 5100
},
{
"epoch": 34.3,
"learning_rate": 3.2064827694390345e-05,
"loss": 1.6342,
"step": 5110
},
{
"epoch": 34.36,
"learning_rate": 3.1330877527134785e-05,
"loss": 1.6055,
"step": 5120
},
{
"epoch": 34.43,
"learning_rate": 3.060486291419531e-05,
"loss": 1.6282,
"step": 5130
},
{
"epoch": 34.5,
"learning_rate": 2.9886810202814447e-05,
"loss": 1.6759,
"step": 5140
},
{
"epoch": 34.56,
"learning_rate": 2.917674545129531e-05,
"loss": 1.6517,
"step": 5150
},
{
"epoch": 34.63,
"learning_rate": 2.847469442805614e-05,
"loss": 1.5875,
"step": 5160
},
{
"epoch": 34.7,
"learning_rate": 2.7780682610695136e-05,
"loss": 1.6003,
"step": 5170
},
{
"epoch": 34.77,
"learning_rate": 2.7094735185065778e-05,
"loss": 1.5555,
"step": 5180
},
{
"epoch": 34.83,
"learning_rate": 2.6416877044362685e-05,
"loss": 1.6044,
"step": 5190
},
{
"epoch": 34.9,
"learning_rate": 2.5747132788218663e-05,
"loss": 1.6296,
"step": 5200
},
{
"epoch": 34.97,
"learning_rate": 2.508552672181158e-05,
"loss": 1.6314,
"step": 5210
},
{
"epoch": 35.03,
"learning_rate": 2.4432082854982524e-05,
"loss": 1.6526,
"step": 5220
},
{
"epoch": 35.1,
"learning_rate": 2.3786824901364357e-05,
"loss": 1.6304,
"step": 5230
},
{
"epoch": 35.17,
"learning_rate": 2.3149776277521266e-05,
"loss": 1.5172,
"step": 5240
},
{
"epoch": 35.23,
"learning_rate": 2.2520960102098892e-05,
"loss": 1.606,
"step": 5250
},
{
"epoch": 35.3,
"learning_rate": 2.190039919498543e-05,
"loss": 1.5952,
"step": 5260
},
{
"epoch": 35.37,
"learning_rate": 2.1288116076483288e-05,
"loss": 1.5936,
"step": 5270
},
{
"epoch": 35.44,
"learning_rate": 2.0684132966492103e-05,
"loss": 1.6147,
"step": 5280
},
{
"epoch": 35.5,
"learning_rate": 2.008847178370221e-05,
"loss": 1.6177,
"step": 5290
},
{
"epoch": 35.57,
"learning_rate": 1.9501154144799137e-05,
"loss": 1.6251,
"step": 5300
},
{
"epoch": 35.64,
"learning_rate": 1.8922201363679338e-05,
"loss": 1.6342,
"step": 5310
},
{
"epoch": 35.7,
"learning_rate": 1.8351634450676502e-05,
"loss": 1.6048,
"step": 5320
},
{
"epoch": 35.77,
"learning_rate": 1.7789474111799318e-05,
"loss": 1.5749,
"step": 5330
},
{
"epoch": 35.84,
"learning_rate": 1.7235740747979733e-05,
"loss": 1.6196,
"step": 5340
},
{
"epoch": 35.91,
"learning_rate": 1.6690454454332843e-05,
"loss": 1.5682,
"step": 5350
},
{
"epoch": 35.97,
"learning_rate": 1.6153635019427598e-05,
"loss": 1.6569,
"step": 5360
},
{
"epoch": 36.04,
"learning_rate": 1.5625301924568626e-05,
"loss": 1.6054,
"step": 5370
},
{
"epoch": 36.11,
"learning_rate": 1.5105474343089205e-05,
"loss": 1.6504,
"step": 5380
},
{
"epoch": 36.17,
"learning_rate": 1.459417113965561e-05,
"loss": 1.606,
"step": 5390
},
{
"epoch": 36.24,
"learning_rate": 1.4091410869582266e-05,
"loss": 1.627,
"step": 5400
},
{
"epoch": 36.31,
"learning_rate": 1.3597211778158675e-05,
"loss": 1.5914,
"step": 5410
},
{
"epoch": 36.38,
"learning_rate": 1.3111591799987083e-05,
"loss": 1.6279,
"step": 5420
},
{
"epoch": 36.44,
"learning_rate": 1.2634568558331644e-05,
"loss": 1.6583,
"step": 5430
},
{
"epoch": 36.51,
"learning_rate": 1.216615936447893e-05,
"loss": 1.567,
"step": 5440
},
{
"epoch": 36.58,
"learning_rate": 1.1706381217109735e-05,
"loss": 1.6408,
"step": 5450
},
{
"epoch": 36.64,
"learning_rate": 1.1255250801681994e-05,
"loss": 1.5914,
"step": 5460
},
{
"epoch": 36.71,
"learning_rate": 1.0812784489825506e-05,
"loss": 1.5906,
"step": 5470
},
{
"epoch": 36.78,
"learning_rate": 1.0378998338747669e-05,
"loss": 1.541,
"step": 5480
},
{
"epoch": 36.85,
"learning_rate": 9.953908090650804e-06,
"loss": 1.6335,
"step": 5490
},
{
"epoch": 36.91,
"learning_rate": 9.53752917216083e-06,
"loss": 1.6565,
"step": 5500
},
{
"epoch": 36.91,
"eval_accuracy": 0.7513241767883301,
"eval_accuracy_top5": 0.8336864113807678,
"eval_loss": 1.1887362003326416,
"eval_runtime": 101.4114,
"eval_samples_per_second": 37.304,
"eval_steps_per_second": 4.664,
"step": 5500
}
],
"max_steps": 5960,
"num_train_epochs": 40,
"total_flos": 4.3900198664955494e+20,
"trial_name": null,
"trial_params": null
}