|
{ |
|
"best_metric": 0.11646050214767456, |
|
"best_model_checkpoint": "./vit-indian-food/checkpoint-1500", |
|
"epoch": 7.853403141361256, |
|
"eval_steps": 50, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.558130264282227, |
|
"learning_rate": 0.00019947643979057592, |
|
"loss": 2.4369, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.5493950843811035, |
|
"learning_rate": 0.00019895287958115185, |
|
"loss": 1.6214, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.205695629119873, |
|
"learning_rate": 0.00019842931937172776, |
|
"loss": 1.197, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.8880319595336914, |
|
"learning_rate": 0.00019790575916230367, |
|
"loss": 0.7391, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.315922260284424, |
|
"learning_rate": 0.0001973821989528796, |
|
"loss": 0.6288, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.008142471313477, |
|
"learning_rate": 0.0001968586387434555, |
|
"loss": 0.3582, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.005300998687744, |
|
"learning_rate": 0.00019633507853403142, |
|
"loss": 0.3882, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.9506969451904297, |
|
"learning_rate": 0.00019581151832460733, |
|
"loss": 0.39, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.074863910675049, |
|
"learning_rate": 0.00019528795811518326, |
|
"loss": 0.3413, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.617203712463379, |
|
"learning_rate": 0.00019476439790575917, |
|
"loss": 0.358, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9081364829396326, |
|
"eval_loss": 0.32936015725135803, |
|
"eval_precision": 0.9220515144455581, |
|
"eval_recall": 0.9081364829396326, |
|
"eval_runtime": 17.5763, |
|
"eval_samples_per_second": 43.354, |
|
"eval_steps_per_second": 2.731, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.685304641723633, |
|
"learning_rate": 0.0001942408376963351, |
|
"loss": 0.3669, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.119244337081909, |
|
"learning_rate": 0.000193717277486911, |
|
"loss": 0.1722, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 6.17639684677124, |
|
"learning_rate": 0.00019319371727748692, |
|
"loss": 0.405, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 4.263852119445801, |
|
"learning_rate": 0.00019267015706806283, |
|
"loss": 0.5013, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.03020977973938, |
|
"learning_rate": 0.00019214659685863877, |
|
"loss": 0.2251, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.0216193199157715, |
|
"learning_rate": 0.00019162303664921465, |
|
"loss": 0.3329, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 7.692380428314209, |
|
"learning_rate": 0.00019109947643979058, |
|
"loss": 0.2191, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.645913600921631, |
|
"learning_rate": 0.0001905759162303665, |
|
"loss": 0.3437, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.066718578338623, |
|
"learning_rate": 0.00019005235602094243, |
|
"loss": 0.4461, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.659124851226807, |
|
"learning_rate": 0.00018952879581151833, |
|
"loss": 0.5051, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.9291338582677166, |
|
"eval_loss": 0.22470374405384064, |
|
"eval_precision": 0.9372539303361345, |
|
"eval_recall": 0.9291338582677166, |
|
"eval_runtime": 17.4785, |
|
"eval_samples_per_second": 43.597, |
|
"eval_steps_per_second": 2.746, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.5200324058532715, |
|
"learning_rate": 0.00018900523560209424, |
|
"loss": 0.2615, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.2165738344192505, |
|
"learning_rate": 0.00018848167539267018, |
|
"loss": 0.2415, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.801146030426025, |
|
"learning_rate": 0.00018795811518324608, |
|
"loss": 0.3234, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.8039401173591614, |
|
"learning_rate": 0.00018743455497382202, |
|
"loss": 0.1962, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.639632701873779, |
|
"learning_rate": 0.0001869109947643979, |
|
"loss": 0.2755, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.4670636653900146, |
|
"learning_rate": 0.00018638743455497384, |
|
"loss": 0.3463, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 6.07156229019165, |
|
"learning_rate": 0.00018586387434554974, |
|
"loss": 0.3047, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 6.347087860107422, |
|
"learning_rate": 0.00018534031413612568, |
|
"loss": 0.2364, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 5.963706970214844, |
|
"learning_rate": 0.00018481675392670156, |
|
"loss": 0.4148, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.037764072418213, |
|
"learning_rate": 0.0001842931937172775, |
|
"loss": 0.1361, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9409448818897638, |
|
"eval_loss": 0.21655863523483276, |
|
"eval_precision": 0.9454930944052194, |
|
"eval_recall": 0.9409448818897638, |
|
"eval_runtime": 17.4607, |
|
"eval_samples_per_second": 43.641, |
|
"eval_steps_per_second": 2.749, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.717739582061768, |
|
"learning_rate": 0.0001837696335078534, |
|
"loss": 0.1183, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.9427084922790527, |
|
"learning_rate": 0.00018324607329842934, |
|
"loss": 0.1138, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 4.915616989135742, |
|
"learning_rate": 0.00018272251308900525, |
|
"loss": 0.2988, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.3770432472229, |
|
"learning_rate": 0.00018219895287958115, |
|
"loss": 0.1744, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.7859281301498413, |
|
"learning_rate": 0.00018167539267015706, |
|
"loss": 0.2486, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.75736141204834, |
|
"learning_rate": 0.000181151832460733, |
|
"loss": 0.2637, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.771958351135254, |
|
"learning_rate": 0.0001806282722513089, |
|
"loss": 0.2305, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 4.951119899749756, |
|
"learning_rate": 0.0001801047120418848, |
|
"loss": 0.1108, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 3.362152576446533, |
|
"learning_rate": 0.00017958115183246075, |
|
"loss": 0.133, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.8346299529075623, |
|
"learning_rate": 0.00017905759162303666, |
|
"loss": 0.0611, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.9488188976377953, |
|
"eval_loss": 0.18689152598381042, |
|
"eval_precision": 0.9533647964651322, |
|
"eval_recall": 0.9488188976377953, |
|
"eval_runtime": 17.6111, |
|
"eval_samples_per_second": 43.268, |
|
"eval_steps_per_second": 2.726, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.5746695399284363, |
|
"learning_rate": 0.0001785340314136126, |
|
"loss": 0.0241, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.0743372440338135, |
|
"learning_rate": 0.0001780104712041885, |
|
"loss": 0.0662, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.6814557313919067, |
|
"learning_rate": 0.0001774869109947644, |
|
"loss": 0.0635, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.118739128112793, |
|
"learning_rate": 0.00017696335078534032, |
|
"loss": 0.1024, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 3.4306182861328125, |
|
"learning_rate": 0.00017643979057591625, |
|
"loss": 0.0201, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.28945377469062805, |
|
"learning_rate": 0.00017591623036649216, |
|
"loss": 0.1151, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 4.6208648681640625, |
|
"learning_rate": 0.00017539267015706807, |
|
"loss": 0.2324, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.6085395812988281, |
|
"learning_rate": 0.00017486910994764398, |
|
"loss": 0.0429, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 5.860565185546875, |
|
"learning_rate": 0.0001743455497382199, |
|
"loss": 0.0567, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 5.421276092529297, |
|
"learning_rate": 0.00017382198952879582, |
|
"loss": 0.1037, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.968503937007874, |
|
"eval_loss": 0.11793948709964752, |
|
"eval_precision": 0.9693355319509627, |
|
"eval_recall": 0.968503937007874, |
|
"eval_runtime": 17.5701, |
|
"eval_samples_per_second": 43.369, |
|
"eval_steps_per_second": 2.732, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 3.9477956295013428, |
|
"learning_rate": 0.00017329842931937175, |
|
"loss": 0.0285, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 7.515362739562988, |
|
"learning_rate": 0.00017277486910994763, |
|
"loss": 0.2195, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.1676739752292633, |
|
"learning_rate": 0.00017225130890052357, |
|
"loss": 0.0172, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 8.235359191894531, |
|
"learning_rate": 0.00017172774869109948, |
|
"loss": 0.1756, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 7.9198832511901855, |
|
"learning_rate": 0.0001712041884816754, |
|
"loss": 0.0663, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 6.388627529144287, |
|
"learning_rate": 0.00017068062827225132, |
|
"loss": 0.1467, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.09213338047266006, |
|
"learning_rate": 0.00017015706806282723, |
|
"loss": 0.0684, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.0691012144088745, |
|
"learning_rate": 0.00016963350785340316, |
|
"loss": 0.0954, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 6.120029449462891, |
|
"learning_rate": 0.00016910994764397907, |
|
"loss": 0.0587, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.0928138718008995, |
|
"learning_rate": 0.00016858638743455498, |
|
"loss": 0.0294, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.963254593175853, |
|
"eval_loss": 0.11820019036531448, |
|
"eval_precision": 0.9645155573207865, |
|
"eval_recall": 0.963254593175853, |
|
"eval_runtime": 17.5014, |
|
"eval_samples_per_second": 43.539, |
|
"eval_steps_per_second": 2.743, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.3131069242954254, |
|
"learning_rate": 0.0001680628272251309, |
|
"loss": 0.1184, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.417850524187088, |
|
"learning_rate": 0.00016753926701570682, |
|
"loss": 0.0053, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 5.371811389923096, |
|
"learning_rate": 0.00016701570680628273, |
|
"loss": 0.1099, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 4.09129524230957, |
|
"learning_rate": 0.00016649214659685867, |
|
"loss": 0.2501, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.6515682935714722, |
|
"learning_rate": 0.00016596858638743455, |
|
"loss": 0.0528, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.5656059384346008, |
|
"learning_rate": 0.00016544502617801048, |
|
"loss": 0.1378, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.10378353297710419, |
|
"learning_rate": 0.0001649214659685864, |
|
"loss": 0.0329, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 2.387305736541748, |
|
"learning_rate": 0.00016439790575916233, |
|
"loss": 0.0353, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.05628238245844841, |
|
"learning_rate": 0.0001638743455497382, |
|
"loss": 0.0138, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.04053734615445137, |
|
"learning_rate": 0.00016335078534031414, |
|
"loss": 0.0082, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.963254593175853, |
|
"eval_loss": 0.11838709563016891, |
|
"eval_precision": 0.9654537871036863, |
|
"eval_recall": 0.963254593175853, |
|
"eval_runtime": 17.4034, |
|
"eval_samples_per_second": 43.785, |
|
"eval_steps_per_second": 2.758, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 2.287546396255493, |
|
"learning_rate": 0.00016282722513089005, |
|
"loss": 0.02, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 4.561453342437744, |
|
"learning_rate": 0.00016230366492146599, |
|
"loss": 0.203, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 4.0477294921875, |
|
"learning_rate": 0.0001617801047120419, |
|
"loss": 0.0646, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.0380969047546387, |
|
"learning_rate": 0.0001612565445026178, |
|
"loss": 0.0848, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 4.385131359100342, |
|
"learning_rate": 0.0001607329842931937, |
|
"loss": 0.166, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.036621347069740295, |
|
"learning_rate": 0.00016020942408376964, |
|
"loss": 0.0871, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.5929269194602966, |
|
"learning_rate": 0.00015968586387434555, |
|
"loss": 0.1271, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.26594266295433044, |
|
"learning_rate": 0.00015916230366492146, |
|
"loss": 0.0114, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.020196596160531044, |
|
"learning_rate": 0.0001586387434554974, |
|
"loss": 0.0081, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.6509531140327454, |
|
"learning_rate": 0.0001581151832460733, |
|
"loss": 0.0206, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_accuracy": 0.963254593175853, |
|
"eval_loss": 0.130891352891922, |
|
"eval_precision": 0.964741369994443, |
|
"eval_recall": 0.963254593175853, |
|
"eval_runtime": 17.4584, |
|
"eval_samples_per_second": 43.647, |
|
"eval_steps_per_second": 2.749, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.04883525148034096, |
|
"learning_rate": 0.00015759162303664924, |
|
"loss": 0.047, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.4584039449691772, |
|
"learning_rate": 0.00015706806282722515, |
|
"loss": 0.0393, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.3986053466796875, |
|
"learning_rate": 0.00015654450261780105, |
|
"loss": 0.0606, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.11985136568546295, |
|
"learning_rate": 0.00015602094240837696, |
|
"loss": 0.0379, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.07443628460168839, |
|
"learning_rate": 0.0001554973821989529, |
|
"loss": 0.0157, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.03219376876950264, |
|
"learning_rate": 0.0001549738219895288, |
|
"loss": 0.01, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.17020490765571594, |
|
"learning_rate": 0.00015445026178010471, |
|
"loss": 0.0093, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 2.6877171993255615, |
|
"learning_rate": 0.00015392670157068062, |
|
"loss": 0.021, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.010094034485518932, |
|
"learning_rate": 0.00015340314136125656, |
|
"loss": 0.0083, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.0077184755355119705, |
|
"learning_rate": 0.00015287958115183247, |
|
"loss": 0.0246, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9553805774278216, |
|
"eval_loss": 0.17534801363945007, |
|
"eval_precision": 0.9586171915743325, |
|
"eval_recall": 0.9553805774278216, |
|
"eval_runtime": 17.505, |
|
"eval_samples_per_second": 43.53, |
|
"eval_steps_per_second": 2.742, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 5.202419757843018, |
|
"learning_rate": 0.0001523560209424084, |
|
"loss": 0.032, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.018663793802261353, |
|
"learning_rate": 0.00015183246073298428, |
|
"loss": 0.0221, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.0761280059814453, |
|
"learning_rate": 0.00015130890052356022, |
|
"loss": 0.0055, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.8688761591911316, |
|
"learning_rate": 0.00015078534031413612, |
|
"loss": 0.0852, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.06378360092639923, |
|
"learning_rate": 0.00015026178010471206, |
|
"loss": 0.0031, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 7.236721992492676, |
|
"learning_rate": 0.00014973821989528797, |
|
"loss": 0.0106, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.013875219039618969, |
|
"learning_rate": 0.00014921465968586388, |
|
"loss": 0.0032, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.031233886256814003, |
|
"learning_rate": 0.0001486910994764398, |
|
"loss": 0.0047, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.2528940439224243, |
|
"learning_rate": 0.00014816753926701572, |
|
"loss": 0.0081, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.02138712629675865, |
|
"learning_rate": 0.00014764397905759163, |
|
"loss": 0.0161, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.9514435695538058, |
|
"eval_loss": 0.16896109282970428, |
|
"eval_precision": 0.9537490891546931, |
|
"eval_recall": 0.9514435695538058, |
|
"eval_runtime": 17.5851, |
|
"eval_samples_per_second": 43.332, |
|
"eval_steps_per_second": 2.73, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 5.179097652435303, |
|
"learning_rate": 0.00014712041884816754, |
|
"loss": 0.0271, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.06704265624284744, |
|
"learning_rate": 0.00014659685863874347, |
|
"loss": 0.0072, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.15809708833694458, |
|
"learning_rate": 0.00014607329842931938, |
|
"loss": 0.0027, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 6.423127174377441, |
|
"learning_rate": 0.00014554973821989531, |
|
"loss": 0.0238, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.010630112141370773, |
|
"learning_rate": 0.0001450261780104712, |
|
"loss": 0.0271, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 2.8193652629852295, |
|
"learning_rate": 0.00014450261780104713, |
|
"loss": 0.1816, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.004105111118406057, |
|
"learning_rate": 0.00014397905759162304, |
|
"loss": 0.1842, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.20598988234996796, |
|
"learning_rate": 0.00014345549738219897, |
|
"loss": 0.0302, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.7738168835639954, |
|
"learning_rate": 0.00014293193717277485, |
|
"loss": 0.0256, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.07728663086891174, |
|
"learning_rate": 0.0001424083769633508, |
|
"loss": 0.0341, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_accuracy": 0.9488188976377953, |
|
"eval_loss": 0.18862025439739227, |
|
"eval_precision": 0.9543073149026239, |
|
"eval_recall": 0.9488188976377953, |
|
"eval_runtime": 17.3957, |
|
"eval_samples_per_second": 43.804, |
|
"eval_steps_per_second": 2.759, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 12.730446815490723, |
|
"learning_rate": 0.0001418848167539267, |
|
"loss": 0.0664, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 8.560800552368164, |
|
"learning_rate": 0.00014136125654450263, |
|
"loss": 0.0518, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 1.7465717792510986, |
|
"learning_rate": 0.00014083769633507854, |
|
"loss": 0.0236, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 5.386238098144531, |
|
"learning_rate": 0.00014031413612565445, |
|
"loss": 0.0449, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.008860424160957336, |
|
"learning_rate": 0.00013979057591623038, |
|
"loss": 0.0168, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.0057926299050450325, |
|
"learning_rate": 0.0001392670157068063, |
|
"loss": 0.0191, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.5266512036323547, |
|
"learning_rate": 0.00013874345549738223, |
|
"loss": 0.0023, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.03807899355888367, |
|
"learning_rate": 0.0001382198952879581, |
|
"loss": 0.0259, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.41590607166290283, |
|
"learning_rate": 0.00013769633507853404, |
|
"loss": 0.0224, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.003112237202003598, |
|
"learning_rate": 0.00013717277486910995, |
|
"loss": 0.0022, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.963254593175853, |
|
"eval_loss": 0.1624463051557541, |
|
"eval_precision": 0.9649964224541913, |
|
"eval_recall": 0.963254593175853, |
|
"eval_runtime": 17.4244, |
|
"eval_samples_per_second": 43.732, |
|
"eval_steps_per_second": 2.755, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 0.013240986503660679, |
|
"learning_rate": 0.00013664921465968589, |
|
"loss": 0.0096, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 0.13229414820671082, |
|
"learning_rate": 0.0001361256544502618, |
|
"loss": 0.0016, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.2047564685344696, |
|
"learning_rate": 0.0001356020942408377, |
|
"loss": 0.0029, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 5.075094699859619, |
|
"learning_rate": 0.0001350785340314136, |
|
"loss": 0.1541, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.11219122260808945, |
|
"learning_rate": 0.00013455497382198955, |
|
"loss": 0.0016, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.0553029403090477, |
|
"learning_rate": 0.00013403141361256545, |
|
"loss": 0.0016, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 0.1759098470211029, |
|
"learning_rate": 0.00013350785340314136, |
|
"loss": 0.0616, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.29212847352027893, |
|
"learning_rate": 0.00013298429319371727, |
|
"loss": 0.0074, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.02130221202969551, |
|
"learning_rate": 0.0001324607329842932, |
|
"loss": 0.0013, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 1.0668525695800781, |
|
"learning_rate": 0.0001319371727748691, |
|
"loss": 0.0044, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.937007874015748, |
|
"eval_loss": 0.22285035252571106, |
|
"eval_precision": 0.9410975298976415, |
|
"eval_recall": 0.937007874015748, |
|
"eval_runtime": 17.5444, |
|
"eval_samples_per_second": 43.433, |
|
"eval_steps_per_second": 2.736, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.27807894349098206, |
|
"learning_rate": 0.00013141361256544505, |
|
"loss": 0.0669, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"grad_norm": 0.17472220957279205, |
|
"learning_rate": 0.00013089005235602096, |
|
"loss": 0.0066, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 0.07417164742946625, |
|
"learning_rate": 0.00013036649214659686, |
|
"loss": 0.0045, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 0.007742606569081545, |
|
"learning_rate": 0.00012984293193717277, |
|
"loss": 0.0008, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 0.00367438024841249, |
|
"learning_rate": 0.0001293193717277487, |
|
"loss": 0.003, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.0222078375518322, |
|
"learning_rate": 0.00012879581151832462, |
|
"loss": 0.0017, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"grad_norm": 6.7436299324035645, |
|
"learning_rate": 0.00012827225130890052, |
|
"loss": 0.0179, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.0018994753481820226, |
|
"learning_rate": 0.00012774869109947646, |
|
"loss": 0.0008, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 0.005384970456361771, |
|
"learning_rate": 0.00012722513089005237, |
|
"loss": 0.0018, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"grad_norm": 0.0077237836085259914, |
|
"learning_rate": 0.00012670157068062827, |
|
"loss": 0.0012, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_accuracy": 0.9501312335958005, |
|
"eval_loss": 0.17090748250484467, |
|
"eval_precision": 0.9525764956521452, |
|
"eval_recall": 0.9501312335958005, |
|
"eval_runtime": 17.5175, |
|
"eval_samples_per_second": 43.499, |
|
"eval_steps_per_second": 2.74, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 0.016971301287412643, |
|
"learning_rate": 0.00012617801047120418, |
|
"loss": 0.0672, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"grad_norm": 0.37131184339523315, |
|
"learning_rate": 0.00012565445026178012, |
|
"loss": 0.0032, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.020785262808203697, |
|
"learning_rate": 0.00012513089005235603, |
|
"loss": 0.0036, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"grad_norm": 0.003909711726009846, |
|
"learning_rate": 0.00012460732984293196, |
|
"loss": 0.0005, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 0.32289111614227295, |
|
"learning_rate": 0.00012408376963350784, |
|
"loss": 0.0067, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"grad_norm": 0.011649747379124165, |
|
"learning_rate": 0.00012356020942408378, |
|
"loss": 0.0004, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 0.009505286812782288, |
|
"learning_rate": 0.00012303664921465968, |
|
"loss": 0.0004, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 0.016009971499443054, |
|
"learning_rate": 0.00012251308900523562, |
|
"loss": 0.0132, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.07612348347902298, |
|
"learning_rate": 0.00012198952879581151, |
|
"loss": 0.0012, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.05622846260666847, |
|
"learning_rate": 0.00012146596858638744, |
|
"loss": 0.0058, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_accuracy": 0.958005249343832, |
|
"eval_loss": 0.17302347719669342, |
|
"eval_precision": 0.9602924010464745, |
|
"eval_recall": 0.958005249343832, |
|
"eval_runtime": 17.4895, |
|
"eval_samples_per_second": 43.569, |
|
"eval_steps_per_second": 2.744, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 0.050630178302526474, |
|
"learning_rate": 0.00012094240837696336, |
|
"loss": 0.0008, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"grad_norm": 0.02956685610115528, |
|
"learning_rate": 0.00012041884816753928, |
|
"loss": 0.0017, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.038056932389736176, |
|
"learning_rate": 0.0001198952879581152, |
|
"loss": 0.001, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.006303295027464628, |
|
"learning_rate": 0.0001193717277486911, |
|
"loss": 0.0003, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.019060682505369186, |
|
"learning_rate": 0.00011884816753926702, |
|
"loss": 0.0005, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.9220014810562134, |
|
"learning_rate": 0.00011832460732984294, |
|
"loss": 0.0014, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.0184442438185215, |
|
"learning_rate": 0.00011780104712041886, |
|
"loss": 0.0003, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 0.003219619393348694, |
|
"learning_rate": 0.00011727748691099475, |
|
"loss": 0.0003, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.006713965907692909, |
|
"learning_rate": 0.00011675392670157068, |
|
"loss": 0.0044, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 0.0026614165399223566, |
|
"learning_rate": 0.0001162303664921466, |
|
"loss": 0.0003, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 0.9488188976377953, |
|
"eval_loss": 0.20851939916610718, |
|
"eval_precision": 0.9513993632135537, |
|
"eval_recall": 0.9488188976377953, |
|
"eval_runtime": 17.4973, |
|
"eval_samples_per_second": 43.55, |
|
"eval_steps_per_second": 2.743, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 0.2586454749107361, |
|
"learning_rate": 0.00011570680628272252, |
|
"loss": 0.001, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.005604247096925974, |
|
"learning_rate": 0.00011518324607329844, |
|
"loss": 0.0027, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"grad_norm": 0.00321913487277925, |
|
"learning_rate": 0.00011465968586387435, |
|
"loss": 0.0002, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.00729359732940793, |
|
"learning_rate": 0.00011413612565445027, |
|
"loss": 0.0068, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 2.0007290840148926, |
|
"learning_rate": 0.00011361256544502619, |
|
"loss": 0.002, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 0.0031692993361502886, |
|
"learning_rate": 0.00011308900523560211, |
|
"loss": 0.0005, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 0.007637929171323776, |
|
"learning_rate": 0.00011256544502617801, |
|
"loss": 0.0002, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.003380405716598034, |
|
"learning_rate": 0.00011204188481675393, |
|
"loss": 0.0002, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"grad_norm": 0.0019728606566786766, |
|
"learning_rate": 0.00011151832460732985, |
|
"loss": 0.0005, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 0.0047022090293467045, |
|
"learning_rate": 0.00011099476439790577, |
|
"loss": 0.0003, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_accuracy": 0.9698162729658792, |
|
"eval_loss": 0.1316651999950409, |
|
"eval_precision": 0.9708177710817824, |
|
"eval_recall": 0.9698162729658792, |
|
"eval_runtime": 17.4736, |
|
"eval_samples_per_second": 43.609, |
|
"eval_steps_per_second": 2.747, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.003034041728824377, |
|
"learning_rate": 0.0001104712041884817, |
|
"loss": 0.0002, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.0816386267542839, |
|
"learning_rate": 0.00010994764397905759, |
|
"loss": 0.0005, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"grad_norm": 0.010150614194571972, |
|
"learning_rate": 0.00010942408376963351, |
|
"loss": 0.0004, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.003652422921732068, |
|
"learning_rate": 0.00010890052356020943, |
|
"loss": 0.0002, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"grad_norm": 0.0027291346341371536, |
|
"learning_rate": 0.00010837696335078535, |
|
"loss": 0.0002, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 0.0021885058376938105, |
|
"learning_rate": 0.00010785340314136125, |
|
"loss": 0.0002, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 0.0019014202989637852, |
|
"learning_rate": 0.00010732984293193717, |
|
"loss": 0.0002, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.9188545942306519, |
|
"learning_rate": 0.00010680628272251309, |
|
"loss": 0.0032, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 0.0015793447382748127, |
|
"learning_rate": 0.00010628272251308901, |
|
"loss": 0.0002, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.0016003657365217805, |
|
"learning_rate": 0.00010575916230366492, |
|
"loss": 0.0002, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_accuracy": 0.9698162729658792, |
|
"eval_loss": 0.12266764789819717, |
|
"eval_precision": 0.9709897987678096, |
|
"eval_recall": 0.9698162729658792, |
|
"eval_runtime": 17.6013, |
|
"eval_samples_per_second": 43.292, |
|
"eval_steps_per_second": 2.727, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"grad_norm": 0.001760563114657998, |
|
"learning_rate": 0.00010523560209424084, |
|
"loss": 0.0002, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 0.14921420812606812, |
|
"learning_rate": 0.00010471204188481676, |
|
"loss": 0.0007, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 0.008730238303542137, |
|
"learning_rate": 0.00010418848167539269, |
|
"loss": 0.0005, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"grad_norm": 0.0021068351343274117, |
|
"learning_rate": 0.0001036649214659686, |
|
"loss": 0.0004, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.0018881463911384344, |
|
"learning_rate": 0.0001031413612565445, |
|
"loss": 0.0002, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 0.010380023159086704, |
|
"learning_rate": 0.00010261780104712042, |
|
"loss": 0.0002, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.005413604434579611, |
|
"learning_rate": 0.00010209424083769635, |
|
"loss": 0.0435, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 0.013811178505420685, |
|
"learning_rate": 0.00010157068062827227, |
|
"loss": 0.0002, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.0024414085783064365, |
|
"learning_rate": 0.00010104712041884816, |
|
"loss": 0.0014, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"grad_norm": 0.00615642499178648, |
|
"learning_rate": 0.00010052356020942408, |
|
"loss": 0.0004, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.12034053355455399, |
|
"eval_precision": 0.9732646926949845, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.6401, |
|
"eval_samples_per_second": 43.197, |
|
"eval_steps_per_second": 2.721, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5034066438674927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0023136490490287542, |
|
"learning_rate": 9.947643979057593e-05, |
|
"loss": 0.0002, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.025553325191140175, |
|
"learning_rate": 9.895287958115183e-05, |
|
"loss": 0.0003, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0021953664254397154, |
|
"learning_rate": 9.842931937172776e-05, |
|
"loss": 0.0002, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.0016761135775595903, |
|
"learning_rate": 9.790575916230366e-05, |
|
"loss": 0.0002, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"grad_norm": 0.001983917085453868, |
|
"learning_rate": 9.738219895287959e-05, |
|
"loss": 0.0002, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"grad_norm": 0.008785477839410305, |
|
"learning_rate": 9.68586387434555e-05, |
|
"loss": 0.0003, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 0.00397130474448204, |
|
"learning_rate": 9.633507853403142e-05, |
|
"loss": 0.0002, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 0.0015918458811938763, |
|
"learning_rate": 9.581151832460732e-05, |
|
"loss": 0.0003, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"grad_norm": 0.0018075347179546952, |
|
"learning_rate": 9.528795811518324e-05, |
|
"loss": 0.0002, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.958005249343832, |
|
"eval_loss": 0.19232842326164246, |
|
"eval_precision": 0.960493858579392, |
|
"eval_recall": 0.958005249343832, |
|
"eval_runtime": 17.4659, |
|
"eval_samples_per_second": 43.628, |
|
"eval_steps_per_second": 2.748, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 0.0014983582077547908, |
|
"learning_rate": 9.476439790575917e-05, |
|
"loss": 0.0003, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"grad_norm": 0.004939761478453875, |
|
"learning_rate": 9.424083769633509e-05, |
|
"loss": 0.0025, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"grad_norm": 0.003176321741193533, |
|
"learning_rate": 9.371727748691101e-05, |
|
"loss": 0.0002, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"grad_norm": 0.008645892143249512, |
|
"learning_rate": 9.319371727748692e-05, |
|
"loss": 0.0002, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"grad_norm": 0.009872050024569035, |
|
"learning_rate": 9.267015706806284e-05, |
|
"loss": 0.0002, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 0.0015197212342172861, |
|
"learning_rate": 9.214659685863875e-05, |
|
"loss": 0.0001, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"grad_norm": 0.0035685019101947546, |
|
"learning_rate": 9.162303664921467e-05, |
|
"loss": 0.0002, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 0.0015960617456585169, |
|
"learning_rate": 9.109947643979058e-05, |
|
"loss": 0.0002, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"grad_norm": 0.0019863061606884003, |
|
"learning_rate": 9.05759162303665e-05, |
|
"loss": 0.0036, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 0.0011158788111060858, |
|
"learning_rate": 9.00523560209424e-05, |
|
"loss": 0.0001, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.11267632246017456, |
|
"eval_precision": 0.9717079331418645, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.5861, |
|
"eval_samples_per_second": 43.33, |
|
"eval_steps_per_second": 2.729, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 0.003642949042841792, |
|
"learning_rate": 8.952879581151833e-05, |
|
"loss": 0.0002, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"grad_norm": 2.638040781021118, |
|
"learning_rate": 8.900523560209425e-05, |
|
"loss": 0.0021, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"grad_norm": 5.575833320617676, |
|
"learning_rate": 8.848167539267016e-05, |
|
"loss": 0.0068, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.0017755662556737661, |
|
"learning_rate": 8.795811518324608e-05, |
|
"loss": 0.0001, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"grad_norm": 0.0034077195450663567, |
|
"learning_rate": 8.743455497382199e-05, |
|
"loss": 0.0003, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 0.003383865812793374, |
|
"learning_rate": 8.691099476439791e-05, |
|
"loss": 0.0004, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 0.0037927927915006876, |
|
"learning_rate": 8.638743455497382e-05, |
|
"loss": 0.0025, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"grad_norm": 0.008559320122003555, |
|
"learning_rate": 8.586387434554974e-05, |
|
"loss": 0.0002, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"grad_norm": 0.0041591702029109, |
|
"learning_rate": 8.534031413612566e-05, |
|
"loss": 0.0002, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 0.0011583847226575017, |
|
"learning_rate": 8.481675392670158e-05, |
|
"loss": 0.0003, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.13462768495082855, |
|
"eval_precision": 0.9722333613195212, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.6983, |
|
"eval_samples_per_second": 43.055, |
|
"eval_steps_per_second": 2.712, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"grad_norm": 0.0030487922485917807, |
|
"learning_rate": 8.429319371727749e-05, |
|
"loss": 0.0002, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"grad_norm": 0.003955108113586903, |
|
"learning_rate": 8.376963350785341e-05, |
|
"loss": 0.0002, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.002146199345588684, |
|
"learning_rate": 8.324607329842933e-05, |
|
"loss": 0.0001, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"grad_norm": 0.0010180504759773612, |
|
"learning_rate": 8.272251308900524e-05, |
|
"loss": 0.0002, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"grad_norm": 0.0008741291239857674, |
|
"learning_rate": 8.219895287958116e-05, |
|
"loss": 0.0001, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 0.0017273599514737725, |
|
"learning_rate": 8.167539267015707e-05, |
|
"loss": 0.0071, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.003460003063082695, |
|
"learning_rate": 8.115183246073299e-05, |
|
"loss": 0.0006, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"grad_norm": 0.0010187524603679776, |
|
"learning_rate": 8.06282722513089e-05, |
|
"loss": 0.0001, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"grad_norm": 0.0035252266097813845, |
|
"learning_rate": 8.010471204188482e-05, |
|
"loss": 0.0009, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0013756396947428584, |
|
"learning_rate": 7.958115183246073e-05, |
|
"loss": 0.0001, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.11907853186130524, |
|
"eval_precision": 0.9719758650287736, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 18.1879, |
|
"eval_samples_per_second": 41.896, |
|
"eval_steps_per_second": 2.639, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0016478670295327902, |
|
"learning_rate": 7.905759162303665e-05, |
|
"loss": 0.0001, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0019091927679255605, |
|
"learning_rate": 7.853403141361257e-05, |
|
"loss": 0.0001, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.00418177992105484, |
|
"learning_rate": 7.801047120418848e-05, |
|
"loss": 0.0001, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"grad_norm": 0.0010454648872837424, |
|
"learning_rate": 7.74869109947644e-05, |
|
"loss": 0.0001, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"grad_norm": 0.003557993331924081, |
|
"learning_rate": 7.696335078534031e-05, |
|
"loss": 0.0001, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"grad_norm": 0.00358203100040555, |
|
"learning_rate": 7.643979057591623e-05, |
|
"loss": 0.0002, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"grad_norm": 0.0017727952217683196, |
|
"learning_rate": 7.591623036649214e-05, |
|
"loss": 0.0002, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"grad_norm": 0.056761130690574646, |
|
"learning_rate": 7.539267015706806e-05, |
|
"loss": 0.0003, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"grad_norm": 0.0010181496618315578, |
|
"learning_rate": 7.486910994764398e-05, |
|
"loss": 0.0003, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"grad_norm": 0.003052978776395321, |
|
"learning_rate": 7.43455497382199e-05, |
|
"loss": 0.0001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_accuracy": 0.968503937007874, |
|
"eval_loss": 0.1217927485704422, |
|
"eval_precision": 0.9692342594033277, |
|
"eval_recall": 0.968503937007874, |
|
"eval_runtime": 17.611, |
|
"eval_samples_per_second": 43.268, |
|
"eval_steps_per_second": 2.726, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"grad_norm": 0.0031509266700595617, |
|
"learning_rate": 7.382198952879581e-05, |
|
"loss": 0.0002, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"grad_norm": 0.0011931936023756862, |
|
"learning_rate": 7.329842931937174e-05, |
|
"loss": 0.0002, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"grad_norm": 0.0014170074136927724, |
|
"learning_rate": 7.277486910994766e-05, |
|
"loss": 0.0002, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 0.0008681151666678488, |
|
"learning_rate": 7.225130890052356e-05, |
|
"loss": 0.0002, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"grad_norm": 0.0015088602667674422, |
|
"learning_rate": 7.172774869109949e-05, |
|
"loss": 0.0002, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"grad_norm": 0.0011560139246284962, |
|
"learning_rate": 7.12041884816754e-05, |
|
"loss": 0.0002, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"grad_norm": 0.0059287999756634235, |
|
"learning_rate": 7.068062827225132e-05, |
|
"loss": 0.0002, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"grad_norm": 0.0017274218844249845, |
|
"learning_rate": 7.015706806282722e-05, |
|
"loss": 0.0002, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"grad_norm": 0.000886244117282331, |
|
"learning_rate": 6.963350785340315e-05, |
|
"loss": 0.0001, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"grad_norm": 0.0028559649363160133, |
|
"learning_rate": 6.910994764397905e-05, |
|
"loss": 0.0002, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"eval_accuracy": 0.968503937007874, |
|
"eval_loss": 0.11844318360090256, |
|
"eval_precision": 0.9692177012971396, |
|
"eval_recall": 0.968503937007874, |
|
"eval_runtime": 17.5917, |
|
"eval_samples_per_second": 43.316, |
|
"eval_steps_per_second": 2.729, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"grad_norm": 0.0011471702018752694, |
|
"learning_rate": 6.858638743455498e-05, |
|
"loss": 0.0001, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"grad_norm": 0.0015491463709622622, |
|
"learning_rate": 6.80628272251309e-05, |
|
"loss": 0.0001, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"grad_norm": 0.0011632316745817661, |
|
"learning_rate": 6.75392670157068e-05, |
|
"loss": 0.0001, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"grad_norm": 0.006089572329074144, |
|
"learning_rate": 6.701570680628273e-05, |
|
"loss": 0.0001, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"grad_norm": 0.0025600052904337645, |
|
"learning_rate": 6.649214659685863e-05, |
|
"loss": 0.0001, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"grad_norm": 0.001310704043135047, |
|
"learning_rate": 6.596858638743456e-05, |
|
"loss": 0.0001, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"grad_norm": 0.0050185080617666245, |
|
"learning_rate": 6.544502617801048e-05, |
|
"loss": 0.0001, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 0.0030103351455181837, |
|
"learning_rate": 6.492146596858639e-05, |
|
"loss": 0.0001, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"grad_norm": 0.0009425992611795664, |
|
"learning_rate": 6.439790575916231e-05, |
|
"loss": 0.0001, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"grad_norm": 0.0016329910140484571, |
|
"learning_rate": 6.387434554973823e-05, |
|
"loss": 0.0002, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"eval_accuracy": 0.9698162729658792, |
|
"eval_loss": 0.11775030940771103, |
|
"eval_precision": 0.9703878401175062, |
|
"eval_recall": 0.9698162729658792, |
|
"eval_runtime": 17.4725, |
|
"eval_samples_per_second": 43.611, |
|
"eval_steps_per_second": 2.747, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"grad_norm": 0.0049902000464499, |
|
"learning_rate": 6.335078534031414e-05, |
|
"loss": 0.0002, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"grad_norm": 0.0012894049286842346, |
|
"learning_rate": 6.282722513089006e-05, |
|
"loss": 0.0001, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 0.0013533816672861576, |
|
"learning_rate": 6.230366492146598e-05, |
|
"loss": 0.0001, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 0.0015272346790879965, |
|
"learning_rate": 6.178010471204189e-05, |
|
"loss": 0.0001, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"grad_norm": 0.0021596469450742006, |
|
"learning_rate": 6.125654450261781e-05, |
|
"loss": 0.0001, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"grad_norm": 0.0010469523258507252, |
|
"learning_rate": 6.073298429319372e-05, |
|
"loss": 0.0001, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"grad_norm": 0.004506214987486601, |
|
"learning_rate": 6.020942408376964e-05, |
|
"loss": 0.0001, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.002483614254742861, |
|
"learning_rate": 5.968586387434555e-05, |
|
"loss": 0.0001, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0011110632913187146, |
|
"learning_rate": 5.916230366492147e-05, |
|
"loss": 0.0001, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0009813542710617185, |
|
"learning_rate": 5.863874345549738e-05, |
|
"loss": 0.0001, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_accuracy": 0.9698162729658792, |
|
"eval_loss": 0.1171058714389801, |
|
"eval_precision": 0.9703878401175062, |
|
"eval_recall": 0.9698162729658792, |
|
"eval_runtime": 17.4755, |
|
"eval_samples_per_second": 43.604, |
|
"eval_steps_per_second": 2.747, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0015625334344804287, |
|
"learning_rate": 5.81151832460733e-05, |
|
"loss": 0.0001, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 0.0011336584575474262, |
|
"learning_rate": 5.759162303664922e-05, |
|
"loss": 0.0001, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"grad_norm": 0.002448306418955326, |
|
"learning_rate": 5.7068062827225135e-05, |
|
"loss": 0.0001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"grad_norm": 0.003220533486455679, |
|
"learning_rate": 5.654450261780106e-05, |
|
"loss": 0.0001, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.0035826002713292837, |
|
"learning_rate": 5.6020942408376965e-05, |
|
"loss": 0.0001, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"grad_norm": 0.0011589195346459746, |
|
"learning_rate": 5.5497382198952887e-05, |
|
"loss": 0.0001, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"grad_norm": 0.00346089294180274, |
|
"learning_rate": 5.4973821989528795e-05, |
|
"loss": 0.0001, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 0.0009584302315488458, |
|
"learning_rate": 5.4450261780104716e-05, |
|
"loss": 0.0001, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"grad_norm": 0.0017620971193537116, |
|
"learning_rate": 5.3926701570680624e-05, |
|
"loss": 0.0001, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"grad_norm": 0.0009925129124894738, |
|
"learning_rate": 5.3403141361256546e-05, |
|
"loss": 0.0001, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_accuracy": 0.9698162729658792, |
|
"eval_loss": 0.11688791215419769, |
|
"eval_precision": 0.9704864266182309, |
|
"eval_recall": 0.9698162729658792, |
|
"eval_runtime": 17.5351, |
|
"eval_samples_per_second": 43.456, |
|
"eval_steps_per_second": 2.737, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"grad_norm": 0.0010989775182679296, |
|
"learning_rate": 5.287958115183246e-05, |
|
"loss": 0.0001, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"grad_norm": 0.001141382148489356, |
|
"learning_rate": 5.235602094240838e-05, |
|
"loss": 0.0001, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"grad_norm": 0.0018131214892491698, |
|
"learning_rate": 5.18324607329843e-05, |
|
"loss": 0.0001, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"grad_norm": 0.0016311927465721965, |
|
"learning_rate": 5.130890052356021e-05, |
|
"loss": 0.0002, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"grad_norm": 0.0019679146353155375, |
|
"learning_rate": 5.0785340314136134e-05, |
|
"loss": 0.0001, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 0.0030663602519780397, |
|
"learning_rate": 5.026178010471204e-05, |
|
"loss": 0.0001, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"grad_norm": 0.0022476576268672943, |
|
"learning_rate": 4.973821989528796e-05, |
|
"loss": 0.0001, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"grad_norm": 0.001257985015399754, |
|
"learning_rate": 4.921465968586388e-05, |
|
"loss": 0.0001, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"grad_norm": 0.002487305086106062, |
|
"learning_rate": 4.869109947643979e-05, |
|
"loss": 0.0001, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"grad_norm": 0.0023026170674711466, |
|
"learning_rate": 4.816753926701571e-05, |
|
"loss": 0.0001, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.11671730875968933, |
|
"eval_precision": 0.9718133010833329, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.5661, |
|
"eval_samples_per_second": 43.379, |
|
"eval_steps_per_second": 2.733, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"grad_norm": 0.0014070291072130203, |
|
"learning_rate": 4.764397905759162e-05, |
|
"loss": 0.0001, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 0.0011672518448904157, |
|
"learning_rate": 4.7120418848167544e-05, |
|
"loss": 0.0001, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"grad_norm": 0.0015011669602245092, |
|
"learning_rate": 4.659685863874346e-05, |
|
"loss": 0.0001, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"grad_norm": 0.001993841025978327, |
|
"learning_rate": 4.6073298429319374e-05, |
|
"loss": 0.0001, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": 0.0008604762842878699, |
|
"learning_rate": 4.554973821989529e-05, |
|
"loss": 0.0001, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"grad_norm": 0.002782499184831977, |
|
"learning_rate": 4.50261780104712e-05, |
|
"loss": 0.0001, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"grad_norm": 0.002074967371299863, |
|
"learning_rate": 4.4502617801047125e-05, |
|
"loss": 0.0001, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"grad_norm": 0.0031987845432013273, |
|
"learning_rate": 4.397905759162304e-05, |
|
"loss": 0.0001, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"grad_norm": 0.0007084137760102749, |
|
"learning_rate": 4.3455497382198955e-05, |
|
"loss": 0.0001, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 0.0019736553076654673, |
|
"learning_rate": 4.293193717277487e-05, |
|
"loss": 0.0001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.11646050214767456, |
|
"eval_precision": 0.9718133010833329, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.4763, |
|
"eval_samples_per_second": 43.602, |
|
"eval_steps_per_second": 2.747, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.8551418818712515e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|