{ "best_metric": 0.08583971858024597, "best_model_checkpoint": "xraynewww/checkpoint-4587", "epoch": 3.0, "eval_steps": 500, "global_step": 4587, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016350555918901243, "grad_norm": 8.776586532592773, "learning_rate": 2.6143790849673204e-06, "loss": 0.769, "step": 25 }, { "epoch": 0.032701111837802485, "grad_norm": 6.286694526672363, "learning_rate": 5.33769063180828e-06, "loss": 0.4887, "step": 50 }, { "epoch": 0.04905166775670373, "grad_norm": 3.120753288269043, "learning_rate": 8.061002178649239e-06, "loss": 0.5396, "step": 75 }, { "epoch": 0.06540222367560497, "grad_norm": 4.589162826538086, "learning_rate": 1.0784313725490197e-05, "loss": 0.4676, "step": 100 }, { "epoch": 0.08175277959450622, "grad_norm": 5.849193096160889, "learning_rate": 1.3507625272331156e-05, "loss": 0.3214, "step": 125 }, { "epoch": 0.09810333551340746, "grad_norm": 16.743791580200195, "learning_rate": 1.6230936819172112e-05, "loss": 0.3655, "step": 150 }, { "epoch": 0.1144538914323087, "grad_norm": 4.951746463775635, "learning_rate": 1.895424836601307e-05, "loss": 0.282, "step": 175 }, { "epoch": 0.13080444735120994, "grad_norm": 2.425187349319458, "learning_rate": 2.1677559912854033e-05, "loss": 0.2814, "step": 200 }, { "epoch": 0.1471550032701112, "grad_norm": 1.2229609489440918, "learning_rate": 2.4400871459694992e-05, "loss": 0.1735, "step": 225 }, { "epoch": 0.16350555918901244, "grad_norm": 6.4670515060424805, "learning_rate": 2.7124183006535947e-05, "loss": 0.3867, "step": 250 }, { "epoch": 0.17985611510791366, "grad_norm": 8.571281433105469, "learning_rate": 2.984749455337691e-05, "loss": 0.2893, "step": 275 }, { "epoch": 0.1962066710268149, "grad_norm": 10.381059646606445, "learning_rate": 3.257080610021787e-05, "loss": 0.2292, "step": 300 }, { "epoch": 0.21255722694571616, "grad_norm": 1.9075127840042114, "learning_rate": 3.529411764705883e-05, "loss": 0.3212, "step": 325 }, { "epoch": 0.2289077828646174, "grad_norm": 0.7908616065979004, "learning_rate": 3.8017429193899786e-05, "loss": 0.4011, "step": 350 }, { "epoch": 0.24525833878351863, "grad_norm": 11.509167671203613, "learning_rate": 4.074074074074074e-05, "loss": 0.3098, "step": 375 }, { "epoch": 0.2616088947024199, "grad_norm": 18.186838150024414, "learning_rate": 4.3464052287581704e-05, "loss": 0.1825, "step": 400 }, { "epoch": 0.2779594506213211, "grad_norm": 4.45954704284668, "learning_rate": 4.6187363834422656e-05, "loss": 0.4669, "step": 425 }, { "epoch": 0.2943100065402224, "grad_norm": 5.498913764953613, "learning_rate": 4.891067538126362e-05, "loss": 0.2676, "step": 450 }, { "epoch": 0.3106605624591236, "grad_norm": 7.379275798797607, "learning_rate": 4.981831395348838e-05, "loss": 0.2986, "step": 475 }, { "epoch": 0.3270111183780249, "grad_norm": 4.307358741760254, "learning_rate": 4.9515503875968994e-05, "loss": 0.2586, "step": 500 }, { "epoch": 0.3433616742969261, "grad_norm": 10.809741973876953, "learning_rate": 4.9212693798449616e-05, "loss": 0.2444, "step": 525 }, { "epoch": 0.3597122302158273, "grad_norm": 4.582003593444824, "learning_rate": 4.890988372093023e-05, "loss": 0.4484, "step": 550 }, { "epoch": 0.3760627861347286, "grad_norm": 2.763364315032959, "learning_rate": 4.860707364341086e-05, "loss": 0.3377, "step": 575 }, { "epoch": 0.3924133420536298, "grad_norm": 11.982911109924316, "learning_rate": 4.830426356589148e-05, "loss": 0.2219, "step": 600 }, { "epoch": 0.40876389797253104, "grad_norm": 14.225927352905273, "learning_rate": 4.8001453488372095e-05, "loss": 0.2669, "step": 625 }, { "epoch": 0.4251144538914323, "grad_norm": 16.698148727416992, "learning_rate": 4.7698643410852716e-05, "loss": 0.3172, "step": 650 }, { "epoch": 0.44146500981033354, "grad_norm": 11.465031623840332, "learning_rate": 4.739583333333333e-05, "loss": 0.3953, "step": 675 }, { "epoch": 0.4578155657292348, "grad_norm": 2.0661489963531494, "learning_rate": 4.709302325581396e-05, "loss": 0.3295, "step": 700 }, { "epoch": 0.47416612164813604, "grad_norm": 4.683841228485107, "learning_rate": 4.679021317829458e-05, "loss": 0.2518, "step": 725 }, { "epoch": 0.49051667756703726, "grad_norm": 0.2995409369468689, "learning_rate": 4.6487403100775196e-05, "loss": 0.2238, "step": 750 }, { "epoch": 0.5068672334859385, "grad_norm": 8.76974105834961, "learning_rate": 4.618459302325582e-05, "loss": 0.4002, "step": 775 }, { "epoch": 0.5232177894048398, "grad_norm": 16.188955307006836, "learning_rate": 4.588178294573643e-05, "loss": 0.2211, "step": 800 }, { "epoch": 0.539568345323741, "grad_norm": 1.0120298862457275, "learning_rate": 4.557897286821706e-05, "loss": 0.3034, "step": 825 }, { "epoch": 0.5559189012426422, "grad_norm": 3.9240946769714355, "learning_rate": 4.527616279069768e-05, "loss": 0.3112, "step": 850 }, { "epoch": 0.5722694571615435, "grad_norm": 0.4812540113925934, "learning_rate": 4.4973352713178296e-05, "loss": 0.1609, "step": 875 }, { "epoch": 0.5886200130804448, "grad_norm": 3.5718393325805664, "learning_rate": 4.467054263565892e-05, "loss": 0.3033, "step": 900 }, { "epoch": 0.604970568999346, "grad_norm": 16.025508880615234, "learning_rate": 4.436773255813953e-05, "loss": 0.3427, "step": 925 }, { "epoch": 0.6213211249182472, "grad_norm": 6.246650218963623, "learning_rate": 4.406492248062016e-05, "loss": 0.1998, "step": 950 }, { "epoch": 0.6376716808371484, "grad_norm": 0.6095386743545532, "learning_rate": 4.3762112403100776e-05, "loss": 0.2766, "step": 975 }, { "epoch": 0.6540222367560498, "grad_norm": 2.4750990867614746, "learning_rate": 4.34593023255814e-05, "loss": 0.1986, "step": 1000 }, { "epoch": 0.670372792674951, "grad_norm": 4.894515514373779, "learning_rate": 4.315649224806202e-05, "loss": 0.3955, "step": 1025 }, { "epoch": 0.6867233485938522, "grad_norm": 0.23163600265979767, "learning_rate": 4.2853682170542634e-05, "loss": 0.205, "step": 1050 }, { "epoch": 0.7030739045127534, "grad_norm": 2.4058098793029785, "learning_rate": 4.255087209302326e-05, "loss": 0.3028, "step": 1075 }, { "epoch": 0.7194244604316546, "grad_norm": 8.029851913452148, "learning_rate": 4.2248062015503877e-05, "loss": 0.1737, "step": 1100 }, { "epoch": 0.7357750163505559, "grad_norm": 20.8834228515625, "learning_rate": 4.19452519379845e-05, "loss": 0.2956, "step": 1125 }, { "epoch": 0.7521255722694572, "grad_norm": 3.3043746948242188, "learning_rate": 4.164244186046512e-05, "loss": 0.2918, "step": 1150 }, { "epoch": 0.7684761281883584, "grad_norm": 3.6136388778686523, "learning_rate": 4.1339631782945734e-05, "loss": 0.2564, "step": 1175 }, { "epoch": 0.7848266841072596, "grad_norm": 4.633103370666504, "learning_rate": 4.103682170542636e-05, "loss": 0.158, "step": 1200 }, { "epoch": 0.8011772400261609, "grad_norm": 6.264617443084717, "learning_rate": 4.073401162790698e-05, "loss": 0.3824, "step": 1225 }, { "epoch": 0.8175277959450621, "grad_norm": 4.16187047958374, "learning_rate": 4.04312015503876e-05, "loss": 0.1884, "step": 1250 }, { "epoch": 0.8338783518639634, "grad_norm": 7.170223236083984, "learning_rate": 4.012839147286822e-05, "loss": 0.2477, "step": 1275 }, { "epoch": 0.8502289077828646, "grad_norm": 9.301641464233398, "learning_rate": 3.9825581395348835e-05, "loss": 0.2557, "step": 1300 }, { "epoch": 0.8665794637017659, "grad_norm": 0.13785265386104584, "learning_rate": 3.9522771317829463e-05, "loss": 0.2382, "step": 1325 }, { "epoch": 0.8829300196206671, "grad_norm": 8.949559211730957, "learning_rate": 3.921996124031008e-05, "loss": 0.3123, "step": 1350 }, { "epoch": 0.8992805755395683, "grad_norm": 5.080151557922363, "learning_rate": 3.89171511627907e-05, "loss": 0.1705, "step": 1375 }, { "epoch": 0.9156311314584696, "grad_norm": 4.757380485534668, "learning_rate": 3.861434108527132e-05, "loss": 0.121, "step": 1400 }, { "epoch": 0.9319816873773709, "grad_norm": 8.166597366333008, "learning_rate": 3.8311531007751936e-05, "loss": 0.2547, "step": 1425 }, { "epoch": 0.9483322432962721, "grad_norm": 0.18423116207122803, "learning_rate": 3.8008720930232564e-05, "loss": 0.1343, "step": 1450 }, { "epoch": 0.9646827992151733, "grad_norm": 9.6463623046875, "learning_rate": 3.770591085271318e-05, "loss": 0.2026, "step": 1475 }, { "epoch": 0.9810333551340745, "grad_norm": 6.031147003173828, "learning_rate": 3.74031007751938e-05, "loss": 0.2007, "step": 1500 }, { "epoch": 0.9973839110529757, "grad_norm": 15.847230911254883, "learning_rate": 3.710029069767442e-05, "loss": 0.287, "step": 1525 }, { "epoch": 1.0, "eval_accuracy": 0.9502145922746781, "eval_auc": 0.9945156335961857, "eval_f1": 0.965352449223417, "eval_loss": 0.15939411520957947, "eval_precision": 0.993849938499385, "eval_recall": 0.9384436701509872, "eval_runtime": 5.7507, "eval_samples_per_second": 202.584, "eval_steps_per_second": 12.694, "step": 1529 }, { "epoch": 1.013734466971877, "grad_norm": 5.32667875289917, "learning_rate": 3.679748062015504e-05, "loss": 0.2212, "step": 1550 }, { "epoch": 1.0300850228907783, "grad_norm": 4.328026294708252, "learning_rate": 3.6494670542635665e-05, "loss": 0.1949, "step": 1575 }, { "epoch": 1.0464355788096795, "grad_norm": 8.589631080627441, "learning_rate": 3.619186046511628e-05, "loss": 0.1544, "step": 1600 }, { "epoch": 1.0627861347285807, "grad_norm": 1.7674341201782227, "learning_rate": 3.58890503875969e-05, "loss": 0.103, "step": 1625 }, { "epoch": 1.079136690647482, "grad_norm": 6.062548637390137, "learning_rate": 3.558624031007752e-05, "loss": 0.2805, "step": 1650 }, { "epoch": 1.0954872465663832, "grad_norm": 8.54261302947998, "learning_rate": 3.528343023255814e-05, "loss": 0.1475, "step": 1675 }, { "epoch": 1.1118378024852844, "grad_norm": 6.659710884094238, "learning_rate": 3.4980620155038766e-05, "loss": 0.2388, "step": 1700 }, { "epoch": 1.1281883584041856, "grad_norm": 4.354561805725098, "learning_rate": 3.467781007751938e-05, "loss": 0.1795, "step": 1725 }, { "epoch": 1.144538914323087, "grad_norm": 0.20215673744678497, "learning_rate": 3.4375e-05, "loss": 0.1543, "step": 1750 }, { "epoch": 1.1608894702419883, "grad_norm": 0.9856411218643188, "learning_rate": 3.4072189922480624e-05, "loss": 0.2781, "step": 1775 }, { "epoch": 1.1772400261608895, "grad_norm": 0.7960185408592224, "learning_rate": 3.376937984496124e-05, "loss": 0.2001, "step": 1800 }, { "epoch": 1.1935905820797907, "grad_norm": 4.385952949523926, "learning_rate": 3.346656976744187e-05, "loss": 0.262, "step": 1825 }, { "epoch": 1.209941137998692, "grad_norm": 5.5558576583862305, "learning_rate": 3.316375968992248e-05, "loss": 0.1862, "step": 1850 }, { "epoch": 1.2262916939175932, "grad_norm": 8.943402290344238, "learning_rate": 3.28609496124031e-05, "loss": 0.1657, "step": 1875 }, { "epoch": 1.2426422498364944, "grad_norm": 0.3145776689052582, "learning_rate": 3.2558139534883724e-05, "loss": 0.213, "step": 1900 }, { "epoch": 1.2589928057553956, "grad_norm": 0.6332132816314697, "learning_rate": 3.225532945736434e-05, "loss": 0.1174, "step": 1925 }, { "epoch": 1.2753433616742968, "grad_norm": 3.167943239212036, "learning_rate": 3.195251937984496e-05, "loss": 0.1622, "step": 1950 }, { "epoch": 1.2916939175931983, "grad_norm": 0.24524331092834473, "learning_rate": 3.164970930232558e-05, "loss": 0.2144, "step": 1975 }, { "epoch": 1.3080444735120995, "grad_norm": 5.5770263671875, "learning_rate": 3.1346899224806204e-05, "loss": 0.1812, "step": 2000 }, { "epoch": 1.3243950294310007, "grad_norm": 0.6289435029029846, "learning_rate": 3.1044089147286825e-05, "loss": 0.1762, "step": 2025 }, { "epoch": 1.340745585349902, "grad_norm": 6.443571090698242, "learning_rate": 3.074127906976744e-05, "loss": 0.1759, "step": 2050 }, { "epoch": 1.3570961412688032, "grad_norm": 6.263022422790527, "learning_rate": 3.0450581395348838e-05, "loss": 0.411, "step": 2075 }, { "epoch": 1.3734466971877044, "grad_norm": 4.602290630340576, "learning_rate": 3.014777131782946e-05, "loss": 0.1595, "step": 2100 }, { "epoch": 1.3897972531066056, "grad_norm": 13.215031623840332, "learning_rate": 2.9844961240310077e-05, "loss": 0.2196, "step": 2125 }, { "epoch": 1.4061478090255068, "grad_norm": 0.35713547468185425, "learning_rate": 2.95421511627907e-05, "loss": 0.1646, "step": 2150 }, { "epoch": 1.422498364944408, "grad_norm": 10.571084022521973, "learning_rate": 2.923934108527132e-05, "loss": 0.2415, "step": 2175 }, { "epoch": 1.4388489208633093, "grad_norm": 0.12735210359096527, "learning_rate": 2.893653100775194e-05, "loss": 0.1596, "step": 2200 }, { "epoch": 1.4551994767822105, "grad_norm": 0.8274198174476624, "learning_rate": 2.863372093023256e-05, "loss": 0.2063, "step": 2225 }, { "epoch": 1.4715500327011117, "grad_norm": 0.18884733319282532, "learning_rate": 2.8330910852713178e-05, "loss": 0.2319, "step": 2250 }, { "epoch": 1.487900588620013, "grad_norm": 7.244335651397705, "learning_rate": 2.80281007751938e-05, "loss": 0.171, "step": 2275 }, { "epoch": 1.5042511445389142, "grad_norm": 1.030834436416626, "learning_rate": 2.772529069767442e-05, "loss": 0.1566, "step": 2300 }, { "epoch": 1.5206017004578156, "grad_norm": 0.2723897099494934, "learning_rate": 2.742248062015504e-05, "loss": 0.2864, "step": 2325 }, { "epoch": 1.5369522563767168, "grad_norm": 0.3210669457912445, "learning_rate": 2.711967054263566e-05, "loss": 0.1941, "step": 2350 }, { "epoch": 1.553302812295618, "grad_norm": 5.555069446563721, "learning_rate": 2.681686046511628e-05, "loss": 0.2085, "step": 2375 }, { "epoch": 1.5696533682145193, "grad_norm": 8.625024795532227, "learning_rate": 2.65140503875969e-05, "loss": 0.1789, "step": 2400 }, { "epoch": 1.5860039241334205, "grad_norm": 7.455594539642334, "learning_rate": 2.6211240310077522e-05, "loss": 0.2819, "step": 2425 }, { "epoch": 1.6023544800523217, "grad_norm": 2.1804699897766113, "learning_rate": 2.590843023255814e-05, "loss": 0.2866, "step": 2450 }, { "epoch": 1.6187050359712232, "grad_norm": 10.467452049255371, "learning_rate": 2.560562015503876e-05, "loss": 0.2576, "step": 2475 }, { "epoch": 1.6350555918901244, "grad_norm": 4.638566493988037, "learning_rate": 2.530281007751938e-05, "loss": 0.275, "step": 2500 }, { "epoch": 1.6514061478090256, "grad_norm": 6.57110595703125, "learning_rate": 2.5e-05, "loss": 0.1353, "step": 2525 }, { "epoch": 1.6677567037279268, "grad_norm": 0.1900484710931778, "learning_rate": 2.469718992248062e-05, "loss": 0.2028, "step": 2550 }, { "epoch": 1.684107259646828, "grad_norm": 0.9844114184379578, "learning_rate": 2.439437984496124e-05, "loss": 0.1779, "step": 2575 }, { "epoch": 1.7004578155657293, "grad_norm": 3.4427802562713623, "learning_rate": 2.4091569767441862e-05, "loss": 0.2193, "step": 2600 }, { "epoch": 1.7168083714846305, "grad_norm": 0.2224160134792328, "learning_rate": 2.3788759689922484e-05, "loss": 0.2097, "step": 2625 }, { "epoch": 1.7331589274035317, "grad_norm": 5.424374580383301, "learning_rate": 2.3485949612403102e-05, "loss": 0.1106, "step": 2650 }, { "epoch": 1.749509483322433, "grad_norm": 0.08824160695075989, "learning_rate": 2.318313953488372e-05, "loss": 0.1152, "step": 2675 }, { "epoch": 1.7658600392413342, "grad_norm": 13.905054092407227, "learning_rate": 2.288032945736434e-05, "loss": 0.1858, "step": 2700 }, { "epoch": 1.7822105951602354, "grad_norm": 1.2964977025985718, "learning_rate": 2.2577519379844963e-05, "loss": 0.1714, "step": 2725 }, { "epoch": 1.7985611510791366, "grad_norm": 40.13843536376953, "learning_rate": 2.2274709302325585e-05, "loss": 0.0823, "step": 2750 }, { "epoch": 1.8149117069980378, "grad_norm": 6.79445219039917, "learning_rate": 2.1971899224806203e-05, "loss": 0.0833, "step": 2775 }, { "epoch": 1.831262262916939, "grad_norm": 5.254680156707764, "learning_rate": 2.166908914728682e-05, "loss": 0.126, "step": 2800 }, { "epoch": 1.8476128188358403, "grad_norm": 0.21236732602119446, "learning_rate": 2.1366279069767442e-05, "loss": 0.1087, "step": 2825 }, { "epoch": 1.8639633747547415, "grad_norm": 1.6276271343231201, "learning_rate": 2.1063468992248064e-05, "loss": 0.0624, "step": 2850 }, { "epoch": 1.880313930673643, "grad_norm": 0.12902870774269104, "learning_rate": 2.0760658914728685e-05, "loss": 0.2461, "step": 2875 }, { "epoch": 1.8966644865925442, "grad_norm": 0.7700151801109314, "learning_rate": 2.0457848837209304e-05, "loss": 0.1109, "step": 2900 }, { "epoch": 1.9130150425114454, "grad_norm": 9.981553077697754, "learning_rate": 2.0155038759689922e-05, "loss": 0.1031, "step": 2925 }, { "epoch": 1.9293655984303466, "grad_norm": 7.399847507476807, "learning_rate": 1.9852228682170543e-05, "loss": 0.2301, "step": 2950 }, { "epoch": 1.9457161543492478, "grad_norm": 10.187629699707031, "learning_rate": 1.954941860465116e-05, "loss": 0.2124, "step": 2975 }, { "epoch": 1.9620667102681493, "grad_norm": 6.082677841186523, "learning_rate": 1.9246608527131786e-05, "loss": 0.248, "step": 3000 }, { "epoch": 1.9784172661870505, "grad_norm": 11.070582389831543, "learning_rate": 1.8943798449612404e-05, "loss": 0.11, "step": 3025 }, { "epoch": 1.9947678221059517, "grad_norm": 1.658309817314148, "learning_rate": 1.8640988372093023e-05, "loss": 0.2387, "step": 3050 }, { "epoch": 2.0, "eval_accuracy": 0.9699570815450643, "eval_auc": 0.9967430007946696, "eval_f1": 0.9799196787148594, "eval_loss": 0.08820199966430664, "eval_precision": 0.9682539682539683, "eval_recall": 0.991869918699187, "eval_runtime": 5.7323, "eval_samples_per_second": 203.234, "eval_steps_per_second": 12.735, "step": 3058 }, { "epoch": 2.011118378024853, "grad_norm": 0.42580923438072205, "learning_rate": 1.8338178294573644e-05, "loss": 0.1586, "step": 3075 }, { "epoch": 2.027468933943754, "grad_norm": 12.402567863464355, "learning_rate": 1.8035368217054262e-05, "loss": 0.1163, "step": 3100 }, { "epoch": 2.0438194898626554, "grad_norm": 9.509015083312988, "learning_rate": 1.7732558139534887e-05, "loss": 0.1893, "step": 3125 }, { "epoch": 2.0601700457815566, "grad_norm": 0.024427318945527077, "learning_rate": 1.7429748062015505e-05, "loss": 0.1121, "step": 3150 }, { "epoch": 2.076520601700458, "grad_norm": 3.6162619590759277, "learning_rate": 1.7126937984496123e-05, "loss": 0.1592, "step": 3175 }, { "epoch": 2.092871157619359, "grad_norm": 0.3668271601200104, "learning_rate": 1.6824127906976745e-05, "loss": 0.1724, "step": 3200 }, { "epoch": 2.1092217135382603, "grad_norm": 0.10604169964790344, "learning_rate": 1.6521317829457363e-05, "loss": 0.1133, "step": 3225 }, { "epoch": 2.1255722694571615, "grad_norm": 5.482110977172852, "learning_rate": 1.6218507751937988e-05, "loss": 0.2885, "step": 3250 }, { "epoch": 2.1419228253760627, "grad_norm": 2.177529811859131, "learning_rate": 1.5915697674418606e-05, "loss": 0.1656, "step": 3275 }, { "epoch": 2.158273381294964, "grad_norm": 0.7062684893608093, "learning_rate": 1.5612887596899224e-05, "loss": 0.1784, "step": 3300 }, { "epoch": 2.174623937213865, "grad_norm": 0.6271941065788269, "learning_rate": 1.5310077519379846e-05, "loss": 0.0416, "step": 3325 }, { "epoch": 2.1909744931327664, "grad_norm": 0.13655735552310944, "learning_rate": 1.5007267441860465e-05, "loss": 0.1002, "step": 3350 }, { "epoch": 2.2073250490516676, "grad_norm": 8.668305397033691, "learning_rate": 1.4704457364341087e-05, "loss": 0.1661, "step": 3375 }, { "epoch": 2.223675604970569, "grad_norm": 5.533019065856934, "learning_rate": 1.4401647286821707e-05, "loss": 0.3131, "step": 3400 }, { "epoch": 2.24002616088947, "grad_norm": 9.50761890411377, "learning_rate": 1.4098837209302327e-05, "loss": 0.155, "step": 3425 }, { "epoch": 2.2563767168083713, "grad_norm": 14.682671546936035, "learning_rate": 1.3796027131782946e-05, "loss": 0.1707, "step": 3450 }, { "epoch": 2.2727272727272725, "grad_norm": 14.890127182006836, "learning_rate": 1.3493217054263566e-05, "loss": 0.1336, "step": 3475 }, { "epoch": 2.289077828646174, "grad_norm": 0.08246014267206192, "learning_rate": 1.3190406976744188e-05, "loss": 0.1234, "step": 3500 }, { "epoch": 2.3054283845650754, "grad_norm": 0.00616547791287303, "learning_rate": 1.2887596899224808e-05, "loss": 0.1608, "step": 3525 }, { "epoch": 2.3217789404839766, "grad_norm": 6.7408576011657715, "learning_rate": 1.2584786821705427e-05, "loss": 0.1328, "step": 3550 }, { "epoch": 2.338129496402878, "grad_norm": 13.762513160705566, "learning_rate": 1.2281976744186047e-05, "loss": 0.0673, "step": 3575 }, { "epoch": 2.354480052321779, "grad_norm": 0.3772618770599365, "learning_rate": 1.1979166666666667e-05, "loss": 0.1469, "step": 3600 }, { "epoch": 2.3708306082406803, "grad_norm": 0.21671177446842194, "learning_rate": 1.1676356589147287e-05, "loss": 0.1849, "step": 3625 }, { "epoch": 2.3871811641595815, "grad_norm": 4.807242393493652, "learning_rate": 1.1373546511627907e-05, "loss": 0.202, "step": 3650 }, { "epoch": 2.4035317200784827, "grad_norm": 2.1541972160339355, "learning_rate": 1.1070736434108528e-05, "loss": 0.2248, "step": 3675 }, { "epoch": 2.419882275997384, "grad_norm": 0.06315416097640991, "learning_rate": 1.0767926356589148e-05, "loss": 0.1071, "step": 3700 }, { "epoch": 2.436232831916285, "grad_norm": 0.3411709666252136, "learning_rate": 1.0465116279069768e-05, "loss": 0.073, "step": 3725 }, { "epoch": 2.4525833878351864, "grad_norm": 0.4479942321777344, "learning_rate": 1.0162306201550388e-05, "loss": 0.1289, "step": 3750 }, { "epoch": 2.4689339437540876, "grad_norm": 3.079694986343384, "learning_rate": 9.859496124031007e-06, "loss": 0.1315, "step": 3775 }, { "epoch": 2.485284499672989, "grad_norm": 0.02924235165119171, "learning_rate": 9.556686046511629e-06, "loss": 0.152, "step": 3800 }, { "epoch": 2.50163505559189, "grad_norm": 0.04543924704194069, "learning_rate": 9.253875968992249e-06, "loss": 0.115, "step": 3825 }, { "epoch": 2.5179856115107913, "grad_norm": 13.066108703613281, "learning_rate": 8.951065891472869e-06, "loss": 0.154, "step": 3850 }, { "epoch": 2.5343361674296925, "grad_norm": 0.13062244653701782, "learning_rate": 8.648255813953488e-06, "loss": 0.0953, "step": 3875 }, { "epoch": 2.5506867233485937, "grad_norm": 14.618424415588379, "learning_rate": 8.345445736434108e-06, "loss": 0.1167, "step": 3900 }, { "epoch": 2.5670372792674954, "grad_norm": 4.791504859924316, "learning_rate": 8.04263565891473e-06, "loss": 0.2192, "step": 3925 }, { "epoch": 2.5833878351863966, "grad_norm": 0.2636285126209259, "learning_rate": 7.73982558139535e-06, "loss": 0.1261, "step": 3950 }, { "epoch": 2.599738391105298, "grad_norm": 5.333641529083252, "learning_rate": 7.4370155038759686e-06, "loss": 0.1271, "step": 3975 }, { "epoch": 2.616088947024199, "grad_norm": 5.496675491333008, "learning_rate": 7.13420542635659e-06, "loss": 0.1443, "step": 4000 }, { "epoch": 2.6324395029431003, "grad_norm": 0.13467253744602203, "learning_rate": 6.831395348837209e-06, "loss": 0.061, "step": 4025 }, { "epoch": 2.6487900588620015, "grad_norm": 0.5136555433273315, "learning_rate": 6.5285852713178306e-06, "loss": 0.255, "step": 4050 }, { "epoch": 2.6651406147809027, "grad_norm": 0.19155253469944, "learning_rate": 6.2257751937984495e-06, "loss": 0.1159, "step": 4075 }, { "epoch": 2.681491170699804, "grad_norm": 0.2983386814594269, "learning_rate": 5.92296511627907e-06, "loss": 0.1332, "step": 4100 }, { "epoch": 2.697841726618705, "grad_norm": 1.9400678873062134, "learning_rate": 5.620155038759691e-06, "loss": 0.1556, "step": 4125 }, { "epoch": 2.7141922825376064, "grad_norm": 5.343267440795898, "learning_rate": 5.31734496124031e-06, "loss": 0.1416, "step": 4150 }, { "epoch": 2.7305428384565076, "grad_norm": 6.421015739440918, "learning_rate": 5.0145348837209305e-06, "loss": 0.1812, "step": 4175 }, { "epoch": 2.746893394375409, "grad_norm": 0.15666760504245758, "learning_rate": 4.71172480620155e-06, "loss": 0.1108, "step": 4200 }, { "epoch": 2.76324395029431, "grad_norm": 0.2742402255535126, "learning_rate": 4.408914728682171e-06, "loss": 0.1291, "step": 4225 }, { "epoch": 2.7795945062132112, "grad_norm": 0.05212033912539482, "learning_rate": 4.106104651162791e-06, "loss": 0.0938, "step": 4250 }, { "epoch": 2.7959450621321125, "grad_norm": 0.1697097271680832, "learning_rate": 3.8032945736434107e-06, "loss": 0.0349, "step": 4275 }, { "epoch": 2.8122956180510137, "grad_norm": 1.2600692510604858, "learning_rate": 3.5004844961240313e-06, "loss": 0.0619, "step": 4300 }, { "epoch": 2.828646173969915, "grad_norm": 0.0772242546081543, "learning_rate": 3.1976744186046516e-06, "loss": 0.107, "step": 4325 }, { "epoch": 2.844996729888816, "grad_norm": 0.17465172708034515, "learning_rate": 2.8948643410852714e-06, "loss": 0.0803, "step": 4350 }, { "epoch": 2.8613472858077174, "grad_norm": 11.832673072814941, "learning_rate": 2.5920542635658916e-06, "loss": 0.1322, "step": 4375 }, { "epoch": 2.8776978417266186, "grad_norm": 10.022361755371094, "learning_rate": 2.2892441860465114e-06, "loss": 0.1041, "step": 4400 }, { "epoch": 2.89404839764552, "grad_norm": 5.453954219818115, "learning_rate": 1.986434108527132e-06, "loss": 0.1611, "step": 4425 }, { "epoch": 2.910398953564421, "grad_norm": 0.10524914413690567, "learning_rate": 1.683624031007752e-06, "loss": 0.1443, "step": 4450 }, { "epoch": 2.9267495094833222, "grad_norm": 8.652328491210938, "learning_rate": 1.3808139534883722e-06, "loss": 0.1896, "step": 4475 }, { "epoch": 2.9431000654022235, "grad_norm": 0.2993433177471161, "learning_rate": 1.0780038759689922e-06, "loss": 0.0849, "step": 4500 }, { "epoch": 2.9594506213211247, "grad_norm": 0.6885517239570618, "learning_rate": 7.751937984496125e-07, "loss": 0.1334, "step": 4525 }, { "epoch": 2.975801177240026, "grad_norm": 7.389411926269531, "learning_rate": 4.7238372093023254e-07, "loss": 0.202, "step": 4550 }, { "epoch": 2.992151733158927, "grad_norm": 2.6400258541107178, "learning_rate": 1.6957364341085273e-07, "loss": 0.1566, "step": 4575 }, { "epoch": 3.0, "eval_accuracy": 0.976824034334764, "eval_auc": 0.9978146585977138, "eval_f1": 0.9841642228739003, "eval_loss": 0.08583971858024597, "eval_precision": 0.9940758293838863, "eval_recall": 0.9744483159117305, "eval_runtime": 5.747, "eval_samples_per_second": 202.716, "eval_steps_per_second": 12.702, "step": 4587 } ], "logging_steps": 25, "max_steps": 4587, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8429486229905367e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }