{
"best_metric": 0.5487671335091459,
"best_model_checkpoint": "/mnt/data/bert24/fineweb_edu/checkpoints/5e-5_one_label/checkpoint-13149",
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 13149,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022815423226100844,
"grad_norm": 3.930772542953491,
"learning_rate": 1.1407711613050422e-06,
"loss": 0.8134,
"step": 100
},
{
"epoch": 0.04563084645220169,
"grad_norm": 3.383699655532837,
"learning_rate": 2.2815423226100845e-06,
"loss": 0.4827,
"step": 200
},
{
"epoch": 0.06844626967830253,
"grad_norm": 13.854891777038574,
"learning_rate": 3.4223134839151265e-06,
"loss": 0.386,
"step": 300
},
{
"epoch": 0.09126169290440338,
"grad_norm": 4.105241775512695,
"learning_rate": 4.563084645220169e-06,
"loss": 0.3565,
"step": 400
},
{
"epoch": 0.11407711613050422,
"grad_norm": 3.2210636138916016,
"learning_rate": 5.703855806525212e-06,
"loss": 0.3272,
"step": 500
},
{
"epoch": 0.13689253935660506,
"grad_norm": 4.857998847961426,
"learning_rate": 6.844626967830253e-06,
"loss": 0.3297,
"step": 600
},
{
"epoch": 0.15970796258270592,
"grad_norm": 2.4987571239471436,
"learning_rate": 7.985398129135297e-06,
"loss": 0.3093,
"step": 700
},
{
"epoch": 0.18252338580880675,
"grad_norm": 6.888661861419678,
"learning_rate": 9.126169290440338e-06,
"loss": 0.3108,
"step": 800
},
{
"epoch": 0.2053388090349076,
"grad_norm": 4.0510358810424805,
"learning_rate": 1.0266940451745379e-05,
"loss": 0.318,
"step": 900
},
{
"epoch": 0.22815423226100845,
"grad_norm": 7.22244119644165,
"learning_rate": 1.1407711613050424e-05,
"loss": 0.293,
"step": 1000
},
{
"epoch": 0.2509696554871093,
"grad_norm": 7.43681526184082,
"learning_rate": 1.2548482774355466e-05,
"loss": 0.2853,
"step": 1100
},
{
"epoch": 0.2737850787132101,
"grad_norm": 3.81840443611145,
"learning_rate": 1.3689253935660506e-05,
"loss": 0.2784,
"step": 1200
},
{
"epoch": 0.296600501939311,
"grad_norm": 8.012810707092285,
"learning_rate": 1.4830025096965549e-05,
"loss": 0.2698,
"step": 1300
},
{
"epoch": 0.31941592516541184,
"grad_norm": 4.390589714050293,
"learning_rate": 1.5970796258270593e-05,
"loss": 0.2653,
"step": 1400
},
{
"epoch": 0.34223134839151265,
"grad_norm": 9.778414726257324,
"learning_rate": 1.7111567419575633e-05,
"loss": 0.2666,
"step": 1500
},
{
"epoch": 0.3650467716176135,
"grad_norm": 3.7897677421569824,
"learning_rate": 1.8252338580880676e-05,
"loss": 0.259,
"step": 1600
},
{
"epoch": 0.38786219484371437,
"grad_norm": 3.3615636825561523,
"learning_rate": 1.939310974218572e-05,
"loss": 0.2522,
"step": 1700
},
{
"epoch": 0.4106776180698152,
"grad_norm": 5.232314109802246,
"learning_rate": 2.0533880903490758e-05,
"loss": 0.2547,
"step": 1800
},
{
"epoch": 0.43349304129591604,
"grad_norm": 3.6110997200012207,
"learning_rate": 2.1674652064795804e-05,
"loss": 0.2594,
"step": 1900
},
{
"epoch": 0.4563084645220169,
"grad_norm": 2.4445459842681885,
"learning_rate": 2.2815423226100847e-05,
"loss": 0.2537,
"step": 2000
},
{
"epoch": 0.4791238877481177,
"grad_norm": 3.6951136589050293,
"learning_rate": 2.3956194387405887e-05,
"loss": 0.2518,
"step": 2100
},
{
"epoch": 0.5019393109742186,
"grad_norm": 3.9381537437438965,
"learning_rate": 2.5096965548710933e-05,
"loss": 0.2516,
"step": 2200
},
{
"epoch": 0.5247547342003194,
"grad_norm": 1.5301635265350342,
"learning_rate": 2.623773671001597e-05,
"loss": 0.2435,
"step": 2300
},
{
"epoch": 0.5475701574264202,
"grad_norm": 4.029539585113525,
"learning_rate": 2.7378507871321012e-05,
"loss": 0.2467,
"step": 2400
},
{
"epoch": 0.5703855806525211,
"grad_norm": 5.817471027374268,
"learning_rate": 2.8519279032626055e-05,
"loss": 0.2452,
"step": 2500
},
{
"epoch": 0.593201003878622,
"grad_norm": 3.2799909114837646,
"learning_rate": 2.9660050193931098e-05,
"loss": 0.2425,
"step": 2600
},
{
"epoch": 0.6160164271047228,
"grad_norm": 1.8384263515472412,
"learning_rate": 3.080082135523614e-05,
"loss": 0.2371,
"step": 2700
},
{
"epoch": 0.6388318503308237,
"grad_norm": 2.4290475845336914,
"learning_rate": 3.194159251654119e-05,
"loss": 0.2484,
"step": 2800
},
{
"epoch": 0.6616472735569244,
"grad_norm": 1.5778276920318604,
"learning_rate": 3.3082363677846226e-05,
"loss": 0.2328,
"step": 2900
},
{
"epoch": 0.6844626967830253,
"grad_norm": 2.872375249862671,
"learning_rate": 3.4223134839151266e-05,
"loss": 0.2546,
"step": 3000
},
{
"epoch": 0.7072781200091262,
"grad_norm": 1.8962109088897705,
"learning_rate": 3.536390600045631e-05,
"loss": 0.236,
"step": 3100
},
{
"epoch": 0.730093543235227,
"grad_norm": 2.156942129135132,
"learning_rate": 3.650467716176135e-05,
"loss": 0.2366,
"step": 3200
},
{
"epoch": 0.7529089664613279,
"grad_norm": 2.4454355239868164,
"learning_rate": 3.76454483230664e-05,
"loss": 0.2304,
"step": 3300
},
{
"epoch": 0.7757243896874287,
"grad_norm": 4.029500961303711,
"learning_rate": 3.878621948437144e-05,
"loss": 0.2353,
"step": 3400
},
{
"epoch": 0.7985398129135296,
"grad_norm": 1.7412891387939453,
"learning_rate": 3.9926990645676483e-05,
"loss": 0.2314,
"step": 3500
},
{
"epoch": 0.8213552361396304,
"grad_norm": 2.626089572906494,
"learning_rate": 4.1067761806981516e-05,
"loss": 0.2327,
"step": 3600
},
{
"epoch": 0.8441706593657312,
"grad_norm": 1.2162595987319946,
"learning_rate": 4.220853296828656e-05,
"loss": 0.2367,
"step": 3700
},
{
"epoch": 0.8669860825918321,
"grad_norm": 2.4519355297088623,
"learning_rate": 4.334930412959161e-05,
"loss": 0.2269,
"step": 3800
},
{
"epoch": 0.8898015058179329,
"grad_norm": 1.2700366973876953,
"learning_rate": 4.449007529089665e-05,
"loss": 0.2334,
"step": 3900
},
{
"epoch": 0.9126169290440338,
"grad_norm": 2.6412575244903564,
"learning_rate": 4.5630846452201694e-05,
"loss": 0.2315,
"step": 4000
},
{
"epoch": 0.9354323522701347,
"grad_norm": 1.5039851665496826,
"learning_rate": 4.6771617613506734e-05,
"loss": 0.2335,
"step": 4100
},
{
"epoch": 0.9582477754962354,
"grad_norm": 5.099658012390137,
"learning_rate": 4.791238877481177e-05,
"loss": 0.2341,
"step": 4200
},
{
"epoch": 0.9810631987223363,
"grad_norm": 3.0738842487335205,
"learning_rate": 4.905315993611681e-05,
"loss": 0.2375,
"step": 4300
},
{
"epoch": 1.0,
"eval_accuracy": 0.7342489784566673,
"eval_f1_macro": 0.49721066997853086,
"eval_loss": 0.2188662439584732,
"eval_precision": 0.6009352545578245,
"eval_recall": 0.4565070202147285,
"eval_runtime": 19.3025,
"eval_samples_per_second": 2421.601,
"eval_steps_per_second": 3.16,
"step": 4383
},
{
"epoch": 1.0038786219484372,
"grad_norm": 0.8934878706932068,
"learning_rate": 4.997845210028646e-05,
"loss": 0.2254,
"step": 4400
},
{
"epoch": 1.0266940451745379,
"grad_norm": 2.297649621963501,
"learning_rate": 4.985169974903035e-05,
"loss": 0.2097,
"step": 4500
},
{
"epoch": 1.0495094684006387,
"grad_norm": 1.5094428062438965,
"learning_rate": 4.972494739777423e-05,
"loss": 0.208,
"step": 4600
},
{
"epoch": 1.0723248916267396,
"grad_norm": 1.9187654256820679,
"learning_rate": 4.9598195046518116e-05,
"loss": 0.2003,
"step": 4700
},
{
"epoch": 1.0951403148528405,
"grad_norm": 0.9724093079566956,
"learning_rate": 4.9471442695262e-05,
"loss": 0.1976,
"step": 4800
},
{
"epoch": 1.1179557380789413,
"grad_norm": 1.6662517786026,
"learning_rate": 4.9344690344005885e-05,
"loss": 0.1982,
"step": 4900
},
{
"epoch": 1.1407711613050422,
"grad_norm": 0.7583943009376526,
"learning_rate": 4.921793799274977e-05,
"loss": 0.2014,
"step": 5000
},
{
"epoch": 1.163586584531143,
"grad_norm": 3.5475401878356934,
"learning_rate": 4.9091185641493654e-05,
"loss": 0.1982,
"step": 5100
},
{
"epoch": 1.186402007757244,
"grad_norm": 1.741073727607727,
"learning_rate": 4.896443329023754e-05,
"loss": 0.2044,
"step": 5200
},
{
"epoch": 1.2092174309833448,
"grad_norm": 1.7129486799240112,
"learning_rate": 4.883768093898142e-05,
"loss": 0.2039,
"step": 5300
},
{
"epoch": 1.2320328542094456,
"grad_norm": 1.3080261945724487,
"learning_rate": 4.87109285877253e-05,
"loss": 0.1997,
"step": 5400
},
{
"epoch": 1.2548482774355465,
"grad_norm": 2.1321465969085693,
"learning_rate": 4.8584176236469186e-05,
"loss": 0.2013,
"step": 5500
},
{
"epoch": 1.2776637006616474,
"grad_norm": 1.2857780456542969,
"learning_rate": 4.845742388521307e-05,
"loss": 0.2038,
"step": 5600
},
{
"epoch": 1.3004791238877482,
"grad_norm": 2.0227644443511963,
"learning_rate": 4.8330671533956955e-05,
"loss": 0.1952,
"step": 5700
},
{
"epoch": 1.323294547113849,
"grad_norm": 1.627146601676941,
"learning_rate": 4.820391918270084e-05,
"loss": 0.1971,
"step": 5800
},
{
"epoch": 1.3461099703399497,
"grad_norm": 1.5856671333312988,
"learning_rate": 4.8077166831444724e-05,
"loss": 0.1966,
"step": 5900
},
{
"epoch": 1.3689253935660506,
"grad_norm": 0.9896683692932129,
"learning_rate": 4.795041448018861e-05,
"loss": 0.1949,
"step": 6000
},
{
"epoch": 1.3917408167921514,
"grad_norm": 1.0868607759475708,
"learning_rate": 4.782366212893249e-05,
"loss": 0.1936,
"step": 6100
},
{
"epoch": 1.4145562400182523,
"grad_norm": 1.2213613986968994,
"learning_rate": 4.769690977767638e-05,
"loss": 0.1951,
"step": 6200
},
{
"epoch": 1.4373716632443532,
"grad_norm": 2.0876383781433105,
"learning_rate": 4.757015742642026e-05,
"loss": 0.1956,
"step": 6300
},
{
"epoch": 1.460187086470454,
"grad_norm": 3.948638916015625,
"learning_rate": 4.7443405075164146e-05,
"loss": 0.1999,
"step": 6400
},
{
"epoch": 1.483002509696555,
"grad_norm": 2.2048580646514893,
"learning_rate": 4.731665272390803e-05,
"loss": 0.1984,
"step": 6500
},
{
"epoch": 1.5058179329226558,
"grad_norm": 1.1835227012634277,
"learning_rate": 4.7189900372651915e-05,
"loss": 0.1936,
"step": 6600
},
{
"epoch": 1.5286333561487564,
"grad_norm": 1.2532591819763184,
"learning_rate": 4.70631480213958e-05,
"loss": 0.2015,
"step": 6700
},
{
"epoch": 1.5514487793748573,
"grad_norm": 1.1021713018417358,
"learning_rate": 4.6936395670139684e-05,
"loss": 0.1946,
"step": 6800
},
{
"epoch": 1.5742642026009581,
"grad_norm": 0.7517445087432861,
"learning_rate": 4.680964331888357e-05,
"loss": 0.2024,
"step": 6900
},
{
"epoch": 1.597079625827059,
"grad_norm": 1.0925252437591553,
"learning_rate": 4.668289096762745e-05,
"loss": 0.2004,
"step": 7000
},
{
"epoch": 1.6198950490531598,
"grad_norm": 1.2946810722351074,
"learning_rate": 4.655613861637134e-05,
"loss": 0.1941,
"step": 7100
},
{
"epoch": 1.6427104722792607,
"grad_norm": 1.1611590385437012,
"learning_rate": 4.642938626511522e-05,
"loss": 0.1987,
"step": 7200
},
{
"epoch": 1.6655258955053616,
"grad_norm": 0.7706720232963562,
"learning_rate": 4.6302633913859107e-05,
"loss": 0.1977,
"step": 7300
},
{
"epoch": 1.6883413187314624,
"grad_norm": 2.32635760307312,
"learning_rate": 4.617588156260299e-05,
"loss": 0.2002,
"step": 7400
},
{
"epoch": 1.7111567419575633,
"grad_norm": 2.003863573074341,
"learning_rate": 4.6049129211346876e-05,
"loss": 0.1981,
"step": 7500
},
{
"epoch": 1.7339721651836641,
"grad_norm": 0.9495589137077332,
"learning_rate": 4.592237686009075e-05,
"loss": 0.1969,
"step": 7600
},
{
"epoch": 1.756787588409765,
"grad_norm": 1.1177550554275513,
"learning_rate": 4.579562450883464e-05,
"loss": 0.196,
"step": 7700
},
{
"epoch": 1.7796030116358659,
"grad_norm": 2.062082290649414,
"learning_rate": 4.566887215757852e-05,
"loss": 0.1979,
"step": 7800
},
{
"epoch": 1.8024184348619667,
"grad_norm": 1.265002965927124,
"learning_rate": 4.554211980632241e-05,
"loss": 0.1911,
"step": 7900
},
{
"epoch": 1.8252338580880676,
"grad_norm": 1.4993034601211548,
"learning_rate": 4.541536745506629e-05,
"loss": 0.1917,
"step": 8000
},
{
"epoch": 1.8480492813141685,
"grad_norm": 0.7223235964775085,
"learning_rate": 4.5288615103810176e-05,
"loss": 0.1887,
"step": 8100
},
{
"epoch": 1.8708647045402693,
"grad_norm": 2.019066095352173,
"learning_rate": 4.516186275255406e-05,
"loss": 0.1963,
"step": 8200
},
{
"epoch": 1.8936801277663702,
"grad_norm": 1.8746905326843262,
"learning_rate": 4.5035110401297945e-05,
"loss": 0.191,
"step": 8300
},
{
"epoch": 1.916495550992471,
"grad_norm": 1.852623701095581,
"learning_rate": 4.490835805004183e-05,
"loss": 0.1923,
"step": 8400
},
{
"epoch": 1.939310974218572,
"grad_norm": 1.784754753112793,
"learning_rate": 4.4781605698785714e-05,
"loss": 0.1931,
"step": 8500
},
{
"epoch": 1.9621263974446728,
"grad_norm": 1.1549110412597656,
"learning_rate": 4.46548533475296e-05,
"loss": 0.196,
"step": 8600
},
{
"epoch": 1.9849418206707734,
"grad_norm": 1.884442925453186,
"learning_rate": 4.452810099627348e-05,
"loss": 0.1864,
"step": 8700
},
{
"epoch": 2.0,
"eval_accuracy": 0.7550221423528657,
"eval_f1_macro": 0.46991506497740926,
"eval_loss": 0.2009369283914566,
"eval_precision": 0.6326650393349825,
"eval_recall": 0.44929900989141963,
"eval_runtime": 18.7453,
"eval_samples_per_second": 2493.578,
"eval_steps_per_second": 3.254,
"step": 8766
},
{
"epoch": 2.0077572438968745,
"grad_norm": 0.6890222430229187,
"learning_rate": 4.440134864501737e-05,
"loss": 0.1749,
"step": 8800
},
{
"epoch": 2.0305726671229753,
"grad_norm": 0.682909369468689,
"learning_rate": 4.427459629376125e-05,
"loss": 0.1359,
"step": 8900
},
{
"epoch": 2.0533880903490758,
"grad_norm": 0.7629422545433044,
"learning_rate": 4.4147843942505136e-05,
"loss": 0.1367,
"step": 9000
},
{
"epoch": 2.0762035135751766,
"grad_norm": 0.9653803110122681,
"learning_rate": 4.402109159124902e-05,
"loss": 0.1358,
"step": 9100
},
{
"epoch": 2.0990189368012775,
"grad_norm": 1.0762972831726074,
"learning_rate": 4.3894339239992905e-05,
"loss": 0.136,
"step": 9200
},
{
"epoch": 2.1218343600273784,
"grad_norm": 0.9004037976264954,
"learning_rate": 4.376758688873679e-05,
"loss": 0.1287,
"step": 9300
},
{
"epoch": 2.144649783253479,
"grad_norm": 1.2514455318450928,
"learning_rate": 4.3640834537480674e-05,
"loss": 0.1296,
"step": 9400
},
{
"epoch": 2.16746520647958,
"grad_norm": 0.9992089867591858,
"learning_rate": 4.351408218622456e-05,
"loss": 0.1343,
"step": 9500
},
{
"epoch": 2.190280629705681,
"grad_norm": 1.090753197669983,
"learning_rate": 4.338732983496844e-05,
"loss": 0.1314,
"step": 9600
},
{
"epoch": 2.213096052931782,
"grad_norm": 1.0993868112564087,
"learning_rate": 4.326057748371232e-05,
"loss": 0.1294,
"step": 9700
},
{
"epoch": 2.2359114761578827,
"grad_norm": 7.720792770385742,
"learning_rate": 4.3133825132456206e-05,
"loss": 0.1311,
"step": 9800
},
{
"epoch": 2.2587268993839835,
"grad_norm": 0.968745768070221,
"learning_rate": 4.300707278120009e-05,
"loss": 0.1339,
"step": 9900
},
{
"epoch": 2.2815423226100844,
"grad_norm": 1.4330673217773438,
"learning_rate": 4.2880320429943975e-05,
"loss": 0.1354,
"step": 10000
},
{
"epoch": 2.3043577458361852,
"grad_norm": 1.1297446489334106,
"learning_rate": 4.275356807868786e-05,
"loss": 0.1343,
"step": 10100
},
{
"epoch": 2.327173169062286,
"grad_norm": 1.1260323524475098,
"learning_rate": 4.2626815727431744e-05,
"loss": 0.1324,
"step": 10200
},
{
"epoch": 2.349988592288387,
"grad_norm": 1.0184403657913208,
"learning_rate": 4.250006337617563e-05,
"loss": 0.13,
"step": 10300
},
{
"epoch": 2.372804015514488,
"grad_norm": 3.17570161819458,
"learning_rate": 4.237331102491951e-05,
"loss": 0.1316,
"step": 10400
},
{
"epoch": 2.3956194387405887,
"grad_norm": 0.9891201257705688,
"learning_rate": 4.22465586736634e-05,
"loss": 0.133,
"step": 10500
},
{
"epoch": 2.4184348619666896,
"grad_norm": 1.0855801105499268,
"learning_rate": 4.211980632240728e-05,
"loss": 0.1362,
"step": 10600
},
{
"epoch": 2.4412502851927904,
"grad_norm": 1.94706392288208,
"learning_rate": 4.199305397115117e-05,
"loss": 0.1275,
"step": 10700
},
{
"epoch": 2.4640657084188913,
"grad_norm": 1.3414404392242432,
"learning_rate": 4.186630161989505e-05,
"loss": 0.1332,
"step": 10800
},
{
"epoch": 2.486881131644992,
"grad_norm": 1.5227530002593994,
"learning_rate": 4.1739549268638935e-05,
"loss": 0.1313,
"step": 10900
},
{
"epoch": 2.509696554871093,
"grad_norm": 1.4501299858093262,
"learning_rate": 4.161279691738282e-05,
"loss": 0.1368,
"step": 11000
},
{
"epoch": 2.532511978097194,
"grad_norm": 1.6044068336486816,
"learning_rate": 4.1486044566126704e-05,
"loss": 0.1341,
"step": 11100
},
{
"epoch": 2.5553274013232947,
"grad_norm": 1.1524112224578857,
"learning_rate": 4.135929221487059e-05,
"loss": 0.134,
"step": 11200
},
{
"epoch": 2.5781428245493956,
"grad_norm": 2.4395620822906494,
"learning_rate": 4.123253986361447e-05,
"loss": 0.135,
"step": 11300
},
{
"epoch": 2.6009582477754964,
"grad_norm": 1.3260228633880615,
"learning_rate": 4.110578751235836e-05,
"loss": 0.1322,
"step": 11400
},
{
"epoch": 2.6237736710015973,
"grad_norm": 1.1100713014602661,
"learning_rate": 4.097903516110224e-05,
"loss": 0.1328,
"step": 11500
},
{
"epoch": 2.646589094227698,
"grad_norm": 0.9172728061676025,
"learning_rate": 4.0852282809846127e-05,
"loss": 0.1286,
"step": 11600
},
{
"epoch": 2.669404517453799,
"grad_norm": 1.2171121835708618,
"learning_rate": 4.072553045859001e-05,
"loss": 0.1337,
"step": 11700
},
{
"epoch": 2.6922199406798994,
"grad_norm": 1.0674031972885132,
"learning_rate": 4.0598778107333896e-05,
"loss": 0.1324,
"step": 11800
},
{
"epoch": 2.7150353639060003,
"grad_norm": 1.2550758123397827,
"learning_rate": 4.047202575607777e-05,
"loss": 0.1342,
"step": 11900
},
{
"epoch": 2.737850787132101,
"grad_norm": 0.9326576590538025,
"learning_rate": 4.034527340482166e-05,
"loss": 0.1313,
"step": 12000
},
{
"epoch": 2.760666210358202,
"grad_norm": 0.8683038949966431,
"learning_rate": 4.021852105356554e-05,
"loss": 0.1352,
"step": 12100
},
{
"epoch": 2.783481633584303,
"grad_norm": 1.100682020187378,
"learning_rate": 4.009176870230943e-05,
"loss": 0.1332,
"step": 12200
},
{
"epoch": 2.8062970568104038,
"grad_norm": 1.1301831007003784,
"learning_rate": 3.996501635105331e-05,
"loss": 0.1308,
"step": 12300
},
{
"epoch": 2.8291124800365046,
"grad_norm": 1.2631456851959229,
"learning_rate": 3.9838263999797196e-05,
"loss": 0.1324,
"step": 12400
},
{
"epoch": 2.8519279032626055,
"grad_norm": 1.0373079776763916,
"learning_rate": 3.971151164854108e-05,
"loss": 0.1311,
"step": 12500
},
{
"epoch": 2.8747433264887063,
"grad_norm": 1.5447543859481812,
"learning_rate": 3.9584759297284965e-05,
"loss": 0.133,
"step": 12600
},
{
"epoch": 2.897558749714807,
"grad_norm": 1.0701638460159302,
"learning_rate": 3.945800694602885e-05,
"loss": 0.1313,
"step": 12700
},
{
"epoch": 2.920374172940908,
"grad_norm": 1.3061041831970215,
"learning_rate": 3.933125459477274e-05,
"loss": 0.1355,
"step": 12800
},
{
"epoch": 2.943189596167009,
"grad_norm": 1.4337469339370728,
"learning_rate": 3.920450224351662e-05,
"loss": 0.134,
"step": 12900
},
{
"epoch": 2.96600501939311,
"grad_norm": 1.2338460683822632,
"learning_rate": 3.90777498922605e-05,
"loss": 0.1334,
"step": 13000
},
{
"epoch": 2.9888204426192106,
"grad_norm": 1.1828727722167969,
"learning_rate": 3.895099754100439e-05,
"loss": 0.1374,
"step": 13100
},
{
"epoch": 3.0,
"eval_accuracy": 0.7612476734484308,
"eval_f1_macro": 0.5487671335091459,
"eval_loss": 0.19929102063179016,
"eval_precision": 0.6182830803008168,
"eval_recall": 0.5145348157777688,
"eval_runtime": 18.7521,
"eval_samples_per_second": 2492.68,
"eval_steps_per_second": 3.253,
"step": 13149
}
],
"logging_steps": 100,
"max_steps": 43830,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.58047685121278e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}