ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
bca549b verified
raw
history blame
22.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 1312.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 25.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 1168.0,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.08999633789062,
"logits/rejected": 80.79169464111328,
"logps/chosen": -34.18925094604492,
"logps/rejected": -33.03681945800781,
"loss": 24.3423,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": 0.005247864406555891,
"rewards/margins": 0.0130887096747756,
"rewards/rejected": -0.007840845733880997,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 1032.0,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.66268157958984,
"logits/rejected": 80.54837799072266,
"logps/chosen": -33.62754821777344,
"logps/rejected": -30.786510467529297,
"loss": 25.2801,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.006235760636627674,
"rewards/margins": 0.007138081826269627,
"rewards/rejected": -0.0009023217717185616,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 1200.0,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.51216888427734,
"logits/rejected": 82.54035949707031,
"logps/chosen": -33.695411682128906,
"logps/rejected": -31.29660415649414,
"loss": 24.6437,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.03174129128456116,
"rewards/margins": 0.01901828870177269,
"rewards/rejected": 0.012723001651465893,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 1144.0,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.04322814941406,
"logits/rejected": 81.03800964355469,
"logps/chosen": -32.80583572387695,
"logps/rejected": -33.2043571472168,
"loss": 24.7349,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.03682267293334007,
"rewards/margins": 0.020405994728207588,
"rewards/rejected": 0.016416678205132484,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 948.0,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.70301055908203,
"logits/rejected": 78.71062469482422,
"logps/chosen": -30.72637367248535,
"logps/rejected": -30.724206924438477,
"loss": 26.36,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.04788754880428314,
"rewards/margins": 0.012979410588741302,
"rewards/rejected": 0.03490813449025154,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 968.0,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.19561767578125,
"logits/rejected": 83.24873352050781,
"logps/chosen": -30.971267700195312,
"logps/rejected": -29.551761627197266,
"loss": 25.0468,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.020389318466186523,
"rewards/margins": 0.016513368114829063,
"rewards/rejected": 0.0038759508170187473,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 1552.0,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 83.78221130371094,
"logits/rejected": 83.81100463867188,
"logps/chosen": -30.479488372802734,
"logps/rejected": -33.11530303955078,
"loss": 24.2453,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.01495352853089571,
"rewards/margins": 0.02271350473165512,
"rewards/rejected": -0.007759974803775549,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 1104.0,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.36155700683594,
"logits/rejected": 81.35380554199219,
"logps/chosen": -31.37007713317871,
"logps/rejected": -30.98931884765625,
"loss": 23.0324,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.013486603274941444,
"rewards/margins": 0.03665385767817497,
"rewards/rejected": -0.023167254403233528,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 1120.0,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.11141967773438,
"logits/rejected": 78.0789794921875,
"logps/chosen": -32.44235610961914,
"logps/rejected": -31.213552474975586,
"loss": 22.8826,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.01958216354250908,
"rewards/margins": 0.0457901768386364,
"rewards/rejected": -0.02620801329612732,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 1056.0,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.38008117675781,
"logits/rejected": 83.412841796875,
"logps/chosen": -34.02827072143555,
"logps/rejected": -31.858572006225586,
"loss": 23.2882,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.027672046795487404,
"rewards/margins": 0.038474611937999725,
"rewards/rejected": -0.010802562348544598,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.75923156738281,
"eval_logits/rejected": 98.74642181396484,
"eval_logps/chosen": -32.47222137451172,
"eval_logps/rejected": -36.06691360473633,
"eval_loss": 25.671157836914062,
"eval_rewards/accuracies": 0.5228405594825745,
"eval_rewards/chosen": -0.0029044141992926598,
"eval_rewards/margins": 0.0071340943686664104,
"eval_rewards/rejected": -0.010038508102297783,
"eval_runtime": 104.2457,
"eval_samples_per_second": 3.29,
"eval_steps_per_second": 0.412,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 1528.0,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.55766296386719,
"logits/rejected": 83.4551010131836,
"logps/chosen": -32.478965759277344,
"logps/rejected": -32.80836486816406,
"loss": 21.9052,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.05438382551074028,
"rewards/margins": 0.06963126361370087,
"rewards/rejected": -0.015247439965605736,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 1504.0,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 83.767822265625,
"logits/rejected": 83.86802673339844,
"logps/chosen": -28.277118682861328,
"logps/rejected": -35.63311004638672,
"loss": 20.22,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.06524648517370224,
"rewards/margins": 0.08264312148094177,
"rewards/rejected": -0.01739663816988468,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 956.0,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 80.87911224365234,
"logits/rejected": 80.89720153808594,
"logps/chosen": -30.450307846069336,
"logps/rejected": -32.20357131958008,
"loss": 20.9442,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.04570445418357849,
"rewards/margins": 0.07289845496416092,
"rewards/rejected": -0.02719399705529213,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 740.0,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 82.11528778076172,
"logits/rejected": 82.12232971191406,
"logps/chosen": -27.089996337890625,
"logps/rejected": -33.013877868652344,
"loss": 20.3465,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.035732947289943695,
"rewards/margins": 0.09478302299976349,
"rewards/rejected": -0.059050071984529495,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 948.0,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.63235473632812,
"logits/rejected": 80.60447692871094,
"logps/chosen": -28.92165756225586,
"logps/rejected": -33.14097213745117,
"loss": 18.7455,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.04674536734819412,
"rewards/margins": 0.10419263690710068,
"rewards/rejected": -0.05744727700948715,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 1664.0,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 82.47834777832031,
"logits/rejected": 82.48927307128906,
"logps/chosen": -33.57959747314453,
"logps/rejected": -30.44466209411621,
"loss": 21.1798,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.055715084075927734,
"rewards/margins": 0.10492750257253647,
"rewards/rejected": -0.049212418496608734,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 1152.0,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 83.31620788574219,
"logits/rejected": 83.2592544555664,
"logps/chosen": -30.965845108032227,
"logps/rejected": -32.591552734375,
"loss": 21.522,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.02934456244111061,
"rewards/margins": 0.08649395406246185,
"rewards/rejected": -0.057149387896060944,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 896.0,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 80.9317626953125,
"logits/rejected": 80.91111755371094,
"logps/chosen": -30.60970687866211,
"logps/rejected": -31.63262939453125,
"loss": 18.9358,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.058744143694639206,
"rewards/margins": 0.10628656297922134,
"rewards/rejected": -0.04754243046045303,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 620.0,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 82.61732482910156,
"logits/rejected": 82.6050033569336,
"logps/chosen": -30.360843658447266,
"logps/rejected": -30.71734619140625,
"loss": 23.6279,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.04055742174386978,
"rewards/margins": 0.06548986583948135,
"rewards/rejected": -0.024932442232966423,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 700.0,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 78.06134033203125,
"logits/rejected": 78.00863647460938,
"logps/chosen": -33.79378890991211,
"logps/rejected": -32.66465377807617,
"loss": 20.2659,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.09589491784572601,
"rewards/margins": 0.1130196675658226,
"rewards/rejected": -0.017124753445386887,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.65145874023438,
"eval_logits/rejected": 98.6263427734375,
"eval_logps/chosen": -32.57748031616211,
"eval_logps/rejected": -36.30455017089844,
"eval_loss": 24.828882217407227,
"eval_rewards/accuracies": 0.5340532064437866,
"eval_rewards/chosen": -0.013430174440145493,
"eval_rewards/margins": 0.020371900871396065,
"eval_rewards/rejected": -0.03380206972360611,
"eval_runtime": 104.0575,
"eval_samples_per_second": 3.296,
"eval_steps_per_second": 0.413,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 1320.0,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 80.6713638305664,
"logits/rejected": 80.57633972167969,
"logps/chosen": -33.16452407836914,
"logps/rejected": -35.397491455078125,
"loss": 18.2607,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.07811959832906723,
"rewards/margins": 0.11925957351922989,
"rewards/rejected": -0.04113996401429176,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 652.0,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 82.79257202148438,
"logits/rejected": 82.88256072998047,
"logps/chosen": -30.9622859954834,
"logps/rejected": -31.26416015625,
"loss": 16.5714,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.09866676479578018,
"rewards/margins": 0.15218719840049744,
"rewards/rejected": -0.053520433604717255,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 1144.0,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 79.95256042480469,
"logits/rejected": 80.00727844238281,
"logps/chosen": -32.26613235473633,
"logps/rejected": -34.29724884033203,
"loss": 21.5779,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.05396091938018799,
"rewards/margins": 0.08247107267379761,
"rewards/rejected": -0.028510143980383873,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 856.0,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 82.29564666748047,
"logits/rejected": 82.59175109863281,
"logps/chosen": -30.715564727783203,
"logps/rejected": -31.892749786376953,
"loss": 16.3789,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.0972500815987587,
"rewards/margins": 0.13332059979438782,
"rewards/rejected": -0.03607049956917763,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 1000.0,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 80.96902465820312,
"logits/rejected": 81.0352554321289,
"logps/chosen": -26.926239013671875,
"logps/rejected": -30.266109466552734,
"loss": 20.1404,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.066965751349926,
"rewards/margins": 0.10350307077169418,
"rewards/rejected": -0.03653731197118759,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 1064.0,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 78.25106048583984,
"logits/rejected": 78.37794494628906,
"logps/chosen": -30.432825088500977,
"logps/rejected": -36.55792999267578,
"loss": 15.8988,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.11281673610210419,
"rewards/margins": 0.158446803689003,
"rewards/rejected": -0.0456300750374794,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 708.0,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 77.56661224365234,
"logits/rejected": 77.58919525146484,
"logps/chosen": -30.973047256469727,
"logps/rejected": -31.90987777709961,
"loss": 18.1036,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.08058776706457138,
"rewards/margins": 0.1210336685180664,
"rewards/rejected": -0.040445905178785324,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 1056.0,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 80.2997817993164,
"logits/rejected": 80.07062530517578,
"logps/chosen": -31.0832576751709,
"logps/rejected": -29.884586334228516,
"loss": 22.0775,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.07266353815793991,
"rewards/margins": 0.0944225937128067,
"rewards/rejected": -0.021759048104286194,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 808.0,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 80.42820739746094,
"logits/rejected": 80.34810638427734,
"logps/chosen": -33.01291275024414,
"logps/rejected": -32.72394561767578,
"loss": 15.1146,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.10913344472646713,
"rewards/margins": 0.17984186112880707,
"rewards/rejected": -0.07070842385292053,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 1072.0,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 76.04508209228516,
"logits/rejected": 76.13191986083984,
"logps/chosen": -32.2183952331543,
"logps/rejected": -29.19476890563965,
"loss": 20.0695,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.11873127520084381,
"rewards/margins": 0.12800584733486176,
"rewards/rejected": -0.009274585172533989,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 98.68071746826172,
"eval_logits/rejected": 98.6502685546875,
"eval_logps/chosen": -32.66925811767578,
"eval_logps/rejected": -36.280174255371094,
"eval_loss": 26.15445899963379,
"eval_rewards/accuracies": 0.5307309031486511,
"eval_rewards/chosen": -0.022608023136854172,
"eval_rewards/margins": 0.008756463415920734,
"eval_rewards/rejected": -0.03136448562145233,
"eval_runtime": 103.8063,
"eval_samples_per_second": 3.304,
"eval_steps_per_second": 0.414,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 1136.0,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 83.16893005371094,
"logits/rejected": 83.19877624511719,
"logps/chosen": -30.013708114624023,
"logps/rejected": -32.592529296875,
"loss": 18.2229,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.08498243242502213,
"rewards/margins": 0.12425950914621353,
"rewards/rejected": -0.039277076721191406,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 596.0,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 80.65386199951172,
"logits/rejected": 80.6548080444336,
"logps/chosen": -30.399967193603516,
"logps/rejected": -29.175945281982422,
"loss": 16.3256,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.11985927820205688,
"rewards/margins": 0.1455042064189911,
"rewards/rejected": -0.025644922628998756,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 604.0,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 77.81417846679688,
"logits/rejected": 77.8701171875,
"logps/chosen": -29.092737197875977,
"logps/rejected": -33.01492691040039,
"loss": 15.6194,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.12598693370819092,
"rewards/margins": 0.16399319469928741,
"rewards/rejected": -0.0380062535405159,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 1384.0,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 82.1661376953125,
"logits/rejected": 82.20028686523438,
"logps/chosen": -32.25991439819336,
"logps/rejected": -33.82966995239258,
"loss": 18.1168,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.09522315859794617,
"rewards/margins": 0.1413211077451706,
"rewards/rejected": -0.04609795659780502,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 736.0,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 81.15172576904297,
"logits/rejected": 81.16615295410156,
"logps/chosen": -32.44929504394531,
"logps/rejected": -33.39020919799805,
"loss": 17.9927,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.11944446712732315,
"rewards/margins": 0.14928530156612396,
"rewards/rejected": -0.029840845614671707,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 832.0,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 82.63230895996094,
"logits/rejected": 82.66060638427734,
"logps/chosen": -28.42384910583496,
"logps/rejected": -31.795475006103516,
"loss": 17.9341,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.11283614486455917,
"rewards/margins": 0.12739871442317963,
"rewards/rejected": -0.014562586322426796,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 900.0,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 82.10162353515625,
"logits/rejected": 82.12476348876953,
"logps/chosen": -31.836299896240234,
"logps/rejected": -35.486595153808594,
"loss": 19.9538,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.10398608446121216,
"rewards/margins": 0.14170874655246735,
"rewards/rejected": -0.03772266209125519,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 888.0,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 76.01589965820312,
"logits/rejected": 75.89103698730469,
"logps/chosen": -29.72897720336914,
"logps/rejected": -28.481863021850586,
"loss": 18.4584,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.08471504598855972,
"rewards/margins": 0.11684386432170868,
"rewards/rejected": -0.03212881088256836,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 20.413429577319654,
"train_runtime": 2557.2601,
"train_samples_per_second": 1.204,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}