ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
6b5d590 verified
raw
history blame
No virus
22.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 13.0625,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 10.875,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.074951171875,
"logits/rejected": 80.7809829711914,
"logps/chosen": -34.20733642578125,
"logps/rejected": -32.97297668457031,
"loss": 0.9951,
"rewards/accuracies": 0.4583333432674408,
"rewards/chosen": 0.003438829444348812,
"rewards/margins": 0.004895869642496109,
"rewards/rejected": -0.0014570390339940786,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 12.6875,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.64326477050781,
"logits/rejected": 80.53416442871094,
"logps/chosen": -33.720584869384766,
"logps/rejected": -30.82167625427246,
"loss": 0.9986,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.003067571669816971,
"rewards/margins": 0.0013512909645214677,
"rewards/rejected": -0.004418861120939255,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 12.0,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.51445007324219,
"logits/rejected": 82.54810333251953,
"logps/chosen": -33.81728744506836,
"logps/rejected": -31.204355239868164,
"loss": 1.0024,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.019553204998373985,
"rewards/margins": -0.002394508570432663,
"rewards/rejected": 0.0219477117061615,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 11.8125,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.101806640625,
"logits/rejected": 81.09938049316406,
"logps/chosen": -32.73223876953125,
"logps/rejected": -33.143699645996094,
"loss": 0.9783,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.04418279603123665,
"rewards/margins": 0.02170029655098915,
"rewards/rejected": 0.022482499480247498,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 11.9375,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.82136535644531,
"logits/rejected": 78.82911682128906,
"logps/chosen": -30.3783016204834,
"logps/rejected": -30.641677856445312,
"loss": 0.9605,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.082694411277771,
"rewards/margins": 0.039533428847789764,
"rewards/rejected": 0.04316098242998123,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 9.75,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.49494171142578,
"logits/rejected": 83.55232238769531,
"logps/chosen": -30.781469345092773,
"logps/rejected": -29.190662384033203,
"loss": 1.0006,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.03936903923749924,
"rewards/margins": -0.0006168211111798882,
"rewards/rejected": 0.039985861629247665,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 12.0625,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 84.1231918334961,
"logits/rejected": 84.15650939941406,
"logps/chosen": -30.209863662719727,
"logps/rejected": -32.619781494140625,
"loss": 0.9999,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.041915904730558395,
"rewards/margins": 0.000123101839562878,
"rewards/rejected": 0.04179280251264572,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 12.375,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.8515853881836,
"logits/rejected": 81.83155822753906,
"logps/chosen": -30.982410430908203,
"logps/rejected": -30.617040634155273,
"loss": 0.9618,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.05225307494401932,
"rewards/margins": 0.03819245845079422,
"rewards/rejected": 0.014060619287192822,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 14.75,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.68502807617188,
"logits/rejected": 78.65934753417969,
"logps/chosen": -32.18014144897461,
"logps/rejected": -30.878421783447266,
"loss": 0.9627,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.04580371826887131,
"rewards/margins": 0.03849860280752182,
"rewards/rejected": 0.007305114530026913,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 12.375,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.6978530883789,
"logits/rejected": 83.72080993652344,
"logps/chosen": -33.769813537597656,
"logps/rejected": -31.638240814208984,
"loss": 0.9577,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.05351760983467102,
"rewards/margins": 0.04228735715150833,
"rewards/rejected": 0.01123025082051754,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.71279907226562,
"eval_logits/rejected": 98.70589447021484,
"eval_logps/chosen": -32.32750701904297,
"eval_logps/rejected": -35.914642333984375,
"eval_loss": 0.993482768535614,
"eval_rewards/accuracies": 0.5066444873809814,
"eval_rewards/chosen": 0.011567190289497375,
"eval_rewards/margins": 0.006378817837685347,
"eval_rewards/rejected": 0.005188372451812029,
"eval_runtime": 104.0772,
"eval_samples_per_second": 3.296,
"eval_steps_per_second": 0.413,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 14.125,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.8444595336914,
"logits/rejected": 83.72880554199219,
"logps/chosen": -32.138458251953125,
"logps/rejected": -32.649452209472656,
"loss": 0.9122,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.08843465149402618,
"rewards/margins": 0.0877910926938057,
"rewards/rejected": 0.0006435603136196733,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 13.0,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 83.81868743896484,
"logits/rejected": 83.92826080322266,
"logps/chosen": -28.111730575561523,
"logps/rejected": -35.34394454956055,
"loss": 0.9297,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.08178504556417465,
"rewards/margins": 0.07026515156030655,
"rewards/rejected": 0.011519892141222954,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 9.375,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 80.93681335449219,
"logits/rejected": 80.97130584716797,
"logps/chosen": -30.195592880249023,
"logps/rejected": -31.842870712280273,
"loss": 0.939,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.07117608934640884,
"rewards/margins": 0.06230046600103378,
"rewards/rejected": 0.008875617757439613,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 11.0625,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 81.7886962890625,
"logits/rejected": 81.80296325683594,
"logps/chosen": -26.81143569946289,
"logps/rejected": -32.920143127441406,
"loss": 0.8867,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0635889396071434,
"rewards/margins": 0.11326569318771362,
"rewards/rejected": -0.04967674985527992,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 12.1875,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.10134887695312,
"logits/rejected": 80.06209564208984,
"logps/chosen": -28.821029663085938,
"logps/rejected": -33.226470947265625,
"loss": 0.8772,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.05680803209543228,
"rewards/margins": 0.1228049248456955,
"rewards/rejected": -0.06599690765142441,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 13.4375,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 81.63700866699219,
"logits/rejected": 81.66020202636719,
"logps/chosen": -34.080284118652344,
"logps/rejected": -30.817296981811523,
"loss": 0.9079,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.005646559409797192,
"rewards/margins": 0.09212217479944229,
"rewards/rejected": -0.08647561073303223,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 15.125,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 82.27827453613281,
"logits/rejected": 82.23299407958984,
"logps/chosen": -31.016094207763672,
"logps/rejected": -33.03407669067383,
"loss": 0.8743,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.024319300428032875,
"rewards/margins": 0.12572081387043,
"rewards/rejected": -0.10140150785446167,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 12.5,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 79.44108581542969,
"logits/rejected": 79.4173355102539,
"logps/chosen": -30.948467254638672,
"logps/rejected": -32.03376007080078,
"loss": 0.8875,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.02486853487789631,
"rewards/margins": 0.1125236377120018,
"rewards/rejected": -0.08765510469675064,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 9.6875,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 80.90037536621094,
"logits/rejected": 80.8733901977539,
"logps/chosen": -30.740875244140625,
"logps/rejected": -31.226177215576172,
"loss": 0.9221,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.002553999889642,
"rewards/margins": 0.07836906611919403,
"rewards/rejected": -0.07581506669521332,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 14.4375,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 76.02571105957031,
"logits/rejected": 75.9735107421875,
"logps/chosen": -34.12778854370117,
"logps/rejected": -33.30614471435547,
"loss": 0.8562,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.0624953992664814,
"rewards/margins": 0.14376921951770782,
"rewards/rejected": -0.08127383887767792,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 97.97191619873047,
"eval_logits/rejected": 97.95458984375,
"eval_logps/chosen": -32.94685363769531,
"eval_logps/rejected": -36.8216552734375,
"eval_loss": 0.9646754860877991,
"eval_rewards/accuracies": 0.5676910281181335,
"eval_rewards/chosen": -0.05036771669983864,
"eval_rewards/margins": 0.035145342350006104,
"eval_rewards/rejected": -0.08551305532455444,
"eval_runtime": 103.8205,
"eval_samples_per_second": 3.304,
"eval_steps_per_second": 0.414,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 18.625,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 78.56858825683594,
"logits/rejected": 78.47276306152344,
"logps/chosen": -33.586204528808594,
"logps/rejected": -36.05295944213867,
"loss": 0.8633,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.0359516479074955,
"rewards/margins": 0.1426382064819336,
"rewards/rejected": -0.1066865548491478,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 14.6875,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 80.53726959228516,
"logits/rejected": 80.62950134277344,
"logps/chosen": -31.454355239868164,
"logps/rejected": -32.02970504760742,
"loss": 0.8286,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.049460187554359436,
"rewards/margins": 0.17953529953956604,
"rewards/rejected": -0.1300750970840454,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 14.0,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 77.57215881347656,
"logits/rejected": 77.61759948730469,
"logps/chosen": -32.74131393432617,
"logps/rejected": -35.29151153564453,
"loss": 0.8713,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.006443141493946314,
"rewards/margins": 0.13437876105308533,
"rewards/rejected": -0.12793561816215515,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 16.5,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 79.76509094238281,
"logits/rejected": 80.07305908203125,
"logps/chosen": -31.252161026000977,
"logps/rejected": -32.75459671020508,
"loss": 0.8345,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.04359050467610359,
"rewards/margins": 0.16584597527980804,
"rewards/rejected": -0.12225550413131714,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 15.0625,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 78.32225036621094,
"logits/rejected": 78.37224578857422,
"logps/chosen": -27.815731048583984,
"logps/rejected": -31.077539443969727,
"loss": 0.9043,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.02198370359838009,
"rewards/margins": 0.09569612890481949,
"rewards/rejected": -0.11767983436584473,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 13.75,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 75.31268310546875,
"logits/rejected": 75.46823120117188,
"logps/chosen": -31.16106605529785,
"logps/rejected": -38.22980880737305,
"loss": 0.7589,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.039992958307266235,
"rewards/margins": 0.2528113126754761,
"rewards/rejected": -0.21281830966472626,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 13.1875,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 74.42684173583984,
"logits/rejected": 74.45647430419922,
"logps/chosen": -31.796234130859375,
"logps/rejected": -33.113197326660156,
"loss": 0.8432,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.0017309554386883974,
"rewards/margins": 0.15904627740383148,
"rewards/rejected": -0.1607772409915924,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 19.75,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 77.391845703125,
"logits/rejected": 77.1694564819336,
"logps/chosen": -32.110328674316406,
"logps/rejected": -30.959293365478516,
"loss": 0.9109,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.030043601989746094,
"rewards/margins": 0.09918614476919174,
"rewards/rejected": -0.12922975420951843,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 13.1875,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 77.39460754394531,
"logits/rejected": 77.31553649902344,
"logps/chosen": -33.84746551513672,
"logps/rejected": -34.28327178955078,
"loss": 0.7586,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.02567802369594574,
"rewards/margins": 0.2523185908794403,
"rewards/rejected": -0.22664058208465576,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 14.8125,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 73.05286407470703,
"logits/rejected": 73.1805648803711,
"logps/chosen": -33.01818084716797,
"logps/rejected": -30.520904541015625,
"loss": 0.8271,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.03875284641981125,
"rewards/margins": 0.18064062297344208,
"rewards/rejected": -0.14188775420188904,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 97.71524047851562,
"eval_logits/rejected": 97.68975067138672,
"eval_logps/chosen": -33.206886291503906,
"eval_logps/rejected": -37.196205139160156,
"eval_loss": 0.9533767700195312,
"eval_rewards/accuracies": 0.5714285373687744,
"eval_rewards/chosen": -0.07637124508619308,
"eval_rewards/margins": 0.04659651592373848,
"eval_rewards/rejected": -0.12296776473522186,
"eval_runtime": 104.0124,
"eval_samples_per_second": 3.298,
"eval_steps_per_second": 0.413,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 13.0,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 80.51168060302734,
"logits/rejected": 80.51959228515625,
"logps/chosen": -30.940738677978516,
"logps/rejected": -33.976158142089844,
"loss": 0.841,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.00772014120593667,
"rewards/margins": 0.1699191778898239,
"rewards/rejected": -0.17763930559158325,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 14.9375,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 77.665771484375,
"logits/rejected": 77.6807861328125,
"logps/chosen": -31.073196411132812,
"logps/rejected": -30.321044921875,
"loss": 0.8202,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.05253634601831436,
"rewards/margins": 0.19269177317619324,
"rewards/rejected": -0.14015543460845947,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 16.375,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 74.7007064819336,
"logits/rejected": 74.74031829833984,
"logps/chosen": -29.66641616821289,
"logps/rejected": -34.42815399169922,
"loss": 0.7549,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.06861907988786697,
"rewards/margins": 0.24794825911521912,
"rewards/rejected": -0.17932915687561035,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 17.125,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 79.32682800292969,
"logits/rejected": 79.36729431152344,
"logps/chosen": -33.05027770996094,
"logps/rejected": -35.377281188964844,
"loss": 0.7972,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.01618681475520134,
"rewards/margins": 0.21704569458961487,
"rewards/rejected": -0.20085887610912323,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 15.5,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 78.2139663696289,
"logits/rejected": 78.22695922851562,
"logps/chosen": -33.175575256347656,
"logps/rejected": -34.91777801513672,
"loss": 0.7788,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.04681625962257385,
"rewards/margins": 0.22941403090953827,
"rewards/rejected": -0.18259775638580322,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 11.6875,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 79.90480041503906,
"logits/rejected": 79.93946075439453,
"logps/chosen": -28.85941505432129,
"logps/rejected": -33.032447814941406,
"loss": 0.8014,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.06927979737520218,
"rewards/margins": 0.20753948390483856,
"rewards/rejected": -0.13825969398021698,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 12.8125,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 79.3287582397461,
"logits/rejected": 79.33840942382812,
"logps/chosen": -33.099693298339844,
"logps/rejected": -37.02233123779297,
"loss": 0.8425,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.022353025153279305,
"rewards/margins": 0.16894304752349854,
"rewards/rejected": -0.1912960708141327,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 13.25,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 72.94795227050781,
"logits/rejected": 72.81517791748047,
"logps/chosen": -30.674768447875977,
"logps/rejected": -29.731273651123047,
"loss": 0.8564,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.009864235296845436,
"rewards/margins": 0.14720600843429565,
"rewards/rejected": -0.15707024931907654,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.88636748450143,
"train_runtime": 2554.6221,
"train_samples_per_second": 1.205,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}