ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
40d6fc5 verified
raw
history blame
No virus
21.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 52.25,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 40.75,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.06388092041016,
"logits/rejected": 80.76617431640625,
"logps/chosen": -34.315635681152344,
"logps/rejected": -33.083072662353516,
"loss": 0.7277,
"rewards/accuracies": 0.4305555522441864,
"rewards/chosen": -0.05912955477833748,
"rewards/margins": 0.04060414060950279,
"rewards/rejected": -0.09973368048667908,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 36.25,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.6710433959961,
"logits/rejected": 80.55816650390625,
"logps/chosen": -33.6015625,
"logps/rejected": -30.793548583984375,
"loss": 0.724,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.07067807018756866,
"rewards/margins": 0.08352533727884293,
"rewards/rejected": -0.012847280129790306,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 48.0,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.53105163574219,
"logits/rejected": 82.56202697753906,
"logps/chosen": -33.846534729003906,
"logps/rejected": -31.217113494873047,
"loss": 0.823,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.13303251564502716,
"rewards/margins": -0.03234311193227768,
"rewards/rejected": 0.16537563502788544,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 49.0,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.03451538085938,
"logits/rejected": 81.03263092041016,
"logps/chosen": -32.87987518310547,
"logps/rejected": -33.08030700683594,
"loss": 0.7898,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.2353484332561493,
"rewards/margins": 0.004774211905896664,
"rewards/rejected": 0.23057425022125244,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 37.0,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.62713623046875,
"logits/rejected": 78.64068603515625,
"logps/chosen": -30.693645477294922,
"logps/rejected": -30.871618270874023,
"loss": 0.7362,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.40928253531455994,
"rewards/margins": 0.24794897437095642,
"rewards/rejected": 0.1613335758447647,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 35.75,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.1414566040039,
"logits/rejected": 83.20195007324219,
"logps/chosen": -30.973648071289062,
"logps/rejected": -29.62088394165039,
"loss": 0.7214,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.16120928525924683,
"rewards/margins": 0.18549844622612,
"rewards/rejected": -0.02428916096687317,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 72.5,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 83.78003692626953,
"logits/rejected": 83.8111343383789,
"logps/chosen": -30.537906646728516,
"logps/rejected": -33.126197814941406,
"loss": 0.7291,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.07289580255746841,
"rewards/margins": 0.14368507266044617,
"rewards/rejected": -0.07078926265239716,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 46.25,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.44255065917969,
"logits/rejected": 81.430908203125,
"logps/chosen": -31.439010620117188,
"logps/rejected": -30.995372772216797,
"loss": 0.7039,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.0527455098927021,
"rewards/margins": 0.24292418360710144,
"rewards/rejected": -0.19017870724201202,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 61.75,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.15830993652344,
"logits/rejected": 78.127197265625,
"logps/chosen": -32.702423095703125,
"logps/rejected": -31.252826690673828,
"loss": 0.7773,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.0513942614197731,
"rewards/margins": 0.18969133496284485,
"rewards/rejected": -0.24108560383319855,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 39.0,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.44832611083984,
"logits/rejected": 83.47621154785156,
"logps/chosen": -34.1397819519043,
"logps/rejected": -31.86690330505371,
"loss": 0.7017,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.1321648806333542,
"rewards/margins": 0.225250244140625,
"rewards/rejected": -0.09308534860610962,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.7764892578125,
"eval_logits/rejected": 98.76370239257812,
"eval_logps/chosen": -32.49420166015625,
"eval_logps/rejected": -36.03456497192383,
"eval_loss": 0.8030149340629578,
"eval_rewards/accuracies": 0.5137043595314026,
"eval_rewards/chosen": -0.04081834852695465,
"eval_rewards/margins": 0.013612199574708939,
"eval_rewards/rejected": -0.05443055182695389,
"eval_runtime": 104.1164,
"eval_samples_per_second": 3.294,
"eval_steps_per_second": 0.413,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 48.25,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.67167663574219,
"logits/rejected": 83.56147766113281,
"logps/chosen": -32.48063659667969,
"logps/rejected": -32.763511657714844,
"loss": 0.6258,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.43373650312423706,
"rewards/margins": 0.5198318362236023,
"rewards/rejected": -0.08609537780284882,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 53.75,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 83.80461120605469,
"logits/rejected": 83.91490173339844,
"logps/chosen": -28.273183822631836,
"logps/rejected": -35.56282043457031,
"loss": 0.5439,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.5251210927963257,
"rewards/margins": 0.608059287071228,
"rewards/rejected": -0.08293820172548294,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 28.75,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 81.06788635253906,
"logits/rejected": 81.09136962890625,
"logps/chosen": -30.509033203125,
"logps/rejected": -32.07707595825195,
"loss": 0.6608,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.3186565339565277,
"rewards/margins": 0.4350167214870453,
"rewards/rejected": -0.11636020243167877,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 24.875,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 82.33757019042969,
"logits/rejected": 82.34171295166016,
"logps/chosen": -26.971057891845703,
"logps/rejected": -32.84387969970703,
"loss": 0.5554,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.38101473450660706,
"rewards/margins": 0.7174161076545715,
"rewards/rejected": -0.33640143275260925,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 30.125,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.85295104980469,
"logits/rejected": 80.82408142089844,
"logps/chosen": -28.91988754272461,
"logps/rejected": -33.04113006591797,
"loss": 0.5578,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.375379741191864,
"rewards/margins": 0.7550806403160095,
"rewards/rejected": -0.37970098853111267,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 60.5,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 82.69292449951172,
"logits/rejected": 82.70225524902344,
"logps/chosen": -33.54145050048828,
"logps/rejected": -30.415197372436523,
"loss": 0.6193,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.4762394428253174,
"rewards/margins": 0.8463689684867859,
"rewards/rejected": -0.37012940645217896,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 30.375,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 83.42604064941406,
"logits/rejected": 83.37831115722656,
"logps/chosen": -30.8799991607666,
"logps/rejected": -32.62963104248047,
"loss": 0.5673,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.3034316599369049,
"rewards/margins": 0.7910871505737305,
"rewards/rejected": -0.4876554012298584,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 32.0,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 80.95600891113281,
"logits/rejected": 80.93622589111328,
"logps/chosen": -30.429906845092773,
"logps/rejected": -31.591838836669922,
"loss": 0.4992,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.6137940287590027,
"rewards/margins": 0.9614995718002319,
"rewards/rejected": -0.34770551323890686,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 17.25,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 82.7037124633789,
"logits/rejected": 82.69169616699219,
"logps/chosen": -30.465667724609375,
"logps/rejected": -30.747249603271484,
"loss": 0.7493,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.24060054123401642,
"rewards/margins": 0.463981568813324,
"rewards/rejected": -0.22338099777698517,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 21.5,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 78.2643814086914,
"logits/rejected": 78.21286010742188,
"logps/chosen": -33.79939270019531,
"logps/rejected": -32.67485046386719,
"loss": 0.6236,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.7626792192459106,
"rewards/margins": 0.9078337550163269,
"rewards/rejected": -0.14515459537506104,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.72710418701172,
"eval_logits/rejected": 98.70391082763672,
"eval_logps/chosen": -32.6581916809082,
"eval_logps/rejected": -36.33292770385742,
"eval_loss": 0.7719493508338928,
"eval_rewards/accuracies": 0.5215947031974792,
"eval_rewards/chosen": -0.17201441526412964,
"eval_rewards/margins": 0.12110573798418045,
"eval_rewards/rejected": -0.2931201457977295,
"eval_runtime": 103.8817,
"eval_samples_per_second": 3.302,
"eval_steps_per_second": 0.414,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 78.5,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 80.8266830444336,
"logits/rejected": 80.73197174072266,
"logps/chosen": -33.31422805786133,
"logps/rejected": -35.28189468383789,
"loss": 0.6059,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.5051938891410828,
"rewards/margins": 0.7418341636657715,
"rewards/rejected": -0.23664025962352753,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 21.0,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 83.00508117675781,
"logits/rejected": 83.09242248535156,
"logps/chosen": -31.03920555114746,
"logps/rejected": -30.981882095336914,
"loss": 0.5212,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.7277995347976685,
"rewards/margins": 0.930141270160675,
"rewards/rejected": -0.20234176516532898,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 49.25,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 80.0581283569336,
"logits/rejected": 80.11595153808594,
"logps/chosen": -32.08351516723633,
"logps/rejected": -34.33815002441406,
"loss": 0.594,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.5777846574783325,
"rewards/margins": 0.8385850787162781,
"rewards/rejected": -0.26080039143562317,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 28.0,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 82.56427001953125,
"logits/rejected": 82.85057830810547,
"logps/chosen": -30.730464935302734,
"logps/rejected": -31.813983917236328,
"loss": 0.4701,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.7660826444625854,
"rewards/margins": 0.9916342496871948,
"rewards/rejected": -0.22555160522460938,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 50.5,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 81.2629623413086,
"logits/rejected": 81.32804870605469,
"logps/chosen": -26.856103897094727,
"logps/rejected": -30.107614517211914,
"loss": 0.6072,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.5918346047401428,
"rewards/margins": 0.7573351263999939,
"rewards/rejected": -0.1655004471540451,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 36.25,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 78.54646301269531,
"logits/rejected": 78.68824768066406,
"logps/chosen": -30.280277252197266,
"logps/rejected": -36.395103454589844,
"loss": 0.4323,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 1.0245726108551025,
"rewards/margins": 1.259353756904602,
"rewards/rejected": -0.23478105664253235,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 22.625,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 77.9116439819336,
"logits/rejected": 77.93907165527344,
"logps/chosen": -30.94207763671875,
"logps/rejected": -31.825298309326172,
"loss": 0.5298,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.6694773435592651,
"rewards/margins": 0.9253811836242676,
"rewards/rejected": -0.2559038996696472,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 48.0,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 80.6185531616211,
"logits/rejected": 80.40409851074219,
"logps/chosen": -31.1258487701416,
"logps/rejected": -29.83230972290039,
"loss": 0.6589,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.5472350716590881,
"rewards/margins": 0.6794873476028442,
"rewards/rejected": -0.1322522908449173,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 17.25,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 80.76725006103516,
"logits/rejected": 80.67757415771484,
"logps/chosen": -33.04716491699219,
"logps/rejected": -32.507728576660156,
"loss": 0.4727,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.8456665277481079,
"rewards/margins": 1.238355040550232,
"rewards/rejected": -0.39268869161605835,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 44.75,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 76.41008758544922,
"logits/rejected": 76.48558044433594,
"logps/chosen": -32.2567253112793,
"logps/rejected": -29.211252212524414,
"loss": 0.5655,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.9191843271255493,
"rewards/margins": 1.0065667629241943,
"rewards/rejected": -0.08738242089748383,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 98.77224731445312,
"eval_logits/rejected": 98.75171661376953,
"eval_logps/chosen": -32.550052642822266,
"eval_logps/rejected": -36.224205017089844,
"eval_loss": 0.7744476795196533,
"eval_rewards/accuracies": 0.5427741408348083,
"eval_rewards/chosen": -0.08550228923559189,
"eval_rewards/margins": 0.1206393763422966,
"eval_rewards/rejected": -0.2061416506767273,
"eval_runtime": 104.0332,
"eval_samples_per_second": 3.297,
"eval_steps_per_second": 0.413,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 31.875,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 83.43260192871094,
"logits/rejected": 83.46700286865234,
"logps/chosen": -30.02802085876465,
"logps/rejected": -32.44657897949219,
"loss": 0.5905,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.6684112548828125,
"rewards/margins": 0.8658668398857117,
"rewards/rejected": -0.19745555520057678,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 24.875,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 80.97013092041016,
"logits/rejected": 80.96881103515625,
"logps/chosen": -30.49709701538086,
"logps/rejected": -29.160675048828125,
"loss": 0.4662,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.8811699748039246,
"rewards/margins": 1.0741162300109863,
"rewards/rejected": -0.19294631481170654,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 19.625,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 78.14985656738281,
"logits/rejected": 78.19586181640625,
"logps/chosen": -29.189855575561523,
"logps/rejected": -32.966094970703125,
"loss": 0.4527,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.9302013516426086,
"rewards/margins": 1.195185661315918,
"rewards/rejected": -0.26498422026634216,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 61.5,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 82.46419525146484,
"logits/rejected": 82.48377990722656,
"logps/chosen": -32.1870002746582,
"logps/rejected": -33.69792175292969,
"loss": 0.5537,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.8201173543930054,
"rewards/margins": 1.0834996700286865,
"rewards/rejected": -0.26338234543800354,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 16.75,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 81.44327545166016,
"logits/rejected": 81.4502944946289,
"logps/chosen": -32.606056213378906,
"logps/rejected": -33.298622131347656,
"loss": 0.5676,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.8301448822021484,
"rewards/margins": 0.9956042170524597,
"rewards/rejected": -0.16545933485031128,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 33.0,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 82.95188903808594,
"logits/rejected": 82.9841079711914,
"logps/chosen": -28.371601104736328,
"logps/rejected": -31.830394744873047,
"loss": 0.4872,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.9444905519485474,
"rewards/margins": 1.0889289379119873,
"rewards/rejected": -0.14443838596343994,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 34.25,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 82.37725067138672,
"logits/rejected": 82.40049743652344,
"logps/chosen": -31.749374389648438,
"logps/rejected": -35.2952766418457,
"loss": 0.508,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.9014309644699097,
"rewards/margins": 1.0501554012298584,
"rewards/rejected": -0.14872442185878754,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 38.0,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 76.2959976196289,
"logits/rejected": 76.17599487304688,
"logps/chosen": -29.714282989501953,
"logps/rejected": -28.307018280029297,
"loss": 0.57,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.6894746422767639,
"rewards/margins": 0.806629478931427,
"rewards/rejected": -0.11715485900640488,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.6050039254225694,
"train_runtime": 2556.17,
"train_samples_per_second": 1.205,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}