ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
c6d1958 verified
raw
history blame
No virus
21.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 13.0625,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 0.5,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 5.375,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.08451080322266,
"logits/rejected": 80.78488159179688,
"logps/chosen": -34.28126525878906,
"logps/rejected": -33.13351058959961,
"loss": 0.492,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": -0.015815330669283867,
"rewards/margins": 0.05422591418027878,
"rewards/rejected": -0.0700412467122078,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 9.6875,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.63957214355469,
"logits/rejected": 80.53150939941406,
"logps/chosen": -33.59695816040039,
"logps/rejected": -30.76776123046875,
"loss": 0.4944,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.03718096390366554,
"rewards/margins": 0.03328876942396164,
"rewards/rejected": 0.003892195178195834,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 10.5,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.47281646728516,
"logits/rejected": 82.50285339355469,
"logps/chosen": -33.86055374145508,
"logps/rejected": -31.097183227539062,
"loss": 0.5134,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": 0.06090690940618515,
"rewards/margins": -0.0697535052895546,
"rewards/rejected": 0.13066044449806213,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 10.875,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.05327606201172,
"logits/rejected": 81.04902648925781,
"logps/chosen": -32.787689208984375,
"logps/rejected": -33.05846405029297,
"loss": 0.4913,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.15454962849617004,
"rewards/margins": 0.030526524409651756,
"rewards/rejected": 0.12402307987213135,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 8.125,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.79315185546875,
"logits/rejected": 78.80322265625,
"logps/chosen": -30.44384765625,
"logps/rejected": -30.80826759338379,
"loss": 0.4599,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.30455905199050903,
"rewards/margins": 0.19855086505413055,
"rewards/rejected": 0.10600819438695908,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 9.625,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.44609069824219,
"logits/rejected": 83.50300598144531,
"logps/chosen": -30.81852149963379,
"logps/rejected": -29.231618881225586,
"loss": 0.4994,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": 0.1426558792591095,
"rewards/margins": -0.0009051367524079978,
"rewards/rejected": 0.1435610055923462,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 7.875,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 84.02252960205078,
"logits/rejected": 84.05018615722656,
"logps/chosen": -30.281408309936523,
"logps/rejected": -32.76702880859375,
"loss": 0.4981,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.13904651999473572,
"rewards/margins": 0.03077618218958378,
"rewards/rejected": 0.10827036201953888,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 9.5,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.84428405761719,
"logits/rejected": 81.82522583007812,
"logps/chosen": -31.032154083251953,
"logps/rejected": -30.712255477905273,
"loss": 0.4648,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.18911631405353546,
"rewards/margins": 0.1709602177143097,
"rewards/rejected": 0.018156107515096664,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 12.375,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.68492126464844,
"logits/rejected": 78.66017150878906,
"logps/chosen": -32.12371063232422,
"logps/rejected": -30.837940216064453,
"loss": 0.4704,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.20578794181346893,
"rewards/margins": 0.16037426888942719,
"rewards/rejected": 0.04541371017694473,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 9.9375,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.77175903320312,
"logits/rejected": 83.79080963134766,
"logps/chosen": -33.829063415527344,
"logps/rejected": -31.544397354125977,
"loss": 0.4749,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.1903691589832306,
"rewards/margins": 0.10790882259607315,
"rewards/rejected": 0.08246034383773804,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.75000762939453,
"eval_logits/rejected": 98.7376708984375,
"eval_logps/chosen": -32.41049575805664,
"eval_logps/rejected": -35.85907745361328,
"eval_loss": 0.504712700843811,
"eval_rewards/accuracies": 0.4746677577495575,
"eval_rewards/chosen": 0.013071590103209019,
"eval_rewards/margins": -0.02990747056901455,
"eval_rewards/rejected": 0.042979057878255844,
"eval_runtime": 104.3402,
"eval_samples_per_second": 3.287,
"eval_steps_per_second": 0.412,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 11.875,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 84.02198028564453,
"logits/rejected": 83.90216064453125,
"logps/chosen": -32.067047119140625,
"logps/rejected": -32.60837936401367,
"loss": 0.4251,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.382302463054657,
"rewards/margins": 0.36329856514930725,
"rewards/rejected": 0.01900387369096279,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 10.9375,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 84.06869506835938,
"logits/rejected": 84.17839050292969,
"logps/chosen": -28.029781341552734,
"logps/rejected": -35.15131378173828,
"loss": 0.4499,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.35991987586021423,
"rewards/margins": 0.23678629100322723,
"rewards/rejected": 0.123133584856987,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 9.125,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 81.26686096191406,
"logits/rejected": 81.29707336425781,
"logps/chosen": -30.0821533203125,
"logps/rejected": -31.722143173217773,
"loss": 0.4515,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.3300797641277313,
"rewards/margins": 0.24628591537475586,
"rewards/rejected": 0.08379384875297546,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 7.09375,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 82.26895904541016,
"logits/rejected": 82.28730773925781,
"logps/chosen": -26.675317764282227,
"logps/rejected": -32.535396575927734,
"loss": 0.4328,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.30880242586135864,
"rewards/margins": 0.35360854864120483,
"rewards/rejected": -0.044806141406297684,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 8.75,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.73310089111328,
"logits/rejected": 80.70652770996094,
"logps/chosen": -28.468103408813477,
"logps/rejected": -32.77440643310547,
"loss": 0.4125,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.36840274930000305,
"rewards/margins": 0.45156532526016235,
"rewards/rejected": -0.08316260576248169,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 8.5,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 82.41865539550781,
"logits/rejected": 82.44725799560547,
"logps/chosen": -33.112274169921875,
"logps/rejected": -30.1284122467041,
"loss": 0.4055,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.40979090332984924,
"rewards/margins": 0.4801415503025055,
"rewards/rejected": -0.07035063952207565,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 7.5625,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 83.09947204589844,
"logits/rejected": 83.05732727050781,
"logps/chosen": -30.413660049438477,
"logps/rejected": -32.43395233154297,
"loss": 0.4144,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.33825185894966125,
"rewards/margins": 0.5038093328475952,
"rewards/rejected": -0.16555748879909515,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 6.46875,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 80.59300231933594,
"logits/rejected": 80.56774139404297,
"logps/chosen": -30.52907371520996,
"logps/rejected": -31.434350967407227,
"loss": 0.433,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.2672317624092102,
"rewards/margins": 0.3780880868434906,
"rewards/rejected": -0.1108563169836998,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 5.40625,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 82.23339080810547,
"logits/rejected": 82.1915283203125,
"logps/chosen": -29.8317813873291,
"logps/rejected": -30.190067291259766,
"loss": 0.4509,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.37385472655296326,
"rewards/margins": 0.26267164945602417,
"rewards/rejected": 0.1111830323934555,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 6.625,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 77.5392074584961,
"logits/rejected": 77.48648834228516,
"logps/chosen": -33.0440788269043,
"logps/rejected": -32.247684478759766,
"loss": 0.3843,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.6834636926651001,
"rewards/margins": 0.5851765871047974,
"rewards/rejected": 0.09828709810972214,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.38356018066406,
"eval_logits/rejected": 98.37836456298828,
"eval_logps/chosen": -32.3836784362793,
"eval_logps/rejected": -35.98166275024414,
"eval_loss": 0.4944417476654053,
"eval_rewards/accuracies": 0.5307309031486511,
"eval_rewards/chosen": 0.023798126727342606,
"eval_rewards/margins": 0.029852891340851784,
"eval_rewards/rejected": -0.00605476601049304,
"eval_runtime": 104.0926,
"eval_samples_per_second": 3.295,
"eval_steps_per_second": 0.413,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 12.3125,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 80.09881591796875,
"logits/rejected": 80.01982879638672,
"logps/chosen": -32.544151306152344,
"logps/rejected": -35.001224517822266,
"loss": 0.3882,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.5606266260147095,
"rewards/margins": 0.5666781663894653,
"rewards/rejected": -0.006051521748304367,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 8.1875,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 82.21588134765625,
"logits/rejected": 82.28758239746094,
"logps/chosen": -30.38724136352539,
"logps/rejected": -30.862102508544922,
"loss": 0.3707,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.6246866583824158,
"rewards/margins": 0.6779459714889526,
"rewards/rejected": -0.05325937271118164,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 8.5,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 79.33607482910156,
"logits/rejected": 79.39179992675781,
"logps/chosen": -31.715646743774414,
"logps/rejected": -33.92882537841797,
"loss": 0.4251,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.4360387325286865,
"rewards/margins": 0.40270787477493286,
"rewards/rejected": 0.03333085775375366,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 10.625,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 81.82881164550781,
"logits/rejected": 82.10590362548828,
"logps/chosen": -29.962848663330078,
"logps/rejected": -31.462871551513672,
"loss": 0.3667,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.6900869607925415,
"rewards/margins": 0.6624192595481873,
"rewards/rejected": 0.027667587623000145,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 11.25,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 80.50505065917969,
"logits/rejected": 80.56990051269531,
"logps/chosen": -26.388418197631836,
"logps/rejected": -29.717565536499023,
"loss": 0.4245,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.48299089074134827,
"rewards/margins": 0.40972191095352173,
"rewards/rejected": 0.07326899468898773,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 8.625,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 77.76419830322266,
"logits/rejected": 77.94038391113281,
"logps/chosen": -29.758447647094727,
"logps/rejected": -36.150657653808594,
"loss": 0.3604,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.7210196256637573,
"rewards/margins": 0.7406320571899414,
"rewards/rejected": -0.0196123905479908,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 6.1875,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 77.1490707397461,
"logits/rejected": 77.18915557861328,
"logps/chosen": -30.186153411865234,
"logps/rejected": -31.373126983642578,
"loss": 0.3871,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.6371081471443176,
"rewards/margins": 0.5841903686523438,
"rewards/rejected": 0.05291769653558731,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 12.5,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 79.78591918945312,
"logits/rejected": 79.5605239868164,
"logps/chosen": -30.330188751220703,
"logps/rejected": -29.223669052124023,
"loss": 0.4182,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.5918818712234497,
"rewards/margins": 0.41455134749412537,
"rewards/rejected": 0.17733044922351837,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 8.4375,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 80.03771209716797,
"logits/rejected": 79.94876861572266,
"logps/chosen": -32.170814514160156,
"logps/rejected": -32.12440872192383,
"loss": 0.3449,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.7733729481697083,
"rewards/margins": 0.8163889646530151,
"rewards/rejected": -0.04301605746150017,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 5.5,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 75.5302505493164,
"logits/rejected": 75.59835052490234,
"logps/chosen": -31.593181610107422,
"logps/rejected": -29.076126098632812,
"loss": 0.363,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.7250088453292847,
"rewards/margins": 0.714650571346283,
"rewards/rejected": 0.010358264669775963,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 98.36553192138672,
"eval_logits/rejected": 98.37035369873047,
"eval_logps/chosen": -32.229251861572266,
"eval_logps/rejected": -35.81660079956055,
"eval_loss": 0.49616533517837524,
"eval_rewards/accuracies": 0.5103820562362671,
"eval_rewards/chosen": 0.0855708047747612,
"eval_rewards/margins": 0.025601176545023918,
"eval_rewards/rejected": 0.05996962636709213,
"eval_runtime": 104.1169,
"eval_samples_per_second": 3.294,
"eval_steps_per_second": 0.413,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 8.0625,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 82.7135238647461,
"logits/rejected": 82.74400329589844,
"logps/chosen": -29.19759178161621,
"logps/rejected": -32.07235336303711,
"loss": 0.3811,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.666375994682312,
"rewards/margins": 0.6154125332832336,
"rewards/rejected": 0.05096355825662613,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 8.5,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 80.10868835449219,
"logits/rejected": 80.10973358154297,
"logps/chosen": -29.693145751953125,
"logps/rejected": -28.737625122070312,
"loss": 0.3658,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.7621666193008423,
"rewards/margins": 0.6894195675849915,
"rewards/rejected": 0.07274699211120605,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 7.8125,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 77.36323547363281,
"logits/rejected": 77.391357421875,
"logps/chosen": -28.293853759765625,
"logps/rejected": -32.6038703918457,
"loss": 0.3393,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.8235009908676147,
"rewards/margins": 0.8111019134521484,
"rewards/rejected": 0.012399068102240562,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 10.625,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 81.62095642089844,
"logits/rejected": 81.64241027832031,
"logps/chosen": -31.521636962890625,
"logps/rejected": -33.3758659362793,
"loss": 0.377,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.6762043833732605,
"rewards/margins": 0.6790729761123657,
"rewards/rejected": -0.002868417650461197,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 7.71875,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 80.68013763427734,
"logits/rejected": 80.69649505615234,
"logps/chosen": -31.641170501708984,
"logps/rejected": -32.8623161315918,
"loss": 0.3712,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.8010267019271851,
"rewards/margins": 0.7092341780662537,
"rewards/rejected": 0.0917925089597702,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 6.25,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 82.13801574707031,
"logits/rejected": 82.19556427001953,
"logps/chosen": -27.77614402770996,
"logps/rejected": -31.526113510131836,
"loss": 0.369,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.7104269862174988,
"rewards/margins": 0.6609331965446472,
"rewards/rejected": 0.04949387162923813,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 7.5,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 81.63580322265625,
"logits/rejected": 81.66941833496094,
"logps/chosen": -31.2968692779541,
"logps/rejected": -34.87516403198242,
"loss": 0.4021,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.6317180395126343,
"rewards/margins": 0.5380347967147827,
"rewards/rejected": 0.09368324279785156,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 8.625,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 75.4756088256836,
"logits/rejected": 75.35409545898438,
"logps/chosen": -29.19217300415039,
"logps/rejected": -27.851299285888672,
"loss": 0.4126,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.5535811185836792,
"rewards/margins": 0.4298717975616455,
"rewards/rejected": 0.1237092986702919,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.4199758805237807,
"train_runtime": 2559.5376,
"train_samples_per_second": 1.203,
"train_steps_per_second": 0.15
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}