aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
afc2ba1 verified
raw
history blame
21.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -1.8664319515228271,
"logits/rejected": -1.8707623481750488,
"logps/chosen": -36.98527526855469,
"logps/rejected": -33.654090881347656,
"loss": 0.6829,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": 0.010662304237484932,
"rewards/margins": 0.02267039567232132,
"rewards/rejected": -0.012008090503513813,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -1.9980642795562744,
"logits/rejected": -2.0007288455963135,
"logps/chosen": -29.634414672851562,
"logps/rejected": -29.0543270111084,
"loss": 0.6943,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.0038894296158105135,
"rewards/margins": -0.000667938613332808,
"rewards/rejected": 0.004557368345558643,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.9210376739501953,
"logits/rejected": -1.9183601140975952,
"logps/chosen": -31.391239166259766,
"logps/rejected": -33.24319076538086,
"loss": 0.6828,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.012426799163222313,
"rewards/margins": 0.024792592972517014,
"rewards/rejected": -0.0123657938092947,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.017646312713623,
"logits/rejected": -2.0089142322540283,
"logps/chosen": -32.557518005371094,
"logps/rejected": -32.51502227783203,
"loss": 0.6894,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.009644975885748863,
"rewards/margins": 0.010818523354828358,
"rewards/rejected": -0.0011735468870028853,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -1.8620023727416992,
"logits/rejected": -1.8512481451034546,
"logps/chosen": -33.577735900878906,
"logps/rejected": -35.46040344238281,
"loss": 0.6982,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.010127579793334007,
"rewards/margins": -0.0060965316370129585,
"rewards/rejected": -0.004031048621982336,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -1.9402154684066772,
"logits/rejected": -1.9421701431274414,
"logps/chosen": -32.552555084228516,
"logps/rejected": -33.22978973388672,
"loss": 0.6723,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.023814614862203598,
"rewards/margins": 0.053018856793642044,
"rewards/rejected": -0.029204240068793297,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.071889877319336,
"logits/rejected": -2.0768685340881348,
"logps/chosen": -33.997718811035156,
"logps/rejected": -36.63623809814453,
"loss": 0.6836,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.004616844467818737,
"rewards/margins": 0.02906452678143978,
"rewards/rejected": -0.033681370317935944,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -1.9326432943344116,
"logits/rejected": -1.935786247253418,
"logps/chosen": -34.30440902709961,
"logps/rejected": -34.659637451171875,
"loss": 0.6521,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.06169893592596054,
"rewards/margins": 0.09531383961439133,
"rewards/rejected": -0.03361489623785019,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -1.9409675598144531,
"logits/rejected": -1.9454774856567383,
"logps/chosen": -32.3830680847168,
"logps/rejected": -32.33238983154297,
"loss": 0.6838,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.042176127433776855,
"rewards/margins": 0.028917592018842697,
"rewards/rejected": 0.013258534483611584,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -2.038121461868286,
"logits/rejected": -2.036132574081421,
"logps/chosen": -32.12568664550781,
"logps/rejected": -31.2890567779541,
"loss": 0.6634,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.05757413059473038,
"rewards/margins": 0.06899620592594147,
"rewards/rejected": -0.011422084644436836,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.232415199279785,
"eval_logits/rejected": -2.2275755405426025,
"eval_logps/chosen": -34.029048919677734,
"eval_logps/rejected": -37.52485275268555,
"eval_loss": 0.693107545375824,
"eval_rewards/accuracies": 0.5215947031974792,
"eval_rewards/chosen": 0.0027513643726706505,
"eval_rewards/margins": 0.006868092343211174,
"eval_rewards/rejected": -0.0041167279705405235,
"eval_runtime": 145.7484,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -1.9926633834838867,
"logits/rejected": -1.9902803897857666,
"logps/chosen": -33.11687088012695,
"logps/rejected": -34.01213836669922,
"loss": 0.6814,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.06347335875034332,
"rewards/margins": 0.054856397211551666,
"rewards/rejected": 0.008616959676146507,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -2.0042788982391357,
"logits/rejected": -1.9959495067596436,
"logps/chosen": -32.306739807128906,
"logps/rejected": -32.13039779663086,
"loss": 0.6734,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.06908417493104935,
"rewards/margins": 0.05353052541613579,
"rewards/rejected": 0.015553650446236134,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.0326714515686035,
"logits/rejected": -2.0247092247009277,
"logps/chosen": -30.308746337890625,
"logps/rejected": -32.05224609375,
"loss": 0.6637,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.08348459005355835,
"rewards/margins": 0.08310474455356598,
"rewards/rejected": 0.00037985146627761424,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -1.9627164602279663,
"logits/rejected": -1.9729163646697998,
"logps/chosen": -31.189788818359375,
"logps/rejected": -32.54594421386719,
"loss": 0.6424,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.1149359717965126,
"rewards/margins": 0.12046756595373154,
"rewards/rejected": -0.005531603004783392,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -1.8740726709365845,
"logits/rejected": -1.875239372253418,
"logps/chosen": -33.88011932373047,
"logps/rejected": -34.779319763183594,
"loss": 0.6271,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.15628577768802643,
"rewards/margins": 0.1673184335231781,
"rewards/rejected": -0.011032682843506336,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -1.9257261753082275,
"logits/rejected": -1.922323226928711,
"logps/chosen": -35.9793586730957,
"logps/rejected": -32.714969635009766,
"loss": 0.6539,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.09615939855575562,
"rewards/margins": 0.09226818382740021,
"rewards/rejected": 0.003891219152137637,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.027190923690796,
"logits/rejected": -2.019850254058838,
"logps/chosen": -33.4937629699707,
"logps/rejected": -31.404333114624023,
"loss": 0.6193,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.15204860270023346,
"rewards/margins": 0.17829009890556335,
"rewards/rejected": -0.026241496205329895,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.033841609954834,
"logits/rejected": -2.039079427719116,
"logps/chosen": -32.22673797607422,
"logps/rejected": -32.453857421875,
"loss": 0.632,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.1590258777141571,
"rewards/margins": 0.1433834582567215,
"rewards/rejected": 0.015642408281564713,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.03458833694458,
"logits/rejected": -2.031813144683838,
"logps/chosen": -31.249963760375977,
"logps/rejected": -31.329097747802734,
"loss": 0.6439,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.11949291080236435,
"rewards/margins": 0.12493407726287842,
"rewards/rejected": -0.005441152956336737,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -1.9052807092666626,
"logits/rejected": -1.9099184274673462,
"logps/chosen": -31.314193725585938,
"logps/rejected": -32.81206512451172,
"loss": 0.6329,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.14604374766349792,
"rewards/margins": 0.15660937130451202,
"rewards/rejected": -0.010565629228949547,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.230377674102783,
"eval_logits/rejected": -2.225529193878174,
"eval_logps/chosen": -34.05998992919922,
"eval_logps/rejected": -37.57161331176758,
"eval_loss": 0.6905081868171692,
"eval_rewards/accuracies": 0.5274086594581604,
"eval_rewards/chosen": -0.012718739919364452,
"eval_rewards/margins": 0.014778696931898594,
"eval_rewards/rejected": -0.027497438713908195,
"eval_runtime": 145.701,
"eval_samples_per_second": 2.354,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.017439603805542,
"logits/rejected": -2.0280823707580566,
"logps/chosen": -31.72454261779785,
"logps/rejected": -33.935951232910156,
"loss": 0.6269,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.12337962538003922,
"rewards/margins": 0.16000542044639587,
"rewards/rejected": -0.036625780165195465,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -1.909328818321228,
"logits/rejected": -1.924088716506958,
"logps/chosen": -29.841415405273438,
"logps/rejected": -31.60904884338379,
"loss": 0.6274,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.13506175577640533,
"rewards/margins": 0.16049805283546448,
"rewards/rejected": -0.02543630823493004,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -1.966265320777893,
"logits/rejected": -1.970245361328125,
"logps/chosen": -33.091209411621094,
"logps/rejected": -31.639759063720703,
"loss": 0.6126,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.16312837600708008,
"rewards/margins": 0.20835788547992706,
"rewards/rejected": -0.045229505747556686,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -1.9648067951202393,
"logits/rejected": -1.9429614543914795,
"logps/chosen": -33.82001495361328,
"logps/rejected": -35.11749267578125,
"loss": 0.5941,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.16686691343784332,
"rewards/margins": 0.25021862983703613,
"rewards/rejected": -0.08335171639919281,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -2.005873441696167,
"logits/rejected": -2.002545118331909,
"logps/chosen": -32.70961380004883,
"logps/rejected": -36.252098083496094,
"loss": 0.6448,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.10772605985403061,
"rewards/margins": 0.12131496518850327,
"rewards/rejected": -0.013588905334472656,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -1.8737099170684814,
"logits/rejected": -1.8713098764419556,
"logps/chosen": -33.96501922607422,
"logps/rejected": -35.54829025268555,
"loss": 0.6377,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.11169042438268661,
"rewards/margins": 0.13352498412132263,
"rewards/rejected": -0.02183455601334572,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -1.858642578125,
"logits/rejected": -1.8562240600585938,
"logps/chosen": -34.18030548095703,
"logps/rejected": -31.82675552368164,
"loss": 0.6384,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.1086462140083313,
"rewards/margins": 0.1363571435213089,
"rewards/rejected": -0.027710938826203346,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -1.9618957042694092,
"logits/rejected": -1.9513881206512451,
"logps/chosen": -35.000816345214844,
"logps/rejected": -31.879558563232422,
"loss": 0.6107,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.17791253328323364,
"rewards/margins": 0.1971966028213501,
"rewards/rejected": -0.019284065812826157,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.0570178031921387,
"logits/rejected": -2.04209566116333,
"logps/chosen": -30.695226669311523,
"logps/rejected": -32.64103317260742,
"loss": 0.654,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.11444780975580215,
"rewards/margins": 0.10904743522405624,
"rewards/rejected": 0.005400371737778187,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -1.9285688400268555,
"logits/rejected": -1.9260343313217163,
"logps/chosen": -32.38969039916992,
"logps/rejected": -30.898773193359375,
"loss": 0.5742,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.2687075138092041,
"rewards/margins": 0.3096885085105896,
"rewards/rejected": -0.04098101332783699,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.2279160022735596,
"eval_logits/rejected": -2.223081350326538,
"eval_logps/chosen": -34.089088439941406,
"eval_logps/rejected": -37.59783935546875,
"eval_loss": 0.6919631958007812,
"eval_rewards/accuracies": 0.5278239250183105,
"eval_rewards/chosen": -0.027267219498753548,
"eval_rewards/margins": 0.013343668542802334,
"eval_rewards/rejected": -0.040610890835523605,
"eval_runtime": 145.7459,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": -1.9126602411270142,
"logits/rejected": -1.9093825817108154,
"logps/chosen": -31.319168090820312,
"logps/rejected": -33.805519104003906,
"loss": 0.624,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.1420917958021164,
"rewards/margins": 0.1737762689590454,
"rewards/rejected": -0.03168448060750961,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": -1.9627044200897217,
"logits/rejected": -1.9504749774932861,
"logps/chosen": -34.31007385253906,
"logps/rejected": -33.66672134399414,
"loss": 0.6084,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.1430002748966217,
"rewards/margins": 0.20694026350975037,
"rewards/rejected": -0.06393997371196747,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": -1.9980976581573486,
"logits/rejected": -1.9966537952423096,
"logps/chosen": -33.16533660888672,
"logps/rejected": -32.55678939819336,
"loss": 0.6136,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.1602279245853424,
"rewards/margins": 0.19883206486701965,
"rewards/rejected": -0.03860412910580635,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": -2.0848796367645264,
"logits/rejected": -2.069186210632324,
"logps/chosen": -33.787841796875,
"logps/rejected": -33.07987976074219,
"loss": 0.6229,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.19577431678771973,
"rewards/margins": 0.17669571936130524,
"rewards/rejected": 0.01907859742641449,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": -1.957275390625,
"logits/rejected": -1.9564218521118164,
"logps/chosen": -32.81622314453125,
"logps/rejected": -32.52650833129883,
"loss": 0.5923,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.23293733596801758,
"rewards/margins": 0.2636169195175171,
"rewards/rejected": -0.030679568648338318,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": -1.9124408960342407,
"logits/rejected": -1.92275071144104,
"logps/chosen": -31.859888076782227,
"logps/rejected": -35.33869934082031,
"loss": 0.6119,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.17240020632743835,
"rewards/margins": 0.19736871123313904,
"rewards/rejected": -0.024968529120087624,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": -2.0522782802581787,
"logits/rejected": -2.045797824859619,
"logps/chosen": -33.34915542602539,
"logps/rejected": -29.27215576171875,
"loss": 0.6194,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.15140748023986816,
"rewards/margins": 0.1742721050977707,
"rewards/rejected": -0.022864630445837975,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": -1.9120715856552124,
"logits/rejected": -1.9142844676971436,
"logps/chosen": -33.86906051635742,
"logps/rejected": -30.961559295654297,
"loss": 0.5996,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.1849948763847351,
"rewards/margins": 0.22946183383464813,
"rewards/rejected": -0.04446694999933243,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.64145151051608,
"train_runtime": 3249.8987,
"train_samples_per_second": 0.947,
"train_steps_per_second": 0.118
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}