zephyr-7b-sft-full-orpo / trainer_state.json
statking's picture
Model save
77068ec verified
raw
history blame
211 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9984268484530676,
"eval_steps": 100,
"global_step": 2859,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01048767697954903,
"grad_norm": 11.303338968797107,
"learning_rate": 2.0000000000000003e-06,
"log_odds_chosen": 0.16597549617290497,
"log_odds_ratio": -0.6960083246231079,
"logits/chosen": -2.5440375804901123,
"logits/rejected": -2.532742977142334,
"logps/chosen": -0.9999498128890991,
"logps/rejected": -1.0999202728271484,
"loss": 2.7435,
"nll_loss": 2.655998706817627,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.049997489899396896,
"rewards/margins": 0.004998520482331514,
"rewards/rejected": -0.054996006190776825,
"step": 10
},
{
"epoch": 0.02097535395909806,
"grad_norm": 3.296785739531489,
"learning_rate": 4.000000000000001e-06,
"log_odds_chosen": 0.19497092068195343,
"log_odds_ratio": -0.6663684844970703,
"logits/chosen": -3.153244733810425,
"logits/rejected": -3.176297903060913,
"logps/chosen": -0.7618023753166199,
"logps/rejected": -0.8721799850463867,
"loss": 0.5628,
"nll_loss": 0.5223663449287415,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.03809012100100517,
"rewards/margins": 0.005518879741430283,
"rewards/rejected": -0.043609000742435455,
"step": 20
},
{
"epoch": 0.03146303093864709,
"grad_norm": 2.5096714885559264,
"learning_rate": 6e-06,
"log_odds_chosen": 0.23512229323387146,
"log_odds_ratio": -0.6553729772567749,
"logits/chosen": -2.9705119132995605,
"logits/rejected": -2.944556713104248,
"logps/chosen": -0.8099643588066101,
"logps/rejected": -0.9404464960098267,
"loss": 0.5331,
"nll_loss": 0.4915856420993805,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.040498219430446625,
"rewards/margins": 0.0065241060219705105,
"rewards/rejected": -0.04702232405543327,
"step": 30
},
{
"epoch": 0.04195070791819612,
"grad_norm": 2.5670929503530138,
"learning_rate": 8.000000000000001e-06,
"log_odds_chosen": 0.1703537404537201,
"log_odds_ratio": -0.6904168128967285,
"logits/chosen": -2.8517043590545654,
"logits/rejected": -2.83884334564209,
"logps/chosen": -0.805575966835022,
"logps/rejected": -0.9237464666366577,
"loss": 0.5194,
"nll_loss": 0.4799742102622986,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04027879983186722,
"rewards/margins": 0.005908523220568895,
"rewards/rejected": -0.046187322586774826,
"step": 40
},
{
"epoch": 0.05243838489774515,
"grad_norm": 2.8257696541784587,
"learning_rate": 1e-05,
"log_odds_chosen": 0.28843408823013306,
"log_odds_ratio": -0.6763556599617004,
"logits/chosen": -2.7286221981048584,
"logits/rejected": -2.72869610786438,
"logps/chosen": -0.787534236907959,
"logps/rejected": -0.968492865562439,
"loss": 0.5419,
"nll_loss": 0.48419374227523804,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.03937670961022377,
"rewards/margins": 0.009047931991517544,
"rewards/rejected": -0.04842463880777359,
"step": 50
},
{
"epoch": 0.06292606187729417,
"grad_norm": 2.7270372711002624,
"learning_rate": 1.2e-05,
"log_odds_chosen": 0.2020198553800583,
"log_odds_ratio": -0.6800572872161865,
"logits/chosen": -2.896289110183716,
"logits/rejected": -2.8839545249938965,
"logps/chosen": -0.8010624051094055,
"logps/rejected": -0.9179455637931824,
"loss": 0.5456,
"nll_loss": 0.5158990621566772,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.040053121745586395,
"rewards/margins": 0.005844158586114645,
"rewards/rejected": -0.04589728266000748,
"step": 60
},
{
"epoch": 0.07341373885684321,
"grad_norm": 2.7197204143491605,
"learning_rate": 1.4e-05,
"log_odds_chosen": 0.1937415450811386,
"log_odds_ratio": -0.6942794919013977,
"logits/chosen": -2.8848633766174316,
"logits/rejected": -2.905164957046509,
"logps/chosen": -0.8219146728515625,
"logps/rejected": -0.9291160702705383,
"loss": 0.5412,
"nll_loss": 0.5311218500137329,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.041095733642578125,
"rewards/margins": 0.005360070150345564,
"rewards/rejected": -0.046455807983875275,
"step": 70
},
{
"epoch": 0.08390141583639224,
"grad_norm": 1049.2102246099553,
"learning_rate": 1.6000000000000003e-05,
"log_odds_chosen": 0.1753607988357544,
"log_odds_ratio": -0.6886225938796997,
"logits/chosen": -2.6637063026428223,
"logits/rejected": -2.637396812438965,
"logps/chosen": -0.8933579325675964,
"logps/rejected": -1.020629644393921,
"loss": 1.0694,
"nll_loss": 0.9787748456001282,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04466789960861206,
"rewards/margins": 0.006363583263009787,
"rewards/rejected": -0.051031481474637985,
"step": 80
},
{
"epoch": 0.09438909281594127,
"grad_norm": 4.011701524085754,
"learning_rate": 1.8e-05,
"log_odds_chosen": 0.2628815174102783,
"log_odds_ratio": -0.6731477975845337,
"logits/chosen": -3.106489658355713,
"logits/rejected": -3.0954391956329346,
"logps/chosen": -0.9435924291610718,
"logps/rejected": -1.1041589975357056,
"loss": 0.5766,
"nll_loss": 0.5112682580947876,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04717962443828583,
"rewards/margins": 0.008028322830796242,
"rewards/rejected": -0.05520794540643692,
"step": 90
},
{
"epoch": 0.1048767697954903,
"grad_norm": 5.340561330006851,
"learning_rate": 2e-05,
"log_odds_chosen": 0.17503713071346283,
"log_odds_ratio": -0.6751121282577515,
"logits/chosen": -3.3266518115997314,
"logits/rejected": -3.3420982360839844,
"logps/chosen": -0.8886896371841431,
"logps/rejected": -1.0002682209014893,
"loss": 0.5668,
"nll_loss": 0.5238600969314575,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.044434480369091034,
"rewards/margins": 0.005578924436122179,
"rewards/rejected": -0.050013404339551926,
"step": 100
},
{
"epoch": 0.1048767697954903,
"eval_log_odds_chosen": 0.21844430267810822,
"eval_log_odds_ratio": -0.6529861688613892,
"eval_logits/chosen": -3.3082144260406494,
"eval_logits/rejected": -3.3147807121276855,
"eval_logps/chosen": -0.9112777709960938,
"eval_logps/rejected": -1.0580321550369263,
"eval_loss": 0.5842872858047485,
"eval_nll_loss": 0.5515953898429871,
"eval_rewards/accuracies": 0.6150793433189392,
"eval_rewards/chosen": -0.04556388780474663,
"eval_rewards/margins": 0.007337724789977074,
"eval_rewards/rejected": -0.05290161445736885,
"eval_runtime": 138.2645,
"eval_samples_per_second": 14.422,
"eval_steps_per_second": 0.456,
"step": 100
},
{
"epoch": 0.11536444677503933,
"grad_norm": 2.8100337089038514,
"learning_rate": 1.9069251784911845e-05,
"log_odds_chosen": 0.2544933259487152,
"log_odds_ratio": -0.643945038318634,
"logits/chosen": -3.2667174339294434,
"logits/rejected": -3.310918092727661,
"logps/chosen": -0.8447545170783997,
"logps/rejected": -1.009132981300354,
"loss": 0.5651,
"nll_loss": 0.5105677843093872,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.042237721383571625,
"rewards/margins": 0.008218927308917046,
"rewards/rejected": -0.05045665428042412,
"step": 110
},
{
"epoch": 0.12585212375458835,
"grad_norm": 2.2193460343172986,
"learning_rate": 1.825741858350554e-05,
"log_odds_chosen": 0.24397364258766174,
"log_odds_ratio": -0.6682508587837219,
"logits/chosen": -3.193361282348633,
"logits/rejected": -3.243128538131714,
"logps/chosen": -0.8714381456375122,
"logps/rejected": -1.0333614349365234,
"loss": 0.6091,
"nll_loss": 0.5700744390487671,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.04357190802693367,
"rewards/margins": 0.008096165955066681,
"rewards/rejected": -0.051668066531419754,
"step": 120
},
{
"epoch": 0.1363398007341374,
"grad_norm": 2.3414921674264555,
"learning_rate": 1.7541160386140587e-05,
"log_odds_chosen": 0.2272050678730011,
"log_odds_ratio": -0.6708214282989502,
"logits/chosen": -3.1920104026794434,
"logits/rejected": -3.211714267730713,
"logps/chosen": -0.8986352682113647,
"logps/rejected": -1.0474598407745361,
"loss": 0.5886,
"nll_loss": 0.552306056022644,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.044931765645742416,
"rewards/margins": 0.007441227789968252,
"rewards/rejected": -0.052372999489307404,
"step": 130
},
{
"epoch": 0.14682747771368643,
"grad_norm": 2.3255085925590597,
"learning_rate": 1.6903085094570334e-05,
"log_odds_chosen": 0.22232067584991455,
"log_odds_ratio": -0.6680520176887512,
"logits/chosen": -3.1715519428253174,
"logits/rejected": -3.198253631591797,
"logps/chosen": -0.9551104307174683,
"logps/rejected": -1.1022988557815552,
"loss": 0.5878,
"nll_loss": 0.5523446798324585,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04775552451610565,
"rewards/margins": 0.007359415292739868,
"rewards/rejected": -0.05511493608355522,
"step": 140
},
{
"epoch": 0.15731515469323545,
"grad_norm": 2.6729814886854766,
"learning_rate": 1.6329931618554523e-05,
"log_odds_chosen": 0.17247287929058075,
"log_odds_ratio": -0.7340894341468811,
"logits/chosen": -3.102067470550537,
"logits/rejected": -3.1263070106506348,
"logps/chosen": -0.9946192502975464,
"logps/rejected": -1.1088117361068726,
"loss": 0.5489,
"nll_loss": 0.5492355823516846,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.04973096773028374,
"rewards/margins": 0.005709617864340544,
"rewards/rejected": -0.05544058233499527,
"step": 150
},
{
"epoch": 0.16780283167278448,
"grad_norm": 2.603021066142599,
"learning_rate": 1.5811388300841898e-05,
"log_odds_chosen": 0.2041763812303543,
"log_odds_ratio": -0.6666288375854492,
"logits/chosen": -3.0764000415802,
"logits/rejected": -3.1064279079437256,
"logps/chosen": -0.9137493968009949,
"logps/rejected": -1.0383034944534302,
"loss": 0.6063,
"nll_loss": 0.5569471120834351,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.045687466859817505,
"rewards/margins": 0.006227707955986261,
"rewards/rejected": -0.05191517621278763,
"step": 160
},
{
"epoch": 0.1782905086523335,
"grad_norm": 2.4919552056925416,
"learning_rate": 1.533929977694741e-05,
"log_odds_chosen": 0.25588172674179077,
"log_odds_ratio": -0.6607967019081116,
"logits/chosen": -3.1293396949768066,
"logits/rejected": -3.1606574058532715,
"logps/chosen": -0.8986794352531433,
"logps/rejected": -1.0667051076889038,
"loss": 0.5845,
"nll_loss": 0.5496193766593933,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.044933974742889404,
"rewards/margins": 0.008401280269026756,
"rewards/rejected": -0.05333525687456131,
"step": 170
},
{
"epoch": 0.18877818563188253,
"grad_norm": 2.4600198980545915,
"learning_rate": 1.49071198499986e-05,
"log_odds_chosen": 0.27393144369125366,
"log_odds_ratio": -0.6479635238647461,
"logits/chosen": -3.080091714859009,
"logits/rejected": -3.103672504425049,
"logps/chosen": -0.9190357327461243,
"logps/rejected": -1.0871737003326416,
"loss": 0.5676,
"nll_loss": 0.550677478313446,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.045951783657073975,
"rewards/margins": 0.00840689055621624,
"rewards/rejected": -0.05435867980122566,
"step": 180
},
{
"epoch": 0.19926586261143156,
"grad_norm": 5.689090620434962,
"learning_rate": 1.4509525002200235e-05,
"log_odds_chosen": 0.23676976561546326,
"log_odds_ratio": -0.6501709222793579,
"logits/chosen": -3.0815584659576416,
"logits/rejected": -3.1054322719573975,
"logps/chosen": -0.9278916120529175,
"logps/rejected": -1.0751855373382568,
"loss": 0.5906,
"nll_loss": 0.6120038628578186,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.046394579112529755,
"rewards/margins": 0.007364692632108927,
"rewards/rejected": -0.05375927686691284,
"step": 190
},
{
"epoch": 0.2097535395909806,
"grad_norm": 2.2848535898780375,
"learning_rate": 1.4142135623730951e-05,
"log_odds_chosen": 0.2697228789329529,
"log_odds_ratio": -0.6704415082931519,
"logits/chosen": -2.99995756149292,
"logits/rejected": -3.038682460784912,
"logps/chosen": -0.9138332605361938,
"logps/rejected": -1.1080011129379272,
"loss": 0.5676,
"nll_loss": 0.5736643075942993,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.04569166153669357,
"rewards/margins": 0.009708395227789879,
"rewards/rejected": -0.0554000549018383,
"step": 200
},
{
"epoch": 0.2097535395909806,
"eval_log_odds_chosen": 0.2850116789340973,
"eval_log_odds_ratio": -0.6474155783653259,
"eval_logits/chosen": -2.9992330074310303,
"eval_logits/rejected": -3.0026443004608154,
"eval_logps/chosen": -0.8811094164848328,
"eval_logps/rejected": -1.0644237995147705,
"eval_loss": 0.5726434588432312,
"eval_nll_loss": 0.5359312295913696,
"eval_rewards/accuracies": 0.625,
"eval_rewards/chosen": -0.04405546560883522,
"eval_rewards/margins": 0.00916572567075491,
"eval_rewards/rejected": -0.053221192210912704,
"eval_runtime": 137.9025,
"eval_samples_per_second": 14.459,
"eval_steps_per_second": 0.457,
"step": 200
},
{
"epoch": 0.22024121657052964,
"grad_norm": 2.2864637176453266,
"learning_rate": 1.3801311186847084e-05,
"log_odds_chosen": 0.10374544560909271,
"log_odds_ratio": -0.7170687913894653,
"logits/chosen": -3.0079314708709717,
"logits/rejected": -3.026061773300171,
"logps/chosen": -0.8713214993476868,
"logps/rejected": -0.9376395344734192,
"loss": 0.5683,
"nll_loss": 0.5364366769790649,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04356607422232628,
"rewards/margins": 0.003315900219604373,
"rewards/rejected": -0.04688197374343872,
"step": 210
},
{
"epoch": 0.23072889355007867,
"grad_norm": 2.3833164568305705,
"learning_rate": 1.3483997249264842e-05,
"log_odds_chosen": 0.1967695653438568,
"log_odds_ratio": -0.6872244477272034,
"logits/chosen": -3.066392183303833,
"logits/rejected": -3.0755832195281982,
"logps/chosen": -0.8734294176101685,
"logps/rejected": -0.9998324513435364,
"loss": 0.5608,
"nll_loss": 0.5176301598548889,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.043671466410160065,
"rewards/margins": 0.006320156157016754,
"rewards/rejected": -0.04999162256717682,
"step": 220
},
{
"epoch": 0.2412165705296277,
"grad_norm": 2.143148051812647,
"learning_rate": 1.3187609467915744e-05,
"log_odds_chosen": 0.2681586444377899,
"log_odds_ratio": -0.669995129108429,
"logits/chosen": -3.0045371055603027,
"logits/rejected": -3.023197889328003,
"logps/chosen": -0.9347988963127136,
"logps/rejected": -1.1079022884368896,
"loss": 0.5715,
"nll_loss": 0.5268279910087585,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04673994705080986,
"rewards/margins": 0.00865517370402813,
"rewards/rejected": -0.05539512634277344,
"step": 230
},
{
"epoch": 0.2517042475091767,
"grad_norm": 2.4867634050680865,
"learning_rate": 1.2909944487358057e-05,
"log_odds_chosen": 0.2310989797115326,
"log_odds_ratio": -0.6607853770256042,
"logits/chosen": -3.0592639446258545,
"logits/rejected": -3.0972437858581543,
"logps/chosen": -0.90626060962677,
"logps/rejected": -1.057490587234497,
"loss": 0.5797,
"nll_loss": 0.5543950796127319,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.0453130342066288,
"rewards/margins": 0.007561509497463703,
"rewards/rejected": -0.05287454277276993,
"step": 240
},
{
"epoch": 0.26219192448872575,
"grad_norm": 2.2846935841220364,
"learning_rate": 1.2649110640673518e-05,
"log_odds_chosen": 0.24984344840049744,
"log_odds_ratio": -0.6764962077140808,
"logits/chosen": -3.0678868293762207,
"logits/rejected": -3.0685126781463623,
"logps/chosen": -0.8884732127189636,
"logps/rejected": -1.025420904159546,
"loss": 0.5498,
"nll_loss": 0.5219429731369019,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.0444236658513546,
"rewards/margins": 0.006847388111054897,
"rewards/rejected": -0.05127105116844177,
"step": 250
},
{
"epoch": 0.2726796014682748,
"grad_norm": 2.3800633619201523,
"learning_rate": 1.2403473458920845e-05,
"log_odds_chosen": 0.2426706850528717,
"log_odds_ratio": -0.6691194772720337,
"logits/chosen": -3.0950028896331787,
"logits/rejected": -3.112684488296509,
"logps/chosen": -0.8879591822624207,
"logps/rejected": -1.042834997177124,
"loss": 0.5302,
"nll_loss": 0.45519179105758667,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04439795762300491,
"rewards/margins": 0.00774379214271903,
"rewards/rejected": -0.05214175581932068,
"step": 260
},
{
"epoch": 0.2831672784478238,
"grad_norm": 2.3697586961370027,
"learning_rate": 1.2171612389003691e-05,
"log_odds_chosen": 0.23119862377643585,
"log_odds_ratio": -0.6756153702735901,
"logits/chosen": -3.113889455795288,
"logits/rejected": -3.157740354537964,
"logps/chosen": -0.9564247131347656,
"logps/rejected": -1.1352105140686035,
"loss": 0.5654,
"nll_loss": 0.5433498024940491,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.04782123863697052,
"rewards/margins": 0.008939290419220924,
"rewards/rejected": -0.056760527193546295,
"step": 270
},
{
"epoch": 0.29365495542737285,
"grad_norm": 1.9757109026566833,
"learning_rate": 1.1952286093343936e-05,
"log_odds_chosen": 0.25132113695144653,
"log_odds_ratio": -0.6663895845413208,
"logits/chosen": -3.1407101154327393,
"logits/rejected": -3.1832191944122314,
"logps/chosen": -0.9308640360832214,
"logps/rejected": -1.087449312210083,
"loss": 0.5429,
"nll_loss": 0.4785974621772766,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.04654319956898689,
"rewards/margins": 0.007829269394278526,
"rewards/rejected": -0.05437246710062027,
"step": 280
},
{
"epoch": 0.30414263240692185,
"grad_norm": 2.7308236297418427,
"learning_rate": 1.1744404390294071e-05,
"log_odds_chosen": 0.35913094878196716,
"log_odds_ratio": -0.6187662482261658,
"logits/chosen": -3.0944533348083496,
"logits/rejected": -3.1177055835723877,
"logps/chosen": -0.8355825543403625,
"logps/rejected": -1.0572632551193237,
"loss": 0.5568,
"nll_loss": 0.48925265669822693,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.041779130697250366,
"rewards/margins": 0.011084041558206081,
"rewards/rejected": -0.052863169461488724,
"step": 290
},
{
"epoch": 0.3146303093864709,
"grad_norm": 2.472653160364779,
"learning_rate": 1.1547005383792517e-05,
"log_odds_chosen": 0.2816540598869324,
"log_odds_ratio": -0.6775935888290405,
"logits/chosen": -3.092194080352783,
"logits/rejected": -3.1420485973358154,
"logps/chosen": -0.8778301477432251,
"logps/rejected": -1.0717580318450928,
"loss": 0.5819,
"nll_loss": 0.5100258588790894,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04389150068163872,
"rewards/margins": 0.009696396067738533,
"rewards/rejected": -0.0535879023373127,
"step": 300
},
{
"epoch": 0.3146303093864709,
"eval_log_odds_chosen": 0.28298813104629517,
"eval_log_odds_ratio": -0.6463662981987,
"eval_logits/chosen": -3.1391000747680664,
"eval_logits/rejected": -3.1424100399017334,
"eval_logps/chosen": -0.8770027756690979,
"eval_logps/rejected": -1.0619502067565918,
"eval_loss": 0.5552015900611877,
"eval_nll_loss": 0.5201771259307861,
"eval_rewards/accuracies": 0.6289682388305664,
"eval_rewards/chosen": -0.043850142508745193,
"eval_rewards/margins": 0.00924737099558115,
"eval_rewards/rejected": -0.05309751257300377,
"eval_runtime": 141.1002,
"eval_samples_per_second": 14.132,
"eval_steps_per_second": 0.446,
"step": 300
},
{
"epoch": 0.3251179863660199,
"grad_norm": 2.038557141198459,
"learning_rate": 1.1359236684941297e-05,
"log_odds_chosen": 0.1998841017484665,
"log_odds_ratio": -0.6875525116920471,
"logits/chosen": -3.0676262378692627,
"logits/rejected": -3.07094407081604,
"logps/chosen": -0.9092122912406921,
"logps/rejected": -1.0280473232269287,
"loss": 0.5844,
"nll_loss": 0.5417822599411011,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.04546061158180237,
"rewards/margins": 0.005941747687757015,
"rewards/rejected": -0.051402367651462555,
"step": 310
},
{
"epoch": 0.33560566334556896,
"grad_norm": 2.262270965184679,
"learning_rate": 1.118033988749895e-05,
"log_odds_chosen": 0.2705835700035095,
"log_odds_ratio": -0.6538633108139038,
"logits/chosen": -3.127427339553833,
"logits/rejected": -3.142587661743164,
"logps/chosen": -0.9069059491157532,
"logps/rejected": -1.0691728591918945,
"loss": 0.5242,
"nll_loss": 0.4929099977016449,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04534530267119408,
"rewards/margins": 0.008113345131278038,
"rewards/rejected": -0.05345864221453667,
"step": 320
},
{
"epoch": 0.34609334032511796,
"grad_norm": 2.4122464498293623,
"learning_rate": 1.1009637651263608e-05,
"log_odds_chosen": 0.23684370517730713,
"log_odds_ratio": -0.7030869722366333,
"logits/chosen": -3.0819878578186035,
"logits/rejected": -3.1327972412109375,
"logps/chosen": -0.9059860110282898,
"logps/rejected": -1.0601646900177002,
"loss": 0.5547,
"nll_loss": 0.5366790890693665,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04529929906129837,
"rewards/margins": 0.007708935532718897,
"rewards/rejected": -0.05300822854042053,
"step": 330
},
{
"epoch": 0.356581017304667,
"grad_norm": 2.3793498474146535,
"learning_rate": 1.0846522890932809e-05,
"log_odds_chosen": 0.18786638975143433,
"log_odds_ratio": -0.6986292004585266,
"logits/chosen": -3.0940568447113037,
"logits/rejected": -3.1512954235076904,
"logps/chosen": -0.8602282404899597,
"logps/rejected": -0.9875131845474243,
"loss": 0.5702,
"nll_loss": 0.5145949125289917,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.043011412024497986,
"rewards/margins": 0.0063642458990216255,
"rewards/rejected": -0.049375660717487335,
"step": 340
},
{
"epoch": 0.36706869428421607,
"grad_norm": 2.3420960793915517,
"learning_rate": 1.0690449676496977e-05,
"log_odds_chosen": 0.2689460217952728,
"log_odds_ratio": -0.6845754384994507,
"logits/chosen": -3.1326746940612793,
"logits/rejected": -3.1552205085754395,
"logps/chosen": -0.8725005984306335,
"logps/rejected": -1.0421197414398193,
"loss": 0.5462,
"nll_loss": 0.5172144174575806,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.043625034391880035,
"rewards/margins": 0.008480949327349663,
"rewards/rejected": -0.05210598558187485,
"step": 350
},
{
"epoch": 0.37755637126376507,
"grad_norm": 2.014589871880686,
"learning_rate": 1.0540925533894598e-05,
"log_odds_chosen": 0.37792789936065674,
"log_odds_ratio": -0.6156649589538574,
"logits/chosen": -3.010802745819092,
"logits/rejected": -3.042652130126953,
"logps/chosen": -0.8830682635307312,
"logps/rejected": -1.118240237236023,
"loss": 0.5497,
"nll_loss": 0.5099813938140869,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04415341466665268,
"rewards/margins": 0.011758595705032349,
"rewards/rejected": -0.05591200664639473,
"step": 360
},
{
"epoch": 0.3880440482433141,
"grad_norm": 2.0494786838330903,
"learning_rate": 1.0397504898200728e-05,
"log_odds_chosen": 0.37991228699684143,
"log_odds_ratio": -0.6151097416877747,
"logits/chosen": -3.071289539337158,
"logits/rejected": -3.0840888023376465,
"logps/chosen": -0.863991379737854,
"logps/rejected": -1.1161118745803833,
"loss": 0.5195,
"nll_loss": 0.4998775124549866,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.0431995615363121,
"rewards/margins": 0.012606029398739338,
"rewards/rejected": -0.055805593729019165,
"step": 370
},
{
"epoch": 0.3985317252228631,
"grad_norm": 2.3440751758332294,
"learning_rate": 1.0259783520851543e-05,
"log_odds_chosen": 0.4805373549461365,
"log_odds_ratio": -0.5845500230789185,
"logits/chosen": -3.1311728954315186,
"logits/rejected": -3.168400287628174,
"logps/chosen": -0.8546767234802246,
"logps/rejected": -1.1352304220199585,
"loss": 0.5371,
"nll_loss": 0.5167530179023743,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.04273384064435959,
"rewards/margins": 0.014027683064341545,
"rewards/rejected": -0.056761521846055984,
"step": 380
},
{
"epoch": 0.4090194022024122,
"grad_norm": 2.50155675830033,
"learning_rate": 1.0127393670836667e-05,
"log_odds_chosen": 0.0912429466843605,
"log_odds_ratio": -0.7177212238311768,
"logits/chosen": -3.1054975986480713,
"logits/rejected": -3.1308093070983887,
"logps/chosen": -0.9102872014045715,
"logps/rejected": -0.9754246473312378,
"loss": 0.5574,
"nll_loss": 0.5331951379776001,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.045514363795518875,
"rewards/margins": 0.003256872994825244,
"rewards/rejected": -0.04877123609185219,
"step": 390
},
{
"epoch": 0.4195070791819612,
"grad_norm": 2.027467517514936,
"learning_rate": 1e-05,
"log_odds_chosen": 0.2633103132247925,
"log_odds_ratio": -0.6879682540893555,
"logits/chosen": -3.0087058544158936,
"logits/rejected": -3.0386600494384766,
"logps/chosen": -0.9468951225280762,
"logps/rejected": -1.1236045360565186,
"loss": 0.5738,
"nll_loss": 0.527585506439209,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04734475538134575,
"rewards/margins": 0.008835467509925365,
"rewards/rejected": -0.05618022754788399,
"step": 400
},
{
"epoch": 0.4195070791819612,
"eval_log_odds_chosen": 0.2960740923881531,
"eval_log_odds_ratio": -0.6521593332290649,
"eval_logits/chosen": -3.1019551753997803,
"eval_logits/rejected": -3.1026368141174316,
"eval_logps/chosen": -0.8433709740638733,
"eval_logps/rejected": -1.0346297025680542,
"eval_loss": 0.5411269664764404,
"eval_nll_loss": 0.5047088265419006,
"eval_rewards/accuracies": 0.6289682388305664,
"eval_rewards/chosen": -0.042168550193309784,
"eval_rewards/margins": 0.00956293661147356,
"eval_rewards/rejected": -0.05173148587346077,
"eval_runtime": 135.94,
"eval_samples_per_second": 14.668,
"eval_steps_per_second": 0.463,
"step": 400
},
{
"epoch": 0.4299947561615102,
"grad_norm": 2.077556227084633,
"learning_rate": 9.877295966495898e-06,
"log_odds_chosen": 0.1433972865343094,
"log_odds_ratio": -0.7417241930961609,
"logits/chosen": -3.147104024887085,
"logits/rejected": -3.1611135005950928,
"logps/chosen": -0.8865131139755249,
"logps/rejected": -0.9979325532913208,
"loss": 0.5454,
"nll_loss": 0.4825812876224518,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.044325657188892365,
"rewards/margins": 0.005570969078689814,
"rewards/rejected": -0.04989662766456604,
"step": 410
},
{
"epoch": 0.4404824331410593,
"grad_norm": 1.9177361456178337,
"learning_rate": 9.759000729485331e-06,
"log_odds_chosen": 0.2965100407600403,
"log_odds_ratio": -0.6552795171737671,
"logits/chosen": -3.065213203430176,
"logits/rejected": -3.106889247894287,
"logps/chosen": -0.8926699757575989,
"logps/rejected": -1.073974609375,
"loss": 0.5349,
"nll_loss": 0.47521886229515076,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.04463350027799606,
"rewards/margins": 0.009065226651728153,
"rewards/rejected": -0.05369872972369194,
"step": 420
},
{
"epoch": 0.4509701101206083,
"grad_norm": 2.2675621915351503,
"learning_rate": 9.644856443408244e-06,
"log_odds_chosen": 0.29174235463142395,
"log_odds_ratio": -0.6506129503250122,
"logits/chosen": -3.075723648071289,
"logits/rejected": -3.0862226486206055,
"logps/chosen": -0.8427901268005371,
"logps/rejected": -1.0184295177459717,
"loss": 0.5557,
"nll_loss": 0.5429800152778625,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.04213951155543327,
"rewards/margins": 0.008781969547271729,
"rewards/rejected": -0.050921481102705,
"step": 430
},
{
"epoch": 0.46145778710015734,
"grad_norm": 2.048479923586714,
"learning_rate": 9.534625892455923e-06,
"log_odds_chosen": 0.2715272009372711,
"log_odds_ratio": -0.6504871249198914,
"logits/chosen": -3.114889144897461,
"logits/rejected": -3.1430869102478027,
"logps/chosen": -0.8674638867378235,
"logps/rejected": -1.0402857065200806,
"loss": 0.5502,
"nll_loss": 0.5185979604721069,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04337319731712341,
"rewards/margins": 0.008641095831990242,
"rewards/rejected": -0.05201428383588791,
"step": 440
},
{
"epoch": 0.47194546407970633,
"grad_norm": 1.9700303764265876,
"learning_rate": 9.428090415820635e-06,
"log_odds_chosen": 0.37898144125938416,
"log_odds_ratio": -0.6548101305961609,
"logits/chosen": -3.141404390335083,
"logits/rejected": -3.1785435676574707,
"logps/chosen": -0.8289934396743774,
"logps/rejected": -1.080649733543396,
"loss": 0.5278,
"nll_loss": 0.49574679136276245,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.04144967347383499,
"rewards/margins": 0.01258282084017992,
"rewards/rejected": -0.05403248593211174,
"step": 450
},
{
"epoch": 0.4824331410592554,
"grad_norm": 2.1444885294890796,
"learning_rate": 9.325048082403139e-06,
"log_odds_chosen": 0.21225424110889435,
"log_odds_ratio": -0.6999707221984863,
"logits/chosen": -3.110089063644409,
"logits/rejected": -3.1592323780059814,
"logps/chosen": -0.947162926197052,
"logps/rejected": -1.1105449199676514,
"loss": 0.5315,
"nll_loss": 0.5339683890342712,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04735814779996872,
"rewards/margins": 0.008169097825884819,
"rewards/rejected": -0.05552724748849869,
"step": 460
},
{
"epoch": 0.4929208180388044,
"grad_norm": 2.1649660190560613,
"learning_rate": 9.225312080288851e-06,
"log_odds_chosen": 0.2549912929534912,
"log_odds_ratio": -0.6857655644416809,
"logits/chosen": -3.0928080081939697,
"logits/rejected": -3.1287431716918945,
"logps/chosen": -0.8865912556648254,
"logps/rejected": -1.050857663154602,
"loss": 0.5421,
"nll_loss": 0.5101572275161743,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.044329557567834854,
"rewards/margins": 0.008213317021727562,
"rewards/rejected": -0.052542876452207565,
"step": 470
},
{
"epoch": 0.5034084950183534,
"grad_norm": 1.89898044344756,
"learning_rate": 9.12870929175277e-06,
"log_odds_chosen": 0.18933558464050293,
"log_odds_ratio": -0.7031041383743286,
"logits/chosen": -3.1588873863220215,
"logits/rejected": -3.1968955993652344,
"logps/chosen": -0.8558489680290222,
"logps/rejected": -0.980047881603241,
"loss": 0.5174,
"nll_loss": 0.5121264457702637,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.04279245063662529,
"rewards/margins": 0.006209943443536758,
"rewards/rejected": -0.04900239408016205,
"step": 480
},
{
"epoch": 0.5138961719979025,
"grad_norm": 1.9212510076087481,
"learning_rate": 9.035079029052514e-06,
"log_odds_chosen": 0.23131313920021057,
"log_odds_ratio": -0.6693936586380005,
"logits/chosen": -3.094421625137329,
"logits/rejected": -3.1039950847625732,
"logps/chosen": -0.9284296035766602,
"logps/rejected": -1.0470894575119019,
"loss": 0.5391,
"nll_loss": 0.5019217729568481,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04642148315906525,
"rewards/margins": 0.005932994186878204,
"rewards/rejected": -0.05235447734594345,
"step": 490
},
{
"epoch": 0.5243838489774515,
"grad_norm": 2.197524211966931,
"learning_rate": 8.94427190999916e-06,
"log_odds_chosen": 0.2233821153640747,
"log_odds_ratio": -0.6923887729644775,
"logits/chosen": -3.0647079944610596,
"logits/rejected": -3.0620505809783936,
"logps/chosen": -0.8755196332931519,
"logps/rejected": -1.0028659105300903,
"loss": 0.5478,
"nll_loss": 0.5219477415084839,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04377598315477371,
"rewards/margins": 0.0063673085533082485,
"rewards/rejected": -0.0501432940363884,
"step": 500
},
{
"epoch": 0.5243838489774515,
"eval_log_odds_chosen": 0.33266139030456543,
"eval_log_odds_ratio": -0.6382430791854858,
"eval_logits/chosen": -3.028609275817871,
"eval_logits/rejected": -3.0259969234466553,
"eval_logps/chosen": -0.8414799571037292,
"eval_logps/rejected": -1.0509231090545654,
"eval_loss": 0.5319445133209229,
"eval_nll_loss": 0.49702468514442444,
"eval_rewards/accuracies": 0.6289682388305664,
"eval_rewards/chosen": -0.04207399860024452,
"eval_rewards/margins": 0.010472159832715988,
"eval_rewards/rejected": -0.05254615470767021,
"eval_runtime": 136.7326,
"eval_samples_per_second": 14.583,
"eval_steps_per_second": 0.461,
"step": 500
},
{
"epoch": 0.5348715259570005,
"grad_norm": 1.7639475332504142,
"learning_rate": 8.856148855400955e-06,
"log_odds_chosen": 0.29167047142982483,
"log_odds_ratio": -0.648201048374176,
"logits/chosen": -3.0114383697509766,
"logits/rejected": -3.024693250656128,
"logps/chosen": -0.841100811958313,
"logps/rejected": -1.0192333459854126,
"loss": 0.5263,
"nll_loss": 0.5350626111030579,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.04205504059791565,
"rewards/margins": 0.00890662893652916,
"rewards/rejected": -0.05096167325973511,
"step": 510
},
{
"epoch": 0.5453592029365496,
"grad_norm": 1.6884098835310988,
"learning_rate": 8.770580193070294e-06,
"log_odds_chosen": 0.24579331278800964,
"log_odds_ratio": -0.6814862489700317,
"logits/chosen": -3.016019582748413,
"logits/rejected": -3.0255684852600098,
"logps/chosen": -0.9082791209220886,
"logps/rejected": -1.0769283771514893,
"loss": 0.5369,
"nll_loss": 0.47502464056015015,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04541395604610443,
"rewards/margins": 0.008432453498244286,
"rewards/rejected": -0.053846411406993866,
"step": 520
},
{
"epoch": 0.5558468799160986,
"grad_norm": 1.7588436164574766,
"learning_rate": 8.687444855261389e-06,
"log_odds_chosen": 0.39766445755958557,
"log_odds_ratio": -0.6521557569503784,
"logits/chosen": -3.0906691551208496,
"logits/rejected": -3.1090755462646484,
"logps/chosen": -0.8297191858291626,
"logps/rejected": -1.1049801111221313,
"loss": 0.5364,
"nll_loss": 0.450814813375473,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04148596152663231,
"rewards/margins": 0.01376304216682911,
"rewards/rejected": -0.05524900555610657,
"step": 530
},
{
"epoch": 0.5663345568956476,
"grad_norm": 1.9397603724841295,
"learning_rate": 8.606629658238705e-06,
"log_odds_chosen": 0.15624158084392548,
"log_odds_ratio": -0.7059566378593445,
"logits/chosen": -3.0063095092773438,
"logits/rejected": -3.0354349613189697,
"logps/chosen": -0.8621616363525391,
"logps/rejected": -0.9609626531600952,
"loss": 0.5526,
"nll_loss": 0.5280291438102722,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.04310808330774307,
"rewards/margins": 0.0049400487914681435,
"rewards/rejected": -0.04804813116788864,
"step": 540
},
{
"epoch": 0.5768222338751966,
"grad_norm": 1.9970251061131588,
"learning_rate": 8.528028654224417e-06,
"log_odds_chosen": 0.3964000940322876,
"log_odds_ratio": -0.6276581883430481,
"logits/chosen": -3.051056385040283,
"logits/rejected": -3.0628600120544434,
"logps/chosen": -0.8477095365524292,
"logps/rejected": -1.090545415878296,
"loss": 0.5377,
"nll_loss": 0.5382589101791382,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04238547384738922,
"rewards/margins": 0.012141798622906208,
"rewards/rejected": -0.05452727526426315,
"step": 550
},
{
"epoch": 0.5873099108547457,
"grad_norm": 1.9451374983545444,
"learning_rate": 8.451542547285167e-06,
"log_odds_chosen": 0.24946291744709015,
"log_odds_ratio": -0.6731950044631958,
"logits/chosen": -3.09270977973938,
"logits/rejected": -3.1291451454162598,
"logps/chosen": -0.8785122632980347,
"logps/rejected": -1.0384708642959595,
"loss": 0.5214,
"nll_loss": 0.5020500421524048,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04392561689019203,
"rewards/margins": 0.007997924461960793,
"rewards/rejected": -0.05192355066537857,
"step": 560
},
{
"epoch": 0.5977975878342947,
"grad_norm": 2.015759366014609,
"learning_rate": 8.37707816583391e-06,
"log_odds_chosen": 0.1689465194940567,
"log_odds_ratio": -0.7204016447067261,
"logits/chosen": -3.082165241241455,
"logits/rejected": -3.113685369491577,
"logps/chosen": -0.8903343081474304,
"logps/rejected": -1.0027625560760498,
"loss": 0.5039,
"nll_loss": 0.5279403924942017,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.04451671987771988,
"rewards/margins": 0.0056214118376374245,
"rewards/rejected": -0.05013813450932503,
"step": 570
},
{
"epoch": 0.6082852648138437,
"grad_norm": 1.8532059123988396,
"learning_rate": 8.304547985373997e-06,
"log_odds_chosen": 0.27719905972480774,
"log_odds_ratio": -0.6604655385017395,
"logits/chosen": -3.164926528930664,
"logits/rejected": -3.1809298992156982,
"logps/chosen": -0.8681858777999878,
"logps/rejected": -1.0584015846252441,
"loss": 0.5449,
"nll_loss": 0.48173967003822327,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04340929910540581,
"rewards/margins": 0.009510790929198265,
"rewards/rejected": -0.052920084446668625,
"step": 580
},
{
"epoch": 0.6187729417933928,
"grad_norm": 1.9696416884513863,
"learning_rate": 8.233869695926184e-06,
"log_odds_chosen": 0.3565579056739807,
"log_odds_ratio": -0.6653521656990051,
"logits/chosen": -3.1371326446533203,
"logits/rejected": -3.1804890632629395,
"logps/chosen": -0.8285515904426575,
"logps/rejected": -1.060605764389038,
"loss": 0.5115,
"nll_loss": 0.5481864213943481,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04142758250236511,
"rewards/margins": 0.011602701619267464,
"rewards/rejected": -0.05303028225898743,
"step": 590
},
{
"epoch": 0.6292606187729418,
"grad_norm": 2.0728707870222607,
"learning_rate": 8.164965809277262e-06,
"log_odds_chosen": 0.3636320233345032,
"log_odds_ratio": -0.6437779664993286,
"logits/chosen": -3.155708074569702,
"logits/rejected": -3.155524492263794,
"logps/chosen": -0.8240157961845398,
"logps/rejected": -1.06477952003479,
"loss": 0.5146,
"nll_loss": 0.4843020439147949,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04120079427957535,
"rewards/margins": 0.012038188055157661,
"rewards/rejected": -0.05323898047208786,
"step": 600
},
{
"epoch": 0.6292606187729418,
"eval_log_odds_chosen": 0.312126487493515,
"eval_log_odds_ratio": -0.6417948603630066,
"eval_logits/chosen": -3.127530336380005,
"eval_logits/rejected": -3.1324751377105713,
"eval_logps/chosen": -0.8164808750152588,
"eval_logps/rejected": -1.016471028327942,
"eval_loss": 0.5239931344985962,
"eval_nll_loss": 0.4882962703704834,
"eval_rewards/accuracies": 0.6230158805847168,
"eval_rewards/chosen": -0.0408240407705307,
"eval_rewards/margins": 0.00999950896948576,
"eval_rewards/rejected": -0.050823554396629333,
"eval_runtime": 137.2676,
"eval_samples_per_second": 14.526,
"eval_steps_per_second": 0.459,
"step": 600
},
{
"epoch": 0.6397482957524908,
"grad_norm": 2.2204480702078246,
"learning_rate": 8.097763301789162e-06,
"log_odds_chosen": 0.1712610125541687,
"log_odds_ratio": -0.705093502998352,
"logits/chosen": -3.0651237964630127,
"logits/rejected": -3.0982956886291504,
"logps/chosen": -0.8816771507263184,
"logps/rejected": -0.989287257194519,
"loss": 0.526,
"nll_loss": 0.48726779222488403,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.044083863496780396,
"rewards/margins": 0.0053805033676326275,
"rewards/rejected": -0.04946436733007431,
"step": 610
},
{
"epoch": 0.6502359727320398,
"grad_norm": 2.0795066851294,
"learning_rate": 8.03219328902499e-06,
"log_odds_chosen": 0.18011939525604248,
"log_odds_ratio": -0.7075856328010559,
"logits/chosen": -3.093158721923828,
"logits/rejected": -3.1170780658721924,
"logps/chosen": -0.8789434432983398,
"logps/rejected": -1.0122572183609009,
"loss": 0.5293,
"nll_loss": 0.5134457945823669,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.043947167694568634,
"rewards/margins": 0.006665694061666727,
"rewards/rejected": -0.050612859427928925,
"step": 620
},
{
"epoch": 0.6607236497115889,
"grad_norm": 2.0001788984831514,
"learning_rate": 7.968190728895958e-06,
"log_odds_chosen": 0.2610745429992676,
"log_odds_ratio": -0.6974207758903503,
"logits/chosen": -3.0472846031188965,
"logits/rejected": -3.0721120834350586,
"logps/chosen": -0.8566058874130249,
"logps/rejected": -1.0223418474197388,
"loss": 0.5372,
"nll_loss": 0.5244878530502319,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.042830295860767365,
"rewards/margins": 0.00828679371625185,
"rewards/rejected": -0.05111708492040634,
"step": 630
},
{
"epoch": 0.6712113266911379,
"grad_norm": 2.3414302184737332,
"learning_rate": 7.905694150420949e-06,
"log_odds_chosen": 0.30453813076019287,
"log_odds_ratio": -0.6686201095581055,
"logits/chosen": -3.0571064949035645,
"logits/rejected": -3.079134464263916,
"logps/chosen": -0.8609515428543091,
"logps/rejected": -1.0473490953445435,
"loss": 0.5151,
"nll_loss": 0.46057072281837463,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.04304756969213486,
"rewards/margins": 0.009319878183305264,
"rewards/rejected": -0.05236745625734329,
"step": 640
},
{
"epoch": 0.6816990036706869,
"grad_norm": 1.9074311662484937,
"learning_rate": 7.844645405527363e-06,
"log_odds_chosen": 0.21438069641590118,
"log_odds_ratio": -0.7022002935409546,
"logits/chosen": -3.058842897415161,
"logits/rejected": -3.0864357948303223,
"logps/chosen": -0.8311389684677124,
"logps/rejected": -0.9654434323310852,
"loss": 0.5332,
"nll_loss": 0.5123748183250427,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.0415569506585598,
"rewards/margins": 0.006715219467878342,
"rewards/rejected": -0.04827217012643814,
"step": 650
},
{
"epoch": 0.6921866806502359,
"grad_norm": 1.9616180703535884,
"learning_rate": 7.78498944161523e-06,
"log_odds_chosen": 0.3507782816886902,
"log_odds_ratio": -0.641882061958313,
"logits/chosen": -3.0647902488708496,
"logits/rejected": -3.1045496463775635,
"logps/chosen": -0.8823181390762329,
"logps/rejected": -1.1245914697647095,
"loss": 0.5293,
"nll_loss": 0.48711147904396057,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.044115908443927765,
"rewards/margins": 0.012113666161894798,
"rewards/rejected": -0.05622958019375801,
"step": 660
},
{
"epoch": 0.702674357629785,
"grad_norm": 2.2401170633783427,
"learning_rate": 7.726674092862559e-06,
"log_odds_chosen": 0.4617346227169037,
"log_odds_ratio": -0.627942681312561,
"logits/chosen": -3.0200469493865967,
"logits/rejected": -3.0557796955108643,
"logps/chosen": -0.8328607678413391,
"logps/rejected": -1.140726923942566,
"loss": 0.5237,
"nll_loss": 0.46908053755760193,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.041643042117357254,
"rewards/margins": 0.015393314883112907,
"rewards/rejected": -0.057036347687244415,
"step": 670
},
{
"epoch": 0.713162034609334,
"grad_norm": 2.00824540701018,
"learning_rate": 7.669649888473705e-06,
"log_odds_chosen": 0.36505717039108276,
"log_odds_ratio": -0.6428455114364624,
"logits/chosen": -3.0360779762268066,
"logits/rejected": -3.044907808303833,
"logps/chosen": -0.8793157339096069,
"logps/rejected": -1.1065771579742432,
"loss": 0.5083,
"nll_loss": 0.4951552450656891,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.04396578669548035,
"rewards/margins": 0.0113630760461092,
"rewards/rejected": -0.0553288571536541,
"step": 680
},
{
"epoch": 0.723649711588883,
"grad_norm": 1.8606652251395144,
"learning_rate": 7.61386987626881e-06,
"log_odds_chosen": 0.2045813500881195,
"log_odds_ratio": -0.7114613056182861,
"logits/chosen": -3.036839723587036,
"logits/rejected": -3.0589654445648193,
"logps/chosen": -0.8661033511161804,
"logps/rejected": -1.014004111289978,
"loss": 0.5313,
"nll_loss": 0.5510386824607849,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.0433051735162735,
"rewards/margins": 0.007395035121589899,
"rewards/rejected": -0.05070021003484726,
"step": 690
},
{
"epoch": 0.7341373885684321,
"grad_norm": 2.2895278902082747,
"learning_rate": 7.559289460184545e-06,
"log_odds_chosen": 0.34833860397338867,
"log_odds_ratio": -0.6269202828407288,
"logits/chosen": -3.0252926349639893,
"logits/rejected": -3.068871021270752,
"logps/chosen": -0.8163930177688599,
"logps/rejected": -1.0459128618240356,
"loss": 0.5298,
"nll_loss": 0.5428040623664856,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.04081965237855911,
"rewards/margins": 0.01147598959505558,
"rewards/rejected": -0.05229564383625984,
"step": 700
},
{
"epoch": 0.7341373885684321,
"eval_log_odds_chosen": 0.3869401812553406,
"eval_log_odds_ratio": -0.6218506097793579,
"eval_logits/chosen": -3.0754599571228027,
"eval_logits/rejected": -3.076083183288574,
"eval_logps/chosen": -0.8267216682434082,
"eval_logps/rejected": -1.0827099084854126,
"eval_loss": 0.5187779068946838,
"eval_nll_loss": 0.4841572344303131,
"eval_rewards/accuracies": 0.6428571343421936,
"eval_rewards/chosen": -0.04133608192205429,
"eval_rewards/margins": 0.012799412943422794,
"eval_rewards/rejected": -0.05413549765944481,
"eval_runtime": 137.1864,
"eval_samples_per_second": 14.535,
"eval_steps_per_second": 0.459,
"step": 700
},
{
"epoch": 0.7446250655479811,
"grad_norm": 1.958829045282282,
"learning_rate": 7.505866250408016e-06,
"log_odds_chosen": 0.2794094383716583,
"log_odds_ratio": -0.6572638750076294,
"logits/chosen": -3.1184074878692627,
"logits/rejected": -3.1369974613189697,
"logps/chosen": -0.8444921374320984,
"logps/rejected": -1.0439577102661133,
"loss": 0.5242,
"nll_loss": 0.47964978218078613,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04222460836172104,
"rewards/margins": 0.00997327920049429,
"rewards/rejected": -0.052197881042957306,
"step": 710
},
{
"epoch": 0.7551127425275301,
"grad_norm": 1.8049248182957538,
"learning_rate": 7.4535599249993e-06,
"log_odds_chosen": 0.36963027715682983,
"log_odds_ratio": -0.6443501710891724,
"logits/chosen": -3.075653076171875,
"logits/rejected": -3.0980098247528076,
"logps/chosen": -0.7987631559371948,
"logps/rejected": -1.03029465675354,
"loss": 0.5308,
"nll_loss": 0.4633590281009674,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.03993815928697586,
"rewards/margins": 0.011576572433114052,
"rewards/rejected": -0.051514726132154465,
"step": 720
},
{
"epoch": 0.7656004195070791,
"grad_norm": 2.1907119668628807,
"learning_rate": 7.402332101976053e-06,
"log_odds_chosen": 0.1018507108092308,
"log_odds_ratio": -0.7229408621788025,
"logits/chosen": -3.084719181060791,
"logits/rejected": -3.0846333503723145,
"logps/chosen": -0.8332414627075195,
"logps/rejected": -0.8869687914848328,
"loss": 0.5377,
"nll_loss": 0.5031158328056335,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.041662074625492096,
"rewards/margins": 0.00268636760301888,
"rewards/rejected": -0.04434844106435776,
"step": 730
},
{
"epoch": 0.7760880964866282,
"grad_norm": 2.050092986168091,
"learning_rate": 7.352146220938079e-06,
"log_odds_chosen": 0.3393878936767578,
"log_odds_ratio": -0.6246740221977234,
"logits/chosen": -3.119809627532959,
"logits/rejected": -3.132826328277588,
"logps/chosen": -0.804786205291748,
"logps/rejected": -1.0171911716461182,
"loss": 0.5308,
"nll_loss": 0.4794273376464844,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.040239304304122925,
"rewards/margins": 0.010620243847370148,
"rewards/rejected": -0.05085955187678337,
"step": 740
},
{
"epoch": 0.7865757734661772,
"grad_norm": 2.0193892114327556,
"learning_rate": 7.3029674334022146e-06,
"log_odds_chosen": 0.2425309419631958,
"log_odds_ratio": -0.6716917753219604,
"logits/chosen": -3.093583106994629,
"logits/rejected": -3.114816188812256,
"logps/chosen": -0.8740803599357605,
"logps/rejected": -1.0157320499420166,
"loss": 0.5427,
"nll_loss": 0.4982066750526428,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04370402172207832,
"rewards/margins": 0.007082589901983738,
"rewards/rejected": -0.05078660696744919,
"step": 750
},
{
"epoch": 0.7970634504457262,
"grad_norm": 1.891204637475333,
"learning_rate": 7.254762501100117e-06,
"log_odds_chosen": 0.2664291262626648,
"log_odds_ratio": -0.6672528386116028,
"logits/chosen": -3.0630593299865723,
"logits/rejected": -3.0695788860321045,
"logps/chosen": -0.8163594007492065,
"logps/rejected": -0.993925929069519,
"loss": 0.5114,
"nll_loss": 0.40486717224121094,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.040817975997924805,
"rewards/margins": 0.00887832697480917,
"rewards/rejected": -0.049696292728185654,
"step": 760
},
{
"epoch": 0.8075511274252754,
"grad_norm": 2.0675479903273914,
"learning_rate": 7.207499701564472e-06,
"log_odds_chosen": 0.23201966285705566,
"log_odds_ratio": -0.6995107531547546,
"logits/chosen": -3.027050018310547,
"logits/rejected": -3.0489039421081543,
"logps/chosen": -0.8810374140739441,
"logps/rejected": -1.0541043281555176,
"loss": 0.5343,
"nll_loss": 0.5017890334129333,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.0440518744289875,
"rewards/margins": 0.008653342723846436,
"rewards/rejected": -0.05270521715283394,
"step": 770
},
{
"epoch": 0.8180388044048243,
"grad_norm": 1.9571785710156353,
"learning_rate": 7.1611487403943295e-06,
"log_odds_chosen": 0.23842506110668182,
"log_odds_ratio": -0.672247052192688,
"logits/chosen": -3.062586545944214,
"logits/rejected": -3.0935113430023193,
"logps/chosen": -0.8818261027336121,
"logps/rejected": -1.0167505741119385,
"loss": 0.5467,
"nll_loss": 0.5480509996414185,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04409131035208702,
"rewards/margins": 0.006746229715645313,
"rewards/rejected": -0.05083753541111946,
"step": 780
},
{
"epoch": 0.8285264813843733,
"grad_norm": 1.8565884413084413,
"learning_rate": 7.115680669648201e-06,
"log_odds_chosen": 0.32895228266716003,
"log_odds_ratio": -0.6478875875473022,
"logits/chosen": -3.1025116443634033,
"logits/rejected": -3.1219050884246826,
"logps/chosen": -0.8189374804496765,
"logps/rejected": -1.0338833332061768,
"loss": 0.5049,
"nll_loss": 0.44281667470932007,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.040946874767541885,
"rewards/margins": 0.010747292079031467,
"rewards/rejected": -0.05169416218996048,
"step": 790
},
{
"epoch": 0.8390141583639223,
"grad_norm": 2.106485781152954,
"learning_rate": 7.0710678118654756e-06,
"log_odds_chosen": 0.4608131945133209,
"log_odds_ratio": -0.5961465835571289,
"logits/chosen": -3.092484951019287,
"logits/rejected": -3.090536117553711,
"logps/chosen": -0.7798897624015808,
"logps/rejected": -1.0744028091430664,
"loss": 0.5181,
"nll_loss": 0.4202440679073334,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.03899449110031128,
"rewards/margins": 0.014725650660693645,
"rewards/rejected": -0.0537201389670372,
"step": 800
},
{
"epoch": 0.8390141583639223,
"eval_log_odds_chosen": 0.35056135058403015,
"eval_log_odds_ratio": -0.6322371363639832,
"eval_logits/chosen": -3.139373302459717,
"eval_logits/rejected": -3.1382317543029785,
"eval_logps/chosen": -0.8198128342628479,
"eval_logps/rejected": -1.0474979877471924,
"eval_loss": 0.5140993595123291,
"eval_nll_loss": 0.4803001582622528,
"eval_rewards/accuracies": 0.6329365372657776,
"eval_rewards/chosen": -0.040990639477968216,
"eval_rewards/margins": 0.011384249664843082,
"eval_rewards/rejected": -0.05237489193677902,
"eval_runtime": 136.2293,
"eval_samples_per_second": 14.637,
"eval_steps_per_second": 0.462,
"step": 800
},
{
"epoch": 0.8495018353434715,
"grad_norm": 1.919736952774634,
"learning_rate": 7.027283689263066e-06,
"log_odds_chosen": 0.3574589788913727,
"log_odds_ratio": -0.6265517473220825,
"logits/chosen": -3.0922906398773193,
"logits/rejected": -3.093270778656006,
"logps/chosen": -0.8058309555053711,
"logps/rejected": -1.0188381671905518,
"loss": 0.5132,
"nll_loss": 0.4754185676574707,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04029155150055885,
"rewards/margins": 0.010650361888110638,
"rewards/rejected": -0.050941914319992065,
"step": 810
},
{
"epoch": 0.8599895123230205,
"grad_norm": 2.3619475771455214,
"learning_rate": 6.984302957695783e-06,
"log_odds_chosen": 0.2932414412498474,
"log_odds_ratio": -0.6586158275604248,
"logits/chosen": -3.0357770919799805,
"logits/rejected": -3.0360379219055176,
"logps/chosen": -0.842557430267334,
"logps/rejected": -1.0188366174697876,
"loss": 0.505,
"nll_loss": 0.4280059337615967,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04212787002325058,
"rewards/margins": 0.008813952095806599,
"rewards/rejected": -0.0509418249130249,
"step": 820
},
{
"epoch": 0.8704771893025695,
"grad_norm": 2.3824306185771267,
"learning_rate": 6.942101345006233e-06,
"log_odds_chosen": 0.2479257881641388,
"log_odds_ratio": -0.702430248260498,
"logits/chosen": -3.008411407470703,
"logits/rejected": -3.05663800239563,
"logps/chosen": -0.853378415107727,
"logps/rejected": -1.0239073038101196,
"loss": 0.5248,
"nll_loss": 0.4657117426395416,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.04266892373561859,
"rewards/margins": 0.00852644257247448,
"rewards/rejected": -0.05119536444544792,
"step": 830
},
{
"epoch": 0.8809648662821186,
"grad_norm": 1.9624325890421999,
"learning_rate": 6.900655593423542e-06,
"log_odds_chosen": 0.2082471400499344,
"log_odds_ratio": -0.6889498233795166,
"logits/chosen": -3.040546178817749,
"logits/rejected": -3.0660147666931152,
"logps/chosen": -0.8756462931632996,
"logps/rejected": -1.0124717950820923,
"loss": 0.5137,
"nll_loss": 0.4855361580848694,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.043782319873571396,
"rewards/margins": 0.006841268390417099,
"rewards/rejected": -0.050623588263988495,
"step": 840
},
{
"epoch": 0.8914525432616676,
"grad_norm": 2.0144554917595756,
"learning_rate": 6.859943405700353e-06,
"log_odds_chosen": 0.3205421566963196,
"log_odds_ratio": -0.6371484994888306,
"logits/chosen": -3.054384231567383,
"logits/rejected": -3.0986409187316895,
"logps/chosen": -0.8319618105888367,
"logps/rejected": -1.0313116312026978,
"loss": 0.5044,
"nll_loss": 0.4881317615509033,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.041598085314035416,
"rewards/margins": 0.009967491030693054,
"rewards/rejected": -0.05156558007001877,
"step": 850
},
{
"epoch": 0.9019402202412166,
"grad_norm": 1.9341957217840544,
"learning_rate": 6.819943394704736e-06,
"log_odds_chosen": 0.26728707551956177,
"log_odds_ratio": -0.6747015714645386,
"logits/chosen": -3.0936527252197266,
"logits/rejected": -3.1073575019836426,
"logps/chosen": -0.8353049159049988,
"logps/rejected": -1.0224361419677734,
"loss": 0.5278,
"nll_loss": 0.4731883108615875,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0417652502655983,
"rewards/margins": 0.009356559254229069,
"rewards/rejected": -0.05112180858850479,
"step": 860
},
{
"epoch": 0.9124278972207656,
"grad_norm": 5.30319924106792,
"learning_rate": 6.780635036208105e-06,
"log_odds_chosen": 0.30106544494628906,
"log_odds_ratio": -0.6683878898620605,
"logits/chosen": -3.097151279449463,
"logits/rejected": -3.1499500274658203,
"logps/chosen": -0.867012619972229,
"logps/rejected": -1.0790386199951172,
"loss": 0.4933,
"nll_loss": 0.48347124457359314,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04335063695907593,
"rewards/margins": 0.010601297952234745,
"rewards/rejected": -0.0539519302546978,
"step": 870
},
{
"epoch": 0.9229155742003147,
"grad_norm": 1.6208302885778367,
"learning_rate": 6.741998624632421e-06,
"log_odds_chosen": 0.29186171293258667,
"log_odds_ratio": -0.6591932773590088,
"logits/chosen": -3.15583872795105,
"logits/rejected": -3.168064594268799,
"logps/chosen": -0.8187226057052612,
"logps/rejected": -1.0049909353256226,
"loss": 0.4887,
"nll_loss": 0.4384452700614929,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04093613475561142,
"rewards/margins": 0.009313413873314857,
"rewards/rejected": -0.05024954676628113,
"step": 880
},
{
"epoch": 0.9334032511798637,
"grad_norm": 1.7707391073712173,
"learning_rate": 6.70401523153991e-06,
"log_odds_chosen": 0.33703380823135376,
"log_odds_ratio": -0.6459982991218567,
"logits/chosen": -3.1340742111206055,
"logits/rejected": -3.157071590423584,
"logps/chosen": -0.8063561320304871,
"logps/rejected": -0.9982324838638306,
"loss": 0.4931,
"nll_loss": 0.4631246030330658,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.040317803621292114,
"rewards/margins": 0.009593818336725235,
"rewards/rejected": -0.04991162568330765,
"step": 890
},
{
"epoch": 0.9438909281594127,
"grad_norm": 2.341682439233393,
"learning_rate": 6.666666666666667e-06,
"log_odds_chosen": 0.26426905393600464,
"log_odds_ratio": -0.6637164354324341,
"logits/chosen": -3.1100411415100098,
"logits/rejected": -3.130826473236084,
"logps/chosen": -0.7806347012519836,
"logps/rejected": -0.9385608434677124,
"loss": 0.5239,
"nll_loss": 0.4659123420715332,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.039031732827425,
"rewards/margins": 0.00789631437510252,
"rewards/rejected": -0.0469280444085598,
"step": 900
},
{
"epoch": 0.9438909281594127,
"eval_log_odds_chosen": 0.32679569721221924,
"eval_log_odds_ratio": -0.6327584385871887,
"eval_logits/chosen": -3.117077112197876,
"eval_logits/rejected": -3.119086742401123,
"eval_logps/chosen": -0.8044511079788208,
"eval_logps/rejected": -1.0129274129867554,
"eval_loss": 0.5086367726325989,
"eval_nll_loss": 0.4747697710990906,
"eval_rewards/accuracies": 0.6309523582458496,
"eval_rewards/chosen": -0.04022255912423134,
"eval_rewards/margins": 0.010423817671835423,
"eval_rewards/rejected": -0.05064636468887329,
"eval_runtime": 137.5576,
"eval_samples_per_second": 14.496,
"eval_steps_per_second": 0.458,
"step": 900
},
{
"epoch": 0.9543786051389617,
"grad_norm": 2.0533389896159213,
"learning_rate": 6.629935441317959e-06,
"log_odds_chosen": 0.4754648208618164,
"log_odds_ratio": -0.6232188940048218,
"logits/chosen": -3.073176622390747,
"logits/rejected": -3.084963321685791,
"logps/chosen": -0.828788161277771,
"logps/rejected": -1.1443804502487183,
"loss": 0.5142,
"nll_loss": 0.46652156114578247,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.04143941029906273,
"rewards/margins": 0.015779614448547363,
"rewards/rejected": -0.05721902847290039,
"step": 910
},
{
"epoch": 0.9648662821185108,
"grad_norm": 2.138448059862142,
"learning_rate": 6.593804733957872e-06,
"log_odds_chosen": 0.32768282294273376,
"log_odds_ratio": -0.6431117057800293,
"logits/chosen": -3.038576364517212,
"logits/rejected": -3.061370372772217,
"logps/chosen": -0.7864677906036377,
"logps/rejected": -0.9946994781494141,
"loss": 0.4836,
"nll_loss": 0.43025264143943787,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.03932339325547218,
"rewards/margins": 0.010411588475108147,
"rewards/rejected": -0.04973498359322548,
"step": 920
},
{
"epoch": 0.9753539590980598,
"grad_norm": 2.1602863053901413,
"learning_rate": 6.55825835783953e-06,
"log_odds_chosen": 0.2050061970949173,
"log_odds_ratio": -0.6868597269058228,
"logits/chosen": -3.0544333457946777,
"logits/rejected": -3.066739797592163,
"logps/chosen": -0.8742432594299316,
"logps/rejected": -1.0194706916809082,
"loss": 0.5136,
"nll_loss": 0.5241981744766235,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04371216148138046,
"rewards/margins": 0.007261371705681086,
"rewards/rejected": -0.05097353458404541,
"step": 930
},
{
"epoch": 0.9858416360776088,
"grad_norm": 1.9215491222233851,
"learning_rate": 6.523280730534423e-06,
"log_odds_chosen": 0.23041269183158875,
"log_odds_ratio": -0.6992384195327759,
"logits/chosen": -3.0867247581481934,
"logits/rejected": -3.0779662132263184,
"logps/chosen": -0.7768861651420593,
"logps/rejected": -0.9184977412223816,
"loss": 0.5102,
"nll_loss": 0.4776674211025238,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.038844309747219086,
"rewards/margins": 0.0070805782452225685,
"rewards/rejected": -0.045924894511699677,
"step": 940
},
{
"epoch": 0.9963293130571579,
"grad_norm": 2.1983436102574547,
"learning_rate": 6.488856845230502e-06,
"log_odds_chosen": 0.25244003534317017,
"log_odds_ratio": -0.6911928653717041,
"logits/chosen": -3.0215468406677246,
"logits/rejected": -3.0374438762664795,
"logps/chosen": -0.8648554682731628,
"logps/rejected": -1.0236364603042603,
"loss": 0.5385,
"nll_loss": 0.5036488175392151,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04324277862906456,
"rewards/margins": 0.00793905183672905,
"rewards/rejected": -0.05118182301521301,
"step": 950
},
{
"epoch": 1.0068169900367068,
"grad_norm": 2.2724469008271773,
"learning_rate": 6.4549722436790284e-06,
"log_odds_chosen": 1.0400245189666748,
"log_odds_ratio": -0.42517581582069397,
"logits/chosen": -3.0371384620666504,
"logits/rejected": -3.0435400009155273,
"logps/chosen": -0.5974615812301636,
"logps/rejected": -1.1842448711395264,
"loss": 0.3929,
"nll_loss": 0.40045398473739624,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.02987307868897915,
"rewards/margins": 0.02933916449546814,
"rewards/rejected": -0.05921224504709244,
"step": 960
},
{
"epoch": 1.017304667016256,
"grad_norm": 2.0168885022396372,
"learning_rate": 6.421612990679356e-06,
"log_odds_chosen": 1.6284434795379639,
"log_odds_ratio": -0.2502659857273102,
"logits/chosen": -3.080873727798462,
"logits/rejected": -3.070159912109375,
"logps/chosen": -0.4285094141960144,
"logps/rejected": -1.2745321989059448,
"loss": 0.2923,
"nll_loss": 0.28497669100761414,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02142546884715557,
"rewards/margins": 0.04230114072561264,
"rewards/rejected": -0.06372661143541336,
"step": 970
},
{
"epoch": 1.027792343995805,
"grad_norm": 1.9662869053425782,
"learning_rate": 6.3887656499994e-06,
"log_odds_chosen": 1.8482691049575806,
"log_odds_ratio": -0.21383436024188995,
"logits/chosen": -3.071471929550171,
"logits/rejected": -3.079923391342163,
"logps/chosen": -0.43078216910362244,
"logps/rejected": -1.4107215404510498,
"loss": 0.3019,
"nll_loss": 0.3140898644924164,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021539105102419853,
"rewards/margins": 0.04899696633219719,
"rewards/rejected": -0.0705360695719719,
"step": 980
},
{
"epoch": 1.038280020975354,
"grad_norm": 1.9845582869348006,
"learning_rate": 6.356417261637282e-06,
"log_odds_chosen": 1.6627075672149658,
"log_odds_ratio": -0.2610566318035126,
"logits/chosen": -2.9875268936157227,
"logits/rejected": -2.9876785278320312,
"logps/chosen": -0.4378105103969574,
"logps/rejected": -1.3178083896636963,
"loss": 0.296,
"nll_loss": 0.27773916721343994,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.02189052477478981,
"rewards/margins": 0.04399988800287247,
"rewards/rejected": -0.06589041650295258,
"step": 990
},
{
"epoch": 1.048767697954903,
"grad_norm": 2.0942478813902783,
"learning_rate": 6.324555320336759e-06,
"log_odds_chosen": 1.9041988849639893,
"log_odds_ratio": -0.20684988796710968,
"logits/chosen": -2.9869093894958496,
"logits/rejected": -3.029050588607788,
"logps/chosen": -0.4077525734901428,
"logps/rejected": -1.3952513933181763,
"loss": 0.2888,
"nll_loss": 0.2748258709907532,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.02038763090968132,
"rewards/margins": 0.04937494546175003,
"rewards/rejected": -0.06976256519556046,
"step": 1000
},
{
"epoch": 1.048767697954903,
"eval_log_odds_chosen": 0.37935417890548706,
"eval_log_odds_ratio": -0.6318228840827942,
"eval_logits/chosen": -3.0189764499664307,
"eval_logits/rejected": -3.0171284675598145,
"eval_logps/chosen": -0.8724088072776794,
"eval_logps/rejected": -1.112794280052185,
"eval_loss": 0.5400179028511047,
"eval_nll_loss": 0.5058131814002991,
"eval_rewards/accuracies": 0.6428571343421936,
"eval_rewards/chosen": -0.04362044483423233,
"eval_rewards/margins": 0.012019270099699497,
"eval_rewards/rejected": -0.05563971400260925,
"eval_runtime": 136.9938,
"eval_samples_per_second": 14.555,
"eval_steps_per_second": 0.46,
"step": 1000
},
{
"epoch": 1.059255374934452,
"grad_norm": 1.8526210480251912,
"learning_rate": 6.2931677552755265e-06,
"log_odds_chosen": 1.7620799541473389,
"log_odds_ratio": -0.23190836608409882,
"logits/chosen": -3.0539023876190186,
"logits/rejected": -3.0629706382751465,
"logps/chosen": -0.43785715103149414,
"logps/rejected": -1.3722269535064697,
"loss": 0.2859,
"nll_loss": 0.2769049108028412,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021892856806516647,
"rewards/margins": 0.04671848937869072,
"rewards/rejected": -0.06861135363578796,
"step": 1010
},
{
"epoch": 1.069743051914001,
"grad_norm": 2.017775428059147,
"learning_rate": 6.262242910851496e-06,
"log_odds_chosen": 1.7232574224472046,
"log_odds_ratio": -0.22979629039764404,
"logits/chosen": -3.0019690990448,
"logits/rejected": -3.0224807262420654,
"logps/chosen": -0.4002920091152191,
"logps/rejected": -1.3048107624053955,
"loss": 0.2894,
"nll_loss": 0.2588661015033722,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.020014600828289986,
"rewards/margins": 0.04522594064474106,
"rewards/rejected": -0.0652405396103859,
"step": 1020
},
{
"epoch": 1.08023072889355,
"grad_norm": 2.1656896077764,
"learning_rate": 6.231769528497559e-06,
"log_odds_chosen": 1.7999454736709595,
"log_odds_ratio": -0.23009638488292694,
"logits/chosen": -3.0344815254211426,
"logits/rejected": -3.0285098552703857,
"logps/chosen": -0.42475366592407227,
"logps/rejected": -1.3811571598052979,
"loss": 0.2779,
"nll_loss": 0.26928776502609253,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021237684413790703,
"rewards/margins": 0.04782017320394516,
"rewards/rejected": -0.06905786693096161,
"step": 1030
},
{
"epoch": 1.0907184058730992,
"grad_norm": 1.8893124181143397,
"learning_rate": 6.2017367294604225e-06,
"log_odds_chosen": 1.7361199855804443,
"log_odds_ratio": -0.2356552630662918,
"logits/chosen": -2.9798855781555176,
"logits/rejected": -3.012021780014038,
"logps/chosen": -0.4087589383125305,
"logps/rejected": -1.318456768989563,
"loss": 0.2848,
"nll_loss": 0.2693423926830292,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.020437946543097496,
"rewards/margins": 0.045484889298677444,
"rewards/rejected": -0.06592283397912979,
"step": 1040
},
{
"epoch": 1.1012060828526482,
"grad_norm": 1.998285617344112,
"learning_rate": 6.172133998483677e-06,
"log_odds_chosen": 1.989933967590332,
"log_odds_ratio": -0.2104463130235672,
"logits/chosen": -2.9669861793518066,
"logits/rejected": -2.992997169494629,
"logps/chosen": -0.4091659486293793,
"logps/rejected": -1.4872965812683105,
"loss": 0.2793,
"nll_loss": 0.24384136497974396,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.020458297803997993,
"rewards/margins": 0.05390653759241104,
"rewards/rejected": -0.07436482608318329,
"step": 1050
},
{
"epoch": 1.1116937598321972,
"grad_norm": 1.99753785316238,
"learning_rate": 6.142951168339513e-06,
"log_odds_chosen": 1.7905690670013428,
"log_odds_ratio": -0.2465437948703766,
"logits/chosen": -2.9944257736206055,
"logits/rejected": -2.988699436187744,
"logps/chosen": -0.41175705194473267,
"logps/rejected": -1.3037220239639282,
"loss": 0.2828,
"nll_loss": 0.2829252779483795,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.020587850362062454,
"rewards/margins": 0.04459824413061142,
"rewards/rejected": -0.06518609821796417,
"step": 1060
},
{
"epoch": 1.1221814368117462,
"grad_norm": 2.0944607329795666,
"learning_rate": 6.114178405157431e-06,
"log_odds_chosen": 1.972241759300232,
"log_odds_ratio": -0.202741339802742,
"logits/chosen": -2.9314074516296387,
"logits/rejected": -2.943037271499634,
"logps/chosen": -0.39666005969047546,
"logps/rejected": -1.4398232698440552,
"loss": 0.2869,
"nll_loss": 0.26206424832344055,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.019833002239465714,
"rewards/margins": 0.0521581657230854,
"rewards/rejected": -0.07199116796255112,
"step": 1070
},
{
"epoch": 1.1326691137912952,
"grad_norm": 2.082309850512046,
"learning_rate": 6.0858061945018455e-06,
"log_odds_chosen": 1.9569040536880493,
"log_odds_ratio": -0.20189175009727478,
"logits/chosen": -2.9233288764953613,
"logits/rejected": -2.953047275543213,
"logps/chosen": -0.4349672198295593,
"logps/rejected": -1.479813814163208,
"loss": 0.286,
"nll_loss": 0.25732284784317017,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021748360246419907,
"rewards/margins": 0.052242327481508255,
"rewards/rejected": -0.07399068772792816,
"step": 1080
},
{
"epoch": 1.1431567907708442,
"grad_norm": 1.977872551014816,
"learning_rate": 6.0578253281538265e-06,
"log_odds_chosen": 1.8792686462402344,
"log_odds_ratio": -0.23301272094249725,
"logits/chosen": -2.9573769569396973,
"logits/rejected": -2.968686103820801,
"logps/chosen": -0.3683982789516449,
"logps/rejected": -1.286027431488037,
"loss": 0.2841,
"nll_loss": 0.26943594217300415,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.018419915810227394,
"rewards/margins": 0.04588145762681961,
"rewards/rejected": -0.06430138647556305,
"step": 1090
},
{
"epoch": 1.1536444677503932,
"grad_norm": 2.2874664942911984,
"learning_rate": 6.030226891555273e-06,
"log_odds_chosen": 1.744699239730835,
"log_odds_ratio": -0.2575313448905945,
"logits/chosen": -3.0328478813171387,
"logits/rejected": -3.0531229972839355,
"logps/chosen": -0.4480053782463074,
"logps/rejected": -1.409203290939331,
"loss": 0.29,
"nll_loss": 0.2910405397415161,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.022400271147489548,
"rewards/margins": 0.04805989935994148,
"rewards/rejected": -0.07046017050743103,
"step": 1100
},
{
"epoch": 1.1536444677503932,
"eval_log_odds_chosen": 0.4246710240840912,
"eval_log_odds_ratio": -0.6255837082862854,
"eval_logits/chosen": -3.002875804901123,
"eval_logits/rejected": -3.0027201175689697,
"eval_logps/chosen": -0.8736297488212585,
"eval_logps/rejected": -1.1487443447113037,
"eval_loss": 0.5385290384292603,
"eval_nll_loss": 0.5041735172271729,
"eval_rewards/accuracies": 0.64682537317276,
"eval_rewards/chosen": -0.04368148371577263,
"eval_rewards/margins": 0.013755732215940952,
"eval_rewards/rejected": -0.057437218725681305,
"eval_runtime": 136.8823,
"eval_samples_per_second": 14.567,
"eval_steps_per_second": 0.46,
"step": 1100
},
{
"epoch": 1.1641321447299422,
"grad_norm": 1.8147231314332177,
"learning_rate": 6.003002251876643e-06,
"log_odds_chosen": 1.8075166940689087,
"log_odds_ratio": -0.2281859815120697,
"logits/chosen": -2.965421199798584,
"logits/rejected": -3.0172793865203857,
"logps/chosen": -0.44597238302230835,
"logps/rejected": -1.4203885793685913,
"loss": 0.2891,
"nll_loss": 0.2668479084968567,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.022298619151115417,
"rewards/margins": 0.04872080683708191,
"rewards/rejected": -0.07101943343877792,
"step": 1110
},
{
"epoch": 1.1746198217094914,
"grad_norm": 1.9969430269469466,
"learning_rate": 5.976143046671968e-06,
"log_odds_chosen": 1.7478984594345093,
"log_odds_ratio": -0.22862455248832703,
"logits/chosen": -3.0243489742279053,
"logits/rejected": -3.0321333408355713,
"logps/chosen": -0.40696269273757935,
"logps/rejected": -1.2988313436508179,
"loss": 0.2927,
"nll_loss": 0.27604612708091736,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02034812793135643,
"rewards/margins": 0.04459343105554581,
"rewards/rejected": -0.06494157016277313,
"step": 1120
},
{
"epoch": 1.1851074986890404,
"grad_norm": 2.1896703421371275,
"learning_rate": 5.949641173087296e-06,
"log_odds_chosen": 2.048767566680908,
"log_odds_ratio": -0.20188426971435547,
"logits/chosen": -2.9657158851623535,
"logits/rejected": -2.977405309677124,
"logps/chosen": -0.38311532139778137,
"logps/rejected": -1.454978108406067,
"loss": 0.2825,
"nll_loss": 0.2597211003303528,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.01915576681494713,
"rewards/margins": 0.05359314754605293,
"rewards/rejected": -0.07274890691041946,
"step": 1130
},
{
"epoch": 1.1955951756685894,
"grad_norm": 1.8856822247943528,
"learning_rate": 5.923488777590924e-06,
"log_odds_chosen": 1.9368520975112915,
"log_odds_ratio": -0.21634550392627716,
"logits/chosen": -3.009665012359619,
"logits/rejected": -3.0066471099853516,
"logps/chosen": -0.412930428981781,
"logps/rejected": -1.4850547313690186,
"loss": 0.2786,
"nll_loss": 0.28015536069869995,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02064652182161808,
"rewards/margins": 0.05360621213912964,
"rewards/rejected": -0.07425273954868317,
"step": 1140
},
{
"epoch": 1.2060828526481384,
"grad_norm": 2.2165729739830233,
"learning_rate": 5.897678246195886e-06,
"log_odds_chosen": 1.9798767566680908,
"log_odds_ratio": -0.19855430722236633,
"logits/chosen": -2.9805493354797363,
"logits/rejected": -2.9919371604919434,
"logps/chosen": -0.38313865661621094,
"logps/rejected": -1.3864378929138184,
"loss": 0.2909,
"nll_loss": 0.27790573239326477,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.019156932830810547,
"rewards/margins": 0.05016495659947395,
"rewards/rejected": -0.0693218931555748,
"step": 1150
},
{
"epoch": 1.2165705296276874,
"grad_norm": 2.8337045840850497,
"learning_rate": 5.8722021951470355e-06,
"log_odds_chosen": 1.7361915111541748,
"log_odds_ratio": -0.24711327254772186,
"logits/chosen": -2.966083288192749,
"logits/rejected": -2.9842519760131836,
"logps/chosen": -0.4412474036216736,
"logps/rejected": -1.3824529647827148,
"loss": 0.2781,
"nll_loss": 0.2754039466381073,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02206237055361271,
"rewards/margins": 0.0470602810382843,
"rewards/rejected": -0.06912264972925186,
"step": 1160
},
{
"epoch": 1.2270582066072364,
"grad_norm": 1.7729938432799273,
"learning_rate": 5.847053462046862e-06,
"log_odds_chosen": 1.7805134057998657,
"log_odds_ratio": -0.23545412719249725,
"logits/chosen": -3.0085816383361816,
"logits/rejected": -3.003875494003296,
"logps/chosen": -0.4123718738555908,
"logps/rejected": -1.3221479654312134,
"loss": 0.2829,
"nll_loss": 0.2879020869731903,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02061859332025051,
"rewards/margins": 0.045488808304071426,
"rewards/rejected": -0.06610739976167679,
"step": 1170
},
{
"epoch": 1.2375458835867854,
"grad_norm": 2.2169036925519454,
"learning_rate": 5.822225097395821e-06,
"log_odds_chosen": 1.9844211339950562,
"log_odds_ratio": -0.1866404265165329,
"logits/chosen": -2.9880988597869873,
"logits/rejected": -3.0081310272216797,
"logps/chosen": -0.3858886957168579,
"logps/rejected": -1.3924882411956787,
"loss": 0.2873,
"nll_loss": 0.25162869691848755,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.019294437021017075,
"rewards/margins": 0.05032998323440552,
"rewards/rejected": -0.06962442398071289,
"step": 1180
},
{
"epoch": 1.2480335605663346,
"grad_norm": 2.1614361138819045,
"learning_rate": 5.797710356524486e-06,
"log_odds_chosen": 1.8616158962249756,
"log_odds_ratio": -0.22632256150245667,
"logits/chosen": -3.0017178058624268,
"logits/rejected": -3.0013363361358643,
"logps/chosen": -0.4442955553531647,
"logps/rejected": -1.4363129138946533,
"loss": 0.2867,
"nll_loss": 0.289310485124588,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.022214777767658234,
"rewards/margins": 0.04960086941719055,
"rewards/rejected": -0.07181564718484879,
"step": 1190
},
{
"epoch": 1.2585212375458836,
"grad_norm": 2.0470229728313494,
"learning_rate": 5.773502691896259e-06,
"log_odds_chosen": 1.8614075183868408,
"log_odds_ratio": -0.2429337054491043,
"logits/chosen": -2.9596099853515625,
"logits/rejected": -2.9728147983551025,
"logps/chosen": -0.44122061133384705,
"logps/rejected": -1.4644559621810913,
"loss": 0.2826,
"nll_loss": 0.2614334225654602,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.022061031311750412,
"rewards/margins": 0.051161766052246094,
"rewards/rejected": -0.0732228010892868,
"step": 1200
},
{
"epoch": 1.2585212375458836,
"eval_log_odds_chosen": 0.4214767515659332,
"eval_log_odds_ratio": -0.6254101991653442,
"eval_logits/chosen": -2.9582858085632324,
"eval_logits/rejected": -2.96195912361145,
"eval_logps/chosen": -0.8853804469108582,
"eval_logps/rejected": -1.162561058998108,
"eval_loss": 0.5427829027175903,
"eval_nll_loss": 0.5084435939788818,
"eval_rewards/accuracies": 0.6428571343421936,
"eval_rewards/chosen": -0.04426902160048485,
"eval_rewards/margins": 0.013859033584594727,
"eval_rewards/rejected": -0.05812805891036987,
"eval_runtime": 137.2006,
"eval_samples_per_second": 14.533,
"eval_steps_per_second": 0.459,
"step": 1200
},
{
"epoch": 1.2690089145254326,
"grad_norm": 2.3388472125063946,
"learning_rate": 5.749595745760691e-06,
"log_odds_chosen": 1.858030080795288,
"log_odds_ratio": -0.21272964775562286,
"logits/chosen": -2.996577739715576,
"logits/rejected": -3.0146660804748535,
"logps/chosen": -0.4070938229560852,
"logps/rejected": -1.3386101722717285,
"loss": 0.2988,
"nll_loss": 0.292961448431015,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02035469003021717,
"rewards/margins": 0.04657582566142082,
"rewards/rejected": -0.06693051755428314,
"step": 1210
},
{
"epoch": 1.2794965915049816,
"grad_norm": 1.9762440493042526,
"learning_rate": 5.725983343138682e-06,
"log_odds_chosen": 1.7544046640396118,
"log_odds_ratio": -0.22841353714466095,
"logits/chosen": -2.9734439849853516,
"logits/rejected": -2.9992988109588623,
"logps/chosen": -0.42544227838516235,
"logps/rejected": -1.3273015022277832,
"loss": 0.295,
"nll_loss": 0.28989139199256897,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021272115409374237,
"rewards/margins": 0.04509295895695686,
"rewards/rejected": -0.0663650780916214,
"step": 1220
},
{
"epoch": 1.2899842684845306,
"grad_norm": 2.230074491318477,
"learning_rate": 5.702659485122011e-06,
"log_odds_chosen": 1.929265022277832,
"log_odds_ratio": -0.20951807498931885,
"logits/chosen": -2.9871158599853516,
"logits/rejected": -2.993727207183838,
"logps/chosen": -0.40125927329063416,
"logps/rejected": -1.4160717725753784,
"loss": 0.2653,
"nll_loss": 0.23026029765605927,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020062964409589767,
"rewards/margins": 0.050740621984004974,
"rewards/rejected": -0.07080359011888504,
"step": 1230
},
{
"epoch": 1.3004719454640796,
"grad_norm": 1.9679461376203173,
"learning_rate": 5.679618342470648e-06,
"log_odds_chosen": 1.7371532917022705,
"log_odds_ratio": -0.2242734134197235,
"logits/chosen": -3.0132291316986084,
"logits/rejected": -3.0433402061462402,
"logps/chosen": -0.413210391998291,
"logps/rejected": -1.3000330924987793,
"loss": 0.2804,
"nll_loss": 0.29589781165122986,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.0206605214625597,
"rewards/margins": 0.04434113949537277,
"rewards/rejected": -0.06500165909528732,
"step": 1240
},
{
"epoch": 1.3109596224436286,
"grad_norm": 2.617277483095543,
"learning_rate": 5.656854249492381e-06,
"log_odds_chosen": 1.814679741859436,
"log_odds_ratio": -0.22298629581928253,
"logits/chosen": -2.996896266937256,
"logits/rejected": -3.0056145191192627,
"logps/chosen": -0.42395251989364624,
"logps/rejected": -1.3927456140518188,
"loss": 0.2687,
"nll_loss": 0.25607752799987793,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021197626367211342,
"rewards/margins": 0.04843965172767639,
"rewards/rejected": -0.06963728368282318,
"step": 1250
},
{
"epoch": 1.3214472994231778,
"grad_norm": 1.9773184888291742,
"learning_rate": 5.63436169819011e-06,
"log_odds_chosen": 1.8136640787124634,
"log_odds_ratio": -0.24320077896118164,
"logits/chosen": -2.966784954071045,
"logits/rejected": -3.001746892929077,
"logps/chosen": -0.45541706681251526,
"logps/rejected": -1.3951488733291626,
"loss": 0.2988,
"nll_loss": 0.31274476647377014,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.022770855575799942,
"rewards/margins": 0.046986598521471024,
"rewards/rejected": -0.06975744664669037,
"step": 1260
},
{
"epoch": 1.3319349764027268,
"grad_norm": 1.9140818928985086,
"learning_rate": 5.612135332663138e-06,
"log_odds_chosen": 1.953155755996704,
"log_odds_ratio": -0.21717992424964905,
"logits/chosen": -3.006328821182251,
"logits/rejected": -3.037388324737549,
"logps/chosen": -0.42650872468948364,
"logps/rejected": -1.495060682296753,
"loss": 0.272,
"nll_loss": 0.2669217586517334,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.021325435489416122,
"rewards/margins": 0.053427595645189285,
"rewards/rejected": -0.074753038585186,
"step": 1270
},
{
"epoch": 1.3424226533822758,
"grad_norm": 1.9500186785754579,
"learning_rate": 5.590169943749475e-06,
"log_odds_chosen": 1.8904393911361694,
"log_odds_ratio": -0.2255454808473587,
"logits/chosen": -2.989861011505127,
"logits/rejected": -3.0198075771331787,
"logps/chosen": -0.424043744802475,
"logps/rejected": -1.4651858806610107,
"loss": 0.2783,
"nll_loss": 0.267769455909729,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02120218798518181,
"rewards/margins": 0.05205710977315903,
"rewards/rejected": -0.07325930893421173,
"step": 1280
},
{
"epoch": 1.3529103303618248,
"grad_norm": 1.9502765281924526,
"learning_rate": 5.568460463897046e-06,
"log_odds_chosen": 1.8929240703582764,
"log_odds_ratio": -0.21857920289039612,
"logits/chosen": -2.9535863399505615,
"logits/rejected": -2.9874510765075684,
"logps/chosen": -0.45026451349258423,
"logps/rejected": -1.4960235357284546,
"loss": 0.295,
"nll_loss": 0.27629774808883667,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.022513221949338913,
"rewards/margins": 0.052287958562374115,
"rewards/rejected": -0.07480116933584213,
"step": 1290
},
{
"epoch": 1.3633980073413738,
"grad_norm": 2.2093191033587223,
"learning_rate": 5.547001962252292e-06,
"log_odds_chosen": 1.7265195846557617,
"log_odds_ratio": -0.23279574513435364,
"logits/chosen": -2.9012649059295654,
"logits/rejected": -2.9128100872039795,
"logps/chosen": -0.4365314841270447,
"logps/rejected": -1.3402652740478516,
"loss": 0.2796,
"nll_loss": 0.28851714730262756,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021826574578881264,
"rewards/margins": 0.04518669471144676,
"rewards/rejected": -0.06701326370239258,
"step": 1300
},
{
"epoch": 1.3633980073413738,
"eval_log_odds_chosen": 0.45076510310173035,
"eval_log_odds_ratio": -0.6208177208900452,
"eval_logits/chosen": -2.928496837615967,
"eval_logits/rejected": -2.9256343841552734,
"eval_logps/chosen": -0.8825219869613647,
"eval_logps/rejected": -1.1770830154418945,
"eval_loss": 0.5392885208129883,
"eval_nll_loss": 0.5060464143753052,
"eval_rewards/accuracies": 0.64682537317276,
"eval_rewards/chosen": -0.044126104563474655,
"eval_rewards/margins": 0.014728044159710407,
"eval_rewards/rejected": -0.05885414779186249,
"eval_runtime": 136.6608,
"eval_samples_per_second": 14.591,
"eval_steps_per_second": 0.461,
"step": 1300
},
{
"epoch": 1.3738856843209228,
"grad_norm": 1.715926192038861,
"learning_rate": 5.525789639955377e-06,
"log_odds_chosen": 2.0803933143615723,
"log_odds_ratio": -0.21633043885231018,
"logits/chosen": -2.926987409591675,
"logits/rejected": -2.9622962474823,
"logps/chosen": -0.43519288301467896,
"logps/rejected": -1.6533997058868408,
"loss": 0.2713,
"nll_loss": 0.26452213525772095,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.02175964042544365,
"rewards/margins": 0.06091034412384033,
"rewards/rejected": -0.08266998082399368,
"step": 1310
},
{
"epoch": 1.3843733613004718,
"grad_norm": 2.0174814570503012,
"learning_rate": 5.504818825631804e-06,
"log_odds_chosen": 2.108902931213379,
"log_odds_ratio": -0.1835678517818451,
"logits/chosen": -2.96756911277771,
"logits/rejected": -2.9531686305999756,
"logps/chosen": -0.3781605362892151,
"logps/rejected": -1.4976880550384521,
"loss": 0.267,
"nll_loss": 0.25148090720176697,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.018908025696873665,
"rewards/margins": 0.055976372212171555,
"rewards/rejected": -0.07488439977169037,
"step": 1320
},
{
"epoch": 1.394861038280021,
"grad_norm": 2.317364085817375,
"learning_rate": 5.484084971070817e-06,
"log_odds_chosen": 1.9238555431365967,
"log_odds_ratio": -0.2074807584285736,
"logits/chosen": -2.923131227493286,
"logits/rejected": -2.9520606994628906,
"logps/chosen": -0.42446833848953247,
"logps/rejected": -1.4086004495620728,
"loss": 0.2852,
"nll_loss": 0.28959181904792786,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021223418414592743,
"rewards/margins": 0.049206603318452835,
"rewards/rejected": -0.07043002545833588,
"step": 1330
},
{
"epoch": 1.40534871525957,
"grad_norm": 2.165975215343917,
"learning_rate": 5.4635836470815305e-06,
"log_odds_chosen": 1.8837333917617798,
"log_odds_ratio": -0.21855314075946808,
"logits/chosen": -2.9127135276794434,
"logits/rejected": -2.9249043464660645,
"logps/chosen": -0.41960373520851135,
"logps/rejected": -1.4260175228118896,
"loss": 0.2787,
"nll_loss": 0.25244617462158203,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.020980186760425568,
"rewards/margins": 0.050320692360401154,
"rewards/rejected": -0.07130087912082672,
"step": 1340
},
{
"epoch": 1.415836392239119,
"grad_norm": 1.9224928940953034,
"learning_rate": 5.443310539518174e-06,
"log_odds_chosen": 2.056159734725952,
"log_odds_ratio": -0.19483168423175812,
"logits/chosen": -2.956674814224243,
"logits/rejected": -2.9572062492370605,
"logps/chosen": -0.4208443760871887,
"logps/rejected": -1.5285457372665405,
"loss": 0.2822,
"nll_loss": 0.26951080560684204,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.021042218431830406,
"rewards/margins": 0.05538507178425789,
"rewards/rejected": -0.07642728835344315,
"step": 1350
},
{
"epoch": 1.426324069218668,
"grad_norm": 2.0115204434239025,
"learning_rate": 5.423261445466404e-06,
"log_odds_chosen": 1.707457184791565,
"log_odds_ratio": -0.2479782998561859,
"logits/chosen": -2.915250301361084,
"logits/rejected": -2.9445343017578125,
"logps/chosen": -0.4267791211605072,
"logps/rejected": -1.3377535343170166,
"loss": 0.2925,
"nll_loss": 0.29825955629348755,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.02133895456790924,
"rewards/margins": 0.04554871469736099,
"rewards/rejected": -0.06688766926527023,
"step": 1360
},
{
"epoch": 1.436811746198217,
"grad_norm": 2.0083912520624234,
"learning_rate": 5.403432269582992e-06,
"log_odds_chosen": 1.7433815002441406,
"log_odds_ratio": -0.23284384608268738,
"logits/chosen": -2.9682974815368652,
"logits/rejected": -2.9809725284576416,
"logps/chosen": -0.4545938968658447,
"logps/rejected": -1.3821640014648438,
"loss": 0.2995,
"nll_loss": 0.2861328721046448,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.022729698568582535,
"rewards/margins": 0.04637850075960159,
"rewards/rejected": -0.06910820305347443,
"step": 1370
},
{
"epoch": 1.447299423177766,
"grad_norm": 1.918494069287167,
"learning_rate": 5.383819020581656e-06,
"log_odds_chosen": 1.839255690574646,
"log_odds_ratio": -0.22518055140972137,
"logits/chosen": -2.9555628299713135,
"logits/rejected": -2.968390703201294,
"logps/chosen": -0.4370731711387634,
"logps/rejected": -1.4699593782424927,
"loss": 0.2859,
"nll_loss": 0.28876128792762756,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.021853657439351082,
"rewards/margins": 0.051644302904605865,
"rewards/rejected": -0.0734979659318924,
"step": 1380
},
{
"epoch": 1.457787100157315,
"grad_norm": 1.8701436058229068,
"learning_rate": 5.364417807858201e-06,
"log_odds_chosen": 2.0006766319274902,
"log_odds_ratio": -0.19503512978553772,
"logits/chosen": -2.9456233978271484,
"logits/rejected": -2.9416487216949463,
"logps/chosen": -0.397217720746994,
"logps/rejected": -1.458070993423462,
"loss": 0.2898,
"nll_loss": 0.2990682125091553,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.01986088417470455,
"rewards/margins": 0.05304265767335892,
"rewards/rejected": -0.07290354371070862,
"step": 1390
},
{
"epoch": 1.4682747771368643,
"grad_norm": 1.8947645182805886,
"learning_rate": 5.345224838248489e-06,
"log_odds_chosen": 1.9478137493133545,
"log_odds_ratio": -0.22849062085151672,
"logits/chosen": -2.9488446712493896,
"logits/rejected": -2.980994462966919,
"logps/chosen": -0.38306254148483276,
"logps/rejected": -1.40244460105896,
"loss": 0.2784,
"nll_loss": 0.27079683542251587,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.019153129309415817,
"rewards/margins": 0.05096910148859024,
"rewards/rejected": -0.07012222707271576,
"step": 1400
},
{
"epoch": 1.4682747771368643,
"eval_log_odds_chosen": 0.4410339295864105,
"eval_log_odds_ratio": -0.6236060261726379,
"eval_logits/chosen": -2.9594457149505615,
"eval_logits/rejected": -2.9583115577697754,
"eval_logps/chosen": -0.8884981274604797,
"eval_logps/rejected": -1.1784039735794067,
"eval_loss": 0.5364598631858826,
"eval_nll_loss": 0.5036527514457703,
"eval_rewards/accuracies": 0.6527777910232544,
"eval_rewards/chosen": -0.044424910098314285,
"eval_rewards/margins": 0.014495291747152805,
"eval_rewards/rejected": -0.058920200914144516,
"eval_runtime": 139.2595,
"eval_samples_per_second": 14.319,
"eval_steps_per_second": 0.452,
"step": 1400
},
{
"epoch": 1.4787624541164133,
"grad_norm": 2.1665159464201142,
"learning_rate": 5.326236412913075e-06,
"log_odds_chosen": 1.7970411777496338,
"log_odds_ratio": -0.2380552738904953,
"logits/chosen": -2.9149088859558105,
"logits/rejected": -2.9543135166168213,
"logps/chosen": -0.4362480640411377,
"logps/rejected": -1.3472230434417725,
"loss": 0.29,
"nll_loss": 0.2710421681404114,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021812403574585915,
"rewards/margins": 0.04554874822497368,
"rewards/rejected": -0.06736114621162415,
"step": 1410
},
{
"epoch": 1.4892501310959623,
"grad_norm": 2.196966160421767,
"learning_rate": 5.307448924342753e-06,
"log_odds_chosen": 1.8308820724487305,
"log_odds_ratio": -0.21477296948432922,
"logits/chosen": -2.877204179763794,
"logits/rejected": -2.932901620864868,
"logps/chosen": -0.4031652510166168,
"logps/rejected": -1.3179484605789185,
"loss": 0.2855,
"nll_loss": 0.2783321738243103,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.02015826478600502,
"rewards/margins": 0.04573915898799896,
"rewards/rejected": -0.06589742004871368,
"step": 1420
},
{
"epoch": 1.4997378080755113,
"grad_norm": 2.1884907491879084,
"learning_rate": 5.28885885347945e-06,
"log_odds_chosen": 1.9711707830429077,
"log_odds_ratio": -0.20648148655891418,
"logits/chosen": -2.954136371612549,
"logits/rejected": -2.9814727306365967,
"logps/chosen": -0.41374531388282776,
"logps/rejected": -1.4304702281951904,
"loss": 0.2924,
"nll_loss": 0.27289509773254395,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.020687269046902657,
"rewards/margins": 0.0508362352848053,
"rewards/rejected": -0.071523517370224,
"step": 1430
},
{
"epoch": 1.5102254850550603,
"grad_norm": 2.124176001387226,
"learning_rate": 5.270462766947299e-06,
"log_odds_chosen": 1.7731349468231201,
"log_odds_ratio": -0.2392440289258957,
"logits/chosen": -2.9405388832092285,
"logits/rejected": -2.9464943408966064,
"logps/chosen": -0.4539235234260559,
"logps/rejected": -1.403793454170227,
"loss": 0.2961,
"nll_loss": 0.2940642237663269,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.022696174681186676,
"rewards/margins": 0.04749349504709244,
"rewards/rejected": -0.07018966972827911,
"step": 1440
},
{
"epoch": 1.5207131620346095,
"grad_norm": 1.8197825407446042,
"learning_rate": 5.252257314388902e-06,
"log_odds_chosen": 1.7956994771957397,
"log_odds_ratio": -0.22454524040222168,
"logits/chosen": -2.954716444015503,
"logits/rejected": -2.978447437286377,
"logps/chosen": -0.4430459439754486,
"logps/rejected": -1.4194531440734863,
"loss": 0.2777,
"nll_loss": 0.24652138352394104,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02215229719877243,
"rewards/margins": 0.048820365220308304,
"rewards/rejected": -0.07097266614437103,
"step": 1450
},
{
"epoch": 1.5312008390141583,
"grad_norm": 2.1915818543360355,
"learning_rate": 5.234239225902137e-06,
"log_odds_chosen": 1.9382715225219727,
"log_odds_ratio": -0.1963178515434265,
"logits/chosen": -2.8938894271850586,
"logits/rejected": -2.924325466156006,
"logps/chosen": -0.39880725741386414,
"logps/rejected": -1.4752063751220703,
"loss": 0.2971,
"nll_loss": 0.2676003575325012,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.019940361380577087,
"rewards/margins": 0.05381995439529419,
"rewards/rejected": -0.07376032322645187,
"step": 1460
},
{
"epoch": 1.5416885159937075,
"grad_norm": 2.1118618734250307,
"learning_rate": 5.216405309573011e-06,
"log_odds_chosen": 1.9139398336410522,
"log_odds_ratio": -0.19271975755691528,
"logits/chosen": -3.0117218494415283,
"logits/rejected": -3.0411810874938965,
"logps/chosen": -0.42149630188941956,
"logps/rejected": -1.471760869026184,
"loss": 0.2889,
"nll_loss": 0.27934783697128296,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.0210748128592968,
"rewards/margins": 0.052513234317302704,
"rewards/rejected": -0.0735880434513092,
"step": 1470
},
{
"epoch": 1.5521761929732563,
"grad_norm": 2.0510895547316745,
"learning_rate": 5.198752449100364e-06,
"log_odds_chosen": 2.0376482009887695,
"log_odds_ratio": -0.19703765213489532,
"logits/chosen": -3.009754180908203,
"logits/rejected": -3.016758441925049,
"logps/chosen": -0.40712347626686096,
"logps/rejected": -1.459837555885315,
"loss": 0.2888,
"nll_loss": 0.3001149892807007,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.020356174558401108,
"rewards/margins": 0.05263570696115494,
"rewards/rejected": -0.07299187034368515,
"step": 1480
},
{
"epoch": 1.5626638699528055,
"grad_norm": 2.1669568438399684,
"learning_rate": 5.181277601508398e-06,
"log_odds_chosen": 1.8304507732391357,
"log_odds_ratio": -0.2394884079694748,
"logits/chosen": -2.9779343605041504,
"logits/rejected": -3.008795738220215,
"logps/chosen": -0.4576667249202728,
"logps/rejected": -1.4601542949676514,
"loss": 0.2888,
"nll_loss": 0.29476073384284973,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.0228833369910717,
"rewards/margins": 0.05012437701225281,
"rewards/rejected": -0.07300771772861481,
"step": 1490
},
{
"epoch": 1.5731515469323545,
"grad_norm": 2.372050874462119,
"learning_rate": 5.163977794943223e-06,
"log_odds_chosen": 1.9750179052352905,
"log_odds_ratio": -0.19530083239078522,
"logits/chosen": -2.9395532608032227,
"logits/rejected": -2.991283893585205,
"logps/chosen": -0.42392611503601074,
"logps/rejected": -1.5091795921325684,
"loss": 0.2873,
"nll_loss": 0.2818702757358551,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021196305751800537,
"rewards/margins": 0.0542626678943634,
"rewards/rejected": -0.07545898109674454,
"step": 1500
},
{
"epoch": 1.5731515469323545,
"eval_log_odds_chosen": 0.4364486038684845,
"eval_log_odds_ratio": -0.6225508451461792,
"eval_logits/chosen": -2.965731382369995,
"eval_logits/rejected": -2.966355323791504,
"eval_logps/chosen": -0.8718044757843018,
"eval_logps/rejected": -1.158449649810791,
"eval_loss": 0.53301602602005,
"eval_nll_loss": 0.5004281997680664,
"eval_rewards/accuracies": 0.6448412537574768,
"eval_rewards/chosen": -0.043590229004621506,
"eval_rewards/margins": 0.014332256279885769,
"eval_rewards/rejected": -0.05792247876524925,
"eval_runtime": 139.8515,
"eval_samples_per_second": 14.258,
"eval_steps_per_second": 0.45,
"step": 1500
},
{
"epoch": 1.5836392239119035,
"grad_norm": 1.9123802783189798,
"learning_rate": 5.146850126549788e-06,
"log_odds_chosen": 1.6361440420150757,
"log_odds_ratio": -0.26433151960372925,
"logits/chosen": -2.943331003189087,
"logits/rejected": -2.9721503257751465,
"logps/chosen": -0.44553548097610474,
"logps/rejected": -1.2933813333511353,
"loss": 0.3044,
"nll_loss": 0.2870228588581085,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.022276774048805237,
"rewards/margins": 0.042392291128635406,
"rewards/rejected": -0.06466906517744064,
"step": 1510
},
{
"epoch": 1.5941269008914527,
"grad_norm": 1.9978617693896288,
"learning_rate": 5.129891760425772e-06,
"log_odds_chosen": 1.872454285621643,
"log_odds_ratio": -0.21693451702594757,
"logits/chosen": -2.9198169708251953,
"logits/rejected": -2.9594712257385254,
"logps/chosen": -0.4238964915275574,
"logps/rejected": -1.4147742986679077,
"loss": 0.2765,
"nll_loss": 0.2593707740306854,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.021194826811552048,
"rewards/margins": 0.049543894827365875,
"rewards/rejected": -0.07073871791362762,
"step": 1520
},
{
"epoch": 1.6046145778710015,
"grad_norm": 2.2358254561438966,
"learning_rate": 5.113099925649136e-06,
"log_odds_chosen": 1.7420718669891357,
"log_odds_ratio": -0.2600535750389099,
"logits/chosen": -2.9620399475097656,
"logits/rejected": -2.997101068496704,
"logps/chosen": -0.4705958366394043,
"logps/rejected": -1.435579538345337,
"loss": 0.2766,
"nll_loss": 0.28323301672935486,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.023529794067144394,
"rewards/margins": 0.048249177634716034,
"rewards/rejected": -0.07177898287773132,
"step": 1530
},
{
"epoch": 1.6151022548505507,
"grad_norm": 2.123071067312132,
"learning_rate": 5.096471914376255e-06,
"log_odds_chosen": 2.0446419715881348,
"log_odds_ratio": -0.20973734557628632,
"logits/chosen": -2.8849668502807617,
"logits/rejected": -2.91094970703125,
"logps/chosen": -0.42269793152809143,
"logps/rejected": -1.4985077381134033,
"loss": 0.2842,
"nll_loss": 0.24874058365821838,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.021134894341230392,
"rewards/margins": 0.053790487349033356,
"rewards/rejected": -0.07492538541555405,
"step": 1540
},
{
"epoch": 1.6255899318300995,
"grad_norm": 1.8574119456068037,
"learning_rate": 5.08000508000762e-06,
"log_odds_chosen": 1.8896774053573608,
"log_odds_ratio": -0.2109728306531906,
"logits/chosen": -2.9518914222717285,
"logits/rejected": -2.9677398204803467,
"logps/chosen": -0.42254775762557983,
"logps/rejected": -1.4004069566726685,
"loss": 0.2737,
"nll_loss": 0.26676517724990845,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02112739160656929,
"rewards/margins": 0.04889295622706413,
"rewards/rejected": -0.07002034783363342,
"step": 1550
},
{
"epoch": 1.6360776088096487,
"grad_norm": 2.012947859419835,
"learning_rate": 5.0636968354183334e-06,
"log_odds_chosen": 1.7877776622772217,
"log_odds_ratio": -0.2195170670747757,
"logits/chosen": -2.916713237762451,
"logits/rejected": -2.9442696571350098,
"logps/chosen": -0.4229874610900879,
"logps/rejected": -1.3620960712432861,
"loss": 0.2937,
"nll_loss": 0.28985968232154846,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021149372681975365,
"rewards/margins": 0.046955425292253494,
"rewards/rejected": -0.06810478866100311,
"step": 1560
},
{
"epoch": 1.6465652857891977,
"grad_norm": 1.9554610757973563,
"learning_rate": 5.047544651250688e-06,
"log_odds_chosen": 1.9977741241455078,
"log_odds_ratio": -0.22808516025543213,
"logits/chosen": -2.95414137840271,
"logits/rejected": -2.9667911529541016,
"logps/chosen": -0.40563470125198364,
"logps/rejected": -1.493981122970581,
"loss": 0.2746,
"nll_loss": 0.25610029697418213,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.020281735807657242,
"rewards/margins": 0.05441732332110405,
"rewards/rejected": -0.07469905912876129,
"step": 1570
},
{
"epoch": 1.6570529627687467,
"grad_norm": 2.2417227837369094,
"learning_rate": 5.031546054266276e-06,
"log_odds_chosen": 1.8591692447662354,
"log_odds_ratio": -0.23143061995506287,
"logits/chosen": -3.0023272037506104,
"logits/rejected": -3.0128941535949707,
"logps/chosen": -0.46788668632507324,
"logps/rejected": -1.490392804145813,
"loss": 0.2962,
"nll_loss": 0.31111472845077515,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.02339433692395687,
"rewards/margins": 0.05112530663609505,
"rewards/rejected": -0.07451964914798737,
"step": 1580
},
{
"epoch": 1.667540639748296,
"grad_norm": 2.0152925811378846,
"learning_rate": 5.015698625755192e-06,
"log_odds_chosen": 1.9612891674041748,
"log_odds_ratio": -0.22349119186401367,
"logits/chosen": -2.9373695850372314,
"logits/rejected": -2.9659922122955322,
"logps/chosen": -0.40127071738243103,
"logps/rejected": -1.4479907751083374,
"loss": 0.2939,
"nll_loss": 0.2725040912628174,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02006353810429573,
"rewards/margins": 0.05233600735664368,
"rewards/rejected": -0.07239954173564911,
"step": 1590
},
{
"epoch": 1.6780283167278447,
"grad_norm": 1.9355725247245243,
"learning_rate": 5e-06,
"log_odds_chosen": 1.8742882013320923,
"log_odds_ratio": -0.21055075526237488,
"logits/chosen": -2.9387471675872803,
"logits/rejected": -2.9844515323638916,
"logps/chosen": -0.43298736214637756,
"logps/rejected": -1.4716593027114868,
"loss": 0.276,
"nll_loss": 0.26002392172813416,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02164936624467373,
"rewards/margins": 0.05193359777331352,
"rewards/rejected": -0.0735829621553421,
"step": 1600
},
{
"epoch": 1.6780283167278447,
"eval_log_odds_chosen": 0.4569767117500305,
"eval_log_odds_ratio": -0.6159732937812805,
"eval_logits/chosen": -2.932406187057495,
"eval_logits/rejected": -2.9357593059539795,
"eval_logps/chosen": -0.8832988142967224,
"eval_logps/rejected": -1.1878604888916016,
"eval_loss": 0.5367424488067627,
"eval_nll_loss": 0.5040929913520813,
"eval_rewards/accuracies": 0.6408730149269104,
"eval_rewards/chosen": -0.04416494444012642,
"eval_rewards/margins": 0.015228085219860077,
"eval_rewards/rejected": -0.0593930259346962,
"eval_runtime": 138.0302,
"eval_samples_per_second": 14.446,
"eval_steps_per_second": 0.456,
"step": 1600
},
{
"epoch": 1.688515993707394,
"grad_norm": 1.9448584897613828,
"learning_rate": 4.984447862792268e-06,
"log_odds_chosen": 2.0258474349975586,
"log_odds_ratio": -0.2537488639354706,
"logits/chosen": -2.9370341300964355,
"logits/rejected": -2.959137439727783,
"logps/chosen": -0.4205976128578186,
"logps/rejected": -1.51674485206604,
"loss": 0.2805,
"nll_loss": 0.2590489387512207,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.0210298802703619,
"rewards/margins": 0.054807353764772415,
"rewards/rejected": -0.07583723217248917,
"step": 1610
},
{
"epoch": 1.6990036706869427,
"grad_norm": 2.2985078763398503,
"learning_rate": 4.969039949999534e-06,
"log_odds_chosen": 1.9926655292510986,
"log_odds_ratio": -0.209347203373909,
"logits/chosen": -2.9543755054473877,
"logits/rejected": -2.979072093963623,
"logps/chosen": -0.4242986738681793,
"logps/rejected": -1.527527093887329,
"loss": 0.2829,
"nll_loss": 0.28810399770736694,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.021214932203292847,
"rewards/margins": 0.05516142398118973,
"rewards/rejected": -0.07637635618448257,
"step": 1620
},
{
"epoch": 1.709491347666492,
"grad_norm": 1.978508364107179,
"learning_rate": 4.9537740461807e-06,
"log_odds_chosen": 1.7989534139633179,
"log_odds_ratio": -0.22280922532081604,
"logits/chosen": -2.9272611141204834,
"logits/rejected": -2.933403968811035,
"logps/chosen": -0.4125545024871826,
"logps/rejected": -1.371010422706604,
"loss": 0.2723,
"nll_loss": 0.27273207902908325,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.02062772400677204,
"rewards/margins": 0.04792279377579689,
"rewards/rejected": -0.06855051219463348,
"step": 1630
},
{
"epoch": 1.719979024646041,
"grad_norm": 2.5294696595366375,
"learning_rate": 4.938647983247949e-06,
"log_odds_chosen": 1.8762280941009521,
"log_odds_ratio": -0.23052379488945007,
"logits/chosen": -2.9176859855651855,
"logits/rejected": -2.937653064727783,
"logps/chosen": -0.4308241307735443,
"logps/rejected": -1.4621460437774658,
"loss": 0.2707,
"nll_loss": 0.24837055802345276,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.021541204303503036,
"rewards/margins": 0.05156610533595085,
"rewards/rejected": -0.07310730963945389,
"step": 1640
},
{
"epoch": 1.73046670162559,
"grad_norm": 1.9845638290615137,
"learning_rate": 4.9236596391733095e-06,
"log_odds_chosen": 1.9353539943695068,
"log_odds_ratio": -0.22219491004943848,
"logits/chosen": -2.9324100017547607,
"logits/rejected": -2.9492199420928955,
"logps/chosen": -0.4047132134437561,
"logps/rejected": -1.447388768196106,
"loss": 0.2921,
"nll_loss": 0.2786787152290344,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020235659554600716,
"rewards/margins": 0.05213377624750137,
"rewards/rejected": -0.07236944139003754,
"step": 1650
},
{
"epoch": 1.740954378605139,
"grad_norm": 2.1313335783196914,
"learning_rate": 4.9088069367381605e-06,
"log_odds_chosen": 1.9517314434051514,
"log_odds_ratio": -0.19579176604747772,
"logits/chosen": -2.9807212352752686,
"logits/rejected": -3.004951000213623,
"logps/chosen": -0.4060528874397278,
"logps/rejected": -1.4121928215026855,
"loss": 0.2851,
"nll_loss": 0.27768373489379883,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020302647724747658,
"rewards/margins": 0.05030699446797371,
"rewards/rejected": -0.07060963660478592,
"step": 1660
},
{
"epoch": 1.751442055584688,
"grad_norm": 1.893515732849545,
"learning_rate": 4.894087842323964e-06,
"log_odds_chosen": 1.8834346532821655,
"log_odds_ratio": -0.20945528149604797,
"logits/chosen": -2.9691452980041504,
"logits/rejected": -3.0074009895324707,
"logps/chosen": -0.4027465283870697,
"logps/rejected": -1.374361276626587,
"loss": 0.2926,
"nll_loss": 0.26718848943710327,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020137326791882515,
"rewards/margins": 0.04858074709773064,
"rewards/rejected": -0.0687180757522583,
"step": 1670
},
{
"epoch": 1.7619297325642371,
"grad_norm": 2.0915190498544263,
"learning_rate": 4.8795003647426654e-06,
"log_odds_chosen": 1.8165385723114014,
"log_odds_ratio": -0.21812555193901062,
"logits/chosen": -3.0662589073181152,
"logits/rejected": -3.089877128601074,
"logps/chosen": -0.40138545632362366,
"logps/rejected": -1.3200931549072266,
"loss": 0.2998,
"nll_loss": 0.29331129789352417,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020069271326065063,
"rewards/margins": 0.04593539237976074,
"rewards/rejected": -0.06600465625524521,
"step": 1680
},
{
"epoch": 1.772417409543786,
"grad_norm": 2.1457501870245417,
"learning_rate": 4.865042554105199e-06,
"log_odds_chosen": 1.869539499282837,
"log_odds_ratio": -0.2280159890651703,
"logits/chosen": -2.991488456726074,
"logits/rejected": -2.98630690574646,
"logps/chosen": -0.4090718626976013,
"logps/rejected": -1.36448073387146,
"loss": 0.2858,
"nll_loss": 0.2776942253112793,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.020453594624996185,
"rewards/margins": 0.04777044430375099,
"rewards/rejected": -0.06822402775287628,
"step": 1690
},
{
"epoch": 1.7829050865233351,
"grad_norm": 2.3665022543070093,
"learning_rate": 4.850712500726659e-06,
"log_odds_chosen": 1.9791815280914307,
"log_odds_ratio": -0.19878429174423218,
"logits/chosen": -2.9824297428131104,
"logits/rejected": -3.022101640701294,
"logps/chosen": -0.4144412875175476,
"logps/rejected": -1.4597278833389282,
"loss": 0.2715,
"nll_loss": 0.28446242213249207,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.02072206512093544,
"rewards/margins": 0.052264340221881866,
"rewards/rejected": -0.07298640161752701,
"step": 1700
},
{
"epoch": 1.7829050865233351,
"eval_log_odds_chosen": 0.4425116777420044,
"eval_log_odds_ratio": -0.6271889209747314,
"eval_logits/chosen": -3.019425392150879,
"eval_logits/rejected": -3.020922899246216,
"eval_logps/chosen": -0.8710321187973022,
"eval_logps/rejected": -1.1603412628173828,
"eval_loss": 0.5348805785179138,
"eval_nll_loss": 0.5024282336235046,
"eval_rewards/accuracies": 0.6448412537574768,
"eval_rewards/chosen": -0.04355160519480705,
"eval_rewards/margins": 0.014465462416410446,
"eval_rewards/rejected": -0.0580170638859272,
"eval_runtime": 136.3216,
"eval_samples_per_second": 14.627,
"eval_steps_per_second": 0.462,
"step": 1700
},
{
"epoch": 1.7933927635028841,
"grad_norm": 1.847904822728325,
"learning_rate": 4.836508334066745e-06,
"log_odds_chosen": 1.9795688390731812,
"log_odds_ratio": -0.2207694798707962,
"logits/chosen": -3.0054497718811035,
"logits/rejected": -3.0154829025268555,
"logps/chosen": -0.4081927239894867,
"logps/rejected": -1.4390795230865479,
"loss": 0.264,
"nll_loss": 0.24716749787330627,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.020409639924764633,
"rewards/margins": 0.051544345915317535,
"rewards/rejected": -0.07195398211479187,
"step": 1710
},
{
"epoch": 1.8038804404824331,
"grad_norm": 1.7750027737169987,
"learning_rate": 4.822428221704122e-06,
"log_odds_chosen": 1.926945686340332,
"log_odds_ratio": -0.22434870898723602,
"logits/chosen": -3.0268912315368652,
"logits/rejected": -3.035226583480835,
"logps/chosen": -0.43201422691345215,
"logps/rejected": -1.498827576637268,
"loss": 0.2864,
"nll_loss": 0.25820285081863403,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.021600713953375816,
"rewards/margins": 0.053340665996074677,
"rewards/rejected": -0.07494138181209564,
"step": 1720
},
{
"epoch": 1.8143681174619821,
"grad_norm": 2.0662716537028354,
"learning_rate": 4.8084703683434506e-06,
"log_odds_chosen": 1.974784255027771,
"log_odds_ratio": -0.21157677471637726,
"logits/chosen": -3.010627031326294,
"logits/rejected": -2.9982268810272217,
"logps/chosen": -0.4355824589729309,
"logps/rejected": -1.5232689380645752,
"loss": 0.2903,
"nll_loss": 0.2755037248134613,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.021779123693704605,
"rewards/margins": 0.05438433215022087,
"rewards/rejected": -0.07616344839334488,
"step": 1730
},
{
"epoch": 1.8248557944415311,
"grad_norm": 2.1360074988574445,
"learning_rate": 4.794633014853843e-06,
"log_odds_chosen": 1.847333312034607,
"log_odds_ratio": -0.2377551794052124,
"logits/chosen": -3.006833553314209,
"logits/rejected": -3.0122854709625244,
"logps/chosen": -0.4366019368171692,
"logps/rejected": -1.4164003133773804,
"loss": 0.304,
"nll_loss": 0.29017573595046997,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.02183009497821331,
"rewards/margins": 0.0489899218082428,
"rewards/rejected": -0.07082001864910126,
"step": 1740
},
{
"epoch": 1.8353434714210803,
"grad_norm": 1.9891927691131213,
"learning_rate": 4.780914437337575e-06,
"log_odds_chosen": 1.8539154529571533,
"log_odds_ratio": -0.23103201389312744,
"logits/chosen": -2.9830121994018555,
"logits/rejected": -2.9818801879882812,
"logps/chosen": -0.4274306297302246,
"logps/rejected": -1.4196858406066895,
"loss": 0.2958,
"nll_loss": 0.2937518060207367,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.02137153223156929,
"rewards/margins": 0.049612756818532944,
"rewards/rejected": -0.07098428905010223,
"step": 1750
},
{
"epoch": 1.8458311484006291,
"grad_norm": 1.827588117065436,
"learning_rate": 4.767312946227961e-06,
"log_odds_chosen": 2.2149860858917236,
"log_odds_ratio": -0.2075362503528595,
"logits/chosen": -2.9530441761016846,
"logits/rejected": -2.9839682579040527,
"logps/chosen": -0.391355037689209,
"logps/rejected": -1.6375446319580078,
"loss": 0.2721,
"nll_loss": 0.2694031000137329,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.01956775411963463,
"rewards/margins": 0.06230948120355606,
"rewards/rejected": -0.08187723159790039,
"step": 1760
},
{
"epoch": 1.8563188253801783,
"grad_norm": 1.8203811521479276,
"learning_rate": 4.7538268854152834e-06,
"log_odds_chosen": 1.7995598316192627,
"log_odds_ratio": -0.244699165225029,
"logits/chosen": -3.011706829071045,
"logits/rejected": -3.024837017059326,
"logps/chosen": -0.4394347071647644,
"logps/rejected": -1.4033676385879517,
"loss": 0.2771,
"nll_loss": 0.25858861207962036,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02197173610329628,
"rewards/margins": 0.04819665104150772,
"rewards/rejected": -0.0701683908700943,
"step": 1770
},
{
"epoch": 1.8668065023597273,
"grad_norm": 2.2623646165216313,
"learning_rate": 4.740454631399773e-06,
"log_odds_chosen": 1.962255835533142,
"log_odds_ratio": -0.23438410460948944,
"logits/chosen": -2.949073314666748,
"logits/rejected": -2.989229202270508,
"logps/chosen": -0.3985145688056946,
"logps/rejected": -1.4544894695281982,
"loss": 0.2941,
"nll_loss": 0.29249390959739685,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.019925730302929878,
"rewards/margins": 0.052798740565776825,
"rewards/rejected": -0.07272447645664215,
"step": 1780
},
{
"epoch": 1.8772941793392763,
"grad_norm": 2.5104520915032538,
"learning_rate": 4.727194592470656e-06,
"log_odds_chosen": 2.0800955295562744,
"log_odds_ratio": -0.19981749355793,
"logits/chosen": -2.9771628379821777,
"logits/rejected": -3.0005829334259033,
"logps/chosen": -0.42085084319114685,
"logps/rejected": -1.603994607925415,
"loss": 0.2844,
"nll_loss": 0.2677140235900879,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.02104254439473152,
"rewards/margins": 0.05915718153119087,
"rewards/rejected": -0.08019973337650299,
"step": 1790
},
{
"epoch": 1.8877818563188253,
"grad_norm": 2.077913541951449,
"learning_rate": 4.714045207910318e-06,
"log_odds_chosen": 2.1426799297332764,
"log_odds_ratio": -0.18838170170783997,
"logits/chosen": -2.950552463531494,
"logits/rejected": -2.9804420471191406,
"logps/chosen": -0.41320332884788513,
"logps/rejected": -1.622671365737915,
"loss": 0.2717,
"nll_loss": 0.2544669210910797,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.020660167559981346,
"rewards/margins": 0.060473401099443436,
"rewards/rejected": -0.08113356679677963,
"step": 1800
},
{
"epoch": 1.8877818563188253,
"eval_log_odds_chosen": 0.48237088322639465,
"eval_log_odds_ratio": -0.6183955669403076,
"eval_logits/chosen": -2.9562783241271973,
"eval_logits/rejected": -2.957892894744873,
"eval_logps/chosen": -0.8997318148612976,
"eval_logps/rejected": -1.2325206995010376,
"eval_loss": 0.5340895652770996,
"eval_nll_loss": 0.5023403763771057,
"eval_rewards/accuracies": 0.6547619104385376,
"eval_rewards/chosen": -0.04498659446835518,
"eval_rewards/margins": 0.01663944497704506,
"eval_rewards/rejected": -0.06162603944540024,
"eval_runtime": 136.1464,
"eval_samples_per_second": 14.646,
"eval_steps_per_second": 0.463,
"step": 1800
},
{
"epoch": 1.8982695332983743,
"grad_norm": 1.896252578291677,
"learning_rate": 4.701004947222685e-06,
"log_odds_chosen": 2.0811541080474854,
"log_odds_ratio": -0.20500631630420685,
"logits/chosen": -3.000387668609619,
"logits/rejected": -2.983591079711914,
"logps/chosen": -0.4098430573940277,
"logps/rejected": -1.608665108680725,
"loss": 0.2794,
"nll_loss": 0.25453388690948486,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.020492153242230415,
"rewards/margins": 0.05994110181927681,
"rewards/rejected": -0.08043324947357178,
"step": 1810
},
{
"epoch": 1.9087572102779236,
"grad_norm": 2.019085371673625,
"learning_rate": 4.688072309384955e-06,
"log_odds_chosen": 2.0144619941711426,
"log_odds_ratio": -0.2020682841539383,
"logits/chosen": -2.9534127712249756,
"logits/rejected": -2.9533755779266357,
"logps/chosen": -0.3999931216239929,
"logps/rejected": -1.4992988109588623,
"loss": 0.2775,
"nll_loss": 0.26274845004081726,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.019999656826257706,
"rewards/margins": 0.054965294897556305,
"rewards/rejected": -0.07496494799852371,
"step": 1820
},
{
"epoch": 1.9192448872574723,
"grad_norm": 1.9263871107241788,
"learning_rate": 4.675245822121844e-06,
"log_odds_chosen": 2.0367493629455566,
"log_odds_ratio": -0.20607483386993408,
"logits/chosen": -2.9868836402893066,
"logits/rejected": -3.000213861465454,
"logps/chosen": -0.4244080185890198,
"logps/rejected": -1.5761488676071167,
"loss": 0.2923,
"nll_loss": 0.2808459997177124,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02122039906680584,
"rewards/margins": 0.057587046176195145,
"rewards/rejected": -0.07880743592977524,
"step": 1830
},
{
"epoch": 1.9297325642370216,
"grad_norm": 2.1487838733941365,
"learning_rate": 4.662524041201569e-06,
"log_odds_chosen": 2.0472216606140137,
"log_odds_ratio": -0.22086529433727264,
"logits/chosen": -2.9925904273986816,
"logits/rejected": -2.985816240310669,
"logps/chosen": -0.4373515248298645,
"logps/rejected": -1.5831472873687744,
"loss": 0.2713,
"nll_loss": 0.2551635801792145,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.021867576986551285,
"rewards/margins": 0.057289790362119675,
"rewards/rejected": -0.07915736734867096,
"step": 1840
},
{
"epoch": 1.9402202412165706,
"grad_norm": 2.0463386352717112,
"learning_rate": 4.649905549752772e-06,
"log_odds_chosen": 2.1467113494873047,
"log_odds_ratio": -0.21497011184692383,
"logits/chosen": -2.938457727432251,
"logits/rejected": -2.9367523193359375,
"logps/chosen": -0.4192470610141754,
"logps/rejected": -1.63271164894104,
"loss": 0.2767,
"nll_loss": 0.2981775999069214,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.02096235193312168,
"rewards/margins": 0.06067322567105293,
"rewards/rejected": -0.08163557946681976,
"step": 1850
},
{
"epoch": 1.9507079181961196,
"grad_norm": 1.9930187660935812,
"learning_rate": 4.6373889576016826e-06,
"log_odds_chosen": 2.145296573638916,
"log_odds_ratio": -0.19072812795639038,
"logits/chosen": -2.9529764652252197,
"logits/rejected": -2.960404634475708,
"logps/chosen": -0.407731294631958,
"logps/rejected": -1.5777407884597778,
"loss": 0.2761,
"nll_loss": 0.2852553129196167,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.02038656547665596,
"rewards/margins": 0.05850047990679741,
"rewards/rejected": -0.07888703793287277,
"step": 1860
},
{
"epoch": 1.9611955951756685,
"grad_norm": 2.0042665222271756,
"learning_rate": 4.624972900628803e-06,
"log_odds_chosen": 2.0522494316101074,
"log_odds_ratio": -0.20059652626514435,
"logits/chosen": -2.932502269744873,
"logits/rejected": -2.9307363033294678,
"logps/chosen": -0.4203645586967468,
"logps/rejected": -1.5539976358413696,
"loss": 0.276,
"nll_loss": 0.2738272547721863,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02101822756230831,
"rewards/margins": 0.05668165162205696,
"rewards/rejected": -0.07769988477230072,
"step": 1870
},
{
"epoch": 1.9716832721552175,
"grad_norm": 2.0226547316915258,
"learning_rate": 4.6126560401444256e-06,
"log_odds_chosen": 2.0710301399230957,
"log_odds_ratio": -0.19392071664333344,
"logits/chosen": -3.015066623687744,
"logits/rejected": -2.99493145942688,
"logps/chosen": -0.43072837591171265,
"logps/rejected": -1.6065874099731445,
"loss": 0.2748,
"nll_loss": 0.2821330428123474,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.02153642103075981,
"rewards/margins": 0.05879295617341995,
"rewards/rejected": -0.08032937347888947,
"step": 1880
},
{
"epoch": 1.9821709491347668,
"grad_norm": 2.567857697275732,
"learning_rate": 4.600437062282362e-06,
"log_odds_chosen": 1.9227994680404663,
"log_odds_ratio": -0.2224545031785965,
"logits/chosen": -3.0251965522766113,
"logits/rejected": -2.993910789489746,
"logps/chosen": -0.4456098675727844,
"logps/rejected": -1.529626488685608,
"loss": 0.2788,
"nll_loss": 0.28787270188331604,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.0222804956138134,
"rewards/margins": 0.054200828075408936,
"rewards/rejected": -0.07648131996393204,
"step": 1890
},
{
"epoch": 1.9926586261143155,
"grad_norm": 2.1545883447921654,
"learning_rate": 4.588314677411235e-06,
"log_odds_chosen": 2.2162415981292725,
"log_odds_ratio": -0.20383968949317932,
"logits/chosen": -3.039658784866333,
"logits/rejected": -3.022245407104492,
"logps/chosen": -0.420427143573761,
"logps/rejected": -1.6983455419540405,
"loss": 0.2857,
"nll_loss": 0.24534273147583008,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.02102135680615902,
"rewards/margins": 0.06389592587947845,
"rewards/rejected": -0.08491728454828262,
"step": 1900
},
{
"epoch": 1.9926586261143155,
"eval_log_odds_chosen": 0.48923251032829285,
"eval_log_odds_ratio": -0.6193312406539917,
"eval_logits/chosen": -3.0350046157836914,
"eval_logits/rejected": -3.0279133319854736,
"eval_logps/chosen": -0.908783495426178,
"eval_logps/rejected": -1.2409300804138184,
"eval_loss": 0.5407980680465698,
"eval_nll_loss": 0.5090586543083191,
"eval_rewards/accuracies": 0.6547619104385376,
"eval_rewards/chosen": -0.04543917626142502,
"eval_rewards/margins": 0.016607332974672318,
"eval_rewards/rejected": -0.062046512961387634,
"eval_runtime": 137.1653,
"eval_samples_per_second": 14.537,
"eval_steps_per_second": 0.459,
"step": 1900
},
{
"epoch": 2.0031463030938648,
"grad_norm": 2.4971175632899385,
"learning_rate": 4.576287619562756e-06,
"log_odds_chosen": 2.549215793609619,
"log_odds_ratio": -0.13884183764457703,
"logits/chosen": -3.0293986797332764,
"logits/rejected": -3.0052542686462402,
"logps/chosen": -0.3389069139957428,
"logps/rejected": -1.6784775257110596,
"loss": 0.2535,
"nll_loss": 0.2399848997592926,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.01694534718990326,
"rewards/margins": 0.06697852909564972,
"rewards/rejected": -0.08392388373613358,
"step": 1910
},
{
"epoch": 2.0136339800734135,
"grad_norm": 2.5031224034871475,
"learning_rate": 4.564354645876385e-06,
"log_odds_chosen": 4.333657741546631,
"log_odds_ratio": -0.02762582339346409,
"logits/chosen": -2.869049549102783,
"logits/rejected": -2.8186068534851074,
"logps/chosen": -0.1433320939540863,
"logps/rejected": -2.334181547164917,
"loss": 0.1236,
"nll_loss": 0.11940746009349823,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007166605442762375,
"rewards/margins": 0.10954247415065765,
"rewards/rejected": -0.11670909076929092,
"step": 1920
},
{
"epoch": 2.0241216570529628,
"grad_norm": 1.9586057770651872,
"learning_rate": 4.552514536059854e-06,
"log_odds_chosen": 3.8062407970428467,
"log_odds_ratio": -0.0499381422996521,
"logits/chosen": -2.9369876384735107,
"logits/rejected": -2.963967800140381,
"logps/chosen": -0.1607118844985962,
"logps/rejected": -1.9827187061309814,
"loss": 0.116,
"nll_loss": 0.11325522512197495,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.00803559459745884,
"rewards/margins": 0.09110033512115479,
"rewards/rejected": -0.09913593530654907,
"step": 1930
},
{
"epoch": 2.034609334032512,
"grad_norm": 2.173705177159571,
"learning_rate": 4.540766091864998e-06,
"log_odds_chosen": 3.9211831092834473,
"log_odds_ratio": -0.03853369504213333,
"logits/chosen": -2.848071575164795,
"logits/rejected": -2.927175760269165,
"logps/chosen": -0.14356736838817596,
"logps/rejected": -1.959979772567749,
"loss": 0.1167,
"nll_loss": 0.11882974952459335,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007178368978202343,
"rewards/margins": 0.09082063287496567,
"rewards/rejected": -0.09799900650978088,
"step": 1940
},
{
"epoch": 2.0450970110120608,
"grad_norm": 1.7557144572827617,
"learning_rate": 4.529108136578383e-06,
"log_odds_chosen": 4.060091495513916,
"log_odds_ratio": -0.028795290738344193,
"logits/chosen": -2.8138527870178223,
"logits/rejected": -2.8606162071228027,
"logps/chosen": -0.13301293551921844,
"logps/rejected": -2.0062737464904785,
"loss": 0.1151,
"nll_loss": 0.1191815584897995,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006650646682828665,
"rewards/margins": 0.09366302937269211,
"rewards/rejected": -0.10031367838382721,
"step": 1950
},
{
"epoch": 2.05558468799161,
"grad_norm": 1.69960315567237,
"learning_rate": 4.517539514526257e-06,
"log_odds_chosen": 4.352217674255371,
"log_odds_ratio": -0.03757786005735397,
"logits/chosen": -2.819655656814575,
"logits/rejected": -2.8428378105163574,
"logps/chosen": -0.14081783592700958,
"logps/rejected": -2.33030104637146,
"loss": 0.1135,
"nll_loss": 0.11204487085342407,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007040892727673054,
"rewards/margins": 0.10947415977716446,
"rewards/rejected": -0.11651506274938583,
"step": 1960
},
{
"epoch": 2.0660723649711588,
"grad_norm": 1.991621297994473,
"learning_rate": 4.506059090593329e-06,
"log_odds_chosen": 4.156961917877197,
"log_odds_ratio": -0.0386335626244545,
"logits/chosen": -2.8222968578338623,
"logits/rejected": -2.880376100540161,
"logps/chosen": -0.15631213784217834,
"logps/rejected": -2.2803502082824707,
"loss": 0.1083,
"nll_loss": 0.11318318545818329,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007815606892108917,
"rewards/margins": 0.1062019094824791,
"rewards/rejected": -0.11401752382516861,
"step": 1970
},
{
"epoch": 2.076560041950708,
"grad_norm": 1.8671392728507943,
"learning_rate": 4.4946657497549474e-06,
"log_odds_chosen": 4.751786708831787,
"log_odds_ratio": -0.02287628874182701,
"logits/chosen": -2.8250374794006348,
"logits/rejected": -2.858389377593994,
"logps/chosen": -0.136850968003273,
"logps/rejected": -2.61843204498291,
"loss": 0.1149,
"nll_loss": 0.11261866241693497,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006842548493295908,
"rewards/margins": 0.12407903373241425,
"rewards/rejected": -0.13092158734798431,
"step": 1980
},
{
"epoch": 2.0870477189302568,
"grad_norm": 2.047221073846021,
"learning_rate": 4.483358396622204e-06,
"log_odds_chosen": 4.551729202270508,
"log_odds_ratio": -0.029045408591628075,
"logits/chosen": -2.8212010860443115,
"logits/rejected": -2.863682270050049,
"logps/chosen": -0.13936151564121246,
"logps/rejected": -2.4473021030426025,
"loss": 0.1129,
"nll_loss": 0.11166741698980331,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006968076340854168,
"rewards/margins": 0.11539702117443085,
"rewards/rejected": -0.12236510217189789,
"step": 1990
},
{
"epoch": 2.097535395909806,
"grad_norm": 2.1099833794179723,
"learning_rate": 4.47213595499958e-06,
"log_odds_chosen": 4.558366298675537,
"log_odds_ratio": -0.01906474307179451,
"logits/chosen": -2.8424153327941895,
"logits/rejected": -2.877136707305908,
"logps/chosen": -0.14121726155281067,
"logps/rejected": -2.4738833904266357,
"loss": 0.1137,
"nll_loss": 0.1110328808426857,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.0070608630776405334,
"rewards/margins": 0.11663329601287842,
"rewards/rejected": -0.12369415909051895,
"step": 2000
},
{
"epoch": 2.097535395909806,
"eval_log_odds_chosen": 0.5767443776130676,
"eval_log_odds_ratio": -0.6272528171539307,
"eval_logits/chosen": -2.87036395072937,
"eval_logits/rejected": -2.881497383117676,
"eval_logps/chosen": -1.2408413887023926,
"eval_logps/rejected": -1.6761136054992676,
"eval_loss": 0.6877180337905884,
"eval_nll_loss": 0.6538823843002319,
"eval_rewards/accuracies": 0.670634925365448,
"eval_rewards/chosen": -0.06204206869006157,
"eval_rewards/margins": 0.021763615310192108,
"eval_rewards/rejected": -0.08380568027496338,
"eval_runtime": 137.068,
"eval_samples_per_second": 14.548,
"eval_steps_per_second": 0.46,
"step": 2000
},
{
"epoch": 2.108023072889355,
"grad_norm": 1.7758830781899906,
"learning_rate": 4.4609973674547055e-06,
"log_odds_chosen": 4.593904495239258,
"log_odds_ratio": -0.033291045576334,
"logits/chosen": -2.856330394744873,
"logits/rejected": -2.8690733909606934,
"logps/chosen": -0.1400183141231537,
"logps/rejected": -2.536652088165283,
"loss": 0.1039,
"nll_loss": 0.10139288008213043,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007000915706157684,
"rewards/margins": 0.11983168125152588,
"rewards/rejected": -0.12683258950710297,
"step": 2010
},
{
"epoch": 2.118510749868904,
"grad_norm": 2.6416736862275076,
"learning_rate": 4.449941594899848e-06,
"log_odds_chosen": 4.607335090637207,
"log_odds_ratio": -0.028559138998389244,
"logits/chosen": -2.7992746829986572,
"logits/rejected": -2.8301546573638916,
"logps/chosen": -0.14062660932540894,
"logps/rejected": -2.5437684059143066,
"loss": 0.1201,
"nll_loss": 0.1216670423746109,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007031330373138189,
"rewards/margins": 0.12015708535909653,
"rewards/rejected": -0.12718841433525085,
"step": 2020
},
{
"epoch": 2.128998426848453,
"grad_norm": 2.094070218470564,
"learning_rate": 4.438967616184754e-06,
"log_odds_chosen": 4.340805530548096,
"log_odds_ratio": -0.027936171740293503,
"logits/chosen": -2.823608875274658,
"logits/rejected": -2.8253750801086426,
"logps/chosen": -0.13957419991493225,
"logps/rejected": -2.268900156021118,
"loss": 0.1108,
"nll_loss": 0.1126783937215805,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006978710647672415,
"rewards/margins": 0.10646629333496094,
"rewards/rejected": -0.11344502121210098,
"step": 2030
},
{
"epoch": 2.139486103828002,
"grad_norm": 2.222098137194295,
"learning_rate": 4.428074427700477e-06,
"log_odds_chosen": 4.698141098022461,
"log_odds_ratio": -0.02707051672041416,
"logits/chosen": -2.8169960975646973,
"logits/rejected": -2.8297157287597656,
"logps/chosen": -0.1413937509059906,
"logps/rejected": -2.65130877494812,
"loss": 0.1166,
"nll_loss": 0.11614535748958588,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007069687359035015,
"rewards/margins": 0.1254957616329193,
"rewards/rejected": -0.1325654536485672,
"step": 2040
},
{
"epoch": 2.149973780807551,
"grad_norm": 2.1988466339750317,
"learning_rate": 4.417261042993862e-06,
"log_odds_chosen": 4.824273109436035,
"log_odds_ratio": -0.022720973938703537,
"logits/chosen": -2.8039610385894775,
"logits/rejected": -2.795748710632324,
"logps/chosen": -0.12069626152515411,
"logps/rejected": -2.613525390625,
"loss": 0.1113,
"nll_loss": 0.10357411205768585,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006034812889993191,
"rewards/margins": 0.12464147806167603,
"rewards/rejected": -0.1306762993335724,
"step": 2050
},
{
"epoch": 2.1604614577871,
"grad_norm": 1.9312492998690272,
"learning_rate": 4.406526492392318e-06,
"log_odds_chosen": 4.532221794128418,
"log_odds_ratio": -0.025564473122358322,
"logits/chosen": -2.856283664703369,
"logits/rejected": -2.847923994064331,
"logps/chosen": -0.15458881855010986,
"logps/rejected": -2.556361198425293,
"loss": 0.1171,
"nll_loss": 0.1105358749628067,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007729442324489355,
"rewards/margins": 0.1200886145234108,
"rewards/rejected": -0.1278180480003357,
"step": 2060
},
{
"epoch": 2.170949134766649,
"grad_norm": 2.184212774032157,
"learning_rate": 4.39586982263858e-06,
"log_odds_chosen": 4.760067462921143,
"log_odds_ratio": -0.025417357683181763,
"logits/chosen": -2.8176796436309814,
"logits/rejected": -2.818103313446045,
"logps/chosen": -0.15180301666259766,
"logps/rejected": -2.774660110473633,
"loss": 0.1148,
"nll_loss": 0.11588319391012192,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007590149994939566,
"rewards/margins": 0.13114285469055176,
"rewards/rejected": -0.13873299956321716,
"step": 2070
},
{
"epoch": 2.1814368117461984,
"grad_norm": 2.151555777196694,
"learning_rate": 4.385290096535147e-06,
"log_odds_chosen": 4.732907772064209,
"log_odds_ratio": -0.026212304830551147,
"logits/chosen": -2.859835147857666,
"logits/rejected": -2.857645034790039,
"logps/chosen": -0.13824030756950378,
"logps/rejected": -2.6506001949310303,
"loss": 0.1132,
"nll_loss": 0.11115143448114395,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.006912014447152615,
"rewards/margins": 0.12561801075935364,
"rewards/rejected": -0.13253000378608704,
"step": 2080
},
{
"epoch": 2.191924488725747,
"grad_norm": 3.2431795321399486,
"learning_rate": 4.374786392598072e-06,
"log_odds_chosen": 4.578325271606445,
"log_odds_ratio": -0.03994257375597954,
"logits/chosen": -2.8212687969207764,
"logits/rejected": -2.7516632080078125,
"logps/chosen": -0.1504596322774887,
"logps/rejected": -2.5710039138793945,
"loss": 0.1095,
"nll_loss": 0.10720662772655487,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007522981613874435,
"rewards/margins": 0.12102720886468887,
"rewards/rejected": -0.128550186753273,
"step": 2090
},
{
"epoch": 2.2024121657052964,
"grad_norm": 2.6693753745610076,
"learning_rate": 4.364357804719848e-06,
"log_odds_chosen": 4.707537651062012,
"log_odds_ratio": -0.025204619392752647,
"logits/chosen": -2.798999309539795,
"logits/rejected": -2.794037342071533,
"logps/chosen": -0.15521793067455292,
"logps/rejected": -2.689946174621582,
"loss": 0.1192,
"nll_loss": 0.12550954520702362,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007760896347463131,
"rewards/margins": 0.12673643231391907,
"rewards/rejected": -0.13449731469154358,
"step": 2100
},
{
"epoch": 2.2024121657052964,
"eval_log_odds_chosen": 0.6958096623420715,
"eval_log_odds_ratio": -0.6209548115730286,
"eval_logits/chosen": -2.837247610092163,
"eval_logits/rejected": -2.8433148860931396,
"eval_logps/chosen": -1.4121639728546143,
"eval_logps/rejected": -1.9619879722595215,
"eval_loss": 0.7576995491981506,
"eval_nll_loss": 0.7199162244796753,
"eval_rewards/accuracies": 0.6726190447807312,
"eval_rewards/chosen": -0.07060819864273071,
"eval_rewards/margins": 0.027491191402077675,
"eval_rewards/rejected": -0.09809939563274384,
"eval_runtime": 136.9058,
"eval_samples_per_second": 14.565,
"eval_steps_per_second": 0.46,
"step": 2100
},
{
"epoch": 2.212899842684845,
"grad_norm": 1.7712476287108132,
"learning_rate": 4.354003441841081e-06,
"log_odds_chosen": 4.905824184417725,
"log_odds_ratio": -0.02992095984518528,
"logits/chosen": -2.8259618282318115,
"logits/rejected": -2.760521650314331,
"logps/chosen": -0.13811610639095306,
"logps/rejected": -2.7983617782592773,
"loss": 0.1173,
"nll_loss": 0.12010955810546875,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.006905805319547653,
"rewards/margins": 0.13301227986812592,
"rewards/rejected": -0.13991808891296387,
"step": 2110
},
{
"epoch": 2.2233875196643944,
"grad_norm": 1.6446106852737563,
"learning_rate": 4.3437224276306945e-06,
"log_odds_chosen": 4.906925201416016,
"log_odds_ratio": -0.017224887385964394,
"logits/chosen": -2.838736057281494,
"logits/rejected": -2.8536746501922607,
"logps/chosen": -0.16129423677921295,
"logps/rejected": -2.8627591133117676,
"loss": 0.1147,
"nll_loss": 0.12654295563697815,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.008064712397754192,
"rewards/margins": 0.1350732445716858,
"rewards/rejected": -0.14313796162605286,
"step": 2120
},
{
"epoch": 2.233875196643943,
"grad_norm": 1.7769911595186116,
"learning_rate": 4.333513900174396e-06,
"log_odds_chosen": 4.821990966796875,
"log_odds_ratio": -0.026227790862321854,
"logits/chosen": -2.829463481903076,
"logits/rejected": -2.842454433441162,
"logps/chosen": -0.1390562653541565,
"logps/rejected": -2.760815143585205,
"loss": 0.1215,
"nll_loss": 0.11114709079265594,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.0069528138265013695,
"rewards/margins": 0.13108794391155243,
"rewards/rejected": -0.13804076611995697,
"step": 2130
},
{
"epoch": 2.2443628736234924,
"grad_norm": 2.186831361943043,
"learning_rate": 4.32337701167117e-06,
"log_odds_chosen": 5.350895881652832,
"log_odds_ratio": -0.0246684979647398,
"logits/chosen": -2.872166156768799,
"logits/rejected": -2.8550028800964355,
"logps/chosen": -0.13888207077980042,
"logps/rejected": -3.2091636657714844,
"loss": 0.1143,
"nll_loss": 0.11629905551671982,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.006944102700799704,
"rewards/margins": 0.1535140872001648,
"rewards/rejected": -0.16045819222927094,
"step": 2140
},
{
"epoch": 2.2548505506030416,
"grad_norm": 2.2764409350931345,
"learning_rate": 4.313310928137537e-06,
"log_odds_chosen": 4.80722713470459,
"log_odds_ratio": -0.025547053664922714,
"logits/chosen": -2.8291611671447754,
"logits/rejected": -2.858245849609375,
"logps/chosen": -0.15937599539756775,
"logps/rejected": -2.8679497241973877,
"loss": 0.1185,
"nll_loss": 0.11574534326791763,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007968800142407417,
"rewards/margins": 0.13542868196964264,
"rewards/rejected": -0.1433974802494049,
"step": 2150
},
{
"epoch": 2.2653382275825904,
"grad_norm": 2.239980255447614,
"learning_rate": 4.303314829119352e-06,
"log_odds_chosen": 5.589659690856934,
"log_odds_ratio": -0.020419184118509293,
"logits/chosen": -2.905287981033325,
"logits/rejected": -2.966031551361084,
"logps/chosen": -0.1542571783065796,
"logps/rejected": -3.551201581954956,
"loss": 0.1236,
"nll_loss": 0.11697031557559967,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007712858729064465,
"rewards/margins": 0.16984722018241882,
"rewards/rejected": -0.17756007611751556,
"step": 2160
},
{
"epoch": 2.2758259045621396,
"grad_norm": 2.009942820215124,
"learning_rate": 4.293387907410919e-06,
"log_odds_chosen": 6.170254707336426,
"log_odds_ratio": -0.017188329249620438,
"logits/chosen": -2.848698139190674,
"logits/rejected": -2.945160388946533,
"logps/chosen": -0.13800857961177826,
"logps/rejected": -4.000069618225098,
"loss": 0.1137,
"nll_loss": 0.11105845123529434,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.006900429725646973,
"rewards/margins": 0.19310306012630463,
"rewards/rejected": -0.2000034749507904,
"step": 2170
},
{
"epoch": 2.2863135815416884,
"grad_norm": 2.1918079846574567,
"learning_rate": 4.2835293687811935e-06,
"log_odds_chosen": 6.479376316070557,
"log_odds_ratio": -0.010083029977977276,
"logits/chosen": -2.7919399738311768,
"logits/rejected": -2.9110770225524902,
"logps/chosen": -0.1471458077430725,
"logps/rejected": -4.402917385101318,
"loss": 0.1149,
"nll_loss": 0.12062163650989532,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007357291877269745,
"rewards/margins": 0.21278861165046692,
"rewards/rejected": -0.22014589607715607,
"step": 2180
},
{
"epoch": 2.2968012585212376,
"grad_norm": 1.9268306821517742,
"learning_rate": 4.273738431706883e-06,
"log_odds_chosen": 6.724373817443848,
"log_odds_ratio": -0.018149670213460922,
"logits/chosen": -2.891892194747925,
"logits/rejected": -3.004826784133911,
"logps/chosen": -0.15707895159721375,
"logps/rejected": -4.773315906524658,
"loss": 0.1119,
"nll_loss": 0.10733366012573242,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007853945717215538,
"rewards/margins": 0.23081183433532715,
"rewards/rejected": -0.23866574466228485,
"step": 2190
},
{
"epoch": 2.3072889355007864,
"grad_norm": 1.9131867908425575,
"learning_rate": 4.264014327112208e-06,
"log_odds_chosen": 6.2542595863342285,
"log_odds_ratio": -0.015775460749864578,
"logits/chosen": -2.862001419067383,
"logits/rejected": -2.91827654838562,
"logps/chosen": -0.14461472630500793,
"logps/rejected": -4.159193515777588,
"loss": 0.1178,
"nll_loss": 0.12322264909744263,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007230737246572971,
"rewards/margins": 0.20072893798351288,
"rewards/rejected": -0.20795968174934387,
"step": 2200
},
{
"epoch": 2.3072889355007864,
"eval_log_odds_chosen": 1.1627599000930786,
"eval_log_odds_ratio": -0.7777736783027649,
"eval_logits/chosen": -2.887819766998291,
"eval_logits/rejected": -2.9106638431549072,
"eval_logps/chosen": -2.4108457565307617,
"eval_logps/rejected": -3.4342026710510254,
"eval_loss": 1.1761772632598877,
"eval_nll_loss": 1.1196904182434082,
"eval_rewards/accuracies": 0.6527777910232544,
"eval_rewards/chosen": -0.12054230272769928,
"eval_rewards/margins": 0.051167842000722885,
"eval_rewards/rejected": -0.17171014845371246,
"eval_runtime": 137.1423,
"eval_samples_per_second": 14.54,
"eval_steps_per_second": 0.459,
"step": 2200
},
{
"epoch": 2.3177766124803356,
"grad_norm": 2.1121501905853624,
"learning_rate": 4.254356298115171e-06,
"log_odds_chosen": 6.363844394683838,
"log_odds_ratio": -0.024754000827670097,
"logits/chosen": -2.8908374309539795,
"logits/rejected": -2.9566292762756348,
"logps/chosen": -0.15381646156311035,
"logps/rejected": -4.287047386169434,
"loss": 0.1181,
"nll_loss": 0.12711365520954132,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007690823636949062,
"rewards/margins": 0.20666155219078064,
"rewards/rejected": -0.21435236930847168,
"step": 2210
},
{
"epoch": 2.3282642894598844,
"grad_norm": 3.84884286912148,
"learning_rate": 4.24476359978009e-06,
"log_odds_chosen": 5.530186176300049,
"log_odds_ratio": -0.017865758389234543,
"logits/chosen": -2.8787178993225098,
"logits/rejected": -2.9533944129943848,
"logps/chosen": -0.1436866670846939,
"logps/rejected": -3.488823652267456,
"loss": 0.1234,
"nll_loss": 0.11815366894006729,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.0071843331679701805,
"rewards/margins": 0.16725686192512512,
"rewards/rejected": -0.17444118857383728,
"step": 2220
},
{
"epoch": 2.3387519664394336,
"grad_norm": 2.417106329176298,
"learning_rate": 4.235235498876268e-06,
"log_odds_chosen": 5.049867630004883,
"log_odds_ratio": -0.030804011970758438,
"logits/chosen": -2.8601975440979004,
"logits/rejected": -2.919813632965088,
"logps/chosen": -0.16016361117362976,
"logps/rejected": -3.108591079711914,
"loss": 0.1205,
"nll_loss": 0.12257065623998642,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.008008181117475033,
"rewards/margins": 0.14742138981819153,
"rewards/rejected": -0.15542957186698914,
"step": 2230
},
{
"epoch": 2.349239643418983,
"grad_norm": 2.0311020060176737,
"learning_rate": 4.2257712736425835e-06,
"log_odds_chosen": 6.287697792053223,
"log_odds_ratio": -0.03303173556923866,
"logits/chosen": -2.8431243896484375,
"logits/rejected": -2.987511396408081,
"logps/chosen": -0.15092086791992188,
"logps/rejected": -4.205324649810791,
"loss": 0.119,
"nll_loss": 0.11937984079122543,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.0075460439547896385,
"rewards/margins": 0.20272019505500793,
"rewards/rejected": -0.21026620268821716,
"step": 2240
},
{
"epoch": 2.3597273203985316,
"grad_norm": 1.8184108922544404,
"learning_rate": 4.216370213557839e-06,
"log_odds_chosen": 6.489804267883301,
"log_odds_ratio": -0.017738422378897667,
"logits/chosen": -2.8637566566467285,
"logits/rejected": -2.9882349967956543,
"logps/chosen": -0.1367037147283554,
"logps/rejected": -4.3643412590026855,
"loss": 0.1103,
"nll_loss": 0.10625318437814713,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006835184991359711,
"rewards/margins": 0.21138188242912292,
"rewards/rejected": -0.21821708977222443,
"step": 2250
},
{
"epoch": 2.370214997378081,
"grad_norm": 1.9927993897844196,
"learning_rate": 4.207031619116713e-06,
"log_odds_chosen": 6.5232744216918945,
"log_odds_ratio": -0.02112133800983429,
"logits/chosen": -2.888134002685547,
"logits/rejected": -2.9766697883605957,
"logps/chosen": -0.13985328376293182,
"logps/rejected": -4.443106174468994,
"loss": 0.1119,
"nll_loss": 0.10387493669986725,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.006992665119469166,
"rewards/margins": 0.21516263484954834,
"rewards/rejected": -0.22215530276298523,
"step": 2260
},
{
"epoch": 2.3807026743576296,
"grad_norm": 1.9179118979680037,
"learning_rate": 4.197754801611136e-06,
"log_odds_chosen": 7.000714302062988,
"log_odds_ratio": -0.01941884122788906,
"logits/chosen": -2.8880743980407715,
"logits/rejected": -3.0280842781066895,
"logps/chosen": -0.1594962626695633,
"logps/rejected": -4.991673946380615,
"loss": 0.1187,
"nll_loss": 0.12734182178974152,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007974812760949135,
"rewards/margins": 0.2416088581085205,
"rewards/rejected": -0.2495836764574051,
"step": 2270
},
{
"epoch": 2.391190351337179,
"grad_norm": 1.7656016453383905,
"learning_rate": 4.188539082916955e-06,
"log_odds_chosen": 5.81030797958374,
"log_odds_ratio": -0.02714763581752777,
"logits/chosen": -2.858682155609131,
"logits/rejected": -2.961153030395508,
"logps/chosen": -0.1495695412158966,
"logps/rejected": -3.7413382530212402,
"loss": 0.117,
"nll_loss": 0.1129683405160904,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007478476967662573,
"rewards/margins": 0.1795884370803833,
"rewards/rejected": -0.18706689774990082,
"step": 2280
},
{
"epoch": 2.401678028316728,
"grad_norm": 1.7721263332581463,
"learning_rate": 4.179383795285729e-06,
"log_odds_chosen": 6.099682807922363,
"log_odds_ratio": -0.016452614217996597,
"logits/chosen": -2.8671703338623047,
"logits/rejected": -2.94566011428833,
"logps/chosen": -0.1470957249403,
"logps/rejected": -4.025435447692871,
"loss": 0.1162,
"nll_loss": 0.1030157208442688,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007354786153882742,
"rewards/margins": 0.19391697645187378,
"rewards/rejected": -0.20127174258232117,
"step": 2290
},
{
"epoch": 2.412165705296277,
"grad_norm": 6.518126509500433,
"learning_rate": 4.170288281141496e-06,
"log_odds_chosen": 5.677874565124512,
"log_odds_ratio": -0.02623058296740055,
"logits/chosen": -2.8755476474761963,
"logits/rejected": -2.926180362701416,
"logps/chosen": -0.15929332375526428,
"logps/rejected": -3.627763271331787,
"loss": 0.1184,
"nll_loss": 0.12096776813268661,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007964666932821274,
"rewards/margins": 0.17342346906661987,
"rewards/rejected": -0.18138816952705383,
"step": 2300
},
{
"epoch": 2.412165705296277,
"eval_log_odds_chosen": 1.3232934474945068,
"eval_log_odds_ratio": -1.0561914443969727,
"eval_logits/chosen": -2.9102423191070557,
"eval_logits/rejected": -2.9226319789886475,
"eval_logps/chosen": -3.8695833683013916,
"eval_logps/rejected": -5.081162452697754,
"eval_loss": 1.8519541025161743,
"eval_nll_loss": 1.7541913986206055,
"eval_rewards/accuracies": 0.636904776096344,
"eval_rewards/chosen": -0.19347918033599854,
"eval_rewards/margins": 0.06057893857359886,
"eval_rewards/rejected": -0.2540581226348877,
"eval_runtime": 140.6912,
"eval_samples_per_second": 14.173,
"eval_steps_per_second": 0.448,
"step": 2300
},
{
"epoch": 2.422653382275826,
"grad_norm": 2.1350280555835317,
"learning_rate": 4.1612518928823956e-06,
"log_odds_chosen": 5.239171028137207,
"log_odds_ratio": -0.0356699600815773,
"logits/chosen": -2.8127808570861816,
"logits/rejected": -2.847365140914917,
"logps/chosen": -0.17353428900241852,
"logps/rejected": -3.4219677448272705,
"loss": 0.1197,
"nll_loss": 0.12273728847503662,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.008676714263856411,
"rewards/margins": 0.16242167353630066,
"rewards/rejected": -0.17109838128089905,
"step": 2310
},
{
"epoch": 2.433141059255375,
"grad_norm": 2.142764154815985,
"learning_rate": 4.1522739926869985e-06,
"log_odds_chosen": 7.10500431060791,
"log_odds_ratio": -0.02759629487991333,
"logits/chosen": -2.8841793537139893,
"logits/rejected": -2.979490280151367,
"logps/chosen": -0.15857262909412384,
"logps/rejected": -5.118218898773193,
"loss": 0.1179,
"nll_loss": 0.11995577812194824,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007928632199764252,
"rewards/margins": 0.24798233807086945,
"rewards/rejected": -0.2559109628200531,
"step": 2320
},
{
"epoch": 2.443628736234924,
"grad_norm": 2.442748493026814,
"learning_rate": 4.143353952325209e-06,
"log_odds_chosen": 6.4824538230896,
"log_odds_ratio": -0.03863966092467308,
"logits/chosen": -2.8798575401306152,
"logits/rejected": -2.975369691848755,
"logps/chosen": -0.16273298859596252,
"logps/rejected": -4.518317222595215,
"loss": 0.1144,
"nll_loss": 0.11924872547388077,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.008136649616062641,
"rewards/margins": 0.21777920424938202,
"rewards/rejected": -0.22591586410999298,
"step": 2330
},
{
"epoch": 2.454116413214473,
"grad_norm": 1.7906952084031593,
"learning_rate": 4.134491152973616e-06,
"log_odds_chosen": 6.330552101135254,
"log_odds_ratio": -0.019993215799331665,
"logits/chosen": -2.903748035430908,
"logits/rejected": -2.961629629135132,
"logps/chosen": -0.1506245732307434,
"logps/rejected": -4.29229736328125,
"loss": 0.1162,
"nll_loss": 0.11873211711645126,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.0075312284752726555,
"rewards/margins": 0.20708362758159637,
"rewards/rejected": -0.2146148979663849,
"step": 2340
},
{
"epoch": 2.464604090194022,
"grad_norm": 2.709543224621687,
"learning_rate": 4.125684985035174e-06,
"log_odds_chosen": 6.674917697906494,
"log_odds_ratio": -0.02191847935318947,
"logits/chosen": -2.869702100753784,
"logits/rejected": -2.9517292976379395,
"logps/chosen": -0.14587149024009705,
"logps/rejected": -4.594050407409668,
"loss": 0.1189,
"nll_loss": 0.11958177387714386,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007293573580682278,
"rewards/margins": 0.2224089354276657,
"rewards/rejected": -0.22970251739025116,
"step": 2350
},
{
"epoch": 2.475091767173571,
"grad_norm": 1.9596617726605967,
"learning_rate": 4.116934847963092e-06,
"log_odds_chosen": 6.008196830749512,
"log_odds_ratio": -0.020748203620314598,
"logits/chosen": -2.859504222869873,
"logits/rejected": -2.9086391925811768,
"logps/chosen": -0.1603454202413559,
"logps/rejected": -4.055342674255371,
"loss": 0.1137,
"nll_loss": 0.11717329174280167,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.008017271757125854,
"rewards/margins": 0.1947498619556427,
"rewards/rejected": -0.20276716351509094,
"step": 2360
},
{
"epoch": 2.48557944415312,
"grad_norm": 25.11227763431921,
"learning_rate": 4.1082401500888055e-06,
"log_odds_chosen": 6.279742240905762,
"log_odds_ratio": -0.01569024845957756,
"logits/chosen": -2.916944742202759,
"logits/rejected": -2.987224578857422,
"logps/chosen": -0.14050395786762238,
"logps/rejected": -4.152866363525391,
"loss": 0.1189,
"nll_loss": 0.10722777992486954,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007025198079645634,
"rewards/margins": 0.20061811804771423,
"rewards/rejected": -0.2076433151960373,
"step": 2370
},
{
"epoch": 2.4960671211326693,
"grad_norm": 1.757332945919827,
"learning_rate": 4.099600308453939e-06,
"log_odds_chosen": 6.39632511138916,
"log_odds_ratio": -0.023090779781341553,
"logits/chosen": -2.8743884563446045,
"logits/rejected": -2.9668736457824707,
"logps/chosen": -0.15729930996894836,
"logps/rejected": -4.314006328582764,
"loss": 0.1177,
"nll_loss": 0.1209021583199501,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007864965125918388,
"rewards/margins": 0.2078353613615036,
"rewards/rejected": -0.21570034325122833,
"step": 2380
},
{
"epoch": 2.506554798112218,
"grad_norm": 2.0524680636282056,
"learning_rate": 4.091014748646132e-06,
"log_odds_chosen": 5.9223713874816895,
"log_odds_ratio": -0.030582841485738754,
"logits/chosen": -2.8992161750793457,
"logits/rejected": -2.929603099822998,
"logps/chosen": -0.1705484390258789,
"logps/rejected": -4.027953147888184,
"loss": 0.1189,
"nll_loss": 0.10802364349365234,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.008527422323822975,
"rewards/margins": 0.19287024438381195,
"rewards/rejected": -0.20139765739440918,
"step": 2390
},
{
"epoch": 2.5170424750917673,
"grad_norm": 1.7245638696745784,
"learning_rate": 4.082482904638631e-06,
"log_odds_chosen": 6.324474811553955,
"log_odds_ratio": -0.018949782475829124,
"logits/chosen": -2.8749866485595703,
"logits/rejected": -2.9224321842193604,
"logps/chosen": -0.1520567536354065,
"logps/rejected": -4.290619850158691,
"loss": 0.1172,
"nll_loss": 0.12284000217914581,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.00760283786803484,
"rewards/margins": 0.20692817866802216,
"rewards/rejected": -0.21453101933002472,
"step": 2400
},
{
"epoch": 2.5170424750917673,
"eval_log_odds_chosen": 1.0075438022613525,
"eval_log_odds_ratio": -0.8145382404327393,
"eval_logits/chosen": -2.8560779094696045,
"eval_logits/rejected": -2.871006965637207,
"eval_logps/chosen": -2.0024044513702393,
"eval_logps/rejected": -2.8670685291290283,
"eval_loss": 1.01926589012146,
"eval_nll_loss": 0.9735569357872009,
"eval_rewards/accuracies": 0.6408730149269104,
"eval_rewards/chosen": -0.10012022405862808,
"eval_rewards/margins": 0.043233200907707214,
"eval_rewards/rejected": -0.1433534324169159,
"eval_runtime": 138.4847,
"eval_samples_per_second": 14.399,
"eval_steps_per_second": 0.455,
"step": 2400
},
{
"epoch": 2.527530152071316,
"grad_norm": 2.140192470773612,
"learning_rate": 4.074004218633553e-06,
"log_odds_chosen": 6.169337272644043,
"log_odds_ratio": -0.024398522451519966,
"logits/chosen": -2.8802199363708496,
"logits/rejected": -2.9575634002685547,
"logps/chosen": -0.14228537678718567,
"logps/rejected": -4.140218257904053,
"loss": 0.1204,
"nll_loss": 0.10762319713830948,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.0071142688393592834,
"rewards/margins": 0.1998966485261917,
"rewards/rejected": -0.20701093971729279,
"step": 2410
},
{
"epoch": 2.5380178290508653,
"grad_norm": 1.9307036538867832,
"learning_rate": 4.065578140908709e-06,
"log_odds_chosen": 6.545037269592285,
"log_odds_ratio": -0.020819999277591705,
"logits/chosen": -2.826190948486328,
"logits/rejected": -2.9180386066436768,
"logps/chosen": -0.15343733131885529,
"logps/rejected": -4.550530433654785,
"loss": 0.1292,
"nll_loss": 0.12483732402324677,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007671866565942764,
"rewards/margins": 0.2198546677827835,
"rewards/rejected": -0.22752651572227478,
"step": 2420
},
{
"epoch": 2.5485055060304145,
"grad_norm": 2.472322893814309,
"learning_rate": 4.057204129667897e-06,
"log_odds_chosen": 6.510749816894531,
"log_odds_ratio": -0.017572391778230667,
"logits/chosen": -2.8476340770721436,
"logits/rejected": -2.9206082820892334,
"logps/chosen": -0.1623007208108902,
"logps/rejected": -4.547110557556152,
"loss": 0.114,
"nll_loss": 0.11619551479816437,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.00811503641307354,
"rewards/margins": 0.21924051642417908,
"rewards/rejected": -0.22735556960105896,
"step": 2430
},
{
"epoch": 2.5589931830099633,
"grad_norm": 3.562558849555077,
"learning_rate": 4.048881650894581e-06,
"log_odds_chosen": 7.486746311187744,
"log_odds_ratio": -0.012338453903794289,
"logits/chosen": -2.8392252922058105,
"logits/rejected": -2.924240827560425,
"logps/chosen": -0.15012109279632568,
"logps/rejected": -5.4815144538879395,
"loss": 0.1213,
"nll_loss": 0.12608163058757782,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007506055291742086,
"rewards/margins": 0.26656967401504517,
"rewards/rejected": -0.2740757167339325,
"step": 2440
},
{
"epoch": 2.5694808599895125,
"grad_norm": 2.3252293901649193,
"learning_rate": 4.040610178208843e-06,
"log_odds_chosen": 7.7740631103515625,
"log_odds_ratio": -0.0118449367582798,
"logits/chosen": -2.795551061630249,
"logits/rejected": -2.8945860862731934,
"logps/chosen": -0.1522868573665619,
"logps/rejected": -5.739714622497559,
"loss": 0.1145,
"nll_loss": 0.11489256471395493,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007614342961460352,
"rewards/margins": 0.27937138080596924,
"rewards/rejected": -0.28698569536209106,
"step": 2450
},
{
"epoch": 2.5799685369690613,
"grad_norm": 2.0157957603988175,
"learning_rate": 4.032389192727559e-06,
"log_odds_chosen": 6.265582084655762,
"log_odds_ratio": -0.024669019505381584,
"logits/chosen": -2.85023832321167,
"logits/rejected": -2.8876233100891113,
"logps/chosen": -0.150896817445755,
"logps/rejected": -4.219937324523926,
"loss": 0.1277,
"nll_loss": 0.12799417972564697,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007544840686023235,
"rewards/margins": 0.20345202088356018,
"rewards/rejected": -0.2109968364238739,
"step": 2460
},
{
"epoch": 2.5904562139486105,
"grad_norm": 2.287376161767263,
"learning_rate": 4.024218182927669e-06,
"log_odds_chosen": 6.810778617858887,
"log_odds_ratio": -0.013128559105098248,
"logits/chosen": -2.823387622833252,
"logits/rejected": -2.879467487335205,
"logps/chosen": -0.15397700667381287,
"logps/rejected": -4.72897481918335,
"loss": 0.1209,
"nll_loss": 0.12541964650154114,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.0076988511718809605,
"rewards/margins": 0.22874990105628967,
"rewards/rejected": -0.236448734998703,
"step": 2470
},
{
"epoch": 2.6009438909281593,
"grad_norm": 2.2717126345189547,
"learning_rate": 4.016096644512495e-06,
"log_odds_chosen": 6.199719429016113,
"log_odds_ratio": -0.018437180668115616,
"logits/chosen": -2.8248672485351562,
"logits/rejected": -2.8656277656555176,
"logps/chosen": -0.14331553876399994,
"logps/rejected": -4.071486949920654,
"loss": 0.1196,
"nll_loss": 0.11505875736474991,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.00716577610000968,
"rewards/margins": 0.19640859961509705,
"rewards/rejected": -0.20357437431812286,
"step": 2480
},
{
"epoch": 2.6114315679077085,
"grad_norm": 2.1379482021716036,
"learning_rate": 4.008024080281012e-06,
"log_odds_chosen": 7.395205497741699,
"log_odds_ratio": -0.01522077340632677,
"logits/chosen": -2.8720109462738037,
"logits/rejected": -2.936903476715088,
"logps/chosen": -0.13911715149879456,
"logps/rejected": -5.221936225891113,
"loss": 0.12,
"nll_loss": 0.12369368225336075,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.0069558583199977875,
"rewards/margins": 0.2541409730911255,
"rewards/rejected": -0.2610968351364136,
"step": 2490
},
{
"epoch": 2.6219192448872572,
"grad_norm": 1.7439578923515293,
"learning_rate": 4.000000000000001e-06,
"log_odds_chosen": 8.536567687988281,
"log_odds_ratio": -0.02061418630182743,
"logits/chosen": -2.854001760482788,
"logits/rejected": -2.9489758014678955,
"logps/chosen": -0.1588824838399887,
"logps/rejected": -6.567204475402832,
"loss": 0.1109,
"nll_loss": 0.11326327174901962,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007944123819470406,
"rewards/margins": 0.32041609287261963,
"rewards/rejected": -0.3283601999282837,
"step": 2500
},
{
"epoch": 2.6219192448872572,
"eval_log_odds_chosen": 1.0766297578811646,
"eval_log_odds_ratio": -0.9767945408821106,
"eval_logits/chosen": -2.8457064628601074,
"eval_logits/rejected": -2.857062339782715,
"eval_logps/chosen": -2.4182989597320557,
"eval_logps/rejected": -3.354691743850708,
"eval_loss": 1.2049823999404907,
"eval_nll_loss": 1.172393560409546,
"eval_rewards/accuracies": 0.6329365372657776,
"eval_rewards/chosen": -0.12091495096683502,
"eval_rewards/margins": 0.046819645911455154,
"eval_rewards/rejected": -0.1677345633506775,
"eval_runtime": 137.7801,
"eval_samples_per_second": 14.472,
"eval_steps_per_second": 0.457,
"step": 2500
},
{
"epoch": 2.6324069218668065,
"grad_norm": 3.8704567483353496,
"learning_rate": 3.992023920278996e-06,
"log_odds_chosen": 6.979190826416016,
"log_odds_ratio": -0.018384801223874092,
"logits/chosen": -2.8529200553894043,
"logits/rejected": -2.923466920852661,
"logps/chosen": -0.14472463726997375,
"logps/rejected": -4.871707916259766,
"loss": 0.1127,
"nll_loss": 0.1109754890203476,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007236232049763203,
"rewards/margins": 0.23634913563728333,
"rewards/rejected": -0.24358537793159485,
"step": 2510
},
{
"epoch": 2.6428945988463557,
"grad_norm": 2.0243407054263933,
"learning_rate": 3.984095364447979e-06,
"log_odds_chosen": 6.955283164978027,
"log_odds_ratio": -0.026280570775270462,
"logits/chosen": -2.845829486846924,
"logits/rejected": -2.9166336059570312,
"logps/chosen": -0.1561572551727295,
"logps/rejected": -4.968081474304199,
"loss": 0.1245,
"nll_loss": 0.11139287799596786,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.0078078629449009895,
"rewards/margins": 0.2405962496995926,
"rewards/rejected": -0.2484041005373001,
"step": 2520
},
{
"epoch": 2.6533822758259045,
"grad_norm": 2.159445384644007,
"learning_rate": 3.97621386243772e-06,
"log_odds_chosen": 8.654619216918945,
"log_odds_ratio": -0.015728970989584923,
"logits/chosen": -2.815493583679199,
"logits/rejected": -2.9511656761169434,
"logps/chosen": -0.1413796991109848,
"logps/rejected": -6.552220821380615,
"loss": 0.1201,
"nll_loss": 0.11258909851312637,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007068985607475042,
"rewards/margins": 0.32054203748703003,
"rewards/rejected": -0.3276110291481018,
"step": 2530
},
{
"epoch": 2.6638699528054537,
"grad_norm": 2.5062335927036123,
"learning_rate": 3.9683789506627254e-06,
"log_odds_chosen": 7.7274370193481445,
"log_odds_ratio": -0.020870521664619446,
"logits/chosen": -2.8319153785705566,
"logits/rejected": -2.922696113586426,
"logps/chosen": -0.15536390244960785,
"logps/rejected": -5.693093776702881,
"loss": 0.1181,
"nll_loss": 0.10906670987606049,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007768194191157818,
"rewards/margins": 0.2768864631652832,
"rewards/rejected": -0.2846546769142151,
"step": 2540
},
{
"epoch": 2.6743576297850025,
"grad_norm": 1.970994291017683,
"learning_rate": 3.960590171906698e-06,
"log_odds_chosen": 7.434384822845459,
"log_odds_ratio": -0.023785177618265152,
"logits/chosen": -2.7982025146484375,
"logits/rejected": -2.8931427001953125,
"logps/chosen": -0.16477976739406586,
"logps/rejected": -5.395650386810303,
"loss": 0.1221,
"nll_loss": 0.13674572110176086,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.008238988928496838,
"rewards/margins": 0.2615435719490051,
"rewards/rejected": -0.26978254318237305,
"step": 2550
},
{
"epoch": 2.6848453067645517,
"grad_norm": 2.0205686734736594,
"learning_rate": 3.952847075210474e-06,
"log_odds_chosen": 7.365771293640137,
"log_odds_ratio": -0.01570904441177845,
"logits/chosen": -2.866798162460327,
"logits/rejected": -2.959561347961426,
"logps/chosen": -0.14348378777503967,
"logps/rejected": -5.177813529968262,
"loss": 0.1204,
"nll_loss": 0.12037654966115952,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007174189202487469,
"rewards/margins": 0.2517164647579193,
"rewards/rejected": -0.25889068841934204,
"step": 2560
},
{
"epoch": 2.695332983744101,
"grad_norm": 1.8761709200806869,
"learning_rate": 3.9451492157623585e-06,
"log_odds_chosen": 8.670493125915527,
"log_odds_ratio": -0.011763294227421284,
"logits/chosen": -2.8013434410095215,
"logits/rejected": -2.920924425125122,
"logps/chosen": -0.16095298528671265,
"logps/rejected": -6.665195465087891,
"loss": 0.1166,
"nll_loss": 0.13346998393535614,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.008047649636864662,
"rewards/margins": 0.32521215081214905,
"rewards/rejected": -0.3332597613334656,
"step": 2570
},
{
"epoch": 2.7058206607236497,
"grad_norm": 2.1285971867573408,
"learning_rate": 3.937496154790789e-06,
"log_odds_chosen": 7.294459342956543,
"log_odds_ratio": -0.018316376954317093,
"logits/chosen": -2.816880702972412,
"logits/rejected": -2.8812124729156494,
"logps/chosen": -0.13620439171791077,
"logps/rejected": -5.142992973327637,
"loss": 0.1195,
"nll_loss": 0.10606805980205536,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.0068102204240858555,
"rewards/margins": 0.25033941864967346,
"rewards/rejected": -0.2571496367454529,
"step": 2580
},
{
"epoch": 2.716308337703199,
"grad_norm": 2.400899470701997,
"learning_rate": 3.9298874594592975e-06,
"log_odds_chosen": 8.10938549041748,
"log_odds_ratio": -0.016252661123871803,
"logits/chosen": -2.807111978530884,
"logits/rejected": -2.915724515914917,
"logps/chosen": -0.15417781472206116,
"logps/rejected": -6.080683708190918,
"loss": 0.1163,
"nll_loss": 0.11585485935211182,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007708890829235315,
"rewards/margins": 0.2963252663612366,
"rewards/rejected": -0.30403420329093933,
"step": 2590
},
{
"epoch": 2.7267960146827477,
"grad_norm": 3.318597907364317,
"learning_rate": 3.922322702763682e-06,
"log_odds_chosen": 8.183881759643555,
"log_odds_ratio": -0.021557733416557312,
"logits/chosen": -2.8544585704803467,
"logits/rejected": -2.9738879203796387,
"logps/chosen": -0.14029571413993835,
"logps/rejected": -6.104724884033203,
"loss": 0.1238,
"nll_loss": 0.11269497871398926,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007014785893261433,
"rewards/margins": 0.2982214391231537,
"rewards/rejected": -0.30523625016212463,
"step": 2600
},
{
"epoch": 2.7267960146827477,
"eval_log_odds_chosen": 1.6673794984817505,
"eval_log_odds_ratio": -1.6934312582015991,
"eval_logits/chosen": -2.9804697036743164,
"eval_logits/rejected": -2.996739387512207,
"eval_logps/chosen": -6.072526454925537,
"eval_logps/rejected": -7.644432067871094,
"eval_loss": 2.6922054290771484,
"eval_nll_loss": 2.6498186588287354,
"eval_rewards/accuracies": 0.5873016119003296,
"eval_rewards/chosen": -0.30362632870674133,
"eval_rewards/margins": 0.07859525829553604,
"eval_rewards/rejected": -0.38222160935401917,
"eval_runtime": 136.8599,
"eval_samples_per_second": 14.57,
"eval_steps_per_second": 0.46,
"step": 2600
},
{
"epoch": 2.737283691662297,
"grad_norm": 2.23878079697452,
"learning_rate": 3.914801463431357e-06,
"log_odds_chosen": 7.083222389221191,
"log_odds_ratio": -0.02951228991150856,
"logits/chosen": -2.8593714237213135,
"logits/rejected": -2.9374592304229736,
"logps/chosen": -0.14687521755695343,
"logps/rejected": -5.056353569030762,
"loss": 0.1245,
"nll_loss": 0.11392644792795181,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007343760691583157,
"rewards/margins": 0.24547390639781952,
"rewards/rejected": -0.25281769037246704,
"step": 2610
},
{
"epoch": 2.7477713686418457,
"grad_norm": 3.0293992863459636,
"learning_rate": 3.907323325822818e-06,
"log_odds_chosen": 5.10004997253418,
"log_odds_ratio": -0.032727014273405075,
"logits/chosen": -2.780730962753296,
"logits/rejected": -2.8234589099884033,
"logps/chosen": -0.14557409286499023,
"logps/rejected": -3.112699031829834,
"loss": 0.1196,
"nll_loss": 0.1244465708732605,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007278704084455967,
"rewards/margins": 0.14835625886917114,
"rewards/rejected": -0.15563495457172394,
"step": 2620
},
{
"epoch": 2.758259045621395,
"grad_norm": 2.2549688272537094,
"learning_rate": 3.8998878798351596e-06,
"log_odds_chosen": 5.7140727043151855,
"log_odds_ratio": -0.026816044002771378,
"logits/chosen": -2.864112377166748,
"logits/rejected": -2.8956217765808105,
"logps/chosen": -0.14010892808437347,
"logps/rejected": -3.677777051925659,
"loss": 0.1148,
"nll_loss": 0.11140565574169159,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007005447056144476,
"rewards/margins": 0.17688342928886414,
"rewards/rejected": -0.18388888239860535,
"step": 2630
},
{
"epoch": 2.7687467226009437,
"grad_norm": 2.3361581110737384,
"learning_rate": 3.892494720807615e-06,
"log_odds_chosen": 6.5437517166137695,
"log_odds_ratio": -0.02287450060248375,
"logits/chosen": -2.835170269012451,
"logits/rejected": -2.904600143432617,
"logps/chosen": -0.15383225679397583,
"logps/rejected": -4.582453727722168,
"loss": 0.1163,
"nll_loss": 0.1210094466805458,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007691613398492336,
"rewards/margins": 0.22143109142780304,
"rewards/rejected": -0.22912268340587616,
"step": 2640
},
{
"epoch": 2.779234399580493,
"grad_norm": 2.113727988806721,
"learning_rate": 3.885143449429057e-06,
"log_odds_chosen": 8.709664344787598,
"log_odds_ratio": -0.01187268365174532,
"logits/chosen": -2.8075308799743652,
"logits/rejected": -2.8737902641296387,
"logps/chosen": -0.15384691953659058,
"logps/rejected": -6.678023338317871,
"loss": 0.1126,
"nll_loss": 0.11222463846206665,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007692346815019846,
"rewards/margins": 0.32620885968208313,
"rewards/rejected": -0.33390119671821594,
"step": 2650
},
{
"epoch": 2.789722076560042,
"grad_norm": 2.1767794366513376,
"learning_rate": 3.877833671647406e-06,
"log_odds_chosen": 7.380768775939941,
"log_odds_ratio": -0.028077024966478348,
"logits/chosen": -2.793292999267578,
"logits/rejected": -2.8911733627319336,
"logps/chosen": -0.15328237414360046,
"logps/rejected": -5.426938533782959,
"loss": 0.1168,
"nll_loss": 0.11543625593185425,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.0076641179621219635,
"rewards/margins": 0.26368287205696106,
"rewards/rejected": -0.27134692668914795,
"step": 2660
},
{
"epoch": 2.800209753539591,
"grad_norm": 2.256877035979117,
"learning_rate": 3.870564998580918e-06,
"log_odds_chosen": 8.639537811279297,
"log_odds_ratio": -0.022679299116134644,
"logits/chosen": -2.811685085296631,
"logits/rejected": -2.9056103229522705,
"logps/chosen": -0.15335455536842346,
"logps/rejected": -6.6522955894470215,
"loss": 0.1172,
"nll_loss": 0.1345623880624771,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.007667726371437311,
"rewards/margins": 0.3249470591545105,
"rewards/rejected": -0.3326147794723511,
"step": 2670
},
{
"epoch": 2.81069743051914,
"grad_norm": 2.0730722454139485,
"learning_rate": 3.863337046431279e-06,
"log_odds_chosen": 6.9750657081604,
"log_odds_ratio": -0.025320613756775856,
"logits/chosen": -2.7947394847869873,
"logits/rejected": -2.846017360687256,
"logps/chosen": -0.13509753346443176,
"logps/rejected": -4.8464508056640625,
"loss": 0.1193,
"nll_loss": 0.10888632386922836,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.006754877511411905,
"rewards/margins": 0.23556765913963318,
"rewards/rejected": -0.24232256412506104,
"step": 2680
},
{
"epoch": 2.821185107498689,
"grad_norm": 1.9858072033613254,
"learning_rate": 3.8561494363984955e-06,
"log_odds_chosen": 9.771112442016602,
"log_odds_ratio": -0.013731351122260094,
"logits/chosen": -2.8062682151794434,
"logits/rejected": -2.9753849506378174,
"logps/chosen": -0.14906486868858337,
"logps/rejected": -7.731194496154785,
"loss": 0.1179,
"nll_loss": 0.11920718103647232,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007453243248164654,
"rewards/margins": 0.37910646200180054,
"rewards/rejected": -0.38655975461006165,
"step": 2690
},
{
"epoch": 2.831672784478238,
"grad_norm": 1.6847580595509726,
"learning_rate": 3.849001794597506e-06,
"log_odds_chosen": 7.8019118309021,
"log_odds_ratio": -0.019792212173342705,
"logits/chosen": -2.8470611572265625,
"logits/rejected": -2.9447550773620605,
"logps/chosen": -0.15314054489135742,
"logps/rejected": -5.769678115844727,
"loss": 0.1192,
"nll_loss": 0.11755287647247314,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007657027803361416,
"rewards/margins": 0.2808268666267395,
"rewards/rejected": -0.2884839177131653,
"step": 2700
},
{
"epoch": 2.831672784478238,
"eval_log_odds_chosen": 1.020140528678894,
"eval_log_odds_ratio": -0.950748860836029,
"eval_logits/chosen": -2.866152763366699,
"eval_logits/rejected": -2.883617877960205,
"eval_logps/chosen": -2.3778645992279053,
"eval_logps/rejected": -3.2670860290527344,
"eval_loss": 1.2390626668930054,
"eval_nll_loss": 1.1910258531570435,
"eval_rewards/accuracies": 0.625,
"eval_rewards/chosen": -0.11889322102069855,
"eval_rewards/margins": 0.04446107894182205,
"eval_rewards/rejected": -0.16335429251194,
"eval_runtime": 137.1045,
"eval_samples_per_second": 14.544,
"eval_steps_per_second": 0.46,
"step": 2700
},
{
"epoch": 2.8421604614577873,
"grad_norm": 2.227062658222717,
"learning_rate": 3.841893751976493e-06,
"log_odds_chosen": 6.429055690765381,
"log_odds_ratio": -0.025566572323441505,
"logits/chosen": -2.8230857849121094,
"logits/rejected": -2.9232447147369385,
"logps/chosen": -0.13817086815834045,
"logps/rejected": -4.313010215759277,
"loss": 0.1236,
"nll_loss": 0.1359073519706726,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.006908542010933161,
"rewards/margins": 0.20874197781085968,
"rewards/rejected": -0.2156505137681961,
"step": 2710
},
{
"epoch": 2.852648138437336,
"grad_norm": 2.108179677461151,
"learning_rate": 3.834824944236852e-06,
"log_odds_chosen": 7.687928676605225,
"log_odds_ratio": -0.019871855154633522,
"logits/chosen": -2.9058802127838135,
"logits/rejected": -3.016103744506836,
"logps/chosen": -0.15432411432266235,
"logps/rejected": -5.692026615142822,
"loss": 0.1226,
"nll_loss": 0.12474212795495987,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.0077162072993814945,
"rewards/margins": 0.27688512206077576,
"rewards/rejected": -0.2846013009548187,
"step": 2720
},
{
"epoch": 2.863135815416885,
"grad_norm": 2.0852362976431627,
"learning_rate": 3.827795011754764e-06,
"log_odds_chosen": 7.531012058258057,
"log_odds_ratio": -0.020183496177196503,
"logits/chosen": -2.9127936363220215,
"logits/rejected": -3.042579174041748,
"logps/chosen": -0.1713821142911911,
"logps/rejected": -5.637821197509766,
"loss": 0.1192,
"nll_loss": 0.1238013282418251,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.00856910552829504,
"rewards/margins": 0.2733219265937805,
"rewards/rejected": -0.2818910479545593,
"step": 2730
},
{
"epoch": 2.873623492396434,
"grad_norm": 2.1240217329220727,
"learning_rate": 3.8208035995043505e-06,
"log_odds_chosen": 7.918447017669678,
"log_odds_ratio": -0.016450051218271255,
"logits/chosen": -2.9222500324249268,
"logits/rejected": -3.0099682807922363,
"logps/chosen": -0.16613063216209412,
"logps/rejected": -5.923202037811279,
"loss": 0.1167,
"nll_loss": 0.11456701904535294,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.00830653216689825,
"rewards/margins": 0.28785353899002075,
"rewards/rejected": -0.2961600720882416,
"step": 2740
},
{
"epoch": 2.8841111693759833,
"grad_norm": 31.79228564478535,
"learning_rate": 3.8138503569823697e-06,
"log_odds_chosen": 6.909941673278809,
"log_odds_ratio": -0.009971695020794868,
"logits/chosen": -2.913257598876953,
"logits/rejected": -3.0123419761657715,
"logps/chosen": -0.14221827685832977,
"logps/rejected": -4.7533063888549805,
"loss": 0.1366,
"nll_loss": 0.12416551262140274,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007110914681106806,
"rewards/margins": 0.23055438697338104,
"rewards/rejected": -0.2376653254032135,
"step": 2750
},
{
"epoch": 2.894598846355532,
"grad_norm": 1.9557051281290665,
"learning_rate": 3.806934938134405e-06,
"log_odds_chosen": 6.693169593811035,
"log_odds_ratio": -0.02671411633491516,
"logits/chosen": -2.8386614322662354,
"logits/rejected": -2.913949966430664,
"logps/chosen": -0.158113032579422,
"logps/rejected": -4.6884589195251465,
"loss": 0.1257,
"nll_loss": 0.13248762488365173,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.007905651815235615,
"rewards/margins": 0.22651728987693787,
"rewards/rejected": -0.23442292213439941,
"step": 2760
},
{
"epoch": 2.9050865233350813,
"grad_norm": 2.137070948069414,
"learning_rate": 3.800057001282532e-06,
"log_odds_chosen": 7.526410102844238,
"log_odds_ratio": -0.018288953229784966,
"logits/chosen": -2.8420822620391846,
"logits/rejected": -2.9359934329986572,
"logps/chosen": -0.13937655091285706,
"logps/rejected": -5.3555192947387695,
"loss": 0.1203,
"nll_loss": 0.11602024734020233,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.00696882838383317,
"rewards/margins": 0.2608071565628052,
"rewards/rejected": -0.2677759826183319,
"step": 2770
},
{
"epoch": 2.91557420031463,
"grad_norm": 1.9039164114563458,
"learning_rate": 3.7932162090544085e-06,
"log_odds_chosen": 8.005070686340332,
"log_odds_ratio": -0.013831285759806633,
"logits/chosen": -2.85080885887146,
"logits/rejected": -2.9412410259246826,
"logps/chosen": -0.14242660999298096,
"logps/rejected": -5.835131645202637,
"loss": 0.115,
"nll_loss": 0.11129038035869598,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007121330592781305,
"rewards/margins": 0.2846352159976959,
"rewards/rejected": -0.2917565703392029,
"step": 2780
},
{
"epoch": 2.9260618772941793,
"grad_norm": 1.9066238493747631,
"learning_rate": 3.7864122283137657e-06,
"log_odds_chosen": 8.59681510925293,
"log_odds_ratio": -0.01634146459400654,
"logits/chosen": -2.811566114425659,
"logits/rejected": -2.953697681427002,
"logps/chosen": -0.1852981150150299,
"logps/rejected": -6.696959495544434,
"loss": 0.1237,
"nll_loss": 0.13221383094787598,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.009264904074370861,
"rewards/margins": 0.3255830705165863,
"rewards/rejected": -0.33484798669815063,
"step": 2790
},
{
"epoch": 2.9365495542737285,
"grad_norm": 2.1229204349942523,
"learning_rate": 3.7796447300922724e-06,
"log_odds_chosen": 8.886019706726074,
"log_odds_ratio": -0.014133910648524761,
"logits/chosen": -2.8244338035583496,
"logits/rejected": -2.9361133575439453,
"logps/chosen": -0.1553722470998764,
"logps/rejected": -6.724435329437256,
"loss": 0.1191,
"nll_loss": 0.11856858432292938,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.007768611423671246,
"rewards/margins": 0.3284532129764557,
"rewards/rejected": -0.3362218141555786,
"step": 2800
},
{
"epoch": 2.9365495542737285,
"eval_log_odds_chosen": 0.9868643283843994,
"eval_log_odds_ratio": -0.8558183312416077,
"eval_logits/chosen": -2.8059191703796387,
"eval_logits/rejected": -2.8221092224121094,
"eval_logps/chosen": -1.9523440599441528,
"eval_logps/rejected": -2.7882232666015625,
"eval_loss": 1.0213509798049927,
"eval_nll_loss": 0.9673047065734863,
"eval_rewards/accuracies": 0.6269841194152832,
"eval_rewards/chosen": -0.09761719405651093,
"eval_rewards/margins": 0.04179396852850914,
"eval_rewards/rejected": -0.13941116631031036,
"eval_runtime": 140.3646,
"eval_samples_per_second": 14.206,
"eval_steps_per_second": 0.449,
"step": 2800
},
{
"epoch": 2.9470372312532773,
"grad_norm": 1.8098718147037927,
"learning_rate": 3.772913389522725e-06,
"log_odds_chosen": 7.045705318450928,
"log_odds_ratio": -0.0264790840446949,
"logits/chosen": -2.8278496265411377,
"logits/rejected": -2.935941696166992,
"logps/chosen": -0.16044145822525024,
"logps/rejected": -5.10351037979126,
"loss": 0.1197,
"nll_loss": 0.11624834686517715,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.008022072724997997,
"rewards/margins": 0.24715343117713928,
"rewards/rejected": -0.25517550110816956,
"step": 2810
},
{
"epoch": 2.9575249082328265,
"grad_norm": 1.8754542855362524,
"learning_rate": 3.7662178857735478e-06,
"log_odds_chosen": 8.025814056396484,
"log_odds_ratio": -0.014746090397238731,
"logits/chosen": -2.7981061935424805,
"logits/rejected": -2.9223358631134033,
"logps/chosen": -0.1609780192375183,
"logps/rejected": -6.0790114402771,
"loss": 0.1164,
"nll_loss": 0.114871546626091,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.00804890040308237,
"rewards/margins": 0.29590168595314026,
"rewards/rejected": -0.30395060777664185,
"step": 2820
},
{
"epoch": 2.9680125852123753,
"grad_norm": 2.270114335100112,
"learning_rate": 3.7595579019845623e-06,
"log_odds_chosen": 7.872386932373047,
"log_odds_ratio": -0.01882219687104225,
"logits/chosen": -2.8168020248413086,
"logits/rejected": -2.900966167449951,
"logps/chosen": -0.1528329849243164,
"logps/rejected": -5.721396446228027,
"loss": 0.117,
"nll_loss": 0.1145024448633194,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.007641649339348078,
"rewards/margins": 0.27842822670936584,
"rewards/rejected": -0.2860698103904724,
"step": 2830
},
{
"epoch": 2.9785002621919245,
"grad_norm": 2.2955550853318907,
"learning_rate": 3.752933125204008e-06,
"log_odds_chosen": 8.305427551269531,
"log_odds_ratio": -0.02256721630692482,
"logits/chosen": -2.8052284717559814,
"logits/rejected": -2.9265544414520264,
"logps/chosen": -0.13989822566509247,
"logps/rejected": -6.217524528503418,
"loss": 0.1182,
"nll_loss": 0.12114028632640839,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.006994911935180426,
"rewards/margins": 0.30388128757476807,
"rewards/rejected": -0.31087619066238403,
"step": 2840
},
{
"epoch": 2.9889879391714738,
"grad_norm": 1.888221991554896,
"learning_rate": 3.7463432463267764e-06,
"log_odds_chosen": 7.020120143890381,
"log_odds_ratio": -0.01538365613669157,
"logits/chosen": -2.8246865272521973,
"logits/rejected": -2.9202027320861816,
"logps/chosen": -0.16290083527565002,
"logps/rejected": -4.992356777191162,
"loss": 0.1252,
"nll_loss": 0.14337727427482605,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.008145040832459927,
"rewards/margins": 0.24147279560565948,
"rewards/rejected": -0.24961784482002258,
"step": 2850
},
{
"epoch": 2.9984268484530676,
"step": 2859,
"total_flos": 0.0,
"train_loss": 0.32389816019492534,
"train_runtime": 62235.4926,
"train_samples_per_second": 2.941,
"train_steps_per_second": 0.046
}
],
"logging_steps": 10,
"max_steps": 2859,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}