{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9984268484530676, "eval_steps": 100, "global_step": 2859, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, "grad_norm": 11.303338968797107, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.16597549617290497, "log_odds_ratio": -0.6960083246231079, "logits/chosen": -2.5440375804901123, "logits/rejected": -2.532742977142334, "logps/chosen": -0.9999498128890991, "logps/rejected": -1.0999202728271484, "loss": 2.7435, "nll_loss": 2.655998706817627, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.049997489899396896, "rewards/margins": 0.004998520482331514, "rewards/rejected": -0.054996006190776825, "step": 10 }, { "epoch": 0.02097535395909806, "grad_norm": 3.296785739531489, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.19497092068195343, "log_odds_ratio": -0.6663684844970703, "logits/chosen": -3.153244733810425, "logits/rejected": -3.176297903060913, "logps/chosen": -0.7618023753166199, "logps/rejected": -0.8721799850463867, "loss": 0.5628, "nll_loss": 0.5223663449287415, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03809012100100517, "rewards/margins": 0.005518879741430283, "rewards/rejected": -0.043609000742435455, "step": 20 }, { "epoch": 0.03146303093864709, "grad_norm": 2.5096714885559264, "learning_rate": 6e-06, "log_odds_chosen": 0.23512229323387146, "log_odds_ratio": -0.6553729772567749, "logits/chosen": -2.9705119132995605, "logits/rejected": -2.944556713104248, "logps/chosen": -0.8099643588066101, "logps/rejected": -0.9404464960098267, "loss": 0.5331, "nll_loss": 0.4915856420993805, "rewards/accuracies": 0.59375, "rewards/chosen": -0.040498219430446625, "rewards/margins": 0.0065241060219705105, "rewards/rejected": -0.04702232405543327, "step": 30 }, { "epoch": 0.04195070791819612, "grad_norm": 2.5670929503530138, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": 0.1703537404537201, "log_odds_ratio": -0.6904168128967285, "logits/chosen": -2.8517043590545654, "logits/rejected": -2.83884334564209, "logps/chosen": -0.805575966835022, "logps/rejected": -0.9237464666366577, "loss": 0.5194, "nll_loss": 0.4799742102622986, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04027879983186722, "rewards/margins": 0.005908523220568895, "rewards/rejected": -0.046187322586774826, "step": 40 }, { "epoch": 0.05243838489774515, "grad_norm": 2.8257696541784587, "learning_rate": 1e-05, "log_odds_chosen": 0.28843408823013306, "log_odds_ratio": -0.6763556599617004, "logits/chosen": -2.7286221981048584, "logits/rejected": -2.72869610786438, "logps/chosen": -0.787534236907959, "logps/rejected": -0.968492865562439, "loss": 0.5419, "nll_loss": 0.48419374227523804, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03937670961022377, "rewards/margins": 0.009047931991517544, "rewards/rejected": -0.04842463880777359, "step": 50 }, { "epoch": 0.06292606187729417, "grad_norm": 2.7270372711002624, "learning_rate": 1.2e-05, "log_odds_chosen": 0.2020198553800583, "log_odds_ratio": -0.6800572872161865, "logits/chosen": -2.896289110183716, "logits/rejected": -2.8839545249938965, "logps/chosen": -0.8010624051094055, "logps/rejected": -0.9179455637931824, "loss": 0.5456, "nll_loss": 0.5158990621566772, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.040053121745586395, "rewards/margins": 0.005844158586114645, "rewards/rejected": -0.04589728266000748, "step": 60 }, { "epoch": 0.07341373885684321, "grad_norm": 2.7197204143491605, "learning_rate": 1.4e-05, "log_odds_chosen": 0.1937415450811386, "log_odds_ratio": -0.6942794919013977, "logits/chosen": -2.8848633766174316, "logits/rejected": -2.905164957046509, "logps/chosen": -0.8219146728515625, "logps/rejected": -0.9291160702705383, "loss": 0.5412, "nll_loss": 0.5311218500137329, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.041095733642578125, "rewards/margins": 0.005360070150345564, "rewards/rejected": -0.046455807983875275, "step": 70 }, { "epoch": 0.08390141583639224, "grad_norm": 1049.2102246099553, "learning_rate": 1.6000000000000003e-05, "log_odds_chosen": 0.1753607988357544, "log_odds_ratio": -0.6886225938796997, "logits/chosen": -2.6637063026428223, "logits/rejected": -2.637396812438965, "logps/chosen": -0.8933579325675964, "logps/rejected": -1.020629644393921, "loss": 1.0694, "nll_loss": 0.9787748456001282, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04466789960861206, "rewards/margins": 0.006363583263009787, "rewards/rejected": -0.051031481474637985, "step": 80 }, { "epoch": 0.09438909281594127, "grad_norm": 4.011701524085754, "learning_rate": 1.8e-05, "log_odds_chosen": 0.2628815174102783, "log_odds_ratio": -0.6731477975845337, "logits/chosen": -3.106489658355713, "logits/rejected": -3.0954391956329346, "logps/chosen": -0.9435924291610718, "logps/rejected": -1.1041589975357056, "loss": 0.5766, "nll_loss": 0.5112682580947876, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04717962443828583, "rewards/margins": 0.008028322830796242, "rewards/rejected": -0.05520794540643692, "step": 90 }, { "epoch": 0.1048767697954903, "grad_norm": 5.340561330006851, "learning_rate": 2e-05, "log_odds_chosen": 0.17503713071346283, "log_odds_ratio": -0.6751121282577515, "logits/chosen": -3.3266518115997314, "logits/rejected": -3.3420982360839844, "logps/chosen": -0.8886896371841431, "logps/rejected": -1.0002682209014893, "loss": 0.5668, "nll_loss": 0.5238600969314575, "rewards/accuracies": 0.5625, "rewards/chosen": -0.044434480369091034, "rewards/margins": 0.005578924436122179, "rewards/rejected": -0.050013404339551926, "step": 100 }, { "epoch": 0.1048767697954903, "eval_log_odds_chosen": 0.21844430267810822, "eval_log_odds_ratio": -0.6529861688613892, "eval_logits/chosen": -3.3082144260406494, "eval_logits/rejected": -3.3147807121276855, "eval_logps/chosen": -0.9112777709960938, "eval_logps/rejected": -1.0580321550369263, "eval_loss": 0.5842872858047485, "eval_nll_loss": 0.5515953898429871, "eval_rewards/accuracies": 0.6150793433189392, "eval_rewards/chosen": -0.04556388780474663, "eval_rewards/margins": 0.007337724789977074, "eval_rewards/rejected": -0.05290161445736885, "eval_runtime": 138.2645, "eval_samples_per_second": 14.422, "eval_steps_per_second": 0.456, "step": 100 }, { "epoch": 0.11536444677503933, "grad_norm": 2.8100337089038514, "learning_rate": 1.9069251784911845e-05, "log_odds_chosen": 0.2544933259487152, "log_odds_ratio": -0.643945038318634, "logits/chosen": -3.2667174339294434, "logits/rejected": -3.310918092727661, "logps/chosen": -0.8447545170783997, "logps/rejected": -1.009132981300354, "loss": 0.5651, "nll_loss": 0.5105677843093872, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.042237721383571625, "rewards/margins": 0.008218927308917046, "rewards/rejected": -0.05045665428042412, "step": 110 }, { "epoch": 0.12585212375458835, "grad_norm": 2.2193460343172986, "learning_rate": 1.825741858350554e-05, "log_odds_chosen": 0.24397364258766174, "log_odds_ratio": -0.6682508587837219, "logits/chosen": -3.193361282348633, "logits/rejected": -3.243128538131714, "logps/chosen": -0.8714381456375122, "logps/rejected": -1.0333614349365234, "loss": 0.6091, "nll_loss": 0.5700744390487671, "rewards/accuracies": 0.625, "rewards/chosen": -0.04357190802693367, "rewards/margins": 0.008096165955066681, "rewards/rejected": -0.051668066531419754, "step": 120 }, { "epoch": 0.1363398007341374, "grad_norm": 2.3414921674264555, "learning_rate": 1.7541160386140587e-05, "log_odds_chosen": 0.2272050678730011, "log_odds_ratio": -0.6708214282989502, "logits/chosen": -3.1920104026794434, "logits/rejected": -3.211714267730713, "logps/chosen": -0.8986352682113647, "logps/rejected": -1.0474598407745361, "loss": 0.5886, "nll_loss": 0.552306056022644, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.044931765645742416, "rewards/margins": 0.007441227789968252, "rewards/rejected": -0.052372999489307404, "step": 130 }, { "epoch": 0.14682747771368643, "grad_norm": 2.3255085925590597, "learning_rate": 1.6903085094570334e-05, "log_odds_chosen": 0.22232067584991455, "log_odds_ratio": -0.6680520176887512, "logits/chosen": -3.1715519428253174, "logits/rejected": -3.198253631591797, "logps/chosen": -0.9551104307174683, "logps/rejected": -1.1022988557815552, "loss": 0.5878, "nll_loss": 0.5523446798324585, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04775552451610565, "rewards/margins": 0.007359415292739868, "rewards/rejected": -0.05511493608355522, "step": 140 }, { "epoch": 0.15731515469323545, "grad_norm": 2.6729814886854766, "learning_rate": 1.6329931618554523e-05, "log_odds_chosen": 0.17247287929058075, "log_odds_ratio": -0.7340894341468811, "logits/chosen": -3.102067470550537, "logits/rejected": -3.1263070106506348, "logps/chosen": -0.9946192502975464, "logps/rejected": -1.1088117361068726, "loss": 0.5489, "nll_loss": 0.5492355823516846, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04973096773028374, "rewards/margins": 0.005709617864340544, "rewards/rejected": -0.05544058233499527, "step": 150 }, { "epoch": 0.16780283167278448, "grad_norm": 2.603021066142599, "learning_rate": 1.5811388300841898e-05, "log_odds_chosen": 0.2041763812303543, "log_odds_ratio": -0.6666288375854492, "logits/chosen": -3.0764000415802, "logits/rejected": -3.1064279079437256, "logps/chosen": -0.9137493968009949, "logps/rejected": -1.0383034944534302, "loss": 0.6063, "nll_loss": 0.5569471120834351, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.045687466859817505, "rewards/margins": 0.006227707955986261, "rewards/rejected": -0.05191517621278763, "step": 160 }, { "epoch": 0.1782905086523335, "grad_norm": 2.4919552056925416, "learning_rate": 1.533929977694741e-05, "log_odds_chosen": 0.25588172674179077, "log_odds_ratio": -0.6607967019081116, "logits/chosen": -3.1293396949768066, "logits/rejected": -3.1606574058532715, "logps/chosen": -0.8986794352531433, "logps/rejected": -1.0667051076889038, "loss": 0.5845, "nll_loss": 0.5496193766593933, "rewards/accuracies": 0.5625, "rewards/chosen": -0.044933974742889404, "rewards/margins": 0.008401280269026756, "rewards/rejected": -0.05333525687456131, "step": 170 }, { "epoch": 0.18877818563188253, "grad_norm": 2.4600198980545915, "learning_rate": 1.49071198499986e-05, "log_odds_chosen": 0.27393144369125366, "log_odds_ratio": -0.6479635238647461, "logits/chosen": -3.080091714859009, "logits/rejected": -3.103672504425049, "logps/chosen": -0.9190357327461243, "logps/rejected": -1.0871737003326416, "loss": 0.5676, "nll_loss": 0.550677478313446, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.045951783657073975, "rewards/margins": 0.00840689055621624, "rewards/rejected": -0.05435867980122566, "step": 180 }, { "epoch": 0.19926586261143156, "grad_norm": 5.689090620434962, "learning_rate": 1.4509525002200235e-05, "log_odds_chosen": 0.23676976561546326, "log_odds_ratio": -0.6501709222793579, "logits/chosen": -3.0815584659576416, "logits/rejected": -3.1054322719573975, "logps/chosen": -0.9278916120529175, "logps/rejected": -1.0751855373382568, "loss": 0.5906, "nll_loss": 0.6120038628578186, "rewards/accuracies": 0.59375, "rewards/chosen": -0.046394579112529755, "rewards/margins": 0.007364692632108927, "rewards/rejected": -0.05375927686691284, "step": 190 }, { "epoch": 0.2097535395909806, "grad_norm": 2.2848535898780375, "learning_rate": 1.4142135623730951e-05, "log_odds_chosen": 0.2697228789329529, "log_odds_ratio": -0.6704415082931519, "logits/chosen": -2.99995756149292, "logits/rejected": -3.038682460784912, "logps/chosen": -0.9138332605361938, "logps/rejected": -1.1080011129379272, "loss": 0.5676, "nll_loss": 0.5736643075942993, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04569166153669357, "rewards/margins": 0.009708395227789879, "rewards/rejected": -0.0554000549018383, "step": 200 }, { "epoch": 0.2097535395909806, "eval_log_odds_chosen": 0.2850116789340973, "eval_log_odds_ratio": -0.6474155783653259, "eval_logits/chosen": -2.9992330074310303, "eval_logits/rejected": -3.0026443004608154, "eval_logps/chosen": -0.8811094164848328, "eval_logps/rejected": -1.0644237995147705, "eval_loss": 0.5726434588432312, "eval_nll_loss": 0.5359312295913696, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.04405546560883522, "eval_rewards/margins": 0.00916572567075491, "eval_rewards/rejected": -0.053221192210912704, "eval_runtime": 137.9025, "eval_samples_per_second": 14.459, "eval_steps_per_second": 0.457, "step": 200 }, { "epoch": 0.22024121657052964, "grad_norm": 2.2864637176453266, "learning_rate": 1.3801311186847084e-05, "log_odds_chosen": 0.10374544560909271, "log_odds_ratio": -0.7170687913894653, "logits/chosen": -3.0079314708709717, "logits/rejected": -3.026061773300171, "logps/chosen": -0.8713214993476868, "logps/rejected": -0.9376395344734192, "loss": 0.5683, "nll_loss": 0.5364366769790649, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04356607422232628, "rewards/margins": 0.003315900219604373, "rewards/rejected": -0.04688197374343872, "step": 210 }, { "epoch": 0.23072889355007867, "grad_norm": 2.3833164568305705, "learning_rate": 1.3483997249264842e-05, "log_odds_chosen": 0.1967695653438568, "log_odds_ratio": -0.6872244477272034, "logits/chosen": -3.066392183303833, "logits/rejected": -3.0755832195281982, "logps/chosen": -0.8734294176101685, "logps/rejected": -0.9998324513435364, "loss": 0.5608, "nll_loss": 0.5176301598548889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.043671466410160065, "rewards/margins": 0.006320156157016754, "rewards/rejected": -0.04999162256717682, "step": 220 }, { "epoch": 0.2412165705296277, "grad_norm": 2.143148051812647, "learning_rate": 1.3187609467915744e-05, "log_odds_chosen": 0.2681586444377899, "log_odds_ratio": -0.669995129108429, "logits/chosen": -3.0045371055603027, "logits/rejected": -3.023197889328003, "logps/chosen": -0.9347988963127136, "logps/rejected": -1.1079022884368896, "loss": 0.5715, "nll_loss": 0.5268279910087585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04673994705080986, "rewards/margins": 0.00865517370402813, "rewards/rejected": -0.05539512634277344, "step": 230 }, { "epoch": 0.2517042475091767, "grad_norm": 2.4867634050680865, "learning_rate": 1.2909944487358057e-05, "log_odds_chosen": 0.2310989797115326, "log_odds_ratio": -0.6607853770256042, "logits/chosen": -3.0592639446258545, "logits/rejected": -3.0972437858581543, "logps/chosen": -0.90626060962677, "logps/rejected": -1.057490587234497, "loss": 0.5797, "nll_loss": 0.5543950796127319, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0453130342066288, "rewards/margins": 0.007561509497463703, "rewards/rejected": -0.05287454277276993, "step": 240 }, { "epoch": 0.26219192448872575, "grad_norm": 2.2846935841220364, "learning_rate": 1.2649110640673518e-05, "log_odds_chosen": 0.24984344840049744, "log_odds_ratio": -0.6764962077140808, "logits/chosen": -3.0678868293762207, "logits/rejected": -3.0685126781463623, "logps/chosen": -0.8884732127189636, "logps/rejected": -1.025420904159546, "loss": 0.5498, "nll_loss": 0.5219429731369019, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.0444236658513546, "rewards/margins": 0.006847388111054897, "rewards/rejected": -0.05127105116844177, "step": 250 }, { "epoch": 0.2726796014682748, "grad_norm": 2.3800633619201523, "learning_rate": 1.2403473458920845e-05, "log_odds_chosen": 0.2426706850528717, "log_odds_ratio": -0.6691194772720337, "logits/chosen": -3.0950028896331787, "logits/rejected": -3.112684488296509, "logps/chosen": -0.8879591822624207, "logps/rejected": -1.042834997177124, "loss": 0.5302, "nll_loss": 0.45519179105758667, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04439795762300491, "rewards/margins": 0.00774379214271903, "rewards/rejected": -0.05214175581932068, "step": 260 }, { "epoch": 0.2831672784478238, "grad_norm": 2.3697586961370027, "learning_rate": 1.2171612389003691e-05, "log_odds_chosen": 0.23119862377643585, "log_odds_ratio": -0.6756153702735901, "logits/chosen": -3.113889455795288, "logits/rejected": -3.157740354537964, "logps/chosen": -0.9564247131347656, "logps/rejected": -1.1352105140686035, "loss": 0.5654, "nll_loss": 0.5433498024940491, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.04782123863697052, "rewards/margins": 0.008939290419220924, "rewards/rejected": -0.056760527193546295, "step": 270 }, { "epoch": 0.29365495542737285, "grad_norm": 1.9757109026566833, "learning_rate": 1.1952286093343936e-05, "log_odds_chosen": 0.25132113695144653, "log_odds_ratio": -0.6663895845413208, "logits/chosen": -3.1407101154327393, "logits/rejected": -3.1832191944122314, "logps/chosen": -0.9308640360832214, "logps/rejected": -1.087449312210083, "loss": 0.5429, "nll_loss": 0.4785974621772766, "rewards/accuracies": 0.625, "rewards/chosen": -0.04654319956898689, "rewards/margins": 0.007829269394278526, "rewards/rejected": -0.05437246710062027, "step": 280 }, { "epoch": 0.30414263240692185, "grad_norm": 2.7308236297418427, "learning_rate": 1.1744404390294071e-05, "log_odds_chosen": 0.35913094878196716, "log_odds_ratio": -0.6187662482261658, "logits/chosen": -3.0944533348083496, "logits/rejected": -3.1177055835723877, "logps/chosen": -0.8355825543403625, "logps/rejected": -1.0572632551193237, "loss": 0.5568, "nll_loss": 0.48925265669822693, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.041779130697250366, "rewards/margins": 0.011084041558206081, "rewards/rejected": -0.052863169461488724, "step": 290 }, { "epoch": 0.3146303093864709, "grad_norm": 2.472653160364779, "learning_rate": 1.1547005383792517e-05, "log_odds_chosen": 0.2816540598869324, "log_odds_ratio": -0.6775935888290405, "logits/chosen": -3.092194080352783, "logits/rejected": -3.1420485973358154, "logps/chosen": -0.8778301477432251, "logps/rejected": -1.0717580318450928, "loss": 0.5819, "nll_loss": 0.5100258588790894, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04389150068163872, "rewards/margins": 0.009696396067738533, "rewards/rejected": -0.0535879023373127, "step": 300 }, { "epoch": 0.3146303093864709, "eval_log_odds_chosen": 0.28298813104629517, "eval_log_odds_ratio": -0.6463662981987, "eval_logits/chosen": -3.1391000747680664, "eval_logits/rejected": -3.1424100399017334, "eval_logps/chosen": -0.8770027756690979, "eval_logps/rejected": -1.0619502067565918, "eval_loss": 0.5552015900611877, "eval_nll_loss": 0.5201771259307861, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.043850142508745193, "eval_rewards/margins": 0.00924737099558115, "eval_rewards/rejected": -0.05309751257300377, "eval_runtime": 141.1002, "eval_samples_per_second": 14.132, "eval_steps_per_second": 0.446, "step": 300 }, { "epoch": 0.3251179863660199, "grad_norm": 2.038557141198459, "learning_rate": 1.1359236684941297e-05, "log_odds_chosen": 0.1998841017484665, "log_odds_ratio": -0.6875525116920471, "logits/chosen": -3.0676262378692627, "logits/rejected": -3.07094407081604, "logps/chosen": -0.9092122912406921, "logps/rejected": -1.0280473232269287, "loss": 0.5844, "nll_loss": 0.5417822599411011, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04546061158180237, "rewards/margins": 0.005941747687757015, "rewards/rejected": -0.051402367651462555, "step": 310 }, { "epoch": 0.33560566334556896, "grad_norm": 2.262270965184679, "learning_rate": 1.118033988749895e-05, "log_odds_chosen": 0.2705835700035095, "log_odds_ratio": -0.6538633108139038, "logits/chosen": -3.127427339553833, "logits/rejected": -3.142587661743164, "logps/chosen": -0.9069059491157532, "logps/rejected": -1.0691728591918945, "loss": 0.5242, "nll_loss": 0.4929099977016449, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04534530267119408, "rewards/margins": 0.008113345131278038, "rewards/rejected": -0.05345864221453667, "step": 320 }, { "epoch": 0.34609334032511796, "grad_norm": 2.4122464498293623, "learning_rate": 1.1009637651263608e-05, "log_odds_chosen": 0.23684370517730713, "log_odds_ratio": -0.7030869722366333, "logits/chosen": -3.0819878578186035, "logits/rejected": -3.1327972412109375, "logps/chosen": -0.9059860110282898, "logps/rejected": -1.0601646900177002, "loss": 0.5547, "nll_loss": 0.5366790890693665, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04529929906129837, "rewards/margins": 0.007708935532718897, "rewards/rejected": -0.05300822854042053, "step": 330 }, { "epoch": 0.356581017304667, "grad_norm": 2.3793498474146535, "learning_rate": 1.0846522890932809e-05, "log_odds_chosen": 0.18786638975143433, "log_odds_ratio": -0.6986292004585266, "logits/chosen": -3.0940568447113037, "logits/rejected": -3.1512954235076904, "logps/chosen": -0.8602282404899597, "logps/rejected": -0.9875131845474243, "loss": 0.5702, "nll_loss": 0.5145949125289917, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.043011412024497986, "rewards/margins": 0.0063642458990216255, "rewards/rejected": -0.049375660717487335, "step": 340 }, { "epoch": 0.36706869428421607, "grad_norm": 2.3420960793915517, "learning_rate": 1.0690449676496977e-05, "log_odds_chosen": 0.2689460217952728, "log_odds_ratio": -0.6845754384994507, "logits/chosen": -3.1326746940612793, "logits/rejected": -3.1552205085754395, "logps/chosen": -0.8725005984306335, "logps/rejected": -1.0421197414398193, "loss": 0.5462, "nll_loss": 0.5172144174575806, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.043625034391880035, "rewards/margins": 0.008480949327349663, "rewards/rejected": -0.05210598558187485, "step": 350 }, { "epoch": 0.37755637126376507, "grad_norm": 2.014589871880686, "learning_rate": 1.0540925533894598e-05, "log_odds_chosen": 0.37792789936065674, "log_odds_ratio": -0.6156649589538574, "logits/chosen": -3.010802745819092, "logits/rejected": -3.042652130126953, "logps/chosen": -0.8830682635307312, "logps/rejected": -1.118240237236023, "loss": 0.5497, "nll_loss": 0.5099813938140869, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04415341466665268, "rewards/margins": 0.011758595705032349, "rewards/rejected": -0.05591200664639473, "step": 360 }, { "epoch": 0.3880440482433141, "grad_norm": 2.0494786838330903, "learning_rate": 1.0397504898200728e-05, "log_odds_chosen": 0.37991228699684143, "log_odds_ratio": -0.6151097416877747, "logits/chosen": -3.071289539337158, "logits/rejected": -3.0840888023376465, "logps/chosen": -0.863991379737854, "logps/rejected": -1.1161118745803833, "loss": 0.5195, "nll_loss": 0.4998775124549866, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.0431995615363121, "rewards/margins": 0.012606029398739338, "rewards/rejected": -0.055805593729019165, "step": 370 }, { "epoch": 0.3985317252228631, "grad_norm": 2.3440751758332294, "learning_rate": 1.0259783520851543e-05, "log_odds_chosen": 0.4805373549461365, "log_odds_ratio": -0.5845500230789185, "logits/chosen": -3.1311728954315186, "logits/rejected": -3.168400287628174, "logps/chosen": -0.8546767234802246, "logps/rejected": -1.1352304220199585, "loss": 0.5371, "nll_loss": 0.5167530179023743, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.04273384064435959, "rewards/margins": 0.014027683064341545, "rewards/rejected": -0.056761521846055984, "step": 380 }, { "epoch": 0.4090194022024122, "grad_norm": 2.50155675830033, "learning_rate": 1.0127393670836667e-05, "log_odds_chosen": 0.0912429466843605, "log_odds_ratio": -0.7177212238311768, "logits/chosen": -3.1054975986480713, "logits/rejected": -3.1308093070983887, "logps/chosen": -0.9102872014045715, "logps/rejected": -0.9754246473312378, "loss": 0.5574, "nll_loss": 0.5331951379776001, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.045514363795518875, "rewards/margins": 0.003256872994825244, "rewards/rejected": -0.04877123609185219, "step": 390 }, { "epoch": 0.4195070791819612, "grad_norm": 2.027467517514936, "learning_rate": 1e-05, "log_odds_chosen": 0.2633103132247925, "log_odds_ratio": -0.6879682540893555, "logits/chosen": -3.0087058544158936, "logits/rejected": -3.0386600494384766, "logps/chosen": -0.9468951225280762, "logps/rejected": -1.1236045360565186, "loss": 0.5738, "nll_loss": 0.527585506439209, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04734475538134575, "rewards/margins": 0.008835467509925365, "rewards/rejected": -0.05618022754788399, "step": 400 }, { "epoch": 0.4195070791819612, "eval_log_odds_chosen": 0.2960740923881531, "eval_log_odds_ratio": -0.6521593332290649, "eval_logits/chosen": -3.1019551753997803, "eval_logits/rejected": -3.1026368141174316, "eval_logps/chosen": -0.8433709740638733, "eval_logps/rejected": -1.0346297025680542, "eval_loss": 0.5411269664764404, "eval_nll_loss": 0.5047088265419006, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.042168550193309784, "eval_rewards/margins": 0.00956293661147356, "eval_rewards/rejected": -0.05173148587346077, "eval_runtime": 135.94, "eval_samples_per_second": 14.668, "eval_steps_per_second": 0.463, "step": 400 }, { "epoch": 0.4299947561615102, "grad_norm": 2.077556227084633, "learning_rate": 9.877295966495898e-06, "log_odds_chosen": 0.1433972865343094, "log_odds_ratio": -0.7417241930961609, "logits/chosen": -3.147104024887085, "logits/rejected": -3.1611135005950928, "logps/chosen": -0.8865131139755249, "logps/rejected": -0.9979325532913208, "loss": 0.5454, "nll_loss": 0.4825812876224518, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.044325657188892365, "rewards/margins": 0.005570969078689814, "rewards/rejected": -0.04989662766456604, "step": 410 }, { "epoch": 0.4404824331410593, "grad_norm": 1.9177361456178337, "learning_rate": 9.759000729485331e-06, "log_odds_chosen": 0.2965100407600403, "log_odds_ratio": -0.6552795171737671, "logits/chosen": -3.065213203430176, "logits/rejected": -3.106889247894287, "logps/chosen": -0.8926699757575989, "logps/rejected": -1.073974609375, "loss": 0.5349, "nll_loss": 0.47521886229515076, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04463350027799606, "rewards/margins": 0.009065226651728153, "rewards/rejected": -0.05369872972369194, "step": 420 }, { "epoch": 0.4509701101206083, "grad_norm": 2.2675621915351503, "learning_rate": 9.644856443408244e-06, "log_odds_chosen": 0.29174235463142395, "log_odds_ratio": -0.6506129503250122, "logits/chosen": -3.075723648071289, "logits/rejected": -3.0862226486206055, "logps/chosen": -0.8427901268005371, "logps/rejected": -1.0184295177459717, "loss": 0.5557, "nll_loss": 0.5429800152778625, "rewards/accuracies": 0.65625, "rewards/chosen": -0.04213951155543327, "rewards/margins": 0.008781969547271729, "rewards/rejected": -0.050921481102705, "step": 430 }, { "epoch": 0.46145778710015734, "grad_norm": 2.048479923586714, "learning_rate": 9.534625892455923e-06, "log_odds_chosen": 0.2715272009372711, "log_odds_ratio": -0.6504871249198914, "logits/chosen": -3.114889144897461, "logits/rejected": -3.1430869102478027, "logps/chosen": -0.8674638867378235, "logps/rejected": -1.0402857065200806, "loss": 0.5502, "nll_loss": 0.5185979604721069, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04337319731712341, "rewards/margins": 0.008641095831990242, "rewards/rejected": -0.05201428383588791, "step": 440 }, { "epoch": 0.47194546407970633, "grad_norm": 1.9700303764265876, "learning_rate": 9.428090415820635e-06, "log_odds_chosen": 0.37898144125938416, "log_odds_ratio": -0.6548101305961609, "logits/chosen": -3.141404390335083, "logits/rejected": -3.1785435676574707, "logps/chosen": -0.8289934396743774, "logps/rejected": -1.080649733543396, "loss": 0.5278, "nll_loss": 0.49574679136276245, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04144967347383499, "rewards/margins": 0.01258282084017992, "rewards/rejected": -0.05403248593211174, "step": 450 }, { "epoch": 0.4824331410592554, "grad_norm": 2.1444885294890796, "learning_rate": 9.325048082403139e-06, "log_odds_chosen": 0.21225424110889435, "log_odds_ratio": -0.6999707221984863, "logits/chosen": -3.110089063644409, "logits/rejected": -3.1592323780059814, "logps/chosen": -0.947162926197052, "logps/rejected": -1.1105449199676514, "loss": 0.5315, "nll_loss": 0.5339683890342712, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04735814779996872, "rewards/margins": 0.008169097825884819, "rewards/rejected": -0.05552724748849869, "step": 460 }, { "epoch": 0.4929208180388044, "grad_norm": 2.1649660190560613, "learning_rate": 9.225312080288851e-06, "log_odds_chosen": 0.2549912929534912, "log_odds_ratio": -0.6857655644416809, "logits/chosen": -3.0928080081939697, "logits/rejected": -3.1287431716918945, "logps/chosen": -0.8865912556648254, "logps/rejected": -1.050857663154602, "loss": 0.5421, "nll_loss": 0.5101572275161743, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.044329557567834854, "rewards/margins": 0.008213317021727562, "rewards/rejected": -0.052542876452207565, "step": 470 }, { "epoch": 0.5034084950183534, "grad_norm": 1.89898044344756, "learning_rate": 9.12870929175277e-06, "log_odds_chosen": 0.18933558464050293, "log_odds_ratio": -0.7031041383743286, "logits/chosen": -3.1588873863220215, "logits/rejected": -3.1968955993652344, "logps/chosen": -0.8558489680290222, "logps/rejected": -0.980047881603241, "loss": 0.5174, "nll_loss": 0.5121264457702637, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04279245063662529, "rewards/margins": 0.006209943443536758, "rewards/rejected": -0.04900239408016205, "step": 480 }, { "epoch": 0.5138961719979025, "grad_norm": 1.9212510076087481, "learning_rate": 9.035079029052514e-06, "log_odds_chosen": 0.23131313920021057, "log_odds_ratio": -0.6693936586380005, "logits/chosen": -3.094421625137329, "logits/rejected": -3.1039950847625732, "logps/chosen": -0.9284296035766602, "logps/rejected": -1.0470894575119019, "loss": 0.5391, "nll_loss": 0.5019217729568481, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04642148315906525, "rewards/margins": 0.005932994186878204, "rewards/rejected": -0.05235447734594345, "step": 490 }, { "epoch": 0.5243838489774515, "grad_norm": 2.197524211966931, "learning_rate": 8.94427190999916e-06, "log_odds_chosen": 0.2233821153640747, "log_odds_ratio": -0.6923887729644775, "logits/chosen": -3.0647079944610596, "logits/rejected": -3.0620505809783936, "logps/chosen": -0.8755196332931519, "logps/rejected": -1.0028659105300903, "loss": 0.5478, "nll_loss": 0.5219477415084839, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04377598315477371, "rewards/margins": 0.0063673085533082485, "rewards/rejected": -0.0501432940363884, "step": 500 }, { "epoch": 0.5243838489774515, "eval_log_odds_chosen": 0.33266139030456543, "eval_log_odds_ratio": -0.6382430791854858, "eval_logits/chosen": -3.028609275817871, "eval_logits/rejected": -3.0259969234466553, "eval_logps/chosen": -0.8414799571037292, "eval_logps/rejected": -1.0509231090545654, "eval_loss": 0.5319445133209229, "eval_nll_loss": 0.49702468514442444, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.04207399860024452, "eval_rewards/margins": 0.010472159832715988, "eval_rewards/rejected": -0.05254615470767021, "eval_runtime": 136.7326, "eval_samples_per_second": 14.583, "eval_steps_per_second": 0.461, "step": 500 }, { "epoch": 0.5348715259570005, "grad_norm": 1.7639475332504142, "learning_rate": 8.856148855400955e-06, "log_odds_chosen": 0.29167047142982483, "log_odds_ratio": -0.648201048374176, "logits/chosen": -3.0114383697509766, "logits/rejected": -3.024693250656128, "logps/chosen": -0.841100811958313, "logps/rejected": -1.0192333459854126, "loss": 0.5263, "nll_loss": 0.5350626111030579, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04205504059791565, "rewards/margins": 0.00890662893652916, "rewards/rejected": -0.05096167325973511, "step": 510 }, { "epoch": 0.5453592029365496, "grad_norm": 1.6884098835310988, "learning_rate": 8.770580193070294e-06, "log_odds_chosen": 0.24579331278800964, "log_odds_ratio": -0.6814862489700317, "logits/chosen": -3.016019582748413, "logits/rejected": -3.0255684852600098, "logps/chosen": -0.9082791209220886, "logps/rejected": -1.0769283771514893, "loss": 0.5369, "nll_loss": 0.47502464056015015, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04541395604610443, "rewards/margins": 0.008432453498244286, "rewards/rejected": -0.053846411406993866, "step": 520 }, { "epoch": 0.5558468799160986, "grad_norm": 1.7588436164574766, "learning_rate": 8.687444855261389e-06, "log_odds_chosen": 0.39766445755958557, "log_odds_ratio": -0.6521557569503784, "logits/chosen": -3.0906691551208496, "logits/rejected": -3.1090755462646484, "logps/chosen": -0.8297191858291626, "logps/rejected": -1.1049801111221313, "loss": 0.5364, "nll_loss": 0.450814813375473, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04148596152663231, "rewards/margins": 0.01376304216682911, "rewards/rejected": -0.05524900555610657, "step": 530 }, { "epoch": 0.5663345568956476, "grad_norm": 1.9397603724841295, "learning_rate": 8.606629658238705e-06, "log_odds_chosen": 0.15624158084392548, "log_odds_ratio": -0.7059566378593445, "logits/chosen": -3.0063095092773438, "logits/rejected": -3.0354349613189697, "logps/chosen": -0.8621616363525391, "logps/rejected": -0.9609626531600952, "loss": 0.5526, "nll_loss": 0.5280291438102722, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04310808330774307, "rewards/margins": 0.0049400487914681435, "rewards/rejected": -0.04804813116788864, "step": 540 }, { "epoch": 0.5768222338751966, "grad_norm": 1.9970251061131588, "learning_rate": 8.528028654224417e-06, "log_odds_chosen": 0.3964000940322876, "log_odds_ratio": -0.6276581883430481, "logits/chosen": -3.051056385040283, "logits/rejected": -3.0628600120544434, "logps/chosen": -0.8477095365524292, "logps/rejected": -1.090545415878296, "loss": 0.5377, "nll_loss": 0.5382589101791382, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04238547384738922, "rewards/margins": 0.012141798622906208, "rewards/rejected": -0.05452727526426315, "step": 550 }, { "epoch": 0.5873099108547457, "grad_norm": 1.9451374983545444, "learning_rate": 8.451542547285167e-06, "log_odds_chosen": 0.24946291744709015, "log_odds_ratio": -0.6731950044631958, "logits/chosen": -3.09270977973938, "logits/rejected": -3.1291451454162598, "logps/chosen": -0.8785122632980347, "logps/rejected": -1.0384708642959595, "loss": 0.5214, "nll_loss": 0.5020500421524048, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04392561689019203, "rewards/margins": 0.007997924461960793, "rewards/rejected": -0.05192355066537857, "step": 560 }, { "epoch": 0.5977975878342947, "grad_norm": 2.015759366014609, "learning_rate": 8.37707816583391e-06, "log_odds_chosen": 0.1689465194940567, "log_odds_ratio": -0.7204016447067261, "logits/chosen": -3.082165241241455, "logits/rejected": -3.113685369491577, "logps/chosen": -0.8903343081474304, "logps/rejected": -1.0027625560760498, "loss": 0.5039, "nll_loss": 0.5279403924942017, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.04451671987771988, "rewards/margins": 0.0056214118376374245, "rewards/rejected": -0.05013813450932503, "step": 570 }, { "epoch": 0.6082852648138437, "grad_norm": 1.8532059123988396, "learning_rate": 8.304547985373997e-06, "log_odds_chosen": 0.27719905972480774, "log_odds_ratio": -0.6604655385017395, "logits/chosen": -3.164926528930664, "logits/rejected": -3.1809298992156982, "logps/chosen": -0.8681858777999878, "logps/rejected": -1.0584015846252441, "loss": 0.5449, "nll_loss": 0.48173967003822327, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04340929910540581, "rewards/margins": 0.009510790929198265, "rewards/rejected": -0.052920084446668625, "step": 580 }, { "epoch": 0.6187729417933928, "grad_norm": 1.9696416884513863, "learning_rate": 8.233869695926184e-06, "log_odds_chosen": 0.3565579056739807, "log_odds_ratio": -0.6653521656990051, "logits/chosen": -3.1371326446533203, "logits/rejected": -3.1804890632629395, "logps/chosen": -0.8285515904426575, "logps/rejected": -1.060605764389038, "loss": 0.5115, "nll_loss": 0.5481864213943481, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04142758250236511, "rewards/margins": 0.011602701619267464, "rewards/rejected": -0.05303028225898743, "step": 590 }, { "epoch": 0.6292606187729418, "grad_norm": 2.0728707870222607, "learning_rate": 8.164965809277262e-06, "log_odds_chosen": 0.3636320233345032, "log_odds_ratio": -0.6437779664993286, "logits/chosen": -3.155708074569702, "logits/rejected": -3.155524492263794, "logps/chosen": -0.8240157961845398, "logps/rejected": -1.06477952003479, "loss": 0.5146, "nll_loss": 0.4843020439147949, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04120079427957535, "rewards/margins": 0.012038188055157661, "rewards/rejected": -0.05323898047208786, "step": 600 }, { "epoch": 0.6292606187729418, "eval_log_odds_chosen": 0.312126487493515, "eval_log_odds_ratio": -0.6417948603630066, "eval_logits/chosen": -3.127530336380005, "eval_logits/rejected": -3.1324751377105713, "eval_logps/chosen": -0.8164808750152588, "eval_logps/rejected": -1.016471028327942, "eval_loss": 0.5239931344985962, "eval_nll_loss": 0.4882962703704834, "eval_rewards/accuracies": 0.6230158805847168, "eval_rewards/chosen": -0.0408240407705307, "eval_rewards/margins": 0.00999950896948576, "eval_rewards/rejected": -0.050823554396629333, "eval_runtime": 137.2676, "eval_samples_per_second": 14.526, "eval_steps_per_second": 0.459, "step": 600 }, { "epoch": 0.6397482957524908, "grad_norm": 2.2204480702078246, "learning_rate": 8.097763301789162e-06, "log_odds_chosen": 0.1712610125541687, "log_odds_ratio": -0.705093502998352, "logits/chosen": -3.0651237964630127, "logits/rejected": -3.0982956886291504, "logps/chosen": -0.8816771507263184, "logps/rejected": -0.989287257194519, "loss": 0.526, "nll_loss": 0.48726779222488403, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.044083863496780396, "rewards/margins": 0.0053805033676326275, "rewards/rejected": -0.04946436733007431, "step": 610 }, { "epoch": 0.6502359727320398, "grad_norm": 2.0795066851294, "learning_rate": 8.03219328902499e-06, "log_odds_chosen": 0.18011939525604248, "log_odds_ratio": -0.7075856328010559, "logits/chosen": -3.093158721923828, "logits/rejected": -3.1170780658721924, "logps/chosen": -0.8789434432983398, "logps/rejected": -1.0122572183609009, "loss": 0.5293, "nll_loss": 0.5134457945823669, "rewards/accuracies": 0.5625, "rewards/chosen": -0.043947167694568634, "rewards/margins": 0.006665694061666727, "rewards/rejected": -0.050612859427928925, "step": 620 }, { "epoch": 0.6607236497115889, "grad_norm": 2.0001788984831514, "learning_rate": 7.968190728895958e-06, "log_odds_chosen": 0.2610745429992676, "log_odds_ratio": -0.6974207758903503, "logits/chosen": -3.0472846031188965, "logits/rejected": -3.0721120834350586, "logps/chosen": -0.8566058874130249, "logps/rejected": -1.0223418474197388, "loss": 0.5372, "nll_loss": 0.5244878530502319, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.042830295860767365, "rewards/margins": 0.00828679371625185, "rewards/rejected": -0.05111708492040634, "step": 630 }, { "epoch": 0.6712113266911379, "grad_norm": 2.3414302184737332, "learning_rate": 7.905694150420949e-06, "log_odds_chosen": 0.30453813076019287, "log_odds_ratio": -0.6686201095581055, "logits/chosen": -3.0571064949035645, "logits/rejected": -3.079134464263916, "logps/chosen": -0.8609515428543091, "logps/rejected": -1.0473490953445435, "loss": 0.5151, "nll_loss": 0.46057072281837463, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04304756969213486, "rewards/margins": 0.009319878183305264, "rewards/rejected": -0.05236745625734329, "step": 640 }, { "epoch": 0.6816990036706869, "grad_norm": 1.9074311662484937, "learning_rate": 7.844645405527363e-06, "log_odds_chosen": 0.21438069641590118, "log_odds_ratio": -0.7022002935409546, "logits/chosen": -3.058842897415161, "logits/rejected": -3.0864357948303223, "logps/chosen": -0.8311389684677124, "logps/rejected": -0.9654434323310852, "loss": 0.5332, "nll_loss": 0.5123748183250427, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0415569506585598, "rewards/margins": 0.006715219467878342, "rewards/rejected": -0.04827217012643814, "step": 650 }, { "epoch": 0.6921866806502359, "grad_norm": 1.9616180703535884, "learning_rate": 7.78498944161523e-06, "log_odds_chosen": 0.3507782816886902, "log_odds_ratio": -0.641882061958313, "logits/chosen": -3.0647902488708496, "logits/rejected": -3.1045496463775635, "logps/chosen": -0.8823181390762329, "logps/rejected": -1.1245914697647095, "loss": 0.5293, "nll_loss": 0.48711147904396057, "rewards/accuracies": 0.59375, "rewards/chosen": -0.044115908443927765, "rewards/margins": 0.012113666161894798, "rewards/rejected": -0.05622958019375801, "step": 660 }, { "epoch": 0.702674357629785, "grad_norm": 2.2401170633783427, "learning_rate": 7.726674092862559e-06, "log_odds_chosen": 0.4617346227169037, "log_odds_ratio": -0.627942681312561, "logits/chosen": -3.0200469493865967, "logits/rejected": -3.0557796955108643, "logps/chosen": -0.8328607678413391, "logps/rejected": -1.140726923942566, "loss": 0.5237, "nll_loss": 0.46908053755760193, "rewards/accuracies": 0.65625, "rewards/chosen": -0.041643042117357254, "rewards/margins": 0.015393314883112907, "rewards/rejected": -0.057036347687244415, "step": 670 }, { "epoch": 0.713162034609334, "grad_norm": 2.00824540701018, "learning_rate": 7.669649888473705e-06, "log_odds_chosen": 0.36505717039108276, "log_odds_ratio": -0.6428455114364624, "logits/chosen": -3.0360779762268066, "logits/rejected": -3.044907808303833, "logps/chosen": -0.8793157339096069, "logps/rejected": -1.1065771579742432, "loss": 0.5083, "nll_loss": 0.4951552450656891, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.04396578669548035, "rewards/margins": 0.0113630760461092, "rewards/rejected": -0.0553288571536541, "step": 680 }, { "epoch": 0.723649711588883, "grad_norm": 1.8606652251395144, "learning_rate": 7.61386987626881e-06, "log_odds_chosen": 0.2045813500881195, "log_odds_ratio": -0.7114613056182861, "logits/chosen": -3.036839723587036, "logits/rejected": -3.0589654445648193, "logps/chosen": -0.8661033511161804, "logps/rejected": -1.014004111289978, "loss": 0.5313, "nll_loss": 0.5510386824607849, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.0433051735162735, "rewards/margins": 0.007395035121589899, "rewards/rejected": -0.05070021003484726, "step": 690 }, { "epoch": 0.7341373885684321, "grad_norm": 2.2895278902082747, "learning_rate": 7.559289460184545e-06, "log_odds_chosen": 0.34833860397338867, "log_odds_ratio": -0.6269202828407288, "logits/chosen": -3.0252926349639893, "logits/rejected": -3.068871021270752, "logps/chosen": -0.8163930177688599, "logps/rejected": -1.0459128618240356, "loss": 0.5298, "nll_loss": 0.5428040623664856, "rewards/accuracies": 0.625, "rewards/chosen": -0.04081965237855911, "rewards/margins": 0.01147598959505558, "rewards/rejected": -0.05229564383625984, "step": 700 }, { "epoch": 0.7341373885684321, "eval_log_odds_chosen": 0.3869401812553406, "eval_log_odds_ratio": -0.6218506097793579, "eval_logits/chosen": -3.0754599571228027, "eval_logits/rejected": -3.076083183288574, "eval_logps/chosen": -0.8267216682434082, "eval_logps/rejected": -1.0827099084854126, "eval_loss": 0.5187779068946838, "eval_nll_loss": 0.4841572344303131, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.04133608192205429, "eval_rewards/margins": 0.012799412943422794, "eval_rewards/rejected": -0.05413549765944481, "eval_runtime": 137.1864, "eval_samples_per_second": 14.535, "eval_steps_per_second": 0.459, "step": 700 }, { "epoch": 0.7446250655479811, "grad_norm": 1.958829045282282, "learning_rate": 7.505866250408016e-06, "log_odds_chosen": 0.2794094383716583, "log_odds_ratio": -0.6572638750076294, "logits/chosen": -3.1184074878692627, "logits/rejected": -3.1369974613189697, "logps/chosen": -0.8444921374320984, "logps/rejected": -1.0439577102661133, "loss": 0.5242, "nll_loss": 0.47964978218078613, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04222460836172104, "rewards/margins": 0.00997327920049429, "rewards/rejected": -0.052197881042957306, "step": 710 }, { "epoch": 0.7551127425275301, "grad_norm": 1.8049248182957538, "learning_rate": 7.4535599249993e-06, "log_odds_chosen": 0.36963027715682983, "log_odds_ratio": -0.6443501710891724, "logits/chosen": -3.075653076171875, "logits/rejected": -3.0980098247528076, "logps/chosen": -0.7987631559371948, "logps/rejected": -1.03029465675354, "loss": 0.5308, "nll_loss": 0.4633590281009674, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03993815928697586, "rewards/margins": 0.011576572433114052, "rewards/rejected": -0.051514726132154465, "step": 720 }, { "epoch": 0.7656004195070791, "grad_norm": 2.1907119668628807, "learning_rate": 7.402332101976053e-06, "log_odds_chosen": 0.1018507108092308, "log_odds_ratio": -0.7229408621788025, "logits/chosen": -3.084719181060791, "logits/rejected": -3.0846333503723145, "logps/chosen": -0.8332414627075195, "logps/rejected": -0.8869687914848328, "loss": 0.5377, "nll_loss": 0.5031158328056335, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.041662074625492096, "rewards/margins": 0.00268636760301888, "rewards/rejected": -0.04434844106435776, "step": 730 }, { "epoch": 0.7760880964866282, "grad_norm": 2.050092986168091, "learning_rate": 7.352146220938079e-06, "log_odds_chosen": 0.3393878936767578, "log_odds_ratio": -0.6246740221977234, "logits/chosen": -3.119809627532959, "logits/rejected": -3.132826328277588, "logps/chosen": -0.804786205291748, "logps/rejected": -1.0171911716461182, "loss": 0.5308, "nll_loss": 0.4794273376464844, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.040239304304122925, "rewards/margins": 0.010620243847370148, "rewards/rejected": -0.05085955187678337, "step": 740 }, { "epoch": 0.7865757734661772, "grad_norm": 2.0193892114327556, "learning_rate": 7.3029674334022146e-06, "log_odds_chosen": 0.2425309419631958, "log_odds_ratio": -0.6716917753219604, "logits/chosen": -3.093583106994629, "logits/rejected": -3.114816188812256, "logps/chosen": -0.8740803599357605, "logps/rejected": -1.0157320499420166, "loss": 0.5427, "nll_loss": 0.4982066750526428, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04370402172207832, "rewards/margins": 0.007082589901983738, "rewards/rejected": -0.05078660696744919, "step": 750 }, { "epoch": 0.7970634504457262, "grad_norm": 1.891204637475333, "learning_rate": 7.254762501100117e-06, "log_odds_chosen": 0.2664291262626648, "log_odds_ratio": -0.6672528386116028, "logits/chosen": -3.0630593299865723, "logits/rejected": -3.0695788860321045, "logps/chosen": -0.8163594007492065, "logps/rejected": -0.993925929069519, "loss": 0.5114, "nll_loss": 0.40486717224121094, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.040817975997924805, "rewards/margins": 0.00887832697480917, "rewards/rejected": -0.049696292728185654, "step": 760 }, { "epoch": 0.8075511274252754, "grad_norm": 2.0675479903273914, "learning_rate": 7.207499701564472e-06, "log_odds_chosen": 0.23201966285705566, "log_odds_ratio": -0.6995107531547546, "logits/chosen": -3.027050018310547, "logits/rejected": -3.0489039421081543, "logps/chosen": -0.8810374140739441, "logps/rejected": -1.0541043281555176, "loss": 0.5343, "nll_loss": 0.5017890334129333, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.0440518744289875, "rewards/margins": 0.008653342723846436, "rewards/rejected": -0.05270521715283394, "step": 770 }, { "epoch": 0.8180388044048243, "grad_norm": 1.9571785710156353, "learning_rate": 7.1611487403943295e-06, "log_odds_chosen": 0.23842506110668182, "log_odds_ratio": -0.672247052192688, "logits/chosen": -3.062586545944214, "logits/rejected": -3.0935113430023193, "logps/chosen": -0.8818261027336121, "logps/rejected": -1.0167505741119385, "loss": 0.5467, "nll_loss": 0.5480509996414185, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04409131035208702, "rewards/margins": 0.006746229715645313, "rewards/rejected": -0.05083753541111946, "step": 780 }, { "epoch": 0.8285264813843733, "grad_norm": 1.8565884413084413, "learning_rate": 7.115680669648201e-06, "log_odds_chosen": 0.32895228266716003, "log_odds_ratio": -0.6478875875473022, "logits/chosen": -3.1025116443634033, "logits/rejected": -3.1219050884246826, "logps/chosen": -0.8189374804496765, "logps/rejected": -1.0338833332061768, "loss": 0.5049, "nll_loss": 0.44281667470932007, "rewards/accuracies": 0.59375, "rewards/chosen": -0.040946874767541885, "rewards/margins": 0.010747292079031467, "rewards/rejected": -0.05169416218996048, "step": 790 }, { "epoch": 0.8390141583639223, "grad_norm": 2.106485781152954, "learning_rate": 7.0710678118654756e-06, "log_odds_chosen": 0.4608131945133209, "log_odds_ratio": -0.5961465835571289, "logits/chosen": -3.092484951019287, "logits/rejected": -3.090536117553711, "logps/chosen": -0.7798897624015808, "logps/rejected": -1.0744028091430664, "loss": 0.5181, "nll_loss": 0.4202440679073334, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03899449110031128, "rewards/margins": 0.014725650660693645, "rewards/rejected": -0.0537201389670372, "step": 800 }, { "epoch": 0.8390141583639223, "eval_log_odds_chosen": 0.35056135058403015, "eval_log_odds_ratio": -0.6322371363639832, "eval_logits/chosen": -3.139373302459717, "eval_logits/rejected": -3.1382317543029785, "eval_logps/chosen": -0.8198128342628479, "eval_logps/rejected": -1.0474979877471924, "eval_loss": 0.5140993595123291, "eval_nll_loss": 0.4803001582622528, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.040990639477968216, "eval_rewards/margins": 0.011384249664843082, "eval_rewards/rejected": -0.05237489193677902, "eval_runtime": 136.2293, "eval_samples_per_second": 14.637, "eval_steps_per_second": 0.462, "step": 800 }, { "epoch": 0.8495018353434715, "grad_norm": 1.919736952774634, "learning_rate": 7.027283689263066e-06, "log_odds_chosen": 0.3574589788913727, "log_odds_ratio": -0.6265517473220825, "logits/chosen": -3.0922906398773193, "logits/rejected": -3.093270778656006, "logps/chosen": -0.8058309555053711, "logps/rejected": -1.0188381671905518, "loss": 0.5132, "nll_loss": 0.4754185676574707, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04029155150055885, "rewards/margins": 0.010650361888110638, "rewards/rejected": -0.050941914319992065, "step": 810 }, { "epoch": 0.8599895123230205, "grad_norm": 2.3619475771455214, "learning_rate": 6.984302957695783e-06, "log_odds_chosen": 0.2932414412498474, "log_odds_ratio": -0.6586158275604248, "logits/chosen": -3.0357770919799805, "logits/rejected": -3.0360379219055176, "logps/chosen": -0.842557430267334, "logps/rejected": -1.0188366174697876, "loss": 0.505, "nll_loss": 0.4280059337615967, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04212787002325058, "rewards/margins": 0.008813952095806599, "rewards/rejected": -0.0509418249130249, "step": 820 }, { "epoch": 0.8704771893025695, "grad_norm": 2.3824306185771267, "learning_rate": 6.942101345006233e-06, "log_odds_chosen": 0.2479257881641388, "log_odds_ratio": -0.702430248260498, "logits/chosen": -3.008411407470703, "logits/rejected": -3.05663800239563, "logps/chosen": -0.853378415107727, "logps/rejected": -1.0239073038101196, "loss": 0.5248, "nll_loss": 0.4657117426395416, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.04266892373561859, "rewards/margins": 0.00852644257247448, "rewards/rejected": -0.05119536444544792, "step": 830 }, { "epoch": 0.8809648662821186, "grad_norm": 1.9624325890421999, "learning_rate": 6.900655593423542e-06, "log_odds_chosen": 0.2082471400499344, "log_odds_ratio": -0.6889498233795166, "logits/chosen": -3.040546178817749, "logits/rejected": -3.0660147666931152, "logps/chosen": -0.8756462931632996, "logps/rejected": -1.0124717950820923, "loss": 0.5137, "nll_loss": 0.4855361580848694, "rewards/accuracies": 0.53125, "rewards/chosen": -0.043782319873571396, "rewards/margins": 0.006841268390417099, "rewards/rejected": -0.050623588263988495, "step": 840 }, { "epoch": 0.8914525432616676, "grad_norm": 2.0144554917595756, "learning_rate": 6.859943405700353e-06, "log_odds_chosen": 0.3205421566963196, "log_odds_ratio": -0.6371484994888306, "logits/chosen": -3.054384231567383, "logits/rejected": -3.0986409187316895, "logps/chosen": -0.8319618105888367, "logps/rejected": -1.0313116312026978, "loss": 0.5044, "nll_loss": 0.4881317615509033, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.041598085314035416, "rewards/margins": 0.009967491030693054, "rewards/rejected": -0.05156558007001877, "step": 850 }, { "epoch": 0.9019402202412166, "grad_norm": 1.9341957217840544, "learning_rate": 6.819943394704736e-06, "log_odds_chosen": 0.26728707551956177, "log_odds_ratio": -0.6747015714645386, "logits/chosen": -3.0936527252197266, "logits/rejected": -3.1073575019836426, "logps/chosen": -0.8353049159049988, "logps/rejected": -1.0224361419677734, "loss": 0.5278, "nll_loss": 0.4731883108615875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0417652502655983, "rewards/margins": 0.009356559254229069, "rewards/rejected": -0.05112180858850479, "step": 860 }, { "epoch": 0.9124278972207656, "grad_norm": 5.30319924106792, "learning_rate": 6.780635036208105e-06, "log_odds_chosen": 0.30106544494628906, "log_odds_ratio": -0.6683878898620605, "logits/chosen": -3.097151279449463, "logits/rejected": -3.1499500274658203, "logps/chosen": -0.867012619972229, "logps/rejected": -1.0790386199951172, "loss": 0.4933, "nll_loss": 0.48347124457359314, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04335063695907593, "rewards/margins": 0.010601297952234745, "rewards/rejected": -0.0539519302546978, "step": 870 }, { "epoch": 0.9229155742003147, "grad_norm": 1.6208302885778367, "learning_rate": 6.741998624632421e-06, "log_odds_chosen": 0.29186171293258667, "log_odds_ratio": -0.6591932773590088, "logits/chosen": -3.15583872795105, "logits/rejected": -3.168064594268799, "logps/chosen": -0.8187226057052612, "logps/rejected": -1.0049909353256226, "loss": 0.4887, "nll_loss": 0.4384452700614929, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04093613475561142, "rewards/margins": 0.009313413873314857, "rewards/rejected": -0.05024954676628113, "step": 880 }, { "epoch": 0.9334032511798637, "grad_norm": 1.7707391073712173, "learning_rate": 6.70401523153991e-06, "log_odds_chosen": 0.33703380823135376, "log_odds_ratio": -0.6459982991218567, "logits/chosen": -3.1340742111206055, "logits/rejected": -3.157071590423584, "logps/chosen": -0.8063561320304871, "logps/rejected": -0.9982324838638306, "loss": 0.4931, "nll_loss": 0.4631246030330658, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.040317803621292114, "rewards/margins": 0.009593818336725235, "rewards/rejected": -0.04991162568330765, "step": 890 }, { "epoch": 0.9438909281594127, "grad_norm": 2.341682439233393, "learning_rate": 6.666666666666667e-06, "log_odds_chosen": 0.26426905393600464, "log_odds_ratio": -0.6637164354324341, "logits/chosen": -3.1100411415100098, "logits/rejected": -3.130826473236084, "logps/chosen": -0.7806347012519836, "logps/rejected": -0.9385608434677124, "loss": 0.5239, "nll_loss": 0.4659123420715332, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.039031732827425, "rewards/margins": 0.00789631437510252, "rewards/rejected": -0.0469280444085598, "step": 900 }, { "epoch": 0.9438909281594127, "eval_log_odds_chosen": 0.32679569721221924, "eval_log_odds_ratio": -0.6327584385871887, "eval_logits/chosen": -3.117077112197876, "eval_logits/rejected": -3.119086742401123, "eval_logps/chosen": -0.8044511079788208, "eval_logps/rejected": -1.0129274129867554, "eval_loss": 0.5086367726325989, "eval_nll_loss": 0.4747697710990906, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.04022255912423134, "eval_rewards/margins": 0.010423817671835423, "eval_rewards/rejected": -0.05064636468887329, "eval_runtime": 137.5576, "eval_samples_per_second": 14.496, "eval_steps_per_second": 0.458, "step": 900 }, { "epoch": 0.9543786051389617, "grad_norm": 2.0533389896159213, "learning_rate": 6.629935441317959e-06, "log_odds_chosen": 0.4754648208618164, "log_odds_ratio": -0.6232188940048218, "logits/chosen": -3.073176622390747, "logits/rejected": -3.084963321685791, "logps/chosen": -0.828788161277771, "logps/rejected": -1.1443804502487183, "loss": 0.5142, "nll_loss": 0.46652156114578247, "rewards/accuracies": 0.625, "rewards/chosen": -0.04143941029906273, "rewards/margins": 0.015779614448547363, "rewards/rejected": -0.05721902847290039, "step": 910 }, { "epoch": 0.9648662821185108, "grad_norm": 2.138448059862142, "learning_rate": 6.593804733957872e-06, "log_odds_chosen": 0.32768282294273376, "log_odds_ratio": -0.6431117057800293, "logits/chosen": -3.038576364517212, "logits/rejected": -3.061370372772217, "logps/chosen": -0.7864677906036377, "logps/rejected": -0.9946994781494141, "loss": 0.4836, "nll_loss": 0.43025264143943787, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03932339325547218, "rewards/margins": 0.010411588475108147, "rewards/rejected": -0.04973498359322548, "step": 920 }, { "epoch": 0.9753539590980598, "grad_norm": 2.1602863053901413, "learning_rate": 6.55825835783953e-06, "log_odds_chosen": 0.2050061970949173, "log_odds_ratio": -0.6868597269058228, "logits/chosen": -3.0544333457946777, "logits/rejected": -3.066739797592163, "logps/chosen": -0.8742432594299316, "logps/rejected": -1.0194706916809082, "loss": 0.5136, "nll_loss": 0.5241981744766235, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04371216148138046, "rewards/margins": 0.007261371705681086, "rewards/rejected": -0.05097353458404541, "step": 930 }, { "epoch": 0.9858416360776088, "grad_norm": 1.9215491222233851, "learning_rate": 6.523280730534423e-06, "log_odds_chosen": 0.23041269183158875, "log_odds_ratio": -0.6992384195327759, "logits/chosen": -3.0867247581481934, "logits/rejected": -3.0779662132263184, "logps/chosen": -0.7768861651420593, "logps/rejected": -0.9184977412223816, "loss": 0.5102, "nll_loss": 0.4776674211025238, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.038844309747219086, "rewards/margins": 0.0070805782452225685, "rewards/rejected": -0.045924894511699677, "step": 940 }, { "epoch": 0.9963293130571579, "grad_norm": 2.1983436102574547, "learning_rate": 6.488856845230502e-06, "log_odds_chosen": 0.25244003534317017, "log_odds_ratio": -0.6911928653717041, "logits/chosen": -3.0215468406677246, "logits/rejected": -3.0374438762664795, "logps/chosen": -0.8648554682731628, "logps/rejected": -1.0236364603042603, "loss": 0.5385, "nll_loss": 0.5036488175392151, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04324277862906456, "rewards/margins": 0.00793905183672905, "rewards/rejected": -0.05118182301521301, "step": 950 }, { "epoch": 1.0068169900367068, "grad_norm": 2.2724469008271773, "learning_rate": 6.4549722436790284e-06, "log_odds_chosen": 1.0400245189666748, "log_odds_ratio": -0.42517581582069397, "logits/chosen": -3.0371384620666504, "logits/rejected": -3.0435400009155273, "logps/chosen": -0.5974615812301636, "logps/rejected": -1.1842448711395264, "loss": 0.3929, "nll_loss": 0.40045398473739624, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.02987307868897915, "rewards/margins": 0.02933916449546814, "rewards/rejected": -0.05921224504709244, "step": 960 }, { "epoch": 1.017304667016256, "grad_norm": 2.0168885022396372, "learning_rate": 6.421612990679356e-06, "log_odds_chosen": 1.6284434795379639, "log_odds_ratio": -0.2502659857273102, "logits/chosen": -3.080873727798462, "logits/rejected": -3.070159912109375, "logps/chosen": -0.4285094141960144, "logps/rejected": -1.2745321989059448, "loss": 0.2923, "nll_loss": 0.28497669100761414, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02142546884715557, "rewards/margins": 0.04230114072561264, "rewards/rejected": -0.06372661143541336, "step": 970 }, { "epoch": 1.027792343995805, "grad_norm": 1.9662869053425782, "learning_rate": 6.3887656499994e-06, "log_odds_chosen": 1.8482691049575806, "log_odds_ratio": -0.21383436024188995, "logits/chosen": -3.071471929550171, "logits/rejected": -3.079923391342163, "logps/chosen": -0.43078216910362244, "logps/rejected": -1.4107215404510498, "loss": 0.3019, "nll_loss": 0.3140898644924164, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021539105102419853, "rewards/margins": 0.04899696633219719, "rewards/rejected": -0.0705360695719719, "step": 980 }, { "epoch": 1.038280020975354, "grad_norm": 1.9845582869348006, "learning_rate": 6.356417261637282e-06, "log_odds_chosen": 1.6627075672149658, "log_odds_ratio": -0.2610566318035126, "logits/chosen": -2.9875268936157227, "logits/rejected": -2.9876785278320312, "logps/chosen": -0.4378105103969574, "logps/rejected": -1.3178083896636963, "loss": 0.296, "nll_loss": 0.27773916721343994, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.02189052477478981, "rewards/margins": 0.04399988800287247, "rewards/rejected": -0.06589041650295258, "step": 990 }, { "epoch": 1.048767697954903, "grad_norm": 2.0942478813902783, "learning_rate": 6.324555320336759e-06, "log_odds_chosen": 1.9041988849639893, "log_odds_ratio": -0.20684988796710968, "logits/chosen": -2.9869093894958496, "logits/rejected": -3.029050588607788, "logps/chosen": -0.4077525734901428, "logps/rejected": -1.3952513933181763, "loss": 0.2888, "nll_loss": 0.2748258709907532, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02038763090968132, "rewards/margins": 0.04937494546175003, "rewards/rejected": -0.06976256519556046, "step": 1000 }, { "epoch": 1.048767697954903, "eval_log_odds_chosen": 0.37935417890548706, "eval_log_odds_ratio": -0.6318228840827942, "eval_logits/chosen": -3.0189764499664307, "eval_logits/rejected": -3.0171284675598145, "eval_logps/chosen": -0.8724088072776794, "eval_logps/rejected": -1.112794280052185, "eval_loss": 0.5400179028511047, "eval_nll_loss": 0.5058131814002991, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.04362044483423233, "eval_rewards/margins": 0.012019270099699497, "eval_rewards/rejected": -0.05563971400260925, "eval_runtime": 136.9938, "eval_samples_per_second": 14.555, "eval_steps_per_second": 0.46, "step": 1000 }, { "epoch": 1.059255374934452, "grad_norm": 1.8526210480251912, "learning_rate": 6.2931677552755265e-06, "log_odds_chosen": 1.7620799541473389, "log_odds_ratio": -0.23190836608409882, "logits/chosen": -3.0539023876190186, "logits/rejected": -3.0629706382751465, "logps/chosen": -0.43785715103149414, "logps/rejected": -1.3722269535064697, "loss": 0.2859, "nll_loss": 0.2769049108028412, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021892856806516647, "rewards/margins": 0.04671848937869072, "rewards/rejected": -0.06861135363578796, "step": 1010 }, { "epoch": 1.069743051914001, "grad_norm": 2.017775428059147, "learning_rate": 6.262242910851496e-06, "log_odds_chosen": 1.7232574224472046, "log_odds_ratio": -0.22979629039764404, "logits/chosen": -3.0019690990448, "logits/rejected": -3.0224807262420654, "logps/chosen": -0.4002920091152191, "logps/rejected": -1.3048107624053955, "loss": 0.2894, "nll_loss": 0.2588661015033722, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.020014600828289986, "rewards/margins": 0.04522594064474106, "rewards/rejected": -0.0652405396103859, "step": 1020 }, { "epoch": 1.08023072889355, "grad_norm": 2.1656896077764, "learning_rate": 6.231769528497559e-06, "log_odds_chosen": 1.7999454736709595, "log_odds_ratio": -0.23009638488292694, "logits/chosen": -3.0344815254211426, "logits/rejected": -3.0285098552703857, "logps/chosen": -0.42475366592407227, "logps/rejected": -1.3811571598052979, "loss": 0.2779, "nll_loss": 0.26928776502609253, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021237684413790703, "rewards/margins": 0.04782017320394516, "rewards/rejected": -0.06905786693096161, "step": 1030 }, { "epoch": 1.0907184058730992, "grad_norm": 1.8893124181143397, "learning_rate": 6.2017367294604225e-06, "log_odds_chosen": 1.7361199855804443, "log_odds_ratio": -0.2356552630662918, "logits/chosen": -2.9798855781555176, "logits/rejected": -3.012021780014038, "logps/chosen": -0.4087589383125305, "logps/rejected": -1.318456768989563, "loss": 0.2848, "nll_loss": 0.2693423926830292, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.020437946543097496, "rewards/margins": 0.045484889298677444, "rewards/rejected": -0.06592283397912979, "step": 1040 }, { "epoch": 1.1012060828526482, "grad_norm": 1.998285617344112, "learning_rate": 6.172133998483677e-06, "log_odds_chosen": 1.989933967590332, "log_odds_ratio": -0.2104463130235672, "logits/chosen": -2.9669861793518066, "logits/rejected": -2.992997169494629, "logps/chosen": -0.4091659486293793, "logps/rejected": -1.4872965812683105, "loss": 0.2793, "nll_loss": 0.24384136497974396, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.020458297803997993, "rewards/margins": 0.05390653759241104, "rewards/rejected": -0.07436482608318329, "step": 1050 }, { "epoch": 1.1116937598321972, "grad_norm": 1.99753785316238, "learning_rate": 6.142951168339513e-06, "log_odds_chosen": 1.7905690670013428, "log_odds_ratio": -0.2465437948703766, "logits/chosen": -2.9944257736206055, "logits/rejected": -2.988699436187744, "logps/chosen": -0.41175705194473267, "logps/rejected": -1.3037220239639282, "loss": 0.2828, "nll_loss": 0.2829252779483795, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.020587850362062454, "rewards/margins": 0.04459824413061142, "rewards/rejected": -0.06518609821796417, "step": 1060 }, { "epoch": 1.1221814368117462, "grad_norm": 2.0944607329795666, "learning_rate": 6.114178405157431e-06, "log_odds_chosen": 1.972241759300232, "log_odds_ratio": -0.202741339802742, "logits/chosen": -2.9314074516296387, "logits/rejected": -2.943037271499634, "logps/chosen": -0.39666005969047546, "logps/rejected": -1.4398232698440552, "loss": 0.2869, "nll_loss": 0.26206424832344055, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.019833002239465714, "rewards/margins": 0.0521581657230854, "rewards/rejected": -0.07199116796255112, "step": 1070 }, { "epoch": 1.1326691137912952, "grad_norm": 2.082309850512046, "learning_rate": 6.0858061945018455e-06, "log_odds_chosen": 1.9569040536880493, "log_odds_ratio": -0.20189175009727478, "logits/chosen": -2.9233288764953613, "logits/rejected": -2.953047275543213, "logps/chosen": -0.4349672198295593, "logps/rejected": -1.479813814163208, "loss": 0.286, "nll_loss": 0.25732284784317017, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021748360246419907, "rewards/margins": 0.052242327481508255, "rewards/rejected": -0.07399068772792816, "step": 1080 }, { "epoch": 1.1431567907708442, "grad_norm": 1.977872551014816, "learning_rate": 6.0578253281538265e-06, "log_odds_chosen": 1.8792686462402344, "log_odds_ratio": -0.23301272094249725, "logits/chosen": -2.9573769569396973, "logits/rejected": -2.968686103820801, "logps/chosen": -0.3683982789516449, "logps/rejected": -1.286027431488037, "loss": 0.2841, "nll_loss": 0.26943594217300415, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.018419915810227394, "rewards/margins": 0.04588145762681961, "rewards/rejected": -0.06430138647556305, "step": 1090 }, { "epoch": 1.1536444677503932, "grad_norm": 2.2874664942911984, "learning_rate": 6.030226891555273e-06, "log_odds_chosen": 1.744699239730835, "log_odds_ratio": -0.2575313448905945, "logits/chosen": -3.0328478813171387, "logits/rejected": -3.0531229972839355, "logps/chosen": -0.4480053782463074, "logps/rejected": -1.409203290939331, "loss": 0.29, "nll_loss": 0.2910405397415161, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.022400271147489548, "rewards/margins": 0.04805989935994148, "rewards/rejected": -0.07046017050743103, "step": 1100 }, { "epoch": 1.1536444677503932, "eval_log_odds_chosen": 0.4246710240840912, "eval_log_odds_ratio": -0.6255837082862854, "eval_logits/chosen": -3.002875804901123, "eval_logits/rejected": -3.0027201175689697, "eval_logps/chosen": -0.8736297488212585, "eval_logps/rejected": -1.1487443447113037, "eval_loss": 0.5385290384292603, "eval_nll_loss": 0.5041735172271729, "eval_rewards/accuracies": 0.64682537317276, "eval_rewards/chosen": -0.04368148371577263, "eval_rewards/margins": 0.013755732215940952, "eval_rewards/rejected": -0.057437218725681305, "eval_runtime": 136.8823, "eval_samples_per_second": 14.567, "eval_steps_per_second": 0.46, "step": 1100 }, { "epoch": 1.1641321447299422, "grad_norm": 1.8147231314332177, "learning_rate": 6.003002251876643e-06, "log_odds_chosen": 1.8075166940689087, "log_odds_ratio": -0.2281859815120697, "logits/chosen": -2.965421199798584, "logits/rejected": -3.0172793865203857, "logps/chosen": -0.44597238302230835, "logps/rejected": -1.4203885793685913, "loss": 0.2891, "nll_loss": 0.2668479084968567, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.022298619151115417, "rewards/margins": 0.04872080683708191, "rewards/rejected": -0.07101943343877792, "step": 1110 }, { "epoch": 1.1746198217094914, "grad_norm": 1.9969430269469466, "learning_rate": 5.976143046671968e-06, "log_odds_chosen": 1.7478984594345093, "log_odds_ratio": -0.22862455248832703, "logits/chosen": -3.0243489742279053, "logits/rejected": -3.0321333408355713, "logps/chosen": -0.40696269273757935, "logps/rejected": -1.2988313436508179, "loss": 0.2927, "nll_loss": 0.27604612708091736, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02034812793135643, "rewards/margins": 0.04459343105554581, "rewards/rejected": -0.06494157016277313, "step": 1120 }, { "epoch": 1.1851074986890404, "grad_norm": 2.1896703421371275, "learning_rate": 5.949641173087296e-06, "log_odds_chosen": 2.048767566680908, "log_odds_ratio": -0.20188426971435547, "logits/chosen": -2.9657158851623535, "logits/rejected": -2.977405309677124, "logps/chosen": -0.38311532139778137, "logps/rejected": -1.454978108406067, "loss": 0.2825, "nll_loss": 0.2597211003303528, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.01915576681494713, "rewards/margins": 0.05359314754605293, "rewards/rejected": -0.07274890691041946, "step": 1130 }, { "epoch": 1.1955951756685894, "grad_norm": 1.8856822247943528, "learning_rate": 5.923488777590924e-06, "log_odds_chosen": 1.9368520975112915, "log_odds_ratio": -0.21634550392627716, "logits/chosen": -3.009665012359619, "logits/rejected": -3.0066471099853516, "logps/chosen": -0.412930428981781, "logps/rejected": -1.4850547313690186, "loss": 0.2786, "nll_loss": 0.28015536069869995, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02064652182161808, "rewards/margins": 0.05360621213912964, "rewards/rejected": -0.07425273954868317, "step": 1140 }, { "epoch": 1.2060828526481384, "grad_norm": 2.2165729739830233, "learning_rate": 5.897678246195886e-06, "log_odds_chosen": 1.9798767566680908, "log_odds_ratio": -0.19855430722236633, "logits/chosen": -2.9805493354797363, "logits/rejected": -2.9919371604919434, "logps/chosen": -0.38313865661621094, "logps/rejected": -1.3864378929138184, "loss": 0.2909, "nll_loss": 0.27790573239326477, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.019156932830810547, "rewards/margins": 0.05016495659947395, "rewards/rejected": -0.0693218931555748, "step": 1150 }, { "epoch": 1.2165705296276874, "grad_norm": 2.8337045840850497, "learning_rate": 5.8722021951470355e-06, "log_odds_chosen": 1.7361915111541748, "log_odds_ratio": -0.24711327254772186, "logits/chosen": -2.966083288192749, "logits/rejected": -2.9842519760131836, "logps/chosen": -0.4412474036216736, "logps/rejected": -1.3824529647827148, "loss": 0.2781, "nll_loss": 0.2754039466381073, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02206237055361271, "rewards/margins": 0.0470602810382843, "rewards/rejected": -0.06912264972925186, "step": 1160 }, { "epoch": 1.2270582066072364, "grad_norm": 1.7729938432799273, "learning_rate": 5.847053462046862e-06, "log_odds_chosen": 1.7805134057998657, "log_odds_ratio": -0.23545412719249725, "logits/chosen": -3.0085816383361816, "logits/rejected": -3.003875494003296, "logps/chosen": -0.4123718738555908, "logps/rejected": -1.3221479654312134, "loss": 0.2829, "nll_loss": 0.2879020869731903, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02061859332025051, "rewards/margins": 0.045488808304071426, "rewards/rejected": -0.06610739976167679, "step": 1170 }, { "epoch": 1.2375458835867854, "grad_norm": 2.2169036925519454, "learning_rate": 5.822225097395821e-06, "log_odds_chosen": 1.9844211339950562, "log_odds_ratio": -0.1866404265165329, "logits/chosen": -2.9880988597869873, "logits/rejected": -3.0081310272216797, "logps/chosen": -0.3858886957168579, "logps/rejected": -1.3924882411956787, "loss": 0.2873, "nll_loss": 0.25162869691848755, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.019294437021017075, "rewards/margins": 0.05032998323440552, "rewards/rejected": -0.06962442398071289, "step": 1180 }, { "epoch": 1.2480335605663346, "grad_norm": 2.1614361138819045, "learning_rate": 5.797710356524486e-06, "log_odds_chosen": 1.8616158962249756, "log_odds_ratio": -0.22632256150245667, "logits/chosen": -3.0017178058624268, "logits/rejected": -3.0013363361358643, "logps/chosen": -0.4442955553531647, "logps/rejected": -1.4363129138946533, "loss": 0.2867, "nll_loss": 0.289310485124588, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.022214777767658234, "rewards/margins": 0.04960086941719055, "rewards/rejected": -0.07181564718484879, "step": 1190 }, { "epoch": 1.2585212375458836, "grad_norm": 2.0470229728313494, "learning_rate": 5.773502691896259e-06, "log_odds_chosen": 1.8614075183868408, "log_odds_ratio": -0.2429337054491043, "logits/chosen": -2.9596099853515625, "logits/rejected": -2.9728147983551025, "logps/chosen": -0.44122061133384705, "logps/rejected": -1.4644559621810913, "loss": 0.2826, "nll_loss": 0.2614334225654602, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.022061031311750412, "rewards/margins": 0.051161766052246094, "rewards/rejected": -0.0732228010892868, "step": 1200 }, { "epoch": 1.2585212375458836, "eval_log_odds_chosen": 0.4214767515659332, "eval_log_odds_ratio": -0.6254101991653442, "eval_logits/chosen": -2.9582858085632324, "eval_logits/rejected": -2.96195912361145, "eval_logps/chosen": -0.8853804469108582, "eval_logps/rejected": -1.162561058998108, "eval_loss": 0.5427829027175903, "eval_nll_loss": 0.5084435939788818, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.04426902160048485, "eval_rewards/margins": 0.013859033584594727, "eval_rewards/rejected": -0.05812805891036987, "eval_runtime": 137.2006, "eval_samples_per_second": 14.533, "eval_steps_per_second": 0.459, "step": 1200 }, { "epoch": 1.2690089145254326, "grad_norm": 2.3388472125063946, "learning_rate": 5.749595745760691e-06, "log_odds_chosen": 1.858030080795288, "log_odds_ratio": -0.21272964775562286, "logits/chosen": -2.996577739715576, "logits/rejected": -3.0146660804748535, "logps/chosen": -0.4070938229560852, "logps/rejected": -1.3386101722717285, "loss": 0.2988, "nll_loss": 0.292961448431015, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02035469003021717, "rewards/margins": 0.04657582566142082, "rewards/rejected": -0.06693051755428314, "step": 1210 }, { "epoch": 1.2794965915049816, "grad_norm": 1.9762440493042526, "learning_rate": 5.725983343138682e-06, "log_odds_chosen": 1.7544046640396118, "log_odds_ratio": -0.22841353714466095, "logits/chosen": -2.9734439849853516, "logits/rejected": -2.9992988109588623, "logps/chosen": -0.42544227838516235, "logps/rejected": -1.3273015022277832, "loss": 0.295, "nll_loss": 0.28989139199256897, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021272115409374237, "rewards/margins": 0.04509295895695686, "rewards/rejected": -0.0663650780916214, "step": 1220 }, { "epoch": 1.2899842684845306, "grad_norm": 2.230074491318477, "learning_rate": 5.702659485122011e-06, "log_odds_chosen": 1.929265022277832, "log_odds_ratio": -0.20951807498931885, "logits/chosen": -2.9871158599853516, "logits/rejected": -2.993727207183838, "logps/chosen": -0.40125927329063416, "logps/rejected": -1.4160717725753784, "loss": 0.2653, "nll_loss": 0.23026029765605927, "rewards/accuracies": 0.96875, "rewards/chosen": -0.020062964409589767, "rewards/margins": 0.050740621984004974, "rewards/rejected": -0.07080359011888504, "step": 1230 }, { "epoch": 1.3004719454640796, "grad_norm": 1.9679461376203173, "learning_rate": 5.679618342470648e-06, "log_odds_chosen": 1.7371532917022705, "log_odds_ratio": -0.2242734134197235, "logits/chosen": -3.0132291316986084, "logits/rejected": -3.0433402061462402, "logps/chosen": -0.413210391998291, "logps/rejected": -1.3000330924987793, "loss": 0.2804, "nll_loss": 0.29589781165122986, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0206605214625597, "rewards/margins": 0.04434113949537277, "rewards/rejected": -0.06500165909528732, "step": 1240 }, { "epoch": 1.3109596224436286, "grad_norm": 2.617277483095543, "learning_rate": 5.656854249492381e-06, "log_odds_chosen": 1.814679741859436, "log_odds_ratio": -0.22298629581928253, "logits/chosen": -2.996896266937256, "logits/rejected": -3.0056145191192627, "logps/chosen": -0.42395251989364624, "logps/rejected": -1.3927456140518188, "loss": 0.2687, "nll_loss": 0.25607752799987793, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021197626367211342, "rewards/margins": 0.04843965172767639, "rewards/rejected": -0.06963728368282318, "step": 1250 }, { "epoch": 1.3214472994231778, "grad_norm": 1.9773184888291742, "learning_rate": 5.63436169819011e-06, "log_odds_chosen": 1.8136640787124634, "log_odds_ratio": -0.24320077896118164, "logits/chosen": -2.966784954071045, "logits/rejected": -3.001746892929077, "logps/chosen": -0.45541706681251526, "logps/rejected": -1.3951488733291626, "loss": 0.2988, "nll_loss": 0.31274476647377014, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.022770855575799942, "rewards/margins": 0.046986598521471024, "rewards/rejected": -0.06975744664669037, "step": 1260 }, { "epoch": 1.3319349764027268, "grad_norm": 1.9140818928985086, "learning_rate": 5.612135332663138e-06, "log_odds_chosen": 1.953155755996704, "log_odds_ratio": -0.21717992424964905, "logits/chosen": -3.006328821182251, "logits/rejected": -3.037388324737549, "logps/chosen": -0.42650872468948364, "logps/rejected": -1.495060682296753, "loss": 0.272, "nll_loss": 0.2669217586517334, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021325435489416122, "rewards/margins": 0.053427595645189285, "rewards/rejected": -0.074753038585186, "step": 1270 }, { "epoch": 1.3424226533822758, "grad_norm": 1.9500186785754579, "learning_rate": 5.590169943749475e-06, "log_odds_chosen": 1.8904393911361694, "log_odds_ratio": -0.2255454808473587, "logits/chosen": -2.989861011505127, "logits/rejected": -3.0198075771331787, "logps/chosen": -0.424043744802475, "logps/rejected": -1.4651858806610107, "loss": 0.2783, "nll_loss": 0.267769455909729, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02120218798518181, "rewards/margins": 0.05205710977315903, "rewards/rejected": -0.07325930893421173, "step": 1280 }, { "epoch": 1.3529103303618248, "grad_norm": 1.9502765281924526, "learning_rate": 5.568460463897046e-06, "log_odds_chosen": 1.8929240703582764, "log_odds_ratio": -0.21857920289039612, "logits/chosen": -2.9535863399505615, "logits/rejected": -2.9874510765075684, "logps/chosen": -0.45026451349258423, "logps/rejected": -1.4960235357284546, "loss": 0.295, "nll_loss": 0.27629774808883667, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.022513221949338913, "rewards/margins": 0.052287958562374115, "rewards/rejected": -0.07480116933584213, "step": 1290 }, { "epoch": 1.3633980073413738, "grad_norm": 2.2093191033587223, "learning_rate": 5.547001962252292e-06, "log_odds_chosen": 1.7265195846557617, "log_odds_ratio": -0.23279574513435364, "logits/chosen": -2.9012649059295654, "logits/rejected": -2.9128100872039795, "logps/chosen": -0.4365314841270447, "logps/rejected": -1.3402652740478516, "loss": 0.2796, "nll_loss": 0.28851714730262756, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021826574578881264, "rewards/margins": 0.04518669471144676, "rewards/rejected": -0.06701326370239258, "step": 1300 }, { "epoch": 1.3633980073413738, "eval_log_odds_chosen": 0.45076510310173035, "eval_log_odds_ratio": -0.6208177208900452, "eval_logits/chosen": -2.928496837615967, "eval_logits/rejected": -2.9256343841552734, "eval_logps/chosen": -0.8825219869613647, "eval_logps/rejected": -1.1770830154418945, "eval_loss": 0.5392885208129883, "eval_nll_loss": 0.5060464143753052, "eval_rewards/accuracies": 0.64682537317276, "eval_rewards/chosen": -0.044126104563474655, "eval_rewards/margins": 0.014728044159710407, "eval_rewards/rejected": -0.05885414779186249, "eval_runtime": 136.6608, "eval_samples_per_second": 14.591, "eval_steps_per_second": 0.461, "step": 1300 }, { "epoch": 1.3738856843209228, "grad_norm": 1.715926192038861, "learning_rate": 5.525789639955377e-06, "log_odds_chosen": 2.0803933143615723, "log_odds_ratio": -0.21633043885231018, "logits/chosen": -2.926987409591675, "logits/rejected": -2.9622962474823, "logps/chosen": -0.43519288301467896, "logps/rejected": -1.6533997058868408, "loss": 0.2713, "nll_loss": 0.26452213525772095, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.02175964042544365, "rewards/margins": 0.06091034412384033, "rewards/rejected": -0.08266998082399368, "step": 1310 }, { "epoch": 1.3843733613004718, "grad_norm": 2.0174814570503012, "learning_rate": 5.504818825631804e-06, "log_odds_chosen": 2.108902931213379, "log_odds_ratio": -0.1835678517818451, "logits/chosen": -2.96756911277771, "logits/rejected": -2.9531686305999756, "logps/chosen": -0.3781605362892151, "logps/rejected": -1.4976880550384521, "loss": 0.267, "nll_loss": 0.25148090720176697, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.018908025696873665, "rewards/margins": 0.055976372212171555, "rewards/rejected": -0.07488439977169037, "step": 1320 }, { "epoch": 1.394861038280021, "grad_norm": 2.317364085817375, "learning_rate": 5.484084971070817e-06, "log_odds_chosen": 1.9238555431365967, "log_odds_ratio": -0.2074807584285736, "logits/chosen": -2.923131227493286, "logits/rejected": -2.9520606994628906, "logps/chosen": -0.42446833848953247, "logps/rejected": -1.4086004495620728, "loss": 0.2852, "nll_loss": 0.28959181904792786, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021223418414592743, "rewards/margins": 0.049206603318452835, "rewards/rejected": -0.07043002545833588, "step": 1330 }, { "epoch": 1.40534871525957, "grad_norm": 2.165975215343917, "learning_rate": 5.4635836470815305e-06, "log_odds_chosen": 1.8837333917617798, "log_odds_ratio": -0.21855314075946808, "logits/chosen": -2.9127135276794434, "logits/rejected": -2.9249043464660645, "logps/chosen": -0.41960373520851135, "logps/rejected": -1.4260175228118896, "loss": 0.2787, "nll_loss": 0.25244617462158203, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.020980186760425568, "rewards/margins": 0.050320692360401154, "rewards/rejected": -0.07130087912082672, "step": 1340 }, { "epoch": 1.415836392239119, "grad_norm": 1.9224928940953034, "learning_rate": 5.443310539518174e-06, "log_odds_chosen": 2.056159734725952, "log_odds_ratio": -0.19483168423175812, "logits/chosen": -2.956674814224243, "logits/rejected": -2.9572062492370605, "logps/chosen": -0.4208443760871887, "logps/rejected": -1.5285457372665405, "loss": 0.2822, "nll_loss": 0.26951080560684204, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.021042218431830406, "rewards/margins": 0.05538507178425789, "rewards/rejected": -0.07642728835344315, "step": 1350 }, { "epoch": 1.426324069218668, "grad_norm": 2.0115204434239025, "learning_rate": 5.423261445466404e-06, "log_odds_chosen": 1.707457184791565, "log_odds_ratio": -0.2479782998561859, "logits/chosen": -2.915250301361084, "logits/rejected": -2.9445343017578125, "logps/chosen": -0.4267791211605072, "logps/rejected": -1.3377535343170166, "loss": 0.2925, "nll_loss": 0.29825955629348755, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.02133895456790924, "rewards/margins": 0.04554871469736099, "rewards/rejected": -0.06688766926527023, "step": 1360 }, { "epoch": 1.436811746198217, "grad_norm": 2.0083912520624234, "learning_rate": 5.403432269582992e-06, "log_odds_chosen": 1.7433815002441406, "log_odds_ratio": -0.23284384608268738, "logits/chosen": -2.9682974815368652, "logits/rejected": -2.9809725284576416, "logps/chosen": -0.4545938968658447, "logps/rejected": -1.3821640014648438, "loss": 0.2995, "nll_loss": 0.2861328721046448, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.022729698568582535, "rewards/margins": 0.04637850075960159, "rewards/rejected": -0.06910820305347443, "step": 1370 }, { "epoch": 1.447299423177766, "grad_norm": 1.918494069287167, "learning_rate": 5.383819020581656e-06, "log_odds_chosen": 1.839255690574646, "log_odds_ratio": -0.22518055140972137, "logits/chosen": -2.9555628299713135, "logits/rejected": -2.968390703201294, "logps/chosen": -0.4370731711387634, "logps/rejected": -1.4699593782424927, "loss": 0.2859, "nll_loss": 0.28876128792762756, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021853657439351082, "rewards/margins": 0.051644302904605865, "rewards/rejected": -0.0734979659318924, "step": 1380 }, { "epoch": 1.457787100157315, "grad_norm": 1.8701436058229068, "learning_rate": 5.364417807858201e-06, "log_odds_chosen": 2.0006766319274902, "log_odds_ratio": -0.19503512978553772, "logits/chosen": -2.9456233978271484, "logits/rejected": -2.9416487216949463, "logps/chosen": -0.397217720746994, "logps/rejected": -1.458070993423462, "loss": 0.2898, "nll_loss": 0.2990682125091553, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01986088417470455, "rewards/margins": 0.05304265767335892, "rewards/rejected": -0.07290354371070862, "step": 1390 }, { "epoch": 1.4682747771368643, "grad_norm": 1.8947645182805886, "learning_rate": 5.345224838248489e-06, "log_odds_chosen": 1.9478137493133545, "log_odds_ratio": -0.22849062085151672, "logits/chosen": -2.9488446712493896, "logits/rejected": -2.980994462966919, "logps/chosen": -0.38306254148483276, "logps/rejected": -1.40244460105896, "loss": 0.2784, "nll_loss": 0.27079683542251587, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.019153129309415817, "rewards/margins": 0.05096910148859024, "rewards/rejected": -0.07012222707271576, "step": 1400 }, { "epoch": 1.4682747771368643, "eval_log_odds_chosen": 0.4410339295864105, "eval_log_odds_ratio": -0.6236060261726379, "eval_logits/chosen": -2.9594457149505615, "eval_logits/rejected": -2.9583115577697754, "eval_logps/chosen": -0.8884981274604797, "eval_logps/rejected": -1.1784039735794067, "eval_loss": 0.5364598631858826, "eval_nll_loss": 0.5036527514457703, "eval_rewards/accuracies": 0.6527777910232544, "eval_rewards/chosen": -0.044424910098314285, "eval_rewards/margins": 0.014495291747152805, "eval_rewards/rejected": -0.058920200914144516, "eval_runtime": 139.2595, "eval_samples_per_second": 14.319, "eval_steps_per_second": 0.452, "step": 1400 }, { "epoch": 1.4787624541164133, "grad_norm": 2.1665159464201142, "learning_rate": 5.326236412913075e-06, "log_odds_chosen": 1.7970411777496338, "log_odds_ratio": -0.2380552738904953, "logits/chosen": -2.9149088859558105, "logits/rejected": -2.9543135166168213, "logps/chosen": -0.4362480640411377, "logps/rejected": -1.3472230434417725, "loss": 0.29, "nll_loss": 0.2710421681404114, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021812403574585915, "rewards/margins": 0.04554874822497368, "rewards/rejected": -0.06736114621162415, "step": 1410 }, { "epoch": 1.4892501310959623, "grad_norm": 2.196966160421767, "learning_rate": 5.307448924342753e-06, "log_odds_chosen": 1.8308820724487305, "log_odds_ratio": -0.21477296948432922, "logits/chosen": -2.877204179763794, "logits/rejected": -2.932901620864868, "logps/chosen": -0.4031652510166168, "logps/rejected": -1.3179484605789185, "loss": 0.2855, "nll_loss": 0.2783321738243103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02015826478600502, "rewards/margins": 0.04573915898799896, "rewards/rejected": -0.06589742004871368, "step": 1420 }, { "epoch": 1.4997378080755113, "grad_norm": 2.1884907491879084, "learning_rate": 5.28885885347945e-06, "log_odds_chosen": 1.9711707830429077, "log_odds_ratio": -0.20648148655891418, "logits/chosen": -2.954136371612549, "logits/rejected": -2.9814727306365967, "logps/chosen": -0.41374531388282776, "logps/rejected": -1.4304702281951904, "loss": 0.2924, "nll_loss": 0.27289509773254395, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.020687269046902657, "rewards/margins": 0.0508362352848053, "rewards/rejected": -0.071523517370224, "step": 1430 }, { "epoch": 1.5102254850550603, "grad_norm": 2.124176001387226, "learning_rate": 5.270462766947299e-06, "log_odds_chosen": 1.7731349468231201, "log_odds_ratio": -0.2392440289258957, "logits/chosen": -2.9405388832092285, "logits/rejected": -2.9464943408966064, "logps/chosen": -0.4539235234260559, "logps/rejected": -1.403793454170227, "loss": 0.2961, "nll_loss": 0.2940642237663269, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.022696174681186676, "rewards/margins": 0.04749349504709244, "rewards/rejected": -0.07018966972827911, "step": 1440 }, { "epoch": 1.5207131620346095, "grad_norm": 1.8197825407446042, "learning_rate": 5.252257314388902e-06, "log_odds_chosen": 1.7956994771957397, "log_odds_ratio": -0.22454524040222168, "logits/chosen": -2.954716444015503, "logits/rejected": -2.978447437286377, "logps/chosen": -0.4430459439754486, "logps/rejected": -1.4194531440734863, "loss": 0.2777, "nll_loss": 0.24652138352394104, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02215229719877243, "rewards/margins": 0.048820365220308304, "rewards/rejected": -0.07097266614437103, "step": 1450 }, { "epoch": 1.5312008390141583, "grad_norm": 2.1915818543360355, "learning_rate": 5.234239225902137e-06, "log_odds_chosen": 1.9382715225219727, "log_odds_ratio": -0.1963178515434265, "logits/chosen": -2.8938894271850586, "logits/rejected": -2.924325466156006, "logps/chosen": -0.39880725741386414, "logps/rejected": -1.4752063751220703, "loss": 0.2971, "nll_loss": 0.2676003575325012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.019940361380577087, "rewards/margins": 0.05381995439529419, "rewards/rejected": -0.07376032322645187, "step": 1460 }, { "epoch": 1.5416885159937075, "grad_norm": 2.1118618734250307, "learning_rate": 5.216405309573011e-06, "log_odds_chosen": 1.9139398336410522, "log_odds_ratio": -0.19271975755691528, "logits/chosen": -3.0117218494415283, "logits/rejected": -3.0411810874938965, "logps/chosen": -0.42149630188941956, "logps/rejected": -1.471760869026184, "loss": 0.2889, "nll_loss": 0.27934783697128296, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0210748128592968, "rewards/margins": 0.052513234317302704, "rewards/rejected": -0.0735880434513092, "step": 1470 }, { "epoch": 1.5521761929732563, "grad_norm": 2.0510895547316745, "learning_rate": 5.198752449100364e-06, "log_odds_chosen": 2.0376482009887695, "log_odds_ratio": -0.19703765213489532, "logits/chosen": -3.009754180908203, "logits/rejected": -3.016758441925049, "logps/chosen": -0.40712347626686096, "logps/rejected": -1.459837555885315, "loss": 0.2888, "nll_loss": 0.3001149892807007, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.020356174558401108, "rewards/margins": 0.05263570696115494, "rewards/rejected": -0.07299187034368515, "step": 1480 }, { "epoch": 1.5626638699528055, "grad_norm": 2.1669568438399684, "learning_rate": 5.181277601508398e-06, "log_odds_chosen": 1.8304507732391357, "log_odds_ratio": -0.2394884079694748, "logits/chosen": -2.9779343605041504, "logits/rejected": -3.008795738220215, "logps/chosen": -0.4576667249202728, "logps/rejected": -1.4601542949676514, "loss": 0.2888, "nll_loss": 0.29476073384284973, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0228833369910717, "rewards/margins": 0.05012437701225281, "rewards/rejected": -0.07300771772861481, "step": 1490 }, { "epoch": 1.5731515469323545, "grad_norm": 2.372050874462119, "learning_rate": 5.163977794943223e-06, "log_odds_chosen": 1.9750179052352905, "log_odds_ratio": -0.19530083239078522, "logits/chosen": -2.9395532608032227, "logits/rejected": -2.991283893585205, "logps/chosen": -0.42392611503601074, "logps/rejected": -1.5091795921325684, "loss": 0.2873, "nll_loss": 0.2818702757358551, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021196305751800537, "rewards/margins": 0.0542626678943634, "rewards/rejected": -0.07545898109674454, "step": 1500 }, { "epoch": 1.5731515469323545, "eval_log_odds_chosen": 0.4364486038684845, "eval_log_odds_ratio": -0.6225508451461792, "eval_logits/chosen": -2.965731382369995, "eval_logits/rejected": -2.966355323791504, "eval_logps/chosen": -0.8718044757843018, "eval_logps/rejected": -1.158449649810791, "eval_loss": 0.53301602602005, "eval_nll_loss": 0.5004281997680664, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": -0.043590229004621506, "eval_rewards/margins": 0.014332256279885769, "eval_rewards/rejected": -0.05792247876524925, "eval_runtime": 139.8515, "eval_samples_per_second": 14.258, "eval_steps_per_second": 0.45, "step": 1500 }, { "epoch": 1.5836392239119035, "grad_norm": 1.9123802783189798, "learning_rate": 5.146850126549788e-06, "log_odds_chosen": 1.6361440420150757, "log_odds_ratio": -0.26433151960372925, "logits/chosen": -2.943331003189087, "logits/rejected": -2.9721503257751465, "logps/chosen": -0.44553548097610474, "logps/rejected": -1.2933813333511353, "loss": 0.3044, "nll_loss": 0.2870228588581085, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.022276774048805237, "rewards/margins": 0.042392291128635406, "rewards/rejected": -0.06466906517744064, "step": 1510 }, { "epoch": 1.5941269008914527, "grad_norm": 1.9978617693896288, "learning_rate": 5.129891760425772e-06, "log_odds_chosen": 1.872454285621643, "log_odds_ratio": -0.21693451702594757, "logits/chosen": -2.9198169708251953, "logits/rejected": -2.9594712257385254, "logps/chosen": -0.4238964915275574, "logps/rejected": -1.4147742986679077, "loss": 0.2765, "nll_loss": 0.2593707740306854, "rewards/accuracies": 0.9375, "rewards/chosen": -0.021194826811552048, "rewards/margins": 0.049543894827365875, "rewards/rejected": -0.07073871791362762, "step": 1520 }, { "epoch": 1.6046145778710015, "grad_norm": 2.2358254561438966, "learning_rate": 5.113099925649136e-06, "log_odds_chosen": 1.7420718669891357, "log_odds_ratio": -0.2600535750389099, "logits/chosen": -2.9620399475097656, "logits/rejected": -2.997101068496704, "logps/chosen": -0.4705958366394043, "logps/rejected": -1.435579538345337, "loss": 0.2766, "nll_loss": 0.28323301672935486, "rewards/accuracies": 0.96875, "rewards/chosen": -0.023529794067144394, "rewards/margins": 0.048249177634716034, "rewards/rejected": -0.07177898287773132, "step": 1530 }, { "epoch": 1.6151022548505507, "grad_norm": 2.123071067312132, "learning_rate": 5.096471914376255e-06, "log_odds_chosen": 2.0446419715881348, "log_odds_ratio": -0.20973734557628632, "logits/chosen": -2.8849668502807617, "logits/rejected": -2.91094970703125, "logps/chosen": -0.42269793152809143, "logps/rejected": -1.4985077381134033, "loss": 0.2842, "nll_loss": 0.24874058365821838, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.021134894341230392, "rewards/margins": 0.053790487349033356, "rewards/rejected": -0.07492538541555405, "step": 1540 }, { "epoch": 1.6255899318300995, "grad_norm": 1.8574119456068037, "learning_rate": 5.08000508000762e-06, "log_odds_chosen": 1.8896774053573608, "log_odds_ratio": -0.2109728306531906, "logits/chosen": -2.9518914222717285, "logits/rejected": -2.9677398204803467, "logps/chosen": -0.42254775762557983, "logps/rejected": -1.4004069566726685, "loss": 0.2737, "nll_loss": 0.26676517724990845, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02112739160656929, "rewards/margins": 0.04889295622706413, "rewards/rejected": -0.07002034783363342, "step": 1550 }, { "epoch": 1.6360776088096487, "grad_norm": 2.012947859419835, "learning_rate": 5.0636968354183334e-06, "log_odds_chosen": 1.7877776622772217, "log_odds_ratio": -0.2195170670747757, "logits/chosen": -2.916713237762451, "logits/rejected": -2.9442696571350098, "logps/chosen": -0.4229874610900879, "logps/rejected": -1.3620960712432861, "loss": 0.2937, "nll_loss": 0.28985968232154846, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021149372681975365, "rewards/margins": 0.046955425292253494, "rewards/rejected": -0.06810478866100311, "step": 1560 }, { "epoch": 1.6465652857891977, "grad_norm": 1.9554610757973563, "learning_rate": 5.047544651250688e-06, "log_odds_chosen": 1.9977741241455078, "log_odds_ratio": -0.22808516025543213, "logits/chosen": -2.95414137840271, "logits/rejected": -2.9667911529541016, "logps/chosen": -0.40563470125198364, "logps/rejected": -1.493981122970581, "loss": 0.2746, "nll_loss": 0.25610029697418213, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.020281735807657242, "rewards/margins": 0.05441732332110405, "rewards/rejected": -0.07469905912876129, "step": 1570 }, { "epoch": 1.6570529627687467, "grad_norm": 2.2417227837369094, "learning_rate": 5.031546054266276e-06, "log_odds_chosen": 1.8591692447662354, "log_odds_ratio": -0.23143061995506287, "logits/chosen": -3.0023272037506104, "logits/rejected": -3.0128941535949707, "logps/chosen": -0.46788668632507324, "logps/rejected": -1.490392804145813, "loss": 0.2962, "nll_loss": 0.31111472845077515, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.02339433692395687, "rewards/margins": 0.05112530663609505, "rewards/rejected": -0.07451964914798737, "step": 1580 }, { "epoch": 1.667540639748296, "grad_norm": 2.0152925811378846, "learning_rate": 5.015698625755192e-06, "log_odds_chosen": 1.9612891674041748, "log_odds_ratio": -0.22349119186401367, "logits/chosen": -2.9373695850372314, "logits/rejected": -2.9659922122955322, "logps/chosen": -0.40127071738243103, "logps/rejected": -1.4479907751083374, "loss": 0.2939, "nll_loss": 0.2725040912628174, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02006353810429573, "rewards/margins": 0.05233600735664368, "rewards/rejected": -0.07239954173564911, "step": 1590 }, { "epoch": 1.6780283167278447, "grad_norm": 1.9355725247245243, "learning_rate": 5e-06, "log_odds_chosen": 1.8742882013320923, "log_odds_ratio": -0.21055075526237488, "logits/chosen": -2.9387471675872803, "logits/rejected": -2.9844515323638916, "logps/chosen": -0.43298736214637756, "logps/rejected": -1.4716593027114868, "loss": 0.276, "nll_loss": 0.26002392172813416, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02164936624467373, "rewards/margins": 0.05193359777331352, "rewards/rejected": -0.0735829621553421, "step": 1600 }, { "epoch": 1.6780283167278447, "eval_log_odds_chosen": 0.4569767117500305, "eval_log_odds_ratio": -0.6159732937812805, "eval_logits/chosen": -2.932406187057495, "eval_logits/rejected": -2.9357593059539795, "eval_logps/chosen": -0.8832988142967224, "eval_logps/rejected": -1.1878604888916016, "eval_loss": 0.5367424488067627, "eval_nll_loss": 0.5040929913520813, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.04416494444012642, "eval_rewards/margins": 0.015228085219860077, "eval_rewards/rejected": -0.0593930259346962, "eval_runtime": 138.0302, "eval_samples_per_second": 14.446, "eval_steps_per_second": 0.456, "step": 1600 }, { "epoch": 1.688515993707394, "grad_norm": 1.9448584897613828, "learning_rate": 4.984447862792268e-06, "log_odds_chosen": 2.0258474349975586, "log_odds_ratio": -0.2537488639354706, "logits/chosen": -2.9370341300964355, "logits/rejected": -2.959137439727783, "logps/chosen": -0.4205976128578186, "logps/rejected": -1.51674485206604, "loss": 0.2805, "nll_loss": 0.2590489387512207, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.0210298802703619, "rewards/margins": 0.054807353764772415, "rewards/rejected": -0.07583723217248917, "step": 1610 }, { "epoch": 1.6990036706869427, "grad_norm": 2.2985078763398503, "learning_rate": 4.969039949999534e-06, "log_odds_chosen": 1.9926655292510986, "log_odds_ratio": -0.209347203373909, "logits/chosen": -2.9543755054473877, "logits/rejected": -2.979072093963623, "logps/chosen": -0.4242986738681793, "logps/rejected": -1.527527093887329, "loss": 0.2829, "nll_loss": 0.28810399770736694, "rewards/accuracies": 0.96875, "rewards/chosen": -0.021214932203292847, "rewards/margins": 0.05516142398118973, "rewards/rejected": -0.07637635618448257, "step": 1620 }, { "epoch": 1.709491347666492, "grad_norm": 1.978508364107179, "learning_rate": 4.9537740461807e-06, "log_odds_chosen": 1.7989534139633179, "log_odds_ratio": -0.22280922532081604, "logits/chosen": -2.9272611141204834, "logits/rejected": -2.933403968811035, "logps/chosen": -0.4125545024871826, "logps/rejected": -1.371010422706604, "loss": 0.2723, "nll_loss": 0.27273207902908325, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.02062772400677204, "rewards/margins": 0.04792279377579689, "rewards/rejected": -0.06855051219463348, "step": 1630 }, { "epoch": 1.719979024646041, "grad_norm": 2.5294696595366375, "learning_rate": 4.938647983247949e-06, "log_odds_chosen": 1.8762280941009521, "log_odds_ratio": -0.23052379488945007, "logits/chosen": -2.9176859855651855, "logits/rejected": -2.937653064727783, "logps/chosen": -0.4308241307735443, "logps/rejected": -1.4621460437774658, "loss": 0.2707, "nll_loss": 0.24837055802345276, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.021541204303503036, "rewards/margins": 0.05156610533595085, "rewards/rejected": -0.07310730963945389, "step": 1640 }, { "epoch": 1.73046670162559, "grad_norm": 1.9845638290615137, "learning_rate": 4.9236596391733095e-06, "log_odds_chosen": 1.9353539943695068, "log_odds_ratio": -0.22219491004943848, "logits/chosen": -2.9324100017547607, "logits/rejected": -2.9492199420928955, "logps/chosen": -0.4047132134437561, "logps/rejected": -1.447388768196106, "loss": 0.2921, "nll_loss": 0.2786787152290344, "rewards/accuracies": 0.96875, "rewards/chosen": -0.020235659554600716, "rewards/margins": 0.05213377624750137, "rewards/rejected": -0.07236944139003754, "step": 1650 }, { "epoch": 1.740954378605139, "grad_norm": 2.1313335783196914, "learning_rate": 4.9088069367381605e-06, "log_odds_chosen": 1.9517314434051514, "log_odds_ratio": -0.19579176604747772, "logits/chosen": -2.9807212352752686, "logits/rejected": -3.004951000213623, "logps/chosen": -0.4060528874397278, "logps/rejected": -1.4121928215026855, "loss": 0.2851, "nll_loss": 0.27768373489379883, "rewards/accuracies": 0.96875, "rewards/chosen": -0.020302647724747658, "rewards/margins": 0.05030699446797371, "rewards/rejected": -0.07060963660478592, "step": 1660 }, { "epoch": 1.751442055584688, "grad_norm": 1.893515732849545, "learning_rate": 4.894087842323964e-06, "log_odds_chosen": 1.8834346532821655, "log_odds_ratio": -0.20945528149604797, "logits/chosen": -2.9691452980041504, "logits/rejected": -3.0074009895324707, "logps/chosen": -0.4027465283870697, "logps/rejected": -1.374361276626587, "loss": 0.2926, "nll_loss": 0.26718848943710327, "rewards/accuracies": 0.96875, "rewards/chosen": -0.020137326791882515, "rewards/margins": 0.04858074709773064, "rewards/rejected": -0.0687180757522583, "step": 1670 }, { "epoch": 1.7619297325642371, "grad_norm": 2.0915190498544263, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 1.8165385723114014, "log_odds_ratio": -0.21812555193901062, "logits/chosen": -3.0662589073181152, "logits/rejected": -3.089877128601074, "logps/chosen": -0.40138545632362366, "logps/rejected": -1.3200931549072266, "loss": 0.2998, "nll_loss": 0.29331129789352417, "rewards/accuracies": 0.96875, "rewards/chosen": -0.020069271326065063, "rewards/margins": 0.04593539237976074, "rewards/rejected": -0.06600465625524521, "step": 1680 }, { "epoch": 1.772417409543786, "grad_norm": 2.1457501870245417, "learning_rate": 4.865042554105199e-06, "log_odds_chosen": 1.869539499282837, "log_odds_ratio": -0.2280159890651703, "logits/chosen": -2.991488456726074, "logits/rejected": -2.98630690574646, "logps/chosen": -0.4090718626976013, "logps/rejected": -1.36448073387146, "loss": 0.2858, "nll_loss": 0.2776942253112793, "rewards/accuracies": 0.9375, "rewards/chosen": -0.020453594624996185, "rewards/margins": 0.04777044430375099, "rewards/rejected": -0.06822402775287628, "step": 1690 }, { "epoch": 1.7829050865233351, "grad_norm": 2.3665022543070093, "learning_rate": 4.850712500726659e-06, "log_odds_chosen": 1.9791815280914307, "log_odds_ratio": -0.19878429174423218, "logits/chosen": -2.9824297428131104, "logits/rejected": -3.022101640701294, "logps/chosen": -0.4144412875175476, "logps/rejected": -1.4597278833389282, "loss": 0.2715, "nll_loss": 0.28446242213249207, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02072206512093544, "rewards/margins": 0.052264340221881866, "rewards/rejected": -0.07298640161752701, "step": 1700 }, { "epoch": 1.7829050865233351, "eval_log_odds_chosen": 0.4425116777420044, "eval_log_odds_ratio": -0.6271889209747314, "eval_logits/chosen": -3.019425392150879, "eval_logits/rejected": -3.020922899246216, "eval_logps/chosen": -0.8710321187973022, "eval_logps/rejected": -1.1603412628173828, "eval_loss": 0.5348805785179138, "eval_nll_loss": 0.5024282336235046, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": -0.04355160519480705, "eval_rewards/margins": 0.014465462416410446, "eval_rewards/rejected": -0.0580170638859272, "eval_runtime": 136.3216, "eval_samples_per_second": 14.627, "eval_steps_per_second": 0.462, "step": 1700 }, { "epoch": 1.7933927635028841, "grad_norm": 1.847904822728325, "learning_rate": 4.836508334066745e-06, "log_odds_chosen": 1.9795688390731812, "log_odds_ratio": -0.2207694798707962, "logits/chosen": -3.0054497718811035, "logits/rejected": -3.0154829025268555, "logps/chosen": -0.4081927239894867, "logps/rejected": -1.4390795230865479, "loss": 0.264, "nll_loss": 0.24716749787330627, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.020409639924764633, "rewards/margins": 0.051544345915317535, "rewards/rejected": -0.07195398211479187, "step": 1710 }, { "epoch": 1.8038804404824331, "grad_norm": 1.7750027737169987, "learning_rate": 4.822428221704122e-06, "log_odds_chosen": 1.926945686340332, "log_odds_ratio": -0.22434870898723602, "logits/chosen": -3.0268912315368652, "logits/rejected": -3.035226583480835, "logps/chosen": -0.43201422691345215, "logps/rejected": -1.498827576637268, "loss": 0.2864, "nll_loss": 0.25820285081863403, "rewards/accuracies": 0.9375, "rewards/chosen": -0.021600713953375816, "rewards/margins": 0.053340665996074677, "rewards/rejected": -0.07494138181209564, "step": 1720 }, { "epoch": 1.8143681174619821, "grad_norm": 2.0662716537028354, "learning_rate": 4.8084703683434506e-06, "log_odds_chosen": 1.974784255027771, "log_odds_ratio": -0.21157677471637726, "logits/chosen": -3.010627031326294, "logits/rejected": -2.9982268810272217, "logps/chosen": -0.4355824589729309, "logps/rejected": -1.5232689380645752, "loss": 0.2903, "nll_loss": 0.2755037248134613, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021779123693704605, "rewards/margins": 0.05438433215022087, "rewards/rejected": -0.07616344839334488, "step": 1730 }, { "epoch": 1.8248557944415311, "grad_norm": 2.1360074988574445, "learning_rate": 4.794633014853843e-06, "log_odds_chosen": 1.847333312034607, "log_odds_ratio": -0.2377551794052124, "logits/chosen": -3.006833553314209, "logits/rejected": -3.0122854709625244, "logps/chosen": -0.4366019368171692, "logps/rejected": -1.4164003133773804, "loss": 0.304, "nll_loss": 0.29017573595046997, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.02183009497821331, "rewards/margins": 0.0489899218082428, "rewards/rejected": -0.07082001864910126, "step": 1740 }, { "epoch": 1.8353434714210803, "grad_norm": 1.9891927691131213, "learning_rate": 4.780914437337575e-06, "log_odds_chosen": 1.8539154529571533, "log_odds_ratio": -0.23103201389312744, "logits/chosen": -2.9830121994018555, "logits/rejected": -2.9818801879882812, "logps/chosen": -0.4274306297302246, "logps/rejected": -1.4196858406066895, "loss": 0.2958, "nll_loss": 0.2937518060207367, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.02137153223156929, "rewards/margins": 0.049612756818532944, "rewards/rejected": -0.07098428905010223, "step": 1750 }, { "epoch": 1.8458311484006291, "grad_norm": 1.827588117065436, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 2.2149860858917236, "log_odds_ratio": -0.2075362503528595, "logits/chosen": -2.9530441761016846, "logits/rejected": -2.9839682579040527, "logps/chosen": -0.391355037689209, "logps/rejected": -1.6375446319580078, "loss": 0.2721, "nll_loss": 0.2694031000137329, "rewards/accuracies": 0.9375, "rewards/chosen": -0.01956775411963463, "rewards/margins": 0.06230948120355606, "rewards/rejected": -0.08187723159790039, "step": 1760 }, { "epoch": 1.8563188253801783, "grad_norm": 1.8203811521479276, "learning_rate": 4.7538268854152834e-06, "log_odds_chosen": 1.7995598316192627, "log_odds_ratio": -0.244699165225029, "logits/chosen": -3.011706829071045, "logits/rejected": -3.024837017059326, "logps/chosen": -0.4394347071647644, "logps/rejected": -1.4033676385879517, "loss": 0.2771, "nll_loss": 0.25858861207962036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02197173610329628, "rewards/margins": 0.04819665104150772, "rewards/rejected": -0.0701683908700943, "step": 1770 }, { "epoch": 1.8668065023597273, "grad_norm": 2.2623646165216313, "learning_rate": 4.740454631399773e-06, "log_odds_chosen": 1.962255835533142, "log_odds_ratio": -0.23438410460948944, "logits/chosen": -2.949073314666748, "logits/rejected": -2.989229202270508, "logps/chosen": -0.3985145688056946, "logps/rejected": -1.4544894695281982, "loss": 0.2941, "nll_loss": 0.29249390959739685, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.019925730302929878, "rewards/margins": 0.052798740565776825, "rewards/rejected": -0.07272447645664215, "step": 1780 }, { "epoch": 1.8772941793392763, "grad_norm": 2.5104520915032538, "learning_rate": 4.727194592470656e-06, "log_odds_chosen": 2.0800955295562744, "log_odds_ratio": -0.19981749355793, "logits/chosen": -2.9771628379821777, "logits/rejected": -3.0005829334259033, "logps/chosen": -0.42085084319114685, "logps/rejected": -1.603994607925415, "loss": 0.2844, "nll_loss": 0.2677140235900879, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.02104254439473152, "rewards/margins": 0.05915718153119087, "rewards/rejected": -0.08019973337650299, "step": 1790 }, { "epoch": 1.8877818563188253, "grad_norm": 2.077913541951449, "learning_rate": 4.714045207910318e-06, "log_odds_chosen": 2.1426799297332764, "log_odds_ratio": -0.18838170170783997, "logits/chosen": -2.950552463531494, "logits/rejected": -2.9804420471191406, "logps/chosen": -0.41320332884788513, "logps/rejected": -1.622671365737915, "loss": 0.2717, "nll_loss": 0.2544669210910797, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.020660167559981346, "rewards/margins": 0.060473401099443436, "rewards/rejected": -0.08113356679677963, "step": 1800 }, { "epoch": 1.8877818563188253, "eval_log_odds_chosen": 0.48237088322639465, "eval_log_odds_ratio": -0.6183955669403076, "eval_logits/chosen": -2.9562783241271973, "eval_logits/rejected": -2.957892894744873, "eval_logps/chosen": -0.8997318148612976, "eval_logps/rejected": -1.2325206995010376, "eval_loss": 0.5340895652770996, "eval_nll_loss": 0.5023403763771057, "eval_rewards/accuracies": 0.6547619104385376, "eval_rewards/chosen": -0.04498659446835518, "eval_rewards/margins": 0.01663944497704506, "eval_rewards/rejected": -0.06162603944540024, "eval_runtime": 136.1464, "eval_samples_per_second": 14.646, "eval_steps_per_second": 0.463, "step": 1800 }, { "epoch": 1.8982695332983743, "grad_norm": 1.896252578291677, "learning_rate": 4.701004947222685e-06, "log_odds_chosen": 2.0811541080474854, "log_odds_ratio": -0.20500631630420685, "logits/chosen": -3.000387668609619, "logits/rejected": -2.983591079711914, "logps/chosen": -0.4098430573940277, "logps/rejected": -1.608665108680725, "loss": 0.2794, "nll_loss": 0.25453388690948486, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.020492153242230415, "rewards/margins": 0.05994110181927681, "rewards/rejected": -0.08043324947357178, "step": 1810 }, { "epoch": 1.9087572102779236, "grad_norm": 2.019085371673625, "learning_rate": 4.688072309384955e-06, "log_odds_chosen": 2.0144619941711426, "log_odds_ratio": -0.2020682841539383, "logits/chosen": -2.9534127712249756, "logits/rejected": -2.9533755779266357, "logps/chosen": -0.3999931216239929, "logps/rejected": -1.4992988109588623, "loss": 0.2775, "nll_loss": 0.26274845004081726, "rewards/accuracies": 0.96875, "rewards/chosen": -0.019999656826257706, "rewards/margins": 0.054965294897556305, "rewards/rejected": -0.07496494799852371, "step": 1820 }, { "epoch": 1.9192448872574723, "grad_norm": 1.9263871107241788, "learning_rate": 4.675245822121844e-06, "log_odds_chosen": 2.0367493629455566, "log_odds_ratio": -0.20607483386993408, "logits/chosen": -2.9868836402893066, "logits/rejected": -3.000213861465454, "logps/chosen": -0.4244080185890198, "logps/rejected": -1.5761488676071167, "loss": 0.2923, "nll_loss": 0.2808459997177124, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02122039906680584, "rewards/margins": 0.057587046176195145, "rewards/rejected": -0.07880743592977524, "step": 1830 }, { "epoch": 1.9297325642370216, "grad_norm": 2.1487838733941365, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 2.0472216606140137, "log_odds_ratio": -0.22086529433727264, "logits/chosen": -2.9925904273986816, "logits/rejected": -2.985816240310669, "logps/chosen": -0.4373515248298645, "logps/rejected": -1.5831472873687744, "loss": 0.2713, "nll_loss": 0.2551635801792145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021867576986551285, "rewards/margins": 0.057289790362119675, "rewards/rejected": -0.07915736734867096, "step": 1840 }, { "epoch": 1.9402202412165706, "grad_norm": 2.0463386352717112, "learning_rate": 4.649905549752772e-06, "log_odds_chosen": 2.1467113494873047, "log_odds_ratio": -0.21497011184692383, "logits/chosen": -2.938457727432251, "logits/rejected": -2.9367523193359375, "logps/chosen": -0.4192470610141754, "logps/rejected": -1.63271164894104, "loss": 0.2767, "nll_loss": 0.2981775999069214, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.02096235193312168, "rewards/margins": 0.06067322567105293, "rewards/rejected": -0.08163557946681976, "step": 1850 }, { "epoch": 1.9507079181961196, "grad_norm": 1.9930187660935812, "learning_rate": 4.6373889576016826e-06, "log_odds_chosen": 2.145296573638916, "log_odds_ratio": -0.19072812795639038, "logits/chosen": -2.9529764652252197, "logits/rejected": -2.960404634475708, "logps/chosen": -0.407731294631958, "logps/rejected": -1.5777407884597778, "loss": 0.2761, "nll_loss": 0.2852553129196167, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.02038656547665596, "rewards/margins": 0.05850047990679741, "rewards/rejected": -0.07888703793287277, "step": 1860 }, { "epoch": 1.9611955951756685, "grad_norm": 2.0042665222271756, "learning_rate": 4.624972900628803e-06, "log_odds_chosen": 2.0522494316101074, "log_odds_ratio": -0.20059652626514435, "logits/chosen": -2.932502269744873, "logits/rejected": -2.9307363033294678, "logps/chosen": -0.4203645586967468, "logps/rejected": -1.5539976358413696, "loss": 0.276, "nll_loss": 0.2738272547721863, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02101822756230831, "rewards/margins": 0.05668165162205696, "rewards/rejected": -0.07769988477230072, "step": 1870 }, { "epoch": 1.9716832721552175, "grad_norm": 2.0226547316915258, "learning_rate": 4.6126560401444256e-06, "log_odds_chosen": 2.0710301399230957, "log_odds_ratio": -0.19392071664333344, "logits/chosen": -3.015066623687744, "logits/rejected": -2.99493145942688, "logps/chosen": -0.43072837591171265, "logps/rejected": -1.6065874099731445, "loss": 0.2748, "nll_loss": 0.2821330428123474, "rewards/accuracies": 0.96875, "rewards/chosen": -0.02153642103075981, "rewards/margins": 0.05879295617341995, "rewards/rejected": -0.08032937347888947, "step": 1880 }, { "epoch": 1.9821709491347668, "grad_norm": 2.567857697275732, "learning_rate": 4.600437062282362e-06, "log_odds_chosen": 1.9227994680404663, "log_odds_ratio": -0.2224545031785965, "logits/chosen": -3.0251965522766113, "logits/rejected": -2.993910789489746, "logps/chosen": -0.4456098675727844, "logps/rejected": -1.529626488685608, "loss": 0.2788, "nll_loss": 0.28787270188331604, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0222804956138134, "rewards/margins": 0.054200828075408936, "rewards/rejected": -0.07648131996393204, "step": 1890 }, { "epoch": 1.9926586261143155, "grad_norm": 2.1545883447921654, "learning_rate": 4.588314677411235e-06, "log_odds_chosen": 2.2162415981292725, "log_odds_ratio": -0.20383968949317932, "logits/chosen": -3.039658784866333, "logits/rejected": -3.022245407104492, "logps/chosen": -0.420427143573761, "logps/rejected": -1.6983455419540405, "loss": 0.2857, "nll_loss": 0.24534273147583008, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.02102135680615902, "rewards/margins": 0.06389592587947845, "rewards/rejected": -0.08491728454828262, "step": 1900 }, { "epoch": 1.9926586261143155, "eval_log_odds_chosen": 0.48923251032829285, "eval_log_odds_ratio": -0.6193312406539917, "eval_logits/chosen": -3.0350046157836914, "eval_logits/rejected": -3.0279133319854736, "eval_logps/chosen": -0.908783495426178, "eval_logps/rejected": -1.2409300804138184, "eval_loss": 0.5407980680465698, "eval_nll_loss": 0.5090586543083191, "eval_rewards/accuracies": 0.6547619104385376, "eval_rewards/chosen": -0.04543917626142502, "eval_rewards/margins": 0.016607332974672318, "eval_rewards/rejected": -0.062046512961387634, "eval_runtime": 137.1653, "eval_samples_per_second": 14.537, "eval_steps_per_second": 0.459, "step": 1900 }, { "epoch": 2.0031463030938648, "grad_norm": 2.4971175632899385, "learning_rate": 4.576287619562756e-06, "log_odds_chosen": 2.549215793609619, "log_odds_ratio": -0.13884183764457703, "logits/chosen": -3.0293986797332764, "logits/rejected": -3.0052542686462402, "logps/chosen": -0.3389069139957428, "logps/rejected": -1.6784775257110596, "loss": 0.2535, "nll_loss": 0.2399848997592926, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.01694534718990326, "rewards/margins": 0.06697852909564972, "rewards/rejected": -0.08392388373613358, "step": 1910 }, { "epoch": 2.0136339800734135, "grad_norm": 2.5031224034871475, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 4.333657741546631, "log_odds_ratio": -0.02762582339346409, "logits/chosen": -2.869049549102783, "logits/rejected": -2.8186068534851074, "logps/chosen": -0.1433320939540863, "logps/rejected": -2.334181547164917, "loss": 0.1236, "nll_loss": 0.11940746009349823, "rewards/accuracies": 1.0, "rewards/chosen": -0.007166605442762375, "rewards/margins": 0.10954247415065765, "rewards/rejected": -0.11670909076929092, "step": 1920 }, { "epoch": 2.0241216570529628, "grad_norm": 1.9586057770651872, "learning_rate": 4.552514536059854e-06, "log_odds_chosen": 3.8062407970428467, "log_odds_ratio": -0.0499381422996521, "logits/chosen": -2.9369876384735107, "logits/rejected": -2.963967800140381, "logps/chosen": -0.1607118844985962, "logps/rejected": -1.9827187061309814, "loss": 0.116, "nll_loss": 0.11325522512197495, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.00803559459745884, "rewards/margins": 0.09110033512115479, "rewards/rejected": -0.09913593530654907, "step": 1930 }, { "epoch": 2.034609334032512, "grad_norm": 2.173705177159571, "learning_rate": 4.540766091864998e-06, "log_odds_chosen": 3.9211831092834473, "log_odds_ratio": -0.03853369504213333, "logits/chosen": -2.848071575164795, "logits/rejected": -2.927175760269165, "logps/chosen": -0.14356736838817596, "logps/rejected": -1.959979772567749, "loss": 0.1167, "nll_loss": 0.11882974952459335, "rewards/accuracies": 1.0, "rewards/chosen": -0.007178368978202343, "rewards/margins": 0.09082063287496567, "rewards/rejected": -0.09799900650978088, "step": 1940 }, { "epoch": 2.0450970110120608, "grad_norm": 1.7557144572827617, "learning_rate": 4.529108136578383e-06, "log_odds_chosen": 4.060091495513916, "log_odds_ratio": -0.028795290738344193, "logits/chosen": -2.8138527870178223, "logits/rejected": -2.8606162071228027, "logps/chosen": -0.13301293551921844, "logps/rejected": -2.0062737464904785, "loss": 0.1151, "nll_loss": 0.1191815584897995, "rewards/accuracies": 1.0, "rewards/chosen": -0.006650646682828665, "rewards/margins": 0.09366302937269211, "rewards/rejected": -0.10031367838382721, "step": 1950 }, { "epoch": 2.05558468799161, "grad_norm": 1.69960315567237, "learning_rate": 4.517539514526257e-06, "log_odds_chosen": 4.352217674255371, "log_odds_ratio": -0.03757786005735397, "logits/chosen": -2.819655656814575, "logits/rejected": -2.8428378105163574, "logps/chosen": -0.14081783592700958, "logps/rejected": -2.33030104637146, "loss": 0.1135, "nll_loss": 0.11204487085342407, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007040892727673054, "rewards/margins": 0.10947415977716446, "rewards/rejected": -0.11651506274938583, "step": 1960 }, { "epoch": 2.0660723649711588, "grad_norm": 1.991621297994473, "learning_rate": 4.506059090593329e-06, "log_odds_chosen": 4.156961917877197, "log_odds_ratio": -0.0386335626244545, "logits/chosen": -2.8222968578338623, "logits/rejected": -2.880376100540161, "logps/chosen": -0.15631213784217834, "logps/rejected": -2.2803502082824707, "loss": 0.1083, "nll_loss": 0.11318318545818329, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007815606892108917, "rewards/margins": 0.1062019094824791, "rewards/rejected": -0.11401752382516861, "step": 1970 }, { "epoch": 2.076560041950708, "grad_norm": 1.8671392728507943, "learning_rate": 4.4946657497549474e-06, "log_odds_chosen": 4.751786708831787, "log_odds_ratio": -0.02287628874182701, "logits/chosen": -2.8250374794006348, "logits/rejected": -2.858389377593994, "logps/chosen": -0.136850968003273, "logps/rejected": -2.61843204498291, "loss": 0.1149, "nll_loss": 0.11261866241693497, "rewards/accuracies": 1.0, "rewards/chosen": -0.006842548493295908, "rewards/margins": 0.12407903373241425, "rewards/rejected": -0.13092158734798431, "step": 1980 }, { "epoch": 2.0870477189302568, "grad_norm": 2.047221073846021, "learning_rate": 4.483358396622204e-06, "log_odds_chosen": 4.551729202270508, "log_odds_ratio": -0.029045408591628075, "logits/chosen": -2.8212010860443115, "logits/rejected": -2.863682270050049, "logps/chosen": -0.13936151564121246, "logps/rejected": -2.4473021030426025, "loss": 0.1129, "nll_loss": 0.11166741698980331, "rewards/accuracies": 1.0, "rewards/chosen": -0.006968076340854168, "rewards/margins": 0.11539702117443085, "rewards/rejected": -0.12236510217189789, "step": 1990 }, { "epoch": 2.097535395909806, "grad_norm": 2.1099833794179723, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 4.558366298675537, "log_odds_ratio": -0.01906474307179451, "logits/chosen": -2.8424153327941895, "logits/rejected": -2.877136707305908, "logps/chosen": -0.14121726155281067, "logps/rejected": -2.4738833904266357, "loss": 0.1137, "nll_loss": 0.1110328808426857, "rewards/accuracies": 1.0, "rewards/chosen": -0.0070608630776405334, "rewards/margins": 0.11663329601287842, "rewards/rejected": -0.12369415909051895, "step": 2000 }, { "epoch": 2.097535395909806, "eval_log_odds_chosen": 0.5767443776130676, "eval_log_odds_ratio": -0.6272528171539307, "eval_logits/chosen": -2.87036395072937, "eval_logits/rejected": -2.881497383117676, "eval_logps/chosen": -1.2408413887023926, "eval_logps/rejected": -1.6761136054992676, "eval_loss": 0.6877180337905884, "eval_nll_loss": 0.6538823843002319, "eval_rewards/accuracies": 0.670634925365448, "eval_rewards/chosen": -0.06204206869006157, "eval_rewards/margins": 0.021763615310192108, "eval_rewards/rejected": -0.08380568027496338, "eval_runtime": 137.068, "eval_samples_per_second": 14.548, "eval_steps_per_second": 0.46, "step": 2000 }, { "epoch": 2.108023072889355, "grad_norm": 1.7758830781899906, "learning_rate": 4.4609973674547055e-06, "log_odds_chosen": 4.593904495239258, "log_odds_ratio": -0.033291045576334, "logits/chosen": -2.856330394744873, "logits/rejected": -2.8690733909606934, "logps/chosen": -0.1400183141231537, "logps/rejected": -2.536652088165283, "loss": 0.1039, "nll_loss": 0.10139288008213043, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007000915706157684, "rewards/margins": 0.11983168125152588, "rewards/rejected": -0.12683258950710297, "step": 2010 }, { "epoch": 2.118510749868904, "grad_norm": 2.6416736862275076, "learning_rate": 4.449941594899848e-06, "log_odds_chosen": 4.607335090637207, "log_odds_ratio": -0.028559138998389244, "logits/chosen": -2.7992746829986572, "logits/rejected": -2.8301546573638916, "logps/chosen": -0.14062660932540894, "logps/rejected": -2.5437684059143066, "loss": 0.1201, "nll_loss": 0.1216670423746109, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007031330373138189, "rewards/margins": 0.12015708535909653, "rewards/rejected": -0.12718841433525085, "step": 2020 }, { "epoch": 2.128998426848453, "grad_norm": 2.094070218470564, "learning_rate": 4.438967616184754e-06, "log_odds_chosen": 4.340805530548096, "log_odds_ratio": -0.027936171740293503, "logits/chosen": -2.823608875274658, "logits/rejected": -2.8253750801086426, "logps/chosen": -0.13957419991493225, "logps/rejected": -2.268900156021118, "loss": 0.1108, "nll_loss": 0.1126783937215805, "rewards/accuracies": 1.0, "rewards/chosen": -0.006978710647672415, "rewards/margins": 0.10646629333496094, "rewards/rejected": -0.11344502121210098, "step": 2030 }, { "epoch": 2.139486103828002, "grad_norm": 2.222098137194295, "learning_rate": 4.428074427700477e-06, "log_odds_chosen": 4.698141098022461, "log_odds_ratio": -0.02707051672041416, "logits/chosen": -2.8169960975646973, "logits/rejected": -2.8297157287597656, "logps/chosen": -0.1413937509059906, "logps/rejected": -2.65130877494812, "loss": 0.1166, "nll_loss": 0.11614535748958588, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007069687359035015, "rewards/margins": 0.1254957616329193, "rewards/rejected": -0.1325654536485672, "step": 2040 }, { "epoch": 2.149973780807551, "grad_norm": 2.1988466339750317, "learning_rate": 4.417261042993862e-06, "log_odds_chosen": 4.824273109436035, "log_odds_ratio": -0.022720973938703537, "logits/chosen": -2.8039610385894775, "logits/rejected": -2.795748710632324, "logps/chosen": -0.12069626152515411, "logps/rejected": -2.613525390625, "loss": 0.1113, "nll_loss": 0.10357411205768585, "rewards/accuracies": 1.0, "rewards/chosen": -0.006034812889993191, "rewards/margins": 0.12464147806167603, "rewards/rejected": -0.1306762993335724, "step": 2050 }, { "epoch": 2.1604614577871, "grad_norm": 1.9312492998690272, "learning_rate": 4.406526492392318e-06, "log_odds_chosen": 4.532221794128418, "log_odds_ratio": -0.025564473122358322, "logits/chosen": -2.856283664703369, "logits/rejected": -2.847923994064331, "logps/chosen": -0.15458881855010986, "logps/rejected": -2.556361198425293, "loss": 0.1171, "nll_loss": 0.1105358749628067, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007729442324489355, "rewards/margins": 0.1200886145234108, "rewards/rejected": -0.1278180480003357, "step": 2060 }, { "epoch": 2.170949134766649, "grad_norm": 2.184212774032157, "learning_rate": 4.39586982263858e-06, "log_odds_chosen": 4.760067462921143, "log_odds_ratio": -0.025417357683181763, "logits/chosen": -2.8176796436309814, "logits/rejected": -2.818103313446045, "logps/chosen": -0.15180301666259766, "logps/rejected": -2.774660110473633, "loss": 0.1148, "nll_loss": 0.11588319391012192, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007590149994939566, "rewards/margins": 0.13114285469055176, "rewards/rejected": -0.13873299956321716, "step": 2070 }, { "epoch": 2.1814368117461984, "grad_norm": 2.151555777196694, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 4.732907772064209, "log_odds_ratio": -0.026212304830551147, "logits/chosen": -2.859835147857666, "logits/rejected": -2.857645034790039, "logps/chosen": -0.13824030756950378, "logps/rejected": -2.6506001949310303, "loss": 0.1132, "nll_loss": 0.11115143448114395, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.006912014447152615, "rewards/margins": 0.12561801075935364, "rewards/rejected": -0.13253000378608704, "step": 2080 }, { "epoch": 2.191924488725747, "grad_norm": 3.2431795321399486, "learning_rate": 4.374786392598072e-06, "log_odds_chosen": 4.578325271606445, "log_odds_ratio": -0.03994257375597954, "logits/chosen": -2.8212687969207764, "logits/rejected": -2.7516632080078125, "logps/chosen": -0.1504596322774887, "logps/rejected": -2.5710039138793945, "loss": 0.1095, "nll_loss": 0.10720662772655487, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007522981613874435, "rewards/margins": 0.12102720886468887, "rewards/rejected": -0.128550186753273, "step": 2090 }, { "epoch": 2.2024121657052964, "grad_norm": 2.6693753745610076, "learning_rate": 4.364357804719848e-06, "log_odds_chosen": 4.707537651062012, "log_odds_ratio": -0.025204619392752647, "logits/chosen": -2.798999309539795, "logits/rejected": -2.794037342071533, "logps/chosen": -0.15521793067455292, "logps/rejected": -2.689946174621582, "loss": 0.1192, "nll_loss": 0.12550954520702362, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007760896347463131, "rewards/margins": 0.12673643231391907, "rewards/rejected": -0.13449731469154358, "step": 2100 }, { "epoch": 2.2024121657052964, "eval_log_odds_chosen": 0.6958096623420715, "eval_log_odds_ratio": -0.6209548115730286, "eval_logits/chosen": -2.837247610092163, "eval_logits/rejected": -2.8433148860931396, "eval_logps/chosen": -1.4121639728546143, "eval_logps/rejected": -1.9619879722595215, "eval_loss": 0.7576995491981506, "eval_nll_loss": 0.7199162244796753, "eval_rewards/accuracies": 0.6726190447807312, "eval_rewards/chosen": -0.07060819864273071, "eval_rewards/margins": 0.027491191402077675, "eval_rewards/rejected": -0.09809939563274384, "eval_runtime": 136.9058, "eval_samples_per_second": 14.565, "eval_steps_per_second": 0.46, "step": 2100 }, { "epoch": 2.212899842684845, "grad_norm": 1.7712476287108132, "learning_rate": 4.354003441841081e-06, "log_odds_chosen": 4.905824184417725, "log_odds_ratio": -0.02992095984518528, "logits/chosen": -2.8259618282318115, "logits/rejected": -2.760521650314331, "logps/chosen": -0.13811610639095306, "logps/rejected": -2.7983617782592773, "loss": 0.1173, "nll_loss": 0.12010955810546875, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.006905805319547653, "rewards/margins": 0.13301227986812592, "rewards/rejected": -0.13991808891296387, "step": 2110 }, { "epoch": 2.2233875196643944, "grad_norm": 1.6446106852737563, "learning_rate": 4.3437224276306945e-06, "log_odds_chosen": 4.906925201416016, "log_odds_ratio": -0.017224887385964394, "logits/chosen": -2.838736057281494, "logits/rejected": -2.8536746501922607, "logps/chosen": -0.16129423677921295, "logps/rejected": -2.8627591133117676, "loss": 0.1147, "nll_loss": 0.12654295563697815, "rewards/accuracies": 1.0, "rewards/chosen": -0.008064712397754192, "rewards/margins": 0.1350732445716858, "rewards/rejected": -0.14313796162605286, "step": 2120 }, { "epoch": 2.233875196643943, "grad_norm": 1.7769911595186116, "learning_rate": 4.333513900174396e-06, "log_odds_chosen": 4.821990966796875, "log_odds_ratio": -0.026227790862321854, "logits/chosen": -2.829463481903076, "logits/rejected": -2.842454433441162, "logps/chosen": -0.1390562653541565, "logps/rejected": -2.760815143585205, "loss": 0.1215, "nll_loss": 0.11114709079265594, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0069528138265013695, "rewards/margins": 0.13108794391155243, "rewards/rejected": -0.13804076611995697, "step": 2130 }, { "epoch": 2.2443628736234924, "grad_norm": 2.186831361943043, "learning_rate": 4.32337701167117e-06, "log_odds_chosen": 5.350895881652832, "log_odds_ratio": -0.0246684979647398, "logits/chosen": -2.872166156768799, "logits/rejected": -2.8550028800964355, "logps/chosen": -0.13888207077980042, "logps/rejected": -3.2091636657714844, "loss": 0.1143, "nll_loss": 0.11629905551671982, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.006944102700799704, "rewards/margins": 0.1535140872001648, "rewards/rejected": -0.16045819222927094, "step": 2140 }, { "epoch": 2.2548505506030416, "grad_norm": 2.2764409350931345, "learning_rate": 4.313310928137537e-06, "log_odds_chosen": 4.80722713470459, "log_odds_ratio": -0.025547053664922714, "logits/chosen": -2.8291611671447754, "logits/rejected": -2.858245849609375, "logps/chosen": -0.15937599539756775, "logps/rejected": -2.8679497241973877, "loss": 0.1185, "nll_loss": 0.11574534326791763, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007968800142407417, "rewards/margins": 0.13542868196964264, "rewards/rejected": -0.1433974802494049, "step": 2150 }, { "epoch": 2.2653382275825904, "grad_norm": 2.239980255447614, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 5.589659690856934, "log_odds_ratio": -0.020419184118509293, "logits/chosen": -2.905287981033325, "logits/rejected": -2.966031551361084, "logps/chosen": -0.1542571783065796, "logps/rejected": -3.551201581954956, "loss": 0.1236, "nll_loss": 0.11697031557559967, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007712858729064465, "rewards/margins": 0.16984722018241882, "rewards/rejected": -0.17756007611751556, "step": 2160 }, { "epoch": 2.2758259045621396, "grad_norm": 2.009942820215124, "learning_rate": 4.293387907410919e-06, "log_odds_chosen": 6.170254707336426, "log_odds_ratio": -0.017188329249620438, "logits/chosen": -2.848698139190674, "logits/rejected": -2.945160388946533, "logps/chosen": -0.13800857961177826, "logps/rejected": -4.000069618225098, "loss": 0.1137, "nll_loss": 0.11105845123529434, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.006900429725646973, "rewards/margins": 0.19310306012630463, "rewards/rejected": -0.2000034749507904, "step": 2170 }, { "epoch": 2.2863135815416884, "grad_norm": 2.1918079846574567, "learning_rate": 4.2835293687811935e-06, "log_odds_chosen": 6.479376316070557, "log_odds_ratio": -0.010083029977977276, "logits/chosen": -2.7919399738311768, "logits/rejected": -2.9110770225524902, "logps/chosen": -0.1471458077430725, "logps/rejected": -4.402917385101318, "loss": 0.1149, "nll_loss": 0.12062163650989532, "rewards/accuracies": 1.0, "rewards/chosen": -0.007357291877269745, "rewards/margins": 0.21278861165046692, "rewards/rejected": -0.22014589607715607, "step": 2180 }, { "epoch": 2.2968012585212376, "grad_norm": 1.9268306821517742, "learning_rate": 4.273738431706883e-06, "log_odds_chosen": 6.724373817443848, "log_odds_ratio": -0.018149670213460922, "logits/chosen": -2.891892194747925, "logits/rejected": -3.004826784133911, "logps/chosen": -0.15707895159721375, "logps/rejected": -4.773315906524658, "loss": 0.1119, "nll_loss": 0.10733366012573242, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007853945717215538, "rewards/margins": 0.23081183433532715, "rewards/rejected": -0.23866574466228485, "step": 2190 }, { "epoch": 2.3072889355007864, "grad_norm": 1.9131867908425575, "learning_rate": 4.264014327112208e-06, "log_odds_chosen": 6.2542595863342285, "log_odds_ratio": -0.015775460749864578, "logits/chosen": -2.862001419067383, "logits/rejected": -2.91827654838562, "logps/chosen": -0.14461472630500793, "logps/rejected": -4.159193515777588, "loss": 0.1178, "nll_loss": 0.12322264909744263, "rewards/accuracies": 1.0, "rewards/chosen": -0.007230737246572971, "rewards/margins": 0.20072893798351288, "rewards/rejected": -0.20795968174934387, "step": 2200 }, { "epoch": 2.3072889355007864, "eval_log_odds_chosen": 1.1627599000930786, "eval_log_odds_ratio": -0.7777736783027649, "eval_logits/chosen": -2.887819766998291, "eval_logits/rejected": -2.9106638431549072, "eval_logps/chosen": -2.4108457565307617, "eval_logps/rejected": -3.4342026710510254, "eval_loss": 1.1761772632598877, "eval_nll_loss": 1.1196904182434082, "eval_rewards/accuracies": 0.6527777910232544, "eval_rewards/chosen": -0.12054230272769928, "eval_rewards/margins": 0.051167842000722885, "eval_rewards/rejected": -0.17171014845371246, "eval_runtime": 137.1423, "eval_samples_per_second": 14.54, "eval_steps_per_second": 0.459, "step": 2200 }, { "epoch": 2.3177766124803356, "grad_norm": 2.1121501905853624, "learning_rate": 4.254356298115171e-06, "log_odds_chosen": 6.363844394683838, "log_odds_ratio": -0.024754000827670097, "logits/chosen": -2.8908374309539795, "logits/rejected": -2.9566292762756348, "logps/chosen": -0.15381646156311035, "logps/rejected": -4.287047386169434, "loss": 0.1181, "nll_loss": 0.12711365520954132, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007690823636949062, "rewards/margins": 0.20666155219078064, "rewards/rejected": -0.21435236930847168, "step": 2210 }, { "epoch": 2.3282642894598844, "grad_norm": 3.84884286912148, "learning_rate": 4.24476359978009e-06, "log_odds_chosen": 5.530186176300049, "log_odds_ratio": -0.017865758389234543, "logits/chosen": -2.8787178993225098, "logits/rejected": -2.9533944129943848, "logps/chosen": -0.1436866670846939, "logps/rejected": -3.488823652267456, "loss": 0.1234, "nll_loss": 0.11815366894006729, "rewards/accuracies": 1.0, "rewards/chosen": -0.0071843331679701805, "rewards/margins": 0.16725686192512512, "rewards/rejected": -0.17444118857383728, "step": 2220 }, { "epoch": 2.3387519664394336, "grad_norm": 2.417106329176298, "learning_rate": 4.235235498876268e-06, "log_odds_chosen": 5.049867630004883, "log_odds_ratio": -0.030804011970758438, "logits/chosen": -2.8601975440979004, "logits/rejected": -2.919813632965088, "logps/chosen": -0.16016361117362976, "logps/rejected": -3.108591079711914, "loss": 0.1205, "nll_loss": 0.12257065623998642, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.008008181117475033, "rewards/margins": 0.14742138981819153, "rewards/rejected": -0.15542957186698914, "step": 2230 }, { "epoch": 2.349239643418983, "grad_norm": 2.0311020060176737, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 6.287697792053223, "log_odds_ratio": -0.03303173556923866, "logits/chosen": -2.8431243896484375, "logits/rejected": -2.987511396408081, "logps/chosen": -0.15092086791992188, "logps/rejected": -4.205324649810791, "loss": 0.119, "nll_loss": 0.11937984079122543, "rewards/accuracies": 0.96875, "rewards/chosen": -0.0075460439547896385, "rewards/margins": 0.20272019505500793, "rewards/rejected": -0.21026620268821716, "step": 2240 }, { "epoch": 2.3597273203985316, "grad_norm": 1.8184108922544404, "learning_rate": 4.216370213557839e-06, "log_odds_chosen": 6.489804267883301, "log_odds_ratio": -0.017738422378897667, "logits/chosen": -2.8637566566467285, "logits/rejected": -2.9882349967956543, "logps/chosen": -0.1367037147283554, "logps/rejected": -4.3643412590026855, "loss": 0.1103, "nll_loss": 0.10625318437814713, "rewards/accuracies": 1.0, "rewards/chosen": -0.006835184991359711, "rewards/margins": 0.21138188242912292, "rewards/rejected": -0.21821708977222443, "step": 2250 }, { "epoch": 2.370214997378081, "grad_norm": 1.9927993897844196, "learning_rate": 4.207031619116713e-06, "log_odds_chosen": 6.5232744216918945, "log_odds_ratio": -0.02112133800983429, "logits/chosen": -2.888134002685547, "logits/rejected": -2.9766697883605957, "logps/chosen": -0.13985328376293182, "logps/rejected": -4.443106174468994, "loss": 0.1119, "nll_loss": 0.10387493669986725, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.006992665119469166, "rewards/margins": 0.21516263484954834, "rewards/rejected": -0.22215530276298523, "step": 2260 }, { "epoch": 2.3807026743576296, "grad_norm": 1.9179118979680037, "learning_rate": 4.197754801611136e-06, "log_odds_chosen": 7.000714302062988, "log_odds_ratio": -0.01941884122788906, "logits/chosen": -2.8880743980407715, "logits/rejected": -3.0280842781066895, "logps/chosen": -0.1594962626695633, "logps/rejected": -4.991673946380615, "loss": 0.1187, "nll_loss": 0.12734182178974152, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007974812760949135, "rewards/margins": 0.2416088581085205, "rewards/rejected": -0.2495836764574051, "step": 2270 }, { "epoch": 2.391190351337179, "grad_norm": 1.7656016453383905, "learning_rate": 4.188539082916955e-06, "log_odds_chosen": 5.81030797958374, "log_odds_ratio": -0.02714763581752777, "logits/chosen": -2.858682155609131, "logits/rejected": -2.961153030395508, "logps/chosen": -0.1495695412158966, "logps/rejected": -3.7413382530212402, "loss": 0.117, "nll_loss": 0.1129683405160904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007478476967662573, "rewards/margins": 0.1795884370803833, "rewards/rejected": -0.18706689774990082, "step": 2280 }, { "epoch": 2.401678028316728, "grad_norm": 1.7721263332581463, "learning_rate": 4.179383795285729e-06, "log_odds_chosen": 6.099682807922363, "log_odds_ratio": -0.016452614217996597, "logits/chosen": -2.8671703338623047, "logits/rejected": -2.94566011428833, "logps/chosen": -0.1470957249403, "logps/rejected": -4.025435447692871, "loss": 0.1162, "nll_loss": 0.1030157208442688, "rewards/accuracies": 1.0, "rewards/chosen": -0.007354786153882742, "rewards/margins": 0.19391697645187378, "rewards/rejected": -0.20127174258232117, "step": 2290 }, { "epoch": 2.412165705296277, "grad_norm": 6.518126509500433, "learning_rate": 4.170288281141496e-06, "log_odds_chosen": 5.677874565124512, "log_odds_ratio": -0.02623058296740055, "logits/chosen": -2.8755476474761963, "logits/rejected": -2.926180362701416, "logps/chosen": -0.15929332375526428, "logps/rejected": -3.627763271331787, "loss": 0.1184, "nll_loss": 0.12096776813268661, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007964666932821274, "rewards/margins": 0.17342346906661987, "rewards/rejected": -0.18138816952705383, "step": 2300 }, { "epoch": 2.412165705296277, "eval_log_odds_chosen": 1.3232934474945068, "eval_log_odds_ratio": -1.0561914443969727, "eval_logits/chosen": -2.9102423191070557, "eval_logits/rejected": -2.9226319789886475, "eval_logps/chosen": -3.8695833683013916, "eval_logps/rejected": -5.081162452697754, "eval_loss": 1.8519541025161743, "eval_nll_loss": 1.7541913986206055, "eval_rewards/accuracies": 0.636904776096344, "eval_rewards/chosen": -0.19347918033599854, "eval_rewards/margins": 0.06057893857359886, "eval_rewards/rejected": -0.2540581226348877, "eval_runtime": 140.6912, "eval_samples_per_second": 14.173, "eval_steps_per_second": 0.448, "step": 2300 }, { "epoch": 2.422653382275826, "grad_norm": 2.1350280555835317, "learning_rate": 4.1612518928823956e-06, "log_odds_chosen": 5.239171028137207, "log_odds_ratio": -0.0356699600815773, "logits/chosen": -2.8127808570861816, "logits/rejected": -2.847365140914917, "logps/chosen": -0.17353428900241852, "logps/rejected": -3.4219677448272705, "loss": 0.1197, "nll_loss": 0.12273728847503662, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.008676714263856411, "rewards/margins": 0.16242167353630066, "rewards/rejected": -0.17109838128089905, "step": 2310 }, { "epoch": 2.433141059255375, "grad_norm": 2.142764154815985, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": 7.10500431060791, "log_odds_ratio": -0.02759629487991333, "logits/chosen": -2.8841793537139893, "logits/rejected": -2.979490280151367, "logps/chosen": -0.15857262909412384, "logps/rejected": -5.118218898773193, "loss": 0.1179, "nll_loss": 0.11995577812194824, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007928632199764252, "rewards/margins": 0.24798233807086945, "rewards/rejected": -0.2559109628200531, "step": 2320 }, { "epoch": 2.443628736234924, "grad_norm": 2.442748493026814, "learning_rate": 4.143353952325209e-06, "log_odds_chosen": 6.4824538230896, "log_odds_ratio": -0.03863966092467308, "logits/chosen": -2.8798575401306152, "logits/rejected": -2.975369691848755, "logps/chosen": -0.16273298859596252, "logps/rejected": -4.518317222595215, "loss": 0.1144, "nll_loss": 0.11924872547388077, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.008136649616062641, "rewards/margins": 0.21777920424938202, "rewards/rejected": -0.22591586410999298, "step": 2330 }, { "epoch": 2.454116413214473, "grad_norm": 1.7906952084031593, "learning_rate": 4.134491152973616e-06, "log_odds_chosen": 6.330552101135254, "log_odds_ratio": -0.019993215799331665, "logits/chosen": -2.903748035430908, "logits/rejected": -2.961629629135132, "logps/chosen": -0.1506245732307434, "logps/rejected": -4.29229736328125, "loss": 0.1162, "nll_loss": 0.11873211711645126, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0075312284752726555, "rewards/margins": 0.20708362758159637, "rewards/rejected": -0.2146148979663849, "step": 2340 }, { "epoch": 2.464604090194022, "grad_norm": 2.709543224621687, "learning_rate": 4.125684985035174e-06, "log_odds_chosen": 6.674917697906494, "log_odds_ratio": -0.02191847935318947, "logits/chosen": -2.869702100753784, "logits/rejected": -2.9517292976379395, "logps/chosen": -0.14587149024009705, "logps/rejected": -4.594050407409668, "loss": 0.1189, "nll_loss": 0.11958177387714386, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007293573580682278, "rewards/margins": 0.2224089354276657, "rewards/rejected": -0.22970251739025116, "step": 2350 }, { "epoch": 2.475091767173571, "grad_norm": 1.9596617726605967, "learning_rate": 4.116934847963092e-06, "log_odds_chosen": 6.008196830749512, "log_odds_ratio": -0.020748203620314598, "logits/chosen": -2.859504222869873, "logits/rejected": -2.9086391925811768, "logps/chosen": -0.1603454202413559, "logps/rejected": -4.055342674255371, "loss": 0.1137, "nll_loss": 0.11717329174280167, "rewards/accuracies": 1.0, "rewards/chosen": -0.008017271757125854, "rewards/margins": 0.1947498619556427, "rewards/rejected": -0.20276716351509094, "step": 2360 }, { "epoch": 2.48557944415312, "grad_norm": 25.11227763431921, "learning_rate": 4.1082401500888055e-06, "log_odds_chosen": 6.279742240905762, "log_odds_ratio": -0.01569024845957756, "logits/chosen": -2.916944742202759, "logits/rejected": -2.987224578857422, "logps/chosen": -0.14050395786762238, "logps/rejected": -4.152866363525391, "loss": 0.1189, "nll_loss": 0.10722777992486954, "rewards/accuracies": 1.0, "rewards/chosen": -0.007025198079645634, "rewards/margins": 0.20061811804771423, "rewards/rejected": -0.2076433151960373, "step": 2370 }, { "epoch": 2.4960671211326693, "grad_norm": 1.757332945919827, "learning_rate": 4.099600308453939e-06, "log_odds_chosen": 6.39632511138916, "log_odds_ratio": -0.023090779781341553, "logits/chosen": -2.8743884563446045, "logits/rejected": -2.9668736457824707, "logps/chosen": -0.15729930996894836, "logps/rejected": -4.314006328582764, "loss": 0.1177, "nll_loss": 0.1209021583199501, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007864965125918388, "rewards/margins": 0.2078353613615036, "rewards/rejected": -0.21570034325122833, "step": 2380 }, { "epoch": 2.506554798112218, "grad_norm": 2.0524680636282056, "learning_rate": 4.091014748646132e-06, "log_odds_chosen": 5.9223713874816895, "log_odds_ratio": -0.030582841485738754, "logits/chosen": -2.8992161750793457, "logits/rejected": -2.929603099822998, "logps/chosen": -0.1705484390258789, "logps/rejected": -4.027953147888184, "loss": 0.1189, "nll_loss": 0.10802364349365234, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.008527422323822975, "rewards/margins": 0.19287024438381195, "rewards/rejected": -0.20139765739440918, "step": 2390 }, { "epoch": 2.5170424750917673, "grad_norm": 1.7245638696745784, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 6.324474811553955, "log_odds_ratio": -0.018949782475829124, "logits/chosen": -2.8749866485595703, "logits/rejected": -2.9224321842193604, "logps/chosen": -0.1520567536354065, "logps/rejected": -4.290619850158691, "loss": 0.1172, "nll_loss": 0.12284000217914581, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.00760283786803484, "rewards/margins": 0.20692817866802216, "rewards/rejected": -0.21453101933002472, "step": 2400 }, { "epoch": 2.5170424750917673, "eval_log_odds_chosen": 1.0075438022613525, "eval_log_odds_ratio": -0.8145382404327393, "eval_logits/chosen": -2.8560779094696045, "eval_logits/rejected": -2.871006965637207, "eval_logps/chosen": -2.0024044513702393, "eval_logps/rejected": -2.8670685291290283, "eval_loss": 1.01926589012146, "eval_nll_loss": 0.9735569357872009, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.10012022405862808, "eval_rewards/margins": 0.043233200907707214, "eval_rewards/rejected": -0.1433534324169159, "eval_runtime": 138.4847, "eval_samples_per_second": 14.399, "eval_steps_per_second": 0.455, "step": 2400 }, { "epoch": 2.527530152071316, "grad_norm": 2.140192470773612, "learning_rate": 4.074004218633553e-06, "log_odds_chosen": 6.169337272644043, "log_odds_ratio": -0.024398522451519966, "logits/chosen": -2.8802199363708496, "logits/rejected": -2.9575634002685547, "logps/chosen": -0.14228537678718567, "logps/rejected": -4.140218257904053, "loss": 0.1204, "nll_loss": 0.10762319713830948, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0071142688393592834, "rewards/margins": 0.1998966485261917, "rewards/rejected": -0.20701093971729279, "step": 2410 }, { "epoch": 2.5380178290508653, "grad_norm": 1.9307036538867832, "learning_rate": 4.065578140908709e-06, "log_odds_chosen": 6.545037269592285, "log_odds_ratio": -0.020819999277591705, "logits/chosen": -2.826190948486328, "logits/rejected": -2.9180386066436768, "logps/chosen": -0.15343733131885529, "logps/rejected": -4.550530433654785, "loss": 0.1292, "nll_loss": 0.12483732402324677, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007671866565942764, "rewards/margins": 0.2198546677827835, "rewards/rejected": -0.22752651572227478, "step": 2420 }, { "epoch": 2.5485055060304145, "grad_norm": 2.472322893814309, "learning_rate": 4.057204129667897e-06, "log_odds_chosen": 6.510749816894531, "log_odds_ratio": -0.017572391778230667, "logits/chosen": -2.8476340770721436, "logits/rejected": -2.9206082820892334, "logps/chosen": -0.1623007208108902, "logps/rejected": -4.547110557556152, "loss": 0.114, "nll_loss": 0.11619551479816437, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.00811503641307354, "rewards/margins": 0.21924051642417908, "rewards/rejected": -0.22735556960105896, "step": 2430 }, { "epoch": 2.5589931830099633, "grad_norm": 3.562558849555077, "learning_rate": 4.048881650894581e-06, "log_odds_chosen": 7.486746311187744, "log_odds_ratio": -0.012338453903794289, "logits/chosen": -2.8392252922058105, "logits/rejected": -2.924240827560425, "logps/chosen": -0.15012109279632568, "logps/rejected": -5.4815144538879395, "loss": 0.1213, "nll_loss": 0.12608163058757782, "rewards/accuracies": 1.0, "rewards/chosen": -0.007506055291742086, "rewards/margins": 0.26656967401504517, "rewards/rejected": -0.2740757167339325, "step": 2440 }, { "epoch": 2.5694808599895125, "grad_norm": 2.3252293901649193, "learning_rate": 4.040610178208843e-06, "log_odds_chosen": 7.7740631103515625, "log_odds_ratio": -0.0118449367582798, "logits/chosen": -2.795551061630249, "logits/rejected": -2.8945860862731934, "logps/chosen": -0.1522868573665619, "logps/rejected": -5.739714622497559, "loss": 0.1145, "nll_loss": 0.11489256471395493, "rewards/accuracies": 1.0, "rewards/chosen": -0.007614342961460352, "rewards/margins": 0.27937138080596924, "rewards/rejected": -0.28698569536209106, "step": 2450 }, { "epoch": 2.5799685369690613, "grad_norm": 2.0157957603988175, "learning_rate": 4.032389192727559e-06, "log_odds_chosen": 6.265582084655762, "log_odds_ratio": -0.024669019505381584, "logits/chosen": -2.85023832321167, "logits/rejected": -2.8876233100891113, "logps/chosen": -0.150896817445755, "logps/rejected": -4.219937324523926, "loss": 0.1277, "nll_loss": 0.12799417972564697, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007544840686023235, "rewards/margins": 0.20345202088356018, "rewards/rejected": -0.2109968364238739, "step": 2460 }, { "epoch": 2.5904562139486105, "grad_norm": 2.287376161767263, "learning_rate": 4.024218182927669e-06, "log_odds_chosen": 6.810778617858887, "log_odds_ratio": -0.013128559105098248, "logits/chosen": -2.823387622833252, "logits/rejected": -2.879467487335205, "logps/chosen": -0.15397700667381287, "logps/rejected": -4.72897481918335, "loss": 0.1209, "nll_loss": 0.12541964650154114, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0076988511718809605, "rewards/margins": 0.22874990105628967, "rewards/rejected": -0.236448734998703, "step": 2470 }, { "epoch": 2.6009438909281593, "grad_norm": 2.2717126345189547, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 6.199719429016113, "log_odds_ratio": -0.018437180668115616, "logits/chosen": -2.8248672485351562, "logits/rejected": -2.8656277656555176, "logps/chosen": -0.14331553876399994, "logps/rejected": -4.071486949920654, "loss": 0.1196, "nll_loss": 0.11505875736474991, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.00716577610000968, "rewards/margins": 0.19640859961509705, "rewards/rejected": -0.20357437431812286, "step": 2480 }, { "epoch": 2.6114315679077085, "grad_norm": 2.1379482021716036, "learning_rate": 4.008024080281012e-06, "log_odds_chosen": 7.395205497741699, "log_odds_ratio": -0.01522077340632677, "logits/chosen": -2.8720109462738037, "logits/rejected": -2.936903476715088, "logps/chosen": -0.13911715149879456, "logps/rejected": -5.221936225891113, "loss": 0.12, "nll_loss": 0.12369368225336075, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0069558583199977875, "rewards/margins": 0.2541409730911255, "rewards/rejected": -0.2610968351364136, "step": 2490 }, { "epoch": 2.6219192448872572, "grad_norm": 1.7439578923515293, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 8.536567687988281, "log_odds_ratio": -0.02061418630182743, "logits/chosen": -2.854001760482788, "logits/rejected": -2.9489758014678955, "logps/chosen": -0.1588824838399887, "logps/rejected": -6.567204475402832, "loss": 0.1109, "nll_loss": 0.11326327174901962, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007944123819470406, "rewards/margins": 0.32041609287261963, "rewards/rejected": -0.3283601999282837, "step": 2500 }, { "epoch": 2.6219192448872572, "eval_log_odds_chosen": 1.0766297578811646, "eval_log_odds_ratio": -0.9767945408821106, "eval_logits/chosen": -2.8457064628601074, "eval_logits/rejected": -2.857062339782715, "eval_logps/chosen": -2.4182989597320557, "eval_logps/rejected": -3.354691743850708, "eval_loss": 1.2049823999404907, "eval_nll_loss": 1.172393560409546, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.12091495096683502, "eval_rewards/margins": 0.046819645911455154, "eval_rewards/rejected": -0.1677345633506775, "eval_runtime": 137.7801, "eval_samples_per_second": 14.472, "eval_steps_per_second": 0.457, "step": 2500 }, { "epoch": 2.6324069218668065, "grad_norm": 3.8704567483353496, "learning_rate": 3.992023920278996e-06, "log_odds_chosen": 6.979190826416016, "log_odds_ratio": -0.018384801223874092, "logits/chosen": -2.8529200553894043, "logits/rejected": -2.923466920852661, "logps/chosen": -0.14472463726997375, "logps/rejected": -4.871707916259766, "loss": 0.1127, "nll_loss": 0.1109754890203476, "rewards/accuracies": 1.0, "rewards/chosen": -0.007236232049763203, "rewards/margins": 0.23634913563728333, "rewards/rejected": -0.24358537793159485, "step": 2510 }, { "epoch": 2.6428945988463557, "grad_norm": 2.0243407054263933, "learning_rate": 3.984095364447979e-06, "log_odds_chosen": 6.955283164978027, "log_odds_ratio": -0.026280570775270462, "logits/chosen": -2.845829486846924, "logits/rejected": -2.9166336059570312, "logps/chosen": -0.1561572551727295, "logps/rejected": -4.968081474304199, "loss": 0.1245, "nll_loss": 0.11139287799596786, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0078078629449009895, "rewards/margins": 0.2405962496995926, "rewards/rejected": -0.2484041005373001, "step": 2520 }, { "epoch": 2.6533822758259045, "grad_norm": 2.159445384644007, "learning_rate": 3.97621386243772e-06, "log_odds_chosen": 8.654619216918945, "log_odds_ratio": -0.015728970989584923, "logits/chosen": -2.815493583679199, "logits/rejected": -2.9511656761169434, "logps/chosen": -0.1413796991109848, "logps/rejected": -6.552220821380615, "loss": 0.1201, "nll_loss": 0.11258909851312637, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007068985607475042, "rewards/margins": 0.32054203748703003, "rewards/rejected": -0.3276110291481018, "step": 2530 }, { "epoch": 2.6638699528054537, "grad_norm": 2.5062335927036123, "learning_rate": 3.9683789506627254e-06, "log_odds_chosen": 7.7274370193481445, "log_odds_ratio": -0.020870521664619446, "logits/chosen": -2.8319153785705566, "logits/rejected": -2.922696113586426, "logps/chosen": -0.15536390244960785, "logps/rejected": -5.693093776702881, "loss": 0.1181, "nll_loss": 0.10906670987606049, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007768194191157818, "rewards/margins": 0.2768864631652832, "rewards/rejected": -0.2846546769142151, "step": 2540 }, { "epoch": 2.6743576297850025, "grad_norm": 1.970994291017683, "learning_rate": 3.960590171906698e-06, "log_odds_chosen": 7.434384822845459, "log_odds_ratio": -0.023785177618265152, "logits/chosen": -2.7982025146484375, "logits/rejected": -2.8931427001953125, "logps/chosen": -0.16477976739406586, "logps/rejected": -5.395650386810303, "loss": 0.1221, "nll_loss": 0.13674572110176086, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.008238988928496838, "rewards/margins": 0.2615435719490051, "rewards/rejected": -0.26978254318237305, "step": 2550 }, { "epoch": 2.6848453067645517, "grad_norm": 2.0205686734736594, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 7.365771293640137, "log_odds_ratio": -0.01570904441177845, "logits/chosen": -2.866798162460327, "logits/rejected": -2.959561347961426, "logps/chosen": -0.14348378777503967, "logps/rejected": -5.177813529968262, "loss": 0.1204, "nll_loss": 0.12037654966115952, "rewards/accuracies": 1.0, "rewards/chosen": -0.007174189202487469, "rewards/margins": 0.2517164647579193, "rewards/rejected": -0.25889068841934204, "step": 2560 }, { "epoch": 2.695332983744101, "grad_norm": 1.8761709200806869, "learning_rate": 3.9451492157623585e-06, "log_odds_chosen": 8.670493125915527, "log_odds_ratio": -0.011763294227421284, "logits/chosen": -2.8013434410095215, "logits/rejected": -2.920924425125122, "logps/chosen": -0.16095298528671265, "logps/rejected": -6.665195465087891, "loss": 0.1166, "nll_loss": 0.13346998393535614, "rewards/accuracies": 1.0, "rewards/chosen": -0.008047649636864662, "rewards/margins": 0.32521215081214905, "rewards/rejected": -0.3332597613334656, "step": 2570 }, { "epoch": 2.7058206607236497, "grad_norm": 2.1285971867573408, "learning_rate": 3.937496154790789e-06, "log_odds_chosen": 7.294459342956543, "log_odds_ratio": -0.018316376954317093, "logits/chosen": -2.816880702972412, "logits/rejected": -2.8812124729156494, "logps/chosen": -0.13620439171791077, "logps/rejected": -5.142992973327637, "loss": 0.1195, "nll_loss": 0.10606805980205536, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0068102204240858555, "rewards/margins": 0.25033941864967346, "rewards/rejected": -0.2571496367454529, "step": 2580 }, { "epoch": 2.716308337703199, "grad_norm": 2.400899470701997, "learning_rate": 3.9298874594592975e-06, "log_odds_chosen": 8.10938549041748, "log_odds_ratio": -0.016252661123871803, "logits/chosen": -2.807111978530884, "logits/rejected": -2.915724515914917, "logps/chosen": -0.15417781472206116, "logps/rejected": -6.080683708190918, "loss": 0.1163, "nll_loss": 0.11585485935211182, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007708890829235315, "rewards/margins": 0.2963252663612366, "rewards/rejected": -0.30403420329093933, "step": 2590 }, { "epoch": 2.7267960146827477, "grad_norm": 3.318597907364317, "learning_rate": 3.922322702763682e-06, "log_odds_chosen": 8.183881759643555, "log_odds_ratio": -0.021557733416557312, "logits/chosen": -2.8544585704803467, "logits/rejected": -2.9738879203796387, "logps/chosen": -0.14029571413993835, "logps/rejected": -6.104724884033203, "loss": 0.1238, "nll_loss": 0.11269497871398926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007014785893261433, "rewards/margins": 0.2982214391231537, "rewards/rejected": -0.30523625016212463, "step": 2600 }, { "epoch": 2.7267960146827477, "eval_log_odds_chosen": 1.6673794984817505, "eval_log_odds_ratio": -1.6934312582015991, "eval_logits/chosen": -2.9804697036743164, "eval_logits/rejected": -2.996739387512207, "eval_logps/chosen": -6.072526454925537, "eval_logps/rejected": -7.644432067871094, "eval_loss": 2.6922054290771484, "eval_nll_loss": 2.6498186588287354, "eval_rewards/accuracies": 0.5873016119003296, "eval_rewards/chosen": -0.30362632870674133, "eval_rewards/margins": 0.07859525829553604, "eval_rewards/rejected": -0.38222160935401917, "eval_runtime": 136.8599, "eval_samples_per_second": 14.57, "eval_steps_per_second": 0.46, "step": 2600 }, { "epoch": 2.737283691662297, "grad_norm": 2.23878079697452, "learning_rate": 3.914801463431357e-06, "log_odds_chosen": 7.083222389221191, "log_odds_ratio": -0.02951228991150856, "logits/chosen": -2.8593714237213135, "logits/rejected": -2.9374592304229736, "logps/chosen": -0.14687521755695343, "logps/rejected": -5.056353569030762, "loss": 0.1245, "nll_loss": 0.11392644792795181, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007343760691583157, "rewards/margins": 0.24547390639781952, "rewards/rejected": -0.25281769037246704, "step": 2610 }, { "epoch": 2.7477713686418457, "grad_norm": 3.0293992863459636, "learning_rate": 3.907323325822818e-06, "log_odds_chosen": 5.10004997253418, "log_odds_ratio": -0.032727014273405075, "logits/chosen": -2.780730962753296, "logits/rejected": -2.8234589099884033, "logps/chosen": -0.14557409286499023, "logps/rejected": -3.112699031829834, "loss": 0.1196, "nll_loss": 0.1244465708732605, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007278704084455967, "rewards/margins": 0.14835625886917114, "rewards/rejected": -0.15563495457172394, "step": 2620 }, { "epoch": 2.758259045621395, "grad_norm": 2.2549688272537094, "learning_rate": 3.8998878798351596e-06, "log_odds_chosen": 5.7140727043151855, "log_odds_ratio": -0.026816044002771378, "logits/chosen": -2.864112377166748, "logits/rejected": -2.8956217765808105, "logps/chosen": -0.14010892808437347, "logps/rejected": -3.677777051925659, "loss": 0.1148, "nll_loss": 0.11140565574169159, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007005447056144476, "rewards/margins": 0.17688342928886414, "rewards/rejected": -0.18388888239860535, "step": 2630 }, { "epoch": 2.7687467226009437, "grad_norm": 2.3361581110737384, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 6.5437517166137695, "log_odds_ratio": -0.02287450060248375, "logits/chosen": -2.835170269012451, "logits/rejected": -2.904600143432617, "logps/chosen": -0.15383225679397583, "logps/rejected": -4.582453727722168, "loss": 0.1163, "nll_loss": 0.1210094466805458, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007691613398492336, "rewards/margins": 0.22143109142780304, "rewards/rejected": -0.22912268340587616, "step": 2640 }, { "epoch": 2.779234399580493, "grad_norm": 2.113727988806721, "learning_rate": 3.885143449429057e-06, "log_odds_chosen": 8.709664344787598, "log_odds_ratio": -0.01187268365174532, "logits/chosen": -2.8075308799743652, "logits/rejected": -2.8737902641296387, "logps/chosen": -0.15384691953659058, "logps/rejected": -6.678023338317871, "loss": 0.1126, "nll_loss": 0.11222463846206665, "rewards/accuracies": 1.0, "rewards/chosen": -0.007692346815019846, "rewards/margins": 0.32620885968208313, "rewards/rejected": -0.33390119671821594, "step": 2650 }, { "epoch": 2.789722076560042, "grad_norm": 2.1767794366513376, "learning_rate": 3.877833671647406e-06, "log_odds_chosen": 7.380768775939941, "log_odds_ratio": -0.028077024966478348, "logits/chosen": -2.793292999267578, "logits/rejected": -2.8911733627319336, "logps/chosen": -0.15328237414360046, "logps/rejected": -5.426938533782959, "loss": 0.1168, "nll_loss": 0.11543625593185425, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0076641179621219635, "rewards/margins": 0.26368287205696106, "rewards/rejected": -0.27134692668914795, "step": 2660 }, { "epoch": 2.800209753539591, "grad_norm": 2.256877035979117, "learning_rate": 3.870564998580918e-06, "log_odds_chosen": 8.639537811279297, "log_odds_ratio": -0.022679299116134644, "logits/chosen": -2.811685085296631, "logits/rejected": -2.9056103229522705, "logps/chosen": -0.15335455536842346, "logps/rejected": -6.6522955894470215, "loss": 0.1172, "nll_loss": 0.1345623880624771, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007667726371437311, "rewards/margins": 0.3249470591545105, "rewards/rejected": -0.3326147794723511, "step": 2670 }, { "epoch": 2.81069743051914, "grad_norm": 2.0730722454139485, "learning_rate": 3.863337046431279e-06, "log_odds_chosen": 6.9750657081604, "log_odds_ratio": -0.025320613756775856, "logits/chosen": -2.7947394847869873, "logits/rejected": -2.846017360687256, "logps/chosen": -0.13509753346443176, "logps/rejected": -4.8464508056640625, "loss": 0.1193, "nll_loss": 0.10888632386922836, "rewards/accuracies": 1.0, "rewards/chosen": -0.006754877511411905, "rewards/margins": 0.23556765913963318, "rewards/rejected": -0.24232256412506104, "step": 2680 }, { "epoch": 2.821185107498689, "grad_norm": 1.9858072033613254, "learning_rate": 3.8561494363984955e-06, "log_odds_chosen": 9.771112442016602, "log_odds_ratio": -0.013731351122260094, "logits/chosen": -2.8062682151794434, "logits/rejected": -2.9753849506378174, "logps/chosen": -0.14906486868858337, "logps/rejected": -7.731194496154785, "loss": 0.1179, "nll_loss": 0.11920718103647232, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007453243248164654, "rewards/margins": 0.37910646200180054, "rewards/rejected": -0.38655975461006165, "step": 2690 }, { "epoch": 2.831672784478238, "grad_norm": 1.6847580595509726, "learning_rate": 3.849001794597506e-06, "log_odds_chosen": 7.8019118309021, "log_odds_ratio": -0.019792212173342705, "logits/chosen": -2.8470611572265625, "logits/rejected": -2.9447550773620605, "logps/chosen": -0.15314054489135742, "logps/rejected": -5.769678115844727, "loss": 0.1192, "nll_loss": 0.11755287647247314, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007657027803361416, "rewards/margins": 0.2808268666267395, "rewards/rejected": -0.2884839177131653, "step": 2700 }, { "epoch": 2.831672784478238, "eval_log_odds_chosen": 1.020140528678894, "eval_log_odds_ratio": -0.950748860836029, "eval_logits/chosen": -2.866152763366699, "eval_logits/rejected": -2.883617877960205, "eval_logps/chosen": -2.3778645992279053, "eval_logps/rejected": -3.2670860290527344, "eval_loss": 1.2390626668930054, "eval_nll_loss": 1.1910258531570435, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.11889322102069855, "eval_rewards/margins": 0.04446107894182205, "eval_rewards/rejected": -0.16335429251194, "eval_runtime": 137.1045, "eval_samples_per_second": 14.544, "eval_steps_per_second": 0.46, "step": 2700 }, { "epoch": 2.8421604614577873, "grad_norm": 2.227062658222717, "learning_rate": 3.841893751976493e-06, "log_odds_chosen": 6.429055690765381, "log_odds_ratio": -0.025566572323441505, "logits/chosen": -2.8230857849121094, "logits/rejected": -2.9232447147369385, "logps/chosen": -0.13817086815834045, "logps/rejected": -4.313010215759277, "loss": 0.1236, "nll_loss": 0.1359073519706726, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.006908542010933161, "rewards/margins": 0.20874197781085968, "rewards/rejected": -0.2156505137681961, "step": 2710 }, { "epoch": 2.852648138437336, "grad_norm": 2.108179677461151, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 7.687928676605225, "log_odds_ratio": -0.019871855154633522, "logits/chosen": -2.9058802127838135, "logits/rejected": -3.016103744506836, "logps/chosen": -0.15432411432266235, "logps/rejected": -5.692026615142822, "loss": 0.1226, "nll_loss": 0.12474212795495987, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0077162072993814945, "rewards/margins": 0.27688512206077576, "rewards/rejected": -0.2846013009548187, "step": 2720 }, { "epoch": 2.863135815416885, "grad_norm": 2.0852362976431627, "learning_rate": 3.827795011754764e-06, "log_odds_chosen": 7.531012058258057, "log_odds_ratio": -0.020183496177196503, "logits/chosen": -2.9127936363220215, "logits/rejected": -3.042579174041748, "logps/chosen": -0.1713821142911911, "logps/rejected": -5.637821197509766, "loss": 0.1192, "nll_loss": 0.1238013282418251, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.00856910552829504, "rewards/margins": 0.2733219265937805, "rewards/rejected": -0.2818910479545593, "step": 2730 }, { "epoch": 2.873623492396434, "grad_norm": 2.1240217329220727, "learning_rate": 3.8208035995043505e-06, "log_odds_chosen": 7.918447017669678, "log_odds_ratio": -0.016450051218271255, "logits/chosen": -2.9222500324249268, "logits/rejected": -3.0099682807922363, "logps/chosen": -0.16613063216209412, "logps/rejected": -5.923202037811279, "loss": 0.1167, "nll_loss": 0.11456701904535294, "rewards/accuracies": 1.0, "rewards/chosen": -0.00830653216689825, "rewards/margins": 0.28785353899002075, "rewards/rejected": -0.2961600720882416, "step": 2740 }, { "epoch": 2.8841111693759833, "grad_norm": 31.79228564478535, "learning_rate": 3.8138503569823697e-06, "log_odds_chosen": 6.909941673278809, "log_odds_ratio": -0.009971695020794868, "logits/chosen": -2.913257598876953, "logits/rejected": -3.0123419761657715, "logps/chosen": -0.14221827685832977, "logps/rejected": -4.7533063888549805, "loss": 0.1366, "nll_loss": 0.12416551262140274, "rewards/accuracies": 1.0, "rewards/chosen": -0.007110914681106806, "rewards/margins": 0.23055438697338104, "rewards/rejected": -0.2376653254032135, "step": 2750 }, { "epoch": 2.894598846355532, "grad_norm": 1.9557051281290665, "learning_rate": 3.806934938134405e-06, "log_odds_chosen": 6.693169593811035, "log_odds_ratio": -0.02671411633491516, "logits/chosen": -2.8386614322662354, "logits/rejected": -2.913949966430664, "logps/chosen": -0.158113032579422, "logps/rejected": -4.6884589195251465, "loss": 0.1257, "nll_loss": 0.13248762488365173, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.007905651815235615, "rewards/margins": 0.22651728987693787, "rewards/rejected": -0.23442292213439941, "step": 2760 }, { "epoch": 2.9050865233350813, "grad_norm": 2.137070948069414, "learning_rate": 3.800057001282532e-06, "log_odds_chosen": 7.526410102844238, "log_odds_ratio": -0.018288953229784966, "logits/chosen": -2.8420822620391846, "logits/rejected": -2.9359934329986572, "logps/chosen": -0.13937655091285706, "logps/rejected": -5.3555192947387695, "loss": 0.1203, "nll_loss": 0.11602024734020233, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.00696882838383317, "rewards/margins": 0.2608071565628052, "rewards/rejected": -0.2677759826183319, "step": 2770 }, { "epoch": 2.91557420031463, "grad_norm": 1.9039164114563458, "learning_rate": 3.7932162090544085e-06, "log_odds_chosen": 8.005070686340332, "log_odds_ratio": -0.013831285759806633, "logits/chosen": -2.85080885887146, "logits/rejected": -2.9412410259246826, "logps/chosen": -0.14242660999298096, "logps/rejected": -5.835131645202637, "loss": 0.115, "nll_loss": 0.11129038035869598, "rewards/accuracies": 1.0, "rewards/chosen": -0.007121330592781305, "rewards/margins": 0.2846352159976959, "rewards/rejected": -0.2917565703392029, "step": 2780 }, { "epoch": 2.9260618772941793, "grad_norm": 1.9066238493747631, "learning_rate": 3.7864122283137657e-06, "log_odds_chosen": 8.59681510925293, "log_odds_ratio": -0.01634146459400654, "logits/chosen": -2.811566114425659, "logits/rejected": -2.953697681427002, "logps/chosen": -0.1852981150150299, "logps/rejected": -6.696959495544434, "loss": 0.1237, "nll_loss": 0.13221383094787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.009264904074370861, "rewards/margins": 0.3255830705165863, "rewards/rejected": -0.33484798669815063, "step": 2790 }, { "epoch": 2.9365495542737285, "grad_norm": 2.1229204349942523, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 8.886019706726074, "log_odds_ratio": -0.014133910648524761, "logits/chosen": -2.8244338035583496, "logits/rejected": -2.9361133575439453, "logps/chosen": -0.1553722470998764, "logps/rejected": -6.724435329437256, "loss": 0.1191, "nll_loss": 0.11856858432292938, "rewards/accuracies": 1.0, "rewards/chosen": -0.007768611423671246, "rewards/margins": 0.3284532129764557, "rewards/rejected": -0.3362218141555786, "step": 2800 }, { "epoch": 2.9365495542737285, "eval_log_odds_chosen": 0.9868643283843994, "eval_log_odds_ratio": -0.8558183312416077, "eval_logits/chosen": -2.8059191703796387, "eval_logits/rejected": -2.8221092224121094, "eval_logps/chosen": -1.9523440599441528, "eval_logps/rejected": -2.7882232666015625, "eval_loss": 1.0213509798049927, "eval_nll_loss": 0.9673047065734863, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.09761719405651093, "eval_rewards/margins": 0.04179396852850914, "eval_rewards/rejected": -0.13941116631031036, "eval_runtime": 140.3646, "eval_samples_per_second": 14.206, "eval_steps_per_second": 0.449, "step": 2800 }, { "epoch": 2.9470372312532773, "grad_norm": 1.8098718147037927, "learning_rate": 3.772913389522725e-06, "log_odds_chosen": 7.045705318450928, "log_odds_ratio": -0.0264790840446949, "logits/chosen": -2.8278496265411377, "logits/rejected": -2.935941696166992, "logps/chosen": -0.16044145822525024, "logps/rejected": -5.10351037979126, "loss": 0.1197, "nll_loss": 0.11624834686517715, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.008022072724997997, "rewards/margins": 0.24715343117713928, "rewards/rejected": -0.25517550110816956, "step": 2810 }, { "epoch": 2.9575249082328265, "grad_norm": 1.8754542855362524, "learning_rate": 3.7662178857735478e-06, "log_odds_chosen": 8.025814056396484, "log_odds_ratio": -0.014746090397238731, "logits/chosen": -2.7981061935424805, "logits/rejected": -2.9223358631134033, "logps/chosen": -0.1609780192375183, "logps/rejected": -6.0790114402771, "loss": 0.1164, "nll_loss": 0.114871546626091, "rewards/accuracies": 1.0, "rewards/chosen": -0.00804890040308237, "rewards/margins": 0.29590168595314026, "rewards/rejected": -0.30395060777664185, "step": 2820 }, { "epoch": 2.9680125852123753, "grad_norm": 2.270114335100112, "learning_rate": 3.7595579019845623e-06, "log_odds_chosen": 7.872386932373047, "log_odds_ratio": -0.01882219687104225, "logits/chosen": -2.8168020248413086, "logits/rejected": -2.900966167449951, "logps/chosen": -0.1528329849243164, "logps/rejected": -5.721396446228027, "loss": 0.117, "nll_loss": 0.1145024448633194, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.007641649339348078, "rewards/margins": 0.27842822670936584, "rewards/rejected": -0.2860698103904724, "step": 2830 }, { "epoch": 2.9785002621919245, "grad_norm": 2.2955550853318907, "learning_rate": 3.752933125204008e-06, "log_odds_chosen": 8.305427551269531, "log_odds_ratio": -0.02256721630692482, "logits/chosen": -2.8052284717559814, "logits/rejected": -2.9265544414520264, "logps/chosen": -0.13989822566509247, "logps/rejected": -6.217524528503418, "loss": 0.1182, "nll_loss": 0.12114028632640839, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.006994911935180426, "rewards/margins": 0.30388128757476807, "rewards/rejected": -0.31087619066238403, "step": 2840 }, { "epoch": 2.9889879391714738, "grad_norm": 1.888221991554896, "learning_rate": 3.7463432463267764e-06, "log_odds_chosen": 7.020120143890381, "log_odds_ratio": -0.01538365613669157, "logits/chosen": -2.8246865272521973, "logits/rejected": -2.9202027320861816, "logps/chosen": -0.16290083527565002, "logps/rejected": -4.992356777191162, "loss": 0.1252, "nll_loss": 0.14337727427482605, "rewards/accuracies": 1.0, "rewards/chosen": -0.008145040832459927, "rewards/margins": 0.24147279560565948, "rewards/rejected": -0.24961784482002258, "step": 2850 }, { "epoch": 2.9984268484530676, "step": 2859, "total_flos": 0.0, "train_loss": 0.32389816019492534, "train_runtime": 62235.4926, "train_samples_per_second": 2.941, "train_steps_per_second": 0.046 } ], "logging_steps": 10, "max_steps": 2859, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }