diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,1908 +1,5689 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9994756161510225, + "epoch": 2.9984268484530676, "eval_steps": 100, - "global_step": 953, + "global_step": 2859, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, - "grad_norm": 11.269791488706222, + "grad_norm": 11.303338968797107, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.1659858673810959, - "log_odds_ratio": -0.6960253715515137, - "logits/chosen": -2.5437328815460205, - "logits/rejected": -2.532463550567627, - "logps/chosen": -0.9995189905166626, - "logps/rejected": -1.0994223356246948, - "loss": 2.7426, - "nll_loss": 2.6549222469329834, + "log_odds_chosen": 0.16597549617290497, + "log_odds_ratio": -0.6960083246231079, + "logits/chosen": -2.5440375804901123, + "logits/rejected": -2.532742977142334, + "logps/chosen": -0.9999498128890991, + "logps/rejected": -1.0999202728271484, + "loss": 2.7435, + "nll_loss": 2.655998706817627, "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04997594282031059, - "rewards/margins": 0.004995172377675772, - "rewards/rejected": -0.0549711212515831, + "rewards/chosen": -0.049997489899396896, + "rewards/margins": 0.004998520482331514, + "rewards/rejected": -0.054996006190776825, "step": 10 }, { "epoch": 0.02097535395909806, - "grad_norm": 3.2083352232231426, + "grad_norm": 3.296785739531489, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.19043061137199402, - "log_odds_ratio": -0.6681476831436157, - "logits/chosen": -3.149108409881592, - "logits/rejected": -3.1720833778381348, - "logps/chosen": -0.7663742303848267, - "logps/rejected": -0.8751267194747925, + "log_odds_chosen": 0.19497092068195343, + "log_odds_ratio": -0.6663684844970703, + "logits/chosen": -3.153244733810425, + "logits/rejected": -3.176297903060913, + "logps/chosen": -0.7618023753166199, + "logps/rejected": -0.8721799850463867, "loss": 0.5628, - "nll_loss": 0.5223474502563477, + "nll_loss": 0.5223663449287415, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03831871226429939, - "rewards/margins": 0.005437628366053104, - "rewards/rejected": -0.04375633969902992, + "rewards/chosen": -0.03809012100100517, + "rewards/margins": 0.005518879741430283, + "rewards/rejected": -0.043609000742435455, "step": 20 }, { "epoch": 0.03146303093864709, - "grad_norm": 2.5438959591852903, + "grad_norm": 2.5096714885559264, "learning_rate": 6e-06, - "log_odds_chosen": 0.24195578694343567, - "log_odds_ratio": -0.6542765498161316, - "logits/chosen": -2.974864959716797, - "logits/rejected": -2.9495468139648438, - "logps/chosen": -0.8126222491264343, - "logps/rejected": -0.9452728033065796, - "loss": 0.5332, - "nll_loss": 0.49184679985046387, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04063111171126366, - "rewards/margins": 0.006632520817220211, - "rewards/rejected": -0.04726364091038704, + "log_odds_chosen": 0.23512229323387146, + "log_odds_ratio": -0.6553729772567749, + "logits/chosen": -2.9705119132995605, + "logits/rejected": -2.944556713104248, + "logps/chosen": -0.8099643588066101, + "logps/rejected": -0.9404464960098267, + "loss": 0.5331, + "nll_loss": 0.4915856420993805, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.040498219430446625, + "rewards/margins": 0.0065241060219705105, + "rewards/rejected": -0.04702232405543327, "step": 30 }, { "epoch": 0.04195070791819612, - "grad_norm": 2.6387687995337887, + "grad_norm": 2.5670929503530138, "learning_rate": 8.000000000000001e-06, - "log_odds_chosen": 0.16362647712230682, - "log_odds_ratio": -0.6933655738830566, - "logits/chosen": -2.880462408065796, - "logits/rejected": -2.8687615394592285, - "logps/chosen": -0.804220974445343, - "logps/rejected": -0.9210459589958191, - "loss": 0.5196, - "nll_loss": 0.4802279472351074, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04021105170249939, - "rewards/margins": 0.005841248203068972, - "rewards/rejected": -0.046052299439907074, + "log_odds_chosen": 0.1703537404537201, + "log_odds_ratio": -0.6904168128967285, + "logits/chosen": -2.8517043590545654, + "logits/rejected": -2.83884334564209, + "logps/chosen": -0.805575966835022, + "logps/rejected": -0.9237464666366577, + "loss": 0.5194, + "nll_loss": 0.4799742102622986, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.04027879983186722, + "rewards/margins": 0.005908523220568895, + "rewards/rejected": -0.046187322586774826, "step": 40 }, { "epoch": 0.05243838489774515, - "grad_norm": 2.753689850023678, + "grad_norm": 2.8257696541784587, "learning_rate": 1e-05, - "log_odds_chosen": 0.285639226436615, - "log_odds_ratio": -0.6802313327789307, - "logits/chosen": -2.7953293323516846, - "logits/rejected": -2.801888942718506, - "logps/chosen": -0.786683201789856, - "logps/rejected": -0.9665401577949524, + "log_odds_chosen": 0.28843408823013306, + "log_odds_ratio": -0.6763556599617004, + "logits/chosen": -2.7286221981048584, + "logits/rejected": -2.72869610786438, + "logps/chosen": -0.787534236907959, + "logps/rejected": -0.968492865562439, "loss": 0.5419, - "nll_loss": 0.4841863214969635, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.03933415934443474, - "rewards/margins": 0.008992847986519337, - "rewards/rejected": -0.0483270101249218, + "nll_loss": 0.48419374227523804, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.03937670961022377, + "rewards/margins": 0.009047931991517544, + "rewards/rejected": -0.04842463880777359, "step": 50 }, { "epoch": 0.06292606187729417, - "grad_norm": 2.9944776685892003, + "grad_norm": 2.7270372711002624, "learning_rate": 1.2e-05, - "log_odds_chosen": 0.18177883327007294, - "log_odds_ratio": -0.6903725862503052, - "logits/chosen": -2.9931223392486572, - "logits/rejected": -2.9918220043182373, - "logps/chosen": -0.8297529220581055, - "logps/rejected": -0.9411457180976868, - "loss": 0.552, - "nll_loss": 0.5221412777900696, + "log_odds_chosen": 0.2020198553800583, + "log_odds_ratio": -0.6800572872161865, + "logits/chosen": -2.896289110183716, + "logits/rejected": -2.8839545249938965, + "logps/chosen": -0.8010624051094055, + "logps/rejected": -0.9179455637931824, + "loss": 0.5456, + "nll_loss": 0.5158990621566772, "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.041487645357847214, - "rewards/margins": 0.005569641478359699, - "rewards/rejected": -0.04705728590488434, + "rewards/chosen": -0.040053121745586395, + "rewards/margins": 0.005844158586114645, + "rewards/rejected": -0.04589728266000748, "step": 60 }, { "epoch": 0.07341373885684321, - "grad_norm": 2.7695397689637704, + "grad_norm": 2.7197204143491605, "learning_rate": 1.4e-05, - "log_odds_chosen": 0.18929322063922882, - "log_odds_ratio": -0.6986348032951355, - "logits/chosen": -2.928518056869507, - "logits/rejected": -2.952428102493286, - "logps/chosen": -0.8219515085220337, - "logps/rejected": -0.9297820925712585, - "loss": 0.5396, - "nll_loss": 0.5304870009422302, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.041097573935985565, - "rewards/margins": 0.00539153628051281, - "rewards/rejected": -0.046489108353853226, + "log_odds_chosen": 0.1937415450811386, + "log_odds_ratio": -0.6942794919013977, + "logits/chosen": -2.8848633766174316, + "logits/rejected": -2.905164957046509, + "logps/chosen": -0.8219146728515625, + "logps/rejected": -0.9291160702705383, + "loss": 0.5412, + "nll_loss": 0.5311218500137329, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.041095733642578125, + "rewards/margins": 0.005360070150345564, + "rewards/rejected": -0.046455807983875275, "step": 70 }, { "epoch": 0.08390141583639224, - "grad_norm": 19.07043575642583, + "grad_norm": 1049.2102246099553, "learning_rate": 1.6000000000000003e-05, - "log_odds_chosen": 0.18035998940467834, - "log_odds_ratio": -0.6837159395217896, - "logits/chosen": -2.7761759757995605, - "logits/rejected": -2.7504143714904785, - "logps/chosen": -0.8980675935745239, - "logps/rejected": -1.0327494144439697, - "loss": 0.5637, - "nll_loss": 0.48639434576034546, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.04490337893366814, - "rewards/margins": 0.006734092719852924, - "rewards/rejected": -0.05163746327161789, + "log_odds_chosen": 0.1753607988357544, + "log_odds_ratio": -0.6886225938796997, + "logits/chosen": -2.6637063026428223, + "logits/rejected": -2.637396812438965, + "logps/chosen": -0.8933579325675964, + "logps/rejected": -1.020629644393921, + "loss": 1.0694, + "nll_loss": 0.9787748456001282, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.04466789960861206, + "rewards/margins": 0.006363583263009787, + "rewards/rejected": -0.051031481474637985, "step": 80 }, { "epoch": 0.09438909281594127, - "grad_norm": 3.590055499786838, + "grad_norm": 4.011701524085754, "learning_rate": 1.8e-05, - "log_odds_chosen": 0.2686706781387329, - "log_odds_ratio": -0.6697625517845154, - "logits/chosen": -2.6665635108947754, - "logits/rejected": -2.664783239364624, - "logps/chosen": -0.8778934478759766, - "logps/rejected": -1.0414215326309204, - "loss": 0.5547, - "nll_loss": 0.49069148302078247, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04389467462897301, - "rewards/margins": 0.008176402188837528, - "rewards/rejected": -0.05207107588648796, + "log_odds_chosen": 0.2628815174102783, + "log_odds_ratio": -0.6731477975845337, + "logits/chosen": -3.106489658355713, + "logits/rejected": -3.0954391956329346, + "logps/chosen": -0.9435924291610718, + "logps/rejected": -1.1041589975357056, + "loss": 0.5766, + "nll_loss": 0.5112682580947876, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04717962443828583, + "rewards/margins": 0.008028322830796242, + "rewards/rejected": -0.05520794540643692, "step": 90 }, { "epoch": 0.1048767697954903, - "grad_norm": 3.4892365652397572, + "grad_norm": 5.340561330006851, "learning_rate": 2e-05, - "log_odds_chosen": 0.20862731337547302, - "log_odds_ratio": -0.6619225144386292, - "logits/chosen": -2.6862692832946777, - "logits/rejected": -2.673692226409912, - "logps/chosen": -0.9019685983657837, - "logps/rejected": -1.0285098552703857, - "loss": 0.5707, - "nll_loss": 0.5284041166305542, + "log_odds_chosen": 0.17503713071346283, + "log_odds_ratio": -0.6751121282577515, + "logits/chosen": -3.3266518115997314, + "logits/rejected": -3.3420982360839844, + "logps/chosen": -0.8886896371841431, + "logps/rejected": -1.0002682209014893, + "loss": 0.5668, + "nll_loss": 0.5238600969314575, "rewards/accuracies": 0.5625, - "rewards/chosen": -0.045098431408405304, - "rewards/margins": 0.006327061913907528, - "rewards/rejected": -0.051425494253635406, + "rewards/chosen": -0.044434480369091034, + "rewards/margins": 0.005578924436122179, + "rewards/rejected": -0.050013404339551926, "step": 100 }, { "epoch": 0.1048767697954903, - "eval_log_odds_chosen": 0.2601078152656555, - "eval_log_odds_ratio": -0.6412674188613892, - "eval_logits/chosen": -2.5810625553131104, - "eval_logits/rejected": -2.5432214736938477, - "eval_logps/chosen": -0.9045050144195557, - "eval_logps/rejected": -1.077429175376892, - "eval_loss": 1.1267567873001099, - "eval_nll_loss": 1.0893229246139526, - "eval_rewards/accuracies": 0.636904776096344, - "eval_rewards/chosen": -0.045225247740745544, - "eval_rewards/margins": 0.008646207861602306, - "eval_rewards/rejected": -0.053871456533670425, - "eval_runtime": 137.3095, - "eval_samples_per_second": 14.522, - "eval_steps_per_second": 0.459, + "eval_log_odds_chosen": 0.21844430267810822, + "eval_log_odds_ratio": -0.6529861688613892, + "eval_logits/chosen": -3.3082144260406494, + "eval_logits/rejected": -3.3147807121276855, + "eval_logps/chosen": -0.9112777709960938, + "eval_logps/rejected": -1.0580321550369263, + "eval_loss": 0.5842872858047485, + "eval_nll_loss": 0.5515953898429871, + "eval_rewards/accuracies": 0.6150793433189392, + "eval_rewards/chosen": -0.04556388780474663, + "eval_rewards/margins": 0.007337724789977074, + "eval_rewards/rejected": -0.05290161445736885, + "eval_runtime": 138.2645, + "eval_samples_per_second": 14.422, + "eval_steps_per_second": 0.456, "step": 100 }, { "epoch": 0.11536444677503933, - "grad_norm": 3.2610517529807947, + "grad_norm": 2.8100337089038514, "learning_rate": 1.9069251784911845e-05, - "log_odds_chosen": 0.2603258490562439, - "log_odds_ratio": -0.6417919397354126, - "logits/chosen": -2.632387399673462, - "logits/rejected": -2.6478092670440674, - "logps/chosen": -0.8465877771377563, - "logps/rejected": -1.0118043422698975, - "loss": 0.6247, - "nll_loss": 0.5625969171524048, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04232938587665558, - "rewards/margins": 0.008260839618742466, - "rewards/rejected": -0.05059022456407547, + "log_odds_chosen": 0.2544933259487152, + "log_odds_ratio": -0.643945038318634, + "logits/chosen": -3.2667174339294434, + "logits/rejected": -3.310918092727661, + "logps/chosen": -0.8447545170783997, + "logps/rejected": -1.009132981300354, + "loss": 0.5651, + "nll_loss": 0.5105677843093872, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.042237721383571625, + "rewards/margins": 0.008218927308917046, + "rewards/rejected": -0.05045665428042412, "step": 110 }, { "epoch": 0.12585212375458835, - "grad_norm": 3.1929412426319397, + "grad_norm": 2.2193460343172986, "learning_rate": 1.825741858350554e-05, - "log_odds_chosen": 0.242882639169693, - "log_odds_ratio": -0.6634533405303955, - "logits/chosen": -2.5689873695373535, - "logits/rejected": -2.536681652069092, - "logps/chosen": -0.8897055387496948, - "logps/rejected": -1.0510555505752563, - "loss": 0.6122, - "nll_loss": 0.5722111463546753, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04448527842760086, - "rewards/margins": 0.008067498914897442, - "rewards/rejected": -0.05255277082324028, + "log_odds_chosen": 0.24397364258766174, + "log_odds_ratio": -0.6682508587837219, + "logits/chosen": -3.193361282348633, + "logits/rejected": -3.243128538131714, + "logps/chosen": -0.8714381456375122, + "logps/rejected": -1.0333614349365234, + "loss": 0.6091, + "nll_loss": 0.5700744390487671, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04357190802693367, + "rewards/margins": 0.008096165955066681, + "rewards/rejected": -0.051668066531419754, "step": 120 }, { "epoch": 0.1363398007341374, - "grad_norm": 2.381017549769141, + "grad_norm": 2.3414921674264555, "learning_rate": 1.7541160386140587e-05, - "log_odds_chosen": 0.20046833157539368, - "log_odds_ratio": -0.6848769783973694, - "logits/chosen": -2.5286340713500977, - "logits/rejected": -2.503958225250244, - "logps/chosen": -0.914216160774231, - "logps/rejected": -1.0454927682876587, - "loss": 0.5902, - "nll_loss": 0.5541085004806519, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04571080952882767, - "rewards/margins": 0.006563832517713308, - "rewards/rejected": -0.052274636924266815, + "log_odds_chosen": 0.2272050678730011, + "log_odds_ratio": -0.6708214282989502, + "logits/chosen": -3.1920104026794434, + "logits/rejected": -3.211714267730713, + "logps/chosen": -0.8986352682113647, + "logps/rejected": -1.0474598407745361, + "loss": 0.5886, + "nll_loss": 0.552306056022644, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.044931765645742416, + "rewards/margins": 0.007441227789968252, + "rewards/rejected": -0.052372999489307404, "step": 130 }, { "epoch": 0.14682747771368643, - "grad_norm": 2.2223756230190213, + "grad_norm": 2.3255085925590597, "learning_rate": 1.6903085094570334e-05, - "log_odds_chosen": 0.231459379196167, - "log_odds_ratio": -0.659934937953949, - "logits/chosen": -2.5273799896240234, - "logits/rejected": -2.5001978874206543, - "logps/chosen": -0.971345067024231, - "logps/rejected": -1.1217668056488037, - "loss": 0.5945, - "nll_loss": 0.564177393913269, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04856724292039871, - "rewards/margins": 0.007521096616983414, - "rewards/rejected": -0.05608834698796272, + "log_odds_chosen": 0.22232067584991455, + "log_odds_ratio": -0.6680520176887512, + "logits/chosen": -3.1715519428253174, + "logits/rejected": -3.198253631591797, + "logps/chosen": -0.9551104307174683, + "logps/rejected": -1.1022988557815552, + "loss": 0.5878, + "nll_loss": 0.5523446798324585, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04775552451610565, + "rewards/margins": 0.007359415292739868, + "rewards/rejected": -0.05511493608355522, "step": 140 }, { "epoch": 0.15731515469323545, - "grad_norm": 3.55513500930042, + "grad_norm": 2.6729814886854766, "learning_rate": 1.6329931618554523e-05, - "log_odds_chosen": 0.18197762966156006, - "log_odds_ratio": -0.735857367515564, - "logits/chosen": -2.5072078704833984, - "logits/rejected": -2.4954299926757812, - "logps/chosen": -0.9893903732299805, - "logps/rejected": -1.1020596027374268, - "loss": 0.553, - "nll_loss": 0.5451637506484985, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04946952313184738, - "rewards/margins": 0.00563345942646265, - "rewards/rejected": -0.055102985352277756, + "log_odds_chosen": 0.17247287929058075, + "log_odds_ratio": -0.7340894341468811, + "logits/chosen": -3.102067470550537, + "logits/rejected": -3.1263070106506348, + "logps/chosen": -0.9946192502975464, + "logps/rejected": -1.1088117361068726, + "loss": 0.5489, + "nll_loss": 0.5492355823516846, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04973096773028374, + "rewards/margins": 0.005709617864340544, + "rewards/rejected": -0.05544058233499527, "step": 150 }, { "epoch": 0.16780283167278448, - "grad_norm": 2.753579339789172, + "grad_norm": 2.603021066142599, "learning_rate": 1.5811388300841898e-05, - "log_odds_chosen": 0.2206648290157318, - "log_odds_ratio": -0.6601604223251343, - "logits/chosen": -2.54675030708313, - "logits/rejected": -2.53303861618042, - "logps/chosen": -0.9035905599594116, - "logps/rejected": -1.0334583520889282, - "loss": 0.6058, - "nll_loss": 0.5536268949508667, + "log_odds_chosen": 0.2041763812303543, + "log_odds_ratio": -0.6666288375854492, + "logits/chosen": -3.0764000415802, + "logits/rejected": -3.1064279079437256, + "logps/chosen": -0.9137493968009949, + "logps/rejected": -1.0383034944534302, + "loss": 0.6063, + "nll_loss": 0.5569471120834351, "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04517952725291252, - "rewards/margins": 0.006493390537798405, - "rewards/rejected": -0.05167291685938835, + "rewards/chosen": -0.045687466859817505, + "rewards/margins": 0.006227707955986261, + "rewards/rejected": -0.05191517621278763, "step": 160 }, { "epoch": 0.1782905086523335, - "grad_norm": 2.4463207326823673, + "grad_norm": 2.4919552056925416, "learning_rate": 1.533929977694741e-05, - "log_odds_chosen": 0.3002270460128784, - "log_odds_ratio": -0.6512068510055542, - "logits/chosen": -2.55534029006958, - "logits/rejected": -2.53877592086792, - "logps/chosen": -0.8796469569206238, - "logps/rejected": -1.063819169998169, - "loss": 0.5849, - "nll_loss": 0.5501061677932739, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04398234561085701, - "rewards/margins": 0.009208607487380505, - "rewards/rejected": -0.05319095402956009, + "log_odds_chosen": 0.25588172674179077, + "log_odds_ratio": -0.6607967019081116, + "logits/chosen": -3.1293396949768066, + "logits/rejected": -3.1606574058532715, + "logps/chosen": -0.8986794352531433, + "logps/rejected": -1.0667051076889038, + "loss": 0.5845, + "nll_loss": 0.5496193766593933, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.044933974742889404, + "rewards/margins": 0.008401280269026756, + "rewards/rejected": -0.05333525687456131, "step": 170 }, { "epoch": 0.18877818563188253, - "grad_norm": 2.404564536005987, + "grad_norm": 2.4600198980545915, "learning_rate": 1.49071198499986e-05, - "log_odds_chosen": 0.2884437143802643, - "log_odds_ratio": -0.6566611528396606, - "logits/chosen": -2.5615644454956055, - "logits/rejected": -2.5457139015197754, - "logps/chosen": -0.9158379435539246, - "logps/rejected": -1.0882136821746826, - "loss": 0.5658, - "nll_loss": 0.5478283166885376, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04579189792275429, - "rewards/margins": 0.008618785068392754, - "rewards/rejected": -0.05441068485379219, + "log_odds_chosen": 0.27393144369125366, + "log_odds_ratio": -0.6479635238647461, + "logits/chosen": -3.080091714859009, + "logits/rejected": -3.103672504425049, + "logps/chosen": -0.9190357327461243, + "logps/rejected": -1.0871737003326416, + "loss": 0.5676, + "nll_loss": 0.550677478313446, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.045951783657073975, + "rewards/margins": 0.00840689055621624, + "rewards/rejected": -0.05435867980122566, "step": 180 }, { "epoch": 0.19926586261143156, - "grad_norm": 3.2974100665964885, + "grad_norm": 5.689090620434962, "learning_rate": 1.4509525002200235e-05, - "log_odds_chosen": 0.23702804744243622, - "log_odds_ratio": -0.6489595770835876, - "logits/chosen": -2.644819498062134, - "logits/rejected": -2.6255900859832764, - "logps/chosen": -0.9308468103408813, - "logps/rejected": -1.0799505710601807, - "loss": 0.5902, - "nll_loss": 0.6114972829818726, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04654233902692795, - "rewards/margins": 0.007455187849700451, - "rewards/rejected": -0.053997524082660675, + "log_odds_chosen": 0.23676976561546326, + "log_odds_ratio": -0.6501709222793579, + "logits/chosen": -3.0815584659576416, + "logits/rejected": -3.1054322719573975, + "logps/chosen": -0.9278916120529175, + "logps/rejected": -1.0751855373382568, + "loss": 0.5906, + "nll_loss": 0.6120038628578186, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.046394579112529755, + "rewards/margins": 0.007364692632108927, + "rewards/rejected": -0.05375927686691284, "step": 190 }, { "epoch": 0.2097535395909806, - "grad_norm": 2.498750011275506, + "grad_norm": 2.2848535898780375, "learning_rate": 1.4142135623730951e-05, - "log_odds_chosen": 0.29194706678390503, - "log_odds_ratio": -0.6627270579338074, - "logits/chosen": -2.5841925144195557, - "logits/rejected": -2.5723748207092285, - "logps/chosen": -0.917371928691864, - "logps/rejected": -1.126123070716858, - "loss": 0.5663, - "nll_loss": 0.5702028274536133, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04586859419941902, - "rewards/margins": 0.0104375584051013, - "rewards/rejected": -0.0563061460852623, + "log_odds_chosen": 0.2697228789329529, + "log_odds_ratio": -0.6704415082931519, + "logits/chosen": -2.99995756149292, + "logits/rejected": -3.038682460784912, + "logps/chosen": -0.9138332605361938, + "logps/rejected": -1.1080011129379272, + "loss": 0.5676, + "nll_loss": 0.5736643075942993, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04569166153669357, + "rewards/margins": 0.009708395227789879, + "rewards/rejected": -0.0554000549018383, "step": 200 }, { "epoch": 0.2097535395909806, - "eval_log_odds_chosen": 0.28631675243377686, - "eval_log_odds_ratio": -0.644675076007843, - "eval_logits/chosen": -2.5596959590911865, - "eval_logits/rejected": -2.537684917449951, - "eval_logps/chosen": -0.8798824548721313, - "eval_logps/rejected": -1.0675764083862305, - "eval_loss": 0.5741076469421387, - "eval_nll_loss": 0.5351698398590088, - "eval_rewards/accuracies": 0.6269841194152832, - "eval_rewards/chosen": -0.043994128704071045, - "eval_rewards/margins": 0.009384696371853352, - "eval_rewards/rejected": -0.05337882414460182, - "eval_runtime": 137.7655, - "eval_samples_per_second": 14.474, + "eval_log_odds_chosen": 0.2850116789340973, + "eval_log_odds_ratio": -0.6474155783653259, + "eval_logits/chosen": -2.9992330074310303, + "eval_logits/rejected": -3.0026443004608154, + "eval_logps/chosen": -0.8811094164848328, + "eval_logps/rejected": -1.0644237995147705, + "eval_loss": 0.5726434588432312, + "eval_nll_loss": 0.5359312295913696, + "eval_rewards/accuracies": 0.625, + "eval_rewards/chosen": -0.04405546560883522, + "eval_rewards/margins": 0.00916572567075491, + "eval_rewards/rejected": -0.053221192210912704, + "eval_runtime": 137.9025, + "eval_samples_per_second": 14.459, "eval_steps_per_second": 0.457, "step": 200 }, { "epoch": 0.22024121657052964, - "grad_norm": 2.310322648029005, + "grad_norm": 2.2864637176453266, "learning_rate": 1.3801311186847084e-05, - "log_odds_chosen": 0.1077527180314064, - "log_odds_ratio": -0.7207110524177551, - "logits/chosen": -2.5468177795410156, - "logits/rejected": -2.544996976852417, - "logps/chosen": -0.8708482980728149, - "logps/rejected": -0.9297773241996765, - "loss": 0.5676, - "nll_loss": 0.5340272188186646, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04354241490364075, - "rewards/margins": 0.002946457825601101, - "rewards/rejected": -0.04648887366056442, + "log_odds_chosen": 0.10374544560909271, + "log_odds_ratio": -0.7170687913894653, + "logits/chosen": -3.0079314708709717, + "logits/rejected": -3.026061773300171, + "logps/chosen": -0.8713214993476868, + "logps/rejected": -0.9376395344734192, + "loss": 0.5683, + "nll_loss": 0.5364366769790649, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04356607422232628, + "rewards/margins": 0.003315900219604373, + "rewards/rejected": -0.04688197374343872, "step": 210 }, { "epoch": 0.23072889355007867, - "grad_norm": 2.578087834768522, + "grad_norm": 2.3833164568305705, "learning_rate": 1.3483997249264842e-05, - "log_odds_chosen": 0.1988961100578308, - "log_odds_ratio": -0.6947790384292603, - "logits/chosen": -2.582960605621338, - "logits/rejected": -2.5871338844299316, - "logps/chosen": -0.8790571093559265, - "logps/rejected": -1.0056135654449463, - "loss": 0.5604, - "nll_loss": 0.5243524312973022, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.043952859938144684, - "rewards/margins": 0.0063278162851929665, - "rewards/rejected": -0.05028067156672478, + "log_odds_chosen": 0.1967695653438568, + "log_odds_ratio": -0.6872244477272034, + "logits/chosen": -3.066392183303833, + "logits/rejected": -3.0755832195281982, + "logps/chosen": -0.8734294176101685, + "logps/rejected": -0.9998324513435364, + "loss": 0.5608, + "nll_loss": 0.5176301598548889, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.043671466410160065, + "rewards/margins": 0.006320156157016754, + "rewards/rejected": -0.04999162256717682, "step": 220 }, { "epoch": 0.2412165705296277, - "grad_norm": 2.166025203586939, + "grad_norm": 2.143148051812647, "learning_rate": 1.3187609467915744e-05, - "log_odds_chosen": 0.28293663263320923, - "log_odds_ratio": -0.6729618906974792, - "logits/chosen": -2.409632682800293, - "logits/rejected": -2.407254695892334, - "logps/chosen": -0.923631489276886, - "logps/rejected": -1.1052097082138062, - "loss": 0.5705, - "nll_loss": 0.5283125638961792, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.0461815744638443, - "rewards/margins": 0.009078909642994404, - "rewards/rejected": -0.05526048690080643, + "log_odds_chosen": 0.2681586444377899, + "log_odds_ratio": -0.669995129108429, + "logits/chosen": -3.0045371055603027, + "logits/rejected": -3.023197889328003, + "logps/chosen": -0.9347988963127136, + "logps/rejected": -1.1079022884368896, + "loss": 0.5715, + "nll_loss": 0.5268279910087585, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.04673994705080986, + "rewards/margins": 0.00865517370402813, + "rewards/rejected": -0.05539512634277344, "step": 230 }, { "epoch": 0.2517042475091767, - "grad_norm": 3.8331735868635723, + "grad_norm": 2.4867634050680865, "learning_rate": 1.2909944487358057e-05, - "log_odds_chosen": 0.23131528496742249, - "log_odds_ratio": -0.6579959988594055, - "logits/chosen": -2.456178665161133, - "logits/rejected": -2.4356391429901123, - "logps/chosen": -0.9076164960861206, - "logps/rejected": -1.0572835206985474, - "loss": 0.5795, - "nll_loss": 0.5539125800132751, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.04538082331418991, - "rewards/margins": 0.007483348250389099, - "rewards/rejected": -0.05286417528986931, + "log_odds_chosen": 0.2310989797115326, + "log_odds_ratio": -0.6607853770256042, + "logits/chosen": -3.0592639446258545, + "logits/rejected": -3.0972437858581543, + "logps/chosen": -0.90626060962677, + "logps/rejected": -1.057490587234497, + "loss": 0.5797, + "nll_loss": 0.5543950796127319, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0453130342066288, + "rewards/margins": 0.007561509497463703, + "rewards/rejected": -0.05287454277276993, "step": 240 }, { "epoch": 0.26219192448872575, - "grad_norm": 2.3703558112076846, + "grad_norm": 2.2846935841220364, "learning_rate": 1.2649110640673518e-05, - "log_odds_chosen": 0.24735364317893982, - "log_odds_ratio": -0.6739610433578491, - "logits/chosen": -2.3664963245391846, - "logits/rejected": -2.3717617988586426, - "logps/chosen": -0.8910790681838989, - "logps/rejected": -1.0310931205749512, - "loss": 0.552, - "nll_loss": 0.521629810333252, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.044553957879543304, - "rewards/margins": 0.007000704295933247, - "rewards/rejected": -0.05155465751886368, + "log_odds_chosen": 0.24984344840049744, + "log_odds_ratio": -0.6764962077140808, + "logits/chosen": -3.0678868293762207, + "logits/rejected": -3.0685126781463623, + "logps/chosen": -0.8884732127189636, + "logps/rejected": -1.025420904159546, + "loss": 0.5498, + "nll_loss": 0.5219429731369019, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0444236658513546, + "rewards/margins": 0.006847388111054897, + "rewards/rejected": -0.05127105116844177, "step": 250 }, { "epoch": 0.2726796014682748, - "grad_norm": 2.3126279019982494, + "grad_norm": 2.3800633619201523, "learning_rate": 1.2403473458920845e-05, - "log_odds_chosen": 0.21803805232048035, - "log_odds_ratio": -0.6705144047737122, - "logits/chosen": -2.3871326446533203, - "logits/rejected": -2.3607029914855957, - "logps/chosen": -0.8851995468139648, - "logps/rejected": -1.0210189819335938, - "loss": 0.5318, - "nll_loss": 0.45665669441223145, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.044259972870349884, - "rewards/margins": 0.006790975574404001, - "rewards/rejected": -0.05105094984173775, + "log_odds_chosen": 0.2426706850528717, + "log_odds_ratio": -0.6691194772720337, + "logits/chosen": -3.0950028896331787, + "logits/rejected": -3.112684488296509, + "logps/chosen": -0.8879591822624207, + "logps/rejected": -1.042834997177124, + "loss": 0.5302, + "nll_loss": 0.45519179105758667, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04439795762300491, + "rewards/margins": 0.00774379214271903, + "rewards/rejected": -0.05214175581932068, "step": 260 }, { "epoch": 0.2831672784478238, - "grad_norm": 3.0127014090338062, + "grad_norm": 2.3697586961370027, "learning_rate": 1.2171612389003691e-05, - "log_odds_chosen": 0.19388818740844727, - "log_odds_ratio": -0.6943486928939819, - "logits/chosen": -2.4198155403137207, - "logits/rejected": -2.3934123516082764, - "logps/chosen": -0.9466629028320312, - "logps/rejected": -1.087548017501831, - "loss": 0.5675, - "nll_loss": 0.5421209335327148, - "rewards/accuracies": 0.4937500059604645, - "rewards/chosen": -0.04733314737677574, - "rewards/margins": 0.007044260855764151, - "rewards/rejected": -0.05437741428613663, + "log_odds_chosen": 0.23119862377643585, + "log_odds_ratio": -0.6756153702735901, + "logits/chosen": -3.113889455795288, + "logits/rejected": -3.157740354537964, + "logps/chosen": -0.9564247131347656, + "logps/rejected": -1.1352105140686035, + "loss": 0.5654, + "nll_loss": 0.5433498024940491, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.04782123863697052, + "rewards/margins": 0.008939290419220924, + "rewards/rejected": -0.056760527193546295, "step": 270 }, { "epoch": 0.29365495542737285, - "grad_norm": 2.1321408503589745, + "grad_norm": 1.9757109026566833, "learning_rate": 1.1952286093343936e-05, - "log_odds_chosen": 0.23094406723976135, - "log_odds_ratio": -0.6691509485244751, - "logits/chosen": -2.3476357460021973, - "logits/rejected": -2.3334882259368896, - "logps/chosen": -0.9389116168022156, - "logps/rejected": -1.0817869901657104, - "loss": 0.5428, - "nll_loss": 0.4766770303249359, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.04694558307528496, - "rewards/margins": 0.007143768016248941, - "rewards/rejected": -0.05408934876322746, + "log_odds_chosen": 0.25132113695144653, + "log_odds_ratio": -0.6663895845413208, + "logits/chosen": -3.1407101154327393, + "logits/rejected": -3.1832191944122314, + "logps/chosen": -0.9308640360832214, + "logps/rejected": -1.087449312210083, + "loss": 0.5429, + "nll_loss": 0.4785974621772766, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04654319956898689, + "rewards/margins": 0.007829269394278526, + "rewards/rejected": -0.05437246710062027, "step": 280 }, { "epoch": 0.30414263240692185, - "grad_norm": 2.9832356292712654, + "grad_norm": 2.7308236297418427, "learning_rate": 1.1744404390294071e-05, - "log_odds_chosen": 0.3523382842540741, - "log_odds_ratio": -0.6227424740791321, - "logits/chosen": -2.2994518280029297, - "logits/rejected": -2.2809882164001465, - "logps/chosen": -0.8515156507492065, - "logps/rejected": -1.0561182498931885, - "loss": 0.5582, - "nll_loss": 0.49160194396972656, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04257578402757645, - "rewards/margins": 0.010230125859379768, - "rewards/rejected": -0.052805911749601364, + "log_odds_chosen": 0.35913094878196716, + "log_odds_ratio": -0.6187662482261658, + "logits/chosen": -3.0944533348083496, + "logits/rejected": -3.1177055835723877, + "logps/chosen": -0.8355825543403625, + "logps/rejected": -1.0572632551193237, + "loss": 0.5568, + "nll_loss": 0.48925265669822693, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.041779130697250366, + "rewards/margins": 0.011084041558206081, + "rewards/rejected": -0.052863169461488724, "step": 290 }, { "epoch": 0.3146303093864709, - "grad_norm": 2.455429454160177, + "grad_norm": 2.472653160364779, "learning_rate": 1.1547005383792517e-05, - "log_odds_chosen": 0.30407971143722534, - "log_odds_ratio": -0.6693702340126038, - "logits/chosen": -2.4093105792999268, - "logits/rejected": -2.360572099685669, - "logps/chosen": -0.8702648878097534, - "logps/rejected": -1.071603775024414, - "loss": 0.5817, - "nll_loss": 0.509266197681427, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04351323843002319, - "rewards/margins": 0.010066945105791092, - "rewards/rejected": -0.053580187261104584, + "log_odds_chosen": 0.2816540598869324, + "log_odds_ratio": -0.6775935888290405, + "logits/chosen": -3.092194080352783, + "logits/rejected": -3.1420485973358154, + "logps/chosen": -0.8778301477432251, + "logps/rejected": -1.0717580318450928, + "loss": 0.5819, + "nll_loss": 0.5100258588790894, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.04389150068163872, + "rewards/margins": 0.009696396067738533, + "rewards/rejected": -0.0535879023373127, "step": 300 }, { "epoch": 0.3146303093864709, - "eval_log_odds_chosen": 0.2780136466026306, - "eval_log_odds_ratio": -0.650335431098938, - "eval_logits/chosen": -2.4818081855773926, - "eval_logits/rejected": -2.4498839378356934, - "eval_logps/chosen": -0.8807685971260071, - "eval_logps/rejected": -1.0628403425216675, - "eval_loss": 0.5571724772453308, - "eval_nll_loss": 0.5207235217094421, - "eval_rewards/accuracies": 0.6190476417541504, - "eval_rewards/chosen": -0.04403843358159065, - "eval_rewards/margins": 0.009103580377995968, - "eval_rewards/rejected": -0.053142011165618896, - "eval_runtime": 140.9657, - "eval_samples_per_second": 14.145, - "eval_steps_per_second": 0.447, + "eval_log_odds_chosen": 0.28298813104629517, + "eval_log_odds_ratio": -0.6463662981987, + "eval_logits/chosen": -3.1391000747680664, + "eval_logits/rejected": -3.1424100399017334, + "eval_logps/chosen": -0.8770027756690979, + "eval_logps/rejected": -1.0619502067565918, + "eval_loss": 0.5552015900611877, + "eval_nll_loss": 0.5201771259307861, + "eval_rewards/accuracies": 0.6289682388305664, + "eval_rewards/chosen": -0.043850142508745193, + "eval_rewards/margins": 0.00924737099558115, + "eval_rewards/rejected": -0.05309751257300377, + "eval_runtime": 141.1002, + "eval_samples_per_second": 14.132, + "eval_steps_per_second": 0.446, "step": 300 }, { "epoch": 0.3251179863660199, - "grad_norm": 2.1236305912642894, + "grad_norm": 2.038557141198459, "learning_rate": 1.1359236684941297e-05, - "log_odds_chosen": 0.2490301877260208, - "log_odds_ratio": -0.6818236112594604, - "logits/chosen": -2.438469409942627, - "logits/rejected": -2.4002931118011475, - "logps/chosen": -0.9081015586853027, - "logps/rejected": -1.0690175294876099, - "loss": 0.5876, - "nll_loss": 0.5490554571151733, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.045405078679323196, - "rewards/margins": 0.008045798167586327, - "rewards/rejected": -0.053450871258974075, + "log_odds_chosen": 0.1998841017484665, + "log_odds_ratio": -0.6875525116920471, + "logits/chosen": -3.0676262378692627, + "logits/rejected": -3.07094407081604, + "logps/chosen": -0.9092122912406921, + "logps/rejected": -1.0280473232269287, + "loss": 0.5844, + "nll_loss": 0.5417822599411011, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.04546061158180237, + "rewards/margins": 0.005941747687757015, + "rewards/rejected": -0.051402367651462555, "step": 310 }, { "epoch": 0.33560566334556896, - "grad_norm": 2.120713978353275, + "grad_norm": 2.262270965184679, "learning_rate": 1.118033988749895e-05, - "log_odds_chosen": 0.24050185084342957, - "log_odds_ratio": -0.6646271347999573, - "logits/chosen": -2.427072286605835, - "logits/rejected": -2.400460720062256, - "logps/chosen": -0.919741153717041, - "logps/rejected": -1.0707252025604248, - "loss": 0.5255, - "nll_loss": 0.4938685894012451, + "log_odds_chosen": 0.2705835700035095, + "log_odds_ratio": -0.6538633108139038, + "logits/chosen": -3.127427339553833, + "logits/rejected": -3.142587661743164, + "logps/chosen": -0.9069059491157532, + "logps/rejected": -1.0691728591918945, + "loss": 0.5242, + "nll_loss": 0.4929099977016449, "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04598705843091011, - "rewards/margins": 0.0075491988100111485, - "rewards/rejected": -0.05353625863790512, + "rewards/chosen": -0.04534530267119408, + "rewards/margins": 0.008113345131278038, + "rewards/rejected": -0.05345864221453667, "step": 320 }, { "epoch": 0.34609334032511796, - "grad_norm": 3.9130583978149622, + "grad_norm": 2.4122464498293623, "learning_rate": 1.1009637651263608e-05, - "log_odds_chosen": 0.25334832072257996, - "log_odds_ratio": -0.6984423995018005, - "logits/chosen": -2.404737949371338, - "logits/rejected": -2.3937125205993652, - "logps/chosen": -0.9015901684761047, - "logps/rejected": -1.0603028535842896, - "loss": 0.5557, - "nll_loss": 0.5412198305130005, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04507950693368912, - "rewards/margins": 0.007935632951557636, - "rewards/rejected": -0.05301513522863388, + "log_odds_chosen": 0.23684370517730713, + "log_odds_ratio": -0.7030869722366333, + "logits/chosen": -3.0819878578186035, + "logits/rejected": -3.1327972412109375, + "logps/chosen": -0.9059860110282898, + "logps/rejected": -1.0601646900177002, + "loss": 0.5547, + "nll_loss": 0.5366790890693665, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.04529929906129837, + "rewards/margins": 0.007708935532718897, + "rewards/rejected": -0.05300822854042053, "step": 330 }, { "epoch": 0.356581017304667, - "grad_norm": 2.354938087613489, + "grad_norm": 2.3793498474146535, "learning_rate": 1.0846522890932809e-05, - "log_odds_chosen": 0.17314568161964417, - "log_odds_ratio": -0.6990125775337219, - "logits/chosen": -2.3741681575775146, - "logits/rejected": -2.372586727142334, - "logps/chosen": -0.8716468811035156, - "logps/rejected": -0.989061713218689, - "loss": 0.5708, - "nll_loss": 0.5135380029678345, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04358234256505966, - "rewards/margins": 0.005870741792023182, - "rewards/rejected": -0.04945308715105057, + "log_odds_chosen": 0.18786638975143433, + "log_odds_ratio": -0.6986292004585266, + "logits/chosen": -3.0940568447113037, + "logits/rejected": -3.1512954235076904, + "logps/chosen": -0.8602282404899597, + "logps/rejected": -0.9875131845474243, + "loss": 0.5702, + "nll_loss": 0.5145949125289917, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.043011412024497986, + "rewards/margins": 0.0063642458990216255, + "rewards/rejected": -0.049375660717487335, "step": 340 }, { "epoch": 0.36706869428421607, - "grad_norm": 2.2044319965087382, + "grad_norm": 2.3420960793915517, "learning_rate": 1.0690449676496977e-05, - "log_odds_chosen": 0.24199283123016357, - "log_odds_ratio": -0.687169075012207, - "logits/chosen": -2.426055908203125, - "logits/rejected": -2.37978196144104, - "logps/chosen": -0.8775957226753235, - "logps/rejected": -1.019217848777771, - "loss": 0.5463, - "nll_loss": 0.5177103281021118, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04387979209423065, - "rewards/margins": 0.007081110030412674, - "rewards/rejected": -0.05096089839935303, + "log_odds_chosen": 0.2689460217952728, + "log_odds_ratio": -0.6845754384994507, + "logits/chosen": -3.1326746940612793, + "logits/rejected": -3.1552205085754395, + "logps/chosen": -0.8725005984306335, + "logps/rejected": -1.0421197414398193, + "loss": 0.5462, + "nll_loss": 0.5172144174575806, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.043625034391880035, + "rewards/margins": 0.008480949327349663, + "rewards/rejected": -0.05210598558187485, "step": 350 }, { "epoch": 0.37755637126376507, - "grad_norm": 1.951247314132421, + "grad_norm": 2.014589871880686, "learning_rate": 1.0540925533894598e-05, - "log_odds_chosen": 0.37273699045181274, - "log_odds_ratio": -0.6097368001937866, - "logits/chosen": -2.3991737365722656, - "logits/rejected": -2.3913745880126953, - "logps/chosen": -0.8743513226509094, - "logps/rejected": -1.1015660762786865, - "loss": 0.5509, - "nll_loss": 0.5144286155700684, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04371756687760353, - "rewards/margins": 0.011360744014382362, - "rewards/rejected": -0.05507831647992134, + "log_odds_chosen": 0.37792789936065674, + "log_odds_ratio": -0.6156649589538574, + "logits/chosen": -3.010802745819092, + "logits/rejected": -3.042652130126953, + "logps/chosen": -0.8830682635307312, + "logps/rejected": -1.118240237236023, + "loss": 0.5497, + "nll_loss": 0.5099813938140869, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04415341466665268, + "rewards/margins": 0.011758595705032349, + "rewards/rejected": -0.05591200664639473, "step": 360 }, { "epoch": 0.3880440482433141, - "grad_norm": 2.1160835291077307, + "grad_norm": 2.0494786838330903, "learning_rate": 1.0397504898200728e-05, - "log_odds_chosen": 0.37601083517074585, - "log_odds_ratio": -0.6155336499214172, - "logits/chosen": -2.4748804569244385, - "logits/rejected": -2.4376637935638428, - "logps/chosen": -0.8649997711181641, - "logps/rejected": -1.1139612197875977, - "loss": 0.5205, - "nll_loss": 0.502615749835968, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.043249987065792084, - "rewards/margins": 0.012448069639503956, - "rewards/rejected": -0.05569805949926376, + "log_odds_chosen": 0.37991228699684143, + "log_odds_ratio": -0.6151097416877747, + "logits/chosen": -3.071289539337158, + "logits/rejected": -3.0840888023376465, + "logps/chosen": -0.863991379737854, + "logps/rejected": -1.1161118745803833, + "loss": 0.5195, + "nll_loss": 0.4998775124549866, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0431995615363121, + "rewards/margins": 0.012606029398739338, + "rewards/rejected": -0.055805593729019165, "step": 370 }, { "epoch": 0.3985317252228631, - "grad_norm": 2.226775744348268, + "grad_norm": 2.3440751758332294, "learning_rate": 1.0259783520851543e-05, - "log_odds_chosen": 0.429561048746109, - "log_odds_ratio": -0.5968413949012756, - "logits/chosen": -2.519869089126587, - "logits/rejected": -2.494752883911133, - "logps/chosen": -0.8703508377075195, - "logps/rejected": -1.1160125732421875, - "loss": 0.5374, - "nll_loss": 0.5153257846832275, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.043517544865608215, - "rewards/margins": 0.012283083982765675, - "rewards/rejected": -0.05580062419176102, + "log_odds_chosen": 0.4805373549461365, + "log_odds_ratio": -0.5845500230789185, + "logits/chosen": -3.1311728954315186, + "logits/rejected": -3.168400287628174, + "logps/chosen": -0.8546767234802246, + "logps/rejected": -1.1352304220199585, + "loss": 0.5371, + "nll_loss": 0.5167530179023743, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.04273384064435959, + "rewards/margins": 0.014027683064341545, + "rewards/rejected": -0.056761521846055984, "step": 380 }, { "epoch": 0.4090194022024122, - "grad_norm": 2.401246607233204, + "grad_norm": 2.50155675830033, "learning_rate": 1.0127393670836667e-05, - "log_odds_chosen": 0.08164841681718826, - "log_odds_ratio": -0.730138897895813, - "logits/chosen": -2.456601619720459, - "logits/rejected": -2.4586360454559326, - "logps/chosen": -0.9149462580680847, - "logps/rejected": -0.9739354848861694, - "loss": 0.5576, - "nll_loss": 0.5350494384765625, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.045747317373752594, - "rewards/margins": 0.0029494580812752247, - "rewards/rejected": -0.048696767538785934, + "log_odds_chosen": 0.0912429466843605, + "log_odds_ratio": -0.7177212238311768, + "logits/chosen": -3.1054975986480713, + "logits/rejected": -3.1308093070983887, + "logps/chosen": -0.9102872014045715, + "logps/rejected": -0.9754246473312378, + "loss": 0.5574, + "nll_loss": 0.5331951379776001, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.045514363795518875, + "rewards/margins": 0.003256872994825244, + "rewards/rejected": -0.04877123609185219, "step": 390 }, { "epoch": 0.4195070791819612, - "grad_norm": 1.9835782676616263, + "grad_norm": 2.027467517514936, "learning_rate": 1e-05, - "log_odds_chosen": 0.24804361164569855, - "log_odds_ratio": -0.6891428232192993, - "logits/chosen": -2.352999210357666, - "logits/rejected": -2.3628151416778564, - "logps/chosen": -0.9478782415390015, - "logps/rejected": -1.1192692518234253, - "loss": 0.5724, - "nll_loss": 0.5249911546707153, - "rewards/accuracies": 0.53125, - "rewards/chosen": -0.047393910586833954, - "rewards/margins": 0.008569559082388878, - "rewards/rejected": -0.05596347525715828, + "log_odds_chosen": 0.2633103132247925, + "log_odds_ratio": -0.6879682540893555, + "logits/chosen": -3.0087058544158936, + "logits/rejected": -3.0386600494384766, + "logps/chosen": -0.9468951225280762, + "logps/rejected": -1.1236045360565186, + "loss": 0.5738, + "nll_loss": 0.527585506439209, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.04734475538134575, + "rewards/margins": 0.008835467509925365, + "rewards/rejected": -0.05618022754788399, "step": 400 }, { "epoch": 0.4195070791819612, - "eval_log_odds_chosen": 0.2819042503833771, - "eval_log_odds_ratio": -0.6550887227058411, - "eval_logits/chosen": -2.4376399517059326, - "eval_logits/rejected": -2.4026126861572266, - "eval_logps/chosen": -0.8510361313819885, - "eval_logps/rejected": -1.029338002204895, - "eval_loss": 0.5415622591972351, - "eval_nll_loss": 0.5060027837753296, - "eval_rewards/accuracies": 0.625, - "eval_rewards/chosen": -0.042551808059215546, - "eval_rewards/margins": 0.008915101177990437, - "eval_rewards/rejected": -0.051466912031173706, - "eval_runtime": 135.9814, - "eval_samples_per_second": 14.664, + "eval_log_odds_chosen": 0.2960740923881531, + "eval_log_odds_ratio": -0.6521593332290649, + "eval_logits/chosen": -3.1019551753997803, + "eval_logits/rejected": -3.1026368141174316, + "eval_logps/chosen": -0.8433709740638733, + "eval_logps/rejected": -1.0346297025680542, + "eval_loss": 0.5411269664764404, + "eval_nll_loss": 0.5047088265419006, + "eval_rewards/accuracies": 0.6289682388305664, + "eval_rewards/chosen": -0.042168550193309784, + "eval_rewards/margins": 0.00956293661147356, + "eval_rewards/rejected": -0.05173148587346077, + "eval_runtime": 135.94, + "eval_samples_per_second": 14.668, "eval_steps_per_second": 0.463, "step": 400 }, { "epoch": 0.4299947561615102, - "grad_norm": 2.0741408388417053, + "grad_norm": 2.077556227084633, "learning_rate": 9.877295966495898e-06, - "log_odds_chosen": 0.14674368500709534, - "log_odds_ratio": -0.7315293550491333, - "logits/chosen": -2.453657388687134, - "logits/rejected": -2.4033920764923096, - "logps/chosen": -0.8739027976989746, - "logps/rejected": -0.9881707429885864, - "loss": 0.546, - "nll_loss": 0.48288026452064514, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04369514063000679, - "rewards/margins": 0.0057133943773806095, - "rewards/rejected": -0.04940853267908096, + "log_odds_chosen": 0.1433972865343094, + "log_odds_ratio": -0.7417241930961609, + "logits/chosen": -3.147104024887085, + "logits/rejected": -3.1611135005950928, + "logps/chosen": -0.8865131139755249, + "logps/rejected": -0.9979325532913208, + "loss": 0.5454, + "nll_loss": 0.4825812876224518, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.044325657188892365, + "rewards/margins": 0.005570969078689814, + "rewards/rejected": -0.04989662766456604, "step": 410 }, { "epoch": 0.4404824331410593, - "grad_norm": 1.862967371631046, + "grad_norm": 1.9177361456178337, "learning_rate": 9.759000729485331e-06, - "log_odds_chosen": 0.3599195182323456, - "log_odds_ratio": -0.6281547546386719, - "logits/chosen": -2.3640646934509277, - "logits/rejected": -2.3699867725372314, - "logps/chosen": -0.8427717089653015, - "logps/rejected": -1.0523298978805542, - "loss": 0.5338, - "nll_loss": 0.475394070148468, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.042138583958148956, - "rewards/margins": 0.01047790888696909, - "rewards/rejected": -0.05261648818850517, + "log_odds_chosen": 0.2965100407600403, + "log_odds_ratio": -0.6552795171737671, + "logits/chosen": -3.065213203430176, + "logits/rejected": -3.106889247894287, + "logps/chosen": -0.8926699757575989, + "logps/rejected": -1.073974609375, + "loss": 0.5349, + "nll_loss": 0.47521886229515076, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04463350027799606, + "rewards/margins": 0.009065226651728153, + "rewards/rejected": -0.05369872972369194, "step": 420 }, { "epoch": 0.4509701101206083, - "grad_norm": 2.2577027347270673, + "grad_norm": 2.2675621915351503, "learning_rate": 9.644856443408244e-06, - "log_odds_chosen": 0.2772213816642761, - "log_odds_ratio": -0.6547843217849731, - "logits/chosen": -2.463442325592041, - "logits/rejected": -2.4424116611480713, - "logps/chosen": -0.8533428311347961, - "logps/rejected": -1.0268352031707764, - "loss": 0.5561, - "nll_loss": 0.5445196628570557, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.04266713932156563, - "rewards/margins": 0.008674620650708675, - "rewards/rejected": -0.051341764628887177, + "log_odds_chosen": 0.29174235463142395, + "log_odds_ratio": -0.6506129503250122, + "logits/chosen": -3.075723648071289, + "logits/rejected": -3.0862226486206055, + "logps/chosen": -0.8427901268005371, + "logps/rejected": -1.0184295177459717, + "loss": 0.5557, + "nll_loss": 0.5429800152778625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.04213951155543327, + "rewards/margins": 0.008781969547271729, + "rewards/rejected": -0.050921481102705, "step": 430 }, { "epoch": 0.46145778710015734, - "grad_norm": 2.148366110891132, + "grad_norm": 2.048479923586714, "learning_rate": 9.534625892455923e-06, - "log_odds_chosen": 0.251740038394928, - "log_odds_ratio": -0.6593549847602844, - "logits/chosen": -2.433262586593628, - "logits/rejected": -2.400451183319092, - "logps/chosen": -0.869005560874939, - "logps/rejected": -1.0262689590454102, - "loss": 0.5514, - "nll_loss": 0.518151044845581, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.043450284749269485, - "rewards/margins": 0.007863158360123634, - "rewards/rejected": -0.05131344124674797, + "log_odds_chosen": 0.2715272009372711, + "log_odds_ratio": -0.6504871249198914, + "logits/chosen": -3.114889144897461, + "logits/rejected": -3.1430869102478027, + "logps/chosen": -0.8674638867378235, + "logps/rejected": -1.0402857065200806, + "loss": 0.5502, + "nll_loss": 0.5185979604721069, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04337319731712341, + "rewards/margins": 0.008641095831990242, + "rewards/rejected": -0.05201428383588791, "step": 440 }, { "epoch": 0.47194546407970633, - "grad_norm": 1.9641531890945303, + "grad_norm": 1.9700303764265876, "learning_rate": 9.428090415820635e-06, - "log_odds_chosen": 0.3584665358066559, - "log_odds_ratio": -0.6613593101501465, - "logits/chosen": -2.3730902671813965, - "logits/rejected": -2.3335137367248535, - "logps/chosen": -0.8309770822525024, - "logps/rejected": -1.0618839263916016, - "loss": 0.5284, - "nll_loss": 0.4951680600643158, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.041548848152160645, - "rewards/margins": 0.011545347049832344, - "rewards/rejected": -0.05309419706463814, + "log_odds_chosen": 0.37898144125938416, + "log_odds_ratio": -0.6548101305961609, + "logits/chosen": -3.141404390335083, + "logits/rejected": -3.1785435676574707, + "logps/chosen": -0.8289934396743774, + "logps/rejected": -1.080649733543396, + "loss": 0.5278, + "nll_loss": 0.49574679136276245, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.04144967347383499, + "rewards/margins": 0.01258282084017992, + "rewards/rejected": -0.05403248593211174, "step": 450 }, { "epoch": 0.4824331410592554, - "grad_norm": 2.1323921754635955, + "grad_norm": 2.1444885294890796, "learning_rate": 9.325048082403139e-06, - "log_odds_chosen": 0.18219377100467682, - "log_odds_ratio": -0.7052776217460632, - "logits/chosen": -2.417771577835083, - "logits/rejected": -2.391197681427002, - "logps/chosen": -0.9514438509941101, - "logps/rejected": -1.081947922706604, - "loss": 0.532, - "nll_loss": 0.5332220792770386, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.047572195529937744, - "rewards/margins": 0.006525200791656971, - "rewards/rejected": -0.05409740284085274, + "log_odds_chosen": 0.21225424110889435, + "log_odds_ratio": -0.6999707221984863, + "logits/chosen": -3.110089063644409, + "logits/rejected": -3.1592323780059814, + "logps/chosen": -0.947162926197052, + "logps/rejected": -1.1105449199676514, + "loss": 0.5315, + "nll_loss": 0.5339683890342712, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.04735814779996872, + "rewards/margins": 0.008169097825884819, + "rewards/rejected": -0.05552724748849869, "step": 460 }, { "epoch": 0.4929208180388044, - "grad_norm": 2.0935836198507145, + "grad_norm": 2.1649660190560613, "learning_rate": 9.225312080288851e-06, - "log_odds_chosen": 0.2585422098636627, - "log_odds_ratio": -0.681999683380127, - "logits/chosen": -2.4441823959350586, - "logits/rejected": -2.418107271194458, - "logps/chosen": -0.8849735260009766, - "logps/rejected": -1.0435359477996826, - "loss": 0.5416, - "nll_loss": 0.5094045400619507, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04424867779016495, - "rewards/margins": 0.007928118109703064, - "rewards/rejected": -0.05217679589986801, + "log_odds_chosen": 0.2549912929534912, + "log_odds_ratio": -0.6857655644416809, + "logits/chosen": -3.0928080081939697, + "logits/rejected": -3.1287431716918945, + "logps/chosen": -0.8865912556648254, + "logps/rejected": -1.050857663154602, + "loss": 0.5421, + "nll_loss": 0.5101572275161743, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.044329557567834854, + "rewards/margins": 0.008213317021727562, + "rewards/rejected": -0.052542876452207565, "step": 470 }, { "epoch": 0.5034084950183534, - "grad_norm": 2.077184991073431, + "grad_norm": 1.89898044344756, "learning_rate": 9.12870929175277e-06, - "log_odds_chosen": 0.1411927044391632, - "log_odds_ratio": -0.7211004495620728, - "logits/chosen": -2.478450298309326, - "logits/rejected": -2.4527339935302734, - "logps/chosen": -0.8615080118179321, - "logps/rejected": -0.9589959979057312, - "loss": 0.5176, - "nll_loss": 0.5134377479553223, + "log_odds_chosen": 0.18933558464050293, + "log_odds_ratio": -0.7031041383743286, + "logits/chosen": -3.1588873863220215, + "logits/rejected": -3.1968955993652344, + "logps/chosen": -0.8558489680290222, + "logps/rejected": -0.980047881603241, + "loss": 0.5174, + "nll_loss": 0.5121264457702637, "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04307539761066437, - "rewards/margins": 0.004874409642070532, - "rewards/rejected": -0.04794980585575104, + "rewards/chosen": -0.04279245063662529, + "rewards/margins": 0.006209943443536758, + "rewards/rejected": -0.04900239408016205, "step": 480 }, { "epoch": 0.5138961719979025, - "grad_norm": 1.9047433825748046, + "grad_norm": 1.9212510076087481, "learning_rate": 9.035079029052514e-06, - "log_odds_chosen": 0.20130577683448792, - "log_odds_ratio": -0.687514066696167, - "logits/chosen": -2.404505491256714, - "logits/rejected": -2.3535995483398438, - "logps/chosen": -0.9324936866760254, - "logps/rejected": -1.036684274673462, - "loss": 0.54, - "nll_loss": 0.5031000375747681, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04662468656897545, - "rewards/margins": 0.0052095321007072926, - "rewards/rejected": -0.05183422565460205, + "log_odds_chosen": 0.23131313920021057, + "log_odds_ratio": -0.6693936586380005, + "logits/chosen": -3.094421625137329, + "logits/rejected": -3.1039950847625732, + "logps/chosen": -0.9284296035766602, + "logps/rejected": -1.0470894575119019, + "loss": 0.5391, + "nll_loss": 0.5019217729568481, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04642148315906525, + "rewards/margins": 0.005932994186878204, + "rewards/rejected": -0.05235447734594345, "step": 490 }, { "epoch": 0.5243838489774515, - "grad_norm": 2.2722995091864013, + "grad_norm": 2.197524211966931, "learning_rate": 8.94427190999916e-06, - "log_odds_chosen": 0.20798742771148682, - "log_odds_ratio": -0.6965998411178589, - "logits/chosen": -2.503917694091797, - "logits/rejected": -2.4567532539367676, - "logps/chosen": -0.882551372051239, - "logps/rejected": -1.009610652923584, - "loss": 0.5486, - "nll_loss": 0.5240460634231567, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04412757605314255, - "rewards/margins": 0.00635296106338501, - "rewards/rejected": -0.05048053711652756, + "log_odds_chosen": 0.2233821153640747, + "log_odds_ratio": -0.6923887729644775, + "logits/chosen": -3.0647079944610596, + "logits/rejected": -3.0620505809783936, + "logps/chosen": -0.8755196332931519, + "logps/rejected": -1.0028659105300903, + "loss": 0.5478, + "nll_loss": 0.5219477415084839, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04377598315477371, + "rewards/margins": 0.0063673085533082485, + "rewards/rejected": -0.0501432940363884, "step": 500 }, { "epoch": 0.5243838489774515, - "eval_log_odds_chosen": 0.319318950176239, - "eval_log_odds_ratio": -0.6438891291618347, - "eval_logits/chosen": -2.471830368041992, - "eval_logits/rejected": -2.437340021133423, - "eval_logps/chosen": -0.8492264151573181, - "eval_logps/rejected": -1.0513862371444702, - "eval_loss": 0.5343749523162842, - "eval_nll_loss": 0.49899229407310486, - "eval_rewards/accuracies": 0.6150793433189392, - "eval_rewards/chosen": -0.042461320757865906, - "eval_rewards/margins": 0.010107995010912418, - "eval_rewards/rejected": -0.05256931483745575, - "eval_runtime": 137.8752, - "eval_samples_per_second": 14.462, - "eval_steps_per_second": 0.457, + "eval_log_odds_chosen": 0.33266139030456543, + "eval_log_odds_ratio": -0.6382430791854858, + "eval_logits/chosen": -3.028609275817871, + "eval_logits/rejected": -3.0259969234466553, + "eval_logps/chosen": -0.8414799571037292, + "eval_logps/rejected": -1.0509231090545654, + "eval_loss": 0.5319445133209229, + "eval_nll_loss": 0.49702468514442444, + "eval_rewards/accuracies": 0.6289682388305664, + "eval_rewards/chosen": -0.04207399860024452, + "eval_rewards/margins": 0.010472159832715988, + "eval_rewards/rejected": -0.05254615470767021, + "eval_runtime": 136.7326, + "eval_samples_per_second": 14.583, + "eval_steps_per_second": 0.461, "step": 500 }, { "epoch": 0.5348715259570005, - "grad_norm": 1.7543648883606602, + "grad_norm": 1.7639475332504142, "learning_rate": 8.856148855400955e-06, - "log_odds_chosen": 0.290159672498703, - "log_odds_ratio": -0.6539579629898071, - "logits/chosen": -2.5112712383270264, - "logits/rejected": -2.4847443103790283, - "logps/chosen": -0.8425674438476562, - "logps/rejected": -1.0141561031341553, - "loss": 0.5278, - "nll_loss": 0.5365942120552063, + "log_odds_chosen": 0.29167047142982483, + "log_odds_ratio": -0.648201048374176, + "logits/chosen": -3.0114383697509766, + "logits/rejected": -3.024693250656128, + "logps/chosen": -0.841100811958313, + "logps/rejected": -1.0192333459854126, + "loss": 0.5263, + "nll_loss": 0.5350626111030579, "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04212837293744087, - "rewards/margins": 0.00857943668961525, - "rewards/rejected": -0.05070780962705612, + "rewards/chosen": -0.04205504059791565, + "rewards/margins": 0.00890662893652916, + "rewards/rejected": -0.05096167325973511, "step": 510 }, { "epoch": 0.5453592029365496, - "grad_norm": 1.6585367227101162, + "grad_norm": 1.6884098835310988, "learning_rate": 8.770580193070294e-06, - "log_odds_chosen": 0.23928451538085938, - "log_odds_ratio": -0.6756108999252319, - "logits/chosen": -2.4492619037628174, - "logits/rejected": -2.413327693939209, - "logps/chosen": -0.9059408903121948, - "logps/rejected": -1.068650245666504, - "loss": 0.5372, - "nll_loss": 0.47487586736679077, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.0452970452606678, - "rewards/margins": 0.008135473355650902, - "rewards/rejected": -0.053432513028383255, + "log_odds_chosen": 0.24579331278800964, + "log_odds_ratio": -0.6814862489700317, + "logits/chosen": -3.016019582748413, + "logits/rejected": -3.0255684852600098, + "logps/chosen": -0.9082791209220886, + "logps/rejected": -1.0769283771514893, + "loss": 0.5369, + "nll_loss": 0.47502464056015015, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04541395604610443, + "rewards/margins": 0.008432453498244286, + "rewards/rejected": -0.053846411406993866, "step": 520 }, { "epoch": 0.5558468799160986, - "grad_norm": 2.041930709602407, + "grad_norm": 1.7588436164574766, "learning_rate": 8.687444855261389e-06, - "log_odds_chosen": 0.4141673445701599, - "log_odds_ratio": -0.6465325355529785, - "logits/chosen": -2.50728178024292, - "logits/rejected": -2.4843533039093018, - "logps/chosen": -0.828266978263855, - "logps/rejected": -1.111327886581421, - "loss": 0.5372, - "nll_loss": 0.4500916004180908, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04141335189342499, - "rewards/margins": 0.014153043739497662, - "rewards/rejected": -0.055566392838954926, + "log_odds_chosen": 0.39766445755958557, + "log_odds_ratio": -0.6521557569503784, + "logits/chosen": -3.0906691551208496, + "logits/rejected": -3.1090755462646484, + "logps/chosen": -0.8297191858291626, + "logps/rejected": -1.1049801111221313, + "loss": 0.5364, + "nll_loss": 0.450814813375473, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.04148596152663231, + "rewards/margins": 0.01376304216682911, + "rewards/rejected": -0.05524900555610657, "step": 530 }, { "epoch": 0.5663345568956476, - "grad_norm": 1.8588001511511827, + "grad_norm": 1.9397603724841295, "learning_rate": 8.606629658238705e-06, - "log_odds_chosen": 0.1719091385602951, - "log_odds_ratio": -0.697492241859436, - "logits/chosen": -2.500349760055542, - "logits/rejected": -2.4829323291778564, - "logps/chosen": -0.8647764325141907, - "logps/rejected": -0.9715200662612915, - "loss": 0.553, - "nll_loss": 0.526767373085022, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.04323882237076759, - "rewards/margins": 0.005337181035429239, - "rewards/rejected": -0.048576001077890396, + "log_odds_chosen": 0.15624158084392548, + "log_odds_ratio": -0.7059566378593445, + "logits/chosen": -3.0063095092773438, + "logits/rejected": -3.0354349613189697, + "logps/chosen": -0.8621616363525391, + "logps/rejected": -0.9609626531600952, + "loss": 0.5526, + "nll_loss": 0.5280291438102722, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.04310808330774307, + "rewards/margins": 0.0049400487914681435, + "rewards/rejected": -0.04804813116788864, "step": 540 }, { "epoch": 0.5768222338751966, - "grad_norm": 1.9151013438807294, + "grad_norm": 1.9970251061131588, "learning_rate": 8.528028654224417e-06, - "log_odds_chosen": 0.41694098711013794, - "log_odds_ratio": -0.6211504936218262, - "logits/chosen": -2.526711940765381, - "logits/rejected": -2.488142490386963, - "logps/chosen": -0.866176426410675, - "logps/rejected": -1.1338094472885132, - "loss": 0.5372, - "nll_loss": 0.5370919704437256, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04330882430076599, - "rewards/margins": 0.013381647877395153, - "rewards/rejected": -0.05669047310948372, + "log_odds_chosen": 0.3964000940322876, + "log_odds_ratio": -0.6276581883430481, + "logits/chosen": -3.051056385040283, + "logits/rejected": -3.0628600120544434, + "logps/chosen": -0.8477095365524292, + "logps/rejected": -1.090545415878296, + "loss": 0.5377, + "nll_loss": 0.5382589101791382, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04238547384738922, + "rewards/margins": 0.012141798622906208, + "rewards/rejected": -0.05452727526426315, "step": 550 }, { "epoch": 0.5873099108547457, - "grad_norm": 1.9475996513575733, + "grad_norm": 1.9451374983545444, "learning_rate": 8.451542547285167e-06, - "log_odds_chosen": 0.23696064949035645, - "log_odds_ratio": -0.6743646860122681, - "logits/chosen": -2.518937110900879, - "logits/rejected": -2.490901470184326, - "logps/chosen": -0.8790968060493469, - "logps/rejected": -1.026903748512268, + "log_odds_chosen": 0.24946291744709015, + "log_odds_ratio": -0.6731950044631958, + "logits/chosen": -3.09270977973938, + "logits/rejected": -3.1291451454162598, + "logps/chosen": -0.8785122632980347, + "logps/rejected": -1.0384708642959595, "loss": 0.5214, - "nll_loss": 0.5015530586242676, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.043954841792583466, - "rewards/margins": 0.007390348706394434, - "rewards/rejected": -0.05134518817067146, + "nll_loss": 0.5020500421524048, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04392561689019203, + "rewards/margins": 0.007997924461960793, + "rewards/rejected": -0.05192355066537857, "step": 560 }, { "epoch": 0.5977975878342947, - "grad_norm": 1.8968936654846589, + "grad_norm": 2.015759366014609, "learning_rate": 8.37707816583391e-06, - "log_odds_chosen": 0.1709347516298294, - "log_odds_ratio": -0.721364438533783, - "logits/chosen": -2.5332372188568115, - "logits/rejected": -2.5029869079589844, - "logps/chosen": -0.8752782940864563, - "logps/rejected": -0.9968281984329224, - "loss": 0.5044, - "nll_loss": 0.528136134147644, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.043763916939496994, - "rewards/margins": 0.006077499594539404, - "rewards/rejected": -0.04984141141176224, + "log_odds_chosen": 0.1689465194940567, + "log_odds_ratio": -0.7204016447067261, + "logits/chosen": -3.082165241241455, + "logits/rejected": -3.113685369491577, + "logps/chosen": -0.8903343081474304, + "logps/rejected": -1.0027625560760498, + "loss": 0.5039, + "nll_loss": 0.5279403924942017, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.04451671987771988, + "rewards/margins": 0.0056214118376374245, + "rewards/rejected": -0.05013813450932503, "step": 570 }, { "epoch": 0.6082852648138437, - "grad_norm": 1.901568677283731, + "grad_norm": 1.8532059123988396, "learning_rate": 8.304547985373997e-06, - "log_odds_chosen": 0.29886722564697266, - "log_odds_ratio": -0.6539247632026672, - "logits/chosen": -2.5251448154449463, - "logits/rejected": -2.5202994346618652, - "logps/chosen": -0.8657291531562805, - "logps/rejected": -1.0712764263153076, - "loss": 0.5461, - "nll_loss": 0.48294153809547424, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04328645393252373, - "rewards/margins": 0.01027736347168684, - "rewards/rejected": -0.05356382206082344, + "log_odds_chosen": 0.27719905972480774, + "log_odds_ratio": -0.6604655385017395, + "logits/chosen": -3.164926528930664, + "logits/rejected": -3.1809298992156982, + "logps/chosen": -0.8681858777999878, + "logps/rejected": -1.0584015846252441, + "loss": 0.5449, + "nll_loss": 0.48173967003822327, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04340929910540581, + "rewards/margins": 0.009510790929198265, + "rewards/rejected": -0.052920084446668625, "step": 580 }, { "epoch": 0.6187729417933928, - "grad_norm": 1.9902242754931676, + "grad_norm": 1.9696416884513863, "learning_rate": 8.233869695926184e-06, - "log_odds_chosen": 0.33919957280158997, - "log_odds_ratio": -0.6699340343475342, - "logits/chosen": -2.56527042388916, - "logits/rejected": -2.572580575942993, - "logps/chosen": -0.8352983593940735, - "logps/rejected": -1.0507652759552002, - "loss": 0.5138, - "nll_loss": 0.5514861345291138, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.041764914989471436, - "rewards/margins": 0.010773347690701485, - "rewards/rejected": -0.05253826454281807, + "log_odds_chosen": 0.3565579056739807, + "log_odds_ratio": -0.6653521656990051, + "logits/chosen": -3.1371326446533203, + "logits/rejected": -3.1804890632629395, + "logps/chosen": -0.8285515904426575, + "logps/rejected": -1.060605764389038, + "loss": 0.5115, + "nll_loss": 0.5481864213943481, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04142758250236511, + "rewards/margins": 0.011602701619267464, + "rewards/rejected": -0.05303028225898743, "step": 590 }, { "epoch": 0.6292606187729418, - "grad_norm": 1.9968521848986331, + "grad_norm": 2.0728707870222607, "learning_rate": 8.164965809277262e-06, - "log_odds_chosen": 0.3369660973548889, - "log_odds_ratio": -0.6556235551834106, - "logits/chosen": -2.563744306564331, - "logits/rejected": -2.5540928840637207, - "logps/chosen": -0.8338971138000488, - "logps/rejected": -1.0589314699172974, - "loss": 0.5156, - "nll_loss": 0.4856153130531311, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04169485345482826, - "rewards/margins": 0.011251723393797874, - "rewards/rejected": -0.052946578711271286, + "log_odds_chosen": 0.3636320233345032, + "log_odds_ratio": -0.6437779664993286, + "logits/chosen": -3.155708074569702, + "logits/rejected": -3.155524492263794, + "logps/chosen": -0.8240157961845398, + "logps/rejected": -1.06477952003479, + "loss": 0.5146, + "nll_loss": 0.4843020439147949, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04120079427957535, + "rewards/margins": 0.012038188055157661, + "rewards/rejected": -0.05323898047208786, "step": 600 }, { "epoch": 0.6292606187729418, - "eval_log_odds_chosen": 0.30560463666915894, - "eval_log_odds_ratio": -0.6469583511352539, - "eval_logits/chosen": -2.581132173538208, - "eval_logits/rejected": -2.555058717727661, - "eval_logps/chosen": -0.8332868218421936, - "eval_logps/rejected": -1.0284805297851562, - "eval_loss": 0.5242142677307129, - "eval_nll_loss": 0.48822054266929626, - "eval_rewards/accuracies": 0.6150793433189392, - "eval_rewards/chosen": -0.04166434332728386, - "eval_rewards/margins": 0.009759685955941677, - "eval_rewards/rejected": -0.05142403393983841, - "eval_runtime": 136.8777, - "eval_samples_per_second": 14.568, - "eval_steps_per_second": 0.46, + "eval_log_odds_chosen": 0.312126487493515, + "eval_log_odds_ratio": -0.6417948603630066, + "eval_logits/chosen": -3.127530336380005, + "eval_logits/rejected": -3.1324751377105713, + "eval_logps/chosen": -0.8164808750152588, + "eval_logps/rejected": -1.016471028327942, + "eval_loss": 0.5239931344985962, + "eval_nll_loss": 0.4882962703704834, + "eval_rewards/accuracies": 0.6230158805847168, + "eval_rewards/chosen": -0.0408240407705307, + "eval_rewards/margins": 0.00999950896948576, + "eval_rewards/rejected": -0.050823554396629333, + "eval_runtime": 137.2676, + "eval_samples_per_second": 14.526, + "eval_steps_per_second": 0.459, "step": 600 }, { "epoch": 0.6397482957524908, - "grad_norm": 2.0046645094729914, + "grad_norm": 2.2204480702078246, "learning_rate": 8.097763301789162e-06, - "log_odds_chosen": 0.1975608468055725, - "log_odds_ratio": -0.692371666431427, - "logits/chosen": -2.4713737964630127, - "logits/rejected": -2.4652817249298096, - "logps/chosen": -0.8778279423713684, - "logps/rejected": -0.9979953765869141, - "loss": 0.5255, - "nll_loss": 0.4872562289237976, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04389139264822006, - "rewards/margins": 0.006008377764374018, - "rewards/rejected": -0.04989977926015854, + "log_odds_chosen": 0.1712610125541687, + "log_odds_ratio": -0.705093502998352, + "logits/chosen": -3.0651237964630127, + "logits/rejected": -3.0982956886291504, + "logps/chosen": -0.8816771507263184, + "logps/rejected": -0.989287257194519, + "loss": 0.526, + "nll_loss": 0.48726779222488403, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.044083863496780396, + "rewards/margins": 0.0053805033676326275, + "rewards/rejected": -0.04946436733007431, "step": 610 }, { "epoch": 0.6502359727320398, - "grad_norm": 2.040129936950799, + "grad_norm": 2.0795066851294, "learning_rate": 8.03219328902499e-06, - "log_odds_chosen": 0.1849410980939865, - "log_odds_ratio": -0.7018038630485535, - "logits/chosen": -2.539135694503784, - "logits/rejected": -2.500748872756958, - "logps/chosen": -0.8882759213447571, - "logps/rejected": -1.0186254978179932, - "loss": 0.5299, - "nll_loss": 0.5155984163284302, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04441379755735397, - "rewards/margins": 0.006517473608255386, - "rewards/rejected": -0.05093127489089966, + "log_odds_chosen": 0.18011939525604248, + "log_odds_ratio": -0.7075856328010559, + "logits/chosen": -3.093158721923828, + "logits/rejected": -3.1170780658721924, + "logps/chosen": -0.8789434432983398, + "logps/rejected": -1.0122572183609009, + "loss": 0.5293, + "nll_loss": 0.5134457945823669, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.043947167694568634, + "rewards/margins": 0.006665694061666727, + "rewards/rejected": -0.050612859427928925, "step": 620 }, { "epoch": 0.6607236497115889, - "grad_norm": 2.2341440675434683, + "grad_norm": 2.0001788984831514, "learning_rate": 7.968190728895958e-06, - "log_odds_chosen": 0.2307681292295456, - "log_odds_ratio": -0.7022296786308289, - "logits/chosen": -2.498748779296875, - "logits/rejected": -2.490837812423706, - "logps/chosen": -0.8587957620620728, - "logps/rejected": -1.0074841976165771, - "loss": 0.5336, - "nll_loss": 0.5248268842697144, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04293978586792946, - "rewards/margins": 0.007434426806867123, - "rewards/rejected": -0.050374217331409454, + "log_odds_chosen": 0.2610745429992676, + "log_odds_ratio": -0.6974207758903503, + "logits/chosen": -3.0472846031188965, + "logits/rejected": -3.0721120834350586, + "logps/chosen": -0.8566058874130249, + "logps/rejected": -1.0223418474197388, + "loss": 0.5372, + "nll_loss": 0.5244878530502319, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.042830295860767365, + "rewards/margins": 0.00828679371625185, + "rewards/rejected": -0.05111708492040634, "step": 630 }, { "epoch": 0.6712113266911379, - "grad_norm": 2.376219217041332, + "grad_norm": 2.3414302184737332, "learning_rate": 7.905694150420949e-06, - "log_odds_chosen": 0.2888760268688202, - "log_odds_ratio": -0.6808607578277588, - "logits/chosen": -2.4707798957824707, - "logits/rejected": -2.437269687652588, - "logps/chosen": -0.8727089166641235, - "logps/rejected": -1.049213171005249, - "loss": 0.5154, - "nll_loss": 0.46024101972579956, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.043635450303554535, - "rewards/margins": 0.008825212717056274, - "rewards/rejected": -0.05246065929532051, + "log_odds_chosen": 0.30453813076019287, + "log_odds_ratio": -0.6686201095581055, + "logits/chosen": -3.0571064949035645, + "logits/rejected": -3.079134464263916, + "logps/chosen": -0.8609515428543091, + "logps/rejected": -1.0473490953445435, + "loss": 0.5151, + "nll_loss": 0.46057072281837463, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.04304756969213486, + "rewards/margins": 0.009319878183305264, + "rewards/rejected": -0.05236745625734329, "step": 640 }, { "epoch": 0.6816990036706869, - "grad_norm": 1.8230434305650385, + "grad_norm": 1.9074311662484937, "learning_rate": 7.844645405527363e-06, - "log_odds_chosen": 0.197784885764122, - "log_odds_ratio": -0.706741988658905, - "logits/chosen": -2.497851848602295, - "logits/rejected": -2.4905192852020264, - "logps/chosen": -0.8444819450378418, - "logps/rejected": -0.975223183631897, - "loss": 0.5333, - "nll_loss": 0.5124696493148804, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04222410172224045, - "rewards/margins": 0.006537059787660837, - "rewards/rejected": -0.048761166632175446, + "log_odds_chosen": 0.21438069641590118, + "log_odds_ratio": -0.7022002935409546, + "logits/chosen": -3.058842897415161, + "logits/rejected": -3.0864357948303223, + "logps/chosen": -0.8311389684677124, + "logps/rejected": -0.9654434323310852, + "loss": 0.5332, + "nll_loss": 0.5123748183250427, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0415569506585598, + "rewards/margins": 0.006715219467878342, + "rewards/rejected": -0.04827217012643814, "step": 650 }, { "epoch": 0.6921866806502359, - "grad_norm": 1.8970235810963871, + "grad_norm": 1.9616180703535884, "learning_rate": 7.78498944161523e-06, - "log_odds_chosen": 0.3309662640094757, - "log_odds_ratio": -0.6530941724777222, - "logits/chosen": -2.5231690406799316, - "logits/rejected": -2.494469404220581, - "logps/chosen": -0.8999967575073242, - "logps/rejected": -1.128688097000122, - "loss": 0.5297, - "nll_loss": 0.4878067970275879, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04499983415007591, - "rewards/margins": 0.01143457181751728, - "rewards/rejected": -0.05643441155552864, + "log_odds_chosen": 0.3507782816886902, + "log_odds_ratio": -0.641882061958313, + "logits/chosen": -3.0647902488708496, + "logits/rejected": -3.1045496463775635, + "logps/chosen": -0.8823181390762329, + "logps/rejected": -1.1245914697647095, + "loss": 0.5293, + "nll_loss": 0.48711147904396057, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.044115908443927765, + "rewards/margins": 0.012113666161894798, + "rewards/rejected": -0.05622958019375801, "step": 660 }, { "epoch": 0.702674357629785, - "grad_norm": 1.8146696004735343, + "grad_norm": 2.2401170633783427, "learning_rate": 7.726674092862559e-06, - "log_odds_chosen": 0.45232027769088745, - "log_odds_ratio": -0.6299984455108643, - "logits/chosen": -2.4613699913024902, - "logits/rejected": -2.447711944580078, - "logps/chosen": -0.8380166292190552, - "logps/rejected": -1.143587350845337, - "loss": 0.524, - "nll_loss": 0.4693591594696045, + "log_odds_chosen": 0.4617346227169037, + "log_odds_ratio": -0.627942681312561, + "logits/chosen": -3.0200469493865967, + "logits/rejected": -3.0557796955108643, + "logps/chosen": -0.8328607678413391, + "logps/rejected": -1.140726923942566, + "loss": 0.5237, + "nll_loss": 0.46908053755760193, "rewards/accuracies": 0.65625, - "rewards/chosen": -0.04190083220601082, - "rewards/margins": 0.015278531238436699, - "rewards/rejected": -0.05717936158180237, + "rewards/chosen": -0.041643042117357254, + "rewards/margins": 0.015393314883112907, + "rewards/rejected": -0.057036347687244415, "step": 670 }, { "epoch": 0.713162034609334, - "grad_norm": 2.112620721807998, + "grad_norm": 2.00824540701018, "learning_rate": 7.669649888473705e-06, - "log_odds_chosen": 0.31269508600234985, - "log_odds_ratio": -0.6543976664543152, - "logits/chosen": -2.464507818222046, - "logits/rejected": -2.418670892715454, - "logps/chosen": -0.871087908744812, - "logps/rejected": -1.055972695350647, - "loss": 0.5087, - "nll_loss": 0.4947647452354431, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.0435543991625309, - "rewards/margins": 0.009244237095117569, - "rewards/rejected": -0.05279862880706787, + "log_odds_chosen": 0.36505717039108276, + "log_odds_ratio": -0.6428455114364624, + "logits/chosen": -3.0360779762268066, + "logits/rejected": -3.044907808303833, + "logps/chosen": -0.8793157339096069, + "logps/rejected": -1.1065771579742432, + "loss": 0.5083, + "nll_loss": 0.4951552450656891, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.04396578669548035, + "rewards/margins": 0.0113630760461092, + "rewards/rejected": -0.0553288571536541, "step": 680 }, { "epoch": 0.723649711588883, - "grad_norm": 1.8678427871613372, + "grad_norm": 1.8606652251395144, "learning_rate": 7.61386987626881e-06, - "log_odds_chosen": 0.1339389979839325, - "log_odds_ratio": -0.7375361919403076, - "logits/chosen": -2.5094785690307617, - "logits/rejected": -2.479645252227783, - "logps/chosen": -0.86939537525177, - "logps/rejected": -0.9705018997192383, + "log_odds_chosen": 0.2045813500881195, + "log_odds_ratio": -0.7114613056182861, + "logits/chosen": -3.036839723587036, + "logits/rejected": -3.0589654445648193, + "logps/chosen": -0.8661033511161804, + "logps/rejected": -1.014004111289978, "loss": 0.5313, - "nll_loss": 0.5503351092338562, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04346977174282074, - "rewards/margins": 0.005055318586528301, - "rewards/rejected": -0.04852508753538132, + "nll_loss": 0.5510386824607849, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.0433051735162735, + "rewards/margins": 0.007395035121589899, + "rewards/rejected": -0.05070021003484726, "step": 690 }, { "epoch": 0.7341373885684321, - "grad_norm": 2.2093003307729235, + "grad_norm": 2.2895278902082747, "learning_rate": 7.559289460184545e-06, - "log_odds_chosen": 0.31829774379730225, - "log_odds_ratio": -0.6285902261734009, - "logits/chosen": -2.4719974994659424, - "logits/rejected": -2.4300436973571777, - "logps/chosen": -0.8187413215637207, - "logps/rejected": -1.0199077129364014, - "loss": 0.5297, - "nll_loss": 0.5423263907432556, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04093705862760544, - "rewards/margins": 0.010058322921395302, - "rewards/rejected": -0.05099538713693619, + "log_odds_chosen": 0.34833860397338867, + "log_odds_ratio": -0.6269202828407288, + "logits/chosen": -3.0252926349639893, + "logits/rejected": -3.068871021270752, + "logps/chosen": -0.8163930177688599, + "logps/rejected": -1.0459128618240356, + "loss": 0.5298, + "nll_loss": 0.5428040623664856, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04081965237855911, + "rewards/margins": 0.01147598959505558, + "rewards/rejected": -0.05229564383625984, "step": 700 }, { "epoch": 0.7341373885684321, - "eval_log_odds_chosen": 0.3407081067562103, - "eval_log_odds_ratio": -0.6351403594017029, - "eval_logits/chosen": -2.4800758361816406, - "eval_logits/rejected": -2.447735548019409, - "eval_logps/chosen": -0.8215174674987793, - "eval_logps/rejected": -1.0422321557998657, - "eval_loss": 0.5191378593444824, - "eval_nll_loss": 0.48380225896835327, - "eval_rewards/accuracies": 0.6309523582458496, - "eval_rewards/chosen": -0.04107587784528732, - "eval_rewards/margins": 0.011035734787583351, - "eval_rewards/rejected": -0.052111607044935226, - "eval_runtime": 136.6803, - "eval_samples_per_second": 14.589, - "eval_steps_per_second": 0.461, + "eval_log_odds_chosen": 0.3869401812553406, + "eval_log_odds_ratio": -0.6218506097793579, + "eval_logits/chosen": -3.0754599571228027, + "eval_logits/rejected": -3.076083183288574, + "eval_logps/chosen": -0.8267216682434082, + "eval_logps/rejected": -1.0827099084854126, + "eval_loss": 0.5187779068946838, + "eval_nll_loss": 0.4841572344303131, + "eval_rewards/accuracies": 0.6428571343421936, + "eval_rewards/chosen": -0.04133608192205429, + "eval_rewards/margins": 0.012799412943422794, + "eval_rewards/rejected": -0.05413549765944481, + "eval_runtime": 137.1864, + "eval_samples_per_second": 14.535, + "eval_steps_per_second": 0.459, "step": 700 }, { "epoch": 0.7446250655479811, - "grad_norm": 1.9677237822159654, + "grad_norm": 1.958829045282282, "learning_rate": 7.505866250408016e-06, - "log_odds_chosen": 0.25284355878829956, - "log_odds_ratio": -0.6641759872436523, - "logits/chosen": -2.5103344917297363, - "logits/rejected": -2.492450714111328, - "logps/chosen": -0.8418480753898621, - "logps/rejected": -1.0181466341018677, - "loss": 0.5238, - "nll_loss": 0.4800626337528229, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04209240525960922, - "rewards/margins": 0.008814921602606773, - "rewards/rejected": -0.050907332450151443, + "log_odds_chosen": 0.2794094383716583, + "log_odds_ratio": -0.6572638750076294, + "logits/chosen": -3.1184074878692627, + "logits/rejected": -3.1369974613189697, + "logps/chosen": -0.8444921374320984, + "logps/rejected": -1.0439577102661133, + "loss": 0.5242, + "nll_loss": 0.47964978218078613, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04222460836172104, + "rewards/margins": 0.00997327920049429, + "rewards/rejected": -0.052197881042957306, "step": 710 }, { "epoch": 0.7551127425275301, - "grad_norm": 1.836122210517328, + "grad_norm": 1.8049248182957538, "learning_rate": 7.4535599249993e-06, - "log_odds_chosen": 0.35044384002685547, - "log_odds_ratio": -0.6476293802261353, - "logits/chosen": -2.4395956993103027, - "logits/rejected": -2.4294960498809814, - "logps/chosen": -0.7990450859069824, - "logps/rejected": -1.0232374668121338, - "loss": 0.5307, - "nll_loss": 0.4627358317375183, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.03995225578546524, - "rewards/margins": 0.01120961643755436, - "rewards/rejected": -0.05116187408566475, + "log_odds_chosen": 0.36963027715682983, + "log_odds_ratio": -0.6443501710891724, + "logits/chosen": -3.075653076171875, + "logits/rejected": -3.0980098247528076, + "logps/chosen": -0.7987631559371948, + "logps/rejected": -1.03029465675354, + "loss": 0.5308, + "nll_loss": 0.4633590281009674, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.03993815928697586, + "rewards/margins": 0.011576572433114052, + "rewards/rejected": -0.051514726132154465, "step": 720 }, { "epoch": 0.7656004195070791, - "grad_norm": 2.1971858392846007, + "grad_norm": 2.1907119668628807, "learning_rate": 7.402332101976053e-06, - "log_odds_chosen": 0.119834303855896, - "log_odds_ratio": -0.7195987701416016, - "logits/chosen": -2.4724977016448975, - "logits/rejected": -2.466116189956665, - "logps/chosen": -0.8342846035957336, - "logps/rejected": -0.9009860754013062, - "loss": 0.5373, - "nll_loss": 0.5026193857192993, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.04171422868967056, - "rewards/margins": 0.003335078712552786, - "rewards/rejected": -0.045049309730529785, + "log_odds_chosen": 0.1018507108092308, + "log_odds_ratio": -0.7229408621788025, + "logits/chosen": -3.084719181060791, + "logits/rejected": -3.0846333503723145, + "logps/chosen": -0.8332414627075195, + "logps/rejected": -0.8869687914848328, + "loss": 0.5377, + "nll_loss": 0.5031158328056335, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.041662074625492096, + "rewards/margins": 0.00268636760301888, + "rewards/rejected": -0.04434844106435776, "step": 730 }, { "epoch": 0.7760880964866282, - "grad_norm": 2.0508082994645074, + "grad_norm": 2.050092986168091, "learning_rate": 7.352146220938079e-06, - "log_odds_chosen": 0.3144014775753021, - "log_odds_ratio": -0.6337074041366577, - "logits/chosen": -2.502380847930908, - "logits/rejected": -2.4844462871551514, - "logps/chosen": -0.8134506940841675, - "logps/rejected": -1.0163192749023438, - "loss": 0.5318, - "nll_loss": 0.4793321192264557, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.040672533214092255, - "rewards/margins": 0.010143419727683067, - "rewards/rejected": -0.05081595852971077, + "log_odds_chosen": 0.3393878936767578, + "log_odds_ratio": -0.6246740221977234, + "logits/chosen": -3.119809627532959, + "logits/rejected": -3.132826328277588, + "logps/chosen": -0.804786205291748, + "logps/rejected": -1.0171911716461182, + "loss": 0.5308, + "nll_loss": 0.4794273376464844, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.040239304304122925, + "rewards/margins": 0.010620243847370148, + "rewards/rejected": -0.05085955187678337, "step": 740 }, { "epoch": 0.7865757734661772, - "grad_norm": 1.9563195894092036, + "grad_norm": 2.0193892114327556, "learning_rate": 7.3029674334022146e-06, - "log_odds_chosen": 0.26552754640579224, - "log_odds_ratio": -0.6666983366012573, - "logits/chosen": -2.502037763595581, - "logits/rejected": -2.4685168266296387, - "logps/chosen": -0.8672981262207031, - "logps/rejected": -1.0235345363616943, - "loss": 0.5431, - "nll_loss": 0.4982251226902008, + "log_odds_chosen": 0.2425309419631958, + "log_odds_ratio": -0.6716917753219604, + "logits/chosen": -3.093583106994629, + "logits/rejected": -3.114816188812256, + "logps/chosen": -0.8740803599357605, + "logps/rejected": -1.0157320499420166, + "loss": 0.5427, + "nll_loss": 0.4982066750526428, "rewards/accuracies": 0.59375, - "rewards/chosen": -0.043364908546209335, - "rewards/margins": 0.007811821065843105, - "rewards/rejected": -0.05117672681808472, + "rewards/chosen": -0.04370402172207832, + "rewards/margins": 0.007082589901983738, + "rewards/rejected": -0.05078660696744919, "step": 750 }, { "epoch": 0.7970634504457262, - "grad_norm": 1.7911295245119114, + "grad_norm": 1.891204637475333, "learning_rate": 7.254762501100117e-06, - "log_odds_chosen": 0.2668423354625702, - "log_odds_ratio": -0.6578959226608276, - "logits/chosen": -2.4371020793914795, - "logits/rejected": -2.417811870574951, - "logps/chosen": -0.812160849571228, - "logps/rejected": -0.9835097193717957, - "loss": 0.5124, - "nll_loss": 0.4057600498199463, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04060804471373558, - "rewards/margins": 0.008567440323531628, - "rewards/rejected": -0.04917549341917038, + "log_odds_chosen": 0.2664291262626648, + "log_odds_ratio": -0.6672528386116028, + "logits/chosen": -3.0630593299865723, + "logits/rejected": -3.0695788860321045, + "logps/chosen": -0.8163594007492065, + "logps/rejected": -0.993925929069519, + "loss": 0.5114, + "nll_loss": 0.40486717224121094, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.040817975997924805, + "rewards/margins": 0.00887832697480917, + "rewards/rejected": -0.049696292728185654, "step": 760 }, { "epoch": 0.8075511274252754, - "grad_norm": 2.141524900160083, + "grad_norm": 2.0675479903273914, "learning_rate": 7.207499701564472e-06, - "log_odds_chosen": 0.21438488364219666, - "log_odds_ratio": -0.7001821994781494, - "logits/chosen": -2.432426929473877, - "logits/rejected": -2.4075264930725098, - "logps/chosen": -0.8868153691291809, - "logps/rejected": -1.0428266525268555, - "loss": 0.5341, - "nll_loss": 0.5013958811759949, + "log_odds_chosen": 0.23201966285705566, + "log_odds_ratio": -0.6995107531547546, + "logits/chosen": -3.027050018310547, + "logits/rejected": -3.0489039421081543, + "logps/chosen": -0.8810374140739441, + "logps/rejected": -1.0541043281555176, + "loss": 0.5343, + "nll_loss": 0.5017890334129333, "rewards/accuracies": 0.5062500238418579, - "rewards/chosen": -0.044340766966342926, - "rewards/margins": 0.007800562772899866, - "rewards/rejected": -0.052141331136226654, + "rewards/chosen": -0.0440518744289875, + "rewards/margins": 0.008653342723846436, + "rewards/rejected": -0.05270521715283394, "step": 770 }, { "epoch": 0.8180388044048243, - "grad_norm": 2.177116882955384, + "grad_norm": 1.9571785710156353, "learning_rate": 7.1611487403943295e-06, - "log_odds_chosen": 0.20159511268138885, - "log_odds_ratio": -0.6788761019706726, - "logits/chosen": -2.4562764167785645, - "logits/rejected": -2.4499940872192383, - "logps/chosen": -0.883618950843811, - "logps/rejected": -0.998609721660614, - "loss": 0.5476, - "nll_loss": 0.549780547618866, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.044180940836668015, - "rewards/margins": 0.0057495469227433205, - "rewards/rejected": -0.04993049427866936, + "log_odds_chosen": 0.23842506110668182, + "log_odds_ratio": -0.672247052192688, + "logits/chosen": -3.062586545944214, + "logits/rejected": -3.0935113430023193, + "logps/chosen": -0.8818261027336121, + "logps/rejected": -1.0167505741119385, + "loss": 0.5467, + "nll_loss": 0.5480509996414185, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04409131035208702, + "rewards/margins": 0.006746229715645313, + "rewards/rejected": -0.05083753541111946, "step": 780 }, { "epoch": 0.8285264813843733, - "grad_norm": 1.9065399955928464, + "grad_norm": 1.8565884413084413, "learning_rate": 7.115680669648201e-06, - "log_odds_chosen": 0.3184022307395935, - "log_odds_ratio": -0.6474640965461731, - "logits/chosen": -2.4409756660461426, - "logits/rejected": -2.4378621578216553, - "logps/chosen": -0.8189239501953125, - "logps/rejected": -1.0288126468658447, - "loss": 0.5063, - "nll_loss": 0.4424379765987396, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04094620421528816, - "rewards/margins": 0.010494431480765343, - "rewards/rejected": -0.051440637558698654, + "log_odds_chosen": 0.32895228266716003, + "log_odds_ratio": -0.6478875875473022, + "logits/chosen": -3.1025116443634033, + "logits/rejected": -3.1219050884246826, + "logps/chosen": -0.8189374804496765, + "logps/rejected": -1.0338833332061768, + "loss": 0.5049, + "nll_loss": 0.44281667470932007, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.040946874767541885, + "rewards/margins": 0.010747292079031467, + "rewards/rejected": -0.05169416218996048, "step": 790 }, { "epoch": 0.8390141583639223, - "grad_norm": 2.0323396486068086, + "grad_norm": 2.106485781152954, "learning_rate": 7.0710678118654756e-06, - "log_odds_chosen": 0.4407121241092682, - "log_odds_ratio": -0.6011781096458435, - "logits/chosen": -2.411447048187256, - "logits/rejected": -2.389554500579834, - "logps/chosen": -0.7789972424507141, - "logps/rejected": -1.0607415437698364, - "loss": 0.5184, - "nll_loss": 0.4200369715690613, + "log_odds_chosen": 0.4608131945133209, + "log_odds_ratio": -0.5961465835571289, + "logits/chosen": -3.092484951019287, + "logits/rejected": -3.090536117553711, + "logps/chosen": -0.7798897624015808, + "logps/rejected": -1.0744028091430664, + "loss": 0.5181, + "nll_loss": 0.4202440679073334, "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.03894985839724541, - "rewards/margins": 0.014087215065956116, - "rewards/rejected": -0.053037069737911224, + "rewards/chosen": -0.03899449110031128, + "rewards/margins": 0.014725650660693645, + "rewards/rejected": -0.0537201389670372, "step": 800 }, { "epoch": 0.8390141583639223, - "eval_log_odds_chosen": 0.37830010056495667, - "eval_log_odds_ratio": -0.6304489970207214, - "eval_logits/chosen": -2.4921600818634033, - "eval_logits/rejected": -2.4575421810150146, - "eval_logps/chosen": -0.8178579211235046, - "eval_logps/rejected": -1.0646613836288452, - "eval_loss": 0.5137735605239868, - "eval_nll_loss": 0.4796440601348877, - "eval_rewards/accuracies": 0.6309523582458496, - "eval_rewards/chosen": -0.04089289531111717, - "eval_rewards/margins": 0.012340176850557327, - "eval_rewards/rejected": -0.0532330721616745, - "eval_runtime": 136.422, - "eval_samples_per_second": 14.616, + "eval_log_odds_chosen": 0.35056135058403015, + "eval_log_odds_ratio": -0.6322371363639832, + "eval_logits/chosen": -3.139373302459717, + "eval_logits/rejected": -3.1382317543029785, + "eval_logps/chosen": -0.8198128342628479, + "eval_logps/rejected": -1.0474979877471924, + "eval_loss": 0.5140993595123291, + "eval_nll_loss": 0.4803001582622528, + "eval_rewards/accuracies": 0.6329365372657776, + "eval_rewards/chosen": -0.040990639477968216, + "eval_rewards/margins": 0.011384249664843082, + "eval_rewards/rejected": -0.05237489193677902, + "eval_runtime": 136.2293, + "eval_samples_per_second": 14.637, "eval_steps_per_second": 0.462, "step": 800 }, { "epoch": 0.8495018353434715, - "grad_norm": 1.8282488507148427, + "grad_norm": 1.919736952774634, "learning_rate": 7.027283689263066e-06, - "log_odds_chosen": 0.34346696734428406, - "log_odds_ratio": -0.6324015259742737, - "logits/chosen": -2.4684414863586426, - "logits/rejected": -2.419478178024292, - "logps/chosen": -0.7967440485954285, - "logps/rejected": -1.0016282796859741, - "loss": 0.513, - "nll_loss": 0.4749225676059723, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03983720391988754, - "rewards/margins": 0.0102442167699337, - "rewards/rejected": -0.050081413239240646, + "log_odds_chosen": 0.3574589788913727, + "log_odds_ratio": -0.6265517473220825, + "logits/chosen": -3.0922906398773193, + "logits/rejected": -3.093270778656006, + "logps/chosen": -0.8058309555053711, + "logps/rejected": -1.0188381671905518, + "loss": 0.5132, + "nll_loss": 0.4754185676574707, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04029155150055885, + "rewards/margins": 0.010650361888110638, + "rewards/rejected": -0.050941914319992065, "step": 810 }, { "epoch": 0.8599895123230205, - "grad_norm": 2.4353704194820343, + "grad_norm": 2.3619475771455214, "learning_rate": 6.984302957695783e-06, - "log_odds_chosen": 0.3245043158531189, - "log_odds_ratio": -0.643069863319397, - "logits/chosen": -2.3819215297698975, - "logits/rejected": -2.349990129470825, - "logps/chosen": -0.8364176750183105, - "logps/rejected": -1.031416654586792, - "loss": 0.5047, - "nll_loss": 0.42748355865478516, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.041820887476205826, - "rewards/margins": 0.009749949909746647, - "rewards/rejected": -0.0515708327293396, + "log_odds_chosen": 0.2932414412498474, + "log_odds_ratio": -0.6586158275604248, + "logits/chosen": -3.0357770919799805, + "logits/rejected": -3.0360379219055176, + "logps/chosen": -0.842557430267334, + "logps/rejected": -1.0188366174697876, + "loss": 0.505, + "nll_loss": 0.4280059337615967, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04212787002325058, + "rewards/margins": 0.008813952095806599, + "rewards/rejected": -0.0509418249130249, "step": 820 }, { "epoch": 0.8704771893025695, - "grad_norm": 2.34776921636757, + "grad_norm": 2.3824306185771267, "learning_rate": 6.942101345006233e-06, - "log_odds_chosen": 0.2353450506925583, - "log_odds_ratio": -0.7053114175796509, - "logits/chosen": -2.4152169227600098, - "logits/rejected": -2.4146676063537598, - "logps/chosen": -0.8572267293930054, - "logps/rejected": -1.0250964164733887, - "loss": 0.5251, - "nll_loss": 0.4658161699771881, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.042861342430114746, - "rewards/margins": 0.008393481373786926, - "rewards/rejected": -0.051254820078611374, + "log_odds_chosen": 0.2479257881641388, + "log_odds_ratio": -0.702430248260498, + "logits/chosen": -3.008411407470703, + "logits/rejected": -3.05663800239563, + "logps/chosen": -0.853378415107727, + "logps/rejected": -1.0239073038101196, + "loss": 0.5248, + "nll_loss": 0.4657117426395416, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.04266892373561859, + "rewards/margins": 0.00852644257247448, + "rewards/rejected": -0.05119536444544792, "step": 830 }, { "epoch": 0.8809648662821186, - "grad_norm": 1.9903482887340351, + "grad_norm": 1.9624325890421999, "learning_rate": 6.900655593423542e-06, - "log_odds_chosen": 0.19377179443836212, - "log_odds_ratio": -0.6883701086044312, - "logits/chosen": -2.450711727142334, - "logits/rejected": -2.4275918006896973, - "logps/chosen": -0.8693481683731079, - "logps/rejected": -0.9985544085502625, - "loss": 0.5129, - "nll_loss": 0.4828173518180847, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04346740245819092, - "rewards/margins": 0.006460316479206085, - "rewards/rejected": -0.0499277226626873, + "log_odds_chosen": 0.2082471400499344, + "log_odds_ratio": -0.6889498233795166, + "logits/chosen": -3.040546178817749, + "logits/rejected": -3.0660147666931152, + "logps/chosen": -0.8756462931632996, + "logps/rejected": -1.0124717950820923, + "loss": 0.5137, + "nll_loss": 0.4855361580848694, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.043782319873571396, + "rewards/margins": 0.006841268390417099, + "rewards/rejected": -0.050623588263988495, "step": 840 }, { "epoch": 0.8914525432616676, - "grad_norm": 2.0572418230575784, + "grad_norm": 2.0144554917595756, "learning_rate": 6.859943405700353e-06, - "log_odds_chosen": 0.2696766257286072, - "log_odds_ratio": -0.6502133011817932, - "logits/chosen": -2.4989540576934814, - "logits/rejected": -2.480175018310547, - "logps/chosen": -0.8377168774604797, - "logps/rejected": -1.0055049657821655, - "loss": 0.5056, - "nll_loss": 0.49036240577697754, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.041885845363140106, - "rewards/margins": 0.008389403112232685, - "rewards/rejected": -0.05027524754405022, + "log_odds_chosen": 0.3205421566963196, + "log_odds_ratio": -0.6371484994888306, + "logits/chosen": -3.054384231567383, + "logits/rejected": -3.0986409187316895, + "logps/chosen": -0.8319618105888367, + "logps/rejected": -1.0313116312026978, + "loss": 0.5044, + "nll_loss": 0.4881317615509033, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.041598085314035416, + "rewards/margins": 0.009967491030693054, + "rewards/rejected": -0.05156558007001877, "step": 850 }, { "epoch": 0.9019402202412166, - "grad_norm": 1.9654607653654201, + "grad_norm": 1.9341957217840544, "learning_rate": 6.819943394704736e-06, - "log_odds_chosen": 0.21943990886211395, - "log_odds_ratio": -0.6898983716964722, - "logits/chosen": -2.5337636470794678, - "logits/rejected": -2.546189546585083, - "logps/chosen": -0.8453003168106079, - "logps/rejected": -0.9918826222419739, - "loss": 0.5285, - "nll_loss": 0.4758750796318054, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.042265016585588455, - "rewards/margins": 0.007329112850129604, - "rewards/rejected": -0.049594126641750336, + "log_odds_chosen": 0.26728707551956177, + "log_odds_ratio": -0.6747015714645386, + "logits/chosen": -3.0936527252197266, + "logits/rejected": -3.1073575019836426, + "logps/chosen": -0.8353049159049988, + "logps/rejected": -1.0224361419677734, + "loss": 0.5278, + "nll_loss": 0.4731883108615875, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0417652502655983, + "rewards/margins": 0.009356559254229069, + "rewards/rejected": -0.05112180858850479, "step": 860 }, { "epoch": 0.9124278972207656, - "grad_norm": 1.9335503237781204, + "grad_norm": 5.30319924106792, "learning_rate": 6.780635036208105e-06, - "log_odds_chosen": 0.29112187027931213, - "log_odds_ratio": -0.6706128120422363, - "logits/chosen": -2.535698890686035, - "logits/rejected": -2.5173022747039795, - "logps/chosen": -0.8680477142333984, - "logps/rejected": -1.0733777284622192, - "loss": 0.4939, - "nll_loss": 0.4834807515144348, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04340239241719246, - "rewards/margins": 0.010266497731208801, - "rewards/rejected": -0.05366888642311096, + "log_odds_chosen": 0.30106544494628906, + "log_odds_ratio": -0.6683878898620605, + "logits/chosen": -3.097151279449463, + "logits/rejected": -3.1499500274658203, + "logps/chosen": -0.867012619972229, + "logps/rejected": -1.0790386199951172, + "loss": 0.4933, + "nll_loss": 0.48347124457359314, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.04335063695907593, + "rewards/margins": 0.010601297952234745, + "rewards/rejected": -0.0539519302546978, "step": 870 }, { "epoch": 0.9229155742003147, - "grad_norm": 1.6238877353235468, + "grad_norm": 1.6208302885778367, "learning_rate": 6.741998624632421e-06, - "log_odds_chosen": 0.29792147874832153, - "log_odds_ratio": -0.6587230563163757, - "logits/chosen": -2.533979892730713, - "logits/rejected": -2.498964548110962, - "logps/chosen": -0.8105006217956543, - "logps/rejected": -0.999383807182312, - "loss": 0.4883, - "nll_loss": 0.4381064474582672, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.040525030344724655, - "rewards/margins": 0.009444162249565125, - "rewards/rejected": -0.04996918886899948, + "log_odds_chosen": 0.29186171293258667, + "log_odds_ratio": -0.6591932773590088, + "logits/chosen": -3.15583872795105, + "logits/rejected": -3.168064594268799, + "logps/chosen": -0.8187226057052612, + "logps/rejected": -1.0049909353256226, + "loss": 0.4887, + "nll_loss": 0.4384452700614929, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04093613475561142, + "rewards/margins": 0.009313413873314857, + "rewards/rejected": -0.05024954676628113, "step": 880 }, { "epoch": 0.9334032511798637, - "grad_norm": 1.7501205893349534, + "grad_norm": 1.7707391073712173, "learning_rate": 6.70401523153991e-06, - "log_odds_chosen": 0.3352048397064209, - "log_odds_ratio": -0.6494973301887512, - "logits/chosen": -2.5071699619293213, - "logits/rejected": -2.488619327545166, - "logps/chosen": -0.811043381690979, - "logps/rejected": -1.0027059316635132, - "loss": 0.4932, - "nll_loss": 0.4646259844303131, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04055216908454895, - "rewards/margins": 0.0095831248909235, - "rewards/rejected": -0.0501352921128273, + "log_odds_chosen": 0.33703380823135376, + "log_odds_ratio": -0.6459982991218567, + "logits/chosen": -3.1340742111206055, + "logits/rejected": -3.157071590423584, + "logps/chosen": -0.8063561320304871, + "logps/rejected": -0.9982324838638306, + "loss": 0.4931, + "nll_loss": 0.4631246030330658, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.040317803621292114, + "rewards/margins": 0.009593818336725235, + "rewards/rejected": -0.04991162568330765, "step": 890 }, { "epoch": 0.9438909281594127, - "grad_norm": 1.9660241717665867, + "grad_norm": 2.341682439233393, "learning_rate": 6.666666666666667e-06, - "log_odds_chosen": 0.2810109555721283, - "log_odds_ratio": -0.6615744829177856, - "logits/chosen": -2.5149343013763428, - "logits/rejected": -2.5029118061065674, - "logps/chosen": -0.7809039354324341, - "logps/rejected": -0.9545317888259888, - "loss": 0.5235, - "nll_loss": 0.4655960202217102, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.039045192301273346, - "rewards/margins": 0.008681395091116428, - "rewards/rejected": -0.0477265901863575, + "log_odds_chosen": 0.26426905393600464, + "log_odds_ratio": -0.6637164354324341, + "logits/chosen": -3.1100411415100098, + "logits/rejected": -3.130826473236084, + "logps/chosen": -0.7806347012519836, + "logps/rejected": -0.9385608434677124, + "loss": 0.5239, + "nll_loss": 0.4659123420715332, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.039031732827425, + "rewards/margins": 0.00789631437510252, + "rewards/rejected": -0.0469280444085598, "step": 900 }, { "epoch": 0.9438909281594127, - "eval_log_odds_chosen": 0.3305439352989197, - "eval_log_odds_ratio": -0.6379230618476868, - "eval_logits/chosen": -2.5633676052093506, - "eval_logits/rejected": -2.533735752105713, - "eval_logps/chosen": -0.8084598779678345, - "eval_logps/rejected": -1.0201667547225952, - "eval_loss": 0.5087887644767761, - "eval_nll_loss": 0.4741307497024536, - "eval_rewards/accuracies": 0.6289682388305664, - "eval_rewards/chosen": -0.040422990918159485, - "eval_rewards/margins": 0.010585347190499306, - "eval_rewards/rejected": -0.05100833997130394, - "eval_runtime": 137.3356, - "eval_samples_per_second": 14.519, - "eval_steps_per_second": 0.459, + "eval_log_odds_chosen": 0.32679569721221924, + "eval_log_odds_ratio": -0.6327584385871887, + "eval_logits/chosen": -3.117077112197876, + "eval_logits/rejected": -3.119086742401123, + "eval_logps/chosen": -0.8044511079788208, + "eval_logps/rejected": -1.0129274129867554, + "eval_loss": 0.5086367726325989, + "eval_nll_loss": 0.4747697710990906, + "eval_rewards/accuracies": 0.6309523582458496, + "eval_rewards/chosen": -0.04022255912423134, + "eval_rewards/margins": 0.010423817671835423, + "eval_rewards/rejected": -0.05064636468887329, + "eval_runtime": 137.5576, + "eval_samples_per_second": 14.496, + "eval_steps_per_second": 0.458, "step": 900 }, { "epoch": 0.9543786051389617, - "grad_norm": 1.980970750978968, + "grad_norm": 2.0533389896159213, "learning_rate": 6.629935441317959e-06, - "log_odds_chosen": 0.4885142743587494, - "log_odds_ratio": -0.6281706094741821, - "logits/chosen": -2.5022482872009277, - "logits/rejected": -2.4692506790161133, - "logps/chosen": -0.8217445611953735, - "logps/rejected": -1.1550103425979614, - "loss": 0.5139, - "nll_loss": 0.46534866094589233, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.04108722507953644, - "rewards/margins": 0.016663286834955215, - "rewards/rejected": -0.05775051191449165, + "log_odds_chosen": 0.4754648208618164, + "log_odds_ratio": -0.6232188940048218, + "logits/chosen": -3.073176622390747, + "logits/rejected": -3.084963321685791, + "logps/chosen": -0.828788161277771, + "logps/rejected": -1.1443804502487183, + "loss": 0.5142, + "nll_loss": 0.46652156114578247, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04143941029906273, + "rewards/margins": 0.015779614448547363, + "rewards/rejected": -0.05721902847290039, "step": 910 }, { "epoch": 0.9648662821185108, - "grad_norm": 1.9369311282677693, + "grad_norm": 2.138448059862142, "learning_rate": 6.593804733957872e-06, - "log_odds_chosen": 0.30783259868621826, - "log_odds_ratio": -0.6484240293502808, - "logits/chosen": -2.43939208984375, - "logits/rejected": -2.4428539276123047, - "logps/chosen": -0.7850558161735535, - "logps/rejected": -0.9770357012748718, + "log_odds_chosen": 0.32768282294273376, + "log_odds_ratio": -0.6431117057800293, + "logits/chosen": -3.038576364517212, + "logits/rejected": -3.061370372772217, + "logps/chosen": -0.7864677906036377, + "logps/rejected": -0.9946994781494141, "loss": 0.4836, - "nll_loss": 0.4291355013847351, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.03925279527902603, - "rewards/margins": 0.009598996490240097, - "rewards/rejected": -0.04885178059339523, + "nll_loss": 0.43025264143943787, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03932339325547218, + "rewards/margins": 0.010411588475108147, + "rewards/rejected": -0.04973498359322548, "step": 920 }, { "epoch": 0.9753539590980598, - "grad_norm": 2.1104816487301523, + "grad_norm": 2.1602863053901413, "learning_rate": 6.55825835783953e-06, - "log_odds_chosen": 0.2358274906873703, - "log_odds_ratio": -0.6784078478813171, - "logits/chosen": -2.5169990062713623, - "logits/rejected": -2.4993927478790283, - "logps/chosen": -0.8740841746330261, - "logps/rejected": -1.0411127805709839, - "loss": 0.5133, - "nll_loss": 0.5242566466331482, + "log_odds_chosen": 0.2050061970949173, + "log_odds_ratio": -0.6868597269058228, + "logits/chosen": -3.0544333457946777, + "logits/rejected": -3.066739797592163, + "logps/chosen": -0.8742432594299316, + "logps/rejected": -1.0194706916809082, + "loss": 0.5136, + "nll_loss": 0.5241981744766235, "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04370420426130295, - "rewards/margins": 0.008351435884833336, - "rewards/rejected": -0.05205564573407173, + "rewards/chosen": -0.04371216148138046, + "rewards/margins": 0.007261371705681086, + "rewards/rejected": -0.05097353458404541, "step": 930 }, { "epoch": 0.9858416360776088, - "grad_norm": 1.9809353614537575, + "grad_norm": 1.9215491222233851, "learning_rate": 6.523280730534423e-06, - "log_odds_chosen": 0.25125178694725037, - "log_odds_ratio": -0.6891010403633118, - "logits/chosen": -2.52742862701416, - "logits/rejected": -2.5163397789001465, - "logps/chosen": -0.7782126069068909, - "logps/rejected": -0.9240644574165344, + "log_odds_chosen": 0.23041269183158875, + "log_odds_ratio": -0.6992384195327759, + "logits/chosen": -3.0867247581481934, + "logits/rejected": -3.0779662132263184, + "logps/chosen": -0.7768861651420593, + "logps/rejected": -0.9184977412223816, "loss": 0.5102, - "nll_loss": 0.478424072265625, + "nll_loss": 0.4776674211025238, "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.038910627365112305, - "rewards/margins": 0.007292595691978931, - "rewards/rejected": -0.04620322585105896, + "rewards/chosen": -0.038844309747219086, + "rewards/margins": 0.0070805782452225685, + "rewards/rejected": -0.045924894511699677, "step": 940 }, { "epoch": 0.9963293130571579, - "grad_norm": 2.052392197513496, + "grad_norm": 2.1983436102574547, "learning_rate": 6.488856845230502e-06, - "log_odds_chosen": 0.22615018486976624, - "log_odds_ratio": -0.7002879977226257, - "logits/chosen": -2.4786956310272217, - "logits/rejected": -2.447265625, - "logps/chosen": -0.8600684404373169, - "logps/rejected": -0.9971193075180054, - "loss": 0.5383, - "nll_loss": 0.5037115812301636, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.043003425002098083, - "rewards/margins": 0.006852544844150543, - "rewards/rejected": -0.04985596612095833, + "log_odds_chosen": 0.25244003534317017, + "log_odds_ratio": -0.6911928653717041, + "logits/chosen": -3.0215468406677246, + "logits/rejected": -3.0374438762664795, + "logps/chosen": -0.8648554682731628, + "logps/rejected": -1.0236364603042603, + "loss": 0.5385, + "nll_loss": 0.5036488175392151, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04324277862906456, + "rewards/margins": 0.00793905183672905, + "rewards/rejected": -0.05118182301521301, "step": 950 }, { - "epoch": 0.9994756161510225, - "step": 953, + "epoch": 1.0068169900367068, + "grad_norm": 2.2724469008271773, + "learning_rate": 6.4549722436790284e-06, + "log_odds_chosen": 1.0400245189666748, + "log_odds_ratio": -0.42517581582069397, + "logits/chosen": -3.0371384620666504, + "logits/rejected": -3.0435400009155273, + "logps/chosen": -0.5974615812301636, + "logps/rejected": -1.1842448711395264, + "loss": 0.3929, + "nll_loss": 0.40045398473739624, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -0.02987307868897915, + "rewards/margins": 0.02933916449546814, + "rewards/rejected": -0.05921224504709244, + "step": 960 + }, + { + "epoch": 1.017304667016256, + "grad_norm": 2.0168885022396372, + "learning_rate": 6.421612990679356e-06, + "log_odds_chosen": 1.6284434795379639, + "log_odds_ratio": -0.2502659857273102, + "logits/chosen": -3.080873727798462, + "logits/rejected": -3.070159912109375, + "logps/chosen": -0.4285094141960144, + "logps/rejected": -1.2745321989059448, + "loss": 0.2923, + "nll_loss": 0.28497669100761414, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.02142546884715557, + "rewards/margins": 0.04230114072561264, + "rewards/rejected": -0.06372661143541336, + "step": 970 + }, + { + "epoch": 1.027792343995805, + "grad_norm": 1.9662869053425782, + "learning_rate": 6.3887656499994e-06, + "log_odds_chosen": 1.8482691049575806, + "log_odds_ratio": -0.21383436024188995, + "logits/chosen": -3.071471929550171, + "logits/rejected": -3.079923391342163, + "logps/chosen": -0.43078216910362244, + "logps/rejected": -1.4107215404510498, + "loss": 0.3019, + "nll_loss": 0.3140898644924164, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.021539105102419853, + "rewards/margins": 0.04899696633219719, + "rewards/rejected": -0.0705360695719719, + "step": 980 + }, + { + "epoch": 1.038280020975354, + "grad_norm": 1.9845582869348006, + "learning_rate": 6.356417261637282e-06, + "log_odds_chosen": 1.6627075672149658, + "log_odds_ratio": -0.2610566318035126, + "logits/chosen": -2.9875268936157227, + "logits/rejected": -2.9876785278320312, + "logps/chosen": -0.4378105103969574, + "logps/rejected": -1.3178083896636963, + "loss": 0.296, + "nll_loss": 0.27773916721343994, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.02189052477478981, + "rewards/margins": 0.04399988800287247, + "rewards/rejected": -0.06589041650295258, + "step": 990 + }, + { + "epoch": 1.048767697954903, + "grad_norm": 2.0942478813902783, + "learning_rate": 6.324555320336759e-06, + "log_odds_chosen": 1.9041988849639893, + "log_odds_ratio": -0.20684988796710968, + "logits/chosen": -2.9869093894958496, + "logits/rejected": -3.029050588607788, + "logps/chosen": -0.4077525734901428, + "logps/rejected": -1.3952513933181763, + "loss": 0.2888, + "nll_loss": 0.2748258709907532, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.02038763090968132, + "rewards/margins": 0.04937494546175003, + "rewards/rejected": -0.06976256519556046, + "step": 1000 + }, + { + "epoch": 1.048767697954903, + "eval_log_odds_chosen": 0.37935417890548706, + "eval_log_odds_ratio": -0.6318228840827942, + "eval_logits/chosen": -3.0189764499664307, + "eval_logits/rejected": -3.0171284675598145, + "eval_logps/chosen": -0.8724088072776794, + "eval_logps/rejected": -1.112794280052185, + "eval_loss": 0.5400179028511047, + "eval_nll_loss": 0.5058131814002991, + "eval_rewards/accuracies": 0.6428571343421936, + "eval_rewards/chosen": -0.04362044483423233, + "eval_rewards/margins": 0.012019270099699497, + "eval_rewards/rejected": -0.05563971400260925, + "eval_runtime": 136.9938, + "eval_samples_per_second": 14.555, + "eval_steps_per_second": 0.46, + "step": 1000 + }, + { + "epoch": 1.059255374934452, + "grad_norm": 1.8526210480251912, + "learning_rate": 6.2931677552755265e-06, + "log_odds_chosen": 1.7620799541473389, + "log_odds_ratio": -0.23190836608409882, + "logits/chosen": -3.0539023876190186, + "logits/rejected": -3.0629706382751465, + "logps/chosen": -0.43785715103149414, + "logps/rejected": -1.3722269535064697, + "loss": 0.2859, + "nll_loss": 0.2769049108028412, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021892856806516647, + "rewards/margins": 0.04671848937869072, + "rewards/rejected": -0.06861135363578796, + "step": 1010 + }, + { + "epoch": 1.069743051914001, + "grad_norm": 2.017775428059147, + "learning_rate": 6.262242910851496e-06, + "log_odds_chosen": 1.7232574224472046, + "log_odds_ratio": -0.22979629039764404, + "logits/chosen": -3.0019690990448, + "logits/rejected": -3.0224807262420654, + "logps/chosen": -0.4002920091152191, + "logps/rejected": -1.3048107624053955, + "loss": 0.2894, + "nll_loss": 0.2588661015033722, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.020014600828289986, + "rewards/margins": 0.04522594064474106, + "rewards/rejected": -0.0652405396103859, + "step": 1020 + }, + { + "epoch": 1.08023072889355, + "grad_norm": 2.1656896077764, + "learning_rate": 6.231769528497559e-06, + "log_odds_chosen": 1.7999454736709595, + "log_odds_ratio": -0.23009638488292694, + "logits/chosen": -3.0344815254211426, + "logits/rejected": -3.0285098552703857, + "logps/chosen": -0.42475366592407227, + "logps/rejected": -1.3811571598052979, + "loss": 0.2779, + "nll_loss": 0.26928776502609253, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021237684413790703, + "rewards/margins": 0.04782017320394516, + "rewards/rejected": -0.06905786693096161, + "step": 1030 + }, + { + "epoch": 1.0907184058730992, + "grad_norm": 1.8893124181143397, + "learning_rate": 6.2017367294604225e-06, + "log_odds_chosen": 1.7361199855804443, + "log_odds_ratio": -0.2356552630662918, + "logits/chosen": -2.9798855781555176, + "logits/rejected": -3.012021780014038, + "logps/chosen": -0.4087589383125305, + "logps/rejected": -1.318456768989563, + "loss": 0.2848, + "nll_loss": 0.2693423926830292, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.020437946543097496, + "rewards/margins": 0.045484889298677444, + "rewards/rejected": -0.06592283397912979, + "step": 1040 + }, + { + "epoch": 1.1012060828526482, + "grad_norm": 1.998285617344112, + "learning_rate": 6.172133998483677e-06, + "log_odds_chosen": 1.989933967590332, + "log_odds_ratio": -0.2104463130235672, + "logits/chosen": -2.9669861793518066, + "logits/rejected": -2.992997169494629, + "logps/chosen": -0.4091659486293793, + "logps/rejected": -1.4872965812683105, + "loss": 0.2793, + "nll_loss": 0.24384136497974396, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.020458297803997993, + "rewards/margins": 0.05390653759241104, + "rewards/rejected": -0.07436482608318329, + "step": 1050 + }, + { + "epoch": 1.1116937598321972, + "grad_norm": 1.99753785316238, + "learning_rate": 6.142951168339513e-06, + "log_odds_chosen": 1.7905690670013428, + "log_odds_ratio": -0.2465437948703766, + "logits/chosen": -2.9944257736206055, + "logits/rejected": -2.988699436187744, + "logps/chosen": -0.41175705194473267, + "logps/rejected": -1.3037220239639282, + "loss": 0.2828, + "nll_loss": 0.2829252779483795, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.020587850362062454, + "rewards/margins": 0.04459824413061142, + "rewards/rejected": -0.06518609821796417, + "step": 1060 + }, + { + "epoch": 1.1221814368117462, + "grad_norm": 2.0944607329795666, + "learning_rate": 6.114178405157431e-06, + "log_odds_chosen": 1.972241759300232, + "log_odds_ratio": -0.202741339802742, + "logits/chosen": -2.9314074516296387, + "logits/rejected": -2.943037271499634, + "logps/chosen": -0.39666005969047546, + "logps/rejected": -1.4398232698440552, + "loss": 0.2869, + "nll_loss": 0.26206424832344055, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.019833002239465714, + "rewards/margins": 0.0521581657230854, + "rewards/rejected": -0.07199116796255112, + "step": 1070 + }, + { + "epoch": 1.1326691137912952, + "grad_norm": 2.082309850512046, + "learning_rate": 6.0858061945018455e-06, + "log_odds_chosen": 1.9569040536880493, + "log_odds_ratio": -0.20189175009727478, + "logits/chosen": -2.9233288764953613, + "logits/rejected": -2.953047275543213, + "logps/chosen": -0.4349672198295593, + "logps/rejected": -1.479813814163208, + "loss": 0.286, + "nll_loss": 0.25732284784317017, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.021748360246419907, + "rewards/margins": 0.052242327481508255, + "rewards/rejected": -0.07399068772792816, + "step": 1080 + }, + { + "epoch": 1.1431567907708442, + "grad_norm": 1.977872551014816, + "learning_rate": 6.0578253281538265e-06, + "log_odds_chosen": 1.8792686462402344, + "log_odds_ratio": -0.23301272094249725, + "logits/chosen": -2.9573769569396973, + "logits/rejected": -2.968686103820801, + "logps/chosen": -0.3683982789516449, + "logps/rejected": -1.286027431488037, + "loss": 0.2841, + "nll_loss": 0.26943594217300415, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.018419915810227394, + "rewards/margins": 0.04588145762681961, + "rewards/rejected": -0.06430138647556305, + "step": 1090 + }, + { + "epoch": 1.1536444677503932, + "grad_norm": 2.2874664942911984, + "learning_rate": 6.030226891555273e-06, + "log_odds_chosen": 1.744699239730835, + "log_odds_ratio": -0.2575313448905945, + "logits/chosen": -3.0328478813171387, + "logits/rejected": -3.0531229972839355, + "logps/chosen": -0.4480053782463074, + "logps/rejected": -1.409203290939331, + "loss": 0.29, + "nll_loss": 0.2910405397415161, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.022400271147489548, + "rewards/margins": 0.04805989935994148, + "rewards/rejected": -0.07046017050743103, + "step": 1100 + }, + { + "epoch": 1.1536444677503932, + "eval_log_odds_chosen": 0.4246710240840912, + "eval_log_odds_ratio": -0.6255837082862854, + "eval_logits/chosen": -3.002875804901123, + "eval_logits/rejected": -3.0027201175689697, + "eval_logps/chosen": -0.8736297488212585, + "eval_logps/rejected": -1.1487443447113037, + "eval_loss": 0.5385290384292603, + "eval_nll_loss": 0.5041735172271729, + "eval_rewards/accuracies": 0.64682537317276, + "eval_rewards/chosen": -0.04368148371577263, + "eval_rewards/margins": 0.013755732215940952, + "eval_rewards/rejected": -0.057437218725681305, + "eval_runtime": 136.8823, + "eval_samples_per_second": 14.567, + "eval_steps_per_second": 0.46, + "step": 1100 + }, + { + "epoch": 1.1641321447299422, + "grad_norm": 1.8147231314332177, + "learning_rate": 6.003002251876643e-06, + "log_odds_chosen": 1.8075166940689087, + "log_odds_ratio": -0.2281859815120697, + "logits/chosen": -2.965421199798584, + "logits/rejected": -3.0172793865203857, + "logps/chosen": -0.44597238302230835, + "logps/rejected": -1.4203885793685913, + "loss": 0.2891, + "nll_loss": 0.2668479084968567, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.022298619151115417, + "rewards/margins": 0.04872080683708191, + "rewards/rejected": -0.07101943343877792, + "step": 1110 + }, + { + "epoch": 1.1746198217094914, + "grad_norm": 1.9969430269469466, + "learning_rate": 5.976143046671968e-06, + "log_odds_chosen": 1.7478984594345093, + "log_odds_ratio": -0.22862455248832703, + "logits/chosen": -3.0243489742279053, + "logits/rejected": -3.0321333408355713, + "logps/chosen": -0.40696269273757935, + "logps/rejected": -1.2988313436508179, + "loss": 0.2927, + "nll_loss": 0.27604612708091736, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02034812793135643, + "rewards/margins": 0.04459343105554581, + "rewards/rejected": -0.06494157016277313, + "step": 1120 + }, + { + "epoch": 1.1851074986890404, + "grad_norm": 2.1896703421371275, + "learning_rate": 5.949641173087296e-06, + "log_odds_chosen": 2.048767566680908, + "log_odds_ratio": -0.20188426971435547, + "logits/chosen": -2.9657158851623535, + "logits/rejected": -2.977405309677124, + "logps/chosen": -0.38311532139778137, + "logps/rejected": -1.454978108406067, + "loss": 0.2825, + "nll_loss": 0.2597211003303528, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.01915576681494713, + "rewards/margins": 0.05359314754605293, + "rewards/rejected": -0.07274890691041946, + "step": 1130 + }, + { + "epoch": 1.1955951756685894, + "grad_norm": 1.8856822247943528, + "learning_rate": 5.923488777590924e-06, + "log_odds_chosen": 1.9368520975112915, + "log_odds_ratio": -0.21634550392627716, + "logits/chosen": -3.009665012359619, + "logits/rejected": -3.0066471099853516, + "logps/chosen": -0.412930428981781, + "logps/rejected": -1.4850547313690186, + "loss": 0.2786, + "nll_loss": 0.28015536069869995, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.02064652182161808, + "rewards/margins": 0.05360621213912964, + "rewards/rejected": -0.07425273954868317, + "step": 1140 + }, + { + "epoch": 1.2060828526481384, + "grad_norm": 2.2165729739830233, + "learning_rate": 5.897678246195886e-06, + "log_odds_chosen": 1.9798767566680908, + "log_odds_ratio": -0.19855430722236633, + "logits/chosen": -2.9805493354797363, + "logits/rejected": -2.9919371604919434, + "logps/chosen": -0.38313865661621094, + "logps/rejected": -1.3864378929138184, + "loss": 0.2909, + "nll_loss": 0.27790573239326477, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.019156932830810547, + "rewards/margins": 0.05016495659947395, + "rewards/rejected": -0.0693218931555748, + "step": 1150 + }, + { + "epoch": 1.2165705296276874, + "grad_norm": 2.8337045840850497, + "learning_rate": 5.8722021951470355e-06, + "log_odds_chosen": 1.7361915111541748, + "log_odds_ratio": -0.24711327254772186, + "logits/chosen": -2.966083288192749, + "logits/rejected": -2.9842519760131836, + "logps/chosen": -0.4412474036216736, + "logps/rejected": -1.3824529647827148, + "loss": 0.2781, + "nll_loss": 0.2754039466381073, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.02206237055361271, + "rewards/margins": 0.0470602810382843, + "rewards/rejected": -0.06912264972925186, + "step": 1160 + }, + { + "epoch": 1.2270582066072364, + "grad_norm": 1.7729938432799273, + "learning_rate": 5.847053462046862e-06, + "log_odds_chosen": 1.7805134057998657, + "log_odds_ratio": -0.23545412719249725, + "logits/chosen": -3.0085816383361816, + "logits/rejected": -3.003875494003296, + "logps/chosen": -0.4123718738555908, + "logps/rejected": -1.3221479654312134, + "loss": 0.2829, + "nll_loss": 0.2879020869731903, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.02061859332025051, + "rewards/margins": 0.045488808304071426, + "rewards/rejected": -0.06610739976167679, + "step": 1170 + }, + { + "epoch": 1.2375458835867854, + "grad_norm": 2.2169036925519454, + "learning_rate": 5.822225097395821e-06, + "log_odds_chosen": 1.9844211339950562, + "log_odds_ratio": -0.1866404265165329, + "logits/chosen": -2.9880988597869873, + "logits/rejected": -3.0081310272216797, + "logps/chosen": -0.3858886957168579, + "logps/rejected": -1.3924882411956787, + "loss": 0.2873, + "nll_loss": 0.25162869691848755, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.019294437021017075, + "rewards/margins": 0.05032998323440552, + "rewards/rejected": -0.06962442398071289, + "step": 1180 + }, + { + "epoch": 1.2480335605663346, + "grad_norm": 2.1614361138819045, + "learning_rate": 5.797710356524486e-06, + "log_odds_chosen": 1.8616158962249756, + "log_odds_ratio": -0.22632256150245667, + "logits/chosen": -3.0017178058624268, + "logits/rejected": -3.0013363361358643, + "logps/chosen": -0.4442955553531647, + "logps/rejected": -1.4363129138946533, + "loss": 0.2867, + "nll_loss": 0.289310485124588, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.022214777767658234, + "rewards/margins": 0.04960086941719055, + "rewards/rejected": -0.07181564718484879, + "step": 1190 + }, + { + "epoch": 1.2585212375458836, + "grad_norm": 2.0470229728313494, + "learning_rate": 5.773502691896259e-06, + "log_odds_chosen": 1.8614075183868408, + "log_odds_ratio": -0.2429337054491043, + "logits/chosen": -2.9596099853515625, + "logits/rejected": -2.9728147983551025, + "logps/chosen": -0.44122061133384705, + "logps/rejected": -1.4644559621810913, + "loss": 0.2826, + "nll_loss": 0.2614334225654602, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.022061031311750412, + "rewards/margins": 0.051161766052246094, + "rewards/rejected": -0.0732228010892868, + "step": 1200 + }, + { + "epoch": 1.2585212375458836, + "eval_log_odds_chosen": 0.4214767515659332, + "eval_log_odds_ratio": -0.6254101991653442, + "eval_logits/chosen": -2.9582858085632324, + "eval_logits/rejected": -2.96195912361145, + "eval_logps/chosen": -0.8853804469108582, + "eval_logps/rejected": -1.162561058998108, + "eval_loss": 0.5427829027175903, + "eval_nll_loss": 0.5084435939788818, + "eval_rewards/accuracies": 0.6428571343421936, + "eval_rewards/chosen": -0.04426902160048485, + "eval_rewards/margins": 0.013859033584594727, + "eval_rewards/rejected": -0.05812805891036987, + "eval_runtime": 137.2006, + "eval_samples_per_second": 14.533, + "eval_steps_per_second": 0.459, + "step": 1200 + }, + { + "epoch": 1.2690089145254326, + "grad_norm": 2.3388472125063946, + "learning_rate": 5.749595745760691e-06, + "log_odds_chosen": 1.858030080795288, + "log_odds_ratio": -0.21272964775562286, + "logits/chosen": -2.996577739715576, + "logits/rejected": -3.0146660804748535, + "logps/chosen": -0.4070938229560852, + "logps/rejected": -1.3386101722717285, + "loss": 0.2988, + "nll_loss": 0.292961448431015, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02035469003021717, + "rewards/margins": 0.04657582566142082, + "rewards/rejected": -0.06693051755428314, + "step": 1210 + }, + { + "epoch": 1.2794965915049816, + "grad_norm": 1.9762440493042526, + "learning_rate": 5.725983343138682e-06, + "log_odds_chosen": 1.7544046640396118, + "log_odds_ratio": -0.22841353714466095, + "logits/chosen": -2.9734439849853516, + "logits/rejected": -2.9992988109588623, + "logps/chosen": -0.42544227838516235, + "logps/rejected": -1.3273015022277832, + "loss": 0.295, + "nll_loss": 0.28989139199256897, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021272115409374237, + "rewards/margins": 0.04509295895695686, + "rewards/rejected": -0.0663650780916214, + "step": 1220 + }, + { + "epoch": 1.2899842684845306, + "grad_norm": 2.230074491318477, + "learning_rate": 5.702659485122011e-06, + "log_odds_chosen": 1.929265022277832, + "log_odds_ratio": -0.20951807498931885, + "logits/chosen": -2.9871158599853516, + "logits/rejected": -2.993727207183838, + "logps/chosen": -0.40125927329063416, + "logps/rejected": -1.4160717725753784, + "loss": 0.2653, + "nll_loss": 0.23026029765605927, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.020062964409589767, + "rewards/margins": 0.050740621984004974, + "rewards/rejected": -0.07080359011888504, + "step": 1230 + }, + { + "epoch": 1.3004719454640796, + "grad_norm": 1.9679461376203173, + "learning_rate": 5.679618342470648e-06, + "log_odds_chosen": 1.7371532917022705, + "log_odds_ratio": -0.2242734134197235, + "logits/chosen": -3.0132291316986084, + "logits/rejected": -3.0433402061462402, + "logps/chosen": -0.413210391998291, + "logps/rejected": -1.3000330924987793, + "loss": 0.2804, + "nll_loss": 0.29589781165122986, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0206605214625597, + "rewards/margins": 0.04434113949537277, + "rewards/rejected": -0.06500165909528732, + "step": 1240 + }, + { + "epoch": 1.3109596224436286, + "grad_norm": 2.617277483095543, + "learning_rate": 5.656854249492381e-06, + "log_odds_chosen": 1.814679741859436, + "log_odds_ratio": -0.22298629581928253, + "logits/chosen": -2.996896266937256, + "logits/rejected": -3.0056145191192627, + "logps/chosen": -0.42395251989364624, + "logps/rejected": -1.3927456140518188, + "loss": 0.2687, + "nll_loss": 0.25607752799987793, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021197626367211342, + "rewards/margins": 0.04843965172767639, + "rewards/rejected": -0.06963728368282318, + "step": 1250 + }, + { + "epoch": 1.3214472994231778, + "grad_norm": 1.9773184888291742, + "learning_rate": 5.63436169819011e-06, + "log_odds_chosen": 1.8136640787124634, + "log_odds_ratio": -0.24320077896118164, + "logits/chosen": -2.966784954071045, + "logits/rejected": -3.001746892929077, + "logps/chosen": -0.45541706681251526, + "logps/rejected": -1.3951488733291626, + "loss": 0.2988, + "nll_loss": 0.31274476647377014, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.022770855575799942, + "rewards/margins": 0.046986598521471024, + "rewards/rejected": -0.06975744664669037, + "step": 1260 + }, + { + "epoch": 1.3319349764027268, + "grad_norm": 1.9140818928985086, + "learning_rate": 5.612135332663138e-06, + "log_odds_chosen": 1.953155755996704, + "log_odds_ratio": -0.21717992424964905, + "logits/chosen": -3.006328821182251, + "logits/rejected": -3.037388324737549, + "logps/chosen": -0.42650872468948364, + "logps/rejected": -1.495060682296753, + "loss": 0.272, + "nll_loss": 0.2669217586517334, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.021325435489416122, + "rewards/margins": 0.053427595645189285, + "rewards/rejected": -0.074753038585186, + "step": 1270 + }, + { + "epoch": 1.3424226533822758, + "grad_norm": 1.9500186785754579, + "learning_rate": 5.590169943749475e-06, + "log_odds_chosen": 1.8904393911361694, + "log_odds_ratio": -0.2255454808473587, + "logits/chosen": -2.989861011505127, + "logits/rejected": -3.0198075771331787, + "logps/chosen": -0.424043744802475, + "logps/rejected": -1.4651858806610107, + "loss": 0.2783, + "nll_loss": 0.267769455909729, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.02120218798518181, + "rewards/margins": 0.05205710977315903, + "rewards/rejected": -0.07325930893421173, + "step": 1280 + }, + { + "epoch": 1.3529103303618248, + "grad_norm": 1.9502765281924526, + "learning_rate": 5.568460463897046e-06, + "log_odds_chosen": 1.8929240703582764, + "log_odds_ratio": -0.21857920289039612, + "logits/chosen": -2.9535863399505615, + "logits/rejected": -2.9874510765075684, + "logps/chosen": -0.45026451349258423, + "logps/rejected": -1.4960235357284546, + "loss": 0.295, + "nll_loss": 0.27629774808883667, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.022513221949338913, + "rewards/margins": 0.052287958562374115, + "rewards/rejected": -0.07480116933584213, + "step": 1290 + }, + { + "epoch": 1.3633980073413738, + "grad_norm": 2.2093191033587223, + "learning_rate": 5.547001962252292e-06, + "log_odds_chosen": 1.7265195846557617, + "log_odds_ratio": -0.23279574513435364, + "logits/chosen": -2.9012649059295654, + "logits/rejected": -2.9128100872039795, + "logps/chosen": -0.4365314841270447, + "logps/rejected": -1.3402652740478516, + "loss": 0.2796, + "nll_loss": 0.28851714730262756, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021826574578881264, + "rewards/margins": 0.04518669471144676, + "rewards/rejected": -0.06701326370239258, + "step": 1300 + }, + { + "epoch": 1.3633980073413738, + "eval_log_odds_chosen": 0.45076510310173035, + "eval_log_odds_ratio": -0.6208177208900452, + "eval_logits/chosen": -2.928496837615967, + "eval_logits/rejected": -2.9256343841552734, + "eval_logps/chosen": -0.8825219869613647, + "eval_logps/rejected": -1.1770830154418945, + "eval_loss": 0.5392885208129883, + "eval_nll_loss": 0.5060464143753052, + "eval_rewards/accuracies": 0.64682537317276, + "eval_rewards/chosen": -0.044126104563474655, + "eval_rewards/margins": 0.014728044159710407, + "eval_rewards/rejected": -0.05885414779186249, + "eval_runtime": 136.6608, + "eval_samples_per_second": 14.591, + "eval_steps_per_second": 0.461, + "step": 1300 + }, + { + "epoch": 1.3738856843209228, + "grad_norm": 1.715926192038861, + "learning_rate": 5.525789639955377e-06, + "log_odds_chosen": 2.0803933143615723, + "log_odds_ratio": -0.21633043885231018, + "logits/chosen": -2.926987409591675, + "logits/rejected": -2.9622962474823, + "logps/chosen": -0.43519288301467896, + "logps/rejected": -1.6533997058868408, + "loss": 0.2713, + "nll_loss": 0.26452213525772095, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.02175964042544365, + "rewards/margins": 0.06091034412384033, + "rewards/rejected": -0.08266998082399368, + "step": 1310 + }, + { + "epoch": 1.3843733613004718, + "grad_norm": 2.0174814570503012, + "learning_rate": 5.504818825631804e-06, + "log_odds_chosen": 2.108902931213379, + "log_odds_ratio": -0.1835678517818451, + "logits/chosen": -2.96756911277771, + "logits/rejected": -2.9531686305999756, + "logps/chosen": -0.3781605362892151, + "logps/rejected": -1.4976880550384521, + "loss": 0.267, + "nll_loss": 0.25148090720176697, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.018908025696873665, + "rewards/margins": 0.055976372212171555, + "rewards/rejected": -0.07488439977169037, + "step": 1320 + }, + { + "epoch": 1.394861038280021, + "grad_norm": 2.317364085817375, + "learning_rate": 5.484084971070817e-06, + "log_odds_chosen": 1.9238555431365967, + "log_odds_ratio": -0.2074807584285736, + "logits/chosen": -2.923131227493286, + "logits/rejected": -2.9520606994628906, + "logps/chosen": -0.42446833848953247, + "logps/rejected": -1.4086004495620728, + "loss": 0.2852, + "nll_loss": 0.28959181904792786, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.021223418414592743, + "rewards/margins": 0.049206603318452835, + "rewards/rejected": -0.07043002545833588, + "step": 1330 + }, + { + "epoch": 1.40534871525957, + "grad_norm": 2.165975215343917, + "learning_rate": 5.4635836470815305e-06, + "log_odds_chosen": 1.8837333917617798, + "log_odds_ratio": -0.21855314075946808, + "logits/chosen": -2.9127135276794434, + "logits/rejected": -2.9249043464660645, + "logps/chosen": -0.41960373520851135, + "logps/rejected": -1.4260175228118896, + "loss": 0.2787, + "nll_loss": 0.25244617462158203, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.020980186760425568, + "rewards/margins": 0.050320692360401154, + "rewards/rejected": -0.07130087912082672, + "step": 1340 + }, + { + "epoch": 1.415836392239119, + "grad_norm": 1.9224928940953034, + "learning_rate": 5.443310539518174e-06, + "log_odds_chosen": 2.056159734725952, + "log_odds_ratio": -0.19483168423175812, + "logits/chosen": -2.956674814224243, + "logits/rejected": -2.9572062492370605, + "logps/chosen": -0.4208443760871887, + "logps/rejected": -1.5285457372665405, + "loss": 0.2822, + "nll_loss": 0.26951080560684204, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.021042218431830406, + "rewards/margins": 0.05538507178425789, + "rewards/rejected": -0.07642728835344315, + "step": 1350 + }, + { + "epoch": 1.426324069218668, + "grad_norm": 2.0115204434239025, + "learning_rate": 5.423261445466404e-06, + "log_odds_chosen": 1.707457184791565, + "log_odds_ratio": -0.2479782998561859, + "logits/chosen": -2.915250301361084, + "logits/rejected": -2.9445343017578125, + "logps/chosen": -0.4267791211605072, + "logps/rejected": -1.3377535343170166, + "loss": 0.2925, + "nll_loss": 0.29825955629348755, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.02133895456790924, + "rewards/margins": 0.04554871469736099, + "rewards/rejected": -0.06688766926527023, + "step": 1360 + }, + { + "epoch": 1.436811746198217, + "grad_norm": 2.0083912520624234, + "learning_rate": 5.403432269582992e-06, + "log_odds_chosen": 1.7433815002441406, + "log_odds_ratio": -0.23284384608268738, + "logits/chosen": -2.9682974815368652, + "logits/rejected": -2.9809725284576416, + "logps/chosen": -0.4545938968658447, + "logps/rejected": -1.3821640014648438, + "loss": 0.2995, + "nll_loss": 0.2861328721046448, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.022729698568582535, + "rewards/margins": 0.04637850075960159, + "rewards/rejected": -0.06910820305347443, + "step": 1370 + }, + { + "epoch": 1.447299423177766, + "grad_norm": 1.918494069287167, + "learning_rate": 5.383819020581656e-06, + "log_odds_chosen": 1.839255690574646, + "log_odds_ratio": -0.22518055140972137, + "logits/chosen": -2.9555628299713135, + "logits/rejected": -2.968390703201294, + "logps/chosen": -0.4370731711387634, + "logps/rejected": -1.4699593782424927, + "loss": 0.2859, + "nll_loss": 0.28876128792762756, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.021853657439351082, + "rewards/margins": 0.051644302904605865, + "rewards/rejected": -0.0734979659318924, + "step": 1380 + }, + { + "epoch": 1.457787100157315, + "grad_norm": 1.8701436058229068, + "learning_rate": 5.364417807858201e-06, + "log_odds_chosen": 2.0006766319274902, + "log_odds_ratio": -0.19503512978553772, + "logits/chosen": -2.9456233978271484, + "logits/rejected": -2.9416487216949463, + "logps/chosen": -0.397217720746994, + "logps/rejected": -1.458070993423462, + "loss": 0.2898, + "nll_loss": 0.2990682125091553, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.01986088417470455, + "rewards/margins": 0.05304265767335892, + "rewards/rejected": -0.07290354371070862, + "step": 1390 + }, + { + "epoch": 1.4682747771368643, + "grad_norm": 1.8947645182805886, + "learning_rate": 5.345224838248489e-06, + "log_odds_chosen": 1.9478137493133545, + "log_odds_ratio": -0.22849062085151672, + "logits/chosen": -2.9488446712493896, + "logits/rejected": -2.980994462966919, + "logps/chosen": -0.38306254148483276, + "logps/rejected": -1.40244460105896, + "loss": 0.2784, + "nll_loss": 0.27079683542251587, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.019153129309415817, + "rewards/margins": 0.05096910148859024, + "rewards/rejected": -0.07012222707271576, + "step": 1400 + }, + { + "epoch": 1.4682747771368643, + "eval_log_odds_chosen": 0.4410339295864105, + "eval_log_odds_ratio": -0.6236060261726379, + "eval_logits/chosen": -2.9594457149505615, + "eval_logits/rejected": -2.9583115577697754, + "eval_logps/chosen": -0.8884981274604797, + "eval_logps/rejected": -1.1784039735794067, + "eval_loss": 0.5364598631858826, + "eval_nll_loss": 0.5036527514457703, + "eval_rewards/accuracies": 0.6527777910232544, + "eval_rewards/chosen": -0.044424910098314285, + "eval_rewards/margins": 0.014495291747152805, + "eval_rewards/rejected": -0.058920200914144516, + "eval_runtime": 139.2595, + "eval_samples_per_second": 14.319, + "eval_steps_per_second": 0.452, + "step": 1400 + }, + { + "epoch": 1.4787624541164133, + "grad_norm": 2.1665159464201142, + "learning_rate": 5.326236412913075e-06, + "log_odds_chosen": 1.7970411777496338, + "log_odds_ratio": -0.2380552738904953, + "logits/chosen": -2.9149088859558105, + "logits/rejected": -2.9543135166168213, + "logps/chosen": -0.4362480640411377, + "logps/rejected": -1.3472230434417725, + "loss": 0.29, + "nll_loss": 0.2710421681404114, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021812403574585915, + "rewards/margins": 0.04554874822497368, + "rewards/rejected": -0.06736114621162415, + "step": 1410 + }, + { + "epoch": 1.4892501310959623, + "grad_norm": 2.196966160421767, + "learning_rate": 5.307448924342753e-06, + "log_odds_chosen": 1.8308820724487305, + "log_odds_ratio": -0.21477296948432922, + "logits/chosen": -2.877204179763794, + "logits/rejected": -2.932901620864868, + "logps/chosen": -0.4031652510166168, + "logps/rejected": -1.3179484605789185, + "loss": 0.2855, + "nll_loss": 0.2783321738243103, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.02015826478600502, + "rewards/margins": 0.04573915898799896, + "rewards/rejected": -0.06589742004871368, + "step": 1420 + }, + { + "epoch": 1.4997378080755113, + "grad_norm": 2.1884907491879084, + "learning_rate": 5.28885885347945e-06, + "log_odds_chosen": 1.9711707830429077, + "log_odds_ratio": -0.20648148655891418, + "logits/chosen": -2.954136371612549, + "logits/rejected": -2.9814727306365967, + "logps/chosen": -0.41374531388282776, + "logps/rejected": -1.4304702281951904, + "loss": 0.2924, + "nll_loss": 0.27289509773254395, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.020687269046902657, + "rewards/margins": 0.0508362352848053, + "rewards/rejected": -0.071523517370224, + "step": 1430 + }, + { + "epoch": 1.5102254850550603, + "grad_norm": 2.124176001387226, + "learning_rate": 5.270462766947299e-06, + "log_odds_chosen": 1.7731349468231201, + "log_odds_ratio": -0.2392440289258957, + "logits/chosen": -2.9405388832092285, + "logits/rejected": -2.9464943408966064, + "logps/chosen": -0.4539235234260559, + "logps/rejected": -1.403793454170227, + "loss": 0.2961, + "nll_loss": 0.2940642237663269, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.022696174681186676, + "rewards/margins": 0.04749349504709244, + "rewards/rejected": -0.07018966972827911, + "step": 1440 + }, + { + "epoch": 1.5207131620346095, + "grad_norm": 1.8197825407446042, + "learning_rate": 5.252257314388902e-06, + "log_odds_chosen": 1.7956994771957397, + "log_odds_ratio": -0.22454524040222168, + "logits/chosen": -2.954716444015503, + "logits/rejected": -2.978447437286377, + "logps/chosen": -0.4430459439754486, + "logps/rejected": -1.4194531440734863, + "loss": 0.2777, + "nll_loss": 0.24652138352394104, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02215229719877243, + "rewards/margins": 0.048820365220308304, + "rewards/rejected": -0.07097266614437103, + "step": 1450 + }, + { + "epoch": 1.5312008390141583, + "grad_norm": 2.1915818543360355, + "learning_rate": 5.234239225902137e-06, + "log_odds_chosen": 1.9382715225219727, + "log_odds_ratio": -0.1963178515434265, + "logits/chosen": -2.8938894271850586, + "logits/rejected": -2.924325466156006, + "logps/chosen": -0.39880725741386414, + "logps/rejected": -1.4752063751220703, + "loss": 0.2971, + "nll_loss": 0.2676003575325012, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.019940361380577087, + "rewards/margins": 0.05381995439529419, + "rewards/rejected": -0.07376032322645187, + "step": 1460 + }, + { + "epoch": 1.5416885159937075, + "grad_norm": 2.1118618734250307, + "learning_rate": 5.216405309573011e-06, + "log_odds_chosen": 1.9139398336410522, + "log_odds_ratio": -0.19271975755691528, + "logits/chosen": -3.0117218494415283, + "logits/rejected": -3.0411810874938965, + "logps/chosen": -0.42149630188941956, + "logps/rejected": -1.471760869026184, + "loss": 0.2889, + "nll_loss": 0.27934783697128296, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0210748128592968, + "rewards/margins": 0.052513234317302704, + "rewards/rejected": -0.0735880434513092, + "step": 1470 + }, + { + "epoch": 1.5521761929732563, + "grad_norm": 2.0510895547316745, + "learning_rate": 5.198752449100364e-06, + "log_odds_chosen": 2.0376482009887695, + "log_odds_ratio": -0.19703765213489532, + "logits/chosen": -3.009754180908203, + "logits/rejected": -3.016758441925049, + "logps/chosen": -0.40712347626686096, + "logps/rejected": -1.459837555885315, + "loss": 0.2888, + "nll_loss": 0.3001149892807007, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.020356174558401108, + "rewards/margins": 0.05263570696115494, + "rewards/rejected": -0.07299187034368515, + "step": 1480 + }, + { + "epoch": 1.5626638699528055, + "grad_norm": 2.1669568438399684, + "learning_rate": 5.181277601508398e-06, + "log_odds_chosen": 1.8304507732391357, + "log_odds_ratio": -0.2394884079694748, + "logits/chosen": -2.9779343605041504, + "logits/rejected": -3.008795738220215, + "logps/chosen": -0.4576667249202728, + "logps/rejected": -1.4601542949676514, + "loss": 0.2888, + "nll_loss": 0.29476073384284973, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.0228833369910717, + "rewards/margins": 0.05012437701225281, + "rewards/rejected": -0.07300771772861481, + "step": 1490 + }, + { + "epoch": 1.5731515469323545, + "grad_norm": 2.372050874462119, + "learning_rate": 5.163977794943223e-06, + "log_odds_chosen": 1.9750179052352905, + "log_odds_ratio": -0.19530083239078522, + "logits/chosen": -2.9395532608032227, + "logits/rejected": -2.991283893585205, + "logps/chosen": -0.42392611503601074, + "logps/rejected": -1.5091795921325684, + "loss": 0.2873, + "nll_loss": 0.2818702757358551, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021196305751800537, + "rewards/margins": 0.0542626678943634, + "rewards/rejected": -0.07545898109674454, + "step": 1500 + }, + { + "epoch": 1.5731515469323545, + "eval_log_odds_chosen": 0.4364486038684845, + "eval_log_odds_ratio": -0.6225508451461792, + "eval_logits/chosen": -2.965731382369995, + "eval_logits/rejected": -2.966355323791504, + "eval_logps/chosen": -0.8718044757843018, + "eval_logps/rejected": -1.158449649810791, + "eval_loss": 0.53301602602005, + "eval_nll_loss": 0.5004281997680664, + "eval_rewards/accuracies": 0.6448412537574768, + "eval_rewards/chosen": -0.043590229004621506, + "eval_rewards/margins": 0.014332256279885769, + "eval_rewards/rejected": -0.05792247876524925, + "eval_runtime": 139.8515, + "eval_samples_per_second": 14.258, + "eval_steps_per_second": 0.45, + "step": 1500 + }, + { + "epoch": 1.5836392239119035, + "grad_norm": 1.9123802783189798, + "learning_rate": 5.146850126549788e-06, + "log_odds_chosen": 1.6361440420150757, + "log_odds_ratio": -0.26433151960372925, + "logits/chosen": -2.943331003189087, + "logits/rejected": -2.9721503257751465, + "logps/chosen": -0.44553548097610474, + "logps/rejected": -1.2933813333511353, + "loss": 0.3044, + "nll_loss": 0.2870228588581085, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.022276774048805237, + "rewards/margins": 0.042392291128635406, + "rewards/rejected": -0.06466906517744064, + "step": 1510 + }, + { + "epoch": 1.5941269008914527, + "grad_norm": 1.9978617693896288, + "learning_rate": 5.129891760425772e-06, + "log_odds_chosen": 1.872454285621643, + "log_odds_ratio": -0.21693451702594757, + "logits/chosen": -2.9198169708251953, + "logits/rejected": -2.9594712257385254, + "logps/chosen": -0.4238964915275574, + "logps/rejected": -1.4147742986679077, + "loss": 0.2765, + "nll_loss": 0.2593707740306854, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.021194826811552048, + "rewards/margins": 0.049543894827365875, + "rewards/rejected": -0.07073871791362762, + "step": 1520 + }, + { + "epoch": 1.6046145778710015, + "grad_norm": 2.2358254561438966, + "learning_rate": 5.113099925649136e-06, + "log_odds_chosen": 1.7420718669891357, + "log_odds_ratio": -0.2600535750389099, + "logits/chosen": -2.9620399475097656, + "logits/rejected": -2.997101068496704, + "logps/chosen": -0.4705958366394043, + "logps/rejected": -1.435579538345337, + "loss": 0.2766, + "nll_loss": 0.28323301672935486, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.023529794067144394, + "rewards/margins": 0.048249177634716034, + "rewards/rejected": -0.07177898287773132, + "step": 1530 + }, + { + "epoch": 1.6151022548505507, + "grad_norm": 2.123071067312132, + "learning_rate": 5.096471914376255e-06, + "log_odds_chosen": 2.0446419715881348, + "log_odds_ratio": -0.20973734557628632, + "logits/chosen": -2.8849668502807617, + "logits/rejected": -2.91094970703125, + "logps/chosen": -0.42269793152809143, + "logps/rejected": -1.4985077381134033, + "loss": 0.2842, + "nll_loss": 0.24874058365821838, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.021134894341230392, + "rewards/margins": 0.053790487349033356, + "rewards/rejected": -0.07492538541555405, + "step": 1540 + }, + { + "epoch": 1.6255899318300995, + "grad_norm": 1.8574119456068037, + "learning_rate": 5.08000508000762e-06, + "log_odds_chosen": 1.8896774053573608, + "log_odds_ratio": -0.2109728306531906, + "logits/chosen": -2.9518914222717285, + "logits/rejected": -2.9677398204803467, + "logps/chosen": -0.42254775762557983, + "logps/rejected": -1.4004069566726685, + "loss": 0.2737, + "nll_loss": 0.26676517724990845, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02112739160656929, + "rewards/margins": 0.04889295622706413, + "rewards/rejected": -0.07002034783363342, + "step": 1550 + }, + { + "epoch": 1.6360776088096487, + "grad_norm": 2.012947859419835, + "learning_rate": 5.0636968354183334e-06, + "log_odds_chosen": 1.7877776622772217, + "log_odds_ratio": -0.2195170670747757, + "logits/chosen": -2.916713237762451, + "logits/rejected": -2.9442696571350098, + "logps/chosen": -0.4229874610900879, + "logps/rejected": -1.3620960712432861, + "loss": 0.2937, + "nll_loss": 0.28985968232154846, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.021149372681975365, + "rewards/margins": 0.046955425292253494, + "rewards/rejected": -0.06810478866100311, + "step": 1560 + }, + { + "epoch": 1.6465652857891977, + "grad_norm": 1.9554610757973563, + "learning_rate": 5.047544651250688e-06, + "log_odds_chosen": 1.9977741241455078, + "log_odds_ratio": -0.22808516025543213, + "logits/chosen": -2.95414137840271, + "logits/rejected": -2.9667911529541016, + "logps/chosen": -0.40563470125198364, + "logps/rejected": -1.493981122970581, + "loss": 0.2746, + "nll_loss": 0.25610029697418213, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.020281735807657242, + "rewards/margins": 0.05441732332110405, + "rewards/rejected": -0.07469905912876129, + "step": 1570 + }, + { + "epoch": 1.6570529627687467, + "grad_norm": 2.2417227837369094, + "learning_rate": 5.031546054266276e-06, + "log_odds_chosen": 1.8591692447662354, + "log_odds_ratio": -0.23143061995506287, + "logits/chosen": -3.0023272037506104, + "logits/rejected": -3.0128941535949707, + "logps/chosen": -0.46788668632507324, + "logps/rejected": -1.490392804145813, + "loss": 0.2962, + "nll_loss": 0.31111472845077515, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.02339433692395687, + "rewards/margins": 0.05112530663609505, + "rewards/rejected": -0.07451964914798737, + "step": 1580 + }, + { + "epoch": 1.667540639748296, + "grad_norm": 2.0152925811378846, + "learning_rate": 5.015698625755192e-06, + "log_odds_chosen": 1.9612891674041748, + "log_odds_ratio": -0.22349119186401367, + "logits/chosen": -2.9373695850372314, + "logits/rejected": -2.9659922122955322, + "logps/chosen": -0.40127071738243103, + "logps/rejected": -1.4479907751083374, + "loss": 0.2939, + "nll_loss": 0.2725040912628174, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02006353810429573, + "rewards/margins": 0.05233600735664368, + "rewards/rejected": -0.07239954173564911, + "step": 1590 + }, + { + "epoch": 1.6780283167278447, + "grad_norm": 1.9355725247245243, + "learning_rate": 5e-06, + "log_odds_chosen": 1.8742882013320923, + "log_odds_ratio": -0.21055075526237488, + "logits/chosen": -2.9387471675872803, + "logits/rejected": -2.9844515323638916, + "logps/chosen": -0.43298736214637756, + "logps/rejected": -1.4716593027114868, + "loss": 0.276, + "nll_loss": 0.26002392172813416, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02164936624467373, + "rewards/margins": 0.05193359777331352, + "rewards/rejected": -0.0735829621553421, + "step": 1600 + }, + { + "epoch": 1.6780283167278447, + "eval_log_odds_chosen": 0.4569767117500305, + "eval_log_odds_ratio": -0.6159732937812805, + "eval_logits/chosen": -2.932406187057495, + "eval_logits/rejected": -2.9357593059539795, + "eval_logps/chosen": -0.8832988142967224, + "eval_logps/rejected": -1.1878604888916016, + "eval_loss": 0.5367424488067627, + "eval_nll_loss": 0.5040929913520813, + "eval_rewards/accuracies": 0.6408730149269104, + "eval_rewards/chosen": -0.04416494444012642, + "eval_rewards/margins": 0.015228085219860077, + "eval_rewards/rejected": -0.0593930259346962, + "eval_runtime": 138.0302, + "eval_samples_per_second": 14.446, + "eval_steps_per_second": 0.456, + "step": 1600 + }, + { + "epoch": 1.688515993707394, + "grad_norm": 1.9448584897613828, + "learning_rate": 4.984447862792268e-06, + "log_odds_chosen": 2.0258474349975586, + "log_odds_ratio": -0.2537488639354706, + "logits/chosen": -2.9370341300964355, + "logits/rejected": -2.959137439727783, + "logps/chosen": -0.4205976128578186, + "logps/rejected": -1.51674485206604, + "loss": 0.2805, + "nll_loss": 0.2590489387512207, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.0210298802703619, + "rewards/margins": 0.054807353764772415, + "rewards/rejected": -0.07583723217248917, + "step": 1610 + }, + { + "epoch": 1.6990036706869427, + "grad_norm": 2.2985078763398503, + "learning_rate": 4.969039949999534e-06, + "log_odds_chosen": 1.9926655292510986, + "log_odds_ratio": -0.209347203373909, + "logits/chosen": -2.9543755054473877, + "logits/rejected": -2.979072093963623, + "logps/chosen": -0.4242986738681793, + "logps/rejected": -1.527527093887329, + "loss": 0.2829, + "nll_loss": 0.28810399770736694, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.021214932203292847, + "rewards/margins": 0.05516142398118973, + "rewards/rejected": -0.07637635618448257, + "step": 1620 + }, + { + "epoch": 1.709491347666492, + "grad_norm": 1.978508364107179, + "learning_rate": 4.9537740461807e-06, + "log_odds_chosen": 1.7989534139633179, + "log_odds_ratio": -0.22280922532081604, + "logits/chosen": -2.9272611141204834, + "logits/rejected": -2.933403968811035, + "logps/chosen": -0.4125545024871826, + "logps/rejected": -1.371010422706604, + "loss": 0.2723, + "nll_loss": 0.27273207902908325, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.02062772400677204, + "rewards/margins": 0.04792279377579689, + "rewards/rejected": -0.06855051219463348, + "step": 1630 + }, + { + "epoch": 1.719979024646041, + "grad_norm": 2.5294696595366375, + "learning_rate": 4.938647983247949e-06, + "log_odds_chosen": 1.8762280941009521, + "log_odds_ratio": -0.23052379488945007, + "logits/chosen": -2.9176859855651855, + "logits/rejected": -2.937653064727783, + "logps/chosen": -0.4308241307735443, + "logps/rejected": -1.4621460437774658, + "loss": 0.2707, + "nll_loss": 0.24837055802345276, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.021541204303503036, + "rewards/margins": 0.05156610533595085, + "rewards/rejected": -0.07310730963945389, + "step": 1640 + }, + { + "epoch": 1.73046670162559, + "grad_norm": 1.9845638290615137, + "learning_rate": 4.9236596391733095e-06, + "log_odds_chosen": 1.9353539943695068, + "log_odds_ratio": -0.22219491004943848, + "logits/chosen": -2.9324100017547607, + "logits/rejected": -2.9492199420928955, + "logps/chosen": -0.4047132134437561, + "logps/rejected": -1.447388768196106, + "loss": 0.2921, + "nll_loss": 0.2786787152290344, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.020235659554600716, + "rewards/margins": 0.05213377624750137, + "rewards/rejected": -0.07236944139003754, + "step": 1650 + }, + { + "epoch": 1.740954378605139, + "grad_norm": 2.1313335783196914, + "learning_rate": 4.9088069367381605e-06, + "log_odds_chosen": 1.9517314434051514, + "log_odds_ratio": -0.19579176604747772, + "logits/chosen": -2.9807212352752686, + "logits/rejected": -3.004951000213623, + "logps/chosen": -0.4060528874397278, + "logps/rejected": -1.4121928215026855, + "loss": 0.2851, + "nll_loss": 0.27768373489379883, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.020302647724747658, + "rewards/margins": 0.05030699446797371, + "rewards/rejected": -0.07060963660478592, + "step": 1660 + }, + { + "epoch": 1.751442055584688, + "grad_norm": 1.893515732849545, + "learning_rate": 4.894087842323964e-06, + "log_odds_chosen": 1.8834346532821655, + "log_odds_ratio": -0.20945528149604797, + "logits/chosen": -2.9691452980041504, + "logits/rejected": -3.0074009895324707, + "logps/chosen": -0.4027465283870697, + "logps/rejected": -1.374361276626587, + "loss": 0.2926, + "nll_loss": 0.26718848943710327, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.020137326791882515, + "rewards/margins": 0.04858074709773064, + "rewards/rejected": -0.0687180757522583, + "step": 1670 + }, + { + "epoch": 1.7619297325642371, + "grad_norm": 2.0915190498544263, + "learning_rate": 4.8795003647426654e-06, + "log_odds_chosen": 1.8165385723114014, + "log_odds_ratio": -0.21812555193901062, + "logits/chosen": -3.0662589073181152, + "logits/rejected": -3.089877128601074, + "logps/chosen": -0.40138545632362366, + "logps/rejected": -1.3200931549072266, + "loss": 0.2998, + "nll_loss": 0.29331129789352417, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.020069271326065063, + "rewards/margins": 0.04593539237976074, + "rewards/rejected": -0.06600465625524521, + "step": 1680 + }, + { + "epoch": 1.772417409543786, + "grad_norm": 2.1457501870245417, + "learning_rate": 4.865042554105199e-06, + "log_odds_chosen": 1.869539499282837, + "log_odds_ratio": -0.2280159890651703, + "logits/chosen": -2.991488456726074, + "logits/rejected": -2.98630690574646, + "logps/chosen": -0.4090718626976013, + "logps/rejected": -1.36448073387146, + "loss": 0.2858, + "nll_loss": 0.2776942253112793, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.020453594624996185, + "rewards/margins": 0.04777044430375099, + "rewards/rejected": -0.06822402775287628, + "step": 1690 + }, + { + "epoch": 1.7829050865233351, + "grad_norm": 2.3665022543070093, + "learning_rate": 4.850712500726659e-06, + "log_odds_chosen": 1.9791815280914307, + "log_odds_ratio": -0.19878429174423218, + "logits/chosen": -2.9824297428131104, + "logits/rejected": -3.022101640701294, + "logps/chosen": -0.4144412875175476, + "logps/rejected": -1.4597278833389282, + "loss": 0.2715, + "nll_loss": 0.28446242213249207, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.02072206512093544, + "rewards/margins": 0.052264340221881866, + "rewards/rejected": -0.07298640161752701, + "step": 1700 + }, + { + "epoch": 1.7829050865233351, + "eval_log_odds_chosen": 0.4425116777420044, + "eval_log_odds_ratio": -0.6271889209747314, + "eval_logits/chosen": -3.019425392150879, + "eval_logits/rejected": -3.020922899246216, + "eval_logps/chosen": -0.8710321187973022, + "eval_logps/rejected": -1.1603412628173828, + "eval_loss": 0.5348805785179138, + "eval_nll_loss": 0.5024282336235046, + "eval_rewards/accuracies": 0.6448412537574768, + "eval_rewards/chosen": -0.04355160519480705, + "eval_rewards/margins": 0.014465462416410446, + "eval_rewards/rejected": -0.0580170638859272, + "eval_runtime": 136.3216, + "eval_samples_per_second": 14.627, + "eval_steps_per_second": 0.462, + "step": 1700 + }, + { + "epoch": 1.7933927635028841, + "grad_norm": 1.847904822728325, + "learning_rate": 4.836508334066745e-06, + "log_odds_chosen": 1.9795688390731812, + "log_odds_ratio": -0.2207694798707962, + "logits/chosen": -3.0054497718811035, + "logits/rejected": -3.0154829025268555, + "logps/chosen": -0.4081927239894867, + "logps/rejected": -1.4390795230865479, + "loss": 0.264, + "nll_loss": 0.24716749787330627, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.020409639924764633, + "rewards/margins": 0.051544345915317535, + "rewards/rejected": -0.07195398211479187, + "step": 1710 + }, + { + "epoch": 1.8038804404824331, + "grad_norm": 1.7750027737169987, + "learning_rate": 4.822428221704122e-06, + "log_odds_chosen": 1.926945686340332, + "log_odds_ratio": -0.22434870898723602, + "logits/chosen": -3.0268912315368652, + "logits/rejected": -3.035226583480835, + "logps/chosen": -0.43201422691345215, + "logps/rejected": -1.498827576637268, + "loss": 0.2864, + "nll_loss": 0.25820285081863403, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.021600713953375816, + "rewards/margins": 0.053340665996074677, + "rewards/rejected": -0.07494138181209564, + "step": 1720 + }, + { + "epoch": 1.8143681174619821, + "grad_norm": 2.0662716537028354, + "learning_rate": 4.8084703683434506e-06, + "log_odds_chosen": 1.974784255027771, + "log_odds_ratio": -0.21157677471637726, + "logits/chosen": -3.010627031326294, + "logits/rejected": -2.9982268810272217, + "logps/chosen": -0.4355824589729309, + "logps/rejected": -1.5232689380645752, + "loss": 0.2903, + "nll_loss": 0.2755037248134613, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.021779123693704605, + "rewards/margins": 0.05438433215022087, + "rewards/rejected": -0.07616344839334488, + "step": 1730 + }, + { + "epoch": 1.8248557944415311, + "grad_norm": 2.1360074988574445, + "learning_rate": 4.794633014853843e-06, + "log_odds_chosen": 1.847333312034607, + "log_odds_ratio": -0.2377551794052124, + "logits/chosen": -3.006833553314209, + "logits/rejected": -3.0122854709625244, + "logps/chosen": -0.4366019368171692, + "logps/rejected": -1.4164003133773804, + "loss": 0.304, + "nll_loss": 0.29017573595046997, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.02183009497821331, + "rewards/margins": 0.0489899218082428, + "rewards/rejected": -0.07082001864910126, + "step": 1740 + }, + { + "epoch": 1.8353434714210803, + "grad_norm": 1.9891927691131213, + "learning_rate": 4.780914437337575e-06, + "log_odds_chosen": 1.8539154529571533, + "log_odds_ratio": -0.23103201389312744, + "logits/chosen": -2.9830121994018555, + "logits/rejected": -2.9818801879882812, + "logps/chosen": -0.4274306297302246, + "logps/rejected": -1.4196858406066895, + "loss": 0.2958, + "nll_loss": 0.2937518060207367, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.02137153223156929, + "rewards/margins": 0.049612756818532944, + "rewards/rejected": -0.07098428905010223, + "step": 1750 + }, + { + "epoch": 1.8458311484006291, + "grad_norm": 1.827588117065436, + "learning_rate": 4.767312946227961e-06, + "log_odds_chosen": 2.2149860858917236, + "log_odds_ratio": -0.2075362503528595, + "logits/chosen": -2.9530441761016846, + "logits/rejected": -2.9839682579040527, + "logps/chosen": -0.391355037689209, + "logps/rejected": -1.6375446319580078, + "loss": 0.2721, + "nll_loss": 0.2694031000137329, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.01956775411963463, + "rewards/margins": 0.06230948120355606, + "rewards/rejected": -0.08187723159790039, + "step": 1760 + }, + { + "epoch": 1.8563188253801783, + "grad_norm": 1.8203811521479276, + "learning_rate": 4.7538268854152834e-06, + "log_odds_chosen": 1.7995598316192627, + "log_odds_ratio": -0.244699165225029, + "logits/chosen": -3.011706829071045, + "logits/rejected": -3.024837017059326, + "logps/chosen": -0.4394347071647644, + "logps/rejected": -1.4033676385879517, + "loss": 0.2771, + "nll_loss": 0.25858861207962036, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.02197173610329628, + "rewards/margins": 0.04819665104150772, + "rewards/rejected": -0.0701683908700943, + "step": 1770 + }, + { + "epoch": 1.8668065023597273, + "grad_norm": 2.2623646165216313, + "learning_rate": 4.740454631399773e-06, + "log_odds_chosen": 1.962255835533142, + "log_odds_ratio": -0.23438410460948944, + "logits/chosen": -2.949073314666748, + "logits/rejected": -2.989229202270508, + "logps/chosen": -0.3985145688056946, + "logps/rejected": -1.4544894695281982, + "loss": 0.2941, + "nll_loss": 0.29249390959739685, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.019925730302929878, + "rewards/margins": 0.052798740565776825, + "rewards/rejected": -0.07272447645664215, + "step": 1780 + }, + { + "epoch": 1.8772941793392763, + "grad_norm": 2.5104520915032538, + "learning_rate": 4.727194592470656e-06, + "log_odds_chosen": 2.0800955295562744, + "log_odds_ratio": -0.19981749355793, + "logits/chosen": -2.9771628379821777, + "logits/rejected": -3.0005829334259033, + "logps/chosen": -0.42085084319114685, + "logps/rejected": -1.603994607925415, + "loss": 0.2844, + "nll_loss": 0.2677140235900879, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.02104254439473152, + "rewards/margins": 0.05915718153119087, + "rewards/rejected": -0.08019973337650299, + "step": 1790 + }, + { + "epoch": 1.8877818563188253, + "grad_norm": 2.077913541951449, + "learning_rate": 4.714045207910318e-06, + "log_odds_chosen": 2.1426799297332764, + "log_odds_ratio": -0.18838170170783997, + "logits/chosen": -2.950552463531494, + "logits/rejected": -2.9804420471191406, + "logps/chosen": -0.41320332884788513, + "logps/rejected": -1.622671365737915, + "loss": 0.2717, + "nll_loss": 0.2544669210910797, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.020660167559981346, + "rewards/margins": 0.060473401099443436, + "rewards/rejected": -0.08113356679677963, + "step": 1800 + }, + { + "epoch": 1.8877818563188253, + "eval_log_odds_chosen": 0.48237088322639465, + "eval_log_odds_ratio": -0.6183955669403076, + "eval_logits/chosen": -2.9562783241271973, + "eval_logits/rejected": -2.957892894744873, + "eval_logps/chosen": -0.8997318148612976, + "eval_logps/rejected": -1.2325206995010376, + "eval_loss": 0.5340895652770996, + "eval_nll_loss": 0.5023403763771057, + "eval_rewards/accuracies": 0.6547619104385376, + "eval_rewards/chosen": -0.04498659446835518, + "eval_rewards/margins": 0.01663944497704506, + "eval_rewards/rejected": -0.06162603944540024, + "eval_runtime": 136.1464, + "eval_samples_per_second": 14.646, + "eval_steps_per_second": 0.463, + "step": 1800 + }, + { + "epoch": 1.8982695332983743, + "grad_norm": 1.896252578291677, + "learning_rate": 4.701004947222685e-06, + "log_odds_chosen": 2.0811541080474854, + "log_odds_ratio": -0.20500631630420685, + "logits/chosen": -3.000387668609619, + "logits/rejected": -2.983591079711914, + "logps/chosen": -0.4098430573940277, + "logps/rejected": -1.608665108680725, + "loss": 0.2794, + "nll_loss": 0.25453388690948486, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.020492153242230415, + "rewards/margins": 0.05994110181927681, + "rewards/rejected": -0.08043324947357178, + "step": 1810 + }, + { + "epoch": 1.9087572102779236, + "grad_norm": 2.019085371673625, + "learning_rate": 4.688072309384955e-06, + "log_odds_chosen": 2.0144619941711426, + "log_odds_ratio": -0.2020682841539383, + "logits/chosen": -2.9534127712249756, + "logits/rejected": -2.9533755779266357, + "logps/chosen": -0.3999931216239929, + "logps/rejected": -1.4992988109588623, + "loss": 0.2775, + "nll_loss": 0.26274845004081726, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.019999656826257706, + "rewards/margins": 0.054965294897556305, + "rewards/rejected": -0.07496494799852371, + "step": 1820 + }, + { + "epoch": 1.9192448872574723, + "grad_norm": 1.9263871107241788, + "learning_rate": 4.675245822121844e-06, + "log_odds_chosen": 2.0367493629455566, + "log_odds_ratio": -0.20607483386993408, + "logits/chosen": -2.9868836402893066, + "logits/rejected": -3.000213861465454, + "logps/chosen": -0.4244080185890198, + "logps/rejected": -1.5761488676071167, + "loss": 0.2923, + "nll_loss": 0.2808459997177124, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02122039906680584, + "rewards/margins": 0.057587046176195145, + "rewards/rejected": -0.07880743592977524, + "step": 1830 + }, + { + "epoch": 1.9297325642370216, + "grad_norm": 2.1487838733941365, + "learning_rate": 4.662524041201569e-06, + "log_odds_chosen": 2.0472216606140137, + "log_odds_ratio": -0.22086529433727264, + "logits/chosen": -2.9925904273986816, + "logits/rejected": -2.985816240310669, + "logps/chosen": -0.4373515248298645, + "logps/rejected": -1.5831472873687744, + "loss": 0.2713, + "nll_loss": 0.2551635801792145, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.021867576986551285, + "rewards/margins": 0.057289790362119675, + "rewards/rejected": -0.07915736734867096, + "step": 1840 + }, + { + "epoch": 1.9402202412165706, + "grad_norm": 2.0463386352717112, + "learning_rate": 4.649905549752772e-06, + "log_odds_chosen": 2.1467113494873047, + "log_odds_ratio": -0.21497011184692383, + "logits/chosen": -2.938457727432251, + "logits/rejected": -2.9367523193359375, + "logps/chosen": -0.4192470610141754, + "logps/rejected": -1.63271164894104, + "loss": 0.2767, + "nll_loss": 0.2981775999069214, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.02096235193312168, + "rewards/margins": 0.06067322567105293, + "rewards/rejected": -0.08163557946681976, + "step": 1850 + }, + { + "epoch": 1.9507079181961196, + "grad_norm": 1.9930187660935812, + "learning_rate": 4.6373889576016826e-06, + "log_odds_chosen": 2.145296573638916, + "log_odds_ratio": -0.19072812795639038, + "logits/chosen": -2.9529764652252197, + "logits/rejected": -2.960404634475708, + "logps/chosen": -0.407731294631958, + "logps/rejected": -1.5777407884597778, + "loss": 0.2761, + "nll_loss": 0.2852553129196167, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.02038656547665596, + "rewards/margins": 0.05850047990679741, + "rewards/rejected": -0.07888703793287277, + "step": 1860 + }, + { + "epoch": 1.9611955951756685, + "grad_norm": 2.0042665222271756, + "learning_rate": 4.624972900628803e-06, + "log_odds_chosen": 2.0522494316101074, + "log_odds_ratio": -0.20059652626514435, + "logits/chosen": -2.932502269744873, + "logits/rejected": -2.9307363033294678, + "logps/chosen": -0.4203645586967468, + "logps/rejected": -1.5539976358413696, + "loss": 0.276, + "nll_loss": 0.2738272547721863, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02101822756230831, + "rewards/margins": 0.05668165162205696, + "rewards/rejected": -0.07769988477230072, + "step": 1870 + }, + { + "epoch": 1.9716832721552175, + "grad_norm": 2.0226547316915258, + "learning_rate": 4.6126560401444256e-06, + "log_odds_chosen": 2.0710301399230957, + "log_odds_ratio": -0.19392071664333344, + "logits/chosen": -3.015066623687744, + "logits/rejected": -2.99493145942688, + "logps/chosen": -0.43072837591171265, + "logps/rejected": -1.6065874099731445, + "loss": 0.2748, + "nll_loss": 0.2821330428123474, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.02153642103075981, + "rewards/margins": 0.05879295617341995, + "rewards/rejected": -0.08032937347888947, + "step": 1880 + }, + { + "epoch": 1.9821709491347668, + "grad_norm": 2.567857697275732, + "learning_rate": 4.600437062282362e-06, + "log_odds_chosen": 1.9227994680404663, + "log_odds_ratio": -0.2224545031785965, + "logits/chosen": -3.0251965522766113, + "logits/rejected": -2.993910789489746, + "logps/chosen": -0.4456098675727844, + "logps/rejected": -1.529626488685608, + "loss": 0.2788, + "nll_loss": 0.28787270188331604, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.0222804956138134, + "rewards/margins": 0.054200828075408936, + "rewards/rejected": -0.07648131996393204, + "step": 1890 + }, + { + "epoch": 1.9926586261143155, + "grad_norm": 2.1545883447921654, + "learning_rate": 4.588314677411235e-06, + "log_odds_chosen": 2.2162415981292725, + "log_odds_ratio": -0.20383968949317932, + "logits/chosen": -3.039658784866333, + "logits/rejected": -3.022245407104492, + "logps/chosen": -0.420427143573761, + "logps/rejected": -1.6983455419540405, + "loss": 0.2857, + "nll_loss": 0.24534273147583008, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.02102135680615902, + "rewards/margins": 0.06389592587947845, + "rewards/rejected": -0.08491728454828262, + "step": 1900 + }, + { + "epoch": 1.9926586261143155, + "eval_log_odds_chosen": 0.48923251032829285, + "eval_log_odds_ratio": -0.6193312406539917, + "eval_logits/chosen": -3.0350046157836914, + "eval_logits/rejected": -3.0279133319854736, + "eval_logps/chosen": -0.908783495426178, + "eval_logps/rejected": -1.2409300804138184, + "eval_loss": 0.5407980680465698, + "eval_nll_loss": 0.5090586543083191, + "eval_rewards/accuracies": 0.6547619104385376, + "eval_rewards/chosen": -0.04543917626142502, + "eval_rewards/margins": 0.016607332974672318, + "eval_rewards/rejected": -0.062046512961387634, + "eval_runtime": 137.1653, + "eval_samples_per_second": 14.537, + "eval_steps_per_second": 0.459, + "step": 1900 + }, + { + "epoch": 2.0031463030938648, + "grad_norm": 2.4971175632899385, + "learning_rate": 4.576287619562756e-06, + "log_odds_chosen": 2.549215793609619, + "log_odds_ratio": -0.13884183764457703, + "logits/chosen": -3.0293986797332764, + "logits/rejected": -3.0052542686462402, + "logps/chosen": -0.3389069139957428, + "logps/rejected": -1.6784775257110596, + "loss": 0.2535, + "nll_loss": 0.2399848997592926, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.01694534718990326, + "rewards/margins": 0.06697852909564972, + "rewards/rejected": -0.08392388373613358, + "step": 1910 + }, + { + "epoch": 2.0136339800734135, + "grad_norm": 2.5031224034871475, + "learning_rate": 4.564354645876385e-06, + "log_odds_chosen": 4.333657741546631, + "log_odds_ratio": -0.02762582339346409, + "logits/chosen": -2.869049549102783, + "logits/rejected": -2.8186068534851074, + "logps/chosen": -0.1433320939540863, + "logps/rejected": -2.334181547164917, + "loss": 0.1236, + "nll_loss": 0.11940746009349823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007166605442762375, + "rewards/margins": 0.10954247415065765, + "rewards/rejected": -0.11670909076929092, + "step": 1920 + }, + { + "epoch": 2.0241216570529628, + "grad_norm": 1.9586057770651872, + "learning_rate": 4.552514536059854e-06, + "log_odds_chosen": 3.8062407970428467, + "log_odds_ratio": -0.0499381422996521, + "logits/chosen": -2.9369876384735107, + "logits/rejected": -2.963967800140381, + "logps/chosen": -0.1607118844985962, + "logps/rejected": -1.9827187061309814, + "loss": 0.116, + "nll_loss": 0.11325522512197495, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.00803559459745884, + "rewards/margins": 0.09110033512115479, + "rewards/rejected": -0.09913593530654907, + "step": 1930 + }, + { + "epoch": 2.034609334032512, + "grad_norm": 2.173705177159571, + "learning_rate": 4.540766091864998e-06, + "log_odds_chosen": 3.9211831092834473, + "log_odds_ratio": -0.03853369504213333, + "logits/chosen": -2.848071575164795, + "logits/rejected": -2.927175760269165, + "logps/chosen": -0.14356736838817596, + "logps/rejected": -1.959979772567749, + "loss": 0.1167, + "nll_loss": 0.11882974952459335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007178368978202343, + "rewards/margins": 0.09082063287496567, + "rewards/rejected": -0.09799900650978088, + "step": 1940 + }, + { + "epoch": 2.0450970110120608, + "grad_norm": 1.7557144572827617, + "learning_rate": 4.529108136578383e-06, + "log_odds_chosen": 4.060091495513916, + "log_odds_ratio": -0.028795290738344193, + "logits/chosen": -2.8138527870178223, + "logits/rejected": -2.8606162071228027, + "logps/chosen": -0.13301293551921844, + "logps/rejected": -2.0062737464904785, + "loss": 0.1151, + "nll_loss": 0.1191815584897995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006650646682828665, + "rewards/margins": 0.09366302937269211, + "rewards/rejected": -0.10031367838382721, + "step": 1950 + }, + { + "epoch": 2.05558468799161, + "grad_norm": 1.69960315567237, + "learning_rate": 4.517539514526257e-06, + "log_odds_chosen": 4.352217674255371, + "log_odds_ratio": -0.03757786005735397, + "logits/chosen": -2.819655656814575, + "logits/rejected": -2.8428378105163574, + "logps/chosen": -0.14081783592700958, + "logps/rejected": -2.33030104637146, + "loss": 0.1135, + "nll_loss": 0.11204487085342407, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007040892727673054, + "rewards/margins": 0.10947415977716446, + "rewards/rejected": -0.11651506274938583, + "step": 1960 + }, + { + "epoch": 2.0660723649711588, + "grad_norm": 1.991621297994473, + "learning_rate": 4.506059090593329e-06, + "log_odds_chosen": 4.156961917877197, + "log_odds_ratio": -0.0386335626244545, + "logits/chosen": -2.8222968578338623, + "logits/rejected": -2.880376100540161, + "logps/chosen": -0.15631213784217834, + "logps/rejected": -2.2803502082824707, + "loss": 0.1083, + "nll_loss": 0.11318318545818329, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007815606892108917, + "rewards/margins": 0.1062019094824791, + "rewards/rejected": -0.11401752382516861, + "step": 1970 + }, + { + "epoch": 2.076560041950708, + "grad_norm": 1.8671392728507943, + "learning_rate": 4.4946657497549474e-06, + "log_odds_chosen": 4.751786708831787, + "log_odds_ratio": -0.02287628874182701, + "logits/chosen": -2.8250374794006348, + "logits/rejected": -2.858389377593994, + "logps/chosen": -0.136850968003273, + "logps/rejected": -2.61843204498291, + "loss": 0.1149, + "nll_loss": 0.11261866241693497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006842548493295908, + "rewards/margins": 0.12407903373241425, + "rewards/rejected": -0.13092158734798431, + "step": 1980 + }, + { + "epoch": 2.0870477189302568, + "grad_norm": 2.047221073846021, + "learning_rate": 4.483358396622204e-06, + "log_odds_chosen": 4.551729202270508, + "log_odds_ratio": -0.029045408591628075, + "logits/chosen": -2.8212010860443115, + "logits/rejected": -2.863682270050049, + "logps/chosen": -0.13936151564121246, + "logps/rejected": -2.4473021030426025, + "loss": 0.1129, + "nll_loss": 0.11166741698980331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006968076340854168, + "rewards/margins": 0.11539702117443085, + "rewards/rejected": -0.12236510217189789, + "step": 1990 + }, + { + "epoch": 2.097535395909806, + "grad_norm": 2.1099833794179723, + "learning_rate": 4.47213595499958e-06, + "log_odds_chosen": 4.558366298675537, + "log_odds_ratio": -0.01906474307179451, + "logits/chosen": -2.8424153327941895, + "logits/rejected": -2.877136707305908, + "logps/chosen": -0.14121726155281067, + "logps/rejected": -2.4738833904266357, + "loss": 0.1137, + "nll_loss": 0.1110328808426857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0070608630776405334, + "rewards/margins": 0.11663329601287842, + "rewards/rejected": -0.12369415909051895, + "step": 2000 + }, + { + "epoch": 2.097535395909806, + "eval_log_odds_chosen": 0.5767443776130676, + "eval_log_odds_ratio": -0.6272528171539307, + "eval_logits/chosen": -2.87036395072937, + "eval_logits/rejected": -2.881497383117676, + "eval_logps/chosen": -1.2408413887023926, + "eval_logps/rejected": -1.6761136054992676, + "eval_loss": 0.6877180337905884, + "eval_nll_loss": 0.6538823843002319, + "eval_rewards/accuracies": 0.670634925365448, + "eval_rewards/chosen": -0.06204206869006157, + "eval_rewards/margins": 0.021763615310192108, + "eval_rewards/rejected": -0.08380568027496338, + "eval_runtime": 137.068, + "eval_samples_per_second": 14.548, + "eval_steps_per_second": 0.46, + "step": 2000 + }, + { + "epoch": 2.108023072889355, + "grad_norm": 1.7758830781899906, + "learning_rate": 4.4609973674547055e-06, + "log_odds_chosen": 4.593904495239258, + "log_odds_ratio": -0.033291045576334, + "logits/chosen": -2.856330394744873, + "logits/rejected": -2.8690733909606934, + "logps/chosen": -0.1400183141231537, + "logps/rejected": -2.536652088165283, + "loss": 0.1039, + "nll_loss": 0.10139288008213043, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007000915706157684, + "rewards/margins": 0.11983168125152588, + "rewards/rejected": -0.12683258950710297, + "step": 2010 + }, + { + "epoch": 2.118510749868904, + "grad_norm": 2.6416736862275076, + "learning_rate": 4.449941594899848e-06, + "log_odds_chosen": 4.607335090637207, + "log_odds_ratio": -0.028559138998389244, + "logits/chosen": -2.7992746829986572, + "logits/rejected": -2.8301546573638916, + "logps/chosen": -0.14062660932540894, + "logps/rejected": -2.5437684059143066, + "loss": 0.1201, + "nll_loss": 0.1216670423746109, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007031330373138189, + "rewards/margins": 0.12015708535909653, + "rewards/rejected": -0.12718841433525085, + "step": 2020 + }, + { + "epoch": 2.128998426848453, + "grad_norm": 2.094070218470564, + "learning_rate": 4.438967616184754e-06, + "log_odds_chosen": 4.340805530548096, + "log_odds_ratio": -0.027936171740293503, + "logits/chosen": -2.823608875274658, + "logits/rejected": -2.8253750801086426, + "logps/chosen": -0.13957419991493225, + "logps/rejected": -2.268900156021118, + "loss": 0.1108, + "nll_loss": 0.1126783937215805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006978710647672415, + "rewards/margins": 0.10646629333496094, + "rewards/rejected": -0.11344502121210098, + "step": 2030 + }, + { + "epoch": 2.139486103828002, + "grad_norm": 2.222098137194295, + "learning_rate": 4.428074427700477e-06, + "log_odds_chosen": 4.698141098022461, + "log_odds_ratio": -0.02707051672041416, + "logits/chosen": -2.8169960975646973, + "logits/rejected": -2.8297157287597656, + "logps/chosen": -0.1413937509059906, + "logps/rejected": -2.65130877494812, + "loss": 0.1166, + "nll_loss": 0.11614535748958588, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007069687359035015, + "rewards/margins": 0.1254957616329193, + "rewards/rejected": -0.1325654536485672, + "step": 2040 + }, + { + "epoch": 2.149973780807551, + "grad_norm": 2.1988466339750317, + "learning_rate": 4.417261042993862e-06, + "log_odds_chosen": 4.824273109436035, + "log_odds_ratio": -0.022720973938703537, + "logits/chosen": -2.8039610385894775, + "logits/rejected": -2.795748710632324, + "logps/chosen": -0.12069626152515411, + "logps/rejected": -2.613525390625, + "loss": 0.1113, + "nll_loss": 0.10357411205768585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006034812889993191, + "rewards/margins": 0.12464147806167603, + "rewards/rejected": -0.1306762993335724, + "step": 2050 + }, + { + "epoch": 2.1604614577871, + "grad_norm": 1.9312492998690272, + "learning_rate": 4.406526492392318e-06, + "log_odds_chosen": 4.532221794128418, + "log_odds_ratio": -0.025564473122358322, + "logits/chosen": -2.856283664703369, + "logits/rejected": -2.847923994064331, + "logps/chosen": -0.15458881855010986, + "logps/rejected": -2.556361198425293, + "loss": 0.1171, + "nll_loss": 0.1105358749628067, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007729442324489355, + "rewards/margins": 0.1200886145234108, + "rewards/rejected": -0.1278180480003357, + "step": 2060 + }, + { + "epoch": 2.170949134766649, + "grad_norm": 2.184212774032157, + "learning_rate": 4.39586982263858e-06, + "log_odds_chosen": 4.760067462921143, + "log_odds_ratio": -0.025417357683181763, + "logits/chosen": -2.8176796436309814, + "logits/rejected": -2.818103313446045, + "logps/chosen": -0.15180301666259766, + "logps/rejected": -2.774660110473633, + "loss": 0.1148, + "nll_loss": 0.11588319391012192, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007590149994939566, + "rewards/margins": 0.13114285469055176, + "rewards/rejected": -0.13873299956321716, + "step": 2070 + }, + { + "epoch": 2.1814368117461984, + "grad_norm": 2.151555777196694, + "learning_rate": 4.385290096535147e-06, + "log_odds_chosen": 4.732907772064209, + "log_odds_ratio": -0.026212304830551147, + "logits/chosen": -2.859835147857666, + "logits/rejected": -2.857645034790039, + "logps/chosen": -0.13824030756950378, + "logps/rejected": -2.6506001949310303, + "loss": 0.1132, + "nll_loss": 0.11115143448114395, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.006912014447152615, + "rewards/margins": 0.12561801075935364, + "rewards/rejected": -0.13253000378608704, + "step": 2080 + }, + { + "epoch": 2.191924488725747, + "grad_norm": 3.2431795321399486, + "learning_rate": 4.374786392598072e-06, + "log_odds_chosen": 4.578325271606445, + "log_odds_ratio": -0.03994257375597954, + "logits/chosen": -2.8212687969207764, + "logits/rejected": -2.7516632080078125, + "logps/chosen": -0.1504596322774887, + "logps/rejected": -2.5710039138793945, + "loss": 0.1095, + "nll_loss": 0.10720662772655487, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007522981613874435, + "rewards/margins": 0.12102720886468887, + "rewards/rejected": -0.128550186753273, + "step": 2090 + }, + { + "epoch": 2.2024121657052964, + "grad_norm": 2.6693753745610076, + "learning_rate": 4.364357804719848e-06, + "log_odds_chosen": 4.707537651062012, + "log_odds_ratio": -0.025204619392752647, + "logits/chosen": -2.798999309539795, + "logits/rejected": -2.794037342071533, + "logps/chosen": -0.15521793067455292, + "logps/rejected": -2.689946174621582, + "loss": 0.1192, + "nll_loss": 0.12550954520702362, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007760896347463131, + "rewards/margins": 0.12673643231391907, + "rewards/rejected": -0.13449731469154358, + "step": 2100 + }, + { + "epoch": 2.2024121657052964, + "eval_log_odds_chosen": 0.6958096623420715, + "eval_log_odds_ratio": -0.6209548115730286, + "eval_logits/chosen": -2.837247610092163, + "eval_logits/rejected": -2.8433148860931396, + "eval_logps/chosen": -1.4121639728546143, + "eval_logps/rejected": -1.9619879722595215, + "eval_loss": 0.7576995491981506, + "eval_nll_loss": 0.7199162244796753, + "eval_rewards/accuracies": 0.6726190447807312, + "eval_rewards/chosen": -0.07060819864273071, + "eval_rewards/margins": 0.027491191402077675, + "eval_rewards/rejected": -0.09809939563274384, + "eval_runtime": 136.9058, + "eval_samples_per_second": 14.565, + "eval_steps_per_second": 0.46, + "step": 2100 + }, + { + "epoch": 2.212899842684845, + "grad_norm": 1.7712476287108132, + "learning_rate": 4.354003441841081e-06, + "log_odds_chosen": 4.905824184417725, + "log_odds_ratio": -0.02992095984518528, + "logits/chosen": -2.8259618282318115, + "logits/rejected": -2.760521650314331, + "logps/chosen": -0.13811610639095306, + "logps/rejected": -2.7983617782592773, + "loss": 0.1173, + "nll_loss": 0.12010955810546875, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.006905805319547653, + "rewards/margins": 0.13301227986812592, + "rewards/rejected": -0.13991808891296387, + "step": 2110 + }, + { + "epoch": 2.2233875196643944, + "grad_norm": 1.6446106852737563, + "learning_rate": 4.3437224276306945e-06, + "log_odds_chosen": 4.906925201416016, + "log_odds_ratio": -0.017224887385964394, + "logits/chosen": -2.838736057281494, + "logits/rejected": -2.8536746501922607, + "logps/chosen": -0.16129423677921295, + "logps/rejected": -2.8627591133117676, + "loss": 0.1147, + "nll_loss": 0.12654295563697815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008064712397754192, + "rewards/margins": 0.1350732445716858, + "rewards/rejected": -0.14313796162605286, + "step": 2120 + }, + { + "epoch": 2.233875196643943, + "grad_norm": 1.7769911595186116, + "learning_rate": 4.333513900174396e-06, + "log_odds_chosen": 4.821990966796875, + "log_odds_ratio": -0.026227790862321854, + "logits/chosen": -2.829463481903076, + "logits/rejected": -2.842454433441162, + "logps/chosen": -0.1390562653541565, + "logps/rejected": -2.760815143585205, + "loss": 0.1215, + "nll_loss": 0.11114709079265594, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0069528138265013695, + "rewards/margins": 0.13108794391155243, + "rewards/rejected": -0.13804076611995697, + "step": 2130 + }, + { + "epoch": 2.2443628736234924, + "grad_norm": 2.186831361943043, + "learning_rate": 4.32337701167117e-06, + "log_odds_chosen": 5.350895881652832, + "log_odds_ratio": -0.0246684979647398, + "logits/chosen": -2.872166156768799, + "logits/rejected": -2.8550028800964355, + "logps/chosen": -0.13888207077980042, + "logps/rejected": -3.2091636657714844, + "loss": 0.1143, + "nll_loss": 0.11629905551671982, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.006944102700799704, + "rewards/margins": 0.1535140872001648, + "rewards/rejected": -0.16045819222927094, + "step": 2140 + }, + { + "epoch": 2.2548505506030416, + "grad_norm": 2.2764409350931345, + "learning_rate": 4.313310928137537e-06, + "log_odds_chosen": 4.80722713470459, + "log_odds_ratio": -0.025547053664922714, + "logits/chosen": -2.8291611671447754, + "logits/rejected": -2.858245849609375, + "logps/chosen": -0.15937599539756775, + "logps/rejected": -2.8679497241973877, + "loss": 0.1185, + "nll_loss": 0.11574534326791763, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007968800142407417, + "rewards/margins": 0.13542868196964264, + "rewards/rejected": -0.1433974802494049, + "step": 2150 + }, + { + "epoch": 2.2653382275825904, + "grad_norm": 2.239980255447614, + "learning_rate": 4.303314829119352e-06, + "log_odds_chosen": 5.589659690856934, + "log_odds_ratio": -0.020419184118509293, + "logits/chosen": -2.905287981033325, + "logits/rejected": -2.966031551361084, + "logps/chosen": -0.1542571783065796, + "logps/rejected": -3.551201581954956, + "loss": 0.1236, + "nll_loss": 0.11697031557559967, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007712858729064465, + "rewards/margins": 0.16984722018241882, + "rewards/rejected": -0.17756007611751556, + "step": 2160 + }, + { + "epoch": 2.2758259045621396, + "grad_norm": 2.009942820215124, + "learning_rate": 4.293387907410919e-06, + "log_odds_chosen": 6.170254707336426, + "log_odds_ratio": -0.017188329249620438, + "logits/chosen": -2.848698139190674, + "logits/rejected": -2.945160388946533, + "logps/chosen": -0.13800857961177826, + "logps/rejected": -4.000069618225098, + "loss": 0.1137, + "nll_loss": 0.11105845123529434, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.006900429725646973, + "rewards/margins": 0.19310306012630463, + "rewards/rejected": -0.2000034749507904, + "step": 2170 + }, + { + "epoch": 2.2863135815416884, + "grad_norm": 2.1918079846574567, + "learning_rate": 4.2835293687811935e-06, + "log_odds_chosen": 6.479376316070557, + "log_odds_ratio": -0.010083029977977276, + "logits/chosen": -2.7919399738311768, + "logits/rejected": -2.9110770225524902, + "logps/chosen": -0.1471458077430725, + "logps/rejected": -4.402917385101318, + "loss": 0.1149, + "nll_loss": 0.12062163650989532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007357291877269745, + "rewards/margins": 0.21278861165046692, + "rewards/rejected": -0.22014589607715607, + "step": 2180 + }, + { + "epoch": 2.2968012585212376, + "grad_norm": 1.9268306821517742, + "learning_rate": 4.273738431706883e-06, + "log_odds_chosen": 6.724373817443848, + "log_odds_ratio": -0.018149670213460922, + "logits/chosen": -2.891892194747925, + "logits/rejected": -3.004826784133911, + "logps/chosen": -0.15707895159721375, + "logps/rejected": -4.773315906524658, + "loss": 0.1119, + "nll_loss": 0.10733366012573242, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007853945717215538, + "rewards/margins": 0.23081183433532715, + "rewards/rejected": -0.23866574466228485, + "step": 2190 + }, + { + "epoch": 2.3072889355007864, + "grad_norm": 1.9131867908425575, + "learning_rate": 4.264014327112208e-06, + "log_odds_chosen": 6.2542595863342285, + "log_odds_ratio": -0.015775460749864578, + "logits/chosen": -2.862001419067383, + "logits/rejected": -2.91827654838562, + "logps/chosen": -0.14461472630500793, + "logps/rejected": -4.159193515777588, + "loss": 0.1178, + "nll_loss": 0.12322264909744263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007230737246572971, + "rewards/margins": 0.20072893798351288, + "rewards/rejected": -0.20795968174934387, + "step": 2200 + }, + { + "epoch": 2.3072889355007864, + "eval_log_odds_chosen": 1.1627599000930786, + "eval_log_odds_ratio": -0.7777736783027649, + "eval_logits/chosen": -2.887819766998291, + "eval_logits/rejected": -2.9106638431549072, + "eval_logps/chosen": -2.4108457565307617, + "eval_logps/rejected": -3.4342026710510254, + "eval_loss": 1.1761772632598877, + "eval_nll_loss": 1.1196904182434082, + "eval_rewards/accuracies": 0.6527777910232544, + "eval_rewards/chosen": -0.12054230272769928, + "eval_rewards/margins": 0.051167842000722885, + "eval_rewards/rejected": -0.17171014845371246, + "eval_runtime": 137.1423, + "eval_samples_per_second": 14.54, + "eval_steps_per_second": 0.459, + "step": 2200 + }, + { + "epoch": 2.3177766124803356, + "grad_norm": 2.1121501905853624, + "learning_rate": 4.254356298115171e-06, + "log_odds_chosen": 6.363844394683838, + "log_odds_ratio": -0.024754000827670097, + "logits/chosen": -2.8908374309539795, + "logits/rejected": -2.9566292762756348, + "logps/chosen": -0.15381646156311035, + "logps/rejected": -4.287047386169434, + "loss": 0.1181, + "nll_loss": 0.12711365520954132, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007690823636949062, + "rewards/margins": 0.20666155219078064, + "rewards/rejected": -0.21435236930847168, + "step": 2210 + }, + { + "epoch": 2.3282642894598844, + "grad_norm": 3.84884286912148, + "learning_rate": 4.24476359978009e-06, + "log_odds_chosen": 5.530186176300049, + "log_odds_ratio": -0.017865758389234543, + "logits/chosen": -2.8787178993225098, + "logits/rejected": -2.9533944129943848, + "logps/chosen": -0.1436866670846939, + "logps/rejected": -3.488823652267456, + "loss": 0.1234, + "nll_loss": 0.11815366894006729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0071843331679701805, + "rewards/margins": 0.16725686192512512, + "rewards/rejected": -0.17444118857383728, + "step": 2220 + }, + { + "epoch": 2.3387519664394336, + "grad_norm": 2.417106329176298, + "learning_rate": 4.235235498876268e-06, + "log_odds_chosen": 5.049867630004883, + "log_odds_ratio": -0.030804011970758438, + "logits/chosen": -2.8601975440979004, + "logits/rejected": -2.919813632965088, + "logps/chosen": -0.16016361117362976, + "logps/rejected": -3.108591079711914, + "loss": 0.1205, + "nll_loss": 0.12257065623998642, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.008008181117475033, + "rewards/margins": 0.14742138981819153, + "rewards/rejected": -0.15542957186698914, + "step": 2230 + }, + { + "epoch": 2.349239643418983, + "grad_norm": 2.0311020060176737, + "learning_rate": 4.2257712736425835e-06, + "log_odds_chosen": 6.287697792053223, + "log_odds_ratio": -0.03303173556923866, + "logits/chosen": -2.8431243896484375, + "logits/rejected": -2.987511396408081, + "logps/chosen": -0.15092086791992188, + "logps/rejected": -4.205324649810791, + "loss": 0.119, + "nll_loss": 0.11937984079122543, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.0075460439547896385, + "rewards/margins": 0.20272019505500793, + "rewards/rejected": -0.21026620268821716, + "step": 2240 + }, + { + "epoch": 2.3597273203985316, + "grad_norm": 1.8184108922544404, + "learning_rate": 4.216370213557839e-06, + "log_odds_chosen": 6.489804267883301, + "log_odds_ratio": -0.017738422378897667, + "logits/chosen": -2.8637566566467285, + "logits/rejected": -2.9882349967956543, + "logps/chosen": -0.1367037147283554, + "logps/rejected": -4.3643412590026855, + "loss": 0.1103, + "nll_loss": 0.10625318437814713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006835184991359711, + "rewards/margins": 0.21138188242912292, + "rewards/rejected": -0.21821708977222443, + "step": 2250 + }, + { + "epoch": 2.370214997378081, + "grad_norm": 1.9927993897844196, + "learning_rate": 4.207031619116713e-06, + "log_odds_chosen": 6.5232744216918945, + "log_odds_ratio": -0.02112133800983429, + "logits/chosen": -2.888134002685547, + "logits/rejected": -2.9766697883605957, + "logps/chosen": -0.13985328376293182, + "logps/rejected": -4.443106174468994, + "loss": 0.1119, + "nll_loss": 0.10387493669986725, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.006992665119469166, + "rewards/margins": 0.21516263484954834, + "rewards/rejected": -0.22215530276298523, + "step": 2260 + }, + { + "epoch": 2.3807026743576296, + "grad_norm": 1.9179118979680037, + "learning_rate": 4.197754801611136e-06, + "log_odds_chosen": 7.000714302062988, + "log_odds_ratio": -0.01941884122788906, + "logits/chosen": -2.8880743980407715, + "logits/rejected": -3.0280842781066895, + "logps/chosen": -0.1594962626695633, + "logps/rejected": -4.991673946380615, + "loss": 0.1187, + "nll_loss": 0.12734182178974152, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007974812760949135, + "rewards/margins": 0.2416088581085205, + "rewards/rejected": -0.2495836764574051, + "step": 2270 + }, + { + "epoch": 2.391190351337179, + "grad_norm": 1.7656016453383905, + "learning_rate": 4.188539082916955e-06, + "log_odds_chosen": 5.81030797958374, + "log_odds_ratio": -0.02714763581752777, + "logits/chosen": -2.858682155609131, + "logits/rejected": -2.961153030395508, + "logps/chosen": -0.1495695412158966, + "logps/rejected": -3.7413382530212402, + "loss": 0.117, + "nll_loss": 0.1129683405160904, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007478476967662573, + "rewards/margins": 0.1795884370803833, + "rewards/rejected": -0.18706689774990082, + "step": 2280 + }, + { + "epoch": 2.401678028316728, + "grad_norm": 1.7721263332581463, + "learning_rate": 4.179383795285729e-06, + "log_odds_chosen": 6.099682807922363, + "log_odds_ratio": -0.016452614217996597, + "logits/chosen": -2.8671703338623047, + "logits/rejected": -2.94566011428833, + "logps/chosen": -0.1470957249403, + "logps/rejected": -4.025435447692871, + "loss": 0.1162, + "nll_loss": 0.1030157208442688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007354786153882742, + "rewards/margins": 0.19391697645187378, + "rewards/rejected": -0.20127174258232117, + "step": 2290 + }, + { + "epoch": 2.412165705296277, + "grad_norm": 6.518126509500433, + "learning_rate": 4.170288281141496e-06, + "log_odds_chosen": 5.677874565124512, + "log_odds_ratio": -0.02623058296740055, + "logits/chosen": -2.8755476474761963, + "logits/rejected": -2.926180362701416, + "logps/chosen": -0.15929332375526428, + "logps/rejected": -3.627763271331787, + "loss": 0.1184, + "nll_loss": 0.12096776813268661, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007964666932821274, + "rewards/margins": 0.17342346906661987, + "rewards/rejected": -0.18138816952705383, + "step": 2300 + }, + { + "epoch": 2.412165705296277, + "eval_log_odds_chosen": 1.3232934474945068, + "eval_log_odds_ratio": -1.0561914443969727, + "eval_logits/chosen": -2.9102423191070557, + "eval_logits/rejected": -2.9226319789886475, + "eval_logps/chosen": -3.8695833683013916, + "eval_logps/rejected": -5.081162452697754, + "eval_loss": 1.8519541025161743, + "eval_nll_loss": 1.7541913986206055, + "eval_rewards/accuracies": 0.636904776096344, + "eval_rewards/chosen": -0.19347918033599854, + "eval_rewards/margins": 0.06057893857359886, + "eval_rewards/rejected": -0.2540581226348877, + "eval_runtime": 140.6912, + "eval_samples_per_second": 14.173, + "eval_steps_per_second": 0.448, + "step": 2300 + }, + { + "epoch": 2.422653382275826, + "grad_norm": 2.1350280555835317, + "learning_rate": 4.1612518928823956e-06, + "log_odds_chosen": 5.239171028137207, + "log_odds_ratio": -0.0356699600815773, + "logits/chosen": -2.8127808570861816, + "logits/rejected": -2.847365140914917, + "logps/chosen": -0.17353428900241852, + "logps/rejected": -3.4219677448272705, + "loss": 0.1197, + "nll_loss": 0.12273728847503662, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.008676714263856411, + "rewards/margins": 0.16242167353630066, + "rewards/rejected": -0.17109838128089905, + "step": 2310 + }, + { + "epoch": 2.433141059255375, + "grad_norm": 2.142764154815985, + "learning_rate": 4.1522739926869985e-06, + "log_odds_chosen": 7.10500431060791, + "log_odds_ratio": -0.02759629487991333, + "logits/chosen": -2.8841793537139893, + "logits/rejected": -2.979490280151367, + "logps/chosen": -0.15857262909412384, + "logps/rejected": -5.118218898773193, + "loss": 0.1179, + "nll_loss": 0.11995577812194824, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007928632199764252, + "rewards/margins": 0.24798233807086945, + "rewards/rejected": -0.2559109628200531, + "step": 2320 + }, + { + "epoch": 2.443628736234924, + "grad_norm": 2.442748493026814, + "learning_rate": 4.143353952325209e-06, + "log_odds_chosen": 6.4824538230896, + "log_odds_ratio": -0.03863966092467308, + "logits/chosen": -2.8798575401306152, + "logits/rejected": -2.975369691848755, + "logps/chosen": -0.16273298859596252, + "logps/rejected": -4.518317222595215, + "loss": 0.1144, + "nll_loss": 0.11924872547388077, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.008136649616062641, + "rewards/margins": 0.21777920424938202, + "rewards/rejected": -0.22591586410999298, + "step": 2330 + }, + { + "epoch": 2.454116413214473, + "grad_norm": 1.7906952084031593, + "learning_rate": 4.134491152973616e-06, + "log_odds_chosen": 6.330552101135254, + "log_odds_ratio": -0.019993215799331665, + "logits/chosen": -2.903748035430908, + "logits/rejected": -2.961629629135132, + "logps/chosen": -0.1506245732307434, + "logps/rejected": -4.29229736328125, + "loss": 0.1162, + "nll_loss": 0.11873211711645126, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0075312284752726555, + "rewards/margins": 0.20708362758159637, + "rewards/rejected": -0.2146148979663849, + "step": 2340 + }, + { + "epoch": 2.464604090194022, + "grad_norm": 2.709543224621687, + "learning_rate": 4.125684985035174e-06, + "log_odds_chosen": 6.674917697906494, + "log_odds_ratio": -0.02191847935318947, + "logits/chosen": -2.869702100753784, + "logits/rejected": -2.9517292976379395, + "logps/chosen": -0.14587149024009705, + "logps/rejected": -4.594050407409668, + "loss": 0.1189, + "nll_loss": 0.11958177387714386, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007293573580682278, + "rewards/margins": 0.2224089354276657, + "rewards/rejected": -0.22970251739025116, + "step": 2350 + }, + { + "epoch": 2.475091767173571, + "grad_norm": 1.9596617726605967, + "learning_rate": 4.116934847963092e-06, + "log_odds_chosen": 6.008196830749512, + "log_odds_ratio": -0.020748203620314598, + "logits/chosen": -2.859504222869873, + "logits/rejected": -2.9086391925811768, + "logps/chosen": -0.1603454202413559, + "logps/rejected": -4.055342674255371, + "loss": 0.1137, + "nll_loss": 0.11717329174280167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008017271757125854, + "rewards/margins": 0.1947498619556427, + "rewards/rejected": -0.20276716351509094, + "step": 2360 + }, + { + "epoch": 2.48557944415312, + "grad_norm": 25.11227763431921, + "learning_rate": 4.1082401500888055e-06, + "log_odds_chosen": 6.279742240905762, + "log_odds_ratio": -0.01569024845957756, + "logits/chosen": -2.916944742202759, + "logits/rejected": -2.987224578857422, + "logps/chosen": -0.14050395786762238, + "logps/rejected": -4.152866363525391, + "loss": 0.1189, + "nll_loss": 0.10722777992486954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007025198079645634, + "rewards/margins": 0.20061811804771423, + "rewards/rejected": -0.2076433151960373, + "step": 2370 + }, + { + "epoch": 2.4960671211326693, + "grad_norm": 1.757332945919827, + "learning_rate": 4.099600308453939e-06, + "log_odds_chosen": 6.39632511138916, + "log_odds_ratio": -0.023090779781341553, + "logits/chosen": -2.8743884563446045, + "logits/rejected": -2.9668736457824707, + "logps/chosen": -0.15729930996894836, + "logps/rejected": -4.314006328582764, + "loss": 0.1177, + "nll_loss": 0.1209021583199501, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007864965125918388, + "rewards/margins": 0.2078353613615036, + "rewards/rejected": -0.21570034325122833, + "step": 2380 + }, + { + "epoch": 2.506554798112218, + "grad_norm": 2.0524680636282056, + "learning_rate": 4.091014748646132e-06, + "log_odds_chosen": 5.9223713874816895, + "log_odds_ratio": -0.030582841485738754, + "logits/chosen": -2.8992161750793457, + "logits/rejected": -2.929603099822998, + "logps/chosen": -0.1705484390258789, + "logps/rejected": -4.027953147888184, + "loss": 0.1189, + "nll_loss": 0.10802364349365234, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.008527422323822975, + "rewards/margins": 0.19287024438381195, + "rewards/rejected": -0.20139765739440918, + "step": 2390 + }, + { + "epoch": 2.5170424750917673, + "grad_norm": 1.7245638696745784, + "learning_rate": 4.082482904638631e-06, + "log_odds_chosen": 6.324474811553955, + "log_odds_ratio": -0.018949782475829124, + "logits/chosen": -2.8749866485595703, + "logits/rejected": -2.9224321842193604, + "logps/chosen": -0.1520567536354065, + "logps/rejected": -4.290619850158691, + "loss": 0.1172, + "nll_loss": 0.12284000217914581, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.00760283786803484, + "rewards/margins": 0.20692817866802216, + "rewards/rejected": -0.21453101933002472, + "step": 2400 + }, + { + "epoch": 2.5170424750917673, + "eval_log_odds_chosen": 1.0075438022613525, + "eval_log_odds_ratio": -0.8145382404327393, + "eval_logits/chosen": -2.8560779094696045, + "eval_logits/rejected": -2.871006965637207, + "eval_logps/chosen": -2.0024044513702393, + "eval_logps/rejected": -2.8670685291290283, + "eval_loss": 1.01926589012146, + "eval_nll_loss": 0.9735569357872009, + "eval_rewards/accuracies": 0.6408730149269104, + "eval_rewards/chosen": -0.10012022405862808, + "eval_rewards/margins": 0.043233200907707214, + "eval_rewards/rejected": -0.1433534324169159, + "eval_runtime": 138.4847, + "eval_samples_per_second": 14.399, + "eval_steps_per_second": 0.455, + "step": 2400 + }, + { + "epoch": 2.527530152071316, + "grad_norm": 2.140192470773612, + "learning_rate": 4.074004218633553e-06, + "log_odds_chosen": 6.169337272644043, + "log_odds_ratio": -0.024398522451519966, + "logits/chosen": -2.8802199363708496, + "logits/rejected": -2.9575634002685547, + "logps/chosen": -0.14228537678718567, + "logps/rejected": -4.140218257904053, + "loss": 0.1204, + "nll_loss": 0.10762319713830948, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0071142688393592834, + "rewards/margins": 0.1998966485261917, + "rewards/rejected": -0.20701093971729279, + "step": 2410 + }, + { + "epoch": 2.5380178290508653, + "grad_norm": 1.9307036538867832, + "learning_rate": 4.065578140908709e-06, + "log_odds_chosen": 6.545037269592285, + "log_odds_ratio": -0.020819999277591705, + "logits/chosen": -2.826190948486328, + "logits/rejected": -2.9180386066436768, + "logps/chosen": -0.15343733131885529, + "logps/rejected": -4.550530433654785, + "loss": 0.1292, + "nll_loss": 0.12483732402324677, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007671866565942764, + "rewards/margins": 0.2198546677827835, + "rewards/rejected": -0.22752651572227478, + "step": 2420 + }, + { + "epoch": 2.5485055060304145, + "grad_norm": 2.472322893814309, + "learning_rate": 4.057204129667897e-06, + "log_odds_chosen": 6.510749816894531, + "log_odds_ratio": -0.017572391778230667, + "logits/chosen": -2.8476340770721436, + "logits/rejected": -2.9206082820892334, + "logps/chosen": -0.1623007208108902, + "logps/rejected": -4.547110557556152, + "loss": 0.114, + "nll_loss": 0.11619551479816437, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.00811503641307354, + "rewards/margins": 0.21924051642417908, + "rewards/rejected": -0.22735556960105896, + "step": 2430 + }, + { + "epoch": 2.5589931830099633, + "grad_norm": 3.562558849555077, + "learning_rate": 4.048881650894581e-06, + "log_odds_chosen": 7.486746311187744, + "log_odds_ratio": -0.012338453903794289, + "logits/chosen": -2.8392252922058105, + "logits/rejected": -2.924240827560425, + "logps/chosen": -0.15012109279632568, + "logps/rejected": -5.4815144538879395, + "loss": 0.1213, + "nll_loss": 0.12608163058757782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007506055291742086, + "rewards/margins": 0.26656967401504517, + "rewards/rejected": -0.2740757167339325, + "step": 2440 + }, + { + "epoch": 2.5694808599895125, + "grad_norm": 2.3252293901649193, + "learning_rate": 4.040610178208843e-06, + "log_odds_chosen": 7.7740631103515625, + "log_odds_ratio": -0.0118449367582798, + "logits/chosen": -2.795551061630249, + "logits/rejected": -2.8945860862731934, + "logps/chosen": -0.1522868573665619, + "logps/rejected": -5.739714622497559, + "loss": 0.1145, + "nll_loss": 0.11489256471395493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007614342961460352, + "rewards/margins": 0.27937138080596924, + "rewards/rejected": -0.28698569536209106, + "step": 2450 + }, + { + "epoch": 2.5799685369690613, + "grad_norm": 2.0157957603988175, + "learning_rate": 4.032389192727559e-06, + "log_odds_chosen": 6.265582084655762, + "log_odds_ratio": -0.024669019505381584, + "logits/chosen": -2.85023832321167, + "logits/rejected": -2.8876233100891113, + "logps/chosen": -0.150896817445755, + "logps/rejected": -4.219937324523926, + "loss": 0.1277, + "nll_loss": 0.12799417972564697, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007544840686023235, + "rewards/margins": 0.20345202088356018, + "rewards/rejected": -0.2109968364238739, + "step": 2460 + }, + { + "epoch": 2.5904562139486105, + "grad_norm": 2.287376161767263, + "learning_rate": 4.024218182927669e-06, + "log_odds_chosen": 6.810778617858887, + "log_odds_ratio": -0.013128559105098248, + "logits/chosen": -2.823387622833252, + "logits/rejected": -2.879467487335205, + "logps/chosen": -0.15397700667381287, + "logps/rejected": -4.72897481918335, + "loss": 0.1209, + "nll_loss": 0.12541964650154114, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0076988511718809605, + "rewards/margins": 0.22874990105628967, + "rewards/rejected": -0.236448734998703, + "step": 2470 + }, + { + "epoch": 2.6009438909281593, + "grad_norm": 2.2717126345189547, + "learning_rate": 4.016096644512495e-06, + "log_odds_chosen": 6.199719429016113, + "log_odds_ratio": -0.018437180668115616, + "logits/chosen": -2.8248672485351562, + "logits/rejected": -2.8656277656555176, + "logps/chosen": -0.14331553876399994, + "logps/rejected": -4.071486949920654, + "loss": 0.1196, + "nll_loss": 0.11505875736474991, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.00716577610000968, + "rewards/margins": 0.19640859961509705, + "rewards/rejected": -0.20357437431812286, + "step": 2480 + }, + { + "epoch": 2.6114315679077085, + "grad_norm": 2.1379482021716036, + "learning_rate": 4.008024080281012e-06, + "log_odds_chosen": 7.395205497741699, + "log_odds_ratio": -0.01522077340632677, + "logits/chosen": -2.8720109462738037, + "logits/rejected": -2.936903476715088, + "logps/chosen": -0.13911715149879456, + "logps/rejected": -5.221936225891113, + "loss": 0.12, + "nll_loss": 0.12369368225336075, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0069558583199977875, + "rewards/margins": 0.2541409730911255, + "rewards/rejected": -0.2610968351364136, + "step": 2490 + }, + { + "epoch": 2.6219192448872572, + "grad_norm": 1.7439578923515293, + "learning_rate": 4.000000000000001e-06, + "log_odds_chosen": 8.536567687988281, + "log_odds_ratio": -0.02061418630182743, + "logits/chosen": -2.854001760482788, + "logits/rejected": -2.9489758014678955, + "logps/chosen": -0.1588824838399887, + "logps/rejected": -6.567204475402832, + "loss": 0.1109, + "nll_loss": 0.11326327174901962, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007944123819470406, + "rewards/margins": 0.32041609287261963, + "rewards/rejected": -0.3283601999282837, + "step": 2500 + }, + { + "epoch": 2.6219192448872572, + "eval_log_odds_chosen": 1.0766297578811646, + "eval_log_odds_ratio": -0.9767945408821106, + "eval_logits/chosen": -2.8457064628601074, + "eval_logits/rejected": -2.857062339782715, + "eval_logps/chosen": -2.4182989597320557, + "eval_logps/rejected": -3.354691743850708, + "eval_loss": 1.2049823999404907, + "eval_nll_loss": 1.172393560409546, + "eval_rewards/accuracies": 0.6329365372657776, + "eval_rewards/chosen": -0.12091495096683502, + "eval_rewards/margins": 0.046819645911455154, + "eval_rewards/rejected": -0.1677345633506775, + "eval_runtime": 137.7801, + "eval_samples_per_second": 14.472, + "eval_steps_per_second": 0.457, + "step": 2500 + }, + { + "epoch": 2.6324069218668065, + "grad_norm": 3.8704567483353496, + "learning_rate": 3.992023920278996e-06, + "log_odds_chosen": 6.979190826416016, + "log_odds_ratio": -0.018384801223874092, + "logits/chosen": -2.8529200553894043, + "logits/rejected": -2.923466920852661, + "logps/chosen": -0.14472463726997375, + "logps/rejected": -4.871707916259766, + "loss": 0.1127, + "nll_loss": 0.1109754890203476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007236232049763203, + "rewards/margins": 0.23634913563728333, + "rewards/rejected": -0.24358537793159485, + "step": 2510 + }, + { + "epoch": 2.6428945988463557, + "grad_norm": 2.0243407054263933, + "learning_rate": 3.984095364447979e-06, + "log_odds_chosen": 6.955283164978027, + "log_odds_ratio": -0.026280570775270462, + "logits/chosen": -2.845829486846924, + "logits/rejected": -2.9166336059570312, + "logps/chosen": -0.1561572551727295, + "logps/rejected": -4.968081474304199, + "loss": 0.1245, + "nll_loss": 0.11139287799596786, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0078078629449009895, + "rewards/margins": 0.2405962496995926, + "rewards/rejected": -0.2484041005373001, + "step": 2520 + }, + { + "epoch": 2.6533822758259045, + "grad_norm": 2.159445384644007, + "learning_rate": 3.97621386243772e-06, + "log_odds_chosen": 8.654619216918945, + "log_odds_ratio": -0.015728970989584923, + "logits/chosen": -2.815493583679199, + "logits/rejected": -2.9511656761169434, + "logps/chosen": -0.1413796991109848, + "logps/rejected": -6.552220821380615, + "loss": 0.1201, + "nll_loss": 0.11258909851312637, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007068985607475042, + "rewards/margins": 0.32054203748703003, + "rewards/rejected": -0.3276110291481018, + "step": 2530 + }, + { + "epoch": 2.6638699528054537, + "grad_norm": 2.5062335927036123, + "learning_rate": 3.9683789506627254e-06, + "log_odds_chosen": 7.7274370193481445, + "log_odds_ratio": -0.020870521664619446, + "logits/chosen": -2.8319153785705566, + "logits/rejected": -2.922696113586426, + "logps/chosen": -0.15536390244960785, + "logps/rejected": -5.693093776702881, + "loss": 0.1181, + "nll_loss": 0.10906670987606049, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007768194191157818, + "rewards/margins": 0.2768864631652832, + "rewards/rejected": -0.2846546769142151, + "step": 2540 + }, + { + "epoch": 2.6743576297850025, + "grad_norm": 1.970994291017683, + "learning_rate": 3.960590171906698e-06, + "log_odds_chosen": 7.434384822845459, + "log_odds_ratio": -0.023785177618265152, + "logits/chosen": -2.7982025146484375, + "logits/rejected": -2.8931427001953125, + "logps/chosen": -0.16477976739406586, + "logps/rejected": -5.395650386810303, + "loss": 0.1221, + "nll_loss": 0.13674572110176086, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.008238988928496838, + "rewards/margins": 0.2615435719490051, + "rewards/rejected": -0.26978254318237305, + "step": 2550 + }, + { + "epoch": 2.6848453067645517, + "grad_norm": 2.0205686734736594, + "learning_rate": 3.952847075210474e-06, + "log_odds_chosen": 7.365771293640137, + "log_odds_ratio": -0.01570904441177845, + "logits/chosen": -2.866798162460327, + "logits/rejected": -2.959561347961426, + "logps/chosen": -0.14348378777503967, + "logps/rejected": -5.177813529968262, + "loss": 0.1204, + "nll_loss": 0.12037654966115952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007174189202487469, + "rewards/margins": 0.2517164647579193, + "rewards/rejected": -0.25889068841934204, + "step": 2560 + }, + { + "epoch": 2.695332983744101, + "grad_norm": 1.8761709200806869, + "learning_rate": 3.9451492157623585e-06, + "log_odds_chosen": 8.670493125915527, + "log_odds_ratio": -0.011763294227421284, + "logits/chosen": -2.8013434410095215, + "logits/rejected": -2.920924425125122, + "logps/chosen": -0.16095298528671265, + "logps/rejected": -6.665195465087891, + "loss": 0.1166, + "nll_loss": 0.13346998393535614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008047649636864662, + "rewards/margins": 0.32521215081214905, + "rewards/rejected": -0.3332597613334656, + "step": 2570 + }, + { + "epoch": 2.7058206607236497, + "grad_norm": 2.1285971867573408, + "learning_rate": 3.937496154790789e-06, + "log_odds_chosen": 7.294459342956543, + "log_odds_ratio": -0.018316376954317093, + "logits/chosen": -2.816880702972412, + "logits/rejected": -2.8812124729156494, + "logps/chosen": -0.13620439171791077, + "logps/rejected": -5.142992973327637, + "loss": 0.1195, + "nll_loss": 0.10606805980205536, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0068102204240858555, + "rewards/margins": 0.25033941864967346, + "rewards/rejected": -0.2571496367454529, + "step": 2580 + }, + { + "epoch": 2.716308337703199, + "grad_norm": 2.400899470701997, + "learning_rate": 3.9298874594592975e-06, + "log_odds_chosen": 8.10938549041748, + "log_odds_ratio": -0.016252661123871803, + "logits/chosen": -2.807111978530884, + "logits/rejected": -2.915724515914917, + "logps/chosen": -0.15417781472206116, + "logps/rejected": -6.080683708190918, + "loss": 0.1163, + "nll_loss": 0.11585485935211182, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007708890829235315, + "rewards/margins": 0.2963252663612366, + "rewards/rejected": -0.30403420329093933, + "step": 2590 + }, + { + "epoch": 2.7267960146827477, + "grad_norm": 3.318597907364317, + "learning_rate": 3.922322702763682e-06, + "log_odds_chosen": 8.183881759643555, + "log_odds_ratio": -0.021557733416557312, + "logits/chosen": -2.8544585704803467, + "logits/rejected": -2.9738879203796387, + "logps/chosen": -0.14029571413993835, + "logps/rejected": -6.104724884033203, + "loss": 0.1238, + "nll_loss": 0.11269497871398926, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007014785893261433, + "rewards/margins": 0.2982214391231537, + "rewards/rejected": -0.30523625016212463, + "step": 2600 + }, + { + "epoch": 2.7267960146827477, + "eval_log_odds_chosen": 1.6673794984817505, + "eval_log_odds_ratio": -1.6934312582015991, + "eval_logits/chosen": -2.9804697036743164, + "eval_logits/rejected": -2.996739387512207, + "eval_logps/chosen": -6.072526454925537, + "eval_logps/rejected": -7.644432067871094, + "eval_loss": 2.6922054290771484, + "eval_nll_loss": 2.6498186588287354, + "eval_rewards/accuracies": 0.5873016119003296, + "eval_rewards/chosen": -0.30362632870674133, + "eval_rewards/margins": 0.07859525829553604, + "eval_rewards/rejected": -0.38222160935401917, + "eval_runtime": 136.8599, + "eval_samples_per_second": 14.57, + "eval_steps_per_second": 0.46, + "step": 2600 + }, + { + "epoch": 2.737283691662297, + "grad_norm": 2.23878079697452, + "learning_rate": 3.914801463431357e-06, + "log_odds_chosen": 7.083222389221191, + "log_odds_ratio": -0.02951228991150856, + "logits/chosen": -2.8593714237213135, + "logits/rejected": -2.9374592304229736, + "logps/chosen": -0.14687521755695343, + "logps/rejected": -5.056353569030762, + "loss": 0.1245, + "nll_loss": 0.11392644792795181, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007343760691583157, + "rewards/margins": 0.24547390639781952, + "rewards/rejected": -0.25281769037246704, + "step": 2610 + }, + { + "epoch": 2.7477713686418457, + "grad_norm": 3.0293992863459636, + "learning_rate": 3.907323325822818e-06, + "log_odds_chosen": 5.10004997253418, + "log_odds_ratio": -0.032727014273405075, + "logits/chosen": -2.780730962753296, + "logits/rejected": -2.8234589099884033, + "logps/chosen": -0.14557409286499023, + "logps/rejected": -3.112699031829834, + "loss": 0.1196, + "nll_loss": 0.1244465708732605, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007278704084455967, + "rewards/margins": 0.14835625886917114, + "rewards/rejected": -0.15563495457172394, + "step": 2620 + }, + { + "epoch": 2.758259045621395, + "grad_norm": 2.2549688272537094, + "learning_rate": 3.8998878798351596e-06, + "log_odds_chosen": 5.7140727043151855, + "log_odds_ratio": -0.026816044002771378, + "logits/chosen": -2.864112377166748, + "logits/rejected": -2.8956217765808105, + "logps/chosen": -0.14010892808437347, + "logps/rejected": -3.677777051925659, + "loss": 0.1148, + "nll_loss": 0.11140565574169159, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007005447056144476, + "rewards/margins": 0.17688342928886414, + "rewards/rejected": -0.18388888239860535, + "step": 2630 + }, + { + "epoch": 2.7687467226009437, + "grad_norm": 2.3361581110737384, + "learning_rate": 3.892494720807615e-06, + "log_odds_chosen": 6.5437517166137695, + "log_odds_ratio": -0.02287450060248375, + "logits/chosen": -2.835170269012451, + "logits/rejected": -2.904600143432617, + "logps/chosen": -0.15383225679397583, + "logps/rejected": -4.582453727722168, + "loss": 0.1163, + "nll_loss": 0.1210094466805458, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007691613398492336, + "rewards/margins": 0.22143109142780304, + "rewards/rejected": -0.22912268340587616, + "step": 2640 + }, + { + "epoch": 2.779234399580493, + "grad_norm": 2.113727988806721, + "learning_rate": 3.885143449429057e-06, + "log_odds_chosen": 8.709664344787598, + "log_odds_ratio": -0.01187268365174532, + "logits/chosen": -2.8075308799743652, + "logits/rejected": -2.8737902641296387, + "logps/chosen": -0.15384691953659058, + "logps/rejected": -6.678023338317871, + "loss": 0.1126, + "nll_loss": 0.11222463846206665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007692346815019846, + "rewards/margins": 0.32620885968208313, + "rewards/rejected": -0.33390119671821594, + "step": 2650 + }, + { + "epoch": 2.789722076560042, + "grad_norm": 2.1767794366513376, + "learning_rate": 3.877833671647406e-06, + "log_odds_chosen": 7.380768775939941, + "log_odds_ratio": -0.028077024966478348, + "logits/chosen": -2.793292999267578, + "logits/rejected": -2.8911733627319336, + "logps/chosen": -0.15328237414360046, + "logps/rejected": -5.426938533782959, + "loss": 0.1168, + "nll_loss": 0.11543625593185425, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0076641179621219635, + "rewards/margins": 0.26368287205696106, + "rewards/rejected": -0.27134692668914795, + "step": 2660 + }, + { + "epoch": 2.800209753539591, + "grad_norm": 2.256877035979117, + "learning_rate": 3.870564998580918e-06, + "log_odds_chosen": 8.639537811279297, + "log_odds_ratio": -0.022679299116134644, + "logits/chosen": -2.811685085296631, + "logits/rejected": -2.9056103229522705, + "logps/chosen": -0.15335455536842346, + "logps/rejected": -6.6522955894470215, + "loss": 0.1172, + "nll_loss": 0.1345623880624771, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.007667726371437311, + "rewards/margins": 0.3249470591545105, + "rewards/rejected": -0.3326147794723511, + "step": 2670 + }, + { + "epoch": 2.81069743051914, + "grad_norm": 2.0730722454139485, + "learning_rate": 3.863337046431279e-06, + "log_odds_chosen": 6.9750657081604, + "log_odds_ratio": -0.025320613756775856, + "logits/chosen": -2.7947394847869873, + "logits/rejected": -2.846017360687256, + "logps/chosen": -0.13509753346443176, + "logps/rejected": -4.8464508056640625, + "loss": 0.1193, + "nll_loss": 0.10888632386922836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006754877511411905, + "rewards/margins": 0.23556765913963318, + "rewards/rejected": -0.24232256412506104, + "step": 2680 + }, + { + "epoch": 2.821185107498689, + "grad_norm": 1.9858072033613254, + "learning_rate": 3.8561494363984955e-06, + "log_odds_chosen": 9.771112442016602, + "log_odds_ratio": -0.013731351122260094, + "logits/chosen": -2.8062682151794434, + "logits/rejected": -2.9753849506378174, + "logps/chosen": -0.14906486868858337, + "logps/rejected": -7.731194496154785, + "loss": 0.1179, + "nll_loss": 0.11920718103647232, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007453243248164654, + "rewards/margins": 0.37910646200180054, + "rewards/rejected": -0.38655975461006165, + "step": 2690 + }, + { + "epoch": 2.831672784478238, + "grad_norm": 1.6847580595509726, + "learning_rate": 3.849001794597506e-06, + "log_odds_chosen": 7.8019118309021, + "log_odds_ratio": -0.019792212173342705, + "logits/chosen": -2.8470611572265625, + "logits/rejected": -2.9447550773620605, + "logps/chosen": -0.15314054489135742, + "logps/rejected": -5.769678115844727, + "loss": 0.1192, + "nll_loss": 0.11755287647247314, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007657027803361416, + "rewards/margins": 0.2808268666267395, + "rewards/rejected": -0.2884839177131653, + "step": 2700 + }, + { + "epoch": 2.831672784478238, + "eval_log_odds_chosen": 1.020140528678894, + "eval_log_odds_ratio": -0.950748860836029, + "eval_logits/chosen": -2.866152763366699, + "eval_logits/rejected": -2.883617877960205, + "eval_logps/chosen": -2.3778645992279053, + "eval_logps/rejected": -3.2670860290527344, + "eval_loss": 1.2390626668930054, + "eval_nll_loss": 1.1910258531570435, + "eval_rewards/accuracies": 0.625, + "eval_rewards/chosen": -0.11889322102069855, + "eval_rewards/margins": 0.04446107894182205, + "eval_rewards/rejected": -0.16335429251194, + "eval_runtime": 137.1045, + "eval_samples_per_second": 14.544, + "eval_steps_per_second": 0.46, + "step": 2700 + }, + { + "epoch": 2.8421604614577873, + "grad_norm": 2.227062658222717, + "learning_rate": 3.841893751976493e-06, + "log_odds_chosen": 6.429055690765381, + "log_odds_ratio": -0.025566572323441505, + "logits/chosen": -2.8230857849121094, + "logits/rejected": -2.9232447147369385, + "logps/chosen": -0.13817086815834045, + "logps/rejected": -4.313010215759277, + "loss": 0.1236, + "nll_loss": 0.1359073519706726, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.006908542010933161, + "rewards/margins": 0.20874197781085968, + "rewards/rejected": -0.2156505137681961, + "step": 2710 + }, + { + "epoch": 2.852648138437336, + "grad_norm": 2.108179677461151, + "learning_rate": 3.834824944236852e-06, + "log_odds_chosen": 7.687928676605225, + "log_odds_ratio": -0.019871855154633522, + "logits/chosen": -2.9058802127838135, + "logits/rejected": -3.016103744506836, + "logps/chosen": -0.15432411432266235, + "logps/rejected": -5.692026615142822, + "loss": 0.1226, + "nll_loss": 0.12474212795495987, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0077162072993814945, + "rewards/margins": 0.27688512206077576, + "rewards/rejected": -0.2846013009548187, + "step": 2720 + }, + { + "epoch": 2.863135815416885, + "grad_norm": 2.0852362976431627, + "learning_rate": 3.827795011754764e-06, + "log_odds_chosen": 7.531012058258057, + "log_odds_ratio": -0.020183496177196503, + "logits/chosen": -2.9127936363220215, + "logits/rejected": -3.042579174041748, + "logps/chosen": -0.1713821142911911, + "logps/rejected": -5.637821197509766, + "loss": 0.1192, + "nll_loss": 0.1238013282418251, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.00856910552829504, + "rewards/margins": 0.2733219265937805, + "rewards/rejected": -0.2818910479545593, + "step": 2730 + }, + { + "epoch": 2.873623492396434, + "grad_norm": 2.1240217329220727, + "learning_rate": 3.8208035995043505e-06, + "log_odds_chosen": 7.918447017669678, + "log_odds_ratio": -0.016450051218271255, + "logits/chosen": -2.9222500324249268, + "logits/rejected": -3.0099682807922363, + "logps/chosen": -0.16613063216209412, + "logps/rejected": -5.923202037811279, + "loss": 0.1167, + "nll_loss": 0.11456701904535294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00830653216689825, + "rewards/margins": 0.28785353899002075, + "rewards/rejected": -0.2961600720882416, + "step": 2740 + }, + { + "epoch": 2.8841111693759833, + "grad_norm": 31.79228564478535, + "learning_rate": 3.8138503569823697e-06, + "log_odds_chosen": 6.909941673278809, + "log_odds_ratio": -0.009971695020794868, + "logits/chosen": -2.913257598876953, + "logits/rejected": -3.0123419761657715, + "logps/chosen": -0.14221827685832977, + "logps/rejected": -4.7533063888549805, + "loss": 0.1366, + "nll_loss": 0.12416551262140274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007110914681106806, + "rewards/margins": 0.23055438697338104, + "rewards/rejected": -0.2376653254032135, + "step": 2750 + }, + { + "epoch": 2.894598846355532, + "grad_norm": 1.9557051281290665, + "learning_rate": 3.806934938134405e-06, + "log_odds_chosen": 6.693169593811035, + "log_odds_ratio": -0.02671411633491516, + "logits/chosen": -2.8386614322662354, + "logits/rejected": -2.913949966430664, + "logps/chosen": -0.158113032579422, + "logps/rejected": -4.6884589195251465, + "loss": 0.1257, + "nll_loss": 0.13248762488365173, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.007905651815235615, + "rewards/margins": 0.22651728987693787, + "rewards/rejected": -0.23442292213439941, + "step": 2760 + }, + { + "epoch": 2.9050865233350813, + "grad_norm": 2.137070948069414, + "learning_rate": 3.800057001282532e-06, + "log_odds_chosen": 7.526410102844238, + "log_odds_ratio": -0.018288953229784966, + "logits/chosen": -2.8420822620391846, + "logits/rejected": -2.9359934329986572, + "logps/chosen": -0.13937655091285706, + "logps/rejected": -5.3555192947387695, + "loss": 0.1203, + "nll_loss": 0.11602024734020233, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.00696882838383317, + "rewards/margins": 0.2608071565628052, + "rewards/rejected": -0.2677759826183319, + "step": 2770 + }, + { + "epoch": 2.91557420031463, + "grad_norm": 1.9039164114563458, + "learning_rate": 3.7932162090544085e-06, + "log_odds_chosen": 8.005070686340332, + "log_odds_ratio": -0.013831285759806633, + "logits/chosen": -2.85080885887146, + "logits/rejected": -2.9412410259246826, + "logps/chosen": -0.14242660999298096, + "logps/rejected": -5.835131645202637, + "loss": 0.115, + "nll_loss": 0.11129038035869598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007121330592781305, + "rewards/margins": 0.2846352159976959, + "rewards/rejected": -0.2917565703392029, + "step": 2780 + }, + { + "epoch": 2.9260618772941793, + "grad_norm": 1.9066238493747631, + "learning_rate": 3.7864122283137657e-06, + "log_odds_chosen": 8.59681510925293, + "log_odds_ratio": -0.01634146459400654, + "logits/chosen": -2.811566114425659, + "logits/rejected": -2.953697681427002, + "logps/chosen": -0.1852981150150299, + "logps/rejected": -6.696959495544434, + "loss": 0.1237, + "nll_loss": 0.13221383094787598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009264904074370861, + "rewards/margins": 0.3255830705165863, + "rewards/rejected": -0.33484798669815063, + "step": 2790 + }, + { + "epoch": 2.9365495542737285, + "grad_norm": 2.1229204349942523, + "learning_rate": 3.7796447300922724e-06, + "log_odds_chosen": 8.886019706726074, + "log_odds_ratio": -0.014133910648524761, + "logits/chosen": -2.8244338035583496, + "logits/rejected": -2.9361133575439453, + "logps/chosen": -0.1553722470998764, + "logps/rejected": -6.724435329437256, + "loss": 0.1191, + "nll_loss": 0.11856858432292938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007768611423671246, + "rewards/margins": 0.3284532129764557, + "rewards/rejected": -0.3362218141555786, + "step": 2800 + }, + { + "epoch": 2.9365495542737285, + "eval_log_odds_chosen": 0.9868643283843994, + "eval_log_odds_ratio": -0.8558183312416077, + "eval_logits/chosen": -2.8059191703796387, + "eval_logits/rejected": -2.8221092224121094, + "eval_logps/chosen": -1.9523440599441528, + "eval_logps/rejected": -2.7882232666015625, + "eval_loss": 1.0213509798049927, + "eval_nll_loss": 0.9673047065734863, + "eval_rewards/accuracies": 0.6269841194152832, + "eval_rewards/chosen": -0.09761719405651093, + "eval_rewards/margins": 0.04179396852850914, + "eval_rewards/rejected": -0.13941116631031036, + "eval_runtime": 140.3646, + "eval_samples_per_second": 14.206, + "eval_steps_per_second": 0.449, + "step": 2800 + }, + { + "epoch": 2.9470372312532773, + "grad_norm": 1.8098718147037927, + "learning_rate": 3.772913389522725e-06, + "log_odds_chosen": 7.045705318450928, + "log_odds_ratio": -0.0264790840446949, + "logits/chosen": -2.8278496265411377, + "logits/rejected": -2.935941696166992, + "logps/chosen": -0.16044145822525024, + "logps/rejected": -5.10351037979126, + "loss": 0.1197, + "nll_loss": 0.11624834686517715, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.008022072724997997, + "rewards/margins": 0.24715343117713928, + "rewards/rejected": -0.25517550110816956, + "step": 2810 + }, + { + "epoch": 2.9575249082328265, + "grad_norm": 1.8754542855362524, + "learning_rate": 3.7662178857735478e-06, + "log_odds_chosen": 8.025814056396484, + "log_odds_ratio": -0.014746090397238731, + "logits/chosen": -2.7981061935424805, + "logits/rejected": -2.9223358631134033, + "logps/chosen": -0.1609780192375183, + "logps/rejected": -6.0790114402771, + "loss": 0.1164, + "nll_loss": 0.114871546626091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00804890040308237, + "rewards/margins": 0.29590168595314026, + "rewards/rejected": -0.30395060777664185, + "step": 2820 + }, + { + "epoch": 2.9680125852123753, + "grad_norm": 2.270114335100112, + "learning_rate": 3.7595579019845623e-06, + "log_odds_chosen": 7.872386932373047, + "log_odds_ratio": -0.01882219687104225, + "logits/chosen": -2.8168020248413086, + "logits/rejected": -2.900966167449951, + "logps/chosen": -0.1528329849243164, + "logps/rejected": -5.721396446228027, + "loss": 0.117, + "nll_loss": 0.1145024448633194, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.007641649339348078, + "rewards/margins": 0.27842822670936584, + "rewards/rejected": -0.2860698103904724, + "step": 2830 + }, + { + "epoch": 2.9785002621919245, + "grad_norm": 2.2955550853318907, + "learning_rate": 3.752933125204008e-06, + "log_odds_chosen": 8.305427551269531, + "log_odds_ratio": -0.02256721630692482, + "logits/chosen": -2.8052284717559814, + "logits/rejected": -2.9265544414520264, + "logps/chosen": -0.13989822566509247, + "logps/rejected": -6.217524528503418, + "loss": 0.1182, + "nll_loss": 0.12114028632640839, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.006994911935180426, + "rewards/margins": 0.30388128757476807, + "rewards/rejected": -0.31087619066238403, + "step": 2840 + }, + { + "epoch": 2.9889879391714738, + "grad_norm": 1.888221991554896, + "learning_rate": 3.7463432463267764e-06, + "log_odds_chosen": 7.020120143890381, + "log_odds_ratio": -0.01538365613669157, + "logits/chosen": -2.8246865272521973, + "logits/rejected": -2.9202027320861816, + "logps/chosen": -0.16290083527565002, + "logps/rejected": -4.992356777191162, + "loss": 0.1252, + "nll_loss": 0.14337727427482605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008145040832459927, + "rewards/margins": 0.24147279560565948, + "rewards/rejected": -0.24961784482002258, + "step": 2850 + }, + { + "epoch": 2.9984268484530676, + "step": 2859, "total_flos": 0.0, - "train_loss": 0.5642813587989287, - "train_runtime": 20357.789, - "train_samples_per_second": 2.997, - "train_steps_per_second": 0.047 + "train_loss": 0.32389816019492534, + "train_runtime": 62235.4926, + "train_samples_per_second": 2.941, + "train_steps_per_second": 0.046 } ], "logging_steps": 10, - "max_steps": 953, + "max_steps": 2859, "num_input_tokens_seen": 0, - "num_train_epochs": 1, + "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": {