|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9984268484530676, |
|
"eval_steps": 100, |
|
"global_step": 2859, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01048767697954903, |
|
"grad_norm": 11.303338968797107, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.16597549617290497, |
|
"log_odds_ratio": -0.6960083246231079, |
|
"logits/chosen": -2.5440375804901123, |
|
"logits/rejected": -2.532742977142334, |
|
"logps/chosen": -0.9999498128890991, |
|
"logps/rejected": -1.0999202728271484, |
|
"loss": 2.7435, |
|
"nll_loss": 2.655998706817627, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.049997489899396896, |
|
"rewards/margins": 0.004998520482331514, |
|
"rewards/rejected": -0.054996006190776825, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02097535395909806, |
|
"grad_norm": 3.296785739531489, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.19497092068195343, |
|
"log_odds_ratio": -0.6663684844970703, |
|
"logits/chosen": -3.153244733810425, |
|
"logits/rejected": -3.176297903060913, |
|
"logps/chosen": -0.7618023753166199, |
|
"logps/rejected": -0.8721799850463867, |
|
"loss": 0.5628, |
|
"nll_loss": 0.5223663449287415, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03809012100100517, |
|
"rewards/margins": 0.005518879741430283, |
|
"rewards/rejected": -0.043609000742435455, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03146303093864709, |
|
"grad_norm": 2.5096714885559264, |
|
"learning_rate": 6e-06, |
|
"log_odds_chosen": 0.23512229323387146, |
|
"log_odds_ratio": -0.6553729772567749, |
|
"logits/chosen": -2.9705119132995605, |
|
"logits/rejected": -2.944556713104248, |
|
"logps/chosen": -0.8099643588066101, |
|
"logps/rejected": -0.9404464960098267, |
|
"loss": 0.5331, |
|
"nll_loss": 0.4915856420993805, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.040498219430446625, |
|
"rewards/margins": 0.0065241060219705105, |
|
"rewards/rejected": -0.04702232405543327, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04195070791819612, |
|
"grad_norm": 2.5670929503530138, |
|
"learning_rate": 8.000000000000001e-06, |
|
"log_odds_chosen": 0.1703537404537201, |
|
"log_odds_ratio": -0.6904168128967285, |
|
"logits/chosen": -2.8517043590545654, |
|
"logits/rejected": -2.83884334564209, |
|
"logps/chosen": -0.805575966835022, |
|
"logps/rejected": -0.9237464666366577, |
|
"loss": 0.5194, |
|
"nll_loss": 0.4799742102622986, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04027879983186722, |
|
"rewards/margins": 0.005908523220568895, |
|
"rewards/rejected": -0.046187322586774826, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05243838489774515, |
|
"grad_norm": 2.8257696541784587, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.28843408823013306, |
|
"log_odds_ratio": -0.6763556599617004, |
|
"logits/chosen": -2.7286221981048584, |
|
"logits/rejected": -2.72869610786438, |
|
"logps/chosen": -0.787534236907959, |
|
"logps/rejected": -0.968492865562439, |
|
"loss": 0.5419, |
|
"nll_loss": 0.48419374227523804, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03937670961022377, |
|
"rewards/margins": 0.009047931991517544, |
|
"rewards/rejected": -0.04842463880777359, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06292606187729417, |
|
"grad_norm": 2.7270372711002624, |
|
"learning_rate": 1.2e-05, |
|
"log_odds_chosen": 0.2020198553800583, |
|
"log_odds_ratio": -0.6800572872161865, |
|
"logits/chosen": -2.896289110183716, |
|
"logits/rejected": -2.8839545249938965, |
|
"logps/chosen": -0.8010624051094055, |
|
"logps/rejected": -0.9179455637931824, |
|
"loss": 0.5456, |
|
"nll_loss": 0.5158990621566772, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.040053121745586395, |
|
"rewards/margins": 0.005844158586114645, |
|
"rewards/rejected": -0.04589728266000748, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07341373885684321, |
|
"grad_norm": 2.7197204143491605, |
|
"learning_rate": 1.4e-05, |
|
"log_odds_chosen": 0.1937415450811386, |
|
"log_odds_ratio": -0.6942794919013977, |
|
"logits/chosen": -2.8848633766174316, |
|
"logits/rejected": -2.905164957046509, |
|
"logps/chosen": -0.8219146728515625, |
|
"logps/rejected": -0.9291160702705383, |
|
"loss": 0.5412, |
|
"nll_loss": 0.5311218500137329, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.041095733642578125, |
|
"rewards/margins": 0.005360070150345564, |
|
"rewards/rejected": -0.046455807983875275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08390141583639224, |
|
"grad_norm": 1049.2102246099553, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"log_odds_chosen": 0.1753607988357544, |
|
"log_odds_ratio": -0.6886225938796997, |
|
"logits/chosen": -2.6637063026428223, |
|
"logits/rejected": -2.637396812438965, |
|
"logps/chosen": -0.8933579325675964, |
|
"logps/rejected": -1.020629644393921, |
|
"loss": 1.0694, |
|
"nll_loss": 0.9787748456001282, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04466789960861206, |
|
"rewards/margins": 0.006363583263009787, |
|
"rewards/rejected": -0.051031481474637985, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09438909281594127, |
|
"grad_norm": 4.011701524085754, |
|
"learning_rate": 1.8e-05, |
|
"log_odds_chosen": 0.2628815174102783, |
|
"log_odds_ratio": -0.6731477975845337, |
|
"logits/chosen": -3.106489658355713, |
|
"logits/rejected": -3.0954391956329346, |
|
"logps/chosen": -0.9435924291610718, |
|
"logps/rejected": -1.1041589975357056, |
|
"loss": 0.5766, |
|
"nll_loss": 0.5112682580947876, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04717962443828583, |
|
"rewards/margins": 0.008028322830796242, |
|
"rewards/rejected": -0.05520794540643692, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1048767697954903, |
|
"grad_norm": 5.340561330006851, |
|
"learning_rate": 2e-05, |
|
"log_odds_chosen": 0.17503713071346283, |
|
"log_odds_ratio": -0.6751121282577515, |
|
"logits/chosen": -3.3266518115997314, |
|
"logits/rejected": -3.3420982360839844, |
|
"logps/chosen": -0.8886896371841431, |
|
"logps/rejected": -1.0002682209014893, |
|
"loss": 0.5668, |
|
"nll_loss": 0.5238600969314575, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.044434480369091034, |
|
"rewards/margins": 0.005578924436122179, |
|
"rewards/rejected": -0.050013404339551926, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1048767697954903, |
|
"eval_log_odds_chosen": 0.21844430267810822, |
|
"eval_log_odds_ratio": -0.6529861688613892, |
|
"eval_logits/chosen": -3.3082144260406494, |
|
"eval_logits/rejected": -3.3147807121276855, |
|
"eval_logps/chosen": -0.9112777709960938, |
|
"eval_logps/rejected": -1.0580321550369263, |
|
"eval_loss": 0.5842872858047485, |
|
"eval_nll_loss": 0.5515953898429871, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": -0.04556388780474663, |
|
"eval_rewards/margins": 0.007337724789977074, |
|
"eval_rewards/rejected": -0.05290161445736885, |
|
"eval_runtime": 138.2645, |
|
"eval_samples_per_second": 14.422, |
|
"eval_steps_per_second": 0.456, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11536444677503933, |
|
"grad_norm": 2.8100337089038514, |
|
"learning_rate": 1.9069251784911845e-05, |
|
"log_odds_chosen": 0.2544933259487152, |
|
"log_odds_ratio": -0.643945038318634, |
|
"logits/chosen": -3.2667174339294434, |
|
"logits/rejected": -3.310918092727661, |
|
"logps/chosen": -0.8447545170783997, |
|
"logps/rejected": -1.009132981300354, |
|
"loss": 0.5651, |
|
"nll_loss": 0.5105677843093872, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.042237721383571625, |
|
"rewards/margins": 0.008218927308917046, |
|
"rewards/rejected": -0.05045665428042412, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12585212375458835, |
|
"grad_norm": 2.2193460343172986, |
|
"learning_rate": 1.825741858350554e-05, |
|
"log_odds_chosen": 0.24397364258766174, |
|
"log_odds_ratio": -0.6682508587837219, |
|
"logits/chosen": -3.193361282348633, |
|
"logits/rejected": -3.243128538131714, |
|
"logps/chosen": -0.8714381456375122, |
|
"logps/rejected": -1.0333614349365234, |
|
"loss": 0.6091, |
|
"nll_loss": 0.5700744390487671, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04357190802693367, |
|
"rewards/margins": 0.008096165955066681, |
|
"rewards/rejected": -0.051668066531419754, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1363398007341374, |
|
"grad_norm": 2.3414921674264555, |
|
"learning_rate": 1.7541160386140587e-05, |
|
"log_odds_chosen": 0.2272050678730011, |
|
"log_odds_ratio": -0.6708214282989502, |
|
"logits/chosen": -3.1920104026794434, |
|
"logits/rejected": -3.211714267730713, |
|
"logps/chosen": -0.8986352682113647, |
|
"logps/rejected": -1.0474598407745361, |
|
"loss": 0.5886, |
|
"nll_loss": 0.552306056022644, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.044931765645742416, |
|
"rewards/margins": 0.007441227789968252, |
|
"rewards/rejected": -0.052372999489307404, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14682747771368643, |
|
"grad_norm": 2.3255085925590597, |
|
"learning_rate": 1.6903085094570334e-05, |
|
"log_odds_chosen": 0.22232067584991455, |
|
"log_odds_ratio": -0.6680520176887512, |
|
"logits/chosen": -3.1715519428253174, |
|
"logits/rejected": -3.198253631591797, |
|
"logps/chosen": -0.9551104307174683, |
|
"logps/rejected": -1.1022988557815552, |
|
"loss": 0.5878, |
|
"nll_loss": 0.5523446798324585, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04775552451610565, |
|
"rewards/margins": 0.007359415292739868, |
|
"rewards/rejected": -0.05511493608355522, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15731515469323545, |
|
"grad_norm": 2.6729814886854766, |
|
"learning_rate": 1.6329931618554523e-05, |
|
"log_odds_chosen": 0.17247287929058075, |
|
"log_odds_ratio": -0.7340894341468811, |
|
"logits/chosen": -3.102067470550537, |
|
"logits/rejected": -3.1263070106506348, |
|
"logps/chosen": -0.9946192502975464, |
|
"logps/rejected": -1.1088117361068726, |
|
"loss": 0.5489, |
|
"nll_loss": 0.5492355823516846, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04973096773028374, |
|
"rewards/margins": 0.005709617864340544, |
|
"rewards/rejected": -0.05544058233499527, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16780283167278448, |
|
"grad_norm": 2.603021066142599, |
|
"learning_rate": 1.5811388300841898e-05, |
|
"log_odds_chosen": 0.2041763812303543, |
|
"log_odds_ratio": -0.6666288375854492, |
|
"logits/chosen": -3.0764000415802, |
|
"logits/rejected": -3.1064279079437256, |
|
"logps/chosen": -0.9137493968009949, |
|
"logps/rejected": -1.0383034944534302, |
|
"loss": 0.6063, |
|
"nll_loss": 0.5569471120834351, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.045687466859817505, |
|
"rewards/margins": 0.006227707955986261, |
|
"rewards/rejected": -0.05191517621278763, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1782905086523335, |
|
"grad_norm": 2.4919552056925416, |
|
"learning_rate": 1.533929977694741e-05, |
|
"log_odds_chosen": 0.25588172674179077, |
|
"log_odds_ratio": -0.6607967019081116, |
|
"logits/chosen": -3.1293396949768066, |
|
"logits/rejected": -3.1606574058532715, |
|
"logps/chosen": -0.8986794352531433, |
|
"logps/rejected": -1.0667051076889038, |
|
"loss": 0.5845, |
|
"nll_loss": 0.5496193766593933, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.044933974742889404, |
|
"rewards/margins": 0.008401280269026756, |
|
"rewards/rejected": -0.05333525687456131, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18877818563188253, |
|
"grad_norm": 2.4600198980545915, |
|
"learning_rate": 1.49071198499986e-05, |
|
"log_odds_chosen": 0.27393144369125366, |
|
"log_odds_ratio": -0.6479635238647461, |
|
"logits/chosen": -3.080091714859009, |
|
"logits/rejected": -3.103672504425049, |
|
"logps/chosen": -0.9190357327461243, |
|
"logps/rejected": -1.0871737003326416, |
|
"loss": 0.5676, |
|
"nll_loss": 0.550677478313446, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.045951783657073975, |
|
"rewards/margins": 0.00840689055621624, |
|
"rewards/rejected": -0.05435867980122566, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19926586261143156, |
|
"grad_norm": 5.689090620434962, |
|
"learning_rate": 1.4509525002200235e-05, |
|
"log_odds_chosen": 0.23676976561546326, |
|
"log_odds_ratio": -0.6501709222793579, |
|
"logits/chosen": -3.0815584659576416, |
|
"logits/rejected": -3.1054322719573975, |
|
"logps/chosen": -0.9278916120529175, |
|
"logps/rejected": -1.0751855373382568, |
|
"loss": 0.5906, |
|
"nll_loss": 0.6120038628578186, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.046394579112529755, |
|
"rewards/margins": 0.007364692632108927, |
|
"rewards/rejected": -0.05375927686691284, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2097535395909806, |
|
"grad_norm": 2.2848535898780375, |
|
"learning_rate": 1.4142135623730951e-05, |
|
"log_odds_chosen": 0.2697228789329529, |
|
"log_odds_ratio": -0.6704415082931519, |
|
"logits/chosen": -2.99995756149292, |
|
"logits/rejected": -3.038682460784912, |
|
"logps/chosen": -0.9138332605361938, |
|
"logps/rejected": -1.1080011129379272, |
|
"loss": 0.5676, |
|
"nll_loss": 0.5736643075942993, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04569166153669357, |
|
"rewards/margins": 0.009708395227789879, |
|
"rewards/rejected": -0.0554000549018383, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2097535395909806, |
|
"eval_log_odds_chosen": 0.2850116789340973, |
|
"eval_log_odds_ratio": -0.6474155783653259, |
|
"eval_logits/chosen": -2.9992330074310303, |
|
"eval_logits/rejected": -3.0026443004608154, |
|
"eval_logps/chosen": -0.8811094164848328, |
|
"eval_logps/rejected": -1.0644237995147705, |
|
"eval_loss": 0.5726434588432312, |
|
"eval_nll_loss": 0.5359312295913696, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.04405546560883522, |
|
"eval_rewards/margins": 0.00916572567075491, |
|
"eval_rewards/rejected": -0.053221192210912704, |
|
"eval_runtime": 137.9025, |
|
"eval_samples_per_second": 14.459, |
|
"eval_steps_per_second": 0.457, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22024121657052964, |
|
"grad_norm": 2.2864637176453266, |
|
"learning_rate": 1.3801311186847084e-05, |
|
"log_odds_chosen": 0.10374544560909271, |
|
"log_odds_ratio": -0.7170687913894653, |
|
"logits/chosen": -3.0079314708709717, |
|
"logits/rejected": -3.026061773300171, |
|
"logps/chosen": -0.8713214993476868, |
|
"logps/rejected": -0.9376395344734192, |
|
"loss": 0.5683, |
|
"nll_loss": 0.5364366769790649, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04356607422232628, |
|
"rewards/margins": 0.003315900219604373, |
|
"rewards/rejected": -0.04688197374343872, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23072889355007867, |
|
"grad_norm": 2.3833164568305705, |
|
"learning_rate": 1.3483997249264842e-05, |
|
"log_odds_chosen": 0.1967695653438568, |
|
"log_odds_ratio": -0.6872244477272034, |
|
"logits/chosen": -3.066392183303833, |
|
"logits/rejected": -3.0755832195281982, |
|
"logps/chosen": -0.8734294176101685, |
|
"logps/rejected": -0.9998324513435364, |
|
"loss": 0.5608, |
|
"nll_loss": 0.5176301598548889, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.043671466410160065, |
|
"rewards/margins": 0.006320156157016754, |
|
"rewards/rejected": -0.04999162256717682, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2412165705296277, |
|
"grad_norm": 2.143148051812647, |
|
"learning_rate": 1.3187609467915744e-05, |
|
"log_odds_chosen": 0.2681586444377899, |
|
"log_odds_ratio": -0.669995129108429, |
|
"logits/chosen": -3.0045371055603027, |
|
"logits/rejected": -3.023197889328003, |
|
"logps/chosen": -0.9347988963127136, |
|
"logps/rejected": -1.1079022884368896, |
|
"loss": 0.5715, |
|
"nll_loss": 0.5268279910087585, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04673994705080986, |
|
"rewards/margins": 0.00865517370402813, |
|
"rewards/rejected": -0.05539512634277344, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2517042475091767, |
|
"grad_norm": 2.4867634050680865, |
|
"learning_rate": 1.2909944487358057e-05, |
|
"log_odds_chosen": 0.2310989797115326, |
|
"log_odds_ratio": -0.6607853770256042, |
|
"logits/chosen": -3.0592639446258545, |
|
"logits/rejected": -3.0972437858581543, |
|
"logps/chosen": -0.90626060962677, |
|
"logps/rejected": -1.057490587234497, |
|
"loss": 0.5797, |
|
"nll_loss": 0.5543950796127319, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0453130342066288, |
|
"rewards/margins": 0.007561509497463703, |
|
"rewards/rejected": -0.05287454277276993, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26219192448872575, |
|
"grad_norm": 2.2846935841220364, |
|
"learning_rate": 1.2649110640673518e-05, |
|
"log_odds_chosen": 0.24984344840049744, |
|
"log_odds_ratio": -0.6764962077140808, |
|
"logits/chosen": -3.0678868293762207, |
|
"logits/rejected": -3.0685126781463623, |
|
"logps/chosen": -0.8884732127189636, |
|
"logps/rejected": -1.025420904159546, |
|
"loss": 0.5498, |
|
"nll_loss": 0.5219429731369019, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0444236658513546, |
|
"rewards/margins": 0.006847388111054897, |
|
"rewards/rejected": -0.05127105116844177, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2726796014682748, |
|
"grad_norm": 2.3800633619201523, |
|
"learning_rate": 1.2403473458920845e-05, |
|
"log_odds_chosen": 0.2426706850528717, |
|
"log_odds_ratio": -0.6691194772720337, |
|
"logits/chosen": -3.0950028896331787, |
|
"logits/rejected": -3.112684488296509, |
|
"logps/chosen": -0.8879591822624207, |
|
"logps/rejected": -1.042834997177124, |
|
"loss": 0.5302, |
|
"nll_loss": 0.45519179105758667, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04439795762300491, |
|
"rewards/margins": 0.00774379214271903, |
|
"rewards/rejected": -0.05214175581932068, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2831672784478238, |
|
"grad_norm": 2.3697586961370027, |
|
"learning_rate": 1.2171612389003691e-05, |
|
"log_odds_chosen": 0.23119862377643585, |
|
"log_odds_ratio": -0.6756153702735901, |
|
"logits/chosen": -3.113889455795288, |
|
"logits/rejected": -3.157740354537964, |
|
"logps/chosen": -0.9564247131347656, |
|
"logps/rejected": -1.1352105140686035, |
|
"loss": 0.5654, |
|
"nll_loss": 0.5433498024940491, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.04782123863697052, |
|
"rewards/margins": 0.008939290419220924, |
|
"rewards/rejected": -0.056760527193546295, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29365495542737285, |
|
"grad_norm": 1.9757109026566833, |
|
"learning_rate": 1.1952286093343936e-05, |
|
"log_odds_chosen": 0.25132113695144653, |
|
"log_odds_ratio": -0.6663895845413208, |
|
"logits/chosen": -3.1407101154327393, |
|
"logits/rejected": -3.1832191944122314, |
|
"logps/chosen": -0.9308640360832214, |
|
"logps/rejected": -1.087449312210083, |
|
"loss": 0.5429, |
|
"nll_loss": 0.4785974621772766, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04654319956898689, |
|
"rewards/margins": 0.007829269394278526, |
|
"rewards/rejected": -0.05437246710062027, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30414263240692185, |
|
"grad_norm": 2.7308236297418427, |
|
"learning_rate": 1.1744404390294071e-05, |
|
"log_odds_chosen": 0.35913094878196716, |
|
"log_odds_ratio": -0.6187662482261658, |
|
"logits/chosen": -3.0944533348083496, |
|
"logits/rejected": -3.1177055835723877, |
|
"logps/chosen": -0.8355825543403625, |
|
"logps/rejected": -1.0572632551193237, |
|
"loss": 0.5568, |
|
"nll_loss": 0.48925265669822693, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.041779130697250366, |
|
"rewards/margins": 0.011084041558206081, |
|
"rewards/rejected": -0.052863169461488724, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3146303093864709, |
|
"grad_norm": 2.472653160364779, |
|
"learning_rate": 1.1547005383792517e-05, |
|
"log_odds_chosen": 0.2816540598869324, |
|
"log_odds_ratio": -0.6775935888290405, |
|
"logits/chosen": -3.092194080352783, |
|
"logits/rejected": -3.1420485973358154, |
|
"logps/chosen": -0.8778301477432251, |
|
"logps/rejected": -1.0717580318450928, |
|
"loss": 0.5819, |
|
"nll_loss": 0.5100258588790894, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04389150068163872, |
|
"rewards/margins": 0.009696396067738533, |
|
"rewards/rejected": -0.0535879023373127, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3146303093864709, |
|
"eval_log_odds_chosen": 0.28298813104629517, |
|
"eval_log_odds_ratio": -0.6463662981987, |
|
"eval_logits/chosen": -3.1391000747680664, |
|
"eval_logits/rejected": -3.1424100399017334, |
|
"eval_logps/chosen": -0.8770027756690979, |
|
"eval_logps/rejected": -1.0619502067565918, |
|
"eval_loss": 0.5552015900611877, |
|
"eval_nll_loss": 0.5201771259307861, |
|
"eval_rewards/accuracies": 0.6289682388305664, |
|
"eval_rewards/chosen": -0.043850142508745193, |
|
"eval_rewards/margins": 0.00924737099558115, |
|
"eval_rewards/rejected": -0.05309751257300377, |
|
"eval_runtime": 141.1002, |
|
"eval_samples_per_second": 14.132, |
|
"eval_steps_per_second": 0.446, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3251179863660199, |
|
"grad_norm": 2.038557141198459, |
|
"learning_rate": 1.1359236684941297e-05, |
|
"log_odds_chosen": 0.1998841017484665, |
|
"log_odds_ratio": -0.6875525116920471, |
|
"logits/chosen": -3.0676262378692627, |
|
"logits/rejected": -3.07094407081604, |
|
"logps/chosen": -0.9092122912406921, |
|
"logps/rejected": -1.0280473232269287, |
|
"loss": 0.5844, |
|
"nll_loss": 0.5417822599411011, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04546061158180237, |
|
"rewards/margins": 0.005941747687757015, |
|
"rewards/rejected": -0.051402367651462555, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33560566334556896, |
|
"grad_norm": 2.262270965184679, |
|
"learning_rate": 1.118033988749895e-05, |
|
"log_odds_chosen": 0.2705835700035095, |
|
"log_odds_ratio": -0.6538633108139038, |
|
"logits/chosen": -3.127427339553833, |
|
"logits/rejected": -3.142587661743164, |
|
"logps/chosen": -0.9069059491157532, |
|
"logps/rejected": -1.0691728591918945, |
|
"loss": 0.5242, |
|
"nll_loss": 0.4929099977016449, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04534530267119408, |
|
"rewards/margins": 0.008113345131278038, |
|
"rewards/rejected": -0.05345864221453667, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34609334032511796, |
|
"grad_norm": 2.4122464498293623, |
|
"learning_rate": 1.1009637651263608e-05, |
|
"log_odds_chosen": 0.23684370517730713, |
|
"log_odds_ratio": -0.7030869722366333, |
|
"logits/chosen": -3.0819878578186035, |
|
"logits/rejected": -3.1327972412109375, |
|
"logps/chosen": -0.9059860110282898, |
|
"logps/rejected": -1.0601646900177002, |
|
"loss": 0.5547, |
|
"nll_loss": 0.5366790890693665, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04529929906129837, |
|
"rewards/margins": 0.007708935532718897, |
|
"rewards/rejected": -0.05300822854042053, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.356581017304667, |
|
"grad_norm": 2.3793498474146535, |
|
"learning_rate": 1.0846522890932809e-05, |
|
"log_odds_chosen": 0.18786638975143433, |
|
"log_odds_ratio": -0.6986292004585266, |
|
"logits/chosen": -3.0940568447113037, |
|
"logits/rejected": -3.1512954235076904, |
|
"logps/chosen": -0.8602282404899597, |
|
"logps/rejected": -0.9875131845474243, |
|
"loss": 0.5702, |
|
"nll_loss": 0.5145949125289917, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.043011412024497986, |
|
"rewards/margins": 0.0063642458990216255, |
|
"rewards/rejected": -0.049375660717487335, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36706869428421607, |
|
"grad_norm": 2.3420960793915517, |
|
"learning_rate": 1.0690449676496977e-05, |
|
"log_odds_chosen": 0.2689460217952728, |
|
"log_odds_ratio": -0.6845754384994507, |
|
"logits/chosen": -3.1326746940612793, |
|
"logits/rejected": -3.1552205085754395, |
|
"logps/chosen": -0.8725005984306335, |
|
"logps/rejected": -1.0421197414398193, |
|
"loss": 0.5462, |
|
"nll_loss": 0.5172144174575806, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.043625034391880035, |
|
"rewards/margins": 0.008480949327349663, |
|
"rewards/rejected": -0.05210598558187485, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37755637126376507, |
|
"grad_norm": 2.014589871880686, |
|
"learning_rate": 1.0540925533894598e-05, |
|
"log_odds_chosen": 0.37792789936065674, |
|
"log_odds_ratio": -0.6156649589538574, |
|
"logits/chosen": -3.010802745819092, |
|
"logits/rejected": -3.042652130126953, |
|
"logps/chosen": -0.8830682635307312, |
|
"logps/rejected": -1.118240237236023, |
|
"loss": 0.5497, |
|
"nll_loss": 0.5099813938140869, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04415341466665268, |
|
"rewards/margins": 0.011758595705032349, |
|
"rewards/rejected": -0.05591200664639473, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3880440482433141, |
|
"grad_norm": 2.0494786838330903, |
|
"learning_rate": 1.0397504898200728e-05, |
|
"log_odds_chosen": 0.37991228699684143, |
|
"log_odds_ratio": -0.6151097416877747, |
|
"logits/chosen": -3.071289539337158, |
|
"logits/rejected": -3.0840888023376465, |
|
"logps/chosen": -0.863991379737854, |
|
"logps/rejected": -1.1161118745803833, |
|
"loss": 0.5195, |
|
"nll_loss": 0.4998775124549866, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0431995615363121, |
|
"rewards/margins": 0.012606029398739338, |
|
"rewards/rejected": -0.055805593729019165, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3985317252228631, |
|
"grad_norm": 2.3440751758332294, |
|
"learning_rate": 1.0259783520851543e-05, |
|
"log_odds_chosen": 0.4805373549461365, |
|
"log_odds_ratio": -0.5845500230789185, |
|
"logits/chosen": -3.1311728954315186, |
|
"logits/rejected": -3.168400287628174, |
|
"logps/chosen": -0.8546767234802246, |
|
"logps/rejected": -1.1352304220199585, |
|
"loss": 0.5371, |
|
"nll_loss": 0.5167530179023743, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04273384064435959, |
|
"rewards/margins": 0.014027683064341545, |
|
"rewards/rejected": -0.056761521846055984, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4090194022024122, |
|
"grad_norm": 2.50155675830033, |
|
"learning_rate": 1.0127393670836667e-05, |
|
"log_odds_chosen": 0.0912429466843605, |
|
"log_odds_ratio": -0.7177212238311768, |
|
"logits/chosen": -3.1054975986480713, |
|
"logits/rejected": -3.1308093070983887, |
|
"logps/chosen": -0.9102872014045715, |
|
"logps/rejected": -0.9754246473312378, |
|
"loss": 0.5574, |
|
"nll_loss": 0.5331951379776001, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.045514363795518875, |
|
"rewards/margins": 0.003256872994825244, |
|
"rewards/rejected": -0.04877123609185219, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4195070791819612, |
|
"grad_norm": 2.027467517514936, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.2633103132247925, |
|
"log_odds_ratio": -0.6879682540893555, |
|
"logits/chosen": -3.0087058544158936, |
|
"logits/rejected": -3.0386600494384766, |
|
"logps/chosen": -0.9468951225280762, |
|
"logps/rejected": -1.1236045360565186, |
|
"loss": 0.5738, |
|
"nll_loss": 0.527585506439209, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04734475538134575, |
|
"rewards/margins": 0.008835467509925365, |
|
"rewards/rejected": -0.05618022754788399, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4195070791819612, |
|
"eval_log_odds_chosen": 0.2960740923881531, |
|
"eval_log_odds_ratio": -0.6521593332290649, |
|
"eval_logits/chosen": -3.1019551753997803, |
|
"eval_logits/rejected": -3.1026368141174316, |
|
"eval_logps/chosen": -0.8433709740638733, |
|
"eval_logps/rejected": -1.0346297025680542, |
|
"eval_loss": 0.5411269664764404, |
|
"eval_nll_loss": 0.5047088265419006, |
|
"eval_rewards/accuracies": 0.6289682388305664, |
|
"eval_rewards/chosen": -0.042168550193309784, |
|
"eval_rewards/margins": 0.00956293661147356, |
|
"eval_rewards/rejected": -0.05173148587346077, |
|
"eval_runtime": 135.94, |
|
"eval_samples_per_second": 14.668, |
|
"eval_steps_per_second": 0.463, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4299947561615102, |
|
"grad_norm": 2.077556227084633, |
|
"learning_rate": 9.877295966495898e-06, |
|
"log_odds_chosen": 0.1433972865343094, |
|
"log_odds_ratio": -0.7417241930961609, |
|
"logits/chosen": -3.147104024887085, |
|
"logits/rejected": -3.1611135005950928, |
|
"logps/chosen": -0.8865131139755249, |
|
"logps/rejected": -0.9979325532913208, |
|
"loss": 0.5454, |
|
"nll_loss": 0.4825812876224518, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.044325657188892365, |
|
"rewards/margins": 0.005570969078689814, |
|
"rewards/rejected": -0.04989662766456604, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4404824331410593, |
|
"grad_norm": 1.9177361456178337, |
|
"learning_rate": 9.759000729485331e-06, |
|
"log_odds_chosen": 0.2965100407600403, |
|
"log_odds_ratio": -0.6552795171737671, |
|
"logits/chosen": -3.065213203430176, |
|
"logits/rejected": -3.106889247894287, |
|
"logps/chosen": -0.8926699757575989, |
|
"logps/rejected": -1.073974609375, |
|
"loss": 0.5349, |
|
"nll_loss": 0.47521886229515076, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04463350027799606, |
|
"rewards/margins": 0.009065226651728153, |
|
"rewards/rejected": -0.05369872972369194, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4509701101206083, |
|
"grad_norm": 2.2675621915351503, |
|
"learning_rate": 9.644856443408244e-06, |
|
"log_odds_chosen": 0.29174235463142395, |
|
"log_odds_ratio": -0.6506129503250122, |
|
"logits/chosen": -3.075723648071289, |
|
"logits/rejected": -3.0862226486206055, |
|
"logps/chosen": -0.8427901268005371, |
|
"logps/rejected": -1.0184295177459717, |
|
"loss": 0.5557, |
|
"nll_loss": 0.5429800152778625, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.04213951155543327, |
|
"rewards/margins": 0.008781969547271729, |
|
"rewards/rejected": -0.050921481102705, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46145778710015734, |
|
"grad_norm": 2.048479923586714, |
|
"learning_rate": 9.534625892455923e-06, |
|
"log_odds_chosen": 0.2715272009372711, |
|
"log_odds_ratio": -0.6504871249198914, |
|
"logits/chosen": -3.114889144897461, |
|
"logits/rejected": -3.1430869102478027, |
|
"logps/chosen": -0.8674638867378235, |
|
"logps/rejected": -1.0402857065200806, |
|
"loss": 0.5502, |
|
"nll_loss": 0.5185979604721069, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04337319731712341, |
|
"rewards/margins": 0.008641095831990242, |
|
"rewards/rejected": -0.05201428383588791, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47194546407970633, |
|
"grad_norm": 1.9700303764265876, |
|
"learning_rate": 9.428090415820635e-06, |
|
"log_odds_chosen": 0.37898144125938416, |
|
"log_odds_ratio": -0.6548101305961609, |
|
"logits/chosen": -3.141404390335083, |
|
"logits/rejected": -3.1785435676574707, |
|
"logps/chosen": -0.8289934396743774, |
|
"logps/rejected": -1.080649733543396, |
|
"loss": 0.5278, |
|
"nll_loss": 0.49574679136276245, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04144967347383499, |
|
"rewards/margins": 0.01258282084017992, |
|
"rewards/rejected": -0.05403248593211174, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4824331410592554, |
|
"grad_norm": 2.1444885294890796, |
|
"learning_rate": 9.325048082403139e-06, |
|
"log_odds_chosen": 0.21225424110889435, |
|
"log_odds_ratio": -0.6999707221984863, |
|
"logits/chosen": -3.110089063644409, |
|
"logits/rejected": -3.1592323780059814, |
|
"logps/chosen": -0.947162926197052, |
|
"logps/rejected": -1.1105449199676514, |
|
"loss": 0.5315, |
|
"nll_loss": 0.5339683890342712, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04735814779996872, |
|
"rewards/margins": 0.008169097825884819, |
|
"rewards/rejected": -0.05552724748849869, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4929208180388044, |
|
"grad_norm": 2.1649660190560613, |
|
"learning_rate": 9.225312080288851e-06, |
|
"log_odds_chosen": 0.2549912929534912, |
|
"log_odds_ratio": -0.6857655644416809, |
|
"logits/chosen": -3.0928080081939697, |
|
"logits/rejected": -3.1287431716918945, |
|
"logps/chosen": -0.8865912556648254, |
|
"logps/rejected": -1.050857663154602, |
|
"loss": 0.5421, |
|
"nll_loss": 0.5101572275161743, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.044329557567834854, |
|
"rewards/margins": 0.008213317021727562, |
|
"rewards/rejected": -0.052542876452207565, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5034084950183534, |
|
"grad_norm": 1.89898044344756, |
|
"learning_rate": 9.12870929175277e-06, |
|
"log_odds_chosen": 0.18933558464050293, |
|
"log_odds_ratio": -0.7031041383743286, |
|
"logits/chosen": -3.1588873863220215, |
|
"logits/rejected": -3.1968955993652344, |
|
"logps/chosen": -0.8558489680290222, |
|
"logps/rejected": -0.980047881603241, |
|
"loss": 0.5174, |
|
"nll_loss": 0.5121264457702637, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04279245063662529, |
|
"rewards/margins": 0.006209943443536758, |
|
"rewards/rejected": -0.04900239408016205, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5138961719979025, |
|
"grad_norm": 1.9212510076087481, |
|
"learning_rate": 9.035079029052514e-06, |
|
"log_odds_chosen": 0.23131313920021057, |
|
"log_odds_ratio": -0.6693936586380005, |
|
"logits/chosen": -3.094421625137329, |
|
"logits/rejected": -3.1039950847625732, |
|
"logps/chosen": -0.9284296035766602, |
|
"logps/rejected": -1.0470894575119019, |
|
"loss": 0.5391, |
|
"nll_loss": 0.5019217729568481, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04642148315906525, |
|
"rewards/margins": 0.005932994186878204, |
|
"rewards/rejected": -0.05235447734594345, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5243838489774515, |
|
"grad_norm": 2.197524211966931, |
|
"learning_rate": 8.94427190999916e-06, |
|
"log_odds_chosen": 0.2233821153640747, |
|
"log_odds_ratio": -0.6923887729644775, |
|
"logits/chosen": -3.0647079944610596, |
|
"logits/rejected": -3.0620505809783936, |
|
"logps/chosen": -0.8755196332931519, |
|
"logps/rejected": -1.0028659105300903, |
|
"loss": 0.5478, |
|
"nll_loss": 0.5219477415084839, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04377598315477371, |
|
"rewards/margins": 0.0063673085533082485, |
|
"rewards/rejected": -0.0501432940363884, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5243838489774515, |
|
"eval_log_odds_chosen": 0.33266139030456543, |
|
"eval_log_odds_ratio": -0.6382430791854858, |
|
"eval_logits/chosen": -3.028609275817871, |
|
"eval_logits/rejected": -3.0259969234466553, |
|
"eval_logps/chosen": -0.8414799571037292, |
|
"eval_logps/rejected": -1.0509231090545654, |
|
"eval_loss": 0.5319445133209229, |
|
"eval_nll_loss": 0.49702468514442444, |
|
"eval_rewards/accuracies": 0.6289682388305664, |
|
"eval_rewards/chosen": -0.04207399860024452, |
|
"eval_rewards/margins": 0.010472159832715988, |
|
"eval_rewards/rejected": -0.05254615470767021, |
|
"eval_runtime": 136.7326, |
|
"eval_samples_per_second": 14.583, |
|
"eval_steps_per_second": 0.461, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5348715259570005, |
|
"grad_norm": 1.7639475332504142, |
|
"learning_rate": 8.856148855400955e-06, |
|
"log_odds_chosen": 0.29167047142982483, |
|
"log_odds_ratio": -0.648201048374176, |
|
"logits/chosen": -3.0114383697509766, |
|
"logits/rejected": -3.024693250656128, |
|
"logps/chosen": -0.841100811958313, |
|
"logps/rejected": -1.0192333459854126, |
|
"loss": 0.5263, |
|
"nll_loss": 0.5350626111030579, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04205504059791565, |
|
"rewards/margins": 0.00890662893652916, |
|
"rewards/rejected": -0.05096167325973511, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5453592029365496, |
|
"grad_norm": 1.6884098835310988, |
|
"learning_rate": 8.770580193070294e-06, |
|
"log_odds_chosen": 0.24579331278800964, |
|
"log_odds_ratio": -0.6814862489700317, |
|
"logits/chosen": -3.016019582748413, |
|
"logits/rejected": -3.0255684852600098, |
|
"logps/chosen": -0.9082791209220886, |
|
"logps/rejected": -1.0769283771514893, |
|
"loss": 0.5369, |
|
"nll_loss": 0.47502464056015015, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04541395604610443, |
|
"rewards/margins": 0.008432453498244286, |
|
"rewards/rejected": -0.053846411406993866, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5558468799160986, |
|
"grad_norm": 1.7588436164574766, |
|
"learning_rate": 8.687444855261389e-06, |
|
"log_odds_chosen": 0.39766445755958557, |
|
"log_odds_ratio": -0.6521557569503784, |
|
"logits/chosen": -3.0906691551208496, |
|
"logits/rejected": -3.1090755462646484, |
|
"logps/chosen": -0.8297191858291626, |
|
"logps/rejected": -1.1049801111221313, |
|
"loss": 0.5364, |
|
"nll_loss": 0.450814813375473, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04148596152663231, |
|
"rewards/margins": 0.01376304216682911, |
|
"rewards/rejected": -0.05524900555610657, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5663345568956476, |
|
"grad_norm": 1.9397603724841295, |
|
"learning_rate": 8.606629658238705e-06, |
|
"log_odds_chosen": 0.15624158084392548, |
|
"log_odds_ratio": -0.7059566378593445, |
|
"logits/chosen": -3.0063095092773438, |
|
"logits/rejected": -3.0354349613189697, |
|
"logps/chosen": -0.8621616363525391, |
|
"logps/rejected": -0.9609626531600952, |
|
"loss": 0.5526, |
|
"nll_loss": 0.5280291438102722, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04310808330774307, |
|
"rewards/margins": 0.0049400487914681435, |
|
"rewards/rejected": -0.04804813116788864, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5768222338751966, |
|
"grad_norm": 1.9970251061131588, |
|
"learning_rate": 8.528028654224417e-06, |
|
"log_odds_chosen": 0.3964000940322876, |
|
"log_odds_ratio": -0.6276581883430481, |
|
"logits/chosen": -3.051056385040283, |
|
"logits/rejected": -3.0628600120544434, |
|
"logps/chosen": -0.8477095365524292, |
|
"logps/rejected": -1.090545415878296, |
|
"loss": 0.5377, |
|
"nll_loss": 0.5382589101791382, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04238547384738922, |
|
"rewards/margins": 0.012141798622906208, |
|
"rewards/rejected": -0.05452727526426315, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5873099108547457, |
|
"grad_norm": 1.9451374983545444, |
|
"learning_rate": 8.451542547285167e-06, |
|
"log_odds_chosen": 0.24946291744709015, |
|
"log_odds_ratio": -0.6731950044631958, |
|
"logits/chosen": -3.09270977973938, |
|
"logits/rejected": -3.1291451454162598, |
|
"logps/chosen": -0.8785122632980347, |
|
"logps/rejected": -1.0384708642959595, |
|
"loss": 0.5214, |
|
"nll_loss": 0.5020500421524048, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04392561689019203, |
|
"rewards/margins": 0.007997924461960793, |
|
"rewards/rejected": -0.05192355066537857, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5977975878342947, |
|
"grad_norm": 2.015759366014609, |
|
"learning_rate": 8.37707816583391e-06, |
|
"log_odds_chosen": 0.1689465194940567, |
|
"log_odds_ratio": -0.7204016447067261, |
|
"logits/chosen": -3.082165241241455, |
|
"logits/rejected": -3.113685369491577, |
|
"logps/chosen": -0.8903343081474304, |
|
"logps/rejected": -1.0027625560760498, |
|
"loss": 0.5039, |
|
"nll_loss": 0.5279403924942017, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.04451671987771988, |
|
"rewards/margins": 0.0056214118376374245, |
|
"rewards/rejected": -0.05013813450932503, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6082852648138437, |
|
"grad_norm": 1.8532059123988396, |
|
"learning_rate": 8.304547985373997e-06, |
|
"log_odds_chosen": 0.27719905972480774, |
|
"log_odds_ratio": -0.6604655385017395, |
|
"logits/chosen": -3.164926528930664, |
|
"logits/rejected": -3.1809298992156982, |
|
"logps/chosen": -0.8681858777999878, |
|
"logps/rejected": -1.0584015846252441, |
|
"loss": 0.5449, |
|
"nll_loss": 0.48173967003822327, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04340929910540581, |
|
"rewards/margins": 0.009510790929198265, |
|
"rewards/rejected": -0.052920084446668625, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6187729417933928, |
|
"grad_norm": 1.9696416884513863, |
|
"learning_rate": 8.233869695926184e-06, |
|
"log_odds_chosen": 0.3565579056739807, |
|
"log_odds_ratio": -0.6653521656990051, |
|
"logits/chosen": -3.1371326446533203, |
|
"logits/rejected": -3.1804890632629395, |
|
"logps/chosen": -0.8285515904426575, |
|
"logps/rejected": -1.060605764389038, |
|
"loss": 0.5115, |
|
"nll_loss": 0.5481864213943481, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04142758250236511, |
|
"rewards/margins": 0.011602701619267464, |
|
"rewards/rejected": -0.05303028225898743, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6292606187729418, |
|
"grad_norm": 2.0728707870222607, |
|
"learning_rate": 8.164965809277262e-06, |
|
"log_odds_chosen": 0.3636320233345032, |
|
"log_odds_ratio": -0.6437779664993286, |
|
"logits/chosen": -3.155708074569702, |
|
"logits/rejected": -3.155524492263794, |
|
"logps/chosen": -0.8240157961845398, |
|
"logps/rejected": -1.06477952003479, |
|
"loss": 0.5146, |
|
"nll_loss": 0.4843020439147949, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04120079427957535, |
|
"rewards/margins": 0.012038188055157661, |
|
"rewards/rejected": -0.05323898047208786, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6292606187729418, |
|
"eval_log_odds_chosen": 0.312126487493515, |
|
"eval_log_odds_ratio": -0.6417948603630066, |
|
"eval_logits/chosen": -3.127530336380005, |
|
"eval_logits/rejected": -3.1324751377105713, |
|
"eval_logps/chosen": -0.8164808750152588, |
|
"eval_logps/rejected": -1.016471028327942, |
|
"eval_loss": 0.5239931344985962, |
|
"eval_nll_loss": 0.4882962703704834, |
|
"eval_rewards/accuracies": 0.6230158805847168, |
|
"eval_rewards/chosen": -0.0408240407705307, |
|
"eval_rewards/margins": 0.00999950896948576, |
|
"eval_rewards/rejected": -0.050823554396629333, |
|
"eval_runtime": 137.2676, |
|
"eval_samples_per_second": 14.526, |
|
"eval_steps_per_second": 0.459, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6397482957524908, |
|
"grad_norm": 2.2204480702078246, |
|
"learning_rate": 8.097763301789162e-06, |
|
"log_odds_chosen": 0.1712610125541687, |
|
"log_odds_ratio": -0.705093502998352, |
|
"logits/chosen": -3.0651237964630127, |
|
"logits/rejected": -3.0982956886291504, |
|
"logps/chosen": -0.8816771507263184, |
|
"logps/rejected": -0.989287257194519, |
|
"loss": 0.526, |
|
"nll_loss": 0.48726779222488403, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.044083863496780396, |
|
"rewards/margins": 0.0053805033676326275, |
|
"rewards/rejected": -0.04946436733007431, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6502359727320398, |
|
"grad_norm": 2.0795066851294, |
|
"learning_rate": 8.03219328902499e-06, |
|
"log_odds_chosen": 0.18011939525604248, |
|
"log_odds_ratio": -0.7075856328010559, |
|
"logits/chosen": -3.093158721923828, |
|
"logits/rejected": -3.1170780658721924, |
|
"logps/chosen": -0.8789434432983398, |
|
"logps/rejected": -1.0122572183609009, |
|
"loss": 0.5293, |
|
"nll_loss": 0.5134457945823669, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.043947167694568634, |
|
"rewards/margins": 0.006665694061666727, |
|
"rewards/rejected": -0.050612859427928925, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6607236497115889, |
|
"grad_norm": 2.0001788984831514, |
|
"learning_rate": 7.968190728895958e-06, |
|
"log_odds_chosen": 0.2610745429992676, |
|
"log_odds_ratio": -0.6974207758903503, |
|
"logits/chosen": -3.0472846031188965, |
|
"logits/rejected": -3.0721120834350586, |
|
"logps/chosen": -0.8566058874130249, |
|
"logps/rejected": -1.0223418474197388, |
|
"loss": 0.5372, |
|
"nll_loss": 0.5244878530502319, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.042830295860767365, |
|
"rewards/margins": 0.00828679371625185, |
|
"rewards/rejected": -0.05111708492040634, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6712113266911379, |
|
"grad_norm": 2.3414302184737332, |
|
"learning_rate": 7.905694150420949e-06, |
|
"log_odds_chosen": 0.30453813076019287, |
|
"log_odds_ratio": -0.6686201095581055, |
|
"logits/chosen": -3.0571064949035645, |
|
"logits/rejected": -3.079134464263916, |
|
"logps/chosen": -0.8609515428543091, |
|
"logps/rejected": -1.0473490953445435, |
|
"loss": 0.5151, |
|
"nll_loss": 0.46057072281837463, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04304756969213486, |
|
"rewards/margins": 0.009319878183305264, |
|
"rewards/rejected": -0.05236745625734329, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6816990036706869, |
|
"grad_norm": 1.9074311662484937, |
|
"learning_rate": 7.844645405527363e-06, |
|
"log_odds_chosen": 0.21438069641590118, |
|
"log_odds_ratio": -0.7022002935409546, |
|
"logits/chosen": -3.058842897415161, |
|
"logits/rejected": -3.0864357948303223, |
|
"logps/chosen": -0.8311389684677124, |
|
"logps/rejected": -0.9654434323310852, |
|
"loss": 0.5332, |
|
"nll_loss": 0.5123748183250427, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0415569506585598, |
|
"rewards/margins": 0.006715219467878342, |
|
"rewards/rejected": -0.04827217012643814, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6921866806502359, |
|
"grad_norm": 1.9616180703535884, |
|
"learning_rate": 7.78498944161523e-06, |
|
"log_odds_chosen": 0.3507782816886902, |
|
"log_odds_ratio": -0.641882061958313, |
|
"logits/chosen": -3.0647902488708496, |
|
"logits/rejected": -3.1045496463775635, |
|
"logps/chosen": -0.8823181390762329, |
|
"logps/rejected": -1.1245914697647095, |
|
"loss": 0.5293, |
|
"nll_loss": 0.48711147904396057, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.044115908443927765, |
|
"rewards/margins": 0.012113666161894798, |
|
"rewards/rejected": -0.05622958019375801, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.702674357629785, |
|
"grad_norm": 2.2401170633783427, |
|
"learning_rate": 7.726674092862559e-06, |
|
"log_odds_chosen": 0.4617346227169037, |
|
"log_odds_ratio": -0.627942681312561, |
|
"logits/chosen": -3.0200469493865967, |
|
"logits/rejected": -3.0557796955108643, |
|
"logps/chosen": -0.8328607678413391, |
|
"logps/rejected": -1.140726923942566, |
|
"loss": 0.5237, |
|
"nll_loss": 0.46908053755760193, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.041643042117357254, |
|
"rewards/margins": 0.015393314883112907, |
|
"rewards/rejected": -0.057036347687244415, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.713162034609334, |
|
"grad_norm": 2.00824540701018, |
|
"learning_rate": 7.669649888473705e-06, |
|
"log_odds_chosen": 0.36505717039108276, |
|
"log_odds_ratio": -0.6428455114364624, |
|
"logits/chosen": -3.0360779762268066, |
|
"logits/rejected": -3.044907808303833, |
|
"logps/chosen": -0.8793157339096069, |
|
"logps/rejected": -1.1065771579742432, |
|
"loss": 0.5083, |
|
"nll_loss": 0.4951552450656891, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04396578669548035, |
|
"rewards/margins": 0.0113630760461092, |
|
"rewards/rejected": -0.0553288571536541, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.723649711588883, |
|
"grad_norm": 1.8606652251395144, |
|
"learning_rate": 7.61386987626881e-06, |
|
"log_odds_chosen": 0.2045813500881195, |
|
"log_odds_ratio": -0.7114613056182861, |
|
"logits/chosen": -3.036839723587036, |
|
"logits/rejected": -3.0589654445648193, |
|
"logps/chosen": -0.8661033511161804, |
|
"logps/rejected": -1.014004111289978, |
|
"loss": 0.5313, |
|
"nll_loss": 0.5510386824607849, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0433051735162735, |
|
"rewards/margins": 0.007395035121589899, |
|
"rewards/rejected": -0.05070021003484726, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7341373885684321, |
|
"grad_norm": 2.2895278902082747, |
|
"learning_rate": 7.559289460184545e-06, |
|
"log_odds_chosen": 0.34833860397338867, |
|
"log_odds_ratio": -0.6269202828407288, |
|
"logits/chosen": -3.0252926349639893, |
|
"logits/rejected": -3.068871021270752, |
|
"logps/chosen": -0.8163930177688599, |
|
"logps/rejected": -1.0459128618240356, |
|
"loss": 0.5298, |
|
"nll_loss": 0.5428040623664856, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04081965237855911, |
|
"rewards/margins": 0.01147598959505558, |
|
"rewards/rejected": -0.05229564383625984, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7341373885684321, |
|
"eval_log_odds_chosen": 0.3869401812553406, |
|
"eval_log_odds_ratio": -0.6218506097793579, |
|
"eval_logits/chosen": -3.0754599571228027, |
|
"eval_logits/rejected": -3.076083183288574, |
|
"eval_logps/chosen": -0.8267216682434082, |
|
"eval_logps/rejected": -1.0827099084854126, |
|
"eval_loss": 0.5187779068946838, |
|
"eval_nll_loss": 0.4841572344303131, |
|
"eval_rewards/accuracies": 0.6428571343421936, |
|
"eval_rewards/chosen": -0.04133608192205429, |
|
"eval_rewards/margins": 0.012799412943422794, |
|
"eval_rewards/rejected": -0.05413549765944481, |
|
"eval_runtime": 137.1864, |
|
"eval_samples_per_second": 14.535, |
|
"eval_steps_per_second": 0.459, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7446250655479811, |
|
"grad_norm": 1.958829045282282, |
|
"learning_rate": 7.505866250408016e-06, |
|
"log_odds_chosen": 0.2794094383716583, |
|
"log_odds_ratio": -0.6572638750076294, |
|
"logits/chosen": -3.1184074878692627, |
|
"logits/rejected": -3.1369974613189697, |
|
"logps/chosen": -0.8444921374320984, |
|
"logps/rejected": -1.0439577102661133, |
|
"loss": 0.5242, |
|
"nll_loss": 0.47964978218078613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04222460836172104, |
|
"rewards/margins": 0.00997327920049429, |
|
"rewards/rejected": -0.052197881042957306, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7551127425275301, |
|
"grad_norm": 1.8049248182957538, |
|
"learning_rate": 7.4535599249993e-06, |
|
"log_odds_chosen": 0.36963027715682983, |
|
"log_odds_ratio": -0.6443501710891724, |
|
"logits/chosen": -3.075653076171875, |
|
"logits/rejected": -3.0980098247528076, |
|
"logps/chosen": -0.7987631559371948, |
|
"logps/rejected": -1.03029465675354, |
|
"loss": 0.5308, |
|
"nll_loss": 0.4633590281009674, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03993815928697586, |
|
"rewards/margins": 0.011576572433114052, |
|
"rewards/rejected": -0.051514726132154465, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7656004195070791, |
|
"grad_norm": 2.1907119668628807, |
|
"learning_rate": 7.402332101976053e-06, |
|
"log_odds_chosen": 0.1018507108092308, |
|
"log_odds_ratio": -0.7229408621788025, |
|
"logits/chosen": -3.084719181060791, |
|
"logits/rejected": -3.0846333503723145, |
|
"logps/chosen": -0.8332414627075195, |
|
"logps/rejected": -0.8869687914848328, |
|
"loss": 0.5377, |
|
"nll_loss": 0.5031158328056335, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.041662074625492096, |
|
"rewards/margins": 0.00268636760301888, |
|
"rewards/rejected": -0.04434844106435776, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7760880964866282, |
|
"grad_norm": 2.050092986168091, |
|
"learning_rate": 7.352146220938079e-06, |
|
"log_odds_chosen": 0.3393878936767578, |
|
"log_odds_ratio": -0.6246740221977234, |
|
"logits/chosen": -3.119809627532959, |
|
"logits/rejected": -3.132826328277588, |
|
"logps/chosen": -0.804786205291748, |
|
"logps/rejected": -1.0171911716461182, |
|
"loss": 0.5308, |
|
"nll_loss": 0.4794273376464844, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.040239304304122925, |
|
"rewards/margins": 0.010620243847370148, |
|
"rewards/rejected": -0.05085955187678337, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7865757734661772, |
|
"grad_norm": 2.0193892114327556, |
|
"learning_rate": 7.3029674334022146e-06, |
|
"log_odds_chosen": 0.2425309419631958, |
|
"log_odds_ratio": -0.6716917753219604, |
|
"logits/chosen": -3.093583106994629, |
|
"logits/rejected": -3.114816188812256, |
|
"logps/chosen": -0.8740803599357605, |
|
"logps/rejected": -1.0157320499420166, |
|
"loss": 0.5427, |
|
"nll_loss": 0.4982066750526428, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04370402172207832, |
|
"rewards/margins": 0.007082589901983738, |
|
"rewards/rejected": -0.05078660696744919, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7970634504457262, |
|
"grad_norm": 1.891204637475333, |
|
"learning_rate": 7.254762501100117e-06, |
|
"log_odds_chosen": 0.2664291262626648, |
|
"log_odds_ratio": -0.6672528386116028, |
|
"logits/chosen": -3.0630593299865723, |
|
"logits/rejected": -3.0695788860321045, |
|
"logps/chosen": -0.8163594007492065, |
|
"logps/rejected": -0.993925929069519, |
|
"loss": 0.5114, |
|
"nll_loss": 0.40486717224121094, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.040817975997924805, |
|
"rewards/margins": 0.00887832697480917, |
|
"rewards/rejected": -0.049696292728185654, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8075511274252754, |
|
"grad_norm": 2.0675479903273914, |
|
"learning_rate": 7.207499701564472e-06, |
|
"log_odds_chosen": 0.23201966285705566, |
|
"log_odds_ratio": -0.6995107531547546, |
|
"logits/chosen": -3.027050018310547, |
|
"logits/rejected": -3.0489039421081543, |
|
"logps/chosen": -0.8810374140739441, |
|
"logps/rejected": -1.0541043281555176, |
|
"loss": 0.5343, |
|
"nll_loss": 0.5017890334129333, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0440518744289875, |
|
"rewards/margins": 0.008653342723846436, |
|
"rewards/rejected": -0.05270521715283394, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8180388044048243, |
|
"grad_norm": 1.9571785710156353, |
|
"learning_rate": 7.1611487403943295e-06, |
|
"log_odds_chosen": 0.23842506110668182, |
|
"log_odds_ratio": -0.672247052192688, |
|
"logits/chosen": -3.062586545944214, |
|
"logits/rejected": -3.0935113430023193, |
|
"logps/chosen": -0.8818261027336121, |
|
"logps/rejected": -1.0167505741119385, |
|
"loss": 0.5467, |
|
"nll_loss": 0.5480509996414185, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04409131035208702, |
|
"rewards/margins": 0.006746229715645313, |
|
"rewards/rejected": -0.05083753541111946, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8285264813843733, |
|
"grad_norm": 1.8565884413084413, |
|
"learning_rate": 7.115680669648201e-06, |
|
"log_odds_chosen": 0.32895228266716003, |
|
"log_odds_ratio": -0.6478875875473022, |
|
"logits/chosen": -3.1025116443634033, |
|
"logits/rejected": -3.1219050884246826, |
|
"logps/chosen": -0.8189374804496765, |
|
"logps/rejected": -1.0338833332061768, |
|
"loss": 0.5049, |
|
"nll_loss": 0.44281667470932007, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.040946874767541885, |
|
"rewards/margins": 0.010747292079031467, |
|
"rewards/rejected": -0.05169416218996048, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8390141583639223, |
|
"grad_norm": 2.106485781152954, |
|
"learning_rate": 7.0710678118654756e-06, |
|
"log_odds_chosen": 0.4608131945133209, |
|
"log_odds_ratio": -0.5961465835571289, |
|
"logits/chosen": -3.092484951019287, |
|
"logits/rejected": -3.090536117553711, |
|
"logps/chosen": -0.7798897624015808, |
|
"logps/rejected": -1.0744028091430664, |
|
"loss": 0.5181, |
|
"nll_loss": 0.4202440679073334, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03899449110031128, |
|
"rewards/margins": 0.014725650660693645, |
|
"rewards/rejected": -0.0537201389670372, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8390141583639223, |
|
"eval_log_odds_chosen": 0.35056135058403015, |
|
"eval_log_odds_ratio": -0.6322371363639832, |
|
"eval_logits/chosen": -3.139373302459717, |
|
"eval_logits/rejected": -3.1382317543029785, |
|
"eval_logps/chosen": -0.8198128342628479, |
|
"eval_logps/rejected": -1.0474979877471924, |
|
"eval_loss": 0.5140993595123291, |
|
"eval_nll_loss": 0.4803001582622528, |
|
"eval_rewards/accuracies": 0.6329365372657776, |
|
"eval_rewards/chosen": -0.040990639477968216, |
|
"eval_rewards/margins": 0.011384249664843082, |
|
"eval_rewards/rejected": -0.05237489193677902, |
|
"eval_runtime": 136.2293, |
|
"eval_samples_per_second": 14.637, |
|
"eval_steps_per_second": 0.462, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8495018353434715, |
|
"grad_norm": 1.919736952774634, |
|
"learning_rate": 7.027283689263066e-06, |
|
"log_odds_chosen": 0.3574589788913727, |
|
"log_odds_ratio": -0.6265517473220825, |
|
"logits/chosen": -3.0922906398773193, |
|
"logits/rejected": -3.093270778656006, |
|
"logps/chosen": -0.8058309555053711, |
|
"logps/rejected": -1.0188381671905518, |
|
"loss": 0.5132, |
|
"nll_loss": 0.4754185676574707, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04029155150055885, |
|
"rewards/margins": 0.010650361888110638, |
|
"rewards/rejected": -0.050941914319992065, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8599895123230205, |
|
"grad_norm": 2.3619475771455214, |
|
"learning_rate": 6.984302957695783e-06, |
|
"log_odds_chosen": 0.2932414412498474, |
|
"log_odds_ratio": -0.6586158275604248, |
|
"logits/chosen": -3.0357770919799805, |
|
"logits/rejected": -3.0360379219055176, |
|
"logps/chosen": -0.842557430267334, |
|
"logps/rejected": -1.0188366174697876, |
|
"loss": 0.505, |
|
"nll_loss": 0.4280059337615967, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04212787002325058, |
|
"rewards/margins": 0.008813952095806599, |
|
"rewards/rejected": -0.0509418249130249, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8704771893025695, |
|
"grad_norm": 2.3824306185771267, |
|
"learning_rate": 6.942101345006233e-06, |
|
"log_odds_chosen": 0.2479257881641388, |
|
"log_odds_ratio": -0.702430248260498, |
|
"logits/chosen": -3.008411407470703, |
|
"logits/rejected": -3.05663800239563, |
|
"logps/chosen": -0.853378415107727, |
|
"logps/rejected": -1.0239073038101196, |
|
"loss": 0.5248, |
|
"nll_loss": 0.4657117426395416, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.04266892373561859, |
|
"rewards/margins": 0.00852644257247448, |
|
"rewards/rejected": -0.05119536444544792, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8809648662821186, |
|
"grad_norm": 1.9624325890421999, |
|
"learning_rate": 6.900655593423542e-06, |
|
"log_odds_chosen": 0.2082471400499344, |
|
"log_odds_ratio": -0.6889498233795166, |
|
"logits/chosen": -3.040546178817749, |
|
"logits/rejected": -3.0660147666931152, |
|
"logps/chosen": -0.8756462931632996, |
|
"logps/rejected": -1.0124717950820923, |
|
"loss": 0.5137, |
|
"nll_loss": 0.4855361580848694, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.043782319873571396, |
|
"rewards/margins": 0.006841268390417099, |
|
"rewards/rejected": -0.050623588263988495, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8914525432616676, |
|
"grad_norm": 2.0144554917595756, |
|
"learning_rate": 6.859943405700353e-06, |
|
"log_odds_chosen": 0.3205421566963196, |
|
"log_odds_ratio": -0.6371484994888306, |
|
"logits/chosen": -3.054384231567383, |
|
"logits/rejected": -3.0986409187316895, |
|
"logps/chosen": -0.8319618105888367, |
|
"logps/rejected": -1.0313116312026978, |
|
"loss": 0.5044, |
|
"nll_loss": 0.4881317615509033, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.041598085314035416, |
|
"rewards/margins": 0.009967491030693054, |
|
"rewards/rejected": -0.05156558007001877, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9019402202412166, |
|
"grad_norm": 1.9341957217840544, |
|
"learning_rate": 6.819943394704736e-06, |
|
"log_odds_chosen": 0.26728707551956177, |
|
"log_odds_ratio": -0.6747015714645386, |
|
"logits/chosen": -3.0936527252197266, |
|
"logits/rejected": -3.1073575019836426, |
|
"logps/chosen": -0.8353049159049988, |
|
"logps/rejected": -1.0224361419677734, |
|
"loss": 0.5278, |
|
"nll_loss": 0.4731883108615875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0417652502655983, |
|
"rewards/margins": 0.009356559254229069, |
|
"rewards/rejected": -0.05112180858850479, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9124278972207656, |
|
"grad_norm": 5.30319924106792, |
|
"learning_rate": 6.780635036208105e-06, |
|
"log_odds_chosen": 0.30106544494628906, |
|
"log_odds_ratio": -0.6683878898620605, |
|
"logits/chosen": -3.097151279449463, |
|
"logits/rejected": -3.1499500274658203, |
|
"logps/chosen": -0.867012619972229, |
|
"logps/rejected": -1.0790386199951172, |
|
"loss": 0.4933, |
|
"nll_loss": 0.48347124457359314, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04335063695907593, |
|
"rewards/margins": 0.010601297952234745, |
|
"rewards/rejected": -0.0539519302546978, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9229155742003147, |
|
"grad_norm": 1.6208302885778367, |
|
"learning_rate": 6.741998624632421e-06, |
|
"log_odds_chosen": 0.29186171293258667, |
|
"log_odds_ratio": -0.6591932773590088, |
|
"logits/chosen": -3.15583872795105, |
|
"logits/rejected": -3.168064594268799, |
|
"logps/chosen": -0.8187226057052612, |
|
"logps/rejected": -1.0049909353256226, |
|
"loss": 0.4887, |
|
"nll_loss": 0.4384452700614929, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04093613475561142, |
|
"rewards/margins": 0.009313413873314857, |
|
"rewards/rejected": -0.05024954676628113, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9334032511798637, |
|
"grad_norm": 1.7707391073712173, |
|
"learning_rate": 6.70401523153991e-06, |
|
"log_odds_chosen": 0.33703380823135376, |
|
"log_odds_ratio": -0.6459982991218567, |
|
"logits/chosen": -3.1340742111206055, |
|
"logits/rejected": -3.157071590423584, |
|
"logps/chosen": -0.8063561320304871, |
|
"logps/rejected": -0.9982324838638306, |
|
"loss": 0.4931, |
|
"nll_loss": 0.4631246030330658, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.040317803621292114, |
|
"rewards/margins": 0.009593818336725235, |
|
"rewards/rejected": -0.04991162568330765, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9438909281594127, |
|
"grad_norm": 2.341682439233393, |
|
"learning_rate": 6.666666666666667e-06, |
|
"log_odds_chosen": 0.26426905393600464, |
|
"log_odds_ratio": -0.6637164354324341, |
|
"logits/chosen": -3.1100411415100098, |
|
"logits/rejected": -3.130826473236084, |
|
"logps/chosen": -0.7806347012519836, |
|
"logps/rejected": -0.9385608434677124, |
|
"loss": 0.5239, |
|
"nll_loss": 0.4659123420715332, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.039031732827425, |
|
"rewards/margins": 0.00789631437510252, |
|
"rewards/rejected": -0.0469280444085598, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9438909281594127, |
|
"eval_log_odds_chosen": 0.32679569721221924, |
|
"eval_log_odds_ratio": -0.6327584385871887, |
|
"eval_logits/chosen": -3.117077112197876, |
|
"eval_logits/rejected": -3.119086742401123, |
|
"eval_logps/chosen": -0.8044511079788208, |
|
"eval_logps/rejected": -1.0129274129867554, |
|
"eval_loss": 0.5086367726325989, |
|
"eval_nll_loss": 0.4747697710990906, |
|
"eval_rewards/accuracies": 0.6309523582458496, |
|
"eval_rewards/chosen": -0.04022255912423134, |
|
"eval_rewards/margins": 0.010423817671835423, |
|
"eval_rewards/rejected": -0.05064636468887329, |
|
"eval_runtime": 137.5576, |
|
"eval_samples_per_second": 14.496, |
|
"eval_steps_per_second": 0.458, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9543786051389617, |
|
"grad_norm": 2.0533389896159213, |
|
"learning_rate": 6.629935441317959e-06, |
|
"log_odds_chosen": 0.4754648208618164, |
|
"log_odds_ratio": -0.6232188940048218, |
|
"logits/chosen": -3.073176622390747, |
|
"logits/rejected": -3.084963321685791, |
|
"logps/chosen": -0.828788161277771, |
|
"logps/rejected": -1.1443804502487183, |
|
"loss": 0.5142, |
|
"nll_loss": 0.46652156114578247, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04143941029906273, |
|
"rewards/margins": 0.015779614448547363, |
|
"rewards/rejected": -0.05721902847290039, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9648662821185108, |
|
"grad_norm": 2.138448059862142, |
|
"learning_rate": 6.593804733957872e-06, |
|
"log_odds_chosen": 0.32768282294273376, |
|
"log_odds_ratio": -0.6431117057800293, |
|
"logits/chosen": -3.038576364517212, |
|
"logits/rejected": -3.061370372772217, |
|
"logps/chosen": -0.7864677906036377, |
|
"logps/rejected": -0.9946994781494141, |
|
"loss": 0.4836, |
|
"nll_loss": 0.43025264143943787, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03932339325547218, |
|
"rewards/margins": 0.010411588475108147, |
|
"rewards/rejected": -0.04973498359322548, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9753539590980598, |
|
"grad_norm": 2.1602863053901413, |
|
"learning_rate": 6.55825835783953e-06, |
|
"log_odds_chosen": 0.2050061970949173, |
|
"log_odds_ratio": -0.6868597269058228, |
|
"logits/chosen": -3.0544333457946777, |
|
"logits/rejected": -3.066739797592163, |
|
"logps/chosen": -0.8742432594299316, |
|
"logps/rejected": -1.0194706916809082, |
|
"loss": 0.5136, |
|
"nll_loss": 0.5241981744766235, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04371216148138046, |
|
"rewards/margins": 0.007261371705681086, |
|
"rewards/rejected": -0.05097353458404541, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9858416360776088, |
|
"grad_norm": 1.9215491222233851, |
|
"learning_rate": 6.523280730534423e-06, |
|
"log_odds_chosen": 0.23041269183158875, |
|
"log_odds_ratio": -0.6992384195327759, |
|
"logits/chosen": -3.0867247581481934, |
|
"logits/rejected": -3.0779662132263184, |
|
"logps/chosen": -0.7768861651420593, |
|
"logps/rejected": -0.9184977412223816, |
|
"loss": 0.5102, |
|
"nll_loss": 0.4776674211025238, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.038844309747219086, |
|
"rewards/margins": 0.0070805782452225685, |
|
"rewards/rejected": -0.045924894511699677, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9963293130571579, |
|
"grad_norm": 2.1983436102574547, |
|
"learning_rate": 6.488856845230502e-06, |
|
"log_odds_chosen": 0.25244003534317017, |
|
"log_odds_ratio": -0.6911928653717041, |
|
"logits/chosen": -3.0215468406677246, |
|
"logits/rejected": -3.0374438762664795, |
|
"logps/chosen": -0.8648554682731628, |
|
"logps/rejected": -1.0236364603042603, |
|
"loss": 0.5385, |
|
"nll_loss": 0.5036488175392151, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04324277862906456, |
|
"rewards/margins": 0.00793905183672905, |
|
"rewards/rejected": -0.05118182301521301, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0068169900367068, |
|
"grad_norm": 2.2724469008271773, |
|
"learning_rate": 6.4549722436790284e-06, |
|
"log_odds_chosen": 1.0400245189666748, |
|
"log_odds_ratio": -0.42517581582069397, |
|
"logits/chosen": -3.0371384620666504, |
|
"logits/rejected": -3.0435400009155273, |
|
"logps/chosen": -0.5974615812301636, |
|
"logps/rejected": -1.1842448711395264, |
|
"loss": 0.3929, |
|
"nll_loss": 0.40045398473739624, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.02987307868897915, |
|
"rewards/margins": 0.02933916449546814, |
|
"rewards/rejected": -0.05921224504709244, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.017304667016256, |
|
"grad_norm": 2.0168885022396372, |
|
"learning_rate": 6.421612990679356e-06, |
|
"log_odds_chosen": 1.6284434795379639, |
|
"log_odds_ratio": -0.2502659857273102, |
|
"logits/chosen": -3.080873727798462, |
|
"logits/rejected": -3.070159912109375, |
|
"logps/chosen": -0.4285094141960144, |
|
"logps/rejected": -1.2745321989059448, |
|
"loss": 0.2923, |
|
"nll_loss": 0.28497669100761414, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02142546884715557, |
|
"rewards/margins": 0.04230114072561264, |
|
"rewards/rejected": -0.06372661143541336, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.027792343995805, |
|
"grad_norm": 1.9662869053425782, |
|
"learning_rate": 6.3887656499994e-06, |
|
"log_odds_chosen": 1.8482691049575806, |
|
"log_odds_ratio": -0.21383436024188995, |
|
"logits/chosen": -3.071471929550171, |
|
"logits/rejected": -3.079923391342163, |
|
"logps/chosen": -0.43078216910362244, |
|
"logps/rejected": -1.4107215404510498, |
|
"loss": 0.3019, |
|
"nll_loss": 0.3140898644924164, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021539105102419853, |
|
"rewards/margins": 0.04899696633219719, |
|
"rewards/rejected": -0.0705360695719719, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.038280020975354, |
|
"grad_norm": 1.9845582869348006, |
|
"learning_rate": 6.356417261637282e-06, |
|
"log_odds_chosen": 1.6627075672149658, |
|
"log_odds_ratio": -0.2610566318035126, |
|
"logits/chosen": -2.9875268936157227, |
|
"logits/rejected": -2.9876785278320312, |
|
"logps/chosen": -0.4378105103969574, |
|
"logps/rejected": -1.3178083896636963, |
|
"loss": 0.296, |
|
"nll_loss": 0.27773916721343994, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.02189052477478981, |
|
"rewards/margins": 0.04399988800287247, |
|
"rewards/rejected": -0.06589041650295258, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.048767697954903, |
|
"grad_norm": 2.0942478813902783, |
|
"learning_rate": 6.324555320336759e-06, |
|
"log_odds_chosen": 1.9041988849639893, |
|
"log_odds_ratio": -0.20684988796710968, |
|
"logits/chosen": -2.9869093894958496, |
|
"logits/rejected": -3.029050588607788, |
|
"logps/chosen": -0.4077525734901428, |
|
"logps/rejected": -1.3952513933181763, |
|
"loss": 0.2888, |
|
"nll_loss": 0.2748258709907532, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.02038763090968132, |
|
"rewards/margins": 0.04937494546175003, |
|
"rewards/rejected": -0.06976256519556046, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.048767697954903, |
|
"eval_log_odds_chosen": 0.37935417890548706, |
|
"eval_log_odds_ratio": -0.6318228840827942, |
|
"eval_logits/chosen": -3.0189764499664307, |
|
"eval_logits/rejected": -3.0171284675598145, |
|
"eval_logps/chosen": -0.8724088072776794, |
|
"eval_logps/rejected": -1.112794280052185, |
|
"eval_loss": 0.5400179028511047, |
|
"eval_nll_loss": 0.5058131814002991, |
|
"eval_rewards/accuracies": 0.6428571343421936, |
|
"eval_rewards/chosen": -0.04362044483423233, |
|
"eval_rewards/margins": 0.012019270099699497, |
|
"eval_rewards/rejected": -0.05563971400260925, |
|
"eval_runtime": 136.9938, |
|
"eval_samples_per_second": 14.555, |
|
"eval_steps_per_second": 0.46, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.059255374934452, |
|
"grad_norm": 1.8526210480251912, |
|
"learning_rate": 6.2931677552755265e-06, |
|
"log_odds_chosen": 1.7620799541473389, |
|
"log_odds_ratio": -0.23190836608409882, |
|
"logits/chosen": -3.0539023876190186, |
|
"logits/rejected": -3.0629706382751465, |
|
"logps/chosen": -0.43785715103149414, |
|
"logps/rejected": -1.3722269535064697, |
|
"loss": 0.2859, |
|
"nll_loss": 0.2769049108028412, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021892856806516647, |
|
"rewards/margins": 0.04671848937869072, |
|
"rewards/rejected": -0.06861135363578796, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.069743051914001, |
|
"grad_norm": 2.017775428059147, |
|
"learning_rate": 6.262242910851496e-06, |
|
"log_odds_chosen": 1.7232574224472046, |
|
"log_odds_ratio": -0.22979629039764404, |
|
"logits/chosen": -3.0019690990448, |
|
"logits/rejected": -3.0224807262420654, |
|
"logps/chosen": -0.4002920091152191, |
|
"logps/rejected": -1.3048107624053955, |
|
"loss": 0.2894, |
|
"nll_loss": 0.2588661015033722, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.020014600828289986, |
|
"rewards/margins": 0.04522594064474106, |
|
"rewards/rejected": -0.0652405396103859, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.08023072889355, |
|
"grad_norm": 2.1656896077764, |
|
"learning_rate": 6.231769528497559e-06, |
|
"log_odds_chosen": 1.7999454736709595, |
|
"log_odds_ratio": -0.23009638488292694, |
|
"logits/chosen": -3.0344815254211426, |
|
"logits/rejected": -3.0285098552703857, |
|
"logps/chosen": -0.42475366592407227, |
|
"logps/rejected": -1.3811571598052979, |
|
"loss": 0.2779, |
|
"nll_loss": 0.26928776502609253, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021237684413790703, |
|
"rewards/margins": 0.04782017320394516, |
|
"rewards/rejected": -0.06905786693096161, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.0907184058730992, |
|
"grad_norm": 1.8893124181143397, |
|
"learning_rate": 6.2017367294604225e-06, |
|
"log_odds_chosen": 1.7361199855804443, |
|
"log_odds_ratio": -0.2356552630662918, |
|
"logits/chosen": -2.9798855781555176, |
|
"logits/rejected": -3.012021780014038, |
|
"logps/chosen": -0.4087589383125305, |
|
"logps/rejected": -1.318456768989563, |
|
"loss": 0.2848, |
|
"nll_loss": 0.2693423926830292, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.020437946543097496, |
|
"rewards/margins": 0.045484889298677444, |
|
"rewards/rejected": -0.06592283397912979, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.1012060828526482, |
|
"grad_norm": 1.998285617344112, |
|
"learning_rate": 6.172133998483677e-06, |
|
"log_odds_chosen": 1.989933967590332, |
|
"log_odds_ratio": -0.2104463130235672, |
|
"logits/chosen": -2.9669861793518066, |
|
"logits/rejected": -2.992997169494629, |
|
"logps/chosen": -0.4091659486293793, |
|
"logps/rejected": -1.4872965812683105, |
|
"loss": 0.2793, |
|
"nll_loss": 0.24384136497974396, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.020458297803997993, |
|
"rewards/margins": 0.05390653759241104, |
|
"rewards/rejected": -0.07436482608318329, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.1116937598321972, |
|
"grad_norm": 1.99753785316238, |
|
"learning_rate": 6.142951168339513e-06, |
|
"log_odds_chosen": 1.7905690670013428, |
|
"log_odds_ratio": -0.2465437948703766, |
|
"logits/chosen": -2.9944257736206055, |
|
"logits/rejected": -2.988699436187744, |
|
"logps/chosen": -0.41175705194473267, |
|
"logps/rejected": -1.3037220239639282, |
|
"loss": 0.2828, |
|
"nll_loss": 0.2829252779483795, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.020587850362062454, |
|
"rewards/margins": 0.04459824413061142, |
|
"rewards/rejected": -0.06518609821796417, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1221814368117462, |
|
"grad_norm": 2.0944607329795666, |
|
"learning_rate": 6.114178405157431e-06, |
|
"log_odds_chosen": 1.972241759300232, |
|
"log_odds_ratio": -0.202741339802742, |
|
"logits/chosen": -2.9314074516296387, |
|
"logits/rejected": -2.943037271499634, |
|
"logps/chosen": -0.39666005969047546, |
|
"logps/rejected": -1.4398232698440552, |
|
"loss": 0.2869, |
|
"nll_loss": 0.26206424832344055, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.019833002239465714, |
|
"rewards/margins": 0.0521581657230854, |
|
"rewards/rejected": -0.07199116796255112, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.1326691137912952, |
|
"grad_norm": 2.082309850512046, |
|
"learning_rate": 6.0858061945018455e-06, |
|
"log_odds_chosen": 1.9569040536880493, |
|
"log_odds_ratio": -0.20189175009727478, |
|
"logits/chosen": -2.9233288764953613, |
|
"logits/rejected": -2.953047275543213, |
|
"logps/chosen": -0.4349672198295593, |
|
"logps/rejected": -1.479813814163208, |
|
"loss": 0.286, |
|
"nll_loss": 0.25732284784317017, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021748360246419907, |
|
"rewards/margins": 0.052242327481508255, |
|
"rewards/rejected": -0.07399068772792816, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.1431567907708442, |
|
"grad_norm": 1.977872551014816, |
|
"learning_rate": 6.0578253281538265e-06, |
|
"log_odds_chosen": 1.8792686462402344, |
|
"log_odds_ratio": -0.23301272094249725, |
|
"logits/chosen": -2.9573769569396973, |
|
"logits/rejected": -2.968686103820801, |
|
"logps/chosen": -0.3683982789516449, |
|
"logps/rejected": -1.286027431488037, |
|
"loss": 0.2841, |
|
"nll_loss": 0.26943594217300415, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.018419915810227394, |
|
"rewards/margins": 0.04588145762681961, |
|
"rewards/rejected": -0.06430138647556305, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.1536444677503932, |
|
"grad_norm": 2.2874664942911984, |
|
"learning_rate": 6.030226891555273e-06, |
|
"log_odds_chosen": 1.744699239730835, |
|
"log_odds_ratio": -0.2575313448905945, |
|
"logits/chosen": -3.0328478813171387, |
|
"logits/rejected": -3.0531229972839355, |
|
"logps/chosen": -0.4480053782463074, |
|
"logps/rejected": -1.409203290939331, |
|
"loss": 0.29, |
|
"nll_loss": 0.2910405397415161, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.022400271147489548, |
|
"rewards/margins": 0.04805989935994148, |
|
"rewards/rejected": -0.07046017050743103, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.1536444677503932, |
|
"eval_log_odds_chosen": 0.4246710240840912, |
|
"eval_log_odds_ratio": -0.6255837082862854, |
|
"eval_logits/chosen": -3.002875804901123, |
|
"eval_logits/rejected": -3.0027201175689697, |
|
"eval_logps/chosen": -0.8736297488212585, |
|
"eval_logps/rejected": -1.1487443447113037, |
|
"eval_loss": 0.5385290384292603, |
|
"eval_nll_loss": 0.5041735172271729, |
|
"eval_rewards/accuracies": 0.64682537317276, |
|
"eval_rewards/chosen": -0.04368148371577263, |
|
"eval_rewards/margins": 0.013755732215940952, |
|
"eval_rewards/rejected": -0.057437218725681305, |
|
"eval_runtime": 136.8823, |
|
"eval_samples_per_second": 14.567, |
|
"eval_steps_per_second": 0.46, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.1641321447299422, |
|
"grad_norm": 1.8147231314332177, |
|
"learning_rate": 6.003002251876643e-06, |
|
"log_odds_chosen": 1.8075166940689087, |
|
"log_odds_ratio": -0.2281859815120697, |
|
"logits/chosen": -2.965421199798584, |
|
"logits/rejected": -3.0172793865203857, |
|
"logps/chosen": -0.44597238302230835, |
|
"logps/rejected": -1.4203885793685913, |
|
"loss": 0.2891, |
|
"nll_loss": 0.2668479084968567, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.022298619151115417, |
|
"rewards/margins": 0.04872080683708191, |
|
"rewards/rejected": -0.07101943343877792, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.1746198217094914, |
|
"grad_norm": 1.9969430269469466, |
|
"learning_rate": 5.976143046671968e-06, |
|
"log_odds_chosen": 1.7478984594345093, |
|
"log_odds_ratio": -0.22862455248832703, |
|
"logits/chosen": -3.0243489742279053, |
|
"logits/rejected": -3.0321333408355713, |
|
"logps/chosen": -0.40696269273757935, |
|
"logps/rejected": -1.2988313436508179, |
|
"loss": 0.2927, |
|
"nll_loss": 0.27604612708091736, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02034812793135643, |
|
"rewards/margins": 0.04459343105554581, |
|
"rewards/rejected": -0.06494157016277313, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.1851074986890404, |
|
"grad_norm": 2.1896703421371275, |
|
"learning_rate": 5.949641173087296e-06, |
|
"log_odds_chosen": 2.048767566680908, |
|
"log_odds_ratio": -0.20188426971435547, |
|
"logits/chosen": -2.9657158851623535, |
|
"logits/rejected": -2.977405309677124, |
|
"logps/chosen": -0.38311532139778137, |
|
"logps/rejected": -1.454978108406067, |
|
"loss": 0.2825, |
|
"nll_loss": 0.2597211003303528, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.01915576681494713, |
|
"rewards/margins": 0.05359314754605293, |
|
"rewards/rejected": -0.07274890691041946, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.1955951756685894, |
|
"grad_norm": 1.8856822247943528, |
|
"learning_rate": 5.923488777590924e-06, |
|
"log_odds_chosen": 1.9368520975112915, |
|
"log_odds_ratio": -0.21634550392627716, |
|
"logits/chosen": -3.009665012359619, |
|
"logits/rejected": -3.0066471099853516, |
|
"logps/chosen": -0.412930428981781, |
|
"logps/rejected": -1.4850547313690186, |
|
"loss": 0.2786, |
|
"nll_loss": 0.28015536069869995, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02064652182161808, |
|
"rewards/margins": 0.05360621213912964, |
|
"rewards/rejected": -0.07425273954868317, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.2060828526481384, |
|
"grad_norm": 2.2165729739830233, |
|
"learning_rate": 5.897678246195886e-06, |
|
"log_odds_chosen": 1.9798767566680908, |
|
"log_odds_ratio": -0.19855430722236633, |
|
"logits/chosen": -2.9805493354797363, |
|
"logits/rejected": -2.9919371604919434, |
|
"logps/chosen": -0.38313865661621094, |
|
"logps/rejected": -1.3864378929138184, |
|
"loss": 0.2909, |
|
"nll_loss": 0.27790573239326477, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.019156932830810547, |
|
"rewards/margins": 0.05016495659947395, |
|
"rewards/rejected": -0.0693218931555748, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2165705296276874, |
|
"grad_norm": 2.8337045840850497, |
|
"learning_rate": 5.8722021951470355e-06, |
|
"log_odds_chosen": 1.7361915111541748, |
|
"log_odds_ratio": -0.24711327254772186, |
|
"logits/chosen": -2.966083288192749, |
|
"logits/rejected": -2.9842519760131836, |
|
"logps/chosen": -0.4412474036216736, |
|
"logps/rejected": -1.3824529647827148, |
|
"loss": 0.2781, |
|
"nll_loss": 0.2754039466381073, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02206237055361271, |
|
"rewards/margins": 0.0470602810382843, |
|
"rewards/rejected": -0.06912264972925186, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.2270582066072364, |
|
"grad_norm": 1.7729938432799273, |
|
"learning_rate": 5.847053462046862e-06, |
|
"log_odds_chosen": 1.7805134057998657, |
|
"log_odds_ratio": -0.23545412719249725, |
|
"logits/chosen": -3.0085816383361816, |
|
"logits/rejected": -3.003875494003296, |
|
"logps/chosen": -0.4123718738555908, |
|
"logps/rejected": -1.3221479654312134, |
|
"loss": 0.2829, |
|
"nll_loss": 0.2879020869731903, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02061859332025051, |
|
"rewards/margins": 0.045488808304071426, |
|
"rewards/rejected": -0.06610739976167679, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.2375458835867854, |
|
"grad_norm": 2.2169036925519454, |
|
"learning_rate": 5.822225097395821e-06, |
|
"log_odds_chosen": 1.9844211339950562, |
|
"log_odds_ratio": -0.1866404265165329, |
|
"logits/chosen": -2.9880988597869873, |
|
"logits/rejected": -3.0081310272216797, |
|
"logps/chosen": -0.3858886957168579, |
|
"logps/rejected": -1.3924882411956787, |
|
"loss": 0.2873, |
|
"nll_loss": 0.25162869691848755, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.019294437021017075, |
|
"rewards/margins": 0.05032998323440552, |
|
"rewards/rejected": -0.06962442398071289, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.2480335605663346, |
|
"grad_norm": 2.1614361138819045, |
|
"learning_rate": 5.797710356524486e-06, |
|
"log_odds_chosen": 1.8616158962249756, |
|
"log_odds_ratio": -0.22632256150245667, |
|
"logits/chosen": -3.0017178058624268, |
|
"logits/rejected": -3.0013363361358643, |
|
"logps/chosen": -0.4442955553531647, |
|
"logps/rejected": -1.4363129138946533, |
|
"loss": 0.2867, |
|
"nll_loss": 0.289310485124588, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.022214777767658234, |
|
"rewards/margins": 0.04960086941719055, |
|
"rewards/rejected": -0.07181564718484879, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.2585212375458836, |
|
"grad_norm": 2.0470229728313494, |
|
"learning_rate": 5.773502691896259e-06, |
|
"log_odds_chosen": 1.8614075183868408, |
|
"log_odds_ratio": -0.2429337054491043, |
|
"logits/chosen": -2.9596099853515625, |
|
"logits/rejected": -2.9728147983551025, |
|
"logps/chosen": -0.44122061133384705, |
|
"logps/rejected": -1.4644559621810913, |
|
"loss": 0.2826, |
|
"nll_loss": 0.2614334225654602, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.022061031311750412, |
|
"rewards/margins": 0.051161766052246094, |
|
"rewards/rejected": -0.0732228010892868, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2585212375458836, |
|
"eval_log_odds_chosen": 0.4214767515659332, |
|
"eval_log_odds_ratio": -0.6254101991653442, |
|
"eval_logits/chosen": -2.9582858085632324, |
|
"eval_logits/rejected": -2.96195912361145, |
|
"eval_logps/chosen": -0.8853804469108582, |
|
"eval_logps/rejected": -1.162561058998108, |
|
"eval_loss": 0.5427829027175903, |
|
"eval_nll_loss": 0.5084435939788818, |
|
"eval_rewards/accuracies": 0.6428571343421936, |
|
"eval_rewards/chosen": -0.04426902160048485, |
|
"eval_rewards/margins": 0.013859033584594727, |
|
"eval_rewards/rejected": -0.05812805891036987, |
|
"eval_runtime": 137.2006, |
|
"eval_samples_per_second": 14.533, |
|
"eval_steps_per_second": 0.459, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2690089145254326, |
|
"grad_norm": 2.3388472125063946, |
|
"learning_rate": 5.749595745760691e-06, |
|
"log_odds_chosen": 1.858030080795288, |
|
"log_odds_ratio": -0.21272964775562286, |
|
"logits/chosen": -2.996577739715576, |
|
"logits/rejected": -3.0146660804748535, |
|
"logps/chosen": -0.4070938229560852, |
|
"logps/rejected": -1.3386101722717285, |
|
"loss": 0.2988, |
|
"nll_loss": 0.292961448431015, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02035469003021717, |
|
"rewards/margins": 0.04657582566142082, |
|
"rewards/rejected": -0.06693051755428314, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.2794965915049816, |
|
"grad_norm": 1.9762440493042526, |
|
"learning_rate": 5.725983343138682e-06, |
|
"log_odds_chosen": 1.7544046640396118, |
|
"log_odds_ratio": -0.22841353714466095, |
|
"logits/chosen": -2.9734439849853516, |
|
"logits/rejected": -2.9992988109588623, |
|
"logps/chosen": -0.42544227838516235, |
|
"logps/rejected": -1.3273015022277832, |
|
"loss": 0.295, |
|
"nll_loss": 0.28989139199256897, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021272115409374237, |
|
"rewards/margins": 0.04509295895695686, |
|
"rewards/rejected": -0.0663650780916214, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.2899842684845306, |
|
"grad_norm": 2.230074491318477, |
|
"learning_rate": 5.702659485122011e-06, |
|
"log_odds_chosen": 1.929265022277832, |
|
"log_odds_ratio": -0.20951807498931885, |
|
"logits/chosen": -2.9871158599853516, |
|
"logits/rejected": -2.993727207183838, |
|
"logps/chosen": -0.40125927329063416, |
|
"logps/rejected": -1.4160717725753784, |
|
"loss": 0.2653, |
|
"nll_loss": 0.23026029765605927, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020062964409589767, |
|
"rewards/margins": 0.050740621984004974, |
|
"rewards/rejected": -0.07080359011888504, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.3004719454640796, |
|
"grad_norm": 1.9679461376203173, |
|
"learning_rate": 5.679618342470648e-06, |
|
"log_odds_chosen": 1.7371532917022705, |
|
"log_odds_ratio": -0.2242734134197235, |
|
"logits/chosen": -3.0132291316986084, |
|
"logits/rejected": -3.0433402061462402, |
|
"logps/chosen": -0.413210391998291, |
|
"logps/rejected": -1.3000330924987793, |
|
"loss": 0.2804, |
|
"nll_loss": 0.29589781165122986, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.0206605214625597, |
|
"rewards/margins": 0.04434113949537277, |
|
"rewards/rejected": -0.06500165909528732, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.3109596224436286, |
|
"grad_norm": 2.617277483095543, |
|
"learning_rate": 5.656854249492381e-06, |
|
"log_odds_chosen": 1.814679741859436, |
|
"log_odds_ratio": -0.22298629581928253, |
|
"logits/chosen": -2.996896266937256, |
|
"logits/rejected": -3.0056145191192627, |
|
"logps/chosen": -0.42395251989364624, |
|
"logps/rejected": -1.3927456140518188, |
|
"loss": 0.2687, |
|
"nll_loss": 0.25607752799987793, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021197626367211342, |
|
"rewards/margins": 0.04843965172767639, |
|
"rewards/rejected": -0.06963728368282318, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3214472994231778, |
|
"grad_norm": 1.9773184888291742, |
|
"learning_rate": 5.63436169819011e-06, |
|
"log_odds_chosen": 1.8136640787124634, |
|
"log_odds_ratio": -0.24320077896118164, |
|
"logits/chosen": -2.966784954071045, |
|
"logits/rejected": -3.001746892929077, |
|
"logps/chosen": -0.45541706681251526, |
|
"logps/rejected": -1.3951488733291626, |
|
"loss": 0.2988, |
|
"nll_loss": 0.31274476647377014, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.022770855575799942, |
|
"rewards/margins": 0.046986598521471024, |
|
"rewards/rejected": -0.06975744664669037, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.3319349764027268, |
|
"grad_norm": 1.9140818928985086, |
|
"learning_rate": 5.612135332663138e-06, |
|
"log_odds_chosen": 1.953155755996704, |
|
"log_odds_ratio": -0.21717992424964905, |
|
"logits/chosen": -3.006328821182251, |
|
"logits/rejected": -3.037388324737549, |
|
"logps/chosen": -0.42650872468948364, |
|
"logps/rejected": -1.495060682296753, |
|
"loss": 0.272, |
|
"nll_loss": 0.2669217586517334, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021325435489416122, |
|
"rewards/margins": 0.053427595645189285, |
|
"rewards/rejected": -0.074753038585186, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.3424226533822758, |
|
"grad_norm": 1.9500186785754579, |
|
"learning_rate": 5.590169943749475e-06, |
|
"log_odds_chosen": 1.8904393911361694, |
|
"log_odds_ratio": -0.2255454808473587, |
|
"logits/chosen": -2.989861011505127, |
|
"logits/rejected": -3.0198075771331787, |
|
"logps/chosen": -0.424043744802475, |
|
"logps/rejected": -1.4651858806610107, |
|
"loss": 0.2783, |
|
"nll_loss": 0.267769455909729, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02120218798518181, |
|
"rewards/margins": 0.05205710977315903, |
|
"rewards/rejected": -0.07325930893421173, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.3529103303618248, |
|
"grad_norm": 1.9502765281924526, |
|
"learning_rate": 5.568460463897046e-06, |
|
"log_odds_chosen": 1.8929240703582764, |
|
"log_odds_ratio": -0.21857920289039612, |
|
"logits/chosen": -2.9535863399505615, |
|
"logits/rejected": -2.9874510765075684, |
|
"logps/chosen": -0.45026451349258423, |
|
"logps/rejected": -1.4960235357284546, |
|
"loss": 0.295, |
|
"nll_loss": 0.27629774808883667, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.022513221949338913, |
|
"rewards/margins": 0.052287958562374115, |
|
"rewards/rejected": -0.07480116933584213, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.3633980073413738, |
|
"grad_norm": 2.2093191033587223, |
|
"learning_rate": 5.547001962252292e-06, |
|
"log_odds_chosen": 1.7265195846557617, |
|
"log_odds_ratio": -0.23279574513435364, |
|
"logits/chosen": -2.9012649059295654, |
|
"logits/rejected": -2.9128100872039795, |
|
"logps/chosen": -0.4365314841270447, |
|
"logps/rejected": -1.3402652740478516, |
|
"loss": 0.2796, |
|
"nll_loss": 0.28851714730262756, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021826574578881264, |
|
"rewards/margins": 0.04518669471144676, |
|
"rewards/rejected": -0.06701326370239258, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3633980073413738, |
|
"eval_log_odds_chosen": 0.45076510310173035, |
|
"eval_log_odds_ratio": -0.6208177208900452, |
|
"eval_logits/chosen": -2.928496837615967, |
|
"eval_logits/rejected": -2.9256343841552734, |
|
"eval_logps/chosen": -0.8825219869613647, |
|
"eval_logps/rejected": -1.1770830154418945, |
|
"eval_loss": 0.5392885208129883, |
|
"eval_nll_loss": 0.5060464143753052, |
|
"eval_rewards/accuracies": 0.64682537317276, |
|
"eval_rewards/chosen": -0.044126104563474655, |
|
"eval_rewards/margins": 0.014728044159710407, |
|
"eval_rewards/rejected": -0.05885414779186249, |
|
"eval_runtime": 136.6608, |
|
"eval_samples_per_second": 14.591, |
|
"eval_steps_per_second": 0.461, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3738856843209228, |
|
"grad_norm": 1.715926192038861, |
|
"learning_rate": 5.525789639955377e-06, |
|
"log_odds_chosen": 2.0803933143615723, |
|
"log_odds_ratio": -0.21633043885231018, |
|
"logits/chosen": -2.926987409591675, |
|
"logits/rejected": -2.9622962474823, |
|
"logps/chosen": -0.43519288301467896, |
|
"logps/rejected": -1.6533997058868408, |
|
"loss": 0.2713, |
|
"nll_loss": 0.26452213525772095, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.02175964042544365, |
|
"rewards/margins": 0.06091034412384033, |
|
"rewards/rejected": -0.08266998082399368, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.3843733613004718, |
|
"grad_norm": 2.0174814570503012, |
|
"learning_rate": 5.504818825631804e-06, |
|
"log_odds_chosen": 2.108902931213379, |
|
"log_odds_ratio": -0.1835678517818451, |
|
"logits/chosen": -2.96756911277771, |
|
"logits/rejected": -2.9531686305999756, |
|
"logps/chosen": -0.3781605362892151, |
|
"logps/rejected": -1.4976880550384521, |
|
"loss": 0.267, |
|
"nll_loss": 0.25148090720176697, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.018908025696873665, |
|
"rewards/margins": 0.055976372212171555, |
|
"rewards/rejected": -0.07488439977169037, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.394861038280021, |
|
"grad_norm": 2.317364085817375, |
|
"learning_rate": 5.484084971070817e-06, |
|
"log_odds_chosen": 1.9238555431365967, |
|
"log_odds_ratio": -0.2074807584285736, |
|
"logits/chosen": -2.923131227493286, |
|
"logits/rejected": -2.9520606994628906, |
|
"logps/chosen": -0.42446833848953247, |
|
"logps/rejected": -1.4086004495620728, |
|
"loss": 0.2852, |
|
"nll_loss": 0.28959181904792786, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021223418414592743, |
|
"rewards/margins": 0.049206603318452835, |
|
"rewards/rejected": -0.07043002545833588, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.40534871525957, |
|
"grad_norm": 2.165975215343917, |
|
"learning_rate": 5.4635836470815305e-06, |
|
"log_odds_chosen": 1.8837333917617798, |
|
"log_odds_ratio": -0.21855314075946808, |
|
"logits/chosen": -2.9127135276794434, |
|
"logits/rejected": -2.9249043464660645, |
|
"logps/chosen": -0.41960373520851135, |
|
"logps/rejected": -1.4260175228118896, |
|
"loss": 0.2787, |
|
"nll_loss": 0.25244617462158203, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.020980186760425568, |
|
"rewards/margins": 0.050320692360401154, |
|
"rewards/rejected": -0.07130087912082672, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.415836392239119, |
|
"grad_norm": 1.9224928940953034, |
|
"learning_rate": 5.443310539518174e-06, |
|
"log_odds_chosen": 2.056159734725952, |
|
"log_odds_ratio": -0.19483168423175812, |
|
"logits/chosen": -2.956674814224243, |
|
"logits/rejected": -2.9572062492370605, |
|
"logps/chosen": -0.4208443760871887, |
|
"logps/rejected": -1.5285457372665405, |
|
"loss": 0.2822, |
|
"nll_loss": 0.26951080560684204, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.021042218431830406, |
|
"rewards/margins": 0.05538507178425789, |
|
"rewards/rejected": -0.07642728835344315, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.426324069218668, |
|
"grad_norm": 2.0115204434239025, |
|
"learning_rate": 5.423261445466404e-06, |
|
"log_odds_chosen": 1.707457184791565, |
|
"log_odds_ratio": -0.2479782998561859, |
|
"logits/chosen": -2.915250301361084, |
|
"logits/rejected": -2.9445343017578125, |
|
"logps/chosen": -0.4267791211605072, |
|
"logps/rejected": -1.3377535343170166, |
|
"loss": 0.2925, |
|
"nll_loss": 0.29825955629348755, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.02133895456790924, |
|
"rewards/margins": 0.04554871469736099, |
|
"rewards/rejected": -0.06688766926527023, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.436811746198217, |
|
"grad_norm": 2.0083912520624234, |
|
"learning_rate": 5.403432269582992e-06, |
|
"log_odds_chosen": 1.7433815002441406, |
|
"log_odds_ratio": -0.23284384608268738, |
|
"logits/chosen": -2.9682974815368652, |
|
"logits/rejected": -2.9809725284576416, |
|
"logps/chosen": -0.4545938968658447, |
|
"logps/rejected": -1.3821640014648438, |
|
"loss": 0.2995, |
|
"nll_loss": 0.2861328721046448, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.022729698568582535, |
|
"rewards/margins": 0.04637850075960159, |
|
"rewards/rejected": -0.06910820305347443, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.447299423177766, |
|
"grad_norm": 1.918494069287167, |
|
"learning_rate": 5.383819020581656e-06, |
|
"log_odds_chosen": 1.839255690574646, |
|
"log_odds_ratio": -0.22518055140972137, |
|
"logits/chosen": -2.9555628299713135, |
|
"logits/rejected": -2.968390703201294, |
|
"logps/chosen": -0.4370731711387634, |
|
"logps/rejected": -1.4699593782424927, |
|
"loss": 0.2859, |
|
"nll_loss": 0.28876128792762756, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021853657439351082, |
|
"rewards/margins": 0.051644302904605865, |
|
"rewards/rejected": -0.0734979659318924, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.457787100157315, |
|
"grad_norm": 1.8701436058229068, |
|
"learning_rate": 5.364417807858201e-06, |
|
"log_odds_chosen": 2.0006766319274902, |
|
"log_odds_ratio": -0.19503512978553772, |
|
"logits/chosen": -2.9456233978271484, |
|
"logits/rejected": -2.9416487216949463, |
|
"logps/chosen": -0.397217720746994, |
|
"logps/rejected": -1.458070993423462, |
|
"loss": 0.2898, |
|
"nll_loss": 0.2990682125091553, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.01986088417470455, |
|
"rewards/margins": 0.05304265767335892, |
|
"rewards/rejected": -0.07290354371070862, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.4682747771368643, |
|
"grad_norm": 1.8947645182805886, |
|
"learning_rate": 5.345224838248489e-06, |
|
"log_odds_chosen": 1.9478137493133545, |
|
"log_odds_ratio": -0.22849062085151672, |
|
"logits/chosen": -2.9488446712493896, |
|
"logits/rejected": -2.980994462966919, |
|
"logps/chosen": -0.38306254148483276, |
|
"logps/rejected": -1.40244460105896, |
|
"loss": 0.2784, |
|
"nll_loss": 0.27079683542251587, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.019153129309415817, |
|
"rewards/margins": 0.05096910148859024, |
|
"rewards/rejected": -0.07012222707271576, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.4682747771368643, |
|
"eval_log_odds_chosen": 0.4410339295864105, |
|
"eval_log_odds_ratio": -0.6236060261726379, |
|
"eval_logits/chosen": -2.9594457149505615, |
|
"eval_logits/rejected": -2.9583115577697754, |
|
"eval_logps/chosen": -0.8884981274604797, |
|
"eval_logps/rejected": -1.1784039735794067, |
|
"eval_loss": 0.5364598631858826, |
|
"eval_nll_loss": 0.5036527514457703, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.044424910098314285, |
|
"eval_rewards/margins": 0.014495291747152805, |
|
"eval_rewards/rejected": -0.058920200914144516, |
|
"eval_runtime": 139.2595, |
|
"eval_samples_per_second": 14.319, |
|
"eval_steps_per_second": 0.452, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.4787624541164133, |
|
"grad_norm": 2.1665159464201142, |
|
"learning_rate": 5.326236412913075e-06, |
|
"log_odds_chosen": 1.7970411777496338, |
|
"log_odds_ratio": -0.2380552738904953, |
|
"logits/chosen": -2.9149088859558105, |
|
"logits/rejected": -2.9543135166168213, |
|
"logps/chosen": -0.4362480640411377, |
|
"logps/rejected": -1.3472230434417725, |
|
"loss": 0.29, |
|
"nll_loss": 0.2710421681404114, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021812403574585915, |
|
"rewards/margins": 0.04554874822497368, |
|
"rewards/rejected": -0.06736114621162415, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.4892501310959623, |
|
"grad_norm": 2.196966160421767, |
|
"learning_rate": 5.307448924342753e-06, |
|
"log_odds_chosen": 1.8308820724487305, |
|
"log_odds_ratio": -0.21477296948432922, |
|
"logits/chosen": -2.877204179763794, |
|
"logits/rejected": -2.932901620864868, |
|
"logps/chosen": -0.4031652510166168, |
|
"logps/rejected": -1.3179484605789185, |
|
"loss": 0.2855, |
|
"nll_loss": 0.2783321738243103, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.02015826478600502, |
|
"rewards/margins": 0.04573915898799896, |
|
"rewards/rejected": -0.06589742004871368, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.4997378080755113, |
|
"grad_norm": 2.1884907491879084, |
|
"learning_rate": 5.28885885347945e-06, |
|
"log_odds_chosen": 1.9711707830429077, |
|
"log_odds_ratio": -0.20648148655891418, |
|
"logits/chosen": -2.954136371612549, |
|
"logits/rejected": -2.9814727306365967, |
|
"logps/chosen": -0.41374531388282776, |
|
"logps/rejected": -1.4304702281951904, |
|
"loss": 0.2924, |
|
"nll_loss": 0.27289509773254395, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.020687269046902657, |
|
"rewards/margins": 0.0508362352848053, |
|
"rewards/rejected": -0.071523517370224, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.5102254850550603, |
|
"grad_norm": 2.124176001387226, |
|
"learning_rate": 5.270462766947299e-06, |
|
"log_odds_chosen": 1.7731349468231201, |
|
"log_odds_ratio": -0.2392440289258957, |
|
"logits/chosen": -2.9405388832092285, |
|
"logits/rejected": -2.9464943408966064, |
|
"logps/chosen": -0.4539235234260559, |
|
"logps/rejected": -1.403793454170227, |
|
"loss": 0.2961, |
|
"nll_loss": 0.2940642237663269, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.022696174681186676, |
|
"rewards/margins": 0.04749349504709244, |
|
"rewards/rejected": -0.07018966972827911, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5207131620346095, |
|
"grad_norm": 1.8197825407446042, |
|
"learning_rate": 5.252257314388902e-06, |
|
"log_odds_chosen": 1.7956994771957397, |
|
"log_odds_ratio": -0.22454524040222168, |
|
"logits/chosen": -2.954716444015503, |
|
"logits/rejected": -2.978447437286377, |
|
"logps/chosen": -0.4430459439754486, |
|
"logps/rejected": -1.4194531440734863, |
|
"loss": 0.2777, |
|
"nll_loss": 0.24652138352394104, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02215229719877243, |
|
"rewards/margins": 0.048820365220308304, |
|
"rewards/rejected": -0.07097266614437103, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.5312008390141583, |
|
"grad_norm": 2.1915818543360355, |
|
"learning_rate": 5.234239225902137e-06, |
|
"log_odds_chosen": 1.9382715225219727, |
|
"log_odds_ratio": -0.1963178515434265, |
|
"logits/chosen": -2.8938894271850586, |
|
"logits/rejected": -2.924325466156006, |
|
"logps/chosen": -0.39880725741386414, |
|
"logps/rejected": -1.4752063751220703, |
|
"loss": 0.2971, |
|
"nll_loss": 0.2676003575325012, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.019940361380577087, |
|
"rewards/margins": 0.05381995439529419, |
|
"rewards/rejected": -0.07376032322645187, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.5416885159937075, |
|
"grad_norm": 2.1118618734250307, |
|
"learning_rate": 5.216405309573011e-06, |
|
"log_odds_chosen": 1.9139398336410522, |
|
"log_odds_ratio": -0.19271975755691528, |
|
"logits/chosen": -3.0117218494415283, |
|
"logits/rejected": -3.0411810874938965, |
|
"logps/chosen": -0.42149630188941956, |
|
"logps/rejected": -1.471760869026184, |
|
"loss": 0.2889, |
|
"nll_loss": 0.27934783697128296, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.0210748128592968, |
|
"rewards/margins": 0.052513234317302704, |
|
"rewards/rejected": -0.0735880434513092, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.5521761929732563, |
|
"grad_norm": 2.0510895547316745, |
|
"learning_rate": 5.198752449100364e-06, |
|
"log_odds_chosen": 2.0376482009887695, |
|
"log_odds_ratio": -0.19703765213489532, |
|
"logits/chosen": -3.009754180908203, |
|
"logits/rejected": -3.016758441925049, |
|
"logps/chosen": -0.40712347626686096, |
|
"logps/rejected": -1.459837555885315, |
|
"loss": 0.2888, |
|
"nll_loss": 0.3001149892807007, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.020356174558401108, |
|
"rewards/margins": 0.05263570696115494, |
|
"rewards/rejected": -0.07299187034368515, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.5626638699528055, |
|
"grad_norm": 2.1669568438399684, |
|
"learning_rate": 5.181277601508398e-06, |
|
"log_odds_chosen": 1.8304507732391357, |
|
"log_odds_ratio": -0.2394884079694748, |
|
"logits/chosen": -2.9779343605041504, |
|
"logits/rejected": -3.008795738220215, |
|
"logps/chosen": -0.4576667249202728, |
|
"logps/rejected": -1.4601542949676514, |
|
"loss": 0.2888, |
|
"nll_loss": 0.29476073384284973, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0228833369910717, |
|
"rewards/margins": 0.05012437701225281, |
|
"rewards/rejected": -0.07300771772861481, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.5731515469323545, |
|
"grad_norm": 2.372050874462119, |
|
"learning_rate": 5.163977794943223e-06, |
|
"log_odds_chosen": 1.9750179052352905, |
|
"log_odds_ratio": -0.19530083239078522, |
|
"logits/chosen": -2.9395532608032227, |
|
"logits/rejected": -2.991283893585205, |
|
"logps/chosen": -0.42392611503601074, |
|
"logps/rejected": -1.5091795921325684, |
|
"loss": 0.2873, |
|
"nll_loss": 0.2818702757358551, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021196305751800537, |
|
"rewards/margins": 0.0542626678943634, |
|
"rewards/rejected": -0.07545898109674454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5731515469323545, |
|
"eval_log_odds_chosen": 0.4364486038684845, |
|
"eval_log_odds_ratio": -0.6225508451461792, |
|
"eval_logits/chosen": -2.965731382369995, |
|
"eval_logits/rejected": -2.966355323791504, |
|
"eval_logps/chosen": -0.8718044757843018, |
|
"eval_logps/rejected": -1.158449649810791, |
|
"eval_loss": 0.53301602602005, |
|
"eval_nll_loss": 0.5004281997680664, |
|
"eval_rewards/accuracies": 0.6448412537574768, |
|
"eval_rewards/chosen": -0.043590229004621506, |
|
"eval_rewards/margins": 0.014332256279885769, |
|
"eval_rewards/rejected": -0.05792247876524925, |
|
"eval_runtime": 139.8515, |
|
"eval_samples_per_second": 14.258, |
|
"eval_steps_per_second": 0.45, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5836392239119035, |
|
"grad_norm": 1.9123802783189798, |
|
"learning_rate": 5.146850126549788e-06, |
|
"log_odds_chosen": 1.6361440420150757, |
|
"log_odds_ratio": -0.26433151960372925, |
|
"logits/chosen": -2.943331003189087, |
|
"logits/rejected": -2.9721503257751465, |
|
"logps/chosen": -0.44553548097610474, |
|
"logps/rejected": -1.2933813333511353, |
|
"loss": 0.3044, |
|
"nll_loss": 0.2870228588581085, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.022276774048805237, |
|
"rewards/margins": 0.042392291128635406, |
|
"rewards/rejected": -0.06466906517744064, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.5941269008914527, |
|
"grad_norm": 1.9978617693896288, |
|
"learning_rate": 5.129891760425772e-06, |
|
"log_odds_chosen": 1.872454285621643, |
|
"log_odds_ratio": -0.21693451702594757, |
|
"logits/chosen": -2.9198169708251953, |
|
"logits/rejected": -2.9594712257385254, |
|
"logps/chosen": -0.4238964915275574, |
|
"logps/rejected": -1.4147742986679077, |
|
"loss": 0.2765, |
|
"nll_loss": 0.2593707740306854, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.021194826811552048, |
|
"rewards/margins": 0.049543894827365875, |
|
"rewards/rejected": -0.07073871791362762, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.6046145778710015, |
|
"grad_norm": 2.2358254561438966, |
|
"learning_rate": 5.113099925649136e-06, |
|
"log_odds_chosen": 1.7420718669891357, |
|
"log_odds_ratio": -0.2600535750389099, |
|
"logits/chosen": -2.9620399475097656, |
|
"logits/rejected": -2.997101068496704, |
|
"logps/chosen": -0.4705958366394043, |
|
"logps/rejected": -1.435579538345337, |
|
"loss": 0.2766, |
|
"nll_loss": 0.28323301672935486, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.023529794067144394, |
|
"rewards/margins": 0.048249177634716034, |
|
"rewards/rejected": -0.07177898287773132, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.6151022548505507, |
|
"grad_norm": 2.123071067312132, |
|
"learning_rate": 5.096471914376255e-06, |
|
"log_odds_chosen": 2.0446419715881348, |
|
"log_odds_ratio": -0.20973734557628632, |
|
"logits/chosen": -2.8849668502807617, |
|
"logits/rejected": -2.91094970703125, |
|
"logps/chosen": -0.42269793152809143, |
|
"logps/rejected": -1.4985077381134033, |
|
"loss": 0.2842, |
|
"nll_loss": 0.24874058365821838, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.021134894341230392, |
|
"rewards/margins": 0.053790487349033356, |
|
"rewards/rejected": -0.07492538541555405, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6255899318300995, |
|
"grad_norm": 1.8574119456068037, |
|
"learning_rate": 5.08000508000762e-06, |
|
"log_odds_chosen": 1.8896774053573608, |
|
"log_odds_ratio": -0.2109728306531906, |
|
"logits/chosen": -2.9518914222717285, |
|
"logits/rejected": -2.9677398204803467, |
|
"logps/chosen": -0.42254775762557983, |
|
"logps/rejected": -1.4004069566726685, |
|
"loss": 0.2737, |
|
"nll_loss": 0.26676517724990845, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02112739160656929, |
|
"rewards/margins": 0.04889295622706413, |
|
"rewards/rejected": -0.07002034783363342, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.6360776088096487, |
|
"grad_norm": 2.012947859419835, |
|
"learning_rate": 5.0636968354183334e-06, |
|
"log_odds_chosen": 1.7877776622772217, |
|
"log_odds_ratio": -0.2195170670747757, |
|
"logits/chosen": -2.916713237762451, |
|
"logits/rejected": -2.9442696571350098, |
|
"logps/chosen": -0.4229874610900879, |
|
"logps/rejected": -1.3620960712432861, |
|
"loss": 0.2937, |
|
"nll_loss": 0.28985968232154846, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021149372681975365, |
|
"rewards/margins": 0.046955425292253494, |
|
"rewards/rejected": -0.06810478866100311, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.6465652857891977, |
|
"grad_norm": 1.9554610757973563, |
|
"learning_rate": 5.047544651250688e-06, |
|
"log_odds_chosen": 1.9977741241455078, |
|
"log_odds_ratio": -0.22808516025543213, |
|
"logits/chosen": -2.95414137840271, |
|
"logits/rejected": -2.9667911529541016, |
|
"logps/chosen": -0.40563470125198364, |
|
"logps/rejected": -1.493981122970581, |
|
"loss": 0.2746, |
|
"nll_loss": 0.25610029697418213, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.020281735807657242, |
|
"rewards/margins": 0.05441732332110405, |
|
"rewards/rejected": -0.07469905912876129, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.6570529627687467, |
|
"grad_norm": 2.2417227837369094, |
|
"learning_rate": 5.031546054266276e-06, |
|
"log_odds_chosen": 1.8591692447662354, |
|
"log_odds_ratio": -0.23143061995506287, |
|
"logits/chosen": -3.0023272037506104, |
|
"logits/rejected": -3.0128941535949707, |
|
"logps/chosen": -0.46788668632507324, |
|
"logps/rejected": -1.490392804145813, |
|
"loss": 0.2962, |
|
"nll_loss": 0.31111472845077515, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.02339433692395687, |
|
"rewards/margins": 0.05112530663609505, |
|
"rewards/rejected": -0.07451964914798737, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.667540639748296, |
|
"grad_norm": 2.0152925811378846, |
|
"learning_rate": 5.015698625755192e-06, |
|
"log_odds_chosen": 1.9612891674041748, |
|
"log_odds_ratio": -0.22349119186401367, |
|
"logits/chosen": -2.9373695850372314, |
|
"logits/rejected": -2.9659922122955322, |
|
"logps/chosen": -0.40127071738243103, |
|
"logps/rejected": -1.4479907751083374, |
|
"loss": 0.2939, |
|
"nll_loss": 0.2725040912628174, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02006353810429573, |
|
"rewards/margins": 0.05233600735664368, |
|
"rewards/rejected": -0.07239954173564911, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.6780283167278447, |
|
"grad_norm": 1.9355725247245243, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 1.8742882013320923, |
|
"log_odds_ratio": -0.21055075526237488, |
|
"logits/chosen": -2.9387471675872803, |
|
"logits/rejected": -2.9844515323638916, |
|
"logps/chosen": -0.43298736214637756, |
|
"logps/rejected": -1.4716593027114868, |
|
"loss": 0.276, |
|
"nll_loss": 0.26002392172813416, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02164936624467373, |
|
"rewards/margins": 0.05193359777331352, |
|
"rewards/rejected": -0.0735829621553421, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.6780283167278447, |
|
"eval_log_odds_chosen": 0.4569767117500305, |
|
"eval_log_odds_ratio": -0.6159732937812805, |
|
"eval_logits/chosen": -2.932406187057495, |
|
"eval_logits/rejected": -2.9357593059539795, |
|
"eval_logps/chosen": -0.8832988142967224, |
|
"eval_logps/rejected": -1.1878604888916016, |
|
"eval_loss": 0.5367424488067627, |
|
"eval_nll_loss": 0.5040929913520813, |
|
"eval_rewards/accuracies": 0.6408730149269104, |
|
"eval_rewards/chosen": -0.04416494444012642, |
|
"eval_rewards/margins": 0.015228085219860077, |
|
"eval_rewards/rejected": -0.0593930259346962, |
|
"eval_runtime": 138.0302, |
|
"eval_samples_per_second": 14.446, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.688515993707394, |
|
"grad_norm": 1.9448584897613828, |
|
"learning_rate": 4.984447862792268e-06, |
|
"log_odds_chosen": 2.0258474349975586, |
|
"log_odds_ratio": -0.2537488639354706, |
|
"logits/chosen": -2.9370341300964355, |
|
"logits/rejected": -2.959137439727783, |
|
"logps/chosen": -0.4205976128578186, |
|
"logps/rejected": -1.51674485206604, |
|
"loss": 0.2805, |
|
"nll_loss": 0.2590489387512207, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.0210298802703619, |
|
"rewards/margins": 0.054807353764772415, |
|
"rewards/rejected": -0.07583723217248917, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.6990036706869427, |
|
"grad_norm": 2.2985078763398503, |
|
"learning_rate": 4.969039949999534e-06, |
|
"log_odds_chosen": 1.9926655292510986, |
|
"log_odds_ratio": -0.209347203373909, |
|
"logits/chosen": -2.9543755054473877, |
|
"logits/rejected": -2.979072093963623, |
|
"logps/chosen": -0.4242986738681793, |
|
"logps/rejected": -1.527527093887329, |
|
"loss": 0.2829, |
|
"nll_loss": 0.28810399770736694, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.021214932203292847, |
|
"rewards/margins": 0.05516142398118973, |
|
"rewards/rejected": -0.07637635618448257, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.709491347666492, |
|
"grad_norm": 1.978508364107179, |
|
"learning_rate": 4.9537740461807e-06, |
|
"log_odds_chosen": 1.7989534139633179, |
|
"log_odds_ratio": -0.22280922532081604, |
|
"logits/chosen": -2.9272611141204834, |
|
"logits/rejected": -2.933403968811035, |
|
"logps/chosen": -0.4125545024871826, |
|
"logps/rejected": -1.371010422706604, |
|
"loss": 0.2723, |
|
"nll_loss": 0.27273207902908325, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.02062772400677204, |
|
"rewards/margins": 0.04792279377579689, |
|
"rewards/rejected": -0.06855051219463348, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.719979024646041, |
|
"grad_norm": 2.5294696595366375, |
|
"learning_rate": 4.938647983247949e-06, |
|
"log_odds_chosen": 1.8762280941009521, |
|
"log_odds_ratio": -0.23052379488945007, |
|
"logits/chosen": -2.9176859855651855, |
|
"logits/rejected": -2.937653064727783, |
|
"logps/chosen": -0.4308241307735443, |
|
"logps/rejected": -1.4621460437774658, |
|
"loss": 0.2707, |
|
"nll_loss": 0.24837055802345276, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.021541204303503036, |
|
"rewards/margins": 0.05156610533595085, |
|
"rewards/rejected": -0.07310730963945389, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.73046670162559, |
|
"grad_norm": 1.9845638290615137, |
|
"learning_rate": 4.9236596391733095e-06, |
|
"log_odds_chosen": 1.9353539943695068, |
|
"log_odds_ratio": -0.22219491004943848, |
|
"logits/chosen": -2.9324100017547607, |
|
"logits/rejected": -2.9492199420928955, |
|
"logps/chosen": -0.4047132134437561, |
|
"logps/rejected": -1.447388768196106, |
|
"loss": 0.2921, |
|
"nll_loss": 0.2786787152290344, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020235659554600716, |
|
"rewards/margins": 0.05213377624750137, |
|
"rewards/rejected": -0.07236944139003754, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.740954378605139, |
|
"grad_norm": 2.1313335783196914, |
|
"learning_rate": 4.9088069367381605e-06, |
|
"log_odds_chosen": 1.9517314434051514, |
|
"log_odds_ratio": -0.19579176604747772, |
|
"logits/chosen": -2.9807212352752686, |
|
"logits/rejected": -3.004951000213623, |
|
"logps/chosen": -0.4060528874397278, |
|
"logps/rejected": -1.4121928215026855, |
|
"loss": 0.2851, |
|
"nll_loss": 0.27768373489379883, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020302647724747658, |
|
"rewards/margins": 0.05030699446797371, |
|
"rewards/rejected": -0.07060963660478592, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.751442055584688, |
|
"grad_norm": 1.893515732849545, |
|
"learning_rate": 4.894087842323964e-06, |
|
"log_odds_chosen": 1.8834346532821655, |
|
"log_odds_ratio": -0.20945528149604797, |
|
"logits/chosen": -2.9691452980041504, |
|
"logits/rejected": -3.0074009895324707, |
|
"logps/chosen": -0.4027465283870697, |
|
"logps/rejected": -1.374361276626587, |
|
"loss": 0.2926, |
|
"nll_loss": 0.26718848943710327, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020137326791882515, |
|
"rewards/margins": 0.04858074709773064, |
|
"rewards/rejected": -0.0687180757522583, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.7619297325642371, |
|
"grad_norm": 2.0915190498544263, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 1.8165385723114014, |
|
"log_odds_ratio": -0.21812555193901062, |
|
"logits/chosen": -3.0662589073181152, |
|
"logits/rejected": -3.089877128601074, |
|
"logps/chosen": -0.40138545632362366, |
|
"logps/rejected": -1.3200931549072266, |
|
"loss": 0.2998, |
|
"nll_loss": 0.29331129789352417, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020069271326065063, |
|
"rewards/margins": 0.04593539237976074, |
|
"rewards/rejected": -0.06600465625524521, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.772417409543786, |
|
"grad_norm": 2.1457501870245417, |
|
"learning_rate": 4.865042554105199e-06, |
|
"log_odds_chosen": 1.869539499282837, |
|
"log_odds_ratio": -0.2280159890651703, |
|
"logits/chosen": -2.991488456726074, |
|
"logits/rejected": -2.98630690574646, |
|
"logps/chosen": -0.4090718626976013, |
|
"logps/rejected": -1.36448073387146, |
|
"loss": 0.2858, |
|
"nll_loss": 0.2776942253112793, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.020453594624996185, |
|
"rewards/margins": 0.04777044430375099, |
|
"rewards/rejected": -0.06822402775287628, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.7829050865233351, |
|
"grad_norm": 2.3665022543070093, |
|
"learning_rate": 4.850712500726659e-06, |
|
"log_odds_chosen": 1.9791815280914307, |
|
"log_odds_ratio": -0.19878429174423218, |
|
"logits/chosen": -2.9824297428131104, |
|
"logits/rejected": -3.022101640701294, |
|
"logps/chosen": -0.4144412875175476, |
|
"logps/rejected": -1.4597278833389282, |
|
"loss": 0.2715, |
|
"nll_loss": 0.28446242213249207, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.02072206512093544, |
|
"rewards/margins": 0.052264340221881866, |
|
"rewards/rejected": -0.07298640161752701, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.7829050865233351, |
|
"eval_log_odds_chosen": 0.4425116777420044, |
|
"eval_log_odds_ratio": -0.6271889209747314, |
|
"eval_logits/chosen": -3.019425392150879, |
|
"eval_logits/rejected": -3.020922899246216, |
|
"eval_logps/chosen": -0.8710321187973022, |
|
"eval_logps/rejected": -1.1603412628173828, |
|
"eval_loss": 0.5348805785179138, |
|
"eval_nll_loss": 0.5024282336235046, |
|
"eval_rewards/accuracies": 0.6448412537574768, |
|
"eval_rewards/chosen": -0.04355160519480705, |
|
"eval_rewards/margins": 0.014465462416410446, |
|
"eval_rewards/rejected": -0.0580170638859272, |
|
"eval_runtime": 136.3216, |
|
"eval_samples_per_second": 14.627, |
|
"eval_steps_per_second": 0.462, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.7933927635028841, |
|
"grad_norm": 1.847904822728325, |
|
"learning_rate": 4.836508334066745e-06, |
|
"log_odds_chosen": 1.9795688390731812, |
|
"log_odds_ratio": -0.2207694798707962, |
|
"logits/chosen": -3.0054497718811035, |
|
"logits/rejected": -3.0154829025268555, |
|
"logps/chosen": -0.4081927239894867, |
|
"logps/rejected": -1.4390795230865479, |
|
"loss": 0.264, |
|
"nll_loss": 0.24716749787330627, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.020409639924764633, |
|
"rewards/margins": 0.051544345915317535, |
|
"rewards/rejected": -0.07195398211479187, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.8038804404824331, |
|
"grad_norm": 1.7750027737169987, |
|
"learning_rate": 4.822428221704122e-06, |
|
"log_odds_chosen": 1.926945686340332, |
|
"log_odds_ratio": -0.22434870898723602, |
|
"logits/chosen": -3.0268912315368652, |
|
"logits/rejected": -3.035226583480835, |
|
"logps/chosen": -0.43201422691345215, |
|
"logps/rejected": -1.498827576637268, |
|
"loss": 0.2864, |
|
"nll_loss": 0.25820285081863403, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.021600713953375816, |
|
"rewards/margins": 0.053340665996074677, |
|
"rewards/rejected": -0.07494138181209564, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.8143681174619821, |
|
"grad_norm": 2.0662716537028354, |
|
"learning_rate": 4.8084703683434506e-06, |
|
"log_odds_chosen": 1.974784255027771, |
|
"log_odds_ratio": -0.21157677471637726, |
|
"logits/chosen": -3.010627031326294, |
|
"logits/rejected": -2.9982268810272217, |
|
"logps/chosen": -0.4355824589729309, |
|
"logps/rejected": -1.5232689380645752, |
|
"loss": 0.2903, |
|
"nll_loss": 0.2755037248134613, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021779123693704605, |
|
"rewards/margins": 0.05438433215022087, |
|
"rewards/rejected": -0.07616344839334488, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8248557944415311, |
|
"grad_norm": 2.1360074988574445, |
|
"learning_rate": 4.794633014853843e-06, |
|
"log_odds_chosen": 1.847333312034607, |
|
"log_odds_ratio": -0.2377551794052124, |
|
"logits/chosen": -3.006833553314209, |
|
"logits/rejected": -3.0122854709625244, |
|
"logps/chosen": -0.4366019368171692, |
|
"logps/rejected": -1.4164003133773804, |
|
"loss": 0.304, |
|
"nll_loss": 0.29017573595046997, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.02183009497821331, |
|
"rewards/margins": 0.0489899218082428, |
|
"rewards/rejected": -0.07082001864910126, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.8353434714210803, |
|
"grad_norm": 1.9891927691131213, |
|
"learning_rate": 4.780914437337575e-06, |
|
"log_odds_chosen": 1.8539154529571533, |
|
"log_odds_ratio": -0.23103201389312744, |
|
"logits/chosen": -2.9830121994018555, |
|
"logits/rejected": -2.9818801879882812, |
|
"logps/chosen": -0.4274306297302246, |
|
"logps/rejected": -1.4196858406066895, |
|
"loss": 0.2958, |
|
"nll_loss": 0.2937518060207367, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.02137153223156929, |
|
"rewards/margins": 0.049612756818532944, |
|
"rewards/rejected": -0.07098428905010223, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.8458311484006291, |
|
"grad_norm": 1.827588117065436, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 2.2149860858917236, |
|
"log_odds_ratio": -0.2075362503528595, |
|
"logits/chosen": -2.9530441761016846, |
|
"logits/rejected": -2.9839682579040527, |
|
"logps/chosen": -0.391355037689209, |
|
"logps/rejected": -1.6375446319580078, |
|
"loss": 0.2721, |
|
"nll_loss": 0.2694031000137329, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.01956775411963463, |
|
"rewards/margins": 0.06230948120355606, |
|
"rewards/rejected": -0.08187723159790039, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.8563188253801783, |
|
"grad_norm": 1.8203811521479276, |
|
"learning_rate": 4.7538268854152834e-06, |
|
"log_odds_chosen": 1.7995598316192627, |
|
"log_odds_ratio": -0.244699165225029, |
|
"logits/chosen": -3.011706829071045, |
|
"logits/rejected": -3.024837017059326, |
|
"logps/chosen": -0.4394347071647644, |
|
"logps/rejected": -1.4033676385879517, |
|
"loss": 0.2771, |
|
"nll_loss": 0.25858861207962036, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02197173610329628, |
|
"rewards/margins": 0.04819665104150772, |
|
"rewards/rejected": -0.0701683908700943, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.8668065023597273, |
|
"grad_norm": 2.2623646165216313, |
|
"learning_rate": 4.740454631399773e-06, |
|
"log_odds_chosen": 1.962255835533142, |
|
"log_odds_ratio": -0.23438410460948944, |
|
"logits/chosen": -2.949073314666748, |
|
"logits/rejected": -2.989229202270508, |
|
"logps/chosen": -0.3985145688056946, |
|
"logps/rejected": -1.4544894695281982, |
|
"loss": 0.2941, |
|
"nll_loss": 0.29249390959739685, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.019925730302929878, |
|
"rewards/margins": 0.052798740565776825, |
|
"rewards/rejected": -0.07272447645664215, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.8772941793392763, |
|
"grad_norm": 2.5104520915032538, |
|
"learning_rate": 4.727194592470656e-06, |
|
"log_odds_chosen": 2.0800955295562744, |
|
"log_odds_ratio": -0.19981749355793, |
|
"logits/chosen": -2.9771628379821777, |
|
"logits/rejected": -3.0005829334259033, |
|
"logps/chosen": -0.42085084319114685, |
|
"logps/rejected": -1.603994607925415, |
|
"loss": 0.2844, |
|
"nll_loss": 0.2677140235900879, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.02104254439473152, |
|
"rewards/margins": 0.05915718153119087, |
|
"rewards/rejected": -0.08019973337650299, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.8877818563188253, |
|
"grad_norm": 2.077913541951449, |
|
"learning_rate": 4.714045207910318e-06, |
|
"log_odds_chosen": 2.1426799297332764, |
|
"log_odds_ratio": -0.18838170170783997, |
|
"logits/chosen": -2.950552463531494, |
|
"logits/rejected": -2.9804420471191406, |
|
"logps/chosen": -0.41320332884788513, |
|
"logps/rejected": -1.622671365737915, |
|
"loss": 0.2717, |
|
"nll_loss": 0.2544669210910797, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.020660167559981346, |
|
"rewards/margins": 0.060473401099443436, |
|
"rewards/rejected": -0.08113356679677963, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8877818563188253, |
|
"eval_log_odds_chosen": 0.48237088322639465, |
|
"eval_log_odds_ratio": -0.6183955669403076, |
|
"eval_logits/chosen": -2.9562783241271973, |
|
"eval_logits/rejected": -2.957892894744873, |
|
"eval_logps/chosen": -0.8997318148612976, |
|
"eval_logps/rejected": -1.2325206995010376, |
|
"eval_loss": 0.5340895652770996, |
|
"eval_nll_loss": 0.5023403763771057, |
|
"eval_rewards/accuracies": 0.6547619104385376, |
|
"eval_rewards/chosen": -0.04498659446835518, |
|
"eval_rewards/margins": 0.01663944497704506, |
|
"eval_rewards/rejected": -0.06162603944540024, |
|
"eval_runtime": 136.1464, |
|
"eval_samples_per_second": 14.646, |
|
"eval_steps_per_second": 0.463, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8982695332983743, |
|
"grad_norm": 1.896252578291677, |
|
"learning_rate": 4.701004947222685e-06, |
|
"log_odds_chosen": 2.0811541080474854, |
|
"log_odds_ratio": -0.20500631630420685, |
|
"logits/chosen": -3.000387668609619, |
|
"logits/rejected": -2.983591079711914, |
|
"logps/chosen": -0.4098430573940277, |
|
"logps/rejected": -1.608665108680725, |
|
"loss": 0.2794, |
|
"nll_loss": 0.25453388690948486, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.020492153242230415, |
|
"rewards/margins": 0.05994110181927681, |
|
"rewards/rejected": -0.08043324947357178, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.9087572102779236, |
|
"grad_norm": 2.019085371673625, |
|
"learning_rate": 4.688072309384955e-06, |
|
"log_odds_chosen": 2.0144619941711426, |
|
"log_odds_ratio": -0.2020682841539383, |
|
"logits/chosen": -2.9534127712249756, |
|
"logits/rejected": -2.9533755779266357, |
|
"logps/chosen": -0.3999931216239929, |
|
"logps/rejected": -1.4992988109588623, |
|
"loss": 0.2775, |
|
"nll_loss": 0.26274845004081726, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.019999656826257706, |
|
"rewards/margins": 0.054965294897556305, |
|
"rewards/rejected": -0.07496494799852371, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.9192448872574723, |
|
"grad_norm": 1.9263871107241788, |
|
"learning_rate": 4.675245822121844e-06, |
|
"log_odds_chosen": 2.0367493629455566, |
|
"log_odds_ratio": -0.20607483386993408, |
|
"logits/chosen": -2.9868836402893066, |
|
"logits/rejected": -3.000213861465454, |
|
"logps/chosen": -0.4244080185890198, |
|
"logps/rejected": -1.5761488676071167, |
|
"loss": 0.2923, |
|
"nll_loss": 0.2808459997177124, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02122039906680584, |
|
"rewards/margins": 0.057587046176195145, |
|
"rewards/rejected": -0.07880743592977524, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9297325642370216, |
|
"grad_norm": 2.1487838733941365, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 2.0472216606140137, |
|
"log_odds_ratio": -0.22086529433727264, |
|
"logits/chosen": -2.9925904273986816, |
|
"logits/rejected": -2.985816240310669, |
|
"logps/chosen": -0.4373515248298645, |
|
"logps/rejected": -1.5831472873687744, |
|
"loss": 0.2713, |
|
"nll_loss": 0.2551635801792145, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021867576986551285, |
|
"rewards/margins": 0.057289790362119675, |
|
"rewards/rejected": -0.07915736734867096, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.9402202412165706, |
|
"grad_norm": 2.0463386352717112, |
|
"learning_rate": 4.649905549752772e-06, |
|
"log_odds_chosen": 2.1467113494873047, |
|
"log_odds_ratio": -0.21497011184692383, |
|
"logits/chosen": -2.938457727432251, |
|
"logits/rejected": -2.9367523193359375, |
|
"logps/chosen": -0.4192470610141754, |
|
"logps/rejected": -1.63271164894104, |
|
"loss": 0.2767, |
|
"nll_loss": 0.2981775999069214, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.02096235193312168, |
|
"rewards/margins": 0.06067322567105293, |
|
"rewards/rejected": -0.08163557946681976, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9507079181961196, |
|
"grad_norm": 1.9930187660935812, |
|
"learning_rate": 4.6373889576016826e-06, |
|
"log_odds_chosen": 2.145296573638916, |
|
"log_odds_ratio": -0.19072812795639038, |
|
"logits/chosen": -2.9529764652252197, |
|
"logits/rejected": -2.960404634475708, |
|
"logps/chosen": -0.407731294631958, |
|
"logps/rejected": -1.5777407884597778, |
|
"loss": 0.2761, |
|
"nll_loss": 0.2852553129196167, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.02038656547665596, |
|
"rewards/margins": 0.05850047990679741, |
|
"rewards/rejected": -0.07888703793287277, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.9611955951756685, |
|
"grad_norm": 2.0042665222271756, |
|
"learning_rate": 4.624972900628803e-06, |
|
"log_odds_chosen": 2.0522494316101074, |
|
"log_odds_ratio": -0.20059652626514435, |
|
"logits/chosen": -2.932502269744873, |
|
"logits/rejected": -2.9307363033294678, |
|
"logps/chosen": -0.4203645586967468, |
|
"logps/rejected": -1.5539976358413696, |
|
"loss": 0.276, |
|
"nll_loss": 0.2738272547721863, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02101822756230831, |
|
"rewards/margins": 0.05668165162205696, |
|
"rewards/rejected": -0.07769988477230072, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.9716832721552175, |
|
"grad_norm": 2.0226547316915258, |
|
"learning_rate": 4.6126560401444256e-06, |
|
"log_odds_chosen": 2.0710301399230957, |
|
"log_odds_ratio": -0.19392071664333344, |
|
"logits/chosen": -3.015066623687744, |
|
"logits/rejected": -2.99493145942688, |
|
"logps/chosen": -0.43072837591171265, |
|
"logps/rejected": -1.6065874099731445, |
|
"loss": 0.2748, |
|
"nll_loss": 0.2821330428123474, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02153642103075981, |
|
"rewards/margins": 0.05879295617341995, |
|
"rewards/rejected": -0.08032937347888947, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.9821709491347668, |
|
"grad_norm": 2.567857697275732, |
|
"learning_rate": 4.600437062282362e-06, |
|
"log_odds_chosen": 1.9227994680404663, |
|
"log_odds_ratio": -0.2224545031785965, |
|
"logits/chosen": -3.0251965522766113, |
|
"logits/rejected": -2.993910789489746, |
|
"logps/chosen": -0.4456098675727844, |
|
"logps/rejected": -1.529626488685608, |
|
"loss": 0.2788, |
|
"nll_loss": 0.28787270188331604, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0222804956138134, |
|
"rewards/margins": 0.054200828075408936, |
|
"rewards/rejected": -0.07648131996393204, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.9926586261143155, |
|
"grad_norm": 2.1545883447921654, |
|
"learning_rate": 4.588314677411235e-06, |
|
"log_odds_chosen": 2.2162415981292725, |
|
"log_odds_ratio": -0.20383968949317932, |
|
"logits/chosen": -3.039658784866333, |
|
"logits/rejected": -3.022245407104492, |
|
"logps/chosen": -0.420427143573761, |
|
"logps/rejected": -1.6983455419540405, |
|
"loss": 0.2857, |
|
"nll_loss": 0.24534273147583008, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.02102135680615902, |
|
"rewards/margins": 0.06389592587947845, |
|
"rewards/rejected": -0.08491728454828262, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.9926586261143155, |
|
"eval_log_odds_chosen": 0.48923251032829285, |
|
"eval_log_odds_ratio": -0.6193312406539917, |
|
"eval_logits/chosen": -3.0350046157836914, |
|
"eval_logits/rejected": -3.0279133319854736, |
|
"eval_logps/chosen": -0.908783495426178, |
|
"eval_logps/rejected": -1.2409300804138184, |
|
"eval_loss": 0.5407980680465698, |
|
"eval_nll_loss": 0.5090586543083191, |
|
"eval_rewards/accuracies": 0.6547619104385376, |
|
"eval_rewards/chosen": -0.04543917626142502, |
|
"eval_rewards/margins": 0.016607332974672318, |
|
"eval_rewards/rejected": -0.062046512961387634, |
|
"eval_runtime": 137.1653, |
|
"eval_samples_per_second": 14.537, |
|
"eval_steps_per_second": 0.459, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.0031463030938648, |
|
"grad_norm": 2.4971175632899385, |
|
"learning_rate": 4.576287619562756e-06, |
|
"log_odds_chosen": 2.549215793609619, |
|
"log_odds_ratio": -0.13884183764457703, |
|
"logits/chosen": -3.0293986797332764, |
|
"logits/rejected": -3.0052542686462402, |
|
"logps/chosen": -0.3389069139957428, |
|
"logps/rejected": -1.6784775257110596, |
|
"loss": 0.2535, |
|
"nll_loss": 0.2399848997592926, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.01694534718990326, |
|
"rewards/margins": 0.06697852909564972, |
|
"rewards/rejected": -0.08392388373613358, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.0136339800734135, |
|
"grad_norm": 2.5031224034871475, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 4.333657741546631, |
|
"log_odds_ratio": -0.02762582339346409, |
|
"logits/chosen": -2.869049549102783, |
|
"logits/rejected": -2.8186068534851074, |
|
"logps/chosen": -0.1433320939540863, |
|
"logps/rejected": -2.334181547164917, |
|
"loss": 0.1236, |
|
"nll_loss": 0.11940746009349823, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007166605442762375, |
|
"rewards/margins": 0.10954247415065765, |
|
"rewards/rejected": -0.11670909076929092, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.0241216570529628, |
|
"grad_norm": 1.9586057770651872, |
|
"learning_rate": 4.552514536059854e-06, |
|
"log_odds_chosen": 3.8062407970428467, |
|
"log_odds_ratio": -0.0499381422996521, |
|
"logits/chosen": -2.9369876384735107, |
|
"logits/rejected": -2.963967800140381, |
|
"logps/chosen": -0.1607118844985962, |
|
"logps/rejected": -1.9827187061309814, |
|
"loss": 0.116, |
|
"nll_loss": 0.11325522512197495, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.00803559459745884, |
|
"rewards/margins": 0.09110033512115479, |
|
"rewards/rejected": -0.09913593530654907, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.034609334032512, |
|
"grad_norm": 2.173705177159571, |
|
"learning_rate": 4.540766091864998e-06, |
|
"log_odds_chosen": 3.9211831092834473, |
|
"log_odds_ratio": -0.03853369504213333, |
|
"logits/chosen": -2.848071575164795, |
|
"logits/rejected": -2.927175760269165, |
|
"logps/chosen": -0.14356736838817596, |
|
"logps/rejected": -1.959979772567749, |
|
"loss": 0.1167, |
|
"nll_loss": 0.11882974952459335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007178368978202343, |
|
"rewards/margins": 0.09082063287496567, |
|
"rewards/rejected": -0.09799900650978088, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.0450970110120608, |
|
"grad_norm": 1.7557144572827617, |
|
"learning_rate": 4.529108136578383e-06, |
|
"log_odds_chosen": 4.060091495513916, |
|
"log_odds_ratio": -0.028795290738344193, |
|
"logits/chosen": -2.8138527870178223, |
|
"logits/rejected": -2.8606162071228027, |
|
"logps/chosen": -0.13301293551921844, |
|
"logps/rejected": -2.0062737464904785, |
|
"loss": 0.1151, |
|
"nll_loss": 0.1191815584897995, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006650646682828665, |
|
"rewards/margins": 0.09366302937269211, |
|
"rewards/rejected": -0.10031367838382721, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.05558468799161, |
|
"grad_norm": 1.69960315567237, |
|
"learning_rate": 4.517539514526257e-06, |
|
"log_odds_chosen": 4.352217674255371, |
|
"log_odds_ratio": -0.03757786005735397, |
|
"logits/chosen": -2.819655656814575, |
|
"logits/rejected": -2.8428378105163574, |
|
"logps/chosen": -0.14081783592700958, |
|
"logps/rejected": -2.33030104637146, |
|
"loss": 0.1135, |
|
"nll_loss": 0.11204487085342407, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007040892727673054, |
|
"rewards/margins": 0.10947415977716446, |
|
"rewards/rejected": -0.11651506274938583, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.0660723649711588, |
|
"grad_norm": 1.991621297994473, |
|
"learning_rate": 4.506059090593329e-06, |
|
"log_odds_chosen": 4.156961917877197, |
|
"log_odds_ratio": -0.0386335626244545, |
|
"logits/chosen": -2.8222968578338623, |
|
"logits/rejected": -2.880376100540161, |
|
"logps/chosen": -0.15631213784217834, |
|
"logps/rejected": -2.2803502082824707, |
|
"loss": 0.1083, |
|
"nll_loss": 0.11318318545818329, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007815606892108917, |
|
"rewards/margins": 0.1062019094824791, |
|
"rewards/rejected": -0.11401752382516861, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.076560041950708, |
|
"grad_norm": 1.8671392728507943, |
|
"learning_rate": 4.4946657497549474e-06, |
|
"log_odds_chosen": 4.751786708831787, |
|
"log_odds_ratio": -0.02287628874182701, |
|
"logits/chosen": -2.8250374794006348, |
|
"logits/rejected": -2.858389377593994, |
|
"logps/chosen": -0.136850968003273, |
|
"logps/rejected": -2.61843204498291, |
|
"loss": 0.1149, |
|
"nll_loss": 0.11261866241693497, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006842548493295908, |
|
"rewards/margins": 0.12407903373241425, |
|
"rewards/rejected": -0.13092158734798431, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.0870477189302568, |
|
"grad_norm": 2.047221073846021, |
|
"learning_rate": 4.483358396622204e-06, |
|
"log_odds_chosen": 4.551729202270508, |
|
"log_odds_ratio": -0.029045408591628075, |
|
"logits/chosen": -2.8212010860443115, |
|
"logits/rejected": -2.863682270050049, |
|
"logps/chosen": -0.13936151564121246, |
|
"logps/rejected": -2.4473021030426025, |
|
"loss": 0.1129, |
|
"nll_loss": 0.11166741698980331, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006968076340854168, |
|
"rewards/margins": 0.11539702117443085, |
|
"rewards/rejected": -0.12236510217189789, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.097535395909806, |
|
"grad_norm": 2.1099833794179723, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": 4.558366298675537, |
|
"log_odds_ratio": -0.01906474307179451, |
|
"logits/chosen": -2.8424153327941895, |
|
"logits/rejected": -2.877136707305908, |
|
"logps/chosen": -0.14121726155281067, |
|
"logps/rejected": -2.4738833904266357, |
|
"loss": 0.1137, |
|
"nll_loss": 0.1110328808426857, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0070608630776405334, |
|
"rewards/margins": 0.11663329601287842, |
|
"rewards/rejected": -0.12369415909051895, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.097535395909806, |
|
"eval_log_odds_chosen": 0.5767443776130676, |
|
"eval_log_odds_ratio": -0.6272528171539307, |
|
"eval_logits/chosen": -2.87036395072937, |
|
"eval_logits/rejected": -2.881497383117676, |
|
"eval_logps/chosen": -1.2408413887023926, |
|
"eval_logps/rejected": -1.6761136054992676, |
|
"eval_loss": 0.6877180337905884, |
|
"eval_nll_loss": 0.6538823843002319, |
|
"eval_rewards/accuracies": 0.670634925365448, |
|
"eval_rewards/chosen": -0.06204206869006157, |
|
"eval_rewards/margins": 0.021763615310192108, |
|
"eval_rewards/rejected": -0.08380568027496338, |
|
"eval_runtime": 137.068, |
|
"eval_samples_per_second": 14.548, |
|
"eval_steps_per_second": 0.46, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.108023072889355, |
|
"grad_norm": 1.7758830781899906, |
|
"learning_rate": 4.4609973674547055e-06, |
|
"log_odds_chosen": 4.593904495239258, |
|
"log_odds_ratio": -0.033291045576334, |
|
"logits/chosen": -2.856330394744873, |
|
"logits/rejected": -2.8690733909606934, |
|
"logps/chosen": -0.1400183141231537, |
|
"logps/rejected": -2.536652088165283, |
|
"loss": 0.1039, |
|
"nll_loss": 0.10139288008213043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007000915706157684, |
|
"rewards/margins": 0.11983168125152588, |
|
"rewards/rejected": -0.12683258950710297, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.118510749868904, |
|
"grad_norm": 2.6416736862275076, |
|
"learning_rate": 4.449941594899848e-06, |
|
"log_odds_chosen": 4.607335090637207, |
|
"log_odds_ratio": -0.028559138998389244, |
|
"logits/chosen": -2.7992746829986572, |
|
"logits/rejected": -2.8301546573638916, |
|
"logps/chosen": -0.14062660932540894, |
|
"logps/rejected": -2.5437684059143066, |
|
"loss": 0.1201, |
|
"nll_loss": 0.1216670423746109, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007031330373138189, |
|
"rewards/margins": 0.12015708535909653, |
|
"rewards/rejected": -0.12718841433525085, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.128998426848453, |
|
"grad_norm": 2.094070218470564, |
|
"learning_rate": 4.438967616184754e-06, |
|
"log_odds_chosen": 4.340805530548096, |
|
"log_odds_ratio": -0.027936171740293503, |
|
"logits/chosen": -2.823608875274658, |
|
"logits/rejected": -2.8253750801086426, |
|
"logps/chosen": -0.13957419991493225, |
|
"logps/rejected": -2.268900156021118, |
|
"loss": 0.1108, |
|
"nll_loss": 0.1126783937215805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006978710647672415, |
|
"rewards/margins": 0.10646629333496094, |
|
"rewards/rejected": -0.11344502121210098, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.139486103828002, |
|
"grad_norm": 2.222098137194295, |
|
"learning_rate": 4.428074427700477e-06, |
|
"log_odds_chosen": 4.698141098022461, |
|
"log_odds_ratio": -0.02707051672041416, |
|
"logits/chosen": -2.8169960975646973, |
|
"logits/rejected": -2.8297157287597656, |
|
"logps/chosen": -0.1413937509059906, |
|
"logps/rejected": -2.65130877494812, |
|
"loss": 0.1166, |
|
"nll_loss": 0.11614535748958588, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007069687359035015, |
|
"rewards/margins": 0.1254957616329193, |
|
"rewards/rejected": -0.1325654536485672, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.149973780807551, |
|
"grad_norm": 2.1988466339750317, |
|
"learning_rate": 4.417261042993862e-06, |
|
"log_odds_chosen": 4.824273109436035, |
|
"log_odds_ratio": -0.022720973938703537, |
|
"logits/chosen": -2.8039610385894775, |
|
"logits/rejected": -2.795748710632324, |
|
"logps/chosen": -0.12069626152515411, |
|
"logps/rejected": -2.613525390625, |
|
"loss": 0.1113, |
|
"nll_loss": 0.10357411205768585, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006034812889993191, |
|
"rewards/margins": 0.12464147806167603, |
|
"rewards/rejected": -0.1306762993335724, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.1604614577871, |
|
"grad_norm": 1.9312492998690272, |
|
"learning_rate": 4.406526492392318e-06, |
|
"log_odds_chosen": 4.532221794128418, |
|
"log_odds_ratio": -0.025564473122358322, |
|
"logits/chosen": -2.856283664703369, |
|
"logits/rejected": -2.847923994064331, |
|
"logps/chosen": -0.15458881855010986, |
|
"logps/rejected": -2.556361198425293, |
|
"loss": 0.1171, |
|
"nll_loss": 0.1105358749628067, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007729442324489355, |
|
"rewards/margins": 0.1200886145234108, |
|
"rewards/rejected": -0.1278180480003357, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.170949134766649, |
|
"grad_norm": 2.184212774032157, |
|
"learning_rate": 4.39586982263858e-06, |
|
"log_odds_chosen": 4.760067462921143, |
|
"log_odds_ratio": -0.025417357683181763, |
|
"logits/chosen": -2.8176796436309814, |
|
"logits/rejected": -2.818103313446045, |
|
"logps/chosen": -0.15180301666259766, |
|
"logps/rejected": -2.774660110473633, |
|
"loss": 0.1148, |
|
"nll_loss": 0.11588319391012192, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007590149994939566, |
|
"rewards/margins": 0.13114285469055176, |
|
"rewards/rejected": -0.13873299956321716, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.1814368117461984, |
|
"grad_norm": 2.151555777196694, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 4.732907772064209, |
|
"log_odds_ratio": -0.026212304830551147, |
|
"logits/chosen": -2.859835147857666, |
|
"logits/rejected": -2.857645034790039, |
|
"logps/chosen": -0.13824030756950378, |
|
"logps/rejected": -2.6506001949310303, |
|
"loss": 0.1132, |
|
"nll_loss": 0.11115143448114395, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.006912014447152615, |
|
"rewards/margins": 0.12561801075935364, |
|
"rewards/rejected": -0.13253000378608704, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.191924488725747, |
|
"grad_norm": 3.2431795321399486, |
|
"learning_rate": 4.374786392598072e-06, |
|
"log_odds_chosen": 4.578325271606445, |
|
"log_odds_ratio": -0.03994257375597954, |
|
"logits/chosen": -2.8212687969207764, |
|
"logits/rejected": -2.7516632080078125, |
|
"logps/chosen": -0.1504596322774887, |
|
"logps/rejected": -2.5710039138793945, |
|
"loss": 0.1095, |
|
"nll_loss": 0.10720662772655487, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007522981613874435, |
|
"rewards/margins": 0.12102720886468887, |
|
"rewards/rejected": -0.128550186753273, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.2024121657052964, |
|
"grad_norm": 2.6693753745610076, |
|
"learning_rate": 4.364357804719848e-06, |
|
"log_odds_chosen": 4.707537651062012, |
|
"log_odds_ratio": -0.025204619392752647, |
|
"logits/chosen": -2.798999309539795, |
|
"logits/rejected": -2.794037342071533, |
|
"logps/chosen": -0.15521793067455292, |
|
"logps/rejected": -2.689946174621582, |
|
"loss": 0.1192, |
|
"nll_loss": 0.12550954520702362, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007760896347463131, |
|
"rewards/margins": 0.12673643231391907, |
|
"rewards/rejected": -0.13449731469154358, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.2024121657052964, |
|
"eval_log_odds_chosen": 0.6958096623420715, |
|
"eval_log_odds_ratio": -0.6209548115730286, |
|
"eval_logits/chosen": -2.837247610092163, |
|
"eval_logits/rejected": -2.8433148860931396, |
|
"eval_logps/chosen": -1.4121639728546143, |
|
"eval_logps/rejected": -1.9619879722595215, |
|
"eval_loss": 0.7576995491981506, |
|
"eval_nll_loss": 0.7199162244796753, |
|
"eval_rewards/accuracies": 0.6726190447807312, |
|
"eval_rewards/chosen": -0.07060819864273071, |
|
"eval_rewards/margins": 0.027491191402077675, |
|
"eval_rewards/rejected": -0.09809939563274384, |
|
"eval_runtime": 136.9058, |
|
"eval_samples_per_second": 14.565, |
|
"eval_steps_per_second": 0.46, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.212899842684845, |
|
"grad_norm": 1.7712476287108132, |
|
"learning_rate": 4.354003441841081e-06, |
|
"log_odds_chosen": 4.905824184417725, |
|
"log_odds_ratio": -0.02992095984518528, |
|
"logits/chosen": -2.8259618282318115, |
|
"logits/rejected": -2.760521650314331, |
|
"logps/chosen": -0.13811610639095306, |
|
"logps/rejected": -2.7983617782592773, |
|
"loss": 0.1173, |
|
"nll_loss": 0.12010955810546875, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.006905805319547653, |
|
"rewards/margins": 0.13301227986812592, |
|
"rewards/rejected": -0.13991808891296387, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.2233875196643944, |
|
"grad_norm": 1.6446106852737563, |
|
"learning_rate": 4.3437224276306945e-06, |
|
"log_odds_chosen": 4.906925201416016, |
|
"log_odds_ratio": -0.017224887385964394, |
|
"logits/chosen": -2.838736057281494, |
|
"logits/rejected": -2.8536746501922607, |
|
"logps/chosen": -0.16129423677921295, |
|
"logps/rejected": -2.8627591133117676, |
|
"loss": 0.1147, |
|
"nll_loss": 0.12654295563697815, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008064712397754192, |
|
"rewards/margins": 0.1350732445716858, |
|
"rewards/rejected": -0.14313796162605286, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.233875196643943, |
|
"grad_norm": 1.7769911595186116, |
|
"learning_rate": 4.333513900174396e-06, |
|
"log_odds_chosen": 4.821990966796875, |
|
"log_odds_ratio": -0.026227790862321854, |
|
"logits/chosen": -2.829463481903076, |
|
"logits/rejected": -2.842454433441162, |
|
"logps/chosen": -0.1390562653541565, |
|
"logps/rejected": -2.760815143585205, |
|
"loss": 0.1215, |
|
"nll_loss": 0.11114709079265594, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0069528138265013695, |
|
"rewards/margins": 0.13108794391155243, |
|
"rewards/rejected": -0.13804076611995697, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.2443628736234924, |
|
"grad_norm": 2.186831361943043, |
|
"learning_rate": 4.32337701167117e-06, |
|
"log_odds_chosen": 5.350895881652832, |
|
"log_odds_ratio": -0.0246684979647398, |
|
"logits/chosen": -2.872166156768799, |
|
"logits/rejected": -2.8550028800964355, |
|
"logps/chosen": -0.13888207077980042, |
|
"logps/rejected": -3.2091636657714844, |
|
"loss": 0.1143, |
|
"nll_loss": 0.11629905551671982, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.006944102700799704, |
|
"rewards/margins": 0.1535140872001648, |
|
"rewards/rejected": -0.16045819222927094, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.2548505506030416, |
|
"grad_norm": 2.2764409350931345, |
|
"learning_rate": 4.313310928137537e-06, |
|
"log_odds_chosen": 4.80722713470459, |
|
"log_odds_ratio": -0.025547053664922714, |
|
"logits/chosen": -2.8291611671447754, |
|
"logits/rejected": -2.858245849609375, |
|
"logps/chosen": -0.15937599539756775, |
|
"logps/rejected": -2.8679497241973877, |
|
"loss": 0.1185, |
|
"nll_loss": 0.11574534326791763, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007968800142407417, |
|
"rewards/margins": 0.13542868196964264, |
|
"rewards/rejected": -0.1433974802494049, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.2653382275825904, |
|
"grad_norm": 2.239980255447614, |
|
"learning_rate": 4.303314829119352e-06, |
|
"log_odds_chosen": 5.589659690856934, |
|
"log_odds_ratio": -0.020419184118509293, |
|
"logits/chosen": -2.905287981033325, |
|
"logits/rejected": -2.966031551361084, |
|
"logps/chosen": -0.1542571783065796, |
|
"logps/rejected": -3.551201581954956, |
|
"loss": 0.1236, |
|
"nll_loss": 0.11697031557559967, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007712858729064465, |
|
"rewards/margins": 0.16984722018241882, |
|
"rewards/rejected": -0.17756007611751556, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.2758259045621396, |
|
"grad_norm": 2.009942820215124, |
|
"learning_rate": 4.293387907410919e-06, |
|
"log_odds_chosen": 6.170254707336426, |
|
"log_odds_ratio": -0.017188329249620438, |
|
"logits/chosen": -2.848698139190674, |
|
"logits/rejected": -2.945160388946533, |
|
"logps/chosen": -0.13800857961177826, |
|
"logps/rejected": -4.000069618225098, |
|
"loss": 0.1137, |
|
"nll_loss": 0.11105845123529434, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.006900429725646973, |
|
"rewards/margins": 0.19310306012630463, |
|
"rewards/rejected": -0.2000034749507904, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.2863135815416884, |
|
"grad_norm": 2.1918079846574567, |
|
"learning_rate": 4.2835293687811935e-06, |
|
"log_odds_chosen": 6.479376316070557, |
|
"log_odds_ratio": -0.010083029977977276, |
|
"logits/chosen": -2.7919399738311768, |
|
"logits/rejected": -2.9110770225524902, |
|
"logps/chosen": -0.1471458077430725, |
|
"logps/rejected": -4.402917385101318, |
|
"loss": 0.1149, |
|
"nll_loss": 0.12062163650989532, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007357291877269745, |
|
"rewards/margins": 0.21278861165046692, |
|
"rewards/rejected": -0.22014589607715607, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.2968012585212376, |
|
"grad_norm": 1.9268306821517742, |
|
"learning_rate": 4.273738431706883e-06, |
|
"log_odds_chosen": 6.724373817443848, |
|
"log_odds_ratio": -0.018149670213460922, |
|
"logits/chosen": -2.891892194747925, |
|
"logits/rejected": -3.004826784133911, |
|
"logps/chosen": -0.15707895159721375, |
|
"logps/rejected": -4.773315906524658, |
|
"loss": 0.1119, |
|
"nll_loss": 0.10733366012573242, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007853945717215538, |
|
"rewards/margins": 0.23081183433532715, |
|
"rewards/rejected": -0.23866574466228485, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.3072889355007864, |
|
"grad_norm": 1.9131867908425575, |
|
"learning_rate": 4.264014327112208e-06, |
|
"log_odds_chosen": 6.2542595863342285, |
|
"log_odds_ratio": -0.015775460749864578, |
|
"logits/chosen": -2.862001419067383, |
|
"logits/rejected": -2.91827654838562, |
|
"logps/chosen": -0.14461472630500793, |
|
"logps/rejected": -4.159193515777588, |
|
"loss": 0.1178, |
|
"nll_loss": 0.12322264909744263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007230737246572971, |
|
"rewards/margins": 0.20072893798351288, |
|
"rewards/rejected": -0.20795968174934387, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.3072889355007864, |
|
"eval_log_odds_chosen": 1.1627599000930786, |
|
"eval_log_odds_ratio": -0.7777736783027649, |
|
"eval_logits/chosen": -2.887819766998291, |
|
"eval_logits/rejected": -2.9106638431549072, |
|
"eval_logps/chosen": -2.4108457565307617, |
|
"eval_logps/rejected": -3.4342026710510254, |
|
"eval_loss": 1.1761772632598877, |
|
"eval_nll_loss": 1.1196904182434082, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.12054230272769928, |
|
"eval_rewards/margins": 0.051167842000722885, |
|
"eval_rewards/rejected": -0.17171014845371246, |
|
"eval_runtime": 137.1423, |
|
"eval_samples_per_second": 14.54, |
|
"eval_steps_per_second": 0.459, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.3177766124803356, |
|
"grad_norm": 2.1121501905853624, |
|
"learning_rate": 4.254356298115171e-06, |
|
"log_odds_chosen": 6.363844394683838, |
|
"log_odds_ratio": -0.024754000827670097, |
|
"logits/chosen": -2.8908374309539795, |
|
"logits/rejected": -2.9566292762756348, |
|
"logps/chosen": -0.15381646156311035, |
|
"logps/rejected": -4.287047386169434, |
|
"loss": 0.1181, |
|
"nll_loss": 0.12711365520954132, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007690823636949062, |
|
"rewards/margins": 0.20666155219078064, |
|
"rewards/rejected": -0.21435236930847168, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.3282642894598844, |
|
"grad_norm": 3.84884286912148, |
|
"learning_rate": 4.24476359978009e-06, |
|
"log_odds_chosen": 5.530186176300049, |
|
"log_odds_ratio": -0.017865758389234543, |
|
"logits/chosen": -2.8787178993225098, |
|
"logits/rejected": -2.9533944129943848, |
|
"logps/chosen": -0.1436866670846939, |
|
"logps/rejected": -3.488823652267456, |
|
"loss": 0.1234, |
|
"nll_loss": 0.11815366894006729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0071843331679701805, |
|
"rewards/margins": 0.16725686192512512, |
|
"rewards/rejected": -0.17444118857383728, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3387519664394336, |
|
"grad_norm": 2.417106329176298, |
|
"learning_rate": 4.235235498876268e-06, |
|
"log_odds_chosen": 5.049867630004883, |
|
"log_odds_ratio": -0.030804011970758438, |
|
"logits/chosen": -2.8601975440979004, |
|
"logits/rejected": -2.919813632965088, |
|
"logps/chosen": -0.16016361117362976, |
|
"logps/rejected": -3.108591079711914, |
|
"loss": 0.1205, |
|
"nll_loss": 0.12257065623998642, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.008008181117475033, |
|
"rewards/margins": 0.14742138981819153, |
|
"rewards/rejected": -0.15542957186698914, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.349239643418983, |
|
"grad_norm": 2.0311020060176737, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": 6.287697792053223, |
|
"log_odds_ratio": -0.03303173556923866, |
|
"logits/chosen": -2.8431243896484375, |
|
"logits/rejected": -2.987511396408081, |
|
"logps/chosen": -0.15092086791992188, |
|
"logps/rejected": -4.205324649810791, |
|
"loss": 0.119, |
|
"nll_loss": 0.11937984079122543, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.0075460439547896385, |
|
"rewards/margins": 0.20272019505500793, |
|
"rewards/rejected": -0.21026620268821716, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.3597273203985316, |
|
"grad_norm": 1.8184108922544404, |
|
"learning_rate": 4.216370213557839e-06, |
|
"log_odds_chosen": 6.489804267883301, |
|
"log_odds_ratio": -0.017738422378897667, |
|
"logits/chosen": -2.8637566566467285, |
|
"logits/rejected": -2.9882349967956543, |
|
"logps/chosen": -0.1367037147283554, |
|
"logps/rejected": -4.3643412590026855, |
|
"loss": 0.1103, |
|
"nll_loss": 0.10625318437814713, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006835184991359711, |
|
"rewards/margins": 0.21138188242912292, |
|
"rewards/rejected": -0.21821708977222443, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.370214997378081, |
|
"grad_norm": 1.9927993897844196, |
|
"learning_rate": 4.207031619116713e-06, |
|
"log_odds_chosen": 6.5232744216918945, |
|
"log_odds_ratio": -0.02112133800983429, |
|
"logits/chosen": -2.888134002685547, |
|
"logits/rejected": -2.9766697883605957, |
|
"logps/chosen": -0.13985328376293182, |
|
"logps/rejected": -4.443106174468994, |
|
"loss": 0.1119, |
|
"nll_loss": 0.10387493669986725, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.006992665119469166, |
|
"rewards/margins": 0.21516263484954834, |
|
"rewards/rejected": -0.22215530276298523, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.3807026743576296, |
|
"grad_norm": 1.9179118979680037, |
|
"learning_rate": 4.197754801611136e-06, |
|
"log_odds_chosen": 7.000714302062988, |
|
"log_odds_ratio": -0.01941884122788906, |
|
"logits/chosen": -2.8880743980407715, |
|
"logits/rejected": -3.0280842781066895, |
|
"logps/chosen": -0.1594962626695633, |
|
"logps/rejected": -4.991673946380615, |
|
"loss": 0.1187, |
|
"nll_loss": 0.12734182178974152, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007974812760949135, |
|
"rewards/margins": 0.2416088581085205, |
|
"rewards/rejected": -0.2495836764574051, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.391190351337179, |
|
"grad_norm": 1.7656016453383905, |
|
"learning_rate": 4.188539082916955e-06, |
|
"log_odds_chosen": 5.81030797958374, |
|
"log_odds_ratio": -0.02714763581752777, |
|
"logits/chosen": -2.858682155609131, |
|
"logits/rejected": -2.961153030395508, |
|
"logps/chosen": -0.1495695412158966, |
|
"logps/rejected": -3.7413382530212402, |
|
"loss": 0.117, |
|
"nll_loss": 0.1129683405160904, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007478476967662573, |
|
"rewards/margins": 0.1795884370803833, |
|
"rewards/rejected": -0.18706689774990082, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.401678028316728, |
|
"grad_norm": 1.7721263332581463, |
|
"learning_rate": 4.179383795285729e-06, |
|
"log_odds_chosen": 6.099682807922363, |
|
"log_odds_ratio": -0.016452614217996597, |
|
"logits/chosen": -2.8671703338623047, |
|
"logits/rejected": -2.94566011428833, |
|
"logps/chosen": -0.1470957249403, |
|
"logps/rejected": -4.025435447692871, |
|
"loss": 0.1162, |
|
"nll_loss": 0.1030157208442688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007354786153882742, |
|
"rewards/margins": 0.19391697645187378, |
|
"rewards/rejected": -0.20127174258232117, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.412165705296277, |
|
"grad_norm": 6.518126509500433, |
|
"learning_rate": 4.170288281141496e-06, |
|
"log_odds_chosen": 5.677874565124512, |
|
"log_odds_ratio": -0.02623058296740055, |
|
"logits/chosen": -2.8755476474761963, |
|
"logits/rejected": -2.926180362701416, |
|
"logps/chosen": -0.15929332375526428, |
|
"logps/rejected": -3.627763271331787, |
|
"loss": 0.1184, |
|
"nll_loss": 0.12096776813268661, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007964666932821274, |
|
"rewards/margins": 0.17342346906661987, |
|
"rewards/rejected": -0.18138816952705383, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.412165705296277, |
|
"eval_log_odds_chosen": 1.3232934474945068, |
|
"eval_log_odds_ratio": -1.0561914443969727, |
|
"eval_logits/chosen": -2.9102423191070557, |
|
"eval_logits/rejected": -2.9226319789886475, |
|
"eval_logps/chosen": -3.8695833683013916, |
|
"eval_logps/rejected": -5.081162452697754, |
|
"eval_loss": 1.8519541025161743, |
|
"eval_nll_loss": 1.7541913986206055, |
|
"eval_rewards/accuracies": 0.636904776096344, |
|
"eval_rewards/chosen": -0.19347918033599854, |
|
"eval_rewards/margins": 0.06057893857359886, |
|
"eval_rewards/rejected": -0.2540581226348877, |
|
"eval_runtime": 140.6912, |
|
"eval_samples_per_second": 14.173, |
|
"eval_steps_per_second": 0.448, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.422653382275826, |
|
"grad_norm": 2.1350280555835317, |
|
"learning_rate": 4.1612518928823956e-06, |
|
"log_odds_chosen": 5.239171028137207, |
|
"log_odds_ratio": -0.0356699600815773, |
|
"logits/chosen": -2.8127808570861816, |
|
"logits/rejected": -2.847365140914917, |
|
"logps/chosen": -0.17353428900241852, |
|
"logps/rejected": -3.4219677448272705, |
|
"loss": 0.1197, |
|
"nll_loss": 0.12273728847503662, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.008676714263856411, |
|
"rewards/margins": 0.16242167353630066, |
|
"rewards/rejected": -0.17109838128089905, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.433141059255375, |
|
"grad_norm": 2.142764154815985, |
|
"learning_rate": 4.1522739926869985e-06, |
|
"log_odds_chosen": 7.10500431060791, |
|
"log_odds_ratio": -0.02759629487991333, |
|
"logits/chosen": -2.8841793537139893, |
|
"logits/rejected": -2.979490280151367, |
|
"logps/chosen": -0.15857262909412384, |
|
"logps/rejected": -5.118218898773193, |
|
"loss": 0.1179, |
|
"nll_loss": 0.11995577812194824, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007928632199764252, |
|
"rewards/margins": 0.24798233807086945, |
|
"rewards/rejected": -0.2559109628200531, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.443628736234924, |
|
"grad_norm": 2.442748493026814, |
|
"learning_rate": 4.143353952325209e-06, |
|
"log_odds_chosen": 6.4824538230896, |
|
"log_odds_ratio": -0.03863966092467308, |
|
"logits/chosen": -2.8798575401306152, |
|
"logits/rejected": -2.975369691848755, |
|
"logps/chosen": -0.16273298859596252, |
|
"logps/rejected": -4.518317222595215, |
|
"loss": 0.1144, |
|
"nll_loss": 0.11924872547388077, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.008136649616062641, |
|
"rewards/margins": 0.21777920424938202, |
|
"rewards/rejected": -0.22591586410999298, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.454116413214473, |
|
"grad_norm": 1.7906952084031593, |
|
"learning_rate": 4.134491152973616e-06, |
|
"log_odds_chosen": 6.330552101135254, |
|
"log_odds_ratio": -0.019993215799331665, |
|
"logits/chosen": -2.903748035430908, |
|
"logits/rejected": -2.961629629135132, |
|
"logps/chosen": -0.1506245732307434, |
|
"logps/rejected": -4.29229736328125, |
|
"loss": 0.1162, |
|
"nll_loss": 0.11873211711645126, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.0075312284752726555, |
|
"rewards/margins": 0.20708362758159637, |
|
"rewards/rejected": -0.2146148979663849, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.464604090194022, |
|
"grad_norm": 2.709543224621687, |
|
"learning_rate": 4.125684985035174e-06, |
|
"log_odds_chosen": 6.674917697906494, |
|
"log_odds_ratio": -0.02191847935318947, |
|
"logits/chosen": -2.869702100753784, |
|
"logits/rejected": -2.9517292976379395, |
|
"logps/chosen": -0.14587149024009705, |
|
"logps/rejected": -4.594050407409668, |
|
"loss": 0.1189, |
|
"nll_loss": 0.11958177387714386, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007293573580682278, |
|
"rewards/margins": 0.2224089354276657, |
|
"rewards/rejected": -0.22970251739025116, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.475091767173571, |
|
"grad_norm": 1.9596617726605967, |
|
"learning_rate": 4.116934847963092e-06, |
|
"log_odds_chosen": 6.008196830749512, |
|
"log_odds_ratio": -0.020748203620314598, |
|
"logits/chosen": -2.859504222869873, |
|
"logits/rejected": -2.9086391925811768, |
|
"logps/chosen": -0.1603454202413559, |
|
"logps/rejected": -4.055342674255371, |
|
"loss": 0.1137, |
|
"nll_loss": 0.11717329174280167, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008017271757125854, |
|
"rewards/margins": 0.1947498619556427, |
|
"rewards/rejected": -0.20276716351509094, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.48557944415312, |
|
"grad_norm": 25.11227763431921, |
|
"learning_rate": 4.1082401500888055e-06, |
|
"log_odds_chosen": 6.279742240905762, |
|
"log_odds_ratio": -0.01569024845957756, |
|
"logits/chosen": -2.916944742202759, |
|
"logits/rejected": -2.987224578857422, |
|
"logps/chosen": -0.14050395786762238, |
|
"logps/rejected": -4.152866363525391, |
|
"loss": 0.1189, |
|
"nll_loss": 0.10722777992486954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007025198079645634, |
|
"rewards/margins": 0.20061811804771423, |
|
"rewards/rejected": -0.2076433151960373, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.4960671211326693, |
|
"grad_norm": 1.757332945919827, |
|
"learning_rate": 4.099600308453939e-06, |
|
"log_odds_chosen": 6.39632511138916, |
|
"log_odds_ratio": -0.023090779781341553, |
|
"logits/chosen": -2.8743884563446045, |
|
"logits/rejected": -2.9668736457824707, |
|
"logps/chosen": -0.15729930996894836, |
|
"logps/rejected": -4.314006328582764, |
|
"loss": 0.1177, |
|
"nll_loss": 0.1209021583199501, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007864965125918388, |
|
"rewards/margins": 0.2078353613615036, |
|
"rewards/rejected": -0.21570034325122833, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.506554798112218, |
|
"grad_norm": 2.0524680636282056, |
|
"learning_rate": 4.091014748646132e-06, |
|
"log_odds_chosen": 5.9223713874816895, |
|
"log_odds_ratio": -0.030582841485738754, |
|
"logits/chosen": -2.8992161750793457, |
|
"logits/rejected": -2.929603099822998, |
|
"logps/chosen": -0.1705484390258789, |
|
"logps/rejected": -4.027953147888184, |
|
"loss": 0.1189, |
|
"nll_loss": 0.10802364349365234, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.008527422323822975, |
|
"rewards/margins": 0.19287024438381195, |
|
"rewards/rejected": -0.20139765739440918, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.5170424750917673, |
|
"grad_norm": 1.7245638696745784, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 6.324474811553955, |
|
"log_odds_ratio": -0.018949782475829124, |
|
"logits/chosen": -2.8749866485595703, |
|
"logits/rejected": -2.9224321842193604, |
|
"logps/chosen": -0.1520567536354065, |
|
"logps/rejected": -4.290619850158691, |
|
"loss": 0.1172, |
|
"nll_loss": 0.12284000217914581, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.00760283786803484, |
|
"rewards/margins": 0.20692817866802216, |
|
"rewards/rejected": -0.21453101933002472, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.5170424750917673, |
|
"eval_log_odds_chosen": 1.0075438022613525, |
|
"eval_log_odds_ratio": -0.8145382404327393, |
|
"eval_logits/chosen": -2.8560779094696045, |
|
"eval_logits/rejected": -2.871006965637207, |
|
"eval_logps/chosen": -2.0024044513702393, |
|
"eval_logps/rejected": -2.8670685291290283, |
|
"eval_loss": 1.01926589012146, |
|
"eval_nll_loss": 0.9735569357872009, |
|
"eval_rewards/accuracies": 0.6408730149269104, |
|
"eval_rewards/chosen": -0.10012022405862808, |
|
"eval_rewards/margins": 0.043233200907707214, |
|
"eval_rewards/rejected": -0.1433534324169159, |
|
"eval_runtime": 138.4847, |
|
"eval_samples_per_second": 14.399, |
|
"eval_steps_per_second": 0.455, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.527530152071316, |
|
"grad_norm": 2.140192470773612, |
|
"learning_rate": 4.074004218633553e-06, |
|
"log_odds_chosen": 6.169337272644043, |
|
"log_odds_ratio": -0.024398522451519966, |
|
"logits/chosen": -2.8802199363708496, |
|
"logits/rejected": -2.9575634002685547, |
|
"logps/chosen": -0.14228537678718567, |
|
"logps/rejected": -4.140218257904053, |
|
"loss": 0.1204, |
|
"nll_loss": 0.10762319713830948, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0071142688393592834, |
|
"rewards/margins": 0.1998966485261917, |
|
"rewards/rejected": -0.20701093971729279, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5380178290508653, |
|
"grad_norm": 1.9307036538867832, |
|
"learning_rate": 4.065578140908709e-06, |
|
"log_odds_chosen": 6.545037269592285, |
|
"log_odds_ratio": -0.020819999277591705, |
|
"logits/chosen": -2.826190948486328, |
|
"logits/rejected": -2.9180386066436768, |
|
"logps/chosen": -0.15343733131885529, |
|
"logps/rejected": -4.550530433654785, |
|
"loss": 0.1292, |
|
"nll_loss": 0.12483732402324677, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007671866565942764, |
|
"rewards/margins": 0.2198546677827835, |
|
"rewards/rejected": -0.22752651572227478, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.5485055060304145, |
|
"grad_norm": 2.472322893814309, |
|
"learning_rate": 4.057204129667897e-06, |
|
"log_odds_chosen": 6.510749816894531, |
|
"log_odds_ratio": -0.017572391778230667, |
|
"logits/chosen": -2.8476340770721436, |
|
"logits/rejected": -2.9206082820892334, |
|
"logps/chosen": -0.1623007208108902, |
|
"logps/rejected": -4.547110557556152, |
|
"loss": 0.114, |
|
"nll_loss": 0.11619551479816437, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.00811503641307354, |
|
"rewards/margins": 0.21924051642417908, |
|
"rewards/rejected": -0.22735556960105896, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.5589931830099633, |
|
"grad_norm": 3.562558849555077, |
|
"learning_rate": 4.048881650894581e-06, |
|
"log_odds_chosen": 7.486746311187744, |
|
"log_odds_ratio": -0.012338453903794289, |
|
"logits/chosen": -2.8392252922058105, |
|
"logits/rejected": -2.924240827560425, |
|
"logps/chosen": -0.15012109279632568, |
|
"logps/rejected": -5.4815144538879395, |
|
"loss": 0.1213, |
|
"nll_loss": 0.12608163058757782, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007506055291742086, |
|
"rewards/margins": 0.26656967401504517, |
|
"rewards/rejected": -0.2740757167339325, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.5694808599895125, |
|
"grad_norm": 2.3252293901649193, |
|
"learning_rate": 4.040610178208843e-06, |
|
"log_odds_chosen": 7.7740631103515625, |
|
"log_odds_ratio": -0.0118449367582798, |
|
"logits/chosen": -2.795551061630249, |
|
"logits/rejected": -2.8945860862731934, |
|
"logps/chosen": -0.1522868573665619, |
|
"logps/rejected": -5.739714622497559, |
|
"loss": 0.1145, |
|
"nll_loss": 0.11489256471395493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007614342961460352, |
|
"rewards/margins": 0.27937138080596924, |
|
"rewards/rejected": -0.28698569536209106, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.5799685369690613, |
|
"grad_norm": 2.0157957603988175, |
|
"learning_rate": 4.032389192727559e-06, |
|
"log_odds_chosen": 6.265582084655762, |
|
"log_odds_ratio": -0.024669019505381584, |
|
"logits/chosen": -2.85023832321167, |
|
"logits/rejected": -2.8876233100891113, |
|
"logps/chosen": -0.150896817445755, |
|
"logps/rejected": -4.219937324523926, |
|
"loss": 0.1277, |
|
"nll_loss": 0.12799417972564697, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007544840686023235, |
|
"rewards/margins": 0.20345202088356018, |
|
"rewards/rejected": -0.2109968364238739, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.5904562139486105, |
|
"grad_norm": 2.287376161767263, |
|
"learning_rate": 4.024218182927669e-06, |
|
"log_odds_chosen": 6.810778617858887, |
|
"log_odds_ratio": -0.013128559105098248, |
|
"logits/chosen": -2.823387622833252, |
|
"logits/rejected": -2.879467487335205, |
|
"logps/chosen": -0.15397700667381287, |
|
"logps/rejected": -4.72897481918335, |
|
"loss": 0.1209, |
|
"nll_loss": 0.12541964650154114, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.0076988511718809605, |
|
"rewards/margins": 0.22874990105628967, |
|
"rewards/rejected": -0.236448734998703, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.6009438909281593, |
|
"grad_norm": 2.2717126345189547, |
|
"learning_rate": 4.016096644512495e-06, |
|
"log_odds_chosen": 6.199719429016113, |
|
"log_odds_ratio": -0.018437180668115616, |
|
"logits/chosen": -2.8248672485351562, |
|
"logits/rejected": -2.8656277656555176, |
|
"logps/chosen": -0.14331553876399994, |
|
"logps/rejected": -4.071486949920654, |
|
"loss": 0.1196, |
|
"nll_loss": 0.11505875736474991, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.00716577610000968, |
|
"rewards/margins": 0.19640859961509705, |
|
"rewards/rejected": -0.20357437431812286, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.6114315679077085, |
|
"grad_norm": 2.1379482021716036, |
|
"learning_rate": 4.008024080281012e-06, |
|
"log_odds_chosen": 7.395205497741699, |
|
"log_odds_ratio": -0.01522077340632677, |
|
"logits/chosen": -2.8720109462738037, |
|
"logits/rejected": -2.936903476715088, |
|
"logps/chosen": -0.13911715149879456, |
|
"logps/rejected": -5.221936225891113, |
|
"loss": 0.12, |
|
"nll_loss": 0.12369368225336075, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.0069558583199977875, |
|
"rewards/margins": 0.2541409730911255, |
|
"rewards/rejected": -0.2610968351364136, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.6219192448872572, |
|
"grad_norm": 1.7439578923515293, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 8.536567687988281, |
|
"log_odds_ratio": -0.02061418630182743, |
|
"logits/chosen": -2.854001760482788, |
|
"logits/rejected": -2.9489758014678955, |
|
"logps/chosen": -0.1588824838399887, |
|
"logps/rejected": -6.567204475402832, |
|
"loss": 0.1109, |
|
"nll_loss": 0.11326327174901962, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007944123819470406, |
|
"rewards/margins": 0.32041609287261963, |
|
"rewards/rejected": -0.3283601999282837, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.6219192448872572, |
|
"eval_log_odds_chosen": 1.0766297578811646, |
|
"eval_log_odds_ratio": -0.9767945408821106, |
|
"eval_logits/chosen": -2.8457064628601074, |
|
"eval_logits/rejected": -2.857062339782715, |
|
"eval_logps/chosen": -2.4182989597320557, |
|
"eval_logps/rejected": -3.354691743850708, |
|
"eval_loss": 1.2049823999404907, |
|
"eval_nll_loss": 1.172393560409546, |
|
"eval_rewards/accuracies": 0.6329365372657776, |
|
"eval_rewards/chosen": -0.12091495096683502, |
|
"eval_rewards/margins": 0.046819645911455154, |
|
"eval_rewards/rejected": -0.1677345633506775, |
|
"eval_runtime": 137.7801, |
|
"eval_samples_per_second": 14.472, |
|
"eval_steps_per_second": 0.457, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.6324069218668065, |
|
"grad_norm": 3.8704567483353496, |
|
"learning_rate": 3.992023920278996e-06, |
|
"log_odds_chosen": 6.979190826416016, |
|
"log_odds_ratio": -0.018384801223874092, |
|
"logits/chosen": -2.8529200553894043, |
|
"logits/rejected": -2.923466920852661, |
|
"logps/chosen": -0.14472463726997375, |
|
"logps/rejected": -4.871707916259766, |
|
"loss": 0.1127, |
|
"nll_loss": 0.1109754890203476, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007236232049763203, |
|
"rewards/margins": 0.23634913563728333, |
|
"rewards/rejected": -0.24358537793159485, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6428945988463557, |
|
"grad_norm": 2.0243407054263933, |
|
"learning_rate": 3.984095364447979e-06, |
|
"log_odds_chosen": 6.955283164978027, |
|
"log_odds_ratio": -0.026280570775270462, |
|
"logits/chosen": -2.845829486846924, |
|
"logits/rejected": -2.9166336059570312, |
|
"logps/chosen": -0.1561572551727295, |
|
"logps/rejected": -4.968081474304199, |
|
"loss": 0.1245, |
|
"nll_loss": 0.11139287799596786, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.0078078629449009895, |
|
"rewards/margins": 0.2405962496995926, |
|
"rewards/rejected": -0.2484041005373001, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.6533822758259045, |
|
"grad_norm": 2.159445384644007, |
|
"learning_rate": 3.97621386243772e-06, |
|
"log_odds_chosen": 8.654619216918945, |
|
"log_odds_ratio": -0.015728970989584923, |
|
"logits/chosen": -2.815493583679199, |
|
"logits/rejected": -2.9511656761169434, |
|
"logps/chosen": -0.1413796991109848, |
|
"logps/rejected": -6.552220821380615, |
|
"loss": 0.1201, |
|
"nll_loss": 0.11258909851312637, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007068985607475042, |
|
"rewards/margins": 0.32054203748703003, |
|
"rewards/rejected": -0.3276110291481018, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.6638699528054537, |
|
"grad_norm": 2.5062335927036123, |
|
"learning_rate": 3.9683789506627254e-06, |
|
"log_odds_chosen": 7.7274370193481445, |
|
"log_odds_ratio": -0.020870521664619446, |
|
"logits/chosen": -2.8319153785705566, |
|
"logits/rejected": -2.922696113586426, |
|
"logps/chosen": -0.15536390244960785, |
|
"logps/rejected": -5.693093776702881, |
|
"loss": 0.1181, |
|
"nll_loss": 0.10906670987606049, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007768194191157818, |
|
"rewards/margins": 0.2768864631652832, |
|
"rewards/rejected": -0.2846546769142151, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.6743576297850025, |
|
"grad_norm": 1.970994291017683, |
|
"learning_rate": 3.960590171906698e-06, |
|
"log_odds_chosen": 7.434384822845459, |
|
"log_odds_ratio": -0.023785177618265152, |
|
"logits/chosen": -2.7982025146484375, |
|
"logits/rejected": -2.8931427001953125, |
|
"logps/chosen": -0.16477976739406586, |
|
"logps/rejected": -5.395650386810303, |
|
"loss": 0.1221, |
|
"nll_loss": 0.13674572110176086, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.008238988928496838, |
|
"rewards/margins": 0.2615435719490051, |
|
"rewards/rejected": -0.26978254318237305, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.6848453067645517, |
|
"grad_norm": 2.0205686734736594, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 7.365771293640137, |
|
"log_odds_ratio": -0.01570904441177845, |
|
"logits/chosen": -2.866798162460327, |
|
"logits/rejected": -2.959561347961426, |
|
"logps/chosen": -0.14348378777503967, |
|
"logps/rejected": -5.177813529968262, |
|
"loss": 0.1204, |
|
"nll_loss": 0.12037654966115952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007174189202487469, |
|
"rewards/margins": 0.2517164647579193, |
|
"rewards/rejected": -0.25889068841934204, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.695332983744101, |
|
"grad_norm": 1.8761709200806869, |
|
"learning_rate": 3.9451492157623585e-06, |
|
"log_odds_chosen": 8.670493125915527, |
|
"log_odds_ratio": -0.011763294227421284, |
|
"logits/chosen": -2.8013434410095215, |
|
"logits/rejected": -2.920924425125122, |
|
"logps/chosen": -0.16095298528671265, |
|
"logps/rejected": -6.665195465087891, |
|
"loss": 0.1166, |
|
"nll_loss": 0.13346998393535614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008047649636864662, |
|
"rewards/margins": 0.32521215081214905, |
|
"rewards/rejected": -0.3332597613334656, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.7058206607236497, |
|
"grad_norm": 2.1285971867573408, |
|
"learning_rate": 3.937496154790789e-06, |
|
"log_odds_chosen": 7.294459342956543, |
|
"log_odds_ratio": -0.018316376954317093, |
|
"logits/chosen": -2.816880702972412, |
|
"logits/rejected": -2.8812124729156494, |
|
"logps/chosen": -0.13620439171791077, |
|
"logps/rejected": -5.142992973327637, |
|
"loss": 0.1195, |
|
"nll_loss": 0.10606805980205536, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.0068102204240858555, |
|
"rewards/margins": 0.25033941864967346, |
|
"rewards/rejected": -0.2571496367454529, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.716308337703199, |
|
"grad_norm": 2.400899470701997, |
|
"learning_rate": 3.9298874594592975e-06, |
|
"log_odds_chosen": 8.10938549041748, |
|
"log_odds_ratio": -0.016252661123871803, |
|
"logits/chosen": -2.807111978530884, |
|
"logits/rejected": -2.915724515914917, |
|
"logps/chosen": -0.15417781472206116, |
|
"logps/rejected": -6.080683708190918, |
|
"loss": 0.1163, |
|
"nll_loss": 0.11585485935211182, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007708890829235315, |
|
"rewards/margins": 0.2963252663612366, |
|
"rewards/rejected": -0.30403420329093933, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.7267960146827477, |
|
"grad_norm": 3.318597907364317, |
|
"learning_rate": 3.922322702763682e-06, |
|
"log_odds_chosen": 8.183881759643555, |
|
"log_odds_ratio": -0.021557733416557312, |
|
"logits/chosen": -2.8544585704803467, |
|
"logits/rejected": -2.9738879203796387, |
|
"logps/chosen": -0.14029571413993835, |
|
"logps/rejected": -6.104724884033203, |
|
"loss": 0.1238, |
|
"nll_loss": 0.11269497871398926, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007014785893261433, |
|
"rewards/margins": 0.2982214391231537, |
|
"rewards/rejected": -0.30523625016212463, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7267960146827477, |
|
"eval_log_odds_chosen": 1.6673794984817505, |
|
"eval_log_odds_ratio": -1.6934312582015991, |
|
"eval_logits/chosen": -2.9804697036743164, |
|
"eval_logits/rejected": -2.996739387512207, |
|
"eval_logps/chosen": -6.072526454925537, |
|
"eval_logps/rejected": -7.644432067871094, |
|
"eval_loss": 2.6922054290771484, |
|
"eval_nll_loss": 2.6498186588287354, |
|
"eval_rewards/accuracies": 0.5873016119003296, |
|
"eval_rewards/chosen": -0.30362632870674133, |
|
"eval_rewards/margins": 0.07859525829553604, |
|
"eval_rewards/rejected": -0.38222160935401917, |
|
"eval_runtime": 136.8599, |
|
"eval_samples_per_second": 14.57, |
|
"eval_steps_per_second": 0.46, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.737283691662297, |
|
"grad_norm": 2.23878079697452, |
|
"learning_rate": 3.914801463431357e-06, |
|
"log_odds_chosen": 7.083222389221191, |
|
"log_odds_ratio": -0.02951228991150856, |
|
"logits/chosen": -2.8593714237213135, |
|
"logits/rejected": -2.9374592304229736, |
|
"logps/chosen": -0.14687521755695343, |
|
"logps/rejected": -5.056353569030762, |
|
"loss": 0.1245, |
|
"nll_loss": 0.11392644792795181, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007343760691583157, |
|
"rewards/margins": 0.24547390639781952, |
|
"rewards/rejected": -0.25281769037246704, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7477713686418457, |
|
"grad_norm": 3.0293992863459636, |
|
"learning_rate": 3.907323325822818e-06, |
|
"log_odds_chosen": 5.10004997253418, |
|
"log_odds_ratio": -0.032727014273405075, |
|
"logits/chosen": -2.780730962753296, |
|
"logits/rejected": -2.8234589099884033, |
|
"logps/chosen": -0.14557409286499023, |
|
"logps/rejected": -3.112699031829834, |
|
"loss": 0.1196, |
|
"nll_loss": 0.1244465708732605, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007278704084455967, |
|
"rewards/margins": 0.14835625886917114, |
|
"rewards/rejected": -0.15563495457172394, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.758259045621395, |
|
"grad_norm": 2.2549688272537094, |
|
"learning_rate": 3.8998878798351596e-06, |
|
"log_odds_chosen": 5.7140727043151855, |
|
"log_odds_ratio": -0.026816044002771378, |
|
"logits/chosen": -2.864112377166748, |
|
"logits/rejected": -2.8956217765808105, |
|
"logps/chosen": -0.14010892808437347, |
|
"logps/rejected": -3.677777051925659, |
|
"loss": 0.1148, |
|
"nll_loss": 0.11140565574169159, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007005447056144476, |
|
"rewards/margins": 0.17688342928886414, |
|
"rewards/rejected": -0.18388888239860535, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.7687467226009437, |
|
"grad_norm": 2.3361581110737384, |
|
"learning_rate": 3.892494720807615e-06, |
|
"log_odds_chosen": 6.5437517166137695, |
|
"log_odds_ratio": -0.02287450060248375, |
|
"logits/chosen": -2.835170269012451, |
|
"logits/rejected": -2.904600143432617, |
|
"logps/chosen": -0.15383225679397583, |
|
"logps/rejected": -4.582453727722168, |
|
"loss": 0.1163, |
|
"nll_loss": 0.1210094466805458, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007691613398492336, |
|
"rewards/margins": 0.22143109142780304, |
|
"rewards/rejected": -0.22912268340587616, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.779234399580493, |
|
"grad_norm": 2.113727988806721, |
|
"learning_rate": 3.885143449429057e-06, |
|
"log_odds_chosen": 8.709664344787598, |
|
"log_odds_ratio": -0.01187268365174532, |
|
"logits/chosen": -2.8075308799743652, |
|
"logits/rejected": -2.8737902641296387, |
|
"logps/chosen": -0.15384691953659058, |
|
"logps/rejected": -6.678023338317871, |
|
"loss": 0.1126, |
|
"nll_loss": 0.11222463846206665, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007692346815019846, |
|
"rewards/margins": 0.32620885968208313, |
|
"rewards/rejected": -0.33390119671821594, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.789722076560042, |
|
"grad_norm": 2.1767794366513376, |
|
"learning_rate": 3.877833671647406e-06, |
|
"log_odds_chosen": 7.380768775939941, |
|
"log_odds_ratio": -0.028077024966478348, |
|
"logits/chosen": -2.793292999267578, |
|
"logits/rejected": -2.8911733627319336, |
|
"logps/chosen": -0.15328237414360046, |
|
"logps/rejected": -5.426938533782959, |
|
"loss": 0.1168, |
|
"nll_loss": 0.11543625593185425, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0076641179621219635, |
|
"rewards/margins": 0.26368287205696106, |
|
"rewards/rejected": -0.27134692668914795, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.800209753539591, |
|
"grad_norm": 2.256877035979117, |
|
"learning_rate": 3.870564998580918e-06, |
|
"log_odds_chosen": 8.639537811279297, |
|
"log_odds_ratio": -0.022679299116134644, |
|
"logits/chosen": -2.811685085296631, |
|
"logits/rejected": -2.9056103229522705, |
|
"logps/chosen": -0.15335455536842346, |
|
"logps/rejected": -6.6522955894470215, |
|
"loss": 0.1172, |
|
"nll_loss": 0.1345623880624771, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007667726371437311, |
|
"rewards/margins": 0.3249470591545105, |
|
"rewards/rejected": -0.3326147794723511, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.81069743051914, |
|
"grad_norm": 2.0730722454139485, |
|
"learning_rate": 3.863337046431279e-06, |
|
"log_odds_chosen": 6.9750657081604, |
|
"log_odds_ratio": -0.025320613756775856, |
|
"logits/chosen": -2.7947394847869873, |
|
"logits/rejected": -2.846017360687256, |
|
"logps/chosen": -0.13509753346443176, |
|
"logps/rejected": -4.8464508056640625, |
|
"loss": 0.1193, |
|
"nll_loss": 0.10888632386922836, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006754877511411905, |
|
"rewards/margins": 0.23556765913963318, |
|
"rewards/rejected": -0.24232256412506104, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.821185107498689, |
|
"grad_norm": 1.9858072033613254, |
|
"learning_rate": 3.8561494363984955e-06, |
|
"log_odds_chosen": 9.771112442016602, |
|
"log_odds_ratio": -0.013731351122260094, |
|
"logits/chosen": -2.8062682151794434, |
|
"logits/rejected": -2.9753849506378174, |
|
"logps/chosen": -0.14906486868858337, |
|
"logps/rejected": -7.731194496154785, |
|
"loss": 0.1179, |
|
"nll_loss": 0.11920718103647232, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007453243248164654, |
|
"rewards/margins": 0.37910646200180054, |
|
"rewards/rejected": -0.38655975461006165, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.831672784478238, |
|
"grad_norm": 1.6847580595509726, |
|
"learning_rate": 3.849001794597506e-06, |
|
"log_odds_chosen": 7.8019118309021, |
|
"log_odds_ratio": -0.019792212173342705, |
|
"logits/chosen": -2.8470611572265625, |
|
"logits/rejected": -2.9447550773620605, |
|
"logps/chosen": -0.15314054489135742, |
|
"logps/rejected": -5.769678115844727, |
|
"loss": 0.1192, |
|
"nll_loss": 0.11755287647247314, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007657027803361416, |
|
"rewards/margins": 0.2808268666267395, |
|
"rewards/rejected": -0.2884839177131653, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.831672784478238, |
|
"eval_log_odds_chosen": 1.020140528678894, |
|
"eval_log_odds_ratio": -0.950748860836029, |
|
"eval_logits/chosen": -2.866152763366699, |
|
"eval_logits/rejected": -2.883617877960205, |
|
"eval_logps/chosen": -2.3778645992279053, |
|
"eval_logps/rejected": -3.2670860290527344, |
|
"eval_loss": 1.2390626668930054, |
|
"eval_nll_loss": 1.1910258531570435, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.11889322102069855, |
|
"eval_rewards/margins": 0.04446107894182205, |
|
"eval_rewards/rejected": -0.16335429251194, |
|
"eval_runtime": 137.1045, |
|
"eval_samples_per_second": 14.544, |
|
"eval_steps_per_second": 0.46, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8421604614577873, |
|
"grad_norm": 2.227062658222717, |
|
"learning_rate": 3.841893751976493e-06, |
|
"log_odds_chosen": 6.429055690765381, |
|
"log_odds_ratio": -0.025566572323441505, |
|
"logits/chosen": -2.8230857849121094, |
|
"logits/rejected": -2.9232447147369385, |
|
"logps/chosen": -0.13817086815834045, |
|
"logps/rejected": -4.313010215759277, |
|
"loss": 0.1236, |
|
"nll_loss": 0.1359073519706726, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.006908542010933161, |
|
"rewards/margins": 0.20874197781085968, |
|
"rewards/rejected": -0.2156505137681961, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.852648138437336, |
|
"grad_norm": 2.108179677461151, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 7.687928676605225, |
|
"log_odds_ratio": -0.019871855154633522, |
|
"logits/chosen": -2.9058802127838135, |
|
"logits/rejected": -3.016103744506836, |
|
"logps/chosen": -0.15432411432266235, |
|
"logps/rejected": -5.692026615142822, |
|
"loss": 0.1226, |
|
"nll_loss": 0.12474212795495987, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0077162072993814945, |
|
"rewards/margins": 0.27688512206077576, |
|
"rewards/rejected": -0.2846013009548187, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.863135815416885, |
|
"grad_norm": 2.0852362976431627, |
|
"learning_rate": 3.827795011754764e-06, |
|
"log_odds_chosen": 7.531012058258057, |
|
"log_odds_ratio": -0.020183496177196503, |
|
"logits/chosen": -2.9127936363220215, |
|
"logits/rejected": -3.042579174041748, |
|
"logps/chosen": -0.1713821142911911, |
|
"logps/rejected": -5.637821197509766, |
|
"loss": 0.1192, |
|
"nll_loss": 0.1238013282418251, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.00856910552829504, |
|
"rewards/margins": 0.2733219265937805, |
|
"rewards/rejected": -0.2818910479545593, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.873623492396434, |
|
"grad_norm": 2.1240217329220727, |
|
"learning_rate": 3.8208035995043505e-06, |
|
"log_odds_chosen": 7.918447017669678, |
|
"log_odds_ratio": -0.016450051218271255, |
|
"logits/chosen": -2.9222500324249268, |
|
"logits/rejected": -3.0099682807922363, |
|
"logps/chosen": -0.16613063216209412, |
|
"logps/rejected": -5.923202037811279, |
|
"loss": 0.1167, |
|
"nll_loss": 0.11456701904535294, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00830653216689825, |
|
"rewards/margins": 0.28785353899002075, |
|
"rewards/rejected": -0.2961600720882416, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.8841111693759833, |
|
"grad_norm": 31.79228564478535, |
|
"learning_rate": 3.8138503569823697e-06, |
|
"log_odds_chosen": 6.909941673278809, |
|
"log_odds_ratio": -0.009971695020794868, |
|
"logits/chosen": -2.913257598876953, |
|
"logits/rejected": -3.0123419761657715, |
|
"logps/chosen": -0.14221827685832977, |
|
"logps/rejected": -4.7533063888549805, |
|
"loss": 0.1366, |
|
"nll_loss": 0.12416551262140274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007110914681106806, |
|
"rewards/margins": 0.23055438697338104, |
|
"rewards/rejected": -0.2376653254032135, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.894598846355532, |
|
"grad_norm": 1.9557051281290665, |
|
"learning_rate": 3.806934938134405e-06, |
|
"log_odds_chosen": 6.693169593811035, |
|
"log_odds_ratio": -0.02671411633491516, |
|
"logits/chosen": -2.8386614322662354, |
|
"logits/rejected": -2.913949966430664, |
|
"logps/chosen": -0.158113032579422, |
|
"logps/rejected": -4.6884589195251465, |
|
"loss": 0.1257, |
|
"nll_loss": 0.13248762488365173, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.007905651815235615, |
|
"rewards/margins": 0.22651728987693787, |
|
"rewards/rejected": -0.23442292213439941, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.9050865233350813, |
|
"grad_norm": 2.137070948069414, |
|
"learning_rate": 3.800057001282532e-06, |
|
"log_odds_chosen": 7.526410102844238, |
|
"log_odds_ratio": -0.018288953229784966, |
|
"logits/chosen": -2.8420822620391846, |
|
"logits/rejected": -2.9359934329986572, |
|
"logps/chosen": -0.13937655091285706, |
|
"logps/rejected": -5.3555192947387695, |
|
"loss": 0.1203, |
|
"nll_loss": 0.11602024734020233, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.00696882838383317, |
|
"rewards/margins": 0.2608071565628052, |
|
"rewards/rejected": -0.2677759826183319, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.91557420031463, |
|
"grad_norm": 1.9039164114563458, |
|
"learning_rate": 3.7932162090544085e-06, |
|
"log_odds_chosen": 8.005070686340332, |
|
"log_odds_ratio": -0.013831285759806633, |
|
"logits/chosen": -2.85080885887146, |
|
"logits/rejected": -2.9412410259246826, |
|
"logps/chosen": -0.14242660999298096, |
|
"logps/rejected": -5.835131645202637, |
|
"loss": 0.115, |
|
"nll_loss": 0.11129038035869598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007121330592781305, |
|
"rewards/margins": 0.2846352159976959, |
|
"rewards/rejected": -0.2917565703392029, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.9260618772941793, |
|
"grad_norm": 1.9066238493747631, |
|
"learning_rate": 3.7864122283137657e-06, |
|
"log_odds_chosen": 8.59681510925293, |
|
"log_odds_ratio": -0.01634146459400654, |
|
"logits/chosen": -2.811566114425659, |
|
"logits/rejected": -2.953697681427002, |
|
"logps/chosen": -0.1852981150150299, |
|
"logps/rejected": -6.696959495544434, |
|
"loss": 0.1237, |
|
"nll_loss": 0.13221383094787598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.009264904074370861, |
|
"rewards/margins": 0.3255830705165863, |
|
"rewards/rejected": -0.33484798669815063, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.9365495542737285, |
|
"grad_norm": 2.1229204349942523, |
|
"learning_rate": 3.7796447300922724e-06, |
|
"log_odds_chosen": 8.886019706726074, |
|
"log_odds_ratio": -0.014133910648524761, |
|
"logits/chosen": -2.8244338035583496, |
|
"logits/rejected": -2.9361133575439453, |
|
"logps/chosen": -0.1553722470998764, |
|
"logps/rejected": -6.724435329437256, |
|
"loss": 0.1191, |
|
"nll_loss": 0.11856858432292938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007768611423671246, |
|
"rewards/margins": 0.3284532129764557, |
|
"rewards/rejected": -0.3362218141555786, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9365495542737285, |
|
"eval_log_odds_chosen": 0.9868643283843994, |
|
"eval_log_odds_ratio": -0.8558183312416077, |
|
"eval_logits/chosen": -2.8059191703796387, |
|
"eval_logits/rejected": -2.8221092224121094, |
|
"eval_logps/chosen": -1.9523440599441528, |
|
"eval_logps/rejected": -2.7882232666015625, |
|
"eval_loss": 1.0213509798049927, |
|
"eval_nll_loss": 0.9673047065734863, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": -0.09761719405651093, |
|
"eval_rewards/margins": 0.04179396852850914, |
|
"eval_rewards/rejected": -0.13941116631031036, |
|
"eval_runtime": 140.3646, |
|
"eval_samples_per_second": 14.206, |
|
"eval_steps_per_second": 0.449, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9470372312532773, |
|
"grad_norm": 1.8098718147037927, |
|
"learning_rate": 3.772913389522725e-06, |
|
"log_odds_chosen": 7.045705318450928, |
|
"log_odds_ratio": -0.0264790840446949, |
|
"logits/chosen": -2.8278496265411377, |
|
"logits/rejected": -2.935941696166992, |
|
"logps/chosen": -0.16044145822525024, |
|
"logps/rejected": -5.10351037979126, |
|
"loss": 0.1197, |
|
"nll_loss": 0.11624834686517715, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.008022072724997997, |
|
"rewards/margins": 0.24715343117713928, |
|
"rewards/rejected": -0.25517550110816956, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.9575249082328265, |
|
"grad_norm": 1.8754542855362524, |
|
"learning_rate": 3.7662178857735478e-06, |
|
"log_odds_chosen": 8.025814056396484, |
|
"log_odds_ratio": -0.014746090397238731, |
|
"logits/chosen": -2.7981061935424805, |
|
"logits/rejected": -2.9223358631134033, |
|
"logps/chosen": -0.1609780192375183, |
|
"logps/rejected": -6.0790114402771, |
|
"loss": 0.1164, |
|
"nll_loss": 0.114871546626091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00804890040308237, |
|
"rewards/margins": 0.29590168595314026, |
|
"rewards/rejected": -0.30395060777664185, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.9680125852123753, |
|
"grad_norm": 2.270114335100112, |
|
"learning_rate": 3.7595579019845623e-06, |
|
"log_odds_chosen": 7.872386932373047, |
|
"log_odds_ratio": -0.01882219687104225, |
|
"logits/chosen": -2.8168020248413086, |
|
"logits/rejected": -2.900966167449951, |
|
"logps/chosen": -0.1528329849243164, |
|
"logps/rejected": -5.721396446228027, |
|
"loss": 0.117, |
|
"nll_loss": 0.1145024448633194, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.007641649339348078, |
|
"rewards/margins": 0.27842822670936584, |
|
"rewards/rejected": -0.2860698103904724, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.9785002621919245, |
|
"grad_norm": 2.2955550853318907, |
|
"learning_rate": 3.752933125204008e-06, |
|
"log_odds_chosen": 8.305427551269531, |
|
"log_odds_ratio": -0.02256721630692482, |
|
"logits/chosen": -2.8052284717559814, |
|
"logits/rejected": -2.9265544414520264, |
|
"logps/chosen": -0.13989822566509247, |
|
"logps/rejected": -6.217524528503418, |
|
"loss": 0.1182, |
|
"nll_loss": 0.12114028632640839, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.006994911935180426, |
|
"rewards/margins": 0.30388128757476807, |
|
"rewards/rejected": -0.31087619066238403, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.9889879391714738, |
|
"grad_norm": 1.888221991554896, |
|
"learning_rate": 3.7463432463267764e-06, |
|
"log_odds_chosen": 7.020120143890381, |
|
"log_odds_ratio": -0.01538365613669157, |
|
"logits/chosen": -2.8246865272521973, |
|
"logits/rejected": -2.9202027320861816, |
|
"logps/chosen": -0.16290083527565002, |
|
"logps/rejected": -4.992356777191162, |
|
"loss": 0.1252, |
|
"nll_loss": 0.14337727427482605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008145040832459927, |
|
"rewards/margins": 0.24147279560565948, |
|
"rewards/rejected": -0.24961784482002258, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.9984268484530676, |
|
"step": 2859, |
|
"total_flos": 0.0, |
|
"train_loss": 0.32389816019492534, |
|
"train_runtime": 62235.4926, |
|
"train_samples_per_second": 2.941, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2859, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|