{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 10, "global_step": 2237, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004470272686633884, "grad_norm": 200.87217281502842, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.589505195617676, "logits/rejected": -2.589505195617676, "logps/chosen": -2.1613333225250244, "logps/rejected": -2.1613333225250244, "loss": 3.4851, "nll_loss": 3.450399875640869, "rewards/accuracies": 0.0, "rewards/chosen": -0.10806665569543839, "rewards/margins": 0.0, "rewards/rejected": -0.10806665569543839, "step": 10 }, { "epoch": 0.004470272686633884, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.5523717403411865, "eval_logits/rejected": -2.5523717403411865, "eval_logps/chosen": -0.27682167291641235, "eval_logps/rejected": -0.27682167291641235, "eval_loss": 1.5240834951400757, "eval_nll_loss": 1.4894258975982666, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -0.01384108979254961, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -0.01384108979254961, "eval_runtime": 307.2493, "eval_samples_per_second": 58.236, "eval_steps_per_second": 1.823, "step": 10 }, { "epoch": 0.008940545373267769, "grad_norm": 35.74287350401742, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.1916913986206055, "logits/rejected": -2.1916913986206055, "logps/chosen": -0.04086422920227051, "logps/rejected": -0.04086422920227051, "loss": 1.086, "nll_loss": 1.0513627529144287, "rewards/accuracies": 0.0, "rewards/chosen": -0.002043211366981268, "rewards/margins": 0.0, "rewards/rejected": -0.002043211366981268, "step": 20 }, { "epoch": 0.008940545373267769, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.7726595401763916, "eval_logits/rejected": -1.7726595401763916, "eval_logps/chosen": -6.101293365645688e-06, "eval_logps/rejected": -6.101293365645688e-06, "eval_loss": 0.946562647819519, "eval_nll_loss": 0.9119052886962891, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -3.050646739666263e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -3.050646739666263e-07, "eval_runtime": 307.2712, "eval_samples_per_second": 58.232, "eval_steps_per_second": 1.822, "step": 20 }, { "epoch": 0.013410818059901655, "grad_norm": 35.233323872959666, "learning_rate": 1.5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.6510288715362549, "logits/rejected": -1.6510288715362549, "logps/chosen": -8.857093234837521e-06, "logps/rejected": -8.857093234837521e-06, "loss": 0.9087, "nll_loss": 0.8740367889404297, "rewards/accuracies": 0.0, "rewards/chosen": -4.4285465605753416e-07, "rewards/margins": 0.0, "rewards/rejected": -4.4285465605753416e-07, "step": 30 }, { "epoch": 0.013410818059901655, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.5711654424667358, "eval_logits/rejected": -1.5711654424667358, "eval_logps/chosen": -1.556159622850828e-05, "eval_logps/rejected": -1.556159622850828e-05, "eval_loss": 0.8375607132911682, "eval_nll_loss": 0.8029031753540039, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.780799933243543e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.780799933243543e-07, "eval_runtime": 307.3414, "eval_samples_per_second": 58.219, "eval_steps_per_second": 1.822, "step": 30 }, { "epoch": 0.017881090746535537, "grad_norm": 38.04594247051011, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.567310094833374, "logits/rejected": -1.567310094833374, "logps/chosen": -5.675311058439547e-06, "logps/rejected": -5.675311058439547e-06, "loss": 0.7774, "nll_loss": 0.742705225944519, "rewards/accuracies": 0.0, "rewards/chosen": -2.837655870280287e-07, "rewards/margins": 0.0, "rewards/rejected": -2.837655870280287e-07, "step": 40 }, { "epoch": 0.017881090746535537, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.5837194919586182, "eval_logits/rejected": -1.5837194919586182, "eval_logps/chosen": -4.9416635192756075e-06, "eval_logps/rejected": -4.9416635192756075e-06, "eval_loss": 0.691165030002594, "eval_nll_loss": 0.6565076112747192, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -2.4708319301680604e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -2.4708319301680604e-07, "eval_runtime": 307.3287, "eval_samples_per_second": 58.221, "eval_steps_per_second": 1.822, "step": 40 }, { "epoch": 0.022351363433169423, "grad_norm": 244.0980901004834, "learning_rate": 2.5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.5995845794677734, "logits/rejected": -1.5995845794677734, "logps/chosen": -7.533667030656943e-06, "logps/rejected": -7.533667030656943e-06, "loss": 0.5426, "nll_loss": 0.507915198802948, "rewards/accuracies": 0.0, "rewards/chosen": -3.7668331742679584e-07, "rewards/margins": 0.0, "rewards/rejected": -3.7668331742679584e-07, "step": 50 }, { "epoch": 0.022351363433169423, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.5342867374420166, "eval_logits/rejected": -1.5342867374420166, "eval_logps/chosen": -4.649061338568572e-06, "eval_logps/rejected": -4.649061338568572e-06, "eval_loss": 0.1590723991394043, "eval_nll_loss": 0.12441505491733551, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -2.32453146509215e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -2.32453146509215e-07, "eval_runtime": 307.2894, "eval_samples_per_second": 58.229, "eval_steps_per_second": 1.822, "step": 50 }, { "epoch": 0.02682163611980331, "grad_norm": 742.9836173119904, "learning_rate": 3e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.0592808723449707, "logits/rejected": -1.0592808723449707, "logps/chosen": -0.646319568157196, "logps/rejected": -0.646319568157196, "loss": 0.4926, "nll_loss": 0.4579242765903473, "rewards/accuracies": 0.0, "rewards/chosen": -0.03231597691774368, "rewards/margins": 0.0, "rewards/rejected": -0.03231597691774368, "step": 60 }, { "epoch": 0.02682163611980331, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.1701815128326416, "eval_logits/rejected": -2.1701815128326416, "eval_logps/chosen": -0.049711961299180984, "eval_logps/rejected": -0.049711961299180984, "eval_loss": 0.07283048331737518, "eval_nll_loss": 0.0381731316447258, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -0.0024855986703187227, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -0.0024855986703187227, "eval_runtime": 307.3188, "eval_samples_per_second": 58.223, "eval_steps_per_second": 1.822, "step": 60 }, { "epoch": 0.031291908806437195, "grad_norm": 36.939547099581965, "learning_rate": 3.5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.109687328338623, "logits/rejected": -2.109687328338623, "logps/chosen": -0.4633702337741852, "logps/rejected": -0.4633702337741852, "loss": 0.3784, "nll_loss": 0.34376633167266846, "rewards/accuracies": 0.0, "rewards/chosen": -0.02316850982606411, "rewards/margins": 0.0, "rewards/rejected": -0.02316850982606411, "step": 70 }, { "epoch": 0.031291908806437195, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.8683053255081177, "eval_logits/rejected": -1.8683053255081177, "eval_logps/chosen": -0.0014687292277812958, "eval_logps/rejected": -0.0014687292277812958, "eval_loss": 0.037432197481393814, "eval_nll_loss": 0.0027748411521315575, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.343645120272413e-05, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.343645120272413e-05, "eval_runtime": 307.334, "eval_samples_per_second": 58.22, "eval_steps_per_second": 1.822, "step": 70 }, { "epoch": 0.035762181493071074, "grad_norm": 8.17885090561578, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.9282808303833008, "logits/rejected": -1.9282808303833008, "logps/chosen": -0.1062564104795456, "logps/rejected": -0.1062564104795456, "loss": 0.1081, "nll_loss": 0.0734890028834343, "rewards/accuracies": 0.0, "rewards/chosen": -0.005312820430845022, "rewards/margins": 0.0, "rewards/rejected": -0.005312820430845022, "step": 80 }, { "epoch": 0.035762181493071074, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.9697402715682983, "eval_logits/rejected": -1.9697402715682983, "eval_logps/chosen": -1.1368107152520679e-05, "eval_logps/rejected": -1.1368107152520679e-05, "eval_loss": 0.03469717875123024, "eval_nll_loss": 3.982333146268502e-05, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.684053689947177e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.684053689947177e-07, "eval_runtime": 307.2922, "eval_samples_per_second": 58.228, "eval_steps_per_second": 1.822, "step": 80 }, { "epoch": 0.04023245417970496, "grad_norm": 3.100931395880866, "learning_rate": 4.5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.24125075340271, "logits/rejected": -2.24125075340271, "logps/chosen": -0.265648752450943, "logps/rejected": -0.265648752450943, "loss": 0.2173, "nll_loss": 0.18264153599739075, "rewards/accuracies": 0.0, "rewards/chosen": -0.013282437808811665, "rewards/margins": 0.0, "rewards/rejected": -0.013282437808811665, "step": 90 }, { "epoch": 0.04023245417970496, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.7805023193359375, "eval_logits/rejected": -2.7805023193359375, "eval_logps/chosen": -0.6984005570411682, "eval_logps/rejected": -0.6984005570411682, "eval_loss": 0.5148123502731323, "eval_nll_loss": 0.4801549017429352, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -0.03492003679275513, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -0.03492003679275513, "eval_runtime": 307.3525, "eval_samples_per_second": 58.217, "eval_steps_per_second": 1.822, "step": 90 }, { "epoch": 0.044702726866338846, "grad_norm": 0.708612815644015, "learning_rate": 5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.5113797187805176, "logits/rejected": -2.5113797187805176, "logps/chosen": -0.07246340066194534, "logps/rejected": -0.07246340066194534, "loss": 0.0845, "nll_loss": 0.04982428997755051, "rewards/accuracies": 0.0, "rewards/chosen": -0.003623170079663396, "rewards/margins": 0.0, "rewards/rejected": -0.003623170079663396, "step": 100 }, { "epoch": 0.044702726866338846, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.2378087043762207, "eval_logits/rejected": -2.2378087043762207, "eval_logps/chosen": -0.0317058339715004, "eval_logps/rejected": -0.0317058339715004, "eval_loss": 0.05645650252699852, "eval_nll_loss": 0.021799137815833092, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -0.0015852916985750198, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -0.0015852916985750198, "eval_runtime": 307.3355, "eval_samples_per_second": 58.22, "eval_steps_per_second": 1.822, "step": 100 }, { "epoch": 0.04917299955297273, "grad_norm": 327.5861555766306, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.459609031677246, "logits/rejected": -2.459609031677246, "logps/chosen": -0.1411500871181488, "logps/rejected": -0.1411500871181488, "loss": 0.1317, "nll_loss": 0.09704854339361191, "rewards/accuracies": 0.0, "rewards/chosen": -0.007057504262775183, "rewards/margins": 0.0, "rewards/rejected": -0.007057504262775183, "step": 110 }, { "epoch": 0.04917299955297273, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.4884233474731445, "eval_logits/rejected": -2.4884233474731445, "eval_logps/chosen": -1.179061109723989e-05, "eval_logps/rejected": -1.179061109723989e-05, "eval_loss": 0.03466625139117241, "eval_nll_loss": 8.895804057829082e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.895305434933107e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.895305434933107e-07, "eval_runtime": 308.2975, "eval_samples_per_second": 58.038, "eval_steps_per_second": 1.816, "step": 110 }, { "epoch": 0.05364327223960662, "grad_norm": 855.5753312971198, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -2.3581442832946777, "logits/rejected": -2.3581442832946777, "logps/chosen": -0.2550828456878662, "logps/rejected": -0.2550828456878662, "loss": 0.21, "nll_loss": 0.1753716617822647, "rewards/accuracies": 0.0, "rewards/chosen": -0.01275414414703846, "rewards/margins": 0.0, "rewards/rejected": -0.01275414414703846, "step": 120 }, { "epoch": 0.05364327223960662, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -2.1373255252838135, "eval_logits/rejected": -2.1373255252838135, "eval_logps/chosen": -0.00023682457685936242, "eval_logps/rejected": -0.00023682457685936242, "eval_loss": 0.034821655601263046, "eval_nll_loss": 0.00016429205425083637, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.184122811537236e-05, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.184122811537236e-05, "eval_runtime": 307.3399, "eval_samples_per_second": 58.219, "eval_steps_per_second": 1.822, "step": 120 }, { "epoch": 0.058113544926240504, "grad_norm": 0.1670814802188449, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.8926069736480713, "logits/rejected": -1.8926069736480713, "logps/chosen": -0.07454721629619598, "logps/rejected": -0.07454721629619598, "loss": 0.0859, "nll_loss": 0.05125713348388672, "rewards/accuracies": 0.0, "rewards/chosen": -0.0037273610942065716, "rewards/margins": 0.0, "rewards/rejected": -0.0037273610942065716, "step": 130 }, { "epoch": 0.058113544926240504, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.857692837715149, "eval_logits/rejected": -1.857692837715149, "eval_logps/chosen": -2.4058474537014263e-06, "eval_logps/rejected": -2.4058474537014263e-06, "eval_loss": 0.03466090187430382, "eval_nll_loss": 3.539007138897432e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.2029238405375509e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.2029238405375509e-07, "eval_runtime": 307.2975, "eval_samples_per_second": 58.227, "eval_steps_per_second": 1.822, "step": 130 }, { "epoch": 0.06258381761287439, "grad_norm": 0.06453300264388455, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.854994773864746, "logits/rejected": -1.854994773864746, "logps/chosen": -1.4180723155732267e-05, "logps/rejected": -1.4180723155732267e-05, "loss": 0.0347, "nll_loss": 1.0921966349997092e-05, "rewards/accuracies": 0.0, "rewards/chosen": -7.090361577866133e-07, "rewards/margins": 0.0, "rewards/rejected": -7.090361577866133e-07, "step": 140 }, { "epoch": 0.06258381761287439, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.8251653909683228, "eval_logits/rejected": -1.8251653909683228, "eval_logps/chosen": -0.00015634606825187802, "eval_logps/rejected": -0.00015634606825187802, "eval_loss": 0.03476560115814209, "eval_nll_loss": 0.00010824044147739187, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.817303412593901e-06, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.817303412593901e-06, "eval_runtime": 307.2194, "eval_samples_per_second": 58.242, "eval_steps_per_second": 1.823, "step": 140 }, { "epoch": 0.06705409029950828, "grad_norm": 0.20774379926153874, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.847884178161621, "logits/rejected": -1.847884178161621, "logps/chosen": -0.021540921181440353, "logps/rejected": -0.021540921181440353, "loss": 0.0495, "nll_loss": 0.014810365624725819, "rewards/accuracies": 0.0, "rewards/chosen": -0.0010770460357889533, "rewards/margins": 0.0, "rewards/rejected": -0.0010770460357889533, "step": 150 }, { "epoch": 0.06705409029950828, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.850500464439392, "eval_logits/rejected": -1.850500464439392, "eval_logps/chosen": -2.1336307327146642e-05, "eval_logps/rejected": -2.1336307327146642e-05, "eval_loss": 0.034673456102609634, "eval_nll_loss": 1.6099216736620292e-05, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.0668153436199646e-06, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.0668153436199646e-06, "eval_runtime": 307.2824, "eval_samples_per_second": 58.23, "eval_steps_per_second": 1.822, "step": 150 }, { "epoch": 0.07152436298614215, "grad_norm": 0.4385089216707075, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.9034173488616943, "logits/rejected": -1.9034173488616943, "logps/chosen": -2.675616997294128e-05, "logps/rejected": -2.675616997294128e-05, "loss": 0.0347, "nll_loss": 1.960856025107205e-05, "rewards/accuracies": 0.0, "rewards/chosen": -1.337808498647064e-06, "rewards/margins": 0.0, "rewards/rejected": -1.337808498647064e-06, "step": 160 }, { "epoch": 0.07152436298614215, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.9784146547317505, "eval_logits/rejected": -1.9784146547317505, "eval_logps/chosen": -1.380585763399722e-05, "eval_logps/rejected": -1.380585763399722e-05, "eval_loss": 0.03466769680380821, "eval_nll_loss": 1.0333444151910953e-05, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.902928930685448e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.902928930685448e-07, "eval_runtime": 307.2592, "eval_samples_per_second": 58.234, "eval_steps_per_second": 1.823, "step": 160 }, { "epoch": 0.07599463567277603, "grad_norm": 0.4184321814624391, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.8877710103988647, "logits/rejected": -1.8877710103988647, "logps/chosen": -1.3126833437127061e-05, "logps/rejected": -1.3126833437127061e-05, "loss": 0.0347, "nll_loss": 9.645330464991275e-06, "rewards/accuracies": 0.0, "rewards/chosen": -6.563416832250368e-07, "rewards/margins": 0.0, "rewards/rejected": -6.563416832250368e-07, "step": 170 }, { "epoch": 0.07599463567277603, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.7608203887939453, "eval_logits/rejected": -1.7608203887939453, "eval_logps/chosen": -2.7526366466190666e-06, "eval_logps/rejected": -2.7526366466190666e-06, "eval_loss": 0.03465956076979637, "eval_nll_loss": 2.1979110442771344e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.3763180106707296e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.3763180106707296e-07, "eval_runtime": 307.2297, "eval_samples_per_second": 58.24, "eval_steps_per_second": 1.823, "step": 170 }, { "epoch": 0.08046490835940992, "grad_norm": 0.038632251509525706, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.6208105087280273, "logits/rejected": -1.6208105087280273, "logps/chosen": -3.7441983295138925e-05, "logps/rejected": -3.7441983295138925e-05, "loss": 0.0347, "nll_loss": 2.5923154680640437e-05, "rewards/accuracies": 0.0, "rewards/chosen": -1.8720990055953735e-06, "rewards/margins": 0.0, "rewards/rejected": -1.8720990055953735e-06, "step": 180 }, { "epoch": 0.08046490835940992, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.5160672664642334, "eval_logits/rejected": -1.5160672664642334, "eval_logps/chosen": -5.201853809921886e-07, "eval_logps/rejected": -5.201853809921886e-07, "eval_loss": 0.03465788811445236, "eval_nll_loss": 5.215401870373171e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -2.6009265852167118e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -2.6009265852167118e-08, "eval_runtime": 307.3109, "eval_samples_per_second": 58.224, "eval_steps_per_second": 1.822, "step": 180 }, { "epoch": 0.0849351810460438, "grad_norm": 0.03669320934920467, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4880715608596802, "logits/rejected": -1.4880715608596802, "logps/chosen": -4.2265060073987115e-07, "logps/rejected": -4.2265060073987115e-07, "loss": 0.0347, "nll_loss": 5.044038289270247e-07, "rewards/accuracies": 0.0, "rewards/chosen": -2.1132532168621765e-08, "rewards/margins": 0.0, "rewards/rejected": -2.1132532168621765e-08, "step": 190 }, { "epoch": 0.0849351810460438, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4645824432373047, "eval_logits/rejected": -1.4645824432373047, "eval_logps/chosen": -2.9260445444379e-07, "eval_logps/rejected": -2.9260445444379e-07, "eval_loss": 0.03465784341096878, "eval_nll_loss": 4.917378646496218e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.4630221478739713e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.4630221478739713e-08, "eval_runtime": 307.3254, "eval_samples_per_second": 58.222, "eval_steps_per_second": 1.822, "step": 190 }, { "epoch": 0.08940545373267769, "grad_norm": 0.012980292617960867, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.460682988166809, "logits/rejected": -1.460682988166809, "logps/chosen": -2.5575798190402566e-07, "logps/rejected": -2.5575798190402566e-07, "loss": 0.0347, "nll_loss": 5.394212507781049e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.2787898739929915e-08, "rewards/margins": 0.0, "rewards/rejected": -1.2787898739929915e-08, "step": 200 }, { "epoch": 0.08940545373267769, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4564566612243652, "eval_logits/rejected": -1.4564566612243652, "eval_logps/chosen": -2.0590688620814035e-07, "eval_logps/rejected": -2.0590688620814035e-07, "eval_loss": 0.034657903015613556, "eval_nll_loss": 5.513420546776615e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.0295342356414494e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.0295342356414494e-08, "eval_runtime": 307.2465, "eval_samples_per_second": 58.237, "eval_steps_per_second": 1.823, "step": 200 }, { "epoch": 0.09387572641931158, "grad_norm": 0.02641498837884943, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4444046020507812, "logits/rejected": -1.4444046020507812, "logps/chosen": -2.2758126760891173e-07, "logps/rejected": -2.2758126760891173e-07, "loss": 0.0347, "nll_loss": 4.954629844178271e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.1379063913352638e-08, "rewards/margins": 0.0, "rewards/rejected": -1.1379063913352638e-08, "step": 210 }, { "epoch": 0.09387572641931158, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4376832246780396, "eval_logits/rejected": -1.4376832246780396, "eval_logps/chosen": -2.2758125339805702e-07, "eval_logps/rejected": -2.2758125339805702e-07, "eval_loss": 0.034657806158065796, "eval_nll_loss": 4.4703440948978823e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.1379063025174219e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.1379063025174219e-08, "eval_runtime": 307.3275, "eval_samples_per_second": 58.221, "eval_steps_per_second": 1.822, "step": 210 }, { "epoch": 0.09834599910594546, "grad_norm": 0.03304538141719999, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4320012331008911, "logits/rejected": -1.4320012331008911, "logps/chosen": -2.449207840982126e-07, "logps/rejected": -2.449207840982126e-07, "loss": 0.0347, "nll_loss": 4.1574213582862285e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.2246038849639262e-08, "rewards/margins": 0.0, "rewards/rejected": -1.2246038849639262e-08, "step": 220 }, { "epoch": 0.09834599910594546, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4223650693893433, "eval_logits/rejected": -1.4223650693893433, "eval_logps/chosen": -2.492556632205378e-07, "eval_logps/rejected": -2.492556632205378e-07, "eval_loss": 0.03465771675109863, "eval_nll_loss": 3.5017714594687277e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.2462780141220264e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.2462780141220264e-08, "eval_runtime": 307.3295, "eval_samples_per_second": 58.221, "eval_steps_per_second": 1.822, "step": 220 }, { "epoch": 0.10281627179257935, "grad_norm": 0.044286350524316676, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4200690984725952, "logits/rejected": -1.4200690984725952, "logps/chosen": -2.698463390515826e-07, "logps/rejected": -2.698463390515826e-07, "loss": 0.0347, "nll_loss": 3.3378583452758903e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.3492316597307763e-08, "rewards/margins": 0.0, "rewards/rejected": -1.3492316597307763e-08, "step": 230 }, { "epoch": 0.10281627179257935, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.413095474243164, "eval_logits/rejected": -1.413095474243164, "eval_logps/chosen": -2.7093008725387335e-07, "eval_logps/rejected": -2.7093008725387335e-07, "eval_loss": 0.034657664597034454, "eval_nll_loss": 3.1292435664909135e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.3546501698158409e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.3546501698158409e-08, "eval_runtime": 307.2385, "eval_samples_per_second": 58.238, "eval_steps_per_second": 1.823, "step": 230 }, { "epoch": 0.10728654447921324, "grad_norm": 0.04353666797671452, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4168875217437744, "logits/rejected": -1.4168875217437744, "logps/chosen": -2.6984636747329205e-07, "logps/rejected": -2.6984636747329205e-07, "loss": 0.0347, "nll_loss": 3.1888473017716024e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.3492316597307763e-08, "rewards/margins": 0.0, "rewards/rejected": -1.3492316597307763e-08, "step": 240 }, { "epoch": 0.10728654447921324, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.415872573852539, "eval_logits/rejected": -1.415872573852539, "eval_logps/chosen": -2.8176731348139583e-07, "eval_logps/rejected": -2.8176731348139583e-07, "eval_loss": 0.034657686948776245, "eval_nll_loss": 3.278254041561013e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.4088360700270641e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.4088360700270641e-08, "eval_runtime": 307.249, "eval_samples_per_second": 58.236, "eval_steps_per_second": 1.823, "step": 240 }, { "epoch": 0.11175681716584712, "grad_norm": 0.03917480482423638, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4259008169174194, "logits/rejected": -1.4259008169174194, "logps/chosen": -2.861021641820116e-07, "logps/rejected": -2.861021641820116e-07, "loss": 0.0347, "nll_loss": 3.2782546099952015e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.4305106432743742e-08, "rewards/margins": 0.0, "rewards/rejected": -1.4305106432743742e-08, "step": 250 }, { "epoch": 0.11175681716584712, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4383741617202759, "eval_logits/rejected": -1.4383741617202759, "eval_logps/chosen": -2.8176731348139583e-07, "eval_logps/rejected": -2.8176731348139583e-07, "eval_loss": 0.034657690674066544, "eval_nll_loss": 3.3527609843986284e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.4088360700270641e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.4088360700270641e-08, "eval_runtime": 307.2546, "eval_samples_per_second": 58.235, "eval_steps_per_second": 1.823, "step": 250 }, { "epoch": 0.11622708985248101, "grad_norm": 0.05198634530505512, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4493887424468994, "logits/rejected": -1.4493887424468994, "logps/chosen": -2.633440487898042e-07, "logps/rejected": -2.633440487898042e-07, "loss": 0.0347, "nll_loss": 3.524122860198986e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.316720421584705e-08, "rewards/margins": 0.0, "rewards/rejected": -1.316720421584705e-08, "step": 260 }, { "epoch": 0.11622708985248101, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.4592266082763672, "eval_logits/rejected": -1.4592266082763672, "eval_logps/chosen": -2.7093008725387335e-07, "eval_logps/rejected": -2.7093008725387335e-07, "eval_loss": 0.03465774282813072, "eval_nll_loss": 3.799794683345681e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.3546508803585766e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.3546508803585766e-08, "eval_runtime": 307.2459, "eval_samples_per_second": 58.237, "eval_steps_per_second": 1.823, "step": 260 }, { "epoch": 0.12069736253911488, "grad_norm": 0.11820570469696125, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.46332585811615, "logits/rejected": -1.46332585811615, "logps/chosen": -3.9230658899214177e-07, "logps/rejected": -3.9230658899214177e-07, "loss": 0.0347, "nll_loss": 4.783269105246291e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.9615329804878456e-08, "rewards/margins": 0.0, "rewards/rejected": -1.9615329804878456e-08, "step": 270 }, { "epoch": 0.12069736253911488, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.468156337738037, "eval_logits/rejected": -1.468156337738037, "eval_logps/chosen": -9.536715310787258e-07, "eval_logps/rejected": -9.536715310787258e-07, "eval_loss": 0.03465830534696579, "eval_nll_loss": 9.387706541019725e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -4.7683567316880726e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -4.7683567316880726e-08, "eval_runtime": 307.3223, "eval_samples_per_second": 58.222, "eval_steps_per_second": 1.822, "step": 270 }, { "epoch": 0.12516763522574878, "grad_norm": 0.09515560200533464, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.4118568897247314, "logits/rejected": -1.4118568897247314, "logps/chosen": -0.00016646471340209246, "logps/rejected": -0.00016646471340209246, "loss": 0.0348, "nll_loss": 0.0001147976508946158, "rewards/accuracies": 0.0, "rewards/chosen": -8.323235306306742e-06, "rewards/margins": 0.0, "rewards/rejected": -8.323235306306742e-06, "step": 280 }, { "epoch": 0.12516763522574878, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.126119613647461, "eval_logits/rejected": -1.126119613647461, "eval_logps/chosen": -2.7093008725387335e-07, "eval_logps/rejected": -2.7093008725387335e-07, "eval_loss": 0.034658852964639664, "eval_nll_loss": 1.4975536259953515e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.3546501698158409e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.3546501698158409e-08, "eval_runtime": 307.2374, "eval_samples_per_second": 58.238, "eval_steps_per_second": 1.823, "step": 280 }, { "epoch": 0.12963790791238267, "grad_norm": 0.24950192922112446, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.9670178294181824, "logits/rejected": -0.9670178294181824, "logps/chosen": -0.04551885277032852, "logps/rejected": -0.04551885277032852, "loss": 0.066, "nll_loss": 0.031298212707042694, "rewards/accuracies": 0.0, "rewards/chosen": -0.0022759425919502974, "rewards/margins": 0.0, "rewards/rejected": -0.0022759425919502974, "step": 290 }, { "epoch": 0.12963790791238267, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.908425509929657, "eval_logits/rejected": -0.908425509929657, "eval_logps/chosen": -6.848872544651385e-06, "eval_logps/rejected": -6.848872544651385e-06, "eval_loss": 0.034663498401641846, "eval_nll_loss": 6.1390978771669324e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -3.42443740919407e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -3.42443740919407e-07, "eval_runtime": 307.2693, "eval_samples_per_second": 58.232, "eval_steps_per_second": 1.823, "step": 290 }, { "epoch": 0.13410818059901655, "grad_norm": 0.03431844824411396, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.8719849586486816, "logits/rejected": -0.8719849586486816, "logps/chosen": -1.4694925312142004e-06, "logps/rejected": -1.4694925312142004e-06, "loss": 0.0347, "nll_loss": 1.3790776165478746e-06, "rewards/accuracies": 0.0, "rewards/chosen": -7.347462371853908e-08, "rewards/margins": 0.0, "rewards/rejected": -7.347462371853908e-08, "step": 300 }, { "epoch": 0.13410818059901655, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.8419067859649658, "eval_logits/rejected": -0.8419067859649658, "eval_logps/chosen": -1.0837207042868613e-07, "eval_logps/rejected": -1.0837207042868613e-07, "eval_loss": 0.03465750813484192, "eval_nll_loss": 1.4901159772762185e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.418604231977042e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.418604231977042e-09, "eval_runtime": 307.3079, "eval_samples_per_second": 58.225, "eval_steps_per_second": 1.822, "step": 300 }, { "epoch": 0.13857845328565044, "grad_norm": 0.046084457187151866, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.8182867765426636, "logits/rejected": -0.8182867765426636, "logps/chosen": -1.1812553424306316e-07, "logps/rejected": -1.1812553424306316e-07, "loss": 0.0347, "nll_loss": 1.356005299157914e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.906276800971e-09, "rewards/margins": 0.0, "rewards/rejected": -5.906276800971e-09, "step": 310 }, { "epoch": 0.13857845328565044, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7935153245925903, "eval_logits/rejected": -0.7935153245925903, "eval_logps/chosen": -1.3004645893488487e-07, "eval_logps/rejected": -1.3004645893488487e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.502324456647557e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.502324456647557e-09, "eval_runtime": 307.2897, "eval_samples_per_second": 58.228, "eval_steps_per_second": 1.822, "step": 310 }, { "epoch": 0.1430487259722843, "grad_norm": 0.026447763498169533, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7777472734451294, "logits/rejected": -0.7777472734451294, "logps/chosen": -1.1704181446248185e-07, "logps/rejected": -1.1704181446248185e-07, "loss": 0.0347, "nll_loss": 1.25169719922269e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.852090811941935e-09, "rewards/margins": 0.0, "rewards/rejected": -5.852090811941935e-09, "step": 320 }, { "epoch": 0.1430487259722843, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7703178524971008, "eval_logits/rejected": -0.7703178524971008, "eval_logps/chosen": -9.753485130659101e-08, "eval_logps/rejected": -9.753485130659101e-08, "eval_loss": 0.03465747460722923, "eval_nll_loss": 1.1175869474300271e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -4.876741233061921e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -4.876741233061921e-09, "eval_runtime": 307.3055, "eval_samples_per_second": 58.225, "eval_steps_per_second": 1.822, "step": 320 }, { "epoch": 0.14751899865891818, "grad_norm": 0.024063594687914625, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7712096571922302, "logits/rejected": -0.7712096571922302, "logps/chosen": -1.1053950998984874e-07, "logps/rejected": -1.1053950998984874e-07, "loss": 0.0347, "nll_loss": 1.244246874421151e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.526974433678333e-09, "rewards/margins": 0.0, "rewards/rejected": -5.526974433678333e-09, "step": 330 }, { "epoch": 0.14751899865891818, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7746484875679016, "eval_logits/rejected": -0.7746484875679016, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960464122267695e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960464122267695e-09, "eval_runtime": 307.2334, "eval_samples_per_second": 58.239, "eval_steps_per_second": 1.823, "step": 330 }, { "epoch": 0.15198927134555207, "grad_norm": 0.020132595346862448, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7744746804237366, "logits/rejected": -0.7744746804237366, "logps/chosen": -1.1379066933159265e-07, "logps/rejected": -1.1379066933159265e-07, "loss": 0.0347, "nll_loss": 1.2591478082413232e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.689532400765529e-09, "rewards/margins": 0.0, "rewards/rejected": -5.689532400765529e-09, "step": 340 }, { "epoch": 0.15198927134555207, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7745358347892761, "eval_logits/rejected": -0.7745358347892761, "eval_logps/chosen": -1.3004645893488487e-07, "eval_logps/rejected": -1.3004645893488487e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.502324456647557e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.502324456647557e-09, "eval_runtime": 307.34, "eval_samples_per_second": 58.219, "eval_steps_per_second": 1.822, "step": 340 }, { "epoch": 0.15645954403218595, "grad_norm": 0.023269788619903833, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.777168869972229, "logits/rejected": -0.777168869972229, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.2665984172599565e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 350 }, { "epoch": 0.15645954403218595, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7768326997756958, "eval_logits/rejected": -0.7768326997756958, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748205780983, "eval_nll_loss": 1.2665985593685036e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3208, "eval_samples_per_second": 58.223, "eval_steps_per_second": 1.822, "step": 350 }, { "epoch": 0.16092981671881984, "grad_norm": 0.0223925116848789, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7769736647605896, "logits/rejected": -0.7769736647605896, "logps/chosen": -1.1487436779589189e-07, "logps/rejected": -1.1487436779589189e-07, "loss": 0.0347, "nll_loss": 1.1697407842348184e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.743718389794594e-09, "rewards/margins": 0.0, "rewards/rejected": -5.743718389794594e-09, "step": 360 }, { "epoch": 0.16092981671881984, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7752794027328491, "eval_logits/rejected": -0.7752794027328491, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747833251953, "eval_nll_loss": 1.1920927533992653e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3143, "eval_samples_per_second": 58.224, "eval_steps_per_second": 1.822, "step": 360 }, { "epoch": 0.16540008940545373, "grad_norm": 0.023199535123816478, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7756190299987793, "logits/rejected": -0.7756190299987793, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.192092469182171e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 370 }, { "epoch": 0.16540008940545373, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7760257720947266, "eval_logits/rejected": -0.7760257720947266, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747460722923, "eval_nll_loss": 1.1175869474300271e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2373, "eval_samples_per_second": 58.238, "eval_steps_per_second": 1.823, "step": 370 }, { "epoch": 0.1698703620920876, "grad_norm": 0.022797710054038613, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7724426984786987, "logits/rejected": -0.7724426984786987, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.1771913932534517e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 380 }, { "epoch": 0.1698703620920876, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7687971591949463, "eval_logits/rejected": -0.7687971591949463, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747833251953, "eval_nll_loss": 1.1920927533992653e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2189, "eval_samples_per_second": 58.242, "eval_steps_per_second": 1.823, "step": 380 }, { "epoch": 0.1743406347787215, "grad_norm": 0.02330401700584419, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7696870565414429, "logits/rejected": -0.7696870565414429, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.2293455142753373e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 390 }, { "epoch": 0.1743406347787215, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7687665224075317, "eval_logits/rejected": -0.7687665224075317, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748205780983, "eval_nll_loss": 1.2665985593685036e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3556, "eval_samples_per_second": 58.216, "eval_steps_per_second": 1.822, "step": 390 }, { "epoch": 0.17881090746535538, "grad_norm": 0.023191506116135168, "learning_rate": 2.5e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7678502798080444, "logits/rejected": -0.7678502798080444, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.25169719922269e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 400 }, { "epoch": 0.17881090746535538, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7668212056159973, "eval_logits/rejected": -0.7668212056159973, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748205780983, "eval_nll_loss": 1.2665985593685036e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2727, "eval_samples_per_second": 58.232, "eval_steps_per_second": 1.822, "step": 400 }, { "epoch": 0.18328118015198927, "grad_norm": 0.023363022377614208, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7651191353797913, "logits/rejected": -0.7651191353797913, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.25169719922269e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 410 }, { "epoch": 0.18328118015198927, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7659606337547302, "eval_logits/rejected": -0.7659606337547302, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747460722923, "eval_nll_loss": 1.1175869474300271e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3455, "eval_samples_per_second": 58.218, "eval_steps_per_second": 1.822, "step": 410 }, { "epoch": 0.18775145283862316, "grad_norm": 0.023511625619021405, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7650267481803894, "logits/rejected": -0.7650267481803894, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.2367959811854234e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 420 }, { "epoch": 0.18775145283862316, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7655761241912842, "eval_logits/rejected": -0.7655761241912842, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747833251953, "eval_nll_loss": 1.1920927533992653e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2676, "eval_samples_per_second": 58.233, "eval_steps_per_second": 1.823, "step": 420 }, { "epoch": 0.19222172552525704, "grad_norm": 0.02330154160943623, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7662619352340698, "logits/rejected": -0.7662619352340698, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.1995430782008043e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 430 }, { "epoch": 0.19222172552525704, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7679765820503235, "eval_logits/rejected": -0.7679765820503235, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465747460722923, "eval_nll_loss": 1.1175869474300271e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2354, "eval_samples_per_second": 58.239, "eval_steps_per_second": 1.823, "step": 430 }, { "epoch": 0.19669199821189093, "grad_norm": 0.022797027310381853, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7675051689147949, "logits/rejected": -0.7675051689147949, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.2293455142753373e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 440 }, { "epoch": 0.19669199821189093, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7653347849845886, "eval_logits/rejected": -0.7653347849845886, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748205780983, "eval_nll_loss": 1.2665985593685036e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2951, "eval_samples_per_second": 58.227, "eval_steps_per_second": 1.822, "step": 440 }, { "epoch": 0.20116227089852481, "grad_norm": 0.023019572901179387, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7651574015617371, "logits/rejected": -0.7651574015617371, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.2665984172599565e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 450 }, { "epoch": 0.20116227089852481, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7623587250709534, "eval_logits/rejected": -0.7623587250709534, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748205780983, "eval_nll_loss": 1.2665985593685036e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2612, "eval_samples_per_second": 58.234, "eval_steps_per_second": 1.823, "step": 450 }, { "epoch": 0.2056325435851587, "grad_norm": 0.0240935794512142, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7617487907409668, "logits/rejected": -0.7617487907409668, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.3336534721020143e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 460 }, { "epoch": 0.2056325435851587, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7611863613128662, "eval_logits/rejected": -0.7611863613128662, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465750440955162, "eval_nll_loss": 1.4156101713069802e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3105, "eval_samples_per_second": 58.224, "eval_steps_per_second": 1.822, "step": 460 }, { "epoch": 0.2101028162717926, "grad_norm": 0.02327309664522326, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7611461877822876, "logits/rejected": -0.7611461877822876, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.51991798702511e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 470 }, { "epoch": 0.2101028162717926, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7622258067131042, "eval_logits/rejected": -0.7622258067131042, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465753048658371, "eval_nll_loss": 1.6391270207805064e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2119, "eval_samples_per_second": 58.243, "eval_steps_per_second": 1.823, "step": 470 }, { "epoch": 0.21457308895842647, "grad_norm": 0.023299532688741614, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7616379261016846, "logits/rejected": -0.7616379261016846, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.6763797816565784e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 480 }, { "epoch": 0.21457308895842647, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7636578679084778, "eval_logits/rejected": -0.7636578679084778, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465753048658371, "eval_nll_loss": 1.6391270207805064e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2264, "eval_samples_per_second": 58.24, "eval_steps_per_second": 1.823, "step": 480 }, { "epoch": 0.21904336164506036, "grad_norm": 0.023667881715919378, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7620495557785034, "logits/rejected": -0.7620495557785034, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.6391268786719593e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 490 }, { "epoch": 0.21904336164506036, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7611829042434692, "eval_logits/rejected": -0.7611829042434692, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465751186013222, "eval_nll_loss": 1.5646217832454568e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2239, "eval_samples_per_second": 58.241, "eval_steps_per_second": 1.823, "step": 490 }, { "epoch": 0.22351363433169424, "grad_norm": 0.023427690242502415, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7625541687011719, "logits/rejected": -0.7625541687011719, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.5646214990283625e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 500 }, { "epoch": 0.22351363433169424, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7622444033622742, "eval_logits/rejected": -0.7622444033622742, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2023, "eval_samples_per_second": 58.245, "eval_steps_per_second": 1.823, "step": 500 }, { "epoch": 0.22798390701832813, "grad_norm": 0.023813059050390148, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7629829049110413, "logits/rejected": -0.7629829049110413, "logps/chosen": -1.181255271376358e-07, "logps/rejected": -1.181255271376358e-07, "loss": 0.0347, "nll_loss": 1.3336536142105615e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.90627635688179e-09, "rewards/margins": 0.0, "rewards/rejected": -5.90627635688179e-09, "step": 510 }, { "epoch": 0.22798390701832813, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7672262787818909, "eval_logits/rejected": -0.7672262787818909, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.354, "eval_samples_per_second": 58.216, "eval_steps_per_second": 1.822, "step": 510 }, { "epoch": 0.23245417970496202, "grad_norm": 0.023778754619915025, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7636716365814209, "logits/rejected": -0.7636716365814209, "logps/chosen": -1.192092469182171e-07, "logps/rejected": -1.192092469182171e-07, "loss": 0.0347, "nll_loss": 1.3411042232291948e-07, "rewards/accuracies": 0.0, "rewards/chosen": -5.960462345910855e-09, "rewards/margins": 0.0, "rewards/rejected": -5.960462345910855e-09, "step": 520 }, { "epoch": 0.23245417970496202, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7621346116065979, "eval_logits/rejected": -0.7621346116065979, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.3259, "eval_samples_per_second": 58.222, "eval_steps_per_second": 1.822, "step": 520 }, { "epoch": 0.23692445239159587, "grad_norm": 0.024300673901908782, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7622011303901672, "logits/rejected": -0.7622011303901672, "logps/chosen": -1.2029298090965312e-07, "logps/rejected": -1.2029298090965312e-07, "loss": 0.0347, "nll_loss": 1.3485546901392809e-07, "rewards/accuracies": 0.0, "rewards/chosen": -6.0146483349399205e-09, "rewards/margins": 0.0, "rewards/rejected": -6.0146483349399205e-09, "step": 530 }, { "epoch": 0.23692445239159587, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7621825337409973, "eval_logits/rejected": -0.7621825337409973, "eval_logps/chosen": -1.1920927533992653e-07, "eval_logps/rejected": -1.1920927533992653e-07, "eval_loss": 0.03465748578310013, "eval_nll_loss": 1.341104365337742e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.960461013643226e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.960461013643226e-09, "eval_runtime": 307.2644, "eval_samples_per_second": 58.233, "eval_steps_per_second": 1.823, "step": 530 }, { "epoch": 0.24139472507822976, "grad_norm": 0.024939117222400192, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7631164789199829, "logits/rejected": -0.7631164789199829, "logps/chosen": -1.2787901937372226e-07, "logps/rejected": -1.2787901937372226e-07, "loss": 0.0347, "nll_loss": 1.4007086690526194e-07, "rewards/accuracies": 0.0, "rewards/chosen": -6.393950258143377e-09, "rewards/margins": 0.0, "rewards/rejected": -6.393950258143377e-09, "step": 540 }, { "epoch": 0.24139472507822976, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7637412548065186, "eval_logits/rejected": -0.7637412548065186, "eval_logps/chosen": -1.3004644472403015e-07, "eval_logps/rejected": -1.3004644472403015e-07, "eval_loss": 0.03465750440955162, "eval_nll_loss": 1.4156101713069802e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.502321348023088e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.502321348023088e-09, "eval_runtime": 307.2322, "eval_samples_per_second": 58.239, "eval_steps_per_second": 1.823, "step": 540 }, { "epoch": 0.24586499776486365, "grad_norm": 0.04096011470812148, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7638927698135376, "logits/rejected": -0.7638927698135376, "logps/chosen": -1.2787901937372226e-07, "logps/rejected": -1.2787901937372226e-07, "loss": 0.0347, "nll_loss": 1.4007086690526194e-07, "rewards/accuracies": 0.0, "rewards/chosen": -6.393951146321797e-09, "rewards/margins": 0.0, "rewards/rejected": -6.393951146321797e-09, "step": 550 }, { "epoch": 0.24586499776486365, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7629874348640442, "eval_logits/rejected": -0.7629874348640442, "eval_logps/chosen": -1.3004645893488487e-07, "eval_logps/rejected": -1.3004645893488487e-07, "eval_loss": 0.03465750440955162, "eval_nll_loss": 1.4156101713069802e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.502324456647557e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.502324456647557e-09, "eval_runtime": 307.2276, "eval_samples_per_second": 58.24, "eval_steps_per_second": 1.823, "step": 550 }, { "epoch": 0.25033527045149756, "grad_norm": 0.06516073872439763, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7624660134315491, "logits/rejected": -0.7624660134315491, "logps/chosen": -1.4088367095155263e-07, "logps/rejected": -1.4088367095155263e-07, "loss": 0.0347, "nll_loss": 1.4901156930591242e-07, "rewards/accuracies": 0.0, "rewards/chosen": -7.04418301467058e-09, "rewards/margins": 0.0, "rewards/rejected": -7.04418301467058e-09, "step": 560 }, { "epoch": 0.25033527045149756, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7590819001197815, "eval_logits/rejected": -0.7590819001197815, "eval_logps/chosen": -1.5172085454651096e-07, "eval_logps/rejected": -1.5172085454651096e-07, "eval_loss": 0.03465751186013222, "eval_nll_loss": 1.5646217832454568e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.586042904961232e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.586042904961232e-09, "eval_runtime": 307.2208, "eval_samples_per_second": 58.242, "eval_steps_per_second": 1.823, "step": 560 }, { "epoch": 0.2548055431381314, "grad_norm": 0.07782558260433992, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.758915901184082, "logits/rejected": -0.758915901184082, "logps/chosen": -1.5280458853794698e-07, "logps/rejected": -1.5280458853794698e-07, "loss": 0.0347, "nll_loss": 1.661478705727859e-07, "rewards/accuracies": 0.0, "rewards/chosen": -7.640229782168717e-09, "rewards/margins": 0.0, "rewards/rejected": -7.640229782168717e-09, "step": 570 }, { "epoch": 0.2548055431381314, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7589533925056458, "eval_logits/rejected": -0.7589533925056458, "eval_logps/chosen": -1.5172085454651096e-07, "eval_logps/rejected": -1.5172085454651096e-07, "eval_loss": 0.03465753421187401, "eval_nll_loss": 1.7136328267497447e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.586042904961232e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.586042904961232e-09, "eval_runtime": 307.2908, "eval_samples_per_second": 58.228, "eval_steps_per_second": 1.822, "step": 570 }, { "epoch": 0.25927581582476533, "grad_norm": 0.0704148190270702, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7552851438522339, "logits/rejected": -0.7552851438522339, "logps/chosen": -1.538883083185283e-07, "logps/rejected": -1.538883083185283e-07, "loss": 0.0347, "nll_loss": 1.7210831515512837e-07, "rewards/accuracies": 0.0, "rewards/chosen": -7.694415771197782e-09, "rewards/margins": 0.0, "rewards/rejected": -7.694415771197782e-09, "step": 580 }, { "epoch": 0.25927581582476533, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.754145622253418, "eval_logits/rejected": -0.754145622253418, "eval_logps/chosen": -1.5172085454651096e-07, "eval_logps/rejected": -1.5172085454651096e-07, "eval_loss": 0.03465753421187401, "eval_nll_loss": 1.7136328267497447e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -7.586042904961232e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -7.586042904961232e-09, "eval_runtime": 307.2521, "eval_samples_per_second": 58.236, "eval_steps_per_second": 1.823, "step": 580 }, { "epoch": 0.2637460885113992, "grad_norm": 0.11568307292638198, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7538480162620544, "logits/rejected": -0.7538480162620544, "logps/chosen": -1.549720280991096e-07, "logps/rejected": -1.549720280991096e-07, "loss": 0.0347, "nll_loss": 1.765786663554536e-07, "rewards/accuracies": 0.0, "rewards/chosen": -7.748601760226848e-09, "rewards/margins": 0.0, "rewards/rejected": -7.748601760226848e-09, "step": 590 }, { "epoch": 0.2637460885113992, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.755375325679779, "eval_logits/rejected": -0.755375325679779, "eval_logps/chosen": -1.6255808077403344e-07, "eval_logps/rejected": -1.6255808077403344e-07, "eval_loss": 0.034657545387744904, "eval_nll_loss": 1.937150670983101e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -8.127903683430304e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -8.127903683430304e-09, "eval_runtime": 307.2733, "eval_samples_per_second": 58.232, "eval_steps_per_second": 1.822, "step": 590 }, { "epoch": 0.2682163611980331, "grad_norm": 0.19470687368738301, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7575832605361938, "logits/rejected": -0.7575832605361938, "logps/chosen": -2.1999520072313317e-07, "logps/rejected": -2.1999520072313317e-07, "loss": 0.0347, "nll_loss": 2.332030391016815e-07, "rewards/accuracies": 0.0, "rewards/chosen": -1.0999761990149182e-08, "rewards/margins": 0.0, "rewards/rejected": -1.0999761990149182e-08, "step": 600 }, { "epoch": 0.2682163611980331, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7635509967803955, "eval_logits/rejected": -0.7635509967803955, "eval_logps/chosen": -3.359532172453328e-07, "eval_logps/rejected": -3.359532172453328e-07, "eval_loss": 0.034657664597034454, "eval_nll_loss": 3.1292407243199705e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -1.6797658375367064e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -1.6797658375367064e-08, "eval_runtime": 307.2526, "eval_samples_per_second": 58.235, "eval_steps_per_second": 1.823, "step": 600 }, { "epoch": 0.27268663388466696, "grad_norm": 0.42044147270647025, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7522677183151245, "logits/rejected": -0.7522677183151245, "logps/chosen": -5.35355354713829e-07, "logps/rejected": -5.35355354713829e-07, "loss": 0.0347, "nll_loss": 4.31386752097751e-07, "rewards/accuracies": 0.0, "rewards/chosen": -2.6767764893520507e-08, "rewards/margins": 0.0, "rewards/rejected": -2.6767764893520507e-08, "step": 610 }, { "epoch": 0.27268663388466696, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.7175083756446838, "eval_logits/rejected": -0.7175083756446838, "eval_logps/chosen": -4.291438472137088e-06, "eval_logps/rejected": -4.291438472137088e-06, "eval_loss": 0.03466034680604935, "eval_nll_loss": 2.995067234223825e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -2.1457185539475176e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -2.1457185539475176e-07, "eval_runtime": 307.3107, "eval_samples_per_second": 58.224, "eval_steps_per_second": 1.822, "step": 610 }, { "epoch": 0.2771569065713009, "grad_norm": 0.01605006750727601, "learning_rate": 2.0080483222562476e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.7701171040534973, "logits/rejected": -0.7701171040534973, "logps/chosen": -3.6403660487849265e-05, "logps/rejected": -3.6403660487849265e-05, "loss": 0.0347, "nll_loss": 2.5153431124635972e-05, "rewards/accuracies": 0.0, "rewards/chosen": -1.820182887968258e-06, "rewards/margins": 0.0, "rewards/rejected": -1.820182887968258e-06, "step": 620 }, { "epoch": 0.2771569065713009, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.9072837233543396, "eval_logits/rejected": -0.9072837233543396, "eval_logps/chosen": -9.753479446317215e-08, "eval_logps/rejected": -9.753479446317215e-08, "eval_loss": 0.03465813025832176, "eval_nll_loss": 7.674061066609283e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -4.876741233061921e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -4.876741233061921e-09, "eval_runtime": 307.2783, "eval_samples_per_second": 58.231, "eval_steps_per_second": 1.822, "step": 620 }, { "epoch": 0.28162717925793473, "grad_norm": 0.017203363283006045, "learning_rate": 1.9920476822239895e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -0.9159248471260071, "logits/rejected": -0.9159248471260071, "logps/chosen": -5.274546765576815e-06, "logps/rejected": -5.274546765576815e-06, "loss": 0.0347, "nll_loss": 4.1455546124780085e-06, "rewards/accuracies": 0.0, "rewards/chosen": -2.637273723848921e-07, "rewards/margins": 0.0, "rewards/rejected": -2.637273723848921e-07, "step": 630 }, { "epoch": 0.28162717925793473, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.001300573348999, "eval_logits/rejected": -1.001300573348999, "eval_logps/chosen": -1.0837207042868613e-07, "eval_logps/rejected": -1.0837207042868613e-07, "eval_loss": 0.03465818613767624, "eval_nll_loss": 8.195610803340969e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -5.418601123352573e-09, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -5.418601123352573e-09, "eval_runtime": 307.2434, "eval_samples_per_second": 58.237, "eval_steps_per_second": 1.823, "step": 630 }, { "epoch": 0.2860974519445686, "grad_norm": 0.30297850608494836, "learning_rate": 1.976423537605237e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.05489182472229, "logits/rejected": -1.05489182472229, "logps/chosen": -1.5822315901914408e-07, "logps/rejected": -1.5822315901914408e-07, "loss": 0.0347, "nll_loss": 1.037116476254596e-06, "rewards/accuracies": 0.0, "rewards/chosen": -7.911157062778784e-09, "rewards/margins": 0.0, "rewards/rejected": -7.911157062778784e-09, "step": 640 }, { "epoch": 0.2860974519445686, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -1.097064733505249, "eval_logits/rejected": -1.097064733505249, "eval_logps/chosen": -8.127877890728996e-07, "eval_logps/rejected": -8.127877890728996e-07, "eval_loss": 0.034658223390579224, "eval_nll_loss": 8.642647912893153e-07, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -4.0639388743102245e-08, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -4.0639388743102245e-08, "eval_runtime": 307.3284, "eval_samples_per_second": 58.221, "eval_steps_per_second": 1.822, "step": 640 }, { "epoch": 0.2905677246312025, "grad_norm": 0.2522133573777816, "learning_rate": 1.961161351381841e-06, "log_odds_chosen": 0.0, "log_odds_ratio": -0.6931471824645996, "logits/chosen": -1.0934903621673584, "logits/rejected": -1.0934903621673584, "logps/chosen": -8.60470663610613e-07, "logps/rejected": -8.60470663610613e-07, "loss": 0.0347, "nll_loss": 6.951365207896743e-07, "rewards/accuracies": 0.0, "rewards/chosen": -4.302352962781697e-08, "rewards/margins": 0.0, "rewards/rejected": -4.302352962781697e-08, "step": 650 }, { "epoch": 0.2905677246312025, "eval_log_odds_chosen": 0.0, "eval_log_odds_ratio": -0.6931473612785339, "eval_logits/chosen": -0.8988085985183716, "eval_logits/rejected": -0.8988085985183716, "eval_logps/chosen": -1.3534682693716604e-05, "eval_logps/rejected": -1.3534682693716604e-05, "eval_loss": 0.03466695547103882, "eval_nll_loss": 9.595665687811561e-06, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -6.767340892110951e-07, "eval_rewards/margins": 0.0, "eval_rewards/rejected": -6.767340892110951e-07, "eval_runtime": 307.2566, "eval_samples_per_second": 58.235, "eval_steps_per_second": 1.823, "step": 650 }, { "epoch": 0.29503799731783636, "grad_norm": 1.4142135623730951, "learning_rate": 1.9462473604038077e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.1054, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 660 }, { "epoch": 0.29503799731783636, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9632, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 660 }, { "epoch": 0.2995082700044703, "grad_norm": 1.4142135623730951, "learning_rate": 1.9316685232156397e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 670 }, { "epoch": 0.2995082700044703, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.937, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 670 }, { "epoch": 0.30397854269110414, "grad_norm": 1.4142135623730951, "learning_rate": 1.917412472118426e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 680 }, { "epoch": 0.30397854269110414, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9677, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 680 }, { "epoch": 0.30844881537773805, "grad_norm": 1.4142135623730951, "learning_rate": 1.9034674690672024e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 690 }, { "epoch": 0.30844881537773805, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9917, "eval_samples_per_second": 58.285, "eval_steps_per_second": 1.824, "step": 690 }, { "epoch": 0.3129190880643719, "grad_norm": 1.4142135623730951, "learning_rate": 1.8898223650461362e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 700 }, { "epoch": 0.3129190880643719, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0225, "eval_samples_per_second": 58.279, "eval_steps_per_second": 1.824, "step": 700 }, { "epoch": 0.3173893607510058, "grad_norm": 1.4142135623730951, "learning_rate": 1.876466562602004e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 710 }, { "epoch": 0.3173893607510058, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0188, "eval_samples_per_second": 58.28, "eval_steps_per_second": 1.824, "step": 710 }, { "epoch": 0.3218596334376397, "grad_norm": 1.4142135623730951, "learning_rate": 1.863389981249825e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 720 }, { "epoch": 0.3218596334376397, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9809, "eval_samples_per_second": 58.287, "eval_steps_per_second": 1.824, "step": 720 }, { "epoch": 0.3263299061242736, "grad_norm": 1.4142135623730951, "learning_rate": 1.8505830254940132e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 730 }, { "epoch": 0.3263299061242736, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.999, "eval_samples_per_second": 58.284, "eval_steps_per_second": 1.824, "step": 730 }, { "epoch": 0.33080017881090745, "grad_norm": 1.4142135623730951, "learning_rate": 1.8380365552345197e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 740 }, { "epoch": 0.33080017881090745, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9879, "eval_samples_per_second": 58.286, "eval_steps_per_second": 1.824, "step": 740 }, { "epoch": 0.33527045149754137, "grad_norm": 1.4142135623730951, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 750 }, { "epoch": 0.33527045149754137, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9649, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 750 }, { "epoch": 0.3397407241841752, "grad_norm": 1.4142135623730951, "learning_rate": 1.8136906252750293e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 760 }, { "epoch": 0.3397407241841752, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9772, "eval_samples_per_second": 58.288, "eval_steps_per_second": 1.824, "step": 760 }, { "epoch": 0.34421099687080914, "grad_norm": 1.4142135623730951, "learning_rate": 1.801874925391118e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 770 }, { "epoch": 0.34421099687080914, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9557, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 770 }, { "epoch": 0.348681269557443, "grad_norm": 1.4142135623730951, "learning_rate": 1.7902871850985824e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 780 }, { "epoch": 0.348681269557443, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9969, "eval_samples_per_second": 58.284, "eval_steps_per_second": 1.824, "step": 780 }, { "epoch": 0.3531515422440769, "grad_norm": 1.4142135623730951, "learning_rate": 1.7789201674120502e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 790 }, { "epoch": 0.3531515422440769, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.985, "eval_samples_per_second": 58.286, "eval_steps_per_second": 1.824, "step": 790 }, { "epoch": 0.35762181493071077, "grad_norm": 1.4142135623730951, "learning_rate": 1.7677669529663689e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 800 }, { "epoch": 0.35762181493071077, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0459, "eval_samples_per_second": 58.275, "eval_steps_per_second": 1.824, "step": 800 }, { "epoch": 0.3620920876173447, "grad_norm": 1.4142135623730951, "learning_rate": 1.7568209223157664e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 810 }, { "epoch": 0.3620920876173447, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.487, "eval_samples_per_second": 58.191, "eval_steps_per_second": 1.821, "step": 810 }, { "epoch": 0.36656236030397854, "grad_norm": 1.4142135623730951, "learning_rate": 1.7460757394239458e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 820 }, { "epoch": 0.36656236030397854, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9448, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 820 }, { "epoch": 0.37103263299061245, "grad_norm": 1.4142135623730951, "learning_rate": 1.7355253362515584e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 830 }, { "epoch": 0.37103263299061245, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9523, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 830 }, { "epoch": 0.3755029056772463, "grad_norm": 1.4142135623730951, "learning_rate": 1.7251638983558855e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 840 }, { "epoch": 0.3755029056772463, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9448, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 840 }, { "epoch": 0.37997317836388017, "grad_norm": 1.4142135623730951, "learning_rate": 1.7149858514250883e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 850 }, { "epoch": 0.37997317836388017, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9333, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.825, "step": 850 }, { "epoch": 0.3844434510505141, "grad_norm": 1.4142135623730951, "learning_rate": 1.704985848676184e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 860 }, { "epoch": 0.3844434510505141, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8979, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 860 }, { "epoch": 0.38891372373714794, "grad_norm": 1.4142135623730951, "learning_rate": 1.6951587590520263e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 870 }, { "epoch": 0.38891372373714794, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9523, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 870 }, { "epoch": 0.39338399642378186, "grad_norm": 1.4142135623730951, "learning_rate": 1.6854996561581053e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 880 }, { "epoch": 0.39338399642378186, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9703, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.824, "step": 880 }, { "epoch": 0.3978542691104157, "grad_norm": 1.4142135623730951, "learning_rate": 1.6760038078849776e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 890 }, { "epoch": 0.3978542691104157, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9927, "eval_samples_per_second": 58.285, "eval_steps_per_second": 1.824, "step": 890 }, { "epoch": 0.40232454179704963, "grad_norm": 1.4142135623730951, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 900 }, { "epoch": 0.40232454179704963, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9478, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 900 }, { "epoch": 0.4067948144836835, "grad_norm": 1.4142135623730951, "learning_rate": 1.6574838603294898e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 910 }, { "epoch": 0.4067948144836835, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0325, "eval_samples_per_second": 58.277, "eval_steps_per_second": 1.824, "step": 910 }, { "epoch": 0.4112650871703174, "grad_norm": 1.4142135623730951, "learning_rate": 1.648451183489468e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 920 }, { "epoch": 0.4112650871703174, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0105, "eval_samples_per_second": 58.281, "eval_steps_per_second": 1.824, "step": 920 }, { "epoch": 0.41573535985695126, "grad_norm": 1.4142135623730951, "learning_rate": 1.6395645894598825e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 930 }, { "epoch": 0.41573535985695126, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9663, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 930 }, { "epoch": 0.4202056325435852, "grad_norm": 1.4142135623730951, "learning_rate": 1.6308201826336057e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 940 }, { "epoch": 0.4202056325435852, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9346, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.824, "step": 940 }, { "epoch": 0.42467590523021903, "grad_norm": 1.4142135623730951, "learning_rate": 1.6222142113076255e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 950 }, { "epoch": 0.42467590523021903, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9147, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 950 }, { "epoch": 0.42914617791685294, "grad_norm": 1.4142135623730951, "learning_rate": 1.6137430609197571e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 960 }, { "epoch": 0.42914617791685294, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9235, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 960 }, { "epoch": 0.4336164506034868, "grad_norm": 1.4142135623730951, "learning_rate": 1.605403247669839e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 970 }, { "epoch": 0.4336164506034868, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9407, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 970 }, { "epoch": 0.4380867232901207, "grad_norm": 1.4142135623730951, "learning_rate": 1.59719141249985e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 980 }, { "epoch": 0.4380867232901207, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8804, "eval_samples_per_second": 58.306, "eval_steps_per_second": 1.825, "step": 980 }, { "epoch": 0.4425569959767546, "grad_norm": 1.4142135623730951, "learning_rate": 1.5891043154093205e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 990 }, { "epoch": 0.4425569959767546, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9332, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.825, "step": 990 }, { "epoch": 0.4470272686633885, "grad_norm": 1.4142135623730951, "learning_rate": 1.5811388300841898e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1000 }, { "epoch": 0.4470272686633885, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9856, "eval_samples_per_second": 58.286, "eval_steps_per_second": 1.824, "step": 1000 }, { "epoch": 0.45149754135002235, "grad_norm": 1.4142135623730951, "learning_rate": 1.5732919388188816e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1010 }, { "epoch": 0.45149754135002235, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.1337, "eval_samples_per_second": 58.258, "eval_steps_per_second": 1.823, "step": 1010 }, { "epoch": 0.45596781403665626, "grad_norm": 1.4142135623730951, "learning_rate": 1.565560727712874e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1020 }, { "epoch": 0.45596781403665626, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.942, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1020 }, { "epoch": 0.4604380867232901, "grad_norm": 1.4142135623730951, "learning_rate": 1.5579423821243897e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1030 }, { "epoch": 0.4604380867232901, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9725, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.824, "step": 1030 }, { "epoch": 0.46490835940992403, "grad_norm": 1.4142135623730951, "learning_rate": 1.5504341823651056e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1040 }, { "epoch": 0.46490835940992403, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9371, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 1040 }, { "epoch": 0.4693786320965579, "grad_norm": 1.4142135623730951, "learning_rate": 1.5430334996209192e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1050 }, { "epoch": 0.4693786320965579, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9659, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 1050 }, { "epoch": 0.47384890478319175, "grad_norm": 1.4142135623730951, "learning_rate": 1.5357377920848783e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1060 }, { "epoch": 0.47384890478319175, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8938, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 1060 }, { "epoch": 0.47831917746982566, "grad_norm": 1.4142135623730951, "learning_rate": 1.5285446012893579e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1070 }, { "epoch": 0.47831917746982566, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8839, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1070 }, { "epoch": 0.4827894501564595, "grad_norm": 1.4142135623730951, "learning_rate": 1.5214515486254614e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1080 }, { "epoch": 0.4827894501564595, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9231, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1080 }, { "epoch": 0.48725972284309343, "grad_norm": 1.4142135623730951, "learning_rate": 1.5144563320384566e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1090 }, { "epoch": 0.48725972284309343, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9186, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1090 }, { "epoch": 0.4917299955297273, "grad_norm": 1.4142135623730951, "learning_rate": 1.5075567228888182e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1100 }, { "epoch": 0.4917299955297273, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9155, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1100 }, { "epoch": 0.4962002682163612, "grad_norm": 1.4142135623730951, "learning_rate": 1.5007505629691608e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1110 }, { "epoch": 0.4962002682163612, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0125, "eval_samples_per_second": 58.281, "eval_steps_per_second": 1.824, "step": 1110 }, { "epoch": 0.5006705409029951, "grad_norm": 1.4142135623730951, "learning_rate": 1.494035761667992e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1120 }, { "epoch": 0.5006705409029951, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9188, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1120 }, { "epoch": 0.5051408135896289, "grad_norm": 1.4142135623730951, "learning_rate": 1.487410293271824e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1130 }, { "epoch": 0.5051408135896289, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9331, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.825, "step": 1130 }, { "epoch": 0.5096110862762628, "grad_norm": 1.4142135623730951, "learning_rate": 1.480872194397731e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1140 }, { "epoch": 0.5096110862762628, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9523, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 1140 }, { "epoch": 0.5140813589628968, "grad_norm": 1.4142135623730951, "learning_rate": 1.4744195615489715e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1150 }, { "epoch": 0.5140813589628968, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9243, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1150 }, { "epoch": 0.5185516316495307, "grad_norm": 1.4142135623730951, "learning_rate": 1.4680505487867589e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1160 }, { "epoch": 0.5185516316495307, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9342, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.824, "step": 1160 }, { "epoch": 0.5230219043361645, "grad_norm": 1.4142135623730951, "learning_rate": 1.4617633655117156e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1170 }, { "epoch": 0.5230219043361645, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9471, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1170 }, { "epoch": 0.5274921770227984, "grad_norm": 1.4142135623730951, "learning_rate": 1.4555562743489552e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1180 }, { "epoch": 0.5274921770227984, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9553, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 1180 }, { "epoch": 0.5319624497094323, "grad_norm": 1.4142135623730951, "learning_rate": 1.4494275891311214e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1190 }, { "epoch": 0.5319624497094323, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9175, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1190 }, { "epoch": 0.5364327223960662, "grad_norm": 1.4142135623730951, "learning_rate": 1.4433756729740647e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1200 }, { "epoch": 0.5364327223960662, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9299, "eval_samples_per_second": 58.297, "eval_steps_per_second": 1.825, "step": 1200 }, { "epoch": 0.5409029950827, "grad_norm": 1.4142135623730951, "learning_rate": 1.4373989364401727e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1210 }, { "epoch": 0.5409029950827, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.4452, "eval_samples_per_second": 58.199, "eval_steps_per_second": 1.821, "step": 1210 }, { "epoch": 0.5453732677693339, "grad_norm": 1.4142135623730951, "learning_rate": 1.4314958357846706e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1220 }, { "epoch": 0.5453732677693339, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9633, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 1220 }, { "epoch": 0.5498435404559678, "grad_norm": 1.4142135623730951, "learning_rate": 1.4256648712805027e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1230 }, { "epoch": 0.5498435404559678, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0093, "eval_samples_per_second": 58.282, "eval_steps_per_second": 1.824, "step": 1230 }, { "epoch": 0.5543138131426018, "grad_norm": 1.4142135623730951, "learning_rate": 1.419904585617662e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1240 }, { "epoch": 0.5543138131426018, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9636, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 1240 }, { "epoch": 0.5587840858292356, "grad_norm": 1.4142135623730951, "learning_rate": 1.4142135623730952e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1250 }, { "epoch": 0.5587840858292356, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9663, "eval_samples_per_second": 58.29, "eval_steps_per_second": 1.824, "step": 1250 }, { "epoch": 0.5632543585158695, "grad_norm": 1.4142135623730951, "learning_rate": 1.4085904245475275e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1260 }, { "epoch": 0.5632543585158695, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9137, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1260 }, { "epoch": 0.5677246312025034, "grad_norm": 1.4142135623730951, "learning_rate": 1.4030338331657844e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1270 }, { "epoch": 0.5677246312025034, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9528, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 1270 }, { "epoch": 0.5721949038891372, "grad_norm": 1.4142135623730951, "learning_rate": 1.3975424859373688e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1280 }, { "epoch": 0.5721949038891372, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9438, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1280 }, { "epoch": 0.5766651765757711, "grad_norm": 1.4142135623730951, "learning_rate": 1.3921151159742616e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1290 }, { "epoch": 0.5766651765757711, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9341, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.824, "step": 1290 }, { "epoch": 0.581135449262405, "grad_norm": 1.4142135623730951, "learning_rate": 1.386750490563073e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1300 }, { "epoch": 0.581135449262405, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9212, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1300 }, { "epoch": 0.5856057219490389, "grad_norm": 1.4142135623730951, "learning_rate": 1.3814474099888442e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1310 }, { "epoch": 0.5856057219490389, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9709, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.824, "step": 1310 }, { "epoch": 0.5900759946356727, "grad_norm": 1.4142135623730951, "learning_rate": 1.376204706407951e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1320 }, { "epoch": 0.5900759946356727, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8949, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 1320 }, { "epoch": 0.5945462673223066, "grad_norm": 1.4142135623730951, "learning_rate": 1.3710212427677044e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1330 }, { "epoch": 0.5945462673223066, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9456, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1330 }, { "epoch": 0.5990165400089406, "grad_norm": 1.4142135623730951, "learning_rate": 1.3658959117703826e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1340 }, { "epoch": 0.5990165400089406, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.923, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1340 }, { "epoch": 0.6034868126955745, "grad_norm": 1.4142135623730951, "learning_rate": 1.3608276348795436e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1350 }, { "epoch": 0.6034868126955745, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9347, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.824, "step": 1350 }, { "epoch": 0.6079570853822083, "grad_norm": 1.4142135623730951, "learning_rate": 1.355815361366601e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1360 }, { "epoch": 0.6079570853822083, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9472, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1360 }, { "epoch": 0.6124273580688422, "grad_norm": 1.4142135623730951, "learning_rate": 1.350858067395748e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1370 }, { "epoch": 0.6124273580688422, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9086, "eval_samples_per_second": 58.301, "eval_steps_per_second": 1.825, "step": 1370 }, { "epoch": 0.6168976307554761, "grad_norm": 1.4142135623730951, "learning_rate": 1.345954755145414e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1380 }, { "epoch": 0.6168976307554761, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9757, "eval_samples_per_second": 58.288, "eval_steps_per_second": 1.824, "step": 1380 }, { "epoch": 0.62136790344211, "grad_norm": 1.4142135623730951, "learning_rate": 1.3411044519645502e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1390 }, { "epoch": 0.62136790344211, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9413, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 1390 }, { "epoch": 0.6258381761287438, "grad_norm": 1.4142135623730951, "learning_rate": 1.3363062095621222e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1400 }, { "epoch": 0.6258381761287438, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8654, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 1400 }, { "epoch": 0.6303084488153777, "grad_norm": 1.4142135623730951, "learning_rate": 1.3315591032282687e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1410 }, { "epoch": 0.6303084488153777, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.3373, "eval_samples_per_second": 58.219, "eval_steps_per_second": 1.822, "step": 1410 }, { "epoch": 0.6347787215020116, "grad_norm": 1.4142135623730951, "learning_rate": 1.3268622310856882e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1420 }, { "epoch": 0.6347787215020116, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9821, "eval_samples_per_second": 58.287, "eval_steps_per_second": 1.824, "step": 1420 }, { "epoch": 0.6392489941886456, "grad_norm": 1.4142135623730951, "learning_rate": 1.3222147133698626e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1430 }, { "epoch": 0.6392489941886456, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9112, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1430 }, { "epoch": 0.6437192668752794, "grad_norm": 1.4142135623730951, "learning_rate": 1.3176156917368248e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1440 }, { "epoch": 0.6437192668752794, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9398, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 1440 }, { "epoch": 0.6481895395619133, "grad_norm": 1.4142135623730951, "learning_rate": 1.3130643285972255e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1450 }, { "epoch": 0.6481895395619133, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9452, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1450 }, { "epoch": 0.6526598122485472, "grad_norm": 1.4142135623730951, "learning_rate": 1.3085598064755342e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1460 }, { "epoch": 0.6526598122485472, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9408, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 1460 }, { "epoch": 0.657130084935181, "grad_norm": 1.4142135623730951, "learning_rate": 1.3041013273932528e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1470 }, { "epoch": 0.657130084935181, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9377, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 1470 }, { "epoch": 0.6616003576218149, "grad_norm": 1.4142135623730951, "learning_rate": 1.299688112275091e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1480 }, { "epoch": 0.6616003576218149, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9478, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1480 }, { "epoch": 0.6660706303084488, "grad_norm": 1.4142135623730951, "learning_rate": 1.2953194003770995e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1490 }, { "epoch": 0.6660706303084488, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9455, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1490 }, { "epoch": 0.6705409029950827, "grad_norm": 1.4142135623730951, "learning_rate": 1.2909944487358056e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1500 }, { "epoch": 0.6705409029950827, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9717, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.824, "step": 1500 }, { "epoch": 0.6750111756817165, "grad_norm": 1.4142135623730951, "learning_rate": 1.286712531637447e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1510 }, { "epoch": 0.6750111756817165, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.4527, "eval_samples_per_second": 58.198, "eval_steps_per_second": 1.821, "step": 1510 }, { "epoch": 0.6794814483683504, "grad_norm": 1.4142135623730951, "learning_rate": 1.282472940106443e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1520 }, { "epoch": 0.6794814483683504, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8943, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 1520 }, { "epoch": 0.6839517210549844, "grad_norm": 1.4142135623730951, "learning_rate": 1.278274981412284e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1530 }, { "epoch": 0.6839517210549844, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8936, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 1530 }, { "epoch": 0.6884219937416183, "grad_norm": 1.4142135623730951, "learning_rate": 1.2741179785940638e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1540 }, { "epoch": 0.6884219937416183, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9217, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1540 }, { "epoch": 0.6928922664282521, "grad_norm": 1.4142135623730951, "learning_rate": 1.270001270001905e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1550 }, { "epoch": 0.6928922664282521, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9543, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 1550 }, { "epoch": 0.697362539114886, "grad_norm": 1.4142135623730951, "learning_rate": 1.2659242088545834e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1560 }, { "epoch": 0.697362539114886, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9759, "eval_samples_per_second": 58.288, "eval_steps_per_second": 1.824, "step": 1560 }, { "epoch": 0.7018328118015199, "grad_norm": 1.4142135623730951, "learning_rate": 1.261886162812672e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1570 }, { "epoch": 0.7018328118015199, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9444, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.824, "step": 1570 }, { "epoch": 0.7063030844881538, "grad_norm": 1.4142135623730951, "learning_rate": 1.257886513566569e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1580 }, { "epoch": 0.7063030844881538, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9556, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 1580 }, { "epoch": 0.7107733571747876, "grad_norm": 1.4142135623730951, "learning_rate": 1.253924656438798e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1590 }, { "epoch": 0.7107733571747876, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9498, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1590 }, { "epoch": 0.7152436298614215, "grad_norm": 1.4142135623730951, "learning_rate": 1.25e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1600 }, { "epoch": 0.7152436298614215, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9189, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1600 }, { "epoch": 0.7197139025480555, "grad_norm": 1.4142135623730951, "learning_rate": 1.246111965698067e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1610 }, { "epoch": 0.7197139025480555, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0602, "eval_samples_per_second": 58.272, "eval_steps_per_second": 1.824, "step": 1610 }, { "epoch": 0.7241841752346894, "grad_norm": 1.4142135623730951, "learning_rate": 1.2422599874998834e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1620 }, { "epoch": 0.7241841752346894, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9262, "eval_samples_per_second": 58.297, "eval_steps_per_second": 1.825, "step": 1620 }, { "epoch": 0.7286544479213232, "grad_norm": 1.4142135623730951, "learning_rate": 1.238443511545175e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1630 }, { "epoch": 0.7286544479213232, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9104, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1630 }, { "epoch": 0.7331247206079571, "grad_norm": 1.4142135623730951, "learning_rate": 1.2346619958119873e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1640 }, { "epoch": 0.7331247206079571, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9095, "eval_samples_per_second": 58.301, "eval_steps_per_second": 1.825, "step": 1640 }, { "epoch": 0.737594993294591, "grad_norm": 1.4142135623730951, "learning_rate": 1.2309149097933274e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1650 }, { "epoch": 0.737594993294591, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9941, "eval_samples_per_second": 58.285, "eval_steps_per_second": 1.824, "step": 1650 }, { "epoch": 0.7420652659812249, "grad_norm": 1.4142135623730951, "learning_rate": 1.2272017341845401e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1660 }, { "epoch": 0.7420652659812249, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9483, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1660 }, { "epoch": 0.7465355386678587, "grad_norm": 1.4142135623730951, "learning_rate": 1.223521960580991e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1670 }, { "epoch": 0.7465355386678587, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9179, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 1670 }, { "epoch": 0.7510058113544926, "grad_norm": 1.4142135623730951, "learning_rate": 1.2198750911856664e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1680 }, { "epoch": 0.7510058113544926, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9109, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1680 }, { "epoch": 0.7554760840411265, "grad_norm": 1.4142135623730951, "learning_rate": 1.2162606385262997e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1690 }, { "epoch": 0.7554760840411265, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8631, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 1690 }, { "epoch": 0.7599463567277603, "grad_norm": 1.4142135623730951, "learning_rate": 1.2126781251816649e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1700 }, { "epoch": 0.7599463567277603, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9256, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1700 }, { "epoch": 0.7644166294143943, "grad_norm": 1.4142135623730951, "learning_rate": 1.2091270835166862e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1710 }, { "epoch": 0.7644166294143943, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0642, "eval_samples_per_second": 58.271, "eval_steps_per_second": 1.824, "step": 1710 }, { "epoch": 0.7688869021010282, "grad_norm": 1.4142135623730951, "learning_rate": 1.2056070554260305e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1720 }, { "epoch": 0.7688869021010282, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9109, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1720 }, { "epoch": 0.7733571747876621, "grad_norm": 1.4142135623730951, "learning_rate": 1.2021175920858626e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1730 }, { "epoch": 0.7733571747876621, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8822, "eval_samples_per_second": 58.306, "eval_steps_per_second": 1.825, "step": 1730 }, { "epoch": 0.7778274474742959, "grad_norm": 1.4142135623730951, "learning_rate": 1.1986582537134606e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1740 }, { "epoch": 0.7778274474742959, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8705, "eval_samples_per_second": 58.308, "eval_steps_per_second": 1.825, "step": 1740 }, { "epoch": 0.7822977201609298, "grad_norm": 1.4142135623730951, "learning_rate": 1.1952286093343937e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1750 }, { "epoch": 0.7822977201609298, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9026, "eval_samples_per_second": 58.302, "eval_steps_per_second": 1.825, "step": 1750 }, { "epoch": 0.7867679928475637, "grad_norm": 1.4142135623730951, "learning_rate": 1.1918282365569903e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1760 }, { "epoch": 0.7867679928475637, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.928, "eval_samples_per_second": 58.297, "eval_steps_per_second": 1.825, "step": 1760 }, { "epoch": 0.7912382655341976, "grad_norm": 1.4142135623730951, "learning_rate": 1.1884567213538209e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1770 }, { "epoch": 0.7912382655341976, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9265, "eval_samples_per_second": 58.297, "eval_steps_per_second": 1.825, "step": 1770 }, { "epoch": 0.7957085382208314, "grad_norm": 1.4142135623730951, "learning_rate": 1.1851136578499433e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1780 }, { "epoch": 0.7957085382208314, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9251, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1780 }, { "epoch": 0.8001788109074653, "grad_norm": 1.4142135623730951, "learning_rate": 1.181798648117664e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1790 }, { "epoch": 0.8001788109074653, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8954, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 1790 }, { "epoch": 0.8046490835940993, "grad_norm": 1.4142135623730951, "learning_rate": 1.1785113019775794e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1800 }, { "epoch": 0.8046490835940993, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8855, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1800 }, { "epoch": 0.8091193562807332, "grad_norm": 1.4142135623730951, "learning_rate": 1.1752512368056712e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1810 }, { "epoch": 0.8091193562807332, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.0492, "eval_samples_per_second": 58.274, "eval_steps_per_second": 1.824, "step": 1810 }, { "epoch": 0.813589628967367, "grad_norm": 1.4142135623730951, "learning_rate": 1.1720180773462387e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1820 }, { "epoch": 0.813589628967367, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8809, "eval_samples_per_second": 58.306, "eval_steps_per_second": 1.825, "step": 1820 }, { "epoch": 0.8180599016540009, "grad_norm": 1.4142135623730951, "learning_rate": 1.168811455530461e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1830 }, { "epoch": 0.8180599016540009, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8895, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 1830 }, { "epoch": 0.8225301743406348, "grad_norm": 1.4142135623730951, "learning_rate": 1.1656310103003923e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1840 }, { "epoch": 0.8225301743406348, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8857, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1840 }, { "epoch": 0.8270004470272687, "grad_norm": 1.4142135623730951, "learning_rate": 1.162476387438193e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1850 }, { "epoch": 0.8270004470272687, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9216, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 1850 }, { "epoch": 0.8314707197139025, "grad_norm": 1.4142135623730951, "learning_rate": 1.1593472394004206e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1860 }, { "epoch": 0.8314707197139025, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8721, "eval_samples_per_second": 58.308, "eval_steps_per_second": 1.825, "step": 1860 }, { "epoch": 0.8359409924005364, "grad_norm": 1.4142135623730951, "learning_rate": 1.1562432251572007e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1870 }, { "epoch": 0.8359409924005364, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8854, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1870 }, { "epoch": 0.8404112650871703, "grad_norm": 1.4142135623730951, "learning_rate": 1.1531640100361064e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1880 }, { "epoch": 0.8404112650871703, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8876, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1880 }, { "epoch": 0.8448815377738041, "grad_norm": 1.4142135623730951, "learning_rate": 1.1501092655705905e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1890 }, { "epoch": 0.8448815377738041, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8984, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 1890 }, { "epoch": 0.8493518104604381, "grad_norm": 1.4142135623730951, "learning_rate": 1.1470786693528087e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1900 }, { "epoch": 0.8493518104604381, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8788, "eval_samples_per_second": 58.306, "eval_steps_per_second": 1.825, "step": 1900 }, { "epoch": 0.853822083147072, "grad_norm": 1.4142135623730951, "learning_rate": 1.144071904890689e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1910 }, { "epoch": 0.853822083147072, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9922, "eval_samples_per_second": 58.285, "eval_steps_per_second": 1.824, "step": 1910 }, { "epoch": 0.8582923558337059, "grad_norm": 1.4142135623730951, "learning_rate": 1.1410886614690962e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1920 }, { "epoch": 0.8582923558337059, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8985, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 1920 }, { "epoch": 0.8627626285203397, "grad_norm": 1.4142135623730951, "learning_rate": 1.1381286340149635e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1930 }, { "epoch": 0.8627626285203397, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.948, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 1930 }, { "epoch": 0.8672329012069736, "grad_norm": 1.4142135623730951, "learning_rate": 1.1351915229662496e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1940 }, { "epoch": 0.8672329012069736, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9062, "eval_samples_per_second": 58.301, "eval_steps_per_second": 1.825, "step": 1940 }, { "epoch": 0.8717031738936075, "grad_norm": 1.4142135623730951, "learning_rate": 1.1322770341445958e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1950 }, { "epoch": 0.8717031738936075, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9101, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 1950 }, { "epoch": 0.8761734465802414, "grad_norm": 1.4142135623730951, "learning_rate": 1.1293848786315642e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1960 }, { "epoch": 0.8761734465802414, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9046, "eval_samples_per_second": 58.302, "eval_steps_per_second": 1.825, "step": 1960 }, { "epoch": 0.8806437192668752, "grad_norm": 1.4142135623730951, "learning_rate": 1.1265147726483323e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1970 }, { "epoch": 0.8806437192668752, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8737, "eval_samples_per_second": 58.307, "eval_steps_per_second": 1.825, "step": 1970 }, { "epoch": 0.8851139919535091, "grad_norm": 1.4142135623730951, "learning_rate": 1.1236664374387369e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1980 }, { "epoch": 0.8851139919535091, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8878, "eval_samples_per_second": 58.305, "eval_steps_per_second": 1.825, "step": 1980 }, { "epoch": 0.8895842646401431, "grad_norm": 1.4142135623730951, "learning_rate": 1.120839599155551e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1990 }, { "epoch": 0.8895842646401431, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8703, "eval_samples_per_second": 58.308, "eval_steps_per_second": 1.825, "step": 1990 }, { "epoch": 0.894054537326777, "grad_norm": 1.4142135623730951, "learning_rate": 1.118033988749895e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2000 }, { "epoch": 0.894054537326777, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8991, "eval_samples_per_second": 58.303, "eval_steps_per_second": 1.825, "step": 2000 }, { "epoch": 0.8985248100134108, "grad_norm": 1.4142135623730951, "learning_rate": 1.1152493418636764e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2010 }, { "epoch": 0.8985248100134108, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.3093, "eval_samples_per_second": 58.225, "eval_steps_per_second": 1.822, "step": 2010 }, { "epoch": 0.9029950827000447, "grad_norm": 1.4142135623730951, "learning_rate": 1.112485398724962e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2020 }, { "epoch": 0.9029950827000447, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9509, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.824, "step": 2020 }, { "epoch": 0.9074653553866786, "grad_norm": 1.4142135623730951, "learning_rate": 1.1097419040461884e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2030 }, { "epoch": 0.9074653553866786, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8663, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 2030 }, { "epoch": 0.9119356280733125, "grad_norm": 1.4142135623730951, "learning_rate": 1.1070186069251193e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2040 }, { "epoch": 0.9119356280733125, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9069, "eval_samples_per_second": 58.301, "eval_steps_per_second": 1.825, "step": 2040 }, { "epoch": 0.9164059007599463, "grad_norm": 1.4142135623730951, "learning_rate": 1.1043152607484655e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2050 }, { "epoch": 0.9164059007599463, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8916, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 2050 }, { "epoch": 0.9208761734465802, "grad_norm": 1.4142135623730951, "learning_rate": 1.1016316230980794e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2060 }, { "epoch": 0.9208761734465802, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.933, "eval_samples_per_second": 58.296, "eval_steps_per_second": 1.825, "step": 2060 }, { "epoch": 0.9253464461332142, "grad_norm": 1.4142135623730951, "learning_rate": 1.098967455659645e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2070 }, { "epoch": 0.9253464461332142, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9244, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 2070 }, { "epoch": 0.9298167188198481, "grad_norm": 1.4142135623730951, "learning_rate": 1.0963225241337867e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2080 }, { "epoch": 0.9298167188198481, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9128, "eval_samples_per_second": 58.3, "eval_steps_per_second": 1.825, "step": 2080 }, { "epoch": 0.9342869915064819, "grad_norm": 1.4142135623730951, "learning_rate": 1.093696598149518e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2090 }, { "epoch": 0.9342869915064819, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8665, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 2090 }, { "epoch": 0.9387572641931158, "grad_norm": 1.4142135623730951, "learning_rate": 1.091089451179962e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2100 }, { "epoch": 0.9387572641931158, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9558, "eval_samples_per_second": 58.292, "eval_steps_per_second": 1.824, "step": 2100 }, { "epoch": 0.9432275368797497, "grad_norm": 1.4142135623730951, "learning_rate": 1.0885008604602703e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2110 }, { "epoch": 0.9432275368797497, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9696, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.824, "step": 2110 }, { "epoch": 0.9476978095663835, "grad_norm": 1.4142135623730951, "learning_rate": 1.0859306069076736e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2120 }, { "epoch": 0.9476978095663835, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8734, "eval_samples_per_second": 58.307, "eval_steps_per_second": 1.825, "step": 2120 }, { "epoch": 0.9521680822530174, "grad_norm": 1.4142135623730951, "learning_rate": 1.083378475043599e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2130 }, { "epoch": 0.9521680822530174, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8916, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 2130 }, { "epoch": 0.9566383549396513, "grad_norm": 1.4142135623730951, "learning_rate": 1.0808442529177925e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2140 }, { "epoch": 0.9566383549396513, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9179, "eval_samples_per_second": 58.299, "eval_steps_per_second": 1.825, "step": 2140 }, { "epoch": 0.9611086276262852, "grad_norm": 1.4142135623730951, "learning_rate": 1.0783277320343842e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2150 }, { "epoch": 0.9611086276262852, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8541, "eval_samples_per_second": 58.311, "eval_steps_per_second": 1.825, "step": 2150 }, { "epoch": 0.965578900312919, "grad_norm": 1.4142135623730951, "learning_rate": 1.075828707279838e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2160 }, { "epoch": 0.965578900312919, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9213, "eval_samples_per_second": 58.298, "eval_steps_per_second": 1.825, "step": 2160 }, { "epoch": 0.970049172999553, "grad_norm": 1.4142135623730951, "learning_rate": 1.0733469768527298e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2170 }, { "epoch": 0.970049172999553, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.9379, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.824, "step": 2170 }, { "epoch": 0.9745194456861869, "grad_norm": 1.4142135623730951, "learning_rate": 1.0708823421952984e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2180 }, { "epoch": 0.9745194456861869, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8894, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 2180 }, { "epoch": 0.9789897183728208, "grad_norm": 1.4142135623730951, "learning_rate": 1.0684346079267208e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2190 }, { "epoch": 0.9789897183728208, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.89, "eval_samples_per_second": 58.304, "eval_steps_per_second": 1.825, "step": 2190 }, { "epoch": 0.9834599910594546, "grad_norm": 1.4142135623730951, "learning_rate": 1.066003581778052e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2200 }, { "epoch": 0.9834599910594546, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8657, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 2200 }, { "epoch": 0.9879302637460885, "grad_norm": 1.4142135623730951, "learning_rate": 1.0635890745287928e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2210 }, { "epoch": 0.9879302637460885, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 307.025, "eval_samples_per_second": 58.279, "eval_steps_per_second": 1.824, "step": 2210 }, { "epoch": 0.9924005364327224, "grad_norm": 1.4142135623730951, "learning_rate": 1.0611908999450224e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2220 }, { "epoch": 0.9924005364327224, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8682, "eval_samples_per_second": 58.308, "eval_steps_per_second": 1.825, "step": 2220 }, { "epoch": 0.9968708091193563, "grad_norm": 1.4142135623730951, "learning_rate": 1.058808874719067e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0, "nll_loss": NaN, "rewards/accuracies": 0.0, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2230 }, { "epoch": 0.9968708091193563, "eval_log_odds_chosen": NaN, "eval_log_odds_ratio": NaN, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": NaN, "eval_logps/rejected": NaN, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": NaN, "eval_rewards/margins": NaN, "eval_rewards/rejected": NaN, "eval_runtime": 306.8635, "eval_samples_per_second": 58.309, "eval_steps_per_second": 1.825, "step": 2230 }, { "epoch": 1.0, "step": 2237, "total_flos": 0.0, "train_loss": 0.04676872755621073, "train_runtime": 74758.4678, "train_samples_per_second": 0.957, "train_steps_per_second": 0.03 } ], "logging_steps": 10, "max_steps": 2237, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }