{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 711, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10548523206751055, "grad_norm": 2.3504333482192794, "learning_rate": 6.944444444444445e-06, "log_odds_chosen": 0.15013551712036133, "log_odds_ratio": -0.6692488193511963, "logits/chosen": -0.8321835994720459, "logits/rejected": -0.832346498966217, "logps/chosen": -0.9450153112411499, "logps/rejected": -1.0411678552627563, "loss": 1.7955, "nll_loss": 1.4582228660583496, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09450153261423111, "rewards/margins": 0.00961525458842516, "rewards/rejected": -0.10411678999662399, "step": 25 }, { "epoch": 0.2109704641350211, "grad_norm": 1.9691856311458618, "learning_rate": 1.388888888888889e-05, "log_odds_chosen": 0.3434825539588928, "log_odds_ratio": -0.6003904938697815, "logits/chosen": -0.7575013637542725, "logits/rejected": -0.7675329446792603, "logps/chosen": -0.5159790515899658, "logps/rejected": -0.6563442349433899, "loss": 0.5969, "nll_loss": 0.4879494607448578, "rewards/accuracies": 0.5899999737739563, "rewards/chosen": -0.05159790441393852, "rewards/margins": 0.01403652224689722, "rewards/rejected": -0.06563442200422287, "step": 50 }, { "epoch": 0.31645569620253167, "grad_norm": 1.085908619339194, "learning_rate": 1.9906103286384977e-05, "log_odds_chosen": 0.42845839262008667, "log_odds_ratio": -0.5688134431838989, "logits/chosen": -0.7494800686836243, "logits/rejected": -0.7198505997657776, "logps/chosen": -0.43672674894332886, "logps/rejected": -0.6124382615089417, "loss": 0.5533, "nll_loss": 0.4733336567878723, "rewards/accuracies": 0.5, "rewards/chosen": -0.04367266967892647, "rewards/margins": 0.01757114939391613, "rewards/rejected": -0.061243828386068344, "step": 75 }, { "epoch": 0.4219409282700422, "grad_norm": 1.1240076241404764, "learning_rate": 1.912363067292645e-05, "log_odds_chosen": 0.29888662695884705, "log_odds_ratio": -0.6093687415122986, "logits/chosen": -0.709399402141571, "logits/rejected": -0.7048219442367554, "logps/chosen": -0.5603894591331482, "logps/rejected": -0.7039618492126465, "loss": 0.56, "nll_loss": 0.49526843428611755, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.056038953363895416, "rewards/margins": 0.014357234351336956, "rewards/rejected": -0.07039617747068405, "step": 100 }, { "epoch": 0.5274261603375527, "grad_norm": 1.0788263671658365, "learning_rate": 1.834115805946792e-05, "log_odds_chosen": 0.25319904088974, "log_odds_ratio": -0.631630539894104, "logits/chosen": -0.6328348517417908, "logits/rejected": -0.6294259428977966, "logps/chosen": -0.49323585629463196, "logps/rejected": -0.5995710492134094, "loss": 0.5485, "nll_loss": 0.5131134986877441, "rewards/accuracies": 0.47999998927116394, "rewards/chosen": -0.049323588609695435, "rewards/margins": 0.010633519850671291, "rewards/rejected": -0.059957101941108704, "step": 125 }, { "epoch": 0.6329113924050633, "grad_norm": 1.1078095413080318, "learning_rate": 1.755868544600939e-05, "log_odds_chosen": 0.25750571489334106, "log_odds_ratio": -0.6318458914756775, "logits/chosen": -0.7056828141212463, "logits/rejected": -0.6951937675476074, "logps/chosen": -0.5244600176811218, "logps/rejected": -0.605254590511322, "loss": 0.5609, "nll_loss": 0.478809654712677, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.052445996552705765, "rewards/margins": 0.008079464547336102, "rewards/rejected": -0.06052546575665474, "step": 150 }, { "epoch": 0.7383966244725738, "grad_norm": 1.0260136766275174, "learning_rate": 1.6776212832550862e-05, "log_odds_chosen": 0.3896116316318512, "log_odds_ratio": -0.5955315232276917, "logits/chosen": -0.6688608527183533, "logits/rejected": -0.6982090473175049, "logps/chosen": -0.4829683303833008, "logps/rejected": -0.6143837571144104, "loss": 0.5201, "nll_loss": 0.4728917181491852, "rewards/accuracies": 0.5600000023841858, "rewards/chosen": -0.048296838998794556, "rewards/margins": 0.013141541741788387, "rewards/rejected": -0.06143837794661522, "step": 175 }, { "epoch": 0.8438818565400844, "grad_norm": 1.0922913456111067, "learning_rate": 1.5993740219092334e-05, "log_odds_chosen": 0.35299748182296753, "log_odds_ratio": -0.5904638767242432, "logits/chosen": -0.7948518395423889, "logits/rejected": -0.7930269837379456, "logps/chosen": -0.5347493886947632, "logps/rejected": -0.7274565696716309, "loss": 0.5678, "nll_loss": 0.5108060240745544, "rewards/accuracies": 0.5699999928474426, "rewards/chosen": -0.05347493290901184, "rewards/margins": 0.01927073672413826, "rewards/rejected": -0.0727456733584404, "step": 200 }, { "epoch": 0.9493670886075949, "grad_norm": 1.171188249527363, "learning_rate": 1.5211267605633803e-05, "log_odds_chosen": 0.19867561757564545, "log_odds_ratio": -0.6661250591278076, "logits/chosen": -0.7350332736968994, "logits/rejected": -0.7309374213218689, "logps/chosen": -0.4790670871734619, "logps/rejected": -0.578321099281311, "loss": 0.5593, "nll_loss": 0.4474111795425415, "rewards/accuracies": 0.49000000953674316, "rewards/chosen": -0.04790670797228813, "rewards/margins": 0.009925400838255882, "rewards/rejected": -0.05783211067318916, "step": 225 }, { "epoch": 1.0548523206751055, "grad_norm": 1.0561427838830786, "learning_rate": 1.4428794992175275e-05, "log_odds_chosen": 0.5409557819366455, "log_odds_ratio": -0.5446589589118958, "logits/chosen": -0.8016952276229858, "logits/rejected": -0.7894623279571533, "logps/chosen": -0.466928094625473, "logps/rejected": -0.6494468450546265, "loss": 0.5096, "nll_loss": 0.4225366711616516, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04669281467795372, "rewards/margins": 0.018251873552799225, "rewards/rejected": -0.06494467705488205, "step": 250 }, { "epoch": 1.160337552742616, "grad_norm": 1.0616103418922775, "learning_rate": 1.3646322378716745e-05, "log_odds_chosen": 0.6413868069648743, "log_odds_ratio": -0.5098162293434143, "logits/chosen": -0.8740493655204773, "logits/rejected": -0.9086742997169495, "logps/chosen": -0.43551456928253174, "logps/rejected": -0.7041670083999634, "loss": 0.4928, "nll_loss": 0.4237639605998993, "rewards/accuracies": 0.6899999976158142, "rewards/chosen": -0.04355145990848541, "rewards/margins": 0.026865236461162567, "rewards/rejected": -0.07041670382022858, "step": 275 }, { "epoch": 1.2658227848101267, "grad_norm": 1.0597296687116873, "learning_rate": 1.2863849765258216e-05, "log_odds_chosen": 0.6442282795906067, "log_odds_ratio": -0.5011763572692871, "logits/chosen": -1.0111823081970215, "logits/rejected": -1.0184077024459839, "logps/chosen": -0.45919105410575867, "logps/rejected": -0.7301878333091736, "loss": 0.4639, "nll_loss": 0.40898972749710083, "rewards/accuracies": 0.6800000071525574, "rewards/chosen": -0.045919109135866165, "rewards/margins": 0.02709968015551567, "rewards/rejected": -0.07301878184080124, "step": 300 }, { "epoch": 1.371308016877637, "grad_norm": 1.0935677703953148, "learning_rate": 1.2081377151799686e-05, "log_odds_chosen": 0.6757858395576477, "log_odds_ratio": -0.4901491403579712, "logits/chosen": -0.9852328300476074, "logits/rejected": -1.0046324729919434, "logps/chosen": -0.41143473982810974, "logps/rejected": -0.6924607753753662, "loss": 0.4867, "nll_loss": 0.4125671088695526, "rewards/accuracies": 0.6700000166893005, "rewards/chosen": -0.041143473237752914, "rewards/margins": 0.028102604672312737, "rewards/rejected": -0.0692460760474205, "step": 325 }, { "epoch": 1.4767932489451476, "grad_norm": 1.1585743076882284, "learning_rate": 1.129890453834116e-05, "log_odds_chosen": 0.4605286121368408, "log_odds_ratio": -0.5612049102783203, "logits/chosen": -0.9352016448974609, "logits/rejected": -0.9490614533424377, "logps/chosen": -0.43350136280059814, "logps/rejected": -0.5812506675720215, "loss": 0.4718, "nll_loss": 0.4129113256931305, "rewards/accuracies": 0.5600000023841858, "rewards/chosen": -0.043350137770175934, "rewards/margins": 0.014774931594729424, "rewards/rejected": -0.05812506750226021, "step": 350 }, { "epoch": 1.5822784810126582, "grad_norm": 1.0715102217336756, "learning_rate": 1.051643192488263e-05, "log_odds_chosen": 0.6196006536483765, "log_odds_ratio": -0.4975026845932007, "logits/chosen": -1.0041831731796265, "logits/rejected": -1.0140608549118042, "logps/chosen": -0.3989720046520233, "logps/rejected": -0.6381548047065735, "loss": 0.4857, "nll_loss": 0.42035380005836487, "rewards/accuracies": 0.6200000047683716, "rewards/chosen": -0.03989719972014427, "rewards/margins": 0.023918280377984047, "rewards/rejected": -0.06381548196077347, "step": 375 }, { "epoch": 1.6877637130801688, "grad_norm": 1.0097421405350755, "learning_rate": 9.7339593114241e-06, "log_odds_chosen": 0.5579173564910889, "log_odds_ratio": -0.5348711609840393, "logits/chosen": -1.0373647212982178, "logits/rejected": -1.037854790687561, "logps/chosen": -0.45267555117607117, "logps/rejected": -0.66820228099823, "loss": 0.4691, "nll_loss": 0.410322904586792, "rewards/accuracies": 0.6200000047683716, "rewards/chosen": -0.04526755213737488, "rewards/margins": 0.021552674472332, "rewards/rejected": -0.06682023406028748, "step": 400 }, { "epoch": 1.7932489451476794, "grad_norm": 1.0546124322913446, "learning_rate": 8.951486697965573e-06, "log_odds_chosen": 0.7115356922149658, "log_odds_ratio": -0.5003312826156616, "logits/chosen": -1.1095713376998901, "logits/rejected": -1.1265352964401245, "logps/chosen": -0.44462206959724426, "logps/rejected": -0.7215204834938049, "loss": 0.4849, "nll_loss": 0.43178752064704895, "rewards/accuracies": 0.6399999856948853, "rewards/chosen": -0.04446220397949219, "rewards/margins": 0.027689840644598007, "rewards/rejected": -0.0721520483493805, "step": 425 }, { "epoch": 1.8987341772151898, "grad_norm": 1.1203509702182832, "learning_rate": 8.169014084507043e-06, "log_odds_chosen": 0.6642155647277832, "log_odds_ratio": -0.5045433640480042, "logits/chosen": -1.0597500801086426, "logits/rejected": -1.0680012702941895, "logps/chosen": -0.4701367914676666, "logps/rejected": -0.704271674156189, "loss": 0.4792, "nll_loss": 0.4112149477005005, "rewards/accuracies": 0.6399999856948853, "rewards/chosen": -0.04701368510723114, "rewards/margins": 0.023413481190800667, "rewards/rejected": -0.07042715698480606, "step": 450 }, { "epoch": 2.0042194092827006, "grad_norm": 1.2606333093207305, "learning_rate": 7.386541471048514e-06, "log_odds_chosen": 0.5286250114440918, "log_odds_ratio": -0.5306064486503601, "logits/chosen": -1.0645604133605957, "logits/rejected": -1.0691789388656616, "logps/chosen": -0.44743481278419495, "logps/rejected": -0.6287744045257568, "loss": 0.4803, "nll_loss": 0.42209434509277344, "rewards/accuracies": 0.6299999952316284, "rewards/chosen": -0.04474348574876785, "rewards/margins": 0.018133964389562607, "rewards/rejected": -0.06287744641304016, "step": 475 }, { "epoch": 2.109704641350211, "grad_norm": 1.0966083263839905, "learning_rate": 6.604068857589985e-06, "log_odds_chosen": 0.8573014140129089, "log_odds_ratio": -0.4275921583175659, "logits/chosen": -1.1114364862442017, "logits/rejected": -1.1227397918701172, "logps/chosen": -0.3463269770145416, "logps/rejected": -0.6633933782577515, "loss": 0.4078, "nll_loss": 0.3584522306919098, "rewards/accuracies": 0.7400000095367432, "rewards/chosen": -0.03463269770145416, "rewards/margins": 0.03170664235949516, "rewards/rejected": -0.06633934378623962, "step": 500 }, { "epoch": 2.2151898734177213, "grad_norm": 1.1009276796065117, "learning_rate": 5.821596244131456e-06, "log_odds_chosen": 0.7615014910697937, "log_odds_ratio": -0.47703635692596436, "logits/chosen": -1.1601929664611816, "logits/rejected": -1.1780595779418945, "logps/chosen": -0.39950495958328247, "logps/rejected": -0.6856523752212524, "loss": 0.4048, "nll_loss": 0.41196614503860474, "rewards/accuracies": 0.5899999737739563, "rewards/chosen": -0.03995049372315407, "rewards/margins": 0.028614744544029236, "rewards/rejected": -0.0685652419924736, "step": 525 }, { "epoch": 2.320675105485232, "grad_norm": 1.1135597142860487, "learning_rate": 5.039123630672926e-06, "log_odds_chosen": 0.8249316215515137, "log_odds_ratio": -0.43479686975479126, "logits/chosen": -1.1542495489120483, "logits/rejected": -1.1734535694122314, "logps/chosen": -0.39931461215019226, "logps/rejected": -0.733440637588501, "loss": 0.4061, "nll_loss": 0.39662715792655945, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.039931461215019226, "rewards/margins": 0.03341260552406311, "rewards/rejected": -0.07334406673908234, "step": 550 }, { "epoch": 2.4261603375527425, "grad_norm": 1.1107780211778215, "learning_rate": 4.2566510172143975e-06, "log_odds_chosen": 1.200696587562561, "log_odds_ratio": -0.3881736099720001, "logits/chosen": -1.2098137140274048, "logits/rejected": -1.2287673950195312, "logps/chosen": -0.36806556582450867, "logps/rejected": -0.7970322370529175, "loss": 0.3924, "nll_loss": 0.3511776328086853, "rewards/accuracies": 0.7200000286102295, "rewards/chosen": -0.03680655360221863, "rewards/margins": 0.042896661907434464, "rewards/rejected": -0.07970321923494339, "step": 575 }, { "epoch": 2.5316455696202533, "grad_norm": 1.3159040332886311, "learning_rate": 3.474178403755869e-06, "log_odds_chosen": 1.0953081846237183, "log_odds_ratio": -0.3781184256076813, "logits/chosen": -1.1880977153778076, "logits/rejected": -1.2077745199203491, "logps/chosen": -0.32341885566711426, "logps/rejected": -0.7020614147186279, "loss": 0.4006, "nll_loss": 0.3256089389324188, "rewards/accuracies": 0.7799999713897705, "rewards/chosen": -0.032341886311769485, "rewards/margins": 0.03786425665020943, "rewards/rejected": -0.07020614296197891, "step": 600 }, { "epoch": 2.6371308016877637, "grad_norm": 1.070273138577641, "learning_rate": 2.69170579029734e-06, "log_odds_chosen": 0.6728782057762146, "log_odds_ratio": -0.5007551312446594, "logits/chosen": -1.1887409687042236, "logits/rejected": -1.2133393287658691, "logps/chosen": -0.40768423676490784, "logps/rejected": -0.6612467169761658, "loss": 0.4134, "nll_loss": 0.42612510919570923, "rewards/accuracies": 0.5600000023841858, "rewards/chosen": -0.04076842963695526, "rewards/margins": 0.025356244295835495, "rewards/rejected": -0.06612467020750046, "step": 625 }, { "epoch": 2.742616033755274, "grad_norm": 1.1092420255798914, "learning_rate": 1.9092331768388107e-06, "log_odds_chosen": 0.9050965309143066, "log_odds_ratio": -0.42763033509254456, "logits/chosen": -1.192216396331787, "logits/rejected": -1.2236640453338623, "logps/chosen": -0.35716915130615234, "logps/rejected": -0.735872209072113, "loss": 0.4171, "nll_loss": 0.35565415024757385, "rewards/accuracies": 0.6600000262260437, "rewards/chosen": -0.03571692109107971, "rewards/margins": 0.03787030279636383, "rewards/rejected": -0.07358721643686295, "step": 650 }, { "epoch": 2.848101265822785, "grad_norm": 1.0762367696616766, "learning_rate": 1.1267605633802817e-06, "log_odds_chosen": 0.8493003845214844, "log_odds_ratio": -0.4451717734336853, "logits/chosen": -1.2292503118515015, "logits/rejected": -1.2512164115905762, "logps/chosen": -0.40543216466903687, "logps/rejected": -0.7575715780258179, "loss": 0.4083, "nll_loss": 0.3527292311191559, "rewards/accuracies": 0.6700000166893005, "rewards/chosen": -0.040543220937252045, "rewards/margins": 0.03521393612027168, "rewards/rejected": -0.07575715333223343, "step": 675 }, { "epoch": 2.9535864978902953, "grad_norm": 1.4284839235977267, "learning_rate": 3.4428794992175273e-07, "log_odds_chosen": 0.6508604288101196, "log_odds_ratio": -0.5013710856437683, "logits/chosen": -1.2339705228805542, "logits/rejected": -1.2357439994812012, "logps/chosen": -0.38158729672431946, "logps/rejected": -0.603762149810791, "loss": 0.4226, "nll_loss": 0.40551432967185974, "rewards/accuracies": 0.5199999809265137, "rewards/chosen": -0.03815872594714165, "rewards/margins": 0.022217486053705215, "rewards/rejected": -0.06037621572613716, "step": 700 } ], "logging_steps": 25, "max_steps": 711, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }