|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 711, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10548523206751055, |
|
"grad_norm": 2.3504333482192794, |
|
"learning_rate": 6.944444444444445e-06, |
|
"log_odds_chosen": 0.15013551712036133, |
|
"log_odds_ratio": -0.6692488193511963, |
|
"logits/chosen": -0.8321835994720459, |
|
"logits/rejected": -0.832346498966217, |
|
"logps/chosen": -0.9450153112411499, |
|
"logps/rejected": -1.0411678552627563, |
|
"loss": 1.7955, |
|
"nll_loss": 1.4582228660583496, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.09450153261423111, |
|
"rewards/margins": 0.00961525458842516, |
|
"rewards/rejected": -0.10411678999662399, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2109704641350211, |
|
"grad_norm": 1.9691856311458618, |
|
"learning_rate": 1.388888888888889e-05, |
|
"log_odds_chosen": 0.3434825539588928, |
|
"log_odds_ratio": -0.6003904938697815, |
|
"logits/chosen": -0.7575013637542725, |
|
"logits/rejected": -0.7675329446792603, |
|
"logps/chosen": -0.5159790515899658, |
|
"logps/rejected": -0.6563442349433899, |
|
"loss": 0.5969, |
|
"nll_loss": 0.4879494607448578, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -0.05159790441393852, |
|
"rewards/margins": 0.01403652224689722, |
|
"rewards/rejected": -0.06563442200422287, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31645569620253167, |
|
"grad_norm": 1.085908619339194, |
|
"learning_rate": 1.9906103286384977e-05, |
|
"log_odds_chosen": 0.42845839262008667, |
|
"log_odds_ratio": -0.5688134431838989, |
|
"logits/chosen": -0.7494800686836243, |
|
"logits/rejected": -0.7198505997657776, |
|
"logps/chosen": -0.43672674894332886, |
|
"logps/rejected": -0.6124382615089417, |
|
"loss": 0.5533, |
|
"nll_loss": 0.4733336567878723, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04367266967892647, |
|
"rewards/margins": 0.01757114939391613, |
|
"rewards/rejected": -0.061243828386068344, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4219409282700422, |
|
"grad_norm": 1.1240076241404764, |
|
"learning_rate": 1.912363067292645e-05, |
|
"log_odds_chosen": 0.29888662695884705, |
|
"log_odds_ratio": -0.6093687415122986, |
|
"logits/chosen": -0.709399402141571, |
|
"logits/rejected": -0.7048219442367554, |
|
"logps/chosen": -0.5603894591331482, |
|
"logps/rejected": -0.7039618492126465, |
|
"loss": 0.56, |
|
"nll_loss": 0.49526843428611755, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.056038953363895416, |
|
"rewards/margins": 0.014357234351336956, |
|
"rewards/rejected": -0.07039617747068405, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5274261603375527, |
|
"grad_norm": 1.0788263671658365, |
|
"learning_rate": 1.834115805946792e-05, |
|
"log_odds_chosen": 0.25319904088974, |
|
"log_odds_ratio": -0.631630539894104, |
|
"logits/chosen": -0.6328348517417908, |
|
"logits/rejected": -0.6294259428977966, |
|
"logps/chosen": -0.49323585629463196, |
|
"logps/rejected": -0.5995710492134094, |
|
"loss": 0.5485, |
|
"nll_loss": 0.5131134986877441, |
|
"rewards/accuracies": 0.47999998927116394, |
|
"rewards/chosen": -0.049323588609695435, |
|
"rewards/margins": 0.010633519850671291, |
|
"rewards/rejected": -0.059957101941108704, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6329113924050633, |
|
"grad_norm": 1.1078095413080318, |
|
"learning_rate": 1.755868544600939e-05, |
|
"log_odds_chosen": 0.25750571489334106, |
|
"log_odds_ratio": -0.6318458914756775, |
|
"logits/chosen": -0.7056828141212463, |
|
"logits/rejected": -0.6951937675476074, |
|
"logps/chosen": -0.5244600176811218, |
|
"logps/rejected": -0.605254590511322, |
|
"loss": 0.5609, |
|
"nll_loss": 0.478809654712677, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.052445996552705765, |
|
"rewards/margins": 0.008079464547336102, |
|
"rewards/rejected": -0.06052546575665474, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7383966244725738, |
|
"grad_norm": 1.0260136766275174, |
|
"learning_rate": 1.6776212832550862e-05, |
|
"log_odds_chosen": 0.3896116316318512, |
|
"log_odds_ratio": -0.5955315232276917, |
|
"logits/chosen": -0.6688608527183533, |
|
"logits/rejected": -0.6982090473175049, |
|
"logps/chosen": -0.4829683303833008, |
|
"logps/rejected": -0.6143837571144104, |
|
"loss": 0.5201, |
|
"nll_loss": 0.4728917181491852, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.048296838998794556, |
|
"rewards/margins": 0.013141541741788387, |
|
"rewards/rejected": -0.06143837794661522, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 1.0922913456111067, |
|
"learning_rate": 1.5993740219092334e-05, |
|
"log_odds_chosen": 0.35299748182296753, |
|
"log_odds_ratio": -0.5904638767242432, |
|
"logits/chosen": -0.7948518395423889, |
|
"logits/rejected": -0.7930269837379456, |
|
"logps/chosen": -0.5347493886947632, |
|
"logps/rejected": -0.7274565696716309, |
|
"loss": 0.5678, |
|
"nll_loss": 0.5108060240745544, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": -0.05347493290901184, |
|
"rewards/margins": 0.01927073672413826, |
|
"rewards/rejected": -0.0727456733584404, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9493670886075949, |
|
"grad_norm": 1.171188249527363, |
|
"learning_rate": 1.5211267605633803e-05, |
|
"log_odds_chosen": 0.19867561757564545, |
|
"log_odds_ratio": -0.6661250591278076, |
|
"logits/chosen": -0.7350332736968994, |
|
"logits/rejected": -0.7309374213218689, |
|
"logps/chosen": -0.4790670871734619, |
|
"logps/rejected": -0.578321099281311, |
|
"loss": 0.5593, |
|
"nll_loss": 0.4474111795425415, |
|
"rewards/accuracies": 0.49000000953674316, |
|
"rewards/chosen": -0.04790670797228813, |
|
"rewards/margins": 0.009925400838255882, |
|
"rewards/rejected": -0.05783211067318916, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0548523206751055, |
|
"grad_norm": 1.0561427838830786, |
|
"learning_rate": 1.4428794992175275e-05, |
|
"log_odds_chosen": 0.5409557819366455, |
|
"log_odds_ratio": -0.5446589589118958, |
|
"logits/chosen": -0.8016952276229858, |
|
"logits/rejected": -0.7894623279571533, |
|
"logps/chosen": -0.466928094625473, |
|
"logps/rejected": -0.6494468450546265, |
|
"loss": 0.5096, |
|
"nll_loss": 0.4225366711616516, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04669281467795372, |
|
"rewards/margins": 0.018251873552799225, |
|
"rewards/rejected": -0.06494467705488205, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.160337552742616, |
|
"grad_norm": 1.0616103418922775, |
|
"learning_rate": 1.3646322378716745e-05, |
|
"log_odds_chosen": 0.6413868069648743, |
|
"log_odds_ratio": -0.5098162293434143, |
|
"logits/chosen": -0.8740493655204773, |
|
"logits/rejected": -0.9086742997169495, |
|
"logps/chosen": -0.43551456928253174, |
|
"logps/rejected": -0.7041670083999634, |
|
"loss": 0.4928, |
|
"nll_loss": 0.4237639605998993, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -0.04355145990848541, |
|
"rewards/margins": 0.026865236461162567, |
|
"rewards/rejected": -0.07041670382022858, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.2658227848101267, |
|
"grad_norm": 1.0597296687116873, |
|
"learning_rate": 1.2863849765258216e-05, |
|
"log_odds_chosen": 0.6442282795906067, |
|
"log_odds_ratio": -0.5011763572692871, |
|
"logits/chosen": -1.0111823081970215, |
|
"logits/rejected": -1.0184077024459839, |
|
"logps/chosen": -0.45919105410575867, |
|
"logps/rejected": -0.7301878333091736, |
|
"loss": 0.4639, |
|
"nll_loss": 0.40898972749710083, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -0.045919109135866165, |
|
"rewards/margins": 0.02709968015551567, |
|
"rewards/rejected": -0.07301878184080124, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.371308016877637, |
|
"grad_norm": 1.0935677703953148, |
|
"learning_rate": 1.2081377151799686e-05, |
|
"log_odds_chosen": 0.6757858395576477, |
|
"log_odds_ratio": -0.4901491403579712, |
|
"logits/chosen": -0.9852328300476074, |
|
"logits/rejected": -1.0046324729919434, |
|
"logps/chosen": -0.41143473982810974, |
|
"logps/rejected": -0.6924607753753662, |
|
"loss": 0.4867, |
|
"nll_loss": 0.4125671088695526, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -0.041143473237752914, |
|
"rewards/margins": 0.028102604672312737, |
|
"rewards/rejected": -0.0692460760474205, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.4767932489451476, |
|
"grad_norm": 1.1585743076882284, |
|
"learning_rate": 1.129890453834116e-05, |
|
"log_odds_chosen": 0.4605286121368408, |
|
"log_odds_ratio": -0.5612049102783203, |
|
"logits/chosen": -0.9352016448974609, |
|
"logits/rejected": -0.9490614533424377, |
|
"logps/chosen": -0.43350136280059814, |
|
"logps/rejected": -0.5812506675720215, |
|
"loss": 0.4718, |
|
"nll_loss": 0.4129113256931305, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.043350137770175934, |
|
"rewards/margins": 0.014774931594729424, |
|
"rewards/rejected": -0.05812506750226021, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5822784810126582, |
|
"grad_norm": 1.0715102217336756, |
|
"learning_rate": 1.051643192488263e-05, |
|
"log_odds_chosen": 0.6196006536483765, |
|
"log_odds_ratio": -0.4975026845932007, |
|
"logits/chosen": -1.0041831731796265, |
|
"logits/rejected": -1.0140608549118042, |
|
"logps/chosen": -0.3989720046520233, |
|
"logps/rejected": -0.6381548047065735, |
|
"loss": 0.4857, |
|
"nll_loss": 0.42035380005836487, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -0.03989719972014427, |
|
"rewards/margins": 0.023918280377984047, |
|
"rewards/rejected": -0.06381548196077347, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.6877637130801688, |
|
"grad_norm": 1.0097421405350755, |
|
"learning_rate": 9.7339593114241e-06, |
|
"log_odds_chosen": 0.5579173564910889, |
|
"log_odds_ratio": -0.5348711609840393, |
|
"logits/chosen": -1.0373647212982178, |
|
"logits/rejected": -1.037854790687561, |
|
"logps/chosen": -0.45267555117607117, |
|
"logps/rejected": -0.66820228099823, |
|
"loss": 0.4691, |
|
"nll_loss": 0.410322904586792, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -0.04526755213737488, |
|
"rewards/margins": 0.021552674472332, |
|
"rewards/rejected": -0.06682023406028748, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7932489451476794, |
|
"grad_norm": 1.0546124322913446, |
|
"learning_rate": 8.951486697965573e-06, |
|
"log_odds_chosen": 0.7115356922149658, |
|
"log_odds_ratio": -0.5003312826156616, |
|
"logits/chosen": -1.1095713376998901, |
|
"logits/rejected": -1.1265352964401245, |
|
"logps/chosen": -0.44462206959724426, |
|
"logps/rejected": -0.7215204834938049, |
|
"loss": 0.4849, |
|
"nll_loss": 0.43178752064704895, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -0.04446220397949219, |
|
"rewards/margins": 0.027689840644598007, |
|
"rewards/rejected": -0.0721520483493805, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.8987341772151898, |
|
"grad_norm": 1.1203509702182832, |
|
"learning_rate": 8.169014084507043e-06, |
|
"log_odds_chosen": 0.6642155647277832, |
|
"log_odds_ratio": -0.5045433640480042, |
|
"logits/chosen": -1.0597500801086426, |
|
"logits/rejected": -1.0680012702941895, |
|
"logps/chosen": -0.4701367914676666, |
|
"logps/rejected": -0.704271674156189, |
|
"loss": 0.4792, |
|
"nll_loss": 0.4112149477005005, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -0.04701368510723114, |
|
"rewards/margins": 0.023413481190800667, |
|
"rewards/rejected": -0.07042715698480606, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0042194092827006, |
|
"grad_norm": 1.2606333093207305, |
|
"learning_rate": 7.386541471048514e-06, |
|
"log_odds_chosen": 0.5286250114440918, |
|
"log_odds_ratio": -0.5306064486503601, |
|
"logits/chosen": -1.0645604133605957, |
|
"logits/rejected": -1.0691789388656616, |
|
"logps/chosen": -0.44743481278419495, |
|
"logps/rejected": -0.6287744045257568, |
|
"loss": 0.4803, |
|
"nll_loss": 0.42209434509277344, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.04474348574876785, |
|
"rewards/margins": 0.018133964389562607, |
|
"rewards/rejected": -0.06287744641304016, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.109704641350211, |
|
"grad_norm": 1.0966083263839905, |
|
"learning_rate": 6.604068857589985e-06, |
|
"log_odds_chosen": 0.8573014140129089, |
|
"log_odds_ratio": -0.4275921583175659, |
|
"logits/chosen": -1.1114364862442017, |
|
"logits/rejected": -1.1227397918701172, |
|
"logps/chosen": -0.3463269770145416, |
|
"logps/rejected": -0.6633933782577515, |
|
"loss": 0.4078, |
|
"nll_loss": 0.3584522306919098, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -0.03463269770145416, |
|
"rewards/margins": 0.03170664235949516, |
|
"rewards/rejected": -0.06633934378623962, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.2151898734177213, |
|
"grad_norm": 1.1009276796065117, |
|
"learning_rate": 5.821596244131456e-06, |
|
"log_odds_chosen": 0.7615014910697937, |
|
"log_odds_ratio": -0.47703635692596436, |
|
"logits/chosen": -1.1601929664611816, |
|
"logits/rejected": -1.1780595779418945, |
|
"logps/chosen": -0.39950495958328247, |
|
"logps/rejected": -0.6856523752212524, |
|
"loss": 0.4048, |
|
"nll_loss": 0.41196614503860474, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -0.03995049372315407, |
|
"rewards/margins": 0.028614744544029236, |
|
"rewards/rejected": -0.0685652419924736, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.320675105485232, |
|
"grad_norm": 1.1135597142860487, |
|
"learning_rate": 5.039123630672926e-06, |
|
"log_odds_chosen": 0.8249316215515137, |
|
"log_odds_ratio": -0.43479686975479126, |
|
"logits/chosen": -1.1542495489120483, |
|
"logits/rejected": -1.1734535694122314, |
|
"logps/chosen": -0.39931461215019226, |
|
"logps/rejected": -0.733440637588501, |
|
"loss": 0.4061, |
|
"nll_loss": 0.39662715792655945, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.039931461215019226, |
|
"rewards/margins": 0.03341260552406311, |
|
"rewards/rejected": -0.07334406673908234, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.4261603375527425, |
|
"grad_norm": 1.1107780211778215, |
|
"learning_rate": 4.2566510172143975e-06, |
|
"log_odds_chosen": 1.200696587562561, |
|
"log_odds_ratio": -0.3881736099720001, |
|
"logits/chosen": -1.2098137140274048, |
|
"logits/rejected": -1.2287673950195312, |
|
"logps/chosen": -0.36806556582450867, |
|
"logps/rejected": -0.7970322370529175, |
|
"loss": 0.3924, |
|
"nll_loss": 0.3511776328086853, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -0.03680655360221863, |
|
"rewards/margins": 0.042896661907434464, |
|
"rewards/rejected": -0.07970321923494339, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.5316455696202533, |
|
"grad_norm": 1.3159040332886311, |
|
"learning_rate": 3.474178403755869e-06, |
|
"log_odds_chosen": 1.0953081846237183, |
|
"log_odds_ratio": -0.3781184256076813, |
|
"logits/chosen": -1.1880977153778076, |
|
"logits/rejected": -1.2077745199203491, |
|
"logps/chosen": -0.32341885566711426, |
|
"logps/rejected": -0.7020614147186279, |
|
"loss": 0.4006, |
|
"nll_loss": 0.3256089389324188, |
|
"rewards/accuracies": 0.7799999713897705, |
|
"rewards/chosen": -0.032341886311769485, |
|
"rewards/margins": 0.03786425665020943, |
|
"rewards/rejected": -0.07020614296197891, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.6371308016877637, |
|
"grad_norm": 1.070273138577641, |
|
"learning_rate": 2.69170579029734e-06, |
|
"log_odds_chosen": 0.6728782057762146, |
|
"log_odds_ratio": -0.5007551312446594, |
|
"logits/chosen": -1.1887409687042236, |
|
"logits/rejected": -1.2133393287658691, |
|
"logps/chosen": -0.40768423676490784, |
|
"logps/rejected": -0.6612467169761658, |
|
"loss": 0.4134, |
|
"nll_loss": 0.42612510919570923, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.04076842963695526, |
|
"rewards/margins": 0.025356244295835495, |
|
"rewards/rejected": -0.06612467020750046, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.742616033755274, |
|
"grad_norm": 1.1092420255798914, |
|
"learning_rate": 1.9092331768388107e-06, |
|
"log_odds_chosen": 0.9050965309143066, |
|
"log_odds_ratio": -0.42763033509254456, |
|
"logits/chosen": -1.192216396331787, |
|
"logits/rejected": -1.2236640453338623, |
|
"logps/chosen": -0.35716915130615234, |
|
"logps/rejected": -0.735872209072113, |
|
"loss": 0.4171, |
|
"nll_loss": 0.35565415024757385, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -0.03571692109107971, |
|
"rewards/margins": 0.03787030279636383, |
|
"rewards/rejected": -0.07358721643686295, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.848101265822785, |
|
"grad_norm": 1.0762367696616766, |
|
"learning_rate": 1.1267605633802817e-06, |
|
"log_odds_chosen": 0.8493003845214844, |
|
"log_odds_ratio": -0.4451717734336853, |
|
"logits/chosen": -1.2292503118515015, |
|
"logits/rejected": -1.2512164115905762, |
|
"logps/chosen": -0.40543216466903687, |
|
"logps/rejected": -0.7575715780258179, |
|
"loss": 0.4083, |
|
"nll_loss": 0.3527292311191559, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -0.040543220937252045, |
|
"rewards/margins": 0.03521393612027168, |
|
"rewards/rejected": -0.07575715333223343, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.9535864978902953, |
|
"grad_norm": 1.4284839235977267, |
|
"learning_rate": 3.4428794992175273e-07, |
|
"log_odds_chosen": 0.6508604288101196, |
|
"log_odds_ratio": -0.5013710856437683, |
|
"logits/chosen": -1.2339705228805542, |
|
"logits/rejected": -1.2357439994812012, |
|
"logps/chosen": -0.38158729672431946, |
|
"logps/rejected": -0.603762149810791, |
|
"loss": 0.4226, |
|
"nll_loss": 0.40551432967185974, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": -0.03815872594714165, |
|
"rewards/margins": 0.022217486053705215, |
|
"rewards/rejected": -0.06037621572613716, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 711, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|