|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 2902, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006891798759476223, |
|
"grad_norm": 1.1716080904006958, |
|
"learning_rate": 1.718213058419244e-10, |
|
"logits/chosen": -3.184086799621582, |
|
"logits/rejected": -3.1319174766540527, |
|
"logps/chosen": -49.95408630371094, |
|
"logps/rejected": -44.33523178100586, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006891798759476223, |
|
"grad_norm": 1.0663460493087769, |
|
"learning_rate": 1.718213058419244e-09, |
|
"logits/chosen": -3.080113172531128, |
|
"logits/rejected": -3.0596792697906494, |
|
"logps/chosen": -54.03813171386719, |
|
"logps/rejected": -53.65137481689453, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4565972089767456, |
|
"rewards/chosen": 8.68273782543838e-05, |
|
"rewards/margins": -1.9125265680486336e-05, |
|
"rewards/rejected": 0.00010595263302093372, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013783597518952447, |
|
"grad_norm": 1.1690140962600708, |
|
"learning_rate": 3.436426116838488e-09, |
|
"logits/chosen": -3.1165332794189453, |
|
"logits/rejected": -3.0916168689727783, |
|
"logps/chosen": -55.888938903808594, |
|
"logps/rejected": -53.246864318847656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 3.240557634853758e-05, |
|
"rewards/margins": -3.6290578009356977e-06, |
|
"rewards/rejected": 3.603463846957311e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02067539627842867, |
|
"grad_norm": 1.2955037355422974, |
|
"learning_rate": 5.154639175257731e-09, |
|
"logits/chosen": -3.0878665447235107, |
|
"logits/rejected": -3.058804988861084, |
|
"logps/chosen": -54.54620361328125, |
|
"logps/rejected": -52.591636657714844, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -2.4173205019906163e-05, |
|
"rewards/margins": -4.490778155741282e-05, |
|
"rewards/rejected": 2.0734580175485462e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.027567195037904894, |
|
"grad_norm": 1.1852333545684814, |
|
"learning_rate": 6.872852233676976e-09, |
|
"logits/chosen": -3.0849013328552246, |
|
"logits/rejected": -3.0671732425689697, |
|
"logps/chosen": -53.879005432128906, |
|
"logps/rejected": -53.66566848754883, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -5.587830673903227e-05, |
|
"rewards/margins": 4.417077434482053e-05, |
|
"rewards/rejected": -0.00010004905925597996, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03445899379738112, |
|
"grad_norm": 1.2431070804595947, |
|
"learning_rate": 8.59106529209622e-09, |
|
"logits/chosen": -3.0804286003112793, |
|
"logits/rejected": -3.0561296939849854, |
|
"logps/chosen": -56.24019241333008, |
|
"logps/rejected": -53.092872619628906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 8.934068318922073e-05, |
|
"rewards/margins": 0.00011236695718253031, |
|
"rewards/rejected": -2.302624488947913e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04135079255685734, |
|
"grad_norm": 1.1313049793243408, |
|
"learning_rate": 1.0309278350515463e-08, |
|
"logits/chosen": -3.0351052284240723, |
|
"logits/rejected": -3.0099387168884277, |
|
"logps/chosen": -52.579429626464844, |
|
"logps/rejected": -52.6761589050293, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48906248807907104, |
|
"rewards/chosen": 2.3904693080112338e-05, |
|
"rewards/margins": 2.025809772021603e-05, |
|
"rewards/rejected": 3.6465789889916778e-06, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.048242591316333565, |
|
"grad_norm": 1.2357141971588135, |
|
"learning_rate": 1.2027491408934707e-08, |
|
"logits/chosen": -3.092390537261963, |
|
"logits/rejected": -3.0711493492126465, |
|
"logps/chosen": -54.469940185546875, |
|
"logps/rejected": -53.86017990112305, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 5.560473709920188e-06, |
|
"rewards/margins": 2.101451354974415e-05, |
|
"rewards/rejected": -1.5454041204066016e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05513439007580979, |
|
"grad_norm": 1.1142845153808594, |
|
"learning_rate": 1.3745704467353952e-08, |
|
"logits/chosen": -3.0345962047576904, |
|
"logits/rejected": -3.0208940505981445, |
|
"logps/chosen": -54.06622314453125, |
|
"logps/rejected": -52.69053268432617, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48906248807907104, |
|
"rewards/chosen": -4.671530405175872e-05, |
|
"rewards/margins": -4.488803824642673e-05, |
|
"rewards/rejected": -1.827271603360714e-06, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06202618883528601, |
|
"grad_norm": 1.1985735893249512, |
|
"learning_rate": 1.5463917525773195e-08, |
|
"logits/chosen": -3.048698663711548, |
|
"logits/rejected": -3.0217783451080322, |
|
"logps/chosen": -54.59540939331055, |
|
"logps/rejected": -52.060035705566406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": -6.244768155738711e-05, |
|
"rewards/margins": -3.21494007948786e-06, |
|
"rewards/rejected": -5.923274511587806e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06891798759476224, |
|
"grad_norm": 1.3350454568862915, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -3.119621753692627, |
|
"logits/rejected": -3.095787763595581, |
|
"logps/chosen": -53.65461349487305, |
|
"logps/rejected": -52.88787841796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -6.203976226970553e-05, |
|
"rewards/margins": 5.055965812061913e-05, |
|
"rewards/rejected": -0.00011259941675234586, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06891798759476224, |
|
"eval_logits/chosen": -3.163339376449585, |
|
"eval_logits/rejected": -3.157687187194824, |
|
"eval_logps/chosen": -58.7006721496582, |
|
"eval_logps/rejected": -63.17026138305664, |
|
"eval_loss": 0.693140983581543, |
|
"eval_rewards/accuracies": 0.5023234486579895, |
|
"eval_rewards/chosen": 0.00011220378655707464, |
|
"eval_rewards/margins": 1.3582017345470376e-05, |
|
"eval_rewards/rejected": 9.862175647867844e-05, |
|
"eval_runtime": 383.3503, |
|
"eval_samples_per_second": 11.227, |
|
"eval_steps_per_second": 1.403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07580978635423846, |
|
"grad_norm": 1.2324384450912476, |
|
"learning_rate": 1.8900343642611684e-08, |
|
"logits/chosen": -3.0891432762145996, |
|
"logits/rejected": -3.0738348960876465, |
|
"logps/chosen": -53.08173751831055, |
|
"logps/rejected": -54.20978546142578, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -4.9080466851592064e-05, |
|
"rewards/margins": -7.88484321674332e-05, |
|
"rewards/rejected": 2.976796167786233e-05, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08270158511371468, |
|
"grad_norm": 1.2855055332183838, |
|
"learning_rate": 2.0618556701030925e-08, |
|
"logits/chosen": -3.043365478515625, |
|
"logits/rejected": -3.0211169719696045, |
|
"logps/chosen": -54.957427978515625, |
|
"logps/rejected": -54.4825439453125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.47187501192092896, |
|
"rewards/chosen": -7.188355084508657e-05, |
|
"rewards/margins": -0.00011629929940681905, |
|
"rewards/rejected": 4.441575947566889e-05, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08959338387319091, |
|
"grad_norm": 1.1282892227172852, |
|
"learning_rate": 2.2336769759450173e-08, |
|
"logits/chosen": -3.0101354122161865, |
|
"logits/rejected": -2.9788012504577637, |
|
"logps/chosen": -57.5596923828125, |
|
"logps/rejected": -51.651153564453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 2.1165338694117963e-05, |
|
"rewards/margins": 0.00015237969637382776, |
|
"rewards/rejected": -0.00013121434312779456, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09648518263266713, |
|
"grad_norm": 1.1657721996307373, |
|
"learning_rate": 2.4054982817869415e-08, |
|
"logits/chosen": -3.067199468612671, |
|
"logits/rejected": -3.046125888824463, |
|
"logps/chosen": -53.55717849731445, |
|
"logps/rejected": -52.773223876953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 1.3996473171573598e-05, |
|
"rewards/margins": 0.00015452780644409359, |
|
"rewards/rejected": -0.00014053132326807827, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10337698139214335, |
|
"grad_norm": 1.2658566236495972, |
|
"learning_rate": 2.5773195876288656e-08, |
|
"logits/chosen": -3.04317569732666, |
|
"logits/rejected": -3.0280072689056396, |
|
"logps/chosen": -52.809234619140625, |
|
"logps/rejected": -54.64301300048828, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -1.0054915037471801e-05, |
|
"rewards/margins": 0.0001238631666637957, |
|
"rewards/rejected": -0.00013391808897722512, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11026878015161957, |
|
"grad_norm": 1.2126415967941284, |
|
"learning_rate": 2.7491408934707904e-08, |
|
"logits/chosen": -3.09346342086792, |
|
"logits/rejected": -3.07668399810791, |
|
"logps/chosen": -53.59107208251953, |
|
"logps/rejected": -52.9258918762207, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4703125059604645, |
|
"rewards/chosen": -9.462583875574637e-06, |
|
"rewards/margins": -2.1159441530471668e-05, |
|
"rewards/rejected": 1.1696849469444714e-05, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1171605789110958, |
|
"grad_norm": 1.1890392303466797, |
|
"learning_rate": 2.9209621993127148e-08, |
|
"logits/chosen": -3.0306668281555176, |
|
"logits/rejected": -3.0220158100128174, |
|
"logps/chosen": -53.26588821411133, |
|
"logps/rejected": -53.87241744995117, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -5.447790681500919e-05, |
|
"rewards/margins": -7.718646884313785e-06, |
|
"rewards/rejected": -4.675926174968481e-05, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12405237767057202, |
|
"grad_norm": 1.15412175655365, |
|
"learning_rate": 3.092783505154639e-08, |
|
"logits/chosen": -3.0752334594726562, |
|
"logits/rejected": -3.0524303913116455, |
|
"logps/chosen": -55.69530487060547, |
|
"logps/rejected": -53.15666961669922, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.504687488079071, |
|
"rewards/chosen": -1.1674828783725388e-05, |
|
"rewards/margins": 9.358397619507741e-06, |
|
"rewards/rejected": -2.1033218217780814e-05, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13094417643004824, |
|
"grad_norm": 1.1720036268234253, |
|
"learning_rate": 3.264604810996564e-08, |
|
"logits/chosen": -3.1030337810516357, |
|
"logits/rejected": -3.0736050605773926, |
|
"logps/chosen": -55.423614501953125, |
|
"logps/rejected": -52.4505500793457, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 9.101578143599909e-06, |
|
"rewards/margins": 0.0001561685057822615, |
|
"rewards/rejected": -0.00014706689398735762, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13783597518952448, |
|
"grad_norm": 1.2227604389190674, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -3.0704421997070312, |
|
"logits/rejected": -3.041954278945923, |
|
"logps/chosen": -53.747833251953125, |
|
"logps/rejected": -52.85246658325195, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -6.959711026865989e-05, |
|
"rewards/margins": 9.592306014383212e-05, |
|
"rewards/rejected": -0.0001655201631365344, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13783597518952448, |
|
"eval_logits/chosen": -3.1631689071655273, |
|
"eval_logits/rejected": -3.157501220703125, |
|
"eval_logps/chosen": -58.700950622558594, |
|
"eval_logps/rejected": -63.162139892578125, |
|
"eval_loss": 0.6931830048561096, |
|
"eval_rewards/accuracies": 0.48745352029800415, |
|
"eval_rewards/chosen": 0.00010945786925731227, |
|
"eval_rewards/margins": -7.042505603749305e-05, |
|
"eval_rewards/rejected": 0.00017988293257076293, |
|
"eval_runtime": 383.3981, |
|
"eval_samples_per_second": 11.226, |
|
"eval_steps_per_second": 1.403, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1447277739490007, |
|
"grad_norm": 1.1424545049667358, |
|
"learning_rate": 3.608247422680412e-08, |
|
"logits/chosen": -3.08945631980896, |
|
"logits/rejected": -3.0655088424682617, |
|
"logps/chosen": -54.22871780395508, |
|
"logps/rejected": -52.478431701660156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.00013771439262200147, |
|
"rewards/margins": 0.00012528176011983305, |
|
"rewards/rejected": -0.00026299612363800406, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15161957270847692, |
|
"grad_norm": 1.1047999858856201, |
|
"learning_rate": 3.780068728522337e-08, |
|
"logits/chosen": -3.0537705421447754, |
|
"logits/rejected": -3.039431571960449, |
|
"logps/chosen": -51.688323974609375, |
|
"logps/rejected": -53.095741271972656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -0.00010369622759753838, |
|
"rewards/margins": 0.00015849454212002456, |
|
"rewards/rejected": -0.00026219076244160533, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15851137146795313, |
|
"grad_norm": 1.2490479946136475, |
|
"learning_rate": 3.951890034364261e-08, |
|
"logits/chosen": -3.071945905685425, |
|
"logits/rejected": -3.0471181869506836, |
|
"logps/chosen": -54.49678421020508, |
|
"logps/rejected": -52.037872314453125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.5270270018372685e-05, |
|
"rewards/margins": 0.0002025824796874076, |
|
"rewards/rejected": -0.0002378527569817379, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16540317022742937, |
|
"grad_norm": 1.141684889793396, |
|
"learning_rate": 4.123711340206185e-08, |
|
"logits/chosen": -3.028677225112915, |
|
"logits/rejected": -3.0117344856262207, |
|
"logps/chosen": -54.690513610839844, |
|
"logps/rejected": -55.188621520996094, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": -0.00015048097702674568, |
|
"rewards/margins": 0.00021787775040138513, |
|
"rewards/rejected": -0.00036835874198004603, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17229496898690558, |
|
"grad_norm": 1.171937108039856, |
|
"learning_rate": 4.295532646048109e-08, |
|
"logits/chosen": -3.06539249420166, |
|
"logits/rejected": -3.0387420654296875, |
|
"logps/chosen": -57.0573616027832, |
|
"logps/rejected": -52.94896697998047, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": -0.0001992958423215896, |
|
"rewards/margins": 0.00017509344615973532, |
|
"rewards/rejected": -0.0003743892884813249, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17918676774638181, |
|
"grad_norm": 1.1496978998184204, |
|
"learning_rate": 4.4673539518900346e-08, |
|
"logits/chosen": -3.0649943351745605, |
|
"logits/rejected": -3.0493435859680176, |
|
"logps/chosen": -54.52451705932617, |
|
"logps/rejected": -54.94301223754883, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": -0.00030117519781924784, |
|
"rewards/margins": 0.00021073469542898238, |
|
"rewards/rejected": -0.0005119099514558911, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18607856650585802, |
|
"grad_norm": 1.1325643062591553, |
|
"learning_rate": 4.639175257731959e-08, |
|
"logits/chosen": -3.066349506378174, |
|
"logits/rejected": -3.0383307933807373, |
|
"logps/chosen": -56.371307373046875, |
|
"logps/rejected": -52.432106018066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.00030968443024903536, |
|
"rewards/margins": 0.0001904324017232284, |
|
"rewards/rejected": -0.000500116846524179, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19297036526533426, |
|
"grad_norm": 1.2462892532348633, |
|
"learning_rate": 4.810996563573883e-08, |
|
"logits/chosen": -3.0566208362579346, |
|
"logits/rejected": -3.051412582397461, |
|
"logps/chosen": -53.14699172973633, |
|
"logps/rejected": -54.41425323486328, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.49531251192092896, |
|
"rewards/chosen": -0.00045495276572182775, |
|
"rewards/margins": 9.360066178487614e-05, |
|
"rewards/rejected": -0.0005485534202307463, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19986216402481047, |
|
"grad_norm": 1.1743725538253784, |
|
"learning_rate": 4.982817869415808e-08, |
|
"logits/chosen": -3.0853469371795654, |
|
"logits/rejected": -3.063814640045166, |
|
"logps/chosen": -54.09833908081055, |
|
"logps/rejected": -54.12751007080078, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0002439660020172596, |
|
"rewards/margins": 0.000504250347148627, |
|
"rewards/rejected": -0.0007482162909582257, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2067539627842867, |
|
"grad_norm": 1.1194610595703125, |
|
"learning_rate": 4.999853419300577e-08, |
|
"logits/chosen": -3.012183666229248, |
|
"logits/rejected": -2.9885506629943848, |
|
"logps/chosen": -54.22556686401367, |
|
"logps/rejected": -51.91581344604492, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.000393096124753356, |
|
"rewards/margins": 0.0003973825369030237, |
|
"rewards/rejected": -0.0007904786616563797, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2067539627842867, |
|
"eval_logits/chosen": -3.1624693870544434, |
|
"eval_logits/rejected": -3.156888484954834, |
|
"eval_logps/chosen": -58.67123794555664, |
|
"eval_logps/rejected": -63.15048599243164, |
|
"eval_loss": 0.6930928826332092, |
|
"eval_rewards/accuracies": 0.5148698687553406, |
|
"eval_rewards/chosen": 0.00040659555816091597, |
|
"eval_rewards/margins": 0.00011023049592040479, |
|
"eval_rewards/rejected": 0.0002963650331366807, |
|
"eval_runtime": 383.575, |
|
"eval_samples_per_second": 11.221, |
|
"eval_steps_per_second": 1.403, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2136457615437629, |
|
"grad_norm": 1.1925629377365112, |
|
"learning_rate": 4.9993467426542045e-08, |
|
"logits/chosen": -3.086402416229248, |
|
"logits/rejected": -3.0562937259674072, |
|
"logps/chosen": -53.876312255859375, |
|
"logps/rejected": -52.675437927246094, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": -0.0003693565959110856, |
|
"rewards/margins": 0.0005247757071629167, |
|
"rewards/rejected": -0.0008941322448663414, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22053756030323915, |
|
"grad_norm": 1.154595136642456, |
|
"learning_rate": 4.998478233757101e-08, |
|
"logits/chosen": -3.0752129554748535, |
|
"logits/rejected": -3.0584304332733154, |
|
"logps/chosen": -52.4905891418457, |
|
"logps/rejected": -54.12751388549805, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.0005890514003112912, |
|
"rewards/margins": 0.0004612796474248171, |
|
"rewards/rejected": -0.0010503310477361083, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22742935906271536, |
|
"grad_norm": 1.143236517906189, |
|
"learning_rate": 4.9972480183439325e-08, |
|
"logits/chosen": -3.075157642364502, |
|
"logits/rejected": -3.0487570762634277, |
|
"logps/chosen": -53.44994354248047, |
|
"logps/rejected": -51.2059326171875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.0005590206128545105, |
|
"rewards/margins": 0.0007544254185631871, |
|
"rewards/rejected": -0.0013134460896253586, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2343211578221916, |
|
"grad_norm": 1.3542113304138184, |
|
"learning_rate": 4.995656274513881e-08, |
|
"logits/chosen": -3.0580501556396484, |
|
"logits/rejected": -3.035737991333008, |
|
"logps/chosen": -54.966087341308594, |
|
"logps/rejected": -53.1796760559082, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": -0.0005089120240882039, |
|
"rewards/margins": 0.0006240031216293573, |
|
"rewards/rejected": -0.001132915262132883, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2412129565816678, |
|
"grad_norm": 1.2170838117599487, |
|
"learning_rate": 4.993703232704862e-08, |
|
"logits/chosen": -3.0822110176086426, |
|
"logits/rejected": -3.059418201446533, |
|
"logps/chosen": -54.97810745239258, |
|
"logps/rejected": -52.979820251464844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.0005054243374615908, |
|
"rewards/margins": 0.0008770185522735119, |
|
"rewards/rejected": -0.0013824428897351027, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24810475534114404, |
|
"grad_norm": 1.107391595840454, |
|
"learning_rate": 4.991389175660163e-08, |
|
"logits/chosen": -3.0396039485931396, |
|
"logits/rejected": -3.0273656845092773, |
|
"logps/chosen": -52.375274658203125, |
|
"logps/rejected": -53.336265563964844, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.573437511920929, |
|
"rewards/chosen": -0.0008153729140758514, |
|
"rewards/margins": 0.0007024986553005874, |
|
"rewards/rejected": -0.001517871511168778, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2549965541006203, |
|
"grad_norm": 1.1218314170837402, |
|
"learning_rate": 4.98871443838751e-08, |
|
"logits/chosen": -3.114689350128174, |
|
"logits/rejected": -3.0790865421295166, |
|
"logps/chosen": -53.952476501464844, |
|
"logps/rejected": -52.38344192504883, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.0007519819191657007, |
|
"rewards/margins": 0.001077468739822507, |
|
"rewards/rejected": -0.0018294507171958685, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2618883528600965, |
|
"grad_norm": 1.1392273902893066, |
|
"learning_rate": 4.985679408110568e-08, |
|
"logits/chosen": -3.0398175716400146, |
|
"logits/rejected": -3.0220084190368652, |
|
"logps/chosen": -54.647239685058594, |
|
"logps/rejected": -52.84843826293945, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0008999688434414566, |
|
"rewards/margins": 0.0009324215352535248, |
|
"rewards/rejected": -0.001832390553317964, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2687801516195727, |
|
"grad_norm": 1.1045254468917847, |
|
"learning_rate": 4.9822845242128844e-08, |
|
"logits/chosen": -3.0233044624328613, |
|
"logits/rejected": -3.001706600189209, |
|
"logps/chosen": -53.072547912597656, |
|
"logps/rejected": -50.9716796875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.589062511920929, |
|
"rewards/chosen": -0.0011974747758358717, |
|
"rewards/margins": 0.001123163616284728, |
|
"rewards/rejected": -0.0023206386249512434, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.27567195037904896, |
|
"grad_norm": 1.1129488945007324, |
|
"learning_rate": 4.9785302781742763e-08, |
|
"logits/chosen": -3.050330400466919, |
|
"logits/rejected": -3.035008192062378, |
|
"logps/chosen": -52.901397705078125, |
|
"logps/rejected": -54.134605407714844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.582812488079071, |
|
"rewards/chosen": -0.0011613852111622691, |
|
"rewards/margins": 0.0009022338199429214, |
|
"rewards/rejected": -0.0020636192057281733, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.27567195037904896, |
|
"eval_logits/chosen": -3.1611053943634033, |
|
"eval_logits/rejected": -3.1554572582244873, |
|
"eval_logps/chosen": -58.63969039916992, |
|
"eval_logps/rejected": -63.135032653808594, |
|
"eval_loss": 0.6930131316184998, |
|
"eval_rewards/accuracies": 0.5257899761199951, |
|
"eval_rewards/chosen": 0.0007220551487989724, |
|
"eval_rewards/margins": 0.0002711908018682152, |
|
"eval_rewards/rejected": 0.00045086428872309625, |
|
"eval_runtime": 383.1949, |
|
"eval_samples_per_second": 11.232, |
|
"eval_steps_per_second": 1.404, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28256374913852517, |
|
"grad_norm": 1.2506204843521118, |
|
"learning_rate": 4.974417213499681e-08, |
|
"logits/chosen": -3.0777323246002197, |
|
"logits/rejected": -3.049983501434326, |
|
"logps/chosen": -55.058868408203125, |
|
"logps/rejected": -53.96419143676758, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.0010929839918389916, |
|
"rewards/margins": 0.0014660651795566082, |
|
"rewards/rejected": -0.0025590492878109217, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2894555478980014, |
|
"grad_norm": 1.237091302871704, |
|
"learning_rate": 4.9699459256404706e-08, |
|
"logits/chosen": -3.105699300765991, |
|
"logits/rejected": -3.0748677253723145, |
|
"logps/chosen": -55.66558837890625, |
|
"logps/rejected": -53.8339729309082, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.000828454561997205, |
|
"rewards/margins": 0.001723860390484333, |
|
"rewards/rejected": -0.002552315127104521, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2963473466574776, |
|
"grad_norm": 1.1707303524017334, |
|
"learning_rate": 4.965117061908251e-08, |
|
"logits/chosen": -3.056098461151123, |
|
"logits/rejected": -3.035871982574463, |
|
"logps/chosen": -55.13801193237305, |
|
"logps/rejected": -53.53112030029297, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0015201037749648094, |
|
"rewards/margins": 0.0009773834608495235, |
|
"rewards/rejected": -0.002497487235814333, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.30323914541695385, |
|
"grad_norm": 1.1965198516845703, |
|
"learning_rate": 4.959931321381145e-08, |
|
"logits/chosen": -3.082432508468628, |
|
"logits/rejected": -3.063544750213623, |
|
"logps/chosen": -54.456016540527344, |
|
"logps/rejected": -54.16331100463867, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.0014482419937849045, |
|
"rewards/margins": 0.0013292920775711536, |
|
"rewards/rejected": -0.002777534071356058, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31013094417643006, |
|
"grad_norm": 1.2304091453552246, |
|
"learning_rate": 4.954389454802591e-08, |
|
"logits/chosen": -3.1104228496551514, |
|
"logits/rejected": -3.090036153793335, |
|
"logps/chosen": -53.494163513183594, |
|
"logps/rejected": -53.315879821777344, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": -0.0016971270088106394, |
|
"rewards/margins": 0.0016467798268422484, |
|
"rewards/rejected": -0.003343907417729497, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31702274293590627, |
|
"grad_norm": 1.1292587518692017, |
|
"learning_rate": 4.948492264472656e-08, |
|
"logits/chosen": -3.1166298389434814, |
|
"logits/rejected": -3.094527006149292, |
|
"logps/chosen": -55.6964111328125, |
|
"logps/rejected": -53.82384490966797, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0016075067687779665, |
|
"rewards/margins": 0.001251583336852491, |
|
"rewards/rejected": -0.0028590902220457792, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3239145416953825, |
|
"grad_norm": 1.197009563446045, |
|
"learning_rate": 4.9422406041318844e-08, |
|
"logits/chosen": -3.0635745525360107, |
|
"logits/rejected": -3.038623094558716, |
|
"logps/chosen": -54.91028594970703, |
|
"logps/rejected": -53.81779861450195, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0011848278809338808, |
|
"rewards/margins": 0.002738000126555562, |
|
"rewards/rejected": -0.0039228275418281555, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33080634045485874, |
|
"grad_norm": 1.2151196002960205, |
|
"learning_rate": 4.9356353788377026e-08, |
|
"logits/chosen": -3.055495023727417, |
|
"logits/rejected": -3.0299649238586426, |
|
"logps/chosen": -55.23993682861328, |
|
"logps/rejected": -53.810813903808594, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.0015645608073100448, |
|
"rewards/margins": 0.0022706836462020874, |
|
"rewards/rejected": -0.003835244569927454, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33769813921433495, |
|
"grad_norm": 1.1114208698272705, |
|
"learning_rate": 4.9286775448333944e-08, |
|
"logits/chosen": -3.0453591346740723, |
|
"logits/rejected": -3.0262703895568848, |
|
"logps/chosen": -53.14439010620117, |
|
"logps/rejected": -53.70630645751953, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.0021999510936439037, |
|
"rewards/margins": 0.0019915387965738773, |
|
"rewards/rejected": -0.0041914889588952065, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34458993797381116, |
|
"grad_norm": 1.2440327405929565, |
|
"learning_rate": 4.921368109409663e-08, |
|
"logits/chosen": -3.0790770053863525, |
|
"logits/rejected": -3.0631680488586426, |
|
"logps/chosen": -53.35895538330078, |
|
"logps/rejected": -53.36548614501953, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.002265265677124262, |
|
"rewards/margins": 0.002222201321274042, |
|
"rewards/rejected": -0.004487467464059591, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.34458993797381116, |
|
"eval_logits/chosen": -3.1591975688934326, |
|
"eval_logits/rejected": -3.153568983078003, |
|
"eval_logps/chosen": -58.59514236450195, |
|
"eval_logps/rejected": -63.1102180480957, |
|
"eval_loss": 0.6929171681404114, |
|
"eval_rewards/accuracies": 0.5246282815933228, |
|
"eval_rewards/chosen": 0.0011674691922962666, |
|
"eval_rewards/margins": 0.0004684112500399351, |
|
"eval_rewards/rejected": 0.0006990578840486705, |
|
"eval_runtime": 382.8893, |
|
"eval_samples_per_second": 11.241, |
|
"eval_steps_per_second": 1.405, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35148173673328736, |
|
"grad_norm": 1.1804462671279907, |
|
"learning_rate": 4.913708130758806e-08, |
|
"logits/chosen": -3.0682575702667236, |
|
"logits/rejected": -3.046999454498291, |
|
"logps/chosen": -54.03418731689453, |
|
"logps/rejected": -54.376319885253906, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5953124761581421, |
|
"rewards/chosen": -0.002343302359804511, |
|
"rewards/margins": 0.002379921730607748, |
|
"rewards/rejected": -0.0047232238575816154, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.35837353549276363, |
|
"grad_norm": 1.1343954801559448, |
|
"learning_rate": 4.9056987178215176e-08, |
|
"logits/chosen": -3.1094601154327393, |
|
"logits/rejected": -3.0802154541015625, |
|
"logps/chosen": -53.637245178222656, |
|
"logps/rejected": -53.262474060058594, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0020912564359605312, |
|
"rewards/margins": 0.002018420724198222, |
|
"rewards/rejected": -0.004109677392989397, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.36526533425223984, |
|
"grad_norm": 1.1996898651123047, |
|
"learning_rate": 4.8973410301263516e-08, |
|
"logits/chosen": -3.051212787628174, |
|
"logits/rejected": -3.0387063026428223, |
|
"logps/chosen": -53.287681579589844, |
|
"logps/rejected": -53.440711975097656, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.0023356422316282988, |
|
"rewards/margins": 0.0020052504260092974, |
|
"rewards/rejected": -0.004340892191976309, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.37215713301171605, |
|
"grad_norm": 1.164119839668274, |
|
"learning_rate": 4.8886362776218506e-08, |
|
"logits/chosen": -3.0033349990844727, |
|
"logits/rejected": -2.9812140464782715, |
|
"logps/chosen": -53.450355529785156, |
|
"logps/rejected": -51.471229553222656, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0030757987406104803, |
|
"rewards/margins": 0.0025067501701414585, |
|
"rewards/rejected": -0.005582548677921295, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.37904893177119225, |
|
"grad_norm": 1.28213632106781, |
|
"learning_rate": 4.879585720501382e-08, |
|
"logits/chosen": -3.148085355758667, |
|
"logits/rejected": -3.127159595489502, |
|
"logps/chosen": -54.660545349121094, |
|
"logps/rejected": -53.745887756347656, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.0028407000936567783, |
|
"rewards/margins": 0.0024546708445996046, |
|
"rewards/rejected": -0.005295370705425739, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3859407305306685, |
|
"grad_norm": 1.3044832944869995, |
|
"learning_rate": 4.870190669020703e-08, |
|
"logits/chosen": -3.0593714714050293, |
|
"logits/rejected": -3.036311388015747, |
|
"logps/chosen": -55.014060974121094, |
|
"logps/rejected": -53.53757858276367, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.002411695895716548, |
|
"rewards/margins": 0.0032335221767425537, |
|
"rewards/rejected": -0.005645217839628458, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3928325292901447, |
|
"grad_norm": 1.1550047397613525, |
|
"learning_rate": 4.860452483308266e-08, |
|
"logits/chosen": -2.9982199668884277, |
|
"logits/rejected": -2.972108840942383, |
|
"logps/chosen": -56.20374298095703, |
|
"logps/rejected": -55.09558868408203, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5953124761581421, |
|
"rewards/chosen": -0.0028813418466597795, |
|
"rewards/margins": 0.0031816777773201466, |
|
"rewards/rejected": -0.00606301985681057, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.39972432804962094, |
|
"grad_norm": 1.230724573135376, |
|
"learning_rate": 4.8503725731683204e-08, |
|
"logits/chosen": -3.0479977130889893, |
|
"logits/rejected": -3.0179476737976074, |
|
"logps/chosen": -54.623687744140625, |
|
"logps/rejected": -53.172157287597656, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.002868245355784893, |
|
"rewards/margins": 0.003858409356325865, |
|
"rewards/rejected": -0.006726655177772045, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4066161268090972, |
|
"grad_norm": 1.1609071493148804, |
|
"learning_rate": 4.839952397876808e-08, |
|
"logits/chosen": -3.0574018955230713, |
|
"logits/rejected": -3.039822816848755, |
|
"logps/chosen": -54.512779235839844, |
|
"logps/rejected": -54.206886291503906, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.0032920341473072767, |
|
"rewards/margins": 0.0035798237659037113, |
|
"rewards/rejected": -0.006871857680380344, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4135079255685734, |
|
"grad_norm": 1.0820258855819702, |
|
"learning_rate": 4.829193465970105e-08, |
|
"logits/chosen": -3.089672327041626, |
|
"logits/rejected": -3.069746494293213, |
|
"logps/chosen": -54.53960418701172, |
|
"logps/rejected": -53.9844970703125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.003799352329224348, |
|
"rewards/margins": 0.0033282779622823, |
|
"rewards/rejected": -0.007127630058676004, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4135079255685734, |
|
"eval_logits/chosen": -3.1564178466796875, |
|
"eval_logits/rejected": -3.150780200958252, |
|
"eval_logps/chosen": -58.54813766479492, |
|
"eval_logps/rejected": -63.1104850769043, |
|
"eval_loss": 0.6926856637001038, |
|
"eval_rewards/accuracies": 0.5504181981086731, |
|
"eval_rewards/chosen": 0.001637543668039143, |
|
"eval_rewards/margins": 0.0009411590872332454, |
|
"eval_rewards/rejected": 0.0006963845225982368, |
|
"eval_runtime": 383.4087, |
|
"eval_samples_per_second": 11.226, |
|
"eval_steps_per_second": 1.403, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4203997243280496, |
|
"grad_norm": 1.1835054159164429, |
|
"learning_rate": 4.818097335026631e-08, |
|
"logits/chosen": -3.101921319961548, |
|
"logits/rejected": -3.0772037506103516, |
|
"logps/chosen": -55.26588821411133, |
|
"logps/rejected": -53.28364181518555, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.002828064141795039, |
|
"rewards/margins": 0.00399785814806819, |
|
"rewards/rejected": -0.006825921591371298, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4272915230875258, |
|
"grad_norm": 1.203052043914795, |
|
"learning_rate": 4.806665611441354e-08, |
|
"logits/chosen": -3.077770233154297, |
|
"logits/rejected": -3.0505123138427734, |
|
"logps/chosen": -55.078880310058594, |
|
"logps/rejected": -52.72577667236328, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.004233444109559059, |
|
"rewards/margins": 0.0032121867407113314, |
|
"rewards/rejected": -0.007445631083101034, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4341833218470021, |
|
"grad_norm": 1.1228797435760498, |
|
"learning_rate": 4.794899950193235e-08, |
|
"logits/chosen": -3.0409035682678223, |
|
"logits/rejected": -3.0232279300689697, |
|
"logps/chosen": -53.423980712890625, |
|
"logps/rejected": -52.98765182495117, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": -0.004781276918947697, |
|
"rewards/margins": 0.0022252718918025494, |
|
"rewards/rejected": -0.007006548345088959, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4410751206064783, |
|
"grad_norm": 1.262542486190796, |
|
"learning_rate": 4.782802054605635e-08, |
|
"logits/chosen": -3.0899507999420166, |
|
"logits/rejected": -3.0717437267303467, |
|
"logps/chosen": -55.078704833984375, |
|
"logps/rejected": -54.794776916503906, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.0041534146293997765, |
|
"rewards/margins": 0.0038144378922879696, |
|
"rewards/rejected": -0.007967852056026459, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4479669193659545, |
|
"grad_norm": 1.2199469804763794, |
|
"learning_rate": 4.77037367609972e-08, |
|
"logits/chosen": -3.0735621452331543, |
|
"logits/rejected": -3.0427281856536865, |
|
"logps/chosen": -56.89426803588867, |
|
"logps/rejected": -53.209136962890625, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.004497360438108444, |
|
"rewards/margins": 0.004101374186575413, |
|
"rewards/rejected": -0.008598734624683857, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4548587181254307, |
|
"grad_norm": 1.1544371843338013, |
|
"learning_rate": 4.7576166139409105e-08, |
|
"logits/chosen": -3.042221784591675, |
|
"logits/rejected": -3.0110714435577393, |
|
"logps/chosen": -54.11481475830078, |
|
"logps/rejected": -52.04207229614258, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.004763273987919092, |
|
"rewards/margins": 0.004999758210033178, |
|
"rewards/rejected": -0.00976303219795227, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.461750516884907, |
|
"grad_norm": 1.281175136566162, |
|
"learning_rate": 4.744532714978399e-08, |
|
"logits/chosen": -3.0140280723571777, |
|
"logits/rejected": -2.9848811626434326, |
|
"logps/chosen": -56.1414680480957, |
|
"logps/rejected": -54.0085334777832, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6265624761581421, |
|
"rewards/chosen": -0.0041292086243629456, |
|
"rewards/margins": 0.004748177714645863, |
|
"rewards/rejected": -0.008877387270331383, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4686423156443832, |
|
"grad_norm": 1.1695414781570435, |
|
"learning_rate": 4.7311238733777815e-08, |
|
"logits/chosen": -3.046804428100586, |
|
"logits/rejected": -3.0304887294769287, |
|
"logps/chosen": -54.355079650878906, |
|
"logps/rejected": -54.04961395263672, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.005136381834745407, |
|
"rewards/margins": 0.00416863476857543, |
|
"rewards/rejected": -0.009305017068982124, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4755341144038594, |
|
"grad_norm": 1.1991028785705566, |
|
"learning_rate": 4.717392030346835e-08, |
|
"logits/chosen": -3.028083562850952, |
|
"logits/rejected": -3.011951446533203, |
|
"logps/chosen": -54.25959396362305, |
|
"logps/rejected": -54.1555061340332, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.00518420897424221, |
|
"rewards/margins": 0.004269171506166458, |
|
"rewards/rejected": -0.009453380480408669, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4824259131633356, |
|
"grad_norm": 1.2611873149871826, |
|
"learning_rate": 4.70333917385449e-08, |
|
"logits/chosen": -3.079685926437378, |
|
"logits/rejected": -3.049795627593994, |
|
"logps/chosen": -55.45751190185547, |
|
"logps/rejected": -53.548301696777344, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.004976336378604174, |
|
"rewards/margins": 0.003996217157691717, |
|
"rewards/rejected": -0.00897255353629589, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4824259131633356, |
|
"eval_logits/chosen": -3.1538004875183105, |
|
"eval_logits/rejected": -3.1481423377990723, |
|
"eval_logps/chosen": -58.522918701171875, |
|
"eval_logps/rejected": -63.142425537109375, |
|
"eval_loss": 0.6924082636833191, |
|
"eval_rewards/accuracies": 0.5671468377113342, |
|
"eval_rewards/chosen": 0.0018897424452006817, |
|
"eval_rewards/margins": 0.0015127337537705898, |
|
"eval_rewards/rejected": 0.0003770088078454137, |
|
"eval_runtime": 383.1967, |
|
"eval_samples_per_second": 11.232, |
|
"eval_steps_per_second": 1.404, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48931771192281187, |
|
"grad_norm": 1.1392662525177002, |
|
"learning_rate": 4.688967338343029e-08, |
|
"logits/chosen": -3.0261685848236084, |
|
"logits/rejected": -3.0102686882019043, |
|
"logps/chosen": -54.990821838378906, |
|
"logps/rejected": -54.767127990722656, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00540867168456316, |
|
"rewards/margins": 0.004442816134542227, |
|
"rewards/rejected": -0.0098514873534441, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4962095106822881, |
|
"grad_norm": 1.1779069900512695, |
|
"learning_rate": 4.6742786044335625e-08, |
|
"logits/chosen": -3.0809476375579834, |
|
"logits/rejected": -3.057307481765747, |
|
"logps/chosen": -55.18914794921875, |
|
"logps/rejected": -53.8927116394043, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.0052046263590455055, |
|
"rewards/margins": 0.006748650223016739, |
|
"rewards/rejected": -0.011953277513384819, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5031013094417643, |
|
"grad_norm": 1.2199147939682007, |
|
"learning_rate": 4.6592750986248085e-08, |
|
"logits/chosen": -3.107689380645752, |
|
"logits/rejected": -3.1000123023986816, |
|
"logps/chosen": -54.34379959106445, |
|
"logps/rejected": -54.853431701660156, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.005630369298160076, |
|
"rewards/margins": 0.004252653103321791, |
|
"rewards/rejected": -0.009883022867143154, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5099931082012406, |
|
"grad_norm": 1.307981014251709, |
|
"learning_rate": 4.6439589929852476e-08, |
|
"logits/chosen": -3.0687716007232666, |
|
"logits/rejected": -3.0409016609191895, |
|
"logps/chosen": -53.86914825439453, |
|
"logps/rejected": -53.336158752441406, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.005739855580031872, |
|
"rewards/margins": 0.0054161581210792065, |
|
"rewards/rejected": -0.011156014166772366, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5168849069607168, |
|
"grad_norm": 1.1373140811920166, |
|
"learning_rate": 4.6283325048386624e-08, |
|
"logits/chosen": -3.0201470851898193, |
|
"logits/rejected": -2.998100519180298, |
|
"logps/chosen": -55.00568389892578, |
|
"logps/rejected": -54.43558883666992, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0055595808662474155, |
|
"rewards/margins": 0.005786740221083164, |
|
"rewards/rejected": -0.011346321552991867, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.523776705720193, |
|
"grad_norm": 1.1454448699951172, |
|
"learning_rate": 4.612397896443138e-08, |
|
"logits/chosen": -3.118800163269043, |
|
"logits/rejected": -3.0978825092315674, |
|
"logps/chosen": -54.798065185546875, |
|
"logps/rejected": -54.3465461730957, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": -0.007258473429828882, |
|
"rewards/margins": 0.004668924491852522, |
|
"rewards/rejected": -0.011927397921681404, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5306685044796692, |
|
"grad_norm": 1.1706945896148682, |
|
"learning_rate": 4.5961574746635536e-08, |
|
"logits/chosen": -3.012247323989868, |
|
"logits/rejected": -2.993521213531494, |
|
"logps/chosen": -55.298187255859375, |
|
"logps/rejected": -55.779624938964844, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.582812488079071, |
|
"rewards/chosen": -0.00789455696940422, |
|
"rewards/margins": 0.0037407889030873775, |
|
"rewards/rejected": -0.011635346338152885, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5375603032391454, |
|
"grad_norm": 1.2820113897323608, |
|
"learning_rate": 4.5796135906376144e-08, |
|
"logits/chosen": -3.0310168266296387, |
|
"logits/rejected": -3.015160083770752, |
|
"logps/chosen": -54.29914474487305, |
|
"logps/rejected": -55.2180290222168, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.007602076046168804, |
|
"rewards/margins": 0.004507972858846188, |
|
"rewards/rejected": -0.012110048905014992, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5444521019986216, |
|
"grad_norm": 1.1508716344833374, |
|
"learning_rate": 4.5627686394354766e-08, |
|
"logits/chosen": -3.0379862785339355, |
|
"logits/rejected": -3.017380475997925, |
|
"logps/chosen": -53.72552490234375, |
|
"logps/rejected": -54.60520553588867, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.0065218256786465645, |
|
"rewards/margins": 0.006237885914742947, |
|
"rewards/rejected": -0.012759710662066936, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5513439007580979, |
|
"grad_norm": 1.1988805532455444, |
|
"learning_rate": 4.545625059713011e-08, |
|
"logits/chosen": -3.0689666271209717, |
|
"logits/rejected": -3.046346664428711, |
|
"logps/chosen": -54.87028121948242, |
|
"logps/rejected": -53.7490119934082, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.006772381253540516, |
|
"rewards/margins": 0.006438801996409893, |
|
"rewards/rejected": -0.013211183249950409, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5513439007580979, |
|
"eval_logits/chosen": -3.1500000953674316, |
|
"eval_logits/rejected": -3.1443684101104736, |
|
"eval_logps/chosen": -58.52486038208008, |
|
"eval_logps/rejected": -63.183868408203125, |
|
"eval_loss": 0.69222092628479, |
|
"eval_rewards/accuracies": 0.5759758353233337, |
|
"eval_rewards/chosen": 0.001870311563834548, |
|
"eval_rewards/margins": 0.0019077310571447015, |
|
"eval_rewards/rejected": -3.741981345228851e-05, |
|
"eval_runtime": 383.2221, |
|
"eval_samples_per_second": 11.231, |
|
"eval_steps_per_second": 1.404, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5582356995175741, |
|
"grad_norm": 1.181986927986145, |
|
"learning_rate": 4.528185333358756e-08, |
|
"logits/chosen": -3.026899814605713, |
|
"logits/rejected": -3.0093157291412354, |
|
"logps/chosen": -54.46189498901367, |
|
"logps/rejected": -54.8513298034668, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.00789581798017025, |
|
"rewards/margins": 0.004861229099333286, |
|
"rewards/rejected": -0.01275704801082611, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5651274982770503, |
|
"grad_norm": 1.1672871112823486, |
|
"learning_rate": 4.510451985134616e-08, |
|
"logits/chosen": -3.0875649452209473, |
|
"logits/rejected": -3.0743203163146973, |
|
"logps/chosen": -53.040733337402344, |
|
"logps/rejected": -55.541954040527344, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.00876162201166153, |
|
"rewards/margins": 0.00582465436309576, |
|
"rewards/rejected": -0.014586275443434715, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5720192970365265, |
|
"grad_norm": 1.2362406253814697, |
|
"learning_rate": 4.492427582310346e-08, |
|
"logits/chosen": -3.0630054473876953, |
|
"logits/rejected": -3.0335052013397217, |
|
"logps/chosen": -54.42986297607422, |
|
"logps/rejected": -53.1016845703125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.007075751665979624, |
|
"rewards/margins": 0.007580357138067484, |
|
"rewards/rejected": -0.014656109735369682, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5789110957960028, |
|
"grad_norm": 1.1612728834152222, |
|
"learning_rate": 4.4741147342918894e-08, |
|
"logits/chosen": -3.076169013977051, |
|
"logits/rejected": -3.0513038635253906, |
|
"logps/chosen": -55.8946533203125, |
|
"logps/rejected": -55.85911178588867, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.0071454280987381935, |
|
"rewards/margins": 0.008417905308306217, |
|
"rewards/rejected": -0.01556333340704441, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.585802894555479, |
|
"grad_norm": 1.1926907300949097, |
|
"learning_rate": 4.4555160922436074e-08, |
|
"logits/chosen": -3.079662322998047, |
|
"logits/rejected": -3.0524630546569824, |
|
"logps/chosen": -54.04046630859375, |
|
"logps/rejected": -53.262847900390625, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.007903190329670906, |
|
"rewards/margins": 0.007286435458809137, |
|
"rewards/rejected": -0.015189625322818756, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5926946933149552, |
|
"grad_norm": 1.2318311929702759, |
|
"learning_rate": 4.4366343487044754e-08, |
|
"logits/chosen": -3.031019926071167, |
|
"logits/rejected": -3.0084445476531982, |
|
"logps/chosen": -52.4871711730957, |
|
"logps/rejected": -53.59075927734375, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.009235886856913567, |
|
"rewards/margins": 0.007172322832047939, |
|
"rewards/rejected": -0.01640820875763893, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5995864920744314, |
|
"grad_norm": 1.2643660306930542, |
|
"learning_rate": 4.417472237198275e-08, |
|
"logits/chosen": -3.122987985610962, |
|
"logits/rejected": -3.097611665725708, |
|
"logps/chosen": -56.34685134887695, |
|
"logps/rejected": -55.146095275878906, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.620312511920929, |
|
"rewards/chosen": -0.0072593227960169315, |
|
"rewards/margins": 0.007286491803824902, |
|
"rewards/rejected": -0.01454581506550312, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6064782908339077, |
|
"grad_norm": 1.2349611520767212, |
|
"learning_rate": 4.398032531837865e-08, |
|
"logits/chosen": -3.000382423400879, |
|
"logits/rejected": -2.979700803756714, |
|
"logps/chosen": -54.820579528808594, |
|
"logps/rejected": -54.77504348754883, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.010271805338561535, |
|
"rewards/margins": 0.006492338143289089, |
|
"rewards/rejected": -0.016764143481850624, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6133700895933839, |
|
"grad_norm": 1.2228236198425293, |
|
"learning_rate": 4.378318046923567e-08, |
|
"logits/chosen": -3.046607494354248, |
|
"logits/rejected": -3.0200607776641846, |
|
"logps/chosen": -55.26753616333008, |
|
"logps/rejected": -54.108428955078125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.00934204924851656, |
|
"rewards/margins": 0.007791099604219198, |
|
"rewards/rejected": -0.01713315024971962, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6202618883528601, |
|
"grad_norm": 1.186522126197815, |
|
"learning_rate": 4.3583316365357413e-08, |
|
"logits/chosen": -3.081699848175049, |
|
"logits/rejected": -3.0569376945495605, |
|
"logps/chosen": -56.97715377807617, |
|
"logps/rejected": -55.800636291503906, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.009422613307833672, |
|
"rewards/margins": 0.007982470095157623, |
|
"rewards/rejected": -0.017405081540346146, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6202618883528601, |
|
"eval_logits/chosen": -3.145947217941284, |
|
"eval_logits/rejected": -3.140315532684326, |
|
"eval_logps/chosen": -58.54254913330078, |
|
"eval_logps/rejected": -63.26302719116211, |
|
"eval_loss": 0.6919277906417847, |
|
"eval_rewards/accuracies": 0.5708643198013306, |
|
"eval_rewards/chosen": 0.0016934837913140655, |
|
"eval_rewards/margins": 0.00252249906770885, |
|
"eval_rewards/rejected": -0.0008290152181871235, |
|
"eval_runtime": 383.2519, |
|
"eval_samples_per_second": 11.23, |
|
"eval_steps_per_second": 1.404, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6271536871123363, |
|
"grad_norm": 1.234681248664856, |
|
"learning_rate": 4.3380761941215947e-08, |
|
"logits/chosen": -3.046011447906494, |
|
"logits/rejected": -3.0302977561950684, |
|
"logps/chosen": -54.25246047973633, |
|
"logps/rejected": -55.46947479248047, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -0.009562310762703419, |
|
"rewards/margins": 0.007873213849961758, |
|
"rewards/rejected": -0.017435524612665176, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6340454858718125, |
|
"grad_norm": 1.141934871673584, |
|
"learning_rate": 4.317554652076299e-08, |
|
"logits/chosen": -3.054769992828369, |
|
"logits/rejected": -3.0311903953552246, |
|
"logps/chosen": -54.04453659057617, |
|
"logps/rejected": -54.37770462036133, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.009277190081775188, |
|
"rewards/margins": 0.0076979040168225765, |
|
"rewards/rejected": -0.016975093632936478, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6409372846312887, |
|
"grad_norm": 1.236680269241333, |
|
"learning_rate": 4.2967699813184615e-08, |
|
"logits/chosen": -3.0500195026397705, |
|
"logits/rejected": -3.0328176021575928, |
|
"logps/chosen": -54.70762252807617, |
|
"logps/rejected": -57.55879592895508, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.008434431627392769, |
|
"rewards/margins": 0.009904151782393456, |
|
"rewards/rejected": -0.018338583409786224, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.647829083390765, |
|
"grad_norm": 1.2360023260116577, |
|
"learning_rate": 4.275725190860027e-08, |
|
"logits/chosen": -3.073611259460449, |
|
"logits/rejected": -3.0537660121917725, |
|
"logps/chosen": -55.351104736328125, |
|
"logps/rejected": -55.8747673034668, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.010648580268025398, |
|
"rewards/margins": 0.007464288733899593, |
|
"rewards/rejected": -0.018112869933247566, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6547208821502413, |
|
"grad_norm": 1.2623155117034912, |
|
"learning_rate": 4.2544233273706585e-08, |
|
"logits/chosen": -3.0598671436309814, |
|
"logits/rejected": -3.0294106006622314, |
|
"logps/chosen": -55.8059196472168, |
|
"logps/rejected": -53.73136520385742, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.009804973378777504, |
|
"rewards/margins": 0.007985373958945274, |
|
"rewards/rejected": -0.01779034733772278, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6616126809097175, |
|
"grad_norm": 1.2945950031280518, |
|
"learning_rate": 4.232867474736669e-08, |
|
"logits/chosen": -3.0672502517700195, |
|
"logits/rejected": -3.0369277000427246, |
|
"logps/chosen": -56.809417724609375, |
|
"logps/rejected": -55.6953239440918, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.660937488079071, |
|
"rewards/chosen": -0.007269621826708317, |
|
"rewards/margins": 0.010824671015143394, |
|
"rewards/rejected": -0.018094293773174286, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6685044796691937, |
|
"grad_norm": 1.1434519290924072, |
|
"learning_rate": 4.211060753614565e-08, |
|
"logits/chosen": -3.1128265857696533, |
|
"logits/rejected": -3.0972368717193604, |
|
"logps/chosen": -56.41877365112305, |
|
"logps/rejected": -55.6785774230957, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.011154340580105782, |
|
"rewards/margins": 0.005970745347440243, |
|
"rewards/rejected": -0.0171250868588686, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6753962784286699, |
|
"grad_norm": 1.1750149726867676, |
|
"learning_rate": 4.1890063209792674e-08, |
|
"logits/chosen": -3.1158690452575684, |
|
"logits/rejected": -3.079075336456299, |
|
"logps/chosen": -57.21317672729492, |
|
"logps/rejected": -53.88423538208008, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.6421874761581421, |
|
"rewards/chosen": -0.008817395195364952, |
|
"rewards/margins": 0.01136676874011755, |
|
"rewards/rejected": -0.020184166729450226, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6822880771881461, |
|
"grad_norm": 1.3042854070663452, |
|
"learning_rate": 4.166707369667073e-08, |
|
"logits/chosen": -3.045738458633423, |
|
"logits/rejected": -3.0281968116760254, |
|
"logps/chosen": -54.19719696044922, |
|
"logps/rejected": -56.0754280090332, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.009636206552386284, |
|
"rewards/margins": 0.009063459932804108, |
|
"rewards/rejected": -0.01869966648519039, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6891798759476223, |
|
"grad_norm": 1.22942054271698, |
|
"learning_rate": 4.144167127913426e-08, |
|
"logits/chosen": -3.075810194015503, |
|
"logits/rejected": -3.052361249923706, |
|
"logps/chosen": -55.58427810668945, |
|
"logps/rejected": -55.51273727416992, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.011259499937295914, |
|
"rewards/margins": 0.008291425183415413, |
|
"rewards/rejected": -0.019550926983356476, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6891798759476223, |
|
"eval_logits/chosen": -3.1417765617370605, |
|
"eval_logits/rejected": -3.136131525039673, |
|
"eval_logps/chosen": -58.606346130371094, |
|
"eval_logps/rejected": -63.375797271728516, |
|
"eval_loss": 0.6916959881782532, |
|
"eval_rewards/accuracies": 0.5724906921386719, |
|
"eval_rewards/chosen": 0.001055453554727137, |
|
"eval_rewards/margins": 0.0030122159514576197, |
|
"eval_rewards/rejected": -0.0019567625131458044, |
|
"eval_runtime": 382.8427, |
|
"eval_samples_per_second": 11.242, |
|
"eval_steps_per_second": 1.405, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6960716747070985, |
|
"grad_norm": 1.228550910949707, |
|
"learning_rate": 4.1213888588855636e-08, |
|
"logits/chosen": -3.0645551681518555, |
|
"logits/rejected": -3.0503764152526855, |
|
"logps/chosen": -54.256507873535156, |
|
"logps/rejected": -55.95310592651367, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.011437224224209785, |
|
"rewards/margins": 0.008133414201438427, |
|
"rewards/rejected": -0.019570637494325638, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7029634734665747, |
|
"grad_norm": 1.290880560874939, |
|
"learning_rate": 4.098375860210107e-08, |
|
"logits/chosen": -3.0364532470703125, |
|
"logits/rejected": -3.0164756774902344, |
|
"logps/chosen": -54.49522018432617, |
|
"logps/rejected": -55.22959518432617, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.013310156762599945, |
|
"rewards/margins": 0.0093264514580369, |
|
"rewards/rejected": -0.02263660728931427, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.709855272226051, |
|
"grad_norm": 1.234087586402893, |
|
"learning_rate": 4.075131463495657e-08, |
|
"logits/chosen": -3.0410397052764893, |
|
"logits/rejected": -3.023860454559326, |
|
"logps/chosen": -54.86391067504883, |
|
"logps/rejected": -54.73369598388672, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.012769045308232307, |
|
"rewards/margins": 0.009414998814463615, |
|
"rewards/rejected": -0.022184044122695923, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7167470709855273, |
|
"grad_norm": 1.1497515439987183, |
|
"learning_rate": 4.051659033850477e-08, |
|
"logits/chosen": -3.0711050033569336, |
|
"logits/rejected": -3.0434327125549316, |
|
"logps/chosen": -55.960113525390625, |
|
"logps/rejected": -53.39757537841797, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.01206748653203249, |
|
"rewards/margins": 0.011478706263005733, |
|
"rewards/rejected": -0.023546192795038223, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7236388697450035, |
|
"grad_norm": 1.241176724433899, |
|
"learning_rate": 4.0279619693953283e-08, |
|
"logits/chosen": -3.0579118728637695, |
|
"logits/rejected": -3.044525623321533, |
|
"logps/chosen": -54.537757873535156, |
|
"logps/rejected": -55.7606201171875, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5953124761581421, |
|
"rewards/chosen": -0.01158697810024023, |
|
"rewards/margins": 0.008560305461287498, |
|
"rewards/rejected": -0.020147282630205154, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7305306685044797, |
|
"grad_norm": 1.287839651107788, |
|
"learning_rate": 4.0040437007715124e-08, |
|
"logits/chosen": -3.0260822772979736, |
|
"logits/rejected": -3.0041518211364746, |
|
"logps/chosen": -55.73114013671875, |
|
"logps/rejected": -56.4024772644043, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.013397350907325745, |
|
"rewards/margins": 0.01179309468716383, |
|
"rewards/rejected": -0.02519044652581215, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7374224672639559, |
|
"grad_norm": 1.1840453147888184, |
|
"learning_rate": 3.979907690644222e-08, |
|
"logits/chosen": -3.005467653274536, |
|
"logits/rejected": -2.9843525886535645, |
|
"logps/chosen": -54.47725296020508, |
|
"logps/rejected": -54.86272048950195, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.012515179812908173, |
|
"rewards/margins": 0.010800262913107872, |
|
"rewards/rejected": -0.023315440863370895, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7443142660234321, |
|
"grad_norm": 1.2041012048721313, |
|
"learning_rate": 3.9555574332012454e-08, |
|
"logits/chosen": -3.0442147254943848, |
|
"logits/rejected": -3.0234692096710205, |
|
"logps/chosen": -56.4234619140625, |
|
"logps/rejected": -55.07111740112305, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.013009254820644855, |
|
"rewards/margins": 0.01034192182123661, |
|
"rewards/rejected": -0.02335117571055889, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7512060647829083, |
|
"grad_norm": 1.192734956741333, |
|
"learning_rate": 3.930996453647113e-08, |
|
"logits/chosen": -3.008514881134033, |
|
"logits/rejected": -2.986760139465332, |
|
"logps/chosen": -53.92486572265625, |
|
"logps/rejected": -53.8699951171875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.016327153891324997, |
|
"rewards/margins": 0.009530487470328808, |
|
"rewards/rejected": -0.02585764229297638, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7580978635423845, |
|
"grad_norm": 1.1945998668670654, |
|
"learning_rate": 3.906228307692747e-08, |
|
"logits/chosen": -3.050058126449585, |
|
"logits/rejected": -3.0325589179992676, |
|
"logps/chosen": -56.26338577270508, |
|
"logps/rejected": -56.20615768432617, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.014123663306236267, |
|
"rewards/margins": 0.0081967543810606, |
|
"rewards/rejected": -0.022320415824651718, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7580978635423845, |
|
"eval_logits/chosen": -3.136918067932129, |
|
"eval_logits/rejected": -3.131256103515625, |
|
"eval_logps/chosen": -58.693904876708984, |
|
"eval_logps/rejected": -63.52504348754883, |
|
"eval_loss": 0.6914047598838806, |
|
"eval_rewards/accuracies": 0.5808550119400024, |
|
"eval_rewards/chosen": 0.00017988457693718374, |
|
"eval_rewards/margins": 0.003629034385085106, |
|
"eval_rewards/rejected": -0.0034491494297981262, |
|
"eval_runtime": 382.7678, |
|
"eval_samples_per_second": 11.244, |
|
"eval_steps_per_second": 1.406, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7649896623018608, |
|
"grad_norm": 1.2611422538757324, |
|
"learning_rate": 3.8812565810407006e-08, |
|
"logits/chosen": -3.0509583950042725, |
|
"logits/rejected": -3.019794225692749, |
|
"logps/chosen": -57.166297912597656, |
|
"logps/rejected": -55.56831741333008, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.014432080090045929, |
|
"rewards/margins": 0.01164001040160656, |
|
"rewards/rejected": -0.02607208862900734, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.771881461061337, |
|
"grad_norm": 1.1777701377868652, |
|
"learning_rate": 3.856084888866052e-08, |
|
"logits/chosen": -3.0596282482147217, |
|
"logits/rejected": -3.045269250869751, |
|
"logps/chosen": -55.52899932861328, |
|
"logps/rejected": -54.93024444580078, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.01683993637561798, |
|
"rewards/margins": 0.009132475592195988, |
|
"rewards/rejected": -0.025972411036491394, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7787732598208132, |
|
"grad_norm": 1.2296311855316162, |
|
"learning_rate": 3.830716875293038e-08, |
|
"logits/chosen": -3.0673999786376953, |
|
"logits/rejected": -3.0444142818450928, |
|
"logps/chosen": -54.970741271972656, |
|
"logps/rejected": -54.68275833129883, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.015940912067890167, |
|
"rewards/margins": 0.009639047086238861, |
|
"rewards/rejected": -0.02557995915412903, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7856650585802895, |
|
"grad_norm": 1.1905580759048462, |
|
"learning_rate": 3.805156212867483e-08, |
|
"logits/chosen": -3.029092788696289, |
|
"logits/rejected": -3.0086588859558105, |
|
"logps/chosen": -56.229042053222656, |
|
"logps/rejected": -55.34952926635742, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6421874761581421, |
|
"rewards/chosen": -0.01458609290421009, |
|
"rewards/margins": 0.011665640398859978, |
|
"rewards/rejected": -0.02625173330307007, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7925568573397657, |
|
"grad_norm": 1.1570724248886108, |
|
"learning_rate": 3.779406602025128e-08, |
|
"logits/chosen": -3.007833957672119, |
|
"logits/rejected": -2.9827017784118652, |
|
"logps/chosen": -55.04015350341797, |
|
"logps/rejected": -55.21650314331055, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.01640317775309086, |
|
"rewards/margins": 0.010581018403172493, |
|
"rewards/rejected": -0.02698419615626335, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7994486560992419, |
|
"grad_norm": 1.211165189743042, |
|
"learning_rate": 3.7534717705559146e-08, |
|
"logits/chosen": -3.036921977996826, |
|
"logits/rejected": -3.0160536766052246, |
|
"logps/chosen": -56.46533203125, |
|
"logps/rejected": -57.42781448364258, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.017415925860404968, |
|
"rewards/margins": 0.01034791674464941, |
|
"rewards/rejected": -0.027763843536376953, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8063404548587181, |
|
"grad_norm": 1.1748243570327759, |
|
"learning_rate": 3.727355473064308e-08, |
|
"logits/chosen": -3.05203914642334, |
|
"logits/rejected": -3.024839162826538, |
|
"logps/chosen": -54.88653564453125, |
|
"logps/rejected": -54.187705993652344, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.015425342135131359, |
|
"rewards/margins": 0.011839036829769611, |
|
"rewards/rejected": -0.02726438082754612, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8132322536181944, |
|
"grad_norm": 1.2590429782867432, |
|
"learning_rate": 3.701061490425745e-08, |
|
"logits/chosen": -3.053898334503174, |
|
"logits/rejected": -3.0290002822875977, |
|
"logps/chosen": -57.20033645629883, |
|
"logps/rejected": -56.57124710083008, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.016385816037654877, |
|
"rewards/margins": 0.012309985235333443, |
|
"rewards/rejected": -0.02869580127298832, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8201240523776706, |
|
"grad_norm": 1.2485055923461914, |
|
"learning_rate": 3.6745936292392666e-08, |
|
"logits/chosen": -3.021477460861206, |
|
"logits/rejected": -3.0019021034240723, |
|
"logps/chosen": -55.60076141357422, |
|
"logps/rejected": -55.449058532714844, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.01587783917784691, |
|
"rewards/margins": 0.010499360039830208, |
|
"rewards/rejected": -0.026377201080322266, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8270158511371468, |
|
"grad_norm": 1.2800626754760742, |
|
"learning_rate": 3.6479557212764414e-08, |
|
"logits/chosen": -3.028402090072632, |
|
"logits/rejected": -3.008002519607544, |
|
"logps/chosen": -56.96452713012695, |
|
"logps/rejected": -55.789756774902344, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.01752752624452114, |
|
"rewards/margins": 0.009723445400595665, |
|
"rewards/rejected": -0.027250971645116806, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8270158511371468, |
|
"eval_logits/chosen": -3.133814811706543, |
|
"eval_logits/rejected": -3.128159284591675, |
|
"eval_logps/chosen": -58.78531265258789, |
|
"eval_logps/rejected": -63.680179595947266, |
|
"eval_loss": 0.6911039352416992, |
|
"eval_rewards/accuracies": 0.5755111575126648, |
|
"eval_rewards/chosen": -0.0007341906311921775, |
|
"eval_rewards/margins": 0.004266415257006884, |
|
"eval_rewards/rejected": -0.005000605713576078, |
|
"eval_runtime": 383.3119, |
|
"eval_samples_per_second": 11.228, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.833907649896623, |
|
"grad_norm": 1.3170323371887207, |
|
"learning_rate": 3.621151622926631e-08, |
|
"logits/chosen": -3.022981643676758, |
|
"logits/rejected": -2.9983408451080322, |
|
"logps/chosen": -56.321983337402344, |
|
"logps/rejected": -55.284454345703125, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.017917579039931297, |
|
"rewards/margins": 0.011845814064145088, |
|
"rewards/rejected": -0.029763391241431236, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8407994486560992, |
|
"grad_norm": 1.170351505279541, |
|
"learning_rate": 3.594185214638704e-08, |
|
"logits/chosen": -3.066943407058716, |
|
"logits/rejected": -3.0385823249816895, |
|
"logps/chosen": -57.5960807800293, |
|
"logps/rejected": -54.60730743408203, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.0178567823022604, |
|
"rewards/margins": 0.012462841346859932, |
|
"rewards/rejected": -0.03031962178647518, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8476912474155754, |
|
"grad_norm": 1.224771499633789, |
|
"learning_rate": 3.567060400359253e-08, |
|
"logits/chosen": -3.0506491661071777, |
|
"logits/rejected": -3.0242903232574463, |
|
"logps/chosen": -56.49556350708008, |
|
"logps/rejected": -55.71235275268555, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.017950473353266716, |
|
"rewards/margins": 0.014979615807533264, |
|
"rewards/rejected": -0.03293009102344513, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8545830461750517, |
|
"grad_norm": 1.2280082702636719, |
|
"learning_rate": 3.5397811069674256e-08, |
|
"logits/chosen": -3.037538528442383, |
|
"logits/rejected": -3.023832082748413, |
|
"logps/chosen": -56.15264129638672, |
|
"logps/rejected": -58.523162841796875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.02116158790886402, |
|
"rewards/margins": 0.0096513070166111, |
|
"rewards/rejected": -0.03081289492547512, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8614748449345279, |
|
"grad_norm": 1.3131028413772583, |
|
"learning_rate": 3.512351283706419e-08, |
|
"logits/chosen": -3.0145888328552246, |
|
"logits/rejected": -3.0035436153411865, |
|
"logps/chosen": -55.00154495239258, |
|
"logps/rejected": -56.4818229675293, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.02046709507703781, |
|
"rewards/margins": 0.008517416194081306, |
|
"rewards/rejected": -0.02898450754582882, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8683666436940042, |
|
"grad_norm": 1.2719794511795044, |
|
"learning_rate": 3.484774901611753e-08, |
|
"logits/chosen": -3.037191390991211, |
|
"logits/rejected": -3.011030673980713, |
|
"logps/chosen": -56.1077880859375, |
|
"logps/rejected": -55.119110107421875, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.020265808328986168, |
|
"rewards/margins": 0.012135522440075874, |
|
"rewards/rejected": -0.03240133076906204, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8752584424534804, |
|
"grad_norm": 1.223455786705017, |
|
"learning_rate": 3.4570559529363756e-08, |
|
"logits/chosen": -3.0510623455047607, |
|
"logits/rejected": -3.0273430347442627, |
|
"logps/chosen": -56.024391174316406, |
|
"logps/rejected": -54.66645431518555, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.01942160725593567, |
|
"rewards/margins": 0.012839846312999725, |
|
"rewards/rejected": -0.032261453568935394, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8821502412129566, |
|
"grad_norm": 1.224640965461731, |
|
"learning_rate": 3.429198450572702e-08, |
|
"logits/chosen": -3.045257568359375, |
|
"logits/rejected": -3.0113143920898438, |
|
"logps/chosen": -57.24846267700195, |
|
"logps/rejected": -55.42502975463867, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.019725024700164795, |
|
"rewards/margins": 0.014653083868324757, |
|
"rewards/rejected": -0.03437810391187668, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8890420399724328, |
|
"grad_norm": 1.1708803176879883, |
|
"learning_rate": 3.401206427471665e-08, |
|
"logits/chosen": -3.083014965057373, |
|
"logits/rejected": -3.053872585296631, |
|
"logps/chosen": -55.94194412231445, |
|
"logps/rejected": -54.92655563354492, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.019591879099607468, |
|
"rewards/margins": 0.015089405700564384, |
|
"rewards/rejected": -0.034681286662817, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.895933838731909, |
|
"grad_norm": 1.2637726068496704, |
|
"learning_rate": 3.3730839360588633e-08, |
|
"logits/chosen": -3.0728800296783447, |
|
"logits/rejected": -3.0561113357543945, |
|
"logps/chosen": -55.9746208190918, |
|
"logps/rejected": -56.79145431518555, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.020700206980109215, |
|
"rewards/margins": 0.011441068723797798, |
|
"rewards/rejected": -0.03214127570390701, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.895933838731909, |
|
"eval_logits/chosen": -3.12795090675354, |
|
"eval_logits/rejected": -3.1222612857818604, |
|
"eval_logps/chosen": -58.95671081542969, |
|
"eval_logps/rejected": -63.90719223022461, |
|
"eval_loss": 0.690849244594574, |
|
"eval_rewards/accuracies": 0.578066885471344, |
|
"eval_rewards/chosen": -0.002448198851197958, |
|
"eval_rewards/margins": 0.004822447896003723, |
|
"eval_rewards/rejected": -0.007270646747201681, |
|
"eval_runtime": 383.2075, |
|
"eval_samples_per_second": 11.232, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9028256374913852, |
|
"grad_norm": 1.2189267873764038, |
|
"learning_rate": 3.344835047647892e-08, |
|
"logits/chosen": -3.038391351699829, |
|
"logits/rejected": -3.0231399536132812, |
|
"logps/chosen": -55.052391052246094, |
|
"logps/rejected": -57.159202575683594, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.01980864442884922, |
|
"rewards/margins": 0.013179932720959187, |
|
"rewards/rejected": -0.03298857808113098, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9097174362508614, |
|
"grad_norm": 1.2375820875167847, |
|
"learning_rate": 3.316463851850925e-08, |
|
"logits/chosen": -3.0543713569641113, |
|
"logits/rejected": -3.0287716388702393, |
|
"logps/chosen": -55.84870147705078, |
|
"logps/rejected": -54.72203826904297, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.620312511920929, |
|
"rewards/chosen": -0.024270061403512955, |
|
"rewards/margins": 0.012434338219463825, |
|
"rewards/rejected": -0.036704398691654205, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9166092350103378, |
|
"grad_norm": 1.2331100702285767, |
|
"learning_rate": 3.287974455986671e-08, |
|
"logits/chosen": -3.0482242107391357, |
|
"logits/rejected": -3.0168027877807617, |
|
"logps/chosen": -58.51416015625, |
|
"logps/rejected": -55.834266662597656, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.6578124761581421, |
|
"rewards/chosen": -0.018537839874625206, |
|
"rewards/margins": 0.015199096873402596, |
|
"rewards/rejected": -0.0337369367480278, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.923501033769814, |
|
"grad_norm": 1.2201625108718872, |
|
"learning_rate": 3.259370984485746e-08, |
|
"logits/chosen": -3.0217771530151367, |
|
"logits/rejected": -2.998465061187744, |
|
"logps/chosen": -55.553428649902344, |
|
"logps/rejected": -56.95924758911133, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022853773087263107, |
|
"rewards/margins": 0.013605187647044659, |
|
"rewards/rejected": -0.03645896166563034, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9303928325292902, |
|
"grad_norm": 1.2265971899032593, |
|
"learning_rate": 3.2306575782935806e-08, |
|
"logits/chosen": -3.043489456176758, |
|
"logits/rejected": -3.013939380645752, |
|
"logps/chosen": -56.77729415893555, |
|
"logps/rejected": -56.281822204589844, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.020281706005334854, |
|
"rewards/margins": 0.016187874600291252, |
|
"rewards/rejected": -0.036469582468271255, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9372846312887664, |
|
"grad_norm": 1.245123267173767, |
|
"learning_rate": 3.201838394270931e-08, |
|
"logits/chosen": -3.064115524291992, |
|
"logits/rejected": -3.0484519004821777, |
|
"logps/chosen": -57.521820068359375, |
|
"logps/rejected": -57.416893005371094, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.024825390428304672, |
|
"rewards/margins": 0.012256421148777008, |
|
"rewards/rejected": -0.03708181157708168, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9441764300482426, |
|
"grad_norm": 1.2887479066848755, |
|
"learning_rate": 3.172917604592084e-08, |
|
"logits/chosen": -3.0228118896484375, |
|
"logits/rejected": -3.0045721530914307, |
|
"logps/chosen": -55.98120880126953, |
|
"logps/rejected": -55.73798751831055, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.025501202791929245, |
|
"rewards/margins": 0.012837904505431652, |
|
"rewards/rejected": -0.03833910822868347, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9510682288077188, |
|
"grad_norm": 1.2927711009979248, |
|
"learning_rate": 3.143899396140856e-08, |
|
"logits/chosen": -3.03184175491333, |
|
"logits/rejected": -3.010230779647827, |
|
"logps/chosen": -56.847740173339844, |
|
"logps/rejected": -56.31840133666992, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.022407762706279755, |
|
"rewards/margins": 0.01289152167737484, |
|
"rewards/rejected": -0.035299282521009445, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.957960027567195, |
|
"grad_norm": 1.2551859617233276, |
|
"learning_rate": 3.114787969904446e-08, |
|
"logits/chosen": -3.001943826675415, |
|
"logits/rejected": -2.9838249683380127, |
|
"logps/chosen": -56.46649169921875, |
|
"logps/rejected": -57.01629638671875, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.022768724709749222, |
|
"rewards/margins": 0.014040583744645119, |
|
"rewards/rejected": -0.03680930659174919, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9648518263266712, |
|
"grad_norm": 1.1776050329208374, |
|
"learning_rate": 3.085587540365262e-08, |
|
"logits/chosen": -3.054063081741333, |
|
"logits/rejected": -3.0365357398986816, |
|
"logps/chosen": -55.7647819519043, |
|
"logps/rejected": -59.496559143066406, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.024700012058019638, |
|
"rewards/margins": 0.01230792049318552, |
|
"rewards/rejected": -0.03700793534517288, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9648518263266712, |
|
"eval_logits/chosen": -3.1261656284332275, |
|
"eval_logits/rejected": -3.1204779148101807, |
|
"eval_logps/chosen": -59.10846710205078, |
|
"eval_logps/rejected": -64.10256958007812, |
|
"eval_loss": 0.6906503438949585, |
|
"eval_rewards/accuracies": 0.5771375298500061, |
|
"eval_rewards/chosen": -0.003965714480727911, |
|
"eval_rewards/margins": 0.0052587250247597694, |
|
"eval_rewards/rejected": -0.009224439039826393, |
|
"eval_runtime": 383.1495, |
|
"eval_samples_per_second": 11.233, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9717436250861475, |
|
"grad_norm": 1.347545862197876, |
|
"learning_rate": 3.056302334890786e-08, |
|
"logits/chosen": -3.0551466941833496, |
|
"logits/rejected": -3.0341668128967285, |
|
"logps/chosen": -56.55133056640625, |
|
"logps/rejected": -57.29961395263672, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.02232871949672699, |
|
"rewards/margins": 0.014838054776191711, |
|
"rewards/rejected": -0.037166766822338104, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9786354238456237, |
|
"grad_norm": 1.2241698503494263, |
|
"learning_rate": 3.02693659312157e-08, |
|
"logits/chosen": -2.9941155910491943, |
|
"logits/rejected": -2.9760937690734863, |
|
"logps/chosen": -55.75859451293945, |
|
"logps/rejected": -55.846839904785156, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.02684764564037323, |
|
"rewards/margins": 0.012402022257447243, |
|
"rewards/rejected": -0.039249666035175323, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9855272226051, |
|
"grad_norm": 1.3626172542572021, |
|
"learning_rate": 2.9974945663574684e-08, |
|
"logits/chosen": -3.026280403137207, |
|
"logits/rejected": -3.0005555152893066, |
|
"logps/chosen": -57.986793518066406, |
|
"logps/rejected": -56.02061080932617, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.645312488079071, |
|
"rewards/chosen": -0.02259395457804203, |
|
"rewards/margins": 0.019132796674966812, |
|
"rewards/rejected": -0.041726745665073395, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9924190213645762, |
|
"grad_norm": 1.289384126663208, |
|
"learning_rate": 2.967980516942168e-08, |
|
"logits/chosen": -3.0657455921173096, |
|
"logits/rejected": -3.040670394897461, |
|
"logps/chosen": -58.47277069091797, |
|
"logps/rejected": -57.19081497192383, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.024709826335310936, |
|
"rewards/margins": 0.016781199723482132, |
|
"rewards/rejected": -0.04149102419614792, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9993108201240524, |
|
"grad_norm": 1.288656234741211, |
|
"learning_rate": 2.9383987176461268e-08, |
|
"logits/chosen": -2.991293430328369, |
|
"logits/rejected": -2.9657962322235107, |
|
"logps/chosen": -54.06468963623047, |
|
"logps/rejected": -57.25890350341797, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.024288879707455635, |
|
"rewards/margins": 0.012961057014763355, |
|
"rewards/rejected": -0.037249937653541565, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0062026188835287, |
|
"grad_norm": 1.3280855417251587, |
|
"learning_rate": 2.9087534510480032e-08, |
|
"logits/chosen": -3.045292377471924, |
|
"logits/rejected": -3.0192904472351074, |
|
"logps/chosen": -55.768096923828125, |
|
"logps/rejected": -56.869842529296875, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.02522682026028633, |
|
"rewards/margins": 0.017563799396157265, |
|
"rewards/rejected": -0.04279061779379845, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0130944176430048, |
|
"grad_norm": 1.2552244663238525, |
|
"learning_rate": 2.879049008914664e-08, |
|
"logits/chosen": -3.015263080596924, |
|
"logits/rejected": -2.999004602432251, |
|
"logps/chosen": -55.86402130126953, |
|
"logps/rejected": -58.1766471862793, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.645312488079071, |
|
"rewards/chosen": -0.024612870067358017, |
|
"rewards/margins": 0.01742735505104065, |
|
"rewards/rejected": -0.04204022139310837, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.019986216402481, |
|
"grad_norm": 1.2557737827301025, |
|
"learning_rate": 2.8492896915798605e-08, |
|
"logits/chosen": -3.021721601486206, |
|
"logits/rejected": -3.0094776153564453, |
|
"logps/chosen": -56.1937141418457, |
|
"logps/rejected": -59.952537536621094, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.024868253618478775, |
|
"rewards/margins": 0.014400708489120007, |
|
"rewards/rejected": -0.039268963038921356, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0268780151619572, |
|
"grad_norm": 1.3632835149765015, |
|
"learning_rate": 2.8194798073216665e-08, |
|
"logits/chosen": -2.9897549152374268, |
|
"logits/rejected": -2.9639222621917725, |
|
"logps/chosen": -56.546180725097656, |
|
"logps/rejected": -56.685096740722656, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.024227097630500793, |
|
"rewards/margins": 0.017609497532248497, |
|
"rewards/rejected": -0.04183660075068474, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.0337698139214335, |
|
"grad_norm": 1.359270691871643, |
|
"learning_rate": 2.7896236717387662e-08, |
|
"logits/chosen": -2.9973204135894775, |
|
"logits/rejected": -2.979785680770874, |
|
"logps/chosen": -55.88494110107422, |
|
"logps/rejected": -56.941490173339844, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.029738834127783775, |
|
"rewards/margins": 0.012930555269122124, |
|
"rewards/rejected": -0.0426693893969059, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0337698139214335, |
|
"eval_logits/chosen": -3.121001720428467, |
|
"eval_logits/rejected": -3.1152734756469727, |
|
"eval_logps/chosen": -59.26029968261719, |
|
"eval_logps/rejected": -64.31062316894531, |
|
"eval_loss": 0.6903930306434631, |
|
"eval_rewards/accuracies": 0.5824813842773438, |
|
"eval_rewards/chosen": -0.005484058987349272, |
|
"eval_rewards/margins": 0.005820916499942541, |
|
"eval_rewards/rejected": -0.011304975487291813, |
|
"eval_runtime": 383.1818, |
|
"eval_samples_per_second": 11.232, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0406616126809096, |
|
"grad_norm": 1.1823673248291016, |
|
"learning_rate": 2.7597256071256836e-08, |
|
"logits/chosen": -3.0232627391815186, |
|
"logits/rejected": -3.002992630004883, |
|
"logps/chosen": -55.69109344482422, |
|
"logps/rejected": -55.20794677734375, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.029489045962691307, |
|
"rewards/margins": 0.014345052652060986, |
|
"rewards/rejected": -0.04383409768342972, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.047553411440386, |
|
"grad_norm": 1.2642569541931152, |
|
"learning_rate": 2.7297899418470372e-08, |
|
"logits/chosen": -2.9904372692108154, |
|
"logits/rejected": -2.9669933319091797, |
|
"logps/chosen": -59.17595672607422, |
|
"logps/rejected": -57.17033767700195, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": -0.027122503146529198, |
|
"rewards/margins": 0.017171606421470642, |
|
"rewards/rejected": -0.04429411143064499, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.0544452101998623, |
|
"grad_norm": 1.3126106262207031, |
|
"learning_rate": 2.6998210097109213e-08, |
|
"logits/chosen": -3.062737226486206, |
|
"logits/rejected": -3.041637659072876, |
|
"logps/chosen": -55.9976806640625, |
|
"logps/rejected": -57.3626823425293, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.6546875238418579, |
|
"rewards/chosen": -0.0244468804448843, |
|
"rewards/margins": 0.018709514290094376, |
|
"rewards/rejected": -0.043156400322914124, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.0613370089593384, |
|
"grad_norm": 1.1926969289779663, |
|
"learning_rate": 2.669823149341498e-08, |
|
"logits/chosen": -3.0017895698547363, |
|
"logits/rejected": -2.9859423637390137, |
|
"logps/chosen": -55.688560485839844, |
|
"logps/rejected": -56.54026412963867, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.620312511920929, |
|
"rewards/chosen": -0.02824980393052101, |
|
"rewards/margins": 0.01474563218653202, |
|
"rewards/rejected": -0.04299543425440788, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.0682288077188147, |
|
"grad_norm": 1.2355592250823975, |
|
"learning_rate": 2.6398007035508906e-08, |
|
"logits/chosen": -3.020545482635498, |
|
"logits/rejected": -2.9991073608398438, |
|
"logps/chosen": -60.19884490966797, |
|
"logps/rejected": -58.834068298339844, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.02622285485267639, |
|
"rewards/margins": 0.01788989268243313, |
|
"rewards/rejected": -0.04411274939775467, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0751206064782908, |
|
"grad_norm": 1.2842044830322266, |
|
"learning_rate": 2.609758018710473e-08, |
|
"logits/chosen": -3.0513670444488525, |
|
"logits/rejected": -3.0258631706237793, |
|
"logps/chosen": -58.162193298339844, |
|
"logps/rejected": -58.37177276611328, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.026162385940551758, |
|
"rewards/margins": 0.01831069216132164, |
|
"rewards/rejected": -0.0444730743765831, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.082012405237767, |
|
"grad_norm": 1.2734873294830322, |
|
"learning_rate": 2.5796994441216392e-08, |
|
"logits/chosen": -3.025871753692627, |
|
"logits/rejected": -3.008380174636841, |
|
"logps/chosen": -57.0385856628418, |
|
"logps/rejected": -57.39351272583008, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.027973037213087082, |
|
"rewards/margins": 0.017069904133677483, |
|
"rewards/rejected": -0.045042943209409714, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.0889042039972432, |
|
"grad_norm": 1.2615596055984497, |
|
"learning_rate": 2.5496293313861533e-08, |
|
"logits/chosen": -3.0057692527770996, |
|
"logits/rejected": -2.9775753021240234, |
|
"logps/chosen": -55.155418395996094, |
|
"logps/rejected": -56.9516716003418, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.02907036617398262, |
|
"rewards/margins": 0.01720438338816166, |
|
"rewards/rejected": -0.04627475142478943, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.0957960027567195, |
|
"grad_norm": 1.260473370552063, |
|
"learning_rate": 2.519552033776168e-08, |
|
"logits/chosen": -2.992969512939453, |
|
"logits/rejected": -2.9799506664276123, |
|
"logps/chosen": -57.432411193847656, |
|
"logps/rejected": -58.8470458984375, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.029672112315893173, |
|
"rewards/margins": 0.015414416790008545, |
|
"rewards/rejected": -0.04508653283119202, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1026878015161956, |
|
"grad_norm": 1.3105090856552124, |
|
"learning_rate": 2.4894719056039933e-08, |
|
"logits/chosen": -3.059690237045288, |
|
"logits/rejected": -3.0416653156280518, |
|
"logps/chosen": -57.756431579589844, |
|
"logps/rejected": -58.930335998535156, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.02842426858842373, |
|
"rewards/margins": 0.014618475921452045, |
|
"rewards/rejected": -0.04304274171590805, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1026878015161956, |
|
"eval_logits/chosen": -3.117943286895752, |
|
"eval_logits/rejected": -3.112224817276001, |
|
"eval_logps/chosen": -59.45923614501953, |
|
"eval_logps/rejected": -64.5576171875, |
|
"eval_loss": 0.6901748180389404, |
|
"eval_rewards/accuracies": 0.5887546539306641, |
|
"eval_rewards/chosen": -0.007473426405340433, |
|
"eval_rewards/margins": 0.00630148034542799, |
|
"eval_rewards/rejected": -0.013774906285107136, |
|
"eval_runtime": 383.3589, |
|
"eval_samples_per_second": 11.227, |
|
"eval_steps_per_second": 1.403, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.109579600275672, |
|
"grad_norm": 1.2811577320098877, |
|
"learning_rate": 2.459393301591723e-08, |
|
"logits/chosen": -3.0472471714019775, |
|
"logits/rejected": -3.0280234813690186, |
|
"logps/chosen": -56.1804313659668, |
|
"logps/rejected": -56.194740295410156, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.029790574684739113, |
|
"rewards/margins": 0.014425704255700111, |
|
"rewards/rejected": -0.044216278940439224, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1164713990351482, |
|
"grad_norm": 1.2719690799713135, |
|
"learning_rate": 2.429320576240796e-08, |
|
"logits/chosen": -2.983424186706543, |
|
"logits/rejected": -2.960758686065674, |
|
"logps/chosen": -57.0593147277832, |
|
"logps/rejected": -57.68733596801758, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6703125238418579, |
|
"rewards/chosen": -0.0267618540674448, |
|
"rewards/margins": 0.01918993890285492, |
|
"rewards/rejected": -0.04595179110765457, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1233631977946243, |
|
"grad_norm": 1.274936556816101, |
|
"learning_rate": 2.3992580832015937e-08, |
|
"logits/chosen": -3.0748069286346436, |
|
"logits/rejected": -3.0516257286071777, |
|
"logps/chosen": -57.92633056640625, |
|
"logps/rejected": -57.392669677734375, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.028706436976790428, |
|
"rewards/margins": 0.016950782388448715, |
|
"rewards/rejected": -0.04565722122788429, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.1302549965541007, |
|
"grad_norm": 1.3350669145584106, |
|
"learning_rate": 2.3692101746431582e-08, |
|
"logits/chosen": -2.999372720718384, |
|
"logits/rejected": -2.9686479568481445, |
|
"logps/chosen": -57.523155212402344, |
|
"logps/rejected": -57.58971405029297, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.645312488079071, |
|
"rewards/chosen": -0.0288604237139225, |
|
"rewards/margins": 0.018762212246656418, |
|
"rewards/rejected": -0.04762263223528862, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1371467953135768, |
|
"grad_norm": 1.1888097524642944, |
|
"learning_rate": 2.3391812006231252e-08, |
|
"logits/chosen": -3.051567792892456, |
|
"logits/rejected": -3.024486780166626, |
|
"logps/chosen": -57.35553741455078, |
|
"logps/rejected": -57.0074462890625, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": -0.030893787741661072, |
|
"rewards/margins": 0.01476077176630497, |
|
"rewards/rejected": -0.04565456137061119, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.144038594073053, |
|
"grad_norm": 1.1799283027648926, |
|
"learning_rate": 2.3091755084579655e-08, |
|
"logits/chosen": -3.032055377960205, |
|
"logits/rejected": -3.004883289337158, |
|
"logps/chosen": -56.0220832824707, |
|
"logps/rejected": -55.85259246826172, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.033284805715084076, |
|
"rewards/margins": 0.018482720479369164, |
|
"rewards/rejected": -0.05176752805709839, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.1509303928325294, |
|
"grad_norm": 1.3657642602920532, |
|
"learning_rate": 2.2791974420936168e-08, |
|
"logits/chosen": -3.0568909645080566, |
|
"logits/rejected": -3.0396854877471924, |
|
"logps/chosen": -55.1065673828125, |
|
"logps/rejected": -58.755226135253906, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.03204982727766037, |
|
"rewards/margins": 0.018315287306904793, |
|
"rewards/rejected": -0.05036511272192001, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.1578221915920055, |
|
"grad_norm": 1.238609790802002, |
|
"learning_rate": 2.2492513414766092e-08, |
|
"logits/chosen": -3.015735626220703, |
|
"logits/rejected": -2.9980287551879883, |
|
"logps/chosen": -58.169593811035156, |
|
"logps/rejected": -59.08977127075195, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.027626004070043564, |
|
"rewards/margins": 0.0211968831717968, |
|
"rewards/rejected": -0.04882288724184036, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.1647139903514818, |
|
"grad_norm": 1.2068655490875244, |
|
"learning_rate": 2.2193415419257697e-08, |
|
"logits/chosen": -3.023995876312256, |
|
"logits/rejected": -3.0071871280670166, |
|
"logps/chosen": -57.2905158996582, |
|
"logps/rejected": -58.23944854736328, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.5953124761581421, |
|
"rewards/chosen": -0.03379104658961296, |
|
"rewards/margins": 0.014122662134468555, |
|
"rewards/rejected": -0.04791371151804924, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.171605789110958, |
|
"grad_norm": 1.2340092658996582, |
|
"learning_rate": 2.1894723735045923e-08, |
|
"logits/chosen": -3.015665054321289, |
|
"logits/rejected": -2.995542526245117, |
|
"logps/chosen": -56.78801727294922, |
|
"logps/rejected": -58.15932083129883, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.034682370722293854, |
|
"rewards/margins": 0.01648074761033058, |
|
"rewards/rejected": -0.051163118332624435, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.171605789110958, |
|
"eval_logits/chosen": -3.11429500579834, |
|
"eval_logits/rejected": -3.1086244583129883, |
|
"eval_logps/chosen": -59.66813278198242, |
|
"eval_logps/rejected": -64.80902099609375, |
|
"eval_loss": 0.6899911165237427, |
|
"eval_rewards/accuracies": 0.586663544178009, |
|
"eval_rewards/chosen": -0.009562356397509575, |
|
"eval_rewards/margins": 0.006726610474288464, |
|
"eval_rewards/rejected": -0.016288965940475464, |
|
"eval_runtime": 383.0366, |
|
"eval_samples_per_second": 11.237, |
|
"eval_steps_per_second": 1.405, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.1784975878704342, |
|
"grad_norm": 1.361463189125061, |
|
"learning_rate": 2.159648160394373e-08, |
|
"logits/chosen": -3.077711582183838, |
|
"logits/rejected": -3.057525396347046, |
|
"logps/chosen": -58.2719612121582, |
|
"logps/rejected": -58.065284729003906, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.028816580772399902, |
|
"rewards/margins": 0.018897056579589844, |
|
"rewards/rejected": -0.047713637351989746, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1853893866299103, |
|
"grad_norm": 1.315765619277954, |
|
"learning_rate": 2.1298732202681956e-08, |
|
"logits/chosen": -2.9981892108917236, |
|
"logits/rejected": -2.9703431129455566, |
|
"logps/chosen": -57.9752311706543, |
|
"logps/rejected": -57.5850944519043, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.03318381309509277, |
|
"rewards/margins": 0.02043316699564457, |
|
"rewards/rejected": -0.053616978228092194, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.1922811853893867, |
|
"grad_norm": 1.2448103427886963, |
|
"learning_rate": 2.1001518636658567e-08, |
|
"logits/chosen": -3.0659806728363037, |
|
"logits/rejected": -3.0397450923919678, |
|
"logps/chosen": -57.8054313659668, |
|
"logps/rejected": -58.7362174987793, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.030917003750801086, |
|
"rewards/margins": 0.019438743591308594, |
|
"rewards/rejected": -0.05035575106739998, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.1991729841488628, |
|
"grad_norm": 1.3165340423583984, |
|
"learning_rate": 2.0704883933698286e-08, |
|
"logits/chosen": -3.0220000743865967, |
|
"logits/rejected": -2.988614559173584, |
|
"logps/chosen": -59.221092224121094, |
|
"logps/rejected": -56.499412536621094, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.03028092160820961, |
|
"rewards/margins": 0.020808402448892593, |
|
"rewards/rejected": -0.0510893277823925, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.206064782908339, |
|
"grad_norm": 1.2597706317901611, |
|
"learning_rate": 2.0408871037823392e-08, |
|
"logits/chosen": -3.050657033920288, |
|
"logits/rejected": -3.0249600410461426, |
|
"logps/chosen": -58.73793411254883, |
|
"logps/rejected": -58.411109924316406, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.032266996800899506, |
|
"rewards/margins": 0.020968910306692123, |
|
"rewards/rejected": -0.05323590710759163, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2129565816678154, |
|
"grad_norm": 1.3488494157791138, |
|
"learning_rate": 2.0113522803036697e-08, |
|
"logits/chosen": -3.0145840644836426, |
|
"logits/rejected": -2.9919447898864746, |
|
"logps/chosen": -57.56928253173828, |
|
"logps/rejected": -59.618064880371094, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.033350322395563126, |
|
"rewards/margins": 0.020749535411596298, |
|
"rewards/rejected": -0.054099857807159424, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2198483804272915, |
|
"grad_norm": 1.2959116697311401, |
|
"learning_rate": 1.981888198711757e-08, |
|
"logits/chosen": -3.0438103675842285, |
|
"logits/rejected": -3.0201594829559326, |
|
"logps/chosen": -56.78424072265625, |
|
"logps/rejected": -58.80998992919922, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.03599938377737999, |
|
"rewards/margins": 0.018340986222028732, |
|
"rewards/rejected": -0.05434036999940872, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.2267401791867678, |
|
"grad_norm": 1.3248341083526611, |
|
"learning_rate": 1.952499124543181e-08, |
|
"logits/chosen": -3.050183057785034, |
|
"logits/rejected": -3.0215609073638916, |
|
"logps/chosen": -59.94126510620117, |
|
"logps/rejected": -58.557289123535156, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.03428710997104645, |
|
"rewards/margins": 0.019783692434430122, |
|
"rewards/rejected": -0.05407080054283142, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.233631977946244, |
|
"grad_norm": 1.2419155836105347, |
|
"learning_rate": 1.923189312475642e-08, |
|
"logits/chosen": -3.0126285552978516, |
|
"logits/rejected": -2.989089250564575, |
|
"logps/chosen": -57.96059036254883, |
|
"logps/rejected": -58.968994140625, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -0.035335466265678406, |
|
"rewards/margins": 0.01862800493836403, |
|
"rewards/rejected": -0.05396346375346184, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2405237767057202, |
|
"grad_norm": 1.3300213813781738, |
|
"learning_rate": 1.8939630057120098e-08, |
|
"logits/chosen": -3.000619888305664, |
|
"logits/rejected": -2.9799740314483643, |
|
"logps/chosen": -58.02915573120117, |
|
"logps/rejected": -60.03089141845703, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.03870057314634323, |
|
"rewards/margins": 0.01641600951552391, |
|
"rewards/rejected": -0.05511658638715744, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2405237767057202, |
|
"eval_logits/chosen": -3.11264705657959, |
|
"eval_logits/rejected": -3.1069631576538086, |
|
"eval_logps/chosen": -59.91139221191406, |
|
"eval_logps/rejected": -65.09736633300781, |
|
"eval_loss": 0.6897966265678406, |
|
"eval_rewards/accuracies": 0.582713782787323, |
|
"eval_rewards/chosen": -0.01199500635266304, |
|
"eval_rewards/margins": 0.007177378050982952, |
|
"eval_rewards/rejected": -0.019172383472323418, |
|
"eval_runtime": 383.0708, |
|
"eval_samples_per_second": 11.236, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2474155754651963, |
|
"grad_norm": 1.2643280029296875, |
|
"learning_rate": 1.8648244353660288e-08, |
|
"logits/chosen": -3.0149238109588623, |
|
"logits/rejected": -2.9926235675811768, |
|
"logps/chosen": -59.53852081298828, |
|
"logps/rejected": -58.4305419921875, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.03731811046600342, |
|
"rewards/margins": 0.015873271971940994, |
|
"rewards/rejected": -0.05319138243794441, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.2543073742246726, |
|
"grad_norm": 1.353582501411438, |
|
"learning_rate": 1.835777819849779e-08, |
|
"logits/chosen": -3.084817886352539, |
|
"logits/rejected": -3.052018880844116, |
|
"logps/chosen": -58.27227783203125, |
|
"logps/rejected": -58.49510955810547, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.676562488079071, |
|
"rewards/chosen": -0.034875739365816116, |
|
"rewards/margins": 0.023144185543060303, |
|
"rewards/rejected": -0.05801992490887642, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.2611991729841487, |
|
"grad_norm": 1.3098019361495972, |
|
"learning_rate": 1.806827364262974e-08, |
|
"logits/chosen": -2.970393657684326, |
|
"logits/rejected": -2.9504239559173584, |
|
"logps/chosen": -57.546607971191406, |
|
"logps/rejected": -58.0165901184082, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0393044538795948, |
|
"rewards/margins": 0.013580908067524433, |
|
"rewards/rejected": -0.05288536101579666, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.268090971743625, |
|
"grad_norm": 1.2913509607315063, |
|
"learning_rate": 1.7779772597841818e-08, |
|
"logits/chosen": -3.0347402095794678, |
|
"logits/rejected": -3.008613109588623, |
|
"logps/chosen": -58.36812210083008, |
|
"logps/rejected": -58.02600860595703, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.03452888876199722, |
|
"rewards/margins": 0.022128187119960785, |
|
"rewards/rejected": -0.05665707588195801, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.2749827705031014, |
|
"grad_norm": 1.252109169960022, |
|
"learning_rate": 1.749231683064069e-08, |
|
"logits/chosen": -2.9613466262817383, |
|
"logits/rejected": -2.9358131885528564, |
|
"logps/chosen": -57.309776306152344, |
|
"logps/rejected": -57.78460693359375, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.03900003433227539, |
|
"rewards/margins": 0.017551228404045105, |
|
"rewards/rejected": -0.056551266461610794, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2818745692625775, |
|
"grad_norm": 1.3581938743591309, |
|
"learning_rate": 1.7205947956207416e-08, |
|
"logits/chosen": -2.9560749530792236, |
|
"logits/rejected": -2.9271953105926514, |
|
"logps/chosen": -58.55373001098633, |
|
"logps/rejected": -58.74352264404297, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.6265624761581421, |
|
"rewards/chosen": -0.03720385953783989, |
|
"rewards/margins": 0.021904241293668747, |
|
"rewards/rejected": -0.059108100831508636, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2887663680220538, |
|
"grad_norm": 1.377907395362854, |
|
"learning_rate": 1.69207074323728e-08, |
|
"logits/chosen": -3.007751703262329, |
|
"logits/rejected": -2.990427255630493, |
|
"logps/chosen": -57.58440017700195, |
|
"logps/rejected": -57.02080154418945, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.0374065637588501, |
|
"rewards/margins": 0.01753416657447815, |
|
"rewards/rejected": -0.05494073033332825, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.29565816678153, |
|
"grad_norm": 1.3684296607971191, |
|
"learning_rate": 1.6636636553615502e-08, |
|
"logits/chosen": -2.969104290008545, |
|
"logits/rejected": -2.9459190368652344, |
|
"logps/chosen": -57.26690673828125, |
|
"logps/rejected": -58.23255157470703, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.03711014613509178, |
|
"rewards/margins": 0.019704418256878853, |
|
"rewards/rejected": -0.056814562529325485, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.3025499655410062, |
|
"grad_norm": 1.2850284576416016, |
|
"learning_rate": 1.6353776445083815e-08, |
|
"logits/chosen": -3.0240912437438965, |
|
"logits/rejected": -3.0120110511779785, |
|
"logps/chosen": -55.608795166015625, |
|
"logps/rejected": -58.87140655517578, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.03577902913093567, |
|
"rewards/margins": 0.019716601818799973, |
|
"rewards/rejected": -0.05549562722444534, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3094417643004825, |
|
"grad_norm": 1.2016737461090088, |
|
"learning_rate": 1.6072168056641944e-08, |
|
"logits/chosen": -3.0512993335723877, |
|
"logits/rejected": -3.0232746601104736, |
|
"logps/chosen": -59.6502571105957, |
|
"logps/rejected": -57.75080490112305, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.03732553869485855, |
|
"rewards/margins": 0.02288132533431053, |
|
"rewards/rejected": -0.06020686775445938, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3094417643004825, |
|
"eval_logits/chosen": -3.109053134918213, |
|
"eval_logits/rejected": -3.1033873558044434, |
|
"eval_logps/chosen": -60.10012435913086, |
|
"eval_logps/rejected": -65.30886840820312, |
|
"eval_loss": 0.6897038221359253, |
|
"eval_rewards/accuracies": 0.5824813842773438, |
|
"eval_rewards/chosen": -0.013882317580282688, |
|
"eval_rewards/margins": 0.007405092474073172, |
|
"eval_rewards/rejected": -0.02128741145133972, |
|
"eval_runtime": 383.365, |
|
"eval_samples_per_second": 11.227, |
|
"eval_steps_per_second": 1.403, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3163335630599586, |
|
"grad_norm": 1.3704478740692139, |
|
"learning_rate": 1.5791852156941672e-08, |
|
"logits/chosen": -2.9737439155578613, |
|
"logits/rejected": -2.9562785625457764, |
|
"logps/chosen": -58.3253173828125, |
|
"logps/rejected": -58.146751403808594, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.03781484067440033, |
|
"rewards/margins": 0.017499810084700584, |
|
"rewards/rejected": -0.05531464144587517, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.323225361819435, |
|
"grad_norm": 1.304793119430542, |
|
"learning_rate": 1.5512869327520234e-08, |
|
"logits/chosen": -3.0345849990844727, |
|
"logits/rejected": -3.006624937057495, |
|
"logps/chosen": -59.780555725097656, |
|
"logps/rejected": -59.3910026550293, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.038787275552749634, |
|
"rewards/margins": 0.022125843912363052, |
|
"rewards/rejected": -0.060913123190402985, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.330117160578911, |
|
"grad_norm": 1.281746506690979, |
|
"learning_rate": 1.52352599569253e-08, |
|
"logits/chosen": -3.0547759532928467, |
|
"logits/rejected": -3.0221850872039795, |
|
"logps/chosen": -57.975791931152344, |
|
"logps/rejected": -56.89446258544922, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.03873300552368164, |
|
"rewards/margins": 0.018973171710968018, |
|
"rewards/rejected": -0.05770616978406906, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.3370089593383874, |
|
"grad_norm": 1.3232277631759644, |
|
"learning_rate": 1.4959064234867925e-08, |
|
"logits/chosen": -2.9585065841674805, |
|
"logits/rejected": -2.936213970184326, |
|
"logps/chosen": -56.48392868041992, |
|
"logps/rejected": -58.73712158203125, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.03964737430214882, |
|
"rewards/margins": 0.022885087877511978, |
|
"rewards/rejected": -0.0625324696302414, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.3439007580978635, |
|
"grad_norm": 1.266557216644287, |
|
"learning_rate": 1.4684322146404215e-08, |
|
"logits/chosen": -3.035268783569336, |
|
"logits/rejected": -3.01952862739563, |
|
"logps/chosen": -56.704620361328125, |
|
"logps/rejected": -59.3856086730957, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.04403103142976761, |
|
"rewards/margins": 0.017256928607821465, |
|
"rewards/rejected": -0.061287958174943924, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.3507925568573398, |
|
"grad_norm": 1.2548446655273438, |
|
"learning_rate": 1.4411073466146698e-08, |
|
"logits/chosen": -3.0059549808502197, |
|
"logits/rejected": -2.9832520484924316, |
|
"logps/chosen": -58.505836486816406, |
|
"logps/rejected": -60.24690628051758, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.03818178176879883, |
|
"rewards/margins": 0.01996403932571411, |
|
"rewards/rejected": -0.058145828545093536, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.3576843556168159, |
|
"grad_norm": 1.3594894409179688, |
|
"learning_rate": 1.413935775250609e-08, |
|
"logits/chosen": -2.9915037155151367, |
|
"logits/rejected": -2.9657304286956787, |
|
"logps/chosen": -58.22015380859375, |
|
"logps/rejected": -58.225196838378906, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03904888778924942, |
|
"rewards/margins": 0.023439262062311172, |
|
"rewards/rejected": -0.06248814985156059, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.3645761543762922, |
|
"grad_norm": 1.305829644203186, |
|
"learning_rate": 1.3869214341964411e-08, |
|
"logits/chosen": -2.9901621341705322, |
|
"logits/rejected": -2.972623825073242, |
|
"logps/chosen": -58.67400360107422, |
|
"logps/rejected": -59.30767059326172, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0412491112947464, |
|
"rewards/margins": 0.01679963245987892, |
|
"rewards/rejected": -0.058048736304044724, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.3714679531357685, |
|
"grad_norm": 1.2267251014709473, |
|
"learning_rate": 1.3600682343380247e-08, |
|
"logits/chosen": -2.953930616378784, |
|
"logits/rejected": -2.9335622787475586, |
|
"logps/chosen": -58.66155242919922, |
|
"logps/rejected": -59.33677291870117, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.039415620267391205, |
|
"rewards/margins": 0.020520631223917007, |
|
"rewards/rejected": -0.05993625521659851, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.3783597518952446, |
|
"grad_norm": 1.2556020021438599, |
|
"learning_rate": 1.3333800632326858e-08, |
|
"logits/chosen": -3.0334630012512207, |
|
"logits/rejected": -3.0166397094726562, |
|
"logps/chosen": -58.55223846435547, |
|
"logps/rejected": -59.941978454589844, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.0418681763112545, |
|
"rewards/margins": 0.017354335635900497, |
|
"rewards/rejected": -0.0592225082218647, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3783597518952446, |
|
"eval_logits/chosen": -3.1071271896362305, |
|
"eval_logits/rejected": -3.101414680480957, |
|
"eval_logps/chosen": -60.261566162109375, |
|
"eval_logps/rejected": -65.51657104492188, |
|
"eval_loss": 0.6894936561584473, |
|
"eval_rewards/accuracies": 0.5906133651733398, |
|
"eval_rewards/chosen": -0.015496725216507912, |
|
"eval_rewards/margins": 0.007867763750255108, |
|
"eval_rewards/rejected": -0.023364488035440445, |
|
"eval_runtime": 383.0695, |
|
"eval_samples_per_second": 11.236, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.385251550654721, |
|
"grad_norm": 1.4159228801727295, |
|
"learning_rate": 1.3068607845464202e-08, |
|
"logits/chosen": -2.9797048568725586, |
|
"logits/rejected": -2.952303171157837, |
|
"logps/chosen": -59.8831901550293, |
|
"logps/rejected": -59.22021484375, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.041412778198719025, |
|
"rewards/margins": 0.0192607082426548, |
|
"rewards/rejected": -0.06067349389195442, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.392143349414197, |
|
"grad_norm": 1.3155369758605957, |
|
"learning_rate": 1.2805142374945437e-08, |
|
"logits/chosen": -3.0014488697052, |
|
"logits/rejected": -2.978201389312744, |
|
"logps/chosen": -57.95537185668945, |
|
"logps/rejected": -59.4213752746582, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.03732657432556152, |
|
"rewards/margins": 0.022235842421650887, |
|
"rewards/rejected": -0.05956241488456726, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3990351481736734, |
|
"grad_norm": 1.2982782125473022, |
|
"learning_rate": 1.254344236285888e-08, |
|
"logits/chosen": -2.984819173812866, |
|
"logits/rejected": -2.9616565704345703, |
|
"logps/chosen": -58.2531623840332, |
|
"logps/rejected": -59.219261169433594, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.039121102541685104, |
|
"rewards/margins": 0.019388314336538315, |
|
"rewards/rejected": -0.05850941687822342, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4059269469331497, |
|
"grad_norm": 1.328587532043457, |
|
"learning_rate": 1.2283545695706135e-08, |
|
"logits/chosen": -2.9852476119995117, |
|
"logits/rejected": -2.9641222953796387, |
|
"logps/chosen": -58.166831970214844, |
|
"logps/rejected": -58.347557067871094, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.042398639023303986, |
|
"rewards/margins": 0.019147690385580063, |
|
"rewards/rejected": -0.06154633313417435, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4128187456926258, |
|
"grad_norm": 1.2532403469085693, |
|
"learning_rate": 1.2025489998917254e-08, |
|
"logits/chosen": -3.011920690536499, |
|
"logits/rejected": -2.985671043395996, |
|
"logps/chosen": -61.16510009765625, |
|
"logps/rejected": -59.185394287109375, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.03920884430408478, |
|
"rewards/margins": 0.019202571362257004, |
|
"rewards/rejected": -0.058411408215761185, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4197105444521019, |
|
"grad_norm": 1.379606008529663, |
|
"learning_rate": 1.1769312631403659e-08, |
|
"logits/chosen": -2.9937710762023926, |
|
"logits/rejected": -2.9706432819366455, |
|
"logps/chosen": -59.001708984375, |
|
"logps/rejected": -58.9688720703125, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0409664623439312, |
|
"rewards/margins": 0.017764370888471603, |
|
"rewards/rejected": -0.0587308332324028, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4266023432115782, |
|
"grad_norm": 1.3206267356872559, |
|
"learning_rate": 1.1515050680149687e-08, |
|
"logits/chosen": -3.0447440147399902, |
|
"logits/rejected": -3.0277929306030273, |
|
"logps/chosen": -59.411956787109375, |
|
"logps/rejected": -60.502716064453125, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04452786594629288, |
|
"rewards/margins": 0.01587734930217266, |
|
"rewards/rejected": -0.06040521338582039, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4334941419710545, |
|
"grad_norm": 1.2896159887313843, |
|
"learning_rate": 1.1262740954843456e-08, |
|
"logits/chosen": -3.0021471977233887, |
|
"logits/rejected": -2.971998691558838, |
|
"logps/chosen": -60.27617645263672, |
|
"logps/rejected": -59.302833557128906, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.03701635077595711, |
|
"rewards/margins": 0.021184273064136505, |
|
"rewards/rejected": -0.058200620114803314, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.4403859407305306, |
|
"grad_norm": 1.326338768005371, |
|
"learning_rate": 1.1012419982547905e-08, |
|
"logits/chosen": -2.961845636367798, |
|
"logits/rejected": -2.941849946975708, |
|
"logps/chosen": -57.25081253051758, |
|
"logps/rejected": -59.027015686035156, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04554927721619606, |
|
"rewards/margins": 0.019669197499752045, |
|
"rewards/rejected": -0.0652184709906578, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.447277739490007, |
|
"grad_norm": 1.316919207572937, |
|
"learning_rate": 1.0764124002412758e-08, |
|
"logits/chosen": -3.0356943607330322, |
|
"logits/rejected": -3.012575626373291, |
|
"logps/chosen": -56.51226806640625, |
|
"logps/rejected": -59.925514221191406, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.04187322035431862, |
|
"rewards/margins": 0.02075764164328575, |
|
"rewards/rejected": -0.06263085454702377, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.447277739490007, |
|
"eval_logits/chosen": -3.1055357456207275, |
|
"eval_logits/rejected": -3.099832534790039, |
|
"eval_logps/chosen": -60.38422775268555, |
|
"eval_logps/rejected": -65.65011596679688, |
|
"eval_loss": 0.6894546151161194, |
|
"eval_rewards/accuracies": 0.5861988663673401, |
|
"eval_rewards/chosen": -0.01672333851456642, |
|
"eval_rewards/margins": 0.00797655712813139, |
|
"eval_rewards/rejected": -0.024699894711375237, |
|
"eval_runtime": 383.1595, |
|
"eval_samples_per_second": 11.233, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.454169538249483, |
|
"grad_norm": 1.3180962800979614, |
|
"learning_rate": 1.0517888960428139e-08, |
|
"logits/chosen": -2.964921236038208, |
|
"logits/rejected": -2.94468355178833, |
|
"logps/chosen": -58.661651611328125, |
|
"logps/rejected": -59.41533660888672, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": -0.042625896632671356, |
|
"rewards/margins": 0.023367973044514656, |
|
"rewards/rejected": -0.06599386781454086, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.4610613370089593, |
|
"grad_norm": 1.2736924886703491, |
|
"learning_rate": 1.0273750504220666e-08, |
|
"logits/chosen": -2.982936382293701, |
|
"logits/rejected": -2.9598629474639893, |
|
"logps/chosen": -56.829505920410156, |
|
"logps/rejected": -59.5037956237793, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.04348963871598244, |
|
"rewards/margins": 0.023124249652028084, |
|
"rewards/rejected": -0.06661389768123627, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.4679531357684357, |
|
"grad_norm": 1.305767297744751, |
|
"learning_rate": 1.003174397789269e-08, |
|
"logits/chosen": -2.981260299682617, |
|
"logits/rejected": -2.9577889442443848, |
|
"logps/chosen": -59.381996154785156, |
|
"logps/rejected": -59.3449821472168, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04265505075454712, |
|
"rewards/margins": 0.023621436208486557, |
|
"rewards/rejected": -0.06627649068832397, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.4748449345279118, |
|
"grad_norm": 1.2957626581192017, |
|
"learning_rate": 9.791904416905526e-09, |
|
"logits/chosen": -3.0431559085845947, |
|
"logits/rejected": -3.0270159244537354, |
|
"logps/chosen": -58.642250061035156, |
|
"logps/rejected": -59.5418586730957, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.044856615364551544, |
|
"rewards/margins": 0.016749290749430656, |
|
"rewards/rejected": -0.06160591170191765, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.481736733287388, |
|
"grad_norm": 1.3108528852462769, |
|
"learning_rate": 9.554266543007328e-09, |
|
"logits/chosen": -3.007779598236084, |
|
"logits/rejected": -2.9805774688720703, |
|
"logps/chosen": -58.5262565612793, |
|
"logps/rejected": -59.313941955566406, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.6421874761581421, |
|
"rewards/chosen": -0.0379471480846405, |
|
"rewards/margins": 0.02591646835207939, |
|
"rewards/rejected": -0.0638636127114296, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.4886285320468642, |
|
"grad_norm": 1.2914735078811646, |
|
"learning_rate": 9.318864759206429e-09, |
|
"logits/chosen": -2.9647016525268555, |
|
"logits/rejected": -2.9434256553649902, |
|
"logps/chosen": -58.31499481201172, |
|
"logps/rejected": -57.80283737182617, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": -0.04223569482564926, |
|
"rewards/margins": 0.0191799309104681, |
|
"rewards/rejected": -0.061415620148181915, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.4955203308063405, |
|
"grad_norm": 1.3237831592559814, |
|
"learning_rate": 9.085733144790744e-09, |
|
"logits/chosen": -3.0011582374572754, |
|
"logits/rejected": -2.9859848022460938, |
|
"logps/chosen": -57.24296188354492, |
|
"logps/rejected": -59.595008850097656, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04495619237422943, |
|
"rewards/margins": 0.022577274590730667, |
|
"rewards/rejected": -0.0675334706902504, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5024121295658168, |
|
"grad_norm": 1.409790277481079, |
|
"learning_rate": 8.854905450394113e-09, |
|
"logits/chosen": -3.0098440647125244, |
|
"logits/rejected": -2.983942747116089, |
|
"logps/chosen": -59.406158447265625, |
|
"logps/rejected": -58.25774002075195, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.04101915657520294, |
|
"rewards/margins": 0.020175766199827194, |
|
"rewards/rejected": -0.06119491904973984, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.509303928325293, |
|
"grad_norm": 1.3202848434448242, |
|
"learning_rate": 8.626415093110202e-09, |
|
"logits/chosen": -2.9948554039001465, |
|
"logits/rejected": -2.975142240524292, |
|
"logps/chosen": -56.601722717285156, |
|
"logps/rejected": -59.769569396972656, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04384131729602814, |
|
"rewards/margins": 0.020800283178687096, |
|
"rewards/rejected": -0.06464160233736038, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.516195727084769, |
|
"grad_norm": 1.366294503211975, |
|
"learning_rate": 8.40029515165467e-09, |
|
"logits/chosen": -3.006235361099243, |
|
"logits/rejected": -2.983431577682495, |
|
"logps/chosen": -57.9134635925293, |
|
"logps/rejected": -58.527076721191406, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.04390479251742363, |
|
"rewards/margins": 0.022031091153621674, |
|
"rewards/rejected": -0.065935879945755, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.516195727084769, |
|
"eval_logits/chosen": -3.104142904281616, |
|
"eval_logits/rejected": -3.098437547683716, |
|
"eval_logps/chosen": -60.504878997802734, |
|
"eval_logps/rejected": -65.79142761230469, |
|
"eval_loss": 0.6893645524978638, |
|
"eval_rewards/accuracies": 0.5873606204986572, |
|
"eval_rewards/chosen": -0.01792982593178749, |
|
"eval_rewards/margins": 0.008183243684470654, |
|
"eval_rewards/rejected": -0.02611307054758072, |
|
"eval_runtime": 382.8386, |
|
"eval_samples_per_second": 11.242, |
|
"eval_steps_per_second": 1.405, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5230875258442453, |
|
"grad_norm": 1.290756106376648, |
|
"learning_rate": 8.176578361576358e-09, |
|
"logits/chosen": -2.9781322479248047, |
|
"logits/rejected": -2.959134340286255, |
|
"logps/chosen": -56.6655387878418, |
|
"logps/rejected": -58.62583541870117, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04583312198519707, |
|
"rewards/margins": 0.01810682937502861, |
|
"rewards/rejected": -0.06393995136022568, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5299793246037217, |
|
"grad_norm": 1.2776232957839966, |
|
"learning_rate": 7.955297110518117e-09, |
|
"logits/chosen": -3.0543761253356934, |
|
"logits/rejected": -3.028890371322632, |
|
"logps/chosen": -58.1224365234375, |
|
"logps/rejected": -59.595802307128906, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.04209282249212265, |
|
"rewards/margins": 0.019542943686246872, |
|
"rewards/rejected": -0.06163576990365982, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.5368711233631978, |
|
"grad_norm": 1.349440574645996, |
|
"learning_rate": 7.73648343352806e-09, |
|
"logits/chosen": -3.0225765705108643, |
|
"logits/rejected": -2.996718168258667, |
|
"logps/chosen": -59.868263244628906, |
|
"logps/rejected": -58.85590744018555, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.043428223580121994, |
|
"rewards/margins": 0.022792860865592957, |
|
"rewards/rejected": -0.06622108817100525, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.5437629221226739, |
|
"grad_norm": 1.3181602954864502, |
|
"learning_rate": 7.520169008421775e-09, |
|
"logits/chosen": -2.999849796295166, |
|
"logits/rejected": -2.9812533855438232, |
|
"logps/chosen": -59.130516052246094, |
|
"logps/rejected": -60.17681884765625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04423438385128975, |
|
"rewards/margins": 0.019376900047063828, |
|
"rewards/rejected": -0.06361128389835358, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.5506547208821502, |
|
"grad_norm": 1.389096975326538, |
|
"learning_rate": 7.3063851511963535e-09, |
|
"logits/chosen": -3.01939058303833, |
|
"logits/rejected": -2.992645740509033, |
|
"logps/chosen": -59.214324951171875, |
|
"logps/rejected": -59.09142303466797, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.04182130843400955, |
|
"rewards/margins": 0.023091908544301987, |
|
"rewards/rejected": -0.06491322070360184, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.5575465196416265, |
|
"grad_norm": 1.2921773195266724, |
|
"learning_rate": 7.095162811496716e-09, |
|
"logits/chosen": -2.9625678062438965, |
|
"logits/rejected": -2.947840452194214, |
|
"logps/chosen": -57.61260986328125, |
|
"logps/rejected": -58.932640075683594, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.046415556222200394, |
|
"rewards/margins": 0.01893490180373192, |
|
"rewards/rejected": -0.06535045802593231, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.5644383184011028, |
|
"grad_norm": 1.2538701295852661, |
|
"learning_rate": 6.886532568135017e-09, |
|
"logits/chosen": -2.9978413581848145, |
|
"logits/rejected": -2.98121976852417, |
|
"logps/chosen": -58.2430305480957, |
|
"logps/rejected": -60.4543571472168, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.044888339936733246, |
|
"rewards/margins": 0.019984986633062363, |
|
"rewards/rejected": -0.06487332284450531, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.571330117160579, |
|
"grad_norm": 1.3960515260696411, |
|
"learning_rate": 6.680524624663763e-09, |
|
"logits/chosen": -3.0089173316955566, |
|
"logits/rejected": -2.977341413497925, |
|
"logps/chosen": -60.883209228515625, |
|
"logps/rejected": -59.40845489501953, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": -0.03985728323459625, |
|
"rewards/margins": 0.025174889713525772, |
|
"rewards/rejected": -0.06503216922283173, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.578221915920055, |
|
"grad_norm": 1.1940710544586182, |
|
"learning_rate": 6.477168805003166e-09, |
|
"logits/chosen": -3.00933575630188, |
|
"logits/rejected": -2.982250690460205, |
|
"logps/chosen": -59.1096076965332, |
|
"logps/rejected": -59.2675895690918, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.04165520519018173, |
|
"rewards/margins": 0.023930717259645462, |
|
"rewards/rejected": -0.06558592617511749, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.5851137146795313, |
|
"grad_norm": 1.3209586143493652, |
|
"learning_rate": 6.276494549123546e-09, |
|
"logits/chosen": -3.050356388092041, |
|
"logits/rejected": -3.02972412109375, |
|
"logps/chosen": -58.97772979736328, |
|
"logps/rejected": -59.48607635498047, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.04832325503230095, |
|
"rewards/margins": 0.021100293844938278, |
|
"rewards/rejected": -0.06942354887723923, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5851137146795313, |
|
"eval_logits/chosen": -3.1029651165008545, |
|
"eval_logits/rejected": -3.097285509109497, |
|
"eval_logps/chosen": -60.59327697753906, |
|
"eval_logps/rejected": -65.90728759765625, |
|
"eval_loss": 0.6892400979995728, |
|
"eval_rewards/accuracies": 0.5901486873626709, |
|
"eval_rewards/chosen": -0.018813807517290115, |
|
"eval_rewards/margins": 0.008457801304757595, |
|
"eval_rewards/rejected": -0.027271609753370285, |
|
"eval_runtime": 383.2974, |
|
"eval_samples_per_second": 11.229, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5920055134390076, |
|
"grad_norm": 1.2838672399520874, |
|
"learning_rate": 6.078530908783283e-09, |
|
"logits/chosen": -2.946258068084717, |
|
"logits/rejected": -2.9289188385009766, |
|
"logps/chosen": -57.40240478515625, |
|
"logps/rejected": -58.80238723754883, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.04573064297437668, |
|
"rewards/margins": 0.020323526114225388, |
|
"rewards/rejected": -0.06605416536331177, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.598897312198484, |
|
"grad_norm": 1.3529164791107178, |
|
"learning_rate": 5.883306543322963e-09, |
|
"logits/chosen": -3.0067434310913086, |
|
"logits/rejected": -2.983191967010498, |
|
"logps/chosen": -57.39630126953125, |
|
"logps/rejected": -58.9193000793457, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.045682333409786224, |
|
"rewards/margins": 0.020494289696216583, |
|
"rewards/rejected": -0.06617662310600281, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.60578911095796, |
|
"grad_norm": 1.3721948862075806, |
|
"learning_rate": 5.690849715516346e-09, |
|
"logits/chosen": -2.9921982288360596, |
|
"logits/rejected": -2.972947597503662, |
|
"logps/chosen": -58.18434524536133, |
|
"logps/rejected": -59.79640579223633, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.049553245306015015, |
|
"rewards/margins": 0.019603563472628593, |
|
"rewards/rejected": -0.06915681809186935, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.6126809097174362, |
|
"grad_norm": 1.326244831085205, |
|
"learning_rate": 5.50118828747877e-09, |
|
"logits/chosen": -3.013467311859131, |
|
"logits/rejected": -2.985992908477783, |
|
"logps/chosen": -59.114105224609375, |
|
"logps/rejected": -59.962989807128906, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.04709188640117645, |
|
"rewards/margins": 0.024677757173776627, |
|
"rewards/rejected": -0.07176963984966278, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6195727084769125, |
|
"grad_norm": 1.3307464122772217, |
|
"learning_rate": 5.314349716633484e-09, |
|
"logits/chosen": -2.999783515930176, |
|
"logits/rejected": -2.978919744491577, |
|
"logps/chosen": -58.51006317138672, |
|
"logps/rejected": -59.8082389831543, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04656077176332474, |
|
"rewards/margins": 0.015919920057058334, |
|
"rewards/rejected": -0.06248069554567337, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.6264645072363888, |
|
"grad_norm": 1.3505630493164062, |
|
"learning_rate": 5.130361051736656e-09, |
|
"logits/chosen": -2.992077589035034, |
|
"logits/rejected": -2.9786789417266846, |
|
"logps/chosen": -57.856048583984375, |
|
"logps/rejected": -58.31081008911133, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05111172795295715, |
|
"rewards/margins": 0.01770883984863758, |
|
"rewards/rejected": -0.06882055848836899, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.633356305995865, |
|
"grad_norm": 1.3144451379776, |
|
"learning_rate": 4.9492489289614884e-09, |
|
"logits/chosen": -2.9724109172821045, |
|
"logits/rejected": -2.9529943466186523, |
|
"logps/chosen": -58.423919677734375, |
|
"logps/rejected": -59.29913330078125, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.053545523434877396, |
|
"rewards/margins": 0.019974233582615852, |
|
"rewards/rejected": -0.0735197439789772, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.640248104755341, |
|
"grad_norm": 1.3916033506393433, |
|
"learning_rate": 4.771039568042076e-09, |
|
"logits/chosen": -3.004544734954834, |
|
"logits/rejected": -2.988704204559326, |
|
"logps/chosen": -57.34346389770508, |
|
"logps/rejected": -61.55018997192383, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.04598530754446983, |
|
"rewards/margins": 0.021662291139364243, |
|
"rewards/rejected": -0.06764759868383408, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.6471399035148173, |
|
"grad_norm": 1.4496431350708008, |
|
"learning_rate": 4.595758768477576e-09, |
|
"logits/chosen": -3.0240254402160645, |
|
"logits/rejected": -3.011583089828491, |
|
"logps/chosen": -58.51326370239258, |
|
"logps/rejected": -60.47749710083008, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": -0.05048090219497681, |
|
"rewards/margins": 0.01893479749560356, |
|
"rewards/rejected": -0.06941570341587067, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.6540317022742936, |
|
"grad_norm": 1.3277703523635864, |
|
"learning_rate": 4.423431905797162e-09, |
|
"logits/chosen": -3.039842128753662, |
|
"logits/rejected": -3.0183472633361816, |
|
"logps/chosen": -58.69083786010742, |
|
"logps/rejected": -60.8518180847168, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.04716577008366585, |
|
"rewards/margins": 0.020657068118453026, |
|
"rewards/rejected": -0.06782282888889313, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.6540317022742936, |
|
"eval_logits/chosen": -3.1017863750457764, |
|
"eval_logits/rejected": -3.09609055519104, |
|
"eval_logps/chosen": -60.646873474121094, |
|
"eval_logps/rejected": -65.97390747070312, |
|
"eval_loss": 0.6891800761222839, |
|
"eval_rewards/accuracies": 0.5861988663673401, |
|
"eval_rewards/chosen": -0.019349750131368637, |
|
"eval_rewards/margins": 0.008588053286075592, |
|
"eval_rewards/rejected": -0.027937807142734528, |
|
"eval_runtime": 383.0908, |
|
"eval_samples_per_second": 11.235, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.66092350103377, |
|
"grad_norm": 1.3616927862167358, |
|
"learning_rate": 4.254083927886443e-09, |
|
"logits/chosen": -3.052434206008911, |
|
"logits/rejected": -3.0306789875030518, |
|
"logps/chosen": -60.23524856567383, |
|
"logps/rejected": -59.88490676879883, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.04647786170244217, |
|
"rewards/margins": 0.017560753971338272, |
|
"rewards/rejected": -0.06403861939907074, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.667815299793246, |
|
"grad_norm": 1.3540840148925781, |
|
"learning_rate": 4.0877393513756795e-09, |
|
"logits/chosen": -3.0015170574188232, |
|
"logits/rejected": -2.9834518432617188, |
|
"logps/chosen": -58.74982452392578, |
|
"logps/rejected": -59.712005615234375, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.045878536999225616, |
|
"rewards/margins": 0.018289810046553612, |
|
"rewards/rejected": -0.06416834890842438, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.6747070985527222, |
|
"grad_norm": 1.319036841392517, |
|
"learning_rate": 3.924422258090529e-09, |
|
"logits/chosen": -2.939756155014038, |
|
"logits/rejected": -2.919666290283203, |
|
"logps/chosen": -58.5392951965332, |
|
"logps/rejected": -59.17338943481445, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.04658854380249977, |
|
"rewards/margins": 0.019758421927690506, |
|
"rewards/rejected": -0.06634696573019028, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.6815988973121985, |
|
"grad_norm": 1.3268150091171265, |
|
"learning_rate": 3.764156291565693e-09, |
|
"logits/chosen": -3.0177316665649414, |
|
"logits/rejected": -2.9926140308380127, |
|
"logps/chosen": -58.570648193359375, |
|
"logps/rejected": -58.78978729248047, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04440145939588547, |
|
"rewards/margins": 0.022650301456451416, |
|
"rewards/rejected": -0.06705176085233688, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.6884906960716748, |
|
"grad_norm": 1.3211112022399902, |
|
"learning_rate": 3.6069646536220357e-09, |
|
"logits/chosen": -2.998032331466675, |
|
"logits/rejected": -2.9721641540527344, |
|
"logps/chosen": -60.38201904296875, |
|
"logps/rejected": -60.990257263183594, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": -0.040809061378240585, |
|
"rewards/margins": 0.026985710486769676, |
|
"rewards/rejected": -0.06779477745294571, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.6953824948311509, |
|
"grad_norm": 1.285194993019104, |
|
"learning_rate": 3.4528701010076155e-09, |
|
"logits/chosen": -3.003739833831787, |
|
"logits/rejected": -2.9794375896453857, |
|
"logps/chosen": -60.3626823425293, |
|
"logps/rejected": -61.65105438232422, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.045110072940588, |
|
"rewards/margins": 0.02330555021762848, |
|
"rewards/rejected": -0.06841562688350677, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7022742935906272, |
|
"grad_norm": 1.2852113246917725, |
|
"learning_rate": 3.3018949421032003e-09, |
|
"logits/chosen": -3.0109405517578125, |
|
"logits/rejected": -2.9975745677948, |
|
"logps/chosen": -58.778053283691406, |
|
"logps/rejected": -59.56703567504883, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -0.04846884682774544, |
|
"rewards/margins": 0.01748683862388134, |
|
"rewards/rejected": -0.06595568358898163, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7091660923501033, |
|
"grad_norm": 1.258186936378479, |
|
"learning_rate": 3.154061033692651e-09, |
|
"logits/chosen": -3.0072379112243652, |
|
"logits/rejected": -2.979935884475708, |
|
"logps/chosen": -59.062705993652344, |
|
"logps/rejected": -58.03764724731445, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.04467375949025154, |
|
"rewards/margins": 0.026565441861748695, |
|
"rewards/rejected": -0.07123919576406479, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7160578911095796, |
|
"grad_norm": 1.3505935668945312, |
|
"learning_rate": 3.0093897777987098e-09, |
|
"logits/chosen": -3.0517494678497314, |
|
"logits/rejected": -3.0366005897521973, |
|
"logps/chosen": -58.27477264404297, |
|
"logps/rejected": -61.6846923828125, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.04682334139943123, |
|
"rewards/margins": 0.016174782067537308, |
|
"rewards/rejected": -0.06299812346696854, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.722949689869056, |
|
"grad_norm": 1.3154429197311401, |
|
"learning_rate": 2.8679021185845975e-09, |
|
"logits/chosen": -3.023200273513794, |
|
"logits/rejected": -2.997267246246338, |
|
"logps/chosen": -58.07569122314453, |
|
"logps/rejected": -59.87085723876953, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.620312511920929, |
|
"rewards/chosen": -0.046663668006658554, |
|
"rewards/margins": 0.022471796721220016, |
|
"rewards/rejected": -0.06913547217845917, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.722949689869056, |
|
"eval_logits/chosen": -3.1012966632843018, |
|
"eval_logits/rejected": -3.095568895339966, |
|
"eval_logps/chosen": -60.681880950927734, |
|
"eval_logps/rejected": -66.00990295410156, |
|
"eval_loss": 0.689177393913269, |
|
"eval_rewards/accuracies": 0.5850371718406677, |
|
"eval_rewards/chosen": -0.019699882715940475, |
|
"eval_rewards/margins": 0.008597951382398605, |
|
"eval_rewards/rejected": -0.02829783223569393, |
|
"eval_runtime": 383.2161, |
|
"eval_samples_per_second": 11.231, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.729841488628532, |
|
"grad_norm": 1.3162225484848022, |
|
"learning_rate": 2.7296185393219316e-09, |
|
"logits/chosen": -3.0459542274475098, |
|
"logits/rejected": -3.0207812786102295, |
|
"logps/chosen": -59.15156173706055, |
|
"logps/rejected": -59.254676818847656, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.045805253088474274, |
|
"rewards/margins": 0.020932307466864586, |
|
"rewards/rejected": -0.06673755496740341, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.7367332873880081, |
|
"grad_norm": 1.328461766242981, |
|
"learning_rate": 2.5945590594253305e-09, |
|
"logits/chosen": -2.9799602031707764, |
|
"logits/rejected": -2.9713258743286133, |
|
"logps/chosen": -58.0362548828125, |
|
"logps/rejected": -60.381080627441406, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.0511082224547863, |
|
"rewards/margins": 0.012231842614710331, |
|
"rewards/rejected": -0.06334006786346436, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.7436250861474845, |
|
"grad_norm": 1.2932238578796387, |
|
"learning_rate": 2.4627432315541986e-09, |
|
"logits/chosen": -3.055954694747925, |
|
"logits/rejected": -3.0452940464019775, |
|
"logps/chosen": -58.16063690185547, |
|
"logps/rejected": -61.20969772338867, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.04746638238430023, |
|
"rewards/margins": 0.02094622328877449, |
|
"rewards/rejected": -0.06841260939836502, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.7505168849069608, |
|
"grad_norm": 1.3644834756851196, |
|
"learning_rate": 2.3341901387820717e-09, |
|
"logits/chosen": -3.0201711654663086, |
|
"logits/rejected": -2.995832681655884, |
|
"logps/chosen": -59.7026481628418, |
|
"logps/rejected": -60.433990478515625, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04398275539278984, |
|
"rewards/margins": 0.02229396626353264, |
|
"rewards/rejected": -0.06627672165632248, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.757408683666437, |
|
"grad_norm": 1.3037844896316528, |
|
"learning_rate": 2.2089183918339445e-09, |
|
"logits/chosen": -2.996652126312256, |
|
"logits/rejected": -2.976022243499756, |
|
"logps/chosen": -57.28471755981445, |
|
"logps/rejected": -59.08478546142578, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.04602036252617836, |
|
"rewards/margins": 0.019708681851625443, |
|
"rewards/rejected": -0.0657290443778038, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.7643004824259132, |
|
"grad_norm": 1.3543256521224976, |
|
"learning_rate": 2.086946126391981e-09, |
|
"logits/chosen": -2.9888083934783936, |
|
"logits/rejected": -2.9732577800750732, |
|
"logps/chosen": -56.89265823364258, |
|
"logps/rejected": -60.136573791503906, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.05068554729223251, |
|
"rewards/margins": 0.019495617598295212, |
|
"rewards/rejected": -0.07018117606639862, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.7711922811853893, |
|
"grad_norm": 1.3135391473770142, |
|
"learning_rate": 1.9682910004700155e-09, |
|
"logits/chosen": -3.000701904296875, |
|
"logits/rejected": -2.9841086864471436, |
|
"logps/chosen": -59.79181671142578, |
|
"logps/rejected": -60.78386306762695, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.048417720943689346, |
|
"rewards/margins": 0.02133244276046753, |
|
"rewards/rejected": -0.06975016742944717, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.7780840799448656, |
|
"grad_norm": 1.2864971160888672, |
|
"learning_rate": 1.852970191857159e-09, |
|
"logits/chosen": -2.9674811363220215, |
|
"logits/rejected": -2.94804048538208, |
|
"logps/chosen": -59.39619064331055, |
|
"logps/rejected": -60.783851623535156, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04619085043668747, |
|
"rewards/margins": 0.02428482472896576, |
|
"rewards/rejected": -0.07047567516565323, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.784975878704342, |
|
"grad_norm": 1.2950899600982666, |
|
"learning_rate": 1.741000395630976e-09, |
|
"logits/chosen": -3.034547805786133, |
|
"logits/rejected": -3.0095696449279785, |
|
"logps/chosen": -58.98634719848633, |
|
"logps/rejected": -60.004661560058594, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.044211823493242264, |
|
"rewards/margins": 0.02347356267273426, |
|
"rewards/rejected": -0.06768538057804108, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.791867677463818, |
|
"grad_norm": 1.3900827169418335, |
|
"learning_rate": 1.6323978217405277e-09, |
|
"logits/chosen": -2.962319850921631, |
|
"logits/rejected": -2.9394538402557373, |
|
"logps/chosen": -57.80451202392578, |
|
"logps/rejected": -60.771339416503906, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.0485307052731514, |
|
"rewards/margins": 0.022718578577041626, |
|
"rewards/rejected": -0.07124929130077362, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.791867677463818, |
|
"eval_logits/chosen": -3.1006553173065186, |
|
"eval_logits/rejected": -3.094916820526123, |
|
"eval_logps/chosen": -60.688175201416016, |
|
"eval_logps/rejected": -66.03443145751953, |
|
"eval_loss": 0.6890937089920044, |
|
"eval_rewards/accuracies": 0.5889869928359985, |
|
"eval_rewards/chosen": -0.019762787967920303, |
|
"eval_rewards/margins": 0.008780322037637234, |
|
"eval_rewards/rejected": -0.028543109074234962, |
|
"eval_runtime": 383.3128, |
|
"eval_samples_per_second": 11.228, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7987594762232941, |
|
"grad_norm": 1.325190544128418, |
|
"learning_rate": 1.5271781926596449e-09, |
|
"logits/chosen": -3.0393474102020264, |
|
"logits/rejected": -3.0156404972076416, |
|
"logps/chosen": -60.42161178588867, |
|
"logps/rejected": -61.184486389160156, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.04576939716935158, |
|
"rewards/margins": 0.022201048210263252, |
|
"rewards/rejected": -0.06797045469284058, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8056512749827704, |
|
"grad_norm": 1.3093925714492798, |
|
"learning_rate": 1.4253567411107643e-09, |
|
"logits/chosen": -2.989856243133545, |
|
"logits/rejected": -2.9666576385498047, |
|
"logps/chosen": -58.600990295410156, |
|
"logps/rejected": -60.18854904174805, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.0458240807056427, |
|
"rewards/margins": 0.021440699696540833, |
|
"rewards/rejected": -0.06726478040218353, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8125430737422468, |
|
"grad_norm": 1.2918739318847656, |
|
"learning_rate": 1.326948207859685e-09, |
|
"logits/chosen": -3.0238237380981445, |
|
"logits/rejected": -3.0077781677246094, |
|
"logps/chosen": -57.79582595825195, |
|
"logps/rejected": -60.6348762512207, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.046499475836753845, |
|
"rewards/margins": 0.021402059122920036, |
|
"rewards/rejected": -0.06790152937173843, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.819434872501723, |
|
"grad_norm": 1.349001407623291, |
|
"learning_rate": 1.2319668395815358e-09, |
|
"logits/chosen": -3.0028393268585205, |
|
"logits/rejected": -2.9857001304626465, |
|
"logps/chosen": -58.69614791870117, |
|
"logps/rejected": -59.920021057128906, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.04932459071278572, |
|
"rewards/margins": 0.0200694240629673, |
|
"rewards/rejected": -0.06939400732517242, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.8263266712611992, |
|
"grad_norm": 1.2818963527679443, |
|
"learning_rate": 1.1404263867982738e-09, |
|
"logits/chosen": -3.0455758571624756, |
|
"logits/rejected": -3.0237960815429688, |
|
"logps/chosen": -59.25251388549805, |
|
"logps/rejected": -60.45496368408203, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.04728539660573006, |
|
"rewards/margins": 0.021762443706393242, |
|
"rewards/rejected": -0.06904784590005875, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.8332184700206753, |
|
"grad_norm": 1.2889119386672974, |
|
"learning_rate": 1.0523401018880134e-09, |
|
"logits/chosen": -2.983532428741455, |
|
"logits/rejected": -2.9646944999694824, |
|
"logps/chosen": -58.277976989746094, |
|
"logps/rejected": -59.491722106933594, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.047298818826675415, |
|
"rewards/margins": 0.020261693745851517, |
|
"rewards/rejected": -0.06756050884723663, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.8401102687801516, |
|
"grad_norm": 1.3029212951660156, |
|
"learning_rate": 9.677207371664608e-10, |
|
"logits/chosen": -3.0146260261535645, |
|
"logits/rejected": -2.9899539947509766, |
|
"logps/chosen": -59.18970489501953, |
|
"logps/rejected": -60.14207077026367, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.05002979189157486, |
|
"rewards/margins": 0.02303471975028515, |
|
"rewards/rejected": -0.07306452095508575, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.847002067539628, |
|
"grad_norm": 1.260703444480896, |
|
"learning_rate": 8.865805430407575e-10, |
|
"logits/chosen": -3.0160889625549316, |
|
"logits/rejected": -2.9888625144958496, |
|
"logps/chosen": -58.788368225097656, |
|
"logps/rejected": -59.13869094848633, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.048010729253292084, |
|
"rewards/margins": 0.0222895760089159, |
|
"rewards/rejected": -0.07030030339956284, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.853893866299104, |
|
"grad_norm": 1.3435821533203125, |
|
"learning_rate": 8.089312662359904e-10, |
|
"logits/chosen": -3.000649929046631, |
|
"logits/rejected": -2.9763736724853516, |
|
"logps/chosen": -58.345603942871094, |
|
"logps/rejected": -59.42350387573242, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04748475179076195, |
|
"rewards/margins": 0.02055184543132782, |
|
"rewards/rejected": -0.06803660839796066, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.8607856650585803, |
|
"grad_norm": 1.3498848676681519, |
|
"learning_rate": 7.34784148094586e-10, |
|
"logits/chosen": -3.062194347381592, |
|
"logits/rejected": -3.040365219116211, |
|
"logps/chosen": -58.02552032470703, |
|
"logps/rejected": -60.96380615234375, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.04390079155564308, |
|
"rewards/margins": 0.023236598819494247, |
|
"rewards/rejected": -0.06713739782571793, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.8607856650585803, |
|
"eval_logits/chosen": -3.1006381511688232, |
|
"eval_logits/rejected": -3.0949079990386963, |
|
"eval_logps/chosen": -60.71648025512695, |
|
"eval_logps/rejected": -66.05257415771484, |
|
"eval_loss": 0.6891458034515381, |
|
"eval_rewards/accuracies": 0.5889869928359985, |
|
"eval_rewards/chosen": -0.020045887678861618, |
|
"eval_rewards/margins": 0.008678610436618328, |
|
"eval_rewards/rejected": -0.02872449718415737, |
|
"eval_runtime": 383.351, |
|
"eval_samples_per_second": 11.227, |
|
"eval_steps_per_second": 1.403, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.8676774638180564, |
|
"grad_norm": 1.2470507621765137, |
|
"learning_rate": 6.641499229489145e-10, |
|
"logits/chosen": -3.003091335296631, |
|
"logits/rejected": -2.9715797901153564, |
|
"logps/chosen": -58.2459831237793, |
|
"logps/rejected": -58.3682975769043, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": -0.04425545781850815, |
|
"rewards/margins": 0.02511006034910679, |
|
"rewards/rejected": -0.06936550885438919, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.8745692625775328, |
|
"grad_norm": 1.3162429332733154, |
|
"learning_rate": 5.970388165672691e-10, |
|
"logits/chosen": -2.99006724357605, |
|
"logits/rejected": -2.971386671066284, |
|
"logps/chosen": -57.105255126953125, |
|
"logps/rejected": -60.72968673706055, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.0457664355635643, |
|
"rewards/margins": 0.02246815897524357, |
|
"rewards/rejected": -0.06823460012674332, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.881461061337009, |
|
"grad_norm": 1.4187453985214233, |
|
"learning_rate": 5.334605446734585e-10, |
|
"logits/chosen": -3.0353336334228516, |
|
"logits/rejected": -3.007887363433838, |
|
"logps/chosen": -59.300804138183594, |
|
"logps/rejected": -59.99883270263672, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.6421874761581421, |
|
"rewards/chosen": -0.043865978717803955, |
|
"rewards/margins": 0.02479901909828186, |
|
"rewards/rejected": -0.06866499781608582, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.8883528600964852, |
|
"grad_norm": 1.3848966360092163, |
|
"learning_rate": 4.734243115402825e-10, |
|
"logits/chosen": -2.9592947959899902, |
|
"logits/rejected": -2.9372572898864746, |
|
"logps/chosen": -59.5694465637207, |
|
"logps/rejected": -60.315895080566406, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.047974247485399246, |
|
"rewards/margins": 0.02036571130156517, |
|
"rewards/rejected": -0.06833995878696442, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.8952446588559613, |
|
"grad_norm": 1.4468791484832764, |
|
"learning_rate": 4.169388086569886e-10, |
|
"logits/chosen": -3.0385963916778564, |
|
"logits/rejected": -3.0236904621124268, |
|
"logps/chosen": -58.946388244628906, |
|
"logps/rejected": -61.70532989501953, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": -0.04743208736181259, |
|
"rewards/margins": 0.020196830853819847, |
|
"rewards/rejected": -0.06762892007827759, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9021364576154376, |
|
"grad_norm": 1.3899776935577393, |
|
"learning_rate": 3.640122134710294e-10, |
|
"logits/chosen": -3.06270170211792, |
|
"logits/rejected": -3.0440831184387207, |
|
"logps/chosen": -59.356605529785156, |
|
"logps/rejected": -60.465003967285156, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0449027419090271, |
|
"rewards/margins": 0.023394212126731873, |
|
"rewards/rejected": -0.06829695403575897, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.909028256374914, |
|
"grad_norm": 1.328192114830017, |
|
"learning_rate": 3.1465218820418415e-10, |
|
"logits/chosen": -3.037365436553955, |
|
"logits/rejected": -3.0028114318847656, |
|
"logps/chosen": -58.11069869995117, |
|
"logps/rejected": -59.29419708251953, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": -0.04381219670176506, |
|
"rewards/margins": 0.026473551988601685, |
|
"rewards/rejected": -0.07028575241565704, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.9159200551343902, |
|
"grad_norm": 1.2775218486785889, |
|
"learning_rate": 2.688658787433157e-10, |
|
"logits/chosen": -3.022888660430908, |
|
"logits/rejected": -3.000300884246826, |
|
"logps/chosen": -60.5079460144043, |
|
"logps/rejected": -60.63434600830078, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.04978417605161667, |
|
"rewards/margins": 0.021695107221603394, |
|
"rewards/rejected": -0.07147928327322006, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.9228118538938663, |
|
"grad_norm": 1.2735910415649414, |
|
"learning_rate": 2.266599136058367e-10, |
|
"logits/chosen": -3.0028035640716553, |
|
"logits/rejected": -2.9840023517608643, |
|
"logps/chosen": -59.78889083862305, |
|
"logps/rejected": -59.44769287109375, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.589062511920929, |
|
"rewards/chosen": -0.04813474044203758, |
|
"rewards/margins": 0.015620408579707146, |
|
"rewards/rejected": -0.06375513970851898, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.9297036526533424, |
|
"grad_norm": 1.376592755317688, |
|
"learning_rate": 1.8804040298009693e-10, |
|
"logits/chosen": -3.0288257598876953, |
|
"logits/rejected": -3.0026650428771973, |
|
"logps/chosen": -59.218475341796875, |
|
"logps/rejected": -58.154075622558594, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.04387739300727844, |
|
"rewards/margins": 0.02458575740456581, |
|
"rewards/rejected": -0.06846315413713455, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.9297036526533424, |
|
"eval_logits/chosen": -3.1007766723632812, |
|
"eval_logits/rejected": -3.095076084136963, |
|
"eval_logps/chosen": -60.726348876953125, |
|
"eval_logps/rejected": -66.07279968261719, |
|
"eval_loss": 0.6890966892242432, |
|
"eval_rewards/accuracies": 0.5841078162193298, |
|
"eval_rewards/chosen": -0.020144494250416756, |
|
"eval_rewards/margins": 0.008782317861914635, |
|
"eval_rewards/rejected": -0.02892681024968624, |
|
"eval_runtime": 383.6267, |
|
"eval_samples_per_second": 11.219, |
|
"eval_steps_per_second": 1.402, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.9365954514128187, |
|
"grad_norm": 1.2682085037231445, |
|
"learning_rate": 1.5301293784081847e-10, |
|
"logits/chosen": -2.9736599922180176, |
|
"logits/rejected": -2.9589531421661377, |
|
"logps/chosen": -58.26537322998047, |
|
"logps/rejected": -60.63109588623047, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -0.0504550039768219, |
|
"rewards/margins": 0.019359614700078964, |
|
"rewards/rejected": -0.06981462240219116, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.943487250172295, |
|
"grad_norm": 1.3405542373657227, |
|
"learning_rate": 1.2158258913967102e-10, |
|
"logits/chosen": -3.0063540935516357, |
|
"logits/rejected": -2.9755642414093018, |
|
"logps/chosen": -60.50700759887695, |
|
"logps/rejected": -58.998687744140625, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.04506916552782059, |
|
"rewards/margins": 0.023324180394411087, |
|
"rewards/rejected": -0.06839334219694138, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.9503790489317712, |
|
"grad_norm": 1.3689327239990234, |
|
"learning_rate": 9.37539070711646e-11, |
|
"logits/chosen": -3.0321671962738037, |
|
"logits/rejected": -3.012648820877075, |
|
"logps/chosen": -60.28644943237305, |
|
"logps/rejected": -60.640167236328125, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.042195506393909454, |
|
"rewards/margins": 0.02599485218524933, |
|
"rewards/rejected": -0.06819035857915878, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.9572708476912473, |
|
"grad_norm": 1.3046759366989136, |
|
"learning_rate": 6.953092041389607e-11, |
|
"logits/chosen": -3.014383554458618, |
|
"logits/rejected": -2.9899418354034424, |
|
"logps/chosen": -59.1005859375, |
|
"logps/rejected": -59.03815460205078, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.04825712740421295, |
|
"rewards/margins": 0.023372991010546684, |
|
"rewards/rejected": -0.07163011282682419, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.9641626464507236, |
|
"grad_norm": 1.3823450803756714, |
|
"learning_rate": 4.891713594731006e-11, |
|
"logits/chosen": -3.0164265632629395, |
|
"logits/rejected": -2.993161678314209, |
|
"logps/chosen": -58.6377067565918, |
|
"logps/rejected": -59.821807861328125, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04551283270120621, |
|
"rewards/margins": 0.021898990496993065, |
|
"rewards/rejected": -0.06741182506084442, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.9710544452102, |
|
"grad_norm": 1.2972795963287354, |
|
"learning_rate": 3.191553794401336e-11, |
|
"logits/chosen": -3.0041041374206543, |
|
"logits/rejected": -2.9793601036071777, |
|
"logps/chosen": -58.8316535949707, |
|
"logps/rejected": -59.15874481201172, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": -0.04608858376741409, |
|
"rewards/margins": 0.018982943147420883, |
|
"rewards/rejected": -0.06507153064012527, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.9779462439696762, |
|
"grad_norm": 1.4782917499542236, |
|
"learning_rate": 1.8528587737753898e-11, |
|
"logits/chosen": -3.00868558883667, |
|
"logits/rejected": -2.981982707977295, |
|
"logps/chosen": -59.979164123535156, |
|
"logps/rejected": -59.065032958984375, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04683419317007065, |
|
"rewards/margins": 0.025204036384820938, |
|
"rewards/rejected": -0.07203822582960129, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.9848380427291523, |
|
"grad_norm": 1.387197494506836, |
|
"learning_rate": 8.758223367075212e-12, |
|
"logits/chosen": -3.0078389644622803, |
|
"logits/rejected": -2.978468656539917, |
|
"logps/chosen": -59.639747619628906, |
|
"logps/rejected": -58.05632781982422, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6265624761581421, |
|
"rewards/chosen": -0.04575073719024658, |
|
"rewards/margins": 0.021932676434516907, |
|
"rewards/rejected": -0.06768341362476349, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.9917298414886284, |
|
"grad_norm": 1.3310401439666748, |
|
"learning_rate": 2.605859294749213e-12, |
|
"logits/chosen": -3.018655300140381, |
|
"logits/rejected": -2.994286298751831, |
|
"logps/chosen": -57.234657287597656, |
|
"logps/rejected": -59.07853317260742, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.046042539179325104, |
|
"rewards/margins": 0.022625811398029327, |
|
"rewards/rejected": -0.06866835057735443, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.9986216402481047, |
|
"grad_norm": 1.3212953805923462, |
|
"learning_rate": 7.2386203012198e-14, |
|
"logits/chosen": -3.0002474784851074, |
|
"logits/rejected": -2.9772603511810303, |
|
"logps/chosen": -59.88157272338867, |
|
"logps/rejected": -60.886680603027344, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.614062488079071, |
|
"rewards/chosen": -0.04801579564809799, |
|
"rewards/margins": 0.0204045120626688, |
|
"rewards/rejected": -0.06842031329870224, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.9986216402481047, |
|
"eval_logits/chosen": -3.100578546524048, |
|
"eval_logits/rejected": -3.094856023788452, |
|
"eval_logps/chosen": -60.72254180908203, |
|
"eval_logps/rejected": -66.06378173828125, |
|
"eval_loss": 0.6891194581985474, |
|
"eval_rewards/accuracies": 0.5910780429840088, |
|
"eval_rewards/chosen": -0.020106395706534386, |
|
"eval_rewards/margins": 0.008730227127671242, |
|
"eval_rewards/rejected": -0.028836622834205627, |
|
"eval_runtime": 383.6501, |
|
"eval_samples_per_second": 11.219, |
|
"eval_steps_per_second": 1.402, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2902, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6870454205553093, |
|
"train_runtime": 56536.4846, |
|
"train_samples_per_second": 3.285, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2902, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|