|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 39.25, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 37.75, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.07136535644531, |
|
"logits/rejected": 80.77804565429688, |
|
"logps/chosen": -34.25458526611328, |
|
"logps/rejected": -33.03440475463867, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.007714875973761082, |
|
"rewards/margins": 0.03788409009575844, |
|
"rewards/rejected": -0.045598965138196945, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 26.25, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.65422058105469, |
|
"logits/rejected": 80.54401397705078, |
|
"logps/chosen": -33.63849639892578, |
|
"logps/rejected": -30.794116973876953, |
|
"loss": 0.708, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.030845394358038902, |
|
"rewards/margins": 0.04082341492176056, |
|
"rewards/rejected": -0.009978031739592552, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 38.25, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.5073013305664, |
|
"logits/rejected": 82.5381088256836, |
|
"logps/chosen": -33.88646697998047, |
|
"logps/rejected": -31.181421279907227, |
|
"loss": 0.7746, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.07581041753292084, |
|
"rewards/margins": -0.06963472068309784, |
|
"rewards/rejected": 0.14544512331485748, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 31.625, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.06532287597656, |
|
"logits/rejected": 81.06108093261719, |
|
"logps/chosen": -32.81906509399414, |
|
"logps/rejected": -33.26140594482422, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.21299926936626434, |
|
"rewards/margins": 0.14872975647449493, |
|
"rewards/rejected": 0.0642695277929306, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 23.0, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.69737243652344, |
|
"logits/rejected": 78.7103500366211, |
|
"logps/chosen": -30.65850257873535, |
|
"logps/rejected": -30.81766128540039, |
|
"loss": 0.6962, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3280490040779114, |
|
"rewards/margins": 0.17467446625232697, |
|
"rewards/rejected": 0.1533745527267456, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 31.625, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.20633697509766, |
|
"logits/rejected": 83.25883483886719, |
|
"logps/chosen": -30.961681365966797, |
|
"logps/rejected": -29.538171768188477, |
|
"loss": 0.703, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.12808682024478912, |
|
"rewards/margins": 0.09667714685201645, |
|
"rewards/rejected": 0.03140967711806297, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 53.25, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.81951141357422, |
|
"logits/rejected": 83.84638977050781, |
|
"logps/chosen": -30.67291259765625, |
|
"logps/rejected": -33.11872482299805, |
|
"loss": 0.755, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.026334354653954506, |
|
"rewards/margins": 0.02227923832833767, |
|
"rewards/rejected": -0.04861358925700188, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 31.75, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.4664306640625, |
|
"logits/rejected": 81.44920349121094, |
|
"logps/chosen": -31.316049575805664, |
|
"logps/rejected": -31.0085391998291, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.11333731561899185, |
|
"rewards/margins": 0.2638704478740692, |
|
"rewards/rejected": -0.15053315460681915, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 37.0, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.19766998291016, |
|
"logits/rejected": 78.16535186767578, |
|
"logps/chosen": -32.48051071166992, |
|
"logps/rejected": -31.223648071289062, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.09460089355707169, |
|
"rewards/margins": 0.2579067349433899, |
|
"rewards/rejected": -0.1633058488368988, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 31.5, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.43191528320312, |
|
"logits/rejected": 83.45047760009766, |
|
"logps/chosen": -34.02558135986328, |
|
"logps/rejected": -31.787883758544922, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.16764816641807556, |
|
"rewards/margins": 0.1900513470172882, |
|
"rewards/rejected": -0.022403212264180183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.71414947509766, |
|
"eval_logits/rejected": 98.70475769042969, |
|
"eval_logps/chosen": -32.44282531738281, |
|
"eval_logps/rejected": -36.040138244628906, |
|
"eval_loss": 0.7398820519447327, |
|
"eval_rewards/accuracies": 0.5245016813278198, |
|
"eval_rewards/chosen": 0.00021068855130579323, |
|
"eval_rewards/margins": 0.04437926039099693, |
|
"eval_rewards/rejected": -0.04416857287287712, |
|
"eval_runtime": 104.2075, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 40.25, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.59847259521484, |
|
"logits/rejected": 83.49092102050781, |
|
"logps/chosen": -32.43052673339844, |
|
"logps/rejected": -32.78325271606445, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3553674817085266, |
|
"rewards/margins": 0.43178611993789673, |
|
"rewards/rejected": -0.07641863822937012, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 46.5, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.7637710571289, |
|
"logits/rejected": 83.87000274658203, |
|
"logps/chosen": -28.259990692138672, |
|
"logps/rejected": -35.35393524169922, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.40175461769104004, |
|
"rewards/margins": 0.33862805366516113, |
|
"rewards/rejected": 0.06312654912471771, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 24.875, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.89453125, |
|
"logits/rejected": 80.9158706665039, |
|
"logps/chosen": -30.432043075561523, |
|
"logps/rejected": -32.080535888671875, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2851874530315399, |
|
"rewards/margins": 0.3745357096195221, |
|
"rewards/rejected": -0.08934825658798218, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 25.5, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.0683822631836, |
|
"logits/rejected": 82.07270812988281, |
|
"logps/chosen": -27.02596092224121, |
|
"logps/rejected": -33.121150970458984, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2528177499771118, |
|
"rewards/margins": 0.6714814305305481, |
|
"rewards/rejected": -0.4186636805534363, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 25.375, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.59815979003906, |
|
"logits/rejected": 80.57023620605469, |
|
"logps/chosen": -28.871845245361328, |
|
"logps/rejected": -33.09119415283203, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.31036004424095154, |
|
"rewards/margins": 0.6251744627952576, |
|
"rewards/rejected": -0.3148145079612732, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 44.25, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.46113586425781, |
|
"logits/rejected": 82.46646118164062, |
|
"logps/chosen": -33.629737854003906, |
|
"logps/rejected": -30.432525634765625, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.30420786142349243, |
|
"rewards/margins": 0.5921996235847473, |
|
"rewards/rejected": -0.2879917025566101, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 33.0, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 83.26214599609375, |
|
"logits/rejected": 83.21092224121094, |
|
"logps/chosen": -30.77018165588379, |
|
"logps/rejected": -32.57013702392578, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2934645712375641, |
|
"rewards/margins": 0.6235076189041138, |
|
"rewards/rejected": -0.33004307746887207, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 27.125, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.83445739746094, |
|
"logits/rejected": 80.81375885009766, |
|
"logps/chosen": -30.401935577392578, |
|
"logps/rejected": -31.623117446899414, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4771292805671692, |
|
"rewards/margins": 0.7566738724708557, |
|
"rewards/rejected": -0.2795446515083313, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 14.0, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 82.55574035644531, |
|
"logits/rejected": 82.5384521484375, |
|
"logps/chosen": -30.206974029541016, |
|
"logps/rejected": -30.71441078186035, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3356670141220093, |
|
"rewards/margins": 0.4834977686405182, |
|
"rewards/rejected": -0.14783072471618652, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 15.375, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 78.06065368652344, |
|
"logits/rejected": 78.00971984863281, |
|
"logps/chosen": -33.789581298828125, |
|
"logps/rejected": -32.68096923828125, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.577893853187561, |
|
"rewards/margins": 0.690432071685791, |
|
"rewards/rejected": -0.11253812164068222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.62848663330078, |
|
"eval_logits/rejected": 98.60428619384766, |
|
"eval_logps/chosen": -32.65570068359375, |
|
"eval_logps/rejected": -36.321441650390625, |
|
"eval_loss": 0.7252821922302246, |
|
"eval_rewards/accuracies": 0.530315637588501, |
|
"eval_rewards/chosen": -0.12751542031764984, |
|
"eval_rewards/margins": 0.08543363958597183, |
|
"eval_rewards/rejected": -0.2129490226507187, |
|
"eval_runtime": 103.8957, |
|
"eval_samples_per_second": 3.301, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 52.0, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 80.63914489746094, |
|
"logits/rejected": 80.54652404785156, |
|
"logps/chosen": -33.34014129638672, |
|
"logps/rejected": -35.32052230834961, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3633476793766022, |
|
"rewards/margins": 0.5640031099319458, |
|
"rewards/rejected": -0.20065537095069885, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 19.625, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 82.76437377929688, |
|
"logits/rejected": 82.84717559814453, |
|
"logps/chosen": -31.025707244873047, |
|
"logps/rejected": -31.30951499938965, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.553946852684021, |
|
"rewards/margins": 0.9022806286811829, |
|
"rewards/rejected": -0.3483339250087738, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 32.75, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 79.89958190917969, |
|
"logits/rejected": 79.95211791992188, |
|
"logps/chosen": -32.34553146362305, |
|
"logps/rejected": -34.391754150390625, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2761251628398895, |
|
"rewards/margins": 0.5038853287696838, |
|
"rewards/rejected": -0.2277601659297943, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 35.5, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 82.25331115722656, |
|
"logits/rejected": 82.53690338134766, |
|
"logps/chosen": -30.6766357421875, |
|
"logps/rejected": -31.96030044555664, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.6068586111068726, |
|
"rewards/margins": 0.8638145327568054, |
|
"rewards/rejected": -0.25695592164993286, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 30.5, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 80.93089294433594, |
|
"logits/rejected": 80.99276733398438, |
|
"logps/chosen": -27.04372787475586, |
|
"logps/rejected": -30.084264755249023, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3313008248806, |
|
"rewards/margins": 0.441417396068573, |
|
"rewards/rejected": -0.11011654138565063, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 29.125, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 78.20941162109375, |
|
"logits/rejected": 78.33964538574219, |
|
"logps/chosen": -30.433767318725586, |
|
"logps/rejected": -36.57436752319336, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6763362884521484, |
|
"rewards/margins": 0.9599828720092773, |
|
"rewards/rejected": -0.28364673256874084, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 21.5, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 77.5750503540039, |
|
"logits/rejected": 77.60489654541016, |
|
"logps/chosen": -30.800561904907227, |
|
"logps/rejected": -31.87221908569336, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5870199203491211, |
|
"rewards/margins": 0.807098388671875, |
|
"rewards/rejected": -0.2200784683227539, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 33.25, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 80.28849029541016, |
|
"logits/rejected": 80.06718444824219, |
|
"logps/chosen": -31.078380584716797, |
|
"logps/rejected": -29.8966007232666, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4389079213142395, |
|
"rewards/margins": 0.5766692757606506, |
|
"rewards/rejected": -0.13776138424873352, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 17.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 80.41847229003906, |
|
"logits/rejected": 80.33303833007812, |
|
"logps/chosen": -32.99018478393555, |
|
"logps/rejected": -32.6365966796875, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6684367656707764, |
|
"rewards/margins": 1.0402761697769165, |
|
"rewards/rejected": -0.37183937430381775, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 34.25, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 76.02632141113281, |
|
"logits/rejected": 76.11949920654297, |
|
"logps/chosen": -32.25402069091797, |
|
"logps/rejected": -29.283954620361328, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6910130381584167, |
|
"rewards/margins": 0.8001711964607239, |
|
"rewards/rejected": -0.10915807634592056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.64820861816406, |
|
"eval_logits/rejected": 98.62397003173828, |
|
"eval_logps/chosen": -32.62586212158203, |
|
"eval_logps/rejected": -36.32143783569336, |
|
"eval_loss": 0.7248644828796387, |
|
"eval_rewards/accuracies": 0.5282392501831055, |
|
"eval_rewards/chosen": -0.10961288958787918, |
|
"eval_rewards/margins": 0.10333485901355743, |
|
"eval_rewards/rejected": -0.2129477560520172, |
|
"eval_runtime": 104.0116, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 27.625, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 83.13574981689453, |
|
"logits/rejected": 83.16615295410156, |
|
"logps/chosen": -29.959243774414062, |
|
"logps/rejected": -32.55767059326172, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.542574405670166, |
|
"rewards/margins": 0.7573197484016418, |
|
"rewards/rejected": -0.21474528312683105, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 21.625, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 80.65809631347656, |
|
"logits/rejected": 80.65727233886719, |
|
"logps/chosen": -30.505443572998047, |
|
"logps/rejected": -29.11099624633789, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6558700799942017, |
|
"rewards/margins": 0.7707726359367371, |
|
"rewards/rejected": -0.11490253359079361, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 19.25, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 77.81159210205078, |
|
"logits/rejected": 77.86034393310547, |
|
"logps/chosen": -29.130138397216797, |
|
"logps/rejected": -33.010986328125, |
|
"loss": 0.4483, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7334806323051453, |
|
"rewards/margins": 0.9591558575630188, |
|
"rewards/rejected": -0.22567513585090637, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 41.75, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 82.1180648803711, |
|
"logits/rejected": 82.14155578613281, |
|
"logps/chosen": -32.119606018066406, |
|
"logps/rejected": -33.77212905883789, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6555252075195312, |
|
"rewards/margins": 0.8975871801376343, |
|
"rewards/rejected": -0.24206197261810303, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 17.25, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 81.12958526611328, |
|
"logits/rejected": 81.1399154663086, |
|
"logps/chosen": -32.4399299621582, |
|
"logps/rejected": -33.30702590942383, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7222862839698792, |
|
"rewards/margins": 0.8514213562011719, |
|
"rewards/rejected": -0.12913502752780914, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 24.875, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 82.61198425292969, |
|
"logits/rejected": 82.64558410644531, |
|
"logps/chosen": -28.419490814208984, |
|
"logps/rejected": -31.76764488220215, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6796320080757141, |
|
"rewards/margins": 0.7503107786178589, |
|
"rewards/rejected": -0.07067875564098358, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 30.25, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 82.08049774169922, |
|
"logits/rejected": 82.0997543334961, |
|
"logps/chosen": -31.871307373046875, |
|
"logps/rejected": -35.636024475097656, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6029146909713745, |
|
"rewards/margins": 0.9189049601554871, |
|
"rewards/rejected": -0.31599029898643494, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 31.875, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 75.98027038574219, |
|
"logits/rejected": 75.85136413574219, |
|
"logps/chosen": -29.75612449645996, |
|
"logps/rejected": -28.387653350830078, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.49200135469436646, |
|
"rewards/margins": 0.6282498836517334, |
|
"rewards/rejected": -0.13624855875968933, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5886486524111265, |
|
"train_runtime": 2556.9439, |
|
"train_samples_per_second": 1.204, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|