{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 91.5,
      "learning_rate": 1.282051282051282e-07,
      "logits/chosen": 88.18099975585938,
      "logits/rejected": 88.25153350830078,
      "logps/chosen": -29.073104858398438,
      "logps/rejected": -26.25731658935547,
      "loss": 1.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 76.5,
      "learning_rate": 1.282051282051282e-06,
      "logits/chosen": 81.08255004882812,
      "logits/rejected": 80.78926086425781,
      "logps/chosen": -34.20470428466797,
      "logps/rejected": -33.038047790527344,
      "loss": 0.9368,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.02591484785079956,
      "rewards/margins": 0.0816662609577179,
      "rewards/rejected": -0.055751409381628036,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 60.25,
      "learning_rate": 2.564102564102564e-06,
      "logits/chosen": 80.67174530029297,
      "logits/rejected": 80.55998229980469,
      "logps/chosen": -33.60923767089844,
      "logps/rejected": -30.828128814697266,
      "loss": 0.9439,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.05647152662277222,
      "rewards/margins": 0.09191782772541046,
      "rewards/rejected": -0.035446297377347946,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 67.5,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": 82.49557495117188,
      "logits/rejected": 82.5277099609375,
      "logps/chosen": -33.90664291381836,
      "logps/rejected": -31.188213348388672,
      "loss": 1.0985,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.07432325184345245,
      "rewards/margins": -0.0906088799238205,
      "rewards/rejected": 0.16493213176727295,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 75.5,
      "learning_rate": 4.999896948438434e-06,
      "logits/chosen": 81.06272888183594,
      "logits/rejected": 81.05645751953125,
      "logps/chosen": -32.722740173339844,
      "logps/rejected": -33.15789031982422,
      "loss": 0.8862,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.3159271776676178,
      "rewards/margins": 0.1684812754392624,
      "rewards/rejected": 0.1474459171295166,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 48.75,
      "learning_rate": 4.987541037542187e-06,
      "logits/chosen": 78.66886901855469,
      "logits/rejected": 78.68830871582031,
      "logps/chosen": -30.57431411743164,
      "logps/rejected": -30.798864364624023,
      "loss": 0.9351,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.44165611267089844,
      "rewards/margins": 0.24956238269805908,
      "rewards/rejected": 0.19209368526935577,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 72.5,
      "learning_rate": 4.954691471941119e-06,
      "logits/chosen": 83.24879455566406,
      "logits/rejected": 83.29969787597656,
      "logps/chosen": -30.918521881103516,
      "logps/rejected": -29.482006072998047,
      "loss": 1.0359,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.17964980006217957,
      "rewards/margins": 0.1036890521645546,
      "rewards/rejected": 0.07596075534820557,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 87.0,
      "learning_rate": 4.901618883413549e-06,
      "logits/chosen": 83.91014099121094,
      "logits/rejected": 83.93647766113281,
      "logps/chosen": -30.502422332763672,
      "logps/rejected": -33.12609100341797,
      "loss": 0.9567,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.08862228691577911,
      "rewards/margins": 0.15049318969249725,
      "rewards/rejected": -0.06187089532613754,
      "step": 70
    },
    {
      "epoch": 0.21,
      "grad_norm": 74.0,
      "learning_rate": 4.828760511501322e-06,
      "logits/chosen": 81.4886245727539,
      "logits/rejected": 81.47552490234375,
      "logps/chosen": -31.330123901367188,
      "logps/rejected": -31.121978759765625,
      "loss": 0.814,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.12237221002578735,
      "rewards/margins": 0.3774043917655945,
      "rewards/rejected": -0.25503218173980713,
      "step": 80
    },
    {
      "epoch": 0.23,
      "grad_norm": 83.0,
      "learning_rate": 4.7367166013034295e-06,
      "logits/chosen": 78.25160217285156,
      "logits/rejected": 78.21989440917969,
      "logps/chosen": -32.49519729614258,
      "logps/rejected": -31.26288414001465,
      "loss": 0.8564,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.10008607059717178,
      "rewards/margins": 0.31807559728622437,
      "rewards/rejected": -0.21798951923847198,
      "step": 90
    },
    {
      "epoch": 0.26,
      "grad_norm": 82.5,
      "learning_rate": 4.626245458345211e-06,
      "logits/chosen": 83.49636840820312,
      "logits/rejected": 83.53155517578125,
      "logps/chosen": -34.187034606933594,
      "logps/rejected": -31.907695770263672,
      "loss": 0.9161,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.0825684443116188,
      "rewards/margins": 0.19257497787475586,
      "rewards/rejected": -0.11000655591487885,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": 98.7728500366211,
      "eval_logits/rejected": 98.76228332519531,
      "eval_logps/chosen": -32.4925651550293,
      "eval_logps/rejected": -36.00117492675781,
      "eval_loss": 1.0662287473678589,
      "eval_rewards/accuracies": 0.5128737688064575,
      "eval_rewards/chosen": -0.03457321599125862,
      "eval_rewards/margins": -0.010320436209440231,
      "eval_rewards/rejected": -0.02425277978181839,
      "eval_runtime": 104.2438,
      "eval_samples_per_second": 3.29,
      "eval_steps_per_second": 0.412,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 91.5,
      "learning_rate": 4.498257201263691e-06,
      "logits/chosen": 83.7912368774414,
      "logits/rejected": 83.67459106445312,
      "logps/chosen": -32.45995330810547,
      "logps/rejected": -32.786006927490234,
      "loss": 0.8008,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.39399290084838867,
      "rewards/margins": 0.4850761294364929,
      "rewards/rejected": -0.09108323603868484,
      "step": 110
    },
    {
      "epoch": 0.31,
      "grad_norm": 95.0,
      "learning_rate": 4.353806263777678e-06,
      "logits/chosen": 84.00267028808594,
      "logits/rejected": 84.11933898925781,
      "logps/chosen": -28.27166175842285,
      "logps/rejected": -35.5056037902832,
      "loss": 0.7011,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.4605434536933899,
      "rewards/margins": 0.4930640757083893,
      "rewards/rejected": -0.03252064064145088,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 64.5,
      "learning_rate": 4.1940827077152755e-06,
      "logits/chosen": 81.23250579833984,
      "logits/rejected": 81.24813842773438,
      "logps/chosen": -30.38728904724121,
      "logps/rejected": -32.12664031982422,
      "loss": 0.7344,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.3640448749065399,
      "rewards/margins": 0.5005542039871216,
      "rewards/rejected": -0.13650932908058167,
      "step": 130
    },
    {
      "epoch": 0.36,
      "grad_norm": 59.75,
      "learning_rate": 4.0204024186666215e-06,
      "logits/chosen": 82.44822692871094,
      "logits/rejected": 82.46932220458984,
      "logps/chosen": -27.172740936279297,
      "logps/rejected": -33.0168571472168,
      "loss": 0.7509,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.19220882654190063,
      "rewards/margins": 0.6076450347900391,
      "rewards/rejected": -0.4154362082481384,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 54.0,
      "learning_rate": 3.834196265035119e-06,
      "logits/chosen": 80.97503662109375,
      "logits/rejected": 80.94606018066406,
      "logps/chosen": -29.037616729736328,
      "logps/rejected": -33.28493118286133,
      "loss": 0.6493,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.2460467517375946,
      "rewards/margins": 0.7489484548568726,
      "rewards/rejected": -0.5029016733169556,
      "step": 150
    },
    {
      "epoch": 0.42,
      "grad_norm": 63.0,
      "learning_rate": 3.636998309800573e-06,
      "logits/chosen": 82.87214660644531,
      "logits/rejected": 82.87894439697266,
      "logps/chosen": -33.497344970703125,
      "logps/rejected": -30.375295639038086,
      "loss": 0.7389,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.4475820064544678,
      "rewards/margins": 0.7435113191604614,
      "rewards/rejected": -0.2959292531013489,
      "step": 160
    },
    {
      "epoch": 0.44,
      "grad_norm": 63.25,
      "learning_rate": 3.4304331721118078e-06,
      "logits/chosen": 83.60514831542969,
      "logits/rejected": 83.55717468261719,
      "logps/chosen": -30.812519073486328,
      "logps/rejected": -32.62251281738281,
      "loss": 0.7275,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.31273749470710754,
      "rewards/margins": 0.7344537377357483,
      "rewards/rejected": -0.42171627283096313,
      "step": 170
    },
    {
      "epoch": 0.47,
      "grad_norm": 54.25,
      "learning_rate": 3.2162026428305436e-06,
      "logits/chosen": 81.30427551269531,
      "logits/rejected": 81.28076171875,
      "logps/chosen": -30.593530654907227,
      "logps/rejected": -31.66329574584961,
      "loss": 0.58,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.4225357472896576,
      "rewards/margins": 0.7767966985702515,
      "rewards/rejected": -0.3542609214782715,
      "step": 180
    },
    {
      "epoch": 0.49,
      "grad_norm": 37.5,
      "learning_rate": 2.996071664294641e-06,
      "logits/chosen": 82.88330078125,
      "logits/rejected": 82.88325500488281,
      "logps/chosen": -30.3848934173584,
      "logps/rejected": -30.6480770111084,
      "loss": 0.8789,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.2670655846595764,
      "rewards/margins": 0.39310184121131897,
      "rewards/rejected": -0.12603625655174255,
      "step": 190
    },
    {
      "epoch": 0.52,
      "grad_norm": 56.25,
      "learning_rate": 2.7718537898066833e-06,
      "logits/chosen": 78.49826049804688,
      "logits/rejected": 78.43463134765625,
      "logps/chosen": -33.972293853759766,
      "logps/rejected": -32.59092330932617,
      "loss": 0.8398,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.5463098287582397,
      "rewards/margins": 0.6145724058151245,
      "rewards/rejected": -0.06826266646385193,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": 98.71674346923828,
      "eval_logits/rejected": 98.69157409667969,
      "eval_logps/chosen": -32.63681411743164,
      "eval_logps/rejected": -36.29216003417969,
      "eval_loss": 1.0089657306671143,
      "eval_rewards/accuracies": 0.5141196250915527,
      "eval_rewards/chosen": -0.13554596900939941,
      "eval_rewards/margins": 0.09239647537469864,
      "eval_rewards/rejected": -0.22794245183467865,
      "eval_runtime": 104.0982,
      "eval_samples_per_second": 3.295,
      "eval_steps_per_second": 0.413,
      "step": 200
    },
    {
      "epoch": 0.55,
      "grad_norm": 73.5,
      "learning_rate": 2.5453962426402006e-06,
      "logits/chosen": 81.18994140625,
      "logits/rejected": 81.10676574707031,
      "logps/chosen": -33.214935302734375,
      "logps/rejected": -35.187870025634766,
      "loss": 0.6733,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.511551022529602,
      "rewards/margins": 0.6527955532073975,
      "rewards/rejected": -0.14124450087547302,
      "step": 210
    },
    {
      "epoch": 0.57,
      "grad_norm": 53.5,
      "learning_rate": 2.3185646976551794e-06,
      "logits/chosen": 83.3000259399414,
      "logits/rejected": 83.38983917236328,
      "logps/chosen": -31.05356216430664,
      "logps/rejected": -31.100086212158203,
      "loss": 0.5573,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.6267757415771484,
      "rewards/margins": 0.8865677714347839,
      "rewards/rejected": -0.2597920000553131,
      "step": 220
    },
    {
      "epoch": 0.6,
      "grad_norm": 59.5,
      "learning_rate": 2.0932279108998323e-06,
      "logits/chosen": 80.46002197265625,
      "logits/rejected": 80.5117416381836,
      "logps/chosen": -32.2435417175293,
      "logps/rejected": -34.2344970703125,
      "loss": 0.7635,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.393540620803833,
      "rewards/margins": 0.5491863489151001,
      "rewards/rejected": -0.15564575791358948,
      "step": 230
    },
    {
      "epoch": 0.62,
      "grad_norm": 72.0,
      "learning_rate": 1.8712423238279358e-06,
      "logits/chosen": 82.81127166748047,
      "logits/rejected": 83.10184478759766,
      "logps/chosen": -30.6541748046875,
      "logps/rejected": -31.797021865844727,
      "loss": 0.5215,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.7237205505371094,
      "rewards/margins": 0.9092057347297668,
      "rewards/rejected": -0.18548506498336792,
      "step": 240
    },
    {
      "epoch": 0.65,
      "grad_norm": 76.5,
      "learning_rate": 1.6544367689701824e-06,
      "logits/chosen": 81.55049133300781,
      "logits/rejected": 81.6218032836914,
      "logps/chosen": -26.869796752929688,
      "logps/rejected": -30.16421890258789,
      "loss": 0.7649,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.5082693696022034,
      "rewards/margins": 0.6927058696746826,
      "rewards/rejected": -0.18443644046783447,
      "step": 250
    },
    {
      "epoch": 0.68,
      "grad_norm": 52.5,
      "learning_rate": 1.4445974030621963e-06,
      "logits/chosen": 78.84456634521484,
      "logits/rejected": 78.9933090209961,
      "logps/chosen": -30.304040908813477,
      "logps/rejected": -36.30915069580078,
      "loss": 0.4765,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.8798693418502808,
      "rewards/margins": 1.0251331329345703,
      "rewards/rejected": -0.1452637016773224,
      "step": 260
    },
    {
      "epoch": 0.7,
      "grad_norm": 57.0,
      "learning_rate": 1.243452991757889e-06,
      "logits/chosen": 78.18994903564453,
      "logits/rejected": 78.21639251708984,
      "logps/chosen": -30.825641632080078,
      "logps/rejected": -31.800342559814453,
      "loss": 0.6623,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.6672974228858948,
      "rewards/margins": 0.8737428784370422,
      "rewards/rejected": -0.20644548535346985,
      "step": 270
    },
    {
      "epoch": 0.73,
      "grad_norm": 76.0,
      "learning_rate": 1.0526606671603523e-06,
      "logits/chosen": 80.78196716308594,
      "logits/rejected": 80.56169128417969,
      "logps/chosen": -30.946773529052734,
      "logps/rejected": -29.854522705078125,
      "loss": 0.6812,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.6041832566261292,
      "rewards/margins": 0.7354522943496704,
      "rewards/rejected": -0.13126906752586365,
      "step": 280
    },
    {
      "epoch": 0.75,
      "grad_norm": 60.25,
      "learning_rate": 8.737922755071455e-07,
      "logits/chosen": 80.99944305419922,
      "logits/rejected": 80.91734313964844,
      "logps/chosen": -32.94629669189453,
      "logps/rejected": -32.49176025390625,
      "loss": 0.5405,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.8105659484863281,
      "rewards/margins": 1.1429924964904785,
      "rewards/rejected": -0.3324265480041504,
      "step": 290
    },
    {
      "epoch": 0.78,
      "grad_norm": 72.5,
      "learning_rate": 7.08321427484816e-07,
      "logits/chosen": 76.61840057373047,
      "logits/rejected": 76.69322204589844,
      "logps/chosen": -32.09418487548828,
      "logps/rejected": -29.156299591064453,
      "loss": 0.733,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.9180667996406555,
      "rewards/margins": 0.9560586214065552,
      "rewards/rejected": -0.03799174353480339,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": 98.73394012451172,
      "eval_logits/rejected": 98.70979309082031,
      "eval_logps/chosen": -32.53404235839844,
      "eval_logps/rejected": -36.16787338256836,
      "eval_loss": 1.0123846530914307,
      "eval_rewards/accuracies": 0.5398671627044678,
      "eval_rewards/chosen": -0.06360965222120285,
      "eval_rewards/margins": 0.07733342051506042,
      "eval_rewards/rejected": -0.14094306528568268,
      "eval_runtime": 104.0616,
      "eval_samples_per_second": 3.296,
      "eval_steps_per_second": 0.413,
      "step": 300
    },
    {
      "epoch": 0.81,
      "grad_norm": 63.0,
      "learning_rate": 5.576113578589035e-07,
      "logits/chosen": 83.6187515258789,
      "logits/rejected": 83.65609741210938,
      "logps/chosen": -30.124670028686523,
      "logps/rejected": -32.455326080322266,
      "loss": 0.6913,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.5172038078308105,
      "rewards/margins": 0.6960991024971008,
      "rewards/rejected": -0.1788952797651291,
      "step": 310
    },
    {
      "epoch": 0.83,
      "grad_norm": 53.5,
      "learning_rate": 4.229036944380913e-07,
      "logits/chosen": 81.21861267089844,
      "logits/rejected": 81.21974182128906,
      "logps/chosen": -30.413522720336914,
      "logps/rejected": -29.050277709960938,
      "loss": 0.5746,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.8295270204544067,
      "rewards/margins": 0.921076774597168,
      "rewards/rejected": -0.09154972434043884,
      "step": 320
    },
    {
      "epoch": 0.86,
      "grad_norm": 42.5,
      "learning_rate": 3.053082288996112e-07,
      "logits/chosen": 78.42388153076172,
      "logits/rejected": 78.47276306152344,
      "logps/chosen": -28.912973403930664,
      "logps/rejected": -32.85163116455078,
      "loss": 0.5118,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.007744550704956,
      "rewards/margins": 1.1594822406768799,
      "rewards/rejected": -0.151737779378891,
      "step": 330
    },
    {
      "epoch": 0.88,
      "grad_norm": 78.0,
      "learning_rate": 2.0579377374915805e-07,
      "logits/chosen": 82.58137512207031,
      "logits/rejected": 82.63166046142578,
      "logps/chosen": -32.287044525146484,
      "logps/rejected": -33.664268493652344,
      "loss": 0.7276,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.6475721597671509,
      "rewards/margins": 0.8544757962226868,
      "rewards/rejected": -0.2069036066532135,
      "step": 340
    },
    {
      "epoch": 0.91,
      "grad_norm": 56.75,
      "learning_rate": 1.2518018074041684e-07,
      "logits/chosen": 81.68583679199219,
      "logits/rejected": 81.69776916503906,
      "logps/chosen": -32.62708282470703,
      "logps/rejected": -33.35443115234375,
      "loss": 0.626,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.7116604447364807,
      "rewards/margins": 0.8955025672912598,
      "rewards/rejected": -0.18384216725826263,
      "step": 350
    },
    {
      "epoch": 0.94,
      "grad_norm": 45.5,
      "learning_rate": 6.41315865106129e-08,
      "logits/chosen": 83.15383911132812,
      "logits/rejected": 83.18663024902344,
      "logps/chosen": -28.314571380615234,
      "logps/rejected": -31.6760196685791,
      "loss": 0.6231,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.8663473129272461,
      "rewards/margins": 0.8846683502197266,
      "rewards/rejected": -0.01832098886370659,
      "step": 360
    },
    {
      "epoch": 0.96,
      "grad_norm": 76.0,
      "learning_rate": 2.3150941078050325e-08,
      "logits/chosen": 82.54973602294922,
      "logits/rejected": 82.57438659667969,
      "logps/chosen": -31.776432037353516,
      "logps/rejected": -35.274253845214844,
      "loss": 0.6836,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.7698130011558533,
      "rewards/margins": 0.8852267265319824,
      "rewards/rejected": -0.11541371047496796,
      "step": 370
    },
    {
      "epoch": 0.99,
      "grad_norm": 72.0,
      "learning_rate": 2.575864278703266e-09,
      "logits/chosen": 76.53568267822266,
      "logits/rejected": 76.40200805664062,
      "logps/chosen": -29.737590789794922,
      "logps/rejected": -28.21639060974121,
      "loss": 0.7558,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.5869752168655396,
      "rewards/margins": 0.6260467767715454,
      "rewards/rejected": -0.03907149285078049,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 0.7429099231571347,
      "train_runtime": 2555.3187,
      "train_samples_per_second": 1.205,
      "train_steps_per_second": 0.151
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}