|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 104.5, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 78.0, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08700561523438, |
|
"logits/rejected": 80.79035186767578, |
|
"logps/chosen": -34.29237365722656, |
|
"logps/rejected": -33.04549026489258, |
|
"loss": 0.9866, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.040518708527088165, |
|
"rewards/margins": 0.029149238020181656, |
|
"rewards/rejected": -0.06966794282197952, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 85.5, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.68280029296875, |
|
"logits/rejected": 80.57057189941406, |
|
"logps/chosen": -33.53000259399414, |
|
"logps/rejected": -30.8519287109375, |
|
"loss": 0.8984, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.12792269885540009, |
|
"rewards/margins": 0.18747423589229584, |
|
"rewards/rejected": -0.05955154448747635, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 73.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.52570343017578, |
|
"logits/rejected": 82.552734375, |
|
"logps/chosen": -33.96383285522461, |
|
"logps/rejected": -31.202930450439453, |
|
"loss": 1.1955, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.03918968886137009, |
|
"rewards/margins": -0.1375342458486557, |
|
"rewards/rejected": 0.1767239272594452, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 74.5, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 80.97098541259766, |
|
"logits/rejected": 80.9694595336914, |
|
"logps/chosen": -32.8037109375, |
|
"logps/rejected": -33.232460021972656, |
|
"loss": 0.9309, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2962803840637207, |
|
"rewards/margins": 0.1874258816242218, |
|
"rewards/rejected": 0.10885453224182129, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 70.0, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.56432342529297, |
|
"logits/rejected": 78.58203125, |
|
"logps/chosen": -30.68828773498535, |
|
"logps/rejected": -30.789108276367188, |
|
"loss": 0.9855, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4135704040527344, |
|
"rewards/margins": 0.18622872233390808, |
|
"rewards/rejected": 0.2273416966199875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 84.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.11712646484375, |
|
"logits/rejected": 83.17086791992188, |
|
"logps/chosen": -30.859710693359375, |
|
"logps/rejected": -29.577678680419922, |
|
"loss": 0.9236, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.25236082077026367, |
|
"rewards/margins": 0.24208447337150574, |
|
"rewards/rejected": 0.010276327840983868, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 107.0, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.80155944824219, |
|
"logits/rejected": 83.8315658569336, |
|
"logps/chosen": -30.606204986572266, |
|
"logps/rejected": -32.91896057128906, |
|
"loss": 1.141, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.01825830340385437, |
|
"rewards/margins": -0.0767388790845871, |
|
"rewards/rejected": 0.09499720484018326, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 84.5, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.45521545410156, |
|
"logits/rejected": 81.44440460205078, |
|
"logps/chosen": -31.48111343383789, |
|
"logps/rejected": -31.072092056274414, |
|
"loss": 0.8995, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01906520128250122, |
|
"rewards/margins": 0.2706204056739807, |
|
"rewards/rejected": -0.2515551745891571, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 102.0, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.27713775634766, |
|
"logits/rejected": 78.24540710449219, |
|
"logps/chosen": -32.49984359741211, |
|
"logps/rejected": -31.142908096313477, |
|
"loss": 0.9467, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.11066800355911255, |
|
"rewards/margins": 0.2638159692287445, |
|
"rewards/rejected": -0.15314793586730957, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 73.5, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.56034851074219, |
|
"logits/rejected": 83.58055877685547, |
|
"logps/chosen": -34.057029724121094, |
|
"logps/rejected": -31.78998374938965, |
|
"loss": 0.9567, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.19836829602718353, |
|
"rewards/margins": 0.22992074489593506, |
|
"rewards/rejected": -0.031552452594041824, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.73486328125, |
|
"eval_logits/rejected": 98.72332763671875, |
|
"eval_logps/chosen": -32.45964431762695, |
|
"eval_logps/rejected": -35.88682556152344, |
|
"eval_loss": 1.1574316024780273, |
|
"eval_rewards/accuracies": 0.4630398452281952, |
|
"eval_rewards/chosen": -0.013175476342439651, |
|
"eval_rewards/margins": -0.07693858444690704, |
|
"eval_rewards/rejected": 0.0637631043791771, |
|
"eval_runtime": 104.0187, |
|
"eval_samples_per_second": 3.297, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 104.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.91990661621094, |
|
"logits/rejected": 83.79493713378906, |
|
"logps/chosen": -32.23216247558594, |
|
"logps/rejected": -32.69822692871094, |
|
"loss": 0.7793, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6325147151947021, |
|
"rewards/margins": 0.6663830280303955, |
|
"rewards/rejected": -0.03386829048395157, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 108.5, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 84.13631439208984, |
|
"logits/rejected": 84.24298858642578, |
|
"logps/chosen": -28.422576904296875, |
|
"logps/rejected": -35.379127502441406, |
|
"loss": 0.8738, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.40560561418533325, |
|
"rewards/margins": 0.34159213304519653, |
|
"rewards/rejected": 0.06401350349187851, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 55.5, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 81.52787017822266, |
|
"logits/rejected": 81.55335998535156, |
|
"logps/chosen": -30.235469818115234, |
|
"logps/rejected": -31.976581573486328, |
|
"loss": 0.8013, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.5375091433525085, |
|
"rewards/margins": 0.5734738707542419, |
|
"rewards/rejected": -0.03596482425928116, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 57.75, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.77738189697266, |
|
"logits/rejected": 82.78349304199219, |
|
"logps/chosen": -27.04022216796875, |
|
"logps/rejected": -32.86003875732422, |
|
"loss": 0.7387, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.32568174600601196, |
|
"rewards/margins": 0.6750079393386841, |
|
"rewards/rejected": -0.34932616353034973, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 63.0, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 81.38068389892578, |
|
"logits/rejected": 81.35902404785156, |
|
"logps/chosen": -28.90557289123535, |
|
"logps/rejected": -32.89466094970703, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.38682785630226135, |
|
"rewards/margins": 0.6493567228317261, |
|
"rewards/rejected": -0.26252883672714233, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 66.0, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 83.2899169921875, |
|
"logits/rejected": 83.30531311035156, |
|
"logps/chosen": -33.436553955078125, |
|
"logps/rejected": -30.43096351623535, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5601544976234436, |
|
"rewards/margins": 0.9428972005844116, |
|
"rewards/rejected": -0.382742702960968, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 70.5, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 84.12855529785156, |
|
"logits/rejected": 84.0693359375, |
|
"logps/chosen": -30.63346290588379, |
|
"logps/rejected": -32.29146194458008, |
|
"loss": 0.7575, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5006591081619263, |
|
"rewards/margins": 0.717779278755188, |
|
"rewards/rejected": -0.21712002158164978, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 65.0, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 81.81861114501953, |
|
"logits/rejected": 81.79876708984375, |
|
"logps/chosen": -30.488971710205078, |
|
"logps/rejected": -31.519611358642578, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5665422677993774, |
|
"rewards/margins": 0.8564668893814087, |
|
"rewards/rejected": -0.289924681186676, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 34.0, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 83.52326965332031, |
|
"logits/rejected": 83.50785064697266, |
|
"logps/chosen": -30.285634994506836, |
|
"logps/rejected": -30.630422592163086, |
|
"loss": 0.8287, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.38462626934051514, |
|
"rewards/margins": 0.5145466327667236, |
|
"rewards/rejected": -0.12992039322853088, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 50.25, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 79.0837173461914, |
|
"logits/rejected": 79.03185272216797, |
|
"logps/chosen": -33.81468963623047, |
|
"logps/rejected": -32.4266357421875, |
|
"loss": 0.8098, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.7504364252090454, |
|
"rewards/margins": 0.6970199346542358, |
|
"rewards/rejected": 0.053416453301906586, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.86815643310547, |
|
"eval_logits/rejected": 98.84379577636719, |
|
"eval_logps/chosen": -32.561100006103516, |
|
"eval_logps/rejected": -36.16287612915039, |
|
"eval_loss": 1.0544954538345337, |
|
"eval_rewards/accuracies": 0.5278239250183105, |
|
"eval_rewards/chosen": -0.09434036910533905, |
|
"eval_rewards/margins": 0.06273789703845978, |
|
"eval_rewards/rejected": -0.15707828104496002, |
|
"eval_runtime": 103.8402, |
|
"eval_samples_per_second": 3.303, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 104.0, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 81.58848571777344, |
|
"logits/rejected": 81.49668884277344, |
|
"logps/chosen": -33.169471740722656, |
|
"logps/rejected": -34.991939544677734, |
|
"loss": 0.7585, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.620996356010437, |
|
"rewards/margins": 0.6256735324859619, |
|
"rewards/rejected": -0.004677181597799063, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 35.0, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 83.66175079345703, |
|
"logits/rejected": 83.74690246582031, |
|
"logps/chosen": -31.094005584716797, |
|
"logps/rejected": -31.061717987060547, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.683959424495697, |
|
"rewards/margins": 0.9501722455024719, |
|
"rewards/rejected": -0.2662127912044525, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 69.5, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 80.7747802734375, |
|
"logits/rejected": 80.82144165039062, |
|
"logps/chosen": -32.322105407714844, |
|
"logps/rejected": -34.30824661254883, |
|
"loss": 0.7928, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.38691413402557373, |
|
"rewards/margins": 0.6237919926643372, |
|
"rewards/rejected": -0.23687779903411865, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 89.5, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 83.18328094482422, |
|
"logits/rejected": 83.4581069946289, |
|
"logps/chosen": -30.6195125579834, |
|
"logps/rejected": -31.637020111083984, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.854840099811554, |
|
"rewards/margins": 0.9388192892074585, |
|
"rewards/rejected": -0.08397923409938812, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 84.5, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 81.9409408569336, |
|
"logits/rejected": 82.00200653076172, |
|
"logps/chosen": -26.908878326416016, |
|
"logps/rejected": -30.06099510192871, |
|
"loss": 0.8063, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5496150255203247, |
|
"rewards/margins": 0.6778196692466736, |
|
"rewards/rejected": -0.12820473313331604, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 60.25, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 79.24473571777344, |
|
"logits/rejected": 79.37559509277344, |
|
"logps/chosen": -30.283336639404297, |
|
"logps/rejected": -36.4005241394043, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.0221258401870728, |
|
"rewards/margins": 1.2612489461898804, |
|
"rewards/rejected": -0.23912319540977478, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 31.875, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 78.6611328125, |
|
"logits/rejected": 78.69209289550781, |
|
"logps/chosen": -30.992252349853516, |
|
"logps/rejected": -31.770212173461914, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6293355822563171, |
|
"rewards/margins": 0.8411690592765808, |
|
"rewards/rejected": -0.21183356642723083, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 100.0, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 81.2646255493164, |
|
"logits/rejected": 81.05351257324219, |
|
"logps/chosen": -31.10286521911621, |
|
"logps/rejected": -29.866031646728516, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5656233429908752, |
|
"rewards/margins": 0.7248517274856567, |
|
"rewards/rejected": -0.15922844409942627, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 61.25, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 81.45980072021484, |
|
"logits/rejected": 81.37715148925781, |
|
"logps/chosen": -33.03575134277344, |
|
"logps/rejected": -32.42512512207031, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8547972440719604, |
|
"rewards/margins": 1.1814041137695312, |
|
"rewards/rejected": -0.32660672068595886, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 78.0, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 77.12594604492188, |
|
"logits/rejected": 77.21607971191406, |
|
"logps/chosen": -32.209590911865234, |
|
"logps/rejected": -29.23952865600586, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9568928480148315, |
|
"rewards/margins": 1.066897988319397, |
|
"rewards/rejected": -0.11000506579875946, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.90528106689453, |
|
"eval_logits/rejected": 98.8800277709961, |
|
"eval_logps/chosen": -32.50230407714844, |
|
"eval_logps/rejected": -36.192344665527344, |
|
"eval_loss": 0.9868877530097961, |
|
"eval_rewards/accuracies": 0.5830564498901367, |
|
"eval_rewards/chosen": -0.047303199768066406, |
|
"eval_rewards/margins": 0.1333501785993576, |
|
"eval_rewards/rejected": -0.180653378367424, |
|
"eval_runtime": 103.7665, |
|
"eval_samples_per_second": 3.305, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 59.25, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 84.09150695800781, |
|
"logits/rejected": 84.11959075927734, |
|
"logps/chosen": -30.127155303955078, |
|
"logps/rejected": -32.29438781738281, |
|
"loss": 0.7446, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5891044735908508, |
|
"rewards/margins": 0.6648039817810059, |
|
"rewards/rejected": -0.07569954544305801, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 60.25, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 81.67195892333984, |
|
"logits/rejected": 81.67171478271484, |
|
"logps/chosen": -30.566574096679688, |
|
"logps/rejected": -29.08144187927246, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.8255898356437683, |
|
"rewards/margins": 0.9551518559455872, |
|
"rewards/rejected": -0.12956194579601288, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 58.0, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 78.98292541503906, |
|
"logits/rejected": 79.03089904785156, |
|
"logps/chosen": -29.0233211517334, |
|
"logps/rejected": -32.939231872558594, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.063429832458496, |
|
"rewards/margins": 1.306926965713501, |
|
"rewards/rejected": -0.24349698424339294, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 88.5, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 83.09165954589844, |
|
"logits/rejected": 83.11895751953125, |
|
"logps/chosen": -32.228084564208984, |
|
"logps/rejected": -33.656490325927734, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7872547507286072, |
|
"rewards/margins": 1.0174915790557861, |
|
"rewards/rejected": -0.23023685812950134, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 42.5, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 82.23709869384766, |
|
"logits/rejected": 82.24420928955078, |
|
"logps/chosen": -32.544349670410156, |
|
"logps/rejected": -33.209228515625, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8795096278190613, |
|
"rewards/margins": 0.9734523892402649, |
|
"rewards/rejected": -0.09394274652004242, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 66.0, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 83.5949935913086, |
|
"logits/rejected": 83.62043762207031, |
|
"logps/chosen": -28.472000122070312, |
|
"logps/rejected": -31.736831665039062, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8641688227653503, |
|
"rewards/margins": 0.9337539672851562, |
|
"rewards/rejected": -0.0695851519703865, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 78.5, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 83.08488464355469, |
|
"logits/rejected": 83.10858917236328, |
|
"logps/chosen": -31.81646156311035, |
|
"logps/rejected": -35.26220703125, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.847761332988739, |
|
"rewards/margins": 0.9700264930725098, |
|
"rewards/rejected": -0.1222650408744812, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 82.5, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 77.07919311523438, |
|
"logits/rejected": 76.94573974609375, |
|
"logps/chosen": -29.726633071899414, |
|
"logps/rejected": -28.218725204467773, |
|
"loss": 0.7075, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6795950531959534, |
|
"rewards/margins": 0.7261168956756592, |
|
"rewards/rejected": -0.046521805226802826, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7616742146479619, |
|
"train_runtime": 2551.1965, |
|
"train_samples_per_second": 1.207, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|