|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 26.125, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 20.75, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08618927001953, |
|
"logits/rejected": 80.79019165039062, |
|
"logps/chosen": -34.27778625488281, |
|
"logps/rejected": -33.089111328125, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": -0.014423643238842487, |
|
"rewards/margins": 0.037857502698898315, |
|
"rewards/rejected": -0.05228114500641823, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 23.5, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.65704345703125, |
|
"logits/rejected": 80.54637908935547, |
|
"logps/chosen": -33.64678192138672, |
|
"logps/rejected": -30.826171875, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.017251869663596153, |
|
"rewards/margins": 0.03672502189874649, |
|
"rewards/rejected": -0.019473150372505188, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 23.375, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.53121185302734, |
|
"logits/rejected": 82.56344604492188, |
|
"logps/chosen": -33.85637664794922, |
|
"logps/rejected": -31.18195152282715, |
|
"loss": 0.7305, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.06257696449756622, |
|
"rewards/margins": -0.03417497128248215, |
|
"rewards/rejected": 0.09675192832946777, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 21.625, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.05611419677734, |
|
"logits/rejected": 81.05009460449219, |
|
"logps/chosen": -32.67264175415039, |
|
"logps/rejected": -33.195556640625, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.20056962966918945, |
|
"rewards/margins": 0.13138213753700256, |
|
"rewards/rejected": 0.0691874772310257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 16.5, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.71464538574219, |
|
"logits/rejected": 78.72676086425781, |
|
"logps/chosen": -30.6053466796875, |
|
"logps/rejected": -30.86154556274414, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.23996099829673767, |
|
"rewards/margins": 0.15526394546031952, |
|
"rewards/rejected": 0.08469705283641815, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 20.375, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.24500274658203, |
|
"logits/rejected": 83.30210876464844, |
|
"logps/chosen": -30.880474090576172, |
|
"logps/rejected": -29.403039932250977, |
|
"loss": 0.7121, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.11787523329257965, |
|
"rewards/margins": 0.04288250952959061, |
|
"rewards/rejected": 0.07499273121356964, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 30.75, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.83995056152344, |
|
"logits/rejected": 83.8766098022461, |
|
"logps/chosen": -30.424224853515625, |
|
"logps/rejected": -32.918212890625, |
|
"loss": 0.704, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.08192038536071777, |
|
"rewards/margins": 0.03412293642759323, |
|
"rewards/rejected": 0.04779745265841484, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 20.5, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.50079345703125, |
|
"logits/rejected": 81.48374938964844, |
|
"logps/chosen": -31.288936614990234, |
|
"logps/rejected": -30.928760528564453, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.08640192449092865, |
|
"rewards/margins": 0.15484753251075745, |
|
"rewards/rejected": -0.0684456080198288, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 27.375, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.15573120117188, |
|
"logits/rejected": 78.12377166748047, |
|
"logps/chosen": -32.45014190673828, |
|
"logps/rejected": -31.189159393310547, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07521401345729828, |
|
"rewards/margins": 0.17028900980949402, |
|
"rewards/rejected": -0.09507499635219574, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 21.25, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.35581970214844, |
|
"logits/rejected": 83.3793716430664, |
|
"logps/chosen": -33.93380355834961, |
|
"logps/rejected": -31.7375431060791, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.14847493171691895, |
|
"rewards/margins": 0.14327403903007507, |
|
"rewards/rejected": 0.005200871266424656, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.69609832763672, |
|
"eval_logits/rejected": 98.68218231201172, |
|
"eval_logps/chosen": -32.47989273071289, |
|
"eval_logps/rejected": -36.04493713378906, |
|
"eval_loss": 0.7133548259735107, |
|
"eval_rewards/accuracies": 0.5220099687576294, |
|
"eval_rewards/chosen": -0.01468663476407528, |
|
"eval_rewards/margins": 0.01667696051299572, |
|
"eval_rewards/rejected": -0.0313635915517807, |
|
"eval_runtime": 104.2778, |
|
"eval_samples_per_second": 3.289, |
|
"eval_steps_per_second": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 27.375, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.54657745361328, |
|
"logits/rejected": 83.43326568603516, |
|
"logps/chosen": -32.3030891418457, |
|
"logps/rejected": -32.73809051513672, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2878861129283905, |
|
"rewards/margins": 0.3207677900791168, |
|
"rewards/rejected": -0.03288168087601662, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 30.625, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.636474609375, |
|
"logits/rejected": 83.75120544433594, |
|
"logps/chosen": -28.302623748779297, |
|
"logps/rejected": -35.44631576538086, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2507820725440979, |
|
"rewards/margins": 0.2456505298614502, |
|
"rewards/rejected": 0.005131528712809086, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 19.375, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.81904602050781, |
|
"logits/rejected": 80.84658813476562, |
|
"logps/chosen": -30.323780059814453, |
|
"logps/rejected": -32.11851119995117, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2334306687116623, |
|
"rewards/margins": 0.3081851899623871, |
|
"rewards/rejected": -0.07475452125072479, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 16.375, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 81.91621398925781, |
|
"logits/rejected": 81.9150161743164, |
|
"logps/chosen": -26.948715209960938, |
|
"logps/rejected": -33.053627014160156, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.19944298267364502, |
|
"rewards/margins": 0.45154237747192383, |
|
"rewards/rejected": -0.2520993649959564, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 17.75, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.3634262084961, |
|
"logits/rejected": 80.32890319824219, |
|
"logps/chosen": -28.899206161499023, |
|
"logps/rejected": -33.3048095703125, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1959628015756607, |
|
"rewards/margins": 0.49128589034080505, |
|
"rewards/rejected": -0.29532313346862793, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 29.625, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.13862609863281, |
|
"logits/rejected": 82.1554183959961, |
|
"logps/chosen": -33.77667236328125, |
|
"logps/rejected": -30.610042572021484, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14403103291988373, |
|
"rewards/margins": 0.4070332646369934, |
|
"rewards/rejected": -0.2630022168159485, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 21.375, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 82.94571685791016, |
|
"logits/rejected": 82.90376281738281, |
|
"logps/chosen": -30.815759658813477, |
|
"logps/rejected": -32.71514129638672, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.17741146683692932, |
|
"rewards/margins": 0.45544299483299255, |
|
"rewards/rejected": -0.2780315577983856, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 17.25, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.42796325683594, |
|
"logits/rejected": 80.40777587890625, |
|
"logps/chosen": -30.608139038085938, |
|
"logps/rejected": -31.78671646118164, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.23560428619384766, |
|
"rewards/margins": 0.48740649223327637, |
|
"rewards/rejected": -0.25180214643478394, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 10.5, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 82.06446838378906, |
|
"logits/rejected": 82.06089782714844, |
|
"logps/chosen": -30.304092407226562, |
|
"logps/rejected": -30.887014389038086, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.1849292516708374, |
|
"rewards/margins": 0.3525257706642151, |
|
"rewards/rejected": -0.16759653389453888, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 14.75, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 77.5021743774414, |
|
"logits/rejected": 77.44898223876953, |
|
"logps/chosen": -33.84641647338867, |
|
"logps/rejected": -32.781463623046875, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.36252856254577637, |
|
"rewards/margins": 0.477752149105072, |
|
"rewards/rejected": -0.11522357165813446, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.46090698242188, |
|
"eval_logits/rejected": 98.43693542480469, |
|
"eval_logps/chosen": -32.81344985961914, |
|
"eval_logps/rejected": -36.51091384887695, |
|
"eval_loss": 0.6983966827392578, |
|
"eval_rewards/accuracies": 0.5402824282646179, |
|
"eval_rewards/chosen": -0.14810949563980103, |
|
"eval_rewards/margins": 0.0696462020277977, |
|
"eval_rewards/rejected": -0.21775569021701813, |
|
"eval_runtime": 104.0141, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 30.375, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 80.12822723388672, |
|
"logits/rejected": 80.03518676757812, |
|
"logps/chosen": -33.25244903564453, |
|
"logps/rejected": -35.43198013305664, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.27730754017829895, |
|
"rewards/margins": 0.4556616246700287, |
|
"rewards/rejected": -0.17835409939289093, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 17.625, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 82.24729919433594, |
|
"logits/rejected": 82.3295669555664, |
|
"logps/chosen": -31.125244140625, |
|
"logps/rejected": -31.354183197021484, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3294834792613983, |
|
"rewards/margins": 0.57957524061203, |
|
"rewards/rejected": -0.2500917315483093, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 21.125, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 79.34232330322266, |
|
"logits/rejected": 79.39698791503906, |
|
"logps/chosen": -32.36313247680664, |
|
"logps/rejected": -34.425838470458984, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.17704486846923828, |
|
"rewards/margins": 0.34251970052719116, |
|
"rewards/rejected": -0.16547484695911407, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 22.5, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 81.70603942871094, |
|
"logits/rejected": 81.9966049194336, |
|
"logps/chosen": -30.78457260131836, |
|
"logps/rejected": -32.03525161743164, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3613971471786499, |
|
"rewards/margins": 0.5626804232597351, |
|
"rewards/rejected": -0.20128324627876282, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 23.375, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 80.38089752197266, |
|
"logits/rejected": 80.44111633300781, |
|
"logps/chosen": -26.974994659423828, |
|
"logps/rejected": -30.13335609436035, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.24836082756519318, |
|
"rewards/margins": 0.34140732884407043, |
|
"rewards/rejected": -0.09304650872945786, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 20.125, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 77.59573364257812, |
|
"logits/rejected": 77.74366760253906, |
|
"logps/chosen": -30.3758487701416, |
|
"logps/rejected": -36.788978576660156, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.4740586280822754, |
|
"rewards/margins": 0.749001145362854, |
|
"rewards/rejected": -0.2749424874782562, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 14.875, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 76.95594787597656, |
|
"logits/rejected": 76.9817886352539, |
|
"logps/chosen": -30.940093994140625, |
|
"logps/rejected": -31.99422264099121, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3355324864387512, |
|
"rewards/margins": 0.5310525894165039, |
|
"rewards/rejected": -0.19552013278007507, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 22.375, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 79.73005676269531, |
|
"logits/rejected": 79.521484375, |
|
"logps/chosen": -31.06637954711914, |
|
"logps/rejected": -30.05121421813965, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2974059581756592, |
|
"rewards/margins": 0.4510935842990875, |
|
"rewards/rejected": -0.15368762612342834, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 18.125, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 79.8319091796875, |
|
"logits/rejected": 79.74632263183594, |
|
"logps/chosen": -33.005733489990234, |
|
"logps/rejected": -32.86864471435547, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.43940457701683044, |
|
"rewards/margins": 0.780119001865387, |
|
"rewards/rejected": -0.3407144248485565, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 23.375, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 75.52354431152344, |
|
"logits/rejected": 75.6214599609375, |
|
"logps/chosen": -32.215476989746094, |
|
"logps/rejected": -29.42368507385254, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.47609180212020874, |
|
"rewards/margins": 0.6047566533088684, |
|
"rewards/rejected": -0.1286647617816925, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.4832534790039, |
|
"eval_logits/rejected": 98.45703125, |
|
"eval_logps/chosen": -32.67014694213867, |
|
"eval_logps/rejected": -36.31406784057617, |
|
"eval_loss": 0.7083631753921509, |
|
"eval_rewards/accuracies": 0.5398671627044678, |
|
"eval_rewards/chosen": -0.09078802913427353, |
|
"eval_rewards/margins": 0.04822726547718048, |
|
"eval_rewards/rejected": -0.1390153020620346, |
|
"eval_runtime": 103.8446, |
|
"eval_samples_per_second": 3.303, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 17.375, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 82.69928741455078, |
|
"logits/rejected": 82.7286376953125, |
|
"logps/chosen": -30.082874298095703, |
|
"logps/rejected": -32.789588928222656, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3122648298740387, |
|
"rewards/margins": 0.5481952428817749, |
|
"rewards/rejected": -0.2359304428100586, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 17.75, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 80.08171081542969, |
|
"logits/rejected": 80.08326721191406, |
|
"logps/chosen": -30.4410457611084, |
|
"logps/rejected": -29.53485107421875, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.4630066454410553, |
|
"rewards/margins": 0.7091516852378845, |
|
"rewards/rejected": -0.24614505469799042, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 14.6875, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 77.24263000488281, |
|
"logits/rejected": 77.28250122070312, |
|
"logps/chosen": -28.96456527709961, |
|
"logps/rejected": -33.16306686401367, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5552161931991577, |
|
"rewards/margins": 0.7664985656738281, |
|
"rewards/rejected": -0.21128229796886444, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 29.875, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 81.6305923461914, |
|
"logits/rejected": 81.66109466552734, |
|
"logps/chosen": -32.311283111572266, |
|
"logps/rejected": -34.081214904785156, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3603467345237732, |
|
"rewards/margins": 0.6453540921211243, |
|
"rewards/rejected": -0.2850072979927063, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 11.625, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 80.61808776855469, |
|
"logits/rejected": 80.62725067138672, |
|
"logps/chosen": -32.49099349975586, |
|
"logps/rejected": -33.57762908935547, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4610978960990906, |
|
"rewards/margins": 0.6554309129714966, |
|
"rewards/rejected": -0.19433295726776123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 15.8125, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 82.13656616210938, |
|
"logits/rejected": 82.17839050292969, |
|
"logps/chosen": -28.390300750732422, |
|
"logps/rejected": -31.930139541625977, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.46476393938064575, |
|
"rewards/margins": 0.5768795013427734, |
|
"rewards/rejected": -0.11211560666561127, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 20.375, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 81.61884307861328, |
|
"logits/rejected": 81.6422348022461, |
|
"logps/chosen": -31.991628646850586, |
|
"logps/rejected": -35.56809616088867, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3538144528865814, |
|
"rewards/margins": 0.5373049974441528, |
|
"rewards/rejected": -0.1834905445575714, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 21.375, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 75.43504333496094, |
|
"logits/rejected": 75.3118667602539, |
|
"logps/chosen": -29.84956932067871, |
|
"logps/rejected": -28.521799087524414, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.29062420129776, |
|
"rewards/margins": 0.4351147711277008, |
|
"rewards/rejected": -0.1444905549287796, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5890550520512965, |
|
"train_runtime": 2558.5725, |
|
"train_samples_per_second": 1.203, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|