|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1312.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 25.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1168.0, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08999633789062, |
|
"logits/rejected": 80.79169464111328, |
|
"logps/chosen": -34.18925094604492, |
|
"logps/rejected": -33.03681945800781, |
|
"loss": 24.3423, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.005247864406555891, |
|
"rewards/margins": 0.0130887096747756, |
|
"rewards/rejected": -0.007840845733880997, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1032.0, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.66268157958984, |
|
"logits/rejected": 80.54837799072266, |
|
"logps/chosen": -33.62754821777344, |
|
"logps/rejected": -30.786510467529297, |
|
"loss": 25.2801, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.006235760636627674, |
|
"rewards/margins": 0.007138081826269627, |
|
"rewards/rejected": -0.0009023217717185616, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1200.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.51216888427734, |
|
"logits/rejected": 82.54035949707031, |
|
"logps/chosen": -33.695411682128906, |
|
"logps/rejected": -31.29660415649414, |
|
"loss": 24.6437, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.03174129128456116, |
|
"rewards/margins": 0.01901828870177269, |
|
"rewards/rejected": 0.012723001651465893, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1144.0, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.04322814941406, |
|
"logits/rejected": 81.03800964355469, |
|
"logps/chosen": -32.80583572387695, |
|
"logps/rejected": -33.2043571472168, |
|
"loss": 24.7349, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03682267293334007, |
|
"rewards/margins": 0.020405994728207588, |
|
"rewards/rejected": 0.016416678205132484, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 948.0, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.70301055908203, |
|
"logits/rejected": 78.71062469482422, |
|
"logps/chosen": -30.72637367248535, |
|
"logps/rejected": -30.724206924438477, |
|
"loss": 26.36, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.04788754880428314, |
|
"rewards/margins": 0.012979410588741302, |
|
"rewards/rejected": 0.03490813449025154, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 968.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.19561767578125, |
|
"logits/rejected": 83.24873352050781, |
|
"logps/chosen": -30.971267700195312, |
|
"logps/rejected": -29.551761627197266, |
|
"loss": 25.0468, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.020389318466186523, |
|
"rewards/margins": 0.016513368114829063, |
|
"rewards/rejected": 0.0038759508170187473, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1552.0, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.78221130371094, |
|
"logits/rejected": 83.81100463867188, |
|
"logps/chosen": -30.479488372802734, |
|
"logps/rejected": -33.11530303955078, |
|
"loss": 24.2453, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.01495352853089571, |
|
"rewards/margins": 0.02271350473165512, |
|
"rewards/rejected": -0.007759974803775549, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1104.0, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.36155700683594, |
|
"logits/rejected": 81.35380554199219, |
|
"logps/chosen": -31.37007713317871, |
|
"logps/rejected": -30.98931884765625, |
|
"loss": 23.0324, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.013486603274941444, |
|
"rewards/margins": 0.03665385767817497, |
|
"rewards/rejected": -0.023167254403233528, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1120.0, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.11141967773438, |
|
"logits/rejected": 78.0789794921875, |
|
"logps/chosen": -32.44235610961914, |
|
"logps/rejected": -31.213552474975586, |
|
"loss": 22.8826, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01958216354250908, |
|
"rewards/margins": 0.0457901768386364, |
|
"rewards/rejected": -0.02620801329612732, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1056.0, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.38008117675781, |
|
"logits/rejected": 83.412841796875, |
|
"logps/chosen": -34.02827072143555, |
|
"logps/rejected": -31.858572006225586, |
|
"loss": 23.2882, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.027672046795487404, |
|
"rewards/margins": 0.038474611937999725, |
|
"rewards/rejected": -0.010802562348544598, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.75923156738281, |
|
"eval_logits/rejected": 98.74642181396484, |
|
"eval_logps/chosen": -32.47222137451172, |
|
"eval_logps/rejected": -36.06691360473633, |
|
"eval_loss": 25.671157836914062, |
|
"eval_rewards/accuracies": 0.5228405594825745, |
|
"eval_rewards/chosen": -0.0029044141992926598, |
|
"eval_rewards/margins": 0.0071340943686664104, |
|
"eval_rewards/rejected": -0.010038508102297783, |
|
"eval_runtime": 104.2457, |
|
"eval_samples_per_second": 3.29, |
|
"eval_steps_per_second": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1528.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.55766296386719, |
|
"logits/rejected": 83.4551010131836, |
|
"logps/chosen": -32.478965759277344, |
|
"logps/rejected": -32.80836486816406, |
|
"loss": 21.9052, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05438382551074028, |
|
"rewards/margins": 0.06963126361370087, |
|
"rewards/rejected": -0.015247439965605736, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1504.0, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.767822265625, |
|
"logits/rejected": 83.86802673339844, |
|
"logps/chosen": -28.277118682861328, |
|
"logps/rejected": -35.63311004638672, |
|
"loss": 20.22, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.06524648517370224, |
|
"rewards/margins": 0.08264312148094177, |
|
"rewards/rejected": -0.01739663816988468, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 956.0, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.87911224365234, |
|
"logits/rejected": 80.89720153808594, |
|
"logps/chosen": -30.450307846069336, |
|
"logps/rejected": -32.20357131958008, |
|
"loss": 20.9442, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04570445418357849, |
|
"rewards/margins": 0.07289845496416092, |
|
"rewards/rejected": -0.02719399705529213, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 740.0, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.11528778076172, |
|
"logits/rejected": 82.12232971191406, |
|
"logps/chosen": -27.089996337890625, |
|
"logps/rejected": -33.013877868652344, |
|
"loss": 20.3465, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.035732947289943695, |
|
"rewards/margins": 0.09478302299976349, |
|
"rewards/rejected": -0.059050071984529495, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 948.0, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.63235473632812, |
|
"logits/rejected": 80.60447692871094, |
|
"logps/chosen": -28.92165756225586, |
|
"logps/rejected": -33.14097213745117, |
|
"loss": 18.7455, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.04674536734819412, |
|
"rewards/margins": 0.10419263690710068, |
|
"rewards/rejected": -0.05744727700948715, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1664.0, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.47834777832031, |
|
"logits/rejected": 82.48927307128906, |
|
"logps/chosen": -33.57959747314453, |
|
"logps/rejected": -30.44466209411621, |
|
"loss": 21.1798, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.055715084075927734, |
|
"rewards/margins": 0.10492750257253647, |
|
"rewards/rejected": -0.049212418496608734, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1152.0, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 83.31620788574219, |
|
"logits/rejected": 83.2592544555664, |
|
"logps/chosen": -30.965845108032227, |
|
"logps/rejected": -32.591552734375, |
|
"loss": 21.522, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02934456244111061, |
|
"rewards/margins": 0.08649395406246185, |
|
"rewards/rejected": -0.057149387896060944, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 896.0, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.9317626953125, |
|
"logits/rejected": 80.91111755371094, |
|
"logps/chosen": -30.60970687866211, |
|
"logps/rejected": -31.63262939453125, |
|
"loss": 18.9358, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.058744143694639206, |
|
"rewards/margins": 0.10628656297922134, |
|
"rewards/rejected": -0.04754243046045303, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 620.0, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 82.61732482910156, |
|
"logits/rejected": 82.6050033569336, |
|
"logps/chosen": -30.360843658447266, |
|
"logps/rejected": -30.71734619140625, |
|
"loss": 23.6279, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04055742174386978, |
|
"rewards/margins": 0.06548986583948135, |
|
"rewards/rejected": -0.024932442232966423, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 700.0, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 78.06134033203125, |
|
"logits/rejected": 78.00863647460938, |
|
"logps/chosen": -33.79378890991211, |
|
"logps/rejected": -32.66465377807617, |
|
"loss": 20.2659, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09589491784572601, |
|
"rewards/margins": 0.1130196675658226, |
|
"rewards/rejected": -0.017124753445386887, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.65145874023438, |
|
"eval_logits/rejected": 98.6263427734375, |
|
"eval_logps/chosen": -32.57748031616211, |
|
"eval_logps/rejected": -36.30455017089844, |
|
"eval_loss": 24.828882217407227, |
|
"eval_rewards/accuracies": 0.5340532064437866, |
|
"eval_rewards/chosen": -0.013430174440145493, |
|
"eval_rewards/margins": 0.020371900871396065, |
|
"eval_rewards/rejected": -0.03380206972360611, |
|
"eval_runtime": 104.0575, |
|
"eval_samples_per_second": 3.296, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1320.0, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 80.6713638305664, |
|
"logits/rejected": 80.57633972167969, |
|
"logps/chosen": -33.16452407836914, |
|
"logps/rejected": -35.397491455078125, |
|
"loss": 18.2607, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07811959832906723, |
|
"rewards/margins": 0.11925957351922989, |
|
"rewards/rejected": -0.04113996401429176, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 652.0, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 82.79257202148438, |
|
"logits/rejected": 82.88256072998047, |
|
"logps/chosen": -30.9622859954834, |
|
"logps/rejected": -31.26416015625, |
|
"loss": 16.5714, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.09866676479578018, |
|
"rewards/margins": 0.15218719840049744, |
|
"rewards/rejected": -0.053520433604717255, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1144.0, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 79.95256042480469, |
|
"logits/rejected": 80.00727844238281, |
|
"logps/chosen": -32.26613235473633, |
|
"logps/rejected": -34.29724884033203, |
|
"loss": 21.5779, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05396091938018799, |
|
"rewards/margins": 0.08247107267379761, |
|
"rewards/rejected": -0.028510143980383873, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 856.0, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 82.29564666748047, |
|
"logits/rejected": 82.59175109863281, |
|
"logps/chosen": -30.715564727783203, |
|
"logps/rejected": -31.892749786376953, |
|
"loss": 16.3789, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0972500815987587, |
|
"rewards/margins": 0.13332059979438782, |
|
"rewards/rejected": -0.03607049956917763, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1000.0, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 80.96902465820312, |
|
"logits/rejected": 81.0352554321289, |
|
"logps/chosen": -26.926239013671875, |
|
"logps/rejected": -30.266109466552734, |
|
"loss": 20.1404, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.066965751349926, |
|
"rewards/margins": 0.10350307077169418, |
|
"rewards/rejected": -0.03653731197118759, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1064.0, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 78.25106048583984, |
|
"logits/rejected": 78.37794494628906, |
|
"logps/chosen": -30.432825088500977, |
|
"logps/rejected": -36.55792999267578, |
|
"loss": 15.8988, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.11281673610210419, |
|
"rewards/margins": 0.158446803689003, |
|
"rewards/rejected": -0.0456300750374794, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 708.0, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 77.56661224365234, |
|
"logits/rejected": 77.58919525146484, |
|
"logps/chosen": -30.973047256469727, |
|
"logps/rejected": -31.90987777709961, |
|
"loss": 18.1036, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08058776706457138, |
|
"rewards/margins": 0.1210336685180664, |
|
"rewards/rejected": -0.040445905178785324, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1056.0, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 80.2997817993164, |
|
"logits/rejected": 80.07062530517578, |
|
"logps/chosen": -31.0832576751709, |
|
"logps/rejected": -29.884586334228516, |
|
"loss": 22.0775, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07266353815793991, |
|
"rewards/margins": 0.0944225937128067, |
|
"rewards/rejected": -0.021759048104286194, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 808.0, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 80.42820739746094, |
|
"logits/rejected": 80.34810638427734, |
|
"logps/chosen": -33.01291275024414, |
|
"logps/rejected": -32.72394561767578, |
|
"loss": 15.1146, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.10913344472646713, |
|
"rewards/margins": 0.17984186112880707, |
|
"rewards/rejected": -0.07070842385292053, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1072.0, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 76.04508209228516, |
|
"logits/rejected": 76.13191986083984, |
|
"logps/chosen": -32.2183952331543, |
|
"logps/rejected": -29.19476890563965, |
|
"loss": 20.0695, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11873127520084381, |
|
"rewards/margins": 0.12800584733486176, |
|
"rewards/rejected": -0.009274585172533989, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.68071746826172, |
|
"eval_logits/rejected": 98.6502685546875, |
|
"eval_logps/chosen": -32.66925811767578, |
|
"eval_logps/rejected": -36.280174255371094, |
|
"eval_loss": 26.15445899963379, |
|
"eval_rewards/accuracies": 0.5307309031486511, |
|
"eval_rewards/chosen": -0.022608023136854172, |
|
"eval_rewards/margins": 0.008756463415920734, |
|
"eval_rewards/rejected": -0.03136448562145233, |
|
"eval_runtime": 103.8063, |
|
"eval_samples_per_second": 3.304, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1136.0, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 83.16893005371094, |
|
"logits/rejected": 83.19877624511719, |
|
"logps/chosen": -30.013708114624023, |
|
"logps/rejected": -32.592529296875, |
|
"loss": 18.2229, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.08498243242502213, |
|
"rewards/margins": 0.12425950914621353, |
|
"rewards/rejected": -0.039277076721191406, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 596.0, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 80.65386199951172, |
|
"logits/rejected": 80.6548080444336, |
|
"logps/chosen": -30.399967193603516, |
|
"logps/rejected": -29.175945281982422, |
|
"loss": 16.3256, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.11985927820205688, |
|
"rewards/margins": 0.1455042064189911, |
|
"rewards/rejected": -0.025644922628998756, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 604.0, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 77.81417846679688, |
|
"logits/rejected": 77.8701171875, |
|
"logps/chosen": -29.092737197875977, |
|
"logps/rejected": -33.01492691040039, |
|
"loss": 15.6194, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.12598693370819092, |
|
"rewards/margins": 0.16399319469928741, |
|
"rewards/rejected": -0.0380062535405159, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1384.0, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 82.1661376953125, |
|
"logits/rejected": 82.20028686523438, |
|
"logps/chosen": -32.25991439819336, |
|
"logps/rejected": -33.82966995239258, |
|
"loss": 18.1168, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.09522315859794617, |
|
"rewards/margins": 0.1413211077451706, |
|
"rewards/rejected": -0.04609795659780502, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 736.0, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 81.15172576904297, |
|
"logits/rejected": 81.16615295410156, |
|
"logps/chosen": -32.44929504394531, |
|
"logps/rejected": -33.39020919799805, |
|
"loss": 17.9927, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.11944446712732315, |
|
"rewards/margins": 0.14928530156612396, |
|
"rewards/rejected": -0.029840845614671707, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 832.0, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 82.63230895996094, |
|
"logits/rejected": 82.66060638427734, |
|
"logps/chosen": -28.42384910583496, |
|
"logps/rejected": -31.795475006103516, |
|
"loss": 17.9341, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.11283614486455917, |
|
"rewards/margins": 0.12739871442317963, |
|
"rewards/rejected": -0.014562586322426796, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 900.0, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 82.10162353515625, |
|
"logits/rejected": 82.12476348876953, |
|
"logps/chosen": -31.836299896240234, |
|
"logps/rejected": -35.486595153808594, |
|
"loss": 19.9538, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.10398608446121216, |
|
"rewards/margins": 0.14170874655246735, |
|
"rewards/rejected": -0.03772266209125519, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 888.0, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 76.01589965820312, |
|
"logits/rejected": 75.89103698730469, |
|
"logps/chosen": -29.72897720336914, |
|
"logps/rejected": -28.481863021850586, |
|
"loss": 18.4584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.08471504598855972, |
|
"rewards/margins": 0.11684386432170868, |
|
"rewards/rejected": -0.03212881088256836, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 20.413429577319654, |
|
"train_runtime": 2557.2601, |
|
"train_samples_per_second": 1.204, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|