|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 164.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.3906, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 338.0, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08870697021484, |
|
"logits/rejected": 80.791259765625, |
|
"logps/chosen": -34.342586517333984, |
|
"logps/rejected": -33.04642105102539, |
|
"loss": 1.0826, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.08069034665822983, |
|
"rewards/margins": -0.010280769318342209, |
|
"rewards/rejected": -0.07040956616401672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 192.0, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.631591796875, |
|
"logits/rejected": 80.52144622802734, |
|
"logps/chosen": -33.67961883544922, |
|
"logps/rejected": -30.759857177734375, |
|
"loss": 1.1487, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.008231614716351032, |
|
"rewards/margins": -0.005876022391021252, |
|
"rewards/rejected": 0.014107631519436836, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 320.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.36092376708984, |
|
"logits/rejected": 82.38922882080078, |
|
"logps/chosen": -33.96755599975586, |
|
"logps/rejected": -31.327926635742188, |
|
"loss": 1.3772, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.03620918095111847, |
|
"rewards/margins": -0.04051407054066658, |
|
"rewards/rejected": 0.07672326266765594, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 616.0, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 80.7266616821289, |
|
"logits/rejected": 80.72164154052734, |
|
"logps/chosen": -33.125850677490234, |
|
"logps/rejected": -33.346900939941406, |
|
"loss": 2.0081, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.03856855630874634, |
|
"rewards/margins": 0.02127004601061344, |
|
"rewards/rejected": 0.017298510298132896, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 173.0, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.16896057128906, |
|
"logits/rejected": 78.18494415283203, |
|
"logps/chosen": -31.25204849243164, |
|
"logps/rejected": -31.230030059814453, |
|
"loss": 1.2803, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.037441618740558624, |
|
"rewards/margins": 0.08795086294412613, |
|
"rewards/rejected": -0.12539246678352356, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 270.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 82.7291259765625, |
|
"logits/rejected": 82.77815246582031, |
|
"logps/chosen": -31.22023582458496, |
|
"logps/rejected": -29.69997787475586, |
|
"loss": 1.5003, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0360613577067852, |
|
"rewards/margins": 0.05150580406188965, |
|
"rewards/rejected": -0.08756714314222336, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 648.0, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.51155090332031, |
|
"logits/rejected": 83.56068420410156, |
|
"logps/chosen": -30.490299224853516, |
|
"logps/rejected": -32.93412780761719, |
|
"loss": 1.9177, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.11098086833953857, |
|
"rewards/margins": 0.028119858354330063, |
|
"rewards/rejected": 0.08286102861166, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 294.0, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.31900787353516, |
|
"logits/rejected": 81.31642150878906, |
|
"logps/chosen": -31.447376251220703, |
|
"logps/rejected": -30.629833221435547, |
|
"loss": 2.0412, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.04605206102132797, |
|
"rewards/margins": -0.05619793012738228, |
|
"rewards/rejected": 0.10225000232458115, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 260.0, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.35929870605469, |
|
"logits/rejected": 78.31613159179688, |
|
"logps/chosen": -32.67215347290039, |
|
"logps/rejected": -31.034778594970703, |
|
"loss": 2.0579, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.027179216966032982, |
|
"rewards/margins": 0.039463870227336884, |
|
"rewards/rejected": -0.06664308160543442, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 168.0, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.58841705322266, |
|
"logits/rejected": 83.63834381103516, |
|
"logps/chosen": -34.13097381591797, |
|
"logps/rejected": -31.874624252319336, |
|
"loss": 1.5488, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.13921667635440826, |
|
"rewards/margins": 0.2384806126356125, |
|
"rewards/rejected": -0.09926395863294601, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.81468963623047, |
|
"eval_logits/rejected": 98.80232238769531, |
|
"eval_logps/chosen": -32.53538513183594, |
|
"eval_logps/rejected": -36.158119201660156, |
|
"eval_loss": 1.7742832899093628, |
|
"eval_rewards/accuracies": 0.5377907156944275, |
|
"eval_rewards/chosen": -0.07376820594072342, |
|
"eval_rewards/margins": 0.0795043483376503, |
|
"eval_rewards/rejected": -0.1532725691795349, |
|
"eval_runtime": 104.2041, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 304.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.73497009277344, |
|
"logits/rejected": 83.63249206542969, |
|
"logps/chosen": -32.8060417175293, |
|
"logps/rejected": -32.912742614746094, |
|
"loss": 3.7056, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.17341332137584686, |
|
"rewards/margins": 0.3788934051990509, |
|
"rewards/rejected": -0.20548005402088165, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 326.0, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.89222717285156, |
|
"logits/rejected": 84.00482940673828, |
|
"logps/chosen": -28.764019012451172, |
|
"logps/rejected": -35.54996109008789, |
|
"loss": 1.7008, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.13245216012001038, |
|
"rewards/margins": 0.20510384440422058, |
|
"rewards/rejected": -0.0726516991853714, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 190.0, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 81.15211486816406, |
|
"logits/rejected": 81.18699645996094, |
|
"logps/chosen": -30.555667877197266, |
|
"logps/rejected": -32.08338928222656, |
|
"loss": 2.0248, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.281352162361145, |
|
"rewards/margins": 0.4027627110481262, |
|
"rewards/rejected": -0.1214105486869812, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 193.0, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.7738037109375, |
|
"logits/rejected": 82.75199890136719, |
|
"logps/chosen": -27.381637573242188, |
|
"logps/rejected": -32.79624938964844, |
|
"loss": 2.9965, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05254943296313286, |
|
"rewards/margins": 0.3508468568325043, |
|
"rewards/rejected": -0.2982974350452423, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 320.0, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 81.73448181152344, |
|
"logits/rejected": 81.72250366210938, |
|
"logps/chosen": -29.080272674560547, |
|
"logps/rejected": -32.661338806152344, |
|
"loss": 2.5467, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2470671832561493, |
|
"rewards/margins": 0.32294052839279175, |
|
"rewards/rejected": -0.07587336003780365, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 470.0, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 83.80685424804688, |
|
"logits/rejected": 83.82804870605469, |
|
"logps/chosen": -33.54126739501953, |
|
"logps/rejected": -29.94925880432129, |
|
"loss": 3.6642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4763854444026947, |
|
"rewards/margins": 0.4737597107887268, |
|
"rewards/rejected": 0.002625748049467802, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 410.0, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 84.45747375488281, |
|
"logits/rejected": 84.39142608642578, |
|
"logps/chosen": -30.8850040435791, |
|
"logps/rejected": -32.122833251953125, |
|
"loss": 2.1347, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2994261682033539, |
|
"rewards/margins": 0.38164275884628296, |
|
"rewards/rejected": -0.08221657574176788, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 338.0, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 82.16630554199219, |
|
"logits/rejected": 82.15303802490234, |
|
"logps/chosen": -30.96017837524414, |
|
"logps/rejected": -31.404443740844727, |
|
"loss": 2.1896, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.189576655626297, |
|
"rewards/margins": 0.38736575841903687, |
|
"rewards/rejected": -0.19778910279273987, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 260.0, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 84.08841705322266, |
|
"logits/rejected": 84.07431030273438, |
|
"logps/chosen": -31.131977081298828, |
|
"logps/rejected": -30.759252548217773, |
|
"loss": 2.7916, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.29245084524154663, |
|
"rewards/margins": -0.059467971324920654, |
|
"rewards/rejected": -0.2329828441143036, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 478.0, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 79.57664489746094, |
|
"logits/rejected": 79.53712463378906, |
|
"logps/chosen": -34.537757873535156, |
|
"logps/rejected": -32.43874740600586, |
|
"loss": 3.6133, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.17198507487773895, |
|
"rewards/margins": 0.12826156616210938, |
|
"rewards/rejected": 0.04372352734208107, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 99.06522369384766, |
|
"eval_logits/rejected": 99.04875183105469, |
|
"eval_logps/chosen": -32.56056213378906, |
|
"eval_logps/rejected": -36.141395568847656, |
|
"eval_loss": 1.8921738862991333, |
|
"eval_rewards/accuracies": 0.5166113376617432, |
|
"eval_rewards/chosen": -0.09390944987535477, |
|
"eval_rewards/margins": 0.045985639095306396, |
|
"eval_rewards/rejected": -0.13989506661891937, |
|
"eval_runtime": 103.9514, |
|
"eval_samples_per_second": 3.3, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 414.0, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 82.20321655273438, |
|
"logits/rejected": 82.10101318359375, |
|
"logps/chosen": -33.50377655029297, |
|
"logps/rejected": -34.905250549316406, |
|
"loss": 3.0392, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3535541892051697, |
|
"rewards/margins": 0.28887811303138733, |
|
"rewards/rejected": 0.06467613577842712, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 227.0, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 84.46163177490234, |
|
"logits/rejected": 84.5374526977539, |
|
"logps/chosen": -31.67867088317871, |
|
"logps/rejected": -30.870431900024414, |
|
"loss": 2.5301, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.21622808277606964, |
|
"rewards/margins": 0.32940909266471863, |
|
"rewards/rejected": -0.11318100988864899, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 256.0, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 81.72822570800781, |
|
"logits/rejected": 81.78887939453125, |
|
"logps/chosen": -32.73725128173828, |
|
"logps/rejected": -33.852149963378906, |
|
"loss": 3.0508, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0547933354973793, |
|
"rewards/margins": -0.07320666313171387, |
|
"rewards/rejected": 0.12799999117851257, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 264.0, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 84.00396728515625, |
|
"logits/rejected": 84.2620620727539, |
|
"logps/chosen": -31.22537612915039, |
|
"logps/rejected": -31.510395050048828, |
|
"loss": 4.0684, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3701513409614563, |
|
"rewards/margins": 0.3528321385383606, |
|
"rewards/rejected": 0.017319146543741226, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 185.0, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 82.73152160644531, |
|
"logits/rejected": 82.81182098388672, |
|
"logps/chosen": -27.366741180419922, |
|
"logps/rejected": -29.901432037353516, |
|
"loss": 1.8356, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.18332336843013763, |
|
"rewards/margins": 0.18387673795223236, |
|
"rewards/rejected": -0.0005533635849133134, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1064.0, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 80.05500793457031, |
|
"logits/rejected": 80.20561218261719, |
|
"logps/chosen": -31.206958770751953, |
|
"logps/rejected": -35.96382522583008, |
|
"loss": 3.0499, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.283230721950531, |
|
"rewards/margins": 0.17298570275306702, |
|
"rewards/rejected": 0.1102449893951416, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 253.0, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 79.4796142578125, |
|
"logits/rejected": 79.51309967041016, |
|
"logps/chosen": -31.116466522216797, |
|
"logps/rejected": -31.364765167236328, |
|
"loss": 2.1069, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5299628376960754, |
|
"rewards/margins": 0.4174376428127289, |
|
"rewards/rejected": 0.11252517998218536, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 536.0, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 81.92121124267578, |
|
"logits/rejected": 81.70929718017578, |
|
"logps/chosen": -31.43599510192871, |
|
"logps/rejected": -29.533435821533203, |
|
"loss": 2.4746, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.29911860823631287, |
|
"rewards/margins": 0.1922709196805954, |
|
"rewards/rejected": 0.10684768110513687, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 426.0, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 82.15860748291016, |
|
"logits/rejected": 82.0548324584961, |
|
"logps/chosen": -33.60521697998047, |
|
"logps/rejected": -31.876062393188477, |
|
"loss": 3.1769, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3992280066013336, |
|
"rewards/margins": 0.2865853011608124, |
|
"rewards/rejected": 0.11264270544052124, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 288.0, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 77.75070190429688, |
|
"logits/rejected": 77.80924224853516, |
|
"logps/chosen": -32.97479248046875, |
|
"logps/rejected": -28.991891860961914, |
|
"loss": 2.1193, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.34473174810409546, |
|
"rewards/margins": 0.25662440061569214, |
|
"rewards/rejected": 0.08810728043317795, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.99166870117188, |
|
"eval_logits/rejected": 98.98554229736328, |
|
"eval_logps/chosen": -32.376094818115234, |
|
"eval_logps/rejected": -35.87874221801758, |
|
"eval_loss": 1.593911051750183, |
|
"eval_rewards/accuracies": 0.5191029906272888, |
|
"eval_rewards/chosen": 0.05366762727499008, |
|
"eval_rewards/margins": -0.01655910350382328, |
|
"eval_rewards/rejected": 0.07022672146558762, |
|
"eval_runtime": 104.0583, |
|
"eval_samples_per_second": 3.296, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 211.0, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 84.67102813720703, |
|
"logits/rejected": 84.72035217285156, |
|
"logps/chosen": -30.487579345703125, |
|
"logps/rejected": -32.10687255859375, |
|
"loss": 2.1669, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3007642328739166, |
|
"rewards/margins": 0.22645199298858643, |
|
"rewards/rejected": 0.0743122547864914, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 220.0, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 82.1921615600586, |
|
"logits/rejected": 82.18672943115234, |
|
"logps/chosen": -31.139236450195312, |
|
"logps/rejected": -28.77066421508789, |
|
"loss": 1.4103, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.36746180057525635, |
|
"rewards/margins": 0.2483980655670166, |
|
"rewards/rejected": 0.11906375735998154, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 246.0, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 79.607177734375, |
|
"logits/rejected": 79.65017700195312, |
|
"logps/chosen": -29.744531631469727, |
|
"logps/rejected": -32.44166564941406, |
|
"loss": 1.7227, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4864615797996521, |
|
"rewards/margins": 0.3319019079208374, |
|
"rewards/rejected": 0.15455973148345947, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 448.0, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 83.59193420410156, |
|
"logits/rejected": 83.5968017578125, |
|
"logps/chosen": -32.50049591064453, |
|
"logps/rejected": -33.3798828125, |
|
"loss": 3.0512, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5693238973617554, |
|
"rewards/margins": 0.5782763361930847, |
|
"rewards/rejected": -0.008952394127845764, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 304.0, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 82.85350036621094, |
|
"logits/rejected": 82.85616302490234, |
|
"logps/chosen": -33.243656158447266, |
|
"logps/rejected": -33.08414840698242, |
|
"loss": 1.7389, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.3200667202472687, |
|
"rewards/margins": 0.31394660472869873, |
|
"rewards/rejected": 0.006120128557085991, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 120.5, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 84.1597900390625, |
|
"logits/rejected": 84.19812774658203, |
|
"logps/chosen": -28.9798526763916, |
|
"logps/rejected": -31.50480079650879, |
|
"loss": 1.7363, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4578874111175537, |
|
"rewards/margins": 0.3418472409248352, |
|
"rewards/rejected": 0.11604013293981552, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 392.0, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 83.60018157958984, |
|
"logits/rejected": 83.62623596191406, |
|
"logps/chosen": -32.55873489379883, |
|
"logps/rejected": -34.760498046875, |
|
"loss": 2.6656, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.25394394993782043, |
|
"rewards/margins": -0.025154482573270798, |
|
"rewards/rejected": 0.2790984511375427, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 338.0, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 77.67994689941406, |
|
"logits/rejected": 77.54925537109375, |
|
"logps/chosen": -30.067230224609375, |
|
"logps/rejected": -27.904937744140625, |
|
"loss": 2.1564, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4071175158023834, |
|
"rewards/margins": 0.20260827243328094, |
|
"rewards/rejected": 0.20450922846794128, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 2.2952296467570514, |
|
"train_runtime": 2556.4368, |
|
"train_samples_per_second": 1.204, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|