|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.866452693939209, |
|
"logits/rejected": -1.8707809448242188, |
|
"logps/chosen": -36.98366165161133, |
|
"logps/rejected": -33.666290283203125, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.01834939420223236, |
|
"rewards/margins": 0.04732292518019676, |
|
"rewards/rejected": -0.02897353284060955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9971332550048828, |
|
"logits/rejected": -1.999770164489746, |
|
"logps/chosen": -29.6362247467041, |
|
"logps/rejected": -29.044448852539062, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.004776014480739832, |
|
"rewards/margins": -0.01042011845856905, |
|
"rewards/rejected": 0.015196132473647594, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9208379983901978, |
|
"logits/rejected": -1.9181442260742188, |
|
"logps/chosen": -31.425155639648438, |
|
"logps/rejected": -33.237945556640625, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.007249836809933186, |
|
"rewards/margins": 0.008341209962964058, |
|
"rewards/rejected": -0.015591045841574669, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0180625915527344, |
|
"logits/rejected": -2.009308338165283, |
|
"logps/chosen": -32.559749603271484, |
|
"logps/rejected": -32.524681091308594, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.013646525330841541, |
|
"rewards/margins": 0.023250887170433998, |
|
"rewards/rejected": -0.009604359045624733, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8639278411865234, |
|
"logits/rejected": -1.8531490564346313, |
|
"logps/chosen": -33.53788375854492, |
|
"logps/rejected": -35.42852020263672, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.015677569434046745, |
|
"rewards/margins": -0.0033827773295342922, |
|
"rewards/rejected": 0.019060343503952026, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9435577392578125, |
|
"logits/rejected": -1.9454940557479858, |
|
"logps/chosen": -32.49355697631836, |
|
"logps/rejected": -33.1856575012207, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.08530057966709137, |
|
"rewards/margins": 0.09672373533248901, |
|
"rewards/rejected": -0.011423162184655666, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.075368881225586, |
|
"logits/rejected": -2.0803427696228027, |
|
"logps/chosen": -33.89010238647461, |
|
"logps/rejected": -36.549957275390625, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.07870620489120483, |
|
"rewards/margins": 0.06357409805059433, |
|
"rewards/rejected": 0.015132094733417034, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9372116327285767, |
|
"logits/rejected": -1.9403302669525146, |
|
"logps/chosen": -34.205604553222656, |
|
"logps/rejected": -34.49314880371094, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.17776472866535187, |
|
"rewards/margins": 0.09835849702358246, |
|
"rewards/rejected": 0.0794062465429306, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.945416808128357, |
|
"logits/rejected": -1.9499365091323853, |
|
"logps/chosen": -32.26136016845703, |
|
"logps/rejected": -32.26020050048828, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.16484788060188293, |
|
"rewards/margins": 0.08588258177042007, |
|
"rewards/rejected": 0.07896529138088226, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.043586254119873, |
|
"logits/rejected": -2.0416104793548584, |
|
"logps/chosen": -31.968154907226562, |
|
"logps/rejected": -31.14252281188965, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.21814338862895966, |
|
"rewards/margins": 0.11919467151165009, |
|
"rewards/rejected": 0.09894871711730957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2375268936157227, |
|
"eval_logits/rejected": -2.2326817512512207, |
|
"eval_logps/chosen": -33.851219177246094, |
|
"eval_logps/rejected": -37.3537483215332, |
|
"eval_loss": 0.49629145860671997, |
|
"eval_rewards/accuracies": 0.5336378812789917, |
|
"eval_rewards/chosen": 0.14666710793972015, |
|
"eval_rewards/margins": 0.016369162127375603, |
|
"eval_rewards/rejected": 0.1302979290485382, |
|
"eval_runtime": 145.7938, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.998853087425232, |
|
"logits/rejected": -1.9964749813079834, |
|
"logps/chosen": -32.937965393066406, |
|
"logps/rejected": -33.846004486083984, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2446821928024292, |
|
"rewards/margins": 0.09798828512430191, |
|
"rewards/rejected": 0.1466939002275467, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0093610286712646, |
|
"logits/rejected": -2.0010437965393066, |
|
"logps/chosen": -32.1511116027832, |
|
"logps/rejected": -31.939533233642578, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.2350343018770218, |
|
"rewards/margins": 0.057456426322460175, |
|
"rewards/rejected": 0.17757786810398102, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0374419689178467, |
|
"logits/rejected": -2.029500961303711, |
|
"logps/chosen": -30.157424926757812, |
|
"logps/rejected": -31.90500831604004, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2546332776546478, |
|
"rewards/margins": 0.1362351030111313, |
|
"rewards/rejected": 0.11839816719293594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9675471782684326, |
|
"logits/rejected": -1.9777450561523438, |
|
"logps/chosen": -31.059162139892578, |
|
"logps/rejected": -32.384300231933594, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.28839823603630066, |
|
"rewards/margins": 0.16793587803840637, |
|
"rewards/rejected": 0.12046238034963608, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8793624639511108, |
|
"logits/rejected": -1.8805418014526367, |
|
"logps/chosen": -33.646915435791016, |
|
"logps/rejected": -34.56272888183594, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4366222321987152, |
|
"rewards/margins": 0.2809991240501404, |
|
"rewards/rejected": 0.15562310814857483, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9307949542999268, |
|
"logits/rejected": -1.9274256229400635, |
|
"logps/chosen": -35.765926361083984, |
|
"logps/rejected": -32.48193359375, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.32460084557533264, |
|
"rewards/margins": 0.13194666802883148, |
|
"rewards/rejected": 0.19265416264533997, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.0318870544433594, |
|
"logits/rejected": -2.0245723724365234, |
|
"logps/chosen": -33.25593566894531, |
|
"logps/rejected": -31.189266204833984, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4335424304008484, |
|
"rewards/margins": 0.3034728467464447, |
|
"rewards/rejected": 0.1300695389509201, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0387840270996094, |
|
"logits/rejected": -2.0439517498016357, |
|
"logps/chosen": -31.977941513061523, |
|
"logps/rejected": -32.183441162109375, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4534761905670166, |
|
"rewards/margins": 0.2121172845363617, |
|
"rewards/rejected": 0.2413589060306549, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.039411783218384, |
|
"logits/rejected": -2.036684513092041, |
|
"logps/chosen": -31.094594955444336, |
|
"logps/rejected": -31.10666275024414, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3154846429824829, |
|
"rewards/margins": 0.1462438404560089, |
|
"rewards/rejected": 0.169240802526474, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.909691572189331, |
|
"logits/rejected": -1.914358139038086, |
|
"logps/chosen": -31.08693504333496, |
|
"logps/rejected": -32.61079406738281, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.41547876596450806, |
|
"rewards/margins": 0.27136775851249695, |
|
"rewards/rejected": 0.14411096274852753, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.233895778656006, |
|
"eval_logits/rejected": -2.2290823459625244, |
|
"eval_logps/chosen": -33.78962326049805, |
|
"eval_logps/rejected": -37.29551696777344, |
|
"eval_loss": 0.4955826997756958, |
|
"eval_rewards/accuracies": 0.5485880374908447, |
|
"eval_rewards/chosen": 0.19594170153141022, |
|
"eval_rewards/margins": 0.019058095291256905, |
|
"eval_rewards/rejected": 0.17688362300395966, |
|
"eval_runtime": 145.7262, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.0220179557800293, |
|
"logits/rejected": -2.0326101779937744, |
|
"logps/chosen": -31.51509666442871, |
|
"logps/rejected": -33.655662536621094, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.36496439576148987, |
|
"rewards/margins": 0.19933317601680756, |
|
"rewards/rejected": 0.16563120484352112, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.9142780303955078, |
|
"logits/rejected": -1.9289791584014893, |
|
"logps/chosen": -29.601001739501953, |
|
"logps/rejected": -31.418704986572266, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4084321856498718, |
|
"rewards/margins": 0.2968555688858032, |
|
"rewards/rejected": 0.11157669872045517, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9722967147827148, |
|
"logits/rejected": -1.9762967824935913, |
|
"logps/chosen": -32.825157165527344, |
|
"logps/rejected": -31.445331573486328, |
|
"loss": 0.4142, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.47385087609291077, |
|
"rewards/margins": 0.39067354798316956, |
|
"rewards/rejected": 0.0831773579120636, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9708006381988525, |
|
"logits/rejected": -1.9490963220596313, |
|
"logps/chosen": -33.599205017089844, |
|
"logps/rejected": -34.856910705566406, |
|
"loss": 0.4183, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4436335563659668, |
|
"rewards/margins": 0.36852845549583435, |
|
"rewards/rejected": 0.07510510087013245, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.011307954788208, |
|
"logits/rejected": -2.008012533187866, |
|
"logps/chosen": -32.494956970214844, |
|
"logps/rejected": -35.98812484741211, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3440879285335541, |
|
"rewards/margins": 0.15464913845062256, |
|
"rewards/rejected": 0.18943879008293152, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8780018091201782, |
|
"logits/rejected": -1.8755817413330078, |
|
"logps/chosen": -33.72126388549805, |
|
"logps/rejected": -35.290069580078125, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.37370795011520386, |
|
"rewards/margins": 0.20206721127033234, |
|
"rewards/rejected": 0.17164072394371033, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8628206253051758, |
|
"logits/rejected": -1.8603187799453735, |
|
"logps/chosen": -33.929344177246094, |
|
"logps/rejected": -31.61127281188965, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3746057152748108, |
|
"rewards/margins": 0.246560737490654, |
|
"rewards/rejected": 0.128044992685318, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9666433334350586, |
|
"logits/rejected": -1.9561984539031982, |
|
"logps/chosen": -34.74022674560547, |
|
"logps/rejected": -31.632495880126953, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4931296706199646, |
|
"rewards/margins": 0.3263343572616577, |
|
"rewards/rejected": 0.16679534316062927, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0619094371795654, |
|
"logits/rejected": -2.0470869541168213, |
|
"logps/chosen": -30.43317222595215, |
|
"logps/rejected": -32.3420524597168, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.39276057481765747, |
|
"rewards/margins": 0.1449340283870697, |
|
"rewards/rejected": 0.24782654643058777, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9332126379013062, |
|
"logits/rejected": -1.9307750463485718, |
|
"logps/chosen": -32.100502014160156, |
|
"logps/rejected": -30.6780948638916, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6612862348556519, |
|
"rewards/margins": 0.5503143072128296, |
|
"rewards/rejected": 0.1109718531370163, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.231203556060791, |
|
"eval_logits/rejected": -2.226395606994629, |
|
"eval_logps/chosen": -33.778892517089844, |
|
"eval_logps/rejected": -37.28717803955078, |
|
"eval_loss": 0.4949820339679718, |
|
"eval_rewards/accuracies": 0.5245016813278198, |
|
"eval_rewards/chosen": 0.204526886343956, |
|
"eval_rewards/margins": 0.020971858873963356, |
|
"eval_rewards/rejected": 0.1835550218820572, |
|
"eval_runtime": 145.6189, |
|
"eval_samples_per_second": 2.355, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9173246622085571, |
|
"logits/rejected": -1.9141733646392822, |
|
"logps/chosen": -31.059444427490234, |
|
"logps/rejected": -33.56504440307617, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4351249635219574, |
|
"rewards/margins": 0.293440043926239, |
|
"rewards/rejected": 0.141684889793396, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9676717519760132, |
|
"logits/rejected": -1.9555410146713257, |
|
"logps/chosen": -34.03219985961914, |
|
"logps/rejected": -33.442317962646484, |
|
"loss": 0.417, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4510994851589203, |
|
"rewards/margins": 0.37388378381729126, |
|
"rewards/rejected": 0.07721573859453201, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -2.002480983734131, |
|
"logits/rejected": -2.0011117458343506, |
|
"logps/chosen": -32.882102966308594, |
|
"logps/rejected": -32.251502990722656, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4829506278038025, |
|
"rewards/margins": 0.30048781633377075, |
|
"rewards/rejected": 0.18246281147003174, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.0899081230163574, |
|
"logits/rejected": -2.0742757320404053, |
|
"logps/chosen": -33.487709045410156, |
|
"logps/rejected": -32.8193359375, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5533460974693298, |
|
"rewards/margins": 0.3143841624259949, |
|
"rewards/rejected": 0.23896190524101257, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.9617509841918945, |
|
"logits/rejected": -1.9609159231185913, |
|
"logps/chosen": -32.60249710083008, |
|
"logps/rejected": -32.25555419921875, |
|
"loss": 0.418, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5436802506446838, |
|
"rewards/margins": 0.37600547075271606, |
|
"rewards/rejected": 0.16767482459545135, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.9185073375701904, |
|
"logits/rejected": -1.9287922382354736, |
|
"logps/chosen": -31.57277488708496, |
|
"logps/rejected": -35.039085388183594, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.505532443523407, |
|
"rewards/margins": 0.305793821811676, |
|
"rewards/rejected": 0.19973860681056976, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.0573890209198, |
|
"logits/rejected": -2.0508790016174316, |
|
"logps/chosen": -33.04835891723633, |
|
"logps/rejected": -28.99324607849121, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4828890860080719, |
|
"rewards/margins": 0.2963466942310333, |
|
"rewards/rejected": 0.18654237687587738, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.9167007207870483, |
|
"logits/rejected": -1.9188674688339233, |
|
"logps/chosen": -33.65839385986328, |
|
"logps/rejected": -30.719829559326172, |
|
"loss": 0.4247, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.46452397108078003, |
|
"rewards/margins": 0.34228652715682983, |
|
"rewards/rejected": 0.12223746627569199, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4538224170734356, |
|
"train_runtime": 3252.427, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|