|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8664777278900146, |
|
"logits/rejected": -1.8707849979400635, |
|
"logps/chosen": -36.99364471435547, |
|
"logps/rejected": -33.650604248046875, |
|
"loss": 0.9766, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.00906434003263712, |
|
"rewards/margins": 0.023435616865754128, |
|
"rewards/rejected": -0.014371277764439583, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9982150793075562, |
|
"logits/rejected": -2.0008621215820312, |
|
"logps/chosen": -29.64394760131836, |
|
"logps/rejected": -29.04986000061035, |
|
"loss": 1.0107, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0012267641723155975, |
|
"rewards/margins": -0.010734880343079567, |
|
"rewards/rejected": 0.00950811617076397, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.920768141746521, |
|
"logits/rejected": -1.9180870056152344, |
|
"logps/chosen": -31.416461944580078, |
|
"logps/rejected": -33.2098274230957, |
|
"loss": 1.0063, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.000256747764069587, |
|
"rewards/margins": -0.0063001858070492744, |
|
"rewards/rejected": 0.00604343693703413, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0177602767944336, |
|
"logits/rejected": -2.009014129638672, |
|
"logps/chosen": -32.56236267089844, |
|
"logps/rejected": -32.517822265625, |
|
"loss": 0.9863, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.010110180824995041, |
|
"rewards/margins": 0.013717299327254295, |
|
"rewards/rejected": -0.003607118036597967, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.862694501876831, |
|
"logits/rejected": -1.8519262075424194, |
|
"logps/chosen": -33.541160583496094, |
|
"logps/rejected": -35.44048309326172, |
|
"loss": 0.9969, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.011423684656620026, |
|
"rewards/margins": 0.003121361369267106, |
|
"rewards/rejected": 0.00830232072621584, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9417282342910767, |
|
"logits/rejected": -1.9436867237091064, |
|
"logps/chosen": -32.52958679199219, |
|
"logps/rejected": -33.216880798339844, |
|
"loss": 0.9296, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.04941769689321518, |
|
"rewards/margins": 0.08126799017190933, |
|
"rewards/rejected": -0.031850285828113556, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.0729386806488037, |
|
"logits/rejected": -2.0779125690460205, |
|
"logps/chosen": -33.99254608154297, |
|
"logps/rejected": -36.62586212158203, |
|
"loss": 0.9629, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0028400986921042204, |
|
"rewards/margins": 0.037055134773254395, |
|
"rewards/rejected": -0.03989524394273758, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9335737228393555, |
|
"logits/rejected": -1.9366981983184814, |
|
"logps/chosen": -34.332157135009766, |
|
"logps/rejected": -34.641021728515625, |
|
"loss": 0.9026, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06695752590894699, |
|
"rewards/margins": 0.10098665952682495, |
|
"rewards/rejected": -0.03402913734316826, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9401309490203857, |
|
"logits/rejected": -1.9446433782577515, |
|
"logps/chosen": -32.37213897705078, |
|
"logps/rejected": -32.343849182128906, |
|
"loss": 0.9438, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06670050323009491, |
|
"rewards/margins": 0.05615914613008499, |
|
"rewards/rejected": 0.010541360825300217, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.037087917327881, |
|
"logits/rejected": -2.035101890563965, |
|
"logps/chosen": -32.13945388793945, |
|
"logps/rejected": -31.313283920288086, |
|
"loss": 0.8961, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07096613943576813, |
|
"rewards/margins": 0.10391455888748169, |
|
"rewards/rejected": -0.032948415726423264, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.231482982635498, |
|
"eval_logits/rejected": -2.226637363433838, |
|
"eval_logps/chosen": -34.02524185180664, |
|
"eval_logps/rejected": -37.54085159301758, |
|
"eval_loss": 0.9766585230827332, |
|
"eval_rewards/accuracies": 0.5365448594093323, |
|
"eval_rewards/chosen": 0.006516099441796541, |
|
"eval_rewards/margins": 0.02347717247903347, |
|
"eval_rewards/rejected": -0.016961071640253067, |
|
"eval_runtime": 145.8279, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.9920060634613037, |
|
"logits/rejected": -1.9896419048309326, |
|
"logps/chosen": -33.146766662597656, |
|
"logps/rejected": -34.02008819580078, |
|
"loss": 0.9486, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0679345652461052, |
|
"rewards/margins": 0.061437882483005524, |
|
"rewards/rejected": 0.006496679037809372, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.003952741622925, |
|
"logits/rejected": -1.9956319332122803, |
|
"logps/chosen": -32.33639144897461, |
|
"logps/rejected": -32.133079528808594, |
|
"loss": 0.9488, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.07595954835414886, |
|
"rewards/margins": 0.05606143921613693, |
|
"rewards/rejected": 0.019898109138011932, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0316150188446045, |
|
"logits/rejected": -2.0236544609069824, |
|
"logps/chosen": -30.298206329345703, |
|
"logps/rejected": -32.07080841064453, |
|
"loss": 0.8911, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.12425784766674042, |
|
"rewards/margins": 0.1367165446281433, |
|
"rewards/rejected": -0.0124586820602417, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.962376356124878, |
|
"logits/rejected": -1.9726108312606812, |
|
"logps/chosen": -31.235275268554688, |
|
"logps/rejected": -32.56925964355469, |
|
"loss": 0.8524, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.1290682703256607, |
|
"rewards/margins": 0.15313370525836945, |
|
"rewards/rejected": -0.024065453559160233, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8732143640518188, |
|
"logits/rejected": -1.874371886253357, |
|
"logps/chosen": -33.8985481262207, |
|
"logps/rejected": -34.81908416748047, |
|
"loss": 0.786, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.20590214431285858, |
|
"rewards/margins": 0.24918103218078613, |
|
"rewards/rejected": -0.04327889531850815, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.924538016319275, |
|
"logits/rejected": -1.921121597290039, |
|
"logps/chosen": -36.01353454589844, |
|
"logps/rejected": -32.723262786865234, |
|
"loss": 0.8894, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.11069967597723007, |
|
"rewards/margins": 0.11105670034885406, |
|
"rewards/rejected": -0.0003570284752640873, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.0247209072113037, |
|
"logits/rejected": -2.017392635345459, |
|
"logps/chosen": -33.49879455566406, |
|
"logps/rejected": -31.44363784790039, |
|
"loss": 0.7434, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2093469649553299, |
|
"rewards/margins": 0.2735980153083801, |
|
"rewards/rejected": -0.06425107270479202, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.031324863433838, |
|
"logits/rejected": -2.0365915298461914, |
|
"logps/chosen": -32.253074645996094, |
|
"logps/rejected": -32.45112609863281, |
|
"loss": 0.8312, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.20420043170452118, |
|
"rewards/margins": 0.18039169907569885, |
|
"rewards/rejected": 0.023808732628822327, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0317559242248535, |
|
"logits/rejected": -2.0289719104766846, |
|
"logps/chosen": -31.279537200927734, |
|
"logps/rejected": -31.34115219116211, |
|
"loss": 0.8405, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.14659160375595093, |
|
"rewards/margins": 0.16264860332012177, |
|
"rewards/rejected": -0.016057008877396584, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9018064737319946, |
|
"logits/rejected": -1.9064457416534424, |
|
"logps/chosen": -31.301830291748047, |
|
"logps/rejected": -32.8339729309082, |
|
"loss": 0.7699, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.21311891078948975, |
|
"rewards/margins": 0.2432461678981781, |
|
"rewards/rejected": -0.03012726828455925, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.2282192707061768, |
|
"eval_logits/rejected": -2.223379611968994, |
|
"eval_logps/chosen": -34.05467224121094, |
|
"eval_logps/rejected": -37.57374572753906, |
|
"eval_loss": 0.974229633808136, |
|
"eval_rewards/accuracies": 0.530315637588501, |
|
"eval_rewards/chosen": -0.01408342458307743, |
|
"eval_rewards/margins": 0.02590302750468254, |
|
"eval_rewards/rejected": -0.03998645395040512, |
|
"eval_runtime": 145.7826, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.014596462249756, |
|
"logits/rejected": -2.0252418518066406, |
|
"logps/chosen": -31.797557830810547, |
|
"logps/rejected": -33.982398986816406, |
|
"loss": 0.8139, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.12162177264690399, |
|
"rewards/margins": 0.20541362464427948, |
|
"rewards/rejected": -0.08379185199737549, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.9068737030029297, |
|
"logits/rejected": -1.9216482639312744, |
|
"logps/chosen": -29.838830947875977, |
|
"logps/rejected": -31.62994956970215, |
|
"loss": 0.766, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1908968687057495, |
|
"rewards/margins": 0.24113738536834717, |
|
"rewards/rejected": -0.05024053901433945, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9636850357055664, |
|
"logits/rejected": -1.967655897140503, |
|
"logps/chosen": -33.12433624267578, |
|
"logps/rejected": -31.637094497680664, |
|
"loss": 0.7833, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.20519034564495087, |
|
"rewards/margins": 0.2666449546813965, |
|
"rewards/rejected": -0.06145460531115532, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9611847400665283, |
|
"logits/rejected": -1.9393657445907593, |
|
"logps/chosen": -33.867958068847656, |
|
"logps/rejected": -35.12390899658203, |
|
"loss": 0.737, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2000524252653122, |
|
"rewards/margins": 0.32123422622680664, |
|
"rewards/rejected": -0.12118180096149445, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.001960277557373, |
|
"logits/rejected": -1.9986454248428345, |
|
"logps/chosen": -32.73499298095703, |
|
"logps/rejected": -36.28093719482422, |
|
"loss": 0.8434, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.13304933905601501, |
|
"rewards/margins": 0.17225751280784607, |
|
"rewards/rejected": -0.03920816630125046, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8687576055526733, |
|
"logits/rejected": -1.866320013999939, |
|
"logps/chosen": -33.981781005859375, |
|
"logps/rejected": -35.54584503173828, |
|
"loss": 0.8296, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.14463281631469727, |
|
"rewards/margins": 0.1734902262687683, |
|
"rewards/rejected": -0.028857415542006493, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.853939414024353, |
|
"logits/rejected": -1.8515437841415405, |
|
"logps/chosen": -34.234046936035156, |
|
"logps/rejected": -31.837631225585938, |
|
"loss": 0.8487, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.11448518931865692, |
|
"rewards/margins": 0.16089434921741486, |
|
"rewards/rejected": -0.04640916362404823, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9570705890655518, |
|
"logits/rejected": -1.9465986490249634, |
|
"logps/chosen": -35.030006408691406, |
|
"logps/rejected": -31.88030433654785, |
|
"loss": 0.7553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.22864773869514465, |
|
"rewards/margins": 0.2561652660369873, |
|
"rewards/rejected": -0.02751758135855198, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0522685050964355, |
|
"logits/rejected": -2.0373730659484863, |
|
"logps/chosen": -30.7352352142334, |
|
"logps/rejected": -32.61699676513672, |
|
"loss": 0.9072, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.13222160935401917, |
|
"rewards/margins": 0.10783363878726959, |
|
"rewards/rejected": 0.024387964978814125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9228973388671875, |
|
"logits/rejected": -1.9203764200210571, |
|
"logps/chosen": -32.44710159301758, |
|
"logps/rejected": -30.934436798095703, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.3360074460506439, |
|
"rewards/margins": 0.418344646692276, |
|
"rewards/rejected": -0.08233721554279327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2234153747558594, |
|
"eval_logits/rejected": -2.218602180480957, |
|
"eval_logps/chosen": -34.08680725097656, |
|
"eval_logps/rejected": -37.60466003417969, |
|
"eval_loss": 0.976102888584137, |
|
"eval_rewards/accuracies": 0.529900312423706, |
|
"eval_rewards/chosen": -0.036580219864845276, |
|
"eval_rewards/margins": 0.02504708059132099, |
|
"eval_rewards/rejected": -0.06162729859352112, |
|
"eval_runtime": 145.7665, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9082481861114502, |
|
"logits/rejected": -1.9050118923187256, |
|
"logps/chosen": -31.349285125732422, |
|
"logps/rejected": -33.84658432006836, |
|
"loss": 0.7796, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.17784307897090912, |
|
"rewards/margins": 0.2509470283985138, |
|
"rewards/rejected": -0.07310393452644348, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9580894708633423, |
|
"logits/rejected": -1.9458973407745361, |
|
"logps/chosen": -34.3031005859375, |
|
"logps/rejected": -33.67659378051758, |
|
"loss": 0.7302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2050826996564865, |
|
"rewards/margins": 0.3015114367008209, |
|
"rewards/rejected": -0.09642868489027023, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -1.9932842254638672, |
|
"logits/rejected": -1.9918495416641235, |
|
"logps/chosen": -33.17847442626953, |
|
"logps/rejected": -32.54157638549805, |
|
"loss": 0.7677, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.21512338519096375, |
|
"rewards/margins": 0.2585209906101227, |
|
"rewards/rejected": -0.04339758679270744, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.0800719261169434, |
|
"logits/rejected": -2.064396381378174, |
|
"logps/chosen": -33.80484390258789, |
|
"logps/rejected": -33.1123046875, |
|
"loss": 0.7636, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2621825039386749, |
|
"rewards/margins": 0.25817227363586426, |
|
"rewards/rejected": 0.004010227043181658, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.9522559642791748, |
|
"logits/rejected": -1.951424241065979, |
|
"logps/chosen": -32.8499755859375, |
|
"logps/rejected": -32.56407165527344, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3024839758872986, |
|
"rewards/margins": 0.3717316687107086, |
|
"rewards/rejected": -0.06924761831760406, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.9075695276260376, |
|
"logits/rejected": -1.917851209640503, |
|
"logps/chosen": -31.882221221923828, |
|
"logps/rejected": -35.31555938720703, |
|
"loss": 0.7689, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.22572879493236542, |
|
"rewards/margins": 0.24449090659618378, |
|
"rewards/rejected": -0.018762132152915, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.0478641986846924, |
|
"logits/rejected": -2.041414737701416, |
|
"logps/chosen": -33.331912994384766, |
|
"logps/rejected": -29.259756088256836, |
|
"loss": 0.7658, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2240387201309204, |
|
"rewards/margins": 0.2473684549331665, |
|
"rewards/rejected": -0.02332974039018154, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.9081246852874756, |
|
"logits/rejected": -1.9103105068206787, |
|
"logps/chosen": -33.882568359375, |
|
"logps/rejected": -30.96805191040039, |
|
"loss": 0.741, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.24953576922416687, |
|
"rewards/margins": 0.3163323998451233, |
|
"rewards/rejected": -0.06679664552211761, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.8438688600218142, |
|
"train_runtime": 3250.9917, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|