{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.282051282051282e-08,
      "logits/chosen": -1.7278180122375488,
      "logits/rejected": -1.7377450466156006,
      "logps/chosen": -29.553977966308594,
      "logps/rejected": -42.813133239746094,
      "loss": 1.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.2820512820512818e-07,
      "logits/chosen": -1.866068720817566,
      "logits/rejected": -1.8703863620758057,
      "logps/chosen": -36.99105453491211,
      "logps/rejected": -33.65992736816406,
      "loss": 0.9728,
      "rewards/accuracies": 0.5138888955116272,
      "rewards/chosen": 0.009325359016656876,
      "rewards/margins": 0.027240369468927383,
      "rewards/rejected": -0.017915012314915657,
      "step": 10
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.5641025641025636e-07,
      "logits/chosen": -1.9970887899398804,
      "logits/rejected": -1.9997413158416748,
      "logps/chosen": -29.63595199584961,
      "logps/rejected": -29.070571899414062,
      "loss": 0.992,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.003744622226804495,
      "rewards/margins": 0.008021654561161995,
      "rewards/rejected": -0.004277031868696213,
      "step": 20
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.8461538461538463e-07,
      "logits/chosen": -1.9195963144302368,
      "logits/rejected": -1.9168994426727295,
      "logps/chosen": -31.414175033569336,
      "logps/rejected": -33.25041961669922,
      "loss": 0.9797,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.0011531396303325891,
      "rewards/margins": 0.02032935619354248,
      "rewards/rejected": -0.019176214933395386,
      "step": 30
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999896948438433e-07,
      "logits/chosen": -2.01688814163208,
      "logits/rejected": -2.0081212520599365,
      "logps/chosen": -32.583614349365234,
      "logps/rejected": -32.48966979980469,
      "loss": 1.0179,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.004085154738277197,
      "rewards/margins": -0.017886672168970108,
      "rewards/rejected": 0.013801517896354198,
      "step": 40
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.987541037542186e-07,
      "logits/chosen": -1.8644500970840454,
      "logits/rejected": -1.853655219078064,
      "logps/chosen": -33.54804229736328,
      "logps/rejected": -35.43767166137695,
      "loss": 1.0031,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.005664288066327572,
      "rewards/margins": -0.0031387731432914734,
      "rewards/rejected": 0.00880306214094162,
      "step": 50
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.954691471941118e-07,
      "logits/chosen": -1.9449504613876343,
      "logits/rejected": -1.946890115737915,
      "logps/chosen": -32.58049774169922,
      "logps/rejected": -33.199378967285156,
      "loss": 0.9714,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.011811850592494011,
      "rewards/margins": 0.02860717847943306,
      "rewards/rejected": -0.016795331612229347,
      "step": 60
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.901618883413548e-07,
      "logits/chosen": -2.0795464515686035,
      "logits/rejected": -2.0845184326171875,
      "logps/chosen": -33.98798370361328,
      "logps/rejected": -36.569732666015625,
      "loss": 0.9992,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.0003018675488419831,
      "rewards/margins": 0.0008206713828258216,
      "rewards/rejected": -0.0005188033101148903,
      "step": 70
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.828760511501322e-07,
      "logits/chosen": -1.9425585269927979,
      "logits/rejected": -1.945704698562622,
      "logps/chosen": -34.408721923828125,
      "logps/rejected": -34.581504821777344,
      "loss": 0.9951,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.011452903971076012,
      "rewards/margins": 0.0049109989777207375,
      "rewards/rejected": 0.006541903130710125,
      "step": 80
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.736716601303429e-07,
      "logits/chosen": -1.9505088329315186,
      "logits/rejected": -1.9550243616104126,
      "logps/chosen": -32.451255798339844,
      "logps/rejected": -32.366790771484375,
      "loss": 0.9856,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.009697502478957176,
      "rewards/margins": 0.014425436034798622,
      "rewards/rejected": -0.0047279344871640205,
      "step": 90
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.62624545834521e-07,
      "logits/chosen": -2.048947334289551,
      "logits/rejected": -2.046935796737671,
      "logps/chosen": -32.25363540649414,
      "logps/rejected": -31.277917861938477,
      "loss": 1.0007,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.007680465932935476,
      "rewards/margins": -0.0006578800384886563,
      "rewards/rejected": -0.007022587116807699,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.243957042694092,
      "eval_logits/rejected": -2.2390689849853516,
      "eval_logps/chosen": -34.019203186035156,
      "eval_logps/rejected": -37.50347900390625,
      "eval_loss": 0.998858630657196,
      "eval_rewards/accuracies": 0.5402824282646179,
      "eval_rewards/chosen": 0.009209612384438515,
      "eval_rewards/margins": 0.0013267018366605043,
      "eval_rewards/rejected": 0.00788290984928608,
      "eval_runtime": 145.9923,
      "eval_samples_per_second": 2.349,
      "eval_steps_per_second": 0.295,
      "step": 100
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.4982572012636904e-07,
      "logits/chosen": -2.005302667617798,
      "logits/rejected": -2.002894878387451,
      "logps/chosen": -33.23944854736328,
      "logps/rejected": -34.005104064941406,
      "loss": 1.0119,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.002622986678034067,
      "rewards/margins": -0.01194014959037304,
      "rewards/rejected": 0.014563137665390968,
      "step": 110
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.353806263777677e-07,
      "logits/chosen": -2.0167059898376465,
      "logits/rejected": -2.0083398818969727,
      "logps/chosen": -32.45713424682617,
      "logps/rejected": -32.17597198486328,
      "loss": 0.9987,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.0073361145332455635,
      "rewards/margins": 0.0013455990701913834,
      "rewards/rejected": -0.008681714534759521,
      "step": 120
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.194082707715275e-07,
      "logits/chosen": -2.0464558601379395,
      "logits/rejected": -2.038412094116211,
      "logps/chosen": -30.482311248779297,
      "logps/rejected": -32.04933166503906,
      "loss": 1.0062,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.003956255037337542,
      "rewards/margins": -0.006162940990179777,
      "rewards/rejected": 0.002206685720011592,
      "step": 130
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.020402418666621e-07,
      "logits/chosen": -1.9769861698150635,
      "logits/rejected": -1.9872652292251587,
      "logps/chosen": -31.415592193603516,
      "logps/rejected": -32.5562858581543,
      "loss": 0.9847,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0024404728319495916,
      "rewards/margins": 0.015285758301615715,
      "rewards/rejected": -0.01284528523683548,
      "step": 140
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.8341962650351185e-07,
      "logits/chosen": -1.890750527381897,
      "logits/rejected": -1.8918195962905884,
      "logps/chosen": -34.196632385253906,
      "logps/rejected": -34.74388885498047,
      "loss": 1.0104,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.002364098560065031,
      "rewards/margins": -0.0103833619505167,
      "rewards/rejected": 0.008019264787435532,
      "step": 150
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.636998309800572e-07,
      "logits/chosen": -1.9424158334732056,
      "logits/rejected": -1.9389365911483765,
      "logps/chosen": -36.13062286376953,
      "logps/rejected": -32.720314025878906,
      "loss": 0.9768,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.02463117241859436,
      "rewards/margins": 0.023165332153439522,
      "rewards/rejected": 0.0014658428262919188,
      "step": 160
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.430433172111807e-07,
      "logits/chosen": -2.04191517829895,
      "logits/rejected": -2.034518003463745,
      "logps/chosen": -33.780067443847656,
      "logps/rejected": -31.35748863220215,
      "loss": 0.9859,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.010676576755940914,
      "rewards/margins": 0.014060018584132195,
      "rewards/rejected": -0.0033834422938525677,
      "step": 170
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.216202642830543e-07,
      "logits/chosen": -2.0474770069122314,
      "logits/rejected": -2.0527572631835938,
      "logps/chosen": -32.51685333251953,
      "logps/rejected": -32.48542022705078,
      "loss": 0.9831,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.01676076650619507,
      "rewards/margins": 0.016928378492593765,
      "rewards/rejected": -0.00016761067672632635,
      "step": 180
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.9960716642946403e-07,
      "logits/chosen": -2.0486884117126465,
      "logits/rejected": -2.045897960662842,
      "logps/chosen": -31.490026473999023,
      "logps/rejected": -31.336299896240234,
      "loss": 0.9898,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.0006444025784730911,
      "rewards/margins": 0.010206506587564945,
      "rewards/rejected": -0.010850909166038036,
      "step": 190
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.771853789806683e-07,
      "logits/chosen": -1.9185329675674438,
      "logits/rejected": -1.9231984615325928,
      "logps/chosen": -31.579875946044922,
      "logps/rejected": -32.781455993652344,
      "loss": 0.9898,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.015846019610762596,
      "rewards/margins": 0.010161412879824638,
      "rewards/rejected": 0.005684606730937958,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -2.2439582347869873,
      "eval_logits/rejected": -2.239088296890259,
      "eval_logps/chosen": -34.01380920410156,
      "eval_logps/rejected": -37.500911712646484,
      "eval_loss": 0.9969704151153564,
      "eval_rewards/accuracies": 0.5307309031486511,
      "eval_rewards/chosen": 0.01244510430842638,
      "eval_rewards/margins": 0.0030194728169590235,
      "eval_rewards/rejected": 0.009425631724298,
      "eval_runtime": 145.8589,
      "eval_samples_per_second": 2.352,
      "eval_steps_per_second": 0.295,
      "step": 200
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.5453962426402e-07,
      "logits/chosen": -2.0318081378936768,
      "logits/rejected": -2.0424768924713135,
      "logps/chosen": -31.95905113220215,
      "logps/rejected": -33.87731170654297,
      "loss": 0.9839,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.007351105101406574,
      "rewards/margins": 0.01611880213022232,
      "rewards/rejected": -0.008767696097493172,
      "step": 210
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.318564697655179e-07,
      "logits/chosen": -1.9253685474395752,
      "logits/rejected": -1.940233826637268,
      "logps/chosen": -30.08770179748535,
      "logps/rejected": -31.577301025390625,
      "loss": 0.9742,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.014303619973361492,
      "rewards/margins": 0.025777745991945267,
      "rewards/rejected": -0.011474122293293476,
      "step": 220
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.093227910899832e-07,
      "logits/chosen": -1.9832963943481445,
      "logits/rejected": -1.9872655868530273,
      "logps/chosen": -33.39131546020508,
      "logps/rejected": -31.548086166381836,
      "loss": 0.985,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.015694385394454002,
      "rewards/margins": 0.014964587986469269,
      "rewards/rejected": 0.0007297966512851417,
      "step": 230
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8712423238279356e-07,
      "logits/chosen": -1.9828859567642212,
      "logits/rejected": -1.9609191417694092,
      "logps/chosen": -34.16459655761719,
      "logps/rejected": -34.95134735107422,
      "loss": 1.0062,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.0065087564289569855,
      "rewards/margins": -0.006178082898259163,
      "rewards/rejected": -0.0003306727739982307,
      "step": 240
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.654436768970182e-07,
      "logits/chosen": -2.0246593952178955,
      "logits/rejected": -2.0213539600372314,
      "logps/chosen": -32.9222412109375,
      "logps/rejected": -36.228416442871094,
      "loss": 0.9962,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.0016961356159299612,
      "rewards/margins": 0.0037924889475107193,
      "rewards/rejected": -0.0020963551942259073,
      "step": 250
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.444597403062196e-07,
      "logits/chosen": -1.8918907642364502,
      "logits/rejected": -1.8894577026367188,
      "logps/chosen": -34.189327239990234,
      "logps/rejected": -35.505496978759766,
      "loss": 1.0,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.0005569729837588966,
      "rewards/margins": -2.999706157424953e-05,
      "rewards/rejected": -0.0005269756657071412,
      "step": 260
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.2434529917578887e-07,
      "logits/chosen": -1.8758213520050049,
      "logits/rejected": -1.8732786178588867,
      "logps/chosen": -34.38149642944336,
      "logps/rejected": -31.752059936523438,
      "loss": 1.0019,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.009660609066486359,
      "rewards/margins": -0.0019017171580344439,
      "rewards/rejected": 0.011562327854335308,
      "step": 270
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0526606671603521e-07,
      "logits/chosen": -1.9798027276992798,
      "logits/rejected": -1.9691746234893799,
      "logps/chosen": -35.328853607177734,
      "logps/rejected": -31.868383407592773,
      "loss": 0.9669,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.016671547666192055,
      "rewards/margins": 0.03310702368617058,
      "rewards/rejected": -0.016435474157333374,
      "step": 280
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.737922755071453e-08,
      "logits/chosen": -2.075648546218872,
      "logits/rejected": -2.0605978965759277,
      "logps/chosen": -30.926809310913086,
      "logps/rejected": -32.621070861816406,
      "loss": 1.0201,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.0016108205309137702,
      "rewards/margins": -0.020070534199476242,
      "rewards/rejected": 0.018459713086485863,
      "step": 290
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.08321427484816e-08,
      "logits/chosen": -1.9466993808746338,
      "logits/rejected": -1.9441711902618408,
      "logps/chosen": -32.90770721435547,
      "logps/rejected": -30.82305335998535,
      "loss": 0.9846,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.011643724516034126,
      "rewards/margins": 0.015391023829579353,
      "rewards/rejected": -0.0037472969852387905,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.243670701980591,
      "eval_logits/rejected": -2.2387943267822266,
      "eval_logps/chosen": -34.02626419067383,
      "eval_logps/rejected": -37.50523376464844,
      "eval_loss": 1.001994013786316,
      "eval_rewards/accuracies": 0.4634551703929901,
      "eval_rewards/chosen": 0.0049731116741895676,
      "eval_rewards/margins": -0.0018590801628306508,
      "eval_rewards/rejected": 0.006832191254943609,
      "eval_runtime": 145.8336,
      "eval_samples_per_second": 2.352,
      "eval_steps_per_second": 0.295,
      "step": 300
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.576113578589034e-08,
      "logits/chosen": -1.9287983179092407,
      "logits/rejected": -1.9255239963531494,
      "logps/chosen": -31.573007583618164,
      "logps/rejected": -33.742637634277344,
      "loss": 0.9815,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.018206708133220673,
      "rewards/margins": 0.018496429547667503,
      "rewards/rejected": -0.0002897246740758419,
      "step": 310
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.229036944380912e-08,
      "logits/chosen": -1.9805524349212646,
      "logits/rejected": -1.9682422876358032,
      "logps/chosen": -34.57670211791992,
      "logps/rejected": -33.561363220214844,
      "loss": 0.9749,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.011624744161963463,
      "rewards/margins": 0.0251374039798975,
      "rewards/rejected": -0.013512656092643738,
      "step": 320
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.053082288996112e-08,
      "logits/chosen": -2.016091823577881,
      "logits/rejected": -2.0146238803863525,
      "logps/chosen": -33.47222137451172,
      "logps/rejected": -32.46577835083008,
      "loss": 1.0001,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.008143280632793903,
      "rewards/margins": -0.0001393534184899181,
      "rewards/rejected": 0.008282631635665894,
      "step": 330
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.05793773749158e-08,
      "logits/chosen": -2.1032156944274902,
      "logits/rejected": -2.0874197483062744,
      "logps/chosen": -34.174861907958984,
      "logps/rejected": -33.088890075683594,
      "loss": 1.0148,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.002716648392379284,
      "rewards/margins": -0.014771336689591408,
      "rewards/rejected": 0.017487986013293266,
      "step": 340
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.251801807404168e-08,
      "logits/chosen": -1.9747329950332642,
      "logits/rejected": -1.9737837314605713,
      "logps/chosen": -33.25817108154297,
      "logps/rejected": -32.459861755371094,
      "loss": 0.9888,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.014356844127178192,
      "rewards/margins": 0.01118617132306099,
      "rewards/rejected": 0.003170671407133341,
      "step": 350
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.41315865106129e-09,
      "logits/chosen": -1.9303538799285889,
      "logits/rejected": -1.94071364402771,
      "logps/chosen": -32.20769500732422,
      "logps/rejected": -35.273780822753906,
      "loss": 1.0108,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.0018019669223576784,
      "rewards/margins": -0.010786842554807663,
      "rewards/rejected": 0.008984875865280628,
      "step": 360
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.3150941078050324e-09,
      "logits/chosen": -2.069509744644165,
      "logits/rejected": -2.0629658699035645,
      "logps/chosen": -33.647117614746094,
      "logps/rejected": -29.217060089111328,
      "loss": 1.0027,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.002911838237196207,
      "rewards/margins": -0.0027081891894340515,
      "rewards/rejected": 0.005620025563985109,
      "step": 370
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.575864278703266e-10,
      "logits/chosen": -1.929449439048767,
      "logits/rejected": -1.9315989017486572,
      "logps/chosen": -34.25099563598633,
      "logps/rejected": -30.906116485595703,
      "loss": 0.9871,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.007165629416704178,
      "rewards/margins": 0.012927901931107044,
      "rewards/rejected": -0.020093530416488647,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 0.9932638019710393,
      "train_runtime": 3254.1177,
      "train_samples_per_second": 0.946,
      "train_steps_per_second": 0.118
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}