|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.3906, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -1.8662992715835571, |
|
"logits/rejected": -1.8706117868423462, |
|
"logps/chosen": -36.97681427001953, |
|
"logps/rejected": -33.66523361206055, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.023829741403460503, |
|
"rewards/margins": 0.05195777863264084, |
|
"rewards/rejected": -0.028128040954470634, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -1.997193694114685, |
|
"logits/rejected": -1.9998573064804077, |
|
"logps/chosen": -29.65359878540039, |
|
"logps/rejected": -29.054311752319336, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.009124360978603363, |
|
"rewards/margins": -0.016431041061878204, |
|
"rewards/rejected": 0.007306680083274841, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.9197280406951904, |
|
"logits/rejected": -1.9170429706573486, |
|
"logps/chosen": -31.414775848388672, |
|
"logps/rejected": -33.24064254760742, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0010542668169364333, |
|
"rewards/margins": 0.018799806013703346, |
|
"rewards/rejected": -0.01774553768336773, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438433e-07, |
|
"logits/chosen": -2.0171010494232178, |
|
"logits/rejected": -2.008350372314453, |
|
"logps/chosen": -32.59648513793945, |
|
"logps/rejected": -32.50862121582031, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.015740757808089256, |
|
"rewards/margins": -0.018979396671056747, |
|
"rewards/rejected": 0.0032386414241045713, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542186e-07, |
|
"logits/chosen": -1.8645904064178467, |
|
"logits/rejected": -1.8538070917129517, |
|
"logps/chosen": -33.542823791503906, |
|
"logps/rejected": -35.43744659423828, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.011724205687642097, |
|
"rewards/margins": -0.00019102543592453003, |
|
"rewards/rejected": 0.011915231123566628, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941118e-07, |
|
"logits/chosen": -1.9461469650268555, |
|
"logits/rejected": -1.948094129562378, |
|
"logps/chosen": -32.587215423583984, |
|
"logps/rejected": -33.21445846557617, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.010374858975410461, |
|
"rewards/margins": 0.04483799636363983, |
|
"rewards/rejected": -0.03446313738822937, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413548e-07, |
|
"logits/chosen": -2.079591989517212, |
|
"logits/rejected": -2.0845742225646973, |
|
"logps/chosen": -34.00908279418945, |
|
"logps/rejected": -36.58150863647461, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.01648041605949402, |
|
"rewards/margins": -0.006370754446834326, |
|
"rewards/rejected": -0.010109663009643555, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-07, |
|
"logits/chosen": -1.9423307180404663, |
|
"logits/rejected": -1.9454963207244873, |
|
"logps/chosen": -34.39698028564453, |
|
"logps/rejected": -34.59761428833008, |
|
"loss": 0.3889, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.024663064628839493, |
|
"rewards/margins": 0.028829574584960938, |
|
"rewards/rejected": -0.00416650902479887, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736716601303429e-07, |
|
"logits/chosen": -1.9508873224258423, |
|
"logits/rejected": -1.9553953409194946, |
|
"logps/chosen": -32.48583221435547, |
|
"logps/rejected": -32.35867691040039, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.014726865105330944, |
|
"rewards/margins": -0.014909917488694191, |
|
"rewards/rejected": 0.00018304325931239873, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62624545834521e-07, |
|
"logits/chosen": -2.0491719245910645, |
|
"logits/rejected": -2.047179937362671, |
|
"logps/chosen": -32.22635269165039, |
|
"logps/rejected": -31.287487030029297, |
|
"loss": 0.3805, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.011584864929318428, |
|
"rewards/margins": 0.02860759198665619, |
|
"rewards/rejected": -0.01702272891998291, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.244148015975952, |
|
"eval_logits/rejected": -2.2392663955688477, |
|
"eval_logps/chosen": -34.01713943481445, |
|
"eval_logps/rejected": -37.514495849609375, |
|
"eval_loss": 0.4061162769794464, |
|
"eval_rewards/accuracies": 0.5394518375396729, |
|
"eval_rewards/chosen": 0.013928660191595554, |
|
"eval_rewards/margins": 0.012230273336172104, |
|
"eval_rewards/rejected": 0.0016983875539153814, |
|
"eval_runtime": 146.1091, |
|
"eval_samples_per_second": 2.348, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4982572012636904e-07, |
|
"logits/chosen": -2.005580186843872, |
|
"logits/rejected": -2.0031564235687256, |
|
"logps/chosen": -33.24415588378906, |
|
"logps/rejected": -33.99993133544922, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00026987865567207336, |
|
"rewards/margins": -0.02382112666964531, |
|
"rewards/rejected": 0.023551244288682938, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777677e-07, |
|
"logits/chosen": -2.01680326461792, |
|
"logits/rejected": -2.0084121227264404, |
|
"logps/chosen": -32.45465850830078, |
|
"logps/rejected": -32.1729850769043, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.007803081069141626, |
|
"rewards/margins": 0.0013800703454762697, |
|
"rewards/rejected": -0.009183152578771114, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194082707715275e-07, |
|
"logits/chosen": -2.0466856956481934, |
|
"logits/rejected": -2.0386409759521484, |
|
"logps/chosen": -30.494335174560547, |
|
"logps/rejected": -32.051963806152344, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.014895597472786903, |
|
"rewards/margins": -0.015729816630482674, |
|
"rewards/rejected": 0.0008342192741110921, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020402418666621e-07, |
|
"logits/chosen": -1.977065086364746, |
|
"logits/rejected": -1.987343430519104, |
|
"logps/chosen": -31.37868309020996, |
|
"logps/rejected": -32.54730224609375, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03278205543756485, |
|
"rewards/margins": 0.04271895810961723, |
|
"rewards/rejected": -0.009936909191310406, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8341962650351185e-07, |
|
"logits/chosen": -1.8916466236114502, |
|
"logits/rejected": -1.8927490711212158, |
|
"logps/chosen": -34.209651947021484, |
|
"logps/rejected": -34.766143798828125, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.013569498434662819, |
|
"rewards/margins": -0.00645996630191803, |
|
"rewards/rejected": -0.007109532598406076, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800572e-07, |
|
"logits/chosen": -1.9427303075790405, |
|
"logits/rejected": -1.939252495765686, |
|
"logps/chosen": -36.14452362060547, |
|
"logps/rejected": -32.73284149169922, |
|
"loss": 0.3796, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.021723434329032898, |
|
"rewards/margins": 0.02979486621916294, |
|
"rewards/rejected": -0.008071433752775192, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.430433172111807e-07, |
|
"logits/chosen": -2.0424513816833496, |
|
"logits/rejected": -2.035060405731201, |
|
"logps/chosen": -33.786170959472656, |
|
"logps/rejected": -31.34820556640625, |
|
"loss": 0.4172, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.009350698441267014, |
|
"rewards/margins": 0.006434415467083454, |
|
"rewards/rejected": 0.0029162843711674213, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.216202642830543e-07, |
|
"logits/chosen": -2.0475661754608154, |
|
"logits/rejected": -2.052834987640381, |
|
"logps/chosen": -32.528114318847656, |
|
"logps/rejected": -32.50902557373047, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.013338183984160423, |
|
"rewards/margins": 0.03244578838348389, |
|
"rewards/rejected": -0.019107606261968613, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9960716642946403e-07, |
|
"logits/chosen": -2.048661231994629, |
|
"logits/rejected": -2.045872926712036, |
|
"logps/chosen": -31.496755599975586, |
|
"logps/rejected": -31.314464569091797, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00624021515250206, |
|
"rewards/margins": -0.009242123924195766, |
|
"rewards/rejected": 0.003001909703016281, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.771853789806683e-07, |
|
"logits/chosen": -1.9193611145019531, |
|
"logits/rejected": -1.9240529537200928, |
|
"logps/chosen": -31.579212188720703, |
|
"logps/rejected": -32.80603790283203, |
|
"loss": 0.371, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02165621519088745, |
|
"rewards/margins": 0.033739686012268066, |
|
"rewards/rejected": -0.012083468958735466, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.243856191635132, |
|
"eval_logits/rejected": -2.2389791011810303, |
|
"eval_logps/chosen": -34.023136138916016, |
|
"eval_logps/rejected": -37.501853942871094, |
|
"eval_loss": 0.4322855770587921, |
|
"eval_rewards/accuracies": 0.5253322720527649, |
|
"eval_rewards/chosen": 0.009133166633546352, |
|
"eval_rewards/margins": -0.002679171971976757, |
|
"eval_rewards/rejected": 0.01181233860552311, |
|
"eval_runtime": 145.9618, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402e-07, |
|
"logits/chosen": -2.0320448875427246, |
|
"logits/rejected": -2.0427393913269043, |
|
"logps/chosen": -31.947372436523438, |
|
"logps/rejected": -33.899864196777344, |
|
"loss": 0.3622, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.019143681973218918, |
|
"rewards/margins": 0.04887578636407852, |
|
"rewards/rejected": -0.029732098802924156, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.318564697655179e-07, |
|
"logits/chosen": -1.9257709980010986, |
|
"logits/rejected": -1.940639853477478, |
|
"logps/chosen": -30.079341888427734, |
|
"logps/rejected": -31.5616397857666, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.025758206844329834, |
|
"rewards/margins": 0.028527002781629562, |
|
"rewards/rejected": -0.0027687971014529467, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.093227910899832e-07, |
|
"logits/chosen": -1.9829959869384766, |
|
"logits/rejected": -1.9869670867919922, |
|
"logps/chosen": -33.4053955078125, |
|
"logps/rejected": -31.562353134155273, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.009657363407313824, |
|
"rewards/margins": 0.02010050043463707, |
|
"rewards/rejected": -0.010443134233355522, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279356e-07, |
|
"logits/chosen": -1.9836658239364624, |
|
"logits/rejected": -1.9616800546646118, |
|
"logps/chosen": -34.175201416015625, |
|
"logps/rejected": -34.966102600097656, |
|
"loss": 0.4346, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.017159918323159218, |
|
"rewards/margins": -0.004913450218737125, |
|
"rewards/rejected": -0.012246469967067242, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.654436768970182e-07, |
|
"logits/chosen": -2.0244596004486084, |
|
"logits/rejected": -2.021144390106201, |
|
"logps/chosen": -32.932373046875, |
|
"logps/rejected": -36.2264289855957, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.005845203995704651, |
|
"rewards/margins": -0.004640025552362204, |
|
"rewards/rejected": -0.001205177279189229, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.444597403062196e-07, |
|
"logits/chosen": -1.891283392906189, |
|
"logits/rejected": -1.8888483047485352, |
|
"logps/chosen": -34.20048522949219, |
|
"logps/rejected": -35.507022857666016, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.009669994935393333, |
|
"rewards/margins": -0.0077504729852080345, |
|
"rewards/rejected": -0.0019195213681086898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2434529917578887e-07, |
|
"logits/chosen": -1.8760111331939697, |
|
"logits/rejected": -1.8734772205352783, |
|
"logps/chosen": -34.39020919799805, |
|
"logps/rejected": -31.75579261779785, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0059113698080182076, |
|
"rewards/margins": -0.006519269198179245, |
|
"rewards/rejected": 0.012430639937520027, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603521e-07, |
|
"logits/chosen": -1.9797089099884033, |
|
"logits/rejected": -1.9690707921981812, |
|
"logps/chosen": -35.31258773803711, |
|
"logps/rejected": -31.837697982788086, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.03524213656783104, |
|
"rewards/margins": 0.0326063297688961, |
|
"rewards/rejected": 0.0026358035393059254, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071453e-08, |
|
"logits/chosen": -2.0755209922790527, |
|
"logits/rejected": -2.060497760772705, |
|
"logps/chosen": -30.90865135192871, |
|
"logps/rejected": -32.64521026611328, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.012379061430692673, |
|
"rewards/margins": 0.007080032490193844, |
|
"rewards/rejected": 0.005299028940498829, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-08, |
|
"logits/chosen": -1.9463651180267334, |
|
"logits/rejected": -1.9438308477401733, |
|
"logps/chosen": -32.89823532104492, |
|
"logps/rejected": -30.81850814819336, |
|
"loss": 0.3839, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.02309919334948063, |
|
"rewards/margins": 0.024458223953843117, |
|
"rewards/rejected": -0.0013590289745479822, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2435548305511475, |
|
"eval_logits/rejected": -2.2386720180511475, |
|
"eval_logps/chosen": -34.023826599121094, |
|
"eval_logps/rejected": -37.50039291381836, |
|
"eval_loss": 0.4344586431980133, |
|
"eval_rewards/accuracies": 0.4808970093727112, |
|
"eval_rewards/chosen": 0.00858243927359581, |
|
"eval_rewards/margins": -0.004399063065648079, |
|
"eval_rewards/rejected": 0.012981505133211613, |
|
"eval_runtime": 145.9473, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589034e-08, |
|
"logits/chosen": -1.9286285638809204, |
|
"logits/rejected": -1.9253568649291992, |
|
"logps/chosen": -31.583232879638672, |
|
"logps/rejected": -33.75123596191406, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.016091840341687202, |
|
"rewards/margins": 0.023359118029475212, |
|
"rewards/rejected": -0.007267280016094446, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380912e-08, |
|
"logits/chosen": -1.9803975820541382, |
|
"logits/rejected": -1.9680900573730469, |
|
"logps/chosen": -34.58079147338867, |
|
"logps/rejected": -33.575767517089844, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.012225466780364513, |
|
"rewards/margins": 0.041770923882722855, |
|
"rewards/rejected": -0.029545456171035767, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-08, |
|
"logits/chosen": -2.015634298324585, |
|
"logits/rejected": -2.0141713619232178, |
|
"logps/chosen": -33.45996856689453, |
|
"logps/rejected": -32.48029327392578, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.020660031586885452, |
|
"rewards/margins": 0.021225089207291603, |
|
"rewards/rejected": -0.0005650619277730584, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.05793773749158e-08, |
|
"logits/chosen": -2.1035873889923096, |
|
"logits/rejected": -2.0877768993377686, |
|
"logps/chosen": -34.162208557128906, |
|
"logps/rejected": -33.095733642578125, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.013746557757258415, |
|
"rewards/margins": -0.004091509617865086, |
|
"rewards/rejected": 0.017838066443800926, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.251801807404168e-08, |
|
"logits/chosen": -1.9746344089508057, |
|
"logits/rejected": -1.9736888408660889, |
|
"logps/chosen": -33.253448486328125, |
|
"logps/rejected": -32.458499908447266, |
|
"loss": 0.3954, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.02291743829846382, |
|
"rewards/margins": 0.017600122839212418, |
|
"rewards/rejected": 0.005317316390573978, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-09, |
|
"logits/chosen": -1.9309253692626953, |
|
"logits/rejected": -1.941303014755249, |
|
"logps/chosen": -32.21305465698242, |
|
"logps/rejected": -35.309688568115234, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.006690672133117914, |
|
"rewards/margins": 0.010053041391074657, |
|
"rewards/rejected": -0.01674371212720871, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050324e-09, |
|
"logits/chosen": -2.069108009338379, |
|
"logits/rejected": -2.0625574588775635, |
|
"logps/chosen": -33.637176513671875, |
|
"logps/rejected": -29.221187591552734, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.011833530850708485, |
|
"rewards/margins": 0.00764369685202837, |
|
"rewards/rejected": 0.004189834464341402, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-10, |
|
"logits/chosen": -1.9292182922363281, |
|
"logits/rejected": -1.9313886165618896, |
|
"logps/chosen": -34.24732971191406, |
|
"logps/rejected": -30.893402099609375, |
|
"loss": 0.4086, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.006622877903282642, |
|
"rewards/margins": 0.009996414184570312, |
|
"rewards/rejected": -0.01661929115653038, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4092799360101873, |
|
"train_runtime": 3256.9261, |
|
"train_samples_per_second": 0.945, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|