|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.866413950920105, |
|
"logits/rejected": -1.8707411289215088, |
|
"logps/chosen": -36.98916244506836, |
|
"logps/rejected": -33.67436981201172, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.01569323241710663, |
|
"rewards/margins": 0.05555717274546623, |
|
"rewards/rejected": -0.039863936603069305, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9979650974273682, |
|
"logits/rejected": -2.0006086826324463, |
|
"logps/chosen": -29.624820709228516, |
|
"logps/rejected": -29.0762939453125, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01563635841012001, |
|
"rewards/margins": 0.027204299345612526, |
|
"rewards/rejected": -0.01156794372946024, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.921021819114685, |
|
"logits/rejected": -1.9183374643325806, |
|
"logps/chosen": -31.40532875061035, |
|
"logps/rejected": -33.23241424560547, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00968973059207201, |
|
"rewards/margins": 0.022251319140195847, |
|
"rewards/rejected": -0.012561586685478687, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0176353454589844, |
|
"logits/rejected": -2.008906364440918, |
|
"logps/chosen": -32.574256896972656, |
|
"logps/rejected": -32.53368377685547, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0022967704571783543, |
|
"rewards/margins": 0.02120940014719963, |
|
"rewards/rejected": -0.018912632018327713, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8619186878204346, |
|
"logits/rejected": -1.85114324092865, |
|
"logps/chosen": -33.55537414550781, |
|
"logps/rejected": -35.45675277709961, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.001892436295747757, |
|
"rewards/margins": 0.005858602002263069, |
|
"rewards/rejected": -0.003966164775192738, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9400945901870728, |
|
"logits/rejected": -1.9420464038848877, |
|
"logps/chosen": -32.56509780883789, |
|
"logps/rejected": -33.2406120300293, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.031578924506902695, |
|
"rewards/margins": 0.09388783574104309, |
|
"rewards/rejected": -0.062308914959430695, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.0712790489196777, |
|
"logits/rejected": -2.0762436389923096, |
|
"logps/chosen": -33.981910705566406, |
|
"logps/rejected": -36.62363815307617, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005918038543313742, |
|
"rewards/margins": 0.05520814657211304, |
|
"rewards/rejected": -0.04929010197520256, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9327905178070068, |
|
"logits/rejected": -1.935909628868103, |
|
"logps/chosen": -34.32685470581055, |
|
"logps/rejected": -34.65606689453125, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.09085920453071594, |
|
"rewards/margins": 0.14815348386764526, |
|
"rewards/rejected": -0.057294271886348724, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9414918422698975, |
|
"logits/rejected": -1.946007490158081, |
|
"logps/chosen": -32.406803131103516, |
|
"logps/rejected": -32.36021041870117, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.054556868970394135, |
|
"rewards/margins": 0.05573350936174393, |
|
"rewards/rejected": -0.0011766403913497925, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.039034128189087, |
|
"logits/rejected": -2.0370402336120605, |
|
"logps/chosen": -32.172786712646484, |
|
"logps/rejected": -31.333194732666016, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06124376505613327, |
|
"rewards/margins": 0.12152798473834991, |
|
"rewards/rejected": -0.06028420478105545, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2339773178100586, |
|
"eval_logits/rejected": -2.229137420654297, |
|
"eval_logps/chosen": -34.04054641723633, |
|
"eval_logps/rejected": -37.549957275390625, |
|
"eval_loss": 0.6902773976325989, |
|
"eval_rewards/accuracies": 0.5685215592384338, |
|
"eval_rewards/chosen": -0.005393954925239086, |
|
"eval_rewards/margins": 0.024608083069324493, |
|
"eval_rewards/rejected": -0.030002037063241005, |
|
"eval_runtime": 146.034, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.994192123413086, |
|
"logits/rejected": -1.9918158054351807, |
|
"logps/chosen": -33.142940521240234, |
|
"logps/rejected": -34.01188278198242, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.09078876674175262, |
|
"rewards/margins": 0.07505009323358536, |
|
"rewards/rejected": 0.015738680958747864, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0053954124450684, |
|
"logits/rejected": -1.997046709060669, |
|
"logps/chosen": -32.33894348144531, |
|
"logps/rejected": -32.1308708190918, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.09536493569612503, |
|
"rewards/margins": 0.06779730319976807, |
|
"rewards/rejected": 0.027567636221647263, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0336387157440186, |
|
"logits/rejected": -2.025650978088379, |
|
"logps/chosen": -30.345691680908203, |
|
"logps/rejected": -32.078697204589844, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.11702337116003036, |
|
"rewards/margins": 0.14014457166194916, |
|
"rewards/rejected": -0.023121213540434837, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9642337560653687, |
|
"logits/rejected": -1.9744552373886108, |
|
"logps/chosen": -31.243911743164062, |
|
"logps/rejected": -32.590267181396484, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1581769436597824, |
|
"rewards/margins": 0.20802685618400574, |
|
"rewards/rejected": -0.04984992742538452, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.876604437828064, |
|
"logits/rejected": -1.8777605295181274, |
|
"logps/chosen": -33.938690185546875, |
|
"logps/rejected": -34.807891845703125, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.22860188782215118, |
|
"rewards/margins": 0.2741745412349701, |
|
"rewards/rejected": -0.0455726757645607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9282041788101196, |
|
"logits/rejected": -1.9247684478759766, |
|
"logps/chosen": -36.02125930786133, |
|
"logps/rejected": -32.71831130981445, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.13537634909152985, |
|
"rewards/margins": 0.13137592375278473, |
|
"rewards/rejected": 0.004000450484454632, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.029125928878784, |
|
"logits/rejected": -2.0217747688293457, |
|
"logps/chosen": -33.49839401245117, |
|
"logps/rejected": -31.400177001953125, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.26951926946640015, |
|
"rewards/margins": 0.3130132555961609, |
|
"rewards/rejected": -0.04349397122859955, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0355944633483887, |
|
"logits/rejected": -2.040832042694092, |
|
"logps/chosen": -32.235923767089844, |
|
"logps/rejected": -32.460418701171875, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2779761850833893, |
|
"rewards/margins": 0.2557251751422882, |
|
"rewards/rejected": 0.02225096896290779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0362112522125244, |
|
"logits/rejected": -2.0334599018096924, |
|
"logps/chosen": -31.269250869750977, |
|
"logps/rejected": -31.325435638427734, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.19773444533348083, |
|
"rewards/margins": 0.20423230528831482, |
|
"rewards/rejected": -0.0064978525042533875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9060389995574951, |
|
"logits/rejected": -1.9106788635253906, |
|
"logps/chosen": -31.306299209594727, |
|
"logps/rejected": -32.81407165527344, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2699825167655945, |
|
"rewards/margins": 0.2908058166503906, |
|
"rewards/rejected": -0.02082330361008644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.231553792953491, |
|
"eval_logits/rejected": -2.2267112731933594, |
|
"eval_logps/chosen": -34.07304763793945, |
|
"eval_logps/rejected": -37.57693862915039, |
|
"eval_loss": 0.6979728937149048, |
|
"eval_rewards/accuracies": 0.5157807469367981, |
|
"eval_rewards/chosen": -0.03464451804757118, |
|
"eval_rewards/margins": 0.019641490653157234, |
|
"eval_rewards/rejected": -0.054286014288663864, |
|
"eval_runtime": 145.8095, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.018519163131714, |
|
"logits/rejected": -2.0291810035705566, |
|
"logps/chosen": -31.742992401123047, |
|
"logps/rejected": -33.946937561035156, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2054794579744339, |
|
"rewards/margins": 0.2812942862510681, |
|
"rewards/rejected": -0.07581482082605362, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.911586046218872, |
|
"logits/rejected": -1.9263393878936768, |
|
"logps/chosen": -29.84616470336914, |
|
"logps/rejected": -31.615009307861328, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.23883743584156036, |
|
"rewards/margins": 0.2899848222732544, |
|
"rewards/rejected": -0.051147449761629105, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9677941799163818, |
|
"logits/rejected": -1.9717823266983032, |
|
"logps/chosen": -33.100074768066406, |
|
"logps/rejected": -31.62213134765625, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.28565075993537903, |
|
"rewards/margins": 0.3511958718299866, |
|
"rewards/rejected": -0.06554517149925232, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9661725759506226, |
|
"logits/rejected": -1.944300651550293, |
|
"logps/chosen": -33.841453552246094, |
|
"logps/rejected": -35.11375045776367, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.2810631990432739, |
|
"rewards/margins": 0.4277234673500061, |
|
"rewards/rejected": -0.14666026830673218, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.007416009902954, |
|
"logits/rejected": -2.0040948390960693, |
|
"logps/chosen": -32.70330810546875, |
|
"logps/rejected": -36.29412841796875, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1995842456817627, |
|
"rewards/margins": 0.2618715763092041, |
|
"rewards/rejected": -0.06228730082511902, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8749721050262451, |
|
"logits/rejected": -1.8725513219833374, |
|
"logps/chosen": -34.00068664550781, |
|
"logps/rejected": -35.53888702392578, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.16894161701202393, |
|
"rewards/margins": 0.1997825801372528, |
|
"rewards/rejected": -0.030840963125228882, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8600317239761353, |
|
"logits/rejected": -1.8576066493988037, |
|
"logps/chosen": -34.1875, |
|
"logps/rejected": -31.8159122467041, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1890900433063507, |
|
"rewards/margins": 0.22921428084373474, |
|
"rewards/rejected": -0.04012420028448105, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9631398916244507, |
|
"logits/rejected": -1.9526073932647705, |
|
"logps/chosen": -35.023719787597656, |
|
"logps/rejected": -31.869693756103516, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.29963088035583496, |
|
"rewards/margins": 0.32546472549438477, |
|
"rewards/rejected": -0.025833839550614357, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0582926273345947, |
|
"logits/rejected": -2.0433640480041504, |
|
"logps/chosen": -30.733753204345703, |
|
"logps/rejected": -32.67460632324219, |
|
"loss": 0.6392, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.17133468389511108, |
|
"rewards/margins": 0.19182677567005157, |
|
"rewards/rejected": -0.020492086187005043, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.929610013961792, |
|
"logits/rejected": -1.9270601272583008, |
|
"logps/chosen": -32.42620086669922, |
|
"logps/rejected": -30.873455047607422, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.450817346572876, |
|
"rewards/margins": 0.5018006563186646, |
|
"rewards/rejected": -0.050983332097530365, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.229154348373413, |
|
"eval_logits/rejected": -2.2243051528930664, |
|
"eval_logps/chosen": -34.09621810913086, |
|
"eval_logps/rejected": -37.59999084472656, |
|
"eval_loss": 0.6972895860671997, |
|
"eval_rewards/accuracies": 0.5390365719795227, |
|
"eval_rewards/chosen": -0.05550166219472885, |
|
"eval_rewards/margins": 0.019528048112988472, |
|
"eval_rewards/rejected": -0.07502970844507217, |
|
"eval_runtime": 145.7792, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9142345190048218, |
|
"logits/rejected": -1.9109809398651123, |
|
"logps/chosen": -31.33791732788086, |
|
"logps/rejected": -33.82014465332031, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.23888680338859558, |
|
"rewards/margins": 0.30907896161079407, |
|
"rewards/rejected": -0.07019217312335968, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9650068283081055, |
|
"logits/rejected": -1.9527791738510132, |
|
"logps/chosen": -34.34791946411133, |
|
"logps/rejected": -33.650447845458984, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.22334297001361847, |
|
"rewards/margins": 0.3237887918949127, |
|
"rewards/rejected": -0.10044582933187485, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -2.00040602684021, |
|
"logits/rejected": -1.9989902973175049, |
|
"logps/chosen": -33.210105895996094, |
|
"logps/rejected": -32.56142807006836, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.24812059104442596, |
|
"rewards/margins": 0.32178014516830444, |
|
"rewards/rejected": -0.07365953922271729, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.0870866775512695, |
|
"logits/rejected": -2.0713772773742676, |
|
"logps/chosen": -33.80995178222656, |
|
"logps/rejected": -33.120697021484375, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3324963450431824, |
|
"rewards/margins": 0.33488941192626953, |
|
"rewards/rejected": -0.0023930787574499846, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.959240198135376, |
|
"logits/rejected": -1.9583876132965088, |
|
"logps/chosen": -32.863216400146484, |
|
"logps/rejected": -32.54397201538086, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.37699171900749207, |
|
"rewards/margins": 0.44793614745140076, |
|
"rewards/rejected": -0.0709443911910057, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.9147189855575562, |
|
"logits/rejected": -1.9250224828720093, |
|
"logps/chosen": -31.902795791625977, |
|
"logps/rejected": -35.3552131652832, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2717086672782898, |
|
"rewards/margins": 0.33151620626449585, |
|
"rewards/rejected": -0.05980752781033516, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.05413556098938, |
|
"logits/rejected": -2.047651767730713, |
|
"logps/chosen": -33.377376556396484, |
|
"logps/rejected": -29.2799072265625, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.24713313579559326, |
|
"rewards/margins": 0.2952673137187958, |
|
"rewards/rejected": -0.04813414067029953, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.9141871929168701, |
|
"logits/rejected": -1.9163949489593506, |
|
"logps/chosen": -33.87698745727539, |
|
"logps/rejected": -30.976858139038086, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.32585546374320984, |
|
"rewards/margins": 0.4196627140045166, |
|
"rewards/rejected": -0.09380728751420975, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6175476637753573, |
|
"train_runtime": 3252.7839, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|