|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9989258861439313, |
|
"eval_steps": 100000, |
|
"global_step": 465, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010741138560687433, |
|
"grad_norm": 94.5147817778946, |
|
"learning_rate": 8.51063829787234e-08, |
|
"logits/chosen": -10.583702087402344, |
|
"logits/rejected": -10.455877304077148, |
|
"logps/chosen": -0.9049979448318481, |
|
"logps/rejected": -0.8784100413322449, |
|
"loss": 6.1451, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -9.04997730255127, |
|
"rewards/margins": -0.2658771872520447, |
|
"rewards/rejected": -8.784101486206055, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021482277121374866, |
|
"grad_norm": 128.5515421485228, |
|
"learning_rate": 1.702127659574468e-07, |
|
"logits/chosen": -10.710015296936035, |
|
"logits/rejected": -10.85377311706543, |
|
"logps/chosen": -1.0046945810317993, |
|
"logps/rejected": -0.8850045204162598, |
|
"loss": 5.8491, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -10.04694652557373, |
|
"rewards/margins": -1.196901559829712, |
|
"rewards/rejected": -8.850046157836914, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0322234156820623, |
|
"grad_norm": 58.802331595913145, |
|
"learning_rate": 2.553191489361702e-07, |
|
"logits/chosen": -10.312850952148438, |
|
"logits/rejected": -10.239133834838867, |
|
"logps/chosen": -1.0889472961425781, |
|
"logps/rejected": -1.1543949842453003, |
|
"loss": 6.2505, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -10.889472961425781, |
|
"rewards/margins": 0.6544777154922485, |
|
"rewards/rejected": -11.543951034545898, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04296455424274973, |
|
"grad_norm": 151.21348799898024, |
|
"learning_rate": 3.404255319148936e-07, |
|
"logits/chosen": -9.954164505004883, |
|
"logits/rejected": -10.053568840026855, |
|
"logps/chosen": -0.9611791372299194, |
|
"logps/rejected": -1.1332345008850098, |
|
"loss": 5.5619, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -9.611791610717773, |
|
"rewards/margins": 1.7205528020858765, |
|
"rewards/rejected": -11.332345008850098, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05370569280343716, |
|
"grad_norm": 99.92865956439873, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -10.326103210449219, |
|
"logits/rejected": -10.055009841918945, |
|
"logps/chosen": -0.8260948061943054, |
|
"logps/rejected": -1.1549828052520752, |
|
"loss": 5.2635, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.260948181152344, |
|
"rewards/margins": 3.2888808250427246, |
|
"rewards/rejected": -11.54982852935791, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0644468313641246, |
|
"grad_norm": 81.6600880763309, |
|
"learning_rate": 5.106382978723404e-07, |
|
"logits/chosen": -9.319940567016602, |
|
"logits/rejected": -9.13192081451416, |
|
"logps/chosen": -0.6618553996086121, |
|
"logps/rejected": -0.6553267240524292, |
|
"loss": 5.0518, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -6.61855411529541, |
|
"rewards/margins": -0.06528709828853607, |
|
"rewards/rejected": -6.553267002105713, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07518796992481203, |
|
"grad_norm": 92.79131753018717, |
|
"learning_rate": 5.957446808510638e-07, |
|
"logits/chosen": -8.877812385559082, |
|
"logits/rejected": -8.929550170898438, |
|
"logps/chosen": -0.717892050743103, |
|
"logps/rejected": -0.6935927867889404, |
|
"loss": 5.224, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -7.178920745849609, |
|
"rewards/margins": -0.24299363791942596, |
|
"rewards/rejected": -6.935927391052246, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08592910848549946, |
|
"grad_norm": 89.40050001716304, |
|
"learning_rate": 6.808510638297872e-07, |
|
"logits/chosen": -7.988096714019775, |
|
"logits/rejected": -7.907191276550293, |
|
"logps/chosen": -0.7402302622795105, |
|
"logps/rejected": -0.7434382438659668, |
|
"loss": 4.8885, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.4023027420043945, |
|
"rewards/margins": 0.032080501317977905, |
|
"rewards/rejected": -7.434383392333984, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0966702470461869, |
|
"grad_norm": 59.92644904113339, |
|
"learning_rate": 7.659574468085107e-07, |
|
"logits/chosen": -8.71805477142334, |
|
"logits/rejected": -8.232014656066895, |
|
"logps/chosen": -0.5317873954772949, |
|
"logps/rejected": -0.6050616502761841, |
|
"loss": 4.5879, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -5.317873477935791, |
|
"rewards/margins": 0.7327424883842468, |
|
"rewards/rejected": -6.0506157875061035, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10741138560687433, |
|
"grad_norm": 60.25452206880678, |
|
"learning_rate": 7.998983280184396e-07, |
|
"logits/chosen": -8.83049488067627, |
|
"logits/rejected": -8.585375785827637, |
|
"logps/chosen": -0.5144228339195251, |
|
"logps/rejected": -0.5809676647186279, |
|
"loss": 4.6549, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -5.144228458404541, |
|
"rewards/margins": 0.6654484868049622, |
|
"rewards/rejected": -5.8096771240234375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11815252416756176, |
|
"grad_norm": 46.997605934809734, |
|
"learning_rate": 7.992771864078597e-07, |
|
"logits/chosen": -8.163946151733398, |
|
"logits/rejected": -8.180994033813477, |
|
"logps/chosen": -0.5956984758377075, |
|
"logps/rejected": -0.7000880837440491, |
|
"loss": 4.6606, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -5.956984519958496, |
|
"rewards/margins": 1.0438958406448364, |
|
"rewards/rejected": -7.000881195068359, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1288936627282492, |
|
"grad_norm": 59.25357465703395, |
|
"learning_rate": 7.980922636120897e-07, |
|
"logits/chosen": -8.718216896057129, |
|
"logits/rejected": -8.35698127746582, |
|
"logps/chosen": -0.5706155896186829, |
|
"logps/rejected": -0.6969493627548218, |
|
"loss": 4.4885, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -5.706155776977539, |
|
"rewards/margins": 1.2633379697799683, |
|
"rewards/rejected": -6.969493865966797, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13963480128893663, |
|
"grad_norm": 50.79780612404402, |
|
"learning_rate": 7.963452327474534e-07, |
|
"logits/chosen": -9.234804153442383, |
|
"logits/rejected": -9.1095609664917, |
|
"logps/chosen": -0.6090906858444214, |
|
"logps/rejected": -0.7208055257797241, |
|
"loss": 4.666, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -6.090908050537109, |
|
"rewards/margins": 1.1171473264694214, |
|
"rewards/rejected": -7.208055019378662, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15037593984962405, |
|
"grad_norm": 56.36564641791114, |
|
"learning_rate": 7.940385606293987e-07, |
|
"logits/chosen": -8.946883201599121, |
|
"logits/rejected": -8.716778755187988, |
|
"logps/chosen": -0.6818052530288696, |
|
"logps/rejected": -0.7961267828941345, |
|
"loss": 4.577, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -6.818052768707275, |
|
"rewards/margins": 1.1432150602340698, |
|
"rewards/rejected": -7.961267948150635, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1611170784103115, |
|
"grad_norm": 61.76930510948969, |
|
"learning_rate": 7.911755042893434e-07, |
|
"logits/chosen": -9.067525863647461, |
|
"logits/rejected": -8.9346923828125, |
|
"logps/chosen": -0.6832990646362305, |
|
"logps/rejected": -0.7763570547103882, |
|
"loss": 4.4179, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -6.8329901695251465, |
|
"rewards/margins": 0.9305804371833801, |
|
"rewards/rejected": -7.763571262359619, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17185821697099893, |
|
"grad_norm": 63.602633074853536, |
|
"learning_rate": 7.877601063757321e-07, |
|
"logits/chosen": -9.461370468139648, |
|
"logits/rejected": -8.981520652770996, |
|
"logps/chosen": -0.6881433129310608, |
|
"logps/rejected": -0.8508684039115906, |
|
"loss": 4.3763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -6.881433010101318, |
|
"rewards/margins": 1.6272509098052979, |
|
"rewards/rejected": -8.508684158325195, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18259935553168635, |
|
"grad_norm": 50.67613033462041, |
|
"learning_rate": 7.837971894457989e-07, |
|
"logits/chosen": -9.557887077331543, |
|
"logits/rejected": -9.17081069946289, |
|
"logps/chosen": -0.6830392479896545, |
|
"logps/rejected": -0.799291729927063, |
|
"loss": 4.6499, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.830392360687256, |
|
"rewards/margins": 1.1625245809555054, |
|
"rewards/rejected": -7.992917060852051, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1933404940923738, |
|
"grad_norm": 57.58777625940833, |
|
"learning_rate": 7.792923491560942e-07, |
|
"logits/chosen": -8.579484939575195, |
|
"logits/rejected": -8.546136856079102, |
|
"logps/chosen": -0.6667743921279907, |
|
"logps/rejected": -0.7407978177070618, |
|
"loss": 4.4492, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.667743682861328, |
|
"rewards/margins": 0.7402342557907104, |
|
"rewards/rejected": -7.407977104187012, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 68.91179001810359, |
|
"learning_rate": 7.742519463613926e-07, |
|
"logits/chosen": -9.336307525634766, |
|
"logits/rejected": -9.128133773803711, |
|
"logps/chosen": -0.706219494342804, |
|
"logps/rejected": -0.7757526636123657, |
|
"loss": 4.2763, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.062193870544434, |
|
"rewards/margins": 0.6953321099281311, |
|
"rewards/rejected": -7.7575273513793945, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21482277121374865, |
|
"grad_norm": 140.43504047927686, |
|
"learning_rate": 7.68683098133138e-07, |
|
"logits/chosen": -8.939419746398926, |
|
"logits/rejected": -8.681028366088867, |
|
"logps/chosen": -0.7093919515609741, |
|
"logps/rejected": -0.8932901620864868, |
|
"loss": 4.4002, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -7.093919277191162, |
|
"rewards/margins": 1.838982343673706, |
|
"rewards/rejected": -8.932901382446289, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22556390977443608, |
|
"grad_norm": 105.52949175741638, |
|
"learning_rate": 7.625936677101051e-07, |
|
"logits/chosen": -8.601816177368164, |
|
"logits/rejected": -8.625459671020508, |
|
"logps/chosen": -0.8767679333686829, |
|
"logps/rejected": -0.8515819311141968, |
|
"loss": 4.4644, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -8.767679214477539, |
|
"rewards/margins": -0.25185948610305786, |
|
"rewards/rejected": -8.51581859588623, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23630504833512353, |
|
"grad_norm": 56.44051818244315, |
|
"learning_rate": 7.559922533954731e-07, |
|
"logits/chosen": -9.58240795135498, |
|
"logits/rejected": -9.501542091369629, |
|
"logps/chosen": -0.7655607461929321, |
|
"logps/rejected": -0.8916142582893372, |
|
"loss": 4.2797, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.6556077003479, |
|
"rewards/margins": 1.2605348825454712, |
|
"rewards/rejected": -8.916143417358398, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24704618689581095, |
|
"grad_norm": 68.9497539150874, |
|
"learning_rate": 7.488881764159808e-07, |
|
"logits/chosen": -9.756335258483887, |
|
"logits/rejected": -9.543218612670898, |
|
"logps/chosen": -0.7038711309432983, |
|
"logps/rejected": -0.7986757159233093, |
|
"loss": 4.154, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -7.0387115478515625, |
|
"rewards/margins": 0.9480463862419128, |
|
"rewards/rejected": -7.986758232116699, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2577873254564984, |
|
"grad_norm": 90.20504733911939, |
|
"learning_rate": 7.412914677603135e-07, |
|
"logits/chosen": -9.883420944213867, |
|
"logits/rejected": -9.735390663146973, |
|
"logps/chosen": -0.9017173647880554, |
|
"logps/rejected": -0.9996153116226196, |
|
"loss": 4.2168, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -9.017173767089844, |
|
"rewards/margins": 0.9789786338806152, |
|
"rewards/rejected": -9.996152877807617, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26852846401718583, |
|
"grad_norm": 63.31580116626401, |
|
"learning_rate": 7.332128540153017e-07, |
|
"logits/chosen": -10.71928596496582, |
|
"logits/rejected": -10.555776596069336, |
|
"logps/chosen": -0.788918673992157, |
|
"logps/rejected": -0.9437187910079956, |
|
"loss": 4.1085, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -7.889185905456543, |
|
"rewards/margins": 1.5480016469955444, |
|
"rewards/rejected": -9.437189102172852, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27926960257787325, |
|
"grad_norm": 81.18347953083857, |
|
"learning_rate": 7.246637422199322e-07, |
|
"logits/chosen": -10.676037788391113, |
|
"logits/rejected": -10.630210876464844, |
|
"logps/chosen": -0.8381876945495605, |
|
"logps/rejected": -1.0221493244171143, |
|
"loss": 4.0857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -8.381875991821289, |
|
"rewards/margins": 1.8396151065826416, |
|
"rewards/rejected": -10.221491813659668, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2900107411385607, |
|
"grad_norm": 80.90445716094146, |
|
"learning_rate": 7.156562037585574e-07, |
|
"logits/chosen": -11.714326858520508, |
|
"logits/rejected": -11.1636323928833, |
|
"logps/chosen": -0.8452903628349304, |
|
"logps/rejected": -1.1077954769134521, |
|
"loss": 3.9406, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -8.452905654907227, |
|
"rewards/margins": 2.6250510215759277, |
|
"rewards/rejected": -11.07795524597168, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3007518796992481, |
|
"grad_norm": 88.8085364984583, |
|
"learning_rate": 7.062029573160467e-07, |
|
"logits/chosen": -11.935297012329102, |
|
"logits/rejected": -11.792046546936035, |
|
"logps/chosen": -0.9109989404678345, |
|
"logps/rejected": -1.1446388959884644, |
|
"loss": 3.6921, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -9.109989166259766, |
|
"rewards/margins": 2.3363993167877197, |
|
"rewards/rejected": -11.446390151977539, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31149301825993553, |
|
"grad_norm": 110.78828702618637, |
|
"learning_rate": 6.963173509189455e-07, |
|
"logits/chosen": -13.552042007446289, |
|
"logits/rejected": -13.324705123901367, |
|
"logps/chosen": -1.0502710342407227, |
|
"logps/rejected": -1.2693006992340088, |
|
"loss": 3.8236, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -10.502711296081543, |
|
"rewards/margins": 2.190295696258545, |
|
"rewards/rejected": -12.69300651550293, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.322234156820623, |
|
"grad_norm": 111.15417892636452, |
|
"learning_rate": 6.860133430880024e-07, |
|
"logits/chosen": -14.586761474609375, |
|
"logits/rejected": -14.23077392578125, |
|
"logps/chosen": -1.2444875240325928, |
|
"logps/rejected": -1.4712624549865723, |
|
"loss": 3.7527, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -12.444875717163086, |
|
"rewards/margins": 2.2677478790283203, |
|
"rewards/rejected": -14.712623596191406, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33297529538131043, |
|
"grad_norm": 119.28100769496392, |
|
"learning_rate": 6.753054831286747e-07, |
|
"logits/chosen": -14.823234558105469, |
|
"logits/rejected": -14.770757675170898, |
|
"logps/chosen": -1.3944904804229736, |
|
"logps/rejected": -1.6155385971069336, |
|
"loss": 3.893, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -13.944903373718262, |
|
"rewards/margins": 2.2104804515838623, |
|
"rewards/rejected": -16.155384063720703, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34371643394199786, |
|
"grad_norm": 103.61310969210844, |
|
"learning_rate": 6.642088905874433e-07, |
|
"logits/chosen": -14.195696830749512, |
|
"logits/rejected": -14.07690143585205, |
|
"logps/chosen": -1.3757129907608032, |
|
"logps/rejected": -1.5891997814178467, |
|
"loss": 3.7363, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.75713062286377, |
|
"rewards/margins": 2.134868860244751, |
|
"rewards/rejected": -15.891998291015625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3544575725026853, |
|
"grad_norm": 95.46872943421629, |
|
"learning_rate": 6.527392339029455e-07, |
|
"logits/chosen": -14.401777267456055, |
|
"logits/rejected": -14.322749137878418, |
|
"logps/chosen": -1.2530758380889893, |
|
"logps/rejected": -1.5336121320724487, |
|
"loss": 3.4763, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -12.530759811401367, |
|
"rewards/margins": 2.8053627014160156, |
|
"rewards/rejected": -15.336122512817383, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3651987110633727, |
|
"grad_norm": 119.23452912777815, |
|
"learning_rate": 6.409127082820689e-07, |
|
"logits/chosen": -14.566454887390137, |
|
"logits/rejected": -14.484842300415039, |
|
"logps/chosen": -1.531166434288025, |
|
"logps/rejected": -1.7996248006820679, |
|
"loss": 3.6554, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.311663627624512, |
|
"rewards/margins": 2.6845829486846924, |
|
"rewards/rejected": -17.996248245239258, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37593984962406013, |
|
"grad_norm": 123.93953756068933, |
|
"learning_rate": 6.287460128322457e-07, |
|
"logits/chosen": -14.157377243041992, |
|
"logits/rejected": -14.0371675491333, |
|
"logps/chosen": -1.5080561637878418, |
|
"logps/rejected": -1.9248558282852173, |
|
"loss": 3.4374, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -15.080561637878418, |
|
"rewards/margins": 4.167994022369385, |
|
"rewards/rejected": -19.24855613708496, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3866809881847476, |
|
"grad_norm": 223.710665367037, |
|
"learning_rate": 6.16256326982239e-07, |
|
"logits/chosen": -16.03777313232422, |
|
"logits/rejected": -16.100345611572266, |
|
"logps/chosen": -1.5300877094268799, |
|
"logps/rejected": -1.8575336933135986, |
|
"loss": 3.4606, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.300875663757324, |
|
"rewards/margins": 3.2744598388671875, |
|
"rewards/rejected": -18.575336456298828, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39742212674543503, |
|
"grad_norm": 294.4348098673441, |
|
"learning_rate": 6.034612862247114e-07, |
|
"logits/chosen": -14.142799377441406, |
|
"logits/rejected": -13.796422958374023, |
|
"logps/chosen": -1.5025275945663452, |
|
"logps/rejected": -1.7810484170913696, |
|
"loss": 3.1185, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.025274276733398, |
|
"rewards/margins": 2.7852089405059814, |
|
"rewards/rejected": -17.810483932495117, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 112.37773385751002, |
|
"learning_rate": 5.903789572148295e-07, |
|
"logits/chosen": -14.8009614944458, |
|
"logits/rejected": -14.250249862670898, |
|
"logps/chosen": -1.5931546688079834, |
|
"logps/rejected": -2.022378444671631, |
|
"loss": 3.3847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.931546211242676, |
|
"rewards/margins": 4.292238712310791, |
|
"rewards/rejected": -20.223783493041992, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4189044038668099, |
|
"grad_norm": 125.77761635765717, |
|
"learning_rate": 5.770278122600662e-07, |
|
"logits/chosen": -14.832977294921875, |
|
"logits/rejected": -14.608530044555664, |
|
"logps/chosen": -1.6177564859390259, |
|
"logps/rejected": -1.9727256298065186, |
|
"loss": 3.6009, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -16.177562713623047, |
|
"rewards/margins": 3.549692153930664, |
|
"rewards/rejected": -19.72725486755371, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4296455424274973, |
|
"grad_norm": 101.70153964682511, |
|
"learning_rate": 5.634267032372192e-07, |
|
"logits/chosen": -14.803668022155762, |
|
"logits/rejected": -14.786203384399414, |
|
"logps/chosen": -1.6423594951629639, |
|
"logps/rejected": -1.9881032705307007, |
|
"loss": 3.3904, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -16.423595428466797, |
|
"rewards/margins": 3.457437515258789, |
|
"rewards/rejected": -19.881032943725586, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44038668098818473, |
|
"grad_norm": 123.22102599092351, |
|
"learning_rate": 5.495948349734758e-07, |
|
"logits/chosen": -14.582061767578125, |
|
"logits/rejected": -14.51270580291748, |
|
"logps/chosen": -1.747982382774353, |
|
"logps/rejected": -2.051506757736206, |
|
"loss": 3.1521, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -17.47982406616211, |
|
"rewards/margins": 3.035243034362793, |
|
"rewards/rejected": -20.51506805419922, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.45112781954887216, |
|
"grad_norm": 106.53206062736002, |
|
"learning_rate": 5.355517381291105e-07, |
|
"logits/chosen": -15.856142044067383, |
|
"logits/rejected": -15.048059463500977, |
|
"logps/chosen": -1.9086406230926514, |
|
"logps/rejected": -2.3940463066101074, |
|
"loss": 3.1856, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.086406707763672, |
|
"rewards/margins": 4.854057312011719, |
|
"rewards/rejected": -23.940462112426758, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46186895810955964, |
|
"grad_norm": 135.01895835142213, |
|
"learning_rate": 5.21317241620105e-07, |
|
"logits/chosen": -17.223520278930664, |
|
"logits/rejected": -16.822795867919922, |
|
"logps/chosen": -1.9180253744125366, |
|
"logps/rejected": -2.2895708084106445, |
|
"loss": 3.4171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.180253982543945, |
|
"rewards/margins": 3.7154533863067627, |
|
"rewards/rejected": -22.895706176757812, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47261009667024706, |
|
"grad_norm": 116.88786548811646, |
|
"learning_rate": 5.069114446196291e-07, |
|
"logits/chosen": -14.405430793762207, |
|
"logits/rejected": -14.03125, |
|
"logps/chosen": -1.8195463418960571, |
|
"logps/rejected": -2.373373508453369, |
|
"loss": 3.1087, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -18.195463180541992, |
|
"rewards/margins": 5.538268566131592, |
|
"rewards/rejected": -23.73373031616211, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4833512352309345, |
|
"grad_norm": 117.33800643418842, |
|
"learning_rate": 4.923546881779183e-07, |
|
"logits/chosen": -15.34239387512207, |
|
"logits/rejected": -15.118896484375, |
|
"logps/chosen": -1.5427398681640625, |
|
"logps/rejected": -1.9821481704711914, |
|
"loss": 3.0885, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -15.427398681640625, |
|
"rewards/margins": 4.394083023071289, |
|
"rewards/rejected": -19.821481704711914, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4940923737916219, |
|
"grad_norm": 116.49865117484065, |
|
"learning_rate": 4.776675265006186e-07, |
|
"logits/chosen": -14.630195617675781, |
|
"logits/rejected": -14.59937858581543, |
|
"logps/chosen": -1.6563146114349365, |
|
"logps/rejected": -2.0928232669830322, |
|
"loss": 3.2032, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -16.56314468383789, |
|
"rewards/margins": 4.365086555480957, |
|
"rewards/rejected": -20.928232192993164, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5048335123523093, |
|
"grad_norm": 203.29377037587201, |
|
"learning_rate": 4.62870697926156e-07, |
|
"logits/chosen": -14.8600435256958, |
|
"logits/rejected": -15.338279724121094, |
|
"logps/chosen": -1.7831714153289795, |
|
"logps/rejected": -2.145383358001709, |
|
"loss": 3.2633, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -17.831714630126953, |
|
"rewards/margins": 3.622117519378662, |
|
"rewards/rejected": -21.45383071899414, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5155746509129968, |
|
"grad_norm": 116.16395762640381, |
|
"learning_rate": 4.479850956431092e-07, |
|
"logits/chosen": -14.476922988891602, |
|
"logits/rejected": -14.742956161499023, |
|
"logps/chosen": -1.6810489892959595, |
|
"logps/rejected": -2.0232789516448975, |
|
"loss": 3.0869, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -16.810489654541016, |
|
"rewards/margins": 3.4223015308380127, |
|
"rewards/rejected": -20.232791900634766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 98.59837238595348, |
|
"learning_rate": 4.33031738188933e-07, |
|
"logits/chosen": -15.394210815429688, |
|
"logits/rejected": -14.864255905151367, |
|
"logps/chosen": -1.6588356494903564, |
|
"logps/rejected": -2.0340933799743652, |
|
"loss": 3.2303, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -16.58835792541504, |
|
"rewards/margins": 3.752579927444458, |
|
"rewards/rejected": -20.3409366607666, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5370569280343717, |
|
"grad_norm": 122.58746074195308, |
|
"learning_rate": 4.180317397716889e-07, |
|
"logits/chosen": -15.588345527648926, |
|
"logits/rejected": -15.222723007202148, |
|
"logps/chosen": -1.6328115463256836, |
|
"logps/rejected": -2.2505877017974854, |
|
"loss": 3.0874, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -16.328113555908203, |
|
"rewards/margins": 6.177763938903809, |
|
"rewards/rejected": -22.505878448486328, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.547798066595059, |
|
"grad_norm": 120.49234048427142, |
|
"learning_rate": 4.030062804566888e-07, |
|
"logits/chosen": -15.462881088256836, |
|
"logits/rejected": -15.3878173828125, |
|
"logps/chosen": -1.6617505550384521, |
|
"logps/rejected": -1.9518375396728516, |
|
"loss": 3.0471, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -16.61750602722168, |
|
"rewards/margins": 2.9008688926696777, |
|
"rewards/rejected": -19.518375396728516, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5585392051557465, |
|
"grad_norm": 102.36282696415171, |
|
"learning_rate": 3.8797657626014614e-07, |
|
"logits/chosen": -15.693799018859863, |
|
"logits/rejected": -15.475125312805176, |
|
"logps/chosen": -1.7303409576416016, |
|
"logps/rejected": -2.111295223236084, |
|
"loss": 3.1486, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -17.303409576416016, |
|
"rewards/margins": 3.809544801712036, |
|
"rewards/rejected": -21.112953186035156, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.569280343716434, |
|
"grad_norm": 120.30352416855824, |
|
"learning_rate": 3.729638491920669e-07, |
|
"logits/chosen": -14.018827438354492, |
|
"logits/rejected": -14.128240585327148, |
|
"logps/chosen": -1.5650581121444702, |
|
"logps/rejected": -1.8844165802001953, |
|
"loss": 3.0723, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -15.650581359863281, |
|
"rewards/margins": 3.193586826324463, |
|
"rewards/rejected": -18.84417152404785, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5800214822771214, |
|
"grad_norm": 91.11946128351651, |
|
"learning_rate": 3.5798929729067464e-07, |
|
"logits/chosen": -15.980966567993164, |
|
"logits/rejected": -15.59577465057373, |
|
"logps/chosen": -1.759478211402893, |
|
"logps/rejected": -2.1487960815429688, |
|
"loss": 2.7323, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.59478187561035, |
|
"rewards/margins": 3.8931777477264404, |
|
"rewards/rejected": -21.487960815429688, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5907626208378088, |
|
"grad_norm": 111.96074414088407, |
|
"learning_rate": 3.4307406469068595e-07, |
|
"logits/chosen": -15.691810607910156, |
|
"logits/rejected": -15.631698608398438, |
|
"logps/chosen": -1.7383606433868408, |
|
"logps/rejected": -2.2635440826416016, |
|
"loss": 2.9053, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.38360595703125, |
|
"rewards/margins": 5.251835823059082, |
|
"rewards/rejected": -22.635440826416016, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6015037593984962, |
|
"grad_norm": 122.95732420434838, |
|
"learning_rate": 3.282392117676968e-07, |
|
"logits/chosen": -15.389913558959961, |
|
"logits/rejected": -15.556841850280762, |
|
"logps/chosen": -1.9042613506317139, |
|
"logps/rejected": -2.476677417755127, |
|
"loss": 2.9112, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -19.04261589050293, |
|
"rewards/margins": 5.72415828704834, |
|
"rewards/rejected": -24.766775131225586, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 104.08928006990811, |
|
"learning_rate": 3.135056854008371e-07, |
|
"logits/chosen": -16.152729034423828, |
|
"logits/rejected": -16.002233505249023, |
|
"logps/chosen": -1.8163830041885376, |
|
"logps/rejected": -2.228738784790039, |
|
"loss": 2.8591, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.163829803466797, |
|
"rewards/margins": 4.123559474945068, |
|
"rewards/rejected": -22.28738784790039, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6229860365198711, |
|
"grad_norm": 113.24003069429473, |
|
"learning_rate": 2.988942893956833e-07, |
|
"logits/chosen": -15.338768005371094, |
|
"logits/rejected": -15.249606132507324, |
|
"logps/chosen": -1.8918412923812866, |
|
"logps/rejected": -2.32609224319458, |
|
"loss": 3.0229, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -18.918415069580078, |
|
"rewards/margins": 4.342508792877197, |
|
"rewards/rejected": -23.260921478271484, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6337271750805585, |
|
"grad_norm": 151.8876206935079, |
|
"learning_rate": 2.844256551091911e-07, |
|
"logits/chosen": -16.8232421875, |
|
"logits/rejected": -16.842761993408203, |
|
"logps/chosen": -1.9518108367919922, |
|
"logps/rejected": -2.4939913749694824, |
|
"loss": 2.833, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.518108367919922, |
|
"rewards/margins": 5.421802997589111, |
|
"rewards/rejected": -24.939910888671875, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.644468313641246, |
|
"grad_norm": 273.6603281637076, |
|
"learning_rate": 2.7012021231812664e-07, |
|
"logits/chosen": -16.766956329345703, |
|
"logits/rejected": -16.424999237060547, |
|
"logps/chosen": -1.969435453414917, |
|
"logps/rejected": -2.308202028274536, |
|
"loss": 3.2442, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -19.694355010986328, |
|
"rewards/margins": 3.387664318084717, |
|
"rewards/rejected": -23.082019805908203, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6552094522019334, |
|
"grad_norm": 187.01720889425104, |
|
"learning_rate": 2.5599816037212954e-07, |
|
"logits/chosen": -14.743069648742676, |
|
"logits/rejected": -14.623723983764648, |
|
"logps/chosen": -1.8649146556854248, |
|
"logps/rejected": -2.4030163288116455, |
|
"loss": 2.9136, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -18.64914894104004, |
|
"rewards/margins": 5.381015777587891, |
|
"rewards/rejected": -24.03016471862793, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6659505907626209, |
|
"grad_norm": 142.83830462137144, |
|
"learning_rate": 2.4207943967214064e-07, |
|
"logits/chosen": -16.09463119506836, |
|
"logits/rejected": -15.806689262390137, |
|
"logps/chosen": -2.033447742462158, |
|
"logps/rejected": -2.524364471435547, |
|
"loss": 3.1207, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -20.334476470947266, |
|
"rewards/margins": 4.909164905548096, |
|
"rewards/rejected": -25.24364471435547, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6766917293233082, |
|
"grad_norm": 106.6308691579763, |
|
"learning_rate": 2.2838370351446547e-07, |
|
"logits/chosen": -15.870585441589355, |
|
"logits/rejected": -15.55876350402832, |
|
"logps/chosen": -1.8023881912231445, |
|
"logps/rejected": -2.277968168258667, |
|
"loss": 2.7641, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -18.023881912231445, |
|
"rewards/margins": 4.755801200866699, |
|
"rewards/rejected": -22.779682159423828, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6874328678839957, |
|
"grad_norm": 108.7814729394269, |
|
"learning_rate": 2.1493029034023188e-07, |
|
"logits/chosen": -15.210580825805664, |
|
"logits/rejected": -15.057415962219238, |
|
"logps/chosen": -1.789072036743164, |
|
"logps/rejected": -2.2775633335113525, |
|
"loss": 2.8863, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.89072036743164, |
|
"rewards/margins": 4.884912014007568, |
|
"rewards/rejected": -22.775630950927734, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6981740064446831, |
|
"grad_norm": 180.86259023584043, |
|
"learning_rate": 2.0173819642942376e-07, |
|
"logits/chosen": -14.378689765930176, |
|
"logits/rejected": -14.201916694641113, |
|
"logps/chosen": -1.9404065608978271, |
|
"logps/rejected": -2.5850563049316406, |
|
"loss": 2.9848, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -19.404064178466797, |
|
"rewards/margins": 6.446499824523926, |
|
"rewards/rejected": -25.85056495666504, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7089151450053706, |
|
"grad_norm": 132.13798361660807, |
|
"learning_rate": 1.888260490780485e-07, |
|
"logits/chosen": -14.281087875366211, |
|
"logits/rejected": -14.154438972473145, |
|
"logps/chosen": -1.7613548040390015, |
|
"logps/rejected": -2.238495349884033, |
|
"loss": 3.0495, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -17.61355209350586, |
|
"rewards/margins": 4.771404266357422, |
|
"rewards/rejected": -22.384952545166016, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.719656283566058, |
|
"grad_norm": 127.44733826541191, |
|
"learning_rate": 1.7621208029631078e-07, |
|
"logits/chosen": -14.766406059265137, |
|
"logits/rejected": -14.667470932006836, |
|
"logps/chosen": -1.9043552875518799, |
|
"logps/rejected": -2.4970054626464844, |
|
"loss": 2.9109, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -19.04355239868164, |
|
"rewards/margins": 5.9265007972717285, |
|
"rewards/rejected": -24.970054626464844, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7303974221267454, |
|
"grad_norm": 114.16195914051926, |
|
"learning_rate": 1.6391410106493227e-07, |
|
"logits/chosen": -14.881872177124023, |
|
"logits/rejected": -14.627456665039062, |
|
"logps/chosen": -1.9786325693130493, |
|
"logps/rejected": -2.5424978733062744, |
|
"loss": 2.8817, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -19.786325454711914, |
|
"rewards/margins": 5.63865327835083, |
|
"rewards/rejected": -25.424976348876953, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7411385606874329, |
|
"grad_norm": 99.76652109445614, |
|
"learning_rate": 1.5194947618596673e-07, |
|
"logits/chosen": -15.026782035827637, |
|
"logits/rejected": -14.58587646484375, |
|
"logps/chosen": -1.9211170673370361, |
|
"logps/rejected": -2.394774913787842, |
|
"loss": 3.0269, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -19.211170196533203, |
|
"rewards/margins": 4.736577033996582, |
|
"rewards/rejected": -23.9477481842041, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7518796992481203, |
|
"grad_norm": 107.2380444923777, |
|
"learning_rate": 1.4033509976362083e-07, |
|
"logits/chosen": -15.670697212219238, |
|
"logits/rejected": -15.579752922058105, |
|
"logps/chosen": -1.9338979721069336, |
|
"logps/rejected": -2.3520257472991943, |
|
"loss": 2.9892, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -19.338979721069336, |
|
"rewards/margins": 4.181277751922607, |
|
"rewards/rejected": -23.52025604248047, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7626208378088077, |
|
"grad_norm": 108.07049767560719, |
|
"learning_rate": 1.2908737134970363e-07, |
|
"logits/chosen": -14.5513334274292, |
|
"logits/rejected": -14.516873359680176, |
|
"logps/chosen": -1.8658726215362549, |
|
"logps/rejected": -2.476576566696167, |
|
"loss": 3.1196, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -18.65872573852539, |
|
"rewards/margins": 6.1070404052734375, |
|
"rewards/rejected": -24.765766143798828, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7733619763694952, |
|
"grad_norm": 103.96338647941381, |
|
"learning_rate": 1.1822217278738515e-07, |
|
"logits/chosen": -15.559527397155762, |
|
"logits/rejected": -15.545167922973633, |
|
"logps/chosen": -1.9091074466705322, |
|
"logps/rejected": -2.4284536838531494, |
|
"loss": 3.0461, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.091075897216797, |
|
"rewards/margins": 5.193462371826172, |
|
"rewards/rejected": -24.284536361694336, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7841031149301826, |
|
"grad_norm": 138.78303459839336, |
|
"learning_rate": 1.0775484578596241e-07, |
|
"logits/chosen": -15.669352531433105, |
|
"logits/rejected": -15.537897109985352, |
|
"logps/chosen": -1.94468092918396, |
|
"logps/rejected": -2.5436809062957764, |
|
"loss": 2.7974, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -19.44681167602539, |
|
"rewards/margins": 5.989997386932373, |
|
"rewards/rejected": -25.436809539794922, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7948442534908701, |
|
"grad_norm": 122.14734643501443, |
|
"learning_rate": 9.770017025829673e-08, |
|
"logits/chosen": -15.961019515991211, |
|
"logits/rejected": -15.95417308807373, |
|
"logps/chosen": -2.1812241077423096, |
|
"logps/rejected": -2.6908135414123535, |
|
"loss": 2.6374, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -21.812240600585938, |
|
"rewards/margins": 5.095890998840332, |
|
"rewards/rejected": -26.908132553100586, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8055853920515574, |
|
"grad_norm": 114.19485857094502, |
|
"learning_rate": 8.807234345151027e-08, |
|
"logits/chosen": -14.920249938964844, |
|
"logits/rejected": -14.890344619750977, |
|
"logps/chosen": -2.0413661003112793, |
|
"logps/rejected": -2.6543760299682617, |
|
"loss": 2.8891, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -20.41366195678711, |
|
"rewards/margins": 6.13009786605835, |
|
"rewards/rejected": -26.543758392333984, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 184.4289644042616, |
|
"learning_rate": 7.888495990040924e-08, |
|
"logits/chosen": -13.656982421875, |
|
"logits/rejected": -13.701431274414062, |
|
"logps/chosen": -2.011772871017456, |
|
"logps/rejected": -2.7216084003448486, |
|
"loss": 2.9801, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -20.117727279663086, |
|
"rewards/margins": 7.098354339599609, |
|
"rewards/rejected": -27.21608543395996, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8270676691729323, |
|
"grad_norm": 132.46593111411627, |
|
"learning_rate": 7.015099223193943e-08, |
|
"logits/chosen": -15.658330917358398, |
|
"logits/rejected": -15.693890571594238, |
|
"logps/chosen": -1.9581212997436523, |
|
"logps/rejected": -2.5001060962677, |
|
"loss": 2.8611, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -19.58121681213379, |
|
"rewards/margins": 5.419846534729004, |
|
"rewards/rejected": -25.001062393188477, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8378088077336198, |
|
"grad_norm": 109.73562851103766, |
|
"learning_rate": 6.188277284777857e-08, |
|
"logits/chosen": -14.48884391784668, |
|
"logits/rejected": -13.800481796264648, |
|
"logps/chosen": -1.9877732992172241, |
|
"logps/rejected": -2.5746169090270996, |
|
"loss": 2.8295, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -19.877731323242188, |
|
"rewards/margins": 5.868436813354492, |
|
"rewards/rejected": -25.746166229248047, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8485499462943072, |
|
"grad_norm": 119.04235166242496, |
|
"learning_rate": 5.409197651092965e-08, |
|
"logits/chosen": -15.729510307312012, |
|
"logits/rejected": -15.620780944824219, |
|
"logps/chosen": -2.1660141944885254, |
|
"logps/rejected": -2.667978286743164, |
|
"loss": 2.7298, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -21.660140991210938, |
|
"rewards/margins": 5.019640922546387, |
|
"rewards/rejected": -26.679784774780273, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8592910848549946, |
|
"grad_norm": 105.88618147362118, |
|
"learning_rate": 4.678960386090298e-08, |
|
"logits/chosen": -15.191770553588867, |
|
"logits/rejected": -15.158576965332031, |
|
"logps/chosen": -1.9015194177627563, |
|
"logps/rejected": -2.5145716667175293, |
|
"loss": 2.7402, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.015193939208984, |
|
"rewards/margins": 6.130521297454834, |
|
"rewards/rejected": -25.145715713500977, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8700322234156821, |
|
"grad_norm": 114.47737417347113, |
|
"learning_rate": 3.998596588076366e-08, |
|
"logits/chosen": -13.7559814453125, |
|
"logits/rejected": -13.483953475952148, |
|
"logps/chosen": -1.9659799337387085, |
|
"logps/rejected": -2.390488862991333, |
|
"loss": 3.1028, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -19.659801483154297, |
|
"rewards/margins": 4.245090484619141, |
|
"rewards/rejected": -23.904891967773438, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8807733619763695, |
|
"grad_norm": 124.82158209754382, |
|
"learning_rate": 3.3690669337976996e-08, |
|
"logits/chosen": -15.061103820800781, |
|
"logits/rejected": -14.891242980957031, |
|
"logps/chosen": -1.8727645874023438, |
|
"logps/rejected": -2.3343753814697266, |
|
"loss": 2.7051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -18.727645874023438, |
|
"rewards/margins": 4.6161088943481445, |
|
"rewards/rejected": -23.343753814697266, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8915145005370569, |
|
"grad_norm": 127.84134494966216, |
|
"learning_rate": 2.7912603219609798e-08, |
|
"logits/chosen": -15.650156021118164, |
|
"logits/rejected": -15.531412124633789, |
|
"logps/chosen": -2.0558724403381348, |
|
"logps/rejected": -2.456268787384033, |
|
"loss": 2.7054, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -20.558725357055664, |
|
"rewards/margins": 4.003961563110352, |
|
"rewards/rejected": -24.562685012817383, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9022556390977443, |
|
"grad_norm": 121.68417066288369, |
|
"learning_rate": 2.265992618104029e-08, |
|
"logits/chosen": -15.883665084838867, |
|
"logits/rejected": -15.859227180480957, |
|
"logps/chosen": -2.1571857929229736, |
|
"logps/rejected": -2.7032248973846436, |
|
"loss": 2.7706, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -21.571857452392578, |
|
"rewards/margins": 5.460390090942383, |
|
"rewards/rejected": -27.03224754333496, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9129967776584318, |
|
"grad_norm": 205.5451817035243, |
|
"learning_rate": 1.7940055025900304e-08, |
|
"logits/chosen": -14.086555480957031, |
|
"logits/rejected": -13.882139205932617, |
|
"logps/chosen": -2.0481374263763428, |
|
"logps/rejected": -2.428682804107666, |
|
"loss": 3.0577, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -20.481372833251953, |
|
"rewards/margins": 3.8054566383361816, |
|
"rewards/rejected": -24.28683090209961, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9237379162191193, |
|
"grad_norm": 119.46095967048043, |
|
"learning_rate": 1.3759654233514817e-08, |
|
"logits/chosen": -14.88987922668457, |
|
"logits/rejected": -14.742823600769043, |
|
"logps/chosen": -1.941057562828064, |
|
"logps/rejected": -2.4432146549224854, |
|
"loss": 2.8075, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -19.41057586669922, |
|
"rewards/margins": 5.021571636199951, |
|
"rewards/rejected": -24.432147979736328, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9344790547798066, |
|
"grad_norm": 112.52416047730715, |
|
"learning_rate": 1.0124626548627402e-08, |
|
"logits/chosen": -15.55200481414795, |
|
"logits/rejected": -15.57677936553955, |
|
"logps/chosen": -2.0663094520568848, |
|
"logps/rejected": -2.723595380783081, |
|
"loss": 2.7934, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -20.663097381591797, |
|
"rewards/margins": 6.5728559494018555, |
|
"rewards/rejected": -27.235950469970703, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9452201933404941, |
|
"grad_norm": 95.43516446345191, |
|
"learning_rate": 7.040104646698042e-09, |
|
"logits/chosen": -14.149500846862793, |
|
"logits/rejected": -14.121160507202148, |
|
"logps/chosen": -2.1648197174072266, |
|
"logps/rejected": -2.803515911102295, |
|
"loss": 2.7258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -21.648197174072266, |
|
"rewards/margins": 6.386962890625, |
|
"rewards/rejected": -28.035160064697266, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9559613319011815, |
|
"grad_norm": 105.32519506523218, |
|
"learning_rate": 4.510443886542114e-09, |
|
"logits/chosen": -15.509679794311523, |
|
"logits/rejected": -15.587133407592773, |
|
"logps/chosen": -2.019160509109497, |
|
"logps/rejected": -2.5319721698760986, |
|
"loss": 2.8015, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -20.191600799560547, |
|
"rewards/margins": 5.128118991851807, |
|
"rewards/rejected": -25.319721221923828, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.966702470461869, |
|
"grad_norm": 142.71634790119586, |
|
"learning_rate": 2.539216160544333e-09, |
|
"logits/chosen": -15.480878829956055, |
|
"logits/rejected": -15.102048873901367, |
|
"logps/chosen": -2.1352379322052, |
|
"logps/rejected": -2.589895486831665, |
|
"loss": 2.889, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -21.35237693786621, |
|
"rewards/margins": 4.546576023101807, |
|
"rewards/rejected": -25.898956298828125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9774436090225563, |
|
"grad_norm": 123.94313579799326, |
|
"learning_rate": 1.1292048511303054e-09, |
|
"logits/chosen": -14.889852523803711, |
|
"logits/rejected": -15.104809761047363, |
|
"logps/chosen": -1.9709218740463257, |
|
"logps/rejected": -2.4727888107299805, |
|
"loss": 2.9813, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -19.709218978881836, |
|
"rewards/margins": 5.018665313720703, |
|
"rewards/rejected": -24.727886199951172, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9881847475832438, |
|
"grad_norm": 100.41632245481235, |
|
"learning_rate": 2.82400900618418e-10, |
|
"logits/chosen": -15.215599060058594, |
|
"logits/rejected": -15.19567584991455, |
|
"logps/chosen": -1.937787652015686, |
|
"logps/rejected": -2.593761920928955, |
|
"loss": 2.5524, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.377878189086914, |
|
"rewards/margins": 6.559741020202637, |
|
"rewards/rejected": -25.937618255615234, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9989258861439313, |
|
"grad_norm": 139.5358860215804, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -15.462666511535645, |
|
"logits/rejected": -15.356382369995117, |
|
"logps/chosen": -2.155303716659546, |
|
"logps/rejected": -2.6153688430786133, |
|
"loss": 2.5621, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -21.55303955078125, |
|
"rewards/margins": 4.600649356842041, |
|
"rewards/rejected": -26.1536865234375, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9989258861439313, |
|
"step": 465, |
|
"total_flos": 0.0, |
|
"train_loss": 3.5518602760889197, |
|
"train_runtime": 6148.2375, |
|
"train_samples_per_second": 9.689, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 465, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|