|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005252100840336135, |
|
"grad_norm": 70.43905184347379, |
|
"kl": 0.21706357598304749, |
|
"learning_rate": 4.999659696812289e-07, |
|
"logps/chosen": -305.59059320494185, |
|
"logps/rejected": -267.9389252533784, |
|
"loss": 0.5004, |
|
"rewards/chosen": -0.09660225136335505, |
|
"rewards/margins": 0.019717185921372296, |
|
"rewards/rejected": -0.11631943728472735, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01050420168067227, |
|
"grad_norm": 81.8070303563997, |
|
"kl": 1.2398526668548584, |
|
"learning_rate": 4.998638879894165e-07, |
|
"logps/chosen": -279.9350071957237, |
|
"logps/rejected": -239.82217261904762, |
|
"loss": 0.4612, |
|
"rewards/chosen": 0.36437626888877467, |
|
"rewards/margins": 0.3501945868470615, |
|
"rewards/rejected": 0.01418168204171317, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015756302521008403, |
|
"grad_norm": 73.53961446495941, |
|
"kl": 0.0, |
|
"learning_rate": 4.996937827155428e-07, |
|
"logps/chosen": -327.1117964181287, |
|
"logps/rejected": -270.5936713506711, |
|
"loss": 0.4185, |
|
"rewards/chosen": -0.6132015652126737, |
|
"rewards/margins": 1.3433878967603758, |
|
"rewards/rejected": -1.9565894619730495, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02100840336134454, |
|
"grad_norm": 51.541845841038615, |
|
"kl": 3.030362129211426, |
|
"learning_rate": 4.994557001695013e-07, |
|
"logps/chosen": -286.4127286585366, |
|
"logps/rejected": -238.16353665865384, |
|
"loss": 0.4045, |
|
"rewards/chosen": 1.3511776807831555, |
|
"rewards/margins": 1.1392201160624147, |
|
"rewards/rejected": 0.21195756472074068, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026260504201680673, |
|
"grad_norm": 83.73385866105204, |
|
"kl": 0.0, |
|
"learning_rate": 4.991497051674917e-07, |
|
"logps/chosen": -295.27088246855345, |
|
"logps/rejected": -278.80997670807454, |
|
"loss": 0.3944, |
|
"rewards/chosen": 0.2137240523812156, |
|
"rewards/margins": 1.5743222196584865, |
|
"rewards/rejected": -1.360598167277271, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031512605042016806, |
|
"grad_norm": 57.616560952806196, |
|
"kl": 0.0, |
|
"learning_rate": 4.987758810143735e-07, |
|
"logps/chosen": -325.4705078125, |
|
"logps/rejected": -255.959326171875, |
|
"loss": 0.4034, |
|
"rewards/chosen": 1.1321415901184082, |
|
"rewards/margins": 1.2807276964187622, |
|
"rewards/rejected": -0.14858610630035402, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03676470588235294, |
|
"grad_norm": 62.734358016783204, |
|
"kl": 0.0, |
|
"learning_rate": 4.983343294809874e-07, |
|
"logps/chosen": -306.6684683866279, |
|
"logps/rejected": -258.4501953125, |
|
"loss": 0.3822, |
|
"rewards/chosen": 1.1383993459302326, |
|
"rewards/margins": 1.6607683698461164, |
|
"rewards/rejected": -0.5223690239158837, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04201680672268908, |
|
"grad_norm": 52.446040187552235, |
|
"kl": 0.0, |
|
"learning_rate": 4.978251707764491e-07, |
|
"logps/chosen": -304.0558176100629, |
|
"logps/rejected": -276.5557793090062, |
|
"loss": 0.3593, |
|
"rewards/chosen": 0.427140049964377, |
|
"rewards/margins": 2.316681770716027, |
|
"rewards/rejected": -1.8895417207516498, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04726890756302521, |
|
"grad_norm": 51.1401597083277, |
|
"kl": 0.0, |
|
"learning_rate": 4.972485435154228e-07, |
|
"logps/chosen": -284.92168090062114, |
|
"logps/rejected": -289.27459217767296, |
|
"loss": 0.3601, |
|
"rewards/chosen": 1.139393302964868, |
|
"rewards/margins": 2.435457571832792, |
|
"rewards/rejected": -1.2960642688679245, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052521008403361345, |
|
"grad_norm": 44.236393372962524, |
|
"kl": 0.0, |
|
"learning_rate": 4.966046046803842e-07, |
|
"logps/chosen": -312.7745820063694, |
|
"logps/rejected": -278.74285851226995, |
|
"loss": 0.3593, |
|
"rewards/chosen": 0.3532914568664162, |
|
"rewards/margins": 2.3767495515510406, |
|
"rewards/rejected": -2.0234580946846243, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05777310924369748, |
|
"grad_norm": 38.95152051376054, |
|
"kl": 0.0, |
|
"learning_rate": 4.958935295788841e-07, |
|
"logps/chosen": -364.43832781456956, |
|
"logps/rejected": -306.8252588757396, |
|
"loss": 0.3823, |
|
"rewards/chosen": -1.0973136851329677, |
|
"rewards/margins": 2.0935938101286817, |
|
"rewards/rejected": -3.1909074952616496, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06302521008403361, |
|
"grad_norm": 45.75317629434847, |
|
"kl": 0.0, |
|
"learning_rate": 4.951155117958216e-07, |
|
"logps/chosen": -304.1672453703704, |
|
"logps/rejected": -288.4255340189873, |
|
"loss": 0.3693, |
|
"rewards/chosen": -0.012564458964783469, |
|
"rewards/margins": 2.483852622843959, |
|
"rewards/rejected": -2.496417081808742, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06827731092436974, |
|
"grad_norm": 52.69834029914945, |
|
"kl": 0.0, |
|
"learning_rate": 4.942707631407419e-07, |
|
"logps/chosen": -331.2020513523392, |
|
"logps/rejected": -275.2519662332215, |
|
"loss": 0.3931, |
|
"rewards/chosen": 0.4922319378769189, |
|
"rewards/margins": 1.7257208016235108, |
|
"rewards/rejected": -1.233488863746592, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07352941176470588, |
|
"grad_norm": 65.97698112794365, |
|
"kl": 0.0, |
|
"learning_rate": 4.933595135901732e-07, |
|
"logps/chosen": -314.8364361702128, |
|
"logps/rejected": -269.268156424581, |
|
"loss": 0.3674, |
|
"rewards/chosen": 0.04331512315898922, |
|
"rewards/margins": 1.9166126173759934, |
|
"rewards/rejected": -1.8732974942170042, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07878151260504201, |
|
"grad_norm": 75.72251586801518, |
|
"kl": 0.0, |
|
"learning_rate": 4.923820112250169e-07, |
|
"logps/chosen": -310.11692533557044, |
|
"logps/rejected": -272.4901087353801, |
|
"loss": 0.3438, |
|
"rewards/chosen": -0.31625601749292154, |
|
"rewards/margins": 2.9441549382819323, |
|
"rewards/rejected": -3.2604109557748537, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08403361344537816, |
|
"grad_norm": 51.266519524836305, |
|
"kl": 0.0, |
|
"learning_rate": 4.913385221630096e-07, |
|
"logps/chosen": -264.80689858490564, |
|
"logps/rejected": -284.40986510093165, |
|
"loss": 0.3589, |
|
"rewards/chosen": 0.2699741627435264, |
|
"rewards/margins": 2.429860872395118, |
|
"rewards/rejected": -2.1598867096515915, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08928571428571429, |
|
"grad_norm": 69.780370721156, |
|
"kl": 0.0, |
|
"learning_rate": 4.902293304862749e-07, |
|
"logps/chosen": -281.5589111328125, |
|
"logps/rejected": -274.93525390625, |
|
"loss": 0.361, |
|
"rewards/chosen": 0.5642091274261475, |
|
"rewards/margins": 2.2661412715911866, |
|
"rewards/rejected": -1.701932144165039, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09453781512605042, |
|
"grad_norm": 48.456143880910204, |
|
"kl": 0.0, |
|
"learning_rate": 4.890547381639833e-07, |
|
"logps/chosen": -312.7041968368902, |
|
"logps/rejected": -251.9474158653846, |
|
"loss": 0.3564, |
|
"rewards/chosen": -0.15778241506436977, |
|
"rewards/margins": 2.705699155448451, |
|
"rewards/rejected": -2.8634815705128207, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09978991596638656, |
|
"grad_norm": 49.10681910714073, |
|
"kl": 0.0, |
|
"learning_rate": 4.878150649701439e-07, |
|
"logps/chosen": -356.0400260416667, |
|
"logps/rejected": -285.11312040441175, |
|
"loss": 0.3345, |
|
"rewards/chosen": -0.45484156290690103, |
|
"rewards/margins": 3.4065278505811505, |
|
"rewards/rejected": -3.8613694134880516, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10504201680672269, |
|
"grad_norm": 42.17787499105965, |
|
"kl": 0.0, |
|
"learning_rate": 4.865106483965486e-07, |
|
"logps/chosen": -297.6003605769231, |
|
"logps/rejected": -252.6407440929878, |
|
"loss": 0.3495, |
|
"rewards/chosen": -0.08823707164862217, |
|
"rewards/margins": 2.8701916601301507, |
|
"rewards/rejected": -2.9584287317787727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11029411764705882, |
|
"grad_norm": 32.7456875390811, |
|
"kl": 0.0, |
|
"learning_rate": 4.851418435608919e-07, |
|
"logps/chosen": -292.8467514124294, |
|
"logps/rejected": -265.97642591783216, |
|
"loss": 0.3655, |
|
"rewards/chosen": 0.9715841692046258, |
|
"rewards/margins": 2.2475568689901917, |
|
"rewards/rejected": -1.275972699785566, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11554621848739496, |
|
"grad_norm": 46.476558893396145, |
|
"kl": 0.0, |
|
"learning_rate": 4.837090231100927e-07, |
|
"logps/chosen": -315.36924463757396, |
|
"logps/rejected": -251.48538389900662, |
|
"loss": 0.3217, |
|
"rewards/chosen": 1.1581101276465422, |
|
"rewards/margins": 2.9979615575668648, |
|
"rewards/rejected": -1.8398514299203228, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1207983193277311, |
|
"grad_norm": 58.692837949072945, |
|
"kl": 0.0, |
|
"learning_rate": 4.822125771188448e-07, |
|
"logps/chosen": -306.7650669642857, |
|
"logps/rejected": -301.322265625, |
|
"loss": 0.3701, |
|
"rewards/chosen": -0.7039821178882153, |
|
"rewards/margins": 2.5890142082365966, |
|
"rewards/rejected": -3.2929963261248116, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12605042016806722, |
|
"grad_norm": 38.77396195360521, |
|
"kl": 0.0, |
|
"learning_rate": 4.806529129834207e-07, |
|
"logps/chosen": -299.9251123715753, |
|
"logps/rejected": -295.51858836206895, |
|
"loss": 0.3211, |
|
"rewards/chosen": -0.4328058843743311, |
|
"rewards/margins": 3.400510611674519, |
|
"rewards/rejected": -3.8333164960488504, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13130252100840337, |
|
"grad_norm": 54.771749584890465, |
|
"kl": 0.0, |
|
"learning_rate": 4.790304553107622e-07, |
|
"logps/chosen": -305.0079280695266, |
|
"logps/rejected": -256.60885761589407, |
|
"loss": 0.3722, |
|
"rewards/chosen": 0.8469177787825906, |
|
"rewards/margins": 2.0538868820589253, |
|
"rewards/rejected": -1.2069691032763348, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13655462184873948, |
|
"grad_norm": 56.498632681940265, |
|
"kl": 0.0, |
|
"learning_rate": 4.773456458028837e-07, |
|
"logps/chosen": -278.4545183121019, |
|
"logps/rejected": -264.5601514570552, |
|
"loss": 0.3744, |
|
"rewards/chosen": 1.3172516063520103, |
|
"rewards/margins": 2.152952080254426, |
|
"rewards/rejected": -0.8357004739024156, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14180672268907563, |
|
"grad_norm": 56.9574995100581, |
|
"kl": 0.0, |
|
"learning_rate": 4.755989431366221e-07, |
|
"logps/chosen": -304.06099759615387, |
|
"logps/rejected": -293.3189310213415, |
|
"loss": 0.3093, |
|
"rewards/chosen": 1.1587672111315606, |
|
"rewards/margins": 4.173915839180341, |
|
"rewards/rejected": -3.0151486280487805, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 63.035012985547, |
|
"kl": 0.0, |
|
"learning_rate": 4.737908228387656e-07, |
|
"logps/chosen": -297.4018322172619, |
|
"logps/rejected": -267.7578895970395, |
|
"loss": 0.3581, |
|
"rewards/chosen": 1.0978340875534784, |
|
"rewards/margins": 2.6742270142213442, |
|
"rewards/rejected": -1.576392926667866, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15231092436974789, |
|
"grad_norm": 57.87624552889083, |
|
"kl": 0.0, |
|
"learning_rate": 4.7192177715659516e-07, |
|
"logps/chosen": -293.31778630239523, |
|
"logps/rejected": -253.0896905637255, |
|
"loss": 0.3583, |
|
"rewards/chosen": 0.7335890809932869, |
|
"rewards/margins": 2.5885067194353826, |
|
"rewards/rejected": -1.8549176384420956, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15756302521008403, |
|
"grad_norm": 63.42262644789701, |
|
"kl": 0.0, |
|
"learning_rate": 4.699923149238736e-07, |
|
"logps/chosen": -300.68055867805754, |
|
"logps/rejected": -294.7720778660221, |
|
"loss": 0.3416, |
|
"rewards/chosen": -0.4132621820024449, |
|
"rewards/margins": 3.4406516889618164, |
|
"rewards/rejected": -3.853913870964261, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16281512605042017, |
|
"grad_norm": 58.34203205576481, |
|
"kl": 0.0, |
|
"learning_rate": 4.680029614223198e-07, |
|
"logps/chosen": -284.8943819665605, |
|
"logps/rejected": -281.19528853527606, |
|
"loss": 0.374, |
|
"rewards/chosen": -0.6231775951992934, |
|
"rewards/margins": 2.335680965000093, |
|
"rewards/rejected": -2.9588585601993866, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16806722689075632, |
|
"grad_norm": 45.38290295645773, |
|
"kl": 0.0, |
|
"learning_rate": 4.65954258238604e-07, |
|
"logps/chosen": -297.1282980913174, |
|
"logps/rejected": -263.37903390522877, |
|
"loss": 0.3695, |
|
"rewards/chosen": 0.311750309196061, |
|
"rewards/margins": 2.5421297005359302, |
|
"rewards/rejected": -2.230379391339869, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17331932773109243, |
|
"grad_norm": 67.2098515597697, |
|
"kl": 0.0, |
|
"learning_rate": 4.638467631169056e-07, |
|
"logps/chosen": -328.67982700892856, |
|
"logps/rejected": -309.46533203125, |
|
"loss": 0.3203, |
|
"rewards/chosen": 0.9687714349655878, |
|
"rewards/margins": 3.498681472357652, |
|
"rewards/rejected": -2.529910037392064, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 55.44522631657296, |
|
"kl": 0.0, |
|
"learning_rate": 4.6168104980707103e-07, |
|
"logps/chosen": -285.8288395579268, |
|
"logps/rejected": -275.2672526041667, |
|
"loss": 0.3315, |
|
"rewards/chosen": 0.7993925141125191, |
|
"rewards/margins": 3.4423916251902433, |
|
"rewards/rejected": -2.6429991110777245, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18382352941176472, |
|
"grad_norm": 36.50055695974426, |
|
"kl": 0.0, |
|
"learning_rate": 4.594577079084145e-07, |
|
"logps/chosen": -287.8186279296875, |
|
"logps/rejected": -295.969091796875, |
|
"loss": 0.3146, |
|
"rewards/chosen": 1.0129197120666504, |
|
"rewards/margins": 3.7097062110900882, |
|
"rewards/rejected": -2.6967864990234376, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18907563025210083, |
|
"grad_norm": 48.72297305247662, |
|
"kl": 0.0, |
|
"learning_rate": 4.5717734270920466e-07, |
|
"logps/chosen": -280.1525594325153, |
|
"logps/rejected": -232.07081011146497, |
|
"loss": 0.351, |
|
"rewards/chosen": 0.9978786538715011, |
|
"rewards/margins": 2.654015719394491, |
|
"rewards/rejected": -1.6561370655229897, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19432773109243698, |
|
"grad_norm": 47.17739577906298, |
|
"kl": 0.0, |
|
"learning_rate": 4.548405750218785e-07, |
|
"logps/chosen": -290.4170778508772, |
|
"logps/rejected": -278.8102191694631, |
|
"loss": 0.3535, |
|
"rewards/chosen": 0.4387444390190972, |
|
"rewards/margins": 3.595384203792063, |
|
"rewards/rejected": -3.1566397647729656, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19957983193277312, |
|
"grad_norm": 55.83663877353586, |
|
"kl": 0.0, |
|
"learning_rate": 4.5244804101403025e-07, |
|
"logps/chosen": -262.20204133064516, |
|
"logps/rejected": -258.4055634469697, |
|
"loss": 0.3611, |
|
"rewards/chosen": 0.3024998326455393, |
|
"rewards/margins": 2.7435985024490432, |
|
"rewards/rejected": -2.441098669803504, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20483193277310924, |
|
"grad_norm": 53.266164401529096, |
|
"kl": 0.0, |
|
"learning_rate": 4.5000039203521976e-07, |
|
"logps/chosen": -300.6577662417763, |
|
"logps/rejected": -298.0972609747024, |
|
"loss": 0.348, |
|
"rewards/chosen": 1.187171132940995, |
|
"rewards/margins": 3.403816691616125, |
|
"rewards/rejected": -2.2166455586751304, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21008403361344538, |
|
"grad_norm": 51.236491583825575, |
|
"kl": 0.0, |
|
"learning_rate": 4.47498294439647e-07, |
|
"logps/chosen": -291.7605892319277, |
|
"logps/rejected": -267.51894784902595, |
|
"loss": 0.3199, |
|
"rewards/chosen": 1.1110430797898625, |
|
"rewards/margins": 3.199955449158775, |
|
"rewards/rejected": -2.0889123693689124, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21533613445378152, |
|
"grad_norm": 66.69622404112168, |
|
"kl": 0.0, |
|
"learning_rate": 4.449424294047419e-07, |
|
"logps/chosen": -301.6258148006135, |
|
"logps/rejected": -284.4103801751592, |
|
"loss": 0.3232, |
|
"rewards/chosen": 1.0998908668939322, |
|
"rewards/margins": 3.629001137594569, |
|
"rewards/rejected": -2.5291102707006368, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22058823529411764, |
|
"grad_norm": 52.10847099745242, |
|
"kl": 0.0, |
|
"learning_rate": 4.4233349274571974e-07, |
|
"logps/chosen": -303.1148280201342, |
|
"logps/rejected": -273.5418494152047, |
|
"loss": 0.3532, |
|
"rewards/chosen": 0.6703597203197095, |
|
"rewards/margins": 3.336041436614026, |
|
"rewards/rejected": -2.6656817162943165, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22584033613445378, |
|
"grad_norm": 48.65147057788595, |
|
"kl": 0.0, |
|
"learning_rate": 4.396721947261496e-07, |
|
"logps/chosen": -293.84951524849396, |
|
"logps/rejected": -279.7598924512987, |
|
"loss": 0.345, |
|
"rewards/chosen": 0.35480995637824736, |
|
"rewards/margins": 3.4659533905807636, |
|
"rewards/rejected": -3.1111434342025164, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23109243697478993, |
|
"grad_norm": 60.821733992897606, |
|
"kl": 0.0, |
|
"learning_rate": 4.3695925986459107e-07, |
|
"logps/chosen": -284.638457507622, |
|
"logps/rejected": -260.53165064102564, |
|
"loss": 0.3469, |
|
"rewards/chosen": 0.6963288376970989, |
|
"rewards/margins": 3.8638249564871634, |
|
"rewards/rejected": -3.1674961187900643, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23634453781512604, |
|
"grad_norm": 40.515343648966464, |
|
"kl": 0.0, |
|
"learning_rate": 4.341954267373494e-07, |
|
"logps/chosen": -288.7482045807453, |
|
"logps/rejected": -257.25950766509436, |
|
"loss": 0.3392, |
|
"rewards/chosen": 1.8534261573175466, |
|
"rewards/margins": 2.916300141600418, |
|
"rewards/rejected": -1.0628739842828714, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2415966386554622, |
|
"grad_norm": 59.00202211258808, |
|
"kl": 0.0, |
|
"learning_rate": 4.313814477774035e-07, |
|
"logps/chosen": -450.8061615566038, |
|
"logps/rejected": -373.24873835403724, |
|
"loss": 0.3372, |
|
"rewards/chosen": -13.37514433470912, |
|
"rewards/margins": -0.7130592221314807, |
|
"rewards/rejected": -12.662085112577639, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24684873949579833, |
|
"grad_norm": 47.46663221207845, |
|
"kl": 0.0, |
|
"learning_rate": 4.2851808906956134e-07, |
|
"logps/chosen": -293.39712000739644, |
|
"logps/rejected": -281.5295685016556, |
|
"loss": 0.3252, |
|
"rewards/chosen": 1.1083643986628606, |
|
"rewards/margins": 3.8087977886928686, |
|
"rewards/rejected": -2.700433390030008, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25210084033613445, |
|
"grad_norm": 51.531711526626296, |
|
"kl": 0.0, |
|
"learning_rate": 4.256061301418996e-07, |
|
"logps/chosen": -276.60667242005815, |
|
"logps/rejected": -270.2625897381757, |
|
"loss": 0.3145, |
|
"rewards/chosen": 1.4284691034361374, |
|
"rewards/margins": 3.5734818300910147, |
|
"rewards/rejected": -2.1450127266548775, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25735294117647056, |
|
"grad_norm": 45.63717278781431, |
|
"kl": 0.0, |
|
"learning_rate": 4.2264636375354283e-07, |
|
"logps/chosen": -291.6655943627451, |
|
"logps/rejected": -258.8236573727545, |
|
"loss": 0.3174, |
|
"rewards/chosen": 1.2060089111328125, |
|
"rewards/margins": 4.036167989947838, |
|
"rewards/rejected": -2.830159078815026, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26260504201680673, |
|
"grad_norm": 52.57777553035371, |
|
"kl": 0.0, |
|
"learning_rate": 4.1963959567884045e-07, |
|
"logps/chosen": -308.52905933277026, |
|
"logps/rejected": -288.6666061046512, |
|
"loss": 0.3152, |
|
"rewards/chosen": 1.2610940675477724, |
|
"rewards/margins": 4.6736672387641605, |
|
"rewards/rejected": -3.4125731712163883, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26785714285714285, |
|
"grad_norm": 40.78741469978887, |
|
"kl": 0.0, |
|
"learning_rate": 4.1658664448800094e-07, |
|
"logps/chosen": -287.10628043831167, |
|
"logps/rejected": -265.04310993975906, |
|
"loss": 0.334, |
|
"rewards/chosen": 0.06761282140558417, |
|
"rewards/margins": 3.2680445532156366, |
|
"rewards/rejected": -3.2004317318100526, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27310924369747897, |
|
"grad_norm": 53.20527576804437, |
|
"kl": 0.0, |
|
"learning_rate": 4.1348834132424204e-07, |
|
"logps/chosen": -319.5769211871069, |
|
"logps/rejected": -310.2074679736025, |
|
"loss": 0.3435, |
|
"rewards/chosen": -0.4044905368636989, |
|
"rewards/margins": 4.406604517241503, |
|
"rewards/rejected": -4.811095054105202, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27836134453781514, |
|
"grad_norm": 56.13445331912671, |
|
"kl": 0.0, |
|
"learning_rate": 4.103455296775181e-07, |
|
"logps/chosen": -318.1049981174699, |
|
"logps/rejected": -280.2241020698052, |
|
"loss": 0.3043, |
|
"rewards/chosen": 0.8269406973597515, |
|
"rewards/margins": 4.47310245796243, |
|
"rewards/rejected": -3.6461617606026784, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28361344537815125, |
|
"grad_norm": 35.13309154843364, |
|
"kl": 0.0, |
|
"learning_rate": 4.071590651548867e-07, |
|
"logps/chosen": -300.40650531045753, |
|
"logps/rejected": -319.7241766467066, |
|
"loss": 0.3379, |
|
"rewards/chosen": -1.0223476434844772, |
|
"rewards/margins": 4.300584251514588, |
|
"rewards/rejected": -5.322931894999065, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28886554621848737, |
|
"grad_norm": 52.825979085690484, |
|
"kl": 0.0, |
|
"learning_rate": 4.039298152475754e-07, |
|
"logps/chosen": -309.6330613057325, |
|
"logps/rejected": -316.5918328220859, |
|
"loss": 0.3449, |
|
"rewards/chosen": -0.2137961903954767, |
|
"rewards/margins": 3.9556471390236183, |
|
"rewards/rejected": -4.169443329419095, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 51.016496924749916, |
|
"kl": 0.0, |
|
"learning_rate": 4.006586590948141e-07, |
|
"logps/chosen": -282.8731328616352, |
|
"logps/rejected": -268.71367915372673, |
|
"loss": 0.3175, |
|
"rewards/chosen": 1.230227104522897, |
|
"rewards/margins": 3.262244593180214, |
|
"rewards/rejected": -2.0320174886573175, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29936974789915966, |
|
"grad_norm": 38.1092514993909, |
|
"kl": 0.0, |
|
"learning_rate": 3.973464872444958e-07, |
|
"logps/chosen": -294.08426339285717, |
|
"logps/rejected": -278.8594487028302, |
|
"loss": 0.3418, |
|
"rewards/chosen": 1.2165776602229716, |
|
"rewards/margins": 3.751641013947421, |
|
"rewards/rejected": -2.53506335372445, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.30462184873949577, |
|
"grad_norm": 58.704387106710655, |
|
"kl": 0.0, |
|
"learning_rate": 3.939942014107318e-07, |
|
"logps/chosen": -271.2850392964072, |
|
"logps/rejected": -297.9468954248366, |
|
"loss": 0.3425, |
|
"rewards/chosen": 0.49638238781226607, |
|
"rewards/margins": 3.3393096099706607, |
|
"rewards/rejected": -2.8429272221583948, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30987394957983194, |
|
"grad_norm": 40.68813636391329, |
|
"kl": 0.0, |
|
"learning_rate": 3.9060271422836624e-07, |
|
"logps/chosen": -287.60264185855266, |
|
"logps/rejected": -279.2552780877976, |
|
"loss": 0.3002, |
|
"rewards/chosen": 1.2695540377968235, |
|
"rewards/margins": 4.471388216903036, |
|
"rewards/rejected": -3.201834179106213, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31512605042016806, |
|
"grad_norm": 50.475157194053196, |
|
"kl": 0.0, |
|
"learning_rate": 3.871729490045185e-07, |
|
"logps/chosen": -290.81252297794117, |
|
"logps/rejected": -269.86484375, |
|
"loss": 0.2895, |
|
"rewards/chosen": 1.230052095301011, |
|
"rewards/margins": 4.965939546472886, |
|
"rewards/rejected": -3.735887451171875, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32037815126050423, |
|
"grad_norm": 52.61703361665091, |
|
"kl": 0.0, |
|
"learning_rate": 3.837058394672196e-07, |
|
"logps/chosen": -272.8394775390625, |
|
"logps/rejected": -298.7532958984375, |
|
"loss": 0.3103, |
|
"rewards/chosen": 0.9608588218688965, |
|
"rewards/margins": 4.247009944915772, |
|
"rewards/rejected": -3.286151123046875, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32563025210084034, |
|
"grad_norm": 46.87592251872074, |
|
"kl": 0.0, |
|
"learning_rate": 3.8020232951121166e-07, |
|
"logps/chosen": -291.6056034482759, |
|
"logps/rejected": -298.2428348214286, |
|
"loss": 0.3197, |
|
"rewards/chosen": 0.4831507189520474, |
|
"rewards/margins": 3.7980579455145476, |
|
"rewards/rejected": -3.3149072265625, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33088235294117646, |
|
"grad_norm": 50.37994120078213, |
|
"kl": 0.0, |
|
"learning_rate": 3.7666337294097985e-07, |
|
"logps/chosen": -306.0224609375, |
|
"logps/rejected": -271.47445401278407, |
|
"loss": 0.3324, |
|
"rewards/chosen": 0.5180339813232422, |
|
"rewards/margins": 4.124568072232333, |
|
"rewards/rejected": -3.606534090909091, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33613445378151263, |
|
"grad_norm": 42.5115670992028, |
|
"kl": 0.0, |
|
"learning_rate": 3.730899332110855e-07, |
|
"logps/chosen": -283.47386259191177, |
|
"logps/rejected": -306.39444633152175, |
|
"loss": 0.2874, |
|
"rewards/chosen": 0.3301387113683364, |
|
"rewards/margins": 5.416864360994695, |
|
"rewards/rejected": -5.086725649626358, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34138655462184875, |
|
"grad_norm": 41.86018982710908, |
|
"kl": 0.0, |
|
"learning_rate": 3.694829831638738e-07, |
|
"logps/chosen": -277.6752025462963, |
|
"logps/rejected": -316.3676819620253, |
|
"loss": 0.3214, |
|
"rewards/chosen": -0.18712226255440417, |
|
"rewards/margins": 4.334243518819211, |
|
"rewards/rejected": -4.521365781373616, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34663865546218486, |
|
"grad_norm": 48.14500081411287, |
|
"kl": 0.0, |
|
"learning_rate": 3.658435047646238e-07, |
|
"logps/chosen": -287.62355587121215, |
|
"logps/rejected": -288.9054939516129, |
|
"loss": 0.2986, |
|
"rewards/chosen": 0.6268967137192235, |
|
"rewards/margins": 5.107723799152692, |
|
"rewards/rejected": -4.480827085433468, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35189075630252103, |
|
"grad_norm": 56.47592313913895, |
|
"kl": 0.0, |
|
"learning_rate": 3.621724888342161e-07, |
|
"logps/chosen": -315.55562279929575, |
|
"logps/rejected": -265.77844101123594, |
|
"loss": 0.3345, |
|
"rewards/chosen": 0.5301809713874065, |
|
"rewards/margins": 3.9119891344623716, |
|
"rewards/rejected": -3.381808163074965, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 61.01903580408772, |
|
"kl": 0.0, |
|
"learning_rate": 3.584709347793895e-07, |
|
"logps/chosen": -333.9532463121118, |
|
"logps/rejected": -284.3685878537736, |
|
"loss": 0.3061, |
|
"rewards/chosen": 0.6283516735764023, |
|
"rewards/margins": 4.496458923478132, |
|
"rewards/rejected": -3.8681072499017297, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36239495798319327, |
|
"grad_norm": 65.14201524354712, |
|
"kl": 0.0, |
|
"learning_rate": 3.5473985032065946e-07, |
|
"logps/chosen": -337.2389689700704, |
|
"logps/rejected": -293.1407566713483, |
|
"loss": 0.3049, |
|
"rewards/chosen": 1.298159102318992, |
|
"rewards/margins": 4.470166802349715, |
|
"rewards/rejected": -3.1720077000307234, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36764705882352944, |
|
"grad_norm": 83.73292739825986, |
|
"kl": 0.0, |
|
"learning_rate": 3.509802512179737e-07, |
|
"logps/chosen": -641.1551411290322, |
|
"logps/rejected": -469.2648674242424, |
|
"loss": 0.3542, |
|
"rewards/chosen": -31.990240675403225, |
|
"rewards/margins": -6.357922375213832, |
|
"rewards/rejected": -25.632318300189393, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37289915966386555, |
|
"grad_norm": 59.59503648599958, |
|
"kl": 0.0, |
|
"learning_rate": 3.4719316099417983e-07, |
|
"logps/chosen": -305.71205357142856, |
|
"logps/rejected": -302.07416961477986, |
|
"loss": 0.3297, |
|
"rewards/chosen": 0.6071341378348214, |
|
"rewards/margins": 4.526243746227117, |
|
"rewards/rejected": -3.9191096083922954, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37815126050420167, |
|
"grad_norm": 52.12511665719596, |
|
"kl": 0.0, |
|
"learning_rate": 3.4337961065637786e-07, |
|
"logps/chosen": -354.86205286949684, |
|
"logps/rejected": -341.6544788431677, |
|
"loss": 0.3121, |
|
"rewards/chosen": -0.023094465147774173, |
|
"rewards/margins": 5.537508137603584, |
|
"rewards/rejected": -5.560602602751358, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38340336134453784, |
|
"grad_norm": 57.5402410664677, |
|
"kl": 0.0, |
|
"learning_rate": 3.395406384152371e-07, |
|
"logps/chosen": -315.799042492378, |
|
"logps/rejected": -294.5041316105769, |
|
"loss": 0.3199, |
|
"rewards/chosen": 0.43840366456566787, |
|
"rewards/margins": 4.3100072146207555, |
|
"rewards/rejected": -3.871603550055088, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38865546218487396, |
|
"grad_norm": 49.22922457925009, |
|
"kl": 0.0, |
|
"learning_rate": 3.356772894023505e-07, |
|
"logps/chosen": -268.91911764705884, |
|
"logps/rejected": -281.14432565789474, |
|
"loss": 0.3627, |
|
"rewards/chosen": 1.405719124697109, |
|
"rewards/margins": 3.5240630802892143, |
|
"rewards/rejected": -2.1183439555921053, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3939075630252101, |
|
"grad_norm": 46.913670160363154, |
|
"kl": 0.0, |
|
"learning_rate": 3.317906153857054e-07, |
|
"logps/chosen": -283.1244277468153, |
|
"logps/rejected": -271.5762078220859, |
|
"loss": 0.3514, |
|
"rewards/chosen": 1.5512054832118332, |
|
"rewards/margins": 3.464116740341338, |
|
"rewards/rejected": -1.9129112571295053, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39915966386554624, |
|
"grad_norm": 39.898837555375735, |
|
"kl": 0.0, |
|
"learning_rate": 3.2788167448334784e-07, |
|
"logps/chosen": -253.08657625786162, |
|
"logps/rejected": -273.2616459627329, |
|
"loss": 0.3454, |
|
"rewards/chosen": 0.4627175001228381, |
|
"rewards/margins": 3.103327146558688, |
|
"rewards/rejected": -2.64060964643585, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40441176470588236, |
|
"grad_norm": 78.57819058757303, |
|
"kl": 0.0, |
|
"learning_rate": 3.2395153087531763e-07, |
|
"logps/chosen": -297.46610213926175, |
|
"logps/rejected": -265.79100420321635, |
|
"loss": 0.3314, |
|
"rewards/chosen": 0.0033199643128670304, |
|
"rewards/margins": 4.24907213159631, |
|
"rewards/rejected": -4.245752167283443, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4096638655462185, |
|
"grad_norm": 58.66647545765705, |
|
"kl": 0.0, |
|
"learning_rate": 3.20001254513933e-07, |
|
"logps/chosen": -316.0476090604027, |
|
"logps/rejected": -338.84203673245617, |
|
"loss": 0.3352, |
|
"rewards/chosen": -0.9521625698012793, |
|
"rewards/margins": 3.994815012092909, |
|
"rewards/rejected": -4.946977581894188, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41491596638655465, |
|
"grad_norm": 61.099435670035334, |
|
"kl": 0.0, |
|
"learning_rate": 3.160319208325044e-07, |
|
"logps/chosen": -328.2163245506536, |
|
"logps/rejected": -299.7488070733533, |
|
"loss": 0.3143, |
|
"rewards/chosen": -0.38961004120072507, |
|
"rewards/margins": 4.762490664614208, |
|
"rewards/rejected": -5.152100705814933, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42016806722689076, |
|
"grad_norm": 55.61375515356607, |
|
"kl": 0.0, |
|
"learning_rate": 3.1204461045255597e-07, |
|
"logps/chosen": -310.96216982886904, |
|
"logps/rejected": -292.1716951069079, |
|
"loss": 0.3393, |
|
"rewards/chosen": 0.4771306628272647, |
|
"rewards/margins": 4.309103688500579, |
|
"rewards/rejected": -3.831973025673314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4254201680672269, |
|
"grad_norm": 64.83942546607469, |
|
"kl": 0.0, |
|
"learning_rate": 3.0804040888963367e-07, |
|
"logps/chosen": -306.39021236795776, |
|
"logps/rejected": -276.6224543539326, |
|
"loss": 0.3007, |
|
"rewards/chosen": 0.6496220978213029, |
|
"rewards/margins": 5.41400753551881, |
|
"rewards/rejected": -4.764385437697507, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43067226890756305, |
|
"grad_norm": 47.53109374375667, |
|
"kl": 0.0, |
|
"learning_rate": 3.040204062577824e-07, |
|
"logps/chosen": -319.8095262096774, |
|
"logps/rejected": -276.47109375, |
|
"loss": 0.3453, |
|
"rewards/chosen": 0.12265574547552294, |
|
"rewards/margins": 3.962314170546546, |
|
"rewards/rejected": -3.839658425071023, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43592436974789917, |
|
"grad_norm": 50.597587143566045, |
|
"kl": 0.0, |
|
"learning_rate": 2.999856969727704e-07, |
|
"logps/chosen": -301.44910453216374, |
|
"logps/rejected": -281.371670511745, |
|
"loss": 0.3375, |
|
"rewards/chosen": 0.8545954297160545, |
|
"rewards/margins": 4.381973588797431, |
|
"rewards/rejected": -3.5273781590813758, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 65.2431749840782, |
|
"kl": 0.0, |
|
"learning_rate": 2.959373794541426e-07, |
|
"logps/chosen": -288.58415743670884, |
|
"logps/rejected": -275.80381944444446, |
|
"loss": 0.3174, |
|
"rewards/chosen": 0.5184780072562302, |
|
"rewards/margins": 4.098698170860589, |
|
"rewards/rejected": -3.5802201636043596, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 43.10826918884011, |
|
"kl": 0.0, |
|
"learning_rate": 2.9187655582618407e-07, |
|
"logps/chosen": -317.565112154908, |
|
"logps/rejected": -297.73979896496814, |
|
"loss": 0.3039, |
|
"rewards/chosen": 1.0487361978168137, |
|
"rewards/margins": 4.458561225185444, |
|
"rewards/rejected": -3.4098250273686306, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45168067226890757, |
|
"grad_norm": 49.36926037963924, |
|
"kl": 0.0, |
|
"learning_rate": 2.878043316178753e-07, |
|
"logps/chosen": -290.1714082154088, |
|
"logps/rejected": -275.16697399068323, |
|
"loss": 0.339, |
|
"rewards/chosen": -0.1980593579370271, |
|
"rewards/margins": 4.149451317528811, |
|
"rewards/rejected": -4.347510675465839, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4569327731092437, |
|
"grad_norm": 31.59893236803937, |
|
"kl": 0.0, |
|
"learning_rate": 2.837218154619193e-07, |
|
"logps/chosen": -310.3774646577381, |
|
"logps/rejected": -279.89951685855266, |
|
"loss": 0.3072, |
|
"rewards/chosen": 0.8483944847470238, |
|
"rewards/margins": 4.661724482562608, |
|
"rewards/rejected": -3.813329997815584, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46218487394957986, |
|
"grad_norm": 78.02492897783358, |
|
"kl": 0.0, |
|
"learning_rate": 2.796301187929257e-07, |
|
"logps/chosen": -328.6445046768707, |
|
"logps/rejected": -285.24498735549133, |
|
"loss": 0.3409, |
|
"rewards/chosen": -0.49578680959688565, |
|
"rewards/margins": 3.4661660613839667, |
|
"rewards/rejected": -3.9619528709808525, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46743697478991597, |
|
"grad_norm": 47.42133886253641, |
|
"kl": 0.0, |
|
"learning_rate": 2.755303555448301e-07, |
|
"logps/chosen": -321.7949578220859, |
|
"logps/rejected": -317.8714171974522, |
|
"loss": 0.3471, |
|
"rewards/chosen": -1.7351532918543904, |
|
"rewards/margins": 4.654421281443778, |
|
"rewards/rejected": -6.389574573298169, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4726890756302521, |
|
"grad_norm": 50.39237398761096, |
|
"kl": 0.0, |
|
"learning_rate": 2.7142364184763424e-07, |
|
"logps/chosen": -284.39781663907286, |
|
"logps/rejected": -317.32179178994085, |
|
"loss": 0.3519, |
|
"rewards/chosen": -0.08173387413782789, |
|
"rewards/margins": 4.825091084257143, |
|
"rewards/rejected": -4.906824958394971, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47794117647058826, |
|
"grad_norm": 56.7884471471947, |
|
"kl": 0.0, |
|
"learning_rate": 2.673110957235479e-07, |
|
"logps/chosen": -309.5387185534591, |
|
"logps/rejected": -299.7813227872671, |
|
"loss": 0.3017, |
|
"rewards/chosen": 0.5868375766202338, |
|
"rewards/margins": 4.1676898245336655, |
|
"rewards/rejected": -3.5808522479134317, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4831932773109244, |
|
"grad_norm": 55.26734943859427, |
|
"kl": 0.0, |
|
"learning_rate": 2.6319383678261557e-07, |
|
"logps/chosen": -322.89912539308176, |
|
"logps/rejected": -293.4796680900621, |
|
"loss": 0.3131, |
|
"rewards/chosen": 0.8568071089450668, |
|
"rewards/margins": 4.327623205849182, |
|
"rewards/rejected": -3.470816096904115, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4884453781512605, |
|
"grad_norm": 43.186318715354865, |
|
"kl": 0.0, |
|
"learning_rate": 2.5907298591791105e-07, |
|
"logps/chosen": -297.91787462349396, |
|
"logps/rejected": -322.5411931818182, |
|
"loss": 0.2897, |
|
"rewards/chosen": 0.7378294151949595, |
|
"rewards/margins": 5.026299398580715, |
|
"rewards/rejected": -4.288469983385755, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49369747899159666, |
|
"grad_norm": 75.34771457681897, |
|
"kl": 0.0, |
|
"learning_rate": 2.5494966500038264e-07, |
|
"logps/chosen": -304.362060546875, |
|
"logps/rejected": -301.68695746527777, |
|
"loss": 0.3408, |
|
"rewards/chosen": -0.0454999641938643, |
|
"rewards/margins": 4.585332284070025, |
|
"rewards/rejected": -4.630832248263889, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4989495798319328, |
|
"grad_norm": 70.74960017005274, |
|
"kl": 0.0, |
|
"learning_rate": 2.508249965734319e-07, |
|
"logps/chosen": -322.87849884969324, |
|
"logps/rejected": -298.17384056528664, |
|
"loss": 0.2932, |
|
"rewards/chosen": 0.08947515780209032, |
|
"rewards/margins": 5.383114517002927, |
|
"rewards/rejected": -5.293639359200836, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5042016806722689, |
|
"grad_norm": 71.05303842998583, |
|
"kl": 0.0, |
|
"learning_rate": 2.467001035473103e-07, |
|
"logps/chosen": -305.84786676646706, |
|
"logps/rejected": -311.00056168300654, |
|
"loss": 0.3135, |
|
"rewards/chosen": 0.8616841024981288, |
|
"rewards/margins": 4.789382175542451, |
|
"rewards/rejected": -3.927698073044322, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.509453781512605, |
|
"grad_norm": 51.90598218741456, |
|
"kl": 0.0, |
|
"learning_rate": 2.425761088934142e-07, |
|
"logps/chosen": -275.5298913043478, |
|
"logps/rejected": -261.6807439072327, |
|
"loss": 0.3269, |
|
"rewards/chosen": 0.6217543797463364, |
|
"rewards/margins": 4.42957770484952, |
|
"rewards/rejected": -3.807823325103184, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5147058823529411, |
|
"grad_norm": 49.01333342668485, |
|
"kl": 0.0, |
|
"learning_rate": 2.3845413533856514e-07, |
|
"logps/chosen": -303.1413620283019, |
|
"logps/rejected": -294.9575892857143, |
|
"loss": 0.2877, |
|
"rewards/chosen": 0.7997542567223123, |
|
"rewards/margins": 5.2810229084595015, |
|
"rewards/rejected": -4.48126865173719, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5199579831932774, |
|
"grad_norm": 45.4420006215638, |
|
"kl": 0.0, |
|
"learning_rate": 2.343353050593553e-07, |
|
"logps/chosen": -315.00589576863354, |
|
"logps/rejected": -299.73405562106916, |
|
"loss": 0.3082, |
|
"rewards/chosen": 0.48438277155716225, |
|
"rewards/margins": 5.411952082927052, |
|
"rewards/rejected": -4.92756931136989, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5252100840336135, |
|
"grad_norm": 44.01774159207231, |
|
"kl": 0.0, |
|
"learning_rate": 2.3022073937664383e-07, |
|
"logps/chosen": -304.87012987012986, |
|
"logps/rejected": -277.4141330948795, |
|
"loss": 0.3005, |
|
"rewards/chosen": 1.3989817631709112, |
|
"rewards/margins": 4.661985928268803, |
|
"rewards/rejected": -3.2630041650978914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5304621848739496, |
|
"grad_norm": 48.00123102792709, |
|
"kl": 0.0, |
|
"learning_rate": 2.261115584502849e-07, |
|
"logps/chosen": -307.63991253930817, |
|
"logps/rejected": -270.22658676242236, |
|
"loss": 0.326, |
|
"rewards/chosen": 1.3268984428741648, |
|
"rewards/margins": 3.5992706708924103, |
|
"rewards/rejected": -2.2723722280182455, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 101.16998848852474, |
|
"kl": 0.0, |
|
"learning_rate": 2.2200888097417302e-07, |
|
"logps/chosen": -260.3294677734375, |
|
"logps/rejected": -272.8283203125, |
|
"loss": 0.3344, |
|
"rewards/chosen": 1.6289764404296876, |
|
"rewards/margins": 3.6363025665283204, |
|
"rewards/rejected": -2.007326126098633, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5409663865546218, |
|
"grad_norm": 45.84798251584408, |
|
"kl": 0.0, |
|
"learning_rate": 2.1791382387168684e-07, |
|
"logps/chosen": -293.09831912878786, |
|
"logps/rejected": -303.9574848790323, |
|
"loss": 0.3042, |
|
"rewards/chosen": 0.6066324407404119, |
|
"rewards/margins": 5.404102435070048, |
|
"rewards/rejected": -4.797469994329637, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5462184873949579, |
|
"grad_norm": 40.19468881639845, |
|
"kl": 0.0, |
|
"learning_rate": 2.1382750199161495e-07, |
|
"logps/chosen": -319.7741268382353, |
|
"logps/rejected": -292.6617708333333, |
|
"loss": 0.3023, |
|
"rewards/chosen": 1.018133544921875, |
|
"rewards/margins": 4.818516031901042, |
|
"rewards/rejected": -3.8003824869791667, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5514705882352942, |
|
"grad_norm": 33.38152973598087, |
|
"kl": 0.0, |
|
"learning_rate": 2.0975102780464673e-07, |
|
"logps/chosen": -311.2853467987805, |
|
"logps/rejected": -304.88221153846155, |
|
"loss": 0.3141, |
|
"rewards/chosen": 1.0162078113090702, |
|
"rewards/margins": 4.974026258324295, |
|
"rewards/rejected": -3.9578184470152245, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5567226890756303, |
|
"grad_norm": 63.954166267932834, |
|
"kl": 0.0, |
|
"learning_rate": 2.0568551110051074e-07, |
|
"logps/chosen": -264.285549331761, |
|
"logps/rejected": -278.77358307453414, |
|
"loss": 0.3119, |
|
"rewards/chosen": 1.0249821884827044, |
|
"rewards/margins": 4.825221324983093, |
|
"rewards/rejected": -3.8002391365003882, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5619747899159664, |
|
"grad_norm": 41.06781071234312, |
|
"kl": 0.0, |
|
"learning_rate": 2.016320586858422e-07, |
|
"logps/chosen": -276.17345252403845, |
|
"logps/rejected": -310.3708555640244, |
|
"loss": 0.2879, |
|
"rewards/chosen": 1.260654547275641, |
|
"rewards/margins": 5.546377977630062, |
|
"rewards/rejected": -4.2857234303544205, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5672268907563025, |
|
"grad_norm": 48.37440945890147, |
|
"kl": 0.0, |
|
"learning_rate": 1.9759177408286337e-07, |
|
"logps/chosen": -316.57014678030305, |
|
"logps/rejected": -284.27633568548384, |
|
"loss": 0.3181, |
|
"rewards/chosen": 1.2529100822679924, |
|
"rewards/margins": 4.161886384876359, |
|
"rewards/rejected": -2.908976302608367, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5724789915966386, |
|
"grad_norm": 43.39106017396027, |
|
"kl": 0.0, |
|
"learning_rate": 1.9356575722895808e-07, |
|
"logps/chosen": -298.2481151660839, |
|
"logps/rejected": -282.445334569209, |
|
"loss": 0.3094, |
|
"rewards/chosen": 1.7649928806545017, |
|
"rewards/margins": 4.975185889680809, |
|
"rewards/rejected": -3.2101930090263067, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5777310924369747, |
|
"grad_norm": 60.62146730688155, |
|
"kl": 0.0, |
|
"learning_rate": 1.895551041772216e-07, |
|
"logps/chosen": -307.72318892045456, |
|
"logps/rejected": -269.39409722222223, |
|
"loss": 0.3155, |
|
"rewards/chosen": 1.027849023992365, |
|
"rewards/margins": 3.9311439051772608, |
|
"rewards/rejected": -2.903294881184896, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.582983193277311, |
|
"grad_norm": 87.64642171640351, |
|
"kl": 0.0, |
|
"learning_rate": 1.8556090679806847e-07, |
|
"logps/chosen": -302.53110881024094, |
|
"logps/rejected": -283.19252232142856, |
|
"loss": 0.299, |
|
"rewards/chosen": 1.1432133869952465, |
|
"rewards/margins": 5.261560554963997, |
|
"rewards/rejected": -4.11834716796875, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 61.71028625828252, |
|
"kl": 0.0, |
|
"learning_rate": 1.8158425248197928e-07, |
|
"logps/chosen": -325.427734375, |
|
"logps/rejected": -312.36092748397436, |
|
"loss": 0.2687, |
|
"rewards/chosen": 1.890699060951791, |
|
"rewards/margins": 5.755905657130677, |
|
"rewards/rejected": -3.8652065961788864, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5934873949579832, |
|
"grad_norm": 65.2119562803559, |
|
"kl": 0.0, |
|
"learning_rate": 1.7762622384346609e-07, |
|
"logps/chosen": -291.2824315200617, |
|
"logps/rejected": -258.82664161392404, |
|
"loss": 0.3013, |
|
"rewards/chosen": 1.8866918116440008, |
|
"rewards/margins": 4.205873229761089, |
|
"rewards/rejected": -2.3191814181170884, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5987394957983193, |
|
"grad_norm": 38.01395730830506, |
|
"kl": 0.0, |
|
"learning_rate": 1.7368789842633907e-07, |
|
"logps/chosen": -322.3194526336478, |
|
"logps/rejected": -307.19121215062114, |
|
"loss": 0.3003, |
|
"rewards/chosen": 1.5120626005736537, |
|
"rewards/margins": 4.748744047099275, |
|
"rewards/rejected": -3.2366814465256213, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6039915966386554, |
|
"grad_norm": 57.24723300086865, |
|
"kl": 0.0, |
|
"learning_rate": 1.697703484103532e-07, |
|
"logps/chosen": -297.3913395579268, |
|
"logps/rejected": -276.43687399839746, |
|
"loss": 0.3062, |
|
"rewards/chosen": 1.2956105674185403, |
|
"rewards/margins": 5.099581686834606, |
|
"rewards/rejected": -3.8039711194160657, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6092436974789915, |
|
"grad_norm": 46.71083438381099, |
|
"kl": 0.0, |
|
"learning_rate": 1.6587464031931526e-07, |
|
"logps/chosen": -307.08378031716416, |
|
"logps/rejected": -292.77835181451616, |
|
"loss": 0.3292, |
|
"rewards/chosen": 1.054947810386544, |
|
"rewards/margins": 4.291957494631335, |
|
"rewards/rejected": -3.2370096842447915, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6144957983193278, |
|
"grad_norm": 28.548641210768817, |
|
"kl": 0.0, |
|
"learning_rate": 1.6200183473073048e-07, |
|
"logps/chosen": -301.33444602272726, |
|
"logps/rejected": -287.6282510080645, |
|
"loss": 0.2939, |
|
"rewards/chosen": 0.8960710005326704, |
|
"rewards/margins": 4.915845997382469, |
|
"rewards/rejected": -4.019774996849798, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6197478991596639, |
|
"grad_norm": 75.28423549841818, |
|
"kl": 0.0, |
|
"learning_rate": 1.5815298598706888e-07, |
|
"logps/chosen": -294.9990295031056, |
|
"logps/rejected": -297.1986536949685, |
|
"loss": 0.3023, |
|
"rewards/chosen": 0.4812743382424301, |
|
"rewards/margins": 5.439699416244593, |
|
"rewards/rejected": -4.958425078002162, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 65.4384114129384, |
|
"kl": 0.0, |
|
"learning_rate": 1.5432914190872756e-07, |
|
"logps/chosen": -287.07041139240505, |
|
"logps/rejected": -303.2984905478395, |
|
"loss": 0.3376, |
|
"rewards/chosen": 0.6506334135803995, |
|
"rewards/margins": 4.541042906732406, |
|
"rewards/rejected": -3.8904094931520063, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6302521008403361, |
|
"grad_norm": 39.03209593199308, |
|
"kl": 0.0, |
|
"learning_rate": 1.505313435087698e-07, |
|
"logps/chosen": -302.3798961900685, |
|
"logps/rejected": -300.5006510416667, |
|
"loss": 0.3026, |
|
"rewards/chosen": 0.7183382217198202, |
|
"rewards/margins": 5.01667280381842, |
|
"rewards/rejected": -4.298334582098599, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6355042016806722, |
|
"grad_norm": 37.74806766053673, |
|
"kl": 0.0, |
|
"learning_rate": 1.4676062470951705e-07, |
|
"logps/chosen": -320.5517877684049, |
|
"logps/rejected": -318.0521248009554, |
|
"loss": 0.3071, |
|
"rewards/chosen": 0.636018296692269, |
|
"rewards/margins": 5.225012997130167, |
|
"rewards/rejected": -4.588994700437898, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6407563025210085, |
|
"grad_norm": 48.11396951304232, |
|
"kl": 0.0, |
|
"learning_rate": 1.430180120610711e-07, |
|
"logps/chosen": -294.4245869824841, |
|
"logps/rejected": -295.2305885736196, |
|
"loss": 0.2893, |
|
"rewards/chosen": 0.30213877198043143, |
|
"rewards/margins": 5.5401721368673185, |
|
"rewards/rejected": -5.238033364886887, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6460084033613446, |
|
"grad_norm": 54.817845691829724, |
|
"kl": 0.0, |
|
"learning_rate": 1.3930452446184385e-07, |
|
"logps/chosen": -329.90924310064935, |
|
"logps/rejected": -325.61871705572287, |
|
"loss": 0.2779, |
|
"rewards/chosen": 0.11350006252140193, |
|
"rewards/margins": 6.256528965241658, |
|
"rewards/rejected": -6.143028902720256, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6512605042016807, |
|
"grad_norm": 58.30938790650043, |
|
"kl": 0.0, |
|
"learning_rate": 1.3562117288116923e-07, |
|
"logps/chosen": -327.2169507575758, |
|
"logps/rejected": -302.0602318548387, |
|
"loss": 0.31, |
|
"rewards/chosen": 0.08168421658602627, |
|
"rewards/margins": 5.523900462175744, |
|
"rewards/rejected": -5.442216245589718, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6565126050420168, |
|
"grad_norm": 55.044099371688475, |
|
"kl": 0.0, |
|
"learning_rate": 1.319689600840747e-07, |
|
"logps/chosen": -308.8638286226115, |
|
"logps/rejected": -301.6549079754601, |
|
"loss": 0.323, |
|
"rewards/chosen": -0.07818331384355096, |
|
"rewards/margins": 4.454612785298512, |
|
"rewards/rejected": -4.532796099142063, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6617647058823529, |
|
"grad_norm": 102.96993890295445, |
|
"kl": 0.0, |
|
"learning_rate": 1.2834888035828596e-07, |
|
"logps/chosen": -300.1451028963415, |
|
"logps/rejected": -284.04051482371796, |
|
"loss": 0.2618, |
|
"rewards/chosen": 1.2945330550030965, |
|
"rewards/margins": 6.069371233588834, |
|
"rewards/rejected": -4.774838178585737, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.667016806722689, |
|
"grad_norm": 62.28101991590411, |
|
"kl": 0.0, |
|
"learning_rate": 1.2476191924353932e-07, |
|
"logps/chosen": -340.08214285714286, |
|
"logps/rejected": -286.0248114224138, |
|
"loss": 0.2961, |
|
"rewards/chosen": 1.140911167689732, |
|
"rewards/margins": 5.149687181294258, |
|
"rewards/rejected": -4.008776013604526, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6722689075630253, |
|
"grad_norm": 56.339091641639506, |
|
"kl": 0.0, |
|
"learning_rate": 1.2120905326327596e-07, |
|
"logps/chosen": -294.5368897928994, |
|
"logps/rejected": -291.57833195364236, |
|
"loss": 0.3268, |
|
"rewards/chosen": 0.9549788977267474, |
|
"rewards/margins": 4.6167907768658205, |
|
"rewards/rejected": -3.661811879139073, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6775210084033614, |
|
"grad_norm": 66.6477563654386, |
|
"kl": 0.0, |
|
"learning_rate": 1.1769124965879091e-07, |
|
"logps/chosen": -307.14776490066225, |
|
"logps/rejected": -255.9688424556213, |
|
"loss": 0.3042, |
|
"rewards/chosen": 1.2051307728748448, |
|
"rewards/margins": 4.886082675727101, |
|
"rewards/rejected": -3.680951902852256, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6827731092436975, |
|
"grad_norm": 51.16719779623548, |
|
"kl": 0.0, |
|
"learning_rate": 1.1420946612590837e-07, |
|
"logps/chosen": -274.20879836309524, |
|
"logps/rejected": -287.1597193667763, |
|
"loss": 0.3073, |
|
"rewards/chosen": 0.7768129621233258, |
|
"rewards/margins": 4.585587408309592, |
|
"rewards/rejected": -3.8087744461862663, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6880252100840336, |
|
"grad_norm": 46.424902003345146, |
|
"kl": 0.0, |
|
"learning_rate": 1.1076465055425646e-07, |
|
"logps/chosen": -295.8868777252907, |
|
"logps/rejected": -271.63410578547297, |
|
"loss": 0.2858, |
|
"rewards/chosen": 1.1238191294115643, |
|
"rewards/margins": 5.138592936421699, |
|
"rewards/rejected": -4.014773807010135, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6932773109243697, |
|
"grad_norm": 68.63654254788709, |
|
"kl": 0.0, |
|
"learning_rate": 1.0735774076921128e-07, |
|
"logps/chosen": -290.4839599609375, |
|
"logps/rejected": -246.5270751953125, |
|
"loss": 0.3271, |
|
"rewards/chosen": 1.0010972976684571, |
|
"rewards/margins": 4.268711280822754, |
|
"rewards/rejected": -3.267613983154297, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6985294117647058, |
|
"grad_norm": 41.396174001243224, |
|
"kl": 0.0, |
|
"learning_rate": 1.039896642765809e-07, |
|
"logps/chosen": -301.9284396701389, |
|
"logps/rejected": -323.622314453125, |
|
"loss": 0.2906, |
|
"rewards/chosen": 0.9389337963528104, |
|
"rewards/margins": 5.783896870083279, |
|
"rewards/rejected": -4.844963073730469, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7037815126050421, |
|
"grad_norm": 42.28257001832088, |
|
"kl": 0.0, |
|
"learning_rate": 1.0066133801009871e-07, |
|
"logps/chosen": -330.92494419642856, |
|
"logps/rejected": -265.96412417763156, |
|
"loss": 0.2927, |
|
"rewards/chosen": 1.0747142973400297, |
|
"rewards/margins": 5.422101510796033, |
|
"rewards/rejected": -4.347387213456003, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7090336134453782, |
|
"grad_norm": 84.07485027336253, |
|
"kl": 0.0, |
|
"learning_rate": 9.737366808179553e-08, |
|
"logps/chosen": -309.5953733766234, |
|
"logps/rejected": -269.1078219126506, |
|
"loss": 0.2967, |
|
"rewards/chosen": 0.7130060567484273, |
|
"rewards/margins": 5.262509245283819, |
|
"rewards/rejected": -4.549503188535391, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 46.16115962232735, |
|
"kl": 0.0, |
|
"learning_rate": 9.412754953531663e-08, |
|
"logps/chosen": -268.95225954341316, |
|
"logps/rejected": -302.0711039624183, |
|
"loss": 0.3274, |
|
"rewards/chosen": 0.8310946161875468, |
|
"rewards/margins": 4.501142151803355, |
|
"rewards/rejected": -3.670047535615809, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7195378151260504, |
|
"grad_norm": 125.1880813626159, |
|
"kl": 0.0, |
|
"learning_rate": 9.092386610225325e-08, |
|
"logps/chosen": -269.0572060032895, |
|
"logps/rejected": -291.4349655877976, |
|
"loss": 0.308, |
|
"rewards/chosen": 1.3923439226652448, |
|
"rewards/margins": 4.397153347655944, |
|
"rewards/rejected": -3.0048094249906994, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7247899159663865, |
|
"grad_norm": 72.34232290742432, |
|
"kl": 0.0, |
|
"learning_rate": 8.776348996155317e-08, |
|
"logps/chosen": -295.1348721590909, |
|
"logps/rejected": -292.1137348790323, |
|
"loss": 0.2973, |
|
"rewards/chosen": 0.5286340886896307, |
|
"rewards/margins": 5.645270303407373, |
|
"rewards/rejected": -5.116636214717742, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7300420168067226, |
|
"grad_norm": 101.10706766564086, |
|
"kl": 0.0, |
|
"learning_rate": 8.464728150207636e-08, |
|
"logps/chosen": -298.3083235062893, |
|
"logps/rejected": -309.3073563664596, |
|
"loss": 0.2942, |
|
"rewards/chosen": 1.189275585630405, |
|
"rewards/margins": 5.284051052075474, |
|
"rewards/rejected": -4.0947754664450695, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 32.92022315972127, |
|
"kl": 0.0, |
|
"learning_rate": 8.15760890883607e-08, |
|
"logps/chosen": -282.652443272293, |
|
"logps/rejected": -268.00280387269936, |
|
"loss": 0.3095, |
|
"rewards/chosen": 0.9466076504652667, |
|
"rewards/margins": 4.818131267700696, |
|
"rewards/rejected": -3.8715236172354293, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.740546218487395, |
|
"grad_norm": 66.0924893411411, |
|
"kl": 0.0, |
|
"learning_rate": 7.855074882966103e-08, |
|
"logps/chosen": -317.7045433407738, |
|
"logps/rejected": -290.1392115542763, |
|
"loss": 0.2904, |
|
"rewards/chosen": 0.8157288687569755, |
|
"rewards/margins": 5.654886776343324, |
|
"rewards/rejected": -4.8391579075863485, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7457983193277311, |
|
"grad_norm": 29.988518434005318, |
|
"kl": 0.0, |
|
"learning_rate": 7.557208435232449e-08, |
|
"logps/chosen": -281.4327616494083, |
|
"logps/rejected": -294.1578797599338, |
|
"loss": 0.2786, |
|
"rewards/chosen": 1.3708069028233636, |
|
"rewards/margins": 5.1220817695969565, |
|
"rewards/rejected": -3.751274866773593, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7510504201680672, |
|
"grad_norm": 137.787205313578, |
|
"kl": 0.0, |
|
"learning_rate": 7.264090657556443e-08, |
|
"logps/chosen": -301.6008921967456, |
|
"logps/rejected": -260.6448158112583, |
|
"loss": 0.3048, |
|
"rewards/chosen": 0.6026445196930473, |
|
"rewards/margins": 5.368352381991268, |
|
"rewards/rejected": -4.76570786229822, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7563025210084033, |
|
"grad_norm": 72.85612810540347, |
|
"kl": 0.0, |
|
"learning_rate": 6.975801349069385e-08, |
|
"logps/chosen": -284.1950284090909, |
|
"logps/rejected": -288.5327872983871, |
|
"loss": 0.2889, |
|
"rewards/chosen": 0.9271378950639204, |
|
"rewards/margins": 5.736759792115331, |
|
"rewards/rejected": -4.809621897051411, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7615546218487395, |
|
"grad_norm": 102.78241617760347, |
|
"kl": 0.0, |
|
"learning_rate": 6.692418994387799e-08, |
|
"logps/chosen": -288.68474264705884, |
|
"logps/rejected": -295.6461452095808, |
|
"loss": 0.2864, |
|
"rewards/chosen": 1.031971950157016, |
|
"rewards/margins": 5.655328021218018, |
|
"rewards/rejected": -4.623356071061003, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7668067226890757, |
|
"grad_norm": 65.20920984482183, |
|
"kl": 0.0, |
|
"learning_rate": 6.414020742246593e-08, |
|
"logps/chosen": -290.9591749237805, |
|
"logps/rejected": -305.4570562900641, |
|
"loss": 0.2474, |
|
"rewards/chosen": 1.242940390982279, |
|
"rewards/margins": 6.340856431647342, |
|
"rewards/rejected": -5.097916040665064, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7720588235294118, |
|
"grad_norm": 66.73235542937682, |
|
"kl": 0.0, |
|
"learning_rate": 6.140682384495902e-08, |
|
"logps/chosen": -311.04422530594405, |
|
"logps/rejected": -286.8678937146893, |
|
"loss": 0.2764, |
|
"rewards/chosen": 1.3596580878837958, |
|
"rewards/margins": 5.264105654201769, |
|
"rewards/rejected": -3.904447566317973, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7773109243697479, |
|
"grad_norm": 86.01646021421267, |
|
"kl": 0.0, |
|
"learning_rate": 5.872478335467298e-08, |
|
"logps/chosen": -266.1013243140244, |
|
"logps/rejected": -272.6937850560897, |
|
"loss": 0.3282, |
|
"rewards/chosen": 1.3123749523628048, |
|
"rewards/margins": 4.397385187489007, |
|
"rewards/rejected": -3.085010235126202, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.782563025210084, |
|
"grad_norm": 40.2582163158674, |
|
"kl": 0.0, |
|
"learning_rate": 5.60948161171505e-08, |
|
"logps/chosen": -293.6717694256757, |
|
"logps/rejected": -290.36904978197674, |
|
"loss": 0.3256, |
|
"rewards/chosen": 1.6847073323017843, |
|
"rewards/margins": 4.269535376394717, |
|
"rewards/rejected": -2.5848280440929323, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7878151260504201, |
|
"grad_norm": 63.304715322652115, |
|
"kl": 0.0, |
|
"learning_rate": 5.351763812137916e-08, |
|
"logps/chosen": -276.54271343954247, |
|
"logps/rejected": -297.85116485778445, |
|
"loss": 0.307, |
|
"rewards/chosen": 1.3718405330882353, |
|
"rewards/margins": 4.822991575729516, |
|
"rewards/rejected": -3.45115104264128, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7930672268907563, |
|
"grad_norm": 52.372015337826504, |
|
"kl": 0.0, |
|
"learning_rate": 5.0993950984868836e-08, |
|
"logps/chosen": -268.07744565217394, |
|
"logps/rejected": -287.0620332154088, |
|
"loss": 0.331, |
|
"rewards/chosen": 1.229935924458948, |
|
"rewards/margins": 4.45942081844971, |
|
"rewards/rejected": -3.2294848939907626, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7983193277310925, |
|
"grad_norm": 32.103803649440515, |
|
"kl": 0.0, |
|
"learning_rate": 4.8524441762641284e-08, |
|
"logps/chosen": -314.27061222484275, |
|
"logps/rejected": -264.5042944487578, |
|
"loss": 0.273, |
|
"rewards/chosen": 1.570641667587952, |
|
"rewards/margins": 5.312982324129101, |
|
"rewards/rejected": -3.742340656541149, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8035714285714286, |
|
"grad_norm": 43.137151503875806, |
|
"kl": 0.0, |
|
"learning_rate": 4.6109782760184956e-08, |
|
"logps/chosen": -277.4168693862275, |
|
"logps/rejected": -293.28860294117646, |
|
"loss": 0.2971, |
|
"rewards/chosen": 1.7373914889946669, |
|
"rewards/margins": 4.780555711063703, |
|
"rewards/rejected": -3.043164222069036, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8088235294117647, |
|
"grad_norm": 50.02695394326994, |
|
"kl": 0.0, |
|
"learning_rate": 4.375063135042445e-08, |
|
"logps/chosen": -295.5123428254438, |
|
"logps/rejected": -317.11524730960264, |
|
"loss": 0.2655, |
|
"rewards/chosen": 1.741723596697023, |
|
"rewards/margins": 5.875752757758695, |
|
"rewards/rejected": -4.134029161061672, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8140756302521008, |
|
"grad_norm": 39.113276722110314, |
|
"kl": 0.0, |
|
"learning_rate": 4.144762979475575e-08, |
|
"logps/chosen": -296.8016826923077, |
|
"logps/rejected": -275.8460451977401, |
|
"loss": 0.3124, |
|
"rewards/chosen": 1.1531088235494973, |
|
"rewards/margins": 4.638992460886185, |
|
"rewards/rejected": -3.4858836373366877, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.819327731092437, |
|
"grad_norm": 65.74166961412382, |
|
"kl": 0.0, |
|
"learning_rate": 3.9201405068195385e-08, |
|
"logps/chosen": -300.7455797697368, |
|
"logps/rejected": -287.8294735863095, |
|
"loss": 0.3239, |
|
"rewards/chosen": 1.3694068507144326, |
|
"rewards/margins": 4.3731968432739565, |
|
"rewards/rejected": -3.0037899925595237, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8245798319327731, |
|
"grad_norm": 40.97531017956201, |
|
"kl": 0.0, |
|
"learning_rate": 3.701256868869124e-08, |
|
"logps/chosen": -308.8232851808176, |
|
"logps/rejected": -251.1597923136646, |
|
"loss": 0.3387, |
|
"rewards/chosen": 1.3816734649850138, |
|
"rewards/margins": 4.159259081007917, |
|
"rewards/rejected": -2.7775856160229035, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8298319327731093, |
|
"grad_norm": 67.8764255365854, |
|
"kl": 0.0, |
|
"learning_rate": 3.488171655064107e-08, |
|
"logps/chosen": -283.0639042075163, |
|
"logps/rejected": -281.1170705464072, |
|
"loss": 0.3203, |
|
"rewards/chosen": 1.294641232958027, |
|
"rewards/margins": 4.51022142511278, |
|
"rewards/rejected": -3.215580192154753, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8350840336134454, |
|
"grad_norm": 50.875889688456084, |
|
"kl": 0.0, |
|
"learning_rate": 3.28094287626651e-08, |
|
"logps/chosen": -326.3169806985294, |
|
"logps/rejected": -297.6423697916667, |
|
"loss": 0.295, |
|
"rewards/chosen": 1.5313853544347427, |
|
"rewards/margins": 5.293017434512867, |
|
"rewards/rejected": -3.761632080078125, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8403361344537815, |
|
"grad_norm": 46.374855210212814, |
|
"kl": 0.0, |
|
"learning_rate": 3.079626948967534e-08, |
|
"logps/chosen": -304.4027423469388, |
|
"logps/rejected": -287.12953847543355, |
|
"loss": 0.3133, |
|
"rewards/chosen": 1.1032969156901042, |
|
"rewards/margins": 4.994385853438478, |
|
"rewards/rejected": -3.8910889377483744, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8455882352941176, |
|
"grad_norm": 55.05693685423068, |
|
"kl": 0.0, |
|
"learning_rate": 2.88427867992862e-08, |
|
"logps/chosen": -289.62710160818716, |
|
"logps/rejected": -261.55342911073825, |
|
"loss": 0.2771, |
|
"rewards/chosen": 1.5187704521313048, |
|
"rewards/margins": 5.328108912963939, |
|
"rewards/rejected": -3.809338460832634, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8508403361344538, |
|
"grad_norm": 40.13605931494857, |
|
"kl": 0.0, |
|
"learning_rate": 2.6949512512606965e-08, |
|
"logps/chosen": -292.2077305169753, |
|
"logps/rejected": -295.6835195806962, |
|
"loss": 0.2883, |
|
"rewards/chosen": 1.2319991500289351, |
|
"rewards/margins": 5.180050506091039, |
|
"rewards/rejected": -3.9480513560621042, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8560924369747899, |
|
"grad_norm": 26.234795280971806, |
|
"kl": 0.0, |
|
"learning_rate": 2.5116962059457653e-08, |
|
"logps/chosen": -302.0387451171875, |
|
"logps/rejected": -288.72607421875, |
|
"loss": 0.2664, |
|
"rewards/chosen": 1.2181474685668945, |
|
"rewards/margins": 5.965513038635254, |
|
"rewards/rejected": -4.747365570068359, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8613445378151261, |
|
"grad_norm": 88.66056550298477, |
|
"kl": 0.0, |
|
"learning_rate": 2.334563433804687e-08, |
|
"logps/chosen": -321.8770623059006, |
|
"logps/rejected": -290.4252161949685, |
|
"loss": 0.2794, |
|
"rewards/chosen": 1.0824178375812792, |
|
"rewards/margins": 5.4567103671852495, |
|
"rewards/rejected": -4.37429252960397, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8665966386554622, |
|
"grad_norm": 46.4344164784028, |
|
"kl": 0.0, |
|
"learning_rate": 2.1636011579150793e-08, |
|
"logps/chosen": -295.313525390625, |
|
"logps/rejected": -301.4338134765625, |
|
"loss": 0.2569, |
|
"rewards/chosen": 1.2288617134094237, |
|
"rewards/margins": 5.767054080963135, |
|
"rewards/rejected": -4.538192367553711, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8718487394957983, |
|
"grad_norm": 46.720887932263636, |
|
"kl": 0.0, |
|
"learning_rate": 1.998855921482906e-08, |
|
"logps/chosen": -277.0703369140625, |
|
"logps/rejected": -252.481689453125, |
|
"loss": 0.321, |
|
"rewards/chosen": 0.9479250907897949, |
|
"rewards/margins": 4.605030727386475, |
|
"rewards/rejected": -3.65710563659668, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8771008403361344, |
|
"grad_norm": 77.29491195750472, |
|
"kl": 0.0, |
|
"learning_rate": 1.8403725751714615e-08, |
|
"logps/chosen": -291.567009066358, |
|
"logps/rejected": -321.13864715189874, |
|
"loss": 0.3081, |
|
"rewards/chosen": 0.9058116394796489, |
|
"rewards/margins": 5.581022712956706, |
|
"rewards/rejected": -4.675211073477057, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 35.132417644186994, |
|
"kl": 0.0, |
|
"learning_rate": 1.6881942648911074e-08, |
|
"logps/chosen": -289.9874625748503, |
|
"logps/rejected": -282.17639399509807, |
|
"loss": 0.3084, |
|
"rewards/chosen": 1.0200939064254304, |
|
"rewards/margins": 5.139812570258151, |
|
"rewards/rejected": -4.119718663832721, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8876050420168067, |
|
"grad_norm": 41.03832392131175, |
|
"kl": 0.0, |
|
"learning_rate": 1.5423624200531115e-08, |
|
"logps/chosen": -295.63090376420456, |
|
"logps/rejected": -312.71739366319446, |
|
"loss": 0.267, |
|
"rewards/chosen": 1.9761036959561435, |
|
"rewards/margins": 5.501615119702889, |
|
"rewards/rejected": -3.5255114237467446, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 75.37154437951729, |
|
"kl": 0.0, |
|
"learning_rate": 1.4029167422908105e-08, |
|
"logps/chosen": -276.8064123376623, |
|
"logps/rejected": -277.1538968373494, |
|
"loss": 0.2886, |
|
"rewards/chosen": 1.2620481020444398, |
|
"rewards/margins": 5.54502657354764, |
|
"rewards/rejected": -4.2829784715032, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.898109243697479, |
|
"grad_norm": 40.567317410299374, |
|
"kl": 0.0, |
|
"learning_rate": 1.2698951946511327e-08, |
|
"logps/chosen": -308.1201601808176, |
|
"logps/rejected": -281.5419497282609, |
|
"loss": 0.3105, |
|
"rewards/chosen": 0.3164803127072892, |
|
"rewards/margins": 4.992804434407599, |
|
"rewards/rejected": -4.67632412170031, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9033613445378151, |
|
"grad_norm": 54.46098979124272, |
|
"kl": 0.0, |
|
"learning_rate": 1.1433339912594265e-08, |
|
"logps/chosen": -306.5632858727811, |
|
"logps/rejected": -280.34178394039736, |
|
"loss": 0.2749, |
|
"rewards/chosen": 1.2275824010724852, |
|
"rewards/margins": 6.236800730697899, |
|
"rewards/rejected": -5.009218329625414, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9086134453781513, |
|
"grad_norm": 40.407188173526485, |
|
"kl": 0.0, |
|
"learning_rate": 1.0232675874604608e-08, |
|
"logps/chosen": -309.16411713286715, |
|
"logps/rejected": -310.2364936440678, |
|
"loss": 0.2894, |
|
"rewards/chosen": 0.768409675651497, |
|
"rewards/margins": 5.153310129725472, |
|
"rewards/rejected": -4.384900454073976, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9138655462184874, |
|
"grad_norm": 30.438351627967617, |
|
"kl": 0.0, |
|
"learning_rate": 9.097286704381896e-09, |
|
"logps/chosen": -317.2416330645161, |
|
"logps/rejected": -278.5894886363636, |
|
"loss": 0.3153, |
|
"rewards/chosen": 1.0305794992754536, |
|
"rewards/margins": 4.64966315812962, |
|
"rewards/rejected": -3.6190836588541666, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9191176470588235, |
|
"grad_norm": 56.93756466862378, |
|
"kl": 0.0, |
|
"learning_rate": 8.02748150316937e-09, |
|
"logps/chosen": -286.57449070411394, |
|
"logps/rejected": -278.80803915895063, |
|
"loss": 0.3173, |
|
"rewards/chosen": 1.041691840449466, |
|
"rewards/margins": 4.70056070076486, |
|
"rewards/rejected": -3.6588688603153936, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9243697478991597, |
|
"grad_norm": 59.48003945550571, |
|
"kl": 0.0, |
|
"learning_rate": 7.023551517463089e-09, |
|
"logps/chosen": -276.2664721385542, |
|
"logps/rejected": -300.2993861607143, |
|
"loss": 0.3055, |
|
"rewards/chosen": 1.1007473497505647, |
|
"rewards/margins": 4.7333291844198015, |
|
"rewards/rejected": -3.632581834669237, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9296218487394958, |
|
"grad_norm": 75.70187200694772, |
|
"kl": 0.0, |
|
"learning_rate": 6.085770059722634e-09, |
|
"logps/chosen": -276.1528105345912, |
|
"logps/rejected": -285.99791343167703, |
|
"loss": 0.2734, |
|
"rewards/chosen": 1.5460262658461086, |
|
"rewards/margins": 5.593088920519051, |
|
"rewards/rejected": -4.047062654672943, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9348739495798319, |
|
"grad_norm": 62.362510517390575, |
|
"kl": 0.0, |
|
"learning_rate": 5.214392433963488e-09, |
|
"logps/chosen": -291.6478470203488, |
|
"logps/rejected": -285.2013038429054, |
|
"loss": 0.324, |
|
"rewards/chosen": 0.7867310989734738, |
|
"rewards/margins": 4.6428056641392255, |
|
"rewards/rejected": -3.8560745651657515, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9401260504201681, |
|
"grad_norm": 57.685751276785766, |
|
"kl": 0.0, |
|
"learning_rate": 4.409655866252693e-09, |
|
"logps/chosen": -284.08316022398844, |
|
"logps/rejected": -311.69903273809524, |
|
"loss": 0.2987, |
|
"rewards/chosen": 1.1329683513310604, |
|
"rewards/margins": 5.054558935807038, |
|
"rewards/rejected": -3.921590584475978, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9453781512605042, |
|
"grad_norm": 64.21944849726562, |
|
"kl": 0.0, |
|
"learning_rate": 3.671779440125644e-09, |
|
"logps/chosen": -296.63870919585986, |
|
"logps/rejected": -292.5702645705521, |
|
"loss": 0.282, |
|
"rewards/chosen": 1.5888889580015924, |
|
"rewards/margins": 5.482801244116431, |
|
"rewards/rejected": -3.8939122861148387, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9506302521008403, |
|
"grad_norm": 120.91274575574961, |
|
"kl": 0.0, |
|
"learning_rate": 3.000964036942305e-09, |
|
"logps/chosen": -286.46690883757964, |
|
"logps/rejected": -307.89421970858893, |
|
"loss": 0.3187, |
|
"rewards/chosen": 0.656261176820014, |
|
"rewards/margins": 5.1038937616551365, |
|
"rewards/rejected": -4.447632584835123, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9558823529411765, |
|
"grad_norm": 36.13196162142592, |
|
"kl": 0.0, |
|
"learning_rate": 2.397392281198729e-09, |
|
"logps/chosen": -302.49788306451615, |
|
"logps/rejected": -277.52104640151515, |
|
"loss": 0.2937, |
|
"rewards/chosen": 0.9888374574722782, |
|
"rewards/margins": 5.233212605436293, |
|
"rewards/rejected": -4.244375147964015, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9611344537815126, |
|
"grad_norm": 28.996205179253028, |
|
"kl": 0.0, |
|
"learning_rate": 1.861228490808886e-09, |
|
"logps/chosen": -293.10886452414775, |
|
"logps/rejected": -297.74403211805554, |
|
"loss": 0.2997, |
|
"rewards/chosen": 0.9712606776844371, |
|
"rewards/margins": 5.202846710128014, |
|
"rewards/rejected": -4.231586032443577, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9663865546218487, |
|
"grad_norm": 45.07816045720644, |
|
"kl": 0.0, |
|
"learning_rate": 1.3926186323703903e-09, |
|
"logps/chosen": -278.5950362042683, |
|
"logps/rejected": -270.61345653044873, |
|
"loss": 0.3545, |
|
"rewards/chosen": 0.5840071236214986, |
|
"rewards/margins": 3.794906672274343, |
|
"rewards/rejected": -3.2108995486528444, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9716386554621849, |
|
"grad_norm": 37.183516112349096, |
|
"kl": 0.0, |
|
"learning_rate": 9.916902814261774e-10, |
|
"logps/chosen": -289.34506048387095, |
|
"logps/rejected": -296.7537168560606, |
|
"loss": 0.2669, |
|
"rewards/chosen": 1.187039283014113, |
|
"rewards/margins": 5.528245041773583, |
|
"rewards/rejected": -4.34120575875947, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.976890756302521, |
|
"grad_norm": 99.37300393766564, |
|
"kl": 0.0, |
|
"learning_rate": 6.585525877328968e-10, |
|
"logps/chosen": -291.4291068412162, |
|
"logps/rejected": -333.1322901526163, |
|
"loss": 0.3206, |
|
"rewards/chosen": 0.7791987238703547, |
|
"rewards/margins": 4.7422238391274405, |
|
"rewards/rejected": -3.9630251152570857, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9821428571428571, |
|
"grad_norm": 91.23926744066455, |
|
"kl": 0.0, |
|
"learning_rate": 3.9329624554584883e-10, |
|
"logps/chosen": -284.56466490963857, |
|
"logps/rejected": -287.7052049512987, |
|
"loss": 0.3167, |
|
"rewards/chosen": 0.6357683848185711, |
|
"rewards/margins": 4.381204223394058, |
|
"rewards/rejected": -3.745435838575487, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9873949579831933, |
|
"grad_norm": 56.11511499932997, |
|
"kl": 0.0, |
|
"learning_rate": 1.959934689280962e-10, |
|
"logps/chosen": -295.48760695684524, |
|
"logps/rejected": -301.8758994654605, |
|
"loss": 0.2857, |
|
"rewards/chosen": 1.2652909415108817, |
|
"rewards/margins": 6.0682341209927895, |
|
"rewards/rejected": -4.8029431794819075, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9926470588235294, |
|
"grad_norm": 71.18992657480682, |
|
"kl": 0.0, |
|
"learning_rate": 6.669797209069017e-11, |
|
"logps/chosen": -269.54678721910113, |
|
"logps/rejected": -283.6238996478873, |
|
"loss": 0.3009, |
|
"rewards/chosen": 0.8121567629696278, |
|
"rewards/margins": 5.090732821370816, |
|
"rewards/rejected": -4.278576058401188, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9978991596638656, |
|
"grad_norm": 25.085484740800712, |
|
"kl": 0.0, |
|
"learning_rate": 5.444954769395771e-12, |
|
"logps/chosen": -256.12128784937886, |
|
"logps/rejected": -292.2530709512579, |
|
"loss": 0.2815, |
|
"rewards/chosen": 1.2973005401421778, |
|
"rewards/margins": 5.830910950052752, |
|
"rewards/rejected": -4.533610409910574, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.321955375749023, |
|
"train_runtime": 31031.977, |
|
"train_samples_per_second": 1.963, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1904, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|