|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.996784565916399, |
|
"eval_steps": 500, |
|
"global_step": 699, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 16.25, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.06385541707277298, |
|
"log_odds_ratio": -0.700367271900177, |
|
"logits/chosen": -2.185523748397827, |
|
"logits/rejected": -2.1816813945770264, |
|
"logps/chosen": -0.9445573687553406, |
|
"logps/rejected": -0.9744073748588562, |
|
"loss": 0.7979, |
|
"nll_loss": 0.7937018871307373, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 12.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.16918674111366272, |
|
"log_odds_ratio": -0.6699416041374207, |
|
"logits/chosen": -2.2384395599365234, |
|
"logits/rejected": -2.15749454498291, |
|
"logps/chosen": -0.7942744493484497, |
|
"logps/rejected": -0.9024287462234497, |
|
"loss": 0.7438, |
|
"nll_loss": 0.7179641723632812, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.006562241818755865, |
|
"log_odds_ratio": -0.7472976446151733, |
|
"logits/chosen": -2.2658329010009766, |
|
"logits/rejected": -2.2255280017852783, |
|
"logps/chosen": -0.7462302446365356, |
|
"logps/rejected": -0.7592640519142151, |
|
"loss": 0.6946, |
|
"nll_loss": 0.709227442741394, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.130902498960495, |
|
"log_odds_ratio": -0.696516215801239, |
|
"logits/chosen": -2.31599497795105, |
|
"logits/rejected": -2.197169780731201, |
|
"logps/chosen": -0.6473785042762756, |
|
"logps/rejected": -0.7054767608642578, |
|
"loss": 0.6834, |
|
"nll_loss": 0.6458895802497864, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.10142650455236435, |
|
"log_odds_ratio": -0.7053729891777039, |
|
"logits/chosen": -2.2686123847961426, |
|
"logits/rejected": -2.19942307472229, |
|
"logps/chosen": -0.655845046043396, |
|
"logps/rejected": -0.7169128656387329, |
|
"loss": 0.6504, |
|
"nll_loss": 0.6311678290367126, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.04096098989248276, |
|
"log_odds_ratio": -0.7439945936203003, |
|
"logits/chosen": -2.2910470962524414, |
|
"logits/rejected": -2.2046058177948, |
|
"logps/chosen": -0.7029940485954285, |
|
"logps/rejected": -0.7319748997688293, |
|
"loss": 0.6785, |
|
"nll_loss": 0.6775273084640503, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 7.375, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.039232559502124786, |
|
"log_odds_ratio": -0.7308824062347412, |
|
"logits/chosen": -2.2575812339782715, |
|
"logits/rejected": -2.199249505996704, |
|
"logps/chosen": -0.6514252424240112, |
|
"logps/rejected": -0.6770638823509216, |
|
"loss": 0.6408, |
|
"nll_loss": 0.6384583711624146, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.065470851957798, |
|
"log_odds_ratio": -0.750912070274353, |
|
"logits/chosen": -2.23573637008667, |
|
"logits/rejected": -2.2267823219299316, |
|
"logps/chosen": -0.694416344165802, |
|
"logps/rejected": -0.7376698851585388, |
|
"loss": 0.6678, |
|
"nll_loss": 0.6970995664596558, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.18219563364982605, |
|
"log_odds_ratio": -0.6834715604782104, |
|
"logits/chosen": -2.3221395015716553, |
|
"logits/rejected": -2.2302544116973877, |
|
"logps/chosen": -0.6259569525718689, |
|
"logps/rejected": -0.7380796074867249, |
|
"loss": 0.6328, |
|
"nll_loss": 0.6325567364692688, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.015087798237800598, |
|
"log_odds_ratio": -0.7565385699272156, |
|
"logits/chosen": -2.30287504196167, |
|
"logits/rejected": -2.1991920471191406, |
|
"logps/chosen": -0.6201391816139221, |
|
"logps/rejected": -0.6416058540344238, |
|
"loss": 0.6096, |
|
"nll_loss": 0.613267719745636, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.996562390352354e-06, |
|
"log_odds_chosen": 0.13090373575687408, |
|
"log_odds_ratio": -0.7056041955947876, |
|
"logits/chosen": -2.3028149604797363, |
|
"logits/rejected": -2.220900058746338, |
|
"logps/chosen": -0.6311284899711609, |
|
"logps/rejected": -0.726052463054657, |
|
"loss": 0.6535, |
|
"nll_loss": 0.6418599486351013, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.986259015137485e-06, |
|
"log_odds_chosen": 0.18970146775245667, |
|
"log_odds_ratio": -0.6792970895767212, |
|
"logits/chosen": -2.2931487560272217, |
|
"logits/rejected": -2.1606650352478027, |
|
"logps/chosen": -0.6621894836425781, |
|
"logps/rejected": -0.7548196911811829, |
|
"loss": 0.6535, |
|
"nll_loss": 0.6817941069602966, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.75, |
|
"learning_rate": 4.96911820954103e-06, |
|
"log_odds_chosen": 0.10594276338815689, |
|
"log_odds_ratio": -0.7069065570831299, |
|
"logits/chosen": -2.1914877891540527, |
|
"logits/rejected": -2.101252794265747, |
|
"logps/chosen": -0.6385573148727417, |
|
"logps/rejected": -0.694845974445343, |
|
"loss": 0.6445, |
|
"nll_loss": 0.6093564033508301, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 4.945187112281936e-06, |
|
"log_odds_chosen": 0.03052571788430214, |
|
"log_odds_ratio": -0.7518700361251831, |
|
"logits/chosen": -2.319819927215576, |
|
"logits/rejected": -2.2670364379882812, |
|
"logps/chosen": -0.656802237033844, |
|
"logps/rejected": -0.6807278394699097, |
|
"loss": 0.6661, |
|
"nll_loss": 0.6735944747924805, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 4.9145315359768575e-06, |
|
"log_odds_chosen": 0.09797719866037369, |
|
"log_odds_ratio": -0.7208576798439026, |
|
"logits/chosen": -2.3044209480285645, |
|
"logits/rejected": -2.2165465354919434, |
|
"logps/chosen": -0.612718939781189, |
|
"logps/rejected": -0.6638216376304626, |
|
"loss": 0.6463, |
|
"nll_loss": 0.6212142705917358, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.877235786149681e-06, |
|
"log_odds_chosen": -0.003668625606223941, |
|
"log_odds_ratio": -0.7583411931991577, |
|
"logits/chosen": -2.2496469020843506, |
|
"logits/rejected": -2.2018706798553467, |
|
"logps/chosen": -0.5735569000244141, |
|
"logps/rejected": -0.5976427793502808, |
|
"loss": 0.6229, |
|
"nll_loss": 0.5609295964241028, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 7.15625, |
|
"learning_rate": 4.833402429383947e-06, |
|
"log_odds_chosen": 0.01744142733514309, |
|
"log_odds_ratio": -0.7390518188476562, |
|
"logits/chosen": -2.344698667526245, |
|
"logits/rejected": -2.255495548248291, |
|
"logps/chosen": -0.659144401550293, |
|
"logps/rejected": -0.6820610761642456, |
|
"loss": 0.6303, |
|
"nll_loss": 0.6458700299263, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.125, |
|
"learning_rate": 4.783152011255739e-06, |
|
"log_odds_chosen": 0.0607575885951519, |
|
"log_odds_ratio": -0.7323936223983765, |
|
"logits/chosen": -2.2320337295532227, |
|
"logits/rejected": -2.177126407623291, |
|
"logps/chosen": -0.6184755563735962, |
|
"logps/rejected": -0.6612327694892883, |
|
"loss": 0.6472, |
|
"nll_loss": 0.6245880126953125, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 4.726622724822781e-06, |
|
"log_odds_chosen": 0.07648645341396332, |
|
"log_odds_ratio": -0.709121584892273, |
|
"logits/chosen": -2.2593894004821777, |
|
"logits/rejected": -2.202803134918213, |
|
"logps/chosen": -0.6581476330757141, |
|
"logps/rejected": -0.7056922912597656, |
|
"loss": 0.6518, |
|
"nll_loss": 0.6567482352256775, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 4.663970030581408e-06, |
|
"log_odds_chosen": 0.1157410591840744, |
|
"log_odds_ratio": -0.7236483097076416, |
|
"logits/chosen": -2.193201780319214, |
|
"logits/rejected": -2.150437831878662, |
|
"logps/chosen": -0.5841165781021118, |
|
"logps/rejected": -0.6441755890846252, |
|
"loss": 0.6273, |
|
"nll_loss": 0.5958451628684998, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 4.59536622893656e-06, |
|
"log_odds_chosen": -0.02586597204208374, |
|
"log_odds_ratio": -0.7771649360656738, |
|
"logits/chosen": -2.2009682655334473, |
|
"logits/rejected": -2.112724542617798, |
|
"logps/chosen": -0.6882591247558594, |
|
"logps/rejected": -0.6998633146286011, |
|
"loss": 0.6506, |
|
"nll_loss": 0.6941450834274292, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 4.520999986360555e-06, |
|
"log_odds_chosen": 0.06855098158121109, |
|
"log_odds_ratio": -0.7186604738235474, |
|
"logits/chosen": -2.171708583831787, |
|
"logits/rejected": -2.1308529376983643, |
|
"logps/chosen": -0.6504184603691101, |
|
"logps/rejected": -0.684329628944397, |
|
"loss": 0.6221, |
|
"nll_loss": 0.6537456512451172, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 4.441075816543745e-06, |
|
"log_odds_chosen": 0.022804971784353256, |
|
"log_odds_ratio": -0.741841733455658, |
|
"logits/chosen": -2.239063262939453, |
|
"logits/rejected": -2.1194045543670654, |
|
"logps/chosen": -0.6218129992485046, |
|
"logps/rejected": -0.6426461935043335, |
|
"loss": 0.6318, |
|
"nll_loss": 0.6370661854743958, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.355813517963924e-06, |
|
"log_odds_chosen": 0.44283628463745117, |
|
"log_odds_ratio": -0.5788813829421997, |
|
"logits/chosen": -2.2207190990448, |
|
"logits/rejected": -2.141242265701294, |
|
"logps/chosen": -0.47952336072921753, |
|
"logps/rejected": -0.6606315970420837, |
|
"loss": 0.524, |
|
"nll_loss": 0.4753130078315735, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 7.375, |
|
"learning_rate": 4.265447569421234e-06, |
|
"log_odds_chosen": 0.46678227186203003, |
|
"log_odds_ratio": -0.5757448077201843, |
|
"logits/chosen": -2.2942795753479004, |
|
"logits/rejected": -2.1979148387908936, |
|
"logps/chosen": -0.48276787996292114, |
|
"logps/rejected": -0.6674878597259521, |
|
"loss": 0.4488, |
|
"nll_loss": 0.49083882570266724, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 7.21875, |
|
"learning_rate": 4.170226485200899e-06, |
|
"log_odds_chosen": 0.5519483685493469, |
|
"log_odds_ratio": -0.5382982492446899, |
|
"logits/chosen": -2.2755680084228516, |
|
"logits/rejected": -2.1837079524993896, |
|
"logps/chosen": -0.45342904329299927, |
|
"logps/rejected": -0.6915146708488464, |
|
"loss": 0.4786, |
|
"nll_loss": 0.4660493731498718, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 4.070412131637139e-06, |
|
"log_odds_chosen": 0.4370903968811035, |
|
"log_odds_ratio": -0.6020382046699524, |
|
"logits/chosen": -2.23408842086792, |
|
"logits/rejected": -2.1390938758850098, |
|
"logps/chosen": -0.4502868056297302, |
|
"logps/rejected": -0.621059238910675, |
|
"loss": 0.4861, |
|
"nll_loss": 0.4494762420654297, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 8.0, |
|
"learning_rate": 3.966279006957781e-06, |
|
"log_odds_chosen": 0.5599658489227295, |
|
"log_odds_ratio": -0.538549542427063, |
|
"logits/chosen": -2.212937831878662, |
|
"logits/rejected": -2.1164355278015137, |
|
"logps/chosen": -0.466763973236084, |
|
"logps/rejected": -0.7057895064353943, |
|
"loss": 0.4703, |
|
"nll_loss": 0.48790493607521057, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 7.375, |
|
"learning_rate": 3.858113486390056e-06, |
|
"log_odds_chosen": 0.5336993932723999, |
|
"log_odds_ratio": -0.5696849822998047, |
|
"logits/chosen": -2.247847318649292, |
|
"logits/rejected": -2.1648640632629395, |
|
"logps/chosen": -0.4514709413051605, |
|
"logps/rejected": -0.7045167684555054, |
|
"loss": 0.5033, |
|
"nll_loss": 0.4769435524940491, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 3.7462130346036e-06, |
|
"log_odds_chosen": 0.5668686032295227, |
|
"log_odds_ratio": -0.5324344635009766, |
|
"logits/chosen": -2.2085554599761963, |
|
"logits/rejected": -2.1306445598602295, |
|
"logps/chosen": -0.4667239189147949, |
|
"logps/rejected": -0.701043963432312, |
|
"loss": 0.4759, |
|
"nll_loss": 0.4993102550506592, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 3.6308853876565232e-06, |
|
"log_odds_chosen": 0.5387176275253296, |
|
"log_odds_ratio": -0.5357738137245178, |
|
"logits/chosen": -2.256187677383423, |
|
"logits/rejected": -2.137636423110962, |
|
"logps/chosen": -0.4479009211063385, |
|
"logps/rejected": -0.6637527346611023, |
|
"loss": 0.4945, |
|
"nll_loss": 0.45828866958618164, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 6.375, |
|
"learning_rate": 3.512447706694254e-06, |
|
"log_odds_chosen": 0.4713989198207855, |
|
"log_odds_ratio": -0.5788164138793945, |
|
"logits/chosen": -2.2068490982055664, |
|
"logits/rejected": -2.1465065479278564, |
|
"logps/chosen": -0.46771669387817383, |
|
"logps/rejected": -0.670925498008728, |
|
"loss": 0.4675, |
|
"nll_loss": 0.4617518484592438, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 3.3912257057285684e-06, |
|
"log_odds_chosen": 0.6038286089897156, |
|
"log_odds_ratio": -0.53058922290802, |
|
"logits/chosen": -2.2398104667663574, |
|
"logits/rejected": -2.1177425384521484, |
|
"logps/chosen": -0.42343878746032715, |
|
"logps/rejected": -0.6755813360214233, |
|
"loss": 0.4659, |
|
"nll_loss": 0.4260299801826477, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 3.2675527558954897e-06, |
|
"log_odds_chosen": 0.5158039331436157, |
|
"log_odds_ratio": -0.5490551590919495, |
|
"logits/chosen": -2.240586757659912, |
|
"logits/rejected": -2.1683266162872314, |
|
"logps/chosen": -0.47749781608581543, |
|
"logps/rejected": -0.6999717950820923, |
|
"loss": 0.4843, |
|
"nll_loss": 0.4936765134334564, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 3.1417689686554144e-06, |
|
"log_odds_chosen": 0.5974913239479065, |
|
"log_odds_ratio": -0.5386354327201843, |
|
"logits/chosen": -2.2457404136657715, |
|
"logits/rejected": -2.2017741203308105, |
|
"logps/chosen": -0.4567716717720032, |
|
"logps/rejected": -0.7109787464141846, |
|
"loss": 0.4855, |
|
"nll_loss": 0.46884220838546753, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 3.0142202604567724e-06, |
|
"log_odds_chosen": 0.5235527753829956, |
|
"log_odds_ratio": -0.5673588514328003, |
|
"logits/chosen": -2.3072521686553955, |
|
"logits/rejected": -2.2010462284088135, |
|
"logps/chosen": -0.45431017875671387, |
|
"logps/rejected": -0.669671893119812, |
|
"loss": 0.4738, |
|
"nll_loss": 0.4782930314540863, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 2.8852574014354394e-06, |
|
"log_odds_chosen": 0.5463188886642456, |
|
"log_odds_ratio": -0.5436448454856873, |
|
"logits/chosen": -2.230321168899536, |
|
"logits/rejected": -2.136723279953003, |
|
"logps/chosen": -0.4296957552433014, |
|
"logps/rejected": -0.6486948132514954, |
|
"loss": 0.469, |
|
"nll_loss": 0.4498000741004944, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 2.7552350507661063e-06, |
|
"log_odds_chosen": 0.3968818187713623, |
|
"log_odds_ratio": -0.5953518152236938, |
|
"logits/chosen": -2.2960264682769775, |
|
"logits/rejected": -2.1944198608398438, |
|
"logps/chosen": -0.48995494842529297, |
|
"logps/rejected": -0.639137327671051, |
|
"loss": 0.4771, |
|
"nll_loss": 0.49491143226623535, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 2.6245107813184286e-06, |
|
"log_odds_chosen": 0.494235098361969, |
|
"log_odds_ratio": -0.5915595889091492, |
|
"logits/chosen": -2.2012336254119873, |
|
"logits/rejected": -2.144287586212158, |
|
"logps/chosen": -0.47316503524780273, |
|
"logps/rejected": -0.7013116478919983, |
|
"loss": 0.4697, |
|
"nll_loss": 0.5003570318222046, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 2.493444096300273e-06, |
|
"log_odds_chosen": 0.4220251142978668, |
|
"log_odds_ratio": -0.598805844783783, |
|
"logits/chosen": -2.243101119995117, |
|
"logits/rejected": -2.149256706237793, |
|
"logps/chosen": -0.47102293372154236, |
|
"logps/rejected": -0.6398745775222778, |
|
"loss": 0.5081, |
|
"nll_loss": 0.476499080657959, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 6.75, |
|
"learning_rate": 2.3623954405923636e-06, |
|
"log_odds_chosen": 0.4254421293735504, |
|
"log_odds_ratio": -0.5817403793334961, |
|
"logits/chosen": -2.227276086807251, |
|
"logits/rejected": -2.1629862785339355, |
|
"logps/chosen": -0.44677025079727173, |
|
"logps/rejected": -0.6224783658981323, |
|
"loss": 0.479, |
|
"nll_loss": 0.4805859923362732, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.375, |
|
"learning_rate": 2.2317252094932383e-06, |
|
"log_odds_chosen": 0.5560603737831116, |
|
"log_odds_ratio": -0.5513437390327454, |
|
"logits/chosen": -2.267571449279785, |
|
"logits/rejected": -2.182979106903076, |
|
"logps/chosen": -0.4530865252017975, |
|
"logps/rejected": -0.6832284331321716, |
|
"loss": 0.4704, |
|
"nll_loss": 0.4891841411590576, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 2.1017927576005657e-06, |
|
"log_odds_chosen": 0.5321310758590698, |
|
"log_odds_ratio": -0.5503188371658325, |
|
"logits/chosen": -2.2212295532226562, |
|
"logits/rejected": -2.1423323154449463, |
|
"logps/chosen": -0.44568586349487305, |
|
"logps/rejected": -0.6636167764663696, |
|
"loss": 0.4514, |
|
"nll_loss": 0.4601981043815613, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 1.9729554105544816e-06, |
|
"log_odds_chosen": 0.4405423104763031, |
|
"log_odds_ratio": -0.5903416872024536, |
|
"logits/chosen": -2.1597094535827637, |
|
"logits/rejected": -2.088732957839966, |
|
"logps/chosen": -0.48640793561935425, |
|
"logps/rejected": -0.6836115121841431, |
|
"loss": 0.5071, |
|
"nll_loss": 0.5123512148857117, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 1.8455674823607312e-06, |
|
"log_odds_chosen": 0.3645675778388977, |
|
"log_odds_ratio": -0.6205390095710754, |
|
"logits/chosen": -2.248469829559326, |
|
"logits/rejected": -2.1717073917388916, |
|
"logps/chosen": -0.48888254165649414, |
|
"logps/rejected": -0.6395884156227112, |
|
"loss": 0.4756, |
|
"nll_loss": 0.5086942911148071, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 1.7199793009960766e-06, |
|
"log_odds_chosen": 0.5230408310890198, |
|
"log_odds_ratio": -0.5633384585380554, |
|
"logits/chosen": -2.1712698936462402, |
|
"logits/rejected": -2.1275506019592285, |
|
"logps/chosen": -0.49090108275413513, |
|
"logps/rejected": -0.7094160914421082, |
|
"loss": 0.4833, |
|
"nll_loss": 0.5040683150291443, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 7.625, |
|
"learning_rate": 1.5965362449756317e-06, |
|
"log_odds_chosen": 0.6898726224899292, |
|
"log_odds_ratio": -0.4951675534248352, |
|
"logits/chosen": -2.271798610687256, |
|
"logits/rejected": -2.167448043823242, |
|
"logps/chosen": -0.4061620831489563, |
|
"logps/rejected": -0.6713688969612122, |
|
"loss": 0.4494, |
|
"nll_loss": 0.4319891333580017, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 8.0, |
|
"learning_rate": 1.4755777935316412e-06, |
|
"log_odds_chosen": 0.9442523121833801, |
|
"log_odds_ratio": -0.4286450445652008, |
|
"logits/chosen": -2.326359272003174, |
|
"logits/rejected": -2.1804490089416504, |
|
"logps/chosen": -0.3767296373844147, |
|
"logps/rejected": -0.7195509672164917, |
|
"loss": 0.3793, |
|
"nll_loss": 0.3976900279521942, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 1.3574365930158272e-06, |
|
"log_odds_chosen": 0.9110834002494812, |
|
"log_odds_ratio": -0.4287477433681488, |
|
"logits/chosen": -2.2323246002197266, |
|
"logits/rejected": -2.1740031242370605, |
|
"logps/chosen": -0.34097957611083984, |
|
"logps/rejected": -0.6637391448020935, |
|
"loss": 0.3725, |
|
"nll_loss": 0.3685534596443176, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 6.5, |
|
"learning_rate": 1.242437542092731e-06, |
|
"log_odds_chosen": 1.0058963298797607, |
|
"log_odds_ratio": -0.3893057703971863, |
|
"logits/chosen": -2.2164254188537598, |
|
"logits/rejected": -2.159911870956421, |
|
"logps/chosen": -0.3380836546421051, |
|
"logps/rejected": -0.6879861950874329, |
|
"loss": 0.3836, |
|
"nll_loss": 0.3611399233341217, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 1.1308968982398893e-06, |
|
"log_odds_chosen": 1.015363097190857, |
|
"log_odds_ratio": -0.3953540325164795, |
|
"logits/chosen": -2.239375591278076, |
|
"logits/rejected": -2.1401748657226562, |
|
"logps/chosen": -0.32685962319374084, |
|
"logps/rejected": -0.714621901512146, |
|
"loss": 0.3588, |
|
"nll_loss": 0.3464065492153168, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 7.375, |
|
"learning_rate": 1.0231214080120354e-06, |
|
"log_odds_chosen": 0.9830226898193359, |
|
"log_odds_ratio": -0.42418375611305237, |
|
"logits/chosen": -2.203508138656616, |
|
"logits/rejected": -2.155480146408081, |
|
"logps/chosen": -0.3562384247779846, |
|
"logps/rejected": -0.7052286267280579, |
|
"loss": 0.3706, |
|
"nll_loss": 0.37242117524147034, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 6.125, |
|
"learning_rate": 9.194074634611577e-07, |
|
"log_odds_chosen": 0.9658070802688599, |
|
"log_odds_ratio": -0.40443453192710876, |
|
"logits/chosen": -2.27958607673645, |
|
"logits/rejected": -2.1977391242980957, |
|
"logps/chosen": -0.34767287969589233, |
|
"logps/rejected": -0.6850040555000305, |
|
"loss": 0.3652, |
|
"nll_loss": 0.3679281175136566, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 6.5, |
|
"learning_rate": 8.200402870323634e-07, |
|
"log_odds_chosen": 1.0082114934921265, |
|
"log_odds_ratio": -0.4111156463623047, |
|
"logits/chosen": -2.2096288204193115, |
|
"logits/rejected": -2.115715503692627, |
|
"logps/chosen": -0.36051854491233826, |
|
"logps/rejected": -0.724047064781189, |
|
"loss": 0.3715, |
|
"nll_loss": 0.3693843483924866, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 7.252931471771322e-07, |
|
"log_odds_chosen": 1.1489967107772827, |
|
"log_odds_ratio": -0.36786437034606934, |
|
"logits/chosen": -2.3240292072296143, |
|
"logits/rejected": -2.2296738624572754, |
|
"logps/chosen": -0.32746613025665283, |
|
"logps/rejected": -0.7155576944351196, |
|
"loss": 0.3592, |
|
"nll_loss": 0.33152374625205994, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 6.354266068411078e-07, |
|
"log_odds_chosen": 0.9698125720024109, |
|
"log_odds_ratio": -0.4362240731716156, |
|
"logits/chosen": -2.169530153274536, |
|
"logits/rejected": -2.1334238052368164, |
|
"logps/chosen": -0.3798636496067047, |
|
"logps/rejected": -0.7286175489425659, |
|
"loss": 0.3918, |
|
"nll_loss": 0.40456095337867737, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 5.50687806893139e-07, |
|
"log_odds_chosen": 1.0643595457077026, |
|
"log_odds_ratio": -0.39470165967941284, |
|
"logits/chosen": -2.20218825340271, |
|
"logits/rejected": -2.2017059326171875, |
|
"logps/chosen": -0.3234565854072571, |
|
"logps/rejected": -0.6679071187973022, |
|
"loss": 0.3741, |
|
"nll_loss": 0.3704506456851959, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.7130978646620807e-07, |
|
"log_odds_chosen": 1.1568490266799927, |
|
"log_odds_ratio": -0.3843044340610504, |
|
"logits/chosen": -2.246670961380005, |
|
"logits/rejected": -2.117475748062134, |
|
"logps/chosen": -0.3415004014968872, |
|
"logps/rejected": -0.7403726577758789, |
|
"loss": 0.3564, |
|
"nll_loss": 0.39345091581344604, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 6.125, |
|
"learning_rate": 3.975108420793819e-07, |
|
"log_odds_chosen": 1.017449140548706, |
|
"log_odds_ratio": -0.4111986756324768, |
|
"logits/chosen": -2.182363271713257, |
|
"logits/rejected": -2.1441586017608643, |
|
"logps/chosen": -0.3217340111732483, |
|
"logps/rejected": -0.6855174899101257, |
|
"loss": 0.361, |
|
"nll_loss": 0.34829622507095337, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.294939273032272e-07, |
|
"log_odds_chosen": 1.0704463720321655, |
|
"log_odds_ratio": -0.376108318567276, |
|
"logits/chosen": -2.1767077445983887, |
|
"logits/rejected": -2.1506667137145996, |
|
"logps/chosen": -0.32282644510269165, |
|
"logps/rejected": -0.7082036733627319, |
|
"loss": 0.34, |
|
"nll_loss": 0.32364171743392944, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 6.5, |
|
"learning_rate": 2.6744609461969523e-07, |
|
"log_odds_chosen": 1.051203966140747, |
|
"log_odds_ratio": -0.39061832427978516, |
|
"logits/chosen": -2.290379047393799, |
|
"logits/rejected": -2.220198154449463, |
|
"logps/chosen": -0.32938411831855774, |
|
"logps/rejected": -0.7165583968162537, |
|
"loss": 0.3702, |
|
"nll_loss": 0.338884174823761, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 2.1153798101138405e-07, |
|
"log_odds_chosen": 1.1411008834838867, |
|
"log_odds_ratio": -0.360759973526001, |
|
"logits/chosen": -2.221855878829956, |
|
"logits/rejected": -2.1710667610168457, |
|
"logps/chosen": -0.3297441303730011, |
|
"logps/rejected": -0.7233366370201111, |
|
"loss": 0.3585, |
|
"nll_loss": 0.35538965463638306, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 1.61923338694871e-07, |
|
"log_odds_chosen": 1.0430134534835815, |
|
"log_odds_ratio": -0.4124155044555664, |
|
"logits/chosen": -2.2372210025787354, |
|
"logits/rejected": -2.169727087020874, |
|
"logps/chosen": -0.343710720539093, |
|
"logps/rejected": -0.740007758140564, |
|
"loss": 0.3551, |
|
"nll_loss": 0.35884636640548706, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 6.375, |
|
"learning_rate": 1.1873861228862998e-07, |
|
"log_odds_chosen": 1.1086117029190063, |
|
"log_odds_ratio": -0.3817860186100006, |
|
"logits/chosen": -2.2177319526672363, |
|
"logits/rejected": -2.130946397781372, |
|
"logps/chosen": -0.3493684232234955, |
|
"logps/rejected": -0.7561189532279968, |
|
"loss": 0.3626, |
|
"nll_loss": 0.37719854712486267, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 7.0, |
|
"learning_rate": 8.210256357836065e-08, |
|
"log_odds_chosen": 1.211117148399353, |
|
"log_odds_ratio": -0.34444934129714966, |
|
"logits/chosen": -2.181938886642456, |
|
"logits/rejected": -2.1168627738952637, |
|
"logps/chosen": -0.30442455410957336, |
|
"logps/rejected": -0.723738968372345, |
|
"loss": 0.3425, |
|
"nll_loss": 0.31909602880477905, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 7.25, |
|
"learning_rate": 5.21159449116615e-08, |
|
"log_odds_chosen": 0.889040470123291, |
|
"log_odds_ratio": -0.46176376938819885, |
|
"logits/chosen": -2.210618495941162, |
|
"logits/rejected": -2.131258726119995, |
|
"logps/chosen": -0.37134605646133423, |
|
"logps/rejected": -0.6827694773674011, |
|
"loss": 0.3686, |
|
"nll_loss": 0.37423044443130493, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 2.8861222120235845e-08, |
|
"log_odds_chosen": 1.068619966506958, |
|
"log_odds_ratio": -0.3856962025165558, |
|
"logits/chosen": -2.163769245147705, |
|
"logits/rejected": -2.134917736053467, |
|
"logps/chosen": -0.30325040221214294, |
|
"logps/rejected": -0.6523526906967163, |
|
"loss": 0.3648, |
|
"nll_loss": 0.336664617061615, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 1.2402347731620412e-08, |
|
"log_odds_chosen": 0.9644078016281128, |
|
"log_odds_ratio": -0.40152567625045776, |
|
"logits/chosen": -2.2033181190490723, |
|
"logits/rejected": -2.091287136077881, |
|
"logps/chosen": -0.36845338344573975, |
|
"logps/rejected": -0.7090677618980408, |
|
"loss": 0.3648, |
|
"nll_loss": 0.38515713810920715, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 7.15625, |
|
"learning_rate": 2.7845850941254914e-09, |
|
"log_odds_chosen": 1.0706063508987427, |
|
"log_odds_ratio": -0.3853154182434082, |
|
"logits/chosen": -2.2872862815856934, |
|
"logits/rejected": -2.2065258026123047, |
|
"logps/chosen": -0.32825738191604614, |
|
"logps/rejected": -0.6639867424964905, |
|
"loss": 0.3568, |
|
"nll_loss": 0.35901403427124023, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 699, |
|
"total_flos": 0.0, |
|
"train_loss": 0.500827480283418, |
|
"train_runtime": 21486.5219, |
|
"train_samples_per_second": 2.084, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 699, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|