{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 113.0,
      "learning_rate": 1.282051282051282e-07,
      "logits/chosen": -2.7358343601226807,
      "logits/rejected": -2.7480404376983643,
      "logps/chosen": -27.35565757751465,
      "logps/rejected": -21.06114387512207,
      "loss": 0.5102,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 146.0,
      "learning_rate": 1.282051282051282e-06,
      "logits/chosen": -3.0090532302856445,
      "logits/rejected": -2.998255729675293,
      "logps/chosen": -33.17539596557617,
      "logps/rejected": -31.967647552490234,
      "loss": 0.5411,
      "rewards/accuracies": 0.4722222089767456,
      "rewards/chosen": 0.0038858253974467516,
      "rewards/margins": 0.006502790376543999,
      "rewards/rejected": -0.0026169654447585344,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 144.0,
      "learning_rate": 2.564102564102564e-06,
      "logits/chosen": -2.89949369430542,
      "logits/rejected": -2.8941283226013184,
      "logps/chosen": -32.47248077392578,
      "logps/rejected": -28.952869415283203,
      "loss": 0.6026,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.0018292926251888275,
      "rewards/margins": -0.018470000475645065,
      "rewards/rejected": 0.02029929682612419,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 159.0,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": -3.096463680267334,
      "logits/rejected": -3.1078667640686035,
      "logps/chosen": -32.81493377685547,
      "logps/rejected": -30.128747940063477,
      "loss": 0.5658,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.06562719494104385,
      "rewards/margins": 0.018515314906835556,
      "rewards/rejected": 0.047111887484788895,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 223.0,
      "learning_rate": 4.999896948438434e-06,
      "logits/chosen": -2.8619472980499268,
      "logits/rejected": -2.8528523445129395,
      "logps/chosen": -31.569528579711914,
      "logps/rejected": -32.359840393066406,
      "loss": 0.8775,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.1825559139251709,
      "rewards/margins": 0.18422597646713257,
      "rewards/rejected": -0.0016700520645827055,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 156.0,
      "learning_rate": 4.987541037542187e-06,
      "logits/chosen": -2.8859169483184814,
      "logits/rejected": -2.8837780952453613,
      "logps/chosen": -29.52107810974121,
      "logps/rejected": -30.1494083404541,
      "loss": 0.7059,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.1533588469028473,
      "rewards/margins": 0.1956842839717865,
      "rewards/rejected": -0.042325448244810104,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 150.0,
      "learning_rate": 4.954691471941119e-06,
      "logits/chosen": -2.9192371368408203,
      "logits/rejected": -2.9210586547851562,
      "logps/chosen": -30.085376739501953,
      "logps/rejected": -28.13214111328125,
      "loss": 0.5449,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.031074851751327515,
      "rewards/margins": 0.1545926332473755,
      "rewards/rejected": -0.12351777404546738,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 972.0,
      "learning_rate": 4.901618883413549e-06,
      "logits/chosen": -3.001981496810913,
      "logits/rejected": -3.009333610534668,
      "logps/chosen": -29.32364273071289,
      "logps/rejected": -31.037628173828125,
      "loss": 1.1987,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.029870102182030678,
      "rewards/margins": 0.07220745086669922,
      "rewards/rejected": -0.10207755863666534,
      "step": 70
    },
    {
      "epoch": 0.21,
      "grad_norm": 192.0,
      "learning_rate": 4.828760511501322e-06,
      "logits/chosen": -2.816981792449951,
      "logits/rejected": -2.8327014446258545,
      "logps/chosen": -29.412933349609375,
      "logps/rejected": -29.788555145263672,
      "loss": 0.6831,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.08945836126804352,
      "rewards/margins": 0.16944995522499084,
      "rewards/rejected": -0.07999160140752792,
      "step": 80
    },
    {
      "epoch": 0.23,
      "grad_norm": 362.0,
      "learning_rate": 4.7367166013034295e-06,
      "logits/chosen": -2.9031848907470703,
      "logits/rejected": -2.8844356536865234,
      "logps/chosen": -32.63589859008789,
      "logps/rejected": -30.085968017578125,
      "loss": 4.9199,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.14037345349788666,
      "rewards/margins": 0.18539710342884064,
      "rewards/rejected": -0.04502364248037338,
      "step": 90
    },
    {
      "epoch": 0.26,
      "grad_norm": 157.0,
      "learning_rate": 4.626245458345211e-06,
      "logits/chosen": -3.0054373741149902,
      "logits/rejected": -3.006031036376953,
      "logps/chosen": -31.7724552154541,
      "logps/rejected": -30.76922035217285,
      "loss": 1.1043,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.19909459352493286,
      "rewards/margins": 0.16622690856456757,
      "rewards/rejected": 0.03286769241094589,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.815906524658203,
      "eval_logits/rejected": -2.813185930252075,
      "eval_logps/chosen": -31.25544548034668,
      "eval_logps/rejected": -34.70828628540039,
      "eval_loss": 0.6260043978691101,
      "eval_rewards/accuracies": 0.5369601845741272,
      "eval_rewards/chosen": 0.018902836367487907,
      "eval_rewards/margins": 0.02599383145570755,
      "eval_rewards/rejected": -0.00709099555388093,
      "eval_runtime": 113.2921,
      "eval_samples_per_second": 3.028,
      "eval_steps_per_second": 0.38,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 157.0,
      "learning_rate": 4.498257201263691e-06,
      "logits/chosen": -2.9622676372528076,
      "logits/rejected": -2.937613010406494,
      "logps/chosen": -31.749774932861328,
      "logps/rejected": -31.12813949584961,
      "loss": 1.4773,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.3269161581993103,
      "rewards/margins": 0.30541908740997314,
      "rewards/rejected": 0.02149704284965992,
      "step": 110
    },
    {
      "epoch": 0.31,
      "grad_norm": 109.0,
      "learning_rate": 4.353806263777678e-06,
      "logits/chosen": -3.043015718460083,
      "logits/rejected": -3.0729401111602783,
      "logps/chosen": -28.837779998779297,
      "logps/rejected": -34.10157012939453,
      "loss": 1.2172,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.22828666865825653,
      "rewards/margins": 0.19610069692134857,
      "rewards/rejected": 0.032185956835746765,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 95.0,
      "learning_rate": 4.1940827077152755e-06,
      "logits/chosen": -2.7426066398620605,
      "logits/rejected": -2.738373279571533,
      "logps/chosen": -28.734365463256836,
      "logps/rejected": -30.13399887084961,
      "loss": 0.9835,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.16822054982185364,
      "rewards/margins": 0.16835634410381317,
      "rewards/rejected": -0.00013580024824477732,
      "step": 130
    },
    {
      "epoch": 0.36,
      "grad_norm": 192.0,
      "learning_rate": 4.0204024186666215e-06,
      "logits/chosen": -3.015483856201172,
      "logits/rejected": -3.012770652770996,
      "logps/chosen": -27.254077911376953,
      "logps/rejected": -31.771997451782227,
      "loss": 0.9001,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.1625039130449295,
      "rewards/margins": 0.2431638240814209,
      "rewards/rejected": -0.0806598886847496,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 147.0,
      "learning_rate": 3.834196265035119e-06,
      "logits/chosen": -2.8096275329589844,
      "logits/rejected": -2.804391860961914,
      "logps/chosen": -27.50994300842285,
      "logps/rejected": -31.280899047851562,
      "loss": 0.7365,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.22089700400829315,
      "rewards/margins": 0.26097819209098816,
      "rewards/rejected": -0.04008117690682411,
      "step": 150
    },
    {
      "epoch": 0.42,
      "grad_norm": 122.5,
      "learning_rate": 3.636998309800573e-06,
      "logits/chosen": -3.127908229827881,
      "logits/rejected": -3.1092982292175293,
      "logps/chosen": -31.936166763305664,
      "logps/rejected": -29.042150497436523,
      "loss": 1.5699,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.38888686895370483,
      "rewards/margins": 0.43254947662353516,
      "rewards/rejected": -0.043662626296281815,
      "step": 160
    },
    {
      "epoch": 0.44,
      "grad_norm": 116.0,
      "learning_rate": 3.4304331721118078e-06,
      "logits/chosen": -2.9442355632781982,
      "logits/rejected": -2.9510912895202637,
      "logps/chosen": -29.370159149169922,
      "logps/rejected": -31.104833602905273,
      "loss": 1.1729,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.367119163274765,
      "rewards/margins": 0.37086552381515503,
      "rewards/rejected": -0.003746363567188382,
      "step": 170
    },
    {
      "epoch": 0.47,
      "grad_norm": 206.0,
      "learning_rate": 3.2162026428305436e-06,
      "logits/chosen": -2.7904200553894043,
      "logits/rejected": -2.7880074977874756,
      "logps/chosen": -29.214679718017578,
      "logps/rejected": -29.80947494506836,
      "loss": 0.8255,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.3186172544956207,
      "rewards/margins": 0.3475292921066284,
      "rewards/rejected": -0.028912032023072243,
      "step": 180
    },
    {
      "epoch": 0.49,
      "grad_norm": 264.0,
      "learning_rate": 2.996071664294641e-06,
      "logits/chosen": -2.9064321517944336,
      "logits/rejected": -2.9026076793670654,
      "logps/chosen": -29.777074813842773,
      "logps/rejected": -28.358760833740234,
      "loss": 1.8338,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.2867472767829895,
      "rewards/margins": 0.28760695457458496,
      "rewards/rejected": -0.0008596793049946427,
      "step": 190
    },
    {
      "epoch": 0.52,
      "grad_norm": 344.0,
      "learning_rate": 2.7718537898066833e-06,
      "logits/chosen": -2.9745757579803467,
      "logits/rejected": -2.9624178409576416,
      "logps/chosen": -33.12746810913086,
      "logps/rejected": -30.224645614624023,
      "loss": 3.0672,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.4000841975212097,
      "rewards/margins": 0.25347504019737244,
      "rewards/rejected": 0.14660920202732086,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -2.8127942085266113,
      "eval_logits/rejected": -2.8107194900512695,
      "eval_logps/chosen": -31.217693328857422,
      "eval_logps/rejected": -34.68858337402344,
      "eval_loss": 0.6355926990509033,
      "eval_rewards/accuracies": 0.5627076625823975,
      "eval_rewards/chosen": 0.04533065855503082,
      "eval_rewards/margins": 0.03862937539815903,
      "eval_rewards/rejected": 0.006701283622533083,
      "eval_runtime": 112.8257,
      "eval_samples_per_second": 3.04,
      "eval_steps_per_second": 0.381,
      "step": 200
    },
    {
      "epoch": 0.55,
      "grad_norm": 272.0,
      "learning_rate": 2.5453962426402006e-06,
      "logits/chosen": -2.909015655517578,
      "logits/rejected": -2.909053325653076,
      "logps/chosen": -32.618431091308594,
      "logps/rejected": -33.84246063232422,
      "loss": 1.7638,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.2904837131500244,
      "rewards/margins": 0.2771795690059662,
      "rewards/rejected": 0.013304118998348713,
      "step": 210
    },
    {
      "epoch": 0.57,
      "grad_norm": 195.0,
      "learning_rate": 2.3185646976551794e-06,
      "logits/chosen": -2.8854432106018066,
      "logits/rejected": -2.901151657104492,
      "logps/chosen": -29.67086410522461,
      "logps/rejected": -28.4765567779541,
      "loss": 1.6395,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.3550260066986084,
      "rewards/margins": 0.3188283145427704,
      "rewards/rejected": 0.03619767725467682,
      "step": 220
    },
    {
      "epoch": 0.6,
      "grad_norm": 102.0,
      "learning_rate": 2.0932279108998323e-06,
      "logits/chosen": -2.9346377849578857,
      "logits/rejected": -2.9390294551849365,
      "logps/chosen": -30.749608993530273,
      "logps/rejected": -31.755859375,
      "loss": 0.8936,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.28158265352249146,
      "rewards/margins": 0.279979407787323,
      "rewards/rejected": 0.0016032479470595717,
      "step": 230
    },
    {
      "epoch": 0.62,
      "grad_norm": 77.5,
      "learning_rate": 1.8712423238279358e-06,
      "logits/chosen": -2.983455181121826,
      "logits/rejected": -2.9928596019744873,
      "logps/chosen": -30.32903480529785,
      "logps/rejected": -30.288427352905273,
      "loss": 0.9824,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.4031239151954651,
      "rewards/margins": 0.4403551518917084,
      "rewards/rejected": -0.03723124787211418,
      "step": 240
    },
    {
      "epoch": 0.65,
      "grad_norm": 282.0,
      "learning_rate": 1.6544367689701824e-06,
      "logits/chosen": -2.817836046218872,
      "logits/rejected": -2.808262348175049,
      "logps/chosen": -26.585535049438477,
      "logps/rejected": -29.320880889892578,
      "loss": 1.3431,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.25807633996009827,
      "rewards/margins": 0.11054243892431259,
      "rewards/rejected": 0.14753387868404388,
      "step": 250
    },
    {
      "epoch": 0.68,
      "grad_norm": 89.0,
      "learning_rate": 1.4445974030621963e-06,
      "logits/chosen": -2.8003945350646973,
      "logits/rejected": -2.8208835124969482,
      "logps/chosen": -29.268306732177734,
      "logps/rejected": -34.10762405395508,
      "loss": 1.0424,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.39315709471702576,
      "rewards/margins": 0.34663277864456177,
      "rewards/rejected": 0.04652435705065727,
      "step": 260
    },
    {
      "epoch": 0.7,
      "grad_norm": 54.25,
      "learning_rate": 1.243452991757889e-06,
      "logits/chosen": -2.9433441162109375,
      "logits/rejected": -2.9482321739196777,
      "logps/chosen": -30.294042587280273,
      "logps/rejected": -29.78550148010254,
      "loss": 1.0077,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.341118723154068,
      "rewards/margins": 0.2852852940559387,
      "rewards/rejected": 0.05583342909812927,
      "step": 270
    },
    {
      "epoch": 0.73,
      "grad_norm": 78.0,
      "learning_rate": 1.0526606671603523e-06,
      "logits/chosen": -2.9577131271362305,
      "logits/rejected": -2.944226026535034,
      "logps/chosen": -30.35019874572754,
      "logps/rejected": -28.367935180664062,
      "loss": 1.9189,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.2600388824939728,
      "rewards/margins": 0.0756484642624855,
      "rewards/rejected": 0.18439041078090668,
      "step": 280
    },
    {
      "epoch": 0.75,
      "grad_norm": 227.0,
      "learning_rate": 8.737922755071455e-07,
      "logits/chosen": -2.885504722595215,
      "logits/rejected": -2.8676083087921143,
      "logps/chosen": -32.159873962402344,
      "logps/rejected": -30.42794418334961,
      "loss": 1.5952,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.33349719643592834,
      "rewards/margins": 0.26674339175224304,
      "rewards/rejected": 0.06675383448600769,
      "step": 290
    },
    {
      "epoch": 0.78,
      "grad_norm": 340.0,
      "learning_rate": 7.08321427484816e-07,
      "logits/chosen": -2.889909267425537,
      "logits/rejected": -2.8869872093200684,
      "logps/chosen": -31.75823402404785,
      "logps/rejected": -27.508203506469727,
      "loss": 1.2353,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.32509008049964905,
      "rewards/margins": 0.2583235204219818,
      "rewards/rejected": 0.06676653772592545,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.809739589691162,
      "eval_logits/rejected": -2.807137966156006,
      "eval_logps/chosen": -31.20755958557129,
      "eval_logps/rejected": -34.65956115722656,
      "eval_loss": 0.6301615834236145,
      "eval_rewards/accuracies": 0.531146228313446,
      "eval_rewards/chosen": 0.052424702793359756,
      "eval_rewards/margins": 0.025408506393432617,
      "eval_rewards/rejected": 0.02701619826257229,
      "eval_runtime": 113.0311,
      "eval_samples_per_second": 3.035,
      "eval_steps_per_second": 0.38,
      "step": 300
    },
    {
      "epoch": 0.81,
      "grad_norm": 251.0,
      "learning_rate": 5.576113578589035e-07,
      "logits/chosen": -2.7691893577575684,
      "logits/rejected": -2.787349224090576,
      "logps/chosen": -28.8099422454834,
      "logps/rejected": -30.58675765991211,
      "loss": 1.3555,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.2936348617076874,
      "rewards/margins": 0.19195988774299622,
      "rewards/rejected": 0.10167495906352997,
      "step": 310
    },
    {
      "epoch": 0.83,
      "grad_norm": 93.5,
      "learning_rate": 4.229036944380913e-07,
      "logits/chosen": -3.0164008140563965,
      "logits/rejected": -3.0019757747650146,
      "logps/chosen": -29.883535385131836,
      "logps/rejected": -28.36444664001465,
      "loss": 0.9933,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.22963504493236542,
      "rewards/margins": 0.26160377264022827,
      "rewards/rejected": -0.03196870535612106,
      "step": 320
    },
    {
      "epoch": 0.86,
      "grad_norm": 119.5,
      "learning_rate": 3.053082288996112e-07,
      "logits/chosen": -2.931257724761963,
      "logits/rejected": -2.9136319160461426,
      "logps/chosen": -27.954730987548828,
      "logps/rejected": -30.42913246154785,
      "loss": 0.922,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.285138338804245,
      "rewards/margins": 0.3113783299922943,
      "rewards/rejected": -0.026239976286888123,
      "step": 330
    },
    {
      "epoch": 0.88,
      "grad_norm": 131.0,
      "learning_rate": 2.0579377374915805e-07,
      "logits/chosen": -3.146080493927002,
      "logits/rejected": -3.152937650680542,
      "logps/chosen": -31.1461181640625,
      "logps/rejected": -32.72796630859375,
      "loss": 0.797,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.3451611399650574,
      "rewards/margins": 0.3675331473350525,
      "rewards/rejected": -0.022372011095285416,
      "step": 340
    },
    {
      "epoch": 0.91,
      "grad_norm": 117.0,
      "learning_rate": 1.2518018074041684e-07,
      "logits/chosen": -3.0231246948242188,
      "logits/rejected": -3.0266261100769043,
      "logps/chosen": -30.33087158203125,
      "logps/rejected": -31.515050888061523,
      "loss": 0.8954,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.33320528268814087,
      "rewards/margins": 0.334422767162323,
      "rewards/rejected": -0.001217484474182129,
      "step": 350
    },
    {
      "epoch": 0.94,
      "grad_norm": 223.0,
      "learning_rate": 6.41315865106129e-08,
      "logits/chosen": -2.86474871635437,
      "logits/rejected": -2.866323947906494,
      "logps/chosen": -27.935626983642578,
      "logps/rejected": -29.81571388244629,
      "loss": 0.7841,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.49987268447875977,
      "rewards/margins": 0.4892123341560364,
      "rewards/rejected": 0.010660367086529732,
      "step": 360
    },
    {
      "epoch": 0.96,
      "grad_norm": 127.5,
      "learning_rate": 2.3150941078050325e-08,
      "logits/chosen": -2.9371769428253174,
      "logits/rejected": -2.936225414276123,
      "logps/chosen": -30.32781410217285,
      "logps/rejected": -31.941625595092773,
      "loss": 0.7687,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.3211846947669983,
      "rewards/margins": 0.3306921422481537,
      "rewards/rejected": -0.00950746051967144,
      "step": 370
    },
    {
      "epoch": 0.99,
      "grad_norm": 484.0,
      "learning_rate": 2.575864278703266e-09,
      "logits/chosen": -2.8952066898345947,
      "logits/rejected": -2.8765997886657715,
      "logps/chosen": -28.4578857421875,
      "logps/rejected": -27.789112091064453,
      "loss": 0.8783,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.3410065770149231,
      "rewards/margins": 0.2706519067287445,
      "rewards/rejected": 0.07035474479198456,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 1.2163990609057538,
      "train_runtime": 2720.9807,
      "train_samples_per_second": 1.132,
      "train_steps_per_second": 0.141
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}