|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.99582225598177, |
|
"eval_steps": 800, |
|
"global_step": 1479, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002025572857323712, |
|
"grad_norm": 31.013870239257812, |
|
"learning_rate": 6.756756756756757e-10, |
|
"logits/chosen": -2.5177597999572754, |
|
"logits/rejected": -2.4276583194732666, |
|
"logps/chosen": -79.6932373046875, |
|
"logps/rejected": -86.58649444580078, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": -0.0008372783777303994, |
|
"rewards/margins": 0.00045527220936492085, |
|
"rewards/rejected": -0.0012925505870953202, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020255728573237118, |
|
"grad_norm": 28.71278190612793, |
|
"learning_rate": 6.756756756756757e-09, |
|
"logits/chosen": -2.587923526763916, |
|
"logits/rejected": -2.421647787094116, |
|
"logps/chosen": -72.02790069580078, |
|
"logps/rejected": -68.7666015625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4479166567325592, |
|
"rewards/chosen": -0.000452535372460261, |
|
"rewards/margins": 0.0008923111017793417, |
|
"rewards/rejected": -0.001344846561551094, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.040511457146474236, |
|
"grad_norm": 24.486814498901367, |
|
"learning_rate": 1.3513513513513514e-08, |
|
"logits/chosen": -2.5588934421539307, |
|
"logits/rejected": -2.3707621097564697, |
|
"logps/chosen": -77.4730453491211, |
|
"logps/rejected": -71.17650604248047, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": 0.0015422820579260588, |
|
"rewards/margins": 0.00048240157775580883, |
|
"rewards/rejected": 0.0010598807130008936, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.060767185719711354, |
|
"grad_norm": 26.952857971191406, |
|
"learning_rate": 2.027027027027027e-08, |
|
"logits/chosen": -2.5552210807800293, |
|
"logits/rejected": -2.3964200019836426, |
|
"logps/chosen": -75.58769226074219, |
|
"logps/rejected": -74.38423156738281, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.00154799222946167, |
|
"rewards/margins": -0.0016709610354155302, |
|
"rewards/rejected": 0.0001229687622981146, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08102291429294847, |
|
"grad_norm": 30.491893768310547, |
|
"learning_rate": 2.7027027027027028e-08, |
|
"logits/chosen": -2.538985013961792, |
|
"logits/rejected": -2.3956074714660645, |
|
"logps/chosen": -84.64269256591797, |
|
"logps/rejected": -82.15937042236328, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0005608886131085455, |
|
"rewards/margins": 0.0012397856917232275, |
|
"rewards/rejected": -0.000678897020407021, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1012786428661856, |
|
"grad_norm": 27.152774810791016, |
|
"learning_rate": 3.378378378378378e-08, |
|
"logits/chosen": -2.515413522720337, |
|
"logits/rejected": -2.358457565307617, |
|
"logps/chosen": -81.1507568359375, |
|
"logps/rejected": -78.68826293945312, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0013986782869324088, |
|
"rewards/margins": 0.002325823763385415, |
|
"rewards/rejected": -0.0037245028652250767, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12153437143942271, |
|
"grad_norm": 27.534177780151367, |
|
"learning_rate": 4.054054054054054e-08, |
|
"logits/chosen": -2.520850419998169, |
|
"logits/rejected": -2.3658010959625244, |
|
"logps/chosen": -78.13814544677734, |
|
"logps/rejected": -75.04551696777344, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0025924122892320156, |
|
"rewards/margins": -0.0010297519620507956, |
|
"rewards/rejected": -0.00156266032718122, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14179010001265982, |
|
"grad_norm": 30.60668182373047, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logits/chosen": -2.5382590293884277, |
|
"logits/rejected": -2.37661075592041, |
|
"logps/chosen": -83.97273254394531, |
|
"logps/rejected": -80.8182373046875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.003294873284175992, |
|
"rewards/margins": 0.0013795426348224282, |
|
"rewards/rejected": -0.004674416035413742, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16204582858589694, |
|
"grad_norm": 27.501964569091797, |
|
"learning_rate": 5.4054054054054056e-08, |
|
"logits/chosen": -2.4654035568237305, |
|
"logits/rejected": -2.3297314643859863, |
|
"logps/chosen": -75.83648681640625, |
|
"logps/rejected": -76.66287994384766, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.0013893753057345748, |
|
"rewards/margins": 0.006102095358073711, |
|
"rewards/rejected": -0.007491470314562321, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18230155715913407, |
|
"grad_norm": 24.944175720214844, |
|
"learning_rate": 6.081081081081081e-08, |
|
"logits/chosen": -2.5287299156188965, |
|
"logits/rejected": -2.379664659500122, |
|
"logps/chosen": -86.45475769042969, |
|
"logps/rejected": -79.61102294921875, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.0014329934492707253, |
|
"rewards/margins": 0.005873243790119886, |
|
"rewards/rejected": -0.007306237705051899, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2025572857323712, |
|
"grad_norm": 29.129093170166016, |
|
"learning_rate": 6.756756756756756e-08, |
|
"logits/chosen": -2.5153121948242188, |
|
"logits/rejected": -2.361551523208618, |
|
"logps/chosen": -84.0345230102539, |
|
"logps/rejected": -78.61013793945312, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0028929836116731167, |
|
"rewards/margins": 0.010393200442194939, |
|
"rewards/rejected": -0.013286183588206768, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2228130143056083, |
|
"grad_norm": 28.691940307617188, |
|
"learning_rate": 7.432432432432432e-08, |
|
"logits/chosen": -2.546154260635376, |
|
"logits/rejected": -2.389882802963257, |
|
"logps/chosen": -74.24641418457031, |
|
"logps/rejected": -72.99244689941406, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.003837780561298132, |
|
"rewards/margins": 0.012213540263473988, |
|
"rewards/rejected": -0.016051320359110832, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24306874287884542, |
|
"grad_norm": 26.047407150268555, |
|
"learning_rate": 8.108108108108108e-08, |
|
"logits/chosen": -2.530447006225586, |
|
"logits/rejected": -2.3604226112365723, |
|
"logps/chosen": -79.45042419433594, |
|
"logps/rejected": -75.46896362304688, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.004467087332159281, |
|
"rewards/margins": 0.020118705928325653, |
|
"rewards/rejected": -0.024585790932178497, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26332447145208254, |
|
"grad_norm": 30.345191955566406, |
|
"learning_rate": 8.783783783783784e-08, |
|
"logits/chosen": -2.4959208965301514, |
|
"logits/rejected": -2.344454526901245, |
|
"logps/chosen": -86.02290344238281, |
|
"logps/rejected": -81.23602294921875, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.006714115384966135, |
|
"rewards/margins": 0.02207200787961483, |
|
"rewards/rejected": -0.028786126524209976, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28358020002531964, |
|
"grad_norm": 23.314868927001953, |
|
"learning_rate": 9.45945945945946e-08, |
|
"logits/chosen": -2.5607352256774902, |
|
"logits/rejected": -2.394366502761841, |
|
"logps/chosen": -72.60206604003906, |
|
"logps/rejected": -67.85148620605469, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.008559314534068108, |
|
"rewards/margins": 0.027249369770288467, |
|
"rewards/rejected": -0.035808682441711426, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3038359285985568, |
|
"grad_norm": 27.51814079284668, |
|
"learning_rate": 9.999944288759615e-08, |
|
"logits/chosen": -2.5597286224365234, |
|
"logits/rejected": -2.4156954288482666, |
|
"logps/chosen": -74.42972564697266, |
|
"logps/rejected": -70.92676544189453, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.00905610155314207, |
|
"rewards/margins": 0.03394917771220207, |
|
"rewards/rejected": -0.043005283921957016, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3240916571717939, |
|
"grad_norm": 30.097389221191406, |
|
"learning_rate": 9.99799452570021e-08, |
|
"logits/chosen": -2.508636236190796, |
|
"logits/rejected": -2.3848562240600586, |
|
"logps/chosen": -77.739013671875, |
|
"logps/rejected": -73.92839813232422, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.017720907926559448, |
|
"rewards/margins": 0.037171002477407455, |
|
"rewards/rejected": -0.054891906678676605, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34434738574503104, |
|
"grad_norm": 32.0848503112793, |
|
"learning_rate": 9.993260441994115e-08, |
|
"logits/chosen": -2.5097594261169434, |
|
"logits/rejected": -2.3447771072387695, |
|
"logps/chosen": -81.94526672363281, |
|
"logps/rejected": -78.39651489257812, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.013811466284096241, |
|
"rewards/margins": 0.06231771036982536, |
|
"rewards/rejected": -0.07612917572259903, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36460311431826814, |
|
"grad_norm": 29.288728713989258, |
|
"learning_rate": 9.985744674940535e-08, |
|
"logits/chosen": -2.5364279747009277, |
|
"logits/rejected": -2.354965925216675, |
|
"logps/chosen": -80.46150207519531, |
|
"logps/rejected": -75.10428619384766, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.0209208894520998, |
|
"rewards/margins": 0.07542888820171356, |
|
"rewards/rejected": -0.09634977579116821, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38485884289150524, |
|
"grad_norm": 27.00816535949707, |
|
"learning_rate": 9.975451411479911e-08, |
|
"logits/chosen": -2.499474048614502, |
|
"logits/rejected": -2.337350606918335, |
|
"logps/chosen": -78.61238098144531, |
|
"logps/rejected": -78.64549255371094, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.02487110160291195, |
|
"rewards/margins": 0.07822562754154205, |
|
"rewards/rejected": -0.10309673845767975, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4051145714647424, |
|
"grad_norm": 27.34921646118164, |
|
"learning_rate": 9.962386385861412e-08, |
|
"logits/chosen": -2.50087308883667, |
|
"logits/rejected": -2.360152006149292, |
|
"logps/chosen": -76.67208862304688, |
|
"logps/rejected": -78.57847595214844, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.038105227053165436, |
|
"rewards/margins": 0.08875634521245956, |
|
"rewards/rejected": -0.126861572265625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4253703000379795, |
|
"grad_norm": 25.707185745239258, |
|
"learning_rate": 9.946556876448468e-08, |
|
"logits/chosen": -2.4654879570007324, |
|
"logits/rejected": -2.312530994415283, |
|
"logps/chosen": -78.15449523925781, |
|
"logps/rejected": -77.42134857177734, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.03906805440783501, |
|
"rewards/margins": 0.10793592780828476, |
|
"rewards/rejected": -0.14700399339199066, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4456260286112166, |
|
"grad_norm": 27.729816436767578, |
|
"learning_rate": 9.927971701664084e-08, |
|
"logits/chosen": -2.4674429893493652, |
|
"logits/rejected": -2.3009190559387207, |
|
"logps/chosen": -75.07694244384766, |
|
"logps/rejected": -75.41253662109375, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.0388757549226284, |
|
"rewards/margins": 0.1117323786020279, |
|
"rewards/rejected": -0.1506081372499466, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.46588175718445374, |
|
"grad_norm": 24.861696243286133, |
|
"learning_rate": 9.906641215078196e-08, |
|
"logits/chosen": -2.462665557861328, |
|
"logits/rejected": -2.309985876083374, |
|
"logps/chosen": -77.72923278808594, |
|
"logps/rejected": -75.81291961669922, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.052964676171541214, |
|
"rewards/margins": 0.12125100940465927, |
|
"rewards/rejected": -0.1742156744003296, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.48613748575769083, |
|
"grad_norm": 30.613037109375, |
|
"learning_rate": 9.882577299639835e-08, |
|
"logits/chosen": -2.4711391925811768, |
|
"logits/rejected": -2.329216957092285, |
|
"logps/chosen": -80.07206726074219, |
|
"logps/rejected": -80.30625915527344, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.0626656636595726, |
|
"rewards/margins": 0.13491353392601013, |
|
"rewards/rejected": -0.19757920503616333, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.506393214330928, |
|
"grad_norm": 26.08587074279785, |
|
"learning_rate": 9.85579336105728e-08, |
|
"logits/chosen": -2.443732738494873, |
|
"logits/rejected": -2.2919225692749023, |
|
"logps/chosen": -81.61358642578125, |
|
"logps/rejected": -77.46446990966797, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.05705754831433296, |
|
"rewards/margins": 0.13192656636238098, |
|
"rewards/rejected": -0.18898411095142365, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5266489429041651, |
|
"grad_norm": 24.169845581054688, |
|
"learning_rate": 9.826304320329907e-08, |
|
"logits/chosen": -2.478874683380127, |
|
"logits/rejected": -2.297999382019043, |
|
"logps/chosen": -83.48451232910156, |
|
"logps/rejected": -78.53952026367188, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.07330447435379028, |
|
"rewards/margins": 0.16096489131450653, |
|
"rewards/rejected": -0.23426935076713562, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5469046714774022, |
|
"grad_norm": 26.509349822998047, |
|
"learning_rate": 9.794126605435884e-08, |
|
"logits/chosen": -2.452291488647461, |
|
"logits/rejected": -2.2766990661621094, |
|
"logps/chosen": -83.7742691040039, |
|
"logps/rejected": -80.86663818359375, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.08826017379760742, |
|
"rewards/margins": 0.20091946423053741, |
|
"rewards/rejected": -0.28917962312698364, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5671604000506393, |
|
"grad_norm": 23.097434997558594, |
|
"learning_rate": 9.759278142180347e-08, |
|
"logits/chosen": -2.4537911415100098, |
|
"logits/rejected": -2.291194438934326, |
|
"logps/chosen": -77.87368774414062, |
|
"logps/rejected": -77.5306625366211, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.08215166628360748, |
|
"rewards/margins": 0.20681920647621155, |
|
"rewards/rejected": -0.28897085785865784, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5874161286238765, |
|
"grad_norm": 25.596263885498047, |
|
"learning_rate": 9.72177834420916e-08, |
|
"logits/chosen": -2.4155325889587402, |
|
"logits/rejected": -2.2689411640167236, |
|
"logps/chosen": -84.03662109375, |
|
"logps/rejected": -83.03952026367188, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12551462650299072, |
|
"rewards/margins": 0.20985326170921326, |
|
"rewards/rejected": -0.33536791801452637, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6076718571971136, |
|
"grad_norm": 32.68680191040039, |
|
"learning_rate": 9.68164810219381e-08, |
|
"logits/chosen": -2.4283127784729004, |
|
"logits/rejected": -2.3022093772888184, |
|
"logps/chosen": -74.84422302246094, |
|
"logps/rejected": -76.46062469482422, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.13987191021442413, |
|
"rewards/margins": 0.19149479269981384, |
|
"rewards/rejected": -0.33136671781539917, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6279275857703507, |
|
"grad_norm": 26.36361312866211, |
|
"learning_rate": 9.638909772193478e-08, |
|
"logits/chosen": -2.3842902183532715, |
|
"logits/rejected": -2.242034435272217, |
|
"logps/chosen": -85.19241333007812, |
|
"logps/rejected": -81.81166076660156, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.12717826664447784, |
|
"rewards/margins": 0.21827277541160583, |
|
"rewards/rejected": -0.3454510569572449, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6481833143435878, |
|
"grad_norm": 26.719867706298828, |
|
"learning_rate": 9.593587163200753e-08, |
|
"logits/chosen": -2.4053542613983154, |
|
"logits/rejected": -2.277993679046631, |
|
"logps/chosen": -82.21893310546875, |
|
"logps/rejected": -82.72879791259766, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.14859510958194733, |
|
"rewards/margins": 0.2397138774394989, |
|
"rewards/rejected": -0.38830894231796265, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6684390429168249, |
|
"grad_norm": 29.379661560058594, |
|
"learning_rate": 9.545705523877943e-08, |
|
"logits/chosen": -2.39337420463562, |
|
"logits/rejected": -2.243393659591675, |
|
"logps/chosen": -89.27748107910156, |
|
"logps/rejected": -88.36190795898438, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.15845103561878204, |
|
"rewards/margins": 0.27531546354293823, |
|
"rewards/rejected": -0.43376651406288147, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6886947714900621, |
|
"grad_norm": 29.51100730895996, |
|
"learning_rate": 9.495291528491348e-08, |
|
"logits/chosen": -2.4061636924743652, |
|
"logits/rejected": -2.2737927436828613, |
|
"logps/chosen": -77.5340805053711, |
|
"logps/rejected": -78.92658996582031, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.18812718987464905, |
|
"rewards/margins": 0.22010421752929688, |
|
"rewards/rejected": -0.4082314372062683, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7089505000632992, |
|
"grad_norm": 28.85425567626953, |
|
"learning_rate": 9.442373262051371e-08, |
|
"logits/chosen": -2.3706448078155518, |
|
"logits/rejected": -2.207597017288208, |
|
"logps/chosen": -84.70035552978516, |
|
"logps/rejected": -79.90083312988281, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.1977623850107193, |
|
"rewards/margins": 0.24996185302734375, |
|
"rewards/rejected": -0.44772419333457947, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7292062286365363, |
|
"grad_norm": 28.540573120117188, |
|
"learning_rate": 9.386980204666698e-08, |
|
"logits/chosen": -2.369175910949707, |
|
"logits/rejected": -2.214489459991455, |
|
"logps/chosen": -80.21549224853516, |
|
"logps/rejected": -79.48722839355469, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.21811044216156006, |
|
"rewards/margins": 0.2636975646018982, |
|
"rewards/rejected": -0.48180800676345825, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7494619572097734, |
|
"grad_norm": 32.249847412109375, |
|
"learning_rate": 9.3291432151213e-08, |
|
"logits/chosen": -2.3587095737457275, |
|
"logits/rejected": -2.218735456466675, |
|
"logps/chosen": -85.94621276855469, |
|
"logps/rejected": -85.6765365600586, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.21563701331615448, |
|
"rewards/margins": 0.27373427152633667, |
|
"rewards/rejected": -0.48937129974365234, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7697176857830105, |
|
"grad_norm": 26.788440704345703, |
|
"learning_rate": 9.26889451368339e-08, |
|
"logits/chosen": -2.368450164794922, |
|
"logits/rejected": -2.2382750511169434, |
|
"logps/chosen": -84.80735778808594, |
|
"logps/rejected": -83.58937072753906, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.22893838584423065, |
|
"rewards/margins": 0.27746590971946716, |
|
"rewards/rejected": -0.5064042806625366, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7899734143562476, |
|
"grad_norm": 26.557083129882812, |
|
"learning_rate": 9.206267664155906e-08, |
|
"logits/chosen": -2.389660120010376, |
|
"logits/rejected": -2.2198188304901123, |
|
"logps/chosen": -80.32451629638672, |
|
"logps/rejected": -81.00599670410156, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.22655515372753143, |
|
"rewards/margins": 0.3575701117515564, |
|
"rewards/rejected": -0.584125280380249, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8102291429294848, |
|
"grad_norm": 22.922771453857422, |
|
"learning_rate": 9.141297555178535e-08, |
|
"logits/chosen": -2.4124135971069336, |
|
"logits/rejected": -2.25138521194458, |
|
"logps/chosen": -73.63737487792969, |
|
"logps/rejected": -74.43919372558594, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.2471962869167328, |
|
"rewards/margins": 0.31536445021629333, |
|
"rewards/rejected": -0.5625607371330261, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8304848715027219, |
|
"grad_norm": 27.076038360595703, |
|
"learning_rate": 9.074020380791693e-08, |
|
"logits/chosen": -2.387418270111084, |
|
"logits/rejected": -2.233450412750244, |
|
"logps/chosen": -75.89783477783203, |
|
"logps/rejected": -77.44602966308594, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2607758641242981, |
|
"rewards/margins": 0.3421139121055603, |
|
"rewards/rejected": -0.6028897762298584, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.850740600075959, |
|
"grad_norm": 28.83265495300293, |
|
"learning_rate": 9.004473620273263e-08, |
|
"logits/chosen": -2.3343796730041504, |
|
"logits/rejected": -2.207730293273926, |
|
"logps/chosen": -80.99537658691406, |
|
"logps/rejected": -83.44149017333984, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2705189883708954, |
|
"rewards/margins": 0.32222992181777954, |
|
"rewards/rejected": -0.5927489399909973, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8709963286491961, |
|
"grad_norm": 29.984909057617188, |
|
"learning_rate": 8.932696017259361e-08, |
|
"logits/chosen": -2.3199007511138916, |
|
"logits/rejected": -2.1576333045959473, |
|
"logps/chosen": -85.59019470214844, |
|
"logps/rejected": -84.45293426513672, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.25998255610466003, |
|
"rewards/margins": 0.3359551429748535, |
|
"rewards/rejected": -0.5959377288818359, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8912520572224332, |
|
"grad_norm": 25.058645248413086, |
|
"learning_rate": 8.858727558160743e-08, |
|
"logits/chosen": -2.3427436351776123, |
|
"logits/rejected": -2.1888678073883057, |
|
"logps/chosen": -82.66050720214844, |
|
"logps/rejected": -82.61741638183594, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26411646604537964, |
|
"rewards/margins": 0.3582358956336975, |
|
"rewards/rejected": -0.6223524212837219, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9115077857956704, |
|
"grad_norm": 27.469467163085938, |
|
"learning_rate": 8.782609449886861e-08, |
|
"logits/chosen": -2.325899839401245, |
|
"logits/rejected": -2.1819796562194824, |
|
"logps/chosen": -82.68738555908203, |
|
"logps/rejected": -82.69558715820312, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.27801764011383057, |
|
"rewards/margins": 0.37557533383369446, |
|
"rewards/rejected": -0.6535929441452026, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9317635143689075, |
|
"grad_norm": 25.38697052001953, |
|
"learning_rate": 8.704384096890013e-08, |
|
"logits/chosen": -2.3276028633117676, |
|
"logits/rejected": -2.1806609630584717, |
|
"logps/chosen": -84.02021789550781, |
|
"logps/rejected": -83.25233459472656, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.2819129526615143, |
|
"rewards/margins": 0.41642332077026367, |
|
"rewards/rejected": -0.6983363032341003, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9520192429421446, |
|
"grad_norm": 32.92365264892578, |
|
"learning_rate": 8.62409507754235e-08, |
|
"logits/chosen": -2.2575485706329346, |
|
"logits/rejected": -2.1456449031829834, |
|
"logps/chosen": -87.51969909667969, |
|
"logps/rejected": -87.74186706542969, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.3425014913082123, |
|
"rewards/margins": 0.37962430715560913, |
|
"rewards/rejected": -0.7221258878707886, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9722749715153817, |
|
"grad_norm": 29.529056549072266, |
|
"learning_rate": 8.541787119858902e-08, |
|
"logits/chosen": -2.302694082260132, |
|
"logits/rejected": -2.162090539932251, |
|
"logps/chosen": -79.02600860595703, |
|
"logps/rejected": -78.34095764160156, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.35388362407684326, |
|
"rewards/margins": 0.3509567975997925, |
|
"rewards/rejected": -0.7048403024673462, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9925307000886188, |
|
"grad_norm": 23.92174530029297, |
|
"learning_rate": 8.457506076580162e-08, |
|
"logits/chosen": -2.3030953407287598, |
|
"logits/rejected": -2.158973217010498, |
|
"logps/chosen": -81.10468292236328, |
|
"logps/rejected": -83.77845764160156, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.3333788514137268, |
|
"rewards/margins": 0.4156631529331207, |
|
"rewards/rejected": -0.7490419745445251, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.012786428661856, |
|
"grad_norm": 30.66814613342285, |
|
"learning_rate": 8.371298899628089e-08, |
|
"logits/chosen": -2.2549185752868652, |
|
"logits/rejected": -2.122537612915039, |
|
"logps/chosen": -83.01513671875, |
|
"logps/rejected": -86.776123046875, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.380868136882782, |
|
"rewards/margins": 0.471442848443985, |
|
"rewards/rejected": -0.8523109555244446, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.033042157235093, |
|
"grad_norm": 26.109542846679688, |
|
"learning_rate": 8.28321361394978e-08, |
|
"logits/chosen": -2.2775070667266846, |
|
"logits/rejected": -2.1255202293395996, |
|
"logps/chosen": -81.81797790527344, |
|
"logps/rejected": -84.06095123291016, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3651012182235718, |
|
"rewards/margins": 0.4572354853153229, |
|
"rewards/rejected": -0.8223366737365723, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0532978858083302, |
|
"grad_norm": 28.901084899902344, |
|
"learning_rate": 8.193299290763362e-08, |
|
"logits/chosen": -2.2764482498168945, |
|
"logits/rejected": -2.128359317779541, |
|
"logps/chosen": -83.87080383300781, |
|
"logps/rejected": -83.10397338867188, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.38391047716140747, |
|
"rewards/margins": 0.43836045265197754, |
|
"rewards/rejected": -0.8222709894180298, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.0735536143815674, |
|
"grad_norm": 26.954652786254883, |
|
"learning_rate": 8.101606020221038e-08, |
|
"logits/chosen": -2.26556134223938, |
|
"logits/rejected": -2.12338924407959, |
|
"logps/chosen": -85.02649688720703, |
|
"logps/rejected": -85.88096618652344, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.4198201298713684, |
|
"rewards/margins": 0.4140090048313141, |
|
"rewards/rejected": -0.8338291049003601, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.0938093429548044, |
|
"grad_norm": 27.471698760986328, |
|
"learning_rate": 8.008184883504472e-08, |
|
"logits/chosen": -2.285780668258667, |
|
"logits/rejected": -2.15956449508667, |
|
"logps/chosen": -91.95730590820312, |
|
"logps/rejected": -89.56153869628906, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.37540143728256226, |
|
"rewards/margins": 0.4454117715358734, |
|
"rewards/rejected": -0.8208131790161133, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1140650715280416, |
|
"grad_norm": 27.839784622192383, |
|
"learning_rate": 7.913087924368102e-08, |
|
"logits/chosen": -2.272618055343628, |
|
"logits/rejected": -2.146136522293091, |
|
"logps/chosen": -82.60453033447266, |
|
"logps/rejected": -84.6351547241211, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3854660391807556, |
|
"rewards/margins": 0.4508994221687317, |
|
"rewards/rejected": -0.8363655209541321, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1343208001012786, |
|
"grad_norm": 29.220504760742188, |
|
"learning_rate": 7.816368120146224e-08, |
|
"logits/chosen": -2.2264904975891113, |
|
"logits/rejected": -2.1163620948791504, |
|
"logps/chosen": -80.90209197998047, |
|
"logps/rejected": -84.96456909179688, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.3850155472755432, |
|
"rewards/margins": 0.47849899530410767, |
|
"rewards/rejected": -0.8635146021842957, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1545765286745158, |
|
"grad_norm": 24.049087524414062, |
|
"learning_rate": 7.718079352239955e-08, |
|
"logits/chosen": -2.2715275287628174, |
|
"logits/rejected": -2.095773220062256, |
|
"logps/chosen": -83.15594482421875, |
|
"logps/rejected": -83.11366271972656, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.38344329595565796, |
|
"rewards/margins": 0.4991793632507324, |
|
"rewards/rejected": -0.8826227188110352, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.174832257247753, |
|
"grad_norm": 32.831993103027344, |
|
"learning_rate": 7.618276376100587e-08, |
|
"logits/chosen": -2.2670161724090576, |
|
"logits/rejected": -2.106199264526367, |
|
"logps/chosen": -81.29884338378906, |
|
"logps/rejected": -82.74469757080078, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4227599501609802, |
|
"rewards/margins": 0.4853205680847168, |
|
"rewards/rejected": -0.9080804586410522, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.19508798582099, |
|
"grad_norm": 26.217998504638672, |
|
"learning_rate": 7.517014790726011e-08, |
|
"logits/chosen": -2.2205467224121094, |
|
"logits/rejected": -2.0953052043914795, |
|
"logps/chosen": -83.64337921142578, |
|
"logps/rejected": -86.88542938232422, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.43212181329727173, |
|
"rewards/margins": 0.44605493545532227, |
|
"rewards/rejected": -0.8781768083572388, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2153437143942272, |
|
"grad_norm": 22.538532257080078, |
|
"learning_rate": 7.414351007687187e-08, |
|
"logits/chosen": -2.205540418624878, |
|
"logits/rejected": -2.0615344047546387, |
|
"logps/chosen": -88.0082015991211, |
|
"logps/rejected": -85.7721176147461, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4331473708152771, |
|
"rewards/margins": 0.5372229814529419, |
|
"rewards/rejected": -0.9703702926635742, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2355994429674642, |
|
"grad_norm": 29.271059036254883, |
|
"learning_rate": 7.310342219701981e-08, |
|
"logits/chosen": -2.2468390464782715, |
|
"logits/rejected": -2.107861280441284, |
|
"logps/chosen": -82.70673370361328, |
|
"logps/rejected": -83.1991958618164, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4486440122127533, |
|
"rewards/margins": 0.5201537013053894, |
|
"rewards/rejected": -0.9687976837158203, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2558551715407014, |
|
"grad_norm": 26.889476776123047, |
|
"learning_rate": 7.205046368773794e-08, |
|
"logits/chosen": -2.1630682945251465, |
|
"logits/rejected": -2.018644332885742, |
|
"logps/chosen": -86.0914535522461, |
|
"logps/rejected": -87.1644515991211, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.48391470313072205, |
|
"rewards/margins": 0.474750280380249, |
|
"rewards/rejected": -0.9586650133132935, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2761109001139386, |
|
"grad_norm": 32.43236541748047, |
|
"learning_rate": 7.098522113912808e-08, |
|
"logits/chosen": -2.2398154735565186, |
|
"logits/rejected": -2.0971333980560303, |
|
"logps/chosen": -81.19099426269531, |
|
"logps/rejected": -82.07807922363281, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.441434383392334, |
|
"rewards/margins": 0.5117905735969543, |
|
"rewards/rejected": -0.9532249569892883, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.2963666286871756, |
|
"grad_norm": 26.83734130859375, |
|
"learning_rate": 6.990828798457764e-08, |
|
"logits/chosen": -2.2109534740448, |
|
"logits/rejected": -2.083139657974243, |
|
"logps/chosen": -80.3914566040039, |
|
"logps/rejected": -87.99849700927734, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.4658758044242859, |
|
"rewards/margins": 0.5169892311096191, |
|
"rewards/rejected": -0.9828651547431946, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3166223572604128, |
|
"grad_norm": 25.39097785949707, |
|
"learning_rate": 6.882026417016541e-08, |
|
"logits/chosen": -2.230027437210083, |
|
"logits/rejected": -2.100419759750366, |
|
"logps/chosen": -82.01457214355469, |
|
"logps/rejected": -83.42109680175781, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.4833486080169678, |
|
"rewards/margins": 0.4825879633426666, |
|
"rewards/rejected": -0.9659366607666016, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3368780858336498, |
|
"grad_norm": 30.31522560119629, |
|
"learning_rate": 6.772175582043889e-08, |
|
"logits/chosen": -2.1776843070983887, |
|
"logits/rejected": -2.039802074432373, |
|
"logps/chosen": -83.42335510253906, |
|
"logps/rejected": -88.11833953857422, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.45507392287254333, |
|
"rewards/margins": 0.6273630857467651, |
|
"rewards/rejected": -1.0824369192123413, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.357133814406887, |
|
"grad_norm": 34.97041702270508, |
|
"learning_rate": 6.661337490075003e-08, |
|
"logits/chosen": -2.2355475425720215, |
|
"logits/rejected": -2.095804452896118, |
|
"logps/chosen": -83.29669952392578, |
|
"logps/rejected": -84.55702209472656, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.5031043291091919, |
|
"rewards/margins": 0.5249863862991333, |
|
"rewards/rejected": -1.0280907154083252, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.377389542980124, |
|
"grad_norm": 26.33791160583496, |
|
"learning_rate": 6.549573887633676e-08, |
|
"logits/chosen": -2.1734795570373535, |
|
"logits/rejected": -2.028352737426758, |
|
"logps/chosen": -83.61205291748047, |
|
"logps/rejected": -84.9587173461914, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.46062904596328735, |
|
"rewards/margins": 0.6703583002090454, |
|
"rewards/rejected": -1.1309874057769775, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.3976452715533612, |
|
"grad_norm": 35.68913269042969, |
|
"learning_rate": 6.436947036834086e-08, |
|
"logits/chosen": -2.1777005195617676, |
|
"logits/rejected": -2.054405689239502, |
|
"logps/chosen": -83.69397735595703, |
|
"logps/rejected": -86.34680938720703, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.518213152885437, |
|
"rewards/margins": 0.5348538756370544, |
|
"rewards/rejected": -1.0530669689178467, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4179010001265984, |
|
"grad_norm": 30.295581817626953, |
|
"learning_rate": 6.323519680695349e-08, |
|
"logits/chosen": -2.1419105529785156, |
|
"logits/rejected": -1.9936020374298096, |
|
"logps/chosen": -90.16146087646484, |
|
"logps/rejected": -89.37017822265625, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5005131363868713, |
|
"rewards/margins": 0.5668459534645081, |
|
"rewards/rejected": -1.067359209060669, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4381567286998354, |
|
"grad_norm": 26.36173439025879, |
|
"learning_rate": 6.209355008188152e-08, |
|
"logits/chosen": -2.1437783241271973, |
|
"logits/rejected": -2.0539603233337402, |
|
"logps/chosen": -89.0562973022461, |
|
"logps/rejected": -92.04231262207031, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5396249294281006, |
|
"rewards/margins": 0.5132231116294861, |
|
"rewards/rejected": -1.0528481006622314, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.4584124572730726, |
|
"grad_norm": 27.820674896240234, |
|
"learning_rate": 6.094516619032975e-08, |
|
"logits/chosen": -2.1499810218811035, |
|
"logits/rejected": -2.025269031524658, |
|
"logps/chosen": -83.47040557861328, |
|
"logps/rejected": -86.60690307617188, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.49484682083129883, |
|
"rewards/margins": 0.5879716277122498, |
|
"rewards/rejected": -1.0828183889389038, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.4786681858463098, |
|
"grad_norm": 24.657302856445312, |
|
"learning_rate": 5.979068488269468e-08, |
|
"logits/chosen": -2.1996073722839355, |
|
"logits/rejected": -2.0537047386169434, |
|
"logps/chosen": -86.91001892089844, |
|
"logps/rejected": -89.12824249267578, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.542976975440979, |
|
"rewards/margins": 0.6267004013061523, |
|
"rewards/rejected": -1.169677495956421, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.4989239144195468, |
|
"grad_norm": 28.313621520996094, |
|
"learning_rate": 5.8630749306167556e-08, |
|
"logits/chosen": -2.1813175678253174, |
|
"logits/rejected": -2.0757999420166016, |
|
"logps/chosen": -83.51826477050781, |
|
"logps/rejected": -89.63159942626953, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.515524685382843, |
|
"rewards/margins": 0.594115674495697, |
|
"rewards/rejected": -1.1096404790878296, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5191796429927837, |
|
"grad_norm": 29.172090530395508, |
|
"learning_rate": 5.7466005646445095e-08, |
|
"logits/chosen": -2.1559250354766846, |
|
"logits/rejected": -2.0057528018951416, |
|
"logps/chosen": -83.4648208618164, |
|
"logps/rejected": -85.3985366821289, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5010935068130493, |
|
"rewards/margins": 0.6447556018829346, |
|
"rewards/rejected": -1.1458488702774048, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5394353715660212, |
|
"grad_norm": 30.766334533691406, |
|
"learning_rate": 5.6297102767747325e-08, |
|
"logits/chosen": -2.1724162101745605, |
|
"logits/rejected": -2.0574355125427246, |
|
"logps/chosen": -90.57199096679688, |
|
"logps/rejected": -92.65487670898438, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5361162424087524, |
|
"rewards/margins": 0.5525388121604919, |
|
"rewards/rejected": -1.0886551141738892, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.5596911001392582, |
|
"grad_norm": 32.11360549926758, |
|
"learning_rate": 5.512469185134354e-08, |
|
"logits/chosen": -2.1918747425079346, |
|
"logits/rejected": -2.054835796356201, |
|
"logps/chosen": -84.68449401855469, |
|
"logps/rejected": -87.59661865234375, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5611705780029297, |
|
"rewards/margins": 0.6253485083580017, |
|
"rewards/rejected": -1.1865190267562866, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.5799468287124951, |
|
"grad_norm": 28.335988998413086, |
|
"learning_rate": 5.394942603278726e-08, |
|
"logits/chosen": -2.1388983726501465, |
|
"logits/rejected": -2.0282931327819824, |
|
"logps/chosen": -89.77628326416016, |
|
"logps/rejected": -90.62747955322266, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.524773895740509, |
|
"rewards/margins": 0.5710369944572449, |
|
"rewards/rejected": -1.095810890197754, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.6002025572857324, |
|
"grad_norm": 25.966876983642578, |
|
"learning_rate": 5.277196003806249e-08, |
|
"logits/chosen": -2.2010576725006104, |
|
"logits/rejected": -2.0596330165863037, |
|
"logps/chosen": -79.80914306640625, |
|
"logps/rejected": -82.98551940917969, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4947393536567688, |
|
"rewards/margins": 0.6838704347610474, |
|
"rewards/rejected": -1.1786099672317505, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6204582858589696, |
|
"grad_norm": 29.524734497070312, |
|
"learning_rate": 5.1592949818844046e-08, |
|
"logits/chosen": -2.1284611225128174, |
|
"logits/rejected": -2.0030832290649414, |
|
"logps/chosen": -86.19153594970703, |
|
"logps/rejected": -87.28998565673828, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.6002845764160156, |
|
"rewards/margins": 0.6240141987800598, |
|
"rewards/rejected": -1.2242988348007202, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6407140144322065, |
|
"grad_norm": 25.266782760620117, |
|
"learning_rate": 5.0413052187075054e-08, |
|
"logits/chosen": -2.168487787246704, |
|
"logits/rejected": -2.0182714462280273, |
|
"logps/chosen": -79.49079132080078, |
|
"logps/rejected": -81.5199203491211, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.48622050881385803, |
|
"rewards/margins": 0.6199517846107483, |
|
"rewards/rejected": -1.1061723232269287, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6609697430054438, |
|
"grad_norm": 26.315034866333008, |
|
"learning_rate": 4.9232924449065095e-08, |
|
"logits/chosen": -2.1593496799468994, |
|
"logits/rejected": -2.030149459838867, |
|
"logps/chosen": -81.82089233398438, |
|
"logps/rejected": -89.67662811279297, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5738162994384766, |
|
"rewards/margins": 0.6879658102989197, |
|
"rewards/rejected": -1.261782169342041, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.681225471578681, |
|
"grad_norm": 33.470664978027344, |
|
"learning_rate": 4.8053224039313114e-08, |
|
"logits/chosen": -2.1270673274993896, |
|
"logits/rejected": -2.012338638305664, |
|
"logps/chosen": -86.01063537597656, |
|
"logps/rejected": -85.80992889404297, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.535088837146759, |
|
"rewards/margins": 0.49604400992393494, |
|
"rewards/rejected": -1.0311328172683716, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.701481200151918, |
|
"grad_norm": 20.869911193847656, |
|
"learning_rate": 4.687460815425878e-08, |
|
"logits/chosen": -2.157341480255127, |
|
"logits/rejected": -2.007072925567627, |
|
"logps/chosen": -80.98677825927734, |
|
"logps/rejected": -83.40042114257812, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.568179190158844, |
|
"rewards/margins": 0.660231351852417, |
|
"rewards/rejected": -1.2284104824066162, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.721736928725155, |
|
"grad_norm": 23.50938606262207, |
|
"learning_rate": 4.5697733386166524e-08, |
|
"logits/chosen": -2.1210384368896484, |
|
"logits/rejected": -1.9905798435211182, |
|
"logps/chosen": -87.40711975097656, |
|
"logps/rejected": -86.93902587890625, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5617082715034485, |
|
"rewards/margins": 0.592042088508606, |
|
"rewards/rejected": -1.1537501811981201, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7419926572983924, |
|
"grad_norm": 27.28333282470703, |
|
"learning_rate": 4.4523255357346187e-08, |
|
"logits/chosen": -2.1478943824768066, |
|
"logits/rejected": -2.024747371673584, |
|
"logps/chosen": -84.65662384033203, |
|
"logps/rejected": -88.84449005126953, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5495315790176392, |
|
"rewards/margins": 0.6314437985420227, |
|
"rewards/rejected": -1.1809751987457275, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7622483858716294, |
|
"grad_norm": 25.31683349609375, |
|
"learning_rate": 4.335182835491387e-08, |
|
"logits/chosen": -2.1592297554016113, |
|
"logits/rejected": -2.031510829925537, |
|
"logps/chosen": -83.72755432128906, |
|
"logps/rejected": -90.34858703613281, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.6199240684509277, |
|
"rewards/margins": 0.634971022605896, |
|
"rewards/rejected": -1.2548949718475342, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7825041144448663, |
|
"grad_norm": 29.31256103515625, |
|
"learning_rate": 4.218410496629684e-08, |
|
"logits/chosen": -2.1241517066955566, |
|
"logits/rejected": -1.9871305227279663, |
|
"logps/chosen": -76.77335357666016, |
|
"logps/rejected": -80.37085723876953, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5658852458000183, |
|
"rewards/margins": 0.6777737736701965, |
|
"rewards/rejected": -1.2436590194702148, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.8027598430181035, |
|
"grad_norm": 29.485347747802734, |
|
"learning_rate": 4.102073571568516e-08, |
|
"logits/chosen": -2.1224985122680664, |
|
"logits/rejected": -1.993857741355896, |
|
"logps/chosen": -86.9496078491211, |
|
"logps/rejected": -88.30699157714844, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.5687106847763062, |
|
"rewards/margins": 0.6147977709770203, |
|
"rewards/rejected": -1.1835086345672607, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8230155715913408, |
|
"grad_norm": 27.672901153564453, |
|
"learning_rate": 3.986236870163262e-08, |
|
"logits/chosen": -2.1112308502197266, |
|
"logits/rejected": -1.99734628200531, |
|
"logps/chosen": -85.15098571777344, |
|
"logps/rejected": -90.8910140991211, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5743888020515442, |
|
"rewards/margins": 0.6254476308822632, |
|
"rewards/rejected": -1.1998364925384521, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8432713001645777, |
|
"grad_norm": 28.670635223388672, |
|
"learning_rate": 3.870964923600923e-08, |
|
"logits/chosen": -2.088013172149658, |
|
"logits/rejected": -1.9703779220581055, |
|
"logps/chosen": -83.16795349121094, |
|
"logps/rejected": -86.55892181396484, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5139526724815369, |
|
"rewards/margins": 0.6616466045379639, |
|
"rewards/rejected": -1.1755993366241455, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.863527028737815, |
|
"grad_norm": 30.791370391845703, |
|
"learning_rate": 3.756321948450599e-08, |
|
"logits/chosen": -2.120954751968384, |
|
"logits/rejected": -1.9975354671478271, |
|
"logps/chosen": -84.26751708984375, |
|
"logps/rejected": -86.91252899169922, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6508383750915527, |
|
"rewards/margins": 0.5812313556671143, |
|
"rewards/rejected": -1.232069730758667, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.8837827573110522, |
|
"grad_norm": 33.708187103271484, |
|
"learning_rate": 3.642371810889222e-08, |
|
"logits/chosen": -2.092048168182373, |
|
"logits/rejected": -1.9911048412322998, |
|
"logps/chosen": -84.46561431884766, |
|
"logps/rejected": -88.94342041015625, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5762837529182434, |
|
"rewards/margins": 0.5255244970321655, |
|
"rewards/rejected": -1.1018081903457642, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.9040384858842891, |
|
"grad_norm": 25.534833908081055, |
|
"learning_rate": 3.529177991122518e-08, |
|
"logits/chosen": -2.066344738006592, |
|
"logits/rejected": -1.9486182928085327, |
|
"logps/chosen": -91.85676574707031, |
|
"logps/rejected": -94.47185516357422, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5671579837799072, |
|
"rewards/margins": 0.6500786542892456, |
|
"rewards/rejected": -1.2172366380691528, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9242942144575261, |
|
"grad_norm": 26.457944869995117, |
|
"learning_rate": 3.416803548020969e-08, |
|
"logits/chosen": -2.115591049194336, |
|
"logits/rejected": -1.9885094165802002, |
|
"logps/chosen": -89.46985626220703, |
|
"logps/rejected": -93.4120864868164, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.5443202257156372, |
|
"rewards/margins": 0.6258162260055542, |
|
"rewards/rejected": -1.1701364517211914, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9445499430307633, |
|
"grad_norm": 27.294607162475586, |
|
"learning_rate": 3.305311083990496e-08, |
|
"logits/chosen": -2.1644487380981445, |
|
"logits/rejected": -2.040801525115967, |
|
"logps/chosen": -76.92610931396484, |
|
"logps/rejected": -82.45623016357422, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6151694655418396, |
|
"rewards/margins": 0.5893052220344543, |
|
"rewards/rejected": -1.204474687576294, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.9648056716040005, |
|
"grad_norm": 27.27229881286621, |
|
"learning_rate": 3.194762710097436e-08, |
|
"logits/chosen": -2.1350479125976562, |
|
"logits/rejected": -2.030987501144409, |
|
"logps/chosen": -80.64533996582031, |
|
"logps/rejected": -85.28868103027344, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.5596300363540649, |
|
"rewards/margins": 0.5202323794364929, |
|
"rewards/rejected": -1.0798624753952026, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9850614001772375, |
|
"grad_norm": 24.55060386657715, |
|
"learning_rate": 3.0852200114672453e-08, |
|
"logits/chosen": -2.127375602722168, |
|
"logits/rejected": -1.991199254989624, |
|
"logps/chosen": -85.77812957763672, |
|
"logps/rejected": -89.46642303466797, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5709745287895203, |
|
"rewards/margins": 0.6586155891418457, |
|
"rewards/rejected": -1.2295901775360107, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.005317128750475, |
|
"grad_norm": 25.645376205444336, |
|
"learning_rate": 2.976744012976189e-08, |
|
"logits/chosen": -2.1159276962280273, |
|
"logits/rejected": -1.9866254329681396, |
|
"logps/chosen": -87.69465637207031, |
|
"logps/rejected": -88.93052673339844, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5695109367370605, |
|
"rewards/margins": 0.6490300893783569, |
|
"rewards/rejected": -1.218540906906128, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.025572857323712, |
|
"grad_norm": 27.878236770629883, |
|
"learning_rate": 2.8693951452551307e-08, |
|
"logits/chosen": -2.0782949924468994, |
|
"logits/rejected": -1.9823192358016968, |
|
"logps/chosen": -79.88198852539062, |
|
"logps/rejected": -86.08316802978516, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5980249643325806, |
|
"rewards/margins": 0.5769001245498657, |
|
"rewards/rejected": -1.1749250888824463, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.045828585896949, |
|
"grad_norm": 28.799379348754883, |
|
"learning_rate": 2.7632332110243967e-08, |
|
"logits/chosen": -2.0895416736602783, |
|
"logits/rejected": -1.975619912147522, |
|
"logps/chosen": -86.46625518798828, |
|
"logps/rejected": -90.41603088378906, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.5257114171981812, |
|
"rewards/margins": 0.6659296751022339, |
|
"rewards/rejected": -1.1916412115097046, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.066084314470186, |
|
"grad_norm": 48.88608169555664, |
|
"learning_rate": 2.658317351778412e-08, |
|
"logits/chosen": -2.099612236022949, |
|
"logits/rejected": -1.9862468242645264, |
|
"logps/chosen": -86.78905487060547, |
|
"logps/rejected": -92.29573059082031, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.564150869846344, |
|
"rewards/margins": 0.7153151631355286, |
|
"rewards/rejected": -1.279465913772583, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.0863400430434234, |
|
"grad_norm": 24.239578247070312, |
|
"learning_rate": 2.554706014838705e-08, |
|
"logits/chosen": -2.1574556827545166, |
|
"logits/rejected": -2.014895439147949, |
|
"logps/chosen": -84.7563247680664, |
|
"logps/rejected": -85.77194213867188, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5780550837516785, |
|
"rewards/margins": 0.6501516103744507, |
|
"rewards/rejected": -1.2282066345214844, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.1065957716166603, |
|
"grad_norm": 28.262638092041016, |
|
"learning_rate": 2.4524569207936445e-08, |
|
"logits/chosen": -2.0934982299804688, |
|
"logits/rejected": -1.962937355041504, |
|
"logps/chosen": -86.09654235839844, |
|
"logps/rejected": -91.18133544921875, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.567767322063446, |
|
"rewards/margins": 0.8064430356025696, |
|
"rewards/rejected": -1.374210238456726, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.1268515001898973, |
|
"grad_norm": 30.161561965942383, |
|
"learning_rate": 2.351627031343008e-08, |
|
"logits/chosen": -2.134225368499756, |
|
"logits/rejected": -1.9962198734283447, |
|
"logps/chosen": -87.08121490478516, |
|
"logps/rejected": -90.91963958740234, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.5947022438049316, |
|
"rewards/margins": 0.6418746113777161, |
|
"rewards/rejected": -1.236576795578003, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.1471072287631348, |
|
"grad_norm": 29.3469295501709, |
|
"learning_rate": 2.2522725175653233e-08, |
|
"logits/chosen": -2.0764639377593994, |
|
"logits/rejected": -1.9766466617584229, |
|
"logps/chosen": -87.24481964111328, |
|
"logps/rejected": -93.39856719970703, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6166614294052124, |
|
"rewards/margins": 0.5650585293769836, |
|
"rewards/rejected": -1.1817197799682617, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.1673629573363717, |
|
"grad_norm": 23.277862548828125, |
|
"learning_rate": 2.154448728625668e-08, |
|
"logits/chosen": -2.1141998767852783, |
|
"logits/rejected": -1.9909133911132812, |
|
"logps/chosen": -84.21327209472656, |
|
"logps/rejected": -86.62323760986328, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5331219434738159, |
|
"rewards/margins": 0.7241859436035156, |
|
"rewards/rejected": -1.257307767868042, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.1876186859096087, |
|
"grad_norm": 26.303924560546875, |
|
"learning_rate": 2.0582101609413333e-08, |
|
"logits/chosen": -2.0846378803253174, |
|
"logits/rejected": -1.9430017471313477, |
|
"logps/chosen": -90.30846405029297, |
|
"logps/rejected": -91.7738265991211, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5784596800804138, |
|
"rewards/margins": 0.7019392251968384, |
|
"rewards/rejected": -1.2803988456726074, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.2078744144828457, |
|
"grad_norm": 29.66044044494629, |
|
"learning_rate": 1.9636104278225413e-08, |
|
"logits/chosen": -2.113520860671997, |
|
"logits/rejected": -2.006913661956787, |
|
"logps/chosen": -87.39739990234375, |
|
"logps/rejected": -91.94351959228516, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.6303068995475769, |
|
"rewards/margins": 0.7132034301757812, |
|
"rewards/rejected": -1.343510389328003, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.228130143056083, |
|
"grad_norm": 31.82991600036621, |
|
"learning_rate": 1.8707022296051462e-08, |
|
"logits/chosen": -2.1318724155426025, |
|
"logits/rejected": -1.997719407081604, |
|
"logps/chosen": -83.96778869628906, |
|
"logps/rejected": -91.7020492553711, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5827969312667847, |
|
"rewards/margins": 0.7294033765792847, |
|
"rewards/rejected": -1.3122001886367798, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.24838587162932, |
|
"grad_norm": 30.810279846191406, |
|
"learning_rate": 1.779537324291926e-08, |
|
"logits/chosen": -2.087120771408081, |
|
"logits/rejected": -1.974585771560669, |
|
"logps/chosen": -85.29585266113281, |
|
"logps/rejected": -90.92556762695312, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6058118939399719, |
|
"rewards/margins": 0.6656503081321716, |
|
"rewards/rejected": -1.2714622020721436, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.268641600202557, |
|
"grad_norm": 24.00498390197754, |
|
"learning_rate": 1.6901664987188425e-08, |
|
"logits/chosen": -2.0903851985931396, |
|
"logits/rejected": -1.9751678705215454, |
|
"logps/chosen": -83.34523010253906, |
|
"logps/rejected": -86.21971130371094, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5954999923706055, |
|
"rewards/margins": 0.6783249378204346, |
|
"rewards/rejected": -1.27382493019104, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.2888973287757945, |
|
"grad_norm": 39.50699234008789, |
|
"learning_rate": 1.6026395402623272e-08, |
|
"logits/chosen": -2.0663511753082275, |
|
"logits/rejected": -1.9365609884262085, |
|
"logps/chosen": -94.30004119873047, |
|
"logps/rejected": -97.46401977539062, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6117586493492126, |
|
"rewards/margins": 0.6866195797920227, |
|
"rewards/rejected": -1.2983782291412354, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.3091530573490315, |
|
"grad_norm": 26.588275909423828, |
|
"learning_rate": 1.5170052091033552e-08, |
|
"logits/chosen": -2.1126387119293213, |
|
"logits/rejected": -1.959896445274353, |
|
"logps/chosen": -80.80674743652344, |
|
"logps/rejected": -83.63319396972656, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.616263747215271, |
|
"rewards/margins": 0.7580442428588867, |
|
"rewards/rejected": -1.3743079900741577, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.3294087859222685, |
|
"grad_norm": 28.325511932373047, |
|
"learning_rate": 1.4333112110637453e-08, |
|
"logits/chosen": -2.064669609069824, |
|
"logits/rejected": -1.9410665035247803, |
|
"logps/chosen": -84.78388977050781, |
|
"logps/rejected": -87.63703155517578, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5784262418746948, |
|
"rewards/margins": 0.691449761390686, |
|
"rewards/rejected": -1.2698760032653809, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.349664514495506, |
|
"grad_norm": 27.597017288208008, |
|
"learning_rate": 1.3516041710298498e-08, |
|
"logits/chosen": -2.1402578353881836, |
|
"logits/rejected": -2.004826068878174, |
|
"logps/chosen": -87.74010467529297, |
|
"logps/rejected": -89.4506607055664, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5920398831367493, |
|
"rewards/margins": 0.6377407908439636, |
|
"rewards/rejected": -1.2297805547714233, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.369920243068743, |
|
"grad_norm": 33.10106658935547, |
|
"learning_rate": 1.2719296069784063e-08, |
|
"logits/chosen": -2.062407970428467, |
|
"logits/rejected": -1.9447336196899414, |
|
"logps/chosen": -89.19010925292969, |
|
"logps/rejected": -95.0318374633789, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5857541561126709, |
|
"rewards/margins": 0.7076044082641602, |
|
"rewards/rejected": -1.293358564376831, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.39017597164198, |
|
"grad_norm": 33.07532501220703, |
|
"learning_rate": 1.1943319046190332e-08, |
|
"logits/chosen": -2.074035167694092, |
|
"logits/rejected": -1.965685486793518, |
|
"logps/chosen": -80.5416030883789, |
|
"logps/rejected": -84.71125030517578, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5754114389419556, |
|
"rewards/margins": 0.7324446439743042, |
|
"rewards/rejected": -1.3078559637069702, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.4104317002152174, |
|
"grad_norm": 26.195051193237305, |
|
"learning_rate": 1.1188542926675104e-08, |
|
"logits/chosen": -2.117806911468506, |
|
"logits/rejected": -1.9781955480575562, |
|
"logps/chosen": -86.0428466796875, |
|
"logps/rejected": -90.29086303710938, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.5670899152755737, |
|
"rewards/margins": 0.8041001558303833, |
|
"rewards/rejected": -1.371190071105957, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.4306874287884543, |
|
"grad_norm": 23.817218780517578, |
|
"learning_rate": 1.0455388187635933e-08, |
|
"logits/chosen": -2.1228573322296143, |
|
"logits/rejected": -1.9943969249725342, |
|
"logps/chosen": -81.49883270263672, |
|
"logps/rejected": -83.27964782714844, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.6024131774902344, |
|
"rewards/margins": 0.6800934076309204, |
|
"rewards/rejected": -1.2825065851211548, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.4509431573616913, |
|
"grad_norm": 30.316747665405273, |
|
"learning_rate": 9.744263260468005e-09, |
|
"logits/chosen": -2.059378147125244, |
|
"logits/rejected": -1.9458458423614502, |
|
"logps/chosen": -92.07587432861328, |
|
"logps/rejected": -95.30807495117188, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.5844647884368896, |
|
"rewards/margins": 0.7100616097450256, |
|
"rewards/rejected": -1.2945263385772705, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.4711988859349283, |
|
"grad_norm": 28.162200927734375, |
|
"learning_rate": 9.055564304031981e-09, |
|
"logits/chosen": -2.082139730453491, |
|
"logits/rejected": -1.9701576232910156, |
|
"logps/chosen": -86.1491928100586, |
|
"logps/rejected": -92.10936737060547, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.5959383845329285, |
|
"rewards/margins": 0.7093779444694519, |
|
"rewards/rejected": -1.3053163290023804, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.4914546145081657, |
|
"grad_norm": 29.766904830932617, |
|
"learning_rate": 8.38967498395895e-09, |
|
"logits/chosen": -2.1094155311584473, |
|
"logits/rejected": -1.9789727926254272, |
|
"logps/chosen": -80.73603820800781, |
|
"logps/rejected": -84.64231872558594, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5383921265602112, |
|
"rewards/margins": 0.6760575175285339, |
|
"rewards/rejected": -1.2144496440887451, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.5117103430814027, |
|
"grad_norm": 35.819358825683594, |
|
"learning_rate": 7.746966258914988e-09, |
|
"logits/chosen": -2.1154112815856934, |
|
"logits/rejected": -1.9815971851348877, |
|
"logps/chosen": -86.95155334472656, |
|
"logps/rejected": -85.64454650878906, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6145626306533813, |
|
"rewards/margins": 0.631356418132782, |
|
"rewards/rejected": -1.2459189891815186, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.5319660716546397, |
|
"grad_norm": 29.06343650817871, |
|
"learning_rate": 7.127796173944695e-09, |
|
"logits/chosen": -2.085669994354248, |
|
"logits/rejected": -1.9675817489624023, |
|
"logps/chosen": -89.59977722167969, |
|
"logps/rejected": -91.9139404296875, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5644815564155579, |
|
"rewards/margins": 0.7162417769432068, |
|
"rewards/rejected": -1.2807233333587646, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.552221800227877, |
|
"grad_norm": 32.731590270996094, |
|
"learning_rate": 6.532509661008789e-09, |
|
"logits/chosen": -2.1157116889953613, |
|
"logits/rejected": -1.9942877292633057, |
|
"logps/chosen": -81.25712585449219, |
|
"logps/rejected": -85.86751556396484, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5965838432312012, |
|
"rewards/margins": 0.7261613011360168, |
|
"rewards/rejected": -1.3227452039718628, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.572477528801114, |
|
"grad_norm": 21.976213455200195, |
|
"learning_rate": 5.9614383468267916e-09, |
|
"logits/chosen": -2.064387321472168, |
|
"logits/rejected": -1.939223289489746, |
|
"logps/chosen": -87.50946807861328, |
|
"logps/rejected": -90.81806945800781, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5381378531455994, |
|
"rewards/margins": 0.7613744735717773, |
|
"rewards/rejected": -1.2995123863220215, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.592733257374351, |
|
"grad_norm": 26.645793914794922, |
|
"learning_rate": 5.4149003681318525e-09, |
|
"logits/chosen": -2.0983309745788574, |
|
"logits/rejected": -1.9681360721588135, |
|
"logps/chosen": -86.5027847290039, |
|
"logps/rejected": -88.28022003173828, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.6178566217422485, |
|
"rewards/margins": 0.7156898975372314, |
|
"rewards/rejected": -1.3335466384887695, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.612988985947588, |
|
"grad_norm": 34.658424377441406, |
|
"learning_rate": 4.8932001944408e-09, |
|
"logits/chosen": -2.114567995071411, |
|
"logits/rejected": -2.003492832183838, |
|
"logps/chosen": -86.31463623046875, |
|
"logps/rejected": -87.3377456665039, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.5726319551467896, |
|
"rewards/margins": 0.5831801891326904, |
|
"rewards/rejected": -1.1558120250701904, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.6332447145208255, |
|
"grad_norm": 29.894866943359375, |
|
"learning_rate": 4.396628458437912e-09, |
|
"logits/chosen": -2.057438373565674, |
|
"logits/rejected": -1.9271215200424194, |
|
"logps/chosen": -86.70679473876953, |
|
"logps/rejected": -89.4132308959961, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.5987198948860168, |
|
"rewards/margins": 0.7157739996910095, |
|
"rewards/rejected": -1.3144938945770264, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.6535004430940625, |
|
"grad_norm": 28.439125061035156, |
|
"learning_rate": 3.9254617940670474e-09, |
|
"logits/chosen": -2.0954787731170654, |
|
"logits/rejected": -1.9630225896835327, |
|
"logps/chosen": -84.08492279052734, |
|
"logps/rejected": -87.75090026855469, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.523126482963562, |
|
"rewards/margins": 0.7484750151634216, |
|
"rewards/rejected": -1.2716015577316284, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.6737561716672995, |
|
"grad_norm": 25.333951950073242, |
|
"learning_rate": 3.479962682422366e-09, |
|
"logits/chosen": -2.1200668811798096, |
|
"logits/rejected": -1.9594342708587646, |
|
"logps/chosen": -83.06964111328125, |
|
"logps/rejected": -84.25973510742188, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.6142527461051941, |
|
"rewards/margins": 0.7399193048477173, |
|
"rewards/rejected": -1.3541719913482666, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.694011900240537, |
|
"grad_norm": 38.79462814331055, |
|
"learning_rate": 3.0603793055233194e-09, |
|
"logits/chosen": -2.078015089035034, |
|
"logits/rejected": -1.9544031620025635, |
|
"logps/chosen": -86.95954895019531, |
|
"logps/rejected": -87.41011810302734, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6789627075195312, |
|
"rewards/margins": 0.6111471652984619, |
|
"rewards/rejected": -1.2901098728179932, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.714267628813774, |
|
"grad_norm": 25.715852737426758, |
|
"learning_rate": 2.6669454080555707e-09, |
|
"logits/chosen": -2.081672191619873, |
|
"logits/rejected": -1.9599504470825195, |
|
"logps/chosen": -81.85166931152344, |
|
"logps/rejected": -84.5459213256836, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5518099069595337, |
|
"rewards/margins": 0.6579364538192749, |
|
"rewards/rejected": -1.2097463607788086, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.734523357387011, |
|
"grad_norm": 31.674190521240234, |
|
"learning_rate": 2.299880167154694e-09, |
|
"logits/chosen": -2.0664610862731934, |
|
"logits/rejected": -1.9412147998809814, |
|
"logps/chosen": -86.22771453857422, |
|
"logps/rejected": -90.61781311035156, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5787885785102844, |
|
"rewards/margins": 0.6425621509552002, |
|
"rewards/rejected": -1.2213506698608398, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.754779085960248, |
|
"grad_norm": 21.180307388305664, |
|
"learning_rate": 1.959388070305368e-09, |
|
"logits/chosen": -2.1191658973693848, |
|
"logits/rejected": -1.9780559539794922, |
|
"logps/chosen": -84.12895202636719, |
|
"logps/rejected": -87.06913757324219, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.598002016544342, |
|
"rewards/margins": 0.7502027750015259, |
|
"rewards/rejected": -1.3482048511505127, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.7750348145334853, |
|
"grad_norm": 31.467397689819336, |
|
"learning_rate": 1.6456588014238826e-09, |
|
"logits/chosen": -2.0679941177368164, |
|
"logits/rejected": -1.984758973121643, |
|
"logps/chosen": -82.20966339111328, |
|
"logps/rejected": -89.56126403808594, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6504064798355103, |
|
"rewards/margins": 0.6637855768203735, |
|
"rewards/rejected": -1.3141919374465942, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.7952905431067223, |
|
"grad_norm": 25.813554763793945, |
|
"learning_rate": 1.3588671351876358e-09, |
|
"logits/chosen": -2.088512897491455, |
|
"logits/rejected": -1.9785858392715454, |
|
"logps/chosen": -86.8198013305664, |
|
"logps/rejected": -90.5335922241211, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6225306987762451, |
|
"rewards/margins": 0.7479228973388672, |
|
"rewards/rejected": -1.3704535961151123, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.8155462716799597, |
|
"grad_norm": 27.169641494750977, |
|
"learning_rate": 1.099172839670298e-09, |
|
"logits/chosen": -2.0676891803741455, |
|
"logits/rejected": -1.9737918376922607, |
|
"logps/chosen": -78.63626861572266, |
|
"logps/rejected": -83.41529083251953, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.6757029294967651, |
|
"rewards/margins": 0.6025967597961426, |
|
"rewards/rejected": -1.2782996892929077, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.8358020002531967, |
|
"grad_norm": 27.115802764892578, |
|
"learning_rate": 8.66720587337011e-10, |
|
"logits/chosen": -2.065960645675659, |
|
"logits/rejected": -1.9609451293945312, |
|
"logps/chosen": -87.2630615234375, |
|
"logps/rejected": -90.57234191894531, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.619225263595581, |
|
"rewards/margins": 0.6657966375350952, |
|
"rewards/rejected": -1.2850219011306763, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.8560577288264337, |
|
"grad_norm": 24.163036346435547, |
|
"learning_rate": 6.616398744491825e-10, |
|
"logits/chosen": -2.079883337020874, |
|
"logits/rejected": -1.9338324069976807, |
|
"logps/chosen": -88.01893615722656, |
|
"logps/rejected": -89.63729095458984, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5343765020370483, |
|
"rewards/margins": 0.7622194886207581, |
|
"rewards/rejected": -1.2965959310531616, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.8763134573996707, |
|
"grad_norm": 33.94268798828125, |
|
"learning_rate": 4.840449489236786e-10, |
|
"logits/chosen": -2.0775258541107178, |
|
"logits/rejected": -1.9470727443695068, |
|
"logps/chosen": -83.77029418945312, |
|
"logps/rejected": -87.10340881347656, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -0.578041672706604, |
|
"rewards/margins": 0.695804238319397, |
|
"rewards/rejected": -1.273845911026001, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.8965691859729077, |
|
"grad_norm": 31.864702224731445, |
|
"learning_rate": 3.3403474668677324e-10, |
|
"logits/chosen": -2.063178062438965, |
|
"logits/rejected": -1.9387401342391968, |
|
"logps/chosen": -88.2574234008789, |
|
"logps/rejected": -90.79651641845703, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6772388219833374, |
|
"rewards/margins": 0.6761503219604492, |
|
"rewards/rejected": -1.3533891439437866, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.916824914546145, |
|
"grad_norm": 23.90968894958496, |
|
"learning_rate": 2.1169283655815274e-10, |
|
"logits/chosen": -2.082099199295044, |
|
"logits/rejected": -1.949532151222229, |
|
"logps/chosen": -87.7041244506836, |
|
"logps/rejected": -91.3929214477539, |
|
"loss": 0.4748, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6174831390380859, |
|
"rewards/margins": 0.7841922044754028, |
|
"rewards/rejected": -1.4016753435134888, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.937080643119382, |
|
"grad_norm": 25.46677589416504, |
|
"learning_rate": 1.1708737369576228e-10, |
|
"logits/chosen": -2.081848621368408, |
|
"logits/rejected": -1.9594366550445557, |
|
"logps/chosen": -78.64682006835938, |
|
"logps/rejected": -87.83316040039062, |
|
"loss": 0.4826, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5947951078414917, |
|
"rewards/margins": 0.7158970832824707, |
|
"rewards/rejected": -1.3106920719146729, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.9573363716926195, |
|
"grad_norm": 34.160301208496094, |
|
"learning_rate": 5.0271061627427115e-11, |
|
"logits/chosen": -2.1119987964630127, |
|
"logits/rejected": -1.9852664470672607, |
|
"logps/chosen": -80.1048355102539, |
|
"logps/rejected": -88.52069091796875, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6696670651435852, |
|
"rewards/margins": 0.718749463558197, |
|
"rewards/rejected": -1.3884165287017822, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.9775921002658565, |
|
"grad_norm": 26.002410888671875, |
|
"learning_rate": 1.1281122890355322e-11, |
|
"logits/chosen": -2.076430082321167, |
|
"logits/rejected": -1.9372243881225586, |
|
"logps/chosen": -83.05271911621094, |
|
"logps/rejected": -85.09577941894531, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.542018711566925, |
|
"rewards/margins": 0.6729723215103149, |
|
"rewards/rejected": -1.2149909734725952, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.99582225598177, |
|
"step": 1479, |
|
"total_flos": 0.0, |
|
"train_loss": 0.546259533964032, |
|
"train_runtime": 14357.5178, |
|
"train_samples_per_second": 3.301, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1479, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|