{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 9336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.353319057815846e-10, "logits/generated": -1.1808598041534424, "logits/real": -1.6454026699066162, "logps/generated": -618.9616088867188, "logps/real": -434.496826171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.353319057815845e-09, "logits/generated": -1.5312682390213013, "logits/real": -1.6957073211669922, "logps/generated": -439.24603271484375, "logps/real": -409.38775634765625, "loss": 0.6942, "rewards/accuracies": 0.3611111044883728, "rewards/generated": 0.0017296108417212963, "rewards/margins": -0.009049887768924236, "rewards/real": -0.007320277392864227, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.070663811563169e-08, "logits/generated": -1.4873509407043457, "logits/real": -1.5558300018310547, "logps/generated": -424.1045837402344, "logps/real": -396.4812316894531, "loss": 0.6917, "rewards/accuracies": 0.5, "rewards/generated": -0.006751542445272207, "rewards/margins": 0.009579039178788662, "rewards/real": 0.0028274969663470984, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.6059957173447538e-08, "logits/generated": -1.4540519714355469, "logits/real": -1.6225078105926514, "logps/generated": -385.8744201660156, "logps/real": -374.5352478027344, "loss": 0.6798, "rewards/accuracies": 0.512499988079071, "rewards/generated": -0.0441308356821537, "rewards/margins": 0.014605102129280567, "rewards/real": -0.02952573262155056, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.141327623126338e-08, "logits/generated": -1.5126330852508545, "logits/real": -1.5405385494232178, "logps/generated": -447.34716796875, "logps/real": -412.48663330078125, "loss": 0.6549, "rewards/accuracies": 0.737500011920929, "rewards/generated": -0.1428581029176712, "rewards/margins": 0.0812745913863182, "rewards/real": -0.061583511531353, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.676659528907923e-08, "logits/generated": -1.4839402437210083, "logits/real": -1.6111907958984375, "logps/generated": -459.6697692871094, "logps/real": -440.2416076660156, "loss": 0.6261, "rewards/accuracies": 0.75, "rewards/generated": -0.2770255208015442, "rewards/margins": 0.13668467104434967, "rewards/real": -0.14034084975719452, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.2119914346895076e-08, "logits/generated": -1.3601272106170654, "logits/real": -1.4990594387054443, "logps/generated": -483.013916015625, "logps/real": -444.7726135253906, "loss": 0.5744, "rewards/accuracies": 0.862500011920929, "rewards/generated": -0.4971727728843689, "rewards/margins": 0.273921936750412, "rewards/real": -0.22325079143047333, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.747323340471092e-08, "logits/generated": -1.4716002941131592, "logits/real": -1.4833121299743652, "logps/generated": -392.59783935546875, "logps/real": -377.26397705078125, "loss": 0.5479, "rewards/accuracies": 0.887499988079071, "rewards/generated": -0.7791768312454224, "rewards/margins": 0.4691910743713379, "rewards/real": -0.3099857270717621, "step": 70 }, { "epoch": 0.03, "learning_rate": 4.282655246252676e-08, "logits/generated": -1.4095914363861084, "logits/real": -1.4806625843048096, "logps/generated": -407.0181579589844, "logps/real": -397.73773193359375, "loss": 0.468, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.8945554494857788, "rewards/margins": 0.5730990171432495, "rewards/real": -0.3214564025402069, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.817987152034261e-08, "logits/generated": -1.309691071510315, "logits/real": -1.388648271560669, "logps/generated": -468.7787170410156, "logps/real": -422.65826416015625, "loss": 0.4324, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -1.151340126991272, "rewards/margins": 0.743704080581665, "rewards/real": -0.4076361060142517, "step": 90 }, { "epoch": 0.03, "learning_rate": 5.353319057815846e-08, "logits/generated": -1.3499476909637451, "logits/real": -1.414467692375183, "logps/generated": -432.04522705078125, "logps/real": -406.4741516113281, "loss": 0.4021, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -1.3209459781646729, "rewards/margins": 0.9695326089859009, "rewards/real": -0.3514133095741272, "step": 100 }, { "epoch": 0.04, "learning_rate": 5.88865096359743e-08, "logits/generated": -1.298343300819397, "logits/real": -1.3823400735855103, "logps/generated": -486.8177185058594, "logps/real": -406.31939697265625, "loss": 0.3552, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -1.6570875644683838, "rewards/margins": 1.1586334705352783, "rewards/real": -0.4984540343284607, "step": 110 }, { "epoch": 0.04, "learning_rate": 6.423982869379015e-08, "logits/generated": -1.1703130006790161, "logits/real": -1.305787444114685, "logps/generated": -455.2709045410156, "logps/real": -398.802978515625, "loss": 0.3167, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -1.8421247005462646, "rewards/margins": 1.3385977745056152, "rewards/real": -0.5035268068313599, "step": 120 }, { "epoch": 0.04, "learning_rate": 6.959314775160599e-08, "logits/generated": -1.126508116722107, "logits/real": -1.3803380727767944, "logps/generated": -518.164794921875, "logps/real": -466.70965576171875, "loss": 0.2734, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.510751247406006, "rewards/margins": 1.9667211771011353, "rewards/real": -0.5440300703048706, "step": 130 }, { "epoch": 0.04, "learning_rate": 7.494646680942184e-08, "logits/generated": -1.0738112926483154, "logits/real": -1.3529951572418213, "logps/generated": -412.0902404785156, "logps/real": -396.1264343261719, "loss": 0.2593, "rewards/accuracies": 0.887499988079071, "rewards/generated": -2.2306365966796875, "rewards/margins": 1.710015058517456, "rewards/real": -0.5206215381622314, "step": 140 }, { "epoch": 0.05, "learning_rate": 8.029978586723767e-08, "logits/generated": -1.0912739038467407, "logits/real": -1.1289231777191162, "logps/generated": -434.7208557128906, "logps/real": -406.6234436035156, "loss": 0.2551, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -2.392695665359497, "rewards/margins": 1.759856939315796, "rewards/real": -0.632838785648346, "step": 150 }, { "epoch": 0.05, "learning_rate": 8.565310492505352e-08, "logits/generated": -0.9237449765205383, "logits/real": -1.0879288911819458, "logps/generated": -409.626953125, "logps/real": -338.8586730957031, "loss": 0.2395, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -2.819441318511963, "rewards/margins": 2.140852212905884, "rewards/real": -0.6785895824432373, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.100642398286937e-08, "logits/generated": -1.0291730165481567, "logits/real": -1.2613914012908936, "logps/generated": -427.37738037109375, "logps/real": -393.40643310546875, "loss": 0.2095, "rewards/accuracies": 0.925000011920929, "rewards/generated": -3.0544464588165283, "rewards/margins": 2.376891613006592, "rewards/real": -0.6775552034378052, "step": 170 }, { "epoch": 0.06, "learning_rate": 9.635974304068522e-08, "logits/generated": -0.8457727432250977, "logits/real": -1.1457784175872803, "logps/generated": -442.05963134765625, "logps/real": -424.82623291015625, "loss": 0.2354, "rewards/accuracies": 0.875, "rewards/generated": -3.149376392364502, "rewards/margins": 2.3693556785583496, "rewards/real": -0.7800208926200867, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.0171306209850107e-07, "logits/generated": -0.9481123685836792, "logits/real": -1.0342750549316406, "logps/generated": -438.37225341796875, "logps/real": -384.0022888183594, "loss": 0.203, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -3.4312710762023926, "rewards/margins": 2.5691442489624023, "rewards/real": -0.8621267080307007, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.0706638115631692e-07, "logits/generated": -0.8297363519668579, "logits/real": -1.0828993320465088, "logps/generated": -451.39691162109375, "logps/real": -364.50225830078125, "loss": 0.1936, "rewards/accuracies": 0.949999988079071, "rewards/generated": -3.835573673248291, "rewards/margins": 2.9357094764709473, "rewards/real": -0.8998647928237915, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.1241970021413276e-07, "logits/generated": -0.7779098749160767, "logits/real": -1.0249348878860474, "logps/generated": -485.56103515625, "logps/real": -409.61553955078125, "loss": 0.2009, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -4.258936882019043, "rewards/margins": 3.1135361194610596, "rewards/real": -1.1454010009765625, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.177730192719486e-07, "logits/generated": -0.8809121251106262, "logits/real": -1.0999161005020142, "logps/generated": -462.90130615234375, "logps/real": -431.45098876953125, "loss": 0.1488, "rewards/accuracies": 0.9375, "rewards/generated": -4.145557403564453, "rewards/margins": 3.1025338172912598, "rewards/real": -1.0430233478546143, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.2312633832976445e-07, "logits/generated": -0.6643133759498596, "logits/real": -0.9744445085525513, "logps/generated": -521.8199462890625, "logps/real": -412.56024169921875, "loss": 0.1634, "rewards/accuracies": 0.925000011920929, "rewards/generated": -4.987928867340088, "rewards/margins": 3.7843337059020996, "rewards/real": -1.203594446182251, "step": 230 }, { "epoch": 0.08, "learning_rate": 1.284796573875803e-07, "logits/generated": -0.762657105922699, "logits/real": -0.9924310445785522, "logps/generated": -514.7429809570312, "logps/real": -416.35675048828125, "loss": 0.1546, "rewards/accuracies": 0.925000011920929, "rewards/generated": -4.7892255783081055, "rewards/margins": 3.605727434158325, "rewards/real": -1.1834982633590698, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.3383297644539615e-07, "logits/generated": -0.6807416081428528, "logits/real": -0.968305766582489, "logps/generated": -453.20068359375, "logps/real": -430.4881896972656, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.14382791519165, "rewards/margins": 4.103741645812988, "rewards/real": -1.040086030960083, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.3918629550321198e-07, "logits/generated": -0.6399410963058472, "logits/real": -0.897692859172821, "logps/generated": -444.42779541015625, "logps/real": -403.3442687988281, "loss": 0.1556, "rewards/accuracies": 0.949999988079071, "rewards/generated": -4.718822956085205, "rewards/margins": 3.561589479446411, "rewards/real": -1.1572335958480835, "step": 260 }, { "epoch": 0.09, "learning_rate": 1.4453961456102785e-07, "logits/generated": -0.6675001382827759, "logits/real": -0.8955841064453125, "logps/generated": -547.5645751953125, "logps/real": -432.44970703125, "loss": 0.1747, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -5.936769008636475, "rewards/margins": 4.787117004394531, "rewards/real": -1.1496514081954956, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.4989293361884367e-07, "logits/generated": -0.6895192861557007, "logits/real": -0.8625975847244263, "logps/generated": -500.6998596191406, "logps/real": -428.13995361328125, "loss": 0.1569, "rewards/accuracies": 0.9375, "rewards/generated": -6.001742839813232, "rewards/margins": 4.775277137756348, "rewards/real": -1.2264657020568848, "step": 280 }, { "epoch": 0.09, "learning_rate": 1.5524625267665952e-07, "logits/generated": -0.6587976217269897, "logits/real": -0.8182875514030457, "logps/generated": -458.0955505371094, "logps/real": -356.9617004394531, "loss": 0.1091, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -5.574240207672119, "rewards/margins": 4.104826927185059, "rewards/real": -1.469412922859192, "step": 290 }, { "epoch": 0.1, "learning_rate": 1.6059957173447535e-07, "logits/generated": -0.5367578268051147, "logits/real": -0.7587701678276062, "logps/generated": -479.69720458984375, "logps/real": -424.663818359375, "loss": 0.1704, "rewards/accuracies": 0.949999988079071, "rewards/generated": -6.380267143249512, "rewards/margins": 4.135984420776367, "rewards/real": -2.2442824840545654, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.6595289079229122e-07, "logits/generated": -0.6532753109931946, "logits/real": -0.831712543964386, "logps/generated": -518.7501220703125, "logps/real": -392.31793212890625, "loss": 0.141, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -6.206812381744385, "rewards/margins": 4.91754674911499, "rewards/real": -1.2892649173736572, "step": 310 }, { "epoch": 0.1, "learning_rate": 1.7130620985010704e-07, "logits/generated": -0.4793264865875244, "logits/real": -0.772171139717102, "logps/generated": -494.2796936035156, "logps/real": -452.6293029785156, "loss": 0.1218, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.064553260803223, "rewards/margins": 4.667840480804443, "rewards/real": -1.3967125415802002, "step": 320 }, { "epoch": 0.11, "learning_rate": 1.766595289079229e-07, "logits/generated": -0.5085831880569458, "logits/real": -0.8297263383865356, "logps/generated": -478.5436096191406, "logps/real": -427.4683532714844, "loss": 0.1107, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -6.236862659454346, "rewards/margins": 4.353259086608887, "rewards/real": -1.8836028575897217, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.8201284796573874e-07, "logits/generated": -0.5670705437660217, "logits/real": -0.7842034101486206, "logps/generated": -463.4210510253906, "logps/real": -395.5496826171875, "loss": 0.1105, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.000011444091797, "rewards/margins": 4.41122579574585, "rewards/real": -1.5887855291366577, "step": 340 }, { "epoch": 0.11, "learning_rate": 1.873661670235546e-07, "logits/generated": -0.526637852191925, "logits/real": -0.7385447025299072, "logps/generated": -484.6463317871094, "logps/real": -382.02154541015625, "loss": 0.1045, "rewards/accuracies": 0.9375, "rewards/generated": -6.869808197021484, "rewards/margins": 4.903951168060303, "rewards/real": -1.9658565521240234, "step": 350 }, { "epoch": 0.12, "learning_rate": 1.9271948608137044e-07, "logits/generated": -0.496141254901886, "logits/real": -0.548911452293396, "logps/generated": -542.5902099609375, "logps/real": -376.17108154296875, "loss": 0.1237, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.125356674194336, "rewards/margins": 6.278863430023193, "rewards/real": -1.8464933633804321, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.980728051391863e-07, "logits/generated": -0.4736308157444, "logits/real": -0.6836093664169312, "logps/generated": -531.567138671875, "logps/real": -395.33642578125, "loss": 0.1327, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -7.355809211730957, "rewards/margins": 5.609824180603027, "rewards/real": -1.745985984802246, "step": 370 }, { "epoch": 0.12, "learning_rate": 2.0342612419700214e-07, "logits/generated": -0.2938746213912964, "logits/real": -0.7044242024421692, "logps/generated": -578.6864013671875, "logps/real": -466.355224609375, "loss": 0.1033, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.541459560394287, "rewards/margins": 5.902386665344238, "rewards/real": -1.639073371887207, "step": 380 }, { "epoch": 0.13, "learning_rate": 2.0877944325481796e-07, "logits/generated": -0.45513588190078735, "logits/real": -0.6607937812805176, "logps/generated": -487.9962463378906, "logps/real": -375.00872802734375, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/generated": -7.0926513671875, "rewards/margins": 5.93194580078125, "rewards/real": -1.16070556640625, "step": 390 }, { "epoch": 0.13, "learning_rate": 2.1413276231263384e-07, "logits/generated": -0.4080098271369934, "logits/real": -0.7269114255905151, "logps/generated": -480.9695739746094, "logps/real": -430.83245849609375, "loss": 0.1178, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.114847660064697, "rewards/margins": 5.5363569259643555, "rewards/real": -1.5784902572631836, "step": 400 }, { "epoch": 0.13, "learning_rate": 2.1948608137044966e-07, "logits/generated": -0.48005548119544983, "logits/real": -0.6669771075248718, "logps/generated": -506.3741760253906, "logps/real": -388.86102294921875, "loss": 0.108, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.5640716552734375, "rewards/margins": 5.90824031829834, "rewards/real": -1.6558303833007812, "step": 410 }, { "epoch": 0.13, "learning_rate": 2.248394004282655e-07, "logits/generated": -0.4052657186985016, "logits/real": -0.768792450428009, "logps/generated": -525.4461669921875, "logps/real": -471.03155517578125, "loss": 0.0964, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.519853115081787, "rewards/margins": 5.440214157104492, "rewards/real": -2.079638957977295, "step": 420 }, { "epoch": 0.14, "learning_rate": 2.3019271948608136e-07, "logits/generated": -0.6707115769386292, "logits/real": -0.8949233293533325, "logps/generated": -540.353515625, "logps/real": -465.3887634277344, "loss": 0.1065, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.855665683746338, "rewards/margins": 5.546034812927246, "rewards/real": -1.3096299171447754, "step": 430 }, { "epoch": 0.14, "learning_rate": 2.355460385438972e-07, "logits/generated": -0.5515331625938416, "logits/real": -0.7232746481895447, "logps/generated": -542.0548095703125, "logps/real": -439.6663513183594, "loss": 0.0928, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.203524589538574, "rewards/margins": 6.849783420562744, "rewards/real": -1.3537415266036987, "step": 440 }, { "epoch": 0.14, "learning_rate": 2.4089935760171303e-07, "logits/generated": -0.49747830629348755, "logits/real": -0.6678773760795593, "logps/generated": -535.3563232421875, "logps/real": -432.7539978027344, "loss": 0.1298, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -8.025300979614258, "rewards/margins": 6.239161491394043, "rewards/real": -1.7861411571502686, "step": 450 }, { "epoch": 0.15, "learning_rate": 2.462526766595289e-07, "logits/generated": -0.7647048830986023, "logits/real": -0.9378277659416199, "logps/generated": -507.47467041015625, "logps/real": -447.69122314453125, "loss": 0.1075, "rewards/accuracies": 0.949999988079071, "rewards/generated": -6.996179103851318, "rewards/margins": 6.177031517028809, "rewards/real": -0.8191484212875366, "step": 460 }, { "epoch": 0.15, "learning_rate": 2.5160599571734473e-07, "logits/generated": -0.48733216524124146, "logits/real": -0.7585724592208862, "logps/generated": -549.7501220703125, "logps/real": -428.76934814453125, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/generated": -8.673051834106445, "rewards/margins": 6.8696184158325195, "rewards/real": -1.8034347295761108, "step": 470 }, { "epoch": 0.15, "learning_rate": 2.569593147751606e-07, "logits/generated": -0.4284300208091736, "logits/real": -0.7243700623512268, "logps/generated": -520.1068115234375, "logps/real": -396.58892822265625, "loss": 0.1055, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.21156120300293, "rewards/margins": 7.108712673187256, "rewards/real": -2.1028475761413574, "step": 480 }, { "epoch": 0.16, "learning_rate": 2.6231263383297643e-07, "logits/generated": -0.423846960067749, "logits/real": -0.8373206257820129, "logps/generated": -526.388916015625, "logps/real": -441.72491455078125, "loss": 0.0969, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -10.056032180786133, "rewards/margins": 7.8534369468688965, "rewards/real": -2.2025949954986572, "step": 490 }, { "epoch": 0.16, "learning_rate": 2.676659528907923e-07, "logits/generated": -0.42267242074012756, "logits/real": -0.837328314781189, "logps/generated": -552.9312744140625, "logps/real": -438.79547119140625, "loss": 0.0932, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -9.084081649780273, "rewards/margins": 7.7864556312561035, "rewards/real": -1.297626256942749, "step": 500 }, { "epoch": 0.16, "learning_rate": 2.7301927194860813e-07, "logits/generated": -0.41762199997901917, "logits/real": -0.7780364155769348, "logps/generated": -451.2693786621094, "logps/real": -431.5977478027344, "loss": 0.1306, "rewards/accuracies": 0.925000011920929, "rewards/generated": -7.363960266113281, "rewards/margins": 5.918612480163574, "rewards/real": -1.4453485012054443, "step": 510 }, { "epoch": 0.17, "learning_rate": 2.7837259100642395e-07, "logits/generated": -0.5067940354347229, "logits/real": -0.7955325841903687, "logps/generated": -522.6128540039062, "logps/real": -458.2911071777344, "loss": 0.0657, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.69929027557373, "rewards/margins": 7.056490421295166, "rewards/real": -1.6427996158599854, "step": 520 }, { "epoch": 0.17, "learning_rate": 2.8372591006423977e-07, "logits/generated": -0.5578095316886902, "logits/real": -0.9103477597236633, "logps/generated": -523.0525512695312, "logps/real": -414.1087341308594, "loss": 0.1093, "rewards/accuracies": 0.9375, "rewards/generated": -8.661569595336914, "rewards/margins": 6.977335453033447, "rewards/real": -1.6842330694198608, "step": 530 }, { "epoch": 0.17, "learning_rate": 2.890792291220557e-07, "logits/generated": -0.3941894769668579, "logits/real": -0.6425245404243469, "logps/generated": -477.5164489746094, "logps/real": -391.55023193359375, "loss": 0.1125, "rewards/accuracies": 0.9375, "rewards/generated": -8.470056533813477, "rewards/margins": 6.905735015869141, "rewards/real": -1.564321756362915, "step": 540 }, { "epoch": 0.18, "learning_rate": 2.944325481798715e-07, "logits/generated": -0.4100174903869629, "logits/real": -0.8396750688552856, "logps/generated": -517.8656616210938, "logps/real": -426.30401611328125, "loss": 0.079, "rewards/accuracies": 0.9375, "rewards/generated": -8.600957870483398, "rewards/margins": 6.472214698791504, "rewards/real": -2.128744125366211, "step": 550 }, { "epoch": 0.18, "learning_rate": 2.9978586723768735e-07, "logits/generated": -0.27704018354415894, "logits/real": -0.7048155665397644, "logps/generated": -533.9234619140625, "logps/real": -454.0775451660156, "loss": 0.1146, "rewards/accuracies": 0.9375, "rewards/generated": -8.320775985717773, "rewards/margins": 6.487453460693359, "rewards/real": -1.8333231210708618, "step": 560 }, { "epoch": 0.18, "learning_rate": 3.051391862955032e-07, "logits/generated": -0.07181927561759949, "logits/real": -0.51822829246521, "logps/generated": -520.12841796875, "logps/real": -420.47467041015625, "loss": 0.1092, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.44589614868164, "rewards/margins": 8.124000549316406, "rewards/real": -2.321895122528076, "step": 570 }, { "epoch": 0.19, "learning_rate": 3.1049250535331905e-07, "logits/generated": -0.21476033329963684, "logits/real": -0.4515322744846344, "logps/generated": -530.752685546875, "logps/real": -392.84124755859375, "loss": 0.0845, "rewards/accuracies": 0.949999988079071, "rewards/generated": -9.2924222946167, "rewards/margins": 7.091778755187988, "rewards/real": -2.200643539428711, "step": 580 }, { "epoch": 0.19, "learning_rate": 3.1584582441113487e-07, "logits/generated": -0.18166860938072205, "logits/real": -0.4030866026878357, "logps/generated": -487.50396728515625, "logps/real": -307.7723693847656, "loss": 0.1108, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -9.999070167541504, "rewards/margins": 7.4625372886657715, "rewards/real": -2.5365328788757324, "step": 590 }, { "epoch": 0.19, "learning_rate": 3.211991434689507e-07, "logits/generated": -0.09769759327173233, "logits/real": -0.48593512177467346, "logps/generated": -546.4222412109375, "logps/real": -456.47796630859375, "loss": 0.0789, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.810072898864746, "rewards/margins": 8.587949752807617, "rewards/real": -3.2221226692199707, "step": 600 }, { "epoch": 0.2, "learning_rate": 3.265524625267666e-07, "logits/generated": -0.23076875507831573, "logits/real": -0.37269458174705505, "logps/generated": -563.2169189453125, "logps/real": -425.9403381347656, "loss": 0.1068, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.81021499633789, "rewards/margins": 8.701576232910156, "rewards/real": -3.108638286590576, "step": 610 }, { "epoch": 0.2, "learning_rate": 3.3190578158458244e-07, "logits/generated": -0.2781403958797455, "logits/real": -0.3580755889415741, "logps/generated": -544.1906127929688, "logps/real": -421.5177307128906, "loss": 0.1083, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.807619094848633, "rewards/margins": 8.305747985839844, "rewards/real": -2.5018701553344727, "step": 620 }, { "epoch": 0.2, "learning_rate": 3.3725910064239827e-07, "logits/generated": -0.25612014532089233, "logits/real": -0.6349602341651917, "logps/generated": -545.1222534179688, "logps/real": -460.37713623046875, "loss": 0.1372, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.042314529418945, "rewards/margins": 7.683230400085449, "rewards/real": -2.3590829372406006, "step": 630 }, { "epoch": 0.21, "learning_rate": 3.426124197002141e-07, "logits/generated": -0.4210878014564514, "logits/real": -0.6943050622940063, "logps/generated": -498.9895935058594, "logps/real": -383.1539001464844, "loss": 0.1037, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.85213565826416, "rewards/margins": 6.893673896789551, "rewards/real": -1.958461046218872, "step": 640 }, { "epoch": 0.21, "learning_rate": 3.4796573875802996e-07, "logits/generated": -0.4035860598087311, "logits/real": -0.7837592363357544, "logps/generated": -594.0745849609375, "logps/real": -427.7720642089844, "loss": 0.1373, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.293752670288086, "rewards/margins": 9.528745651245117, "rewards/real": -2.7650060653686523, "step": 650 }, { "epoch": 0.21, "learning_rate": 3.533190578158458e-07, "logits/generated": -0.2005266696214676, "logits/real": -0.674777090549469, "logps/generated": -512.3184814453125, "logps/real": -414.497314453125, "loss": 0.1317, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -10.525413513183594, "rewards/margins": 8.240222930908203, "rewards/real": -2.2851901054382324, "step": 660 }, { "epoch": 0.22, "learning_rate": 3.5867237687366166e-07, "logits/generated": -0.21463844180107117, "logits/real": -0.7407910823822021, "logps/generated": -491.6964416503906, "logps/real": -415.92718505859375, "loss": 0.1166, "rewards/accuracies": 0.925000011920929, "rewards/generated": -10.522367477416992, "rewards/margins": 8.165858268737793, "rewards/real": -2.356508493423462, "step": 670 }, { "epoch": 0.22, "learning_rate": 3.640256959314775e-07, "logits/generated": -0.5279382467269897, "logits/real": -0.8150844573974609, "logps/generated": -572.4334106445312, "logps/real": -405.1953430175781, "loss": 0.1263, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.910943031311035, "rewards/margins": 9.167193412780762, "rewards/real": -2.7437491416931152, "step": 680 }, { "epoch": 0.22, "learning_rate": 3.6937901498929336e-07, "logits/generated": -0.30016857385635376, "logits/real": -0.494767427444458, "logps/generated": -547.5362548828125, "logps/real": -382.86273193359375, "loss": 0.0881, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.797706604003906, "rewards/margins": 8.6353759765625, "rewards/real": -2.1623311042785645, "step": 690 }, { "epoch": 0.22, "learning_rate": 3.747323340471092e-07, "logits/generated": -0.2906731069087982, "logits/real": -0.8248172998428345, "logps/generated": -552.7711181640625, "logps/real": -484.8692321777344, "loss": 0.0972, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.04801082611084, "rewards/margins": 9.569578170776367, "rewards/real": -1.4784339666366577, "step": 700 }, { "epoch": 0.23, "learning_rate": 3.80085653104925e-07, "logits/generated": -0.43845877051353455, "logits/real": -0.8140133023262024, "logps/generated": -543.4899291992188, "logps/real": -395.19451904296875, "loss": 0.138, "rewards/accuracies": 0.925000011920929, "rewards/generated": -9.932836532592773, "rewards/margins": 7.952110290527344, "rewards/real": -1.9807262420654297, "step": 710 }, { "epoch": 0.23, "learning_rate": 3.854389721627409e-07, "logits/generated": 0.16249224543571472, "logits/real": -0.41873112320899963, "logps/generated": -543.8289184570312, "logps/real": -432.6397399902344, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/generated": -12.238335609436035, "rewards/margins": 9.472078323364258, "rewards/real": -2.76625657081604, "step": 720 }, { "epoch": 0.23, "learning_rate": 3.9079229122055676e-07, "logits/generated": -0.036403071135282516, "logits/real": -0.36339229345321655, "logps/generated": -564.2974853515625, "logps/real": -405.4169616699219, "loss": 0.1301, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.455001831054688, "rewards/margins": 8.998868942260742, "rewards/real": -3.4561333656311035, "step": 730 }, { "epoch": 0.24, "learning_rate": 3.961456102783726e-07, "logits/generated": -0.10172195732593536, "logits/real": -0.49659547209739685, "logps/generated": -545.7815551757812, "logps/real": -417.1949157714844, "loss": 0.1318, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.067608833312988, "rewards/margins": 8.637709617614746, "rewards/real": -2.4298996925354004, "step": 740 }, { "epoch": 0.24, "learning_rate": 4.014989293361884e-07, "logits/generated": -0.4993431568145752, "logits/real": -0.9273010492324829, "logps/generated": -526.9933471679688, "logps/real": -407.6418151855469, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.606310844421387, "rewards/margins": 8.870317459106445, "rewards/real": -1.7359952926635742, "step": 750 }, { "epoch": 0.24, "learning_rate": 4.068522483940043e-07, "logits/generated": -0.23106026649475098, "logits/real": -0.6554586291313171, "logps/generated": -547.8184814453125, "logps/real": -421.58575439453125, "loss": 0.0864, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.40614128112793, "rewards/margins": 9.498018264770508, "rewards/real": -2.9081225395202637, "step": 760 }, { "epoch": 0.25, "learning_rate": 4.122055674518201e-07, "logits/generated": -0.31717449426651, "logits/real": -0.7359119057655334, "logps/generated": -562.9927978515625, "logps/real": -414.7088317871094, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/generated": -11.926814079284668, "rewards/margins": 9.443926811218262, "rewards/real": -2.482889175415039, "step": 770 }, { "epoch": 0.25, "learning_rate": 4.175588865096359e-07, "logits/generated": 0.015390765853226185, "logits/real": -0.5203262567520142, "logps/generated": -582.1818237304688, "logps/real": -418.8092346191406, "loss": 0.1487, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.417628288269043, "rewards/margins": 11.039918899536133, "rewards/real": -3.3777077198028564, "step": 780 }, { "epoch": 0.25, "learning_rate": 4.2291220556745175e-07, "logits/generated": -0.38638511300086975, "logits/real": -0.6982467770576477, "logps/generated": -473.2498474121094, "logps/real": -399.2554016113281, "loss": 0.1522, "rewards/accuracies": 0.9375, "rewards/generated": -10.71537971496582, "rewards/margins": 7.822648525238037, "rewards/real": -2.892730236053467, "step": 790 }, { "epoch": 0.26, "learning_rate": 4.282655246252677e-07, "logits/generated": 0.0042091188952326775, "logits/real": -0.5825104117393494, "logps/generated": -532.9794921875, "logps/real": -426.70733642578125, "loss": 0.1145, "rewards/accuracies": 0.9375, "rewards/generated": -12.605524063110352, "rewards/margins": 9.04686450958252, "rewards/real": -3.558659315109253, "step": 800 }, { "epoch": 0.26, "learning_rate": 4.336188436830835e-07, "logits/generated": -0.12025006860494614, "logits/real": -0.7458328008651733, "logps/generated": -611.2869262695312, "logps/real": -428.008544921875, "loss": 0.14, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.54490852355957, "rewards/margins": 11.255144119262695, "rewards/real": -3.289766311645508, "step": 810 }, { "epoch": 0.26, "learning_rate": 4.389721627408993e-07, "logits/generated": -0.3027825951576233, "logits/real": -0.9337183833122253, "logps/generated": -537.9955444335938, "logps/real": -385.7232666015625, "loss": 0.1197, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.050565719604492, "rewards/margins": 9.237031936645508, "rewards/real": -2.8135340213775635, "step": 820 }, { "epoch": 0.27, "learning_rate": 4.443254817987152e-07, "logits/generated": -0.46209535002708435, "logits/real": -1.0165607929229736, "logps/generated": -535.6875, "logps/real": -449.3550720214844, "loss": 0.1315, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.250198364257812, "rewards/margins": 9.160192489624023, "rewards/real": -3.09000563621521, "step": 830 }, { "epoch": 0.27, "learning_rate": 4.49678800856531e-07, "logits/generated": -0.4964587688446045, "logits/real": -0.9699739217758179, "logps/generated": -571.7767944335938, "logps/real": -433.69989013671875, "loss": 0.0821, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.041302680969238, "rewards/margins": 9.878203392028809, "rewards/real": -3.1630992889404297, "step": 840 }, { "epoch": 0.27, "learning_rate": 4.5503211991434684e-07, "logits/generated": -0.35973459482192993, "logits/real": -0.9779464602470398, "logps/generated": -524.3665161132812, "logps/real": -469.8843688964844, "loss": 0.1054, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.343412399291992, "rewards/margins": 9.135124206542969, "rewards/real": -3.208287477493286, "step": 850 }, { "epoch": 0.28, "learning_rate": 4.603854389721627e-07, "logits/generated": -0.26254531741142273, "logits/real": -0.6523619294166565, "logps/generated": -512.7224731445312, "logps/real": -416.3313903808594, "loss": 0.1076, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.55627727508545, "rewards/margins": 8.417329788208008, "rewards/real": -3.138948917388916, "step": 860 }, { "epoch": 0.28, "learning_rate": 4.657387580299786e-07, "logits/generated": 0.1977463811635971, "logits/real": -0.3939017951488495, "logps/generated": -524.1931762695312, "logps/real": -437.3082580566406, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/generated": -14.033221244812012, "rewards/margins": 8.770598411560059, "rewards/real": -5.262623310089111, "step": 870 }, { "epoch": 0.28, "learning_rate": 4.710920770877944e-07, "logits/generated": 0.1040988340973854, "logits/real": -0.3440566956996918, "logps/generated": -611.7708129882812, "logps/real": -432.7342224121094, "loss": 0.1332, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -14.44532299041748, "rewards/margins": 10.373334884643555, "rewards/real": -4.071990489959717, "step": 880 }, { "epoch": 0.29, "learning_rate": 4.7644539614561024e-07, "logits/generated": -0.07733511924743652, "logits/real": -0.6917945146560669, "logps/generated": -505.3834533691406, "logps/real": -407.64788818359375, "loss": 0.1533, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.073893547058105, "rewards/margins": 9.836199760437012, "rewards/real": -3.2376930713653564, "step": 890 }, { "epoch": 0.29, "learning_rate": 4.817987152034261e-07, "logits/generated": -0.051682524383068085, "logits/real": -0.5843181610107422, "logps/generated": -575.9207153320312, "logps/real": -433.20513916015625, "loss": 0.2117, "rewards/accuracies": 0.9375, "rewards/generated": -14.66722583770752, "rewards/margins": 9.896738052368164, "rewards/real": -4.770487308502197, "step": 900 }, { "epoch": 0.29, "learning_rate": 4.871520342612419e-07, "logits/generated": 0.21942099928855896, "logits/real": -0.23753786087036133, "logps/generated": -559.3024291992188, "logps/real": -389.79791259765625, "loss": 0.1714, "rewards/accuracies": 0.925000011920929, "rewards/generated": -14.590487480163574, "rewards/margins": 10.587839126586914, "rewards/real": -4.002651214599609, "step": 910 }, { "epoch": 0.3, "learning_rate": 4.925053533190578e-07, "logits/generated": 0.4584569036960602, "logits/real": -0.15114298462867737, "logps/generated": -587.4402465820312, "logps/real": -505.27520751953125, "loss": 0.1182, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.189573287963867, "rewards/margins": 11.305639266967773, "rewards/real": -3.883934497833252, "step": 920 }, { "epoch": 0.3, "learning_rate": 4.978586723768736e-07, "logits/generated": 0.29027053713798523, "logits/real": -0.1807415932416916, "logps/generated": -674.7141723632812, "logps/real": -460.8020935058594, "loss": 0.1546, "rewards/accuracies": 1.0, "rewards/generated": -15.816503524780273, "rewards/margins": 12.28365707397461, "rewards/real": -3.5328450202941895, "step": 930 }, { "epoch": 0.3, "learning_rate": 4.996429421566293e-07, "logits/generated": 0.38288548588752747, "logits/real": -0.18652737140655518, "logps/generated": -648.7691650390625, "logps/real": -476.4012756347656, "loss": 0.0705, "rewards/accuracies": 0.949999988079071, "rewards/generated": -15.498260498046875, "rewards/margins": 11.223997116088867, "rewards/real": -4.274262428283691, "step": 940 }, { "epoch": 0.31, "learning_rate": 4.990478457510116e-07, "logits/generated": 0.43329495191574097, "logits/real": -0.1161356121301651, "logps/generated": -581.5389404296875, "logps/real": -408.0802307128906, "loss": 0.1305, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.54937744140625, "rewards/margins": 12.084678649902344, "rewards/real": -2.4646964073181152, "step": 950 }, { "epoch": 0.31, "learning_rate": 4.98452749345394e-07, "logits/generated": 0.7366458177566528, "logits/real": -0.030541684478521347, "logps/generated": -518.4758911132812, "logps/real": -425.9664001464844, "loss": 0.1578, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -14.289156913757324, "rewards/margins": 10.656447410583496, "rewards/real": -3.6327109336853027, "step": 960 }, { "epoch": 0.31, "learning_rate": 4.978576529397762e-07, "logits/generated": 0.49747592210769653, "logits/real": 0.06051991134881973, "logps/generated": -541.1715087890625, "logps/real": -418.36260986328125, "loss": 0.0997, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -14.382936477661133, "rewards/margins": 9.953971862792969, "rewards/real": -4.4289631843566895, "step": 970 }, { "epoch": 0.31, "learning_rate": 4.972625565341585e-07, "logits/generated": 0.15441684424877167, "logits/real": -0.2515067458152771, "logps/generated": -613.6532592773438, "logps/real": -417.5436096191406, "loss": 0.1339, "rewards/accuracies": 0.949999988079071, "rewards/generated": -14.758111953735352, "rewards/margins": 10.549171447753906, "rewards/real": -4.208940029144287, "step": 980 }, { "epoch": 0.32, "learning_rate": 4.966674601285408e-07, "logits/generated": 0.5540698766708374, "logits/real": -0.15427391231060028, "logps/generated": -605.1724853515625, "logps/real": -467.2969665527344, "loss": 0.0976, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.439870834350586, "rewards/margins": 11.430564880371094, "rewards/real": -5.009305000305176, "step": 990 }, { "epoch": 0.32, "learning_rate": 4.960723637229232e-07, "logits/generated": 0.7222113013267517, "logits/real": 0.23430073261260986, "logps/generated": -641.006103515625, "logps/real": -435.02130126953125, "loss": 0.1789, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -18.662405014038086, "rewards/margins": 13.47729206085205, "rewards/real": -5.18511438369751, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.954772673173054e-07, "logits/generated": 0.5562798976898193, "logits/real": -0.2210710495710373, "logps/generated": -634.76416015625, "logps/real": -460.98101806640625, "loss": 0.2052, "rewards/accuracies": 0.9375, "rewards/generated": -18.090970993041992, "rewards/margins": 13.143693923950195, "rewards/real": -4.94727897644043, "step": 1010 }, { "epoch": 0.33, "learning_rate": 4.948821709116876e-07, "logits/generated": 0.7667255401611328, "logits/real": -0.06433865427970886, "logps/generated": -578.8385620117188, "logps/real": -480.8077087402344, "loss": 0.0977, "rewards/accuracies": 0.949999988079071, "rewards/generated": -16.514789581298828, "rewards/margins": 12.266481399536133, "rewards/real": -4.248310565948486, "step": 1020 }, { "epoch": 0.33, "learning_rate": 4.9428707450607e-07, "logits/generated": 0.77698814868927, "logits/real": 0.38106992840766907, "logps/generated": -584.0748291015625, "logps/real": -492.6322326660156, "loss": 0.1203, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.00595474243164, "rewards/margins": 11.451032638549805, "rewards/real": -5.5549211502075195, "step": 1030 }, { "epoch": 0.33, "learning_rate": 4.936919781004522e-07, "logits/generated": 0.4747350215911865, "logits/real": -0.04128783941268921, "logps/generated": -593.6450805664062, "logps/real": -445.22125244140625, "loss": 0.1383, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -18.309412002563477, "rewards/margins": 12.987277030944824, "rewards/real": -5.322135925292969, "step": 1040 }, { "epoch": 0.34, "learning_rate": 4.930968816948346e-07, "logits/generated": 0.22491517663002014, "logits/real": -0.26303762197494507, "logps/generated": -576.6366577148438, "logps/real": -385.68914794921875, "loss": 0.2232, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -16.38455581665039, "rewards/margins": 12.055481910705566, "rewards/real": -4.329075813293457, "step": 1050 }, { "epoch": 0.34, "learning_rate": 4.925017852892168e-07, "logits/generated": 0.8281558156013489, "logits/real": 0.3183763921260834, "logps/generated": -662.0716552734375, "logps/real": -469.63238525390625, "loss": 0.1194, "rewards/accuracies": 0.949999988079071, "rewards/generated": -18.38417625427246, "rewards/margins": 12.322381973266602, "rewards/real": -6.061794281005859, "step": 1060 }, { "epoch": 0.34, "learning_rate": 4.919066888835991e-07, "logits/generated": 0.6267691850662231, "logits/real": -0.09960536658763885, "logps/generated": -580.5639038085938, "logps/real": -478.1272888183594, "loss": 0.1327, "rewards/accuracies": 0.949999988079071, "rewards/generated": -15.041595458984375, "rewards/margins": 11.124860763549805, "rewards/real": -3.9167346954345703, "step": 1070 }, { "epoch": 0.35, "learning_rate": 4.913115924779814e-07, "logits/generated": 0.7604618668556213, "logits/real": 0.08551941066980362, "logps/generated": -599.4046020507812, "logps/real": -460.885009765625, "loss": 0.1552, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.938884735107422, "rewards/margins": 11.212400436401367, "rewards/real": -5.726484298706055, "step": 1080 }, { "epoch": 0.35, "learning_rate": 4.907164960723638e-07, "logits/generated": 0.7514285445213318, "logits/real": 0.24342569708824158, "logps/generated": -595.6495361328125, "logps/real": -459.2359924316406, "loss": 0.1357, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.638690948486328, "rewards/margins": 11.065937995910645, "rewards/real": -5.572750091552734, "step": 1090 }, { "epoch": 0.35, "learning_rate": 4.90121399666746e-07, "logits/generated": 0.8910647630691528, "logits/real": 0.03425633907318115, "logps/generated": -585.2291870117188, "logps/real": -474.4053649902344, "loss": 0.2227, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -17.45850372314453, "rewards/margins": 11.365607261657715, "rewards/real": -6.092895030975342, "step": 1100 }, { "epoch": 0.36, "learning_rate": 4.895263032611282e-07, "logits/generated": 0.12513598799705505, "logits/real": -0.7318788766860962, "logps/generated": -501.1244201660156, "logps/real": -450.900634765625, "loss": 0.2001, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.528016090393066, "rewards/margins": 9.888049125671387, "rewards/real": -2.6399664878845215, "step": 1110 }, { "epoch": 0.36, "learning_rate": 4.889312068555106e-07, "logits/generated": -0.12164588272571564, "logits/real": -0.6808714270591736, "logps/generated": -613.710205078125, "logps/real": -419.4390563964844, "loss": 0.2528, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -14.635782241821289, "rewards/margins": 11.117566108703613, "rewards/real": -3.5182151794433594, "step": 1120 }, { "epoch": 0.36, "learning_rate": 4.883361104498928e-07, "logits/generated": 0.0697876513004303, "logits/real": -0.4420434832572937, "logps/generated": -646.8770141601562, "logps/real": -414.06170654296875, "loss": 0.0543, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.984211921691895, "rewards/margins": 12.85014533996582, "rewards/real": -3.1340668201446533, "step": 1130 }, { "epoch": 0.37, "learning_rate": 4.877410140442752e-07, "logits/generated": 0.15847408771514893, "logits/real": -0.48507052659988403, "logps/generated": -568.7153930664062, "logps/real": -366.9979553222656, "loss": 0.0733, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.623722076416016, "rewards/margins": 13.794718742370605, "rewards/real": -3.829005002975464, "step": 1140 }, { "epoch": 0.37, "learning_rate": 4.871459176386574e-07, "logits/generated": 0.3374684751033783, "logits/real": -0.43447422981262207, "logps/generated": -570.4505004882812, "logps/real": -489.80340576171875, "loss": 0.0995, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -15.265728950500488, "rewards/margins": 11.534330368041992, "rewards/real": -3.7314000129699707, "step": 1150 }, { "epoch": 0.37, "learning_rate": 4.865508212330398e-07, "logits/generated": 0.79986572265625, "logits/real": 0.11900987476110458, "logps/generated": -649.2984619140625, "logps/real": -475.9365234375, "loss": 0.1284, "rewards/accuracies": 0.9375, "rewards/generated": -19.11408805847168, "rewards/margins": 13.933436393737793, "rewards/real": -5.180653095245361, "step": 1160 }, { "epoch": 0.38, "learning_rate": 4.85955724827422e-07, "logits/generated": 0.6280202865600586, "logits/real": 0.01941187120974064, "logps/generated": -671.6176147460938, "logps/real": -394.4974060058594, "loss": 0.1665, "rewards/accuracies": 0.925000011920929, "rewards/generated": -19.163089752197266, "rewards/margins": 13.27893352508545, "rewards/real": -5.884156227111816, "step": 1170 }, { "epoch": 0.38, "learning_rate": 4.853606284218044e-07, "logits/generated": 0.7004096508026123, "logits/real": 0.38366079330444336, "logps/generated": -720.7593994140625, "logps/real": -441.22369384765625, "loss": 0.2227, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.298124313354492, "rewards/margins": 13.823907852172852, "rewards/real": -6.474215507507324, "step": 1180 }, { "epoch": 0.38, "learning_rate": 4.847655320161866e-07, "logits/generated": 0.4623348116874695, "logits/real": -0.20224161446094513, "logps/generated": -643.1566162109375, "logps/real": -395.9561462402344, "loss": 0.1177, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.520111083984375, "rewards/margins": 13.4257173538208, "rewards/real": -4.094396591186523, "step": 1190 }, { "epoch": 0.39, "learning_rate": 4.841704356105689e-07, "logits/generated": 0.4386751055717468, "logits/real": -0.0903383120894432, "logps/generated": -628.1522216796875, "logps/real": -442.05242919921875, "loss": 0.1849, "rewards/accuracies": 0.9375, "rewards/generated": -18.02935028076172, "rewards/margins": 12.333436965942383, "rewards/real": -5.695910930633545, "step": 1200 }, { "epoch": 0.39, "learning_rate": 4.835753392049512e-07, "logits/generated": 0.4043962359428406, "logits/real": -0.42423373460769653, "logps/generated": -589.523681640625, "logps/real": -497.030029296875, "loss": 0.0953, "rewards/accuracies": 0.949999988079071, "rewards/generated": -16.19954490661621, "rewards/margins": 11.84134292602539, "rewards/real": -4.358203411102295, "step": 1210 }, { "epoch": 0.39, "learning_rate": 4.829802427993334e-07, "logits/generated": 0.5528501272201538, "logits/real": -0.17417611181735992, "logps/generated": -535.357421875, "logps/real": -435.448974609375, "loss": 0.1791, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -16.632652282714844, "rewards/margins": 13.00996208190918, "rewards/real": -3.6226890087127686, "step": 1220 }, { "epoch": 0.4, "learning_rate": 4.823851463937158e-07, "logits/generated": 0.5257650017738342, "logits/real": -0.24014277756214142, "logps/generated": -574.71435546875, "logps/real": -448.80279541015625, "loss": 0.1339, "rewards/accuracies": 0.949999988079071, "rewards/generated": -16.43558120727539, "rewards/margins": 12.44011116027832, "rewards/real": -3.995471239089966, "step": 1230 }, { "epoch": 0.4, "learning_rate": 4.81790049988098e-07, "logits/generated": 0.041858408600091934, "logits/real": -0.38767313957214355, "logps/generated": -585.8471069335938, "logps/real": -451.6504821777344, "loss": 0.1778, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.504603385925293, "rewards/margins": 10.893657684326172, "rewards/real": -3.6109466552734375, "step": 1240 }, { "epoch": 0.4, "learning_rate": 4.811949535824804e-07, "logits/generated": 0.583884596824646, "logits/real": -0.432716429233551, "logps/generated": -528.9489135742188, "logps/real": -436.3238830566406, "loss": 0.1076, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.528376579284668, "rewards/margins": 11.315681457519531, "rewards/real": -3.212695598602295, "step": 1250 }, { "epoch": 0.4, "learning_rate": 4.805998571768626e-07, "logits/generated": 0.581306517124176, "logits/real": -0.025955751538276672, "logps/generated": -615.06494140625, "logps/real": -468.18878173828125, "loss": 0.1364, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.53779411315918, "rewards/margins": 11.862200736999512, "rewards/real": -4.675593376159668, "step": 1260 }, { "epoch": 0.41, "learning_rate": 4.80004760771245e-07, "logits/generated": 1.188056230545044, "logits/real": 0.49854737520217896, "logps/generated": -563.9627075195312, "logps/real": -405.2356872558594, "loss": 0.1378, "rewards/accuracies": 0.9375, "rewards/generated": -15.706609725952148, "rewards/margins": 11.854299545288086, "rewards/real": -3.852311372756958, "step": 1270 }, { "epoch": 0.41, "learning_rate": 4.794096643656272e-07, "logits/generated": 0.7192880511283875, "logits/real": 0.34548497200012207, "logps/generated": -577.8472900390625, "logps/real": -441.2189025878906, "loss": 0.2233, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -15.9380464553833, "rewards/margins": 11.308958053588867, "rewards/real": -4.629090309143066, "step": 1280 }, { "epoch": 0.41, "learning_rate": 4.788145679600095e-07, "logits/generated": 0.5968669652938843, "logits/real": 0.14197833836078644, "logps/generated": -588.7890625, "logps/real": -385.81158447265625, "loss": 0.1767, "rewards/accuracies": 0.887499988079071, "rewards/generated": -14.702133178710938, "rewards/margins": 11.405279159545898, "rewards/real": -3.29685640335083, "step": 1290 }, { "epoch": 0.42, "learning_rate": 4.782194715543918e-07, "logits/generated": 1.0414130687713623, "logits/real": 0.49789801239967346, "logps/generated": -552.2242431640625, "logps/real": -422.2083435058594, "loss": 0.1664, "rewards/accuracies": 0.887499988079071, "rewards/generated": -15.274334907531738, "rewards/margins": 11.004362106323242, "rewards/real": -4.269972324371338, "step": 1300 }, { "epoch": 0.42, "learning_rate": 4.77624375148774e-07, "logits/generated": 0.7460211515426636, "logits/real": 0.45159998536109924, "logps/generated": -566.40234375, "logps/real": -480.30950927734375, "loss": 0.1775, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -13.9493408203125, "rewards/margins": 10.329652786254883, "rewards/real": -3.619688034057617, "step": 1310 }, { "epoch": 0.42, "learning_rate": 4.770292787431564e-07, "logits/generated": 0.8211126327514648, "logits/real": 0.02447642758488655, "logps/generated": -588.2283325195312, "logps/real": -493.6436462402344, "loss": 0.1235, "rewards/accuracies": 0.925000011920929, "rewards/generated": -17.133975982666016, "rewards/margins": 11.407205581665039, "rewards/real": -5.72676944732666, "step": 1320 }, { "epoch": 0.43, "learning_rate": 4.764341823375387e-07, "logits/generated": 0.5519979596138, "logits/real": -0.026401275768876076, "logps/generated": -607.3372802734375, "logps/real": -415.8194885253906, "loss": 0.1955, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.53517723083496, "rewards/margins": 13.163558959960938, "rewards/real": -5.371618270874023, "step": 1330 }, { "epoch": 0.43, "learning_rate": 4.7583908593192097e-07, "logits/generated": 0.4440780580043793, "logits/real": -0.1810052990913391, "logps/generated": -581.2791137695312, "logps/real": -426.8333435058594, "loss": 0.073, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.157032012939453, "rewards/margins": 11.956171035766602, "rewards/real": -4.200861930847168, "step": 1340 }, { "epoch": 0.43, "learning_rate": 4.752439895263032e-07, "logits/generated": 0.8261198997497559, "logits/real": 0.1969045102596283, "logps/generated": -543.7503662109375, "logps/real": -394.2093811035156, "loss": 0.1409, "rewards/accuracies": 0.925000011920929, "rewards/generated": -16.908294677734375, "rewards/margins": 10.991459846496582, "rewards/real": -5.916835784912109, "step": 1350 }, { "epoch": 0.44, "learning_rate": 4.746488931206855e-07, "logits/generated": 0.919946551322937, "logits/real": 0.39481428265571594, "logps/generated": -655.728515625, "logps/real": -456.26959228515625, "loss": 0.0735, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.801586151123047, "rewards/margins": 15.81944465637207, "rewards/real": -4.982141971588135, "step": 1360 }, { "epoch": 0.44, "learning_rate": 4.7405379671506785e-07, "logits/generated": 1.002418041229248, "logits/real": 0.42013120651245117, "logps/generated": -613.5100708007812, "logps/real": -461.9776306152344, "loss": 0.1337, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.39179229736328, "rewards/margins": 13.954264640808105, "rewards/real": -6.437526702880859, "step": 1370 }, { "epoch": 0.44, "learning_rate": 4.734587003094501e-07, "logits/generated": 0.7721500396728516, "logits/real": 0.1944776177406311, "logps/generated": -651.8570556640625, "logps/real": -458.2220764160156, "loss": 0.1393, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.7298526763916, "rewards/margins": 14.351509094238281, "rewards/real": -5.3783440589904785, "step": 1380 }, { "epoch": 0.45, "learning_rate": 4.728636039038324e-07, "logits/generated": 1.09163236618042, "logits/real": 0.4099518358707428, "logps/generated": -593.5547485351562, "logps/real": -407.93048095703125, "loss": 0.1487, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.948055267333984, "rewards/margins": 12.153738021850586, "rewards/real": -7.794315338134766, "step": 1390 }, { "epoch": 0.45, "learning_rate": 4.722685074982147e-07, "logits/generated": 1.0830082893371582, "logits/real": 0.6959132552146912, "logps/generated": -616.0054321289062, "logps/real": -462.34503173828125, "loss": 0.1483, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.778316497802734, "rewards/margins": 14.290502548217773, "rewards/real": -5.487817287445068, "step": 1400 }, { "epoch": 0.45, "learning_rate": 4.7167341109259703e-07, "logits/generated": 1.1333125829696655, "logits/real": 0.2914403975009918, "logps/generated": -577.7488403320312, "logps/real": -423.5242614746094, "loss": 0.1568, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.46885871887207, "rewards/margins": 12.82512378692627, "rewards/real": -4.643734455108643, "step": 1410 }, { "epoch": 0.46, "learning_rate": 4.710783146869793e-07, "logits/generated": 0.6774446368217468, "logits/real": -0.1057586669921875, "logps/generated": -660.5841674804688, "logps/real": -461.93084716796875, "loss": 0.1258, "rewards/accuracies": 0.949999988079071, "rewards/generated": -15.781753540039062, "rewards/margins": 12.401758193969727, "rewards/real": -3.3799960613250732, "step": 1420 }, { "epoch": 0.46, "learning_rate": 4.7048321828136157e-07, "logits/generated": 0.5405632257461548, "logits/real": -0.31847503781318665, "logps/generated": -533.5389404296875, "logps/real": -455.6302185058594, "loss": 0.1673, "rewards/accuracies": 0.887499988079071, "rewards/generated": -15.044950485229492, "rewards/margins": 9.774984359741211, "rewards/real": -5.269965648651123, "step": 1430 }, { "epoch": 0.46, "learning_rate": 4.698881218757438e-07, "logits/generated": 1.157545804977417, "logits/real": 0.21916981041431427, "logps/generated": -674.6870727539062, "logps/real": -446.28582763671875, "loss": 0.1836, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.649944305419922, "rewards/margins": 14.28632926940918, "rewards/real": -7.363614082336426, "step": 1440 }, { "epoch": 0.47, "learning_rate": 4.692930254701261e-07, "logits/generated": 1.0000150203704834, "logits/real": 0.2654796242713928, "logps/generated": -581.3739013671875, "logps/real": -457.564453125, "loss": 0.1217, "rewards/accuracies": 0.9375, "rewards/generated": -20.32326889038086, "rewards/margins": 13.458106994628906, "rewards/real": -6.865163326263428, "step": 1450 }, { "epoch": 0.47, "learning_rate": 4.6869792906450845e-07, "logits/generated": 0.9947832226753235, "logits/real": 0.19707408547401428, "logps/generated": -624.0670166015625, "logps/real": -520.7171630859375, "loss": 0.2592, "rewards/accuracies": 0.9375, "rewards/generated": -21.57383918762207, "rewards/margins": 13.364995956420898, "rewards/real": -8.208843231201172, "step": 1460 }, { "epoch": 0.47, "learning_rate": 4.6810283265889075e-07, "logits/generated": 0.8377590179443359, "logits/real": 0.315042644739151, "logps/generated": -705.0423583984375, "logps/real": -490.0647888183594, "loss": 0.099, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.04421615600586, "rewards/margins": 14.467103958129883, "rewards/real": -6.577111721038818, "step": 1470 }, { "epoch": 0.48, "learning_rate": 4.67507736253273e-07, "logits/generated": 0.952717661857605, "logits/real": 0.32825908064842224, "logps/generated": -619.4190063476562, "logps/real": -482.84307861328125, "loss": 0.0893, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.161073684692383, "rewards/margins": 14.814906120300293, "rewards/real": -5.346166133880615, "step": 1480 }, { "epoch": 0.48, "learning_rate": 4.669126398476553e-07, "logits/generated": 0.6674627661705017, "logits/real": -0.19968627393245697, "logps/generated": -639.9056396484375, "logps/real": -482.53857421875, "loss": 0.1075, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.637418746948242, "rewards/margins": 13.708355903625488, "rewards/real": -4.929060935974121, "step": 1490 }, { "epoch": 0.48, "learning_rate": 4.6631754344203763e-07, "logits/generated": 1.0142736434936523, "logits/real": 0.2910311222076416, "logps/generated": -660.4046630859375, "logps/real": -453.3133850097656, "loss": 0.0659, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.06307029724121, "rewards/margins": 13.818547248840332, "rewards/real": -5.244523048400879, "step": 1500 }, { "epoch": 0.49, "learning_rate": 4.657224470364199e-07, "logits/generated": 1.190795660018921, "logits/real": 0.23487675189971924, "logps/generated": -672.40478515625, "logps/real": -449.458740234375, "loss": 0.1004, "rewards/accuracies": 0.925000011920929, "rewards/generated": -21.44005012512207, "rewards/margins": 17.266386032104492, "rewards/real": -4.173663139343262, "step": 1510 }, { "epoch": 0.49, "learning_rate": 4.6512735063080217e-07, "logits/generated": 1.2253535985946655, "logits/real": 0.1833571493625641, "logps/generated": -607.3043212890625, "logps/real": -444.6239318847656, "loss": 0.1496, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.109739303588867, "rewards/margins": 15.182458877563477, "rewards/real": -5.927280426025391, "step": 1520 }, { "epoch": 0.49, "learning_rate": 4.6453225422518447e-07, "logits/generated": 1.1783695220947266, "logits/real": 0.23501253128051758, "logps/generated": -606.5845947265625, "logps/real": -405.78570556640625, "loss": 0.1063, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.71524429321289, "rewards/margins": 13.84802532196045, "rewards/real": -5.867218971252441, "step": 1530 }, { "epoch": 0.49, "learning_rate": 4.6393715781956676e-07, "logits/generated": 1.1605355739593506, "logits/real": 0.07183153182268143, "logps/generated": -635.2320556640625, "logps/real": -520.0054321289062, "loss": 0.1266, "rewards/accuracies": 0.949999988079071, "rewards/generated": -22.56968116760254, "rewards/margins": 15.290313720703125, "rewards/real": -7.279367923736572, "step": 1540 }, { "epoch": 0.5, "learning_rate": 4.6334206141394905e-07, "logits/generated": 0.9681889414787292, "logits/real": 0.31621164083480835, "logps/generated": -630.9334106445312, "logps/real": -455.72686767578125, "loss": 0.129, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.262531280517578, "rewards/margins": 15.318807601928711, "rewards/real": -4.943722248077393, "step": 1550 }, { "epoch": 0.5, "learning_rate": 4.6274696500833135e-07, "logits/generated": 0.44618314504623413, "logits/real": -0.5795027613639832, "logps/generated": -618.8849487304688, "logps/real": -436.7918395996094, "loss": 0.1385, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.20525550842285, "rewards/margins": 14.203890800476074, "rewards/real": -4.0013628005981445, "step": 1560 }, { "epoch": 0.5, "learning_rate": 4.621518686027136e-07, "logits/generated": 1.0406615734100342, "logits/real": 0.18830417096614838, "logps/generated": -665.0647583007812, "logps/real": -552.0758666992188, "loss": 0.1204, "rewards/accuracies": 0.9375, "rewards/generated": -21.448116302490234, "rewards/margins": 14.357902526855469, "rewards/real": -7.09021520614624, "step": 1570 }, { "epoch": 0.51, "learning_rate": 4.6155677219709594e-07, "logits/generated": 1.028936743736267, "logits/real": 0.4498261511325836, "logps/generated": -647.6895751953125, "logps/real": -456.8243103027344, "loss": 0.1066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.325885772705078, "rewards/margins": 14.831125259399414, "rewards/real": -4.494760513305664, "step": 1580 }, { "epoch": 0.51, "learning_rate": 4.6096167579147823e-07, "logits/generated": 1.0840437412261963, "logits/real": 0.17742864787578583, "logps/generated": -613.6370849609375, "logps/real": -391.63690185546875, "loss": 0.1198, "rewards/accuracies": 0.9375, "rewards/generated": -22.402141571044922, "rewards/margins": 16.035541534423828, "rewards/real": -6.366599082946777, "step": 1590 }, { "epoch": 0.51, "learning_rate": 4.603665793858605e-07, "logits/generated": 1.2031408548355103, "logits/real": 0.2488580197095871, "logps/generated": -591.8146362304688, "logps/real": -403.86761474609375, "loss": 0.1726, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -19.670690536499023, "rewards/margins": 14.535089492797852, "rewards/real": -5.13560152053833, "step": 1600 }, { "epoch": 0.52, "learning_rate": 4.5977148298024277e-07, "logits/generated": 1.1220364570617676, "logits/real": 0.48264771699905396, "logps/generated": -600.8532104492188, "logps/real": -440.7237854003906, "loss": 0.1744, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -19.656465530395508, "rewards/margins": 13.32789421081543, "rewards/real": -6.328570365905762, "step": 1610 }, { "epoch": 0.52, "learning_rate": 4.5917638657462507e-07, "logits/generated": 0.7380300164222717, "logits/real": -0.2684968113899231, "logps/generated": -607.64697265625, "logps/real": -465.97674560546875, "loss": 0.1428, "rewards/accuracies": 0.925000011920929, "rewards/generated": -20.37729263305664, "rewards/margins": 13.441081047058105, "rewards/real": -6.936212062835693, "step": 1620 }, { "epoch": 0.52, "learning_rate": 4.5858129016900736e-07, "logits/generated": 0.8170192837715149, "logits/real": -0.3309488892555237, "logps/generated": -574.9518432617188, "logps/real": -435.5298767089844, "loss": 0.1754, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -18.864727020263672, "rewards/margins": 14.569292068481445, "rewards/real": -4.295435905456543, "step": 1630 }, { "epoch": 0.53, "learning_rate": 4.5798619376338966e-07, "logits/generated": 0.7942267060279846, "logits/real": -0.36876288056373596, "logps/generated": -567.2540283203125, "logps/real": -392.127685546875, "loss": 0.0932, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -19.31840705871582, "rewards/margins": 14.62622356414795, "rewards/real": -4.69218111038208, "step": 1640 }, { "epoch": 0.53, "learning_rate": 4.5739109735777195e-07, "logits/generated": 0.8375517129898071, "logits/real": -0.281541109085083, "logps/generated": -522.7070922851562, "logps/real": -428.2649841308594, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.026012420654297, "rewards/margins": 13.364750862121582, "rewards/real": -3.661261796951294, "step": 1650 }, { "epoch": 0.53, "learning_rate": 4.567960009521542e-07, "logits/generated": 0.85284823179245, "logits/real": -0.16292670369148254, "logps/generated": -668.9300537109375, "logps/real": -472.83184814453125, "loss": 0.1261, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.59139060974121, "rewards/margins": 15.517718315124512, "rewards/real": -5.07367467880249, "step": 1660 }, { "epoch": 0.54, "learning_rate": 4.5620090454653654e-07, "logits/generated": 0.4061349034309387, "logits/real": -0.5179981589317322, "logps/generated": -609.2232666015625, "logps/real": -431.9954528808594, "loss": 0.1896, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.078081130981445, "rewards/margins": 14.560803413391113, "rewards/real": -3.5172781944274902, "step": 1670 }, { "epoch": 0.54, "learning_rate": 4.5560580814091884e-07, "logits/generated": 0.9091037511825562, "logits/real": -0.3707179129123688, "logps/generated": -621.21142578125, "logps/real": -458.58123779296875, "loss": 0.0819, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.463977813720703, "rewards/margins": 13.731709480285645, "rewards/real": -4.732268333435059, "step": 1680 }, { "epoch": 0.54, "learning_rate": 4.550107117353011e-07, "logits/generated": 0.587882399559021, "logits/real": -0.5815958380699158, "logps/generated": -644.7069091796875, "logps/real": -444.1105041503906, "loss": 0.1307, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.754364013671875, "rewards/margins": 15.095369338989258, "rewards/real": -4.658993244171143, "step": 1690 }, { "epoch": 0.55, "learning_rate": 4.5441561532968337e-07, "logits/generated": 0.5395095944404602, "logits/real": -0.6018906831741333, "logps/generated": -639.6863403320312, "logps/real": -439.3102111816406, "loss": 0.1368, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -20.76279067993164, "rewards/margins": 15.01270580291748, "rewards/real": -5.750086307525635, "step": 1700 }, { "epoch": 0.55, "learning_rate": 4.538205189240657e-07, "logits/generated": 0.4623304307460785, "logits/real": -0.7674506306648254, "logps/generated": -602.7589111328125, "logps/real": -404.996337890625, "loss": 0.1272, "rewards/accuracies": 0.9375, "rewards/generated": -17.342721939086914, "rewards/margins": 13.562103271484375, "rewards/real": -3.780616283416748, "step": 1710 }, { "epoch": 0.55, "learning_rate": 4.5322542251844796e-07, "logits/generated": 0.324072003364563, "logits/real": -0.8895346522331238, "logps/generated": -601.6587524414062, "logps/real": -471.562255859375, "loss": 0.1011, "rewards/accuracies": 0.9375, "rewards/generated": -18.58040428161621, "rewards/margins": 13.672235488891602, "rewards/real": -4.908169269561768, "step": 1720 }, { "epoch": 0.56, "learning_rate": 4.5263032611283026e-07, "logits/generated": 0.5588086843490601, "logits/real": -0.39361295104026794, "logps/generated": -645.50390625, "logps/real": -454.4718322753906, "loss": 0.1283, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.534143447875977, "rewards/margins": 13.78315258026123, "rewards/real": -5.750988483428955, "step": 1730 }, { "epoch": 0.56, "learning_rate": 4.5203522970721255e-07, "logits/generated": 1.0117212533950806, "logits/real": -0.10400022566318512, "logps/generated": -707.6424560546875, "logps/real": -505.9258728027344, "loss": 0.1953, "rewards/accuracies": 0.949999988079071, "rewards/generated": -22.3872127532959, "rewards/margins": 17.375761032104492, "rewards/real": -5.0114521980285645, "step": 1740 }, { "epoch": 0.56, "learning_rate": 4.5144013330159485e-07, "logits/generated": 0.9166877865791321, "logits/real": 0.036824341863393784, "logps/generated": -612.57666015625, "logps/real": -439.55908203125, "loss": 0.1298, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.841764450073242, "rewards/margins": 14.188302993774414, "rewards/real": -5.6534647941589355, "step": 1750 }, { "epoch": 0.57, "learning_rate": 4.5084503689597714e-07, "logits/generated": 0.6594575643539429, "logits/real": -0.14145585894584656, "logps/generated": -579.8397827148438, "logps/real": -453.34552001953125, "loss": 0.1479, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -18.10012435913086, "rewards/margins": 12.603446960449219, "rewards/real": -5.496678352355957, "step": 1760 }, { "epoch": 0.57, "learning_rate": 4.5024994049035944e-07, "logits/generated": 0.7030743360519409, "logits/real": -0.14197489619255066, "logps/generated": -579.800048828125, "logps/real": -438.91131591796875, "loss": 0.0879, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.57028579711914, "rewards/margins": 13.237577438354492, "rewards/real": -5.332709312438965, "step": 1770 }, { "epoch": 0.57, "learning_rate": 4.496548440847417e-07, "logits/generated": 0.7485989332199097, "logits/real": -0.2793040871620178, "logps/generated": -659.8019409179688, "logps/real": -463.54632568359375, "loss": 0.2121, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.170917510986328, "rewards/margins": 14.84669303894043, "rewards/real": -5.324224472045898, "step": 1780 }, { "epoch": 0.58, "learning_rate": 4.49059747679124e-07, "logits/generated": 1.0758802890777588, "logits/real": -0.006481161806732416, "logps/generated": -662.810791015625, "logps/real": -441.0035705566406, "loss": 0.0863, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.724292755126953, "rewards/margins": 16.245223999023438, "rewards/real": -4.479069709777832, "step": 1790 }, { "epoch": 0.58, "learning_rate": 4.484646512735063e-07, "logits/generated": 1.1917352676391602, "logits/real": 0.24964077770709991, "logps/generated": -683.35498046875, "logps/real": -439.3916931152344, "loss": 0.0824, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.53213119506836, "rewards/margins": 16.999732971191406, "rewards/real": -4.532394886016846, "step": 1800 }, { "epoch": 0.58, "learning_rate": 4.4786955486788856e-07, "logits/generated": 1.180478811264038, "logits/real": 0.44369906187057495, "logps/generated": -603.5838623046875, "logps/real": -481.0667419433594, "loss": 0.1781, "rewards/accuracies": 0.949999988079071, "rewards/generated": -18.881160736083984, "rewards/margins": 13.507619857788086, "rewards/real": -5.373543739318848, "step": 1810 }, { "epoch": 0.58, "learning_rate": 4.4727445846227086e-07, "logits/generated": 1.0639439821243286, "logits/real": 0.37180715799331665, "logps/generated": -582.93359375, "logps/real": -440.77569580078125, "loss": 0.1088, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.63724708557129, "rewards/margins": 13.778401374816895, "rewards/real": -4.85884428024292, "step": 1820 }, { "epoch": 0.59, "learning_rate": 4.4667936205665315e-07, "logits/generated": 1.000383734703064, "logits/real": 0.1557195484638214, "logps/generated": -655.4578857421875, "logps/real": -461.74346923828125, "loss": 0.127, "rewards/accuracies": 0.9375, "rewards/generated": -21.606306076049805, "rewards/margins": 14.98638916015625, "rewards/real": -6.619917392730713, "step": 1830 }, { "epoch": 0.59, "learning_rate": 4.4608426565103545e-07, "logits/generated": 1.184015154838562, "logits/real": 0.07904740422964096, "logps/generated": -566.90185546875, "logps/real": -445.81988525390625, "loss": 0.0555, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.59018325805664, "rewards/margins": 13.177793502807617, "rewards/real": -4.4123921394348145, "step": 1840 }, { "epoch": 0.59, "learning_rate": 4.4548916924541774e-07, "logits/generated": 1.4024584293365479, "logits/real": 0.4426136016845703, "logps/generated": -586.5557861328125, "logps/real": -471.76922607421875, "loss": 0.1215, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.957576751708984, "rewards/margins": 13.35058879852295, "rewards/real": -5.6069865226745605, "step": 1850 }, { "epoch": 0.6, "learning_rate": 4.4489407283980004e-07, "logits/generated": 1.6096948385238647, "logits/real": 0.6912265419960022, "logps/generated": -636.128662109375, "logps/real": -481.68243408203125, "loss": 0.1289, "rewards/accuracies": 0.949999988079071, "rewards/generated": -22.33319091796875, "rewards/margins": 13.920247077941895, "rewards/real": -8.412942886352539, "step": 1860 }, { "epoch": 0.6, "learning_rate": 4.442989764341823e-07, "logits/generated": 1.3153483867645264, "logits/real": 0.6630610227584839, "logps/generated": -679.8712768554688, "logps/real": -501.7373046875, "loss": 0.0994, "rewards/accuracies": 0.9375, "rewards/generated": -23.698484420776367, "rewards/margins": 13.750986099243164, "rewards/real": -9.947501182556152, "step": 1870 }, { "epoch": 0.6, "learning_rate": 4.437038800285646e-07, "logits/generated": 1.2106873989105225, "logits/real": 0.28888610005378723, "logps/generated": -592.4668579101562, "logps/real": -445.3599548339844, "loss": 0.1238, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.43438720703125, "rewards/margins": 13.247209548950195, "rewards/real": -7.1871771812438965, "step": 1880 }, { "epoch": 0.61, "learning_rate": 4.431087836229469e-07, "logits/generated": 1.4615843296051025, "logits/real": 0.25171416997909546, "logps/generated": -671.6376342773438, "logps/real": -500.7383728027344, "loss": 0.1401, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.331417083740234, "rewards/margins": 15.790310859680176, "rewards/real": -6.541104793548584, "step": 1890 }, { "epoch": 0.61, "learning_rate": 4.4251368721732916e-07, "logits/generated": 1.3813832998275757, "logits/real": 0.3438590466976166, "logps/generated": -642.7565307617188, "logps/real": -475.8926696777344, "loss": 0.1115, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.601709365844727, "rewards/margins": 15.173812866210938, "rewards/real": -6.427896022796631, "step": 1900 }, { "epoch": 0.61, "learning_rate": 4.4191859081171146e-07, "logits/generated": 1.229118824005127, "logits/real": 0.2723376452922821, "logps/generated": -606.1504516601562, "logps/real": -470.62469482421875, "loss": 0.1588, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.483226776123047, "rewards/margins": 14.734949111938477, "rewards/real": -6.748278617858887, "step": 1910 }, { "epoch": 0.62, "learning_rate": 4.413234944060938e-07, "logits/generated": 1.0301889181137085, "logits/real": 0.0052675218321383, "logps/generated": -601.0791015625, "logps/real": -477.5992736816406, "loss": 0.122, "rewards/accuracies": 0.949999988079071, "rewards/generated": -20.35313606262207, "rewards/margins": 13.284744262695312, "rewards/real": -7.068390846252441, "step": 1920 }, { "epoch": 0.62, "learning_rate": 4.4072839800047605e-07, "logits/generated": 1.5382087230682373, "logits/real": 0.24535174667835236, "logps/generated": -671.9805908203125, "logps/real": -477.30035400390625, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.372716903686523, "rewards/margins": 15.39643669128418, "rewards/real": -9.97628116607666, "step": 1930 }, { "epoch": 0.62, "learning_rate": 4.4013330159485834e-07, "logits/generated": 1.2175848484039307, "logits/real": -0.10958399623632431, "logps/generated": -669.2454833984375, "logps/real": -497.4383239746094, "loss": 0.0843, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -24.28799057006836, "rewards/margins": 16.57255744934082, "rewards/real": -7.715432167053223, "step": 1940 }, { "epoch": 0.63, "learning_rate": 4.3953820518924064e-07, "logits/generated": 0.7717699408531189, "logits/real": -0.5841304659843445, "logps/generated": -586.4393310546875, "logps/real": -471.82708740234375, "loss": 0.1555, "rewards/accuracies": 0.9375, "rewards/generated": -18.601816177368164, "rewards/margins": 12.578469276428223, "rewards/real": -6.023347854614258, "step": 1950 }, { "epoch": 0.63, "learning_rate": 4.3894310878362293e-07, "logits/generated": 0.6206039190292358, "logits/real": -0.5187448859214783, "logps/generated": -632.9306030273438, "logps/real": -451.200439453125, "loss": 0.1467, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.35729217529297, "rewards/margins": 14.714719772338867, "rewards/real": -5.642572402954102, "step": 1960 }, { "epoch": 0.63, "learning_rate": 4.3834801237800523e-07, "logits/generated": 0.4496919512748718, "logits/real": -0.2513660788536072, "logps/generated": -687.8447265625, "logps/real": -460.92547607421875, "loss": 0.0852, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.937829971313477, "rewards/margins": 14.947624206542969, "rewards/real": -6.990207672119141, "step": 1970 }, { "epoch": 0.64, "learning_rate": 4.377529159723875e-07, "logits/generated": 0.7417780160903931, "logits/real": -0.1339636892080307, "logps/generated": -679.2607421875, "logps/real": -420.50555419921875, "loss": 0.1334, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.68735694885254, "rewards/margins": 17.246950149536133, "rewards/real": -6.440406799316406, "step": 1980 }, { "epoch": 0.64, "learning_rate": 4.3715781956676976e-07, "logits/generated": 0.8819293975830078, "logits/real": -0.08830475807189941, "logps/generated": -707.3695068359375, "logps/real": -449.54498291015625, "loss": 0.1236, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.74288558959961, "rewards/margins": 16.584501266479492, "rewards/real": -7.158383369445801, "step": 1990 }, { "epoch": 0.64, "learning_rate": 4.365627231611521e-07, "logits/generated": 0.7114775776863098, "logits/real": -0.19845573604106903, "logps/generated": -626.62451171875, "logps/real": -417.2754821777344, "loss": 0.1656, "rewards/accuracies": 0.925000011920929, "rewards/generated": -22.421459197998047, "rewards/margins": 15.469587326049805, "rewards/real": -6.951870918273926, "step": 2000 }, { "epoch": 0.65, "learning_rate": 4.359676267555344e-07, "logits/generated": 1.2350739240646362, "logits/real": 0.25051772594451904, "logps/generated": -605.4368286132812, "logps/real": -475.13739013671875, "loss": 0.2356, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.76801300048828, "rewards/margins": 13.880704879760742, "rewards/real": -6.887307167053223, "step": 2010 }, { "epoch": 0.65, "learning_rate": 4.3537253034991665e-07, "logits/generated": 0.5416502952575684, "logits/real": -0.3646186888217926, "logps/generated": -604.8953857421875, "logps/real": -451.2520446777344, "loss": 0.165, "rewards/accuracies": 0.949999988079071, "rewards/generated": -18.870716094970703, "rewards/margins": 14.286008834838867, "rewards/real": -4.5847039222717285, "step": 2020 }, { "epoch": 0.65, "learning_rate": 4.3477743394429894e-07, "logits/generated": 1.174548864364624, "logits/real": -0.09084296226501465, "logps/generated": -590.6651000976562, "logps/real": -455.6598205566406, "loss": 0.0766, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.71969985961914, "rewards/margins": 14.48126220703125, "rewards/real": -6.238439559936523, "step": 2030 }, { "epoch": 0.66, "learning_rate": 4.3418233753868124e-07, "logits/generated": 1.0709384679794312, "logits/real": 0.21827688813209534, "logps/generated": -600.7669677734375, "logps/real": -432.707275390625, "loss": 0.0868, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.194637298583984, "rewards/margins": 15.0757417678833, "rewards/real": -5.118895053863525, "step": 2040 }, { "epoch": 0.66, "learning_rate": 4.3358724113306353e-07, "logits/generated": 1.1439318656921387, "logits/real": 0.04696197807788849, "logps/generated": -676.9769287109375, "logps/real": -433.456298828125, "loss": 0.0928, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.072307586669922, "rewards/margins": 15.534550666809082, "rewards/real": -6.537755012512207, "step": 2050 }, { "epoch": 0.66, "learning_rate": 4.3299214472744583e-07, "logits/generated": 0.7251248359680176, "logits/real": -0.062470000237226486, "logps/generated": -593.2071533203125, "logps/real": -442.0604553222656, "loss": 0.1187, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.837377548217773, "rewards/margins": 14.742944717407227, "rewards/real": -4.094433784484863, "step": 2060 }, { "epoch": 0.67, "learning_rate": 4.323970483218281e-07, "logits/generated": 1.1000020503997803, "logits/real": 0.16617831587791443, "logps/generated": -652.6090698242188, "logps/real": -470.9197692871094, "loss": 0.1089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.765390396118164, "rewards/margins": 14.730812072753906, "rewards/real": -7.034579277038574, "step": 2070 }, { "epoch": 0.67, "learning_rate": 4.3180195191621036e-07, "logits/generated": 1.0825153589248657, "logits/real": 0.27547433972358704, "logps/generated": -707.243896484375, "logps/real": -488.03057861328125, "loss": 0.1086, "rewards/accuracies": 0.9375, "rewards/generated": -24.917938232421875, "rewards/margins": 16.264972686767578, "rewards/real": -8.652966499328613, "step": 2080 }, { "epoch": 0.67, "learning_rate": 4.312068555105927e-07, "logits/generated": 0.9982080459594727, "logits/real": -0.2850358486175537, "logps/generated": -632.2254638671875, "logps/real": -438.5880432128906, "loss": 0.1378, "rewards/accuracies": 0.9375, "rewards/generated": -20.80510902404785, "rewards/margins": 15.820623397827148, "rewards/real": -4.984485149383545, "step": 2090 }, { "epoch": 0.67, "learning_rate": 4.30611759104975e-07, "logits/generated": 0.8128962516784668, "logits/real": -0.29134249687194824, "logps/generated": -603.084716796875, "logps/real": -433.2664489746094, "loss": 0.1467, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.682422637939453, "rewards/margins": 16.017141342163086, "rewards/real": -3.66528058052063, "step": 2100 }, { "epoch": 0.68, "learning_rate": 4.3001666269935725e-07, "logits/generated": 0.9158611297607422, "logits/real": -0.016733283177018166, "logps/generated": -655.6238403320312, "logps/real": -451.81884765625, "loss": 0.0953, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.88642120361328, "rewards/margins": 16.277923583984375, "rewards/real": -5.608496189117432, "step": 2110 }, { "epoch": 0.68, "learning_rate": 4.2942156629373954e-07, "logits/generated": 1.1155306100845337, "logits/real": 0.05211225897073746, "logps/generated": -685.4801635742188, "logps/real": -491.64727783203125, "loss": 0.0956, "rewards/accuracies": 0.9375, "rewards/generated": -22.24175453186035, "rewards/margins": 16.263568878173828, "rewards/real": -5.978183746337891, "step": 2120 }, { "epoch": 0.68, "learning_rate": 4.288264698881219e-07, "logits/generated": 1.1729493141174316, "logits/real": 0.21658125519752502, "logps/generated": -662.4814453125, "logps/real": -441.9817810058594, "loss": 0.1995, "rewards/accuracies": 0.9375, "rewards/generated": -20.412508010864258, "rewards/margins": 16.586462020874023, "rewards/real": -3.826045513153076, "step": 2130 }, { "epoch": 0.69, "learning_rate": 4.2823137348250413e-07, "logits/generated": 0.9209669232368469, "logits/real": 0.2127465009689331, "logps/generated": -780.4097290039062, "logps/real": -442.49993896484375, "loss": 0.0645, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.382080078125, "rewards/margins": 21.51984405517578, "rewards/real": -5.862236499786377, "step": 2140 }, { "epoch": 0.69, "learning_rate": 4.2763627707688643e-07, "logits/generated": 1.5670409202575684, "logits/real": 0.5017860531806946, "logps/generated": -602.9078369140625, "logps/real": -468.22113037109375, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.46792984008789, "rewards/margins": 15.233144760131836, "rewards/real": -6.2347822189331055, "step": 2150 }, { "epoch": 0.69, "learning_rate": 4.270411806712687e-07, "logits/generated": 1.0000498294830322, "logits/real": 0.13837209343910217, "logps/generated": -686.1837158203125, "logps/real": -446.4208984375, "loss": 0.1984, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -22.511646270751953, "rewards/margins": 17.19004249572754, "rewards/real": -5.321603775024414, "step": 2160 }, { "epoch": 0.7, "learning_rate": 4.26446084265651e-07, "logits/generated": 1.5698037147521973, "logits/real": 0.6897302865982056, "logps/generated": -677.3275146484375, "logps/real": -452.85455322265625, "loss": 0.1499, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.572689056396484, "rewards/margins": 17.939279556274414, "rewards/real": -7.633410453796387, "step": 2170 }, { "epoch": 0.7, "learning_rate": 4.258509878600333e-07, "logits/generated": 1.6071172952651978, "logits/real": 1.1992182731628418, "logps/generated": -628.3941650390625, "logps/real": -456.18408203125, "loss": 0.1377, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.96967315673828, "rewards/margins": 15.923495292663574, "rewards/real": -8.046175956726074, "step": 2180 }, { "epoch": 0.7, "learning_rate": 4.252558914544156e-07, "logits/generated": 1.6417099237442017, "logits/real": 0.7819353342056274, "logps/generated": -656.7808837890625, "logps/real": -400.19305419921875, "loss": 0.1321, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.775333404541016, "rewards/margins": 18.094524383544922, "rewards/real": -5.680811405181885, "step": 2190 }, { "epoch": 0.71, "learning_rate": 4.2466079504879785e-07, "logits/generated": 1.8996002674102783, "logits/real": 0.6976447105407715, "logps/generated": -670.1383056640625, "logps/real": -436.518310546875, "loss": 0.1094, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -24.21951675415039, "rewards/margins": 17.48426055908203, "rewards/real": -6.73525857925415, "step": 2200 }, { "epoch": 0.71, "learning_rate": 4.240656986431802e-07, "logits/generated": 1.0037071704864502, "logits/real": 0.15993377566337585, "logps/generated": -693.8859252929688, "logps/real": -521.7453002929688, "loss": 0.1379, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.03115463256836, "rewards/margins": 16.182323455810547, "rewards/real": -4.84883451461792, "step": 2210 }, { "epoch": 0.71, "learning_rate": 4.234706022375625e-07, "logits/generated": 1.0976048707962036, "logits/real": -0.4111405313014984, "logps/generated": -582.8529663085938, "logps/real": -447.2684020996094, "loss": 0.0853, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.977855682373047, "rewards/margins": 15.011520385742188, "rewards/real": -5.96633768081665, "step": 2220 }, { "epoch": 0.72, "learning_rate": 4.2287550583194473e-07, "logits/generated": 1.166113257408142, "logits/real": 0.2641443610191345, "logps/generated": -606.8047485351562, "logps/real": -480.0673828125, "loss": 0.103, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.531431198120117, "rewards/margins": 15.159482955932617, "rewards/real": -6.371949195861816, "step": 2230 }, { "epoch": 0.72, "learning_rate": 4.2228040942632703e-07, "logits/generated": 1.039516806602478, "logits/real": -0.32285696268081665, "logps/generated": -642.9342041015625, "logps/real": -473.72991943359375, "loss": 0.1508, "rewards/accuracies": 0.9375, "rewards/generated": -22.065031051635742, "rewards/margins": 15.44611644744873, "rewards/real": -6.6189165115356445, "step": 2240 }, { "epoch": 0.72, "learning_rate": 4.216853130207093e-07, "logits/generated": 1.360559105873108, "logits/real": 0.23350267112255096, "logps/generated": -660.4410400390625, "logps/real": -475.9117736816406, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/generated": -25.624866485595703, "rewards/margins": 18.860502243041992, "rewards/real": -6.76436710357666, "step": 2250 }, { "epoch": 0.73, "learning_rate": 4.210902166150916e-07, "logits/generated": 1.6995837688446045, "logits/real": 0.22159907221794128, "logps/generated": -708.10693359375, "logps/real": -457.06689453125, "loss": 0.0928, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -27.035980224609375, "rewards/margins": 18.932140350341797, "rewards/real": -8.103840827941895, "step": 2260 }, { "epoch": 0.73, "learning_rate": 4.204951202094739e-07, "logits/generated": 1.3365023136138916, "logits/real": -0.08979900181293488, "logps/generated": -755.7864990234375, "logps/real": -482.84942626953125, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/generated": -27.070831298828125, "rewards/margins": 22.39603042602539, "rewards/real": -4.674804210662842, "step": 2270 }, { "epoch": 0.73, "learning_rate": 4.199000238038562e-07, "logits/generated": 1.3093631267547607, "logits/real": 0.5447927713394165, "logps/generated": -597.0226440429688, "logps/real": -470.2562561035156, "loss": 0.1432, "rewards/accuracies": 0.949999988079071, "rewards/generated": -20.414518356323242, "rewards/margins": 14.831563949584961, "rewards/real": -5.582953453063965, "step": 2280 }, { "epoch": 0.74, "learning_rate": 4.1930492739823845e-07, "logits/generated": 1.2069238424301147, "logits/real": -0.04555271938443184, "logps/generated": -747.2056274414062, "logps/real": -477.55078125, "loss": 0.1163, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -25.814773559570312, "rewards/margins": 19.156614303588867, "rewards/real": -6.658161163330078, "step": 2290 }, { "epoch": 0.74, "learning_rate": 4.187098309926208e-07, "logits/generated": 1.1571060419082642, "logits/real": 0.37931182980537415, "logps/generated": -585.6158447265625, "logps/real": -449.7051696777344, "loss": 0.2165, "rewards/accuracies": 0.925000011920929, "rewards/generated": -19.866413116455078, "rewards/margins": 14.944169998168945, "rewards/real": -4.9222412109375, "step": 2300 }, { "epoch": 0.74, "learning_rate": 4.181147345870031e-07, "logits/generated": 1.3476543426513672, "logits/real": -0.09041701257228851, "logps/generated": -700.3237915039062, "logps/real": -479.28741455078125, "loss": 0.1239, "rewards/accuracies": 0.9375, "rewards/generated": -24.448787689208984, "rewards/margins": 17.267478942871094, "rewards/real": -7.181310176849365, "step": 2310 }, { "epoch": 0.75, "learning_rate": 4.1751963818138534e-07, "logits/generated": 1.463550329208374, "logits/real": 0.2527451515197754, "logps/generated": -656.2312622070312, "logps/real": -462.66796875, "loss": 0.0598, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.757957458496094, "rewards/margins": 18.187570571899414, "rewards/real": -5.570387840270996, "step": 2320 }, { "epoch": 0.75, "learning_rate": 4.1692454177576763e-07, "logits/generated": 1.7917886972427368, "logits/real": 0.3991376757621765, "logps/generated": -679.2438354492188, "logps/real": -466.888671875, "loss": 0.0622, "rewards/accuracies": 0.949999988079071, "rewards/generated": -24.200347900390625, "rewards/margins": 17.97238540649414, "rewards/real": -6.227959632873535, "step": 2330 }, { "epoch": 0.75, "learning_rate": 4.1632944537015e-07, "logits/generated": 1.4638440608978271, "logits/real": 0.17642351984977722, "logps/generated": -636.1854858398438, "logps/real": -497.65985107421875, "loss": 0.1428, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -22.347293853759766, "rewards/margins": 16.1359806060791, "rewards/real": -6.211312294006348, "step": 2340 }, { "epoch": 0.76, "learning_rate": 4.157343489645322e-07, "logits/generated": 0.5574540495872498, "logits/real": -0.2585034966468811, "logps/generated": -602.3259887695312, "logps/real": -387.1542663574219, "loss": 0.1159, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.966516494750977, "rewards/margins": 14.315078735351562, "rewards/real": -3.651435375213623, "step": 2350 }, { "epoch": 0.76, "learning_rate": 4.151392525589145e-07, "logits/generated": 1.1736016273498535, "logits/real": -0.27645593881607056, "logps/generated": -541.8629150390625, "logps/real": -402.4732971191406, "loss": 0.0831, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.846296310424805, "rewards/margins": 16.3344783782959, "rewards/real": -3.5118186473846436, "step": 2360 }, { "epoch": 0.76, "learning_rate": 4.145441561532968e-07, "logits/generated": 1.1403225660324097, "logits/real": 0.1481081247329712, "logps/generated": -624.2811889648438, "logps/real": -453.93096923828125, "loss": 0.1542, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.442745208740234, "rewards/margins": 15.996304512023926, "rewards/real": -4.446438789367676, "step": 2370 }, { "epoch": 0.76, "learning_rate": 4.139490597476791e-07, "logits/generated": 1.2319636344909668, "logits/real": 0.09550313651561737, "logps/generated": -662.0890502929688, "logps/real": -473.3858337402344, "loss": 0.1272, "rewards/accuracies": 0.9375, "rewards/generated": -21.983417510986328, "rewards/margins": 16.918115615844727, "rewards/real": -5.065304756164551, "step": 2380 }, { "epoch": 0.77, "learning_rate": 4.133539633420614e-07, "logits/generated": 1.037402629852295, "logits/real": -0.269021213054657, "logps/generated": -610.9610595703125, "logps/real": -393.1529541015625, "loss": 0.1488, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -21.803951263427734, "rewards/margins": 16.398584365844727, "rewards/real": -5.405367374420166, "step": 2390 }, { "epoch": 0.77, "learning_rate": 4.127588669364437e-07, "logits/generated": 1.5463179349899292, "logits/real": 0.7788742780685425, "logps/generated": -598.1651611328125, "logps/real": -419.4200744628906, "loss": 0.0468, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.083148956298828, "rewards/margins": 17.17831802368164, "rewards/real": -3.904832124710083, "step": 2400 }, { "epoch": 0.77, "learning_rate": 4.1216377053082594e-07, "logits/generated": 1.1087303161621094, "logits/real": 0.15012915432453156, "logps/generated": -679.88330078125, "logps/real": -365.9043884277344, "loss": 0.212, "rewards/accuracies": 0.9375, "rewards/generated": -24.497900009155273, "rewards/margins": 18.293258666992188, "rewards/real": -6.204642295837402, "step": 2410 }, { "epoch": 0.78, "learning_rate": 4.115686741252083e-07, "logits/generated": 1.7481863498687744, "logits/real": 0.7498850226402283, "logps/generated": -659.528076171875, "logps/real": -405.57037353515625, "loss": 0.0779, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.178714752197266, "rewards/margins": 16.73876190185547, "rewards/real": -5.4399542808532715, "step": 2420 }, { "epoch": 0.78, "learning_rate": 4.109735777195906e-07, "logits/generated": 1.3580553531646729, "logits/real": 0.2673123776912689, "logps/generated": -603.6868896484375, "logps/real": -419.85467529296875, "loss": 0.1142, "rewards/accuracies": 0.949999988079071, "rewards/generated": -20.102436065673828, "rewards/margins": 16.28812026977539, "rewards/real": -3.8143131732940674, "step": 2430 }, { "epoch": 0.78, "learning_rate": 4.103784813139728e-07, "logits/generated": 1.659716248512268, "logits/real": 0.6744736433029175, "logps/generated": -665.5172729492188, "logps/real": -424.14501953125, "loss": 0.1547, "rewards/accuracies": 0.9375, "rewards/generated": -20.445209503173828, "rewards/margins": 15.716888427734375, "rewards/real": -4.728322505950928, "step": 2440 }, { "epoch": 0.79, "learning_rate": 4.097833849083551e-07, "logits/generated": 2.2902350425720215, "logits/real": 1.197037935256958, "logps/generated": -655.159423828125, "logps/real": -413.08367919921875, "loss": 0.2203, "rewards/accuracies": 0.925000011920929, "rewards/generated": -23.0451602935791, "rewards/margins": 16.372943878173828, "rewards/real": -6.67221736907959, "step": 2450 }, { "epoch": 0.79, "learning_rate": 4.091882885027374e-07, "logits/generated": 1.7905441522598267, "logits/real": 1.0759319067001343, "logps/generated": -657.9566040039062, "logps/real": -475.89178466796875, "loss": 0.1436, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.567241668701172, "rewards/margins": 16.844799041748047, "rewards/real": -6.722445487976074, "step": 2460 }, { "epoch": 0.79, "learning_rate": 4.0859319209711976e-07, "logits/generated": 1.5683486461639404, "logits/real": 0.9022798538208008, "logps/generated": -698.0650634765625, "logps/real": -500.62091064453125, "loss": 0.1446, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.320369720458984, "rewards/margins": 16.500619888305664, "rewards/real": -8.819746017456055, "step": 2470 }, { "epoch": 0.8, "learning_rate": 4.07998095691502e-07, "logits/generated": 1.8525025844573975, "logits/real": 0.8823596835136414, "logps/generated": -689.376220703125, "logps/real": -488.66802978515625, "loss": 0.0841, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.453453063964844, "rewards/margins": 15.652555465698242, "rewards/real": -6.800897121429443, "step": 2480 }, { "epoch": 0.8, "learning_rate": 4.074029992858843e-07, "logits/generated": 1.4011659622192383, "logits/real": 0.8053063154220581, "logps/generated": -657.358642578125, "logps/real": -424.69537353515625, "loss": 0.1434, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.598224639892578, "rewards/margins": 13.740839958190918, "rewards/real": -7.857385158538818, "step": 2490 }, { "epoch": 0.8, "learning_rate": 4.0680790288026654e-07, "logits/generated": 1.6497198343276978, "logits/real": 1.0142757892608643, "logps/generated": -698.167236328125, "logps/real": -498.84912109375, "loss": 0.0661, "rewards/accuracies": 0.949999988079071, "rewards/generated": -25.35224723815918, "rewards/margins": 15.73254680633545, "rewards/real": -9.619702339172363, "step": 2500 }, { "epoch": 0.81, "learning_rate": 4.062128064746489e-07, "logits/generated": 2.2319440841674805, "logits/real": 1.0847914218902588, "logps/generated": -697.7644653320312, "logps/real": -507.31414794921875, "loss": 0.0825, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.45355796813965, "rewards/margins": 18.98246955871582, "rewards/real": -9.471087455749512, "step": 2510 }, { "epoch": 0.81, "learning_rate": 4.056177100690312e-07, "logits/generated": 1.697690725326538, "logits/real": 0.9456738233566284, "logps/generated": -719.6398315429688, "logps/real": -487.04144287109375, "loss": 0.2062, "rewards/accuracies": 0.949999988079071, "rewards/generated": -24.45000648498535, "rewards/margins": 16.116748809814453, "rewards/real": -8.333260536193848, "step": 2520 }, { "epoch": 0.81, "learning_rate": 4.050226136634135e-07, "logits/generated": 1.739855408668518, "logits/real": 0.7909706234931946, "logps/generated": -791.3806762695312, "logps/real": -523.274658203125, "loss": 0.0609, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.29513168334961, "rewards/margins": 17.597150802612305, "rewards/real": -7.6979780197143555, "step": 2530 }, { "epoch": 0.82, "learning_rate": 4.044275172577957e-07, "logits/generated": 1.6763756275177002, "logits/real": 0.8947393298149109, "logps/generated": -647.4367065429688, "logps/real": -394.94757080078125, "loss": 0.1395, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.249563217163086, "rewards/margins": 15.033239364624023, "rewards/real": -8.216323852539062, "step": 2540 }, { "epoch": 0.82, "learning_rate": 4.0383242085217806e-07, "logits/generated": 1.2965757846832275, "logits/real": 0.4431212544441223, "logps/generated": -663.156982421875, "logps/real": -453.704345703125, "loss": 0.1922, "rewards/accuracies": 0.9375, "rewards/generated": -24.153026580810547, "rewards/margins": 16.11758804321289, "rewards/real": -8.035436630249023, "step": 2550 }, { "epoch": 0.82, "learning_rate": 4.0323732444656036e-07, "logits/generated": 1.4699618816375732, "logits/real": 0.9690890312194824, "logps/generated": -620.1663208007812, "logps/real": -442.7039489746094, "loss": 0.0873, "rewards/accuracies": 0.9375, "rewards/generated": -23.336048126220703, "rewards/margins": 13.991986274719238, "rewards/real": -9.344061851501465, "step": 2560 }, { "epoch": 0.83, "learning_rate": 4.026422280409426e-07, "logits/generated": 1.4466969966888428, "logits/real": 0.436715304851532, "logps/generated": -669.3927001953125, "logps/real": -469.9801330566406, "loss": 0.1589, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.13608169555664, "rewards/margins": 18.560630798339844, "rewards/real": -6.5754499435424805, "step": 2570 }, { "epoch": 0.83, "learning_rate": 4.020471316353249e-07, "logits/generated": 1.6500829458236694, "logits/real": 0.4353089928627014, "logps/generated": -612.4130859375, "logps/real": -407.9613342285156, "loss": 0.1333, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.937171936035156, "rewards/margins": 17.223447799682617, "rewards/real": -5.713723182678223, "step": 2580 }, { "epoch": 0.83, "learning_rate": 4.0145203522970724e-07, "logits/generated": 1.7563555240631104, "logits/real": 0.760065495967865, "logps/generated": -632.6456298828125, "logps/real": -454.03369140625, "loss": 0.0715, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.2701473236084, "rewards/margins": 17.838836669921875, "rewards/real": -5.431310653686523, "step": 2590 }, { "epoch": 0.84, "learning_rate": 4.008569388240895e-07, "logits/generated": 2.1055586338043213, "logits/real": 0.852258026599884, "logps/generated": -714.2008056640625, "logps/real": -456.05340576171875, "loss": 0.1267, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -28.52945327758789, "rewards/margins": 21.305814743041992, "rewards/real": -7.223639011383057, "step": 2600 }, { "epoch": 0.84, "learning_rate": 4.002618424184718e-07, "logits/generated": 2.0347981452941895, "logits/real": 0.5782783627510071, "logps/generated": -591.4415283203125, "logps/real": -437.5423889160156, "loss": 0.1038, "rewards/accuracies": 0.9375, "rewards/generated": -23.187183380126953, "rewards/margins": 16.621183395385742, "rewards/real": -6.566000461578369, "step": 2610 }, { "epoch": 0.84, "learning_rate": 3.996667460128541e-07, "logits/generated": 1.9191009998321533, "logits/real": 0.9071873426437378, "logps/generated": -693.7344360351562, "logps/real": -458.44244384765625, "loss": 0.1234, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.760778427124023, "rewards/margins": 18.74248695373535, "rewards/real": -7.0182905197143555, "step": 2620 }, { "epoch": 0.85, "learning_rate": 3.990716496072363e-07, "logits/generated": 1.681870698928833, "logits/real": 1.0849336385726929, "logps/generated": -651.5535278320312, "logps/real": -476.86138916015625, "loss": 0.1849, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.717464447021484, "rewards/margins": 15.76073932647705, "rewards/real": -4.956723213195801, "step": 2630 }, { "epoch": 0.85, "learning_rate": 3.9847655320161867e-07, "logits/generated": 1.2117068767547607, "logits/real": 0.5563064813613892, "logps/generated": -583.0098876953125, "logps/real": -424.4280700683594, "loss": 0.1632, "rewards/accuracies": 0.9375, "rewards/generated": -19.803607940673828, "rewards/margins": 15.817873001098633, "rewards/real": -3.9857354164123535, "step": 2640 }, { "epoch": 0.85, "learning_rate": 3.9788145679600096e-07, "logits/generated": 1.6774489879608154, "logits/real": 0.3074805438518524, "logps/generated": -620.2223510742188, "logps/real": -454.95379638671875, "loss": 0.1221, "rewards/accuracies": 0.9375, "rewards/generated": -20.706676483154297, "rewards/margins": 14.849881172180176, "rewards/real": -5.856795787811279, "step": 2650 }, { "epoch": 0.85, "learning_rate": 3.972863603903832e-07, "logits/generated": 2.0141940116882324, "logits/real": 0.47630470991134644, "logps/generated": -658.5839233398438, "logps/real": -417.65771484375, "loss": 0.107, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.86336326599121, "rewards/margins": 16.918996810913086, "rewards/real": -4.944365501403809, "step": 2660 }, { "epoch": 0.86, "learning_rate": 3.966912639847655e-07, "logits/generated": 1.5835866928100586, "logits/real": 0.5641391277313232, "logps/generated": -650.1529541015625, "logps/real": -478.546142578125, "loss": 0.1263, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.58686637878418, "rewards/margins": 16.488636016845703, "rewards/real": -5.098231315612793, "step": 2670 }, { "epoch": 0.86, "learning_rate": 3.9609616757914784e-07, "logits/generated": 2.462594985961914, "logits/real": 1.5123826265335083, "logps/generated": -688.5294189453125, "logps/real": -493.29925537109375, "loss": 0.0993, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.49506187438965, "rewards/margins": 15.3764009475708, "rewards/real": -7.118662357330322, "step": 2680 }, { "epoch": 0.86, "learning_rate": 3.955010711735301e-07, "logits/generated": 2.1065256595611572, "logits/real": 0.8846191167831421, "logps/generated": -685.9591674804688, "logps/real": -508.6744079589844, "loss": 0.1373, "rewards/accuracies": 0.949999988079071, "rewards/generated": -25.060028076171875, "rewards/margins": 17.572795867919922, "rewards/real": -7.487231254577637, "step": 2690 }, { "epoch": 0.87, "learning_rate": 3.949059747679124e-07, "logits/generated": 1.799756407737732, "logits/real": 0.8131219148635864, "logps/generated": -610.969482421875, "logps/real": -436.1451721191406, "loss": 0.1217, "rewards/accuracies": 0.9375, "rewards/generated": -20.828516006469727, "rewards/margins": 15.434659004211426, "rewards/real": -5.393857002258301, "step": 2700 }, { "epoch": 0.87, "learning_rate": 3.943108783622947e-07, "logits/generated": 1.7484347820281982, "logits/real": 0.4647720456123352, "logps/generated": -609.5855712890625, "logps/real": -463.22186279296875, "loss": 0.1691, "rewards/accuracies": 0.925000011920929, "rewards/generated": -21.302770614624023, "rewards/margins": 16.369754791259766, "rewards/real": -4.933016777038574, "step": 2710 }, { "epoch": 0.87, "learning_rate": 3.9371578195667697e-07, "logits/generated": 1.705211877822876, "logits/real": 0.35285013914108276, "logps/generated": -622.8514404296875, "logps/real": -449.4032287597656, "loss": 0.1544, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.880168914794922, "rewards/margins": 16.88746452331543, "rewards/real": -6.992703914642334, "step": 2720 }, { "epoch": 0.88, "learning_rate": 3.9312068555105927e-07, "logits/generated": 1.6071208715438843, "logits/real": 0.5557805299758911, "logps/generated": -608.6546630859375, "logps/real": -424.5992126464844, "loss": 0.0895, "rewards/accuracies": 0.925000011920929, "rewards/generated": -21.85446548461914, "rewards/margins": 14.656303405761719, "rewards/real": -7.198163032531738, "step": 2730 }, { "epoch": 0.88, "learning_rate": 3.9252558914544156e-07, "logits/generated": 1.756801962852478, "logits/real": 0.5307028293609619, "logps/generated": -774.9449462890625, "logps/real": -499.4449768066406, "loss": 0.096, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.977970123291016, "rewards/margins": 18.38821792602539, "rewards/real": -7.589752197265625, "step": 2740 }, { "epoch": 0.88, "learning_rate": 3.919304927398238e-07, "logits/generated": 1.8678667545318604, "logits/real": 0.4454065263271332, "logps/generated": -667.2235107421875, "logps/real": -512.75439453125, "loss": 0.0781, "rewards/accuracies": 0.9375, "rewards/generated": -23.759370803833008, "rewards/margins": 16.69193458557129, "rewards/real": -7.067436218261719, "step": 2750 }, { "epoch": 0.89, "learning_rate": 3.9133539633420615e-07, "logits/generated": 1.624516487121582, "logits/real": 0.740147590637207, "logps/generated": -687.477294921875, "logps/real": -479.264404296875, "loss": 0.1949, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.365718841552734, "rewards/margins": 15.9913969039917, "rewards/real": -7.374321937561035, "step": 2760 }, { "epoch": 0.89, "learning_rate": 3.9074029992858845e-07, "logits/generated": 1.9105228185653687, "logits/real": 0.58503258228302, "logps/generated": -703.8897094726562, "logps/real": -465.3978576660156, "loss": 0.0529, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -24.54638671875, "rewards/margins": 16.3099308013916, "rewards/real": -8.236456871032715, "step": 2770 }, { "epoch": 0.89, "learning_rate": 3.901452035229707e-07, "logits/generated": 1.9742530584335327, "logits/real": 0.9231522679328918, "logps/generated": -711.0045166015625, "logps/real": -454.5599670410156, "loss": 0.1347, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.938568115234375, "rewards/margins": 17.882389068603516, "rewards/real": -8.056180953979492, "step": 2780 }, { "epoch": 0.9, "learning_rate": 3.89550107117353e-07, "logits/generated": 1.9007511138916016, "logits/real": 0.6400322318077087, "logps/generated": -743.71142578125, "logps/real": -463.73455810546875, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/generated": -25.113950729370117, "rewards/margins": 18.267871856689453, "rewards/real": -6.846077919006348, "step": 2790 }, { "epoch": 0.9, "learning_rate": 3.8895501071173533e-07, "logits/generated": 1.8643954992294312, "logits/real": 0.8374252319335938, "logps/generated": -603.0673828125, "logps/real": -400.63763427734375, "loss": 0.1211, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.2904109954834, "rewards/margins": 15.732434272766113, "rewards/real": -5.557978630065918, "step": 2800 }, { "epoch": 0.9, "learning_rate": 3.8835991430611757e-07, "logits/generated": 2.0988786220550537, "logits/real": 0.9499675035476685, "logps/generated": -675.1707763671875, "logps/real": -496.8114318847656, "loss": 0.1602, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.068920135498047, "rewards/margins": 16.41037368774414, "rewards/real": -5.65854549407959, "step": 2810 }, { "epoch": 0.91, "learning_rate": 3.8776481790049987e-07, "logits/generated": 2.0510239601135254, "logits/real": 0.5466877818107605, "logps/generated": -585.0289306640625, "logps/real": -406.4467468261719, "loss": 0.1463, "rewards/accuracies": 0.949999988079071, "rewards/generated": -20.89907455444336, "rewards/margins": 14.988405227661133, "rewards/real": -5.910671710968018, "step": 2820 }, { "epoch": 0.91, "learning_rate": 3.8716972149488216e-07, "logits/generated": 1.7208560705184937, "logits/real": 0.4842369556427002, "logps/generated": -727.0315551757812, "logps/real": -478.61578369140625, "loss": 0.165, "rewards/accuracies": 0.9375, "rewards/generated": -23.6444091796875, "rewards/margins": 17.922069549560547, "rewards/real": -5.722338676452637, "step": 2830 }, { "epoch": 0.91, "learning_rate": 3.865746250892644e-07, "logits/generated": 2.126579999923706, "logits/real": 0.9012452363967896, "logps/generated": -693.7175903320312, "logps/real": -461.068603515625, "loss": 0.1526, "rewards/accuracies": 0.9375, "rewards/generated": -26.0562686920166, "rewards/margins": 18.208553314208984, "rewards/real": -7.847714900970459, "step": 2840 }, { "epoch": 0.92, "learning_rate": 3.8597952868364675e-07, "logits/generated": 1.9919145107269287, "logits/real": 0.7750564813613892, "logps/generated": -691.1611328125, "logps/real": -516.2545166015625, "loss": 0.1119, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.196273803710938, "rewards/margins": 18.44356918334961, "rewards/real": -6.752704620361328, "step": 2850 }, { "epoch": 0.92, "learning_rate": 3.8538443227802905e-07, "logits/generated": 2.6324565410614014, "logits/real": 1.2637501955032349, "logps/generated": -753.69384765625, "logps/real": -479.24169921875, "loss": 0.1089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.343191146850586, "rewards/margins": 22.25613784790039, "rewards/real": -9.087053298950195, "step": 2860 }, { "epoch": 0.92, "learning_rate": 3.847893358724113e-07, "logits/generated": 2.4507477283477783, "logits/real": 1.5249906778335571, "logps/generated": -649.0636596679688, "logps/real": -424.68695068359375, "loss": 0.1912, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.07471466064453, "rewards/margins": 16.885095596313477, "rewards/real": -6.1896185874938965, "step": 2870 }, { "epoch": 0.93, "learning_rate": 3.841942394667936e-07, "logits/generated": 2.339313507080078, "logits/real": 1.0518441200256348, "logps/generated": -640.6937255859375, "logps/real": -464.7792053222656, "loss": 0.1013, "rewards/accuracies": 0.925000011920929, "rewards/generated": -20.716344833374023, "rewards/margins": 15.540962219238281, "rewards/real": -5.175383567810059, "step": 2880 }, { "epoch": 0.93, "learning_rate": 3.8359914306117593e-07, "logits/generated": 2.0106041431427, "logits/real": 0.9860283732414246, "logps/generated": -672.1895751953125, "logps/real": -474.09576416015625, "loss": 0.096, "rewards/accuracies": 0.925000011920929, "rewards/generated": -23.003826141357422, "rewards/margins": 16.14929962158203, "rewards/real": -6.854525566101074, "step": 2890 }, { "epoch": 0.93, "learning_rate": 3.8300404665555817e-07, "logits/generated": 2.1366891860961914, "logits/real": 0.7132225632667542, "logps/generated": -639.6456298828125, "logps/real": -491.59686279296875, "loss": 0.0807, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.65061378479004, "rewards/margins": 18.138324737548828, "rewards/real": -4.512289524078369, "step": 2900 }, { "epoch": 0.94, "learning_rate": 3.8240895024994047e-07, "logits/generated": 2.0520312786102295, "logits/real": 0.8536787033081055, "logps/generated": -642.2930908203125, "logps/real": -519.9813232421875, "loss": 0.0723, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.232421875, "rewards/margins": 15.981023788452148, "rewards/real": -9.251399040222168, "step": 2910 }, { "epoch": 0.94, "learning_rate": 3.8181385384432276e-07, "logits/generated": 2.5133538246154785, "logits/real": 1.2602908611297607, "logps/generated": -648.4487915039062, "logps/real": -475.26611328125, "loss": 0.1389, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.82448959350586, "rewards/margins": 17.464475631713867, "rewards/real": -7.360013484954834, "step": 2920 }, { "epoch": 0.94, "learning_rate": 3.8121875743870506e-07, "logits/generated": 2.1093783378601074, "logits/real": 1.2038170099258423, "logps/generated": -630.2153930664062, "logps/real": -448.27490234375, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/generated": -22.69088363647461, "rewards/margins": 15.852401733398438, "rewards/real": -6.838482856750488, "step": 2930 }, { "epoch": 0.94, "learning_rate": 3.8062366103308735e-07, "logits/generated": 2.2254674434661865, "logits/real": 1.6502078771591187, "logps/generated": -716.8702392578125, "logps/real": -420.49530029296875, "loss": 0.0941, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.63828468322754, "rewards/margins": 18.89675521850586, "rewards/real": -6.741529941558838, "step": 2940 }, { "epoch": 0.95, "learning_rate": 3.8002856462746965e-07, "logits/generated": 2.150106430053711, "logits/real": 1.3666409254074097, "logps/generated": -654.9771728515625, "logps/real": -392.6027526855469, "loss": 0.1527, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.518451690673828, "rewards/margins": 18.933677673339844, "rewards/real": -6.584771156311035, "step": 2950 }, { "epoch": 0.95, "learning_rate": 3.794334682218519e-07, "logits/generated": 2.322861671447754, "logits/real": 1.2566105127334595, "logps/generated": -655.5877075195312, "logps/real": -419.9671325683594, "loss": 0.0903, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.30948257446289, "rewards/margins": 16.545915603637695, "rewards/real": -6.763568878173828, "step": 2960 }, { "epoch": 0.95, "learning_rate": 3.7883837181623424e-07, "logits/generated": 2.129446268081665, "logits/real": 1.5759432315826416, "logps/generated": -679.4102783203125, "logps/real": -492.904541015625, "loss": 0.1525, "rewards/accuracies": 0.949999988079071, "rewards/generated": -25.48388671875, "rewards/margins": 17.235952377319336, "rewards/real": -8.247933387756348, "step": 2970 }, { "epoch": 0.96, "learning_rate": 3.7824327541061653e-07, "logits/generated": 2.421316385269165, "logits/real": 1.5830551385879517, "logps/generated": -724.2254638671875, "logps/real": -477.78729248046875, "loss": 0.099, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.972797393798828, "rewards/margins": 20.960155487060547, "rewards/real": -7.012637138366699, "step": 2980 }, { "epoch": 0.96, "learning_rate": 3.776481790049988e-07, "logits/generated": 2.3155086040496826, "logits/real": 1.251240611076355, "logps/generated": -633.3641357421875, "logps/real": -450.9847717285156, "loss": 0.1466, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -24.90486717224121, "rewards/margins": 17.912742614746094, "rewards/real": -6.992125511169434, "step": 2990 }, { "epoch": 0.96, "learning_rate": 3.7705308259938107e-07, "logits/generated": 2.0880470275878906, "logits/real": 1.4387776851654053, "logps/generated": -686.9122314453125, "logps/real": -469.4881286621094, "loss": 0.125, "rewards/accuracies": 0.949999988079071, "rewards/generated": -22.322071075439453, "rewards/margins": 15.308202743530273, "rewards/real": -7.013866424560547, "step": 3000 }, { "epoch": 0.97, "learning_rate": 3.764579861937634e-07, "logits/generated": 2.4441990852355957, "logits/real": 1.7256838083267212, "logps/generated": -672.1497802734375, "logps/real": -462.963134765625, "loss": 0.1122, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.562822341918945, "rewards/margins": 17.964797973632812, "rewards/real": -5.598027229309082, "step": 3010 }, { "epoch": 0.97, "learning_rate": 3.7586288978814566e-07, "logits/generated": 2.4153993129730225, "logits/real": 1.8128273487091064, "logps/generated": -659.8216552734375, "logps/real": -466.326171875, "loss": 0.1103, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.243799209594727, "rewards/margins": 16.617708206176758, "rewards/real": -8.626090049743652, "step": 3020 }, { "epoch": 0.97, "learning_rate": 3.7526779338252795e-07, "logits/generated": 2.2986738681793213, "logits/real": 1.7889225482940674, "logps/generated": -624.5096435546875, "logps/real": -435.96405029296875, "loss": 0.1009, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.05388069152832, "rewards/margins": 15.632284164428711, "rewards/real": -6.421595573425293, "step": 3030 }, { "epoch": 0.98, "learning_rate": 3.7467269697691025e-07, "logits/generated": 2.8382792472839355, "logits/real": 1.5281206369400024, "logps/generated": -619.8280029296875, "logps/real": -469.70587158203125, "loss": 0.0469, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.758590698242188, "rewards/margins": 15.723081588745117, "rewards/real": -5.035506248474121, "step": 3040 }, { "epoch": 0.98, "learning_rate": 3.740776005712925e-07, "logits/generated": 2.1755332946777344, "logits/real": 1.53883695602417, "logps/generated": -648.2359619140625, "logps/real": -454.14923095703125, "loss": 0.1232, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.50449562072754, "rewards/margins": 17.57127571105957, "rewards/real": -5.933220386505127, "step": 3050 }, { "epoch": 0.98, "learning_rate": 3.7348250416567484e-07, "logits/generated": 2.148017406463623, "logits/real": 1.362083077430725, "logps/generated": -643.4403076171875, "logps/real": -435.7044982910156, "loss": 0.1267, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -24.117530822753906, "rewards/margins": 17.68121910095215, "rewards/real": -6.436310768127441, "step": 3060 }, { "epoch": 0.99, "learning_rate": 3.7288740776005713e-07, "logits/generated": 2.3167834281921387, "logits/real": 1.0776903629302979, "logps/generated": -678.5569458007812, "logps/real": -431.29852294921875, "loss": 0.1282, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.591516494750977, "rewards/margins": 20.012746810913086, "rewards/real": -3.578770875930786, "step": 3070 }, { "epoch": 0.99, "learning_rate": 3.722923113544394e-07, "logits/generated": 2.239835023880005, "logits/real": 1.213585615158081, "logps/generated": -664.9454345703125, "logps/real": -433.1368713378906, "loss": 0.1048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.229228973388672, "rewards/margins": 16.568553924560547, "rewards/real": -5.660672187805176, "step": 3080 }, { "epoch": 0.99, "learning_rate": 3.7169721494882167e-07, "logits/generated": 2.14680552482605, "logits/real": 1.0136090517044067, "logps/generated": -724.78759765625, "logps/real": -497.329833984375, "loss": 0.1669, "rewards/accuracies": 0.9375, "rewards/generated": -25.292936325073242, "rewards/margins": 17.165977478027344, "rewards/real": -8.126960754394531, "step": 3090 }, { "epoch": 1.0, "learning_rate": 3.71102118543204e-07, "logits/generated": 2.5293147563934326, "logits/real": 1.6967331171035767, "logps/generated": -745.9620361328125, "logps/real": -437.16705322265625, "loss": 0.0651, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.372419357299805, "rewards/margins": 19.07187271118164, "rewards/real": -6.300545692443848, "step": 3100 }, { "epoch": 1.0, "learning_rate": 3.7050702213758626e-07, "logits/generated": 2.614368200302124, "logits/real": 1.9223474264144897, "logps/generated": -634.6612548828125, "logps/real": -447.0576171875, "loss": 0.126, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.611736297607422, "rewards/margins": 17.95545768737793, "rewards/real": -5.65627908706665, "step": 3110 }, { "epoch": 1.0, "learning_rate": 3.6991192573196855e-07, "logits/generated": 2.772679328918457, "logits/real": 1.8856141567230225, "logps/generated": -671.4141845703125, "logps/real": -453.4501037597656, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/generated": -27.316808700561523, "rewards/margins": 21.6008243560791, "rewards/real": -5.715986251831055, "step": 3120 }, { "epoch": 1.01, "learning_rate": 3.6931682932635085e-07, "logits/generated": 2.493772506713867, "logits/real": 1.8791770935058594, "logps/generated": -755.4078369140625, "logps/real": -473.3190002441406, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/generated": -27.489822387695312, "rewards/margins": 22.146568298339844, "rewards/real": -5.34325647354126, "step": 3130 }, { "epoch": 1.01, "learning_rate": 3.6872173292073314e-07, "logits/generated": 2.66170334815979, "logits/real": 1.4719913005828857, "logps/generated": -591.2332763671875, "logps/real": -434.45458984375, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/generated": -24.629484176635742, "rewards/margins": 19.480703353881836, "rewards/real": -5.148781776428223, "step": 3140 }, { "epoch": 1.01, "learning_rate": 3.6812663651511544e-07, "logits/generated": 2.268622636795044, "logits/real": 1.183978796005249, "logps/generated": -676.4526977539062, "logps/real": -388.0956115722656, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/generated": -25.9034366607666, "rewards/margins": 22.03853416442871, "rewards/real": -3.8649017810821533, "step": 3150 }, { "epoch": 1.02, "learning_rate": 3.6753154010949773e-07, "logits/generated": 2.528545379638672, "logits/real": 1.0117290019989014, "logps/generated": -608.172607421875, "logps/real": -528.00146484375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -24.75876808166504, "rewards/margins": 17.666471481323242, "rewards/real": -7.092293739318848, "step": 3160 }, { "epoch": 1.02, "learning_rate": 3.6693644370388e-07, "logits/generated": 1.9390583038330078, "logits/real": 0.6848304271697998, "logps/generated": -639.5184326171875, "logps/real": -430.2386169433594, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -23.728824615478516, "rewards/margins": 18.967758178710938, "rewards/real": -4.761065483093262, "step": 3170 }, { "epoch": 1.02, "learning_rate": 3.663413472982623e-07, "logits/generated": 1.9909107685089111, "logits/real": 0.8477198481559753, "logps/generated": -681.6779174804688, "logps/real": -432.35760498046875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/generated": -25.21199607849121, "rewards/margins": 20.57056427001953, "rewards/real": -4.6414289474487305, "step": 3180 }, { "epoch": 1.03, "learning_rate": 3.657462508926446e-07, "logits/generated": 2.0864903926849365, "logits/real": 0.4242134690284729, "logps/generated": -653.9561767578125, "logps/real": -505.0721740722656, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/generated": -24.692241668701172, "rewards/margins": 19.77627182006836, "rewards/real": -4.915972709655762, "step": 3190 }, { "epoch": 1.03, "learning_rate": 3.6515115448702686e-07, "logits/generated": 2.395272731781006, "logits/real": 0.6490557789802551, "logps/generated": -693.27001953125, "logps/real": -426.8551330566406, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/generated": -27.264209747314453, "rewards/margins": 22.363313674926758, "rewards/real": -4.9008965492248535, "step": 3200 }, { "epoch": 1.03, "learning_rate": 3.6455605808140915e-07, "logits/generated": 2.3996498584747314, "logits/real": 1.0307645797729492, "logps/generated": -660.3303833007812, "logps/real": -468.6165466308594, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/generated": -24.826950073242188, "rewards/margins": 19.53725242614746, "rewards/real": -5.289698123931885, "step": 3210 }, { "epoch": 1.03, "learning_rate": 3.639609616757915e-07, "logits/generated": 2.361658811569214, "logits/real": 0.9979084730148315, "logps/generated": -642.5380249023438, "logps/real": -439.641357421875, "loss": 0.0052, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.816490173339844, "rewards/margins": 18.060041427612305, "rewards/real": -6.756446838378906, "step": 3220 }, { "epoch": 1.04, "learning_rate": 3.6336586527017374e-07, "logits/generated": 2.002927303314209, "logits/real": 0.5947871804237366, "logps/generated": -709.7286376953125, "logps/real": -493.5497131347656, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/generated": -25.199174880981445, "rewards/margins": 19.367977142333984, "rewards/real": -5.8311967849731445, "step": 3230 }, { "epoch": 1.04, "learning_rate": 3.6277076886455604e-07, "logits/generated": 2.247689962387085, "logits/real": 1.0326040983200073, "logps/generated": -707.9560546875, "logps/real": -451.2037658691406, "loss": 0.0238, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.405521392822266, "rewards/margins": 19.543106079101562, "rewards/real": -5.8624162673950195, "step": 3240 }, { "epoch": 1.04, "learning_rate": 3.6217567245893833e-07, "logits/generated": 2.341550350189209, "logits/real": 0.9611526727676392, "logps/generated": -654.49755859375, "logps/real": -457.76739501953125, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/generated": -25.633724212646484, "rewards/margins": 19.352584838867188, "rewards/real": -6.2811384201049805, "step": 3250 }, { "epoch": 1.05, "learning_rate": 3.615805760533206e-07, "logits/generated": 2.316416025161743, "logits/real": 0.9922012090682983, "logps/generated": -719.2664184570312, "logps/real": -477.29864501953125, "loss": 0.0196, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.88983726501465, "rewards/margins": 18.632572174072266, "rewards/real": -8.2572660446167, "step": 3260 }, { "epoch": 1.05, "learning_rate": 3.609854796477029e-07, "logits/generated": 2.655102014541626, "logits/real": 1.2498875856399536, "logps/generated": -683.600341796875, "logps/real": -461.31634521484375, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/generated": -27.89333724975586, "rewards/margins": 21.55805015563965, "rewards/real": -6.3352861404418945, "step": 3270 }, { "epoch": 1.05, "learning_rate": 3.603903832420852e-07, "logits/generated": 2.6055874824523926, "logits/real": 1.1949670314788818, "logps/generated": -601.434326171875, "logps/real": -422.3607482910156, "loss": 0.0189, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.317922592163086, "rewards/margins": 18.50489044189453, "rewards/real": -5.8130316734313965, "step": 3280 }, { "epoch": 1.06, "learning_rate": 3.5979528683646746e-07, "logits/generated": 2.562065601348877, "logits/real": 1.0140424966812134, "logps/generated": -672.9127197265625, "logps/real": -502.0747985839844, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -26.477609634399414, "rewards/margins": 21.053733825683594, "rewards/real": -5.423875331878662, "step": 3290 }, { "epoch": 1.06, "learning_rate": 3.5920019043084976e-07, "logits/generated": 2.934922933578491, "logits/real": 1.6339528560638428, "logps/generated": -768.0491333007812, "logps/real": -462.97039794921875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/generated": -35.04420852661133, "rewards/margins": 25.731639862060547, "rewards/real": -9.312570571899414, "step": 3300 }, { "epoch": 1.06, "learning_rate": 3.586050940252321e-07, "logits/generated": 3.187080144882202, "logits/real": 1.8761570453643799, "logps/generated": -788.3067626953125, "logps/real": -503.75994873046875, "loss": 0.0136, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -36.4393424987793, "rewards/margins": 25.657562255859375, "rewards/real": -10.781782150268555, "step": 3310 }, { "epoch": 1.07, "learning_rate": 3.5800999761961435e-07, "logits/generated": 3.75718355178833, "logits/real": 2.8633627891540527, "logps/generated": -718.607421875, "logps/real": -508.8706970214844, "loss": 0.058, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -30.915721893310547, "rewards/margins": 20.729930877685547, "rewards/real": -10.185790061950684, "step": 3320 }, { "epoch": 1.07, "learning_rate": 3.5741490121399664e-07, "logits/generated": 3.0927834510803223, "logits/real": 2.159468412399292, "logps/generated": -786.6273803710938, "logps/real": -488.6346130371094, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/generated": -32.53669738769531, "rewards/margins": 23.137868881225586, "rewards/real": -9.398828506469727, "step": 3330 }, { "epoch": 1.07, "learning_rate": 3.5681980480837893e-07, "logits/generated": 2.881220579147339, "logits/real": 2.157914161682129, "logps/generated": -660.4367065429688, "logps/real": -429.3740234375, "loss": 0.033, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.188419342041016, "rewards/margins": 20.55979347229004, "rewards/real": -6.628628730773926, "step": 3340 }, { "epoch": 1.08, "learning_rate": 3.5622470840276123e-07, "logits/generated": 2.685161590576172, "logits/real": 1.643836259841919, "logps/generated": -674.2780151367188, "logps/real": -423.3504943847656, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.780574798583984, "rewards/margins": 21.787914276123047, "rewards/real": -3.992661237716675, "step": 3350 }, { "epoch": 1.08, "learning_rate": 3.556296119971435e-07, "logits/generated": 2.7607975006103516, "logits/real": 2.0212929248809814, "logps/generated": -666.9227294921875, "logps/real": -456.1646423339844, "loss": 0.0285, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.266469955444336, "rewards/margins": 19.726221084594727, "rewards/real": -7.540248870849609, "step": 3360 }, { "epoch": 1.08, "learning_rate": 3.550345155915258e-07, "logits/generated": 2.6057658195495605, "logits/real": 1.4647444486618042, "logps/generated": -631.7987060546875, "logps/real": -475.56329345703125, "loss": 0.0422, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.27475357055664, "rewards/margins": 18.110424041748047, "rewards/real": -5.16433048248291, "step": 3370 }, { "epoch": 1.09, "learning_rate": 3.5443941918590806e-07, "logits/generated": 2.2082176208496094, "logits/real": 1.3506563901901245, "logps/generated": -656.8385620117188, "logps/real": -444.25634765625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -24.30401611328125, "rewards/margins": 20.055713653564453, "rewards/real": -4.248305320739746, "step": 3380 }, { "epoch": 1.09, "learning_rate": 3.538443227802904e-07, "logits/generated": 2.513526678085327, "logits/real": 1.3941971063613892, "logps/generated": -652.9690551757812, "logps/real": -418.3465881347656, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/generated": -26.163951873779297, "rewards/margins": 20.355674743652344, "rewards/real": -5.808277606964111, "step": 3390 }, { "epoch": 1.09, "learning_rate": 3.532492263746727e-07, "logits/generated": 2.749168872833252, "logits/real": 1.4466502666473389, "logps/generated": -698.4146728515625, "logps/real": -448.23773193359375, "loss": 0.0179, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.915002822875977, "rewards/margins": 21.451581954956055, "rewards/real": -6.4634199142456055, "step": 3400 }, { "epoch": 1.1, "learning_rate": 3.5265412996905495e-07, "logits/generated": 2.79091215133667, "logits/real": 1.5373364686965942, "logps/generated": -597.6466064453125, "logps/real": -442.4712829589844, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/generated": -23.42270278930664, "rewards/margins": 18.19327735900879, "rewards/real": -5.229424953460693, "step": 3410 }, { "epoch": 1.1, "learning_rate": 3.5205903356343724e-07, "logits/generated": 2.8285651206970215, "logits/real": 1.7332261800765991, "logps/generated": -771.4722900390625, "logps/real": -477.1019592285156, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/generated": -28.73077964782715, "rewards/margins": 22.746891021728516, "rewards/real": -5.983891010284424, "step": 3420 }, { "epoch": 1.1, "learning_rate": 3.5146393715781954e-07, "logits/generated": 1.9864263534545898, "logits/real": 1.1889464855194092, "logps/generated": -594.979736328125, "logps/real": -388.6983642578125, "loss": 0.0201, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.040409088134766, "rewards/margins": 18.76906394958496, "rewards/real": -5.271348476409912, "step": 3430 }, { "epoch": 1.11, "learning_rate": 3.5086884075220183e-07, "logits/generated": 2.6099677085876465, "logits/real": 1.2744433879852295, "logps/generated": -704.1134033203125, "logps/real": -464.90576171875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/generated": -31.00588035583496, "rewards/margins": 24.120746612548828, "rewards/real": -6.885133266448975, "step": 3440 }, { "epoch": 1.11, "learning_rate": 3.502737443465841e-07, "logits/generated": 2.6332716941833496, "logits/real": 2.0162413120269775, "logps/generated": -715.8412475585938, "logps/real": -456.40179443359375, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.613704681396484, "rewards/margins": 21.34140968322754, "rewards/real": -6.2722978591918945, "step": 3450 }, { "epoch": 1.11, "learning_rate": 3.496786479409664e-07, "logits/generated": 2.4073052406311035, "logits/real": 1.6214309930801392, "logps/generated": -750.5589599609375, "logps/real": -451.9835510253906, "loss": 0.0517, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -28.5625057220459, "rewards/margins": 22.398935317993164, "rewards/real": -6.163569450378418, "step": 3460 }, { "epoch": 1.12, "learning_rate": 3.4908355153534866e-07, "logits/generated": 2.761322498321533, "logits/real": 1.542069911956787, "logps/generated": -588.9007568359375, "logps/real": -481.5369567871094, "loss": 0.0436, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.16315269470215, "rewards/margins": 18.87639808654785, "rewards/real": -5.2867560386657715, "step": 3470 }, { "epoch": 1.12, "learning_rate": 3.48488455129731e-07, "logits/generated": 2.9807777404785156, "logits/real": 2.5232110023498535, "logps/generated": -735.063232421875, "logps/real": -476.3984375, "loss": 0.0761, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.6256103515625, "rewards/margins": 20.839588165283203, "rewards/real": -8.786020278930664, "step": 3480 }, { "epoch": 1.12, "learning_rate": 3.478933587241133e-07, "logits/generated": 2.6169018745422363, "logits/real": 2.436037302017212, "logps/generated": -699.1199340820312, "logps/real": -494.16888427734375, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/generated": -28.41788673400879, "rewards/margins": 19.675487518310547, "rewards/real": -8.742403984069824, "step": 3490 }, { "epoch": 1.12, "learning_rate": 3.4729826231849555e-07, "logits/generated": 2.7718207836151123, "logits/real": 1.7036941051483154, "logps/generated": -741.5457153320312, "logps/real": -461.86932373046875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/generated": -29.29446792602539, "rewards/margins": 21.628189086914062, "rewards/real": -7.666277885437012, "step": 3500 }, { "epoch": 1.13, "learning_rate": 3.4670316591287784e-07, "logits/generated": 2.4933695793151855, "logits/real": 1.6785141229629517, "logps/generated": -730.9461669921875, "logps/real": -474.16070556640625, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/generated": -27.382604598999023, "rewards/margins": 19.891149520874023, "rewards/real": -7.491456508636475, "step": 3510 }, { "epoch": 1.13, "learning_rate": 3.461080695072602e-07, "logits/generated": 2.545271158218384, "logits/real": 1.531073808670044, "logps/generated": -621.9008178710938, "logps/real": -486.6148376464844, "loss": 0.0228, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.943262100219727, "rewards/margins": 17.01035499572754, "rewards/real": -8.932910919189453, "step": 3520 }, { "epoch": 1.13, "learning_rate": 3.4551297310164243e-07, "logits/generated": 2.2416441440582275, "logits/real": 1.9815393686294556, "logps/generated": -737.7775268554688, "logps/real": -464.59771728515625, "loss": 0.0123, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.351669311523438, "rewards/margins": 20.522449493408203, "rewards/real": -10.829219818115234, "step": 3530 }, { "epoch": 1.14, "learning_rate": 3.449178766960247e-07, "logits/generated": 2.2843196392059326, "logits/real": 1.0606849193572998, "logps/generated": -734.2962646484375, "logps/real": -444.4004821777344, "loss": 0.0352, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.335979461669922, "rewards/margins": 23.6134033203125, "rewards/real": -6.722577095031738, "step": 3540 }, { "epoch": 1.14, "learning_rate": 3.44322780290407e-07, "logits/generated": 2.2767176628112793, "logits/real": 1.2818129062652588, "logps/generated": -719.1285400390625, "logps/real": -459.3954162597656, "loss": 0.0735, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -26.325124740600586, "rewards/margins": 20.9857120513916, "rewards/real": -5.33941650390625, "step": 3550 }, { "epoch": 1.14, "learning_rate": 3.4372768388478937e-07, "logits/generated": 2.9061708450317383, "logits/real": 1.6455659866333008, "logps/generated": -665.9501953125, "logps/real": -454.51141357421875, "loss": 0.0227, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.28609848022461, "rewards/margins": 21.570520401000977, "rewards/real": -6.715574741363525, "step": 3560 }, { "epoch": 1.15, "learning_rate": 3.431325874791716e-07, "logits/generated": 2.4950995445251465, "logits/real": 0.7829762697219849, "logps/generated": -630.0986328125, "logps/real": -446.040771484375, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.073078155517578, "rewards/margins": 20.410871505737305, "rewards/real": -5.662203788757324, "step": 3570 }, { "epoch": 1.15, "learning_rate": 3.425374910735539e-07, "logits/generated": 2.650120973587036, "logits/real": 1.3936200141906738, "logps/generated": -637.1566162109375, "logps/real": -483.755126953125, "loss": 0.0313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.2448787689209, "rewards/margins": 19.2751407623291, "rewards/real": -6.9697394371032715, "step": 3580 }, { "epoch": 1.15, "learning_rate": 3.4194239466793615e-07, "logits/generated": 2.405644655227661, "logits/real": 1.281725287437439, "logps/generated": -667.3518676757812, "logps/real": -429.366943359375, "loss": 0.0781, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.54378890991211, "rewards/margins": 20.49782371520996, "rewards/real": -6.045966148376465, "step": 3590 }, { "epoch": 1.16, "learning_rate": 3.413472982623185e-07, "logits/generated": 2.283933162689209, "logits/real": 1.1979738473892212, "logps/generated": -635.62451171875, "logps/real": -517.581298828125, "loss": 0.0309, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.221525192260742, "rewards/margins": 16.748271942138672, "rewards/real": -7.473250389099121, "step": 3600 }, { "epoch": 1.16, "learning_rate": 3.407522018567008e-07, "logits/generated": 3.037490129470825, "logits/real": 2.163864850997925, "logps/generated": -691.4986572265625, "logps/real": -458.17913818359375, "loss": 0.0161, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.751453399658203, "rewards/margins": 21.80462646484375, "rewards/real": -7.946825981140137, "step": 3610 }, { "epoch": 1.16, "learning_rate": 3.401571054510831e-07, "logits/generated": 2.6360836029052734, "logits/real": 1.7148447036743164, "logps/generated": -840.6214599609375, "logps/real": -466.7984313964844, "loss": 0.042, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -36.006675720214844, "rewards/margins": 28.180644989013672, "rewards/real": -7.826028347015381, "step": 3620 }, { "epoch": 1.17, "learning_rate": 3.3956200904546533e-07, "logits/generated": 1.8129583597183228, "logits/real": 1.1348989009857178, "logps/generated": -692.1317749023438, "logps/real": -455.8722229003906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -26.975698471069336, "rewards/margins": 20.7060604095459, "rewards/real": -6.269636154174805, "step": 3630 }, { "epoch": 1.17, "learning_rate": 3.389669126398476e-07, "logits/generated": 2.48490834236145, "logits/real": 1.4064624309539795, "logps/generated": -760.88525390625, "logps/real": -432.4095764160156, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.652202606201172, "rewards/margins": 24.75800895690918, "rewards/real": -5.894194602966309, "step": 3640 }, { "epoch": 1.17, "learning_rate": 3.3837181623422997e-07, "logits/generated": 2.7649261951446533, "logits/real": 1.8577854633331299, "logps/generated": -657.9354248046875, "logps/real": -503.26416015625, "loss": 0.0245, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.3824405670166, "rewards/margins": 20.5275936126709, "rewards/real": -7.854846000671387, "step": 3650 }, { "epoch": 1.18, "learning_rate": 3.377767198286122e-07, "logits/generated": 2.5927748680114746, "logits/real": 2.04367995262146, "logps/generated": -676.5906372070312, "logps/real": -435.38238525390625, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/generated": -25.694509506225586, "rewards/margins": 19.603580474853516, "rewards/real": -6.090932846069336, "step": 3660 }, { "epoch": 1.18, "learning_rate": 3.371816234229945e-07, "logits/generated": 2.6572422981262207, "logits/real": 1.521686315536499, "logps/generated": -678.6843872070312, "logps/real": -480.9020080566406, "loss": 0.006, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.5948543548584, "rewards/margins": 20.361568450927734, "rewards/real": -7.233286380767822, "step": 3670 }, { "epoch": 1.18, "learning_rate": 3.365865270173768e-07, "logits/generated": 2.6615359783172607, "logits/real": 1.6930313110351562, "logps/generated": -707.2503662109375, "logps/real": -473.5144958496094, "loss": 0.0153, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.01141929626465, "rewards/margins": 21.097003936767578, "rewards/real": -6.914416313171387, "step": 3680 }, { "epoch": 1.19, "learning_rate": 3.359914306117591e-07, "logits/generated": 0.905605137348175, "logits/real": -0.4722270965576172, "logps/generated": -790.31494140625, "logps/real": -461.4234924316406, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/generated": -34.29324722290039, "rewards/margins": 26.24923324584961, "rewards/real": -8.044008255004883, "step": 3690 }, { "epoch": 1.19, "learning_rate": 3.353963342061414e-07, "logits/generated": 1.0457203388214111, "logits/real": -0.4413973391056061, "logps/generated": -687.7850341796875, "logps/real": -426.17041015625, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.678781509399414, "rewards/margins": 22.148509979248047, "rewards/real": -6.530271053314209, "step": 3700 }, { "epoch": 1.19, "learning_rate": 3.348012378005237e-07, "logits/generated": 1.0453917980194092, "logits/real": -0.23727576434612274, "logps/generated": -678.01904296875, "logps/real": -444.00048828125, "loss": 0.0544, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.240116119384766, "rewards/margins": 20.035165786743164, "rewards/real": -7.204949855804443, "step": 3710 }, { "epoch": 1.2, "learning_rate": 3.3420614139490593e-07, "logits/generated": 1.1247848272323608, "logits/real": -0.5448418855667114, "logps/generated": -736.1654052734375, "logps/real": -449.4205627441406, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/generated": -29.88787269592285, "rewards/margins": 22.57768440246582, "rewards/real": -7.310188293457031, "step": 3720 }, { "epoch": 1.2, "learning_rate": 3.336110449892883e-07, "logits/generated": 1.1480869054794312, "logits/real": -0.37490135431289673, "logps/generated": -660.1180419921875, "logps/real": -485.0994567871094, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/generated": -26.454639434814453, "rewards/margins": 18.33192253112793, "rewards/real": -8.122716903686523, "step": 3730 }, { "epoch": 1.2, "learning_rate": 3.3301594858367057e-07, "logits/generated": 1.343151330947876, "logits/real": -0.2886783480644226, "logps/generated": -684.6146240234375, "logps/real": -429.09539794921875, "loss": 0.0161, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.920307159423828, "rewards/margins": 20.828861236572266, "rewards/real": -7.091446876525879, "step": 3740 }, { "epoch": 1.21, "learning_rate": 3.324208521780528e-07, "logits/generated": 1.3708603382110596, "logits/real": -0.03330984339118004, "logps/generated": -612.2896118164062, "logps/real": -400.5157470703125, "loss": 0.0451, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.34270668029785, "rewards/margins": 18.69586181640625, "rewards/real": -7.646846771240234, "step": 3750 }, { "epoch": 1.21, "learning_rate": 3.318257557724351e-07, "logits/generated": 1.5275355577468872, "logits/real": -0.4374828338623047, "logps/generated": -768.6717529296875, "logps/real": -511.7127380371094, "loss": 0.0269, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.739360809326172, "rewards/margins": 22.874874114990234, "rewards/real": -7.864485263824463, "step": 3760 }, { "epoch": 1.21, "learning_rate": 3.3123065936681745e-07, "logits/generated": 1.1088958978652954, "logits/real": -0.48227983713150024, "logps/generated": -678.9129638671875, "logps/real": -438.7022399902344, "loss": 0.0478, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.197046279907227, "rewards/margins": 19.823627471923828, "rewards/real": -7.373419284820557, "step": 3770 }, { "epoch": 1.21, "learning_rate": 3.306355629611997e-07, "logits/generated": 1.6406713724136353, "logits/real": 0.10558615624904633, "logps/generated": -816.3553466796875, "logps/real": -483.51177978515625, "loss": 0.0708, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -35.803016662597656, "rewards/margins": 23.361392974853516, "rewards/real": -12.441622734069824, "step": 3780 }, { "epoch": 1.22, "learning_rate": 3.30040466555582e-07, "logits/generated": 1.9653699398040771, "logits/real": 0.05775642395019531, "logps/generated": -691.2618408203125, "logps/real": -487.98681640625, "loss": 0.0656, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.764989852905273, "rewards/margins": 19.655536651611328, "rewards/real": -9.109453201293945, "step": 3790 }, { "epoch": 1.22, "learning_rate": 3.294453701499643e-07, "logits/generated": 2.2136850357055664, "logits/real": 0.766396164894104, "logps/generated": -772.6976318359375, "logps/real": -423.5503845214844, "loss": 0.0513, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.09532928466797, "rewards/margins": 24.41140365600586, "rewards/real": -8.683924674987793, "step": 3800 }, { "epoch": 1.22, "learning_rate": 3.288502737443466e-07, "logits/generated": 1.915806770324707, "logits/real": 0.30771923065185547, "logps/generated": -767.9225463867188, "logps/real": -491.69622802734375, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/generated": -29.66864013671875, "rewards/margins": 21.831602096557617, "rewards/real": -7.837037086486816, "step": 3810 }, { "epoch": 1.23, "learning_rate": 3.282551773387289e-07, "logits/generated": 1.6662776470184326, "logits/real": 0.2889935374259949, "logps/generated": -786.261962890625, "logps/real": -495.9602966308594, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.037105560302734, "rewards/margins": 23.815635681152344, "rewards/real": -7.221470832824707, "step": 3820 }, { "epoch": 1.23, "learning_rate": 3.2766008093311117e-07, "logits/generated": 1.9438180923461914, "logits/real": 0.3044320046901703, "logps/generated": -702.0763549804688, "logps/real": -460.8330078125, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/generated": -26.46884536743164, "rewards/margins": 19.958297729492188, "rewards/real": -6.5105485916137695, "step": 3830 }, { "epoch": 1.23, "learning_rate": 3.270649845274934e-07, "logits/generated": 2.305710554122925, "logits/real": 0.5862905383110046, "logps/generated": -776.2945556640625, "logps/real": -470.1326599121094, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/generated": -33.211219787597656, "rewards/margins": 23.26434326171875, "rewards/real": -9.946874618530273, "step": 3840 }, { "epoch": 1.24, "learning_rate": 3.264698881218757e-07, "logits/generated": 2.1864776611328125, "logits/real": 0.6319053769111633, "logps/generated": -824.8487548828125, "logps/real": -488.22369384765625, "loss": 0.0562, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.97633361816406, "rewards/margins": 23.374177932739258, "rewards/real": -9.602158546447754, "step": 3850 }, { "epoch": 1.24, "learning_rate": 3.2587479171625806e-07, "logits/generated": 1.9181573390960693, "logits/real": 0.2898411750793457, "logps/generated": -887.0048828125, "logps/real": -527.6532592773438, "loss": 0.065, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.16916847229004, "rewards/margins": 22.435504913330078, "rewards/real": -8.733665466308594, "step": 3860 }, { "epoch": 1.24, "learning_rate": 3.252796953106403e-07, "logits/generated": 2.1994969844818115, "logits/real": 0.6730740666389465, "logps/generated": -679.5873413085938, "logps/real": -499.27423095703125, "loss": 0.0593, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.11680030822754, "rewards/margins": 18.643583297729492, "rewards/real": -6.4732160568237305, "step": 3870 }, { "epoch": 1.25, "learning_rate": 3.246845989050226e-07, "logits/generated": 2.196690320968628, "logits/real": 0.9357960820198059, "logps/generated": -767.1376342773438, "logps/real": -498.81170654296875, "loss": 0.0313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.14312171936035, "rewards/margins": 21.682828903198242, "rewards/real": -8.460294723510742, "step": 3880 }, { "epoch": 1.25, "learning_rate": 3.240895024994049e-07, "logits/generated": 2.2653632164001465, "logits/real": 0.9786055684089661, "logps/generated": -774.4112548828125, "logps/real": -465.4608459472656, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -31.3429012298584, "rewards/margins": 21.939701080322266, "rewards/real": -9.403202056884766, "step": 3890 }, { "epoch": 1.25, "learning_rate": 3.234944060937872e-07, "logits/generated": 2.245255947113037, "logits/real": 0.7700493335723877, "logps/generated": -674.8272094726562, "logps/real": -487.7240295410156, "loss": 0.0273, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.440723419189453, "rewards/margins": 19.245616912841797, "rewards/real": -7.195105075836182, "step": 3900 }, { "epoch": 1.26, "learning_rate": 3.228993096881695e-07, "logits/generated": 2.5470709800720215, "logits/real": 0.9530758857727051, "logps/generated": -642.1466674804688, "logps/real": -487.7120056152344, "loss": 0.0531, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.48048973083496, "rewards/margins": 19.238502502441406, "rewards/real": -7.241986274719238, "step": 3910 }, { "epoch": 1.26, "learning_rate": 3.2230421328255177e-07, "logits/generated": 2.549373149871826, "logits/real": 0.5313698053359985, "logps/generated": -731.5639038085938, "logps/real": -475.5712890625, "loss": 0.0184, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.543292999267578, "rewards/margins": 21.97779083251953, "rewards/real": -6.5655012130737305, "step": 3920 }, { "epoch": 1.26, "learning_rate": 3.21709116876934e-07, "logits/generated": 2.3441226482391357, "logits/real": 1.0925277471542358, "logps/generated": -735.50732421875, "logps/real": -458.16204833984375, "loss": 0.0152, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.029842376708984, "rewards/margins": 24.97720718383789, "rewards/real": -7.052639007568359, "step": 3930 }, { "epoch": 1.27, "learning_rate": 3.2111402047131636e-07, "logits/generated": 2.7295000553131104, "logits/real": 0.9965232014656067, "logps/generated": -786.9588012695312, "logps/real": -483.0789489746094, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/generated": -32.28748321533203, "rewards/margins": 24.025020599365234, "rewards/real": -8.262463569641113, "step": 3940 }, { "epoch": 1.27, "learning_rate": 3.2051892406569866e-07, "logits/generated": 2.8566665649414062, "logits/real": 1.6441774368286133, "logps/generated": -632.7756958007812, "logps/real": -469.6166076660156, "loss": 0.0389, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -28.829965591430664, "rewards/margins": 19.0399112701416, "rewards/real": -9.790054321289062, "step": 3950 }, { "epoch": 1.27, "learning_rate": 3.199238276600809e-07, "logits/generated": 2.7866692543029785, "logits/real": 1.7060445547103882, "logps/generated": -701.2554321289062, "logps/real": -502.695556640625, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/generated": -30.553813934326172, "rewards/margins": 22.033260345458984, "rewards/real": -8.520556449890137, "step": 3960 }, { "epoch": 1.28, "learning_rate": 3.193287312544632e-07, "logits/generated": 2.66530442237854, "logits/real": 1.3314772844314575, "logps/generated": -823.5281372070312, "logps/real": -519.7109375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/generated": -35.031681060791016, "rewards/margins": 22.982370376586914, "rewards/real": -12.049309730529785, "step": 3970 }, { "epoch": 1.28, "learning_rate": 3.1873363484884554e-07, "logits/generated": 3.160966634750366, "logits/real": 1.6971614360809326, "logps/generated": -795.4423828125, "logps/real": -469.6409606933594, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.070735931396484, "rewards/margins": 25.117141723632812, "rewards/real": -7.9535980224609375, "step": 3980 }, { "epoch": 1.28, "learning_rate": 3.181385384432278e-07, "logits/generated": 2.323033094406128, "logits/real": 1.3230948448181152, "logps/generated": -802.9261474609375, "logps/real": -462.02783203125, "loss": 0.0258, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.472232818603516, "rewards/margins": 24.273265838623047, "rewards/real": -8.198967933654785, "step": 3990 }, { "epoch": 1.29, "learning_rate": 3.175434420376101e-07, "logits/generated": 2.5093555450439453, "logits/real": 1.285348892211914, "logps/generated": -725.5984497070312, "logps/real": -480.664306640625, "loss": 0.0227, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.8314266204834, "rewards/margins": 23.818405151367188, "rewards/real": -7.013019561767578, "step": 4000 }, { "epoch": 1.29, "learning_rate": 3.1694834563199237e-07, "logits/generated": 2.8705523014068604, "logits/real": 0.6883656978607178, "logps/generated": -802.2123413085938, "logps/real": -461.05059814453125, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/generated": -34.19792175292969, "rewards/margins": 25.772363662719727, "rewards/real": -8.425557136535645, "step": 4010 }, { "epoch": 1.29, "learning_rate": 3.1635324922637467e-07, "logits/generated": 2.5492050647735596, "logits/real": 1.1038516759872437, "logps/generated": -717.2047119140625, "logps/real": -413.26519775390625, "loss": 0.0585, "rewards/accuracies": 0.949999988079071, "rewards/generated": -30.3170166015625, "rewards/margins": 21.097728729248047, "rewards/real": -9.219284057617188, "step": 4020 }, { "epoch": 1.29, "learning_rate": 3.1575815282075696e-07, "logits/generated": 2.518587589263916, "logits/real": 0.8266888856887817, "logps/generated": -720.0540161132812, "logps/real": -464.48388671875, "loss": 0.0227, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.711990356445312, "rewards/margins": 22.5683650970459, "rewards/real": -8.143627166748047, "step": 4030 }, { "epoch": 1.3, "learning_rate": 3.1516305641513926e-07, "logits/generated": 2.6297123432159424, "logits/real": 1.3374924659729004, "logps/generated": -680.5969848632812, "logps/real": -460.84075927734375, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/generated": -27.0722713470459, "rewards/margins": 20.3653564453125, "rewards/real": -6.706915855407715, "step": 4040 }, { "epoch": 1.3, "learning_rate": 3.145679600095215e-07, "logits/generated": 2.4563651084899902, "logits/real": 1.5648283958435059, "logps/generated": -676.21533203125, "logps/real": -459.96624755859375, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/generated": -28.7080078125, "rewards/margins": 22.59457015991211, "rewards/real": -6.113435745239258, "step": 4050 }, { "epoch": 1.3, "learning_rate": 3.139728636039038e-07, "logits/generated": 2.4664809703826904, "logits/real": 1.083017349243164, "logps/generated": -626.862548828125, "logps/real": -446.3602600097656, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -26.330068588256836, "rewards/margins": 19.853473663330078, "rewards/real": -6.476595878601074, "step": 4060 }, { "epoch": 1.31, "learning_rate": 3.1337776719828614e-07, "logits/generated": 2.232530117034912, "logits/real": 1.284562349319458, "logps/generated": -644.2023315429688, "logps/real": -434.1683654785156, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.42414665222168, "rewards/margins": 17.972837448120117, "rewards/real": -6.4513092041015625, "step": 4070 }, { "epoch": 1.31, "learning_rate": 3.127826707926684e-07, "logits/generated": 2.3483405113220215, "logits/real": 1.6036663055419922, "logps/generated": -731.7938232421875, "logps/real": -506.4893493652344, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/generated": -28.597402572631836, "rewards/margins": 21.374975204467773, "rewards/real": -7.222431182861328, "step": 4080 }, { "epoch": 1.31, "learning_rate": 3.121875743870507e-07, "logits/generated": 2.6478030681610107, "logits/real": 1.7559664249420166, "logps/generated": -582.6901245117188, "logps/real": -416.71966552734375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -26.351787567138672, "rewards/margins": 18.96149253845215, "rewards/real": -7.390294551849365, "step": 4090 }, { "epoch": 1.32, "learning_rate": 3.11592477981433e-07, "logits/generated": 2.5606369972229004, "logits/real": 1.240614652633667, "logps/generated": -671.0670166015625, "logps/real": -481.71075439453125, "loss": 0.0142, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.930618286132812, "rewards/margins": 22.340429306030273, "rewards/real": -5.590188503265381, "step": 4100 }, { "epoch": 1.32, "learning_rate": 3.1099738157581527e-07, "logits/generated": 3.0026493072509766, "logits/real": 1.7493858337402344, "logps/generated": -756.0106201171875, "logps/real": -461.55224609375, "loss": 0.0191, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.6851863861084, "rewards/margins": 24.285850524902344, "rewards/real": -7.399336814880371, "step": 4110 }, { "epoch": 1.32, "learning_rate": 3.1040228517019756e-07, "logits/generated": 2.499925136566162, "logits/real": 1.1109522581100464, "logps/generated": -600.7268676757812, "logps/real": -416.0723571777344, "loss": 0.0273, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -24.501590728759766, "rewards/margins": 18.634164810180664, "rewards/real": -5.867423057556152, "step": 4120 }, { "epoch": 1.33, "learning_rate": 3.0980718876457986e-07, "logits/generated": 2.891514539718628, "logits/real": 1.7420437335968018, "logps/generated": -690.1170654296875, "logps/real": -426.11444091796875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/generated": -26.5653018951416, "rewards/margins": 22.309375762939453, "rewards/real": -4.255928039550781, "step": 4130 }, { "epoch": 1.33, "learning_rate": 3.092120923589621e-07, "logits/generated": 2.281183958053589, "logits/real": 0.9821410179138184, "logps/generated": -675.3880615234375, "logps/real": -445.2452087402344, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/generated": -27.642953872680664, "rewards/margins": 22.418996810913086, "rewards/real": -5.2239556312561035, "step": 4140 }, { "epoch": 1.33, "learning_rate": 3.0861699595334445e-07, "logits/generated": 2.0309836864471436, "logits/real": 0.9327380061149597, "logps/generated": -728.02490234375, "logps/real": -494.54437255859375, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.806100845336914, "rewards/margins": 20.957168579101562, "rewards/real": -5.8489298820495605, "step": 4150 }, { "epoch": 1.34, "learning_rate": 3.0802189954772674e-07, "logits/generated": 2.7178268432617188, "logits/real": 1.9559532403945923, "logps/generated": -762.4481201171875, "logps/real": -435.4845275878906, "loss": 0.0291, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.609783172607422, "rewards/margins": 23.3583927154541, "rewards/real": -8.251394271850586, "step": 4160 }, { "epoch": 1.34, "learning_rate": 3.07426803142109e-07, "logits/generated": 2.825317859649658, "logits/real": 1.7495107650756836, "logps/generated": -764.5574951171875, "logps/real": -488.174560546875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -32.27056121826172, "rewards/margins": 23.566438674926758, "rewards/real": -8.704119682312012, "step": 4170 }, { "epoch": 1.34, "learning_rate": 3.068317067364913e-07, "logits/generated": 2.877255916595459, "logits/real": 1.2865673303604126, "logps/generated": -692.69091796875, "logps/real": -466.9850158691406, "loss": 0.0117, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.39888572692871, "rewards/margins": 24.25893211364746, "rewards/real": -7.139950752258301, "step": 4180 }, { "epoch": 1.35, "learning_rate": 3.0623661033087363e-07, "logits/generated": 2.43916916847229, "logits/real": 1.1945528984069824, "logps/generated": -723.6937255859375, "logps/real": -428.70477294921875, "loss": 0.0417, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -30.390575408935547, "rewards/margins": 23.736957550048828, "rewards/real": -6.653617858886719, "step": 4190 }, { "epoch": 1.35, "learning_rate": 3.0564151392525587e-07, "logits/generated": 2.3293750286102295, "logits/real": 1.4255329370498657, "logps/generated": -732.8556518554688, "logps/real": -460.5939025878906, "loss": 0.0546, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.291086196899414, "rewards/margins": 23.055442810058594, "rewards/real": -8.235644340515137, "step": 4200 }, { "epoch": 1.35, "learning_rate": 3.0504641751963816e-07, "logits/generated": 2.428840398788452, "logits/real": 1.4368888139724731, "logps/generated": -716.0555419921875, "logps/real": -502.5569763183594, "loss": 0.0244, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.735620498657227, "rewards/margins": 20.167133331298828, "rewards/real": -10.56848430633545, "step": 4210 }, { "epoch": 1.36, "learning_rate": 3.0445132111402046e-07, "logits/generated": 2.5322787761688232, "logits/real": 1.1126108169555664, "logps/generated": -749.0120239257812, "logps/real": -521.3764038085938, "loss": 0.0361, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.96017074584961, "rewards/margins": 22.68358039855957, "rewards/real": -9.276590347290039, "step": 4220 }, { "epoch": 1.36, "learning_rate": 3.0385622470840275e-07, "logits/generated": 2.722740888595581, "logits/real": 1.155497670173645, "logps/generated": -655.9694213867188, "logps/real": -462.1548767089844, "loss": 0.0258, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -27.651046752929688, "rewards/margins": 19.807077407836914, "rewards/real": -7.843969821929932, "step": 4230 }, { "epoch": 1.36, "learning_rate": 3.0326112830278505e-07, "logits/generated": 2.987996816635132, "logits/real": 1.087993860244751, "logps/generated": -744.4529418945312, "logps/real": -493.61590576171875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/generated": -33.61971664428711, "rewards/margins": 25.357425689697266, "rewards/real": -8.262292861938477, "step": 4240 }, { "epoch": 1.37, "learning_rate": 3.0266603189716734e-07, "logits/generated": 2.667389392852783, "logits/real": 1.3173561096191406, "logps/generated": -704.877685546875, "logps/real": -476.40960693359375, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/generated": -28.580036163330078, "rewards/margins": 21.75140953063965, "rewards/real": -6.828622341156006, "step": 4250 }, { "epoch": 1.37, "learning_rate": 3.020709354915496e-07, "logits/generated": 2.288752555847168, "logits/real": 1.1589146852493286, "logps/generated": -715.4613037109375, "logps/real": -474.13067626953125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/generated": -32.76097106933594, "rewards/margins": 24.329526901245117, "rewards/real": -8.43144702911377, "step": 4260 }, { "epoch": 1.37, "learning_rate": 3.014758390859319e-07, "logits/generated": 1.9467523097991943, "logits/real": 0.9978979229927063, "logps/generated": -747.3699340820312, "logps/real": -436.234619140625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/generated": -31.13223648071289, "rewards/margins": 23.94467544555664, "rewards/real": -7.187562465667725, "step": 4270 }, { "epoch": 1.38, "learning_rate": 3.0088074268031423e-07, "logits/generated": 2.7511472702026367, "logits/real": 1.480581521987915, "logps/generated": -717.3095703125, "logps/real": -498.9349060058594, "loss": 0.0504, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.9949893951416, "rewards/margins": 22.38837242126465, "rewards/real": -7.6066179275512695, "step": 4280 }, { "epoch": 1.38, "learning_rate": 3.0028564627469647e-07, "logits/generated": 2.8890328407287598, "logits/real": 1.2660605907440186, "logps/generated": -730.168212890625, "logps/real": -478.07708740234375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/generated": -32.03810119628906, "rewards/margins": 26.617752075195312, "rewards/real": -5.420350074768066, "step": 4290 }, { "epoch": 1.38, "learning_rate": 2.9969054986907876e-07, "logits/generated": 2.608889579772949, "logits/real": 1.2832896709442139, "logps/generated": -743.54931640625, "logps/real": -464.9396057128906, "loss": 0.0289, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.240285873413086, "rewards/margins": 24.698974609375, "rewards/real": -6.541312217712402, "step": 4300 }, { "epoch": 1.38, "learning_rate": 2.9909545346346106e-07, "logits/generated": 2.6535749435424805, "logits/real": 1.146988868713379, "logps/generated": -834.8203125, "logps/real": -488.576416015625, "loss": 0.0383, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.6282844543457, "rewards/margins": 26.080337524414062, "rewards/real": -7.547944068908691, "step": 4310 }, { "epoch": 1.39, "learning_rate": 2.9850035705784335e-07, "logits/generated": 2.5933151245117188, "logits/real": 0.5238062143325806, "logps/generated": -651.6871337890625, "logps/real": -505.53204345703125, "loss": 0.0226, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.81691551208496, "rewards/margins": 23.056732177734375, "rewards/real": -6.760184288024902, "step": 4320 }, { "epoch": 1.39, "learning_rate": 2.9790526065222565e-07, "logits/generated": 2.3179240226745605, "logits/real": 0.7064167857170105, "logps/generated": -779.7217407226562, "logps/real": -474.32965087890625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -32.549400329589844, "rewards/margins": 27.475814819335938, "rewards/real": -5.073585510253906, "step": 4330 }, { "epoch": 1.39, "learning_rate": 2.9731016424660794e-07, "logits/generated": 2.2130136489868164, "logits/real": 0.7594603300094604, "logps/generated": -706.07568359375, "logps/real": -459.9960021972656, "loss": 0.0266, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.772945404052734, "rewards/margins": 23.573457717895508, "rewards/real": -8.199487686157227, "step": 4340 }, { "epoch": 1.4, "learning_rate": 2.967150678409902e-07, "logits/generated": 2.3822648525238037, "logits/real": 0.5831276178359985, "logps/generated": -748.8506469726562, "logps/real": -480.2749938964844, "loss": 0.0906, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.569591522216797, "rewards/margins": 24.361059188842773, "rewards/real": -6.20853328704834, "step": 4350 }, { "epoch": 1.4, "learning_rate": 2.9611997143537253e-07, "logits/generated": 2.630711793899536, "logits/real": 0.7206360101699829, "logps/generated": -760.3441162109375, "logps/real": -468.82525634765625, "loss": 0.027, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.948570251464844, "rewards/margins": 25.273727416992188, "rewards/real": -7.67484188079834, "step": 4360 }, { "epoch": 1.4, "learning_rate": 2.9552487502975483e-07, "logits/generated": 2.394188165664673, "logits/real": 0.9266592264175415, "logps/generated": -772.2131958007812, "logps/real": -503.6002502441406, "loss": 0.0105, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.026798248291016, "rewards/margins": 23.337125778198242, "rewards/real": -7.689669132232666, "step": 4370 }, { "epoch": 1.41, "learning_rate": 2.9492977862413707e-07, "logits/generated": 2.3783416748046875, "logits/real": 0.6899093389511108, "logps/generated": -796.3905029296875, "logps/real": -506.93255615234375, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/generated": -33.03886795043945, "rewards/margins": 24.80643081665039, "rewards/real": -8.232439041137695, "step": 4380 }, { "epoch": 1.41, "learning_rate": 2.9433468221851937e-07, "logits/generated": 2.5532989501953125, "logits/real": 0.8979525566101074, "logps/generated": -775.7893676757812, "logps/real": -472.422119140625, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/generated": -33.862464904785156, "rewards/margins": 25.17178726196289, "rewards/real": -8.690677642822266, "step": 4390 }, { "epoch": 1.41, "learning_rate": 2.937395858129017e-07, "logits/generated": 2.75309419631958, "logits/real": 0.7668476104736328, "logps/generated": -714.7312622070312, "logps/real": -514.927490234375, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/generated": -33.620052337646484, "rewards/margins": 23.170490264892578, "rewards/real": -10.449566841125488, "step": 4400 }, { "epoch": 1.42, "learning_rate": 2.9314448940728396e-07, "logits/generated": 1.9756301641464233, "logits/real": 0.889691174030304, "logps/generated": -727.3688354492188, "logps/real": -453.04888916015625, "loss": 0.0183, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.26788902282715, "rewards/margins": 22.88307762145996, "rewards/real": -8.384815216064453, "step": 4410 }, { "epoch": 1.42, "learning_rate": 2.9254939300166625e-07, "logits/generated": 2.107994794845581, "logits/real": 0.2895105481147766, "logps/generated": -715.98095703125, "logps/real": -495.554443359375, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/generated": -32.349239349365234, "rewards/margins": 24.312902450561523, "rewards/real": -8.036337852478027, "step": 4420 }, { "epoch": 1.42, "learning_rate": 2.9195429659604855e-07, "logits/generated": 2.2468323707580566, "logits/real": 0.8285868763923645, "logps/generated": -719.9053344726562, "logps/real": -455.3446350097656, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/generated": -33.243507385253906, "rewards/margins": 24.0690975189209, "rewards/real": -9.174409866333008, "step": 4430 }, { "epoch": 1.43, "learning_rate": 2.913592001904308e-07, "logits/generated": 2.7601046562194824, "logits/real": 1.0650097131729126, "logps/generated": -778.4031372070312, "logps/real": -486.15283203125, "loss": 0.0225, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -36.750770568847656, "rewards/margins": 26.943151473999023, "rewards/real": -9.8076171875, "step": 4440 }, { "epoch": 1.43, "learning_rate": 2.9076410378481313e-07, "logits/generated": 2.6170191764831543, "logits/real": 1.165027379989624, "logps/generated": -844.6610107421875, "logps/real": -501.0423889160156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -39.05771255493164, "rewards/margins": 28.3835506439209, "rewards/real": -10.674161911010742, "step": 4450 }, { "epoch": 1.43, "learning_rate": 2.9016900737919543e-07, "logits/generated": 2.809844493865967, "logits/real": 0.9889978170394897, "logps/generated": -859.2933349609375, "logps/real": -523.3839111328125, "loss": 0.0168, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.692840576171875, "rewards/margins": 29.4258975982666, "rewards/real": -10.266946792602539, "step": 4460 }, { "epoch": 1.44, "learning_rate": 2.8957391097357767e-07, "logits/generated": 3.18965482711792, "logits/real": 1.2603296041488647, "logps/generated": -745.4596557617188, "logps/real": -453.44110107421875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/generated": -38.61964416503906, "rewards/margins": 27.276596069335938, "rewards/real": -11.343046188354492, "step": 4470 }, { "epoch": 1.44, "learning_rate": 2.8897881456795997e-07, "logits/generated": 2.8014941215515137, "logits/real": 1.3085429668426514, "logps/generated": -836.2554931640625, "logps/real": -520.4592895507812, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -37.28559112548828, "rewards/margins": 26.030391693115234, "rewards/real": -11.255202293395996, "step": 4480 }, { "epoch": 1.44, "learning_rate": 2.883837181623423e-07, "logits/generated": 2.8689937591552734, "logits/real": 1.0933443307876587, "logps/generated": -844.1451416015625, "logps/real": -470.31707763671875, "loss": 0.0635, "rewards/accuracies": 0.949999988079071, "rewards/generated": -38.91167449951172, "rewards/margins": 27.770639419555664, "rewards/real": -11.141031265258789, "step": 4490 }, { "epoch": 1.45, "learning_rate": 2.8778862175672456e-07, "logits/generated": 3.0756936073303223, "logits/real": 0.9317873120307922, "logps/generated": -738.4552001953125, "logps/real": -493.4588317871094, "loss": 0.0256, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.770565032958984, "rewards/margins": 23.64522933959961, "rewards/real": -10.125336647033691, "step": 4500 }, { "epoch": 1.45, "learning_rate": 2.8719352535110685e-07, "logits/generated": 2.6127545833587646, "logits/real": 0.7991515398025513, "logps/generated": -812.0107421875, "logps/real": -529.5050048828125, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/generated": -37.79477310180664, "rewards/margins": 26.503253936767578, "rewards/real": -11.291519165039062, "step": 4510 }, { "epoch": 1.45, "learning_rate": 2.8659842894548915e-07, "logits/generated": 2.7488789558410645, "logits/real": 0.9205607175827026, "logps/generated": -814.0018920898438, "logps/real": -485.72808837890625, "loss": 0.0121, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.6971321105957, "rewards/margins": 26.73847007751465, "rewards/real": -7.958658695220947, "step": 4520 }, { "epoch": 1.46, "learning_rate": 2.8600333253987144e-07, "logits/generated": 2.8380286693573, "logits/real": 1.3292276859283447, "logps/generated": -664.5426025390625, "logps/real": -405.931884765625, "loss": 0.0245, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.408498764038086, "rewards/margins": 21.89618682861328, "rewards/real": -7.512312889099121, "step": 4530 }, { "epoch": 1.46, "learning_rate": 2.8540823613425374e-07, "logits/generated": 2.6762473583221436, "logits/real": 1.285409927368164, "logps/generated": -742.6524658203125, "logps/real": -466.1742248535156, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/generated": -29.877111434936523, "rewards/margins": 22.331222534179688, "rewards/real": -7.545889854431152, "step": 4540 }, { "epoch": 1.46, "learning_rate": 2.8481313972863603e-07, "logits/generated": 2.002077579498291, "logits/real": 1.0287457704544067, "logps/generated": -737.4330444335938, "logps/real": -466.03851318359375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/generated": -27.91741371154785, "rewards/margins": 21.647811889648438, "rewards/real": -6.269599914550781, "step": 4550 }, { "epoch": 1.47, "learning_rate": 2.8421804332301827e-07, "logits/generated": 3.0227811336517334, "logits/real": 1.248914361000061, "logps/generated": -762.0416259765625, "logps/real": -481.5623474121094, "loss": 0.0326, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.13182830810547, "rewards/margins": 25.93064308166504, "rewards/real": -8.20118522644043, "step": 4560 }, { "epoch": 1.47, "learning_rate": 2.836229469174006e-07, "logits/generated": 3.1882803440093994, "logits/real": 1.7910082340240479, "logps/generated": -722.3285522460938, "logps/real": -453.1412658691406, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -32.93183135986328, "rewards/margins": 24.40808868408203, "rewards/real": -8.523744583129883, "step": 4570 }, { "epoch": 1.47, "learning_rate": 2.830278505117829e-07, "logits/generated": 3.282738208770752, "logits/real": 2.1348180770874023, "logps/generated": -797.0693359375, "logps/real": -549.5692138671875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/generated": -32.49406433105469, "rewards/margins": 25.04464340209961, "rewards/real": -7.449420928955078, "step": 4580 }, { "epoch": 1.47, "learning_rate": 2.8243275410616516e-07, "logits/generated": 2.9645931720733643, "logits/real": 1.8900096416473389, "logps/generated": -806.32177734375, "logps/real": -484.203369140625, "loss": 0.0273, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.03327941894531, "rewards/margins": 28.272974014282227, "rewards/real": -8.760306358337402, "step": 4590 }, { "epoch": 1.48, "learning_rate": 2.8183765770054745e-07, "logits/generated": 2.8000144958496094, "logits/real": 2.1421077251434326, "logps/generated": -714.8748168945312, "logps/real": -507.2569885253906, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/generated": -29.132343292236328, "rewards/margins": 22.321605682373047, "rewards/real": -6.8107404708862305, "step": 4600 }, { "epoch": 1.48, "learning_rate": 2.812425612949298e-07, "logits/generated": 3.213202953338623, "logits/real": 1.9755401611328125, "logps/generated": -810.6072998046875, "logps/real": -491.380126953125, "loss": 0.0385, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.903844833374023, "rewards/margins": 24.917381286621094, "rewards/real": -6.9864606857299805, "step": 4610 }, { "epoch": 1.48, "learning_rate": 2.806474648893121e-07, "logits/generated": 2.736905097961426, "logits/real": 1.2728312015533447, "logps/generated": -759.031494140625, "logps/real": -457.0621032714844, "loss": 0.0143, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.261310577392578, "rewards/margins": 23.33499526977539, "rewards/real": -5.926311016082764, "step": 4620 }, { "epoch": 1.49, "learning_rate": 2.8005236848369434e-07, "logits/generated": 2.6796927452087402, "logits/real": 1.1678800582885742, "logps/generated": -809.9019775390625, "logps/real": -502.7870178222656, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -31.175128936767578, "rewards/margins": 25.001976013183594, "rewards/real": -6.173148155212402, "step": 4630 }, { "epoch": 1.49, "learning_rate": 2.7945727207807663e-07, "logits/generated": 2.694232702255249, "logits/real": 0.6000593304634094, "logps/generated": -704.302734375, "logps/real": -470.20184326171875, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.516103744506836, "rewards/margins": 21.654653549194336, "rewards/real": -5.861451625823975, "step": 4640 }, { "epoch": 1.49, "learning_rate": 2.7886217567245887e-07, "logits/generated": 2.1403942108154297, "logits/real": 0.913508415222168, "logps/generated": -715.5081787109375, "logps/real": -417.43115234375, "loss": 0.0304, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.56410789489746, "rewards/margins": 20.98434829711914, "rewards/real": -5.579760551452637, "step": 4650 }, { "epoch": 1.5, "learning_rate": 2.782670792668412e-07, "logits/generated": 2.4997222423553467, "logits/real": 0.7167991399765015, "logps/generated": -680.9993896484375, "logps/real": -527.2828369140625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -28.727066040039062, "rewards/margins": 21.321027755737305, "rewards/real": -7.406037330627441, "step": 4660 }, { "epoch": 1.5, "learning_rate": 2.776719828612235e-07, "logits/generated": 2.3608334064483643, "logits/real": 0.6739964485168457, "logps/generated": -687.1911010742188, "logps/real": -442.19781494140625, "loss": 0.0225, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.181970596313477, "rewards/margins": 21.896047592163086, "rewards/real": -5.285922050476074, "step": 4670 }, { "epoch": 1.5, "learning_rate": 2.770768864556058e-07, "logits/generated": 2.36586594581604, "logits/real": 0.6533133387565613, "logps/generated": -747.43798828125, "logps/real": -524.3408203125, "loss": 0.0117, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.81697654724121, "rewards/margins": 21.71823501586914, "rewards/real": -5.098741054534912, "step": 4680 }, { "epoch": 1.51, "learning_rate": 2.7648179004998805e-07, "logits/generated": 2.2925453186035156, "logits/real": 0.9335816502571106, "logps/generated": -796.139404296875, "logps/real": -460.892333984375, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/generated": -30.27596092224121, "rewards/margins": 23.194541931152344, "rewards/real": -7.081418037414551, "step": 4690 }, { "epoch": 1.51, "learning_rate": 2.758866936443704e-07, "logits/generated": 2.4903645515441895, "logits/real": 0.8307952880859375, "logps/generated": -795.6973266601562, "logps/real": -482.89166259765625, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/generated": -33.75735092163086, "rewards/margins": 27.124073028564453, "rewards/real": -6.633279323577881, "step": 4700 }, { "epoch": 1.51, "learning_rate": 2.752915972387527e-07, "logits/generated": 1.9768826961517334, "logits/real": 0.31128233671188354, "logps/generated": -707.9979248046875, "logps/real": -458.42169189453125, "loss": 0.0237, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.013736724853516, "rewards/margins": 23.47129249572754, "rewards/real": -5.542443752288818, "step": 4710 }, { "epoch": 1.52, "learning_rate": 2.7469650083313494e-07, "logits/generated": 2.1541712284088135, "logits/real": 0.33641910552978516, "logps/generated": -791.7969970703125, "logps/real": -534.8948974609375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/generated": -34.90331268310547, "rewards/margins": 25.88799476623535, "rewards/real": -9.015324592590332, "step": 4720 }, { "epoch": 1.52, "learning_rate": 2.7410140442751723e-07, "logits/generated": 2.5016562938690186, "logits/real": 0.4876073896884918, "logps/generated": -750.0125732421875, "logps/real": -506.6883239746094, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/generated": -30.507471084594727, "rewards/margins": 23.036855697631836, "rewards/real": -7.470613956451416, "step": 4730 }, { "epoch": 1.52, "learning_rate": 2.735063080218996e-07, "logits/generated": 2.2976343631744385, "logits/real": 0.6462267637252808, "logps/generated": -769.0777587890625, "logps/real": -494.9781799316406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -29.975749969482422, "rewards/margins": 23.599349975585938, "rewards/real": -6.376400947570801, "step": 4740 }, { "epoch": 1.53, "learning_rate": 2.729112116162818e-07, "logits/generated": 2.2208805084228516, "logits/real": 0.6400170922279358, "logps/generated": -735.2027587890625, "logps/real": -472.8662109375, "loss": 0.0188, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.83158302307129, "rewards/margins": 25.616039276123047, "rewards/real": -6.215543270111084, "step": 4750 }, { "epoch": 1.53, "learning_rate": 2.723161152106641e-07, "logits/generated": 2.3882317543029785, "logits/real": 0.7476447820663452, "logps/generated": -765.5809326171875, "logps/real": -498.8245544433594, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -32.68141555786133, "rewards/margins": 25.028549194335938, "rewards/real": -7.652866363525391, "step": 4760 }, { "epoch": 1.53, "learning_rate": 2.717210188050464e-07, "logits/generated": 2.701981544494629, "logits/real": 0.5061413645744324, "logps/generated": -687.2328491210938, "logps/real": -486.9599609375, "loss": 0.0517, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.980815887451172, "rewards/margins": 24.221647262573242, "rewards/real": -6.759167671203613, "step": 4770 }, { "epoch": 1.54, "learning_rate": 2.711259223994287e-07, "logits/generated": 2.478610038757324, "logits/real": 0.663358211517334, "logps/generated": -823.0439453125, "logps/real": -467.19940185546875, "loss": 0.05, "rewards/accuracies": 0.949999988079071, "rewards/generated": -34.75782012939453, "rewards/margins": 27.008438110351562, "rewards/real": -7.74938440322876, "step": 4780 }, { "epoch": 1.54, "learning_rate": 2.70530825993811e-07, "logits/generated": 2.24548077583313, "logits/real": 0.7349573373794556, "logps/generated": -837.5589599609375, "logps/real": -418.2542419433594, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -37.881492614746094, "rewards/margins": 31.39179039001465, "rewards/real": -6.489705562591553, "step": 4790 }, { "epoch": 1.54, "learning_rate": 2.699357295881933e-07, "logits/generated": 2.4621572494506836, "logits/real": 0.8740324974060059, "logps/generated": -721.0609130859375, "logps/real": -498.69293212890625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -31.06414222717285, "rewards/margins": 25.177392959594727, "rewards/real": -5.88674783706665, "step": 4800 }, { "epoch": 1.55, "learning_rate": 2.6934063318257554e-07, "logits/generated": 2.428663492202759, "logits/real": 0.5707311034202576, "logps/generated": -868.7835693359375, "logps/real": -458.8046875, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/generated": -33.82869338989258, "rewards/margins": 27.98880958557129, "rewards/real": -5.8398847579956055, "step": 4810 }, { "epoch": 1.55, "learning_rate": 2.687455367769579e-07, "logits/generated": 2.295199155807495, "logits/real": 0.5986093878746033, "logps/generated": -738.4827270507812, "logps/real": -466.02154541015625, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/generated": -29.10929298400879, "rewards/margins": 23.056617736816406, "rewards/real": -6.05267858505249, "step": 4820 }, { "epoch": 1.55, "learning_rate": 2.681504403713402e-07, "logits/generated": 2.6833040714263916, "logits/real": 1.1700522899627686, "logps/generated": -787.7691650390625, "logps/real": -476.64044189453125, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/generated": -35.42084503173828, "rewards/margins": 26.63577651977539, "rewards/real": -8.785063743591309, "step": 4830 }, { "epoch": 1.56, "learning_rate": 2.675553439657224e-07, "logits/generated": 2.29309344291687, "logits/real": 1.0085852146148682, "logps/generated": -811.6697387695312, "logps/real": -483.84716796875, "loss": 0.0082, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.68345642089844, "rewards/margins": 25.697622299194336, "rewards/real": -8.985834121704102, "step": 4840 }, { "epoch": 1.56, "learning_rate": 2.669602475601047e-07, "logits/generated": 2.7675158977508545, "logits/real": 1.3838626146316528, "logps/generated": -796.0728149414062, "logps/real": -505.99066162109375, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.752342224121094, "rewards/margins": 23.620967864990234, "rewards/real": -9.131373405456543, "step": 4850 }, { "epoch": 1.56, "learning_rate": 2.66365151154487e-07, "logits/generated": 2.735084056854248, "logits/real": 1.2561891078948975, "logps/generated": -752.9113159179688, "logps/real": -510.1122131347656, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -32.71538543701172, "rewards/margins": 23.120983123779297, "rewards/real": -9.594400405883789, "step": 4860 }, { "epoch": 1.56, "learning_rate": 2.657700547488693e-07, "logits/generated": 3.0447628498077393, "logits/real": 1.1842975616455078, "logps/generated": -696.0899658203125, "logps/real": -509.74041748046875, "loss": 0.0476, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.184078216552734, "rewards/margins": 23.026195526123047, "rewards/real": -8.157883644104004, "step": 4870 }, { "epoch": 1.57, "learning_rate": 2.651749583432516e-07, "logits/generated": 2.4543721675872803, "logits/real": 0.9730796813964844, "logps/generated": -721.242919921875, "logps/real": -433.3460998535156, "loss": 0.032, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.981969833374023, "rewards/margins": 24.40141487121582, "rewards/real": -5.580554485321045, "step": 4880 }, { "epoch": 1.57, "learning_rate": 2.645798619376339e-07, "logits/generated": 2.339740037918091, "logits/real": 0.9487134218215942, "logps/generated": -733.1251220703125, "logps/real": -510.47314453125, "loss": 0.0131, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.48800277709961, "rewards/margins": 21.83696937561035, "rewards/real": -5.651036262512207, "step": 4890 }, { "epoch": 1.57, "learning_rate": 2.6398476553201614e-07, "logits/generated": 1.7981455326080322, "logits/real": 0.26637938618659973, "logps/generated": -732.0943603515625, "logps/real": -456.2235412597656, "loss": 0.0236, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.602985382080078, "rewards/margins": 20.017929077148438, "rewards/real": -6.585053443908691, "step": 4900 }, { "epoch": 1.58, "learning_rate": 2.633896691263985e-07, "logits/generated": 2.0388360023498535, "logits/real": 0.49885162711143494, "logps/generated": -707.6419677734375, "logps/real": -507.53289794921875, "loss": 0.0358, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.079853057861328, "rewards/margins": 23.106510162353516, "rewards/real": -5.973341941833496, "step": 4910 }, { "epoch": 1.58, "learning_rate": 2.627945727207808e-07, "logits/generated": 1.6724512577056885, "logits/real": 0.31570136547088623, "logps/generated": -790.2762451171875, "logps/real": -480.8265686035156, "loss": 0.0663, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -29.528568267822266, "rewards/margins": 22.83530044555664, "rewards/real": -6.693266868591309, "step": 4920 }, { "epoch": 1.58, "learning_rate": 2.62199476315163e-07, "logits/generated": 2.416900873184204, "logits/real": 0.7247395515441895, "logps/generated": -747.8357543945312, "logps/real": -512.4075927734375, "loss": 0.0425, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.612747192382812, "rewards/margins": 23.638700485229492, "rewards/real": -6.974043846130371, "step": 4930 }, { "epoch": 1.59, "learning_rate": 2.616043799095453e-07, "logits/generated": 2.188868761062622, "logits/real": 0.5506302714347839, "logps/generated": -730.7992553710938, "logps/real": -502.9918518066406, "loss": 0.0492, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.751028060913086, "rewards/margins": 22.191381454467773, "rewards/real": -7.5596466064453125, "step": 4940 }, { "epoch": 1.59, "learning_rate": 2.6100928350392767e-07, "logits/generated": 2.2117607593536377, "logits/real": 0.8450597524642944, "logps/generated": -836.2321166992188, "logps/real": -429.93731689453125, "loss": 0.0293, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -35.805118560791016, "rewards/margins": 29.46940040588379, "rewards/real": -6.335714340209961, "step": 4950 }, { "epoch": 1.59, "learning_rate": 2.604141870983099e-07, "logits/generated": 2.7299587726593018, "logits/real": 0.9924218058586121, "logps/generated": -711.3277587890625, "logps/real": -478.1184997558594, "loss": 0.0293, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.283676147460938, "rewards/margins": 24.492584228515625, "rewards/real": -6.791094779968262, "step": 4960 }, { "epoch": 1.6, "learning_rate": 2.598190906926922e-07, "logits/generated": 3.2217116355895996, "logits/real": 2.3055434226989746, "logps/generated": -738.604248046875, "logps/real": -490.96417236328125, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/generated": -29.20232582092285, "rewards/margins": 23.750289916992188, "rewards/real": -5.4520368576049805, "step": 4970 }, { "epoch": 1.6, "learning_rate": 2.592239942870745e-07, "logits/generated": 3.2054240703582764, "logits/real": 2.3192431926727295, "logps/generated": -765.8434448242188, "logps/real": -509.50543212890625, "loss": 0.041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.2117919921875, "rewards/margins": 25.140424728393555, "rewards/real": -5.071366310119629, "step": 4980 }, { "epoch": 1.6, "learning_rate": 2.586288978814568e-07, "logits/generated": 3.7722153663635254, "logits/real": 2.441248655319214, "logps/generated": -761.0325927734375, "logps/real": -445.501708984375, "loss": 0.018, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.73529815673828, "rewards/margins": 26.847003936767578, "rewards/real": -5.888295650482178, "step": 4990 }, { "epoch": 1.61, "learning_rate": 2.580338014758391e-07, "logits/generated": 3.419520616531372, "logits/real": 2.6156516075134277, "logps/generated": -647.5647583007812, "logps/real": -521.021484375, "loss": 0.0369, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -27.798999786376953, "rewards/margins": 20.285310745239258, "rewards/real": -7.513688087463379, "step": 5000 }, { "epoch": 1.61, "learning_rate": 2.574387050702214e-07, "logits/generated": 2.5651206970214844, "logits/real": 1.6404587030410767, "logps/generated": -702.4592895507812, "logps/real": -416.69427490234375, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/generated": -25.681949615478516, "rewards/margins": 19.924440383911133, "rewards/real": -5.757508754730225, "step": 5010 }, { "epoch": 1.61, "learning_rate": 2.568436086646036e-07, "logits/generated": 3.092038154602051, "logits/real": 1.9284976720809937, "logps/generated": -764.1776123046875, "logps/real": -439.559326171875, "loss": 0.0494, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.717243194580078, "rewards/margins": 24.212526321411133, "rewards/real": -6.504717826843262, "step": 5020 }, { "epoch": 1.62, "learning_rate": 2.5624851225898597e-07, "logits/generated": 2.615468740463257, "logits/real": 1.7854080200195312, "logps/generated": -609.8756713867188, "logps/real": -431.2034606933594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -26.65850830078125, "rewards/margins": 19.94034194946289, "rewards/real": -6.718170166015625, "step": 5030 }, { "epoch": 1.62, "learning_rate": 2.5565341585336827e-07, "logits/generated": 3.222811222076416, "logits/real": 1.9919694662094116, "logps/generated": -738.6182861328125, "logps/real": -439.84039306640625, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.00419998168945, "rewards/margins": 24.882890701293945, "rewards/real": -7.1213059425354, "step": 5040 }, { "epoch": 1.62, "learning_rate": 2.550583194477505e-07, "logits/generated": 2.728070020675659, "logits/real": 2.180960178375244, "logps/generated": -886.3973388671875, "logps/real": -480.2327575683594, "loss": 0.0355, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.03089141845703, "rewards/margins": 27.961660385131836, "rewards/real": -9.069226264953613, "step": 5050 }, { "epoch": 1.63, "learning_rate": 2.544632230421328e-07, "logits/generated": 2.982736587524414, "logits/real": 1.91934335231781, "logps/generated": -715.6785888671875, "logps/real": -501.8251037597656, "loss": 0.0079, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.447717666625977, "rewards/margins": 24.469932556152344, "rewards/real": -6.977784633636475, "step": 5060 }, { "epoch": 1.63, "learning_rate": 2.538681266365151e-07, "logits/generated": 2.9678521156311035, "logits/real": 2.094540596008301, "logps/generated": -690.1868896484375, "logps/real": -487.75146484375, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.726177215576172, "rewards/margins": 22.071422576904297, "rewards/real": -8.654757499694824, "step": 5070 }, { "epoch": 1.63, "learning_rate": 2.532730302308974e-07, "logits/generated": 2.969590187072754, "logits/real": 1.9709651470184326, "logps/generated": -765.6884155273438, "logps/real": -469.21209716796875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -34.07955551147461, "rewards/margins": 25.141956329345703, "rewards/real": -8.93759536743164, "step": 5080 }, { "epoch": 1.64, "learning_rate": 2.526779338252797e-07, "logits/generated": 2.799515724182129, "logits/real": 1.9044996500015259, "logps/generated": -733.8375244140625, "logps/real": -464.5816345214844, "loss": 0.0152, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.8238582611084, "rewards/margins": 24.320951461791992, "rewards/real": -6.502911567687988, "step": 5090 }, { "epoch": 1.64, "learning_rate": 2.52082837419662e-07, "logits/generated": 2.7419354915618896, "logits/real": 1.771140456199646, "logps/generated": -710.9172973632812, "logps/real": -465.570556640625, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/generated": -29.166614532470703, "rewards/margins": 24.319820404052734, "rewards/real": -4.846792697906494, "step": 5100 }, { "epoch": 1.64, "learning_rate": 2.514877410140442e-07, "logits/generated": 2.544529438018799, "logits/real": 1.575028657913208, "logps/generated": -771.6636962890625, "logps/real": -441.236083984375, "loss": 0.1338, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.430034637451172, "rewards/margins": 25.110210418701172, "rewards/real": -6.319823265075684, "step": 5110 }, { "epoch": 1.65, "learning_rate": 2.5089264460842657e-07, "logits/generated": 2.8638453483581543, "logits/real": 1.777227759361267, "logps/generated": -669.4266357421875, "logps/real": -434.623046875, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/generated": -26.182674407958984, "rewards/margins": 21.812503814697266, "rewards/real": -4.370171546936035, "step": 5120 }, { "epoch": 1.65, "learning_rate": 2.5029754820280887e-07, "logits/generated": 2.6607799530029297, "logits/real": 1.4905234575271606, "logps/generated": -665.0662841796875, "logps/real": -418.33782958984375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/generated": -28.730260848999023, "rewards/margins": 22.343725204467773, "rewards/real": -6.386534690856934, "step": 5130 }, { "epoch": 1.65, "learning_rate": 2.497024517971911e-07, "logits/generated": 2.906975746154785, "logits/real": 1.8390878438949585, "logps/generated": -616.5433349609375, "logps/real": -438.78277587890625, "loss": 0.0369, "rewards/accuracies": 0.925000011920929, "rewards/generated": -25.811447143554688, "rewards/margins": 18.983327865600586, "rewards/real": -6.828118324279785, "step": 5140 }, { "epoch": 1.65, "learning_rate": 2.4910735539157346e-07, "logits/generated": 3.056576728820801, "logits/real": 2.0154595375061035, "logps/generated": -702.7066650390625, "logps/real": -488.64892578125, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/generated": -29.37052345275879, "rewards/margins": 22.200847625732422, "rewards/real": -7.169675350189209, "step": 5150 }, { "epoch": 1.66, "learning_rate": 2.485122589859557e-07, "logits/generated": 3.2454257011413574, "logits/real": 1.889814019203186, "logps/generated": -753.4996337890625, "logps/real": -449.96337890625, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/generated": -33.740745544433594, "rewards/margins": 25.83376121520996, "rewards/real": -7.906986236572266, "step": 5160 }, { "epoch": 1.66, "learning_rate": 2.47917162580338e-07, "logits/generated": 2.7945713996887207, "logits/real": 1.7078113555908203, "logps/generated": -759.4659423828125, "logps/real": -435.614990234375, "loss": 0.0454, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.202117919921875, "rewards/margins": 25.43960189819336, "rewards/real": -6.762513160705566, "step": 5170 }, { "epoch": 1.66, "learning_rate": 2.473220661747203e-07, "logits/generated": 2.648451805114746, "logits/real": 1.7412598133087158, "logps/generated": -733.3679809570312, "logps/real": -440.29669189453125, "loss": 0.0375, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.32193374633789, "rewards/margins": 24.29556655883789, "rewards/real": -7.026368618011475, "step": 5180 }, { "epoch": 1.67, "learning_rate": 2.467269697691026e-07, "logits/generated": 3.2652766704559326, "logits/real": 2.115884304046631, "logps/generated": -744.8924560546875, "logps/real": -486.2250061035156, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/generated": -30.532958984375, "rewards/margins": 23.423959732055664, "rewards/real": -7.109001159667969, "step": 5190 }, { "epoch": 1.67, "learning_rate": 2.461318733634849e-07, "logits/generated": 2.822188377380371, "logits/real": 1.3295681476593018, "logps/generated": -759.28466796875, "logps/real": -456.4290466308594, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/generated": -26.58596420288086, "rewards/margins": 22.599458694458008, "rewards/real": -3.986506223678589, "step": 5200 }, { "epoch": 1.67, "learning_rate": 2.455367769578672e-07, "logits/generated": 2.415480852127075, "logits/real": 1.3910236358642578, "logps/generated": -720.91552734375, "logps/real": -456.38616943359375, "loss": 0.0641, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.909374237060547, "rewards/margins": 22.008350372314453, "rewards/real": -3.9010231494903564, "step": 5210 }, { "epoch": 1.68, "learning_rate": 2.4494168055224947e-07, "logits/generated": 3.2592720985412598, "logits/real": 2.091784715652466, "logps/generated": -747.2438354492188, "logps/real": -442.516357421875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/generated": -32.031429290771484, "rewards/margins": 26.13106346130371, "rewards/real": -5.900364875793457, "step": 5220 }, { "epoch": 1.68, "learning_rate": 2.4434658414663176e-07, "logits/generated": 2.9935145378112793, "logits/real": 1.467901587486267, "logps/generated": -807.4828491210938, "logps/real": -393.7422180175781, "loss": 0.0402, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.532814025878906, "rewards/margins": 28.70172691345215, "rewards/real": -5.831088066101074, "step": 5230 }, { "epoch": 1.68, "learning_rate": 2.4375148774101406e-07, "logits/generated": 2.823208808898926, "logits/real": 1.510538101196289, "logps/generated": -717.1495361328125, "logps/real": -426.45477294921875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -31.887248992919922, "rewards/margins": 27.405620574951172, "rewards/real": -4.481626033782959, "step": 5240 }, { "epoch": 1.69, "learning_rate": 2.431563913353963e-07, "logits/generated": 2.8816030025482178, "logits/real": 1.8721132278442383, "logps/generated": -698.6284790039062, "logps/real": -455.8505859375, "loss": 0.0329, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.584320068359375, "rewards/margins": 22.162700653076172, "rewards/real": -5.4216203689575195, "step": 5250 }, { "epoch": 1.69, "learning_rate": 2.425612949297786e-07, "logits/generated": 2.842949628829956, "logits/real": 2.150618553161621, "logps/generated": -752.1483154296875, "logps/real": -413.72418212890625, "loss": 0.0349, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.9172477722168, "rewards/margins": 27.57682228088379, "rewards/real": -5.340425491333008, "step": 5260 }, { "epoch": 1.69, "learning_rate": 2.419661985241609e-07, "logits/generated": 2.86783504486084, "logits/real": 1.9957005977630615, "logps/generated": -684.0862426757812, "logps/real": -423.90655517578125, "loss": 0.0171, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.807300567626953, "rewards/margins": 22.57040786743164, "rewards/real": -6.236894607543945, "step": 5270 }, { "epoch": 1.7, "learning_rate": 2.413711021185432e-07, "logits/generated": 2.691141128540039, "logits/real": 2.0136866569519043, "logps/generated": -731.213623046875, "logps/real": -425.91473388671875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -30.016021728515625, "rewards/margins": 24.95660400390625, "rewards/real": -5.059416770935059, "step": 5280 }, { "epoch": 1.7, "learning_rate": 2.407760057129255e-07, "logits/generated": 3.5068390369415283, "logits/real": 2.3343660831451416, "logps/generated": -692.9608154296875, "logps/real": -473.14044189453125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/generated": -27.307918548583984, "rewards/margins": 20.745437622070312, "rewards/real": -6.562477111816406, "step": 5290 }, { "epoch": 1.7, "learning_rate": 2.401809093073078e-07, "logits/generated": 2.75744891166687, "logits/real": 1.7323909997940063, "logps/generated": -671.6505737304688, "logps/real": -431.427978515625, "loss": 0.0416, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.189565658569336, "rewards/margins": 19.617929458618164, "rewards/real": -6.5716376304626465, "step": 5300 }, { "epoch": 1.71, "learning_rate": 2.3958581290169007e-07, "logits/generated": 2.7290797233581543, "logits/real": 1.5933500528335571, "logps/generated": -808.5557861328125, "logps/real": -462.436279296875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/generated": -37.44112014770508, "rewards/margins": 28.445056915283203, "rewards/real": -8.99606704711914, "step": 5310 }, { "epoch": 1.71, "learning_rate": 2.3899071649607236e-07, "logits/generated": 3.152801275253296, "logits/real": 1.7887890338897705, "logps/generated": -777.1715698242188, "logps/real": -543.6041259765625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/generated": -33.41661071777344, "rewards/margins": 21.798736572265625, "rewards/real": -11.617876052856445, "step": 5320 }, { "epoch": 1.71, "learning_rate": 2.3839562009045463e-07, "logits/generated": 2.955986976623535, "logits/real": 1.5648243427276611, "logps/generated": -754.6717529296875, "logps/real": -433.7395935058594, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/generated": -34.17113494873047, "rewards/margins": 26.666982650756836, "rewards/real": -7.504152774810791, "step": 5330 }, { "epoch": 1.72, "learning_rate": 2.3780052368483693e-07, "logits/generated": 2.6767773628234863, "logits/real": 1.4906699657440186, "logps/generated": -706.8065185546875, "logps/real": -463.88543701171875, "loss": 0.013, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.29275131225586, "rewards/margins": 23.12708282470703, "rewards/real": -7.165673732757568, "step": 5340 }, { "epoch": 1.72, "learning_rate": 2.3720542727921922e-07, "logits/generated": 2.910680055618286, "logits/real": 1.3556164503097534, "logps/generated": -666.5962524414062, "logps/real": -470.55291748046875, "loss": 0.0233, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -27.45033836364746, "rewards/margins": 20.965059280395508, "rewards/real": -6.485279083251953, "step": 5350 }, { "epoch": 1.72, "learning_rate": 2.3661033087360152e-07, "logits/generated": 2.689680576324463, "logits/real": 1.0981296300888062, "logps/generated": -701.8787841796875, "logps/real": -444.9510192871094, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/generated": -30.670818328857422, "rewards/margins": 23.23502540588379, "rewards/real": -7.435798645019531, "step": 5360 }, { "epoch": 1.73, "learning_rate": 2.3601523446798379e-07, "logits/generated": 3.0294888019561768, "logits/real": 1.7291358709335327, "logps/generated": -754.689208984375, "logps/real": -417.75494384765625, "loss": 0.0511, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.69175338745117, "rewards/margins": 26.0546932220459, "rewards/real": -6.637058258056641, "step": 5370 }, { "epoch": 1.73, "learning_rate": 2.354201380623661e-07, "logits/generated": 2.681684732437134, "logits/real": 1.2749654054641724, "logps/generated": -669.1463623046875, "logps/real": -430.14825439453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -28.479578018188477, "rewards/margins": 22.748775482177734, "rewards/real": -5.7308030128479, "step": 5380 }, { "epoch": 1.73, "learning_rate": 2.3482504165674838e-07, "logits/generated": 2.4223625659942627, "logits/real": 1.2798221111297607, "logps/generated": -663.8045043945312, "logps/real": -414.91455078125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -28.343509674072266, "rewards/margins": 23.536113739013672, "rewards/real": -4.807394981384277, "step": 5390 }, { "epoch": 1.74, "learning_rate": 2.342299452511307e-07, "logits/generated": 2.959122657775879, "logits/real": 1.4598335027694702, "logps/generated": -707.958251953125, "logps/real": -429.2984924316406, "loss": 0.012, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.75325584411621, "rewards/margins": 22.29989242553711, "rewards/real": -5.453360557556152, "step": 5400 }, { "epoch": 1.74, "learning_rate": 2.3363484884551296e-07, "logits/generated": 2.5947265625, "logits/real": 1.4615029096603394, "logps/generated": -701.7139282226562, "logps/real": -475.82562255859375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/generated": -29.210739135742188, "rewards/margins": 22.190637588500977, "rewards/real": -7.020098686218262, "step": 5410 }, { "epoch": 1.74, "learning_rate": 2.3303975243989526e-07, "logits/generated": 2.9765827655792236, "logits/real": 1.2555286884307861, "logps/generated": -713.07861328125, "logps/real": -516.0474853515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -32.003273010253906, "rewards/margins": 24.047054290771484, "rewards/real": -7.956216335296631, "step": 5420 }, { "epoch": 1.74, "learning_rate": 2.3244465603427755e-07, "logits/generated": 2.534071445465088, "logits/real": 1.1811937093734741, "logps/generated": -750.6032104492188, "logps/real": -518.8228759765625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/generated": -30.289377212524414, "rewards/margins": 24.1800479888916, "rewards/real": -6.1093316078186035, "step": 5430 }, { "epoch": 1.75, "learning_rate": 2.3184955962865982e-07, "logits/generated": 2.654798984527588, "logits/real": 1.203155755996704, "logps/generated": -715.1002197265625, "logps/real": -435.7122497558594, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -31.899333953857422, "rewards/margins": 24.78330421447754, "rewards/real": -7.116034030914307, "step": 5440 }, { "epoch": 1.75, "learning_rate": 2.3125446322304212e-07, "logits/generated": 2.660198926925659, "logits/real": 1.1293308734893799, "logps/generated": -752.1211547851562, "logps/real": -406.1238708496094, "loss": 0.0221, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.58022117614746, "rewards/margins": 22.895931243896484, "rewards/real": -6.684289455413818, "step": 5450 }, { "epoch": 1.75, "learning_rate": 2.306593668174244e-07, "logits/generated": 2.7071640491485596, "logits/real": 1.3755825757980347, "logps/generated": -775.673095703125, "logps/real": -486.27691650390625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/generated": -30.75917625427246, "rewards/margins": 24.801382064819336, "rewards/real": -5.957797050476074, "step": 5460 }, { "epoch": 1.76, "learning_rate": 2.300642704118067e-07, "logits/generated": 2.5014870166778564, "logits/real": 0.8498876690864563, "logps/generated": -708.8710327148438, "logps/real": -464.1234436035156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -28.217737197875977, "rewards/margins": 22.49802589416504, "rewards/real": -5.719711780548096, "step": 5470 }, { "epoch": 1.76, "learning_rate": 2.2946917400618898e-07, "logits/generated": 2.605872631072998, "logits/real": 1.2937989234924316, "logps/generated": -811.8409423828125, "logps/real": -504.169189453125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/generated": -32.49912643432617, "rewards/margins": 25.55198860168457, "rewards/real": -6.94713830947876, "step": 5480 }, { "epoch": 1.76, "learning_rate": 2.288740776005713e-07, "logits/generated": 2.917212963104248, "logits/real": 1.3528918027877808, "logps/generated": -768.0127563476562, "logps/real": -479.8633728027344, "loss": 0.0403, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.010440826416016, "rewards/margins": 22.82418441772461, "rewards/real": -7.186254024505615, "step": 5490 }, { "epoch": 1.77, "learning_rate": 2.2827898119495357e-07, "logits/generated": 2.696010112762451, "logits/real": 1.1761934757232666, "logps/generated": -684.8172607421875, "logps/real": -457.9454040527344, "loss": 0.0329, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.704761505126953, "rewards/margins": 20.90123748779297, "rewards/real": -7.803526878356934, "step": 5500 }, { "epoch": 1.77, "learning_rate": 2.2768388478933586e-07, "logits/generated": 2.552457571029663, "logits/real": 1.241220235824585, "logps/generated": -711.3782348632812, "logps/real": -409.0838928222656, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/generated": -29.80320167541504, "rewards/margins": 23.669525146484375, "rewards/real": -6.133675575256348, "step": 5510 }, { "epoch": 1.77, "learning_rate": 2.2708878838371816e-07, "logits/generated": 2.6401093006134033, "logits/real": 1.3719732761383057, "logps/generated": -710.6211547851562, "logps/real": -504.38800048828125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/generated": -28.926345825195312, "rewards/margins": 21.45614242553711, "rewards/real": -7.4701995849609375, "step": 5520 }, { "epoch": 1.78, "learning_rate": 2.2649369197810045e-07, "logits/generated": 2.950218915939331, "logits/real": 1.6391916275024414, "logps/generated": -773.2508544921875, "logps/real": -461.0390625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -34.48397445678711, "rewards/margins": 26.925251007080078, "rewards/real": -7.558724880218506, "step": 5530 }, { "epoch": 1.78, "learning_rate": 2.2589859557248272e-07, "logits/generated": 3.4347710609436035, "logits/real": 2.0075528621673584, "logps/generated": -756.8824462890625, "logps/real": -467.9287109375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -36.634769439697266, "rewards/margins": 28.416446685791016, "rewards/real": -8.218317031860352, "step": 5540 }, { "epoch": 1.78, "learning_rate": 2.2530349916686504e-07, "logits/generated": 2.9656918048858643, "logits/real": 2.171490430831909, "logps/generated": -665.1826171875, "logps/real": -415.07647705078125, "loss": 0.0389, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.8585147857666, "rewards/margins": 22.468830108642578, "rewards/real": -7.389682769775391, "step": 5550 }, { "epoch": 1.79, "learning_rate": 2.247084027612473e-07, "logits/generated": 2.446951150894165, "logits/real": 1.2317943572998047, "logps/generated": -799.5875854492188, "logps/real": -458.3248596191406, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -30.641714096069336, "rewards/margins": 25.08247947692871, "rewards/real": -5.559234619140625, "step": 5560 }, { "epoch": 1.79, "learning_rate": 2.241133063556296e-07, "logits/generated": 2.8455803394317627, "logits/real": 1.3593130111694336, "logps/generated": -698.3577270507812, "logps/real": -467.02545166015625, "loss": 0.0393, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.29443359375, "rewards/margins": 22.628559112548828, "rewards/real": -7.665876865386963, "step": 5570 }, { "epoch": 1.79, "learning_rate": 2.235182099500119e-07, "logits/generated": 3.1402835845947266, "logits/real": 2.1559672355651855, "logps/generated": -653.4713745117188, "logps/real": -461.2105407714844, "loss": 0.0597, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.697525024414062, "rewards/margins": 20.618328094482422, "rewards/real": -6.079193592071533, "step": 5580 }, { "epoch": 1.8, "learning_rate": 2.229231135443942e-07, "logits/generated": 2.89564847946167, "logits/real": 2.00141978263855, "logps/generated": -843.45361328125, "logps/real": -508.66546630859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -31.43828773498535, "rewards/margins": 24.105422973632812, "rewards/real": -7.332862854003906, "step": 5590 }, { "epoch": 1.8, "learning_rate": 2.2232801713877646e-07, "logits/generated": 3.3040759563446045, "logits/real": 2.143773317337036, "logps/generated": -670.0435791015625, "logps/real": -433.7417907714844, "loss": 0.0176, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.447317123413086, "rewards/margins": 21.794464111328125, "rewards/real": -6.652854919433594, "step": 5600 }, { "epoch": 1.8, "learning_rate": 2.2173292073315878e-07, "logits/generated": 3.2964019775390625, "logits/real": 2.1834044456481934, "logps/generated": -655.6387939453125, "logps/real": -462.686767578125, "loss": 0.0299, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -28.412311553955078, "rewards/margins": 20.467451095581055, "rewards/real": -7.944859504699707, "step": 5610 }, { "epoch": 1.81, "learning_rate": 2.2113782432754105e-07, "logits/generated": 3.2280681133270264, "logits/real": 1.460381269454956, "logps/generated": -674.0933837890625, "logps/real": -468.34991455078125, "loss": 0.0268, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.98651695251465, "rewards/margins": 19.617218017578125, "rewards/real": -6.36929988861084, "step": 5620 }, { "epoch": 1.81, "learning_rate": 2.2054272792192335e-07, "logits/generated": 3.130091667175293, "logits/real": 1.7368800640106201, "logps/generated": -716.0877685546875, "logps/real": -512.7584228515625, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.322362899780273, "rewards/margins": 23.450937271118164, "rewards/real": -7.8714280128479, "step": 5630 }, { "epoch": 1.81, "learning_rate": 2.1994763151630564e-07, "logits/generated": 3.469697952270508, "logits/real": 1.7277415990829468, "logps/generated": -756.61328125, "logps/real": -523.1085815429688, "loss": 0.0243, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.16545486450195, "rewards/margins": 24.577342987060547, "rewards/real": -7.588109016418457, "step": 5640 }, { "epoch": 1.82, "learning_rate": 2.193525351106879e-07, "logits/generated": 3.619964599609375, "logits/real": 1.6505966186523438, "logps/generated": -707.2024536132812, "logps/real": -481.4723205566406, "loss": 0.0163, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.787860870361328, "rewards/margins": 23.255229949951172, "rewards/real": -6.532633304595947, "step": 5650 }, { "epoch": 1.82, "learning_rate": 2.187574387050702e-07, "logits/generated": 3.6552112102508545, "logits/real": 1.7688257694244385, "logps/generated": -730.9967651367188, "logps/real": -504.402099609375, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/generated": -30.167022705078125, "rewards/margins": 23.241119384765625, "rewards/real": -6.925901889801025, "step": 5660 }, { "epoch": 1.82, "learning_rate": 2.181623422994525e-07, "logits/generated": 3.0656933784484863, "logits/real": 1.9707841873168945, "logps/generated": -758.146484375, "logps/real": -482.95294189453125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -32.09127426147461, "rewards/margins": 24.831462860107422, "rewards/real": -7.259812355041504, "step": 5670 }, { "epoch": 1.83, "learning_rate": 2.175672458938348e-07, "logits/generated": 3.4794890880584717, "logits/real": 2.2101058959960938, "logps/generated": -684.7684326171875, "logps/real": -463.972900390625, "loss": 0.0138, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.933116912841797, "rewards/margins": 20.60323715209961, "rewards/real": -7.3298797607421875, "step": 5680 }, { "epoch": 1.83, "learning_rate": 2.1697214948821706e-07, "logits/generated": 3.0091590881347656, "logits/real": 1.5238651037216187, "logps/generated": -662.09765625, "logps/real": -435.771728515625, "loss": 0.0814, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.887866973876953, "rewards/margins": 20.965564727783203, "rewards/real": -4.922301292419434, "step": 5690 }, { "epoch": 1.83, "learning_rate": 2.1637705308259938e-07, "logits/generated": 2.797576665878296, "logits/real": 1.4991616010665894, "logps/generated": -715.3610229492188, "logps/real": -463.04681396484375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -26.817230224609375, "rewards/margins": 21.125072479248047, "rewards/real": -5.6921539306640625, "step": 5700 }, { "epoch": 1.83, "learning_rate": 2.1578195667698165e-07, "logits/generated": 3.0820202827453613, "logits/real": 1.8699191808700562, "logps/generated": -718.1972045898438, "logps/real": -475.1966247558594, "loss": 0.0115, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.86795425415039, "rewards/margins": 21.317264556884766, "rewards/real": -5.550690650939941, "step": 5710 }, { "epoch": 1.84, "learning_rate": 2.1518686027136395e-07, "logits/generated": 3.038087844848633, "logits/real": 1.2628568410873413, "logps/generated": -685.7374877929688, "logps/real": -504.09368896484375, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/generated": -25.509607315063477, "rewards/margins": 21.282466888427734, "rewards/real": -4.227142810821533, "step": 5720 }, { "epoch": 1.84, "learning_rate": 2.1459176386574624e-07, "logits/generated": 2.605422019958496, "logits/real": 1.457798957824707, "logps/generated": -721.0975341796875, "logps/real": -418.3540954589844, "loss": 0.0106, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -28.388507843017578, "rewards/margins": 21.493106842041016, "rewards/real": -6.895401954650879, "step": 5730 }, { "epoch": 1.84, "learning_rate": 2.1399666746012854e-07, "logits/generated": 2.997837781906128, "logits/real": 0.839316189289093, "logps/generated": -679.2623291015625, "logps/real": -481.00592041015625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/generated": -28.459264755249023, "rewards/margins": 22.15597915649414, "rewards/real": -6.303285121917725, "step": 5740 }, { "epoch": 1.85, "learning_rate": 2.134015710545108e-07, "logits/generated": 2.599454641342163, "logits/real": 1.0574313402175903, "logps/generated": -663.8294067382812, "logps/real": -447.6298828125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -28.11807632446289, "rewards/margins": 23.381507873535156, "rewards/real": -4.736568927764893, "step": 5750 }, { "epoch": 1.85, "learning_rate": 2.1280647464889313e-07, "logits/generated": 2.5867490768432617, "logits/real": 0.908903956413269, "logps/generated": -718.8128662109375, "logps/real": -430.189453125, "loss": 0.0078, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -28.684032440185547, "rewards/margins": 22.639102935791016, "rewards/real": -6.044931888580322, "step": 5760 }, { "epoch": 1.85, "learning_rate": 2.122113782432754e-07, "logits/generated": 2.440488338470459, "logits/real": 1.1901414394378662, "logps/generated": -811.150634765625, "logps/real": -477.43536376953125, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.804540634155273, "rewards/margins": 24.78463363647461, "rewards/real": -7.019909858703613, "step": 5770 }, { "epoch": 1.86, "learning_rate": 2.116162818376577e-07, "logits/generated": 2.7003817558288574, "logits/real": 1.5489081144332886, "logps/generated": -707.3662109375, "logps/real": -461.5133361816406, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.568317413330078, "rewards/margins": 22.07459831237793, "rewards/real": -6.493720054626465, "step": 5780 }, { "epoch": 1.86, "learning_rate": 2.1102118543203998e-07, "logits/generated": 3.0403170585632324, "logits/real": 1.5336769819259644, "logps/generated": -765.9243774414062, "logps/real": -516.1292724609375, "loss": 0.0318, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.90403366088867, "rewards/margins": 25.290592193603516, "rewards/real": -7.613440036773682, "step": 5790 }, { "epoch": 1.86, "learning_rate": 2.1042608902642228e-07, "logits/generated": 3.1250314712524414, "logits/real": 1.6278241872787476, "logps/generated": -813.4271240234375, "logps/real": -527.4683837890625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/generated": -34.745723724365234, "rewards/margins": 25.233367919921875, "rewards/real": -9.512349128723145, "step": 5800 }, { "epoch": 1.87, "learning_rate": 2.0983099262080455e-07, "logits/generated": 3.3532841205596924, "logits/real": 1.9759838581085205, "logps/generated": -835.74951171875, "logps/real": -427.34539794921875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/generated": -34.730987548828125, "rewards/margins": 29.040348052978516, "rewards/real": -5.69063663482666, "step": 5810 }, { "epoch": 1.87, "learning_rate": 2.0923589621518687e-07, "logits/generated": 2.8812496662139893, "logits/real": 1.574552297592163, "logps/generated": -754.96826171875, "logps/real": -460.3710021972656, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.462261199951172, "rewards/margins": 25.757083892822266, "rewards/real": -5.705177307128906, "step": 5820 }, { "epoch": 1.87, "learning_rate": 2.0864079980956914e-07, "logits/generated": 3.600605010986328, "logits/real": 1.8222070932388306, "logps/generated": -764.5396118164062, "logps/real": -451.06964111328125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -34.749000549316406, "rewards/margins": 27.31717300415039, "rewards/real": -7.431827545166016, "step": 5830 }, { "epoch": 1.88, "learning_rate": 2.080457034039514e-07, "logits/generated": 3.7013142108917236, "logits/real": 2.087290048599243, "logps/generated": -850.8868408203125, "logps/real": -460.0469665527344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -36.133060455322266, "rewards/margins": 27.512826919555664, "rewards/real": -8.620233535766602, "step": 5840 }, { "epoch": 1.88, "learning_rate": 2.0745060699833373e-07, "logits/generated": 3.2362942695617676, "logits/real": 2.055744171142578, "logps/generated": -796.7747802734375, "logps/real": -514.2713012695312, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/generated": -35.28668975830078, "rewards/margins": 26.677703857421875, "rewards/real": -8.608981132507324, "step": 5850 }, { "epoch": 1.88, "learning_rate": 2.06855510592716e-07, "logits/generated": 3.418581485748291, "logits/real": 2.2879433631896973, "logps/generated": -815.4217529296875, "logps/real": -480.712890625, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/generated": -33.09884262084961, "rewards/margins": 24.691471099853516, "rewards/real": -8.407373428344727, "step": 5860 }, { "epoch": 1.89, "learning_rate": 2.062604141870983e-07, "logits/generated": 2.988093614578247, "logits/real": 1.783630609512329, "logps/generated": -674.3501586914062, "logps/real": -406.0289001464844, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -26.45358657836914, "rewards/margins": 20.868459701538086, "rewards/real": -5.585123538970947, "step": 5870 }, { "epoch": 1.89, "learning_rate": 2.0566531778148059e-07, "logits/generated": 3.688793659210205, "logits/real": 2.070399284362793, "logps/generated": -743.6490478515625, "logps/real": -471.90716552734375, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.91237258911133, "rewards/margins": 25.45476722717285, "rewards/real": -8.457605361938477, "step": 5880 }, { "epoch": 1.89, "learning_rate": 2.0507022137586288e-07, "logits/generated": 3.4528822898864746, "logits/real": 1.8902027606964111, "logps/generated": -677.3098754882812, "logps/real": -485.5440979003906, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.425466537475586, "rewards/margins": 20.571178436279297, "rewards/real": -7.854287624359131, "step": 5890 }, { "epoch": 1.9, "learning_rate": 2.0447512497024515e-07, "logits/generated": 2.7801759243011475, "logits/real": 1.0644177198410034, "logps/generated": -671.3907470703125, "logps/real": -428.8927307128906, "loss": 0.0206, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.90665054321289, "rewards/margins": 20.93669891357422, "rewards/real": -5.969952583312988, "step": 5900 }, { "epoch": 1.9, "learning_rate": 2.0388002856462747e-07, "logits/generated": 2.8966877460479736, "logits/real": 1.3920232057571411, "logps/generated": -651.2731323242188, "logps/real": -451.3460998535156, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -29.057958602905273, "rewards/margins": 22.836750030517578, "rewards/real": -6.221212387084961, "step": 5910 }, { "epoch": 1.9, "learning_rate": 2.0328493215900974e-07, "logits/generated": 2.976752996444702, "logits/real": 1.1868098974227905, "logps/generated": -748.0014038085938, "logps/real": -447.36138916015625, "loss": 0.0421, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.844507217407227, "rewards/margins": 24.713308334350586, "rewards/real": -7.131197929382324, "step": 5920 }, { "epoch": 1.91, "learning_rate": 2.0268983575339206e-07, "logits/generated": 3.282592296600342, "logits/real": 1.4352185726165771, "logps/generated": -726.3150634765625, "logps/real": -516.3076782226562, "loss": 0.0254, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.921512603759766, "rewards/margins": 23.147998809814453, "rewards/real": -8.77351188659668, "step": 5930 }, { "epoch": 1.91, "learning_rate": 2.0209473934777433e-07, "logits/generated": 2.480395555496216, "logits/real": 1.0593836307525635, "logps/generated": -723.8402709960938, "logps/real": -444.6786193847656, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -31.583181381225586, "rewards/margins": 24.095699310302734, "rewards/real": -7.48748254776001, "step": 5940 }, { "epoch": 1.91, "learning_rate": 2.0149964294215662e-07, "logits/generated": 2.9752345085144043, "logits/real": 1.1397522687911987, "logps/generated": -776.2227172851562, "logps/real": -494.5772399902344, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/generated": -32.56951904296875, "rewards/margins": 26.18454933166504, "rewards/real": -6.384967803955078, "step": 5950 }, { "epoch": 1.92, "learning_rate": 2.0090454653653892e-07, "logits/generated": 3.008641242980957, "logits/real": 1.4339027404785156, "logps/generated": -798.4984741210938, "logps/real": -455.3702697753906, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/generated": -34.42979431152344, "rewards/margins": 26.591014862060547, "rewards/real": -7.838778495788574, "step": 5960 }, { "epoch": 1.92, "learning_rate": 2.003094501309212e-07, "logits/generated": 3.171658515930176, "logits/real": 1.786611795425415, "logps/generated": -741.7694702148438, "logps/real": -503.50115966796875, "loss": 0.0163, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.65754318237305, "rewards/margins": 22.39518165588379, "rewards/real": -12.262359619140625, "step": 5970 }, { "epoch": 1.92, "learning_rate": 1.9971435372530348e-07, "logits/generated": 3.4006717205047607, "logits/real": 1.937177062034607, "logps/generated": -797.3352661132812, "logps/real": -497.25714111328125, "loss": 0.0555, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.03146743774414, "rewards/margins": 25.487783432006836, "rewards/real": -11.543684959411621, "step": 5980 }, { "epoch": 1.92, "learning_rate": 1.991192573196858e-07, "logits/generated": 2.996246576309204, "logits/real": 1.628159523010254, "logps/generated": -816.3474731445312, "logps/real": -533.3189086914062, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -36.799381256103516, "rewards/margins": 25.136749267578125, "rewards/real": -11.662628173828125, "step": 5990 }, { "epoch": 1.93, "learning_rate": 1.9852416091406807e-07, "logits/generated": 3.26483154296875, "logits/real": 2.0803146362304688, "logps/generated": -821.1701049804688, "logps/real": -512.66845703125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -37.62847137451172, "rewards/margins": 26.89015769958496, "rewards/real": -10.738313674926758, "step": 6000 }, { "epoch": 1.93, "learning_rate": 1.9792906450845037e-07, "logits/generated": 3.347775936126709, "logits/real": 2.269700288772583, "logps/generated": -807.4554443359375, "logps/real": -474.37908935546875, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -35.2678108215332, "rewards/margins": 25.417163848876953, "rewards/real": -9.850648880004883, "step": 6010 }, { "epoch": 1.93, "learning_rate": 1.9733396810283266e-07, "logits/generated": 3.5969672203063965, "logits/real": 1.9511098861694336, "logps/generated": -747.4946899414062, "logps/real": -505.2703552246094, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/generated": -32.678009033203125, "rewards/margins": 23.052684783935547, "rewards/real": -9.625326156616211, "step": 6020 }, { "epoch": 1.94, "learning_rate": 1.9673887169721496e-07, "logits/generated": 3.2265143394470215, "logits/real": 2.096703290939331, "logps/generated": -752.9732055664062, "logps/real": -465.1576232910156, "loss": 0.0228, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.793691635131836, "rewards/margins": 23.042184829711914, "rewards/real": -8.751505851745605, "step": 6030 }, { "epoch": 1.94, "learning_rate": 1.9614377529159722e-07, "logits/generated": 3.8023338317871094, "logits/real": 2.3538644313812256, "logps/generated": -798.989013671875, "logps/real": -468.39306640625, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.19927978515625, "rewards/margins": 25.13361358642578, "rewards/real": -9.065664291381836, "step": 6040 }, { "epoch": 1.94, "learning_rate": 1.9554867888597952e-07, "logits/generated": 3.0939815044403076, "logits/real": 1.9666297435760498, "logps/generated": -749.4322509765625, "logps/real": -481.1255798339844, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/generated": -32.69804000854492, "rewards/margins": 25.424407958984375, "rewards/real": -7.273628234863281, "step": 6050 }, { "epoch": 1.95, "learning_rate": 1.9495358248036181e-07, "logits/generated": 3.34405779838562, "logits/real": 2.3473591804504395, "logps/generated": -797.3914184570312, "logps/real": -506.1500549316406, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/generated": -36.610618591308594, "rewards/margins": 26.47043800354004, "rewards/real": -10.140178680419922, "step": 6060 }, { "epoch": 1.95, "learning_rate": 1.9435848607474408e-07, "logits/generated": 3.341012954711914, "logits/real": 1.7094272375106812, "logps/generated": -771.1280517578125, "logps/real": -500.28271484375, "loss": 0.0528, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -34.969154357910156, "rewards/margins": 25.37281608581543, "rewards/real": -9.596338272094727, "step": 6070 }, { "epoch": 1.95, "learning_rate": 1.937633896691264e-07, "logits/generated": 3.8389554023742676, "logits/real": 2.900240898132324, "logps/generated": -801.2884521484375, "logps/real": -540.925048828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -36.037132263183594, "rewards/margins": 27.04874038696289, "rewards/real": -8.988397598266602, "step": 6080 }, { "epoch": 1.96, "learning_rate": 1.9316829326350867e-07, "logits/generated": 3.3485310077667236, "logits/real": 1.668752670288086, "logps/generated": -742.7164916992188, "logps/real": -476.8247985839844, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/generated": -31.77897071838379, "rewards/margins": 25.453754425048828, "rewards/real": -6.325215816497803, "step": 6090 }, { "epoch": 1.96, "learning_rate": 1.9257319685789097e-07, "logits/generated": 3.2133305072784424, "logits/real": 2.4366812705993652, "logps/generated": -851.7571411132812, "logps/real": -485.66943359375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/generated": -35.7568359375, "rewards/margins": 26.238117218017578, "rewards/real": -9.518716812133789, "step": 6100 }, { "epoch": 1.96, "learning_rate": 1.9197810045227326e-07, "logits/generated": 3.559143543243408, "logits/real": 2.334658145904541, "logps/generated": -760.0504150390625, "logps/real": -422.5521545410156, "loss": 0.0309, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.309932708740234, "rewards/margins": 23.871671676635742, "rewards/real": -9.438261985778809, "step": 6110 }, { "epoch": 1.97, "learning_rate": 1.9138300404665556e-07, "logits/generated": 3.540189027786255, "logits/real": 2.2443525791168213, "logps/generated": -789.2877197265625, "logps/real": -477.0284729003906, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/generated": -36.38033676147461, "rewards/margins": 29.3916015625, "rewards/real": -6.988736152648926, "step": 6120 }, { "epoch": 1.97, "learning_rate": 1.9078790764103782e-07, "logits/generated": 3.1156485080718994, "logits/real": 2.145648241043091, "logps/generated": -763.25634765625, "logps/real": -503.68328857421875, "loss": 0.0319, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.2876033782959, "rewards/margins": 21.99011993408203, "rewards/real": -8.2974853515625, "step": 6130 }, { "epoch": 1.97, "learning_rate": 1.9019281123542015e-07, "logits/generated": 2.463261604309082, "logits/real": 1.8599951267242432, "logps/generated": -836.5603637695312, "logps/real": -496.2212829589844, "loss": 0.0224, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -34.29585647583008, "rewards/margins": 25.487403869628906, "rewards/real": -8.808453559875488, "step": 6140 }, { "epoch": 1.98, "learning_rate": 1.8959771482980241e-07, "logits/generated": 3.0138566493988037, "logits/real": 1.5178388357162476, "logps/generated": -814.3992309570312, "logps/real": -464.64892578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -33.70328903198242, "rewards/margins": 26.163631439208984, "rewards/real": -7.539660453796387, "step": 6150 }, { "epoch": 1.98, "learning_rate": 1.890026184241847e-07, "logits/generated": 3.052659511566162, "logits/real": 1.983630895614624, "logps/generated": -861.5380859375, "logps/real": -509.36865234375, "loss": 0.0272, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -35.899269104003906, "rewards/margins": 27.742517471313477, "rewards/real": -8.15674877166748, "step": 6160 }, { "epoch": 1.98, "learning_rate": 1.88407522018567e-07, "logits/generated": 3.517216444015503, "logits/real": 1.921411156654358, "logps/generated": -772.3025512695312, "logps/real": -461.76861572265625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -31.825847625732422, "rewards/margins": 25.28184700012207, "rewards/real": -6.543998718261719, "step": 6170 }, { "epoch": 1.99, "learning_rate": 1.878124256129493e-07, "logits/generated": 3.129927635192871, "logits/real": 1.8396860361099243, "logps/generated": -690.7073974609375, "logps/real": -431.8116149902344, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/generated": -29.615066528320312, "rewards/margins": 22.658132553100586, "rewards/real": -6.956935882568359, "step": 6180 }, { "epoch": 1.99, "learning_rate": 1.8721732920733157e-07, "logits/generated": 2.79906964302063, "logits/real": 1.612360954284668, "logps/generated": -728.2341918945312, "logps/real": -477.59619140625, "loss": 0.0213, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.16036033630371, "rewards/margins": 24.525720596313477, "rewards/real": -5.634638786315918, "step": 6190 }, { "epoch": 1.99, "learning_rate": 1.866222328017139e-07, "logits/generated": 3.5017170906066895, "logits/real": 1.345957636833191, "logps/generated": -695.3180541992188, "logps/real": -476.79437255859375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -33.25951385498047, "rewards/margins": 26.6379337310791, "rewards/real": -6.621582984924316, "step": 6200 }, { "epoch": 2.0, "learning_rate": 1.8602713639609616e-07, "logits/generated": 2.4704718589782715, "logits/real": 1.207629919052124, "logps/generated": -699.0107421875, "logps/real": -435.38232421875, "loss": 0.0382, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.03241539001465, "rewards/margins": 24.147058486938477, "rewards/real": -5.8853583335876465, "step": 6210 }, { "epoch": 2.0, "learning_rate": 1.8543203999047845e-07, "logits/generated": 3.3870880603790283, "logits/real": 1.8318264484405518, "logps/generated": -667.4065551757812, "logps/real": -424.3517150878906, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -26.48272132873535, "rewards/margins": 20.73587989807129, "rewards/real": -5.746842384338379, "step": 6220 }, { "epoch": 2.0, "learning_rate": 1.8483694358486075e-07, "logits/generated": 3.07029128074646, "logits/real": 1.7169042825698853, "logps/generated": -768.1024169921875, "logps/real": -439.9070739746094, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.02583694458008, "rewards/margins": 28.658138275146484, "rewards/real": -5.367697715759277, "step": 6230 }, { "epoch": 2.01, "learning_rate": 1.8424184717924304e-07, "logits/generated": 3.3821499347686768, "logits/real": 1.8697267770767212, "logps/generated": -688.635009765625, "logps/real": -447.0469665527344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.846878051757812, "rewards/margins": 24.339887619018555, "rewards/real": -5.506989479064941, "step": 6240 }, { "epoch": 2.01, "learning_rate": 1.836467507736253e-07, "logits/generated": 3.2087948322296143, "logits/real": 1.9404966831207275, "logps/generated": -708.5045166015625, "logps/real": -459.5628356933594, "loss": 0.0217, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.491397857666016, "rewards/margins": 23.212505340576172, "rewards/real": -6.278890609741211, "step": 6250 }, { "epoch": 2.01, "learning_rate": 1.830516543680076e-07, "logits/generated": 3.5987801551818848, "logits/real": 2.005876064300537, "logps/generated": -755.4384155273438, "logps/real": -486.30816650390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -30.79571533203125, "rewards/margins": 25.197860717773438, "rewards/real": -5.597853183746338, "step": 6260 }, { "epoch": 2.01, "learning_rate": 1.824565579623899e-07, "logits/generated": 2.6892330646514893, "logits/real": 1.5897356271743774, "logps/generated": -729.8096923828125, "logps/real": -474.1394958496094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -29.81118392944336, "rewards/margins": 23.526775360107422, "rewards/real": -6.284409523010254, "step": 6270 }, { "epoch": 2.02, "learning_rate": 1.8186146155677217e-07, "logits/generated": 3.1169962882995605, "logits/real": 1.9156696796417236, "logps/generated": -693.0061645507812, "logps/real": -402.56756591796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -30.740665435791016, "rewards/margins": 25.9101505279541, "rewards/real": -4.830517768859863, "step": 6280 }, { "epoch": 2.02, "learning_rate": 1.812663651511545e-07, "logits/generated": 3.193457841873169, "logits/real": 2.040435314178467, "logps/generated": -807.4782104492188, "logps/real": -469.2937927246094, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -32.69962692260742, "rewards/margins": 27.740413665771484, "rewards/real": -4.959216117858887, "step": 6290 }, { "epoch": 2.02, "learning_rate": 1.8067126874553676e-07, "logits/generated": 2.6828558444976807, "logits/real": 1.6642811298370361, "logps/generated": -776.3592529296875, "logps/real": -480.539794921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -31.057636260986328, "rewards/margins": 25.11455726623535, "rewards/real": -5.943079948425293, "step": 6300 }, { "epoch": 2.03, "learning_rate": 1.8007617233991905e-07, "logits/generated": 3.338327407836914, "logits/real": 1.9675556421279907, "logps/generated": -804.360595703125, "logps/real": -491.82733154296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -33.61079025268555, "rewards/margins": 27.75766944885254, "rewards/real": -5.853122234344482, "step": 6310 }, { "epoch": 2.03, "learning_rate": 1.7948107593430135e-07, "logits/generated": 3.1016745567321777, "logits/real": 1.7637602090835571, "logps/generated": -704.31201171875, "logps/real": -430.8287048339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -32.20759582519531, "rewards/margins": 26.00567626953125, "rewards/real": -6.201918601989746, "step": 6320 }, { "epoch": 2.03, "learning_rate": 1.7888597952868364e-07, "logits/generated": 2.9748806953430176, "logits/real": 1.8464667797088623, "logps/generated": -729.1771850585938, "logps/real": -445.2801818847656, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.5558967590332, "rewards/margins": 25.59171485900879, "rewards/real": -6.9641828536987305, "step": 6330 }, { "epoch": 2.04, "learning_rate": 1.782908831230659e-07, "logits/generated": 3.177793502807617, "logits/real": 2.0086519718170166, "logps/generated": -741.6273193359375, "logps/real": -435.7503356933594, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -35.483524322509766, "rewards/margins": 27.497705459594727, "rewards/real": -7.985817909240723, "step": 6340 }, { "epoch": 2.04, "learning_rate": 1.7769578671744823e-07, "logits/generated": 3.133960008621216, "logits/real": 2.067500352859497, "logps/generated": -702.2384033203125, "logps/real": -470.1734924316406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -30.85849380493164, "rewards/margins": 23.97761344909668, "rewards/real": -6.880878448486328, "step": 6350 }, { "epoch": 2.04, "learning_rate": 1.771006903118305e-07, "logits/generated": 3.531920909881592, "logits/real": 1.9516847133636475, "logps/generated": -666.6228637695312, "logps/real": -449.03369140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -30.33913230895996, "rewards/margins": 23.271108627319336, "rewards/real": -7.06802225112915, "step": 6360 }, { "epoch": 2.05, "learning_rate": 1.765055939062128e-07, "logits/generated": 3.029170513153076, "logits/real": 2.236736536026001, "logps/generated": -811.6734619140625, "logps/real": -434.297119140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.372764587402344, "rewards/margins": 27.47579574584961, "rewards/real": -7.89696741104126, "step": 6370 }, { "epoch": 2.05, "learning_rate": 1.759104975005951e-07, "logits/generated": 3.5796592235565186, "logits/real": 2.2333364486694336, "logps/generated": -735.351806640625, "logps/real": -482.19573974609375, "loss": 0.0041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.26863098144531, "rewards/margins": 26.08734703063965, "rewards/real": -7.181282997131348, "step": 6380 }, { "epoch": 2.05, "learning_rate": 1.7531540109497738e-07, "logits/generated": 3.0666661262512207, "logits/real": 1.8722283840179443, "logps/generated": -745.4119873046875, "logps/real": -501.99420166015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -31.516876220703125, "rewards/margins": 26.70094871520996, "rewards/real": -4.8159260749816895, "step": 6390 }, { "epoch": 2.06, "learning_rate": 1.7472030468935965e-07, "logits/generated": 3.0296640396118164, "logits/real": 1.7486279010772705, "logps/generated": -774.6638793945312, "logps/real": -406.6872253417969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -36.22615051269531, "rewards/margins": 29.07309913635254, "rewards/real": -7.153050422668457, "step": 6400 }, { "epoch": 2.06, "learning_rate": 1.7412520828374197e-07, "logits/generated": 3.21099591255188, "logits/real": 1.978674292564392, "logps/generated": -739.1716918945312, "logps/real": -431.38983154296875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -32.394100189208984, "rewards/margins": 24.6510009765625, "rewards/real": -7.743099212646484, "step": 6410 }, { "epoch": 2.06, "learning_rate": 1.7353011187812424e-07, "logits/generated": 3.942091464996338, "logits/real": 2.311521053314209, "logps/generated": -651.0306396484375, "logps/real": -418.48797607421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -29.951709747314453, "rewards/margins": 22.43353843688965, "rewards/real": -7.5181708335876465, "step": 6420 }, { "epoch": 2.07, "learning_rate": 1.7293501547250656e-07, "logits/generated": 3.4128024578094482, "logits/real": 1.9964441061019897, "logps/generated": -870.8810424804688, "logps/real": -453.6979064941406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -37.46969223022461, "rewards/margins": 30.21625328063965, "rewards/real": -7.253439426422119, "step": 6430 }, { "epoch": 2.07, "learning_rate": 1.7233991906688883e-07, "logits/generated": 3.1276350021362305, "logits/real": 2.3425166606903076, "logps/generated": -765.7840576171875, "logps/real": -464.552490234375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -33.910152435302734, "rewards/margins": 26.4287052154541, "rewards/real": -7.481447696685791, "step": 6440 }, { "epoch": 2.07, "learning_rate": 1.717448226612711e-07, "logits/generated": 3.6584267616271973, "logits/real": 2.6113901138305664, "logps/generated": -756.5220336914062, "logps/real": -439.65460205078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -34.312252044677734, "rewards/margins": 27.10641098022461, "rewards/real": -7.205840110778809, "step": 6450 }, { "epoch": 2.08, "learning_rate": 1.7114972625565342e-07, "logits/generated": 3.421060085296631, "logits/real": 1.9909251928329468, "logps/generated": -818.8541259765625, "logps/real": -536.1509399414062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -32.93922424316406, "rewards/margins": 25.9167537689209, "rewards/real": -7.022474765777588, "step": 6460 }, { "epoch": 2.08, "learning_rate": 1.705546298500357e-07, "logits/generated": 3.4344940185546875, "logits/real": 2.227994680404663, "logps/generated": -774.3441772460938, "logps/real": -504.506591796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -31.716869354248047, "rewards/margins": 25.101083755493164, "rewards/real": -6.615784645080566, "step": 6470 }, { "epoch": 2.08, "learning_rate": 1.6995953344441799e-07, "logits/generated": 3.3116958141326904, "logits/real": 2.0009453296661377, "logps/generated": -763.6702270507812, "logps/real": -483.4938049316406, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -32.51554489135742, "rewards/margins": 25.449382781982422, "rewards/real": -7.066157341003418, "step": 6480 }, { "epoch": 2.09, "learning_rate": 1.6936443703880028e-07, "logits/generated": 3.2536113262176514, "logits/real": 2.249297618865967, "logps/generated": -750.3321533203125, "logps/real": -482.439208984375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -29.747817993164062, "rewards/margins": 23.75244903564453, "rewards/real": -5.995371341705322, "step": 6490 }, { "epoch": 2.09, "learning_rate": 1.6876934063318258e-07, "logits/generated": 3.509892225265503, "logits/real": 2.257246494293213, "logps/generated": -740.7205810546875, "logps/real": -525.8812255859375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -33.42816925048828, "rewards/margins": 25.562885284423828, "rewards/real": -7.865290641784668, "step": 6500 }, { "epoch": 2.09, "learning_rate": 1.6817424422756484e-07, "logits/generated": 3.8845996856689453, "logits/real": 2.39617919921875, "logps/generated": -774.9995727539062, "logps/real": -477.24822998046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -36.98552703857422, "rewards/margins": 29.352413177490234, "rewards/real": -7.633110046386719, "step": 6510 }, { "epoch": 2.1, "learning_rate": 1.6757914782194717e-07, "logits/generated": 3.5948615074157715, "logits/real": 2.176312208175659, "logps/generated": -725.4068603515625, "logps/real": -458.8091735839844, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.527379989624023, "rewards/margins": 24.933658599853516, "rewards/real": -6.593724250793457, "step": 6520 }, { "epoch": 2.1, "learning_rate": 1.6698405141632943e-07, "logits/generated": 3.4443843364715576, "logits/real": 2.0957226753234863, "logps/generated": -730.2969970703125, "logps/real": -486.7123107910156, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.2806339263916, "rewards/margins": 24.06241226196289, "rewards/real": -6.218218803405762, "step": 6530 }, { "epoch": 2.1, "learning_rate": 1.6638895501071173e-07, "logits/generated": 3.536531448364258, "logits/real": 2.1532483100891113, "logps/generated": -771.5786743164062, "logps/real": -479.9591369628906, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -36.14640426635742, "rewards/margins": 27.942331314086914, "rewards/real": -8.204072952270508, "step": 6540 }, { "epoch": 2.1, "learning_rate": 1.6579385860509402e-07, "logits/generated": 3.972900867462158, "logits/real": 2.893733024597168, "logps/generated": -833.9752197265625, "logps/real": -479.9151916503906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -39.31206512451172, "rewards/margins": 30.116342544555664, "rewards/real": -9.195723533630371, "step": 6550 }, { "epoch": 2.11, "learning_rate": 1.6519876219947632e-07, "logits/generated": 3.5084547996520996, "logits/real": 2.637056350708008, "logps/generated": -837.7586669921875, "logps/real": -470.548583984375, "loss": 0.0182, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.11254119873047, "rewards/margins": 28.333038330078125, "rewards/real": -8.779504776000977, "step": 6560 }, { "epoch": 2.11, "learning_rate": 1.6460366579385859e-07, "logits/generated": 3.692127227783203, "logits/real": 2.6864566802978516, "logps/generated": -805.2144775390625, "logps/real": -419.8226623535156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -39.286216735839844, "rewards/margins": 31.407846450805664, "rewards/real": -7.878371238708496, "step": 6570 }, { "epoch": 2.11, "learning_rate": 1.640085693882409e-07, "logits/generated": 3.6602516174316406, "logits/real": 2.5245766639709473, "logps/generated": -946.4542236328125, "logps/real": -555.8132934570312, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.212867736816406, "rewards/margins": 33.743980407714844, "rewards/real": -10.46888542175293, "step": 6580 }, { "epoch": 2.12, "learning_rate": 1.6341347298262318e-07, "logits/generated": 3.7536563873291016, "logits/real": 2.159757614135742, "logps/generated": -823.7515869140625, "logps/real": -496.1431579589844, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -39.68825912475586, "rewards/margins": 30.009510040283203, "rewards/real": -9.67874813079834, "step": 6590 }, { "epoch": 2.12, "learning_rate": 1.6281837657700547e-07, "logits/generated": 3.707986354827881, "logits/real": 2.2052059173583984, "logps/generated": -878.4490356445312, "logps/real": -460.3269958496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -40.774574279785156, "rewards/margins": 32.6356201171875, "rewards/real": -8.138957023620605, "step": 6600 }, { "epoch": 2.12, "learning_rate": 1.6222328017138777e-07, "logits/generated": 3.541447401046753, "logits/real": 2.353390693664551, "logps/generated": -802.6497802734375, "logps/real": -469.12255859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -39.103675842285156, "rewards/margins": 30.940677642822266, "rewards/real": -8.162993431091309, "step": 6610 }, { "epoch": 2.13, "learning_rate": 1.6162818376577006e-07, "logits/generated": 3.4416019916534424, "logits/real": 2.4427173137664795, "logps/generated": -798.640380859375, "logps/real": -520.0285034179688, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/generated": -36.18897247314453, "rewards/margins": 28.493976593017578, "rewards/real": -7.694993019104004, "step": 6620 }, { "epoch": 2.13, "learning_rate": 1.6103308736015233e-07, "logits/generated": 3.1433939933776855, "logits/real": 1.9659850597381592, "logps/generated": -692.6221923828125, "logps/real": -444.29034423828125, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.389507293701172, "rewards/margins": 24.271543502807617, "rewards/real": -6.117964267730713, "step": 6630 }, { "epoch": 2.13, "learning_rate": 1.6043799095453465e-07, "logits/generated": 2.9152379035949707, "logits/real": 1.9543044567108154, "logps/generated": -712.6101684570312, "logps/real": -374.736328125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -31.805017471313477, "rewards/margins": 27.639978408813477, "rewards/real": -4.165036201477051, "step": 6640 }, { "epoch": 2.14, "learning_rate": 1.5984289454891692e-07, "logits/generated": 3.1087963581085205, "logits/real": 1.4458690881729126, "logps/generated": -653.6477661132812, "logps/real": -434.3851623535156, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -28.969432830810547, "rewards/margins": 24.76437759399414, "rewards/real": -4.205053806304932, "step": 6650 }, { "epoch": 2.14, "learning_rate": 1.592477981432992e-07, "logits/generated": 2.782691478729248, "logits/real": 1.5272406339645386, "logps/generated": -670.6622314453125, "logps/real": -415.12872314453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -25.6741943359375, "rewards/margins": 21.860679626464844, "rewards/real": -3.8135147094726562, "step": 6660 }, { "epoch": 2.14, "learning_rate": 1.586527017376815e-07, "logits/generated": 2.5433123111724854, "logits/real": 0.8736278414726257, "logps/generated": -758.8399658203125, "logps/real": -398.16473388671875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -31.524974822998047, "rewards/margins": 28.4589786529541, "rewards/real": -3.0659985542297363, "step": 6670 }, { "epoch": 2.15, "learning_rate": 1.5805760533206378e-07, "logits/generated": 2.982088327407837, "logits/real": 1.780231237411499, "logps/generated": -764.5460815429688, "logps/real": -417.8609313964844, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/generated": -30.573827743530273, "rewards/margins": 25.998668670654297, "rewards/real": -4.575158596038818, "step": 6680 }, { "epoch": 2.15, "learning_rate": 1.5746250892644607e-07, "logits/generated": 2.6655311584472656, "logits/real": 1.267910361289978, "logps/generated": -713.3572998046875, "logps/real": -472.82275390625, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.80756187438965, "rewards/margins": 22.294097900390625, "rewards/real": -5.513462543487549, "step": 6690 }, { "epoch": 2.15, "learning_rate": 1.5686741252082837e-07, "logits/generated": 3.0424792766571045, "logits/real": 1.3012491464614868, "logps/generated": -758.1329956054688, "logps/real": -483.0899353027344, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -31.085933685302734, "rewards/margins": 26.114904403686523, "rewards/real": -4.971026420593262, "step": 6700 }, { "epoch": 2.16, "learning_rate": 1.5627231611521066e-07, "logits/generated": 2.8525235652923584, "logits/real": 1.7656253576278687, "logps/generated": -781.4427490234375, "logps/real": -510.42742919921875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -30.134618759155273, "rewards/margins": 24.16891860961914, "rewards/real": -5.965699672698975, "step": 6710 }, { "epoch": 2.16, "learning_rate": 1.5567721970959293e-07, "logits/generated": 3.188775062561035, "logits/real": 1.6213468313217163, "logps/generated": -762.4727783203125, "logps/real": -485.2328186035156, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.72724151611328, "rewards/margins": 28.589534759521484, "rewards/real": -6.137705326080322, "step": 6720 }, { "epoch": 2.16, "learning_rate": 1.5508212330397525e-07, "logits/generated": 2.9089066982269287, "logits/real": 1.407815933227539, "logps/generated": -763.925537109375, "logps/real": -474.84967041015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -32.17937088012695, "rewards/margins": 25.163753509521484, "rewards/real": -7.015621185302734, "step": 6730 }, { "epoch": 2.17, "learning_rate": 1.5448702689835752e-07, "logits/generated": 3.5764732360839844, "logits/real": 1.9514563083648682, "logps/generated": -979.783203125, "logps/real": -481.804443359375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -45.18765640258789, "rewards/margins": 35.2607307434082, "rewards/real": -9.92692756652832, "step": 6740 }, { "epoch": 2.17, "learning_rate": 1.5389193049273981e-07, "logits/generated": 3.6203644275665283, "logits/real": 1.8205022811889648, "logps/generated": -843.4395751953125, "logps/real": -467.07958984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -41.38634490966797, "rewards/margins": 33.26428985595703, "rewards/real": -8.122050285339355, "step": 6750 }, { "epoch": 2.17, "learning_rate": 1.532968340871221e-07, "logits/generated": 2.9757542610168457, "logits/real": 2.0479233264923096, "logps/generated": -754.7349243164062, "logps/real": -488.94561767578125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.361976623535156, "rewards/margins": 25.936023712158203, "rewards/real": -8.425954818725586, "step": 6760 }, { "epoch": 2.18, "learning_rate": 1.527017376815044e-07, "logits/generated": 3.0736441612243652, "logits/real": 2.252469301223755, "logps/generated": -820.4495849609375, "logps/real": -509.2919006347656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -36.53960418701172, "rewards/margins": 26.953048706054688, "rewards/real": -9.586559295654297, "step": 6770 }, { "epoch": 2.18, "learning_rate": 1.5210664127588667e-07, "logits/generated": 3.0741493701934814, "logits/real": 2.1354005336761475, "logps/generated": -831.7189331054688, "logps/real": -512.5988159179688, "loss": 0.0342, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.531272888183594, "rewards/margins": 27.779932022094727, "rewards/real": -8.751340866088867, "step": 6780 }, { "epoch": 2.18, "learning_rate": 1.51511544870269e-07, "logits/generated": 2.87150502204895, "logits/real": 1.7805006504058838, "logps/generated": -759.3827514648438, "logps/real": -459.1974182128906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -33.29137420654297, "rewards/margins": 25.51667594909668, "rewards/real": -7.774700164794922, "step": 6790 }, { "epoch": 2.19, "learning_rate": 1.5091644846465126e-07, "logits/generated": 2.9630839824676514, "logits/real": 2.090167999267578, "logps/generated": -731.0892333984375, "logps/real": -428.4425354003906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -32.39105224609375, "rewards/margins": 25.993234634399414, "rewards/real": -6.397812843322754, "step": 6800 }, { "epoch": 2.19, "learning_rate": 1.5032135205903356e-07, "logits/generated": 3.222738265991211, "logits/real": 1.7744882106781006, "logps/generated": -760.7310180664062, "logps/real": -422.6011657714844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.521156311035156, "rewards/margins": 26.281503677368164, "rewards/real": -7.239654541015625, "step": 6810 }, { "epoch": 2.19, "learning_rate": 1.4972625565341585e-07, "logits/generated": 2.744563102722168, "logits/real": 1.5349392890930176, "logps/generated": -734.73095703125, "logps/real": -475.14691162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.505443572998047, "rewards/margins": 23.36994171142578, "rewards/real": -8.135502815246582, "step": 6820 }, { "epoch": 2.19, "learning_rate": 1.4913115924779815e-07, "logits/generated": 2.9873857498168945, "logits/real": 1.9933503866195679, "logps/generated": -695.9105224609375, "logps/real": -467.68035888671875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -29.649166107177734, "rewards/margins": 22.82960319519043, "rewards/real": -6.819563388824463, "step": 6830 }, { "epoch": 2.2, "learning_rate": 1.4853606284218042e-07, "logits/generated": 2.964663028717041, "logits/real": 1.3203465938568115, "logps/generated": -743.5252685546875, "logps/real": -423.99188232421875, "loss": 0.0157, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.895681381225586, "rewards/margins": 24.211328506469727, "rewards/real": -6.684350490570068, "step": 6840 }, { "epoch": 2.2, "learning_rate": 1.479409664365627e-07, "logits/generated": 3.1455674171447754, "logits/real": 1.7021219730377197, "logps/generated": -723.40283203125, "logps/real": -460.1839294433594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -32.17972946166992, "rewards/margins": 25.026466369628906, "rewards/real": -7.153264045715332, "step": 6850 }, { "epoch": 2.2, "learning_rate": 1.47345870030945e-07, "logits/generated": 2.8925163745880127, "logits/real": 1.152197241783142, "logps/generated": -741.8182373046875, "logps/real": -459.43255615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.202430725097656, "rewards/margins": 25.113248825073242, "rewards/real": -7.0891828536987305, "step": 6860 }, { "epoch": 2.21, "learning_rate": 1.4675077362532727e-07, "logits/generated": 2.961906909942627, "logits/real": 1.5837877988815308, "logps/generated": -740.1246948242188, "logps/real": -427.2061462402344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -30.971378326416016, "rewards/margins": 24.051876068115234, "rewards/real": -6.919497489929199, "step": 6870 }, { "epoch": 2.21, "learning_rate": 1.461556772197096e-07, "logits/generated": 3.15515398979187, "logits/real": 2.1786811351776123, "logps/generated": -678.2523193359375, "logps/real": -453.1685485839844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -30.356531143188477, "rewards/margins": 22.28665542602539, "rewards/real": -8.069873809814453, "step": 6880 }, { "epoch": 2.21, "learning_rate": 1.4556058081409186e-07, "logits/generated": 2.4801106452941895, "logits/real": 1.5259133577346802, "logps/generated": -795.98193359375, "logps/real": -445.598876953125, "loss": 0.0072, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.838043212890625, "rewards/margins": 26.635839462280273, "rewards/real": -6.202207088470459, "step": 6890 }, { "epoch": 2.22, "learning_rate": 1.4496548440847416e-07, "logits/generated": 3.0790295600891113, "logits/real": 1.9465078115463257, "logps/generated": -787.3807373046875, "logps/real": -435.8995056152344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -32.44220733642578, "rewards/margins": 24.926931381225586, "rewards/real": -7.515273094177246, "step": 6900 }, { "epoch": 2.22, "learning_rate": 1.4437038800285645e-07, "logits/generated": 2.899871349334717, "logits/real": 1.5646097660064697, "logps/generated": -804.6171875, "logps/real": -462.52069091796875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -33.27408981323242, "rewards/margins": 26.5900936126709, "rewards/real": -6.683995246887207, "step": 6910 }, { "epoch": 2.22, "learning_rate": 1.4377529159723875e-07, "logits/generated": 3.1001930236816406, "logits/real": 1.9517736434936523, "logps/generated": -807.1820068359375, "logps/real": -461.59344482421875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -33.98884201049805, "rewards/margins": 26.3109188079834, "rewards/real": -7.677923679351807, "step": 6920 }, { "epoch": 2.23, "learning_rate": 1.4318019519162102e-07, "logits/generated": 3.415861129760742, "logits/real": 1.6572726964950562, "logps/generated": -696.6417236328125, "logps/real": -468.6304626464844, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -31.29232406616211, "rewards/margins": 23.048959732055664, "rewards/real": -8.243368148803711, "step": 6930 }, { "epoch": 2.23, "learning_rate": 1.4258509878600334e-07, "logits/generated": 3.060891628265381, "logits/real": 2.0257744789123535, "logps/generated": -708.6741333007812, "logps/real": -426.91436767578125, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/generated": -29.888269424438477, "rewards/margins": 24.17989158630371, "rewards/real": -5.708379745483398, "step": 6940 }, { "epoch": 2.23, "learning_rate": 1.419900023803856e-07, "logits/generated": 2.399233341217041, "logits/real": 0.872857391834259, "logps/generated": -690.2432250976562, "logps/real": -408.26641845703125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -29.205242156982422, "rewards/margins": 24.39468002319336, "rewards/real": -4.810557842254639, "step": 6950 }, { "epoch": 2.24, "learning_rate": 1.4139490597476793e-07, "logits/generated": 3.0926308631896973, "logits/real": 1.4446666240692139, "logps/generated": -692.4381713867188, "logps/real": -472.76763916015625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -31.363794326782227, "rewards/margins": 24.68940544128418, "rewards/real": -6.674388885498047, "step": 6960 }, { "epoch": 2.24, "learning_rate": 1.407998095691502e-07, "logits/generated": 2.5872902870178223, "logits/real": 1.5146644115447998, "logps/generated": -821.1486206054688, "logps/real": -444.119873046875, "loss": 0.017, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.32810974121094, "rewards/margins": 29.51620864868164, "rewards/real": -5.811902046203613, "step": 6970 }, { "epoch": 2.24, "learning_rate": 1.402047131635325e-07, "logits/generated": 2.8249990940093994, "logits/real": 1.023376703262329, "logps/generated": -765.0428466796875, "logps/real": -428.3826599121094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -32.61444091796875, "rewards/margins": 26.848316192626953, "rewards/real": -5.766120433807373, "step": 6980 }, { "epoch": 2.25, "learning_rate": 1.3960961675791479e-07, "logits/generated": 2.2632861137390137, "logits/real": 1.1580618619918823, "logps/generated": -736.8770751953125, "logps/real": -457.27764892578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -30.05502700805664, "rewards/margins": 23.780330657958984, "rewards/real": -6.274697303771973, "step": 6990 }, { "epoch": 2.25, "learning_rate": 1.3901452035229708e-07, "logits/generated": 3.0902857780456543, "logits/real": 1.1614948511123657, "logps/generated": -689.6188354492188, "logps/real": -441.04937744140625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -30.000102996826172, "rewards/margins": 24.39971160888672, "rewards/real": -5.600390911102295, "step": 7000 }, { "epoch": 2.25, "learning_rate": 1.3841942394667935e-07, "logits/generated": 2.586245059967041, "logits/real": 0.9895980954170227, "logps/generated": -712.676513671875, "logps/real": -417.57763671875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -30.325984954833984, "rewards/margins": 23.686946868896484, "rewards/real": -6.639040946960449, "step": 7010 }, { "epoch": 2.26, "learning_rate": 1.3782432754106167e-07, "logits/generated": 2.5772995948791504, "logits/real": 1.5468659400939941, "logps/generated": -799.404052734375, "logps/real": -489.91278076171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -34.551673889160156, "rewards/margins": 26.73357582092285, "rewards/real": -7.818093776702881, "step": 7020 }, { "epoch": 2.26, "learning_rate": 1.3722923113544394e-07, "logits/generated": 3.1107935905456543, "logits/real": 1.1767048835754395, "logps/generated": -739.7095336914062, "logps/real": -488.7715759277344, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -33.4478645324707, "rewards/margins": 26.486454010009766, "rewards/real": -6.961409568786621, "step": 7030 }, { "epoch": 2.26, "learning_rate": 1.3663413472982623e-07, "logits/generated": 2.6594302654266357, "logits/real": 1.3413512706756592, "logps/generated": -758.55224609375, "logps/real": -437.8865661621094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -34.44856262207031, "rewards/margins": 27.506145477294922, "rewards/real": -6.942414283752441, "step": 7040 }, { "epoch": 2.27, "learning_rate": 1.3603903832420853e-07, "logits/generated": 3.0324738025665283, "logits/real": 1.310829758644104, "logps/generated": -805.4611206054688, "logps/real": -516.8486938476562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -34.84650421142578, "rewards/margins": 27.294002532958984, "rewards/real": -7.552500247955322, "step": 7050 }, { "epoch": 2.27, "learning_rate": 1.354439419185908e-07, "logits/generated": 2.620281457901001, "logits/real": 1.1966314315795898, "logps/generated": -740.776123046875, "logps/real": -454.8985290527344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -33.303741455078125, "rewards/margins": 27.1345272064209, "rewards/real": -6.169215679168701, "step": 7060 }, { "epoch": 2.27, "learning_rate": 1.348488455129731e-07, "logits/generated": 2.464813232421875, "logits/real": 0.9124526977539062, "logps/generated": -696.6542358398438, "logps/real": -463.55133056640625, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.61940574645996, "rewards/margins": 23.6339054107666, "rewards/real": -7.985500335693359, "step": 7070 }, { "epoch": 2.28, "learning_rate": 1.3425374910735539e-07, "logits/generated": 2.452291965484619, "logits/real": 1.0921869277954102, "logps/generated": -841.27197265625, "logps/real": -488.4458923339844, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.82002639770508, "rewards/margins": 26.540307998657227, "rewards/real": -8.279718399047852, "step": 7080 }, { "epoch": 2.28, "learning_rate": 1.3365865270173768e-07, "logits/generated": 2.807206630706787, "logits/real": 1.3947135210037231, "logps/generated": -782.46728515625, "logps/real": -487.2464904785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -34.70906448364258, "rewards/margins": 26.87406349182129, "rewards/real": -7.834999084472656, "step": 7090 }, { "epoch": 2.28, "learning_rate": 1.3306355629611995e-07, "logits/generated": 2.7090964317321777, "logits/real": 1.2813708782196045, "logps/generated": -703.2115478515625, "logps/real": -435.378662109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.2602481842041, "rewards/margins": 21.91950035095215, "rewards/real": -7.3407464027404785, "step": 7100 }, { "epoch": 2.28, "learning_rate": 1.3246845989050227e-07, "logits/generated": 3.3155906200408936, "logits/real": 1.3356436491012573, "logps/generated": -832.2888793945312, "logps/real": -448.3607482910156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -37.993247985839844, "rewards/margins": 30.307275772094727, "rewards/real": -7.685971260070801, "step": 7110 }, { "epoch": 2.29, "learning_rate": 1.3187336348488454e-07, "logits/generated": 2.5738959312438965, "logits/real": 1.168304443359375, "logps/generated": -733.6793212890625, "logps/real": -420.4358825683594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -32.179771423339844, "rewards/margins": 27.08028221130371, "rewards/real": -5.099486351013184, "step": 7120 }, { "epoch": 2.29, "learning_rate": 1.3127826707926683e-07, "logits/generated": 2.67816162109375, "logits/real": 1.3505403995513916, "logps/generated": -775.6461181640625, "logps/real": -439.0838928222656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -33.05588912963867, "rewards/margins": 28.1678524017334, "rewards/real": -4.888035297393799, "step": 7130 }, { "epoch": 2.29, "learning_rate": 1.3068317067364913e-07, "logits/generated": 2.9945292472839355, "logits/real": 1.4103082418441772, "logps/generated": -624.4776611328125, "logps/real": -429.5799865722656, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -27.462692260742188, "rewards/margins": 22.07574462890625, "rewards/real": -5.386946678161621, "step": 7140 }, { "epoch": 2.3, "learning_rate": 1.3008807426803142e-07, "logits/generated": 2.76560640335083, "logits/real": 1.0679657459259033, "logps/generated": -826.3810424804688, "logps/real": -457.3650817871094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -32.85173416137695, "rewards/margins": 27.399700164794922, "rewards/real": -5.452032566070557, "step": 7150 }, { "epoch": 2.3, "learning_rate": 1.294929778624137e-07, "logits/generated": 3.219749927520752, "logits/real": 1.604093313217163, "logps/generated": -765.2886962890625, "logps/real": -473.2640075683594, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.7594108581543, "rewards/margins": 26.417593002319336, "rewards/real": -7.341818332672119, "step": 7160 }, { "epoch": 2.3, "learning_rate": 1.2889788145679601e-07, "logits/generated": 3.151139736175537, "logits/real": 1.5980494022369385, "logps/generated": -832.0123901367188, "logps/real": -511.2401428222656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -35.35246276855469, "rewards/margins": 28.923532485961914, "rewards/real": -6.428927421569824, "step": 7170 }, { "epoch": 2.31, "learning_rate": 1.2830278505117828e-07, "logits/generated": 3.3734428882598877, "logits/real": 1.2419679164886475, "logps/generated": -739.4263305664062, "logps/real": -487.5960388183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.978775024414062, "rewards/margins": 23.334543228149414, "rewards/real": -6.64423131942749, "step": 7180 }, { "epoch": 2.31, "learning_rate": 1.2770768864556058e-07, "logits/generated": 2.8000102043151855, "logits/real": 1.9294319152832031, "logps/generated": -779.2018432617188, "logps/real": -421.3666076660156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -33.18901824951172, "rewards/margins": 26.902957916259766, "rewards/real": -6.286064147949219, "step": 7190 }, { "epoch": 2.31, "learning_rate": 1.2711259223994287e-07, "logits/generated": 3.1519150733947754, "logits/real": 1.751089096069336, "logps/generated": -766.7760009765625, "logps/real": -484.00006103515625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -34.094905853271484, "rewards/margins": 25.8779354095459, "rewards/real": -8.216973304748535, "step": 7200 }, { "epoch": 2.32, "learning_rate": 1.2651749583432517e-07, "logits/generated": 3.4888617992401123, "logits/real": 2.2022228240966797, "logps/generated": -727.6364135742188, "logps/real": -476.1726989746094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -33.9564323425293, "rewards/margins": 25.65195083618164, "rewards/real": -8.304486274719238, "step": 7210 }, { "epoch": 2.32, "learning_rate": 1.2592239942870743e-07, "logits/generated": 3.3202128410339355, "logits/real": 1.3672093152999878, "logps/generated": -832.3145751953125, "logps/real": -472.842041015625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -36.88011932373047, "rewards/margins": 29.452173233032227, "rewards/real": -7.427945613861084, "step": 7220 }, { "epoch": 2.32, "learning_rate": 1.2532730302308976e-07, "logits/generated": 3.3410096168518066, "logits/real": 1.7470417022705078, "logps/generated": -666.7941284179688, "logps/real": -472.508056640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -30.478723526000977, "rewards/margins": 22.41364860534668, "rewards/real": -8.065074920654297, "step": 7230 }, { "epoch": 2.33, "learning_rate": 1.2473220661747202e-07, "logits/generated": 3.183448553085327, "logits/real": 1.3558294773101807, "logps/generated": -770.9962768554688, "logps/real": -515.2557373046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -34.31395721435547, "rewards/margins": 27.274433135986328, "rewards/real": -7.039522647857666, "step": 7240 }, { "epoch": 2.33, "learning_rate": 1.2413711021185432e-07, "logits/generated": 3.8509552478790283, "logits/real": 1.9763472080230713, "logps/generated": -771.0432739257812, "logps/real": -517.6268920898438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -34.521018981933594, "rewards/margins": 26.22637367248535, "rewards/real": -8.294641494750977, "step": 7250 }, { "epoch": 2.33, "learning_rate": 1.2354201380623661e-07, "logits/generated": 3.2065505981445312, "logits/real": 2.2779228687286377, "logps/generated": -734.5236206054688, "logps/real": -456.244140625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -33.6418571472168, "rewards/margins": 25.030609130859375, "rewards/real": -8.611248016357422, "step": 7260 }, { "epoch": 2.34, "learning_rate": 1.229469174006189e-07, "logits/generated": 3.5071442127227783, "logits/real": 1.975908637046814, "logps/generated": -771.9376220703125, "logps/real": -502.39959716796875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -33.467506408691406, "rewards/margins": 24.4432430267334, "rewards/real": -9.024258613586426, "step": 7270 }, { "epoch": 2.34, "learning_rate": 1.2235182099500118e-07, "logits/generated": 3.317119598388672, "logits/real": 2.387730360031128, "logps/generated": -678.3218994140625, "logps/real": -442.78790283203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -32.64667510986328, "rewards/margins": 24.854127883911133, "rewards/real": -7.792550086975098, "step": 7280 }, { "epoch": 2.34, "learning_rate": 1.2175672458938347e-07, "logits/generated": 2.7923922538757324, "logits/real": 1.9158508777618408, "logps/generated": -783.607421875, "logps/real": -456.8174743652344, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -32.69109344482422, "rewards/margins": 25.652584075927734, "rewards/real": -7.038506984710693, "step": 7290 }, { "epoch": 2.35, "learning_rate": 1.2116162818376577e-07, "logits/generated": 2.7299890518188477, "logits/real": 1.5718133449554443, "logps/generated": -655.5050048828125, "logps/real": -413.37237548828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -29.520999908447266, "rewards/margins": 23.0987606048584, "rewards/real": -6.4222412109375, "step": 7300 }, { "epoch": 2.35, "learning_rate": 1.2056653177814806e-07, "logits/generated": 3.4191479682922363, "logits/real": 1.788791298866272, "logps/generated": -799.5596313476562, "logps/real": -463.6944274902344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -33.976009368896484, "rewards/margins": 28.416900634765625, "rewards/real": -5.559107780456543, "step": 7310 }, { "epoch": 2.35, "learning_rate": 1.1997143537253036e-07, "logits/generated": 3.396096706390381, "logits/real": 1.7464885711669922, "logps/generated": -767.1105346679688, "logps/real": -460.4150390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -33.3072509765625, "rewards/margins": 28.73329734802246, "rewards/real": -4.5739545822143555, "step": 7320 }, { "epoch": 2.36, "learning_rate": 1.1937633896691265e-07, "logits/generated": 3.047201156616211, "logits/real": 1.3891384601593018, "logps/generated": -728.8016357421875, "logps/real": -404.3458557128906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -32.675262451171875, "rewards/margins": 27.848257064819336, "rewards/real": -4.8270039558410645, "step": 7330 }, { "epoch": 2.36, "learning_rate": 1.1878124256129493e-07, "logits/generated": 3.1526782512664795, "logits/real": 1.9908387660980225, "logps/generated": -768.6041870117188, "logps/real": -471.05078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -35.2022705078125, "rewards/margins": 27.991321563720703, "rewards/real": -7.210949897766113, "step": 7340 }, { "epoch": 2.36, "learning_rate": 1.181861461556772e-07, "logits/generated": 3.47533917427063, "logits/real": 1.691488265991211, "logps/generated": -761.0567626953125, "logps/real": -515.1283569335938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -32.1240348815918, "rewards/margins": 25.822696685791016, "rewards/real": -6.301340579986572, "step": 7350 }, { "epoch": 2.37, "learning_rate": 1.175910497500595e-07, "logits/generated": 3.005817413330078, "logits/real": 1.1787437200546265, "logps/generated": -766.0821533203125, "logps/real": -480.853515625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -34.102684020996094, "rewards/margins": 27.207138061523438, "rewards/real": -6.89554500579834, "step": 7360 }, { "epoch": 2.37, "learning_rate": 1.1699595334444179e-07, "logits/generated": 2.9544918537139893, "logits/real": 1.1928799152374268, "logps/generated": -780.8756103515625, "logps/real": -501.836181640625, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.68290710449219, "rewards/margins": 28.39774513244629, "rewards/real": -8.285164833068848, "step": 7370 }, { "epoch": 2.37, "learning_rate": 1.1640085693882409e-07, "logits/generated": 2.927870750427246, "logits/real": 1.1816669702529907, "logps/generated": -872.4822387695312, "logps/real": -528.32373046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -39.08098602294922, "rewards/margins": 32.24845504760742, "rewards/real": -6.8325300216674805, "step": 7380 }, { "epoch": 2.37, "learning_rate": 1.1580576053320637e-07, "logits/generated": 2.702702522277832, "logits/real": 1.1594092845916748, "logps/generated": -796.3984375, "logps/real": -523.8278198242188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -35.30531692504883, "rewards/margins": 27.286239624023438, "rewards/real": -8.019079208374023, "step": 7390 }, { "epoch": 2.38, "learning_rate": 1.1521066412758866e-07, "logits/generated": 2.818573474884033, "logits/real": 1.2178303003311157, "logps/generated": -715.4994506835938, "logps/real": -477.83551025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.734912872314453, "rewards/margins": 24.450969696044922, "rewards/real": -7.283943176269531, "step": 7400 }, { "epoch": 2.38, "learning_rate": 1.1461556772197096e-07, "logits/generated": 2.7181813716888428, "logits/real": 1.2402232885360718, "logps/generated": -807.756591796875, "logps/real": -486.3701171875, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.50809860229492, "rewards/margins": 24.656108856201172, "rewards/real": -8.85198974609375, "step": 7410 }, { "epoch": 2.38, "learning_rate": 1.1402047131635324e-07, "logits/generated": 2.747448205947876, "logits/real": 1.0721694231033325, "logps/generated": -796.5250244140625, "logps/real": -434.9480895996094, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -38.2369270324707, "rewards/margins": 29.642135620117188, "rewards/real": -8.5947904586792, "step": 7420 }, { "epoch": 2.39, "learning_rate": 1.1342537491073553e-07, "logits/generated": 3.317473888397217, "logits/real": 1.3040592670440674, "logps/generated": -764.7445678710938, "logps/real": -507.01226806640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -34.16574478149414, "rewards/margins": 25.690637588500977, "rewards/real": -8.475104331970215, "step": 7430 }, { "epoch": 2.39, "learning_rate": 1.1283027850511783e-07, "logits/generated": 2.859200954437256, "logits/real": 1.4399030208587646, "logps/generated": -721.3890380859375, "logps/real": -432.20263671875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -33.457698822021484, "rewards/margins": 24.963918685913086, "rewards/real": -8.493776321411133, "step": 7440 }, { "epoch": 2.39, "learning_rate": 1.1223518209950011e-07, "logits/generated": 2.468221664428711, "logits/real": 1.2090656757354736, "logps/generated": -770.1082763671875, "logps/real": -455.5987243652344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -33.903175354003906, "rewards/margins": 26.92715072631836, "rewards/real": -6.976022243499756, "step": 7450 }, { "epoch": 2.4, "learning_rate": 1.116400856938824e-07, "logits/generated": 2.4611763954162598, "logits/real": 0.8761798143386841, "logps/generated": -747.6172485351562, "logps/real": -466.01470947265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -32.50324249267578, "rewards/margins": 25.81982421875, "rewards/real": -6.6834211349487305, "step": 7460 }, { "epoch": 2.4, "learning_rate": 1.110449892882647e-07, "logits/generated": 2.667145252227783, "logits/real": 1.2940479516983032, "logps/generated": -713.1707153320312, "logps/real": -445.208740234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -31.926921844482422, "rewards/margins": 25.238840103149414, "rewards/real": -6.68808126449585, "step": 7470 }, { "epoch": 2.4, "learning_rate": 1.1044989288264698e-07, "logits/generated": 3.0308682918548584, "logits/real": 1.3372796773910522, "logps/generated": -860.1969604492188, "logps/real": -501.64501953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -35.971717834472656, "rewards/margins": 28.487728118896484, "rewards/real": -7.483983516693115, "step": 7480 }, { "epoch": 2.41, "learning_rate": 1.0985479647702928e-07, "logits/generated": 3.2643141746520996, "logits/real": 1.4599339962005615, "logps/generated": -778.430419921875, "logps/real": -539.1873779296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -35.98785400390625, "rewards/margins": 28.409305572509766, "rewards/real": -7.57855224609375, "step": 7490 }, { "epoch": 2.41, "learning_rate": 1.0925970007141157e-07, "logits/generated": 3.239894151687622, "logits/real": 1.293896198272705, "logps/generated": -800.9592895507812, "logps/real": -517.4242553710938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.605045318603516, "rewards/margins": 27.84867286682129, "rewards/real": -7.756373405456543, "step": 7500 }, { "epoch": 2.41, "learning_rate": 1.0866460366579385e-07, "logits/generated": 3.154141664505005, "logits/real": 1.6751811504364014, "logps/generated": -716.1446533203125, "logps/real": -438.0457458496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -32.26148986816406, "rewards/margins": 24.838245391845703, "rewards/real": -7.423245429992676, "step": 7510 }, { "epoch": 2.42, "learning_rate": 1.0806950726017615e-07, "logits/generated": 3.3375821113586426, "logits/real": 1.4001529216766357, "logps/generated": -719.6033935546875, "logps/real": -454.2464904785156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -34.63445281982422, "rewards/margins": 28.429996490478516, "rewards/real": -6.204458713531494, "step": 7520 }, { "epoch": 2.42, "learning_rate": 1.0747441085455844e-07, "logits/generated": 2.714874029159546, "logits/real": 1.3237279653549194, "logps/generated": -726.1779174804688, "logps/real": -420.90625, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.34538269042969, "rewards/margins": 26.279022216796875, "rewards/real": -6.0663628578186035, "step": 7530 }, { "epoch": 2.42, "learning_rate": 1.0687931444894072e-07, "logits/generated": 2.405118465423584, "logits/real": 1.00875723361969, "logps/generated": -778.2654418945312, "logps/real": -474.66485595703125, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.29490280151367, "rewards/margins": 28.0728759765625, "rewards/real": -7.222027778625488, "step": 7540 }, { "epoch": 2.43, "learning_rate": 1.06284218043323e-07, "logits/generated": 2.638481855392456, "logits/real": 0.44862380623817444, "logps/generated": -756.4628295898438, "logps/real": -459.02001953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.91370391845703, "rewards/margins": 26.11090660095215, "rewards/real": -7.802797794342041, "step": 7550 }, { "epoch": 2.43, "learning_rate": 1.056891216377053e-07, "logits/generated": 2.909228801727295, "logits/real": 0.9783293008804321, "logps/generated": -712.5177612304688, "logps/real": -432.32293701171875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -32.577003479003906, "rewards/margins": 25.266740798950195, "rewards/real": -7.310264587402344, "step": 7560 }, { "epoch": 2.43, "learning_rate": 1.0509402523208758e-07, "logits/generated": 2.7182772159576416, "logits/real": 0.5386302471160889, "logps/generated": -765.7089233398438, "logps/real": -452.9603576660156, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.822235107421875, "rewards/margins": 29.205032348632812, "rewards/real": -6.617201805114746, "step": 7570 }, { "epoch": 2.44, "learning_rate": 1.0449892882646988e-07, "logits/generated": 2.4969494342803955, "logits/real": 0.5392976999282837, "logps/generated": -805.8818969726562, "logps/real": -488.0843200683594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -35.3496208190918, "rewards/margins": 27.54764175415039, "rewards/real": -7.8019819259643555, "step": 7580 }, { "epoch": 2.44, "learning_rate": 1.0390383242085217e-07, "logits/generated": 2.7933075428009033, "logits/real": 1.172039270401001, "logps/generated": -726.2203369140625, "logps/real": -484.22216796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -33.49732208251953, "rewards/margins": 25.451711654663086, "rewards/real": -8.045612335205078, "step": 7590 }, { "epoch": 2.44, "learning_rate": 1.0330873601523445e-07, "logits/generated": 2.802065134048462, "logits/real": 0.8606332540512085, "logps/generated": -687.5404052734375, "logps/real": -448.2220153808594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -31.423603057861328, "rewards/margins": 24.442333221435547, "rewards/real": -6.981271266937256, "step": 7600 }, { "epoch": 2.45, "learning_rate": 1.0271363960961675e-07, "logits/generated": 3.1056323051452637, "logits/real": 1.0243109464645386, "logps/generated": -775.0807495117188, "logps/real": -469.50189208984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -36.81755828857422, "rewards/margins": 29.04024887084961, "rewards/real": -7.777310371398926, "step": 7610 }, { "epoch": 2.45, "learning_rate": 1.0211854320399904e-07, "logits/generated": 2.6853814125061035, "logits/real": 1.4886776208877563, "logps/generated": -786.6143188476562, "logps/real": -462.4486389160156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -33.95984649658203, "rewards/margins": 27.376718521118164, "rewards/real": -6.583123683929443, "step": 7620 }, { "epoch": 2.45, "learning_rate": 1.0152344679838133e-07, "logits/generated": 2.4071197509765625, "logits/real": 0.939761757850647, "logps/generated": -770.4609985351562, "logps/real": -441.9866638183594, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.65291976928711, "rewards/margins": 26.58403968811035, "rewards/real": -6.068882465362549, "step": 7630 }, { "epoch": 2.46, "learning_rate": 1.0092835039276362e-07, "logits/generated": 2.574300765991211, "logits/real": 0.7687474489212036, "logps/generated": -806.5418090820312, "logps/real": -474.4375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -34.255653381347656, "rewards/margins": 26.4575252532959, "rewards/real": -7.798126220703125, "step": 7640 }, { "epoch": 2.46, "learning_rate": 1.0033325398714592e-07, "logits/generated": 2.711141586303711, "logits/real": 1.2199287414550781, "logps/generated": -731.8900146484375, "logps/real": -481.900146484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -32.70969772338867, "rewards/margins": 24.953229904174805, "rewards/real": -7.756471157073975, "step": 7650 }, { "epoch": 2.46, "learning_rate": 9.973815758152821e-08, "logits/generated": 2.8330657482147217, "logits/real": 1.2487785816192627, "logps/generated": -754.8839111328125, "logps/real": -489.70489501953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -32.9232063293457, "rewards/margins": 24.870262145996094, "rewards/real": -8.05294418334961, "step": 7660 }, { "epoch": 2.46, "learning_rate": 9.914306117591049e-08, "logits/generated": 2.722625970840454, "logits/real": 1.1355412006378174, "logps/generated": -771.1907348632812, "logps/real": -450.7420959472656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -34.733238220214844, "rewards/margins": 27.90236473083496, "rewards/real": -6.830872535705566, "step": 7670 }, { "epoch": 2.47, "learning_rate": 9.854796477029279e-08, "logits/generated": 2.9408042430877686, "logits/real": 1.5430357456207275, "logps/generated": -806.4617919921875, "logps/real": -532.6165161132812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -33.39200973510742, "rewards/margins": 24.269031524658203, "rewards/real": -9.122979164123535, "step": 7680 }, { "epoch": 2.47, "learning_rate": 9.795286836467508e-08, "logits/generated": 2.5622048377990723, "logits/real": 0.9354397654533386, "logps/generated": -773.5486450195312, "logps/real": -471.0, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -33.29973602294922, "rewards/margins": 26.009201049804688, "rewards/real": -7.290537357330322, "step": 7690 }, { "epoch": 2.47, "learning_rate": 9.735777195905736e-08, "logits/generated": 2.7198944091796875, "logits/real": 0.9722633361816406, "logps/generated": -821.4305419921875, "logps/real": -446.9388122558594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -34.81932067871094, "rewards/margins": 28.387466430664062, "rewards/real": -6.4318528175354, "step": 7700 }, { "epoch": 2.48, "learning_rate": 9.676267555343966e-08, "logits/generated": 3.3499999046325684, "logits/real": 1.271303653717041, "logps/generated": -777.5213623046875, "logps/real": -508.3485412597656, "loss": 0.0041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.471778869628906, "rewards/margins": 29.451171875, "rewards/real": -7.020611763000488, "step": 7710 }, { "epoch": 2.48, "learning_rate": 9.616757914782195e-08, "logits/generated": 2.5723698139190674, "logits/real": 1.042120099067688, "logps/generated": -770.1019287109375, "logps/real": -485.211181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.77147674560547, "rewards/margins": 27.350971221923828, "rewards/real": -7.420510292053223, "step": 7720 }, { "epoch": 2.48, "learning_rate": 9.557248274220423e-08, "logits/generated": 2.573293685913086, "logits/real": 0.9942825436592102, "logps/generated": -740.1798706054688, "logps/real": -455.57470703125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -32.222145080566406, "rewards/margins": 25.125164031982422, "rewards/real": -7.096977233886719, "step": 7730 }, { "epoch": 2.49, "learning_rate": 9.497738633658653e-08, "logits/generated": 2.686053514480591, "logits/real": 1.076339602470398, "logps/generated": -744.6796264648438, "logps/real": -467.9151916503906, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.11707305908203, "rewards/margins": 25.706613540649414, "rewards/real": -8.410463333129883, "step": 7740 }, { "epoch": 2.49, "learning_rate": 9.438228993096881e-08, "logits/generated": 2.4728541374206543, "logits/real": 0.679612934589386, "logps/generated": -766.8170166015625, "logps/real": -448.61175537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -36.72022247314453, "rewards/margins": 27.64395523071289, "rewards/real": -9.076268196105957, "step": 7750 }, { "epoch": 2.49, "learning_rate": 9.378719352535109e-08, "logits/generated": 2.973196506500244, "logits/real": 1.431605339050293, "logps/generated": -707.9198608398438, "logps/real": -423.39898681640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -33.29743576049805, "rewards/margins": 26.96817970275879, "rewards/real": -6.329256534576416, "step": 7760 }, { "epoch": 2.5, "learning_rate": 9.319209711973339e-08, "logits/generated": 2.7053258419036865, "logits/real": 1.531813383102417, "logps/generated": -748.2283935546875, "logps/real": -460.315673828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -34.45850372314453, "rewards/margins": 25.537561416625977, "rewards/real": -8.92094612121582, "step": 7770 }, { "epoch": 2.5, "learning_rate": 9.259700071411568e-08, "logits/generated": 2.805387020111084, "logits/real": 1.35537588596344, "logps/generated": -713.0643310546875, "logps/real": -490.5414123535156, "loss": 0.0028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.46773338317871, "rewards/margins": 23.90458106994629, "rewards/real": -7.5631537437438965, "step": 7780 }, { "epoch": 2.5, "learning_rate": 9.200190430849796e-08, "logits/generated": 2.840932846069336, "logits/real": 1.195715308189392, "logps/generated": -795.6235961914062, "logps/real": -506.05126953125, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.20328140258789, "rewards/margins": 26.821950912475586, "rewards/real": -7.381324768066406, "step": 7790 }, { "epoch": 2.51, "learning_rate": 9.140680790288026e-08, "logits/generated": 2.907820224761963, "logits/real": 1.6150579452514648, "logps/generated": -745.470703125, "logps/real": -437.0804138183594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -33.50928497314453, "rewards/margins": 26.094961166381836, "rewards/real": -7.4143242835998535, "step": 7800 }, { "epoch": 2.51, "learning_rate": 9.081171149726255e-08, "logits/generated": 3.3101794719696045, "logits/real": 1.632311224937439, "logps/generated": -869.7657470703125, "logps/real": -475.5704040527344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -41.94033432006836, "rewards/margins": 32.2828483581543, "rewards/real": -9.657485961914062, "step": 7810 }, { "epoch": 2.51, "learning_rate": 9.021661509164484e-08, "logits/generated": 3.158996105194092, "logits/real": 1.5095632076263428, "logps/generated": -739.5943603515625, "logps/real": -464.4718322753906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -35.22946548461914, "rewards/margins": 26.255565643310547, "rewards/real": -8.97390079498291, "step": 7820 }, { "epoch": 2.52, "learning_rate": 8.962151868602713e-08, "logits/generated": 3.3813796043395996, "logits/real": 1.3575268983840942, "logps/generated": -796.2796020507812, "logps/real": -481.00006103515625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -38.84486770629883, "rewards/margins": 28.80959129333496, "rewards/real": -10.035270690917969, "step": 7830 }, { "epoch": 2.52, "learning_rate": 8.902642228040942e-08, "logits/generated": 3.571286678314209, "logits/real": 1.8144538402557373, "logps/generated": -824.0863037109375, "logps/real": -463.3797302246094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -38.79276657104492, "rewards/margins": 30.1883487701416, "rewards/real": -8.60442066192627, "step": 7840 }, { "epoch": 2.52, "learning_rate": 8.84313258747917e-08, "logits/generated": 2.69801664352417, "logits/real": 1.0358164310455322, "logps/generated": -742.7152099609375, "logps/real": -499.96136474609375, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.644935607910156, "rewards/margins": 25.655263900756836, "rewards/real": -10.989669799804688, "step": 7850 }, { "epoch": 2.53, "learning_rate": 8.7836229469174e-08, "logits/generated": 3.279534101486206, "logits/real": 1.8801437616348267, "logps/generated": -822.6712646484375, "logps/real": -494.4563903808594, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.3455924987793, "rewards/margins": 27.10475730895996, "rewards/real": -13.240839004516602, "step": 7860 }, { "epoch": 2.53, "learning_rate": 8.72411330635563e-08, "logits/generated": 2.800844669342041, "logits/real": 1.6319366693496704, "logps/generated": -854.8231201171875, "logps/real": -488.85693359375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -40.46689987182617, "rewards/margins": 29.3613224029541, "rewards/real": -11.105579376220703, "step": 7870 }, { "epoch": 2.53, "learning_rate": 8.664603665793858e-08, "logits/generated": 3.2954354286193848, "logits/real": 1.3626015186309814, "logps/generated": -773.179443359375, "logps/real": -498.94940185546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -37.719215393066406, "rewards/margins": 27.16721534729004, "rewards/real": -10.55199909210205, "step": 7880 }, { "epoch": 2.54, "learning_rate": 8.605094025232087e-08, "logits/generated": 3.678366184234619, "logits/real": 1.669468879699707, "logps/generated": -884.4777221679688, "logps/real": -481.454833984375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -45.586647033691406, "rewards/margins": 34.745201110839844, "rewards/real": -10.84144401550293, "step": 7890 }, { "epoch": 2.54, "learning_rate": 8.545584384670317e-08, "logits/generated": 3.132028102874756, "logits/real": 1.244564175605774, "logps/generated": -833.7867431640625, "logps/real": -454.77716064453125, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/generated": -38.836936950683594, "rewards/margins": 28.8145809173584, "rewards/real": -10.022356033325195, "step": 7900 }, { "epoch": 2.54, "learning_rate": 8.486074744108546e-08, "logits/generated": 2.863927125930786, "logits/real": 1.270990014076233, "logps/generated": -722.0516357421875, "logps/real": -486.9088439941406, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/generated": -34.17937469482422, "rewards/margins": 23.50700569152832, "rewards/real": -10.672369003295898, "step": 7910 }, { "epoch": 2.54, "learning_rate": 8.426565103546774e-08, "logits/generated": 2.986619710922241, "logits/real": 1.4829037189483643, "logps/generated": -794.3270263671875, "logps/real": -445.35955810546875, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.842628479003906, "rewards/margins": 27.46895980834961, "rewards/real": -8.373666763305664, "step": 7920 }, { "epoch": 2.55, "learning_rate": 8.367055462985004e-08, "logits/generated": 2.7366650104522705, "logits/real": 1.4087549448013306, "logps/generated": -748.2372436523438, "logps/real": -446.433349609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -34.81609344482422, "rewards/margins": 26.67582130432129, "rewards/real": -8.140268325805664, "step": 7930 }, { "epoch": 2.55, "learning_rate": 8.307545822423233e-08, "logits/generated": 2.628760814666748, "logits/real": 1.601776361465454, "logps/generated": -758.2694091796875, "logps/real": -482.5057678222656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -35.32893753051758, "rewards/margins": 28.153697967529297, "rewards/real": -7.175234794616699, "step": 7940 }, { "epoch": 2.55, "learning_rate": 8.24803618186146e-08, "logits/generated": 2.9728894233703613, "logits/real": 1.109973669052124, "logps/generated": -784.9164428710938, "logps/real": -515.6529541015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -36.20766830444336, "rewards/margins": 27.598922729492188, "rewards/real": -8.608742713928223, "step": 7950 }, { "epoch": 2.56, "learning_rate": 8.18852654129969e-08, "logits/generated": 3.2292587757110596, "logits/real": 1.5186768770217896, "logps/generated": -828.9783325195312, "logps/real": -463.95526123046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -41.503849029541016, "rewards/margins": 32.671268463134766, "rewards/real": -8.832586288452148, "step": 7960 }, { "epoch": 2.56, "learning_rate": 8.129016900737919e-08, "logits/generated": 3.0103938579559326, "logits/real": 1.1483628749847412, "logps/generated": -790.0716552734375, "logps/real": -434.24188232421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -35.52741241455078, "rewards/margins": 28.292184829711914, "rewards/real": -7.235227108001709, "step": 7970 }, { "epoch": 2.56, "learning_rate": 8.069507260176147e-08, "logits/generated": 2.683225154876709, "logits/real": 1.3247687816619873, "logps/generated": -793.4164428710938, "logps/real": -465.06121826171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -34.82353973388672, "rewards/margins": 26.00320053100586, "rewards/real": -8.820337295532227, "step": 7980 }, { "epoch": 2.57, "learning_rate": 8.009997619614377e-08, "logits/generated": 2.8887317180633545, "logits/real": 1.2829910516738892, "logps/generated": -679.059814453125, "logps/real": -491.88848876953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -32.95555877685547, "rewards/margins": 24.55912208557129, "rewards/real": -8.39643669128418, "step": 7990 }, { "epoch": 2.57, "learning_rate": 7.950487979052606e-08, "logits/generated": 2.8687214851379395, "logits/real": 1.3230711221694946, "logps/generated": -823.169921875, "logps/real": -491.85015869140625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -38.57782745361328, "rewards/margins": 28.096878051757812, "rewards/real": -10.480950355529785, "step": 8000 }, { "epoch": 2.57, "learning_rate": 7.890978338490834e-08, "logits/generated": 2.8275399208068848, "logits/real": 0.776604950428009, "logps/generated": -785.6185302734375, "logps/real": -513.6004638671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -37.354896545410156, "rewards/margins": 27.946659088134766, "rewards/real": -9.408243179321289, "step": 8010 }, { "epoch": 2.58, "learning_rate": 7.831468697929064e-08, "logits/generated": 2.8337345123291016, "logits/real": 1.223612666130066, "logps/generated": -837.4182739257812, "logps/real": -504.1847229003906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -37.143089294433594, "rewards/margins": 27.328475952148438, "rewards/real": -9.814611434936523, "step": 8020 }, { "epoch": 2.58, "learning_rate": 7.771959057367293e-08, "logits/generated": 2.460624933242798, "logits/real": 1.3921589851379395, "logps/generated": -756.406494140625, "logps/real": -484.98748779296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -32.813175201416016, "rewards/margins": 23.948450088500977, "rewards/real": -8.864723205566406, "step": 8030 }, { "epoch": 2.58, "learning_rate": 7.712449416805522e-08, "logits/generated": 3.1973955631256104, "logits/real": 1.3502247333526611, "logps/generated": -738.6199951171875, "logps/real": -487.993896484375, "loss": 0.0481, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.096397399902344, "rewards/margins": 26.09737205505371, "rewards/real": -7.9990234375, "step": 8040 }, { "epoch": 2.59, "learning_rate": 7.652939776243751e-08, "logits/generated": 2.9743385314941406, "logits/real": 1.3750207424163818, "logps/generated": -747.4863891601562, "logps/real": -441.22760009765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -35.50279998779297, "rewards/margins": 27.592899322509766, "rewards/real": -7.909903526306152, "step": 8050 }, { "epoch": 2.59, "learning_rate": 7.59343013568198e-08, "logits/generated": 2.5244336128234863, "logits/real": 0.8889492154121399, "logps/generated": -765.7584228515625, "logps/real": -433.25445556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -37.794254302978516, "rewards/margins": 29.81020164489746, "rewards/real": -7.984050750732422, "step": 8060 }, { "epoch": 2.59, "learning_rate": 7.533920495120209e-08, "logits/generated": 3.0301074981689453, "logits/real": 1.533179521560669, "logps/generated": -841.4713134765625, "logps/real": -475.43536376953125, "loss": 0.0205, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.69113540649414, "rewards/margins": 30.192031860351562, "rewards/real": -7.499100685119629, "step": 8070 }, { "epoch": 2.6, "learning_rate": 7.474410854558438e-08, "logits/generated": 2.862591505050659, "logits/real": 1.3269087076187134, "logps/generated": -745.9798583984375, "logps/real": -499.33087158203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -33.42017364501953, "rewards/margins": 25.073740005493164, "rewards/real": -8.346429824829102, "step": 8080 }, { "epoch": 2.6, "learning_rate": 7.414901213996668e-08, "logits/generated": 2.7778656482696533, "logits/real": 1.1807500123977661, "logps/generated": -797.7686767578125, "logps/real": -395.6166076660156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -35.33749008178711, "rewards/margins": 29.420669555664062, "rewards/real": -5.916820049285889, "step": 8090 }, { "epoch": 2.6, "learning_rate": 7.355391573434896e-08, "logits/generated": 3.154921054840088, "logits/real": 1.4642541408538818, "logps/generated": -762.2824096679688, "logps/real": -510.2544860839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.94062042236328, "rewards/margins": 25.70054054260254, "rewards/real": -8.240079879760742, "step": 8100 }, { "epoch": 2.61, "learning_rate": 7.295881932873125e-08, "logits/generated": 3.115079641342163, "logits/real": 1.1844195127487183, "logps/generated": -755.1008911132812, "logps/real": -492.8846740722656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.0328369140625, "rewards/margins": 26.659786224365234, "rewards/real": -8.373051643371582, "step": 8110 }, { "epoch": 2.61, "learning_rate": 7.236372292311355e-08, "logits/generated": 3.520921230316162, "logits/real": 1.756852388381958, "logps/generated": -851.3341674804688, "logps/real": -566.7991943359375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -38.737728118896484, "rewards/margins": 28.312875747680664, "rewards/real": -10.424853324890137, "step": 8120 }, { "epoch": 2.61, "learning_rate": 7.176862651749583e-08, "logits/generated": 3.0365023612976074, "logits/real": 1.7437479496002197, "logps/generated": -781.711181640625, "logps/real": -519.8856201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -35.33845138549805, "rewards/margins": 27.072681427001953, "rewards/real": -8.265774726867676, "step": 8130 }, { "epoch": 2.62, "learning_rate": 7.117353011187813e-08, "logits/generated": 2.9369990825653076, "logits/real": 1.4561811685562134, "logps/generated": -811.1724853515625, "logps/real": -500.3245544433594, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.455406188964844, "rewards/margins": 27.38521385192871, "rewards/real": -9.070195198059082, "step": 8140 }, { "epoch": 2.62, "learning_rate": 7.057843370626042e-08, "logits/generated": 3.3141605854034424, "logits/real": 1.4747812747955322, "logps/generated": -775.8446044921875, "logps/real": -443.55621337890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -37.97770309448242, "rewards/margins": 28.798559188842773, "rewards/real": -9.179143905639648, "step": 8150 }, { "epoch": 2.62, "learning_rate": 6.998333730064269e-08, "logits/generated": 3.369584560394287, "logits/real": 1.5479835271835327, "logps/generated": -835.9019775390625, "logps/real": -480.01434326171875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -41.738380432128906, "rewards/margins": 32.067771911621094, "rewards/real": -9.670604705810547, "step": 8160 }, { "epoch": 2.63, "learning_rate": 6.938824089502498e-08, "logits/generated": 2.6076505184173584, "logits/real": 1.6065130233764648, "logps/generated": -839.8328247070312, "logps/real": -521.17236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -37.27448272705078, "rewards/margins": 28.13425064086914, "rewards/real": -9.140233993530273, "step": 8170 }, { "epoch": 2.63, "learning_rate": 6.879314448940728e-08, "logits/generated": 3.2445945739746094, "logits/real": 1.2038366794586182, "logps/generated": -845.16015625, "logps/real": -582.2398071289062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -36.631813049316406, "rewards/margins": 28.374114990234375, "rewards/real": -8.257695198059082, "step": 8180 }, { "epoch": 2.63, "learning_rate": 6.819804808378957e-08, "logits/generated": 3.287216901779175, "logits/real": 1.4357283115386963, "logps/generated": -771.4024658203125, "logps/real": -449.7586975097656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -38.21382522583008, "rewards/margins": 28.36443519592285, "rewards/real": -9.849393844604492, "step": 8190 }, { "epoch": 2.63, "learning_rate": 6.760295167817185e-08, "logits/generated": 2.858645439147949, "logits/real": 1.5483039617538452, "logps/generated": -816.647216796875, "logps/real": -433.73480224609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -39.512489318847656, "rewards/margins": 31.068744659423828, "rewards/real": -8.443746566772461, "step": 8200 }, { "epoch": 2.64, "learning_rate": 6.700785527255415e-08, "logits/generated": 3.4710192680358887, "logits/real": 1.5234482288360596, "logps/generated": -845.6710815429688, "logps/real": -517.4374389648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.07843780517578, "rewards/margins": 28.01369857788086, "rewards/real": -7.064740180969238, "step": 8210 }, { "epoch": 2.64, "learning_rate": 6.641275886693644e-08, "logits/generated": 3.5218703746795654, "logits/real": 1.7099206447601318, "logps/generated": -809.7120361328125, "logps/real": -452.8575134277344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -37.52550506591797, "rewards/margins": 30.40155029296875, "rewards/real": -7.123953342437744, "step": 8220 }, { "epoch": 2.64, "learning_rate": 6.581766246131873e-08, "logits/generated": 3.3273627758026123, "logits/real": 1.667799949645996, "logps/generated": -773.13427734375, "logps/real": -469.0680236816406, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.69101333618164, "rewards/margins": 27.456430435180664, "rewards/real": -7.234580039978027, "step": 8230 }, { "epoch": 2.65, "learning_rate": 6.522256605570102e-08, "logits/generated": 2.6748316287994385, "logits/real": 1.6668426990509033, "logps/generated": -825.6130981445312, "logps/real": -461.3782653808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.9446907043457, "rewards/margins": 29.2167911529541, "rewards/real": -6.727897644042969, "step": 8240 }, { "epoch": 2.65, "learning_rate": 6.462746965008332e-08, "logits/generated": 3.467258930206299, "logits/real": 1.7721755504608154, "logps/generated": -742.2659301757812, "logps/real": -550.3944091796875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -33.94966506958008, "rewards/margins": 24.820079803466797, "rewards/real": -9.129584312438965, "step": 8250 }, { "epoch": 2.65, "learning_rate": 6.40323732444656e-08, "logits/generated": 3.181549072265625, "logits/real": 1.8261711597442627, "logps/generated": -809.9362182617188, "logps/real": -492.84832763671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -36.916534423828125, "rewards/margins": 28.238445281982422, "rewards/real": -8.67808723449707, "step": 8260 }, { "epoch": 2.66, "learning_rate": 6.343727683884789e-08, "logits/generated": 3.495197296142578, "logits/real": 1.8549379110336304, "logps/generated": -857.6259765625, "logps/real": -552.5338745117188, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -37.931182861328125, "rewards/margins": 28.336917877197266, "rewards/real": -9.594263076782227, "step": 8270 }, { "epoch": 2.66, "learning_rate": 6.284218043323019e-08, "logits/generated": 3.0114383697509766, "logits/real": 1.6358234882354736, "logps/generated": -724.8143310546875, "logps/real": -415.6348571777344, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -34.47003936767578, "rewards/margins": 26.23971176147461, "rewards/real": -8.230328559875488, "step": 8280 }, { "epoch": 2.66, "learning_rate": 6.224708402761247e-08, "logits/generated": 3.530071258544922, "logits/real": 1.5216658115386963, "logps/generated": -744.9732666015625, "logps/real": -465.30560302734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -36.94565963745117, "rewards/margins": 28.743549346923828, "rewards/real": -8.202113151550293, "step": 8290 }, { "epoch": 2.67, "learning_rate": 6.165198762199476e-08, "logits/generated": 3.0954372882843018, "logits/real": 1.803502082824707, "logps/generated": -756.04248046875, "logps/real": -474.942626953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -37.64512252807617, "rewards/margins": 26.757970809936523, "rewards/real": -10.887151718139648, "step": 8300 }, { "epoch": 2.67, "learning_rate": 6.105689121637705e-08, "logits/generated": 2.9991931915283203, "logits/real": 1.7015835046768188, "logps/generated": -768.0135498046875, "logps/real": -437.2664489746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.67110061645508, "rewards/margins": 25.91499900817871, "rewards/real": -8.756101608276367, "step": 8310 }, { "epoch": 2.67, "learning_rate": 6.046179481075934e-08, "logits/generated": 2.842740535736084, "logits/real": 1.2913899421691895, "logps/generated": -784.5416259765625, "logps/real": -524.0296630859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -35.684783935546875, "rewards/margins": 28.220544815063477, "rewards/real": -7.46423864364624, "step": 8320 }, { "epoch": 2.68, "learning_rate": 5.986669840514163e-08, "logits/generated": 3.418170213699341, "logits/real": 1.7757072448730469, "logps/generated": -850.01220703125, "logps/real": -536.5762939453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -39.732444763183594, "rewards/margins": 31.325265884399414, "rewards/real": -8.407184600830078, "step": 8330 }, { "epoch": 2.68, "learning_rate": 5.9271601999523916e-08, "logits/generated": 3.3157310485839844, "logits/real": 1.6027755737304688, "logps/generated": -856.6058349609375, "logps/real": -500.6687927246094, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -35.26923370361328, "rewards/margins": 26.92138671875, "rewards/real": -8.34785270690918, "step": 8340 }, { "epoch": 2.68, "learning_rate": 5.867650559390621e-08, "logits/generated": 2.910555601119995, "logits/real": 1.9252341985702515, "logps/generated": -799.5537109375, "logps/real": -491.54888916015625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -35.60499954223633, "rewards/margins": 27.46946144104004, "rewards/real": -8.135533332824707, "step": 8350 }, { "epoch": 2.69, "learning_rate": 5.80814091882885e-08, "logits/generated": 3.3811542987823486, "logits/real": 2.0283687114715576, "logps/generated": -860.8333740234375, "logps/real": -475.64453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -37.37459182739258, "rewards/margins": 29.230798721313477, "rewards/real": -8.143793106079102, "step": 8360 }, { "epoch": 2.69, "learning_rate": 5.7486312782670794e-08, "logits/generated": 3.6468443870544434, "logits/real": 1.8509632349014282, "logps/generated": -771.2623291015625, "logps/real": -515.796630859375, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.350616455078125, "rewards/margins": 26.09872055053711, "rewards/real": -9.25189208984375, "step": 8370 }, { "epoch": 2.69, "learning_rate": 5.689121637705308e-08, "logits/generated": 3.3427340984344482, "logits/real": 1.4290753602981567, "logps/generated": -777.0437622070312, "logps/real": -449.41754150390625, "loss": 0.007, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -37.69095230102539, "rewards/margins": 30.076812744140625, "rewards/real": -7.614136695861816, "step": 8380 }, { "epoch": 2.7, "learning_rate": 5.629611997143537e-08, "logits/generated": 2.934074640274048, "logits/real": 1.1947616338729858, "logps/generated": -851.9031982421875, "logps/real": -509.12408447265625, "loss": 0.0027, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.149330139160156, "rewards/margins": 29.3508358001709, "rewards/real": -7.798496246337891, "step": 8390 }, { "epoch": 2.7, "learning_rate": 5.5701023565817666e-08, "logits/generated": 3.3033649921417236, "logits/real": 1.4339520931243896, "logps/generated": -720.3985595703125, "logps/real": -542.14208984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -33.08808135986328, "rewards/margins": 25.795751571655273, "rewards/real": -7.292330741882324, "step": 8400 }, { "epoch": 2.7, "learning_rate": 5.510592716019995e-08, "logits/generated": 3.3134028911590576, "logits/real": 1.730181097984314, "logps/generated": -836.9600830078125, "logps/real": -473.23565673828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -39.22040939331055, "rewards/margins": 30.765247344970703, "rewards/real": -8.455163955688477, "step": 8410 }, { "epoch": 2.71, "learning_rate": 5.4510830754582236e-08, "logits/generated": 3.0139784812927246, "logits/real": 2.073922634124756, "logps/generated": -850.1917114257812, "logps/real": -477.65716552734375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -39.42927169799805, "rewards/margins": 30.965606689453125, "rewards/real": -8.463666915893555, "step": 8420 }, { "epoch": 2.71, "learning_rate": 5.391573434896453e-08, "logits/generated": 2.8533833026885986, "logits/real": 1.3191601037979126, "logps/generated": -775.8548583984375, "logps/real": -420.8113708496094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -36.59467697143555, "rewards/margins": 29.030315399169922, "rewards/real": -7.564364433288574, "step": 8430 }, { "epoch": 2.71, "learning_rate": 5.332063794334682e-08, "logits/generated": 3.010335683822632, "logits/real": 1.7422542572021484, "logps/generated": -669.0731201171875, "logps/real": -463.169677734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -30.44244384765625, "rewards/margins": 22.686447143554688, "rewards/real": -7.755995273590088, "step": 8440 }, { "epoch": 2.72, "learning_rate": 5.272554153772911e-08, "logits/generated": 2.8566172122955322, "logits/real": 1.8051217794418335, "logps/generated": -776.7787475585938, "logps/real": -451.3484802246094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -34.335365295410156, "rewards/margins": 27.841394424438477, "rewards/real": -6.493971824645996, "step": 8450 }, { "epoch": 2.72, "learning_rate": 5.21304451321114e-08, "logits/generated": 3.4016449451446533, "logits/real": 1.8644094467163086, "logps/generated": -826.0098876953125, "logps/real": -435.3868713378906, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -39.29865264892578, "rewards/margins": 31.840560913085938, "rewards/real": -7.458085536956787, "step": 8460 }, { "epoch": 2.72, "learning_rate": 5.153534872649369e-08, "logits/generated": 2.8162379264831543, "logits/real": 1.6108335256576538, "logps/generated": -782.1649780273438, "logps/real": -422.7880859375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -34.98106384277344, "rewards/margins": 28.367328643798828, "rewards/real": -6.613736629486084, "step": 8470 }, { "epoch": 2.72, "learning_rate": 5.0940252320875985e-08, "logits/generated": 3.0136919021606445, "logits/real": 2.0329723358154297, "logps/generated": -747.817626953125, "logps/real": -466.11737060546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -34.00456619262695, "rewards/margins": 26.25246810913086, "rewards/real": -7.7520952224731445, "step": 8480 }, { "epoch": 2.73, "learning_rate": 5.034515591525827e-08, "logits/generated": 2.8304662704467773, "logits/real": 1.4094116687774658, "logps/generated": -838.5126953125, "logps/real": -541.8682250976562, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -36.088321685791016, "rewards/margins": 28.613460540771484, "rewards/real": -7.47485876083374, "step": 8490 }, { "epoch": 2.73, "learning_rate": 4.975005950964056e-08, "logits/generated": 2.775794267654419, "logits/real": 1.4020425081253052, "logps/generated": -722.1731567382812, "logps/real": -411.4930114746094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -29.182281494140625, "rewards/margins": 23.229717254638672, "rewards/real": -5.952565670013428, "step": 8500 }, { "epoch": 2.73, "learning_rate": 4.915496310402285e-08, "logits/generated": 3.2242050170898438, "logits/real": 1.5009018182754517, "logps/generated": -716.9075927734375, "logps/real": -526.5830688476562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -32.840641021728516, "rewards/margins": 26.066463470458984, "rewards/real": -6.774176120758057, "step": 8510 }, { "epoch": 2.74, "learning_rate": 4.855986669840514e-08, "logits/generated": 3.1212315559387207, "logits/real": 1.399795651435852, "logps/generated": -740.31103515625, "logps/real": -471.4595642089844, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -33.6530876159668, "rewards/margins": 27.133167266845703, "rewards/real": -6.51992130279541, "step": 8520 }, { "epoch": 2.74, "learning_rate": 4.7964770292787426e-08, "logits/generated": 2.833284378051758, "logits/real": 1.5777113437652588, "logps/generated": -695.9685668945312, "logps/real": -443.299560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.9746150970459, "rewards/margins": 24.4139347076416, "rewards/real": -7.5606818199157715, "step": 8530 }, { "epoch": 2.74, "learning_rate": 4.736967388716972e-08, "logits/generated": 3.155226945877075, "logits/real": 1.556386947631836, "logps/generated": -766.650146484375, "logps/real": -498.51385498046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -32.98406982421875, "rewards/margins": 25.18619155883789, "rewards/real": -7.797879695892334, "step": 8540 }, { "epoch": 2.75, "learning_rate": 4.677457748155201e-08, "logits/generated": 2.9887216091156006, "logits/real": 1.5266433954238892, "logps/generated": -771.2750854492188, "logps/real": -496.5469665527344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -33.36461639404297, "rewards/margins": 26.154592514038086, "rewards/real": -7.21002197265625, "step": 8550 }, { "epoch": 2.75, "learning_rate": 4.61794810759343e-08, "logits/generated": 3.427128553390503, "logits/real": 1.7409652471542358, "logps/generated": -761.2540283203125, "logps/real": -474.03973388671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -36.65859603881836, "rewards/margins": 28.606678009033203, "rewards/real": -8.051918983459473, "step": 8560 }, { "epoch": 2.75, "learning_rate": 4.558438467031659e-08, "logits/generated": 2.801579475402832, "logits/real": 1.5079745054244995, "logps/generated": -758.4537353515625, "logps/real": -419.08251953125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -33.15636444091797, "rewards/margins": 25.69656753540039, "rewards/real": -7.459795951843262, "step": 8570 }, { "epoch": 2.76, "learning_rate": 4.498928826469888e-08, "logits/generated": 2.9341182708740234, "logits/real": 1.5971897840499878, "logps/generated": -832.4122314453125, "logps/real": -469.4901428222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -37.149688720703125, "rewards/margins": 29.325109481811523, "rewards/real": -7.824580192565918, "step": 8580 }, { "epoch": 2.76, "learning_rate": 4.439419185908117e-08, "logits/generated": 3.185535430908203, "logits/real": 1.5048960447311401, "logps/generated": -839.0285034179688, "logps/real": -437.461669921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -38.7950553894043, "rewards/margins": 30.796091079711914, "rewards/real": -7.998966217041016, "step": 8590 }, { "epoch": 2.76, "learning_rate": 4.3799095453463464e-08, "logits/generated": 2.9187042713165283, "logits/real": 1.3976789712905884, "logps/generated": -770.5217895507812, "logps/real": -521.6911010742188, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.184410095214844, "rewards/margins": 25.12429428100586, "rewards/real": -9.0601167678833, "step": 8600 }, { "epoch": 2.77, "learning_rate": 4.3203999047845745e-08, "logits/generated": 2.874825954437256, "logits/real": 1.4568679332733154, "logps/generated": -694.4080200195312, "logps/real": -419.46307373046875, "loss": 0.017, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.17966079711914, "rewards/margins": 24.234317779541016, "rewards/real": -5.945339202880859, "step": 8610 }, { "epoch": 2.77, "learning_rate": 4.2608902642228033e-08, "logits/generated": 3.062195062637329, "logits/real": 1.627201795578003, "logps/generated": -723.2996826171875, "logps/real": -421.48321533203125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -30.21268653869629, "rewards/margins": 24.724964141845703, "rewards/real": -5.487717628479004, "step": 8620 }, { "epoch": 2.77, "learning_rate": 4.201380623661033e-08, "logits/generated": 3.1232762336730957, "logits/real": 1.7867786884307861, "logps/generated": -755.4934692382812, "logps/real": -460.92095947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.111637115478516, "rewards/margins": 27.390594482421875, "rewards/real": -7.721039772033691, "step": 8630 }, { "epoch": 2.78, "learning_rate": 4.1418709830992617e-08, "logits/generated": 2.9343647956848145, "logits/real": 1.4838205575942993, "logps/generated": -733.4630126953125, "logps/real": -510.0469665527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.948387145996094, "rewards/margins": 27.314489364624023, "rewards/real": -6.633896827697754, "step": 8640 }, { "epoch": 2.78, "learning_rate": 4.082361342537491e-08, "logits/generated": 3.2548491954803467, "logits/real": 1.8221263885498047, "logps/generated": -796.93212890625, "logps/real": -461.876708984375, "loss": 0.003, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.01301193237305, "rewards/margins": 26.876983642578125, "rewards/real": -7.136033058166504, "step": 8650 }, { "epoch": 2.78, "learning_rate": 4.02285170197572e-08, "logits/generated": 2.8353352546691895, "logits/real": 1.6875264644622803, "logps/generated": -767.2127075195312, "logps/real": -474.08172607421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -32.641632080078125, "rewards/margins": 25.748126983642578, "rewards/real": -6.893509864807129, "step": 8660 }, { "epoch": 2.79, "learning_rate": 3.963342061413949e-08, "logits/generated": 3.1572728157043457, "logits/real": 1.7358052730560303, "logps/generated": -738.89404296875, "logps/real": -439.635986328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -34.04695510864258, "rewards/margins": 27.265859603881836, "rewards/real": -6.781087398529053, "step": 8670 }, { "epoch": 2.79, "learning_rate": 3.903832420852178e-08, "logits/generated": 3.241525173187256, "logits/real": 1.713012933731079, "logps/generated": -744.816162109375, "logps/real": -457.0414123535156, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -35.350643157958984, "rewards/margins": 28.8222713470459, "rewards/real": -6.528374671936035, "step": 8680 }, { "epoch": 2.79, "learning_rate": 3.844322780290407e-08, "logits/generated": 3.4162325859069824, "logits/real": 1.879372239112854, "logps/generated": -787.7777099609375, "logps/real": -498.8902282714844, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.91460418701172, "rewards/margins": 28.239084243774414, "rewards/real": -6.6755218505859375, "step": 8690 }, { "epoch": 2.8, "learning_rate": 3.784813139728636e-08, "logits/generated": 2.93852162361145, "logits/real": 1.3213813304901123, "logps/generated": -758.5169677734375, "logps/real": -471.8833923339844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -32.7196044921875, "rewards/margins": 25.495986938476562, "rewards/real": -7.223616600036621, "step": 8700 }, { "epoch": 2.8, "learning_rate": 3.725303499166865e-08, "logits/generated": 3.753695249557495, "logits/real": 2.1591434478759766, "logps/generated": -860.6848754882812, "logps/real": -524.9163818359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -34.73515701293945, "rewards/margins": 27.504486083984375, "rewards/real": -7.230668544769287, "step": 8710 }, { "epoch": 2.8, "learning_rate": 3.6657938586050936e-08, "logits/generated": 3.5195088386535645, "logits/real": 2.248831272125244, "logps/generated": -806.697265625, "logps/real": -477.43133544921875, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.1906852722168, "rewards/margins": 29.06699562072754, "rewards/real": -8.123689651489258, "step": 8720 }, { "epoch": 2.81, "learning_rate": 3.6062842180433224e-08, "logits/generated": 3.1179943084716797, "logits/real": 1.3181183338165283, "logps/generated": -826.83740234375, "logps/real": -462.10516357421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -37.52702713012695, "rewards/margins": 30.666515350341797, "rewards/real": -6.860505104064941, "step": 8730 }, { "epoch": 2.81, "learning_rate": 3.546774577481552e-08, "logits/generated": 3.140714168548584, "logits/real": 1.6323251724243164, "logps/generated": -724.0711669921875, "logps/real": -481.90057373046875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -31.701507568359375, "rewards/margins": 25.245559692382812, "rewards/real": -6.455949306488037, "step": 8740 }, { "epoch": 2.81, "learning_rate": 3.487264936919781e-08, "logits/generated": 3.2538998126983643, "logits/real": 1.565359354019165, "logps/generated": -751.4068603515625, "logps/real": -475.2777404785156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -32.36248779296875, "rewards/margins": 26.812061309814453, "rewards/real": -5.5504255294799805, "step": 8750 }, { "epoch": 2.81, "learning_rate": 3.42775529635801e-08, "logits/generated": 3.1185154914855957, "logits/real": 1.7924352884292603, "logps/generated": -651.2061157226562, "logps/real": -475.150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -30.828842163085938, "rewards/margins": 24.05868911743164, "rewards/real": -6.770150661468506, "step": 8760 }, { "epoch": 2.82, "learning_rate": 3.368245655796239e-08, "logits/generated": 2.6842496395111084, "logits/real": 1.5537285804748535, "logps/generated": -787.1558227539062, "logps/real": -519.8851928710938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -32.14249038696289, "rewards/margins": 25.10601043701172, "rewards/real": -7.036477565765381, "step": 8770 }, { "epoch": 2.82, "learning_rate": 3.308736015234468e-08, "logits/generated": 2.9321939945220947, "logits/real": 1.6629358530044556, "logps/generated": -765.5720825195312, "logps/real": -493.0559997558594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -33.3111686706543, "rewards/margins": 27.14790916442871, "rewards/real": -6.163259506225586, "step": 8780 }, { "epoch": 2.82, "learning_rate": 3.249226374672697e-08, "logits/generated": 3.006134510040283, "logits/real": 1.6650793552398682, "logps/generated": -777.2510986328125, "logps/real": -471.74029541015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -34.07080078125, "rewards/margins": 26.57828712463379, "rewards/real": -7.492513179779053, "step": 8790 }, { "epoch": 2.83, "learning_rate": 3.189716734110926e-08, "logits/generated": 3.406435012817383, "logits/real": 1.6540796756744385, "logps/generated": -746.054443359375, "logps/real": -509.64935302734375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -34.21219253540039, "rewards/margins": 27.074874877929688, "rewards/real": -7.137310981750488, "step": 8800 }, { "epoch": 2.83, "learning_rate": 3.130207093549154e-08, "logits/generated": 3.2752296924591064, "logits/real": 1.4010677337646484, "logps/generated": -743.275146484375, "logps/real": -509.25531005859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -31.902740478515625, "rewards/margins": 25.384103775024414, "rewards/real": -6.518636226654053, "step": 8810 }, { "epoch": 2.83, "learning_rate": 3.070697452987384e-08, "logits/generated": 3.275930881500244, "logits/real": 2.2676784992218018, "logps/generated": -797.0228271484375, "logps/real": -482.3719787597656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -35.256900787353516, "rewards/margins": 27.465679168701172, "rewards/real": -7.791220188140869, "step": 8820 }, { "epoch": 2.84, "learning_rate": 3.0111878124256126e-08, "logits/generated": 2.977724313735962, "logits/real": 1.951297402381897, "logps/generated": -758.8145751953125, "logps/real": -495.4877014160156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -35.03417205810547, "rewards/margins": 27.565837860107422, "rewards/real": -7.468335151672363, "step": 8830 }, { "epoch": 2.84, "learning_rate": 2.9516781718638418e-08, "logits/generated": 3.3798155784606934, "logits/real": 2.1956369876861572, "logps/generated": -807.9532470703125, "logps/real": -504.31683349609375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -33.95764923095703, "rewards/margins": 26.328283309936523, "rewards/real": -7.629366874694824, "step": 8840 }, { "epoch": 2.84, "learning_rate": 2.892168531302071e-08, "logits/generated": 3.2768847942352295, "logits/real": 1.4380134344100952, "logps/generated": -800.1155395507812, "logps/real": -480.4146423339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -34.52793884277344, "rewards/margins": 26.30367088317871, "rewards/real": -8.224270820617676, "step": 8850 }, { "epoch": 2.85, "learning_rate": 2.8326588907402998e-08, "logits/generated": 3.342456817626953, "logits/real": 2.0722851753234863, "logps/generated": -781.5902099609375, "logps/real": -441.1924743652344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -35.27935028076172, "rewards/margins": 27.86493492126465, "rewards/real": -7.414412498474121, "step": 8860 }, { "epoch": 2.85, "learning_rate": 2.773149250178529e-08, "logits/generated": 3.2281785011291504, "logits/real": 1.5924721956253052, "logps/generated": -812.0906982421875, "logps/real": -499.855712890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -36.3347282409668, "rewards/margins": 28.233911514282227, "rewards/real": -8.100814819335938, "step": 8870 }, { "epoch": 2.85, "learning_rate": 2.7136396096167577e-08, "logits/generated": 3.0839850902557373, "logits/real": 1.4561680555343628, "logps/generated": -750.0205078125, "logps/real": -506.64276123046875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -32.61287307739258, "rewards/margins": 25.880529403686523, "rewards/real": -6.732338905334473, "step": 8880 }, { "epoch": 2.86, "learning_rate": 2.6541299690549866e-08, "logits/generated": 2.987210512161255, "logits/real": 1.7344825267791748, "logps/generated": -843.7034301757812, "logps/real": -489.5560607910156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -36.15435028076172, "rewards/margins": 29.273778915405273, "rewards/real": -6.880563259124756, "step": 8890 }, { "epoch": 2.86, "learning_rate": 2.5946203284932157e-08, "logits/generated": 3.1084742546081543, "logits/real": 1.6733993291854858, "logps/generated": -819.2145385742188, "logps/real": -534.1326904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.72334671020508, "rewards/margins": 25.07648277282715, "rewards/real": -8.646860122680664, "step": 8900 }, { "epoch": 2.86, "learning_rate": 2.535110687931445e-08, "logits/generated": 3.4447078704833984, "logits/real": 1.8348305225372314, "logps/generated": -813.2745361328125, "logps/real": -404.28857421875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/generated": -39.80561065673828, "rewards/margins": 32.86267852783203, "rewards/real": -6.942930698394775, "step": 8910 }, { "epoch": 2.87, "learning_rate": 2.475601047369674e-08, "logits/generated": 3.2527499198913574, "logits/real": 1.6698894500732422, "logps/generated": -765.736083984375, "logps/real": -483.4952087402344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -34.594703674316406, "rewards/margins": 27.78500747680664, "rewards/real": -6.809699058532715, "step": 8920 }, { "epoch": 2.87, "learning_rate": 2.4160914068079025e-08, "logits/generated": 3.111821413040161, "logits/real": 1.9169723987579346, "logps/generated": -710.6486206054688, "logps/real": -430.81964111328125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -30.046289443969727, "rewards/margins": 22.980323791503906, "rewards/real": -7.0659661293029785, "step": 8930 }, { "epoch": 2.87, "learning_rate": 2.3565817662461317e-08, "logits/generated": 3.1548495292663574, "logits/real": 1.519892692565918, "logps/generated": -789.2969970703125, "logps/real": -457.87762451171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -35.45643997192383, "rewards/margins": 29.173004150390625, "rewards/real": -6.2834296226501465, "step": 8940 }, { "epoch": 2.88, "learning_rate": 2.2970721256843608e-08, "logits/generated": 3.596977949142456, "logits/real": 1.8957351446151733, "logps/generated": -760.9915771484375, "logps/real": -421.7425231933594, "loss": 0.0049, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.46320343017578, "rewards/margins": 28.33466148376465, "rewards/real": -8.128539085388184, "step": 8950 }, { "epoch": 2.88, "learning_rate": 2.2375624851225897e-08, "logits/generated": 3.299694776535034, "logits/real": 2.2599711418151855, "logps/generated": -749.4815673828125, "logps/real": -440.0604553222656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -34.306434631347656, "rewards/margins": 27.075420379638672, "rewards/real": -7.231014251708984, "step": 8960 }, { "epoch": 2.88, "learning_rate": 2.1780528445608188e-08, "logits/generated": 3.3625245094299316, "logits/real": 2.385824203491211, "logps/generated": -724.9124755859375, "logps/real": -445.8505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.021018981933594, "rewards/margins": 28.207509994506836, "rewards/real": -6.813511848449707, "step": 8970 }, { "epoch": 2.89, "learning_rate": 2.1185432039990476e-08, "logits/generated": 3.059018611907959, "logits/real": 2.0408132076263428, "logps/generated": -749.118896484375, "logps/real": -467.23834228515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.99578857421875, "rewards/margins": 26.810449600219727, "rewards/real": -7.185333251953125, "step": 8980 }, { "epoch": 2.89, "learning_rate": 2.0590335634372768e-08, "logits/generated": 3.29240345954895, "logits/real": 1.3560516834259033, "logps/generated": -796.9227294921875, "logps/real": -521.9937133789062, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -35.48700714111328, "rewards/margins": 28.393346786499023, "rewards/real": -7.093659400939941, "step": 8990 }, { "epoch": 2.89, "learning_rate": 1.9995239228755056e-08, "logits/generated": 3.0828750133514404, "logits/real": 1.592389464378357, "logps/generated": -871.2789306640625, "logps/real": -485.0287170410156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -37.55382537841797, "rewards/margins": 31.4636173248291, "rewards/real": -6.090206623077393, "step": 9000 }, { "epoch": 2.9, "learning_rate": 1.9400142823137348e-08, "logits/generated": 3.092836618423462, "logits/real": 1.4812750816345215, "logps/generated": -732.5904541015625, "logps/real": -447.2054748535156, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.7863655090332, "rewards/margins": 26.842517852783203, "rewards/real": -5.943843841552734, "step": 9010 }, { "epoch": 2.9, "learning_rate": 1.880504641751964e-08, "logits/generated": 3.1290504932403564, "logits/real": 1.667345643043518, "logps/generated": -792.3594360351562, "logps/real": -477.4339294433594, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -35.30573272705078, "rewards/margins": 29.34500503540039, "rewards/real": -5.960729122161865, "step": 9020 }, { "epoch": 2.9, "learning_rate": 1.8209950011901924e-08, "logits/generated": 3.0685980319976807, "logits/real": 1.679626226425171, "logps/generated": -721.7459716796875, "logps/real": -476.15435791015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -32.63353729248047, "rewards/margins": 25.051095962524414, "rewards/real": -7.582440376281738, "step": 9030 }, { "epoch": 2.9, "learning_rate": 1.7614853606284216e-08, "logits/generated": 3.479518175125122, "logits/real": 2.4186058044433594, "logps/generated": -767.41064453125, "logps/real": -474.83837890625, "loss": 0.0027, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.49204635620117, "rewards/margins": 24.543514251708984, "rewards/real": -7.948531150817871, "step": 9040 }, { "epoch": 2.91, "learning_rate": 1.7019757200666507e-08, "logits/generated": 3.3706657886505127, "logits/real": 2.1165781021118164, "logps/generated": -699.5797729492188, "logps/real": -469.268798828125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -31.1688289642334, "rewards/margins": 24.58893394470215, "rewards/real": -6.579898834228516, "step": 9050 }, { "epoch": 2.91, "learning_rate": 1.64246607950488e-08, "logits/generated": 2.897587299346924, "logits/real": 1.3072104454040527, "logps/generated": -763.4205322265625, "logps/real": -478.0918884277344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -33.09733200073242, "rewards/margins": 26.24774742126465, "rewards/real": -6.84958553314209, "step": 9060 }, { "epoch": 2.91, "learning_rate": 1.5829564389431087e-08, "logits/generated": 3.2408931255340576, "logits/real": 2.0958895683288574, "logps/generated": -712.6605224609375, "logps/real": -439.13409423828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -32.0467529296875, "rewards/margins": 25.190519332885742, "rewards/real": -6.8562331199646, "step": 9070 }, { "epoch": 2.92, "learning_rate": 1.523446798381338e-08, "logits/generated": 3.4491748809814453, "logits/real": 2.1663897037506104, "logps/generated": -721.4193115234375, "logps/real": -451.4937438964844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.52249526977539, "rewards/margins": 28.1429386138916, "rewards/real": -5.379555702209473, "step": 9080 }, { "epoch": 2.92, "learning_rate": 1.4639371578195669e-08, "logits/generated": 3.2985618114471436, "logits/real": 1.7052987813949585, "logps/generated": -722.9124755859375, "logps/real": -478.326904296875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -32.31082534790039, "rewards/margins": 24.70047378540039, "rewards/real": -7.610352993011475, "step": 9090 }, { "epoch": 2.92, "learning_rate": 1.4044275172577957e-08, "logits/generated": 3.1738462448120117, "logits/real": 1.2917898893356323, "logps/generated": -796.5064086914062, "logps/real": -458.9476623535156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -33.63817596435547, "rewards/margins": 27.587594985961914, "rewards/real": -6.0505828857421875, "step": 9100 }, { "epoch": 2.93, "learning_rate": 1.3449178766960247e-08, "logits/generated": 3.156320810317993, "logits/real": 1.5794252157211304, "logps/generated": -748.75390625, "logps/real": -479.9158630371094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -34.39598083496094, "rewards/margins": 27.459997177124023, "rewards/real": -6.9359846115112305, "step": 9110 }, { "epoch": 2.93, "learning_rate": 1.2854082361342537e-08, "logits/generated": 3.1458640098571777, "logits/real": 1.9806264638900757, "logps/generated": -799.3624877929688, "logps/real": -460.40283203125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -34.492103576660156, "rewards/margins": 27.267162322998047, "rewards/real": -7.224940299987793, "step": 9120 }, { "epoch": 2.93, "learning_rate": 1.2258985955724826e-08, "logits/generated": 3.339601516723633, "logits/real": 1.7530397176742554, "logps/generated": -799.3890991210938, "logps/real": -482.59814453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -33.62677764892578, "rewards/margins": 27.268543243408203, "rewards/real": -6.358229160308838, "step": 9130 }, { "epoch": 2.94, "learning_rate": 1.1663889550107118e-08, "logits/generated": 3.344064235687256, "logits/real": 1.8734636306762695, "logps/generated": -757.6580810546875, "logps/real": -481.42913818359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -31.565128326416016, "rewards/margins": 24.76689338684082, "rewards/real": -6.798234462738037, "step": 9140 }, { "epoch": 2.94, "learning_rate": 1.1068793144489406e-08, "logits/generated": 3.0886898040771484, "logits/real": 1.683241844177246, "logps/generated": -736.2691040039062, "logps/real": -478.54779052734375, "loss": 0.0157, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.55189514160156, "rewards/margins": 25.6215763092041, "rewards/real": -6.930319309234619, "step": 9150 }, { "epoch": 2.94, "learning_rate": 1.0473696738871698e-08, "logits/generated": 2.993119239807129, "logits/real": 1.7231972217559814, "logps/generated": -783.2693481445312, "logps/real": -507.5567932128906, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -32.22801971435547, "rewards/margins": 24.554100036621094, "rewards/real": -7.67392110824585, "step": 9160 }, { "epoch": 2.95, "learning_rate": 9.878600333253986e-09, "logits/generated": 3.2624917030334473, "logits/real": 1.433098554611206, "logps/generated": -708.4134521484375, "logps/real": -491.4871520996094, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -32.31158447265625, "rewards/margins": 25.722497940063477, "rewards/real": -6.589092254638672, "step": 9170 }, { "epoch": 2.95, "learning_rate": 9.283503927636276e-09, "logits/generated": 2.972395658493042, "logits/real": 1.8567657470703125, "logps/generated": -780.3577270507812, "logps/real": -411.82080078125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -34.754310607910156, "rewards/margins": 28.901599884033203, "rewards/real": -5.8527092933654785, "step": 9180 }, { "epoch": 2.95, "learning_rate": 8.688407522018568e-09, "logits/generated": 3.5766608715057373, "logits/real": 1.3456761837005615, "logps/generated": -858.0286865234375, "logps/real": -476.2017517089844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.649436950683594, "rewards/margins": 29.215795516967773, "rewards/real": -6.4336442947387695, "step": 9190 }, { "epoch": 2.96, "learning_rate": 8.093311116400856e-09, "logits/generated": 3.2225594520568848, "logits/real": 1.6229501962661743, "logps/generated": -713.380859375, "logps/real": -473.11187744140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -30.473922729492188, "rewards/margins": 25.37922477722168, "rewards/real": -5.094698905944824, "step": 9200 }, { "epoch": 2.96, "learning_rate": 7.498214710783147e-09, "logits/generated": 3.045548915863037, "logits/real": 1.6095815896987915, "logps/generated": -749.2374267578125, "logps/real": -431.52825927734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -33.536590576171875, "rewards/margins": 26.678333282470703, "rewards/real": -6.858257293701172, "step": 9210 }, { "epoch": 2.96, "learning_rate": 6.903118305165436e-09, "logits/generated": 2.7229132652282715, "logits/real": 1.9056316614151, "logps/generated": -745.2661743164062, "logps/real": -405.09320068359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -33.12969970703125, "rewards/margins": 26.677814483642578, "rewards/real": -6.451888084411621, "step": 9220 }, { "epoch": 2.97, "learning_rate": 6.308021899547726e-09, "logits/generated": 3.376845598220825, "logits/real": 1.8195558786392212, "logps/generated": -743.9133911132812, "logps/real": -468.9219665527344, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.888755798339844, "rewards/margins": 25.621810913085938, "rewards/real": -7.266948699951172, "step": 9230 }, { "epoch": 2.97, "learning_rate": 5.712925493930016e-09, "logits/generated": 3.007474660873413, "logits/real": 1.6739345788955688, "logps/generated": -654.8544921875, "logps/real": -407.5782470703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.767675399780273, "rewards/margins": 26.138391494750977, "rewards/real": -5.629283905029297, "step": 9240 }, { "epoch": 2.97, "learning_rate": 5.117829088312306e-09, "logits/generated": 3.1612629890441895, "logits/real": 1.6607601642608643, "logps/generated": -817.826416015625, "logps/real": -500.9883728027344, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -35.301570892333984, "rewards/margins": 28.810741424560547, "rewards/real": -6.4908294677734375, "step": 9250 }, { "epoch": 2.98, "learning_rate": 4.522732682694597e-09, "logits/generated": 3.1394991874694824, "logits/real": 1.5999667644500732, "logps/generated": -758.4761962890625, "logps/real": -390.61944580078125, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.750396728515625, "rewards/margins": 26.97458267211914, "rewards/real": -6.77581262588501, "step": 9260 }, { "epoch": 2.98, "learning_rate": 3.927636277076887e-09, "logits/generated": 2.840440511703491, "logits/real": 1.7077268362045288, "logps/generated": -771.6409301757812, "logps/real": -455.101318359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -31.247241973876953, "rewards/margins": 26.89655113220215, "rewards/real": -4.350689888000488, "step": 9270 }, { "epoch": 2.98, "learning_rate": 3.332539871459176e-09, "logits/generated": 3.233851909637451, "logits/real": 2.1648850440979004, "logps/generated": -801.1500244140625, "logps/real": -437.95098876953125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -34.10742950439453, "rewards/margins": 27.890575408935547, "rewards/real": -6.216853141784668, "step": 9280 }, { "epoch": 2.99, "learning_rate": 2.7374434658414665e-09, "logits/generated": 3.1344990730285645, "logits/real": 1.3887934684753418, "logps/generated": -729.60888671875, "logps/real": -445.4117736816406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -32.27558135986328, "rewards/margins": 27.172298431396484, "rewards/real": -5.103287696838379, "step": 9290 }, { "epoch": 2.99, "learning_rate": 2.1423470602237564e-09, "logits/generated": 2.8525900840759277, "logits/real": 1.922720193862915, "logps/generated": -792.5548095703125, "logps/real": -449.06170654296875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/generated": -33.03357696533203, "rewards/margins": 26.860116958618164, "rewards/real": -6.173454761505127, "step": 9300 }, { "epoch": 2.99, "learning_rate": 1.5472506546060463e-09, "logits/generated": 3.235713243484497, "logits/real": 1.6957728862762451, "logps/generated": -849.6369018554688, "logps/real": -447.80889892578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -38.40895080566406, "rewards/margins": 32.40843963623047, "rewards/real": -6.000512599945068, "step": 9310 }, { "epoch": 2.99, "learning_rate": 9.521542489883362e-10, "logits/generated": 2.7895750999450684, "logits/real": 1.6889011859893799, "logps/generated": -790.5099487304688, "logps/real": -420.15704345703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -35.6865234375, "rewards/margins": 29.061664581298828, "rewards/real": -6.624857425689697, "step": 9320 }, { "epoch": 3.0, "learning_rate": 3.57057843370626e-10, "logits/generated": 3.134608030319214, "logits/real": 2.191204786300659, "logps/generated": -721.1075439453125, "logps/real": -427.5851135253906, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.275808334350586, "rewards/margins": 24.25775146484375, "rewards/real": -7.018056392669678, "step": 9330 }, { "epoch": 3.0, "step": 9336, "total_flos": 0.0, "train_loss": 0.05879934279130232, "train_runtime": 78001.7882, "train_samples_per_second": 3.83, "train_steps_per_second": 0.12 } ], "logging_steps": 10, "max_steps": 9336, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }