{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.597444089456869e-09, "logits/generated": 6.076260089874268, "logits/real": 4.217202663421631, "logps/generated": -793.698486328125, "logps/real": -221.5892333984375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.597444089456869e-08, "logits/generated": 5.937064170837402, "logits/real": 4.38163423538208, "logps/generated": -943.8297729492188, "logps/real": -251.95458984375, "loss": 0.7034, "rewards/accuracies": 0.1111111119389534, "rewards/generated": 0.008679242804646492, "rewards/margins": -0.012087766081094742, "rewards/real": -0.0034085246734321117, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.194888178913738e-08, "logits/generated": 6.016368389129639, "logits/real": 4.054781913757324, "logps/generated": -866.6522216796875, "logps/real": -244.7873992919922, "loss": 0.6959, "rewards/accuracies": 0.5249999761581421, "rewards/generated": -0.04229893162846565, "rewards/margins": 0.02972055971622467, "rewards/real": -0.01257836353033781, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.7923322683706064e-08, "logits/generated": 6.003698348999023, "logits/real": 4.410984992980957, "logps/generated": -772.19970703125, "logps/real": -265.46051025390625, "loss": 0.6868, "rewards/accuracies": 0.5249999761581421, "rewards/generated": -0.002037657890468836, "rewards/margins": 0.010990817099809647, "rewards/real": 0.008953156881034374, "step": 30 }, { "epoch": 0.03, "learning_rate": 6.389776357827476e-08, "logits/generated": 6.05269718170166, "logits/real": 4.136534214019775, "logps/generated": -916.8504028320312, "logps/real": -240.4838409423828, "loss": 0.7385, "rewards/accuracies": 0.550000011920929, "rewards/generated": 0.05573784187436104, "rewards/margins": -0.06023125723004341, "rewards/real": -0.004493414890021086, "step": 40 }, { "epoch": 0.03, "learning_rate": 7.987220447284344e-08, "logits/generated": 6.0863237380981445, "logits/real": 4.26947546005249, "logps/generated": -910.7779541015625, "logps/real": -224.7820281982422, "loss": 0.7306, "rewards/accuracies": 0.4375, "rewards/generated": 0.05095089226961136, "rewards/margins": -0.05160406976938248, "rewards/real": -0.0006531793624162674, "step": 50 }, { "epoch": 0.04, "learning_rate": 9.584664536741213e-08, "logits/generated": 5.816224098205566, "logits/real": 4.575616359710693, "logps/generated": -828.2184448242188, "logps/real": -243.4587860107422, "loss": 0.7114, "rewards/accuracies": 0.4625000059604645, "rewards/generated": 0.006193811539560556, "rewards/margins": -0.02020972967147827, "rewards/real": -0.014015915803611279, "step": 60 }, { "epoch": 0.04, "learning_rate": 1.1182108626198082e-07, "logits/generated": 5.910815238952637, "logits/real": 4.322142601013184, "logps/generated": -864.3304443359375, "logps/real": -267.9166564941406, "loss": 0.7368, "rewards/accuracies": 0.42500001192092896, "rewards/generated": 0.10107575356960297, "rewards/margins": -0.11164508759975433, "rewards/real": -0.010569351725280285, "step": 70 }, { "epoch": 0.05, "learning_rate": 1.2779552715654952e-07, "logits/generated": 5.944818496704102, "logits/real": 4.048387050628662, "logps/generated": -961.0286865234375, "logps/real": -237.78018188476562, "loss": 0.678, "rewards/accuracies": 0.637499988079071, "rewards/generated": -0.04871482402086258, "rewards/margins": 0.04645959287881851, "rewards/real": -0.002255239523947239, "step": 80 }, { "epoch": 0.06, "learning_rate": 1.437699680511182e-07, "logits/generated": 6.106154441833496, "logits/real": 4.130688667297363, "logps/generated": -899.5615234375, "logps/real": -257.23492431640625, "loss": 0.663, "rewards/accuracies": 0.6000000238418579, "rewards/generated": -0.09308083355426788, "rewards/margins": 0.07399795949459076, "rewards/real": -0.01908286102116108, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.5974440894568688e-07, "logits/generated": 5.771765232086182, "logits/real": 4.03811502456665, "logps/generated": -820.4385986328125, "logps/real": -240.3477325439453, "loss": 0.721, "rewards/accuracies": 0.42500001192092896, "rewards/generated": 0.0457366518676281, "rewards/margins": -0.05348087102174759, "rewards/real": -0.007744210306555033, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.757188498402556e-07, "logits/generated": 5.8602399826049805, "logits/real": 4.352300643920898, "logps/generated": -949.75537109375, "logps/real": -256.1276550292969, "loss": 0.6804, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.08218346536159515, "rewards/margins": 0.0958067774772644, "rewards/real": 0.013623319566249847, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.9169329073482426e-07, "logits/generated": 5.985620021820068, "logits/real": 4.446410179138184, "logps/generated": -827.1920776367188, "logps/real": -240.00082397460938, "loss": 0.6858, "rewards/accuracies": 0.5249999761581421, "rewards/generated": -0.024061355739831924, "rewards/margins": 0.02317228354513645, "rewards/real": -0.0008890745230019093, "step": 120 }, { "epoch": 0.08, "learning_rate": 2.0766773162939297e-07, "logits/generated": 5.976418972015381, "logits/real": 4.385097980499268, "logps/generated": -903.66357421875, "logps/real": -253.0922393798828, "loss": 0.6812, "rewards/accuracies": 0.574999988079071, "rewards/generated": -0.03790752962231636, "rewards/margins": 0.04170641303062439, "rewards/real": 0.0037988885305821896, "step": 130 }, { "epoch": 0.09, "learning_rate": 2.2364217252396164e-07, "logits/generated": 5.8814568519592285, "logits/real": 4.022303581237793, "logps/generated": -872.1270751953125, "logps/real": -239.0474090576172, "loss": 0.6462, "rewards/accuracies": 0.6000000238418579, "rewards/generated": -0.08364593237638474, "rewards/margins": 0.07660557329654694, "rewards/real": -0.007040367461740971, "step": 140 }, { "epoch": 0.1, "learning_rate": 2.3961661341853033e-07, "logits/generated": 5.902901649475098, "logits/real": 4.500279426574707, "logps/generated": -909.7518310546875, "logps/real": -252.1283416748047, "loss": 0.6449, "rewards/accuracies": 0.550000011920929, "rewards/generated": -0.1788884699344635, "rewards/margins": 0.1675419807434082, "rewards/real": -0.011346508748829365, "step": 150 }, { "epoch": 0.1, "learning_rate": 2.5559105431309904e-07, "logits/generated": 6.0863142013549805, "logits/real": 4.278581142425537, "logps/generated": -862.6253662109375, "logps/real": -249.1087188720703, "loss": 0.6591, "rewards/accuracies": 0.5874999761581421, "rewards/generated": -0.06084873527288437, "rewards/margins": 0.06349506974220276, "rewards/real": 0.0026463475078344345, "step": 160 }, { "epoch": 0.11, "learning_rate": 2.715654952076677e-07, "logits/generated": 5.8610758781433105, "logits/real": 4.366555213928223, "logps/generated": -893.8683471679688, "logps/real": -238.64443969726562, "loss": 0.6079, "rewards/accuracies": 0.762499988079071, "rewards/generated": -0.22141461074352264, "rewards/margins": 0.23539376258850098, "rewards/real": 0.01397914718836546, "step": 170 }, { "epoch": 0.12, "learning_rate": 2.875399361022364e-07, "logits/generated": 5.8870625495910645, "logits/real": 4.547116279602051, "logps/generated": -906.3629150390625, "logps/real": -263.76654052734375, "loss": 0.5942, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.2859794497489929, "rewards/margins": 0.2683030664920807, "rewards/real": -0.017676372081041336, "step": 180 }, { "epoch": 0.12, "learning_rate": 3.035143769968051e-07, "logits/generated": 6.080388069152832, "logits/real": 4.27817440032959, "logps/generated": -907.4010009765625, "logps/real": -263.9908447265625, "loss": 0.5832, "rewards/accuracies": 0.675000011920929, "rewards/generated": -0.28283512592315674, "rewards/margins": 0.27541953325271606, "rewards/real": -0.007415570318698883, "step": 190 }, { "epoch": 0.13, "learning_rate": 3.1948881789137375e-07, "logits/generated": 6.112253665924072, "logits/real": 4.260523796081543, "logps/generated": -938.8836059570312, "logps/real": -248.11587524414062, "loss": 0.5842, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.30967745184898376, "rewards/margins": 0.29746749997138977, "rewards/real": -0.012209964916110039, "step": 200 }, { "epoch": 0.13, "learning_rate": 3.354632587859425e-07, "logits/generated": 5.790924549102783, "logits/real": 4.103597640991211, "logps/generated": -907.6522216796875, "logps/real": -227.3259735107422, "loss": 0.6164, "rewards/accuracies": 0.6499999761581421, "rewards/generated": -0.22061340510845184, "rewards/margins": 0.21634364128112793, "rewards/real": -0.004269786179065704, "step": 210 }, { "epoch": 0.14, "learning_rate": 3.514376996805112e-07, "logits/generated": 5.998345375061035, "logits/real": 4.214221000671387, "logps/generated": -942.66552734375, "logps/real": -222.0115509033203, "loss": 0.5932, "rewards/accuracies": 0.75, "rewards/generated": -0.32513970136642456, "rewards/margins": 0.32551324367523193, "rewards/real": 0.0003735637292265892, "step": 220 }, { "epoch": 0.15, "learning_rate": 3.6741214057507985e-07, "logits/generated": 6.107602596282959, "logits/real": 4.030685901641846, "logps/generated": -899.4200439453125, "logps/real": -238.90316772460938, "loss": 0.5673, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -0.3097040355205536, "rewards/margins": 0.31323733925819397, "rewards/real": 0.003533291397616267, "step": 230 }, { "epoch": 0.15, "learning_rate": 3.833865814696485e-07, "logits/generated": 6.146908283233643, "logits/real": 4.572686672210693, "logps/generated": -849.7062377929688, "logps/real": -287.7304382324219, "loss": 0.6028, "rewards/accuracies": 0.612500011920929, "rewards/generated": -0.2624419331550598, "rewards/margins": 0.24513304233551025, "rewards/real": -0.017308901995420456, "step": 240 }, { "epoch": 0.16, "learning_rate": 3.993610223642173e-07, "logits/generated": 6.0791335105896, "logits/real": 4.294358730316162, "logps/generated": -928.7376708984375, "logps/real": -228.5765380859375, "loss": 0.5843, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -0.3841578960418701, "rewards/margins": 0.3720285892486572, "rewards/real": -0.012129291892051697, "step": 250 }, { "epoch": 0.17, "learning_rate": 4.1533546325878595e-07, "logits/generated": 6.011288642883301, "logits/real": 3.944628953933716, "logps/generated": -920.6962890625, "logps/real": -224.2725372314453, "loss": 0.5626, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -0.4267396926879883, "rewards/margins": 0.42054229974746704, "rewards/real": -0.006197371985763311, "step": 260 }, { "epoch": 0.17, "learning_rate": 4.313099041533546e-07, "logits/generated": 5.999421119689941, "logits/real": 4.240687370300293, "logps/generated": -839.8876953125, "logps/real": -239.6907196044922, "loss": 0.5742, "rewards/accuracies": 0.7124999761581421, "rewards/generated": -0.29800790548324585, "rewards/margins": 0.30169859528541565, "rewards/real": 0.003690724028274417, "step": 270 }, { "epoch": 0.18, "learning_rate": 4.472843450479233e-07, "logits/generated": 6.157493591308594, "logits/real": 4.233771324157715, "logps/generated": -919.6163330078125, "logps/real": -256.02667236328125, "loss": 0.5615, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -0.436979204416275, "rewards/margins": 0.4427516460418701, "rewards/real": 0.00577241787686944, "step": 280 }, { "epoch": 0.19, "learning_rate": 4.63258785942492e-07, "logits/generated": 5.8831915855407715, "logits/real": 4.111518859863281, "logps/generated": -915.33642578125, "logps/real": -250.9635772705078, "loss": 0.5469, "rewards/accuracies": 0.762499988079071, "rewards/generated": -0.44483208656311035, "rewards/margins": 0.45819053053855896, "rewards/real": 0.013358525931835175, "step": 290 }, { "epoch": 0.19, "learning_rate": 4.792332268370607e-07, "logits/generated": 6.069736480712891, "logits/real": 4.062201499938965, "logps/generated": -1062.180419921875, "logps/real": -239.5394744873047, "loss": 0.5112, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -0.7640186548233032, "rewards/margins": 0.7979092597961426, "rewards/real": 0.03389066457748413, "step": 300 }, { "epoch": 0.2, "learning_rate": 4.952076677316294e-07, "logits/generated": 6.100470542907715, "logits/real": 4.4795241355896, "logps/generated": -883.9137573242188, "logps/real": -260.9756164550781, "loss": 0.5219, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.466789186000824, "rewards/margins": 0.47098153829574585, "rewards/real": 0.0041923513635993, "step": 310 }, { "epoch": 0.2, "learning_rate": 4.987557767507998e-07, "logits/generated": 5.904444694519043, "logits/real": 4.497801303863525, "logps/generated": -920.7421875, "logps/real": -258.6832580566406, "loss": 0.5076, "rewards/accuracies": 0.875, "rewards/generated": -0.6512015461921692, "rewards/margins": 0.6806803941726685, "rewards/real": 0.02947883866727352, "step": 320 }, { "epoch": 0.21, "learning_rate": 4.969783149662282e-07, "logits/generated": 5.837555885314941, "logits/real": 4.39646053314209, "logps/generated": -887.0213623046875, "logps/real": -259.81976318359375, "loss": 0.4883, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -0.6606628894805908, "rewards/margins": 0.6972694396972656, "rewards/real": 0.036606594920158386, "step": 330 }, { "epoch": 0.22, "learning_rate": 4.952008531816565e-07, "logits/generated": 6.078923225402832, "logits/real": 4.288995265960693, "logps/generated": -755.9134521484375, "logps/real": -254.28958129882812, "loss": 0.5167, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -0.4952046275138855, "rewards/margins": 0.5218986868858337, "rewards/real": 0.026694035157561302, "step": 340 }, { "epoch": 0.22, "learning_rate": 4.93423391397085e-07, "logits/generated": 6.069632530212402, "logits/real": 4.2718353271484375, "logps/generated": -885.2103271484375, "logps/real": -246.41549682617188, "loss": 0.494, "rewards/accuracies": 0.75, "rewards/generated": -0.8642350435256958, "rewards/margins": 0.8856006860733032, "rewards/real": 0.021365612745285034, "step": 350 }, { "epoch": 0.23, "learning_rate": 4.916459296125133e-07, "logits/generated": 6.0406084060668945, "logits/real": 4.213619232177734, "logps/generated": -862.3121337890625, "logps/real": -237.30038452148438, "loss": 0.4344, "rewards/accuracies": 0.875, "rewards/generated": -0.819617748260498, "rewards/margins": 0.8550539016723633, "rewards/real": 0.03543621301651001, "step": 360 }, { "epoch": 0.24, "learning_rate": 4.898684678279417e-07, "logits/generated": 5.9395928382873535, "logits/real": 4.514933109283447, "logps/generated": -784.0012817382812, "logps/real": -274.2304992675781, "loss": 0.482, "rewards/accuracies": 0.875, "rewards/generated": -0.666130006313324, "rewards/margins": 0.7213379144668579, "rewards/real": 0.055207859724760056, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.8809100604337e-07, "logits/generated": 5.9382734298706055, "logits/real": 4.106156826019287, "logps/generated": -868.2293090820312, "logps/real": -257.92828369140625, "loss": 0.4572, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.924160361289978, "rewards/margins": 0.9683877229690552, "rewards/real": 0.044227343052625656, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.863135442587984e-07, "logits/generated": 6.171679496765137, "logits/real": 4.2907819747924805, "logps/generated": -979.2257080078125, "logps/real": -244.9657745361328, "loss": 0.4354, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -1.2005796432495117, "rewards/margins": 1.255791187286377, "rewards/real": 0.05521152541041374, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.845360824742267e-07, "logits/generated": 6.069159984588623, "logits/real": 4.032870292663574, "logps/generated": -804.0493774414062, "logps/real": -250.74868774414062, "loss": 0.447, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.7962594032287598, "rewards/margins": 0.8555679321289062, "rewards/real": 0.05930844694375992, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.827586206896552e-07, "logits/generated": 6.105320453643799, "logits/real": 4.221116542816162, "logps/generated": -797.972900390625, "logps/real": -244.3699188232422, "loss": 0.471, "rewards/accuracies": 0.9375, "rewards/generated": -0.7431430816650391, "rewards/margins": 0.8322073817253113, "rewards/real": 0.08906435966491699, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.809811589050835e-07, "logits/generated": 5.868694305419922, "logits/real": 4.118344306945801, "logps/generated": -944.1519775390625, "logps/real": -250.259765625, "loss": 0.4199, "rewards/accuracies": 0.9375, "rewards/generated": -1.201935887336731, "rewards/margins": 1.2849478721618652, "rewards/real": 0.08301188051700592, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.792036971205119e-07, "logits/generated": 6.04774284362793, "logits/real": 4.184063911437988, "logps/generated": -943.5716552734375, "logps/real": -233.9786834716797, "loss": 0.3929, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.1643246412277222, "rewards/margins": 1.2494871616363525, "rewards/real": 0.08516237884759903, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.774262353359402e-07, "logits/generated": 6.015780925750732, "logits/real": 4.106911659240723, "logps/generated": -873.0906982421875, "logps/real": -245.89151000976562, "loss": 0.4092, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.0168969631195068, "rewards/margins": 1.096966028213501, "rewards/real": 0.08006921410560608, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.7564877355136863e-07, "logits/generated": 6.1327619552612305, "logits/real": 4.286798000335693, "logps/generated": -913.6229248046875, "logps/real": -235.7820281982422, "loss": 0.3918, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.1738783121109009, "rewards/margins": 1.2836366891860962, "rewards/real": 0.1097583994269371, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.73871311766797e-07, "logits/generated": 5.843601226806641, "logits/real": 4.894453048706055, "logps/generated": -836.0406494140625, "logps/real": -262.8157653808594, "loss": 0.3896, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.122596263885498, "rewards/margins": 1.2202900648117065, "rewards/real": 0.09769367426633835, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.7209384998222536e-07, "logits/generated": 5.926724433898926, "logits/real": 4.339844226837158, "logps/generated": -835.6307373046875, "logps/real": -243.3745880126953, "loss": 0.3792, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.0902034044265747, "rewards/margins": 1.2509477138519287, "rewards/real": 0.16074436902999878, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.7031638819765373e-07, "logits/generated": 6.176774501800537, "logits/real": 4.369190692901611, "logps/generated": -1035.437255859375, "logps/real": -241.65518188476562, "loss": 0.3517, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.6769460439682007, "rewards/margins": 1.8335769176483154, "rewards/real": 0.15663087368011475, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.6853892641308215e-07, "logits/generated": 6.054518699645996, "logits/real": 4.261479377746582, "logps/generated": -991.4581909179688, "logps/real": -227.12417602539062, "loss": 0.3452, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.612874984741211, "rewards/margins": 1.7803363800048828, "rewards/real": 0.1674613654613495, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.6676146462851046e-07, "logits/generated": 5.990649700164795, "logits/real": 4.494472026824951, "logps/generated": -877.47412109375, "logps/real": -245.2666015625, "loss": 0.3664, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.3127868175506592, "rewards/margins": 1.4855382442474365, "rewards/real": 0.1727515310049057, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.649840028439388e-07, "logits/generated": 6.058517932891846, "logits/real": 4.018253803253174, "logps/generated": -849.6085815429688, "logps/real": -239.42333984375, "loss": 0.3424, "rewards/accuracies": 1.0, "rewards/generated": -1.2417773008346558, "rewards/margins": 1.4562361240386963, "rewards/real": 0.21445894241333008, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.632065410593672e-07, "logits/generated": 6.090991020202637, "logits/real": 4.206871509552002, "logps/generated": -858.64306640625, "logps/real": -231.8360595703125, "loss": 0.3043, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.3968095779418945, "rewards/margins": 1.6249290704727173, "rewards/real": 0.22811949253082275, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.6142907927479556e-07, "logits/generated": 5.986910820007324, "logits/real": 4.2822465896606445, "logps/generated": -1048.8765869140625, "logps/real": -231.92068481445312, "loss": 0.2983, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.237921714782715, "rewards/margins": 2.462768793106079, "rewards/real": 0.22484686970710754, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.59651617490224e-07, "logits/generated": 5.905124187469482, "logits/real": 4.177928447723389, "logps/generated": -821.7687377929688, "logps/real": -246.280517578125, "loss": 0.3157, "rewards/accuracies": 1.0, "rewards/generated": -1.3176755905151367, "rewards/margins": 1.5714536905288696, "rewards/real": 0.2537779211997986, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.578741557056523e-07, "logits/generated": 5.976016044616699, "logits/real": 4.194876194000244, "logps/generated": -845.0601806640625, "logps/real": -246.12539672851562, "loss": 0.2847, "rewards/accuracies": 1.0, "rewards/generated": -1.4623727798461914, "rewards/margins": 1.7181476354599, "rewards/real": 0.25577467679977417, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.560966939210807e-07, "logits/generated": 6.045588493347168, "logits/real": 4.3586626052856445, "logps/generated": -811.4501953125, "logps/real": -235.3633270263672, "loss": 0.2894, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.3929394483566284, "rewards/margins": 1.660951018333435, "rewards/real": 0.2680116295814514, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.54319232136509e-07, "logits/generated": 5.941956520080566, "logits/real": 3.9948837757110596, "logps/generated": -939.8425903320312, "logps/real": -252.4247283935547, "loss": 0.2729, "rewards/accuracies": 1.0, "rewards/generated": -1.8964020013809204, "rewards/margins": 2.198000907897949, "rewards/real": 0.3015987277030945, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.525417703519374e-07, "logits/generated": 5.852833271026611, "logits/real": 4.229121208190918, "logps/generated": -947.67333984375, "logps/real": -247.6307830810547, "loss": 0.2615, "rewards/accuracies": 1.0, "rewards/generated": -1.9862486124038696, "rewards/margins": 2.275512933731079, "rewards/real": 0.28926438093185425, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.507643085673658e-07, "logits/generated": 5.97081995010376, "logits/real": 4.184626579284668, "logps/generated": -939.2525634765625, "logps/real": -249.10293579101562, "loss": 0.2501, "rewards/accuracies": 1.0, "rewards/generated": -2.023348331451416, "rewards/margins": 2.3503973484039307, "rewards/real": 0.3270490765571594, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.489868467827941e-07, "logits/generated": 5.946187496185303, "logits/real": 3.98576021194458, "logps/generated": -868.517578125, "logps/real": -234.7804412841797, "loss": 0.2434, "rewards/accuracies": 1.0, "rewards/generated": -1.8050874471664429, "rewards/margins": 2.1618123054504395, "rewards/real": 0.35672444105148315, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.4720938499822254e-07, "logits/generated": 6.001513481140137, "logits/real": 4.200368881225586, "logps/generated": -938.6095581054688, "logps/real": -236.7099609375, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/generated": -2.136950731277466, "rewards/margins": 2.4946799278259277, "rewards/real": 0.3577292263507843, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.4543192321365085e-07, "logits/generated": 6.019902229309082, "logits/real": 4.102996349334717, "logps/generated": -877.0018310546875, "logps/real": -231.89395141601562, "loss": 0.2346, "rewards/accuracies": 1.0, "rewards/generated": -1.801348328590393, "rewards/margins": 2.227733850479126, "rewards/real": 0.42638540267944336, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.4365446142907927e-07, "logits/generated": 5.965851783752441, "logits/real": 4.574382781982422, "logps/generated": -923.7843627929688, "logps/real": -251.86672973632812, "loss": 0.2313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.9577804803848267, "rewards/margins": 2.378849744796753, "rewards/real": 0.4210694432258606, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.4187699964450764e-07, "logits/generated": 6.019913196563721, "logits/real": 4.158315181732178, "logps/generated": -858.7822265625, "logps/real": -240.2414093017578, "loss": 0.1937, "rewards/accuracies": 1.0, "rewards/generated": -1.8835636377334595, "rewards/margins": 2.340017795562744, "rewards/real": 0.45645445585250854, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.4009953785993595e-07, "logits/generated": 6.02773380279541, "logits/real": 4.274472713470459, "logps/generated": -930.7633056640625, "logps/real": -252.86410522460938, "loss": 0.2073, "rewards/accuracies": 1.0, "rewards/generated": -2.2265586853027344, "rewards/margins": 2.6945266723632812, "rewards/real": 0.4679679274559021, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.3832207607536437e-07, "logits/generated": 6.097564220428467, "logits/real": 4.119298458099365, "logps/generated": -823.2561645507812, "logps/real": -236.7152557373047, "loss": 0.2025, "rewards/accuracies": 1.0, "rewards/generated": -1.7512767314910889, "rewards/margins": 2.2316291332244873, "rewards/real": 0.48035264015197754, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.365446142907927e-07, "logits/generated": 6.0582170486450195, "logits/real": 4.083959579467773, "logps/generated": -952.2449340820312, "logps/real": -228.7444305419922, "loss": 0.2019, "rewards/accuracies": 1.0, "rewards/generated": -2.267892599105835, "rewards/margins": 2.736295461654663, "rewards/real": 0.4684027135372162, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.347671525062211e-07, "logits/generated": 5.989724159240723, "logits/real": 4.3230791091918945, "logps/generated": -932.2943115234375, "logps/real": -237.28170776367188, "loss": 0.1903, "rewards/accuracies": 1.0, "rewards/generated": -2.2489707469940186, "rewards/margins": 2.7711246013641357, "rewards/real": 0.5221537947654724, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.3298969072164947e-07, "logits/generated": 5.9734578132629395, "logits/real": 4.305315971374512, "logps/generated": -898.1632080078125, "logps/real": -249.6607666015625, "loss": 0.1736, "rewards/accuracies": 1.0, "rewards/generated": -2.172926664352417, "rewards/margins": 2.7069246768951416, "rewards/real": 0.5339978337287903, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.3121222893707783e-07, "logits/generated": 5.79467248916626, "logits/real": 4.179832458496094, "logps/generated": -853.1522216796875, "logps/real": -225.7552490234375, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/generated": -2.0059478282928467, "rewards/margins": 2.5637969970703125, "rewards/real": 0.5578492879867554, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.294347671525062e-07, "logits/generated": 6.031177997589111, "logits/real": 4.149500846862793, "logps/generated": -914.326171875, "logps/real": -237.4337158203125, "loss": 0.1682, "rewards/accuracies": 1.0, "rewards/generated": -2.2514748573303223, "rewards/margins": 2.817643642425537, "rewards/real": 0.5661691427230835, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.276573053679346e-07, "logits/generated": 6.02950382232666, "logits/real": 4.436091423034668, "logps/generated": -924.0319213867188, "logps/real": -244.99362182617188, "loss": 0.1649, "rewards/accuracies": 1.0, "rewards/generated": -2.3590235710144043, "rewards/margins": 2.9966607093811035, "rewards/real": 0.6376368403434753, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.2587984358336293e-07, "logits/generated": 6.004773139953613, "logits/real": 4.440590858459473, "logps/generated": -880.44970703125, "logps/real": -252.4867401123047, "loss": 0.148, "rewards/accuracies": 1.0, "rewards/generated": -2.2073614597320557, "rewards/margins": 2.841465473175049, "rewards/real": 0.6341038942337036, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.241023817987913e-07, "logits/generated": 6.170002460479736, "logits/real": 4.281262397766113, "logps/generated": -804.2672119140625, "logps/real": -246.38973999023438, "loss": 0.1503, "rewards/accuracies": 1.0, "rewards/generated": -2.0122382640838623, "rewards/margins": 2.680762529373169, "rewards/real": 0.668523907661438, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.2232492001421966e-07, "logits/generated": 5.92216157913208, "logits/real": 4.2551093101501465, "logps/generated": -862.2703857421875, "logps/real": -258.5106201171875, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/generated": -2.400254249572754, "rewards/margins": 3.064797878265381, "rewards/real": 0.6645434498786926, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.2054745822964803e-07, "logits/generated": 5.7719502449035645, "logits/real": 4.651850700378418, "logps/generated": -891.9352416992188, "logps/real": -271.16015625, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/generated": -2.4314839839935303, "rewards/margins": 3.151881694793701, "rewards/real": 0.7203975319862366, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.1876999644507645e-07, "logits/generated": 6.078845024108887, "logits/real": 4.190167427062988, "logps/generated": -986.5220947265625, "logps/real": -244.7814483642578, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/generated": -2.8437342643737793, "rewards/margins": 3.5870704650878906, "rewards/real": 0.7433363795280457, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.1699253466050476e-07, "logits/generated": 5.826613903045654, "logits/real": 4.3144378662109375, "logps/generated": -897.0218505859375, "logps/real": -217.27749633789062, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/generated": -2.579515218734741, "rewards/margins": 3.3170909881591797, "rewards/real": 0.737575888633728, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.152150728759332e-07, "logits/generated": 5.925543785095215, "logits/real": 4.433269500732422, "logps/generated": -918.3221435546875, "logps/real": -254.0979766845703, "loss": 0.1275, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.530599594116211, "rewards/margins": 3.2494373321533203, "rewards/real": 0.7188378572463989, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.134376110913615e-07, "logits/generated": 5.992081642150879, "logits/real": 4.210297107696533, "logps/generated": -960.2722778320312, "logps/real": -226.6741943359375, "loss": 0.1126, "rewards/accuracies": 1.0, "rewards/generated": -2.807277202606201, "rewards/margins": 3.636183261871338, "rewards/real": 0.8289061784744263, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.1166014930678986e-07, "logits/generated": 6.153465747833252, "logits/real": 4.260440349578857, "logps/generated": -966.8284912109375, "logps/real": -226.76773071289062, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/generated": -2.9602110385894775, "rewards/margins": 3.7220828533172607, "rewards/real": 0.7618720531463623, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.098826875222183e-07, "logits/generated": 5.99381160736084, "logits/real": 4.221859931945801, "logps/generated": -903.6896362304688, "logps/real": -233.30349731445312, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/generated": -2.786527633666992, "rewards/margins": 3.6095592975616455, "rewards/real": 0.823030948638916, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.081052257376466e-07, "logits/generated": 5.980920314788818, "logits/real": 4.316037178039551, "logps/generated": -979.0846557617188, "logps/real": -240.3975830078125, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/generated": -3.2098584175109863, "rewards/margins": 4.0826005935668945, "rewards/real": 0.8727418780326843, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.06327763953075e-07, "logits/generated": 6.098940849304199, "logits/real": 4.4456987380981445, "logps/generated": -979.9505004882812, "logps/real": -263.83966064453125, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/generated": -3.254727840423584, "rewards/margins": 4.050951957702637, "rewards/real": 0.7962234616279602, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.045503021685033e-07, "logits/generated": 6.151425361633301, "logits/real": 4.164916038513184, "logps/generated": -920.0028076171875, "logps/real": -246.395263671875, "loss": 0.0947, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9581751823425293, "rewards/margins": 3.8200302124023438, "rewards/real": 0.861855149269104, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.0277284038393174e-07, "logits/generated": 6.034658908843994, "logits/real": 4.340263366699219, "logps/generated": -912.1318359375, "logps/real": -238.3675537109375, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/generated": -2.746849536895752, "rewards/margins": 3.6980957984924316, "rewards/real": 0.9512465596199036, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.009953785993601e-07, "logits/generated": 5.890770435333252, "logits/real": 4.256333351135254, "logps/generated": -841.7523193359375, "logps/real": -251.98605346679688, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/generated": -2.5989060401916504, "rewards/margins": 3.512964963912964, "rewards/real": 0.9140589833259583, "step": 870 }, { "epoch": 0.56, "learning_rate": 3.992179168147884e-07, "logits/generated": 5.99869441986084, "logits/real": 4.36562442779541, "logps/generated": -1008.8406372070312, "logps/real": -242.4652862548828, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/generated": -3.4944260120391846, "rewards/margins": 4.502266883850098, "rewards/real": 1.0078411102294922, "step": 880 }, { "epoch": 0.57, "learning_rate": 3.9744045503021684e-07, "logits/generated": 5.980597496032715, "logits/real": 4.342673301696777, "logps/generated": -875.7488403320312, "logps/real": -243.07528686523438, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/generated": -2.8209424018859863, "rewards/margins": 3.841762065887451, "rewards/real": 1.0208194255828857, "step": 890 }, { "epoch": 0.58, "learning_rate": 3.956629932456452e-07, "logits/generated": 5.762509346008301, "logits/real": 4.160869598388672, "logps/generated": -963.9265747070312, "logps/real": -229.8173828125, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/generated": -3.1484429836273193, "rewards/margins": 4.151796340942383, "rewards/real": 1.0033533573150635, "step": 900 }, { "epoch": 0.58, "learning_rate": 3.938855314610736e-07, "logits/generated": 6.13181209564209, "logits/real": 4.697200775146484, "logps/generated": -839.2921752929688, "logps/real": -260.0076904296875, "loss": 0.0733, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.6962039470672607, "rewards/margins": 3.7993292808532715, "rewards/real": 1.103124976158142, "step": 910 }, { "epoch": 0.59, "learning_rate": 3.9210806967650194e-07, "logits/generated": 5.9377360343933105, "logits/real": 4.226620674133301, "logps/generated": -908.3089599609375, "logps/real": -254.44149780273438, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/generated": -2.9502556324005127, "rewards/margins": 3.9924514293670654, "rewards/real": 1.0421960353851318, "step": 920 }, { "epoch": 0.6, "learning_rate": 3.903306078919303e-07, "logits/generated": 6.1085710525512695, "logits/real": 4.342537879943848, "logps/generated": -1006.3308715820312, "logps/real": -224.99795532226562, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/generated": -3.459972381591797, "rewards/margins": 4.4290771484375, "rewards/real": 0.9691041707992554, "step": 930 }, { "epoch": 0.6, "learning_rate": 3.8855314610735867e-07, "logits/generated": 6.049094200134277, "logits/real": 4.437540531158447, "logps/generated": -905.2625732421875, "logps/real": -250.00546264648438, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/generated": -3.0280721187591553, "rewards/margins": 4.146524429321289, "rewards/real": 1.1184518337249756, "step": 940 }, { "epoch": 0.61, "learning_rate": 3.867756843227871e-07, "logits/generated": 6.0964884757995605, "logits/real": 4.308868408203125, "logps/generated": -988.3762817382812, "logps/real": -213.0933074951172, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/generated": -3.6333088874816895, "rewards/margins": 4.766198635101318, "rewards/real": 1.132889986038208, "step": 950 }, { "epoch": 0.61, "learning_rate": 3.849982225382154e-07, "logits/generated": 6.094457626342773, "logits/real": 4.595139980316162, "logps/generated": -874.7532348632812, "logps/real": -239.2107391357422, "loss": 0.071, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.91304612159729, "rewards/margins": 4.163543701171875, "rewards/real": 1.2504980564117432, "step": 960 }, { "epoch": 0.62, "learning_rate": 3.8322076075364377e-07, "logits/generated": 5.932214260101318, "logits/real": 4.410408020019531, "logps/generated": -892.4168090820312, "logps/real": -231.3613739013672, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/generated": -3.2558155059814453, "rewards/margins": 4.5149030685424805, "rewards/real": 1.2590879201889038, "step": 970 }, { "epoch": 0.63, "learning_rate": 3.8144329896907214e-07, "logits/generated": 5.978908538818359, "logits/real": 3.7718472480773926, "logps/generated": -904.0095825195312, "logps/real": -221.69680786132812, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/generated": -3.370866298675537, "rewards/margins": 4.739681243896484, "rewards/real": 1.3688147068023682, "step": 980 }, { "epoch": 0.63, "learning_rate": 3.796658371845005e-07, "logits/generated": 5.914143085479736, "logits/real": 4.5271077156066895, "logps/generated": -804.7938232421875, "logps/real": -251.84097290039062, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/generated": -2.7886099815368652, "rewards/margins": 4.072835445404053, "rewards/real": 1.2842260599136353, "step": 990 }, { "epoch": 0.64, "learning_rate": 3.778883753999289e-07, "logits/generated": 6.046614646911621, "logits/real": 4.076377868652344, "logps/generated": -828.2825317382812, "logps/real": -222.2332000732422, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/generated": -3.041111469268799, "rewards/margins": 4.31611967086792, "rewards/real": 1.2750083208084106, "step": 1000 }, { "epoch": 0.65, "learning_rate": 3.7611091361535723e-07, "logits/generated": 5.892106056213379, "logits/real": 4.29874324798584, "logps/generated": -991.6868286132812, "logps/real": -222.35653686523438, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/generated": -3.95190167427063, "rewards/margins": 5.255926132202148, "rewards/real": 1.3040244579315186, "step": 1010 }, { "epoch": 0.65, "learning_rate": 3.7433345183078565e-07, "logits/generated": 5.954197883605957, "logits/real": 3.857102632522583, "logps/generated": -923.1248168945312, "logps/real": -203.76199340820312, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/generated": -3.505117893218994, "rewards/margins": 4.928933143615723, "rewards/real": 1.4238157272338867, "step": 1020 }, { "epoch": 0.66, "learning_rate": 3.7255599004621397e-07, "logits/generated": 6.068178653717041, "logits/real": 4.234717845916748, "logps/generated": -960.6267700195312, "logps/real": -242.6820831298828, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/generated": -3.7443442344665527, "rewards/margins": 5.048287868499756, "rewards/real": 1.3039430379867554, "step": 1030 }, { "epoch": 0.67, "learning_rate": 3.7077852826164233e-07, "logits/generated": 6.109949588775635, "logits/real": 4.280592441558838, "logps/generated": -789.5339965820312, "logps/real": -248.1779022216797, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/generated": -3.201239824295044, "rewards/margins": 4.559616565704346, "rewards/real": 1.358377456665039, "step": 1040 }, { "epoch": 0.67, "learning_rate": 3.6900106647707075e-07, "logits/generated": 6.069329261779785, "logits/real": 4.076823711395264, "logps/generated": -924.1849365234375, "logps/real": -228.2224578857422, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/generated": -3.9169020652770996, "rewards/margins": 5.305215358734131, "rewards/real": 1.3883137702941895, "step": 1050 }, { "epoch": 0.68, "learning_rate": 3.6722360469249906e-07, "logits/generated": 6.091455936431885, "logits/real": 4.33205509185791, "logps/generated": -1016.7650146484375, "logps/real": -233.74496459960938, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/generated": -4.456690311431885, "rewards/margins": 5.847256183624268, "rewards/real": 1.3905656337738037, "step": 1060 }, { "epoch": 0.68, "learning_rate": 3.654461429079275e-07, "logits/generated": 6.010404109954834, "logits/real": 4.235081672668457, "logps/generated": -920.1266479492188, "logps/real": -240.25985717773438, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/generated": -3.6452674865722656, "rewards/margins": 5.162905693054199, "rewards/real": 1.5176377296447754, "step": 1070 }, { "epoch": 0.69, "learning_rate": 3.6366868112335585e-07, "logits/generated": 5.845242500305176, "logits/real": 4.197909355163574, "logps/generated": -866.0949096679688, "logps/real": -226.61862182617188, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/generated": -3.452317476272583, "rewards/margins": 5.058979511260986, "rewards/real": 1.6066612005233765, "step": 1080 }, { "epoch": 0.7, "learning_rate": 3.618912193387842e-07, "logits/generated": 6.059525012969971, "logits/real": 4.2445173263549805, "logps/generated": -1042.3199462890625, "logps/real": -222.6482696533203, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/generated": -4.495182514190674, "rewards/margins": 6.020852088928223, "rewards/real": 1.5256696939468384, "step": 1090 }, { "epoch": 0.7, "learning_rate": 3.601137575542126e-07, "logits/generated": 6.201509952545166, "logits/real": 4.5010085105896, "logps/generated": -1009.8512573242188, "logps/real": -234.58670043945312, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/generated": -4.315596580505371, "rewards/margins": 5.6330060958862305, "rewards/real": 1.317409634590149, "step": 1100 }, { "epoch": 0.71, "learning_rate": 3.583362957696409e-07, "logits/generated": 6.0568976402282715, "logits/real": 4.108635425567627, "logps/generated": -997.2374267578125, "logps/real": -220.4910430908203, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/generated": -4.327475070953369, "rewards/margins": 5.861368179321289, "rewards/real": 1.5338925123214722, "step": 1110 }, { "epoch": 0.72, "learning_rate": 3.565588339850693e-07, "logits/generated": 6.054575443267822, "logits/real": 4.0250701904296875, "logps/generated": -973.4529418945312, "logps/real": -234.23666381835938, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/generated": -4.307805061340332, "rewards/margins": 5.763493537902832, "rewards/real": 1.455688714981079, "step": 1120 }, { "epoch": 0.72, "learning_rate": 3.547813722004977e-07, "logits/generated": 5.811452865600586, "logits/real": 4.26353120803833, "logps/generated": -964.6253051757812, "logps/real": -233.96548461914062, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/generated": -4.142745018005371, "rewards/margins": 5.800713539123535, "rewards/real": 1.6579687595367432, "step": 1130 }, { "epoch": 0.73, "learning_rate": 3.5300391041592605e-07, "logits/generated": 5.870312213897705, "logits/real": 4.17107629776001, "logps/generated": -863.7185668945312, "logps/real": -227.08627319335938, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/generated": -3.778026580810547, "rewards/margins": 5.291924476623535, "rewards/real": 1.5138972997665405, "step": 1140 }, { "epoch": 0.74, "learning_rate": 3.512264486313544e-07, "logits/generated": 5.952390670776367, "logits/real": 4.353332996368408, "logps/generated": -848.7848510742188, "logps/real": -239.64132690429688, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/generated": -3.684706449508667, "rewards/margins": 5.332028388977051, "rewards/real": 1.6473219394683838, "step": 1150 }, { "epoch": 0.74, "learning_rate": 3.494489868467828e-07, "logits/generated": 5.984662055969238, "logits/real": 4.688880443572998, "logps/generated": -926.8069458007812, "logps/real": -222.42788696289062, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/generated": -4.1294732093811035, "rewards/margins": 5.777259349822998, "rewards/real": 1.6477859020233154, "step": 1160 }, { "epoch": 0.75, "learning_rate": 3.4767152506221114e-07, "logits/generated": 6.093937873840332, "logits/real": 4.4692511558532715, "logps/generated": -893.0154418945312, "logps/real": -249.5071563720703, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/generated": -3.984126567840576, "rewards/margins": 5.740622520446777, "rewards/real": 1.7564961910247803, "step": 1170 }, { "epoch": 0.75, "learning_rate": 3.458940632776395e-07, "logits/generated": 6.014351844787598, "logits/real": 4.509620666503906, "logps/generated": -945.0499877929688, "logps/real": -208.66506958007812, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/generated": -4.119633674621582, "rewards/margins": 5.723677635192871, "rewards/real": 1.6040436029434204, "step": 1180 }, { "epoch": 0.76, "learning_rate": 3.441166014930679e-07, "logits/generated": 5.890795707702637, "logits/real": 4.306885719299316, "logps/generated": -920.9514770507812, "logps/real": -219.43441772460938, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/generated": -4.295720100402832, "rewards/margins": 5.946078300476074, "rewards/real": 1.6503584384918213, "step": 1190 }, { "epoch": 0.77, "learning_rate": 3.4233913970849624e-07, "logits/generated": 6.092907905578613, "logits/real": 4.382439613342285, "logps/generated": -869.8155517578125, "logps/real": -213.0802459716797, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/generated": -3.7826313972473145, "rewards/margins": 5.640969276428223, "rewards/real": 1.858338713645935, "step": 1200 }, { "epoch": 0.77, "learning_rate": 3.405616779239246e-07, "logits/generated": 5.900615692138672, "logits/real": 4.525036334991455, "logps/generated": -850.4383544921875, "logps/real": -246.38864135742188, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/generated": -3.64056396484375, "rewards/margins": 5.370596885681152, "rewards/real": 1.7300331592559814, "step": 1210 }, { "epoch": 0.78, "learning_rate": 3.38784216139353e-07, "logits/generated": 6.163350582122803, "logits/real": 4.153713226318359, "logps/generated": -1029.7822265625, "logps/real": -221.838134765625, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/generated": -4.886613368988037, "rewards/margins": 6.8068389892578125, "rewards/real": 1.9202255010604858, "step": 1220 }, { "epoch": 0.79, "learning_rate": 3.370067543547814e-07, "logits/generated": 5.858575344085693, "logits/real": 4.069919109344482, "logps/generated": -794.5404663085938, "logps/real": -215.64620971679688, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/generated": -3.5341129302978516, "rewards/margins": 5.426337242126465, "rewards/real": 1.8922239542007446, "step": 1230 }, { "epoch": 0.79, "learning_rate": 3.352292925702097e-07, "logits/generated": 6.137094497680664, "logits/real": 4.2704057693481445, "logps/generated": -988.0245361328125, "logps/real": -224.5236358642578, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/generated": -4.90227746963501, "rewards/margins": 6.6614089012146, "rewards/real": 1.7591317892074585, "step": 1240 }, { "epoch": 0.8, "learning_rate": 3.334518307856381e-07, "logits/generated": 5.977653503417969, "logits/real": 4.102234363555908, "logps/generated": -871.8009643554688, "logps/real": -239.428955078125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/generated": -3.9591381549835205, "rewards/margins": 5.843039512634277, "rewards/real": 1.883901834487915, "step": 1250 }, { "epoch": 0.81, "learning_rate": 3.316743690010665e-07, "logits/generated": 5.898265361785889, "logits/real": 3.970461368560791, "logps/generated": -968.3258056640625, "logps/real": -222.0578155517578, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/generated": -4.621912479400635, "rewards/margins": 6.498594760894775, "rewards/real": 1.8766825199127197, "step": 1260 }, { "epoch": 0.81, "learning_rate": 3.298969072164948e-07, "logits/generated": 5.933172225952148, "logits/real": 4.601564407348633, "logps/generated": -935.9641723632812, "logps/real": -240.28604125976562, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/generated": -4.39015007019043, "rewards/margins": 6.343320846557617, "rewards/real": 1.9531705379486084, "step": 1270 }, { "epoch": 0.82, "learning_rate": 3.281194454319232e-07, "logits/generated": 6.0884270668029785, "logits/real": 4.594856262207031, "logps/generated": -970.0340576171875, "logps/real": -238.2774658203125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/generated": -4.799683094024658, "rewards/margins": 6.7805304527282715, "rewards/real": 1.980847716331482, "step": 1280 }, { "epoch": 0.83, "learning_rate": 3.2634198364735154e-07, "logits/generated": 6.068641185760498, "logits/real": 4.550690650939941, "logps/generated": -885.751953125, "logps/real": -224.6710205078125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/generated": -4.135249137878418, "rewards/margins": 5.933065891265869, "rewards/real": 1.797816514968872, "step": 1290 }, { "epoch": 0.83, "learning_rate": 3.2456452186277996e-07, "logits/generated": 5.979355812072754, "logits/real": 4.242101192474365, "logps/generated": -818.2512817382812, "logps/real": -227.32369995117188, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/generated": -3.9465458393096924, "rewards/margins": 6.159433364868164, "rewards/real": 2.2128875255584717, "step": 1300 }, { "epoch": 0.84, "learning_rate": 3.227870600782083e-07, "logits/generated": 6.055127143859863, "logits/real": 4.004499912261963, "logps/generated": -912.0535278320312, "logps/real": -221.82894897460938, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/generated": -4.248215675354004, "rewards/margins": 6.312341690063477, "rewards/real": 2.0641255378723145, "step": 1310 }, { "epoch": 0.84, "learning_rate": 3.210095982936367e-07, "logits/generated": 5.85116720199585, "logits/real": 4.39048433303833, "logps/generated": -948.9578857421875, "logps/real": -230.64669799804688, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/generated": -4.669097900390625, "rewards/margins": 6.7218804359436035, "rewards/real": 2.052781581878662, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.1923213650906505e-07, "logits/generated": 5.8959479331970215, "logits/real": 4.247786521911621, "logps/generated": -939.8728637695312, "logps/real": -230.90365600585938, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/generated": -4.3719964027404785, "rewards/margins": 6.431820869445801, "rewards/real": 2.0598244667053223, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.1745467472449337e-07, "logits/generated": 5.981402397155762, "logits/real": 4.520476341247559, "logps/generated": -1016.5609130859375, "logps/real": -234.46566772460938, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/generated": -5.179906368255615, "rewards/margins": 7.442817687988281, "rewards/real": 2.262911319732666, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.156772129399218e-07, "logits/generated": 5.772179126739502, "logits/real": 4.208775520324707, "logps/generated": -790.708984375, "logps/real": -210.6012420654297, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/generated": -3.570807933807373, "rewards/margins": 5.612199306488037, "rewards/real": 2.041391134262085, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.1389975115535015e-07, "logits/generated": 6.091591835021973, "logits/real": 4.308236122131348, "logps/generated": -937.48095703125, "logps/real": -217.81820678710938, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/generated": -4.688237190246582, "rewards/margins": 6.935911655426025, "rewards/real": 2.2476744651794434, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.121222893707785e-07, "logits/generated": 5.910862922668457, "logits/real": 4.276075839996338, "logps/generated": -890.7863159179688, "logps/real": -241.32388305664062, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/generated": -4.329002380371094, "rewards/margins": 6.619669437408447, "rewards/real": 2.290666341781616, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.103448275862069e-07, "logits/generated": 6.195101261138916, "logits/real": 4.239165782928467, "logps/generated": -918.6710815429688, "logps/real": -233.1322479248047, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/generated": -4.716291904449463, "rewards/margins": 6.934659481048584, "rewards/real": 2.2183680534362793, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.0856736580163525e-07, "logits/generated": 6.146795749664307, "logits/real": 4.251899719238281, "logps/generated": -896.61669921875, "logps/real": -236.81289672851562, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/generated": -4.417421817779541, "rewards/margins": 6.579673767089844, "rewards/real": 2.162252426147461, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.067899040170636e-07, "logits/generated": 5.921133995056152, "logits/real": 4.311642646789551, "logps/generated": -1017.5269775390625, "logps/real": -231.27236938476562, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/generated": -5.362940311431885, "rewards/margins": 7.530875205993652, "rewards/real": 2.1679351329803467, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.05012442232492e-07, "logits/generated": 6.124849319458008, "logits/real": 4.260175704956055, "logps/generated": -1089.728759765625, "logps/real": -230.3025665283203, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -6.250988960266113, "rewards/margins": 8.456911087036133, "rewards/real": 2.2059216499328613, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.0323498044792035e-07, "logits/generated": 5.7611541748046875, "logits/real": 3.985320568084717, "logps/generated": -856.4290771484375, "logps/real": -219.84048461914062, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/generated": -4.234499931335449, "rewards/margins": 6.346347808837891, "rewards/real": 2.1118481159210205, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.014575186633487e-07, "logits/generated": 6.099104881286621, "logits/real": 4.165801525115967, "logps/generated": -1022.8517456054688, "logps/real": -225.13583374023438, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -5.710265636444092, "rewards/margins": 8.138273239135742, "rewards/real": 2.428006649017334, "step": 1430 }, { "epoch": 0.92, "learning_rate": 2.996800568787771e-07, "logits/generated": 5.9258551597595215, "logits/real": 4.278181076049805, "logps/generated": -991.6013793945312, "logps/real": -224.7017822265625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/generated": -5.471372604370117, "rewards/margins": 7.898287773132324, "rewards/real": 2.426915168762207, "step": 1440 }, { "epoch": 0.93, "learning_rate": 2.9790259509420545e-07, "logits/generated": 6.0136613845825195, "logits/real": 4.205864906311035, "logps/generated": -879.1495971679688, "logps/real": -233.78097534179688, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/generated": -4.704037189483643, "rewards/margins": 6.998660087585449, "rewards/real": 2.2946221828460693, "step": 1450 }, { "epoch": 0.93, "learning_rate": 2.9612513330963387e-07, "logits/generated": 5.957737922668457, "logits/real": 4.087791919708252, "logps/generated": -961.3818359375, "logps/real": -189.39364624023438, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/generated": -5.026106834411621, "rewards/margins": 7.373734951019287, "rewards/real": 2.347627639770508, "step": 1460 }, { "epoch": 0.94, "learning_rate": 2.943476715250622e-07, "logits/generated": 5.931456565856934, "logits/real": 4.309812545776367, "logps/generated": -880.28955078125, "logps/real": -227.7857666015625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -4.744623184204102, "rewards/margins": 6.99019718170166, "rewards/real": 2.2455737590789795, "step": 1470 }, { "epoch": 0.95, "learning_rate": 2.9257020974049054e-07, "logits/generated": 6.020743370056152, "logits/real": 4.34235143661499, "logps/generated": -918.6298828125, "logps/real": -233.72021484375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/generated": -4.72176456451416, "rewards/margins": 7.291744232177734, "rewards/real": 2.5699801445007324, "step": 1480 }, { "epoch": 0.95, "learning_rate": 2.9079274795591896e-07, "logits/generated": 6.1958794593811035, "logits/real": 4.313004970550537, "logps/generated": -913.8150634765625, "logps/real": -237.1126251220703, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -5.31519889831543, "rewards/margins": 7.788308143615723, "rewards/real": 2.473109483718872, "step": 1490 }, { "epoch": 0.96, "learning_rate": 2.890152861713473e-07, "logits/generated": 5.900928497314453, "logits/real": 4.231713771820068, "logps/generated": -1001.9522705078125, "logps/real": -226.7261199951172, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -5.419561862945557, "rewards/margins": 7.794165134429932, "rewards/real": 2.3746023178100586, "step": 1500 }, { "epoch": 0.97, "learning_rate": 2.872378243867757e-07, "logits/generated": 5.979315280914307, "logits/real": 4.155876159667969, "logps/generated": -896.203125, "logps/real": -224.9453582763672, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/generated": -4.840609550476074, "rewards/margins": 7.281617164611816, "rewards/real": 2.441007137298584, "step": 1510 }, { "epoch": 0.97, "learning_rate": 2.85460362602204e-07, "logits/generated": 5.99771785736084, "logits/real": 4.335788249969482, "logps/generated": -985.0953369140625, "logps/real": -216.6324920654297, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/generated": -5.507411479949951, "rewards/margins": 8.085838317871094, "rewards/real": 2.5784270763397217, "step": 1520 }, { "epoch": 0.98, "learning_rate": 2.8368290081763243e-07, "logits/generated": 5.952843189239502, "logits/real": 4.346559524536133, "logps/generated": -945.6209106445312, "logps/real": -227.56997680664062, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -5.429326057434082, "rewards/margins": 8.042230606079102, "rewards/real": 2.612903118133545, "step": 1530 }, { "epoch": 0.99, "learning_rate": 2.819054390330608e-07, "logits/generated": 6.0241618156433105, "logits/real": 4.244063377380371, "logps/generated": -932.4935302734375, "logps/real": -231.8907012939453, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -5.1778459548950195, "rewards/margins": 7.901692867279053, "rewards/real": 2.723846435546875, "step": 1540 }, { "epoch": 0.99, "learning_rate": 2.801279772484891e-07, "logits/generated": 5.867781639099121, "logits/real": 4.314043045043945, "logps/generated": -851.2517700195312, "logps/real": -235.279296875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/generated": -4.752983570098877, "rewards/margins": 7.189466953277588, "rewards/real": 2.436483383178711, "step": 1550 }, { "epoch": 1.0, "learning_rate": 2.783505154639175e-07, "logits/generated": 5.901434898376465, "logits/real": 4.186619281768799, "logps/generated": -1009.0958862304688, "logps/real": -212.00369262695312, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -5.709371089935303, "rewards/margins": 8.269929885864258, "rewards/real": 2.560558795928955, "step": 1560 }, { "epoch": 1.0, "learning_rate": 2.7657305367934584e-07, "logits/generated": 5.8600664138793945, "logits/real": 4.312530040740967, "logps/generated": -903.2091674804688, "logps/real": -217.84249877929688, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -5.082972049713135, "rewards/margins": 7.53509521484375, "rewards/real": 2.4521236419677734, "step": 1570 }, { "epoch": 1.01, "learning_rate": 2.7479559189477426e-07, "logits/generated": 6.000660419464111, "logits/real": 4.141573905944824, "logps/generated": -989.6737060546875, "logps/real": -216.5168914794922, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/generated": -5.764129161834717, "rewards/margins": 8.395139694213867, "rewards/real": 2.6310102939605713, "step": 1580 }, { "epoch": 1.02, "learning_rate": 2.730181301102026e-07, "logits/generated": 5.965339660644531, "logits/real": 4.219322204589844, "logps/generated": -854.77783203125, "logps/real": -235.1974639892578, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -4.901017665863037, "rewards/margins": 7.7865166664123535, "rewards/real": 2.8854994773864746, "step": 1590 }, { "epoch": 1.02, "learning_rate": 2.71240668325631e-07, "logits/generated": 6.012763023376465, "logits/real": 4.389718055725098, "logps/generated": -984.3873291015625, "logps/real": -239.3701629638672, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -5.781050682067871, "rewards/margins": 8.516217231750488, "rewards/real": 2.73516583442688, "step": 1600 }, { "epoch": 1.03, "learning_rate": 2.6946320654105936e-07, "logits/generated": 5.979636192321777, "logits/real": 4.670422554016113, "logps/generated": -967.5466918945312, "logps/real": -236.21444702148438, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -5.631641387939453, "rewards/margins": 8.182405471801758, "rewards/real": 2.5507636070251465, "step": 1610 }, { "epoch": 1.04, "learning_rate": 2.676857447564877e-07, "logits/generated": 6.084554195404053, "logits/real": 4.161642551422119, "logps/generated": -987.7828979492188, "logps/real": -225.3826446533203, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/generated": -5.720307350158691, "rewards/margins": 8.545812606811523, "rewards/real": 2.8255059719085693, "step": 1620 }, { "epoch": 1.04, "learning_rate": 2.659082829719161e-07, "logits/generated": 5.93519926071167, "logits/real": 4.27223539352417, "logps/generated": -930.1174926757812, "logps/real": -235.65243530273438, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/generated": -5.5342912673950195, "rewards/margins": 8.177888870239258, "rewards/real": 2.6435976028442383, "step": 1630 }, { "epoch": 1.05, "learning_rate": 2.6413082118734445e-07, "logits/generated": 6.053906440734863, "logits/real": 4.279298305511475, "logps/generated": -873.9427490234375, "logps/real": -234.75802612304688, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -5.114261627197266, "rewards/margins": 7.79233455657959, "rewards/real": 2.678072929382324, "step": 1640 }, { "epoch": 1.06, "learning_rate": 2.623533594027728e-07, "logits/generated": 5.621105194091797, "logits/real": 4.41392707824707, "logps/generated": -888.2545776367188, "logps/real": -219.32546997070312, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.186315536499023, "rewards/margins": 7.618834495544434, "rewards/real": 2.432518720626831, "step": 1650 }, { "epoch": 1.06, "learning_rate": 2.605758976182012e-07, "logits/generated": 6.062514781951904, "logits/real": 4.3643798828125, "logps/generated": -1034.1868896484375, "logps/real": -212.97476196289062, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -6.618842124938965, "rewards/margins": 9.310908317565918, "rewards/real": 2.6920673847198486, "step": 1660 }, { "epoch": 1.07, "learning_rate": 2.587984358336296e-07, "logits/generated": 5.926046848297119, "logits/real": 4.445965766906738, "logps/generated": -821.0653076171875, "logps/real": -250.19674682617188, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -4.612605094909668, "rewards/margins": 7.3182783126831055, "rewards/real": 2.7056736946105957, "step": 1670 }, { "epoch": 1.07, "learning_rate": 2.570209740490579e-07, "logits/generated": 5.849274635314941, "logits/real": 4.148129463195801, "logps/generated": -878.97021484375, "logps/real": -214.24234008789062, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -5.2036943435668945, "rewards/margins": 7.895529747009277, "rewards/real": 2.691835403442383, "step": 1680 }, { "epoch": 1.08, "learning_rate": 2.5524351226448634e-07, "logits/generated": 6.104997634887695, "logits/real": 4.673565864562988, "logps/generated": -864.9569091796875, "logps/real": -246.60794067382812, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/generated": -5.325832366943359, "rewards/margins": 8.104931831359863, "rewards/real": 2.7790980339050293, "step": 1690 }, { "epoch": 1.09, "learning_rate": 2.5346605047991465e-07, "logits/generated": 6.075316429138184, "logits/real": 4.084663391113281, "logps/generated": -872.2156372070312, "logps/real": -223.63442993164062, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -5.434564113616943, "rewards/margins": 8.21035385131836, "rewards/real": 2.7757906913757324, "step": 1700 }, { "epoch": 1.09, "learning_rate": 2.51688588695343e-07, "logits/generated": 6.089809894561768, "logits/real": 4.060254096984863, "logps/generated": -923.2381591796875, "logps/real": -191.72105407714844, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -5.731915473937988, "rewards/margins": 8.29061222076416, "rewards/real": 2.5586960315704346, "step": 1710 }, { "epoch": 1.1, "learning_rate": 2.499111269107714e-07, "logits/generated": 6.061944007873535, "logits/real": 4.62351131439209, "logps/generated": -841.89013671875, "logps/real": -263.7066955566406, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -5.006676197052002, "rewards/margins": 7.827229976654053, "rewards/real": 2.820553779602051, "step": 1720 }, { "epoch": 1.11, "learning_rate": 2.4813366512619975e-07, "logits/generated": 5.924272537231445, "logits/real": 4.0540995597839355, "logps/generated": -903.25, "logps/real": -217.80874633789062, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -5.3574018478393555, "rewards/margins": 8.290170669555664, "rewards/real": 2.9327681064605713, "step": 1730 }, { "epoch": 1.11, "learning_rate": 2.4635620334162817e-07, "logits/generated": 6.056190013885498, "logits/real": 4.321590900421143, "logps/generated": -932.0973510742188, "logps/real": -225.2566375732422, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -5.739737510681152, "rewards/margins": 8.39258861541748, "rewards/real": 2.652851104736328, "step": 1740 }, { "epoch": 1.12, "learning_rate": 2.4457874155705653e-07, "logits/generated": 6.022032737731934, "logits/real": 4.070498466491699, "logps/generated": -932.9993286132812, "logps/real": -211.3143768310547, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -5.863167762756348, "rewards/margins": 8.546854972839355, "rewards/real": 2.683687448501587, "step": 1750 }, { "epoch": 1.13, "learning_rate": 2.428012797724849e-07, "logits/generated": 6.195624351501465, "logits/real": 4.141390800476074, "logps/generated": -957.7415161132812, "logps/real": -210.617431640625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -5.7170329093933105, "rewards/margins": 8.636775016784668, "rewards/real": 2.919740915298462, "step": 1760 }, { "epoch": 1.13, "learning_rate": 2.4102381798791327e-07, "logits/generated": 6.033434867858887, "logits/real": 4.116759777069092, "logps/generated": -842.8616333007812, "logps/real": -230.01284790039062, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -5.187349319458008, "rewards/margins": 8.29853343963623, "rewards/real": 3.1111843585968018, "step": 1770 }, { "epoch": 1.14, "learning_rate": 2.392463562033416e-07, "logits/generated": 6.119296550750732, "logits/real": 4.1949968338012695, "logps/generated": -1000.3076171875, "logps/real": -228.13479614257812, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -6.538392066955566, "rewards/margins": 9.60668659210205, "rewards/real": 3.0682942867279053, "step": 1780 }, { "epoch": 1.15, "learning_rate": 2.3746889441877e-07, "logits/generated": 5.990731716156006, "logits/real": 4.539961814880371, "logps/generated": -782.185791015625, "logps/real": -251.5903778076172, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -4.510619163513184, "rewards/margins": 7.38437557220459, "rewards/real": 2.873756170272827, "step": 1790 }, { "epoch": 1.15, "learning_rate": 2.3569143263419836e-07, "logits/generated": 6.0371599197387695, "logits/real": 4.193976402282715, "logps/generated": -992.1876220703125, "logps/real": -220.98495483398438, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -6.436119079589844, "rewards/margins": 9.288522720336914, "rewards/real": 2.852402687072754, "step": 1800 }, { "epoch": 1.16, "learning_rate": 2.3391397084962673e-07, "logits/generated": 6.059049129486084, "logits/real": 4.382033348083496, "logps/generated": -784.0813598632812, "logps/real": -219.2273712158203, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -4.49301815032959, "rewards/margins": 7.499096870422363, "rewards/real": 3.0060791969299316, "step": 1810 }, { "epoch": 1.16, "learning_rate": 2.3213650906505507e-07, "logits/generated": 5.9765849113464355, "logits/real": 4.078164577484131, "logps/generated": -911.6340942382812, "logps/real": -189.57408142089844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -5.761612415313721, "rewards/margins": 8.555708885192871, "rewards/real": 2.794097900390625, "step": 1820 }, { "epoch": 1.17, "learning_rate": 2.3035904728048346e-07, "logits/generated": 5.922765254974365, "logits/real": 4.084896564483643, "logps/generated": -967.2000122070312, "logps/real": -221.4872589111328, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -6.081551551818848, "rewards/margins": 9.249353408813477, "rewards/real": 3.167802572250366, "step": 1830 }, { "epoch": 1.18, "learning_rate": 2.2858158549591183e-07, "logits/generated": 6.103907585144043, "logits/real": 4.329716682434082, "logps/generated": -922.7809448242188, "logps/real": -229.83584594726562, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -5.810357570648193, "rewards/margins": 8.797757148742676, "rewards/real": 2.9873995780944824, "step": 1840 }, { "epoch": 1.18, "learning_rate": 2.268041237113402e-07, "logits/generated": 5.952910423278809, "logits/real": 4.254477024078369, "logps/generated": -829.0789184570312, "logps/real": -191.4152374267578, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -4.8961920738220215, "rewards/margins": 7.95013952255249, "rewards/real": 3.053947925567627, "step": 1850 }, { "epoch": 1.19, "learning_rate": 2.2502666192676856e-07, "logits/generated": 6.104610919952393, "logits/real": 4.344254016876221, "logps/generated": -927.4021606445312, "logps/real": -209.98464965820312, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -6.0140275955200195, "rewards/margins": 8.953369140625, "rewards/real": 2.939342737197876, "step": 1860 }, { "epoch": 1.2, "learning_rate": 2.2324920014219693e-07, "logits/generated": 5.963999271392822, "logits/real": 4.088013648986816, "logps/generated": -981.2156982421875, "logps/real": -223.4355010986328, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -6.4049482345581055, "rewards/margins": 9.413580894470215, "rewards/real": 3.0086326599121094, "step": 1870 }, { "epoch": 1.2, "learning_rate": 2.2147173835762532e-07, "logits/generated": 6.034571647644043, "logits/real": 3.9964118003845215, "logps/generated": -919.56103515625, "logps/real": -212.9126739501953, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -5.920588970184326, "rewards/margins": 8.81024169921875, "rewards/real": 2.889652967453003, "step": 1880 }, { "epoch": 1.21, "learning_rate": 2.1969427657305366e-07, "logits/generated": 5.908297061920166, "logits/real": 4.290045738220215, "logps/generated": -903.4417724609375, "logps/real": -216.17636108398438, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -6.024001598358154, "rewards/margins": 8.847755432128906, "rewards/real": 2.8237533569335938, "step": 1890 }, { "epoch": 1.22, "learning_rate": 2.1791681478848203e-07, "logits/generated": 5.78277587890625, "logits/real": 4.178603172302246, "logps/generated": -897.1768798828125, "logps/real": -224.93814086914062, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -5.952237129211426, "rewards/margins": 9.044183731079102, "rewards/real": 3.091947555541992, "step": 1900 }, { "epoch": 1.22, "learning_rate": 2.161393530039104e-07, "logits/generated": 5.6169939041137695, "logits/real": 4.14485502243042, "logps/generated": -836.0411376953125, "logps/real": -214.97607421875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -5.351627826690674, "rewards/margins": 8.435457229614258, "rewards/real": 3.0838301181793213, "step": 1910 }, { "epoch": 1.23, "learning_rate": 2.1436189121933878e-07, "logits/generated": 5.942612171173096, "logits/real": 4.3120036125183105, "logps/generated": -1033.1988525390625, "logps/real": -231.5688018798828, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -6.671760559082031, "rewards/margins": 9.734092712402344, "rewards/real": 3.0623319149017334, "step": 1920 }, { "epoch": 1.23, "learning_rate": 2.1258442943476715e-07, "logits/generated": 6.000979423522949, "logits/real": 3.862889051437378, "logps/generated": -996.46875, "logps/real": -188.82571411132812, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -6.690791130065918, "rewards/margins": 9.859224319458008, "rewards/real": 3.168431520462036, "step": 1930 }, { "epoch": 1.24, "learning_rate": 2.1080696765019552e-07, "logits/generated": 6.081242084503174, "logits/real": 4.578803062438965, "logps/generated": -1046.6558837890625, "logps/real": -228.24392700195312, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -7.087597846984863, "rewards/margins": 10.14047622680664, "rewards/real": 3.0528788566589355, "step": 1940 }, { "epoch": 1.25, "learning_rate": 2.0902950586562388e-07, "logits/generated": 5.984295845031738, "logits/real": 4.308724880218506, "logps/generated": -935.5621948242188, "logps/real": -218.9115753173828, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -6.082803726196289, "rewards/margins": 9.108025550842285, "rewards/real": 3.025221347808838, "step": 1950 }, { "epoch": 1.25, "learning_rate": 2.0725204408105225e-07, "logits/generated": 5.89461612701416, "logits/real": 4.198281288146973, "logps/generated": -961.24560546875, "logps/real": -220.1450653076172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -6.416398048400879, "rewards/margins": 9.537870407104492, "rewards/real": 3.1214730739593506, "step": 1960 }, { "epoch": 1.26, "learning_rate": 2.0547458229648061e-07, "logits/generated": 6.187463283538818, "logits/real": 4.732372283935547, "logps/generated": -980.1790161132812, "logps/real": -251.7775421142578, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -6.859214782714844, "rewards/margins": 9.723730087280273, "rewards/real": 2.8645150661468506, "step": 1970 }, { "epoch": 1.27, "learning_rate": 2.0369712051190898e-07, "logits/generated": 5.920609951019287, "logits/real": 4.184802055358887, "logps/generated": -969.5177612304688, "logps/real": -220.58566284179688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -6.475825309753418, "rewards/margins": 9.5746431350708, "rewards/real": 3.098818063735962, "step": 1980 }, { "epoch": 1.27, "learning_rate": 2.0191965872733735e-07, "logits/generated": 6.136545181274414, "logits/real": 4.201231002807617, "logps/generated": -993.6239013671875, "logps/real": -216.8855438232422, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -6.71942138671875, "rewards/margins": 9.831668853759766, "rewards/real": 3.1122474670410156, "step": 1990 }, { "epoch": 1.28, "learning_rate": 2.001421969427657e-07, "logits/generated": 5.892589569091797, "logits/real": 4.182487487792969, "logps/generated": -942.9978637695312, "logps/real": -218.97500610351562, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -6.580080986022949, "rewards/margins": 9.862131118774414, "rewards/real": 3.2820498943328857, "step": 2000 }, { "epoch": 1.29, "learning_rate": 1.983647351581941e-07, "logits/generated": 5.94655179977417, "logits/real": 4.0822672843933105, "logps/generated": -957.2570190429688, "logps/real": -196.55645751953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -6.333019256591797, "rewards/margins": 9.670191764831543, "rewards/real": 3.3371729850769043, "step": 2010 }, { "epoch": 1.29, "learning_rate": 1.9658727337362247e-07, "logits/generated": 5.719095230102539, "logits/real": 4.3129563331604, "logps/generated": -879.9132080078125, "logps/real": -218.69076538085938, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -6.011284828186035, "rewards/margins": 9.261569023132324, "rewards/real": 3.2502846717834473, "step": 2020 }, { "epoch": 1.3, "learning_rate": 1.9480981158905084e-07, "logits/generated": 5.970053195953369, "logits/real": 4.292898654937744, "logps/generated": -888.4221801757812, "logps/real": -221.7141876220703, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -5.928704738616943, "rewards/margins": 9.284444808959961, "rewards/real": 3.3557403087615967, "step": 2030 }, { "epoch": 1.31, "learning_rate": 1.9303234980447918e-07, "logits/generated": 5.851454257965088, "logits/real": 3.963120222091675, "logps/generated": -1002.041015625, "logps/real": -187.06405639648438, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -6.853819370269775, "rewards/margins": 10.24903678894043, "rewards/real": 3.3952178955078125, "step": 2040 }, { "epoch": 1.31, "learning_rate": 1.9125488801990754e-07, "logits/generated": 5.893982887268066, "logits/real": 4.2695136070251465, "logps/generated": -952.0750732421875, "logps/real": -224.6346435546875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -6.340090751647949, "rewards/margins": 9.606979370117188, "rewards/real": 3.26688814163208, "step": 2050 }, { "epoch": 1.32, "learning_rate": 1.8947742623533593e-07, "logits/generated": 6.079073905944824, "logits/real": 4.082903861999512, "logps/generated": -1034.026123046875, "logps/real": -209.4253692626953, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -7.287435054779053, "rewards/margins": 10.478463172912598, "rewards/real": 3.191028356552124, "step": 2060 }, { "epoch": 1.32, "learning_rate": 1.876999644507643e-07, "logits/generated": 6.078752517700195, "logits/real": 4.00618314743042, "logps/generated": -836.09326171875, "logps/real": -218.91085815429688, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -5.683650970458984, "rewards/margins": 8.769237518310547, "rewards/real": 3.0855870246887207, "step": 2070 }, { "epoch": 1.33, "learning_rate": 1.8592250266619267e-07, "logits/generated": 6.1372151374816895, "logits/real": 4.399873733520508, "logps/generated": -925.8645629882812, "logps/real": -231.36962890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -6.495853424072266, "rewards/margins": 9.849853515625, "rewards/real": 3.353998899459839, "step": 2080 }, { "epoch": 1.34, "learning_rate": 1.8414504088162103e-07, "logits/generated": 6.150097370147705, "logits/real": 4.1030778884887695, "logps/generated": -969.1099853515625, "logps/real": -210.23385620117188, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -6.695557594299316, "rewards/margins": 9.90971851348877, "rewards/real": 3.214160203933716, "step": 2090 }, { "epoch": 1.34, "learning_rate": 1.8236757909704943e-07, "logits/generated": 5.9813079833984375, "logits/real": 4.158692359924316, "logps/generated": -843.7550659179688, "logps/real": -198.7861328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -5.507570743560791, "rewards/margins": 8.458051681518555, "rewards/real": 2.950481414794922, "step": 2100 }, { "epoch": 1.35, "learning_rate": 1.805901173124778e-07, "logits/generated": 6.074183940887451, "logits/real": 4.167660236358643, "logps/generated": -904.7354736328125, "logps/real": -216.07406616210938, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -5.98261833190918, "rewards/margins": 9.305191993713379, "rewards/real": 3.3225739002227783, "step": 2110 }, { "epoch": 1.36, "learning_rate": 1.7881265552790613e-07, "logits/generated": 6.014121055603027, "logits/real": 4.315189838409424, "logps/generated": -998.3967895507812, "logps/real": -213.5117950439453, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -6.877680778503418, "rewards/margins": 10.27568244934082, "rewards/real": 3.398001194000244, "step": 2120 }, { "epoch": 1.36, "learning_rate": 1.770351937433345e-07, "logits/generated": 6.0427069664001465, "logits/real": 4.096532344818115, "logps/generated": -962.1192626953125, "logps/real": -211.24386596679688, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -6.622589111328125, "rewards/margins": 9.86971664428711, "rewards/real": 3.24712872505188, "step": 2130 }, { "epoch": 1.37, "learning_rate": 1.7525773195876286e-07, "logits/generated": 5.948111534118652, "logits/real": 4.327864170074463, "logps/generated": -862.5738525390625, "logps/real": -236.5964813232422, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -5.988071441650391, "rewards/margins": 9.170713424682617, "rewards/real": 3.1826424598693848, "step": 2140 }, { "epoch": 1.38, "learning_rate": 1.7348027017419126e-07, "logits/generated": 6.061889171600342, "logits/real": 4.250190258026123, "logps/generated": -962.5604248046875, "logps/real": -212.2547149658203, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -6.656978607177734, "rewards/margins": 10.204740524291992, "rewards/real": 3.547761917114258, "step": 2150 }, { "epoch": 1.38, "learning_rate": 1.7170280838961962e-07, "logits/generated": 6.133549690246582, "logits/real": 4.258768081665039, "logps/generated": -847.89306640625, "logps/real": -213.8179931640625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -5.887804985046387, "rewards/margins": 9.027533531188965, "rewards/real": 3.1397290229797363, "step": 2160 }, { "epoch": 1.39, "learning_rate": 1.69925346605048e-07, "logits/generated": 6.092435359954834, "logits/real": 3.95710825920105, "logps/generated": -1076.5191650390625, "logps/real": -204.7867889404297, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -7.864550590515137, "rewards/margins": 11.004741668701172, "rewards/real": 3.1401913166046143, "step": 2170 }, { "epoch": 1.39, "learning_rate": 1.6814788482047635e-07, "logits/generated": 6.069060325622559, "logits/real": 4.248088836669922, "logps/generated": -1034.2674560546875, "logps/real": -239.43045043945312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -7.654637813568115, "rewards/margins": 11.039973258972168, "rewards/real": 3.385335922241211, "step": 2180 }, { "epoch": 1.4, "learning_rate": 1.6637042303590475e-07, "logits/generated": 6.127971649169922, "logits/real": 4.42900276184082, "logps/generated": -1067.667724609375, "logps/real": -212.1188507080078, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -7.7270331382751465, "rewards/margins": 11.030082702636719, "rewards/real": 3.3030498027801514, "step": 2190 }, { "epoch": 1.41, "learning_rate": 1.6459296125133309e-07, "logits/generated": 6.039584159851074, "logits/real": 4.363356113433838, "logps/generated": -967.5791015625, "logps/real": -234.0696258544922, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -6.919652462005615, "rewards/margins": 10.246850967407227, "rewards/real": 3.3271987438201904, "step": 2200 }, { "epoch": 1.41, "learning_rate": 1.6281549946676145e-07, "logits/generated": 5.848420143127441, "logits/real": 4.181252479553223, "logps/generated": -879.5675659179688, "logps/real": -204.01536560058594, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -6.058112144470215, "rewards/margins": 9.286849975585938, "rewards/real": 3.2287373542785645, "step": 2210 }, { "epoch": 1.42, "learning_rate": 1.6103803768218982e-07, "logits/generated": 6.0381035804748535, "logits/real": 4.147576332092285, "logps/generated": -888.4138793945312, "logps/real": -218.30050659179688, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -6.2195258140563965, "rewards/margins": 9.568934440612793, "rewards/real": 3.3494086265563965, "step": 2220 }, { "epoch": 1.43, "learning_rate": 1.5926057589761818e-07, "logits/generated": 6.021380424499512, "logits/real": 4.303619384765625, "logps/generated": -751.9827880859375, "logps/real": -225.69869995117188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -5.188918113708496, "rewards/margins": 8.795283317565918, "rewards/real": 3.606365203857422, "step": 2230 }, { "epoch": 1.43, "learning_rate": 1.5748311411304658e-07, "logits/generated": 5.8918375968933105, "logits/real": 4.548980712890625, "logps/generated": -925.9884033203125, "logps/real": -212.60781860351562, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -6.482697486877441, "rewards/margins": 9.820296287536621, "rewards/real": 3.337599992752075, "step": 2240 }, { "epoch": 1.44, "learning_rate": 1.5570565232847494e-07, "logits/generated": 5.850405216217041, "logits/real": 4.177556037902832, "logps/generated": -941.3117065429688, "logps/real": -207.508544921875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -6.867137908935547, "rewards/margins": 10.460260391235352, "rewards/real": 3.5931217670440674, "step": 2250 }, { "epoch": 1.45, "learning_rate": 1.539281905439033e-07, "logits/generated": 6.011968612670898, "logits/real": 4.186308860778809, "logps/generated": -850.3424072265625, "logps/real": -213.4459991455078, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -6.205097198486328, "rewards/margins": 9.331758499145508, "rewards/real": 3.1266608238220215, "step": 2260 }, { "epoch": 1.45, "learning_rate": 1.5215072875933165e-07, "logits/generated": 5.9976019859313965, "logits/real": 4.6466498374938965, "logps/generated": -959.1027221679688, "logps/real": -230.4766845703125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -6.896917819976807, "rewards/margins": 10.29023551940918, "rewards/real": 3.3933181762695312, "step": 2270 }, { "epoch": 1.46, "learning_rate": 1.5037326697476004e-07, "logits/generated": 5.940140247344971, "logits/real": 3.9636178016662598, "logps/generated": -811.2144775390625, "logps/real": -191.14474487304688, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -5.637009620666504, "rewards/margins": 9.112122535705566, "rewards/real": 3.4751129150390625, "step": 2280 }, { "epoch": 1.47, "learning_rate": 1.485958051901884e-07, "logits/generated": 6.092099189758301, "logits/real": 4.396188259124756, "logps/generated": -1047.5501708984375, "logps/real": -235.4965362548828, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -7.702287197113037, "rewards/margins": 11.126907348632812, "rewards/real": 3.42461895942688, "step": 2290 }, { "epoch": 1.47, "learning_rate": 1.4681834340561677e-07, "logits/generated": 6.009620666503906, "logits/real": 4.459410667419434, "logps/generated": -1028.2261962890625, "logps/real": -224.2885284423828, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -7.595302581787109, "rewards/margins": 11.32234001159668, "rewards/real": 3.7270379066467285, "step": 2300 }, { "epoch": 1.48, "learning_rate": 1.4504088162104514e-07, "logits/generated": 6.041359901428223, "logits/real": 4.407500267028809, "logps/generated": -1056.5250244140625, "logps/real": -230.5962677001953, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -8.136027336120605, "rewards/margins": 11.669519424438477, "rewards/real": 3.533491611480713, "step": 2310 }, { "epoch": 1.48, "learning_rate": 1.432634198364735e-07, "logits/generated": 6.096525192260742, "logits/real": 4.020089149475098, "logps/generated": -940.7005615234375, "logps/real": -217.18240356445312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -6.886039733886719, "rewards/margins": 10.523492813110352, "rewards/real": 3.637453079223633, "step": 2320 }, { "epoch": 1.49, "learning_rate": 1.414859580519019e-07, "logits/generated": 5.8838653564453125, "logits/real": 4.09322452545166, "logps/generated": -871.5797729492188, "logps/real": -189.5806427001953, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -5.9241437911987305, "rewards/margins": 9.4077730178833, "rewards/real": 3.483628511428833, "step": 2330 }, { "epoch": 1.5, "learning_rate": 1.3970849626733024e-07, "logits/generated": 6.006686687469482, "logits/real": 4.329590797424316, "logps/generated": -1036.4449462890625, "logps/real": -228.02566528320312, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -7.656490325927734, "rewards/margins": 10.992205619812012, "rewards/real": 3.3357155323028564, "step": 2340 }, { "epoch": 1.5, "learning_rate": 1.379310344827586e-07, "logits/generated": 6.243993282318115, "logits/real": 4.345526695251465, "logps/generated": -972.0113525390625, "logps/real": -213.86898803710938, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -7.251333713531494, "rewards/margins": 10.62246322631836, "rewards/real": 3.371129274368286, "step": 2350 }, { "epoch": 1.51, "learning_rate": 1.3615357269818697e-07, "logits/generated": 6.056420803070068, "logits/real": 4.507519245147705, "logps/generated": -959.6647338867188, "logps/real": -228.41311645507812, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -7.193532466888428, "rewards/margins": 10.772246360778809, "rewards/real": 3.578713893890381, "step": 2360 }, { "epoch": 1.52, "learning_rate": 1.3437611091361536e-07, "logits/generated": 5.848100662231445, "logits/real": 4.477749824523926, "logps/generated": -885.1476440429688, "logps/real": -217.71826171875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -6.187966823577881, "rewards/margins": 9.537440299987793, "rewards/real": 3.349473476409912, "step": 2370 }, { "epoch": 1.52, "learning_rate": 1.3259864912904373e-07, "logits/generated": 6.177857875823975, "logits/real": 3.999253511428833, "logps/generated": -849.5914916992188, "logps/real": -186.1688690185547, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -5.99931526184082, "rewards/margins": 9.616048812866211, "rewards/real": 3.6167335510253906, "step": 2380 }, { "epoch": 1.53, "learning_rate": 1.308211873444721e-07, "logits/generated": 5.982209205627441, "logits/real": 4.022818565368652, "logps/generated": -943.8885498046875, "logps/real": -191.94265747070312, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -7.117369174957275, "rewards/margins": 10.699490547180176, "rewards/real": 3.5821213722229004, "step": 2390 }, { "epoch": 1.54, "learning_rate": 1.2904372555990046e-07, "logits/generated": 6.1165690422058105, "logits/real": 4.515404224395752, "logps/generated": -1075.3916015625, "logps/real": -223.2095489501953, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -7.828172206878662, "rewards/margins": 11.10367202758789, "rewards/real": 3.2754998207092285, "step": 2400 }, { "epoch": 1.54, "learning_rate": 1.2726626377532883e-07, "logits/generated": 5.962019920349121, "logits/real": 4.166319847106934, "logps/generated": -1018.0177001953125, "logps/real": -193.1107177734375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -7.824549198150635, "rewards/margins": 11.278244972229004, "rewards/real": 3.4536960124969482, "step": 2410 }, { "epoch": 1.55, "learning_rate": 1.254888019907572e-07, "logits/generated": 5.941873073577881, "logits/real": 4.457114219665527, "logps/generated": -839.1423950195312, "logps/real": -225.7877960205078, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -6.322813987731934, "rewards/margins": 9.847611427307129, "rewards/real": 3.5247981548309326, "step": 2420 }, { "epoch": 1.55, "learning_rate": 1.2371134020618556e-07, "logits/generated": 6.1170220375061035, "logits/real": 4.145179748535156, "logps/generated": -971.7708740234375, "logps/real": -213.4941864013672, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -7.316300392150879, "rewards/margins": 10.892778396606445, "rewards/real": 3.5764777660369873, "step": 2430 }, { "epoch": 1.56, "learning_rate": 1.2193387842161392e-07, "logits/generated": 5.986542701721191, "logits/real": 4.025667190551758, "logps/generated": -830.68359375, "logps/real": -206.99441528320312, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -5.811788082122803, "rewards/margins": 9.347892761230469, "rewards/real": 3.5361053943634033, "step": 2440 }, { "epoch": 1.57, "learning_rate": 1.2015641663704232e-07, "logits/generated": 5.972803592681885, "logits/real": 4.296057224273682, "logps/generated": -1000.8699340820312, "logps/real": -219.41787719726562, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -7.70371150970459, "rewards/margins": 11.355711936950684, "rewards/real": 3.6520004272460938, "step": 2450 }, { "epoch": 1.57, "learning_rate": 1.1837895485247067e-07, "logits/generated": 6.02608585357666, "logits/real": 4.351847171783447, "logps/generated": -974.85693359375, "logps/real": -215.2225341796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -7.276649475097656, "rewards/margins": 10.775941848754883, "rewards/real": 3.4992916584014893, "step": 2460 }, { "epoch": 1.58, "learning_rate": 1.1660149306789902e-07, "logits/generated": 5.748597145080566, "logits/real": 4.363656997680664, "logps/generated": -966.9947509765625, "logps/real": -231.53848266601562, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -7.087010860443115, "rewards/margins": 10.6896390914917, "rewards/real": 3.6026291847229004, "step": 2470 }, { "epoch": 1.59, "learning_rate": 1.148240312833274e-07, "logits/generated": 6.186932563781738, "logits/real": 4.215136528015137, "logps/generated": -960.4109497070312, "logps/real": -200.14752197265625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -7.6329665184021, "rewards/margins": 11.175156593322754, "rewards/real": 3.5421881675720215, "step": 2480 }, { "epoch": 1.59, "learning_rate": 1.1304656949875577e-07, "logits/generated": 5.933046340942383, "logits/real": 4.051640510559082, "logps/generated": -947.9317626953125, "logps/real": -187.852783203125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -6.863749027252197, "rewards/margins": 10.43601131439209, "rewards/real": 3.57226300239563, "step": 2490 }, { "epoch": 1.6, "learning_rate": 1.1126910771418415e-07, "logits/generated": 6.067012786865234, "logits/real": 4.100882530212402, "logps/generated": -922.0003662109375, "logps/real": -218.1196746826172, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -6.713512420654297, "rewards/margins": 10.470623016357422, "rewards/real": 3.7571117877960205, "step": 2500 }, { "epoch": 1.61, "learning_rate": 1.094916459296125e-07, "logits/generated": 5.888331413269043, "logits/real": 4.533158302307129, "logps/generated": -941.0359497070312, "logps/real": -235.983154296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -7.0601677894592285, "rewards/margins": 10.506025314331055, "rewards/real": 3.445856809616089, "step": 2510 }, { "epoch": 1.61, "learning_rate": 1.0771418414504088e-07, "logits/generated": 6.122256278991699, "logits/real": 4.691792964935303, "logps/generated": -1032.4281005859375, "logps/real": -242.85391235351562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -7.817018032073975, "rewards/margins": 11.140421867370605, "rewards/real": 3.3234035968780518, "step": 2520 }, { "epoch": 1.62, "learning_rate": 1.0593672236046925e-07, "logits/generated": 6.091986179351807, "logits/real": 4.298600196838379, "logps/generated": -979.9375, "logps/real": -207.18222045898438, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -7.318638801574707, "rewards/margins": 10.859162330627441, "rewards/real": 3.540522813796997, "step": 2530 }, { "epoch": 1.63, "learning_rate": 1.0415926057589762e-07, "logits/generated": 5.999621868133545, "logits/real": 4.219559192657471, "logps/generated": -949.9793090820312, "logps/real": -210.6415557861328, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -7.1169939041137695, "rewards/margins": 10.84516716003418, "rewards/real": 3.7281742095947266, "step": 2540 }, { "epoch": 1.63, "learning_rate": 1.0238179879132598e-07, "logits/generated": 5.945744514465332, "logits/real": 4.196887969970703, "logps/generated": -999.5768432617188, "logps/real": -197.3428192138672, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -7.847522735595703, "rewards/margins": 11.279948234558105, "rewards/real": 3.4324257373809814, "step": 2550 }, { "epoch": 1.64, "learning_rate": 1.0060433700675434e-07, "logits/generated": 6.025457859039307, "logits/real": 4.405503273010254, "logps/generated": -911.369140625, "logps/real": -226.285400390625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -6.9073333740234375, "rewards/margins": 10.433539390563965, "rewards/real": 3.526205539703369, "step": 2560 }, { "epoch": 1.64, "learning_rate": 9.882687522218272e-08, "logits/generated": 5.8123369216918945, "logits/real": 4.084736347198486, "logps/generated": -892.3932495117188, "logps/real": -207.2658233642578, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -6.392351150512695, "rewards/margins": 10.307291030883789, "rewards/real": 3.914940595626831, "step": 2570 }, { "epoch": 1.65, "learning_rate": 9.704941343761109e-08, "logits/generated": 6.000952243804932, "logits/real": 4.174363136291504, "logps/generated": -1008.0392456054688, "logps/real": -216.5176239013672, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -7.5495429039001465, "rewards/margins": 11.444334030151367, "rewards/real": 3.8947906494140625, "step": 2580 }, { "epoch": 1.66, "learning_rate": 9.527195165303946e-08, "logits/generated": 5.935231685638428, "logits/real": 4.574646949768066, "logps/generated": -1026.3360595703125, "logps/real": -224.52346801757812, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -7.995066165924072, "rewards/margins": 11.642257690429688, "rewards/real": 3.6471920013427734, "step": 2590 }, { "epoch": 1.66, "learning_rate": 9.349448986846782e-08, "logits/generated": 6.165256500244141, "logits/real": 4.483449459075928, "logps/generated": -1073.19873046875, "logps/real": -215.8119354248047, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -8.13926887512207, "rewards/margins": 11.734591484069824, "rewards/real": 3.595323085784912, "step": 2600 }, { "epoch": 1.67, "learning_rate": 9.17170280838962e-08, "logits/generated": 6.178045272827148, "logits/real": 4.426362991333008, "logps/generated": -981.1510009765625, "logps/real": -221.8535614013672, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -7.534024238586426, "rewards/margins": 11.1114501953125, "rewards/real": 3.5774269104003906, "step": 2610 }, { "epoch": 1.68, "learning_rate": 8.993956629932455e-08, "logits/generated": 6.002736568450928, "logits/real": 4.408158302307129, "logps/generated": -1037.64697265625, "logps/real": -206.6656036376953, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -7.90814733505249, "rewards/margins": 11.406362533569336, "rewards/real": 3.498215913772583, "step": 2620 }, { "epoch": 1.68, "learning_rate": 8.816210451475293e-08, "logits/generated": 5.922328948974609, "logits/real": 4.30651330947876, "logps/generated": -883.3487548828125, "logps/real": -226.1007843017578, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -6.406916618347168, "rewards/margins": 10.177984237670898, "rewards/real": 3.7710673809051514, "step": 2630 }, { "epoch": 1.69, "learning_rate": 8.63846427301813e-08, "logits/generated": 5.988922595977783, "logits/real": 4.155168533325195, "logps/generated": -918.8670654296875, "logps/real": -203.3168487548828, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -7.007664680480957, "rewards/margins": 10.59145450592041, "rewards/real": 3.5837910175323486, "step": 2640 }, { "epoch": 1.7, "learning_rate": 8.460718094560966e-08, "logits/generated": 5.974033832550049, "logits/real": 4.323070526123047, "logps/generated": -953.7561645507812, "logps/real": -215.21804809570312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -7.087655544281006, "rewards/margins": 10.743782043457031, "rewards/real": 3.656125545501709, "step": 2650 }, { "epoch": 1.7, "learning_rate": 8.282971916103803e-08, "logits/generated": 5.881442546844482, "logits/real": 4.603425025939941, "logps/generated": -941.333984375, "logps/real": -229.0399932861328, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -6.910244941711426, "rewards/margins": 10.281949996948242, "rewards/real": 3.3717052936553955, "step": 2660 }, { "epoch": 1.71, "learning_rate": 8.10522573764664e-08, "logits/generated": 6.160346031188965, "logits/real": 4.405267238616943, "logps/generated": -1009.9052734375, "logps/real": -224.79244995117188, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -7.713784694671631, "rewards/margins": 11.180540084838867, "rewards/real": 3.4667556285858154, "step": 2670 }, { "epoch": 1.71, "learning_rate": 7.927479559189478e-08, "logits/generated": 6.0979323387146, "logits/real": 4.142810821533203, "logps/generated": -919.0299072265625, "logps/real": -200.34954833984375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -6.830052375793457, "rewards/margins": 10.462355613708496, "rewards/real": 3.6323037147521973, "step": 2680 }, { "epoch": 1.72, "learning_rate": 7.749733380732314e-08, "logits/generated": 5.893312454223633, "logits/real": 4.082333564758301, "logps/generated": -922.8045043945312, "logps/real": -211.110107421875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -7.06411600112915, "rewards/margins": 11.093643188476562, "rewards/real": 4.029527187347412, "step": 2690 }, { "epoch": 1.73, "learning_rate": 7.571987202275151e-08, "logits/generated": 6.046978950500488, "logits/real": 4.000611305236816, "logps/generated": -851.3162231445312, "logps/real": -213.77294921875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -6.470201015472412, "rewards/margins": 10.399231910705566, "rewards/real": 3.929030656814575, "step": 2700 }, { "epoch": 1.73, "learning_rate": 7.394241023817987e-08, "logits/generated": 6.083460807800293, "logits/real": 4.147582054138184, "logps/generated": -869.0367431640625, "logps/real": -222.88449096679688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -6.679482936859131, "rewards/margins": 10.497906684875488, "rewards/real": 3.8184237480163574, "step": 2710 }, { "epoch": 1.74, "learning_rate": 7.216494845360824e-08, "logits/generated": 5.894058704376221, "logits/real": 4.494288921356201, "logps/generated": -1004.9658203125, "logps/real": -224.24234008789062, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -7.787278175354004, "rewards/margins": 11.781339645385742, "rewards/real": 3.994061231613159, "step": 2720 }, { "epoch": 1.75, "learning_rate": 7.038748666903662e-08, "logits/generated": 6.083498954772949, "logits/real": 4.627425670623779, "logps/generated": -1075.3470458984375, "logps/real": -226.70223999023438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -8.463549613952637, "rewards/margins": 12.042739868164062, "rewards/real": 3.579190731048584, "step": 2730 }, { "epoch": 1.75, "learning_rate": 6.861002488446497e-08, "logits/generated": 5.915851593017578, "logits/real": 3.9263007640838623, "logps/generated": -1111.206298828125, "logps/real": -196.06072998046875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -8.594639778137207, "rewards/margins": 12.281318664550781, "rewards/real": 3.6866791248321533, "step": 2740 }, { "epoch": 1.76, "learning_rate": 6.683256309989335e-08, "logits/generated": 5.970302581787109, "logits/real": 4.437585830688477, "logps/generated": -840.1096801757812, "logps/real": -226.9420928955078, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.3191237449646, "rewards/margins": 10.023898124694824, "rewards/real": 3.704774856567383, "step": 2750 }, { "epoch": 1.77, "learning_rate": 6.505510131532172e-08, "logits/generated": 6.018714904785156, "logits/real": 4.127536296844482, "logps/generated": -929.6011962890625, "logps/real": -204.32046508789062, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -7.2565131187438965, "rewards/margins": 10.836641311645508, "rewards/real": 3.5801289081573486, "step": 2760 }, { "epoch": 1.77, "learning_rate": 6.32776395307501e-08, "logits/generated": 5.825669765472412, "logits/real": 4.458681106567383, "logps/generated": -974.4544067382812, "logps/real": -212.163818359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -7.371243953704834, "rewards/margins": 10.979463577270508, "rewards/real": 3.6082186698913574, "step": 2770 }, { "epoch": 1.78, "learning_rate": 6.150017774617845e-08, "logits/generated": 5.914022922515869, "logits/real": 4.451127529144287, "logps/generated": -971.3380126953125, "logps/real": -248.98635864257812, "loss": 0.0053, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.341151237487793, "rewards/margins": 11.175028800964355, "rewards/real": 3.833878755569458, "step": 2780 }, { "epoch": 1.79, "learning_rate": 5.972271596160682e-08, "logits/generated": 6.177198886871338, "logits/real": 4.612268924713135, "logps/generated": -955.9030151367188, "logps/real": -221.28634643554688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -7.392415523529053, "rewards/margins": 10.885235786437988, "rewards/real": 3.4928202629089355, "step": 2790 }, { "epoch": 1.79, "learning_rate": 5.794525417703519e-08, "logits/generated": 5.88433837890625, "logits/real": 4.339784145355225, "logps/generated": -990.3663330078125, "logps/real": -223.5160369873047, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -7.730770111083984, "rewards/margins": 11.330351829528809, "rewards/real": 3.599581480026245, "step": 2800 }, { "epoch": 1.8, "learning_rate": 5.616779239246356e-08, "logits/generated": 6.089413642883301, "logits/real": 4.758141994476318, "logps/generated": -946.3424072265625, "logps/real": -223.90957641601562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -7.317401885986328, "rewards/margins": 11.000505447387695, "rewards/real": 3.6831040382385254, "step": 2810 }, { "epoch": 1.8, "learning_rate": 5.439033060789193e-08, "logits/generated": 5.951352119445801, "logits/real": 4.1157331466674805, "logps/generated": -876.4993896484375, "logps/real": -212.88046264648438, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -6.624871730804443, "rewards/margins": 10.744012832641602, "rewards/real": 4.119140625, "step": 2820 }, { "epoch": 1.81, "learning_rate": 5.26128688233203e-08, "logits/generated": 5.940617561340332, "logits/real": 3.8113338947296143, "logps/generated": -888.90087890625, "logps/real": -203.17901611328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.581952095031738, "rewards/margins": 10.407301902770996, "rewards/real": 3.825350522994995, "step": 2830 }, { "epoch": 1.82, "learning_rate": 5.0835407038748666e-08, "logits/generated": 5.964734077453613, "logits/real": 4.341010093688965, "logps/generated": -881.1970825195312, "logps/real": -232.49813842773438, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -6.593221187591553, "rewards/margins": 10.22175407409668, "rewards/real": 3.628532886505127, "step": 2840 }, { "epoch": 1.82, "learning_rate": 4.905794525417703e-08, "logits/generated": 5.8404388427734375, "logits/real": 4.435536861419678, "logps/generated": -969.4814453125, "logps/real": -215.01858520507812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -7.376192569732666, "rewards/margins": 11.145563125610352, "rewards/real": 3.769369602203369, "step": 2850 }, { "epoch": 1.83, "learning_rate": 4.72804834696054e-08, "logits/generated": 5.9956512451171875, "logits/real": 4.474661827087402, "logps/generated": -909.1708984375, "logps/real": -216.86392211914062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -7.260798454284668, "rewards/margins": 10.6810302734375, "rewards/real": 3.4202327728271484, "step": 2860 }, { "epoch": 1.84, "learning_rate": 4.550302168503377e-08, "logits/generated": 6.177473545074463, "logits/real": 4.074435234069824, "logps/generated": -931.0374145507812, "logps/real": -199.68115234375, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.1744065284729, "rewards/margins": 11.10449504852295, "rewards/real": 3.9300880432128906, "step": 2870 }, { "epoch": 1.84, "learning_rate": 4.372555990046214e-08, "logits/generated": 6.158198356628418, "logits/real": 4.45182466506958, "logps/generated": -1053.023681640625, "logps/real": -227.9947967529297, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -8.230512619018555, "rewards/margins": 11.803583145141602, "rewards/real": 3.5730698108673096, "step": 2880 }, { "epoch": 1.85, "learning_rate": 4.194809811589051e-08, "logits/generated": 5.763731956481934, "logits/real": 4.512047290802002, "logps/generated": -877.0777587890625, "logps/real": -239.983642578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -6.7064409255981445, "rewards/margins": 10.49316120147705, "rewards/real": 3.7867202758789062, "step": 2890 }, { "epoch": 1.86, "learning_rate": 4.0170636331318876e-08, "logits/generated": 6.0582170486450195, "logits/real": 4.427424430847168, "logps/generated": -1025.2308349609375, "logps/real": -215.9585418701172, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -8.088347434997559, "rewards/margins": 11.211756706237793, "rewards/real": 3.1234092712402344, "step": 2900 }, { "epoch": 1.86, "learning_rate": 3.839317454674725e-08, "logits/generated": 6.159816741943359, "logits/real": 4.5531134605407715, "logps/generated": -914.5631713867188, "logps/real": -223.43667602539062, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -7.19748067855835, "rewards/margins": 11.130581855773926, "rewards/real": 3.933100938796997, "step": 2910 }, { "epoch": 1.87, "learning_rate": 3.6615712762175614e-08, "logits/generated": 6.062338829040527, "logits/real": 3.9617767333984375, "logps/generated": -1065.44189453125, "logps/real": -200.53231811523438, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -8.692277908325195, "rewards/margins": 12.533926010131836, "rewards/real": 3.841648578643799, "step": 2920 }, { "epoch": 1.87, "learning_rate": 3.4838250977603974e-08, "logits/generated": 5.966034889221191, "logits/real": 4.169475555419922, "logps/generated": -900.4786376953125, "logps/real": -219.13211059570312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -6.983071804046631, "rewards/margins": 10.687957763671875, "rewards/real": 3.7048866748809814, "step": 2930 }, { "epoch": 1.88, "learning_rate": 3.3060789193032346e-08, "logits/generated": 6.001954555511475, "logits/real": 4.039769649505615, "logps/generated": -948.9764404296875, "logps/real": -213.39022827148438, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -7.106842041015625, "rewards/margins": 10.968811988830566, "rewards/real": 3.8619697093963623, "step": 2940 }, { "epoch": 1.89, "learning_rate": 3.128332740846071e-08, "logits/generated": 6.0790205001831055, "logits/real": 4.268954753875732, "logps/generated": -969.9235229492188, "logps/real": -210.45849609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -7.430506706237793, "rewards/margins": 11.069308280944824, "rewards/real": 3.638800859451294, "step": 2950 }, { "epoch": 1.89, "learning_rate": 2.9505865623889085e-08, "logits/generated": 6.002804756164551, "logits/real": 4.1793718338012695, "logps/generated": -1044.44287109375, "logps/real": -208.38851928710938, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -8.205821990966797, "rewards/margins": 11.79900074005127, "rewards/real": 3.593177318572998, "step": 2960 }, { "epoch": 1.9, "learning_rate": 2.7728403839317454e-08, "logits/generated": 6.175272464752197, "logits/real": 4.347874641418457, "logps/generated": -908.1828002929688, "logps/real": -213.10421752929688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -7.079012870788574, "rewards/margins": 10.570651054382324, "rewards/real": 3.4916369915008545, "step": 2970 }, { "epoch": 1.91, "learning_rate": 2.5950942054745824e-08, "logits/generated": 5.989743232727051, "logits/real": 4.590392112731934, "logps/generated": -881.7191162109375, "logps/real": -221.69577026367188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.744132995605469, "rewards/margins": 10.367905616760254, "rewards/real": 3.6237728595733643, "step": 2980 }, { "epoch": 1.91, "learning_rate": 2.4173480270174193e-08, "logits/generated": 5.84287166595459, "logits/real": 4.579287052154541, "logps/generated": -880.3978271484375, "logps/real": -232.52279663085938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -6.6468071937561035, "rewards/margins": 10.200614929199219, "rewards/real": 3.553807020187378, "step": 2990 }, { "epoch": 1.92, "learning_rate": 2.2396018485602556e-08, "logits/generated": 6.122179985046387, "logits/real": 4.397927761077881, "logps/generated": -998.4055786132812, "logps/real": -210.6758575439453, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -7.7901740074157715, "rewards/margins": 11.620798110961914, "rewards/real": 3.830622434616089, "step": 3000 }, { "epoch": 1.93, "learning_rate": 2.0618556701030925e-08, "logits/generated": 5.882719993591309, "logits/real": 4.193206787109375, "logps/generated": -996.2776489257812, "logps/real": -182.93798828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -8.111333847045898, "rewards/margins": 11.724261283874512, "rewards/real": 3.612928867340088, "step": 3010 }, { "epoch": 1.93, "learning_rate": 1.8841094916459295e-08, "logits/generated": 6.127943992614746, "logits/real": 4.209385395050049, "logps/generated": -1060.7899169921875, "logps/real": -205.1070098876953, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -8.274694442749023, "rewards/margins": 11.977832794189453, "rewards/real": 3.703138828277588, "step": 3020 }, { "epoch": 1.94, "learning_rate": 1.7063633131887664e-08, "logits/generated": 5.8941450119018555, "logits/real": 4.013119697570801, "logps/generated": -973.5771484375, "logps/real": -188.08303833007812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -7.611605644226074, "rewards/margins": 11.475560188293457, "rewards/real": 3.8639538288116455, "step": 3030 }, { "epoch": 1.94, "learning_rate": 1.5286171347316033e-08, "logits/generated": 6.093392372131348, "logits/real": 4.220031261444092, "logps/generated": -948.8116455078125, "logps/real": -216.5891876220703, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -7.310966491699219, "rewards/margins": 10.79468059539795, "rewards/real": 3.4837143421173096, "step": 3040 }, { "epoch": 1.95, "learning_rate": 1.35087095627444e-08, "logits/generated": 6.005775451660156, "logits/real": 4.406448841094971, "logps/generated": -944.8621826171875, "logps/real": -192.6377716064453, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -7.452921390533447, "rewards/margins": 11.0402193069458, "rewards/real": 3.587296962738037, "step": 3050 }, { "epoch": 1.96, "learning_rate": 1.1731247778172769e-08, "logits/generated": 6.163327693939209, "logits/real": 4.344714641571045, "logps/generated": -917.35498046875, "logps/real": -198.57122802734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -7.304339408874512, "rewards/margins": 10.936470985412598, "rewards/real": 3.632131576538086, "step": 3060 }, { "epoch": 1.96, "learning_rate": 9.953785993601137e-09, "logits/generated": 6.108910083770752, "logits/real": 4.15443754196167, "logps/generated": -962.0145263671875, "logps/real": -203.24107360839844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -7.498394012451172, "rewards/margins": 10.694067001342773, "rewards/real": 3.1956734657287598, "step": 3070 }, { "epoch": 1.97, "learning_rate": 8.176324209029506e-09, "logits/generated": 6.031611919403076, "logits/real": 3.9185752868652344, "logps/generated": -981.349609375, "logps/real": -206.8091278076172, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -7.681130409240723, "rewards/margins": 11.426141738891602, "rewards/real": 3.745011806488037, "step": 3080 }, { "epoch": 1.98, "learning_rate": 6.398862424457874e-09, "logits/generated": 5.970823764801025, "logits/real": 4.175595283508301, "logps/generated": -943.4645385742188, "logps/real": -184.49136352539062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -6.951342582702637, "rewards/margins": 10.521745681762695, "rewards/real": 3.5704009532928467, "step": 3090 }, { "epoch": 1.98, "learning_rate": 4.621400639886242e-09, "logits/generated": 6.0879998207092285, "logits/real": 4.444035530090332, "logps/generated": -1086.6719970703125, "logps/real": -244.66543579101562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -8.595160484313965, "rewards/margins": 12.099470138549805, "rewards/real": 3.5043106079101562, "step": 3100 }, { "epoch": 1.99, "learning_rate": 2.8439388553146107e-09, "logits/generated": 6.041680335998535, "logits/real": 4.341583728790283, "logps/generated": -994.8681640625, "logps/real": -205.96835327148438, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -7.797635078430176, "rewards/margins": 11.64828872680664, "rewards/real": 3.850653886795044, "step": 3110 }, { "epoch": 2.0, "learning_rate": 1.066477070742979e-09, "logits/generated": 6.018385887145996, "logits/real": 3.9701621532440186, "logps/generated": -901.3045654296875, "logps/real": -204.19090270996094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -6.618832588195801, "rewards/margins": 10.127131462097168, "rewards/real": 3.5082993507385254, "step": 3120 }, { "epoch": 2.0, "step": 3126, "total_flos": 0.0, "train_loss": 0.12108022763431954, "train_runtime": 40273.1203, "train_samples_per_second": 2.483, "train_steps_per_second": 0.078 } ], "logging_steps": 10, "max_steps": 3126, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }