{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9902995720399428, "eval_steps": 100, "global_step": 218, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.2727272727272729e-07, "logits/chosen": -2.779836893081665, "logits/rejected": -2.772892951965332, "logps/chosen": -67.39044952392578, "logps/rejected": -65.7892074584961, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.09, "learning_rate": 2.2727272727272728e-06, "logits/chosen": -2.764805793762207, "logits/rejected": -2.7586262226104736, "logps/chosen": -63.05746841430664, "logps/rejected": -64.96013641357422, "loss": 0.6864, "rewards/accuracies": 0.6006944179534912, "rewards/chosen": 0.009925955906510353, "rewards/margins": 0.013827367685735226, "rewards/rejected": -0.0039014113135635853, "step": 10 }, { "epoch": 0.18, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -2.7655444145202637, "logits/rejected": -2.7531120777130127, "logps/chosen": -60.976318359375, "logps/rejected": -64.35781860351562, "loss": 0.5979, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06535812467336655, "rewards/margins": 0.22067773342132568, "rewards/rejected": -0.15531960129737854, "step": 20 }, { "epoch": 0.27, "learning_rate": 4.795918367346939e-06, "logits/chosen": -2.7331104278564453, "logits/rejected": -2.722367763519287, "logps/chosen": -63.54418182373047, "logps/rejected": -77.56448364257812, "loss": 0.3997, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.11322204768657684, "rewards/margins": 0.9385285377502441, "rewards/rejected": -1.0517505407333374, "step": 30 }, { "epoch": 0.37, "learning_rate": 4.540816326530613e-06, "logits/chosen": -2.685359477996826, "logits/rejected": -2.6721653938293457, "logps/chosen": -67.8324966430664, "logps/rejected": -89.94172668457031, "loss": 0.2768, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.5856838226318359, "rewards/margins": 1.7414783239364624, "rewards/rejected": -2.327162265777588, "step": 40 }, { "epoch": 0.46, "learning_rate": 4.2857142857142855e-06, "logits/chosen": -2.660297393798828, "logits/rejected": -2.6442055702209473, "logps/chosen": -72.59104919433594, "logps/rejected": -93.20745849609375, "loss": 0.2475, "rewards/accuracies": 0.90625, "rewards/chosen": -0.7966881394386292, "rewards/margins": 2.058647632598877, "rewards/rejected": -2.8553357124328613, "step": 50 }, { "epoch": 0.55, "learning_rate": 4.03061224489796e-06, "logits/chosen": -2.643165111541748, "logits/rejected": -2.63153076171875, "logps/chosen": -72.09125518798828, "logps/rejected": -99.36156463623047, "loss": 0.2392, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -1.083268404006958, "rewards/margins": 2.3197226524353027, "rewards/rejected": -3.4029908180236816, "step": 60 }, { "epoch": 0.64, "learning_rate": 3.7755102040816327e-06, "logits/chosen": -2.638352632522583, "logits/rejected": -2.6236445903778076, "logps/chosen": -77.40001678466797, "logps/rejected": -107.3912124633789, "loss": 0.2044, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -1.5230330228805542, "rewards/margins": 2.718259572982788, "rewards/rejected": -4.241292476654053, "step": 70 }, { "epoch": 0.73, "learning_rate": 3.5204081632653062e-06, "logits/chosen": -2.6177425384521484, "logits/rejected": -2.597738265991211, "logps/chosen": -75.93782043457031, "logps/rejected": -108.6824951171875, "loss": 0.1696, "rewards/accuracies": 0.9375, "rewards/chosen": -1.198141098022461, "rewards/margins": 3.195936918258667, "rewards/rejected": -4.394078254699707, "step": 80 }, { "epoch": 0.82, "learning_rate": 3.2653061224489794e-06, "logits/chosen": -2.6300315856933594, "logits/rejected": -2.6183547973632812, "logps/chosen": -80.39871978759766, "logps/rejected": -113.6379165649414, "loss": 0.1925, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -1.5853779315948486, "rewards/margins": 3.1853203773498535, "rewards/rejected": -4.770698547363281, "step": 90 }, { "epoch": 0.91, "learning_rate": 3.0102040816326534e-06, "logits/chosen": -2.6064066886901855, "logits/rejected": -2.594722270965576, "logps/chosen": -81.89433288574219, "logps/rejected": -115.00162506103516, "loss": 0.1534, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -1.6648212671279907, "rewards/margins": 3.4960713386535645, "rewards/rejected": -5.160892486572266, "step": 100 }, { "epoch": 1.0, "eval_logits/chosen": -2.6083757877349854, "eval_logits/rejected": -2.5937814712524414, "eval_logps/chosen": -80.02838134765625, "eval_logps/rejected": -119.6004409790039, "eval_loss": 0.15436382591724396, "eval_rewards/accuracies": 0.939497709274292, "eval_rewards/chosen": -1.6908209323883057, "eval_rewards/margins": 3.7731716632843018, "eval_rewards/rejected": -5.463992595672607, "eval_runtime": 295.1725, "eval_samples_per_second": 2.968, "eval_steps_per_second": 2.968, "step": 109 }, { "epoch": 1.0, "learning_rate": 2.7551020408163266e-06, "logits/chosen": -2.6033756732940674, "logits/rejected": -2.582958221435547, "logps/chosen": -80.38322448730469, "logps/rejected": -116.6507797241211, "loss": 0.138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.656935691833496, "rewards/margins": 3.698695659637451, "rewards/rejected": -5.3556318283081055, "step": 110 }, { "epoch": 1.1, "learning_rate": 2.5e-06, "logits/chosen": -2.604905843734741, "logits/rejected": -2.591404438018799, "logps/chosen": -80.86669921875, "logps/rejected": -119.6518783569336, "loss": 0.1395, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -1.803047776222229, "rewards/margins": 3.7810962200164795, "rewards/rejected": -5.58414363861084, "step": 120 }, { "epoch": 1.19, "learning_rate": 2.244897959183674e-06, "logits/chosen": -2.590282440185547, "logits/rejected": -2.576897144317627, "logps/chosen": -83.53189086914062, "logps/rejected": -131.67037963867188, "loss": 0.1004, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.9269969463348389, "rewards/margins": 4.388735294342041, "rewards/rejected": -6.315732002258301, "step": 130 }, { "epoch": 1.28, "learning_rate": 1.989795918367347e-06, "logits/chosen": -2.578322410583496, "logits/rejected": -2.5577735900878906, "logps/chosen": -85.14395141601562, "logps/rejected": -126.8256607055664, "loss": 0.1422, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2692408561706543, "rewards/margins": 4.1861348152160645, "rewards/rejected": -6.455375671386719, "step": 140 }, { "epoch": 1.37, "learning_rate": 1.7346938775510206e-06, "logits/chosen": -2.5812907218933105, "logits/rejected": -2.5663979053497314, "logps/chosen": -87.79288482666016, "logps/rejected": -131.3497314453125, "loss": 0.1284, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4120209217071533, "rewards/margins": 4.293553352355957, "rewards/rejected": -6.705574035644531, "step": 150 }, { "epoch": 1.46, "learning_rate": 1.479591836734694e-06, "logits/chosen": -2.596059560775757, "logits/rejected": -2.58156156539917, "logps/chosen": -83.30199432373047, "logps/rejected": -132.67092895507812, "loss": 0.1066, "rewards/accuracies": 0.9593750238418579, "rewards/chosen": -2.065701484680176, "rewards/margins": 4.5842509269714355, "rewards/rejected": -6.6499528884887695, "step": 160 }, { "epoch": 1.55, "learning_rate": 1.2244897959183673e-06, "logits/chosen": -2.5813376903533936, "logits/rejected": -2.569676160812378, "logps/chosen": -82.94264221191406, "logps/rejected": -132.35789489746094, "loss": 0.0994, "rewards/accuracies": 0.96875, "rewards/chosen": -2.1285476684570312, "rewards/margins": 4.649580955505371, "rewards/rejected": -6.778128147125244, "step": 170 }, { "epoch": 1.64, "learning_rate": 9.69387755102041e-07, "logits/chosen": -2.575303077697754, "logits/rejected": -2.5673909187316895, "logps/chosen": -86.48652648925781, "logps/rejected": -134.35574340820312, "loss": 0.1224, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -2.278262138366699, "rewards/margins": 4.526266098022461, "rewards/rejected": -6.80452823638916, "step": 180 }, { "epoch": 1.73, "learning_rate": 7.142857142857143e-07, "logits/chosen": -2.5673184394836426, "logits/rejected": -2.5392918586730957, "logps/chosen": -85.58756256103516, "logps/rejected": -128.51121520996094, "loss": 0.1345, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.157008409500122, "rewards/margins": 4.34613037109375, "rewards/rejected": -6.503138542175293, "step": 190 }, { "epoch": 1.83, "learning_rate": 4.591836734693878e-07, "logits/chosen": -2.5774478912353516, "logits/rejected": -2.556039333343506, "logps/chosen": -84.05760955810547, "logps/rejected": -132.11903381347656, "loss": 0.1088, "rewards/accuracies": 0.9593750238418579, "rewards/chosen": -2.128478527069092, "rewards/margins": 4.659018039703369, "rewards/rejected": -6.787497043609619, "step": 200 }, { "epoch": 1.92, "learning_rate": 2.0408163265306121e-07, "logits/chosen": -2.563249111175537, "logits/rejected": -2.5445797443389893, "logps/chosen": -83.83250427246094, "logps/rejected": -133.15673828125, "loss": 0.0995, "rewards/accuracies": 0.965624988079071, "rewards/chosen": -2.065305709838867, "rewards/margins": 4.883781909942627, "rewards/rejected": -6.949087619781494, "step": 210 }, { "epoch": 1.99, "eval_logits/chosen": -2.5726864337921143, "eval_logits/rejected": -2.556718587875366, "eval_logps/chosen": -85.11767578125, "eval_logps/rejected": -133.85464477539062, "eval_loss": 0.12992651760578156, "eval_rewards/accuracies": 0.9520547986030579, "eval_rewards/chosen": -2.199751138687134, "eval_rewards/margins": 4.689663887023926, "eval_rewards/rejected": -6.889414310455322, "eval_runtime": 288.5007, "eval_samples_per_second": 3.036, "eval_steps_per_second": 3.036, "step": 218 }, { "epoch": 1.99, "step": 218, "total_flos": 0.0, "train_loss": 0.20939183043777396, "train_runtime": 5162.9911, "train_samples_per_second": 1.358, "train_steps_per_second": 0.042 } ], "logging_steps": 10, "max_steps": 218, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }