|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9902995720399428, |
|
"eval_steps": 100, |
|
"global_step": 218, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.2727272727272729e-07, |
|
"logits/chosen": -2.779836893081665, |
|
"logits/rejected": -2.772892951965332, |
|
"logps/chosen": -67.39044952392578, |
|
"logps/rejected": -65.7892074584961, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits/chosen": -2.764805793762207, |
|
"logits/rejected": -2.7586262226104736, |
|
"logps/chosen": -63.05746841430664, |
|
"logps/rejected": -64.96013641357422, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6006944179534912, |
|
"rewards/chosen": 0.009925955906510353, |
|
"rewards/margins": 0.013827367685735226, |
|
"rewards/rejected": -0.0039014113135635853, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": -2.7655444145202637, |
|
"logits/rejected": -2.7531120777130127, |
|
"logps/chosen": -60.976318359375, |
|
"logps/rejected": -64.35781860351562, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06535812467336655, |
|
"rewards/margins": 0.22067773342132568, |
|
"rewards/rejected": -0.15531960129737854, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.795918367346939e-06, |
|
"logits/chosen": -2.7331104278564453, |
|
"logits/rejected": -2.722367763519287, |
|
"logps/chosen": -63.54418182373047, |
|
"logps/rejected": -77.56448364257812, |
|
"loss": 0.3997, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.11322204768657684, |
|
"rewards/margins": 0.9385285377502441, |
|
"rewards/rejected": -1.0517505407333374, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.540816326530613e-06, |
|
"logits/chosen": -2.685359477996826, |
|
"logits/rejected": -2.6721653938293457, |
|
"logps/chosen": -67.8324966430664, |
|
"logps/rejected": -89.94172668457031, |
|
"loss": 0.2768, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5856838226318359, |
|
"rewards/margins": 1.7414783239364624, |
|
"rewards/rejected": -2.327162265777588, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"logits/chosen": -2.660297393798828, |
|
"logits/rejected": -2.6442055702209473, |
|
"logps/chosen": -72.59104919433594, |
|
"logps/rejected": -93.20745849609375, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.7966881394386292, |
|
"rewards/margins": 2.058647632598877, |
|
"rewards/rejected": -2.8553357124328613, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.03061224489796e-06, |
|
"logits/chosen": -2.643165111541748, |
|
"logits/rejected": -2.63153076171875, |
|
"logps/chosen": -72.09125518798828, |
|
"logps/rejected": -99.36156463623047, |
|
"loss": 0.2392, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -1.083268404006958, |
|
"rewards/margins": 2.3197226524353027, |
|
"rewards/rejected": -3.4029908180236816, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.7755102040816327e-06, |
|
"logits/chosen": -2.638352632522583, |
|
"logits/rejected": -2.6236445903778076, |
|
"logps/chosen": -77.40001678466797, |
|
"logps/rejected": -107.3912124633789, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.5230330228805542, |
|
"rewards/margins": 2.718259572982788, |
|
"rewards/rejected": -4.241292476654053, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5204081632653062e-06, |
|
"logits/chosen": -2.6177425384521484, |
|
"logits/rejected": -2.597738265991211, |
|
"logps/chosen": -75.93782043457031, |
|
"logps/rejected": -108.6824951171875, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.198141098022461, |
|
"rewards/margins": 3.195936918258667, |
|
"rewards/rejected": -4.394078254699707, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.2653061224489794e-06, |
|
"logits/chosen": -2.6300315856933594, |
|
"logits/rejected": -2.6183547973632812, |
|
"logps/chosen": -80.39871978759766, |
|
"logps/rejected": -113.6379165649414, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -1.5853779315948486, |
|
"rewards/margins": 3.1853203773498535, |
|
"rewards/rejected": -4.770698547363281, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.0102040816326534e-06, |
|
"logits/chosen": -2.6064066886901855, |
|
"logits/rejected": -2.594722270965576, |
|
"logps/chosen": -81.89433288574219, |
|
"logps/rejected": -115.00162506103516, |
|
"loss": 0.1534, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -1.6648212671279907, |
|
"rewards/margins": 3.4960713386535645, |
|
"rewards/rejected": -5.160892486572266, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.6083757877349854, |
|
"eval_logits/rejected": -2.5937814712524414, |
|
"eval_logps/chosen": -80.02838134765625, |
|
"eval_logps/rejected": -119.6004409790039, |
|
"eval_loss": 0.15436382591724396, |
|
"eval_rewards/accuracies": 0.939497709274292, |
|
"eval_rewards/chosen": -1.6908209323883057, |
|
"eval_rewards/margins": 3.7731716632843018, |
|
"eval_rewards/rejected": -5.463992595672607, |
|
"eval_runtime": 295.1725, |
|
"eval_samples_per_second": 2.968, |
|
"eval_steps_per_second": 2.968, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.7551020408163266e-06, |
|
"logits/chosen": -2.6033756732940674, |
|
"logits/rejected": -2.582958221435547, |
|
"logps/chosen": -80.38322448730469, |
|
"logps/rejected": -116.6507797241211, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.656935691833496, |
|
"rewards/margins": 3.698695659637451, |
|
"rewards/rejected": -5.3556318283081055, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.604905843734741, |
|
"logits/rejected": -2.591404438018799, |
|
"logps/chosen": -80.86669921875, |
|
"logps/rejected": -119.6518783569336, |
|
"loss": 0.1395, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.803047776222229, |
|
"rewards/margins": 3.7810962200164795, |
|
"rewards/rejected": -5.58414363861084, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.244897959183674e-06, |
|
"logits/chosen": -2.590282440185547, |
|
"logits/rejected": -2.576897144317627, |
|
"logps/chosen": -83.53189086914062, |
|
"logps/rejected": -131.67037963867188, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.9269969463348389, |
|
"rewards/margins": 4.388735294342041, |
|
"rewards/rejected": -6.315732002258301, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.989795918367347e-06, |
|
"logits/chosen": -2.578322410583496, |
|
"logits/rejected": -2.5577735900878906, |
|
"logps/chosen": -85.14395141601562, |
|
"logps/rejected": -126.8256607055664, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.2692408561706543, |
|
"rewards/margins": 4.1861348152160645, |
|
"rewards/rejected": -6.455375671386719, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.7346938775510206e-06, |
|
"logits/chosen": -2.5812907218933105, |
|
"logits/rejected": -2.5663979053497314, |
|
"logps/chosen": -87.79288482666016, |
|
"logps/rejected": -131.3497314453125, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.4120209217071533, |
|
"rewards/margins": 4.293553352355957, |
|
"rewards/rejected": -6.705574035644531, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.479591836734694e-06, |
|
"logits/chosen": -2.596059560775757, |
|
"logits/rejected": -2.58156156539917, |
|
"logps/chosen": -83.30199432373047, |
|
"logps/rejected": -132.67092895507812, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -2.065701484680176, |
|
"rewards/margins": 4.5842509269714355, |
|
"rewards/rejected": -6.6499528884887695, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.2244897959183673e-06, |
|
"logits/chosen": -2.5813376903533936, |
|
"logits/rejected": -2.569676160812378, |
|
"logps/chosen": -82.94264221191406, |
|
"logps/rejected": -132.35789489746094, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.1285476684570312, |
|
"rewards/margins": 4.649580955505371, |
|
"rewards/rejected": -6.778128147125244, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.69387755102041e-07, |
|
"logits/chosen": -2.575303077697754, |
|
"logits/rejected": -2.5673909187316895, |
|
"logps/chosen": -86.48652648925781, |
|
"logps/rejected": -134.35574340820312, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -2.278262138366699, |
|
"rewards/margins": 4.526266098022461, |
|
"rewards/rejected": -6.80452823638916, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.142857142857143e-07, |
|
"logits/chosen": -2.5673184394836426, |
|
"logits/rejected": -2.5392918586730957, |
|
"logps/chosen": -85.58756256103516, |
|
"logps/rejected": -128.51121520996094, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.157008409500122, |
|
"rewards/margins": 4.34613037109375, |
|
"rewards/rejected": -6.503138542175293, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.591836734693878e-07, |
|
"logits/chosen": -2.5774478912353516, |
|
"logits/rejected": -2.556039333343506, |
|
"logps/chosen": -84.05760955810547, |
|
"logps/rejected": -132.11903381347656, |
|
"loss": 0.1088, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -2.128478527069092, |
|
"rewards/margins": 4.659018039703369, |
|
"rewards/rejected": -6.787497043609619, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"logits/chosen": -2.563249111175537, |
|
"logits/rejected": -2.5445797443389893, |
|
"logps/chosen": -83.83250427246094, |
|
"logps/rejected": -133.15673828125, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -2.065305709838867, |
|
"rewards/margins": 4.883781909942627, |
|
"rewards/rejected": -6.949087619781494, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_logits/chosen": -2.5726864337921143, |
|
"eval_logits/rejected": -2.556718587875366, |
|
"eval_logps/chosen": -85.11767578125, |
|
"eval_logps/rejected": -133.85464477539062, |
|
"eval_loss": 0.12992651760578156, |
|
"eval_rewards/accuracies": 0.9520547986030579, |
|
"eval_rewards/chosen": -2.199751138687134, |
|
"eval_rewards/margins": 4.689663887023926, |
|
"eval_rewards/rejected": -6.889414310455322, |
|
"eval_runtime": 288.5007, |
|
"eval_samples_per_second": 3.036, |
|
"eval_steps_per_second": 3.036, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"step": 218, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20939183043777396, |
|
"train_runtime": 5162.9911, |
|
"train_samples_per_second": 1.358, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 218, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|