|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8667490482330322, |
|
"logits/rejected": -1.8710733652114868, |
|
"logps/chosen": -36.97007369995117, |
|
"logps/rejected": -33.66944885253906, |
|
"loss": 0.9317, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.03287407010793686, |
|
"rewards/margins": 0.06830974668264389, |
|
"rewards/rejected": -0.03543568402528763, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9981460571289062, |
|
"logits/rejected": -2.000789165496826, |
|
"logps/chosen": -29.641231536865234, |
|
"logps/rejected": -29.06744384765625, |
|
"loss": 0.9955, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0008672710391692817, |
|
"rewards/margins": 0.004467610269784927, |
|
"rewards/rejected": -0.0036003391724079847, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.920600175857544, |
|
"logits/rejected": -1.917925238609314, |
|
"logps/chosen": -31.395061492919922, |
|
"logps/rejected": -33.240909576416016, |
|
"loss": 0.9609, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.01893135905265808, |
|
"rewards/margins": 0.03913776949048042, |
|
"rewards/rejected": -0.020206410437822342, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.017815113067627, |
|
"logits/rejected": -2.0090720653533936, |
|
"logps/chosen": -32.5806884765625, |
|
"logps/rejected": -32.515098571777344, |
|
"loss": 1.0013, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.003494268748909235, |
|
"rewards/margins": -0.0013132141903042793, |
|
"rewards/rejected": -0.002181055024266243, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8630876541137695, |
|
"logits/rejected": -1.8523353338241577, |
|
"logps/chosen": -33.549766540527344, |
|
"logps/rejected": -35.46318435668945, |
|
"loss": 0.9833, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.006940312683582306, |
|
"rewards/margins": 0.016694897785782814, |
|
"rewards/rejected": -0.009754580445587635, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9419746398925781, |
|
"logits/rejected": -1.943914771080017, |
|
"logps/chosen": -32.527896881103516, |
|
"logps/rejected": -33.21547317504883, |
|
"loss": 0.9153, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06505907326936722, |
|
"rewards/margins": 0.10474522411823273, |
|
"rewards/rejected": -0.03968615084886551, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.07257080078125, |
|
"logits/rejected": -2.0775399208068848, |
|
"logps/chosen": -34.00202560424805, |
|
"logps/rejected": -36.622886657714844, |
|
"loss": 0.9636, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.012188142165541649, |
|
"rewards/margins": 0.03642461448907852, |
|
"rewards/rejected": -0.04861275106668472, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9333629608154297, |
|
"logits/rejected": -1.9364970922470093, |
|
"logps/chosen": -34.302101135253906, |
|
"logps/rejected": -34.63160705566406, |
|
"loss": 0.862, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.1131378561258316, |
|
"rewards/margins": 0.1484164148569107, |
|
"rewards/rejected": -0.0352785661816597, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9408857822418213, |
|
"logits/rejected": -1.945412039756775, |
|
"logps/chosen": -32.36528015136719, |
|
"logps/rejected": -32.34526824951172, |
|
"loss": 0.9225, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.09192506223917007, |
|
"rewards/margins": 0.07964853197336197, |
|
"rewards/rejected": 0.012276534922420979, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.037550210952759, |
|
"logits/rejected": -2.0355725288391113, |
|
"logps/chosen": -32.142730712890625, |
|
"logps/rejected": -31.29366683959961, |
|
"loss": 0.8913, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.08829358220100403, |
|
"rewards/margins": 0.1130049005150795, |
|
"rewards/rejected": -0.02471131458878517, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.232161283493042, |
|
"eval_logits/rejected": -2.2273108959198, |
|
"eval_logps/chosen": -34.040714263916016, |
|
"eval_logps/rejected": -37.54047775268555, |
|
"eval_loss": 0.9844526052474976, |
|
"eval_rewards/accuracies": 0.5195183157920837, |
|
"eval_rewards/chosen": -0.005542654078453779, |
|
"eval_rewards/margins": 0.015924591571092606, |
|
"eval_rewards/rejected": -0.021467244252562523, |
|
"eval_runtime": 145.9018, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.9926517009735107, |
|
"logits/rejected": -1.9902753829956055, |
|
"logps/chosen": -33.12412643432617, |
|
"logps/rejected": -34.011417388916016, |
|
"loss": 0.9361, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.10772128403186798, |
|
"rewards/margins": 0.09156213700771332, |
|
"rewards/rejected": 0.016159160062670708, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.00441312789917, |
|
"logits/rejected": -1.996093988418579, |
|
"logps/chosen": -32.33955383300781, |
|
"logps/rejected": -32.13432312011719, |
|
"loss": 0.9401, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.09481850266456604, |
|
"rewards/margins": 0.07035262137651443, |
|
"rewards/rejected": 0.024465877562761307, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0318965911865234, |
|
"logits/rejected": -2.023927688598633, |
|
"logps/chosen": -30.336984634399414, |
|
"logps/rejected": -32.0634765625, |
|
"loss": 0.9061, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.12486012279987335, |
|
"rewards/margins": 0.1342838853597641, |
|
"rewards/rejected": -0.009423775598406792, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9620994329452515, |
|
"logits/rejected": -1.9723354578018188, |
|
"logps/chosen": -31.222240447998047, |
|
"logps/rejected": -32.57916259765625, |
|
"loss": 0.795, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.17767605185508728, |
|
"rewards/margins": 0.21753115952014923, |
|
"rewards/rejected": -0.03985511139035225, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8727748394012451, |
|
"logits/rejected": -1.87395441532135, |
|
"logps/chosen": -33.931861877441406, |
|
"logps/rejected": -34.79869842529297, |
|
"loss": 0.7946, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.23474939167499542, |
|
"rewards/margins": 0.272051066160202, |
|
"rewards/rejected": -0.03730170056223869, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9248745441436768, |
|
"logits/rejected": -1.9214649200439453, |
|
"logps/chosen": -36.014469146728516, |
|
"logps/rejected": -32.73783493041992, |
|
"loss": 0.8532, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.14148668944835663, |
|
"rewards/margins": 0.15506146848201752, |
|
"rewards/rejected": -0.013574766926467419, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.025555372238159, |
|
"logits/rejected": -2.01819109916687, |
|
"logps/chosen": -33.50218200683594, |
|
"logps/rejected": -31.41971206665039, |
|
"loss": 0.7292, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.26611366868019104, |
|
"rewards/margins": 0.3271873891353607, |
|
"rewards/rejected": -0.06107370927929878, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0320167541503906, |
|
"logits/rejected": -2.037261486053467, |
|
"logps/chosen": -32.24850845336914, |
|
"logps/rejected": -32.45344924926758, |
|
"loss": 0.7865, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2666531801223755, |
|
"rewards/margins": 0.23813048005104065, |
|
"rewards/rejected": 0.028522688895463943, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.032525062561035, |
|
"logits/rejected": -2.0297436714172363, |
|
"logps/chosen": -31.313217163085938, |
|
"logps/rejected": -31.349472045898438, |
|
"loss": 0.8387, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.1581628918647766, |
|
"rewards/margins": 0.18629543483257294, |
|
"rewards/rejected": -0.02813255414366722, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.902632713317871, |
|
"logits/rejected": -1.907284140586853, |
|
"logps/chosen": -31.320043563842773, |
|
"logps/rejected": -32.85698699951172, |
|
"loss": 0.7293, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2576131224632263, |
|
"rewards/margins": 0.31706100702285767, |
|
"rewards/rejected": -0.05944784730672836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.228637933731079, |
|
"eval_logits/rejected": -2.2238004207611084, |
|
"eval_logps/chosen": -34.053680419921875, |
|
"eval_logps/rejected": -37.581058502197266, |
|
"eval_loss": 0.9601577520370483, |
|
"eval_rewards/accuracies": 0.5714285373687744, |
|
"eval_rewards/chosen": -0.01721162348985672, |
|
"eval_rewards/margins": 0.040783192962408066, |
|
"eval_rewards/rejected": -0.057994820177555084, |
|
"eval_runtime": 145.5388, |
|
"eval_samples_per_second": 2.357, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.0149245262145996, |
|
"logits/rejected": -2.025560140609741, |
|
"logps/chosen": -31.77438735961914, |
|
"logps/rejected": -33.95419692993164, |
|
"loss": 0.7666, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.17722666263580322, |
|
"rewards/margins": 0.25957340002059937, |
|
"rewards/rejected": -0.08234670013189316, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.906951904296875, |
|
"logits/rejected": -1.9217418432235718, |
|
"logps/chosen": -29.83829116821289, |
|
"logps/rejected": -31.636096954345703, |
|
"loss": 0.7204, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2459266185760498, |
|
"rewards/margins": 0.3160557448863983, |
|
"rewards/rejected": -0.07012919336557388, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9629713296890259, |
|
"logits/rejected": -1.9669532775878906, |
|
"logps/chosen": -33.124656677246094, |
|
"logps/rejected": -31.630443572998047, |
|
"loss": 0.7348, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2635299265384674, |
|
"rewards/margins": 0.3365571200847626, |
|
"rewards/rejected": -0.07302714884281158, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9614177942276, |
|
"logits/rejected": -1.9395818710327148, |
|
"logps/chosen": -33.87095260620117, |
|
"logps/rejected": -35.10104751586914, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.254514217376709, |
|
"rewards/margins": 0.38974156975746155, |
|
"rewards/rejected": -0.13522735238075256, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.0029492378234863, |
|
"logits/rejected": -1.999629020690918, |
|
"logps/chosen": -32.730865478515625, |
|
"logps/rejected": -36.28009796142578, |
|
"loss": 0.8055, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.17478153109550476, |
|
"rewards/margins": 0.2244400531053543, |
|
"rewards/rejected": -0.04965851828455925, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8703190088272095, |
|
"logits/rejected": -1.8679043054580688, |
|
"logps/chosen": -33.98231887817383, |
|
"logps/rejected": -35.54644775390625, |
|
"loss": 0.8042, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.18547315895557404, |
|
"rewards/margins": 0.22311437129974365, |
|
"rewards/rejected": -0.03764120861887932, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8551464080810547, |
|
"logits/rejected": -1.852746605873108, |
|
"logps/chosen": -34.20850372314453, |
|
"logps/rejected": -31.803356170654297, |
|
"loss": 0.8096, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.170187309384346, |
|
"rewards/margins": 0.19900819659233093, |
|
"rewards/rejected": -0.02882089652121067, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9582526683807373, |
|
"logits/rejected": -1.947749376296997, |
|
"logps/chosen": -35.0114631652832, |
|
"logps/rejected": -31.88564682006836, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3106640875339508, |
|
"rewards/margins": 0.3508565425872803, |
|
"rewards/rejected": -0.040192440152168274, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.053699493408203, |
|
"logits/rejected": -2.038789749145508, |
|
"logps/chosen": -30.727243423461914, |
|
"logps/rejected": -32.641685485839844, |
|
"loss": 0.8827, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.17719359695911407, |
|
"rewards/margins": 0.16805905103683472, |
|
"rewards/rejected": 0.009134533815085888, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9243850708007812, |
|
"logits/rejected": -1.9218356609344482, |
|
"logps/chosen": -32.43050003051758, |
|
"logps/rejected": -30.8950138092041, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.446951299905777, |
|
"rewards/margins": 0.5173346400260925, |
|
"rewards/rejected": -0.07038338482379913, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2248916625976562, |
|
"eval_logits/rejected": -2.2200686931610107, |
|
"eval_logps/chosen": -34.08660125732422, |
|
"eval_logps/rejected": -37.603153228759766, |
|
"eval_loss": 0.9712583422660828, |
|
"eval_rewards/accuracies": 0.5282392501831055, |
|
"eval_rewards/chosen": -0.046843186020851135, |
|
"eval_rewards/margins": 0.031036507338285446, |
|
"eval_rewards/rejected": -0.07787969708442688, |
|
"eval_runtime": 145.8399, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9094560146331787, |
|
"logits/rejected": -1.9062097072601318, |
|
"logps/chosen": -31.33370018005371, |
|
"logps/rejected": -33.83475112915039, |
|
"loss": 0.728, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.24268333613872528, |
|
"rewards/margins": 0.32601919770240784, |
|
"rewards/rejected": -0.08333584666252136, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9593706130981445, |
|
"logits/rejected": -1.9471466541290283, |
|
"logps/chosen": -34.3392333984375, |
|
"logps/rejected": -33.68544387817383, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2311573475599289, |
|
"rewards/margins": 0.36310091614723206, |
|
"rewards/rejected": -0.13194358348846436, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -1.9940307140350342, |
|
"logits/rejected": -1.9925845861434937, |
|
"logps/chosen": -33.20854568481445, |
|
"logps/rejected": -32.552764892578125, |
|
"loss": 0.7359, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.24952416121959686, |
|
"rewards/margins": 0.31538745760917664, |
|
"rewards/rejected": -0.06586329638957977, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.080763578414917, |
|
"logits/rejected": -2.065063953399658, |
|
"logps/chosen": -33.82006072998047, |
|
"logps/rejected": -33.105167388916016, |
|
"loss": 0.7407, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.32339948415756226, |
|
"rewards/margins": 0.31181785464286804, |
|
"rewards/rejected": 0.011581619270145893, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.9535402059555054, |
|
"logits/rejected": -1.9526821374893188, |
|
"logps/chosen": -32.8734130859375, |
|
"logps/rejected": -32.565185546875, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.36781299114227295, |
|
"rewards/margins": 0.4578477442264557, |
|
"rewards/rejected": -0.09003473073244095, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.909014344215393, |
|
"logits/rejected": -1.919298768043518, |
|
"logps/chosen": -31.879894256591797, |
|
"logps/rejected": -35.34550857543945, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.29231759905815125, |
|
"rewards/margins": 0.34339430928230286, |
|
"rewards/rejected": -0.051076728850603104, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.0480055809020996, |
|
"logits/rejected": -2.0415189266204834, |
|
"logps/chosen": -33.37665939331055, |
|
"logps/rejected": -29.24251937866211, |
|
"loss": 0.7583, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.24777868390083313, |
|
"rewards/margins": 0.262265145778656, |
|
"rewards/rejected": -0.014486486092209816, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.9075886011123657, |
|
"logits/rejected": -1.9097877740859985, |
|
"logps/chosen": -33.8558464050293, |
|
"logps/rejected": -30.982568740844727, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3448841869831085, |
|
"rewards/margins": 0.44383174180984497, |
|
"rewards/rejected": -0.09894753992557526, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.8125879040012112, |
|
"train_runtime": 3249.3137, |
|
"train_samples_per_second": 0.948, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|