File size: 19,153 Bytes
7d2d4e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 711,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10548523206751055,
"grad_norm": 2.3504333482192794,
"learning_rate": 6.944444444444445e-06,
"log_odds_chosen": 0.15013551712036133,
"log_odds_ratio": -0.6692488193511963,
"logits/chosen": -0.8321835994720459,
"logits/rejected": -0.832346498966217,
"logps/chosen": -0.9450153112411499,
"logps/rejected": -1.0411678552627563,
"loss": 1.7955,
"nll_loss": 1.4582228660583496,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.09450153261423111,
"rewards/margins": 0.00961525458842516,
"rewards/rejected": -0.10411678999662399,
"step": 25
},
{
"epoch": 0.2109704641350211,
"grad_norm": 1.9691856311458618,
"learning_rate": 1.388888888888889e-05,
"log_odds_chosen": 0.3434825539588928,
"log_odds_ratio": -0.6003904938697815,
"logits/chosen": -0.7575013637542725,
"logits/rejected": -0.7675329446792603,
"logps/chosen": -0.5159790515899658,
"logps/rejected": -0.6563442349433899,
"loss": 0.5969,
"nll_loss": 0.4879494607448578,
"rewards/accuracies": 0.5899999737739563,
"rewards/chosen": -0.05159790441393852,
"rewards/margins": 0.01403652224689722,
"rewards/rejected": -0.06563442200422287,
"step": 50
},
{
"epoch": 0.31645569620253167,
"grad_norm": 1.085908619339194,
"learning_rate": 1.9906103286384977e-05,
"log_odds_chosen": 0.42845839262008667,
"log_odds_ratio": -0.5688134431838989,
"logits/chosen": -0.7494800686836243,
"logits/rejected": -0.7198505997657776,
"logps/chosen": -0.43672674894332886,
"logps/rejected": -0.6124382615089417,
"loss": 0.5533,
"nll_loss": 0.4733336567878723,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.04367266967892647,
"rewards/margins": 0.01757114939391613,
"rewards/rejected": -0.061243828386068344,
"step": 75
},
{
"epoch": 0.4219409282700422,
"grad_norm": 1.1240076241404764,
"learning_rate": 1.912363067292645e-05,
"log_odds_chosen": 0.29888662695884705,
"log_odds_ratio": -0.6093687415122986,
"logits/chosen": -0.709399402141571,
"logits/rejected": -0.7048219442367554,
"logps/chosen": -0.5603894591331482,
"logps/rejected": -0.7039618492126465,
"loss": 0.56,
"nll_loss": 0.49526843428611755,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.056038953363895416,
"rewards/margins": 0.014357234351336956,
"rewards/rejected": -0.07039617747068405,
"step": 100
},
{
"epoch": 0.5274261603375527,
"grad_norm": 1.0788263671658365,
"learning_rate": 1.834115805946792e-05,
"log_odds_chosen": 0.25319904088974,
"log_odds_ratio": -0.631630539894104,
"logits/chosen": -0.6328348517417908,
"logits/rejected": -0.6294259428977966,
"logps/chosen": -0.49323585629463196,
"logps/rejected": -0.5995710492134094,
"loss": 0.5485,
"nll_loss": 0.5131134986877441,
"rewards/accuracies": 0.47999998927116394,
"rewards/chosen": -0.049323588609695435,
"rewards/margins": 0.010633519850671291,
"rewards/rejected": -0.059957101941108704,
"step": 125
},
{
"epoch": 0.6329113924050633,
"grad_norm": 1.1078095413080318,
"learning_rate": 1.755868544600939e-05,
"log_odds_chosen": 0.25750571489334106,
"log_odds_ratio": -0.6318458914756775,
"logits/chosen": -0.7056828141212463,
"logits/rejected": -0.6951937675476074,
"logps/chosen": -0.5244600176811218,
"logps/rejected": -0.605254590511322,
"loss": 0.5609,
"nll_loss": 0.478809654712677,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.052445996552705765,
"rewards/margins": 0.008079464547336102,
"rewards/rejected": -0.06052546575665474,
"step": 150
},
{
"epoch": 0.7383966244725738,
"grad_norm": 1.0260136766275174,
"learning_rate": 1.6776212832550862e-05,
"log_odds_chosen": 0.3896116316318512,
"log_odds_ratio": -0.5955315232276917,
"logits/chosen": -0.6688608527183533,
"logits/rejected": -0.6982090473175049,
"logps/chosen": -0.4829683303833008,
"logps/rejected": -0.6143837571144104,
"loss": 0.5201,
"nll_loss": 0.4728917181491852,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.048296838998794556,
"rewards/margins": 0.013141541741788387,
"rewards/rejected": -0.06143837794661522,
"step": 175
},
{
"epoch": 0.8438818565400844,
"grad_norm": 1.0922913456111067,
"learning_rate": 1.5993740219092334e-05,
"log_odds_chosen": 0.35299748182296753,
"log_odds_ratio": -0.5904638767242432,
"logits/chosen": -0.7948518395423889,
"logits/rejected": -0.7930269837379456,
"logps/chosen": -0.5347493886947632,
"logps/rejected": -0.7274565696716309,
"loss": 0.5678,
"nll_loss": 0.5108060240745544,
"rewards/accuracies": 0.5699999928474426,
"rewards/chosen": -0.05347493290901184,
"rewards/margins": 0.01927073672413826,
"rewards/rejected": -0.0727456733584404,
"step": 200
},
{
"epoch": 0.9493670886075949,
"grad_norm": 1.171188249527363,
"learning_rate": 1.5211267605633803e-05,
"log_odds_chosen": 0.19867561757564545,
"log_odds_ratio": -0.6661250591278076,
"logits/chosen": -0.7350332736968994,
"logits/rejected": -0.7309374213218689,
"logps/chosen": -0.4790670871734619,
"logps/rejected": -0.578321099281311,
"loss": 0.5593,
"nll_loss": 0.4474111795425415,
"rewards/accuracies": 0.49000000953674316,
"rewards/chosen": -0.04790670797228813,
"rewards/margins": 0.009925400838255882,
"rewards/rejected": -0.05783211067318916,
"step": 225
},
{
"epoch": 1.0548523206751055,
"grad_norm": 1.0561427838830786,
"learning_rate": 1.4428794992175275e-05,
"log_odds_chosen": 0.5409557819366455,
"log_odds_ratio": -0.5446589589118958,
"logits/chosen": -0.8016952276229858,
"logits/rejected": -0.7894623279571533,
"logps/chosen": -0.466928094625473,
"logps/rejected": -0.6494468450546265,
"loss": 0.5096,
"nll_loss": 0.4225366711616516,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04669281467795372,
"rewards/margins": 0.018251873552799225,
"rewards/rejected": -0.06494467705488205,
"step": 250
},
{
"epoch": 1.160337552742616,
"grad_norm": 1.0616103418922775,
"learning_rate": 1.3646322378716745e-05,
"log_odds_chosen": 0.6413868069648743,
"log_odds_ratio": -0.5098162293434143,
"logits/chosen": -0.8740493655204773,
"logits/rejected": -0.9086742997169495,
"logps/chosen": -0.43551456928253174,
"logps/rejected": -0.7041670083999634,
"loss": 0.4928,
"nll_loss": 0.4237639605998993,
"rewards/accuracies": 0.6899999976158142,
"rewards/chosen": -0.04355145990848541,
"rewards/margins": 0.026865236461162567,
"rewards/rejected": -0.07041670382022858,
"step": 275
},
{
"epoch": 1.2658227848101267,
"grad_norm": 1.0597296687116873,
"learning_rate": 1.2863849765258216e-05,
"log_odds_chosen": 0.6442282795906067,
"log_odds_ratio": -0.5011763572692871,
"logits/chosen": -1.0111823081970215,
"logits/rejected": -1.0184077024459839,
"logps/chosen": -0.45919105410575867,
"logps/rejected": -0.7301878333091736,
"loss": 0.4639,
"nll_loss": 0.40898972749710083,
"rewards/accuracies": 0.6800000071525574,
"rewards/chosen": -0.045919109135866165,
"rewards/margins": 0.02709968015551567,
"rewards/rejected": -0.07301878184080124,
"step": 300
},
{
"epoch": 1.371308016877637,
"grad_norm": 1.0935677703953148,
"learning_rate": 1.2081377151799686e-05,
"log_odds_chosen": 0.6757858395576477,
"log_odds_ratio": -0.4901491403579712,
"logits/chosen": -0.9852328300476074,
"logits/rejected": -1.0046324729919434,
"logps/chosen": -0.41143473982810974,
"logps/rejected": -0.6924607753753662,
"loss": 0.4867,
"nll_loss": 0.4125671088695526,
"rewards/accuracies": 0.6700000166893005,
"rewards/chosen": -0.041143473237752914,
"rewards/margins": 0.028102604672312737,
"rewards/rejected": -0.0692460760474205,
"step": 325
},
{
"epoch": 1.4767932489451476,
"grad_norm": 1.1585743076882284,
"learning_rate": 1.129890453834116e-05,
"log_odds_chosen": 0.4605286121368408,
"log_odds_ratio": -0.5612049102783203,
"logits/chosen": -0.9352016448974609,
"logits/rejected": -0.9490614533424377,
"logps/chosen": -0.43350136280059814,
"logps/rejected": -0.5812506675720215,
"loss": 0.4718,
"nll_loss": 0.4129113256931305,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.043350137770175934,
"rewards/margins": 0.014774931594729424,
"rewards/rejected": -0.05812506750226021,
"step": 350
},
{
"epoch": 1.5822784810126582,
"grad_norm": 1.0715102217336756,
"learning_rate": 1.051643192488263e-05,
"log_odds_chosen": 0.6196006536483765,
"log_odds_ratio": -0.4975026845932007,
"logits/chosen": -1.0041831731796265,
"logits/rejected": -1.0140608549118042,
"logps/chosen": -0.3989720046520233,
"logps/rejected": -0.6381548047065735,
"loss": 0.4857,
"nll_loss": 0.42035380005836487,
"rewards/accuracies": 0.6200000047683716,
"rewards/chosen": -0.03989719972014427,
"rewards/margins": 0.023918280377984047,
"rewards/rejected": -0.06381548196077347,
"step": 375
},
{
"epoch": 1.6877637130801688,
"grad_norm": 1.0097421405350755,
"learning_rate": 9.7339593114241e-06,
"log_odds_chosen": 0.5579173564910889,
"log_odds_ratio": -0.5348711609840393,
"logits/chosen": -1.0373647212982178,
"logits/rejected": -1.037854790687561,
"logps/chosen": -0.45267555117607117,
"logps/rejected": -0.66820228099823,
"loss": 0.4691,
"nll_loss": 0.410322904586792,
"rewards/accuracies": 0.6200000047683716,
"rewards/chosen": -0.04526755213737488,
"rewards/margins": 0.021552674472332,
"rewards/rejected": -0.06682023406028748,
"step": 400
},
{
"epoch": 1.7932489451476794,
"grad_norm": 1.0546124322913446,
"learning_rate": 8.951486697965573e-06,
"log_odds_chosen": 0.7115356922149658,
"log_odds_ratio": -0.5003312826156616,
"logits/chosen": -1.1095713376998901,
"logits/rejected": -1.1265352964401245,
"logps/chosen": -0.44462206959724426,
"logps/rejected": -0.7215204834938049,
"loss": 0.4849,
"nll_loss": 0.43178752064704895,
"rewards/accuracies": 0.6399999856948853,
"rewards/chosen": -0.04446220397949219,
"rewards/margins": 0.027689840644598007,
"rewards/rejected": -0.0721520483493805,
"step": 425
},
{
"epoch": 1.8987341772151898,
"grad_norm": 1.1203509702182832,
"learning_rate": 8.169014084507043e-06,
"log_odds_chosen": 0.6642155647277832,
"log_odds_ratio": -0.5045433640480042,
"logits/chosen": -1.0597500801086426,
"logits/rejected": -1.0680012702941895,
"logps/chosen": -0.4701367914676666,
"logps/rejected": -0.704271674156189,
"loss": 0.4792,
"nll_loss": 0.4112149477005005,
"rewards/accuracies": 0.6399999856948853,
"rewards/chosen": -0.04701368510723114,
"rewards/margins": 0.023413481190800667,
"rewards/rejected": -0.07042715698480606,
"step": 450
},
{
"epoch": 2.0042194092827006,
"grad_norm": 1.2606333093207305,
"learning_rate": 7.386541471048514e-06,
"log_odds_chosen": 0.5286250114440918,
"log_odds_ratio": -0.5306064486503601,
"logits/chosen": -1.0645604133605957,
"logits/rejected": -1.0691789388656616,
"logps/chosen": -0.44743481278419495,
"logps/rejected": -0.6287744045257568,
"loss": 0.4803,
"nll_loss": 0.42209434509277344,
"rewards/accuracies": 0.6299999952316284,
"rewards/chosen": -0.04474348574876785,
"rewards/margins": 0.018133964389562607,
"rewards/rejected": -0.06287744641304016,
"step": 475
},
{
"epoch": 2.109704641350211,
"grad_norm": 1.0966083263839905,
"learning_rate": 6.604068857589985e-06,
"log_odds_chosen": 0.8573014140129089,
"log_odds_ratio": -0.4275921583175659,
"logits/chosen": -1.1114364862442017,
"logits/rejected": -1.1227397918701172,
"logps/chosen": -0.3463269770145416,
"logps/rejected": -0.6633933782577515,
"loss": 0.4078,
"nll_loss": 0.3584522306919098,
"rewards/accuracies": 0.7400000095367432,
"rewards/chosen": -0.03463269770145416,
"rewards/margins": 0.03170664235949516,
"rewards/rejected": -0.06633934378623962,
"step": 500
},
{
"epoch": 2.2151898734177213,
"grad_norm": 1.1009276796065117,
"learning_rate": 5.821596244131456e-06,
"log_odds_chosen": 0.7615014910697937,
"log_odds_ratio": -0.47703635692596436,
"logits/chosen": -1.1601929664611816,
"logits/rejected": -1.1780595779418945,
"logps/chosen": -0.39950495958328247,
"logps/rejected": -0.6856523752212524,
"loss": 0.4048,
"nll_loss": 0.41196614503860474,
"rewards/accuracies": 0.5899999737739563,
"rewards/chosen": -0.03995049372315407,
"rewards/margins": 0.028614744544029236,
"rewards/rejected": -0.0685652419924736,
"step": 525
},
{
"epoch": 2.320675105485232,
"grad_norm": 1.1135597142860487,
"learning_rate": 5.039123630672926e-06,
"log_odds_chosen": 0.8249316215515137,
"log_odds_ratio": -0.43479686975479126,
"logits/chosen": -1.1542495489120483,
"logits/rejected": -1.1734535694122314,
"logps/chosen": -0.39931461215019226,
"logps/rejected": -0.733440637588501,
"loss": 0.4061,
"nll_loss": 0.39662715792655945,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.039931461215019226,
"rewards/margins": 0.03341260552406311,
"rewards/rejected": -0.07334406673908234,
"step": 550
},
{
"epoch": 2.4261603375527425,
"grad_norm": 1.1107780211778215,
"learning_rate": 4.2566510172143975e-06,
"log_odds_chosen": 1.200696587562561,
"log_odds_ratio": -0.3881736099720001,
"logits/chosen": -1.2098137140274048,
"logits/rejected": -1.2287673950195312,
"logps/chosen": -0.36806556582450867,
"logps/rejected": -0.7970322370529175,
"loss": 0.3924,
"nll_loss": 0.3511776328086853,
"rewards/accuracies": 0.7200000286102295,
"rewards/chosen": -0.03680655360221863,
"rewards/margins": 0.042896661907434464,
"rewards/rejected": -0.07970321923494339,
"step": 575
},
{
"epoch": 2.5316455696202533,
"grad_norm": 1.3159040332886311,
"learning_rate": 3.474178403755869e-06,
"log_odds_chosen": 1.0953081846237183,
"log_odds_ratio": -0.3781184256076813,
"logits/chosen": -1.1880977153778076,
"logits/rejected": -1.2077745199203491,
"logps/chosen": -0.32341885566711426,
"logps/rejected": -0.7020614147186279,
"loss": 0.4006,
"nll_loss": 0.3256089389324188,
"rewards/accuracies": 0.7799999713897705,
"rewards/chosen": -0.032341886311769485,
"rewards/margins": 0.03786425665020943,
"rewards/rejected": -0.07020614296197891,
"step": 600
},
{
"epoch": 2.6371308016877637,
"grad_norm": 1.070273138577641,
"learning_rate": 2.69170579029734e-06,
"log_odds_chosen": 0.6728782057762146,
"log_odds_ratio": -0.5007551312446594,
"logits/chosen": -1.1887409687042236,
"logits/rejected": -1.2133393287658691,
"logps/chosen": -0.40768423676490784,
"logps/rejected": -0.6612467169761658,
"loss": 0.4134,
"nll_loss": 0.42612510919570923,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.04076842963695526,
"rewards/margins": 0.025356244295835495,
"rewards/rejected": -0.06612467020750046,
"step": 625
},
{
"epoch": 2.742616033755274,
"grad_norm": 1.1092420255798914,
"learning_rate": 1.9092331768388107e-06,
"log_odds_chosen": 0.9050965309143066,
"log_odds_ratio": -0.42763033509254456,
"logits/chosen": -1.192216396331787,
"logits/rejected": -1.2236640453338623,
"logps/chosen": -0.35716915130615234,
"logps/rejected": -0.735872209072113,
"loss": 0.4171,
"nll_loss": 0.35565415024757385,
"rewards/accuracies": 0.6600000262260437,
"rewards/chosen": -0.03571692109107971,
"rewards/margins": 0.03787030279636383,
"rewards/rejected": -0.07358721643686295,
"step": 650
},
{
"epoch": 2.848101265822785,
"grad_norm": 1.0762367696616766,
"learning_rate": 1.1267605633802817e-06,
"log_odds_chosen": 0.8493003845214844,
"log_odds_ratio": -0.4451717734336853,
"logits/chosen": -1.2292503118515015,
"logits/rejected": -1.2512164115905762,
"logps/chosen": -0.40543216466903687,
"logps/rejected": -0.7575715780258179,
"loss": 0.4083,
"nll_loss": 0.3527292311191559,
"rewards/accuracies": 0.6700000166893005,
"rewards/chosen": -0.040543220937252045,
"rewards/margins": 0.03521393612027168,
"rewards/rejected": -0.07575715333223343,
"step": 675
},
{
"epoch": 2.9535864978902953,
"grad_norm": 1.4284839235977267,
"learning_rate": 3.4428794992175273e-07,
"log_odds_chosen": 0.6508604288101196,
"log_odds_ratio": -0.5013710856437683,
"logits/chosen": -1.2339705228805542,
"logits/rejected": -1.2357439994812012,
"logps/chosen": -0.38158729672431946,
"logps/rejected": -0.603762149810791,
"loss": 0.4226,
"nll_loss": 0.40551432967185974,
"rewards/accuracies": 0.5199999809265137,
"rewards/chosen": -0.03815872594714165,
"rewards/margins": 0.022217486053705215,
"rewards/rejected": -0.06037621572613716,
"step": 700
}
],
"logging_steps": 25,
"max_steps": 711,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|