{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 17336,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.7683433317951084e-05,
      "grad_norm": 0.3999878466129303,
      "learning_rate": 1.1534025374855825e-07,
      "loss": 1.182,
      "step": 1
    },
    {
      "epoch": 0.0002884171665897554,
      "grad_norm": 0.3274151384830475,
      "learning_rate": 5.767012687427913e-07,
      "loss": 1.0887,
      "step": 5
    },
    {
      "epoch": 0.0005768343331795108,
      "grad_norm": 0.42087307572364807,
      "learning_rate": 1.1534025374855826e-06,
      "loss": 1.2133,
      "step": 10
    },
    {
      "epoch": 0.0008652514997692663,
      "grad_norm": 0.48705124855041504,
      "learning_rate": 1.7301038062283738e-06,
      "loss": 1.1889,
      "step": 15
    },
    {
      "epoch": 0.0011536686663590216,
      "grad_norm": 0.3737321197986603,
      "learning_rate": 2.3068050749711653e-06,
      "loss": 1.2104,
      "step": 20
    },
    {
      "epoch": 0.001442085832948777,
      "grad_norm": 0.3640059232711792,
      "learning_rate": 2.8835063437139563e-06,
      "loss": 1.1713,
      "step": 25
    },
    {
      "epoch": 0.0017305029995385325,
      "grad_norm": 0.3122299611568451,
      "learning_rate": 3.4602076124567477e-06,
      "loss": 1.0437,
      "step": 30
    },
    {
      "epoch": 0.0020189201661282878,
      "grad_norm": 0.3902992010116577,
      "learning_rate": 4.036908881199539e-06,
      "loss": 1.2119,
      "step": 35
    },
    {
      "epoch": 0.0023073373327180432,
      "grad_norm": 0.3269282579421997,
      "learning_rate": 4.6136101499423305e-06,
      "loss": 1.1553,
      "step": 40
    },
    {
      "epoch": 0.0025957544993077987,
      "grad_norm": 0.3726448118686676,
      "learning_rate": 5.190311418685121e-06,
      "loss": 1.1356,
      "step": 45
    },
    {
      "epoch": 0.002884171665897554,
      "grad_norm": 0.32537516951560974,
      "learning_rate": 5.7670126874279126e-06,
      "loss": 1.1376,
      "step": 50
    },
    {
      "epoch": 0.0031725888324873096,
      "grad_norm": 0.2968432307243347,
      "learning_rate": 6.3437139561707036e-06,
      "loss": 1.1193,
      "step": 55
    },
    {
      "epoch": 0.003461005999077065,
      "grad_norm": 0.3939710855484009,
      "learning_rate": 6.920415224913495e-06,
      "loss": 1.1053,
      "step": 60
    },
    {
      "epoch": 0.0037494231656668205,
      "grad_norm": 0.31792372465133667,
      "learning_rate": 7.497116493656286e-06,
      "loss": 1.1071,
      "step": 65
    },
    {
      "epoch": 0.0040378403322565756,
      "grad_norm": 0.31301212310791016,
      "learning_rate": 8.073817762399077e-06,
      "loss": 1.0676,
      "step": 70
    },
    {
      "epoch": 0.0043262574988463314,
      "grad_norm": 0.3126572370529175,
      "learning_rate": 8.650519031141868e-06,
      "loss": 1.0958,
      "step": 75
    },
    {
      "epoch": 0.0046146746654360865,
      "grad_norm": 0.28482192754745483,
      "learning_rate": 9.227220299884661e-06,
      "loss": 1.0434,
      "step": 80
    },
    {
      "epoch": 0.004903091832025842,
      "grad_norm": 0.3318271338939667,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.9889,
      "step": 85
    },
    {
      "epoch": 0.005191508998615597,
      "grad_norm": 0.30128103494644165,
      "learning_rate": 1.0380622837370241e-05,
      "loss": 1.1145,
      "step": 90
    },
    {
      "epoch": 0.005479926165205353,
      "grad_norm": 0.3132428526878357,
      "learning_rate": 1.0957324106113035e-05,
      "loss": 1.0595,
      "step": 95
    },
    {
      "epoch": 0.005768343331795108,
      "grad_norm": 0.3268517255783081,
      "learning_rate": 1.1534025374855825e-05,
      "loss": 0.9886,
      "step": 100
    },
    {
      "epoch": 0.006056760498384864,
      "grad_norm": 0.33606696128845215,
      "learning_rate": 1.2110726643598615e-05,
      "loss": 1.0143,
      "step": 105
    },
    {
      "epoch": 0.006345177664974619,
      "grad_norm": 0.3881881535053253,
      "learning_rate": 1.2687427912341407e-05,
      "loss": 1.0,
      "step": 110
    },
    {
      "epoch": 0.006633594831564375,
      "grad_norm": 0.2971743047237396,
      "learning_rate": 1.3264129181084197e-05,
      "loss": 0.957,
      "step": 115
    },
    {
      "epoch": 0.00692201199815413,
      "grad_norm": 0.30101463198661804,
      "learning_rate": 1.384083044982699e-05,
      "loss": 1.0343,
      "step": 120
    },
    {
      "epoch": 0.007210429164743885,
      "grad_norm": 0.3259972333908081,
      "learning_rate": 1.4417531718569783e-05,
      "loss": 1.0182,
      "step": 125
    },
    {
      "epoch": 0.007498846331333641,
      "grad_norm": 0.2966211438179016,
      "learning_rate": 1.4994232987312573e-05,
      "loss": 1.047,
      "step": 130
    },
    {
      "epoch": 0.007787263497923396,
      "grad_norm": 0.3242364525794983,
      "learning_rate": 1.5570934256055363e-05,
      "loss": 1.0458,
      "step": 135
    },
    {
      "epoch": 0.008075680664513151,
      "grad_norm": 0.31030040979385376,
      "learning_rate": 1.6147635524798155e-05,
      "loss": 1.047,
      "step": 140
    },
    {
      "epoch": 0.008364097831102908,
      "grad_norm": 0.3165462911128998,
      "learning_rate": 1.6724336793540947e-05,
      "loss": 1.0784,
      "step": 145
    },
    {
      "epoch": 0.008652514997692663,
      "grad_norm": 0.3422790467739105,
      "learning_rate": 1.7301038062283735e-05,
      "loss": 1.0578,
      "step": 150
    },
    {
      "epoch": 0.008940932164282418,
      "grad_norm": 0.32128044962882996,
      "learning_rate": 1.787773933102653e-05,
      "loss": 1.0142,
      "step": 155
    },
    {
      "epoch": 0.009229349330872173,
      "grad_norm": 0.30257320404052734,
      "learning_rate": 1.8454440599769322e-05,
      "loss": 0.9874,
      "step": 160
    },
    {
      "epoch": 0.00951776649746193,
      "grad_norm": 0.30845504999160767,
      "learning_rate": 1.903114186851211e-05,
      "loss": 0.9731,
      "step": 165
    },
    {
      "epoch": 0.009806183664051685,
      "grad_norm": 0.36576882004737854,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 1.0243,
      "step": 170
    },
    {
      "epoch": 0.01009460083064144,
      "grad_norm": 0.34204497933387756,
      "learning_rate": 2.0184544405997694e-05,
      "loss": 1.1213,
      "step": 175
    },
    {
      "epoch": 0.010383017997231195,
      "grad_norm": 0.357164204120636,
      "learning_rate": 2.0761245674740483e-05,
      "loss": 1.0324,
      "step": 180
    },
    {
      "epoch": 0.01067143516382095,
      "grad_norm": 0.3807251453399658,
      "learning_rate": 2.1337946943483278e-05,
      "loss": 0.9613,
      "step": 185
    },
    {
      "epoch": 0.010959852330410707,
      "grad_norm": 0.3466082215309143,
      "learning_rate": 2.191464821222607e-05,
      "loss": 1.0752,
      "step": 190
    },
    {
      "epoch": 0.011248269497000462,
      "grad_norm": 0.3457271456718445,
      "learning_rate": 2.249134948096886e-05,
      "loss": 1.0638,
      "step": 195
    },
    {
      "epoch": 0.011536686663590217,
      "grad_norm": 0.3821125626564026,
      "learning_rate": 2.306805074971165e-05,
      "loss": 1.0103,
      "step": 200
    },
    {
      "epoch": 0.011825103830179972,
      "grad_norm": 0.3460346758365631,
      "learning_rate": 2.3644752018454442e-05,
      "loss": 1.0103,
      "step": 205
    },
    {
      "epoch": 0.012113520996769728,
      "grad_norm": 0.3334082365036011,
      "learning_rate": 2.422145328719723e-05,
      "loss": 1.0671,
      "step": 210
    },
    {
      "epoch": 0.012401938163359483,
      "grad_norm": 0.3596220910549164,
      "learning_rate": 2.4798154555940022e-05,
      "loss": 0.9234,
      "step": 215
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 0.4396967887878418,
      "learning_rate": 2.5374855824682814e-05,
      "loss": 1.0464,
      "step": 220
    },
    {
      "epoch": 0.012978772496538993,
      "grad_norm": 0.3878267705440521,
      "learning_rate": 2.5951557093425606e-05,
      "loss": 1.0128,
      "step": 225
    },
    {
      "epoch": 0.01326718966312875,
      "grad_norm": 0.42701923847198486,
      "learning_rate": 2.6528258362168395e-05,
      "loss": 1.0255,
      "step": 230
    },
    {
      "epoch": 0.013555606829718505,
      "grad_norm": 0.41437554359436035,
      "learning_rate": 2.7104959630911193e-05,
      "loss": 0.9773,
      "step": 235
    },
    {
      "epoch": 0.01384402399630826,
      "grad_norm": 0.35881930589675903,
      "learning_rate": 2.768166089965398e-05,
      "loss": 1.0045,
      "step": 240
    },
    {
      "epoch": 0.014132441162898015,
      "grad_norm": 0.3705314099788666,
      "learning_rate": 2.8258362168396773e-05,
      "loss": 1.008,
      "step": 245
    },
    {
      "epoch": 0.01442085832948777,
      "grad_norm": 0.35067903995513916,
      "learning_rate": 2.8835063437139565e-05,
      "loss": 0.9855,
      "step": 250
    },
    {
      "epoch": 0.014709275496077527,
      "grad_norm": 0.3850333094596863,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 1.0378,
      "step": 255
    },
    {
      "epoch": 0.014997692662667282,
      "grad_norm": 0.3616912066936493,
      "learning_rate": 2.9988465974625146e-05,
      "loss": 1.0055,
      "step": 260
    },
    {
      "epoch": 0.015286109829257037,
      "grad_norm": 0.3630939722061157,
      "learning_rate": 3.0565167243367934e-05,
      "loss": 0.9497,
      "step": 265
    },
    {
      "epoch": 0.015574526995846792,
      "grad_norm": 0.3719247579574585,
      "learning_rate": 3.1141868512110726e-05,
      "loss": 1.0438,
      "step": 270
    },
    {
      "epoch": 0.015862944162436547,
      "grad_norm": 0.36650553345680237,
      "learning_rate": 3.171856978085352e-05,
      "loss": 0.9474,
      "step": 275
    },
    {
      "epoch": 0.016151361329026302,
      "grad_norm": 0.386202335357666,
      "learning_rate": 3.229527104959631e-05,
      "loss": 0.9687,
      "step": 280
    },
    {
      "epoch": 0.01643977849561606,
      "grad_norm": 0.3516092896461487,
      "learning_rate": 3.28719723183391e-05,
      "loss": 0.9146,
      "step": 285
    },
    {
      "epoch": 0.016728195662205816,
      "grad_norm": 0.35326242446899414,
      "learning_rate": 3.344867358708189e-05,
      "loss": 0.9616,
      "step": 290
    },
    {
      "epoch": 0.01701661282879557,
      "grad_norm": 0.35251685976982117,
      "learning_rate": 3.4025374855824685e-05,
      "loss": 0.9696,
      "step": 295
    },
    {
      "epoch": 0.017305029995385326,
      "grad_norm": 0.3731367290019989,
      "learning_rate": 3.460207612456747e-05,
      "loss": 0.9631,
      "step": 300
    },
    {
      "epoch": 0.01759344716197508,
      "grad_norm": 0.36492493748664856,
      "learning_rate": 3.517877739331027e-05,
      "loss": 1.0923,
      "step": 305
    },
    {
      "epoch": 0.017881864328564836,
      "grad_norm": 0.37138622999191284,
      "learning_rate": 3.575547866205306e-05,
      "loss": 1.048,
      "step": 310
    },
    {
      "epoch": 0.01817028149515459,
      "grad_norm": 0.34608566761016846,
      "learning_rate": 3.633217993079585e-05,
      "loss": 1.0394,
      "step": 315
    },
    {
      "epoch": 0.018458698661744346,
      "grad_norm": 0.35083648562431335,
      "learning_rate": 3.6908881199538644e-05,
      "loss": 1.0003,
      "step": 320
    },
    {
      "epoch": 0.0187471158283341,
      "grad_norm": 0.33873873949050903,
      "learning_rate": 3.748558246828143e-05,
      "loss": 0.9683,
      "step": 325
    },
    {
      "epoch": 0.01903553299492386,
      "grad_norm": 0.33576804399490356,
      "learning_rate": 3.806228373702422e-05,
      "loss": 0.9974,
      "step": 330
    },
    {
      "epoch": 0.019323950161513614,
      "grad_norm": 0.3308757543563843,
      "learning_rate": 3.863898500576701e-05,
      "loss": 0.985,
      "step": 335
    },
    {
      "epoch": 0.01961236732810337,
      "grad_norm": 0.3734375536441803,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 0.9665,
      "step": 340
    },
    {
      "epoch": 0.019900784494693124,
      "grad_norm": 0.3812713325023651,
      "learning_rate": 3.97923875432526e-05,
      "loss": 0.9961,
      "step": 345
    },
    {
      "epoch": 0.02018920166128288,
      "grad_norm": 0.3471347689628601,
      "learning_rate": 4.036908881199539e-05,
      "loss": 0.9386,
      "step": 350
    },
    {
      "epoch": 0.020477618827872635,
      "grad_norm": 0.34909528493881226,
      "learning_rate": 4.094579008073818e-05,
      "loss": 0.9794,
      "step": 355
    },
    {
      "epoch": 0.02076603599446239,
      "grad_norm": 0.34423884749412537,
      "learning_rate": 4.1522491349480966e-05,
      "loss": 1.0128,
      "step": 360
    },
    {
      "epoch": 0.021054453161052145,
      "grad_norm": 0.33039391040802,
      "learning_rate": 4.209919261822376e-05,
      "loss": 0.978,
      "step": 365
    },
    {
      "epoch": 0.0213428703276419,
      "grad_norm": 0.35275402665138245,
      "learning_rate": 4.2675893886966556e-05,
      "loss": 1.0011,
      "step": 370
    },
    {
      "epoch": 0.021631287494231658,
      "grad_norm": 0.3698658049106598,
      "learning_rate": 4.325259515570935e-05,
      "loss": 1.0207,
      "step": 375
    },
    {
      "epoch": 0.021919704660821413,
      "grad_norm": 0.37382742762565613,
      "learning_rate": 4.382929642445214e-05,
      "loss": 0.9759,
      "step": 380
    },
    {
      "epoch": 0.022208121827411168,
      "grad_norm": 0.3058774173259735,
      "learning_rate": 4.440599769319493e-05,
      "loss": 0.8755,
      "step": 385
    },
    {
      "epoch": 0.022496538994000923,
      "grad_norm": 0.33155399560928345,
      "learning_rate": 4.498269896193772e-05,
      "loss": 1.0688,
      "step": 390
    },
    {
      "epoch": 0.022784956160590678,
      "grad_norm": 0.3562370240688324,
      "learning_rate": 4.555940023068051e-05,
      "loss": 1.0279,
      "step": 395
    },
    {
      "epoch": 0.023073373327180433,
      "grad_norm": 0.3331949710845947,
      "learning_rate": 4.61361014994233e-05,
      "loss": 1.006,
      "step": 400
    },
    {
      "epoch": 0.023361790493770188,
      "grad_norm": 0.36301690340042114,
      "learning_rate": 4.671280276816609e-05,
      "loss": 1.0008,
      "step": 405
    },
    {
      "epoch": 0.023650207660359943,
      "grad_norm": 0.3455171585083008,
      "learning_rate": 4.7289504036908884e-05,
      "loss": 0.9911,
      "step": 410
    },
    {
      "epoch": 0.0239386248269497,
      "grad_norm": 0.33813151717185974,
      "learning_rate": 4.7866205305651676e-05,
      "loss": 0.9613,
      "step": 415
    },
    {
      "epoch": 0.024227041993539457,
      "grad_norm": 0.30761656165122986,
      "learning_rate": 4.844290657439446e-05,
      "loss": 1.0059,
      "step": 420
    },
    {
      "epoch": 0.024515459160129212,
      "grad_norm": 0.31521427631378174,
      "learning_rate": 4.901960784313725e-05,
      "loss": 1.0266,
      "step": 425
    },
    {
      "epoch": 0.024803876326718967,
      "grad_norm": 0.3083288073539734,
      "learning_rate": 4.9596309111880045e-05,
      "loss": 0.9526,
      "step": 430
    },
    {
      "epoch": 0.025092293493308722,
      "grad_norm": 0.3199276924133301,
      "learning_rate": 5.017301038062284e-05,
      "loss": 1.0201,
      "step": 435
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 0.3202233910560608,
      "learning_rate": 5.074971164936563e-05,
      "loss": 0.9186,
      "step": 440
    },
    {
      "epoch": 0.025669127826488232,
      "grad_norm": 0.3125661611557007,
      "learning_rate": 5.132641291810843e-05,
      "loss": 1.0323,
      "step": 445
    },
    {
      "epoch": 0.025957544993077987,
      "grad_norm": 0.3188762068748474,
      "learning_rate": 5.190311418685121e-05,
      "loss": 0.9921,
      "step": 450
    },
    {
      "epoch": 0.026245962159667742,
      "grad_norm": 0.3226945400238037,
      "learning_rate": 5.2479815455594004e-05,
      "loss": 1.0233,
      "step": 455
    },
    {
      "epoch": 0.0265343793262575,
      "grad_norm": 0.33580970764160156,
      "learning_rate": 5.305651672433679e-05,
      "loss": 0.9985,
      "step": 460
    },
    {
      "epoch": 0.026822796492847256,
      "grad_norm": 0.2981513440608978,
      "learning_rate": 5.363321799307959e-05,
      "loss": 0.9998,
      "step": 465
    },
    {
      "epoch": 0.02711121365943701,
      "grad_norm": 0.3163251280784607,
      "learning_rate": 5.4209919261822386e-05,
      "loss": 0.9028,
      "step": 470
    },
    {
      "epoch": 0.027399630826026766,
      "grad_norm": 0.30679088830947876,
      "learning_rate": 5.478662053056517e-05,
      "loss": 0.9625,
      "step": 475
    },
    {
      "epoch": 0.02768804799261652,
      "grad_norm": 0.3147639036178589,
      "learning_rate": 5.536332179930796e-05,
      "loss": 0.984,
      "step": 480
    },
    {
      "epoch": 0.027976465159206276,
      "grad_norm": 0.29801392555236816,
      "learning_rate": 5.594002306805075e-05,
      "loss": 0.9277,
      "step": 485
    },
    {
      "epoch": 0.02826488232579603,
      "grad_norm": 0.3048481047153473,
      "learning_rate": 5.651672433679355e-05,
      "loss": 1.011,
      "step": 490
    },
    {
      "epoch": 0.028553299492385786,
      "grad_norm": 0.3025212585926056,
      "learning_rate": 5.709342560553633e-05,
      "loss": 0.9997,
      "step": 495
    },
    {
      "epoch": 0.02884171665897554,
      "grad_norm": 0.29069867730140686,
      "learning_rate": 5.767012687427913e-05,
      "loss": 0.9725,
      "step": 500
    },
    {
      "epoch": 0.0291301338255653,
      "grad_norm": 0.2819552421569824,
      "learning_rate": 5.8246828143021916e-05,
      "loss": 0.9482,
      "step": 505
    },
    {
      "epoch": 0.029418550992155054,
      "grad_norm": 0.311065673828125,
      "learning_rate": 5.882352941176471e-05,
      "loss": 1.0066,
      "step": 510
    },
    {
      "epoch": 0.02970696815874481,
      "grad_norm": 0.3073347508907318,
      "learning_rate": 5.940023068050749e-05,
      "loss": 1.0394,
      "step": 515
    },
    {
      "epoch": 0.029995385325334564,
      "grad_norm": 0.29304638504981995,
      "learning_rate": 5.997693194925029e-05,
      "loss": 0.8912,
      "step": 520
    },
    {
      "epoch": 0.03028380249192432,
      "grad_norm": 0.29948490858078003,
      "learning_rate": 6.0553633217993076e-05,
      "loss": 1.071,
      "step": 525
    },
    {
      "epoch": 0.030572219658514074,
      "grad_norm": 0.3014158606529236,
      "learning_rate": 6.113033448673587e-05,
      "loss": 0.9749,
      "step": 530
    },
    {
      "epoch": 0.03086063682510383,
      "grad_norm": 0.30555838346481323,
      "learning_rate": 6.170703575547867e-05,
      "loss": 1.0307,
      "step": 535
    },
    {
      "epoch": 0.031149053991693584,
      "grad_norm": 0.29762470722198486,
      "learning_rate": 6.228373702422145e-05,
      "loss": 0.9906,
      "step": 540
    },
    {
      "epoch": 0.03143747115828334,
      "grad_norm": 0.30303990840911865,
      "learning_rate": 6.286043829296425e-05,
      "loss": 0.9647,
      "step": 545
    },
    {
      "epoch": 0.031725888324873094,
      "grad_norm": 0.293807715177536,
      "learning_rate": 6.343713956170704e-05,
      "loss": 0.9659,
      "step": 550
    },
    {
      "epoch": 0.03201430549146285,
      "grad_norm": 0.2783466577529907,
      "learning_rate": 6.401384083044983e-05,
      "loss": 0.9687,
      "step": 555
    },
    {
      "epoch": 0.032302722658052604,
      "grad_norm": 0.28931179642677307,
      "learning_rate": 6.459054209919262e-05,
      "loss": 1.015,
      "step": 560
    },
    {
      "epoch": 0.03259113982464236,
      "grad_norm": 0.29564398527145386,
      "learning_rate": 6.516724336793542e-05,
      "loss": 0.9644,
      "step": 565
    },
    {
      "epoch": 0.03287955699123212,
      "grad_norm": 0.28108495473861694,
      "learning_rate": 6.57439446366782e-05,
      "loss": 0.8925,
      "step": 570
    },
    {
      "epoch": 0.033167974157821876,
      "grad_norm": 0.29815274477005005,
      "learning_rate": 6.6320645905421e-05,
      "loss": 0.9802,
      "step": 575
    },
    {
      "epoch": 0.03345639132441163,
      "grad_norm": 0.2788611054420471,
      "learning_rate": 6.689734717416379e-05,
      "loss": 0.9828,
      "step": 580
    },
    {
      "epoch": 0.033744808491001387,
      "grad_norm": 0.292481392621994,
      "learning_rate": 6.747404844290659e-05,
      "loss": 0.9494,
      "step": 585
    },
    {
      "epoch": 0.03403322565759114,
      "grad_norm": 0.3360653221607208,
      "learning_rate": 6.805074971164937e-05,
      "loss": 0.9748,
      "step": 590
    },
    {
      "epoch": 0.0343216428241809,
      "grad_norm": 0.2787121534347534,
      "learning_rate": 6.862745098039216e-05,
      "loss": 1.0023,
      "step": 595
    },
    {
      "epoch": 0.03461005999077065,
      "grad_norm": 0.2681010663509369,
      "learning_rate": 6.920415224913494e-05,
      "loss": 1.0315,
      "step": 600
    },
    {
      "epoch": 0.03489847715736041,
      "grad_norm": 0.2744525372982025,
      "learning_rate": 6.978085351787774e-05,
      "loss": 1.0026,
      "step": 605
    },
    {
      "epoch": 0.03518689432395016,
      "grad_norm": 0.27870893478393555,
      "learning_rate": 7.035755478662054e-05,
      "loss": 1.0194,
      "step": 610
    },
    {
      "epoch": 0.03547531149053992,
      "grad_norm": 0.2868039309978485,
      "learning_rate": 7.093425605536332e-05,
      "loss": 1.0519,
      "step": 615
    },
    {
      "epoch": 0.03576372865712967,
      "grad_norm": 0.2700194716453552,
      "learning_rate": 7.151095732410612e-05,
      "loss": 1.0284,
      "step": 620
    },
    {
      "epoch": 0.03605214582371943,
      "grad_norm": 0.27872154116630554,
      "learning_rate": 7.20876585928489e-05,
      "loss": 0.9432,
      "step": 625
    },
    {
      "epoch": 0.03634056299030918,
      "grad_norm": 0.29088643193244934,
      "learning_rate": 7.26643598615917e-05,
      "loss": 0.9543,
      "step": 630
    },
    {
      "epoch": 0.03662898015689894,
      "grad_norm": 0.29298341274261475,
      "learning_rate": 7.324106113033449e-05,
      "loss": 0.9481,
      "step": 635
    },
    {
      "epoch": 0.03691739732348869,
      "grad_norm": 0.2777993083000183,
      "learning_rate": 7.381776239907729e-05,
      "loss": 1.0225,
      "step": 640
    },
    {
      "epoch": 0.03720581449007845,
      "grad_norm": 0.2685664892196655,
      "learning_rate": 7.439446366782007e-05,
      "loss": 0.9842,
      "step": 645
    },
    {
      "epoch": 0.0374942316566682,
      "grad_norm": 0.2854040861129761,
      "learning_rate": 7.497116493656286e-05,
      "loss": 1.0532,
      "step": 650
    },
    {
      "epoch": 0.03778264882325796,
      "grad_norm": 0.2788238525390625,
      "learning_rate": 7.554786620530564e-05,
      "loss": 1.0158,
      "step": 655
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 0.2755304276943207,
      "learning_rate": 7.612456747404844e-05,
      "loss": 0.9772,
      "step": 660
    },
    {
      "epoch": 0.038359483156437474,
      "grad_norm": 0.35789754986763,
      "learning_rate": 7.670126874279123e-05,
      "loss": 1.0072,
      "step": 665
    },
    {
      "epoch": 0.03864790032302723,
      "grad_norm": 0.26069143414497375,
      "learning_rate": 7.727797001153403e-05,
      "loss": 0.9633,
      "step": 670
    },
    {
      "epoch": 0.038936317489616984,
      "grad_norm": 0.25112205743789673,
      "learning_rate": 7.785467128027682e-05,
      "loss": 0.9779,
      "step": 675
    },
    {
      "epoch": 0.03922473465620674,
      "grad_norm": 0.2676317095756531,
      "learning_rate": 7.843137254901961e-05,
      "loss": 0.9924,
      "step": 680
    },
    {
      "epoch": 0.039513151822796494,
      "grad_norm": 0.26556289196014404,
      "learning_rate": 7.900807381776241e-05,
      "loss": 0.9615,
      "step": 685
    },
    {
      "epoch": 0.03980156898938625,
      "grad_norm": 0.2796500623226166,
      "learning_rate": 7.95847750865052e-05,
      "loss": 0.9579,
      "step": 690
    },
    {
      "epoch": 0.040089986155976004,
      "grad_norm": 0.2656737267971039,
      "learning_rate": 8.016147635524799e-05,
      "loss": 0.9664,
      "step": 695
    },
    {
      "epoch": 0.04037840332256576,
      "grad_norm": 0.27284595370292664,
      "learning_rate": 8.073817762399078e-05,
      "loss": 0.9255,
      "step": 700
    },
    {
      "epoch": 0.040666820489155514,
      "grad_norm": 0.27243107557296753,
      "learning_rate": 8.131487889273358e-05,
      "loss": 1.0724,
      "step": 705
    },
    {
      "epoch": 0.04095523765574527,
      "grad_norm": 0.2859233617782593,
      "learning_rate": 8.189158016147636e-05,
      "loss": 0.9977,
      "step": 710
    },
    {
      "epoch": 0.041243654822335024,
      "grad_norm": 0.25867760181427,
      "learning_rate": 8.246828143021915e-05,
      "loss": 0.9898,
      "step": 715
    },
    {
      "epoch": 0.04153207198892478,
      "grad_norm": 0.4606216549873352,
      "learning_rate": 8.304498269896193e-05,
      "loss": 0.9751,
      "step": 720
    },
    {
      "epoch": 0.041820489155514534,
      "grad_norm": 0.27268925309181213,
      "learning_rate": 8.362168396770473e-05,
      "loss": 0.961,
      "step": 725
    },
    {
      "epoch": 0.04210890632210429,
      "grad_norm": 0.27551740407943726,
      "learning_rate": 8.419838523644751e-05,
      "loss": 1.0218,
      "step": 730
    },
    {
      "epoch": 0.042397323488694044,
      "grad_norm": 0.26447197794914246,
      "learning_rate": 8.477508650519031e-05,
      "loss": 0.8937,
      "step": 735
    },
    {
      "epoch": 0.0426857406552838,
      "grad_norm": 0.27464747428894043,
      "learning_rate": 8.535178777393311e-05,
      "loss": 1.02,
      "step": 740
    },
    {
      "epoch": 0.04297415782187356,
      "grad_norm": 0.2542886435985565,
      "learning_rate": 8.59284890426759e-05,
      "loss": 1.0397,
      "step": 745
    },
    {
      "epoch": 0.043262574988463316,
      "grad_norm": 0.264526903629303,
      "learning_rate": 8.65051903114187e-05,
      "loss": 1.0214,
      "step": 750
    },
    {
      "epoch": 0.04355099215505307,
      "grad_norm": 0.28241610527038574,
      "learning_rate": 8.708189158016148e-05,
      "loss": 0.9854,
      "step": 755
    },
    {
      "epoch": 0.043839409321642826,
      "grad_norm": 0.2658286988735199,
      "learning_rate": 8.765859284890428e-05,
      "loss": 1.0165,
      "step": 760
    },
    {
      "epoch": 0.04412782648823258,
      "grad_norm": 0.2767401933670044,
      "learning_rate": 8.823529411764706e-05,
      "loss": 0.9447,
      "step": 765
    },
    {
      "epoch": 0.044416243654822336,
      "grad_norm": 0.2705742120742798,
      "learning_rate": 8.881199538638986e-05,
      "loss": 1.016,
      "step": 770
    },
    {
      "epoch": 0.04470466082141209,
      "grad_norm": 0.2611309885978699,
      "learning_rate": 8.938869665513265e-05,
      "loss": 0.9275,
      "step": 775
    },
    {
      "epoch": 0.044993077988001846,
      "grad_norm": 0.26546046137809753,
      "learning_rate": 8.996539792387543e-05,
      "loss": 0.9261,
      "step": 780
    },
    {
      "epoch": 0.0452814951545916,
      "grad_norm": 0.2639457583427429,
      "learning_rate": 9.054209919261822e-05,
      "loss": 1.137,
      "step": 785
    },
    {
      "epoch": 0.045569912321181356,
      "grad_norm": 0.2529033124446869,
      "learning_rate": 9.111880046136102e-05,
      "loss": 0.9829,
      "step": 790
    },
    {
      "epoch": 0.04585832948777111,
      "grad_norm": 0.2628052532672882,
      "learning_rate": 9.16955017301038e-05,
      "loss": 1.0721,
      "step": 795
    },
    {
      "epoch": 0.046146746654360866,
      "grad_norm": 0.24454466998577118,
      "learning_rate": 9.22722029988466e-05,
      "loss": 0.9685,
      "step": 800
    },
    {
      "epoch": 0.04643516382095062,
      "grad_norm": 0.2661277651786804,
      "learning_rate": 9.28489042675894e-05,
      "loss": 1.0083,
      "step": 805
    },
    {
      "epoch": 0.046723580987540377,
      "grad_norm": 0.2556845545768738,
      "learning_rate": 9.342560553633218e-05,
      "loss": 0.9418,
      "step": 810
    },
    {
      "epoch": 0.04701199815413013,
      "grad_norm": 0.26760879158973694,
      "learning_rate": 9.400230680507498e-05,
      "loss": 0.9693,
      "step": 815
    },
    {
      "epoch": 0.04730041532071989,
      "grad_norm": 0.27097398042678833,
      "learning_rate": 9.457900807381777e-05,
      "loss": 1.02,
      "step": 820
    },
    {
      "epoch": 0.04758883248730964,
      "grad_norm": 0.25728651881217957,
      "learning_rate": 9.515570934256057e-05,
      "loss": 1.0475,
      "step": 825
    },
    {
      "epoch": 0.0478772496538994,
      "grad_norm": 0.25373902916908264,
      "learning_rate": 9.573241061130335e-05,
      "loss": 1.0506,
      "step": 830
    },
    {
      "epoch": 0.04816566682048916,
      "grad_norm": 0.2508525252342224,
      "learning_rate": 9.630911188004614e-05,
      "loss": 0.9788,
      "step": 835
    },
    {
      "epoch": 0.048454083987078914,
      "grad_norm": 0.25410938262939453,
      "learning_rate": 9.688581314878892e-05,
      "loss": 1.0306,
      "step": 840
    },
    {
      "epoch": 0.04874250115366867,
      "grad_norm": 0.329357385635376,
      "learning_rate": 9.746251441753172e-05,
      "loss": 0.8915,
      "step": 845
    },
    {
      "epoch": 0.049030918320258424,
      "grad_norm": 0.2622867524623871,
      "learning_rate": 9.80392156862745e-05,
      "loss": 1.0084,
      "step": 850
    },
    {
      "epoch": 0.04931933548684818,
      "grad_norm": 0.24103546142578125,
      "learning_rate": 9.86159169550173e-05,
      "loss": 0.9618,
      "step": 855
    },
    {
      "epoch": 0.049607752653437934,
      "grad_norm": 0.24415351450443268,
      "learning_rate": 9.919261822376009e-05,
      "loss": 0.9831,
      "step": 860
    },
    {
      "epoch": 0.04989616982002769,
      "grad_norm": 0.2494598776102066,
      "learning_rate": 9.976931949250289e-05,
      "loss": 0.9796,
      "step": 865
    },
    {
      "epoch": 0.050184586986617444,
      "grad_norm": 0.25438565015792847,
      "learning_rate": 0.00010034602076124569,
      "loss": 0.962,
      "step": 870
    },
    {
      "epoch": 0.0504730041532072,
      "grad_norm": 0.2473691701889038,
      "learning_rate": 0.00010092272202998847,
      "loss": 0.9957,
      "step": 875
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.2563316524028778,
      "learning_rate": 0.00010149942329873126,
      "loss": 0.9428,
      "step": 880
    },
    {
      "epoch": 0.05104983848638671,
      "grad_norm": 0.2498437613248825,
      "learning_rate": 0.00010207612456747407,
      "loss": 1.0268,
      "step": 885
    },
    {
      "epoch": 0.051338255652976464,
      "grad_norm": 0.30138102173805237,
      "learning_rate": 0.00010265282583621685,
      "loss": 1.0179,
      "step": 890
    },
    {
      "epoch": 0.05162667281956622,
      "grad_norm": 0.2591732144355774,
      "learning_rate": 0.00010322952710495964,
      "loss": 1.0329,
      "step": 895
    },
    {
      "epoch": 0.051915089986155974,
      "grad_norm": 0.2612927258014679,
      "learning_rate": 0.00010380622837370242,
      "loss": 1.0217,
      "step": 900
    },
    {
      "epoch": 0.05220350715274573,
      "grad_norm": 0.2682620882987976,
      "learning_rate": 0.00010438292964244522,
      "loss": 0.9739,
      "step": 905
    },
    {
      "epoch": 0.052491924319335484,
      "grad_norm": 0.25226083397865295,
      "learning_rate": 0.00010495963091118801,
      "loss": 0.9301,
      "step": 910
    },
    {
      "epoch": 0.05278034148592524,
      "grad_norm": 0.2584647536277771,
      "learning_rate": 0.00010553633217993079,
      "loss": 0.948,
      "step": 915
    },
    {
      "epoch": 0.053068758652515,
      "grad_norm": 0.25511860847473145,
      "learning_rate": 0.00010611303344867358,
      "loss": 0.9927,
      "step": 920
    },
    {
      "epoch": 0.053357175819104756,
      "grad_norm": 0.24508269131183624,
      "learning_rate": 0.00010668973471741639,
      "loss": 0.9725,
      "step": 925
    },
    {
      "epoch": 0.05364559298569451,
      "grad_norm": 0.2486460655927658,
      "learning_rate": 0.00010726643598615918,
      "loss": 0.9572,
      "step": 930
    },
    {
      "epoch": 0.053934010152284266,
      "grad_norm": 0.2520204186439514,
      "learning_rate": 0.00010784313725490196,
      "loss": 1.0018,
      "step": 935
    },
    {
      "epoch": 0.05422242731887402,
      "grad_norm": 0.2417331486940384,
      "learning_rate": 0.00010841983852364477,
      "loss": 0.9141,
      "step": 940
    },
    {
      "epoch": 0.054510844485463776,
      "grad_norm": 0.2488359957933426,
      "learning_rate": 0.00010899653979238756,
      "loss": 1.0062,
      "step": 945
    },
    {
      "epoch": 0.05479926165205353,
      "grad_norm": 0.24121712148189545,
      "learning_rate": 0.00010957324106113034,
      "loss": 1.0109,
      "step": 950
    },
    {
      "epoch": 0.055087678818643286,
      "grad_norm": 0.23568566143512726,
      "learning_rate": 0.00011014994232987313,
      "loss": 1.0166,
      "step": 955
    },
    {
      "epoch": 0.05537609598523304,
      "grad_norm": 0.24867838621139526,
      "learning_rate": 0.00011072664359861593,
      "loss": 0.9547,
      "step": 960
    },
    {
      "epoch": 0.055664513151822796,
      "grad_norm": 0.25426313281059265,
      "learning_rate": 0.00011130334486735871,
      "loss": 0.8802,
      "step": 965
    },
    {
      "epoch": 0.05595293031841255,
      "grad_norm": 0.25565817952156067,
      "learning_rate": 0.0001118800461361015,
      "loss": 1.0254,
      "step": 970
    },
    {
      "epoch": 0.056241347485002306,
      "grad_norm": 0.2581862807273865,
      "learning_rate": 0.00011245674740484428,
      "loss": 0.9911,
      "step": 975
    },
    {
      "epoch": 0.05652976465159206,
      "grad_norm": 0.2568804919719696,
      "learning_rate": 0.0001130334486735871,
      "loss": 0.9868,
      "step": 980
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 0.24871297180652618,
      "learning_rate": 0.00011361014994232988,
      "loss": 0.9995,
      "step": 985
    },
    {
      "epoch": 0.05710659898477157,
      "grad_norm": 0.2451828271150589,
      "learning_rate": 0.00011418685121107266,
      "loss": 1.0186,
      "step": 990
    },
    {
      "epoch": 0.057395016151361326,
      "grad_norm": 0.22989226877689362,
      "learning_rate": 0.00011476355247981545,
      "loss": 0.9973,
      "step": 995
    },
    {
      "epoch": 0.05768343331795108,
      "grad_norm": 0.24537120759487152,
      "learning_rate": 0.00011534025374855826,
      "loss": 0.9782,
      "step": 1000
    },
    {
      "epoch": 0.05797185048454084,
      "grad_norm": 0.2385062724351883,
      "learning_rate": 0.00011591695501730105,
      "loss": 0.9829,
      "step": 1005
    },
    {
      "epoch": 0.0582602676511306,
      "grad_norm": 0.24060112237930298,
      "learning_rate": 0.00011649365628604383,
      "loss": 0.9604,
      "step": 1010
    },
    {
      "epoch": 0.05854868481772035,
      "grad_norm": 0.24133679270744324,
      "learning_rate": 0.00011707035755478663,
      "loss": 1.0636,
      "step": 1015
    },
    {
      "epoch": 0.05883710198431011,
      "grad_norm": 0.24426597356796265,
      "learning_rate": 0.00011764705882352942,
      "loss": 1.03,
      "step": 1020
    },
    {
      "epoch": 0.05912551915089986,
      "grad_norm": 0.24399541318416595,
      "learning_rate": 0.0001182237600922722,
      "loss": 1.0323,
      "step": 1025
    },
    {
      "epoch": 0.05941393631748962,
      "grad_norm": 0.23879031836986542,
      "learning_rate": 0.00011880046136101499,
      "loss": 0.919,
      "step": 1030
    },
    {
      "epoch": 0.059702353484079373,
      "grad_norm": 0.2616792619228363,
      "learning_rate": 0.0001193771626297578,
      "loss": 0.973,
      "step": 1035
    },
    {
      "epoch": 0.05999077065066913,
      "grad_norm": 0.2673472464084625,
      "learning_rate": 0.00011995386389850058,
      "loss": 1.034,
      "step": 1040
    },
    {
      "epoch": 0.060279187817258884,
      "grad_norm": 0.25476494431495667,
      "learning_rate": 0.00012053056516724337,
      "loss": 0.9774,
      "step": 1045
    },
    {
      "epoch": 0.06056760498384864,
      "grad_norm": 0.24083387851715088,
      "learning_rate": 0.00012110726643598615,
      "loss": 0.978,
      "step": 1050
    },
    {
      "epoch": 0.060856022150438394,
      "grad_norm": 0.24353915452957153,
      "learning_rate": 0.00012168396770472896,
      "loss": 0.9743,
      "step": 1055
    },
    {
      "epoch": 0.06114443931702815,
      "grad_norm": 0.21972060203552246,
      "learning_rate": 0.00012226066897347174,
      "loss": 0.9603,
      "step": 1060
    },
    {
      "epoch": 0.061432856483617904,
      "grad_norm": 0.2495606243610382,
      "learning_rate": 0.00012283737024221453,
      "loss": 0.9428,
      "step": 1065
    },
    {
      "epoch": 0.06172127365020766,
      "grad_norm": 0.243063822388649,
      "learning_rate": 0.00012341407151095733,
      "loss": 1.0545,
      "step": 1070
    },
    {
      "epoch": 0.062009690816797414,
      "grad_norm": 0.43066951632499695,
      "learning_rate": 0.00012399077277970013,
      "loss": 1.0169,
      "step": 1075
    },
    {
      "epoch": 0.06229810798338717,
      "grad_norm": 0.2651910185813904,
      "learning_rate": 0.0001245674740484429,
      "loss": 0.9391,
      "step": 1080
    },
    {
      "epoch": 0.06258652514997692,
      "grad_norm": 0.2392721027135849,
      "learning_rate": 0.0001251441753171857,
      "loss": 0.9285,
      "step": 1085
    },
    {
      "epoch": 0.06287494231656668,
      "grad_norm": 0.27125298976898193,
      "learning_rate": 0.0001257208765859285,
      "loss": 1.0027,
      "step": 1090
    },
    {
      "epoch": 0.06316335948315643,
      "grad_norm": 0.23103715479373932,
      "learning_rate": 0.0001262975778546713,
      "loss": 0.9483,
      "step": 1095
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 0.26006826758384705,
      "learning_rate": 0.00012687427912341407,
      "loss": 0.9914,
      "step": 1100
    },
    {
      "epoch": 0.06374019381633594,
      "grad_norm": 0.24031592905521393,
      "learning_rate": 0.00012745098039215687,
      "loss": 0.9377,
      "step": 1105
    },
    {
      "epoch": 0.0640286109829257,
      "grad_norm": 0.23456595838069916,
      "learning_rate": 0.00012802768166089967,
      "loss": 0.9399,
      "step": 1110
    },
    {
      "epoch": 0.06431702814951545,
      "grad_norm": 0.23771512508392334,
      "learning_rate": 0.00012860438292964244,
      "loss": 0.9292,
      "step": 1115
    },
    {
      "epoch": 0.06460544531610521,
      "grad_norm": 0.260166198015213,
      "learning_rate": 0.00012918108419838524,
      "loss": 1.0257,
      "step": 1120
    },
    {
      "epoch": 0.06489386248269496,
      "grad_norm": 0.2397325038909912,
      "learning_rate": 0.00012975778546712804,
      "loss": 0.9911,
      "step": 1125
    },
    {
      "epoch": 0.06518227964928472,
      "grad_norm": 0.2309105545282364,
      "learning_rate": 0.00013033448673587084,
      "loss": 0.945,
      "step": 1130
    },
    {
      "epoch": 0.06547069681587447,
      "grad_norm": 0.2553274631500244,
      "learning_rate": 0.0001309111880046136,
      "loss": 1.0365,
      "step": 1135
    },
    {
      "epoch": 0.06575911398246424,
      "grad_norm": 0.24227184057235718,
      "learning_rate": 0.0001314878892733564,
      "loss": 1.0088,
      "step": 1140
    },
    {
      "epoch": 0.066047531149054,
      "grad_norm": 0.24748703837394714,
      "learning_rate": 0.0001320645905420992,
      "loss": 0.9954,
      "step": 1145
    },
    {
      "epoch": 0.06633594831564375,
      "grad_norm": 0.237881138920784,
      "learning_rate": 0.000132641291810842,
      "loss": 1.0068,
      "step": 1150
    },
    {
      "epoch": 0.06662436548223351,
      "grad_norm": 0.2657199203968048,
      "learning_rate": 0.00013321799307958477,
      "loss": 1.0157,
      "step": 1155
    },
    {
      "epoch": 0.06691278264882326,
      "grad_norm": 0.22322721779346466,
      "learning_rate": 0.00013379469434832757,
      "loss": 0.9121,
      "step": 1160
    },
    {
      "epoch": 0.06720119981541302,
      "grad_norm": 0.2632087171077728,
      "learning_rate": 0.00013437139561707037,
      "loss": 1.0139,
      "step": 1165
    },
    {
      "epoch": 0.06748961698200277,
      "grad_norm": 0.23888643085956573,
      "learning_rate": 0.00013494809688581317,
      "loss": 0.9508,
      "step": 1170
    },
    {
      "epoch": 0.06777803414859253,
      "grad_norm": 0.2553633153438568,
      "learning_rate": 0.00013552479815455594,
      "loss": 0.9303,
      "step": 1175
    },
    {
      "epoch": 0.06806645131518228,
      "grad_norm": 0.23953106999397278,
      "learning_rate": 0.00013610149942329874,
      "loss": 1.0124,
      "step": 1180
    },
    {
      "epoch": 0.06835486848177204,
      "grad_norm": 0.25543469190597534,
      "learning_rate": 0.00013667820069204154,
      "loss": 1.0293,
      "step": 1185
    },
    {
      "epoch": 0.0686432856483618,
      "grad_norm": 0.261433869600296,
      "learning_rate": 0.0001372549019607843,
      "loss": 0.9565,
      "step": 1190
    },
    {
      "epoch": 0.06893170281495155,
      "grad_norm": 0.24583804607391357,
      "learning_rate": 0.0001378316032295271,
      "loss": 1.0264,
      "step": 1195
    },
    {
      "epoch": 0.0692201199815413,
      "grad_norm": 0.24933773279190063,
      "learning_rate": 0.00013840830449826988,
      "loss": 0.9649,
      "step": 1200
    },
    {
      "epoch": 0.06950853714813106,
      "grad_norm": 0.2605167329311371,
      "learning_rate": 0.0001389850057670127,
      "loss": 1.0164,
      "step": 1205
    },
    {
      "epoch": 0.06979695431472081,
      "grad_norm": 0.2488076090812683,
      "learning_rate": 0.00013956170703575548,
      "loss": 1.0082,
      "step": 1210
    },
    {
      "epoch": 0.07008537148131057,
      "grad_norm": 0.2312484085559845,
      "learning_rate": 0.00014013840830449828,
      "loss": 1.0003,
      "step": 1215
    },
    {
      "epoch": 0.07037378864790032,
      "grad_norm": 0.25844693183898926,
      "learning_rate": 0.00014071510957324108,
      "loss": 1.022,
      "step": 1220
    },
    {
      "epoch": 0.07066220581449008,
      "grad_norm": 0.24452704191207886,
      "learning_rate": 0.00014129181084198387,
      "loss": 0.9942,
      "step": 1225
    },
    {
      "epoch": 0.07095062298107983,
      "grad_norm": 0.25007927417755127,
      "learning_rate": 0.00014186851211072665,
      "loss": 0.9771,
      "step": 1230
    },
    {
      "epoch": 0.07123904014766959,
      "grad_norm": 0.22107909619808197,
      "learning_rate": 0.00014244521337946944,
      "loss": 0.9623,
      "step": 1235
    },
    {
      "epoch": 0.07152745731425934,
      "grad_norm": 0.25855445861816406,
      "learning_rate": 0.00014302191464821224,
      "loss": 1.0234,
      "step": 1240
    },
    {
      "epoch": 0.0718158744808491,
      "grad_norm": 0.24878598749637604,
      "learning_rate": 0.00014359861591695501,
      "loss": 0.978,
      "step": 1245
    },
    {
      "epoch": 0.07210429164743885,
      "grad_norm": 0.24573691189289093,
      "learning_rate": 0.0001441753171856978,
      "loss": 1.0046,
      "step": 1250
    },
    {
      "epoch": 0.07239270881402861,
      "grad_norm": 0.24604535102844238,
      "learning_rate": 0.00014475201845444058,
      "loss": 1.0427,
      "step": 1255
    },
    {
      "epoch": 0.07268112598061836,
      "grad_norm": 0.2441253662109375,
      "learning_rate": 0.0001453287197231834,
      "loss": 0.9979,
      "step": 1260
    },
    {
      "epoch": 0.07296954314720812,
      "grad_norm": 0.24777497351169586,
      "learning_rate": 0.00014590542099192618,
      "loss": 1.0299,
      "step": 1265
    },
    {
      "epoch": 0.07325796031379787,
      "grad_norm": 0.24094311892986298,
      "learning_rate": 0.00014648212226066898,
      "loss": 0.9391,
      "step": 1270
    },
    {
      "epoch": 0.07354637748038763,
      "grad_norm": 0.22625485062599182,
      "learning_rate": 0.00014705882352941178,
      "loss": 0.9858,
      "step": 1275
    },
    {
      "epoch": 0.07383479464697738,
      "grad_norm": 0.23727013170719147,
      "learning_rate": 0.00014763552479815458,
      "loss": 0.9819,
      "step": 1280
    },
    {
      "epoch": 0.07412321181356714,
      "grad_norm": 0.2502304017543793,
      "learning_rate": 0.00014821222606689735,
      "loss": 0.9641,
      "step": 1285
    },
    {
      "epoch": 0.0744116289801569,
      "grad_norm": 0.2629458010196686,
      "learning_rate": 0.00014878892733564015,
      "loss": 0.9894,
      "step": 1290
    },
    {
      "epoch": 0.07470004614674665,
      "grad_norm": 0.2599036693572998,
      "learning_rate": 0.00014936562860438295,
      "loss": 1.0051,
      "step": 1295
    },
    {
      "epoch": 0.0749884633133364,
      "grad_norm": 0.26761215925216675,
      "learning_rate": 0.00014994232987312572,
      "loss": 0.9867,
      "step": 1300
    },
    {
      "epoch": 0.07527688047992616,
      "grad_norm": 0.22773049771785736,
      "learning_rate": 0.00015051903114186852,
      "loss": 0.9696,
      "step": 1305
    },
    {
      "epoch": 0.07556529764651591,
      "grad_norm": 0.2541469633579254,
      "learning_rate": 0.0001510957324106113,
      "loss": 0.9657,
      "step": 1310
    },
    {
      "epoch": 0.07585371481310568,
      "grad_norm": 0.24339397251605988,
      "learning_rate": 0.00015167243367935411,
      "loss": 0.9592,
      "step": 1315
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 0.24885432422161102,
      "learning_rate": 0.00015224913494809689,
      "loss": 0.9516,
      "step": 1320
    },
    {
      "epoch": 0.07643054914628519,
      "grad_norm": 0.24829605221748352,
      "learning_rate": 0.00015282583621683968,
      "loss": 0.9578,
      "step": 1325
    },
    {
      "epoch": 0.07671896631287495,
      "grad_norm": 0.2368239462375641,
      "learning_rate": 0.00015340253748558246,
      "loss": 0.9982,
      "step": 1330
    },
    {
      "epoch": 0.0770073834794647,
      "grad_norm": 0.2484530210494995,
      "learning_rate": 0.00015397923875432528,
      "loss": 0.9453,
      "step": 1335
    },
    {
      "epoch": 0.07729580064605446,
      "grad_norm": 0.26049789786338806,
      "learning_rate": 0.00015455594002306805,
      "loss": 1.0236,
      "step": 1340
    },
    {
      "epoch": 0.07758421781264421,
      "grad_norm": 0.24843549728393555,
      "learning_rate": 0.00015513264129181085,
      "loss": 0.9244,
      "step": 1345
    },
    {
      "epoch": 0.07787263497923397,
      "grad_norm": 0.2523777484893799,
      "learning_rate": 0.00015570934256055365,
      "loss": 1.0428,
      "step": 1350
    },
    {
      "epoch": 0.07816105214582372,
      "grad_norm": 0.2532496750354767,
      "learning_rate": 0.00015628604382929645,
      "loss": 0.9755,
      "step": 1355
    },
    {
      "epoch": 0.07844946931241348,
      "grad_norm": 0.24413040280342102,
      "learning_rate": 0.00015686274509803922,
      "loss": 1.0127,
      "step": 1360
    },
    {
      "epoch": 0.07873788647900323,
      "grad_norm": 0.23477444052696228,
      "learning_rate": 0.00015743944636678202,
      "loss": 0.9863,
      "step": 1365
    },
    {
      "epoch": 0.07902630364559299,
      "grad_norm": 0.25841665267944336,
      "learning_rate": 0.00015801614763552482,
      "loss": 0.9707,
      "step": 1370
    },
    {
      "epoch": 0.07931472081218274,
      "grad_norm": 0.2560499310493469,
      "learning_rate": 0.0001585928489042676,
      "loss": 1.0088,
      "step": 1375
    },
    {
      "epoch": 0.0796031379787725,
      "grad_norm": 0.26023638248443604,
      "learning_rate": 0.0001591695501730104,
      "loss": 0.9853,
      "step": 1380
    },
    {
      "epoch": 0.07989155514536225,
      "grad_norm": 0.24000810086727142,
      "learning_rate": 0.00015974625144175316,
      "loss": 0.9841,
      "step": 1385
    },
    {
      "epoch": 0.08017997231195201,
      "grad_norm": 0.243475079536438,
      "learning_rate": 0.00016032295271049598,
      "loss": 0.8794,
      "step": 1390
    },
    {
      "epoch": 0.08046838947854176,
      "grad_norm": 0.2558750510215759,
      "learning_rate": 0.00016089965397923876,
      "loss": 0.9301,
      "step": 1395
    },
    {
      "epoch": 0.08075680664513152,
      "grad_norm": 0.24674943089485168,
      "learning_rate": 0.00016147635524798155,
      "loss": 0.9552,
      "step": 1400
    },
    {
      "epoch": 0.08104522381172127,
      "grad_norm": 0.23435547947883606,
      "learning_rate": 0.00016205305651672435,
      "loss": 0.9505,
      "step": 1405
    },
    {
      "epoch": 0.08133364097831103,
      "grad_norm": 0.24860598146915436,
      "learning_rate": 0.00016262975778546715,
      "loss": 1.0281,
      "step": 1410
    },
    {
      "epoch": 0.08162205814490078,
      "grad_norm": 0.24203436076641083,
      "learning_rate": 0.00016320645905420992,
      "loss": 0.9113,
      "step": 1415
    },
    {
      "epoch": 0.08191047531149054,
      "grad_norm": 0.2528266906738281,
      "learning_rate": 0.00016378316032295272,
      "loss": 0.9578,
      "step": 1420
    },
    {
      "epoch": 0.0821988924780803,
      "grad_norm": 0.29618388414382935,
      "learning_rate": 0.00016435986159169552,
      "loss": 0.982,
      "step": 1425
    },
    {
      "epoch": 0.08248730964467005,
      "grad_norm": 0.248749777674675,
      "learning_rate": 0.0001649365628604383,
      "loss": 0.9963,
      "step": 1430
    },
    {
      "epoch": 0.0827757268112598,
      "grad_norm": 0.25069600343704224,
      "learning_rate": 0.0001655132641291811,
      "loss": 1.0234,
      "step": 1435
    },
    {
      "epoch": 0.08306414397784956,
      "grad_norm": 0.24154260754585266,
      "learning_rate": 0.00016608996539792386,
      "loss": 0.976,
      "step": 1440
    },
    {
      "epoch": 0.08335256114443931,
      "grad_norm": 0.24604587256908417,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.9341,
      "step": 1445
    },
    {
      "epoch": 0.08364097831102907,
      "grad_norm": 0.23897351324558258,
      "learning_rate": 0.00016724336793540946,
      "loss": 1.0146,
      "step": 1450
    },
    {
      "epoch": 0.08392939547761882,
      "grad_norm": 0.24604809284210205,
      "learning_rate": 0.00016782006920415226,
      "loss": 0.916,
      "step": 1455
    },
    {
      "epoch": 0.08421781264420858,
      "grad_norm": 0.2406589388847351,
      "learning_rate": 0.00016839677047289503,
      "loss": 0.9518,
      "step": 1460
    },
    {
      "epoch": 0.08450622981079833,
      "grad_norm": 0.24334654211997986,
      "learning_rate": 0.00016897347174163786,
      "loss": 0.9728,
      "step": 1465
    },
    {
      "epoch": 0.08479464697738809,
      "grad_norm": 0.2420976758003235,
      "learning_rate": 0.00016955017301038063,
      "loss": 0.983,
      "step": 1470
    },
    {
      "epoch": 0.08508306414397784,
      "grad_norm": 0.2604774832725525,
      "learning_rate": 0.00017012687427912343,
      "loss": 0.9632,
      "step": 1475
    },
    {
      "epoch": 0.0853714813105676,
      "grad_norm": 0.24979344010353088,
      "learning_rate": 0.00017070357554786622,
      "loss": 0.9323,
      "step": 1480
    },
    {
      "epoch": 0.08565989847715735,
      "grad_norm": 0.25925835967063904,
      "learning_rate": 0.000171280276816609,
      "loss": 1.0242,
      "step": 1485
    },
    {
      "epoch": 0.08594831564374712,
      "grad_norm": 0.2391650229692459,
      "learning_rate": 0.0001718569780853518,
      "loss": 0.9205,
      "step": 1490
    },
    {
      "epoch": 0.08623673281033688,
      "grad_norm": 0.2430115044116974,
      "learning_rate": 0.00017243367935409457,
      "loss": 0.9971,
      "step": 1495
    },
    {
      "epoch": 0.08652514997692663,
      "grad_norm": 0.23013629019260406,
      "learning_rate": 0.0001730103806228374,
      "loss": 0.9255,
      "step": 1500
    },
    {
      "epoch": 0.08681356714351639,
      "grad_norm": 0.24768148362636566,
      "learning_rate": 0.00017358708189158016,
      "loss": 0.9575,
      "step": 1505
    },
    {
      "epoch": 0.08710198431010614,
      "grad_norm": 0.24201525747776031,
      "learning_rate": 0.00017416378316032296,
      "loss": 0.9346,
      "step": 1510
    },
    {
      "epoch": 0.0873904014766959,
      "grad_norm": 0.24337361752986908,
      "learning_rate": 0.00017474048442906573,
      "loss": 0.9557,
      "step": 1515
    },
    {
      "epoch": 0.08767881864328565,
      "grad_norm": 0.2556352913379669,
      "learning_rate": 0.00017531718569780856,
      "loss": 0.9355,
      "step": 1520
    },
    {
      "epoch": 0.08796723580987541,
      "grad_norm": 0.2400965839624405,
      "learning_rate": 0.00017589388696655133,
      "loss": 1.035,
      "step": 1525
    },
    {
      "epoch": 0.08825565297646516,
      "grad_norm": 0.2564597427845001,
      "learning_rate": 0.00017647058823529413,
      "loss": 1.0211,
      "step": 1530
    },
    {
      "epoch": 0.08854407014305492,
      "grad_norm": 0.24977734684944153,
      "learning_rate": 0.00017704728950403693,
      "loss": 0.9954,
      "step": 1535
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 0.2484363615512848,
      "learning_rate": 0.00017762399077277973,
      "loss": 1.0266,
      "step": 1540
    },
    {
      "epoch": 0.08912090447623443,
      "grad_norm": 0.2474583089351654,
      "learning_rate": 0.0001782006920415225,
      "loss": 1.0453,
      "step": 1545
    },
    {
      "epoch": 0.08940932164282418,
      "grad_norm": 0.26252439618110657,
      "learning_rate": 0.0001787773933102653,
      "loss": 1.0519,
      "step": 1550
    },
    {
      "epoch": 0.08969773880941394,
      "grad_norm": 0.26378124952316284,
      "learning_rate": 0.0001793540945790081,
      "loss": 0.9504,
      "step": 1555
    },
    {
      "epoch": 0.08998615597600369,
      "grad_norm": 0.26493802666664124,
      "learning_rate": 0.00017993079584775087,
      "loss": 0.9936,
      "step": 1560
    },
    {
      "epoch": 0.09027457314259345,
      "grad_norm": 0.2636902928352356,
      "learning_rate": 0.00018050749711649367,
      "loss": 1.0266,
      "step": 1565
    },
    {
      "epoch": 0.0905629903091832,
      "grad_norm": 0.26412469148635864,
      "learning_rate": 0.00018108419838523644,
      "loss": 0.9843,
      "step": 1570
    },
    {
      "epoch": 0.09085140747577296,
      "grad_norm": 0.23820865154266357,
      "learning_rate": 0.00018166089965397926,
      "loss": 0.9403,
      "step": 1575
    },
    {
      "epoch": 0.09113982464236271,
      "grad_norm": 0.24650079011917114,
      "learning_rate": 0.00018223760092272203,
      "loss": 1.086,
      "step": 1580
    },
    {
      "epoch": 0.09142824180895247,
      "grad_norm": 0.2429857850074768,
      "learning_rate": 0.00018281430219146483,
      "loss": 0.9621,
      "step": 1585
    },
    {
      "epoch": 0.09171665897554222,
      "grad_norm": 0.23865139484405518,
      "learning_rate": 0.0001833910034602076,
      "loss": 0.9839,
      "step": 1590
    },
    {
      "epoch": 0.09200507614213198,
      "grad_norm": 0.2494489699602127,
      "learning_rate": 0.00018396770472895043,
      "loss": 1.0557,
      "step": 1595
    },
    {
      "epoch": 0.09229349330872173,
      "grad_norm": 0.247470885515213,
      "learning_rate": 0.0001845444059976932,
      "loss": 0.9255,
      "step": 1600
    },
    {
      "epoch": 0.09258191047531149,
      "grad_norm": 0.2614395022392273,
      "learning_rate": 0.000185121107266436,
      "loss": 0.9396,
      "step": 1605
    },
    {
      "epoch": 0.09287032764190124,
      "grad_norm": 0.2510371208190918,
      "learning_rate": 0.0001856978085351788,
      "loss": 0.9432,
      "step": 1610
    },
    {
      "epoch": 0.093158744808491,
      "grad_norm": 0.26177895069122314,
      "learning_rate": 0.00018627450980392157,
      "loss": 0.9958,
      "step": 1615
    },
    {
      "epoch": 0.09344716197508075,
      "grad_norm": 0.24554099142551422,
      "learning_rate": 0.00018685121107266437,
      "loss": 0.9845,
      "step": 1620
    },
    {
      "epoch": 0.09373557914167051,
      "grad_norm": 0.2630642354488373,
      "learning_rate": 0.00018742791234140714,
      "loss": 0.9659,
      "step": 1625
    },
    {
      "epoch": 0.09402399630826026,
      "grad_norm": 0.2493613213300705,
      "learning_rate": 0.00018800461361014997,
      "loss": 1.0318,
      "step": 1630
    },
    {
      "epoch": 0.09431241347485002,
      "grad_norm": 0.25541171431541443,
      "learning_rate": 0.00018858131487889274,
      "loss": 0.9631,
      "step": 1635
    },
    {
      "epoch": 0.09460083064143977,
      "grad_norm": 0.25047364830970764,
      "learning_rate": 0.00018915801614763554,
      "loss": 0.9596,
      "step": 1640
    },
    {
      "epoch": 0.09488924780802953,
      "grad_norm": 0.25506365299224854,
      "learning_rate": 0.0001897347174163783,
      "loss": 0.9745,
      "step": 1645
    },
    {
      "epoch": 0.09517766497461928,
      "grad_norm": 0.25513169169425964,
      "learning_rate": 0.00019031141868512113,
      "loss": 0.9668,
      "step": 1650
    },
    {
      "epoch": 0.09546608214120904,
      "grad_norm": 0.27256086468696594,
      "learning_rate": 0.0001908881199538639,
      "loss": 0.9768,
      "step": 1655
    },
    {
      "epoch": 0.0957544993077988,
      "grad_norm": 0.2694586217403412,
      "learning_rate": 0.0001914648212226067,
      "loss": 0.9661,
      "step": 1660
    },
    {
      "epoch": 0.09604291647438856,
      "grad_norm": 0.2510400414466858,
      "learning_rate": 0.00019204152249134948,
      "loss": 0.972,
      "step": 1665
    },
    {
      "epoch": 0.09633133364097832,
      "grad_norm": 0.25899869203567505,
      "learning_rate": 0.00019261822376009227,
      "loss": 0.932,
      "step": 1670
    },
    {
      "epoch": 0.09661975080756807,
      "grad_norm": 0.2670111060142517,
      "learning_rate": 0.00019319492502883507,
      "loss": 1.0586,
      "step": 1675
    },
    {
      "epoch": 0.09690816797415783,
      "grad_norm": 0.2525533437728882,
      "learning_rate": 0.00019377162629757784,
      "loss": 1.0007,
      "step": 1680
    },
    {
      "epoch": 0.09719658514074758,
      "grad_norm": 0.262040376663208,
      "learning_rate": 0.00019434832756632067,
      "loss": 0.9174,
      "step": 1685
    },
    {
      "epoch": 0.09748500230733734,
      "grad_norm": 0.2837670147418976,
      "learning_rate": 0.00019492502883506344,
      "loss": 0.8906,
      "step": 1690
    },
    {
      "epoch": 0.09777341947392709,
      "grad_norm": 0.2778575122356415,
      "learning_rate": 0.00019550173010380624,
      "loss": 0.9947,
      "step": 1695
    },
    {
      "epoch": 0.09806183664051685,
      "grad_norm": 0.24817965924739838,
      "learning_rate": 0.000196078431372549,
      "loss": 1.0096,
      "step": 1700
    },
    {
      "epoch": 0.0983502538071066,
      "grad_norm": 0.2699022591114044,
      "learning_rate": 0.00019665513264129184,
      "loss": 1.0408,
      "step": 1705
    },
    {
      "epoch": 0.09863867097369636,
      "grad_norm": 0.26346924901008606,
      "learning_rate": 0.0001972318339100346,
      "loss": 1.0206,
      "step": 1710
    },
    {
      "epoch": 0.09892708814028611,
      "grad_norm": 0.2642849087715149,
      "learning_rate": 0.0001978085351787774,
      "loss": 0.9985,
      "step": 1715
    },
    {
      "epoch": 0.09921550530687587,
      "grad_norm": 0.27539825439453125,
      "learning_rate": 0.00019838523644752018,
      "loss": 0.9614,
      "step": 1720
    },
    {
      "epoch": 0.09950392247346562,
      "grad_norm": 0.25085148215293884,
      "learning_rate": 0.000198961937716263,
      "loss": 0.9855,
      "step": 1725
    },
    {
      "epoch": 0.09979233964005538,
      "grad_norm": 0.2658745348453522,
      "learning_rate": 0.00019953863898500578,
      "loss": 0.9996,
      "step": 1730
    },
    {
      "epoch": 0.10008075680664513,
      "grad_norm": 0.2539004981517792,
      "learning_rate": 0.00019999999797274117,
      "loss": 0.9276,
      "step": 1735
    },
    {
      "epoch": 0.10036917397323489,
      "grad_norm": 0.2977031469345093,
      "learning_rate": 0.0001999999270186907,
      "loss": 0.991,
      "step": 1740
    },
    {
      "epoch": 0.10065759113982464,
      "grad_norm": 0.279045045375824,
      "learning_rate": 0.0001999997547017808,
      "loss": 0.9691,
      "step": 1745
    },
    {
      "epoch": 0.1009460083064144,
      "grad_norm": 0.2583720088005066,
      "learning_rate": 0.0001999994810221862,
      "loss": 0.9513,
      "step": 1750
    },
    {
      "epoch": 0.10123442547300415,
      "grad_norm": 0.2970728874206543,
      "learning_rate": 0.00019999910598018426,
      "loss": 0.9861,
      "step": 1755
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.25419795513153076,
      "learning_rate": 0.00019999862957615513,
      "loss": 1.0042,
      "step": 1760
    },
    {
      "epoch": 0.10181125980618366,
      "grad_norm": 0.26986587047576904,
      "learning_rate": 0.00019999805181058176,
      "loss": 0.9627,
      "step": 1765
    },
    {
      "epoch": 0.10209967697277342,
      "grad_norm": 0.2580127418041229,
      "learning_rate": 0.00019999737268404973,
      "loss": 1.0264,
      "step": 1770
    },
    {
      "epoch": 0.10238809413936317,
      "grad_norm": 0.25490108132362366,
      "learning_rate": 0.00019999659219724749,
      "loss": 0.9655,
      "step": 1775
    },
    {
      "epoch": 0.10267651130595293,
      "grad_norm": 0.2768772840499878,
      "learning_rate": 0.00019999571035096608,
      "loss": 1.0224,
      "step": 1780
    },
    {
      "epoch": 0.10296492847254268,
      "grad_norm": 0.25926682353019714,
      "learning_rate": 0.00019999472714609943,
      "loss": 0.9058,
      "step": 1785
    },
    {
      "epoch": 0.10325334563913244,
      "grad_norm": 0.2662297487258911,
      "learning_rate": 0.00019999364258364413,
      "loss": 0.9776,
      "step": 1790
    },
    {
      "epoch": 0.10354176280572219,
      "grad_norm": 0.2684202194213867,
      "learning_rate": 0.0001999924566646995,
      "loss": 0.9563,
      "step": 1795
    },
    {
      "epoch": 0.10383017997231195,
      "grad_norm": 0.25693777203559875,
      "learning_rate": 0.00019999116939046764,
      "loss": 1.0355,
      "step": 1800
    },
    {
      "epoch": 0.1041185971389017,
      "grad_norm": 0.24468928575515747,
      "learning_rate": 0.0001999897807622534,
      "loss": 1.0907,
      "step": 1805
    },
    {
      "epoch": 0.10440701430549146,
      "grad_norm": 0.25416669249534607,
      "learning_rate": 0.0001999882907814643,
      "loss": 1.0232,
      "step": 1810
    },
    {
      "epoch": 0.10469543147208121,
      "grad_norm": 0.27336063981056213,
      "learning_rate": 0.00019998669944961062,
      "loss": 0.9219,
      "step": 1815
    },
    {
      "epoch": 0.10498384863867097,
      "grad_norm": 0.26575300097465515,
      "learning_rate": 0.0001999850067683054,
      "loss": 0.9423,
      "step": 1820
    },
    {
      "epoch": 0.10527226580526072,
      "grad_norm": 0.2702259421348572,
      "learning_rate": 0.00019998321273926437,
      "loss": 1.0044,
      "step": 1825
    },
    {
      "epoch": 0.10556068297185048,
      "grad_norm": 0.25690004229545593,
      "learning_rate": 0.00019998131736430604,
      "loss": 0.9728,
      "step": 1830
    },
    {
      "epoch": 0.10584910013844025,
      "grad_norm": 0.27018973231315613,
      "learning_rate": 0.00019997932064535158,
      "loss": 1.0005,
      "step": 1835
    },
    {
      "epoch": 0.10613751730503,
      "grad_norm": 0.276635080575943,
      "learning_rate": 0.00019997722258442499,
      "loss": 0.9643,
      "step": 1840
    },
    {
      "epoch": 0.10642593447161976,
      "grad_norm": 0.2834376394748688,
      "learning_rate": 0.00019997502318365286,
      "loss": 0.9691,
      "step": 1845
    },
    {
      "epoch": 0.10671435163820951,
      "grad_norm": 0.2709560692310333,
      "learning_rate": 0.00019997272244526456,
      "loss": 0.9281,
      "step": 1850
    },
    {
      "epoch": 0.10700276880479927,
      "grad_norm": 0.2817580997943878,
      "learning_rate": 0.00019997032037159224,
      "loss": 1.0373,
      "step": 1855
    },
    {
      "epoch": 0.10729118597138902,
      "grad_norm": 0.27111148834228516,
      "learning_rate": 0.00019996781696507069,
      "loss": 1.0148,
      "step": 1860
    },
    {
      "epoch": 0.10757960313797878,
      "grad_norm": 0.25952383875846863,
      "learning_rate": 0.00019996521222823743,
      "loss": 0.9546,
      "step": 1865
    },
    {
      "epoch": 0.10786802030456853,
      "grad_norm": 0.2788892686367035,
      "learning_rate": 0.00019996250616373268,
      "loss": 0.9801,
      "step": 1870
    },
    {
      "epoch": 0.10815643747115829,
      "grad_norm": 1.3752487897872925,
      "learning_rate": 0.00019995969877429945,
      "loss": 0.9122,
      "step": 1875
    },
    {
      "epoch": 0.10844485463774804,
      "grad_norm": 0.2783893048763275,
      "learning_rate": 0.0001999567900627833,
      "loss": 1.0063,
      "step": 1880
    },
    {
      "epoch": 0.1087332718043378,
      "grad_norm": 0.27742305397987366,
      "learning_rate": 0.0001999537800321327,
      "loss": 0.9959,
      "step": 1885
    },
    {
      "epoch": 0.10902168897092755,
      "grad_norm": 0.2651984691619873,
      "learning_rate": 0.0001999506686853986,
      "loss": 1.007,
      "step": 1890
    },
    {
      "epoch": 0.10931010613751731,
      "grad_norm": 0.25715208053588867,
      "learning_rate": 0.0001999474560257348,
      "loss": 0.9855,
      "step": 1895
    },
    {
      "epoch": 0.10959852330410706,
      "grad_norm": 0.27990275621414185,
      "learning_rate": 0.00019994414205639775,
      "loss": 0.9599,
      "step": 1900
    },
    {
      "epoch": 0.10988694047069682,
      "grad_norm": 0.25654786825180054,
      "learning_rate": 0.00019994072678074655,
      "loss": 0.9955,
      "step": 1905
    },
    {
      "epoch": 0.11017535763728657,
      "grad_norm": 0.28725671768188477,
      "learning_rate": 0.00019993721020224308,
      "loss": 0.9419,
      "step": 1910
    },
    {
      "epoch": 0.11046377480387633,
      "grad_norm": 0.25918087363243103,
      "learning_rate": 0.00019993359232445176,
      "loss": 0.9585,
      "step": 1915
    },
    {
      "epoch": 0.11075219197046608,
      "grad_norm": 0.25459691882133484,
      "learning_rate": 0.0001999298731510399,
      "loss": 0.9382,
      "step": 1920
    },
    {
      "epoch": 0.11104060913705584,
      "grad_norm": 0.2630646526813507,
      "learning_rate": 0.00019992605268577727,
      "loss": 0.9103,
      "step": 1925
    },
    {
      "epoch": 0.11132902630364559,
      "grad_norm": 0.2786347270011902,
      "learning_rate": 0.00019992213093253643,
      "loss": 1.0174,
      "step": 1930
    },
    {
      "epoch": 0.11161744347023535,
      "grad_norm": 0.25533023476600647,
      "learning_rate": 0.00019991810789529257,
      "loss": 1.003,
      "step": 1935
    },
    {
      "epoch": 0.1119058606368251,
      "grad_norm": 0.2641088366508484,
      "learning_rate": 0.0001999139835781236,
      "loss": 0.9767,
      "step": 1940
    },
    {
      "epoch": 0.11219427780341486,
      "grad_norm": 0.2834392189979553,
      "learning_rate": 0.00019990975798521,
      "loss": 1.0413,
      "step": 1945
    },
    {
      "epoch": 0.11248269497000461,
      "grad_norm": 0.29145047068595886,
      "learning_rate": 0.00019990543112083503,
      "loss": 0.9319,
      "step": 1950
    },
    {
      "epoch": 0.11277111213659437,
      "grad_norm": 0.2648943364620209,
      "learning_rate": 0.00019990100298938442,
      "loss": 0.9541,
      "step": 1955
    },
    {
      "epoch": 0.11305952930318412,
      "grad_norm": 0.2761361002922058,
      "learning_rate": 0.00019989647359534672,
      "loss": 1.041,
      "step": 1960
    },
    {
      "epoch": 0.11334794646977388,
      "grad_norm": 0.26408612728118896,
      "learning_rate": 0.00019989184294331308,
      "loss": 0.9914,
      "step": 1965
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.28646019101142883,
      "learning_rate": 0.0001998871110379772,
      "loss": 1.0491,
      "step": 1970
    },
    {
      "epoch": 0.11392478080295339,
      "grad_norm": 0.28368857502937317,
      "learning_rate": 0.0001998822778841355,
      "loss": 1.0156,
      "step": 1975
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 0.2637539207935333,
      "learning_rate": 0.00019987734348668706,
      "loss": 0.9229,
      "step": 1980
    },
    {
      "epoch": 0.1145016151361329,
      "grad_norm": 0.3021569848060608,
      "learning_rate": 0.00019987230785063344,
      "loss": 1.0092,
      "step": 1985
    },
    {
      "epoch": 0.11479003230272265,
      "grad_norm": 0.2628127336502075,
      "learning_rate": 0.00019986717098107896,
      "loss": 0.9768,
      "step": 1990
    },
    {
      "epoch": 0.11507844946931241,
      "grad_norm": 0.2722758948802948,
      "learning_rate": 0.0001998619328832305,
      "loss": 1.0138,
      "step": 1995
    },
    {
      "epoch": 0.11536686663590216,
      "grad_norm": 0.261016845703125,
      "learning_rate": 0.00019985659356239758,
      "loss": 1.0576,
      "step": 2000
    },
    {
      "epoch": 0.11565528380249192,
      "grad_norm": 0.26313841342926025,
      "learning_rate": 0.0001998511530239922,
      "loss": 0.9934,
      "step": 2005
    },
    {
      "epoch": 0.11594370096908169,
      "grad_norm": 0.2713305354118347,
      "learning_rate": 0.00019984561127352914,
      "loss": 1.0219,
      "step": 2010
    },
    {
      "epoch": 0.11623211813567144,
      "grad_norm": 0.24656793475151062,
      "learning_rate": 0.00019983996831662566,
      "loss": 1.0266,
      "step": 2015
    },
    {
      "epoch": 0.1165205353022612,
      "grad_norm": 0.259756863117218,
      "learning_rate": 0.00019983422415900158,
      "loss": 1.0254,
      "step": 2020
    },
    {
      "epoch": 0.11680895246885095,
      "grad_norm": 0.2553316652774811,
      "learning_rate": 0.0001998283788064794,
      "loss": 0.9304,
      "step": 2025
    },
    {
      "epoch": 0.1170973696354407,
      "grad_norm": 0.27868157625198364,
      "learning_rate": 0.00019982243226498411,
      "loss": 1.0193,
      "step": 2030
    },
    {
      "epoch": 0.11738578680203046,
      "grad_norm": 0.2899937927722931,
      "learning_rate": 0.00019981638454054333,
      "loss": 0.8705,
      "step": 2035
    },
    {
      "epoch": 0.11767420396862022,
      "grad_norm": 0.2696991264820099,
      "learning_rate": 0.00019981023563928716,
      "loss": 0.9649,
      "step": 2040
    },
    {
      "epoch": 0.11796262113520997,
      "grad_norm": 0.26514795422554016,
      "learning_rate": 0.00019980398556744837,
      "loss": 0.9288,
      "step": 2045
    },
    {
      "epoch": 0.11825103830179973,
      "grad_norm": 0.2759961187839508,
      "learning_rate": 0.00019979763433136216,
      "loss": 0.9752,
      "step": 2050
    },
    {
      "epoch": 0.11853945546838948,
      "grad_norm": 0.24151116609573364,
      "learning_rate": 0.00019979118193746637,
      "loss": 0.9837,
      "step": 2055
    },
    {
      "epoch": 0.11882787263497924,
      "grad_norm": 0.2888840436935425,
      "learning_rate": 0.00019978462839230133,
      "loss": 1.0506,
      "step": 2060
    },
    {
      "epoch": 0.11911628980156899,
      "grad_norm": 0.258368581533432,
      "learning_rate": 0.00019977797370250986,
      "loss": 0.9598,
      "step": 2065
    },
    {
      "epoch": 0.11940470696815875,
      "grad_norm": 0.27287793159484863,
      "learning_rate": 0.0001997712178748374,
      "loss": 1.0012,
      "step": 2070
    },
    {
      "epoch": 0.1196931241347485,
      "grad_norm": 0.2549577057361603,
      "learning_rate": 0.00019976436091613184,
      "loss": 1.0228,
      "step": 2075
    },
    {
      "epoch": 0.11998154130133826,
      "grad_norm": 0.26153385639190674,
      "learning_rate": 0.0001997574028333436,
      "loss": 0.9636,
      "step": 2080
    },
    {
      "epoch": 0.12026995846792801,
      "grad_norm": 0.2726786136627197,
      "learning_rate": 0.00019975034363352556,
      "loss": 0.9345,
      "step": 2085
    },
    {
      "epoch": 0.12055837563451777,
      "grad_norm": 0.27283257246017456,
      "learning_rate": 0.0001997431833238332,
      "loss": 0.9742,
      "step": 2090
    },
    {
      "epoch": 0.12084679280110752,
      "grad_norm": 0.35528162121772766,
      "learning_rate": 0.00019973592191152437,
      "loss": 1.0162,
      "step": 2095
    },
    {
      "epoch": 0.12113520996769728,
      "grad_norm": 0.2918716371059418,
      "learning_rate": 0.00019972855940395947,
      "loss": 1.0201,
      "step": 2100
    },
    {
      "epoch": 0.12142362713428703,
      "grad_norm": 0.28611305356025696,
      "learning_rate": 0.00019972109580860132,
      "loss": 0.9773,
      "step": 2105
    },
    {
      "epoch": 0.12171204430087679,
      "grad_norm": 0.2829110026359558,
      "learning_rate": 0.00019971353113301527,
      "loss": 1.0952,
      "step": 2110
    },
    {
      "epoch": 0.12200046146746654,
      "grad_norm": 0.26948046684265137,
      "learning_rate": 0.0001997058653848691,
      "loss": 1.0,
      "step": 2115
    },
    {
      "epoch": 0.1222888786340563,
      "grad_norm": 0.259901762008667,
      "learning_rate": 0.00019969809857193306,
      "loss": 0.9584,
      "step": 2120
    },
    {
      "epoch": 0.12257729580064605,
      "grad_norm": 0.2724592387676239,
      "learning_rate": 0.00019969023070207973,
      "loss": 0.9421,
      "step": 2125
    },
    {
      "epoch": 0.12286571296723581,
      "grad_norm": 0.26687607169151306,
      "learning_rate": 0.0001996822617832843,
      "loss": 0.9197,
      "step": 2130
    },
    {
      "epoch": 0.12315413013382556,
      "grad_norm": 0.28086045384407043,
      "learning_rate": 0.00019967419182362429,
      "loss": 0.9574,
      "step": 2135
    },
    {
      "epoch": 0.12344254730041532,
      "grad_norm": 0.27749550342559814,
      "learning_rate": 0.0001996660208312796,
      "loss": 0.9948,
      "step": 2140
    },
    {
      "epoch": 0.12373096446700507,
      "grad_norm": 0.26183804869651794,
      "learning_rate": 0.00019965774881453263,
      "loss": 1.0297,
      "step": 2145
    },
    {
      "epoch": 0.12401938163359483,
      "grad_norm": 0.2577550411224365,
      "learning_rate": 0.00019964937578176816,
      "loss": 0.9852,
      "step": 2150
    },
    {
      "epoch": 0.12430779880018458,
      "grad_norm": 0.279245525598526,
      "learning_rate": 0.00019964090174147327,
      "loss": 0.9754,
      "step": 2155
    },
    {
      "epoch": 0.12459621596677434,
      "grad_norm": 0.2758920192718506,
      "learning_rate": 0.00019963232670223752,
      "loss": 0.9894,
      "step": 2160
    },
    {
      "epoch": 0.12488463313336409,
      "grad_norm": 0.29135221242904663,
      "learning_rate": 0.00019962365067275286,
      "loss": 0.9535,
      "step": 2165
    },
    {
      "epoch": 0.12517305029995385,
      "grad_norm": 0.2922044098377228,
      "learning_rate": 0.00019961487366181355,
      "loss": 0.9631,
      "step": 2170
    },
    {
      "epoch": 0.1254614674665436,
      "grad_norm": 0.2769213020801544,
      "learning_rate": 0.0001996059956783162,
      "loss": 1.0147,
      "step": 2175
    },
    {
      "epoch": 0.12574988463313336,
      "grad_norm": 0.26981329917907715,
      "learning_rate": 0.00019959701673125983,
      "loss": 1.0227,
      "step": 2180
    },
    {
      "epoch": 0.1260383017997231,
      "grad_norm": 0.27635276317596436,
      "learning_rate": 0.00019958793682974574,
      "loss": 0.9745,
      "step": 2185
    },
    {
      "epoch": 0.12632671896631287,
      "grad_norm": 0.28650912642478943,
      "learning_rate": 0.00019957875598297759,
      "loss": 1.0018,
      "step": 2190
    },
    {
      "epoch": 0.12661513613290262,
      "grad_norm": 0.26457536220550537,
      "learning_rate": 0.00019956947420026136,
      "loss": 1.0461,
      "step": 2195
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 0.28600943088531494,
      "learning_rate": 0.00019956009149100533,
      "loss": 0.9647,
      "step": 2200
    },
    {
      "epoch": 0.12719197046608213,
      "grad_norm": 0.2786143720149994,
      "learning_rate": 0.00019955060786472012,
      "loss": 0.9247,
      "step": 2205
    },
    {
      "epoch": 0.1274803876326719,
      "grad_norm": 0.2701742351055145,
      "learning_rate": 0.00019954102333101856,
      "loss": 0.9729,
      "step": 2210
    },
    {
      "epoch": 0.12776880479926164,
      "grad_norm": 0.28489363193511963,
      "learning_rate": 0.00019953133789961584,
      "loss": 0.9782,
      "step": 2215
    },
    {
      "epoch": 0.1280572219658514,
      "grad_norm": 0.26730117201805115,
      "learning_rate": 0.0001995215515803294,
      "loss": 0.9715,
      "step": 2220
    },
    {
      "epoch": 0.12834563913244115,
      "grad_norm": 0.28904202580451965,
      "learning_rate": 0.00019951166438307894,
      "loss": 0.9835,
      "step": 2225
    },
    {
      "epoch": 0.1286340562990309,
      "grad_norm": 0.2703316807746887,
      "learning_rate": 0.00019950167631788642,
      "loss": 0.9696,
      "step": 2230
    },
    {
      "epoch": 0.12892247346562066,
      "grad_norm": 0.28106942772865295,
      "learning_rate": 0.000199491587394876,
      "loss": 0.9521,
      "step": 2235
    },
    {
      "epoch": 0.12921089063221042,
      "grad_norm": 0.27781790494918823,
      "learning_rate": 0.00019948139762427416,
      "loss": 0.9942,
      "step": 2240
    },
    {
      "epoch": 0.12949930779880017,
      "grad_norm": 0.26624271273612976,
      "learning_rate": 0.00019947110701640952,
      "loss": 0.9662,
      "step": 2245
    },
    {
      "epoch": 0.12978772496538993,
      "grad_norm": 0.2619341015815735,
      "learning_rate": 0.000199460715581713,
      "loss": 0.9083,
      "step": 2250
    },
    {
      "epoch": 0.13007614213197968,
      "grad_norm": 0.2704095244407654,
      "learning_rate": 0.00019945022333071752,
      "loss": 1.0528,
      "step": 2255
    },
    {
      "epoch": 0.13036455929856944,
      "grad_norm": 0.2684679627418518,
      "learning_rate": 0.0001994396302740585,
      "loss": 0.9707,
      "step": 2260
    },
    {
      "epoch": 0.1306529764651592,
      "grad_norm": 0.2747194766998291,
      "learning_rate": 0.00019942893642247326,
      "loss": 0.9843,
      "step": 2265
    },
    {
      "epoch": 0.13094139363174895,
      "grad_norm": 0.27362656593322754,
      "learning_rate": 0.00019941814178680144,
      "loss": 1.0139,
      "step": 2270
    },
    {
      "epoch": 0.13122981079833873,
      "grad_norm": 0.2812444567680359,
      "learning_rate": 0.00019940724637798477,
      "loss": 0.9368,
      "step": 2275
    },
    {
      "epoch": 0.13151822796492849,
      "grad_norm": 0.2766329050064087,
      "learning_rate": 0.00019939625020706724,
      "loss": 0.9932,
      "step": 2280
    },
    {
      "epoch": 0.13180664513151824,
      "grad_norm": 0.27966558933258057,
      "learning_rate": 0.0001993851532851948,
      "loss": 0.9762,
      "step": 2285
    },
    {
      "epoch": 0.132095062298108,
      "grad_norm": 0.2908051908016205,
      "learning_rate": 0.00019937395562361564,
      "loss": 1.0419,
      "step": 2290
    },
    {
      "epoch": 0.13238347946469775,
      "grad_norm": 0.272611528635025,
      "learning_rate": 0.0001993626572336801,
      "loss": 0.9563,
      "step": 2295
    },
    {
      "epoch": 0.1326718966312875,
      "grad_norm": 0.2815663814544678,
      "learning_rate": 0.00019935125812684047,
      "loss": 0.9881,
      "step": 2300
    },
    {
      "epoch": 0.13296031379787726,
      "grad_norm": 0.28436169028282166,
      "learning_rate": 0.0001993397583146513,
      "loss": 1.0,
      "step": 2305
    },
    {
      "epoch": 0.13324873096446702,
      "grad_norm": 0.2669413983821869,
      "learning_rate": 0.00019932815780876904,
      "loss": 0.9727,
      "step": 2310
    },
    {
      "epoch": 0.13353714813105677,
      "grad_norm": 0.2894003093242645,
      "learning_rate": 0.00019931645662095237,
      "loss": 0.9613,
      "step": 2315
    },
    {
      "epoch": 0.13382556529764653,
      "grad_norm": 0.27110880613327026,
      "learning_rate": 0.00019930465476306197,
      "loss": 0.9912,
      "step": 2320
    },
    {
      "epoch": 0.13411398246423628,
      "grad_norm": 0.28134435415267944,
      "learning_rate": 0.0001992927522470605,
      "loss": 1.0183,
      "step": 2325
    },
    {
      "epoch": 0.13440239963082604,
      "grad_norm": 0.2562038004398346,
      "learning_rate": 0.00019928074908501272,
      "loss": 0.9604,
      "step": 2330
    },
    {
      "epoch": 0.1346908167974158,
      "grad_norm": 0.3024313747882843,
      "learning_rate": 0.0001992686452890854,
      "loss": 0.9825,
      "step": 2335
    },
    {
      "epoch": 0.13497923396400555,
      "grad_norm": 0.28540000319480896,
      "learning_rate": 0.00019925644087154734,
      "loss": 0.9882,
      "step": 2340
    },
    {
      "epoch": 0.1352676511305953,
      "grad_norm": 0.26603230834007263,
      "learning_rate": 0.0001992441358447692,
      "loss": 0.9883,
      "step": 2345
    },
    {
      "epoch": 0.13555606829718506,
      "grad_norm": 0.28682613372802734,
      "learning_rate": 0.00019923173022122378,
      "loss": 0.9404,
      "step": 2350
    },
    {
      "epoch": 0.1358444854637748,
      "grad_norm": 0.29595518112182617,
      "learning_rate": 0.00019921922401348576,
      "loss": 0.963,
      "step": 2355
    },
    {
      "epoch": 0.13613290263036457,
      "grad_norm": 0.2716725468635559,
      "learning_rate": 0.00019920661723423183,
      "loss": 0.9273,
      "step": 2360
    },
    {
      "epoch": 0.13642131979695432,
      "grad_norm": 0.2717735469341278,
      "learning_rate": 0.00019919390989624054,
      "loss": 0.9808,
      "step": 2365
    },
    {
      "epoch": 0.13670973696354408,
      "grad_norm": 0.3040509521961212,
      "learning_rate": 0.00019918110201239247,
      "loss": 1.0277,
      "step": 2370
    },
    {
      "epoch": 0.13699815413013383,
      "grad_norm": 0.2620179355144501,
      "learning_rate": 0.00019916819359567001,
      "loss": 1.0213,
      "step": 2375
    },
    {
      "epoch": 0.1372865712967236,
      "grad_norm": 0.3165864646434784,
      "learning_rate": 0.00019915518465915758,
      "loss": 0.9429,
      "step": 2380
    },
    {
      "epoch": 0.13757498846331334,
      "grad_norm": 0.311599999666214,
      "learning_rate": 0.0001991420752160414,
      "loss": 1.0413,
      "step": 2385
    },
    {
      "epoch": 0.1378634056299031,
      "grad_norm": 0.2847161293029785,
      "learning_rate": 0.00019912886527960954,
      "loss": 0.99,
      "step": 2390
    },
    {
      "epoch": 0.13815182279649285,
      "grad_norm": 0.2932097017765045,
      "learning_rate": 0.00019911555486325203,
      "loss": 1.0477,
      "step": 2395
    },
    {
      "epoch": 0.1384402399630826,
      "grad_norm": 0.269297331571579,
      "learning_rate": 0.0001991021439804607,
      "loss": 1.0069,
      "step": 2400
    },
    {
      "epoch": 0.13872865712967236,
      "grad_norm": 0.2708551287651062,
      "learning_rate": 0.00019908863264482917,
      "loss": 0.9499,
      "step": 2405
    },
    {
      "epoch": 0.13901707429626212,
      "grad_norm": 0.2815878093242645,
      "learning_rate": 0.00019907502087005297,
      "loss": 1.0067,
      "step": 2410
    },
    {
      "epoch": 0.13930549146285187,
      "grad_norm": 0.2791478633880615,
      "learning_rate": 0.00019906130866992935,
      "loss": 0.9474,
      "step": 2415
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 0.2578289806842804,
      "learning_rate": 0.00019904749605835742,
      "loss": 0.9546,
      "step": 2420
    },
    {
      "epoch": 0.13988232579603138,
      "grad_norm": 0.27858108282089233,
      "learning_rate": 0.00019903358304933805,
      "loss": 1.0216,
      "step": 2425
    },
    {
      "epoch": 0.14017074296262114,
      "grad_norm": 0.2818721830844879,
      "learning_rate": 0.00019901956965697387,
      "loss": 0.9856,
      "step": 2430
    },
    {
      "epoch": 0.1404591601292109,
      "grad_norm": 0.30525150895118713,
      "learning_rate": 0.0001990054558954693,
      "loss": 1.0186,
      "step": 2435
    },
    {
      "epoch": 0.14074757729580065,
      "grad_norm": 0.2578755021095276,
      "learning_rate": 0.00019899124177913041,
      "loss": 0.9917,
      "step": 2440
    },
    {
      "epoch": 0.1410359944623904,
      "grad_norm": 0.2822008728981018,
      "learning_rate": 0.0001989769273223651,
      "loss": 0.915,
      "step": 2445
    },
    {
      "epoch": 0.14132441162898016,
      "grad_norm": 0.279206782579422,
      "learning_rate": 0.00019896251253968288,
      "loss": 0.9978,
      "step": 2450
    },
    {
      "epoch": 0.1416128287955699,
      "grad_norm": 0.3062402009963989,
      "learning_rate": 0.000198947997445695,
      "loss": 0.9784,
      "step": 2455
    },
    {
      "epoch": 0.14190124596215967,
      "grad_norm": 0.28376179933547974,
      "learning_rate": 0.0001989333820551144,
      "loss": 0.8916,
      "step": 2460
    },
    {
      "epoch": 0.14218966312874942,
      "grad_norm": 0.3137090504169464,
      "learning_rate": 0.00019891866638275564,
      "loss": 1.0176,
      "step": 2465
    },
    {
      "epoch": 0.14247808029533918,
      "grad_norm": 0.27621030807495117,
      "learning_rate": 0.00019890385044353501,
      "loss": 0.9188,
      "step": 2470
    },
    {
      "epoch": 0.14276649746192893,
      "grad_norm": 0.26816585659980774,
      "learning_rate": 0.00019888893425247032,
      "loss": 0.9401,
      "step": 2475
    },
    {
      "epoch": 0.1430549146285187,
      "grad_norm": 0.2706138789653778,
      "learning_rate": 0.00019887391782468113,
      "loss": 0.9599,
      "step": 2480
    },
    {
      "epoch": 0.14334333179510844,
      "grad_norm": 0.27414408326148987,
      "learning_rate": 0.00019885880117538846,
      "loss": 0.9372,
      "step": 2485
    },
    {
      "epoch": 0.1436317489616982,
      "grad_norm": 0.26232287287712097,
      "learning_rate": 0.000198843584319915,
      "loss": 1.0162,
      "step": 2490
    },
    {
      "epoch": 0.14392016612828795,
      "grad_norm": 0.2994023263454437,
      "learning_rate": 0.00019882826727368508,
      "loss": 1.0131,
      "step": 2495
    },
    {
      "epoch": 0.1442085832948777,
      "grad_norm": 0.3080557584762573,
      "learning_rate": 0.0001988128500522244,
      "loss": 0.9965,
      "step": 2500
    },
    {
      "epoch": 0.14449700046146746,
      "grad_norm": 0.2668229341506958,
      "learning_rate": 0.00019879733267116035,
      "loss": 1.0265,
      "step": 2505
    },
    {
      "epoch": 0.14478541762805722,
      "grad_norm": 0.3170202672481537,
      "learning_rate": 0.00019878171514622187,
      "loss": 0.9301,
      "step": 2510
    },
    {
      "epoch": 0.14507383479464697,
      "grad_norm": 0.2696046829223633,
      "learning_rate": 0.0001987659974932392,
      "loss": 0.9447,
      "step": 2515
    },
    {
      "epoch": 0.14536225196123673,
      "grad_norm": 0.29331788420677185,
      "learning_rate": 0.00019875017972814435,
      "loss": 0.9869,
      "step": 2520
    },
    {
      "epoch": 0.14565066912782648,
      "grad_norm": 0.27732712030410767,
      "learning_rate": 0.0001987342618669706,
      "loss": 0.9317,
      "step": 2525
    },
    {
      "epoch": 0.14593908629441624,
      "grad_norm": 0.29539668560028076,
      "learning_rate": 0.00019871824392585276,
      "loss": 0.932,
      "step": 2530
    },
    {
      "epoch": 0.146227503461006,
      "grad_norm": 0.2826956510543823,
      "learning_rate": 0.00019870212592102711,
      "loss": 1.0275,
      "step": 2535
    },
    {
      "epoch": 0.14651592062759575,
      "grad_norm": 0.27761781215667725,
      "learning_rate": 0.00019868590786883134,
      "loss": 1.0548,
      "step": 2540
    },
    {
      "epoch": 0.1468043377941855,
      "grad_norm": 0.268969863653183,
      "learning_rate": 0.00019866958978570452,
      "loss": 0.8818,
      "step": 2545
    },
    {
      "epoch": 0.14709275496077526,
      "grad_norm": 0.2976461350917816,
      "learning_rate": 0.00019865317168818713,
      "loss": 0.962,
      "step": 2550
    },
    {
      "epoch": 0.147381172127365,
      "grad_norm": 0.2762574255466461,
      "learning_rate": 0.00019863665359292108,
      "loss": 1.0253,
      "step": 2555
    },
    {
      "epoch": 0.14766958929395477,
      "grad_norm": 0.24959316849708557,
      "learning_rate": 0.0001986200355166495,
      "loss": 0.952,
      "step": 2560
    },
    {
      "epoch": 0.14795800646054452,
      "grad_norm": 0.2605302035808563,
      "learning_rate": 0.0001986033174762171,
      "loss": 0.9412,
      "step": 2565
    },
    {
      "epoch": 0.14824642362713428,
      "grad_norm": 0.2719340920448303,
      "learning_rate": 0.0001985864994885697,
      "loss": 0.9876,
      "step": 2570
    },
    {
      "epoch": 0.14853484079372403,
      "grad_norm": 0.2838572561740875,
      "learning_rate": 0.00019856958157075445,
      "loss": 1.0004,
      "step": 2575
    },
    {
      "epoch": 0.1488232579603138,
      "grad_norm": 0.2722460627555847,
      "learning_rate": 0.00019855256373991993,
      "loss": 0.9117,
      "step": 2580
    },
    {
      "epoch": 0.14911167512690354,
      "grad_norm": 0.2888812720775604,
      "learning_rate": 0.0001985354460133159,
      "loss": 0.9098,
      "step": 2585
    },
    {
      "epoch": 0.1494000922934933,
      "grad_norm": 0.27125051617622375,
      "learning_rate": 0.00019851822840829338,
      "loss": 0.9125,
      "step": 2590
    },
    {
      "epoch": 0.14968850946008305,
      "grad_norm": 0.2893664240837097,
      "learning_rate": 0.0001985009109423046,
      "loss": 0.9997,
      "step": 2595
    },
    {
      "epoch": 0.1499769266266728,
      "grad_norm": 0.27496811747550964,
      "learning_rate": 0.0001984834936329031,
      "loss": 1.0189,
      "step": 2600
    },
    {
      "epoch": 0.15026534379326256,
      "grad_norm": 0.28814586997032166,
      "learning_rate": 0.00019846597649774358,
      "loss": 1.06,
      "step": 2605
    },
    {
      "epoch": 0.15055376095985232,
      "grad_norm": 0.35203614830970764,
      "learning_rate": 0.00019844835955458193,
      "loss": 1.0003,
      "step": 2610
    },
    {
      "epoch": 0.15084217812644207,
      "grad_norm": 0.2919403612613678,
      "learning_rate": 0.00019843064282127511,
      "loss": 0.9567,
      "step": 2615
    },
    {
      "epoch": 0.15113059529303183,
      "grad_norm": 0.2837710678577423,
      "learning_rate": 0.00019841282631578145,
      "loss": 0.9919,
      "step": 2620
    },
    {
      "epoch": 0.1514190124596216,
      "grad_norm": 0.319578617811203,
      "learning_rate": 0.0001983949100561602,
      "loss": 0.9816,
      "step": 2625
    },
    {
      "epoch": 0.15170742962621137,
      "grad_norm": 0.2784458100795746,
      "learning_rate": 0.00019837689406057183,
      "loss": 0.9575,
      "step": 2630
    },
    {
      "epoch": 0.15199584679280112,
      "grad_norm": 0.26964443922042847,
      "learning_rate": 0.00019835877834727787,
      "loss": 0.9494,
      "step": 2635
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.2933679521083832,
      "learning_rate": 0.00019834056293464093,
      "loss": 1.0165,
      "step": 2640
    },
    {
      "epoch": 0.15257268112598063,
      "grad_norm": 0.27530142664909363,
      "learning_rate": 0.00019832224784112473,
      "loss": 1.0242,
      "step": 2645
    },
    {
      "epoch": 0.15286109829257039,
      "grad_norm": 0.28296253085136414,
      "learning_rate": 0.00019830383308529393,
      "loss": 1.0447,
      "step": 2650
    },
    {
      "epoch": 0.15314951545916014,
      "grad_norm": 0.2897213399410248,
      "learning_rate": 0.0001982853186858143,
      "loss": 0.9933,
      "step": 2655
    },
    {
      "epoch": 0.1534379326257499,
      "grad_norm": 0.29725173115730286,
      "learning_rate": 0.00019826670466145262,
      "loss": 0.8896,
      "step": 2660
    },
    {
      "epoch": 0.15372634979233965,
      "grad_norm": 0.27441513538360596,
      "learning_rate": 0.0001982479910310765,
      "loss": 0.9831,
      "step": 2665
    },
    {
      "epoch": 0.1540147669589294,
      "grad_norm": 0.29334786534309387,
      "learning_rate": 0.00019822917781365474,
      "loss": 1.0099,
      "step": 2670
    },
    {
      "epoch": 0.15430318412551916,
      "grad_norm": 0.2920885682106018,
      "learning_rate": 0.00019821026502825687,
      "loss": 1.0279,
      "step": 2675
    },
    {
      "epoch": 0.15459160129210892,
      "grad_norm": 0.2887846529483795,
      "learning_rate": 0.00019819125269405352,
      "loss": 0.9975,
      "step": 2680
    },
    {
      "epoch": 0.15488001845869867,
      "grad_norm": 0.29183831810951233,
      "learning_rate": 0.00019817214083031614,
      "loss": 1.001,
      "step": 2685
    },
    {
      "epoch": 0.15516843562528843,
      "grad_norm": 0.26283201575279236,
      "learning_rate": 0.00019815292945641705,
      "loss": 0.9868,
      "step": 2690
    },
    {
      "epoch": 0.15545685279187818,
      "grad_norm": 0.2814032733440399,
      "learning_rate": 0.00019813361859182945,
      "loss": 0.9914,
      "step": 2695
    },
    {
      "epoch": 0.15574526995846794,
      "grad_norm": 0.28302204608917236,
      "learning_rate": 0.0001981142082561274,
      "loss": 0.8995,
      "step": 2700
    },
    {
      "epoch": 0.1560336871250577,
      "grad_norm": 0.2865697145462036,
      "learning_rate": 0.00019809469846898586,
      "loss": 0.955,
      "step": 2705
    },
    {
      "epoch": 0.15632210429164745,
      "grad_norm": 0.28486767411231995,
      "learning_rate": 0.0001980750892501804,
      "loss": 0.9249,
      "step": 2710
    },
    {
      "epoch": 0.1566105214582372,
      "grad_norm": 0.31295526027679443,
      "learning_rate": 0.00019805538061958765,
      "loss": 0.941,
      "step": 2715
    },
    {
      "epoch": 0.15689893862482696,
      "grad_norm": 0.30136919021606445,
      "learning_rate": 0.0001980355725971847,
      "loss": 0.9598,
      "step": 2720
    },
    {
      "epoch": 0.1571873557914167,
      "grad_norm": 0.2663707435131073,
      "learning_rate": 0.00019801566520304963,
      "loss": 0.9623,
      "step": 2725
    },
    {
      "epoch": 0.15747577295800647,
      "grad_norm": 0.2665877044200897,
      "learning_rate": 0.0001979956584573612,
      "loss": 0.9904,
      "step": 2730
    },
    {
      "epoch": 0.15776419012459622,
      "grad_norm": 0.2973937392234802,
      "learning_rate": 0.00019797555238039872,
      "loss": 0.9526,
      "step": 2735
    },
    {
      "epoch": 0.15805260729118598,
      "grad_norm": 0.2698862850666046,
      "learning_rate": 0.00019795534699254238,
      "loss": 0.9318,
      "step": 2740
    },
    {
      "epoch": 0.15834102445777573,
      "grad_norm": 0.28309038281440735,
      "learning_rate": 0.0001979350423142729,
      "loss": 0.9845,
      "step": 2745
    },
    {
      "epoch": 0.15862944162436549,
      "grad_norm": 0.29097744822502136,
      "learning_rate": 0.00019791463836617176,
      "loss": 0.9371,
      "step": 2750
    },
    {
      "epoch": 0.15891785879095524,
      "grad_norm": 0.27511849999427795,
      "learning_rate": 0.00019789413516892098,
      "loss": 1.0101,
      "step": 2755
    },
    {
      "epoch": 0.159206275957545,
      "grad_norm": 0.289734810590744,
      "learning_rate": 0.00019787353274330313,
      "loss": 1.0161,
      "step": 2760
    },
    {
      "epoch": 0.15949469312413475,
      "grad_norm": 0.2949714958667755,
      "learning_rate": 0.00019785283111020156,
      "loss": 1.039,
      "step": 2765
    },
    {
      "epoch": 0.1597831102907245,
      "grad_norm": 0.2833018898963928,
      "learning_rate": 0.00019783203029059997,
      "loss": 0.9582,
      "step": 2770
    },
    {
      "epoch": 0.16007152745731426,
      "grad_norm": 0.2823984920978546,
      "learning_rate": 0.00019781113030558267,
      "loss": 0.9568,
      "step": 2775
    },
    {
      "epoch": 0.16035994462390402,
      "grad_norm": 0.30174189805984497,
      "learning_rate": 0.00019779013117633454,
      "loss": 0.9622,
      "step": 2780
    },
    {
      "epoch": 0.16064836179049377,
      "grad_norm": 0.2764327824115753,
      "learning_rate": 0.0001977690329241409,
      "loss": 1.0071,
      "step": 2785
    },
    {
      "epoch": 0.16093677895708353,
      "grad_norm": 0.28060150146484375,
      "learning_rate": 0.00019774783557038755,
      "loss": 0.9681,
      "step": 2790
    },
    {
      "epoch": 0.16122519612367328,
      "grad_norm": 0.2678576111793518,
      "learning_rate": 0.00019772653913656076,
      "loss": 1.0248,
      "step": 2795
    },
    {
      "epoch": 0.16151361329026304,
      "grad_norm": 0.306606650352478,
      "learning_rate": 0.00019770514364424725,
      "loss": 1.0177,
      "step": 2800
    },
    {
      "epoch": 0.1618020304568528,
      "grad_norm": 0.29886582493782043,
      "learning_rate": 0.00019768364911513405,
      "loss": 0.9611,
      "step": 2805
    },
    {
      "epoch": 0.16209044762344255,
      "grad_norm": 0.2940407395362854,
      "learning_rate": 0.00019766205557100868,
      "loss": 0.9679,
      "step": 2810
    },
    {
      "epoch": 0.1623788647900323,
      "grad_norm": 0.27756741642951965,
      "learning_rate": 0.000197640363033759,
      "loss": 0.9272,
      "step": 2815
    },
    {
      "epoch": 0.16266728195662206,
      "grad_norm": 0.27457764744758606,
      "learning_rate": 0.0001976185715253732,
      "loss": 1.0172,
      "step": 2820
    },
    {
      "epoch": 0.1629556991232118,
      "grad_norm": 0.30826953053474426,
      "learning_rate": 0.00019759668106793975,
      "loss": 0.992,
      "step": 2825
    },
    {
      "epoch": 0.16324411628980157,
      "grad_norm": 0.2786210775375366,
      "learning_rate": 0.0001975746916836475,
      "loss": 0.9978,
      "step": 2830
    },
    {
      "epoch": 0.16353253345639132,
      "grad_norm": 0.2771185338497162,
      "learning_rate": 0.00019755260339478556,
      "loss": 0.9633,
      "step": 2835
    },
    {
      "epoch": 0.16382095062298108,
      "grad_norm": 0.2794210910797119,
      "learning_rate": 0.0001975304162237432,
      "loss": 0.9595,
      "step": 2840
    },
    {
      "epoch": 0.16410936778957083,
      "grad_norm": 0.2792012691497803,
      "learning_rate": 0.00019750813019301004,
      "loss": 1.0335,
      "step": 2845
    },
    {
      "epoch": 0.1643977849561606,
      "grad_norm": 0.304283082485199,
      "learning_rate": 0.00019748574532517586,
      "loss": 0.9989,
      "step": 2850
    },
    {
      "epoch": 0.16468620212275034,
      "grad_norm": 0.2838886082172394,
      "learning_rate": 0.00019746326164293056,
      "loss": 0.9652,
      "step": 2855
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 0.275785356760025,
      "learning_rate": 0.0001974406791690643,
      "loss": 0.9703,
      "step": 2860
    },
    {
      "epoch": 0.16526303645592985,
      "grad_norm": 0.3098074495792389,
      "learning_rate": 0.00019741799792646734,
      "loss": 1.0071,
      "step": 2865
    },
    {
      "epoch": 0.1655514536225196,
      "grad_norm": 0.2729983925819397,
      "learning_rate": 0.00019739521793813006,
      "loss": 0.9223,
      "step": 2870
    },
    {
      "epoch": 0.16583987078910936,
      "grad_norm": 0.286050409078598,
      "learning_rate": 0.0001973723392271429,
      "loss": 0.9947,
      "step": 2875
    },
    {
      "epoch": 0.16612828795569912,
      "grad_norm": 0.2677772641181946,
      "learning_rate": 0.00019734936181669638,
      "loss": 1.0642,
      "step": 2880
    },
    {
      "epoch": 0.16641670512228887,
      "grad_norm": 0.3227224051952362,
      "learning_rate": 0.00019732628573008114,
      "loss": 1.0097,
      "step": 2885
    },
    {
      "epoch": 0.16670512228887863,
      "grad_norm": 0.287588506937027,
      "learning_rate": 0.00019730311099068771,
      "loss": 1.0178,
      "step": 2890
    },
    {
      "epoch": 0.16699353945546838,
      "grad_norm": 0.30312904715538025,
      "learning_rate": 0.00019727983762200677,
      "loss": 0.9637,
      "step": 2895
    },
    {
      "epoch": 0.16728195662205814,
      "grad_norm": 0.2764815092086792,
      "learning_rate": 0.00019725646564762878,
      "loss": 0.9786,
      "step": 2900
    },
    {
      "epoch": 0.1675703737886479,
      "grad_norm": 0.3129001557826996,
      "learning_rate": 0.00019723299509124433,
      "loss": 0.9505,
      "step": 2905
    },
    {
      "epoch": 0.16785879095523765,
      "grad_norm": 0.29697930812835693,
      "learning_rate": 0.00019720942597664385,
      "loss": 0.9866,
      "step": 2910
    },
    {
      "epoch": 0.1681472081218274,
      "grad_norm": 0.30350685119628906,
      "learning_rate": 0.00019718575832771768,
      "loss": 0.9753,
      "step": 2915
    },
    {
      "epoch": 0.16843562528841716,
      "grad_norm": 0.2997938394546509,
      "learning_rate": 0.00019716199216845604,
      "loss": 1.0002,
      "step": 2920
    },
    {
      "epoch": 0.1687240424550069,
      "grad_norm": 0.2617998421192169,
      "learning_rate": 0.000197138127522949,
      "loss": 0.931,
      "step": 2925
    },
    {
      "epoch": 0.16901245962159667,
      "grad_norm": 0.2833821773529053,
      "learning_rate": 0.00019711416441538652,
      "loss": 1.0101,
      "step": 2930
    },
    {
      "epoch": 0.16930087678818642,
      "grad_norm": 0.287142813205719,
      "learning_rate": 0.00019709010287005825,
      "loss": 1.0126,
      "step": 2935
    },
    {
      "epoch": 0.16958929395477618,
      "grad_norm": 0.2692398428916931,
      "learning_rate": 0.00019706594291135366,
      "loss": 0.9623,
      "step": 2940
    },
    {
      "epoch": 0.16987771112136593,
      "grad_norm": 0.3134477436542511,
      "learning_rate": 0.00019704168456376205,
      "loss": 1.0175,
      "step": 2945
    },
    {
      "epoch": 0.1701661282879557,
      "grad_norm": 0.28351497650146484,
      "learning_rate": 0.0001970173278518724,
      "loss": 0.9537,
      "step": 2950
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 0.2851005494594574,
      "learning_rate": 0.00019699287280037332,
      "loss": 1.0136,
      "step": 2955
    },
    {
      "epoch": 0.1707429626211352,
      "grad_norm": 0.3006639778614044,
      "learning_rate": 0.00019696831943405324,
      "loss": 1.0825,
      "step": 2960
    },
    {
      "epoch": 0.17103137978772495,
      "grad_norm": 0.2862212359905243,
      "learning_rate": 0.0001969436677778001,
      "loss": 0.9826,
      "step": 2965
    },
    {
      "epoch": 0.1713197969543147,
      "grad_norm": 0.2898406684398651,
      "learning_rate": 0.0001969189178566016,
      "loss": 1.0052,
      "step": 2970
    },
    {
      "epoch": 0.1716082141209045,
      "grad_norm": 0.3075491487979889,
      "learning_rate": 0.000196894069695545,
      "loss": 0.9692,
      "step": 2975
    },
    {
      "epoch": 0.17189663128749424,
      "grad_norm": 0.28366634249687195,
      "learning_rate": 0.00019686912331981702,
      "loss": 0.993,
      "step": 2980
    },
    {
      "epoch": 0.172185048454084,
      "grad_norm": 0.2819202244281769,
      "learning_rate": 0.00019684407875470415,
      "loss": 1.0018,
      "step": 2985
    },
    {
      "epoch": 0.17247346562067375,
      "grad_norm": 0.34952133893966675,
      "learning_rate": 0.00019681893602559224,
      "loss": 0.982,
      "step": 2990
    },
    {
      "epoch": 0.1727618827872635,
      "grad_norm": 0.3122062087059021,
      "learning_rate": 0.0001967936951579667,
      "loss": 0.9914,
      "step": 2995
    },
    {
      "epoch": 0.17305029995385326,
      "grad_norm": 0.27795976400375366,
      "learning_rate": 0.00019676835617741249,
      "loss": 0.9665,
      "step": 3000
    },
    {
      "epoch": 0.17333871712044302,
      "grad_norm": 0.2866445779800415,
      "learning_rate": 0.0001967429191096138,
      "loss": 0.9751,
      "step": 3005
    },
    {
      "epoch": 0.17362713428703277,
      "grad_norm": 0.28401291370391846,
      "learning_rate": 0.0001967173839803545,
      "loss": 0.9746,
      "step": 3010
    },
    {
      "epoch": 0.17391555145362253,
      "grad_norm": 0.2761111855506897,
      "learning_rate": 0.00019669175081551773,
      "loss": 0.9802,
      "step": 3015
    },
    {
      "epoch": 0.17420396862021229,
      "grad_norm": 0.2995210587978363,
      "learning_rate": 0.00019666601964108598,
      "loss": 0.9399,
      "step": 3020
    },
    {
      "epoch": 0.17449238578680204,
      "grad_norm": 0.28632500767707825,
      "learning_rate": 0.00019664019048314116,
      "loss": 0.983,
      "step": 3025
    },
    {
      "epoch": 0.1747808029533918,
      "grad_norm": 0.2868204116821289,
      "learning_rate": 0.00019661426336786445,
      "loss": 0.9339,
      "step": 3030
    },
    {
      "epoch": 0.17506922011998155,
      "grad_norm": 0.2975151836872101,
      "learning_rate": 0.00019658823832153632,
      "loss": 0.9176,
      "step": 3035
    },
    {
      "epoch": 0.1753576372865713,
      "grad_norm": 0.32150018215179443,
      "learning_rate": 0.00019656211537053654,
      "loss": 1.0361,
      "step": 3040
    },
    {
      "epoch": 0.17564605445316106,
      "grad_norm": 0.27139896154403687,
      "learning_rate": 0.00019653589454134406,
      "loss": 0.9399,
      "step": 3045
    },
    {
      "epoch": 0.17593447161975082,
      "grad_norm": 0.29438334703445435,
      "learning_rate": 0.00019650957586053716,
      "loss": 0.9869,
      "step": 3050
    },
    {
      "epoch": 0.17622288878634057,
      "grad_norm": 0.26268067955970764,
      "learning_rate": 0.00019648315935479315,
      "loss": 1.037,
      "step": 3055
    },
    {
      "epoch": 0.17651130595293033,
      "grad_norm": 0.28784453868865967,
      "learning_rate": 0.00019645664505088864,
      "loss": 0.9737,
      "step": 3060
    },
    {
      "epoch": 0.17679972311952008,
      "grad_norm": 0.29806089401245117,
      "learning_rate": 0.00019643003297569923,
      "loss": 0.9884,
      "step": 3065
    },
    {
      "epoch": 0.17708814028610984,
      "grad_norm": 0.2772783935070038,
      "learning_rate": 0.00019640332315619977,
      "loss": 1.0022,
      "step": 3070
    },
    {
      "epoch": 0.1773765574526996,
      "grad_norm": 0.279499351978302,
      "learning_rate": 0.0001963765156194641,
      "loss": 1.0034,
      "step": 3075
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 0.29856428503990173,
      "learning_rate": 0.00019634961039266506,
      "loss": 1.0251,
      "step": 3080
    },
    {
      "epoch": 0.1779533917858791,
      "grad_norm": 0.2960283160209656,
      "learning_rate": 0.00019632260750307467,
      "loss": 0.9978,
      "step": 3085
    },
    {
      "epoch": 0.17824180895246886,
      "grad_norm": 0.3026635944843292,
      "learning_rate": 0.0001962955069780638,
      "loss": 0.9345,
      "step": 3090
    },
    {
      "epoch": 0.1785302261190586,
      "grad_norm": 0.30011415481567383,
      "learning_rate": 0.00019626830884510236,
      "loss": 1.0426,
      "step": 3095
    },
    {
      "epoch": 0.17881864328564837,
      "grad_norm": 0.31029802560806274,
      "learning_rate": 0.00019624101313175918,
      "loss": 1.0291,
      "step": 3100
    },
    {
      "epoch": 0.17910706045223812,
      "grad_norm": 0.29978078603744507,
      "learning_rate": 0.00019621361986570194,
      "loss": 0.9394,
      "step": 3105
    },
    {
      "epoch": 0.17939547761882788,
      "grad_norm": 0.298728346824646,
      "learning_rate": 0.00019618612907469732,
      "loss": 0.9875,
      "step": 3110
    },
    {
      "epoch": 0.17968389478541763,
      "grad_norm": 0.2664894461631775,
      "learning_rate": 0.00019615854078661077,
      "loss": 0.9905,
      "step": 3115
    },
    {
      "epoch": 0.17997231195200739,
      "grad_norm": 0.284242182970047,
      "learning_rate": 0.00019613085502940658,
      "loss": 1.1183,
      "step": 3120
    },
    {
      "epoch": 0.18026072911859714,
      "grad_norm": 0.29005903005599976,
      "learning_rate": 0.00019610307183114787,
      "loss": 0.9643,
      "step": 3125
    },
    {
      "epoch": 0.1805491462851869,
      "grad_norm": 0.31152260303497314,
      "learning_rate": 0.00019607519121999647,
      "loss": 0.955,
      "step": 3130
    },
    {
      "epoch": 0.18083756345177665,
      "grad_norm": 0.3107044994831085,
      "learning_rate": 0.00019604721322421303,
      "loss": 0.9592,
      "step": 3135
    },
    {
      "epoch": 0.1811259806183664,
      "grad_norm": 0.3071282207965851,
      "learning_rate": 0.00019601913787215683,
      "loss": 0.9844,
      "step": 3140
    },
    {
      "epoch": 0.18141439778495616,
      "grad_norm": 0.29717057943344116,
      "learning_rate": 0.00019599096519228585,
      "loss": 0.9394,
      "step": 3145
    },
    {
      "epoch": 0.18170281495154592,
      "grad_norm": 0.3277190625667572,
      "learning_rate": 0.0001959626952131568,
      "loss": 0.8651,
      "step": 3150
    },
    {
      "epoch": 0.18199123211813567,
      "grad_norm": 0.2847001254558563,
      "learning_rate": 0.00019593432796342496,
      "loss": 1.0355,
      "step": 3155
    },
    {
      "epoch": 0.18227964928472543,
      "grad_norm": 0.2961786091327667,
      "learning_rate": 0.00019590586347184417,
      "loss": 1.0553,
      "step": 3160
    },
    {
      "epoch": 0.18256806645131518,
      "grad_norm": 0.2928047478199005,
      "learning_rate": 0.00019587730176726686,
      "loss": 0.9886,
      "step": 3165
    },
    {
      "epoch": 0.18285648361790494,
      "grad_norm": 0.320328027009964,
      "learning_rate": 0.00019584864287864408,
      "loss": 0.9522,
      "step": 3170
    },
    {
      "epoch": 0.1831449007844947,
      "grad_norm": 0.2688181400299072,
      "learning_rate": 0.00019581988683502525,
      "loss": 1.0481,
      "step": 3175
    },
    {
      "epoch": 0.18343331795108445,
      "grad_norm": 0.31589022278785706,
      "learning_rate": 0.0001957910336655584,
      "loss": 0.9825,
      "step": 3180
    },
    {
      "epoch": 0.1837217351176742,
      "grad_norm": 0.30393970012664795,
      "learning_rate": 0.00019576208339948988,
      "loss": 0.9845,
      "step": 3185
    },
    {
      "epoch": 0.18401015228426396,
      "grad_norm": 0.27783116698265076,
      "learning_rate": 0.00019573303606616459,
      "loss": 0.9965,
      "step": 3190
    },
    {
      "epoch": 0.1842985694508537,
      "grad_norm": 0.29289042949676514,
      "learning_rate": 0.00019570389169502569,
      "loss": 0.9849,
      "step": 3195
    },
    {
      "epoch": 0.18458698661744347,
      "grad_norm": 0.28584346175193787,
      "learning_rate": 0.00019567465031561487,
      "loss": 1.0468,
      "step": 3200
    },
    {
      "epoch": 0.18487540378403322,
      "grad_norm": 0.2989406883716583,
      "learning_rate": 0.00019564531195757193,
      "loss": 0.9834,
      "step": 3205
    },
    {
      "epoch": 0.18516382095062298,
      "grad_norm": 0.3050430715084076,
      "learning_rate": 0.0001956158766506352,
      "loss": 1.0285,
      "step": 3210
    },
    {
      "epoch": 0.18545223811721273,
      "grad_norm": 0.30927765369415283,
      "learning_rate": 0.00019558634442464113,
      "loss": 0.911,
      "step": 3215
    },
    {
      "epoch": 0.18574065528380249,
      "grad_norm": 0.2875533401966095,
      "learning_rate": 0.00019555671530952445,
      "loss": 0.9708,
      "step": 3220
    },
    {
      "epoch": 0.18602907245039224,
      "grad_norm": 0.338565856218338,
      "learning_rate": 0.00019552698933531808,
      "loss": 0.9928,
      "step": 3225
    },
    {
      "epoch": 0.186317489616982,
      "grad_norm": 0.2844907343387604,
      "learning_rate": 0.00019549716653215318,
      "loss": 0.9989,
      "step": 3230
    },
    {
      "epoch": 0.18660590678357175,
      "grad_norm": 0.2826622426509857,
      "learning_rate": 0.00019546724693025896,
      "loss": 0.9663,
      "step": 3235
    },
    {
      "epoch": 0.1868943239501615,
      "grad_norm": 0.29726430773735046,
      "learning_rate": 0.00019543723055996282,
      "loss": 0.9864,
      "step": 3240
    },
    {
      "epoch": 0.18718274111675126,
      "grad_norm": 0.2944948673248291,
      "learning_rate": 0.0001954071174516903,
      "loss": 0.9907,
      "step": 3245
    },
    {
      "epoch": 0.18747115828334102,
      "grad_norm": 0.2960521876811981,
      "learning_rate": 0.00019537690763596487,
      "loss": 0.9947,
      "step": 3250
    },
    {
      "epoch": 0.18775957544993077,
      "grad_norm": 0.2907959520816803,
      "learning_rate": 0.0001953466011434081,
      "loss": 0.998,
      "step": 3255
    },
    {
      "epoch": 0.18804799261652053,
      "grad_norm": 0.27448657155036926,
      "learning_rate": 0.00019531619800473952,
      "loss": 0.9299,
      "step": 3260
    },
    {
      "epoch": 0.18833640978311028,
      "grad_norm": 0.2914285361766815,
      "learning_rate": 0.00019528569825077668,
      "loss": 0.9851,
      "step": 3265
    },
    {
      "epoch": 0.18862482694970004,
      "grad_norm": 0.28481677174568176,
      "learning_rate": 0.00019525510191243498,
      "loss": 1.0796,
      "step": 3270
    },
    {
      "epoch": 0.1889132441162898,
      "grad_norm": 0.3071490526199341,
      "learning_rate": 0.00019522440902072782,
      "loss": 1.0045,
      "step": 3275
    },
    {
      "epoch": 0.18920166128287955,
      "grad_norm": 0.31344813108444214,
      "learning_rate": 0.0001951936196067664,
      "loss": 1.0384,
      "step": 3280
    },
    {
      "epoch": 0.1894900784494693,
      "grad_norm": 0.29477670788764954,
      "learning_rate": 0.00019516273370175972,
      "loss": 0.9663,
      "step": 3285
    },
    {
      "epoch": 0.18977849561605906,
      "grad_norm": 0.36153990030288696,
      "learning_rate": 0.00019513175133701474,
      "loss": 0.9459,
      "step": 3290
    },
    {
      "epoch": 0.1900669127826488,
      "grad_norm": 0.29918980598449707,
      "learning_rate": 0.000195100672543936,
      "loss": 0.9239,
      "step": 3295
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 0.2978503108024597,
      "learning_rate": 0.00019506949735402588,
      "loss": 0.9286,
      "step": 3300
    },
    {
      "epoch": 0.19064374711582832,
      "grad_norm": 0.3202069103717804,
      "learning_rate": 0.00019503822579888453,
      "loss": 1.0259,
      "step": 3305
    },
    {
      "epoch": 0.19093216428241808,
      "grad_norm": 0.3225456774234772,
      "learning_rate": 0.00019500685791020968,
      "loss": 0.9501,
      "step": 3310
    },
    {
      "epoch": 0.19122058144900783,
      "grad_norm": 0.3228490948677063,
      "learning_rate": 0.00019497539371979674,
      "loss": 1.0353,
      "step": 3315
    },
    {
      "epoch": 0.1915089986155976,
      "grad_norm": 0.32369717955589294,
      "learning_rate": 0.00019494383325953875,
      "loss": 0.9616,
      "step": 3320
    },
    {
      "epoch": 0.19179741578218737,
      "grad_norm": 0.3090066909790039,
      "learning_rate": 0.0001949121765614263,
      "loss": 0.9646,
      "step": 3325
    },
    {
      "epoch": 0.19208583294877712,
      "grad_norm": 0.26542478799819946,
      "learning_rate": 0.00019488042365754758,
      "loss": 0.979,
      "step": 3330
    },
    {
      "epoch": 0.19237425011536688,
      "grad_norm": 0.2973325848579407,
      "learning_rate": 0.0001948485745800882,
      "loss": 0.9433,
      "step": 3335
    },
    {
      "epoch": 0.19266266728195663,
      "grad_norm": 0.30728423595428467,
      "learning_rate": 0.0001948166293613314,
      "loss": 0.9544,
      "step": 3340
    },
    {
      "epoch": 0.1929510844485464,
      "grad_norm": 0.27854323387145996,
      "learning_rate": 0.00019478458803365772,
      "loss": 0.9428,
      "step": 3345
    },
    {
      "epoch": 0.19323950161513614,
      "grad_norm": 0.27844732999801636,
      "learning_rate": 0.00019475245062954523,
      "loss": 1.0545,
      "step": 3350
    },
    {
      "epoch": 0.1935279187817259,
      "grad_norm": 0.28251299262046814,
      "learning_rate": 0.00019472021718156937,
      "loss": 0.9315,
      "step": 3355
    },
    {
      "epoch": 0.19381633594831565,
      "grad_norm": 0.2970223128795624,
      "learning_rate": 0.00019468788772240286,
      "loss": 1.0053,
      "step": 3360
    },
    {
      "epoch": 0.1941047531149054,
      "grad_norm": 0.29227715730667114,
      "learning_rate": 0.0001946554622848158,
      "loss": 1.0171,
      "step": 3365
    },
    {
      "epoch": 0.19439317028149516,
      "grad_norm": 0.3032057285308838,
      "learning_rate": 0.00019462294090167554,
      "loss": 1.0456,
      "step": 3370
    },
    {
      "epoch": 0.19468158744808492,
      "grad_norm": 0.2863052189350128,
      "learning_rate": 0.00019459032360594677,
      "loss": 0.9876,
      "step": 3375
    },
    {
      "epoch": 0.19497000461467467,
      "grad_norm": 0.29493972659111023,
      "learning_rate": 0.0001945576104306913,
      "loss": 0.9076,
      "step": 3380
    },
    {
      "epoch": 0.19525842178126443,
      "grad_norm": 0.28630873560905457,
      "learning_rate": 0.00019452480140906819,
      "loss": 0.9734,
      "step": 3385
    },
    {
      "epoch": 0.19554683894785418,
      "grad_norm": 0.28571903705596924,
      "learning_rate": 0.00019449189657433358,
      "loss": 1.0033,
      "step": 3390
    },
    {
      "epoch": 0.19583525611444394,
      "grad_norm": 0.3060779273509979,
      "learning_rate": 0.0001944588959598408,
      "loss": 0.9493,
      "step": 3395
    },
    {
      "epoch": 0.1961236732810337,
      "grad_norm": 0.28723883628845215,
      "learning_rate": 0.00019442579959904024,
      "loss": 0.9713,
      "step": 3400
    },
    {
      "epoch": 0.19641209044762345,
      "grad_norm": 0.29430314898490906,
      "learning_rate": 0.00019439260752547935,
      "loss": 0.9476,
      "step": 3405
    },
    {
      "epoch": 0.1967005076142132,
      "grad_norm": 0.3151422142982483,
      "learning_rate": 0.0001943593197728026,
      "loss": 1.0443,
      "step": 3410
    },
    {
      "epoch": 0.19698892478080296,
      "grad_norm": 0.32313892245292664,
      "learning_rate": 0.00019432593637475138,
      "loss": 0.9968,
      "step": 3415
    },
    {
      "epoch": 0.19727734194739271,
      "grad_norm": 0.27212581038475037,
      "learning_rate": 0.00019429245736516415,
      "loss": 0.9608,
      "step": 3420
    },
    {
      "epoch": 0.19756575911398247,
      "grad_norm": 0.28393295407295227,
      "learning_rate": 0.00019425888277797615,
      "loss": 1.025,
      "step": 3425
    },
    {
      "epoch": 0.19785417628057222,
      "grad_norm": 0.31957364082336426,
      "learning_rate": 0.00019422521264721962,
      "loss": 0.9412,
      "step": 3430
    },
    {
      "epoch": 0.19814259344716198,
      "grad_norm": 0.28313255310058594,
      "learning_rate": 0.0001941914470070236,
      "loss": 0.8898,
      "step": 3435
    },
    {
      "epoch": 0.19843101061375173,
      "grad_norm": 0.30928754806518555,
      "learning_rate": 0.00019415758589161385,
      "loss": 1.0036,
      "step": 3440
    },
    {
      "epoch": 0.1987194277803415,
      "grad_norm": 0.30498096346855164,
      "learning_rate": 0.00019412362933531307,
      "loss": 0.8956,
      "step": 3445
    },
    {
      "epoch": 0.19900784494693124,
      "grad_norm": 0.2994639277458191,
      "learning_rate": 0.0001940895773725406,
      "loss": 0.958,
      "step": 3450
    },
    {
      "epoch": 0.199296262113521,
      "grad_norm": 0.27892401814460754,
      "learning_rate": 0.00019405543003781251,
      "loss": 1.0444,
      "step": 3455
    },
    {
      "epoch": 0.19958467928011075,
      "grad_norm": 0.3064746558666229,
      "learning_rate": 0.00019402118736574155,
      "loss": 0.9791,
      "step": 3460
    },
    {
      "epoch": 0.1998730964467005,
      "grad_norm": 0.30257728695869446,
      "learning_rate": 0.00019398684939103707,
      "loss": 1.0427,
      "step": 3465
    },
    {
      "epoch": 0.20016151361329027,
      "grad_norm": 0.30800187587738037,
      "learning_rate": 0.00019395241614850504,
      "loss": 0.9739,
      "step": 3470
    },
    {
      "epoch": 0.20044993077988002,
      "grad_norm": 0.30980637669563293,
      "learning_rate": 0.00019391788767304804,
      "loss": 0.9858,
      "step": 3475
    },
    {
      "epoch": 0.20073834794646978,
      "grad_norm": 0.300564706325531,
      "learning_rate": 0.00019388326399966515,
      "loss": 1.0109,
      "step": 3480
    },
    {
      "epoch": 0.20102676511305953,
      "grad_norm": 0.2801668047904968,
      "learning_rate": 0.0001938485451634519,
      "loss": 0.9414,
      "step": 3485
    },
    {
      "epoch": 0.20131518227964929,
      "grad_norm": 0.32072213292121887,
      "learning_rate": 0.00019381373119960033,
      "loss": 1.0513,
      "step": 3490
    },
    {
      "epoch": 0.20160359944623904,
      "grad_norm": 0.3455287218093872,
      "learning_rate": 0.00019377882214339893,
      "loss": 0.9559,
      "step": 3495
    },
    {
      "epoch": 0.2018920166128288,
      "grad_norm": 0.29594236612319946,
      "learning_rate": 0.00019374381803023252,
      "loss": 1.0114,
      "step": 3500
    },
    {
      "epoch": 0.20218043377941855,
      "grad_norm": 0.29461246728897095,
      "learning_rate": 0.0001937087188955823,
      "loss": 0.9981,
      "step": 3505
    },
    {
      "epoch": 0.2024688509460083,
      "grad_norm": 0.29845237731933594,
      "learning_rate": 0.00019367352477502576,
      "loss": 0.9626,
      "step": 3510
    },
    {
      "epoch": 0.20275726811259806,
      "grad_norm": 0.29965266585350037,
      "learning_rate": 0.00019363823570423675,
      "loss": 0.9343,
      "step": 3515
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.3167986273765564,
      "learning_rate": 0.0001936028517189852,
      "loss": 0.9134,
      "step": 3520
    },
    {
      "epoch": 0.20333410244577757,
      "grad_norm": 0.2801003158092499,
      "learning_rate": 0.00019356737285513748,
      "loss": 0.9585,
      "step": 3525
    },
    {
      "epoch": 0.20362251961236733,
      "grad_norm": 0.34285858273506165,
      "learning_rate": 0.00019353179914865596,
      "loss": 1.0429,
      "step": 3530
    },
    {
      "epoch": 0.20391093677895708,
      "grad_norm": 0.2997133135795593,
      "learning_rate": 0.00019349613063559916,
      "loss": 0.9669,
      "step": 3535
    },
    {
      "epoch": 0.20419935394554684,
      "grad_norm": 0.2827471196651459,
      "learning_rate": 0.00019346036735212177,
      "loss": 1.0545,
      "step": 3540
    },
    {
      "epoch": 0.2044877711121366,
      "grad_norm": 0.29712873697280884,
      "learning_rate": 0.00019342450933447448,
      "loss": 0.8974,
      "step": 3545
    },
    {
      "epoch": 0.20477618827872635,
      "grad_norm": 0.30608931183815,
      "learning_rate": 0.00019338855661900405,
      "loss": 0.9705,
      "step": 3550
    },
    {
      "epoch": 0.2050646054453161,
      "grad_norm": 0.3042439818382263,
      "learning_rate": 0.00019335250924215318,
      "loss": 0.9509,
      "step": 3555
    },
    {
      "epoch": 0.20535302261190586,
      "grad_norm": 0.32983705401420593,
      "learning_rate": 0.00019331636724046058,
      "loss": 0.9299,
      "step": 3560
    },
    {
      "epoch": 0.2056414397784956,
      "grad_norm": 0.31326207518577576,
      "learning_rate": 0.0001932801306505608,
      "loss": 1.0091,
      "step": 3565
    },
    {
      "epoch": 0.20592985694508537,
      "grad_norm": 0.362664133310318,
      "learning_rate": 0.00019324379950918437,
      "loss": 1.0372,
      "step": 3570
    },
    {
      "epoch": 0.20621827411167512,
      "grad_norm": 0.30850011110305786,
      "learning_rate": 0.00019320737385315756,
      "loss": 1.007,
      "step": 3575
    },
    {
      "epoch": 0.20650669127826488,
      "grad_norm": 0.28833866119384766,
      "learning_rate": 0.00019317085371940246,
      "loss": 0.9142,
      "step": 3580
    },
    {
      "epoch": 0.20679510844485463,
      "grad_norm": 0.30341702699661255,
      "learning_rate": 0.00019313423914493703,
      "loss": 0.9421,
      "step": 3585
    },
    {
      "epoch": 0.20708352561144439,
      "grad_norm": 0.29995712637901306,
      "learning_rate": 0.00019309753016687477,
      "loss": 0.9276,
      "step": 3590
    },
    {
      "epoch": 0.20737194277803414,
      "grad_norm": 0.3257008492946625,
      "learning_rate": 0.00019306072682242505,
      "loss": 0.9618,
      "step": 3595
    },
    {
      "epoch": 0.2076603599446239,
      "grad_norm": 0.31132039427757263,
      "learning_rate": 0.00019302382914889284,
      "loss": 1.0191,
      "step": 3600
    },
    {
      "epoch": 0.20794877711121365,
      "grad_norm": 0.2903344929218292,
      "learning_rate": 0.00019298683718367864,
      "loss": 0.9276,
      "step": 3605
    },
    {
      "epoch": 0.2082371942778034,
      "grad_norm": 0.28606170415878296,
      "learning_rate": 0.00019294975096427862,
      "loss": 0.9942,
      "step": 3610
    },
    {
      "epoch": 0.20852561144439316,
      "grad_norm": 0.31145986914634705,
      "learning_rate": 0.00019291257052828447,
      "loss": 1.0452,
      "step": 3615
    },
    {
      "epoch": 0.20881402861098292,
      "grad_norm": 0.29645946621894836,
      "learning_rate": 0.00019287529591338333,
      "loss": 0.9606,
      "step": 3620
    },
    {
      "epoch": 0.20910244577757267,
      "grad_norm": 0.2780356705188751,
      "learning_rate": 0.0001928379271573579,
      "loss": 0.952,
      "step": 3625
    },
    {
      "epoch": 0.20939086294416243,
      "grad_norm": 0.31448647379875183,
      "learning_rate": 0.0001928004642980862,
      "loss": 0.938,
      "step": 3630
    },
    {
      "epoch": 0.20967928011075218,
      "grad_norm": 0.45712539553642273,
      "learning_rate": 0.0001927629073735417,
      "loss": 0.9825,
      "step": 3635
    },
    {
      "epoch": 0.20996769727734194,
      "grad_norm": 0.2840917408466339,
      "learning_rate": 0.00019272525642179323,
      "loss": 0.9532,
      "step": 3640
    },
    {
      "epoch": 0.2102561144439317,
      "grad_norm": 0.301581472158432,
      "learning_rate": 0.00019268751148100486,
      "loss": 0.9401,
      "step": 3645
    },
    {
      "epoch": 0.21054453161052145,
      "grad_norm": 0.2878675162792206,
      "learning_rate": 0.00019264967258943595,
      "loss": 0.96,
      "step": 3650
    },
    {
      "epoch": 0.2108329487771112,
      "grad_norm": 0.3223981261253357,
      "learning_rate": 0.0001926117397854412,
      "loss": 0.9317,
      "step": 3655
    },
    {
      "epoch": 0.21112136594370096,
      "grad_norm": 0.3064003884792328,
      "learning_rate": 0.0001925737131074703,
      "loss": 1.0191,
      "step": 3660
    },
    {
      "epoch": 0.2114097831102907,
      "grad_norm": 0.29931890964508057,
      "learning_rate": 0.0001925355925940683,
      "loss": 1.0221,
      "step": 3665
    },
    {
      "epoch": 0.2116982002768805,
      "grad_norm": 0.29575493931770325,
      "learning_rate": 0.00019249737828387522,
      "loss": 0.9803,
      "step": 3670
    },
    {
      "epoch": 0.21198661744347025,
      "grad_norm": 0.29677698016166687,
      "learning_rate": 0.0001924590702156262,
      "loss": 0.9743,
      "step": 3675
    },
    {
      "epoch": 0.21227503461006,
      "grad_norm": 0.28778114914894104,
      "learning_rate": 0.00019242066842815146,
      "loss": 1.0134,
      "step": 3680
    },
    {
      "epoch": 0.21256345177664976,
      "grad_norm": 0.3022085130214691,
      "learning_rate": 0.00019238217296037614,
      "loss": 1.0065,
      "step": 3685
    },
    {
      "epoch": 0.21285186894323951,
      "grad_norm": 0.28485235571861267,
      "learning_rate": 0.00019234358385132038,
      "loss": 1.0066,
      "step": 3690
    },
    {
      "epoch": 0.21314028610982927,
      "grad_norm": 0.2786906063556671,
      "learning_rate": 0.00019230490114009928,
      "loss": 0.9393,
      "step": 3695
    },
    {
      "epoch": 0.21342870327641902,
      "grad_norm": 0.30164533853530884,
      "learning_rate": 0.00019226612486592271,
      "loss": 0.8972,
      "step": 3700
    },
    {
      "epoch": 0.21371712044300878,
      "grad_norm": 0.303313672542572,
      "learning_rate": 0.00019222725506809547,
      "loss": 0.9892,
      "step": 3705
    },
    {
      "epoch": 0.21400553760959853,
      "grad_norm": 0.2972022593021393,
      "learning_rate": 0.00019218829178601713,
      "loss": 1.0382,
      "step": 3710
    },
    {
      "epoch": 0.2142939547761883,
      "grad_norm": 0.2921942174434662,
      "learning_rate": 0.00019214923505918202,
      "loss": 1.0,
      "step": 3715
    },
    {
      "epoch": 0.21458237194277804,
      "grad_norm": 0.2801063358783722,
      "learning_rate": 0.00019211008492717914,
      "loss": 0.9779,
      "step": 3720
    },
    {
      "epoch": 0.2148707891093678,
      "grad_norm": 0.2877048850059509,
      "learning_rate": 0.00019207084142969225,
      "loss": 1.0471,
      "step": 3725
    },
    {
      "epoch": 0.21515920627595755,
      "grad_norm": 0.3104129135608673,
      "learning_rate": 0.0001920315046064997,
      "loss": 0.9551,
      "step": 3730
    },
    {
      "epoch": 0.2154476234425473,
      "grad_norm": 0.28808075189590454,
      "learning_rate": 0.0001919920744974745,
      "loss": 0.9911,
      "step": 3735
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 0.2917241156101227,
      "learning_rate": 0.00019195255114258408,
      "loss": 0.9557,
      "step": 3740
    },
    {
      "epoch": 0.21602445777572682,
      "grad_norm": 0.3106626272201538,
      "learning_rate": 0.0001919129345818905,
      "loss": 0.9817,
      "step": 3745
    },
    {
      "epoch": 0.21631287494231657,
      "grad_norm": 0.30126625299453735,
      "learning_rate": 0.00019187322485555031,
      "loss": 0.996,
      "step": 3750
    },
    {
      "epoch": 0.21660129210890633,
      "grad_norm": 0.28781163692474365,
      "learning_rate": 0.0001918334220038144,
      "loss": 0.9824,
      "step": 3755
    },
    {
      "epoch": 0.21688970927549608,
      "grad_norm": 0.3098173141479492,
      "learning_rate": 0.00019179352606702813,
      "loss": 0.9516,
      "step": 3760
    },
    {
      "epoch": 0.21717812644208584,
      "grad_norm": 0.29839685559272766,
      "learning_rate": 0.00019175353708563117,
      "loss": 1.0092,
      "step": 3765
    },
    {
      "epoch": 0.2174665436086756,
      "grad_norm": 0.2970857322216034,
      "learning_rate": 0.00019171345510015758,
      "loss": 1.0161,
      "step": 3770
    },
    {
      "epoch": 0.21775496077526535,
      "grad_norm": 0.3346623480319977,
      "learning_rate": 0.00019167328015123558,
      "loss": 0.9373,
      "step": 3775
    },
    {
      "epoch": 0.2180433779418551,
      "grad_norm": 0.29829534888267517,
      "learning_rate": 0.0001916330122795877,
      "loss": 0.9769,
      "step": 3780
    },
    {
      "epoch": 0.21833179510844486,
      "grad_norm": 0.28773486614227295,
      "learning_rate": 0.00019159265152603064,
      "loss": 0.9643,
      "step": 3785
    },
    {
      "epoch": 0.21862021227503461,
      "grad_norm": 0.35629943013191223,
      "learning_rate": 0.00019155219793147522,
      "loss": 1.0356,
      "step": 3790
    },
    {
      "epoch": 0.21890862944162437,
      "grad_norm": 0.3033393323421478,
      "learning_rate": 0.00019151165153692644,
      "loss": 0.9561,
      "step": 3795
    },
    {
      "epoch": 0.21919704660821412,
      "grad_norm": 0.32361406087875366,
      "learning_rate": 0.00019147101238348326,
      "loss": 0.9953,
      "step": 3800
    },
    {
      "epoch": 0.21948546377480388,
      "grad_norm": 0.36144763231277466,
      "learning_rate": 0.00019143028051233873,
      "loss": 0.9526,
      "step": 3805
    },
    {
      "epoch": 0.21977388094139363,
      "grad_norm": 0.275502473115921,
      "learning_rate": 0.00019138945596477994,
      "loss": 0.9281,
      "step": 3810
    },
    {
      "epoch": 0.2200622981079834,
      "grad_norm": 0.32906025648117065,
      "learning_rate": 0.0001913485387821877,
      "loss": 0.9382,
      "step": 3815
    },
    {
      "epoch": 0.22035071527457314,
      "grad_norm": 0.28945717215538025,
      "learning_rate": 0.00019130752900603702,
      "loss": 1.0106,
      "step": 3820
    },
    {
      "epoch": 0.2206391324411629,
      "grad_norm": 0.30186885595321655,
      "learning_rate": 0.00019126642667789654,
      "loss": 0.9785,
      "step": 3825
    },
    {
      "epoch": 0.22092754960775265,
      "grad_norm": 0.27561014890670776,
      "learning_rate": 0.00019122523183942879,
      "loss": 1.0386,
      "step": 3830
    },
    {
      "epoch": 0.2212159667743424,
      "grad_norm": 0.3077535331249237,
      "learning_rate": 0.00019118394453239006,
      "loss": 1.0155,
      "step": 3835
    },
    {
      "epoch": 0.22150438394093216,
      "grad_norm": 0.3126158118247986,
      "learning_rate": 0.00019114256479863038,
      "loss": 0.9581,
      "step": 3840
    },
    {
      "epoch": 0.22179280110752192,
      "grad_norm": 0.275310754776001,
      "learning_rate": 0.00019110109268009347,
      "loss": 1.0005,
      "step": 3845
    },
    {
      "epoch": 0.22208121827411167,
      "grad_norm": 0.31148555874824524,
      "learning_rate": 0.00019105952821881668,
      "loss": 1.0132,
      "step": 3850
    },
    {
      "epoch": 0.22236963544070143,
      "grad_norm": 0.2904588580131531,
      "learning_rate": 0.00019101787145693098,
      "loss": 0.9738,
      "step": 3855
    },
    {
      "epoch": 0.22265805260729118,
      "grad_norm": 0.28576961159706116,
      "learning_rate": 0.00019097612243666086,
      "loss": 0.952,
      "step": 3860
    },
    {
      "epoch": 0.22294646977388094,
      "grad_norm": 0.31309062242507935,
      "learning_rate": 0.0001909342812003244,
      "loss": 0.9586,
      "step": 3865
    },
    {
      "epoch": 0.2232348869404707,
      "grad_norm": 0.30974939465522766,
      "learning_rate": 0.00019089234779033306,
      "loss": 0.9903,
      "step": 3870
    },
    {
      "epoch": 0.22352330410706045,
      "grad_norm": 0.2999672293663025,
      "learning_rate": 0.00019085032224919177,
      "loss": 0.9514,
      "step": 3875
    },
    {
      "epoch": 0.2238117212736502,
      "grad_norm": 0.2947905957698822,
      "learning_rate": 0.00019080820461949886,
      "loss": 0.9602,
      "step": 3880
    },
    {
      "epoch": 0.22410013844023996,
      "grad_norm": 0.2898218333721161,
      "learning_rate": 0.00019076599494394602,
      "loss": 1.0081,
      "step": 3885
    },
    {
      "epoch": 0.22438855560682971,
      "grad_norm": 0.27696529030799866,
      "learning_rate": 0.00019072369326531824,
      "loss": 0.9245,
      "step": 3890
    },
    {
      "epoch": 0.22467697277341947,
      "grad_norm": 0.30240634083747864,
      "learning_rate": 0.00019068129962649365,
      "loss": 0.9759,
      "step": 3895
    },
    {
      "epoch": 0.22496538994000922,
      "grad_norm": 0.3089566230773926,
      "learning_rate": 0.00019063881407044373,
      "loss": 0.9144,
      "step": 3900
    },
    {
      "epoch": 0.22525380710659898,
      "grad_norm": 0.3179451823234558,
      "learning_rate": 0.00019059623664023311,
      "loss": 1.0379,
      "step": 3905
    },
    {
      "epoch": 0.22554222427318874,
      "grad_norm": 0.29353705048561096,
      "learning_rate": 0.00019055356737901952,
      "loss": 1.0612,
      "step": 3910
    },
    {
      "epoch": 0.2258306414397785,
      "grad_norm": 0.317120224237442,
      "learning_rate": 0.00019051080633005372,
      "loss": 0.9758,
      "step": 3915
    },
    {
      "epoch": 0.22611905860636825,
      "grad_norm": 0.29097092151641846,
      "learning_rate": 0.00019046795353667965,
      "loss": 1.0292,
      "step": 3920
    },
    {
      "epoch": 0.226407475772958,
      "grad_norm": 0.3430017828941345,
      "learning_rate": 0.00019042500904233408,
      "loss": 0.9483,
      "step": 3925
    },
    {
      "epoch": 0.22669589293954776,
      "grad_norm": 0.31500279903411865,
      "learning_rate": 0.00019038197289054684,
      "loss": 0.9524,
      "step": 3930
    },
    {
      "epoch": 0.2269843101061375,
      "grad_norm": 0.2940191328525543,
      "learning_rate": 0.00019033884512494064,
      "loss": 0.9503,
      "step": 3935
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.37484368681907654,
      "learning_rate": 0.00019029562578923106,
      "loss": 0.9885,
      "step": 3940
    },
    {
      "epoch": 0.22756114443931702,
      "grad_norm": 0.3004748225212097,
      "learning_rate": 0.00019025231492722643,
      "loss": 0.9923,
      "step": 3945
    },
    {
      "epoch": 0.22784956160590678,
      "grad_norm": 0.30483829975128174,
      "learning_rate": 0.000190208912582828,
      "loss": 0.9507,
      "step": 3950
    },
    {
      "epoch": 0.22813797877249653,
      "grad_norm": 0.30382221937179565,
      "learning_rate": 0.0001901654188000296,
      "loss": 0.9534,
      "step": 3955
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 0.3174874484539032,
      "learning_rate": 0.0001901218336229178,
      "loss": 1.0329,
      "step": 3960
    },
    {
      "epoch": 0.22871481310567604,
      "grad_norm": 0.3040885925292969,
      "learning_rate": 0.00019007815709567183,
      "loss": 0.978,
      "step": 3965
    },
    {
      "epoch": 0.2290032302722658,
      "grad_norm": 0.3258110284805298,
      "learning_rate": 0.0001900343892625635,
      "loss": 1.0537,
      "step": 3970
    },
    {
      "epoch": 0.22929164743885555,
      "grad_norm": 0.2845390737056732,
      "learning_rate": 0.00018999053016795719,
      "loss": 0.9592,
      "step": 3975
    },
    {
      "epoch": 0.2295800646054453,
      "grad_norm": 0.3034794330596924,
      "learning_rate": 0.00018994657985630972,
      "loss": 0.9808,
      "step": 3980
    },
    {
      "epoch": 0.22986848177203506,
      "grad_norm": 0.3224650025367737,
      "learning_rate": 0.00018990253837217042,
      "loss": 0.9952,
      "step": 3985
    },
    {
      "epoch": 0.23015689893862482,
      "grad_norm": 0.3139420449733734,
      "learning_rate": 0.00018985840576018107,
      "loss": 0.9499,
      "step": 3990
    },
    {
      "epoch": 0.23044531610521457,
      "grad_norm": 0.2906229794025421,
      "learning_rate": 0.00018981418206507575,
      "loss": 0.9593,
      "step": 3995
    },
    {
      "epoch": 0.23073373327180433,
      "grad_norm": 0.28899601101875305,
      "learning_rate": 0.00018976986733168093,
      "loss": 1.0203,
      "step": 4000
    },
    {
      "epoch": 0.23102215043839408,
      "grad_norm": 0.3095887303352356,
      "learning_rate": 0.00018972546160491528,
      "loss": 1.0639,
      "step": 4005
    },
    {
      "epoch": 0.23131056760498384,
      "grad_norm": 0.2887146472930908,
      "learning_rate": 0.00018968096492978976,
      "loss": 0.9895,
      "step": 4010
    },
    {
      "epoch": 0.2315989847715736,
      "grad_norm": 0.2964550256729126,
      "learning_rate": 0.0001896363773514075,
      "loss": 0.9801,
      "step": 4015
    },
    {
      "epoch": 0.23188740193816337,
      "grad_norm": 0.319967657327652,
      "learning_rate": 0.0001895916989149638,
      "loss": 1.0451,
      "step": 4020
    },
    {
      "epoch": 0.23217581910475313,
      "grad_norm": 0.33027443289756775,
      "learning_rate": 0.000189546929665746,
      "loss": 1.0692,
      "step": 4025
    },
    {
      "epoch": 0.23246423627134288,
      "grad_norm": 0.29336920380592346,
      "learning_rate": 0.00018950206964913355,
      "loss": 0.9877,
      "step": 4030
    },
    {
      "epoch": 0.23275265343793264,
      "grad_norm": 0.29890185594558716,
      "learning_rate": 0.0001894571189105979,
      "loss": 0.9255,
      "step": 4035
    },
    {
      "epoch": 0.2330410706045224,
      "grad_norm": 0.30321866273880005,
      "learning_rate": 0.00018941207749570237,
      "loss": 1.0392,
      "step": 4040
    },
    {
      "epoch": 0.23332948777111215,
      "grad_norm": 0.3135153651237488,
      "learning_rate": 0.00018936694545010232,
      "loss": 0.9713,
      "step": 4045
    },
    {
      "epoch": 0.2336179049377019,
      "grad_norm": 0.29142776131629944,
      "learning_rate": 0.0001893217228195449,
      "loss": 1.0032,
      "step": 4050
    },
    {
      "epoch": 0.23390632210429166,
      "grad_norm": 0.3098786771297455,
      "learning_rate": 0.0001892764096498691,
      "loss": 1.0406,
      "step": 4055
    },
    {
      "epoch": 0.2341947392708814,
      "grad_norm": 0.3293483257293701,
      "learning_rate": 0.00018923100598700561,
      "loss": 1.0142,
      "step": 4060
    },
    {
      "epoch": 0.23448315643747117,
      "grad_norm": 0.2999286651611328,
      "learning_rate": 0.00018918551187697703,
      "loss": 0.9461,
      "step": 4065
    },
    {
      "epoch": 0.23477157360406092,
      "grad_norm": 0.3046085238456726,
      "learning_rate": 0.00018913992736589746,
      "loss": 0.9985,
      "step": 4070
    },
    {
      "epoch": 0.23505999077065068,
      "grad_norm": 0.2792486846446991,
      "learning_rate": 0.00018909425249997267,
      "loss": 0.99,
      "step": 4075
    },
    {
      "epoch": 0.23534840793724043,
      "grad_norm": 0.30676671862602234,
      "learning_rate": 0.0001890484873255001,
      "loss": 0.9918,
      "step": 4080
    },
    {
      "epoch": 0.2356368251038302,
      "grad_norm": 0.2913929522037506,
      "learning_rate": 0.00018900263188886864,
      "loss": 0.9612,
      "step": 4085
    },
    {
      "epoch": 0.23592524227041994,
      "grad_norm": 0.32379987835884094,
      "learning_rate": 0.00018895668623655873,
      "loss": 0.9277,
      "step": 4090
    },
    {
      "epoch": 0.2362136594370097,
      "grad_norm": 0.33832383155822754,
      "learning_rate": 0.00018891065041514224,
      "loss": 0.9477,
      "step": 4095
    },
    {
      "epoch": 0.23650207660359945,
      "grad_norm": 0.31692832708358765,
      "learning_rate": 0.0001888645244712824,
      "loss": 0.96,
      "step": 4100
    },
    {
      "epoch": 0.2367904937701892,
      "grad_norm": 0.3002290725708008,
      "learning_rate": 0.0001888183084517338,
      "loss": 0.9275,
      "step": 4105
    },
    {
      "epoch": 0.23707891093677896,
      "grad_norm": 0.27560725808143616,
      "learning_rate": 0.00018877200240334236,
      "loss": 1.0375,
      "step": 4110
    },
    {
      "epoch": 0.23736732810336872,
      "grad_norm": 0.3038588762283325,
      "learning_rate": 0.0001887256063730453,
      "loss": 1.0229,
      "step": 4115
    },
    {
      "epoch": 0.23765574526995847,
      "grad_norm": 0.2903522253036499,
      "learning_rate": 0.00018867912040787096,
      "loss": 1.0118,
      "step": 4120
    },
    {
      "epoch": 0.23794416243654823,
      "grad_norm": 0.3210826516151428,
      "learning_rate": 0.0001886325445549389,
      "loss": 0.9883,
      "step": 4125
    },
    {
      "epoch": 0.23823257960313798,
      "grad_norm": 0.375205934047699,
      "learning_rate": 0.00018858587886145975,
      "loss": 0.9813,
      "step": 4130
    },
    {
      "epoch": 0.23852099676972774,
      "grad_norm": 0.3124467134475708,
      "learning_rate": 0.0001885391233747352,
      "loss": 0.9015,
      "step": 4135
    },
    {
      "epoch": 0.2388094139363175,
      "grad_norm": 0.3262174725532532,
      "learning_rate": 0.00018849227814215805,
      "loss": 0.8783,
      "step": 4140
    },
    {
      "epoch": 0.23909783110290725,
      "grad_norm": 0.31825995445251465,
      "learning_rate": 0.00018844534321121195,
      "loss": 1.0335,
      "step": 4145
    },
    {
      "epoch": 0.239386248269497,
      "grad_norm": 0.3026926517486572,
      "learning_rate": 0.00018839831862947152,
      "loss": 0.9791,
      "step": 4150
    },
    {
      "epoch": 0.23967466543608676,
      "grad_norm": 0.32558876276016235,
      "learning_rate": 0.0001883512044446023,
      "loss": 1.0042,
      "step": 4155
    },
    {
      "epoch": 0.23996308260267651,
      "grad_norm": 0.2892070412635803,
      "learning_rate": 0.00018830400070436057,
      "loss": 0.8757,
      "step": 4160
    },
    {
      "epoch": 0.24025149976926627,
      "grad_norm": 0.31175941228866577,
      "learning_rate": 0.00018825670745659345,
      "loss": 0.986,
      "step": 4165
    },
    {
      "epoch": 0.24053991693585602,
      "grad_norm": 0.3003990054130554,
      "learning_rate": 0.00018820932474923873,
      "loss": 0.9732,
      "step": 4170
    },
    {
      "epoch": 0.24082833410244578,
      "grad_norm": 0.29946771264076233,
      "learning_rate": 0.00018816185263032496,
      "loss": 0.985,
      "step": 4175
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 0.29952332377433777,
      "learning_rate": 0.00018811429114797123,
      "loss": 0.9689,
      "step": 4180
    },
    {
      "epoch": 0.2414051684356253,
      "grad_norm": 0.3125024735927582,
      "learning_rate": 0.00018806664035038727,
      "loss": 0.971,
      "step": 4185
    },
    {
      "epoch": 0.24169358560221504,
      "grad_norm": 0.338549941778183,
      "learning_rate": 0.00018801890028587333,
      "loss": 0.9967,
      "step": 4190
    },
    {
      "epoch": 0.2419820027688048,
      "grad_norm": 0.3147549033164978,
      "learning_rate": 0.00018797107100282015,
      "loss": 1.0016,
      "step": 4195
    },
    {
      "epoch": 0.24227041993539455,
      "grad_norm": 0.28421711921691895,
      "learning_rate": 0.0001879231525497089,
      "loss": 0.9418,
      "step": 4200
    },
    {
      "epoch": 0.2425588371019843,
      "grad_norm": 0.3105412721633911,
      "learning_rate": 0.00018787514497511104,
      "loss": 1.0044,
      "step": 4205
    },
    {
      "epoch": 0.24284725426857406,
      "grad_norm": 0.2936135530471802,
      "learning_rate": 0.0001878270483276886,
      "loss": 0.9557,
      "step": 4210
    },
    {
      "epoch": 0.24313567143516382,
      "grad_norm": 0.3218764662742615,
      "learning_rate": 0.00018777886265619365,
      "loss": 0.9989,
      "step": 4215
    },
    {
      "epoch": 0.24342408860175357,
      "grad_norm": 0.29364484548568726,
      "learning_rate": 0.00018773058800946858,
      "loss": 0.9341,
      "step": 4220
    },
    {
      "epoch": 0.24371250576834333,
      "grad_norm": 0.29040706157684326,
      "learning_rate": 0.0001876822244364461,
      "loss": 0.9869,
      "step": 4225
    },
    {
      "epoch": 0.24400092293493308,
      "grad_norm": 0.31661713123321533,
      "learning_rate": 0.00018763377198614887,
      "loss": 0.9548,
      "step": 4230
    },
    {
      "epoch": 0.24428934010152284,
      "grad_norm": 0.30058059096336365,
      "learning_rate": 0.00018758523070768973,
      "loss": 0.9072,
      "step": 4235
    },
    {
      "epoch": 0.2445777572681126,
      "grad_norm": 0.3189939856529236,
      "learning_rate": 0.00018753660065027152,
      "loss": 0.9999,
      "step": 4240
    },
    {
      "epoch": 0.24486617443470235,
      "grad_norm": 0.3253864645957947,
      "learning_rate": 0.00018748788186318712,
      "loss": 0.9708,
      "step": 4245
    },
    {
      "epoch": 0.2451545916012921,
      "grad_norm": 0.307716429233551,
      "learning_rate": 0.00018743907439581933,
      "loss": 0.9375,
      "step": 4250
    },
    {
      "epoch": 0.24544300876788186,
      "grad_norm": 0.2934640049934387,
      "learning_rate": 0.00018739017829764082,
      "loss": 0.9647,
      "step": 4255
    },
    {
      "epoch": 0.24573142593447161,
      "grad_norm": 0.3377256393432617,
      "learning_rate": 0.0001873411936182141,
      "loss": 0.9755,
      "step": 4260
    },
    {
      "epoch": 0.24601984310106137,
      "grad_norm": 0.3084704875946045,
      "learning_rate": 0.0001872921204071915,
      "loss": 1.0172,
      "step": 4265
    },
    {
      "epoch": 0.24630826026765112,
      "grad_norm": 0.3088560402393341,
      "learning_rate": 0.000187242958714315,
      "loss": 0.9861,
      "step": 4270
    },
    {
      "epoch": 0.24659667743424088,
      "grad_norm": 0.28719452023506165,
      "learning_rate": 0.00018719370858941644,
      "loss": 0.9762,
      "step": 4275
    },
    {
      "epoch": 0.24688509460083063,
      "grad_norm": 0.31891629099845886,
      "learning_rate": 0.00018714437008241709,
      "loss": 1.0395,
      "step": 4280
    },
    {
      "epoch": 0.2471735117674204,
      "grad_norm": 0.32796710729599,
      "learning_rate": 0.000187094943243328,
      "loss": 0.967,
      "step": 4285
    },
    {
      "epoch": 0.24746192893401014,
      "grad_norm": 0.3454214930534363,
      "learning_rate": 0.00018704542812224956,
      "loss": 0.938,
      "step": 4290
    },
    {
      "epoch": 0.2477503461005999,
      "grad_norm": 0.2978779375553131,
      "learning_rate": 0.00018699582476937185,
      "loss": 0.981,
      "step": 4295
    },
    {
      "epoch": 0.24803876326718965,
      "grad_norm": 0.3401256501674652,
      "learning_rate": 0.00018694613323497422,
      "loss": 1.0089,
      "step": 4300
    },
    {
      "epoch": 0.2483271804337794,
      "grad_norm": 0.32507994771003723,
      "learning_rate": 0.0001868963535694255,
      "loss": 1.0443,
      "step": 4305
    },
    {
      "epoch": 0.24861559760036916,
      "grad_norm": 0.31554827094078064,
      "learning_rate": 0.0001868464858231838,
      "loss": 1.0414,
      "step": 4310
    },
    {
      "epoch": 0.24890401476695892,
      "grad_norm": 0.3291451632976532,
      "learning_rate": 0.00018679653004679655,
      "loss": 0.9676,
      "step": 4315
    },
    {
      "epoch": 0.24919243193354867,
      "grad_norm": 0.3101532459259033,
      "learning_rate": 0.0001867464862909004,
      "loss": 0.955,
      "step": 4320
    },
    {
      "epoch": 0.24948084910013843,
      "grad_norm": 0.29966261982917786,
      "learning_rate": 0.00018669635460622107,
      "loss": 0.9035,
      "step": 4325
    },
    {
      "epoch": 0.24976926626672818,
      "grad_norm": 0.2881443500518799,
      "learning_rate": 0.00018664613504357366,
      "loss": 0.9708,
      "step": 4330
    },
    {
      "epoch": 0.25005768343331797,
      "grad_norm": 0.29754626750946045,
      "learning_rate": 0.00018659582765386204,
      "loss": 1.0263,
      "step": 4335
    },
    {
      "epoch": 0.2503461005999077,
      "grad_norm": 0.3321414291858673,
      "learning_rate": 0.0001865454324880794,
      "loss": 0.9843,
      "step": 4340
    },
    {
      "epoch": 0.2506345177664975,
      "grad_norm": 0.32111719250679016,
      "learning_rate": 0.00018649494959730765,
      "loss": 1.0291,
      "step": 4345
    },
    {
      "epoch": 0.2509229349330872,
      "grad_norm": 0.3495931327342987,
      "learning_rate": 0.00018644437903271778,
      "loss": 1.0373,
      "step": 4350
    },
    {
      "epoch": 0.251211352099677,
      "grad_norm": 0.30436307191848755,
      "learning_rate": 0.0001863937208455696,
      "loss": 0.9767,
      "step": 4355
    },
    {
      "epoch": 0.2514997692662667,
      "grad_norm": 0.3309740126132965,
      "learning_rate": 0.00018634297508721167,
      "loss": 0.9387,
      "step": 4360
    },
    {
      "epoch": 0.2517881864328565,
      "grad_norm": 0.300322949886322,
      "learning_rate": 0.00018629214180908144,
      "loss": 1.0123,
      "step": 4365
    },
    {
      "epoch": 0.2520766035994462,
      "grad_norm": 0.3226313591003418,
      "learning_rate": 0.00018624122106270506,
      "loss": 0.9499,
      "step": 4370
    },
    {
      "epoch": 0.252365020766036,
      "grad_norm": 0.32126346230506897,
      "learning_rate": 0.00018619021289969717,
      "loss": 0.9617,
      "step": 4375
    },
    {
      "epoch": 0.25265343793262574,
      "grad_norm": 0.2929309010505676,
      "learning_rate": 0.00018613911737176125,
      "loss": 0.9452,
      "step": 4380
    },
    {
      "epoch": 0.2529418550992155,
      "grad_norm": 0.29882681369781494,
      "learning_rate": 0.00018608793453068914,
      "loss": 0.9957,
      "step": 4385
    },
    {
      "epoch": 0.25323027226580525,
      "grad_norm": 0.2783080041408539,
      "learning_rate": 0.0001860366644283613,
      "loss": 0.9397,
      "step": 4390
    },
    {
      "epoch": 0.25351868943239503,
      "grad_norm": 0.2922220230102539,
      "learning_rate": 0.00018598530711674667,
      "loss": 0.9619,
      "step": 4395
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 0.2756292223930359,
      "learning_rate": 0.00018593386264790243,
      "loss": 0.9608,
      "step": 4400
    },
    {
      "epoch": 0.25409552376557454,
      "grad_norm": 0.32587939500808716,
      "learning_rate": 0.00018588233107397429,
      "loss": 0.8999,
      "step": 4405
    },
    {
      "epoch": 0.25438394093216427,
      "grad_norm": 0.301612913608551,
      "learning_rate": 0.00018583071244719607,
      "loss": 0.909,
      "step": 4410
    },
    {
      "epoch": 0.25467235809875405,
      "grad_norm": 0.3122866153717041,
      "learning_rate": 0.00018577900681989,
      "loss": 0.9398,
      "step": 4415
    },
    {
      "epoch": 0.2549607752653438,
      "grad_norm": 0.30573856830596924,
      "learning_rate": 0.0001857272142444664,
      "loss": 0.9165,
      "step": 4420
    },
    {
      "epoch": 0.25524919243193356,
      "grad_norm": 0.29823189973831177,
      "learning_rate": 0.00018567533477342377,
      "loss": 0.9528,
      "step": 4425
    },
    {
      "epoch": 0.2555376095985233,
      "grad_norm": 0.3344714641571045,
      "learning_rate": 0.0001856233684593486,
      "loss": 0.9577,
      "step": 4430
    },
    {
      "epoch": 0.25582602676511307,
      "grad_norm": 0.29007846117019653,
      "learning_rate": 0.0001855713153549155,
      "loss": 0.944,
      "step": 4435
    },
    {
      "epoch": 0.2561144439317028,
      "grad_norm": 0.2928242087364197,
      "learning_rate": 0.00018551917551288706,
      "loss": 0.9878,
      "step": 4440
    },
    {
      "epoch": 0.2564028610982926,
      "grad_norm": 0.3003365695476532,
      "learning_rate": 0.0001854669489861137,
      "loss": 0.9784,
      "step": 4445
    },
    {
      "epoch": 0.2566912782648823,
      "grad_norm": 0.30604249238967896,
      "learning_rate": 0.0001854146358275338,
      "loss": 0.9803,
      "step": 4450
    },
    {
      "epoch": 0.2569796954314721,
      "grad_norm": 0.31301596760749817,
      "learning_rate": 0.00018536223609017348,
      "loss": 1.0573,
      "step": 4455
    },
    {
      "epoch": 0.2572681125980618,
      "grad_norm": 0.30836206674575806,
      "learning_rate": 0.00018530974982714667,
      "loss": 0.9928,
      "step": 4460
    },
    {
      "epoch": 0.2575565297646516,
      "grad_norm": 0.3122254014015198,
      "learning_rate": 0.00018525717709165498,
      "loss": 1.0245,
      "step": 4465
    },
    {
      "epoch": 0.2578449469312413,
      "grad_norm": 0.29952389001846313,
      "learning_rate": 0.0001852045179369877,
      "loss": 1.0159,
      "step": 4470
    },
    {
      "epoch": 0.2581333640978311,
      "grad_norm": 0.2811339199542999,
      "learning_rate": 0.00018515177241652163,
      "loss": 0.9483,
      "step": 4475
    },
    {
      "epoch": 0.25842178126442084,
      "grad_norm": 0.3140300512313843,
      "learning_rate": 0.0001850989405837212,
      "loss": 0.98,
      "step": 4480
    },
    {
      "epoch": 0.2587101984310106,
      "grad_norm": 0.3146283030509949,
      "learning_rate": 0.00018504602249213838,
      "loss": 1.0204,
      "step": 4485
    },
    {
      "epoch": 0.25899861559760035,
      "grad_norm": 0.28882843255996704,
      "learning_rate": 0.0001849930181954124,
      "loss": 0.995,
      "step": 4490
    },
    {
      "epoch": 0.25928703276419013,
      "grad_norm": 0.35614368319511414,
      "learning_rate": 0.00018493992774727005,
      "loss": 1.0179,
      "step": 4495
    },
    {
      "epoch": 0.25957544993077986,
      "grad_norm": 0.3043900728225708,
      "learning_rate": 0.00018488675120152532,
      "loss": 0.9413,
      "step": 4500
    },
    {
      "epoch": 0.25986386709736964,
      "grad_norm": 0.2888356149196625,
      "learning_rate": 0.00018483348861207953,
      "loss": 0.9917,
      "step": 4505
    },
    {
      "epoch": 0.26015228426395937,
      "grad_norm": 0.31191486120224,
      "learning_rate": 0.00018478014003292116,
      "loss": 0.9503,
      "step": 4510
    },
    {
      "epoch": 0.26044070143054915,
      "grad_norm": 0.2871573269367218,
      "learning_rate": 0.00018472670551812596,
      "loss": 1.0236,
      "step": 4515
    },
    {
      "epoch": 0.2607291185971389,
      "grad_norm": 0.3728832006454468,
      "learning_rate": 0.0001846731851218567,
      "loss": 1.0037,
      "step": 4520
    },
    {
      "epoch": 0.26101753576372866,
      "grad_norm": 0.27702075242996216,
      "learning_rate": 0.00018461957889836324,
      "loss": 0.9536,
      "step": 4525
    },
    {
      "epoch": 0.2613059529303184,
      "grad_norm": 0.2843487560749054,
      "learning_rate": 0.00018456588690198236,
      "loss": 0.974,
      "step": 4530
    },
    {
      "epoch": 0.26159437009690817,
      "grad_norm": 0.3026067912578583,
      "learning_rate": 0.0001845121091871379,
      "loss": 1.0121,
      "step": 4535
    },
    {
      "epoch": 0.2618827872634979,
      "grad_norm": 0.299246221780777,
      "learning_rate": 0.0001844582458083405,
      "loss": 0.9328,
      "step": 4540
    },
    {
      "epoch": 0.2621712044300877,
      "grad_norm": 0.29690268635749817,
      "learning_rate": 0.0001844042968201877,
      "loss": 0.9492,
      "step": 4545
    },
    {
      "epoch": 0.26245962159667746,
      "grad_norm": 0.29138097167015076,
      "learning_rate": 0.0001843502622773637,
      "loss": 0.9715,
      "step": 4550
    },
    {
      "epoch": 0.2627480387632672,
      "grad_norm": 0.2924482822418213,
      "learning_rate": 0.0001842961422346396,
      "loss": 0.9897,
      "step": 4555
    },
    {
      "epoch": 0.26303645592985697,
      "grad_norm": 0.28473740816116333,
      "learning_rate": 0.00018424193674687297,
      "loss": 1.0282,
      "step": 4560
    },
    {
      "epoch": 0.2633248730964467,
      "grad_norm": 0.3194859027862549,
      "learning_rate": 0.00018418764586900817,
      "loss": 0.9995,
      "step": 4565
    },
    {
      "epoch": 0.2636132902630365,
      "grad_norm": 0.31165921688079834,
      "learning_rate": 0.00018413326965607593,
      "loss": 1.0285,
      "step": 4570
    },
    {
      "epoch": 0.2639017074296262,
      "grad_norm": 0.28910648822784424,
      "learning_rate": 0.00018407880816319363,
      "loss": 0.9465,
      "step": 4575
    },
    {
      "epoch": 0.264190124596216,
      "grad_norm": 0.3027464747428894,
      "learning_rate": 0.00018402426144556504,
      "loss": 0.9554,
      "step": 4580
    },
    {
      "epoch": 0.2644785417628057,
      "grad_norm": 0.3191346824169159,
      "learning_rate": 0.0001839696295584803,
      "loss": 1.0284,
      "step": 4585
    },
    {
      "epoch": 0.2647669589293955,
      "grad_norm": 0.32781797647476196,
      "learning_rate": 0.0001839149125573159,
      "loss": 0.9761,
      "step": 4590
    },
    {
      "epoch": 0.26505537609598523,
      "grad_norm": 0.28181716799736023,
      "learning_rate": 0.0001838601104975346,
      "loss": 1.0894,
      "step": 4595
    },
    {
      "epoch": 0.265343793262575,
      "grad_norm": 0.35118234157562256,
      "learning_rate": 0.00018380522343468532,
      "loss": 0.9843,
      "step": 4600
    },
    {
      "epoch": 0.26563221042916474,
      "grad_norm": 0.30681881308555603,
      "learning_rate": 0.0001837502514244033,
      "loss": 1.0639,
      "step": 4605
    },
    {
      "epoch": 0.2659206275957545,
      "grad_norm": 0.3133811056613922,
      "learning_rate": 0.00018369519452240973,
      "loss": 1.0317,
      "step": 4610
    },
    {
      "epoch": 0.26620904476234425,
      "grad_norm": 0.3321933150291443,
      "learning_rate": 0.00018364005278451187,
      "loss": 0.9626,
      "step": 4615
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 0.30032068490982056,
      "learning_rate": 0.00018358482626660303,
      "loss": 1.0235,
      "step": 4620
    },
    {
      "epoch": 0.26678587909552376,
      "grad_norm": 0.315247118473053,
      "learning_rate": 0.00018352951502466244,
      "loss": 1.0141,
      "step": 4625
    },
    {
      "epoch": 0.26707429626211354,
      "grad_norm": 0.2941517233848572,
      "learning_rate": 0.0001834741191147552,
      "loss": 0.9924,
      "step": 4630
    },
    {
      "epoch": 0.26736271342870327,
      "grad_norm": 0.30521127581596375,
      "learning_rate": 0.00018341863859303218,
      "loss": 1.0182,
      "step": 4635
    },
    {
      "epoch": 0.26765113059529305,
      "grad_norm": 0.3334304392337799,
      "learning_rate": 0.00018336307351573018,
      "loss": 0.9819,
      "step": 4640
    },
    {
      "epoch": 0.2679395477618828,
      "grad_norm": 0.28640317916870117,
      "learning_rate": 0.00018330742393917143,
      "loss": 1.0039,
      "step": 4645
    },
    {
      "epoch": 0.26822796492847256,
      "grad_norm": 0.30890411138534546,
      "learning_rate": 0.00018325168991976408,
      "loss": 1.0092,
      "step": 4650
    },
    {
      "epoch": 0.2685163820950623,
      "grad_norm": 0.29789072275161743,
      "learning_rate": 0.00018319587151400174,
      "loss": 1.0011,
      "step": 4655
    },
    {
      "epoch": 0.26880479926165207,
      "grad_norm": 0.2906172275543213,
      "learning_rate": 0.00018313996877846361,
      "loss": 0.9535,
      "step": 4660
    },
    {
      "epoch": 0.2690932164282418,
      "grad_norm": 0.2868962585926056,
      "learning_rate": 0.00018308398176981433,
      "loss": 1.0084,
      "step": 4665
    },
    {
      "epoch": 0.2693816335948316,
      "grad_norm": 0.3024742007255554,
      "learning_rate": 0.00018302791054480394,
      "loss": 1.05,
      "step": 4670
    },
    {
      "epoch": 0.2696700507614213,
      "grad_norm": 0.29981881380081177,
      "learning_rate": 0.00018297175516026788,
      "loss": 0.9848,
      "step": 4675
    },
    {
      "epoch": 0.2699584679280111,
      "grad_norm": 0.303254634141922,
      "learning_rate": 0.00018291551567312694,
      "loss": 0.9698,
      "step": 4680
    },
    {
      "epoch": 0.2702468850946008,
      "grad_norm": 0.3180643618106842,
      "learning_rate": 0.0001828591921403871,
      "loss": 1.0005,
      "step": 4685
    },
    {
      "epoch": 0.2705353022611906,
      "grad_norm": 0.300870805978775,
      "learning_rate": 0.00018280278461913952,
      "loss": 0.9951,
      "step": 4690
    },
    {
      "epoch": 0.27082371942778033,
      "grad_norm": 0.30927881598472595,
      "learning_rate": 0.00018274629316656054,
      "loss": 0.9021,
      "step": 4695
    },
    {
      "epoch": 0.2711121365943701,
      "grad_norm": 0.310472697019577,
      "learning_rate": 0.00018268971783991152,
      "loss": 1.0217,
      "step": 4700
    },
    {
      "epoch": 0.27140055376095984,
      "grad_norm": 0.33175238966941833,
      "learning_rate": 0.00018263305869653892,
      "loss": 0.9618,
      "step": 4705
    },
    {
      "epoch": 0.2716889709275496,
      "grad_norm": 0.333126038312912,
      "learning_rate": 0.00018257631579387412,
      "loss": 1.0605,
      "step": 4710
    },
    {
      "epoch": 0.27197738809413935,
      "grad_norm": 0.32339242100715637,
      "learning_rate": 0.00018251948918943334,
      "loss": 1.0171,
      "step": 4715
    },
    {
      "epoch": 0.27226580526072913,
      "grad_norm": 0.28846561908721924,
      "learning_rate": 0.0001824625789408177,
      "loss": 0.9603,
      "step": 4720
    },
    {
      "epoch": 0.27255422242731886,
      "grad_norm": 0.2988503873348236,
      "learning_rate": 0.0001824055851057131,
      "loss": 0.9954,
      "step": 4725
    },
    {
      "epoch": 0.27284263959390864,
      "grad_norm": 0.2900153398513794,
      "learning_rate": 0.00018234850774189018,
      "loss": 0.8959,
      "step": 4730
    },
    {
      "epoch": 0.27313105676049837,
      "grad_norm": 0.3061988353729248,
      "learning_rate": 0.00018229134690720425,
      "loss": 0.9985,
      "step": 4735
    },
    {
      "epoch": 0.27341947392708815,
      "grad_norm": 0.323887437582016,
      "learning_rate": 0.00018223410265959516,
      "loss": 0.9946,
      "step": 4740
    },
    {
      "epoch": 0.2737078910936779,
      "grad_norm": 0.28910937905311584,
      "learning_rate": 0.00018217677505708737,
      "loss": 0.9593,
      "step": 4745
    },
    {
      "epoch": 0.27399630826026766,
      "grad_norm": 0.3210904896259308,
      "learning_rate": 0.00018211936415778984,
      "loss": 0.9201,
      "step": 4750
    },
    {
      "epoch": 0.2742847254268574,
      "grad_norm": 0.280989408493042,
      "learning_rate": 0.00018206187001989593,
      "loss": 0.9341,
      "step": 4755
    },
    {
      "epoch": 0.2745731425934472,
      "grad_norm": 0.3101036846637726,
      "learning_rate": 0.0001820042927016834,
      "loss": 0.9668,
      "step": 4760
    },
    {
      "epoch": 0.2748615597600369,
      "grad_norm": 0.3021515905857086,
      "learning_rate": 0.00018194663226151427,
      "loss": 0.9514,
      "step": 4765
    },
    {
      "epoch": 0.2751499769266267,
      "grad_norm": 0.30778107047080994,
      "learning_rate": 0.0001818888887578349,
      "loss": 0.967,
      "step": 4770
    },
    {
      "epoch": 0.2754383940932164,
      "grad_norm": 0.32916298508644104,
      "learning_rate": 0.00018183106224917576,
      "loss": 0.977,
      "step": 4775
    },
    {
      "epoch": 0.2757268112598062,
      "grad_norm": 0.3261403441429138,
      "learning_rate": 0.00018177315279415153,
      "loss": 0.9491,
      "step": 4780
    },
    {
      "epoch": 0.2760152284263959,
      "grad_norm": 0.3058185279369354,
      "learning_rate": 0.0001817151604514609,
      "loss": 0.985,
      "step": 4785
    },
    {
      "epoch": 0.2763036455929857,
      "grad_norm": 0.2961861491203308,
      "learning_rate": 0.00018165708527988664,
      "loss": 0.966,
      "step": 4790
    },
    {
      "epoch": 0.27659206275957543,
      "grad_norm": 0.306802362203598,
      "learning_rate": 0.0001815989273382954,
      "loss": 0.9942,
      "step": 4795
    },
    {
      "epoch": 0.2768804799261652,
      "grad_norm": 0.2971879839897156,
      "learning_rate": 0.00018154068668563782,
      "loss": 0.9362,
      "step": 4800
    },
    {
      "epoch": 0.27716889709275494,
      "grad_norm": 0.3133615255355835,
      "learning_rate": 0.00018148236338094833,
      "loss": 0.9792,
      "step": 4805
    },
    {
      "epoch": 0.2774573142593447,
      "grad_norm": 0.287922739982605,
      "learning_rate": 0.00018142395748334513,
      "loss": 0.8758,
      "step": 4810
    },
    {
      "epoch": 0.27774573142593445,
      "grad_norm": 0.31516680121421814,
      "learning_rate": 0.00018136546905203016,
      "loss": 0.9796,
      "step": 4815
    },
    {
      "epoch": 0.27803414859252423,
      "grad_norm": 0.2937026917934418,
      "learning_rate": 0.000181306898146289,
      "loss": 0.9357,
      "step": 4820
    },
    {
      "epoch": 0.27832256575911396,
      "grad_norm": 0.28911980986595154,
      "learning_rate": 0.00018124824482549086,
      "loss": 1.0377,
      "step": 4825
    },
    {
      "epoch": 0.27861098292570374,
      "grad_norm": 0.31102392077445984,
      "learning_rate": 0.00018118950914908843,
      "loss": 0.9278,
      "step": 4830
    },
    {
      "epoch": 0.27889940009229347,
      "grad_norm": 0.34064656496047974,
      "learning_rate": 0.00018113069117661797,
      "loss": 0.9204,
      "step": 4835
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 0.31624704599380493,
      "learning_rate": 0.00018107179096769901,
      "loss": 0.9523,
      "step": 4840
    },
    {
      "epoch": 0.279476234425473,
      "grad_norm": 0.2988069951534271,
      "learning_rate": 0.00018101280858203462,
      "loss": 1.02,
      "step": 4845
    },
    {
      "epoch": 0.27976465159206276,
      "grad_norm": 0.3373366892337799,
      "learning_rate": 0.00018095374407941104,
      "loss": 1.0102,
      "step": 4850
    },
    {
      "epoch": 0.2800530687586525,
      "grad_norm": 0.3316998779773712,
      "learning_rate": 0.00018089459751969778,
      "loss": 1.0128,
      "step": 4855
    },
    {
      "epoch": 0.2803414859252423,
      "grad_norm": 0.30349063873291016,
      "learning_rate": 0.0001808353689628475,
      "loss": 0.8867,
      "step": 4860
    },
    {
      "epoch": 0.280629903091832,
      "grad_norm": 0.33365175127983093,
      "learning_rate": 0.0001807760584688961,
      "loss": 0.9744,
      "step": 4865
    },
    {
      "epoch": 0.2809183202584218,
      "grad_norm": 0.3064230978488922,
      "learning_rate": 0.0001807166660979623,
      "loss": 0.9424,
      "step": 4870
    },
    {
      "epoch": 0.2812067374250115,
      "grad_norm": 0.29559001326560974,
      "learning_rate": 0.00018065719191024808,
      "loss": 0.999,
      "step": 4875
    },
    {
      "epoch": 0.2814951545916013,
      "grad_norm": 0.33819547295570374,
      "learning_rate": 0.00018059763596603814,
      "loss": 1.0021,
      "step": 4880
    },
    {
      "epoch": 0.281783571758191,
      "grad_norm": 0.321237713098526,
      "learning_rate": 0.00018053799832570014,
      "loss": 0.9694,
      "step": 4885
    },
    {
      "epoch": 0.2820719889247808,
      "grad_norm": 0.3277602791786194,
      "learning_rate": 0.0001804782790496846,
      "loss": 0.9753,
      "step": 4890
    },
    {
      "epoch": 0.2823604060913706,
      "grad_norm": 0.2939560115337372,
      "learning_rate": 0.00018041847819852468,
      "loss": 0.9314,
      "step": 4895
    },
    {
      "epoch": 0.2826488232579603,
      "grad_norm": 0.30268988013267517,
      "learning_rate": 0.00018035859583283626,
      "loss": 0.973,
      "step": 4900
    },
    {
      "epoch": 0.2829372404245501,
      "grad_norm": 0.30159807205200195,
      "learning_rate": 0.00018029863201331783,
      "loss": 0.912,
      "step": 4905
    },
    {
      "epoch": 0.2832256575911398,
      "grad_norm": 0.3018011152744293,
      "learning_rate": 0.00018023858680075061,
      "loss": 1.0129,
      "step": 4910
    },
    {
      "epoch": 0.2835140747577296,
      "grad_norm": 0.3190675973892212,
      "learning_rate": 0.0001801784602559981,
      "loss": 0.9403,
      "step": 4915
    },
    {
      "epoch": 0.28380249192431933,
      "grad_norm": 0.32579466700553894,
      "learning_rate": 0.00018011825244000632,
      "loss": 0.9585,
      "step": 4920
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 0.28825148940086365,
      "learning_rate": 0.00018005796341380372,
      "loss": 0.9275,
      "step": 4925
    },
    {
      "epoch": 0.28437932625749884,
      "grad_norm": 0.3098837435245514,
      "learning_rate": 0.00017999759323850098,
      "loss": 1.0372,
      "step": 4930
    },
    {
      "epoch": 0.2846677434240886,
      "grad_norm": 0.3242741823196411,
      "learning_rate": 0.0001799371419752911,
      "loss": 0.9727,
      "step": 4935
    },
    {
      "epoch": 0.28495616059067835,
      "grad_norm": 0.31144097447395325,
      "learning_rate": 0.0001798766096854493,
      "loss": 0.926,
      "step": 4940
    },
    {
      "epoch": 0.28524457775726814,
      "grad_norm": 0.3283907175064087,
      "learning_rate": 0.0001798159964303328,
      "loss": 1.0166,
      "step": 4945
    },
    {
      "epoch": 0.28553299492385786,
      "grad_norm": 0.32130008935928345,
      "learning_rate": 0.00017975530227138105,
      "loss": 0.9836,
      "step": 4950
    },
    {
      "epoch": 0.28582141209044765,
      "grad_norm": 0.3087569773197174,
      "learning_rate": 0.00017969452727011536,
      "loss": 1.0186,
      "step": 4955
    },
    {
      "epoch": 0.2861098292570374,
      "grad_norm": 0.29481446743011475,
      "learning_rate": 0.00017963367148813913,
      "loss": 1.0168,
      "step": 4960
    },
    {
      "epoch": 0.28639824642362716,
      "grad_norm": 0.30410903692245483,
      "learning_rate": 0.0001795727349871375,
      "loss": 1.0112,
      "step": 4965
    },
    {
      "epoch": 0.2866866635902169,
      "grad_norm": 0.30060312151908875,
      "learning_rate": 0.0001795117178288775,
      "loss": 1.0348,
      "step": 4970
    },
    {
      "epoch": 0.28697508075680667,
      "grad_norm": 0.3111194670200348,
      "learning_rate": 0.00017945062007520797,
      "loss": 1.0389,
      "step": 4975
    },
    {
      "epoch": 0.2872634979233964,
      "grad_norm": 0.3265964090824127,
      "learning_rate": 0.00017938944178805933,
      "loss": 0.9882,
      "step": 4980
    },
    {
      "epoch": 0.2875519150899862,
      "grad_norm": 0.29788386821746826,
      "learning_rate": 0.0001793281830294437,
      "loss": 0.9822,
      "step": 4985
    },
    {
      "epoch": 0.2878403322565759,
      "grad_norm": 0.29151782393455505,
      "learning_rate": 0.00017926684386145478,
      "loss": 0.9768,
      "step": 4990
    },
    {
      "epoch": 0.2881287494231657,
      "grad_norm": 0.30184727907180786,
      "learning_rate": 0.0001792054243462677,
      "loss": 0.9629,
      "step": 4995
    },
    {
      "epoch": 0.2884171665897554,
      "grad_norm": 0.34121939539909363,
      "learning_rate": 0.00017914392454613913,
      "loss": 1.0261,
      "step": 5000
    },
    {
      "epoch": 0.2887055837563452,
      "grad_norm": 0.29807743430137634,
      "learning_rate": 0.00017908234452340707,
      "loss": 0.9572,
      "step": 5005
    },
    {
      "epoch": 0.2889940009229349,
      "grad_norm": 0.31767940521240234,
      "learning_rate": 0.00017902068434049077,
      "loss": 0.9559,
      "step": 5010
    },
    {
      "epoch": 0.2892824180895247,
      "grad_norm": 0.33446744084358215,
      "learning_rate": 0.0001789589440598909,
      "loss": 1.0301,
      "step": 5015
    },
    {
      "epoch": 0.28957083525611443,
      "grad_norm": 0.30399268865585327,
      "learning_rate": 0.00017889712374418912,
      "loss": 1.0066,
      "step": 5020
    },
    {
      "epoch": 0.2898592524227042,
      "grad_norm": 0.31030428409576416,
      "learning_rate": 0.0001788352234560484,
      "loss": 0.9751,
      "step": 5025
    },
    {
      "epoch": 0.29014766958929394,
      "grad_norm": 0.30979663133621216,
      "learning_rate": 0.00017877324325821264,
      "loss": 1.0192,
      "step": 5030
    },
    {
      "epoch": 0.2904360867558837,
      "grad_norm": 0.31306302547454834,
      "learning_rate": 0.0001787111832135068,
      "loss": 1.018,
      "step": 5035
    },
    {
      "epoch": 0.29072450392247345,
      "grad_norm": 0.2913960814476013,
      "learning_rate": 0.00017864904338483676,
      "loss": 0.954,
      "step": 5040
    },
    {
      "epoch": 0.29101292108906324,
      "grad_norm": 0.3290291428565979,
      "learning_rate": 0.00017858682383518928,
      "loss": 1.0345,
      "step": 5045
    },
    {
      "epoch": 0.29130133825565296,
      "grad_norm": 0.29978567361831665,
      "learning_rate": 0.00017852452462763192,
      "loss": 0.9141,
      "step": 5050
    },
    {
      "epoch": 0.29158975542224275,
      "grad_norm": 0.3192616403102875,
      "learning_rate": 0.00017846214582531298,
      "loss": 1.0308,
      "step": 5055
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 0.30386027693748474,
      "learning_rate": 0.00017839968749146142,
      "loss": 0.9891,
      "step": 5060
    },
    {
      "epoch": 0.29216658975542226,
      "grad_norm": 0.31576067209243774,
      "learning_rate": 0.00017833714968938687,
      "loss": 0.8924,
      "step": 5065
    },
    {
      "epoch": 0.292455006922012,
      "grad_norm": 0.30237501859664917,
      "learning_rate": 0.0001782745324824795,
      "loss": 0.9355,
      "step": 5070
    },
    {
      "epoch": 0.29274342408860177,
      "grad_norm": 0.29534316062927246,
      "learning_rate": 0.00017821183593420988,
      "loss": 0.918,
      "step": 5075
    },
    {
      "epoch": 0.2930318412551915,
      "grad_norm": 0.3078969120979309,
      "learning_rate": 0.00017814906010812912,
      "loss": 1.018,
      "step": 5080
    },
    {
      "epoch": 0.2933202584217813,
      "grad_norm": 0.31464046239852905,
      "learning_rate": 0.00017808620506786865,
      "loss": 0.8985,
      "step": 5085
    },
    {
      "epoch": 0.293608675588371,
      "grad_norm": 0.28147396445274353,
      "learning_rate": 0.00017802327087714016,
      "loss": 0.9324,
      "step": 5090
    },
    {
      "epoch": 0.2938970927549608,
      "grad_norm": 0.30480238795280457,
      "learning_rate": 0.00017796025759973558,
      "loss": 0.9891,
      "step": 5095
    },
    {
      "epoch": 0.2941855099215505,
      "grad_norm": 0.2980847656726837,
      "learning_rate": 0.00017789716529952704,
      "loss": 0.9646,
      "step": 5100
    },
    {
      "epoch": 0.2944739270881403,
      "grad_norm": 0.3064950108528137,
      "learning_rate": 0.00017783399404046674,
      "loss": 0.9412,
      "step": 5105
    },
    {
      "epoch": 0.29476234425473,
      "grad_norm": 0.29476818442344666,
      "learning_rate": 0.00017777074388658693,
      "loss": 0.9941,
      "step": 5110
    },
    {
      "epoch": 0.2950507614213198,
      "grad_norm": 0.3126581609249115,
      "learning_rate": 0.00017770741490199979,
      "loss": 0.915,
      "step": 5115
    },
    {
      "epoch": 0.29533917858790953,
      "grad_norm": 0.29231584072113037,
      "learning_rate": 0.00017764400715089744,
      "loss": 0.986,
      "step": 5120
    },
    {
      "epoch": 0.2956275957544993,
      "grad_norm": 0.32379642128944397,
      "learning_rate": 0.00017758052069755188,
      "loss": 0.954,
      "step": 5125
    },
    {
      "epoch": 0.29591601292108904,
      "grad_norm": 0.34569108486175537,
      "learning_rate": 0.0001775169556063148,
      "loss": 0.9136,
      "step": 5130
    },
    {
      "epoch": 0.2962044300876788,
      "grad_norm": 0.3077993392944336,
      "learning_rate": 0.00017745331194161766,
      "loss": 0.9378,
      "step": 5135
    },
    {
      "epoch": 0.29649284725426855,
      "grad_norm": 0.29212331771850586,
      "learning_rate": 0.00017738958976797157,
      "loss": 0.9831,
      "step": 5140
    },
    {
      "epoch": 0.29678126442085834,
      "grad_norm": 0.30094853043556213,
      "learning_rate": 0.00017732578914996712,
      "loss": 1.0074,
      "step": 5145
    },
    {
      "epoch": 0.29706968158744806,
      "grad_norm": 0.2973226010799408,
      "learning_rate": 0.00017726191015227452,
      "loss": 0.9542,
      "step": 5150
    },
    {
      "epoch": 0.29735809875403785,
      "grad_norm": 0.31063079833984375,
      "learning_rate": 0.00017719795283964345,
      "loss": 1.0237,
      "step": 5155
    },
    {
      "epoch": 0.2976465159206276,
      "grad_norm": 0.3208075165748596,
      "learning_rate": 0.00017713391727690284,
      "loss": 1.0179,
      "step": 5160
    },
    {
      "epoch": 0.29793493308721736,
      "grad_norm": 0.31248340010643005,
      "learning_rate": 0.00017706980352896108,
      "loss": 0.9811,
      "step": 5165
    },
    {
      "epoch": 0.2982233502538071,
      "grad_norm": 0.3174075186252594,
      "learning_rate": 0.0001770056116608057,
      "loss": 0.9975,
      "step": 5170
    },
    {
      "epoch": 0.29851176742039687,
      "grad_norm": 0.2985789477825165,
      "learning_rate": 0.0001769413417375035,
      "loss": 0.953,
      "step": 5175
    },
    {
      "epoch": 0.2988001845869866,
      "grad_norm": 0.304510235786438,
      "learning_rate": 0.0001768769938242003,
      "loss": 1.0,
      "step": 5180
    },
    {
      "epoch": 0.2990886017535764,
      "grad_norm": 0.30035126209259033,
      "learning_rate": 0.00017681256798612112,
      "loss": 1.0334,
      "step": 5185
    },
    {
      "epoch": 0.2993770189201661,
      "grad_norm": 0.3091324269771576,
      "learning_rate": 0.0001767480642885698,
      "loss": 1.0038,
      "step": 5190
    },
    {
      "epoch": 0.2996654360867559,
      "grad_norm": 0.32166004180908203,
      "learning_rate": 0.00017668348279692921,
      "loss": 0.9949,
      "step": 5195
    },
    {
      "epoch": 0.2999538532533456,
      "grad_norm": 0.29622697830200195,
      "learning_rate": 0.00017661882357666105,
      "loss": 0.9714,
      "step": 5200
    },
    {
      "epoch": 0.3002422704199354,
      "grad_norm": 0.31370845437049866,
      "learning_rate": 0.00017655408669330576,
      "loss": 0.9996,
      "step": 5205
    },
    {
      "epoch": 0.3005306875865251,
      "grad_norm": 0.31638461351394653,
      "learning_rate": 0.00017648927221248264,
      "loss": 0.9975,
      "step": 5210
    },
    {
      "epoch": 0.3008191047531149,
      "grad_norm": 0.3722272217273712,
      "learning_rate": 0.00017642438019988945,
      "loss": 1.0313,
      "step": 5215
    },
    {
      "epoch": 0.30110752191970463,
      "grad_norm": 0.3506329357624054,
      "learning_rate": 0.00017635941072130268,
      "loss": 1.0279,
      "step": 5220
    },
    {
      "epoch": 0.3013959390862944,
      "grad_norm": 0.2909005582332611,
      "learning_rate": 0.0001762943638425773,
      "loss": 1.0225,
      "step": 5225
    },
    {
      "epoch": 0.30168435625288414,
      "grad_norm": 0.2934955060482025,
      "learning_rate": 0.00017622923962964672,
      "loss": 1.0326,
      "step": 5230
    },
    {
      "epoch": 0.3019727734194739,
      "grad_norm": 0.38688069581985474,
      "learning_rate": 0.00017616403814852278,
      "loss": 1.063,
      "step": 5235
    },
    {
      "epoch": 0.30226119058606365,
      "grad_norm": 0.28810983896255493,
      "learning_rate": 0.0001760987594652956,
      "loss": 0.9573,
      "step": 5240
    },
    {
      "epoch": 0.30254960775265344,
      "grad_norm": 0.3192325830459595,
      "learning_rate": 0.00017603340364613355,
      "loss": 0.9995,
      "step": 5245
    },
    {
      "epoch": 0.3028380249192432,
      "grad_norm": 0.3177827000617981,
      "learning_rate": 0.00017596797075728322,
      "loss": 0.9979,
      "step": 5250
    },
    {
      "epoch": 0.30312644208583295,
      "grad_norm": 0.3133276402950287,
      "learning_rate": 0.00017590246086506933,
      "loss": 0.9178,
      "step": 5255
    },
    {
      "epoch": 0.30341485925242273,
      "grad_norm": 0.31365862488746643,
      "learning_rate": 0.00017583687403589454,
      "loss": 1.0893,
      "step": 5260
    },
    {
      "epoch": 0.30370327641901246,
      "grad_norm": 0.2886902391910553,
      "learning_rate": 0.0001757712103362397,
      "loss": 0.9814,
      "step": 5265
    },
    {
      "epoch": 0.30399169358560224,
      "grad_norm": 0.3023292124271393,
      "learning_rate": 0.0001757054698326634,
      "loss": 0.9635,
      "step": 5270
    },
    {
      "epoch": 0.30428011075219197,
      "grad_norm": 0.30080515146255493,
      "learning_rate": 0.00017563965259180216,
      "loss": 1.0034,
      "step": 5275
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.34142324328422546,
      "learning_rate": 0.00017557375868037026,
      "loss": 1.0152,
      "step": 5280
    },
    {
      "epoch": 0.3048569450853715,
      "grad_norm": 0.3014017939567566,
      "learning_rate": 0.00017550778816515967,
      "loss": 0.9798,
      "step": 5285
    },
    {
      "epoch": 0.30514536225196126,
      "grad_norm": 0.4091859459877014,
      "learning_rate": 0.0001754417411130401,
      "loss": 1.0376,
      "step": 5290
    },
    {
      "epoch": 0.305433779418551,
      "grad_norm": 0.2926144301891327,
      "learning_rate": 0.00017537561759095873,
      "loss": 0.966,
      "step": 5295
    },
    {
      "epoch": 0.30572219658514077,
      "grad_norm": 0.30158326029777527,
      "learning_rate": 0.0001753094176659403,
      "loss": 0.9269,
      "step": 5300
    },
    {
      "epoch": 0.3060106137517305,
      "grad_norm": 0.32335394620895386,
      "learning_rate": 0.00017524314140508705,
      "loss": 0.9784,
      "step": 5305
    },
    {
      "epoch": 0.3062990309183203,
      "grad_norm": 0.29012882709503174,
      "learning_rate": 0.0001751767888755785,
      "loss": 0.8869,
      "step": 5310
    },
    {
      "epoch": 0.30658744808491,
      "grad_norm": 0.3356166183948517,
      "learning_rate": 0.00017511036014467157,
      "loss": 0.9995,
      "step": 5315
    },
    {
      "epoch": 0.3068758652514998,
      "grad_norm": 0.29851922392845154,
      "learning_rate": 0.00017504385527970028,
      "loss": 0.9666,
      "step": 5320
    },
    {
      "epoch": 0.3071642824180895,
      "grad_norm": 0.29468950629234314,
      "learning_rate": 0.00017497727434807598,
      "loss": 1.0196,
      "step": 5325
    },
    {
      "epoch": 0.3074526995846793,
      "grad_norm": 0.29625648260116577,
      "learning_rate": 0.00017491061741728702,
      "loss": 1.0007,
      "step": 5330
    },
    {
      "epoch": 0.30774111675126903,
      "grad_norm": 0.30475690960884094,
      "learning_rate": 0.00017484388455489883,
      "loss": 1.0158,
      "step": 5335
    },
    {
      "epoch": 0.3080295339178588,
      "grad_norm": 0.29841533303260803,
      "learning_rate": 0.00017477707582855384,
      "loss": 0.9383,
      "step": 5340
    },
    {
      "epoch": 0.30831795108444854,
      "grad_norm": 0.3112857937812805,
      "learning_rate": 0.00017471019130597127,
      "loss": 0.952,
      "step": 5345
    },
    {
      "epoch": 0.3086063682510383,
      "grad_norm": 0.30072787404060364,
      "learning_rate": 0.00017464323105494727,
      "loss": 0.9599,
      "step": 5350
    },
    {
      "epoch": 0.30889478541762805,
      "grad_norm": 0.33163365721702576,
      "learning_rate": 0.0001745761951433547,
      "loss": 1.03,
      "step": 5355
    },
    {
      "epoch": 0.30918320258421783,
      "grad_norm": 0.3288794159889221,
      "learning_rate": 0.00017450908363914316,
      "loss": 0.9898,
      "step": 5360
    },
    {
      "epoch": 0.30947161975080756,
      "grad_norm": 0.3404468595981598,
      "learning_rate": 0.0001744418966103388,
      "loss": 0.9744,
      "step": 5365
    },
    {
      "epoch": 0.30976003691739734,
      "grad_norm": 0.3131037950515747,
      "learning_rate": 0.00017437463412504437,
      "loss": 1.0509,
      "step": 5370
    },
    {
      "epoch": 0.31004845408398707,
      "grad_norm": 0.31082791090011597,
      "learning_rate": 0.00017430729625143908,
      "loss": 0.9927,
      "step": 5375
    },
    {
      "epoch": 0.31033687125057685,
      "grad_norm": 0.3165504038333893,
      "learning_rate": 0.00017423988305777864,
      "loss": 1.0446,
      "step": 5380
    },
    {
      "epoch": 0.3106252884171666,
      "grad_norm": 0.3058837652206421,
      "learning_rate": 0.00017417239461239498,
      "loss": 0.9513,
      "step": 5385
    },
    {
      "epoch": 0.31091370558375636,
      "grad_norm": 0.31753161549568176,
      "learning_rate": 0.0001741048309836964,
      "loss": 0.9855,
      "step": 5390
    },
    {
      "epoch": 0.3112021227503461,
      "grad_norm": 0.3206409513950348,
      "learning_rate": 0.00017403719224016735,
      "loss": 0.9444,
      "step": 5395
    },
    {
      "epoch": 0.31149053991693587,
      "grad_norm": 0.3047698438167572,
      "learning_rate": 0.00017396947845036844,
      "loss": 0.9291,
      "step": 5400
    },
    {
      "epoch": 0.3117789570835256,
      "grad_norm": 0.3192146122455597,
      "learning_rate": 0.0001739016896829364,
      "loss": 0.9296,
      "step": 5405
    },
    {
      "epoch": 0.3120673742501154,
      "grad_norm": 0.33970245718955994,
      "learning_rate": 0.00017383382600658388,
      "loss": 1.021,
      "step": 5410
    },
    {
      "epoch": 0.3123557914167051,
      "grad_norm": 0.3308781385421753,
      "learning_rate": 0.00017376588749009946,
      "loss": 0.979,
      "step": 5415
    },
    {
      "epoch": 0.3126442085832949,
      "grad_norm": 0.28747421503067017,
      "learning_rate": 0.0001736978742023477,
      "loss": 0.9705,
      "step": 5420
    },
    {
      "epoch": 0.3129326257498846,
      "grad_norm": 0.2859438359737396,
      "learning_rate": 0.0001736297862122688,
      "loss": 0.9193,
      "step": 5425
    },
    {
      "epoch": 0.3132210429164744,
      "grad_norm": 0.3263635039329529,
      "learning_rate": 0.00017356162358887875,
      "loss": 1.0189,
      "step": 5430
    },
    {
      "epoch": 0.31350946008306413,
      "grad_norm": 0.3261274993419647,
      "learning_rate": 0.0001734933864012692,
      "loss": 1.0329,
      "step": 5435
    },
    {
      "epoch": 0.3137978772496539,
      "grad_norm": 0.3366422653198242,
      "learning_rate": 0.00017342507471860733,
      "loss": 0.9987,
      "step": 5440
    },
    {
      "epoch": 0.31408629441624364,
      "grad_norm": 0.29917559027671814,
      "learning_rate": 0.00017335668861013592,
      "loss": 0.9922,
      "step": 5445
    },
    {
      "epoch": 0.3143747115828334,
      "grad_norm": 0.30047109723091125,
      "learning_rate": 0.0001732882281451731,
      "loss": 1.0203,
      "step": 5450
    },
    {
      "epoch": 0.31466312874942315,
      "grad_norm": 0.31822001934051514,
      "learning_rate": 0.00017321969339311241,
      "loss": 0.9877,
      "step": 5455
    },
    {
      "epoch": 0.31495154591601293,
      "grad_norm": 0.35328808426856995,
      "learning_rate": 0.0001731510844234227,
      "loss": 0.9792,
      "step": 5460
    },
    {
      "epoch": 0.31523996308260266,
      "grad_norm": 0.33993515372276306,
      "learning_rate": 0.00017308240130564802,
      "loss": 0.9535,
      "step": 5465
    },
    {
      "epoch": 0.31552838024919244,
      "grad_norm": 0.3094184994697571,
      "learning_rate": 0.0001730136441094076,
      "loss": 1.0092,
      "step": 5470
    },
    {
      "epoch": 0.31581679741578217,
      "grad_norm": 0.31005391478538513,
      "learning_rate": 0.00017294481290439575,
      "loss": 0.9881,
      "step": 5475
    },
    {
      "epoch": 0.31610521458237195,
      "grad_norm": 0.35270652174949646,
      "learning_rate": 0.00017287590776038177,
      "loss": 1.0174,
      "step": 5480
    },
    {
      "epoch": 0.3163936317489617,
      "grad_norm": 0.31024622917175293,
      "learning_rate": 0.00017280692874720998,
      "loss": 0.9835,
      "step": 5485
    },
    {
      "epoch": 0.31668204891555146,
      "grad_norm": 0.30939292907714844,
      "learning_rate": 0.0001727378759347995,
      "loss": 0.9864,
      "step": 5490
    },
    {
      "epoch": 0.3169704660821412,
      "grad_norm": 0.3189314305782318,
      "learning_rate": 0.00017266874939314434,
      "loss": 1.0451,
      "step": 5495
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 0.335095077753067,
      "learning_rate": 0.0001725995491923131,
      "loss": 1.0026,
      "step": 5500
    },
    {
      "epoch": 0.3175473004153207,
      "grad_norm": 0.31108853220939636,
      "learning_rate": 0.0001725302754024492,
      "loss": 1.0007,
      "step": 5505
    },
    {
      "epoch": 0.3178357175819105,
      "grad_norm": 0.30693233013153076,
      "learning_rate": 0.00017246092809377058,
      "loss": 0.9338,
      "step": 5510
    },
    {
      "epoch": 0.3181241347485002,
      "grad_norm": 0.28529733419418335,
      "learning_rate": 0.00017239150733656966,
      "loss": 0.9947,
      "step": 5515
    },
    {
      "epoch": 0.31841255191509,
      "grad_norm": 0.3156437277793884,
      "learning_rate": 0.0001723220132012134,
      "loss": 1.0326,
      "step": 5520
    },
    {
      "epoch": 0.3187009690816797,
      "grad_norm": 0.3074203431606293,
      "learning_rate": 0.0001722524457581431,
      "loss": 1.033,
      "step": 5525
    },
    {
      "epoch": 0.3189893862482695,
      "grad_norm": 0.2984369695186615,
      "learning_rate": 0.00017218280507787435,
      "loss": 0.9646,
      "step": 5530
    },
    {
      "epoch": 0.31927780341485923,
      "grad_norm": 0.31262287497520447,
      "learning_rate": 0.00017211309123099696,
      "loss": 1.0107,
      "step": 5535
    },
    {
      "epoch": 0.319566220581449,
      "grad_norm": 0.3307073712348938,
      "learning_rate": 0.00017204330428817496,
      "loss": 0.982,
      "step": 5540
    },
    {
      "epoch": 0.31985463774803874,
      "grad_norm": 0.31009113788604736,
      "learning_rate": 0.00017197344432014645,
      "loss": 0.943,
      "step": 5545
    },
    {
      "epoch": 0.3201430549146285,
      "grad_norm": 0.3452712297439575,
      "learning_rate": 0.00017190351139772348,
      "loss": 0.9966,
      "step": 5550
    },
    {
      "epoch": 0.32043147208121825,
      "grad_norm": 0.3429424464702606,
      "learning_rate": 0.0001718335055917922,
      "loss": 1.0072,
      "step": 5555
    },
    {
      "epoch": 0.32071988924780803,
      "grad_norm": 0.32212451100349426,
      "learning_rate": 0.00017176342697331246,
      "loss": 0.9886,
      "step": 5560
    },
    {
      "epoch": 0.32100830641439776,
      "grad_norm": 0.3044155538082123,
      "learning_rate": 0.00017169327561331808,
      "loss": 1.0059,
      "step": 5565
    },
    {
      "epoch": 0.32129672358098754,
      "grad_norm": 0.31964996457099915,
      "learning_rate": 0.00017162305158291655,
      "loss": 0.9924,
      "step": 5570
    },
    {
      "epoch": 0.32158514074757727,
      "grad_norm": 0.29949527978897095,
      "learning_rate": 0.0001715527549532889,
      "loss": 1.0092,
      "step": 5575
    },
    {
      "epoch": 0.32187355791416705,
      "grad_norm": 0.3664807081222534,
      "learning_rate": 0.00017148238579568995,
      "loss": 0.9933,
      "step": 5580
    },
    {
      "epoch": 0.3221619750807568,
      "grad_norm": 0.3005771338939667,
      "learning_rate": 0.0001714119441814479,
      "loss": 0.9662,
      "step": 5585
    },
    {
      "epoch": 0.32245039224734656,
      "grad_norm": 0.30175963044166565,
      "learning_rate": 0.00017134143018196447,
      "loss": 0.9698,
      "step": 5590
    },
    {
      "epoch": 0.32273880941393635,
      "grad_norm": 0.30512237548828125,
      "learning_rate": 0.00017127084386871466,
      "loss": 0.9445,
      "step": 5595
    },
    {
      "epoch": 0.3230272265805261,
      "grad_norm": 0.2964697480201721,
      "learning_rate": 0.00017120018531324689,
      "loss": 0.9598,
      "step": 5600
    },
    {
      "epoch": 0.32331564374711586,
      "grad_norm": 0.32373014092445374,
      "learning_rate": 0.0001711294545871827,
      "loss": 0.9872,
      "step": 5605
    },
    {
      "epoch": 0.3236040609137056,
      "grad_norm": 0.31650540232658386,
      "learning_rate": 0.00017105865176221684,
      "loss": 0.9516,
      "step": 5610
    },
    {
      "epoch": 0.32389247808029537,
      "grad_norm": 0.3336973190307617,
      "learning_rate": 0.00017098777691011718,
      "loss": 0.9658,
      "step": 5615
    },
    {
      "epoch": 0.3241808952468851,
      "grad_norm": 0.3417705297470093,
      "learning_rate": 0.00017091683010272447,
      "loss": 0.9986,
      "step": 5620
    },
    {
      "epoch": 0.3244693124134749,
      "grad_norm": 0.31307974457740784,
      "learning_rate": 0.00017084581141195253,
      "loss": 0.9374,
      "step": 5625
    },
    {
      "epoch": 0.3247577295800646,
      "grad_norm": 0.2898584306240082,
      "learning_rate": 0.00017077472090978798,
      "loss": 0.936,
      "step": 5630
    },
    {
      "epoch": 0.3250461467466544,
      "grad_norm": 0.32295459508895874,
      "learning_rate": 0.00017070355866829017,
      "loss": 0.9609,
      "step": 5635
    },
    {
      "epoch": 0.3253345639132441,
      "grad_norm": 0.2989657521247864,
      "learning_rate": 0.00017063232475959133,
      "loss": 0.9887,
      "step": 5640
    },
    {
      "epoch": 0.3256229810798339,
      "grad_norm": 0.31220418214797974,
      "learning_rate": 0.00017056101925589623,
      "loss": 1.0109,
      "step": 5645
    },
    {
      "epoch": 0.3259113982464236,
      "grad_norm": 0.33185145258903503,
      "learning_rate": 0.00017048964222948217,
      "loss": 1.0592,
      "step": 5650
    },
    {
      "epoch": 0.3261998154130134,
      "grad_norm": 0.32256993651390076,
      "learning_rate": 0.000170418193752699,
      "loss": 0.9829,
      "step": 5655
    },
    {
      "epoch": 0.32648823257960313,
      "grad_norm": 0.3156249523162842,
      "learning_rate": 0.00017034667389796904,
      "loss": 0.9038,
      "step": 5660
    },
    {
      "epoch": 0.3267766497461929,
      "grad_norm": 0.32424643635749817,
      "learning_rate": 0.0001702750827377869,
      "loss": 1.0174,
      "step": 5665
    },
    {
      "epoch": 0.32706506691278264,
      "grad_norm": 0.29936036467552185,
      "learning_rate": 0.00017020342034471944,
      "loss": 1.0131,
      "step": 5670
    },
    {
      "epoch": 0.3273534840793724,
      "grad_norm": 0.2940688133239746,
      "learning_rate": 0.0001701316867914058,
      "loss": 1.007,
      "step": 5675
    },
    {
      "epoch": 0.32764190124596215,
      "grad_norm": 0.33732593059539795,
      "learning_rate": 0.00017005988215055718,
      "loss": 0.9991,
      "step": 5680
    },
    {
      "epoch": 0.32793031841255194,
      "grad_norm": 0.2845425605773926,
      "learning_rate": 0.00016998800649495693,
      "loss": 0.9274,
      "step": 5685
    },
    {
      "epoch": 0.32821873557914166,
      "grad_norm": 0.3055282235145569,
      "learning_rate": 0.00016991605989746025,
      "loss": 1.0231,
      "step": 5690
    },
    {
      "epoch": 0.32850715274573145,
      "grad_norm": 0.33001506328582764,
      "learning_rate": 0.0001698440424309944,
      "loss": 1.0293,
      "step": 5695
    },
    {
      "epoch": 0.3287955699123212,
      "grad_norm": 0.32202020287513733,
      "learning_rate": 0.00016977195416855828,
      "loss": 0.9725,
      "step": 5700
    },
    {
      "epoch": 0.32908398707891096,
      "grad_norm": 0.3239056468009949,
      "learning_rate": 0.0001696997951832228,
      "loss": 1.0648,
      "step": 5705
    },
    {
      "epoch": 0.3293724042455007,
      "grad_norm": 0.2853987216949463,
      "learning_rate": 0.00016962756554813037,
      "loss": 0.9922,
      "step": 5710
    },
    {
      "epoch": 0.32966082141209047,
      "grad_norm": 0.2992650866508484,
      "learning_rate": 0.00016955526533649504,
      "loss": 0.9936,
      "step": 5715
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 0.29636356234550476,
      "learning_rate": 0.0001694828946216025,
      "loss": 0.9924,
      "step": 5720
    },
    {
      "epoch": 0.33023765574527,
      "grad_norm": 0.2977345585823059,
      "learning_rate": 0.00016941045347680973,
      "loss": 0.972,
      "step": 5725
    },
    {
      "epoch": 0.3305260729118597,
      "grad_norm": 0.2948797047138214,
      "learning_rate": 0.00016933794197554524,
      "loss": 0.967,
      "step": 5730
    },
    {
      "epoch": 0.3308144900784495,
      "grad_norm": 0.3199259340763092,
      "learning_rate": 0.00016926536019130884,
      "loss": 0.9491,
      "step": 5735
    },
    {
      "epoch": 0.3311029072450392,
      "grad_norm": 0.3436327278614044,
      "learning_rate": 0.00016919270819767152,
      "loss": 0.9865,
      "step": 5740
    },
    {
      "epoch": 0.331391324411629,
      "grad_norm": 0.2954476773738861,
      "learning_rate": 0.0001691199860682755,
      "loss": 0.9706,
      "step": 5745
    },
    {
      "epoch": 0.3316797415782187,
      "grad_norm": 0.30476078391075134,
      "learning_rate": 0.00016904719387683407,
      "loss": 0.9166,
      "step": 5750
    },
    {
      "epoch": 0.3319681587448085,
      "grad_norm": 0.3269798755645752,
      "learning_rate": 0.0001689743316971315,
      "loss": 0.9568,
      "step": 5755
    },
    {
      "epoch": 0.33225657591139823,
      "grad_norm": 0.29287829995155334,
      "learning_rate": 0.00016890139960302304,
      "loss": 0.9311,
      "step": 5760
    },
    {
      "epoch": 0.332544993077988,
      "grad_norm": 0.3217598497867584,
      "learning_rate": 0.00016882839766843485,
      "loss": 1.0023,
      "step": 5765
    },
    {
      "epoch": 0.33283341024457774,
      "grad_norm": 0.306896448135376,
      "learning_rate": 0.00016875532596736373,
      "loss": 0.936,
      "step": 5770
    },
    {
      "epoch": 0.3331218274111675,
      "grad_norm": 0.32418233156204224,
      "learning_rate": 0.00016868218457387736,
      "loss": 0.9939,
      "step": 5775
    },
    {
      "epoch": 0.33341024457775725,
      "grad_norm": 0.325740247964859,
      "learning_rate": 0.00016860897356211403,
      "loss": 0.9642,
      "step": 5780
    },
    {
      "epoch": 0.33369866174434704,
      "grad_norm": 0.29711437225341797,
      "learning_rate": 0.00016853569300628253,
      "loss": 0.943,
      "step": 5785
    },
    {
      "epoch": 0.33398707891093676,
      "grad_norm": 0.3324286937713623,
      "learning_rate": 0.00016846234298066218,
      "loss": 0.9789,
      "step": 5790
    },
    {
      "epoch": 0.33427549607752655,
      "grad_norm": 0.3122076690196991,
      "learning_rate": 0.00016838892355960274,
      "loss": 1.0296,
      "step": 5795
    },
    {
      "epoch": 0.3345639132441163,
      "grad_norm": 0.32273441553115845,
      "learning_rate": 0.0001683154348175243,
      "loss": 0.9853,
      "step": 5800
    },
    {
      "epoch": 0.33485233041070606,
      "grad_norm": 0.35843372344970703,
      "learning_rate": 0.00016824187682891714,
      "loss": 1.0195,
      "step": 5805
    },
    {
      "epoch": 0.3351407475772958,
      "grad_norm": 0.33089888095855713,
      "learning_rate": 0.00016816824966834183,
      "loss": 1.0346,
      "step": 5810
    },
    {
      "epoch": 0.33542916474388557,
      "grad_norm": 0.3273804783821106,
      "learning_rate": 0.00016809455341042906,
      "loss": 1.0994,
      "step": 5815
    },
    {
      "epoch": 0.3357175819104753,
      "grad_norm": 0.3188895583152771,
      "learning_rate": 0.00016802078812987948,
      "loss": 0.9853,
      "step": 5820
    },
    {
      "epoch": 0.3360059990770651,
      "grad_norm": 0.3157017230987549,
      "learning_rate": 0.00016794695390146374,
      "loss": 0.9587,
      "step": 5825
    },
    {
      "epoch": 0.3362944162436548,
      "grad_norm": 0.3376935124397278,
      "learning_rate": 0.0001678730508000224,
      "loss": 0.8833,
      "step": 5830
    },
    {
      "epoch": 0.3365828334102446,
      "grad_norm": 0.32190343737602234,
      "learning_rate": 0.00016779907890046575,
      "loss": 1.0592,
      "step": 5835
    },
    {
      "epoch": 0.3368712505768343,
      "grad_norm": 0.3182050883769989,
      "learning_rate": 0.00016772503827777396,
      "loss": 0.9834,
      "step": 5840
    },
    {
      "epoch": 0.3371596677434241,
      "grad_norm": 0.28917548060417175,
      "learning_rate": 0.00016765092900699675,
      "loss": 0.939,
      "step": 5845
    },
    {
      "epoch": 0.3374480849100138,
      "grad_norm": 0.31394827365875244,
      "learning_rate": 0.00016757675116325343,
      "loss": 0.9955,
      "step": 5850
    },
    {
      "epoch": 0.3377365020766036,
      "grad_norm": 0.29517030715942383,
      "learning_rate": 0.00016750250482173287,
      "loss": 0.9974,
      "step": 5855
    },
    {
      "epoch": 0.33802491924319333,
      "grad_norm": 0.30578872561454773,
      "learning_rate": 0.0001674281900576933,
      "loss": 1.0233,
      "step": 5860
    },
    {
      "epoch": 0.3383133364097831,
      "grad_norm": 0.3007958233356476,
      "learning_rate": 0.00016735380694646236,
      "loss": 0.9597,
      "step": 5865
    },
    {
      "epoch": 0.33860175357637284,
      "grad_norm": 0.3038438856601715,
      "learning_rate": 0.00016727935556343698,
      "loss": 0.9776,
      "step": 5870
    },
    {
      "epoch": 0.3388901707429626,
      "grad_norm": 0.31204402446746826,
      "learning_rate": 0.00016720483598408326,
      "loss": 0.9629,
      "step": 5875
    },
    {
      "epoch": 0.33917858790955235,
      "grad_norm": 0.3024327754974365,
      "learning_rate": 0.0001671302482839364,
      "loss": 0.9279,
      "step": 5880
    },
    {
      "epoch": 0.33946700507614214,
      "grad_norm": 0.31090736389160156,
      "learning_rate": 0.00016705559253860067,
      "loss": 0.95,
      "step": 5885
    },
    {
      "epoch": 0.33975542224273186,
      "grad_norm": 0.3600342571735382,
      "learning_rate": 0.00016698086882374939,
      "loss": 1.0251,
      "step": 5890
    },
    {
      "epoch": 0.34004383940932165,
      "grad_norm": 0.3187331557273865,
      "learning_rate": 0.00016690607721512465,
      "loss": 0.9718,
      "step": 5895
    },
    {
      "epoch": 0.3403322565759114,
      "grad_norm": 0.3226509988307953,
      "learning_rate": 0.00016683121778853746,
      "loss": 0.9604,
      "step": 5900
    },
    {
      "epoch": 0.34062067374250116,
      "grad_norm": 0.31832170486450195,
      "learning_rate": 0.00016675629061986747,
      "loss": 0.95,
      "step": 5905
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 0.29185858368873596,
      "learning_rate": 0.00016668129578506315,
      "loss": 1.0028,
      "step": 5910
    },
    {
      "epoch": 0.34119750807568067,
      "grad_norm": 0.30505648255348206,
      "learning_rate": 0.00016660623336014137,
      "loss": 0.9506,
      "step": 5915
    },
    {
      "epoch": 0.3414859252422704,
      "grad_norm": 0.30190521478652954,
      "learning_rate": 0.00016653110342118764,
      "loss": 0.9906,
      "step": 5920
    },
    {
      "epoch": 0.3417743424088602,
      "grad_norm": 0.28448954224586487,
      "learning_rate": 0.00016645590604435592,
      "loss": 0.9539,
      "step": 5925
    },
    {
      "epoch": 0.3420627595754499,
      "grad_norm": 0.3030353784561157,
      "learning_rate": 0.0001663806413058684,
      "loss": 1.0043,
      "step": 5930
    },
    {
      "epoch": 0.3423511767420397,
      "grad_norm": 0.321473091840744,
      "learning_rate": 0.00016630530928201566,
      "loss": 0.9228,
      "step": 5935
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 0.3062508702278137,
      "learning_rate": 0.00016622991004915645,
      "loss": 1.003,
      "step": 5940
    },
    {
      "epoch": 0.3429280110752192,
      "grad_norm": 0.30550527572631836,
      "learning_rate": 0.00016615444368371768,
      "loss": 0.9754,
      "step": 5945
    },
    {
      "epoch": 0.343216428241809,
      "grad_norm": 0.2936306297779083,
      "learning_rate": 0.00016607891026219418,
      "loss": 0.9352,
      "step": 5950
    },
    {
      "epoch": 0.3435048454083987,
      "grad_norm": 0.3158090114593506,
      "learning_rate": 0.0001660033098611489,
      "loss": 1.0072,
      "step": 5955
    },
    {
      "epoch": 0.3437932625749885,
      "grad_norm": 0.30227041244506836,
      "learning_rate": 0.00016592764255721264,
      "loss": 1.0412,
      "step": 5960
    },
    {
      "epoch": 0.3440816797415782,
      "grad_norm": 0.319016695022583,
      "learning_rate": 0.00016585190842708397,
      "loss": 0.9956,
      "step": 5965
    },
    {
      "epoch": 0.344370096908168,
      "grad_norm": 0.3120563328266144,
      "learning_rate": 0.00016577610754752925,
      "loss": 0.988,
      "step": 5970
    },
    {
      "epoch": 0.3446585140747577,
      "grad_norm": 0.31160399317741394,
      "learning_rate": 0.00016570023999538247,
      "loss": 0.9859,
      "step": 5975
    },
    {
      "epoch": 0.3449469312413475,
      "grad_norm": 0.33563148975372314,
      "learning_rate": 0.00016562430584754516,
      "loss": 0.9598,
      "step": 5980
    },
    {
      "epoch": 0.34523534840793724,
      "grad_norm": 0.30352646112442017,
      "learning_rate": 0.00016554830518098647,
      "loss": 1.0189,
      "step": 5985
    },
    {
      "epoch": 0.345523765574527,
      "grad_norm": 0.32421186566352844,
      "learning_rate": 0.00016547223807274287,
      "loss": 0.957,
      "step": 5990
    },
    {
      "epoch": 0.34581218274111675,
      "grad_norm": 0.304166316986084,
      "learning_rate": 0.00016539610459991816,
      "loss": 0.9022,
      "step": 5995
    },
    {
      "epoch": 0.34610059990770653,
      "grad_norm": 0.29363691806793213,
      "learning_rate": 0.00016531990483968357,
      "loss": 0.9502,
      "step": 6000
    },
    {
      "epoch": 0.34638901707429626,
      "grad_norm": 0.2799575626850128,
      "learning_rate": 0.00016524363886927734,
      "loss": 0.9679,
      "step": 6005
    },
    {
      "epoch": 0.34667743424088604,
      "grad_norm": 0.31635069847106934,
      "learning_rate": 0.00016516730676600493,
      "loss": 0.9555,
      "step": 6010
    },
    {
      "epoch": 0.34696585140747577,
      "grad_norm": 0.35151907801628113,
      "learning_rate": 0.00016509090860723874,
      "loss": 1.0035,
      "step": 6015
    },
    {
      "epoch": 0.34725426857406555,
      "grad_norm": 0.3108111321926117,
      "learning_rate": 0.00016501444447041824,
      "loss": 0.9622,
      "step": 6020
    },
    {
      "epoch": 0.3475426857406553,
      "grad_norm": 0.30233535170555115,
      "learning_rate": 0.00016493791443304974,
      "loss": 0.9351,
      "step": 6025
    },
    {
      "epoch": 0.34783110290724506,
      "grad_norm": 0.30143481492996216,
      "learning_rate": 0.00016486131857270628,
      "loss": 0.9925,
      "step": 6030
    },
    {
      "epoch": 0.3481195200738348,
      "grad_norm": 0.29362648725509644,
      "learning_rate": 0.00016478465696702767,
      "loss": 1.0382,
      "step": 6035
    },
    {
      "epoch": 0.34840793724042457,
      "grad_norm": 0.3204686641693115,
      "learning_rate": 0.00016470792969372039,
      "loss": 0.952,
      "step": 6040
    },
    {
      "epoch": 0.3486963544070143,
      "grad_norm": 0.3001445233821869,
      "learning_rate": 0.00016463113683055748,
      "loss": 1.04,
      "step": 6045
    },
    {
      "epoch": 0.3489847715736041,
      "grad_norm": 0.29392385482788086,
      "learning_rate": 0.00016455427845537835,
      "loss": 0.9467,
      "step": 6050
    },
    {
      "epoch": 0.3492731887401938,
      "grad_norm": 0.33031490445137024,
      "learning_rate": 0.000164477354646089,
      "loss": 0.9735,
      "step": 6055
    },
    {
      "epoch": 0.3495616059067836,
      "grad_norm": 0.3166663944721222,
      "learning_rate": 0.0001644003654806616,
      "loss": 1.0249,
      "step": 6060
    },
    {
      "epoch": 0.3498500230733733,
      "grad_norm": 0.33485808968544006,
      "learning_rate": 0.00016432331103713465,
      "loss": 0.9104,
      "step": 6065
    },
    {
      "epoch": 0.3501384402399631,
      "grad_norm": 0.3192949891090393,
      "learning_rate": 0.00016424619139361282,
      "loss": 1.1312,
      "step": 6070
    },
    {
      "epoch": 0.3504268574065528,
      "grad_norm": 0.29363909363746643,
      "learning_rate": 0.00016416900662826676,
      "loss": 0.9196,
      "step": 6075
    },
    {
      "epoch": 0.3507152745731426,
      "grad_norm": 0.310404896736145,
      "learning_rate": 0.00016409175681933328,
      "loss": 0.9595,
      "step": 6080
    },
    {
      "epoch": 0.35100369173973234,
      "grad_norm": 0.3029721677303314,
      "learning_rate": 0.00016401444204511504,
      "loss": 0.9954,
      "step": 6085
    },
    {
      "epoch": 0.3512921089063221,
      "grad_norm": 0.3040766716003418,
      "learning_rate": 0.00016393706238398056,
      "loss": 0.9765,
      "step": 6090
    },
    {
      "epoch": 0.35158052607291185,
      "grad_norm": 0.34396886825561523,
      "learning_rate": 0.00016385961791436416,
      "loss": 0.9751,
      "step": 6095
    },
    {
      "epoch": 0.35186894323950163,
      "grad_norm": 0.3235977590084076,
      "learning_rate": 0.00016378210871476577,
      "loss": 0.9789,
      "step": 6100
    },
    {
      "epoch": 0.35215736040609136,
      "grad_norm": 0.32465118169784546,
      "learning_rate": 0.000163704534863751,
      "loss": 0.9131,
      "step": 6105
    },
    {
      "epoch": 0.35244577757268114,
      "grad_norm": 0.3071576952934265,
      "learning_rate": 0.00016362689643995105,
      "loss": 1.0081,
      "step": 6110
    },
    {
      "epoch": 0.35273419473927087,
      "grad_norm": 0.34030023217201233,
      "learning_rate": 0.00016354919352206242,
      "loss": 0.9797,
      "step": 6115
    },
    {
      "epoch": 0.35302261190586065,
      "grad_norm": 0.31560373306274414,
      "learning_rate": 0.00016347142618884712,
      "loss": 0.9883,
      "step": 6120
    },
    {
      "epoch": 0.3533110290724504,
      "grad_norm": 0.3090599775314331,
      "learning_rate": 0.00016339359451913237,
      "loss": 0.9808,
      "step": 6125
    },
    {
      "epoch": 0.35359944623904016,
      "grad_norm": 0.3321547210216522,
      "learning_rate": 0.00016331569859181062,
      "loss": 0.9567,
      "step": 6130
    },
    {
      "epoch": 0.3538878634056299,
      "grad_norm": 0.32135429978370667,
      "learning_rate": 0.00016323773848583953,
      "loss": 0.9698,
      "step": 6135
    },
    {
      "epoch": 0.35417628057221967,
      "grad_norm": 0.29677772521972656,
      "learning_rate": 0.00016315971428024168,
      "loss": 0.9991,
      "step": 6140
    },
    {
      "epoch": 0.3544646977388094,
      "grad_norm": 0.3057997524738312,
      "learning_rate": 0.00016308162605410472,
      "loss": 0.9348,
      "step": 6145
    },
    {
      "epoch": 0.3547531149053992,
      "grad_norm": 0.2951048016548157,
      "learning_rate": 0.0001630034738865812,
      "loss": 0.9084,
      "step": 6150
    },
    {
      "epoch": 0.3550415320719889,
      "grad_norm": 0.3238579332828522,
      "learning_rate": 0.00016292525785688842,
      "loss": 0.9612,
      "step": 6155
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.31826281547546387,
      "learning_rate": 0.00016284697804430843,
      "loss": 1.004,
      "step": 6160
    },
    {
      "epoch": 0.3556183664051684,
      "grad_norm": 0.3389774262905121,
      "learning_rate": 0.00016276863452818798,
      "loss": 0.9539,
      "step": 6165
    },
    {
      "epoch": 0.3559067835717582,
      "grad_norm": 0.3698391318321228,
      "learning_rate": 0.00016269022738793832,
      "loss": 1.1102,
      "step": 6170
    },
    {
      "epoch": 0.35619520073834793,
      "grad_norm": 0.32285526394844055,
      "learning_rate": 0.0001626117567030352,
      "loss": 0.9831,
      "step": 6175
    },
    {
      "epoch": 0.3564836179049377,
      "grad_norm": 0.3242669105529785,
      "learning_rate": 0.00016253322255301887,
      "loss": 0.9603,
      "step": 6180
    },
    {
      "epoch": 0.35677203507152744,
      "grad_norm": 0.3193565607070923,
      "learning_rate": 0.00016245462501749384,
      "loss": 1.0302,
      "step": 6185
    },
    {
      "epoch": 0.3570604522381172,
      "grad_norm": 0.3204268217086792,
      "learning_rate": 0.0001623759641761289,
      "loss": 1.0044,
      "step": 6190
    },
    {
      "epoch": 0.35734886940470695,
      "grad_norm": 0.28855907917022705,
      "learning_rate": 0.00016229724010865688,
      "loss": 0.9842,
      "step": 6195
    },
    {
      "epoch": 0.35763728657129673,
      "grad_norm": 0.3026552200317383,
      "learning_rate": 0.00016221845289487492,
      "loss": 0.9917,
      "step": 6200
    },
    {
      "epoch": 0.35792570373788646,
      "grad_norm": 0.33038827776908875,
      "learning_rate": 0.000162139602614644,
      "loss": 1.0027,
      "step": 6205
    },
    {
      "epoch": 0.35821412090447624,
      "grad_norm": 0.2994578778743744,
      "learning_rate": 0.00016206068934788905,
      "loss": 0.9743,
      "step": 6210
    },
    {
      "epoch": 0.35850253807106597,
      "grad_norm": 0.29378122091293335,
      "learning_rate": 0.00016198171317459895,
      "loss": 0.8709,
      "step": 6215
    },
    {
      "epoch": 0.35879095523765575,
      "grad_norm": 0.29244738817214966,
      "learning_rate": 0.0001619026741748262,
      "loss": 0.8787,
      "step": 6220
    },
    {
      "epoch": 0.3590793724042455,
      "grad_norm": 0.28639066219329834,
      "learning_rate": 0.00016182357242868704,
      "loss": 1.0387,
      "step": 6225
    },
    {
      "epoch": 0.35936778957083526,
      "grad_norm": 0.33279699087142944,
      "learning_rate": 0.00016174440801636138,
      "loss": 0.9867,
      "step": 6230
    },
    {
      "epoch": 0.359656206737425,
      "grad_norm": 0.287534236907959,
      "learning_rate": 0.00016166518101809257,
      "loss": 1.0265,
      "step": 6235
    },
    {
      "epoch": 0.35994462390401477,
      "grad_norm": 0.30234718322753906,
      "learning_rate": 0.0001615858915141874,
      "loss": 0.9442,
      "step": 6240
    },
    {
      "epoch": 0.3602330410706045,
      "grad_norm": 0.3060368597507477,
      "learning_rate": 0.00016150653958501605,
      "loss": 0.9826,
      "step": 6245
    },
    {
      "epoch": 0.3605214582371943,
      "grad_norm": 0.3163430988788605,
      "learning_rate": 0.00016142712531101196,
      "loss": 0.9444,
      "step": 6250
    },
    {
      "epoch": 0.360809875403784,
      "grad_norm": 0.32550159096717834,
      "learning_rate": 0.00016134764877267176,
      "loss": 1.0238,
      "step": 6255
    },
    {
      "epoch": 0.3610982925703738,
      "grad_norm": 0.32287704944610596,
      "learning_rate": 0.0001612681100505552,
      "loss": 1.0156,
      "step": 6260
    },
    {
      "epoch": 0.3613867097369635,
      "grad_norm": 0.29501256346702576,
      "learning_rate": 0.00016118850922528508,
      "loss": 0.9461,
      "step": 6265
    },
    {
      "epoch": 0.3616751269035533,
      "grad_norm": 0.3390810191631317,
      "learning_rate": 0.00016110884637754713,
      "loss": 1.008,
      "step": 6270
    },
    {
      "epoch": 0.36196354407014303,
      "grad_norm": 0.33765143156051636,
      "learning_rate": 0.00016102912158808992,
      "loss": 0.9744,
      "step": 6275
    },
    {
      "epoch": 0.3622519612367328,
      "grad_norm": 0.3217780292034149,
      "learning_rate": 0.00016094933493772487,
      "loss": 0.985,
      "step": 6280
    },
    {
      "epoch": 0.36254037840332254,
      "grad_norm": 0.3047623038291931,
      "learning_rate": 0.00016086948650732605,
      "loss": 1.003,
      "step": 6285
    },
    {
      "epoch": 0.3628287955699123,
      "grad_norm": 0.35244515538215637,
      "learning_rate": 0.00016078957637783017,
      "loss": 0.9471,
      "step": 6290
    },
    {
      "epoch": 0.3631172127365021,
      "grad_norm": 0.3418562114238739,
      "learning_rate": 0.0001607096046302365,
      "loss": 0.9324,
      "step": 6295
    },
    {
      "epoch": 0.36340562990309183,
      "grad_norm": 0.32155272364616394,
      "learning_rate": 0.00016062957134560675,
      "loss": 0.9364,
      "step": 6300
    },
    {
      "epoch": 0.3636940470696816,
      "grad_norm": 0.320909321308136,
      "learning_rate": 0.00016054947660506494,
      "loss": 0.9888,
      "step": 6305
    },
    {
      "epoch": 0.36398246423627134,
      "grad_norm": 0.30325910449028015,
      "learning_rate": 0.0001604693204897975,
      "loss": 0.93,
      "step": 6310
    },
    {
      "epoch": 0.3642708814028611,
      "grad_norm": 0.3497485816478729,
      "learning_rate": 0.0001603891030810531,
      "loss": 1.0178,
      "step": 6315
    },
    {
      "epoch": 0.36455929856945085,
      "grad_norm": 0.3147721290588379,
      "learning_rate": 0.00016030882446014234,
      "loss": 0.9138,
      "step": 6320
    },
    {
      "epoch": 0.36484771573604063,
      "grad_norm": 0.31661319732666016,
      "learning_rate": 0.00016022848470843802,
      "loss": 0.9617,
      "step": 6325
    },
    {
      "epoch": 0.36513613290263036,
      "grad_norm": 0.3261774778366089,
      "learning_rate": 0.00016014808390737485,
      "loss": 0.93,
      "step": 6330
    },
    {
      "epoch": 0.36542455006922014,
      "grad_norm": 0.31109583377838135,
      "learning_rate": 0.00016006762213844947,
      "loss": 1.0262,
      "step": 6335
    },
    {
      "epoch": 0.36571296723580987,
      "grad_norm": 0.3141893446445465,
      "learning_rate": 0.00015998709948322027,
      "loss": 1.003,
      "step": 6340
    },
    {
      "epoch": 0.36600138440239965,
      "grad_norm": 0.30060815811157227,
      "learning_rate": 0.00015990651602330741,
      "loss": 0.965,
      "step": 6345
    },
    {
      "epoch": 0.3662898015689894,
      "grad_norm": 0.3138159215450287,
      "learning_rate": 0.00015982587184039263,
      "loss": 0.9993,
      "step": 6350
    },
    {
      "epoch": 0.36657821873557916,
      "grad_norm": 0.30857542157173157,
      "learning_rate": 0.00015974516701621925,
      "loss": 0.9592,
      "step": 6355
    },
    {
      "epoch": 0.3668666359021689,
      "grad_norm": 0.32973772287368774,
      "learning_rate": 0.00015966440163259202,
      "loss": 0.9751,
      "step": 6360
    },
    {
      "epoch": 0.3671550530687587,
      "grad_norm": 0.2922951281070709,
      "learning_rate": 0.00015958357577137712,
      "loss": 0.99,
      "step": 6365
    },
    {
      "epoch": 0.3674434702353484,
      "grad_norm": 0.31458890438079834,
      "learning_rate": 0.00015950268951450198,
      "loss": 1.0391,
      "step": 6370
    },
    {
      "epoch": 0.3677318874019382,
      "grad_norm": 0.2942887246608734,
      "learning_rate": 0.0001594217429439553,
      "loss": 0.8909,
      "step": 6375
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 0.35455599427223206,
      "learning_rate": 0.00015934073614178696,
      "loss": 0.9409,
      "step": 6380
    },
    {
      "epoch": 0.3683087217351177,
      "grad_norm": 0.32667621970176697,
      "learning_rate": 0.00015925966919010773,
      "loss": 0.9703,
      "step": 6385
    },
    {
      "epoch": 0.3685971389017074,
      "grad_norm": 0.3091600239276886,
      "learning_rate": 0.00015917854217108954,
      "loss": 0.9424,
      "step": 6390
    },
    {
      "epoch": 0.3688855560682972,
      "grad_norm": 0.2892536222934723,
      "learning_rate": 0.0001590973551669651,
      "loss": 0.9214,
      "step": 6395
    },
    {
      "epoch": 0.36917397323488693,
      "grad_norm": 0.3017813265323639,
      "learning_rate": 0.00015901610826002787,
      "loss": 0.9155,
      "step": 6400
    },
    {
      "epoch": 0.3694623904014767,
      "grad_norm": 0.34070533514022827,
      "learning_rate": 0.00015893480153263213,
      "loss": 0.9354,
      "step": 6405
    },
    {
      "epoch": 0.36975080756806644,
      "grad_norm": 0.3184935748577118,
      "learning_rate": 0.0001588534350671928,
      "loss": 1.0191,
      "step": 6410
    },
    {
      "epoch": 0.3700392247346562,
      "grad_norm": 0.3152550756931305,
      "learning_rate": 0.00015877200894618532,
      "loss": 0.946,
      "step": 6415
    },
    {
      "epoch": 0.37032764190124595,
      "grad_norm": 0.3296424448490143,
      "learning_rate": 0.00015869052325214554,
      "loss": 1.1301,
      "step": 6420
    },
    {
      "epoch": 0.37061605906783573,
      "grad_norm": 0.3112740218639374,
      "learning_rate": 0.0001586089780676698,
      "loss": 0.9555,
      "step": 6425
    },
    {
      "epoch": 0.37090447623442546,
      "grad_norm": 0.34751659631729126,
      "learning_rate": 0.00015852737347541465,
      "loss": 1.0707,
      "step": 6430
    },
    {
      "epoch": 0.37119289340101524,
      "grad_norm": 0.3217742145061493,
      "learning_rate": 0.00015844570955809694,
      "loss": 0.9849,
      "step": 6435
    },
    {
      "epoch": 0.37148131056760497,
      "grad_norm": 0.35857248306274414,
      "learning_rate": 0.00015836398639849355,
      "loss": 1.0154,
      "step": 6440
    },
    {
      "epoch": 0.37176972773419475,
      "grad_norm": 0.3333515226840973,
      "learning_rate": 0.00015828220407944154,
      "loss": 1.0059,
      "step": 6445
    },
    {
      "epoch": 0.3720581449007845,
      "grad_norm": 0.31444647908210754,
      "learning_rate": 0.00015820036268383785,
      "loss": 1.0227,
      "step": 6450
    },
    {
      "epoch": 0.37234656206737426,
      "grad_norm": 0.3097481429576874,
      "learning_rate": 0.0001581184622946393,
      "loss": 0.9426,
      "step": 6455
    },
    {
      "epoch": 0.372634979233964,
      "grad_norm": 0.32116419076919556,
      "learning_rate": 0.00015803650299486252,
      "loss": 0.9438,
      "step": 6460
    },
    {
      "epoch": 0.3729233964005538,
      "grad_norm": 0.3407098352909088,
      "learning_rate": 0.00015795448486758388,
      "loss": 1.0078,
      "step": 6465
    },
    {
      "epoch": 0.3732118135671435,
      "grad_norm": 0.3003779351711273,
      "learning_rate": 0.00015787240799593937,
      "loss": 0.9564,
      "step": 6470
    },
    {
      "epoch": 0.3735002307337333,
      "grad_norm": 0.28709009289741516,
      "learning_rate": 0.00015779027246312448,
      "loss": 0.9607,
      "step": 6475
    },
    {
      "epoch": 0.373788647900323,
      "grad_norm": 0.3018396496772766,
      "learning_rate": 0.00015770807835239424,
      "loss": 0.9747,
      "step": 6480
    },
    {
      "epoch": 0.3740770650669128,
      "grad_norm": 0.2960224151611328,
      "learning_rate": 0.00015762582574706298,
      "loss": 0.9753,
      "step": 6485
    },
    {
      "epoch": 0.3743654822335025,
      "grad_norm": 0.29029327630996704,
      "learning_rate": 0.00015754351473050435,
      "loss": 0.9303,
      "step": 6490
    },
    {
      "epoch": 0.3746538994000923,
      "grad_norm": 0.31783005595207214,
      "learning_rate": 0.00015746114538615124,
      "loss": 0.9661,
      "step": 6495
    },
    {
      "epoch": 0.37494231656668203,
      "grad_norm": 0.3153396248817444,
      "learning_rate": 0.0001573787177974956,
      "loss": 1.0327,
      "step": 6500
    },
    {
      "epoch": 0.3752307337332718,
      "grad_norm": 0.28863847255706787,
      "learning_rate": 0.00015729623204808847,
      "loss": 0.9343,
      "step": 6505
    },
    {
      "epoch": 0.37551915089986154,
      "grad_norm": 0.31009867787361145,
      "learning_rate": 0.00015721368822153986,
      "loss": 0.9591,
      "step": 6510
    },
    {
      "epoch": 0.3758075680664513,
      "grad_norm": 0.3038884401321411,
      "learning_rate": 0.00015713108640151853,
      "loss": 0.9929,
      "step": 6515
    },
    {
      "epoch": 0.37609598523304105,
      "grad_norm": 0.3314804434776306,
      "learning_rate": 0.0001570484266717522,
      "loss": 0.9816,
      "step": 6520
    },
    {
      "epoch": 0.37638440239963084,
      "grad_norm": 0.3055729568004608,
      "learning_rate": 0.0001569657091160271,
      "loss": 0.9554,
      "step": 6525
    },
    {
      "epoch": 0.37667281956622056,
      "grad_norm": 0.29258403182029724,
      "learning_rate": 0.00015688293381818823,
      "loss": 0.9179,
      "step": 6530
    },
    {
      "epoch": 0.37696123673281035,
      "grad_norm": 0.30928143858909607,
      "learning_rate": 0.00015680010086213908,
      "loss": 0.9057,
      "step": 6535
    },
    {
      "epoch": 0.3772496538994001,
      "grad_norm": 0.3039039373397827,
      "learning_rate": 0.0001567172103318415,
      "loss": 0.9327,
      "step": 6540
    },
    {
      "epoch": 0.37753807106598986,
      "grad_norm": 0.30910852551460266,
      "learning_rate": 0.00015663426231131585,
      "loss": 1.0292,
      "step": 6545
    },
    {
      "epoch": 0.3778264882325796,
      "grad_norm": 0.30116426944732666,
      "learning_rate": 0.00015655125688464062,
      "loss": 0.9728,
      "step": 6550
    },
    {
      "epoch": 0.37811490539916937,
      "grad_norm": 0.293541818857193,
      "learning_rate": 0.0001564681941359525,
      "loss": 0.9825,
      "step": 6555
    },
    {
      "epoch": 0.3784033225657591,
      "grad_norm": 0.3120487630367279,
      "learning_rate": 0.00015638507414944642,
      "loss": 1.0029,
      "step": 6560
    },
    {
      "epoch": 0.3786917397323489,
      "grad_norm": 0.32010599970817566,
      "learning_rate": 0.00015630189700937516,
      "loss": 0.8747,
      "step": 6565
    },
    {
      "epoch": 0.3789801568989386,
      "grad_norm": 0.2856518626213074,
      "learning_rate": 0.0001562186628000496,
      "loss": 0.9913,
      "step": 6570
    },
    {
      "epoch": 0.3792685740655284,
      "grad_norm": 0.2798760235309601,
      "learning_rate": 0.00015613537160583829,
      "loss": 0.9699,
      "step": 6575
    },
    {
      "epoch": 0.3795569912321181,
      "grad_norm": 0.2922796308994293,
      "learning_rate": 0.00015605202351116765,
      "loss": 0.9391,
      "step": 6580
    },
    {
      "epoch": 0.3798454083987079,
      "grad_norm": 0.30891719460487366,
      "learning_rate": 0.0001559686186005218,
      "loss": 0.9812,
      "step": 6585
    },
    {
      "epoch": 0.3801338255652976,
      "grad_norm": 0.29908978939056396,
      "learning_rate": 0.00015588515695844234,
      "loss": 0.9752,
      "step": 6590
    },
    {
      "epoch": 0.3804222427318874,
      "grad_norm": 0.31617826223373413,
      "learning_rate": 0.00015580163866952846,
      "loss": 0.9801,
      "step": 6595
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 0.31201690435409546,
      "learning_rate": 0.00015571806381843676,
      "loss": 0.9959,
      "step": 6600
    },
    {
      "epoch": 0.3809990770650669,
      "grad_norm": 0.3266178369522095,
      "learning_rate": 0.00015563443248988116,
      "loss": 0.9302,
      "step": 6605
    },
    {
      "epoch": 0.38128749423165664,
      "grad_norm": 0.3142068386077881,
      "learning_rate": 0.00015555074476863282,
      "loss": 0.9307,
      "step": 6610
    },
    {
      "epoch": 0.3815759113982464,
      "grad_norm": 0.3227985203266144,
      "learning_rate": 0.0001554670007395201,
      "loss": 0.917,
      "step": 6615
    },
    {
      "epoch": 0.38186432856483615,
      "grad_norm": 0.3386768698692322,
      "learning_rate": 0.00015538320048742835,
      "loss": 1.0017,
      "step": 6620
    },
    {
      "epoch": 0.38215274573142594,
      "grad_norm": 0.3295430541038513,
      "learning_rate": 0.0001552993440973,
      "loss": 1.0512,
      "step": 6625
    },
    {
      "epoch": 0.38244116289801566,
      "grad_norm": 0.2914939820766449,
      "learning_rate": 0.00015521543165413428,
      "loss": 0.9476,
      "step": 6630
    },
    {
      "epoch": 0.38272958006460545,
      "grad_norm": 0.3167172372341156,
      "learning_rate": 0.0001551314632429874,
      "loss": 1.0272,
      "step": 6635
    },
    {
      "epoch": 0.3830179972311952,
      "grad_norm": 0.3107517957687378,
      "learning_rate": 0.00015504743894897218,
      "loss": 0.9716,
      "step": 6640
    },
    {
      "epoch": 0.38330641439778496,
      "grad_norm": 0.3065870404243469,
      "learning_rate": 0.00015496335885725808,
      "loss": 1.0395,
      "step": 6645
    },
    {
      "epoch": 0.38359483156437474,
      "grad_norm": 0.3044165372848511,
      "learning_rate": 0.00015487922305307118,
      "loss": 0.9299,
      "step": 6650
    },
    {
      "epoch": 0.38388324873096447,
      "grad_norm": 0.29545021057128906,
      "learning_rate": 0.00015479503162169395,
      "loss": 0.9476,
      "step": 6655
    },
    {
      "epoch": 0.38417166589755425,
      "grad_norm": 0.3002990186214447,
      "learning_rate": 0.0001547107846484653,
      "loss": 0.9963,
      "step": 6660
    },
    {
      "epoch": 0.384460083064144,
      "grad_norm": 0.33107367157936096,
      "learning_rate": 0.00015462648221878052,
      "loss": 0.9646,
      "step": 6665
    },
    {
      "epoch": 0.38474850023073376,
      "grad_norm": 0.3228285610675812,
      "learning_rate": 0.00015454212441809095,
      "loss": 1.0694,
      "step": 6670
    },
    {
      "epoch": 0.3850369173973235,
      "grad_norm": 0.3043341040611267,
      "learning_rate": 0.00015445771133190412,
      "loss": 0.9703,
      "step": 6675
    },
    {
      "epoch": 0.38532533456391327,
      "grad_norm": 0.3010495901107788,
      "learning_rate": 0.00015437324304578363,
      "loss": 0.9275,
      "step": 6680
    },
    {
      "epoch": 0.385613751730503,
      "grad_norm": 0.3137162923812866,
      "learning_rate": 0.00015428871964534907,
      "loss": 1.0299,
      "step": 6685
    },
    {
      "epoch": 0.3859021688970928,
      "grad_norm": 0.3311811089515686,
      "learning_rate": 0.00015420414121627575,
      "loss": 0.9973,
      "step": 6690
    },
    {
      "epoch": 0.3861905860636825,
      "grad_norm": 0.2890627682209015,
      "learning_rate": 0.00015411950784429486,
      "loss": 0.9347,
      "step": 6695
    },
    {
      "epoch": 0.3864790032302723,
      "grad_norm": 0.32376575469970703,
      "learning_rate": 0.00015403481961519334,
      "loss": 0.9397,
      "step": 6700
    },
    {
      "epoch": 0.386767420396862,
      "grad_norm": 0.3138471245765686,
      "learning_rate": 0.0001539500766148136,
      "loss": 0.9889,
      "step": 6705
    },
    {
      "epoch": 0.3870558375634518,
      "grad_norm": 0.33765313029289246,
      "learning_rate": 0.00015386527892905365,
      "loss": 1.0173,
      "step": 6710
    },
    {
      "epoch": 0.3873442547300415,
      "grad_norm": 0.3095814287662506,
      "learning_rate": 0.0001537804266438669,
      "loss": 1.0448,
      "step": 6715
    },
    {
      "epoch": 0.3876326718966313,
      "grad_norm": 0.3238964080810547,
      "learning_rate": 0.0001536955198452621,
      "loss": 0.9843,
      "step": 6720
    },
    {
      "epoch": 0.38792108906322104,
      "grad_norm": 0.3234662711620331,
      "learning_rate": 0.00015361055861930328,
      "loss": 1.0203,
      "step": 6725
    },
    {
      "epoch": 0.3882095062298108,
      "grad_norm": 0.2965511679649353,
      "learning_rate": 0.0001535255430521097,
      "loss": 0.9069,
      "step": 6730
    },
    {
      "epoch": 0.38849792339640055,
      "grad_norm": 0.32197943329811096,
      "learning_rate": 0.00015344047322985555,
      "loss": 0.9222,
      "step": 6735
    },
    {
      "epoch": 0.38878634056299033,
      "grad_norm": 0.2778390645980835,
      "learning_rate": 0.00015335534923877013,
      "loss": 0.9485,
      "step": 6740
    },
    {
      "epoch": 0.38907475772958006,
      "grad_norm": 0.37352126836776733,
      "learning_rate": 0.0001532701711651376,
      "loss": 1.0752,
      "step": 6745
    },
    {
      "epoch": 0.38936317489616984,
      "grad_norm": 0.32387804985046387,
      "learning_rate": 0.000153184939095297,
      "loss": 0.9647,
      "step": 6750
    },
    {
      "epoch": 0.38965159206275957,
      "grad_norm": 0.30921655893325806,
      "learning_rate": 0.00015309965311564194,
      "loss": 0.9618,
      "step": 6755
    },
    {
      "epoch": 0.38994000922934935,
      "grad_norm": 0.3546212911605835,
      "learning_rate": 0.00015301431331262095,
      "loss": 1.0117,
      "step": 6760
    },
    {
      "epoch": 0.3902284263959391,
      "grad_norm": 0.29029014706611633,
      "learning_rate": 0.00015292891977273683,
      "loss": 0.9657,
      "step": 6765
    },
    {
      "epoch": 0.39051684356252886,
      "grad_norm": 0.3335913121700287,
      "learning_rate": 0.00015284347258254704,
      "loss": 0.9895,
      "step": 6770
    },
    {
      "epoch": 0.3908052607291186,
      "grad_norm": 0.2966814935207367,
      "learning_rate": 0.00015275797182866336,
      "loss": 0.9901,
      "step": 6775
    },
    {
      "epoch": 0.39109367789570837,
      "grad_norm": 0.29803818464279175,
      "learning_rate": 0.0001526724175977518,
      "loss": 0.9878,
      "step": 6780
    },
    {
      "epoch": 0.3913820950622981,
      "grad_norm": 0.30125004053115845,
      "learning_rate": 0.00015258680997653275,
      "loss": 1.0261,
      "step": 6785
    },
    {
      "epoch": 0.3916705122288879,
      "grad_norm": 0.33115720748901367,
      "learning_rate": 0.0001525011490517805,
      "loss": 0.9936,
      "step": 6790
    },
    {
      "epoch": 0.3919589293954776,
      "grad_norm": 0.3167159855365753,
      "learning_rate": 0.0001524154349103235,
      "loss": 0.9257,
      "step": 6795
    },
    {
      "epoch": 0.3922473465620674,
      "grad_norm": 0.3028007447719574,
      "learning_rate": 0.00015232966763904416,
      "loss": 0.9338,
      "step": 6800
    },
    {
      "epoch": 0.3925357637286571,
      "grad_norm": 0.28450897336006165,
      "learning_rate": 0.00015224384732487868,
      "loss": 0.9803,
      "step": 6805
    },
    {
      "epoch": 0.3928241808952469,
      "grad_norm": 0.30390694737434387,
      "learning_rate": 0.00015215797405481704,
      "loss": 0.929,
      "step": 6810
    },
    {
      "epoch": 0.3931125980618366,
      "grad_norm": 0.28544360399246216,
      "learning_rate": 0.00015207204791590288,
      "loss": 1.0024,
      "step": 6815
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 0.31118243932724,
      "learning_rate": 0.00015198606899523352,
      "loss": 0.9276,
      "step": 6820
    },
    {
      "epoch": 0.39368943239501614,
      "grad_norm": 0.32028988003730774,
      "learning_rate": 0.00015190003737995967,
      "loss": 0.9942,
      "step": 6825
    },
    {
      "epoch": 0.3939778495616059,
      "grad_norm": 0.3325750231742859,
      "learning_rate": 0.00015181395315728554,
      "loss": 0.9642,
      "step": 6830
    },
    {
      "epoch": 0.39426626672819565,
      "grad_norm": 0.33307701349258423,
      "learning_rate": 0.00015172781641446852,
      "loss": 0.9986,
      "step": 6835
    },
    {
      "epoch": 0.39455468389478543,
      "grad_norm": 0.31831297278404236,
      "learning_rate": 0.00015164162723881947,
      "loss": 0.9855,
      "step": 6840
    },
    {
      "epoch": 0.39484310106137516,
      "grad_norm": 0.28365767002105713,
      "learning_rate": 0.00015155538571770218,
      "loss": 0.9571,
      "step": 6845
    },
    {
      "epoch": 0.39513151822796494,
      "grad_norm": 0.3154856860637665,
      "learning_rate": 0.00015146909193853363,
      "loss": 0.9904,
      "step": 6850
    },
    {
      "epoch": 0.39541993539455467,
      "grad_norm": 0.3086288571357727,
      "learning_rate": 0.0001513827459887837,
      "loss": 0.965,
      "step": 6855
    },
    {
      "epoch": 0.39570835256114445,
      "grad_norm": 0.3205987513065338,
      "learning_rate": 0.0001512963479559752,
      "loss": 1.0125,
      "step": 6860
    },
    {
      "epoch": 0.3959967697277342,
      "grad_norm": 0.29472029209136963,
      "learning_rate": 0.00015120989792768367,
      "loss": 0.9909,
      "step": 6865
    },
    {
      "epoch": 0.39628518689432396,
      "grad_norm": 0.34999117255210876,
      "learning_rate": 0.00015112339599153746,
      "loss": 1.0217,
      "step": 6870
    },
    {
      "epoch": 0.3965736040609137,
      "grad_norm": 0.2855675518512726,
      "learning_rate": 0.00015103684223521742,
      "loss": 1.0462,
      "step": 6875
    },
    {
      "epoch": 0.39686202122750347,
      "grad_norm": 0.3382554352283478,
      "learning_rate": 0.00015095023674645698,
      "loss": 1.038,
      "step": 6880
    },
    {
      "epoch": 0.3971504383940932,
      "grad_norm": 0.3201339840888977,
      "learning_rate": 0.000150863579613042,
      "loss": 0.9419,
      "step": 6885
    },
    {
      "epoch": 0.397438855560683,
      "grad_norm": 0.3181384205818176,
      "learning_rate": 0.00015077687092281074,
      "loss": 0.984,
      "step": 6890
    },
    {
      "epoch": 0.3977272727272727,
      "grad_norm": 0.3145751357078552,
      "learning_rate": 0.00015069011076365357,
      "loss": 0.952,
      "step": 6895
    },
    {
      "epoch": 0.3980156898938625,
      "grad_norm": 0.2770393490791321,
      "learning_rate": 0.00015060329922351326,
      "loss": 0.9023,
      "step": 6900
    },
    {
      "epoch": 0.3983041070604522,
      "grad_norm": 0.3097230792045593,
      "learning_rate": 0.00015051643639038447,
      "loss": 0.9512,
      "step": 6905
    },
    {
      "epoch": 0.398592524227042,
      "grad_norm": 0.31388595700263977,
      "learning_rate": 0.0001504295223523139,
      "loss": 0.9373,
      "step": 6910
    },
    {
      "epoch": 0.3988809413936317,
      "grad_norm": 0.30969393253326416,
      "learning_rate": 0.0001503425571974002,
      "loss": 1.0089,
      "step": 6915
    },
    {
      "epoch": 0.3991693585602215,
      "grad_norm": 0.3253278434276581,
      "learning_rate": 0.00015025554101379379,
      "loss": 0.9958,
      "step": 6920
    },
    {
      "epoch": 0.39945777572681124,
      "grad_norm": 0.28762054443359375,
      "learning_rate": 0.00015016847388969683,
      "loss": 0.9883,
      "step": 6925
    },
    {
      "epoch": 0.399746192893401,
      "grad_norm": 0.31128495931625366,
      "learning_rate": 0.0001500813559133631,
      "loss": 0.9916,
      "step": 6930
    },
    {
      "epoch": 0.40003461005999075,
      "grad_norm": 0.3101608157157898,
      "learning_rate": 0.00014999418717309793,
      "loss": 0.9811,
      "step": 6935
    },
    {
      "epoch": 0.40032302722658053,
      "grad_norm": 0.3171060085296631,
      "learning_rate": 0.00014990696775725812,
      "loss": 0.9856,
      "step": 6940
    },
    {
      "epoch": 0.40061144439317026,
      "grad_norm": 0.33557629585266113,
      "learning_rate": 0.00014981969775425185,
      "loss": 1.0214,
      "step": 6945
    },
    {
      "epoch": 0.40089986155976004,
      "grad_norm": 0.2946593761444092,
      "learning_rate": 0.0001497323772525385,
      "loss": 0.9413,
      "step": 6950
    },
    {
      "epoch": 0.40118827872634977,
      "grad_norm": 0.39665666222572327,
      "learning_rate": 0.00014964500634062877,
      "loss": 1.0003,
      "step": 6955
    },
    {
      "epoch": 0.40147669589293955,
      "grad_norm": 0.28560730814933777,
      "learning_rate": 0.00014955758510708434,
      "loss": 0.9861,
      "step": 6960
    },
    {
      "epoch": 0.4017651130595293,
      "grad_norm": 0.30924665927886963,
      "learning_rate": 0.00014947011364051794,
      "loss": 1.0107,
      "step": 6965
    },
    {
      "epoch": 0.40205353022611906,
      "grad_norm": 0.2980976998806,
      "learning_rate": 0.00014938259202959317,
      "loss": 0.9834,
      "step": 6970
    },
    {
      "epoch": 0.4023419473927088,
      "grad_norm": 0.31071528792381287,
      "learning_rate": 0.00014929502036302458,
      "loss": 1.0827,
      "step": 6975
    },
    {
      "epoch": 0.40263036455929857,
      "grad_norm": 0.30230122804641724,
      "learning_rate": 0.00014920739872957732,
      "loss": 0.9203,
      "step": 6980
    },
    {
      "epoch": 0.4029187817258883,
      "grad_norm": 0.3271790146827698,
      "learning_rate": 0.0001491197272180673,
      "loss": 0.9462,
      "step": 6985
    },
    {
      "epoch": 0.4032071988924781,
      "grad_norm": 0.3099105954170227,
      "learning_rate": 0.00014903200591736087,
      "loss": 1.0032,
      "step": 6990
    },
    {
      "epoch": 0.40349561605906786,
      "grad_norm": 0.298093318939209,
      "learning_rate": 0.00014894423491637498,
      "loss": 0.9668,
      "step": 6995
    },
    {
      "epoch": 0.4037840332256576,
      "grad_norm": 0.3025701940059662,
      "learning_rate": 0.00014885641430407686,
      "loss": 0.9442,
      "step": 7000
    },
    {
      "epoch": 0.4040724503922474,
      "grad_norm": 0.30681777000427246,
      "learning_rate": 0.00014876854416948405,
      "loss": 1.0638,
      "step": 7005
    },
    {
      "epoch": 0.4043608675588371,
      "grad_norm": 0.2943170964717865,
      "learning_rate": 0.0001486806246016643,
      "loss": 1.0032,
      "step": 7010
    },
    {
      "epoch": 0.4046492847254269,
      "grad_norm": 0.35952475666999817,
      "learning_rate": 0.00014859265568973546,
      "loss": 0.9793,
      "step": 7015
    },
    {
      "epoch": 0.4049377018920166,
      "grad_norm": 0.30241405963897705,
      "learning_rate": 0.00014850463752286543,
      "loss": 0.9869,
      "step": 7020
    },
    {
      "epoch": 0.4052261190586064,
      "grad_norm": 0.30669084191322327,
      "learning_rate": 0.000148416570190272,
      "loss": 0.9314,
      "step": 7025
    },
    {
      "epoch": 0.4055145362251961,
      "grad_norm": 0.3244229853153229,
      "learning_rate": 0.00014832845378122276,
      "loss": 1.0062,
      "step": 7030
    },
    {
      "epoch": 0.4058029533917859,
      "grad_norm": 0.3314943313598633,
      "learning_rate": 0.0001482402883850351,
      "loss": 0.9591,
      "step": 7035
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.30962324142456055,
      "learning_rate": 0.00014815207409107608,
      "loss": 0.9297,
      "step": 7040
    },
    {
      "epoch": 0.4063797877249654,
      "grad_norm": 0.3165276050567627,
      "learning_rate": 0.00014806381098876227,
      "loss": 1.0911,
      "step": 7045
    },
    {
      "epoch": 0.40666820489155514,
      "grad_norm": 0.3096354901790619,
      "learning_rate": 0.00014797549916755975,
      "loss": 0.9584,
      "step": 7050
    },
    {
      "epoch": 0.4069566220581449,
      "grad_norm": 0.3194222152233124,
      "learning_rate": 0.00014788713871698397,
      "loss": 0.9737,
      "step": 7055
    },
    {
      "epoch": 0.40724503922473465,
      "grad_norm": 0.3206169605255127,
      "learning_rate": 0.0001477987297265997,
      "loss": 0.9935,
      "step": 7060
    },
    {
      "epoch": 0.40753345639132443,
      "grad_norm": 0.2967379689216614,
      "learning_rate": 0.00014771027228602086,
      "loss": 0.9805,
      "step": 7065
    },
    {
      "epoch": 0.40782187355791416,
      "grad_norm": 0.31947875022888184,
      "learning_rate": 0.0001476217664849105,
      "loss": 1.0187,
      "step": 7070
    },
    {
      "epoch": 0.40811029072450394,
      "grad_norm": 0.3055800497531891,
      "learning_rate": 0.00014753321241298072,
      "loss": 0.9962,
      "step": 7075
    },
    {
      "epoch": 0.40839870789109367,
      "grad_norm": 0.3292485475540161,
      "learning_rate": 0.00014744461015999248,
      "loss": 0.9679,
      "step": 7080
    },
    {
      "epoch": 0.40868712505768345,
      "grad_norm": 0.3264350891113281,
      "learning_rate": 0.00014735595981575568,
      "loss": 1.0154,
      "step": 7085
    },
    {
      "epoch": 0.4089755422242732,
      "grad_norm": 0.32917913794517517,
      "learning_rate": 0.00014726726147012889,
      "loss": 0.9888,
      "step": 7090
    },
    {
      "epoch": 0.40926395939086296,
      "grad_norm": 0.34813347458839417,
      "learning_rate": 0.00014717851521301933,
      "loss": 1.0196,
      "step": 7095
    },
    {
      "epoch": 0.4095523765574527,
      "grad_norm": 0.407846063375473,
      "learning_rate": 0.00014708972113438285,
      "loss": 1.0062,
      "step": 7100
    },
    {
      "epoch": 0.4098407937240425,
      "grad_norm": 0.3226325511932373,
      "learning_rate": 0.00014700087932422367,
      "loss": 0.9841,
      "step": 7105
    },
    {
      "epoch": 0.4101292108906322,
      "grad_norm": 0.28346356749534607,
      "learning_rate": 0.00014691198987259454,
      "loss": 0.9968,
      "step": 7110
    },
    {
      "epoch": 0.410417628057222,
      "grad_norm": 0.3126351237297058,
      "learning_rate": 0.00014682305286959631,
      "loss": 0.9506,
      "step": 7115
    },
    {
      "epoch": 0.4107060452238117,
      "grad_norm": 0.3126891851425171,
      "learning_rate": 0.00014673406840537824,
      "loss": 0.9417,
      "step": 7120
    },
    {
      "epoch": 0.4109944623904015,
      "grad_norm": 0.2949593961238861,
      "learning_rate": 0.00014664503657013756,
      "loss": 0.9393,
      "step": 7125
    },
    {
      "epoch": 0.4112828795569912,
      "grad_norm": 0.3290235996246338,
      "learning_rate": 0.00014655595745411955,
      "loss": 1.0241,
      "step": 7130
    },
    {
      "epoch": 0.411571296723581,
      "grad_norm": 0.3105536997318268,
      "learning_rate": 0.00014646683114761735,
      "loss": 0.911,
      "step": 7135
    },
    {
      "epoch": 0.41185971389017073,
      "grad_norm": 0.29566848278045654,
      "learning_rate": 0.00014637765774097206,
      "loss": 1.0272,
      "step": 7140
    },
    {
      "epoch": 0.4121481310567605,
      "grad_norm": 0.3105669915676117,
      "learning_rate": 0.00014628843732457248,
      "loss": 1.01,
      "step": 7145
    },
    {
      "epoch": 0.41243654822335024,
      "grad_norm": 0.2995404005050659,
      "learning_rate": 0.000146199169988855,
      "loss": 0.9107,
      "step": 7150
    },
    {
      "epoch": 0.41272496538994,
      "grad_norm": 0.31976473331451416,
      "learning_rate": 0.00014610985582430363,
      "loss": 1.0212,
      "step": 7155
    },
    {
      "epoch": 0.41301338255652975,
      "grad_norm": 0.32434025406837463,
      "learning_rate": 0.00014602049492144984,
      "loss": 0.9824,
      "step": 7160
    },
    {
      "epoch": 0.41330179972311953,
      "grad_norm": 0.2803219258785248,
      "learning_rate": 0.00014593108737087241,
      "loss": 0.9267,
      "step": 7165
    },
    {
      "epoch": 0.41359021688970926,
      "grad_norm": 0.2851792573928833,
      "learning_rate": 0.00014584163326319754,
      "loss": 0.9277,
      "step": 7170
    },
    {
      "epoch": 0.41387863405629904,
      "grad_norm": 0.32938867807388306,
      "learning_rate": 0.00014575213268909842,
      "loss": 0.9406,
      "step": 7175
    },
    {
      "epoch": 0.41416705122288877,
      "grad_norm": 0.3028603792190552,
      "learning_rate": 0.00014566258573929557,
      "loss": 0.9894,
      "step": 7180
    },
    {
      "epoch": 0.41445546838947855,
      "grad_norm": 0.2850208580493927,
      "learning_rate": 0.00014557299250455633,
      "loss": 1.0507,
      "step": 7185
    },
    {
      "epoch": 0.4147438855560683,
      "grad_norm": 0.331676721572876,
      "learning_rate": 0.0001454833530756951,
      "loss": 0.994,
      "step": 7190
    },
    {
      "epoch": 0.41503230272265806,
      "grad_norm": 0.30083078145980835,
      "learning_rate": 0.00014539366754357297,
      "loss": 1.009,
      "step": 7195
    },
    {
      "epoch": 0.4153207198892478,
      "grad_norm": 0.3384031653404236,
      "learning_rate": 0.0001453039359990979,
      "loss": 1.0258,
      "step": 7200
    },
    {
      "epoch": 0.4156091370558376,
      "grad_norm": 0.2972491979598999,
      "learning_rate": 0.0001452141585332243,
      "loss": 1.0282,
      "step": 7205
    },
    {
      "epoch": 0.4158975542224273,
      "grad_norm": 0.3435482382774353,
      "learning_rate": 0.00014512433523695332,
      "loss": 0.9686,
      "step": 7210
    },
    {
      "epoch": 0.4161859713890171,
      "grad_norm": 0.318953275680542,
      "learning_rate": 0.0001450344662013325,
      "loss": 0.9745,
      "step": 7215
    },
    {
      "epoch": 0.4164743885556068,
      "grad_norm": 0.3264816701412201,
      "learning_rate": 0.0001449445515174557,
      "loss": 0.9602,
      "step": 7220
    },
    {
      "epoch": 0.4167628057221966,
      "grad_norm": 0.30100223422050476,
      "learning_rate": 0.00014485459127646307,
      "loss": 0.9971,
      "step": 7225
    },
    {
      "epoch": 0.4170512228887863,
      "grad_norm": 0.29668816924095154,
      "learning_rate": 0.000144764585569541,
      "loss": 0.9988,
      "step": 7230
    },
    {
      "epoch": 0.4173396400553761,
      "grad_norm": 0.32644376158714294,
      "learning_rate": 0.00014467453448792188,
      "loss": 0.9821,
      "step": 7235
    },
    {
      "epoch": 0.41762805722196583,
      "grad_norm": 0.31165027618408203,
      "learning_rate": 0.00014458443812288415,
      "loss": 0.9882,
      "step": 7240
    },
    {
      "epoch": 0.4179164743885556,
      "grad_norm": 0.3170810043811798,
      "learning_rate": 0.00014449429656575205,
      "loss": 0.9978,
      "step": 7245
    },
    {
      "epoch": 0.41820489155514534,
      "grad_norm": 0.29307204484939575,
      "learning_rate": 0.00014440410990789582,
      "loss": 0.9341,
      "step": 7250
    },
    {
      "epoch": 0.4184933087217351,
      "grad_norm": 0.33344531059265137,
      "learning_rate": 0.00014431387824073125,
      "loss": 0.9938,
      "step": 7255
    },
    {
      "epoch": 0.41878172588832485,
      "grad_norm": 0.3290488123893738,
      "learning_rate": 0.00014422360165571976,
      "loss": 0.9463,
      "step": 7260
    },
    {
      "epoch": 0.41907014305491463,
      "grad_norm": 0.33802330493927,
      "learning_rate": 0.0001441332802443684,
      "loss": 0.9686,
      "step": 7265
    },
    {
      "epoch": 0.41935856022150436,
      "grad_norm": 0.3061620891094208,
      "learning_rate": 0.0001440429140982296,
      "loss": 0.9766,
      "step": 7270
    },
    {
      "epoch": 0.41964697738809414,
      "grad_norm": 0.30755650997161865,
      "learning_rate": 0.00014395250330890113,
      "loss": 0.9463,
      "step": 7275
    },
    {
      "epoch": 0.41993539455468387,
      "grad_norm": 0.3155378997325897,
      "learning_rate": 0.000143862047968026,
      "loss": 0.9407,
      "step": 7280
    },
    {
      "epoch": 0.42022381172127365,
      "grad_norm": 0.3173658847808838,
      "learning_rate": 0.00014377154816729246,
      "loss": 0.9581,
      "step": 7285
    },
    {
      "epoch": 0.4205122288878634,
      "grad_norm": 0.32038891315460205,
      "learning_rate": 0.00014368100399843366,
      "loss": 1.0172,
      "step": 7290
    },
    {
      "epoch": 0.42080064605445316,
      "grad_norm": 0.31820791959762573,
      "learning_rate": 0.0001435904155532279,
      "loss": 0.9354,
      "step": 7295
    },
    {
      "epoch": 0.4210890632210429,
      "grad_norm": 0.3380236029624939,
      "learning_rate": 0.00014349978292349825,
      "loss": 0.98,
      "step": 7300
    },
    {
      "epoch": 0.4213774803876327,
      "grad_norm": 0.28636041283607483,
      "learning_rate": 0.00014340910620111265,
      "loss": 1.0169,
      "step": 7305
    },
    {
      "epoch": 0.4216658975542224,
      "grad_norm": 0.27922528982162476,
      "learning_rate": 0.0001433183854779836,
      "loss": 0.9598,
      "step": 7310
    },
    {
      "epoch": 0.4219543147208122,
      "grad_norm": 0.2958151698112488,
      "learning_rate": 0.00014322762084606843,
      "loss": 0.9574,
      "step": 7315
    },
    {
      "epoch": 0.4222427318874019,
      "grad_norm": 0.322035551071167,
      "learning_rate": 0.00014313681239736865,
      "loss": 0.9757,
      "step": 7320
    },
    {
      "epoch": 0.4225311490539917,
      "grad_norm": 0.3313840627670288,
      "learning_rate": 0.00014304596022393052,
      "loss": 0.9766,
      "step": 7325
    },
    {
      "epoch": 0.4228195662205814,
      "grad_norm": 0.32730937004089355,
      "learning_rate": 0.00014295506441784435,
      "loss": 0.9779,
      "step": 7330
    },
    {
      "epoch": 0.4231079833871712,
      "grad_norm": 0.3087776005268097,
      "learning_rate": 0.0001428641250712449,
      "loss": 0.951,
      "step": 7335
    },
    {
      "epoch": 0.423396400553761,
      "grad_norm": 0.28657153248786926,
      "learning_rate": 0.00014277314227631086,
      "loss": 1.0397,
      "step": 7340
    },
    {
      "epoch": 0.4236848177203507,
      "grad_norm": 0.3487570881843567,
      "learning_rate": 0.00014268211612526515,
      "loss": 1.0699,
      "step": 7345
    },
    {
      "epoch": 0.4239732348869405,
      "grad_norm": 0.31562715768814087,
      "learning_rate": 0.00014259104671037452,
      "loss": 0.9947,
      "step": 7350
    },
    {
      "epoch": 0.4242616520535302,
      "grad_norm": 0.3040665090084076,
      "learning_rate": 0.00014249993412394958,
      "loss": 0.9742,
      "step": 7355
    },
    {
      "epoch": 0.42455006922012,
      "grad_norm": 0.28409066796302795,
      "learning_rate": 0.00014240877845834472,
      "loss": 0.9224,
      "step": 7360
    },
    {
      "epoch": 0.42483848638670973,
      "grad_norm": 0.3323284089565277,
      "learning_rate": 0.00014231757980595803,
      "loss": 0.8986,
      "step": 7365
    },
    {
      "epoch": 0.4251269035532995,
      "grad_norm": 0.33217912912368774,
      "learning_rate": 0.00014222633825923108,
      "loss": 0.9937,
      "step": 7370
    },
    {
      "epoch": 0.42541532071988925,
      "grad_norm": 0.31237930059432983,
      "learning_rate": 0.00014213505391064905,
      "loss": 1.0209,
      "step": 7375
    },
    {
      "epoch": 0.42570373788647903,
      "grad_norm": 0.33656054735183716,
      "learning_rate": 0.00014204372685274039,
      "loss": 1.0028,
      "step": 7380
    },
    {
      "epoch": 0.42599215505306876,
      "grad_norm": 0.31092768907546997,
      "learning_rate": 0.00014195235717807687,
      "loss": 0.9364,
      "step": 7385
    },
    {
      "epoch": 0.42628057221965854,
      "grad_norm": 0.3244108557701111,
      "learning_rate": 0.00014186094497927352,
      "loss": 1.0035,
      "step": 7390
    },
    {
      "epoch": 0.42656898938624827,
      "grad_norm": 0.31575846672058105,
      "learning_rate": 0.0001417694903489884,
      "loss": 0.9241,
      "step": 7395
    },
    {
      "epoch": 0.42685740655283805,
      "grad_norm": 0.2925664782524109,
      "learning_rate": 0.00014167799337992258,
      "loss": 1.0251,
      "step": 7400
    },
    {
      "epoch": 0.4271458237194278,
      "grad_norm": 0.31504228711128235,
      "learning_rate": 0.00014158645416482011,
      "loss": 0.9347,
      "step": 7405
    },
    {
      "epoch": 0.42743424088601756,
      "grad_norm": 0.2886911928653717,
      "learning_rate": 0.00014149487279646781,
      "loss": 0.903,
      "step": 7410
    },
    {
      "epoch": 0.4277226580526073,
      "grad_norm": 0.3230822682380676,
      "learning_rate": 0.00014140324936769524,
      "loss": 0.9672,
      "step": 7415
    },
    {
      "epoch": 0.42801107521919707,
      "grad_norm": 0.29172009229660034,
      "learning_rate": 0.00014131158397137462,
      "loss": 0.9792,
      "step": 7420
    },
    {
      "epoch": 0.4282994923857868,
      "grad_norm": 0.2988271415233612,
      "learning_rate": 0.00014121987670042064,
      "loss": 0.9907,
      "step": 7425
    },
    {
      "epoch": 0.4285879095523766,
      "grad_norm": 0.3402508795261383,
      "learning_rate": 0.00014112812764779053,
      "loss": 0.9922,
      "step": 7430
    },
    {
      "epoch": 0.4288763267189663,
      "grad_norm": 0.3000766932964325,
      "learning_rate": 0.00014103633690648376,
      "loss": 1.009,
      "step": 7435
    },
    {
      "epoch": 0.4291647438855561,
      "grad_norm": 0.287803590297699,
      "learning_rate": 0.00014094450456954218,
      "loss": 0.9878,
      "step": 7440
    },
    {
      "epoch": 0.4294531610521458,
      "grad_norm": 0.31101298332214355,
      "learning_rate": 0.00014085263073004972,
      "loss": 0.9197,
      "step": 7445
    },
    {
      "epoch": 0.4297415782187356,
      "grad_norm": 0.30686867237091064,
      "learning_rate": 0.00014076071548113238,
      "loss": 1.0046,
      "step": 7450
    },
    {
      "epoch": 0.4300299953853253,
      "grad_norm": 0.32540297508239746,
      "learning_rate": 0.00014066875891595811,
      "loss": 0.9943,
      "step": 7455
    },
    {
      "epoch": 0.4303184125519151,
      "grad_norm": 0.32309621572494507,
      "learning_rate": 0.0001405767611277369,
      "loss": 0.9379,
      "step": 7460
    },
    {
      "epoch": 0.43060682971850484,
      "grad_norm": 0.30645352602005005,
      "learning_rate": 0.0001404847222097203,
      "loss": 0.9759,
      "step": 7465
    },
    {
      "epoch": 0.4308952468850946,
      "grad_norm": 0.3069199025630951,
      "learning_rate": 0.00014039264225520175,
      "loss": 0.9785,
      "step": 7470
    },
    {
      "epoch": 0.43118366405168435,
      "grad_norm": 0.30918145179748535,
      "learning_rate": 0.00014030052135751613,
      "loss": 0.954,
      "step": 7475
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 0.32399114966392517,
      "learning_rate": 0.0001402083596100399,
      "loss": 1.0961,
      "step": 7480
    },
    {
      "epoch": 0.43176049838486386,
      "grad_norm": 0.33643585443496704,
      "learning_rate": 0.00014011615710619085,
      "loss": 1.0204,
      "step": 7485
    },
    {
      "epoch": 0.43204891555145364,
      "grad_norm": 0.32877838611602783,
      "learning_rate": 0.00014002391393942826,
      "loss": 0.97,
      "step": 7490
    },
    {
      "epoch": 0.43233733271804337,
      "grad_norm": 0.31330958008766174,
      "learning_rate": 0.00013993163020325242,
      "loss": 0.9539,
      "step": 7495
    },
    {
      "epoch": 0.43262574988463315,
      "grad_norm": 0.33290615677833557,
      "learning_rate": 0.00013983930599120487,
      "loss": 1.0575,
      "step": 7500
    },
    {
      "epoch": 0.4329141670512229,
      "grad_norm": 0.2965332567691803,
      "learning_rate": 0.00013974694139686812,
      "loss": 0.9635,
      "step": 7505
    },
    {
      "epoch": 0.43320258421781266,
      "grad_norm": 0.3181534707546234,
      "learning_rate": 0.0001396545365138657,
      "loss": 1.1166,
      "step": 7510
    },
    {
      "epoch": 0.4334910013844024,
      "grad_norm": 0.31753942370414734,
      "learning_rate": 0.00013956209143586181,
      "loss": 1.0099,
      "step": 7515
    },
    {
      "epoch": 0.43377941855099217,
      "grad_norm": 0.31996965408325195,
      "learning_rate": 0.00013946960625656153,
      "loss": 0.9755,
      "step": 7520
    },
    {
      "epoch": 0.4340678357175819,
      "grad_norm": 0.31612056493759155,
      "learning_rate": 0.00013937708106971056,
      "loss": 1.0283,
      "step": 7525
    },
    {
      "epoch": 0.4343562528841717,
      "grad_norm": 0.3391875922679901,
      "learning_rate": 0.00013928451596909516,
      "loss": 0.9188,
      "step": 7530
    },
    {
      "epoch": 0.4346446700507614,
      "grad_norm": 0.323889821767807,
      "learning_rate": 0.00013919191104854196,
      "loss": 0.9728,
      "step": 7535
    },
    {
      "epoch": 0.4349330872173512,
      "grad_norm": 0.3123781085014343,
      "learning_rate": 0.00013909926640191813,
      "loss": 1.0161,
      "step": 7540
    },
    {
      "epoch": 0.4352215043839409,
      "grad_norm": 0.3935023844242096,
      "learning_rate": 0.00013900658212313093,
      "loss": 1.0055,
      "step": 7545
    },
    {
      "epoch": 0.4355099215505307,
      "grad_norm": 0.30142349004745483,
      "learning_rate": 0.0001389138583061279,
      "loss": 1.0241,
      "step": 7550
    },
    {
      "epoch": 0.4357983387171204,
      "grad_norm": 0.3473080098628998,
      "learning_rate": 0.00013882109504489659,
      "loss": 0.911,
      "step": 7555
    },
    {
      "epoch": 0.4360867558837102,
      "grad_norm": 0.2962372601032257,
      "learning_rate": 0.00013872829243346453,
      "loss": 0.9448,
      "step": 7560
    },
    {
      "epoch": 0.43637517305029994,
      "grad_norm": 0.2840719521045685,
      "learning_rate": 0.00013863545056589925,
      "loss": 0.9958,
      "step": 7565
    },
    {
      "epoch": 0.4366635902168897,
      "grad_norm": 0.3180067837238312,
      "learning_rate": 0.00013854256953630797,
      "loss": 0.991,
      "step": 7570
    },
    {
      "epoch": 0.43695200738347945,
      "grad_norm": 0.37251415848731995,
      "learning_rate": 0.0001384496494388376,
      "loss": 0.9553,
      "step": 7575
    },
    {
      "epoch": 0.43724042455006923,
      "grad_norm": 0.36039602756500244,
      "learning_rate": 0.00013835669036767466,
      "loss": 0.9297,
      "step": 7580
    },
    {
      "epoch": 0.43752884171665896,
      "grad_norm": 0.30950820446014404,
      "learning_rate": 0.00013826369241704524,
      "loss": 0.9534,
      "step": 7585
    },
    {
      "epoch": 0.43781725888324874,
      "grad_norm": 0.3068082928657532,
      "learning_rate": 0.00013817065568121477,
      "loss": 0.9184,
      "step": 7590
    },
    {
      "epoch": 0.43810567604983847,
      "grad_norm": 0.30188676714897156,
      "learning_rate": 0.00013807758025448803,
      "loss": 0.9726,
      "step": 7595
    },
    {
      "epoch": 0.43839409321642825,
      "grad_norm": 0.290171355009079,
      "learning_rate": 0.00013798446623120893,
      "loss": 0.9274,
      "step": 7600
    },
    {
      "epoch": 0.438682510383018,
      "grad_norm": 0.3495076894760132,
      "learning_rate": 0.0001378913137057607,
      "loss": 0.9428,
      "step": 7605
    },
    {
      "epoch": 0.43897092754960776,
      "grad_norm": 0.313632994890213,
      "learning_rate": 0.00013779812277256537,
      "loss": 0.9952,
      "step": 7610
    },
    {
      "epoch": 0.4392593447161975,
      "grad_norm": 0.3400636911392212,
      "learning_rate": 0.00013770489352608404,
      "loss": 0.917,
      "step": 7615
    },
    {
      "epoch": 0.43954776188278727,
      "grad_norm": 0.33605697751045227,
      "learning_rate": 0.0001376116260608166,
      "loss": 0.9214,
      "step": 7620
    },
    {
      "epoch": 0.439836179049377,
      "grad_norm": 0.2898849546909332,
      "learning_rate": 0.0001375183204713017,
      "loss": 0.9064,
      "step": 7625
    },
    {
      "epoch": 0.4401245962159668,
      "grad_norm": 0.3333107829093933,
      "learning_rate": 0.0001374249768521166,
      "loss": 1.067,
      "step": 7630
    },
    {
      "epoch": 0.4404130133825565,
      "grad_norm": 0.3403453528881073,
      "learning_rate": 0.00013733159529787719,
      "loss": 0.9259,
      "step": 7635
    },
    {
      "epoch": 0.4407014305491463,
      "grad_norm": 0.294716477394104,
      "learning_rate": 0.0001372381759032377,
      "loss": 0.9791,
      "step": 7640
    },
    {
      "epoch": 0.440989847715736,
      "grad_norm": 0.3185860514640808,
      "learning_rate": 0.00013714471876289075,
      "loss": 0.9606,
      "step": 7645
    },
    {
      "epoch": 0.4412782648823258,
      "grad_norm": 0.2923915982246399,
      "learning_rate": 0.00013705122397156727,
      "loss": 0.9309,
      "step": 7650
    },
    {
      "epoch": 0.4415666820489155,
      "grad_norm": 0.32446566224098206,
      "learning_rate": 0.00013695769162403633,
      "loss": 0.9541,
      "step": 7655
    },
    {
      "epoch": 0.4418550992155053,
      "grad_norm": 0.3201291859149933,
      "learning_rate": 0.00013686412181510504,
      "loss": 0.9672,
      "step": 7660
    },
    {
      "epoch": 0.44214351638209504,
      "grad_norm": 0.2882368266582489,
      "learning_rate": 0.00013677051463961855,
      "loss": 0.94,
      "step": 7665
    },
    {
      "epoch": 0.4424319335486848,
      "grad_norm": 0.31610748171806335,
      "learning_rate": 0.0001366768701924598,
      "loss": 0.9958,
      "step": 7670
    },
    {
      "epoch": 0.44272035071527455,
      "grad_norm": 0.29429611563682556,
      "learning_rate": 0.00013658318856854955,
      "loss": 0.9828,
      "step": 7675
    },
    {
      "epoch": 0.44300876788186433,
      "grad_norm": 0.2986331284046173,
      "learning_rate": 0.0001364894698628462,
      "loss": 0.981,
      "step": 7680
    },
    {
      "epoch": 0.44329718504845406,
      "grad_norm": 0.3160644471645355,
      "learning_rate": 0.0001363957141703459,
      "loss": 1.0728,
      "step": 7685
    },
    {
      "epoch": 0.44358560221504384,
      "grad_norm": 0.3175942897796631,
      "learning_rate": 0.00013630192158608202,
      "loss": 0.953,
      "step": 7690
    },
    {
      "epoch": 0.4438740193816336,
      "grad_norm": 0.30581793189048767,
      "learning_rate": 0.00013620809220512558,
      "loss": 0.9689,
      "step": 7695
    },
    {
      "epoch": 0.44416243654822335,
      "grad_norm": 0.30399492383003235,
      "learning_rate": 0.00013611422612258477,
      "loss": 1.0377,
      "step": 7700
    },
    {
      "epoch": 0.44445085371481313,
      "grad_norm": 0.35260793566703796,
      "learning_rate": 0.00013602032343360497,
      "loss": 0.9977,
      "step": 7705
    },
    {
      "epoch": 0.44473927088140286,
      "grad_norm": 0.3687379062175751,
      "learning_rate": 0.00013592638423336875,
      "loss": 0.9886,
      "step": 7710
    },
    {
      "epoch": 0.44502768804799264,
      "grad_norm": 0.30002057552337646,
      "learning_rate": 0.00013583240861709563,
      "loss": 0.9398,
      "step": 7715
    },
    {
      "epoch": 0.44531610521458237,
      "grad_norm": 0.2873061001300812,
      "learning_rate": 0.00013573839668004202,
      "loss": 0.8944,
      "step": 7720
    },
    {
      "epoch": 0.44560452238117215,
      "grad_norm": 0.3006219267845154,
      "learning_rate": 0.00013564434851750119,
      "loss": 1.0028,
      "step": 7725
    },
    {
      "epoch": 0.4458929395477619,
      "grad_norm": 0.2903684973716736,
      "learning_rate": 0.00013555026422480313,
      "loss": 0.9164,
      "step": 7730
    },
    {
      "epoch": 0.44618135671435166,
      "grad_norm": 0.33015668392181396,
      "learning_rate": 0.00013545614389731442,
      "loss": 1.0059,
      "step": 7735
    },
    {
      "epoch": 0.4464697738809414,
      "grad_norm": 0.31042858958244324,
      "learning_rate": 0.00013536198763043823,
      "loss": 0.8928,
      "step": 7740
    },
    {
      "epoch": 0.4467581910475312,
      "grad_norm": 0.3223642408847809,
      "learning_rate": 0.00013526779551961403,
      "loss": 1.021,
      "step": 7745
    },
    {
      "epoch": 0.4470466082141209,
      "grad_norm": 0.3327777087688446,
      "learning_rate": 0.00013517356766031777,
      "loss": 0.9867,
      "step": 7750
    },
    {
      "epoch": 0.4473350253807107,
      "grad_norm": 0.28827667236328125,
      "learning_rate": 0.00013507930414806153,
      "loss": 0.9634,
      "step": 7755
    },
    {
      "epoch": 0.4476234425473004,
      "grad_norm": 0.3253006041049957,
      "learning_rate": 0.00013498500507839363,
      "loss": 1.0064,
      "step": 7760
    },
    {
      "epoch": 0.4479118597138902,
      "grad_norm": 0.3410976231098175,
      "learning_rate": 0.00013489067054689834,
      "loss": 1.0264,
      "step": 7765
    },
    {
      "epoch": 0.4482002768804799,
      "grad_norm": 0.3115012049674988,
      "learning_rate": 0.00013479630064919593,
      "loss": 0.9865,
      "step": 7770
    },
    {
      "epoch": 0.4484886940470697,
      "grad_norm": 0.33605247735977173,
      "learning_rate": 0.00013470189548094242,
      "loss": 0.9313,
      "step": 7775
    },
    {
      "epoch": 0.44877711121365943,
      "grad_norm": 0.32948291301727295,
      "learning_rate": 0.00013460745513782976,
      "loss": 0.9301,
      "step": 7780
    },
    {
      "epoch": 0.4490655283802492,
      "grad_norm": 0.2822820842266083,
      "learning_rate": 0.0001345129797155854,
      "loss": 0.987,
      "step": 7785
    },
    {
      "epoch": 0.44935394554683894,
      "grad_norm": 0.3184073567390442,
      "learning_rate": 0.0001344184693099724,
      "loss": 0.9474,
      "step": 7790
    },
    {
      "epoch": 0.4496423627134287,
      "grad_norm": 0.30305254459381104,
      "learning_rate": 0.0001343239240167893,
      "loss": 0.9738,
      "step": 7795
    },
    {
      "epoch": 0.44993077988001845,
      "grad_norm": 0.2888820171356201,
      "learning_rate": 0.00013422934393186994,
      "loss": 0.9428,
      "step": 7800
    },
    {
      "epoch": 0.45021919704660823,
      "grad_norm": 0.30135294795036316,
      "learning_rate": 0.0001341347291510835,
      "loss": 0.9629,
      "step": 7805
    },
    {
      "epoch": 0.45050761421319796,
      "grad_norm": 0.3297244608402252,
      "learning_rate": 0.0001340400797703343,
      "loss": 0.9554,
      "step": 7810
    },
    {
      "epoch": 0.45079603137978774,
      "grad_norm": 0.31398284435272217,
      "learning_rate": 0.0001339453958855617,
      "loss": 0.9741,
      "step": 7815
    },
    {
      "epoch": 0.45108444854637747,
      "grad_norm": 0.30296990275382996,
      "learning_rate": 0.00013385067759274014,
      "loss": 0.917,
      "step": 7820
    },
    {
      "epoch": 0.45137286571296725,
      "grad_norm": 0.3051854968070984,
      "learning_rate": 0.00013375592498787871,
      "loss": 1.0307,
      "step": 7825
    },
    {
      "epoch": 0.451661282879557,
      "grad_norm": 0.3125559687614441,
      "learning_rate": 0.00013366113816702164,
      "loss": 1.0156,
      "step": 7830
    },
    {
      "epoch": 0.45194970004614676,
      "grad_norm": 0.3818068504333496,
      "learning_rate": 0.00013356631722624744,
      "loss": 1.0099,
      "step": 7835
    },
    {
      "epoch": 0.4522381172127365,
      "grad_norm": 0.3067907989025116,
      "learning_rate": 0.0001334714622616695,
      "loss": 0.9715,
      "step": 7840
    },
    {
      "epoch": 0.4525265343793263,
      "grad_norm": 0.31763195991516113,
      "learning_rate": 0.00013337657336943555,
      "loss": 0.903,
      "step": 7845
    },
    {
      "epoch": 0.452814951545916,
      "grad_norm": 0.296339213848114,
      "learning_rate": 0.0001332816506457278,
      "loss": 0.9954,
      "step": 7850
    },
    {
      "epoch": 0.4531033687125058,
      "grad_norm": 0.29348504543304443,
      "learning_rate": 0.00013318669418676266,
      "loss": 0.9327,
      "step": 7855
    },
    {
      "epoch": 0.4533917858790955,
      "grad_norm": 0.3079511225223541,
      "learning_rate": 0.0001330917040887908,
      "loss": 1.0196,
      "step": 7860
    },
    {
      "epoch": 0.4536802030456853,
      "grad_norm": 0.29583096504211426,
      "learning_rate": 0.000132996680448097,
      "loss": 0.987,
      "step": 7865
    },
    {
      "epoch": 0.453968620212275,
      "grad_norm": 0.3352641761302948,
      "learning_rate": 0.00013290162336099996,
      "loss": 0.9933,
      "step": 7870
    },
    {
      "epoch": 0.4542570373788648,
      "grad_norm": 0.32410529255867004,
      "learning_rate": 0.00013280653292385233,
      "loss": 0.9673,
      "step": 7875
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.3224494755268097,
      "learning_rate": 0.00013271140923304064,
      "loss": 0.9507,
      "step": 7880
    },
    {
      "epoch": 0.4548338717120443,
      "grad_norm": 0.2842070460319519,
      "learning_rate": 0.00013261625238498496,
      "loss": 0.9414,
      "step": 7885
    },
    {
      "epoch": 0.45512228887863404,
      "grad_norm": 0.31733888387680054,
      "learning_rate": 0.00013252106247613914,
      "loss": 1.013,
      "step": 7890
    },
    {
      "epoch": 0.4554107060452238,
      "grad_norm": 0.33846980333328247,
      "learning_rate": 0.0001324258396029904,
      "loss": 0.964,
      "step": 7895
    },
    {
      "epoch": 0.45569912321181355,
      "grad_norm": 0.29883426427841187,
      "learning_rate": 0.00013233058386205948,
      "loss": 0.9998,
      "step": 7900
    },
    {
      "epoch": 0.45598754037840333,
      "grad_norm": 0.3294544219970703,
      "learning_rate": 0.0001322352953499004,
      "loss": 0.9731,
      "step": 7905
    },
    {
      "epoch": 0.45627595754499306,
      "grad_norm": 0.4528438150882721,
      "learning_rate": 0.00013213997416310034,
      "loss": 1.0049,
      "step": 7910
    },
    {
      "epoch": 0.45656437471158284,
      "grad_norm": 0.30099087953567505,
      "learning_rate": 0.0001320446203982797,
      "loss": 0.9577,
      "step": 7915
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 0.32508721947669983,
      "learning_rate": 0.00013194923415209183,
      "loss": 1.0045,
      "step": 7920
    },
    {
      "epoch": 0.45714120904476235,
      "grad_norm": 0.29300785064697266,
      "learning_rate": 0.00013185381552122303,
      "loss": 0.9194,
      "step": 7925
    },
    {
      "epoch": 0.4574296262113521,
      "grad_norm": 0.306942343711853,
      "learning_rate": 0.00013175836460239243,
      "loss": 0.9877,
      "step": 7930
    },
    {
      "epoch": 0.45771804337794186,
      "grad_norm": 0.3295309841632843,
      "learning_rate": 0.00013166288149235188,
      "loss": 0.9917,
      "step": 7935
    },
    {
      "epoch": 0.4580064605445316,
      "grad_norm": 0.5229211449623108,
      "learning_rate": 0.00013156736628788584,
      "loss": 0.9927,
      "step": 7940
    },
    {
      "epoch": 0.4582948777111214,
      "grad_norm": 0.3033890128135681,
      "learning_rate": 0.00013147181908581136,
      "loss": 0.9158,
      "step": 7945
    },
    {
      "epoch": 0.4585832948777111,
      "grad_norm": 0.3171187937259674,
      "learning_rate": 0.00013137623998297785,
      "loss": 0.9485,
      "step": 7950
    },
    {
      "epoch": 0.4588717120443009,
      "grad_norm": 0.31747764348983765,
      "learning_rate": 0.00013128062907626718,
      "loss": 0.9021,
      "step": 7955
    },
    {
      "epoch": 0.4591601292108906,
      "grad_norm": 0.3300734758377075,
      "learning_rate": 0.00013118498646259323,
      "loss": 0.9613,
      "step": 7960
    },
    {
      "epoch": 0.4594485463774804,
      "grad_norm": 0.31003138422966003,
      "learning_rate": 0.00013108931223890225,
      "loss": 0.9745,
      "step": 7965
    },
    {
      "epoch": 0.4597369635440701,
      "grad_norm": 0.2931799590587616,
      "learning_rate": 0.0001309936065021724,
      "loss": 0.9516,
      "step": 7970
    },
    {
      "epoch": 0.4600253807106599,
      "grad_norm": 0.3191987872123718,
      "learning_rate": 0.00013089786934941387,
      "loss": 0.9241,
      "step": 7975
    },
    {
      "epoch": 0.46031379787724963,
      "grad_norm": 0.28786250948905945,
      "learning_rate": 0.0001308021008776686,
      "loss": 0.9766,
      "step": 7980
    },
    {
      "epoch": 0.4606022150438394,
      "grad_norm": 0.30729734897613525,
      "learning_rate": 0.0001307063011840103,
      "loss": 1.004,
      "step": 7985
    },
    {
      "epoch": 0.46089063221042914,
      "grad_norm": 0.3125825822353363,
      "learning_rate": 0.00013061047036554444,
      "loss": 1.0104,
      "step": 7990
    },
    {
      "epoch": 0.4611790493770189,
      "grad_norm": 0.3065352141857147,
      "learning_rate": 0.0001305146085194079,
      "loss": 0.9858,
      "step": 7995
    },
    {
      "epoch": 0.46146746654360865,
      "grad_norm": 0.2968180477619171,
      "learning_rate": 0.00013041871574276905,
      "loss": 1.0341,
      "step": 8000
    },
    {
      "epoch": 0.46175588371019843,
      "grad_norm": 0.3416571021080017,
      "learning_rate": 0.0001303227921328276,
      "loss": 0.9738,
      "step": 8005
    },
    {
      "epoch": 0.46204430087678816,
      "grad_norm": 0.31280043721199036,
      "learning_rate": 0.00013022683778681458,
      "loss": 0.9728,
      "step": 8010
    },
    {
      "epoch": 0.46233271804337794,
      "grad_norm": 0.333219975233078,
      "learning_rate": 0.00013013085280199214,
      "loss": 0.9559,
      "step": 8015
    },
    {
      "epoch": 0.46262113520996767,
      "grad_norm": 0.3534589111804962,
      "learning_rate": 0.00013003483727565344,
      "loss": 0.9473,
      "step": 8020
    },
    {
      "epoch": 0.46290955237655745,
      "grad_norm": 0.312299519777298,
      "learning_rate": 0.00012993879130512263,
      "loss": 0.968,
      "step": 8025
    },
    {
      "epoch": 0.4631979695431472,
      "grad_norm": 0.304463267326355,
      "learning_rate": 0.00012984271498775473,
      "loss": 0.9619,
      "step": 8030
    },
    {
      "epoch": 0.46348638670973696,
      "grad_norm": 0.27623772621154785,
      "learning_rate": 0.00012974660842093554,
      "loss": 0.9282,
      "step": 8035
    },
    {
      "epoch": 0.46377480387632675,
      "grad_norm": 0.3356596529483795,
      "learning_rate": 0.00012965047170208145,
      "loss": 0.9826,
      "step": 8040
    },
    {
      "epoch": 0.4640632210429165,
      "grad_norm": 0.34293317794799805,
      "learning_rate": 0.00012955430492863948,
      "loss": 1.0262,
      "step": 8045
    },
    {
      "epoch": 0.46435163820950626,
      "grad_norm": 0.3161788582801819,
      "learning_rate": 0.00012945810819808715,
      "loss": 1.019,
      "step": 8050
    },
    {
      "epoch": 0.464640055376096,
      "grad_norm": 0.31269657611846924,
      "learning_rate": 0.00012936188160793218,
      "loss": 0.9221,
      "step": 8055
    },
    {
      "epoch": 0.46492847254268577,
      "grad_norm": 0.3549450933933258,
      "learning_rate": 0.00012926562525571273,
      "loss": 1.0017,
      "step": 8060
    },
    {
      "epoch": 0.4652168897092755,
      "grad_norm": 0.3124108910560608,
      "learning_rate": 0.00012916933923899702,
      "loss": 0.9945,
      "step": 8065
    },
    {
      "epoch": 0.4655053068758653,
      "grad_norm": 0.3089344799518585,
      "learning_rate": 0.00012907302365538348,
      "loss": 1.0043,
      "step": 8070
    },
    {
      "epoch": 0.465793724042455,
      "grad_norm": 0.3016572594642639,
      "learning_rate": 0.00012897667860250028,
      "loss": 0.932,
      "step": 8075
    },
    {
      "epoch": 0.4660821412090448,
      "grad_norm": 0.2821403741836548,
      "learning_rate": 0.0001288803041780057,
      "loss": 0.8898,
      "step": 8080
    },
    {
      "epoch": 0.4663705583756345,
      "grad_norm": 0.3156525790691376,
      "learning_rate": 0.00012878390047958761,
      "loss": 0.9543,
      "step": 8085
    },
    {
      "epoch": 0.4666589755422243,
      "grad_norm": 0.2793186902999878,
      "learning_rate": 0.0001286874676049637,
      "loss": 0.936,
      "step": 8090
    },
    {
      "epoch": 0.466947392708814,
      "grad_norm": 0.3266434669494629,
      "learning_rate": 0.00012859100565188104,
      "loss": 1.0449,
      "step": 8095
    },
    {
      "epoch": 0.4672358098754038,
      "grad_norm": 0.31635022163391113,
      "learning_rate": 0.00012849451471811643,
      "loss": 1.0126,
      "step": 8100
    },
    {
      "epoch": 0.46752422704199353,
      "grad_norm": 0.3141334652900696,
      "learning_rate": 0.0001283979949014758,
      "loss": 0.9654,
      "step": 8105
    },
    {
      "epoch": 0.4678126442085833,
      "grad_norm": 0.31797635555267334,
      "learning_rate": 0.00012830144629979456,
      "loss": 0.9323,
      "step": 8110
    },
    {
      "epoch": 0.46810106137517304,
      "grad_norm": 0.32226845622062683,
      "learning_rate": 0.00012820486901093717,
      "loss": 0.9622,
      "step": 8115
    },
    {
      "epoch": 0.4683894785417628,
      "grad_norm": 0.3159628212451935,
      "learning_rate": 0.00012810826313279717,
      "loss": 0.9863,
      "step": 8120
    },
    {
      "epoch": 0.46867789570835255,
      "grad_norm": 0.3085826337337494,
      "learning_rate": 0.00012801162876329713,
      "loss": 1.0229,
      "step": 8125
    },
    {
      "epoch": 0.46896631287494234,
      "grad_norm": 0.32204851508140564,
      "learning_rate": 0.00012791496600038854,
      "loss": 0.9969,
      "step": 8130
    },
    {
      "epoch": 0.46925473004153206,
      "grad_norm": 0.3067418038845062,
      "learning_rate": 0.00012781827494205147,
      "loss": 0.9431,
      "step": 8135
    },
    {
      "epoch": 0.46954314720812185,
      "grad_norm": 0.31123608350753784,
      "learning_rate": 0.00012772155568629499,
      "loss": 0.9736,
      "step": 8140
    },
    {
      "epoch": 0.4698315643747116,
      "grad_norm": 0.31212547421455383,
      "learning_rate": 0.00012762480833115644,
      "loss": 0.9642,
      "step": 8145
    },
    {
      "epoch": 0.47011998154130136,
      "grad_norm": 0.28204548358917236,
      "learning_rate": 0.00012752803297470187,
      "loss": 1.004,
      "step": 8150
    },
    {
      "epoch": 0.4704083987078911,
      "grad_norm": 0.3246347904205322,
      "learning_rate": 0.00012743122971502555,
      "loss": 0.9538,
      "step": 8155
    },
    {
      "epoch": 0.47069681587448087,
      "grad_norm": 0.3177697956562042,
      "learning_rate": 0.00012733439865025012,
      "loss": 0.9978,
      "step": 8160
    },
    {
      "epoch": 0.4709852330410706,
      "grad_norm": 0.32368218898773193,
      "learning_rate": 0.0001272375398785264,
      "loss": 0.995,
      "step": 8165
    },
    {
      "epoch": 0.4712736502076604,
      "grad_norm": 0.34451228380203247,
      "learning_rate": 0.0001271406534980333,
      "loss": 1.0219,
      "step": 8170
    },
    {
      "epoch": 0.4715620673742501,
      "grad_norm": 0.3153943121433258,
      "learning_rate": 0.00012704373960697766,
      "loss": 1.0028,
      "step": 8175
    },
    {
      "epoch": 0.4718504845408399,
      "grad_norm": 0.3561108112335205,
      "learning_rate": 0.0001269467983035943,
      "loss": 0.9848,
      "step": 8180
    },
    {
      "epoch": 0.4721389017074296,
      "grad_norm": 0.3053421080112457,
      "learning_rate": 0.00012684982968614567,
      "loss": 0.9592,
      "step": 8185
    },
    {
      "epoch": 0.4724273188740194,
      "grad_norm": 0.31110382080078125,
      "learning_rate": 0.00012675283385292212,
      "loss": 0.9648,
      "step": 8190
    },
    {
      "epoch": 0.4727157360406091,
      "grad_norm": 0.34353601932525635,
      "learning_rate": 0.00012665581090224136,
      "loss": 0.9924,
      "step": 8195
    },
    {
      "epoch": 0.4730041532071989,
      "grad_norm": 0.3134307265281677,
      "learning_rate": 0.00012655876093244878,
      "loss": 0.9842,
      "step": 8200
    },
    {
      "epoch": 0.47329257037378863,
      "grad_norm": 0.3005763590335846,
      "learning_rate": 0.00012646168404191704,
      "loss": 0.883,
      "step": 8205
    },
    {
      "epoch": 0.4735809875403784,
      "grad_norm": 0.31364625692367554,
      "learning_rate": 0.00012636458032904617,
      "loss": 1.0272,
      "step": 8210
    },
    {
      "epoch": 0.47386940470696814,
      "grad_norm": 0.32760295271873474,
      "learning_rate": 0.00012626744989226326,
      "loss": 0.9793,
      "step": 8215
    },
    {
      "epoch": 0.4741578218735579,
      "grad_norm": 0.31284964084625244,
      "learning_rate": 0.00012617029283002265,
      "loss": 0.9602,
      "step": 8220
    },
    {
      "epoch": 0.47444623904014765,
      "grad_norm": 0.28940585255622864,
      "learning_rate": 0.00012607310924080557,
      "loss": 0.9804,
      "step": 8225
    },
    {
      "epoch": 0.47473465620673744,
      "grad_norm": 0.3072567880153656,
      "learning_rate": 0.00012597589922312008,
      "loss": 0.8965,
      "step": 8230
    },
    {
      "epoch": 0.47502307337332716,
      "grad_norm": 0.2848748564720154,
      "learning_rate": 0.0001258786628755012,
      "loss": 0.8763,
      "step": 8235
    },
    {
      "epoch": 0.47531149053991695,
      "grad_norm": 0.31714004278182983,
      "learning_rate": 0.00012578140029651053,
      "loss": 0.9775,
      "step": 8240
    },
    {
      "epoch": 0.4755999077065067,
      "grad_norm": 0.2894527316093445,
      "learning_rate": 0.00012568411158473625,
      "loss": 1.0269,
      "step": 8245
    },
    {
      "epoch": 0.47588832487309646,
      "grad_norm": 0.3150068521499634,
      "learning_rate": 0.00012558679683879301,
      "loss": 1.0003,
      "step": 8250
    },
    {
      "epoch": 0.4761767420396862,
      "grad_norm": 0.3005748987197876,
      "learning_rate": 0.00012548945615732202,
      "loss": 1.0234,
      "step": 8255
    },
    {
      "epoch": 0.47646515920627597,
      "grad_norm": 0.33544933795928955,
      "learning_rate": 0.0001253920896389905,
      "loss": 0.9997,
      "step": 8260
    },
    {
      "epoch": 0.4767535763728657,
      "grad_norm": 0.31992655992507935,
      "learning_rate": 0.00012529469738249208,
      "loss": 0.9854,
      "step": 8265
    },
    {
      "epoch": 0.4770419935394555,
      "grad_norm": 0.3050898611545563,
      "learning_rate": 0.00012519727948654642,
      "loss": 0.9838,
      "step": 8270
    },
    {
      "epoch": 0.4773304107060452,
      "grad_norm": 0.31912678480148315,
      "learning_rate": 0.00012509983604989917,
      "loss": 0.982,
      "step": 8275
    },
    {
      "epoch": 0.477618827872635,
      "grad_norm": 0.3090691566467285,
      "learning_rate": 0.00012500236717132178,
      "loss": 0.9482,
      "step": 8280
    },
    {
      "epoch": 0.4779072450392247,
      "grad_norm": 0.2905727028846741,
      "learning_rate": 0.00012490487294961167,
      "loss": 0.9127,
      "step": 8285
    },
    {
      "epoch": 0.4781956622058145,
      "grad_norm": 0.3192508816719055,
      "learning_rate": 0.0001248073534835917,
      "loss": 0.9899,
      "step": 8290
    },
    {
      "epoch": 0.4784840793724042,
      "grad_norm": 0.29958266019821167,
      "learning_rate": 0.00012470980887211062,
      "loss": 0.9993,
      "step": 8295
    },
    {
      "epoch": 0.478772496538994,
      "grad_norm": 0.3005164861679077,
      "learning_rate": 0.0001246122392140424,
      "loss": 0.9824,
      "step": 8300
    },
    {
      "epoch": 0.47906091370558374,
      "grad_norm": 0.37904489040374756,
      "learning_rate": 0.00012451464460828656,
      "loss": 1.0149,
      "step": 8305
    },
    {
      "epoch": 0.4793493308721735,
      "grad_norm": 0.30801454186439514,
      "learning_rate": 0.00012441702515376786,
      "loss": 1.0023,
      "step": 8310
    },
    {
      "epoch": 0.47963774803876325,
      "grad_norm": 0.3129716217517853,
      "learning_rate": 0.00012431938094943618,
      "loss": 0.9613,
      "step": 8315
    },
    {
      "epoch": 0.47992616520535303,
      "grad_norm": 0.30742931365966797,
      "learning_rate": 0.0001242217120942666,
      "loss": 0.8988,
      "step": 8320
    },
    {
      "epoch": 0.48021458237194276,
      "grad_norm": 0.3507063090801239,
      "learning_rate": 0.00012412401868725913,
      "loss": 0.9813,
      "step": 8325
    },
    {
      "epoch": 0.48050299953853254,
      "grad_norm": 0.28657853603363037,
      "learning_rate": 0.00012402630082743868,
      "loss": 0.9277,
      "step": 8330
    },
    {
      "epoch": 0.48079141670512227,
      "grad_norm": 0.3005477786064148,
      "learning_rate": 0.00012392855861385492,
      "loss": 0.9476,
      "step": 8335
    },
    {
      "epoch": 0.48107983387171205,
      "grad_norm": 0.2988882064819336,
      "learning_rate": 0.00012383079214558227,
      "loss": 0.9513,
      "step": 8340
    },
    {
      "epoch": 0.4813682510383018,
      "grad_norm": 0.31848201155662537,
      "learning_rate": 0.0001237330015217196,
      "loss": 0.9201,
      "step": 8345
    },
    {
      "epoch": 0.48165666820489156,
      "grad_norm": 0.2883610725402832,
      "learning_rate": 0.00012363518684139043,
      "loss": 0.9438,
      "step": 8350
    },
    {
      "epoch": 0.4819450853714813,
      "grad_norm": 0.3046533167362213,
      "learning_rate": 0.0001235373482037426,
      "loss": 0.9581,
      "step": 8355
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 0.3131345510482788,
      "learning_rate": 0.00012343948570794815,
      "loss": 0.977,
      "step": 8360
    },
    {
      "epoch": 0.4825219197046608,
      "grad_norm": 0.3097611665725708,
      "learning_rate": 0.00012334159945320342,
      "loss": 0.944,
      "step": 8365
    },
    {
      "epoch": 0.4828103368712506,
      "grad_norm": 0.3010179102420807,
      "learning_rate": 0.00012324368953872883,
      "loss": 0.9699,
      "step": 8370
    },
    {
      "epoch": 0.4830987540378403,
      "grad_norm": 0.32022956013679504,
      "learning_rate": 0.00012314575606376863,
      "loss": 1.0071,
      "step": 8375
    },
    {
      "epoch": 0.4833871712044301,
      "grad_norm": 0.32526081800460815,
      "learning_rate": 0.00012304779912759118,
      "loss": 0.9378,
      "step": 8380
    },
    {
      "epoch": 0.4836755883710198,
      "grad_norm": 0.32581695914268494,
      "learning_rate": 0.00012294981882948844,
      "loss": 0.9235,
      "step": 8385
    },
    {
      "epoch": 0.4839640055376096,
      "grad_norm": 0.30687832832336426,
      "learning_rate": 0.00012285181526877615,
      "loss": 0.9252,
      "step": 8390
    },
    {
      "epoch": 0.4842524227041994,
      "grad_norm": 0.2729445993900299,
      "learning_rate": 0.00012275378854479358,
      "loss": 0.9161,
      "step": 8395
    },
    {
      "epoch": 0.4845408398707891,
      "grad_norm": 0.3410494029521942,
      "learning_rate": 0.00012265573875690344,
      "loss": 0.983,
      "step": 8400
    },
    {
      "epoch": 0.4848292570373789,
      "grad_norm": 0.3035064935684204,
      "learning_rate": 0.00012255766600449198,
      "loss": 0.934,
      "step": 8405
    },
    {
      "epoch": 0.4851176742039686,
      "grad_norm": 0.30714723467826843,
      "learning_rate": 0.0001224595703869685,
      "loss": 0.8976,
      "step": 8410
    },
    {
      "epoch": 0.4854060913705584,
      "grad_norm": 0.3508041799068451,
      "learning_rate": 0.00012236145200376566,
      "loss": 1.0483,
      "step": 8415
    },
    {
      "epoch": 0.48569450853714813,
      "grad_norm": 0.30225586891174316,
      "learning_rate": 0.0001222633109543392,
      "loss": 0.9137,
      "step": 8420
    },
    {
      "epoch": 0.4859829257037379,
      "grad_norm": 0.32091522216796875,
      "learning_rate": 0.0001221651473381676,
      "loss": 0.944,
      "step": 8425
    },
    {
      "epoch": 0.48627134287032764,
      "grad_norm": 0.3079957962036133,
      "learning_rate": 0.00012206696125475249,
      "loss": 1.0086,
      "step": 8430
    },
    {
      "epoch": 0.4865597600369174,
      "grad_norm": 0.28919950127601624,
      "learning_rate": 0.00012196875280361817,
      "loss": 1.0097,
      "step": 8435
    },
    {
      "epoch": 0.48684817720350715,
      "grad_norm": 0.296323299407959,
      "learning_rate": 0.00012187052208431158,
      "loss": 0.9956,
      "step": 8440
    },
    {
      "epoch": 0.48713659437009693,
      "grad_norm": 0.3021702468395233,
      "learning_rate": 0.00012177226919640223,
      "loss": 1.0361,
      "step": 8445
    },
    {
      "epoch": 0.48742501153668666,
      "grad_norm": 0.3154110908508301,
      "learning_rate": 0.0001216739942394822,
      "loss": 0.9982,
      "step": 8450
    },
    {
      "epoch": 0.48771342870327644,
      "grad_norm": 0.2946849763393402,
      "learning_rate": 0.0001215756973131658,
      "loss": 1.094,
      "step": 8455
    },
    {
      "epoch": 0.48800184586986617,
      "grad_norm": 0.3265274167060852,
      "learning_rate": 0.00012147737851708973,
      "loss": 0.9622,
      "step": 8460
    },
    {
      "epoch": 0.48829026303645595,
      "grad_norm": 0.30080366134643555,
      "learning_rate": 0.00012137903795091276,
      "loss": 0.9274,
      "step": 8465
    },
    {
      "epoch": 0.4885786802030457,
      "grad_norm": 0.3038170337677002,
      "learning_rate": 0.00012128067571431583,
      "loss": 0.9048,
      "step": 8470
    },
    {
      "epoch": 0.48886709736963546,
      "grad_norm": 0.3240787982940674,
      "learning_rate": 0.00012118229190700172,
      "loss": 1.0785,
      "step": 8475
    },
    {
      "epoch": 0.4891555145362252,
      "grad_norm": 0.3066551387310028,
      "learning_rate": 0.00012108388662869519,
      "loss": 1.0428,
      "step": 8480
    },
    {
      "epoch": 0.48944393170281497,
      "grad_norm": 0.3393528461456299,
      "learning_rate": 0.0001209854599791427,
      "loss": 0.94,
      "step": 8485
    },
    {
      "epoch": 0.4897323488694047,
      "grad_norm": 0.31477421522140503,
      "learning_rate": 0.0001208870120581124,
      "loss": 0.9802,
      "step": 8490
    },
    {
      "epoch": 0.4900207660359945,
      "grad_norm": 0.3190643787384033,
      "learning_rate": 0.00012078854296539397,
      "loss": 0.9995,
      "step": 8495
    },
    {
      "epoch": 0.4903091832025842,
      "grad_norm": 0.2997737526893616,
      "learning_rate": 0.00012069005280079862,
      "loss": 0.9562,
      "step": 8500
    },
    {
      "epoch": 0.490597600369174,
      "grad_norm": 0.2990228831768036,
      "learning_rate": 0.0001205915416641588,
      "loss": 0.9139,
      "step": 8505
    },
    {
      "epoch": 0.4908860175357637,
      "grad_norm": 0.3303566575050354,
      "learning_rate": 0.00012049300965532832,
      "loss": 0.9472,
      "step": 8510
    },
    {
      "epoch": 0.4911744347023535,
      "grad_norm": 0.34171241521835327,
      "learning_rate": 0.00012039445687418212,
      "loss": 1.0068,
      "step": 8515
    },
    {
      "epoch": 0.49146285186894323,
      "grad_norm": 0.32005879282951355,
      "learning_rate": 0.00012029588342061621,
      "loss": 0.9828,
      "step": 8520
    },
    {
      "epoch": 0.491751269035533,
      "grad_norm": 0.30151885747909546,
      "learning_rate": 0.00012019728939454748,
      "loss": 1.001,
      "step": 8525
    },
    {
      "epoch": 0.49203968620212274,
      "grad_norm": 0.3093133568763733,
      "learning_rate": 0.00012009867489591377,
      "loss": 0.9059,
      "step": 8530
    },
    {
      "epoch": 0.4923281033687125,
      "grad_norm": 0.3029720187187195,
      "learning_rate": 0.00012000004002467364,
      "loss": 0.9823,
      "step": 8535
    },
    {
      "epoch": 0.49261652053530225,
      "grad_norm": 0.3385627269744873,
      "learning_rate": 0.00011990138488080622,
      "loss": 0.9238,
      "step": 8540
    },
    {
      "epoch": 0.49290493770189203,
      "grad_norm": 0.32065922021865845,
      "learning_rate": 0.00011980270956431135,
      "loss": 0.9322,
      "step": 8545
    },
    {
      "epoch": 0.49319335486848176,
      "grad_norm": 0.3160153031349182,
      "learning_rate": 0.00011970401417520913,
      "loss": 0.9436,
      "step": 8550
    },
    {
      "epoch": 0.49348177203507154,
      "grad_norm": 0.28007084131240845,
      "learning_rate": 0.00011960529881354017,
      "loss": 0.8472,
      "step": 8555
    },
    {
      "epoch": 0.49377018920166127,
      "grad_norm": 0.3082873523235321,
      "learning_rate": 0.00011950656357936525,
      "loss": 0.8565,
      "step": 8560
    },
    {
      "epoch": 0.49405860636825105,
      "grad_norm": 0.2938149571418762,
      "learning_rate": 0.00011940780857276528,
      "loss": 0.9621,
      "step": 8565
    },
    {
      "epoch": 0.4943470235348408,
      "grad_norm": 0.3359840512275696,
      "learning_rate": 0.00011930903389384123,
      "loss": 0.9477,
      "step": 8570
    },
    {
      "epoch": 0.49463544070143056,
      "grad_norm": 0.3250032663345337,
      "learning_rate": 0.00011921023964271403,
      "loss": 0.9795,
      "step": 8575
    },
    {
      "epoch": 0.4949238578680203,
      "grad_norm": 0.3110741078853607,
      "learning_rate": 0.00011911142591952437,
      "loss": 0.955,
      "step": 8580
    },
    {
      "epoch": 0.49521227503461007,
      "grad_norm": 0.29950112104415894,
      "learning_rate": 0.00011901259282443285,
      "loss": 0.9587,
      "step": 8585
    },
    {
      "epoch": 0.4955006922011998,
      "grad_norm": 0.3032659590244293,
      "learning_rate": 0.0001189137404576195,
      "loss": 1.0214,
      "step": 8590
    },
    {
      "epoch": 0.4957891093677896,
      "grad_norm": 0.3259546458721161,
      "learning_rate": 0.00011881486891928404,
      "loss": 1.0018,
      "step": 8595
    },
    {
      "epoch": 0.4960775265343793,
      "grad_norm": 0.3006402850151062,
      "learning_rate": 0.00011871597830964551,
      "loss": 1.0101,
      "step": 8600
    },
    {
      "epoch": 0.4963659437009691,
      "grad_norm": 0.32679426670074463,
      "learning_rate": 0.00011861706872894236,
      "loss": 0.9485,
      "step": 8605
    },
    {
      "epoch": 0.4966543608675588,
      "grad_norm": 0.27988797426223755,
      "learning_rate": 0.00011851814027743223,
      "loss": 0.8054,
      "step": 8610
    },
    {
      "epoch": 0.4969427780341486,
      "grad_norm": 0.2938796579837799,
      "learning_rate": 0.00011841919305539194,
      "loss": 0.9298,
      "step": 8615
    },
    {
      "epoch": 0.49723119520073833,
      "grad_norm": 0.3216566741466522,
      "learning_rate": 0.00011832022716311722,
      "loss": 0.9831,
      "step": 8620
    },
    {
      "epoch": 0.4975196123673281,
      "grad_norm": 0.3104879856109619,
      "learning_rate": 0.0001182212427009229,
      "loss": 0.9916,
      "step": 8625
    },
    {
      "epoch": 0.49780802953391784,
      "grad_norm": 0.33819061517715454,
      "learning_rate": 0.00011812223976914243,
      "loss": 1.0035,
      "step": 8630
    },
    {
      "epoch": 0.4980964467005076,
      "grad_norm": 0.29182150959968567,
      "learning_rate": 0.00011802321846812816,
      "loss": 0.9055,
      "step": 8635
    },
    {
      "epoch": 0.49838486386709735,
      "grad_norm": 0.303713321685791,
      "learning_rate": 0.00011792417889825094,
      "loss": 0.8924,
      "step": 8640
    },
    {
      "epoch": 0.49867328103368713,
      "grad_norm": 0.3153342604637146,
      "learning_rate": 0.00011782512115990023,
      "loss": 0.9802,
      "step": 8645
    },
    {
      "epoch": 0.49896169820027686,
      "grad_norm": 0.3053840398788452,
      "learning_rate": 0.00011772604535348382,
      "loss": 0.9128,
      "step": 8650
    },
    {
      "epoch": 0.49925011536686664,
      "grad_norm": 0.3106091320514679,
      "learning_rate": 0.00011762695157942789,
      "loss": 1.0137,
      "step": 8655
    },
    {
      "epoch": 0.49953853253345637,
      "grad_norm": 0.325166255235672,
      "learning_rate": 0.00011752783993817675,
      "loss": 0.9791,
      "step": 8660
    },
    {
      "epoch": 0.49982694970004615,
      "grad_norm": 0.31137755513191223,
      "learning_rate": 0.00011742871053019294,
      "loss": 0.9528,
      "step": 8665
    },
    {
      "epoch": 0.5001153668666359,
      "grad_norm": 0.30196908116340637,
      "learning_rate": 0.00011732956345595682,
      "loss": 0.9264,
      "step": 8670
    },
    {
      "epoch": 0.5004037840332256,
      "grad_norm": 0.30455246567726135,
      "learning_rate": 0.00011723039881596686,
      "loss": 0.9306,
      "step": 8675
    },
    {
      "epoch": 0.5006922011998154,
      "grad_norm": 0.33094102144241333,
      "learning_rate": 0.00011713121671073924,
      "loss": 0.9644,
      "step": 8680
    },
    {
      "epoch": 0.5009806183664052,
      "grad_norm": 0.29138949513435364,
      "learning_rate": 0.00011703201724080783,
      "loss": 0.919,
      "step": 8685
    },
    {
      "epoch": 0.501269035532995,
      "grad_norm": 0.3106113374233246,
      "learning_rate": 0.00011693280050672417,
      "loss": 1.008,
      "step": 8690
    },
    {
      "epoch": 0.5015574526995846,
      "grad_norm": 0.30877065658569336,
      "learning_rate": 0.00011683356660905716,
      "loss": 0.9312,
      "step": 8695
    },
    {
      "epoch": 0.5018458698661744,
      "grad_norm": 0.3161839246749878,
      "learning_rate": 0.00011673431564839327,
      "loss": 0.921,
      "step": 8700
    },
    {
      "epoch": 0.5021342870327642,
      "grad_norm": 0.30622875690460205,
      "learning_rate": 0.00011663504772533617,
      "loss": 0.9326,
      "step": 8705
    },
    {
      "epoch": 0.502422704199354,
      "grad_norm": 0.32432737946510315,
      "learning_rate": 0.0001165357629405067,
      "loss": 1.0314,
      "step": 8710
    },
    {
      "epoch": 0.5027111213659436,
      "grad_norm": 0.3023039698600769,
      "learning_rate": 0.00011643646139454287,
      "loss": 0.9921,
      "step": 8715
    },
    {
      "epoch": 0.5029995385325334,
      "grad_norm": 0.31420597434043884,
      "learning_rate": 0.00011633714318809962,
      "loss": 0.9713,
      "step": 8720
    },
    {
      "epoch": 0.5032879556991232,
      "grad_norm": 0.3235868215560913,
      "learning_rate": 0.00011623780842184881,
      "loss": 0.9795,
      "step": 8725
    },
    {
      "epoch": 0.503576372865713,
      "grad_norm": 0.3257627785205841,
      "learning_rate": 0.00011613845719647909,
      "loss": 1.0116,
      "step": 8730
    },
    {
      "epoch": 0.5038647900323027,
      "grad_norm": 0.289419561624527,
      "learning_rate": 0.00011603908961269571,
      "loss": 0.924,
      "step": 8735
    },
    {
      "epoch": 0.5041532071988925,
      "grad_norm": 0.31289026141166687,
      "learning_rate": 0.00011593970577122067,
      "loss": 0.8984,
      "step": 8740
    },
    {
      "epoch": 0.5044416243654822,
      "grad_norm": 0.3069057762622833,
      "learning_rate": 0.00011584030577279223,
      "loss": 0.9594,
      "step": 8745
    },
    {
      "epoch": 0.504730041532072,
      "grad_norm": 0.3309262990951538,
      "learning_rate": 0.00011574088971816523,
      "loss": 1.0637,
      "step": 8750
    },
    {
      "epoch": 0.5050184586986618,
      "grad_norm": 0.343013733625412,
      "learning_rate": 0.00011564145770811068,
      "loss": 1.0467,
      "step": 8755
    },
    {
      "epoch": 0.5053068758652515,
      "grad_norm": 0.298500657081604,
      "learning_rate": 0.00011554200984341577,
      "loss": 0.9842,
      "step": 8760
    },
    {
      "epoch": 0.5055952930318413,
      "grad_norm": 0.294319212436676,
      "learning_rate": 0.00011544254622488378,
      "loss": 0.9636,
      "step": 8765
    },
    {
      "epoch": 0.505883710198431,
      "grad_norm": 0.30474424362182617,
      "learning_rate": 0.00011534306695333395,
      "loss": 0.9891,
      "step": 8770
    },
    {
      "epoch": 0.5061721273650208,
      "grad_norm": 0.3213635981082916,
      "learning_rate": 0.00011524357212960135,
      "loss": 0.9494,
      "step": 8775
    },
    {
      "epoch": 0.5064605445316105,
      "grad_norm": 0.30603140592575073,
      "learning_rate": 0.00011514406185453692,
      "loss": 0.9003,
      "step": 8780
    },
    {
      "epoch": 0.5067489616982003,
      "grad_norm": 0.305373877286911,
      "learning_rate": 0.00011504453622900717,
      "loss": 1.0101,
      "step": 8785
    },
    {
      "epoch": 0.5070373788647901,
      "grad_norm": 0.31912145018577576,
      "learning_rate": 0.00011494499535389418,
      "loss": 0.9679,
      "step": 8790
    },
    {
      "epoch": 0.5073257960313798,
      "grad_norm": 0.2848472595214844,
      "learning_rate": 0.00011484543933009549,
      "loss": 0.9735,
      "step": 8795
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 0.3449084460735321,
      "learning_rate": 0.00011474586825852405,
      "loss": 0.9184,
      "step": 8800
    },
    {
      "epoch": 0.5079026303645593,
      "grad_norm": 0.30281150341033936,
      "learning_rate": 0.00011464628224010797,
      "loss": 1.0068,
      "step": 8805
    },
    {
      "epoch": 0.5081910475311491,
      "grad_norm": 0.29601889848709106,
      "learning_rate": 0.00011454668137579059,
      "loss": 0.9992,
      "step": 8810
    },
    {
      "epoch": 0.5084794646977389,
      "grad_norm": 0.3145044147968292,
      "learning_rate": 0.00011444706576653024,
      "loss": 0.9517,
      "step": 8815
    },
    {
      "epoch": 0.5087678818643285,
      "grad_norm": 0.3319196403026581,
      "learning_rate": 0.00011434743551330028,
      "loss": 0.9928,
      "step": 8820
    },
    {
      "epoch": 0.5090562990309183,
      "grad_norm": 0.28969430923461914,
      "learning_rate": 0.00011424779071708878,
      "loss": 0.9698,
      "step": 8825
    },
    {
      "epoch": 0.5093447161975081,
      "grad_norm": 0.2996658384799957,
      "learning_rate": 0.00011414813147889868,
      "loss": 1.0052,
      "step": 8830
    },
    {
      "epoch": 0.5096331333640979,
      "grad_norm": 0.2955401539802551,
      "learning_rate": 0.0001140484578997475,
      "loss": 0.9902,
      "step": 8835
    },
    {
      "epoch": 0.5099215505306876,
      "grad_norm": 0.3238406777381897,
      "learning_rate": 0.00011394877008066731,
      "loss": 1.0101,
      "step": 8840
    },
    {
      "epoch": 0.5102099676972773,
      "grad_norm": 0.304666668176651,
      "learning_rate": 0.00011384906812270457,
      "loss": 0.9317,
      "step": 8845
    },
    {
      "epoch": 0.5104983848638671,
      "grad_norm": 0.2767610251903534,
      "learning_rate": 0.00011374935212692018,
      "loss": 0.9771,
      "step": 8850
    },
    {
      "epoch": 0.5107868020304569,
      "grad_norm": 0.3337164521217346,
      "learning_rate": 0.00011364962219438913,
      "loss": 0.9174,
      "step": 8855
    },
    {
      "epoch": 0.5110752191970466,
      "grad_norm": 0.3500116169452667,
      "learning_rate": 0.00011354987842620061,
      "loss": 0.9645,
      "step": 8860
    },
    {
      "epoch": 0.5113636363636364,
      "grad_norm": 0.3071758449077606,
      "learning_rate": 0.00011345012092345786,
      "loss": 0.9561,
      "step": 8865
    },
    {
      "epoch": 0.5116520535302261,
      "grad_norm": 0.3307574689388275,
      "learning_rate": 0.000113350349787278,
      "loss": 1.0172,
      "step": 8870
    },
    {
      "epoch": 0.5119404706968159,
      "grad_norm": 0.31679022312164307,
      "learning_rate": 0.00011325056511879197,
      "loss": 0.9626,
      "step": 8875
    },
    {
      "epoch": 0.5122288878634056,
      "grad_norm": 0.30507731437683105,
      "learning_rate": 0.00011315076701914449,
      "loss": 0.9616,
      "step": 8880
    },
    {
      "epoch": 0.5125173050299954,
      "grad_norm": 0.328744113445282,
      "learning_rate": 0.00011305095558949376,
      "loss": 0.9907,
      "step": 8885
    },
    {
      "epoch": 0.5128057221965852,
      "grad_norm": 0.3259487748146057,
      "learning_rate": 0.00011295113093101162,
      "loss": 1.0606,
      "step": 8890
    },
    {
      "epoch": 0.5130941393631749,
      "grad_norm": 0.30750012397766113,
      "learning_rate": 0.00011285129314488328,
      "loss": 0.9538,
      "step": 8895
    },
    {
      "epoch": 0.5133825565297646,
      "grad_norm": 0.3048967719078064,
      "learning_rate": 0.0001127514423323072,
      "loss": 0.982,
      "step": 8900
    },
    {
      "epoch": 0.5136709736963544,
      "grad_norm": 0.33238890767097473,
      "learning_rate": 0.00011265157859449513,
      "loss": 0.9261,
      "step": 8905
    },
    {
      "epoch": 0.5139593908629442,
      "grad_norm": 0.312454491853714,
      "learning_rate": 0.00011255170203267186,
      "loss": 0.9946,
      "step": 8910
    },
    {
      "epoch": 0.514247808029534,
      "grad_norm": 0.28780901432037354,
      "learning_rate": 0.0001124518127480753,
      "loss": 0.9761,
      "step": 8915
    },
    {
      "epoch": 0.5145362251961236,
      "grad_norm": 0.29829463362693787,
      "learning_rate": 0.000112351910841956,
      "loss": 0.9599,
      "step": 8920
    },
    {
      "epoch": 0.5148246423627134,
      "grad_norm": 0.289553701877594,
      "learning_rate": 0.0001122519964155776,
      "loss": 0.998,
      "step": 8925
    },
    {
      "epoch": 0.5151130595293032,
      "grad_norm": 0.33418598771095276,
      "learning_rate": 0.00011215206957021618,
      "loss": 1.0044,
      "step": 8930
    },
    {
      "epoch": 0.515401476695893,
      "grad_norm": 0.3013911843299866,
      "learning_rate": 0.00011205213040716063,
      "loss": 1.0204,
      "step": 8935
    },
    {
      "epoch": 0.5156898938624827,
      "grad_norm": 0.30099862813949585,
      "learning_rate": 0.00011195217902771212,
      "loss": 0.9752,
      "step": 8940
    },
    {
      "epoch": 0.5159783110290724,
      "grad_norm": 0.3052062690258026,
      "learning_rate": 0.00011185221553318438,
      "loss": 1.0301,
      "step": 8945
    },
    {
      "epoch": 0.5162667281956622,
      "grad_norm": 0.30699998140335083,
      "learning_rate": 0.0001117522400249033,
      "loss": 0.9504,
      "step": 8950
    },
    {
      "epoch": 0.516555145362252,
      "grad_norm": 0.3200739324092865,
      "learning_rate": 0.00011165225260420697,
      "loss": 0.9353,
      "step": 8955
    },
    {
      "epoch": 0.5168435625288417,
      "grad_norm": 0.28724315762519836,
      "learning_rate": 0.00011155225337244562,
      "loss": 0.9249,
      "step": 8960
    },
    {
      "epoch": 0.5171319796954315,
      "grad_norm": 0.312840074300766,
      "learning_rate": 0.00011145224243098138,
      "loss": 0.9696,
      "step": 8965
    },
    {
      "epoch": 0.5174203968620212,
      "grad_norm": 0.3123422861099243,
      "learning_rate": 0.00011135221988118825,
      "loss": 0.9377,
      "step": 8970
    },
    {
      "epoch": 0.517708814028611,
      "grad_norm": 0.3269249200820923,
      "learning_rate": 0.00011125218582445207,
      "loss": 1.0189,
      "step": 8975
    },
    {
      "epoch": 0.5179972311952007,
      "grad_norm": 0.34945985674858093,
      "learning_rate": 0.00011115214036217026,
      "loss": 0.9692,
      "step": 8980
    },
    {
      "epoch": 0.5182856483617905,
      "grad_norm": 0.30264851450920105,
      "learning_rate": 0.00011105208359575186,
      "loss": 0.9469,
      "step": 8985
    },
    {
      "epoch": 0.5185740655283803,
      "grad_norm": 0.3047720491886139,
      "learning_rate": 0.0001109520156266173,
      "loss": 0.9471,
      "step": 8990
    },
    {
      "epoch": 0.51886248269497,
      "grad_norm": 0.33226409554481506,
      "learning_rate": 0.00011085193655619845,
      "loss": 0.9561,
      "step": 8995
    },
    {
      "epoch": 0.5191508998615597,
      "grad_norm": 0.3194473385810852,
      "learning_rate": 0.00011075184648593838,
      "loss": 1.0039,
      "step": 9000
    },
    {
      "epoch": 0.5194393170281495,
      "grad_norm": 0.334067165851593,
      "learning_rate": 0.00011065174551729134,
      "loss": 0.9985,
      "step": 9005
    },
    {
      "epoch": 0.5197277341947393,
      "grad_norm": 0.3037133514881134,
      "learning_rate": 0.00011055163375172257,
      "loss": 1.0081,
      "step": 9010
    },
    {
      "epoch": 0.5200161513613291,
      "grad_norm": 0.3324849605560303,
      "learning_rate": 0.00011045151129070832,
      "loss": 0.9596,
      "step": 9015
    },
    {
      "epoch": 0.5203045685279187,
      "grad_norm": 0.29767823219299316,
      "learning_rate": 0.00011035137823573561,
      "loss": 0.8858,
      "step": 9020
    },
    {
      "epoch": 0.5205929856945085,
      "grad_norm": 0.3264126181602478,
      "learning_rate": 0.00011025123468830232,
      "loss": 0.9279,
      "step": 9025
    },
    {
      "epoch": 0.5208814028610983,
      "grad_norm": 0.28214511275291443,
      "learning_rate": 0.0001101510807499168,
      "loss": 0.9387,
      "step": 9030
    },
    {
      "epoch": 0.5211698200276881,
      "grad_norm": 0.30854344367980957,
      "learning_rate": 0.00011005091652209809,
      "loss": 0.91,
      "step": 9035
    },
    {
      "epoch": 0.5214582371942778,
      "grad_norm": 0.3331091105937958,
      "learning_rate": 0.00010995074210637557,
      "loss": 1.0045,
      "step": 9040
    },
    {
      "epoch": 0.5217466543608675,
      "grad_norm": 0.29864826798439026,
      "learning_rate": 0.00010985055760428893,
      "loss": 0.8801,
      "step": 9045
    },
    {
      "epoch": 0.5220350715274573,
      "grad_norm": 0.3084268867969513,
      "learning_rate": 0.00010975036311738818,
      "loss": 0.9623,
      "step": 9050
    },
    {
      "epoch": 0.5223234886940471,
      "grad_norm": 0.31782200932502747,
      "learning_rate": 0.00010965015874723332,
      "loss": 1.056,
      "step": 9055
    },
    {
      "epoch": 0.5226119058606368,
      "grad_norm": 0.29870155453681946,
      "learning_rate": 0.00010954994459539452,
      "loss": 0.9459,
      "step": 9060
    },
    {
      "epoch": 0.5229003230272266,
      "grad_norm": 0.32133176922798157,
      "learning_rate": 0.0001094497207634517,
      "loss": 0.9144,
      "step": 9065
    },
    {
      "epoch": 0.5231887401938163,
      "grad_norm": 0.2952311933040619,
      "learning_rate": 0.00010934948735299475,
      "loss": 0.9592,
      "step": 9070
    },
    {
      "epoch": 0.5234771573604061,
      "grad_norm": 0.32097524404525757,
      "learning_rate": 0.00010924924446562317,
      "loss": 1.0487,
      "step": 9075
    },
    {
      "epoch": 0.5237655745269958,
      "grad_norm": 0.3255499005317688,
      "learning_rate": 0.00010914899220294607,
      "loss": 0.9538,
      "step": 9080
    },
    {
      "epoch": 0.5240539916935856,
      "grad_norm": 0.3189411461353302,
      "learning_rate": 0.00010904873066658208,
      "loss": 1.0008,
      "step": 9085
    },
    {
      "epoch": 0.5243424088601754,
      "grad_norm": 0.29466235637664795,
      "learning_rate": 0.00010894845995815928,
      "loss": 0.999,
      "step": 9090
    },
    {
      "epoch": 0.5246308260267651,
      "grad_norm": 0.31723839044570923,
      "learning_rate": 0.00010884818017931495,
      "loss": 0.9266,
      "step": 9095
    },
    {
      "epoch": 0.5249192431933549,
      "grad_norm": 0.26466256380081177,
      "learning_rate": 0.00010874789143169568,
      "loss": 0.9945,
      "step": 9100
    },
    {
      "epoch": 0.5252076603599446,
      "grad_norm": 0.3435070514678955,
      "learning_rate": 0.00010864759381695701,
      "loss": 0.93,
      "step": 9105
    },
    {
      "epoch": 0.5254960775265344,
      "grad_norm": 0.3118496835231781,
      "learning_rate": 0.00010854728743676362,
      "loss": 0.9869,
      "step": 9110
    },
    {
      "epoch": 0.5257844946931242,
      "grad_norm": 0.2827567458152771,
      "learning_rate": 0.00010844697239278891,
      "loss": 0.9564,
      "step": 9115
    },
    {
      "epoch": 0.5260729118597139,
      "grad_norm": 0.27921804785728455,
      "learning_rate": 0.00010834664878671525,
      "loss": 0.9807,
      "step": 9120
    },
    {
      "epoch": 0.5263613290263036,
      "grad_norm": 0.2913965582847595,
      "learning_rate": 0.00010824631672023349,
      "loss": 0.9486,
      "step": 9125
    },
    {
      "epoch": 0.5266497461928934,
      "grad_norm": 0.3042261004447937,
      "learning_rate": 0.00010814597629504324,
      "loss": 0.9576,
      "step": 9130
    },
    {
      "epoch": 0.5269381633594832,
      "grad_norm": 0.31277990341186523,
      "learning_rate": 0.00010804562761285246,
      "loss": 0.9182,
      "step": 9135
    },
    {
      "epoch": 0.527226580526073,
      "grad_norm": 0.3407859802246094,
      "learning_rate": 0.00010794527077537755,
      "loss": 0.9203,
      "step": 9140
    },
    {
      "epoch": 0.5275149976926626,
      "grad_norm": 0.3242495059967041,
      "learning_rate": 0.00010784490588434309,
      "loss": 1.0222,
      "step": 9145
    },
    {
      "epoch": 0.5278034148592524,
      "grad_norm": 0.3136400282382965,
      "learning_rate": 0.00010774453304148192,
      "loss": 1.0051,
      "step": 9150
    },
    {
      "epoch": 0.5280918320258422,
      "grad_norm": 0.3262755572795868,
      "learning_rate": 0.00010764415234853484,
      "loss": 1.0198,
      "step": 9155
    },
    {
      "epoch": 0.528380249192432,
      "grad_norm": 0.3099469542503357,
      "learning_rate": 0.00010754376390725074,
      "loss": 0.9721,
      "step": 9160
    },
    {
      "epoch": 0.5286686663590217,
      "grad_norm": 0.3080025315284729,
      "learning_rate": 0.00010744336781938624,
      "loss": 0.9494,
      "step": 9165
    },
    {
      "epoch": 0.5289570835256114,
      "grad_norm": 0.4399293065071106,
      "learning_rate": 0.00010734296418670582,
      "loss": 0.9977,
      "step": 9170
    },
    {
      "epoch": 0.5292455006922012,
      "grad_norm": 0.3164156377315521,
      "learning_rate": 0.00010724255311098146,
      "loss": 0.9743,
      "step": 9175
    },
    {
      "epoch": 0.529533917858791,
      "grad_norm": 0.3243144750595093,
      "learning_rate": 0.00010714213469399283,
      "loss": 0.9734,
      "step": 9180
    },
    {
      "epoch": 0.5298223350253807,
      "grad_norm": 0.35423848032951355,
      "learning_rate": 0.00010704170903752695,
      "loss": 0.9779,
      "step": 9185
    },
    {
      "epoch": 0.5301107521919705,
      "grad_norm": 0.36763471364974976,
      "learning_rate": 0.00010694127624337826,
      "loss": 0.9587,
      "step": 9190
    },
    {
      "epoch": 0.5303991693585602,
      "grad_norm": 0.323824405670166,
      "learning_rate": 0.00010684083641334832,
      "loss": 1.007,
      "step": 9195
    },
    {
      "epoch": 0.53068758652515,
      "grad_norm": 0.3197695016860962,
      "learning_rate": 0.00010674038964924597,
      "loss": 0.9734,
      "step": 9200
    },
    {
      "epoch": 0.5309760036917397,
      "grad_norm": 0.31020426750183105,
      "learning_rate": 0.00010663993605288693,
      "loss": 1.0066,
      "step": 9205
    },
    {
      "epoch": 0.5312644208583295,
      "grad_norm": 0.30677443742752075,
      "learning_rate": 0.00010653947572609393,
      "loss": 0.9763,
      "step": 9210
    },
    {
      "epoch": 0.5315528380249193,
      "grad_norm": 0.28415751457214355,
      "learning_rate": 0.0001064390087706965,
      "loss": 1.0104,
      "step": 9215
    },
    {
      "epoch": 0.531841255191509,
      "grad_norm": 0.3487168252468109,
      "learning_rate": 0.0001063385352885309,
      "loss": 0.9149,
      "step": 9220
    },
    {
      "epoch": 0.5321296723580987,
      "grad_norm": 0.2908375561237335,
      "learning_rate": 0.00010623805538144,
      "loss": 0.8763,
      "step": 9225
    },
    {
      "epoch": 0.5324180895246885,
      "grad_norm": 0.30931004881858826,
      "learning_rate": 0.00010613756915127319,
      "loss": 0.9634,
      "step": 9230
    },
    {
      "epoch": 0.5327065066912783,
      "grad_norm": 0.28703534603118896,
      "learning_rate": 0.00010603707669988627,
      "loss": 0.9669,
      "step": 9235
    },
    {
      "epoch": 0.5329949238578681,
      "grad_norm": 0.3313189446926117,
      "learning_rate": 0.00010593657812914129,
      "loss": 1.0061,
      "step": 9240
    },
    {
      "epoch": 0.5332833410244577,
      "grad_norm": 0.2962750494480133,
      "learning_rate": 0.00010583607354090657,
      "loss": 0.9598,
      "step": 9245
    },
    {
      "epoch": 0.5335717581910475,
      "grad_norm": 0.32524964213371277,
      "learning_rate": 0.00010573556303705652,
      "loss": 1.0356,
      "step": 9250
    },
    {
      "epoch": 0.5338601753576373,
      "grad_norm": 0.31498393416404724,
      "learning_rate": 0.00010563504671947153,
      "loss": 0.9552,
      "step": 9255
    },
    {
      "epoch": 0.5341485925242271,
      "grad_norm": 0.2950249910354614,
      "learning_rate": 0.00010553452469003789,
      "loss": 0.9432,
      "step": 9260
    },
    {
      "epoch": 0.5344370096908168,
      "grad_norm": 0.27950915694236755,
      "learning_rate": 0.00010543399705064771,
      "loss": 1.0472,
      "step": 9265
    },
    {
      "epoch": 0.5347254268574065,
      "grad_norm": 0.3320115804672241,
      "learning_rate": 0.00010533346390319867,
      "loss": 1.057,
      "step": 9270
    },
    {
      "epoch": 0.5350138440239963,
      "grad_norm": 0.29796433448791504,
      "learning_rate": 0.00010523292534959419,
      "loss": 0.947,
      "step": 9275
    },
    {
      "epoch": 0.5353022611905861,
      "grad_norm": 0.3084598779678345,
      "learning_rate": 0.00010513238149174304,
      "loss": 0.9316,
      "step": 9280
    },
    {
      "epoch": 0.5355906783571758,
      "grad_norm": 0.2846716344356537,
      "learning_rate": 0.00010503183243155952,
      "loss": 0.9935,
      "step": 9285
    },
    {
      "epoch": 0.5358790955237656,
      "grad_norm": 0.33815798163414,
      "learning_rate": 0.00010493127827096298,
      "loss": 0.9659,
      "step": 9290
    },
    {
      "epoch": 0.5361675126903553,
      "grad_norm": 0.3921065628528595,
      "learning_rate": 0.00010483071911187818,
      "loss": 0.9826,
      "step": 9295
    },
    {
      "epoch": 0.5364559298569451,
      "grad_norm": 0.3282729983329773,
      "learning_rate": 0.00010473015505623477,
      "loss": 0.9255,
      "step": 9300
    },
    {
      "epoch": 0.5367443470235348,
      "grad_norm": 0.2944236099720001,
      "learning_rate": 0.00010462958620596745,
      "loss": 0.9622,
      "step": 9305
    },
    {
      "epoch": 0.5370327641901246,
      "grad_norm": 0.30026480555534363,
      "learning_rate": 0.00010452901266301574,
      "loss": 0.9721,
      "step": 9310
    },
    {
      "epoch": 0.5373211813567144,
      "grad_norm": 0.3067670464515686,
      "learning_rate": 0.00010442843452932394,
      "loss": 1.0303,
      "step": 9315
    },
    {
      "epoch": 0.5376095985233041,
      "grad_norm": 0.33017462491989136,
      "learning_rate": 0.000104327851906841,
      "loss": 0.9387,
      "step": 9320
    },
    {
      "epoch": 0.5378980156898938,
      "grad_norm": 0.2776556611061096,
      "learning_rate": 0.00010422726489752041,
      "loss": 0.8753,
      "step": 9325
    },
    {
      "epoch": 0.5381864328564836,
      "grad_norm": 0.316010981798172,
      "learning_rate": 0.00010412667360332013,
      "loss": 0.9669,
      "step": 9330
    },
    {
      "epoch": 0.5384748500230734,
      "grad_norm": 0.29539474844932556,
      "learning_rate": 0.00010402607812620244,
      "loss": 0.9677,
      "step": 9335
    },
    {
      "epoch": 0.5387632671896632,
      "grad_norm": 0.2884175479412079,
      "learning_rate": 0.00010392547856813384,
      "loss": 0.9596,
      "step": 9340
    },
    {
      "epoch": 0.5390516843562528,
      "grad_norm": 0.2999941110610962,
      "learning_rate": 0.00010382487503108503,
      "loss": 1.053,
      "step": 9345
    },
    {
      "epoch": 0.5393401015228426,
      "grad_norm": 0.352108895778656,
      "learning_rate": 0.00010372426761703067,
      "loss": 1.0039,
      "step": 9350
    },
    {
      "epoch": 0.5396285186894324,
      "grad_norm": 0.2817211449146271,
      "learning_rate": 0.00010362365642794943,
      "loss": 0.9056,
      "step": 9355
    },
    {
      "epoch": 0.5399169358560222,
      "grad_norm": 0.3019465506076813,
      "learning_rate": 0.00010352304156582376,
      "loss": 0.9113,
      "step": 9360
    },
    {
      "epoch": 0.5402053530226119,
      "grad_norm": 0.3267442286014557,
      "learning_rate": 0.00010342242313263974,
      "loss": 0.9913,
      "step": 9365
    },
    {
      "epoch": 0.5404937701892016,
      "grad_norm": 0.3325335383415222,
      "learning_rate": 0.0001033218012303873,
      "loss": 0.976,
      "step": 9370
    },
    {
      "epoch": 0.5407821873557914,
      "grad_norm": 0.3207154870033264,
      "learning_rate": 0.00010322117596105967,
      "loss": 0.9713,
      "step": 9375
    },
    {
      "epoch": 0.5410706045223812,
      "grad_norm": 0.2877350449562073,
      "learning_rate": 0.00010312054742665362,
      "loss": 0.9579,
      "step": 9380
    },
    {
      "epoch": 0.5413590216889709,
      "grad_norm": 0.3093964159488678,
      "learning_rate": 0.00010301991572916914,
      "loss": 0.9728,
      "step": 9385
    },
    {
      "epoch": 0.5416474388555607,
      "grad_norm": 0.30820104479789734,
      "learning_rate": 0.0001029192809706095,
      "loss": 0.9679,
      "step": 9390
    },
    {
      "epoch": 0.5419358560221504,
      "grad_norm": 0.3071676790714264,
      "learning_rate": 0.00010281864325298102,
      "loss": 0.9454,
      "step": 9395
    },
    {
      "epoch": 0.5422242731887402,
      "grad_norm": 0.3092137575149536,
      "learning_rate": 0.00010271800267829308,
      "loss": 0.9537,
      "step": 9400
    },
    {
      "epoch": 0.5425126903553299,
      "grad_norm": 0.3113473951816559,
      "learning_rate": 0.00010261735934855788,
      "loss": 0.997,
      "step": 9405
    },
    {
      "epoch": 0.5428011075219197,
      "grad_norm": 0.34891951084136963,
      "learning_rate": 0.00010251671336579048,
      "loss": 1.0035,
      "step": 9410
    },
    {
      "epoch": 0.5430895246885095,
      "grad_norm": 0.33309951424598694,
      "learning_rate": 0.00010241606483200857,
      "loss": 0.9692,
      "step": 9415
    },
    {
      "epoch": 0.5433779418550992,
      "grad_norm": 0.31338512897491455,
      "learning_rate": 0.00010231541384923248,
      "loss": 0.9677,
      "step": 9420
    },
    {
      "epoch": 0.5436663590216889,
      "grad_norm": 0.30394166707992554,
      "learning_rate": 0.00010221476051948502,
      "loss": 0.9955,
      "step": 9425
    },
    {
      "epoch": 0.5439547761882787,
      "grad_norm": 0.3012612462043762,
      "learning_rate": 0.0001021141049447913,
      "loss": 0.9789,
      "step": 9430
    },
    {
      "epoch": 0.5442431933548685,
      "grad_norm": 0.3203679919242859,
      "learning_rate": 0.00010201344722717881,
      "loss": 1.0433,
      "step": 9435
    },
    {
      "epoch": 0.5445316105214583,
      "grad_norm": 0.33140864968299866,
      "learning_rate": 0.00010191278746867714,
      "loss": 0.9415,
      "step": 9440
    },
    {
      "epoch": 0.544820027688048,
      "grad_norm": 0.3454248607158661,
      "learning_rate": 0.00010181212577131796,
      "loss": 1.0054,
      "step": 9445
    },
    {
      "epoch": 0.5451084448546377,
      "grad_norm": 0.31142881512641907,
      "learning_rate": 0.00010171146223713496,
      "loss": 0.9692,
      "step": 9450
    },
    {
      "epoch": 0.5453968620212275,
      "grad_norm": 0.29943177103996277,
      "learning_rate": 0.00010161079696816362,
      "loss": 0.9,
      "step": 9455
    },
    {
      "epoch": 0.5456852791878173,
      "grad_norm": 0.3128969669342041,
      "learning_rate": 0.00010151013006644128,
      "loss": 0.991,
      "step": 9460
    },
    {
      "epoch": 0.5459736963544071,
      "grad_norm": 0.32823264598846436,
      "learning_rate": 0.00010140946163400675,
      "loss": 0.9202,
      "step": 9465
    },
    {
      "epoch": 0.5462621135209967,
      "grad_norm": 0.31170547008514404,
      "learning_rate": 0.00010130879177290061,
      "loss": 0.9825,
      "step": 9470
    },
    {
      "epoch": 0.5465505306875865,
      "grad_norm": 0.31054195761680603,
      "learning_rate": 0.00010120812058516467,
      "loss": 0.9623,
      "step": 9475
    },
    {
      "epoch": 0.5468389478541763,
      "grad_norm": 0.2681087851524353,
      "learning_rate": 0.00010110744817284232,
      "loss": 0.8792,
      "step": 9480
    },
    {
      "epoch": 0.5471273650207661,
      "grad_norm": 0.40381041169166565,
      "learning_rate": 0.00010100677463797799,
      "loss": 0.9968,
      "step": 9485
    },
    {
      "epoch": 0.5474157821873558,
      "grad_norm": 0.2984217703342438,
      "learning_rate": 0.00010090610008261738,
      "loss": 0.9848,
      "step": 9490
    },
    {
      "epoch": 0.5477041993539455,
      "grad_norm": 0.312328040599823,
      "learning_rate": 0.00010080542460880711,
      "loss": 1.0204,
      "step": 9495
    },
    {
      "epoch": 0.5479926165205353,
      "grad_norm": 0.2800973057746887,
      "learning_rate": 0.00010070474831859486,
      "loss": 0.9493,
      "step": 9500
    },
    {
      "epoch": 0.5482810336871251,
      "grad_norm": 0.33222299814224243,
      "learning_rate": 0.00010060407131402902,
      "loss": 0.9683,
      "step": 9505
    },
    {
      "epoch": 0.5485694508537148,
      "grad_norm": 0.2953050434589386,
      "learning_rate": 0.0001005033936971588,
      "loss": 1.0451,
      "step": 9510
    },
    {
      "epoch": 0.5488578680203046,
      "grad_norm": 0.3120376765727997,
      "learning_rate": 0.00010040271557003394,
      "loss": 1.0147,
      "step": 9515
    },
    {
      "epoch": 0.5491462851868943,
      "grad_norm": 0.2867201268672943,
      "learning_rate": 0.00010030203703470477,
      "loss": 0.9954,
      "step": 9520
    },
    {
      "epoch": 0.5494347023534841,
      "grad_norm": 0.3178676962852478,
      "learning_rate": 0.00010020135819322203,
      "loss": 0.9652,
      "step": 9525
    },
    {
      "epoch": 0.5497231195200738,
      "grad_norm": 0.2976604104042053,
      "learning_rate": 0.00010010067914763668,
      "loss": 0.9695,
      "step": 9530
    },
    {
      "epoch": 0.5500115366866636,
      "grad_norm": 0.2862003445625305,
      "learning_rate": 0.0001,
      "loss": 1.0483,
      "step": 9535
    },
    {
      "epoch": 0.5502999538532534,
      "grad_norm": 0.32227590680122375,
      "learning_rate": 9.989932085236334e-05,
      "loss": 0.9444,
      "step": 9540
    },
    {
      "epoch": 0.5505883710198431,
      "grad_norm": 0.33216729760169983,
      "learning_rate": 9.979864180677801e-05,
      "loss": 0.9703,
      "step": 9545
    },
    {
      "epoch": 0.5508767881864328,
      "grad_norm": 0.31401535868644714,
      "learning_rate": 9.969796296529525e-05,
      "loss": 0.9568,
      "step": 9550
    },
    {
      "epoch": 0.5511652053530226,
      "grad_norm": 0.31950071454048157,
      "learning_rate": 9.959728442996606e-05,
      "loss": 0.9557,
      "step": 9555
    },
    {
      "epoch": 0.5514536225196124,
      "grad_norm": 0.2919849753379822,
      "learning_rate": 9.949660630284122e-05,
      "loss": 0.9718,
      "step": 9560
    },
    {
      "epoch": 0.5517420396862022,
      "grad_norm": 0.29922500252723694,
      "learning_rate": 9.939592868597097e-05,
      "loss": 0.9499,
      "step": 9565
    },
    {
      "epoch": 0.5520304568527918,
      "grad_norm": 0.32805556058883667,
      "learning_rate": 9.929525168140516e-05,
      "loss": 1.0231,
      "step": 9570
    },
    {
      "epoch": 0.5523188740193816,
      "grad_norm": 0.2967434525489807,
      "learning_rate": 9.919457539119293e-05,
      "loss": 0.9839,
      "step": 9575
    },
    {
      "epoch": 0.5526072911859714,
      "grad_norm": 0.3082091212272644,
      "learning_rate": 9.909389991738263e-05,
      "loss": 0.9543,
      "step": 9580
    },
    {
      "epoch": 0.5528957083525612,
      "grad_norm": 0.31558457016944885,
      "learning_rate": 9.899322536202205e-05,
      "loss": 0.929,
      "step": 9585
    },
    {
      "epoch": 0.5531841255191509,
      "grad_norm": 0.32369035482406616,
      "learning_rate": 9.889255182715769e-05,
      "loss": 0.9577,
      "step": 9590
    },
    {
      "epoch": 0.5534725426857406,
      "grad_norm": 0.30929917097091675,
      "learning_rate": 9.879187941483536e-05,
      "loss": 0.9378,
      "step": 9595
    },
    {
      "epoch": 0.5537609598523304,
      "grad_norm": 0.32841646671295166,
      "learning_rate": 9.869120822709946e-05,
      "loss": 0.9896,
      "step": 9600
    },
    {
      "epoch": 0.5540493770189202,
      "grad_norm": 0.31913015246391296,
      "learning_rate": 9.859053836599327e-05,
      "loss": 1.0478,
      "step": 9605
    },
    {
      "epoch": 0.5543377941855099,
      "grad_norm": 0.3005629777908325,
      "learning_rate": 9.848986993355877e-05,
      "loss": 1.0315,
      "step": 9610
    },
    {
      "epoch": 0.5546262113520997,
      "grad_norm": 0.3119855523109436,
      "learning_rate": 9.838920303183636e-05,
      "loss": 0.9761,
      "step": 9615
    },
    {
      "epoch": 0.5549146285186894,
      "grad_norm": 0.29266947507858276,
      "learning_rate": 9.828853776286505e-05,
      "loss": 0.921,
      "step": 9620
    },
    {
      "epoch": 0.5552030456852792,
      "grad_norm": 0.3018263876438141,
      "learning_rate": 9.818787422868204e-05,
      "loss": 0.967,
      "step": 9625
    },
    {
      "epoch": 0.5554914628518689,
      "grad_norm": 0.27815303206443787,
      "learning_rate": 9.808721253132289e-05,
      "loss": 0.9625,
      "step": 9630
    },
    {
      "epoch": 0.5557798800184587,
      "grad_norm": 0.34874555468559265,
      "learning_rate": 9.798655277282124e-05,
      "loss": 0.9527,
      "step": 9635
    },
    {
      "epoch": 0.5560682971850485,
      "grad_norm": 0.30870407819747925,
      "learning_rate": 9.78858950552087e-05,
      "loss": 0.9634,
      "step": 9640
    },
    {
      "epoch": 0.5563567143516382,
      "grad_norm": 0.3843367099761963,
      "learning_rate": 9.778523948051504e-05,
      "loss": 0.9582,
      "step": 9645
    },
    {
      "epoch": 0.5566451315182279,
      "grad_norm": 0.2970375716686249,
      "learning_rate": 9.768458615076751e-05,
      "loss": 0.9807,
      "step": 9650
    },
    {
      "epoch": 0.5569335486848177,
      "grad_norm": 0.29214388132095337,
      "learning_rate": 9.758393516799146e-05,
      "loss": 0.9638,
      "step": 9655
    },
    {
      "epoch": 0.5572219658514075,
      "grad_norm": 0.30512815713882446,
      "learning_rate": 9.748328663420952e-05,
      "loss": 0.9968,
      "step": 9660
    },
    {
      "epoch": 0.5575103830179973,
      "grad_norm": 0.35112571716308594,
      "learning_rate": 9.738264065144214e-05,
      "loss": 1.0067,
      "step": 9665
    },
    {
      "epoch": 0.5577988001845869,
      "grad_norm": 0.2951698899269104,
      "learning_rate": 9.728199732170696e-05,
      "loss": 0.9334,
      "step": 9670
    },
    {
      "epoch": 0.5580872173511767,
      "grad_norm": 0.30807235836982727,
      "learning_rate": 9.718135674701899e-05,
      "loss": 0.9796,
      "step": 9675
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 0.2801293730735779,
      "learning_rate": 9.708071902939054e-05,
      "loss": 0.9888,
      "step": 9680
    },
    {
      "epoch": 0.5586640516843563,
      "grad_norm": 0.3309718668460846,
      "learning_rate": 9.698008427083087e-05,
      "loss": 0.9936,
      "step": 9685
    },
    {
      "epoch": 0.558952468850946,
      "grad_norm": 0.2967337369918823,
      "learning_rate": 9.687945257334641e-05,
      "loss": 0.8834,
      "step": 9690
    },
    {
      "epoch": 0.5592408860175357,
      "grad_norm": 0.2973209321498871,
      "learning_rate": 9.677882403894036e-05,
      "loss": 0.8997,
      "step": 9695
    },
    {
      "epoch": 0.5595293031841255,
      "grad_norm": 0.28040575981140137,
      "learning_rate": 9.667819876961272e-05,
      "loss": 0.884,
      "step": 9700
    },
    {
      "epoch": 0.5598177203507153,
      "grad_norm": 0.3078805208206177,
      "learning_rate": 9.657757686736027e-05,
      "loss": 0.9495,
      "step": 9705
    },
    {
      "epoch": 0.560106137517305,
      "grad_norm": 0.3054788410663605,
      "learning_rate": 9.647695843417628e-05,
      "loss": 1.0202,
      "step": 9710
    },
    {
      "epoch": 0.5603945546838948,
      "grad_norm": 0.40618494153022766,
      "learning_rate": 9.637634357205058e-05,
      "loss": 0.9471,
      "step": 9715
    },
    {
      "epoch": 0.5606829718504845,
      "grad_norm": 0.33074966073036194,
      "learning_rate": 9.627573238296933e-05,
      "loss": 1.0419,
      "step": 9720
    },
    {
      "epoch": 0.5609713890170743,
      "grad_norm": 0.320569783449173,
      "learning_rate": 9.617512496891498e-05,
      "loss": 0.9851,
      "step": 9725
    },
    {
      "epoch": 0.561259806183664,
      "grad_norm": 0.2868152856826782,
      "learning_rate": 9.60745214318662e-05,
      "loss": 0.936,
      "step": 9730
    },
    {
      "epoch": 0.5615482233502538,
      "grad_norm": 0.3062056005001068,
      "learning_rate": 9.597392187379758e-05,
      "loss": 0.9793,
      "step": 9735
    },
    {
      "epoch": 0.5618366405168436,
      "grad_norm": 0.30484551191329956,
      "learning_rate": 9.58733263966799e-05,
      "loss": 0.9776,
      "step": 9740
    },
    {
      "epoch": 0.5621250576834333,
      "grad_norm": 0.32705235481262207,
      "learning_rate": 9.577273510247958e-05,
      "loss": 0.9882,
      "step": 9745
    },
    {
      "epoch": 0.562413474850023,
      "grad_norm": 0.3526208996772766,
      "learning_rate": 9.567214809315903e-05,
      "loss": 0.9331,
      "step": 9750
    },
    {
      "epoch": 0.5627018920166128,
      "grad_norm": 0.3613347113132477,
      "learning_rate": 9.557156547067607e-05,
      "loss": 1.0471,
      "step": 9755
    },
    {
      "epoch": 0.5629903091832026,
      "grad_norm": 0.29372262954711914,
      "learning_rate": 9.547098733698428e-05,
      "loss": 0.9677,
      "step": 9760
    },
    {
      "epoch": 0.5632787263497924,
      "grad_norm": 0.30436399579048157,
      "learning_rate": 9.537041379403258e-05,
      "loss": 0.9875,
      "step": 9765
    },
    {
      "epoch": 0.563567143516382,
      "grad_norm": 0.3027651607990265,
      "learning_rate": 9.526984494376524e-05,
      "loss": 1.0115,
      "step": 9770
    },
    {
      "epoch": 0.5638555606829718,
      "grad_norm": 0.3011035621166229,
      "learning_rate": 9.516928088812184e-05,
      "loss": 0.9937,
      "step": 9775
    },
    {
      "epoch": 0.5641439778495616,
      "grad_norm": 0.2939337193965912,
      "learning_rate": 9.5068721729037e-05,
      "loss": 0.9299,
      "step": 9780
    },
    {
      "epoch": 0.5644323950161514,
      "grad_norm": 0.2924240827560425,
      "learning_rate": 9.496816756844052e-05,
      "loss": 1.0545,
      "step": 9785
    },
    {
      "epoch": 0.5647208121827412,
      "grad_norm": 0.3084491789340973,
      "learning_rate": 9.486761850825694e-05,
      "loss": 0.9381,
      "step": 9790
    },
    {
      "epoch": 0.5650092293493308,
      "grad_norm": 0.29923975467681885,
      "learning_rate": 9.476707465040583e-05,
      "loss": 0.9886,
      "step": 9795
    },
    {
      "epoch": 0.5652976465159206,
      "grad_norm": 0.3646468222141266,
      "learning_rate": 9.466653609680137e-05,
      "loss": 0.973,
      "step": 9800
    },
    {
      "epoch": 0.5655860636825104,
      "grad_norm": 0.2971440553665161,
      "learning_rate": 9.456600294935231e-05,
      "loss": 1.0352,
      "step": 9805
    },
    {
      "epoch": 0.5658744808491002,
      "grad_norm": 0.27280595898628235,
      "learning_rate": 9.446547530996214e-05,
      "loss": 1.0609,
      "step": 9810
    },
    {
      "epoch": 0.5661628980156899,
      "grad_norm": 0.2964895963668823,
      "learning_rate": 9.436495328052846e-05,
      "loss": 0.9792,
      "step": 9815
    },
    {
      "epoch": 0.5664513151822796,
      "grad_norm": 0.30843856930732727,
      "learning_rate": 9.426443696294351e-05,
      "loss": 0.9715,
      "step": 9820
    },
    {
      "epoch": 0.5667397323488694,
      "grad_norm": 0.30792513489723206,
      "learning_rate": 9.416392645909347e-05,
      "loss": 0.9974,
      "step": 9825
    },
    {
      "epoch": 0.5670281495154592,
      "grad_norm": 0.29897308349609375,
      "learning_rate": 9.406342187085875e-05,
      "loss": 0.9381,
      "step": 9830
    },
    {
      "epoch": 0.5673165666820489,
      "grad_norm": 0.33265048265457153,
      "learning_rate": 9.396292330011377e-05,
      "loss": 0.9877,
      "step": 9835
    },
    {
      "epoch": 0.5676049838486387,
      "grad_norm": 0.29707449674606323,
      "learning_rate": 9.386243084872682e-05,
      "loss": 0.9378,
      "step": 9840
    },
    {
      "epoch": 0.5678934010152284,
      "grad_norm": 0.31178462505340576,
      "learning_rate": 9.376194461856001e-05,
      "loss": 0.9454,
      "step": 9845
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.26626142859458923,
      "learning_rate": 9.36614647114691e-05,
      "loss": 1.018,
      "step": 9850
    },
    {
      "epoch": 0.5684702353484079,
      "grad_norm": 0.4671937823295593,
      "learning_rate": 9.356099122930352e-05,
      "loss": 0.9548,
      "step": 9855
    },
    {
      "epoch": 0.5687586525149977,
      "grad_norm": 0.31160303950309753,
      "learning_rate": 9.34605242739061e-05,
      "loss": 1.0282,
      "step": 9860
    },
    {
      "epoch": 0.5690470696815875,
      "grad_norm": 0.2990851104259491,
      "learning_rate": 9.336006394711308e-05,
      "loss": 0.9297,
      "step": 9865
    },
    {
      "epoch": 0.5693354868481773,
      "grad_norm": 0.2787928879261017,
      "learning_rate": 9.325961035075405e-05,
      "loss": 0.938,
      "step": 9870
    },
    {
      "epoch": 0.5696239040147669,
      "grad_norm": 0.30769744515419006,
      "learning_rate": 9.315916358665166e-05,
      "loss": 0.9144,
      "step": 9875
    },
    {
      "epoch": 0.5699123211813567,
      "grad_norm": 0.30106931924819946,
      "learning_rate": 9.305872375662176e-05,
      "loss": 0.9833,
      "step": 9880
    },
    {
      "epoch": 0.5702007383479465,
      "grad_norm": 0.3208131492137909,
      "learning_rate": 9.295829096247304e-05,
      "loss": 1.0075,
      "step": 9885
    },
    {
      "epoch": 0.5704891555145363,
      "grad_norm": 0.31395384669303894,
      "learning_rate": 9.285786530600718e-05,
      "loss": 0.9917,
      "step": 9890
    },
    {
      "epoch": 0.5707775726811259,
      "grad_norm": 0.32120105624198914,
      "learning_rate": 9.275744688901858e-05,
      "loss": 0.9922,
      "step": 9895
    },
    {
      "epoch": 0.5710659898477157,
      "grad_norm": 0.30216944217681885,
      "learning_rate": 9.26570358132942e-05,
      "loss": 0.9661,
      "step": 9900
    },
    {
      "epoch": 0.5713544070143055,
      "grad_norm": 0.344277560710907,
      "learning_rate": 9.255663218061379e-05,
      "loss": 0.9521,
      "step": 9905
    },
    {
      "epoch": 0.5716428241808953,
      "grad_norm": 0.2767544388771057,
      "learning_rate": 9.245623609274928e-05,
      "loss": 0.8752,
      "step": 9910
    },
    {
      "epoch": 0.571931241347485,
      "grad_norm": 0.29667040705680847,
      "learning_rate": 9.235584765146519e-05,
      "loss": 0.9666,
      "step": 9915
    },
    {
      "epoch": 0.5722196585140747,
      "grad_norm": 0.2905004322528839,
      "learning_rate": 9.225546695851815e-05,
      "loss": 0.9477,
      "step": 9920
    },
    {
      "epoch": 0.5725080756806645,
      "grad_norm": 0.3022611439228058,
      "learning_rate": 9.215509411565695e-05,
      "loss": 0.9692,
      "step": 9925
    },
    {
      "epoch": 0.5727964928472543,
      "grad_norm": 0.31084057688713074,
      "learning_rate": 9.20547292246225e-05,
      "loss": 0.975,
      "step": 9930
    },
    {
      "epoch": 0.573084910013844,
      "grad_norm": 0.3119238615036011,
      "learning_rate": 9.195437238714755e-05,
      "loss": 0.9913,
      "step": 9935
    },
    {
      "epoch": 0.5733733271804338,
      "grad_norm": 0.2962389588356018,
      "learning_rate": 9.185402370495677e-05,
      "loss": 0.9217,
      "step": 9940
    },
    {
      "epoch": 0.5736617443470236,
      "grad_norm": 0.372999370098114,
      "learning_rate": 9.17536832797665e-05,
      "loss": 0.9033,
      "step": 9945
    },
    {
      "epoch": 0.5739501615136133,
      "grad_norm": 0.3164578974246979,
      "learning_rate": 9.165335121328477e-05,
      "loss": 0.9219,
      "step": 9950
    },
    {
      "epoch": 0.574238578680203,
      "grad_norm": 0.3055517375469208,
      "learning_rate": 9.155302760721112e-05,
      "loss": 0.9748,
      "step": 9955
    },
    {
      "epoch": 0.5745269958467928,
      "grad_norm": 0.32085710763931274,
      "learning_rate": 9.14527125632364e-05,
      "loss": 1.0123,
      "step": 9960
    },
    {
      "epoch": 0.5748154130133826,
      "grad_norm": 0.30235058069229126,
      "learning_rate": 9.135240618304301e-05,
      "loss": 0.9886,
      "step": 9965
    },
    {
      "epoch": 0.5751038301799724,
      "grad_norm": 0.3043424189090729,
      "learning_rate": 9.125210856830433e-05,
      "loss": 0.962,
      "step": 9970
    },
    {
      "epoch": 0.575392247346562,
      "grad_norm": 0.32040169835090637,
      "learning_rate": 9.115181982068506e-05,
      "loss": 0.9993,
      "step": 9975
    },
    {
      "epoch": 0.5756806645131518,
      "grad_norm": 0.3150070607662201,
      "learning_rate": 9.105154004184071e-05,
      "loss": 0.9307,
      "step": 9980
    },
    {
      "epoch": 0.5759690816797416,
      "grad_norm": 0.32180115580558777,
      "learning_rate": 9.095126933341793e-05,
      "loss": 0.9771,
      "step": 9985
    },
    {
      "epoch": 0.5762574988463314,
      "grad_norm": 0.3003450632095337,
      "learning_rate": 9.085100779705398e-05,
      "loss": 1.0007,
      "step": 9990
    },
    {
      "epoch": 0.576545916012921,
      "grad_norm": 0.3017977774143219,
      "learning_rate": 9.075075553437687e-05,
      "loss": 0.9714,
      "step": 9995
    },
    {
      "epoch": 0.5768343331795108,
      "grad_norm": 0.29309558868408203,
      "learning_rate": 9.065051264700527e-05,
      "loss": 0.8992,
      "step": 10000
    },
    {
      "epoch": 0.5771227503461006,
      "grad_norm": 0.2850026786327362,
      "learning_rate": 9.05502792365483e-05,
      "loss": 0.9094,
      "step": 10005
    },
    {
      "epoch": 0.5774111675126904,
      "grad_norm": 0.30001845955848694,
      "learning_rate": 9.045005540460552e-05,
      "loss": 0.9405,
      "step": 10010
    },
    {
      "epoch": 0.5776995846792801,
      "grad_norm": 0.29599326848983765,
      "learning_rate": 9.03498412527667e-05,
      "loss": 0.9898,
      "step": 10015
    },
    {
      "epoch": 0.5779880018458698,
      "grad_norm": 0.32731327414512634,
      "learning_rate": 9.024963688261186e-05,
      "loss": 1.0046,
      "step": 10020
    },
    {
      "epoch": 0.5782764190124596,
      "grad_norm": 0.28468936681747437,
      "learning_rate": 9.01494423957111e-05,
      "loss": 0.8711,
      "step": 10025
    },
    {
      "epoch": 0.5785648361790494,
      "grad_norm": 0.29854655265808105,
      "learning_rate": 9.004925789362446e-05,
      "loss": 0.9974,
      "step": 10030
    },
    {
      "epoch": 0.5788532533456391,
      "grad_norm": 0.28597164154052734,
      "learning_rate": 8.994908347790193e-05,
      "loss": 0.966,
      "step": 10035
    },
    {
      "epoch": 0.5791416705122289,
      "grad_norm": 0.293830007314682,
      "learning_rate": 8.984891925008321e-05,
      "loss": 0.9991,
      "step": 10040
    },
    {
      "epoch": 0.5794300876788187,
      "grad_norm": 0.30949392914772034,
      "learning_rate": 8.974876531169772e-05,
      "loss": 0.9751,
      "step": 10045
    },
    {
      "epoch": 0.5797185048454084,
      "grad_norm": 0.280025839805603,
      "learning_rate": 8.964862176426443e-05,
      "loss": 0.9497,
      "step": 10050
    },
    {
      "epoch": 0.5800069220119981,
      "grad_norm": 0.35741013288497925,
      "learning_rate": 8.954848870929171e-05,
      "loss": 0.9903,
      "step": 10055
    },
    {
      "epoch": 0.5802953391785879,
      "grad_norm": 0.3232552707195282,
      "learning_rate": 8.944836624827748e-05,
      "loss": 0.9788,
      "step": 10060
    },
    {
      "epoch": 0.5805837563451777,
      "grad_norm": 0.3030491769313812,
      "learning_rate": 8.93482544827087e-05,
      "loss": 0.9476,
      "step": 10065
    },
    {
      "epoch": 0.5808721735117675,
      "grad_norm": 0.2858760952949524,
      "learning_rate": 8.924815351406163e-05,
      "loss": 0.9144,
      "step": 10070
    },
    {
      "epoch": 0.5811605906783571,
      "grad_norm": 0.2993592321872711,
      "learning_rate": 8.914806344380156e-05,
      "loss": 0.9965,
      "step": 10075
    },
    {
      "epoch": 0.5814490078449469,
      "grad_norm": 0.2993376851081848,
      "learning_rate": 8.904798437338272e-05,
      "loss": 0.9078,
      "step": 10080
    },
    {
      "epoch": 0.5817374250115367,
      "grad_norm": 0.29207873344421387,
      "learning_rate": 8.89479164042482e-05,
      "loss": 0.9786,
      "step": 10085
    },
    {
      "epoch": 0.5820258421781265,
      "grad_norm": 0.29630357027053833,
      "learning_rate": 8.884785963782975e-05,
      "loss": 0.9935,
      "step": 10090
    },
    {
      "epoch": 0.5823142593447161,
      "grad_norm": 0.3105989694595337,
      "learning_rate": 8.874781417554797e-05,
      "loss": 0.9515,
      "step": 10095
    },
    {
      "epoch": 0.5826026765113059,
      "grad_norm": 0.31932732462882996,
      "learning_rate": 8.864778011881175e-05,
      "loss": 1.0112,
      "step": 10100
    },
    {
      "epoch": 0.5828910936778957,
      "grad_norm": 0.3012641668319702,
      "learning_rate": 8.854775756901866e-05,
      "loss": 0.9548,
      "step": 10105
    },
    {
      "epoch": 0.5831795108444855,
      "grad_norm": 0.3154539465904236,
      "learning_rate": 8.84477466275544e-05,
      "loss": 0.937,
      "step": 10110
    },
    {
      "epoch": 0.5834679280110752,
      "grad_norm": 0.31845125555992126,
      "learning_rate": 8.834774739579304e-05,
      "loss": 0.9737,
      "step": 10115
    },
    {
      "epoch": 0.583756345177665,
      "grad_norm": 0.29611679911613464,
      "learning_rate": 8.824775997509675e-05,
      "loss": 0.9389,
      "step": 10120
    },
    {
      "epoch": 0.5840447623442547,
      "grad_norm": 0.28183600306510925,
      "learning_rate": 8.814778446681563e-05,
      "loss": 0.9568,
      "step": 10125
    },
    {
      "epoch": 0.5843331795108445,
      "grad_norm": 0.2992686629295349,
      "learning_rate": 8.80478209722879e-05,
      "loss": 1.0136,
      "step": 10130
    },
    {
      "epoch": 0.5846215966774342,
      "grad_norm": 0.3107997477054596,
      "learning_rate": 8.794786959283938e-05,
      "loss": 0.9818,
      "step": 10135
    },
    {
      "epoch": 0.584910013844024,
      "grad_norm": 0.29976189136505127,
      "learning_rate": 8.784793042978384e-05,
      "loss": 1.0196,
      "step": 10140
    },
    {
      "epoch": 0.5851984310106138,
      "grad_norm": 0.26917529106140137,
      "learning_rate": 8.774800358442246e-05,
      "loss": 0.9463,
      "step": 10145
    },
    {
      "epoch": 0.5854868481772035,
      "grad_norm": 0.295770525932312,
      "learning_rate": 8.764808915804401e-05,
      "loss": 0.9743,
      "step": 10150
    },
    {
      "epoch": 0.5857752653437933,
      "grad_norm": 0.2809622883796692,
      "learning_rate": 8.754818725192475e-05,
      "loss": 0.9612,
      "step": 10155
    },
    {
      "epoch": 0.586063682510383,
      "grad_norm": 0.30057060718536377,
      "learning_rate": 8.744829796732812e-05,
      "loss": 0.9618,
      "step": 10160
    },
    {
      "epoch": 0.5863520996769728,
      "grad_norm": 0.31506556272506714,
      "learning_rate": 8.734842140550488e-05,
      "loss": 0.99,
      "step": 10165
    },
    {
      "epoch": 0.5866405168435626,
      "grad_norm": 0.2896447777748108,
      "learning_rate": 8.724855766769282e-05,
      "loss": 0.9272,
      "step": 10170
    },
    {
      "epoch": 0.5869289340101523,
      "grad_norm": 0.32177969813346863,
      "learning_rate": 8.714870685511676e-05,
      "loss": 0.9798,
      "step": 10175
    },
    {
      "epoch": 0.587217351176742,
      "grad_norm": 0.33434587717056274,
      "learning_rate": 8.70488690689884e-05,
      "loss": 1.0404,
      "step": 10180
    },
    {
      "epoch": 0.5875057683433318,
      "grad_norm": 0.28400492668151855,
      "learning_rate": 8.694904441050625e-05,
      "loss": 0.9206,
      "step": 10185
    },
    {
      "epoch": 0.5877941855099216,
      "grad_norm": 0.3033732771873474,
      "learning_rate": 8.684923298085555e-05,
      "loss": 0.9642,
      "step": 10190
    },
    {
      "epoch": 0.5880826026765114,
      "grad_norm": 0.3117521405220032,
      "learning_rate": 8.674943488120801e-05,
      "loss": 1.0299,
      "step": 10195
    },
    {
      "epoch": 0.588371019843101,
      "grad_norm": 0.2808005213737488,
      "learning_rate": 8.6649650212722e-05,
      "loss": 0.9186,
      "step": 10200
    },
    {
      "epoch": 0.5886594370096908,
      "grad_norm": 0.306206613779068,
      "learning_rate": 8.654987907654214e-05,
      "loss": 0.9317,
      "step": 10205
    },
    {
      "epoch": 0.5889478541762806,
      "grad_norm": 0.28762176632881165,
      "learning_rate": 8.645012157379941e-05,
      "loss": 0.9352,
      "step": 10210
    },
    {
      "epoch": 0.5892362713428704,
      "grad_norm": 0.3348996639251709,
      "learning_rate": 8.635037780561093e-05,
      "loss": 0.9577,
      "step": 10215
    },
    {
      "epoch": 0.58952468850946,
      "grad_norm": 0.3033459186553955,
      "learning_rate": 8.625064787307986e-05,
      "loss": 0.9789,
      "step": 10220
    },
    {
      "epoch": 0.5898131056760498,
      "grad_norm": 0.28775766491889954,
      "learning_rate": 8.615093187729544e-05,
      "loss": 0.9979,
      "step": 10225
    },
    {
      "epoch": 0.5901015228426396,
      "grad_norm": 0.312083899974823,
      "learning_rate": 8.605122991933271e-05,
      "loss": 0.8772,
      "step": 10230
    },
    {
      "epoch": 0.5903899400092294,
      "grad_norm": 0.3176785409450531,
      "learning_rate": 8.595154210025251e-05,
      "loss": 0.9501,
      "step": 10235
    },
    {
      "epoch": 0.5906783571758191,
      "grad_norm": 0.3222027122974396,
      "learning_rate": 8.585186852110134e-05,
      "loss": 0.9476,
      "step": 10240
    },
    {
      "epoch": 0.5909667743424089,
      "grad_norm": 0.2980990707874298,
      "learning_rate": 8.575220928291123e-05,
      "loss": 1.0012,
      "step": 10245
    },
    {
      "epoch": 0.5912551915089986,
      "grad_norm": 0.3284415006637573,
      "learning_rate": 8.565256448669976e-05,
      "loss": 0.9454,
      "step": 10250
    },
    {
      "epoch": 0.5915436086755884,
      "grad_norm": 0.41807207465171814,
      "learning_rate": 8.555293423346977e-05,
      "loss": 1.0065,
      "step": 10255
    },
    {
      "epoch": 0.5918320258421781,
      "grad_norm": 0.3130856156349182,
      "learning_rate": 8.545331862420944e-05,
      "loss": 1.0001,
      "step": 10260
    },
    {
      "epoch": 0.5921204430087679,
      "grad_norm": 0.3069097697734833,
      "learning_rate": 8.535371775989204e-05,
      "loss": 0.9449,
      "step": 10265
    },
    {
      "epoch": 0.5924088601753577,
      "grad_norm": 0.38203707337379456,
      "learning_rate": 8.525413174147598e-05,
      "loss": 0.9914,
      "step": 10270
    },
    {
      "epoch": 0.5926972773419474,
      "grad_norm": 0.3196433186531067,
      "learning_rate": 8.515456066990455e-05,
      "loss": 0.9902,
      "step": 10275
    },
    {
      "epoch": 0.5929856945085371,
      "grad_norm": 0.294821172952652,
      "learning_rate": 8.505500464610584e-05,
      "loss": 0.9215,
      "step": 10280
    },
    {
      "epoch": 0.5932741116751269,
      "grad_norm": 0.28446537256240845,
      "learning_rate": 8.495546377099287e-05,
      "loss": 1.0246,
      "step": 10285
    },
    {
      "epoch": 0.5935625288417167,
      "grad_norm": 0.2978392243385315,
      "learning_rate": 8.485593814546307e-05,
      "loss": 0.9981,
      "step": 10290
    },
    {
      "epoch": 0.5938509460083065,
      "grad_norm": 0.30145201086997986,
      "learning_rate": 8.475642787039867e-05,
      "loss": 0.9243,
      "step": 10295
    },
    {
      "epoch": 0.5941393631748961,
      "grad_norm": 0.3130795955657959,
      "learning_rate": 8.465693304666606e-05,
      "loss": 1.0306,
      "step": 10300
    },
    {
      "epoch": 0.5944277803414859,
      "grad_norm": 0.3145841062068939,
      "learning_rate": 8.455745377511626e-05,
      "loss": 1.0172,
      "step": 10305
    },
    {
      "epoch": 0.5947161975080757,
      "grad_norm": 0.3414532244205475,
      "learning_rate": 8.445799015658427e-05,
      "loss": 0.9902,
      "step": 10310
    },
    {
      "epoch": 0.5950046146746655,
      "grad_norm": 0.2802201807498932,
      "learning_rate": 8.435854229188934e-05,
      "loss": 0.9086,
      "step": 10315
    },
    {
      "epoch": 0.5952930318412551,
      "grad_norm": 0.31181442737579346,
      "learning_rate": 8.425911028183479e-05,
      "loss": 0.9267,
      "step": 10320
    },
    {
      "epoch": 0.5955814490078449,
      "grad_norm": 0.3048861026763916,
      "learning_rate": 8.415969422720778e-05,
      "loss": 0.965,
      "step": 10325
    },
    {
      "epoch": 0.5958698661744347,
      "grad_norm": 0.28428834676742554,
      "learning_rate": 8.406029422877937e-05,
      "loss": 0.8888,
      "step": 10330
    },
    {
      "epoch": 0.5961582833410245,
      "grad_norm": 0.3297795355319977,
      "learning_rate": 8.396091038730431e-05,
      "loss": 0.9079,
      "step": 10335
    },
    {
      "epoch": 0.5964467005076142,
      "grad_norm": 0.331345796585083,
      "learning_rate": 8.386154280352094e-05,
      "loss": 0.8413,
      "step": 10340
    },
    {
      "epoch": 0.596735117674204,
      "grad_norm": 0.4152957499027252,
      "learning_rate": 8.37621915781512e-05,
      "loss": 0.9664,
      "step": 10345
    },
    {
      "epoch": 0.5970235348407937,
      "grad_norm": 0.31456562876701355,
      "learning_rate": 8.366285681190039e-05,
      "loss": 0.9788,
      "step": 10350
    },
    {
      "epoch": 0.5973119520073835,
      "grad_norm": 0.29719334840774536,
      "learning_rate": 8.356353860545715e-05,
      "loss": 0.9616,
      "step": 10355
    },
    {
      "epoch": 0.5976003691739732,
      "grad_norm": 0.3752502501010895,
      "learning_rate": 8.34642370594933e-05,
      "loss": 0.9854,
      "step": 10360
    },
    {
      "epoch": 0.597888786340563,
      "grad_norm": 0.3314830958843231,
      "learning_rate": 8.336495227466385e-05,
      "loss": 0.92,
      "step": 10365
    },
    {
      "epoch": 0.5981772035071528,
      "grad_norm": 0.2938712239265442,
      "learning_rate": 8.326568435160677e-05,
      "loss": 0.8947,
      "step": 10370
    },
    {
      "epoch": 0.5984656206737425,
      "grad_norm": 0.28025558590888977,
      "learning_rate": 8.316643339094285e-05,
      "loss": 0.9338,
      "step": 10375
    },
    {
      "epoch": 0.5987540378403322,
      "grad_norm": 0.31624218821525574,
      "learning_rate": 8.306719949327588e-05,
      "loss": 0.9782,
      "step": 10380
    },
    {
      "epoch": 0.599042455006922,
      "grad_norm": 0.3080288767814636,
      "learning_rate": 8.296798275919217e-05,
      "loss": 0.9119,
      "step": 10385
    },
    {
      "epoch": 0.5993308721735118,
      "grad_norm": 0.31243014335632324,
      "learning_rate": 8.286878328926077e-05,
      "loss": 0.9489,
      "step": 10390
    },
    {
      "epoch": 0.5996192893401016,
      "grad_norm": 0.28355371952056885,
      "learning_rate": 8.276960118403314e-05,
      "loss": 0.9557,
      "step": 10395
    },
    {
      "epoch": 0.5999077065066912,
      "grad_norm": 0.30809932947158813,
      "learning_rate": 8.26704365440432e-05,
      "loss": 0.9632,
      "step": 10400
    },
    {
      "epoch": 0.600196123673281,
      "grad_norm": 0.32145076990127563,
      "learning_rate": 8.257128946980713e-05,
      "loss": 0.9607,
      "step": 10405
    },
    {
      "epoch": 0.6004845408398708,
      "grad_norm": 0.30059847235679626,
      "learning_rate": 8.247216006182326e-05,
      "loss": 0.9015,
      "step": 10410
    },
    {
      "epoch": 0.6007729580064606,
      "grad_norm": 0.2907862067222595,
      "learning_rate": 8.237304842057214e-05,
      "loss": 0.9672,
      "step": 10415
    },
    {
      "epoch": 0.6010613751730502,
      "grad_norm": 0.27866873145103455,
      "learning_rate": 8.227395464651618e-05,
      "loss": 0.9158,
      "step": 10420
    },
    {
      "epoch": 0.60134979233964,
      "grad_norm": 0.33694329857826233,
      "learning_rate": 8.217487884009979e-05,
      "loss": 0.9546,
      "step": 10425
    },
    {
      "epoch": 0.6016382095062298,
      "grad_norm": 0.28351861238479614,
      "learning_rate": 8.20758211017491e-05,
      "loss": 1.0306,
      "step": 10430
    },
    {
      "epoch": 0.6019266266728196,
      "grad_norm": 0.3070996105670929,
      "learning_rate": 8.197678153187185e-05,
      "loss": 1.0044,
      "step": 10435
    },
    {
      "epoch": 0.6022150438394093,
      "grad_norm": 0.29808667302131653,
      "learning_rate": 8.187776023085762e-05,
      "loss": 0.9833,
      "step": 10440
    },
    {
      "epoch": 0.602503461005999,
      "grad_norm": 0.2630424499511719,
      "learning_rate": 8.177875729907714e-05,
      "loss": 0.8795,
      "step": 10445
    },
    {
      "epoch": 0.6027918781725888,
      "grad_norm": 0.3080122470855713,
      "learning_rate": 8.167977283688282e-05,
      "loss": 0.9663,
      "step": 10450
    },
    {
      "epoch": 0.6030802953391786,
      "grad_norm": 0.3310147821903229,
      "learning_rate": 8.158080694460807e-05,
      "loss": 0.9654,
      "step": 10455
    },
    {
      "epoch": 0.6033687125057683,
      "grad_norm": 0.27639147639274597,
      "learning_rate": 8.148185972256778e-05,
      "loss": 0.9594,
      "step": 10460
    },
    {
      "epoch": 0.6036571296723581,
      "grad_norm": 0.298957496881485,
      "learning_rate": 8.138293127105768e-05,
      "loss": 0.9537,
      "step": 10465
    },
    {
      "epoch": 0.6039455468389479,
      "grad_norm": 0.28867480158805847,
      "learning_rate": 8.128402169035451e-05,
      "loss": 0.903,
      "step": 10470
    },
    {
      "epoch": 0.6042339640055376,
      "grad_norm": 0.3202032148838043,
      "learning_rate": 8.118513108071599e-05,
      "loss": 0.8776,
      "step": 10475
    },
    {
      "epoch": 0.6045223811721273,
      "grad_norm": 0.29487964510917664,
      "learning_rate": 8.108625954238051e-05,
      "loss": 0.9058,
      "step": 10480
    },
    {
      "epoch": 0.6048107983387171,
      "grad_norm": 0.29023751616477966,
      "learning_rate": 8.098740717556718e-05,
      "loss": 0.9536,
      "step": 10485
    },
    {
      "epoch": 0.6050992155053069,
      "grad_norm": 0.28486311435699463,
      "learning_rate": 8.088857408047562e-05,
      "loss": 0.8713,
      "step": 10490
    },
    {
      "epoch": 0.6053876326718967,
      "grad_norm": 0.30855536460876465,
      "learning_rate": 8.0789760357286e-05,
      "loss": 0.9776,
      "step": 10495
    },
    {
      "epoch": 0.6056760498384864,
      "grad_norm": 0.3332061767578125,
      "learning_rate": 8.06909661061588e-05,
      "loss": 0.9547,
      "step": 10500
    },
    {
      "epoch": 0.6059644670050761,
      "grad_norm": 0.3112873435020447,
      "learning_rate": 8.059219142723474e-05,
      "loss": 0.964,
      "step": 10505
    },
    {
      "epoch": 0.6062528841716659,
      "grad_norm": 0.3010444641113281,
      "learning_rate": 8.049343642063477e-05,
      "loss": 0.9752,
      "step": 10510
    },
    {
      "epoch": 0.6065413013382557,
      "grad_norm": 0.33293378353118896,
      "learning_rate": 8.039470118645982e-05,
      "loss": 0.9532,
      "step": 10515
    },
    {
      "epoch": 0.6068297185048455,
      "grad_norm": 0.2785302698612213,
      "learning_rate": 8.029598582479088e-05,
      "loss": 1.0026,
      "step": 10520
    },
    {
      "epoch": 0.6071181356714351,
      "grad_norm": 0.2951408326625824,
      "learning_rate": 8.019729043568866e-05,
      "loss": 0.9447,
      "step": 10525
    },
    {
      "epoch": 0.6074065528380249,
      "grad_norm": 0.3003128170967102,
      "learning_rate": 8.00986151191938e-05,
      "loss": 1.0167,
      "step": 10530
    },
    {
      "epoch": 0.6076949700046147,
      "grad_norm": 0.2881382405757904,
      "learning_rate": 7.999995997532641e-05,
      "loss": 0.9712,
      "step": 10535
    },
    {
      "epoch": 0.6079833871712045,
      "grad_norm": 0.3272612392902374,
      "learning_rate": 7.990132510408625e-05,
      "loss": 0.932,
      "step": 10540
    },
    {
      "epoch": 0.6082718043377942,
      "grad_norm": 0.29652857780456543,
      "learning_rate": 7.980271060545255e-05,
      "loss": 0.9897,
      "step": 10545
    },
    {
      "epoch": 0.6085602215043839,
      "grad_norm": 0.2992599308490753,
      "learning_rate": 7.970411657938381e-05,
      "loss": 0.9029,
      "step": 10550
    },
    {
      "epoch": 0.6088486386709737,
      "grad_norm": 0.285044401884079,
      "learning_rate": 7.960554312581789e-05,
      "loss": 0.9396,
      "step": 10555
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 0.2942273020744324,
      "learning_rate": 7.95069903446717e-05,
      "loss": 0.9674,
      "step": 10560
    },
    {
      "epoch": 0.6094254730041532,
      "grad_norm": 0.30003759264945984,
      "learning_rate": 7.940845833584123e-05,
      "loss": 0.9442,
      "step": 10565
    },
    {
      "epoch": 0.609713890170743,
      "grad_norm": 0.2985435426235199,
      "learning_rate": 7.930994719920142e-05,
      "loss": 0.9265,
      "step": 10570
    },
    {
      "epoch": 0.6100023073373327,
      "grad_norm": 0.29074397683143616,
      "learning_rate": 7.921145703460603e-05,
      "loss": 1.0108,
      "step": 10575
    },
    {
      "epoch": 0.6102907245039225,
      "grad_norm": 0.35004884004592896,
      "learning_rate": 7.911298794188761e-05,
      "loss": 0.9595,
      "step": 10580
    },
    {
      "epoch": 0.6105791416705122,
      "grad_norm": 0.3128565549850464,
      "learning_rate": 7.901454002085731e-05,
      "loss": 0.9111,
      "step": 10585
    },
    {
      "epoch": 0.610867558837102,
      "grad_norm": 0.34627801179885864,
      "learning_rate": 7.891611337130482e-05,
      "loss": 0.9754,
      "step": 10590
    },
    {
      "epoch": 0.6111559760036918,
      "grad_norm": 0.28770461678504944,
      "learning_rate": 7.881770809299833e-05,
      "loss": 0.9568,
      "step": 10595
    },
    {
      "epoch": 0.6114443931702815,
      "grad_norm": 0.43506577610969543,
      "learning_rate": 7.871932428568418e-05,
      "loss": 0.942,
      "step": 10600
    },
    {
      "epoch": 0.6117328103368712,
      "grad_norm": 0.2795199751853943,
      "learning_rate": 7.862096204908726e-05,
      "loss": 0.9539,
      "step": 10605
    },
    {
      "epoch": 0.612021227503461,
      "grad_norm": 0.28520259261131287,
      "learning_rate": 7.852262148291028e-05,
      "loss": 0.9625,
      "step": 10610
    },
    {
      "epoch": 0.6123096446700508,
      "grad_norm": 0.3216764032840729,
      "learning_rate": 7.842430268683422e-05,
      "loss": 1.0665,
      "step": 10615
    },
    {
      "epoch": 0.6125980618366406,
      "grad_norm": 0.29916471242904663,
      "learning_rate": 7.832600576051779e-05,
      "loss": 0.9015,
      "step": 10620
    },
    {
      "epoch": 0.6128864790032302,
      "grad_norm": 0.2716759443283081,
      "learning_rate": 7.822773080359778e-05,
      "loss": 0.9538,
      "step": 10625
    },
    {
      "epoch": 0.61317489616982,
      "grad_norm": 0.31317955255508423,
      "learning_rate": 7.812947791568845e-05,
      "loss": 1.0093,
      "step": 10630
    },
    {
      "epoch": 0.6134633133364098,
      "grad_norm": 0.3098663091659546,
      "learning_rate": 7.803124719638184e-05,
      "loss": 0.9008,
      "step": 10635
    },
    {
      "epoch": 0.6137517305029996,
      "grad_norm": 0.30039840936660767,
      "learning_rate": 7.793303874524752e-05,
      "loss": 0.9435,
      "step": 10640
    },
    {
      "epoch": 0.6140401476695893,
      "grad_norm": 0.28601938486099243,
      "learning_rate": 7.783485266183242e-05,
      "loss": 0.9447,
      "step": 10645
    },
    {
      "epoch": 0.614328564836179,
      "grad_norm": 0.31414878368377686,
      "learning_rate": 7.773668904566085e-05,
      "loss": 0.9465,
      "step": 10650
    },
    {
      "epoch": 0.6146169820027688,
      "grad_norm": 0.2823723554611206,
      "learning_rate": 7.763854799623433e-05,
      "loss": 0.9305,
      "step": 10655
    },
    {
      "epoch": 0.6149053991693586,
      "grad_norm": 0.28703320026397705,
      "learning_rate": 7.75404296130315e-05,
      "loss": 0.9711,
      "step": 10660
    },
    {
      "epoch": 0.6151938163359483,
      "grad_norm": 0.36116015911102295,
      "learning_rate": 7.744233399550806e-05,
      "loss": 0.967,
      "step": 10665
    },
    {
      "epoch": 0.6154822335025381,
      "grad_norm": 0.293555349111557,
      "learning_rate": 7.734426124309656e-05,
      "loss": 1.0009,
      "step": 10670
    },
    {
      "epoch": 0.6157706506691278,
      "grad_norm": 0.3458692729473114,
      "learning_rate": 7.724621145520645e-05,
      "loss": 1.0352,
      "step": 10675
    },
    {
      "epoch": 0.6160590678357176,
      "grad_norm": 0.2964923679828644,
      "learning_rate": 7.714818473122385e-05,
      "loss": 0.9484,
      "step": 10680
    },
    {
      "epoch": 0.6163474850023073,
      "grad_norm": 0.30109351873397827,
      "learning_rate": 7.705018117051157e-05,
      "loss": 0.9068,
      "step": 10685
    },
    {
      "epoch": 0.6166359021688971,
      "grad_norm": 0.26956018805503845,
      "learning_rate": 7.695220087240885e-05,
      "loss": 0.9494,
      "step": 10690
    },
    {
      "epoch": 0.6169243193354869,
      "grad_norm": 0.3016825020313263,
      "learning_rate": 7.685424393623137e-05,
      "loss": 0.9229,
      "step": 10695
    },
    {
      "epoch": 0.6172127365020766,
      "grad_norm": 0.284314900636673,
      "learning_rate": 7.675631046127123e-05,
      "loss": 0.9199,
      "step": 10700
    },
    {
      "epoch": 0.6175011536686663,
      "grad_norm": 0.2942012548446655,
      "learning_rate": 7.665840054679659e-05,
      "loss": 0.925,
      "step": 10705
    },
    {
      "epoch": 0.6177895708352561,
      "grad_norm": 0.33849096298217773,
      "learning_rate": 7.656051429205188e-05,
      "loss": 0.9594,
      "step": 10710
    },
    {
      "epoch": 0.6180779880018459,
      "grad_norm": 0.3064514398574829,
      "learning_rate": 7.646265179625743e-05,
      "loss": 0.9324,
      "step": 10715
    },
    {
      "epoch": 0.6183664051684357,
      "grad_norm": 0.3193131387233734,
      "learning_rate": 7.636481315860958e-05,
      "loss": 0.9513,
      "step": 10720
    },
    {
      "epoch": 0.6186548223350253,
      "grad_norm": 0.28184202313423157,
      "learning_rate": 7.626699847828042e-05,
      "loss": 0.9926,
      "step": 10725
    },
    {
      "epoch": 0.6189432395016151,
      "grad_norm": 0.28118598461151123,
      "learning_rate": 7.616920785441777e-05,
      "loss": 0.8637,
      "step": 10730
    },
    {
      "epoch": 0.6192316566682049,
      "grad_norm": 0.33061110973358154,
      "learning_rate": 7.607144138614509e-05,
      "loss": 0.9664,
      "step": 10735
    },
    {
      "epoch": 0.6195200738347947,
      "grad_norm": 0.2889274060726166,
      "learning_rate": 7.597369917256132e-05,
      "loss": 0.9826,
      "step": 10740
    },
    {
      "epoch": 0.6198084910013844,
      "grad_norm": 0.28369957208633423,
      "learning_rate": 7.587598131274088e-05,
      "loss": 0.9739,
      "step": 10745
    },
    {
      "epoch": 0.6200969081679741,
      "grad_norm": 0.28314968943595886,
      "learning_rate": 7.577828790573345e-05,
      "loss": 0.9097,
      "step": 10750
    },
    {
      "epoch": 0.6203853253345639,
      "grad_norm": 0.2938774824142456,
      "learning_rate": 7.568061905056383e-05,
      "loss": 0.9658,
      "step": 10755
    },
    {
      "epoch": 0.6206737425011537,
      "grad_norm": 0.270669549703598,
      "learning_rate": 7.55829748462322e-05,
      "loss": 0.934,
      "step": 10760
    },
    {
      "epoch": 0.6209621596677434,
      "grad_norm": 0.3004055321216583,
      "learning_rate": 7.548535539171343e-05,
      "loss": 0.96,
      "step": 10765
    },
    {
      "epoch": 0.6212505768343332,
      "grad_norm": 0.28886789083480835,
      "learning_rate": 7.538776078595762e-05,
      "loss": 0.9317,
      "step": 10770
    },
    {
      "epoch": 0.6215389940009229,
      "grad_norm": 0.3282342553138733,
      "learning_rate": 7.529019112788937e-05,
      "loss": 0.9816,
      "step": 10775
    },
    {
      "epoch": 0.6218274111675127,
      "grad_norm": 0.31142762303352356,
      "learning_rate": 7.519264651640829e-05,
      "loss": 0.9848,
      "step": 10780
    },
    {
      "epoch": 0.6221158283341024,
      "grad_norm": 0.31296736001968384,
      "learning_rate": 7.50951270503884e-05,
      "loss": 0.9546,
      "step": 10785
    },
    {
      "epoch": 0.6224042455006922,
      "grad_norm": 0.2920689582824707,
      "learning_rate": 7.499763282867823e-05,
      "loss": 1.0018,
      "step": 10790
    },
    {
      "epoch": 0.622692662667282,
      "grad_norm": 0.3036629259586334,
      "learning_rate": 7.490016395010087e-05,
      "loss": 1.0164,
      "step": 10795
    },
    {
      "epoch": 0.6229810798338717,
      "grad_norm": 0.3179445266723633,
      "learning_rate": 7.480272051345358e-05,
      "loss": 0.9614,
      "step": 10800
    },
    {
      "epoch": 0.6232694970004614,
      "grad_norm": 0.33129721879959106,
      "learning_rate": 7.470530261750793e-05,
      "loss": 0.9597,
      "step": 10805
    },
    {
      "epoch": 0.6235579141670512,
      "grad_norm": 0.27982205152511597,
      "learning_rate": 7.460791036100952e-05,
      "loss": 1.0011,
      "step": 10810
    },
    {
      "epoch": 0.623846331333641,
      "grad_norm": 0.2774750292301178,
      "learning_rate": 7.451054384267802e-05,
      "loss": 0.953,
      "step": 10815
    },
    {
      "epoch": 0.6241347485002308,
      "grad_norm": 0.3093210458755493,
      "learning_rate": 7.4413203161207e-05,
      "loss": 0.9847,
      "step": 10820
    },
    {
      "epoch": 0.6244231656668204,
      "grad_norm": 0.32347777485847473,
      "learning_rate": 7.431588841526378e-05,
      "loss": 1.0336,
      "step": 10825
    },
    {
      "epoch": 0.6247115828334102,
      "grad_norm": 0.2853950262069702,
      "learning_rate": 7.421859970348949e-05,
      "loss": 0.9333,
      "step": 10830
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.31536683440208435,
      "learning_rate": 7.412133712449879e-05,
      "loss": 0.9859,
      "step": 10835
    },
    {
      "epoch": 0.6252884171665898,
      "grad_norm": 0.3622225522994995,
      "learning_rate": 7.402410077687993e-05,
      "loss": 0.9806,
      "step": 10840
    },
    {
      "epoch": 0.6255768343331796,
      "grad_norm": 0.27675193548202515,
      "learning_rate": 7.39268907591945e-05,
      "loss": 0.9244,
      "step": 10845
    },
    {
      "epoch": 0.6258652514997692,
      "grad_norm": 0.3075655698776245,
      "learning_rate": 7.382970716997736e-05,
      "loss": 0.9561,
      "step": 10850
    },
    {
      "epoch": 0.626153668666359,
      "grad_norm": 0.28471189737319946,
      "learning_rate": 7.373255010773677e-05,
      "loss": 1.0043,
      "step": 10855
    },
    {
      "epoch": 0.6264420858329488,
      "grad_norm": 0.3159666657447815,
      "learning_rate": 7.363541967095387e-05,
      "loss": 0.9872,
      "step": 10860
    },
    {
      "epoch": 0.6267305029995386,
      "grad_norm": 0.30512797832489014,
      "learning_rate": 7.353831595808298e-05,
      "loss": 0.9348,
      "step": 10865
    },
    {
      "epoch": 0.6270189201661283,
      "grad_norm": 0.29550230503082275,
      "learning_rate": 7.344123906755124e-05,
      "loss": 1.0045,
      "step": 10870
    },
    {
      "epoch": 0.627307337332718,
      "grad_norm": 0.2956281900405884,
      "learning_rate": 7.334418909775866e-05,
      "loss": 0.9955,
      "step": 10875
    },
    {
      "epoch": 0.6275957544993078,
      "grad_norm": 0.3497249484062195,
      "learning_rate": 7.324716614707793e-05,
      "loss": 0.9717,
      "step": 10880
    },
    {
      "epoch": 0.6278841716658976,
      "grad_norm": 0.3222239017486572,
      "learning_rate": 7.315017031385434e-05,
      "loss": 0.9774,
      "step": 10885
    },
    {
      "epoch": 0.6281725888324873,
      "grad_norm": 0.2839806377887726,
      "learning_rate": 7.305320169640575e-05,
      "loss": 0.8747,
      "step": 10890
    },
    {
      "epoch": 0.6284610059990771,
      "grad_norm": 0.28460511565208435,
      "learning_rate": 7.295626039302234e-05,
      "loss": 0.9222,
      "step": 10895
    },
    {
      "epoch": 0.6287494231656668,
      "grad_norm": 0.28321826457977295,
      "learning_rate": 7.285934650196672e-05,
      "loss": 0.9395,
      "step": 10900
    },
    {
      "epoch": 0.6290378403322566,
      "grad_norm": 0.30483925342559814,
      "learning_rate": 7.27624601214736e-05,
      "loss": 0.9941,
      "step": 10905
    },
    {
      "epoch": 0.6293262574988463,
      "grad_norm": 0.32150912284851074,
      "learning_rate": 7.266560134974989e-05,
      "loss": 0.9828,
      "step": 10910
    },
    {
      "epoch": 0.6296146746654361,
      "grad_norm": 0.2850598990917206,
      "learning_rate": 7.25687702849745e-05,
      "loss": 0.9422,
      "step": 10915
    },
    {
      "epoch": 0.6299030918320259,
      "grad_norm": 0.3211795687675476,
      "learning_rate": 7.247196702529815e-05,
      "loss": 0.9291,
      "step": 10920
    },
    {
      "epoch": 0.6301915089986156,
      "grad_norm": 0.28331828117370605,
      "learning_rate": 7.23751916688436e-05,
      "loss": 0.9529,
      "step": 10925
    },
    {
      "epoch": 0.6304799261652053,
      "grad_norm": 0.2783966660499573,
      "learning_rate": 7.227844431370502e-05,
      "loss": 0.9945,
      "step": 10930
    },
    {
      "epoch": 0.6307683433317951,
      "grad_norm": 0.28569692373275757,
      "learning_rate": 7.218172505794854e-05,
      "loss": 0.9894,
      "step": 10935
    },
    {
      "epoch": 0.6310567604983849,
      "grad_norm": 0.2852317988872528,
      "learning_rate": 7.208503399961149e-05,
      "loss": 0.9604,
      "step": 10940
    },
    {
      "epoch": 0.6313451776649747,
      "grad_norm": 0.29289114475250244,
      "learning_rate": 7.19883712367029e-05,
      "loss": 0.9199,
      "step": 10945
    },
    {
      "epoch": 0.6316335948315643,
      "grad_norm": 0.2972031831741333,
      "learning_rate": 7.189173686720287e-05,
      "loss": 0.988,
      "step": 10950
    },
    {
      "epoch": 0.6319220119981541,
      "grad_norm": 0.31643036007881165,
      "learning_rate": 7.179513098906286e-05,
      "loss": 0.9148,
      "step": 10955
    },
    {
      "epoch": 0.6322104291647439,
      "grad_norm": 0.31982874870300293,
      "learning_rate": 7.169855370020547e-05,
      "loss": 0.9133,
      "step": 10960
    },
    {
      "epoch": 0.6324988463313337,
      "grad_norm": 0.2920014560222626,
      "learning_rate": 7.160200509852419e-05,
      "loss": 0.9445,
      "step": 10965
    },
    {
      "epoch": 0.6327872634979234,
      "grad_norm": 0.31772705912590027,
      "learning_rate": 7.15054852818836e-05,
      "loss": 0.9335,
      "step": 10970
    },
    {
      "epoch": 0.6330756806645131,
      "grad_norm": 0.2917129099369049,
      "learning_rate": 7.140899434811898e-05,
      "loss": 0.9335,
      "step": 10975
    },
    {
      "epoch": 0.6333640978311029,
      "grad_norm": 0.3037589192390442,
      "learning_rate": 7.131253239503635e-05,
      "loss": 0.9397,
      "step": 10980
    },
    {
      "epoch": 0.6336525149976927,
      "grad_norm": 0.28663626313209534,
      "learning_rate": 7.121609952041241e-05,
      "loss": 0.9181,
      "step": 10985
    },
    {
      "epoch": 0.6339409321642824,
      "grad_norm": 0.288944810628891,
      "learning_rate": 7.111969582199431e-05,
      "loss": 0.9224,
      "step": 10990
    },
    {
      "epoch": 0.6342293493308722,
      "grad_norm": 0.31760546565055847,
      "learning_rate": 7.102332139749975e-05,
      "loss": 0.9432,
      "step": 10995
    },
    {
      "epoch": 0.6345177664974619,
      "grad_norm": 0.3294132947921753,
      "learning_rate": 7.092697634461654e-05,
      "loss": 0.9897,
      "step": 11000
    },
    {
      "epoch": 0.6348061836640517,
      "grad_norm": 0.3404971957206726,
      "learning_rate": 7.083066076100299e-05,
      "loss": 0.9885,
      "step": 11005
    },
    {
      "epoch": 0.6350946008306414,
      "grad_norm": 0.3096059262752533,
      "learning_rate": 7.073437474428732e-05,
      "loss": 0.9761,
      "step": 11010
    },
    {
      "epoch": 0.6353830179972312,
      "grad_norm": 0.29714658856391907,
      "learning_rate": 7.063811839206785e-05,
      "loss": 0.9564,
      "step": 11015
    },
    {
      "epoch": 0.635671435163821,
      "grad_norm": 0.3057291507720947,
      "learning_rate": 7.05418918019129e-05,
      "loss": 0.9389,
      "step": 11020
    },
    {
      "epoch": 0.6359598523304107,
      "grad_norm": 0.29941415786743164,
      "learning_rate": 7.044569507136052e-05,
      "loss": 0.954,
      "step": 11025
    },
    {
      "epoch": 0.6362482694970004,
      "grad_norm": 0.3494766056537628,
      "learning_rate": 7.034952829791858e-05,
      "loss": 0.995,
      "step": 11030
    },
    {
      "epoch": 0.6365366866635902,
      "grad_norm": 0.31812405586242676,
      "learning_rate": 7.025339157906448e-05,
      "loss": 0.985,
      "step": 11035
    },
    {
      "epoch": 0.63682510383018,
      "grad_norm": 0.3052515387535095,
      "learning_rate": 7.01572850122453e-05,
      "loss": 0.9408,
      "step": 11040
    },
    {
      "epoch": 0.6371135209967698,
      "grad_norm": 0.2897067368030548,
      "learning_rate": 7.00612086948774e-05,
      "loss": 0.8831,
      "step": 11045
    },
    {
      "epoch": 0.6374019381633594,
      "grad_norm": 0.30502036213874817,
      "learning_rate": 6.996516272434658e-05,
      "loss": 1.0114,
      "step": 11050
    },
    {
      "epoch": 0.6376903553299492,
      "grad_norm": 0.29145947098731995,
      "learning_rate": 6.986914719800788e-05,
      "loss": 0.9856,
      "step": 11055
    },
    {
      "epoch": 0.637978772496539,
      "grad_norm": 0.3128416836261749,
      "learning_rate": 6.97731622131854e-05,
      "loss": 0.9045,
      "step": 11060
    },
    {
      "epoch": 0.6382671896631288,
      "grad_norm": 0.3100086748600006,
      "learning_rate": 6.96772078671724e-05,
      "loss": 0.9582,
      "step": 11065
    },
    {
      "epoch": 0.6385556068297185,
      "grad_norm": 0.29209864139556885,
      "learning_rate": 6.9581284257231e-05,
      "loss": 0.9107,
      "step": 11070
    },
    {
      "epoch": 0.6388440239963082,
      "grad_norm": 0.31914541125297546,
      "learning_rate": 6.948539148059211e-05,
      "loss": 0.8921,
      "step": 11075
    },
    {
      "epoch": 0.639132441162898,
      "grad_norm": 0.3100963234901428,
      "learning_rate": 6.938952963445559e-05,
      "loss": 0.9396,
      "step": 11080
    },
    {
      "epoch": 0.6394208583294878,
      "grad_norm": 0.2907158136367798,
      "learning_rate": 6.929369881598968e-05,
      "loss": 0.9779,
      "step": 11085
    },
    {
      "epoch": 0.6397092754960775,
      "grad_norm": 0.3513832688331604,
      "learning_rate": 6.919789912233146e-05,
      "loss": 0.9685,
      "step": 11090
    },
    {
      "epoch": 0.6399976926626673,
      "grad_norm": 0.31142088770866394,
      "learning_rate": 6.910213065058615e-05,
      "loss": 0.9449,
      "step": 11095
    },
    {
      "epoch": 0.640286109829257,
      "grad_norm": 0.2722773849964142,
      "learning_rate": 6.900639349782762e-05,
      "loss": 0.8805,
      "step": 11100
    },
    {
      "epoch": 0.6405745269958468,
      "grad_norm": 0.3007947504520416,
      "learning_rate": 6.89106877610978e-05,
      "loss": 0.9022,
      "step": 11105
    },
    {
      "epoch": 0.6408629441624365,
      "grad_norm": 0.3130947947502136,
      "learning_rate": 6.88150135374068e-05,
      "loss": 1.0078,
      "step": 11110
    },
    {
      "epoch": 0.6411513613290263,
      "grad_norm": 0.2944614887237549,
      "learning_rate": 6.871937092373287e-05,
      "loss": 0.964,
      "step": 11115
    },
    {
      "epoch": 0.6414397784956161,
      "grad_norm": 0.31232336163520813,
      "learning_rate": 6.862376001702213e-05,
      "loss": 0.9654,
      "step": 11120
    },
    {
      "epoch": 0.6417281956622058,
      "grad_norm": 0.3049473464488983,
      "learning_rate": 6.852818091418865e-05,
      "loss": 0.9922,
      "step": 11125
    },
    {
      "epoch": 0.6420166128287955,
      "grad_norm": 0.28708407282829285,
      "learning_rate": 6.843263371211414e-05,
      "loss": 0.8918,
      "step": 11130
    },
    {
      "epoch": 0.6423050299953853,
      "grad_norm": 0.30203092098236084,
      "learning_rate": 6.833711850764813e-05,
      "loss": 0.9724,
      "step": 11135
    },
    {
      "epoch": 0.6425934471619751,
      "grad_norm": 0.2970055043697357,
      "learning_rate": 6.824163539760759e-05,
      "loss": 1.0243,
      "step": 11140
    },
    {
      "epoch": 0.6428818643285649,
      "grad_norm": 0.3000982999801636,
      "learning_rate": 6.814618447877698e-05,
      "loss": 0.9773,
      "step": 11145
    },
    {
      "epoch": 0.6431702814951545,
      "grad_norm": 0.29718953371047974,
      "learning_rate": 6.805076584790818e-05,
      "loss": 0.8987,
      "step": 11150
    },
    {
      "epoch": 0.6434586986617443,
      "grad_norm": 0.27283528447151184,
      "learning_rate": 6.79553796017203e-05,
      "loss": 0.9429,
      "step": 11155
    },
    {
      "epoch": 0.6437471158283341,
      "grad_norm": 0.2902205288410187,
      "learning_rate": 6.786002583689968e-05,
      "loss": 0.9464,
      "step": 11160
    },
    {
      "epoch": 0.6440355329949239,
      "grad_norm": 0.30263763666152954,
      "learning_rate": 6.776470465009965e-05,
      "loss": 0.9825,
      "step": 11165
    },
    {
      "epoch": 0.6443239501615136,
      "grad_norm": 0.31235364079475403,
      "learning_rate": 6.766941613794053e-05,
      "loss": 0.9986,
      "step": 11170
    },
    {
      "epoch": 0.6446123673281033,
      "grad_norm": 0.3030349612236023,
      "learning_rate": 6.757416039700963e-05,
      "loss": 0.9523,
      "step": 11175
    },
    {
      "epoch": 0.6449007844946931,
      "grad_norm": 0.29484447836875916,
      "learning_rate": 6.747893752386088e-05,
      "loss": 0.8984,
      "step": 11180
    },
    {
      "epoch": 0.6451892016612829,
      "grad_norm": 0.2747655510902405,
      "learning_rate": 6.738374761501505e-05,
      "loss": 0.9555,
      "step": 11185
    },
    {
      "epoch": 0.6454776188278727,
      "grad_norm": 0.2867422103881836,
      "learning_rate": 6.728859076695938e-05,
      "loss": 0.9511,
      "step": 11190
    },
    {
      "epoch": 0.6457660359944624,
      "grad_norm": 0.3082755208015442,
      "learning_rate": 6.719346707614766e-05,
      "loss": 0.9675,
      "step": 11195
    },
    {
      "epoch": 0.6460544531610521,
      "grad_norm": 0.2720474302768707,
      "learning_rate": 6.709837663900007e-05,
      "loss": 0.9483,
      "step": 11200
    },
    {
      "epoch": 0.6463428703276419,
      "grad_norm": 0.3050679862499237,
      "learning_rate": 6.700331955190303e-05,
      "loss": 0.8734,
      "step": 11205
    },
    {
      "epoch": 0.6466312874942317,
      "grad_norm": 0.3173052966594696,
      "learning_rate": 6.690829591120922e-05,
      "loss": 0.9713,
      "step": 11210
    },
    {
      "epoch": 0.6469197046608214,
      "grad_norm": 0.2943088710308075,
      "learning_rate": 6.681330581323735e-05,
      "loss": 0.9718,
      "step": 11215
    },
    {
      "epoch": 0.6472081218274112,
      "grad_norm": 0.3078162372112274,
      "learning_rate": 6.671834935427222e-05,
      "loss": 1.0134,
      "step": 11220
    },
    {
      "epoch": 0.647496538994001,
      "grad_norm": 0.30523622035980225,
      "learning_rate": 6.662342663056444e-05,
      "loss": 0.953,
      "step": 11225
    },
    {
      "epoch": 0.6477849561605907,
      "grad_norm": 0.31331026554107666,
      "learning_rate": 6.652853773833052e-05,
      "loss": 0.926,
      "step": 11230
    },
    {
      "epoch": 0.6480733733271804,
      "grad_norm": 0.32736653089523315,
      "learning_rate": 6.64336827737526e-05,
      "loss": 0.9555,
      "step": 11235
    },
    {
      "epoch": 0.6483617904937702,
      "grad_norm": 0.30823779106140137,
      "learning_rate": 6.633886183297838e-05,
      "loss": 0.9442,
      "step": 11240
    },
    {
      "epoch": 0.64865020766036,
      "grad_norm": 0.3050725758075714,
      "learning_rate": 6.624407501212128e-05,
      "loss": 0.8628,
      "step": 11245
    },
    {
      "epoch": 0.6489386248269498,
      "grad_norm": 0.289765864610672,
      "learning_rate": 6.614932240725989e-05,
      "loss": 1.0329,
      "step": 11250
    },
    {
      "epoch": 0.6492270419935394,
      "grad_norm": 0.29332610964775085,
      "learning_rate": 6.605460411443831e-05,
      "loss": 0.9276,
      "step": 11255
    },
    {
      "epoch": 0.6495154591601292,
      "grad_norm": 0.301318496465683,
      "learning_rate": 6.595992022966571e-05,
      "loss": 0.9452,
      "step": 11260
    },
    {
      "epoch": 0.649803876326719,
      "grad_norm": 0.30513182282447815,
      "learning_rate": 6.586527084891654e-05,
      "loss": 0.9093,
      "step": 11265
    },
    {
      "epoch": 0.6500922934933088,
      "grad_norm": 0.297073096036911,
      "learning_rate": 6.577065606813011e-05,
      "loss": 0.8822,
      "step": 11270
    },
    {
      "epoch": 0.6503807106598984,
      "grad_norm": 0.2968006432056427,
      "learning_rate": 6.567607598321074e-05,
      "loss": 0.9486,
      "step": 11275
    },
    {
      "epoch": 0.6506691278264882,
      "grad_norm": 0.2799467146396637,
      "learning_rate": 6.558153069002764e-05,
      "loss": 0.9775,
      "step": 11280
    },
    {
      "epoch": 0.650957544993078,
      "grad_norm": 0.3077338933944702,
      "learning_rate": 6.548702028441462e-05,
      "loss": 0.9383,
      "step": 11285
    },
    {
      "epoch": 0.6512459621596678,
      "grad_norm": 0.3266589641571045,
      "learning_rate": 6.539254486217026e-05,
      "loss": 1.0737,
      "step": 11290
    },
    {
      "epoch": 0.6515343793262575,
      "grad_norm": 0.2924719750881195,
      "learning_rate": 6.529810451905759e-05,
      "loss": 0.8547,
      "step": 11295
    },
    {
      "epoch": 0.6518227964928472,
      "grad_norm": 0.304733544588089,
      "learning_rate": 6.520369935080411e-05,
      "loss": 0.9368,
      "step": 11300
    },
    {
      "epoch": 0.652111213659437,
      "grad_norm": 0.3150797188282013,
      "learning_rate": 6.510932945310167e-05,
      "loss": 1.0042,
      "step": 11305
    },
    {
      "epoch": 0.6523996308260268,
      "grad_norm": 0.29850059747695923,
      "learning_rate": 6.501499492160636e-05,
      "loss": 0.9136,
      "step": 11310
    },
    {
      "epoch": 0.6526880479926165,
      "grad_norm": 0.3074968755245209,
      "learning_rate": 6.49206958519385e-05,
      "loss": 0.915,
      "step": 11315
    },
    {
      "epoch": 0.6529764651592063,
      "grad_norm": 0.31671762466430664,
      "learning_rate": 6.482643233968224e-05,
      "loss": 0.9417,
      "step": 11320
    },
    {
      "epoch": 0.653264882325796,
      "grad_norm": 0.3049052059650421,
      "learning_rate": 6.473220448038602e-05,
      "loss": 0.9117,
      "step": 11325
    },
    {
      "epoch": 0.6535532994923858,
      "grad_norm": 0.28879445791244507,
      "learning_rate": 6.463801236956184e-05,
      "loss": 0.9448,
      "step": 11330
    },
    {
      "epoch": 0.6538417166589755,
      "grad_norm": 0.2787480652332306,
      "learning_rate": 6.45438561026856e-05,
      "loss": 0.9127,
      "step": 11335
    },
    {
      "epoch": 0.6541301338255653,
      "grad_norm": 0.31340980529785156,
      "learning_rate": 6.44497357751969e-05,
      "loss": 0.9709,
      "step": 11340
    },
    {
      "epoch": 0.6544185509921551,
      "grad_norm": 0.2779744565486908,
      "learning_rate": 6.435565148249882e-05,
      "loss": 0.9482,
      "step": 11345
    },
    {
      "epoch": 0.6547069681587449,
      "grad_norm": 0.3121470510959625,
      "learning_rate": 6.426160331995801e-05,
      "loss": 0.9688,
      "step": 11350
    },
    {
      "epoch": 0.6549953853253345,
      "grad_norm": 0.29920753836631775,
      "learning_rate": 6.416759138290438e-05,
      "loss": 0.9957,
      "step": 11355
    },
    {
      "epoch": 0.6552838024919243,
      "grad_norm": 0.2805769741535187,
      "learning_rate": 6.407361576663124e-05,
      "loss": 0.9614,
      "step": 11360
    },
    {
      "epoch": 0.6555722196585141,
      "grad_norm": 0.31521645188331604,
      "learning_rate": 6.397967656639504e-05,
      "loss": 0.943,
      "step": 11365
    },
    {
      "epoch": 0.6558606368251039,
      "grad_norm": 0.29340264201164246,
      "learning_rate": 6.388577387741524e-05,
      "loss": 0.9442,
      "step": 11370
    },
    {
      "epoch": 0.6561490539916935,
      "grad_norm": 0.3037976324558258,
      "learning_rate": 6.379190779487443e-05,
      "loss": 0.9699,
      "step": 11375
    },
    {
      "epoch": 0.6564374711582833,
      "grad_norm": 0.32146474719047546,
      "learning_rate": 6.369807841391798e-05,
      "loss": 0.972,
      "step": 11380
    },
    {
      "epoch": 0.6567258883248731,
      "grad_norm": 0.2791215777397156,
      "learning_rate": 6.360428582965414e-05,
      "loss": 0.9761,
      "step": 11385
    },
    {
      "epoch": 0.6570143054914629,
      "grad_norm": 0.3133072853088379,
      "learning_rate": 6.351053013715383e-05,
      "loss": 1.0486,
      "step": 11390
    },
    {
      "epoch": 0.6573027226580526,
      "grad_norm": 0.320552796125412,
      "learning_rate": 6.341681143145048e-05,
      "loss": 0.9421,
      "step": 11395
    },
    {
      "epoch": 0.6575911398246423,
      "grad_norm": 0.2836511731147766,
      "learning_rate": 6.332312980754025e-05,
      "loss": 0.9323,
      "step": 11400
    },
    {
      "epoch": 0.6578795569912321,
      "grad_norm": 0.3099246919155121,
      "learning_rate": 6.322948536038144e-05,
      "loss": 1.0394,
      "step": 11405
    },
    {
      "epoch": 0.6581679741578219,
      "grad_norm": 0.3043697476387024,
      "learning_rate": 6.313587818489497e-05,
      "loss": 0.9704,
      "step": 11410
    },
    {
      "epoch": 0.6584563913244116,
      "grad_norm": 0.3238884508609772,
      "learning_rate": 6.304230837596365e-05,
      "loss": 0.9943,
      "step": 11415
    },
    {
      "epoch": 0.6587448084910014,
      "grad_norm": 0.26987263560295105,
      "learning_rate": 6.294877602843275e-05,
      "loss": 0.9566,
      "step": 11420
    },
    {
      "epoch": 0.6590332256575911,
      "grad_norm": 0.29144686460494995,
      "learning_rate": 6.285528123710929e-05,
      "loss": 1.0097,
      "step": 11425
    },
    {
      "epoch": 0.6593216428241809,
      "grad_norm": 0.28795817494392395,
      "learning_rate": 6.276182409676234e-05,
      "loss": 0.9627,
      "step": 11430
    },
    {
      "epoch": 0.6596100599907706,
      "grad_norm": 0.3085058331489563,
      "learning_rate": 6.266840470212285e-05,
      "loss": 0.9518,
      "step": 11435
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 0.2758595049381256,
      "learning_rate": 6.25750231478834e-05,
      "loss": 0.9786,
      "step": 11440
    },
    {
      "epoch": 0.6601868943239502,
      "grad_norm": 0.29992714524269104,
      "learning_rate": 6.248167952869833e-05,
      "loss": 0.9749,
      "step": 11445
    },
    {
      "epoch": 0.66047531149054,
      "grad_norm": 0.2847946286201477,
      "learning_rate": 6.238837393918341e-05,
      "loss": 1.0312,
      "step": 11450
    },
    {
      "epoch": 0.6607637286571296,
      "grad_norm": 0.3597642481327057,
      "learning_rate": 6.229510647391599e-05,
      "loss": 0.9781,
      "step": 11455
    },
    {
      "epoch": 0.6610521458237194,
      "grad_norm": 0.30279117822647095,
      "learning_rate": 6.220187722743466e-05,
      "loss": 0.9767,
      "step": 11460
    },
    {
      "epoch": 0.6613405629903092,
      "grad_norm": 0.3455253541469574,
      "learning_rate": 6.210868629423932e-05,
      "loss": 1.0143,
      "step": 11465
    },
    {
      "epoch": 0.661628980156899,
      "grad_norm": 0.2912869155406952,
      "learning_rate": 6.201553376879108e-05,
      "loss": 0.9544,
      "step": 11470
    },
    {
      "epoch": 0.6619173973234886,
      "grad_norm": 0.30142757296562195,
      "learning_rate": 6.192241974551198e-05,
      "loss": 0.9898,
      "step": 11475
    },
    {
      "epoch": 0.6622058144900784,
      "grad_norm": 0.3466334342956543,
      "learning_rate": 6.182934431878526e-05,
      "loss": 0.9499,
      "step": 11480
    },
    {
      "epoch": 0.6624942316566682,
      "grad_norm": 0.29196497797966003,
      "learning_rate": 6.173630758295479e-05,
      "loss": 0.9437,
      "step": 11485
    },
    {
      "epoch": 0.662782648823258,
      "grad_norm": 0.29323986172676086,
      "learning_rate": 6.164330963232535e-05,
      "loss": 0.8918,
      "step": 11490
    },
    {
      "epoch": 0.6630710659898477,
      "grad_norm": 0.3342483937740326,
      "learning_rate": 6.155035056116243e-05,
      "loss": 0.9651,
      "step": 11495
    },
    {
      "epoch": 0.6633594831564374,
      "grad_norm": 0.29042768478393555,
      "learning_rate": 6.145743046369205e-05,
      "loss": 0.9427,
      "step": 11500
    },
    {
      "epoch": 0.6636479003230272,
      "grad_norm": 0.2897365987300873,
      "learning_rate": 6.136454943410077e-05,
      "loss": 0.9875,
      "step": 11505
    },
    {
      "epoch": 0.663936317489617,
      "grad_norm": 0.2877280116081238,
      "learning_rate": 6.127170756653546e-05,
      "loss": 0.9621,
      "step": 11510
    },
    {
      "epoch": 0.6642247346562067,
      "grad_norm": 0.41063976287841797,
      "learning_rate": 6.117890495510345e-05,
      "loss": 0.9689,
      "step": 11515
    },
    {
      "epoch": 0.6645131518227965,
      "grad_norm": 0.300569087266922,
      "learning_rate": 6.108614169387215e-05,
      "loss": 0.9432,
      "step": 11520
    },
    {
      "epoch": 0.6648015689893862,
      "grad_norm": 0.300698459148407,
      "learning_rate": 6.099341787686908e-05,
      "loss": 0.9182,
      "step": 11525
    },
    {
      "epoch": 0.665089986155976,
      "grad_norm": 0.3228488564491272,
      "learning_rate": 6.090073359808188e-05,
      "loss": 0.9576,
      "step": 11530
    },
    {
      "epoch": 0.6653784033225658,
      "grad_norm": 0.27102410793304443,
      "learning_rate": 6.080808895145802e-05,
      "loss": 0.9485,
      "step": 11535
    },
    {
      "epoch": 0.6656668204891555,
      "grad_norm": 0.3214710056781769,
      "learning_rate": 6.071548403090488e-05,
      "loss": 1.0114,
      "step": 11540
    },
    {
      "epoch": 0.6659552376557453,
      "grad_norm": 0.32063308358192444,
      "learning_rate": 6.062291893028944e-05,
      "loss": 0.9854,
      "step": 11545
    },
    {
      "epoch": 0.666243654822335,
      "grad_norm": 0.2712962031364441,
      "learning_rate": 6.053039374343849e-05,
      "loss": 0.9318,
      "step": 11550
    },
    {
      "epoch": 0.6665320719889248,
      "grad_norm": 0.29538100957870483,
      "learning_rate": 6.043790856413825e-05,
      "loss": 1.0111,
      "step": 11555
    },
    {
      "epoch": 0.6668204891555145,
      "grad_norm": 0.3049154281616211,
      "learning_rate": 6.0345463486134325e-05,
      "loss": 0.9196,
      "step": 11560
    },
    {
      "epoch": 0.6671089063221043,
      "grad_norm": 0.3039052188396454,
      "learning_rate": 6.025305860313188e-05,
      "loss": 1.0061,
      "step": 11565
    },
    {
      "epoch": 0.6673973234886941,
      "grad_norm": 0.2950868308544159,
      "learning_rate": 6.0160694008795114e-05,
      "loss": 0.9581,
      "step": 11570
    },
    {
      "epoch": 0.6676857406552839,
      "grad_norm": 0.32517221570014954,
      "learning_rate": 6.0068369796747594e-05,
      "loss": 0.9702,
      "step": 11575
    },
    {
      "epoch": 0.6679741578218735,
      "grad_norm": 0.28849300742149353,
      "learning_rate": 5.9976086060571765e-05,
      "loss": 0.9907,
      "step": 11580
    },
    {
      "epoch": 0.6682625749884633,
      "grad_norm": 0.3085767924785614,
      "learning_rate": 5.988384289380915e-05,
      "loss": 0.9651,
      "step": 11585
    },
    {
      "epoch": 0.6685509921550531,
      "grad_norm": 0.2863781154155731,
      "learning_rate": 5.979164038996015e-05,
      "loss": 0.9576,
      "step": 11590
    },
    {
      "epoch": 0.6688394093216429,
      "grad_norm": 0.33025938272476196,
      "learning_rate": 5.9699478642483896e-05,
      "loss": 0.9488,
      "step": 11595
    },
    {
      "epoch": 0.6691278264882325,
      "grad_norm": 0.2808819115161896,
      "learning_rate": 5.960735774479826e-05,
      "loss": 0.8781,
      "step": 11600
    },
    {
      "epoch": 0.6694162436548223,
      "grad_norm": 0.3095723092556,
      "learning_rate": 5.951527779027968e-05,
      "loss": 1.0953,
      "step": 11605
    },
    {
      "epoch": 0.6697046608214121,
      "grad_norm": 0.2829914391040802,
      "learning_rate": 5.942323887226311e-05,
      "loss": 0.9082,
      "step": 11610
    },
    {
      "epoch": 0.6699930779880019,
      "grad_norm": 0.28427544236183167,
      "learning_rate": 5.933124108404189e-05,
      "loss": 0.9043,
      "step": 11615
    },
    {
      "epoch": 0.6702814951545916,
      "grad_norm": 0.30131834745407104,
      "learning_rate": 5.923928451886767e-05,
      "loss": 0.9115,
      "step": 11620
    },
    {
      "epoch": 0.6705699123211813,
      "grad_norm": 0.27953872084617615,
      "learning_rate": 5.914736926995034e-05,
      "loss": 0.9719,
      "step": 11625
    },
    {
      "epoch": 0.6708583294877711,
      "grad_norm": 0.2982015013694763,
      "learning_rate": 5.905549543045783e-05,
      "loss": 0.9212,
      "step": 11630
    },
    {
      "epoch": 0.6711467466543609,
      "grad_norm": 0.2859979271888733,
      "learning_rate": 5.8963663093516264e-05,
      "loss": 0.8745,
      "step": 11635
    },
    {
      "epoch": 0.6714351638209506,
      "grad_norm": 0.3366292119026184,
      "learning_rate": 5.887187235220948e-05,
      "loss": 0.9363,
      "step": 11640
    },
    {
      "epoch": 0.6717235809875404,
      "grad_norm": 0.317078560590744,
      "learning_rate": 5.8780123299579385e-05,
      "loss": 0.9509,
      "step": 11645
    },
    {
      "epoch": 0.6720119981541302,
      "grad_norm": 0.28258809447288513,
      "learning_rate": 5.868841602862541e-05,
      "loss": 0.9911,
      "step": 11650
    },
    {
      "epoch": 0.6723004153207199,
      "grad_norm": 0.2883169949054718,
      "learning_rate": 5.859675063230477e-05,
      "loss": 0.9584,
      "step": 11655
    },
    {
      "epoch": 0.6725888324873096,
      "grad_norm": 0.3138083517551422,
      "learning_rate": 5.8505127203532216e-05,
      "loss": 1.0294,
      "step": 11660
    },
    {
      "epoch": 0.6728772496538994,
      "grad_norm": 0.2859340012073517,
      "learning_rate": 5.841354583517991e-05,
      "loss": 0.9843,
      "step": 11665
    },
    {
      "epoch": 0.6731656668204892,
      "grad_norm": 0.31252771615982056,
      "learning_rate": 5.8322006620077426e-05,
      "loss": 0.9464,
      "step": 11670
    },
    {
      "epoch": 0.673454083987079,
      "grad_norm": 0.2721909284591675,
      "learning_rate": 5.823050965101162e-05,
      "loss": 0.938,
      "step": 11675
    },
    {
      "epoch": 0.6737425011536686,
      "grad_norm": 0.30047523975372314,
      "learning_rate": 5.8139055020726494e-05,
      "loss": 0.9684,
      "step": 11680
    },
    {
      "epoch": 0.6740309183202584,
      "grad_norm": 0.2787550985813141,
      "learning_rate": 5.804764282192314e-05,
      "loss": 0.9879,
      "step": 11685
    },
    {
      "epoch": 0.6743193354868482,
      "grad_norm": 0.3054908514022827,
      "learning_rate": 5.7956273147259645e-05,
      "loss": 0.9232,
      "step": 11690
    },
    {
      "epoch": 0.674607752653438,
      "grad_norm": 0.2886941730976105,
      "learning_rate": 5.786494608935098e-05,
      "loss": 0.9515,
      "step": 11695
    },
    {
      "epoch": 0.6748961698200276,
      "grad_norm": 0.29352888464927673,
      "learning_rate": 5.77736617407689e-05,
      "loss": 1.0245,
      "step": 11700
    },
    {
      "epoch": 0.6751845869866174,
      "grad_norm": 0.3170221447944641,
      "learning_rate": 5.768242019404198e-05,
      "loss": 0.9051,
      "step": 11705
    },
    {
      "epoch": 0.6754730041532072,
      "grad_norm": 0.2965846061706543,
      "learning_rate": 5.7591221541655285e-05,
      "loss": 0.9583,
      "step": 11710
    },
    {
      "epoch": 0.675761421319797,
      "grad_norm": 0.2991654574871063,
      "learning_rate": 5.750006587605043e-05,
      "loss": 0.938,
      "step": 11715
    },
    {
      "epoch": 0.6760498384863867,
      "grad_norm": 0.28856098651885986,
      "learning_rate": 5.74089532896255e-05,
      "loss": 0.9213,
      "step": 11720
    },
    {
      "epoch": 0.6763382556529764,
      "grad_norm": 0.6358820199966431,
      "learning_rate": 5.7317883874734823e-05,
      "loss": 1.0292,
      "step": 11725
    },
    {
      "epoch": 0.6766266728195662,
      "grad_norm": 0.2861112952232361,
      "learning_rate": 5.722685772368912e-05,
      "loss": 0.9703,
      "step": 11730
    },
    {
      "epoch": 0.676915089986156,
      "grad_norm": 0.29392847418785095,
      "learning_rate": 5.713587492875513e-05,
      "loss": 0.9174,
      "step": 11735
    },
    {
      "epoch": 0.6772035071527457,
      "grad_norm": 0.30904361605644226,
      "learning_rate": 5.704493558215567e-05,
      "loss": 0.9269,
      "step": 11740
    },
    {
      "epoch": 0.6774919243193355,
      "grad_norm": 0.30134832859039307,
      "learning_rate": 5.695403977606955e-05,
      "loss": 0.9305,
      "step": 11745
    },
    {
      "epoch": 0.6777803414859253,
      "grad_norm": 0.29707950353622437,
      "learning_rate": 5.6863187602631354e-05,
      "loss": 0.9669,
      "step": 11750
    },
    {
      "epoch": 0.678068758652515,
      "grad_norm": 0.2993502914905548,
      "learning_rate": 5.677237915393165e-05,
      "loss": 0.9547,
      "step": 11755
    },
    {
      "epoch": 0.6783571758191047,
      "grad_norm": 0.26072099804878235,
      "learning_rate": 5.668161452201639e-05,
      "loss": 0.9151,
      "step": 11760
    },
    {
      "epoch": 0.6786455929856945,
      "grad_norm": 0.30072999000549316,
      "learning_rate": 5.659089379888738e-05,
      "loss": 0.9176,
      "step": 11765
    },
    {
      "epoch": 0.6789340101522843,
      "grad_norm": 0.27792972326278687,
      "learning_rate": 5.650021707650173e-05,
      "loss": 0.8993,
      "step": 11770
    },
    {
      "epoch": 0.6792224273188741,
      "grad_norm": 0.3117034435272217,
      "learning_rate": 5.6409584446772135e-05,
      "loss": 0.9597,
      "step": 11775
    },
    {
      "epoch": 0.6795108444854637,
      "grad_norm": 0.29373329877853394,
      "learning_rate": 5.6318996001566384e-05,
      "loss": 0.8972,
      "step": 11780
    },
    {
      "epoch": 0.6797992616520535,
      "grad_norm": 0.26909390091896057,
      "learning_rate": 5.622845183270757e-05,
      "loss": 0.9559,
      "step": 11785
    },
    {
      "epoch": 0.6800876788186433,
      "grad_norm": 0.28705233335494995,
      "learning_rate": 5.613795203197401e-05,
      "loss": 0.9498,
      "step": 11790
    },
    {
      "epoch": 0.6803760959852331,
      "grad_norm": 0.31131601333618164,
      "learning_rate": 5.604749669109889e-05,
      "loss": 0.9168,
      "step": 11795
    },
    {
      "epoch": 0.6806645131518227,
      "grad_norm": 0.28067898750305176,
      "learning_rate": 5.5957085901770424e-05,
      "loss": 0.9545,
      "step": 11800
    },
    {
      "epoch": 0.6809529303184125,
      "grad_norm": 0.2953431010246277,
      "learning_rate": 5.5866719755631625e-05,
      "loss": 0.893,
      "step": 11805
    },
    {
      "epoch": 0.6812413474850023,
      "grad_norm": 0.2954767644405365,
      "learning_rate": 5.577639834428026e-05,
      "loss": 0.9665,
      "step": 11810
    },
    {
      "epoch": 0.6815297646515921,
      "grad_norm": 0.2888740599155426,
      "learning_rate": 5.5686121759268793e-05,
      "loss": 0.897,
      "step": 11815
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.2831745147705078,
      "learning_rate": 5.559589009210421e-05,
      "loss": 0.9831,
      "step": 11820
    },
    {
      "epoch": 0.6821065989847716,
      "grad_norm": 0.26822391152381897,
      "learning_rate": 5.550570343424797e-05,
      "loss": 1.0036,
      "step": 11825
    },
    {
      "epoch": 0.6823950161513613,
      "grad_norm": 0.3146960139274597,
      "learning_rate": 5.5415561877115876e-05,
      "loss": 0.9881,
      "step": 11830
    },
    {
      "epoch": 0.6826834333179511,
      "grad_norm": 0.3015124499797821,
      "learning_rate": 5.5325465512078154e-05,
      "loss": 1.0135,
      "step": 11835
    },
    {
      "epoch": 0.6829718504845408,
      "grad_norm": 0.28780919313430786,
      "learning_rate": 5.523541443045904e-05,
      "loss": 0.9213,
      "step": 11840
    },
    {
      "epoch": 0.6832602676511306,
      "grad_norm": 0.3086405098438263,
      "learning_rate": 5.514540872353693e-05,
      "loss": 0.9785,
      "step": 11845
    },
    {
      "epoch": 0.6835486848177204,
      "grad_norm": 0.28687992691993713,
      "learning_rate": 5.505544848254432e-05,
      "loss": 1.0139,
      "step": 11850
    },
    {
      "epoch": 0.6838371019843101,
      "grad_norm": 0.3073662519454956,
      "learning_rate": 5.496553379866753e-05,
      "loss": 0.9409,
      "step": 11855
    },
    {
      "epoch": 0.6841255191508998,
      "grad_norm": 0.29431843757629395,
      "learning_rate": 5.4875664763046705e-05,
      "loss": 0.9019,
      "step": 11860
    },
    {
      "epoch": 0.6844139363174896,
      "grad_norm": 0.31620967388153076,
      "learning_rate": 5.4785841466775726e-05,
      "loss": 0.9975,
      "step": 11865
    },
    {
      "epoch": 0.6847023534840794,
      "grad_norm": 0.2722678482532501,
      "learning_rate": 5.4696064000902146e-05,
      "loss": 0.9492,
      "step": 11870
    },
    {
      "epoch": 0.6849907706506692,
      "grad_norm": 0.28141093254089355,
      "learning_rate": 5.460633245642703e-05,
      "loss": 0.9502,
      "step": 11875
    },
    {
      "epoch": 0.6852791878172588,
      "grad_norm": 0.27788811922073364,
      "learning_rate": 5.451664692430493e-05,
      "loss": 0.9604,
      "step": 11880
    },
    {
      "epoch": 0.6855676049838486,
      "grad_norm": 0.3031676113605499,
      "learning_rate": 5.4427007495443684e-05,
      "loss": 1.0095,
      "step": 11885
    },
    {
      "epoch": 0.6858560221504384,
      "grad_norm": 0.30585047602653503,
      "learning_rate": 5.433741426070442e-05,
      "loss": 1.0091,
      "step": 11890
    },
    {
      "epoch": 0.6861444393170282,
      "grad_norm": 0.29617300629615784,
      "learning_rate": 5.424786731090157e-05,
      "loss": 1.0051,
      "step": 11895
    },
    {
      "epoch": 0.686432856483618,
      "grad_norm": 0.267132431268692,
      "learning_rate": 5.415836673680253e-05,
      "loss": 0.99,
      "step": 11900
    },
    {
      "epoch": 0.6867212736502076,
      "grad_norm": 0.29219532012939453,
      "learning_rate": 5.40689126291276e-05,
      "loss": 1.0008,
      "step": 11905
    },
    {
      "epoch": 0.6870096908167974,
      "grad_norm": 0.3108190894126892,
      "learning_rate": 5.3979505078550184e-05,
      "loss": 0.9479,
      "step": 11910
    },
    {
      "epoch": 0.6872981079833872,
      "grad_norm": 0.36880865693092346,
      "learning_rate": 5.389014417569635e-05,
      "loss": 1.0612,
      "step": 11915
    },
    {
      "epoch": 0.687586525149977,
      "grad_norm": 0.29092344641685486,
      "learning_rate": 5.380083001114503e-05,
      "loss": 1.0152,
      "step": 11920
    },
    {
      "epoch": 0.6878749423165667,
      "grad_norm": 0.30183884501457214,
      "learning_rate": 5.371156267542752e-05,
      "loss": 0.9995,
      "step": 11925
    },
    {
      "epoch": 0.6881633594831564,
      "grad_norm": 0.29405462741851807,
      "learning_rate": 5.362234225902794e-05,
      "loss": 0.9237,
      "step": 11930
    },
    {
      "epoch": 0.6884517766497462,
      "grad_norm": 0.2716309428215027,
      "learning_rate": 5.353316885238269e-05,
      "loss": 0.898,
      "step": 11935
    },
    {
      "epoch": 0.688740193816336,
      "grad_norm": 0.30954423546791077,
      "learning_rate": 5.3444042545880514e-05,
      "loss": 0.9184,
      "step": 11940
    },
    {
      "epoch": 0.6890286109829257,
      "grad_norm": 0.2786799967288971,
      "learning_rate": 5.3354963429862484e-05,
      "loss": 1.0208,
      "step": 11945
    },
    {
      "epoch": 0.6893170281495155,
      "grad_norm": 0.3064625561237335,
      "learning_rate": 5.3265931594621756e-05,
      "loss": 0.9567,
      "step": 11950
    },
    {
      "epoch": 0.6896054453161052,
      "grad_norm": 0.3363032639026642,
      "learning_rate": 5.317694713040369e-05,
      "loss": 1.0089,
      "step": 11955
    },
    {
      "epoch": 0.689893862482695,
      "grad_norm": 0.2882344722747803,
      "learning_rate": 5.3088010127405496e-05,
      "loss": 0.9548,
      "step": 11960
    },
    {
      "epoch": 0.6901822796492847,
      "grad_norm": 0.3060877323150635,
      "learning_rate": 5.299912067577635e-05,
      "loss": 0.9522,
      "step": 11965
    },
    {
      "epoch": 0.6904706968158745,
      "grad_norm": 0.3129549026489258,
      "learning_rate": 5.29102788656172e-05,
      "loss": 0.9276,
      "step": 11970
    },
    {
      "epoch": 0.6907591139824643,
      "grad_norm": 0.31678634881973267,
      "learning_rate": 5.282148478698068e-05,
      "loss": 1.0038,
      "step": 11975
    },
    {
      "epoch": 0.691047531149054,
      "grad_norm": 0.31607529520988464,
      "learning_rate": 5.273273852987113e-05,
      "loss": 0.885,
      "step": 11980
    },
    {
      "epoch": 0.6913359483156437,
      "grad_norm": 0.26990994811058044,
      "learning_rate": 5.2644040184244325e-05,
      "loss": 0.9675,
      "step": 11985
    },
    {
      "epoch": 0.6916243654822335,
      "grad_norm": 0.31181198358535767,
      "learning_rate": 5.255538984000753e-05,
      "loss": 0.9581,
      "step": 11990
    },
    {
      "epoch": 0.6919127826488233,
      "grad_norm": 0.2867415249347687,
      "learning_rate": 5.246678758701932e-05,
      "loss": 0.9137,
      "step": 11995
    },
    {
      "epoch": 0.6922011998154131,
      "grad_norm": 0.29689162969589233,
      "learning_rate": 5.237823351508953e-05,
      "loss": 1.0561,
      "step": 12000
    },
    {
      "epoch": 0.6924896169820027,
      "grad_norm": 0.30153143405914307,
      "learning_rate": 5.228972771397918e-05,
      "loss": 0.9205,
      "step": 12005
    },
    {
      "epoch": 0.6927780341485925,
      "grad_norm": 0.27744805812835693,
      "learning_rate": 5.2201270273400296e-05,
      "loss": 0.9878,
      "step": 12010
    },
    {
      "epoch": 0.6930664513151823,
      "grad_norm": 0.287998765707016,
      "learning_rate": 5.211286128301602e-05,
      "loss": 0.964,
      "step": 12015
    },
    {
      "epoch": 0.6933548684817721,
      "grad_norm": 0.3049549162387848,
      "learning_rate": 5.202450083244026e-05,
      "loss": 1.0198,
      "step": 12020
    },
    {
      "epoch": 0.6936432856483618,
      "grad_norm": 0.30203524231910706,
      "learning_rate": 5.193618901123776e-05,
      "loss": 0.9331,
      "step": 12025
    },
    {
      "epoch": 0.6939317028149515,
      "grad_norm": 0.3099840581417084,
      "learning_rate": 5.184792590892397e-05,
      "loss": 0.9736,
      "step": 12030
    },
    {
      "epoch": 0.6942201199815413,
      "grad_norm": 0.3033467233181,
      "learning_rate": 5.175971161496491e-05,
      "loss": 1.0176,
      "step": 12035
    },
    {
      "epoch": 0.6945085371481311,
      "grad_norm": 0.27871066331863403,
      "learning_rate": 5.167154621877728e-05,
      "loss": 0.8854,
      "step": 12040
    },
    {
      "epoch": 0.6947969543147208,
      "grad_norm": 0.2897888123989105,
      "learning_rate": 5.158342980972805e-05,
      "loss": 0.9439,
      "step": 12045
    },
    {
      "epoch": 0.6950853714813106,
      "grad_norm": 0.29491978883743286,
      "learning_rate": 5.14953624771346e-05,
      "loss": 0.959,
      "step": 12050
    },
    {
      "epoch": 0.6953737886479003,
      "grad_norm": 0.3069283068180084,
      "learning_rate": 5.140734431026453e-05,
      "loss": 0.992,
      "step": 12055
    },
    {
      "epoch": 0.6956622058144901,
      "grad_norm": 0.32911011576652527,
      "learning_rate": 5.131937539833571e-05,
      "loss": 1.0046,
      "step": 12060
    },
    {
      "epoch": 0.6959506229810798,
      "grad_norm": 0.29555436968803406,
      "learning_rate": 5.1231455830516004e-05,
      "loss": 0.9604,
      "step": 12065
    },
    {
      "epoch": 0.6962390401476696,
      "grad_norm": 0.28055113554000854,
      "learning_rate": 5.1143585695923166e-05,
      "loss": 0.9453,
      "step": 12070
    },
    {
      "epoch": 0.6965274573142594,
      "grad_norm": 0.32180923223495483,
      "learning_rate": 5.105576508362504e-05,
      "loss": 0.9524,
      "step": 12075
    },
    {
      "epoch": 0.6968158744808491,
      "grad_norm": 0.29000669717788696,
      "learning_rate": 5.09679940826391e-05,
      "loss": 0.9389,
      "step": 12080
    },
    {
      "epoch": 0.6971042916474388,
      "grad_norm": 0.3079163432121277,
      "learning_rate": 5.0880272781932744e-05,
      "loss": 0.9959,
      "step": 12085
    },
    {
      "epoch": 0.6973927088140286,
      "grad_norm": 0.2964383661746979,
      "learning_rate": 5.079260127042267e-05,
      "loss": 0.916,
      "step": 12090
    },
    {
      "epoch": 0.6976811259806184,
      "grad_norm": 0.2799603044986725,
      "learning_rate": 5.070497963697542e-05,
      "loss": 0.9438,
      "step": 12095
    },
    {
      "epoch": 0.6979695431472082,
      "grad_norm": 0.28270310163497925,
      "learning_rate": 5.061740797040684e-05,
      "loss": 0.9447,
      "step": 12100
    },
    {
      "epoch": 0.6982579603137978,
      "grad_norm": 0.28763705492019653,
      "learning_rate": 5.0529886359482105e-05,
      "loss": 0.9393,
      "step": 12105
    },
    {
      "epoch": 0.6985463774803876,
      "grad_norm": 0.3127501606941223,
      "learning_rate": 5.044241489291569e-05,
      "loss": 0.9411,
      "step": 12110
    },
    {
      "epoch": 0.6988347946469774,
      "grad_norm": 0.30937451124191284,
      "learning_rate": 5.035499365937122e-05,
      "loss": 0.9552,
      "step": 12115
    },
    {
      "epoch": 0.6991232118135672,
      "grad_norm": 0.2949715852737427,
      "learning_rate": 5.0267622747461487e-05,
      "loss": 0.9715,
      "step": 12120
    },
    {
      "epoch": 0.6994116289801569,
      "grad_norm": 0.32532167434692383,
      "learning_rate": 5.018030224574818e-05,
      "loss": 0.8913,
      "step": 12125
    },
    {
      "epoch": 0.6997000461467466,
      "grad_norm": 0.2869986593723297,
      "learning_rate": 5.009303224274191e-05,
      "loss": 0.996,
      "step": 12130
    },
    {
      "epoch": 0.6999884633133364,
      "grad_norm": 0.3281845450401306,
      "learning_rate": 5.000581282690211e-05,
      "loss": 1.0192,
      "step": 12135
    },
    {
      "epoch": 0.7002768804799262,
      "grad_norm": 0.2783426344394684,
      "learning_rate": 4.991864408663692e-05,
      "loss": 0.9268,
      "step": 12140
    },
    {
      "epoch": 0.7005652976465159,
      "grad_norm": 0.32887738943099976,
      "learning_rate": 4.983152611030323e-05,
      "loss": 1.0089,
      "step": 12145
    },
    {
      "epoch": 0.7008537148131057,
      "grad_norm": 0.2962915897369385,
      "learning_rate": 4.974445898620622e-05,
      "loss": 0.9788,
      "step": 12150
    },
    {
      "epoch": 0.7011421319796954,
      "grad_norm": 0.2988428473472595,
      "learning_rate": 4.965744280259982e-05,
      "loss": 0.8686,
      "step": 12155
    },
    {
      "epoch": 0.7014305491462852,
      "grad_norm": 0.29604843258857727,
      "learning_rate": 4.957047764768612e-05,
      "loss": 0.9572,
      "step": 12160
    },
    {
      "epoch": 0.7017189663128749,
      "grad_norm": 0.30213579535484314,
      "learning_rate": 4.9483563609615555e-05,
      "loss": 0.972,
      "step": 12165
    },
    {
      "epoch": 0.7020073834794647,
      "grad_norm": 0.29231953620910645,
      "learning_rate": 4.939670077648676e-05,
      "loss": 0.9307,
      "step": 12170
    },
    {
      "epoch": 0.7022958006460545,
      "grad_norm": 0.31731900572776794,
      "learning_rate": 4.930988923634641e-05,
      "loss": 0.9383,
      "step": 12175
    },
    {
      "epoch": 0.7025842178126442,
      "grad_norm": 0.3002810776233673,
      "learning_rate": 4.922312907718929e-05,
      "loss": 1.0575,
      "step": 12180
    },
    {
      "epoch": 0.7028726349792339,
      "grad_norm": 0.32030534744262695,
      "learning_rate": 4.913642038695802e-05,
      "loss": 1.0175,
      "step": 12185
    },
    {
      "epoch": 0.7031610521458237,
      "grad_norm": 0.30039533972740173,
      "learning_rate": 4.9049763253543054e-05,
      "loss": 0.9111,
      "step": 12190
    },
    {
      "epoch": 0.7034494693124135,
      "grad_norm": 0.29103514552116394,
      "learning_rate": 4.896315776478263e-05,
      "loss": 1.0018,
      "step": 12195
    },
    {
      "epoch": 0.7037378864790033,
      "grad_norm": 0.31603309512138367,
      "learning_rate": 4.8876604008462554e-05,
      "loss": 0.9354,
      "step": 12200
    },
    {
      "epoch": 0.7040263036455929,
      "grad_norm": 0.2983769476413727,
      "learning_rate": 4.879010207231632e-05,
      "loss": 0.9266,
      "step": 12205
    },
    {
      "epoch": 0.7043147208121827,
      "grad_norm": 0.2903640866279602,
      "learning_rate": 4.870365204402483e-05,
      "loss": 0.9105,
      "step": 12210
    },
    {
      "epoch": 0.7046031379787725,
      "grad_norm": 0.2937866449356079,
      "learning_rate": 4.8617254011216316e-05,
      "loss": 1.0096,
      "step": 12215
    },
    {
      "epoch": 0.7048915551453623,
      "grad_norm": 0.29428091645240784,
      "learning_rate": 4.8530908061466404e-05,
      "loss": 1.0238,
      "step": 12220
    },
    {
      "epoch": 0.705179972311952,
      "grad_norm": 0.2882111072540283,
      "learning_rate": 4.844461428229782e-05,
      "loss": 0.9895,
      "step": 12225
    },
    {
      "epoch": 0.7054683894785417,
      "grad_norm": 0.2777700126171112,
      "learning_rate": 4.835837276118058e-05,
      "loss": 0.9442,
      "step": 12230
    },
    {
      "epoch": 0.7057568066451315,
      "grad_norm": 0.30342546105384827,
      "learning_rate": 4.827218358553148e-05,
      "loss": 0.9846,
      "step": 12235
    },
    {
      "epoch": 0.7060452238117213,
      "grad_norm": 0.29388317465782166,
      "learning_rate": 4.8186046842714504e-05,
      "loss": 0.8987,
      "step": 12240
    },
    {
      "epoch": 0.7063336409783111,
      "grad_norm": 0.29445379972457886,
      "learning_rate": 4.8099962620040314e-05,
      "loss": 0.9868,
      "step": 12245
    },
    {
      "epoch": 0.7066220581449008,
      "grad_norm": 0.3062158226966858,
      "learning_rate": 4.801393100476651e-05,
      "loss": 0.9183,
      "step": 12250
    },
    {
      "epoch": 0.7069104753114905,
      "grad_norm": 0.26878073811531067,
      "learning_rate": 4.792795208409714e-05,
      "loss": 0.9274,
      "step": 12255
    },
    {
      "epoch": 0.7071988924780803,
      "grad_norm": 0.2894469201564789,
      "learning_rate": 4.784202594518298e-05,
      "loss": 0.9681,
      "step": 12260
    },
    {
      "epoch": 0.7074873096446701,
      "grad_norm": 0.33790189027786255,
      "learning_rate": 4.775615267512133e-05,
      "loss": 0.935,
      "step": 12265
    },
    {
      "epoch": 0.7077757268112598,
      "grad_norm": 0.293639212846756,
      "learning_rate": 4.767033236095585e-05,
      "loss": 0.9128,
      "step": 12270
    },
    {
      "epoch": 0.7080641439778496,
      "grad_norm": 0.3044992685317993,
      "learning_rate": 4.758456508967651e-05,
      "loss": 0.9461,
      "step": 12275
    },
    {
      "epoch": 0.7083525611444393,
      "grad_norm": 0.3080674111843109,
      "learning_rate": 4.749885094821951e-05,
      "loss": 1.0456,
      "step": 12280
    },
    {
      "epoch": 0.7086409783110291,
      "grad_norm": 0.2785399556159973,
      "learning_rate": 4.7413190023467266e-05,
      "loss": 0.9056,
      "step": 12285
    },
    {
      "epoch": 0.7089293954776188,
      "grad_norm": 0.27793818712234497,
      "learning_rate": 4.732758240224818e-05,
      "loss": 0.9389,
      "step": 12290
    },
    {
      "epoch": 0.7092178126442086,
      "grad_norm": 0.29727405309677124,
      "learning_rate": 4.7242028171336675e-05,
      "loss": 0.9914,
      "step": 12295
    },
    {
      "epoch": 0.7095062298107984,
      "grad_norm": 0.3035670220851898,
      "learning_rate": 4.715652741745298e-05,
      "loss": 0.9744,
      "step": 12300
    },
    {
      "epoch": 0.7097946469773881,
      "grad_norm": 0.33569151163101196,
      "learning_rate": 4.7071080227263164e-05,
      "loss": 0.9731,
      "step": 12305
    },
    {
      "epoch": 0.7100830641439778,
      "grad_norm": 0.29499197006225586,
      "learning_rate": 4.6985686687379103e-05,
      "loss": 0.9284,
      "step": 12310
    },
    {
      "epoch": 0.7103714813105676,
      "grad_norm": 0.27395883202552795,
      "learning_rate": 4.690034688435809e-05,
      "loss": 0.9208,
      "step": 12315
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 0.30385783314704895,
      "learning_rate": 4.6815060904703046e-05,
      "loss": 1.0277,
      "step": 12320
    },
    {
      "epoch": 0.7109483156437472,
      "grad_norm": 0.28144723176956177,
      "learning_rate": 4.672982883486243e-05,
      "loss": 0.9771,
      "step": 12325
    },
    {
      "epoch": 0.7112367328103368,
      "grad_norm": 0.3171987235546112,
      "learning_rate": 4.664465076122991e-05,
      "loss": 0.9382,
      "step": 12330
    },
    {
      "epoch": 0.7115251499769266,
      "grad_norm": 0.33855724334716797,
      "learning_rate": 4.655952677014449e-05,
      "loss": 0.9713,
      "step": 12335
    },
    {
      "epoch": 0.7118135671435164,
      "grad_norm": 0.30471786856651306,
      "learning_rate": 4.647445694789032e-05,
      "loss": 0.9294,
      "step": 12340
    },
    {
      "epoch": 0.7121019843101062,
      "grad_norm": 0.2840113937854767,
      "learning_rate": 4.638944138069672e-05,
      "loss": 0.9443,
      "step": 12345
    },
    {
      "epoch": 0.7123904014766959,
      "grad_norm": 0.3151952028274536,
      "learning_rate": 4.630448015473794e-05,
      "loss": 0.9651,
      "step": 12350
    },
    {
      "epoch": 0.7126788186432856,
      "grad_norm": 0.28496062755584717,
      "learning_rate": 4.621957335613316e-05,
      "loss": 0.9156,
      "step": 12355
    },
    {
      "epoch": 0.7129672358098754,
      "grad_norm": 0.27828526496887207,
      "learning_rate": 4.613472107094641e-05,
      "loss": 0.9541,
      "step": 12360
    },
    {
      "epoch": 0.7132556529764652,
      "grad_norm": 0.320968896150589,
      "learning_rate": 4.6049923385186414e-05,
      "loss": 1.021,
      "step": 12365
    },
    {
      "epoch": 0.7135440701430549,
      "grad_norm": 0.3339998424053192,
      "learning_rate": 4.596518038480667e-05,
      "loss": 1.0306,
      "step": 12370
    },
    {
      "epoch": 0.7138324873096447,
      "grad_norm": 0.29373204708099365,
      "learning_rate": 4.5880492155705134e-05,
      "loss": 0.9256,
      "step": 12375
    },
    {
      "epoch": 0.7141209044762344,
      "grad_norm": 0.28952670097351074,
      "learning_rate": 4.579585878372428e-05,
      "loss": 0.9118,
      "step": 12380
    },
    {
      "epoch": 0.7144093216428242,
      "grad_norm": 0.31708285212516785,
      "learning_rate": 4.571128035465096e-05,
      "loss": 0.9585,
      "step": 12385
    },
    {
      "epoch": 0.7146977388094139,
      "grad_norm": 0.27138781547546387,
      "learning_rate": 4.562675695421634e-05,
      "loss": 0.9503,
      "step": 12390
    },
    {
      "epoch": 0.7149861559760037,
      "grad_norm": 0.29679614305496216,
      "learning_rate": 4.554228866809591e-05,
      "loss": 1.0058,
      "step": 12395
    },
    {
      "epoch": 0.7152745731425935,
      "grad_norm": 0.30243879556655884,
      "learning_rate": 4.545787558190907e-05,
      "loss": 0.8985,
      "step": 12400
    },
    {
      "epoch": 0.7155629903091832,
      "grad_norm": 0.2855212986469269,
      "learning_rate": 4.53735177812195e-05,
      "loss": 1.0001,
      "step": 12405
    },
    {
      "epoch": 0.7158514074757729,
      "grad_norm": 0.29729998111724854,
      "learning_rate": 4.5289215351534666e-05,
      "loss": 0.9448,
      "step": 12410
    },
    {
      "epoch": 0.7161398246423627,
      "grad_norm": 0.2747268080711365,
      "learning_rate": 4.520496837830609e-05,
      "loss": 0.9207,
      "step": 12415
    },
    {
      "epoch": 0.7164282418089525,
      "grad_norm": 0.2757469117641449,
      "learning_rate": 4.512077694692888e-05,
      "loss": 0.9151,
      "step": 12420
    },
    {
      "epoch": 0.7167166589755423,
      "grad_norm": 0.2848719358444214,
      "learning_rate": 4.503664114274193e-05,
      "loss": 0.9135,
      "step": 12425
    },
    {
      "epoch": 0.7170050761421319,
      "grad_norm": 0.26633986830711365,
      "learning_rate": 4.495256105102784e-05,
      "loss": 0.9802,
      "step": 12430
    },
    {
      "epoch": 0.7172934933087217,
      "grad_norm": 0.29793140292167664,
      "learning_rate": 4.4868536757012614e-05,
      "loss": 0.9695,
      "step": 12435
    },
    {
      "epoch": 0.7175819104753115,
      "grad_norm": 0.2670263648033142,
      "learning_rate": 4.478456834586574e-05,
      "loss": 0.9149,
      "step": 12440
    },
    {
      "epoch": 0.7178703276419013,
      "grad_norm": 0.35273703932762146,
      "learning_rate": 4.4700655902700075e-05,
      "loss": 0.9942,
      "step": 12445
    },
    {
      "epoch": 0.718158744808491,
      "grad_norm": 0.29049214720726013,
      "learning_rate": 4.4616799512571675e-05,
      "loss": 0.9267,
      "step": 12450
    },
    {
      "epoch": 0.7184471619750807,
      "grad_norm": 0.2781081199645996,
      "learning_rate": 4.453299926047997e-05,
      "loss": 0.9169,
      "step": 12455
    },
    {
      "epoch": 0.7187355791416705,
      "grad_norm": 0.315525084733963,
      "learning_rate": 4.4449255231367183e-05,
      "loss": 1.0033,
      "step": 12460
    },
    {
      "epoch": 0.7190239963082603,
      "grad_norm": 0.2818520665168762,
      "learning_rate": 4.4365567510118854e-05,
      "loss": 0.9783,
      "step": 12465
    },
    {
      "epoch": 0.71931241347485,
      "grad_norm": 0.3043684959411621,
      "learning_rate": 4.428193618156322e-05,
      "loss": 1.0024,
      "step": 12470
    },
    {
      "epoch": 0.7196008306414398,
      "grad_norm": 0.29822173714637756,
      "learning_rate": 4.419836133047157e-05,
      "loss": 1.031,
      "step": 12475
    },
    {
      "epoch": 0.7198892478080295,
      "grad_norm": 0.27869507670402527,
      "learning_rate": 4.411484304155771e-05,
      "loss": 0.9294,
      "step": 12480
    },
    {
      "epoch": 0.7201776649746193,
      "grad_norm": 0.2925718426704407,
      "learning_rate": 4.403138139947822e-05,
      "loss": 0.9644,
      "step": 12485
    },
    {
      "epoch": 0.720466082141209,
      "grad_norm": 0.2630343735218048,
      "learning_rate": 4.394797648883236e-05,
      "loss": 0.9852,
      "step": 12490
    },
    {
      "epoch": 0.7207544993077988,
      "grad_norm": 0.2998071014881134,
      "learning_rate": 4.386462839416173e-05,
      "loss": 1.0253,
      "step": 12495
    },
    {
      "epoch": 0.7210429164743886,
      "grad_norm": 0.30434897541999817,
      "learning_rate": 4.378133719995044e-05,
      "loss": 0.9224,
      "step": 12500
    },
    {
      "epoch": 0.7213313336409783,
      "grad_norm": 0.30247268080711365,
      "learning_rate": 4.3698102990624825e-05,
      "loss": 1.0018,
      "step": 12505
    },
    {
      "epoch": 0.721619750807568,
      "grad_norm": 0.283896267414093,
      "learning_rate": 4.36149258505536e-05,
      "loss": 0.9589,
      "step": 12510
    },
    {
      "epoch": 0.7219081679741578,
      "grad_norm": 0.2718851566314697,
      "learning_rate": 4.353180586404752e-05,
      "loss": 0.8899,
      "step": 12515
    },
    {
      "epoch": 0.7221965851407476,
      "grad_norm": 0.3284825384616852,
      "learning_rate": 4.344874311535944e-05,
      "loss": 0.8988,
      "step": 12520
    },
    {
      "epoch": 0.7224850023073374,
      "grad_norm": 0.29092779755592346,
      "learning_rate": 4.336573768868418e-05,
      "loss": 0.8914,
      "step": 12525
    },
    {
      "epoch": 0.722773419473927,
      "grad_norm": 0.2990238666534424,
      "learning_rate": 4.3282789668158476e-05,
      "loss": 0.9355,
      "step": 12530
    },
    {
      "epoch": 0.7230618366405168,
      "grad_norm": 0.29451605677604675,
      "learning_rate": 4.319989913786093e-05,
      "loss": 0.9247,
      "step": 12535
    },
    {
      "epoch": 0.7233502538071066,
      "grad_norm": 0.29550743103027344,
      "learning_rate": 4.31170661818118e-05,
      "loss": 0.9232,
      "step": 12540
    },
    {
      "epoch": 0.7236386709736964,
      "grad_norm": 0.3342383801937103,
      "learning_rate": 4.303429088397293e-05,
      "loss": 0.9329,
      "step": 12545
    },
    {
      "epoch": 0.7239270881402861,
      "grad_norm": 0.28196507692337036,
      "learning_rate": 4.295157332824785e-05,
      "loss": 0.9368,
      "step": 12550
    },
    {
      "epoch": 0.7242155053068758,
      "grad_norm": 0.29440298676490784,
      "learning_rate": 4.2868913598481464e-05,
      "loss": 0.9863,
      "step": 12555
    },
    {
      "epoch": 0.7245039224734656,
      "grad_norm": 0.28593847155570984,
      "learning_rate": 4.27863117784602e-05,
      "loss": 0.9481,
      "step": 12560
    },
    {
      "epoch": 0.7247923396400554,
      "grad_norm": 0.2748074233531952,
      "learning_rate": 4.270376795191152e-05,
      "loss": 0.9292,
      "step": 12565
    },
    {
      "epoch": 0.7250807568066451,
      "grad_norm": 0.3184608817100525,
      "learning_rate": 4.262128220250441e-05,
      "loss": 0.9201,
      "step": 12570
    },
    {
      "epoch": 0.7253691739732349,
      "grad_norm": 0.27379128336906433,
      "learning_rate": 4.2538854613848776e-05,
      "loss": 0.923,
      "step": 12575
    },
    {
      "epoch": 0.7256575911398246,
      "grad_norm": 0.3166322410106659,
      "learning_rate": 4.245648526949567e-05,
      "loss": 0.9177,
      "step": 12580
    },
    {
      "epoch": 0.7259460083064144,
      "grad_norm": 0.3264232873916626,
      "learning_rate": 4.237417425293706e-05,
      "loss": 0.9677,
      "step": 12585
    },
    {
      "epoch": 0.7262344254730042,
      "grad_norm": 0.29863858222961426,
      "learning_rate": 4.229192164760576e-05,
      "loss": 1.0023,
      "step": 12590
    },
    {
      "epoch": 0.7265228426395939,
      "grad_norm": 0.3183000981807709,
      "learning_rate": 4.220972753687552e-05,
      "loss": 0.9889,
      "step": 12595
    },
    {
      "epoch": 0.7268112598061837,
      "grad_norm": 0.3113453686237335,
      "learning_rate": 4.212759200406065e-05,
      "loss": 0.9502,
      "step": 12600
    },
    {
      "epoch": 0.7270996769727734,
      "grad_norm": 0.3108140528202057,
      "learning_rate": 4.204551513241615e-05,
      "loss": 0.883,
      "step": 12605
    },
    {
      "epoch": 0.7273880941393632,
      "grad_norm": 0.2969469428062439,
      "learning_rate": 4.1963497005137516e-05,
      "loss": 0.9898,
      "step": 12610
    },
    {
      "epoch": 0.7276765113059529,
      "grad_norm": 0.3002479672431946,
      "learning_rate": 4.188153770536072e-05,
      "loss": 0.9679,
      "step": 12615
    },
    {
      "epoch": 0.7279649284725427,
      "grad_norm": 0.30599701404571533,
      "learning_rate": 4.179963731616221e-05,
      "loss": 0.937,
      "step": 12620
    },
    {
      "epoch": 0.7282533456391325,
      "grad_norm": 0.28612884879112244,
      "learning_rate": 4.171779592055848e-05,
      "loss": 0.9575,
      "step": 12625
    },
    {
      "epoch": 0.7285417628057222,
      "grad_norm": 0.2787284255027771,
      "learning_rate": 4.163601360150646e-05,
      "loss": 0.9889,
      "step": 12630
    },
    {
      "epoch": 0.7288301799723119,
      "grad_norm": 0.3069520890712738,
      "learning_rate": 4.155429044190311e-05,
      "loss": 0.9688,
      "step": 12635
    },
    {
      "epoch": 0.7291185971389017,
      "grad_norm": 0.26980647444725037,
      "learning_rate": 4.147262652458539e-05,
      "loss": 0.9243,
      "step": 12640
    },
    {
      "epoch": 0.7294070143054915,
      "grad_norm": 0.29087504744529724,
      "learning_rate": 4.139102193233025e-05,
      "loss": 0.977,
      "step": 12645
    },
    {
      "epoch": 0.7296954314720813,
      "grad_norm": 0.286493182182312,
      "learning_rate": 4.130947674785447e-05,
      "loss": 0.9924,
      "step": 12650
    },
    {
      "epoch": 0.7299838486386709,
      "grad_norm": 0.3031412661075592,
      "learning_rate": 4.1227991053814694e-05,
      "loss": 0.934,
      "step": 12655
    },
    {
      "epoch": 0.7302722658052607,
      "grad_norm": 0.2925845980644226,
      "learning_rate": 4.114656493280721e-05,
      "loss": 0.9979,
      "step": 12660
    },
    {
      "epoch": 0.7305606829718505,
      "grad_norm": 0.3840329349040985,
      "learning_rate": 4.106519846736788e-05,
      "loss": 1.0479,
      "step": 12665
    },
    {
      "epoch": 0.7308491001384403,
      "grad_norm": 0.2865562438964844,
      "learning_rate": 4.098389173997218e-05,
      "loss": 0.9629,
      "step": 12670
    },
    {
      "epoch": 0.73113751730503,
      "grad_norm": 0.270366907119751,
      "learning_rate": 4.090264483303493e-05,
      "loss": 0.9989,
      "step": 12675
    },
    {
      "epoch": 0.7314259344716197,
      "grad_norm": 0.27948349714279175,
      "learning_rate": 4.082145782891046e-05,
      "loss": 0.9558,
      "step": 12680
    },
    {
      "epoch": 0.7317143516382095,
      "grad_norm": 0.27884334325790405,
      "learning_rate": 4.074033080989227e-05,
      "loss": 0.9059,
      "step": 12685
    },
    {
      "epoch": 0.7320027688047993,
      "grad_norm": 0.30114471912384033,
      "learning_rate": 4.065926385821307e-05,
      "loss": 0.9401,
      "step": 12690
    },
    {
      "epoch": 0.732291185971389,
      "grad_norm": 0.2634121775627136,
      "learning_rate": 4.057825705604468e-05,
      "loss": 0.9164,
      "step": 12695
    },
    {
      "epoch": 0.7325796031379788,
      "grad_norm": 0.2946275472640991,
      "learning_rate": 4.049731048549804e-05,
      "loss": 0.9005,
      "step": 12700
    },
    {
      "epoch": 0.7328680203045685,
      "grad_norm": 0.28885963559150696,
      "learning_rate": 4.041642422862295e-05,
      "loss": 0.917,
      "step": 12705
    },
    {
      "epoch": 0.7331564374711583,
      "grad_norm": 0.28542929887771606,
      "learning_rate": 4.033559836740801e-05,
      "loss": 1.0206,
      "step": 12710
    },
    {
      "epoch": 0.733444854637748,
      "grad_norm": 0.2643488943576813,
      "learning_rate": 4.0254832983780786e-05,
      "loss": 0.9287,
      "step": 12715
    },
    {
      "epoch": 0.7337332718043378,
      "grad_norm": 0.28975099325180054,
      "learning_rate": 4.017412815960735e-05,
      "loss": 0.8696,
      "step": 12720
    },
    {
      "epoch": 0.7340216889709276,
      "grad_norm": 0.2887983024120331,
      "learning_rate": 4.0093483976692616e-05,
      "loss": 1.0434,
      "step": 12725
    },
    {
      "epoch": 0.7343101061375173,
      "grad_norm": 0.295878529548645,
      "learning_rate": 4.001290051677975e-05,
      "loss": 0.9562,
      "step": 12730
    },
    {
      "epoch": 0.734598523304107,
      "grad_norm": 0.29598087072372437,
      "learning_rate": 3.993237786155055e-05,
      "loss": 1.0612,
      "step": 12735
    },
    {
      "epoch": 0.7348869404706968,
      "grad_norm": 0.29324209690093994,
      "learning_rate": 3.985191609262519e-05,
      "loss": 0.877,
      "step": 12740
    },
    {
      "epoch": 0.7351753576372866,
      "grad_norm": 0.26347586512565613,
      "learning_rate": 3.9771515291562033e-05,
      "loss": 0.9648,
      "step": 12745
    },
    {
      "epoch": 0.7354637748038764,
      "grad_norm": 0.3339488208293915,
      "learning_rate": 3.969117553985772e-05,
      "loss": 0.9263,
      "step": 12750
    },
    {
      "epoch": 0.735752191970466,
      "grad_norm": 0.2952229678630829,
      "learning_rate": 3.961089691894692e-05,
      "loss": 0.9163,
      "step": 12755
    },
    {
      "epoch": 0.7360406091370558,
      "grad_norm": 0.27959832549095154,
      "learning_rate": 3.9530679510202476e-05,
      "loss": 0.9824,
      "step": 12760
    },
    {
      "epoch": 0.7363290263036456,
      "grad_norm": 0.2998709976673126,
      "learning_rate": 3.945052339493507e-05,
      "loss": 0.9003,
      "step": 12765
    },
    {
      "epoch": 0.7366174434702354,
      "grad_norm": 0.29380345344543457,
      "learning_rate": 3.9370428654393296e-05,
      "loss": 0.9567,
      "step": 12770
    },
    {
      "epoch": 0.7369058606368251,
      "grad_norm": 0.313002347946167,
      "learning_rate": 3.929039536976353e-05,
      "loss": 0.8868,
      "step": 12775
    },
    {
      "epoch": 0.7371942778034148,
      "grad_norm": 0.2953389883041382,
      "learning_rate": 3.921042362216983e-05,
      "loss": 1.0127,
      "step": 12780
    },
    {
      "epoch": 0.7374826949700046,
      "grad_norm": 0.3077605366706848,
      "learning_rate": 3.913051349267399e-05,
      "loss": 0.9811,
      "step": 12785
    },
    {
      "epoch": 0.7377711121365944,
      "grad_norm": 0.30430668592453003,
      "learning_rate": 3.905066506227515e-05,
      "loss": 0.963,
      "step": 12790
    },
    {
      "epoch": 0.7380595293031841,
      "grad_norm": 0.4236873388290405,
      "learning_rate": 3.897087841191009e-05,
      "loss": 0.9818,
      "step": 12795
    },
    {
      "epoch": 0.7383479464697739,
      "grad_norm": 0.3383219242095947,
      "learning_rate": 3.8891153622452904e-05,
      "loss": 0.9542,
      "step": 12800
    },
    {
      "epoch": 0.7386363636363636,
      "grad_norm": 0.2933284640312195,
      "learning_rate": 3.881149077471495e-05,
      "loss": 0.937,
      "step": 12805
    },
    {
      "epoch": 0.7389247808029534,
      "grad_norm": 0.2885740101337433,
      "learning_rate": 3.873188994944483e-05,
      "loss": 0.9057,
      "step": 12810
    },
    {
      "epoch": 0.7392131979695431,
      "grad_norm": 0.3335943818092346,
      "learning_rate": 3.8652351227328256e-05,
      "loss": 0.8971,
      "step": 12815
    },
    {
      "epoch": 0.7395016151361329,
      "grad_norm": 0.34636521339416504,
      "learning_rate": 3.857287468898806e-05,
      "loss": 0.9614,
      "step": 12820
    },
    {
      "epoch": 0.7397900323027227,
      "grad_norm": 0.3440823256969452,
      "learning_rate": 3.8493460414983976e-05,
      "loss": 0.9456,
      "step": 12825
    },
    {
      "epoch": 0.7400784494693124,
      "grad_norm": 0.2916651666164398,
      "learning_rate": 3.8414108485812613e-05,
      "loss": 0.9229,
      "step": 12830
    },
    {
      "epoch": 0.7403668666359021,
      "grad_norm": 0.2580519914627075,
      "learning_rate": 3.833481898190745e-05,
      "loss": 0.892,
      "step": 12835
    },
    {
      "epoch": 0.7406552838024919,
      "grad_norm": 0.28569847345352173,
      "learning_rate": 3.825559198363861e-05,
      "loss": 0.9541,
      "step": 12840
    },
    {
      "epoch": 0.7409437009690817,
      "grad_norm": 0.28932294249534607,
      "learning_rate": 3.8176427571312945e-05,
      "loss": 1.0493,
      "step": 12845
    },
    {
      "epoch": 0.7412321181356715,
      "grad_norm": 0.3193461000919342,
      "learning_rate": 3.8097325825173826e-05,
      "loss": 1.0026,
      "step": 12850
    },
    {
      "epoch": 0.7415205353022611,
      "grad_norm": 0.26970720291137695,
      "learning_rate": 3.801828682540107e-05,
      "loss": 0.8857,
      "step": 12855
    },
    {
      "epoch": 0.7418089524688509,
      "grad_norm": 0.2908310890197754,
      "learning_rate": 3.793931065211096e-05,
      "loss": 0.974,
      "step": 12860
    },
    {
      "epoch": 0.7420973696354407,
      "grad_norm": 0.28948134183883667,
      "learning_rate": 3.7860397385356006e-05,
      "loss": 0.9374,
      "step": 12865
    },
    {
      "epoch": 0.7423857868020305,
      "grad_norm": 0.3087465763092041,
      "learning_rate": 3.778154710512512e-05,
      "loss": 0.9918,
      "step": 12870
    },
    {
      "epoch": 0.7426742039686202,
      "grad_norm": 0.28556784987449646,
      "learning_rate": 3.770275989134312e-05,
      "loss": 0.9906,
      "step": 12875
    },
    {
      "epoch": 0.7429626211352099,
      "grad_norm": 0.30968794226646423,
      "learning_rate": 3.762403582387114e-05,
      "loss": 0.967,
      "step": 12880
    },
    {
      "epoch": 0.7432510383017997,
      "grad_norm": 0.30540212988853455,
      "learning_rate": 3.754537498250617e-05,
      "loss": 0.9226,
      "step": 12885
    },
    {
      "epoch": 0.7435394554683895,
      "grad_norm": 0.29100221395492554,
      "learning_rate": 3.746677744698114e-05,
      "loss": 0.9921,
      "step": 12890
    },
    {
      "epoch": 0.7438278726349792,
      "grad_norm": 0.26872479915618896,
      "learning_rate": 3.738824329696483e-05,
      "loss": 0.9524,
      "step": 12895
    },
    {
      "epoch": 0.744116289801569,
      "grad_norm": 0.2934696674346924,
      "learning_rate": 3.730977261206171e-05,
      "loss": 0.9088,
      "step": 12900
    },
    {
      "epoch": 0.7444047069681587,
      "grad_norm": 0.31584224104881287,
      "learning_rate": 3.723136547181205e-05,
      "loss": 1.0156,
      "step": 12905
    },
    {
      "epoch": 0.7446931241347485,
      "grad_norm": 0.2779346704483032,
      "learning_rate": 3.715302195569159e-05,
      "loss": 0.9704,
      "step": 12910
    },
    {
      "epoch": 0.7449815413013382,
      "grad_norm": 0.30584028363227844,
      "learning_rate": 3.7074742143111604e-05,
      "loss": 0.9977,
      "step": 12915
    },
    {
      "epoch": 0.745269958467928,
      "grad_norm": 0.29079389572143555,
      "learning_rate": 3.69965261134188e-05,
      "loss": 0.9664,
      "step": 12920
    },
    {
      "epoch": 0.7455583756345178,
      "grad_norm": 0.289608895778656,
      "learning_rate": 3.691837394589527e-05,
      "loss": 0.9142,
      "step": 12925
    },
    {
      "epoch": 0.7458467928011075,
      "grad_norm": 0.294889360666275,
      "learning_rate": 3.684028571975836e-05,
      "loss": 0.8972,
      "step": 12930
    },
    {
      "epoch": 0.7461352099676973,
      "grad_norm": 0.2971971035003662,
      "learning_rate": 3.6762261514160504e-05,
      "loss": 0.9482,
      "step": 12935
    },
    {
      "epoch": 0.746423627134287,
      "grad_norm": 0.28286072611808777,
      "learning_rate": 3.6684301408189406e-05,
      "loss": 0.9655,
      "step": 12940
    },
    {
      "epoch": 0.7467120443008768,
      "grad_norm": 0.2943524420261383,
      "learning_rate": 3.660640548086765e-05,
      "loss": 0.9522,
      "step": 12945
    },
    {
      "epoch": 0.7470004614674666,
      "grad_norm": 0.2993481457233429,
      "learning_rate": 3.652857381115293e-05,
      "loss": 0.9377,
      "step": 12950
    },
    {
      "epoch": 0.7472888786340564,
      "grad_norm": 0.3094017803668976,
      "learning_rate": 3.6450806477937625e-05,
      "loss": 0.9582,
      "step": 12955
    },
    {
      "epoch": 0.747577295800646,
      "grad_norm": 0.29749640822410583,
      "learning_rate": 3.637310356004897e-05,
      "loss": 0.9853,
      "step": 12960
    },
    {
      "epoch": 0.7478657129672358,
      "grad_norm": 0.2731906771659851,
      "learning_rate": 3.6295465136249006e-05,
      "loss": 0.9099,
      "step": 12965
    },
    {
      "epoch": 0.7481541301338256,
      "grad_norm": 0.30480098724365234,
      "learning_rate": 3.6217891285234265e-05,
      "loss": 0.9377,
      "step": 12970
    },
    {
      "epoch": 0.7484425473004154,
      "grad_norm": 0.3027295172214508,
      "learning_rate": 3.614038208563588e-05,
      "loss": 0.9813,
      "step": 12975
    },
    {
      "epoch": 0.748730964467005,
      "grad_norm": 0.30749472975730896,
      "learning_rate": 3.6062937616019433e-05,
      "loss": 0.96,
      "step": 12980
    },
    {
      "epoch": 0.7490193816335948,
      "grad_norm": 0.28358033299446106,
      "learning_rate": 3.598555795488496e-05,
      "loss": 0.9461,
      "step": 12985
    },
    {
      "epoch": 0.7493077988001846,
      "grad_norm": 0.28506404161453247,
      "learning_rate": 3.5908243180666734e-05,
      "loss": 0.9203,
      "step": 12990
    },
    {
      "epoch": 0.7495962159667744,
      "grad_norm": 0.27330875396728516,
      "learning_rate": 3.5830993371733254e-05,
      "loss": 0.9645,
      "step": 12995
    },
    {
      "epoch": 0.7498846331333641,
      "grad_norm": 0.2925858795642853,
      "learning_rate": 3.5753808606387226e-05,
      "loss": 0.8993,
      "step": 13000
    },
    {
      "epoch": 0.7501730502999538,
      "grad_norm": 0.2975315749645233,
      "learning_rate": 3.5676688962865344e-05,
      "loss": 0.9683,
      "step": 13005
    },
    {
      "epoch": 0.7504614674665436,
      "grad_norm": 0.25685515999794006,
      "learning_rate": 3.55996345193384e-05,
      "loss": 0.8821,
      "step": 13010
    },
    {
      "epoch": 0.7507498846331334,
      "grad_norm": 0.2893523871898651,
      "learning_rate": 3.5522645353911013e-05,
      "loss": 0.9397,
      "step": 13015
    },
    {
      "epoch": 0.7510383017997231,
      "grad_norm": 0.2877350449562073,
      "learning_rate": 3.544572154462165e-05,
      "loss": 0.9481,
      "step": 13020
    },
    {
      "epoch": 0.7513267189663129,
      "grad_norm": 0.4357761740684509,
      "learning_rate": 3.5368863169442556e-05,
      "loss": 0.9335,
      "step": 13025
    },
    {
      "epoch": 0.7516151361329027,
      "grad_norm": 0.3034375309944153,
      "learning_rate": 3.529207030627959e-05,
      "loss": 0.9487,
      "step": 13030
    },
    {
      "epoch": 0.7519035532994924,
      "grad_norm": 0.3313092291355133,
      "learning_rate": 3.5215343032972356e-05,
      "loss": 1.0163,
      "step": 13035
    },
    {
      "epoch": 0.7521919704660821,
      "grad_norm": 0.3019075393676758,
      "learning_rate": 3.513868142729373e-05,
      "loss": 0.9449,
      "step": 13040
    },
    {
      "epoch": 0.7524803876326719,
      "grad_norm": 0.2984979450702667,
      "learning_rate": 3.506208556695028e-05,
      "loss": 0.9403,
      "step": 13045
    },
    {
      "epoch": 0.7527688047992617,
      "grad_norm": 0.299967885017395,
      "learning_rate": 3.498555552958176e-05,
      "loss": 1.0028,
      "step": 13050
    },
    {
      "epoch": 0.7530572219658515,
      "grad_norm": 0.2753196954727173,
      "learning_rate": 3.490909139276127e-05,
      "loss": 0.8964,
      "step": 13055
    },
    {
      "epoch": 0.7533456391324411,
      "grad_norm": 0.3142189681529999,
      "learning_rate": 3.483269323399512e-05,
      "loss": 0.9908,
      "step": 13060
    },
    {
      "epoch": 0.7536340562990309,
      "grad_norm": 0.28006476163864136,
      "learning_rate": 3.475636113072266e-05,
      "loss": 0.9061,
      "step": 13065
    },
    {
      "epoch": 0.7539224734656207,
      "grad_norm": 0.3137742578983307,
      "learning_rate": 3.468009516031644e-05,
      "loss": 0.9464,
      "step": 13070
    },
    {
      "epoch": 0.7542108906322105,
      "grad_norm": 0.3132518231868744,
      "learning_rate": 3.4603895400081846e-05,
      "loss": 0.8994,
      "step": 13075
    },
    {
      "epoch": 0.7544993077988001,
      "grad_norm": 0.27635499835014343,
      "learning_rate": 3.452776192725717e-05,
      "loss": 0.9249,
      "step": 13080
    },
    {
      "epoch": 0.7547877249653899,
      "grad_norm": 0.29366254806518555,
      "learning_rate": 3.445169481901357e-05,
      "loss": 0.9538,
      "step": 13085
    },
    {
      "epoch": 0.7550761421319797,
      "grad_norm": 0.2829080820083618,
      "learning_rate": 3.437569415245483e-05,
      "loss": 1.0516,
      "step": 13090
    },
    {
      "epoch": 0.7553645592985695,
      "grad_norm": 0.30424532294273376,
      "learning_rate": 3.4299760004617573e-05,
      "loss": 0.934,
      "step": 13095
    },
    {
      "epoch": 0.7556529764651592,
      "grad_norm": 0.30195289850234985,
      "learning_rate": 3.422389245247075e-05,
      "loss": 0.9592,
      "step": 13100
    },
    {
      "epoch": 0.755941393631749,
      "grad_norm": 0.3074832856655121,
      "learning_rate": 3.414809157291603e-05,
      "loss": 1.0067,
      "step": 13105
    },
    {
      "epoch": 0.7562298107983387,
      "grad_norm": 0.2848927974700928,
      "learning_rate": 3.407235744278734e-05,
      "loss": 0.8393,
      "step": 13110
    },
    {
      "epoch": 0.7565182279649285,
      "grad_norm": 0.29484933614730835,
      "learning_rate": 3.3996690138851115e-05,
      "loss": 0.9396,
      "step": 13115
    },
    {
      "epoch": 0.7568066451315182,
      "grad_norm": 0.30370378494262695,
      "learning_rate": 3.3921089737805866e-05,
      "loss": 0.9017,
      "step": 13120
    },
    {
      "epoch": 0.757095062298108,
      "grad_norm": 0.30583134293556213,
      "learning_rate": 3.384555631628236e-05,
      "loss": 0.9263,
      "step": 13125
    },
    {
      "epoch": 0.7573834794646978,
      "grad_norm": 0.27707552909851074,
      "learning_rate": 3.3770089950843564e-05,
      "loss": 0.9476,
      "step": 13130
    },
    {
      "epoch": 0.7576718966312875,
      "grad_norm": 0.31780895590782166,
      "learning_rate": 3.3694690717984354e-05,
      "loss": 0.9377,
      "step": 13135
    },
    {
      "epoch": 0.7579603137978772,
      "grad_norm": 0.3167707026004791,
      "learning_rate": 3.361935869413163e-05,
      "loss": 0.9783,
      "step": 13140
    },
    {
      "epoch": 0.758248730964467,
      "grad_norm": 0.30130988359451294,
      "learning_rate": 3.354409395564412e-05,
      "loss": 1.0485,
      "step": 13145
    },
    {
      "epoch": 0.7585371481310568,
      "grad_norm": 0.30067166686058044,
      "learning_rate": 3.3468896578812344e-05,
      "loss": 0.955,
      "step": 13150
    },
    {
      "epoch": 0.7588255652976466,
      "grad_norm": 0.27905625104904175,
      "learning_rate": 3.3393766639858635e-05,
      "loss": 0.9141,
      "step": 13155
    },
    {
      "epoch": 0.7591139824642362,
      "grad_norm": 0.27241066098213196,
      "learning_rate": 3.331870421493688e-05,
      "loss": 0.9288,
      "step": 13160
    },
    {
      "epoch": 0.759402399630826,
      "grad_norm": 0.34241122007369995,
      "learning_rate": 3.324370938013252e-05,
      "loss": 1.0194,
      "step": 13165
    },
    {
      "epoch": 0.7596908167974158,
      "grad_norm": 0.3029732406139374,
      "learning_rate": 3.316878221146253e-05,
      "loss": 1.0148,
      "step": 13170
    },
    {
      "epoch": 0.7599792339640056,
      "grad_norm": 0.30240458250045776,
      "learning_rate": 3.3093922784875344e-05,
      "loss": 0.9751,
      "step": 13175
    },
    {
      "epoch": 0.7602676511305952,
      "grad_norm": 0.2856149673461914,
      "learning_rate": 3.301913117625065e-05,
      "loss": 0.9089,
      "step": 13180
    },
    {
      "epoch": 0.760556068297185,
      "grad_norm": 0.29663828015327454,
      "learning_rate": 3.2944407461399326e-05,
      "loss": 0.9934,
      "step": 13185
    },
    {
      "epoch": 0.7608444854637748,
      "grad_norm": 0.33323925733566284,
      "learning_rate": 3.286975171606362e-05,
      "loss": 0.92,
      "step": 13190
    },
    {
      "epoch": 0.7611329026303646,
      "grad_norm": 0.3022535741329193,
      "learning_rate": 3.279516401591677e-05,
      "loss": 0.9502,
      "step": 13195
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 0.29512476921081543,
      "learning_rate": 3.272064443656303e-05,
      "loss": 0.9373,
      "step": 13200
    },
    {
      "epoch": 0.761709736963544,
      "grad_norm": 0.2822323441505432,
      "learning_rate": 3.264619305353762e-05,
      "loss": 0.9714,
      "step": 13205
    },
    {
      "epoch": 0.7619981541301338,
      "grad_norm": 0.27018293738365173,
      "learning_rate": 3.257180994230671e-05,
      "loss": 0.9341,
      "step": 13210
    },
    {
      "epoch": 0.7622865712967236,
      "grad_norm": 0.2699947655200958,
      "learning_rate": 3.249749517826715e-05,
      "loss": 0.9173,
      "step": 13215
    },
    {
      "epoch": 0.7625749884633133,
      "grad_norm": 0.38015538454055786,
      "learning_rate": 3.2423248836746575e-05,
      "loss": 0.953,
      "step": 13220
    },
    {
      "epoch": 0.7628634056299031,
      "grad_norm": 0.293496698141098,
      "learning_rate": 3.234907099300327e-05,
      "loss": 0.9636,
      "step": 13225
    },
    {
      "epoch": 0.7631518227964929,
      "grad_norm": 0.3389447331428528,
      "learning_rate": 3.227496172222603e-05,
      "loss": 0.954,
      "step": 13230
    },
    {
      "epoch": 0.7634402399630826,
      "grad_norm": 0.2947312891483307,
      "learning_rate": 3.220092109953424e-05,
      "loss": 0.9586,
      "step": 13235
    },
    {
      "epoch": 0.7637286571296723,
      "grad_norm": 0.2841379940509796,
      "learning_rate": 3.212694919997764e-05,
      "loss": 1.0088,
      "step": 13240
    },
    {
      "epoch": 0.7640170742962621,
      "grad_norm": 0.30646151304244995,
      "learning_rate": 3.205304609853629e-05,
      "loss": 0.9978,
      "step": 13245
    },
    {
      "epoch": 0.7643054914628519,
      "grad_norm": 0.2783312201499939,
      "learning_rate": 3.197921187012055e-05,
      "loss": 0.9793,
      "step": 13250
    },
    {
      "epoch": 0.7645939086294417,
      "grad_norm": 0.267666757106781,
      "learning_rate": 3.190544658957094e-05,
      "loss": 0.9076,
      "step": 13255
    },
    {
      "epoch": 0.7648823257960313,
      "grad_norm": 0.26863396167755127,
      "learning_rate": 3.1831750331658196e-05,
      "loss": 0.8923,
      "step": 13260
    },
    {
      "epoch": 0.7651707429626211,
      "grad_norm": 0.29262346029281616,
      "learning_rate": 3.1758123171082874e-05,
      "loss": 0.9693,
      "step": 13265
    },
    {
      "epoch": 0.7654591601292109,
      "grad_norm": 0.30880671739578247,
      "learning_rate": 3.168456518247575e-05,
      "loss": 0.9445,
      "step": 13270
    },
    {
      "epoch": 0.7657475772958007,
      "grad_norm": 0.3033742308616638,
      "learning_rate": 3.161107644039728e-05,
      "loss": 0.9311,
      "step": 13275
    },
    {
      "epoch": 0.7660359944623903,
      "grad_norm": 0.2817222774028778,
      "learning_rate": 3.153765701933784e-05,
      "loss": 0.9141,
      "step": 13280
    },
    {
      "epoch": 0.7663244116289801,
      "grad_norm": 0.29499107599258423,
      "learning_rate": 3.1464306993717505e-05,
      "loss": 1.0122,
      "step": 13285
    },
    {
      "epoch": 0.7666128287955699,
      "grad_norm": 0.3005344569683075,
      "learning_rate": 3.1391026437885984e-05,
      "loss": 0.9728,
      "step": 13290
    },
    {
      "epoch": 0.7669012459621597,
      "grad_norm": 0.32257968187332153,
      "learning_rate": 3.1317815426122646e-05,
      "loss": 0.9155,
      "step": 13295
    },
    {
      "epoch": 0.7671896631287495,
      "grad_norm": 0.3038580119609833,
      "learning_rate": 3.12446740326363e-05,
      "loss": 1.0097,
      "step": 13300
    },
    {
      "epoch": 0.7674780802953391,
      "grad_norm": 0.30271029472351074,
      "learning_rate": 3.117160233156521e-05,
      "loss": 0.9174,
      "step": 13305
    },
    {
      "epoch": 0.7677664974619289,
      "grad_norm": 0.29580479860305786,
      "learning_rate": 3.109860039697699e-05,
      "loss": 1.0233,
      "step": 13310
    },
    {
      "epoch": 0.7680549146285187,
      "grad_norm": 0.3255249857902527,
      "learning_rate": 3.1025668302868505e-05,
      "loss": 1.0239,
      "step": 13315
    },
    {
      "epoch": 0.7683433317951085,
      "grad_norm": 0.2764644920825958,
      "learning_rate": 3.0952806123165945e-05,
      "loss": 0.906,
      "step": 13320
    },
    {
      "epoch": 0.7686317489616982,
      "grad_norm": 0.29159998893737793,
      "learning_rate": 3.0880013931724503e-05,
      "loss": 0.9322,
      "step": 13325
    },
    {
      "epoch": 0.768920166128288,
      "grad_norm": 0.2871926426887512,
      "learning_rate": 3.0807291802328494e-05,
      "loss": 0.9471,
      "step": 13330
    },
    {
      "epoch": 0.7692085832948777,
      "grad_norm": 0.2964244484901428,
      "learning_rate": 3.073463980869117e-05,
      "loss": 0.9872,
      "step": 13335
    },
    {
      "epoch": 0.7694970004614675,
      "grad_norm": 0.2808452844619751,
      "learning_rate": 3.066205802445477e-05,
      "loss": 0.9309,
      "step": 13340
    },
    {
      "epoch": 0.7697854176280572,
      "grad_norm": 0.3023243248462677,
      "learning_rate": 3.0589546523190325e-05,
      "loss": 0.9376,
      "step": 13345
    },
    {
      "epoch": 0.770073834794647,
      "grad_norm": 0.28334978222846985,
      "learning_rate": 3.0517105378397536e-05,
      "loss": 0.9403,
      "step": 13350
    },
    {
      "epoch": 0.7703622519612368,
      "grad_norm": 0.29692623019218445,
      "learning_rate": 3.044473466350496e-05,
      "loss": 0.9514,
      "step": 13355
    },
    {
      "epoch": 0.7706506691278265,
      "grad_norm": 0.28074130415916443,
      "learning_rate": 3.037243445186965e-05,
      "loss": 0.875,
      "step": 13360
    },
    {
      "epoch": 0.7709390862944162,
      "grad_norm": 0.2846575081348419,
      "learning_rate": 3.030020481677721e-05,
      "loss": 0.9103,
      "step": 13365
    },
    {
      "epoch": 0.771227503461006,
      "grad_norm": 0.2886015474796295,
      "learning_rate": 3.0228045831441733e-05,
      "loss": 0.9395,
      "step": 13370
    },
    {
      "epoch": 0.7715159206275958,
      "grad_norm": 0.31642863154411316,
      "learning_rate": 3.0155957569005634e-05,
      "loss": 0.9715,
      "step": 13375
    },
    {
      "epoch": 0.7718043377941856,
      "grad_norm": 0.27962034940719604,
      "learning_rate": 3.0083940102539763e-05,
      "loss": 0.9315,
      "step": 13380
    },
    {
      "epoch": 0.7720927549607752,
      "grad_norm": 0.3114453852176666,
      "learning_rate": 3.00119935050431e-05,
      "loss": 0.9689,
      "step": 13385
    },
    {
      "epoch": 0.772381172127365,
      "grad_norm": 0.2955044209957123,
      "learning_rate": 2.994011784944284e-05,
      "loss": 0.9821,
      "step": 13390
    },
    {
      "epoch": 0.7726695892939548,
      "grad_norm": 0.2887440621852875,
      "learning_rate": 2.9868313208594212e-05,
      "loss": 0.9542,
      "step": 13395
    },
    {
      "epoch": 0.7729580064605446,
      "grad_norm": 0.30911940336227417,
      "learning_rate": 2.9796579655280576e-05,
      "loss": 0.8929,
      "step": 13400
    },
    {
      "epoch": 0.7732464236271342,
      "grad_norm": 0.2785640060901642,
      "learning_rate": 2.9724917262213157e-05,
      "loss": 0.9291,
      "step": 13405
    },
    {
      "epoch": 0.773534840793724,
      "grad_norm": 0.30418962240219116,
      "learning_rate": 2.9653326102030964e-05,
      "loss": 0.9497,
      "step": 13410
    },
    {
      "epoch": 0.7738232579603138,
      "grad_norm": 0.27844002842903137,
      "learning_rate": 2.9581806247301e-05,
      "loss": 0.9347,
      "step": 13415
    },
    {
      "epoch": 0.7741116751269036,
      "grad_norm": 0.2980695962905884,
      "learning_rate": 2.9510357770517825e-05,
      "loss": 0.9666,
      "step": 13420
    },
    {
      "epoch": 0.7744000922934933,
      "grad_norm": 0.30434754490852356,
      "learning_rate": 2.9438980744103807e-05,
      "loss": 0.996,
      "step": 13425
    },
    {
      "epoch": 0.774688509460083,
      "grad_norm": 0.28719010949134827,
      "learning_rate": 2.9367675240408654e-05,
      "loss": 1.0049,
      "step": 13430
    },
    {
      "epoch": 0.7749769266266728,
      "grad_norm": 0.30200961232185364,
      "learning_rate": 2.9296441331709823e-05,
      "loss": 0.9612,
      "step": 13435
    },
    {
      "epoch": 0.7752653437932626,
      "grad_norm": 0.27808678150177,
      "learning_rate": 2.9225279090212067e-05,
      "loss": 0.9418,
      "step": 13440
    },
    {
      "epoch": 0.7755537609598523,
      "grad_norm": 0.29861539602279663,
      "learning_rate": 2.9154188588047504e-05,
      "loss": 0.8939,
      "step": 13445
    },
    {
      "epoch": 0.7758421781264421,
      "grad_norm": 0.29694679379463196,
      "learning_rate": 2.9083169897275552e-05,
      "loss": 0.9092,
      "step": 13450
    },
    {
      "epoch": 0.7761305952930319,
      "grad_norm": 0.30668821930885315,
      "learning_rate": 2.901222308988283e-05,
      "loss": 1.0086,
      "step": 13455
    },
    {
      "epoch": 0.7764190124596216,
      "grad_norm": 0.2928648591041565,
      "learning_rate": 2.894134823778315e-05,
      "loss": 0.9638,
      "step": 13460
    },
    {
      "epoch": 0.7767074296262113,
      "grad_norm": 0.2744688391685486,
      "learning_rate": 2.8870545412817306e-05,
      "loss": 0.9842,
      "step": 13465
    },
    {
      "epoch": 0.7769958467928011,
      "grad_norm": 0.313619464635849,
      "learning_rate": 2.8799814686753134e-05,
      "loss": 0.9996,
      "step": 13470
    },
    {
      "epoch": 0.7772842639593909,
      "grad_norm": 0.27718332409858704,
      "learning_rate": 2.8729156131285362e-05,
      "loss": 0.8795,
      "step": 13475
    },
    {
      "epoch": 0.7775726811259807,
      "grad_norm": 0.2873843312263489,
      "learning_rate": 2.8658569818035542e-05,
      "loss": 0.9017,
      "step": 13480
    },
    {
      "epoch": 0.7778610982925703,
      "grad_norm": 0.27686628699302673,
      "learning_rate": 2.8588055818552096e-05,
      "loss": 0.9119,
      "step": 13485
    },
    {
      "epoch": 0.7781495154591601,
      "grad_norm": 0.3248569965362549,
      "learning_rate": 2.851761420431006e-05,
      "loss": 0.9749,
      "step": 13490
    },
    {
      "epoch": 0.7784379326257499,
      "grad_norm": 0.293582558631897,
      "learning_rate": 2.8447245046711103e-05,
      "loss": 0.9441,
      "step": 13495
    },
    {
      "epoch": 0.7787263497923397,
      "grad_norm": 0.2862909436225891,
      "learning_rate": 2.8376948417083483e-05,
      "loss": 0.9887,
      "step": 13500
    },
    {
      "epoch": 0.7790147669589293,
      "grad_norm": 0.3073969781398773,
      "learning_rate": 2.8306724386681894e-05,
      "loss": 1.0304,
      "step": 13505
    },
    {
      "epoch": 0.7793031841255191,
      "grad_norm": 0.29679036140441895,
      "learning_rate": 2.823657302668755e-05,
      "loss": 0.9249,
      "step": 13510
    },
    {
      "epoch": 0.7795916012921089,
      "grad_norm": 0.28810662031173706,
      "learning_rate": 2.8166494408207812e-05,
      "loss": 0.9222,
      "step": 13515
    },
    {
      "epoch": 0.7798800184586987,
      "grad_norm": 0.2718978226184845,
      "learning_rate": 2.8096488602276528e-05,
      "loss": 0.929,
      "step": 13520
    },
    {
      "epoch": 0.7801684356252884,
      "grad_norm": 0.3145858645439148,
      "learning_rate": 2.8026555679853594e-05,
      "loss": 0.9618,
      "step": 13525
    },
    {
      "epoch": 0.7804568527918782,
      "grad_norm": 0.28058719635009766,
      "learning_rate": 2.7956695711825075e-05,
      "loss": 1.0025,
      "step": 13530
    },
    {
      "epoch": 0.7807452699584679,
      "grad_norm": 0.28933846950531006,
      "learning_rate": 2.7886908769003074e-05,
      "loss": 0.9186,
      "step": 13535
    },
    {
      "epoch": 0.7810336871250577,
      "grad_norm": 0.29064711928367615,
      "learning_rate": 2.7817194922125666e-05,
      "loss": 0.9462,
      "step": 13540
    },
    {
      "epoch": 0.7813221042916474,
      "grad_norm": 0.29872819781303406,
      "learning_rate": 2.774755424185691e-05,
      "loss": 0.9759,
      "step": 13545
    },
    {
      "epoch": 0.7816105214582372,
      "grad_norm": 0.28359314799308777,
      "learning_rate": 2.7677986798786615e-05,
      "loss": 0.9732,
      "step": 13550
    },
    {
      "epoch": 0.781898938624827,
      "grad_norm": 0.3036571443080902,
      "learning_rate": 2.7608492663430363e-05,
      "loss": 1.0159,
      "step": 13555
    },
    {
      "epoch": 0.7821873557914167,
      "grad_norm": 0.2676851749420166,
      "learning_rate": 2.753907190622944e-05,
      "loss": 0.9785,
      "step": 13560
    },
    {
      "epoch": 0.7824757729580064,
      "grad_norm": 0.3076855540275574,
      "learning_rate": 2.7469724597550805e-05,
      "loss": 0.9082,
      "step": 13565
    },
    {
      "epoch": 0.7827641901245962,
      "grad_norm": 0.28778043389320374,
      "learning_rate": 2.7400450807686938e-05,
      "loss": 0.9618,
      "step": 13570
    },
    {
      "epoch": 0.783052607291186,
      "grad_norm": 0.29155904054641724,
      "learning_rate": 2.7331250606855695e-05,
      "loss": 0.9694,
      "step": 13575
    },
    {
      "epoch": 0.7833410244577758,
      "grad_norm": 0.27802708745002747,
      "learning_rate": 2.726212406520051e-05,
      "loss": 0.8822,
      "step": 13580
    },
    {
      "epoch": 0.7836294416243654,
      "grad_norm": 0.3171776533126831,
      "learning_rate": 2.7193071252790013e-05,
      "loss": 0.9722,
      "step": 13585
    },
    {
      "epoch": 0.7839178587909552,
      "grad_norm": 0.2921862006187439,
      "learning_rate": 2.712409223961826e-05,
      "loss": 0.9562,
      "step": 13590
    },
    {
      "epoch": 0.784206275957545,
      "grad_norm": 0.29876551032066345,
      "learning_rate": 2.7055187095604296e-05,
      "loss": 0.8812,
      "step": 13595
    },
    {
      "epoch": 0.7844946931241348,
      "grad_norm": 0.33072373270988464,
      "learning_rate": 2.698635589059242e-05,
      "loss": 1.0232,
      "step": 13600
    },
    {
      "epoch": 0.7847831102907245,
      "grad_norm": 0.3147570788860321,
      "learning_rate": 2.6917598694351998e-05,
      "loss": 0.9484,
      "step": 13605
    },
    {
      "epoch": 0.7850715274573142,
      "grad_norm": 0.27354785799980164,
      "learning_rate": 2.6848915576577317e-05,
      "loss": 0.8926,
      "step": 13610
    },
    {
      "epoch": 0.785359944623904,
      "grad_norm": 0.34007906913757324,
      "learning_rate": 2.6780306606887605e-05,
      "loss": 0.9087,
      "step": 13615
    },
    {
      "epoch": 0.7856483617904938,
      "grad_norm": 0.29760247468948364,
      "learning_rate": 2.6711771854826905e-05,
      "loss": 0.9881,
      "step": 13620
    },
    {
      "epoch": 0.7859367789570835,
      "grad_norm": 0.28661879897117615,
      "learning_rate": 2.6643311389864088e-05,
      "loss": 0.9354,
      "step": 13625
    },
    {
      "epoch": 0.7862251961236733,
      "grad_norm": 0.3026251196861267,
      "learning_rate": 2.657492528139268e-05,
      "loss": 1.0238,
      "step": 13630
    },
    {
      "epoch": 0.786513613290263,
      "grad_norm": 0.30861523747444153,
      "learning_rate": 2.6506613598730833e-05,
      "loss": 1.0146,
      "step": 13635
    },
    {
      "epoch": 0.7868020304568528,
      "grad_norm": 0.2883349061012268,
      "learning_rate": 2.643837641112128e-05,
      "loss": 0.9967,
      "step": 13640
    },
    {
      "epoch": 0.7870904476234426,
      "grad_norm": 0.2896696627140045,
      "learning_rate": 2.6370213787731214e-05,
      "loss": 0.9344,
      "step": 13645
    },
    {
      "epoch": 0.7873788647900323,
      "grad_norm": 0.2672329545021057,
      "learning_rate": 2.630212579765231e-05,
      "loss": 0.9655,
      "step": 13650
    },
    {
      "epoch": 0.7876672819566221,
      "grad_norm": 0.28561004996299744,
      "learning_rate": 2.6234112509900532e-05,
      "loss": 0.9152,
      "step": 13655
    },
    {
      "epoch": 0.7879556991232118,
      "grad_norm": 0.2793280780315399,
      "learning_rate": 2.6166173993416154e-05,
      "loss": 0.9476,
      "step": 13660
    },
    {
      "epoch": 0.7882441162898016,
      "grad_norm": 0.2930983006954193,
      "learning_rate": 2.6098310317063634e-05,
      "loss": 0.9762,
      "step": 13665
    },
    {
      "epoch": 0.7885325334563913,
      "grad_norm": 0.3089233338832855,
      "learning_rate": 2.603052154963158e-05,
      "loss": 0.9858,
      "step": 13670
    },
    {
      "epoch": 0.7888209506229811,
      "grad_norm": 0.2855857312679291,
      "learning_rate": 2.59628077598327e-05,
      "loss": 0.9528,
      "step": 13675
    },
    {
      "epoch": 0.7891093677895709,
      "grad_norm": 0.2728000283241272,
      "learning_rate": 2.5895169016303623e-05,
      "loss": 0.9513,
      "step": 13680
    },
    {
      "epoch": 0.7893977849561606,
      "grad_norm": 0.29234278202056885,
      "learning_rate": 2.5827605387605035e-05,
      "loss": 1.0129,
      "step": 13685
    },
    {
      "epoch": 0.7896862021227503,
      "grad_norm": 0.3364224433898926,
      "learning_rate": 2.576011694222139e-05,
      "loss": 0.9246,
      "step": 13690
    },
    {
      "epoch": 0.7899746192893401,
      "grad_norm": 0.30209240317344666,
      "learning_rate": 2.5692703748560932e-05,
      "loss": 0.8882,
      "step": 13695
    },
    {
      "epoch": 0.7902630364559299,
      "grad_norm": 0.31578031182289124,
      "learning_rate": 2.5625365874955674e-05,
      "loss": 1.0172,
      "step": 13700
    },
    {
      "epoch": 0.7905514536225197,
      "grad_norm": 0.31756725907325745,
      "learning_rate": 2.5558103389661214e-05,
      "loss": 1.0161,
      "step": 13705
    },
    {
      "epoch": 0.7908398707891093,
      "grad_norm": 0.28036510944366455,
      "learning_rate": 2.5490916360856853e-05,
      "loss": 1.0237,
      "step": 13710
    },
    {
      "epoch": 0.7911282879556991,
      "grad_norm": 0.2866663336753845,
      "learning_rate": 2.5423804856645307e-05,
      "loss": 0.9911,
      "step": 13715
    },
    {
      "epoch": 0.7914167051222889,
      "grad_norm": 0.3004799783229828,
      "learning_rate": 2.5356768945052745e-05,
      "loss": 0.9495,
      "step": 13720
    },
    {
      "epoch": 0.7917051222888787,
      "grad_norm": 0.30194994807243347,
      "learning_rate": 2.528980869402875e-05,
      "loss": 0.952,
      "step": 13725
    },
    {
      "epoch": 0.7919935394554684,
      "grad_norm": 0.2838331162929535,
      "learning_rate": 2.522292417144617e-05,
      "loss": 1.0005,
      "step": 13730
    },
    {
      "epoch": 0.7922819566220581,
      "grad_norm": 0.2979831397533417,
      "learning_rate": 2.5156115445101193e-05,
      "loss": 1.1029,
      "step": 13735
    },
    {
      "epoch": 0.7925703737886479,
      "grad_norm": 0.25850915908813477,
      "learning_rate": 2.5089382582712994e-05,
      "loss": 0.916,
      "step": 13740
    },
    {
      "epoch": 0.7928587909552377,
      "grad_norm": 0.27513688802719116,
      "learning_rate": 2.502272565192405e-05,
      "loss": 0.8642,
      "step": 13745
    },
    {
      "epoch": 0.7931472081218274,
      "grad_norm": 0.28276485204696655,
      "learning_rate": 2.4956144720299712e-05,
      "loss": 0.8874,
      "step": 13750
    },
    {
      "epoch": 0.7934356252884172,
      "grad_norm": 0.3068575859069824,
      "learning_rate": 2.4889639855328473e-05,
      "loss": 0.9945,
      "step": 13755
    },
    {
      "epoch": 0.7937240424550069,
      "grad_norm": 0.28170597553253174,
      "learning_rate": 2.482321112442151e-05,
      "loss": 0.9553,
      "step": 13760
    },
    {
      "epoch": 0.7940124596215967,
      "grad_norm": 0.34394562244415283,
      "learning_rate": 2.4756858594912945e-05,
      "loss": 0.9402,
      "step": 13765
    },
    {
      "epoch": 0.7943008767881864,
      "grad_norm": 0.2997315227985382,
      "learning_rate": 2.4690582334059685e-05,
      "loss": 0.8787,
      "step": 13770
    },
    {
      "epoch": 0.7945892939547762,
      "grad_norm": 0.27617332339286804,
      "learning_rate": 2.4624382409041292e-05,
      "loss": 0.95,
      "step": 13775
    },
    {
      "epoch": 0.794877711121366,
      "grad_norm": 0.3137553334236145,
      "learning_rate": 2.455825888695994e-05,
      "loss": 0.9173,
      "step": 13780
    },
    {
      "epoch": 0.7951661282879557,
      "grad_norm": 0.3070252537727356,
      "learning_rate": 2.449221183484036e-05,
      "loss": 1.0014,
      "step": 13785
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.2886539697647095,
      "learning_rate": 2.4426241319629772e-05,
      "loss": 1.0092,
      "step": 13790
    },
    {
      "epoch": 0.7957429626211352,
      "grad_norm": 0.2871996760368347,
      "learning_rate": 2.436034740819786e-05,
      "loss": 0.934,
      "step": 13795
    },
    {
      "epoch": 0.796031379787725,
      "grad_norm": 0.3281834125518799,
      "learning_rate": 2.4294530167336615e-05,
      "loss": 0.91,
      "step": 13800
    },
    {
      "epoch": 0.7963197969543148,
      "grad_norm": 0.2982265055179596,
      "learning_rate": 2.422878966376032e-05,
      "loss": 0.958,
      "step": 13805
    },
    {
      "epoch": 0.7966082141209044,
      "grad_norm": 0.30917835235595703,
      "learning_rate": 2.4163125964105448e-05,
      "loss": 0.9478,
      "step": 13810
    },
    {
      "epoch": 0.7968966312874942,
      "grad_norm": 0.2909094989299774,
      "learning_rate": 2.4097539134930703e-05,
      "loss": 1.0116,
      "step": 13815
    },
    {
      "epoch": 0.797185048454084,
      "grad_norm": 0.2671252191066742,
      "learning_rate": 2.4032029242716826e-05,
      "loss": 0.913,
      "step": 13820
    },
    {
      "epoch": 0.7974734656206738,
      "grad_norm": 0.2950332760810852,
      "learning_rate": 2.3966596353866466e-05,
      "loss": 0.9618,
      "step": 13825
    },
    {
      "epoch": 0.7977618827872635,
      "grad_norm": 0.3227890729904175,
      "learning_rate": 2.390124053470443e-05,
      "loss": 0.9547,
      "step": 13830
    },
    {
      "epoch": 0.7980502999538532,
      "grad_norm": 0.28030773997306824,
      "learning_rate": 2.383596185147724e-05,
      "loss": 0.96,
      "step": 13835
    },
    {
      "epoch": 0.798338717120443,
      "grad_norm": 0.29495128989219666,
      "learning_rate": 2.3770760370353294e-05,
      "loss": 0.9523,
      "step": 13840
    },
    {
      "epoch": 0.7986271342870328,
      "grad_norm": 0.33828744292259216,
      "learning_rate": 2.3705636157422707e-05,
      "loss": 0.9811,
      "step": 13845
    },
    {
      "epoch": 0.7989155514536225,
      "grad_norm": 0.306768536567688,
      "learning_rate": 2.364058927869732e-05,
      "loss": 0.9979,
      "step": 13850
    },
    {
      "epoch": 0.7992039686202123,
      "grad_norm": 0.3236815333366394,
      "learning_rate": 2.357561980011057e-05,
      "loss": 0.9182,
      "step": 13855
    },
    {
      "epoch": 0.799492385786802,
      "grad_norm": 0.3143635392189026,
      "learning_rate": 2.3510727787517382e-05,
      "loss": 0.9045,
      "step": 13860
    },
    {
      "epoch": 0.7997808029533918,
      "grad_norm": 0.30166271328926086,
      "learning_rate": 2.3445913306694246e-05,
      "loss": 0.8859,
      "step": 13865
    },
    {
      "epoch": 0.8000692201199815,
      "grad_norm": 0.3122861087322235,
      "learning_rate": 2.3381176423338956e-05,
      "loss": 0.9755,
      "step": 13870
    },
    {
      "epoch": 0.8003576372865713,
      "grad_norm": 0.27881136536598206,
      "learning_rate": 2.33165172030708e-05,
      "loss": 0.941,
      "step": 13875
    },
    {
      "epoch": 0.8006460544531611,
      "grad_norm": 0.3169102072715759,
      "learning_rate": 2.325193571143024e-05,
      "loss": 0.881,
      "step": 13880
    },
    {
      "epoch": 0.8009344716197508,
      "grad_norm": 0.2874906659126282,
      "learning_rate": 2.3187432013878908e-05,
      "loss": 0.9404,
      "step": 13885
    },
    {
      "epoch": 0.8012228887863405,
      "grad_norm": 0.349324494600296,
      "learning_rate": 2.31230061757997e-05,
      "loss": 0.9942,
      "step": 13890
    },
    {
      "epoch": 0.8015113059529303,
      "grad_norm": 0.28128063678741455,
      "learning_rate": 2.30586582624965e-05,
      "loss": 0.9676,
      "step": 13895
    },
    {
      "epoch": 0.8017997231195201,
      "grad_norm": 0.27755919098854065,
      "learning_rate": 2.299438833919432e-05,
      "loss": 0.9676,
      "step": 13900
    },
    {
      "epoch": 0.8020881402861099,
      "grad_norm": 0.2768385410308838,
      "learning_rate": 2.2930196471038924e-05,
      "loss": 0.9379,
      "step": 13905
    },
    {
      "epoch": 0.8023765574526995,
      "grad_norm": 0.30479341745376587,
      "learning_rate": 2.286608272309716e-05,
      "loss": 0.9811,
      "step": 13910
    },
    {
      "epoch": 0.8026649746192893,
      "grad_norm": 0.2795366942882538,
      "learning_rate": 2.2802047160356576e-05,
      "loss": 1.0481,
      "step": 13915
    },
    {
      "epoch": 0.8029533917858791,
      "grad_norm": 0.32293620705604553,
      "learning_rate": 2.2738089847725497e-05,
      "loss": 0.9632,
      "step": 13920
    },
    {
      "epoch": 0.8032418089524689,
      "grad_norm": 0.34504541754722595,
      "learning_rate": 2.267421085003293e-05,
      "loss": 0.9606,
      "step": 13925
    },
    {
      "epoch": 0.8035302261190586,
      "grad_norm": 0.3101238012313843,
      "learning_rate": 2.2610410232028467e-05,
      "loss": 1.0869,
      "step": 13930
    },
    {
      "epoch": 0.8038186432856483,
      "grad_norm": 0.29028749465942383,
      "learning_rate": 2.254668805838236e-05,
      "loss": 0.9505,
      "step": 13935
    },
    {
      "epoch": 0.8041070604522381,
      "grad_norm": 0.29808133840560913,
      "learning_rate": 2.2483044393685215e-05,
      "loss": 0.9556,
      "step": 13940
    },
    {
      "epoch": 0.8043954776188279,
      "grad_norm": 0.27106401324272156,
      "learning_rate": 2.2419479302448144e-05,
      "loss": 0.898,
      "step": 13945
    },
    {
      "epoch": 0.8046838947854176,
      "grad_norm": 0.2756030559539795,
      "learning_rate": 2.235599284910258e-05,
      "loss": 0.9095,
      "step": 13950
    },
    {
      "epoch": 0.8049723119520074,
      "grad_norm": 0.2623896896839142,
      "learning_rate": 2.229258509800023e-05,
      "loss": 0.9346,
      "step": 13955
    },
    {
      "epoch": 0.8052607291185971,
      "grad_norm": 0.30703356862068176,
      "learning_rate": 2.2229256113413087e-05,
      "loss": 0.9422,
      "step": 13960
    },
    {
      "epoch": 0.8055491462851869,
      "grad_norm": 0.2994453012943268,
      "learning_rate": 2.2166005959533266e-05,
      "loss": 1.0257,
      "step": 13965
    },
    {
      "epoch": 0.8058375634517766,
      "grad_norm": 0.28820228576660156,
      "learning_rate": 2.210283470047296e-05,
      "loss": 0.9725,
      "step": 13970
    },
    {
      "epoch": 0.8061259806183664,
      "grad_norm": 0.2879176735877991,
      "learning_rate": 2.2039742400264406e-05,
      "loss": 0.9524,
      "step": 13975
    },
    {
      "epoch": 0.8064143977849562,
      "grad_norm": 0.28816553950309753,
      "learning_rate": 2.1976729122859864e-05,
      "loss": 0.9219,
      "step": 13980
    },
    {
      "epoch": 0.8067028149515459,
      "grad_norm": 0.30006927251815796,
      "learning_rate": 2.191379493213137e-05,
      "loss": 0.9587,
      "step": 13985
    },
    {
      "epoch": 0.8069912321181357,
      "grad_norm": 0.3044586777687073,
      "learning_rate": 2.185093989187087e-05,
      "loss": 0.978,
      "step": 13990
    },
    {
      "epoch": 0.8072796492847254,
      "grad_norm": 0.2731790840625763,
      "learning_rate": 2.1788164065790127e-05,
      "loss": 0.9413,
      "step": 13995
    },
    {
      "epoch": 0.8075680664513152,
      "grad_norm": 0.2864460349082947,
      "learning_rate": 2.1725467517520526e-05,
      "loss": 0.9536,
      "step": 14000
    },
    {
      "epoch": 0.807856483617905,
      "grad_norm": 0.2875843346118927,
      "learning_rate": 2.1662850310613147e-05,
      "loss": 0.8936,
      "step": 14005
    },
    {
      "epoch": 0.8081449007844947,
      "grad_norm": 0.286458283662796,
      "learning_rate": 2.1600312508538602e-05,
      "loss": 0.9558,
      "step": 14010
    },
    {
      "epoch": 0.8084333179510844,
      "grad_norm": 0.28374427556991577,
      "learning_rate": 2.1537854174687034e-05,
      "loss": 0.8465,
      "step": 14015
    },
    {
      "epoch": 0.8087217351176742,
      "grad_norm": 0.2845346927642822,
      "learning_rate": 2.1475475372368094e-05,
      "loss": 1.0074,
      "step": 14020
    },
    {
      "epoch": 0.809010152284264,
      "grad_norm": 0.3177453279495239,
      "learning_rate": 2.1413176164810732e-05,
      "loss": 0.9771,
      "step": 14025
    },
    {
      "epoch": 0.8092985694508538,
      "grad_norm": 0.2712036073207855,
      "learning_rate": 2.1350956615163254e-05,
      "loss": 0.9188,
      "step": 14030
    },
    {
      "epoch": 0.8095869866174434,
      "grad_norm": 0.274812787771225,
      "learning_rate": 2.1288816786493194e-05,
      "loss": 0.9834,
      "step": 14035
    },
    {
      "epoch": 0.8098754037840332,
      "grad_norm": 0.28117087483406067,
      "learning_rate": 2.1226756741787356e-05,
      "loss": 0.9579,
      "step": 14040
    },
    {
      "epoch": 0.810163820950623,
      "grad_norm": 0.2860463559627533,
      "learning_rate": 2.1164776543951635e-05,
      "loss": 0.8912,
      "step": 14045
    },
    {
      "epoch": 0.8104522381172128,
      "grad_norm": 0.29822874069213867,
      "learning_rate": 2.1102876255810887e-05,
      "loss": 0.9204,
      "step": 14050
    },
    {
      "epoch": 0.8107406552838025,
      "grad_norm": 0.2793227732181549,
      "learning_rate": 2.1041055940109133e-05,
      "loss": 0.9382,
      "step": 14055
    },
    {
      "epoch": 0.8110290724503922,
      "grad_norm": 0.2899545729160309,
      "learning_rate": 2.0979315659509223e-05,
      "loss": 0.9919,
      "step": 14060
    },
    {
      "epoch": 0.811317489616982,
      "grad_norm": 0.2842821180820465,
      "learning_rate": 2.091765547659298e-05,
      "loss": 0.8838,
      "step": 14065
    },
    {
      "epoch": 0.8116059067835718,
      "grad_norm": 0.282894492149353,
      "learning_rate": 2.085607545386088e-05,
      "loss": 0.9497,
      "step": 14070
    },
    {
      "epoch": 0.8118943239501615,
      "grad_norm": 0.31623557209968567,
      "learning_rate": 2.0794575653732308e-05,
      "loss": 0.9599,
      "step": 14075
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.2882692813873291,
      "learning_rate": 2.0733156138545252e-05,
      "loss": 0.9176,
      "step": 14080
    },
    {
      "epoch": 0.812471158283341,
      "grad_norm": 0.312507688999176,
      "learning_rate": 2.0671816970556312e-05,
      "loss": 1.0327,
      "step": 14085
    },
    {
      "epoch": 0.8127595754499308,
      "grad_norm": 0.27789467573165894,
      "learning_rate": 2.0610558211940702e-05,
      "loss": 0.9604,
      "step": 14090
    },
    {
      "epoch": 0.8130479926165205,
      "grad_norm": 0.2766311466693878,
      "learning_rate": 2.0549379924792045e-05,
      "loss": 1.0102,
      "step": 14095
    },
    {
      "epoch": 0.8133364097831103,
      "grad_norm": 0.2832636833190918,
      "learning_rate": 2.0488282171122498e-05,
      "loss": 0.9976,
      "step": 14100
    },
    {
      "epoch": 0.8136248269497001,
      "grad_norm": 0.314397931098938,
      "learning_rate": 2.042726501286253e-05,
      "loss": 0.944,
      "step": 14105
    },
    {
      "epoch": 0.8139132441162898,
      "grad_norm": 0.2753946781158447,
      "learning_rate": 2.036632851186091e-05,
      "loss": 0.866,
      "step": 14110
    },
    {
      "epoch": 0.8142016612828795,
      "grad_norm": 0.28425517678260803,
      "learning_rate": 2.0305472729884656e-05,
      "loss": 0.9753,
      "step": 14115
    },
    {
      "epoch": 0.8144900784494693,
      "grad_norm": 0.265170693397522,
      "learning_rate": 2.0244697728618966e-05,
      "loss": 0.9903,
      "step": 14120
    },
    {
      "epoch": 0.8147784956160591,
      "grad_norm": 0.3373314440250397,
      "learning_rate": 2.0184003569667198e-05,
      "loss": 0.929,
      "step": 14125
    },
    {
      "epoch": 0.8150669127826489,
      "grad_norm": 0.2899061143398285,
      "learning_rate": 2.0123390314550717e-05,
      "loss": 0.9916,
      "step": 14130
    },
    {
      "epoch": 0.8153553299492385,
      "grad_norm": 0.2940767705440521,
      "learning_rate": 2.0062858024708895e-05,
      "loss": 0.899,
      "step": 14135
    },
    {
      "epoch": 0.8156437471158283,
      "grad_norm": 0.29175153374671936,
      "learning_rate": 2.000240676149904e-05,
      "loss": 0.9465,
      "step": 14140
    },
    {
      "epoch": 0.8159321642824181,
      "grad_norm": 0.2850426733493805,
      "learning_rate": 1.9942036586196312e-05,
      "loss": 0.978,
      "step": 14145
    },
    {
      "epoch": 0.8162205814490079,
      "grad_norm": 0.2718311548233032,
      "learning_rate": 1.9881747559993703e-05,
      "loss": 0.887,
      "step": 14150
    },
    {
      "epoch": 0.8165089986155976,
      "grad_norm": 0.33359721302986145,
      "learning_rate": 1.9821539744001906e-05,
      "loss": 0.9738,
      "step": 14155
    },
    {
      "epoch": 0.8167974157821873,
      "grad_norm": 0.2917875349521637,
      "learning_rate": 1.976141319924939e-05,
      "loss": 0.9638,
      "step": 14160
    },
    {
      "epoch": 0.8170858329487771,
      "grad_norm": 0.2972804009914398,
      "learning_rate": 1.9701367986682152e-05,
      "loss": 0.9336,
      "step": 14165
    },
    {
      "epoch": 0.8173742501153669,
      "grad_norm": 0.30192670226097107,
      "learning_rate": 1.964140416716379e-05,
      "loss": 1.0037,
      "step": 14170
    },
    {
      "epoch": 0.8176626672819566,
      "grad_norm": 0.29572001099586487,
      "learning_rate": 1.9581521801475368e-05,
      "loss": 0.9069,
      "step": 14175
    },
    {
      "epoch": 0.8179510844485464,
      "grad_norm": 0.3252546787261963,
      "learning_rate": 1.9521720950315403e-05,
      "loss": 1.0215,
      "step": 14180
    },
    {
      "epoch": 0.8182395016151361,
      "grad_norm": 0.28887739777565,
      "learning_rate": 1.9462001674299846e-05,
      "loss": 0.9823,
      "step": 14185
    },
    {
      "epoch": 0.8185279187817259,
      "grad_norm": 0.30198100209236145,
      "learning_rate": 1.940236403396186e-05,
      "loss": 0.9403,
      "step": 14190
    },
    {
      "epoch": 0.8188163359483156,
      "grad_norm": 0.2884944975376129,
      "learning_rate": 1.934280808975193e-05,
      "loss": 0.9939,
      "step": 14195
    },
    {
      "epoch": 0.8191047531149054,
      "grad_norm": 0.2968595027923584,
      "learning_rate": 1.9283333902037694e-05,
      "loss": 0.9257,
      "step": 14200
    },
    {
      "epoch": 0.8193931702814952,
      "grad_norm": 0.2883051931858063,
      "learning_rate": 1.9223941531103918e-05,
      "loss": 0.9695,
      "step": 14205
    },
    {
      "epoch": 0.819681587448085,
      "grad_norm": 0.29692569375038147,
      "learning_rate": 1.9164631037152513e-05,
      "loss": 1.005,
      "step": 14210
    },
    {
      "epoch": 0.8199700046146746,
      "grad_norm": 0.26805904507637024,
      "learning_rate": 1.9105402480302237e-05,
      "loss": 0.9818,
      "step": 14215
    },
    {
      "epoch": 0.8202584217812644,
      "grad_norm": 0.27732157707214355,
      "learning_rate": 1.9046255920588985e-05,
      "loss": 0.9986,
      "step": 14220
    },
    {
      "epoch": 0.8205468389478542,
      "grad_norm": 0.314737468957901,
      "learning_rate": 1.8987191417965378e-05,
      "loss": 0.951,
      "step": 14225
    },
    {
      "epoch": 0.820835256114444,
      "grad_norm": 0.3076435625553131,
      "learning_rate": 1.8928209032301013e-05,
      "loss": 1.0224,
      "step": 14230
    },
    {
      "epoch": 0.8211236732810336,
      "grad_norm": 0.29111447930336,
      "learning_rate": 1.886930882338208e-05,
      "loss": 0.9591,
      "step": 14235
    },
    {
      "epoch": 0.8214120904476234,
      "grad_norm": 0.3361826241016388,
      "learning_rate": 1.8810490850911577e-05,
      "loss": 1.0308,
      "step": 14240
    },
    {
      "epoch": 0.8217005076142132,
      "grad_norm": 0.2926064431667328,
      "learning_rate": 1.8751755174509156e-05,
      "loss": 0.8994,
      "step": 14245
    },
    {
      "epoch": 0.821988924780803,
      "grad_norm": 0.33232754468917847,
      "learning_rate": 1.8693101853711004e-05,
      "loss": 0.9446,
      "step": 14250
    },
    {
      "epoch": 0.8222773419473927,
      "grad_norm": 0.3139682710170746,
      "learning_rate": 1.8634530947969853e-05,
      "loss": 1.0135,
      "step": 14255
    },
    {
      "epoch": 0.8225657591139824,
      "grad_norm": 0.2984192669391632,
      "learning_rate": 1.857604251665487e-05,
      "loss": 0.9311,
      "step": 14260
    },
    {
      "epoch": 0.8228541762805722,
      "grad_norm": 0.2784828841686249,
      "learning_rate": 1.851763661905167e-05,
      "loss": 0.9251,
      "step": 14265
    },
    {
      "epoch": 0.823142593447162,
      "grad_norm": 0.2821832597255707,
      "learning_rate": 1.845931331436219e-05,
      "loss": 0.9135,
      "step": 14270
    },
    {
      "epoch": 0.8234310106137517,
      "grad_norm": 0.2858486771583557,
      "learning_rate": 1.840107266170462e-05,
      "loss": 0.9792,
      "step": 14275
    },
    {
      "epoch": 0.8237194277803415,
      "grad_norm": 0.3098810613155365,
      "learning_rate": 1.8342914720113404e-05,
      "loss": 0.9468,
      "step": 14280
    },
    {
      "epoch": 0.8240078449469312,
      "grad_norm": 0.28920066356658936,
      "learning_rate": 1.828483954853911e-05,
      "loss": 0.9434,
      "step": 14285
    },
    {
      "epoch": 0.824296262113521,
      "grad_norm": 0.36170583963394165,
      "learning_rate": 1.822684720584852e-05,
      "loss": 0.9733,
      "step": 14290
    },
    {
      "epoch": 0.8245846792801107,
      "grad_norm": 0.2947959303855896,
      "learning_rate": 1.8168937750824278e-05,
      "loss": 1.0517,
      "step": 14295
    },
    {
      "epoch": 0.8248730964467005,
      "grad_norm": 0.264804482460022,
      "learning_rate": 1.8111111242165124e-05,
      "loss": 0.9247,
      "step": 14300
    },
    {
      "epoch": 0.8251615136132903,
      "grad_norm": 0.3069083094596863,
      "learning_rate": 1.8053367738485748e-05,
      "loss": 0.941,
      "step": 14305
    },
    {
      "epoch": 0.82544993077988,
      "grad_norm": 0.29265064001083374,
      "learning_rate": 1.7995707298316632e-05,
      "loss": 0.9741,
      "step": 14310
    },
    {
      "epoch": 0.8257383479464697,
      "grad_norm": 0.2899533212184906,
      "learning_rate": 1.7938129980104103e-05,
      "loss": 0.9867,
      "step": 14315
    },
    {
      "epoch": 0.8260267651130595,
      "grad_norm": 0.312055379152298,
      "learning_rate": 1.788063584221017e-05,
      "loss": 1.0349,
      "step": 14320
    },
    {
      "epoch": 0.8263151822796493,
      "grad_norm": 0.2941701412200928,
      "learning_rate": 1.7823224942912643e-05,
      "loss": 1.0064,
      "step": 14325
    },
    {
      "epoch": 0.8266035994462391,
      "grad_norm": 0.2945972979068756,
      "learning_rate": 1.776589734040487e-05,
      "loss": 0.9446,
      "step": 14330
    },
    {
      "epoch": 0.8268920166128289,
      "grad_norm": 0.35159391164779663,
      "learning_rate": 1.770865309279578e-05,
      "loss": 0.9587,
      "step": 14335
    },
    {
      "epoch": 0.8271804337794185,
      "grad_norm": 0.3027874529361725,
      "learning_rate": 1.7651492258109835e-05,
      "loss": 1.0183,
      "step": 14340
    },
    {
      "epoch": 0.8274688509460083,
      "grad_norm": 0.2887841761112213,
      "learning_rate": 1.7594414894286893e-05,
      "loss": 0.9893,
      "step": 14345
    },
    {
      "epoch": 0.8277572681125981,
      "grad_norm": 0.2893112003803253,
      "learning_rate": 1.7537421059182314e-05,
      "loss": 0.928,
      "step": 14350
    },
    {
      "epoch": 0.8280456852791879,
      "grad_norm": 0.28454893827438354,
      "learning_rate": 1.7480510810566685e-05,
      "loss": 0.959,
      "step": 14355
    },
    {
      "epoch": 0.8283341024457775,
      "grad_norm": 0.29442286491394043,
      "learning_rate": 1.74236842061259e-05,
      "loss": 0.9051,
      "step": 14360
    },
    {
      "epoch": 0.8286225196123673,
      "grad_norm": 0.31676793098449707,
      "learning_rate": 1.7366941303461083e-05,
      "loss": 1.0464,
      "step": 14365
    },
    {
      "epoch": 0.8289109367789571,
      "grad_norm": 0.27417492866516113,
      "learning_rate": 1.7310282160088465e-05,
      "loss": 0.9545,
      "step": 14370
    },
    {
      "epoch": 0.8291993539455469,
      "grad_norm": 0.3116649091243744,
      "learning_rate": 1.72537068334395e-05,
      "loss": 1.0463,
      "step": 14375
    },
    {
      "epoch": 0.8294877711121366,
      "grad_norm": 0.28560543060302734,
      "learning_rate": 1.7197215380860497e-05,
      "loss": 1.0247,
      "step": 14380
    },
    {
      "epoch": 0.8297761882787263,
      "grad_norm": 0.2737559378147125,
      "learning_rate": 1.7140807859612928e-05,
      "loss": 0.91,
      "step": 14385
    },
    {
      "epoch": 0.8300646054453161,
      "grad_norm": 0.2955358326435089,
      "learning_rate": 1.7084484326873062e-05,
      "loss": 0.8977,
      "step": 14390
    },
    {
      "epoch": 0.8303530226119059,
      "grad_norm": 0.2852216362953186,
      "learning_rate": 1.7028244839732144e-05,
      "loss": 0.9707,
      "step": 14395
    },
    {
      "epoch": 0.8306414397784956,
      "grad_norm": 0.3453192412853241,
      "learning_rate": 1.6972089455196115e-05,
      "loss": 0.9683,
      "step": 14400
    },
    {
      "epoch": 0.8309298569450854,
      "grad_norm": 0.29188206791877747,
      "learning_rate": 1.6916018230185704e-05,
      "loss": 0.9029,
      "step": 14405
    },
    {
      "epoch": 0.8312182741116751,
      "grad_norm": 0.2981170117855072,
      "learning_rate": 1.6860031221536398e-05,
      "loss": 0.9743,
      "step": 14410
    },
    {
      "epoch": 0.8315066912782649,
      "grad_norm": 0.28368160128593445,
      "learning_rate": 1.680412848599826e-05,
      "loss": 0.9265,
      "step": 14415
    },
    {
      "epoch": 0.8317951084448546,
      "grad_norm": 0.27159884572029114,
      "learning_rate": 1.674831008023594e-05,
      "loss": 0.9942,
      "step": 14420
    },
    {
      "epoch": 0.8320835256114444,
      "grad_norm": 0.27852651476860046,
      "learning_rate": 1.66925760608286e-05,
      "loss": 0.9342,
      "step": 14425
    },
    {
      "epoch": 0.8323719427780342,
      "grad_norm": 0.3071039319038391,
      "learning_rate": 1.6636926484269855e-05,
      "loss": 0.9772,
      "step": 14430
    },
    {
      "epoch": 0.832660359944624,
      "grad_norm": 0.35024166107177734,
      "learning_rate": 1.658136140696781e-05,
      "loss": 0.8992,
      "step": 14435
    },
    {
      "epoch": 0.8329487771112136,
      "grad_norm": 0.3019395172595978,
      "learning_rate": 1.6525880885244815e-05,
      "loss": 0.9288,
      "step": 14440
    },
    {
      "epoch": 0.8332371942778034,
      "grad_norm": 0.32100218534469604,
      "learning_rate": 1.6470484975337565e-05,
      "loss": 0.9542,
      "step": 14445
    },
    {
      "epoch": 0.8335256114443932,
      "grad_norm": 0.30146655440330505,
      "learning_rate": 1.641517373339696e-05,
      "loss": 0.9153,
      "step": 14450
    },
    {
      "epoch": 0.833814028610983,
      "grad_norm": 0.2922408878803253,
      "learning_rate": 1.6359947215488157e-05,
      "loss": 0.8254,
      "step": 14455
    },
    {
      "epoch": 0.8341024457775726,
      "grad_norm": 0.3297399580478668,
      "learning_rate": 1.6304805477590312e-05,
      "loss": 0.9451,
      "step": 14460
    },
    {
      "epoch": 0.8343908629441624,
      "grad_norm": 0.26936909556388855,
      "learning_rate": 1.6249748575596702e-05,
      "loss": 0.9624,
      "step": 14465
    },
    {
      "epoch": 0.8346792801107522,
      "grad_norm": 0.2955014407634735,
      "learning_rate": 1.6194776565314672e-05,
      "loss": 0.9156,
      "step": 14470
    },
    {
      "epoch": 0.834967697277342,
      "grad_norm": 0.27296435832977295,
      "learning_rate": 1.6139889502465434e-05,
      "loss": 1.0005,
      "step": 14475
    },
    {
      "epoch": 0.8352561144439317,
      "grad_norm": 0.2949640452861786,
      "learning_rate": 1.6085087442684122e-05,
      "loss": 0.9119,
      "step": 14480
    },
    {
      "epoch": 0.8355445316105214,
      "grad_norm": 0.28256770968437195,
      "learning_rate": 1.6030370441519704e-05,
      "loss": 0.991,
      "step": 14485
    },
    {
      "epoch": 0.8358329487771112,
      "grad_norm": 0.26805078983306885,
      "learning_rate": 1.597573855443497e-05,
      "loss": 0.8828,
      "step": 14490
    },
    {
      "epoch": 0.836121365943701,
      "grad_norm": 0.31226465106010437,
      "learning_rate": 1.592119183680638e-05,
      "loss": 0.9714,
      "step": 14495
    },
    {
      "epoch": 0.8364097831102907,
      "grad_norm": 0.31233757734298706,
      "learning_rate": 1.5866730343924085e-05,
      "loss": 0.96,
      "step": 14500
    },
    {
      "epoch": 0.8366982002768805,
      "grad_norm": 0.28462743759155273,
      "learning_rate": 1.581235413099187e-05,
      "loss": 0.9373,
      "step": 14505
    },
    {
      "epoch": 0.8369866174434702,
      "grad_norm": 0.2800358533859253,
      "learning_rate": 1.575806325312702e-05,
      "loss": 0.9018,
      "step": 14510
    },
    {
      "epoch": 0.83727503461006,
      "grad_norm": 0.2955632209777832,
      "learning_rate": 1.5703857765360407e-05,
      "loss": 0.876,
      "step": 14515
    },
    {
      "epoch": 0.8375634517766497,
      "grad_norm": 0.3308626115322113,
      "learning_rate": 1.5649737722636315e-05,
      "loss": 0.9169,
      "step": 14520
    },
    {
      "epoch": 0.8378518689432395,
      "grad_norm": 0.2876846492290497,
      "learning_rate": 1.5595703179812327e-05,
      "loss": 0.9658,
      "step": 14525
    },
    {
      "epoch": 0.8381402861098293,
      "grad_norm": 0.3003042936325073,
      "learning_rate": 1.554175419165951e-05,
      "loss": 0.9288,
      "step": 14530
    },
    {
      "epoch": 0.838428703276419,
      "grad_norm": 0.28002721071243286,
      "learning_rate": 1.5487890812862094e-05,
      "loss": 0.9352,
      "step": 14535
    },
    {
      "epoch": 0.8387171204430087,
      "grad_norm": 0.30450934171676636,
      "learning_rate": 1.5434113098017667e-05,
      "loss": 0.9025,
      "step": 14540
    },
    {
      "epoch": 0.8390055376095985,
      "grad_norm": 0.2632887065410614,
      "learning_rate": 1.5380421101636778e-05,
      "loss": 0.8946,
      "step": 14545
    },
    {
      "epoch": 0.8392939547761883,
      "grad_norm": 0.2949424088001251,
      "learning_rate": 1.5326814878143304e-05,
      "loss": 0.9088,
      "step": 14550
    },
    {
      "epoch": 0.8395823719427781,
      "grad_norm": 0.31812626123428345,
      "learning_rate": 1.5273294481874044e-05,
      "loss": 0.9816,
      "step": 14555
    },
    {
      "epoch": 0.8398707891093677,
      "grad_norm": 0.28734949231147766,
      "learning_rate": 1.5219859967078854e-05,
      "loss": 0.9553,
      "step": 14560
    },
    {
      "epoch": 0.8401592062759575,
      "grad_norm": 0.2892124652862549,
      "learning_rate": 1.5166511387920512e-05,
      "loss": 0.9617,
      "step": 14565
    },
    {
      "epoch": 0.8404476234425473,
      "grad_norm": 0.335213303565979,
      "learning_rate": 1.5113248798474689e-05,
      "loss": 0.9573,
      "step": 14570
    },
    {
      "epoch": 0.8407360406091371,
      "grad_norm": 0.29609566926956177,
      "learning_rate": 1.5060072252729963e-05,
      "loss": 0.8896,
      "step": 14575
    },
    {
      "epoch": 0.8410244577757268,
      "grad_norm": 0.30370384454727173,
      "learning_rate": 1.5006981804587595e-05,
      "loss": 0.9445,
      "step": 14580
    },
    {
      "epoch": 0.8413128749423165,
      "grad_norm": 0.32674163579940796,
      "learning_rate": 1.495397750786165e-05,
      "loss": 0.9271,
      "step": 14585
    },
    {
      "epoch": 0.8416012921089063,
      "grad_norm": 0.306863397359848,
      "learning_rate": 1.4901059416278806e-05,
      "loss": 0.9588,
      "step": 14590
    },
    {
      "epoch": 0.8418897092754961,
      "grad_norm": 0.29718735814094543,
      "learning_rate": 1.4848227583478392e-05,
      "loss": 0.9887,
      "step": 14595
    },
    {
      "epoch": 0.8421781264420858,
      "grad_norm": 0.2633509635925293,
      "learning_rate": 1.4795482063012367e-05,
      "loss": 0.9955,
      "step": 14600
    },
    {
      "epoch": 0.8424665436086756,
      "grad_norm": 0.297232061624527,
      "learning_rate": 1.4742822908345045e-05,
      "loss": 0.996,
      "step": 14605
    },
    {
      "epoch": 0.8427549607752653,
      "grad_norm": 0.27509748935699463,
      "learning_rate": 1.4690250172853348e-05,
      "loss": 0.9716,
      "step": 14610
    },
    {
      "epoch": 0.8430433779418551,
      "grad_norm": 0.28519466519355774,
      "learning_rate": 1.463776390982654e-05,
      "loss": 0.9296,
      "step": 14615
    },
    {
      "epoch": 0.8433317951084448,
      "grad_norm": 0.3120754063129425,
      "learning_rate": 1.4585364172466231e-05,
      "loss": 0.9693,
      "step": 14620
    },
    {
      "epoch": 0.8436202122750346,
      "grad_norm": 0.29427772760391235,
      "learning_rate": 1.4533051013886323e-05,
      "loss": 0.8632,
      "step": 14625
    },
    {
      "epoch": 0.8439086294416244,
      "grad_norm": 0.32537657022476196,
      "learning_rate": 1.4480824487112943e-05,
      "loss": 1.0125,
      "step": 14630
    },
    {
      "epoch": 0.8441970466082142,
      "grad_norm": 0.2881564199924469,
      "learning_rate": 1.44286846450845e-05,
      "loss": 0.9031,
      "step": 14635
    },
    {
      "epoch": 0.8444854637748038,
      "grad_norm": 0.2654680013656616,
      "learning_rate": 1.437663154065142e-05,
      "loss": 0.894,
      "step": 14640
    },
    {
      "epoch": 0.8447738809413936,
      "grad_norm": 0.27556008100509644,
      "learning_rate": 1.4324665226576261e-05,
      "loss": 0.9709,
      "step": 14645
    },
    {
      "epoch": 0.8450622981079834,
      "grad_norm": 0.27537065744400024,
      "learning_rate": 1.4272785755533601e-05,
      "loss": 0.9357,
      "step": 14650
    },
    {
      "epoch": 0.8453507152745732,
      "grad_norm": 0.28559306263923645,
      "learning_rate": 1.4220993180109987e-05,
      "loss": 0.9329,
      "step": 14655
    },
    {
      "epoch": 0.8456391324411628,
      "grad_norm": 0.281829297542572,
      "learning_rate": 1.4169287552803923e-05,
      "loss": 0.9157,
      "step": 14660
    },
    {
      "epoch": 0.8459275496077526,
      "grad_norm": 0.2907716929912567,
      "learning_rate": 1.411766892602574e-05,
      "loss": 0.9752,
      "step": 14665
    },
    {
      "epoch": 0.8462159667743424,
      "grad_norm": 0.2846973240375519,
      "learning_rate": 1.4066137352097575e-05,
      "loss": 0.9054,
      "step": 14670
    },
    {
      "epoch": 0.8465043839409322,
      "grad_norm": 0.2842434048652649,
      "learning_rate": 1.4014692883253333e-05,
      "loss": 0.9578,
      "step": 14675
    },
    {
      "epoch": 0.846792801107522,
      "grad_norm": 0.26932570338249207,
      "learning_rate": 1.396333557163868e-05,
      "loss": 0.8609,
      "step": 14680
    },
    {
      "epoch": 0.8470812182741116,
      "grad_norm": 0.27889811992645264,
      "learning_rate": 1.3912065469310886e-05,
      "loss": 0.9687,
      "step": 14685
    },
    {
      "epoch": 0.8473696354407014,
      "grad_norm": 0.30398595333099365,
      "learning_rate": 1.3860882628238781e-05,
      "loss": 0.9927,
      "step": 14690
    },
    {
      "epoch": 0.8476580526072912,
      "grad_norm": 0.2693740427494049,
      "learning_rate": 1.380978710030284e-05,
      "loss": 0.993,
      "step": 14695
    },
    {
      "epoch": 0.847946469773881,
      "grad_norm": 0.3081601560115814,
      "learning_rate": 1.3758778937294947e-05,
      "loss": 0.9079,
      "step": 14700
    },
    {
      "epoch": 0.8482348869404707,
      "grad_norm": 0.3294139802455902,
      "learning_rate": 1.3707858190918555e-05,
      "loss": 0.9586,
      "step": 14705
    },
    {
      "epoch": 0.8485233041070604,
      "grad_norm": 0.3140733242034912,
      "learning_rate": 1.365702491278833e-05,
      "loss": 0.8856,
      "step": 14710
    },
    {
      "epoch": 0.8488117212736502,
      "grad_norm": 0.29933077096939087,
      "learning_rate": 1.3606279154430435e-05,
      "loss": 0.9661,
      "step": 14715
    },
    {
      "epoch": 0.84910013844024,
      "grad_norm": 0.299584299325943,
      "learning_rate": 1.3555620967282235e-05,
      "loss": 0.9069,
      "step": 14720
    },
    {
      "epoch": 0.8493885556068297,
      "grad_norm": 0.30480751395225525,
      "learning_rate": 1.3505050402692366e-05,
      "loss": 0.8985,
      "step": 14725
    },
    {
      "epoch": 0.8496769727734195,
      "grad_norm": 0.33949342370033264,
      "learning_rate": 1.3454567511920634e-05,
      "loss": 0.962,
      "step": 14730
    },
    {
      "epoch": 0.8499653899400093,
      "grad_norm": 0.2982217073440552,
      "learning_rate": 1.3404172346137945e-05,
      "loss": 0.9534,
      "step": 14735
    },
    {
      "epoch": 0.850253807106599,
      "grad_norm": 0.26105549931526184,
      "learning_rate": 1.3353864956426366e-05,
      "loss": 0.9366,
      "step": 14740
    },
    {
      "epoch": 0.8505422242731887,
      "grad_norm": 0.30479708313941956,
      "learning_rate": 1.330364539377893e-05,
      "loss": 0.9505,
      "step": 14745
    },
    {
      "epoch": 0.8508306414397785,
      "grad_norm": 0.28560617566108704,
      "learning_rate": 1.3253513709099652e-05,
      "loss": 0.9081,
      "step": 14750
    },
    {
      "epoch": 0.8511190586063683,
      "grad_norm": 0.2818647623062134,
      "learning_rate": 1.3203469953203474e-05,
      "loss": 0.9252,
      "step": 14755
    },
    {
      "epoch": 0.8514074757729581,
      "grad_norm": 0.3095349073410034,
      "learning_rate": 1.3153514176816195e-05,
      "loss": 0.9276,
      "step": 14760
    },
    {
      "epoch": 0.8516958929395477,
      "grad_norm": 0.27864789962768555,
      "learning_rate": 1.3103646430574523e-05,
      "loss": 0.9634,
      "step": 14765
    },
    {
      "epoch": 0.8519843101061375,
      "grad_norm": 0.3282797932624817,
      "learning_rate": 1.305386676502578e-05,
      "loss": 0.9707,
      "step": 14770
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 0.2912672758102417,
      "learning_rate": 1.3004175230628169e-05,
      "loss": 0.9301,
      "step": 14775
    },
    {
      "epoch": 0.8525611444393171,
      "grad_norm": 0.28119948506355286,
      "learning_rate": 1.2954571877750443e-05,
      "loss": 0.9853,
      "step": 14780
    },
    {
      "epoch": 0.8528495616059067,
      "grad_norm": 0.3060617744922638,
      "learning_rate": 1.290505675667204e-05,
      "loss": 0.9772,
      "step": 14785
    },
    {
      "epoch": 0.8531379787724965,
      "grad_norm": 0.3374637961387634,
      "learning_rate": 1.2855629917582935e-05,
      "loss": 0.9944,
      "step": 14790
    },
    {
      "epoch": 0.8534263959390863,
      "grad_norm": 0.292756587266922,
      "learning_rate": 1.2806291410583593e-05,
      "loss": 0.9562,
      "step": 14795
    },
    {
      "epoch": 0.8537148131056761,
      "grad_norm": 0.292419970035553,
      "learning_rate": 1.2757041285685011e-05,
      "loss": 0.9447,
      "step": 14800
    },
    {
      "epoch": 0.8540032302722658,
      "grad_norm": 0.2827741503715515,
      "learning_rate": 1.2707879592808548e-05,
      "loss": 0.9309,
      "step": 14805
    },
    {
      "epoch": 0.8542916474388556,
      "grad_norm": 0.29757630825042725,
      "learning_rate": 1.2658806381785926e-05,
      "loss": 0.9113,
      "step": 14810
    },
    {
      "epoch": 0.8545800646054453,
      "grad_norm": 0.28717100620269775,
      "learning_rate": 1.2609821702359215e-05,
      "loss": 0.8644,
      "step": 14815
    },
    {
      "epoch": 0.8548684817720351,
      "grad_norm": 0.32317039370536804,
      "learning_rate": 1.2560925604180673e-05,
      "loss": 1.046,
      "step": 14820
    },
    {
      "epoch": 0.8551568989386248,
      "grad_norm": 0.3420310318470001,
      "learning_rate": 1.2512118136812878e-05,
      "loss": 1.0261,
      "step": 14825
    },
    {
      "epoch": 0.8554453161052146,
      "grad_norm": 0.2869220972061157,
      "learning_rate": 1.2463399349728488e-05,
      "loss": 0.9597,
      "step": 14830
    },
    {
      "epoch": 0.8557337332718044,
      "grad_norm": 0.31378278136253357,
      "learning_rate": 1.2414769292310301e-05,
      "loss": 0.9628,
      "step": 14835
    },
    {
      "epoch": 0.8560221504383941,
      "grad_norm": 0.27532947063446045,
      "learning_rate": 1.2366228013851156e-05,
      "loss": 0.9479,
      "step": 14840
    },
    {
      "epoch": 0.8563105676049838,
      "grad_norm": 0.3077858090400696,
      "learning_rate": 1.2317775563553902e-05,
      "loss": 0.9652,
      "step": 14845
    },
    {
      "epoch": 0.8565989847715736,
      "grad_norm": 0.31280088424682617,
      "learning_rate": 1.2269411990531421e-05,
      "loss": 0.9224,
      "step": 14850
    },
    {
      "epoch": 0.8568874019381634,
      "grad_norm": 0.3110673427581787,
      "learning_rate": 1.2221137343806377e-05,
      "loss": 0.9567,
      "step": 14855
    },
    {
      "epoch": 0.8571758191047532,
      "grad_norm": 0.29823023080825806,
      "learning_rate": 1.2172951672311427e-05,
      "loss": 0.9816,
      "step": 14860
    },
    {
      "epoch": 0.8574642362713428,
      "grad_norm": 0.30940476059913635,
      "learning_rate": 1.2124855024888937e-05,
      "loss": 0.9098,
      "step": 14865
    },
    {
      "epoch": 0.8577526534379326,
      "grad_norm": 0.3029901683330536,
      "learning_rate": 1.207684745029114e-05,
      "loss": 1.0217,
      "step": 14870
    },
    {
      "epoch": 0.8580410706045224,
      "grad_norm": 0.2684774398803711,
      "learning_rate": 1.2028928997179867e-05,
      "loss": 0.974,
      "step": 14875
    },
    {
      "epoch": 0.8583294877711122,
      "grad_norm": 0.31164249777793884,
      "learning_rate": 1.1981099714126654e-05,
      "loss": 0.9657,
      "step": 14880
    },
    {
      "epoch": 0.8586179049377018,
      "grad_norm": 0.2923769950866699,
      "learning_rate": 1.193335964961273e-05,
      "loss": 0.9379,
      "step": 14885
    },
    {
      "epoch": 0.8589063221042916,
      "grad_norm": 0.31549927592277527,
      "learning_rate": 1.1885708852028777e-05,
      "loss": 0.9436,
      "step": 14890
    },
    {
      "epoch": 0.8591947392708814,
      "grad_norm": 0.3174637258052826,
      "learning_rate": 1.1838147369675056e-05,
      "loss": 1.0007,
      "step": 14895
    },
    {
      "epoch": 0.8594831564374712,
      "grad_norm": 0.26647695899009705,
      "learning_rate": 1.1790675250761263e-05,
      "loss": 0.9489,
      "step": 14900
    },
    {
      "epoch": 0.8597715736040609,
      "grad_norm": 0.2883162498474121,
      "learning_rate": 1.1743292543406558e-05,
      "loss": 0.9913,
      "step": 14905
    },
    {
      "epoch": 0.8600599907706507,
      "grad_norm": 0.28514915704727173,
      "learning_rate": 1.1695999295639459e-05,
      "loss": 0.9422,
      "step": 14910
    },
    {
      "epoch": 0.8603484079372404,
      "grad_norm": 0.2868596911430359,
      "learning_rate": 1.1648795555397719e-05,
      "loss": 0.93,
      "step": 14915
    },
    {
      "epoch": 0.8606368251038302,
      "grad_norm": 0.29067856073379517,
      "learning_rate": 1.1601681370528484e-05,
      "loss": 0.9241,
      "step": 14920
    },
    {
      "epoch": 0.8609252422704199,
      "grad_norm": 0.31721532344818115,
      "learning_rate": 1.1554656788788054e-05,
      "loss": 0.9678,
      "step": 14925
    },
    {
      "epoch": 0.8612136594370097,
      "grad_norm": 0.28983375430107117,
      "learning_rate": 1.150772185784198e-05,
      "loss": 0.9476,
      "step": 14930
    },
    {
      "epoch": 0.8615020766035995,
      "grad_norm": 0.2977122664451599,
      "learning_rate": 1.1460876625264816e-05,
      "loss": 0.9909,
      "step": 14935
    },
    {
      "epoch": 0.8617904937701892,
      "grad_norm": 0.3428827226161957,
      "learning_rate": 1.1414121138540279e-05,
      "loss": 0.9896,
      "step": 14940
    },
    {
      "epoch": 0.8620789109367789,
      "grad_norm": 0.2909860908985138,
      "learning_rate": 1.1367455445061115e-05,
      "loss": 0.998,
      "step": 14945
    },
    {
      "epoch": 0.8623673281033687,
      "grad_norm": 0.31093189120292664,
      "learning_rate": 1.1320879592129052e-05,
      "loss": 0.938,
      "step": 14950
    },
    {
      "epoch": 0.8626557452699585,
      "grad_norm": 0.2798452377319336,
      "learning_rate": 1.1274393626954715e-05,
      "loss": 0.9822,
      "step": 14955
    },
    {
      "epoch": 0.8629441624365483,
      "grad_norm": 0.279249370098114,
      "learning_rate": 1.1227997596657636e-05,
      "loss": 0.8768,
      "step": 14960
    },
    {
      "epoch": 0.8632325796031379,
      "grad_norm": 0.32201087474823,
      "learning_rate": 1.1181691548266226e-05,
      "loss": 0.9705,
      "step": 14965
    },
    {
      "epoch": 0.8635209967697277,
      "grad_norm": 0.3062988817691803,
      "learning_rate": 1.1135475528717642e-05,
      "loss": 0.9492,
      "step": 14970
    },
    {
      "epoch": 0.8638094139363175,
      "grad_norm": 0.30123475193977356,
      "learning_rate": 1.108934958485779e-05,
      "loss": 0.9371,
      "step": 14975
    },
    {
      "epoch": 0.8640978311029073,
      "grad_norm": 0.3014609217643738,
      "learning_rate": 1.1043313763441277e-05,
      "loss": 0.9114,
      "step": 14980
    },
    {
      "epoch": 0.864386248269497,
      "grad_norm": 0.29384082555770874,
      "learning_rate": 1.0997368111131346e-05,
      "loss": 0.9757,
      "step": 14985
    },
    {
      "epoch": 0.8646746654360867,
      "grad_norm": 0.2640998363494873,
      "learning_rate": 1.0951512674499898e-05,
      "loss": 0.8896,
      "step": 14990
    },
    {
      "epoch": 0.8649630826026765,
      "grad_norm": 0.28632596135139465,
      "learning_rate": 1.090574750002733e-05,
      "loss": 1.004,
      "step": 14995
    },
    {
      "epoch": 0.8652514997692663,
      "grad_norm": 0.29420316219329834,
      "learning_rate": 1.0860072634102569e-05,
      "loss": 0.9623,
      "step": 15000
    },
    {
      "epoch": 0.865539916935856,
      "grad_norm": 0.2923297882080078,
      "learning_rate": 1.0814488123022992e-05,
      "loss": 0.993,
      "step": 15005
    },
    {
      "epoch": 0.8658283341024458,
      "grad_norm": 0.4265219569206238,
      "learning_rate": 1.0768994012994371e-05,
      "loss": 0.9131,
      "step": 15010
    },
    {
      "epoch": 0.8661167512690355,
      "grad_norm": 0.279988169670105,
      "learning_rate": 1.0723590350130951e-05,
      "loss": 0.9918,
      "step": 15015
    },
    {
      "epoch": 0.8664051684356253,
      "grad_norm": 0.32483842968940735,
      "learning_rate": 1.0678277180455109e-05,
      "loss": 0.9298,
      "step": 15020
    },
    {
      "epoch": 0.866693585602215,
      "grad_norm": 0.27693018317222595,
      "learning_rate": 1.0633054549897692e-05,
      "loss": 0.9287,
      "step": 15025
    },
    {
      "epoch": 0.8669820027688048,
      "grad_norm": 0.29998135566711426,
      "learning_rate": 1.0587922504297642e-05,
      "loss": 0.9638,
      "step": 15030
    },
    {
      "epoch": 0.8672704199353946,
      "grad_norm": 0.31143343448638916,
      "learning_rate": 1.0542881089402134e-05,
      "loss": 1.0159,
      "step": 15035
    },
    {
      "epoch": 0.8675588371019843,
      "grad_norm": 0.30745813250541687,
      "learning_rate": 1.049793035086647e-05,
      "loss": 0.966,
      "step": 15040
    },
    {
      "epoch": 0.8678472542685741,
      "grad_norm": 0.2762928009033203,
      "learning_rate": 1.0453070334254e-05,
      "loss": 0.8719,
      "step": 15045
    },
    {
      "epoch": 0.8681356714351638,
      "grad_norm": 0.3265017569065094,
      "learning_rate": 1.040830108503622e-05,
      "loss": 0.9099,
      "step": 15050
    },
    {
      "epoch": 0.8684240886017536,
      "grad_norm": 0.3012024760246277,
      "learning_rate": 1.0363622648592518e-05,
      "loss": 1.0014,
      "step": 15055
    },
    {
      "epoch": 0.8687125057683434,
      "grad_norm": 0.3051886558532715,
      "learning_rate": 1.031903507021027e-05,
      "loss": 0.9775,
      "step": 15060
    },
    {
      "epoch": 0.8690009229349331,
      "grad_norm": 0.2711678743362427,
      "learning_rate": 1.0274538395084754e-05,
      "loss": 0.8841,
      "step": 15065
    },
    {
      "epoch": 0.8692893401015228,
      "grad_norm": 0.28673282265663147,
      "learning_rate": 1.0230132668319082e-05,
      "loss": 0.9822,
      "step": 15070
    },
    {
      "epoch": 0.8695777572681126,
      "grad_norm": 0.28427520394325256,
      "learning_rate": 1.0185817934924257e-05,
      "loss": 0.9592,
      "step": 15075
    },
    {
      "epoch": 0.8698661744347024,
      "grad_norm": 0.27094730734825134,
      "learning_rate": 1.014159423981893e-05,
      "loss": 0.9069,
      "step": 15080
    },
    {
      "epoch": 0.8701545916012922,
      "grad_norm": 0.2950955033302307,
      "learning_rate": 1.0097461627829585e-05,
      "loss": 0.9891,
      "step": 15085
    },
    {
      "epoch": 0.8704430087678818,
      "grad_norm": 0.28739115595817566,
      "learning_rate": 1.0053420143690284e-05,
      "loss": 1.0194,
      "step": 15090
    },
    {
      "epoch": 0.8707314259344716,
      "grad_norm": 0.30966266989707947,
      "learning_rate": 1.0009469832042839e-05,
      "loss": 1.0324,
      "step": 15095
    },
    {
      "epoch": 0.8710198431010614,
      "grad_norm": 0.29282572865486145,
      "learning_rate": 9.965610737436515e-06,
      "loss": 0.9035,
      "step": 15100
    },
    {
      "epoch": 0.8713082602676512,
      "grad_norm": 0.2793741226196289,
      "learning_rate": 9.921842904328172e-06,
      "loss": 0.9867,
      "step": 15105
    },
    {
      "epoch": 0.8715966774342409,
      "grad_norm": 0.3010117709636688,
      "learning_rate": 9.87816637708221e-06,
      "loss": 0.9313,
      "step": 15110
    },
    {
      "epoch": 0.8718850946008306,
      "grad_norm": 0.2735041081905365,
      "learning_rate": 9.834581199970427e-06,
      "loss": 0.9388,
      "step": 15115
    },
    {
      "epoch": 0.8721735117674204,
      "grad_norm": 0.26067879796028137,
      "learning_rate": 9.791087417172019e-06,
      "loss": 0.8819,
      "step": 15120
    },
    {
      "epoch": 0.8724619289340102,
      "grad_norm": 0.2928871214389801,
      "learning_rate": 9.74768507277355e-06,
      "loss": 1.0077,
      "step": 15125
    },
    {
      "epoch": 0.8727503461005999,
      "grad_norm": 0.2853434085845947,
      "learning_rate": 9.704374210768952e-06,
      "loss": 0.997,
      "step": 15130
    },
    {
      "epoch": 0.8730387632671897,
      "grad_norm": 0.2779994010925293,
      "learning_rate": 9.661154875059364e-06,
      "loss": 0.8973,
      "step": 15135
    },
    {
      "epoch": 0.8733271804337794,
      "grad_norm": 0.35284411907196045,
      "learning_rate": 9.618027109453176e-06,
      "loss": 0.9867,
      "step": 15140
    },
    {
      "epoch": 0.8736155976003692,
      "grad_norm": 0.3049831986427307,
      "learning_rate": 9.574990957665941e-06,
      "loss": 0.9175,
      "step": 15145
    },
    {
      "epoch": 0.8739040147669589,
      "grad_norm": 0.29838353395462036,
      "learning_rate": 9.532046463320365e-06,
      "loss": 1.0109,
      "step": 15150
    },
    {
      "epoch": 0.8741924319335487,
      "grad_norm": 0.2807800769805908,
      "learning_rate": 9.489193669946273e-06,
      "loss": 0.9607,
      "step": 15155
    },
    {
      "epoch": 0.8744808491001385,
      "grad_norm": 0.3492079973220825,
      "learning_rate": 9.446432620980517e-06,
      "loss": 1.0417,
      "step": 15160
    },
    {
      "epoch": 0.8747692662667282,
      "grad_norm": 0.3055107295513153,
      "learning_rate": 9.403763359766892e-06,
      "loss": 0.968,
      "step": 15165
    },
    {
      "epoch": 0.8750576834333179,
      "grad_norm": 0.28376808762550354,
      "learning_rate": 9.361185929556282e-06,
      "loss": 0.9536,
      "step": 15170
    },
    {
      "epoch": 0.8753461005999077,
      "grad_norm": 0.26349642872810364,
      "learning_rate": 9.318700373506362e-06,
      "loss": 0.9339,
      "step": 15175
    },
    {
      "epoch": 0.8756345177664975,
      "grad_norm": 0.2932707369327545,
      "learning_rate": 9.276306734681805e-06,
      "loss": 0.9592,
      "step": 15180
    },
    {
      "epoch": 0.8759229349330873,
      "grad_norm": 0.27456268668174744,
      "learning_rate": 9.234005056053963e-06,
      "loss": 0.9042,
      "step": 15185
    },
    {
      "epoch": 0.8762113520996769,
      "grad_norm": 0.2756810486316681,
      "learning_rate": 9.191795380501134e-06,
      "loss": 0.939,
      "step": 15190
    },
    {
      "epoch": 0.8764997692662667,
      "grad_norm": 0.3087191879749298,
      "learning_rate": 9.14967775080824e-06,
      "loss": 0.9555,
      "step": 15195
    },
    {
      "epoch": 0.8767881864328565,
      "grad_norm": 0.29341921210289,
      "learning_rate": 9.107652209666973e-06,
      "loss": 0.9725,
      "step": 15200
    },
    {
      "epoch": 0.8770766035994463,
      "grad_norm": 0.28505462408065796,
      "learning_rate": 9.065718799675626e-06,
      "loss": 0.9523,
      "step": 15205
    },
    {
      "epoch": 0.877365020766036,
      "grad_norm": 0.28436028957366943,
      "learning_rate": 9.023877563339134e-06,
      "loss": 0.9626,
      "step": 15210
    },
    {
      "epoch": 0.8776534379326257,
      "grad_norm": 0.2829095423221588,
      "learning_rate": 8.982128543069025e-06,
      "loss": 0.9674,
      "step": 15215
    },
    {
      "epoch": 0.8779418550992155,
      "grad_norm": 0.29247331619262695,
      "learning_rate": 8.940471781183335e-06,
      "loss": 0.9893,
      "step": 15220
    },
    {
      "epoch": 0.8782302722658053,
      "grad_norm": 0.277696818113327,
      "learning_rate": 8.898907319906546e-06,
      "loss": 0.9762,
      "step": 15225
    },
    {
      "epoch": 0.878518689432395,
      "grad_norm": 0.32393842935562134,
      "learning_rate": 8.857435201369645e-06,
      "loss": 0.9658,
      "step": 15230
    },
    {
      "epoch": 0.8788071065989848,
      "grad_norm": 0.2773330807685852,
      "learning_rate": 8.816055467609963e-06,
      "loss": 0.9994,
      "step": 15235
    },
    {
      "epoch": 0.8790955237655745,
      "grad_norm": 0.29264041781425476,
      "learning_rate": 8.774768160571257e-06,
      "loss": 0.984,
      "step": 15240
    },
    {
      "epoch": 0.8793839409321643,
      "grad_norm": 0.3316905200481415,
      "learning_rate": 8.733573322103484e-06,
      "loss": 1.0043,
      "step": 15245
    },
    {
      "epoch": 0.879672358098754,
      "grad_norm": 0.3055762052536011,
      "learning_rate": 8.692470993962987e-06,
      "loss": 0.9468,
      "step": 15250
    },
    {
      "epoch": 0.8799607752653438,
      "grad_norm": 0.2926937937736511,
      "learning_rate": 8.651461217812295e-06,
      "loss": 0.9743,
      "step": 15255
    },
    {
      "epoch": 0.8802491924319336,
      "grad_norm": 0.3095841407775879,
      "learning_rate": 8.610544035220103e-06,
      "loss": 0.9596,
      "step": 15260
    },
    {
      "epoch": 0.8805376095985233,
      "grad_norm": 0.2798426151275635,
      "learning_rate": 8.569719487661276e-06,
      "loss": 0.8799,
      "step": 15265
    },
    {
      "epoch": 0.880826026765113,
      "grad_norm": 0.26808908581733704,
      "learning_rate": 8.528987616516748e-06,
      "loss": 0.9304,
      "step": 15270
    },
    {
      "epoch": 0.8811144439317028,
      "grad_norm": 0.283551424741745,
      "learning_rate": 8.48834846307357e-06,
      "loss": 0.9231,
      "step": 15275
    },
    {
      "epoch": 0.8814028610982926,
      "grad_norm": 0.26878780126571655,
      "learning_rate": 8.44780206852478e-06,
      "loss": 0.9712,
      "step": 15280
    },
    {
      "epoch": 0.8816912782648824,
      "grad_norm": 0.28564929962158203,
      "learning_rate": 8.40734847396938e-06,
      "loss": 0.9675,
      "step": 15285
    },
    {
      "epoch": 0.881979695431472,
      "grad_norm": 0.2827951908111572,
      "learning_rate": 8.366987720412322e-06,
      "loss": 0.9926,
      "step": 15290
    },
    {
      "epoch": 0.8822681125980618,
      "grad_norm": 0.2900889217853546,
      "learning_rate": 8.32671984876443e-06,
      "loss": 0.9621,
      "step": 15295
    },
    {
      "epoch": 0.8825565297646516,
      "grad_norm": 0.2751014828681946,
      "learning_rate": 8.286544899842441e-06,
      "loss": 0.9992,
      "step": 15300
    },
    {
      "epoch": 0.8828449469312414,
      "grad_norm": 0.35621145367622375,
      "learning_rate": 8.246462914368835e-06,
      "loss": 1.0027,
      "step": 15305
    },
    {
      "epoch": 0.883133364097831,
      "grad_norm": 0.27734360098838806,
      "learning_rate": 8.206473932971903e-06,
      "loss": 0.9242,
      "step": 15310
    },
    {
      "epoch": 0.8834217812644208,
      "grad_norm": 0.310468852519989,
      "learning_rate": 8.16657799618561e-06,
      "loss": 0.998,
      "step": 15315
    },
    {
      "epoch": 0.8837101984310106,
      "grad_norm": 0.2635403275489807,
      "learning_rate": 8.126775144449705e-06,
      "loss": 0.9703,
      "step": 15320
    },
    {
      "epoch": 0.8839986155976004,
      "grad_norm": 0.3096494674682617,
      "learning_rate": 8.087065418109519e-06,
      "loss": 0.9764,
      "step": 15325
    },
    {
      "epoch": 0.8842870327641901,
      "grad_norm": 0.3017280399799347,
      "learning_rate": 8.04744885741593e-06,
      "loss": 0.8677,
      "step": 15330
    },
    {
      "epoch": 0.8845754499307799,
      "grad_norm": 0.3181787431240082,
      "learning_rate": 8.007925502525527e-06,
      "loss": 0.9595,
      "step": 15335
    },
    {
      "epoch": 0.8848638670973696,
      "grad_norm": 0.2798701822757721,
      "learning_rate": 7.968495393500285e-06,
      "loss": 0.9224,
      "step": 15340
    },
    {
      "epoch": 0.8851522842639594,
      "grad_norm": 0.2835695743560791,
      "learning_rate": 7.92915857030776e-06,
      "loss": 0.9276,
      "step": 15345
    },
    {
      "epoch": 0.8854407014305491,
      "grad_norm": 0.28347131609916687,
      "learning_rate": 7.889915072820874e-06,
      "loss": 0.9064,
      "step": 15350
    },
    {
      "epoch": 0.8857291185971389,
      "grad_norm": 0.2914379835128784,
      "learning_rate": 7.850764940818e-06,
      "loss": 0.949,
      "step": 15355
    },
    {
      "epoch": 0.8860175357637287,
      "grad_norm": 0.326035737991333,
      "learning_rate": 7.811708213982883e-06,
      "loss": 0.9154,
      "step": 15360
    },
    {
      "epoch": 0.8863059529303184,
      "grad_norm": 0.25818875432014465,
      "learning_rate": 7.77274493190454e-06,
      "loss": 0.9831,
      "step": 15365
    },
    {
      "epoch": 0.8865943700969081,
      "grad_norm": 0.2907062768936157,
      "learning_rate": 7.733875134077307e-06,
      "loss": 1.0559,
      "step": 15370
    },
    {
      "epoch": 0.8868827872634979,
      "grad_norm": 0.2752329707145691,
      "learning_rate": 7.69509885990073e-06,
      "loss": 0.9661,
      "step": 15375
    },
    {
      "epoch": 0.8871712044300877,
      "grad_norm": 0.29247012734413147,
      "learning_rate": 7.656416148679612e-06,
      "loss": 0.9606,
      "step": 15380
    },
    {
      "epoch": 0.8874596215966775,
      "grad_norm": 0.3025994896888733,
      "learning_rate": 7.617827039623893e-06,
      "loss": 0.962,
      "step": 15385
    },
    {
      "epoch": 0.8877480387632672,
      "grad_norm": 0.30906012654304504,
      "learning_rate": 7.579331571848569e-06,
      "loss": 0.9593,
      "step": 15390
    },
    {
      "epoch": 0.8880364559298569,
      "grad_norm": 0.2803690731525421,
      "learning_rate": 7.540929784373818e-06,
      "loss": 0.8979,
      "step": 15395
    },
    {
      "epoch": 0.8883248730964467,
      "grad_norm": 0.2828715145587921,
      "learning_rate": 7.502621716124791e-06,
      "loss": 0.891,
      "step": 15400
    },
    {
      "epoch": 0.8886132902630365,
      "grad_norm": 0.29924464225769043,
      "learning_rate": 7.464407405931728e-06,
      "loss": 0.9579,
      "step": 15405
    },
    {
      "epoch": 0.8889017074296263,
      "grad_norm": 0.26582613587379456,
      "learning_rate": 7.4262868925296995e-06,
      "loss": 0.9038,
      "step": 15410
    },
    {
      "epoch": 0.8891901245962159,
      "grad_norm": 0.2724932134151459,
      "learning_rate": 7.388260214558829e-06,
      "loss": 0.9405,
      "step": 15415
    },
    {
      "epoch": 0.8894785417628057,
      "grad_norm": 0.2881430387496948,
      "learning_rate": 7.35032741056404e-06,
      "loss": 1.0487,
      "step": 15420
    },
    {
      "epoch": 0.8897669589293955,
      "grad_norm": 0.29068121314048767,
      "learning_rate": 7.3124885189951645e-06,
      "loss": 0.9252,
      "step": 15425
    },
    {
      "epoch": 0.8900553760959853,
      "grad_norm": 0.29160958528518677,
      "learning_rate": 7.274743578206788e-06,
      "loss": 0.9886,
      "step": 15430
    },
    {
      "epoch": 0.890343793262575,
      "grad_norm": 0.2769414782524109,
      "learning_rate": 7.237092626458297e-06,
      "loss": 1.0098,
      "step": 15435
    },
    {
      "epoch": 0.8906322104291647,
      "grad_norm": 0.28141847252845764,
      "learning_rate": 7.199535701913806e-06,
      "loss": 0.9594,
      "step": 15440
    },
    {
      "epoch": 0.8909206275957545,
      "grad_norm": 0.2850087881088257,
      "learning_rate": 7.16207284264212e-06,
      "loss": 0.9334,
      "step": 15445
    },
    {
      "epoch": 0.8912090447623443,
      "grad_norm": 0.2679743468761444,
      "learning_rate": 7.124704086616684e-06,
      "loss": 0.8826,
      "step": 15450
    },
    {
      "epoch": 0.891497461928934,
      "grad_norm": 0.283372163772583,
      "learning_rate": 7.0874294717155675e-06,
      "loss": 0.9705,
      "step": 15455
    },
    {
      "epoch": 0.8917858790955238,
      "grad_norm": 0.2819139063358307,
      "learning_rate": 7.05024903572139e-06,
      "loss": 0.9305,
      "step": 15460
    },
    {
      "epoch": 0.8920742962621135,
      "grad_norm": 0.2858351767063141,
      "learning_rate": 7.013162816321373e-06,
      "loss": 0.9842,
      "step": 15465
    },
    {
      "epoch": 0.8923627134287033,
      "grad_norm": 0.2785593867301941,
      "learning_rate": 6.976170851107178e-06,
      "loss": 1.0081,
      "step": 15470
    },
    {
      "epoch": 0.892651130595293,
      "grad_norm": 0.3193453848361969,
      "learning_rate": 6.939273177574945e-06,
      "loss": 0.9765,
      "step": 15475
    },
    {
      "epoch": 0.8929395477618828,
      "grad_norm": 0.34608909487724304,
      "learning_rate": 6.902469833125236e-06,
      "loss": 1.0135,
      "step": 15480
    },
    {
      "epoch": 0.8932279649284726,
      "grad_norm": 0.3069968521595001,
      "learning_rate": 6.865760855062997e-06,
      "loss": 0.9317,
      "step": 15485
    },
    {
      "epoch": 0.8935163820950623,
      "grad_norm": 0.2878912687301636,
      "learning_rate": 6.8291462805975535e-06,
      "loss": 0.9027,
      "step": 15490
    },
    {
      "epoch": 0.893804799261652,
      "grad_norm": 0.26937398314476013,
      "learning_rate": 6.792626146842462e-06,
      "loss": 0.8568,
      "step": 15495
    },
    {
      "epoch": 0.8940932164282418,
      "grad_norm": 0.3143722414970398,
      "learning_rate": 6.756200490815645e-06,
      "loss": 0.9664,
      "step": 15500
    },
    {
      "epoch": 0.8943816335948316,
      "grad_norm": 0.28063690662384033,
      "learning_rate": 6.7198693494392005e-06,
      "loss": 0.9556,
      "step": 15505
    },
    {
      "epoch": 0.8946700507614214,
      "grad_norm": 0.27173417806625366,
      "learning_rate": 6.683632759539449e-06,
      "loss": 0.9858,
      "step": 15510
    },
    {
      "epoch": 0.894958467928011,
      "grad_norm": 0.30416610836982727,
      "learning_rate": 6.647490757846841e-06,
      "loss": 0.9376,
      "step": 15515
    },
    {
      "epoch": 0.8952468850946008,
      "grad_norm": 0.3319179117679596,
      "learning_rate": 6.611443380995963e-06,
      "loss": 1.0079,
      "step": 15520
    },
    {
      "epoch": 0.8955353022611906,
      "grad_norm": 0.30170193314552307,
      "learning_rate": 6.57549066552553e-06,
      "loss": 0.9143,
      "step": 15525
    },
    {
      "epoch": 0.8958237194277804,
      "grad_norm": 0.2698569893836975,
      "learning_rate": 6.5396326478782465e-06,
      "loss": 0.9221,
      "step": 15530
    },
    {
      "epoch": 0.8961121365943701,
      "grad_norm": 0.2803119421005249,
      "learning_rate": 6.50386936440085e-06,
      "loss": 0.8905,
      "step": 15535
    },
    {
      "epoch": 0.8964005537609598,
      "grad_norm": 0.4507327377796173,
      "learning_rate": 6.468200851344042e-06,
      "loss": 0.9726,
      "step": 15540
    },
    {
      "epoch": 0.8966889709275496,
      "grad_norm": 0.290579229593277,
      "learning_rate": 6.432627144862513e-06,
      "loss": 1.0112,
      "step": 15545
    },
    {
      "epoch": 0.8969773880941394,
      "grad_norm": 0.29394960403442383,
      "learning_rate": 6.397148281014798e-06,
      "loss": 0.9204,
      "step": 15550
    },
    {
      "epoch": 0.8972658052607291,
      "grad_norm": 0.29478779435157776,
      "learning_rate": 6.361764295763284e-06,
      "loss": 0.9157,
      "step": 15555
    },
    {
      "epoch": 0.8975542224273189,
      "grad_norm": 0.2959185838699341,
      "learning_rate": 6.326475224974249e-06,
      "loss": 0.9941,
      "step": 15560
    },
    {
      "epoch": 0.8978426395939086,
      "grad_norm": 0.3056466281414032,
      "learning_rate": 6.291281104417712e-06,
      "loss": 0.9631,
      "step": 15565
    },
    {
      "epoch": 0.8981310567604984,
      "grad_norm": 0.3037865459918976,
      "learning_rate": 6.256181969767505e-06,
      "loss": 0.9736,
      "step": 15570
    },
    {
      "epoch": 0.8984194739270881,
      "grad_norm": 0.29159796237945557,
      "learning_rate": 6.22117785660109e-06,
      "loss": 0.9766,
      "step": 15575
    },
    {
      "epoch": 0.8987078910936779,
      "grad_norm": 0.29030078649520874,
      "learning_rate": 6.186268800399675e-06,
      "loss": 0.9445,
      "step": 15580
    },
    {
      "epoch": 0.8989963082602677,
      "grad_norm": 0.30294716358184814,
      "learning_rate": 6.1514548365481315e-06,
      "loss": 0.9299,
      "step": 15585
    },
    {
      "epoch": 0.8992847254268574,
      "grad_norm": 0.29575252532958984,
      "learning_rate": 6.116736000334888e-06,
      "loss": 0.9612,
      "step": 15590
    },
    {
      "epoch": 0.8995731425934471,
      "grad_norm": 0.3025875389575958,
      "learning_rate": 6.082112326951983e-06,
      "loss": 0.9427,
      "step": 15595
    },
    {
      "epoch": 0.8998615597600369,
      "grad_norm": 0.31641924381256104,
      "learning_rate": 6.047583851494965e-06,
      "loss": 1.0077,
      "step": 15600
    },
    {
      "epoch": 0.9001499769266267,
      "grad_norm": 0.2969208061695099,
      "learning_rate": 6.0131506089629586e-06,
      "loss": 0.9908,
      "step": 15605
    },
    {
      "epoch": 0.9004383940932165,
      "grad_norm": 0.2816386818885803,
      "learning_rate": 5.978812634258468e-06,
      "loss": 0.9903,
      "step": 15610
    },
    {
      "epoch": 0.9007268112598061,
      "grad_norm": 0.29898086190223694,
      "learning_rate": 5.9445699621874966e-06,
      "loss": 0.9487,
      "step": 15615
    },
    {
      "epoch": 0.9010152284263959,
      "grad_norm": 0.2998582720756531,
      "learning_rate": 5.910422627459411e-06,
      "loss": 0.9809,
      "step": 15620
    },
    {
      "epoch": 0.9013036455929857,
      "grad_norm": 0.31181856989860535,
      "learning_rate": 5.876370664686926e-06,
      "loss": 0.9769,
      "step": 15625
    },
    {
      "epoch": 0.9015920627595755,
      "grad_norm": 0.2707262337207794,
      "learning_rate": 5.842414108386151e-06,
      "loss": 0.9884,
      "step": 15630
    },
    {
      "epoch": 0.9018804799261652,
      "grad_norm": 0.3136463761329651,
      "learning_rate": 5.8085529929764345e-06,
      "loss": 1.0145,
      "step": 15635
    },
    {
      "epoch": 0.9021688970927549,
      "grad_norm": 0.3019693195819855,
      "learning_rate": 5.774787352780387e-06,
      "loss": 0.9954,
      "step": 15640
    },
    {
      "epoch": 0.9024573142593447,
      "grad_norm": 0.2765672206878662,
      "learning_rate": 5.741117222023862e-06,
      "loss": 0.9421,
      "step": 15645
    },
    {
      "epoch": 0.9027457314259345,
      "grad_norm": 0.31335878372192383,
      "learning_rate": 5.707542634835883e-06,
      "loss": 0.978,
      "step": 15650
    },
    {
      "epoch": 0.9030341485925242,
      "grad_norm": 0.2650074362754822,
      "learning_rate": 5.674063625248638e-06,
      "loss": 0.9065,
      "step": 15655
    },
    {
      "epoch": 0.903322565759114,
      "grad_norm": 0.31178587675094604,
      "learning_rate": 5.640680227197426e-06,
      "loss": 0.9972,
      "step": 15660
    },
    {
      "epoch": 0.9036109829257037,
      "grad_norm": 0.2522607445716858,
      "learning_rate": 5.607392474520667e-06,
      "loss": 0.8805,
      "step": 15665
    },
    {
      "epoch": 0.9038994000922935,
      "grad_norm": 0.27256470918655396,
      "learning_rate": 5.574200400959773e-06,
      "loss": 0.9331,
      "step": 15670
    },
    {
      "epoch": 0.9041878172588832,
      "grad_norm": 0.27973702549934387,
      "learning_rate": 5.541104040159217e-06,
      "loss": 0.9407,
      "step": 15675
    },
    {
      "epoch": 0.904476234425473,
      "grad_norm": 0.3060024678707123,
      "learning_rate": 5.5081034256664445e-06,
      "loss": 0.9476,
      "step": 15680
    },
    {
      "epoch": 0.9047646515920628,
      "grad_norm": 0.27370506525039673,
      "learning_rate": 5.475198590931829e-06,
      "loss": 0.9436,
      "step": 15685
    },
    {
      "epoch": 0.9050530687586525,
      "grad_norm": 0.27347618341445923,
      "learning_rate": 5.442389569308703e-06,
      "loss": 0.9515,
      "step": 15690
    },
    {
      "epoch": 0.9053414859252422,
      "grad_norm": 0.32937949895858765,
      "learning_rate": 5.4096763940532316e-06,
      "loss": 1.0078,
      "step": 15695
    },
    {
      "epoch": 0.905629903091832,
      "grad_norm": 0.28055548667907715,
      "learning_rate": 5.377059098324455e-06,
      "loss": 0.9161,
      "step": 15700
    },
    {
      "epoch": 0.9059183202584218,
      "grad_norm": 0.29839953780174255,
      "learning_rate": 5.344537715184228e-06,
      "loss": 0.9526,
      "step": 15705
    },
    {
      "epoch": 0.9062067374250116,
      "grad_norm": 0.27476581931114197,
      "learning_rate": 5.312112277597159e-06,
      "loss": 0.9429,
      "step": 15710
    },
    {
      "epoch": 0.9064951545916012,
      "grad_norm": 0.2815583050251007,
      "learning_rate": 5.279782818430656e-06,
      "loss": 0.9715,
      "step": 15715
    },
    {
      "epoch": 0.906783571758191,
      "grad_norm": 0.3266623914241791,
      "learning_rate": 5.247549370454763e-06,
      "loss": 1.0225,
      "step": 15720
    },
    {
      "epoch": 0.9070719889247808,
      "grad_norm": 0.28937751054763794,
      "learning_rate": 5.215411966342287e-06,
      "loss": 0.9117,
      "step": 15725
    },
    {
      "epoch": 0.9073604060913706,
      "grad_norm": 0.3165436387062073,
      "learning_rate": 5.183370638668616e-06,
      "loss": 0.9378,
      "step": 15730
    },
    {
      "epoch": 0.9076488232579604,
      "grad_norm": 0.2963590919971466,
      "learning_rate": 5.151425419911815e-06,
      "loss": 0.9014,
      "step": 15735
    },
    {
      "epoch": 0.90793724042455,
      "grad_norm": 0.3045542538166046,
      "learning_rate": 5.119576342452459e-06,
      "loss": 0.9728,
      "step": 15740
    },
    {
      "epoch": 0.9082256575911398,
      "grad_norm": 0.26070326566696167,
      "learning_rate": 5.0878234385737135e-06,
      "loss": 0.8845,
      "step": 15745
    },
    {
      "epoch": 0.9085140747577296,
      "grad_norm": 0.3005124032497406,
      "learning_rate": 5.056166740461265e-06,
      "loss": 1.0158,
      "step": 15750
    },
    {
      "epoch": 0.9088024919243194,
      "grad_norm": 0.28957071900367737,
      "learning_rate": 5.024606280203281e-06,
      "loss": 0.9114,
      "step": 15755
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.2658599019050598,
      "learning_rate": 4.993142089790337e-06,
      "loss": 0.935,
      "step": 15760
    },
    {
      "epoch": 0.9093793262574988,
      "grad_norm": 0.27619728446006775,
      "learning_rate": 4.961774201115487e-06,
      "loss": 0.9431,
      "step": 15765
    },
    {
      "epoch": 0.9096677434240886,
      "grad_norm": 0.28659436106681824,
      "learning_rate": 4.9305026459741224e-06,
      "loss": 0.968,
      "step": 15770
    },
    {
      "epoch": 0.9099561605906784,
      "grad_norm": 0.3058231472969055,
      "learning_rate": 4.89932745606404e-06,
      "loss": 0.9406,
      "step": 15775
    },
    {
      "epoch": 0.9102445777572681,
      "grad_norm": 0.28550755977630615,
      "learning_rate": 4.8682486629852975e-06,
      "loss": 0.9397,
      "step": 15780
    },
    {
      "epoch": 0.9105329949238579,
      "grad_norm": 0.30754169821739197,
      "learning_rate": 4.8372662982402835e-06,
      "loss": 0.9453,
      "step": 15785
    },
    {
      "epoch": 0.9108214120904476,
      "grad_norm": 0.29143068194389343,
      "learning_rate": 4.8063803932336114e-06,
      "loss": 0.9442,
      "step": 15790
    },
    {
      "epoch": 0.9111098292570374,
      "grad_norm": 0.31427714228630066,
      "learning_rate": 4.775590979272171e-06,
      "loss": 0.9886,
      "step": 15795
    },
    {
      "epoch": 0.9113982464236271,
      "grad_norm": 0.3036996126174927,
      "learning_rate": 4.74489808756502e-06,
      "loss": 0.9234,
      "step": 15800
    },
    {
      "epoch": 0.9116866635902169,
      "grad_norm": 0.31301310658454895,
      "learning_rate": 4.714301749223326e-06,
      "loss": 0.9896,
      "step": 15805
    },
    {
      "epoch": 0.9119750807568067,
      "grad_norm": 0.2925277054309845,
      "learning_rate": 4.683801995260484e-06,
      "loss": 0.997,
      "step": 15810
    },
    {
      "epoch": 0.9122634979233964,
      "grad_norm": 0.31284570693969727,
      "learning_rate": 4.653398856591917e-06,
      "loss": 1.0578,
      "step": 15815
    },
    {
      "epoch": 0.9125519150899861,
      "grad_norm": 0.3127239942550659,
      "learning_rate": 4.623092364035153e-06,
      "loss": 0.8718,
      "step": 15820
    },
    {
      "epoch": 0.9128403322565759,
      "grad_norm": 0.2932998836040497,
      "learning_rate": 4.592882548309707e-06,
      "loss": 0.9773,
      "step": 15825
    },
    {
      "epoch": 0.9131287494231657,
      "grad_norm": 0.30286481976509094,
      "learning_rate": 4.562769440037174e-06,
      "loss": 0.9534,
      "step": 15830
    },
    {
      "epoch": 0.9134171665897555,
      "grad_norm": 0.31667113304138184,
      "learning_rate": 4.532753069741058e-06,
      "loss": 0.9168,
      "step": 15835
    },
    {
      "epoch": 0.9137055837563451,
      "grad_norm": 0.32030534744262695,
      "learning_rate": 4.502833467846857e-06,
      "loss": 0.9492,
      "step": 15840
    },
    {
      "epoch": 0.9139940009229349,
      "grad_norm": 0.30892324447631836,
      "learning_rate": 4.473010664681932e-06,
      "loss": 0.9133,
      "step": 15845
    },
    {
      "epoch": 0.9142824180895247,
      "grad_norm": 0.2874343991279602,
      "learning_rate": 4.443284690475558e-06,
      "loss": 0.9352,
      "step": 15850
    },
    {
      "epoch": 0.9145708352561145,
      "grad_norm": 0.28388267755508423,
      "learning_rate": 4.413655575358866e-06,
      "loss": 0.9042,
      "step": 15855
    },
    {
      "epoch": 0.9148592524227042,
      "grad_norm": 0.315041720867157,
      "learning_rate": 4.384123349364788e-06,
      "loss": 0.942,
      "step": 15860
    },
    {
      "epoch": 0.9151476695892939,
      "grad_norm": 0.2852926552295685,
      "learning_rate": 4.354688042428057e-06,
      "loss": 0.9484,
      "step": 15865
    },
    {
      "epoch": 0.9154360867558837,
      "grad_norm": 0.2800235450267792,
      "learning_rate": 4.32534968438516e-06,
      "loss": 0.9049,
      "step": 15870
    },
    {
      "epoch": 0.9157245039224735,
      "grad_norm": 0.31681615114212036,
      "learning_rate": 4.296108304974311e-06,
      "loss": 0.9684,
      "step": 15875
    },
    {
      "epoch": 0.9160129210890632,
      "grad_norm": 0.31426697969436646,
      "learning_rate": 4.266963933835455e-06,
      "loss": 0.9265,
      "step": 15880
    },
    {
      "epoch": 0.916301338255653,
      "grad_norm": 0.26269903779029846,
      "learning_rate": 4.237916600510139e-06,
      "loss": 1.0063,
      "step": 15885
    },
    {
      "epoch": 0.9165897554222427,
      "grad_norm": 0.3267369568347931,
      "learning_rate": 4.208966334441633e-06,
      "loss": 0.9517,
      "step": 15890
    },
    {
      "epoch": 0.9168781725888325,
      "grad_norm": 0.282438188791275,
      "learning_rate": 4.180113164974764e-06,
      "loss": 0.9814,
      "step": 15895
    },
    {
      "epoch": 0.9171665897554222,
      "grad_norm": 0.2685856819152832,
      "learning_rate": 4.151357121355947e-06,
      "loss": 1.0062,
      "step": 15900
    },
    {
      "epoch": 0.917455006922012,
      "grad_norm": 0.28898438811302185,
      "learning_rate": 4.122698232733147e-06,
      "loss": 1.0028,
      "step": 15905
    },
    {
      "epoch": 0.9177434240886018,
      "grad_norm": 0.2767029404640198,
      "learning_rate": 4.0941365281558454e-06,
      "loss": 0.9812,
      "step": 15910
    },
    {
      "epoch": 0.9180318412551915,
      "grad_norm": 0.2602791488170624,
      "learning_rate": 4.065672036575052e-06,
      "loss": 1.1087,
      "step": 15915
    },
    {
      "epoch": 0.9183202584217812,
      "grad_norm": 0.3219699263572693,
      "learning_rate": 4.037304786843188e-06,
      "loss": 1.0031,
      "step": 15920
    },
    {
      "epoch": 0.918608675588371,
      "grad_norm": 0.26737821102142334,
      "learning_rate": 4.009034807714152e-06,
      "loss": 1.0295,
      "step": 15925
    },
    {
      "epoch": 0.9188970927549608,
      "grad_norm": 0.32514479756355286,
      "learning_rate": 3.980862127843199e-06,
      "loss": 1.0016,
      "step": 15930
    },
    {
      "epoch": 0.9191855099215506,
      "grad_norm": 0.33426961302757263,
      "learning_rate": 3.952786775786987e-06,
      "loss": 1.0007,
      "step": 15935
    },
    {
      "epoch": 0.9194739270881402,
      "grad_norm": 0.2924538850784302,
      "learning_rate": 3.924808780003531e-06,
      "loss": 0.9427,
      "step": 15940
    },
    {
      "epoch": 0.91976234425473,
      "grad_norm": 0.27361974120140076,
      "learning_rate": 3.896928168852143e-06,
      "loss": 0.9666,
      "step": 15945
    },
    {
      "epoch": 0.9200507614213198,
      "grad_norm": 0.3665061891078949,
      "learning_rate": 3.86914497059343e-06,
      "loss": 1.0528,
      "step": 15950
    },
    {
      "epoch": 0.9203391785879096,
      "grad_norm": 0.3001386225223541,
      "learning_rate": 3.841459213389232e-06,
      "loss": 0.9524,
      "step": 15955
    },
    {
      "epoch": 0.9206275957544993,
      "grad_norm": 0.35374686121940613,
      "learning_rate": 3.813870925302698e-06,
      "loss": 0.9395,
      "step": 15960
    },
    {
      "epoch": 0.920916012921089,
      "grad_norm": 0.2924344837665558,
      "learning_rate": 3.7863801342980845e-06,
      "loss": 1.0259,
      "step": 15965
    },
    {
      "epoch": 0.9212044300876788,
      "grad_norm": 0.3271997272968292,
      "learning_rate": 3.7589868682408434e-06,
      "loss": 1.0295,
      "step": 15970
    },
    {
      "epoch": 0.9214928472542686,
      "grad_norm": 0.3009391725063324,
      "learning_rate": 3.7316911548976543e-06,
      "loss": 1.0151,
      "step": 15975
    },
    {
      "epoch": 0.9217812644208583,
      "grad_norm": 0.287616491317749,
      "learning_rate": 3.7044930219362063e-06,
      "loss": 1.0131,
      "step": 15980
    },
    {
      "epoch": 0.9220696815874481,
      "grad_norm": 0.30860260128974915,
      "learning_rate": 3.677392496925347e-06,
      "loss": 0.9404,
      "step": 15985
    },
    {
      "epoch": 0.9223580987540378,
      "grad_norm": 0.2797485589981079,
      "learning_rate": 3.6503896073349587e-06,
      "loss": 0.9273,
      "step": 15990
    },
    {
      "epoch": 0.9226465159206276,
      "grad_norm": 0.26379159092903137,
      "learning_rate": 3.6234843805359353e-06,
      "loss": 0.9457,
      "step": 15995
    },
    {
      "epoch": 0.9229349330872173,
      "grad_norm": 0.29575875401496887,
      "learning_rate": 3.5966768438002507e-06,
      "loss": 0.9845,
      "step": 16000
    },
    {
      "epoch": 0.9232233502538071,
      "grad_norm": 0.2721844017505646,
      "learning_rate": 3.56996702430078e-06,
      "loss": 0.9169,
      "step": 16005
    },
    {
      "epoch": 0.9235117674203969,
      "grad_norm": 0.2789749205112457,
      "learning_rate": 3.5433549491113884e-06,
      "loss": 0.8933,
      "step": 16010
    },
    {
      "epoch": 0.9238001845869867,
      "grad_norm": 0.27569326758384705,
      "learning_rate": 3.516840645206854e-06,
      "loss": 0.9735,
      "step": 16015
    },
    {
      "epoch": 0.9240886017535763,
      "grad_norm": 0.2820409834384918,
      "learning_rate": 3.4904241394628557e-06,
      "loss": 0.9656,
      "step": 16020
    },
    {
      "epoch": 0.9243770189201661,
      "grad_norm": 0.37577053904533386,
      "learning_rate": 3.464105458655953e-06,
      "loss": 0.9673,
      "step": 16025
    },
    {
      "epoch": 0.9246654360867559,
      "grad_norm": 0.29112502932548523,
      "learning_rate": 3.4378846294634835e-06,
      "loss": 1.0414,
      "step": 16030
    },
    {
      "epoch": 0.9249538532533457,
      "grad_norm": 0.30589404702186584,
      "learning_rate": 3.4117616784637097e-06,
      "loss": 0.9485,
      "step": 16035
    },
    {
      "epoch": 0.9252422704199353,
      "grad_norm": 0.3032233715057373,
      "learning_rate": 3.3857366321355722e-06,
      "loss": 0.9903,
      "step": 16040
    },
    {
      "epoch": 0.9255306875865251,
      "grad_norm": 0.3229020833969116,
      "learning_rate": 3.3598095168588696e-06,
      "loss": 0.963,
      "step": 16045
    },
    {
      "epoch": 0.9258191047531149,
      "grad_norm": 0.2780771255493164,
      "learning_rate": 3.3339803589140352e-06,
      "loss": 0.9459,
      "step": 16050
    },
    {
      "epoch": 0.9261075219197047,
      "grad_norm": 0.2907694876194,
      "learning_rate": 3.3082491844822926e-06,
      "loss": 0.9683,
      "step": 16055
    },
    {
      "epoch": 0.9263959390862944,
      "grad_norm": 0.2736465632915497,
      "learning_rate": 3.2826160196455123e-06,
      "loss": 0.9295,
      "step": 16060
    },
    {
      "epoch": 0.9266843562528841,
      "grad_norm": 0.2800248861312866,
      "learning_rate": 3.2570808903862106e-06,
      "loss": 0.959,
      "step": 16065
    },
    {
      "epoch": 0.9269727734194739,
      "grad_norm": 0.27502167224884033,
      "learning_rate": 3.23164382258756e-06,
      "loss": 0.931,
      "step": 16070
    },
    {
      "epoch": 0.9272611905860637,
      "grad_norm": 0.29219675064086914,
      "learning_rate": 3.206304842033292e-06,
      "loss": 0.9285,
      "step": 16075
    },
    {
      "epoch": 0.9275496077526535,
      "grad_norm": 0.2969021797180176,
      "learning_rate": 3.181063974407772e-06,
      "loss": 0.9363,
      "step": 16080
    },
    {
      "epoch": 0.9278380249192432,
      "grad_norm": 0.3350149989128113,
      "learning_rate": 3.1559212452958674e-06,
      "loss": 0.9529,
      "step": 16085
    },
    {
      "epoch": 0.928126442085833,
      "grad_norm": 0.278353214263916,
      "learning_rate": 3.1308766801829926e-06,
      "loss": 0.9384,
      "step": 16090
    },
    {
      "epoch": 0.9284148592524227,
      "grad_norm": 0.27826988697052,
      "learning_rate": 3.1059303044550515e-06,
      "loss": 0.9324,
      "step": 16095
    },
    {
      "epoch": 0.9287032764190125,
      "grad_norm": 0.2794398367404938,
      "learning_rate": 3.081082143398395e-06,
      "loss": 0.9595,
      "step": 16100
    },
    {
      "epoch": 0.9289916935856022,
      "grad_norm": 0.29913151264190674,
      "learning_rate": 3.056332222199898e-06,
      "loss": 0.9629,
      "step": 16105
    },
    {
      "epoch": 0.929280110752192,
      "grad_norm": 0.2864798307418823,
      "learning_rate": 3.0316805659467705e-06,
      "loss": 0.881,
      "step": 16110
    },
    {
      "epoch": 0.9295685279187818,
      "grad_norm": 0.3133912980556488,
      "learning_rate": 3.0071271996266804e-06,
      "loss": 0.8897,
      "step": 16115
    },
    {
      "epoch": 0.9298569450853715,
      "grad_norm": 0.2826102674007416,
      "learning_rate": 2.9826721481276077e-06,
      "loss": 0.9238,
      "step": 16120
    },
    {
      "epoch": 0.9301453622519612,
      "grad_norm": 0.27337539196014404,
      "learning_rate": 2.958315436237935e-06,
      "loss": 0.9702,
      "step": 16125
    },
    {
      "epoch": 0.930433779418551,
      "grad_norm": 0.30676543712615967,
      "learning_rate": 2.934057088646336e-06,
      "loss": 0.9966,
      "step": 16130
    },
    {
      "epoch": 0.9307221965851408,
      "grad_norm": 0.2815453112125397,
      "learning_rate": 2.9098971299417634e-06,
      "loss": 0.9087,
      "step": 16135
    },
    {
      "epoch": 0.9310106137517306,
      "grad_norm": 0.26081717014312744,
      "learning_rate": 2.8858355846134944e-06,
      "loss": 0.9942,
      "step": 16140
    },
    {
      "epoch": 0.9312990309183202,
      "grad_norm": 0.27590805292129517,
      "learning_rate": 2.8618724770509864e-06,
      "loss": 0.9347,
      "step": 16145
    },
    {
      "epoch": 0.93158744808491,
      "grad_norm": 0.29033374786376953,
      "learning_rate": 2.8380078315439653e-06,
      "loss": 0.9639,
      "step": 16150
    },
    {
      "epoch": 0.9318758652514998,
      "grad_norm": 0.3115253150463104,
      "learning_rate": 2.814241672282336e-06,
      "loss": 0.987,
      "step": 16155
    },
    {
      "epoch": 0.9321642824180896,
      "grad_norm": 0.3148709237575531,
      "learning_rate": 2.790574023356163e-06,
      "loss": 0.9225,
      "step": 16160
    },
    {
      "epoch": 0.9324526995846792,
      "grad_norm": 0.2759745717048645,
      "learning_rate": 2.767004908755677e-06,
      "loss": 1.0109,
      "step": 16165
    },
    {
      "epoch": 0.932741116751269,
      "grad_norm": 0.2825234830379486,
      "learning_rate": 2.7435343523712242e-06,
      "loss": 0.9106,
      "step": 16170
    },
    {
      "epoch": 0.9330295339178588,
      "grad_norm": 0.2725534737110138,
      "learning_rate": 2.7201623779932516e-06,
      "loss": 0.9313,
      "step": 16175
    },
    {
      "epoch": 0.9333179510844486,
      "grad_norm": 0.29524463415145874,
      "learning_rate": 2.6968890093122754e-06,
      "loss": 1.0006,
      "step": 16180
    },
    {
      "epoch": 0.9336063682510383,
      "grad_norm": 0.2792085111141205,
      "learning_rate": 2.6737142699188587e-06,
      "loss": 0.9995,
      "step": 16185
    },
    {
      "epoch": 0.933894785417628,
      "grad_norm": 0.28851646184921265,
      "learning_rate": 2.650638183303611e-06,
      "loss": 0.932,
      "step": 16190
    },
    {
      "epoch": 0.9341832025842178,
      "grad_norm": 0.2972305119037628,
      "learning_rate": 2.62766077285711e-06,
      "loss": 0.9405,
      "step": 16195
    },
    {
      "epoch": 0.9344716197508076,
      "grad_norm": 0.2950534522533417,
      "learning_rate": 2.6047820618699592e-06,
      "loss": 0.9827,
      "step": 16200
    },
    {
      "epoch": 0.9347600369173973,
      "grad_norm": 0.2992311120033264,
      "learning_rate": 2.5820020735326632e-06,
      "loss": 0.9423,
      "step": 16205
    },
    {
      "epoch": 0.9350484540839871,
      "grad_norm": 0.2768268287181854,
      "learning_rate": 2.5593208309357187e-06,
      "loss": 0.9297,
      "step": 16210
    },
    {
      "epoch": 0.9353368712505769,
      "grad_norm": 0.2867565453052521,
      "learning_rate": 2.536738357069468e-06,
      "loss": 0.9676,
      "step": 16215
    },
    {
      "epoch": 0.9356252884171666,
      "grad_norm": 0.2953318655490875,
      "learning_rate": 2.514254674824168e-06,
      "loss": 0.986,
      "step": 16220
    },
    {
      "epoch": 0.9359137055837563,
      "grad_norm": 0.3065684139728546,
      "learning_rate": 2.491869806989966e-06,
      "loss": 0.9845,
      "step": 16225
    },
    {
      "epoch": 0.9362021227503461,
      "grad_norm": 0.29751312732696533,
      "learning_rate": 2.469583776256812e-06,
      "loss": 0.9883,
      "step": 16230
    },
    {
      "epoch": 0.9364905399169359,
      "grad_norm": 0.25876522064208984,
      "learning_rate": 2.447396605214469e-06,
      "loss": 0.9825,
      "step": 16235
    },
    {
      "epoch": 0.9367789570835257,
      "grad_norm": 0.28333285450935364,
      "learning_rate": 2.4253083163525038e-06,
      "loss": 0.895,
      "step": 16240
    },
    {
      "epoch": 0.9370673742501153,
      "grad_norm": 0.2942551076412201,
      "learning_rate": 2.4033189320602613e-06,
      "loss": 0.9891,
      "step": 16245
    },
    {
      "epoch": 0.9373557914167051,
      "grad_norm": 0.27687203884124756,
      "learning_rate": 2.3814284746268344e-06,
      "loss": 0.9201,
      "step": 16250
    },
    {
      "epoch": 0.9376442085832949,
      "grad_norm": 0.29299119114875793,
      "learning_rate": 2.359636966241019e-06,
      "loss": 0.9656,
      "step": 16255
    },
    {
      "epoch": 0.9379326257498847,
      "grad_norm": 0.2875472903251648,
      "learning_rate": 2.3379444289913342e-06,
      "loss": 0.9556,
      "step": 16260
    },
    {
      "epoch": 0.9382210429164743,
      "grad_norm": 0.282306432723999,
      "learning_rate": 2.3163508848659587e-06,
      "loss": 0.9693,
      "step": 16265
    },
    {
      "epoch": 0.9385094600830641,
      "grad_norm": 0.31084609031677246,
      "learning_rate": 2.2948563557527836e-06,
      "loss": 0.9276,
      "step": 16270
    },
    {
      "epoch": 0.9387978772496539,
      "grad_norm": 0.27823126316070557,
      "learning_rate": 2.273460863439236e-06,
      "loss": 0.9533,
      "step": 16275
    },
    {
      "epoch": 0.9390862944162437,
      "grad_norm": 0.2766770124435425,
      "learning_rate": 2.2521644296124466e-06,
      "loss": 0.9623,
      "step": 16280
    },
    {
      "epoch": 0.9393747115828334,
      "grad_norm": 0.2881390154361725,
      "learning_rate": 2.2309670758591138e-06,
      "loss": 0.9366,
      "step": 16285
    },
    {
      "epoch": 0.9396631287494231,
      "grad_norm": 0.27332398295402527,
      "learning_rate": 2.209868823665473e-06,
      "loss": 0.9053,
      "step": 16290
    },
    {
      "epoch": 0.9399515459160129,
      "grad_norm": 0.27232545614242554,
      "learning_rate": 2.1888696944173504e-06,
      "loss": 0.9867,
      "step": 16295
    },
    {
      "epoch": 0.9402399630826027,
      "grad_norm": 0.274514764547348,
      "learning_rate": 2.1679697094000638e-06,
      "loss": 0.9459,
      "step": 16300
    },
    {
      "epoch": 0.9405283802491924,
      "grad_norm": 0.28133681416511536,
      "learning_rate": 2.1471688897984675e-06,
      "loss": 0.9392,
      "step": 16305
    },
    {
      "epoch": 0.9408167974157822,
      "grad_norm": 0.30380240082740784,
      "learning_rate": 2.1264672566968736e-06,
      "loss": 0.9615,
      "step": 16310
    },
    {
      "epoch": 0.941105214582372,
      "grad_norm": 0.2733669877052307,
      "learning_rate": 2.105864831079063e-06,
      "loss": 0.9116,
      "step": 16315
    },
    {
      "epoch": 0.9413936317489617,
      "grad_norm": 0.28536126017570496,
      "learning_rate": 2.0853616338282644e-06,
      "loss": 0.95,
      "step": 16320
    },
    {
      "epoch": 0.9416820489155514,
      "grad_norm": 0.28393039107322693,
      "learning_rate": 2.064957685727109e-06,
      "loss": 0.9179,
      "step": 16325
    },
    {
      "epoch": 0.9419704660821412,
      "grad_norm": 0.3136070966720581,
      "learning_rate": 2.044653007457653e-06,
      "loss": 1.0152,
      "step": 16330
    },
    {
      "epoch": 0.942258883248731,
      "grad_norm": 0.28725332021713257,
      "learning_rate": 2.0244476196012995e-06,
      "loss": 0.9516,
      "step": 16335
    },
    {
      "epoch": 0.9425473004153208,
      "grad_norm": 0.28715264797210693,
      "learning_rate": 2.0043415426388324e-06,
      "loss": 0.9183,
      "step": 16340
    },
    {
      "epoch": 0.9428357175819104,
      "grad_norm": 0.2766190469264984,
      "learning_rate": 1.98433479695036e-06,
      "loss": 0.9351,
      "step": 16345
    },
    {
      "epoch": 0.9431241347485002,
      "grad_norm": 0.26775482296943665,
      "learning_rate": 1.964427402815294e-06,
      "loss": 0.9339,
      "step": 16350
    },
    {
      "epoch": 0.94341255191509,
      "grad_norm": 0.35428106784820557,
      "learning_rate": 1.9446193804123826e-06,
      "loss": 1.0776,
      "step": 16355
    },
    {
      "epoch": 0.9437009690816798,
      "grad_norm": 0.2934764325618744,
      "learning_rate": 1.924910749819586e-06,
      "loss": 0.9934,
      "step": 16360
    },
    {
      "epoch": 0.9439893862482694,
      "grad_norm": 0.29175451397895813,
      "learning_rate": 1.9053015310141587e-06,
      "loss": 0.9219,
      "step": 16365
    },
    {
      "epoch": 0.9442778034148592,
      "grad_norm": 0.279075562953949,
      "learning_rate": 1.8857917438725892e-06,
      "loss": 0.9532,
      "step": 16370
    },
    {
      "epoch": 0.944566220581449,
      "grad_norm": 0.2834916114807129,
      "learning_rate": 1.86638140817057e-06,
      "loss": 0.9375,
      "step": 16375
    },
    {
      "epoch": 0.9448546377480388,
      "grad_norm": 0.28975823521614075,
      "learning_rate": 1.8470705435829849e-06,
      "loss": 0.962,
      "step": 16380
    },
    {
      "epoch": 0.9451430549146285,
      "grad_norm": 0.27287888526916504,
      "learning_rate": 1.8278591696838765e-06,
      "loss": 0.9551,
      "step": 16385
    },
    {
      "epoch": 0.9454314720812182,
      "grad_norm": 0.2761301100254059,
      "learning_rate": 1.8087473059464788e-06,
      "loss": 0.9122,
      "step": 16390
    },
    {
      "epoch": 0.945719889247808,
      "grad_norm": 0.2874256372451782,
      "learning_rate": 1.7897349717431288e-06,
      "loss": 0.9413,
      "step": 16395
    },
    {
      "epoch": 0.9460083064143978,
      "grad_norm": 0.2810600697994232,
      "learning_rate": 1.770822186345289e-06,
      "loss": 0.9441,
      "step": 16400
    },
    {
      "epoch": 0.9462967235809875,
      "grad_norm": 0.26397740840911865,
      "learning_rate": 1.752008968923502e-06,
      "loss": 0.8909,
      "step": 16405
    },
    {
      "epoch": 0.9465851407475773,
      "grad_norm": 0.28463664650917053,
      "learning_rate": 1.7332953385474027e-06,
      "loss": 0.9269,
      "step": 16410
    },
    {
      "epoch": 0.946873557914167,
      "grad_norm": 0.29349541664123535,
      "learning_rate": 1.7146813141856955e-06,
      "loss": 0.9918,
      "step": 16415
    },
    {
      "epoch": 0.9471619750807568,
      "grad_norm": 0.2936592996120453,
      "learning_rate": 1.6961669147060765e-06,
      "loss": 1.0146,
      "step": 16420
    },
    {
      "epoch": 0.9474503922473465,
      "grad_norm": 0.28148314356803894,
      "learning_rate": 1.67775215887529e-06,
      "loss": 0.9788,
      "step": 16425
    },
    {
      "epoch": 0.9477388094139363,
      "grad_norm": 0.27716365456581116,
      "learning_rate": 1.6594370653590706e-06,
      "loss": 0.9918,
      "step": 16430
    },
    {
      "epoch": 0.9480272265805261,
      "grad_norm": 0.2905281186103821,
      "learning_rate": 1.641221652722158e-06,
      "loss": 0.9588,
      "step": 16435
    },
    {
      "epoch": 0.9483156437471159,
      "grad_norm": 0.2908460199832916,
      "learning_rate": 1.6231059394281934e-06,
      "loss": 0.9696,
      "step": 16440
    },
    {
      "epoch": 0.9486040609137056,
      "grad_norm": 0.3046049475669861,
      "learning_rate": 1.6050899438398104e-06,
      "loss": 0.9385,
      "step": 16445
    },
    {
      "epoch": 0.9488924780802953,
      "grad_norm": 0.2991294860839844,
      "learning_rate": 1.587173684218557e-06,
      "loss": 0.9193,
      "step": 16450
    },
    {
      "epoch": 0.9491808952468851,
      "grad_norm": 0.29674336314201355,
      "learning_rate": 1.5693571787248728e-06,
      "loss": 0.9054,
      "step": 16455
    },
    {
      "epoch": 0.9494693124134749,
      "grad_norm": 0.3032147288322449,
      "learning_rate": 1.55164044541809e-06,
      "loss": 0.9429,
      "step": 16460
    },
    {
      "epoch": 0.9497577295800647,
      "grad_norm": 0.3129865527153015,
      "learning_rate": 1.5340235022564098e-06,
      "loss": 1.0181,
      "step": 16465
    },
    {
      "epoch": 0.9500461467466543,
      "grad_norm": 0.3160928785800934,
      "learning_rate": 1.5165063670968926e-06,
      "loss": 0.9554,
      "step": 16470
    },
    {
      "epoch": 0.9503345639132441,
      "grad_norm": 0.28156229853630066,
      "learning_rate": 1.499089057695402e-06,
      "loss": 0.9426,
      "step": 16475
    },
    {
      "epoch": 0.9506229810798339,
      "grad_norm": 0.3097882866859436,
      "learning_rate": 1.4817715917066488e-06,
      "loss": 1.0092,
      "step": 16480
    },
    {
      "epoch": 0.9509113982464237,
      "grad_norm": 0.30563685297966003,
      "learning_rate": 1.464553986684114e-06,
      "loss": 0.949,
      "step": 16485
    },
    {
      "epoch": 0.9511998154130133,
      "grad_norm": 0.28421536087989807,
      "learning_rate": 1.4474362600800706e-06,
      "loss": 0.9152,
      "step": 16490
    },
    {
      "epoch": 0.9514882325796031,
      "grad_norm": 0.28444600105285645,
      "learning_rate": 1.4304184292455613e-06,
      "loss": 0.967,
      "step": 16495
    },
    {
      "epoch": 0.9517766497461929,
      "grad_norm": 0.2795342803001404,
      "learning_rate": 1.4135005114303435e-06,
      "loss": 0.9806,
      "step": 16500
    },
    {
      "epoch": 0.9520650669127827,
      "grad_norm": 0.3018958866596222,
      "learning_rate": 1.3966825237829106e-06,
      "loss": 0.9117,
      "step": 16505
    },
    {
      "epoch": 0.9523534840793724,
      "grad_norm": 0.27752962708473206,
      "learning_rate": 1.379964483350482e-06,
      "loss": 0.921,
      "step": 16510
    },
    {
      "epoch": 0.9526419012459622,
      "grad_norm": 0.28480061888694763,
      "learning_rate": 1.363346407078947e-06,
      "loss": 0.9557,
      "step": 16515
    },
    {
      "epoch": 0.9529303184125519,
      "grad_norm": 0.3103495240211487,
      "learning_rate": 1.3468283118128756e-06,
      "loss": 0.9517,
      "step": 16520
    },
    {
      "epoch": 0.9532187355791417,
      "grad_norm": 0.2942695617675781,
      "learning_rate": 1.3304102142954965e-06,
      "loss": 0.9146,
      "step": 16525
    },
    {
      "epoch": 0.9535071527457314,
      "grad_norm": 0.30698221921920776,
      "learning_rate": 1.314092131168665e-06,
      "loss": 0.96,
      "step": 16530
    },
    {
      "epoch": 0.9537955699123212,
      "grad_norm": 0.2886694073677063,
      "learning_rate": 1.2978740789728827e-06,
      "loss": 0.8702,
      "step": 16535
    },
    {
      "epoch": 0.954083987078911,
      "grad_norm": 0.29679936170578003,
      "learning_rate": 1.2817560741472445e-06,
      "loss": 0.8778,
      "step": 16540
    },
    {
      "epoch": 0.9543724042455007,
      "grad_norm": 0.3064761459827423,
      "learning_rate": 1.2657381330294149e-06,
      "loss": 0.8782,
      "step": 16545
    },
    {
      "epoch": 0.9546608214120904,
      "grad_norm": 0.26160743832588196,
      "learning_rate": 1.2498202718556617e-06,
      "loss": 0.9198,
      "step": 16550
    },
    {
      "epoch": 0.9549492385786802,
      "grad_norm": 0.283074289560318,
      "learning_rate": 1.2340025067608007e-06,
      "loss": 0.9356,
      "step": 16555
    },
    {
      "epoch": 0.95523765574527,
      "grad_norm": 0.27303585410118103,
      "learning_rate": 1.2182848537781622e-06,
      "loss": 0.91,
      "step": 16560
    },
    {
      "epoch": 0.9555260729118598,
      "grad_norm": 0.2848970890045166,
      "learning_rate": 1.2026673288396462e-06,
      "loss": 0.9014,
      "step": 16565
    },
    {
      "epoch": 0.9558144900784494,
      "grad_norm": 0.2902991473674774,
      "learning_rate": 1.187149947775612e-06,
      "loss": 0.9074,
      "step": 16570
    },
    {
      "epoch": 0.9561029072450392,
      "grad_norm": 0.2852989137172699,
      "learning_rate": 1.1717327263149447e-06,
      "loss": 0.943,
      "step": 16575
    },
    {
      "epoch": 0.956391324411629,
      "grad_norm": 0.286842405796051,
      "learning_rate": 1.1564156800849879e-06,
      "loss": 0.9744,
      "step": 16580
    },
    {
      "epoch": 0.9566797415782188,
      "grad_norm": 0.302713543176651,
      "learning_rate": 1.1411988246115556e-06,
      "loss": 0.9402,
      "step": 16585
    },
    {
      "epoch": 0.9569681587448085,
      "grad_norm": 0.31443941593170166,
      "learning_rate": 1.1260821753188987e-06,
      "loss": 0.9582,
      "step": 16590
    },
    {
      "epoch": 0.9572565759113982,
      "grad_norm": 0.28639456629753113,
      "learning_rate": 1.1110657475296827e-06,
      "loss": 0.927,
      "step": 16595
    },
    {
      "epoch": 0.957544993077988,
      "grad_norm": 0.27512067556381226,
      "learning_rate": 1.0961495564650092e-06,
      "loss": 0.9749,
      "step": 16600
    },
    {
      "epoch": 0.9578334102445778,
      "grad_norm": 0.2878815531730652,
      "learning_rate": 1.0813336172443622e-06,
      "loss": 0.9624,
      "step": 16605
    },
    {
      "epoch": 0.9581218274111675,
      "grad_norm": 0.31200575828552246,
      "learning_rate": 1.0666179448856174e-06,
      "loss": 0.9508,
      "step": 16610
    },
    {
      "epoch": 0.9584102445777573,
      "grad_norm": 0.2852694094181061,
      "learning_rate": 1.0520025543050094e-06,
      "loss": 0.951,
      "step": 16615
    },
    {
      "epoch": 0.958698661744347,
      "grad_norm": 0.29534223675727844,
      "learning_rate": 1.0374874603171326e-06,
      "loss": 0.9803,
      "step": 16620
    },
    {
      "epoch": 0.9589870789109368,
      "grad_norm": 0.3066651225090027,
      "learning_rate": 1.0230726776349063e-06,
      "loss": 0.9267,
      "step": 16625
    },
    {
      "epoch": 0.9592754960775265,
      "grad_norm": 0.2924138605594635,
      "learning_rate": 1.0087582208695768e-06,
      "loss": 0.9759,
      "step": 16630
    },
    {
      "epoch": 0.9595639132441163,
      "grad_norm": 0.2614937424659729,
      "learning_rate": 9.945441045306925e-07,
      "loss": 0.947,
      "step": 16635
    },
    {
      "epoch": 0.9598523304107061,
      "grad_norm": 0.3080776333808899,
      "learning_rate": 9.804303430261174e-07,
      "loss": 0.9668,
      "step": 16640
    },
    {
      "epoch": 0.9601407475772958,
      "grad_norm": 0.30279916524887085,
      "learning_rate": 9.664169506619525e-07,
      "loss": 0.9199,
      "step": 16645
    },
    {
      "epoch": 0.9604291647438855,
      "grad_norm": 0.29995423555374146,
      "learning_rate": 9.525039416425907e-07,
      "loss": 0.9283,
      "step": 16650
    },
    {
      "epoch": 0.9607175819104753,
      "grad_norm": 0.26539433002471924,
      "learning_rate": 9.386913300706735e-07,
      "loss": 0.9023,
      "step": 16655
    },
    {
      "epoch": 0.9610059990770651,
      "grad_norm": 0.4439990222454071,
      "learning_rate": 9.249791299470567e-07,
      "loss": 0.9354,
      "step": 16660
    },
    {
      "epoch": 0.9612944162436549,
      "grad_norm": 0.2836225926876068,
      "learning_rate": 9.113673551708446e-07,
      "loss": 0.9507,
      "step": 16665
    },
    {
      "epoch": 0.9615828334102445,
      "grad_norm": 0.2889987826347351,
      "learning_rate": 8.978560195393115e-07,
      "loss": 0.9704,
      "step": 16670
    },
    {
      "epoch": 0.9618712505768343,
      "grad_norm": 0.3012427091598511,
      "learning_rate": 8.844451367479689e-07,
      "loss": 0.9667,
      "step": 16675
    },
    {
      "epoch": 0.9621596677434241,
      "grad_norm": 0.27891865372657776,
      "learning_rate": 8.711347203904541e-07,
      "loss": 0.9804,
      "step": 16680
    },
    {
      "epoch": 0.9624480849100139,
      "grad_norm": 0.27315354347229004,
      "learning_rate": 8.57924783958608e-07,
      "loss": 0.9159,
      "step": 16685
    },
    {
      "epoch": 0.9627365020766036,
      "grad_norm": 0.2848674952983856,
      "learning_rate": 8.448153408424087e-07,
      "loss": 0.9553,
      "step": 16690
    },
    {
      "epoch": 0.9630249192431933,
      "grad_norm": 0.30129551887512207,
      "learning_rate": 8.318064043299823e-07,
      "loss": 0.8892,
      "step": 16695
    },
    {
      "epoch": 0.9633133364097831,
      "grad_norm": 0.2893418073654175,
      "learning_rate": 8.188979876075475e-07,
      "loss": 1.0384,
      "step": 16700
    },
    {
      "epoch": 0.9636017535763729,
      "grad_norm": 0.2639276385307312,
      "learning_rate": 8.060901037594714e-07,
      "loss": 0.923,
      "step": 16705
    },
    {
      "epoch": 0.9638901707429626,
      "grad_norm": 0.2888321876525879,
      "learning_rate": 7.933827657682025e-07,
      "loss": 0.9154,
      "step": 16710
    },
    {
      "epoch": 0.9641785879095524,
      "grad_norm": 0.29655885696411133,
      "learning_rate": 7.807759865142483e-07,
      "loss": 0.9545,
      "step": 16715
    },
    {
      "epoch": 0.9644670050761421,
      "grad_norm": 0.30424603819847107,
      "learning_rate": 7.682697787762317e-07,
      "loss": 0.9101,
      "step": 16720
    },
    {
      "epoch": 0.9647554222427319,
      "grad_norm": 0.295279860496521,
      "learning_rate": 7.558641552308121e-07,
      "loss": 0.9619,
      "step": 16725
    },
    {
      "epoch": 0.9650438394093216,
      "grad_norm": 0.29094138741493225,
      "learning_rate": 7.435591284526866e-07,
      "loss": 0.9636,
      "step": 16730
    },
    {
      "epoch": 0.9653322565759114,
      "grad_norm": 0.2868499755859375,
      "learning_rate": 7.31354710914589e-07,
      "loss": 1.0484,
      "step": 16735
    },
    {
      "epoch": 0.9656206737425012,
      "grad_norm": 0.28914302587509155,
      "learning_rate": 7.192509149872684e-07,
      "loss": 0.9048,
      "step": 16740
    },
    {
      "epoch": 0.9659090909090909,
      "grad_norm": 0.2850790023803711,
      "learning_rate": 7.072477529395105e-07,
      "loss": 0.933,
      "step": 16745
    },
    {
      "epoch": 0.9661975080756806,
      "grad_norm": 0.27555444836616516,
      "learning_rate": 6.953452369380497e-07,
      "loss": 0.9781,
      "step": 16750
    },
    {
      "epoch": 0.9664859252422704,
      "grad_norm": 0.3144330084323883,
      "learning_rate": 6.835433790476354e-07,
      "loss": 0.9763,
      "step": 16755
    },
    {
      "epoch": 0.9667743424088602,
      "grad_norm": 0.2599543631076813,
      "learning_rate": 6.718421912309758e-07,
      "loss": 0.8993,
      "step": 16760
    },
    {
      "epoch": 0.96706275957545,
      "grad_norm": 0.2996498644351959,
      "learning_rate": 6.602416853487392e-07,
      "loss": 0.9793,
      "step": 16765
    },
    {
      "epoch": 0.9673511767420396,
      "grad_norm": 0.28505122661590576,
      "learning_rate": 6.487418731595418e-07,
      "loss": 0.9164,
      "step": 16770
    },
    {
      "epoch": 0.9676395939086294,
      "grad_norm": 0.2904942035675049,
      "learning_rate": 6.373427663199261e-07,
      "loss": 0.96,
      "step": 16775
    },
    {
      "epoch": 0.9679280110752192,
      "grad_norm": 0.2883807122707367,
      "learning_rate": 6.260443763843493e-07,
      "loss": 0.9771,
      "step": 16780
    },
    {
      "epoch": 0.968216428241809,
      "grad_norm": 0.3035736382007599,
      "learning_rate": 6.148467148052172e-07,
      "loss": 0.9108,
      "step": 16785
    },
    {
      "epoch": 0.9685048454083988,
      "grad_norm": 0.2641626000404358,
      "learning_rate": 6.037497929327839e-07,
      "loss": 1.008,
      "step": 16790
    },
    {
      "epoch": 0.9687932625749884,
      "grad_norm": 0.28156375885009766,
      "learning_rate": 5.927536220152296e-07,
      "loss": 0.9439,
      "step": 16795
    },
    {
      "epoch": 0.9690816797415782,
      "grad_norm": 0.2757076621055603,
      "learning_rate": 5.818582131985939e-07,
      "loss": 0.9123,
      "step": 16800
    },
    {
      "epoch": 0.969370096908168,
      "grad_norm": 0.29132992029190063,
      "learning_rate": 5.710635775267759e-07,
      "loss": 0.8855,
      "step": 16805
    },
    {
      "epoch": 0.9696585140747578,
      "grad_norm": 0.27889660000801086,
      "learning_rate": 5.603697259415341e-07,
      "loss": 0.9758,
      "step": 16810
    },
    {
      "epoch": 0.9699469312413475,
      "grad_norm": 0.3405865430831909,
      "learning_rate": 5.497766692824868e-07,
      "loss": 1.0036,
      "step": 16815
    },
    {
      "epoch": 0.9702353484079372,
      "grad_norm": 0.30193057656288147,
      "learning_rate": 5.392844182870449e-07,
      "loss": 0.9067,
      "step": 16820
    },
    {
      "epoch": 0.970523765574527,
      "grad_norm": 0.2817526161670685,
      "learning_rate": 5.288929835904788e-07,
      "loss": 0.9047,
      "step": 16825
    },
    {
      "epoch": 0.9708121827411168,
      "grad_norm": 0.2961997091770172,
      "learning_rate": 5.186023757258407e-07,
      "loss": 0.9158,
      "step": 16830
    },
    {
      "epoch": 0.9711005999077065,
      "grad_norm": 0.2850496470928192,
      "learning_rate": 5.08412605123998e-07,
      "loss": 0.9862,
      "step": 16835
    },
    {
      "epoch": 0.9713890170742963,
      "grad_norm": 0.297963410615921,
      "learning_rate": 4.983236821135995e-07,
      "loss": 0.9943,
      "step": 16840
    },
    {
      "epoch": 0.971677434240886,
      "grad_norm": 0.2760219871997833,
      "learning_rate": 4.883356169210651e-07,
      "loss": 0.9675,
      "step": 16845
    },
    {
      "epoch": 0.9719658514074758,
      "grad_norm": 0.29743608832359314,
      "learning_rate": 4.784484196706073e-07,
      "loss": 0.9248,
      "step": 16850
    },
    {
      "epoch": 0.9722542685740655,
      "grad_norm": 0.37705740332603455,
      "learning_rate": 4.6866210038417625e-07,
      "loss": 0.9858,
      "step": 16855
    },
    {
      "epoch": 0.9725426857406553,
      "grad_norm": 0.29688891768455505,
      "learning_rate": 4.5897666898145896e-07,
      "loss": 0.9439,
      "step": 16860
    },
    {
      "epoch": 0.9728311029072451,
      "grad_norm": 0.3039180040359497,
      "learning_rate": 4.4939213527990245e-07,
      "loss": 1.0214,
      "step": 16865
    },
    {
      "epoch": 0.9731195200738348,
      "grad_norm": 0.28710174560546875,
      "learning_rate": 4.3990850899467975e-07,
      "loss": 0.9145,
      "step": 16870
    },
    {
      "epoch": 0.9734079372404245,
      "grad_norm": 0.29073718190193176,
      "learning_rate": 4.305257997386458e-07,
      "loss": 0.901,
      "step": 16875
    },
    {
      "epoch": 0.9736963544070143,
      "grad_norm": 0.2846122682094574,
      "learning_rate": 4.2124401702241524e-07,
      "loss": 0.9701,
      "step": 16880
    },
    {
      "epoch": 0.9739847715736041,
      "grad_norm": 0.27932944893836975,
      "learning_rate": 4.120631702542732e-07,
      "loss": 0.9515,
      "step": 16885
    },
    {
      "epoch": 0.9742731887401939,
      "grad_norm": 0.2845093309879303,
      "learning_rate": 4.029832687401758e-07,
      "loss": 0.8943,
      "step": 16890
    },
    {
      "epoch": 0.9745616059067835,
      "grad_norm": 0.281512051820755,
      "learning_rate": 3.940043216838052e-07,
      "loss": 0.8849,
      "step": 16895
    },
    {
      "epoch": 0.9748500230733733,
      "grad_norm": 0.30090752243995667,
      "learning_rate": 3.851263381864589e-07,
      "loss": 0.9418,
      "step": 16900
    },
    {
      "epoch": 0.9751384402399631,
      "grad_norm": 0.26722562313079834,
      "learning_rate": 3.7634932724713854e-07,
      "loss": 0.9539,
      "step": 16905
    },
    {
      "epoch": 0.9754268574065529,
      "grad_norm": 0.2945277988910675,
      "learning_rate": 3.67673297762483e-07,
      "loss": 0.9728,
      "step": 16910
    },
    {
      "epoch": 0.9757152745731426,
      "grad_norm": 0.3588777184486389,
      "learning_rate": 3.590982585267466e-07,
      "loss": 1.0383,
      "step": 16915
    },
    {
      "epoch": 0.9760036917397323,
      "grad_norm": 0.29528379440307617,
      "learning_rate": 3.506242182318653e-07,
      "loss": 1.0303,
      "step": 16920
    },
    {
      "epoch": 0.9762921089063221,
      "grad_norm": 0.3141199052333832,
      "learning_rate": 3.422511854673682e-07,
      "loss": 0.8857,
      "step": 16925
    },
    {
      "epoch": 0.9765805260729119,
      "grad_norm": 0.2811961770057678,
      "learning_rate": 3.339791687203997e-07,
      "loss": 0.9484,
      "step": 16930
    },
    {
      "epoch": 0.9768689432395016,
      "grad_norm": 0.2975912094116211,
      "learning_rate": 3.2580817637571923e-07,
      "loss": 0.9638,
      "step": 16935
    },
    {
      "epoch": 0.9771573604060914,
      "grad_norm": 0.3165859580039978,
      "learning_rate": 3.177382167156906e-07,
      "loss": 0.9067,
      "step": 16940
    },
    {
      "epoch": 0.9774457775726811,
      "grad_norm": 0.381736159324646,
      "learning_rate": 3.097692979202704e-07,
      "loss": 0.9496,
      "step": 16945
    },
    {
      "epoch": 0.9777341947392709,
      "grad_norm": 0.28172680735588074,
      "learning_rate": 3.019014280669641e-07,
      "loss": 0.8839,
      "step": 16950
    },
    {
      "epoch": 0.9780226119058606,
      "grad_norm": 0.3085029125213623,
      "learning_rate": 2.9413461513090324e-07,
      "loss": 0.8825,
      "step": 16955
    },
    {
      "epoch": 0.9783110290724504,
      "grad_norm": 0.3072126507759094,
      "learning_rate": 2.8646886698473484e-07,
      "loss": 0.9802,
      "step": 16960
    },
    {
      "epoch": 0.9785994462390402,
      "grad_norm": 0.2818610966205597,
      "learning_rate": 2.789041913986878e-07,
      "loss": 0.9383,
      "step": 16965
    },
    {
      "epoch": 0.9788878634056299,
      "grad_norm": 0.297048419713974,
      "learning_rate": 2.7144059604055085e-07,
      "loss": 0.9903,
      "step": 16970
    },
    {
      "epoch": 0.9791762805722196,
      "grad_norm": 0.30197322368621826,
      "learning_rate": 2.640780884756389e-07,
      "loss": 0.9497,
      "step": 16975
    },
    {
      "epoch": 0.9794646977388094,
      "grad_norm": 0.312307208776474,
      "learning_rate": 2.568166761668156e-07,
      "loss": 0.9123,
      "step": 16980
    },
    {
      "epoch": 0.9797531149053992,
      "grad_norm": 0.30723750591278076,
      "learning_rate": 2.496563664744378e-07,
      "loss": 0.9771,
      "step": 16985
    },
    {
      "epoch": 0.980041532071989,
      "grad_norm": 0.29737958312034607,
      "learning_rate": 2.4259716665641083e-07,
      "loss": 0.9366,
      "step": 16990
    },
    {
      "epoch": 0.9803299492385786,
      "grad_norm": 0.34504443407058716,
      "learning_rate": 2.3563908386816657e-07,
      "loss": 0.984,
      "step": 16995
    },
    {
      "epoch": 0.9806183664051684,
      "grad_norm": 0.2985767722129822,
      "learning_rate": 2.2878212516260766e-07,
      "loss": 0.9287,
      "step": 17000
    },
    {
      "epoch": 0.9809067835717582,
      "grad_norm": 0.27487707138061523,
      "learning_rate": 2.2202629749015213e-07,
      "loss": 0.9725,
      "step": 17005
    },
    {
      "epoch": 0.981195200738348,
      "grad_norm": 0.2767324149608612,
      "learning_rate": 2.1537160769870002e-07,
      "loss": 0.916,
      "step": 17010
    },
    {
      "epoch": 0.9814836179049377,
      "grad_norm": 0.2735516130924225,
      "learning_rate": 2.0881806253364444e-07,
      "loss": 0.9905,
      "step": 17015
    },
    {
      "epoch": 0.9817720350715274,
      "grad_norm": 0.2798011600971222,
      "learning_rate": 2.0236566863784944e-07,
      "loss": 0.884,
      "step": 17020
    },
    {
      "epoch": 0.9820604522381172,
      "grad_norm": 0.32747435569763184,
      "learning_rate": 1.9601443255164998e-07,
      "loss": 0.9432,
      "step": 17025
    },
    {
      "epoch": 0.982348869404707,
      "grad_norm": 0.2849380373954773,
      "learning_rate": 1.8976436071284076e-07,
      "loss": 0.9873,
      "step": 17030
    },
    {
      "epoch": 0.9826372865712967,
      "grad_norm": 0.2873431444168091,
      "learning_rate": 1.8361545945668747e-07,
      "loss": 0.9483,
      "step": 17035
    },
    {
      "epoch": 0.9829257037378865,
      "grad_norm": 0.30190160870552063,
      "learning_rate": 1.775677350159044e-07,
      "loss": 0.9605,
      "step": 17040
    },
    {
      "epoch": 0.9832141209044762,
      "grad_norm": 0.32146456837654114,
      "learning_rate": 1.7162119352062135e-07,
      "loss": 0.9293,
      "step": 17045
    },
    {
      "epoch": 0.983502538071066,
      "grad_norm": 0.26646754145622253,
      "learning_rate": 1.657758409984278e-07,
      "loss": 0.9107,
      "step": 17050
    },
    {
      "epoch": 0.9837909552376557,
      "grad_norm": 0.2890629470348358,
      "learning_rate": 1.6003168337437313e-07,
      "loss": 0.953,
      "step": 17055
    },
    {
      "epoch": 0.9840793724042455,
      "grad_norm": 0.3307664692401886,
      "learning_rate": 1.5438872647086655e-07,
      "loss": 1.0018,
      "step": 17060
    },
    {
      "epoch": 0.9843677895708353,
      "grad_norm": 0.29319411516189575,
      "learning_rate": 1.488469760077993e-07,
      "loss": 0.9824,
      "step": 17065
    },
    {
      "epoch": 0.984656206737425,
      "grad_norm": 0.2756195366382599,
      "learning_rate": 1.4340643760244464e-07,
      "loss": 0.924,
      "step": 17070
    },
    {
      "epoch": 0.9849446239040147,
      "grad_norm": 0.2855581045150757,
      "learning_rate": 1.3806711676950245e-07,
      "loss": 0.9318,
      "step": 17075
    },
    {
      "epoch": 0.9852330410706045,
      "grad_norm": 0.2845672369003296,
      "learning_rate": 1.328290189210435e-07,
      "loss": 0.8953,
      "step": 17080
    },
    {
      "epoch": 0.9855214582371943,
      "grad_norm": 0.2991209924221039,
      "learning_rate": 1.2769214936657615e-07,
      "loss": 0.9597,
      "step": 17085
    },
    {
      "epoch": 0.9858098754037841,
      "grad_norm": 0.29595181345939636,
      "learning_rate": 1.2265651331296869e-07,
      "loss": 0.9461,
      "step": 17090
    },
    {
      "epoch": 0.9860982925703737,
      "grad_norm": 0.31181660294532776,
      "learning_rate": 1.1772211586449367e-07,
      "loss": 0.9795,
      "step": 17095
    },
    {
      "epoch": 0.9863867097369635,
      "grad_norm": 0.292348176240921,
      "learning_rate": 1.1288896202281685e-07,
      "loss": 0.9391,
      "step": 17100
    },
    {
      "epoch": 0.9866751269035533,
      "grad_norm": 0.7884487509727478,
      "learning_rate": 1.0815705668694165e-07,
      "loss": 0.9307,
      "step": 17105
    },
    {
      "epoch": 0.9869635440701431,
      "grad_norm": 0.3149639070034027,
      "learning_rate": 1.0352640465327578e-07,
      "loss": 0.9954,
      "step": 17110
    },
    {
      "epoch": 0.9872519612367328,
      "grad_norm": 0.3055214583873749,
      "learning_rate": 9.899701061558687e-08,
      "loss": 0.9155,
      "step": 17115
    },
    {
      "epoch": 0.9875403784033225,
      "grad_norm": 0.28656816482543945,
      "learning_rate": 9.456887916499125e-08,
      "loss": 0.996,
      "step": 17120
    },
    {
      "epoch": 0.9878287955699123,
      "grad_norm": 0.3164063096046448,
      "learning_rate": 9.024201478998739e-08,
      "loss": 0.9331,
      "step": 17125
    },
    {
      "epoch": 0.9881172127365021,
      "grad_norm": 0.2873050570487976,
      "learning_rate": 8.601642187640036e-08,
      "loss": 1.055,
      "step": 17130
    },
    {
      "epoch": 0.9884056299030919,
      "grad_norm": 0.2891038656234741,
      "learning_rate": 8.189210470742614e-08,
      "loss": 0.9559,
      "step": 17135
    },
    {
      "epoch": 0.9886940470696816,
      "grad_norm": 0.2986387312412262,
      "learning_rate": 7.786906746358735e-08,
      "loss": 1.0243,
      "step": 17140
    },
    {
      "epoch": 0.9889824642362713,
      "grad_norm": 0.30982112884521484,
      "learning_rate": 7.394731422274426e-08,
      "loss": 0.987,
      "step": 17145
    },
    {
      "epoch": 0.9892708814028611,
      "grad_norm": 0.3040391504764557,
      "learning_rate": 7.012684896011702e-08,
      "loss": 0.9656,
      "step": 17150
    },
    {
      "epoch": 0.9895592985694509,
      "grad_norm": 0.2852766811847687,
      "learning_rate": 6.640767554823013e-08,
      "loss": 0.937,
      "step": 17155
    },
    {
      "epoch": 0.9898477157360406,
      "grad_norm": 0.2964860200881958,
      "learning_rate": 6.278979775694582e-08,
      "loss": 0.9073,
      "step": 17160
    },
    {
      "epoch": 0.9901361329026304,
      "grad_norm": 0.26632729172706604,
      "learning_rate": 5.927321925346396e-08,
      "loss": 0.9722,
      "step": 17165
    },
    {
      "epoch": 0.9904245500692201,
      "grad_norm": 0.29934900999069214,
      "learning_rate": 5.585794360226659e-08,
      "loss": 1.0155,
      "step": 17170
    },
    {
      "epoch": 0.9907129672358099,
      "grad_norm": 0.2905521094799042,
      "learning_rate": 5.254397426520674e-08,
      "loss": 0.9827,
      "step": 17175
    },
    {
      "epoch": 0.9910013844023996,
      "grad_norm": 0.2757057547569275,
      "learning_rate": 4.9331314601408495e-08,
      "loss": 0.8905,
      "step": 17180
    },
    {
      "epoch": 0.9912898015689894,
      "grad_norm": 0.3028772473335266,
      "learning_rate": 4.621996786731142e-08,
      "loss": 0.9571,
      "step": 17185
    },
    {
      "epoch": 0.9915782187355792,
      "grad_norm": 0.28152334690093994,
      "learning_rate": 4.320993721668165e-08,
      "loss": 0.9648,
      "step": 17190
    },
    {
      "epoch": 0.991866635902169,
      "grad_norm": 0.2736474871635437,
      "learning_rate": 4.030122570055639e-08,
      "loss": 0.91,
      "step": 17195
    },
    {
      "epoch": 0.9921550530687586,
      "grad_norm": 0.31447696685791016,
      "learning_rate": 3.7493836267310514e-08,
      "loss": 0.9955,
      "step": 17200
    },
    {
      "epoch": 0.9924434702353484,
      "grad_norm": 0.3125499188899994,
      "learning_rate": 3.4787771762578856e-08,
      "loss": 0.9984,
      "step": 17205
    },
    {
      "epoch": 0.9927318874019382,
      "grad_norm": 0.28617557883262634,
      "learning_rate": 3.218303492932284e-08,
      "loss": 0.9312,
      "step": 17210
    },
    {
      "epoch": 0.993020304568528,
      "grad_norm": 0.29386937618255615,
      "learning_rate": 2.9679628407763837e-08,
      "loss": 0.8687,
      "step": 17215
    },
    {
      "epoch": 0.9933087217351176,
      "grad_norm": 0.3066183626651764,
      "learning_rate": 2.7277554735449794e-08,
      "loss": 1.0048,
      "step": 17220
    },
    {
      "epoch": 0.9935971389017074,
      "grad_norm": 0.28535598516464233,
      "learning_rate": 2.4976816347177524e-08,
      "loss": 0.9516,
      "step": 17225
    },
    {
      "epoch": 0.9938855560682972,
      "grad_norm": 0.27699753642082214,
      "learning_rate": 2.2777415575037098e-08,
      "loss": 0.9985,
      "step": 17230
    },
    {
      "epoch": 0.994173973234887,
      "grad_norm": 0.27334800362586975,
      "learning_rate": 2.0679354648422968e-08,
      "loss": 0.9417,
      "step": 17235
    },
    {
      "epoch": 0.9944623904014767,
      "grad_norm": 0.2618880569934845,
      "learning_rate": 1.8682635693978433e-08,
      "loss": 0.9599,
      "step": 17240
    },
    {
      "epoch": 0.9947508075680664,
      "grad_norm": 0.2842215895652771,
      "learning_rate": 1.6787260735640075e-08,
      "loss": 0.9852,
      "step": 17245
    },
    {
      "epoch": 0.9950392247346562,
      "grad_norm": 0.29653632640838623,
      "learning_rate": 1.499323169462663e-08,
      "loss": 0.9443,
      "step": 17250
    },
    {
      "epoch": 0.995327641901246,
      "grad_norm": 0.2780844569206238,
      "learning_rate": 1.3300550389394595e-08,
      "loss": 0.9895,
      "step": 17255
    },
    {
      "epoch": 0.9956160590678357,
      "grad_norm": 0.2827605903148651,
      "learning_rate": 1.1709218535715938e-08,
      "loss": 0.9583,
      "step": 17260
    },
    {
      "epoch": 0.9959044762344255,
      "grad_norm": 0.2900019884109497,
      "learning_rate": 1.0219237746611487e-08,
      "loss": 0.932,
      "step": 17265
    },
    {
      "epoch": 0.9961928934010152,
      "grad_norm": 0.28944316506385803,
      "learning_rate": 8.83060953235093e-09,
      "loss": 0.9875,
      "step": 17270
    },
    {
      "epoch": 0.996481310567605,
      "grad_norm": 0.3088100254535675,
      "learning_rate": 7.543335300497223e-09,
      "loss": 0.9681,
      "step": 17275
    },
    {
      "epoch": 0.9967697277341947,
      "grad_norm": 0.31131041049957275,
      "learning_rate": 6.357416355884382e-09,
      "loss": 0.9911,
      "step": 17280
    },
    {
      "epoch": 0.9970581449007845,
      "grad_norm": 0.28186625242233276,
      "learning_rate": 5.272853900573082e-09,
      "loss": 0.8708,
      "step": 17285
    },
    {
      "epoch": 0.9973465620673743,
      "grad_norm": 0.27050358057022095,
      "learning_rate": 4.289649033928367e-09,
      "loss": 0.9871,
      "step": 17290
    },
    {
      "epoch": 0.997634979233964,
      "grad_norm": 0.27937865257263184,
      "learning_rate": 3.407802752530831e-09,
      "loss": 0.9746,
      "step": 17295
    },
    {
      "epoch": 0.9979233964005537,
      "grad_norm": 0.28143933415412903,
      "learning_rate": 2.627315950265441e-09,
      "loss": 0.983,
      "step": 17300
    },
    {
      "epoch": 0.9982118135671435,
      "grad_norm": 0.2851989269256592,
      "learning_rate": 1.9481894182549198e-09,
      "loss": 0.9577,
      "step": 17305
    },
    {
      "epoch": 0.9985002307337333,
      "grad_norm": 0.2806662917137146,
      "learning_rate": 1.3704238448708496e-09,
      "loss": 0.9316,
      "step": 17310
    },
    {
      "epoch": 0.9987886479003231,
      "grad_norm": 0.2934742271900177,
      "learning_rate": 8.940198157558755e-10,
      "loss": 0.9058,
      "step": 17315
    },
    {
      "epoch": 0.9990770650669127,
      "grad_norm": 0.29554542899131775,
      "learning_rate": 5.189778138237067e-10,
      "loss": 0.9034,
      "step": 17320
    },
    {
      "epoch": 0.9993654822335025,
      "grad_norm": 0.28154152631759644,
      "learning_rate": 2.452982192036046e-10,
      "loss": 0.8279,
      "step": 17325
    },
    {
      "epoch": 0.9996538994000923,
      "grad_norm": 0.28424975275993347,
      "learning_rate": 7.298130931809865e-11,
      "loss": 0.9575,
      "step": 17330
    },
    {
      "epoch": 0.9999423165666821,
      "grad_norm": 0.27066484093666077,
      "learning_rate": 2.0272588274750093e-12,
      "loss": 0.9597,
      "step": 17335
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9577949643135071,
      "eval_runtime": 1918.8672,
      "eval_samples_per_second": 8.0,
      "eval_steps_per_second": 1.0,
      "step": 17336
    },
    {
      "epoch": 1.0,
      "step": 17336,
      "total_flos": 1.2189365563134312e+19,
      "train_loss": 0.9574739795073309,
      "train_runtime": 64802.032,
      "train_samples_per_second": 2.14,
      "train_steps_per_second": 0.268
    }
  ],
  "logging_steps": 5,
  "max_steps": 17336,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2189365563134312e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}