{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998199819981998,
  "eval_steps": 695,
  "global_step": 2777,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00036003600360036,
      "grad_norm": 5.391221523284912,
      "learning_rate": 4.000000000000001e-06,
      "loss": 11.6744,
      "step": 1
    },
    {
      "epoch": 0.00036003600360036,
      "eval_loss": 11.619186401367188,
      "eval_runtime": 101.0372,
      "eval_samples_per_second": 46.3,
      "eval_steps_per_second": 11.58,
      "step": 1
    },
    {
      "epoch": 0.00072007200720072,
      "grad_norm": 4.955807209014893,
      "learning_rate": 8.000000000000001e-06,
      "loss": 11.5784,
      "step": 2
    },
    {
      "epoch": 0.00108010801080108,
      "grad_norm": 6.398324966430664,
      "learning_rate": 1.2e-05,
      "loss": 11.8558,
      "step": 3
    },
    {
      "epoch": 0.00144014401440144,
      "grad_norm": 5.072609901428223,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 11.4906,
      "step": 4
    },
    {
      "epoch": 0.0018001800180018,
      "grad_norm": 6.004848957061768,
      "learning_rate": 2e-05,
      "loss": 11.8837,
      "step": 5
    },
    {
      "epoch": 0.00216021602160216,
      "grad_norm": 4.884363174438477,
      "learning_rate": 2.4e-05,
      "loss": 11.68,
      "step": 6
    },
    {
      "epoch": 0.0025202520252025204,
      "grad_norm": 5.704855442047119,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 11.68,
      "step": 7
    },
    {
      "epoch": 0.00288028802880288,
      "grad_norm": 5.609814643859863,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 11.5502,
      "step": 8
    },
    {
      "epoch": 0.0032403240324032404,
      "grad_norm": 5.240819931030273,
      "learning_rate": 3.6e-05,
      "loss": 11.7237,
      "step": 9
    },
    {
      "epoch": 0.0036003600360036,
      "grad_norm": 5.8036932945251465,
      "learning_rate": 4e-05,
      "loss": 11.7074,
      "step": 10
    },
    {
      "epoch": 0.0039603960396039604,
      "grad_norm": 6.281662464141846,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 11.4896,
      "step": 11
    },
    {
      "epoch": 0.00432043204320432,
      "grad_norm": 5.4554829597473145,
      "learning_rate": 4.8e-05,
      "loss": 11.6188,
      "step": 12
    },
    {
      "epoch": 0.00468046804680468,
      "grad_norm": 5.878482341766357,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 11.3579,
      "step": 13
    },
    {
      "epoch": 0.005040504050405041,
      "grad_norm": 5.315092086791992,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 11.3018,
      "step": 14
    },
    {
      "epoch": 0.0054005400540054005,
      "grad_norm": 5.732967853546143,
      "learning_rate": 6e-05,
      "loss": 11.4945,
      "step": 15
    },
    {
      "epoch": 0.00576057605760576,
      "grad_norm": 4.960424900054932,
      "learning_rate": 6.400000000000001e-05,
      "loss": 11.2206,
      "step": 16
    },
    {
      "epoch": 0.006120612061206121,
      "grad_norm": 5.72969913482666,
      "learning_rate": 6.800000000000001e-05,
      "loss": 11.2825,
      "step": 17
    },
    {
      "epoch": 0.006480648064806481,
      "grad_norm": 5.419050693511963,
      "learning_rate": 7.2e-05,
      "loss": 10.9409,
      "step": 18
    },
    {
      "epoch": 0.006840684068406841,
      "grad_norm": 5.5492987632751465,
      "learning_rate": 7.6e-05,
      "loss": 11.4013,
      "step": 19
    },
    {
      "epoch": 0.0072007200720072,
      "grad_norm": 5.730124473571777,
      "learning_rate": 8e-05,
      "loss": 11.2501,
      "step": 20
    },
    {
      "epoch": 0.007560756075607561,
      "grad_norm": 6.309354305267334,
      "learning_rate": 8.4e-05,
      "loss": 11.0562,
      "step": 21
    },
    {
      "epoch": 0.007920792079207921,
      "grad_norm": 5.861652374267578,
      "learning_rate": 8.800000000000001e-05,
      "loss": 11.0803,
      "step": 22
    },
    {
      "epoch": 0.008280828082808282,
      "grad_norm": 6.468849182128906,
      "learning_rate": 9.200000000000001e-05,
      "loss": 11.2922,
      "step": 23
    },
    {
      "epoch": 0.00864086408640864,
      "grad_norm": 7.292991638183594,
      "learning_rate": 9.6e-05,
      "loss": 10.8244,
      "step": 24
    },
    {
      "epoch": 0.009000900090009001,
      "grad_norm": 9.702167510986328,
      "learning_rate": 0.0001,
      "loss": 10.7772,
      "step": 25
    },
    {
      "epoch": 0.00936093609360936,
      "grad_norm": 5.527791976928711,
      "learning_rate": 0.00010400000000000001,
      "loss": 11.0536,
      "step": 26
    },
    {
      "epoch": 0.00972097209720972,
      "grad_norm": 5.660224437713623,
      "learning_rate": 0.00010800000000000001,
      "loss": 11.3104,
      "step": 27
    },
    {
      "epoch": 0.010081008100810081,
      "grad_norm": 5.601504802703857,
      "learning_rate": 0.00011200000000000001,
      "loss": 10.9429,
      "step": 28
    },
    {
      "epoch": 0.01044104410441044,
      "grad_norm": 5.513530731201172,
      "learning_rate": 0.000116,
      "loss": 10.9418,
      "step": 29
    },
    {
      "epoch": 0.010801080108010801,
      "grad_norm": 5.222030162811279,
      "learning_rate": 0.00012,
      "loss": 10.6514,
      "step": 30
    },
    {
      "epoch": 0.011161116111611162,
      "grad_norm": 5.038498401641846,
      "learning_rate": 0.000124,
      "loss": 10.8715,
      "step": 31
    },
    {
      "epoch": 0.01152115211521152,
      "grad_norm": 5.638880729675293,
      "learning_rate": 0.00012800000000000002,
      "loss": 10.3996,
      "step": 32
    },
    {
      "epoch": 0.011881188118811881,
      "grad_norm": 5.269554615020752,
      "learning_rate": 0.000132,
      "loss": 10.3747,
      "step": 33
    },
    {
      "epoch": 0.012241224122412242,
      "grad_norm": 5.762288570404053,
      "learning_rate": 0.00013600000000000003,
      "loss": 10.5589,
      "step": 34
    },
    {
      "epoch": 0.012601260126012601,
      "grad_norm": 5.092434406280518,
      "learning_rate": 0.00014,
      "loss": 10.2383,
      "step": 35
    },
    {
      "epoch": 0.012961296129612962,
      "grad_norm": 5.491393089294434,
      "learning_rate": 0.000144,
      "loss": 10.0614,
      "step": 36
    },
    {
      "epoch": 0.01332133213321332,
      "grad_norm": 5.155731201171875,
      "learning_rate": 0.000148,
      "loss": 9.8436,
      "step": 37
    },
    {
      "epoch": 0.013681368136813681,
      "grad_norm": 5.45465087890625,
      "learning_rate": 0.000152,
      "loss": 10.14,
      "step": 38
    },
    {
      "epoch": 0.014041404140414042,
      "grad_norm": 5.7989606857299805,
      "learning_rate": 0.00015600000000000002,
      "loss": 9.9675,
      "step": 39
    },
    {
      "epoch": 0.0144014401440144,
      "grad_norm": 5.706261157989502,
      "learning_rate": 0.00016,
      "loss": 9.5643,
      "step": 40
    },
    {
      "epoch": 0.014761476147614761,
      "grad_norm": 5.091904640197754,
      "learning_rate": 0.000164,
      "loss": 9.5091,
      "step": 41
    },
    {
      "epoch": 0.015121512151215122,
      "grad_norm": 5.238409996032715,
      "learning_rate": 0.000168,
      "loss": 9.3674,
      "step": 42
    },
    {
      "epoch": 0.015481548154815481,
      "grad_norm": 4.824737071990967,
      "learning_rate": 0.000172,
      "loss": 9.5337,
      "step": 43
    },
    {
      "epoch": 0.015841584158415842,
      "grad_norm": 4.952434539794922,
      "learning_rate": 0.00017600000000000002,
      "loss": 9.2853,
      "step": 44
    },
    {
      "epoch": 0.016201620162016202,
      "grad_norm": 5.05830192565918,
      "learning_rate": 0.00018,
      "loss": 9.5244,
      "step": 45
    },
    {
      "epoch": 0.016561656165616563,
      "grad_norm": 4.598903656005859,
      "learning_rate": 0.00018400000000000003,
      "loss": 9.1549,
      "step": 46
    },
    {
      "epoch": 0.01692169216921692,
      "grad_norm": 5.076130390167236,
      "learning_rate": 0.000188,
      "loss": 9.4544,
      "step": 47
    },
    {
      "epoch": 0.01728172817281728,
      "grad_norm": 4.568606853485107,
      "learning_rate": 0.000192,
      "loss": 9.3589,
      "step": 48
    },
    {
      "epoch": 0.01764176417641764,
      "grad_norm": 6.102865219116211,
      "learning_rate": 0.000196,
      "loss": 9.3574,
      "step": 49
    },
    {
      "epoch": 0.018001800180018002,
      "grad_norm": 6.295242786407471,
      "learning_rate": 0.0002,
      "loss": 9.2564,
      "step": 50
    },
    {
      "epoch": 0.018361836183618363,
      "grad_norm": 4.133947849273682,
      "learning_rate": 0.0001999999336410622,
      "loss": 9.0434,
      "step": 51
    },
    {
      "epoch": 0.01872187218721872,
      "grad_norm": 3.816929817199707,
      "learning_rate": 0.00019999973456433681,
      "loss": 8.9869,
      "step": 52
    },
    {
      "epoch": 0.01908190819081908,
      "grad_norm": 3.87251877784729,
      "learning_rate": 0.00019999940277008808,
      "loss": 8.9041,
      "step": 53
    },
    {
      "epoch": 0.01944194419441944,
      "grad_norm": 3.925431728363037,
      "learning_rate": 0.00019999893825875637,
      "loss": 8.8908,
      "step": 54
    },
    {
      "epoch": 0.019801980198019802,
      "grad_norm": 4.555037975311279,
      "learning_rate": 0.00019999834103095812,
      "loss": 8.9972,
      "step": 55
    },
    {
      "epoch": 0.020162016201620163,
      "grad_norm": 4.029503345489502,
      "learning_rate": 0.00019999761108748597,
      "loss": 8.9023,
      "step": 56
    },
    {
      "epoch": 0.020522052205220524,
      "grad_norm": 3.193443536758423,
      "learning_rate": 0.00019999674842930876,
      "loss": 8.6223,
      "step": 57
    },
    {
      "epoch": 0.02088208820882088,
      "grad_norm": 3.635911226272583,
      "learning_rate": 0.0001999957530575713,
      "loss": 8.4718,
      "step": 58
    },
    {
      "epoch": 0.02124212421242124,
      "grad_norm": 3.919508218765259,
      "learning_rate": 0.00019999462497359466,
      "loss": 8.4637,
      "step": 59
    },
    {
      "epoch": 0.021602160216021602,
      "grad_norm": 3.876338481903076,
      "learning_rate": 0.000199993364178876,
      "loss": 8.576,
      "step": 60
    },
    {
      "epoch": 0.021962196219621963,
      "grad_norm": 3.777447462081909,
      "learning_rate": 0.00019999197067508865,
      "loss": 8.3571,
      "step": 61
    },
    {
      "epoch": 0.022322232223222323,
      "grad_norm": 3.5673623085021973,
      "learning_rate": 0.000199990444464082,
      "loss": 8.5479,
      "step": 62
    },
    {
      "epoch": 0.02268226822682268,
      "grad_norm": 3.583082914352417,
      "learning_rate": 0.00019998878554788166,
      "loss": 8.1977,
      "step": 63
    },
    {
      "epoch": 0.02304230423042304,
      "grad_norm": 2.835787773132324,
      "learning_rate": 0.00019998699392868922,
      "loss": 8.2202,
      "step": 64
    },
    {
      "epoch": 0.023402340234023402,
      "grad_norm": 3.372305154800415,
      "learning_rate": 0.00019998506960888256,
      "loss": 8.3326,
      "step": 65
    },
    {
      "epoch": 0.023762376237623763,
      "grad_norm": 3.7524638175964355,
      "learning_rate": 0.00019998301259101554,
      "loss": 8.2353,
      "step": 66
    },
    {
      "epoch": 0.024122412241224123,
      "grad_norm": 3.233792304992676,
      "learning_rate": 0.00019998082287781826,
      "loss": 7.8709,
      "step": 67
    },
    {
      "epoch": 0.024482448244824484,
      "grad_norm": 3.251406192779541,
      "learning_rate": 0.0001999785004721968,
      "loss": 8.3691,
      "step": 68
    },
    {
      "epoch": 0.02484248424842484,
      "grad_norm": 3.2626826763153076,
      "learning_rate": 0.00019997604537723342,
      "loss": 8.1906,
      "step": 69
    },
    {
      "epoch": 0.025202520252025202,
      "grad_norm": 3.216583490371704,
      "learning_rate": 0.00019997345759618647,
      "loss": 7.9592,
      "step": 70
    },
    {
      "epoch": 0.025562556255625563,
      "grad_norm": 3.309875011444092,
      "learning_rate": 0.0001999707371324904,
      "loss": 8.0691,
      "step": 71
    },
    {
      "epoch": 0.025922592259225923,
      "grad_norm": 3.656224250793457,
      "learning_rate": 0.00019996788398975578,
      "loss": 8.0343,
      "step": 72
    },
    {
      "epoch": 0.026282628262826284,
      "grad_norm": 3.816601276397705,
      "learning_rate": 0.00019996489817176918,
      "loss": 8.3206,
      "step": 73
    },
    {
      "epoch": 0.02664266426642664,
      "grad_norm": 4.16778564453125,
      "learning_rate": 0.00019996177968249334,
      "loss": 7.9706,
      "step": 74
    },
    {
      "epoch": 0.027002700270027002,
      "grad_norm": 5.989762306213379,
      "learning_rate": 0.0001999585285260671,
      "loss": 8.1316,
      "step": 75
    },
    {
      "epoch": 0.027362736273627362,
      "grad_norm": 6.801653861999512,
      "learning_rate": 0.00019995514470680527,
      "loss": 8.0789,
      "step": 76
    },
    {
      "epoch": 0.027722772277227723,
      "grad_norm": 6.341710090637207,
      "learning_rate": 0.00019995162822919883,
      "loss": 7.5837,
      "step": 77
    },
    {
      "epoch": 0.028082808280828084,
      "grad_norm": 4.6270575523376465,
      "learning_rate": 0.0001999479790979147,
      "loss": 7.8758,
      "step": 78
    },
    {
      "epoch": 0.028442844284428444,
      "grad_norm": 3.671124219894409,
      "learning_rate": 0.00019994419731779602,
      "loss": 7.8061,
      "step": 79
    },
    {
      "epoch": 0.0288028802880288,
      "grad_norm": 3.308453321456909,
      "learning_rate": 0.0001999402828938618,
      "loss": 8.104,
      "step": 80
    },
    {
      "epoch": 0.029162916291629162,
      "grad_norm": 2.890606641769409,
      "learning_rate": 0.00019993623583130723,
      "loss": 7.624,
      "step": 81
    },
    {
      "epoch": 0.029522952295229523,
      "grad_norm": 3.6852352619171143,
      "learning_rate": 0.0001999320561355035,
      "loss": 7.7895,
      "step": 82
    },
    {
      "epoch": 0.029882988298829884,
      "grad_norm": 3.132948160171509,
      "learning_rate": 0.00019992774381199778,
      "loss": 7.9111,
      "step": 83
    },
    {
      "epoch": 0.030243024302430244,
      "grad_norm": 3.1692230701446533,
      "learning_rate": 0.00019992329886651331,
      "loss": 7.3854,
      "step": 84
    },
    {
      "epoch": 0.0306030603060306,
      "grad_norm": 2.8339498043060303,
      "learning_rate": 0.00019991872130494933,
      "loss": 7.4969,
      "step": 85
    },
    {
      "epoch": 0.030963096309630962,
      "grad_norm": 2.557858943939209,
      "learning_rate": 0.00019991401113338104,
      "loss": 7.5541,
      "step": 86
    },
    {
      "epoch": 0.031323132313231326,
      "grad_norm": 3.0587706565856934,
      "learning_rate": 0.00019990916835805974,
      "loss": 7.1242,
      "step": 87
    },
    {
      "epoch": 0.031683168316831684,
      "grad_norm": 3.4932024478912354,
      "learning_rate": 0.00019990419298541263,
      "loss": 7.0832,
      "step": 88
    },
    {
      "epoch": 0.03204320432043204,
      "grad_norm": 3.3262085914611816,
      "learning_rate": 0.00019989908502204292,
      "loss": 7.0744,
      "step": 89
    },
    {
      "epoch": 0.032403240324032405,
      "grad_norm": 2.8556482791900635,
      "learning_rate": 0.00019989384447472984,
      "loss": 7.3702,
      "step": 90
    },
    {
      "epoch": 0.03276327632763276,
      "grad_norm": 3.247647762298584,
      "learning_rate": 0.00019988847135042842,
      "loss": 7.356,
      "step": 91
    },
    {
      "epoch": 0.033123312331233126,
      "grad_norm": 2.9567129611968994,
      "learning_rate": 0.00019988296565626987,
      "loss": 7.0572,
      "step": 92
    },
    {
      "epoch": 0.03348334833483348,
      "grad_norm": 3.328278064727783,
      "learning_rate": 0.00019987732739956115,
      "loss": 6.9327,
      "step": 93
    },
    {
      "epoch": 0.03384338433843384,
      "grad_norm": 3.0802903175354004,
      "learning_rate": 0.00019987155658778529,
      "loss": 7.3882,
      "step": 94
    },
    {
      "epoch": 0.034203420342034205,
      "grad_norm": 3.4182589054107666,
      "learning_rate": 0.00019986565322860115,
      "loss": 7.3887,
      "step": 95
    },
    {
      "epoch": 0.03456345634563456,
      "grad_norm": 3.3240511417388916,
      "learning_rate": 0.00019985961732984356,
      "loss": 7.045,
      "step": 96
    },
    {
      "epoch": 0.034923492349234926,
      "grad_norm": 3.699915647506714,
      "learning_rate": 0.00019985344889952327,
      "loss": 7.2765,
      "step": 97
    },
    {
      "epoch": 0.03528352835283528,
      "grad_norm": 3.5944244861602783,
      "learning_rate": 0.00019984714794582683,
      "loss": 7.3357,
      "step": 98
    },
    {
      "epoch": 0.03564356435643564,
      "grad_norm": 4.225754261016846,
      "learning_rate": 0.00019984071447711675,
      "loss": 7.1458,
      "step": 99
    },
    {
      "epoch": 0.036003600360036005,
      "grad_norm": 6.09434700012207,
      "learning_rate": 0.0001998341485019314,
      "loss": 7.3447,
      "step": 100
    },
    {
      "epoch": 0.03636363636363636,
      "grad_norm": 4.437989711761475,
      "learning_rate": 0.000199827450028985,
      "loss": 7.1732,
      "step": 101
    },
    {
      "epoch": 0.036723672367236726,
      "grad_norm": 4.548201560974121,
      "learning_rate": 0.00019982061906716764,
      "loss": 7.2764,
      "step": 102
    },
    {
      "epoch": 0.03708370837083708,
      "grad_norm": 3.839555263519287,
      "learning_rate": 0.00019981365562554522,
      "loss": 7.3635,
      "step": 103
    },
    {
      "epoch": 0.03744374437443744,
      "grad_norm": 3.7629542350769043,
      "learning_rate": 0.00019980655971335945,
      "loss": 7.5026,
      "step": 104
    },
    {
      "epoch": 0.037803780378037805,
      "grad_norm": 3.7448863983154297,
      "learning_rate": 0.00019979933134002789,
      "loss": 7.0869,
      "step": 105
    },
    {
      "epoch": 0.03816381638163816,
      "grad_norm": 4.550889492034912,
      "learning_rate": 0.00019979197051514386,
      "loss": 6.6809,
      "step": 106
    },
    {
      "epoch": 0.038523852385238526,
      "grad_norm": 3.9035253524780273,
      "learning_rate": 0.00019978447724847652,
      "loss": 6.9597,
      "step": 107
    },
    {
      "epoch": 0.03888388838883888,
      "grad_norm": 3.1347122192382812,
      "learning_rate": 0.00019977685154997082,
      "loss": 6.6319,
      "step": 108
    },
    {
      "epoch": 0.03924392439243925,
      "grad_norm": 3.7817418575286865,
      "learning_rate": 0.0001997690934297473,
      "loss": 6.8094,
      "step": 109
    },
    {
      "epoch": 0.039603960396039604,
      "grad_norm": 2.7304024696350098,
      "learning_rate": 0.00019976120289810247,
      "loss": 6.7951,
      "step": 110
    },
    {
      "epoch": 0.03996399639963996,
      "grad_norm": 3.387932300567627,
      "learning_rate": 0.00019975317996550845,
      "loss": 6.7153,
      "step": 111
    },
    {
      "epoch": 0.040324032403240326,
      "grad_norm": 2.864016532897949,
      "learning_rate": 0.0001997450246426131,
      "loss": 6.7663,
      "step": 112
    },
    {
      "epoch": 0.04068406840684068,
      "grad_norm": 2.8113808631896973,
      "learning_rate": 0.00019973673694024,
      "loss": 6.629,
      "step": 113
    },
    {
      "epoch": 0.04104410441044105,
      "grad_norm": 2.6250040531158447,
      "learning_rate": 0.00019972831686938843,
      "loss": 6.2432,
      "step": 114
    },
    {
      "epoch": 0.041404140414041404,
      "grad_norm": 2.955963134765625,
      "learning_rate": 0.00019971976444123327,
      "loss": 6.7115,
      "step": 115
    },
    {
      "epoch": 0.04176417641764176,
      "grad_norm": 3.0330076217651367,
      "learning_rate": 0.00019971107966712518,
      "loss": 6.6801,
      "step": 116
    },
    {
      "epoch": 0.042124212421242126,
      "grad_norm": 3.377650260925293,
      "learning_rate": 0.00019970226255859038,
      "loss": 6.523,
      "step": 117
    },
    {
      "epoch": 0.04248424842484248,
      "grad_norm": 3.515782117843628,
      "learning_rate": 0.00019969331312733076,
      "loss": 6.7592,
      "step": 118
    },
    {
      "epoch": 0.04284428442844285,
      "grad_norm": 3.7806248664855957,
      "learning_rate": 0.0001996842313852238,
      "loss": 6.6889,
      "step": 119
    },
    {
      "epoch": 0.043204320432043204,
      "grad_norm": 3.435762405395508,
      "learning_rate": 0.0001996750173443226,
      "loss": 6.7071,
      "step": 120
    },
    {
      "epoch": 0.04356435643564356,
      "grad_norm": 3.39365553855896,
      "learning_rate": 0.00019966567101685587,
      "loss": 6.6396,
      "step": 121
    },
    {
      "epoch": 0.043924392439243926,
      "grad_norm": 3.594801425933838,
      "learning_rate": 0.0001996561924152278,
      "loss": 6.435,
      "step": 122
    },
    {
      "epoch": 0.04428442844284428,
      "grad_norm": 4.025651454925537,
      "learning_rate": 0.00019964658155201829,
      "loss": 7.0821,
      "step": 123
    },
    {
      "epoch": 0.04464446444644465,
      "grad_norm": 3.463228464126587,
      "learning_rate": 0.00019963683843998253,
      "loss": 6.5565,
      "step": 124
    },
    {
      "epoch": 0.045004500450045004,
      "grad_norm": 4.928868770599365,
      "learning_rate": 0.00019962696309205148,
      "loss": 6.7665,
      "step": 125
    },
    {
      "epoch": 0.04536453645364536,
      "grad_norm": 4.847606658935547,
      "learning_rate": 0.00019961695552133145,
      "loss": 6.4155,
      "step": 126
    },
    {
      "epoch": 0.045724572457245725,
      "grad_norm": 6.5078654289245605,
      "learning_rate": 0.00019960681574110426,
      "loss": 6.878,
      "step": 127
    },
    {
      "epoch": 0.04608460846084608,
      "grad_norm": 4.644542217254639,
      "learning_rate": 0.0001995965437648273,
      "loss": 6.4635,
      "step": 128
    },
    {
      "epoch": 0.04644464446444645,
      "grad_norm": 4.330336093902588,
      "learning_rate": 0.00019958613960613318,
      "loss": 6.434,
      "step": 129
    },
    {
      "epoch": 0.046804680468046804,
      "grad_norm": 3.3238542079925537,
      "learning_rate": 0.00019957560327883017,
      "loss": 6.1268,
      "step": 130
    },
    {
      "epoch": 0.04716471647164717,
      "grad_norm": 3.5520670413970947,
      "learning_rate": 0.0001995649347969019,
      "loss": 6.4838,
      "step": 131
    },
    {
      "epoch": 0.047524752475247525,
      "grad_norm": 3.1387791633605957,
      "learning_rate": 0.0001995541341745072,
      "loss": 6.042,
      "step": 132
    },
    {
      "epoch": 0.04788478847884788,
      "grad_norm": 5.484352111816406,
      "learning_rate": 0.0001995432014259806,
      "loss": 6.2728,
      "step": 133
    },
    {
      "epoch": 0.04824482448244825,
      "grad_norm": 3.3683643341064453,
      "learning_rate": 0.00019953213656583168,
      "loss": 6.0557,
      "step": 134
    },
    {
      "epoch": 0.048604860486048604,
      "grad_norm": 2.7083218097686768,
      "learning_rate": 0.00019952093960874556,
      "loss": 6.4812,
      "step": 135
    },
    {
      "epoch": 0.04896489648964897,
      "grad_norm": 2.7084672451019287,
      "learning_rate": 0.00019950961056958258,
      "loss": 6.6694,
      "step": 136
    },
    {
      "epoch": 0.049324932493249325,
      "grad_norm": 2.790422201156616,
      "learning_rate": 0.00019949814946337838,
      "loss": 6.2798,
      "step": 137
    },
    {
      "epoch": 0.04968496849684968,
      "grad_norm": 3.1344246864318848,
      "learning_rate": 0.00019948655630534396,
      "loss": 6.1944,
      "step": 138
    },
    {
      "epoch": 0.05004500450045005,
      "grad_norm": 3.3408470153808594,
      "learning_rate": 0.00019947483111086545,
      "loss": 6.3483,
      "step": 139
    },
    {
      "epoch": 0.050405040504050404,
      "grad_norm": 2.659766435623169,
      "learning_rate": 0.00019946297389550433,
      "loss": 6.2585,
      "step": 140
    },
    {
      "epoch": 0.05076507650765077,
      "grad_norm": 2.8162424564361572,
      "learning_rate": 0.0001994509846749972,
      "loss": 6.349,
      "step": 141
    },
    {
      "epoch": 0.051125112511251125,
      "grad_norm": 3.114379644393921,
      "learning_rate": 0.0001994388634652559,
      "loss": 6.4585,
      "step": 142
    },
    {
      "epoch": 0.05148514851485148,
      "grad_norm": 3.2588369846343994,
      "learning_rate": 0.00019942661028236745,
      "loss": 6.2732,
      "step": 143
    },
    {
      "epoch": 0.051845184518451846,
      "grad_norm": 3.0048773288726807,
      "learning_rate": 0.00019941422514259402,
      "loss": 6.4056,
      "step": 144
    },
    {
      "epoch": 0.052205220522052204,
      "grad_norm": 2.687142848968506,
      "learning_rate": 0.00019940170806237293,
      "loss": 6.118,
      "step": 145
    },
    {
      "epoch": 0.05256525652565257,
      "grad_norm": 2.4336202144622803,
      "learning_rate": 0.00019938905905831654,
      "loss": 6.3721,
      "step": 146
    },
    {
      "epoch": 0.052925292529252925,
      "grad_norm": 3.150280475616455,
      "learning_rate": 0.00019937627814721237,
      "loss": 5.7968,
      "step": 147
    },
    {
      "epoch": 0.05328532853285328,
      "grad_norm": 3.0133798122406006,
      "learning_rate": 0.00019936336534602295,
      "loss": 6.0878,
      "step": 148
    },
    {
      "epoch": 0.053645364536453646,
      "grad_norm": 3.5488665103912354,
      "learning_rate": 0.0001993503206718859,
      "loss": 6.6781,
      "step": 149
    },
    {
      "epoch": 0.054005400540054004,
      "grad_norm": 4.942127704620361,
      "learning_rate": 0.0001993371441421138,
      "loss": 6.2772,
      "step": 150
    },
    {
      "epoch": 0.05436543654365437,
      "grad_norm": 4.3673200607299805,
      "learning_rate": 0.00019932383577419432,
      "loss": 6.272,
      "step": 151
    },
    {
      "epoch": 0.054725472547254725,
      "grad_norm": 2.8859446048736572,
      "learning_rate": 0.00019931039558578997,
      "loss": 5.7987,
      "step": 152
    },
    {
      "epoch": 0.05508550855085508,
      "grad_norm": 3.215345859527588,
      "learning_rate": 0.00019929682359473834,
      "loss": 5.9682,
      "step": 153
    },
    {
      "epoch": 0.055445544554455446,
      "grad_norm": 3.1671488285064697,
      "learning_rate": 0.00019928311981905184,
      "loss": 6.6487,
      "step": 154
    },
    {
      "epoch": 0.0558055805580558,
      "grad_norm": 3.035339832305908,
      "learning_rate": 0.00019926928427691786,
      "loss": 6.3195,
      "step": 155
    },
    {
      "epoch": 0.05616561656165617,
      "grad_norm": 2.819296360015869,
      "learning_rate": 0.00019925531698669862,
      "loss": 6.2373,
      "step": 156
    },
    {
      "epoch": 0.056525652565256525,
      "grad_norm": 2.6943399906158447,
      "learning_rate": 0.00019924121796693127,
      "loss": 5.7856,
      "step": 157
    },
    {
      "epoch": 0.05688568856885689,
      "grad_norm": 3.4273905754089355,
      "learning_rate": 0.00019922698723632767,
      "loss": 6.406,
      "step": 158
    },
    {
      "epoch": 0.057245724572457246,
      "grad_norm": 3.285869836807251,
      "learning_rate": 0.00019921262481377455,
      "loss": 6.297,
      "step": 159
    },
    {
      "epoch": 0.0576057605760576,
      "grad_norm": 3.145364761352539,
      "learning_rate": 0.0001991981307183334,
      "loss": 5.9435,
      "step": 160
    },
    {
      "epoch": 0.05796579657965797,
      "grad_norm": 3.239286184310913,
      "learning_rate": 0.0001991835049692405,
      "loss": 5.8419,
      "step": 161
    },
    {
      "epoch": 0.058325832583258325,
      "grad_norm": 2.5348026752471924,
      "learning_rate": 0.00019916874758590684,
      "loss": 5.8359,
      "step": 162
    },
    {
      "epoch": 0.05868586858685869,
      "grad_norm": 2.9281060695648193,
      "learning_rate": 0.0001991538585879181,
      "loss": 5.6654,
      "step": 163
    },
    {
      "epoch": 0.059045904590459046,
      "grad_norm": 3.6080162525177,
      "learning_rate": 0.0001991388379950346,
      "loss": 6.3303,
      "step": 164
    },
    {
      "epoch": 0.0594059405940594,
      "grad_norm": 3.5625193119049072,
      "learning_rate": 0.00019912368582719142,
      "loss": 5.9362,
      "step": 165
    },
    {
      "epoch": 0.05976597659765977,
      "grad_norm": 2.861238956451416,
      "learning_rate": 0.00019910840210449817,
      "loss": 5.6185,
      "step": 166
    },
    {
      "epoch": 0.060126012601260125,
      "grad_norm": 3.1427133083343506,
      "learning_rate": 0.00019909298684723904,
      "loss": 5.9749,
      "step": 167
    },
    {
      "epoch": 0.06048604860486049,
      "grad_norm": 2.6581568717956543,
      "learning_rate": 0.0001990774400758729,
      "loss": 5.7056,
      "step": 168
    },
    {
      "epoch": 0.060846084608460846,
      "grad_norm": 2.240708351135254,
      "learning_rate": 0.00019906176181103304,
      "loss": 6.0279,
      "step": 169
    },
    {
      "epoch": 0.0612061206120612,
      "grad_norm": 2.948249578475952,
      "learning_rate": 0.00019904595207352737,
      "loss": 5.6673,
      "step": 170
    },
    {
      "epoch": 0.06156615661566157,
      "grad_norm": 2.8593263626098633,
      "learning_rate": 0.00019903001088433816,
      "loss": 5.6589,
      "step": 171
    },
    {
      "epoch": 0.061926192619261924,
      "grad_norm": 2.995178699493408,
      "learning_rate": 0.0001990139382646223,
      "loss": 5.6627,
      "step": 172
    },
    {
      "epoch": 0.06228622862286229,
      "grad_norm": 3.6065990924835205,
      "learning_rate": 0.000198997734235711,
      "loss": 6.2448,
      "step": 173
    },
    {
      "epoch": 0.06264626462646265,
      "grad_norm": 4.687837600708008,
      "learning_rate": 0.00019898139881910986,
      "loss": 5.9976,
      "step": 174
    },
    {
      "epoch": 0.063006300630063,
      "grad_norm": 4.329146862030029,
      "learning_rate": 0.00019896493203649897,
      "loss": 6.4935,
      "step": 175
    },
    {
      "epoch": 0.06336633663366337,
      "grad_norm": 3.881782054901123,
      "learning_rate": 0.00019894833390973266,
      "loss": 5.8349,
      "step": 176
    },
    {
      "epoch": 0.06372637263726373,
      "grad_norm": 4.233585834503174,
      "learning_rate": 0.00019893160446083963,
      "loss": 5.9879,
      "step": 177
    },
    {
      "epoch": 0.06408640864086408,
      "grad_norm": 3.393862247467041,
      "learning_rate": 0.0001989147437120228,
      "loss": 5.8239,
      "step": 178
    },
    {
      "epoch": 0.06444644464446445,
      "grad_norm": 3.3493380546569824,
      "learning_rate": 0.00019889775168565943,
      "loss": 6.1353,
      "step": 179
    },
    {
      "epoch": 0.06480648064806481,
      "grad_norm": 3.0439703464508057,
      "learning_rate": 0.000198880628404301,
      "loss": 6.0866,
      "step": 180
    },
    {
      "epoch": 0.06516651665166516,
      "grad_norm": 3.1560416221618652,
      "learning_rate": 0.0001988633738906731,
      "loss": 5.6327,
      "step": 181
    },
    {
      "epoch": 0.06552655265526552,
      "grad_norm": 4.7591657638549805,
      "learning_rate": 0.00019884598816767563,
      "loss": 5.839,
      "step": 182
    },
    {
      "epoch": 0.06588658865886589,
      "grad_norm": 2.586634874343872,
      "learning_rate": 0.0001988284712583825,
      "loss": 5.4748,
      "step": 183
    },
    {
      "epoch": 0.06624662466246625,
      "grad_norm": 2.2929999828338623,
      "learning_rate": 0.0001988108231860418,
      "loss": 5.8326,
      "step": 184
    },
    {
      "epoch": 0.0666066606660666,
      "grad_norm": 2.576805830001831,
      "learning_rate": 0.0001987930439740757,
      "loss": 5.8293,
      "step": 185
    },
    {
      "epoch": 0.06696669666966697,
      "grad_norm": 3.108707904815674,
      "learning_rate": 0.0001987751336460803,
      "loss": 5.9231,
      "step": 186
    },
    {
      "epoch": 0.06732673267326733,
      "grad_norm": 2.368267059326172,
      "learning_rate": 0.00019875709222582594,
      "loss": 5.6469,
      "step": 187
    },
    {
      "epoch": 0.06768676867686768,
      "grad_norm": 3.020864486694336,
      "learning_rate": 0.0001987389197372567,
      "loss": 5.8781,
      "step": 188
    },
    {
      "epoch": 0.06804680468046805,
      "grad_norm": 2.432720184326172,
      "learning_rate": 0.00019872061620449078,
      "loss": 6.2033,
      "step": 189
    },
    {
      "epoch": 0.06840684068406841,
      "grad_norm": 2.4212331771850586,
      "learning_rate": 0.00019870218165182025,
      "loss": 5.7962,
      "step": 190
    },
    {
      "epoch": 0.06876687668766877,
      "grad_norm": 2.8409509658813477,
      "learning_rate": 0.00019868361610371097,
      "loss": 5.6254,
      "step": 191
    },
    {
      "epoch": 0.06912691269126912,
      "grad_norm": 2.1944754123687744,
      "learning_rate": 0.00019866491958480284,
      "loss": 5.5786,
      "step": 192
    },
    {
      "epoch": 0.06948694869486949,
      "grad_norm": 3.18087100982666,
      "learning_rate": 0.00019864609211990946,
      "loss": 5.561,
      "step": 193
    },
    {
      "epoch": 0.06984698469846985,
      "grad_norm": 2.7998907566070557,
      "learning_rate": 0.0001986271337340182,
      "loss": 5.8962,
      "step": 194
    },
    {
      "epoch": 0.0702070207020702,
      "grad_norm": 2.61316180229187,
      "learning_rate": 0.00019860804445229023,
      "loss": 5.6438,
      "step": 195
    },
    {
      "epoch": 0.07056705670567057,
      "grad_norm": 2.3283307552337646,
      "learning_rate": 0.0001985888243000605,
      "loss": 5.7617,
      "step": 196
    },
    {
      "epoch": 0.07092709270927093,
      "grad_norm": 3.0777363777160645,
      "learning_rate": 0.00019856947330283752,
      "loss": 5.5723,
      "step": 197
    },
    {
      "epoch": 0.07128712871287128,
      "grad_norm": 3.0738508701324463,
      "learning_rate": 0.00019854999148630355,
      "loss": 5.8281,
      "step": 198
    },
    {
      "epoch": 0.07164716471647165,
      "grad_norm": 3.864041328430176,
      "learning_rate": 0.00019853037887631448,
      "loss": 6.1932,
      "step": 199
    },
    {
      "epoch": 0.07200720072007201,
      "grad_norm": 4.276803493499756,
      "learning_rate": 0.0001985106354988997,
      "loss": 6.0477,
      "step": 200
    },
    {
      "epoch": 0.07236723672367237,
      "grad_norm": 4.5397748947143555,
      "learning_rate": 0.0001984907613802622,
      "loss": 5.4879,
      "step": 201
    },
    {
      "epoch": 0.07272727272727272,
      "grad_norm": 3.2944319248199463,
      "learning_rate": 0.0001984707565467785,
      "loss": 6.1137,
      "step": 202
    },
    {
      "epoch": 0.07308730873087309,
      "grad_norm": 3.5978901386260986,
      "learning_rate": 0.0001984506210249986,
      "loss": 5.8026,
      "step": 203
    },
    {
      "epoch": 0.07344734473447345,
      "grad_norm": 3.680521249771118,
      "learning_rate": 0.00019843035484164593,
      "loss": 5.7198,
      "step": 204
    },
    {
      "epoch": 0.0738073807380738,
      "grad_norm": 2.7733376026153564,
      "learning_rate": 0.00019840995802361734,
      "loss": 5.8044,
      "step": 205
    },
    {
      "epoch": 0.07416741674167417,
      "grad_norm": 3.0227551460266113,
      "learning_rate": 0.00019838943059798304,
      "loss": 5.625,
      "step": 206
    },
    {
      "epoch": 0.07452745274527453,
      "grad_norm": 2.693079710006714,
      "learning_rate": 0.00019836877259198662,
      "loss": 5.3865,
      "step": 207
    },
    {
      "epoch": 0.07488748874887488,
      "grad_norm": 2.970263957977295,
      "learning_rate": 0.00019834798403304494,
      "loss": 5.4135,
      "step": 208
    },
    {
      "epoch": 0.07524752475247524,
      "grad_norm": 2.4294893741607666,
      "learning_rate": 0.0001983270649487481,
      "loss": 5.59,
      "step": 209
    },
    {
      "epoch": 0.07560756075607561,
      "grad_norm": 2.7756547927856445,
      "learning_rate": 0.0001983060153668595,
      "loss": 5.3611,
      "step": 210
    },
    {
      "epoch": 0.07596759675967597,
      "grad_norm": 2.610945224761963,
      "learning_rate": 0.00019828483531531568,
      "loss": 5.6271,
      "step": 211
    },
    {
      "epoch": 0.07632763276327632,
      "grad_norm": 3.3113486766815186,
      "learning_rate": 0.00019826352482222638,
      "loss": 5.4726,
      "step": 212
    },
    {
      "epoch": 0.07668766876687669,
      "grad_norm": 2.1098732948303223,
      "learning_rate": 0.0001982420839158744,
      "loss": 5.2566,
      "step": 213
    },
    {
      "epoch": 0.07704770477047705,
      "grad_norm": 2.361694812774658,
      "learning_rate": 0.0001982205126247157,
      "loss": 5.3923,
      "step": 214
    },
    {
      "epoch": 0.0774077407740774,
      "grad_norm": 2.2943015098571777,
      "learning_rate": 0.00019819881097737915,
      "loss": 5.6276,
      "step": 215
    },
    {
      "epoch": 0.07776777677767777,
      "grad_norm": 2.6616368293762207,
      "learning_rate": 0.0001981769790026668,
      "loss": 5.4411,
      "step": 216
    },
    {
      "epoch": 0.07812781278127813,
      "grad_norm": 2.558661937713623,
      "learning_rate": 0.00019815501672955358,
      "loss": 5.0324,
      "step": 217
    },
    {
      "epoch": 0.0784878487848785,
      "grad_norm": 3.133481025695801,
      "learning_rate": 0.00019813292418718732,
      "loss": 5.8089,
      "step": 218
    },
    {
      "epoch": 0.07884788478847884,
      "grad_norm": 2.8957459926605225,
      "learning_rate": 0.0001981107014048888,
      "loss": 5.6448,
      "step": 219
    },
    {
      "epoch": 0.07920792079207921,
      "grad_norm": 2.435612916946411,
      "learning_rate": 0.00019808834841215158,
      "loss": 5.5601,
      "step": 220
    },
    {
      "epoch": 0.07956795679567957,
      "grad_norm": 2.545100450515747,
      "learning_rate": 0.0001980658652386421,
      "loss": 5.6624,
      "step": 221
    },
    {
      "epoch": 0.07992799279927992,
      "grad_norm": 2.8909523487091064,
      "learning_rate": 0.00019804325191419956,
      "loss": 5.9231,
      "step": 222
    },
    {
      "epoch": 0.08028802880288029,
      "grad_norm": 2.783823251724243,
      "learning_rate": 0.00019802050846883592,
      "loss": 6.2122,
      "step": 223
    },
    {
      "epoch": 0.08064806480648065,
      "grad_norm": 3.4865949153900146,
      "learning_rate": 0.0001979976349327357,
      "loss": 6.0217,
      "step": 224
    },
    {
      "epoch": 0.081008100810081,
      "grad_norm": 4.174108028411865,
      "learning_rate": 0.00019797463133625626,
      "loss": 6.0016,
      "step": 225
    },
    {
      "epoch": 0.08136813681368137,
      "grad_norm": 3.6121280193328857,
      "learning_rate": 0.00019795149770992745,
      "loss": 5.5763,
      "step": 226
    },
    {
      "epoch": 0.08172817281728173,
      "grad_norm": 3.4251668453216553,
      "learning_rate": 0.00019792823408445174,
      "loss": 5.8605,
      "step": 227
    },
    {
      "epoch": 0.0820882088208821,
      "grad_norm": 3.3870620727539062,
      "learning_rate": 0.0001979048404907041,
      "loss": 5.8316,
      "step": 228
    },
    {
      "epoch": 0.08244824482448244,
      "grad_norm": 1.994755506515503,
      "learning_rate": 0.000197881316959732,
      "loss": 5.5797,
      "step": 229
    },
    {
      "epoch": 0.08280828082808281,
      "grad_norm": 2.855531692504883,
      "learning_rate": 0.00019785766352275542,
      "loss": 5.6349,
      "step": 230
    },
    {
      "epoch": 0.08316831683168317,
      "grad_norm": 2.65555739402771,
      "learning_rate": 0.00019783388021116664,
      "loss": 5.4636,
      "step": 231
    },
    {
      "epoch": 0.08352835283528352,
      "grad_norm": 2.921633243560791,
      "learning_rate": 0.00019780996705653044,
      "loss": 5.417,
      "step": 232
    },
    {
      "epoch": 0.08388838883888389,
      "grad_norm": 3.474546194076538,
      "learning_rate": 0.00019778592409058378,
      "loss": 5.3859,
      "step": 233
    },
    {
      "epoch": 0.08424842484248425,
      "grad_norm": 2.511760711669922,
      "learning_rate": 0.00019776175134523597,
      "loss": 5.5061,
      "step": 234
    },
    {
      "epoch": 0.0846084608460846,
      "grad_norm": 2.2720072269439697,
      "learning_rate": 0.00019773744885256863,
      "loss": 5.2613,
      "step": 235
    },
    {
      "epoch": 0.08496849684968497,
      "grad_norm": 2.45263409614563,
      "learning_rate": 0.0001977130166448355,
      "loss": 5.7824,
      "step": 236
    },
    {
      "epoch": 0.08532853285328533,
      "grad_norm": 2.810452461242676,
      "learning_rate": 0.0001976884547544624,
      "loss": 5.6098,
      "step": 237
    },
    {
      "epoch": 0.0856885688568857,
      "grad_norm": 2.936281204223633,
      "learning_rate": 0.00019766376321404746,
      "loss": 5.4772,
      "step": 238
    },
    {
      "epoch": 0.08604860486048604,
      "grad_norm": 3.000941276550293,
      "learning_rate": 0.00019763894205636072,
      "loss": 5.4838,
      "step": 239
    },
    {
      "epoch": 0.08640864086408641,
      "grad_norm": 1.9919918775558472,
      "learning_rate": 0.00019761399131434427,
      "loss": 5.1271,
      "step": 240
    },
    {
      "epoch": 0.08676867686768677,
      "grad_norm": 2.818498373031616,
      "learning_rate": 0.00019758891102111226,
      "loss": 5.3386,
      "step": 241
    },
    {
      "epoch": 0.08712871287128712,
      "grad_norm": 2.278205156326294,
      "learning_rate": 0.00019756370120995066,
      "loss": 5.5661,
      "step": 242
    },
    {
      "epoch": 0.08748874887488749,
      "grad_norm": 2.9450225830078125,
      "learning_rate": 0.00019753836191431742,
      "loss": 5.3917,
      "step": 243
    },
    {
      "epoch": 0.08784878487848785,
      "grad_norm": 2.6072285175323486,
      "learning_rate": 0.00019751289316784237,
      "loss": 5.1402,
      "step": 244
    },
    {
      "epoch": 0.08820882088208822,
      "grad_norm": 1.9053361415863037,
      "learning_rate": 0.000197487295004327,
      "loss": 5.4664,
      "step": 245
    },
    {
      "epoch": 0.08856885688568857,
      "grad_norm": 2.451340436935425,
      "learning_rate": 0.00019746156745774468,
      "loss": 5.5551,
      "step": 246
    },
    {
      "epoch": 0.08892889288928893,
      "grad_norm": 2.6773860454559326,
      "learning_rate": 0.0001974357105622405,
      "loss": 5.5615,
      "step": 247
    },
    {
      "epoch": 0.0892889288928893,
      "grad_norm": 2.6578361988067627,
      "learning_rate": 0.00019740972435213115,
      "loss": 5.8491,
      "step": 248
    },
    {
      "epoch": 0.08964896489648964,
      "grad_norm": 3.2765934467315674,
      "learning_rate": 0.00019738360886190496,
      "loss": 5.8359,
      "step": 249
    },
    {
      "epoch": 0.09000900090009001,
      "grad_norm": 4.44022274017334,
      "learning_rate": 0.0001973573641262219,
      "loss": 6.3776,
      "step": 250
    },
    {
      "epoch": 0.09036903690369037,
      "grad_norm": 3.5060126781463623,
      "learning_rate": 0.00019733099017991341,
      "loss": 5.2258,
      "step": 251
    },
    {
      "epoch": 0.09072907290729072,
      "grad_norm": 3.3604626655578613,
      "learning_rate": 0.00019730448705798239,
      "loss": 5.5046,
      "step": 252
    },
    {
      "epoch": 0.09108910891089109,
      "grad_norm": 2.7966156005859375,
      "learning_rate": 0.00019727785479560327,
      "loss": 5.6467,
      "step": 253
    },
    {
      "epoch": 0.09144914491449145,
      "grad_norm": 2.827324628829956,
      "learning_rate": 0.0001972510934281218,
      "loss": 5.5496,
      "step": 254
    },
    {
      "epoch": 0.09180918091809182,
      "grad_norm": 2.5747945308685303,
      "learning_rate": 0.0001972242029910551,
      "loss": 5.1269,
      "step": 255
    },
    {
      "epoch": 0.09216921692169217,
      "grad_norm": 2.47976016998291,
      "learning_rate": 0.0001971971835200916,
      "loss": 5.5411,
      "step": 256
    },
    {
      "epoch": 0.09252925292529253,
      "grad_norm": 2.3428852558135986,
      "learning_rate": 0.00019717003505109095,
      "loss": 5.1775,
      "step": 257
    },
    {
      "epoch": 0.0928892889288929,
      "grad_norm": 3.0765557289123535,
      "learning_rate": 0.00019714275762008405,
      "loss": 5.3954,
      "step": 258
    },
    {
      "epoch": 0.09324932493249324,
      "grad_norm": 2.549563407897949,
      "learning_rate": 0.0001971153512632729,
      "loss": 5.3875,
      "step": 259
    },
    {
      "epoch": 0.09360936093609361,
      "grad_norm": 2.8717353343963623,
      "learning_rate": 0.00019708781601703065,
      "loss": 5.047,
      "step": 260
    },
    {
      "epoch": 0.09396939693969397,
      "grad_norm": 2.2203121185302734,
      "learning_rate": 0.00019706015191790145,
      "loss": 5.2141,
      "step": 261
    },
    {
      "epoch": 0.09432943294329434,
      "grad_norm": 2.383986473083496,
      "learning_rate": 0.00019703235900260055,
      "loss": 5.429,
      "step": 262
    },
    {
      "epoch": 0.09468946894689469,
      "grad_norm": 2.2943928241729736,
      "learning_rate": 0.00019700443730801413,
      "loss": 5.4756,
      "step": 263
    },
    {
      "epoch": 0.09504950495049505,
      "grad_norm": 1.895622730255127,
      "learning_rate": 0.0001969763868711992,
      "loss": 5.1072,
      "step": 264
    },
    {
      "epoch": 0.09540954095409541,
      "grad_norm": 1.9407848119735718,
      "learning_rate": 0.0001969482077293838,
      "loss": 5.0003,
      "step": 265
    },
    {
      "epoch": 0.09576957695769577,
      "grad_norm": 2.3450191020965576,
      "learning_rate": 0.00019691989991996663,
      "loss": 5.3993,
      "step": 266
    },
    {
      "epoch": 0.09612961296129613,
      "grad_norm": 2.7572624683380127,
      "learning_rate": 0.00019689146348051719,
      "loss": 5.648,
      "step": 267
    },
    {
      "epoch": 0.0964896489648965,
      "grad_norm": 2.0007078647613525,
      "learning_rate": 0.00019686289844877579,
      "loss": 5.0566,
      "step": 268
    },
    {
      "epoch": 0.09684968496849684,
      "grad_norm": 2.1224796772003174,
      "learning_rate": 0.00019683420486265327,
      "loss": 4.9926,
      "step": 269
    },
    {
      "epoch": 0.09720972097209721,
      "grad_norm": 1.9481003284454346,
      "learning_rate": 0.00019680538276023118,
      "loss": 5.3721,
      "step": 270
    },
    {
      "epoch": 0.09756975697569757,
      "grad_norm": 2.778371810913086,
      "learning_rate": 0.0001967764321797616,
      "loss": 5.43,
      "step": 271
    },
    {
      "epoch": 0.09792979297929794,
      "grad_norm": 2.592320442199707,
      "learning_rate": 0.0001967473531596671,
      "loss": 5.4041,
      "step": 272
    },
    {
      "epoch": 0.09828982898289829,
      "grad_norm": 2.72464656829834,
      "learning_rate": 0.00019671814573854078,
      "loss": 5.9389,
      "step": 273
    },
    {
      "epoch": 0.09864986498649865,
      "grad_norm": 3.2835888862609863,
      "learning_rate": 0.00019668880995514604,
      "loss": 5.5321,
      "step": 274
    },
    {
      "epoch": 0.09900990099009901,
      "grad_norm": 4.350769519805908,
      "learning_rate": 0.00019665934584841682,
      "loss": 6.0003,
      "step": 275
    },
    {
      "epoch": 0.09936993699369936,
      "grad_norm": 3.744112968444824,
      "learning_rate": 0.00019662975345745713,
      "loss": 5.7387,
      "step": 276
    },
    {
      "epoch": 0.09972997299729973,
      "grad_norm": 2.7531681060791016,
      "learning_rate": 0.00019660003282154147,
      "loss": 5.0919,
      "step": 277
    },
    {
      "epoch": 0.1000900090009001,
      "grad_norm": 2.2056615352630615,
      "learning_rate": 0.00019657018398011434,
      "loss": 5.1607,
      "step": 278
    },
    {
      "epoch": 0.10045004500450044,
      "grad_norm": 3.7169909477233887,
      "learning_rate": 0.0001965402069727906,
      "loss": 5.37,
      "step": 279
    },
    {
      "epoch": 0.10081008100810081,
      "grad_norm": 4.994881629943848,
      "learning_rate": 0.00019651010183935498,
      "loss": 5.4116,
      "step": 280
    },
    {
      "epoch": 0.10117011701170117,
      "grad_norm": 2.110886335372925,
      "learning_rate": 0.00019647986861976246,
      "loss": 5.6185,
      "step": 281
    },
    {
      "epoch": 0.10153015301530154,
      "grad_norm": 2.4211294651031494,
      "learning_rate": 0.00019644950735413788,
      "loss": 5.2895,
      "step": 282
    },
    {
      "epoch": 0.10189018901890189,
      "grad_norm": 2.3990745544433594,
      "learning_rate": 0.0001964190180827761,
      "loss": 5.5752,
      "step": 283
    },
    {
      "epoch": 0.10225022502250225,
      "grad_norm": 2.2132537364959717,
      "learning_rate": 0.00019638840084614182,
      "loss": 4.953,
      "step": 284
    },
    {
      "epoch": 0.10261026102610261,
      "grad_norm": 2.9952592849731445,
      "learning_rate": 0.00019635765568486955,
      "loss": 5.2637,
      "step": 285
    },
    {
      "epoch": 0.10297029702970296,
      "grad_norm": 2.621925115585327,
      "learning_rate": 0.00019632678263976368,
      "loss": 5.3269,
      "step": 286
    },
    {
      "epoch": 0.10333033303330333,
      "grad_norm": 1.9161827564239502,
      "learning_rate": 0.0001962957817517982,
      "loss": 5.2743,
      "step": 287
    },
    {
      "epoch": 0.10369036903690369,
      "grad_norm": 2.8809425830841064,
      "learning_rate": 0.00019626465306211687,
      "loss": 4.8225,
      "step": 288
    },
    {
      "epoch": 0.10405040504050406,
      "grad_norm": 2.3898348808288574,
      "learning_rate": 0.00019623339661203301,
      "loss": 5.2633,
      "step": 289
    },
    {
      "epoch": 0.10441044104410441,
      "grad_norm": 2.4710440635681152,
      "learning_rate": 0.00019620201244302952,
      "loss": 5.1064,
      "step": 290
    },
    {
      "epoch": 0.10477047704770477,
      "grad_norm": 2.3487277030944824,
      "learning_rate": 0.00019617050059675878,
      "loss": 5.3717,
      "step": 291
    },
    {
      "epoch": 0.10513051305130514,
      "grad_norm": 2.8060383796691895,
      "learning_rate": 0.0001961388611150427,
      "loss": 5.0828,
      "step": 292
    },
    {
      "epoch": 0.10549054905490549,
      "grad_norm": 1.8839240074157715,
      "learning_rate": 0.00019610709403987246,
      "loss": 4.8968,
      "step": 293
    },
    {
      "epoch": 0.10585058505850585,
      "grad_norm": 2.4121265411376953,
      "learning_rate": 0.00019607519941340867,
      "loss": 5.3503,
      "step": 294
    },
    {
      "epoch": 0.10621062106210621,
      "grad_norm": 2.379385232925415,
      "learning_rate": 0.00019604317727798124,
      "loss": 5.4793,
      "step": 295
    },
    {
      "epoch": 0.10657065706570656,
      "grad_norm": 2.5868208408355713,
      "learning_rate": 0.00019601102767608923,
      "loss": 5.4413,
      "step": 296
    },
    {
      "epoch": 0.10693069306930693,
      "grad_norm": 2.258801221847534,
      "learning_rate": 0.00019597875065040094,
      "loss": 5.3289,
      "step": 297
    },
    {
      "epoch": 0.10729072907290729,
      "grad_norm": 3.434406042098999,
      "learning_rate": 0.0001959463462437537,
      "loss": 5.8155,
      "step": 298
    },
    {
      "epoch": 0.10765076507650766,
      "grad_norm": 2.431286573410034,
      "learning_rate": 0.00019591381449915397,
      "loss": 5.5256,
      "step": 299
    },
    {
      "epoch": 0.10801080108010801,
      "grad_norm": 2.626918315887451,
      "learning_rate": 0.0001958811554597772,
      "loss": 5.6643,
      "step": 300
    },
    {
      "epoch": 0.10837083708370837,
      "grad_norm": 4.818332672119141,
      "learning_rate": 0.00019584836916896781,
      "loss": 5.7093,
      "step": 301
    },
    {
      "epoch": 0.10873087308730874,
      "grad_norm": 2.928095579147339,
      "learning_rate": 0.000195815455670239,
      "loss": 5.4955,
      "step": 302
    },
    {
      "epoch": 0.10909090909090909,
      "grad_norm": 2.9136202335357666,
      "learning_rate": 0.0001957824150072729,
      "loss": 5.2354,
      "step": 303
    },
    {
      "epoch": 0.10945094509450945,
      "grad_norm": 2.9085636138916016,
      "learning_rate": 0.0001957492472239204,
      "loss": 5.5329,
      "step": 304
    },
    {
      "epoch": 0.10981098109810981,
      "grad_norm": 1.9821232557296753,
      "learning_rate": 0.00019571595236420102,
      "loss": 5.2906,
      "step": 305
    },
    {
      "epoch": 0.11017101710171016,
      "grad_norm": 2.7555158138275146,
      "learning_rate": 0.00019568253047230302,
      "loss": 5.003,
      "step": 306
    },
    {
      "epoch": 0.11053105310531053,
      "grad_norm": 2.532947063446045,
      "learning_rate": 0.00019564898159258324,
      "loss": 5.206,
      "step": 307
    },
    {
      "epoch": 0.11089108910891089,
      "grad_norm": 2.4232444763183594,
      "learning_rate": 0.00019561530576956703,
      "loss": 5.0053,
      "step": 308
    },
    {
      "epoch": 0.11125112511251126,
      "grad_norm": 3.1183736324310303,
      "learning_rate": 0.00019558150304794822,
      "loss": 5.078,
      "step": 309
    },
    {
      "epoch": 0.1116111611161116,
      "grad_norm": 2.4659345149993896,
      "learning_rate": 0.00019554757347258907,
      "loss": 5.1274,
      "step": 310
    },
    {
      "epoch": 0.11197119711971197,
      "grad_norm": 2.3671152591705322,
      "learning_rate": 0.0001955135170885202,
      "loss": 5.4931,
      "step": 311
    },
    {
      "epoch": 0.11233123312331234,
      "grad_norm": 2.0349647998809814,
      "learning_rate": 0.0001954793339409405,
      "loss": 5.1148,
      "step": 312
    },
    {
      "epoch": 0.11269126912691269,
      "grad_norm": 2.184335470199585,
      "learning_rate": 0.00019544502407521712,
      "loss": 5.6701,
      "step": 313
    },
    {
      "epoch": 0.11305130513051305,
      "grad_norm": 2.2550289630889893,
      "learning_rate": 0.00019541058753688538,
      "loss": 5.3741,
      "step": 314
    },
    {
      "epoch": 0.11341134113411341,
      "grad_norm": 2.894897937774658,
      "learning_rate": 0.00019537602437164875,
      "loss": 5.1487,
      "step": 315
    },
    {
      "epoch": 0.11377137713771378,
      "grad_norm": 2.2867188453674316,
      "learning_rate": 0.0001953413346253787,
      "loss": 4.9861,
      "step": 316
    },
    {
      "epoch": 0.11413141314131413,
      "grad_norm": 2.5983200073242188,
      "learning_rate": 0.00019530651834411474,
      "loss": 5.1494,
      "step": 317
    },
    {
      "epoch": 0.11449144914491449,
      "grad_norm": 2.7470312118530273,
      "learning_rate": 0.0001952715755740643,
      "loss": 5.0505,
      "step": 318
    },
    {
      "epoch": 0.11485148514851486,
      "grad_norm": 2.3131463527679443,
      "learning_rate": 0.00019523650636160268,
      "loss": 4.8398,
      "step": 319
    },
    {
      "epoch": 0.1152115211521152,
      "grad_norm": 2.4200801849365234,
      "learning_rate": 0.00019520131075327298,
      "loss": 5.3922,
      "step": 320
    },
    {
      "epoch": 0.11557155715571557,
      "grad_norm": 1.904872179031372,
      "learning_rate": 0.0001951659887957861,
      "loss": 5.5282,
      "step": 321
    },
    {
      "epoch": 0.11593159315931593,
      "grad_norm": 1.897890329360962,
      "learning_rate": 0.00019513054053602055,
      "loss": 5.37,
      "step": 322
    },
    {
      "epoch": 0.11629162916291629,
      "grad_norm": 2.7623322010040283,
      "learning_rate": 0.00019509496602102252,
      "loss": 5.7574,
      "step": 323
    },
    {
      "epoch": 0.11665166516651665,
      "grad_norm": 4.012531757354736,
      "learning_rate": 0.00019505926529800576,
      "loss": 5.5631,
      "step": 324
    },
    {
      "epoch": 0.11701170117011701,
      "grad_norm": 3.5521020889282227,
      "learning_rate": 0.00019502343841435151,
      "loss": 5.8678,
      "step": 325
    },
    {
      "epoch": 0.11737173717371738,
      "grad_norm": 3.4806554317474365,
      "learning_rate": 0.00019498748541760846,
      "loss": 5.2348,
      "step": 326
    },
    {
      "epoch": 0.11773177317731773,
      "grad_norm": 3.5824429988861084,
      "learning_rate": 0.00019495140635549261,
      "loss": 5.5141,
      "step": 327
    },
    {
      "epoch": 0.11809180918091809,
      "grad_norm": 2.9857945442199707,
      "learning_rate": 0.00019491520127588738,
      "loss": 5.2804,
      "step": 328
    },
    {
      "epoch": 0.11845184518451846,
      "grad_norm": 1.9040719270706177,
      "learning_rate": 0.00019487887022684336,
      "loss": 5.0913,
      "step": 329
    },
    {
      "epoch": 0.1188118811881188,
      "grad_norm": 2.358011245727539,
      "learning_rate": 0.00019484241325657835,
      "loss": 5.3166,
      "step": 330
    },
    {
      "epoch": 0.11917191719171917,
      "grad_norm": 2.452613115310669,
      "learning_rate": 0.00019480583041347726,
      "loss": 5.2579,
      "step": 331
    },
    {
      "epoch": 0.11953195319531953,
      "grad_norm": 1.885299563407898,
      "learning_rate": 0.0001947691217460921,
      "loss": 5.2208,
      "step": 332
    },
    {
      "epoch": 0.1198919891989199,
      "grad_norm": 2.377636671066284,
      "learning_rate": 0.00019473228730314179,
      "loss": 5.3096,
      "step": 333
    },
    {
      "epoch": 0.12025202520252025,
      "grad_norm": 2.386121988296509,
      "learning_rate": 0.00019469532713351222,
      "loss": 5.1569,
      "step": 334
    },
    {
      "epoch": 0.12061206120612061,
      "grad_norm": 1.9236092567443848,
      "learning_rate": 0.00019465824128625617,
      "loss": 5.1291,
      "step": 335
    },
    {
      "epoch": 0.12097209720972098,
      "grad_norm": 2.0662996768951416,
      "learning_rate": 0.00019462102981059317,
      "loss": 5.4015,
      "step": 336
    },
    {
      "epoch": 0.12133213321332133,
      "grad_norm": 2.1840786933898926,
      "learning_rate": 0.00019458369275590954,
      "loss": 5.1743,
      "step": 337
    },
    {
      "epoch": 0.12169216921692169,
      "grad_norm": 2.220691204071045,
      "learning_rate": 0.00019454623017175812,
      "loss": 5.0971,
      "step": 338
    },
    {
      "epoch": 0.12205220522052206,
      "grad_norm": 1.8053256273269653,
      "learning_rate": 0.00019450864210785858,
      "loss": 5.347,
      "step": 339
    },
    {
      "epoch": 0.1224122412241224,
      "grad_norm": 1.9502235651016235,
      "learning_rate": 0.0001944709286140969,
      "loss": 5.2748,
      "step": 340
    },
    {
      "epoch": 0.12277227722772277,
      "grad_norm": 1.7731366157531738,
      "learning_rate": 0.0001944330897405257,
      "loss": 5.2389,
      "step": 341
    },
    {
      "epoch": 0.12313231323132313,
      "grad_norm": 2.857713460922241,
      "learning_rate": 0.00019439512553736394,
      "loss": 5.5584,
      "step": 342
    },
    {
      "epoch": 0.1234923492349235,
      "grad_norm": 1.5755183696746826,
      "learning_rate": 0.00019435703605499683,
      "loss": 5.0005,
      "step": 343
    },
    {
      "epoch": 0.12385238523852385,
      "grad_norm": 2.1318392753601074,
      "learning_rate": 0.00019431882134397598,
      "loss": 4.8136,
      "step": 344
    },
    {
      "epoch": 0.12421242124212421,
      "grad_norm": 2.3851094245910645,
      "learning_rate": 0.0001942804814550191,
      "loss": 5.2057,
      "step": 345
    },
    {
      "epoch": 0.12457245724572458,
      "grad_norm": 2.685013771057129,
      "learning_rate": 0.0001942420164390101,
      "loss": 5.5869,
      "step": 346
    },
    {
      "epoch": 0.12493249324932493,
      "grad_norm": 2.550307035446167,
      "learning_rate": 0.0001942034263469989,
      "loss": 5.068,
      "step": 347
    },
    {
      "epoch": 0.1252925292529253,
      "grad_norm": 2.6020565032958984,
      "learning_rate": 0.00019416471123020156,
      "loss": 5.5365,
      "step": 348
    },
    {
      "epoch": 0.12565256525652566,
      "grad_norm": 2.915011405944824,
      "learning_rate": 0.0001941258711399998,
      "loss": 5.5553,
      "step": 349
    },
    {
      "epoch": 0.126012601260126,
      "grad_norm": 3.451205015182495,
      "learning_rate": 0.00019408690612794148,
      "loss": 5.6268,
      "step": 350
    },
    {
      "epoch": 0.12637263726372638,
      "grad_norm": 4.140198707580566,
      "learning_rate": 0.00019404781624574011,
      "loss": 5.5532,
      "step": 351
    },
    {
      "epoch": 0.12673267326732673,
      "grad_norm": 2.5888521671295166,
      "learning_rate": 0.00019400860154527493,
      "loss": 5.0767,
      "step": 352
    },
    {
      "epoch": 0.12709270927092708,
      "grad_norm": 3.074704170227051,
      "learning_rate": 0.00019396926207859084,
      "loss": 5.1876,
      "step": 353
    },
    {
      "epoch": 0.12745274527452746,
      "grad_norm": 2.2525579929351807,
      "learning_rate": 0.0001939297978978984,
      "loss": 4.954,
      "step": 354
    },
    {
      "epoch": 0.1278127812781278,
      "grad_norm": 2.2643632888793945,
      "learning_rate": 0.0001938902090555736,
      "loss": 5.5216,
      "step": 355
    },
    {
      "epoch": 0.12817281728172816,
      "grad_norm": 2.2976651191711426,
      "learning_rate": 0.00019385049560415794,
      "loss": 5.3145,
      "step": 356
    },
    {
      "epoch": 0.12853285328532854,
      "grad_norm": 2.773254156112671,
      "learning_rate": 0.00019381065759635822,
      "loss": 5.2893,
      "step": 357
    },
    {
      "epoch": 0.1288928892889289,
      "grad_norm": 1.7148758172988892,
      "learning_rate": 0.0001937706950850466,
      "loss": 4.8919,
      "step": 358
    },
    {
      "epoch": 0.12925292529252924,
      "grad_norm": 1.769167184829712,
      "learning_rate": 0.00019373060812326052,
      "loss": 5.1463,
      "step": 359
    },
    {
      "epoch": 0.12961296129612962,
      "grad_norm": 2.7855257987976074,
      "learning_rate": 0.00019369039676420252,
      "loss": 5.3797,
      "step": 360
    },
    {
      "epoch": 0.12997299729972997,
      "grad_norm": 1.7530555725097656,
      "learning_rate": 0.00019365006106124028,
      "loss": 5.0662,
      "step": 361
    },
    {
      "epoch": 0.13033303330333032,
      "grad_norm": 1.963545560836792,
      "learning_rate": 0.00019360960106790643,
      "loss": 5.0004,
      "step": 362
    },
    {
      "epoch": 0.1306930693069307,
      "grad_norm": 1.887229561805725,
      "learning_rate": 0.0001935690168378987,
      "loss": 5.1733,
      "step": 363
    },
    {
      "epoch": 0.13105310531053105,
      "grad_norm": 1.974086046218872,
      "learning_rate": 0.00019352830842507958,
      "loss": 5.1651,
      "step": 364
    },
    {
      "epoch": 0.13141314131413143,
      "grad_norm": 2.0009772777557373,
      "learning_rate": 0.00019348747588347637,
      "loss": 5.1493,
      "step": 365
    },
    {
      "epoch": 0.13177317731773178,
      "grad_norm": 1.8167927265167236,
      "learning_rate": 0.0001934465192672812,
      "loss": 4.9338,
      "step": 366
    },
    {
      "epoch": 0.13213321332133213,
      "grad_norm": 1.8077260255813599,
      "learning_rate": 0.0001934054386308508,
      "loss": 4.8103,
      "step": 367
    },
    {
      "epoch": 0.1324932493249325,
      "grad_norm": 2.6427648067474365,
      "learning_rate": 0.00019336423402870653,
      "loss": 5.4321,
      "step": 368
    },
    {
      "epoch": 0.13285328532853286,
      "grad_norm": 2.079245090484619,
      "learning_rate": 0.00019332290551553425,
      "loss": 5.3425,
      "step": 369
    },
    {
      "epoch": 0.1332133213321332,
      "grad_norm": 1.9009953737258911,
      "learning_rate": 0.00019328145314618432,
      "loss": 5.3169,
      "step": 370
    },
    {
      "epoch": 0.13357335733573358,
      "grad_norm": 1.8705310821533203,
      "learning_rate": 0.0001932398769756714,
      "loss": 4.9403,
      "step": 371
    },
    {
      "epoch": 0.13393339333933393,
      "grad_norm": 1.8321329355239868,
      "learning_rate": 0.0001931981770591745,
      "loss": 5.1256,
      "step": 372
    },
    {
      "epoch": 0.13429342934293428,
      "grad_norm": 2.4803407192230225,
      "learning_rate": 0.0001931563534520369,
      "loss": 5.7172,
      "step": 373
    },
    {
      "epoch": 0.13465346534653466,
      "grad_norm": 2.4806675910949707,
      "learning_rate": 0.00019311440620976597,
      "loss": 5.2023,
      "step": 374
    },
    {
      "epoch": 0.135013501350135,
      "grad_norm": 2.764505386352539,
      "learning_rate": 0.00019307233538803323,
      "loss": 5.7188,
      "step": 375
    },
    {
      "epoch": 0.13537353735373536,
      "grad_norm": 3.1487159729003906,
      "learning_rate": 0.0001930301410426741,
      "loss": 5.4085,
      "step": 376
    },
    {
      "epoch": 0.13573357335733574,
      "grad_norm": 4.365496635437012,
      "learning_rate": 0.00019298782322968815,
      "loss": 5.181,
      "step": 377
    },
    {
      "epoch": 0.1360936093609361,
      "grad_norm": 4.676678657531738,
      "learning_rate": 0.0001929453820052386,
      "loss": 4.8029,
      "step": 378
    },
    {
      "epoch": 0.13645364536453644,
      "grad_norm": 2.482147455215454,
      "learning_rate": 0.00019290281742565256,
      "loss": 5.2323,
      "step": 379
    },
    {
      "epoch": 0.13681368136813682,
      "grad_norm": 2.6805992126464844,
      "learning_rate": 0.0001928601295474208,
      "loss": 4.7856,
      "step": 380
    },
    {
      "epoch": 0.13717371737173717,
      "grad_norm": 2.0542752742767334,
      "learning_rate": 0.00019281731842719782,
      "loss": 4.8101,
      "step": 381
    },
    {
      "epoch": 0.13753375337533755,
      "grad_norm": 1.3544633388519287,
      "learning_rate": 0.0001927743841218016,
      "loss": 4.9205,
      "step": 382
    },
    {
      "epoch": 0.1378937893789379,
      "grad_norm": 1.80148184299469,
      "learning_rate": 0.00019273132668821364,
      "loss": 5.2472,
      "step": 383
    },
    {
      "epoch": 0.13825382538253825,
      "grad_norm": 1.7780407667160034,
      "learning_rate": 0.00019268814618357886,
      "loss": 5.1249,
      "step": 384
    },
    {
      "epoch": 0.13861386138613863,
      "grad_norm": 1.7747406959533691,
      "learning_rate": 0.00019264484266520547,
      "loss": 5.0265,
      "step": 385
    },
    {
      "epoch": 0.13897389738973898,
      "grad_norm": 2.0032706260681152,
      "learning_rate": 0.00019260141619056507,
      "loss": 4.8149,
      "step": 386
    },
    {
      "epoch": 0.13933393339333933,
      "grad_norm": 1.8248376846313477,
      "learning_rate": 0.00019255786681729225,
      "loss": 5.0561,
      "step": 387
    },
    {
      "epoch": 0.1396939693969397,
      "grad_norm": 2.1387648582458496,
      "learning_rate": 0.0001925141946031849,
      "loss": 5.2459,
      "step": 388
    },
    {
      "epoch": 0.14005400540054005,
      "grad_norm": 1.6618605852127075,
      "learning_rate": 0.0001924703996062038,
      "loss": 5.5427,
      "step": 389
    },
    {
      "epoch": 0.1404140414041404,
      "grad_norm": 1.8091217279434204,
      "learning_rate": 0.00019242648188447272,
      "loss": 5.3355,
      "step": 390
    },
    {
      "epoch": 0.14077407740774078,
      "grad_norm": 1.5371900796890259,
      "learning_rate": 0.0001923824414962784,
      "loss": 5.0075,
      "step": 391
    },
    {
      "epoch": 0.14113411341134113,
      "grad_norm": 1.6160904169082642,
      "learning_rate": 0.00019233827850007027,
      "loss": 5.0782,
      "step": 392
    },
    {
      "epoch": 0.14149414941494148,
      "grad_norm": 2.6607415676116943,
      "learning_rate": 0.0001922939929544605,
      "loss": 4.9883,
      "step": 393
    },
    {
      "epoch": 0.14185418541854186,
      "grad_norm": 2.1520042419433594,
      "learning_rate": 0.00019224958491822396,
      "loss": 4.8165,
      "step": 394
    },
    {
      "epoch": 0.1422142214221422,
      "grad_norm": 1.8465698957443237,
      "learning_rate": 0.000192205054450298,
      "loss": 4.9328,
      "step": 395
    },
    {
      "epoch": 0.14257425742574256,
      "grad_norm": 2.0882647037506104,
      "learning_rate": 0.00019216040160978262,
      "loss": 5.0565,
      "step": 396
    },
    {
      "epoch": 0.14293429342934294,
      "grad_norm": 1.8768270015716553,
      "learning_rate": 0.00019211562645594002,
      "loss": 4.9836,
      "step": 397
    },
    {
      "epoch": 0.1432943294329433,
      "grad_norm": 2.7058422565460205,
      "learning_rate": 0.00019207072904819486,
      "loss": 5.5061,
      "step": 398
    },
    {
      "epoch": 0.14365436543654364,
      "grad_norm": 2.3394227027893066,
      "learning_rate": 0.000192025709446134,
      "loss": 5.3377,
      "step": 399
    },
    {
      "epoch": 0.14401440144014402,
      "grad_norm": 2.7763686180114746,
      "learning_rate": 0.00019198056770950656,
      "loss": 5.6518,
      "step": 400
    },
    {
      "epoch": 0.14437443744374437,
      "grad_norm": 3.3545148372650146,
      "learning_rate": 0.00019193530389822363,
      "loss": 5.2922,
      "step": 401
    },
    {
      "epoch": 0.14473447344734475,
      "grad_norm": 1.994736671447754,
      "learning_rate": 0.00019188991807235844,
      "loss": 5.0987,
      "step": 402
    },
    {
      "epoch": 0.1450945094509451,
      "grad_norm": 2.1159956455230713,
      "learning_rate": 0.00019184441029214608,
      "loss": 4.9078,
      "step": 403
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 1.9915326833724976,
      "learning_rate": 0.00019179878061798347,
      "loss": 4.8249,
      "step": 404
    },
    {
      "epoch": 0.14581458145814583,
      "grad_norm": 2.081965208053589,
      "learning_rate": 0.00019175302911042936,
      "loss": 4.949,
      "step": 405
    },
    {
      "epoch": 0.14617461746174618,
      "grad_norm": 1.8846912384033203,
      "learning_rate": 0.0001917071558302042,
      "loss": 4.7634,
      "step": 406
    },
    {
      "epoch": 0.14653465346534653,
      "grad_norm": 2.0804243087768555,
      "learning_rate": 0.00019166116083819002,
      "loss": 5.2972,
      "step": 407
    },
    {
      "epoch": 0.1468946894689469,
      "grad_norm": 1.6286754608154297,
      "learning_rate": 0.0001916150441954304,
      "loss": 5.024,
      "step": 408
    },
    {
      "epoch": 0.14725472547254725,
      "grad_norm": 1.9272427558898926,
      "learning_rate": 0.00019156880596313033,
      "loss": 4.902,
      "step": 409
    },
    {
      "epoch": 0.1476147614761476,
      "grad_norm": 2.1995272636413574,
      "learning_rate": 0.0001915224462026563,
      "loss": 5.2471,
      "step": 410
    },
    {
      "epoch": 0.14797479747974798,
      "grad_norm": 1.864698052406311,
      "learning_rate": 0.0001914759649755359,
      "loss": 5.1977,
      "step": 411
    },
    {
      "epoch": 0.14833483348334833,
      "grad_norm": 1.937286615371704,
      "learning_rate": 0.0001914293623434581,
      "loss": 4.7627,
      "step": 412
    },
    {
      "epoch": 0.14869486948694868,
      "grad_norm": 1.9285743236541748,
      "learning_rate": 0.00019138263836827288,
      "loss": 4.8136,
      "step": 413
    },
    {
      "epoch": 0.14905490549054906,
      "grad_norm": 1.9823040962219238,
      "learning_rate": 0.00019133579311199133,
      "loss": 5.1382,
      "step": 414
    },
    {
      "epoch": 0.1494149414941494,
      "grad_norm": 1.8620997667312622,
      "learning_rate": 0.00019128882663678546,
      "loss": 4.9124,
      "step": 415
    },
    {
      "epoch": 0.14977497749774976,
      "grad_norm": 2.0542571544647217,
      "learning_rate": 0.00019124173900498818,
      "loss": 4.6567,
      "step": 416
    },
    {
      "epoch": 0.15013501350135014,
      "grad_norm": 1.7971733808517456,
      "learning_rate": 0.00019119453027909323,
      "loss": 4.8622,
      "step": 417
    },
    {
      "epoch": 0.1504950495049505,
      "grad_norm": 1.7473721504211426,
      "learning_rate": 0.00019114720052175498,
      "loss": 5.1331,
      "step": 418
    },
    {
      "epoch": 0.15085508550855087,
      "grad_norm": 2.0732085704803467,
      "learning_rate": 0.0001910997497957885,
      "loss": 5.1333,
      "step": 419
    },
    {
      "epoch": 0.15121512151215122,
      "grad_norm": 1.8084912300109863,
      "learning_rate": 0.0001910521781641694,
      "loss": 4.8632,
      "step": 420
    },
    {
      "epoch": 0.15157515751575157,
      "grad_norm": 2.067760705947876,
      "learning_rate": 0.0001910044856900337,
      "loss": 5.2845,
      "step": 421
    },
    {
      "epoch": 0.15193519351935195,
      "grad_norm": 2.5814943313598633,
      "learning_rate": 0.0001909566724366779,
      "loss": 5.398,
      "step": 422
    },
    {
      "epoch": 0.1522952295229523,
      "grad_norm": 2.2594242095947266,
      "learning_rate": 0.0001909087384675587,
      "loss": 5.25,
      "step": 423
    },
    {
      "epoch": 0.15265526552655265,
      "grad_norm": 2.8528106212615967,
      "learning_rate": 0.000190860683846293,
      "loss": 5.6809,
      "step": 424
    },
    {
      "epoch": 0.15301530153015303,
      "grad_norm": 2.9201619625091553,
      "learning_rate": 0.00019081250863665794,
      "loss": 5.5664,
      "step": 425
    },
    {
      "epoch": 0.15337533753375338,
      "grad_norm": 3.6063146591186523,
      "learning_rate": 0.00019076421290259058,
      "loss": 5.3827,
      "step": 426
    },
    {
      "epoch": 0.15373537353735373,
      "grad_norm": 2.526179313659668,
      "learning_rate": 0.00019071579670818808,
      "loss": 5.106,
      "step": 427
    },
    {
      "epoch": 0.1540954095409541,
      "grad_norm": 2.3812053203582764,
      "learning_rate": 0.00019066726011770726,
      "loss": 4.8439,
      "step": 428
    },
    {
      "epoch": 0.15445544554455445,
      "grad_norm": 2.1316640377044678,
      "learning_rate": 0.00019061860319556496,
      "loss": 5.3182,
      "step": 429
    },
    {
      "epoch": 0.1548154815481548,
      "grad_norm": 2.0962913036346436,
      "learning_rate": 0.00019056982600633755,
      "loss": 4.9082,
      "step": 430
    },
    {
      "epoch": 0.15517551755175518,
      "grad_norm": 1.7051233053207397,
      "learning_rate": 0.0001905209286147611,
      "loss": 5.1091,
      "step": 431
    },
    {
      "epoch": 0.15553555355535553,
      "grad_norm": 1.8145262002944946,
      "learning_rate": 0.00019047191108573125,
      "loss": 4.7386,
      "step": 432
    },
    {
      "epoch": 0.15589558955895588,
      "grad_norm": 2.0659284591674805,
      "learning_rate": 0.00019042277348430288,
      "loss": 5.0365,
      "step": 433
    },
    {
      "epoch": 0.15625562556255626,
      "grad_norm": 2.1042418479919434,
      "learning_rate": 0.0001903735158756905,
      "loss": 5.0352,
      "step": 434
    },
    {
      "epoch": 0.1566156615661566,
      "grad_norm": 1.6454683542251587,
      "learning_rate": 0.00019032413832526773,
      "loss": 4.6965,
      "step": 435
    },
    {
      "epoch": 0.156975697569757,
      "grad_norm": 2.451984167098999,
      "learning_rate": 0.00019027464089856736,
      "loss": 5.3104,
      "step": 436
    },
    {
      "epoch": 0.15733573357335734,
      "grad_norm": 1.5949114561080933,
      "learning_rate": 0.00019022502366128135,
      "loss": 4.9479,
      "step": 437
    },
    {
      "epoch": 0.1576957695769577,
      "grad_norm": 1.6969925165176392,
      "learning_rate": 0.00019017528667926068,
      "loss": 5.0748,
      "step": 438
    },
    {
      "epoch": 0.15805580558055807,
      "grad_norm": 1.8219711780548096,
      "learning_rate": 0.00019012543001851518,
      "loss": 4.8468,
      "step": 439
    },
    {
      "epoch": 0.15841584158415842,
      "grad_norm": 1.819425344467163,
      "learning_rate": 0.00019007545374521355,
      "loss": 4.6628,
      "step": 440
    },
    {
      "epoch": 0.15877587758775877,
      "grad_norm": 1.949058175086975,
      "learning_rate": 0.0001900253579256832,
      "loss": 5.1057,
      "step": 441
    },
    {
      "epoch": 0.15913591359135915,
      "grad_norm": 1.5444921255111694,
      "learning_rate": 0.00018997514262641035,
      "loss": 5.0297,
      "step": 442
    },
    {
      "epoch": 0.1594959495949595,
      "grad_norm": 1.449641227722168,
      "learning_rate": 0.00018992480791403958,
      "loss": 4.9619,
      "step": 443
    },
    {
      "epoch": 0.15985598559855985,
      "grad_norm": 2.433344841003418,
      "learning_rate": 0.00018987435385537404,
      "loss": 5.3115,
      "step": 444
    },
    {
      "epoch": 0.16021602160216022,
      "grad_norm": 1.7659337520599365,
      "learning_rate": 0.00018982378051737538,
      "loss": 4.9919,
      "step": 445
    },
    {
      "epoch": 0.16057605760576057,
      "grad_norm": 1.6800904273986816,
      "learning_rate": 0.0001897730879671634,
      "loss": 4.7254,
      "step": 446
    },
    {
      "epoch": 0.16093609360936093,
      "grad_norm": 3.3446006774902344,
      "learning_rate": 0.00018972227627201617,
      "loss": 5.5292,
      "step": 447
    },
    {
      "epoch": 0.1612961296129613,
      "grad_norm": 3.71976375579834,
      "learning_rate": 0.0001896713454993699,
      "loss": 5.5853,
      "step": 448
    },
    {
      "epoch": 0.16165616561656165,
      "grad_norm": 2.7482266426086426,
      "learning_rate": 0.00018962029571681886,
      "loss": 5.6693,
      "step": 449
    },
    {
      "epoch": 0.162016201620162,
      "grad_norm": 3.5169460773468018,
      "learning_rate": 0.00018956912699211517,
      "loss": 5.8031,
      "step": 450
    },
    {
      "epoch": 0.16237623762376238,
      "grad_norm": 3.7753636837005615,
      "learning_rate": 0.00018951783939316893,
      "loss": 4.818,
      "step": 451
    },
    {
      "epoch": 0.16273627362736273,
      "grad_norm": 2.4180219173431396,
      "learning_rate": 0.00018946643298804793,
      "loss": 4.6077,
      "step": 452
    },
    {
      "epoch": 0.1630963096309631,
      "grad_norm": 2.407536745071411,
      "learning_rate": 0.0001894149078449777,
      "loss": 5.2159,
      "step": 453
    },
    {
      "epoch": 0.16345634563456346,
      "grad_norm": 1.8214914798736572,
      "learning_rate": 0.00018936326403234125,
      "loss": 5.1427,
      "step": 454
    },
    {
      "epoch": 0.1638163816381638,
      "grad_norm": 1.9131064414978027,
      "learning_rate": 0.00018931150161867916,
      "loss": 4.7128,
      "step": 455
    },
    {
      "epoch": 0.1641764176417642,
      "grad_norm": 1.8776406049728394,
      "learning_rate": 0.00018925962067268946,
      "loss": 4.9725,
      "step": 456
    },
    {
      "epoch": 0.16453645364536454,
      "grad_norm": 2.3590633869171143,
      "learning_rate": 0.0001892076212632274,
      "loss": 4.8201,
      "step": 457
    },
    {
      "epoch": 0.1648964896489649,
      "grad_norm": 2.2605080604553223,
      "learning_rate": 0.0001891555034593055,
      "loss": 4.8645,
      "step": 458
    },
    {
      "epoch": 0.16525652565256527,
      "grad_norm": 1.9868052005767822,
      "learning_rate": 0.00018910326733009337,
      "loss": 5.4631,
      "step": 459
    },
    {
      "epoch": 0.16561656165616562,
      "grad_norm": 1.6422542333602905,
      "learning_rate": 0.00018905091294491776,
      "loss": 5.1917,
      "step": 460
    },
    {
      "epoch": 0.16597659765976597,
      "grad_norm": 1.5032970905303955,
      "learning_rate": 0.00018899844037326225,
      "loss": 4.7756,
      "step": 461
    },
    {
      "epoch": 0.16633663366336635,
      "grad_norm": 1.892916202545166,
      "learning_rate": 0.00018894584968476733,
      "loss": 4.9898,
      "step": 462
    },
    {
      "epoch": 0.1666966696669667,
      "grad_norm": 2.54939341545105,
      "learning_rate": 0.00018889314094923024,
      "loss": 5.1087,
      "step": 463
    },
    {
      "epoch": 0.16705670567056705,
      "grad_norm": 1.70566987991333,
      "learning_rate": 0.0001888403142366049,
      "loss": 4.8872,
      "step": 464
    },
    {
      "epoch": 0.16741674167416742,
      "grad_norm": 1.6707229614257812,
      "learning_rate": 0.00018878736961700182,
      "loss": 5.0393,
      "step": 465
    },
    {
      "epoch": 0.16777677767776777,
      "grad_norm": 2.0254170894622803,
      "learning_rate": 0.00018873430716068792,
      "loss": 5.0949,
      "step": 466
    },
    {
      "epoch": 0.16813681368136812,
      "grad_norm": 1.7791615724563599,
      "learning_rate": 0.00018868112693808665,
      "loss": 5.0134,
      "step": 467
    },
    {
      "epoch": 0.1684968496849685,
      "grad_norm": 2.100054979324341,
      "learning_rate": 0.00018862782901977754,
      "loss": 5.0241,
      "step": 468
    },
    {
      "epoch": 0.16885688568856885,
      "grad_norm": 2.047647714614868,
      "learning_rate": 0.0001885744134764966,
      "loss": 5.1617,
      "step": 469
    },
    {
      "epoch": 0.1692169216921692,
      "grad_norm": 1.9202643632888794,
      "learning_rate": 0.00018852088037913577,
      "loss": 5.1913,
      "step": 470
    },
    {
      "epoch": 0.16957695769576958,
      "grad_norm": 1.5470832586288452,
      "learning_rate": 0.00018846722979874297,
      "loss": 4.935,
      "step": 471
    },
    {
      "epoch": 0.16993699369936993,
      "grad_norm": 1.6920344829559326,
      "learning_rate": 0.00018841346180652213,
      "loss": 5.2467,
      "step": 472
    },
    {
      "epoch": 0.1702970297029703,
      "grad_norm": 2.0190439224243164,
      "learning_rate": 0.00018835957647383303,
      "loss": 5.4482,
      "step": 473
    },
    {
      "epoch": 0.17065706570657066,
      "grad_norm": 2.1805849075317383,
      "learning_rate": 0.0001883055738721911,
      "loss": 5.2908,
      "step": 474
    },
    {
      "epoch": 0.171017101710171,
      "grad_norm": 2.504727363586426,
      "learning_rate": 0.00018825145407326747,
      "loss": 5.1893,
      "step": 475
    },
    {
      "epoch": 0.1713771377137714,
      "grad_norm": 3.133061647415161,
      "learning_rate": 0.00018819721714888877,
      "loss": 5.0389,
      "step": 476
    },
    {
      "epoch": 0.17173717371737174,
      "grad_norm": 2.387793779373169,
      "learning_rate": 0.00018814286317103714,
      "loss": 4.963,
      "step": 477
    },
    {
      "epoch": 0.1720972097209721,
      "grad_norm": 2.892192840576172,
      "learning_rate": 0.00018808839221184999,
      "loss": 5.3117,
      "step": 478
    },
    {
      "epoch": 0.17245724572457247,
      "grad_norm": 2.097123861312866,
      "learning_rate": 0.00018803380434362,
      "loss": 5.2016,
      "step": 479
    },
    {
      "epoch": 0.17281728172817282,
      "grad_norm": 1.3749529123306274,
      "learning_rate": 0.00018797909963879503,
      "loss": 4.8276,
      "step": 480
    },
    {
      "epoch": 0.17317731773177317,
      "grad_norm": 2.303584575653076,
      "learning_rate": 0.00018792427816997803,
      "loss": 5.1623,
      "step": 481
    },
    {
      "epoch": 0.17353735373537355,
      "grad_norm": 4.135527610778809,
      "learning_rate": 0.00018786934000992688,
      "loss": 4.7037,
      "step": 482
    },
    {
      "epoch": 0.1738973897389739,
      "grad_norm": 1.7476530075073242,
      "learning_rate": 0.00018781428523155435,
      "loss": 4.74,
      "step": 483
    },
    {
      "epoch": 0.17425742574257425,
      "grad_norm": 1.6134731769561768,
      "learning_rate": 0.00018775911390792795,
      "loss": 5.1493,
      "step": 484
    },
    {
      "epoch": 0.17461746174617462,
      "grad_norm": 1.7763808965682983,
      "learning_rate": 0.00018770382611226987,
      "loss": 4.8619,
      "step": 485
    },
    {
      "epoch": 0.17497749774977497,
      "grad_norm": 1.4742817878723145,
      "learning_rate": 0.00018764842191795698,
      "loss": 4.8855,
      "step": 486
    },
    {
      "epoch": 0.17533753375337532,
      "grad_norm": 1.5574274063110352,
      "learning_rate": 0.00018759290139852048,
      "loss": 5.187,
      "step": 487
    },
    {
      "epoch": 0.1756975697569757,
      "grad_norm": 1.4653335809707642,
      "learning_rate": 0.000187537264627646,
      "loss": 4.8548,
      "step": 488
    },
    {
      "epoch": 0.17605760576057605,
      "grad_norm": 1.5503312349319458,
      "learning_rate": 0.0001874815116791736,
      "loss": 5.0649,
      "step": 489
    },
    {
      "epoch": 0.17641764176417643,
      "grad_norm": 1.7193348407745361,
      "learning_rate": 0.00018742564262709725,
      "loss": 4.667,
      "step": 490
    },
    {
      "epoch": 0.17677767776777678,
      "grad_norm": 1.4068859815597534,
      "learning_rate": 0.00018736965754556528,
      "loss": 4.5735,
      "step": 491
    },
    {
      "epoch": 0.17713771377137713,
      "grad_norm": 2.0566108226776123,
      "learning_rate": 0.00018731355650887985,
      "loss": 5.4348,
      "step": 492
    },
    {
      "epoch": 0.1774977497749775,
      "grad_norm": 1.428653359413147,
      "learning_rate": 0.00018725733959149712,
      "loss": 4.8304,
      "step": 493
    },
    {
      "epoch": 0.17785778577857786,
      "grad_norm": 2.3756580352783203,
      "learning_rate": 0.00018720100686802694,
      "loss": 5.3095,
      "step": 494
    },
    {
      "epoch": 0.1782178217821782,
      "grad_norm": 1.7651077508926392,
      "learning_rate": 0.00018714455841323287,
      "loss": 5.0266,
      "step": 495
    },
    {
      "epoch": 0.1785778577857786,
      "grad_norm": 2.093381643295288,
      "learning_rate": 0.00018708799430203218,
      "loss": 5.1646,
      "step": 496
    },
    {
      "epoch": 0.17893789378937894,
      "grad_norm": 1.4486280679702759,
      "learning_rate": 0.00018703131460949554,
      "loss": 4.9302,
      "step": 497
    },
    {
      "epoch": 0.1792979297929793,
      "grad_norm": 2.9753105640411377,
      "learning_rate": 0.000186974519410847,
      "loss": 5.7727,
      "step": 498
    },
    {
      "epoch": 0.17965796579657967,
      "grad_norm": 3.126038074493408,
      "learning_rate": 0.00018691760878146395,
      "loss": 5.5132,
      "step": 499
    },
    {
      "epoch": 0.18001800180018002,
      "grad_norm": 2.3378102779388428,
      "learning_rate": 0.00018686058279687698,
      "loss": 5.5611,
      "step": 500
    },
    {
      "epoch": 0.18037803780378037,
      "grad_norm": 3.1730830669403076,
      "learning_rate": 0.0001868034415327698,
      "loss": 5.0738,
      "step": 501
    },
    {
      "epoch": 0.18073807380738074,
      "grad_norm": 2.214451789855957,
      "learning_rate": 0.000186746185064979,
      "loss": 4.9771,
      "step": 502
    },
    {
      "epoch": 0.1810981098109811,
      "grad_norm": 2.729839324951172,
      "learning_rate": 0.00018668881346949417,
      "loss": 5.0892,
      "step": 503
    },
    {
      "epoch": 0.18145814581458145,
      "grad_norm": 1.8582335710525513,
      "learning_rate": 0.00018663132682245772,
      "loss": 4.8183,
      "step": 504
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 1.577508807182312,
      "learning_rate": 0.0001865737252001647,
      "loss": 4.7524,
      "step": 505
    },
    {
      "epoch": 0.18217821782178217,
      "grad_norm": 1.4923256635665894,
      "learning_rate": 0.00018651600867906272,
      "loss": 4.7477,
      "step": 506
    },
    {
      "epoch": 0.18253825382538255,
      "grad_norm": 1.3686710596084595,
      "learning_rate": 0.00018645817733575193,
      "loss": 4.8028,
      "step": 507
    },
    {
      "epoch": 0.1828982898289829,
      "grad_norm": 1.701985239982605,
      "learning_rate": 0.00018640023124698486,
      "loss": 4.9122,
      "step": 508
    },
    {
      "epoch": 0.18325832583258325,
      "grad_norm": 1.4300390481948853,
      "learning_rate": 0.00018634217048966637,
      "loss": 4.9376,
      "step": 509
    },
    {
      "epoch": 0.18361836183618363,
      "grad_norm": 2.832374095916748,
      "learning_rate": 0.0001862839951408534,
      "loss": 5.0504,
      "step": 510
    },
    {
      "epoch": 0.18397839783978398,
      "grad_norm": 1.3927937746047974,
      "learning_rate": 0.0001862257052777551,
      "loss": 4.7502,
      "step": 511
    },
    {
      "epoch": 0.18433843384338433,
      "grad_norm": 2.047490358352661,
      "learning_rate": 0.0001861673009777325,
      "loss": 4.6108,
      "step": 512
    },
    {
      "epoch": 0.1846984698469847,
      "grad_norm": 1.9925168752670288,
      "learning_rate": 0.00018610878231829854,
      "loss": 4.907,
      "step": 513
    },
    {
      "epoch": 0.18505850585058506,
      "grad_norm": 3.167588710784912,
      "learning_rate": 0.00018605014937711796,
      "loss": 5.2115,
      "step": 514
    },
    {
      "epoch": 0.1854185418541854,
      "grad_norm": 1.751932144165039,
      "learning_rate": 0.00018599140223200716,
      "loss": 5.0556,
      "step": 515
    },
    {
      "epoch": 0.1857785778577858,
      "grad_norm": 1.883099913597107,
      "learning_rate": 0.0001859325409609341,
      "loss": 5.3705,
      "step": 516
    },
    {
      "epoch": 0.18613861386138614,
      "grad_norm": 1.7942845821380615,
      "learning_rate": 0.00018587356564201817,
      "loss": 5.2473,
      "step": 517
    },
    {
      "epoch": 0.1864986498649865,
      "grad_norm": 2.2205777168273926,
      "learning_rate": 0.0001858144763535302,
      "loss": 4.8881,
      "step": 518
    },
    {
      "epoch": 0.18685868586858687,
      "grad_norm": 1.5663084983825684,
      "learning_rate": 0.0001857552731738922,
      "loss": 5.0613,
      "step": 519
    },
    {
      "epoch": 0.18721872187218722,
      "grad_norm": 1.7280100584030151,
      "learning_rate": 0.00018569595618167745,
      "loss": 4.5994,
      "step": 520
    },
    {
      "epoch": 0.18757875787578757,
      "grad_norm": 1.6075050830841064,
      "learning_rate": 0.00018563652545561013,
      "loss": 5.1569,
      "step": 521
    },
    {
      "epoch": 0.18793879387938794,
      "grad_norm": 1.9055190086364746,
      "learning_rate": 0.00018557698107456549,
      "loss": 5.1687,
      "step": 522
    },
    {
      "epoch": 0.1882988298829883,
      "grad_norm": 2.2929351329803467,
      "learning_rate": 0.00018551732311756952,
      "loss": 5.1761,
      "step": 523
    },
    {
      "epoch": 0.18865886588658867,
      "grad_norm": 2.500159502029419,
      "learning_rate": 0.000185457551663799,
      "loss": 5.6979,
      "step": 524
    },
    {
      "epoch": 0.18901890189018902,
      "grad_norm": 2.326322555541992,
      "learning_rate": 0.00018539766679258134,
      "loss": 5.1247,
      "step": 525
    },
    {
      "epoch": 0.18937893789378937,
      "grad_norm": 6.278741359710693,
      "learning_rate": 0.0001853376685833945,
      "loss": 4.9737,
      "step": 526
    },
    {
      "epoch": 0.18973897389738975,
      "grad_norm": 2.955718994140625,
      "learning_rate": 0.00018527755711586678,
      "loss": 5.1884,
      "step": 527
    },
    {
      "epoch": 0.1900990099009901,
      "grad_norm": 1.8568588495254517,
      "learning_rate": 0.0001852173324697769,
      "loss": 5.0796,
      "step": 528
    },
    {
      "epoch": 0.19045904590459045,
      "grad_norm": 1.538630723953247,
      "learning_rate": 0.00018515699472505364,
      "loss": 4.8754,
      "step": 529
    },
    {
      "epoch": 0.19081908190819083,
      "grad_norm": 1.7481509447097778,
      "learning_rate": 0.00018509654396177609,
      "loss": 5.1271,
      "step": 530
    },
    {
      "epoch": 0.19117911791179118,
      "grad_norm": 1.5548195838928223,
      "learning_rate": 0.00018503598026017312,
      "loss": 4.8736,
      "step": 531
    },
    {
      "epoch": 0.19153915391539153,
      "grad_norm": 1.5747162103652954,
      "learning_rate": 0.00018497530370062363,
      "loss": 4.922,
      "step": 532
    },
    {
      "epoch": 0.1918991899189919,
      "grad_norm": 1.2808769941329956,
      "learning_rate": 0.00018491451436365627,
      "loss": 4.6306,
      "step": 533
    },
    {
      "epoch": 0.19225922592259226,
      "grad_norm": 1.9267456531524658,
      "learning_rate": 0.00018485361232994932,
      "loss": 5.0321,
      "step": 534
    },
    {
      "epoch": 0.1926192619261926,
      "grad_norm": 1.491786241531372,
      "learning_rate": 0.0001847925976803307,
      "loss": 4.7556,
      "step": 535
    },
    {
      "epoch": 0.192979297929793,
      "grad_norm": 1.279565691947937,
      "learning_rate": 0.00018473147049577774,
      "loss": 4.8633,
      "step": 536
    },
    {
      "epoch": 0.19333933393339334,
      "grad_norm": 1.7372647523880005,
      "learning_rate": 0.00018467023085741717,
      "loss": 5.1584,
      "step": 537
    },
    {
      "epoch": 0.1936993699369937,
      "grad_norm": 1.3435121774673462,
      "learning_rate": 0.0001846088788465249,
      "loss": 4.7903,
      "step": 538
    },
    {
      "epoch": 0.19405940594059407,
      "grad_norm": 1.2509280443191528,
      "learning_rate": 0.00018454741454452603,
      "loss": 4.8515,
      "step": 539
    },
    {
      "epoch": 0.19441944194419442,
      "grad_norm": 1.8514949083328247,
      "learning_rate": 0.0001844858380329947,
      "loss": 4.8086,
      "step": 540
    },
    {
      "epoch": 0.19477947794779477,
      "grad_norm": 1.8898829221725464,
      "learning_rate": 0.00018442414939365387,
      "loss": 4.7823,
      "step": 541
    },
    {
      "epoch": 0.19513951395139514,
      "grad_norm": 1.3015843629837036,
      "learning_rate": 0.00018436234870837547,
      "loss": 4.6961,
      "step": 542
    },
    {
      "epoch": 0.1954995499549955,
      "grad_norm": 1.529765248298645,
      "learning_rate": 0.00018430043605918006,
      "loss": 4.7827,
      "step": 543
    },
    {
      "epoch": 0.19585958595859587,
      "grad_norm": 1.641842007637024,
      "learning_rate": 0.00018423841152823673,
      "loss": 5.2028,
      "step": 544
    },
    {
      "epoch": 0.19621962196219622,
      "grad_norm": 1.861567497253418,
      "learning_rate": 0.00018417627519786315,
      "loss": 5.013,
      "step": 545
    },
    {
      "epoch": 0.19657965796579657,
      "grad_norm": 1.7786455154418945,
      "learning_rate": 0.00018411402715052538,
      "loss": 5.1648,
      "step": 546
    },
    {
      "epoch": 0.19693969396939695,
      "grad_norm": 1.5268160104751587,
      "learning_rate": 0.00018405166746883762,
      "loss": 5.0003,
      "step": 547
    },
    {
      "epoch": 0.1972997299729973,
      "grad_norm": 1.5869791507720947,
      "learning_rate": 0.00018398919623556238,
      "loss": 5.0791,
      "step": 548
    },
    {
      "epoch": 0.19765976597659765,
      "grad_norm": 1.8240504264831543,
      "learning_rate": 0.00018392661353361015,
      "loss": 5.1986,
      "step": 549
    },
    {
      "epoch": 0.19801980198019803,
      "grad_norm": 1.9763150215148926,
      "learning_rate": 0.00018386391944603934,
      "loss": 5.4118,
      "step": 550
    },
    {
      "epoch": 0.19837983798379838,
      "grad_norm": 3.0309135913848877,
      "learning_rate": 0.0001838011140560562,
      "loss": 5.1273,
      "step": 551
    },
    {
      "epoch": 0.19873987398739873,
      "grad_norm": 2.879659414291382,
      "learning_rate": 0.00018373819744701476,
      "loss": 4.9732,
      "step": 552
    },
    {
      "epoch": 0.1990999099909991,
      "grad_norm": 2.4230196475982666,
      "learning_rate": 0.00018367516970241657,
      "loss": 4.8701,
      "step": 553
    },
    {
      "epoch": 0.19945994599459946,
      "grad_norm": 1.527785301208496,
      "learning_rate": 0.00018361203090591071,
      "loss": 4.9533,
      "step": 554
    },
    {
      "epoch": 0.1998199819981998,
      "grad_norm": 2.2586495876312256,
      "learning_rate": 0.00018354878114129367,
      "loss": 5.0682,
      "step": 555
    },
    {
      "epoch": 0.2001800180018002,
      "grad_norm": 1.911534309387207,
      "learning_rate": 0.00018348542049250916,
      "loss": 4.5963,
      "step": 556
    },
    {
      "epoch": 0.20054005400540054,
      "grad_norm": 1.68331778049469,
      "learning_rate": 0.00018342194904364813,
      "loss": 4.9191,
      "step": 557
    },
    {
      "epoch": 0.2009000900090009,
      "grad_norm": 1.540486216545105,
      "learning_rate": 0.00018335836687894853,
      "loss": 5.1383,
      "step": 558
    },
    {
      "epoch": 0.20126012601260126,
      "grad_norm": 2.277985095977783,
      "learning_rate": 0.00018329467408279522,
      "loss": 5.1415,
      "step": 559
    },
    {
      "epoch": 0.20162016201620162,
      "grad_norm": 1.8936793804168701,
      "learning_rate": 0.00018323087073971993,
      "loss": 4.5897,
      "step": 560
    },
    {
      "epoch": 0.201980198019802,
      "grad_norm": 1.8286054134368896,
      "learning_rate": 0.00018316695693440117,
      "loss": 4.6607,
      "step": 561
    },
    {
      "epoch": 0.20234023402340234,
      "grad_norm": 1.3202015161514282,
      "learning_rate": 0.00018310293275166392,
      "loss": 5.0079,
      "step": 562
    },
    {
      "epoch": 0.2027002700270027,
      "grad_norm": 1.8645577430725098,
      "learning_rate": 0.00018303879827647975,
      "loss": 4.7252,
      "step": 563
    },
    {
      "epoch": 0.20306030603060307,
      "grad_norm": 1.383417010307312,
      "learning_rate": 0.00018297455359396657,
      "loss": 5.0536,
      "step": 564
    },
    {
      "epoch": 0.20342034203420342,
      "grad_norm": 1.9294304847717285,
      "learning_rate": 0.0001829101987893885,
      "loss": 4.9231,
      "step": 565
    },
    {
      "epoch": 0.20378037803780377,
      "grad_norm": 1.5366313457489014,
      "learning_rate": 0.00018284573394815597,
      "loss": 4.7901,
      "step": 566
    },
    {
      "epoch": 0.20414041404140415,
      "grad_norm": 1.1666430234909058,
      "learning_rate": 0.00018278115915582526,
      "loss": 4.4916,
      "step": 567
    },
    {
      "epoch": 0.2045004500450045,
      "grad_norm": 1.4304466247558594,
      "learning_rate": 0.0001827164744980987,
      "loss": 4.8673,
      "step": 568
    },
    {
      "epoch": 0.20486048604860485,
      "grad_norm": 1.5400853157043457,
      "learning_rate": 0.00018265168006082437,
      "loss": 5.1139,
      "step": 569
    },
    {
      "epoch": 0.20522052205220523,
      "grad_norm": 1.3901218175888062,
      "learning_rate": 0.0001825867759299961,
      "loss": 5.3105,
      "step": 570
    },
    {
      "epoch": 0.20558055805580558,
      "grad_norm": 1.6825261116027832,
      "learning_rate": 0.00018252176219175328,
      "loss": 4.8088,
      "step": 571
    },
    {
      "epoch": 0.20594059405940593,
      "grad_norm": 1.423314094543457,
      "learning_rate": 0.00018245663893238075,
      "loss": 4.9159,
      "step": 572
    },
    {
      "epoch": 0.2063006300630063,
      "grad_norm": 2.1610755920410156,
      "learning_rate": 0.00018239140623830868,
      "loss": 5.0322,
      "step": 573
    },
    {
      "epoch": 0.20666066606660666,
      "grad_norm": 2.3829901218414307,
      "learning_rate": 0.00018232606419611255,
      "loss": 5.305,
      "step": 574
    },
    {
      "epoch": 0.207020702070207,
      "grad_norm": 2.3431599140167236,
      "learning_rate": 0.00018226061289251298,
      "loss": 5.3596,
      "step": 575
    },
    {
      "epoch": 0.20738073807380739,
      "grad_norm": 2.5606956481933594,
      "learning_rate": 0.00018219505241437545,
      "loss": 5.3337,
      "step": 576
    },
    {
      "epoch": 0.20774077407740774,
      "grad_norm": 1.3641765117645264,
      "learning_rate": 0.00018212938284871047,
      "loss": 4.5617,
      "step": 577
    },
    {
      "epoch": 0.20810081008100811,
      "grad_norm": 1.8042774200439453,
      "learning_rate": 0.00018206360428267332,
      "loss": 4.8936,
      "step": 578
    },
    {
      "epoch": 0.20846084608460846,
      "grad_norm": 1.7920327186584473,
      "learning_rate": 0.0001819977168035639,
      "loss": 5.0555,
      "step": 579
    },
    {
      "epoch": 0.20882088208820881,
      "grad_norm": 1.3233819007873535,
      "learning_rate": 0.0001819317204988267,
      "loss": 4.9664,
      "step": 580
    },
    {
      "epoch": 0.2091809180918092,
      "grad_norm": 1.3599638938903809,
      "learning_rate": 0.00018186561545605054,
      "loss": 5.1357,
      "step": 581
    },
    {
      "epoch": 0.20954095409540954,
      "grad_norm": 1.2919039726257324,
      "learning_rate": 0.0001817994017629687,
      "loss": 5.0696,
      "step": 582
    },
    {
      "epoch": 0.2099009900990099,
      "grad_norm": 1.4741486310958862,
      "learning_rate": 0.00018173307950745854,
      "loss": 4.8335,
      "step": 583
    },
    {
      "epoch": 0.21026102610261027,
      "grad_norm": 1.8232271671295166,
      "learning_rate": 0.0001816666487775416,
      "loss": 4.9422,
      "step": 584
    },
    {
      "epoch": 0.21062106210621062,
      "grad_norm": 1.6007291078567505,
      "learning_rate": 0.0001816001096613833,
      "loss": 4.8473,
      "step": 585
    },
    {
      "epoch": 0.21098109810981097,
      "grad_norm": 1.5359485149383545,
      "learning_rate": 0.00018153346224729293,
      "loss": 5.0639,
      "step": 586
    },
    {
      "epoch": 0.21134113411341135,
      "grad_norm": 1.622120976448059,
      "learning_rate": 0.00018146670662372354,
      "loss": 4.9315,
      "step": 587
    },
    {
      "epoch": 0.2117011701170117,
      "grad_norm": 2.3048367500305176,
      "learning_rate": 0.00018139984287927175,
      "loss": 4.8382,
      "step": 588
    },
    {
      "epoch": 0.21206120612061205,
      "grad_norm": 1.7604765892028809,
      "learning_rate": 0.00018133287110267776,
      "loss": 4.776,
      "step": 589
    },
    {
      "epoch": 0.21242124212421243,
      "grad_norm": 1.3824963569641113,
      "learning_rate": 0.00018126579138282503,
      "loss": 5.1347,
      "step": 590
    },
    {
      "epoch": 0.21278127812781278,
      "grad_norm": 2.024390935897827,
      "learning_rate": 0.00018119860380874037,
      "loss": 5.0018,
      "step": 591
    },
    {
      "epoch": 0.21314131413141313,
      "grad_norm": 1.3187147378921509,
      "learning_rate": 0.00018113130846959368,
      "loss": 5.1671,
      "step": 592
    },
    {
      "epoch": 0.2135013501350135,
      "grad_norm": 1.140762209892273,
      "learning_rate": 0.00018106390545469795,
      "loss": 4.675,
      "step": 593
    },
    {
      "epoch": 0.21386138613861386,
      "grad_norm": 2.119356870651245,
      "learning_rate": 0.00018099639485350897,
      "loss": 4.651,
      "step": 594
    },
    {
      "epoch": 0.21422142214221424,
      "grad_norm": 1.6680924892425537,
      "learning_rate": 0.0001809287767556254,
      "loss": 4.718,
      "step": 595
    },
    {
      "epoch": 0.21458145814581459,
      "grad_norm": 1.3462631702423096,
      "learning_rate": 0.00018086105125078857,
      "loss": 5.0576,
      "step": 596
    },
    {
      "epoch": 0.21494149414941494,
      "grad_norm": 1.4861905574798584,
      "learning_rate": 0.00018079321842888227,
      "loss": 4.9842,
      "step": 597
    },
    {
      "epoch": 0.2153015301530153,
      "grad_norm": 1.6816765069961548,
      "learning_rate": 0.00018072527837993284,
      "loss": 5.3918,
      "step": 598
    },
    {
      "epoch": 0.21566156615661566,
      "grad_norm": 2.004573345184326,
      "learning_rate": 0.00018065723119410884,
      "loss": 5.0587,
      "step": 599
    },
    {
      "epoch": 0.21602160216021601,
      "grad_norm": 3.218261480331421,
      "learning_rate": 0.00018058907696172108,
      "loss": 5.5396,
      "step": 600
    },
    {
      "epoch": 0.2163816381638164,
      "grad_norm": 1.584768533706665,
      "learning_rate": 0.00018052081577322234,
      "loss": 4.9708,
      "step": 601
    },
    {
      "epoch": 0.21674167416741674,
      "grad_norm": 1.6932294368743896,
      "learning_rate": 0.0001804524477192075,
      "loss": 4.8287,
      "step": 602
    },
    {
      "epoch": 0.2171017101710171,
      "grad_norm": 1.7169231176376343,
      "learning_rate": 0.00018038397289041314,
      "loss": 4.551,
      "step": 603
    },
    {
      "epoch": 0.21746174617461747,
      "grad_norm": 1.5970929861068726,
      "learning_rate": 0.0001803153913777176,
      "loss": 4.8561,
      "step": 604
    },
    {
      "epoch": 0.21782178217821782,
      "grad_norm": 1.2572895288467407,
      "learning_rate": 0.00018024670327214084,
      "loss": 4.9702,
      "step": 605
    },
    {
      "epoch": 0.21818181818181817,
      "grad_norm": 1.986138105392456,
      "learning_rate": 0.00018017790866484422,
      "loss": 5.0378,
      "step": 606
    },
    {
      "epoch": 0.21854185418541855,
      "grad_norm": 1.478252649307251,
      "learning_rate": 0.00018010900764713048,
      "loss": 4.7493,
      "step": 607
    },
    {
      "epoch": 0.2189018901890189,
      "grad_norm": 1.4298301935195923,
      "learning_rate": 0.0001800400003104436,
      "loss": 5.3147,
      "step": 608
    },
    {
      "epoch": 0.21926192619261925,
      "grad_norm": 1.337350845336914,
      "learning_rate": 0.00017997088674636872,
      "loss": 4.6894,
      "step": 609
    },
    {
      "epoch": 0.21962196219621963,
      "grad_norm": 1.3257286548614502,
      "learning_rate": 0.00017990166704663177,
      "loss": 5.0221,
      "step": 610
    },
    {
      "epoch": 0.21998199819981998,
      "grad_norm": 1.4358315467834473,
      "learning_rate": 0.00017983234130309968,
      "loss": 5.0326,
      "step": 611
    },
    {
      "epoch": 0.22034203420342033,
      "grad_norm": 1.6882402896881104,
      "learning_rate": 0.00017976290960778024,
      "loss": 4.895,
      "step": 612
    },
    {
      "epoch": 0.2207020702070207,
      "grad_norm": 1.3076093196868896,
      "learning_rate": 0.00017969337205282155,
      "loss": 4.8874,
      "step": 613
    },
    {
      "epoch": 0.22106210621062106,
      "grad_norm": 1.3435124158859253,
      "learning_rate": 0.00017962372873051252,
      "loss": 4.9925,
      "step": 614
    },
    {
      "epoch": 0.22142214221422143,
      "grad_norm": 2.2377662658691406,
      "learning_rate": 0.00017955397973328215,
      "loss": 5.0888,
      "step": 615
    },
    {
      "epoch": 0.22178217821782178,
      "grad_norm": 1.777191400527954,
      "learning_rate": 0.00017948412515369995,
      "loss": 4.8241,
      "step": 616
    },
    {
      "epoch": 0.22214221422142214,
      "grad_norm": 1.478715419769287,
      "learning_rate": 0.00017941416508447536,
      "loss": 4.507,
      "step": 617
    },
    {
      "epoch": 0.2225022502250225,
      "grad_norm": 1.3011283874511719,
      "learning_rate": 0.00017934409961845791,
      "loss": 4.7552,
      "step": 618
    },
    {
      "epoch": 0.22286228622862286,
      "grad_norm": 1.2801713943481445,
      "learning_rate": 0.00017927392884863703,
      "loss": 4.5152,
      "step": 619
    },
    {
      "epoch": 0.2232223222322232,
      "grad_norm": 1.8236119747161865,
      "learning_rate": 0.00017920365286814183,
      "loss": 5.012,
      "step": 620
    },
    {
      "epoch": 0.2235823582358236,
      "grad_norm": 1.5958244800567627,
      "learning_rate": 0.00017913327177024115,
      "loss": 5.091,
      "step": 621
    },
    {
      "epoch": 0.22394239423942394,
      "grad_norm": 1.5439236164093018,
      "learning_rate": 0.00017906278564834324,
      "loss": 4.8818,
      "step": 622
    },
    {
      "epoch": 0.2243024302430243,
      "grad_norm": 1.7584513425827026,
      "learning_rate": 0.0001789921945959958,
      "loss": 5.1626,
      "step": 623
    },
    {
      "epoch": 0.22466246624662467,
      "grad_norm": 1.7982007265090942,
      "learning_rate": 0.00017892149870688578,
      "loss": 5.0671,
      "step": 624
    },
    {
      "epoch": 0.22502250225022502,
      "grad_norm": 2.3741369247436523,
      "learning_rate": 0.00017885069807483926,
      "loss": 5.3132,
      "step": 625
    },
    {
      "epoch": 0.22538253825382537,
      "grad_norm": 2.7315993309020996,
      "learning_rate": 0.00017877979279382135,
      "loss": 5.0911,
      "step": 626
    },
    {
      "epoch": 0.22574257425742575,
      "grad_norm": 1.9711253643035889,
      "learning_rate": 0.00017870878295793598,
      "loss": 5.0541,
      "step": 627
    },
    {
      "epoch": 0.2261026102610261,
      "grad_norm": 2.0043630599975586,
      "learning_rate": 0.00017863766866142594,
      "loss": 5.3504,
      "step": 628
    },
    {
      "epoch": 0.22646264626462645,
      "grad_norm": 1.8370227813720703,
      "learning_rate": 0.00017856644999867264,
      "loss": 4.9745,
      "step": 629
    },
    {
      "epoch": 0.22682268226822683,
      "grad_norm": 1.3397353887557983,
      "learning_rate": 0.00017849512706419592,
      "loss": 5.0795,
      "step": 630
    },
    {
      "epoch": 0.22718271827182718,
      "grad_norm": 1.9813218116760254,
      "learning_rate": 0.0001784236999526541,
      "loss": 4.5778,
      "step": 631
    },
    {
      "epoch": 0.22754275427542756,
      "grad_norm": 1.3915972709655762,
      "learning_rate": 0.00017835216875884368,
      "loss": 5.3249,
      "step": 632
    },
    {
      "epoch": 0.2279027902790279,
      "grad_norm": 1.8962548971176147,
      "learning_rate": 0.0001782805335776994,
      "loss": 5.2326,
      "step": 633
    },
    {
      "epoch": 0.22826282628262826,
      "grad_norm": 1.5071747303009033,
      "learning_rate": 0.00017820879450429394,
      "loss": 4.7139,
      "step": 634
    },
    {
      "epoch": 0.22862286228622863,
      "grad_norm": 1.6194506883621216,
      "learning_rate": 0.0001781369516338378,
      "loss": 5.1211,
      "step": 635
    },
    {
      "epoch": 0.22898289828982898,
      "grad_norm": 1.541662335395813,
      "learning_rate": 0.0001780650050616794,
      "loss": 5.1637,
      "step": 636
    },
    {
      "epoch": 0.22934293429342933,
      "grad_norm": 1.373365044593811,
      "learning_rate": 0.00017799295488330467,
      "loss": 5.0808,
      "step": 637
    },
    {
      "epoch": 0.2297029702970297,
      "grad_norm": 1.1762840747833252,
      "learning_rate": 0.0001779208011943371,
      "loss": 4.7893,
      "step": 638
    },
    {
      "epoch": 0.23006300630063006,
      "grad_norm": 1.5785561800003052,
      "learning_rate": 0.00017784854409053747,
      "loss": 4.8314,
      "step": 639
    },
    {
      "epoch": 0.2304230423042304,
      "grad_norm": 2.131624221801758,
      "learning_rate": 0.00017777618366780393,
      "loss": 4.9747,
      "step": 640
    },
    {
      "epoch": 0.2307830783078308,
      "grad_norm": 1.3572007417678833,
      "learning_rate": 0.00017770372002217172,
      "loss": 4.7746,
      "step": 641
    },
    {
      "epoch": 0.23114311431143114,
      "grad_norm": 0.9324185252189636,
      "learning_rate": 0.00017763115324981294,
      "loss": 4.6222,
      "step": 642
    },
    {
      "epoch": 0.2315031503150315,
      "grad_norm": 1.7739285230636597,
      "learning_rate": 0.0001775584834470368,
      "loss": 4.5461,
      "step": 643
    },
    {
      "epoch": 0.23186318631863187,
      "grad_norm": 1.4608396291732788,
      "learning_rate": 0.000177485710710289,
      "loss": 4.9555,
      "step": 644
    },
    {
      "epoch": 0.23222322232223222,
      "grad_norm": 1.2847424745559692,
      "learning_rate": 0.00017741283513615205,
      "loss": 4.668,
      "step": 645
    },
    {
      "epoch": 0.23258325832583257,
      "grad_norm": 1.3863086700439453,
      "learning_rate": 0.00017733985682134482,
      "loss": 5.1688,
      "step": 646
    },
    {
      "epoch": 0.23294329432943295,
      "grad_norm": 1.4129760265350342,
      "learning_rate": 0.00017726677586272263,
      "loss": 5.1561,
      "step": 647
    },
    {
      "epoch": 0.2333033303330333,
      "grad_norm": 1.450738549232483,
      "learning_rate": 0.00017719359235727694,
      "loss": 5.4269,
      "step": 648
    },
    {
      "epoch": 0.23366336633663368,
      "grad_norm": 1.901845097541809,
      "learning_rate": 0.00017712030640213534,
      "loss": 5.0974,
      "step": 649
    },
    {
      "epoch": 0.23402340234023403,
      "grad_norm": 2.5835745334625244,
      "learning_rate": 0.00017704691809456143,
      "loss": 5.4495,
      "step": 650
    },
    {
      "epoch": 0.23438343834383438,
      "grad_norm": 2.3962457180023193,
      "learning_rate": 0.00017697342753195456,
      "loss": 4.7914,
      "step": 651
    },
    {
      "epoch": 0.23474347434743476,
      "grad_norm": 1.527147889137268,
      "learning_rate": 0.00017689983481184989,
      "loss": 4.9679,
      "step": 652
    },
    {
      "epoch": 0.2351035103510351,
      "grad_norm": 1.4330006837844849,
      "learning_rate": 0.00017682614003191807,
      "loss": 5.0339,
      "step": 653
    },
    {
      "epoch": 0.23546354635463546,
      "grad_norm": 1.0066897869110107,
      "learning_rate": 0.0001767523432899653,
      "loss": 4.8585,
      "step": 654
    },
    {
      "epoch": 0.23582358235823583,
      "grad_norm": 1.3479336500167847,
      "learning_rate": 0.00017667844468393295,
      "loss": 4.662,
      "step": 655
    },
    {
      "epoch": 0.23618361836183618,
      "grad_norm": 0.9287430644035339,
      "learning_rate": 0.0001766044443118978,
      "loss": 4.8556,
      "step": 656
    },
    {
      "epoch": 0.23654365436543653,
      "grad_norm": 1.4379856586456299,
      "learning_rate": 0.00017653034227207152,
      "loss": 5.1246,
      "step": 657
    },
    {
      "epoch": 0.2369036903690369,
      "grad_norm": 1.7954436540603638,
      "learning_rate": 0.00017645613866280077,
      "loss": 5.1441,
      "step": 658
    },
    {
      "epoch": 0.23726372637263726,
      "grad_norm": 1.1439120769500732,
      "learning_rate": 0.00017638183358256696,
      "loss": 4.7969,
      "step": 659
    },
    {
      "epoch": 0.2376237623762376,
      "grad_norm": 1.266098976135254,
      "learning_rate": 0.00017630742712998628,
      "loss": 4.9832,
      "step": 660
    },
    {
      "epoch": 0.237983798379838,
      "grad_norm": 1.0109524726867676,
      "learning_rate": 0.00017623291940380937,
      "loss": 4.731,
      "step": 661
    },
    {
      "epoch": 0.23834383438343834,
      "grad_norm": 1.1992263793945312,
      "learning_rate": 0.0001761583105029213,
      "loss": 4.6046,
      "step": 662
    },
    {
      "epoch": 0.2387038703870387,
      "grad_norm": 1.3985931873321533,
      "learning_rate": 0.00017608360052634138,
      "loss": 5.0826,
      "step": 663
    },
    {
      "epoch": 0.23906390639063907,
      "grad_norm": 1.4095903635025024,
      "learning_rate": 0.00017600878957322314,
      "loss": 4.6519,
      "step": 664
    },
    {
      "epoch": 0.23942394239423942,
      "grad_norm": 1.2141473293304443,
      "learning_rate": 0.00017593387774285412,
      "loss": 4.9543,
      "step": 665
    },
    {
      "epoch": 0.2397839783978398,
      "grad_norm": 1.5373363494873047,
      "learning_rate": 0.00017585886513465566,
      "loss": 4.8106,
      "step": 666
    },
    {
      "epoch": 0.24014401440144015,
      "grad_norm": 1.7050813436508179,
      "learning_rate": 0.0001757837518481829,
      "loss": 4.651,
      "step": 667
    },
    {
      "epoch": 0.2405040504050405,
      "grad_norm": 1.3369519710540771,
      "learning_rate": 0.0001757085379831246,
      "loss": 4.9524,
      "step": 668
    },
    {
      "epoch": 0.24086408640864088,
      "grad_norm": 1.4655152559280396,
      "learning_rate": 0.00017563322363930306,
      "loss": 4.8456,
      "step": 669
    },
    {
      "epoch": 0.24122412241224123,
      "grad_norm": 2.142806053161621,
      "learning_rate": 0.00017555780891667384,
      "loss": 5.0776,
      "step": 670
    },
    {
      "epoch": 0.24158415841584158,
      "grad_norm": 1.640889286994934,
      "learning_rate": 0.00017548229391532572,
      "loss": 4.7559,
      "step": 671
    },
    {
      "epoch": 0.24194419441944195,
      "grad_norm": 1.3450456857681274,
      "learning_rate": 0.00017540667873548063,
      "loss": 5.0178,
      "step": 672
    },
    {
      "epoch": 0.2423042304230423,
      "grad_norm": 1.837351679801941,
      "learning_rate": 0.00017533096347749344,
      "loss": 5.2248,
      "step": 673
    },
    {
      "epoch": 0.24266426642664266,
      "grad_norm": 1.7267247438430786,
      "learning_rate": 0.00017525514824185185,
      "loss": 5.3538,
      "step": 674
    },
    {
      "epoch": 0.24302430243024303,
      "grad_norm": 2.6373047828674316,
      "learning_rate": 0.0001751792331291762,
      "loss": 5.3673,
      "step": 675
    },
    {
      "epoch": 0.24338433843384338,
      "grad_norm": 2.385141134262085,
      "learning_rate": 0.00017510321824021943,
      "loss": 4.7227,
      "step": 676
    },
    {
      "epoch": 0.24374437443744373,
      "grad_norm": 2.1644632816314697,
      "learning_rate": 0.00017502710367586687,
      "loss": 5.0503,
      "step": 677
    },
    {
      "epoch": 0.2441044104410441,
      "grad_norm": 1.4525551795959473,
      "learning_rate": 0.0001749508895371362,
      "loss": 4.7614,
      "step": 678
    },
    {
      "epoch": 0.24446444644464446,
      "grad_norm": 1.5059491395950317,
      "learning_rate": 0.00017487457592517714,
      "loss": 4.8818,
      "step": 679
    },
    {
      "epoch": 0.2448244824482448,
      "grad_norm": 1.5318140983581543,
      "learning_rate": 0.00017479816294127152,
      "loss": 4.9906,
      "step": 680
    },
    {
      "epoch": 0.2451845184518452,
      "grad_norm": 1.6971796751022339,
      "learning_rate": 0.00017472165068683305,
      "loss": 4.9896,
      "step": 681
    },
    {
      "epoch": 0.24554455445544554,
      "grad_norm": 1.204179286956787,
      "learning_rate": 0.0001746450392634071,
      "loss": 4.6281,
      "step": 682
    },
    {
      "epoch": 0.2459045904590459,
      "grad_norm": 1.596869707107544,
      "learning_rate": 0.00017456832877267084,
      "loss": 4.9251,
      "step": 683
    },
    {
      "epoch": 0.24626462646264627,
      "grad_norm": 1.7464261054992676,
      "learning_rate": 0.00017449151931643272,
      "loss": 4.8314,
      "step": 684
    },
    {
      "epoch": 0.24662466246624662,
      "grad_norm": 1.2955352067947388,
      "learning_rate": 0.00017441461099663262,
      "loss": 4.6449,
      "step": 685
    },
    {
      "epoch": 0.246984698469847,
      "grad_norm": 1.4609013795852661,
      "learning_rate": 0.00017433760391534167,
      "loss": 5.248,
      "step": 686
    },
    {
      "epoch": 0.24734473447344735,
      "grad_norm": 1.3379247188568115,
      "learning_rate": 0.00017426049817476197,
      "loss": 4.6666,
      "step": 687
    },
    {
      "epoch": 0.2477047704770477,
      "grad_norm": 1.2864155769348145,
      "learning_rate": 0.00017418329387722668,
      "loss": 4.7158,
      "step": 688
    },
    {
      "epoch": 0.24806480648064808,
      "grad_norm": 1.673769474029541,
      "learning_rate": 0.0001741059911251997,
      "loss": 5.1281,
      "step": 689
    },
    {
      "epoch": 0.24842484248424843,
      "grad_norm": 1.1956952810287476,
      "learning_rate": 0.00017402859002127555,
      "loss": 4.7296,
      "step": 690
    },
    {
      "epoch": 0.24878487848784878,
      "grad_norm": 1.7122550010681152,
      "learning_rate": 0.0001739510906681794,
      "loss": 4.7988,
      "step": 691
    },
    {
      "epoch": 0.24914491449144915,
      "grad_norm": 1.0974791049957275,
      "learning_rate": 0.00017387349316876666,
      "loss": 4.6796,
      "step": 692
    },
    {
      "epoch": 0.2495049504950495,
      "grad_norm": 1.4044756889343262,
      "learning_rate": 0.00017379579762602317,
      "loss": 4.7857,
      "step": 693
    },
    {
      "epoch": 0.24986498649864985,
      "grad_norm": 1.515895128250122,
      "learning_rate": 0.00017371800414306478,
      "loss": 4.7323,
      "step": 694
    },
    {
      "epoch": 0.2502250225022502,
      "grad_norm": 1.1949536800384521,
      "learning_rate": 0.0001736401128231373,
      "loss": 4.9984,
      "step": 695
    },
    {
      "epoch": 0.2502250225022502,
      "eval_loss": 4.921685218811035,
      "eval_runtime": 101.2807,
      "eval_samples_per_second": 46.188,
      "eval_steps_per_second": 11.552,
      "step": 695
    },
    {
      "epoch": 0.2505850585058506,
      "grad_norm": 2.016028881072998,
      "learning_rate": 0.00017356212376961648,
      "loss": 4.8223,
      "step": 696
    },
    {
      "epoch": 0.25094509450945096,
      "grad_norm": 1.5955978631973267,
      "learning_rate": 0.00017348403708600772,
      "loss": 4.6324,
      "step": 697
    },
    {
      "epoch": 0.2513051305130513,
      "grad_norm": 1.7373415231704712,
      "learning_rate": 0.00017340585287594604,
      "loss": 5.3296,
      "step": 698
    },
    {
      "epoch": 0.25166516651665166,
      "grad_norm": 2.196326494216919,
      "learning_rate": 0.0001733275712431958,
      "loss": 5.2923,
      "step": 699
    },
    {
      "epoch": 0.252025202520252,
      "grad_norm": 3.086874008178711,
      "learning_rate": 0.00017324919229165075,
      "loss": 5.1968,
      "step": 700
    },
    {
      "epoch": 0.25238523852385236,
      "grad_norm": 2.4407310485839844,
      "learning_rate": 0.0001731707161253338,
      "loss": 4.9754,
      "step": 701
    },
    {
      "epoch": 0.25274527452745277,
      "grad_norm": 1.2670294046401978,
      "learning_rate": 0.00017309214284839678,
      "loss": 4.8269,
      "step": 702
    },
    {
      "epoch": 0.2531053105310531,
      "grad_norm": 1.6742947101593018,
      "learning_rate": 0.00017301347256512054,
      "loss": 5.2089,
      "step": 703
    },
    {
      "epoch": 0.25346534653465347,
      "grad_norm": 1.399596095085144,
      "learning_rate": 0.00017293470537991463,
      "loss": 5.1412,
      "step": 704
    },
    {
      "epoch": 0.2538253825382538,
      "grad_norm": 1.1809393167495728,
      "learning_rate": 0.0001728558413973171,
      "loss": 4.9513,
      "step": 705
    },
    {
      "epoch": 0.25418541854185417,
      "grad_norm": 0.9787651896476746,
      "learning_rate": 0.00017277688072199457,
      "loss": 4.53,
      "step": 706
    },
    {
      "epoch": 0.2545454545454545,
      "grad_norm": 1.3147783279418945,
      "learning_rate": 0.00017269782345874203,
      "loss": 4.7199,
      "step": 707
    },
    {
      "epoch": 0.2549054905490549,
      "grad_norm": 1.2243294715881348,
      "learning_rate": 0.00017261866971248258,
      "loss": 4.7546,
      "step": 708
    },
    {
      "epoch": 0.2552655265526553,
      "grad_norm": 1.3117494583129883,
      "learning_rate": 0.00017253941958826732,
      "loss": 4.8072,
      "step": 709
    },
    {
      "epoch": 0.2556255625562556,
      "grad_norm": 1.1374012231826782,
      "learning_rate": 0.00017246007319127545,
      "loss": 4.7096,
      "step": 710
    },
    {
      "epoch": 0.255985598559856,
      "grad_norm": 1.3385273218154907,
      "learning_rate": 0.00017238063062681374,
      "loss": 4.6932,
      "step": 711
    },
    {
      "epoch": 0.2563456345634563,
      "grad_norm": 1.1105351448059082,
      "learning_rate": 0.00017230109200031668,
      "loss": 4.9573,
      "step": 712
    },
    {
      "epoch": 0.25670567056705673,
      "grad_norm": 1.1182695627212524,
      "learning_rate": 0.00017222145741734626,
      "loss": 4.7149,
      "step": 713
    },
    {
      "epoch": 0.2570657065706571,
      "grad_norm": 1.5708491802215576,
      "learning_rate": 0.00017214172698359182,
      "loss": 4.8978,
      "step": 714
    },
    {
      "epoch": 0.25742574257425743,
      "grad_norm": 1.0699411630630493,
      "learning_rate": 0.00017206190080486987,
      "loss": 4.8549,
      "step": 715
    },
    {
      "epoch": 0.2577857785778578,
      "grad_norm": 1.2931327819824219,
      "learning_rate": 0.00017198197898712404,
      "loss": 4.5914,
      "step": 716
    },
    {
      "epoch": 0.25814581458145813,
      "grad_norm": 1.082270860671997,
      "learning_rate": 0.00017190196163642483,
      "loss": 4.6384,
      "step": 717
    },
    {
      "epoch": 0.2585058505850585,
      "grad_norm": 1.0534776449203491,
      "learning_rate": 0.00017182184885896964,
      "loss": 4.4978,
      "step": 718
    },
    {
      "epoch": 0.2588658865886589,
      "grad_norm": 1.1717240810394287,
      "learning_rate": 0.0001717416407610824,
      "loss": 4.8859,
      "step": 719
    },
    {
      "epoch": 0.25922592259225924,
      "grad_norm": 1.2578966617584229,
      "learning_rate": 0.00017166133744921357,
      "loss": 4.8346,
      "step": 720
    },
    {
      "epoch": 0.2595859585958596,
      "grad_norm": 1.708533763885498,
      "learning_rate": 0.00017158093902994005,
      "loss": 4.9671,
      "step": 721
    },
    {
      "epoch": 0.25994599459945994,
      "grad_norm": 1.2007981538772583,
      "learning_rate": 0.00017150044560996488,
      "loss": 4.7716,
      "step": 722
    },
    {
      "epoch": 0.2603060306030603,
      "grad_norm": 1.6198557615280151,
      "learning_rate": 0.00017141985729611725,
      "loss": 5.2346,
      "step": 723
    },
    {
      "epoch": 0.26066606660666064,
      "grad_norm": 1.9489868879318237,
      "learning_rate": 0.00017133917419535221,
      "loss": 5.2922,
      "step": 724
    },
    {
      "epoch": 0.26102610261026105,
      "grad_norm": 1.8972506523132324,
      "learning_rate": 0.00017125839641475072,
      "loss": 5.1553,
      "step": 725
    },
    {
      "epoch": 0.2613861386138614,
      "grad_norm": 4.419572353363037,
      "learning_rate": 0.00017117752406151926,
      "loss": 4.7237,
      "step": 726
    },
    {
      "epoch": 0.26174617461746175,
      "grad_norm": 1.8346415758132935,
      "learning_rate": 0.00017109655724298995,
      "loss": 5.3116,
      "step": 727
    },
    {
      "epoch": 0.2621062106210621,
      "grad_norm": 1.0029700994491577,
      "learning_rate": 0.00017101549606662024,
      "loss": 4.604,
      "step": 728
    },
    {
      "epoch": 0.26246624662466245,
      "grad_norm": 1.7620614767074585,
      "learning_rate": 0.00017093434063999278,
      "loss": 4.8614,
      "step": 729
    },
    {
      "epoch": 0.26282628262826285,
      "grad_norm": 1.2524189949035645,
      "learning_rate": 0.0001708530910708153,
      "loss": 5.058,
      "step": 730
    },
    {
      "epoch": 0.2631863186318632,
      "grad_norm": 1.5832024812698364,
      "learning_rate": 0.00017077174746692056,
      "loss": 4.8144,
      "step": 731
    },
    {
      "epoch": 0.26354635463546355,
      "grad_norm": 1.6468006372451782,
      "learning_rate": 0.00017069030993626603,
      "loss": 4.7682,
      "step": 732
    },
    {
      "epoch": 0.2639063906390639,
      "grad_norm": 1.5815296173095703,
      "learning_rate": 0.00017060877858693385,
      "loss": 4.626,
      "step": 733
    },
    {
      "epoch": 0.26426642664266425,
      "grad_norm": 1.1093206405639648,
      "learning_rate": 0.00017052715352713075,
      "loss": 4.6702,
      "step": 734
    },
    {
      "epoch": 0.2646264626462646,
      "grad_norm": 1.2010303735733032,
      "learning_rate": 0.00017044543486518772,
      "loss": 4.6888,
      "step": 735
    },
    {
      "epoch": 0.264986498649865,
      "grad_norm": 1.3065500259399414,
      "learning_rate": 0.00017036362270956009,
      "loss": 4.6952,
      "step": 736
    },
    {
      "epoch": 0.26534653465346536,
      "grad_norm": 1.1059433221817017,
      "learning_rate": 0.00017028171716882714,
      "loss": 4.4088,
      "step": 737
    },
    {
      "epoch": 0.2657065706570657,
      "grad_norm": 1.786853313446045,
      "learning_rate": 0.00017019971835169223,
      "loss": 5.2083,
      "step": 738
    },
    {
      "epoch": 0.26606660666066606,
      "grad_norm": 1.19621741771698,
      "learning_rate": 0.00017011762636698244,
      "loss": 4.6439,
      "step": 739
    },
    {
      "epoch": 0.2664266426642664,
      "grad_norm": 1.2290087938308716,
      "learning_rate": 0.00017003544132364846,
      "loss": 4.9241,
      "step": 740
    },
    {
      "epoch": 0.26678667866786676,
      "grad_norm": 1.106048822402954,
      "learning_rate": 0.00016995316333076458,
      "loss": 4.8128,
      "step": 741
    },
    {
      "epoch": 0.26714671467146717,
      "grad_norm": 1.1408966779708862,
      "learning_rate": 0.00016987079249752843,
      "loss": 4.8207,
      "step": 742
    },
    {
      "epoch": 0.2675067506750675,
      "grad_norm": 1.3608462810516357,
      "learning_rate": 0.00016978832893326074,
      "loss": 5.0293,
      "step": 743
    },
    {
      "epoch": 0.26786678667866787,
      "grad_norm": 1.0211683511734009,
      "learning_rate": 0.00016970577274740545,
      "loss": 4.5325,
      "step": 744
    },
    {
      "epoch": 0.2682268226822682,
      "grad_norm": 1.0703456401824951,
      "learning_rate": 0.0001696231240495294,
      "loss": 5.0604,
      "step": 745
    },
    {
      "epoch": 0.26858685868586857,
      "grad_norm": 1.1928843259811401,
      "learning_rate": 0.00016954038294932216,
      "loss": 4.884,
      "step": 746
    },
    {
      "epoch": 0.268946894689469,
      "grad_norm": 1.1553142070770264,
      "learning_rate": 0.00016945754955659595,
      "loss": 5.2998,
      "step": 747
    },
    {
      "epoch": 0.2693069306930693,
      "grad_norm": 1.1428641080856323,
      "learning_rate": 0.0001693746239812855,
      "loss": 5.1812,
      "step": 748
    },
    {
      "epoch": 0.2696669666966697,
      "grad_norm": 1.5842117071151733,
      "learning_rate": 0.0001692916063334479,
      "loss": 5.0772,
      "step": 749
    },
    {
      "epoch": 0.27002700270027,
      "grad_norm": 1.990872859954834,
      "learning_rate": 0.00016920849672326236,
      "loss": 5.282,
      "step": 750
    },
    {
      "epoch": 0.2703870387038704,
      "grad_norm": 1.2507834434509277,
      "learning_rate": 0.00016912529526103023,
      "loss": 4.6507,
      "step": 751
    },
    {
      "epoch": 0.2707470747074707,
      "grad_norm": 1.4511178731918335,
      "learning_rate": 0.0001690420020571747,
      "loss": 4.7657,
      "step": 752
    },
    {
      "epoch": 0.27110711071107113,
      "grad_norm": 1.1944736242294312,
      "learning_rate": 0.00016895861722224074,
      "loss": 5.1535,
      "step": 753
    },
    {
      "epoch": 0.2714671467146715,
      "grad_norm": 1.556862235069275,
      "learning_rate": 0.00016887514086689494,
      "loss": 4.8301,
      "step": 754
    },
    {
      "epoch": 0.27182718271827183,
      "grad_norm": 1.0710150003433228,
      "learning_rate": 0.00016879157310192535,
      "loss": 5.0124,
      "step": 755
    },
    {
      "epoch": 0.2721872187218722,
      "grad_norm": 1.3693212270736694,
      "learning_rate": 0.00016870791403824132,
      "loss": 5.1666,
      "step": 756
    },
    {
      "epoch": 0.27254725472547253,
      "grad_norm": 0.9505925178527832,
      "learning_rate": 0.0001686241637868734,
      "loss": 4.7864,
      "step": 757
    },
    {
      "epoch": 0.2729072907290729,
      "grad_norm": 1.162520170211792,
      "learning_rate": 0.00016854032245897308,
      "loss": 5.0125,
      "step": 758
    },
    {
      "epoch": 0.2732673267326733,
      "grad_norm": 1.1077611446380615,
      "learning_rate": 0.0001684563901658129,
      "loss": 5.0365,
      "step": 759
    },
    {
      "epoch": 0.27362736273627364,
      "grad_norm": 0.9608715176582336,
      "learning_rate": 0.0001683723670187859,
      "loss": 4.6992,
      "step": 760
    },
    {
      "epoch": 0.273987398739874,
      "grad_norm": 2.9034841060638428,
      "learning_rate": 0.00016828825312940592,
      "loss": 4.5776,
      "step": 761
    },
    {
      "epoch": 0.27434743474347434,
      "grad_norm": 1.195059061050415,
      "learning_rate": 0.0001682040486093071,
      "loss": 4.707,
      "step": 762
    },
    {
      "epoch": 0.2747074707470747,
      "grad_norm": 1.4137169122695923,
      "learning_rate": 0.00016811975357024382,
      "loss": 4.9037,
      "step": 763
    },
    {
      "epoch": 0.2750675067506751,
      "grad_norm": 1.4018524885177612,
      "learning_rate": 0.00016803536812409075,
      "loss": 5.0312,
      "step": 764
    },
    {
      "epoch": 0.27542754275427545,
      "grad_norm": 1.1424809694290161,
      "learning_rate": 0.00016795089238284242,
      "loss": 4.7786,
      "step": 765
    },
    {
      "epoch": 0.2757875787578758,
      "grad_norm": 1.3780423402786255,
      "learning_rate": 0.00016786632645861323,
      "loss": 5.0719,
      "step": 766
    },
    {
      "epoch": 0.27614761476147615,
      "grad_norm": 1.1080266237258911,
      "learning_rate": 0.00016778167046363734,
      "loss": 4.8753,
      "step": 767
    },
    {
      "epoch": 0.2765076507650765,
      "grad_norm": 1.6309232711791992,
      "learning_rate": 0.0001676969245102683,
      "loss": 4.6502,
      "step": 768
    },
    {
      "epoch": 0.27686768676867685,
      "grad_norm": 0.9395397305488586,
      "learning_rate": 0.0001676120887109792,
      "loss": 4.5501,
      "step": 769
    },
    {
      "epoch": 0.27722772277227725,
      "grad_norm": 0.9613596796989441,
      "learning_rate": 0.00016752716317836229,
      "loss": 4.6644,
      "step": 770
    },
    {
      "epoch": 0.2775877587758776,
      "grad_norm": 1.1425570249557495,
      "learning_rate": 0.00016744214802512893,
      "loss": 4.5632,
      "step": 771
    },
    {
      "epoch": 0.27794779477947795,
      "grad_norm": 1.5352091789245605,
      "learning_rate": 0.00016735704336410943,
      "loss": 4.9709,
      "step": 772
    },
    {
      "epoch": 0.2783078307830783,
      "grad_norm": 1.2851957082748413,
      "learning_rate": 0.00016727184930825288,
      "loss": 5.1564,
      "step": 773
    },
    {
      "epoch": 0.27866786678667865,
      "grad_norm": 1.5510812997817993,
      "learning_rate": 0.00016718656597062705,
      "loss": 5.3022,
      "step": 774
    },
    {
      "epoch": 0.279027902790279,
      "grad_norm": 3.7985401153564453,
      "learning_rate": 0.00016710119346441814,
      "loss": 5.1354,
      "step": 775
    },
    {
      "epoch": 0.2793879387938794,
      "grad_norm": 1.5523865222930908,
      "learning_rate": 0.00016701573190293077,
      "loss": 4.8188,
      "step": 776
    },
    {
      "epoch": 0.27974797479747976,
      "grad_norm": 0.9541698694229126,
      "learning_rate": 0.00016693018139958763,
      "loss": 4.5135,
      "step": 777
    },
    {
      "epoch": 0.2801080108010801,
      "grad_norm": 0.9594448208808899,
      "learning_rate": 0.0001668445420679296,
      "loss": 4.8982,
      "step": 778
    },
    {
      "epoch": 0.28046804680468046,
      "grad_norm": 1.0501006841659546,
      "learning_rate": 0.00016675881402161536,
      "loss": 4.9397,
      "step": 779
    },
    {
      "epoch": 0.2808280828082808,
      "grad_norm": 1.0564275979995728,
      "learning_rate": 0.0001666729973744214,
      "loss": 4.5046,
      "step": 780
    },
    {
      "epoch": 0.2811881188118812,
      "grad_norm": 1.144883394241333,
      "learning_rate": 0.00016658709224024162,
      "loss": 4.5505,
      "step": 781
    },
    {
      "epoch": 0.28154815481548157,
      "grad_norm": 1.21260666847229,
      "learning_rate": 0.00016650109873308765,
      "loss": 4.9434,
      "step": 782
    },
    {
      "epoch": 0.2819081908190819,
      "grad_norm": 1.0558801889419556,
      "learning_rate": 0.00016641501696708813,
      "loss": 4.5204,
      "step": 783
    },
    {
      "epoch": 0.28226822682268227,
      "grad_norm": 0.9824090003967285,
      "learning_rate": 0.00016632884705648898,
      "loss": 5.0061,
      "step": 784
    },
    {
      "epoch": 0.2826282628262826,
      "grad_norm": 1.2358840703964233,
      "learning_rate": 0.0001662425891156531,
      "loss": 5.1995,
      "step": 785
    },
    {
      "epoch": 0.28298829882988297,
      "grad_norm": 1.0543807744979858,
      "learning_rate": 0.0001661562432590602,
      "loss": 5.0176,
      "step": 786
    },
    {
      "epoch": 0.2833483348334834,
      "grad_norm": 1.2233498096466064,
      "learning_rate": 0.00016606980960130665,
      "loss": 4.9524,
      "step": 787
    },
    {
      "epoch": 0.2837083708370837,
      "grad_norm": 1.098244547843933,
      "learning_rate": 0.00016598328825710533,
      "loss": 4.6454,
      "step": 788
    },
    {
      "epoch": 0.2840684068406841,
      "grad_norm": 1.366742491722107,
      "learning_rate": 0.00016589667934128558,
      "loss": 4.9312,
      "step": 789
    },
    {
      "epoch": 0.2844284428442844,
      "grad_norm": 1.4314054250717163,
      "learning_rate": 0.00016580998296879292,
      "loss": 4.5107,
      "step": 790
    },
    {
      "epoch": 0.2847884788478848,
      "grad_norm": 1.1748394966125488,
      "learning_rate": 0.00016572319925468892,
      "loss": 4.8257,
      "step": 791
    },
    {
      "epoch": 0.2851485148514851,
      "grad_norm": 1.6585294008255005,
      "learning_rate": 0.00016563632831415102,
      "loss": 5.0808,
      "step": 792
    },
    {
      "epoch": 0.28550855085508553,
      "grad_norm": 1.0720629692077637,
      "learning_rate": 0.00016554937026247253,
      "loss": 4.8906,
      "step": 793
    },
    {
      "epoch": 0.2858685868586859,
      "grad_norm": 1.641931176185608,
      "learning_rate": 0.0001654623252150624,
      "loss": 5.2314,
      "step": 794
    },
    {
      "epoch": 0.28622862286228623,
      "grad_norm": 1.1715306043624878,
      "learning_rate": 0.00016537519328744486,
      "loss": 4.7414,
      "step": 795
    },
    {
      "epoch": 0.2865886588658866,
      "grad_norm": 1.3205702304840088,
      "learning_rate": 0.00016528797459525963,
      "loss": 5.0718,
      "step": 796
    },
    {
      "epoch": 0.28694869486948693,
      "grad_norm": 1.5365204811096191,
      "learning_rate": 0.00016520066925426144,
      "loss": 4.8465,
      "step": 797
    },
    {
      "epoch": 0.2873087308730873,
      "grad_norm": 1.1206798553466797,
      "learning_rate": 0.00016511327738032015,
      "loss": 4.8585,
      "step": 798
    },
    {
      "epoch": 0.2876687668766877,
      "grad_norm": 1.5003643035888672,
      "learning_rate": 0.00016502579908942035,
      "loss": 5.1255,
      "step": 799
    },
    {
      "epoch": 0.28802880288028804,
      "grad_norm": 1.6085450649261475,
      "learning_rate": 0.00016493823449766136,
      "loss": 5.6082,
      "step": 800
    },
    {
      "epoch": 0.2883888388838884,
      "grad_norm": 3.5468766689300537,
      "learning_rate": 0.00016485058372125712,
      "loss": 4.9319,
      "step": 801
    },
    {
      "epoch": 0.28874887488748874,
      "grad_norm": 1.450865626335144,
      "learning_rate": 0.0001647628468765358,
      "loss": 5.1702,
      "step": 802
    },
    {
      "epoch": 0.2891089108910891,
      "grad_norm": 1.1530933380126953,
      "learning_rate": 0.00016467502407993992,
      "loss": 4.7842,
      "step": 803
    },
    {
      "epoch": 0.2894689468946895,
      "grad_norm": 1.0461550951004028,
      "learning_rate": 0.00016458711544802603,
      "loss": 4.964,
      "step": 804
    },
    {
      "epoch": 0.28982898289828984,
      "grad_norm": 1.2056033611297607,
      "learning_rate": 0.00016449912109746457,
      "loss": 4.7158,
      "step": 805
    },
    {
      "epoch": 0.2901890189018902,
      "grad_norm": 1.1307734251022339,
      "learning_rate": 0.0001644110411450398,
      "loss": 4.891,
      "step": 806
    },
    {
      "epoch": 0.29054905490549054,
      "grad_norm": 0.9744582772254944,
      "learning_rate": 0.00016432287570764952,
      "loss": 4.8994,
      "step": 807
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 1.094619631767273,
      "learning_rate": 0.00016423462490230509,
      "loss": 4.7232,
      "step": 808
    },
    {
      "epoch": 0.29126912691269125,
      "grad_norm": 1.2090365886688232,
      "learning_rate": 0.00016414628884613107,
      "loss": 4.6844,
      "step": 809
    },
    {
      "epoch": 0.29162916291629165,
      "grad_norm": 1.1496669054031372,
      "learning_rate": 0.00016405786765636514,
      "loss": 4.6257,
      "step": 810
    },
    {
      "epoch": 0.291989198919892,
      "grad_norm": 1.021824836730957,
      "learning_rate": 0.00016396936145035812,
      "loss": 4.8279,
      "step": 811
    },
    {
      "epoch": 0.29234923492349235,
      "grad_norm": 0.9202598333358765,
      "learning_rate": 0.00016388077034557355,
      "loss": 4.6532,
      "step": 812
    },
    {
      "epoch": 0.2927092709270927,
      "grad_norm": 1.1226942539215088,
      "learning_rate": 0.00016379209445958762,
      "loss": 4.8354,
      "step": 813
    },
    {
      "epoch": 0.29306930693069305,
      "grad_norm": 1.0273377895355225,
      "learning_rate": 0.00016370333391008913,
      "loss": 4.3541,
      "step": 814
    },
    {
      "epoch": 0.2934293429342934,
      "grad_norm": 1.2255477905273438,
      "learning_rate": 0.00016361448881487914,
      "loss": 4.7901,
      "step": 815
    },
    {
      "epoch": 0.2937893789378938,
      "grad_norm": 0.9498298764228821,
      "learning_rate": 0.00016352555929187096,
      "loss": 4.6441,
      "step": 816
    },
    {
      "epoch": 0.29414941494149416,
      "grad_norm": 1.0662606954574585,
      "learning_rate": 0.00016343654545909007,
      "loss": 4.9122,
      "step": 817
    },
    {
      "epoch": 0.2945094509450945,
      "grad_norm": 1.0344738960266113,
      "learning_rate": 0.00016334744743467364,
      "loss": 4.6176,
      "step": 818
    },
    {
      "epoch": 0.29486948694869486,
      "grad_norm": 0.7476336359977722,
      "learning_rate": 0.00016325826533687072,
      "loss": 4.4825,
      "step": 819
    },
    {
      "epoch": 0.2952295229522952,
      "grad_norm": 1.0336343050003052,
      "learning_rate": 0.00016316899928404187,
      "loss": 5.072,
      "step": 820
    },
    {
      "epoch": 0.2955895589558956,
      "grad_norm": 1.3752336502075195,
      "learning_rate": 0.00016307964939465914,
      "loss": 4.9852,
      "step": 821
    },
    {
      "epoch": 0.29594959495949597,
      "grad_norm": 1.2596033811569214,
      "learning_rate": 0.00016299021578730579,
      "loss": 4.9698,
      "step": 822
    },
    {
      "epoch": 0.2963096309630963,
      "grad_norm": 1.857324481010437,
      "learning_rate": 0.0001629006985806761,
      "loss": 5.2779,
      "step": 823
    },
    {
      "epoch": 0.29666966696669667,
      "grad_norm": 1.6580755710601807,
      "learning_rate": 0.0001628110978935756,
      "loss": 5.1542,
      "step": 824
    },
    {
      "epoch": 0.297029702970297,
      "grad_norm": 2.069047212600708,
      "learning_rate": 0.00016272141384492025,
      "loss": 5.4598,
      "step": 825
    },
    {
      "epoch": 0.29738973897389737,
      "grad_norm": 1.1856191158294678,
      "learning_rate": 0.00016263164655373692,
      "loss": 4.5994,
      "step": 826
    },
    {
      "epoch": 0.29774977497749777,
      "grad_norm": 1.4068639278411865,
      "learning_rate": 0.00016254179613916278,
      "loss": 4.6774,
      "step": 827
    },
    {
      "epoch": 0.2981098109810981,
      "grad_norm": 1.251482367515564,
      "learning_rate": 0.00016245186272044544,
      "loss": 5.0974,
      "step": 828
    },
    {
      "epoch": 0.2984698469846985,
      "grad_norm": 1.6903501749038696,
      "learning_rate": 0.0001623618464169426,
      "loss": 4.8803,
      "step": 829
    },
    {
      "epoch": 0.2988298829882988,
      "grad_norm": 0.92371666431427,
      "learning_rate": 0.000162271747348122,
      "loss": 4.851,
      "step": 830
    },
    {
      "epoch": 0.2991899189918992,
      "grad_norm": 1.2435139417648315,
      "learning_rate": 0.0001621815656335612,
      "loss": 4.5808,
      "step": 831
    },
    {
      "epoch": 0.2995499549954995,
      "grad_norm": 1.1530983448028564,
      "learning_rate": 0.00016209130139294744,
      "loss": 4.5771,
      "step": 832
    },
    {
      "epoch": 0.29990999099909993,
      "grad_norm": 1.008490800857544,
      "learning_rate": 0.00016200095474607753,
      "loss": 5.1045,
      "step": 833
    },
    {
      "epoch": 0.3002700270027003,
      "grad_norm": 1.6871010065078735,
      "learning_rate": 0.0001619105258128576,
      "loss": 4.7034,
      "step": 834
    },
    {
      "epoch": 0.30063006300630063,
      "grad_norm": 1.0384951829910278,
      "learning_rate": 0.00016182001471330302,
      "loss": 4.8448,
      "step": 835
    },
    {
      "epoch": 0.300990099009901,
      "grad_norm": 1.4010770320892334,
      "learning_rate": 0.0001617294215675382,
      "loss": 4.8944,
      "step": 836
    },
    {
      "epoch": 0.30135013501350133,
      "grad_norm": 1.1556458473205566,
      "learning_rate": 0.00016163874649579647,
      "loss": 4.5784,
      "step": 837
    },
    {
      "epoch": 0.30171017101710174,
      "grad_norm": 0.8926945924758911,
      "learning_rate": 0.00016154798961841977,
      "loss": 4.5535,
      "step": 838
    },
    {
      "epoch": 0.3020702070207021,
      "grad_norm": 1.1621904373168945,
      "learning_rate": 0.0001614571510558588,
      "loss": 4.8086,
      "step": 839
    },
    {
      "epoch": 0.30243024302430244,
      "grad_norm": 1.0140894651412964,
      "learning_rate": 0.00016136623092867248,
      "loss": 4.6833,
      "step": 840
    },
    {
      "epoch": 0.3027902790279028,
      "grad_norm": 0.8983398079872131,
      "learning_rate": 0.00016127522935752814,
      "loss": 4.7961,
      "step": 841
    },
    {
      "epoch": 0.30315031503150314,
      "grad_norm": 0.8260481357574463,
      "learning_rate": 0.0001611841464632011,
      "loss": 4.5209,
      "step": 842
    },
    {
      "epoch": 0.3035103510351035,
      "grad_norm": 0.8811922073364258,
      "learning_rate": 0.0001610929823665747,
      "loss": 5.0509,
      "step": 843
    },
    {
      "epoch": 0.3038703870387039,
      "grad_norm": 1.1481789350509644,
      "learning_rate": 0.00016100173718863986,
      "loss": 4.8124,
      "step": 844
    },
    {
      "epoch": 0.30423042304230424,
      "grad_norm": 0.8910513520240784,
      "learning_rate": 0.0001609104110504954,
      "loss": 4.7773,
      "step": 845
    },
    {
      "epoch": 0.3045904590459046,
      "grad_norm": 0.9736277461051941,
      "learning_rate": 0.00016081900407334732,
      "loss": 4.6872,
      "step": 846
    },
    {
      "epoch": 0.30495049504950494,
      "grad_norm": 1.2259780168533325,
      "learning_rate": 0.00016072751637850904,
      "loss": 5.3895,
      "step": 847
    },
    {
      "epoch": 0.3053105310531053,
      "grad_norm": 1.333270788192749,
      "learning_rate": 0.00016063594808740113,
      "loss": 5.3089,
      "step": 848
    },
    {
      "epoch": 0.30567056705670564,
      "grad_norm": 1.8068937063217163,
      "learning_rate": 0.00016054429932155104,
      "loss": 5.1097,
      "step": 849
    },
    {
      "epoch": 0.30603060306030605,
      "grad_norm": 2.003225326538086,
      "learning_rate": 0.00016045257020259304,
      "loss": 5.2578,
      "step": 850
    },
    {
      "epoch": 0.3063906390639064,
      "grad_norm": 2.1421890258789062,
      "learning_rate": 0.00016036076085226814,
      "loss": 4.633,
      "step": 851
    },
    {
      "epoch": 0.30675067506750675,
      "grad_norm": 1.4106087684631348,
      "learning_rate": 0.00016026887139242372,
      "loss": 5.0115,
      "step": 852
    },
    {
      "epoch": 0.3071107110711071,
      "grad_norm": 1.3179970979690552,
      "learning_rate": 0.00016017690194501351,
      "loss": 5.0523,
      "step": 853
    },
    {
      "epoch": 0.30747074707470745,
      "grad_norm": 1.1221106052398682,
      "learning_rate": 0.00016008485263209742,
      "loss": 4.469,
      "step": 854
    },
    {
      "epoch": 0.30783078307830786,
      "grad_norm": 0.877373456954956,
      "learning_rate": 0.00015999272357584133,
      "loss": 4.6435,
      "step": 855
    },
    {
      "epoch": 0.3081908190819082,
      "grad_norm": 1.2408608198165894,
      "learning_rate": 0.000159900514898517,
      "loss": 4.8264,
      "step": 856
    },
    {
      "epoch": 0.30855085508550856,
      "grad_norm": 1.0822373628616333,
      "learning_rate": 0.0001598082267225018,
      "loss": 5.0694,
      "step": 857
    },
    {
      "epoch": 0.3089108910891089,
      "grad_norm": 1.031474232673645,
      "learning_rate": 0.00015971585917027862,
      "loss": 4.6595,
      "step": 858
    },
    {
      "epoch": 0.30927092709270926,
      "grad_norm": 1.0592454671859741,
      "learning_rate": 0.00015962341236443574,
      "loss": 4.5791,
      "step": 859
    },
    {
      "epoch": 0.3096309630963096,
      "grad_norm": 0.7305690050125122,
      "learning_rate": 0.0001595308864276666,
      "loss": 4.9633,
      "step": 860
    },
    {
      "epoch": 0.30999099909991,
      "grad_norm": 1.120477318763733,
      "learning_rate": 0.00015943828148276966,
      "loss": 4.7703,
      "step": 861
    },
    {
      "epoch": 0.31035103510351036,
      "grad_norm": 1.1676793098449707,
      "learning_rate": 0.0001593455976526482,
      "loss": 4.6116,
      "step": 862
    },
    {
      "epoch": 0.3107110711071107,
      "grad_norm": 1.009584665298462,
      "learning_rate": 0.0001592528350603103,
      "loss": 5.0741,
      "step": 863
    },
    {
      "epoch": 0.31107110711071106,
      "grad_norm": 1.012658953666687,
      "learning_rate": 0.0001591599938288684,
      "loss": 4.7777,
      "step": 864
    },
    {
      "epoch": 0.3114311431143114,
      "grad_norm": 0.9617002010345459,
      "learning_rate": 0.00015906707408153947,
      "loss": 4.5044,
      "step": 865
    },
    {
      "epoch": 0.31179117911791177,
      "grad_norm": 1.048292875289917,
      "learning_rate": 0.00015897407594164467,
      "loss": 4.495,
      "step": 866
    },
    {
      "epoch": 0.31215121512151217,
      "grad_norm": 0.9742674827575684,
      "learning_rate": 0.00015888099953260905,
      "loss": 4.7548,
      "step": 867
    },
    {
      "epoch": 0.3125112511251125,
      "grad_norm": 1.1329879760742188,
      "learning_rate": 0.00015878784497796176,
      "loss": 4.8652,
      "step": 868
    },
    {
      "epoch": 0.31287128712871287,
      "grad_norm": 0.9601360559463501,
      "learning_rate": 0.0001586946124013354,
      "loss": 4.6138,
      "step": 869
    },
    {
      "epoch": 0.3132313231323132,
      "grad_norm": 1.0140951871871948,
      "learning_rate": 0.00015860130192646646,
      "loss": 4.6407,
      "step": 870
    },
    {
      "epoch": 0.31359135913591357,
      "grad_norm": 1.9553427696228027,
      "learning_rate": 0.00015850791367719443,
      "loss": 5.0412,
      "step": 871
    },
    {
      "epoch": 0.313951395139514,
      "grad_norm": 0.8475087285041809,
      "learning_rate": 0.0001584144477774623,
      "loss": 4.6872,
      "step": 872
    },
    {
      "epoch": 0.31431143114311433,
      "grad_norm": 1.4645193815231323,
      "learning_rate": 0.00015832090435131604,
      "loss": 5.0024,
      "step": 873
    },
    {
      "epoch": 0.3146714671467147,
      "grad_norm": 1.4982762336730957,
      "learning_rate": 0.00015822728352290447,
      "loss": 5.1353,
      "step": 874
    },
    {
      "epoch": 0.31503150315031503,
      "grad_norm": 1.3741843700408936,
      "learning_rate": 0.00015813358541647915,
      "loss": 5.2968,
      "step": 875
    },
    {
      "epoch": 0.3153915391539154,
      "grad_norm": 3.021010398864746,
      "learning_rate": 0.0001580398101563943,
      "loss": 5.2408,
      "step": 876
    },
    {
      "epoch": 0.31575157515751573,
      "grad_norm": 1.1927891969680786,
      "learning_rate": 0.00015794595786710632,
      "loss": 5.0367,
      "step": 877
    },
    {
      "epoch": 0.31611161116111614,
      "grad_norm": 0.9293290376663208,
      "learning_rate": 0.00015785202867317407,
      "loss": 4.8202,
      "step": 878
    },
    {
      "epoch": 0.3164716471647165,
      "grad_norm": 0.9863614439964294,
      "learning_rate": 0.00015775802269925836,
      "loss": 4.5804,
      "step": 879
    },
    {
      "epoch": 0.31683168316831684,
      "grad_norm": 1.4362967014312744,
      "learning_rate": 0.0001576639400701219,
      "loss": 4.6306,
      "step": 880
    },
    {
      "epoch": 0.3171917191719172,
      "grad_norm": 1.408553123474121,
      "learning_rate": 0.0001575697809106292,
      "loss": 5.0018,
      "step": 881
    },
    {
      "epoch": 0.31755175517551754,
      "grad_norm": 0.8808412551879883,
      "learning_rate": 0.00015747554534574626,
      "loss": 4.8715,
      "step": 882
    },
    {
      "epoch": 0.3179117911791179,
      "grad_norm": 1.1640198230743408,
      "learning_rate": 0.0001573812335005405,
      "loss": 4.8324,
      "step": 883
    },
    {
      "epoch": 0.3182718271827183,
      "grad_norm": 1.0969043970108032,
      "learning_rate": 0.00015728684550018064,
      "loss": 4.739,
      "step": 884
    },
    {
      "epoch": 0.31863186318631864,
      "grad_norm": 1.165368676185608,
      "learning_rate": 0.00015719238146993646,
      "loss": 4.7556,
      "step": 885
    },
    {
      "epoch": 0.318991899189919,
      "grad_norm": 1.1786396503448486,
      "learning_rate": 0.00015709784153517851,
      "loss": 4.7313,
      "step": 886
    },
    {
      "epoch": 0.31935193519351934,
      "grad_norm": 1.2101906538009644,
      "learning_rate": 0.00015700322582137827,
      "loss": 4.8052,
      "step": 887
    },
    {
      "epoch": 0.3197119711971197,
      "grad_norm": 1.2660850286483765,
      "learning_rate": 0.0001569085344541077,
      "loss": 5.0038,
      "step": 888
    },
    {
      "epoch": 0.3200720072007201,
      "grad_norm": 1.249326229095459,
      "learning_rate": 0.00015681376755903912,
      "loss": 5.0246,
      "step": 889
    },
    {
      "epoch": 0.32043204320432045,
      "grad_norm": 1.0174130201339722,
      "learning_rate": 0.00015671892526194516,
      "loss": 4.93,
      "step": 890
    },
    {
      "epoch": 0.3207920792079208,
      "grad_norm": 0.8214113712310791,
      "learning_rate": 0.00015662400768869854,
      "loss": 4.8746,
      "step": 891
    },
    {
      "epoch": 0.32115211521152115,
      "grad_norm": 0.9143441319465637,
      "learning_rate": 0.0001565290149652718,
      "loss": 4.8004,
      "step": 892
    },
    {
      "epoch": 0.3215121512151215,
      "grad_norm": 1.020521879196167,
      "learning_rate": 0.0001564339472177373,
      "loss": 4.832,
      "step": 893
    },
    {
      "epoch": 0.32187218721872185,
      "grad_norm": 0.85971999168396,
      "learning_rate": 0.00015633880457226692,
      "loss": 4.8473,
      "step": 894
    },
    {
      "epoch": 0.32223222322232226,
      "grad_norm": 1.136918306350708,
      "learning_rate": 0.00015624358715513192,
      "loss": 4.6338,
      "step": 895
    },
    {
      "epoch": 0.3225922592259226,
      "grad_norm": 1.1943031549453735,
      "learning_rate": 0.0001561482950927029,
      "loss": 5.1376,
      "step": 896
    },
    {
      "epoch": 0.32295229522952296,
      "grad_norm": 1.0146737098693848,
      "learning_rate": 0.00015605292851144942,
      "loss": 4.9343,
      "step": 897
    },
    {
      "epoch": 0.3233123312331233,
      "grad_norm": 1.2824499607086182,
      "learning_rate": 0.00015595748753793998,
      "loss": 4.9354,
      "step": 898
    },
    {
      "epoch": 0.32367236723672366,
      "grad_norm": 1.8544282913208008,
      "learning_rate": 0.00015586197229884184,
      "loss": 5.2082,
      "step": 899
    },
    {
      "epoch": 0.324032403240324,
      "grad_norm": 2.0411593914031982,
      "learning_rate": 0.00015576638292092077,
      "loss": 5.3725,
      "step": 900
    },
    {
      "epoch": 0.3243924392439244,
      "grad_norm": 1.9044406414031982,
      "learning_rate": 0.00015567071953104096,
      "loss": 5.0433,
      "step": 901
    },
    {
      "epoch": 0.32475247524752476,
      "grad_norm": 1.3686928749084473,
      "learning_rate": 0.00015557498225616487,
      "loss": 5.0233,
      "step": 902
    },
    {
      "epoch": 0.3251125112511251,
      "grad_norm": 1.0368106365203857,
      "learning_rate": 0.0001554791712233529,
      "loss": 4.6988,
      "step": 903
    },
    {
      "epoch": 0.32547254725472546,
      "grad_norm": 0.9846864938735962,
      "learning_rate": 0.00015538328655976353,
      "loss": 4.6639,
      "step": 904
    },
    {
      "epoch": 0.3258325832583258,
      "grad_norm": 1.0573269128799438,
      "learning_rate": 0.00015528732839265272,
      "loss": 4.8586,
      "step": 905
    },
    {
      "epoch": 0.3261926192619262,
      "grad_norm": 0.842383623123169,
      "learning_rate": 0.0001551912968493742,
      "loss": 4.8161,
      "step": 906
    },
    {
      "epoch": 0.32655265526552657,
      "grad_norm": 1.0880528688430786,
      "learning_rate": 0.00015509519205737896,
      "loss": 4.9867,
      "step": 907
    },
    {
      "epoch": 0.3269126912691269,
      "grad_norm": 1.0716383457183838,
      "learning_rate": 0.0001549990141442153,
      "loss": 5.1915,
      "step": 908
    },
    {
      "epoch": 0.32727272727272727,
      "grad_norm": 1.0352778434753418,
      "learning_rate": 0.00015490276323752838,
      "loss": 4.745,
      "step": 909
    },
    {
      "epoch": 0.3276327632763276,
      "grad_norm": 1.0129772424697876,
      "learning_rate": 0.00015480643946506043,
      "loss": 4.4961,
      "step": 910
    },
    {
      "epoch": 0.32799279927992797,
      "grad_norm": 1.0241246223449707,
      "learning_rate": 0.00015471004295465035,
      "loss": 4.7148,
      "step": 911
    },
    {
      "epoch": 0.3283528352835284,
      "grad_norm": 1.0528348684310913,
      "learning_rate": 0.0001546135738342335,
      "loss": 4.8904,
      "step": 912
    },
    {
      "epoch": 0.3287128712871287,
      "grad_norm": 0.90036940574646,
      "learning_rate": 0.00015451703223184166,
      "loss": 4.6939,
      "step": 913
    },
    {
      "epoch": 0.3290729072907291,
      "grad_norm": 1.3356916904449463,
      "learning_rate": 0.00015442041827560274,
      "loss": 4.7737,
      "step": 914
    },
    {
      "epoch": 0.32943294329432943,
      "grad_norm": 1.0879907608032227,
      "learning_rate": 0.0001543237320937408,
      "loss": 4.7154,
      "step": 915
    },
    {
      "epoch": 0.3297929792979298,
      "grad_norm": 1.069511890411377,
      "learning_rate": 0.00015422697381457567,
      "loss": 4.7343,
      "step": 916
    },
    {
      "epoch": 0.33015301530153013,
      "grad_norm": 1.1529669761657715,
      "learning_rate": 0.00015413014356652286,
      "loss": 4.976,
      "step": 917
    },
    {
      "epoch": 0.33051305130513053,
      "grad_norm": 1.2753455638885498,
      "learning_rate": 0.00015403324147809344,
      "loss": 4.8575,
      "step": 918
    },
    {
      "epoch": 0.3308730873087309,
      "grad_norm": 0.9767019748687744,
      "learning_rate": 0.0001539362676778938,
      "loss": 4.8543,
      "step": 919
    },
    {
      "epoch": 0.33123312331233123,
      "grad_norm": 0.9553641080856323,
      "learning_rate": 0.00015383922229462549,
      "loss": 4.977,
      "step": 920
    },
    {
      "epoch": 0.3315931593159316,
      "grad_norm": 0.8495627641677856,
      "learning_rate": 0.0001537421054570851,
      "loss": 4.7638,
      "step": 921
    },
    {
      "epoch": 0.33195319531953194,
      "grad_norm": 0.8193251490592957,
      "learning_rate": 0.000153644917294164,
      "loss": 4.7902,
      "step": 922
    },
    {
      "epoch": 0.33231323132313234,
      "grad_norm": 1.0532013177871704,
      "learning_rate": 0.00015354765793484834,
      "loss": 5.1835,
      "step": 923
    },
    {
      "epoch": 0.3326732673267327,
      "grad_norm": 1.482528805732727,
      "learning_rate": 0.00015345032750821856,
      "loss": 5.5671,
      "step": 924
    },
    {
      "epoch": 0.33303330333033304,
      "grad_norm": 2.2630598545074463,
      "learning_rate": 0.00015335292614344963,
      "loss": 5.4195,
      "step": 925
    },
    {
      "epoch": 0.3333933393339334,
      "grad_norm": 1.2598108053207397,
      "learning_rate": 0.0001532554539698105,
      "loss": 4.7357,
      "step": 926
    },
    {
      "epoch": 0.33375337533753374,
      "grad_norm": 1.5963959693908691,
      "learning_rate": 0.00015315791111666425,
      "loss": 4.826,
      "step": 927
    },
    {
      "epoch": 0.3341134113411341,
      "grad_norm": 1.3899768590927124,
      "learning_rate": 0.0001530602977134676,
      "loss": 4.6758,
      "step": 928
    },
    {
      "epoch": 0.3344734473447345,
      "grad_norm": 1.2996914386749268,
      "learning_rate": 0.00015296261388977108,
      "loss": 5.0804,
      "step": 929
    },
    {
      "epoch": 0.33483348334833485,
      "grad_norm": 0.9530441164970398,
      "learning_rate": 0.00015286485977521845,
      "loss": 4.7637,
      "step": 930
    },
    {
      "epoch": 0.3351935193519352,
      "grad_norm": 0.8917075395584106,
      "learning_rate": 0.000152767035499547,
      "loss": 4.7331,
      "step": 931
    },
    {
      "epoch": 0.33555355535553555,
      "grad_norm": 1.269867181777954,
      "learning_rate": 0.000152669141192587,
      "loss": 5.0558,
      "step": 932
    },
    {
      "epoch": 0.3359135913591359,
      "grad_norm": 1.514622449874878,
      "learning_rate": 0.00015257117698426172,
      "loss": 4.9906,
      "step": 933
    },
    {
      "epoch": 0.33627362736273625,
      "grad_norm": 1.0862271785736084,
      "learning_rate": 0.00015247314300458712,
      "loss": 4.7081,
      "step": 934
    },
    {
      "epoch": 0.33663366336633666,
      "grad_norm": 0.8043900728225708,
      "learning_rate": 0.00015237503938367186,
      "loss": 4.9713,
      "step": 935
    },
    {
      "epoch": 0.336993699369937,
      "grad_norm": 0.8863739371299744,
      "learning_rate": 0.00015227686625171697,
      "loss": 5.0078,
      "step": 936
    },
    {
      "epoch": 0.33735373537353736,
      "grad_norm": 0.8409878611564636,
      "learning_rate": 0.00015217862373901575,
      "loss": 4.7633,
      "step": 937
    },
    {
      "epoch": 0.3377137713771377,
      "grad_norm": 0.9702298045158386,
      "learning_rate": 0.00015208031197595356,
      "loss": 4.404,
      "step": 938
    },
    {
      "epoch": 0.33807380738073806,
      "grad_norm": 0.8834205269813538,
      "learning_rate": 0.0001519819310930077,
      "loss": 4.7584,
      "step": 939
    },
    {
      "epoch": 0.3384338433843384,
      "grad_norm": 1.1648602485656738,
      "learning_rate": 0.00015188348122074715,
      "loss": 4.8814,
      "step": 940
    },
    {
      "epoch": 0.3387938793879388,
      "grad_norm": 0.9649667143821716,
      "learning_rate": 0.00015178496248983254,
      "loss": 4.6399,
      "step": 941
    },
    {
      "epoch": 0.33915391539153916,
      "grad_norm": 0.9292764067649841,
      "learning_rate": 0.00015168637503101584,
      "loss": 4.7679,
      "step": 942
    },
    {
      "epoch": 0.3395139513951395,
      "grad_norm": 0.8649988174438477,
      "learning_rate": 0.0001515877189751402,
      "loss": 4.7873,
      "step": 943
    },
    {
      "epoch": 0.33987398739873986,
      "grad_norm": 0.9205020666122437,
      "learning_rate": 0.00015148899445313981,
      "loss": 4.8269,
      "step": 944
    },
    {
      "epoch": 0.3402340234023402,
      "grad_norm": 1.2201025485992432,
      "learning_rate": 0.00015139020159603983,
      "loss": 4.7058,
      "step": 945
    },
    {
      "epoch": 0.3405940594059406,
      "grad_norm": 1.0716661214828491,
      "learning_rate": 0.00015129134053495604,
      "loss": 4.7139,
      "step": 946
    },
    {
      "epoch": 0.34095409540954097,
      "grad_norm": 0.9085667729377747,
      "learning_rate": 0.00015119241140109467,
      "loss": 4.5195,
      "step": 947
    },
    {
      "epoch": 0.3413141314131413,
      "grad_norm": 0.9523339867591858,
      "learning_rate": 0.0001510934143257524,
      "loss": 5.1915,
      "step": 948
    },
    {
      "epoch": 0.34167416741674167,
      "grad_norm": 1.3508998155593872,
      "learning_rate": 0.00015099434944031606,
      "loss": 5.2323,
      "step": 949
    },
    {
      "epoch": 0.342034203420342,
      "grad_norm": 1.9917466640472412,
      "learning_rate": 0.00015089521687626243,
      "loss": 5.4336,
      "step": 950
    },
    {
      "epoch": 0.34239423942394237,
      "grad_norm": 1.3084412813186646,
      "learning_rate": 0.0001507960167651582,
      "loss": 4.7862,
      "step": 951
    },
    {
      "epoch": 0.3427542754275428,
      "grad_norm": 1.4469012022018433,
      "learning_rate": 0.0001506967492386596,
      "loss": 4.9395,
      "step": 952
    },
    {
      "epoch": 0.3431143114311431,
      "grad_norm": 0.9551678895950317,
      "learning_rate": 0.0001505974144285124,
      "loss": 5.1265,
      "step": 953
    },
    {
      "epoch": 0.3434743474347435,
      "grad_norm": 1.559654951095581,
      "learning_rate": 0.00015049801246655163,
      "loss": 4.7992,
      "step": 954
    },
    {
      "epoch": 0.3438343834383438,
      "grad_norm": 1.190437912940979,
      "learning_rate": 0.0001503985434847015,
      "loss": 5.1382,
      "step": 955
    },
    {
      "epoch": 0.3441944194419442,
      "grad_norm": 0.8244277238845825,
      "learning_rate": 0.00015029900761497506,
      "loss": 4.5185,
      "step": 956
    },
    {
      "epoch": 0.3445544554455445,
      "grad_norm": 0.788070797920227,
      "learning_rate": 0.00015019940498947428,
      "loss": 4.7384,
      "step": 957
    },
    {
      "epoch": 0.34491449144914493,
      "grad_norm": 0.7468547224998474,
      "learning_rate": 0.00015009973574038962,
      "loss": 4.8971,
      "step": 958
    },
    {
      "epoch": 0.3452745274527453,
      "grad_norm": 0.8603270649909973,
      "learning_rate": 0.00015000000000000001,
      "loss": 4.3912,
      "step": 959
    },
    {
      "epoch": 0.34563456345634563,
      "grad_norm": 0.9192284345626831,
      "learning_rate": 0.00014990019790067256,
      "loss": 4.482,
      "step": 960
    },
    {
      "epoch": 0.345994599459946,
      "grad_norm": 0.8669421672821045,
      "learning_rate": 0.00014980032957486255,
      "loss": 4.5312,
      "step": 961
    },
    {
      "epoch": 0.34635463546354633,
      "grad_norm": 0.9489680528640747,
      "learning_rate": 0.00014970039515511304,
      "loss": 4.6693,
      "step": 962
    },
    {
      "epoch": 0.34671467146714674,
      "grad_norm": 1.0888569355010986,
      "learning_rate": 0.00014960039477405487,
      "loss": 4.869,
      "step": 963
    },
    {
      "epoch": 0.3470747074707471,
      "grad_norm": 1.0825499296188354,
      "learning_rate": 0.0001495003285644065,
      "loss": 4.5917,
      "step": 964
    },
    {
      "epoch": 0.34743474347434744,
      "grad_norm": 1.1033159494400024,
      "learning_rate": 0.0001494001966589736,
      "loss": 5.0973,
      "step": 965
    },
    {
      "epoch": 0.3477947794779478,
      "grad_norm": 0.7403422594070435,
      "learning_rate": 0.00014929999919064917,
      "loss": 4.7091,
      "step": 966
    },
    {
      "epoch": 0.34815481548154814,
      "grad_norm": 0.9281265735626221,
      "learning_rate": 0.00014919973629241314,
      "loss": 4.5066,
      "step": 967
    },
    {
      "epoch": 0.3485148514851485,
      "grad_norm": 1.0097898244857788,
      "learning_rate": 0.00014909940809733222,
      "loss": 4.8756,
      "step": 968
    },
    {
      "epoch": 0.3488748874887489,
      "grad_norm": 1.0524588823318481,
      "learning_rate": 0.00014899901473855998,
      "loss": 4.715,
      "step": 969
    },
    {
      "epoch": 0.34923492349234925,
      "grad_norm": 1.0118778944015503,
      "learning_rate": 0.00014889855634933627,
      "loss": 4.843,
      "step": 970
    },
    {
      "epoch": 0.3495949594959496,
      "grad_norm": 1.1899731159210205,
      "learning_rate": 0.00014879803306298736,
      "loss": 4.8947,
      "step": 971
    },
    {
      "epoch": 0.34995499549954995,
      "grad_norm": 1.4655492305755615,
      "learning_rate": 0.00014869744501292561,
      "loss": 5.0872,
      "step": 972
    },
    {
      "epoch": 0.3503150315031503,
      "grad_norm": 1.59488844871521,
      "learning_rate": 0.0001485967923326494,
      "loss": 5.13,
      "step": 973
    },
    {
      "epoch": 0.35067506750675065,
      "grad_norm": 2.68699049949646,
      "learning_rate": 0.00014849607515574276,
      "loss": 5.0265,
      "step": 974
    },
    {
      "epoch": 0.35103510351035105,
      "grad_norm": 1.9067751169204712,
      "learning_rate": 0.00014839529361587538,
      "loss": 5.4959,
      "step": 975
    },
    {
      "epoch": 0.3513951395139514,
      "grad_norm": 1.8809281587600708,
      "learning_rate": 0.00014829444784680244,
      "loss": 5.3514,
      "step": 976
    },
    {
      "epoch": 0.35175517551755175,
      "grad_norm": 1.4820325374603271,
      "learning_rate": 0.00014819353798236427,
      "loss": 4.6617,
      "step": 977
    },
    {
      "epoch": 0.3521152115211521,
      "grad_norm": 1.1088730096817017,
      "learning_rate": 0.00014809256415648626,
      "loss": 4.8845,
      "step": 978
    },
    {
      "epoch": 0.35247524752475246,
      "grad_norm": 1.4352535009384155,
      "learning_rate": 0.00014799152650317877,
      "loss": 4.7856,
      "step": 979
    },
    {
      "epoch": 0.35283528352835286,
      "grad_norm": 0.9059150218963623,
      "learning_rate": 0.00014789042515653687,
      "loss": 4.8338,
      "step": 980
    },
    {
      "epoch": 0.3531953195319532,
      "grad_norm": 1.1553621292114258,
      "learning_rate": 0.00014778926025074002,
      "loss": 4.8173,
      "step": 981
    },
    {
      "epoch": 0.35355535553555356,
      "grad_norm": 1.1156034469604492,
      "learning_rate": 0.00014768803192005223,
      "loss": 4.6934,
      "step": 982
    },
    {
      "epoch": 0.3539153915391539,
      "grad_norm": 0.906928300857544,
      "learning_rate": 0.00014758674029882152,
      "loss": 4.7319,
      "step": 983
    },
    {
      "epoch": 0.35427542754275426,
      "grad_norm": 1.2109665870666504,
      "learning_rate": 0.00014748538552148002,
      "loss": 4.7891,
      "step": 984
    },
    {
      "epoch": 0.3546354635463546,
      "grad_norm": 1.0026674270629883,
      "learning_rate": 0.0001473839677225436,
      "loss": 4.6982,
      "step": 985
    },
    {
      "epoch": 0.354995499549955,
      "grad_norm": 0.9090620279312134,
      "learning_rate": 0.00014728248703661182,
      "loss": 4.5659,
      "step": 986
    },
    {
      "epoch": 0.35535553555355537,
      "grad_norm": 1.0185548067092896,
      "learning_rate": 0.00014718094359836772,
      "loss": 4.7228,
      "step": 987
    },
    {
      "epoch": 0.3557155715571557,
      "grad_norm": 0.7778179049491882,
      "learning_rate": 0.00014707933754257754,
      "loss": 4.4779,
      "step": 988
    },
    {
      "epoch": 0.35607560756075607,
      "grad_norm": 1.0444481372833252,
      "learning_rate": 0.00014697766900409074,
      "loss": 4.9622,
      "step": 989
    },
    {
      "epoch": 0.3564356435643564,
      "grad_norm": 1.5330283641815186,
      "learning_rate": 0.00014687593811783963,
      "loss": 4.8482,
      "step": 990
    },
    {
      "epoch": 0.35679567956795677,
      "grad_norm": 0.9238508343696594,
      "learning_rate": 0.00014677414501883926,
      "loss": 4.9441,
      "step": 991
    },
    {
      "epoch": 0.3571557155715572,
      "grad_norm": 1.4587763547897339,
      "learning_rate": 0.0001466722898421873,
      "loss": 4.8419,
      "step": 992
    },
    {
      "epoch": 0.3575157515751575,
      "grad_norm": 1.1518075466156006,
      "learning_rate": 0.00014657037272306368,
      "loss": 4.6916,
      "step": 993
    },
    {
      "epoch": 0.3578757875787579,
      "grad_norm": 0.799138069152832,
      "learning_rate": 0.00014646839379673076,
      "loss": 4.3815,
      "step": 994
    },
    {
      "epoch": 0.3582358235823582,
      "grad_norm": 1.213332176208496,
      "learning_rate": 0.00014636635319853275,
      "loss": 5.1047,
      "step": 995
    },
    {
      "epoch": 0.3585958595859586,
      "grad_norm": 1.3635562658309937,
      "learning_rate": 0.00014626425106389573,
      "loss": 5.2238,
      "step": 996
    },
    {
      "epoch": 0.358955895589559,
      "grad_norm": 0.9962284564971924,
      "learning_rate": 0.00014616208752832758,
      "loss": 5.0498,
      "step": 997
    },
    {
      "epoch": 0.35931593159315933,
      "grad_norm": 1.2514888048171997,
      "learning_rate": 0.00014605986272741748,
      "loss": 4.936,
      "step": 998
    },
    {
      "epoch": 0.3596759675967597,
      "grad_norm": 1.1103378534317017,
      "learning_rate": 0.00014595757679683607,
      "loss": 5.0447,
      "step": 999
    },
    {
      "epoch": 0.36003600360036003,
      "grad_norm": 2.1691062450408936,
      "learning_rate": 0.00014585522987233503,
      "loss": 5.491,
      "step": 1000
    },
    {
      "epoch": 0.3603960396039604,
      "grad_norm": 1.8741892576217651,
      "learning_rate": 0.00014575282208974702,
      "loss": 5.2204,
      "step": 1001
    },
    {
      "epoch": 0.36075607560756073,
      "grad_norm": 1.0809324979782104,
      "learning_rate": 0.0001456503535849855,
      "loss": 4.9838,
      "step": 1002
    },
    {
      "epoch": 0.36111611161116114,
      "grad_norm": 1.172060489654541,
      "learning_rate": 0.00014554782449404448,
      "loss": 4.7522,
      "step": 1003
    },
    {
      "epoch": 0.3614761476147615,
      "grad_norm": 0.8441461324691772,
      "learning_rate": 0.00014544523495299842,
      "loss": 4.9235,
      "step": 1004
    },
    {
      "epoch": 0.36183618361836184,
      "grad_norm": 1.1785805225372314,
      "learning_rate": 0.00014534258509800197,
      "loss": 4.7191,
      "step": 1005
    },
    {
      "epoch": 0.3621962196219622,
      "grad_norm": 2.189944267272949,
      "learning_rate": 0.00014523987506528978,
      "loss": 4.9356,
      "step": 1006
    },
    {
      "epoch": 0.36255625562556254,
      "grad_norm": 0.9100619554519653,
      "learning_rate": 0.00014513710499117647,
      "loss": 4.6031,
      "step": 1007
    },
    {
      "epoch": 0.3629162916291629,
      "grad_norm": 1.0797786712646484,
      "learning_rate": 0.0001450342750120563,
      "loss": 4.6115,
      "step": 1008
    },
    {
      "epoch": 0.3632763276327633,
      "grad_norm": 0.8818701505661011,
      "learning_rate": 0.00014493138526440303,
      "loss": 4.711,
      "step": 1009
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 1.0068962574005127,
      "learning_rate": 0.00014482843588476974,
      "loss": 4.9463,
      "step": 1010
    },
    {
      "epoch": 0.363996399639964,
      "grad_norm": 1.7812259197235107,
      "learning_rate": 0.00014472542700978867,
      "loss": 4.7244,
      "step": 1011
    },
    {
      "epoch": 0.36435643564356435,
      "grad_norm": 0.9703940153121948,
      "learning_rate": 0.00014462235877617098,
      "loss": 4.7569,
      "step": 1012
    },
    {
      "epoch": 0.3647164716471647,
      "grad_norm": 0.9530327320098877,
      "learning_rate": 0.0001445192313207067,
      "loss": 5.1042,
      "step": 1013
    },
    {
      "epoch": 0.3650765076507651,
      "grad_norm": 0.8761149048805237,
      "learning_rate": 0.00014441604478026437,
      "loss": 4.8209,
      "step": 1014
    },
    {
      "epoch": 0.36543654365436545,
      "grad_norm": 1.0026590824127197,
      "learning_rate": 0.00014431279929179097,
      "loss": 4.8765,
      "step": 1015
    },
    {
      "epoch": 0.3657965796579658,
      "grad_norm": 0.7755431532859802,
      "learning_rate": 0.00014420949499231172,
      "loss": 4.5195,
      "step": 1016
    },
    {
      "epoch": 0.36615661566156615,
      "grad_norm": 0.9436198472976685,
      "learning_rate": 0.00014410613201892985,
      "loss": 4.6982,
      "step": 1017
    },
    {
      "epoch": 0.3665166516651665,
      "grad_norm": 0.8173322081565857,
      "learning_rate": 0.00014400271050882653,
      "loss": 5.1097,
      "step": 1018
    },
    {
      "epoch": 0.36687668766876685,
      "grad_norm": 0.9280872344970703,
      "learning_rate": 0.00014389923059926062,
      "loss": 4.757,
      "step": 1019
    },
    {
      "epoch": 0.36723672367236726,
      "grad_norm": 0.9003520011901855,
      "learning_rate": 0.00014379569242756846,
      "loss": 4.9784,
      "step": 1020
    },
    {
      "epoch": 0.3675967596759676,
      "grad_norm": 1.3225197792053223,
      "learning_rate": 0.0001436920961311637,
      "loss": 5.1636,
      "step": 1021
    },
    {
      "epoch": 0.36795679567956796,
      "grad_norm": 0.940623939037323,
      "learning_rate": 0.00014358844184753712,
      "loss": 5.1865,
      "step": 1022
    },
    {
      "epoch": 0.3683168316831683,
      "grad_norm": 1.1208678483963013,
      "learning_rate": 0.0001434847297142565,
      "loss": 5.2052,
      "step": 1023
    },
    {
      "epoch": 0.36867686768676866,
      "grad_norm": 1.6092015504837036,
      "learning_rate": 0.00014338095986896637,
      "loss": 5.2666,
      "step": 1024
    },
    {
      "epoch": 0.369036903690369,
      "grad_norm": 1.844506025314331,
      "learning_rate": 0.0001432771324493879,
      "loss": 5.4748,
      "step": 1025
    },
    {
      "epoch": 0.3693969396939694,
      "grad_norm": 2.317615032196045,
      "learning_rate": 0.00014317324759331856,
      "loss": 4.6974,
      "step": 1026
    },
    {
      "epoch": 0.36975697569756977,
      "grad_norm": 0.9359394907951355,
      "learning_rate": 0.00014306930543863219,
      "loss": 4.7296,
      "step": 1027
    },
    {
      "epoch": 0.3701170117011701,
      "grad_norm": 0.8531212210655212,
      "learning_rate": 0.00014296530612327863,
      "loss": 4.6999,
      "step": 1028
    },
    {
      "epoch": 0.37047704770477047,
      "grad_norm": 0.7763692736625671,
      "learning_rate": 0.0001428612497852835,
      "loss": 5.0536,
      "step": 1029
    },
    {
      "epoch": 0.3708370837083708,
      "grad_norm": 0.7523413896560669,
      "learning_rate": 0.0001427571365627482,
      "loss": 4.5295,
      "step": 1030
    },
    {
      "epoch": 0.3711971197119712,
      "grad_norm": 1.2539907693862915,
      "learning_rate": 0.00014265296659384956,
      "loss": 4.887,
      "step": 1031
    },
    {
      "epoch": 0.3715571557155716,
      "grad_norm": 1.1317229270935059,
      "learning_rate": 0.00014254874001683976,
      "loss": 4.5146,
      "step": 1032
    },
    {
      "epoch": 0.3719171917191719,
      "grad_norm": 0.9427993893623352,
      "learning_rate": 0.0001424444569700461,
      "loss": 4.5106,
      "step": 1033
    },
    {
      "epoch": 0.3722772277227723,
      "grad_norm": 0.813029944896698,
      "learning_rate": 0.00014234011759187083,
      "loss": 4.8805,
      "step": 1034
    },
    {
      "epoch": 0.3726372637263726,
      "grad_norm": 0.7851507663726807,
      "learning_rate": 0.00014223572202079094,
      "loss": 5.1088,
      "step": 1035
    },
    {
      "epoch": 0.372997299729973,
      "grad_norm": 0.8256126046180725,
      "learning_rate": 0.00014213127039535803,
      "loss": 4.6876,
      "step": 1036
    },
    {
      "epoch": 0.3733573357335734,
      "grad_norm": 0.9763374924659729,
      "learning_rate": 0.00014202676285419812,
      "loss": 4.8456,
      "step": 1037
    },
    {
      "epoch": 0.37371737173717373,
      "grad_norm": 0.9133673310279846,
      "learning_rate": 0.0001419221995360113,
      "loss": 4.525,
      "step": 1038
    },
    {
      "epoch": 0.3740774077407741,
      "grad_norm": 0.7200229167938232,
      "learning_rate": 0.00014181758057957186,
      "loss": 4.6137,
      "step": 1039
    },
    {
      "epoch": 0.37443744374437443,
      "grad_norm": 0.5833539962768555,
      "learning_rate": 0.0001417129061237278,
      "loss": 4.6604,
      "step": 1040
    },
    {
      "epoch": 0.3747974797479748,
      "grad_norm": 0.9346949458122253,
      "learning_rate": 0.0001416081763074009,
      "loss": 4.8527,
      "step": 1041
    },
    {
      "epoch": 0.37515751575157513,
      "grad_norm": 0.7368828058242798,
      "learning_rate": 0.00014150339126958633,
      "loss": 4.932,
      "step": 1042
    },
    {
      "epoch": 0.37551755175517554,
      "grad_norm": 0.7255602478981018,
      "learning_rate": 0.00014139855114935252,
      "loss": 4.6075,
      "step": 1043
    },
    {
      "epoch": 0.3758775877587759,
      "grad_norm": 0.7426056861877441,
      "learning_rate": 0.00014129365608584108,
      "loss": 4.5646,
      "step": 1044
    },
    {
      "epoch": 0.37623762376237624,
      "grad_norm": 1.131618618965149,
      "learning_rate": 0.00014118870621826656,
      "loss": 4.9526,
      "step": 1045
    },
    {
      "epoch": 0.3765976597659766,
      "grad_norm": 0.8817772269248962,
      "learning_rate": 0.0001410837016859161,
      "loss": 4.5403,
      "step": 1046
    },
    {
      "epoch": 0.37695769576957694,
      "grad_norm": 0.8336977958679199,
      "learning_rate": 0.00014097864262814955,
      "loss": 5.1945,
      "step": 1047
    },
    {
      "epoch": 0.37731773177317735,
      "grad_norm": 1.1306034326553345,
      "learning_rate": 0.00014087352918439904,
      "loss": 5.2583,
      "step": 1048
    },
    {
      "epoch": 0.3776777677767777,
      "grad_norm": 1.4777275323867798,
      "learning_rate": 0.00014076836149416887,
      "loss": 5.2032,
      "step": 1049
    },
    {
      "epoch": 0.37803780378037805,
      "grad_norm": 1.7121351957321167,
      "learning_rate": 0.00014066313969703545,
      "loss": 5.3917,
      "step": 1050
    },
    {
      "epoch": 0.3783978397839784,
      "grad_norm": 1.3249011039733887,
      "learning_rate": 0.00014055786393264683,
      "loss": 4.5258,
      "step": 1051
    },
    {
      "epoch": 0.37875787578757875,
      "grad_norm": 1.185707688331604,
      "learning_rate": 0.0001404525343407228,
      "loss": 4.7997,
      "step": 1052
    },
    {
      "epoch": 0.3791179117911791,
      "grad_norm": 0.7875931262969971,
      "learning_rate": 0.00014034715106105456,
      "loss": 4.5115,
      "step": 1053
    },
    {
      "epoch": 0.3794779477947795,
      "grad_norm": 0.6152934432029724,
      "learning_rate": 0.00014024171423350455,
      "loss": 4.5271,
      "step": 1054
    },
    {
      "epoch": 0.37983798379837985,
      "grad_norm": 0.7910044193267822,
      "learning_rate": 0.00014013622399800627,
      "loss": 4.591,
      "step": 1055
    },
    {
      "epoch": 0.3801980198019802,
      "grad_norm": 1.1457709074020386,
      "learning_rate": 0.00014003068049456418,
      "loss": 4.5818,
      "step": 1056
    },
    {
      "epoch": 0.38055805580558055,
      "grad_norm": 0.8700783252716064,
      "learning_rate": 0.0001399250838632533,
      "loss": 4.5598,
      "step": 1057
    },
    {
      "epoch": 0.3809180918091809,
      "grad_norm": 0.9683236479759216,
      "learning_rate": 0.00013981943424421932,
      "loss": 4.9761,
      "step": 1058
    },
    {
      "epoch": 0.38127812781278125,
      "grad_norm": 0.8842438459396362,
      "learning_rate": 0.00013971373177767805,
      "loss": 4.878,
      "step": 1059
    },
    {
      "epoch": 0.38163816381638166,
      "grad_norm": 0.8185229301452637,
      "learning_rate": 0.0001396079766039157,
      "loss": 4.7094,
      "step": 1060
    },
    {
      "epoch": 0.381998199819982,
      "grad_norm": 1.1154496669769287,
      "learning_rate": 0.0001395021688632882,
      "loss": 4.9167,
      "step": 1061
    },
    {
      "epoch": 0.38235823582358236,
      "grad_norm": 1.122309923171997,
      "learning_rate": 0.00013939630869622133,
      "loss": 4.8653,
      "step": 1062
    },
    {
      "epoch": 0.3827182718271827,
      "grad_norm": 0.7007623910903931,
      "learning_rate": 0.00013929039624321053,
      "loss": 4.3717,
      "step": 1063
    },
    {
      "epoch": 0.38307830783078306,
      "grad_norm": 1.0007191896438599,
      "learning_rate": 0.00013918443164482046,
      "loss": 4.7034,
      "step": 1064
    },
    {
      "epoch": 0.38343834383438347,
      "grad_norm": 0.8400418758392334,
      "learning_rate": 0.00013907841504168516,
      "loss": 5.0231,
      "step": 1065
    },
    {
      "epoch": 0.3837983798379838,
      "grad_norm": 0.8117850422859192,
      "learning_rate": 0.00013897234657450757,
      "loss": 4.716,
      "step": 1066
    },
    {
      "epoch": 0.38415841584158417,
      "grad_norm": 0.9142310619354248,
      "learning_rate": 0.00013886622638405952,
      "loss": 4.3933,
      "step": 1067
    },
    {
      "epoch": 0.3845184518451845,
      "grad_norm": 0.8659790754318237,
      "learning_rate": 0.0001387600546111815,
      "loss": 5.0209,
      "step": 1068
    },
    {
      "epoch": 0.38487848784878487,
      "grad_norm": 1.464654803276062,
      "learning_rate": 0.0001386538313967824,
      "loss": 4.8429,
      "step": 1069
    },
    {
      "epoch": 0.3852385238523852,
      "grad_norm": 1.0455034971237183,
      "learning_rate": 0.0001385475568818394,
      "loss": 4.9994,
      "step": 1070
    },
    {
      "epoch": 0.3855985598559856,
      "grad_norm": 1.0117931365966797,
      "learning_rate": 0.00013844123120739782,
      "loss": 4.7524,
      "step": 1071
    },
    {
      "epoch": 0.385958595859586,
      "grad_norm": 1.0140798091888428,
      "learning_rate": 0.0001383348545145708,
      "loss": 4.5978,
      "step": 1072
    },
    {
      "epoch": 0.3863186318631863,
      "grad_norm": 0.9929814338684082,
      "learning_rate": 0.00013822842694453924,
      "loss": 5.0697,
      "step": 1073
    },
    {
      "epoch": 0.3866786678667867,
      "grad_norm": 1.8501864671707153,
      "learning_rate": 0.00013812194863855156,
      "loss": 5.4553,
      "step": 1074
    },
    {
      "epoch": 0.387038703870387,
      "grad_norm": 1.3297158479690552,
      "learning_rate": 0.0001380154197379235,
      "loss": 5.1434,
      "step": 1075
    },
    {
      "epoch": 0.3873987398739874,
      "grad_norm": 1.3064671754837036,
      "learning_rate": 0.00013790884038403795,
      "loss": 4.7108,
      "step": 1076
    },
    {
      "epoch": 0.3877587758775878,
      "grad_norm": 1.8386310338974,
      "learning_rate": 0.00013780221071834476,
      "loss": 4.6172,
      "step": 1077
    },
    {
      "epoch": 0.38811881188118813,
      "grad_norm": 1.4296783208847046,
      "learning_rate": 0.00013769553088236055,
      "loss": 5.0008,
      "step": 1078
    },
    {
      "epoch": 0.3884788478847885,
      "grad_norm": 1.3294901847839355,
      "learning_rate": 0.0001375888010176686,
      "loss": 4.8293,
      "step": 1079
    },
    {
      "epoch": 0.38883888388838883,
      "grad_norm": 0.9052990674972534,
      "learning_rate": 0.0001374820212659184,
      "loss": 5.0959,
      "step": 1080
    },
    {
      "epoch": 0.3891989198919892,
      "grad_norm": 0.6910704374313354,
      "learning_rate": 0.00013737519176882588,
      "loss": 4.8354,
      "step": 1081
    },
    {
      "epoch": 0.38955895589558953,
      "grad_norm": 0.973875880241394,
      "learning_rate": 0.00013726831266817278,
      "loss": 4.6088,
      "step": 1082
    },
    {
      "epoch": 0.38991899189918994,
      "grad_norm": 0.7647384405136108,
      "learning_rate": 0.00013716138410580685,
      "loss": 4.7574,
      "step": 1083
    },
    {
      "epoch": 0.3902790279027903,
      "grad_norm": 1.0366477966308594,
      "learning_rate": 0.00013705440622364137,
      "loss": 4.5836,
      "step": 1084
    },
    {
      "epoch": 0.39063906390639064,
      "grad_norm": 0.7150110006332397,
      "learning_rate": 0.00013694737916365517,
      "loss": 4.5989,
      "step": 1085
    },
    {
      "epoch": 0.390999099909991,
      "grad_norm": 0.8569515943527222,
      "learning_rate": 0.0001368403030678922,
      "loss": 4.7995,
      "step": 1086
    },
    {
      "epoch": 0.39135913591359134,
      "grad_norm": 0.8392965793609619,
      "learning_rate": 0.0001367331780784616,
      "loss": 4.9211,
      "step": 1087
    },
    {
      "epoch": 0.39171917191719174,
      "grad_norm": 0.764937698841095,
      "learning_rate": 0.00013662600433753745,
      "loss": 4.9077,
      "step": 1088
    },
    {
      "epoch": 0.3920792079207921,
      "grad_norm": 0.971203088760376,
      "learning_rate": 0.00013651878198735838,
      "loss": 4.7849,
      "step": 1089
    },
    {
      "epoch": 0.39243924392439244,
      "grad_norm": 0.8801988363265991,
      "learning_rate": 0.00013641151117022767,
      "loss": 4.685,
      "step": 1090
    },
    {
      "epoch": 0.3927992799279928,
      "grad_norm": 0.7537325620651245,
      "learning_rate": 0.00013630419202851284,
      "loss": 4.7856,
      "step": 1091
    },
    {
      "epoch": 0.39315931593159315,
      "grad_norm": 1.0417050123214722,
      "learning_rate": 0.00013619682470464558,
      "loss": 4.3694,
      "step": 1092
    },
    {
      "epoch": 0.3935193519351935,
      "grad_norm": 0.7336771488189697,
      "learning_rate": 0.00013608940934112156,
      "loss": 4.6453,
      "step": 1093
    },
    {
      "epoch": 0.3938793879387939,
      "grad_norm": 0.8440720438957214,
      "learning_rate": 0.0001359819460805001,
      "loss": 5.1781,
      "step": 1094
    },
    {
      "epoch": 0.39423942394239425,
      "grad_norm": 1.0878543853759766,
      "learning_rate": 0.00013587443506540422,
      "loss": 5.1328,
      "step": 1095
    },
    {
      "epoch": 0.3945994599459946,
      "grad_norm": 0.8983904719352722,
      "learning_rate": 0.0001357668764385202,
      "loss": 5.043,
      "step": 1096
    },
    {
      "epoch": 0.39495949594959495,
      "grad_norm": 1.2309449911117554,
      "learning_rate": 0.0001356592703425976,
      "loss": 4.8985,
      "step": 1097
    },
    {
      "epoch": 0.3953195319531953,
      "grad_norm": 0.9788826107978821,
      "learning_rate": 0.00013555161692044892,
      "loss": 5.1936,
      "step": 1098
    },
    {
      "epoch": 0.39567956795679565,
      "grad_norm": 1.42829430103302,
      "learning_rate": 0.00013544391631494952,
      "loss": 5.2761,
      "step": 1099
    },
    {
      "epoch": 0.39603960396039606,
      "grad_norm": 1.6005115509033203,
      "learning_rate": 0.00013533616866903735,
      "loss": 5.116,
      "step": 1100
    },
    {
      "epoch": 0.3963996399639964,
      "grad_norm": 3.0721354484558105,
      "learning_rate": 0.00013522837412571282,
      "loss": 4.9747,
      "step": 1101
    },
    {
      "epoch": 0.39675967596759676,
      "grad_norm": 0.9887019395828247,
      "learning_rate": 0.0001351205328280385,
      "loss": 4.4736,
      "step": 1102
    },
    {
      "epoch": 0.3971197119711971,
      "grad_norm": 0.8533967137336731,
      "learning_rate": 0.00013501264491913906,
      "loss": 4.7911,
      "step": 1103
    },
    {
      "epoch": 0.39747974797479746,
      "grad_norm": 1.4522795677185059,
      "learning_rate": 0.00013490471054220112,
      "loss": 5.2936,
      "step": 1104
    },
    {
      "epoch": 0.39783978397839787,
      "grad_norm": 1.0104986429214478,
      "learning_rate": 0.00013479672984047288,
      "loss": 4.7986,
      "step": 1105
    },
    {
      "epoch": 0.3981998199819982,
      "grad_norm": 0.674165666103363,
      "learning_rate": 0.00013468870295726398,
      "loss": 4.7862,
      "step": 1106
    },
    {
      "epoch": 0.39855985598559857,
      "grad_norm": 0.7466803193092346,
      "learning_rate": 0.00013458063003594543,
      "loss": 4.5683,
      "step": 1107
    },
    {
      "epoch": 0.3989198919891989,
      "grad_norm": 1.233678936958313,
      "learning_rate": 0.00013447251121994933,
      "loss": 4.8117,
      "step": 1108
    },
    {
      "epoch": 0.39927992799279927,
      "grad_norm": 0.7311433553695679,
      "learning_rate": 0.00013436434665276865,
      "loss": 4.8338,
      "step": 1109
    },
    {
      "epoch": 0.3996399639963996,
      "grad_norm": 1.2107664346694946,
      "learning_rate": 0.00013425613647795713,
      "loss": 4.747,
      "step": 1110
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9774342775344849,
      "learning_rate": 0.000134147880839129,
      "loss": 4.6647,
      "step": 1111
    },
    {
      "epoch": 0.4003600360036004,
      "grad_norm": 0.8316183090209961,
      "learning_rate": 0.00013403957987995882,
      "loss": 4.6156,
      "step": 1112
    },
    {
      "epoch": 0.4007200720072007,
      "grad_norm": 0.8299689888954163,
      "learning_rate": 0.00013393123374418137,
      "loss": 4.7576,
      "step": 1113
    },
    {
      "epoch": 0.4010801080108011,
      "grad_norm": 0.8191565275192261,
      "learning_rate": 0.00013382284257559132,
      "loss": 4.6748,
      "step": 1114
    },
    {
      "epoch": 0.4014401440144014,
      "grad_norm": 1.1108641624450684,
      "learning_rate": 0.00013371440651804313,
      "loss": 4.6654,
      "step": 1115
    },
    {
      "epoch": 0.4018001800180018,
      "grad_norm": 1.5050597190856934,
      "learning_rate": 0.00013360592571545082,
      "loss": 4.7271,
      "step": 1116
    },
    {
      "epoch": 0.4021602160216022,
      "grad_norm": 0.6661246418952942,
      "learning_rate": 0.00013349740031178784,
      "loss": 4.7434,
      "step": 1117
    },
    {
      "epoch": 0.40252025202520253,
      "grad_norm": 0.9325123429298401,
      "learning_rate": 0.00013338883045108674,
      "loss": 4.5033,
      "step": 1118
    },
    {
      "epoch": 0.4028802880288029,
      "grad_norm": 0.7152042984962463,
      "learning_rate": 0.00013328021627743915,
      "loss": 4.98,
      "step": 1119
    },
    {
      "epoch": 0.40324032403240323,
      "grad_norm": 1.1741433143615723,
      "learning_rate": 0.00013317155793499556,
      "loss": 4.5421,
      "step": 1120
    },
    {
      "epoch": 0.4036003600360036,
      "grad_norm": 1.2132052183151245,
      "learning_rate": 0.00013306285556796495,
      "loss": 5.1257,
      "step": 1121
    },
    {
      "epoch": 0.403960396039604,
      "grad_norm": 0.9931376576423645,
      "learning_rate": 0.00013295410932061478,
      "loss": 4.9036,
      "step": 1122
    },
    {
      "epoch": 0.40432043204320434,
      "grad_norm": 1.2949656248092651,
      "learning_rate": 0.00013284531933727083,
      "loss": 5.2814,
      "step": 1123
    },
    {
      "epoch": 0.4046804680468047,
      "grad_norm": 1.2032676935195923,
      "learning_rate": 0.0001327364857623168,
      "loss": 4.9101,
      "step": 1124
    },
    {
      "epoch": 0.40504050405040504,
      "grad_norm": 2.935535192489624,
      "learning_rate": 0.00013262760874019428,
      "loss": 5.5174,
      "step": 1125
    },
    {
      "epoch": 0.4054005400540054,
      "grad_norm": 1.6732901334762573,
      "learning_rate": 0.00013251868841540257,
      "loss": 4.8242,
      "step": 1126
    },
    {
      "epoch": 0.40576057605760574,
      "grad_norm": 1.4497289657592773,
      "learning_rate": 0.00013240972493249847,
      "loss": 5.3078,
      "step": 1127
    },
    {
      "epoch": 0.40612061206120614,
      "grad_norm": 1.0038177967071533,
      "learning_rate": 0.00013230071843609587,
      "loss": 4.8709,
      "step": 1128
    },
    {
      "epoch": 0.4064806480648065,
      "grad_norm": 0.8603922724723816,
      "learning_rate": 0.000132191669070866,
      "loss": 4.7909,
      "step": 1129
    },
    {
      "epoch": 0.40684068406840684,
      "grad_norm": 0.7687175273895264,
      "learning_rate": 0.00013208257698153677,
      "loss": 4.9778,
      "step": 1130
    },
    {
      "epoch": 0.4072007200720072,
      "grad_norm": 0.8526626825332642,
      "learning_rate": 0.00013197344231289296,
      "loss": 4.5872,
      "step": 1131
    },
    {
      "epoch": 0.40756075607560754,
      "grad_norm": 0.7719563841819763,
      "learning_rate": 0.0001318642652097757,
      "loss": 4.4888,
      "step": 1132
    },
    {
      "epoch": 0.4079207920792079,
      "grad_norm": 0.7941271662712097,
      "learning_rate": 0.0001317550458170826,
      "loss": 4.526,
      "step": 1133
    },
    {
      "epoch": 0.4082808280828083,
      "grad_norm": 0.9268568158149719,
      "learning_rate": 0.00013164578427976727,
      "loss": 4.7826,
      "step": 1134
    },
    {
      "epoch": 0.40864086408640865,
      "grad_norm": 1.0768158435821533,
      "learning_rate": 0.00013153648074283936,
      "loss": 4.8116,
      "step": 1135
    },
    {
      "epoch": 0.409000900090009,
      "grad_norm": 0.8673043847084045,
      "learning_rate": 0.00013142713535136414,
      "loss": 4.982,
      "step": 1136
    },
    {
      "epoch": 0.40936093609360935,
      "grad_norm": 0.9443647265434265,
      "learning_rate": 0.00013131774825046245,
      "loss": 4.7423,
      "step": 1137
    },
    {
      "epoch": 0.4097209720972097,
      "grad_norm": 0.9891415238380432,
      "learning_rate": 0.00013120831958531067,
      "loss": 4.8075,
      "step": 1138
    },
    {
      "epoch": 0.4100810081008101,
      "grad_norm": 0.9973192811012268,
      "learning_rate": 0.00013109884950114007,
      "loss": 4.7516,
      "step": 1139
    },
    {
      "epoch": 0.41044104410441046,
      "grad_norm": 0.8082119822502136,
      "learning_rate": 0.00013098933814323707,
      "loss": 4.4898,
      "step": 1140
    },
    {
      "epoch": 0.4108010801080108,
      "grad_norm": 0.7986486554145813,
      "learning_rate": 0.0001308797856569428,
      "loss": 4.9171,
      "step": 1141
    },
    {
      "epoch": 0.41116111611161116,
      "grad_norm": 1.0446362495422363,
      "learning_rate": 0.00013077019218765305,
      "loss": 4.9862,
      "step": 1142
    },
    {
      "epoch": 0.4115211521152115,
      "grad_norm": 0.7441645860671997,
      "learning_rate": 0.00013066055788081788,
      "loss": 4.6925,
      "step": 1143
    },
    {
      "epoch": 0.41188118811881186,
      "grad_norm": 0.8899047374725342,
      "learning_rate": 0.00013055088288194163,
      "loss": 4.7605,
      "step": 1144
    },
    {
      "epoch": 0.41224122412241226,
      "grad_norm": 0.8489072918891907,
      "learning_rate": 0.0001304411673365826,
      "loss": 4.8283,
      "step": 1145
    },
    {
      "epoch": 0.4126012601260126,
      "grad_norm": 1.014509916305542,
      "learning_rate": 0.000130331411390353,
      "loss": 4.5665,
      "step": 1146
    },
    {
      "epoch": 0.41296129612961296,
      "grad_norm": 0.9186869263648987,
      "learning_rate": 0.00013022161518891855,
      "loss": 4.9906,
      "step": 1147
    },
    {
      "epoch": 0.4133213321332133,
      "grad_norm": 1.4205145835876465,
      "learning_rate": 0.00013011177887799845,
      "loss": 5.2449,
      "step": 1148
    },
    {
      "epoch": 0.41368136813681367,
      "grad_norm": 0.9844337105751038,
      "learning_rate": 0.0001300019026033651,
      "loss": 5.0538,
      "step": 1149
    },
    {
      "epoch": 0.414041404140414,
      "grad_norm": 1.7266241312026978,
      "learning_rate": 0.00012989198651084397,
      "loss": 5.46,
      "step": 1150
    },
    {
      "epoch": 0.4144014401440144,
      "grad_norm": 1.170470952987671,
      "learning_rate": 0.00012978203074631334,
      "loss": 4.8089,
      "step": 1151
    },
    {
      "epoch": 0.41476147614761477,
      "grad_norm": 1.0281877517700195,
      "learning_rate": 0.00012967203545570418,
      "loss": 4.6209,
      "step": 1152
    },
    {
      "epoch": 0.4151215121512151,
      "grad_norm": 0.9903915524482727,
      "learning_rate": 0.00012956200078499994,
      "loss": 4.3591,
      "step": 1153
    },
    {
      "epoch": 0.41548154815481547,
      "grad_norm": 1.1112825870513916,
      "learning_rate": 0.00012945192688023624,
      "loss": 4.6581,
      "step": 1154
    },
    {
      "epoch": 0.4158415841584158,
      "grad_norm": 0.7413083910942078,
      "learning_rate": 0.00012934181388750087,
      "loss": 4.762,
      "step": 1155
    },
    {
      "epoch": 0.41620162016201623,
      "grad_norm": 0.9765453338623047,
      "learning_rate": 0.0001292316619529334,
      "loss": 4.7615,
      "step": 1156
    },
    {
      "epoch": 0.4165616561656166,
      "grad_norm": 0.8726073503494263,
      "learning_rate": 0.00012912147122272523,
      "loss": 4.9829,
      "step": 1157
    },
    {
      "epoch": 0.41692169216921693,
      "grad_norm": 0.7000720500946045,
      "learning_rate": 0.00012901124184311905,
      "loss": 4.7909,
      "step": 1158
    },
    {
      "epoch": 0.4172817281728173,
      "grad_norm": 1.2081289291381836,
      "learning_rate": 0.00012890097396040903,
      "loss": 4.838,
      "step": 1159
    },
    {
      "epoch": 0.41764176417641763,
      "grad_norm": 0.8126158714294434,
      "learning_rate": 0.0001287906677209403,
      "loss": 4.5821,
      "step": 1160
    },
    {
      "epoch": 0.418001800180018,
      "grad_norm": 0.9014402031898499,
      "learning_rate": 0.00012868032327110904,
      "loss": 5.1858,
      "step": 1161
    },
    {
      "epoch": 0.4183618361836184,
      "grad_norm": 0.9191497564315796,
      "learning_rate": 0.00012856994075736197,
      "loss": 4.8331,
      "step": 1162
    },
    {
      "epoch": 0.41872187218721874,
      "grad_norm": 0.7086000442504883,
      "learning_rate": 0.0001284595203261965,
      "loss": 4.9313,
      "step": 1163
    },
    {
      "epoch": 0.4190819081908191,
      "grad_norm": 0.7402483224868774,
      "learning_rate": 0.0001283490621241602,
      "loss": 5.0152,
      "step": 1164
    },
    {
      "epoch": 0.41944194419441944,
      "grad_norm": 0.8372672200202942,
      "learning_rate": 0.00012823856629785093,
      "loss": 4.4173,
      "step": 1165
    },
    {
      "epoch": 0.4198019801980198,
      "grad_norm": 0.7594465017318726,
      "learning_rate": 0.00012812803299391628,
      "loss": 4.5408,
      "step": 1166
    },
    {
      "epoch": 0.42016201620162014,
      "grad_norm": 1.4983152151107788,
      "learning_rate": 0.00012801746235905384,
      "loss": 5.0135,
      "step": 1167
    },
    {
      "epoch": 0.42052205220522054,
      "grad_norm": 1.4116381406784058,
      "learning_rate": 0.00012790685454001054,
      "loss": 4.7556,
      "step": 1168
    },
    {
      "epoch": 0.4208820882088209,
      "grad_norm": 1.290519118309021,
      "learning_rate": 0.00012779620968358273,
      "loss": 4.5214,
      "step": 1169
    },
    {
      "epoch": 0.42124212421242124,
      "grad_norm": 0.8605509400367737,
      "learning_rate": 0.00012768552793661594,
      "loss": 4.6911,
      "step": 1170
    },
    {
      "epoch": 0.4216021602160216,
      "grad_norm": 0.9390202164649963,
      "learning_rate": 0.00012757480944600462,
      "loss": 5.0831,
      "step": 1171
    },
    {
      "epoch": 0.42196219621962194,
      "grad_norm": 1.106753945350647,
      "learning_rate": 0.00012746405435869198,
      "loss": 5.0177,
      "step": 1172
    },
    {
      "epoch": 0.42232223222322235,
      "grad_norm": 1.1474226713180542,
      "learning_rate": 0.00012735326282166984,
      "loss": 4.9379,
      "step": 1173
    },
    {
      "epoch": 0.4226822682268227,
      "grad_norm": 1.3341996669769287,
      "learning_rate": 0.00012724243498197837,
      "loss": 5.1112,
      "step": 1174
    },
    {
      "epoch": 0.42304230423042305,
      "grad_norm": 1.7512990236282349,
      "learning_rate": 0.0001271315709867059,
      "loss": 5.0614,
      "step": 1175
    },
    {
      "epoch": 0.4234023402340234,
      "grad_norm": 1.0912712812423706,
      "learning_rate": 0.0001270206709829888,
      "loss": 4.9184,
      "step": 1176
    },
    {
      "epoch": 0.42376237623762375,
      "grad_norm": 1.343735933303833,
      "learning_rate": 0.0001269097351180112,
      "loss": 4.8776,
      "step": 1177
    },
    {
      "epoch": 0.4241224122412241,
      "grad_norm": 1.0407140254974365,
      "learning_rate": 0.00012679876353900482,
      "loss": 4.752,
      "step": 1178
    },
    {
      "epoch": 0.4244824482448245,
      "grad_norm": 1.1934579610824585,
      "learning_rate": 0.00012668775639324874,
      "loss": 4.6586,
      "step": 1179
    },
    {
      "epoch": 0.42484248424842486,
      "grad_norm": 0.8980515599250793,
      "learning_rate": 0.00012657671382806937,
      "loss": 4.6048,
      "step": 1180
    },
    {
      "epoch": 0.4252025202520252,
      "grad_norm": 0.772833526134491,
      "learning_rate": 0.00012646563599083996,
      "loss": 4.8211,
      "step": 1181
    },
    {
      "epoch": 0.42556255625562556,
      "grad_norm": 0.8841773867607117,
      "learning_rate": 0.0001263545230289807,
      "loss": 4.4507,
      "step": 1182
    },
    {
      "epoch": 0.4259225922592259,
      "grad_norm": 0.7956357002258301,
      "learning_rate": 0.00012624337508995834,
      "loss": 5.0147,
      "step": 1183
    },
    {
      "epoch": 0.42628262826282626,
      "grad_norm": 0.8929400444030762,
      "learning_rate": 0.00012613219232128608,
      "loss": 4.9053,
      "step": 1184
    },
    {
      "epoch": 0.42664266426642666,
      "grad_norm": 0.7843483090400696,
      "learning_rate": 0.0001260209748705233,
      "loss": 5.0519,
      "step": 1185
    },
    {
      "epoch": 0.427002700270027,
      "grad_norm": 0.7303058505058289,
      "learning_rate": 0.00012590972288527546,
      "loss": 4.5666,
      "step": 1186
    },
    {
      "epoch": 0.42736273627362736,
      "grad_norm": 0.9950706362724304,
      "learning_rate": 0.0001257984365131938,
      "loss": 4.8163,
      "step": 1187
    },
    {
      "epoch": 0.4277227722772277,
      "grad_norm": 0.9004842042922974,
      "learning_rate": 0.0001256871159019753,
      "loss": 4.8224,
      "step": 1188
    },
    {
      "epoch": 0.42808280828082806,
      "grad_norm": 0.7937396168708801,
      "learning_rate": 0.00012557576119936225,
      "loss": 4.5765,
      "step": 1189
    },
    {
      "epoch": 0.42844284428442847,
      "grad_norm": 0.8856514096260071,
      "learning_rate": 0.00012546437255314222,
      "loss": 4.6348,
      "step": 1190
    },
    {
      "epoch": 0.4288028802880288,
      "grad_norm": 1.235495686531067,
      "learning_rate": 0.00012535295011114795,
      "loss": 4.8799,
      "step": 1191
    },
    {
      "epoch": 0.42916291629162917,
      "grad_norm": 1.0895787477493286,
      "learning_rate": 0.00012524149402125685,
      "loss": 5.2455,
      "step": 1192
    },
    {
      "epoch": 0.4295229522952295,
      "grad_norm": 0.842725396156311,
      "learning_rate": 0.00012513000443139112,
      "loss": 4.758,
      "step": 1193
    },
    {
      "epoch": 0.42988298829882987,
      "grad_norm": 0.8005720973014832,
      "learning_rate": 0.00012501848148951735,
      "loss": 4.7166,
      "step": 1194
    },
    {
      "epoch": 0.4302430243024302,
      "grad_norm": 0.9036151170730591,
      "learning_rate": 0.00012490692534364642,
      "loss": 4.3832,
      "step": 1195
    },
    {
      "epoch": 0.4306030603060306,
      "grad_norm": 1.1666127443313599,
      "learning_rate": 0.00012479533614183334,
      "loss": 4.6348,
      "step": 1196
    },
    {
      "epoch": 0.430963096309631,
      "grad_norm": 1.1041556596755981,
      "learning_rate": 0.00012468371403217684,
      "loss": 4.8944,
      "step": 1197
    },
    {
      "epoch": 0.4313231323132313,
      "grad_norm": 0.9621699452400208,
      "learning_rate": 0.00012457205916281943,
      "loss": 4.7718,
      "step": 1198
    },
    {
      "epoch": 0.4316831683168317,
      "grad_norm": 1.1753240823745728,
      "learning_rate": 0.00012446037168194714,
      "loss": 4.9495,
      "step": 1199
    },
    {
      "epoch": 0.43204320432043203,
      "grad_norm": 1.4692611694335938,
      "learning_rate": 0.00012434865173778915,
      "loss": 5.8088,
      "step": 1200
    },
    {
      "epoch": 0.4324032403240324,
      "grad_norm": 3.4769439697265625,
      "learning_rate": 0.00012423689947861787,
      "loss": 5.2299,
      "step": 1201
    },
    {
      "epoch": 0.4327632763276328,
      "grad_norm": 0.7332233190536499,
      "learning_rate": 0.00012412511505274844,
      "loss": 4.4305,
      "step": 1202
    },
    {
      "epoch": 0.43312331233123313,
      "grad_norm": 0.791152834892273,
      "learning_rate": 0.00012401329860853885,
      "loss": 5.0611,
      "step": 1203
    },
    {
      "epoch": 0.4334833483348335,
      "grad_norm": 1.2914446592330933,
      "learning_rate": 0.00012390145029438947,
      "loss": 4.9828,
      "step": 1204
    },
    {
      "epoch": 0.43384338433843384,
      "grad_norm": 0.8650038838386536,
      "learning_rate": 0.000123789570258743,
      "loss": 5.2274,
      "step": 1205
    },
    {
      "epoch": 0.4342034203420342,
      "grad_norm": 0.7898814678192139,
      "learning_rate": 0.00012367765865008428,
      "loss": 4.5792,
      "step": 1206
    },
    {
      "epoch": 0.4345634563456346,
      "grad_norm": 1.1311291456222534,
      "learning_rate": 0.00012356571561693996,
      "loss": 4.8372,
      "step": 1207
    },
    {
      "epoch": 0.43492349234923494,
      "grad_norm": 0.7760782241821289,
      "learning_rate": 0.00012345374130787854,
      "loss": 4.7993,
      "step": 1208
    },
    {
      "epoch": 0.4352835283528353,
      "grad_norm": 1.017967700958252,
      "learning_rate": 0.0001233417358715099,
      "loss": 4.6437,
      "step": 1209
    },
    {
      "epoch": 0.43564356435643564,
      "grad_norm": 1.2804354429244995,
      "learning_rate": 0.00012322969945648523,
      "loss": 4.2972,
      "step": 1210
    },
    {
      "epoch": 0.436003600360036,
      "grad_norm": 0.8337898254394531,
      "learning_rate": 0.000123117632211497,
      "loss": 4.6893,
      "step": 1211
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 0.867520809173584,
      "learning_rate": 0.00012300553428527832,
      "loss": 4.9483,
      "step": 1212
    },
    {
      "epoch": 0.43672367236723675,
      "grad_norm": 0.8490375280380249,
      "learning_rate": 0.0001228934058266033,
      "loss": 4.6251,
      "step": 1213
    },
    {
      "epoch": 0.4370837083708371,
      "grad_norm": 0.8703073263168335,
      "learning_rate": 0.0001227812469842864,
      "loss": 4.5385,
      "step": 1214
    },
    {
      "epoch": 0.43744374437443745,
      "grad_norm": 0.9746583104133606,
      "learning_rate": 0.0001226690579071825,
      "loss": 4.8996,
      "step": 1215
    },
    {
      "epoch": 0.4378037803780378,
      "grad_norm": 0.7318705916404724,
      "learning_rate": 0.00012255683874418645,
      "loss": 4.4165,
      "step": 1216
    },
    {
      "epoch": 0.43816381638163815,
      "grad_norm": 0.8295899629592896,
      "learning_rate": 0.00012244458964423327,
      "loss": 4.5525,
      "step": 1217
    },
    {
      "epoch": 0.4385238523852385,
      "grad_norm": 0.9655759930610657,
      "learning_rate": 0.00012233231075629747,
      "loss": 4.5796,
      "step": 1218
    },
    {
      "epoch": 0.4388838883888389,
      "grad_norm": 0.7518540620803833,
      "learning_rate": 0.0001222200022293933,
      "loss": 4.6897,
      "step": 1219
    },
    {
      "epoch": 0.43924392439243926,
      "grad_norm": 0.6703758239746094,
      "learning_rate": 0.0001221076642125742,
      "loss": 4.4407,
      "step": 1220
    },
    {
      "epoch": 0.4396039603960396,
      "grad_norm": 0.6767472624778748,
      "learning_rate": 0.00012199529685493278,
      "loss": 4.6747,
      "step": 1221
    },
    {
      "epoch": 0.43996399639963996,
      "grad_norm": 0.7684047818183899,
      "learning_rate": 0.00012188290030560063,
      "loss": 4.6929,
      "step": 1222
    },
    {
      "epoch": 0.4403240324032403,
      "grad_norm": 1.4957590103149414,
      "learning_rate": 0.00012177047471374807,
      "loss": 5.2717,
      "step": 1223
    },
    {
      "epoch": 0.44068406840684066,
      "grad_norm": 1.0840327739715576,
      "learning_rate": 0.00012165802022858399,
      "loss": 5.199,
      "step": 1224
    },
    {
      "epoch": 0.44104410441044106,
      "grad_norm": 1.6059932708740234,
      "learning_rate": 0.00012154553699935553,
      "loss": 5.493,
      "step": 1225
    },
    {
      "epoch": 0.4414041404140414,
      "grad_norm": 1.5921814441680908,
      "learning_rate": 0.0001214330251753481,
      "loss": 4.8692,
      "step": 1226
    },
    {
      "epoch": 0.44176417641764176,
      "grad_norm": 0.9325987696647644,
      "learning_rate": 0.00012132048490588492,
      "loss": 4.8154,
      "step": 1227
    },
    {
      "epoch": 0.4421242124212421,
      "grad_norm": 1.2291182279586792,
      "learning_rate": 0.00012120791634032715,
      "loss": 4.5689,
      "step": 1228
    },
    {
      "epoch": 0.44248424842484246,
      "grad_norm": 0.8440655469894409,
      "learning_rate": 0.00012109531962807332,
      "loss": 4.8022,
      "step": 1229
    },
    {
      "epoch": 0.44284428442844287,
      "grad_norm": 0.8819119334220886,
      "learning_rate": 0.00012098269491855942,
      "loss": 4.745,
      "step": 1230
    },
    {
      "epoch": 0.4432043204320432,
      "grad_norm": 0.7879696488380432,
      "learning_rate": 0.00012087004236125858,
      "loss": 4.9081,
      "step": 1231
    },
    {
      "epoch": 0.44356435643564357,
      "grad_norm": 0.8012781143188477,
      "learning_rate": 0.0001207573621056809,
      "loss": 5.1384,
      "step": 1232
    },
    {
      "epoch": 0.4439243924392439,
      "grad_norm": 4.395026683807373,
      "learning_rate": 0.00012064465430137315,
      "loss": 5.099,
      "step": 1233
    },
    {
      "epoch": 0.44428442844284427,
      "grad_norm": 0.737079381942749,
      "learning_rate": 0.00012053191909791883,
      "loss": 4.5297,
      "step": 1234
    },
    {
      "epoch": 0.4446444644464446,
      "grad_norm": 0.7090547680854797,
      "learning_rate": 0.00012041915664493761,
      "loss": 4.3716,
      "step": 1235
    },
    {
      "epoch": 0.445004500450045,
      "grad_norm": 0.8456137776374817,
      "learning_rate": 0.00012030636709208551,
      "loss": 4.5888,
      "step": 1236
    },
    {
      "epoch": 0.4453645364536454,
      "grad_norm": 0.9556293487548828,
      "learning_rate": 0.00012019355058905435,
      "loss": 4.6386,
      "step": 1237
    },
    {
      "epoch": 0.4457245724572457,
      "grad_norm": 1.3399239778518677,
      "learning_rate": 0.00012008070728557186,
      "loss": 4.9974,
      "step": 1238
    },
    {
      "epoch": 0.4460846084608461,
      "grad_norm": 0.9864078760147095,
      "learning_rate": 0.00011996783733140122,
      "loss": 4.5971,
      "step": 1239
    },
    {
      "epoch": 0.4464446444644464,
      "grad_norm": 0.9206048846244812,
      "learning_rate": 0.0001198549408763411,
      "loss": 4.6504,
      "step": 1240
    },
    {
      "epoch": 0.4468046804680468,
      "grad_norm": 0.7920734882354736,
      "learning_rate": 0.00011974201807022525,
      "loss": 4.5222,
      "step": 1241
    },
    {
      "epoch": 0.4471647164716472,
      "grad_norm": 0.9126737713813782,
      "learning_rate": 0.00011962906906292238,
      "loss": 4.4975,
      "step": 1242
    },
    {
      "epoch": 0.44752475247524753,
      "grad_norm": 0.8361069560050964,
      "learning_rate": 0.00011951609400433605,
      "loss": 4.8476,
      "step": 1243
    },
    {
      "epoch": 0.4478847884788479,
      "grad_norm": 1.2121682167053223,
      "learning_rate": 0.00011940309304440433,
      "loss": 4.9648,
      "step": 1244
    },
    {
      "epoch": 0.44824482448244823,
      "grad_norm": 0.7456281781196594,
      "learning_rate": 0.00011929006633309974,
      "loss": 4.6785,
      "step": 1245
    },
    {
      "epoch": 0.4486048604860486,
      "grad_norm": 0.775759220123291,
      "learning_rate": 0.00011917701402042889,
      "loss": 4.8784,
      "step": 1246
    },
    {
      "epoch": 0.448964896489649,
      "grad_norm": 1.0275262594223022,
      "learning_rate": 0.00011906393625643244,
      "loss": 5.1059,
      "step": 1247
    },
    {
      "epoch": 0.44932493249324934,
      "grad_norm": 0.8145440816879272,
      "learning_rate": 0.00011895083319118477,
      "loss": 5.0963,
      "step": 1248
    },
    {
      "epoch": 0.4496849684968497,
      "grad_norm": 1.1181066036224365,
      "learning_rate": 0.00011883770497479387,
      "loss": 5.0131,
      "step": 1249
    },
    {
      "epoch": 0.45004500450045004,
      "grad_norm": 1.8296819925308228,
      "learning_rate": 0.00011872455175740112,
      "loss": 5.2702,
      "step": 1250
    },
    {
      "epoch": 0.4504050405040504,
      "grad_norm": 1.730488896369934,
      "learning_rate": 0.00011861137368918105,
      "loss": 5.1305,
      "step": 1251
    },
    {
      "epoch": 0.45076507650765074,
      "grad_norm": 1.0825215578079224,
      "learning_rate": 0.00011849817092034118,
      "loss": 4.9019,
      "step": 1252
    },
    {
      "epoch": 0.45112511251125115,
      "grad_norm": 0.8123356103897095,
      "learning_rate": 0.00011838494360112185,
      "loss": 4.4342,
      "step": 1253
    },
    {
      "epoch": 0.4514851485148515,
      "grad_norm": 0.9870261549949646,
      "learning_rate": 0.00011827169188179592,
      "loss": 4.9825,
      "step": 1254
    },
    {
      "epoch": 0.45184518451845185,
      "grad_norm": 0.8250070810317993,
      "learning_rate": 0.00011815841591266872,
      "loss": 4.5846,
      "step": 1255
    },
    {
      "epoch": 0.4522052205220522,
      "grad_norm": 1.0997467041015625,
      "learning_rate": 0.00011804511584407763,
      "loss": 4.5639,
      "step": 1256
    },
    {
      "epoch": 0.45256525652565255,
      "grad_norm": 0.7792536020278931,
      "learning_rate": 0.00011793179182639218,
      "loss": 4.6994,
      "step": 1257
    },
    {
      "epoch": 0.4529252925292529,
      "grad_norm": 0.6587983965873718,
      "learning_rate": 0.00011781844401001352,
      "loss": 4.8202,
      "step": 1258
    },
    {
      "epoch": 0.4532853285328533,
      "grad_norm": 0.9794634580612183,
      "learning_rate": 0.00011770507254537453,
      "loss": 4.4016,
      "step": 1259
    },
    {
      "epoch": 0.45364536453645365,
      "grad_norm": 0.8039897084236145,
      "learning_rate": 0.00011759167758293935,
      "loss": 4.7848,
      "step": 1260
    },
    {
      "epoch": 0.454005400540054,
      "grad_norm": 1.3053600788116455,
      "learning_rate": 0.0001174782592732034,
      "loss": 4.9723,
      "step": 1261
    },
    {
      "epoch": 0.45436543654365436,
      "grad_norm": 1.1314433813095093,
      "learning_rate": 0.00011736481776669306,
      "loss": 4.9508,
      "step": 1262
    },
    {
      "epoch": 0.4547254725472547,
      "grad_norm": 0.8784576058387756,
      "learning_rate": 0.00011725135321396543,
      "loss": 4.7798,
      "step": 1263
    },
    {
      "epoch": 0.4550855085508551,
      "grad_norm": 0.7723816633224487,
      "learning_rate": 0.00011713786576560835,
      "loss": 4.8276,
      "step": 1264
    },
    {
      "epoch": 0.45544554455445546,
      "grad_norm": 0.8358681797981262,
      "learning_rate": 0.00011702435557223987,
      "loss": 4.6874,
      "step": 1265
    },
    {
      "epoch": 0.4558055805580558,
      "grad_norm": 0.9585774540901184,
      "learning_rate": 0.00011691082278450836,
      "loss": 4.7501,
      "step": 1266
    },
    {
      "epoch": 0.45616561656165616,
      "grad_norm": 0.7977786660194397,
      "learning_rate": 0.00011679726755309205,
      "loss": 4.6517,
      "step": 1267
    },
    {
      "epoch": 0.4565256525652565,
      "grad_norm": 0.8501828908920288,
      "learning_rate": 0.00011668369002869912,
      "loss": 4.9298,
      "step": 1268
    },
    {
      "epoch": 0.45688568856885686,
      "grad_norm": 1.0356849431991577,
      "learning_rate": 0.00011657009036206718,
      "loss": 4.7837,
      "step": 1269
    },
    {
      "epoch": 0.45724572457245727,
      "grad_norm": 0.8533749580383301,
      "learning_rate": 0.00011645646870396333,
      "loss": 4.7115,
      "step": 1270
    },
    {
      "epoch": 0.4576057605760576,
      "grad_norm": 0.8388182520866394,
      "learning_rate": 0.00011634282520518383,
      "loss": 5.0224,
      "step": 1271
    },
    {
      "epoch": 0.45796579657965797,
      "grad_norm": 1.1325950622558594,
      "learning_rate": 0.00011622916001655388,
      "loss": 4.6831,
      "step": 1272
    },
    {
      "epoch": 0.4583258325832583,
      "grad_norm": 0.7390809059143066,
      "learning_rate": 0.00011611547328892754,
      "loss": 5.0284,
      "step": 1273
    },
    {
      "epoch": 0.45868586858685867,
      "grad_norm": 1.6919934749603271,
      "learning_rate": 0.00011600176517318741,
      "loss": 5.3664,
      "step": 1274
    },
    {
      "epoch": 0.459045904590459,
      "grad_norm": 1.8952467441558838,
      "learning_rate": 0.00011588803582024446,
      "loss": 5.2845,
      "step": 1275
    },
    {
      "epoch": 0.4594059405940594,
      "grad_norm": 1.0147459506988525,
      "learning_rate": 0.0001157742853810379,
      "loss": 4.4858,
      "step": 1276
    },
    {
      "epoch": 0.4597659765976598,
      "grad_norm": 0.9189479947090149,
      "learning_rate": 0.00011566051400653486,
      "loss": 4.9424,
      "step": 1277
    },
    {
      "epoch": 0.4601260126012601,
      "grad_norm": 1.2866673469543457,
      "learning_rate": 0.00011554672184773032,
      "loss": 4.8392,
      "step": 1278
    },
    {
      "epoch": 0.4604860486048605,
      "grad_norm": 0.6796092987060547,
      "learning_rate": 0.00011543290905564683,
      "loss": 4.7882,
      "step": 1279
    },
    {
      "epoch": 0.4608460846084608,
      "grad_norm": 0.7936111092567444,
      "learning_rate": 0.00011531907578133429,
      "loss": 5.0133,
      "step": 1280
    },
    {
      "epoch": 0.46120612061206123,
      "grad_norm": 0.8667351007461548,
      "learning_rate": 0.00011520522217586984,
      "loss": 4.7428,
      "step": 1281
    },
    {
      "epoch": 0.4615661566156616,
      "grad_norm": 0.796633243560791,
      "learning_rate": 0.00011509134839035748,
      "loss": 4.8215,
      "step": 1282
    },
    {
      "epoch": 0.46192619261926193,
      "grad_norm": 0.7455129027366638,
      "learning_rate": 0.00011497745457592816,
      "loss": 4.8385,
      "step": 1283
    },
    {
      "epoch": 0.4622862286228623,
      "grad_norm": 0.6883043646812439,
      "learning_rate": 0.0001148635408837393,
      "loss": 4.9312,
      "step": 1284
    },
    {
      "epoch": 0.46264626462646263,
      "grad_norm": 1.5165472030639648,
      "learning_rate": 0.00011474960746497472,
      "loss": 4.987,
      "step": 1285
    },
    {
      "epoch": 0.463006300630063,
      "grad_norm": 0.7015155553817749,
      "learning_rate": 0.00011463565447084445,
      "loss": 4.4301,
      "step": 1286
    },
    {
      "epoch": 0.4633663366336634,
      "grad_norm": 0.9830206632614136,
      "learning_rate": 0.0001145216820525845,
      "loss": 4.8288,
      "step": 1287
    },
    {
      "epoch": 0.46372637263726374,
      "grad_norm": 1.205315113067627,
      "learning_rate": 0.00011440769036145662,
      "loss": 4.9204,
      "step": 1288
    },
    {
      "epoch": 0.4640864086408641,
      "grad_norm": 0.6896906495094299,
      "learning_rate": 0.00011429367954874819,
      "loss": 4.5911,
      "step": 1289
    },
    {
      "epoch": 0.46444644464446444,
      "grad_norm": 0.6286702752113342,
      "learning_rate": 0.00011417964976577187,
      "loss": 4.7917,
      "step": 1290
    },
    {
      "epoch": 0.4648064806480648,
      "grad_norm": 0.7007503509521484,
      "learning_rate": 0.00011406560116386562,
      "loss": 4.8353,
      "step": 1291
    },
    {
      "epoch": 0.46516651665166514,
      "grad_norm": 0.785198986530304,
      "learning_rate": 0.00011395153389439233,
      "loss": 4.988,
      "step": 1292
    },
    {
      "epoch": 0.46552655265526555,
      "grad_norm": 0.643828272819519,
      "learning_rate": 0.0001138374481087396,
      "loss": 4.4371,
      "step": 1293
    },
    {
      "epoch": 0.4658865886588659,
      "grad_norm": 0.8021451234817505,
      "learning_rate": 0.00011372334395831972,
      "loss": 4.6181,
      "step": 1294
    },
    {
      "epoch": 0.46624662466246625,
      "grad_norm": 1.27736234664917,
      "learning_rate": 0.00011360922159456928,
      "loss": 4.8397,
      "step": 1295
    },
    {
      "epoch": 0.4666066606660666,
      "grad_norm": 0.6858236193656921,
      "learning_rate": 0.00011349508116894903,
      "loss": 4.9309,
      "step": 1296
    },
    {
      "epoch": 0.46696669666966695,
      "grad_norm": 0.9422666430473328,
      "learning_rate": 0.00011338092283294377,
      "loss": 4.8297,
      "step": 1297
    },
    {
      "epoch": 0.46732673267326735,
      "grad_norm": 0.8717568516731262,
      "learning_rate": 0.00011326674673806195,
      "loss": 5.2597,
      "step": 1298
    },
    {
      "epoch": 0.4676867686768677,
      "grad_norm": 1.1170207262039185,
      "learning_rate": 0.00011315255303583572,
      "loss": 5.1702,
      "step": 1299
    },
    {
      "epoch": 0.46804680468046805,
      "grad_norm": 1.6666972637176514,
      "learning_rate": 0.0001130383418778205,
      "loss": 5.3505,
      "step": 1300
    },
    {
      "epoch": 0.4684068406840684,
      "grad_norm": 4.088332653045654,
      "learning_rate": 0.0001129241134155949,
      "loss": 4.9151,
      "step": 1301
    },
    {
      "epoch": 0.46876687668766875,
      "grad_norm": 0.8962947130203247,
      "learning_rate": 0.00011280986780076057,
      "loss": 4.916,
      "step": 1302
    },
    {
      "epoch": 0.4691269126912691,
      "grad_norm": 0.748858630657196,
      "learning_rate": 0.0001126956051849418,
      "loss": 4.5858,
      "step": 1303
    },
    {
      "epoch": 0.4694869486948695,
      "grad_norm": 0.9193429350852966,
      "learning_rate": 0.00011258132571978555,
      "loss": 4.6093,
      "step": 1304
    },
    {
      "epoch": 0.46984698469846986,
      "grad_norm": 0.9823437929153442,
      "learning_rate": 0.00011246702955696106,
      "loss": 4.849,
      "step": 1305
    },
    {
      "epoch": 0.4702070207020702,
      "grad_norm": 0.6782870292663574,
      "learning_rate": 0.0001123527168481598,
      "loss": 4.9946,
      "step": 1306
    },
    {
      "epoch": 0.47056705670567056,
      "grad_norm": 0.6508191823959351,
      "learning_rate": 0.00011223838774509514,
      "loss": 4.7743,
      "step": 1307
    },
    {
      "epoch": 0.4709270927092709,
      "grad_norm": 0.682228684425354,
      "learning_rate": 0.00011212404239950224,
      "loss": 4.7985,
      "step": 1308
    },
    {
      "epoch": 0.47128712871287126,
      "grad_norm": 1.045074224472046,
      "learning_rate": 0.00011200968096313787,
      "loss": 4.6407,
      "step": 1309
    },
    {
      "epoch": 0.47164716471647167,
      "grad_norm": 1.3870502710342407,
      "learning_rate": 0.00011189530358778005,
      "loss": 4.9701,
      "step": 1310
    },
    {
      "epoch": 0.472007200720072,
      "grad_norm": 0.7295395731925964,
      "learning_rate": 0.00011178091042522795,
      "loss": 4.372,
      "step": 1311
    },
    {
      "epoch": 0.47236723672367237,
      "grad_norm": 1.3397456407546997,
      "learning_rate": 0.00011166650162730188,
      "loss": 4.9535,
      "step": 1312
    },
    {
      "epoch": 0.4727272727272727,
      "grad_norm": 0.8281415700912476,
      "learning_rate": 0.00011155207734584263,
      "loss": 4.5739,
      "step": 1313
    },
    {
      "epoch": 0.47308730873087307,
      "grad_norm": 0.7725363969802856,
      "learning_rate": 0.00011143763773271178,
      "loss": 5.0986,
      "step": 1314
    },
    {
      "epoch": 0.4734473447344735,
      "grad_norm": 0.7773278951644897,
      "learning_rate": 0.00011132318293979109,
      "loss": 4.8597,
      "step": 1315
    },
    {
      "epoch": 0.4738073807380738,
      "grad_norm": 0.8446284532546997,
      "learning_rate": 0.00011120871311898254,
      "loss": 4.6486,
      "step": 1316
    },
    {
      "epoch": 0.4741674167416742,
      "grad_norm": 0.7795506119728088,
      "learning_rate": 0.00011109422842220805,
      "loss": 4.6422,
      "step": 1317
    },
    {
      "epoch": 0.4745274527452745,
      "grad_norm": 0.6184239387512207,
      "learning_rate": 0.0001109797290014093,
      "loss": 4.4654,
      "step": 1318
    },
    {
      "epoch": 0.4748874887488749,
      "grad_norm": 0.8903937339782715,
      "learning_rate": 0.00011086521500854745,
      "loss": 4.5689,
      "step": 1319
    },
    {
      "epoch": 0.4752475247524752,
      "grad_norm": 0.7741526365280151,
      "learning_rate": 0.00011075068659560308,
      "loss": 4.6233,
      "step": 1320
    },
    {
      "epoch": 0.47560756075607563,
      "grad_norm": 0.9469612836837769,
      "learning_rate": 0.00011063614391457582,
      "loss": 4.9393,
      "step": 1321
    },
    {
      "epoch": 0.475967596759676,
      "grad_norm": 1.173879623413086,
      "learning_rate": 0.00011052158711748434,
      "loss": 5.1219,
      "step": 1322
    },
    {
      "epoch": 0.47632763276327633,
      "grad_norm": 1.115415334701538,
      "learning_rate": 0.00011040701635636592,
      "loss": 4.6119,
      "step": 1323
    },
    {
      "epoch": 0.4766876687668767,
      "grad_norm": 1.5755536556243896,
      "learning_rate": 0.00011029243178327649,
      "loss": 5.3156,
      "step": 1324
    },
    {
      "epoch": 0.47704770477047703,
      "grad_norm": 1.1577731370925903,
      "learning_rate": 0.00011017783355029026,
      "loss": 5.1467,
      "step": 1325
    },
    {
      "epoch": 0.4774077407740774,
      "grad_norm": 1.519116759300232,
      "learning_rate": 0.00011006322180949953,
      "loss": 4.6894,
      "step": 1326
    },
    {
      "epoch": 0.4777677767776778,
      "grad_norm": 0.9527605772018433,
      "learning_rate": 0.00010994859671301462,
      "loss": 5.0449,
      "step": 1327
    },
    {
      "epoch": 0.47812781278127814,
      "grad_norm": 0.8860084414482117,
      "learning_rate": 0.00010983395841296348,
      "loss": 4.6926,
      "step": 1328
    },
    {
      "epoch": 0.4784878487848785,
      "grad_norm": 0.8542980551719666,
      "learning_rate": 0.00010971930706149167,
      "loss": 4.5156,
      "step": 1329
    },
    {
      "epoch": 0.47884788478847884,
      "grad_norm": 0.9299918413162231,
      "learning_rate": 0.00010960464281076197,
      "loss": 4.6254,
      "step": 1330
    },
    {
      "epoch": 0.4792079207920792,
      "grad_norm": 0.7832848429679871,
      "learning_rate": 0.00010948996581295436,
      "loss": 4.4019,
      "step": 1331
    },
    {
      "epoch": 0.4795679567956796,
      "grad_norm": 0.7857620120048523,
      "learning_rate": 0.00010937527622026575,
      "loss": 4.7156,
      "step": 1332
    },
    {
      "epoch": 0.47992799279927995,
      "grad_norm": 0.8863069415092468,
      "learning_rate": 0.00010926057418490971,
      "loss": 4.5967,
      "step": 1333
    },
    {
      "epoch": 0.4802880288028803,
      "grad_norm": 0.9793080687522888,
      "learning_rate": 0.00010914585985911632,
      "loss": 4.4778,
      "step": 1334
    },
    {
      "epoch": 0.48064806480648065,
      "grad_norm": 1.0404165983200073,
      "learning_rate": 0.00010903113339513205,
      "loss": 4.9781,
      "step": 1335
    },
    {
      "epoch": 0.481008100810081,
      "grad_norm": 1.001639723777771,
      "learning_rate": 0.00010891639494521935,
      "loss": 4.8293,
      "step": 1336
    },
    {
      "epoch": 0.48136813681368135,
      "grad_norm": 0.8920490145683289,
      "learning_rate": 0.00010880164466165674,
      "loss": 4.3999,
      "step": 1337
    },
    {
      "epoch": 0.48172817281728175,
      "grad_norm": 1.003096342086792,
      "learning_rate": 0.00010868688269673828,
      "loss": 5.01,
      "step": 1338
    },
    {
      "epoch": 0.4820882088208821,
      "grad_norm": 0.7430381774902344,
      "learning_rate": 0.00010857210920277366,
      "loss": 4.8974,
      "step": 1339
    },
    {
      "epoch": 0.48244824482448245,
      "grad_norm": 0.8446447849273682,
      "learning_rate": 0.00010845732433208779,
      "loss": 4.6448,
      "step": 1340
    },
    {
      "epoch": 0.4828082808280828,
      "grad_norm": 0.7498891353607178,
      "learning_rate": 0.00010834252823702076,
      "loss": 4.761,
      "step": 1341
    },
    {
      "epoch": 0.48316831683168315,
      "grad_norm": 0.6509808301925659,
      "learning_rate": 0.00010822772106992747,
      "loss": 4.7459,
      "step": 1342
    },
    {
      "epoch": 0.4835283528352835,
      "grad_norm": 0.9102056622505188,
      "learning_rate": 0.00010811290298317755,
      "loss": 4.9137,
      "step": 1343
    },
    {
      "epoch": 0.4838883888388839,
      "grad_norm": 1.0613189935684204,
      "learning_rate": 0.00010799807412915517,
      "loss": 4.3103,
      "step": 1344
    },
    {
      "epoch": 0.48424842484248426,
      "grad_norm": 1.0716426372528076,
      "learning_rate": 0.0001078832346602587,
      "loss": 5.0106,
      "step": 1345
    },
    {
      "epoch": 0.4846084608460846,
      "grad_norm": 0.8888649344444275,
      "learning_rate": 0.00010776838472890065,
      "loss": 4.9421,
      "step": 1346
    },
    {
      "epoch": 0.48496849684968496,
      "grad_norm": 0.7990186214447021,
      "learning_rate": 0.0001076535244875074,
      "loss": 5.0825,
      "step": 1347
    },
    {
      "epoch": 0.4853285328532853,
      "grad_norm": 1.1681544780731201,
      "learning_rate": 0.00010753865408851906,
      "loss": 4.9825,
      "step": 1348
    },
    {
      "epoch": 0.4856885688568857,
      "grad_norm": 1.141554355621338,
      "learning_rate": 0.00010742377368438914,
      "loss": 5.1929,
      "step": 1349
    },
    {
      "epoch": 0.48604860486048607,
      "grad_norm": 1.1723651885986328,
      "learning_rate": 0.00010730888342758454,
      "loss": 5.4049,
      "step": 1350
    },
    {
      "epoch": 0.4864086408640864,
      "grad_norm": 2.490889310836792,
      "learning_rate": 0.0001071939834705851,
      "loss": 4.6995,
      "step": 1351
    },
    {
      "epoch": 0.48676867686768677,
      "grad_norm": 0.8721453547477722,
      "learning_rate": 0.00010707907396588361,
      "loss": 4.7901,
      "step": 1352
    },
    {
      "epoch": 0.4871287128712871,
      "grad_norm": 1.2546683549880981,
      "learning_rate": 0.00010696415506598557,
      "loss": 4.5257,
      "step": 1353
    },
    {
      "epoch": 0.48748874887488747,
      "grad_norm": 0.9638644456863403,
      "learning_rate": 0.00010684922692340884,
      "loss": 4.6166,
      "step": 1354
    },
    {
      "epoch": 0.4878487848784879,
      "grad_norm": 0.8636690974235535,
      "learning_rate": 0.00010673428969068364,
      "loss": 4.9165,
      "step": 1355
    },
    {
      "epoch": 0.4882088208820882,
      "grad_norm": 1.7687629461288452,
      "learning_rate": 0.00010661934352035224,
      "loss": 4.5002,
      "step": 1356
    },
    {
      "epoch": 0.4885688568856886,
      "grad_norm": 0.9787132740020752,
      "learning_rate": 0.00010650438856496872,
      "loss": 4.5295,
      "step": 1357
    },
    {
      "epoch": 0.4889288928892889,
      "grad_norm": 0.8555500507354736,
      "learning_rate": 0.0001063894249770989,
      "loss": 4.7362,
      "step": 1358
    },
    {
      "epoch": 0.4892889288928893,
      "grad_norm": 1.4172734022140503,
      "learning_rate": 0.00010627445290931997,
      "loss": 4.7544,
      "step": 1359
    },
    {
      "epoch": 0.4896489648964896,
      "grad_norm": 1.0591694116592407,
      "learning_rate": 0.00010615947251422044,
      "loss": 4.5676,
      "step": 1360
    },
    {
      "epoch": 0.49000900090009003,
      "grad_norm": 1.2712838649749756,
      "learning_rate": 0.00010604448394439983,
      "loss": 5.1231,
      "step": 1361
    },
    {
      "epoch": 0.4903690369036904,
      "grad_norm": 1.0828771591186523,
      "learning_rate": 0.00010592948735246854,
      "loss": 4.7859,
      "step": 1362
    },
    {
      "epoch": 0.49072907290729073,
      "grad_norm": 1.1068415641784668,
      "learning_rate": 0.00010581448289104758,
      "loss": 5.0285,
      "step": 1363
    },
    {
      "epoch": 0.4910891089108911,
      "grad_norm": 0.7831496000289917,
      "learning_rate": 0.00010569947071276847,
      "loss": 4.5127,
      "step": 1364
    },
    {
      "epoch": 0.49144914491449143,
      "grad_norm": 0.9309321641921997,
      "learning_rate": 0.00010558445097027292,
      "loss": 4.8782,
      "step": 1365
    },
    {
      "epoch": 0.4918091809180918,
      "grad_norm": 0.8637677431106567,
      "learning_rate": 0.00010546942381621265,
      "loss": 4.4378,
      "step": 1366
    },
    {
      "epoch": 0.4921692169216922,
      "grad_norm": 0.7803555727005005,
      "learning_rate": 0.0001053543894032493,
      "loss": 4.681,
      "step": 1367
    },
    {
      "epoch": 0.49252925292529254,
      "grad_norm": 0.8252502083778381,
      "learning_rate": 0.00010523934788405407,
      "loss": 5.1739,
      "step": 1368
    },
    {
      "epoch": 0.4928892889288929,
      "grad_norm": 0.750989556312561,
      "learning_rate": 0.00010512429941130766,
      "loss": 4.6177,
      "step": 1369
    },
    {
      "epoch": 0.49324932493249324,
      "grad_norm": 0.8373405933380127,
      "learning_rate": 0.00010500924413769988,
      "loss": 4.8114,
      "step": 1370
    },
    {
      "epoch": 0.4936093609360936,
      "grad_norm": 0.764385998249054,
      "learning_rate": 0.00010489418221592973,
      "loss": 4.6901,
      "step": 1371
    },
    {
      "epoch": 0.493969396939694,
      "grad_norm": 1.1150025129318237,
      "learning_rate": 0.00010477911379870488,
      "loss": 5.0304,
      "step": 1372
    },
    {
      "epoch": 0.49432943294329434,
      "grad_norm": 1.9383710622787476,
      "learning_rate": 0.00010466403903874176,
      "loss": 5.4131,
      "step": 1373
    },
    {
      "epoch": 0.4946894689468947,
      "grad_norm": 1.0594481229782104,
      "learning_rate": 0.0001045489580887651,
      "loss": 4.9395,
      "step": 1374
    },
    {
      "epoch": 0.49504950495049505,
      "grad_norm": 1.1613411903381348,
      "learning_rate": 0.00010443387110150791,
      "loss": 5.1006,
      "step": 1375
    },
    {
      "epoch": 0.4954095409540954,
      "grad_norm": 2.1980786323547363,
      "learning_rate": 0.00010431877822971117,
      "loss": 5.0446,
      "step": 1376
    },
    {
      "epoch": 0.49576957695769575,
      "grad_norm": 0.950690746307373,
      "learning_rate": 0.00010420367962612372,
      "loss": 4.9041,
      "step": 1377
    },
    {
      "epoch": 0.49612961296129615,
      "grad_norm": 0.8650350570678711,
      "learning_rate": 0.00010408857544350194,
      "loss": 4.6469,
      "step": 1378
    },
    {
      "epoch": 0.4964896489648965,
      "grad_norm": 0.6349000930786133,
      "learning_rate": 0.00010397346583460971,
      "loss": 4.6103,
      "step": 1379
    },
    {
      "epoch": 0.49684968496849685,
      "grad_norm": 0.8492519855499268,
      "learning_rate": 0.00010385835095221803,
      "loss": 4.9419,
      "step": 1380
    },
    {
      "epoch": 0.4972097209720972,
      "grad_norm": 0.7640931606292725,
      "learning_rate": 0.00010374323094910496,
      "loss": 4.7148,
      "step": 1381
    },
    {
      "epoch": 0.49756975697569755,
      "grad_norm": 0.705634593963623,
      "learning_rate": 0.00010362810597805526,
      "loss": 4.7858,
      "step": 1382
    },
    {
      "epoch": 0.4979297929792979,
      "grad_norm": 0.640390932559967,
      "learning_rate": 0.0001035129761918604,
      "loss": 4.6866,
      "step": 1383
    },
    {
      "epoch": 0.4982898289828983,
      "grad_norm": 0.6646218299865723,
      "learning_rate": 0.0001033978417433182,
      "loss": 4.7531,
      "step": 1384
    },
    {
      "epoch": 0.49864986498649866,
      "grad_norm": 0.7338967323303223,
      "learning_rate": 0.00010328270278523256,
      "loss": 4.8981,
      "step": 1385
    },
    {
      "epoch": 0.499009900990099,
      "grad_norm": 0.8542543053627014,
      "learning_rate": 0.00010316755947041352,
      "loss": 4.7243,
      "step": 1386
    },
    {
      "epoch": 0.49936993699369936,
      "grad_norm": 0.9543262124061584,
      "learning_rate": 0.00010305241195167687,
      "loss": 4.5981,
      "step": 1387
    },
    {
      "epoch": 0.4997299729972997,
      "grad_norm": 1.9068870544433594,
      "learning_rate": 0.00010293726038184393,
      "loss": 5.2105,
      "step": 1388
    },
    {
      "epoch": 0.5000900090009001,
      "grad_norm": 0.8715320229530334,
      "learning_rate": 0.00010282210491374138,
      "loss": 4.6966,
      "step": 1389
    },
    {
      "epoch": 0.5004500450045004,
      "grad_norm": 0.6652578115463257,
      "learning_rate": 0.00010270694570020116,
      "loss": 4.7134,
      "step": 1390
    },
    {
      "epoch": 0.5004500450045004,
      "eval_loss": 4.819091796875,
      "eval_runtime": 100.2944,
      "eval_samples_per_second": 46.643,
      "eval_steps_per_second": 11.666,
      "step": 1390
    },
    {
      "epoch": 0.5008100810081008,
      "grad_norm": 0.826438844203949,
      "learning_rate": 0.00010259178289406011,
      "loss": 4.8383,
      "step": 1391
    },
    {
      "epoch": 0.5011701170117012,
      "grad_norm": 0.7669975757598877,
      "learning_rate": 0.00010247661664815986,
      "loss": 4.9241,
      "step": 1392
    },
    {
      "epoch": 0.5015301530153016,
      "grad_norm": 0.7355509996414185,
      "learning_rate": 0.00010236144711534661,
      "loss": 4.4544,
      "step": 1393
    },
    {
      "epoch": 0.5018901890189019,
      "grad_norm": 0.6872754693031311,
      "learning_rate": 0.0001022462744484709,
      "loss": 4.9361,
      "step": 1394
    },
    {
      "epoch": 0.5022502250225023,
      "grad_norm": 0.6951656937599182,
      "learning_rate": 0.00010213109880038747,
      "loss": 4.7553,
      "step": 1395
    },
    {
      "epoch": 0.5026102610261026,
      "grad_norm": 0.6541697382926941,
      "learning_rate": 0.00010201592032395499,
      "loss": 4.6221,
      "step": 1396
    },
    {
      "epoch": 0.502970297029703,
      "grad_norm": 0.729430079460144,
      "learning_rate": 0.00010190073917203589,
      "loss": 4.9455,
      "step": 1397
    },
    {
      "epoch": 0.5033303330333033,
      "grad_norm": 1.0041310787200928,
      "learning_rate": 0.00010178555549749613,
      "loss": 5.1839,
      "step": 1398
    },
    {
      "epoch": 0.5036903690369037,
      "grad_norm": 0.9455398321151733,
      "learning_rate": 0.00010167036945320503,
      "loss": 5.0098,
      "step": 1399
    },
    {
      "epoch": 0.504050405040504,
      "grad_norm": 1.5032222270965576,
      "learning_rate": 0.0001015551811920351,
      "loss": 5.4927,
      "step": 1400
    },
    {
      "epoch": 0.5044104410441044,
      "grad_norm": 0.9934758543968201,
      "learning_rate": 0.00010143999086686171,
      "loss": 5.1418,
      "step": 1401
    },
    {
      "epoch": 0.5047704770477047,
      "grad_norm": 1.0731993913650513,
      "learning_rate": 0.00010132479863056303,
      "loss": 5.1302,
      "step": 1402
    },
    {
      "epoch": 0.5051305130513052,
      "grad_norm": 0.7515692710876465,
      "learning_rate": 0.00010120960463601976,
      "loss": 4.5247,
      "step": 1403
    },
    {
      "epoch": 0.5054905490549055,
      "grad_norm": 1.0562608242034912,
      "learning_rate": 0.00010109440903611493,
      "loss": 4.9218,
      "step": 1404
    },
    {
      "epoch": 0.5058505850585059,
      "grad_norm": 0.7232738137245178,
      "learning_rate": 0.00010097921198373368,
      "loss": 5.0449,
      "step": 1405
    },
    {
      "epoch": 0.5062106210621062,
      "grad_norm": 1.5582194328308105,
      "learning_rate": 0.00010086401363176305,
      "loss": 4.9883,
      "step": 1406
    },
    {
      "epoch": 0.5065706570657066,
      "grad_norm": 0.5701386332511902,
      "learning_rate": 0.00010074881413309193,
      "loss": 4.7997,
      "step": 1407
    },
    {
      "epoch": 0.5069306930693069,
      "grad_norm": 0.8754087686538696,
      "learning_rate": 0.00010063361364061057,
      "loss": 4.5842,
      "step": 1408
    },
    {
      "epoch": 0.5072907290729073,
      "grad_norm": 0.9331681132316589,
      "learning_rate": 0.00010051841230721065,
      "loss": 4.5656,
      "step": 1409
    },
    {
      "epoch": 0.5076507650765076,
      "grad_norm": 1.0775038003921509,
      "learning_rate": 0.0001004032102857849,
      "loss": 4.5366,
      "step": 1410
    },
    {
      "epoch": 0.508010801080108,
      "grad_norm": 1.0820711851119995,
      "learning_rate": 0.00010028800772922706,
      "loss": 4.859,
      "step": 1411
    },
    {
      "epoch": 0.5083708370837083,
      "grad_norm": 0.8344745635986328,
      "learning_rate": 0.00010017280479043147,
      "loss": 4.7325,
      "step": 1412
    },
    {
      "epoch": 0.5087308730873087,
      "grad_norm": 0.9460582137107849,
      "learning_rate": 0.00010005760162229305,
      "loss": 4.9831,
      "step": 1413
    },
    {
      "epoch": 0.509090909090909,
      "grad_norm": 0.8958419561386108,
      "learning_rate": 9.994239837770699e-05,
      "loss": 5.0868,
      "step": 1414
    },
    {
      "epoch": 0.5094509450945095,
      "grad_norm": 0.792186975479126,
      "learning_rate": 9.982719520956855e-05,
      "loss": 4.7051,
      "step": 1415
    },
    {
      "epoch": 0.5098109810981098,
      "grad_norm": 0.7827296257019043,
      "learning_rate": 9.971199227077295e-05,
      "loss": 4.8934,
      "step": 1416
    },
    {
      "epoch": 0.5101710171017102,
      "grad_norm": 0.8142650723457336,
      "learning_rate": 9.959678971421508e-05,
      "loss": 4.4786,
      "step": 1417
    },
    {
      "epoch": 0.5105310531053106,
      "grad_norm": 0.9982710480690002,
      "learning_rate": 9.948158769278939e-05,
      "loss": 4.8992,
      "step": 1418
    },
    {
      "epoch": 0.5108910891089109,
      "grad_norm": 0.8093355298042297,
      "learning_rate": 9.936638635938945e-05,
      "loss": 4.9529,
      "step": 1419
    },
    {
      "epoch": 0.5112511251125113,
      "grad_norm": 0.6960091590881348,
      "learning_rate": 9.925118586690809e-05,
      "loss": 4.6388,
      "step": 1420
    },
    {
      "epoch": 0.5116111611161116,
      "grad_norm": 0.8927443623542786,
      "learning_rate": 9.913598636823693e-05,
      "loss": 4.7726,
      "step": 1421
    },
    {
      "epoch": 0.511971197119712,
      "grad_norm": 1.0828852653503418,
      "learning_rate": 9.902078801626636e-05,
      "loss": 5.1102,
      "step": 1422
    },
    {
      "epoch": 0.5123312331233123,
      "grad_norm": 0.8618959784507751,
      "learning_rate": 9.890559096388509e-05,
      "loss": 4.7913,
      "step": 1423
    },
    {
      "epoch": 0.5126912691269127,
      "grad_norm": 1.5530176162719727,
      "learning_rate": 9.879039536398024e-05,
      "loss": 4.9746,
      "step": 1424
    },
    {
      "epoch": 0.513051305130513,
      "grad_norm": 1.6995723247528076,
      "learning_rate": 9.867520136943698e-05,
      "loss": 4.9776,
      "step": 1425
    },
    {
      "epoch": 0.5134113411341135,
      "grad_norm": 1.1592552661895752,
      "learning_rate": 9.856000913313832e-05,
      "loss": 4.7069,
      "step": 1426
    },
    {
      "epoch": 0.5137713771377138,
      "grad_norm": 0.8983449339866638,
      "learning_rate": 9.844481880796491e-05,
      "loss": 4.7614,
      "step": 1427
    },
    {
      "epoch": 0.5141314131413142,
      "grad_norm": 0.6931654810905457,
      "learning_rate": 9.832963054679497e-05,
      "loss": 4.7884,
      "step": 1428
    },
    {
      "epoch": 0.5144914491449145,
      "grad_norm": 1.0931594371795654,
      "learning_rate": 9.821444450250392e-05,
      "loss": 4.7423,
      "step": 1429
    },
    {
      "epoch": 0.5148514851485149,
      "grad_norm": 0.8028708696365356,
      "learning_rate": 9.809926082796415e-05,
      "loss": 4.932,
      "step": 1430
    },
    {
      "epoch": 0.5152115211521152,
      "grad_norm": 0.5453850030899048,
      "learning_rate": 9.798407967604502e-05,
      "loss": 4.886,
      "step": 1431
    },
    {
      "epoch": 0.5155715571557156,
      "grad_norm": 0.8439428806304932,
      "learning_rate": 9.786890119961253e-05,
      "loss": 4.9247,
      "step": 1432
    },
    {
      "epoch": 0.5159315931593159,
      "grad_norm": 0.7233529686927795,
      "learning_rate": 9.775372555152912e-05,
      "loss": 4.9305,
      "step": 1433
    },
    {
      "epoch": 0.5162916291629163,
      "grad_norm": 0.6636310815811157,
      "learning_rate": 9.763855288465341e-05,
      "loss": 4.5924,
      "step": 1434
    },
    {
      "epoch": 0.5166516651665166,
      "grad_norm": 0.6490011215209961,
      "learning_rate": 9.752338335184015e-05,
      "loss": 4.6092,
      "step": 1435
    },
    {
      "epoch": 0.517011701170117,
      "grad_norm": 0.6125934720039368,
      "learning_rate": 9.740821710593989e-05,
      "loss": 4.725,
      "step": 1436
    },
    {
      "epoch": 0.5173717371737173,
      "grad_norm": 0.8968194127082825,
      "learning_rate": 9.729305429979887e-05,
      "loss": 4.8702,
      "step": 1437
    },
    {
      "epoch": 0.5177317731773178,
      "grad_norm": 0.9133161902427673,
      "learning_rate": 9.717789508625865e-05,
      "loss": 4.7776,
      "step": 1438
    },
    {
      "epoch": 0.5180918091809181,
      "grad_norm": 0.7110667824745178,
      "learning_rate": 9.70627396181561e-05,
      "loss": 4.5551,
      "step": 1439
    },
    {
      "epoch": 0.5184518451845185,
      "grad_norm": 0.7550270557403564,
      "learning_rate": 9.694758804832314e-05,
      "loss": 4.9471,
      "step": 1440
    },
    {
      "epoch": 0.5188118811881188,
      "grad_norm": 0.8355233669281006,
      "learning_rate": 9.68324405295865e-05,
      "loss": 4.6401,
      "step": 1441
    },
    {
      "epoch": 0.5191719171917192,
      "grad_norm": 1.0176023244857788,
      "learning_rate": 9.671729721476746e-05,
      "loss": 4.7754,
      "step": 1442
    },
    {
      "epoch": 0.5195319531953195,
      "grad_norm": 0.7926576733589172,
      "learning_rate": 9.660215825668184e-05,
      "loss": 4.7933,
      "step": 1443
    },
    {
      "epoch": 0.5198919891989199,
      "grad_norm": 0.8102412819862366,
      "learning_rate": 9.648702380813958e-05,
      "loss": 4.9955,
      "step": 1444
    },
    {
      "epoch": 0.5202520252025202,
      "grad_norm": 0.7579959034919739,
      "learning_rate": 9.637189402194476e-05,
      "loss": 4.5224,
      "step": 1445
    },
    {
      "epoch": 0.5206120612061206,
      "grad_norm": 1.1166945695877075,
      "learning_rate": 9.625676905089506e-05,
      "loss": 5.0929,
      "step": 1446
    },
    {
      "epoch": 0.5209720972097209,
      "grad_norm": 0.7166795134544373,
      "learning_rate": 9.614164904778196e-05,
      "loss": 4.9213,
      "step": 1447
    },
    {
      "epoch": 0.5213321332133213,
      "grad_norm": 1.4086633920669556,
      "learning_rate": 9.602653416539031e-05,
      "loss": 4.9692,
      "step": 1448
    },
    {
      "epoch": 0.5216921692169217,
      "grad_norm": 0.8931282162666321,
      "learning_rate": 9.591142455649808e-05,
      "loss": 4.9206,
      "step": 1449
    },
    {
      "epoch": 0.5220522052205221,
      "grad_norm": 1.0117639303207397,
      "learning_rate": 9.579632037387632e-05,
      "loss": 5.0037,
      "step": 1450
    },
    {
      "epoch": 0.5224122412241224,
      "grad_norm": 1.4104924201965332,
      "learning_rate": 9.568122177028884e-05,
      "loss": 4.6522,
      "step": 1451
    },
    {
      "epoch": 0.5227722772277228,
      "grad_norm": 0.8898065090179443,
      "learning_rate": 9.556612889849214e-05,
      "loss": 4.4271,
      "step": 1452
    },
    {
      "epoch": 0.5231323132313231,
      "grad_norm": 1.3225266933441162,
      "learning_rate": 9.545104191123493e-05,
      "loss": 5.0652,
      "step": 1453
    },
    {
      "epoch": 0.5234923492349235,
      "grad_norm": 0.5962294340133667,
      "learning_rate": 9.533596096125825e-05,
      "loss": 4.9742,
      "step": 1454
    },
    {
      "epoch": 0.5238523852385238,
      "grad_norm": 0.6631790399551392,
      "learning_rate": 9.522088620129511e-05,
      "loss": 4.4972,
      "step": 1455
    },
    {
      "epoch": 0.5242124212421242,
      "grad_norm": 0.7971886992454529,
      "learning_rate": 9.510581778407031e-05,
      "loss": 4.8759,
      "step": 1456
    },
    {
      "epoch": 0.5245724572457245,
      "grad_norm": 0.8555065989494324,
      "learning_rate": 9.499075586230013e-05,
      "loss": 4.6214,
      "step": 1457
    },
    {
      "epoch": 0.5249324932493249,
      "grad_norm": 0.7049931883811951,
      "learning_rate": 9.487570058869237e-05,
      "loss": 4.3151,
      "step": 1458
    },
    {
      "epoch": 0.5252925292529252,
      "grad_norm": 0.6889054775238037,
      "learning_rate": 9.476065211594593e-05,
      "loss": 4.5404,
      "step": 1459
    },
    {
      "epoch": 0.5256525652565257,
      "grad_norm": 0.7169436812400818,
      "learning_rate": 9.464561059675073e-05,
      "loss": 4.4303,
      "step": 1460
    },
    {
      "epoch": 0.5260126012601261,
      "grad_norm": 0.6740374565124512,
      "learning_rate": 9.453057618378737e-05,
      "loss": 4.5571,
      "step": 1461
    },
    {
      "epoch": 0.5263726372637264,
      "grad_norm": 1.2154463529586792,
      "learning_rate": 9.44155490297271e-05,
      "loss": 4.6135,
      "step": 1462
    },
    {
      "epoch": 0.5267326732673268,
      "grad_norm": 0.7495080828666687,
      "learning_rate": 9.430052928723153e-05,
      "loss": 4.4763,
      "step": 1463
    },
    {
      "epoch": 0.5270927092709271,
      "grad_norm": 1.2248061895370483,
      "learning_rate": 9.418551710895243e-05,
      "loss": 4.655,
      "step": 1464
    },
    {
      "epoch": 0.5274527452745275,
      "grad_norm": 0.7806352376937866,
      "learning_rate": 9.407051264753147e-05,
      "loss": 4.8002,
      "step": 1465
    },
    {
      "epoch": 0.5278127812781278,
      "grad_norm": 0.720514714717865,
      "learning_rate": 9.395551605560018e-05,
      "loss": 4.7797,
      "step": 1466
    },
    {
      "epoch": 0.5281728172817282,
      "grad_norm": 0.8030894994735718,
      "learning_rate": 9.38405274857796e-05,
      "loss": 4.4738,
      "step": 1467
    },
    {
      "epoch": 0.5285328532853285,
      "grad_norm": 0.9285864233970642,
      "learning_rate": 9.372554709068005e-05,
      "loss": 4.8816,
      "step": 1468
    },
    {
      "epoch": 0.5288928892889289,
      "grad_norm": 0.7847463488578796,
      "learning_rate": 9.361057502290113e-05,
      "loss": 4.8936,
      "step": 1469
    },
    {
      "epoch": 0.5292529252925292,
      "grad_norm": 0.866493821144104,
      "learning_rate": 9.349561143503128e-05,
      "loss": 4.6361,
      "step": 1470
    },
    {
      "epoch": 0.5296129612961296,
      "grad_norm": 0.8414556384086609,
      "learning_rate": 9.338065647964779e-05,
      "loss": 4.3976,
      "step": 1471
    },
    {
      "epoch": 0.52997299729973,
      "grad_norm": 0.7641831040382385,
      "learning_rate": 9.326571030931637e-05,
      "loss": 5.205,
      "step": 1472
    },
    {
      "epoch": 0.5303330333033304,
      "grad_norm": 1.0077167749404907,
      "learning_rate": 9.315077307659117e-05,
      "loss": 5.0721,
      "step": 1473
    },
    {
      "epoch": 0.5306930693069307,
      "grad_norm": 0.9120255708694458,
      "learning_rate": 9.303584493401444e-05,
      "loss": 4.7782,
      "step": 1474
    },
    {
      "epoch": 0.5310531053105311,
      "grad_norm": 1.3254908323287964,
      "learning_rate": 9.292092603411641e-05,
      "loss": 5.464,
      "step": 1475
    },
    {
      "epoch": 0.5314131413141314,
      "grad_norm": 1.8736522197723389,
      "learning_rate": 9.280601652941494e-05,
      "loss": 5.1757,
      "step": 1476
    },
    {
      "epoch": 0.5317731773177318,
      "grad_norm": 0.9066369533538818,
      "learning_rate": 9.269111657241548e-05,
      "loss": 4.8926,
      "step": 1477
    },
    {
      "epoch": 0.5321332133213321,
      "grad_norm": 1.7387737035751343,
      "learning_rate": 9.257622631561085e-05,
      "loss": 4.7481,
      "step": 1478
    },
    {
      "epoch": 0.5324932493249325,
      "grad_norm": 0.750296950340271,
      "learning_rate": 9.246134591148099e-05,
      "loss": 4.2386,
      "step": 1479
    },
    {
      "epoch": 0.5328532853285328,
      "grad_norm": 1.1397596597671509,
      "learning_rate": 9.234647551249261e-05,
      "loss": 4.9964,
      "step": 1480
    },
    {
      "epoch": 0.5332133213321332,
      "grad_norm": 0.8775277137756348,
      "learning_rate": 9.223161527109937e-05,
      "loss": 4.7732,
      "step": 1481
    },
    {
      "epoch": 0.5335733573357335,
      "grad_norm": 0.9205058217048645,
      "learning_rate": 9.211676533974131e-05,
      "loss": 4.5175,
      "step": 1482
    },
    {
      "epoch": 0.533933393339334,
      "grad_norm": 1.167930245399475,
      "learning_rate": 9.200192587084488e-05,
      "loss": 5.0832,
      "step": 1483
    },
    {
      "epoch": 0.5342934293429343,
      "grad_norm": 0.6535282135009766,
      "learning_rate": 9.188709701682247e-05,
      "loss": 4.5957,
      "step": 1484
    },
    {
      "epoch": 0.5346534653465347,
      "grad_norm": 0.9349364042282104,
      "learning_rate": 9.177227893007254e-05,
      "loss": 4.6587,
      "step": 1485
    },
    {
      "epoch": 0.535013501350135,
      "grad_norm": 0.8866311311721802,
      "learning_rate": 9.165747176297929e-05,
      "loss": 4.697,
      "step": 1486
    },
    {
      "epoch": 0.5353735373537354,
      "grad_norm": 1.0513415336608887,
      "learning_rate": 9.154267566791223e-05,
      "loss": 4.7072,
      "step": 1487
    },
    {
      "epoch": 0.5357335733573357,
      "grad_norm": 0.7274541258811951,
      "learning_rate": 9.142789079722638e-05,
      "loss": 4.3303,
      "step": 1488
    },
    {
      "epoch": 0.5360936093609361,
      "grad_norm": 0.845374345779419,
      "learning_rate": 9.131311730326172e-05,
      "loss": 4.6276,
      "step": 1489
    },
    {
      "epoch": 0.5364536453645364,
      "grad_norm": 0.7885565161705017,
      "learning_rate": 9.119835533834331e-05,
      "loss": 4.8416,
      "step": 1490
    },
    {
      "epoch": 0.5368136813681368,
      "grad_norm": 0.8569223284721375,
      "learning_rate": 9.108360505478066e-05,
      "loss": 4.6285,
      "step": 1491
    },
    {
      "epoch": 0.5371737173717371,
      "grad_norm": 0.762844443321228,
      "learning_rate": 9.096886660486797e-05,
      "loss": 4.8427,
      "step": 1492
    },
    {
      "epoch": 0.5375337533753375,
      "grad_norm": 0.8421863913536072,
      "learning_rate": 9.085414014088369e-05,
      "loss": 4.905,
      "step": 1493
    },
    {
      "epoch": 0.537893789378938,
      "grad_norm": 0.7791987061500549,
      "learning_rate": 9.073942581509034e-05,
      "loss": 4.7764,
      "step": 1494
    },
    {
      "epoch": 0.5382538253825383,
      "grad_norm": 0.9516592621803284,
      "learning_rate": 9.062472377973427e-05,
      "loss": 4.7255,
      "step": 1495
    },
    {
      "epoch": 0.5386138613861386,
      "grad_norm": 0.7316043376922607,
      "learning_rate": 9.051003418704565e-05,
      "loss": 4.7619,
      "step": 1496
    },
    {
      "epoch": 0.538973897389739,
      "grad_norm": 0.9737268686294556,
      "learning_rate": 9.039535718923804e-05,
      "loss": 4.8169,
      "step": 1497
    },
    {
      "epoch": 0.5393339333933393,
      "grad_norm": 1.1187301874160767,
      "learning_rate": 9.028069293850838e-05,
      "loss": 4.9653,
      "step": 1498
    },
    {
      "epoch": 0.5396939693969397,
      "grad_norm": 1.5251964330673218,
      "learning_rate": 9.016604158703654e-05,
      "loss": 5.3705,
      "step": 1499
    },
    {
      "epoch": 0.54005400540054,
      "grad_norm": 1.3677904605865479,
      "learning_rate": 9.005140328698539e-05,
      "loss": 5.3299,
      "step": 1500
    },
    {
      "epoch": 0.5404140414041404,
      "grad_norm": 2.879427194595337,
      "learning_rate": 8.993677819050046e-05,
      "loss": 5.0086,
      "step": 1501
    },
    {
      "epoch": 0.5407740774077407,
      "grad_norm": 0.67740797996521,
      "learning_rate": 8.982216644970979e-05,
      "loss": 4.842,
      "step": 1502
    },
    {
      "epoch": 0.5411341134113411,
      "grad_norm": 0.6215454339981079,
      "learning_rate": 8.970756821672352e-05,
      "loss": 4.819,
      "step": 1503
    },
    {
      "epoch": 0.5414941494149415,
      "grad_norm": 0.7233485579490662,
      "learning_rate": 8.95929836436341e-05,
      "loss": 5.1138,
      "step": 1504
    },
    {
      "epoch": 0.5418541854185418,
      "grad_norm": 0.8734248280525208,
      "learning_rate": 8.947841288251568e-05,
      "loss": 4.5471,
      "step": 1505
    },
    {
      "epoch": 0.5422142214221423,
      "grad_norm": 0.7609754800796509,
      "learning_rate": 8.93638560854242e-05,
      "loss": 4.5867,
      "step": 1506
    },
    {
      "epoch": 0.5425742574257426,
      "grad_norm": 0.591051459312439,
      "learning_rate": 8.924931340439694e-05,
      "loss": 4.7519,
      "step": 1507
    },
    {
      "epoch": 0.542934293429343,
      "grad_norm": 0.7521675229072571,
      "learning_rate": 8.913478499145254e-05,
      "loss": 4.9931,
      "step": 1508
    },
    {
      "epoch": 0.5432943294329433,
      "grad_norm": 0.7421920895576477,
      "learning_rate": 8.902027099859074e-05,
      "loss": 4.9076,
      "step": 1509
    },
    {
      "epoch": 0.5436543654365437,
      "grad_norm": 0.732514500617981,
      "learning_rate": 8.890577157779198e-05,
      "loss": 4.9947,
      "step": 1510
    },
    {
      "epoch": 0.544014401440144,
      "grad_norm": 0.872582733631134,
      "learning_rate": 8.879128688101749e-05,
      "loss": 4.4497,
      "step": 1511
    },
    {
      "epoch": 0.5443744374437444,
      "grad_norm": 0.6524242162704468,
      "learning_rate": 8.867681706020894e-05,
      "loss": 4.8583,
      "step": 1512
    },
    {
      "epoch": 0.5447344734473447,
      "grad_norm": 0.7136997580528259,
      "learning_rate": 8.856236226728825e-05,
      "loss": 4.7893,
      "step": 1513
    },
    {
      "epoch": 0.5450945094509451,
      "grad_norm": 0.6949340105056763,
      "learning_rate": 8.844792265415738e-05,
      "loss": 4.5592,
      "step": 1514
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.6542133092880249,
      "learning_rate": 8.833349837269814e-05,
      "loss": 4.6849,
      "step": 1515
    },
    {
      "epoch": 0.5458145814581458,
      "grad_norm": 0.9554135203361511,
      "learning_rate": 8.821908957477203e-05,
      "loss": 4.9527,
      "step": 1516
    },
    {
      "epoch": 0.5461746174617462,
      "grad_norm": 0.7612982988357544,
      "learning_rate": 8.810469641222001e-05,
      "loss": 5.1376,
      "step": 1517
    },
    {
      "epoch": 0.5465346534653466,
      "grad_norm": 0.6914458274841309,
      "learning_rate": 8.799031903686217e-05,
      "loss": 4.7025,
      "step": 1518
    },
    {
      "epoch": 0.5468946894689469,
      "grad_norm": 0.5740631818771362,
      "learning_rate": 8.787595760049777e-05,
      "loss": 4.4563,
      "step": 1519
    },
    {
      "epoch": 0.5472547254725473,
      "grad_norm": 0.7158836126327515,
      "learning_rate": 8.776161225490489e-05,
      "loss": 4.5597,
      "step": 1520
    },
    {
      "epoch": 0.5476147614761476,
      "grad_norm": 0.6493854522705078,
      "learning_rate": 8.764728315184024e-05,
      "loss": 4.9063,
      "step": 1521
    },
    {
      "epoch": 0.547974797479748,
      "grad_norm": 0.7967309951782227,
      "learning_rate": 8.753297044303896e-05,
      "loss": 4.7724,
      "step": 1522
    },
    {
      "epoch": 0.5483348334833483,
      "grad_norm": 0.8738707900047302,
      "learning_rate": 8.741867428021446e-05,
      "loss": 5.0162,
      "step": 1523
    },
    {
      "epoch": 0.5486948694869487,
      "grad_norm": 1.0331279039382935,
      "learning_rate": 8.73043948150582e-05,
      "loss": 4.8083,
      "step": 1524
    },
    {
      "epoch": 0.549054905490549,
      "grad_norm": 1.4867066144943237,
      "learning_rate": 8.719013219923947e-05,
      "loss": 5.3772,
      "step": 1525
    },
    {
      "epoch": 0.5494149414941494,
      "grad_norm": 0.9216363430023193,
      "learning_rate": 8.707588658440511e-05,
      "loss": 4.8583,
      "step": 1526
    },
    {
      "epoch": 0.5497749774977497,
      "grad_norm": 0.6520406007766724,
      "learning_rate": 8.696165812217953e-05,
      "loss": 4.735,
      "step": 1527
    },
    {
      "epoch": 0.5501350135013502,
      "grad_norm": 0.742540717124939,
      "learning_rate": 8.684744696416432e-05,
      "loss": 5.052,
      "step": 1528
    },
    {
      "epoch": 0.5504950495049505,
      "grad_norm": 0.7773281335830688,
      "learning_rate": 8.673325326193806e-05,
      "loss": 4.7007,
      "step": 1529
    },
    {
      "epoch": 0.5508550855085509,
      "grad_norm": 0.7746535539627075,
      "learning_rate": 8.661907716705627e-05,
      "loss": 4.6244,
      "step": 1530
    },
    {
      "epoch": 0.5512151215121512,
      "grad_norm": 0.7446948885917664,
      "learning_rate": 8.650491883105097e-05,
      "loss": 4.863,
      "step": 1531
    },
    {
      "epoch": 0.5515751575157516,
      "grad_norm": 0.714407205581665,
      "learning_rate": 8.639077840543077e-05,
      "loss": 4.8291,
      "step": 1532
    },
    {
      "epoch": 0.5519351935193519,
      "grad_norm": 0.9697020053863525,
      "learning_rate": 8.627665604168032e-05,
      "loss": 4.6392,
      "step": 1533
    },
    {
      "epoch": 0.5522952295229523,
      "grad_norm": 0.7405415773391724,
      "learning_rate": 8.616255189126043e-05,
      "loss": 4.5928,
      "step": 1534
    },
    {
      "epoch": 0.5526552655265526,
      "grad_norm": 0.6478596329689026,
      "learning_rate": 8.604846610560771e-05,
      "loss": 4.7173,
      "step": 1535
    },
    {
      "epoch": 0.553015301530153,
      "grad_norm": 0.6404251456260681,
      "learning_rate": 8.593439883613441e-05,
      "loss": 4.453,
      "step": 1536
    },
    {
      "epoch": 0.5533753375337533,
      "grad_norm": 0.6942550539970398,
      "learning_rate": 8.582035023422815e-05,
      "loss": 4.7839,
      "step": 1537
    },
    {
      "epoch": 0.5537353735373537,
      "grad_norm": 0.7156291604042053,
      "learning_rate": 8.570632045125185e-05,
      "loss": 4.9326,
      "step": 1538
    },
    {
      "epoch": 0.554095409540954,
      "grad_norm": 0.7970767617225647,
      "learning_rate": 8.559230963854338e-05,
      "loss": 4.7284,
      "step": 1539
    },
    {
      "epoch": 0.5544554455445545,
      "grad_norm": 0.7158586382865906,
      "learning_rate": 8.547831794741552e-05,
      "loss": 4.6065,
      "step": 1540
    },
    {
      "epoch": 0.5548154815481549,
      "grad_norm": 0.5735970735549927,
      "learning_rate": 8.536434552915556e-05,
      "loss": 4.7163,
      "step": 1541
    },
    {
      "epoch": 0.5551755175517552,
      "grad_norm": 0.5961145758628845,
      "learning_rate": 8.525039253502529e-05,
      "loss": 4.4807,
      "step": 1542
    },
    {
      "epoch": 0.5555355535553556,
      "grad_norm": 1.0740234851837158,
      "learning_rate": 8.513645911626071e-05,
      "loss": 4.8928,
      "step": 1543
    },
    {
      "epoch": 0.5558955895589559,
      "grad_norm": 0.8574578762054443,
      "learning_rate": 8.502254542407186e-05,
      "loss": 4.8314,
      "step": 1544
    },
    {
      "epoch": 0.5562556255625563,
      "grad_norm": 0.6801769733428955,
      "learning_rate": 8.490865160964253e-05,
      "loss": 4.8751,
      "step": 1545
    },
    {
      "epoch": 0.5566156615661566,
      "grad_norm": 1.083175778388977,
      "learning_rate": 8.47947778241302e-05,
      "loss": 4.8205,
      "step": 1546
    },
    {
      "epoch": 0.556975697569757,
      "grad_norm": 0.8133716583251953,
      "learning_rate": 8.468092421866573e-05,
      "loss": 4.8413,
      "step": 1547
    },
    {
      "epoch": 0.5573357335733573,
      "grad_norm": 0.8513250350952148,
      "learning_rate": 8.45670909443532e-05,
      "loss": 5.0853,
      "step": 1548
    },
    {
      "epoch": 0.5576957695769577,
      "grad_norm": 1.1919034719467163,
      "learning_rate": 8.445327815226969e-05,
      "loss": 5.0187,
      "step": 1549
    },
    {
      "epoch": 0.558055805580558,
      "grad_norm": 1.377685308456421,
      "learning_rate": 8.433948599346516e-05,
      "loss": 5.1966,
      "step": 1550
    },
    {
      "epoch": 0.5584158415841585,
      "grad_norm": 0.9856991767883301,
      "learning_rate": 8.422571461896215e-05,
      "loss": 4.9112,
      "step": 1551
    },
    {
      "epoch": 0.5587758775877588,
      "grad_norm": 1.103893756866455,
      "learning_rate": 8.411196417975558e-05,
      "loss": 4.6929,
      "step": 1552
    },
    {
      "epoch": 0.5591359135913592,
      "grad_norm": 0.7098972201347351,
      "learning_rate": 8.399823482681262e-05,
      "loss": 4.8045,
      "step": 1553
    },
    {
      "epoch": 0.5594959495949595,
      "grad_norm": 0.8016402721405029,
      "learning_rate": 8.388452671107246e-05,
      "loss": 4.9784,
      "step": 1554
    },
    {
      "epoch": 0.5598559855985599,
      "grad_norm": 0.8243830800056458,
      "learning_rate": 8.377083998344615e-05,
      "loss": 4.7894,
      "step": 1555
    },
    {
      "epoch": 0.5602160216021602,
      "grad_norm": 0.708741307258606,
      "learning_rate": 8.36571747948162e-05,
      "loss": 4.8073,
      "step": 1556
    },
    {
      "epoch": 0.5605760576057606,
      "grad_norm": 1.3110893964767456,
      "learning_rate": 8.354353129603668e-05,
      "loss": 4.9727,
      "step": 1557
    },
    {
      "epoch": 0.5609360936093609,
      "grad_norm": 0.8679048418998718,
      "learning_rate": 8.342990963793283e-05,
      "loss": 4.5692,
      "step": 1558
    },
    {
      "epoch": 0.5612961296129613,
      "grad_norm": 0.9281949400901794,
      "learning_rate": 8.33163099713009e-05,
      "loss": 5.0668,
      "step": 1559
    },
    {
      "epoch": 0.5616561656165616,
      "grad_norm": 0.9578452110290527,
      "learning_rate": 8.320273244690796e-05,
      "loss": 5.0065,
      "step": 1560
    },
    {
      "epoch": 0.562016201620162,
      "grad_norm": 0.8283352851867676,
      "learning_rate": 8.308917721549167e-05,
      "loss": 4.8786,
      "step": 1561
    },
    {
      "epoch": 0.5623762376237624,
      "grad_norm": 0.9560914635658264,
      "learning_rate": 8.297564442776014e-05,
      "loss": 4.6688,
      "step": 1562
    },
    {
      "epoch": 0.5627362736273628,
      "grad_norm": 0.966285765171051,
      "learning_rate": 8.286213423439169e-05,
      "loss": 4.6505,
      "step": 1563
    },
    {
      "epoch": 0.5630963096309631,
      "grad_norm": 0.6457225680351257,
      "learning_rate": 8.274864678603458e-05,
      "loss": 4.7324,
      "step": 1564
    },
    {
      "epoch": 0.5634563456345635,
      "grad_norm": 0.9832311272621155,
      "learning_rate": 8.263518223330697e-05,
      "loss": 4.2546,
      "step": 1565
    },
    {
      "epoch": 0.5638163816381638,
      "grad_norm": 0.5678200125694275,
      "learning_rate": 8.252174072679661e-05,
      "loss": 4.8263,
      "step": 1566
    },
    {
      "epoch": 0.5641764176417642,
      "grad_norm": 0.6344163417816162,
      "learning_rate": 8.240832241706068e-05,
      "loss": 4.6787,
      "step": 1567
    },
    {
      "epoch": 0.5645364536453645,
      "grad_norm": 0.5460944771766663,
      "learning_rate": 8.22949274546255e-05,
      "loss": 4.6593,
      "step": 1568
    },
    {
      "epoch": 0.5648964896489649,
      "grad_norm": 0.9370971322059631,
      "learning_rate": 8.218155598998648e-05,
      "loss": 5.0046,
      "step": 1569
    },
    {
      "epoch": 0.5652565256525652,
      "grad_norm": 0.7465395331382751,
      "learning_rate": 8.206820817360787e-05,
      "loss": 4.6039,
      "step": 1570
    },
    {
      "epoch": 0.5656165616561656,
      "grad_norm": 0.6622176170349121,
      "learning_rate": 8.195488415592238e-05,
      "loss": 4.6292,
      "step": 1571
    },
    {
      "epoch": 0.5659765976597659,
      "grad_norm": 0.7051721811294556,
      "learning_rate": 8.184158408733131e-05,
      "loss": 4.7648,
      "step": 1572
    },
    {
      "epoch": 0.5663366336633663,
      "grad_norm": 0.7271416783332825,
      "learning_rate": 8.172830811820407e-05,
      "loss": 5.0451,
      "step": 1573
    },
    {
      "epoch": 0.5666966696669667,
      "grad_norm": 0.9978146553039551,
      "learning_rate": 8.161505639887817e-05,
      "loss": 5.4136,
      "step": 1574
    },
    {
      "epoch": 0.5670567056705671,
      "grad_norm": 1.500651240348816,
      "learning_rate": 8.150182907965883e-05,
      "loss": 5.3961,
      "step": 1575
    },
    {
      "epoch": 0.5674167416741674,
      "grad_norm": 1.3992029428482056,
      "learning_rate": 8.138862631081896e-05,
      "loss": 4.9493,
      "step": 1576
    },
    {
      "epoch": 0.5677767776777678,
      "grad_norm": 1.5307037830352783,
      "learning_rate": 8.127544824259889e-05,
      "loss": 5.277,
      "step": 1577
    },
    {
      "epoch": 0.5681368136813681,
      "grad_norm": 0.7250295281410217,
      "learning_rate": 8.116229502520618e-05,
      "loss": 4.5189,
      "step": 1578
    },
    {
      "epoch": 0.5684968496849685,
      "grad_norm": 0.6795329451560974,
      "learning_rate": 8.104916680881527e-05,
      "loss": 4.862,
      "step": 1579
    },
    {
      "epoch": 0.5688568856885688,
      "grad_norm": 0.5743387341499329,
      "learning_rate": 8.093606374356759e-05,
      "loss": 4.8536,
      "step": 1580
    },
    {
      "epoch": 0.5692169216921692,
      "grad_norm": 0.700360119342804,
      "learning_rate": 8.082298597957112e-05,
      "loss": 4.7792,
      "step": 1581
    },
    {
      "epoch": 0.5695769576957695,
      "grad_norm": 0.6575736999511719,
      "learning_rate": 8.070993366690029e-05,
      "loss": 5.0423,
      "step": 1582
    },
    {
      "epoch": 0.5699369936993699,
      "grad_norm": 0.7689267992973328,
      "learning_rate": 8.059690695559568e-05,
      "loss": 4.5819,
      "step": 1583
    },
    {
      "epoch": 0.5702970297029702,
      "grad_norm": 0.7079759836196899,
      "learning_rate": 8.048390599566397e-05,
      "loss": 4.6153,
      "step": 1584
    },
    {
      "epoch": 0.5706570657065707,
      "grad_norm": 0.5983802080154419,
      "learning_rate": 8.037093093707763e-05,
      "loss": 4.7194,
      "step": 1585
    },
    {
      "epoch": 0.5710171017101711,
      "grad_norm": 0.8288666009902954,
      "learning_rate": 8.025798192977481e-05,
      "loss": 4.4559,
      "step": 1586
    },
    {
      "epoch": 0.5713771377137714,
      "grad_norm": 0.82045978307724,
      "learning_rate": 8.014505912365893e-05,
      "loss": 4.321,
      "step": 1587
    },
    {
      "epoch": 0.5717371737173718,
      "grad_norm": 0.5901921391487122,
      "learning_rate": 8.003216266859877e-05,
      "loss": 4.4723,
      "step": 1588
    },
    {
      "epoch": 0.5720972097209721,
      "grad_norm": 0.5934436917304993,
      "learning_rate": 7.991929271442817e-05,
      "loss": 4.722,
      "step": 1589
    },
    {
      "epoch": 0.5724572457245725,
      "grad_norm": 0.6561322212219238,
      "learning_rate": 7.980644941094566e-05,
      "loss": 5.1444,
      "step": 1590
    },
    {
      "epoch": 0.5728172817281728,
      "grad_norm": 0.532434344291687,
      "learning_rate": 7.969363290791451e-05,
      "loss": 4.544,
      "step": 1591
    },
    {
      "epoch": 0.5731773177317732,
      "grad_norm": 0.5906174778938293,
      "learning_rate": 7.958084335506239e-05,
      "loss": 4.6771,
      "step": 1592
    },
    {
      "epoch": 0.5735373537353735,
      "grad_norm": 0.8805077075958252,
      "learning_rate": 7.946808090208122e-05,
      "loss": 4.8108,
      "step": 1593
    },
    {
      "epoch": 0.5738973897389739,
      "grad_norm": 0.6874720454216003,
      "learning_rate": 7.935534569862686e-05,
      "loss": 4.5281,
      "step": 1594
    },
    {
      "epoch": 0.5742574257425742,
      "grad_norm": 1.0771909952163696,
      "learning_rate": 7.924263789431912e-05,
      "loss": 4.9165,
      "step": 1595
    },
    {
      "epoch": 0.5746174617461746,
      "grad_norm": 1.0937650203704834,
      "learning_rate": 7.912995763874143e-05,
      "loss": 5.0992,
      "step": 1596
    },
    {
      "epoch": 0.574977497749775,
      "grad_norm": 1.5542136430740356,
      "learning_rate": 7.90173050814406e-05,
      "loss": 5.1426,
      "step": 1597
    },
    {
      "epoch": 0.5753375337533754,
      "grad_norm": 1.1191812753677368,
      "learning_rate": 7.89046803719267e-05,
      "loss": 5.2826,
      "step": 1598
    },
    {
      "epoch": 0.5756975697569757,
      "grad_norm": 0.9704378843307495,
      "learning_rate": 7.879208365967287e-05,
      "loss": 5.1034,
      "step": 1599
    },
    {
      "epoch": 0.5760576057605761,
      "grad_norm": 1.3196125030517578,
      "learning_rate": 7.867951509411506e-05,
      "loss": 5.1528,
      "step": 1600
    },
    {
      "epoch": 0.5764176417641764,
      "grad_norm": 2.333880662918091,
      "learning_rate": 7.856697482465196e-05,
      "loss": 4.9852,
      "step": 1601
    },
    {
      "epoch": 0.5767776777677768,
      "grad_norm": 1.0826363563537598,
      "learning_rate": 7.84544630006445e-05,
      "loss": 4.8263,
      "step": 1602
    },
    {
      "epoch": 0.5771377137713771,
      "grad_norm": 0.980603814125061,
      "learning_rate": 7.834197977141603e-05,
      "loss": 4.4921,
      "step": 1603
    },
    {
      "epoch": 0.5774977497749775,
      "grad_norm": 0.6565625071525574,
      "learning_rate": 7.822952528625191e-05,
      "loss": 4.7848,
      "step": 1604
    },
    {
      "epoch": 0.5778577857785778,
      "grad_norm": 0.9467577934265137,
      "learning_rate": 7.811709969439938e-05,
      "loss": 4.6023,
      "step": 1605
    },
    {
      "epoch": 0.5782178217821782,
      "grad_norm": 0.7946081161499023,
      "learning_rate": 7.800470314506724e-05,
      "loss": 4.543,
      "step": 1606
    },
    {
      "epoch": 0.5785778577857785,
      "grad_norm": 0.9473694562911987,
      "learning_rate": 7.789233578742582e-05,
      "loss": 5.1386,
      "step": 1607
    },
    {
      "epoch": 0.578937893789379,
      "grad_norm": 0.7135196924209595,
      "learning_rate": 7.77799977706067e-05,
      "loss": 4.8152,
      "step": 1608
    },
    {
      "epoch": 0.5792979297929793,
      "grad_norm": 0.7521832585334778,
      "learning_rate": 7.766768924370254e-05,
      "loss": 4.65,
      "step": 1609
    },
    {
      "epoch": 0.5796579657965797,
      "grad_norm": 0.7366710305213928,
      "learning_rate": 7.755541035576677e-05,
      "loss": 4.448,
      "step": 1610
    },
    {
      "epoch": 0.58001800180018,
      "grad_norm": 0.9552801847457886,
      "learning_rate": 7.744316125581355e-05,
      "loss": 4.5018,
      "step": 1611
    },
    {
      "epoch": 0.5803780378037804,
      "grad_norm": 0.6024777293205261,
      "learning_rate": 7.733094209281756e-05,
      "loss": 4.7512,
      "step": 1612
    },
    {
      "epoch": 0.5807380738073807,
      "grad_norm": 1.195746898651123,
      "learning_rate": 7.721875301571359e-05,
      "loss": 4.8037,
      "step": 1613
    },
    {
      "epoch": 0.5810981098109811,
      "grad_norm": 0.8305825591087341,
      "learning_rate": 7.71065941733967e-05,
      "loss": 4.6396,
      "step": 1614
    },
    {
      "epoch": 0.5814581458145814,
      "grad_norm": 0.937282145023346,
      "learning_rate": 7.699446571472166e-05,
      "loss": 4.4877,
      "step": 1615
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 0.7361468076705933,
      "learning_rate": 7.688236778850306e-05,
      "loss": 4.604,
      "step": 1616
    },
    {
      "epoch": 0.5821782178217821,
      "grad_norm": 0.985714852809906,
      "learning_rate": 7.677030054351477e-05,
      "loss": 4.7311,
      "step": 1617
    },
    {
      "epoch": 0.5825382538253825,
      "grad_norm": 0.647030234336853,
      "learning_rate": 7.665826412849013e-05,
      "loss": 4.9563,
      "step": 1618
    },
    {
      "epoch": 0.582898289828983,
      "grad_norm": 0.583246648311615,
      "learning_rate": 7.654625869212146e-05,
      "loss": 4.4818,
      "step": 1619
    },
    {
      "epoch": 0.5832583258325833,
      "grad_norm": 0.9955174326896667,
      "learning_rate": 7.643428438306004e-05,
      "loss": 4.7674,
      "step": 1620
    },
    {
      "epoch": 0.5836183618361837,
      "grad_norm": 0.9936240911483765,
      "learning_rate": 7.632234134991575e-05,
      "loss": 4.5255,
      "step": 1621
    },
    {
      "epoch": 0.583978397839784,
      "grad_norm": 0.9111624360084534,
      "learning_rate": 7.6210429741257e-05,
      "loss": 4.7815,
      "step": 1622
    },
    {
      "epoch": 0.5843384338433844,
      "grad_norm": 0.8694493174552917,
      "learning_rate": 7.609854970561053e-05,
      "loss": 5.22,
      "step": 1623
    },
    {
      "epoch": 0.5846984698469847,
      "grad_norm": 1.2981653213500977,
      "learning_rate": 7.598670139146117e-05,
      "loss": 5.1569,
      "step": 1624
    },
    {
      "epoch": 0.585058505850585,
      "grad_norm": 1.4491846561431885,
      "learning_rate": 7.587488494725157e-05,
      "loss": 5.644,
      "step": 1625
    },
    {
      "epoch": 0.5854185418541854,
      "grad_norm": 4.114619255065918,
      "learning_rate": 7.576310052138215e-05,
      "loss": 4.7732,
      "step": 1626
    },
    {
      "epoch": 0.5857785778577858,
      "grad_norm": 0.9385104179382324,
      "learning_rate": 7.565134826221083e-05,
      "loss": 4.8205,
      "step": 1627
    },
    {
      "epoch": 0.5861386138613861,
      "grad_norm": 0.8031629323959351,
      "learning_rate": 7.55396283180529e-05,
      "loss": 4.65,
      "step": 1628
    },
    {
      "epoch": 0.5864986498649865,
      "grad_norm": 0.8165589570999146,
      "learning_rate": 7.542794083718059e-05,
      "loss": 4.8113,
      "step": 1629
    },
    {
      "epoch": 0.5868586858685868,
      "grad_norm": 0.7751879096031189,
      "learning_rate": 7.531628596782316e-05,
      "loss": 4.7953,
      "step": 1630
    },
    {
      "epoch": 0.5872187218721873,
      "grad_norm": 0.8618746995925903,
      "learning_rate": 7.520466385816671e-05,
      "loss": 4.6135,
      "step": 1631
    },
    {
      "epoch": 0.5875787578757876,
      "grad_norm": 1.1003646850585938,
      "learning_rate": 7.509307465635358e-05,
      "loss": 4.7031,
      "step": 1632
    },
    {
      "epoch": 0.587938793879388,
      "grad_norm": 0.7130169868469238,
      "learning_rate": 7.498151851048267e-05,
      "loss": 4.364,
      "step": 1633
    },
    {
      "epoch": 0.5882988298829883,
      "grad_norm": 0.816373884677887,
      "learning_rate": 7.48699955686089e-05,
      "loss": 4.8291,
      "step": 1634
    },
    {
      "epoch": 0.5886588658865887,
      "grad_norm": 0.7508324384689331,
      "learning_rate": 7.475850597874319e-05,
      "loss": 4.8374,
      "step": 1635
    },
    {
      "epoch": 0.589018901890189,
      "grad_norm": 0.6991592645645142,
      "learning_rate": 7.464704988885209e-05,
      "loss": 4.7721,
      "step": 1636
    },
    {
      "epoch": 0.5893789378937894,
      "grad_norm": 0.6070843935012817,
      "learning_rate": 7.453562744685778e-05,
      "loss": 5.1812,
      "step": 1637
    },
    {
      "epoch": 0.5897389738973897,
      "grad_norm": 0.580551266670227,
      "learning_rate": 7.442423880063778e-05,
      "loss": 4.7607,
      "step": 1638
    },
    {
      "epoch": 0.5900990099009901,
      "grad_norm": 0.8013322353363037,
      "learning_rate": 7.431288409802473e-05,
      "loss": 4.6397,
      "step": 1639
    },
    {
      "epoch": 0.5904590459045904,
      "grad_norm": 0.8236899971961975,
      "learning_rate": 7.42015634868062e-05,
      "loss": 5.0199,
      "step": 1640
    },
    {
      "epoch": 0.5908190819081908,
      "grad_norm": 0.6588436365127563,
      "learning_rate": 7.409027711472456e-05,
      "loss": 4.4477,
      "step": 1641
    },
    {
      "epoch": 0.5911791179117912,
      "grad_norm": 0.9058972001075745,
      "learning_rate": 7.39790251294767e-05,
      "loss": 4.6782,
      "step": 1642
    },
    {
      "epoch": 0.5915391539153916,
      "grad_norm": 0.7739425897598267,
      "learning_rate": 7.386780767871397e-05,
      "loss": 4.6644,
      "step": 1643
    },
    {
      "epoch": 0.5918991899189919,
      "grad_norm": 0.9859951138496399,
      "learning_rate": 7.37566249100417e-05,
      "loss": 4.9345,
      "step": 1644
    },
    {
      "epoch": 0.5922592259225923,
      "grad_norm": 0.94615638256073,
      "learning_rate": 7.364547697101933e-05,
      "loss": 4.9104,
      "step": 1645
    },
    {
      "epoch": 0.5926192619261926,
      "grad_norm": 0.6184026598930359,
      "learning_rate": 7.353436400916004e-05,
      "loss": 4.7239,
      "step": 1646
    },
    {
      "epoch": 0.592979297929793,
      "grad_norm": 0.6131138205528259,
      "learning_rate": 7.342328617193067e-05,
      "loss": 4.9109,
      "step": 1647
    },
    {
      "epoch": 0.5933393339333933,
      "grad_norm": 0.8020132780075073,
      "learning_rate": 7.331224360675126e-05,
      "loss": 4.7352,
      "step": 1648
    },
    {
      "epoch": 0.5936993699369937,
      "grad_norm": 1.0741486549377441,
      "learning_rate": 7.320123646099519e-05,
      "loss": 5.2285,
      "step": 1649
    },
    {
      "epoch": 0.594059405940594,
      "grad_norm": 1.2344449758529663,
      "learning_rate": 7.309026488198884e-05,
      "loss": 5.3315,
      "step": 1650
    },
    {
      "epoch": 0.5944194419441944,
      "grad_norm": 0.947981059551239,
      "learning_rate": 7.297932901701123e-05,
      "loss": 4.8494,
      "step": 1651
    },
    {
      "epoch": 0.5947794779477947,
      "grad_norm": 0.7184794545173645,
      "learning_rate": 7.286842901329412e-05,
      "loss": 4.4837,
      "step": 1652
    },
    {
      "epoch": 0.5951395139513952,
      "grad_norm": 0.659092903137207,
      "learning_rate": 7.275756501802166e-05,
      "loss": 4.4865,
      "step": 1653
    },
    {
      "epoch": 0.5954995499549955,
      "grad_norm": 0.8012544512748718,
      "learning_rate": 7.264673717833019e-05,
      "loss": 4.5965,
      "step": 1654
    },
    {
      "epoch": 0.5958595859585959,
      "grad_norm": 1.0120086669921875,
      "learning_rate": 7.253594564130804e-05,
      "loss": 4.7858,
      "step": 1655
    },
    {
      "epoch": 0.5962196219621962,
      "grad_norm": 1.0920915603637695,
      "learning_rate": 7.242519055399539e-05,
      "loss": 4.7804,
      "step": 1656
    },
    {
      "epoch": 0.5965796579657966,
      "grad_norm": 0.6081851720809937,
      "learning_rate": 7.231447206338407e-05,
      "loss": 4.8375,
      "step": 1657
    },
    {
      "epoch": 0.596939693969397,
      "grad_norm": 1.0984938144683838,
      "learning_rate": 7.22037903164173e-05,
      "loss": 4.7434,
      "step": 1658
    },
    {
      "epoch": 0.5972997299729973,
      "grad_norm": 1.1671141386032104,
      "learning_rate": 7.209314545998949e-05,
      "loss": 4.6966,
      "step": 1659
    },
    {
      "epoch": 0.5976597659765976,
      "grad_norm": 1.2776623964309692,
      "learning_rate": 7.198253764094618e-05,
      "loss": 4.8766,
      "step": 1660
    },
    {
      "epoch": 0.598019801980198,
      "grad_norm": 1.1133800745010376,
      "learning_rate": 7.187196700608373e-05,
      "loss": 4.9712,
      "step": 1661
    },
    {
      "epoch": 0.5983798379837983,
      "grad_norm": 0.8766571283340454,
      "learning_rate": 7.176143370214914e-05,
      "loss": 4.7079,
      "step": 1662
    },
    {
      "epoch": 0.5987398739873987,
      "grad_norm": 0.8257899880409241,
      "learning_rate": 7.165093787583984e-05,
      "loss": 4.3869,
      "step": 1663
    },
    {
      "epoch": 0.599099909990999,
      "grad_norm": 0.6749482750892639,
      "learning_rate": 7.154047967380354e-05,
      "loss": 4.6119,
      "step": 1664
    },
    {
      "epoch": 0.5994599459945995,
      "grad_norm": 0.6785940527915955,
      "learning_rate": 7.143005924263803e-05,
      "loss": 4.5932,
      "step": 1665
    },
    {
      "epoch": 0.5998199819981999,
      "grad_norm": 0.6335827708244324,
      "learning_rate": 7.131967672889101e-05,
      "loss": 4.6642,
      "step": 1666
    },
    {
      "epoch": 0.6001800180018002,
      "grad_norm": 0.8998749256134033,
      "learning_rate": 7.12093322790597e-05,
      "loss": 4.8338,
      "step": 1667
    },
    {
      "epoch": 0.6005400540054006,
      "grad_norm": 0.7676742672920227,
      "learning_rate": 7.1099026039591e-05,
      "loss": 4.4145,
      "step": 1668
    },
    {
      "epoch": 0.6009000900090009,
      "grad_norm": 0.6727948188781738,
      "learning_rate": 7.098875815688095e-05,
      "loss": 4.8703,
      "step": 1669
    },
    {
      "epoch": 0.6012601260126013,
      "grad_norm": 1.0915569067001343,
      "learning_rate": 7.087852877727481e-05,
      "loss": 4.9705,
      "step": 1670
    },
    {
      "epoch": 0.6016201620162016,
      "grad_norm": 0.6964028477668762,
      "learning_rate": 7.07683380470666e-05,
      "loss": 5.1096,
      "step": 1671
    },
    {
      "epoch": 0.601980198019802,
      "grad_norm": 0.8635994791984558,
      "learning_rate": 7.065818611249915e-05,
      "loss": 5.1809,
      "step": 1672
    },
    {
      "epoch": 0.6023402340234023,
      "grad_norm": 0.8733118772506714,
      "learning_rate": 7.054807311976379e-05,
      "loss": 4.9687,
      "step": 1673
    },
    {
      "epoch": 0.6027002700270027,
      "grad_norm": 0.8445477485656738,
      "learning_rate": 7.043799921500009e-05,
      "loss": 5.0272,
      "step": 1674
    },
    {
      "epoch": 0.603060306030603,
      "grad_norm": 1.4227135181427002,
      "learning_rate": 7.032796454429583e-05,
      "loss": 5.6162,
      "step": 1675
    },
    {
      "epoch": 0.6034203420342035,
      "grad_norm": 0.8639402985572815,
      "learning_rate": 7.021796925368667e-05,
      "loss": 4.684,
      "step": 1676
    },
    {
      "epoch": 0.6037803780378038,
      "grad_norm": 1.1845555305480957,
      "learning_rate": 7.010801348915608e-05,
      "loss": 4.9312,
      "step": 1677
    },
    {
      "epoch": 0.6041404140414042,
      "grad_norm": 0.5735663771629333,
      "learning_rate": 6.999809739663492e-05,
      "loss": 4.8477,
      "step": 1678
    },
    {
      "epoch": 0.6045004500450045,
      "grad_norm": 0.7418989539146423,
      "learning_rate": 6.988822112200156e-05,
      "loss": 4.9024,
      "step": 1679
    },
    {
      "epoch": 0.6048604860486049,
      "grad_norm": 0.7708451151847839,
      "learning_rate": 6.977838481108145e-05,
      "loss": 4.442,
      "step": 1680
    },
    {
      "epoch": 0.6052205220522052,
      "grad_norm": 1.4791598320007324,
      "learning_rate": 6.966858860964702e-05,
      "loss": 4.7687,
      "step": 1681
    },
    {
      "epoch": 0.6055805580558056,
      "grad_norm": 0.7193477749824524,
      "learning_rate": 6.955883266341741e-05,
      "loss": 4.7705,
      "step": 1682
    },
    {
      "epoch": 0.6059405940594059,
      "grad_norm": 0.904472291469574,
      "learning_rate": 6.944911711805842e-05,
      "loss": 4.6842,
      "step": 1683
    },
    {
      "epoch": 0.6063006300630063,
      "grad_norm": 0.6349477171897888,
      "learning_rate": 6.933944211918215e-05,
      "loss": 4.6385,
      "step": 1684
    },
    {
      "epoch": 0.6066606660666066,
      "grad_norm": 0.7457664012908936,
      "learning_rate": 6.922980781234699e-05,
      "loss": 4.3806,
      "step": 1685
    },
    {
      "epoch": 0.607020702070207,
      "grad_norm": 0.7954607605934143,
      "learning_rate": 6.91202143430572e-05,
      "loss": 5.1228,
      "step": 1686
    },
    {
      "epoch": 0.6073807380738074,
      "grad_norm": 0.6613931059837341,
      "learning_rate": 6.901066185676295e-05,
      "loss": 4.3413,
      "step": 1687
    },
    {
      "epoch": 0.6077407740774078,
      "grad_norm": 0.6740626692771912,
      "learning_rate": 6.890115049885994e-05,
      "loss": 4.8427,
      "step": 1688
    },
    {
      "epoch": 0.6081008100810081,
      "grad_norm": 0.6859798431396484,
      "learning_rate": 6.879168041468938e-05,
      "loss": 4.6523,
      "step": 1689
    },
    {
      "epoch": 0.6084608460846085,
      "grad_norm": 0.7126948833465576,
      "learning_rate": 6.868225174953755e-05,
      "loss": 4.987,
      "step": 1690
    },
    {
      "epoch": 0.6088208820882088,
      "grad_norm": 0.6304237842559814,
      "learning_rate": 6.85728646486359e-05,
      "loss": 4.5052,
      "step": 1691
    },
    {
      "epoch": 0.6091809180918092,
      "grad_norm": 0.5101225972175598,
      "learning_rate": 6.846351925716068e-05,
      "loss": 4.449,
      "step": 1692
    },
    {
      "epoch": 0.6095409540954095,
      "grad_norm": 0.8675354719161987,
      "learning_rate": 6.835421572023272e-05,
      "loss": 4.7941,
      "step": 1693
    },
    {
      "epoch": 0.6099009900990099,
      "grad_norm": 0.8150596022605896,
      "learning_rate": 6.82449541829174e-05,
      "loss": 4.8855,
      "step": 1694
    },
    {
      "epoch": 0.6102610261026102,
      "grad_norm": 0.7487970590591431,
      "learning_rate": 6.81357347902243e-05,
      "loss": 4.5067,
      "step": 1695
    },
    {
      "epoch": 0.6106210621062106,
      "grad_norm": 0.7239964008331299,
      "learning_rate": 6.80265576871071e-05,
      "loss": 4.8702,
      "step": 1696
    },
    {
      "epoch": 0.6109810981098109,
      "grad_norm": 0.668526291847229,
      "learning_rate": 6.791742301846326e-05,
      "loss": 4.9668,
      "step": 1697
    },
    {
      "epoch": 0.6113411341134113,
      "grad_norm": 0.673700749874115,
      "learning_rate": 6.780833092913403e-05,
      "loss": 4.8733,
      "step": 1698
    },
    {
      "epoch": 0.6117011701170117,
      "grad_norm": 0.7996618747711182,
      "learning_rate": 6.769928156390414e-05,
      "loss": 4.7596,
      "step": 1699
    },
    {
      "epoch": 0.6120612061206121,
      "grad_norm": 1.36536705493927,
      "learning_rate": 6.759027506750158e-05,
      "loss": 5.1771,
      "step": 1700
    },
    {
      "epoch": 0.6124212421242125,
      "grad_norm": 3.8609843254089355,
      "learning_rate": 6.748131158459742e-05,
      "loss": 5.4209,
      "step": 1701
    },
    {
      "epoch": 0.6127812781278128,
      "grad_norm": 0.9584519863128662,
      "learning_rate": 6.737239125980573e-05,
      "loss": 4.6022,
      "step": 1702
    },
    {
      "epoch": 0.6131413141314132,
      "grad_norm": 0.7238506078720093,
      "learning_rate": 6.726351423768322e-05,
      "loss": 4.4749,
      "step": 1703
    },
    {
      "epoch": 0.6135013501350135,
      "grad_norm": 0.8569992780685425,
      "learning_rate": 6.715468066272921e-05,
      "loss": 5.2025,
      "step": 1704
    },
    {
      "epoch": 0.6138613861386139,
      "grad_norm": 0.7682144641876221,
      "learning_rate": 6.704589067938523e-05,
      "loss": 4.8465,
      "step": 1705
    },
    {
      "epoch": 0.6142214221422142,
      "grad_norm": 0.8438863158226013,
      "learning_rate": 6.693714443203507e-05,
      "loss": 4.9573,
      "step": 1706
    },
    {
      "epoch": 0.6145814581458146,
      "grad_norm": 0.8496419191360474,
      "learning_rate": 6.682844206500445e-05,
      "loss": 4.9977,
      "step": 1707
    },
    {
      "epoch": 0.6149414941494149,
      "grad_norm": 0.6941019296646118,
      "learning_rate": 6.671978372256084e-05,
      "loss": 4.9231,
      "step": 1708
    },
    {
      "epoch": 0.6153015301530153,
      "grad_norm": 0.7767177224159241,
      "learning_rate": 6.661116954891328e-05,
      "loss": 4.8246,
      "step": 1709
    },
    {
      "epoch": 0.6156615661566157,
      "grad_norm": 0.7980932593345642,
      "learning_rate": 6.650259968821218e-05,
      "loss": 4.8588,
      "step": 1710
    },
    {
      "epoch": 0.6160216021602161,
      "grad_norm": 0.8906815052032471,
      "learning_rate": 6.639407428454922e-05,
      "loss": 4.4652,
      "step": 1711
    },
    {
      "epoch": 0.6163816381638164,
      "grad_norm": 0.9898139834403992,
      "learning_rate": 6.62855934819569e-05,
      "loss": 4.6276,
      "step": 1712
    },
    {
      "epoch": 0.6167416741674168,
      "grad_norm": 0.5662094354629517,
      "learning_rate": 6.617715742440869e-05,
      "loss": 4.6352,
      "step": 1713
    },
    {
      "epoch": 0.6171017101710171,
      "grad_norm": 0.7726882696151733,
      "learning_rate": 6.606876625581863e-05,
      "loss": 4.6636,
      "step": 1714
    },
    {
      "epoch": 0.6174617461746175,
      "grad_norm": 0.7297523617744446,
      "learning_rate": 6.59604201200412e-05,
      "loss": 4.796,
      "step": 1715
    },
    {
      "epoch": 0.6178217821782178,
      "grad_norm": 0.7021007537841797,
      "learning_rate": 6.585211916087102e-05,
      "loss": 4.703,
      "step": 1716
    },
    {
      "epoch": 0.6181818181818182,
      "grad_norm": 0.7362711429595947,
      "learning_rate": 6.574386352204289e-05,
      "loss": 5.1348,
      "step": 1717
    },
    {
      "epoch": 0.6185418541854185,
      "grad_norm": 1.0990628004074097,
      "learning_rate": 6.563565334723134e-05,
      "loss": 4.7556,
      "step": 1718
    },
    {
      "epoch": 0.6189018901890189,
      "grad_norm": 0.5887585282325745,
      "learning_rate": 6.55274887800507e-05,
      "loss": 4.4678,
      "step": 1719
    },
    {
      "epoch": 0.6192619261926192,
      "grad_norm": 0.7051016688346863,
      "learning_rate": 6.54193699640546e-05,
      "loss": 4.8287,
      "step": 1720
    },
    {
      "epoch": 0.6196219621962196,
      "grad_norm": 0.8038293123245239,
      "learning_rate": 6.531129704273604e-05,
      "loss": 4.905,
      "step": 1721
    },
    {
      "epoch": 0.61998199819982,
      "grad_norm": 0.9665653705596924,
      "learning_rate": 6.520327015952713e-05,
      "loss": 4.7933,
      "step": 1722
    },
    {
      "epoch": 0.6203420342034204,
      "grad_norm": 0.7566668391227722,
      "learning_rate": 6.509528945779888e-05,
      "loss": 5.2126,
      "step": 1723
    },
    {
      "epoch": 0.6207020702070207,
      "grad_norm": 0.8725939989089966,
      "learning_rate": 6.498735508086093e-05,
      "loss": 5.1499,
      "step": 1724
    },
    {
      "epoch": 0.6210621062106211,
      "grad_norm": 1.4660851955413818,
      "learning_rate": 6.487946717196153e-05,
      "loss": 5.2557,
      "step": 1725
    },
    {
      "epoch": 0.6214221422142214,
      "grad_norm": 1.0590465068817139,
      "learning_rate": 6.47716258742872e-05,
      "loss": 5.2878,
      "step": 1726
    },
    {
      "epoch": 0.6217821782178218,
      "grad_norm": 0.7507508397102356,
      "learning_rate": 6.466383133096267e-05,
      "loss": 4.7792,
      "step": 1727
    },
    {
      "epoch": 0.6221422142214221,
      "grad_norm": 0.6140052080154419,
      "learning_rate": 6.45560836850505e-05,
      "loss": 5.0165,
      "step": 1728
    },
    {
      "epoch": 0.6225022502250225,
      "grad_norm": 0.637725293636322,
      "learning_rate": 6.44483830795511e-05,
      "loss": 4.6269,
      "step": 1729
    },
    {
      "epoch": 0.6228622862286228,
      "grad_norm": 0.7622739672660828,
      "learning_rate": 6.434072965740242e-05,
      "loss": 4.6183,
      "step": 1730
    },
    {
      "epoch": 0.6232223222322232,
      "grad_norm": 0.5821534395217896,
      "learning_rate": 6.423312356147983e-05,
      "loss": 4.6595,
      "step": 1731
    },
    {
      "epoch": 0.6235823582358235,
      "grad_norm": 0.9150854349136353,
      "learning_rate": 6.412556493459581e-05,
      "loss": 4.6189,
      "step": 1732
    },
    {
      "epoch": 0.623942394239424,
      "grad_norm": 0.6262230277061462,
      "learning_rate": 6.40180539194999e-05,
      "loss": 4.8841,
      "step": 1733
    },
    {
      "epoch": 0.6243024302430243,
      "grad_norm": 1.0352506637573242,
      "learning_rate": 6.391059065887847e-05,
      "loss": 4.7395,
      "step": 1734
    },
    {
      "epoch": 0.6246624662466247,
      "grad_norm": 0.7014068961143494,
      "learning_rate": 6.380317529535442e-05,
      "loss": 5.0256,
      "step": 1735
    },
    {
      "epoch": 0.625022502250225,
      "grad_norm": 0.5638285875320435,
      "learning_rate": 6.369580797148718e-05,
      "loss": 4.885,
      "step": 1736
    },
    {
      "epoch": 0.6253825382538254,
      "grad_norm": 0.6988465189933777,
      "learning_rate": 6.358848882977233e-05,
      "loss": 4.406,
      "step": 1737
    },
    {
      "epoch": 0.6257425742574257,
      "grad_norm": 0.6802326440811157,
      "learning_rate": 6.348121801264163e-05,
      "loss": 4.6854,
      "step": 1738
    },
    {
      "epoch": 0.6261026102610261,
      "grad_norm": 0.7166337370872498,
      "learning_rate": 6.337399566246257e-05,
      "loss": 4.9086,
      "step": 1739
    },
    {
      "epoch": 0.6264626462646264,
      "grad_norm": 0.7824912071228027,
      "learning_rate": 6.326682192153838e-05,
      "loss": 4.6632,
      "step": 1740
    },
    {
      "epoch": 0.6268226822682268,
      "grad_norm": 0.656356155872345,
      "learning_rate": 6.315969693210782e-05,
      "loss": 4.837,
      "step": 1741
    },
    {
      "epoch": 0.6271827182718271,
      "grad_norm": 0.653224766254425,
      "learning_rate": 6.305262083634488e-05,
      "loss": 4.8749,
      "step": 1742
    },
    {
      "epoch": 0.6275427542754275,
      "grad_norm": 0.6347287893295288,
      "learning_rate": 6.294559377635864e-05,
      "loss": 4.9606,
      "step": 1743
    },
    {
      "epoch": 0.627902790279028,
      "grad_norm": 0.6182569265365601,
      "learning_rate": 6.283861589419316e-05,
      "loss": 4.609,
      "step": 1744
    },
    {
      "epoch": 0.6282628262826283,
      "grad_norm": 0.6387762427330017,
      "learning_rate": 6.273168733182722e-05,
      "loss": 4.8894,
      "step": 1745
    },
    {
      "epoch": 0.6286228622862287,
      "grad_norm": 0.7164087295532227,
      "learning_rate": 6.262480823117416e-05,
      "loss": 4.8205,
      "step": 1746
    },
    {
      "epoch": 0.628982898289829,
      "grad_norm": 1.3452858924865723,
      "learning_rate": 6.251797873408161e-05,
      "loss": 5.041,
      "step": 1747
    },
    {
      "epoch": 0.6293429342934294,
      "grad_norm": 0.9320101737976074,
      "learning_rate": 6.241119898233144e-05,
      "loss": 5.0222,
      "step": 1748
    },
    {
      "epoch": 0.6297029702970297,
      "grad_norm": 0.9436710476875305,
      "learning_rate": 6.230446911763943e-05,
      "loss": 5.1603,
      "step": 1749
    },
    {
      "epoch": 0.6300630063006301,
      "grad_norm": 1.33075749874115,
      "learning_rate": 6.219778928165527e-05,
      "loss": 5.3501,
      "step": 1750
    },
    {
      "epoch": 0.6304230423042304,
      "grad_norm": 1.5732166767120361,
      "learning_rate": 6.209115961596208e-05,
      "loss": 4.5678,
      "step": 1751
    },
    {
      "epoch": 0.6307830783078308,
      "grad_norm": 0.752070963382721,
      "learning_rate": 6.198458026207652e-05,
      "loss": 4.7007,
      "step": 1752
    },
    {
      "epoch": 0.6311431143114311,
      "grad_norm": 1.0328824520111084,
      "learning_rate": 6.187805136144847e-05,
      "loss": 4.5845,
      "step": 1753
    },
    {
      "epoch": 0.6315031503150315,
      "grad_norm": 0.7405611276626587,
      "learning_rate": 6.177157305546078e-05,
      "loss": 4.8958,
      "step": 1754
    },
    {
      "epoch": 0.6318631863186318,
      "grad_norm": 1.0916255712509155,
      "learning_rate": 6.16651454854292e-05,
      "loss": 4.9579,
      "step": 1755
    },
    {
      "epoch": 0.6322232223222323,
      "grad_norm": 0.7531924843788147,
      "learning_rate": 6.15587687926022e-05,
      "loss": 4.9045,
      "step": 1756
    },
    {
      "epoch": 0.6325832583258326,
      "grad_norm": 0.6694497466087341,
      "learning_rate": 6.145244311816063e-05,
      "loss": 4.8199,
      "step": 1757
    },
    {
      "epoch": 0.632943294329433,
      "grad_norm": 0.674801766872406,
      "learning_rate": 6.134616860321764e-05,
      "loss": 5.045,
      "step": 1758
    },
    {
      "epoch": 0.6333033303330333,
      "grad_norm": 1.0546095371246338,
      "learning_rate": 6.123994538881851e-05,
      "loss": 5.2049,
      "step": 1759
    },
    {
      "epoch": 0.6336633663366337,
      "grad_norm": 0.6533979177474976,
      "learning_rate": 6.113377361594049e-05,
      "loss": 5.0091,
      "step": 1760
    },
    {
      "epoch": 0.634023402340234,
      "grad_norm": 0.8369541764259338,
      "learning_rate": 6.102765342549246e-05,
      "loss": 4.6803,
      "step": 1761
    },
    {
      "epoch": 0.6343834383438344,
      "grad_norm": 1.1984913349151611,
      "learning_rate": 6.092158495831486e-05,
      "loss": 4.8795,
      "step": 1762
    },
    {
      "epoch": 0.6347434743474347,
      "grad_norm": 0.5820183753967285,
      "learning_rate": 6.0815568355179556e-05,
      "loss": 4.8205,
      "step": 1763
    },
    {
      "epoch": 0.6351035103510351,
      "grad_norm": 0.6554083228111267,
      "learning_rate": 6.070960375678949e-05,
      "loss": 4.7503,
      "step": 1764
    },
    {
      "epoch": 0.6354635463546354,
      "grad_norm": 0.9061247706413269,
      "learning_rate": 6.0603691303778696e-05,
      "loss": 4.8138,
      "step": 1765
    },
    {
      "epoch": 0.6358235823582358,
      "grad_norm": 0.766362190246582,
      "learning_rate": 6.0497831136711836e-05,
      "loss": 4.4586,
      "step": 1766
    },
    {
      "epoch": 0.6361836183618362,
      "grad_norm": 0.7624132037162781,
      "learning_rate": 6.039202339608432e-05,
      "loss": 4.7515,
      "step": 1767
    },
    {
      "epoch": 0.6365436543654366,
      "grad_norm": 0.6750558614730835,
      "learning_rate": 6.028626822232193e-05,
      "loss": 5.0298,
      "step": 1768
    },
    {
      "epoch": 0.6369036903690369,
      "grad_norm": 0.8097175359725952,
      "learning_rate": 6.018056575578075e-05,
      "loss": 4.8065,
      "step": 1769
    },
    {
      "epoch": 0.6372637263726373,
      "grad_norm": 0.76420658826828,
      "learning_rate": 6.007491613674669e-05,
      "loss": 4.6139,
      "step": 1770
    },
    {
      "epoch": 0.6376237623762376,
      "grad_norm": 0.7700027823448181,
      "learning_rate": 5.996931950543583e-05,
      "loss": 5.0582,
      "step": 1771
    },
    {
      "epoch": 0.637983798379838,
      "grad_norm": 0.6350474953651428,
      "learning_rate": 5.986377600199371e-05,
      "loss": 4.8447,
      "step": 1772
    },
    {
      "epoch": 0.6383438343834383,
      "grad_norm": 0.7874675989151001,
      "learning_rate": 5.9758285766495495e-05,
      "loss": 5.1058,
      "step": 1773
    },
    {
      "epoch": 0.6387038703870387,
      "grad_norm": 1.3597450256347656,
      "learning_rate": 5.965284893894547e-05,
      "loss": 5.2674,
      "step": 1774
    },
    {
      "epoch": 0.639063906390639,
      "grad_norm": 0.9899166822433472,
      "learning_rate": 5.9547465659277215e-05,
      "loss": 5.1647,
      "step": 1775
    },
    {
      "epoch": 0.6394239423942394,
      "grad_norm": 2.18182635307312,
      "learning_rate": 5.944213606735322e-05,
      "loss": 4.7493,
      "step": 1776
    },
    {
      "epoch": 0.6397839783978397,
      "grad_norm": 0.7582593560218811,
      "learning_rate": 5.933686030296459e-05,
      "loss": 4.7778,
      "step": 1777
    },
    {
      "epoch": 0.6401440144014402,
      "grad_norm": 0.7539111375808716,
      "learning_rate": 5.923163850583113e-05,
      "loss": 4.5992,
      "step": 1778
    },
    {
      "epoch": 0.6405040504050405,
      "grad_norm": 0.8477987051010132,
      "learning_rate": 5.9126470815600966e-05,
      "loss": 4.7375,
      "step": 1779
    },
    {
      "epoch": 0.6408640864086409,
      "grad_norm": 0.6299402713775635,
      "learning_rate": 5.9021357371850486e-05,
      "loss": 4.652,
      "step": 1780
    },
    {
      "epoch": 0.6412241224122412,
      "grad_norm": 0.7032667994499207,
      "learning_rate": 5.8916298314083915e-05,
      "loss": 4.3023,
      "step": 1781
    },
    {
      "epoch": 0.6415841584158416,
      "grad_norm": 0.8376038074493408,
      "learning_rate": 5.881129378173347e-05,
      "loss": 4.6135,
      "step": 1782
    },
    {
      "epoch": 0.641944194419442,
      "grad_norm": 0.9109644889831543,
      "learning_rate": 5.8706343914158914e-05,
      "loss": 4.4688,
      "step": 1783
    },
    {
      "epoch": 0.6423042304230423,
      "grad_norm": 1.2081356048583984,
      "learning_rate": 5.860144885064751e-05,
      "loss": 4.6571,
      "step": 1784
    },
    {
      "epoch": 0.6426642664266426,
      "grad_norm": 0.7176885008811951,
      "learning_rate": 5.8496608730413716e-05,
      "loss": 4.8487,
      "step": 1785
    },
    {
      "epoch": 0.643024302430243,
      "grad_norm": 0.5789865851402283,
      "learning_rate": 5.8391823692599124e-05,
      "loss": 4.8274,
      "step": 1786
    },
    {
      "epoch": 0.6433843384338434,
      "grad_norm": 0.6851209998130798,
      "learning_rate": 5.828709387627218e-05,
      "loss": 4.4276,
      "step": 1787
    },
    {
      "epoch": 0.6437443744374437,
      "grad_norm": 0.7992741465568542,
      "learning_rate": 5.818241942042819e-05,
      "loss": 4.236,
      "step": 1788
    },
    {
      "epoch": 0.644104410441044,
      "grad_norm": 0.6117070317268372,
      "learning_rate": 5.807780046398873e-05,
      "loss": 4.7178,
      "step": 1789
    },
    {
      "epoch": 0.6444644464446445,
      "grad_norm": 0.5066046118736267,
      "learning_rate": 5.797323714580192e-05,
      "loss": 4.608,
      "step": 1790
    },
    {
      "epoch": 0.6448244824482449,
      "grad_norm": 0.729286789894104,
      "learning_rate": 5.786872960464196e-05,
      "loss": 5.1699,
      "step": 1791
    },
    {
      "epoch": 0.6451845184518452,
      "grad_norm": 0.6828146576881409,
      "learning_rate": 5.7764277979209094e-05,
      "loss": 4.761,
      "step": 1792
    },
    {
      "epoch": 0.6455445544554456,
      "grad_norm": 0.5605363249778748,
      "learning_rate": 5.765988240812921e-05,
      "loss": 4.7329,
      "step": 1793
    },
    {
      "epoch": 0.6459045904590459,
      "grad_norm": 0.7322149872779846,
      "learning_rate": 5.755554302995393e-05,
      "loss": 4.9112,
      "step": 1794
    },
    {
      "epoch": 0.6462646264626463,
      "grad_norm": 0.6481497287750244,
      "learning_rate": 5.74512599831603e-05,
      "loss": 4.6671,
      "step": 1795
    },
    {
      "epoch": 0.6466246624662466,
      "grad_norm": 0.6252749562263489,
      "learning_rate": 5.73470334061505e-05,
      "loss": 4.9359,
      "step": 1796
    },
    {
      "epoch": 0.646984698469847,
      "grad_norm": 0.8327919840812683,
      "learning_rate": 5.724286343725185e-05,
      "loss": 5.0185,
      "step": 1797
    },
    {
      "epoch": 0.6473447344734473,
      "grad_norm": 0.8310493230819702,
      "learning_rate": 5.713875021471653e-05,
      "loss": 5.057,
      "step": 1798
    },
    {
      "epoch": 0.6477047704770477,
      "grad_norm": 0.8026371002197266,
      "learning_rate": 5.7034693876721376e-05,
      "loss": 4.8322,
      "step": 1799
    },
    {
      "epoch": 0.648064806480648,
      "grad_norm": 1.5379436016082764,
      "learning_rate": 5.693069456136779e-05,
      "loss": 5.3851,
      "step": 1800
    },
    {
      "epoch": 0.6484248424842485,
      "grad_norm": 0.8481616973876953,
      "learning_rate": 5.682675240668143e-05,
      "loss": 4.6216,
      "step": 1801
    },
    {
      "epoch": 0.6487848784878488,
      "grad_norm": 0.8129981756210327,
      "learning_rate": 5.6722867550612116e-05,
      "loss": 4.4348,
      "step": 1802
    },
    {
      "epoch": 0.6491449144914492,
      "grad_norm": 0.8237068057060242,
      "learning_rate": 5.661904013103365e-05,
      "loss": 4.833,
      "step": 1803
    },
    {
      "epoch": 0.6495049504950495,
      "grad_norm": 0.6432878375053406,
      "learning_rate": 5.6515270285743524e-05,
      "loss": 4.6828,
      "step": 1804
    },
    {
      "epoch": 0.6498649864986499,
      "grad_norm": 0.9345207214355469,
      "learning_rate": 5.6411558152462894e-05,
      "loss": 4.5397,
      "step": 1805
    },
    {
      "epoch": 0.6502250225022502,
      "grad_norm": 0.5214217901229858,
      "learning_rate": 5.630790386883631e-05,
      "loss": 4.6287,
      "step": 1806
    },
    {
      "epoch": 0.6505850585058506,
      "grad_norm": 0.7941548824310303,
      "learning_rate": 5.620430757243156e-05,
      "loss": 4.7249,
      "step": 1807
    },
    {
      "epoch": 0.6509450945094509,
      "grad_norm": 0.6697788238525391,
      "learning_rate": 5.6100769400739383e-05,
      "loss": 4.6915,
      "step": 1808
    },
    {
      "epoch": 0.6513051305130513,
      "grad_norm": 0.8533397912979126,
      "learning_rate": 5.599728949117348e-05,
      "loss": 4.9724,
      "step": 1809
    },
    {
      "epoch": 0.6516651665166516,
      "grad_norm": 0.6576551795005798,
      "learning_rate": 5.589386798107018e-05,
      "loss": 4.6834,
      "step": 1810
    },
    {
      "epoch": 0.652025202520252,
      "grad_norm": 0.9863411784172058,
      "learning_rate": 5.579050500768836e-05,
      "loss": 5.2171,
      "step": 1811
    },
    {
      "epoch": 0.6523852385238524,
      "grad_norm": 0.6458398699760437,
      "learning_rate": 5.5687200708209076e-05,
      "loss": 4.849,
      "step": 1812
    },
    {
      "epoch": 0.6527452745274528,
      "grad_norm": 0.6335414052009583,
      "learning_rate": 5.558395521973565e-05,
      "loss": 4.4948,
      "step": 1813
    },
    {
      "epoch": 0.6531053105310531,
      "grad_norm": 0.664715051651001,
      "learning_rate": 5.54807686792933e-05,
      "loss": 4.5827,
      "step": 1814
    },
    {
      "epoch": 0.6534653465346535,
      "grad_norm": 0.6272945404052734,
      "learning_rate": 5.5377641223829e-05,
      "loss": 4.8311,
      "step": 1815
    },
    {
      "epoch": 0.6538253825382538,
      "grad_norm": 0.5388202667236328,
      "learning_rate": 5.527457299021133e-05,
      "loss": 4.7276,
      "step": 1816
    },
    {
      "epoch": 0.6541854185418542,
      "grad_norm": 0.6526055335998535,
      "learning_rate": 5.5171564115230254e-05,
      "loss": 4.8818,
      "step": 1817
    },
    {
      "epoch": 0.6545454545454545,
      "grad_norm": 0.6699817776679993,
      "learning_rate": 5.5068614735597e-05,
      "loss": 4.5449,
      "step": 1818
    },
    {
      "epoch": 0.6549054905490549,
      "grad_norm": 0.7195981740951538,
      "learning_rate": 5.496572498794372e-05,
      "loss": 5.0371,
      "step": 1819
    },
    {
      "epoch": 0.6552655265526552,
      "grad_norm": 0.7665285468101501,
      "learning_rate": 5.486289500882355e-05,
      "loss": 4.5342,
      "step": 1820
    },
    {
      "epoch": 0.6556255625562556,
      "grad_norm": 0.7282291054725647,
      "learning_rate": 5.476012493471023e-05,
      "loss": 4.7795,
      "step": 1821
    },
    {
      "epoch": 0.6559855985598559,
      "grad_norm": 0.7076379656791687,
      "learning_rate": 5.4657414901998095e-05,
      "loss": 4.5155,
      "step": 1822
    },
    {
      "epoch": 0.6563456345634563,
      "grad_norm": 0.9629925489425659,
      "learning_rate": 5.4554765047001613e-05,
      "loss": 4.7799,
      "step": 1823
    },
    {
      "epoch": 0.6567056705670568,
      "grad_norm": 1.2366801500320435,
      "learning_rate": 5.445217550595552e-05,
      "loss": 5.1011,
      "step": 1824
    },
    {
      "epoch": 0.6570657065706571,
      "grad_norm": 1.4854378700256348,
      "learning_rate": 5.43496464150145e-05,
      "loss": 4.9074,
      "step": 1825
    },
    {
      "epoch": 0.6574257425742575,
      "grad_norm": 0.7619197368621826,
      "learning_rate": 5.424717791025302e-05,
      "loss": 4.3182,
      "step": 1826
    },
    {
      "epoch": 0.6577857785778578,
      "grad_norm": 0.718901515007019,
      "learning_rate": 5.4144770127665024e-05,
      "loss": 4.8806,
      "step": 1827
    },
    {
      "epoch": 0.6581458145814582,
      "grad_norm": 0.6655319333076477,
      "learning_rate": 5.4042423203163975e-05,
      "loss": 4.6217,
      "step": 1828
    },
    {
      "epoch": 0.6585058505850585,
      "grad_norm": 0.6734433770179749,
      "learning_rate": 5.394013727258254e-05,
      "loss": 4.512,
      "step": 1829
    },
    {
      "epoch": 0.6588658865886589,
      "grad_norm": 0.77203768491745,
      "learning_rate": 5.3837912471672446e-05,
      "loss": 4.7527,
      "step": 1830
    },
    {
      "epoch": 0.6592259225922592,
      "grad_norm": 0.665772020816803,
      "learning_rate": 5.3735748936104255e-05,
      "loss": 4.6671,
      "step": 1831
    },
    {
      "epoch": 0.6595859585958596,
      "grad_norm": 0.8873734474182129,
      "learning_rate": 5.363364680146725e-05,
      "loss": 5.2537,
      "step": 1832
    },
    {
      "epoch": 0.6599459945994599,
      "grad_norm": 0.8361346125602722,
      "learning_rate": 5.3531606203269236e-05,
      "loss": 4.6509,
      "step": 1833
    },
    {
      "epoch": 0.6603060306030603,
      "grad_norm": 0.8583419919013977,
      "learning_rate": 5.342962727693633e-05,
      "loss": 4.8991,
      "step": 1834
    },
    {
      "epoch": 0.6606660666066607,
      "grad_norm": 0.6351554989814758,
      "learning_rate": 5.332771015781275e-05,
      "loss": 4.6789,
      "step": 1835
    },
    {
      "epoch": 0.6610261026102611,
      "grad_norm": 0.8681540489196777,
      "learning_rate": 5.322585498116075e-05,
      "loss": 4.6612,
      "step": 1836
    },
    {
      "epoch": 0.6613861386138614,
      "grad_norm": 0.7458162307739258,
      "learning_rate": 5.31240618821604e-05,
      "loss": 4.5165,
      "step": 1837
    },
    {
      "epoch": 0.6617461746174618,
      "grad_norm": 1.2762198448181152,
      "learning_rate": 5.302233099590928e-05,
      "loss": 4.3505,
      "step": 1838
    },
    {
      "epoch": 0.6621062106210621,
      "grad_norm": 0.8469787836074829,
      "learning_rate": 5.292066245742246e-05,
      "loss": 5.2084,
      "step": 1839
    },
    {
      "epoch": 0.6624662466246625,
      "grad_norm": 1.1678879261016846,
      "learning_rate": 5.2819056401632304e-05,
      "loss": 4.6556,
      "step": 1840
    },
    {
      "epoch": 0.6628262826282628,
      "grad_norm": 0.635159432888031,
      "learning_rate": 5.271751296338823e-05,
      "loss": 4.7362,
      "step": 1841
    },
    {
      "epoch": 0.6631863186318632,
      "grad_norm": 0.7722536325454712,
      "learning_rate": 5.2616032277456463e-05,
      "loss": 5.0082,
      "step": 1842
    },
    {
      "epoch": 0.6635463546354635,
      "grad_norm": 0.5722522139549255,
      "learning_rate": 5.251461447852003e-05,
      "loss": 4.8685,
      "step": 1843
    },
    {
      "epoch": 0.6639063906390639,
      "grad_norm": 0.4967202842235565,
      "learning_rate": 5.2413259701178505e-05,
      "loss": 4.7663,
      "step": 1844
    },
    {
      "epoch": 0.6642664266426642,
      "grad_norm": 0.6773428320884705,
      "learning_rate": 5.231196807994779e-05,
      "loss": 4.8131,
      "step": 1845
    },
    {
      "epoch": 0.6646264626462647,
      "grad_norm": 0.681438684463501,
      "learning_rate": 5.221073974925997e-05,
      "loss": 4.9336,
      "step": 1846
    },
    {
      "epoch": 0.664986498649865,
      "grad_norm": 0.8762783408164978,
      "learning_rate": 5.210957484346314e-05,
      "loss": 5.4102,
      "step": 1847
    },
    {
      "epoch": 0.6653465346534654,
      "grad_norm": 0.971596360206604,
      "learning_rate": 5.200847349682121e-05,
      "loss": 5.5351,
      "step": 1848
    },
    {
      "epoch": 0.6657065706570657,
      "grad_norm": 0.8573640584945679,
      "learning_rate": 5.190743584351376e-05,
      "loss": 5.221,
      "step": 1849
    },
    {
      "epoch": 0.6660666066606661,
      "grad_norm": 1.9084817171096802,
      "learning_rate": 5.180646201763577e-05,
      "loss": 5.6038,
      "step": 1850
    },
    {
      "epoch": 0.6664266426642664,
      "grad_norm": 1.0819993019104004,
      "learning_rate": 5.170555215319757e-05,
      "loss": 4.7475,
      "step": 1851
    },
    {
      "epoch": 0.6667866786678668,
      "grad_norm": 1.0352802276611328,
      "learning_rate": 5.160470638412461e-05,
      "loss": 4.815,
      "step": 1852
    },
    {
      "epoch": 0.6671467146714671,
      "grad_norm": 0.9234697222709656,
      "learning_rate": 5.150392484425728e-05,
      "loss": 5.1675,
      "step": 1853
    },
    {
      "epoch": 0.6675067506750675,
      "grad_norm": 0.7216193079948425,
      "learning_rate": 5.140320766735063e-05,
      "loss": 4.7963,
      "step": 1854
    },
    {
      "epoch": 0.6678667866786678,
      "grad_norm": 0.7376987934112549,
      "learning_rate": 5.130255498707438e-05,
      "loss": 4.5616,
      "step": 1855
    },
    {
      "epoch": 0.6682268226822682,
      "grad_norm": 0.8142603039741516,
      "learning_rate": 5.120196693701267e-05,
      "loss": 4.4483,
      "step": 1856
    },
    {
      "epoch": 0.6685868586858685,
      "grad_norm": 0.558108389377594,
      "learning_rate": 5.1101443650663764e-05,
      "loss": 4.9678,
      "step": 1857
    },
    {
      "epoch": 0.668946894689469,
      "grad_norm": 0.8197848200798035,
      "learning_rate": 5.100098526144006e-05,
      "loss": 4.7343,
      "step": 1858
    },
    {
      "epoch": 0.6693069306930693,
      "grad_norm": 0.75601726770401,
      "learning_rate": 5.090059190266779e-05,
      "loss": 5.1422,
      "step": 1859
    },
    {
      "epoch": 0.6696669666966697,
      "grad_norm": 0.6538819074630737,
      "learning_rate": 5.0800263707586903e-05,
      "loss": 4.7189,
      "step": 1860
    },
    {
      "epoch": 0.67002700270027,
      "grad_norm": 1.0711504220962524,
      "learning_rate": 5.0700000809350836e-05,
      "loss": 4.703,
      "step": 1861
    },
    {
      "epoch": 0.6703870387038704,
      "grad_norm": 0.7144097089767456,
      "learning_rate": 5.059980334102637e-05,
      "loss": 4.3519,
      "step": 1862
    },
    {
      "epoch": 0.6707470747074707,
      "grad_norm": 0.801908016204834,
      "learning_rate": 5.049967143559349e-05,
      "loss": 4.7171,
      "step": 1863
    },
    {
      "epoch": 0.6711071107110711,
      "grad_norm": 0.9164612293243408,
      "learning_rate": 5.0399605225945135e-05,
      "loss": 4.7499,
      "step": 1864
    },
    {
      "epoch": 0.6714671467146714,
      "grad_norm": 0.6613209843635559,
      "learning_rate": 5.0299604844886985e-05,
      "loss": 4.6498,
      "step": 1865
    },
    {
      "epoch": 0.6718271827182718,
      "grad_norm": 0.8215205669403076,
      "learning_rate": 5.019967042513748e-05,
      "loss": 4.9471,
      "step": 1866
    },
    {
      "epoch": 0.6721872187218721,
      "grad_norm": 1.1121480464935303,
      "learning_rate": 5.009980209932743e-05,
      "loss": 5.0059,
      "step": 1867
    },
    {
      "epoch": 0.6725472547254725,
      "grad_norm": 0.516362190246582,
      "learning_rate": 5.000000000000002e-05,
      "loss": 4.5532,
      "step": 1868
    },
    {
      "epoch": 0.672907290729073,
      "grad_norm": 0.5757459998130798,
      "learning_rate": 4.990026425961038e-05,
      "loss": 4.6692,
      "step": 1869
    },
    {
      "epoch": 0.6732673267326733,
      "grad_norm": 0.6386864185333252,
      "learning_rate": 4.980059501052572e-05,
      "loss": 4.709,
      "step": 1870
    },
    {
      "epoch": 0.6736273627362737,
      "grad_norm": 1.0361477136611938,
      "learning_rate": 4.9700992385024934e-05,
      "loss": 5.1841,
      "step": 1871
    },
    {
      "epoch": 0.673987398739874,
      "grad_norm": 0.7029489874839783,
      "learning_rate": 4.960145651529856e-05,
      "loss": 4.6923,
      "step": 1872
    },
    {
      "epoch": 0.6743474347434744,
      "grad_norm": 0.9367014765739441,
      "learning_rate": 4.9501987533448413e-05,
      "loss": 5.4315,
      "step": 1873
    },
    {
      "epoch": 0.6747074707470747,
      "grad_norm": 1.1029951572418213,
      "learning_rate": 4.940258557148765e-05,
      "loss": 5.5143,
      "step": 1874
    },
    {
      "epoch": 0.6750675067506751,
      "grad_norm": 1.3592629432678223,
      "learning_rate": 4.930325076134042e-05,
      "loss": 5.2936,
      "step": 1875
    },
    {
      "epoch": 0.6754275427542754,
      "grad_norm": 2.5242362022399902,
      "learning_rate": 4.920398323484182e-05,
      "loss": 4.9574,
      "step": 1876
    },
    {
      "epoch": 0.6757875787578758,
      "grad_norm": 0.8447120189666748,
      "learning_rate": 4.9104783123737566e-05,
      "loss": 4.8343,
      "step": 1877
    },
    {
      "epoch": 0.6761476147614761,
      "grad_norm": 0.9122353196144104,
      "learning_rate": 4.9005650559683946e-05,
      "loss": 4.7467,
      "step": 1878
    },
    {
      "epoch": 0.6765076507650765,
      "grad_norm": 0.7419308423995972,
      "learning_rate": 4.890658567424763e-05,
      "loss": 4.8193,
      "step": 1879
    },
    {
      "epoch": 0.6768676867686768,
      "grad_norm": 0.9262195825576782,
      "learning_rate": 4.880758859890536e-05,
      "loss": 4.8589,
      "step": 1880
    },
    {
      "epoch": 0.6772277227722773,
      "grad_norm": 0.7551014423370361,
      "learning_rate": 4.8708659465043996e-05,
      "loss": 4.9234,
      "step": 1881
    },
    {
      "epoch": 0.6775877587758776,
      "grad_norm": 0.949427604675293,
      "learning_rate": 4.860979840396016e-05,
      "loss": 5.0352,
      "step": 1882
    },
    {
      "epoch": 0.677947794779478,
      "grad_norm": 0.545581042766571,
      "learning_rate": 4.851100554686021e-05,
      "loss": 4.7575,
      "step": 1883
    },
    {
      "epoch": 0.6783078307830783,
      "grad_norm": 1.049039363861084,
      "learning_rate": 4.841228102485984e-05,
      "loss": 4.9452,
      "step": 1884
    },
    {
      "epoch": 0.6786678667866787,
      "grad_norm": 0.7244091033935547,
      "learning_rate": 4.831362496898418e-05,
      "loss": 4.1711,
      "step": 1885
    },
    {
      "epoch": 0.679027902790279,
      "grad_norm": 0.6629701852798462,
      "learning_rate": 4.821503751016746e-05,
      "loss": 4.4553,
      "step": 1886
    },
    {
      "epoch": 0.6793879387938794,
      "grad_norm": 0.8092941045761108,
      "learning_rate": 4.8116518779252885e-05,
      "loss": 4.4547,
      "step": 1887
    },
    {
      "epoch": 0.6797479747974797,
      "grad_norm": 0.938777506351471,
      "learning_rate": 4.8018068906992356e-05,
      "loss": 4.8169,
      "step": 1888
    },
    {
      "epoch": 0.6801080108010801,
      "grad_norm": 0.8320968747138977,
      "learning_rate": 4.791968802404648e-05,
      "loss": 5.0336,
      "step": 1889
    },
    {
      "epoch": 0.6804680468046804,
      "grad_norm": 0.7015447020530701,
      "learning_rate": 4.7821376260984285e-05,
      "loss": 4.5182,
      "step": 1890
    },
    {
      "epoch": 0.6808280828082808,
      "grad_norm": 0.839224100112915,
      "learning_rate": 4.772313374828304e-05,
      "loss": 5.0579,
      "step": 1891
    },
    {
      "epoch": 0.6811881188118812,
      "grad_norm": 0.7794513702392578,
      "learning_rate": 4.762496061632814e-05,
      "loss": 4.5722,
      "step": 1892
    },
    {
      "epoch": 0.6815481548154816,
      "grad_norm": 0.8089141845703125,
      "learning_rate": 4.752685699541287e-05,
      "loss": 4.6715,
      "step": 1893
    },
    {
      "epoch": 0.6819081908190819,
      "grad_norm": 0.7713329195976257,
      "learning_rate": 4.742882301573828e-05,
      "loss": 4.6361,
      "step": 1894
    },
    {
      "epoch": 0.6822682268226823,
      "grad_norm": 0.5469356775283813,
      "learning_rate": 4.733085880741301e-05,
      "loss": 4.6219,
      "step": 1895
    },
    {
      "epoch": 0.6826282628262826,
      "grad_norm": 0.5594090223312378,
      "learning_rate": 4.7232964500453006e-05,
      "loss": 4.8541,
      "step": 1896
    },
    {
      "epoch": 0.682988298829883,
      "grad_norm": 1.0760741233825684,
      "learning_rate": 4.713514022478155e-05,
      "loss": 5.1101,
      "step": 1897
    },
    {
      "epoch": 0.6833483348334833,
      "grad_norm": 0.7999005317687988,
      "learning_rate": 4.7037386110228985e-05,
      "loss": 4.8297,
      "step": 1898
    },
    {
      "epoch": 0.6837083708370837,
      "grad_norm": 0.8169113397598267,
      "learning_rate": 4.6939702286532414e-05,
      "loss": 4.9598,
      "step": 1899
    },
    {
      "epoch": 0.684068406840684,
      "grad_norm": 1.252626895904541,
      "learning_rate": 4.684208888333577e-05,
      "loss": 5.188,
      "step": 1900
    },
    {
      "epoch": 0.6844284428442844,
      "grad_norm": 0.5477898716926575,
      "learning_rate": 4.6744546030189486e-05,
      "loss": 4.418,
      "step": 1901
    },
    {
      "epoch": 0.6847884788478847,
      "grad_norm": 0.5308620929718018,
      "learning_rate": 4.6647073856550415e-05,
      "loss": 4.9039,
      "step": 1902
    },
    {
      "epoch": 0.6851485148514852,
      "grad_norm": 0.49975287914276123,
      "learning_rate": 4.654967249178147e-05,
      "loss": 4.9186,
      "step": 1903
    },
    {
      "epoch": 0.6855085508550856,
      "grad_norm": 0.8217042684555054,
      "learning_rate": 4.645234206515171e-05,
      "loss": 4.643,
      "step": 1904
    },
    {
      "epoch": 0.6858685868586859,
      "grad_norm": 0.5679884552955627,
      "learning_rate": 4.635508270583601e-05,
      "loss": 4.3299,
      "step": 1905
    },
    {
      "epoch": 0.6862286228622863,
      "grad_norm": 0.6399744749069214,
      "learning_rate": 4.625789454291493e-05,
      "loss": 4.7144,
      "step": 1906
    },
    {
      "epoch": 0.6865886588658866,
      "grad_norm": 0.6763333678245544,
      "learning_rate": 4.6160777705374524e-05,
      "loss": 4.8988,
      "step": 1907
    },
    {
      "epoch": 0.686948694869487,
      "grad_norm": 0.6468039155006409,
      "learning_rate": 4.606373232210621e-05,
      "loss": 4.7513,
      "step": 1908
    },
    {
      "epoch": 0.6873087308730873,
      "grad_norm": 0.5941967964172363,
      "learning_rate": 4.596675852190656e-05,
      "loss": 4.522,
      "step": 1909
    },
    {
      "epoch": 0.6876687668766877,
      "grad_norm": 0.6434370279312134,
      "learning_rate": 4.586985643347717e-05,
      "loss": 4.5992,
      "step": 1910
    },
    {
      "epoch": 0.688028802880288,
      "grad_norm": 0.684079110622406,
      "learning_rate": 4.577302618542435e-05,
      "loss": 4.824,
      "step": 1911
    },
    {
      "epoch": 0.6883888388838884,
      "grad_norm": 0.5801157355308533,
      "learning_rate": 4.567626790625921e-05,
      "loss": 4.8966,
      "step": 1912
    },
    {
      "epoch": 0.6887488748874887,
      "grad_norm": 0.5670872330665588,
      "learning_rate": 4.5579581724397255e-05,
      "loss": 4.2661,
      "step": 1913
    },
    {
      "epoch": 0.689108910891089,
      "grad_norm": 0.521619439125061,
      "learning_rate": 4.548296776815839e-05,
      "loss": 4.4828,
      "step": 1914
    },
    {
      "epoch": 0.6894689468946895,
      "grad_norm": 0.6507243514060974,
      "learning_rate": 4.538642616576652e-05,
      "loss": 4.6493,
      "step": 1915
    },
    {
      "epoch": 0.6898289828982899,
      "grad_norm": 0.5946521759033203,
      "learning_rate": 4.5289957045349653e-05,
      "loss": 4.7168,
      "step": 1916
    },
    {
      "epoch": 0.6901890189018902,
      "grad_norm": 0.574393630027771,
      "learning_rate": 4.519356053493958e-05,
      "loss": 4.7364,
      "step": 1917
    },
    {
      "epoch": 0.6905490549054906,
      "grad_norm": 0.6550558805465698,
      "learning_rate": 4.5097236762471653e-05,
      "loss": 4.6582,
      "step": 1918
    },
    {
      "epoch": 0.6909090909090909,
      "grad_norm": 0.9266576766967773,
      "learning_rate": 4.5000985855784746e-05,
      "loss": 4.7063,
      "step": 1919
    },
    {
      "epoch": 0.6912691269126913,
      "grad_norm": 0.5542899966239929,
      "learning_rate": 4.490480794262104e-05,
      "loss": 4.8331,
      "step": 1920
    },
    {
      "epoch": 0.6916291629162916,
      "grad_norm": 0.8518589735031128,
      "learning_rate": 4.480870315062583e-05,
      "loss": 4.9934,
      "step": 1921
    },
    {
      "epoch": 0.691989198919892,
      "grad_norm": 0.9087819457054138,
      "learning_rate": 4.471267160734731e-05,
      "loss": 5.1262,
      "step": 1922
    },
    {
      "epoch": 0.6923492349234923,
      "grad_norm": 0.9683169722557068,
      "learning_rate": 4.4616713440236516e-05,
      "loss": 5.2386,
      "step": 1923
    },
    {
      "epoch": 0.6927092709270927,
      "grad_norm": 1.0893546342849731,
      "learning_rate": 4.4520828776647104e-05,
      "loss": 4.8366,
      "step": 1924
    },
    {
      "epoch": 0.693069306930693,
      "grad_norm": 1.2286421060562134,
      "learning_rate": 4.442501774383515e-05,
      "loss": 5.369,
      "step": 1925
    },
    {
      "epoch": 0.6934293429342935,
      "grad_norm": 0.5617548227310181,
      "learning_rate": 4.432928046895905e-05,
      "loss": 4.6432,
      "step": 1926
    },
    {
      "epoch": 0.6937893789378938,
      "grad_norm": 0.7067145109176636,
      "learning_rate": 4.4233617079079236e-05,
      "loss": 4.5953,
      "step": 1927
    },
    {
      "epoch": 0.6941494149414942,
      "grad_norm": 0.659268856048584,
      "learning_rate": 4.413802770115816e-05,
      "loss": 5.1397,
      "step": 1928
    },
    {
      "epoch": 0.6945094509450945,
      "grad_norm": 0.7078251242637634,
      "learning_rate": 4.404251246206005e-05,
      "loss": 4.7738,
      "step": 1929
    },
    {
      "epoch": 0.6948694869486949,
      "grad_norm": 0.5560519695281982,
      "learning_rate": 4.3947071488550605e-05,
      "loss": 4.49,
      "step": 1930
    },
    {
      "epoch": 0.6952295229522952,
      "grad_norm": 0.5539645552635193,
      "learning_rate": 4.385170490729712e-05,
      "loss": 4.4477,
      "step": 1931
    },
    {
      "epoch": 0.6955895589558956,
      "grad_norm": 0.7647258639335632,
      "learning_rate": 4.375641284486808e-05,
      "loss": 4.5653,
      "step": 1932
    },
    {
      "epoch": 0.6959495949594959,
      "grad_norm": 0.5726279020309448,
      "learning_rate": 4.366119542773314e-05,
      "loss": 4.6699,
      "step": 1933
    },
    {
      "epoch": 0.6963096309630963,
      "grad_norm": 0.9219176769256592,
      "learning_rate": 4.3566052782262735e-05,
      "loss": 4.6111,
      "step": 1934
    },
    {
      "epoch": 0.6966696669666966,
      "grad_norm": 0.8426400423049927,
      "learning_rate": 4.347098503472822e-05,
      "loss": 4.8859,
      "step": 1935
    },
    {
      "epoch": 0.697029702970297,
      "grad_norm": 0.7717829942703247,
      "learning_rate": 4.337599231130147e-05,
      "loss": 4.3902,
      "step": 1936
    },
    {
      "epoch": 0.6973897389738974,
      "grad_norm": 0.7202708125114441,
      "learning_rate": 4.328107473805487e-05,
      "loss": 4.8488,
      "step": 1937
    },
    {
      "epoch": 0.6977497749774978,
      "grad_norm": 0.8424999117851257,
      "learning_rate": 4.318623244096092e-05,
      "loss": 4.6686,
      "step": 1938
    },
    {
      "epoch": 0.6981098109810981,
      "grad_norm": 0.6789209842681885,
      "learning_rate": 4.309146554589234e-05,
      "loss": 4.8707,
      "step": 1939
    },
    {
      "epoch": 0.6984698469846985,
      "grad_norm": 0.8634017705917358,
      "learning_rate": 4.2996774178621736e-05,
      "loss": 4.8603,
      "step": 1940
    },
    {
      "epoch": 0.6988298829882988,
      "grad_norm": 0.7735937833786011,
      "learning_rate": 4.2902158464821496e-05,
      "loss": 4.7517,
      "step": 1941
    },
    {
      "epoch": 0.6991899189918992,
      "grad_norm": 0.6638203263282776,
      "learning_rate": 4.2807618530063565e-05,
      "loss": 4.5559,
      "step": 1942
    },
    {
      "epoch": 0.6995499549954995,
      "grad_norm": 0.7306031584739685,
      "learning_rate": 4.271315449981934e-05,
      "loss": 4.3512,
      "step": 1943
    },
    {
      "epoch": 0.6999099909990999,
      "grad_norm": 0.8207063674926758,
      "learning_rate": 4.2618766499459516e-05,
      "loss": 5.0055,
      "step": 1944
    },
    {
      "epoch": 0.7002700270027002,
      "grad_norm": 0.6139596700668335,
      "learning_rate": 4.2524454654253775e-05,
      "loss": 4.6997,
      "step": 1945
    },
    {
      "epoch": 0.7006300630063006,
      "grad_norm": 0.9638845324516296,
      "learning_rate": 4.2430219089370823e-05,
      "loss": 5.1796,
      "step": 1946
    },
    {
      "epoch": 0.700990099009901,
      "grad_norm": 1.1057206392288208,
      "learning_rate": 4.23360599298781e-05,
      "loss": 5.3861,
      "step": 1947
    },
    {
      "epoch": 0.7013501350135013,
      "grad_norm": 0.869283139705658,
      "learning_rate": 4.224197730074169e-05,
      "loss": 5.035,
      "step": 1948
    },
    {
      "epoch": 0.7017101710171018,
      "grad_norm": 0.7809675931930542,
      "learning_rate": 4.2147971326825966e-05,
      "loss": 5.0235,
      "step": 1949
    },
    {
      "epoch": 0.7020702070207021,
      "grad_norm": 1.561911940574646,
      "learning_rate": 4.20540421328937e-05,
      "loss": 5.2386,
      "step": 1950
    },
    {
      "epoch": 0.7024302430243025,
      "grad_norm": 0.843798816204071,
      "learning_rate": 4.1960189843605745e-05,
      "loss": 4.9271,
      "step": 1951
    },
    {
      "epoch": 0.7027902790279028,
      "grad_norm": 0.7116716504096985,
      "learning_rate": 4.1866414583520877e-05,
      "loss": 4.9151,
      "step": 1952
    },
    {
      "epoch": 0.7031503150315032,
      "grad_norm": 0.8110635280609131,
      "learning_rate": 4.177271647709556e-05,
      "loss": 4.8799,
      "step": 1953
    },
    {
      "epoch": 0.7035103510351035,
      "grad_norm": 0.8753562569618225,
      "learning_rate": 4.1679095648683986e-05,
      "loss": 4.6706,
      "step": 1954
    },
    {
      "epoch": 0.7038703870387039,
      "grad_norm": 0.9446102380752563,
      "learning_rate": 4.158555222253771e-05,
      "loss": 4.7998,
      "step": 1955
    },
    {
      "epoch": 0.7042304230423042,
      "grad_norm": 1.3820642232894897,
      "learning_rate": 4.149208632280559e-05,
      "loss": 4.7055,
      "step": 1956
    },
    {
      "epoch": 0.7045904590459046,
      "grad_norm": 1.0470118522644043,
      "learning_rate": 4.139869807353357e-05,
      "loss": 4.7841,
      "step": 1957
    },
    {
      "epoch": 0.7049504950495049,
      "grad_norm": 0.8367030024528503,
      "learning_rate": 4.130538759866457e-05,
      "loss": 5.2838,
      "step": 1958
    },
    {
      "epoch": 0.7053105310531053,
      "grad_norm": 0.6585936546325684,
      "learning_rate": 4.121215502203829e-05,
      "loss": 4.8096,
      "step": 1959
    },
    {
      "epoch": 0.7056705670567057,
      "grad_norm": 0.9417300224304199,
      "learning_rate": 4.1119000467390955e-05,
      "loss": 4.6286,
      "step": 1960
    },
    {
      "epoch": 0.7060306030603061,
      "grad_norm": 0.7592486143112183,
      "learning_rate": 4.102592405835536e-05,
      "loss": 5.1779,
      "step": 1961
    },
    {
      "epoch": 0.7063906390639064,
      "grad_norm": 0.6947286128997803,
      "learning_rate": 4.0932925918460516e-05,
      "loss": 4.6453,
      "step": 1962
    },
    {
      "epoch": 0.7067506750675068,
      "grad_norm": 0.5848962664604187,
      "learning_rate": 4.084000617113164e-05,
      "loss": 4.7674,
      "step": 1963
    },
    {
      "epoch": 0.7071107110711071,
      "grad_norm": 1.2515913248062134,
      "learning_rate": 4.074716493968975e-05,
      "loss": 4.6358,
      "step": 1964
    },
    {
      "epoch": 0.7074707470747075,
      "grad_norm": 0.7720615863800049,
      "learning_rate": 4.0654402347351814e-05,
      "loss": 4.4811,
      "step": 1965
    },
    {
      "epoch": 0.7078307830783078,
      "grad_norm": 0.7216893434524536,
      "learning_rate": 4.056171851723035e-05,
      "loss": 4.5832,
      "step": 1966
    },
    {
      "epoch": 0.7081908190819082,
      "grad_norm": 0.5522660613059998,
      "learning_rate": 4.046911357233343e-05,
      "loss": 4.75,
      "step": 1967
    },
    {
      "epoch": 0.7085508550855085,
      "grad_norm": 0.7386857271194458,
      "learning_rate": 4.037658763556428e-05,
      "loss": 4.4266,
      "step": 1968
    },
    {
      "epoch": 0.7089108910891089,
      "grad_norm": 0.8335764408111572,
      "learning_rate": 4.028414082972141e-05,
      "loss": 4.3608,
      "step": 1969
    },
    {
      "epoch": 0.7092709270927092,
      "grad_norm": 0.7491186261177063,
      "learning_rate": 4.019177327749822e-05,
      "loss": 4.8352,
      "step": 1970
    },
    {
      "epoch": 0.7096309630963097,
      "grad_norm": 0.9205876588821411,
      "learning_rate": 4.0099485101483014e-05,
      "loss": 5.0586,
      "step": 1971
    },
    {
      "epoch": 0.70999099909991,
      "grad_norm": 0.6963374018669128,
      "learning_rate": 4.000727642415867e-05,
      "loss": 4.499,
      "step": 1972
    },
    {
      "epoch": 0.7103510351035104,
      "grad_norm": 0.9542635083198547,
      "learning_rate": 3.991514736790258e-05,
      "loss": 5.1105,
      "step": 1973
    },
    {
      "epoch": 0.7107110711071107,
      "grad_norm": 1.1971359252929688,
      "learning_rate": 3.982309805498649e-05,
      "loss": 4.9849,
      "step": 1974
    },
    {
      "epoch": 0.7110711071107111,
      "grad_norm": 1.055363416671753,
      "learning_rate": 3.9731128607576306e-05,
      "loss": 5.2035,
      "step": 1975
    },
    {
      "epoch": 0.7114311431143114,
      "grad_norm": 2.2710304260253906,
      "learning_rate": 3.963923914773187e-05,
      "loss": 4.7704,
      "step": 1976
    },
    {
      "epoch": 0.7117911791179118,
      "grad_norm": 0.739315927028656,
      "learning_rate": 3.954742979740695e-05,
      "loss": 5.2393,
      "step": 1977
    },
    {
      "epoch": 0.7121512151215121,
      "grad_norm": 0.7492758631706238,
      "learning_rate": 3.945570067844901e-05,
      "loss": 4.7168,
      "step": 1978
    },
    {
      "epoch": 0.7125112511251125,
      "grad_norm": 0.4400799870491028,
      "learning_rate": 3.936405191259891e-05,
      "loss": 4.3876,
      "step": 1979
    },
    {
      "epoch": 0.7128712871287128,
      "grad_norm": 0.9049892425537109,
      "learning_rate": 3.927248362149097e-05,
      "loss": 4.5856,
      "step": 1980
    },
    {
      "epoch": 0.7132313231323132,
      "grad_norm": 0.6218405961990356,
      "learning_rate": 3.9180995926652705e-05,
      "loss": 4.8043,
      "step": 1981
    },
    {
      "epoch": 0.7135913591359135,
      "grad_norm": 0.6553037166595459,
      "learning_rate": 3.9089588949504655e-05,
      "loss": 4.805,
      "step": 1982
    },
    {
      "epoch": 0.713951395139514,
      "grad_norm": 0.45045459270477295,
      "learning_rate": 3.899826281136015e-05,
      "loss": 4.6216,
      "step": 1983
    },
    {
      "epoch": 0.7143114311431144,
      "grad_norm": 0.6989582180976868,
      "learning_rate": 3.890701763342536e-05,
      "loss": 4.9492,
      "step": 1984
    },
    {
      "epoch": 0.7146714671467147,
      "grad_norm": 0.7872725129127502,
      "learning_rate": 3.8815853536798904e-05,
      "loss": 4.9512,
      "step": 1985
    },
    {
      "epoch": 0.715031503150315,
      "grad_norm": 0.6595510244369507,
      "learning_rate": 3.8724770642471865e-05,
      "loss": 4.4377,
      "step": 1986
    },
    {
      "epoch": 0.7153915391539154,
      "grad_norm": 0.47658970952033997,
      "learning_rate": 3.863376907132752e-05,
      "loss": 4.6671,
      "step": 1987
    },
    {
      "epoch": 0.7157515751575158,
      "grad_norm": 0.42084112763404846,
      "learning_rate": 3.854284894414122e-05,
      "loss": 4.4798,
      "step": 1988
    },
    {
      "epoch": 0.7161116111611161,
      "grad_norm": 0.5337504148483276,
      "learning_rate": 3.8452010381580216e-05,
      "loss": 4.3987,
      "step": 1989
    },
    {
      "epoch": 0.7164716471647165,
      "grad_norm": 0.6366197466850281,
      "learning_rate": 3.836125350420358e-05,
      "loss": 5.2636,
      "step": 1990
    },
    {
      "epoch": 0.7168316831683168,
      "grad_norm": 0.8405591249465942,
      "learning_rate": 3.82705784324618e-05,
      "loss": 4.8786,
      "step": 1991
    },
    {
      "epoch": 0.7171917191719172,
      "grad_norm": 0.6100167632102966,
      "learning_rate": 3.8179985286696986e-05,
      "loss": 4.5409,
      "step": 1992
    },
    {
      "epoch": 0.7175517551755175,
      "grad_norm": 0.699909508228302,
      "learning_rate": 3.8089474187142406e-05,
      "loss": 4.8712,
      "step": 1993
    },
    {
      "epoch": 0.717911791179118,
      "grad_norm": 0.6978292465209961,
      "learning_rate": 3.79990452539225e-05,
      "loss": 4.9707,
      "step": 1994
    },
    {
      "epoch": 0.7182718271827183,
      "grad_norm": 0.8318164348602295,
      "learning_rate": 3.790869860705258e-05,
      "loss": 4.8339,
      "step": 1995
    },
    {
      "epoch": 0.7186318631863187,
      "grad_norm": 0.639952540397644,
      "learning_rate": 3.781843436643882e-05,
      "loss": 4.9529,
      "step": 1996
    },
    {
      "epoch": 0.718991899189919,
      "grad_norm": 0.8525128364562988,
      "learning_rate": 3.772825265187802e-05,
      "loss": 5.0666,
      "step": 1997
    },
    {
      "epoch": 0.7193519351935194,
      "grad_norm": 1.0021530389785767,
      "learning_rate": 3.763815358305743e-05,
      "loss": 5.1745,
      "step": 1998
    },
    {
      "epoch": 0.7197119711971197,
      "grad_norm": 1.0609560012817383,
      "learning_rate": 3.7548137279554586e-05,
      "loss": 5.1994,
      "step": 1999
    },
    {
      "epoch": 0.7200720072007201,
      "grad_norm": 1.7202980518341064,
      "learning_rate": 3.7458203860837234e-05,
      "loss": 5.4059,
      "step": 2000
    },
    {
      "epoch": 0.7204320432043204,
      "grad_norm": 1.2057273387908936,
      "learning_rate": 3.736835344626311e-05,
      "loss": 4.8807,
      "step": 2001
    },
    {
      "epoch": 0.7207920792079208,
      "grad_norm": 0.9880334734916687,
      "learning_rate": 3.727858615507974e-05,
      "loss": 5.044,
      "step": 2002
    },
    {
      "epoch": 0.7211521152115211,
      "grad_norm": 0.6543089747428894,
      "learning_rate": 3.7188902106424416e-05,
      "loss": 4.7401,
      "step": 2003
    },
    {
      "epoch": 0.7215121512151215,
      "grad_norm": 0.5077190399169922,
      "learning_rate": 3.709930141932386e-05,
      "loss": 5.0552,
      "step": 2004
    },
    {
      "epoch": 0.7218721872187218,
      "grad_norm": 0.6145045757293701,
      "learning_rate": 3.7009784212694265e-05,
      "loss": 5.1865,
      "step": 2005
    },
    {
      "epoch": 0.7222322232223223,
      "grad_norm": 0.49541333317756653,
      "learning_rate": 3.692035060534088e-05,
      "loss": 4.8607,
      "step": 2006
    },
    {
      "epoch": 0.7225922592259226,
      "grad_norm": 0.8183977007865906,
      "learning_rate": 3.683100071595813e-05,
      "loss": 4.4959,
      "step": 2007
    },
    {
      "epoch": 0.722952295229523,
      "grad_norm": 0.7221876382827759,
      "learning_rate": 3.674173466312928e-05,
      "loss": 4.3129,
      "step": 2008
    },
    {
      "epoch": 0.7233123312331233,
      "grad_norm": 0.9419858455657959,
      "learning_rate": 3.665255256532638e-05,
      "loss": 5.1136,
      "step": 2009
    },
    {
      "epoch": 0.7236723672367237,
      "grad_norm": 0.500443696975708,
      "learning_rate": 3.656345454090996e-05,
      "loss": 4.9675,
      "step": 2010
    },
    {
      "epoch": 0.724032403240324,
      "grad_norm": 0.5056512355804443,
      "learning_rate": 3.6474440708129045e-05,
      "loss": 4.9805,
      "step": 2011
    },
    {
      "epoch": 0.7243924392439244,
      "grad_norm": 0.734207808971405,
      "learning_rate": 3.638551118512089e-05,
      "loss": 4.5258,
      "step": 2012
    },
    {
      "epoch": 0.7247524752475247,
      "grad_norm": 0.6353861689567566,
      "learning_rate": 3.6296666089910936e-05,
      "loss": 5.0694,
      "step": 2013
    },
    {
      "epoch": 0.7251125112511251,
      "grad_norm": 0.6294865012168884,
      "learning_rate": 3.620790554041241e-05,
      "loss": 4.6572,
      "step": 2014
    },
    {
      "epoch": 0.7254725472547254,
      "grad_norm": 0.6333225965499878,
      "learning_rate": 3.611922965442648e-05,
      "loss": 4.6558,
      "step": 2015
    },
    {
      "epoch": 0.7258325832583258,
      "grad_norm": 0.6373658180236816,
      "learning_rate": 3.603063854964188e-05,
      "loss": 5.0726,
      "step": 2016
    },
    {
      "epoch": 0.7261926192619262,
      "grad_norm": 0.8722042441368103,
      "learning_rate": 3.594213234363486e-05,
      "loss": 4.6815,
      "step": 2017
    },
    {
      "epoch": 0.7265526552655266,
      "grad_norm": 0.7341203689575195,
      "learning_rate": 3.5853711153868965e-05,
      "loss": 4.957,
      "step": 2018
    },
    {
      "epoch": 0.7269126912691269,
      "grad_norm": 0.5686054825782776,
      "learning_rate": 3.5765375097694916e-05,
      "loss": 4.7206,
      "step": 2019
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 1.069074273109436,
      "learning_rate": 3.56771242923505e-05,
      "loss": 5.3025,
      "step": 2020
    },
    {
      "epoch": 0.7276327632763276,
      "grad_norm": 0.6375458836555481,
      "learning_rate": 3.558895885496023e-05,
      "loss": 4.6998,
      "step": 2021
    },
    {
      "epoch": 0.727992799279928,
      "grad_norm": 0.6588398218154907,
      "learning_rate": 3.550087890253544e-05,
      "loss": 4.9146,
      "step": 2022
    },
    {
      "epoch": 0.7283528352835283,
      "grad_norm": 1.0640164613723755,
      "learning_rate": 3.541288455197398e-05,
      "loss": 4.9861,
      "step": 2023
    },
    {
      "epoch": 0.7287128712871287,
      "grad_norm": 1.0872198343276978,
      "learning_rate": 3.53249759200601e-05,
      "loss": 5.1508,
      "step": 2024
    },
    {
      "epoch": 0.729072907290729,
      "grad_norm": 1.2077916860580444,
      "learning_rate": 3.523715312346421e-05,
      "loss": 5.3081,
      "step": 2025
    },
    {
      "epoch": 0.7294329432943294,
      "grad_norm": 1.2681756019592285,
      "learning_rate": 3.51494162787429e-05,
      "loss": 4.9204,
      "step": 2026
    },
    {
      "epoch": 0.7297929792979297,
      "grad_norm": 0.5897660255432129,
      "learning_rate": 3.506176550233863e-05,
      "loss": 4.4497,
      "step": 2027
    },
    {
      "epoch": 0.7301530153015302,
      "grad_norm": 0.7174355983734131,
      "learning_rate": 3.497420091057969e-05,
      "loss": 4.9662,
      "step": 2028
    },
    {
      "epoch": 0.7305130513051306,
      "grad_norm": 0.6896499991416931,
      "learning_rate": 3.488672261967989e-05,
      "loss": 4.8752,
      "step": 2029
    },
    {
      "epoch": 0.7308730873087309,
      "grad_norm": 0.9643046259880066,
      "learning_rate": 3.479933074573858e-05,
      "loss": 4.723,
      "step": 2030
    },
    {
      "epoch": 0.7312331233123313,
      "grad_norm": 0.6577152609825134,
      "learning_rate": 3.47120254047404e-05,
      "loss": 4.6554,
      "step": 2031
    },
    {
      "epoch": 0.7315931593159316,
      "grad_norm": 0.6549072265625,
      "learning_rate": 3.462480671255515e-05,
      "loss": 4.7597,
      "step": 2032
    },
    {
      "epoch": 0.731953195319532,
      "grad_norm": 0.595219075679779,
      "learning_rate": 3.4537674784937614e-05,
      "loss": 4.6868,
      "step": 2033
    },
    {
      "epoch": 0.7323132313231323,
      "grad_norm": 0.6932517290115356,
      "learning_rate": 3.445062973752745e-05,
      "loss": 4.557,
      "step": 2034
    },
    {
      "epoch": 0.7326732673267327,
      "grad_norm": 0.6161606907844543,
      "learning_rate": 3.4363671685848986e-05,
      "loss": 4.7073,
      "step": 2035
    },
    {
      "epoch": 0.733033303330333,
      "grad_norm": 0.6454643607139587,
      "learning_rate": 3.427680074531113e-05,
      "loss": 4.5563,
      "step": 2036
    },
    {
      "epoch": 0.7333933393339334,
      "grad_norm": 0.7846769094467163,
      "learning_rate": 3.419001703120709e-05,
      "loss": 4.4964,
      "step": 2037
    },
    {
      "epoch": 0.7337533753375337,
      "grad_norm": 0.9348975419998169,
      "learning_rate": 3.410332065871441e-05,
      "loss": 4.4099,
      "step": 2038
    },
    {
      "epoch": 0.7341134113411341,
      "grad_norm": 0.5604249835014343,
      "learning_rate": 3.401671174289469e-05,
      "loss": 4.6552,
      "step": 2039
    },
    {
      "epoch": 0.7344734473447345,
      "grad_norm": 0.593227744102478,
      "learning_rate": 3.393019039869338e-05,
      "loss": 4.441,
      "step": 2040
    },
    {
      "epoch": 0.7348334833483349,
      "grad_norm": 1.0157716274261475,
      "learning_rate": 3.3843756740939817e-05,
      "loss": 4.5417,
      "step": 2041
    },
    {
      "epoch": 0.7351935193519352,
      "grad_norm": 0.9067453145980835,
      "learning_rate": 3.3757410884346894e-05,
      "loss": 5.033,
      "step": 2042
    },
    {
      "epoch": 0.7355535553555356,
      "grad_norm": 0.6749830842018127,
      "learning_rate": 3.367115294351104e-05,
      "loss": 4.8072,
      "step": 2043
    },
    {
      "epoch": 0.7359135913591359,
      "grad_norm": 0.4967857897281647,
      "learning_rate": 3.358498303291191e-05,
      "loss": 4.5516,
      "step": 2044
    },
    {
      "epoch": 0.7362736273627363,
      "grad_norm": 0.5756816864013672,
      "learning_rate": 3.3498901266912396e-05,
      "loss": 4.6619,
      "step": 2045
    },
    {
      "epoch": 0.7366336633663366,
      "grad_norm": 0.7196652889251709,
      "learning_rate": 3.3412907759758385e-05,
      "loss": 4.9634,
      "step": 2046
    },
    {
      "epoch": 0.736993699369937,
      "grad_norm": 0.5623925924301147,
      "learning_rate": 3.332700262557864e-05,
      "loss": 4.8888,
      "step": 2047
    },
    {
      "epoch": 0.7373537353735373,
      "grad_norm": 1.13788640499115,
      "learning_rate": 3.324118597838464e-05,
      "loss": 5.2636,
      "step": 2048
    },
    {
      "epoch": 0.7377137713771377,
      "grad_norm": 1.3406879901885986,
      "learning_rate": 3.31554579320704e-05,
      "loss": 5.2915,
      "step": 2049
    },
    {
      "epoch": 0.738073807380738,
      "grad_norm": 1.0955404043197632,
      "learning_rate": 3.3069818600412375e-05,
      "loss": 5.0604,
      "step": 2050
    },
    {
      "epoch": 0.7384338433843385,
      "grad_norm": 2.2472808361053467,
      "learning_rate": 3.298426809706928e-05,
      "loss": 4.6431,
      "step": 2051
    },
    {
      "epoch": 0.7387938793879388,
      "grad_norm": 0.5622848272323608,
      "learning_rate": 3.289880653558188e-05,
      "loss": 4.9647,
      "step": 2052
    },
    {
      "epoch": 0.7391539153915392,
      "grad_norm": 0.537049412727356,
      "learning_rate": 3.281343402937297e-05,
      "loss": 4.8475,
      "step": 2053
    },
    {
      "epoch": 0.7395139513951395,
      "grad_norm": 0.7046445608139038,
      "learning_rate": 3.2728150691747115e-05,
      "loss": 4.8618,
      "step": 2054
    },
    {
      "epoch": 0.7398739873987399,
      "grad_norm": 0.5483196973800659,
      "learning_rate": 3.264295663589061e-05,
      "loss": 4.7506,
      "step": 2055
    },
    {
      "epoch": 0.7402340234023402,
      "grad_norm": 0.7228554487228394,
      "learning_rate": 3.25578519748711e-05,
      "loss": 4.9671,
      "step": 2056
    },
    {
      "epoch": 0.7405940594059406,
      "grad_norm": 0.7139778733253479,
      "learning_rate": 3.2472836821637744e-05,
      "loss": 4.837,
      "step": 2057
    },
    {
      "epoch": 0.7409540954095409,
      "grad_norm": 0.6157667636871338,
      "learning_rate": 3.238791128902082e-05,
      "loss": 4.6113,
      "step": 2058
    },
    {
      "epoch": 0.7413141314131413,
      "grad_norm": 1.1201602220535278,
      "learning_rate": 3.230307548973174e-05,
      "loss": 4.7593,
      "step": 2059
    },
    {
      "epoch": 0.7416741674167416,
      "grad_norm": 0.6364259123802185,
      "learning_rate": 3.2218329536362704e-05,
      "loss": 4.4534,
      "step": 2060
    },
    {
      "epoch": 0.742034203420342,
      "grad_norm": 0.8327221870422363,
      "learning_rate": 3.213367354138678e-05,
      "loss": 5.1408,
      "step": 2061
    },
    {
      "epoch": 0.7423942394239424,
      "grad_norm": 0.624847948551178,
      "learning_rate": 3.204910761715763e-05,
      "loss": 4.5746,
      "step": 2062
    },
    {
      "epoch": 0.7427542754275428,
      "grad_norm": 0.7800330519676208,
      "learning_rate": 3.196463187590929e-05,
      "loss": 4.8576,
      "step": 2063
    },
    {
      "epoch": 0.7431143114311431,
      "grad_norm": 1.7853704690933228,
      "learning_rate": 3.18802464297562e-05,
      "loss": 4.8703,
      "step": 2064
    },
    {
      "epoch": 0.7434743474347435,
      "grad_norm": 0.6822658777236938,
      "learning_rate": 3.17959513906929e-05,
      "loss": 4.7471,
      "step": 2065
    },
    {
      "epoch": 0.7438343834383438,
      "grad_norm": 0.9525924921035767,
      "learning_rate": 3.1711746870594086e-05,
      "loss": 4.9575,
      "step": 2066
    },
    {
      "epoch": 0.7441944194419442,
      "grad_norm": 1.0060420036315918,
      "learning_rate": 3.162763298121408e-05,
      "loss": 4.2924,
      "step": 2067
    },
    {
      "epoch": 0.7445544554455445,
      "grad_norm": 0.8173012733459473,
      "learning_rate": 3.1543609834187115e-05,
      "loss": 5.1242,
      "step": 2068
    },
    {
      "epoch": 0.7449144914491449,
      "grad_norm": 0.6984362602233887,
      "learning_rate": 3.145967754102691e-05,
      "loss": 4.5351,
      "step": 2069
    },
    {
      "epoch": 0.7452745274527453,
      "grad_norm": 0.6665016412734985,
      "learning_rate": 3.137583621312665e-05,
      "loss": 4.6194,
      "step": 2070
    },
    {
      "epoch": 0.7456345634563456,
      "grad_norm": 1.0984699726104736,
      "learning_rate": 3.129208596175872e-05,
      "loss": 4.9804,
      "step": 2071
    },
    {
      "epoch": 0.745994599459946,
      "grad_norm": 0.7321772575378418,
      "learning_rate": 3.120842689807468e-05,
      "loss": 4.9711,
      "step": 2072
    },
    {
      "epoch": 0.7463546354635463,
      "grad_norm": 1.0770906209945679,
      "learning_rate": 3.112485913310508e-05,
      "loss": 5.2855,
      "step": 2073
    },
    {
      "epoch": 0.7467146714671468,
      "grad_norm": 0.8592466711997986,
      "learning_rate": 3.10413827777593e-05,
      "loss": 4.9253,
      "step": 2074
    },
    {
      "epoch": 0.7470747074707471,
      "grad_norm": 1.0927594900131226,
      "learning_rate": 3.0957997942825336e-05,
      "loss": 5.1627,
      "step": 2075
    },
    {
      "epoch": 0.7474347434743475,
      "grad_norm": 1.0298612117767334,
      "learning_rate": 3.0874704738969794e-05,
      "loss": 5.1717,
      "step": 2076
    },
    {
      "epoch": 0.7477947794779478,
      "grad_norm": 0.8015543222427368,
      "learning_rate": 3.079150327673766e-05,
      "loss": 4.6964,
      "step": 2077
    },
    {
      "epoch": 0.7481548154815482,
      "grad_norm": 0.7126684188842773,
      "learning_rate": 3.070839366655215e-05,
      "loss": 4.9846,
      "step": 2078
    },
    {
      "epoch": 0.7485148514851485,
      "grad_norm": 1.5721231698989868,
      "learning_rate": 3.062537601871452e-05,
      "loss": 4.8304,
      "step": 2079
    },
    {
      "epoch": 0.7488748874887489,
      "grad_norm": 0.6689034104347229,
      "learning_rate": 3.054245044340408e-05,
      "loss": 4.6386,
      "step": 2080
    },
    {
      "epoch": 0.7492349234923492,
      "grad_norm": 1.0003162622451782,
      "learning_rate": 3.0459617050677868e-05,
      "loss": 4.9661,
      "step": 2081
    },
    {
      "epoch": 0.7495949594959496,
      "grad_norm": 0.7442778944969177,
      "learning_rate": 3.0376875950470617e-05,
      "loss": 4.4624,
      "step": 2082
    },
    {
      "epoch": 0.7499549954995499,
      "grad_norm": 1.011825442314148,
      "learning_rate": 3.0294227252594555e-05,
      "loss": 4.8084,
      "step": 2083
    },
    {
      "epoch": 0.7503150315031503,
      "grad_norm": 0.5705128908157349,
      "learning_rate": 3.021167106673928e-05,
      "loss": 4.7329,
      "step": 2084
    },
    {
      "epoch": 0.7506750675067507,
      "grad_norm": 0.4860425889492035,
      "learning_rate": 3.0129207502471625e-05,
      "loss": 4.6015,
      "step": 2085
    },
    {
      "epoch": 0.7506750675067507,
      "eval_loss": 4.810147762298584,
      "eval_runtime": 101.7868,
      "eval_samples_per_second": 45.959,
      "eval_steps_per_second": 11.495,
      "step": 2085
    },
    {
      "epoch": 0.7510351035103511,
      "grad_norm": 0.671138346195221,
      "learning_rate": 3.0046836669235433e-05,
      "loss": 4.6252,
      "step": 2086
    },
    {
      "epoch": 0.7513951395139514,
      "grad_norm": 0.9334102869033813,
      "learning_rate": 2.996455867635155e-05,
      "loss": 4.5999,
      "step": 2087
    },
    {
      "epoch": 0.7517551755175518,
      "grad_norm": 0.8894442319869995,
      "learning_rate": 2.988237363301758e-05,
      "loss": 5.0383,
      "step": 2088
    },
    {
      "epoch": 0.7521152115211521,
      "grad_norm": 0.6898563504219055,
      "learning_rate": 2.9800281648307794e-05,
      "loss": 4.8665,
      "step": 2089
    },
    {
      "epoch": 0.7524752475247525,
      "grad_norm": 0.6982426047325134,
      "learning_rate": 2.9718282831172883e-05,
      "loss": 4.8052,
      "step": 2090
    },
    {
      "epoch": 0.7528352835283528,
      "grad_norm": 0.6563032865524292,
      "learning_rate": 2.9636377290439944e-05,
      "loss": 4.4563,
      "step": 2091
    },
    {
      "epoch": 0.7531953195319532,
      "grad_norm": 0.747215986251831,
      "learning_rate": 2.9554565134812294e-05,
      "loss": 5.0272,
      "step": 2092
    },
    {
      "epoch": 0.7535553555355535,
      "grad_norm": 0.842694878578186,
      "learning_rate": 2.9472846472869298e-05,
      "loss": 5.1278,
      "step": 2093
    },
    {
      "epoch": 0.7539153915391539,
      "grad_norm": 1.041561484336853,
      "learning_rate": 2.9391221413066182e-05,
      "loss": 5.2336,
      "step": 2094
    },
    {
      "epoch": 0.7542754275427542,
      "grad_norm": 0.7632551193237305,
      "learning_rate": 2.930969006373402e-05,
      "loss": 4.7186,
      "step": 2095
    },
    {
      "epoch": 0.7546354635463547,
      "grad_norm": 1.0100700855255127,
      "learning_rate": 2.922825253307947e-05,
      "loss": 5.0076,
      "step": 2096
    },
    {
      "epoch": 0.754995499549955,
      "grad_norm": 0.5503267049789429,
      "learning_rate": 2.9146908929184713e-05,
      "loss": 4.6652,
      "step": 2097
    },
    {
      "epoch": 0.7553555355535554,
      "grad_norm": 0.7919767498970032,
      "learning_rate": 2.9065659360007247e-05,
      "loss": 5.4393,
      "step": 2098
    },
    {
      "epoch": 0.7557155715571557,
      "grad_norm": 1.0583096742630005,
      "learning_rate": 2.898450393337977e-05,
      "loss": 5.2044,
      "step": 2099
    },
    {
      "epoch": 0.7560756075607561,
      "grad_norm": 1.1439197063446045,
      "learning_rate": 2.8903442757010035e-05,
      "loss": 5.2308,
      "step": 2100
    },
    {
      "epoch": 0.7564356435643564,
      "grad_norm": 1.3537611961364746,
      "learning_rate": 2.8822475938480764e-05,
      "loss": 4.7218,
      "step": 2101
    },
    {
      "epoch": 0.7567956795679568,
      "grad_norm": 0.6820851564407349,
      "learning_rate": 2.874160358524931e-05,
      "loss": 4.7861,
      "step": 2102
    },
    {
      "epoch": 0.7571557155715571,
      "grad_norm": 1.5382546186447144,
      "learning_rate": 2.8660825804647795e-05,
      "loss": 4.6253,
      "step": 2103
    },
    {
      "epoch": 0.7575157515751575,
      "grad_norm": 0.9953200221061707,
      "learning_rate": 2.8580142703882796e-05,
      "loss": 4.5608,
      "step": 2104
    },
    {
      "epoch": 0.7578757875787578,
      "grad_norm": 0.7481911778450012,
      "learning_rate": 2.8499554390035143e-05,
      "loss": 4.9828,
      "step": 2105
    },
    {
      "epoch": 0.7582358235823582,
      "grad_norm": 0.8973466753959656,
      "learning_rate": 2.8419060970059974e-05,
      "loss": 4.4942,
      "step": 2106
    },
    {
      "epoch": 0.7585958595859585,
      "grad_norm": 0.6188588738441467,
      "learning_rate": 2.8338662550786443e-05,
      "loss": 4.917,
      "step": 2107
    },
    {
      "epoch": 0.758955895589559,
      "grad_norm": 0.8116443157196045,
      "learning_rate": 2.8258359238917665e-05,
      "loss": 4.8898,
      "step": 2108
    },
    {
      "epoch": 0.7593159315931594,
      "grad_norm": 0.996688187122345,
      "learning_rate": 2.8178151141030406e-05,
      "loss": 4.6581,
      "step": 2109
    },
    {
      "epoch": 0.7596759675967597,
      "grad_norm": 1.0279045104980469,
      "learning_rate": 2.8098038363575186e-05,
      "loss": 4.8797,
      "step": 2110
    },
    {
      "epoch": 0.76003600360036,
      "grad_norm": 0.7636517286300659,
      "learning_rate": 2.8018021012875994e-05,
      "loss": 4.4845,
      "step": 2111
    },
    {
      "epoch": 0.7603960396039604,
      "grad_norm": 0.8215141892433167,
      "learning_rate": 2.7938099195130153e-05,
      "loss": 4.7628,
      "step": 2112
    },
    {
      "epoch": 0.7607560756075608,
      "grad_norm": 1.067277431488037,
      "learning_rate": 2.7858273016408197e-05,
      "loss": 5.2078,
      "step": 2113
    },
    {
      "epoch": 0.7611161116111611,
      "grad_norm": 0.7776371240615845,
      "learning_rate": 2.7778542582653744e-05,
      "loss": 5.1417,
      "step": 2114
    },
    {
      "epoch": 0.7614761476147615,
      "grad_norm": 0.8139586448669434,
      "learning_rate": 2.769890799968332e-05,
      "loss": 4.8309,
      "step": 2115
    },
    {
      "epoch": 0.7618361836183618,
      "grad_norm": 0.7961241006851196,
      "learning_rate": 2.7619369373186288e-05,
      "loss": 4.9364,
      "step": 2116
    },
    {
      "epoch": 0.7621962196219622,
      "grad_norm": 0.6579238176345825,
      "learning_rate": 2.753992680872457e-05,
      "loss": 4.8429,
      "step": 2117
    },
    {
      "epoch": 0.7625562556255625,
      "grad_norm": 0.6686379909515381,
      "learning_rate": 2.746058041173266e-05,
      "loss": 4.8558,
      "step": 2118
    },
    {
      "epoch": 0.762916291629163,
      "grad_norm": 0.6807066202163696,
      "learning_rate": 2.7381330287517426e-05,
      "loss": 4.4459,
      "step": 2119
    },
    {
      "epoch": 0.7632763276327633,
      "grad_norm": 0.6293361783027649,
      "learning_rate": 2.7302176541257986e-05,
      "loss": 4.898,
      "step": 2120
    },
    {
      "epoch": 0.7636363636363637,
      "grad_norm": 0.8723804950714111,
      "learning_rate": 2.7223119278005438e-05,
      "loss": 4.6484,
      "step": 2121
    },
    {
      "epoch": 0.763996399639964,
      "grad_norm": 0.848070502281189,
      "learning_rate": 2.7144158602682924e-05,
      "loss": 5.2841,
      "step": 2122
    },
    {
      "epoch": 0.7643564356435644,
      "grad_norm": 0.9075490832328796,
      "learning_rate": 2.7065294620085424e-05,
      "loss": 5.2137,
      "step": 2123
    },
    {
      "epoch": 0.7647164716471647,
      "grad_norm": 1.0051218271255493,
      "learning_rate": 2.6986527434879472e-05,
      "loss": 5.1463,
      "step": 2124
    },
    {
      "epoch": 0.7650765076507651,
      "grad_norm": 1.058005928993225,
      "learning_rate": 2.6907857151603234e-05,
      "loss": 5.2255,
      "step": 2125
    },
    {
      "epoch": 0.7654365436543654,
      "grad_norm": 1.0294654369354248,
      "learning_rate": 2.6829283874666233e-05,
      "loss": 4.6688,
      "step": 2126
    },
    {
      "epoch": 0.7657965796579658,
      "grad_norm": 0.8858208656311035,
      "learning_rate": 2.6750807708349267e-05,
      "loss": 4.6658,
      "step": 2127
    },
    {
      "epoch": 0.7661566156615661,
      "grad_norm": 0.8013286590576172,
      "learning_rate": 2.6672428756804225e-05,
      "loss": 4.849,
      "step": 2128
    },
    {
      "epoch": 0.7665166516651665,
      "grad_norm": 0.7389269471168518,
      "learning_rate": 2.659414712405398e-05,
      "loss": 4.4728,
      "step": 2129
    },
    {
      "epoch": 0.7668766876687669,
      "grad_norm": 0.6583431363105774,
      "learning_rate": 2.6515962913992275e-05,
      "loss": 4.9133,
      "step": 2130
    },
    {
      "epoch": 0.7672367236723673,
      "grad_norm": 0.6830927133560181,
      "learning_rate": 2.643787623038354e-05,
      "loss": 4.7863,
      "step": 2131
    },
    {
      "epoch": 0.7675967596759676,
      "grad_norm": 0.6234518885612488,
      "learning_rate": 2.6359887176862718e-05,
      "loss": 4.6174,
      "step": 2132
    },
    {
      "epoch": 0.767956795679568,
      "grad_norm": 0.7106865048408508,
      "learning_rate": 2.6281995856935237e-05,
      "loss": 4.7574,
      "step": 2133
    },
    {
      "epoch": 0.7683168316831683,
      "grad_norm": 0.9051821231842041,
      "learning_rate": 2.6204202373976818e-05,
      "loss": 4.8563,
      "step": 2134
    },
    {
      "epoch": 0.7686768676867687,
      "grad_norm": 0.6196146011352539,
      "learning_rate": 2.6126506831233344e-05,
      "loss": 4.5112,
      "step": 2135
    },
    {
      "epoch": 0.769036903690369,
      "grad_norm": 0.6337679624557495,
      "learning_rate": 2.6048909331820636e-05,
      "loss": 4.6024,
      "step": 2136
    },
    {
      "epoch": 0.7693969396939694,
      "grad_norm": 0.6263675689697266,
      "learning_rate": 2.5971409978724458e-05,
      "loss": 4.6405,
      "step": 2137
    },
    {
      "epoch": 0.7697569756975697,
      "grad_norm": 1.0419749021530151,
      "learning_rate": 2.5894008874800325e-05,
      "loss": 4.7905,
      "step": 2138
    },
    {
      "epoch": 0.7701170117011701,
      "grad_norm": 0.8129563927650452,
      "learning_rate": 2.581670612277335e-05,
      "loss": 4.4431,
      "step": 2139
    },
    {
      "epoch": 0.7704770477047704,
      "grad_norm": 0.7132530212402344,
      "learning_rate": 2.5739501825238053e-05,
      "loss": 4.7698,
      "step": 2140
    },
    {
      "epoch": 0.7708370837083708,
      "grad_norm": 0.6291266083717346,
      "learning_rate": 2.566239608465838e-05,
      "loss": 4.5729,
      "step": 2141
    },
    {
      "epoch": 0.7711971197119712,
      "grad_norm": 0.5569373965263367,
      "learning_rate": 2.558538900336741e-05,
      "loss": 4.9298,
      "step": 2142
    },
    {
      "epoch": 0.7715571557155716,
      "grad_norm": 0.72600257396698,
      "learning_rate": 2.5508480683567315e-05,
      "loss": 4.8837,
      "step": 2143
    },
    {
      "epoch": 0.771917191719172,
      "grad_norm": 0.7014186978340149,
      "learning_rate": 2.543167122732918e-05,
      "loss": 4.6547,
      "step": 2144
    },
    {
      "epoch": 0.7722772277227723,
      "grad_norm": 0.8487231731414795,
      "learning_rate": 2.5354960736592883e-05,
      "loss": 4.6923,
      "step": 2145
    },
    {
      "epoch": 0.7726372637263726,
      "grad_norm": 0.6868178844451904,
      "learning_rate": 2.5278349313166992e-05,
      "loss": 4.6032,
      "step": 2146
    },
    {
      "epoch": 0.772997299729973,
      "grad_norm": 0.739762544631958,
      "learning_rate": 2.5201837058728505e-05,
      "loss": 4.8253,
      "step": 2147
    },
    {
      "epoch": 0.7733573357335733,
      "grad_norm": 1.2171458005905151,
      "learning_rate": 2.512542407482289e-05,
      "loss": 5.0895,
      "step": 2148
    },
    {
      "epoch": 0.7737173717371737,
      "grad_norm": 1.3037205934524536,
      "learning_rate": 2.504911046286382e-05,
      "loss": 4.9572,
      "step": 2149
    },
    {
      "epoch": 0.774077407740774,
      "grad_norm": 1.3282265663146973,
      "learning_rate": 2.4972896324133144e-05,
      "loss": 5.4213,
      "step": 2150
    },
    {
      "epoch": 0.7744374437443744,
      "grad_norm": 1.392838716506958,
      "learning_rate": 2.4896781759780585e-05,
      "loss": 4.7398,
      "step": 2151
    },
    {
      "epoch": 0.7747974797479747,
      "grad_norm": 0.6054560542106628,
      "learning_rate": 2.4820766870823807e-05,
      "loss": 4.7054,
      "step": 2152
    },
    {
      "epoch": 0.7751575157515752,
      "grad_norm": 0.6699235439300537,
      "learning_rate": 2.4744851758148156e-05,
      "loss": 4.6727,
      "step": 2153
    },
    {
      "epoch": 0.7755175517551756,
      "grad_norm": 0.5790958404541016,
      "learning_rate": 2.4669036522506584e-05,
      "loss": 4.8101,
      "step": 2154
    },
    {
      "epoch": 0.7758775877587759,
      "grad_norm": 0.8083108067512512,
      "learning_rate": 2.45933212645194e-05,
      "loss": 4.5601,
      "step": 2155
    },
    {
      "epoch": 0.7762376237623763,
      "grad_norm": 1.0053972005844116,
      "learning_rate": 2.451770608467432e-05,
      "loss": 4.6501,
      "step": 2156
    },
    {
      "epoch": 0.7765976597659766,
      "grad_norm": 0.6478819847106934,
      "learning_rate": 2.4442191083326195e-05,
      "loss": 4.6527,
      "step": 2157
    },
    {
      "epoch": 0.776957695769577,
      "grad_norm": 1.433661937713623,
      "learning_rate": 2.4366776360696942e-05,
      "loss": 5.0813,
      "step": 2158
    },
    {
      "epoch": 0.7773177317731773,
      "grad_norm": 1.1521214246749878,
      "learning_rate": 2.429146201687538e-05,
      "loss": 4.5685,
      "step": 2159
    },
    {
      "epoch": 0.7776777677767777,
      "grad_norm": 0.6991515159606934,
      "learning_rate": 2.42162481518171e-05,
      "loss": 4.7082,
      "step": 2160
    },
    {
      "epoch": 0.778037803780378,
      "grad_norm": 0.8507077693939209,
      "learning_rate": 2.414113486534434e-05,
      "loss": 4.6635,
      "step": 2161
    },
    {
      "epoch": 0.7783978397839784,
      "grad_norm": 0.6432574987411499,
      "learning_rate": 2.4066122257145894e-05,
      "loss": 4.5532,
      "step": 2162
    },
    {
      "epoch": 0.7787578757875787,
      "grad_norm": 0.6845300793647766,
      "learning_rate": 2.3991210426776855e-05,
      "loss": 4.6837,
      "step": 2163
    },
    {
      "epoch": 0.7791179117911791,
      "grad_norm": 0.743084728717804,
      "learning_rate": 2.3916399473658623e-05,
      "loss": 4.7629,
      "step": 2164
    },
    {
      "epoch": 0.7794779477947795,
      "grad_norm": 0.7912582159042358,
      "learning_rate": 2.3841689497078746e-05,
      "loss": 4.8711,
      "step": 2165
    },
    {
      "epoch": 0.7798379837983799,
      "grad_norm": 0.6151897311210632,
      "learning_rate": 2.376708059619065e-05,
      "loss": 4.5838,
      "step": 2166
    },
    {
      "epoch": 0.7801980198019802,
      "grad_norm": 0.8373451828956604,
      "learning_rate": 2.3692572870013718e-05,
      "loss": 4.9176,
      "step": 2167
    },
    {
      "epoch": 0.7805580558055806,
      "grad_norm": 0.5105034708976746,
      "learning_rate": 2.361816641743303e-05,
      "loss": 4.5368,
      "step": 2168
    },
    {
      "epoch": 0.7809180918091809,
      "grad_norm": 0.6912795305252075,
      "learning_rate": 2.354386133719927e-05,
      "loss": 4.3363,
      "step": 2169
    },
    {
      "epoch": 0.7812781278127813,
      "grad_norm": 0.8666832447052002,
      "learning_rate": 2.3469657727928506e-05,
      "loss": 4.5923,
      "step": 2170
    },
    {
      "epoch": 0.7816381638163816,
      "grad_norm": 0.8544076085090637,
      "learning_rate": 2.339555568810221e-05,
      "loss": 4.9953,
      "step": 2171
    },
    {
      "epoch": 0.781998199819982,
      "grad_norm": 1.0247130393981934,
      "learning_rate": 2.3321555316067045e-05,
      "loss": 5.4613,
      "step": 2172
    },
    {
      "epoch": 0.7823582358235823,
      "grad_norm": 1.0253169536590576,
      "learning_rate": 2.3247656710034737e-05,
      "loss": 5.1228,
      "step": 2173
    },
    {
      "epoch": 0.7827182718271827,
      "grad_norm": 1.0105392932891846,
      "learning_rate": 2.3173859968081944e-05,
      "loss": 4.7797,
      "step": 2174
    },
    {
      "epoch": 0.783078307830783,
      "grad_norm": 0.8575915098190308,
      "learning_rate": 2.3100165188150125e-05,
      "loss": 5.0751,
      "step": 2175
    },
    {
      "epoch": 0.7834383438343835,
      "grad_norm": 1.8292618989944458,
      "learning_rate": 2.3026572468045437e-05,
      "loss": 4.8729,
      "step": 2176
    },
    {
      "epoch": 0.7837983798379838,
      "grad_norm": 0.7877120971679688,
      "learning_rate": 2.295308190543859e-05,
      "loss": 4.3804,
      "step": 2177
    },
    {
      "epoch": 0.7841584158415842,
      "grad_norm": 2.0647926330566406,
      "learning_rate": 2.287969359786466e-05,
      "loss": 4.7416,
      "step": 2178
    },
    {
      "epoch": 0.7845184518451845,
      "grad_norm": 1.065865159034729,
      "learning_rate": 2.280640764272306e-05,
      "loss": 4.7786,
      "step": 2179
    },
    {
      "epoch": 0.7848784878487849,
      "grad_norm": 0.7052769064903259,
      "learning_rate": 2.2733224137277366e-05,
      "loss": 4.9935,
      "step": 2180
    },
    {
      "epoch": 0.7852385238523852,
      "grad_norm": 0.8767135739326477,
      "learning_rate": 2.266014317865519e-05,
      "loss": 4.7496,
      "step": 2181
    },
    {
      "epoch": 0.7855985598559856,
      "grad_norm": 0.5927574634552002,
      "learning_rate": 2.2587164863847975e-05,
      "loss": 5.0468,
      "step": 2182
    },
    {
      "epoch": 0.7859585958595859,
      "grad_norm": 1.1241964101791382,
      "learning_rate": 2.251428928971102e-05,
      "loss": 4.837,
      "step": 2183
    },
    {
      "epoch": 0.7863186318631863,
      "grad_norm": 0.5535566806793213,
      "learning_rate": 2.244151655296327e-05,
      "loss": 4.5108,
      "step": 2184
    },
    {
      "epoch": 0.7866786678667866,
      "grad_norm": 0.8191620111465454,
      "learning_rate": 2.236884675018709e-05,
      "loss": 4.9146,
      "step": 2185
    },
    {
      "epoch": 0.787038703870387,
      "grad_norm": 0.7227510809898376,
      "learning_rate": 2.2296279977828337e-05,
      "loss": 4.4775,
      "step": 2186
    },
    {
      "epoch": 0.7873987398739875,
      "grad_norm": 0.5830511450767517,
      "learning_rate": 2.222381633219608e-05,
      "loss": 4.604,
      "step": 2187
    },
    {
      "epoch": 0.7877587758775878,
      "grad_norm": 0.6540189981460571,
      "learning_rate": 2.2151455909462538e-05,
      "loss": 4.5976,
      "step": 2188
    },
    {
      "epoch": 0.7881188118811882,
      "grad_norm": 0.6274272203445435,
      "learning_rate": 2.2079198805662914e-05,
      "loss": 4.5981,
      "step": 2189
    },
    {
      "epoch": 0.7884788478847885,
      "grad_norm": 0.9411934018135071,
      "learning_rate": 2.2007045116695313e-05,
      "loss": 4.6338,
      "step": 2190
    },
    {
      "epoch": 0.7888388838883889,
      "grad_norm": 0.640446662902832,
      "learning_rate": 2.1934994938320584e-05,
      "loss": 4.5651,
      "step": 2191
    },
    {
      "epoch": 0.7891989198919892,
      "grad_norm": 0.6163681149482727,
      "learning_rate": 2.1863048366162208e-05,
      "loss": 4.4887,
      "step": 2192
    },
    {
      "epoch": 0.7895589558955896,
      "grad_norm": 0.6504824757575989,
      "learning_rate": 2.179120549570609e-05,
      "loss": 4.84,
      "step": 2193
    },
    {
      "epoch": 0.7899189918991899,
      "grad_norm": 0.6089352965354919,
      "learning_rate": 2.1719466422300607e-05,
      "loss": 4.8128,
      "step": 2194
    },
    {
      "epoch": 0.7902790279027903,
      "grad_norm": 0.8295852541923523,
      "learning_rate": 2.1647831241156302e-05,
      "loss": 5.124,
      "step": 2195
    },
    {
      "epoch": 0.7906390639063906,
      "grad_norm": 0.6523507237434387,
      "learning_rate": 2.1576300047345932e-05,
      "loss": 4.6459,
      "step": 2196
    },
    {
      "epoch": 0.790999099909991,
      "grad_norm": 0.6584704518318176,
      "learning_rate": 2.15048729358041e-05,
      "loss": 4.6353,
      "step": 2197
    },
    {
      "epoch": 0.7913591359135913,
      "grad_norm": 0.9848087430000305,
      "learning_rate": 2.1433550001327373e-05,
      "loss": 5.323,
      "step": 2198
    },
    {
      "epoch": 0.7917191719171918,
      "grad_norm": 0.8073936104774475,
      "learning_rate": 2.136233133857405e-05,
      "loss": 5.1241,
      "step": 2199
    },
    {
      "epoch": 0.7920792079207921,
      "grad_norm": 1.2630630731582642,
      "learning_rate": 2.129121704206405e-05,
      "loss": 5.2807,
      "step": 2200
    },
    {
      "epoch": 0.7924392439243925,
      "grad_norm": 1.061435580253601,
      "learning_rate": 2.1220207206178688e-05,
      "loss": 4.6137,
      "step": 2201
    },
    {
      "epoch": 0.7927992799279928,
      "grad_norm": 0.7881227731704712,
      "learning_rate": 2.114930192516076e-05,
      "loss": 4.5962,
      "step": 2202
    },
    {
      "epoch": 0.7931593159315932,
      "grad_norm": 0.7256401777267456,
      "learning_rate": 2.107850129311426e-05,
      "loss": 4.8192,
      "step": 2203
    },
    {
      "epoch": 0.7935193519351935,
      "grad_norm": 0.9993703365325928,
      "learning_rate": 2.1007805404004242e-05,
      "loss": 5.1882,
      "step": 2204
    },
    {
      "epoch": 0.7938793879387939,
      "grad_norm": 0.714908242225647,
      "learning_rate": 2.09372143516568e-05,
      "loss": 4.6632,
      "step": 2205
    },
    {
      "epoch": 0.7942394239423942,
      "grad_norm": 0.9289857745170593,
      "learning_rate": 2.0866728229758857e-05,
      "loss": 4.8051,
      "step": 2206
    },
    {
      "epoch": 0.7945994599459946,
      "grad_norm": 1.0063886642456055,
      "learning_rate": 2.0796347131858186e-05,
      "loss": 4.8422,
      "step": 2207
    },
    {
      "epoch": 0.7949594959495949,
      "grad_norm": 0.7214351296424866,
      "learning_rate": 2.072607115136298e-05,
      "loss": 4.639,
      "step": 2208
    },
    {
      "epoch": 0.7953195319531953,
      "grad_norm": 0.7175842523574829,
      "learning_rate": 2.065590038154209e-05,
      "loss": 5.0236,
      "step": 2209
    },
    {
      "epoch": 0.7956795679567957,
      "grad_norm": 0.5457351207733154,
      "learning_rate": 2.058583491552465e-05,
      "loss": 4.5711,
      "step": 2210
    },
    {
      "epoch": 0.7960396039603961,
      "grad_norm": 0.6841213703155518,
      "learning_rate": 2.0515874846300077e-05,
      "loss": 4.6929,
      "step": 2211
    },
    {
      "epoch": 0.7963996399639964,
      "grad_norm": 0.8968706727027893,
      "learning_rate": 2.044602026671786e-05,
      "loss": 4.7611,
      "step": 2212
    },
    {
      "epoch": 0.7967596759675968,
      "grad_norm": 0.6250977516174316,
      "learning_rate": 2.0376271269487514e-05,
      "loss": 4.3924,
      "step": 2213
    },
    {
      "epoch": 0.7971197119711971,
      "grad_norm": 0.6848214864730835,
      "learning_rate": 2.0306627947178446e-05,
      "loss": 4.5518,
      "step": 2214
    },
    {
      "epoch": 0.7974797479747975,
      "grad_norm": 0.5987040996551514,
      "learning_rate": 2.0237090392219805e-05,
      "loss": 4.9023,
      "step": 2215
    },
    {
      "epoch": 0.7978397839783978,
      "grad_norm": 0.6589849591255188,
      "learning_rate": 2.0167658696900317e-05,
      "loss": 4.42,
      "step": 2216
    },
    {
      "epoch": 0.7981998199819982,
      "grad_norm": 0.7822523713111877,
      "learning_rate": 2.0098332953368272e-05,
      "loss": 4.8455,
      "step": 2217
    },
    {
      "epoch": 0.7985598559855985,
      "grad_norm": 0.8310371041297913,
      "learning_rate": 2.0029113253631314e-05,
      "loss": 5.1098,
      "step": 2218
    },
    {
      "epoch": 0.7989198919891989,
      "grad_norm": 0.8362820148468018,
      "learning_rate": 1.995999968955641e-05,
      "loss": 4.5659,
      "step": 2219
    },
    {
      "epoch": 0.7992799279927992,
      "grad_norm": 0.7935851812362671,
      "learning_rate": 1.9890992352869543e-05,
      "loss": 4.5946,
      "step": 2220
    },
    {
      "epoch": 0.7996399639963997,
      "grad_norm": 0.6581932902336121,
      "learning_rate": 1.9822091335155812e-05,
      "loss": 5.0322,
      "step": 2221
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6121277213096619,
      "learning_rate": 1.9753296727859195e-05,
      "loss": 4.5539,
      "step": 2222
    },
    {
      "epoch": 0.8003600360036004,
      "grad_norm": 0.7371995449066162,
      "learning_rate": 1.9684608622282417e-05,
      "loss": 5.2824,
      "step": 2223
    },
    {
      "epoch": 0.8007200720072007,
      "grad_norm": 0.825187087059021,
      "learning_rate": 1.9616027109586887e-05,
      "loss": 5.0987,
      "step": 2224
    },
    {
      "epoch": 0.8010801080108011,
      "grad_norm": 1.2908596992492676,
      "learning_rate": 1.9547552280792524e-05,
      "loss": 5.2177,
      "step": 2225
    },
    {
      "epoch": 0.8014401440144014,
      "grad_norm": 2.2421438694000244,
      "learning_rate": 1.947918422677769e-05,
      "loss": 4.7303,
      "step": 2226
    },
    {
      "epoch": 0.8018001800180018,
      "grad_norm": 0.5044226050376892,
      "learning_rate": 1.941092303827896e-05,
      "loss": 4.6811,
      "step": 2227
    },
    {
      "epoch": 0.8021602160216021,
      "grad_norm": 0.5311564207077026,
      "learning_rate": 1.9342768805891178e-05,
      "loss": 4.8866,
      "step": 2228
    },
    {
      "epoch": 0.8025202520252025,
      "grad_norm": 0.8058616518974304,
      "learning_rate": 1.927472162006717e-05,
      "loss": 4.5184,
      "step": 2229
    },
    {
      "epoch": 0.8028802880288028,
      "grad_norm": 0.690186619758606,
      "learning_rate": 1.920678157111776e-05,
      "loss": 4.657,
      "step": 2230
    },
    {
      "epoch": 0.8032403240324032,
      "grad_norm": 0.7219494581222534,
      "learning_rate": 1.9138948749211472e-05,
      "loss": 4.7445,
      "step": 2231
    },
    {
      "epoch": 0.8036003600360035,
      "grad_norm": 0.6288260221481323,
      "learning_rate": 1.9071223244374614e-05,
      "loss": 4.9032,
      "step": 2232
    },
    {
      "epoch": 0.803960396039604,
      "grad_norm": 0.7226089239120483,
      "learning_rate": 1.9003605146491054e-05,
      "loss": 5.1746,
      "step": 2233
    },
    {
      "epoch": 0.8043204320432044,
      "grad_norm": 0.8098820447921753,
      "learning_rate": 1.8936094545302095e-05,
      "loss": 4.8424,
      "step": 2234
    },
    {
      "epoch": 0.8046804680468047,
      "grad_norm": 0.7526129484176636,
      "learning_rate": 1.8868691530406336e-05,
      "loss": 4.5838,
      "step": 2235
    },
    {
      "epoch": 0.8050405040504051,
      "grad_norm": 0.8739807605743408,
      "learning_rate": 1.8801396191259645e-05,
      "loss": 4.7443,
      "step": 2236
    },
    {
      "epoch": 0.8054005400540054,
      "grad_norm": 0.6892088651657104,
      "learning_rate": 1.8734208617174988e-05,
      "loss": 4.9657,
      "step": 2237
    },
    {
      "epoch": 0.8057605760576058,
      "grad_norm": 0.5015419125556946,
      "learning_rate": 1.866712889732225e-05,
      "loss": 4.7528,
      "step": 2238
    },
    {
      "epoch": 0.8061206120612061,
      "grad_norm": 0.5005907416343689,
      "learning_rate": 1.8600157120728244e-05,
      "loss": 4.7449,
      "step": 2239
    },
    {
      "epoch": 0.8064806480648065,
      "grad_norm": 0.526184618473053,
      "learning_rate": 1.8533293376276472e-05,
      "loss": 4.6524,
      "step": 2240
    },
    {
      "epoch": 0.8068406840684068,
      "grad_norm": 0.6554297804832458,
      "learning_rate": 1.8466537752707068e-05,
      "loss": 4.8402,
      "step": 2241
    },
    {
      "epoch": 0.8072007200720072,
      "grad_norm": 0.6219531893730164,
      "learning_rate": 1.839989033861673e-05,
      "loss": 4.6053,
      "step": 2242
    },
    {
      "epoch": 0.8075607560756075,
      "grad_norm": 0.6740393042564392,
      "learning_rate": 1.8333351222458407e-05,
      "loss": 4.385,
      "step": 2243
    },
    {
      "epoch": 0.807920792079208,
      "grad_norm": 0.7463712096214294,
      "learning_rate": 1.826692049254145e-05,
      "loss": 4.8666,
      "step": 2244
    },
    {
      "epoch": 0.8082808280828083,
      "grad_norm": 0.6941218376159668,
      "learning_rate": 1.820059823703133e-05,
      "loss": 4.6104,
      "step": 2245
    },
    {
      "epoch": 0.8086408640864087,
      "grad_norm": 0.8766574859619141,
      "learning_rate": 1.8134384543949478e-05,
      "loss": 4.6754,
      "step": 2246
    },
    {
      "epoch": 0.809000900090009,
      "grad_norm": 0.7981788516044617,
      "learning_rate": 1.8068279501173335e-05,
      "loss": 4.8805,
      "step": 2247
    },
    {
      "epoch": 0.8093609360936094,
      "grad_norm": 1.02590811252594,
      "learning_rate": 1.8002283196436097e-05,
      "loss": 4.9571,
      "step": 2248
    },
    {
      "epoch": 0.8097209720972097,
      "grad_norm": 1.0470219850540161,
      "learning_rate": 1.7936395717326704e-05,
      "loss": 5.117,
      "step": 2249
    },
    {
      "epoch": 0.8100810081008101,
      "grad_norm": 1.3908179998397827,
      "learning_rate": 1.787061715128956e-05,
      "loss": 5.2106,
      "step": 2250
    },
    {
      "epoch": 0.8104410441044104,
      "grad_norm": 0.970249354839325,
      "learning_rate": 1.7804947585624588e-05,
      "loss": 5.0495,
      "step": 2251
    },
    {
      "epoch": 0.8108010801080108,
      "grad_norm": 0.7076825499534607,
      "learning_rate": 1.773938710748706e-05,
      "loss": 4.6217,
      "step": 2252
    },
    {
      "epoch": 0.8111611161116111,
      "grad_norm": 0.657702624797821,
      "learning_rate": 1.7673935803887453e-05,
      "loss": 4.4113,
      "step": 2253
    },
    {
      "epoch": 0.8115211521152115,
      "grad_norm": 0.6246639490127563,
      "learning_rate": 1.760859376169133e-05,
      "loss": 4.5696,
      "step": 2254
    },
    {
      "epoch": 0.8118811881188119,
      "grad_norm": 0.5377760529518127,
      "learning_rate": 1.754336106761927e-05,
      "loss": 4.5045,
      "step": 2255
    },
    {
      "epoch": 0.8122412241224123,
      "grad_norm": 0.5879418849945068,
      "learning_rate": 1.7478237808246722e-05,
      "loss": 5.1566,
      "step": 2256
    },
    {
      "epoch": 0.8126012601260126,
      "grad_norm": 0.6851582527160645,
      "learning_rate": 1.741322407000391e-05,
      "loss": 4.776,
      "step": 2257
    },
    {
      "epoch": 0.812961296129613,
      "grad_norm": 1.0152539014816284,
      "learning_rate": 1.7348319939175637e-05,
      "loss": 4.4992,
      "step": 2258
    },
    {
      "epoch": 0.8133213321332133,
      "grad_norm": 0.6916372179985046,
      "learning_rate": 1.7283525501901323e-05,
      "loss": 4.1276,
      "step": 2259
    },
    {
      "epoch": 0.8136813681368137,
      "grad_norm": 0.5402399897575378,
      "learning_rate": 1.7218840844174754e-05,
      "loss": 4.7419,
      "step": 2260
    },
    {
      "epoch": 0.814041404140414,
      "grad_norm": 0.48339545726776123,
      "learning_rate": 1.715426605184407e-05,
      "loss": 4.3284,
      "step": 2261
    },
    {
      "epoch": 0.8144014401440144,
      "grad_norm": 0.7083166241645813,
      "learning_rate": 1.70898012106115e-05,
      "loss": 4.5239,
      "step": 2262
    },
    {
      "epoch": 0.8147614761476147,
      "grad_norm": 0.5646843314170837,
      "learning_rate": 1.7025446406033453e-05,
      "loss": 4.5845,
      "step": 2263
    },
    {
      "epoch": 0.8151215121512151,
      "grad_norm": 0.7311326265335083,
      "learning_rate": 1.696120172352025e-05,
      "loss": 4.5815,
      "step": 2264
    },
    {
      "epoch": 0.8154815481548154,
      "grad_norm": 0.743579626083374,
      "learning_rate": 1.6897067248336095e-05,
      "loss": 5.0977,
      "step": 2265
    },
    {
      "epoch": 0.8158415841584158,
      "grad_norm": 0.6290472149848938,
      "learning_rate": 1.683304306559884e-05,
      "loss": 4.7559,
      "step": 2266
    },
    {
      "epoch": 0.8162016201620163,
      "grad_norm": 0.5165727734565735,
      "learning_rate": 1.676912926028007e-05,
      "loss": 4.8021,
      "step": 2267
    },
    {
      "epoch": 0.8165616561656166,
      "grad_norm": 0.5954656600952148,
      "learning_rate": 1.6705325917204805e-05,
      "loss": 4.7522,
      "step": 2268
    },
    {
      "epoch": 0.816921692169217,
      "grad_norm": 0.6288970708847046,
      "learning_rate": 1.66416331210515e-05,
      "loss": 4.7196,
      "step": 2269
    },
    {
      "epoch": 0.8172817281728173,
      "grad_norm": 0.6287972331047058,
      "learning_rate": 1.6578050956351886e-05,
      "loss": 5.1404,
      "step": 2270
    },
    {
      "epoch": 0.8176417641764177,
      "grad_norm": 0.8024221658706665,
      "learning_rate": 1.6514579507490848e-05,
      "loss": 5.0936,
      "step": 2271
    },
    {
      "epoch": 0.818001800180018,
      "grad_norm": 0.5822760462760925,
      "learning_rate": 1.6451218858706374e-05,
      "loss": 4.9177,
      "step": 2272
    },
    {
      "epoch": 0.8183618361836184,
      "grad_norm": 0.8307278752326965,
      "learning_rate": 1.6387969094089316e-05,
      "loss": 5.2499,
      "step": 2273
    },
    {
      "epoch": 0.8187218721872187,
      "grad_norm": 0.9024346470832825,
      "learning_rate": 1.632483029758345e-05,
      "loss": 5.276,
      "step": 2274
    },
    {
      "epoch": 0.819081908190819,
      "grad_norm": 1.2056093215942383,
      "learning_rate": 1.626180255298525e-05,
      "loss": 5.3459,
      "step": 2275
    },
    {
      "epoch": 0.8194419441944194,
      "grad_norm": 1.9106166362762451,
      "learning_rate": 1.619888594394382e-05,
      "loss": 4.8601,
      "step": 2276
    },
    {
      "epoch": 0.8198019801980198,
      "grad_norm": 0.6660627126693726,
      "learning_rate": 1.6136080553960687e-05,
      "loss": 4.7553,
      "step": 2277
    },
    {
      "epoch": 0.8201620162016202,
      "grad_norm": 0.4384493827819824,
      "learning_rate": 1.6073386466389872e-05,
      "loss": 4.5587,
      "step": 2278
    },
    {
      "epoch": 0.8205220522052206,
      "grad_norm": 0.5030012726783752,
      "learning_rate": 1.601080376443763e-05,
      "loss": 4.5943,
      "step": 2279
    },
    {
      "epoch": 0.8208820882088209,
      "grad_norm": 0.5574566721916199,
      "learning_rate": 1.5948332531162413e-05,
      "loss": 4.5229,
      "step": 2280
    },
    {
      "epoch": 0.8212421242124213,
      "grad_norm": 0.519743025302887,
      "learning_rate": 1.5885972849474672e-05,
      "loss": 4.5851,
      "step": 2281
    },
    {
      "epoch": 0.8216021602160216,
      "grad_norm": 0.5831529498100281,
      "learning_rate": 1.5823724802136865e-05,
      "loss": 4.7909,
      "step": 2282
    },
    {
      "epoch": 0.821962196219622,
      "grad_norm": 0.8960453271865845,
      "learning_rate": 1.576158847176329e-05,
      "loss": 4.8842,
      "step": 2283
    },
    {
      "epoch": 0.8223222322232223,
      "grad_norm": 0.6710848808288574,
      "learning_rate": 1.5699563940819962e-05,
      "loss": 4.8542,
      "step": 2284
    },
    {
      "epoch": 0.8226822682268227,
      "grad_norm": 0.5260566473007202,
      "learning_rate": 1.5637651291624523e-05,
      "loss": 4.6999,
      "step": 2285
    },
    {
      "epoch": 0.823042304230423,
      "grad_norm": 0.6472052335739136,
      "learning_rate": 1.557585060634612e-05,
      "loss": 4.5737,
      "step": 2286
    },
    {
      "epoch": 0.8234023402340234,
      "grad_norm": 0.757953941822052,
      "learning_rate": 1.5514161967005337e-05,
      "loss": 4.4606,
      "step": 2287
    },
    {
      "epoch": 0.8237623762376237,
      "grad_norm": 0.5936845541000366,
      "learning_rate": 1.5452585455473977e-05,
      "loss": 4.5589,
      "step": 2288
    },
    {
      "epoch": 0.8241224122412241,
      "grad_norm": 0.6369442343711853,
      "learning_rate": 1.539112115347511e-05,
      "loss": 4.963,
      "step": 2289
    },
    {
      "epoch": 0.8244824482448245,
      "grad_norm": 0.8031920194625854,
      "learning_rate": 1.5329769142582827e-05,
      "loss": 4.9157,
      "step": 2290
    },
    {
      "epoch": 0.8248424842484249,
      "grad_norm": 0.756523847579956,
      "learning_rate": 1.526852950422226e-05,
      "loss": 4.6052,
      "step": 2291
    },
    {
      "epoch": 0.8252025202520252,
      "grad_norm": 0.6446996331214905,
      "learning_rate": 1.5207402319669306e-05,
      "loss": 4.7325,
      "step": 2292
    },
    {
      "epoch": 0.8255625562556256,
      "grad_norm": 0.6594321727752686,
      "learning_rate": 1.5146387670050687e-05,
      "loss": 4.7268,
      "step": 2293
    },
    {
      "epoch": 0.8259225922592259,
      "grad_norm": 0.6736615896224976,
      "learning_rate": 1.5085485636343755e-05,
      "loss": 5.0275,
      "step": 2294
    },
    {
      "epoch": 0.8262826282628263,
      "grad_norm": 0.5317438244819641,
      "learning_rate": 1.5024696299376407e-05,
      "loss": 4.7624,
      "step": 2295
    },
    {
      "epoch": 0.8266426642664266,
      "grad_norm": 0.6013367772102356,
      "learning_rate": 1.4964019739826907e-05,
      "loss": 4.5998,
      "step": 2296
    },
    {
      "epoch": 0.827002700270027,
      "grad_norm": 0.5864752531051636,
      "learning_rate": 1.4903456038223939e-05,
      "loss": 4.7793,
      "step": 2297
    },
    {
      "epoch": 0.8273627362736273,
      "grad_norm": 1.0099995136260986,
      "learning_rate": 1.4843005274946365e-05,
      "loss": 5.441,
      "step": 2298
    },
    {
      "epoch": 0.8277227722772277,
      "grad_norm": 1.1725364923477173,
      "learning_rate": 1.4782667530223126e-05,
      "loss": 4.894,
      "step": 2299
    },
    {
      "epoch": 0.828082808280828,
      "grad_norm": 1.4512721300125122,
      "learning_rate": 1.4722442884133214e-05,
      "loss": 5.316,
      "step": 2300
    },
    {
      "epoch": 0.8284428442844285,
      "grad_norm": 0.6787356734275818,
      "learning_rate": 1.4662331416605501e-05,
      "loss": 4.4722,
      "step": 2301
    },
    {
      "epoch": 0.8288028802880288,
      "grad_norm": 1.0046409368515015,
      "learning_rate": 1.4602333207418651e-05,
      "loss": 4.455,
      "step": 2302
    },
    {
      "epoch": 0.8291629162916292,
      "grad_norm": 0.7777742743492126,
      "learning_rate": 1.454244833620102e-05,
      "loss": 4.277,
      "step": 2303
    },
    {
      "epoch": 0.8295229522952295,
      "grad_norm": 1.2001330852508545,
      "learning_rate": 1.4482676882430502e-05,
      "loss": 4.9538,
      "step": 2304
    },
    {
      "epoch": 0.8298829882988299,
      "grad_norm": 0.6005185842514038,
      "learning_rate": 1.4423018925434528e-05,
      "loss": 4.5779,
      "step": 2305
    },
    {
      "epoch": 0.8302430243024302,
      "grad_norm": 0.7251618504524231,
      "learning_rate": 1.4363474544389877e-05,
      "loss": 4.6006,
      "step": 2306
    },
    {
      "epoch": 0.8306030603060306,
      "grad_norm": 0.9123652577400208,
      "learning_rate": 1.4304043818322565e-05,
      "loss": 4.927,
      "step": 2307
    },
    {
      "epoch": 0.8309630963096309,
      "grad_norm": 0.9391204118728638,
      "learning_rate": 1.424472682610779e-05,
      "loss": 5.0999,
      "step": 2308
    },
    {
      "epoch": 0.8313231323132313,
      "grad_norm": 0.6396461725234985,
      "learning_rate": 1.4185523646469822e-05,
      "loss": 4.552,
      "step": 2309
    },
    {
      "epoch": 0.8316831683168316,
      "grad_norm": 0.5968081951141357,
      "learning_rate": 1.4126434357981877e-05,
      "loss": 4.6309,
      "step": 2310
    },
    {
      "epoch": 0.832043204320432,
      "grad_norm": 0.8504002690315247,
      "learning_rate": 1.4067459039065956e-05,
      "loss": 4.995,
      "step": 2311
    },
    {
      "epoch": 0.8324032403240325,
      "grad_norm": 0.9265114068984985,
      "learning_rate": 1.4008597767992871e-05,
      "loss": 4.8508,
      "step": 2312
    },
    {
      "epoch": 0.8327632763276328,
      "grad_norm": 0.5217415690422058,
      "learning_rate": 1.3949850622882054e-05,
      "loss": 4.8427,
      "step": 2313
    },
    {
      "epoch": 0.8331233123312332,
      "grad_norm": 0.8731891512870789,
      "learning_rate": 1.3891217681701474e-05,
      "loss": 4.713,
      "step": 2314
    },
    {
      "epoch": 0.8334833483348335,
      "grad_norm": 0.602576732635498,
      "learning_rate": 1.3832699022267515e-05,
      "loss": 4.5357,
      "step": 2315
    },
    {
      "epoch": 0.8338433843384339,
      "grad_norm": 0.6332751512527466,
      "learning_rate": 1.3774294722244907e-05,
      "loss": 4.6608,
      "step": 2316
    },
    {
      "epoch": 0.8342034203420342,
      "grad_norm": 0.7543874979019165,
      "learning_rate": 1.3716004859146592e-05,
      "loss": 4.6207,
      "step": 2317
    },
    {
      "epoch": 0.8345634563456346,
      "grad_norm": 0.888930082321167,
      "learning_rate": 1.3657829510333654e-05,
      "loss": 4.6348,
      "step": 2318
    },
    {
      "epoch": 0.8349234923492349,
      "grad_norm": 0.8453302979469299,
      "learning_rate": 1.3599768753015152e-05,
      "loss": 4.7868,
      "step": 2319
    },
    {
      "epoch": 0.8352835283528353,
      "grad_norm": 0.5163218379020691,
      "learning_rate": 1.3541822664248094e-05,
      "loss": 4.4345,
      "step": 2320
    },
    {
      "epoch": 0.8356435643564356,
      "grad_norm": 0.49913036823272705,
      "learning_rate": 1.3483991320937306e-05,
      "loss": 4.961,
      "step": 2321
    },
    {
      "epoch": 0.836003600360036,
      "grad_norm": 1.1673535108566284,
      "learning_rate": 1.3426274799835337e-05,
      "loss": 4.8401,
      "step": 2322
    },
    {
      "epoch": 0.8363636363636363,
      "grad_norm": 0.8345517516136169,
      "learning_rate": 1.336867317754229e-05,
      "loss": 5.0522,
      "step": 2323
    },
    {
      "epoch": 0.8367236723672368,
      "grad_norm": 0.9774869680404663,
      "learning_rate": 1.3311186530505838e-05,
      "loss": 5.1665,
      "step": 2324
    },
    {
      "epoch": 0.8370837083708371,
      "grad_norm": 1.130599856376648,
      "learning_rate": 1.3253814935021026e-05,
      "loss": 5.1565,
      "step": 2325
    },
    {
      "epoch": 0.8374437443744375,
      "grad_norm": 0.9888586401939392,
      "learning_rate": 1.3196558467230247e-05,
      "loss": 4.9575,
      "step": 2326
    },
    {
      "epoch": 0.8378037803780378,
      "grad_norm": 0.6156508326530457,
      "learning_rate": 1.3139417203123027e-05,
      "loss": 4.9267,
      "step": 2327
    },
    {
      "epoch": 0.8381638163816382,
      "grad_norm": 0.6930103302001953,
      "learning_rate": 1.3082391218536061e-05,
      "loss": 4.7754,
      "step": 2328
    },
    {
      "epoch": 0.8385238523852385,
      "grad_norm": 0.7321805357933044,
      "learning_rate": 1.3025480589153005e-05,
      "loss": 4.8377,
      "step": 2329
    },
    {
      "epoch": 0.8388838883888389,
      "grad_norm": 0.7025576233863831,
      "learning_rate": 1.2968685390504465e-05,
      "loss": 4.8009,
      "step": 2330
    },
    {
      "epoch": 0.8392439243924392,
      "grad_norm": 0.5497130155563354,
      "learning_rate": 1.29120056979678e-05,
      "loss": 4.4953,
      "step": 2331
    },
    {
      "epoch": 0.8396039603960396,
      "grad_norm": 0.5606801509857178,
      "learning_rate": 1.2855441586767113e-05,
      "loss": 4.4783,
      "step": 2332
    },
    {
      "epoch": 0.8399639963996399,
      "grad_norm": 0.5393441915512085,
      "learning_rate": 1.2798993131973091e-05,
      "loss": 4.4954,
      "step": 2333
    },
    {
      "epoch": 0.8403240324032403,
      "grad_norm": 0.601349949836731,
      "learning_rate": 1.2742660408502904e-05,
      "loss": 4.6084,
      "step": 2334
    },
    {
      "epoch": 0.8406840684068407,
      "grad_norm": 0.5767045617103577,
      "learning_rate": 1.2686443491120149e-05,
      "loss": 4.7144,
      "step": 2335
    },
    {
      "epoch": 0.8410441044104411,
      "grad_norm": 0.7713471055030823,
      "learning_rate": 1.263034245443473e-05,
      "loss": 4.673,
      "step": 2336
    },
    {
      "epoch": 0.8414041404140414,
      "grad_norm": 0.771685779094696,
      "learning_rate": 1.2574357372902767e-05,
      "loss": 4.9869,
      "step": 2337
    },
    {
      "epoch": 0.8417641764176418,
      "grad_norm": 0.5168091654777527,
      "learning_rate": 1.2518488320826449e-05,
      "loss": 4.5899,
      "step": 2338
    },
    {
      "epoch": 0.8421242124212421,
      "grad_norm": 1.225448727607727,
      "learning_rate": 1.2462735372353996e-05,
      "loss": 4.7007,
      "step": 2339
    },
    {
      "epoch": 0.8424842484248425,
      "grad_norm": 0.5362923741340637,
      "learning_rate": 1.2407098601479539e-05,
      "loss": 4.696,
      "step": 2340
    },
    {
      "epoch": 0.8428442844284428,
      "grad_norm": 0.6797324419021606,
      "learning_rate": 1.2351578082043047e-05,
      "loss": 4.791,
      "step": 2341
    },
    {
      "epoch": 0.8432043204320432,
      "grad_norm": 1.1674048900604248,
      "learning_rate": 1.2296173887730123e-05,
      "loss": 4.9394,
      "step": 2342
    },
    {
      "epoch": 0.8435643564356435,
      "grad_norm": 0.6934359073638916,
      "learning_rate": 1.2240886092072068e-05,
      "loss": 4.5077,
      "step": 2343
    },
    {
      "epoch": 0.8439243924392439,
      "grad_norm": 0.5877205729484558,
      "learning_rate": 1.2185714768445667e-05,
      "loss": 4.5705,
      "step": 2344
    },
    {
      "epoch": 0.8442844284428442,
      "grad_norm": 0.6093941926956177,
      "learning_rate": 1.2130659990073146e-05,
      "loss": 4.5415,
      "step": 2345
    },
    {
      "epoch": 0.8446444644464447,
      "grad_norm": 0.7287856936454773,
      "learning_rate": 1.2075721830021969e-05,
      "loss": 5.0374,
      "step": 2346
    },
    {
      "epoch": 0.845004500450045,
      "grad_norm": 0.8647234439849854,
      "learning_rate": 1.2020900361204968e-05,
      "loss": 4.6786,
      "step": 2347
    },
    {
      "epoch": 0.8453645364536454,
      "grad_norm": 0.7940008044242859,
      "learning_rate": 1.1966195656380031e-05,
      "loss": 5.0893,
      "step": 2348
    },
    {
      "epoch": 0.8457245724572457,
      "grad_norm": 1.0051583051681519,
      "learning_rate": 1.1911607788150036e-05,
      "loss": 5.203,
      "step": 2349
    },
    {
      "epoch": 0.8460846084608461,
      "grad_norm": 0.9990129470825195,
      "learning_rate": 1.1857136828962855e-05,
      "loss": 5.0418,
      "step": 2350
    },
    {
      "epoch": 0.8464446444644464,
      "grad_norm": 1.3434467315673828,
      "learning_rate": 1.1802782851111205e-05,
      "loss": 5.1628,
      "step": 2351
    },
    {
      "epoch": 0.8468046804680468,
      "grad_norm": 0.7505450248718262,
      "learning_rate": 1.1748545926732535e-05,
      "loss": 4.6661,
      "step": 2352
    },
    {
      "epoch": 0.8471647164716472,
      "grad_norm": 1.0241285562515259,
      "learning_rate": 1.169442612780891e-05,
      "loss": 4.7007,
      "step": 2353
    },
    {
      "epoch": 0.8475247524752475,
      "grad_norm": 1.4250359535217285,
      "learning_rate": 1.1640423526166988e-05,
      "loss": 4.5734,
      "step": 2354
    },
    {
      "epoch": 0.8478847884788479,
      "grad_norm": 0.8431483507156372,
      "learning_rate": 1.158653819347788e-05,
      "loss": 4.6603,
      "step": 2355
    },
    {
      "epoch": 0.8482448244824482,
      "grad_norm": 0.6706793904304504,
      "learning_rate": 1.1532770201257082e-05,
      "loss": 4.7574,
      "step": 2356
    },
    {
      "epoch": 0.8486048604860486,
      "grad_norm": 0.6527566909790039,
      "learning_rate": 1.1479119620864276e-05,
      "loss": 4.6939,
      "step": 2357
    },
    {
      "epoch": 0.848964896489649,
      "grad_norm": 0.846868634223938,
      "learning_rate": 1.1425586523503395e-05,
      "loss": 4.7602,
      "step": 2358
    },
    {
      "epoch": 0.8493249324932494,
      "grad_norm": 0.8098002672195435,
      "learning_rate": 1.1372170980222441e-05,
      "loss": 4.9113,
      "step": 2359
    },
    {
      "epoch": 0.8496849684968497,
      "grad_norm": 0.6968252062797546,
      "learning_rate": 1.1318873061913405e-05,
      "loss": 4.8477,
      "step": 2360
    },
    {
      "epoch": 0.8500450045004501,
      "grad_norm": 0.6603767275810242,
      "learning_rate": 1.1265692839312092e-05,
      "loss": 4.7987,
      "step": 2361
    },
    {
      "epoch": 0.8504050405040504,
      "grad_norm": 0.9465769529342651,
      "learning_rate": 1.1212630382998213e-05,
      "loss": 4.5938,
      "step": 2362
    },
    {
      "epoch": 0.8507650765076508,
      "grad_norm": 0.6345024704933167,
      "learning_rate": 1.1159685763395111e-05,
      "loss": 4.7153,
      "step": 2363
    },
    {
      "epoch": 0.8511251125112511,
      "grad_norm": 0.7378025054931641,
      "learning_rate": 1.1106859050769769e-05,
      "loss": 4.5385,
      "step": 2364
    },
    {
      "epoch": 0.8514851485148515,
      "grad_norm": 0.9905508160591125,
      "learning_rate": 1.1054150315232681e-05,
      "loss": 4.7941,
      "step": 2365
    },
    {
      "epoch": 0.8518451845184518,
      "grad_norm": 0.8407430648803711,
      "learning_rate": 1.1001559626737756e-05,
      "loss": 4.7788,
      "step": 2366
    },
    {
      "epoch": 0.8522052205220522,
      "grad_norm": 0.8498520255088806,
      "learning_rate": 1.0949087055082252e-05,
      "loss": 4.1181,
      "step": 2367
    },
    {
      "epoch": 0.8525652565256525,
      "grad_norm": 0.9800708889961243,
      "learning_rate": 1.089673266990663e-05,
      "loss": 4.8319,
      "step": 2368
    },
    {
      "epoch": 0.852925292529253,
      "grad_norm": 0.7625902891159058,
      "learning_rate": 1.0844496540694515e-05,
      "loss": 4.7761,
      "step": 2369
    },
    {
      "epoch": 0.8532853285328533,
      "grad_norm": 0.638638436794281,
      "learning_rate": 1.0792378736772612e-05,
      "loss": 4.7487,
      "step": 2370
    },
    {
      "epoch": 0.8536453645364537,
      "grad_norm": 0.6259344220161438,
      "learning_rate": 1.0740379327310569e-05,
      "loss": 4.9022,
      "step": 2371
    },
    {
      "epoch": 0.854005400540054,
      "grad_norm": 0.9138006567955017,
      "learning_rate": 1.0688498381320855e-05,
      "loss": 5.0246,
      "step": 2372
    },
    {
      "epoch": 0.8543654365436544,
      "grad_norm": 0.9508568048477173,
      "learning_rate": 1.0636735967658784e-05,
      "loss": 4.9478,
      "step": 2373
    },
    {
      "epoch": 0.8547254725472547,
      "grad_norm": 1.1043336391448975,
      "learning_rate": 1.0585092155022336e-05,
      "loss": 4.8974,
      "step": 2374
    },
    {
      "epoch": 0.8550855085508551,
      "grad_norm": 1.3299425840377808,
      "learning_rate": 1.0533567011952094e-05,
      "loss": 5.1492,
      "step": 2375
    },
    {
      "epoch": 0.8554455445544554,
      "grad_norm": 0.6157066822052002,
      "learning_rate": 1.0482160606831093e-05,
      "loss": 4.7135,
      "step": 2376
    },
    {
      "epoch": 0.8558055805580558,
      "grad_norm": 1.1526126861572266,
      "learning_rate": 1.0430873007884857e-05,
      "loss": 4.8683,
      "step": 2377
    },
    {
      "epoch": 0.8561656165616561,
      "grad_norm": 0.5212879180908203,
      "learning_rate": 1.0379704283181179e-05,
      "loss": 4.5955,
      "step": 2378
    },
    {
      "epoch": 0.8565256525652565,
      "grad_norm": 0.8130112290382385,
      "learning_rate": 1.0328654500630108e-05,
      "loss": 4.7918,
      "step": 2379
    },
    {
      "epoch": 0.8568856885688569,
      "grad_norm": 0.7200890183448792,
      "learning_rate": 1.0277723727983845e-05,
      "loss": 4.8406,
      "step": 2380
    },
    {
      "epoch": 0.8572457245724573,
      "grad_norm": 0.6026584506034851,
      "learning_rate": 1.0226912032836611e-05,
      "loss": 4.7515,
      "step": 2381
    },
    {
      "epoch": 0.8576057605760576,
      "grad_norm": 0.9684290885925293,
      "learning_rate": 1.0176219482624616e-05,
      "loss": 4.8093,
      "step": 2382
    },
    {
      "epoch": 0.857965796579658,
      "grad_norm": 0.6427994966506958,
      "learning_rate": 1.0125646144625955e-05,
      "loss": 4.5308,
      "step": 2383
    },
    {
      "epoch": 0.8583258325832583,
      "grad_norm": 1.033554196357727,
      "learning_rate": 1.007519208596045e-05,
      "loss": 4.8341,
      "step": 2384
    },
    {
      "epoch": 0.8586858685868587,
      "grad_norm": 0.6669801473617554,
      "learning_rate": 1.002485737358968e-05,
      "loss": 4.8964,
      "step": 2385
    },
    {
      "epoch": 0.859045904590459,
      "grad_norm": 0.6307418942451477,
      "learning_rate": 9.974642074316798e-06,
      "loss": 4.8266,
      "step": 2386
    },
    {
      "epoch": 0.8594059405940594,
      "grad_norm": 0.6424444913864136,
      "learning_rate": 9.924546254786493e-06,
      "loss": 4.7471,
      "step": 2387
    },
    {
      "epoch": 0.8597659765976597,
      "grad_norm": 0.8725467920303345,
      "learning_rate": 9.874569981484861e-06,
      "loss": 4.5142,
      "step": 2388
    },
    {
      "epoch": 0.8601260126012601,
      "grad_norm": 1.1564704179763794,
      "learning_rate": 9.824713320739342e-06,
      "loss": 4.7016,
      "step": 2389
    },
    {
      "epoch": 0.8604860486048604,
      "grad_norm": 0.7655138969421387,
      "learning_rate": 9.774976338718677e-06,
      "loss": 4.3319,
      "step": 2390
    },
    {
      "epoch": 0.8608460846084608,
      "grad_norm": 0.7302666306495667,
      "learning_rate": 9.725359101432674e-06,
      "loss": 4.6624,
      "step": 2391
    },
    {
      "epoch": 0.8612061206120613,
      "grad_norm": 0.7123817801475525,
      "learning_rate": 9.675861674732312e-06,
      "loss": 4.5181,
      "step": 2392
    },
    {
      "epoch": 0.8615661566156616,
      "grad_norm": 0.5299736261367798,
      "learning_rate": 9.62648412430951e-06,
      "loss": 4.6567,
      "step": 2393
    },
    {
      "epoch": 0.861926192619262,
      "grad_norm": 0.711216390132904,
      "learning_rate": 9.577226515697124e-06,
      "loss": 4.6318,
      "step": 2394
    },
    {
      "epoch": 0.8622862286228623,
      "grad_norm": 0.7324408888816833,
      "learning_rate": 9.528088914268784e-06,
      "loss": 4.6413,
      "step": 2395
    },
    {
      "epoch": 0.8626462646264627,
      "grad_norm": 0.6073545217514038,
      "learning_rate": 9.479071385238892e-06,
      "loss": 4.6813,
      "step": 2396
    },
    {
      "epoch": 0.863006300630063,
      "grad_norm": 0.9429351687431335,
      "learning_rate": 9.430173993662451e-06,
      "loss": 4.7784,
      "step": 2397
    },
    {
      "epoch": 0.8633663366336634,
      "grad_norm": 0.8551303148269653,
      "learning_rate": 9.381396804435061e-06,
      "loss": 5.4424,
      "step": 2398
    },
    {
      "epoch": 0.8637263726372637,
      "grad_norm": 0.9081370234489441,
      "learning_rate": 9.332739882292752e-06,
      "loss": 5.1161,
      "step": 2399
    },
    {
      "epoch": 0.8640864086408641,
      "grad_norm": 0.9314940571784973,
      "learning_rate": 9.284203291811954e-06,
      "loss": 5.0829,
      "step": 2400
    },
    {
      "epoch": 0.8644464446444644,
      "grad_norm": 0.747048556804657,
      "learning_rate": 9.23578709740942e-06,
      "loss": 4.671,
      "step": 2401
    },
    {
      "epoch": 0.8648064806480648,
      "grad_norm": 0.8901441097259521,
      "learning_rate": 9.187491363342093e-06,
      "loss": 4.7503,
      "step": 2402
    },
    {
      "epoch": 0.8651665166516652,
      "grad_norm": 0.8733905553817749,
      "learning_rate": 9.139316153707023e-06,
      "loss": 4.1668,
      "step": 2403
    },
    {
      "epoch": 0.8655265526552656,
      "grad_norm": 0.8293418288230896,
      "learning_rate": 9.091261532441342e-06,
      "loss": 4.9468,
      "step": 2404
    },
    {
      "epoch": 0.8658865886588659,
      "grad_norm": 0.6938745379447937,
      "learning_rate": 9.043327563322112e-06,
      "loss": 4.8042,
      "step": 2405
    },
    {
      "epoch": 0.8662466246624663,
      "grad_norm": 0.7350160479545593,
      "learning_rate": 8.995514309966302e-06,
      "loss": 5.0458,
      "step": 2406
    },
    {
      "epoch": 0.8666066606660666,
      "grad_norm": 0.5033836960792542,
      "learning_rate": 8.947821835830616e-06,
      "loss": 4.7879,
      "step": 2407
    },
    {
      "epoch": 0.866966696669667,
      "grad_norm": 0.9561224579811096,
      "learning_rate": 8.900250204211514e-06,
      "loss": 4.8389,
      "step": 2408
    },
    {
      "epoch": 0.8673267326732673,
      "grad_norm": 0.8856688141822815,
      "learning_rate": 8.852799478245032e-06,
      "loss": 4.5677,
      "step": 2409
    },
    {
      "epoch": 0.8676867686768677,
      "grad_norm": 0.48359963297843933,
      "learning_rate": 8.80546972090679e-06,
      "loss": 4.6154,
      "step": 2410
    },
    {
      "epoch": 0.868046804680468,
      "grad_norm": 0.6133762001991272,
      "learning_rate": 8.758260995011825e-06,
      "loss": 4.818,
      "step": 2411
    },
    {
      "epoch": 0.8684068406840684,
      "grad_norm": 0.7412658333778381,
      "learning_rate": 8.711173363214553e-06,
      "loss": 4.5966,
      "step": 2412
    },
    {
      "epoch": 0.8687668766876687,
      "grad_norm": 0.681463897228241,
      "learning_rate": 8.664206888008697e-06,
      "loss": 4.5624,
      "step": 2413
    },
    {
      "epoch": 0.8691269126912692,
      "grad_norm": 0.7318177223205566,
      "learning_rate": 8.617361631727138e-06,
      "loss": 4.5771,
      "step": 2414
    },
    {
      "epoch": 0.8694869486948695,
      "grad_norm": 0.8274372220039368,
      "learning_rate": 8.570637656541914e-06,
      "loss": 4.9709,
      "step": 2415
    },
    {
      "epoch": 0.8698469846984699,
      "grad_norm": 0.6884218454360962,
      "learning_rate": 8.524035024464105e-06,
      "loss": 4.9416,
      "step": 2416
    },
    {
      "epoch": 0.8702070207020702,
      "grad_norm": 0.7390003800392151,
      "learning_rate": 8.47755379734373e-06,
      "loss": 4.7099,
      "step": 2417
    },
    {
      "epoch": 0.8705670567056706,
      "grad_norm": 1.129050374031067,
      "learning_rate": 8.431194036869672e-06,
      "loss": 4.8879,
      "step": 2418
    },
    {
      "epoch": 0.8709270927092709,
      "grad_norm": 0.7855664491653442,
      "learning_rate": 8.384955804569627e-06,
      "loss": 4.8775,
      "step": 2419
    },
    {
      "epoch": 0.8712871287128713,
      "grad_norm": 0.5347578525543213,
      "learning_rate": 8.338839161809997e-06,
      "loss": 4.6191,
      "step": 2420
    },
    {
      "epoch": 0.8716471647164716,
      "grad_norm": 0.754165768623352,
      "learning_rate": 8.292844169795833e-06,
      "loss": 4.5964,
      "step": 2421
    },
    {
      "epoch": 0.872007200720072,
      "grad_norm": 0.7422668933868408,
      "learning_rate": 8.24697088957066e-06,
      "loss": 4.804,
      "step": 2422
    },
    {
      "epoch": 0.8723672367236723,
      "grad_norm": 0.8895533680915833,
      "learning_rate": 8.201219382016556e-06,
      "loss": 5.1019,
      "step": 2423
    },
    {
      "epoch": 0.8727272727272727,
      "grad_norm": 0.7388155460357666,
      "learning_rate": 8.15558970785395e-06,
      "loss": 4.7046,
      "step": 2424
    },
    {
      "epoch": 0.873087308730873,
      "grad_norm": 1.3550125360488892,
      "learning_rate": 8.110081927641566e-06,
      "loss": 4.9702,
      "step": 2425
    },
    {
      "epoch": 0.8734473447344735,
      "grad_norm": 2.8201375007629395,
      "learning_rate": 8.064696101776358e-06,
      "loss": 5.3071,
      "step": 2426
    },
    {
      "epoch": 0.8738073807380738,
      "grad_norm": 1.0882468223571777,
      "learning_rate": 8.019432290493457e-06,
      "loss": 4.851,
      "step": 2427
    },
    {
      "epoch": 0.8741674167416742,
      "grad_norm": 0.9948346018791199,
      "learning_rate": 7.974290553866005e-06,
      "loss": 5.0427,
      "step": 2428
    },
    {
      "epoch": 0.8745274527452745,
      "grad_norm": 0.6691415309906006,
      "learning_rate": 7.929270951805178e-06,
      "loss": 4.9892,
      "step": 2429
    },
    {
      "epoch": 0.8748874887488749,
      "grad_norm": 0.775093138217926,
      "learning_rate": 7.884373544060009e-06,
      "loss": 4.3908,
      "step": 2430
    },
    {
      "epoch": 0.8752475247524752,
      "grad_norm": 0.6868644952774048,
      "learning_rate": 7.839598390217396e-06,
      "loss": 4.7946,
      "step": 2431
    },
    {
      "epoch": 0.8756075607560756,
      "grad_norm": 0.6689639091491699,
      "learning_rate": 7.794945549701993e-06,
      "loss": 4.8674,
      "step": 2432
    },
    {
      "epoch": 0.875967596759676,
      "grad_norm": 0.9124707579612732,
      "learning_rate": 7.750415081776063e-06,
      "loss": 4.9911,
      "step": 2433
    },
    {
      "epoch": 0.8763276327632763,
      "grad_norm": 0.7038251161575317,
      "learning_rate": 7.70600704553951e-06,
      "loss": 4.8584,
      "step": 2434
    },
    {
      "epoch": 0.8766876687668766,
      "grad_norm": 0.7156389951705933,
      "learning_rate": 7.661721499929753e-06,
      "loss": 4.3274,
      "step": 2435
    },
    {
      "epoch": 0.877047704770477,
      "grad_norm": 0.8068670034408569,
      "learning_rate": 7.6175585037216226e-06,
      "loss": 4.5658,
      "step": 2436
    },
    {
      "epoch": 0.8774077407740775,
      "grad_norm": 0.7935437560081482,
      "learning_rate": 7.573518115527289e-06,
      "loss": 4.9122,
      "step": 2437
    },
    {
      "epoch": 0.8777677767776778,
      "grad_norm": 0.9261611700057983,
      "learning_rate": 7.529600393796232e-06,
      "loss": 4.8509,
      "step": 2438
    },
    {
      "epoch": 0.8781278127812782,
      "grad_norm": 0.8355916738510132,
      "learning_rate": 7.485805396815126e-06,
      "loss": 4.3652,
      "step": 2439
    },
    {
      "epoch": 0.8784878487848785,
      "grad_norm": 0.43560856580734253,
      "learning_rate": 7.442133182707745e-06,
      "loss": 4.5542,
      "step": 2440
    },
    {
      "epoch": 0.8788478847884789,
      "grad_norm": 0.7524927258491516,
      "learning_rate": 7.3985838094349444e-06,
      "loss": 4.7226,
      "step": 2441
    },
    {
      "epoch": 0.8792079207920792,
      "grad_norm": 0.7564715147018433,
      "learning_rate": 7.355157334794516e-06,
      "loss": 4.7208,
      "step": 2442
    },
    {
      "epoch": 0.8795679567956796,
      "grad_norm": 0.9967451691627502,
      "learning_rate": 7.3118538164211545e-06,
      "loss": 5.2971,
      "step": 2443
    },
    {
      "epoch": 0.8799279927992799,
      "grad_norm": 0.8346577286720276,
      "learning_rate": 7.2686733117863784e-06,
      "loss": 4.7256,
      "step": 2444
    },
    {
      "epoch": 0.8802880288028803,
      "grad_norm": 0.638346254825592,
      "learning_rate": 7.225615878198422e-06,
      "loss": 4.8184,
      "step": 2445
    },
    {
      "epoch": 0.8806480648064806,
      "grad_norm": 0.5529339909553528,
      "learning_rate": 7.1826815728021965e-06,
      "loss": 4.5564,
      "step": 2446
    },
    {
      "epoch": 0.881008100810081,
      "grad_norm": 0.5807334184646606,
      "learning_rate": 7.1398704525792e-06,
      "loss": 4.8166,
      "step": 2447
    },
    {
      "epoch": 0.8813681368136813,
      "grad_norm": 0.7800282835960388,
      "learning_rate": 7.097182574347472e-06,
      "loss": 5.1405,
      "step": 2448
    },
    {
      "epoch": 0.8817281728172818,
      "grad_norm": 0.6698582768440247,
      "learning_rate": 7.054617994761414e-06,
      "loss": 4.907,
      "step": 2449
    },
    {
      "epoch": 0.8820882088208821,
      "grad_norm": 1.5296711921691895,
      "learning_rate": 7.012176770311862e-06,
      "loss": 5.3242,
      "step": 2450
    },
    {
      "epoch": 0.8824482448244825,
      "grad_norm": 1.2979846000671387,
      "learning_rate": 6.969858957325904e-06,
      "loss": 4.922,
      "step": 2451
    },
    {
      "epoch": 0.8828082808280828,
      "grad_norm": 0.6412333250045776,
      "learning_rate": 6.927664611966811e-06,
      "loss": 5.0412,
      "step": 2452
    },
    {
      "epoch": 0.8831683168316832,
      "grad_norm": 0.619648814201355,
      "learning_rate": 6.8855937902340576e-06,
      "loss": 4.507,
      "step": 2453
    },
    {
      "epoch": 0.8835283528352835,
      "grad_norm": 0.5163532495498657,
      "learning_rate": 6.843646547963123e-06,
      "loss": 4.9747,
      "step": 2454
    },
    {
      "epoch": 0.8838883888388839,
      "grad_norm": 0.7194183468818665,
      "learning_rate": 6.801822940825509e-06,
      "loss": 4.5837,
      "step": 2455
    },
    {
      "epoch": 0.8842484248424842,
      "grad_norm": 0.8414213061332703,
      "learning_rate": 6.760123024328624e-06,
      "loss": 4.6327,
      "step": 2456
    },
    {
      "epoch": 0.8846084608460846,
      "grad_norm": 0.684772253036499,
      "learning_rate": 6.718546853815688e-06,
      "loss": 4.9001,
      "step": 2457
    },
    {
      "epoch": 0.8849684968496849,
      "grad_norm": 0.47863101959228516,
      "learning_rate": 6.67709448446574e-06,
      "loss": 4.8486,
      "step": 2458
    },
    {
      "epoch": 0.8853285328532853,
      "grad_norm": 0.6075344681739807,
      "learning_rate": 6.635765971293484e-06,
      "loss": 4.9541,
      "step": 2459
    },
    {
      "epoch": 0.8856885688568857,
      "grad_norm": 0.5354955196380615,
      "learning_rate": 6.594561369149199e-06,
      "loss": 4.6317,
      "step": 2460
    },
    {
      "epoch": 0.8860486048604861,
      "grad_norm": 0.7306193113327026,
      "learning_rate": 6.553480732718808e-06,
      "loss": 4.524,
      "step": 2461
    },
    {
      "epoch": 0.8864086408640864,
      "grad_norm": 0.6062951683998108,
      "learning_rate": 6.512524116523633e-06,
      "loss": 4.5702,
      "step": 2462
    },
    {
      "epoch": 0.8867686768676868,
      "grad_norm": 0.7549055814743042,
      "learning_rate": 6.4716915749204465e-06,
      "loss": 4.6434,
      "step": 2463
    },
    {
      "epoch": 0.8871287128712871,
      "grad_norm": 0.83303302526474,
      "learning_rate": 6.4309831621013005e-06,
      "loss": 4.7192,
      "step": 2464
    },
    {
      "epoch": 0.8874887488748875,
      "grad_norm": 0.6464311480522156,
      "learning_rate": 6.390398932093555e-06,
      "loss": 4.7064,
      "step": 2465
    },
    {
      "epoch": 0.8878487848784878,
      "grad_norm": 1.1855717897415161,
      "learning_rate": 6.3499389387597254e-06,
      "loss": 4.9074,
      "step": 2466
    },
    {
      "epoch": 0.8882088208820882,
      "grad_norm": 0.8404142260551453,
      "learning_rate": 6.30960323579749e-06,
      "loss": 4.8212,
      "step": 2467
    },
    {
      "epoch": 0.8885688568856885,
      "grad_norm": 0.5642232894897461,
      "learning_rate": 6.269391876739495e-06,
      "loss": 4.8076,
      "step": 2468
    },
    {
      "epoch": 0.8889288928892889,
      "grad_norm": 0.8032687306404114,
      "learning_rate": 6.229304914953405e-06,
      "loss": 5.019,
      "step": 2469
    },
    {
      "epoch": 0.8892889288928892,
      "grad_norm": 0.8474968671798706,
      "learning_rate": 6.189342403641807e-06,
      "loss": 5.0512,
      "step": 2470
    },
    {
      "epoch": 0.8896489648964897,
      "grad_norm": 0.6336872577667236,
      "learning_rate": 6.149504395842087e-06,
      "loss": 4.6737,
      "step": 2471
    },
    {
      "epoch": 0.89000900090009,
      "grad_norm": 0.704339325428009,
      "learning_rate": 6.109790944426397e-06,
      "loss": 4.5293,
      "step": 2472
    },
    {
      "epoch": 0.8903690369036904,
      "grad_norm": 0.8684128522872925,
      "learning_rate": 6.070202102101597e-06,
      "loss": 4.7989,
      "step": 2473
    },
    {
      "epoch": 0.8907290729072908,
      "grad_norm": 1.042490839958191,
      "learning_rate": 6.030737921409169e-06,
      "loss": 4.9338,
      "step": 2474
    },
    {
      "epoch": 0.8910891089108911,
      "grad_norm": 1.1774296760559082,
      "learning_rate": 5.9913984547250945e-06,
      "loss": 5.2439,
      "step": 2475
    },
    {
      "epoch": 0.8914491449144915,
      "grad_norm": 1.316601037979126,
      "learning_rate": 5.95218375425991e-06,
      "loss": 5.2065,
      "step": 2476
    },
    {
      "epoch": 0.8918091809180918,
      "grad_norm": 0.9096778631210327,
      "learning_rate": 5.913093872058528e-06,
      "loss": 4.6322,
      "step": 2477
    },
    {
      "epoch": 0.8921692169216922,
      "grad_norm": 0.6777582764625549,
      "learning_rate": 5.874128860000216e-06,
      "loss": 4.4147,
      "step": 2478
    },
    {
      "epoch": 0.8925292529252925,
      "grad_norm": 0.5754499435424805,
      "learning_rate": 5.835288769798486e-06,
      "loss": 4.833,
      "step": 2479
    },
    {
      "epoch": 0.8928892889288929,
      "grad_norm": 1.0769809484481812,
      "learning_rate": 5.7965736530010916e-06,
      "loss": 4.4729,
      "step": 2480
    },
    {
      "epoch": 0.8932493249324932,
      "grad_norm": 0.6358700394630432,
      "learning_rate": 5.757983560989921e-06,
      "loss": 4.9246,
      "step": 2481
    },
    {
      "epoch": 0.8936093609360936,
      "grad_norm": 1.1338918209075928,
      "learning_rate": 5.719518544980929e-06,
      "loss": 4.7706,
      "step": 2482
    },
    {
      "epoch": 0.893969396939694,
      "grad_norm": 0.4643517732620239,
      "learning_rate": 5.681178656024055e-06,
      "loss": 4.7676,
      "step": 2483
    },
    {
      "epoch": 0.8943294329432944,
      "grad_norm": 1.0510177612304688,
      "learning_rate": 5.642963945003188e-06,
      "loss": 4.6983,
      "step": 2484
    },
    {
      "epoch": 0.8946894689468947,
      "grad_norm": 0.8604787588119507,
      "learning_rate": 5.604874462636078e-06,
      "loss": 4.4971,
      "step": 2485
    },
    {
      "epoch": 0.8950495049504951,
      "grad_norm": 0.8846144080162048,
      "learning_rate": 5.566910259474289e-06,
      "loss": 4.4979,
      "step": 2486
    },
    {
      "epoch": 0.8954095409540954,
      "grad_norm": 0.7206079959869385,
      "learning_rate": 5.529071385903084e-06,
      "loss": 4.8595,
      "step": 2487
    },
    {
      "epoch": 0.8957695769576958,
      "grad_norm": 0.8029129505157471,
      "learning_rate": 5.491357892141425e-06,
      "loss": 4.871,
      "step": 2488
    },
    {
      "epoch": 0.8961296129612961,
      "grad_norm": 0.5468530654907227,
      "learning_rate": 5.453769828241872e-06,
      "loss": 4.4908,
      "step": 2489
    },
    {
      "epoch": 0.8964896489648965,
      "grad_norm": 1.086614727973938,
      "learning_rate": 5.416307244090502e-06,
      "loss": 4.8457,
      "step": 2490
    },
    {
      "epoch": 0.8968496849684968,
      "grad_norm": 1.064418077468872,
      "learning_rate": 5.378970189406829e-06,
      "loss": 4.6813,
      "step": 2491
    },
    {
      "epoch": 0.8972097209720972,
      "grad_norm": 0.5295194387435913,
      "learning_rate": 5.341758713743828e-06,
      "loss": 4.557,
      "step": 2492
    },
    {
      "epoch": 0.8975697569756975,
      "grad_norm": 0.8219357132911682,
      "learning_rate": 5.304672866487792e-06,
      "loss": 4.8301,
      "step": 2493
    },
    {
      "epoch": 0.897929792979298,
      "grad_norm": 0.9336304664611816,
      "learning_rate": 5.267712696858229e-06,
      "loss": 4.7836,
      "step": 2494
    },
    {
      "epoch": 0.8982898289828983,
      "grad_norm": 0.8698000907897949,
      "learning_rate": 5.230878253907912e-06,
      "loss": 4.6849,
      "step": 2495
    },
    {
      "epoch": 0.8986498649864987,
      "grad_norm": 0.6905087232589722,
      "learning_rate": 5.194169586522734e-06,
      "loss": 4.7967,
      "step": 2496
    },
    {
      "epoch": 0.899009900990099,
      "grad_norm": 0.754138171672821,
      "learning_rate": 5.157586743421672e-06,
      "loss": 4.9295,
      "step": 2497
    },
    {
      "epoch": 0.8993699369936994,
      "grad_norm": 1.8262755870819092,
      "learning_rate": 5.121129773156663e-06,
      "loss": 5.313,
      "step": 2498
    },
    {
      "epoch": 0.8997299729972997,
      "grad_norm": 0.7745803594589233,
      "learning_rate": 5.0847987241126385e-06,
      "loss": 5.1595,
      "step": 2499
    },
    {
      "epoch": 0.9000900090009001,
      "grad_norm": 1.3876433372497559,
      "learning_rate": 5.0485936445074046e-06,
      "loss": 5.2019,
      "step": 2500
    },
    {
      "epoch": 0.9004500450045004,
      "grad_norm": 1.133023738861084,
      "learning_rate": 5.012514582391592e-06,
      "loss": 4.5723,
      "step": 2501
    },
    {
      "epoch": 0.9008100810081008,
      "grad_norm": 0.6465590000152588,
      "learning_rate": 4.976561585648509e-06,
      "loss": 4.7929,
      "step": 2502
    },
    {
      "epoch": 0.9011701170117011,
      "grad_norm": 1.2847857475280762,
      "learning_rate": 4.9407347019942544e-06,
      "loss": 4.8718,
      "step": 2503
    },
    {
      "epoch": 0.9015301530153015,
      "grad_norm": 0.569114089012146,
      "learning_rate": 4.905033978977491e-06,
      "loss": 4.4804,
      "step": 2504
    },
    {
      "epoch": 0.9018901890189019,
      "grad_norm": 0.9793164134025574,
      "learning_rate": 4.869459463979465e-06,
      "loss": 4.986,
      "step": 2505
    },
    {
      "epoch": 0.9022502250225023,
      "grad_norm": 0.5514426231384277,
      "learning_rate": 4.8340112042139065e-06,
      "loss": 4.9524,
      "step": 2506
    },
    {
      "epoch": 0.9026102610261026,
      "grad_norm": 0.8211607336997986,
      "learning_rate": 4.798689246727006e-06,
      "loss": 4.8468,
      "step": 2507
    },
    {
      "epoch": 0.902970297029703,
      "grad_norm": 0.5269903540611267,
      "learning_rate": 4.7634936383973095e-06,
      "loss": 4.8626,
      "step": 2508
    },
    {
      "epoch": 0.9033303330333033,
      "grad_norm": 0.6444000005722046,
      "learning_rate": 4.728424425935707e-06,
      "loss": 4.551,
      "step": 2509
    },
    {
      "epoch": 0.9036903690369037,
      "grad_norm": 1.075435757637024,
      "learning_rate": 4.693481655885257e-06,
      "loss": 4.8247,
      "step": 2510
    },
    {
      "epoch": 0.904050405040504,
      "grad_norm": 1.0397629737854004,
      "learning_rate": 4.658665374621307e-06,
      "loss": 4.6963,
      "step": 2511
    },
    {
      "epoch": 0.9044104410441044,
      "grad_norm": 0.6805405616760254,
      "learning_rate": 4.623975628351273e-06,
      "loss": 4.4516,
      "step": 2512
    },
    {
      "epoch": 0.9047704770477047,
      "grad_norm": 0.7398169040679932,
      "learning_rate": 4.58941246311464e-06,
      "loss": 4.7951,
      "step": 2513
    },
    {
      "epoch": 0.9051305130513051,
      "grad_norm": 0.6716864109039307,
      "learning_rate": 4.554975924782912e-06,
      "loss": 4.7471,
      "step": 2514
    },
    {
      "epoch": 0.9054905490549054,
      "grad_norm": 0.6767914295196533,
      "learning_rate": 4.520666059059531e-06,
      "loss": 4.5634,
      "step": 2515
    },
    {
      "epoch": 0.9058505850585058,
      "grad_norm": 0.7175542712211609,
      "learning_rate": 4.486482911479839e-06,
      "loss": 4.719,
      "step": 2516
    },
    {
      "epoch": 0.9062106210621063,
      "grad_norm": 0.9069615602493286,
      "learning_rate": 4.452426527410947e-06,
      "loss": 5.5713,
      "step": 2517
    },
    {
      "epoch": 0.9065706570657066,
      "grad_norm": 0.6263923048973083,
      "learning_rate": 4.418496952051798e-06,
      "loss": 4.7829,
      "step": 2518
    },
    {
      "epoch": 0.906930693069307,
      "grad_norm": 0.7558562159538269,
      "learning_rate": 4.384694230432984e-06,
      "loss": 4.9266,
      "step": 2519
    },
    {
      "epoch": 0.9072907290729073,
      "grad_norm": 0.6696991324424744,
      "learning_rate": 4.351018407416763e-06,
      "loss": 4.3571,
      "step": 2520
    },
    {
      "epoch": 0.9076507650765077,
      "grad_norm": 0.6993823051452637,
      "learning_rate": 4.317469527696983e-06,
      "loss": 5.2419,
      "step": 2521
    },
    {
      "epoch": 0.908010801080108,
      "grad_norm": 0.6072081923484802,
      "learning_rate": 4.2840476357989825e-06,
      "loss": 4.9883,
      "step": 2522
    },
    {
      "epoch": 0.9083708370837084,
      "grad_norm": 0.8503673672676086,
      "learning_rate": 4.250752776079614e-06,
      "loss": 5.0176,
      "step": 2523
    },
    {
      "epoch": 0.9087308730873087,
      "grad_norm": 0.9142279624938965,
      "learning_rate": 4.217584992727108e-06,
      "loss": 5.2182,
      "step": 2524
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 1.77701735496521,
      "learning_rate": 4.184544329761009e-06,
      "loss": 5.2836,
      "step": 2525
    },
    {
      "epoch": 0.9094509450945094,
      "grad_norm": 2.4094419479370117,
      "learning_rate": 4.151630831032205e-06,
      "loss": 4.853,
      "step": 2526
    },
    {
      "epoch": 0.9098109810981098,
      "grad_norm": 0.5602378249168396,
      "learning_rate": 4.118844540222788e-06,
      "loss": 5.1699,
      "step": 2527
    },
    {
      "epoch": 0.9101710171017102,
      "grad_norm": 0.7844763994216919,
      "learning_rate": 4.0861855008460405e-06,
      "loss": 4.8816,
      "step": 2528
    },
    {
      "epoch": 0.9105310531053106,
      "grad_norm": 0.5660812258720398,
      "learning_rate": 4.0536537562463225e-06,
      "loss": 4.8106,
      "step": 2529
    },
    {
      "epoch": 0.9108910891089109,
      "grad_norm": 0.5048322081565857,
      "learning_rate": 4.021249349599077e-06,
      "loss": 4.2835,
      "step": 2530
    },
    {
      "epoch": 0.9112511251125113,
      "grad_norm": 0.7268801927566528,
      "learning_rate": 3.988972323910778e-06,
      "loss": 4.59,
      "step": 2531
    },
    {
      "epoch": 0.9116111611161116,
      "grad_norm": 0.7188135981559753,
      "learning_rate": 3.95682272201876e-06,
      "loss": 4.6502,
      "step": 2532
    },
    {
      "epoch": 0.911971197119712,
      "grad_norm": 0.6325691342353821,
      "learning_rate": 3.924800586591326e-06,
      "loss": 4.7787,
      "step": 2533
    },
    {
      "epoch": 0.9123312331233123,
      "grad_norm": 0.5503108501434326,
      "learning_rate": 3.892905960127546e-06,
      "loss": 4.6064,
      "step": 2534
    },
    {
      "epoch": 0.9126912691269127,
      "grad_norm": 0.8401983380317688,
      "learning_rate": 3.861138884957316e-06,
      "loss": 4.3366,
      "step": 2535
    },
    {
      "epoch": 0.913051305130513,
      "grad_norm": 0.521528422832489,
      "learning_rate": 3.829499403241221e-06,
      "loss": 4.7727,
      "step": 2536
    },
    {
      "epoch": 0.9134113411341134,
      "grad_norm": 0.5778352618217468,
      "learning_rate": 3.797987556970495e-06,
      "loss": 4.9353,
      "step": 2537
    },
    {
      "epoch": 0.9137713771377137,
      "grad_norm": 0.5739848613739014,
      "learning_rate": 3.7666033879670048e-06,
      "loss": 4.973,
      "step": 2538
    },
    {
      "epoch": 0.9141314131413142,
      "grad_norm": 0.5352855324745178,
      "learning_rate": 3.735346937883144e-06,
      "loss": 4.511,
      "step": 2539
    },
    {
      "epoch": 0.9144914491449145,
      "grad_norm": 0.7256152033805847,
      "learning_rate": 3.7042182482018075e-06,
      "loss": 4.5012,
      "step": 2540
    },
    {
      "epoch": 0.9148514851485149,
      "grad_norm": 0.8612756133079529,
      "learning_rate": 3.6732173602363363e-06,
      "loss": 4.6615,
      "step": 2541
    },
    {
      "epoch": 0.9152115211521152,
      "grad_norm": 0.6105715036392212,
      "learning_rate": 3.6423443151304526e-06,
      "loss": 4.451,
      "step": 2542
    },
    {
      "epoch": 0.9155715571557156,
      "grad_norm": 0.6826533079147339,
      "learning_rate": 3.611599153858214e-06,
      "loss": 4.6159,
      "step": 2543
    },
    {
      "epoch": 0.9159315931593159,
      "grad_norm": 0.6430963277816772,
      "learning_rate": 3.580981917223913e-06,
      "loss": 4.7071,
      "step": 2544
    },
    {
      "epoch": 0.9162916291629163,
      "grad_norm": 0.6871779561042786,
      "learning_rate": 3.5504926458621246e-06,
      "loss": 4.741,
      "step": 2545
    },
    {
      "epoch": 0.9166516651665166,
      "grad_norm": 0.7385034561157227,
      "learning_rate": 3.5201313802375456e-06,
      "loss": 4.6154,
      "step": 2546
    },
    {
      "epoch": 0.917011701170117,
      "grad_norm": 0.9472239017486572,
      "learning_rate": 3.4898981606450333e-06,
      "loss": 5.0247,
      "step": 2547
    },
    {
      "epoch": 0.9173717371737173,
      "grad_norm": 0.7648311853408813,
      "learning_rate": 3.4597930272094235e-06,
      "loss": 5.1778,
      "step": 2548
    },
    {
      "epoch": 0.9177317731773177,
      "grad_norm": 1.013818383216858,
      "learning_rate": 3.4298160198856568e-06,
      "loss": 4.9648,
      "step": 2549
    },
    {
      "epoch": 0.918091809180918,
      "grad_norm": 1.1295243501663208,
      "learning_rate": 3.3999671784585517e-06,
      "loss": 5.2376,
      "step": 2550
    },
    {
      "epoch": 0.9184518451845185,
      "grad_norm": 3.160092830657959,
      "learning_rate": 3.370246542542865e-06,
      "loss": 4.6393,
      "step": 2551
    },
    {
      "epoch": 0.9188118811881189,
      "grad_norm": 0.7330535054206848,
      "learning_rate": 3.3406541515832003e-06,
      "loss": 4.5272,
      "step": 2552
    },
    {
      "epoch": 0.9191719171917192,
      "grad_norm": 0.9496917724609375,
      "learning_rate": 3.311190044853951e-06,
      "loss": 5.049,
      "step": 2553
    },
    {
      "epoch": 0.9195319531953196,
      "grad_norm": 0.7707210779190063,
      "learning_rate": 3.2818542614592497e-06,
      "loss": 4.7573,
      "step": 2554
    },
    {
      "epoch": 0.9198919891989199,
      "grad_norm": 0.673030436038971,
      "learning_rate": 3.252646840332918e-06,
      "loss": 4.6971,
      "step": 2555
    },
    {
      "epoch": 0.9202520252025203,
      "grad_norm": 0.6012186408042908,
      "learning_rate": 3.2235678202384267e-06,
      "loss": 4.5512,
      "step": 2556
    },
    {
      "epoch": 0.9206120612061206,
      "grad_norm": 1.0911415815353394,
      "learning_rate": 3.1946172397688267e-06,
      "loss": 4.3919,
      "step": 2557
    },
    {
      "epoch": 0.920972097209721,
      "grad_norm": 0.7308849692344666,
      "learning_rate": 3.1657951373467497e-06,
      "loss": 4.8589,
      "step": 2558
    },
    {
      "epoch": 0.9213321332133213,
      "grad_norm": 0.676698625087738,
      "learning_rate": 3.1371015512242306e-06,
      "loss": 4.8728,
      "step": 2559
    },
    {
      "epoch": 0.9216921692169217,
      "grad_norm": 1.072251796722412,
      "learning_rate": 3.1085365194828075e-06,
      "loss": 5.1019,
      "step": 2560
    },
    {
      "epoch": 0.922052205220522,
      "grad_norm": 0.6631917953491211,
      "learning_rate": 3.0801000800333877e-06,
      "loss": 4.6797,
      "step": 2561
    },
    {
      "epoch": 0.9224122412241225,
      "grad_norm": 0.5610212683677673,
      "learning_rate": 3.051792270616216e-06,
      "loss": 4.6816,
      "step": 2562
    },
    {
      "epoch": 0.9227722772277228,
      "grad_norm": 0.5413954257965088,
      "learning_rate": 3.023613128800795e-06,
      "loss": 4.6725,
      "step": 2563
    },
    {
      "epoch": 0.9231323132313232,
      "grad_norm": 0.7505655288696289,
      "learning_rate": 2.995562691985898e-06,
      "loss": 4.7661,
      "step": 2564
    },
    {
      "epoch": 0.9234923492349235,
      "grad_norm": 0.810063362121582,
      "learning_rate": 2.9676409973994566e-06,
      "loss": 4.773,
      "step": 2565
    },
    {
      "epoch": 0.9238523852385239,
      "grad_norm": 1.0670089721679688,
      "learning_rate": 2.939848082098562e-06,
      "loss": 5.1925,
      "step": 2566
    },
    {
      "epoch": 0.9242124212421242,
      "grad_norm": 0.6282406449317932,
      "learning_rate": 2.912183982969385e-06,
      "loss": 4.9323,
      "step": 2567
    },
    {
      "epoch": 0.9245724572457246,
      "grad_norm": 0.9556277394294739,
      "learning_rate": 2.8846487367271135e-06,
      "loss": 4.8946,
      "step": 2568
    },
    {
      "epoch": 0.9249324932493249,
      "grad_norm": 0.5398349761962891,
      "learning_rate": 2.8572423799159586e-06,
      "loss": 4.5384,
      "step": 2569
    },
    {
      "epoch": 0.9252925292529253,
      "grad_norm": 0.6668215990066528,
      "learning_rate": 2.8299649489090475e-06,
      "loss": 4.6729,
      "step": 2570
    },
    {
      "epoch": 0.9256525652565256,
      "grad_norm": 0.6637030839920044,
      "learning_rate": 2.802816479908399e-06,
      "loss": 4.8677,
      "step": 2571
    },
    {
      "epoch": 0.926012601260126,
      "grad_norm": 0.5411921143531799,
      "learning_rate": 2.7757970089449024e-06,
      "loss": 4.6855,
      "step": 2572
    },
    {
      "epoch": 0.9263726372637263,
      "grad_norm": 0.5961670875549316,
      "learning_rate": 2.748906571878207e-06,
      "loss": 4.913,
      "step": 2573
    },
    {
      "epoch": 0.9267326732673268,
      "grad_norm": 0.8857282400131226,
      "learning_rate": 2.722145204396742e-06,
      "loss": 5.2247,
      "step": 2574
    },
    {
      "epoch": 0.9270927092709271,
      "grad_norm": 1.259244680404663,
      "learning_rate": 2.6955129420176196e-06,
      "loss": 5.4074,
      "step": 2575
    },
    {
      "epoch": 0.9274527452745275,
      "grad_norm": 0.9549702405929565,
      "learning_rate": 2.6690098200866098e-06,
      "loss": 4.5297,
      "step": 2576
    },
    {
      "epoch": 0.9278127812781278,
      "grad_norm": 0.7229859232902527,
      "learning_rate": 2.6426358737781098e-06,
      "loss": 4.9248,
      "step": 2577
    },
    {
      "epoch": 0.9281728172817282,
      "grad_norm": 0.722059428691864,
      "learning_rate": 2.6163911380950425e-06,
      "loss": 4.8236,
      "step": 2578
    },
    {
      "epoch": 0.9285328532853285,
      "grad_norm": 0.6506609916687012,
      "learning_rate": 2.590275647868867e-06,
      "loss": 4.4557,
      "step": 2579
    },
    {
      "epoch": 0.9288928892889289,
      "grad_norm": 0.7389491200447083,
      "learning_rate": 2.564289437759515e-06,
      "loss": 4.7994,
      "step": 2580
    },
    {
      "epoch": 0.9292529252925292,
      "grad_norm": 0.5561854839324951,
      "learning_rate": 2.53843254225532e-06,
      "loss": 4.2714,
      "step": 2581
    },
    {
      "epoch": 0.9296129612961296,
      "grad_norm": 0.7266308069229126,
      "learning_rate": 2.5127049956730207e-06,
      "loss": 4.3677,
      "step": 2582
    },
    {
      "epoch": 0.9299729972997299,
      "grad_norm": 0.7052050828933716,
      "learning_rate": 2.4871068321576596e-06,
      "loss": 4.5698,
      "step": 2583
    },
    {
      "epoch": 0.9303330333033303,
      "grad_norm": 0.7006728649139404,
      "learning_rate": 2.4616380856825716e-06,
      "loss": 4.7132,
      "step": 2584
    },
    {
      "epoch": 0.9306930693069307,
      "grad_norm": 0.6953990459442139,
      "learning_rate": 2.436298790049363e-06,
      "loss": 4.6789,
      "step": 2585
    },
    {
      "epoch": 0.9310531053105311,
      "grad_norm": 0.6005169749259949,
      "learning_rate": 2.4110889788877656e-06,
      "loss": 4.4247,
      "step": 2586
    },
    {
      "epoch": 0.9314131413141314,
      "grad_norm": 0.5607119798660278,
      "learning_rate": 2.3860086856557383e-06,
      "loss": 4.5032,
      "step": 2587
    },
    {
      "epoch": 0.9317731773177318,
      "grad_norm": 0.8107718825340271,
      "learning_rate": 2.3610579436393e-06,
      "loss": 4.9354,
      "step": 2588
    },
    {
      "epoch": 0.9321332133213321,
      "grad_norm": 0.5992099642753601,
      "learning_rate": 2.33623678595255e-06,
      "loss": 4.5928,
      "step": 2589
    },
    {
      "epoch": 0.9324932493249325,
      "grad_norm": 0.5759278535842896,
      "learning_rate": 2.311545245537594e-06,
      "loss": 4.8887,
      "step": 2590
    },
    {
      "epoch": 0.9328532853285328,
      "grad_norm": 0.7263954281806946,
      "learning_rate": 2.286983355164529e-06,
      "loss": 4.8844,
      "step": 2591
    },
    {
      "epoch": 0.9332133213321332,
      "grad_norm": 0.6867969632148743,
      "learning_rate": 2.2625511474313685e-06,
      "loss": 4.5716,
      "step": 2592
    },
    {
      "epoch": 0.9335733573357335,
      "grad_norm": 0.6144258379936218,
      "learning_rate": 2.23824865476403e-06,
      "loss": 4.7591,
      "step": 2593
    },
    {
      "epoch": 0.9339333933393339,
      "grad_norm": 0.6983000040054321,
      "learning_rate": 2.2140759094162467e-06,
      "loss": 4.5595,
      "step": 2594
    },
    {
      "epoch": 0.9342934293429342,
      "grad_norm": 0.7773663997650146,
      "learning_rate": 2.1900329434695887e-06,
      "loss": 4.6514,
      "step": 2595
    },
    {
      "epoch": 0.9346534653465347,
      "grad_norm": 0.7720439434051514,
      "learning_rate": 2.166119788833354e-06,
      "loss": 4.9227,
      "step": 2596
    },
    {
      "epoch": 0.9350135013501351,
      "grad_norm": 0.7285527586936951,
      "learning_rate": 2.1423364772445887e-06,
      "loss": 4.8956,
      "step": 2597
    },
    {
      "epoch": 0.9353735373537354,
      "grad_norm": 1.0870779752731323,
      "learning_rate": 2.118683040267999e-06,
      "loss": 5.0033,
      "step": 2598
    },
    {
      "epoch": 0.9357335733573358,
      "grad_norm": 0.965726912021637,
      "learning_rate": 2.095159509295919e-06,
      "loss": 5.2102,
      "step": 2599
    },
    {
      "epoch": 0.9360936093609361,
      "grad_norm": 1.4614653587341309,
      "learning_rate": 2.0717659155482738e-06,
      "loss": 5.5101,
      "step": 2600
    },
    {
      "epoch": 0.9364536453645365,
      "grad_norm": 0.5772082209587097,
      "learning_rate": 2.0485022900725513e-06,
      "loss": 4.786,
      "step": 2601
    },
    {
      "epoch": 0.9368136813681368,
      "grad_norm": 0.7124701142311096,
      "learning_rate": 2.025368663743743e-06,
      "loss": 4.4701,
      "step": 2602
    },
    {
      "epoch": 0.9371737173717372,
      "grad_norm": 0.7923992872238159,
      "learning_rate": 2.002365067264289e-06,
      "loss": 5.1817,
      "step": 2603
    },
    {
      "epoch": 0.9375337533753375,
      "grad_norm": 0.6645485758781433,
      "learning_rate": 1.9794915311641018e-06,
      "loss": 4.5608,
      "step": 2604
    },
    {
      "epoch": 0.9378937893789379,
      "grad_norm": 1.0128847360610962,
      "learning_rate": 1.9567480858004306e-06,
      "loss": 4.549,
      "step": 2605
    },
    {
      "epoch": 0.9382538253825382,
      "grad_norm": 0.6514415144920349,
      "learning_rate": 1.9341347613579087e-06,
      "loss": 5.0031,
      "step": 2606
    },
    {
      "epoch": 0.9386138613861386,
      "grad_norm": 0.9877171516418457,
      "learning_rate": 1.91165158784844e-06,
      "loss": 5.0519,
      "step": 2607
    },
    {
      "epoch": 0.938973897389739,
      "grad_norm": 0.6819136738777161,
      "learning_rate": 1.889298595111233e-06,
      "loss": 4.6016,
      "step": 2608
    },
    {
      "epoch": 0.9393339333933394,
      "grad_norm": 0.9286605715751648,
      "learning_rate": 1.8670758128126909e-06,
      "loss": 4.8841,
      "step": 2609
    },
    {
      "epoch": 0.9396939693969397,
      "grad_norm": 0.5582537651062012,
      "learning_rate": 1.844983270446432e-06,
      "loss": 4.7466,
      "step": 2610
    },
    {
      "epoch": 0.9400540054005401,
      "grad_norm": 0.9149574041366577,
      "learning_rate": 1.8230209973331914e-06,
      "loss": 4.5378,
      "step": 2611
    },
    {
      "epoch": 0.9404140414041404,
      "grad_norm": 0.5852335691452026,
      "learning_rate": 1.8011890226208527e-06,
      "loss": 4.906,
      "step": 2612
    },
    {
      "epoch": 0.9407740774077408,
      "grad_norm": 0.7403162717819214,
      "learning_rate": 1.7794873752843277e-06,
      "loss": 4.6849,
      "step": 2613
    },
    {
      "epoch": 0.9411341134113411,
      "grad_norm": 0.7608280777931213,
      "learning_rate": 1.7579160841256104e-06,
      "loss": 4.6213,
      "step": 2614
    },
    {
      "epoch": 0.9414941494149415,
      "grad_norm": 0.7691354751586914,
      "learning_rate": 1.7364751777736332e-06,
      "loss": 4.6725,
      "step": 2615
    },
    {
      "epoch": 0.9418541854185418,
      "grad_norm": 0.7262532114982605,
      "learning_rate": 1.7151646846843227e-06,
      "loss": 4.9798,
      "step": 2616
    },
    {
      "epoch": 0.9422142214221422,
      "grad_norm": 0.5878902077674866,
      "learning_rate": 1.6939846331405108e-06,
      "loss": 4.707,
      "step": 2617
    },
    {
      "epoch": 0.9425742574257425,
      "grad_norm": 0.637844443321228,
      "learning_rate": 1.6729350512519005e-06,
      "loss": 4.9285,
      "step": 2618
    },
    {
      "epoch": 0.942934293429343,
      "grad_norm": 0.6767174005508423,
      "learning_rate": 1.6520159669550783e-06,
      "loss": 4.8655,
      "step": 2619
    },
    {
      "epoch": 0.9432943294329433,
      "grad_norm": 0.6745604872703552,
      "learning_rate": 1.6312274080133804e-06,
      "loss": 4.9893,
      "step": 2620
    },
    {
      "epoch": 0.9436543654365437,
      "grad_norm": 0.6165941953659058,
      "learning_rate": 1.6105694020169593e-06,
      "loss": 4.676,
      "step": 2621
    },
    {
      "epoch": 0.944014401440144,
      "grad_norm": 1.2223420143127441,
      "learning_rate": 1.5900419763826614e-06,
      "loss": 4.7306,
      "step": 2622
    },
    {
      "epoch": 0.9443744374437444,
      "grad_norm": 0.6925899386405945,
      "learning_rate": 1.5696451583540827e-06,
      "loss": 5.2183,
      "step": 2623
    },
    {
      "epoch": 0.9447344734473447,
      "grad_norm": 0.6742193102836609,
      "learning_rate": 1.5493789750014031e-06,
      "loss": 4.9525,
      "step": 2624
    },
    {
      "epoch": 0.9450945094509451,
      "grad_norm": 1.818490982055664,
      "learning_rate": 1.5292434532215072e-06,
      "loss": 5.372,
      "step": 2625
    },
    {
      "epoch": 0.9454545454545454,
      "grad_norm": 3.1944949626922607,
      "learning_rate": 1.5092386197378183e-06,
      "loss": 4.7497,
      "step": 2626
    },
    {
      "epoch": 0.9458145814581458,
      "grad_norm": 0.5739015340805054,
      "learning_rate": 1.489364501100332e-06,
      "loss": 4.6389,
      "step": 2627
    },
    {
      "epoch": 0.9461746174617461,
      "grad_norm": 0.8385379910469055,
      "learning_rate": 1.4696211236855272e-06,
      "loss": 4.8675,
      "step": 2628
    },
    {
      "epoch": 0.9465346534653465,
      "grad_norm": 0.7263203859329224,
      "learning_rate": 1.4500085136964326e-06,
      "loss": 4.5676,
      "step": 2629
    },
    {
      "epoch": 0.946894689468947,
      "grad_norm": 0.9063706398010254,
      "learning_rate": 1.430526697162482e-06,
      "loss": 4.4987,
      "step": 2630
    },
    {
      "epoch": 0.9472547254725473,
      "grad_norm": 0.9547297954559326,
      "learning_rate": 1.4111756999395154e-06,
      "loss": 4.7062,
      "step": 2631
    },
    {
      "epoch": 0.9476147614761476,
      "grad_norm": 0.7871283292770386,
      "learning_rate": 1.3919555477097668e-06,
      "loss": 4.6941,
      "step": 2632
    },
    {
      "epoch": 0.947974797479748,
      "grad_norm": 0.8934873342514038,
      "learning_rate": 1.3728662659818204e-06,
      "loss": 4.5796,
      "step": 2633
    },
    {
      "epoch": 0.9483348334833483,
      "grad_norm": 0.5550655126571655,
      "learning_rate": 1.3539078800905659e-06,
      "loss": 4.6309,
      "step": 2634
    },
    {
      "epoch": 0.9486948694869487,
      "grad_norm": 0.8748136758804321,
      "learning_rate": 1.3350804151971653e-06,
      "loss": 4.7027,
      "step": 2635
    },
    {
      "epoch": 0.949054905490549,
      "grad_norm": 0.6412554383277893,
      "learning_rate": 1.3163838962890195e-06,
      "loss": 4.7536,
      "step": 2636
    },
    {
      "epoch": 0.9494149414941494,
      "grad_norm": 0.5575628280639648,
      "learning_rate": 1.2978183481797801e-06,
      "loss": 4.9255,
      "step": 2637
    },
    {
      "epoch": 0.9497749774977498,
      "grad_norm": 0.6832976341247559,
      "learning_rate": 1.2793837955092258e-06,
      "loss": 4.8386,
      "step": 2638
    },
    {
      "epoch": 0.9501350135013501,
      "grad_norm": 0.7369568347930908,
      "learning_rate": 1.261080262743297e-06,
      "loss": 4.8507,
      "step": 2639
    },
    {
      "epoch": 0.9504950495049505,
      "grad_norm": 0.6423168778419495,
      "learning_rate": 1.2429077741740736e-06,
      "loss": 4.5613,
      "step": 2640
    },
    {
      "epoch": 0.9508550855085508,
      "grad_norm": 0.6358969807624817,
      "learning_rate": 1.2248663539196848e-06,
      "loss": 4.5165,
      "step": 2641
    },
    {
      "epoch": 0.9512151215121513,
      "grad_norm": 0.726514995098114,
      "learning_rate": 1.2069560259243328e-06,
      "loss": 5.0404,
      "step": 2642
    },
    {
      "epoch": 0.9515751575157516,
      "grad_norm": 0.6469401121139526,
      "learning_rate": 1.1891768139582037e-06,
      "loss": 4.3748,
      "step": 2643
    },
    {
      "epoch": 0.951935193519352,
      "grad_norm": 0.7196247577667236,
      "learning_rate": 1.1715287416175113e-06,
      "loss": 4.7141,
      "step": 2644
    },
    {
      "epoch": 0.9522952295229523,
      "grad_norm": 0.6038268804550171,
      "learning_rate": 1.1540118323243865e-06,
      "loss": 4.5587,
      "step": 2645
    },
    {
      "epoch": 0.9526552655265527,
      "grad_norm": 0.6786099076271057,
      "learning_rate": 1.1366261093268992e-06,
      "loss": 5.1968,
      "step": 2646
    },
    {
      "epoch": 0.953015301530153,
      "grad_norm": 0.5878410339355469,
      "learning_rate": 1.1193715956990258e-06,
      "loss": 4.7435,
      "step": 2647
    },
    {
      "epoch": 0.9533753375337534,
      "grad_norm": 0.8352496027946472,
      "learning_rate": 1.1022483143405705e-06,
      "loss": 4.9383,
      "step": 2648
    },
    {
      "epoch": 0.9537353735373537,
      "grad_norm": 0.7436163425445557,
      "learning_rate": 1.08525628797721e-06,
      "loss": 4.9565,
      "step": 2649
    },
    {
      "epoch": 0.9540954095409541,
      "grad_norm": 0.9482848644256592,
      "learning_rate": 1.068395539160394e-06,
      "loss": 4.9916,
      "step": 2650
    },
    {
      "epoch": 0.9544554455445544,
      "grad_norm": 1.374624252319336,
      "learning_rate": 1.0516660902673448e-06,
      "loss": 4.6837,
      "step": 2651
    },
    {
      "epoch": 0.9548154815481548,
      "grad_norm": 0.5657400488853455,
      "learning_rate": 1.035067963501024e-06,
      "loss": 4.3842,
      "step": 2652
    },
    {
      "epoch": 0.9551755175517552,
      "grad_norm": 0.640466570854187,
      "learning_rate": 1.018601180890133e-06,
      "loss": 4.8339,
      "step": 2653
    },
    {
      "epoch": 0.9555355535553556,
      "grad_norm": 0.7675738334655762,
      "learning_rate": 1.0022657642890231e-06,
      "loss": 4.5358,
      "step": 2654
    },
    {
      "epoch": 0.9558955895589559,
      "grad_norm": 0.6688444018363953,
      "learning_rate": 9.86061735377708e-07,
      "loss": 4.8008,
      "step": 2655
    },
    {
      "epoch": 0.9562556255625563,
      "grad_norm": 0.7679070830345154,
      "learning_rate": 9.699891156618402e-07,
      "loss": 4.8804,
      "step": 2656
    },
    {
      "epoch": 0.9566156615661566,
      "grad_norm": 0.7303805351257324,
      "learning_rate": 9.540479264726676e-07,
      "loss": 4.6286,
      "step": 2657
    },
    {
      "epoch": 0.956975697569757,
      "grad_norm": 0.7357656955718994,
      "learning_rate": 9.382381889669667e-07,
      "loss": 4.7679,
      "step": 2658
    },
    {
      "epoch": 0.9573357335733573,
      "grad_norm": 0.8778092265129089,
      "learning_rate": 9.225599241271199e-07,
      "loss": 4.8732,
      "step": 2659
    },
    {
      "epoch": 0.9576957695769577,
      "grad_norm": 0.4600130021572113,
      "learning_rate": 9.070131527609604e-07,
      "loss": 4.8048,
      "step": 2660
    },
    {
      "epoch": 0.958055805580558,
      "grad_norm": 0.6551803946495056,
      "learning_rate": 8.9159789550185e-07,
      "loss": 4.506,
      "step": 2661
    },
    {
      "epoch": 0.9584158415841584,
      "grad_norm": 0.7119221687316895,
      "learning_rate": 8.763141728085789e-07,
      "loss": 4.5518,
      "step": 2662
    },
    {
      "epoch": 0.9587758775877587,
      "grad_norm": 0.571420431137085,
      "learning_rate": 8.611620049653879e-07,
      "loss": 4.8277,
      "step": 2663
    },
    {
      "epoch": 0.9591359135913592,
      "grad_norm": 0.9699096083641052,
      "learning_rate": 8.461414120819133e-07,
      "loss": 4.9504,
      "step": 2664
    },
    {
      "epoch": 0.9594959495949595,
      "grad_norm": 1.1072616577148438,
      "learning_rate": 8.312524140931644e-07,
      "loss": 4.5514,
      "step": 2665
    },
    {
      "epoch": 0.9598559855985599,
      "grad_norm": 0.5243213176727295,
      "learning_rate": 8.16495030759501e-07,
      "loss": 5.0715,
      "step": 2666
    },
    {
      "epoch": 0.9602160216021602,
      "grad_norm": 1.0006332397460938,
      "learning_rate": 8.018692816666118e-07,
      "loss": 5.0062,
      "step": 2667
    },
    {
      "epoch": 0.9605760576057606,
      "grad_norm": 0.7771649956703186,
      "learning_rate": 7.873751862254696e-07,
      "loss": 4.8082,
      "step": 2668
    },
    {
      "epoch": 0.9609360936093609,
      "grad_norm": 0.6831532120704651,
      "learning_rate": 7.730127636723539e-07,
      "loss": 4.8668,
      "step": 2669
    },
    {
      "epoch": 0.9612961296129613,
      "grad_norm": 0.9201724529266357,
      "learning_rate": 7.587820330687389e-07,
      "loss": 4.5572,
      "step": 2670
    },
    {
      "epoch": 0.9616561656165616,
      "grad_norm": 0.730021595954895,
      "learning_rate": 7.446830133013616e-07,
      "loss": 4.5861,
      "step": 2671
    },
    {
      "epoch": 0.962016201620162,
      "grad_norm": 0.7570610642433167,
      "learning_rate": 7.307157230821426e-07,
      "loss": 5.0164,
      "step": 2672
    },
    {
      "epoch": 0.9623762376237623,
      "grad_norm": 0.8982148170471191,
      "learning_rate": 7.168801809481763e-07,
      "loss": 5.0613,
      "step": 2673
    },
    {
      "epoch": 0.9627362736273627,
      "grad_norm": 0.7683990001678467,
      "learning_rate": 7.031764052616852e-07,
      "loss": 4.9976,
      "step": 2674
    },
    {
      "epoch": 0.963096309630963,
      "grad_norm": 1.3601149320602417,
      "learning_rate": 6.896044142100433e-07,
      "loss": 5.3859,
      "step": 2675
    },
    {
      "epoch": 0.9634563456345635,
      "grad_norm": 0.7018367648124695,
      "learning_rate": 6.761642258056978e-07,
      "loss": 4.7254,
      "step": 2676
    },
    {
      "epoch": 0.9638163816381639,
      "grad_norm": 0.8308577537536621,
      "learning_rate": 6.628558578862021e-07,
      "loss": 5.1009,
      "step": 2677
    },
    {
      "epoch": 0.9641764176417642,
      "grad_norm": 0.6796321272850037,
      "learning_rate": 6.496793281141056e-07,
      "loss": 4.7258,
      "step": 2678
    },
    {
      "epoch": 0.9645364536453646,
      "grad_norm": 0.8252047300338745,
      "learning_rate": 6.366346539770529e-07,
      "loss": 4.5518,
      "step": 2679
    },
    {
      "epoch": 0.9648964896489649,
      "grad_norm": 0.7564746737480164,
      "learning_rate": 6.237218527876399e-07,
      "loss": 4.4544,
      "step": 2680
    },
    {
      "epoch": 0.9652565256525653,
      "grad_norm": 0.49899938702583313,
      "learning_rate": 6.109409416834688e-07,
      "loss": 4.7035,
      "step": 2681
    },
    {
      "epoch": 0.9656165616561656,
      "grad_norm": 0.8740639686584473,
      "learning_rate": 5.982919376270823e-07,
      "loss": 4.5884,
      "step": 2682
    },
    {
      "epoch": 0.965976597659766,
      "grad_norm": 1.1928685903549194,
      "learning_rate": 5.857748574059851e-07,
      "loss": 4.7686,
      "step": 2683
    },
    {
      "epoch": 0.9663366336633663,
      "grad_norm": 0.8761278986930847,
      "learning_rate": 5.733897176325665e-07,
      "loss": 4.9492,
      "step": 2684
    },
    {
      "epoch": 0.9666966696669667,
      "grad_norm": 0.6431811451911926,
      "learning_rate": 5.611365347441334e-07,
      "loss": 4.8721,
      "step": 2685
    },
    {
      "epoch": 0.967056705670567,
      "grad_norm": 0.7401660084724426,
      "learning_rate": 5.49015325002833e-07,
      "loss": 4.8349,
      "step": 2686
    },
    {
      "epoch": 0.9674167416741675,
      "grad_norm": 0.7588707804679871,
      "learning_rate": 5.370261044956971e-07,
      "loss": 4.6324,
      "step": 2687
    },
    {
      "epoch": 0.9677767776777678,
      "grad_norm": 0.5506744980812073,
      "learning_rate": 5.25168889134553e-07,
      "loss": 4.6068,
      "step": 2688
    },
    {
      "epoch": 0.9681368136813682,
      "grad_norm": 0.6155896782875061,
      "learning_rate": 5.134436946560572e-07,
      "loss": 4.7839,
      "step": 2689
    },
    {
      "epoch": 0.9684968496849685,
      "grad_norm": 0.7731359601020813,
      "learning_rate": 5.018505366216175e-07,
      "loss": 4.8457,
      "step": 2690
    },
    {
      "epoch": 0.9688568856885689,
      "grad_norm": 0.6838685870170593,
      "learning_rate": 4.903894304174372e-07,
      "loss": 4.9202,
      "step": 2691
    },
    {
      "epoch": 0.9692169216921692,
      "grad_norm": 0.7036203742027283,
      "learning_rate": 4.790603912544489e-07,
      "loss": 4.609,
      "step": 2692
    },
    {
      "epoch": 0.9695769576957696,
      "grad_norm": 0.6218310594558716,
      "learning_rate": 4.678634341683252e-07,
      "loss": 4.7149,
      "step": 2693
    },
    {
      "epoch": 0.9699369936993699,
      "grad_norm": 0.6616173982620239,
      "learning_rate": 4.567985740194236e-07,
      "loss": 4.5665,
      "step": 2694
    },
    {
      "epoch": 0.9702970297029703,
      "grad_norm": 0.677174985408783,
      "learning_rate": 4.458658254927972e-07,
      "loss": 5.1044,
      "step": 2695
    },
    {
      "epoch": 0.9706570657065706,
      "grad_norm": 0.7272735238075256,
      "learning_rate": 4.3506520309813947e-07,
      "loss": 5.0046,
      "step": 2696
    },
    {
      "epoch": 0.971017101710171,
      "grad_norm": 0.9171267151832581,
      "learning_rate": 4.2439672116982855e-07,
      "loss": 5.1378,
      "step": 2697
    },
    {
      "epoch": 0.9713771377137714,
      "grad_norm": 0.9265881180763245,
      "learning_rate": 4.138603938668273e-07,
      "loss": 5.1032,
      "step": 2698
    },
    {
      "epoch": 0.9717371737173718,
      "grad_norm": 0.8648788332939148,
      "learning_rate": 4.034562351727389e-07,
      "loss": 5.3116,
      "step": 2699
    },
    {
      "epoch": 0.9720972097209721,
      "grad_norm": 1.2427067756652832,
      "learning_rate": 3.9318425889574017e-07,
      "loss": 5.5403,
      "step": 2700
    },
    {
      "epoch": 0.9724572457245725,
      "grad_norm": 1.1116799116134644,
      "learning_rate": 3.8304447866857053e-07,
      "loss": 5.1365,
      "step": 2701
    },
    {
      "epoch": 0.9728172817281728,
      "grad_norm": 0.5051364898681641,
      "learning_rate": 3.73036907948543e-07,
      "loss": 4.3293,
      "step": 2702
    },
    {
      "epoch": 0.9731773177317732,
      "grad_norm": 1.0161361694335938,
      "learning_rate": 3.631615600174887e-07,
      "loss": 4.4407,
      "step": 2703
    },
    {
      "epoch": 0.9735373537353735,
      "grad_norm": 0.8723888993263245,
      "learning_rate": 3.5341844798174594e-07,
      "loss": 5.0663,
      "step": 2704
    },
    {
      "epoch": 0.9738973897389739,
      "grad_norm": 0.6905868649482727,
      "learning_rate": 3.4380758477219333e-07,
      "loss": 4.9586,
      "step": 2705
    },
    {
      "epoch": 0.9742574257425742,
      "grad_norm": 0.6786131262779236,
      "learning_rate": 3.343289831441387e-07,
      "loss": 4.4407,
      "step": 2706
    },
    {
      "epoch": 0.9746174617461746,
      "grad_norm": 0.7154151201248169,
      "learning_rate": 3.2498265567739717e-07,
      "loss": 4.6941,
      "step": 2707
    },
    {
      "epoch": 0.9749774977497749,
      "grad_norm": 0.7994034290313721,
      "learning_rate": 3.1576861477621287e-07,
      "loss": 4.857,
      "step": 2708
    },
    {
      "epoch": 0.9753375337533753,
      "grad_norm": 0.6819135546684265,
      "learning_rate": 3.0668687266925956e-07,
      "loss": 5.0646,
      "step": 2709
    },
    {
      "epoch": 0.9756975697569757,
      "grad_norm": 0.613059401512146,
      "learning_rate": 2.977374414096401e-07,
      "loss": 4.9047,
      "step": 2710
    },
    {
      "epoch": 0.9760576057605761,
      "grad_norm": 0.9619209170341492,
      "learning_rate": 2.889203328748424e-07,
      "loss": 4.5585,
      "step": 2711
    },
    {
      "epoch": 0.9764176417641764,
      "grad_norm": 0.5810762047767639,
      "learning_rate": 2.8023555876673937e-07,
      "loss": 4.8418,
      "step": 2712
    },
    {
      "epoch": 0.9767776777677768,
      "grad_norm": 0.6123738884925842,
      "learning_rate": 2.7168313061159964e-07,
      "loss": 4.5524,
      "step": 2713
    },
    {
      "epoch": 0.9771377137713771,
      "grad_norm": 0.6715987324714661,
      "learning_rate": 2.6326305976001055e-07,
      "loss": 4.9585,
      "step": 2714
    },
    {
      "epoch": 0.9774977497749775,
      "grad_norm": 0.6126198768615723,
      "learning_rate": 2.549753573869107e-07,
      "loss": 4.6261,
      "step": 2715
    },
    {
      "epoch": 0.9778577857785778,
      "grad_norm": 0.6247376203536987,
      "learning_rate": 2.468200344915572e-07,
      "loss": 4.7465,
      "step": 2716
    },
    {
      "epoch": 0.9782178217821782,
      "grad_norm": 0.6758084297180176,
      "learning_rate": 2.3879710189753656e-07,
      "loss": 4.6146,
      "step": 2717
    },
    {
      "epoch": 0.9785778577857785,
      "grad_norm": 0.8351864218711853,
      "learning_rate": 2.3090657025270912e-07,
      "loss": 4.9151,
      "step": 2718
    },
    {
      "epoch": 0.9789378937893789,
      "grad_norm": 1.0861241817474365,
      "learning_rate": 2.2314845002922025e-07,
      "loss": 4.75,
      "step": 2719
    },
    {
      "epoch": 0.9792979297929792,
      "grad_norm": 0.8706967830657959,
      "learning_rate": 2.15522751523467e-07,
      "loss": 5.033,
      "step": 2720
    },
    {
      "epoch": 0.9796579657965797,
      "grad_norm": 0.8538120985031128,
      "learning_rate": 2.080294848561426e-07,
      "loss": 4.5049,
      "step": 2721
    },
    {
      "epoch": 0.9800180018001801,
      "grad_norm": 0.567323625087738,
      "learning_rate": 2.0066865997212525e-07,
      "loss": 4.7168,
      "step": 2722
    },
    {
      "epoch": 0.9803780378037804,
      "grad_norm": 0.8506109118461609,
      "learning_rate": 1.9344028664056713e-07,
      "loss": 5.0249,
      "step": 2723
    },
    {
      "epoch": 0.9807380738073808,
      "grad_norm": 1.3589588403701782,
      "learning_rate": 1.8634437445479435e-07,
      "loss": 5.1037,
      "step": 2724
    },
    {
      "epoch": 0.9810981098109811,
      "grad_norm": 1.3143948316574097,
      "learning_rate": 1.7938093283236258e-07,
      "loss": 5.555,
      "step": 2725
    },
    {
      "epoch": 0.9814581458145815,
      "grad_norm": 1.7907097339630127,
      "learning_rate": 1.7254997101500137e-07,
      "loss": 4.8558,
      "step": 2726
    },
    {
      "epoch": 0.9818181818181818,
      "grad_norm": 0.6720486283302307,
      "learning_rate": 1.6585149806860324e-07,
      "loss": 4.4563,
      "step": 2727
    },
    {
      "epoch": 0.9821782178217822,
      "grad_norm": 0.6979610919952393,
      "learning_rate": 1.5928552288326793e-07,
      "loss": 4.5348,
      "step": 2728
    },
    {
      "epoch": 0.9825382538253825,
      "grad_norm": 0.8894920349121094,
      "learning_rate": 1.5285205417319149e-07,
      "loss": 4.7034,
      "step": 2729
    },
    {
      "epoch": 0.9828982898289829,
      "grad_norm": 0.6074763536453247,
      "learning_rate": 1.4655110047675503e-07,
      "loss": 4.5014,
      "step": 2730
    },
    {
      "epoch": 0.9832583258325832,
      "grad_norm": 0.7769091129302979,
      "learning_rate": 1.403826701564359e-07,
      "loss": 4.7347,
      "step": 2731
    },
    {
      "epoch": 0.9836183618361836,
      "grad_norm": 0.7279649376869202,
      "learning_rate": 1.3434677139885222e-07,
      "loss": 4.5844,
      "step": 2732
    },
    {
      "epoch": 0.983978397839784,
      "grad_norm": 0.8169944882392883,
      "learning_rate": 1.2844341221471824e-07,
      "loss": 4.939,
      "step": 2733
    },
    {
      "epoch": 0.9843384338433844,
      "grad_norm": 0.7737520337104797,
      "learning_rate": 1.2267260043885564e-07,
      "loss": 4.3103,
      "step": 2734
    },
    {
      "epoch": 0.9846984698469847,
      "grad_norm": 0.7852435111999512,
      "learning_rate": 1.170343437301491e-07,
      "loss": 4.6278,
      "step": 2735
    },
    {
      "epoch": 0.9850585058505851,
      "grad_norm": 0.5969253182411194,
      "learning_rate": 1.1152864957157949e-07,
      "loss": 4.5914,
      "step": 2736
    },
    {
      "epoch": 0.9854185418541854,
      "grad_norm": 0.8359677791595459,
      "learning_rate": 1.0615552527017958e-07,
      "loss": 4.5289,
      "step": 2737
    },
    {
      "epoch": 0.9857785778577858,
      "grad_norm": 0.8683612942695618,
      "learning_rate": 1.0091497795706728e-07,
      "loss": 4.5384,
      "step": 2738
    },
    {
      "epoch": 0.9861386138613861,
      "grad_norm": 0.6547925472259521,
      "learning_rate": 9.580701458736796e-08,
      "loss": 4.7082,
      "step": 2739
    },
    {
      "epoch": 0.9864986498649865,
      "grad_norm": 0.5458620190620422,
      "learning_rate": 9.083164194025883e-08,
      "loss": 4.6053,
      "step": 2740
    },
    {
      "epoch": 0.9868586858685868,
      "grad_norm": 0.6568934917449951,
      "learning_rate": 8.598886661895788e-08,
      "loss": 4.5799,
      "step": 2741
    },
    {
      "epoch": 0.9872187218721872,
      "grad_norm": 0.661688506603241,
      "learning_rate": 8.127869505069053e-08,
      "loss": 4.3995,
      "step": 2742
    },
    {
      "epoch": 0.9875787578757875,
      "grad_norm": 0.7148826122283936,
      "learning_rate": 7.670113348670071e-08,
      "loss": 4.607,
      "step": 2743
    },
    {
      "epoch": 0.987938793879388,
      "grad_norm": 0.5700961947441101,
      "learning_rate": 7.225618800222877e-08,
      "loss": 4.7385,
      "step": 2744
    },
    {
      "epoch": 0.9882988298829883,
      "grad_norm": 0.6849833130836487,
      "learning_rate": 6.794386449651135e-08,
      "loss": 4.9545,
      "step": 2745
    },
    {
      "epoch": 0.9886588658865887,
      "grad_norm": 0.6745620965957642,
      "learning_rate": 6.376416869277036e-08,
      "loss": 4.8148,
      "step": 2746
    },
    {
      "epoch": 0.989018901890189,
      "grad_norm": 0.9582657814025879,
      "learning_rate": 5.971710613821291e-08,
      "loss": 5.3346,
      "step": 2747
    },
    {
      "epoch": 0.9893789378937894,
      "grad_norm": 0.8358666300773621,
      "learning_rate": 5.5802682204009194e-08,
      "loss": 5.1755,
      "step": 2748
    },
    {
      "epoch": 0.9897389738973897,
      "grad_norm": 0.9131016135215759,
      "learning_rate": 5.2020902085303525e-08,
      "loss": 5.2653,
      "step": 2749
    },
    {
      "epoch": 0.9900990099009901,
      "grad_norm": 1.4401273727416992,
      "learning_rate": 4.837177080119215e-08,
      "loss": 5.5157,
      "step": 2750
    },
    {
      "epoch": 0.9904590459045904,
      "grad_norm": 0.676723301410675,
      "learning_rate": 4.485529319473436e-08,
      "loss": 4.8633,
      "step": 2751
    },
    {
      "epoch": 0.9908190819081908,
      "grad_norm": 0.7600442171096802,
      "learning_rate": 4.147147393290807e-08,
      "loss": 4.7974,
      "step": 2752
    },
    {
      "epoch": 0.9911791179117911,
      "grad_norm": 1.2827167510986328,
      "learning_rate": 3.8220317506654226e-08,
      "loss": 4.8168,
      "step": 2753
    },
    {
      "epoch": 0.9915391539153915,
      "grad_norm": 0.9814483523368835,
      "learning_rate": 3.510182823083241e-08,
      "loss": 4.736,
      "step": 2754
    },
    {
      "epoch": 0.991899189918992,
      "grad_norm": 0.9813688397407532,
      "learning_rate": 3.2116010244254144e-08,
      "loss": 4.7668,
      "step": 2755
    },
    {
      "epoch": 0.9922592259225923,
      "grad_norm": 0.7599554657936096,
      "learning_rate": 2.9262867509605163e-08,
      "loss": 4.7808,
      "step": 2756
    },
    {
      "epoch": 0.9926192619261927,
      "grad_norm": 0.5308656692504883,
      "learning_rate": 2.6542403813545334e-08,
      "loss": 4.571,
      "step": 2757
    },
    {
      "epoch": 0.992979297929793,
      "grad_norm": 0.5827195048332214,
      "learning_rate": 2.3954622766597657e-08,
      "loss": 4.7896,
      "step": 2758
    },
    {
      "epoch": 0.9933393339333934,
      "grad_norm": 0.9100288152694702,
      "learning_rate": 2.1499527803214846e-08,
      "loss": 4.8101,
      "step": 2759
    },
    {
      "epoch": 0.9936993699369937,
      "grad_norm": 1.0028470754623413,
      "learning_rate": 1.9177122181757156e-08,
      "loss": 4.6816,
      "step": 2760
    },
    {
      "epoch": 0.994059405940594,
      "grad_norm": 0.7796440720558167,
      "learning_rate": 1.698740898444795e-08,
      "loss": 4.6857,
      "step": 2761
    },
    {
      "epoch": 0.9944194419441944,
      "grad_norm": 0.6619350910186768,
      "learning_rate": 1.4930391117451426e-08,
      "loss": 4.8632,
      "step": 2762
    },
    {
      "epoch": 0.9947794779477948,
      "grad_norm": 0.6579605937004089,
      "learning_rate": 1.3006071310783797e-08,
      "loss": 4.8485,
      "step": 2763
    },
    {
      "epoch": 0.9951395139513951,
      "grad_norm": 0.7904688119888306,
      "learning_rate": 1.1214452118368802e-08,
      "loss": 4.6414,
      "step": 2764
    },
    {
      "epoch": 0.9954995499549955,
      "grad_norm": 0.6213950514793396,
      "learning_rate": 9.555535917993297e-09,
      "loss": 4.56,
      "step": 2765
    },
    {
      "epoch": 0.9958595859585958,
      "grad_norm": 0.5204569697380066,
      "learning_rate": 8.029324911351666e-09,
      "loss": 4.42,
      "step": 2766
    },
    {
      "epoch": 0.9962196219621963,
      "grad_norm": 0.5820871591567993,
      "learning_rate": 6.635821124001406e-09,
      "loss": 4.7025,
      "step": 2767
    },
    {
      "epoch": 0.9965796579657966,
      "grad_norm": 0.5928208231925964,
      "learning_rate": 5.375026405352035e-09,
      "loss": 5.1903,
      "step": 2768
    },
    {
      "epoch": 0.996939693969397,
      "grad_norm": 0.5016415119171143,
      "learning_rate": 4.246942428709488e-09,
      "loss": 4.7524,
      "step": 2769
    },
    {
      "epoch": 0.9972997299729973,
      "grad_norm": 0.650364339351654,
      "learning_rate": 3.2515706912539245e-09,
      "loss": 4.3618,
      "step": 2770
    },
    {
      "epoch": 0.9976597659765977,
      "grad_norm": 1.0009171962738037,
      "learning_rate": 2.388912514017516e-09,
      "loss": 4.9389,
      "step": 2771
    },
    {
      "epoch": 0.998019801980198,
      "grad_norm": 1.0807517766952515,
      "learning_rate": 1.6589690418955528e-09,
      "loss": 5.1394,
      "step": 2772
    },
    {
      "epoch": 0.9983798379837984,
      "grad_norm": 0.9495770335197449,
      "learning_rate": 1.0617412436464413e-09,
      "loss": 5.1927,
      "step": 2773
    },
    {
      "epoch": 0.9987398739873987,
      "grad_norm": 0.8993694186210632,
      "learning_rate": 5.972299119250125e-10,
      "loss": 5.2012,
      "step": 2774
    },
    {
      "epoch": 0.9990999099909991,
      "grad_norm": 1.2877267599105835,
      "learning_rate": 2.6543566319370275e-10,
      "loss": 5.2814,
      "step": 2775
    },
    {
      "epoch": 0.9994599459945994,
      "grad_norm": 0.5570570826530457,
      "learning_rate": 6.63589378113727e-11,
      "loss": 4.7664,
      "step": 2776
    },
    {
      "epoch": 0.9998199819981998,
      "grad_norm": 0.6813939213752747,
      "learning_rate": 0.0,
      "loss": 4.551,
      "step": 2777
    }
  ],
  "logging_steps": 1,
  "max_steps": 2777,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 695,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3015768690130944.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}